telegraf/plugins/outputs/pagerduty/pagerduty.go

134 lines
2.9 KiB
Go
Raw Normal View History

2016-05-27 16:02:59 +00:00
package pagerduty
import (
"fmt"
"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/plugins/outputs"
)
2016-08-21 07:35:45 +00:00
type Tag struct {
Name string `toml:"name"`
Value string `toml:"value"`
}
2016-05-27 16:02:59 +00:00
type PD struct {
2016-08-21 07:35:45 +00:00
ServiceKey string `toml:"service_key"`
Desc string `toml:"description"`
Metric string `toml:"metric"`
Field string `toml:"field"`
Expression string `toml:"expression"`
TagFilter []Tag `toml:"tags"`
incidentKey string `toml:"-"`
2016-05-27 16:02:59 +00:00
}
var sampleConfig = `
## PagerDuty service key
service_key = <SERVICE KEY>
## Metric name that will be checked
metric = "cpu"
## Description of the check
description = "Check CPU"
## Name of the metric field which will be used to check
field = "time_iowait"
## Tag filter, when present only metrics with the specified tag value
## will be considered for further processing
[[outputs.pagerduty.tag_filter]]
role = "web-server"
## Expression is used to evaluate the alert
expression = "> 50.0"
2016-05-27 16:02:59 +00:00
`
func (p *PD) Connect() error {
return nil
}
func (p *PD) Close() error {
return nil
}
func (p *PD) SampleConfig() string {
return sampleConfig
}
func (p *PD) Description() string {
return "Send PagerDuty alert based on metric values"
}
func (p *PD) isMatch(metric telegraf.Metric) bool {
2016-05-27 16:02:59 +00:00
if p.Metric != metric.Name() {
return false
}
2016-08-21 07:35:45 +00:00
for _, tag := range p.TagFilter {
v, ok := metric.Tags()[tag.Name]
if !ok || (v != tag.Value) {
2016-05-27 16:02:59 +00:00
return false
}
}
return true
2016-05-27 16:02:59 +00:00
}
func init() {
outputs.Add("pagerduty", func() telegraf.Output {
return &PD{}
})
2016-05-27 16:02:59 +00:00
}
func (p *PD) Write(metrics []telegraf.Metric) error {
for _, metric := range metrics {
if p.isMatch(metric) {
p.processForEvent(metric)
}
}
return nil
2016-05-27 16:02:59 +00:00
}
func (p *PD) processForEvent(metric telegraf.Metric) error {
value, ok := metric.Fields()[p.Field]
if !ok {
return fmt.Errorf("Filed '%s' absent", p.Field)
2016-05-27 16:02:59 +00:00
}
expr := fmt.Sprintf("%v %s", value, p.Expression)
trigger, err := evalBoolExpr(expr)
if err != nil {
return err
}
2016-06-03 21:53:42 +00:00
event := Event{
ServiceKey: p.ServiceKey,
Client: "telegraf",
2016-05-27 16:02:59 +00:00
}
m := make(map[string]interface{})
m["tags"] = metric.Tags()
m["fields"] = metric.Fields()
m["name"] = metric.Name()
m["timestamp"] = metric.UnixNano() / 1000000000
event.Details = m
event.Description = p.Desc
// either retrigger of create a new event
if trigger {
event.Type = "trigger"
if p.incidentKey != "" {
// already triggered incident, retriggering it
// PagerDuty dedups alerts when we reuse the incidentkey
event.IncidentKey = p.incidentKey
}
resp, err := createEvent(event)
if err != nil {
return err
2016-05-27 16:02:59 +00:00
}
p.incidentKey = resp.IncidentKey
return nil
}
if p.incidentKey != "" {
event.IncidentKey = p.incidentKey
event.Type = "resolve"
2016-06-03 21:53:42 +00:00
_, err := createEvent(event)
2016-05-27 16:02:59 +00:00
if err != nil {
return err
}
// incident is resolved hence reset incident key
p.incidentKey = ""
2016-05-27 16:02:59 +00:00
}
return nil
}