From e1c3800cd98cfe92a4781cd604808bc7fb0f5b53 Mon Sep 17 00:00:00 2001 From: Cameron Sparr Date: Thu, 7 Jul 2016 12:15:47 +0200 Subject: [PATCH 01/47] Prometheus parser fix, parse headers properly closes #1458 --- CHANGELOG.md | 3 +- plugins/inputs/prometheus/parser.go | 117 ++++++++--------------- plugins/inputs/prometheus/parser_test.go | 22 ++--- plugins/inputs/prometheus/prometheus.go | 19 +--- 4 files changed, 55 insertions(+), 106 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3f130fe60..0cb8d3349 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,4 @@ -## v1.0 +## v1.0 [unreleased] ### Release Notes @@ -42,6 +42,7 @@ should now look like: - [#1405](https://github.com/influxdata/telegraf/issues/1405): Fix memory/connection leak in prometheus input plugin. - [#1378](https://github.com/influxdata/telegraf/issues/1378): Trim BOM from config file for Windows support. - [#1339](https://github.com/influxdata/telegraf/issues/1339): Prometheus client output panic on service reload. +- [#1461](https://github.com/influxdata/telegraf/pull/1461): Prometheus parser, protobuf format header fix. ## v1.0 beta 2 [2016-06-21] diff --git a/plugins/inputs/prometheus/parser.go b/plugins/inputs/prometheus/parser.go index e8a7c0892..3c9ddc503 100644 --- a/plugins/inputs/prometheus/parser.go +++ b/plugins/inputs/prometheus/parser.go @@ -10,6 +10,7 @@ import ( "io" "math" "mime" + "net/http" "time" "github.com/influxdata/telegraf" @@ -19,17 +20,9 @@ import ( "github.com/prometheus/common/expfmt" ) -// PrometheusParser is an object for Parsing incoming metrics. 
-type PrometheusParser struct { - // PromFormat - PromFormat map[string]string - // DefaultTags will be added to every parsed metric - // DefaultTags map[string]string -} - // Parse returns a slice of Metrics from a text representation of a // metrics -func (p *PrometheusParser) Parse(buf []byte) ([]telegraf.Metric, error) { +func Parse(buf []byte, header http.Header) ([]telegraf.Metric, error) { var metrics []telegraf.Metric var parser expfmt.TextParser // parse even if the buffer begins with a newline @@ -38,97 +31,71 @@ func (p *PrometheusParser) Parse(buf []byte) ([]telegraf.Metric, error) { buffer := bytes.NewBuffer(buf) reader := bufio.NewReader(buffer) - // Get format - mediatype, params, err := mime.ParseMediaType(p.PromFormat["Content-Type"]) + mediatype, params, err := mime.ParseMediaType(header.Get("Content-Type")) // Prepare output metricFamilies := make(map[string]*dto.MetricFamily) + if err == nil && mediatype == "application/vnd.google.protobuf" && params["encoding"] == "delimited" && params["proto"] == "io.prometheus.client.MetricFamily" { for { - metricFamily := &dto.MetricFamily{} - if _, err = pbutil.ReadDelimited(reader, metricFamily); err != nil { - if err == io.EOF { + mf := &dto.MetricFamily{} + if _, ierr := pbutil.ReadDelimited(reader, mf); ierr != nil { + if ierr == io.EOF { break } - return nil, fmt.Errorf("reading metric family protocol buffer failed: %s", err) + return nil, fmt.Errorf("reading metric family protocol buffer failed: %s", ierr) } - metricFamilies[metricFamily.GetName()] = metricFamily + metricFamilies[mf.GetName()] = mf } } else { metricFamilies, err = parser.TextToMetricFamilies(reader) if err != nil { return nil, fmt.Errorf("reading text format failed: %s", err) } - // read metrics - for metricName, mf := range metricFamilies { - for _, m := range mf.Metric { - // reading tags - tags := makeLabels(m) - /* - for key, value := range p.DefaultTags { - tags[key] = value - } - */ - // reading fields - fields := 
make(map[string]interface{}) - if mf.GetType() == dto.MetricType_SUMMARY { - // summary metric - fields = makeQuantiles(m) - fields["count"] = float64(m.GetSummary().GetSampleCount()) - fields["sum"] = float64(m.GetSummary().GetSampleSum()) - } else if mf.GetType() == dto.MetricType_HISTOGRAM { - // historgram metric - fields = makeBuckets(m) - fields["count"] = float64(m.GetHistogram().GetSampleCount()) - fields["sum"] = float64(m.GetHistogram().GetSampleSum()) + } + // read metrics + for metricName, mf := range metricFamilies { + for _, m := range mf.Metric { + // reading tags + tags := makeLabels(m) + // reading fields + fields := make(map[string]interface{}) + if mf.GetType() == dto.MetricType_SUMMARY { + // summary metric + fields = makeQuantiles(m) + fields["count"] = float64(m.GetSummary().GetSampleCount()) + fields["sum"] = float64(m.GetSummary().GetSampleSum()) + } else if mf.GetType() == dto.MetricType_HISTOGRAM { + // historgram metric + fields = makeBuckets(m) + fields["count"] = float64(m.GetHistogram().GetSampleCount()) + fields["sum"] = float64(m.GetHistogram().GetSampleSum()) + + } else { + // standard metric + fields = getNameAndValue(m) + } + // converting to telegraf metric + if len(fields) > 0 { + var t time.Time + if m.TimestampMs != nil && *m.TimestampMs > 0 { + t = time.Unix(0, *m.TimestampMs*1000000) } else { - // standard metric - fields = getNameAndValue(m) + t = time.Now() } - // converting to telegraf metric - if len(fields) > 0 { - var t time.Time - if m.TimestampMs != nil && *m.TimestampMs > 0 { - t = time.Unix(0, *m.TimestampMs*1000000) - } else { - t = time.Now() - } - metric, err := telegraf.NewMetric(metricName, tags, fields, t) - if err == nil { - metrics = append(metrics, metric) - } + metric, err := telegraf.NewMetric(metricName, tags, fields, t) + if err == nil { + metrics = append(metrics, metric) } } } } + return metrics, err } -// Parse one line -func (p *PrometheusParser) ParseLine(line string) (telegraf.Metric, error) { - 
metrics, err := p.Parse([]byte(line + "\n")) - - if err != nil { - return nil, err - } - - if len(metrics) < 1 { - return nil, fmt.Errorf( - "Can not parse the line: %s, for data format: prometheus", line) - } - - return metrics[0], nil -} - -/* -// Set default tags -func (p *PrometheusParser) SetDefaultTags(tags map[string]string) { - p.DefaultTags = tags -} -*/ - // Get Quantiles from summary metric func makeQuantiles(m *dto.Metric) map[string]interface{} { fields := make(map[string]interface{}) diff --git a/plugins/inputs/prometheus/parser_test.go b/plugins/inputs/prometheus/parser_test.go index 6259a4ef6..4f2a8516f 100644 --- a/plugins/inputs/prometheus/parser_test.go +++ b/plugins/inputs/prometheus/parser_test.go @@ -1,6 +1,7 @@ package prometheus import ( + "net/http" "testing" "time" @@ -101,10 +102,8 @@ cpu,host=foo,datacenter=us-east usage_idle=99,usage_busy=1 ` func TestParseValidPrometheus(t *testing.T) { - parser := PrometheusParser{} - // Gauge value - metrics, err := parser.Parse([]byte(validUniqueGauge)) + metrics, err := Parse([]byte(validUniqueGauge), http.Header{}) assert.NoError(t, err) assert.Len(t, metrics, 1) assert.Equal(t, "cadvisor_version_info", metrics[0].Name()) @@ -118,8 +117,7 @@ func TestParseValidPrometheus(t *testing.T) { }, metrics[0].Tags()) // Counter value - //parser.SetDefaultTags(map[string]string{"mytag": "mytagvalue"}) - metrics, err = parser.Parse([]byte(validUniqueCounter)) + metrics, err = Parse([]byte(validUniqueCounter), http.Header{}) assert.NoError(t, err) assert.Len(t, metrics, 1) assert.Equal(t, "get_token_fail_count", metrics[0].Name()) @@ -129,8 +127,8 @@ func TestParseValidPrometheus(t *testing.T) { assert.Equal(t, map[string]string{}, metrics[0].Tags()) // Summary data - //parser.SetDefaultTags(map[string]string{}) - metrics, err = parser.Parse([]byte(validUniqueSummary)) + //SetDefaultTags(map[string]string{}) + metrics, err = Parse([]byte(validUniqueSummary), http.Header{}) assert.NoError(t, err) assert.Len(t, 
metrics, 1) assert.Equal(t, "http_request_duration_microseconds", metrics[0].Name()) @@ -144,7 +142,7 @@ func TestParseValidPrometheus(t *testing.T) { assert.Equal(t, map[string]string{"handler": "prometheus"}, metrics[0].Tags()) // histogram data - metrics, err = parser.Parse([]byte(validUniqueHistogram)) + metrics, err = Parse([]byte(validUniqueHistogram), http.Header{}) assert.NoError(t, err) assert.Len(t, metrics, 1) assert.Equal(t, "apiserver_request_latencies", metrics[0].Name()) @@ -165,11 +163,3 @@ func TestParseValidPrometheus(t *testing.T) { metrics[0].Tags()) } - -func TestParseLineInvalidPrometheus(t *testing.T) { - parser := PrometheusParser{} - metric, err := parser.ParseLine(validUniqueLine) - assert.NotNil(t, err) - assert.Nil(t, metric) - -} diff --git a/plugins/inputs/prometheus/prometheus.go b/plugins/inputs/prometheus/prometheus.go index 2eabcf92c..12f7fd38e 100644 --- a/plugins/inputs/prometheus/prometheus.go +++ b/plugins/inputs/prometheus/prometheus.go @@ -13,6 +13,8 @@ import ( "time" ) +const acceptHeader = `application/vnd.google.protobuf;proto=io.prometheus.client.MetricFamily;encoding=delimited;q=0.7,text/plain;version=0.0.4;q=0.3` + type Prometheus struct { Urls []string @@ -86,7 +88,7 @@ var client = &http.Client{ func (p *Prometheus) gatherURL(url string, acc telegraf.Accumulator) error { collectDate := time.Now() var req, err = http.NewRequest("GET", url, nil) - req.Header = make(http.Header) + req.Header.Add("Accept", acceptHeader) var token []byte var resp *http.Response @@ -129,20 +131,9 @@ func (p *Prometheus) gatherURL(url string, acc telegraf.Accumulator) error { return fmt.Errorf("error reading body: %s", err) } - // Headers - headers := make(map[string]string) - for key, value := range headers { - headers[key] = value - } - - // Prepare Prometheus parser config - promparser := PrometheusParser{ - PromFormat: headers, - } - - metrics, err := promparser.Parse(body) + metrics, err := Parse(body, resp.Header) if err != nil { - 
return fmt.Errorf("error getting processing samples for %s: %s", + return fmt.Errorf("error reading metrics for %s: %s", url, err) } // Add (or not) collected metrics From c873937356cec0dda64007d05e420daab9004ccc Mon Sep 17 00:00:00 2001 From: Jack Zampolin Date: Sun, 10 Jul 2016 03:11:43 -0700 Subject: [PATCH 02/47] Add note about influxdb compatability (#1465) --- plugins/inputs/logparser/README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/plugins/inputs/logparser/README.md b/plugins/inputs/logparser/README.md index 1ff50bddd..64e8909f5 100644 --- a/plugins/inputs/logparser/README.md +++ b/plugins/inputs/logparser/README.md @@ -32,6 +32,8 @@ regex patterns. ''' ``` +> **Note:** The InfluxDB log pattern in the default configuration only works for Influx versions 1.0.0-beta1 or higher. + ## Grok Parser The grok parser uses a slightly modified version of logstash "grok" patterns, From d14e7536ab34e05e4bb003921a812dc2accb188f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fran=C3=A7ois=20de=20Metz?= Date: Sun, 10 Jul 2016 12:12:33 +0200 Subject: [PATCH 03/47] Cleanup the list of plugins. (#1423) Github and Rollbar are now part of the webhooks plugin. --- README.md | 2 -- 1 file changed, 2 deletions(-) diff --git a/README.md b/README.md index 53e672534..8264be7f6 100644 --- a/README.md +++ b/README.md @@ -221,8 +221,6 @@ Telegraf can also collect metrics via the following service plugins: * [github](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/webhooks/github) * [rollbar](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/webhooks/rollbar) * [nsq_consumer](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/nsq_consumer) -* [github_webhooks](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/github_webhooks) -* [rollbar_webhooks](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/rollbar_webhooks) We'll be adding support for many more over the coming months. 
Read on if you want to add support for another service or third-party API. From 5f0a63f554861e1ea8f39a6293f09d63b1da85d8 Mon Sep 17 00:00:00 2001 From: Vladimir S Date: Sun, 10 Jul 2016 15:17:53 +0300 Subject: [PATCH 04/47] fixes #1450 (#1472) --- CHANGELOG.md | 1 + plugins/inputs/system/disk.go | 6 +++--- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0cb8d3349..b988508ae 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -36,6 +36,7 @@ should now look like: ### Bugfixes +- [#1472](https://github.com/influxdata/telegraf/pull/1472): diskio input plugin: set 'skip_serial_number = true' by default to avoid high cardinality. - [#1426](https://github.com/influxdata/telegraf/pull/1426): nil metrics panic fix. - [#1384](https://github.com/influxdata/telegraf/pull/1384): Fix datarace in apache input plugin. - [#1399](https://github.com/influxdata/telegraf/issues/1399): Add `read_repairs` statistics to riak plugin. diff --git a/plugins/inputs/system/disk.go b/plugins/inputs/system/disk.go index 5784a7322..f79295294 100644 --- a/plugins/inputs/system/disk.go +++ b/plugins/inputs/system/disk.go @@ -92,8 +92,8 @@ var diskIoSampleConfig = ` ## disk partitions. ## Setting devices will restrict the stats to the specified devices. # devices = ["sda", "sdb"] - ## Uncomment the following line if you do not need disk serial numbers. - # skip_serial_number = true + ## Uncomment the following line if you need disk serial numbers. 
+ # skip_serial_number = false ` func (_ *DiskIOStats) SampleConfig() string { @@ -151,6 +151,6 @@ func init() { }) inputs.Add("diskio", func() telegraf.Input { - return &DiskIOStats{ps: &systemPS{}} + return &DiskIOStats{ps: &systemPS{}, SkipSerialNumber: true} }) } From 6efe91ea9cece66e864b9e472f48811eaf61365a Mon Sep 17 00:00:00 2001 From: Cameron Sparr Date: Sun, 10 Jul 2016 14:47:47 +0100 Subject: [PATCH 05/47] prometheus_client, implement Collector interface closes #1334 --- CHANGELOG.md | 1 + .../prometheus_client/prometheus_client.go | 70 ++++++++++--------- 2 files changed, 38 insertions(+), 33 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b988508ae..9e4c9a968 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -44,6 +44,7 @@ should now look like: - [#1378](https://github.com/influxdata/telegraf/issues/1378): Trim BOM from config file for Windows support. - [#1339](https://github.com/influxdata/telegraf/issues/1339): Prometheus client output panic on service reload. - [#1461](https://github.com/influxdata/telegraf/pull/1461): Prometheus parser, protobuf format header fix. +- [#1334](https://github.com/influxdata/telegraf/issues/1334): Prometheus output, metric refresh and caching fixes. 
## v1.0 beta 2 [2016-06-21] diff --git a/plugins/outputs/prometheus_client/prometheus_client.go b/plugins/outputs/prometheus_client/prometheus_client.go index 804ae1fad..790784a2b 100644 --- a/plugins/outputs/prometheus_client/prometheus_client.go +++ b/plugins/outputs/prometheus_client/prometheus_client.go @@ -6,6 +6,7 @@ import ( "net/http" "regexp" "strings" + "sync" "github.com/influxdata/telegraf" "github.com/influxdata/telegraf/plugins/outputs" @@ -26,6 +27,10 @@ var ( type PrometheusClient struct { Listen string + + metrics map[string]prometheus.Metric + + sync.Mutex } var sampleConfig = ` @@ -34,6 +39,7 @@ var sampleConfig = ` ` func (p *PrometheusClient) Start() error { + prometheus.MustRegister(p) defer func() { if r := recover(); r != nil { // recovering from panic here because there is no way to stop a @@ -78,7 +84,27 @@ func (p *PrometheusClient) Description() string { return "Configuration for the Prometheus client to spawn" } +// Implements prometheus.Collector +func (p *PrometheusClient) Describe(ch chan<- *prometheus.Desc) { + prometheus.NewGauge(prometheus.GaugeOpts{Name: "Dummy", Help: "Dummy"}).Describe(ch) +} + +// Implements prometheus.Collector +func (p *PrometheusClient) Collect(ch chan<- prometheus.Metric) { + p.Lock() + defer p.Unlock() + + for _, m := range p.metrics { + ch <- m + } +} + func (p *PrometheusClient) Write(metrics []telegraf.Metric) error { + p.Lock() + defer p.Unlock() + + p.metrics = make(map[string]prometheus.Metric) + if len(metrics) == 0 { return nil } @@ -124,45 +150,23 @@ func (p *PrometheusClient) Write(metrics []telegraf.Metric) error { continue } - mVec := prometheus.NewUntypedVec( - prometheus.UntypedOpts{ - Name: mname, - Help: "Telegraf collected metric", - }, - labels, - ) - collector, err := prometheus.RegisterOrGet(mVec) - if err != nil { - log.Printf("prometheus_client: Metric failed to register with prometheus, %s", err) - continue - } - mVec, ok := collector.(*prometheus.UntypedVec) - if !ok { - continue - 
} - + desc := prometheus.NewDesc(mname, "Telegraf collected metric", nil, l) + var metric prometheus.Metric + var err error switch val := val.(type) { case int64: - m, err := mVec.GetMetricWith(l) - if err != nil { - log.Printf("ERROR Getting metric in Prometheus output, "+ - "key: %s, labels: %v,\nerr: %s\n", - mname, l, err.Error()) - continue - } - m.Set(float64(val)) + metric, err = prometheus.NewConstMetric(desc, prometheus.UntypedValue, float64(val)) case float64: - m, err := mVec.GetMetricWith(l) - if err != nil { - log.Printf("ERROR Getting metric in Prometheus output, "+ - "key: %s, labels: %v,\nerr: %s\n", - mname, l, err.Error()) - continue - } - m.Set(val) + metric, err = prometheus.NewConstMetric(desc, prometheus.UntypedValue, val) default: continue } + if err != nil { + log.Printf("ERROR creating prometheus metric, "+ + "key: %s, labels: %v,\nerr: %s\n", + mname, l, err.Error()) + } + p.metrics[desc.String()] = metric } } return nil From bb4f18ca887bd4ed66c11a6c01f1768be41a5b22 Mon Sep 17 00:00:00 2001 From: Cameron Sparr Date: Thu, 14 Jul 2016 08:52:37 -0600 Subject: [PATCH 06/47] temp ci fix, aerospike changed their metrics see http://www.aerospike.com/docs/operations/upgrade/stats_to_3_9 TODO change aerospike input plugin to use official go client library. 
--- Makefile | 4 ++-- plugins/inputs/aerospike/aerospike_test.go | 13 ------------- 2 files changed, 2 insertions(+), 15 deletions(-) diff --git a/Makefile b/Makefile index 816c93cf1..6d4f8c35e 100644 --- a/Makefile +++ b/Makefile @@ -55,7 +55,7 @@ docker-run: docker run --name postgres -p "5432:5432" -d postgres docker run --name rabbitmq -p "15672:15672" -p "5672:5672" -d rabbitmq:3-management docker run --name redis -p "6379:6379" -d redis - docker run --name aerospike -p "3000:3000" -d aerospike + docker run --name aerospike -p "3000:3000" -d aerospike/aerospike-server docker run --name nsq -p "4150:4150" -d nsqio/nsq /nsqd docker run --name mqtt -p "1883:1883" -d ncarlier/mqtt docker run --name riemann -p "5555:5555" -d blalor/riemann @@ -68,7 +68,7 @@ docker-run-circle: -e ADVERTISED_PORT=9092 \ -p "2181:2181" -p "9092:9092" \ -d spotify/kafka - docker run --name aerospike -p "3000:3000" -d aerospike + docker run --name aerospike -p "3000:3000" -d aerospike/aerospike-server docker run --name nsq -p "4150:4150" -d nsqio/nsq /nsqd docker run --name mqtt -p "1883:1883" -d ncarlier/mqtt docker run --name riemann -p "5555:5555" -d blalor/riemann diff --git a/plugins/inputs/aerospike/aerospike_test.go b/plugins/inputs/aerospike/aerospike_test.go index 74b70eb1d..2717a15b9 100644 --- a/plugins/inputs/aerospike/aerospike_test.go +++ b/plugins/inputs/aerospike/aerospike_test.go @@ -22,19 +22,6 @@ func TestAerospikeStatistics(t *testing.T) { err := a.Gather(&acc) require.NoError(t, err) - - // Only use a few of the metrics - asMetrics := []string{ - "transactions", - "stat_write_errs", - "stat_read_reqs", - "stat_write_reqs", - } - - for _, metric := range asMetrics { - assert.True(t, acc.HasIntField("aerospike", metric), metric) - } - } func TestAerospikeMsgLenFromToBytes(t *testing.T) { From 7b550c11cb2aee6ec91bce50c32ebff41e25a737 Mon Sep 17 00:00:00 2001 From: Kostas Botsas Date: Thu, 14 Jul 2016 18:06:00 +0300 Subject: [PATCH 07/47] Documentation for load 
balancing on graphite output servers (#1469) * Added documentation for load balancing on graphite output servers * clarifications * updates1 * updates2 * updates3 --- etc/telegraf.conf | 2 ++ plugins/outputs/graphite/README.md | 2 ++ plugins/outputs/graphite/graphite.go | 2 ++ 3 files changed, 6 insertions(+) diff --git a/etc/telegraf.conf b/etc/telegraf.conf index c9011536a..10e949302 100644 --- a/etc/telegraf.conf +++ b/etc/telegraf.conf @@ -197,6 +197,8 @@ # # Configuration for Graphite server to send metrics to # [[outputs.graphite]] # ## TCP endpoint for your graphite instance. +# ## If multiple endpoints are configured, the output will be load balanced. +# ## Only one of the endpoints will be written to with each iteration. # servers = ["localhost:2003"] # ## Prefix metrics name # prefix = "" diff --git a/plugins/outputs/graphite/README.md b/plugins/outputs/graphite/README.md index 2de699dea..3e2369e21 100644 --- a/plugins/outputs/graphite/README.md +++ b/plugins/outputs/graphite/README.md @@ -9,6 +9,8 @@ via raw TCP. # Configuration for Graphite server to send metrics to [[outputs.graphite]] ## TCP endpoint for your graphite instance. + ## If multiple endpoints are configured, the output will be load balanced. + ## Only one of the endpoints will be written to with each iteration. servers = ["localhost:2003"] ## Prefix metrics name prefix = "" diff --git a/plugins/outputs/graphite/graphite.go b/plugins/outputs/graphite/graphite.go index 30aee0eb6..4e127ed7c 100644 --- a/plugins/outputs/graphite/graphite.go +++ b/plugins/outputs/graphite/graphite.go @@ -25,6 +25,8 @@ type Graphite struct { var sampleConfig = ` ## TCP endpoint for your graphite instance. + ## If multiple endpoints are configured, output will be load balanced. + ## Only one of the endpoints will be written to with each iteration. 
servers = ["localhost:2003"] ## Prefix metrics name prefix = "" From 69ab8a645c5aceddc48d3882c3db769071fe8ce0 Mon Sep 17 00:00:00 2001 From: Cameron Sparr Date: Tue, 12 Jul 2016 14:44:11 -0600 Subject: [PATCH 08/47] graphite output: set write deadline on TCP connection --- plugins/outputs/graphite/graphite.go | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/plugins/outputs/graphite/graphite.go b/plugins/outputs/graphite/graphite.go index 4e127ed7c..fb95aff83 100644 --- a/plugins/outputs/graphite/graphite.go +++ b/plugins/outputs/graphite/graphite.go @@ -2,7 +2,6 @@ package graphite import ( "errors" - "fmt" "log" "math/rand" "net" @@ -98,9 +97,12 @@ func (g *Graphite) Write(metrics []telegraf.Metric) error { // Send data to a random server p := rand.Perm(len(g.conns)) for _, n := range p { - if _, e := fmt.Fprint(g.conns[n], graphitePoints); e != nil { + if g.Timeout > 0 { + g.conns[n].SetWriteDeadline(time.Now().Add(time.Duration(g.Timeout) * time.Second)) + } + if _, e := g.conns[n].Write([]byte(graphitePoints)); e != nil { // Error - log.Println("ERROR: " + err.Error()) + log.Println("ERROR: " + e.Error()) // Let's try the next one } else { // Success From 7c9b312cee6228c7e7af4e9fa4b86b179f99d444 Mon Sep 17 00:00:00 2001 From: Cameron Sparr Date: Tue, 12 Jul 2016 15:31:08 -0600 Subject: [PATCH 09/47] Make race detector build in CI --- plugins/serializers/graphite/graphite.go | 7 ++++--- scripts/circle-test.sh | 2 ++ 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/plugins/serializers/graphite/graphite.go b/plugins/serializers/graphite/graphite.go index 43e32c244..db114ce9d 100644 --- a/plugins/serializers/graphite/graphite.go +++ b/plugins/serializers/graphite/graphite.go @@ -55,8 +55,9 @@ func (s *GraphiteSerializer) SerializeBucketName( measurement string, tags map[string]string, ) string { - if s.Template == "" { - s.Template = DEFAULT_TEMPLATE + template := s.Template + if template == "" { + template = DEFAULT_TEMPLATE } 
tagsCopy := make(map[string]string) for k, v := range tags { @@ -64,7 +65,7 @@ func (s *GraphiteSerializer) SerializeBucketName( } var out []string - templateParts := strings.Split(s.Template, ".") + templateParts := strings.Split(template, ".") for _, templatePart := range templateParts { switch templatePart { case "measurement": diff --git a/scripts/circle-test.sh b/scripts/circle-test.sh index 2333b5b73..93bafe320 100755 --- a/scripts/circle-test.sh +++ b/scripts/circle-test.sh @@ -69,6 +69,8 @@ exit_if_fail telegraf -config $tmpdir/config.toml \ -test -input-filter cpu:mem cat $GOPATH/bin/telegraf | gzip > $CIRCLE_ARTIFACTS/telegraf.gz +go build -o telegraf-race -race -ldflags "-X main.version=${VERSION}-RACE" cmd/telegraf/telegraf.go +cat telegraf-race | gzip > $CIRCLE_ARTIFACTS/telegraf-race.gz eval "git describe --exact-match HEAD" if [ $? -eq 0 ]; then From 821d3fafa6562acce148b1e08c3c0b310b6f0639 Mon Sep 17 00:00:00 2001 From: Cameron Sparr Date: Tue, 12 Jul 2016 17:08:03 -0600 Subject: [PATCH 10/47] Refactor SerializeBucketName to be read-only for struct fields --- plugins/outputs/librato/librato.go | 3 +-- plugins/serializers/graphite/graphite.go | 20 +++++++------- plugins/serializers/graphite/graphite_test.go | 27 +++++++------------ 3 files changed, 21 insertions(+), 29 deletions(-) diff --git a/plugins/outputs/librato/librato.go b/plugins/outputs/librato/librato.go index 15d6adbb2..ccb2acd9a 100644 --- a/plugins/outputs/librato/librato.go +++ b/plugins/outputs/librato/librato.go @@ -153,8 +153,7 @@ func (l *Librato) Description() string { func (l *Librato) buildGauges(m telegraf.Metric) ([]*Gauge, error) { gauges := []*Gauge{} - serializer := graphite.GraphiteSerializer{Template: l.Template} - bucket := serializer.SerializeBucketName(m.Name(), m.Tags()) + bucket := graphite.SerializeBucketName(m.Name(), m.Tags(), l.Template, "") for fieldName, value := range m.Fields() { gauge := &Gauge{ Name: graphite.InsertField(bucket, fieldName), diff --git 
a/plugins/serializers/graphite/graphite.go b/plugins/serializers/graphite/graphite.go index db114ce9d..6e5c4e879 100644 --- a/plugins/serializers/graphite/graphite.go +++ b/plugins/serializers/graphite/graphite.go @@ -10,22 +10,23 @@ import ( const DEFAULT_TEMPLATE = "host.tags.measurement.field" -var fieldDeleter = strings.NewReplacer(".FIELDNAME", "", "FIELDNAME.", "") +var ( + fieldDeleter = strings.NewReplacer(".FIELDNAME", "", "FIELDNAME.", "") + sanitizedChars = strings.NewReplacer("/", "-", "@", "-", "*", "-", " ", "_", "..", ".") +) type GraphiteSerializer struct { Prefix string Template string } -var sanitizedChars = strings.NewReplacer("/", "-", "@", "-", "*", "-", " ", "_", "..", ".") - -func (s *GraphiteSerializer) Serialize(metric telegraf.Metric) ([]string, error) { +func (s GraphiteSerializer) Serialize(metric telegraf.Metric) ([]string, error) { out := []string{} // Convert UnixNano to Unix timestamps timestamp := metric.UnixNano() / 1000000000 - bucket := s.SerializeBucketName(metric.Name(), metric.Tags()) + bucket := SerializeBucketName(metric.Name(), metric.Tags(), s.Template, s.Prefix) if bucket == "" { return out, nil } @@ -51,11 +52,12 @@ func (s *GraphiteSerializer) Serialize(metric telegraf.Metric) ([]string, error) // FIELDNAME. It is up to the user to replace this. This is so that // SerializeBucketName can be called just once per measurement, rather than // once per field. See GraphiteSerializer.InsertField() function. -func (s *GraphiteSerializer) SerializeBucketName( +func SerializeBucketName( measurement string, tags map[string]string, + template string, + prefix string, ) string { - template := s.Template if template == "" { template = DEFAULT_TEMPLATE } @@ -97,10 +99,10 @@ func (s *GraphiteSerializer) SerializeBucketName( return "" } - if s.Prefix == "" { + if prefix == "" { return sanitizedChars.Replace(strings.Join(out, ".")) } - return sanitizedChars.Replace(s.Prefix + "." 
+ strings.Join(out, ".")) + return sanitizedChars.Replace(prefix + "." + strings.Join(out, ".")) } // InsertField takes the bucket string from SerializeBucketName and replaces the diff --git a/plugins/serializers/graphite/graphite_test.go b/plugins/serializers/graphite/graphite_test.go index 64c65d16b..50ba0e2e0 100644 --- a/plugins/serializers/graphite/graphite_test.go +++ b/plugins/serializers/graphite/graphite_test.go @@ -225,8 +225,7 @@ func TestSerializeBucketNameNoHost(t *testing.T) { m, err := telegraf.NewMetric("cpu", tags, fields, now) assert.NoError(t, err) - s := GraphiteSerializer{} - mS := s.SerializeBucketName(m.Name(), m.Tags()) + mS := SerializeBucketName(m.Name(), m.Tags(), "", "") expS := "cpu0.us-west-2.cpu.FIELDNAME" assert.Equal(t, expS, mS) @@ -240,8 +239,7 @@ func TestSerializeBucketNameHost(t *testing.T) { m, err := telegraf.NewMetric("cpu", defaultTags, fields, now) assert.NoError(t, err) - s := GraphiteSerializer{} - mS := s.SerializeBucketName(m.Name(), m.Tags()) + mS := SerializeBucketName(m.Name(), m.Tags(), "", "") expS := "localhost.cpu0.us-west-2.cpu.FIELDNAME" assert.Equal(t, expS, mS) @@ -255,8 +253,7 @@ func TestSerializeBucketNamePrefix(t *testing.T) { m, err := telegraf.NewMetric("cpu", defaultTags, fields, now) assert.NoError(t, err) - s := GraphiteSerializer{Prefix: "prefix"} - mS := s.SerializeBucketName(m.Name(), m.Tags()) + mS := SerializeBucketName(m.Name(), m.Tags(), "", "prefix") expS := "prefix.localhost.cpu0.us-west-2.cpu.FIELDNAME" assert.Equal(t, expS, mS) @@ -270,8 +267,7 @@ func TestTemplate1(t *testing.T) { m, err := telegraf.NewMetric("cpu", defaultTags, fields, now) assert.NoError(t, err) - s := GraphiteSerializer{Template: template1} - mS := s.SerializeBucketName(m.Name(), m.Tags()) + mS := SerializeBucketName(m.Name(), m.Tags(), template1, "") expS := "cpu0.us-west-2.localhost.cpu.FIELDNAME" assert.Equal(t, expS, mS) @@ -285,8 +281,7 @@ func TestTemplate2(t *testing.T) { m, err := telegraf.NewMetric("cpu", 
defaultTags, fields, now) assert.NoError(t, err) - s := GraphiteSerializer{Template: template2} - mS := s.SerializeBucketName(m.Name(), m.Tags()) + mS := SerializeBucketName(m.Name(), m.Tags(), template2, "") expS := "localhost.cpu.FIELDNAME" assert.Equal(t, expS, mS) @@ -300,8 +295,7 @@ func TestTemplate3(t *testing.T) { m, err := telegraf.NewMetric("cpu", defaultTags, fields, now) assert.NoError(t, err) - s := GraphiteSerializer{Template: template3} - mS := s.SerializeBucketName(m.Name(), m.Tags()) + mS := SerializeBucketName(m.Name(), m.Tags(), template3, "") expS := "localhost.cpu0.us-west-2.FIELDNAME" assert.Equal(t, expS, mS) @@ -315,8 +309,7 @@ func TestTemplate4(t *testing.T) { m, err := telegraf.NewMetric("cpu", defaultTags, fields, now) assert.NoError(t, err) - s := GraphiteSerializer{Template: template4} - mS := s.SerializeBucketName(m.Name(), m.Tags()) + mS := SerializeBucketName(m.Name(), m.Tags(), template4, "") expS := "localhost.cpu0.us-west-2.cpu" assert.Equal(t, expS, mS) @@ -330,8 +323,7 @@ func TestTemplate5(t *testing.T) { m, err := telegraf.NewMetric("cpu", defaultTags, fields, now) assert.NoError(t, err) - s := GraphiteSerializer{Template: template5} - mS := s.SerializeBucketName(m.Name(), m.Tags()) + mS := SerializeBucketName(m.Name(), m.Tags(), template5, "") expS := "localhost.us-west-2.cpu0.cpu.FIELDNAME" assert.Equal(t, expS, mS) @@ -345,8 +337,7 @@ func TestTemplate6(t *testing.T) { m, err := telegraf.NewMetric("cpu", defaultTags, fields, now) assert.NoError(t, err) - s := GraphiteSerializer{Template: template6} - mS := s.SerializeBucketName(m.Name(), m.Tags()) + mS := SerializeBucketName(m.Name(), m.Tags(), template6, "") expS := "localhost.cpu0.us-west-2.cpu.FIELDNAME" assert.Equal(t, expS, mS) From bfdd665435a1e7f987a0b2d00bfbf972012e7a92 Mon Sep 17 00:00:00 2001 From: Cameron Sparr Date: Wed, 13 Jul 2016 08:14:48 -0600 Subject: [PATCH 11/47] Copy metrics for each configured output This is for better thread-safety when running with 
multiple outputs, which can cause very odd panics at very high loads primarily this is to address #1432 closes #1432 --- CHANGELOG.md | 1 + Makefile | 4 ---- agent/agent.go | 24 +++++++++++++++++++-- plugins/inputs/tcp_listener/tcp_listener.go | 11 ++++++++-- plugins/inputs/udp_listener/udp_listener.go | 10 ++++++++- plugins/serializers/graphite/graphite.go | 2 +- 6 files changed, 42 insertions(+), 10 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9e4c9a968..d206a7d54 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -45,6 +45,7 @@ should now look like: - [#1339](https://github.com/influxdata/telegraf/issues/1339): Prometheus client output panic on service reload. - [#1461](https://github.com/influxdata/telegraf/pull/1461): Prometheus parser, protobuf format header fix. - [#1334](https://github.com/influxdata/telegraf/issues/1334): Prometheus output, metric refresh and caching fixes. +- [#1432](https://github.com/influxdata/telegraf/issues/1432): Panic fix for multiple graphite outputs under very high load. ## v1.0 beta 2 [2016-06-21] diff --git a/Makefile b/Makefile index 6d4f8c35e..ee96e10bd 100644 --- a/Makefile +++ b/Makefile @@ -25,10 +25,6 @@ build-for-docker: "-s -X main.version=$(VERSION)" \ ./cmd/telegraf/telegraf.go -# Build with race detector -dev: prepare - go build -race -ldflags "-X main.version=$(VERSION)" ./... 
- # run package script package: ./scripts/build.py --package --version="$(VERSION)" --platform=linux --arch=all --upload diff --git a/agent/agent.go b/agent/agent.go index d1d36186e..ae520b89e 100644 --- a/agent/agent.go +++ b/agent/agent.go @@ -268,13 +268,33 @@ func (a *Agent) flusher(shutdown chan struct{}, metricC chan telegraf.Metric) er internal.RandomSleep(a.Config.Agent.FlushJitter.Duration, shutdown) a.flush() case m := <-metricC: - for _, o := range a.Config.Outputs { - o.AddMetric(m) + for i, o := range a.Config.Outputs { + if i == len(a.Config.Outputs)-1 { + o.AddMetric(m) + } else { + o.AddMetric(copyMetric(m)) + } } } } } +func copyMetric(m telegraf.Metric) telegraf.Metric { + t := time.Time(m.Time()) + + tags := make(map[string]string) + fields := make(map[string]interface{}) + for k, v := range m.Tags() { + tags[k] = v + } + for k, v := range m.Fields() { + fields[k] = v + } + + out, _ := telegraf.NewMetric(m.Name(), tags, fields, t) + return out +} + // Run runs the agent daemon, gathering every Interval func (a *Agent) Run(shutdown chan struct{}) error { var wg sync.WaitGroup diff --git a/plugins/inputs/tcp_listener/tcp_listener.go b/plugins/inputs/tcp_listener/tcp_listener.go index 053fc927e..4688e008b 100644 --- a/plugins/inputs/tcp_listener/tcp_listener.go +++ b/plugins/inputs/tcp_listener/tcp_listener.go @@ -31,6 +31,8 @@ type TcpListener struct { accept chan bool // drops tracks the number of dropped metrics. drops int + // malformed tracks the number of malformed packets + malformed int // track the listener here so we can close it in Stop() listener *net.TCPListener @@ -45,6 +47,9 @@ var dropwarn = "ERROR: tcp_listener message queue full. " + "We have dropped %d messages so far. " + "You may want to increase allowed_pending_messages in the config\n" +var malformedwarn = "WARNING: tcp_listener has received %d malformed packets" + + " thus far." 
+ const sampleConfig = ` ## Address and port to host TCP listener on service_address = ":8094" @@ -243,8 +248,10 @@ func (t *TcpListener) tcpParser() error { if err == nil { t.storeMetrics(metrics) } else { - log.Printf("Malformed packet: [%s], Error: %s\n", - string(packet), err) + t.malformed++ + if t.malformed == 1 || t.malformed%1000 == 0 { + log.Printf(malformedwarn, t.malformed) + } } } } diff --git a/plugins/inputs/udp_listener/udp_listener.go b/plugins/inputs/udp_listener/udp_listener.go index a20a5583f..120ee50e5 100644 --- a/plugins/inputs/udp_listener/udp_listener.go +++ b/plugins/inputs/udp_listener/udp_listener.go @@ -27,6 +27,8 @@ type UdpListener struct { done chan struct{} // drops tracks the number of dropped metrics. drops int + // malformed tracks the number of malformed packets + malformed int parser parsers.Parser @@ -44,6 +46,9 @@ var dropwarn = "ERROR: udp_listener message queue full. " + "We have dropped %d messages so far. " + "You may want to increase allowed_pending_messages in the config\n" +var malformedwarn = "WARNING: udp_listener has received %d malformed packets" + + " thus far." 
+ const sampleConfig = ` ## Address and port to host UDP listener on service_address = ":8092" @@ -152,7 +157,10 @@ func (u *UdpListener) udpParser() error { if err == nil { u.storeMetrics(metrics) } else { - log.Printf("Malformed packet: [%s], Error: %s\n", packet, err) + u.malformed++ + if u.malformed == 1 || u.malformed%1000 == 0 { + log.Printf(malformedwarn, u.malformed) + } } } } diff --git a/plugins/serializers/graphite/graphite.go b/plugins/serializers/graphite/graphite.go index 6e5c4e879..2cc4add56 100644 --- a/plugins/serializers/graphite/graphite.go +++ b/plugins/serializers/graphite/graphite.go @@ -20,7 +20,7 @@ type GraphiteSerializer struct { Template string } -func (s GraphiteSerializer) Serialize(metric telegraf.Metric) ([]string, error) { +func (s *GraphiteSerializer) Serialize(metric telegraf.Metric) ([]string, error) { out := []string{} // Convert UnixNano to Unix timestamps From 97d92bba67301c0e0758894cb7ce41b9774170f5 Mon Sep 17 00:00:00 2001 From: Andrei Burd Date: Thu, 14 Jul 2016 20:28:36 +0100 Subject: [PATCH 12/47] Redis input enhancement (#1387) master_last_io_seconds_ago added role tag renamed to replication_role --- CHANGELOG.md | 1 + plugins/inputs/redis/README.md | 2 ++ plugins/inputs/redis/redis.go | 5 +++-- plugins/inputs/redis/redis_test.go | 4 ++-- 4 files changed, 8 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d206a7d54..d62675803 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -33,6 +33,7 @@ should now look like: - [#1402](https://github.com/influxdata/telegraf/pull/1402): docker-machine/boot2docker no longer required for unit tests. - [#1350](https://github.com/influxdata/telegraf/pull/1350): cgroup input plugin. - [#1369](https://github.com/influxdata/telegraf/pull/1369): Add input plugin for consuming metrics from NSQD. 
+- [#1387](https://github.com/influxdata/telegraf/pull/1387): **Breaking Change** - Redis `role` tag renamed to `replication_role` to avoid global_tags override ### Bugfixes diff --git a/plugins/inputs/redis/README.md b/plugins/inputs/redis/README.md index 1cbaea0ca..51b596aa0 100644 --- a/plugins/inputs/redis/README.md +++ b/plugins/inputs/redis/README.md @@ -43,6 +43,7 @@ - latest_fork_usec - connected_slaves - master_repl_offset + - master_last_io_seconds_ago - repl_backlog_active - repl_backlog_size - repl_backlog_histlen @@ -57,6 +58,7 @@ - All measurements have the following tags: - port - server + - replication role ### Example Output: diff --git a/plugins/inputs/redis/redis.go b/plugins/inputs/redis/redis.go index 94f562471..76cbc89cb 100644 --- a/plugins/inputs/redis/redis.go +++ b/plugins/inputs/redis/redis.go @@ -66,6 +66,7 @@ var Tracking = map[string]string{ "latest_fork_usec": "latest_fork_usec", "connected_slaves": "connected_slaves", "master_repl_offset": "master_repl_offset", + "master_last_io_seconds_ago": "master_last_io_seconds_ago", "repl_backlog_active": "repl_backlog_active", "repl_backlog_size": "repl_backlog_size", "repl_backlog_histlen": "repl_backlog_histlen", @@ -74,7 +75,7 @@ var Tracking = map[string]string{ "used_cpu_user": "used_cpu_user", "used_cpu_sys_children": "used_cpu_sys_children", "used_cpu_user_children": "used_cpu_user_children", - "role": "role", + "role": "replication_role", } var ErrProtocolError = errors.New("redis protocol error") @@ -208,7 +209,7 @@ func gatherInfoOutput( } if name == "role" { - tags["role"] = val + tags["replication_role"] = val continue } diff --git a/plugins/inputs/redis/redis_test.go b/plugins/inputs/redis/redis_test.go index b12950ee4..2e2fc1e37 100644 --- a/plugins/inputs/redis/redis_test.go +++ b/plugins/inputs/redis/redis_test.go @@ -35,7 +35,7 @@ func TestRedis_ParseMetrics(t *testing.T) { err := gatherInfoOutput(rdr, &acc, tags) require.NoError(t, err) - tags = map[string]string{"host": 
"redis.net", "role": "master"} + tags = map[string]string{"host": "redis.net", "replication_role": "master"} fields := map[string]interface{}{ "uptime": uint64(238), "clients": uint64(1), @@ -71,7 +71,7 @@ func TestRedis_ParseMetrics(t *testing.T) { "used_cpu_user_children": float64(0.00), "keyspace_hitrate": float64(0.50), } - keyspaceTags := map[string]string{"host": "redis.net", "role": "master", "database": "db0"} + keyspaceTags := map[string]string{"host": "redis.net", "replication_role": "master", "database": "db0"} keyspaceFields := map[string]interface{}{ "avg_ttl": uint64(0), "expires": uint64(0), From 53f40063b31fd9ef3d92e7fc22e821d0f71ac46d Mon Sep 17 00:00:00 2001 From: Sebastian Borza Date: Thu, 14 Jul 2016 15:18:55 -0500 Subject: [PATCH 13/47] Moving cgroup path name to field from tag to reduce cardinality (#1457) adding assertContainsFields function to cgroup_test for custom validation --- plugins/inputs/cgroup/README.md | 5 +- plugins/inputs/cgroup/cgroup_linux.go | 5 +- plugins/inputs/cgroup/cgroup_test.go | 84 +++++++++++++++------------ 3 files changed, 53 insertions(+), 41 deletions(-) diff --git a/plugins/inputs/cgroup/README.md b/plugins/inputs/cgroup/README.md index ab06342bf..feb332dd9 100644 --- a/plugins/inputs/cgroup/README.md +++ b/plugins/inputs/cgroup/README.md @@ -33,8 +33,9 @@ KEY1 VAL1\n ### Tags: -All measurements have the following tags: - - path +Measurements don't have any specific tags unless you define them at the telegraf level (defaults). We +used to have the path listed as a tag, but to keep cardinality in check it's easier to move this +value to a field. Thanks @sebito91! 
### Configuration: diff --git a/plugins/inputs/cgroup/cgroup_linux.go b/plugins/inputs/cgroup/cgroup_linux.go index e8ba6f881..ecaf8126d 100644 --- a/plugins/inputs/cgroup/cgroup_linux.go +++ b/plugins/inputs/cgroup/cgroup_linux.go @@ -56,10 +56,9 @@ func (g *CGroup) gatherDir(dir string, acc telegraf.Accumulator) error { return err } } + fields["path"] = dir - tags := map[string]string{"path": dir} - - acc.AddFields(metricName, fields, tags) + acc.AddFields(metricName, fields, nil) return nil } diff --git a/plugins/inputs/cgroup/cgroup_test.go b/plugins/inputs/cgroup/cgroup_test.go index 206b51f6d..ff9b8d7a8 100644 --- a/plugins/inputs/cgroup/cgroup_test.go +++ b/plugins/inputs/cgroup/cgroup_test.go @@ -3,10 +3,13 @@ package cgroup import ( + "fmt" "testing" "github.com/influxdata/telegraf/testutil" + "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" + "reflect" ) var cg1 = &CGroup{ @@ -21,15 +24,32 @@ var cg1 = &CGroup{ }, } +func assertContainsFields(a *testutil.Accumulator, t *testing.T, measurement string, fieldSet []map[string]interface{}) { + a.Lock() + defer a.Unlock() + + numEquals := 0 + for _, p := range a.Metrics { + if p.Measurement == measurement { + for _, fields := range fieldSet { + if reflect.DeepEqual(fields, p.Fields) { + numEquals++ + } + } + } + } + + if numEquals != len(fieldSet) { + assert.Fail(t, fmt.Sprintf("only %d of %d are equal", numEquals, len(fieldSet))) + } +} + func TestCgroupStatistics_1(t *testing.T) { var acc testutil.Accumulator err := cg1.Gather(&acc) require.NoError(t, err) - tags := map[string]string{ - "path": "testdata/memory", - } fields := map[string]interface{}{ "memory.stat.cache": 1739362304123123123, "memory.stat.rss": 1775325184, @@ -42,8 +62,9 @@ func TestCgroupStatistics_1(t *testing.T) { "memory.limit_in_bytes": 223372036854771712, "memory.use_hierarchy": "12-781", "notify_on_release": 0, + "path": "testdata/memory", } - acc.AssertContainsTaggedFields(t, "cgroup", fields, tags) + 
assertContainsFields(&acc, t, "cgroup", []map[string]interface{}{fields}) } // ====================================================================== @@ -59,16 +80,14 @@ func TestCgroupStatistics_2(t *testing.T) { err := cg2.Gather(&acc) require.NoError(t, err) - tags := map[string]string{ - "path": "testdata/cpu", - } fields := map[string]interface{}{ "cpuacct.usage_percpu.0": -1452543795404, "cpuacct.usage_percpu.1": 1376681271659, "cpuacct.usage_percpu.2": 1450950799997, "cpuacct.usage_percpu.3": -1473113374257, + "path": "testdata/cpu", } - acc.AssertContainsTaggedFields(t, "cgroup", fields, tags) + assertContainsFields(&acc, t, "cgroup", []map[string]interface{}{fields}) } // ====================================================================== @@ -84,18 +103,16 @@ func TestCgroupStatistics_3(t *testing.T) { err := cg3.Gather(&acc) require.NoError(t, err) - tags := map[string]string{ - "path": "testdata/memory/group_1", - } fields := map[string]interface{}{ "memory.limit_in_bytes": 223372036854771712, + "path": "testdata/memory/group_1", } - acc.AssertContainsTaggedFields(t, "cgroup", fields, tags) - tags = map[string]string{ - "path": "testdata/memory/group_2", + fieldsTwo := map[string]interface{}{ + "memory.limit_in_bytes": 223372036854771712, + "path": "testdata/memory/group_2", } - acc.AssertContainsTaggedFields(t, "cgroup", fields, tags) + assertContainsFields(&acc, t, "cgroup", []map[string]interface{}{fields, fieldsTwo}) } // ====================================================================== @@ -111,23 +128,22 @@ func TestCgroupStatistics_4(t *testing.T) { err := cg4.Gather(&acc) require.NoError(t, err) - tags := map[string]string{ - "path": "testdata/memory/group_1/group_1_1", - } fields := map[string]interface{}{ "memory.limit_in_bytes": 223372036854771712, + "path": "testdata/memory/group_1/group_1_1", } - acc.AssertContainsTaggedFields(t, "cgroup", fields, tags) - tags = map[string]string{ - "path": "testdata/memory/group_1/group_1_2", + 
fieldsTwo := map[string]interface{}{ + "memory.limit_in_bytes": 223372036854771712, + "path": "testdata/memory/group_1/group_1_2", } - acc.AssertContainsTaggedFields(t, "cgroup", fields, tags) - tags = map[string]string{ - "path": "testdata/memory/group_2", + fieldsThree := map[string]interface{}{ + "memory.limit_in_bytes": 223372036854771712, + "path": "testdata/memory/group_2", } - acc.AssertContainsTaggedFields(t, "cgroup", fields, tags) + + assertContainsFields(&acc, t, "cgroup", []map[string]interface{}{fields, fieldsTwo, fieldsThree}) } // ====================================================================== @@ -143,18 +159,16 @@ func TestCgroupStatistics_5(t *testing.T) { err := cg5.Gather(&acc) require.NoError(t, err) - tags := map[string]string{ - "path": "testdata/memory/group_1/group_1_1", - } fields := map[string]interface{}{ "memory.limit_in_bytes": 223372036854771712, + "path": "testdata/memory/group_1/group_1_1", } - acc.AssertContainsTaggedFields(t, "cgroup", fields, tags) - tags = map[string]string{ - "path": "testdata/memory/group_2/group_1_1", + fieldsTwo := map[string]interface{}{ + "memory.limit_in_bytes": 223372036854771712, + "path": "testdata/memory/group_2/group_1_1", } - acc.AssertContainsTaggedFields(t, "cgroup", fields, tags) + assertContainsFields(&acc, t, "cgroup", []map[string]interface{}{fields, fieldsTwo}) } // ====================================================================== @@ -170,13 +184,11 @@ func TestCgroupStatistics_6(t *testing.T) { err := cg6.Gather(&acc) require.NoError(t, err) - tags := map[string]string{ - "path": "testdata/memory", - } fields := map[string]interface{}{ "memory.usage_in_bytes": 3513667584, "memory.use_hierarchy": "12-781", "memory.kmem.limit_in_bytes": 9223372036854771712, + "path": "testdata/memory", } - acc.AssertContainsTaggedFields(t, "cgroup", fields, tags) + assertContainsFields(&acc, t, "cgroup", []map[string]interface{}{fields}) } From 4651ab88ad45b55162b51091f9bfe073ce369e37 Mon Sep 17 
00:00:00 2001 From: Shashank Sahni Date: Fri, 1 Jul 2016 13:31:14 -0700 Subject: [PATCH 14/47] Fetching galera status metrics in MySQL These are useful for Percona Xtradb cluster. closes #1437 --- CHANGELOG.md | 1 + plugins/inputs/mysql/mysql.go | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index d62675803..b1daa60ac 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -34,6 +34,7 @@ should now look like: - [#1350](https://github.com/influxdata/telegraf/pull/1350): cgroup input plugin. - [#1369](https://github.com/influxdata/telegraf/pull/1369): Add input plugin for consuming metrics from NSQD. - [#1387](https://github.com/influxdata/telegraf/pull/1387): **Breaking Change** - Redis `role` tag renamed to `replication_role` to avoid global_tags override +- [#1437](https://github.com/influxdata/telegraf/pull/1437): Fetching Galera status metrics in MySQL ### Bugfixes diff --git a/plugins/inputs/mysql/mysql.go b/plugins/inputs/mysql/mysql.go index b8ff3945a..5011e82b9 100644 --- a/plugins/inputs/mysql/mysql.go +++ b/plugins/inputs/mysql/mysql.go @@ -306,6 +306,10 @@ var mappings = []*mapping{ onServer: "Threadpool_", inExport: "threadpool_", }, + { + onServer: "wsrep_", + inExport: "wsrep_", + }, } var ( From 21add2c79995eb2297f020abb2d59872c7c3047e Mon Sep 17 00:00:00 2001 From: Joel Meador Date: Tue, 21 Jun 2016 16:28:31 -0400 Subject: [PATCH 15/47] instrumental plugin, rewrite connection retries closes #1412 separate hello and authenticate functions, force connection close at end of write cycle so we don't hold open idle connections, which has the benefit of mostly removing the chance of getting hopelessly connection lost bump instrumental agent version fix test to deal with better better connect/reconnect logic and changed ident & auth handshake Update CHANGELOG.md correct URL from instrumental fork to origin and put the change in the correct part of the file go fmt undo split hello and auth commands, to reduce roundtrips --- 
CHANGELOG.md | 1 + plugins/outputs/instrumental/instrumental.go | 14 +++++++++++--- plugins/outputs/instrumental/instrumental_test.go | 10 ++-------- 3 files changed, 14 insertions(+), 11 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b1daa60ac..da4cbf5cc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -48,6 +48,7 @@ should now look like: - [#1461](https://github.com/influxdata/telegraf/pull/1461): Prometheus parser, protobuf format header fix. - [#1334](https://github.com/influxdata/telegraf/issues/1334): Prometheus output, metric refresh and caching fixes. - [#1432](https://github.com/influxdata/telegraf/issues/1432): Panic fix for multiple graphite outputs under very high load. +- [#1412](https://github.com/influxdata/telegraf/pull/1412): Instrumental output has better reconnect behavior ## v1.0 beta 2 [2016-06-21] diff --git a/plugins/outputs/instrumental/instrumental.go b/plugins/outputs/instrumental/instrumental.go index 461ba9d9e..2fcc28cc0 100644 --- a/plugins/outputs/instrumental/instrumental.go +++ b/plugins/outputs/instrumental/instrumental.go @@ -28,8 +28,10 @@ type Instrumental struct { } const ( - DefaultHost = "collector.instrumentalapp.com" - AuthFormat = "hello version go/telegraf/1.0\nauthenticate %s\n" + DefaultHost = "collector.instrumentalapp.com" + HelloMessage = "hello version go/telegraf/1.1\n" + AuthFormat = "authenticate %s\n" + HandshakeFormat = HelloMessage + AuthFormat ) var ( @@ -52,6 +54,7 @@ var sampleConfig = ` func (i *Instrumental) Connect() error { connection, err := net.DialTimeout("tcp", i.Host+":8000", i.Timeout.Duration) + if err != nil { i.conn = nil return err @@ -151,6 +154,11 @@ func (i *Instrumental) Write(metrics []telegraf.Metric) error { return err } + // force the connection closed after sending data + // to deal with various disconnection scenarios and eschew holding + // open idle connections en masse + i.Close() + return nil } @@ -163,7 +171,7 @@ func (i *Instrumental) SampleConfig() string { } func 
(i *Instrumental) authenticate(conn net.Conn) error { - _, err := fmt.Fprintf(conn, AuthFormat, i.ApiToken) + _, err := fmt.Fprintf(conn, HandshakeFormat, i.ApiToken) if err != nil { return err } diff --git a/plugins/outputs/instrumental/instrumental_test.go b/plugins/outputs/instrumental/instrumental_test.go index ceb53bac6..9708a2590 100644 --- a/plugins/outputs/instrumental/instrumental_test.go +++ b/plugins/outputs/instrumental/instrumental_test.go @@ -24,7 +24,6 @@ func TestWrite(t *testing.T) { ApiToken: "abc123token", Prefix: "my.prefix", } - i.Connect() // Default to gauge m1, _ := telegraf.NewMetric( @@ -40,10 +39,8 @@ func TestWrite(t *testing.T) { time.Date(2010, time.November, 10, 23, 0, 0, 0, time.UTC), ) - // Simulate a connection close and reconnect. metrics := []telegraf.Metric{m1, m2} i.Write(metrics) - i.Close() // Counter and Histogram are increments m3, _ := telegraf.NewMetric( @@ -70,7 +67,6 @@ func TestWrite(t *testing.T) { i.Write(metrics) wg.Wait() - i.Close() } func TCPServer(t *testing.T, wg *sync.WaitGroup) { @@ -82,10 +78,9 @@ func TCPServer(t *testing.T, wg *sync.WaitGroup) { tp := textproto.NewReader(reader) hello, _ := tp.ReadLine() - assert.Equal(t, "hello version go/telegraf/1.0", hello) + assert.Equal(t, "hello version go/telegraf/1.1", hello) auth, _ := tp.ReadLine() assert.Equal(t, "authenticate abc123token", auth) - conn.Write([]byte("ok\nok\n")) data1, _ := tp.ReadLine() @@ -99,10 +94,9 @@ func TCPServer(t *testing.T, wg *sync.WaitGroup) { tp = textproto.NewReader(reader) hello, _ = tp.ReadLine() - assert.Equal(t, "hello version go/telegraf/1.0", hello) + assert.Equal(t, "hello version go/telegraf/1.1", hello) auth, _ = tp.ReadLine() assert.Equal(t, "authenticate abc123token", auth) - conn.Write([]byte("ok\nok\n")) data3, _ := tp.ReadLine() From d5e743934380aa3e95e96b0e2a1b7f0c2b4fda4e Mon Sep 17 00:00:00 2001 From: Cameron Sparr Date: Wed, 13 Jul 2016 18:49:17 -0600 Subject: [PATCH 16/47] procstat plugin: store PID as a field 
closes #1460 --- CHANGELOG.md | 1 + plugins/inputs/procstat/procstat.go | 6 +----- plugins/inputs/procstat/spec_processor.go | 5 ++++- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index da4cbf5cc..eda9f2f63 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -49,6 +49,7 @@ should now look like: - [#1334](https://github.com/influxdata/telegraf/issues/1334): Prometheus output, metric refresh and caching fixes. - [#1432](https://github.com/influxdata/telegraf/issues/1432): Panic fix for multiple graphite outputs under very high load. - [#1412](https://github.com/influxdata/telegraf/pull/1412): Instrumental output has better reconnect behavior +- [#1460](https://github.com/influxdata/telegraf/issues/1460): Remove PID from procstat plugin to fix cardinality issues. ## v1.0 beta 2 [2016-06-21] diff --git a/plugins/inputs/procstat/procstat.go b/plugins/inputs/procstat/procstat.go index 3b9f0f76c..358dc4c0f 100644 --- a/plugins/inputs/procstat/procstat.go +++ b/plugins/inputs/procstat/procstat.go @@ -70,7 +70,7 @@ func (p *Procstat) Gather(acc telegraf.Accumulator) error { p.Exe, p.PidFile, p.Pattern, p.User, err.Error()) } else { for pid, proc := range p.pidmap { - p := NewSpecProcessor(p.ProcessName, p.Prefix, acc, proc, p.tagmap[pid]) + p := NewSpecProcessor(p.ProcessName, p.Prefix, pid, acc, proc, p.tagmap[pid]) p.pushMetrics() } } @@ -140,7 +140,6 @@ func (p *Procstat) pidsFromFile() ([]int32, error) { out = append(out, int32(pid)) p.tagmap[int32(pid)] = map[string]string{ "pidfile": p.PidFile, - "pid": strings.TrimSpace(string(pidString)), } } } @@ -165,7 +164,6 @@ func (p *Procstat) pidsFromExe() ([]int32, error) { out = append(out, int32(ipid)) p.tagmap[int32(ipid)] = map[string]string{ "exe": p.Exe, - "pid": pid, } } else { outerr = err @@ -193,7 +191,6 @@ func (p *Procstat) pidsFromPattern() ([]int32, error) { out = append(out, int32(ipid)) p.tagmap[int32(ipid)] = map[string]string{ "pattern": p.Pattern, - "pid": pid, } } 
else { outerr = err @@ -221,7 +218,6 @@ func (p *Procstat) pidsFromUser() ([]int32, error) { out = append(out, int32(ipid)) p.tagmap[int32(ipid)] = map[string]string{ "user": p.User, - "pid": pid, } } else { outerr = err diff --git a/plugins/inputs/procstat/spec_processor.go b/plugins/inputs/procstat/spec_processor.go index 0e73b60e9..3789e99d0 100644 --- a/plugins/inputs/procstat/spec_processor.go +++ b/plugins/inputs/procstat/spec_processor.go @@ -10,6 +10,7 @@ import ( type SpecProcessor struct { Prefix string + pid int32 tags map[string]string fields map[string]interface{} acc telegraf.Accumulator @@ -19,6 +20,7 @@ type SpecProcessor struct { func NewSpecProcessor( processName string, prefix string, + pid int32, acc telegraf.Accumulator, p *process.Process, tags map[string]string, @@ -33,6 +35,7 @@ func NewSpecProcessor( } return &SpecProcessor{ Prefix: prefix, + pid: pid, tags: tags, fields: make(map[string]interface{}), acc: acc, @@ -45,7 +48,7 @@ func (p *SpecProcessor) pushMetrics() { if p.Prefix != "" { prefix = p.Prefix + "_" } - fields := map[string]interface{}{} + fields := map[string]interface{}{"pid": p.pid} numThreads, err := p.proc.NumThreads() if err == nil { From 207c5498e718af25768f5de0655cb786c45e9fc0 Mon Sep 17 00:00:00 2001 From: Pierre Fersing Date: Thu, 14 Jul 2016 23:53:05 +0200 Subject: [PATCH 17/47] Remove systemd Install alias (#1470) Alias is a list of additional names. 
Adding it's cannonical name cause systemctl enable telegraf to show a warning "Too many levels of symbolic links" --- scripts/post-install.sh | 4 ++++ scripts/telegraf.service | 1 - 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/scripts/post-install.sh b/scripts/post-install.sh index fb0b441e8..95045be1f 100644 --- a/scripts/post-install.sh +++ b/scripts/post-install.sh @@ -37,6 +37,10 @@ chmod 755 $LOG_DIR if [[ -L /etc/init.d/telegraf ]]; then rm -f /etc/init.d/telegraf fi +# Remove legacy symlink, if it exists +if [[ -L /etc/systemd/system/telegraf.service ]]; then + rm -f /etc/systemd/system/telegraf.service +fi # Add defaults file, if it doesn't exist if [[ ! -f /etc/default/telegraf ]]; then diff --git a/scripts/telegraf.service b/scripts/telegraf.service index a7824c9a7..81c9b5408 100644 --- a/scripts/telegraf.service +++ b/scripts/telegraf.service @@ -15,4 +15,3 @@ KillMode=control-group [Install] WantedBy=multi-user.target -Alias=telegraf.service From 300d9adbd027ff87f5120e0e917d9787f83081d5 Mon Sep 17 00:00:00 2001 From: tuier Date: Sat, 16 Jul 2016 19:19:21 +0100 Subject: [PATCH 18/47] Considere zookeeper's state as a tags (#1417) This change will send the state of zookeeper (leader|follower) as a tag and not a metrics That way it will be easier to search for filter per state --- plugins/inputs/zookeeper/README.md | 10 +++++++--- plugins/inputs/zookeeper/zookeeper.go | 23 ++++++++++++++++------- 2 files changed, 23 insertions(+), 10 deletions(-) diff --git a/plugins/inputs/zookeeper/README.md b/plugins/inputs/zookeeper/README.md index fe7a8a4ad..bc7c17a4b 100644 --- a/plugins/inputs/zookeeper/README.md +++ b/plugins/inputs/zookeeper/README.md @@ -32,7 +32,7 @@ echo mntr | nc localhost 2181 Meta: - units: int64 -- tags: `server= port=` +- tags: `server= port= state=` Measurement names: - zookeeper_avg_latency @@ -55,8 +55,12 @@ Measurement names: Meta: - units: string -- tags: `server= port=` +- tags: `server= port= state=` Measurement 
names: - zookeeper_version -- zookeeper_server_state \ No newline at end of file + +### Tags: + +- All measurements have the following tags: + - diff --git a/plugins/inputs/zookeeper/zookeeper.go b/plugins/inputs/zookeeper/zookeeper.go index 54defc56f..c11b55f68 100644 --- a/plugins/inputs/zookeeper/zookeeper.go +++ b/plugins/inputs/zookeeper/zookeeper.go @@ -55,6 +55,7 @@ func (z *Zookeeper) Gather(acc telegraf.Accumulator) error { } func (z *Zookeeper) gatherServer(address string, acc telegraf.Accumulator) error { + var zookeeper_state string _, _, err := net.SplitHostPort(address) if err != nil { address = address + ":2181" @@ -78,7 +79,6 @@ func (z *Zookeeper) gatherServer(address string, acc telegraf.Accumulator) error if len(service) != 2 { return fmt.Errorf("Invalid service address: %s", address) } - tags := map[string]string{"server": service[0], "port": service[1]} fields := make(map[string]interface{}) for scanner.Scan() { @@ -92,15 +92,24 @@ func (z *Zookeeper) gatherServer(address string, acc telegraf.Accumulator) error } measurement := strings.TrimPrefix(parts[1], "zk_") - sValue := string(parts[2]) - - iVal, err := strconv.ParseInt(sValue, 10, 64) - if err == nil { - fields[measurement] = iVal + if measurement == "server_state" { + zookeeper_state = parts[2] } else { - fields[measurement] = sValue + sValue := string(parts[2]) + + iVal, err := strconv.ParseInt(sValue, 10, 64) + if err == nil { + fields[measurement] = iVal + } else { + fields[measurement] = sValue + } } } + tags := map[string]string{ + "server": service[0], + "port": service[1], + "state": zookeeper_state, + } acc.AddFields("zookeeper", fields, tags) return nil From 704d9ad76c898c9f14c8ed7e33de416c8e4f1259 Mon Sep 17 00:00:00 2001 From: Cameron Sparr Date: Thu, 14 Jul 2016 23:12:32 -0600 Subject: [PATCH 19/47] Refactor aerospike plugin to use client lib --- CHANGELOG.md | 7 + Godeps | 2 + plugins/inputs/aerospike/aerospike.go | 357 +++++---------------- 
plugins/inputs/aerospike/aerospike_test.go | 97 ++---- 4 files changed, 104 insertions(+), 359 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index eda9f2f63..d01567eba 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,12 @@ ### Release Notes +**Breaking Change**: Aerospike main server node measurements have been renamed +aerospike_node. Aerospike namespace measurements have been renamed to +aerospike_namespace. They will also now be tagged with the node_name +that they correspond to. This has been done to differentiate measurements +that pertain to node vs. namespace statistics. + **Breaking Change**: users of github_webhooks must change to the new `[[inputs.webhooks]]` plugin. @@ -35,6 +41,7 @@ should now look like: - [#1369](https://github.com/influxdata/telegraf/pull/1369): Add input plugin for consuming metrics from NSQD. - [#1387](https://github.com/influxdata/telegraf/pull/1387): **Breaking Change** - Redis `role` tag renamed to `replication_role` to avoid global_tags override - [#1437](https://github.com/influxdata/telegraf/pull/1437): Fetching Galera status metrics in MySQL +- [#1500](https://github.com/influxdata/telegraf/pull/1500): Aerospike plugin refactored to use official client lib. 
### Bugfixes diff --git a/Godeps b/Godeps index f47a57806..1546bb627 100644 --- a/Godeps +++ b/Godeps @@ -1,5 +1,6 @@ github.com/Shopify/sarama 8aadb476e66ca998f2f6bb3c993e9a2daa3666b9 github.com/Sirupsen/logrus 219c8cb75c258c552e999735be6df753ffc7afdc +github.com/aerospike/aerospike-client-go 45863b7fd8640dc12f7fdd397104d97e1986f25a github.com/amir/raidman 53c1b967405155bfc8758557863bf2e14f814687 github.com/aws/aws-sdk-go 13a12060f716145019378a10e2806c174356b857 github.com/beorn7/perks 3ac7bf7a47d159a033b107610db8a1b6575507a4 @@ -50,6 +51,7 @@ github.com/stretchr/testify 1f4a1643a57e798696635ea4c126e9127adb7d3c github.com/vjeantet/grok 83bfdfdfd1a8146795b28e547a8e3c8b28a466c2 github.com/wvanbergen/kafka 46f9a1cf3f670edec492029fadded9c2d9e18866 github.com/wvanbergen/kazoo-go 0f768712ae6f76454f987c3356177e138df258f8 +github.com/yuin/gopher-lua bf3808abd44b1e55143a2d7f08571aaa80db1808 github.com/zensqlmonitor/go-mssqldb ffe5510c6fa5e15e6d983210ab501c815b56b363 golang.org/x/crypto 5dc8cb4b8a8eb076cbb5a06bc3b8682c15bdbbd3 golang.org/x/net 6acef71eb69611914f7a30939ea9f6e194c78172 diff --git a/plugins/inputs/aerospike/aerospike.go b/plugins/inputs/aerospike/aerospike.go index cd2ebe25c..4bb652c0a 100644 --- a/plugins/inputs/aerospike/aerospike.go +++ b/plugins/inputs/aerospike/aerospike.go @@ -1,104 +1,19 @@ package aerospike import ( - "bytes" - "encoding/binary" - "fmt" - "github.com/influxdata/telegraf" - "github.com/influxdata/telegraf/plugins/inputs" "net" "strconv" "strings" "sync" + "time" + + "github.com/influxdata/telegraf" + "github.com/influxdata/telegraf/internal/errchan" + "github.com/influxdata/telegraf/plugins/inputs" + + as "github.com/aerospike/aerospike-client-go" ) -const ( - MSG_HEADER_SIZE = 8 - MSG_TYPE = 1 // Info is 1 - MSG_VERSION = 2 -) - -var ( - STATISTICS_COMMAND = []byte("statistics\n") - NAMESPACES_COMMAND = []byte("namespaces\n") -) - -type aerospikeMessageHeader struct { - Version uint8 - Type uint8 - DataLen [6]byte -} - -type 
aerospikeMessage struct { - aerospikeMessageHeader - Data []byte -} - -// Taken from aerospike-client-go/types/message.go -func (msg *aerospikeMessage) Serialize() []byte { - msg.DataLen = msgLenToBytes(int64(len(msg.Data))) - buf := bytes.NewBuffer([]byte{}) - binary.Write(buf, binary.BigEndian, msg.aerospikeMessageHeader) - binary.Write(buf, binary.BigEndian, msg.Data[:]) - return buf.Bytes() -} - -type aerospikeInfoCommand struct { - msg *aerospikeMessage -} - -// Taken from aerospike-client-go/info.go -func (nfo *aerospikeInfoCommand) parseMultiResponse() (map[string]string, error) { - responses := make(map[string]string) - offset := int64(0) - begin := int64(0) - - dataLen := int64(len(nfo.msg.Data)) - - // Create reusable StringBuilder for performance. - for offset < dataLen { - b := nfo.msg.Data[offset] - - if b == '\t' { - name := nfo.msg.Data[begin:offset] - offset++ - begin = offset - - // Parse field value. - for offset < dataLen { - if nfo.msg.Data[offset] == '\n' { - break - } - offset++ - } - - if offset > begin { - value := nfo.msg.Data[begin:offset] - responses[string(name)] = string(value) - } else { - responses[string(name)] = "" - } - offset++ - begin = offset - } else if b == '\n' { - if offset > begin { - name := nfo.msg.Data[begin:offset] - responses[string(name)] = "" - } - offset++ - begin = offset - } else { - offset++ - } - } - - if offset > begin { - name := nfo.msg.Data[begin:offset] - responses[string(name)] = "" - } - return responses, nil -} - type Aerospike struct { Servers []string } @@ -115,7 +30,7 @@ func (a *Aerospike) SampleConfig() string { } func (a *Aerospike) Description() string { - return "Read stats from an aerospike server" + return "Read stats from aerospike server(s)" } func (a *Aerospike) Gather(acc telegraf.Accumulator) error { @@ -124,214 +39,90 @@ func (a *Aerospike) Gather(acc telegraf.Accumulator) error { } var wg sync.WaitGroup - - var outerr error - + errChan := errchan.New(len(a.Servers)) + 
wg.Add(len(a.Servers)) for _, server := range a.Servers { - wg.Add(1) - go func(server string) { + go func(serv string) { defer wg.Done() - outerr = a.gatherServer(server, acc) + errChan.C <- a.gatherServer(serv, acc) }(server) } wg.Wait() - return outerr + return errChan.Error() } -func (a *Aerospike) gatherServer(host string, acc telegraf.Accumulator) error { - aerospikeInfo, err := getMap(STATISTICS_COMMAND, host) +func (a *Aerospike) gatherServer(hostport string, acc telegraf.Accumulator) error { + host, port, err := net.SplitHostPort(hostport) if err != nil { - return fmt.Errorf("Aerospike info failed: %s", err) + return err } - readAerospikeStats(aerospikeInfo, acc, host, "") - namespaces, err := getList(NAMESPACES_COMMAND, host) + + iport, err := strconv.Atoi(port) if err != nil { - return fmt.Errorf("Aerospike namespace list failed: %s", err) + iport = 3000 } - for ix := range namespaces { - nsInfo, err := getMap([]byte("namespace/"+namespaces[ix]+"\n"), host) - if err != nil { - return fmt.Errorf("Aerospike namespace '%s' query failed: %s", namespaces[ix], err) + + c, err := as.NewClient(host, iport) + if err != nil { + return err + } + defer c.Close() + + nodes := c.GetNodes() + for _, n := range nodes { + tags := map[string]string{ + "node_name": n.GetName(), + "aerospike_host": hostport, + } + fields := make(map[string]interface{}) + stats, err := as.RequestNodeStats(n) + if err != nil { + return err + } + for k, v := range stats { + if iv, err := strconv.ParseInt(v, 10, 64); err == nil { + fields[strings.Replace(k, "-", "_", -1)] = iv + } + } + acc.AddFields("aerospike_node", fields, tags, time.Now()) + + info, err := as.RequestNodeInfo(n, "namespaces") + if err != nil { + return err + } + namespaces := strings.Split(info["namespaces"], ";") + + for _, namespace := range namespaces { + nTags := copyTags(tags) + nTags["namespace"] = namespace + nFields := make(map[string]interface{}) + info, err := as.RequestNodeInfo(n, "namespace/"+namespace) + if err 
!= nil { + continue + } + stats := strings.Split(info["namespace/"+namespace], ";") + for _, stat := range stats { + parts := strings.Split(stat, "=") + if len(parts) < 2 { + continue + } + if iv, err := strconv.ParseInt(parts[1], 10, 64); err == nil { + nFields[strings.Replace(parts[0], "-", "_", -1)] = iv + } + } + acc.AddFields("aerospike_namespace", nFields, nTags, time.Now()) } - readAerospikeStats(nsInfo, acc, host, namespaces[ix]) } return nil } -func getMap(key []byte, host string) (map[string]string, error) { - data, err := get(key, host) - if err != nil { - return nil, fmt.Errorf("Failed to get data: %s", err) +func copyTags(m map[string]string) map[string]string { + out := make(map[string]string) + for k, v := range m { + out[k] = v } - parsed, err := unmarshalMapInfo(data, string(key)) - if err != nil { - return nil, fmt.Errorf("Failed to unmarshal data: %s", err) - } - - return parsed, nil -} - -func getList(key []byte, host string) ([]string, error) { - data, err := get(key, host) - if err != nil { - return nil, fmt.Errorf("Failed to get data: %s", err) - } - parsed, err := unmarshalListInfo(data, string(key)) - if err != nil { - return nil, fmt.Errorf("Failed to unmarshal data: %s", err) - } - - return parsed, nil -} - -func get(key []byte, host string) (map[string]string, error) { - var err error - var data map[string]string - - asInfo := &aerospikeInfoCommand{ - msg: &aerospikeMessage{ - aerospikeMessageHeader: aerospikeMessageHeader{ - Version: uint8(MSG_VERSION), - Type: uint8(MSG_TYPE), - DataLen: msgLenToBytes(int64(len(key))), - }, - Data: key, - }, - } - - cmd := asInfo.msg.Serialize() - addr, err := net.ResolveTCPAddr("tcp", host) - if err != nil { - return data, fmt.Errorf("Lookup failed for '%s': %s", host, err) - } - - conn, err := net.DialTCP("tcp", nil, addr) - if err != nil { - return data, fmt.Errorf("Connection failed for '%s': %s", host, err) - } - defer conn.Close() - - _, err = conn.Write(cmd) - if err != nil { - return data, 
fmt.Errorf("Failed to send to '%s': %s", host, err) - } - - msgHeader := bytes.NewBuffer(make([]byte, MSG_HEADER_SIZE)) - _, err = readLenFromConn(conn, msgHeader.Bytes(), MSG_HEADER_SIZE) - if err != nil { - return data, fmt.Errorf("Failed to read header: %s", err) - } - err = binary.Read(msgHeader, binary.BigEndian, &asInfo.msg.aerospikeMessageHeader) - if err != nil { - return data, fmt.Errorf("Failed to unmarshal header: %s", err) - } - - msgLen := msgLenFromBytes(asInfo.msg.aerospikeMessageHeader.DataLen) - - if int64(len(asInfo.msg.Data)) != msgLen { - asInfo.msg.Data = make([]byte, msgLen) - } - - _, err = readLenFromConn(conn, asInfo.msg.Data, len(asInfo.msg.Data)) - if err != nil { - return data, fmt.Errorf("Failed to read from connection to '%s': %s", host, err) - } - - data, err = asInfo.parseMultiResponse() - if err != nil { - return data, fmt.Errorf("Failed to parse response from '%s': %s", host, err) - } - - return data, err -} - -func readAerospikeStats( - stats map[string]string, - acc telegraf.Accumulator, - host string, - namespace string, -) { - fields := make(map[string]interface{}) - tags := map[string]string{ - "aerospike_host": host, - "namespace": "_service", - } - - if namespace != "" { - tags["namespace"] = namespace - } - for key, value := range stats { - // We are going to ignore all string based keys - val, err := strconv.ParseInt(value, 10, 64) - if err == nil { - if strings.Contains(key, "-") { - key = strings.Replace(key, "-", "_", -1) - } - fields[key] = val - } - } - acc.AddFields("aerospike", fields, tags) -} - -func unmarshalMapInfo(infoMap map[string]string, key string) (map[string]string, error) { - key = strings.TrimSuffix(key, "\n") - res := map[string]string{} - - v, exists := infoMap[key] - if !exists { - return res, fmt.Errorf("Key '%s' missing from info", key) - } - - values := strings.Split(v, ";") - for i := range values { - kv := strings.Split(values[i], "=") - if len(kv) > 1 { - res[kv[0]] = kv[1] - } - } - - return 
res, nil -} - -func unmarshalListInfo(infoMap map[string]string, key string) ([]string, error) { - key = strings.TrimSuffix(key, "\n") - - v, exists := infoMap[key] - if !exists { - return []string{}, fmt.Errorf("Key '%s' missing from info", key) - } - - values := strings.Split(v, ";") - return values, nil -} - -func readLenFromConn(c net.Conn, buffer []byte, length int) (total int, err error) { - var r int - for total < length { - r, err = c.Read(buffer[total:length]) - total += r - if err != nil { - break - } - } - return -} - -// Taken from aerospike-client-go/types/message.go -func msgLenToBytes(DataLen int64) [6]byte { - b := make([]byte, 8) - binary.BigEndian.PutUint64(b, uint64(DataLen)) - res := [6]byte{} - copy(res[:], b[2:]) - return res -} - -// Taken from aerospike-client-go/types/message.go -func msgLenFromBytes(buf [6]byte) int64 { - nbytes := append([]byte{0, 0}, buf[:]...) - DataLen := binary.BigEndian.Uint64(nbytes) - return int64(DataLen) + return out } func init() { diff --git a/plugins/inputs/aerospike/aerospike_test.go b/plugins/inputs/aerospike/aerospike_test.go index 2717a15b9..8463432f5 100644 --- a/plugins/inputs/aerospike/aerospike_test.go +++ b/plugins/inputs/aerospike/aerospike_test.go @@ -1,7 +1,6 @@ package aerospike import ( - "reflect" "testing" "github.com/influxdata/telegraf/testutil" @@ -22,84 +21,30 @@ func TestAerospikeStatistics(t *testing.T) { err := a.Gather(&acc) require.NoError(t, err) + + assert.True(t, acc.HasMeasurement("aerospike_node")) + assert.True(t, acc.HasMeasurement("aerospike_namespace")) + assert.True(t, acc.HasIntField("aerospike_node", "batch_error")) } -func TestAerospikeMsgLenFromToBytes(t *testing.T) { - var i int64 = 8 - assert.True(t, i == msgLenFromBytes(msgLenToBytes(i))) -} +func TestAerospikeStatisticsPartialErr(t *testing.T) { + if testing.Short() { + t.Skip("Skipping integration test in short mode") + } + + a := &Aerospike{ + Servers: []string{ + testutil.GetLocalHost() + ":3000", + 
testutil.GetLocalHost() + ":9999", + }, + } -func TestReadAerospikeStatsNoNamespace(t *testing.T) { - // Also test for re-writing var acc testutil.Accumulator - stats := map[string]string{ - "stat-write-errs": "12345", - "stat_read_reqs": "12345", - } - readAerospikeStats(stats, &acc, "host1", "") - fields := map[string]interface{}{ - "stat_write_errs": int64(12345), - "stat_read_reqs": int64(12345), - } - tags := map[string]string{ - "aerospike_host": "host1", - "namespace": "_service", - } - acc.AssertContainsTaggedFields(t, "aerospike", fields, tags) -} - -func TestReadAerospikeStatsNamespace(t *testing.T) { - var acc testutil.Accumulator - stats := map[string]string{ - "stat_write_errs": "12345", - "stat_read_reqs": "12345", - } - readAerospikeStats(stats, &acc, "host1", "test") - - fields := map[string]interface{}{ - "stat_write_errs": int64(12345), - "stat_read_reqs": int64(12345), - } - tags := map[string]string{ - "aerospike_host": "host1", - "namespace": "test", - } - acc.AssertContainsTaggedFields(t, "aerospike", fields, tags) -} - -func TestAerospikeUnmarshalList(t *testing.T) { - i := map[string]string{ - "test": "one;two;three", - } - - expected := []string{"one", "two", "three"} - - list, err := unmarshalListInfo(i, "test2") - assert.True(t, err != nil) - - list, err = unmarshalListInfo(i, "test") - assert.True(t, err == nil) - equal := true - for ix := range expected { - if list[ix] != expected[ix] { - equal = false - break - } - } - assert.True(t, equal) -} - -func TestAerospikeUnmarshalMap(t *testing.T) { - i := map[string]string{ - "test": "key1=value1;key2=value2", - } - - expected := map[string]string{ - "key1": "value1", - "key2": "value2", - } - m, err := unmarshalMapInfo(i, "test") - assert.True(t, err == nil) - assert.True(t, reflect.DeepEqual(m, expected)) + err := a.Gather(&acc) + require.Error(t, err) + + assert.True(t, acc.HasMeasurement("aerospike_node")) + assert.True(t, acc.HasMeasurement("aerospike_namespace")) + assert.True(t, 
acc.HasIntField("aerospike_node", "batch_error")) } From 6afe9ceef1222c1d9dae0262865662bcf57d3f79 Mon Sep 17 00:00:00 2001 From: ashish Date: Mon, 18 Jul 2016 12:06:41 +0530 Subject: [PATCH 20/47] cassandra plugin lower version support added closes #1427 closes #1508 --- CHANGELOG.md | 1 + plugins/inputs/cassandra/cassandra.go | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d01567eba..e5388cb84 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -57,6 +57,7 @@ should now look like: - [#1432](https://github.com/influxdata/telegraf/issues/1432): Panic fix for multiple graphite outputs under very high load. - [#1412](https://github.com/influxdata/telegraf/pull/1412): Instrumental output has better reconnect behavior - [#1460](https://github.com/influxdata/telegraf/issues/1460): Remove PID from procstat plugin to fix cardinality issues. +- [#1427](https://github.com/influxdata/telegraf/issues/1427): Cassandra input: version 2.x "column family" fix. 
## v1.0 beta 2 [2016-06-21] diff --git a/plugins/inputs/cassandra/cassandra.go b/plugins/inputs/cassandra/cassandra.go index 351232aca..e7edf7153 100644 --- a/plugins/inputs/cassandra/cassandra.go +++ b/plugins/inputs/cassandra/cassandra.go @@ -148,7 +148,7 @@ func (c cassandraMetric) addTagsFields(out map[string]interface{}) { tokens := parseJmxMetricRequest(r.(map[string]interface{})["mbean"].(string)) // Requests with wildcards for keyspace or table names will return nested // maps in the json response - if tokens["type"] == "Table" && (tokens["keyspace"] == "*" || + if (tokens["type"] == "Table" || tokens["type"] == "ColumnFamily") && (tokens["keyspace"] == "*" || tokens["scope"] == "*") { if valuesMap, ok := out["value"]; ok { for k, v := range valuesMap.(map[string]interface{}) { From b4a6d9c6475e8bca374f072d9e7f8dd9cc25f702 Mon Sep 17 00:00:00 2001 From: Cameron Sparr Date: Mon, 18 Jul 2016 11:45:25 +0100 Subject: [PATCH 21/47] Change prometheus replacer to reverse regex replacer closes #1474 --- plugins/outputs/prometheus_client/prometheus_client.go | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/plugins/outputs/prometheus_client/prometheus_client.go b/plugins/outputs/prometheus_client/prometheus_client.go index 790784a2b..4f7ce8053 100644 --- a/plugins/outputs/prometheus_client/prometheus_client.go +++ b/plugins/outputs/prometheus_client/prometheus_client.go @@ -5,7 +5,6 @@ import ( "log" "net/http" "regexp" - "strings" "sync" "github.com/influxdata/telegraf" @@ -14,7 +13,7 @@ import ( ) var ( - sanitizedChars = strings.NewReplacer("/", "_", "@", "_", " ", "_", "-", "_", ".", "_") + invalidNameCharRE = regexp.MustCompile(`[^a-zA-Z0-9_]`) // Prometheus metric names must match this regex // see https://prometheus.io/docs/concepts/data_model/#metric-names-and-labels @@ -111,12 +110,12 @@ func (p *PrometheusClient) Write(metrics []telegraf.Metric) error { for _, point := range metrics { key := point.Name() - key = 
sanitizedChars.Replace(key) + key = invalidNameCharRE.ReplaceAllString(key, "_") var labels []string l := prometheus.Labels{} for k, v := range point.Tags() { - k = sanitizedChars.Replace(k) + k = invalidNameCharRE.ReplaceAllString(k, "_") if len(k) == 0 { continue } @@ -137,7 +136,7 @@ func (p *PrometheusClient) Write(metrics []telegraf.Metric) error { } // sanitize the measurement name - n = sanitizedChars.Replace(n) + n = invalidNameCharRE.ReplaceAllString(n, "_") var mname string if n == "value" { mname = key From 2d6c8767f775cc612facc1fe82d53719a66b4b22 Mon Sep 17 00:00:00 2001 From: Mark McKinstry Date: Mon, 18 Jul 2016 07:03:39 -0400 Subject: [PATCH 22/47] add ability to read redis from a socket (#1480) * add ability to read redis from a socket * update CHANGELOG --- CHANGELOG.md | 1 + plugins/inputs/redis/redis.go | 48 +++++++++++++++++++++++++---------- 2 files changed, 35 insertions(+), 14 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e5388cb84..6128a698b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -39,6 +39,7 @@ should now look like: - [#1402](https://github.com/influxdata/telegraf/pull/1402): docker-machine/boot2docker no longer required for unit tests. - [#1350](https://github.com/influxdata/telegraf/pull/1350): cgroup input plugin. - [#1369](https://github.com/influxdata/telegraf/pull/1369): Add input plugin for consuming metrics from NSQD. +- [#1369](https://github.com/influxdata/telegraf/pull/1480): add ability to read redis from a socket. - [#1387](https://github.com/influxdata/telegraf/pull/1387): **Breaking Change** - Redis `role` tag renamed to `replication_role` to avoid global_tags override - [#1437](https://github.com/influxdata/telegraf/pull/1437): Fetching Galera status metrics in MySQL - [#1500](https://github.com/influxdata/telegraf/pull/1500): Aerospike plugin refactored to use official client lib. 
diff --git a/plugins/inputs/redis/redis.go b/plugins/inputs/redis/redis.go index 76cbc89cb..fc50387df 100644 --- a/plugins/inputs/redis/redis.go +++ b/plugins/inputs/redis/redis.go @@ -25,6 +25,7 @@ var sampleConfig = ` ## e.g. ## tcp://localhost:6379 ## tcp://:password@192.168.99.100 + ## unix:///var/run/redis.sock ## ## If no servers are specified, then localhost is used as the host. ## If no port is specified, 6379 is used @@ -80,12 +81,15 @@ var Tracking = map[string]string{ var ErrProtocolError = errors.New("redis protocol error") +const defaultPort = "6379" + // Reads stats from all configured servers accumulates stats. // Returns one of the errors encountered while gather stats (if any). func (r *Redis) Gather(acc telegraf.Accumulator) error { if len(r.Servers) == 0 { url := &url.URL{ - Host: ":6379", + Scheme: "tcp", + Host: ":6379", } r.gatherServer(url, acc) return nil @@ -96,6 +100,10 @@ func (r *Redis) Gather(acc telegraf.Accumulator) error { var outerr error for _, serv := range r.Servers { + if !strings.HasPrefix(serv, "tcp://") || !strings.HasPrefix(serv, "unix://") { + serv = "tcp://" + serv + } + u, err := url.Parse(serv) if err != nil { return fmt.Errorf("Unable to parse to address '%s': %s", serv, err) @@ -105,6 +113,13 @@ func (r *Redis) Gather(acc telegraf.Accumulator) error { u.Host = serv u.Path = "" } + if u.Scheme == "tcp" { + _, _, err := net.SplitHostPort(u.Host) + if err != nil { + u.Host = u.Host + ":" + defaultPort + } + } + wg.Add(1) go func(serv string) { defer wg.Done() @@ -117,17 +132,17 @@ func (r *Redis) Gather(acc telegraf.Accumulator) error { return outerr } -const defaultPort = "6379" - func (r *Redis) gatherServer(addr *url.URL, acc telegraf.Accumulator) error { - _, _, err := net.SplitHostPort(addr.Host) - if err != nil { - addr.Host = addr.Host + ":" + defaultPort - } + var address string - c, err := net.DialTimeout("tcp", addr.Host, defaultTimeout) + if addr.Scheme == "unix" { + address = addr.Path + } else { + address = 
addr.Host + } + c, err := net.DialTimeout(addr.Scheme, address, defaultTimeout) if err != nil { - return fmt.Errorf("Unable to connect to redis server '%s': %s", addr.Host, err) + return fmt.Errorf("Unable to connect to redis server '%s': %s", address, err) } defer c.Close() @@ -155,12 +170,17 @@ func (r *Redis) gatherServer(addr *url.URL, acc telegraf.Accumulator) error { c.Write([]byte("EOF\r\n")) rdr := bufio.NewReader(c) - // Setup tags for all redis metrics - host, port := "unknown", "unknown" - // If there's an error, ignore and use 'unknown' tags - host, port, _ = net.SplitHostPort(addr.Host) - tags := map[string]string{"server": host, "port": port} + var tags map[string]string + if addr.Scheme == "unix" { + tags = map[string]string{"socket": addr.Path} + } else { + // Setup tags for all redis metrics + host, port := "unknown", "unknown" + // If there's an error, ignore and use 'unknown' tags + host, port, _ = net.SplitHostPort(addr.Host) + tags = map[string]string{"server": host, "port": port} + } return gatherInfoOutput(rdr, acc, tags) } From 1d9745ee98806fda6c20910d572ae15b35a7f036 Mon Sep 17 00:00:00 2001 From: Tim Allen Date: Mon, 11 Jul 2016 08:58:00 -0500 Subject: [PATCH 23/47] Move exec WaitGroup from Exec instance level to Gather. If Gather is run concurently the shared WaitGroup variable never finishes. closes #1463 closes #1464 --- CHANGELOG.md | 1 + plugins/inputs/exec/exec.go | 13 ++++++------- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6128a698b..0e8dd69cf 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -59,6 +59,7 @@ should now look like: - [#1412](https://github.com/influxdata/telegraf/pull/1412): Instrumental output has better reconnect behavior - [#1460](https://github.com/influxdata/telegraf/issues/1460): Remove PID from procstat plugin to fix cardinality issues. - [#1427](https://github.com/influxdata/telegraf/issues/1427): Cassandra input: version 2.x "column family" fix. 
+- [#1463](https://github.com/influxdata/telegraf/issues/1463): Shared WaitGroup in Exec plugin ## v1.0 beta 2 [2016-06-21] diff --git a/plugins/inputs/exec/exec.go b/plugins/inputs/exec/exec.go index c8d4cee50..060a4f308 100644 --- a/plugins/inputs/exec/exec.go +++ b/plugins/inputs/exec/exec.go @@ -48,8 +48,6 @@ type Exec struct { parser parsers.Parser - wg sync.WaitGroup - runner Runner errChan chan error } @@ -119,8 +117,8 @@ func (c CommandRunner) Run( return out.Bytes(), nil } -func (e *Exec) ProcessCommand(command string, acc telegraf.Accumulator) { - defer e.wg.Done() +func (e *Exec) ProcessCommand(command string, acc telegraf.Accumulator, wg *sync.WaitGroup) { + defer wg.Done() out, err := e.runner.Run(e, command, acc) if err != nil { @@ -151,6 +149,7 @@ func (e *Exec) SetParser(parser parsers.Parser) { } func (e *Exec) Gather(acc telegraf.Accumulator) error { + var wg sync.WaitGroup // Legacy single command support if e.Command != "" { e.Commands = append(e.Commands, e.Command) @@ -190,11 +189,11 @@ func (e *Exec) Gather(acc telegraf.Accumulator) error { errChan := errchan.New(len(commands)) e.errChan = errChan.C - e.wg.Add(len(commands)) + wg.Add(len(commands)) for _, command := range commands { - go e.ProcessCommand(command, acc) + go e.ProcessCommand(command, acc, &wg) } - e.wg.Wait() + wg.Wait() return errChan.Error() } From 8c7edeb53bfdf07f51d7d809399c22aee9905679 Mon Sep 17 00:00:00 2001 From: Nathaniel Cook Date: Fri, 1 Jul 2016 08:49:48 -0600 Subject: [PATCH 24/47] allow measurement to be defined for logparser_grok plugin --- plugins/inputs/logparser/grok/grok.go | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/plugins/inputs/logparser/grok/grok.go b/plugins/inputs/logparser/grok/grok.go index a463c0f6a..54ecb464b 100644 --- a/plugins/inputs/logparser/grok/grok.go +++ b/plugins/inputs/logparser/grok/grok.go @@ -56,6 +56,7 @@ type Parser struct { Patterns []string CustomPatterns string CustomPatternFiles []string + Measurement 
string // typeMap is a map of patterns -> capture name -> modifier, // ie, { @@ -114,6 +115,10 @@ func (p *Parser) Compile() error { p.addCustomPatterns(scanner) } + if p.Measurement == "" { + p.Measurement = "logparser_grok" + } + return p.compileCustomPatterns() } @@ -215,7 +220,7 @@ func (p *Parser) ParseLine(line string) (telegraf.Metric, error) { } } - return telegraf.NewMetric("logparser_grok", tags, fields, p.tsModder.tsMod(timestamp)) + return telegraf.NewMetric(p.Measurement, tags, fields, p.tsModder.tsMod(timestamp)) } func (p *Parser) addCustomPatterns(scanner *bufio.Scanner) { From 5dc4cce15712d7000e30506e3100d8771a631e82 Mon Sep 17 00:00:00 2001 From: Cameron Sparr Date: Mon, 18 Jul 2016 12:27:46 +0100 Subject: [PATCH 25/47] Fixup adding 'measurement' to logparser grok closes #1434 --- CHANGELOG.md | 1 + plugins/inputs/logparser/grok/grok.go | 2 +- plugins/inputs/logparser/grok/grok_test.go | 26 ++++++++++++++++++++++ plugins/inputs/logparser/logparser.go | 2 ++ 4 files changed, 30 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0e8dd69cf..2be040bf3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -43,6 +43,7 @@ should now look like: - [#1387](https://github.com/influxdata/telegraf/pull/1387): **Breaking Change** - Redis `role` tag renamed to `replication_role` to avoid global_tags override - [#1437](https://github.com/influxdata/telegraf/pull/1437): Fetching Galera status metrics in MySQL - [#1500](https://github.com/influxdata/telegraf/pull/1500): Aerospike plugin refactored to use official client lib. +- [#1434](https://github.com/influxdata/telegraf/pull/1434): Add measurement name arg to logparser plugin. 
### Bugfixes diff --git a/plugins/inputs/logparser/grok/grok.go b/plugins/inputs/logparser/grok/grok.go index 54ecb464b..16e62b223 100644 --- a/plugins/inputs/logparser/grok/grok.go +++ b/plugins/inputs/logparser/grok/grok.go @@ -56,7 +56,7 @@ type Parser struct { Patterns []string CustomPatterns string CustomPatternFiles []string - Measurement string + Measurement string // typeMap is a map of patterns -> capture name -> modifier, // ie, { diff --git a/plugins/inputs/logparser/grok/grok_test.go b/plugins/inputs/logparser/grok/grok_test.go index 02f69f67a..979553f88 100644 --- a/plugins/inputs/logparser/grok/grok_test.go +++ b/plugins/inputs/logparser/grok/grok_test.go @@ -83,6 +83,32 @@ func Benchmark_ParseLine_CustomPattern(b *testing.B) { benchM = m } +func TestMeasurementName(t *testing.T) { + p := &Parser{ + Measurement: "my_web_log", + Patterns: []string{"%{COMMON_LOG_FORMAT}"}, + } + assert.NoError(t, p.Compile()) + + // Parse an influxdb POST request + m, err := p.ParseLine(`127.0.0.1 user-identifier frank [10/Oct/2000:13:55:36 -0700] "GET /apache_pb.gif HTTP/1.0" 200 2326`) + require.NotNil(t, m) + assert.NoError(t, err) + assert.Equal(t, + map[string]interface{}{ + "resp_bytes": int64(2326), + "auth": "frank", + "client_ip": "127.0.0.1", + "resp_code": int64(200), + "http_version": float64(1.0), + "ident": "user-identifier", + "request": "/apache_pb.gif", + }, + m.Fields()) + assert.Equal(t, map[string]string{"verb": "GET"}, m.Tags()) + assert.Equal(t, "my_web_log", m.Name()) +} + func TestBuiltinInfluxdbHttpd(t *testing.T) { p := &Parser{ Patterns: []string{"%{INFLUXDB_HTTPD_LOG}"}, diff --git a/plugins/inputs/logparser/logparser.go b/plugins/inputs/logparser/logparser.go index 82003582f..4737ace65 100644 --- a/plugins/inputs/logparser/logparser.go +++ b/plugins/inputs/logparser/logparser.go @@ -58,6 +58,8 @@ const sampleConfig = ` ## %{COMMON_LOG_FORMAT} (plain apache & nginx access logs) ## %{COMBINED_LOG_FORMAT} (access logs + referrer & agent) 
patterns = ["%{INFLUXDB_HTTPD_LOG}"] + ## Name of the outputted measurement name. + measurement = "influxdb_log" ## Full path(s) to custom pattern files. custom_pattern_files = [] ## Custom patterns can also be defined here. Put one pattern per line. From 1c2965703dbc2f989ce4a0974d4769009b966048 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fran=C3=A7ois=20de=20Metz?= Date: Mon, 18 Jul 2016 13:41:13 +0200 Subject: [PATCH 26/47] Webhooks plugin: add mandrill (#1408) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Add mandrill webhook. * Store the id of the msg as part of event. Signed-off-by: Cyril Duez Signed-off-by: François de Metz * Decode body to get the mandrill_events. Signed-off-by: Cyril Duez Signed-off-by: François de Metz * Handle HEAD request. Signed-off-by: Cyril Duez Signed-off-by: François de Metz * Add the README. Signed-off-by: Cyril Duez Signed-off-by: François de Metz * Add mandrill_webhooks to the README. Signed-off-by: Cyril Duez Signed-off-by: François de Metz * Update changelog. Signed-off-by: Cyril Duez Signed-off-by: François de Metz * Run gofmt. 
Signed-off-by: Cyril Duez Signed-off-by: François de Metz --- CHANGELOG.md | 1 + README.md | 1 + plugins/inputs/webhooks/README.md | 1 + plugins/inputs/webhooks/mandrill/README.md | 15 ++++ .../webhooks/mandrill/mandrill_webhooks.go | 56 ++++++++++++ .../mandrill/mandrill_webhooks_events.go | 24 ++++++ .../mandrill_webhooks_events_json_test.go | 58 +++++++++++++ .../mandrill/mandrill_webhooks_test.go | 85 +++++++++++++++++++ plugins/inputs/webhooks/webhooks.go | 9 +- 9 files changed, 248 insertions(+), 2 deletions(-) create mode 100644 plugins/inputs/webhooks/mandrill/README.md create mode 100644 plugins/inputs/webhooks/mandrill/mandrill_webhooks.go create mode 100644 plugins/inputs/webhooks/mandrill/mandrill_webhooks_events.go create mode 100644 plugins/inputs/webhooks/mandrill/mandrill_webhooks_events_json_test.go create mode 100644 plugins/inputs/webhooks/mandrill/mandrill_webhooks_test.go diff --git a/CHANGELOG.md b/CHANGELOG.md index 2be040bf3..46239894f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -36,6 +36,7 @@ should now look like: - [#1289](https://github.com/influxdata/telegraf/pull/1289): webhooks input plugin. Thanks @francois2metz and @cduez! - [#1247](https://github.com/influxdata/telegraf/pull/1247): rollbar webhook plugin. +- [#1408](https://github.com/influxdata/telegraf/pull/1408): mandrill webhook plugin. - [#1402](https://github.com/influxdata/telegraf/pull/1402): docker-machine/boot2docker no longer required for unit tests. - [#1350](https://github.com/influxdata/telegraf/pull/1350): cgroup input plugin. - [#1369](https://github.com/influxdata/telegraf/pull/1369): Add input plugin for consuming metrics from NSQD. 
diff --git a/README.md b/README.md index 8264be7f6..738f9eaea 100644 --- a/README.md +++ b/README.md @@ -219,6 +219,7 @@ Telegraf can also collect metrics via the following service plugins: * [nats_consumer](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/nats_consumer) * [webhooks](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/webhooks) * [github](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/webhooks/github) + * [mandrill](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/webhooks/mandrill) * [rollbar](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/webhooks/rollbar) * [nsq_consumer](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/nsq_consumer) diff --git a/plugins/inputs/webhooks/README.md b/plugins/inputs/webhooks/README.md index 5a42f6ea7..86e6685b8 100644 --- a/plugins/inputs/webhooks/README.md +++ b/plugins/inputs/webhooks/README.md @@ -16,6 +16,7 @@ $ sudo service telegraf start ## Available webhooks - [Github](github/) +- [Mandrill](mandrill/) - [Rollbar](rollbar/) ## Adding new webhooks plugin diff --git a/plugins/inputs/webhooks/mandrill/README.md b/plugins/inputs/webhooks/mandrill/README.md new file mode 100644 index 000000000..2fb4914e1 --- /dev/null +++ b/plugins/inputs/webhooks/mandrill/README.md @@ -0,0 +1,15 @@ +# mandrill webhook + +You should configure your Mandrill's Webhooks to point at the `webhooks` service. To do this go to `mandrillapp.com/` and click `Settings > Webhooks`. In the resulting page, click on `Add a Webhook`, select all events, and set the `URL` to `http://:1619/mandrill`, and click on `Create Webhook`. + +## Events + +See the [webhook doc](https://mandrill.zendesk.com/hc/en-us/articles/205583307-Message-Event-Webhook-format). + +All events for logs the original timestamp, the event name and the unique identifier of the message that generated the event. 
+ +**Tags:** +* 'event' = `event.event` string + +**Fields:** +* 'id' = `event._id` string diff --git a/plugins/inputs/webhooks/mandrill/mandrill_webhooks.go b/plugins/inputs/webhooks/mandrill/mandrill_webhooks.go new file mode 100644 index 000000000..e9d4a6de4 --- /dev/null +++ b/plugins/inputs/webhooks/mandrill/mandrill_webhooks.go @@ -0,0 +1,56 @@ +package mandrill + +import ( + "encoding/json" + "io/ioutil" + "log" + "net/http" + "net/url" + "time" + + "github.com/gorilla/mux" + "github.com/influxdata/telegraf" +) + +type MandrillWebhook struct { + Path string + acc telegraf.Accumulator +} + +func (md *MandrillWebhook) Register(router *mux.Router, acc telegraf.Accumulator) { + router.HandleFunc(md.Path, md.returnOK).Methods("HEAD") + router.HandleFunc(md.Path, md.eventHandler).Methods("POST") + + log.Printf("Started the webhooks_mandrill on %s\n", md.Path) + md.acc = acc +} + +func (md *MandrillWebhook) returnOK(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusOK) +} + +func (md *MandrillWebhook) eventHandler(w http.ResponseWriter, r *http.Request) { + defer r.Body.Close() + body, err := ioutil.ReadAll(r.Body) + if err != nil { + w.WriteHeader(http.StatusBadRequest) + return + } + data, err := url.ParseQuery(string(body)) + if err != nil { + w.WriteHeader(http.StatusBadRequest) + return + } + var events []MandrillEvent + err = json.Unmarshal([]byte(data.Get("mandrill_events")), &events) + if err != nil { + w.WriteHeader(http.StatusBadRequest) + return + } + + for _, event := range events { + md.acc.AddFields("mandrill_webhooks", event.Fields(), event.Tags(), time.Unix(event.TimeStamp, 0)) + } + + w.WriteHeader(http.StatusOK) +} diff --git a/plugins/inputs/webhooks/mandrill/mandrill_webhooks_events.go b/plugins/inputs/webhooks/mandrill/mandrill_webhooks_events.go new file mode 100644 index 000000000..b36b13e54 --- /dev/null +++ b/plugins/inputs/webhooks/mandrill/mandrill_webhooks_events.go @@ -0,0 +1,24 @@ +package mandrill + +type Event 
interface { + Tags() map[string]string + Fields() map[string]interface{} +} + +type MandrillEvent struct { + EventName string `json:"event"` + TimeStamp int64 `json:"ts"` + Id string `json:"_id"` +} + +func (me *MandrillEvent) Tags() map[string]string { + return map[string]string{ + "event": me.EventName, + } +} + +func (me *MandrillEvent) Fields() map[string]interface{} { + return map[string]interface{}{ + "id": me.Id, + } +} diff --git a/plugins/inputs/webhooks/mandrill/mandrill_webhooks_events_json_test.go b/plugins/inputs/webhooks/mandrill/mandrill_webhooks_events_json_test.go new file mode 100644 index 000000000..4ab385e18 --- /dev/null +++ b/plugins/inputs/webhooks/mandrill/mandrill_webhooks_events_json_test.go @@ -0,0 +1,58 @@ +package mandrill + +func SendEventJSON() string { + return ` + { + "event": "send", + "msg": { + "ts": 1365109999, + "subject": "This an example webhook message", + "email": "example.webhook@mandrillapp.com", + "sender": "example.sender@mandrillapp.com", + "tags": [ + "webhook-example" + ], + "opens": [ + + ], + "clicks": [ + + ], + "state": "sent", + "metadata": { + "user_id": 111 + }, + "_id": "exampleaaaaaaaaaaaaaaaaaaaaaaaaa", + "_version": "exampleaaaaaaaaaaaaaaa" + }, + "_id": "id1", + "ts": 1384954004 + }` +} + +func HardBounceEventJSON() string { + return ` + { + "event": "hard_bounce", + "msg": { + "ts": 1365109999, + "subject": "This an example webhook message", + "email": "example.webhook@mandrillapp.com", + "sender": "example.sender@mandrillapp.com", + "tags": [ + "webhook-example" + ], + "state": "bounced", + "metadata": { + "user_id": 111 + }, + "_id": "exampleaaaaaaaaaaaaaaaaaaaaaaaaa2", + "_version": "exampleaaaaaaaaaaaaaaa", + "bounce_description": "bad_mailbox", + "bgtools_code": 10, + "diag": "smtp;550 5.1.1 The email account that you tried to reach does not exist. Please try double-checking the recipient's email address for typos or unnecessary spaces." 
+ }, + "_id": "id2", + "ts": 1384954004 + }` +} diff --git a/plugins/inputs/webhooks/mandrill/mandrill_webhooks_test.go b/plugins/inputs/webhooks/mandrill/mandrill_webhooks_test.go new file mode 100644 index 000000000..94ac68684 --- /dev/null +++ b/plugins/inputs/webhooks/mandrill/mandrill_webhooks_test.go @@ -0,0 +1,85 @@ +package mandrill + +import ( + "github.com/influxdata/telegraf/testutil" + "net/http" + "net/http/httptest" + "net/url" + "strings" + "testing" +) + +func postWebhooks(md *MandrillWebhook, eventBody string) *httptest.ResponseRecorder { + body := url.Values{} + body.Set("mandrill_events", eventBody) + req, _ := http.NewRequest("POST", "/mandrill", strings.NewReader(body.Encode())) + w := httptest.NewRecorder() + + md.eventHandler(w, req) + + return w +} + +func headRequest(md *MandrillWebhook) *httptest.ResponseRecorder { + req, _ := http.NewRequest("HEAD", "/mandrill", strings.NewReader("")) + w := httptest.NewRecorder() + + md.returnOK(w, req) + + return w +} + +func TestHead(t *testing.T) { + md := &MandrillWebhook{Path: "/mandrill"} + resp := headRequest(md) + if resp.Code != http.StatusOK { + t.Errorf("HEAD returned HTTP status code %v.\nExpected %v", resp.Code, http.StatusOK) + } +} + +func TestSendEvent(t *testing.T) { + var acc testutil.Accumulator + md := &MandrillWebhook{Path: "/mandrill", acc: &acc} + resp := postWebhooks(md, "["+SendEventJSON()+"]") + if resp.Code != http.StatusOK { + t.Errorf("POST send returned HTTP status code %v.\nExpected %v", resp.Code, http.StatusOK) + } + + fields := map[string]interface{}{ + "id": "id1", + } + + tags := map[string]string{ + "event": "send", + } + + acc.AssertContainsTaggedFields(t, "mandrill_webhooks", fields, tags) +} + +func TestMultipleEvents(t *testing.T) { + var acc testutil.Accumulator + md := &MandrillWebhook{Path: "/mandrill", acc: &acc} + resp := postWebhooks(md, "["+SendEventJSON()+","+HardBounceEventJSON()+"]") + if resp.Code != http.StatusOK { + t.Errorf("POST send returned HTTP 
status code %v.\nExpected %v", resp.Code, http.StatusOK) + } + + fields := map[string]interface{}{ + "id": "id1", + } + + tags := map[string]string{ + "event": "send", + } + + acc.AssertContainsTaggedFields(t, "mandrill_webhooks", fields, tags) + + fields = map[string]interface{}{ + "id": "id2", + } + + tags = map[string]string{ + "event": "hard_bounce", + } + acc.AssertContainsTaggedFields(t, "mandrill_webhooks", fields, tags) +} diff --git a/plugins/inputs/webhooks/webhooks.go b/plugins/inputs/webhooks/webhooks.go index d8c74850a..884435c36 100644 --- a/plugins/inputs/webhooks/webhooks.go +++ b/plugins/inputs/webhooks/webhooks.go @@ -11,6 +11,7 @@ import ( "github.com/influxdata/telegraf/plugins/inputs" "github.com/influxdata/telegraf/plugins/inputs/webhooks/github" + "github.com/influxdata/telegraf/plugins/inputs/webhooks/mandrill" "github.com/influxdata/telegraf/plugins/inputs/webhooks/rollbar" ) @@ -25,8 +26,9 @@ func init() { type Webhooks struct { ServiceAddress string - Github *github.GithubWebhook - Rollbar *rollbar.RollbarWebhook + Github *github.GithubWebhook + Mandrill *mandrill.MandrillWebhook + Rollbar *rollbar.RollbarWebhook } func NewWebhooks() *Webhooks { @@ -41,6 +43,9 @@ func (wb *Webhooks) SampleConfig() string { [inputs.webhooks.github] path = "/github" + [inputs.webhooks.mandrill] + path = "/mandrill" + [inputs.webhooks.rollbar] path = "/rollbar" ` From 281a4d550021f88ea36eb05b3c0536b0ad6c68f6 Mon Sep 17 00:00:00 2001 From: Cameron Sparr Date: Mon, 18 Jul 2016 12:54:33 +0100 Subject: [PATCH 27/47] Change resp_code from field to tag in logparser closes #1479 --- CHANGELOG.md | 1 + plugins/inputs/logparser/grok/grok_test.go | 15 +++++---------- plugins/inputs/logparser/grok/influx_patterns.go | 2 +- .../logparser/grok/patterns/influx-patterns | 2 +- 4 files changed, 8 insertions(+), 12 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 46239894f..a0f0cca16 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -45,6 +45,7 @@ should now look 
like: - [#1437](https://github.com/influxdata/telegraf/pull/1437): Fetching Galera status metrics in MySQL - [#1500](https://github.com/influxdata/telegraf/pull/1500): Aerospike plugin refactored to use official client lib. - [#1434](https://github.com/influxdata/telegraf/pull/1434): Add measurement name arg to logparser plugin. +- [#1479](https://github.com/influxdata/telegraf/pull/1479): logparser: change resp_code from a field to a tag. ### Bugfixes diff --git a/plugins/inputs/logparser/grok/grok_test.go b/plugins/inputs/logparser/grok/grok_test.go index 979553f88..1181e85ae 100644 --- a/plugins/inputs/logparser/grok/grok_test.go +++ b/plugins/inputs/logparser/grok/grok_test.go @@ -99,13 +99,12 @@ func TestMeasurementName(t *testing.T) { "resp_bytes": int64(2326), "auth": "frank", "client_ip": "127.0.0.1", - "resp_code": int64(200), "http_version": float64(1.0), "ident": "user-identifier", "request": "/apache_pb.gif", }, m.Fields()) - assert.Equal(t, map[string]string{"verb": "GET"}, m.Tags()) + assert.Equal(t, map[string]string{"verb": "GET", "resp_code": "200"}, m.Tags()) assert.Equal(t, "my_web_log", m.Name()) } @@ -124,7 +123,6 @@ func TestBuiltinInfluxdbHttpd(t *testing.T) { "resp_bytes": int64(0), "auth": "-", "client_ip": "::1", - "resp_code": int64(204), "http_version": float64(1.1), "ident": "-", "referrer": "-", @@ -133,7 +131,7 @@ func TestBuiltinInfluxdbHttpd(t *testing.T) { "agent": "InfluxDBClient", }, m.Fields()) - assert.Equal(t, map[string]string{"verb": "POST"}, m.Tags()) + assert.Equal(t, map[string]string{"verb": "POST", "resp_code": "204"}, m.Tags()) // Parse an influxdb GET request m, err = p.ParseLine(`[httpd] ::1 - - [14/Jun/2016:12:10:02 +0100] "GET /query?db=telegraf&q=SELECT+bytes%2Cresponse_time_us+FROM+logparser_grok+WHERE+http_method+%3D+%27GET%27+AND+response_time_us+%3E+0+AND+time+%3E+now%28%29+-+1h HTTP/1.1" 200 578 "http://localhost:8083/" "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/537.36 (KHTML, like Gecko) 
Chrome/51.0.2704.84 Safari/537.36" 8a3806f1-3220-11e6-8006-000000000000 988`) @@ -144,7 +142,6 @@ func TestBuiltinInfluxdbHttpd(t *testing.T) { "resp_bytes": int64(578), "auth": "-", "client_ip": "::1", - "resp_code": int64(200), "http_version": float64(1.1), "ident": "-", "referrer": "http://localhost:8083/", @@ -153,7 +150,7 @@ func TestBuiltinInfluxdbHttpd(t *testing.T) { "agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.84 Safari/537.36", }, m.Fields()) - assert.Equal(t, map[string]string{"verb": "GET"}, m.Tags()) + assert.Equal(t, map[string]string{"verb": "GET", "resp_code": "200"}, m.Tags()) } // common log format @@ -173,13 +170,12 @@ func TestBuiltinCommonLogFormat(t *testing.T) { "resp_bytes": int64(2326), "auth": "frank", "client_ip": "127.0.0.1", - "resp_code": int64(200), "http_version": float64(1.0), "ident": "user-identifier", "request": "/apache_pb.gif", }, m.Fields()) - assert.Equal(t, map[string]string{"verb": "GET"}, m.Tags()) + assert.Equal(t, map[string]string{"verb": "GET", "resp_code": "200"}, m.Tags()) } // combined log format @@ -199,7 +195,6 @@ func TestBuiltinCombinedLogFormat(t *testing.T) { "resp_bytes": int64(2326), "auth": "frank", "client_ip": "127.0.0.1", - "resp_code": int64(200), "http_version": float64(1.0), "ident": "user-identifier", "request": "/apache_pb.gif", @@ -207,7 +202,7 @@ func TestBuiltinCombinedLogFormat(t *testing.T) { "agent": "Mozilla", }, m.Fields()) - assert.Equal(t, map[string]string{"verb": "GET"}, m.Tags()) + assert.Equal(t, map[string]string{"verb": "GET", "resp_code": "200"}, m.Tags()) } func TestCompileStringAndParse(t *testing.T) { diff --git a/plugins/inputs/logparser/grok/influx_patterns.go b/plugins/inputs/logparser/grok/influx_patterns.go index 0622c61ef..53be0e20d 100644 --- a/plugins/inputs/logparser/grok/influx_patterns.go +++ b/plugins/inputs/logparser/grok/influx_patterns.go @@ -66,7 +66,7 @@ INFLUXDB_HTTPD_LOG \[httpd\] 
%{COMBINED_LOG_FORMAT} %{UUID:uuid:drop} %{NUMBER:r # apache & nginx logs, this is also known as the "common log format" # see https://en.wikipedia.org/wiki/Common_Log_Format -COMMON_LOG_FORMAT %{CLIENT:client_ip} %{NGUSER:ident} %{NGUSER:auth} \[%{HTTPDATE:ts:ts-httpd}\] "(?:%{WORD:verb:tag} %{NOTSPACE:request}(?: HTTP/%{NUMBER:http_version:float})?|%{DATA})" %{NUMBER:resp_code:int} (?:%{NUMBER:resp_bytes:int}|-) +COMMON_LOG_FORMAT %{CLIENT:client_ip} %{NGUSER:ident} %{NGUSER:auth} \[%{HTTPDATE:ts:ts-httpd}\] "(?:%{WORD:verb:tag} %{NOTSPACE:request}(?: HTTP/%{NUMBER:http_version:float})?|%{DATA})" %{NUMBER:resp_code:tag} (?:%{NUMBER:resp_bytes:int}|-) # Combined log format is the same as the common log format but with the addition # of two quoted strings at the end for "referrer" and "agent" diff --git a/plugins/inputs/logparser/grok/patterns/influx-patterns b/plugins/inputs/logparser/grok/patterns/influx-patterns index f4d375f4d..1db74a17a 100644 --- a/plugins/inputs/logparser/grok/patterns/influx-patterns +++ b/plugins/inputs/logparser/grok/patterns/influx-patterns @@ -62,7 +62,7 @@ INFLUXDB_HTTPD_LOG \[httpd\] %{COMBINED_LOG_FORMAT} %{UUID:uuid:drop} %{NUMBER:r # apache & nginx logs, this is also known as the "common log format" # see https://en.wikipedia.org/wiki/Common_Log_Format -COMMON_LOG_FORMAT %{CLIENT:client_ip} %{NGUSER:ident} %{NGUSER:auth} \[%{HTTPDATE:ts:ts-httpd}\] "(?:%{WORD:verb:tag} %{NOTSPACE:request}(?: HTTP/%{NUMBER:http_version:float})?|%{DATA})" %{NUMBER:resp_code:int} (?:%{NUMBER:resp_bytes:int}|-) +COMMON_LOG_FORMAT %{CLIENT:client_ip} %{NGUSER:ident} %{NGUSER:auth} \[%{HTTPDATE:ts:ts-httpd}\] "(?:%{WORD:verb:tag} %{NOTSPACE:request}(?: HTTP/%{NUMBER:http_version:float})?|%{DATA})" %{NUMBER:resp_code:tag} (?:%{NUMBER:resp_bytes:int}|-) # Combined log format is the same as the common log format but with the addition # of two quoted strings at the end for "referrer" and "agent" From dabb6f54663ca16f8c62d0f725fc3e302b98e87d Mon Sep 17 
00:00:00 2001 From: Cameron Sparr Date: Mon, 18 Jul 2016 14:44:25 +0100 Subject: [PATCH 28/47] Internally name all patterns for log parsing flexibility closes #1436 This also fixes the bad behavior of waiting until runtime to return log parsing pattern compile errors when a pattern was simply unfound. closes #1418 Also protect against user error when the telegraf user does not have permission to open the provided file. We will now error and exit in this case, rather than silently waiting to get permission to open it. --- CHANGELOG.md | 2 ++ plugins/inputs/logparser/grok/grok.go | 22 ++++++++++-- plugins/inputs/logparser/grok/grok_test.go | 39 ++++++++++++++++++++-- plugins/inputs/logparser/logparser.go | 33 +++++++++--------- plugins/inputs/logparser/logparser_test.go | 7 ++-- plugins/inputs/tail/tail.go | 7 ++-- 6 files changed, 84 insertions(+), 26 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a0f0cca16..99e8ffe56 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -63,6 +63,8 @@ should now look like: - [#1460](https://github.com/influxdata/telegraf/issues/1460): Remove PID from procstat plugin to fix cardinality issues. - [#1427](https://github.com/influxdata/telegraf/issues/1427): Cassandra input: version 2.x "column family" fix. - [#1463](https://github.com/influxdata/telegraf/issues/1463): Shared WaitGroup in Exec plugin +- [#1436](https://github.com/influxdata/telegraf/issues/1436): logparser: honor modifiers in "pattern" config. +- [#1418](https://github.com/influxdata/telegraf/issues/1418): logparser: error and exit on file permissions/missing errors. 
## v1.0 beta 2 [2016-06-21] diff --git a/plugins/inputs/logparser/grok/grok.go b/plugins/inputs/logparser/grok/grok.go index 16e62b223..d8691d7b9 100644 --- a/plugins/inputs/logparser/grok/grok.go +++ b/plugins/inputs/logparser/grok/grok.go @@ -53,7 +53,12 @@ var ( ) type Parser struct { - Patterns []string + Patterns []string + // namedPatterns is a list of internally-assigned names to the patterns + // specified by the user in Patterns. + // They will look like: + // GROK_INTERNAL_PATTERN_0, GROK_INTERNAL_PATTERN_1, etc. + namedPatterns []string CustomPatterns string CustomPatternFiles []string Measurement string @@ -98,13 +103,24 @@ func (p *Parser) Compile() error { return err } - p.CustomPatterns = DEFAULT_PATTERNS + p.CustomPatterns + // Give Patterns fake names so that they can be treated as named + // "custom patterns" + p.namedPatterns = make([]string, len(p.Patterns)) + for i, pattern := range p.Patterns { + name := fmt.Sprintf("GROK_INTERNAL_PATTERN_%d", i) + p.CustomPatterns += "\n" + name + " " + pattern + "\n" + p.namedPatterns[i] = "%{" + name + "}" + } + // Combine user-supplied CustomPatterns with DEFAULT_PATTERNS and parse + // them together as the same type of pattern. + p.CustomPatterns = DEFAULT_PATTERNS + p.CustomPatterns if len(p.CustomPatterns) != 0 { scanner := bufio.NewScanner(strings.NewReader(p.CustomPatterns)) p.addCustomPatterns(scanner) } + // Parse any custom pattern files supplied. 
for _, filename := range p.CustomPatternFiles { file, err := os.Open(filename) if err != nil { @@ -127,7 +143,7 @@ func (p *Parser) ParseLine(line string) (telegraf.Metric, error) { var values map[string]string // the matching pattern string var patternName string - for _, pattern := range p.Patterns { + for _, pattern := range p.namedPatterns { if values, err = p.g.Parse(pattern, line); err != nil { return nil, err } diff --git a/plugins/inputs/logparser/grok/grok_test.go b/plugins/inputs/logparser/grok/grok_test.go index 1181e85ae..295f32609 100644 --- a/plugins/inputs/logparser/grok/grok_test.go +++ b/plugins/inputs/logparser/grok/grok_test.go @@ -207,7 +207,7 @@ func TestBuiltinCombinedLogFormat(t *testing.T) { func TestCompileStringAndParse(t *testing.T) { p := &Parser{ - Patterns: []string{"%{TEST_LOG_A}", "%{TEST_LOG_B}"}, + Patterns: []string{"%{TEST_LOG_A}"}, CustomPatterns: ` DURATION %{NUMBER}[nuµm]?s RESPONSE_CODE %{NUMBER:response_code:tag} @@ -230,6 +230,41 @@ func TestCompileStringAndParse(t *testing.T) { assert.Equal(t, map[string]string{"response_code": "200"}, metricA.Tags()) } +func TestCompileErrorsOnInvalidPattern(t *testing.T) { + p := &Parser{ + Patterns: []string{"%{TEST_LOG_A}", "%{TEST_LOG_B}"}, + CustomPatterns: ` + DURATION %{NUMBER}[nuµm]?s + RESPONSE_CODE %{NUMBER:response_code:tag} + RESPONSE_TIME %{DURATION:response_time:duration} + TEST_LOG_A %{NUMBER:myfloat:float} %{RESPONSE_CODE} %{IPORHOST:clientip} %{RESPONSE_TIME} + `, + } + assert.Error(t, p.Compile()) + + metricA, _ := p.ParseLine(`1.25 200 192.168.1.1 5.432µs`) + require.Nil(t, metricA) +} + +func TestParsePatternsWithoutCustom(t *testing.T) { + p := &Parser{ + Patterns: []string{"%{POSINT:ts:ts-epochnano} response_time=%{POSINT:response_time:int} mymetric=%{NUMBER:metric:float}"}, + } + assert.NoError(t, p.Compile()) + + metricA, err := p.ParseLine(`1466004605359052000 response_time=20821 mymetric=10890.645`) + require.NotNil(t, metricA) + assert.NoError(t, err) + 
assert.Equal(t, + map[string]interface{}{ + "response_time": int64(20821), + "metric": float64(10890.645), + }, + metricA.Fields()) + assert.Equal(t, map[string]string{}, metricA.Tags()) + assert.Equal(t, time.Unix(0, 1466004605359052000), metricA.Time()) +} + func TestParseEpochNano(t *testing.T) { p := &Parser{ Patterns: []string{"%{MYAPP}"}, @@ -413,7 +448,7 @@ func TestParseErrors(t *testing.T) { TEST_LOG_A %{HTTPDATE:ts:ts-httpd} %{WORD:myword:int} %{} `, } - assert.NoError(t, p.Compile()) + assert.Error(t, p.Compile()) _, err := p.ParseLine(`[04/Jun/2016:12:41:45 +0100] notnumber 200 192.168.1.1 5.432µs 101`) assert.Error(t, err) diff --git a/plugins/inputs/logparser/logparser.go b/plugins/inputs/logparser/logparser.go index 4737ace65..6b29ea031 100644 --- a/plugins/inputs/logparser/logparser.go +++ b/plugins/inputs/logparser/logparser.go @@ -9,6 +9,7 @@ import ( "github.com/hpcloud/tail" "github.com/influxdata/telegraf" + "github.com/influxdata/telegraf/internal/errchan" "github.com/influxdata/telegraf/internal/globpath" "github.com/influxdata/telegraf/plugins/inputs" @@ -110,11 +111,15 @@ func (l *LogParserPlugin) Start(acc telegraf.Accumulator) error { } // compile log parser patterns: + errChan := errchan.New(len(l.parsers)) for _, parser := range l.parsers { if err := parser.Compile(); err != nil { - return err + errChan.C <- err } } + if err := errChan.Error(); err != nil { + return err + } var seek tail.SeekInfo if !l.FromBeginning { @@ -125,24 +130,25 @@ func (l *LogParserPlugin) Start(acc telegraf.Accumulator) error { l.wg.Add(1) go l.parser() - var errS string // Create a "tailer" for each file for _, filepath := range l.Files { g, err := globpath.Compile(filepath) if err != nil { log.Printf("ERROR Glob %s failed to compile, %s", filepath, err) + continue } - for file, _ := range g.Match() { + files := g.Match() + errChan = errchan.New(len(files)) + for file, _ := range files { tailer, err := tail.TailFile(file, tail.Config{ - ReOpen: true, - 
Follow: true, - Location: &seek, + ReOpen: true, + Follow: true, + Location: &seek, + MustExist: true, }) - if err != nil { - errS += err.Error() + " " - continue - } + errChan.C <- err + // create a goroutine for each "tailer" l.wg.Add(1) go l.receiver(tailer) @@ -150,10 +156,7 @@ func (l *LogParserPlugin) Start(acc telegraf.Accumulator) error { } } - if errS != "" { - return fmt.Errorf(errS) - } - return nil + return errChan.Error() } // receiver is launched as a goroutine to continuously watch a tailed logfile @@ -201,8 +204,6 @@ func (l *LogParserPlugin) parser() { if m != nil { l.acc.AddFields(m.Name(), m.Fields(), m.Tags(), m.Time()) } - } else { - log.Printf("Malformed log line in [%s], Error: %s\n", line, err) } } } diff --git a/plugins/inputs/logparser/logparser_test.go b/plugins/inputs/logparser/logparser_test.go index 095b627ef..97f33067e 100644 --- a/plugins/inputs/logparser/logparser_test.go +++ b/plugins/inputs/logparser/logparser_test.go @@ -37,7 +37,7 @@ func TestGrokParseLogFilesNonExistPattern(t *testing.T) { } acc := testutil.Accumulator{} - assert.NoError(t, logparser.Start(&acc)) + assert.Error(t, logparser.Start(&acc)) time.Sleep(time.Millisecond * 500) logparser.Stop() @@ -80,6 +80,8 @@ func TestGrokParseLogFiles(t *testing.T) { map[string]string{}) } +// Test that test_a.log line gets parsed even though we don't have the correct +// pattern available for test_b.log func TestGrokParseLogFilesOneBad(t *testing.T) { thisdir := getCurrentDir() p := &grok.Parser{ @@ -90,11 +92,12 @@ func TestGrokParseLogFilesOneBad(t *testing.T) { logparser := &LogParserPlugin{ FromBeginning: true, - Files: []string{thisdir + "grok/testdata/*.log"}, + Files: []string{thisdir + "grok/testdata/test_a.log"}, GrokParser: p, } acc := testutil.Accumulator{} + acc.SetDebug(true) assert.NoError(t, logparser.Start(&acc)) time.Sleep(time.Millisecond * 500) diff --git a/plugins/inputs/tail/tail.go b/plugins/inputs/tail/tail.go index 7386e053d..942fd6bae 100644 --- 
a/plugins/inputs/tail/tail.go +++ b/plugins/inputs/tail/tail.go @@ -86,9 +86,10 @@ func (t *Tail) Start(acc telegraf.Accumulator) error { for file, _ := range g.Match() { tailer, err := tail.TailFile(file, tail.Config{ - ReOpen: true, - Follow: true, - Location: &seek, + ReOpen: true, + Follow: true, + Location: &seek, + MustExist: true, }) if err != nil { errS += err.Error() + " " From b58cd78c79f3326bd6be9b76a286f4a5ac8a5fcd Mon Sep 17 00:00:00 2001 From: Cameron Sparr Date: Mon, 18 Jul 2016 17:26:44 +0100 Subject: [PATCH 29/47] Use errchan in redis input plugin this may address, or at least log issue #1462 --- plugins/inputs/redis/redis.go | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/plugins/inputs/redis/redis.go b/plugins/inputs/redis/redis.go index fc50387df..649786c2c 100644 --- a/plugins/inputs/redis/redis.go +++ b/plugins/inputs/redis/redis.go @@ -12,6 +12,7 @@ import ( "time" "github.com/influxdata/telegraf" + "github.com/influxdata/telegraf/internal/errchan" "github.com/influxdata/telegraf/plugins/inputs" ) @@ -96,9 +97,7 @@ func (r *Redis) Gather(acc telegraf.Accumulator) error { } var wg sync.WaitGroup - - var outerr error - + errChan := errchan.New(len(r.Servers)) for _, serv := range r.Servers { if !strings.HasPrefix(serv, "tcp://") || !strings.HasPrefix(serv, "unix://") { serv = "tcp://" + serv @@ -123,13 +122,12 @@ func (r *Redis) Gather(acc telegraf.Accumulator) error { wg.Add(1) go func(serv string) { defer wg.Done() - outerr = r.gatherServer(u, acc) + errChan.C <- r.gatherServer(u, acc) }(serv) } wg.Wait() - - return outerr + return errChan.Error() } func (r *Redis) gatherServer(addr *url.URL, acc telegraf.Accumulator) error { From 03d02fa67a06b73614cae657f36adb8dd7e147ba Mon Sep 17 00:00:00 2001 From: Cameron Sparr Date: Mon, 18 Jul 2016 17:37:21 +0100 Subject: [PATCH 30/47] Telegraf v1.0 beta 3 --- CHANGELOG.md | 2 ++ Godeps | 1 + README.md | 18 +++++++++--------- plugins/inputs/aerospike/aerospike.go | 2 +- 4 
files changed, 13 insertions(+), 10 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 99e8ffe56..5aa149a89 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,7 @@ ## v1.0 [unreleased] +## v1.0 beta 3 [2016-07-18] + ### Release Notes **Breaking Change**: Aerospike main server node measurements have been renamed diff --git a/Godeps b/Godeps index 1546bb627..5caa6a9e2 100644 --- a/Godeps +++ b/Godeps @@ -46,6 +46,7 @@ github.com/prometheus/procfs 406e5b7bfd8201a36e2bb5f7bdae0b03380c2ce8 github.com/samuel/go-zookeeper 218e9c81c0dd8b3b18172b2bbfad92cc7d6db55f github.com/shirou/gopsutil 586bb697f3ec9f8ec08ffefe18f521a64534037c github.com/soniah/gosnmp b1b4f885b12c5dcbd021c5cee1c904110de6db7d +github.com/sparrc/aerospike-client-go d4bb42d2c2d39dae68e054116f4538af189e05d5 github.com/streadway/amqp b4f3ceab0337f013208d31348b578d83c0064744 github.com/stretchr/testify 1f4a1643a57e798696635ea4c126e9127adb7d3c github.com/vjeantet/grok 83bfdfdfd1a8146795b28e547a8e3c8b28a466c2 diff --git a/README.md b/README.md index 738f9eaea..aa8d9e039 100644 --- a/README.md +++ b/README.md @@ -20,12 +20,12 @@ new plugins. ### Linux deb and rpm Packages: Latest: -* https://dl.influxdata.com/telegraf/releases/telegraf_1.0.0-beta2_amd64.deb -* https://dl.influxdata.com/telegraf/releases/telegraf-1.0.0_beta2.x86_64.rpm +* https://dl.influxdata.com/telegraf/releases/telegraf_1.0.0-beta3_amd64.deb +* https://dl.influxdata.com/telegraf/releases/telegraf-1.0.0_beta3.x86_64.rpm Latest (arm): -* https://dl.influxdata.com/telegraf/releases/telegraf_1.0.0-beta2_armhf.deb -* https://dl.influxdata.com/telegraf/releases/telegraf-1.0.0_beta2.armhf.rpm +* https://dl.influxdata.com/telegraf/releases/telegraf_1.0.0-beta3_armhf.deb +* https://dl.influxdata.com/telegraf/releases/telegraf-1.0.0_beta3.armhf.rpm ##### Package Instructions: @@ -46,14 +46,14 @@ to use this repo to install & update telegraf. 
### Linux tarballs: Latest: -* https://dl.influxdata.com/telegraf/releases/telegraf-1.0.0-beta2_linux_amd64.tar.gz -* https://dl.influxdata.com/telegraf/releases/telegraf-1.0.0-beta2_linux_i386.tar.gz -* https://dl.influxdata.com/telegraf/releases/telegraf-1.0.0-beta2_linux_armhf.tar.gz +* https://dl.influxdata.com/telegraf/releases/telegraf-1.0.0-beta3_linux_amd64.tar.gz +* https://dl.influxdata.com/telegraf/releases/telegraf-1.0.0-beta3_linux_i386.tar.gz +* https://dl.influxdata.com/telegraf/releases/telegraf-1.0.0-beta3_linux_armhf.tar.gz ### FreeBSD tarball: Latest: -* https://dl.influxdata.com/telegraf/releases/telegraf-1.0.0-beta2_freebsd_amd64.tar.gz +* https://dl.influxdata.com/telegraf/releases/telegraf-1.0.0-beta3_freebsd_amd64.tar.gz ### Ansible Role: @@ -69,7 +69,7 @@ brew install telegraf ### Windows Binaries (EXPERIMENTAL) Latest: -* https://dl.influxdata.com/telegraf/releases/telegraf-1.0.0-beta2_windows_amd64.zip +* https://dl.influxdata.com/telegraf/releases/telegraf-1.0.0-beta3_windows_amd64.zip ### From Source: diff --git a/plugins/inputs/aerospike/aerospike.go b/plugins/inputs/aerospike/aerospike.go index 4bb652c0a..29e51cb82 100644 --- a/plugins/inputs/aerospike/aerospike.go +++ b/plugins/inputs/aerospike/aerospike.go @@ -11,7 +11,7 @@ import ( "github.com/influxdata/telegraf/internal/errchan" "github.com/influxdata/telegraf/plugins/inputs" - as "github.com/aerospike/aerospike-client-go" + as "github.com/sparrc/aerospike-client-go" ) type Aerospike struct { From 375710488df06ce5f2b6af4d234a65a64585fae8 Mon Sep 17 00:00:00 2001 From: Matt Jones Date: Tue, 19 Jul 2016 05:24:06 -0400 Subject: [PATCH 31/47] Add support for self-signed certs to RabbitMQ input plugin (#1503) * add initial support to allow self-signed certs When using self-signed the metrics collection will fail, this will allow the user to specify in the input configuration file if they want to skip certificate verification. 
This is functionally identical to `curl -k` At some point this functionality should be moved to the agent as it is already implemented identically in several different input plugins. * Add initial comment strings to remove noise These should be properly fleshed out at some point to ensure code completeness * refactor to use generic helper function * fix import statement against fork * update changelog --- CHANGELOG.md | 1 + plugins/inputs/rabbitmq/rabbitmq.go | 57 ++++++++++++++++++++++++++--- 2 files changed, 53 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5aa149a89..517abea96 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -36,6 +36,7 @@ should now look like: ### Features +- [#1503](https://github.com/influxdata/telegraf/pull/1503): Add tls support for certs to RabbitMQ input plugin - [#1289](https://github.com/influxdata/telegraf/pull/1289): webhooks input plugin. Thanks @francois2metz and @cduez! - [#1247](https://github.com/influxdata/telegraf/pull/1247): rollbar webhook plugin. - [#1408](https://github.com/influxdata/telegraf/pull/1408): mandrill webhook plugin. 
diff --git a/plugins/inputs/rabbitmq/rabbitmq.go b/plugins/inputs/rabbitmq/rabbitmq.go index 18d666a08..8a879d179 100644 --- a/plugins/inputs/rabbitmq/rabbitmq.go +++ b/plugins/inputs/rabbitmq/rabbitmq.go @@ -9,35 +9,59 @@ import ( "time" "github.com/influxdata/telegraf" + "github.com/influxdata/telegraf/internal" "github.com/influxdata/telegraf/internal/errchan" "github.com/influxdata/telegraf/plugins/inputs" ) +// DefaultUsername will set a default value that corresponds to the default +// value used by Rabbitmq const DefaultUsername = "guest" + +// DefaultPassword will set a default value that corresponds to the default +// value used by Rabbitmq const DefaultPassword = "guest" + +// DefaultURL will set a default value that corresponds to the default value +// used by Rabbitmq const DefaultURL = "http://localhost:15672" +// RabbitMQ defines the configuration necessary for gathering metrics, +// see the sample config for further details type RabbitMQ struct { URL string Name string Username string Password string - Nodes []string - Queues []string + // Path to CA file + SSLCA string `toml:"ssl_ca"` + // Path to host cert file + SSLCert string `toml:"ssl_cert"` + // Path to cert key file + SSLKey string `toml:"ssl_key"` + // Use SSL but skip chain & host verification + InsecureSkipVerify bool + + // InsecureSkipVerify bool + Nodes []string + Queues []string Client *http.Client } +// OverviewResponse ... type OverviewResponse struct { MessageStats *MessageStats `json:"message_stats"` ObjectTotals *ObjectTotals `json:"object_totals"` QueueTotals *QueueTotals `json:"queue_totals"` } +// Details ... type Details struct { Rate float64 } +// MessageStats ... type MessageStats struct { Ack int64 AckDetails Details `json:"ack_details"` @@ -51,6 +75,7 @@ type MessageStats struct { RedeliverDetails Details `json:"redeliver_details"` } +// ObjectTotals ... 
type ObjectTotals struct { Channels int64 Connections int64 @@ -59,6 +84,7 @@ type ObjectTotals struct { Queues int64 } +// QueueTotals ... type QueueTotals struct { Messages int64 MessagesReady int64 `json:"messages_ready"` @@ -66,10 +92,11 @@ type QueueTotals struct { MessageBytes int64 `json:"message_bytes"` MessageBytesReady int64 `json:"message_bytes_ready"` MessageBytesUnacknowledged int64 `json:"message_bytes_unacknowledged"` - MessageRam int64 `json:"message_bytes_ram"` + MessageRAM int64 `json:"message_bytes_ram"` MessagePersistent int64 `json:"message_bytes_persistent"` } +// Queue ... type Queue struct { QueueTotals // just to not repeat the same code MessageStats `json:"message_stats"` @@ -83,6 +110,7 @@ type Queue struct { AutoDelete bool `json:"auto_delete"` } +// Node ... type Node struct { Name string @@ -99,6 +127,7 @@ type Node struct { SocketsUsed int64 `json:"sockets_used"` } +// gatherFunc ... type gatherFunc func(r *RabbitMQ, acc telegraf.Accumulator, errChan chan error) var gatherFunctions = []gatherFunc{gatherOverview, gatherNodes, gatherQueues} @@ -109,22 +138,40 @@ var sampleConfig = ` # username = "guest" # password = "guest" + ## Optional SSL Config + # ssl_ca = "/etc/telegraf/ca.pem" + # ssl_cert = "/etc/telegraf/cert.pem" + # ssl_key = "/etc/telegraf/key.pem" + ## Use SSL but skip chain & host verification + # insecure_skip_verify = false + ## A list of nodes to pull metrics about. If not specified, metrics for ## all nodes are gathered. # nodes = ["rabbit@node1", "rabbit@node2"] ` +// SampleConfig ... func (r *RabbitMQ) SampleConfig() string { return sampleConfig } +// Description ... func (r *RabbitMQ) Description() string { return "Read metrics from one or many RabbitMQ servers via the management API" } +// Gather ... 
func (r *RabbitMQ) Gather(acc telegraf.Accumulator) error { if r.Client == nil { - tr := &http.Transport{ResponseHeaderTimeout: time.Duration(3 * time.Second)} + tlsCfg, err := internal.GetTLSConfig( + r.SSLCert, r.SSLKey, r.SSLCA, r.InsecureSkipVerify) + if err != nil { + return err + } + tr := &http.Transport{ + ResponseHeaderTimeout: time.Duration(3 * time.Second), + TLSClientConfig: tlsCfg, + } r.Client = &http.Client{ Transport: tr, Timeout: time.Duration(4 * time.Second), @@ -286,7 +333,7 @@ func gatherQueues(r *RabbitMQ, acc telegraf.Accumulator, errChan chan error) { "message_bytes": queue.MessageBytes, "message_bytes_ready": queue.MessageBytesReady, "message_bytes_unacked": queue.MessageBytesUnacknowledged, - "message_bytes_ram": queue.MessageRam, + "message_bytes_ram": queue.MessageRAM, "message_bytes_persist": queue.MessagePersistent, "messages": queue.Messages, "messages_ready": queue.MessagesReady, From 0be69b8a44aa56fa012b7a24d384de411ad8c962 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timoth=C3=A9e=20GERMAIN?= Date: Fri, 15 Jul 2016 13:35:32 +0000 Subject: [PATCH 32/47] Make the user able to specify full path for HAproxy stats closes #1499 closes #1019 Do no try to guess HAproxy stats url, just add ";csv" at the end of the url if not present. Signed-off-by: tgermain --- CHANGELOG.md | 1 + plugins/inputs/haproxy/haproxy.go | 18 ++++++++++++------ plugins/inputs/haproxy/haproxy_test.go | 2 +- 3 files changed, 14 insertions(+), 7 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 517abea96..60949047f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -68,6 +68,7 @@ should now look like: - [#1463](https://github.com/influxdata/telegraf/issues/1463): Shared WaitGroup in Exec plugin - [#1436](https://github.com/influxdata/telegraf/issues/1436): logparser: honor modifiers in "pattern" config. - [#1418](https://github.com/influxdata/telegraf/issues/1418): logparser: error and exit on file permissions/missing errors. 
+- [#1499](https://github.com/influxdata/telegraf/pull/1499): Make the user able to specify full path for HAproxy stats ## v1.0 beta 2 [2016-06-21] diff --git a/plugins/inputs/haproxy/haproxy.go b/plugins/inputs/haproxy/haproxy.go index 0a0b3da82..9529bad3f 100644 --- a/plugins/inputs/haproxy/haproxy.go +++ b/plugins/inputs/haproxy/haproxy.go @@ -92,9 +92,11 @@ type haproxy struct { var sampleConfig = ` ## An array of address to gather stats about. Specify an ip on hostname ## with optional port. ie localhost, 10.10.3.33:1936, etc. - - ## If no servers are specified, then default to 127.0.0.1:1936 - servers = ["http://myhaproxy.com:1936", "http://anotherhaproxy.com:1936"] + ## Make sure you specify the complete path to the stats endpoint + ## ie 10.10.3.33:1936/haproxy?stats + # + ## If no servers are specified, then default to 127.0.0.1:1936/haproxy?stats + servers = ["http://myhaproxy.com:1936/haproxy?stats"] ## Or you can also use local socket ## servers = ["socket:/run/haproxy/admin.sock"] ` @@ -111,7 +113,7 @@ func (r *haproxy) Description() string { // Returns one of the errors encountered while gather stats (if any). 
func (g *haproxy) Gather(acc telegraf.Accumulator) error { if len(g.Servers) == 0 { - return g.gatherServer("http://127.0.0.1:1936", acc) + return g.gatherServer("http://127.0.0.1:1936/haproxy?stats", acc) } var wg sync.WaitGroup @@ -167,12 +169,16 @@ func (g *haproxy) gatherServer(addr string, acc telegraf.Accumulator) error { g.client = client } + if !strings.HasSuffix(addr, ";csv") { + addr += "/;csv" + } + u, err := url.Parse(addr) if err != nil { return fmt.Errorf("Unable parse server address '%s': %s", addr, err) } - req, err := http.NewRequest("GET", fmt.Sprintf("%s://%s%s/;csv", u.Scheme, u.Host, u.Path), nil) + req, err := http.NewRequest("GET", addr, nil) if u.User != nil { p, _ := u.User.Password() req.SetBasicAuth(u.User.Username(), p) @@ -184,7 +190,7 @@ func (g *haproxy) gatherServer(addr string, acc telegraf.Accumulator) error { } if res.StatusCode != 200 { - return fmt.Errorf("Unable to get valid stat result from '%s': %s", addr, err) + return fmt.Errorf("Unable to get valid stat result from '%s', http response code : %d", addr, res.StatusCode) } return importCsvResult(res.Body, acc, u.Host) diff --git a/plugins/inputs/haproxy/haproxy_test.go b/plugins/inputs/haproxy/haproxy_test.go index f9057e0cd..befcabd97 100644 --- a/plugins/inputs/haproxy/haproxy_test.go +++ b/plugins/inputs/haproxy/haproxy_test.go @@ -243,7 +243,7 @@ func TestHaproxyDefaultGetFromLocalhost(t *testing.T) { err := r.Gather(&acc) require.Error(t, err) - assert.Contains(t, err.Error(), "127.0.0.1:1936/;csv") + assert.Contains(t, err.Error(), "127.0.0.1:1936/haproxy?stats/;csv") } const csvOutputSample = ` From 5f14ad9fa1e0b375552b6412d3079d5743e756e9 Mon Sep 17 00:00:00 2001 From: Cameron Sparr Date: Tue, 19 Jul 2016 11:15:09 +0100 Subject: [PATCH 33/47] clean up and finish aerospike refactor & readme --- plugins/inputs/aerospike/README.md | 280 ++++---------------------- plugins/inputs/aerospike/aerospike.go | 31 ++- plugins/inputs/ceph/README.md | 14 +- 3 files changed, 63 
insertions(+), 262 deletions(-) diff --git a/plugins/inputs/aerospike/README.md b/plugins/inputs/aerospike/README.md index 6fb6bb189..60c470cd3 100644 --- a/plugins/inputs/aerospike/README.md +++ b/plugins/inputs/aerospike/README.md @@ -1,265 +1,55 @@ -## Telegraf Plugin: Aerospike +# Aerospike Input Plugin -#### Plugin arguments: -- **servers** string array: List of aerospike servers to query (def: 127.0.0.1:3000) - -#### Description - -The aerospike plugin queries aerospike server(s) and get node statistics. It also collects stats for +The aerospike plugin queries aerospike server(s) and get node statistics & stats for all the configured namespaces. For what the measurements mean, please consult the [Aerospike Metrics Reference Docs](http://www.aerospike.com/docs/reference/metrics). The metric names, to make it less complicated in querying, have replaced all `-` with `_` as Aerospike metrics come in both forms (no idea why). -# Measurements: -#### Aerospike Statistics [values]: +All metrics are attempted to be cast to integers, then booleans, then strings. 
-Meta: -- units: Integer +### Measurements: -Measurement names: -- batch_index_queue -- batch_index_unused_buffers -- batch_queue -- batch_tree_count -- client_connections -- data_used_bytes_memory -- index_used_bytes_memory -- info_queue -- migrate_progress_recv -- migrate_progress_send -- migrate_rx_objs -- migrate_tx_objs -- objects -- ongoing_write_reqs -- partition_absent -- partition_actual -- partition_desync -- partition_object_count -- partition_ref_count -- partition_replica -- proxy_in_progress -- query_agg_avg_rec_count -- query_avg_rec_count -- query_lookup_avg_rec_count -- queue -- record_locks -- record_refs -- sindex_used_bytes_memory -- sindex_gc_garbage_cleaned -- system_free_mem_pct -- total_bytes_disk -- total_bytes_memory -- tree_count -- scans_active -- uptime -- used_bytes_disk -- used_bytes_memory -- cluster_size -- waiting_transactions +The aerospike metrics are under two measurement names: -#### Aerospike Statistics [cumulative]: +***aerospike_node***: These are the aerospike **node** measurements, which are +available from the aerospike `statistics` command. -Meta: -- units: Integer + ie, + ``` + telnet localhost 3003 + statistics + ... 
+ ``` -Measurement names: -- batch_errors -- batch_index_complete -- batch_index_errors -- batch_index_initiate -- batch_index_timeout -- batch_initiate -- batch_timeout -- err_duplicate_proxy_request -- err_out_of_space -- err_replica_non_null_node -- err_replica_null_node -- err_rw_cant_put_unique -- err_rw_pending_limit -- err_rw_request_not_found -- err_storage_queue_full -- err_sync_copy_null_master -- err_sync_copy_null_node -- err_tsvc_requests -- err_write_fail_bin_exists -- err_write_fail_generation -- err_write_fail_generation_xdr -- err_write_fail_incompatible_type -- err_write_fail_key_exists -- err_write_fail_key_mismatch -- err_write_fail_not_found -- err_write_fail_noxdr -- err_write_fail_parameter -- err_write_fail_prole_delete -- err_write_fail_prole_generation -- err_write_fail_prole_unknown -- err_write_fail_unknown -- fabric_msgs_rcvd -- fabric_msgs_sent -- heartbeat_received_foreign -- heartbeat_received_self -- migrate_msgs_recv -- migrate_msgs_sent -- migrate_num_incoming_accepted -- migrate_num_incoming_refused -- proxy_action -- proxy_initiate -- proxy_retry -- proxy_retry_new_dest -- proxy_retry_q_full -- proxy_retry_same_dest -- proxy_unproxy -- query_abort -- query_agg -- query_agg_abort -- query_agg_err -- query_agg_success -- query_bad_records -- query_fail -- query_long_queue_full -- query_long_running -- query_lookup_abort -- query_lookup_err -- query_lookups -- query_lookup_success -- query_reqs -- query_short_queue_full -- query_short_running -- query_success -- query_tracked -- read_dup_prole -- reaped_fds -- rw_err_ack_badnode -- rw_err_ack_internal -- rw_err_ack_nomatch -- rw_err_dup_cluster_key -- rw_err_dup_internal -- rw_err_dup_send -- rw_err_write_cluster_key -- rw_err_write_internal -- rw_err_write_send -- sindex_ucgarbage_found -- sindex_gc_locktimedout -- sindex_gc_inactivity_dur -- sindex_gc_activity_dur -- sindex_gc_list_creation_time -- sindex_gc_list_deletion_time -- sindex_gc_objects_validated -- 
sindex_gc_garbage_found -- stat_cluster_key_err_ack_dup_trans_reenqueue -- stat_cluster_key_err_ack_rw_trans_reenqueue -- stat_cluster_key_prole_retry -- stat_cluster_key_regular_processed -- stat_cluster_key_trans_to_proxy_retry -- stat_deleted_set_object -- stat_delete_success -- stat_duplicate_operation -- stat_evicted_objects -- stat_evicted_objects_time -- stat_evicted_set_objects -- stat_expired_objects -- stat_nsup_deletes_not_shipped -- stat_proxy_errs -- stat_proxy_reqs -- stat_proxy_reqs_xdr -- stat_proxy_success -- stat_read_errs_notfound -- stat_read_errs_other -- stat_read_reqs -- stat_read_reqs_xdr -- stat_read_success -- stat_rw_timeout -- stat_slow_trans_queue_batch_pop -- stat_slow_trans_queue_pop -- stat_slow_trans_queue_push -- stat_write_errs -- stat_write_errs_notfound -- stat_write_errs_other -- stat_write_reqs -- stat_write_reqs_xdr -- stat_write_success -- stat_xdr_pipe_miss -- stat_xdr_pipe_writes -- stat_zero_bin_records -- storage_defrag_corrupt_record -- storage_defrag_wait -- transactions -- basic_scans_succeeded -- basic_scans_failed -- aggr_scans_succeeded -- aggr_scans_failed -- udf_bg_scans_succeeded -- udf_bg_scans_failed -- udf_delete_err_others -- udf_delete_reqs -- udf_delete_success -- udf_lua_errs -- udf_query_rec_reqs -- udf_read_errs_other -- udf_read_reqs -- udf_read_success -- udf_replica_writes -- udf_scan_rec_reqs -- udf_write_err_others -- udf_write_reqs -- udf_write_success -- write_master -- write_prole +***aerospike_namespace***: These are aerospike namespace measurements, which +are available from the aerospike `namespace/` command. -#### Aerospike Statistics [percentage]: + ie, + ``` + telnet localhost 3003 + namespaces + ;;etc. + namespace/ + ... 
+ ``` -Meta: -- units: percent (out of 100) +### Tags: -Measurement names: -- free_pct_disk -- free_pct_memory +All measurements have tags: -# Measurements: -#### Aerospike Namespace Statistics [values]: +- aerospike_host -Meta: -- units: Integer -- tags: `namespace=` +Namespace metrics have tags: -Measurement names: -- available_bin_names -- available_pct -- current_time -- data_used_bytes_memory -- index_used_bytes_memory -- master_objects -- max_evicted_ttl -- max_void_time -- non_expirable_objects -- objects -- prole_objects -- sindex_used_bytes_memory -- total_bytes_disk -- total_bytes_memory -- used_bytes_disk -- used_bytes_memory +- namespace_name -#### Aerospike Namespace Statistics [cumulative]: +### Example Output: -Meta: -- units: Integer -- tags: `namespace=` - -Measurement names: -- evicted_objects -- expired_objects -- set_deleted_objects -- set_evicted_objects - -#### Aerospike Namespace Statistics [percentage]: - -Meta: -- units: percent (out of 100) -- tags: `namespace=` - -Measurement names: -- free_pct_disk -- free_pct_memory +``` +% telegraf --config ~/db/ws/telegraf.conf --input-filter aerospike --test +* Plugin: aerospike, Collection 1 +> aerospike_node,aerospike_host=localhost:3000,host=tars 
batch_error=0i,batch_index_complete=0i,batch_index_created_buffers=0i,batch_index_destroyed_buffers=0i,batch_index_error=0i,batch_index_huge_buffers=0i,batch_index_initiate=0i,batch_index_queue="0:0,0:0,0:0,0:0",batch_index_timeout=0i,batch_index_unused_buffers=0i,batch_initiate=0i,batch_queue=0i,batch_timeout=0i,client_connections=6i,cluster_integrity=true,cluster_key="8AF422E05281249E",cluster_size=1i,delete_queue=0i,demarshal_error=0i,early_tsvc_batch_sub_error=0i,early_tsvc_client_error=0i,early_tsvc_udf_sub_error=0i,fabric_connections=16i,fabric_msgs_rcvd=0i,fabric_msgs_sent=0i,heartbeat_connections=0i,heartbeat_received_foreign=0i,heartbeat_received_self=0i,info_complete=47i,info_queue=0i,migrate_allowed=true,migrate_partitions_remaining=0i,migrate_progress_recv=0i,migrate_progress_send=0i,node_name="BB9020011AC4202",objects=0i,paxos_principal="BB9020011AC4202",proxy_in_progress=0i,proxy_retry=0i,query_long_running=0i,query_short_running=0i,reaped_fds=0i,record_refs=0i,rw_in_progress=0i,scans_active=0i,sindex_gc_activity_dur=0i,sindex_gc_garbage_cleaned=0i,sindex_gc_garbage_found=0i,sindex_gc_inactivity_dur=0i,sindex_gc_list_creation_time=0i,sindex_gc_list_deletion_time=0i,sindex_gc_locktimedout=0i,sindex_gc_objects_validated=0i,sindex_ucgarbage_found=0i,sub_objects=0i,system_free_mem_pct=92i,system_swapping=false,tsvc_queue=0i,uptime=1457i 1468923222000000000 +> aerospike_namespace,aerospike_host=localhost:3000,host=tars,namespace=test 
allow_nonxdr_writes=true,allow_xdr_writes=true,available_bin_names=32768i,batch_sub_proxy_complete=0i,batch_sub_proxy_error=0i,batch_sub_proxy_timeout=0i,batch_sub_read_error=0i,batch_sub_read_not_found=0i,batch_sub_read_success=0i,batch_sub_read_timeout=0i,batch_sub_tsvc_error=0i,batch_sub_tsvc_timeout=0i,client_delete_error=0i,client_delete_not_found=0i,client_delete_success=0i,client_delete_timeout=0i,client_lang_delete_success=0i,client_lang_error=0i,client_lang_read_success=0i,client_lang_write_success=0i,client_proxy_complete=0i,client_proxy_error=0i,client_proxy_timeout=0i,client_read_error=0i,client_read_not_found=0i,client_read_success=0i,client_read_timeout=0i,client_tsvc_error=0i,client_tsvc_timeout=0i,client_udf_complete=0i,client_udf_error=0i,client_udf_timeout=0i,client_write_error=0i,client_write_success=0i,client_write_timeout=0i,cold_start_evict_ttl=4294967295i,conflict_resolution_policy="generation",current_time=206619222i,data_in_index=false,default_ttl=432000i,device_available_pct=99i,device_free_pct=100i,device_total_bytes=4294967296i,device_used_bytes=0i,disallow_null_setname=false,enable_benchmarks_batch_sub=false,enable_benchmarks_read=false,enable_benchmarks_storage=false,enable_benchmarks_udf=false,enable_benchmarks_udf_sub=false,enable_benchmarks_write=false,enable_hist_proxy=false,enable_xdr=false,evict_hist_buckets=10000i,evict_tenths_pct=5i,evict_ttl=0i,evicted_objects=0i,expired_objects=0i,fail_generation=0i,fail_key_busy=0i,fail_record_too_big=0i,fail_xdr_forbidden=0i,geo2dsphere_within.earth_radius_meters=6371000i,geo2dsphere_within.level_mod=1i,geo2dsphere_within.max_cells=12i,geo2dsphere_within.max_level=30i,geo2dsphere_within.min_level=1i,geo2dsphere_within.strict=true,geo_region_query_cells=0i,geo_region_query_falsepos=0i,geo_region_query_points=0i,geo_region_query_reqs=0i,high_water_disk_pct=50i,high_water_memory_pct=60i,hwm_breached=false,ldt_enabled=false,ldt_gc_rate=0i,ldt_page_size=8192i,master_objects=0i,master_sub_objects=
0i,max_ttl=315360000i,max_void_time=0i,memory_free_pct=100i,memory_size=1073741824i,memory_used_bytes=0i,memory_used_data_bytes=0i,memory_used_index_bytes=0i,memory_used_sindex_bytes=0i,migrate_order=5i,migrate_record_receives=0i,migrate_record_retransmits=0i,migrate_records_skipped=0i,migrate_records_transmitted=0i,migrate_rx_instances=0i,migrate_rx_partitions_active=0i,migrate_rx_partitions_initial=0i,migrate_rx_partitions_remaining=0i,migrate_sleep=1i,migrate_tx_instances=0i,migrate_tx_partitions_active=0i,migrate_tx_partitions_imbalance=0i,migrate_tx_partitions_initial=0i,migrate_tx_partitions_remaining=0i,node_name="BB9020011AC4202",non_expirable_objects=0i,ns_forward_xdr_writes=false,nsup_cycle_duration=0i,nsup_cycle_sleep_pct=0i,objects=0i,prole_objects=0i,prole_sub_objects=0i,query_agg=0i,query_agg_abort=0i,query_agg_avg_rec_count=0i,query_agg_error=0i,query_agg_success=0i,query_fail=0i,query_long_queue_full=0i,query_long_reqs=0i,query_lookup_abort=0i,query_lookup_avg_rec_count=0i,query_lookup_error=0i,query_lookup_success=0i,query_lookups=0i,query_reqs=0i,query_short_queue_full=0i,query_short_reqs=0i,query_udf_bg_failure=0i,query_udf_bg_success=0i,read_consistency_level_override="off",repl_factor=1i,scan_aggr_abort=0i,scan_aggr_complete=0i,scan_aggr_error=0i,scan_basic_abort=0i,scan_basic_complete=0i,scan_basic_error=0i,scan_udf_bg_abort=0i,scan_udf_bg_complete=0i,scan_udf_bg_error=0i,set_deleted_objects=0i,sets_enable_xdr=true,sindex.data_max_memory="ULONG_MAX",sindex.num_partitions=32i,single_bin=false,stop_writes=false,stop_writes_pct=90i,storage_engine="device",storage_engine.cold_start_empty=false,storage_engine.data_in_memory=true,storage_engine.defrag_lwm_pct=50i,storage_engine.defrag_queue_min=0i,storage_engine.defrag_sleep=1000i,storage_engine.defrag_startup_minimum=10i,storage_engine.disable_odirect=false,storage_engine.enable_osync=false,storage_engine.file="/opt/aerospike/data/test.dat",storage_engine.filesize=4294967296i,storage_engine.flush_ma
x_ms=1000i,storage_engine.fsync_max_sec=0i,storage_engine.max_write_cache=67108864i,storage_engine.min_avail_pct=5i,storage_engine.post_write_queue=0i,storage_engine.scheduler_mode="null",storage_engine.write_block_size=1048576i,storage_engine.write_threads=1i,sub_objects=0i,udf_sub_lang_delete_success=0i,udf_sub_lang_error=0i,udf_sub_lang_read_success=0i,udf_sub_lang_write_success=0i,udf_sub_tsvc_error=0i,udf_sub_tsvc_timeout=0i,udf_sub_udf_complete=0i,udf_sub_udf_error=0i,udf_sub_udf_timeout=0i,write_commit_level_override="off",xdr_write_error=0i,xdr_write_success=0i,xdr_write_timeout=0i,{test}_query_hist_track_back=300i,{test}_query_hist_track_slice=10i,{test}_query_hist_track_thresholds="1,8,64",{test}_read_hist_track_back=300i,{test}_read_hist_track_slice=10i,{test}_read_hist_track_thresholds="1,8,64",{test}_udf_hist_track_back=300i,{test}_udf_hist_track_slice=10i,{test}_udf_hist_track_thresholds="1,8,64",{test}_write_hist_track_back=300i,{test}_write_hist_track_slice=10i,{test}_write_hist_track_thresholds="1,8,64" 1468923222000000000 +``` \ No newline at end of file diff --git a/plugins/inputs/aerospike/aerospike.go b/plugins/inputs/aerospike/aerospike.go index 29e51cb82..eb608723e 100644 --- a/plugins/inputs/aerospike/aerospike.go +++ b/plugins/inputs/aerospike/aerospike.go @@ -72,18 +72,17 @@ func (a *Aerospike) gatherServer(hostport string, acc telegraf.Accumulator) erro nodes := c.GetNodes() for _, n := range nodes { tags := map[string]string{ - "node_name": n.GetName(), "aerospike_host": hostport, } - fields := make(map[string]interface{}) + fields := map[string]interface{}{ + "node_name": n.GetName(), + } stats, err := as.RequestNodeStats(n) if err != nil { return err } for k, v := range stats { - if iv, err := strconv.ParseInt(v, 10, 64); err == nil { - fields[strings.Replace(k, "-", "_", -1)] = iv - } + fields[strings.Replace(k, "-", "_", -1)] = parseValue(v) } acc.AddFields("aerospike_node", fields, tags, time.Now()) @@ -94,9 +93,13 @@ func (a 
*Aerospike) gatherServer(hostport string, acc telegraf.Accumulator) erro namespaces := strings.Split(info["namespaces"], ";") for _, namespace := range namespaces { - nTags := copyTags(tags) + nTags := map[string]string{ + "aerospike_host": hostport, + } nTags["namespace"] = namespace - nFields := make(map[string]interface{}) + nFields := map[string]interface{}{ + "node_name": n.GetName(), + } info, err := as.RequestNodeInfo(n, "namespace/"+namespace) if err != nil { continue @@ -107,9 +110,7 @@ func (a *Aerospike) gatherServer(hostport string, acc telegraf.Accumulator) erro if len(parts) < 2 { continue } - if iv, err := strconv.ParseInt(parts[1], 10, 64); err == nil { - nFields[strings.Replace(parts[0], "-", "_", -1)] = iv - } + nFields[strings.Replace(parts[0], "-", "_", -1)] = parseValue(parts[1]) } acc.AddFields("aerospike_namespace", nFields, nTags, time.Now()) } @@ -117,6 +118,16 @@ func (a *Aerospike) gatherServer(hostport string, acc telegraf.Accumulator) erro return nil } +func parseValue(v string) interface{} { + if parsed, err := strconv.ParseInt(v, 10, 64); err == nil { + return parsed + } else if parsed, err := strconv.ParseBool(v); err == nil { + return parsed + } else { + return v + } +} + func copyTags(m map[string]string) map[string]string { out := make(map[string]string) for k, v := range m { diff --git a/plugins/inputs/ceph/README.md b/plugins/inputs/ceph/README.md index 61b275650..ab358daaa 100644 --- a/plugins/inputs/ceph/README.md +++ b/plugins/inputs/ceph/README.md @@ -1,18 +1,18 @@ # Ceph Storage Input Plugin -Collects performance metrics from the MON and OSD nodes in a Ceph storage cluster. +Collects performance metrics from the MON and OSD nodes in a Ceph storage cluster. The plugin works by scanning the configured SocketDir for OSD and MON socket files. When it finds -a MON socket, it runs **ceph --admin-daemon $file perfcounters_dump**. 
For OSDs it runs **ceph --admin-daemon $file perf dump** +a MON socket, it runs **ceph --admin-daemon $file perfcounters_dump**. For OSDs it runs **ceph --admin-daemon $file perf dump** The resulting JSON is parsed and grouped into collections, based on top-level key. Top-level keys are used as collection tags, and all sub-keys are flattened. For example: ``` - { - "paxos": { + { + "paxos": { "refresh": 9363435, - "refresh_latency": { + "refresh_latency": { "avgcount": 9363435, "sum": 5378.794002000 } @@ -50,7 +50,7 @@ Would be parsed into the following metrics, all of which would be tagged with co ### Measurements & Fields: -All fields are collected under the **ceph** measurement and stored as float64s. For a full list of fields, see the sample perf dumps in ceph_test.go. +All fields are collected under the **ceph** measurement and stored as float64s. For a full list of fields, see the sample perf dumps in ceph_test.go. ### Tags: @@ -95,7 +95,7 @@ All measurements will have the following tags: - throttle-objecter_ops - throttle-osd_client_bytes - throttle-osd_client_messages - + ### Example Output: From cbf5a55c7df8e24cc9835a6d94e28ac5dfea47be Mon Sep 17 00:00:00 2001 From: Victor Garcia Date: Tue, 19 Jul 2016 13:47:12 +0200 Subject: [PATCH 34/47] MongoDB input plugin: Adding per DB stats (#1466) --- CHANGELOG.md | 1 + plugins/inputs/mongodb/README.md | 13 ++++ plugins/inputs/mongodb/mongodb.go | 10 +-- plugins/inputs/mongodb/mongodb_data.go | 46 +++++++++++++ plugins/inputs/mongodb/mongodb_server.go | 27 +++++++- plugins/inputs/mongodb/mongodb_server_test.go | 4 +- plugins/inputs/mongodb/mongostat.go | 65 +++++++++++++++++++ 7 files changed, 159 insertions(+), 7 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 60949047f..7ca37b1e7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -49,6 +49,7 @@ should now look like: - [#1500](https://github.com/influxdata/telegraf/pull/1500): Aerospike plugin refactored to use official client lib. 
- [#1434](https://github.com/influxdata/telegraf/pull/1434): Add measurement name arg to logparser plugin. - [#1479](https://github.com/influxdata/telegraf/pull/1479): logparser: change resp_code from a field to a tag. +- [#1466](https://github.com/influxdata/telegraf/pull/1466): MongoDB input plugin: adding per DB stats from db.stats() ### Bugfixes diff --git a/plugins/inputs/mongodb/README.md b/plugins/inputs/mongodb/README.md index 66ff2668e..72f87feb8 100644 --- a/plugins/inputs/mongodb/README.md +++ b/plugins/inputs/mongodb/README.md @@ -10,6 +10,7 @@ ## mongodb://10.10.3.33:18832, ## 10.0.0.1:10000, etc. servers = ["127.0.0.1:27017"] + gather_perdb_stats = false ``` For authenticated mongodb istances use connection mongdb connection URI @@ -52,3 +53,15 @@ and create a single measurement containing values e.g. * ttl_passes_per_sec * repl_lag * jumbo_chunks (only if mongos or mongo config) + +If gather_db_stats is set to true, it will also collect per database stats exposed by db.stats() +creating another measurement called mongodb_db_stats and containing values: + * collections + * objects + * avg_obj_size + * data_size + * storage_size + * num_extents + * indexes + * index_size + * ok diff --git a/plugins/inputs/mongodb/mongodb.go b/plugins/inputs/mongodb/mongodb.go index f38fa31ef..0fdb90f74 100644 --- a/plugins/inputs/mongodb/mongodb.go +++ b/plugins/inputs/mongodb/mongodb.go @@ -15,9 +15,10 @@ import ( ) type MongoDB struct { - Servers []string - Ssl Ssl - mongos map[string]*Server + Servers []string + Ssl Ssl + mongos map[string]*Server + GatherPerdbStats bool } type Ssl struct { @@ -32,6 +33,7 @@ var sampleConfig = ` ## mongodb://10.10.3.33:18832, ## 10.0.0.1:10000, etc. 
servers = ["127.0.0.1:27017"] + gather_perdb_stats = false ` func (m *MongoDB) SampleConfig() string { @@ -135,7 +137,7 @@ func (m *MongoDB) gatherServer(server *Server, acc telegraf.Accumulator) error { } server.Session = sess } - return server.gatherData(acc) + return server.gatherData(acc, m.GatherPerdbStats) } func init() { diff --git a/plugins/inputs/mongodb/mongodb_data.go b/plugins/inputs/mongodb/mongodb_data.go index 7a52d650a..afa4ddd2f 100644 --- a/plugins/inputs/mongodb/mongodb_data.go +++ b/plugins/inputs/mongodb/mongodb_data.go @@ -12,6 +12,12 @@ type MongodbData struct { StatLine *StatLine Fields map[string]interface{} Tags map[string]string + DbData []DbData +} + +type DbData struct { + Name string + Fields map[string]interface{} } func NewMongodbData(statLine *StatLine, tags map[string]string) *MongodbData { @@ -22,6 +28,7 @@ func NewMongodbData(statLine *StatLine, tags map[string]string) *MongodbData { StatLine: statLine, Tags: tags, Fields: make(map[string]interface{}), + DbData: []DbData{}, } } @@ -72,6 +79,34 @@ var WiredTigerStats = map[string]string{ "percent_cache_used": "CacheUsedPercent", } +var DbDataStats = map[string]string{ + "collections": "Collections", + "objects": "Objects", + "avg_obj_size": "AvgObjSize", + "data_size": "DataSize", + "storage_size": "StorageSize", + "num_extents": "NumExtents", + "indexes": "Indexes", + "index_size": "IndexSize", + "ok": "Ok", +} + +func (d *MongodbData) AddDbStats() { + for _, dbstat := range d.StatLine.DbStatsLines { + dbStatLine := reflect.ValueOf(&dbstat).Elem() + newDbData := &DbData{ + Name: dbstat.Name, + Fields: make(map[string]interface{}), + } + newDbData.Fields["type"] = "db_stat" + for key, value := range DbDataStats { + val := dbStatLine.FieldByName(value).Interface() + newDbData.Fields[key] = val + } + d.DbData = append(d.DbData, *newDbData) + } +} + func (d *MongodbData) AddDefaultStats() { statLine := reflect.ValueOf(d.StatLine).Elem() d.addStat(statLine, DefaultStats) @@ -113,4 
+148,15 @@ func (d *MongodbData) flush(acc telegraf.Accumulator) { d.StatLine.Time, ) d.Fields = make(map[string]interface{}) + + for _, db := range d.DbData { + d.Tags["db_name"] = db.Name + acc.AddFields( + "mongodb_db_stats", + db.Fields, + d.Tags, + d.StatLine.Time, + ) + db.Fields = make(map[string]interface{}) + } } diff --git a/plugins/inputs/mongodb/mongodb_server.go b/plugins/inputs/mongodb/mongodb_server.go index e4213bbaf..e797fd6ab 100644 --- a/plugins/inputs/mongodb/mongodb_server.go +++ b/plugins/inputs/mongodb/mongodb_server.go @@ -22,7 +22,7 @@ func (s *Server) getDefaultTags() map[string]string { return tags } -func (s *Server) gatherData(acc telegraf.Accumulator) error { +func (s *Server) gatherData(acc telegraf.Accumulator, gatherDbStats bool) error { s.Session.SetMode(mgo.Eventual, true) s.Session.SetSocketTimeout(0) result_server := &ServerStatus{} @@ -42,10 +42,34 @@ func (s *Server) gatherData(acc telegraf.Accumulator) error { JumboChunksCount: int64(jumbo_chunks), } + result_db_stats := &DbStats{} + + if gatherDbStats == true { + names := []string{} + names, err = s.Session.DatabaseNames() + if err != nil { + log.Println("Error getting database names (" + err.Error() + ")") + } + for _, db_name := range names { + db_stat_line := &DbStatsData{} + err = s.Session.DB(db_name).Run(bson.D{{"dbStats", 1}}, db_stat_line) + if err != nil { + log.Println("Error getting db stats from " + db_name + "(" + err.Error() + ")") + } + db := &Db{ + Name: db_name, + DbStatsData: db_stat_line, + } + + result_db_stats.Dbs = append(result_db_stats.Dbs, *db) + } + } + result := &MongoStatus{ ServerStatus: result_server, ReplSetStatus: result_repl, ClusterStatus: result_cluster, + DbStats: result_db_stats, } defer func() { @@ -64,6 +88,7 @@ func (s *Server) gatherData(acc telegraf.Accumulator) error { s.getDefaultTags(), ) data.AddDefaultStats() + data.AddDbStats() data.flush(acc) } return nil diff --git a/plugins/inputs/mongodb/mongodb_server_test.go 
b/plugins/inputs/mongodb/mongodb_server_test.go index 52869724c..7ad0f38a2 100644 --- a/plugins/inputs/mongodb/mongodb_server_test.go +++ b/plugins/inputs/mongodb/mongodb_server_test.go @@ -29,12 +29,12 @@ func TestGetDefaultTags(t *testing.T) { func TestAddDefaultStats(t *testing.T) { var acc testutil.Accumulator - err := server.gatherData(&acc) + err := server.gatherData(&acc, false) require.NoError(t, err) time.Sleep(time.Duration(1) * time.Second) // need to call this twice so it can perform the diff - err = server.gatherData(&acc) + err = server.gatherData(&acc, false) require.NoError(t, err) for key, _ := range DefaultStats { diff --git a/plugins/inputs/mongodb/mongostat.go b/plugins/inputs/mongodb/mongostat.go index 23bd05f72..50f65333e 100644 --- a/plugins/inputs/mongodb/mongostat.go +++ b/plugins/inputs/mongodb/mongostat.go @@ -35,6 +35,7 @@ type MongoStatus struct { ServerStatus *ServerStatus ReplSetStatus *ReplSetStatus ClusterStatus *ClusterStatus + DbStats *DbStats } type ServerStatus struct { @@ -65,6 +66,32 @@ type ServerStatus struct { Metrics *MetricsStats `bson:"metrics"` } +// DbStats stores stats from all dbs +type DbStats struct { + Dbs []Db +} + +// Db represent a single DB +type Db struct { + Name string + DbStatsData *DbStatsData +} + +// DbStatsData stores stats from a db +type DbStatsData struct { + Db string `bson:"db"` + Collections int64 `bson:"collections"` + Objects int64 `bson:"objects"` + AvgObjSize float64 `bson:"avgObjSize"` + DataSize int64 `bson:"dataSize"` + StorageSize int64 `bson:"storageSize"` + NumExtents int64 `bson:"numExtents"` + Indexes int64 `bson:"indexes"` + IndexSize int64 `bson:"indexSize"` + Ok int64 `bson:"ok"` + GleStats interface{} `bson:"gleStats"` +} + // ClusterStatus stores information related to the whole cluster type ClusterStatus struct { JumboChunksCount int64 @@ -396,6 +423,22 @@ type StatLine struct { // Cluster fields JumboChunksCount int64 + + // DB stats field + DbStatsLines []DbStatLine +} + +type 
DbStatLine struct { + Name string + Collections int64 + Objects int64 + AvgObjSize float64 + DataSize int64 + StorageSize int64 + NumExtents int64 + Indexes int64 + IndexSize int64 + Ok int64 } func parseLocks(stat ServerStatus) map[string]LockUsage { @@ -677,5 +720,27 @@ func NewStatLine(oldMongo, newMongo MongoStatus, key string, all bool, sampleSec newClusterStat := *newMongo.ClusterStatus returnVal.JumboChunksCount = newClusterStat.JumboChunksCount + newDbStats := *newMongo.DbStats + for _, db := range newDbStats.Dbs { + dbStatsData := db.DbStatsData + // mongos doesn't have the db key, so setting the db name + if dbStatsData.Db == "" { + dbStatsData.Db = db.Name + } + dbStatLine := &DbStatLine{ + Name: dbStatsData.Db, + Collections: dbStatsData.Collections, + Objects: dbStatsData.Objects, + AvgObjSize: dbStatsData.AvgObjSize, + DataSize: dbStatsData.DataSize, + StorageSize: dbStatsData.StorageSize, + NumExtents: dbStatsData.NumExtents, + Indexes: dbStatsData.Indexes, + IndexSize: dbStatsData.IndexSize, + Ok: dbStatsData.Ok, + } + returnVal.DbStatsLines = append(returnVal.DbStatsLines, *dbStatLine) + } + return returnVal } From 82166a36d02e21524c65ef8fcfeb1f0da55bc100 Mon Sep 17 00:00:00 2001 From: Cameron Sparr Date: Tue, 19 Jul 2016 14:03:28 +0100 Subject: [PATCH 35/47] Fix err race condition and partial failure issues closes #1439 closes #1440 closes #1441 closes #1442 closes #1443 closes #1444 closes #1445 --- CHANGELOG.md | 6 ++++++ plugins/inputs/dns_query/dns_query.go | 14 ++++++++------ plugins/inputs/dovecot/dovecot.go | 20 ++++++++------------ plugins/inputs/memcached/memcached.go | 12 +++++------- plugins/inputs/mongodb/mongodb.go | 10 ++++------ plugins/inputs/mysql/mysql.go | 25 ++++++++++++++----------- plugins/inputs/mysql/mysql_test.go | 1 - plugins/inputs/nginx/nginx.go | 8 ++++---- plugins/inputs/nsq/nsq.go | 9 ++++----- 9 files changed, 53 insertions(+), 52 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7ca37b1e7..76263dc69 
100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,11 @@ ## v1.0 [unreleased] +### Features + +### Bugfixes + +- [#1519](https://github.com/influxdata/telegraf/pull/1519): Fix error race conditions and partial failures. + ## v1.0 beta 3 [2016-07-18] ### Release Notes diff --git a/plugins/inputs/dns_query/dns_query.go b/plugins/inputs/dns_query/dns_query.go index 2231f2921..1bccc52c0 100644 --- a/plugins/inputs/dns_query/dns_query.go +++ b/plugins/inputs/dns_query/dns_query.go @@ -3,12 +3,14 @@ package dns_query import ( "errors" "fmt" - "github.com/influxdata/telegraf" - "github.com/influxdata/telegraf/plugins/inputs" "github.com/miekg/dns" "net" "strconv" "time" + + "github.com/influxdata/telegraf" + "github.com/influxdata/telegraf/internal/errchan" + "github.com/influxdata/telegraf/plugins/inputs" ) type DnsQuery struct { @@ -55,12 +57,12 @@ func (d *DnsQuery) Description() string { } func (d *DnsQuery) Gather(acc telegraf.Accumulator) error { d.setDefaultValues() + + errChan := errchan.New(len(d.Domains) * len(d.Servers)) for _, domain := range d.Domains { for _, server := range d.Servers { dnsQueryTime, err := d.getDnsQueryTime(domain, server) - if err != nil { - return err - } + errChan.C <- err tags := map[string]string{ "server": server, "domain": domain, @@ -72,7 +74,7 @@ func (d *DnsQuery) Gather(acc telegraf.Accumulator) error { } } - return nil + return errChan.Error() } func (d *DnsQuery) setDefaultValues() { diff --git a/plugins/inputs/dovecot/dovecot.go b/plugins/inputs/dovecot/dovecot.go index 0347016d1..56290e759 100644 --- a/plugins/inputs/dovecot/dovecot.go +++ b/plugins/inputs/dovecot/dovecot.go @@ -12,6 +12,7 @@ import ( "time" "github.com/influxdata/telegraf" + "github.com/influxdata/telegraf/internal/errchan" "github.com/influxdata/telegraf/plugins/inputs" ) @@ -51,7 +52,6 @@ const defaultPort = "24242" // Reads stats from all configured servers. 
func (d *Dovecot) Gather(acc telegraf.Accumulator) error { - if !validQuery[d.Type] { return fmt.Errorf("Error: %s is not a valid query type\n", d.Type) @@ -61,31 +61,27 @@ func (d *Dovecot) Gather(acc telegraf.Accumulator) error { d.Servers = append(d.Servers, "127.0.0.1:24242") } - var wg sync.WaitGroup - - var outerr error - if len(d.Filters) <= 0 { d.Filters = append(d.Filters, "") } - for _, serv := range d.Servers { + var wg sync.WaitGroup + errChan := errchan.New(len(d.Servers) * len(d.Filters)) + for _, server := range d.Servers { for _, filter := range d.Filters { wg.Add(1) - go func(serv string, filter string) { + go func(s string, f string) { defer wg.Done() - outerr = d.gatherServer(serv, acc, d.Type, filter) - }(serv, filter) + errChan.C <- d.gatherServer(s, acc, d.Type, f) + }(server, filter) } } wg.Wait() - - return outerr + return errChan.Error() } func (d *Dovecot) gatherServer(addr string, acc telegraf.Accumulator, qtype string, filter string) error { - _, _, err := net.SplitHostPort(addr) if err != nil { return fmt.Errorf("Error: %s on url %s\n", err, addr) diff --git a/plugins/inputs/memcached/memcached.go b/plugins/inputs/memcached/memcached.go index c631a1ed1..5ee538e93 100644 --- a/plugins/inputs/memcached/memcached.go +++ b/plugins/inputs/memcached/memcached.go @@ -9,6 +9,7 @@ import ( "time" "github.com/influxdata/telegraf" + "github.com/influxdata/telegraf/internal/errchan" "github.com/influxdata/telegraf/plugins/inputs" ) @@ -73,19 +74,16 @@ func (m *Memcached) Gather(acc telegraf.Accumulator) error { return m.gatherServer(":11211", false, acc) } + errChan := errchan.New(len(m.Servers) + len(m.UnixSockets)) for _, serverAddress := range m.Servers { - if err := m.gatherServer(serverAddress, false, acc); err != nil { - return err - } + errChan.C <- m.gatherServer(serverAddress, false, acc) } for _, unixAddress := range m.UnixSockets { - if err := m.gatherServer(unixAddress, true, acc); err != nil { - return err - } + errChan.C <- 
m.gatherServer(unixAddress, true, acc) } - return nil + return errChan.Error() } func (m *Memcached) gatherServer( diff --git a/plugins/inputs/mongodb/mongodb.go b/plugins/inputs/mongodb/mongodb.go index 0fdb90f74..a4bdabd96 100644 --- a/plugins/inputs/mongodb/mongodb.go +++ b/plugins/inputs/mongodb/mongodb.go @@ -10,6 +10,7 @@ import ( "time" "github.com/influxdata/telegraf" + "github.com/influxdata/telegraf/internal/errchan" "github.com/influxdata/telegraf/plugins/inputs" "gopkg.in/mgo.v2" ) @@ -55,9 +56,7 @@ func (m *MongoDB) Gather(acc telegraf.Accumulator) error { } var wg sync.WaitGroup - - var outerr error - + errChan := errchan.New(len(m.Servers)) for _, serv := range m.Servers { u, err := url.Parse(serv) if err != nil { @@ -73,13 +72,12 @@ func (m *MongoDB) Gather(acc telegraf.Accumulator) error { wg.Add(1) go func(srv *Server) { defer wg.Done() - outerr = m.gatherServer(srv, acc) + errChan.C <- m.gatherServer(srv, acc) }(m.getMongoServer(u)) } wg.Wait() - - return outerr + return errChan.Error() } func (m *MongoDB) getMongoServer(url *url.URL) *Server { diff --git a/plugins/inputs/mysql/mysql.go b/plugins/inputs/mysql/mysql.go index 5011e82b9..10b8c2f75 100644 --- a/plugins/inputs/mysql/mysql.go +++ b/plugins/inputs/mysql/mysql.go @@ -7,10 +7,12 @@ import ( "net/url" "strconv" "strings" + "sync" "time" _ "github.com/go-sql-driver/mysql" "github.com/influxdata/telegraf" + "github.com/influxdata/telegraf/internal/errchan" "github.com/influxdata/telegraf/plugins/inputs" ) @@ -118,26 +120,27 @@ func (m *Mysql) InitMysql() { func (m *Mysql) Gather(acc telegraf.Accumulator) error { if len(m.Servers) == 0 { - // if we can't get stats in this case, thats fine, don't report - // an error. - m.gatherServer(localhost, acc) - return nil + // default to localhost if nothing specified. 
+ return m.gatherServer(localhost, acc) } - // Initialise additional query intervals if !initDone { m.InitMysql() } + var wg sync.WaitGroup + errChan := errchan.New(len(m.Servers)) // Loop through each server and collect metrics - for _, serv := range m.Servers { - err := m.gatherServer(serv, acc) - if err != nil { - return err - } + for _, server := range m.Servers { + wg.Add(1) + go func(s string) { + defer wg.Done() + errChan.C <- m.gatherServer(s, acc) + }(server) } - return nil + wg.Wait() + return errChan.Error() } type mapping struct { diff --git a/plugins/inputs/mysql/mysql_test.go b/plugins/inputs/mysql/mysql_test.go index 989c21722..3ab9187b5 100644 --- a/plugins/inputs/mysql/mysql_test.go +++ b/plugins/inputs/mysql/mysql_test.go @@ -20,7 +20,6 @@ func TestMysqlDefaultsToLocal(t *testing.T) { } var acc testutil.Accumulator - err := m.Gather(&acc) require.NoError(t, err) diff --git a/plugins/inputs/nginx/nginx.go b/plugins/inputs/nginx/nginx.go index b15b539de..3fe8c04d1 100644 --- a/plugins/inputs/nginx/nginx.go +++ b/plugins/inputs/nginx/nginx.go @@ -12,6 +12,7 @@ import ( "time" "github.com/influxdata/telegraf" + "github.com/influxdata/telegraf/internal/errchan" "github.com/influxdata/telegraf/plugins/inputs" ) @@ -34,7 +35,7 @@ func (n *Nginx) Description() string { func (n *Nginx) Gather(acc telegraf.Accumulator) error { var wg sync.WaitGroup - var outerr error + errChan := errchan.New(len(n.Urls)) for _, u := range n.Urls { addr, err := url.Parse(u) @@ -45,13 +46,12 @@ func (n *Nginx) Gather(acc telegraf.Accumulator) error { wg.Add(1) go func(addr *url.URL) { defer wg.Done() - outerr = n.gatherUrl(addr, acc) + errChan.C <- n.gatherUrl(addr, acc) }(addr) } wg.Wait() - - return outerr + return errChan.Error() } var tr = &http.Transport{ diff --git a/plugins/inputs/nsq/nsq.go b/plugins/inputs/nsq/nsq.go index 35ba76866..8bfd72788 100644 --- a/plugins/inputs/nsq/nsq.go +++ b/plugins/inputs/nsq/nsq.go @@ -32,6 +32,7 @@ import ( "time" 
"github.com/influxdata/telegraf" + "github.com/influxdata/telegraf/internal/errchan" "github.com/influxdata/telegraf/plugins/inputs" ) @@ -65,19 +66,17 @@ func (n *NSQ) Description() string { func (n *NSQ) Gather(acc telegraf.Accumulator) error { var wg sync.WaitGroup - var outerr error - + errChan := errchan.New(len(n.Endpoints)) for _, e := range n.Endpoints { wg.Add(1) go func(e string) { defer wg.Done() - outerr = n.gatherEndpoint(e, acc) + errChan.C <- n.gatherEndpoint(e, acc) }(e) } wg.Wait() - - return outerr + return errChan.Error() } var tr = &http.Transport{ From d54b169d6798e160a4ecfd5061e568fc4d3c8a88 Mon Sep 17 00:00:00 2001 From: Cameron Sparr Date: Tue, 19 Jul 2016 12:42:59 +0100 Subject: [PATCH 36/47] nstat: fix nstat setting path for snmp6 closes #1477 --- CHANGELOG.md | 1 + plugins/inputs/nstat/nstat.go | 8 ++++---- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 76263dc69..9c4a7e35b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,7 @@ ### Bugfixes - [#1519](https://github.com/influxdata/telegraf/pull/1519): Fix error race conditions and partial failures. +- [#1477](https://github.com/influxdata/telegraf/issues/1477): nstat: fix inaccurate config panic. ## v1.0 beta 3 [2016-07-18] diff --git a/plugins/inputs/nstat/nstat.go b/plugins/inputs/nstat/nstat.go index d32ef004c..5096d7b03 100644 --- a/plugins/inputs/nstat/nstat.go +++ b/plugins/inputs/nstat/nstat.go @@ -43,9 +43,9 @@ var sampleConfig = ` ## file paths for proc files. If empty default paths will be used: ## /proc/net/netstat, /proc/net/snmp, /proc/net/snmp6 ## These can also be overridden with env variables, see README. 
- proc_net_netstat = "" - proc_net_snmp = "" - proc_net_snmp6 = "" + proc_net_netstat = "/proc/net/netstat" + proc_net_snmp = "/proc/net/snmp" + proc_net_snmp6 = "/proc/net/snmp6" ## dump metrics with 0 values too dump_zeros = true ` @@ -141,7 +141,7 @@ func (ns *Nstat) loadPaths() { ns.ProcNetSNMP = proc(ENV_SNMP, NET_SNMP) } if ns.ProcNetSNMP6 == "" { - ns.ProcNetSNMP = proc(ENV_SNMP6, NET_SNMP6) + ns.ProcNetSNMP6 = proc(ENV_SNMP6, NET_SNMP6) } } From 42d9d5d237f92c3ebcc8a7ecfcae022625f85bd5 Mon Sep 17 00:00:00 2001 From: Pierre Fersing Date: Tue, 19 Jul 2016 16:24:10 +0200 Subject: [PATCH 37/47] Fix Redis url, an extra "tcp://" was added (#1521) --- CHANGELOG.md | 1 + plugins/inputs/redis/redis.go | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9c4a7e35b..84d7bae3f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -77,6 +77,7 @@ should now look like: - [#1436](https://github.com/influxdata/telegraf/issues/1436): logparser: honor modifiers in "pattern" config. - [#1418](https://github.com/influxdata/telegraf/issues/1418): logparser: error and exit on file permissions/missing errors. - [#1499](https://github.com/influxdata/telegraf/pull/1499): Make the user able to specify full path for HAproxy stats +- [#1521](https://github.com/influxdata/telegraf/pull/1521): Fix Redis url, an extra "tcp://" was added. 
## v1.0 beta 2 [2016-06-21] diff --git a/plugins/inputs/redis/redis.go b/plugins/inputs/redis/redis.go index 649786c2c..b08eedee3 100644 --- a/plugins/inputs/redis/redis.go +++ b/plugins/inputs/redis/redis.go @@ -99,7 +99,7 @@ func (r *Redis) Gather(acc telegraf.Accumulator) error { var wg sync.WaitGroup errChan := errchan.New(len(r.Servers)) for _, serv := range r.Servers { - if !strings.HasPrefix(serv, "tcp://") || !strings.HasPrefix(serv, "unix://") { + if !strings.HasPrefix(serv, "tcp://") && !strings.HasPrefix(serv, "unix://") { serv = "tcp://" + serv } From 191608041f4e421c3e137afc342480f5211f8740 Mon Sep 17 00:00:00 2001 From: Cameron Sparr Date: Tue, 19 Jul 2016 17:31:01 +0100 Subject: [PATCH 38/47] Strip container_version from container_image tag closes #1413 --- CHANGELOG.md | 2 ++ plugins/inputs/docker/docker.go | 13 +++++++++++-- plugins/inputs/docker/docker_test.go | 12 +++++++----- 3 files changed, 20 insertions(+), 7 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 84d7bae3f..729cf5a2b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,8 @@ ### Features +- [#1413](https://github.com/influxdata/telegraf/issues/1413): Separate container_version from container_image tag. + ### Bugfixes - [#1519](https://github.com/influxdata/telegraf/pull/1519): Fix error race conditions and partial failures. diff --git a/plugins/inputs/docker/docker.go b/plugins/inputs/docker/docker.go index 0af7820e1..dfd768c1a 100644 --- a/plugins/inputs/docker/docker.go +++ b/plugins/inputs/docker/docker.go @@ -207,9 +207,18 @@ func (d *Docker) gatherContainer( cname = strings.TrimPrefix(container.Names[0], "/") } + // the image name sometimes has a version part. 
+ // ie, rabbitmq:3-management + imageParts := strings.Split(container.Image, ":") + imageName := imageParts[0] + imageVersion := "unknown" + if len(imageParts) > 1 { + imageVersion = imageParts[1] + } tags := map[string]string{ - "container_name": cname, - "container_image": container.Image, + "container_name": cname, + "container_image": imageName, + "container_version": imageVersion, } if len(d.ContainerNames) > 0 { if !sliceContains(cname, d.ContainerNames) { diff --git a/plugins/inputs/docker/docker_test.go b/plugins/inputs/docker/docker_test.go index 1574009b8..b1c76f5af 100644 --- a/plugins/inputs/docker/docker_test.go +++ b/plugins/inputs/docker/docker_test.go @@ -378,9 +378,10 @@ func TestDockerGatherInfo(t *testing.T) { "container_id": "b7dfbb9478a6ae55e237d4d74f8bbb753f0817192b5081334dc78476296e2173", }, map[string]string{ - "container_name": "etcd2", - "container_image": "quay.io/coreos/etcd:v2.2.2", - "cpu": "cpu3", + "container_name": "etcd2", + "container_image": "quay.io/coreos/etcd", + "cpu": "cpu3", + "container_version": "v2.2.2", }, ) acc.AssertContainsTaggedFields(t, @@ -423,8 +424,9 @@ func TestDockerGatherInfo(t *testing.T) { "container_id": "b7dfbb9478a6ae55e237d4d74f8bbb753f0817192b5081334dc78476296e2173", }, map[string]string{ - "container_name": "etcd2", - "container_image": "quay.io/coreos/etcd:v2.2.2", + "container_name": "etcd2", + "container_image": "quay.io/coreos/etcd", + "container_version": "v2.2.2", }, ) From 0af0fa7c2e4063bcc11b975c514950a71a4d65a4 Mon Sep 17 00:00:00 2001 From: Torsten Rehn Date: Wed, 20 Jul 2016 15:47:04 +0200 Subject: [PATCH 39/47] jolokia: handle multiple multi-dimensional attributes (#1524) fixes #1481 --- CHANGELOG.md | 1 + plugins/inputs/jolokia/jolokia.go | 9 ++++++++- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 729cf5a2b..dda3ba750 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,7 @@ - [#1519](https://github.com/influxdata/telegraf/pull/1519): 
Fix error race conditions and partial failures. - [#1477](https://github.com/influxdata/telegraf/issues/1477): nstat: fix inaccurate config panic. +- [#1481](https://github.com/influxdata/telegraf/issues/1481): jolokia: fix handling multiple multi-dimensional attributes. ## v1.0 beta 3 [2016-07-18] diff --git a/plugins/inputs/jolokia/jolokia.go b/plugins/inputs/jolokia/jolokia.go index 244338559..53bb65fd0 100644 --- a/plugins/inputs/jolokia/jolokia.go +++ b/plugins/inputs/jolokia/jolokia.go @@ -249,7 +249,14 @@ func (j *Jolokia) Gather(acc telegraf.Accumulator) error { switch t := values.(type) { case map[string]interface{}: for k, v := range t { - fields[measurement+"_"+k] = v + switch t2 := v.(type) { + case map[string]interface{}: + for k2, v2 := range t2 { + fields[measurement+"_"+k+"_"+k2] = v2 + } + case interface{}: + fields[measurement+"_"+k] = t2 + } } case interface{}: fields[measurement] = t From 1c24665b2952e9e6bd1e7ef5b51268b3e9e5c7ab Mon Sep 17 00:00:00 2001 From: Cameron Sparr Date: Wed, 20 Jul 2016 09:24:34 +0100 Subject: [PATCH 40/47] Prometheus client & win_perf_counters char changes 1. in prometheus client, do not check for invalid characters anymore, because we are already replacing all invalid characters with regex anyways. 2. in win_perf_counters, sanitize field name _and_ measurement name. Also add '%' to the list of sanitized characters, because this character is invalid for most output plugins, and can also easily cause string formatting issues throughout the stack. 3. 
All '%' will now be translated to 'Percent' closes #1430 --- CHANGELOG.md | 1 + .../win_perf_counters/win_perf_counters.go | 12 +++++------ .../prometheus_client/prometheus_client.go | 20 +------------------ 3 files changed, 8 insertions(+), 25 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index dda3ba750..5137b86df 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,7 @@ - [#1519](https://github.com/influxdata/telegraf/pull/1519): Fix error race conditions and partial failures. - [#1477](https://github.com/influxdata/telegraf/issues/1477): nstat: fix inaccurate config panic. - [#1481](https://github.com/influxdata/telegraf/issues/1481): jolokia: fix handling multiple multi-dimensional attributes. +- [#1430](https://github.com/influxdata/telegraf/issues/1430): Fix prometheus character sanitizing. Sanitize more win_perf_counters characters. ## v1.0 beta 3 [2016-07-18] diff --git a/plugins/inputs/win_perf_counters/win_perf_counters.go b/plugins/inputs/win_perf_counters/win_perf_counters.go index 4684289ee..fb7b093c0 100644 --- a/plugins/inputs/win_perf_counters/win_perf_counters.go +++ b/plugins/inputs/win_perf_counters/win_perf_counters.go @@ -107,7 +107,8 @@ type item struct { counterHandle win.PDH_HCOUNTER } -var sanitizedChars = strings.NewReplacer("/sec", "_persec", "/Sec", "_persec", " ", "_") +var sanitizedChars = strings.NewReplacer("/sec", "_persec", "/Sec", "_persec", + " ", "_", "%", "Percent", `\`, "") func (m *Win_PerfCounters) AddItem(metrics *itemList, query string, objectName string, counter string, instance string, measurement string, include_total bool) { @@ -299,13 +300,12 @@ func (m *Win_PerfCounters) Gather(acc telegraf.Accumulator) error { tags["instance"] = s } tags["objectname"] = metric.objectName - fields[sanitizedChars.Replace(string(metric.counter))] = float32(c.FmtValue.DoubleValue) + fields[sanitizedChars.Replace(metric.counter)] = + float32(c.FmtValue.DoubleValue) - var measurement string - if metric.measurement == "" { + 
measurement := sanitizedChars.Replace(metric.measurement) + if measurement == "" { measurement = "win_perf_counters" - } else { - measurement = metric.measurement } acc.AddFields(measurement, fields, tags) } diff --git a/plugins/outputs/prometheus_client/prometheus_client.go b/plugins/outputs/prometheus_client/prometheus_client.go index 4f7ce8053..ce6dc1f57 100644 --- a/plugins/outputs/prometheus_client/prometheus_client.go +++ b/plugins/outputs/prometheus_client/prometheus_client.go @@ -12,17 +12,7 @@ import ( "github.com/prometheus/client_golang/prometheus" ) -var ( - invalidNameCharRE = regexp.MustCompile(`[^a-zA-Z0-9_]`) - - // Prometheus metric names must match this regex - // see https://prometheus.io/docs/concepts/data_model/#metric-names-and-labels - metricName = regexp.MustCompile("^[a-zA-Z_:][a-zA-Z0-9_:]*$") - - // Prometheus labels must match this regex - // see https://prometheus.io/docs/concepts/data_model/#metric-names-and-labels - labelName = regexp.MustCompile("^[a-zA-Z_][a-zA-Z0-9_]*$") -) +var invalidNameCharRE = regexp.MustCompile(`[^a-zA-Z0-9_]`) type PrometheusClient struct { Listen string @@ -119,9 +109,6 @@ func (p *PrometheusClient) Write(metrics []telegraf.Metric) error { if len(k) == 0 { continue } - if !labelName.MatchString(k) { - continue - } labels = append(labels, k) l[k] = v } @@ -144,11 +131,6 @@ func (p *PrometheusClient) Write(metrics []telegraf.Metric) error { mname = fmt.Sprintf("%s_%s", key, n) } - // verify that it is a valid measurement name - if !metricName.MatchString(mname) { - continue - } - desc := prometheus.NewDesc(mname, "Telegraf collected metric", nil, l) var metric prometheus.Metric var err error From 0462af164ec98d45a017ada89ab90d534afd1198 Mon Sep 17 00:00:00 2001 From: Pierre Fersing Date: Thu, 21 Jul 2016 17:50:12 +0200 Subject: [PATCH 41/47] Added option "total/perdevice" to Docker input (#1525) Like cpu plugin, add two option "total" and "perdevice" to send network and diskio metrics either per device and/or 
the sum of all devices. --- CHANGELOG.md | 1 + etc/telegraf.conf | 5 ++ plugins/inputs/docker/docker.go | 78 ++++++++++++++++++++++++---- plugins/inputs/docker/docker_test.go | 45 +++++++++++++++- 4 files changed, 119 insertions(+), 10 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5137b86df..6a862a0db 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,7 @@ ### Features - [#1413](https://github.com/influxdata/telegraf/issues/1413): Separate container_version from container_image tag. +- [#1525](https://github.com/influxdata/telegraf/pull/1525): Support setting per-device and total metrics for Docker network and blockio. ### Bugfixes diff --git a/etc/telegraf.conf b/etc/telegraf.conf index 10e949302..c667c4c9f 100644 --- a/etc/telegraf.conf +++ b/etc/telegraf.conf @@ -666,6 +666,11 @@ # container_names = [] # ## Timeout for docker list, info, and stats commands # timeout = "5s" +# ## Whether to report for each container per-device blkio (8:0, 8:1...) and +# ## network (eth0, eth1, ...) stats or not +# perdevice = true +# ## Whether to report for each container total blkio and network stats or not +# total = false # # Read statistics from one or many dovecot servers diff --git a/plugins/inputs/docker/docker.go b/plugins/inputs/docker/docker.go index dfd768c1a..e3876bd64 100644 --- a/plugins/inputs/docker/docker.go +++ b/plugins/inputs/docker/docker.go @@ -25,6 +25,8 @@ type Docker struct { Endpoint string ContainerNames []string Timeout internal.Duration + PerDevice bool `toml:"perdevice"` + Total bool `toml:"total"` client DockerClient } @@ -58,6 +60,13 @@ var sampleConfig = ` container_names = [] ## Timeout for docker list, info, and stats commands timeout = "5s" + + ## Whether to report for each container per-device blkio (8:0, 8:1...) and + ## network (eth0, eth1, ...) 
stats or not + perdevice = true + ## Whether to report for each container total blkio and network stats or not + total = false + ` // Description returns input description @@ -246,7 +255,7 @@ func (d *Docker) gatherContainer( tags[k] = label } - gatherContainerStats(v, acc, tags, container.ID) + gatherContainerStats(v, acc, tags, container.ID, d.PerDevice, d.Total) return nil } @@ -256,6 +265,8 @@ func gatherContainerStats( acc telegraf.Accumulator, tags map[string]string, id string, + perDevice bool, + total bool, ) { now := stat.Read @@ -323,6 +334,7 @@ func gatherContainerStats( acc.AddFields("docker_container_cpu", fields, percputags, now) } + totalNetworkStatMap := make(map[string]interface{}) for network, netstats := range stat.Networks { netfields := map[string]interface{}{ "rx_dropped": netstats.RxDropped, @@ -336,12 +348,35 @@ func gatherContainerStats( "container_id": id, } // Create a new network tag dictionary for the "network" tag - nettags := copyTags(tags) - nettags["network"] = network - acc.AddFields("docker_container_net", netfields, nettags, now) + if perDevice { + nettags := copyTags(tags) + nettags["network"] = network + acc.AddFields("docker_container_net", netfields, nettags, now) + } + if total { + for field, value := range netfields { + if field == "container_id" { + continue + } + _, ok := totalNetworkStatMap[field] + if ok { + totalNetworkStatMap[field] = totalNetworkStatMap[field].(uint64) + value.(uint64) + } else { + totalNetworkStatMap[field] = value + } + } + } } - gatherBlockIOMetrics(stat, acc, tags, now, id) + // totalNetworkStatMap could be empty if container is running with --net=host. 
+ if total && len(totalNetworkStatMap) != 0 { + nettags := copyTags(tags) + nettags["network"] = "total" + totalNetworkStatMap["container_id"] = id + acc.AddFields("docker_container_net", totalNetworkStatMap, nettags, now) + } + + gatherBlockIOMetrics(stat, acc, tags, now, id, perDevice, total) } func calculateMemPercent(stat *types.StatsJSON) float64 { @@ -370,6 +405,8 @@ func gatherBlockIOMetrics( tags map[string]string, now time.Time, id string, + perDevice bool, + total bool, ) { blkioStats := stat.BlkioStats // Make a map of devices to their block io stats @@ -431,11 +468,33 @@ func gatherBlockIOMetrics( deviceStatMap[device]["sectors_recursive"] = metric.Value } + totalStatMap := make(map[string]interface{}) for device, fields := range deviceStatMap { - iotags := copyTags(tags) - iotags["device"] = device fields["container_id"] = id - acc.AddFields("docker_container_blkio", fields, iotags, now) + if perDevice { + iotags := copyTags(tags) + iotags["device"] = device + acc.AddFields("docker_container_blkio", fields, iotags, now) + } + if total { + for field, value := range fields { + if field == "container_id" { + continue + } + _, ok := totalStatMap[field] + if ok { + totalStatMap[field] = totalStatMap[field].(uint64) + value.(uint64) + } else { + totalStatMap[field] = value + } + } + } + } + if total { + totalStatMap["container_id"] = id + iotags := copyTags(tags) + iotags["device"] = "total" + acc.AddFields("docker_container_blkio", totalStatMap, iotags, now) } } @@ -480,7 +539,8 @@ func parseSize(sizeStr string) (int64, error) { func init() { inputs.Add("docker", func() telegraf.Input { return &Docker{ - Timeout: internal.Duration{Duration: time.Second * 5}, + PerDevice: true, + Timeout: internal.Duration{Duration: time.Second * 5}, } }) } diff --git a/plugins/inputs/docker/docker_test.go b/plugins/inputs/docker/docker_test.go index b1c76f5af..9f2e97f73 100644 --- a/plugins/inputs/docker/docker_test.go +++ b/plugins/inputs/docker/docker_test.go @@ -24,7 
+24,7 @@ func TestDockerGatherContainerStats(t *testing.T) { "container_name": "redis", "container_image": "redis/image", } - gatherContainerStats(stats, &acc, tags, "123456789") + gatherContainerStats(stats, &acc, tags, "123456789", true, true) // test docker_container_net measurement netfields := map[string]interface{}{ @@ -42,6 +42,21 @@ func TestDockerGatherContainerStats(t *testing.T) { nettags["network"] = "eth0" acc.AssertContainsTaggedFields(t, "docker_container_net", netfields, nettags) + netfields = map[string]interface{}{ + "rx_dropped": uint64(6), + "rx_bytes": uint64(8), + "rx_errors": uint64(10), + "tx_packets": uint64(12), + "tx_dropped": uint64(6), + "rx_packets": uint64(8), + "tx_errors": uint64(10), + "tx_bytes": uint64(12), + "container_id": "123456789", + } + nettags = copyTags(tags) + nettags["network"] = "total" + acc.AssertContainsTaggedFields(t, "docker_container_net", netfields, nettags) + // test docker_blkio measurement blkiotags := copyTags(tags) blkiotags["device"] = "6:0" @@ -52,6 +67,15 @@ func TestDockerGatherContainerStats(t *testing.T) { } acc.AssertContainsTaggedFields(t, "docker_container_blkio", blkiofields, blkiotags) + blkiotags = copyTags(tags) + blkiotags["device"] = "total" + blkiofields = map[string]interface{}{ + "io_service_bytes_recursive_read": uint64(100), + "io_serviced_recursive_write": uint64(302), + "container_id": "123456789", + } + acc.AssertContainsTaggedFields(t, "docker_container_blkio", blkiofields, blkiotags) + // test docker_container_mem measurement memfields := map[string]interface{}{ "max_usage": uint64(1001), @@ -186,6 +210,17 @@ func testStats() *types.StatsJSON { TxBytes: 4, } + stats.Networks["eth1"] = types.NetworkStats{ + RxDropped: 5, + RxBytes: 6, + RxErrors: 7, + TxPackets: 8, + TxDropped: 5, + RxPackets: 6, + TxErrors: 7, + TxBytes: 8, + } + sbr := types.BlkioStatEntry{ Major: 6, Minor: 0, @@ -198,11 +233,19 @@ func testStats() *types.StatsJSON { Op: "write", Value: 101, } + sr2 := 
types.BlkioStatEntry{ + Major: 6, + Minor: 1, + Op: "write", + Value: 201, + } stats.BlkioStats.IoServiceBytesRecursive = append( stats.BlkioStats.IoServiceBytesRecursive, sbr) stats.BlkioStats.IoServicedRecursive = append( stats.BlkioStats.IoServicedRecursive, sr) + stats.BlkioStats.IoServicedRecursive = append( + stats.BlkioStats.IoServicedRecursive, sr2) return stats } From 29ea433763d6f34098f94309ff8e92d94eb98ff2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mendelson=20Gusm=C3=A3o?= Date: Thu, 21 Jul 2016 13:00:54 -0300 Subject: [PATCH 42/47] Implement support for fetching hddtemp data (#1411) --- CHANGELOG.md | 1 + README.md | 1 + plugins/inputs/all/all.go | 1 + plugins/inputs/hddtemp/README.md | 22 ++++ plugins/inputs/hddtemp/go-hddtemp/LICENSE | 21 ++++ plugins/inputs/hddtemp/go-hddtemp/hddtemp.go | 61 +++++++++ .../inputs/hddtemp/go-hddtemp/hddtemp_test.go | 116 ++++++++++++++++++ plugins/inputs/hddtemp/hddtemp.go | 74 +++++++++++ plugins/inputs/hddtemp/hddtemp_nocompile.go | 3 + 9 files changed, 300 insertions(+) create mode 100644 plugins/inputs/hddtemp/README.md create mode 100644 plugins/inputs/hddtemp/go-hddtemp/LICENSE create mode 100644 plugins/inputs/hddtemp/go-hddtemp/hddtemp.go create mode 100644 plugins/inputs/hddtemp/go-hddtemp/hddtemp_test.go create mode 100644 plugins/inputs/hddtemp/hddtemp.go create mode 100644 plugins/inputs/hddtemp/hddtemp_nocompile.go diff --git a/CHANGELOG.md b/CHANGELOG.md index 6a862a0db..7480bbb59 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -62,6 +62,7 @@ should now look like: - [#1434](https://github.com/influxdata/telegraf/pull/1434): Add measurement name arg to logparser plugin. - [#1479](https://github.com/influxdata/telegraf/pull/1479): logparser: change resp_code from a field to a tag. 
- [#1466](https://github.com/influxdata/telegraf/pull/1466): MongoDB input plugin: adding per DB stats from db.stats() +- [#1411](https://github.com/influxdata/telegraf/pull/1411): Implement support for fetching hddtemp data ### Bugfixes diff --git a/README.md b/README.md index aa8d9e039..9d2ee3ce1 100644 --- a/README.md +++ b/README.md @@ -156,6 +156,7 @@ Currently implemented sources: * [exec](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/exec) (generic executable plugin, support JSON, influx, graphite and nagios) * [filestat](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/filestat) * [haproxy](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/haproxy) +* [hddtemp](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/hddtemp) * [http_response](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/http_response) * [httpjson](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/httpjson) (generic JSON-emitting http service plugin) * [influxdb](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/influxdb) diff --git a/plugins/inputs/all/all.go b/plugins/inputs/all/all.go index 529a13bae..ddb7d4039 100644 --- a/plugins/inputs/all/all.go +++ b/plugins/inputs/all/all.go @@ -22,6 +22,7 @@ import ( _ "github.com/influxdata/telegraf/plugins/inputs/filestat" _ "github.com/influxdata/telegraf/plugins/inputs/graylog" _ "github.com/influxdata/telegraf/plugins/inputs/haproxy" + _ "github.com/influxdata/telegraf/plugins/inputs/hddtemp" _ "github.com/influxdata/telegraf/plugins/inputs/http_response" _ "github.com/influxdata/telegraf/plugins/inputs/httpjson" _ "github.com/influxdata/telegraf/plugins/inputs/influxdb" diff --git a/plugins/inputs/hddtemp/README.md b/plugins/inputs/hddtemp/README.md new file mode 100644 index 000000000..d87ae625d --- /dev/null +++ b/plugins/inputs/hddtemp/README.md @@ -0,0 +1,22 @@ +# Hddtemp Input Plugin + +This plugin reads data from hddtemp 
daemon + +## Requirements + +Hddtemp should be installed and its daemon running + +## Configuration + +``` +[[inputs.hddtemp]] +## By default, telegraf gathers temps data from all disks detected by the +## hddtemp. +## +## Only collect temps from the selected disks. +## +## A * as the device name will return the temperature values of all disks. +## +# address = "127.0.0.1:7634" +# devices = ["sda", "*"] +``` diff --git a/plugins/inputs/hddtemp/go-hddtemp/LICENSE b/plugins/inputs/hddtemp/go-hddtemp/LICENSE new file mode 100644 index 000000000..d5aed19c6 --- /dev/null +++ b/plugins/inputs/hddtemp/go-hddtemp/LICENSE @@ -0,0 +1,21 @@ +The MIT License (MIT) + +Copyright (c) 2016 Mendelson Gusmão + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
diff --git a/plugins/inputs/hddtemp/go-hddtemp/hddtemp.go b/plugins/inputs/hddtemp/go-hddtemp/hddtemp.go new file mode 100644 index 000000000..d7d650b79 --- /dev/null +++ b/plugins/inputs/hddtemp/go-hddtemp/hddtemp.go @@ -0,0 +1,61 @@ +package hddtemp + +import ( + "bytes" + "io" + "net" + "strconv" + "strings" +) + +type disk struct { + DeviceName string + Model string + Temperature int32 + Unit string + Status string +} + +func Fetch(address string) ([]disk, error) { + var ( + err error + conn net.Conn + buffer bytes.Buffer + disks []disk + ) + + if conn, err = net.Dial("tcp", address); err != nil { + return nil, err + } + + if _, err = io.Copy(&buffer, conn); err != nil { + return nil, err + } + + fields := strings.Split(buffer.String(), "|") + + for index := 0; index < len(fields)/5; index++ { + status := "" + offset := index * 5 + device := fields[offset+1] + device = device[strings.LastIndex(device, "/")+1:] + + temperatureField := fields[offset+3] + temperature, err := strconv.ParseInt(temperatureField, 10, 32) + + if err != nil { + temperature = 0 + status = temperatureField + } + + disks = append(disks, disk{ + DeviceName: device, + Model: fields[offset+2], + Temperature: int32(temperature), + Unit: fields[offset+4], + Status: status, + }) + } + + return disks, nil +} diff --git a/plugins/inputs/hddtemp/go-hddtemp/hddtemp_test.go b/plugins/inputs/hddtemp/go-hddtemp/hddtemp_test.go new file mode 100644 index 000000000..858e91a90 --- /dev/null +++ b/plugins/inputs/hddtemp/go-hddtemp/hddtemp_test.go @@ -0,0 +1,116 @@ +package hddtemp + +import ( + "net" + "reflect" + "testing" +) + +func TestFetch(t *testing.T) { + l := serve(t, []byte("|/dev/sda|foobar|36|C|")) + defer l.Close() + + disks, err := Fetch(l.Addr().String()) + + if err != nil { + t.Error("expecting err to be nil") + } + + expected := []disk{ + { + DeviceName: "sda", + Model: "foobar", + Temperature: 36, + Unit: "C", + }, + } + + if !reflect.DeepEqual(expected, disks) { + t.Error("disks' slice is 
different from expected") + } +} + +func TestFetchWrongAddress(t *testing.T) { + _, err := Fetch("127.0.0.1:1") + + if err == nil { + t.Error("expecting err to be non-nil") + } +} + +func TestFetchStatus(t *testing.T) { + l := serve(t, []byte("|/dev/sda|foobar|SLP|C|")) + defer l.Close() + + disks, err := Fetch(l.Addr().String()) + + if err != nil { + t.Error("expecting err to be nil") + } + + expected := []disk{ + { + DeviceName: "sda", + Model: "foobar", + Temperature: 0, + Unit: "C", + Status: "SLP", + }, + } + + if !reflect.DeepEqual(expected, disks) { + t.Error("disks' slice is different from expected") + } +} + +func TestFetchTwoDisks(t *testing.T) { + l := serve(t, []byte("|/dev/hda|ST380011A|46|C||/dev/hdd|ST340016A|SLP|*|")) + defer l.Close() + + disks, err := Fetch(l.Addr().String()) + + if err != nil { + t.Error("expecting err to be nil") + } + + expected := []disk{ + { + DeviceName: "hda", + Model: "ST380011A", + Temperature: 46, + Unit: "C", + }, + { + DeviceName: "hdd", + Model: "ST340016A", + Temperature: 0, + Unit: "*", + Status: "SLP", + }, + } + + if !reflect.DeepEqual(expected, disks) { + t.Error("disks' slice is different from expected") + } +} + +func serve(t *testing.T, data []byte) net.Listener { + l, err := net.Listen("tcp", "127.0.0.1:0") + + if err != nil { + t.Fatal(err) + } + + go func(t *testing.T) { + conn, err := l.Accept() + + if err != nil { + t.Fatal(err) + } + + conn.Write(data) + conn.Close() + }(t) + + return l +} diff --git a/plugins/inputs/hddtemp/hddtemp.go b/plugins/inputs/hddtemp/hddtemp.go new file mode 100644 index 000000000..c1e01c3c6 --- /dev/null +++ b/plugins/inputs/hddtemp/hddtemp.go @@ -0,0 +1,74 @@ +// +build linux + +package hddtemp + +import ( + "github.com/influxdata/telegraf" + "github.com/influxdata/telegraf/plugins/inputs" + gohddtemp "github.com/influxdata/telegraf/plugins/inputs/hddtemp/go-hddtemp" +) + +const defaultAddress = "127.0.0.1:7634" + +type HDDTemp struct { + Address string + Devices []string +} 
+ +func (_ *HDDTemp) Description() string { + return "Monitor disks' temperatures using hddtemp" +} + +var hddtempSampleConfig = ` + ## By default, telegraf gathers temps data from all disks detected by the + ## hddtemp. + ## + ## Only collect temps from the selected disks. + ## + ## A * as the device name will return the temperature values of all disks. + ## + # address = "127.0.0.1:7634" + # devices = ["sda", "*"] +` + +func (_ *HDDTemp) SampleConfig() string { + return hddtempSampleConfig +} + +func (h *HDDTemp) Gather(acc telegraf.Accumulator) error { + disks, err := gohddtemp.Fetch(h.Address) + + if err != nil { + return err + } + + for _, disk := range disks { + for _, chosenDevice := range h.Devices { + if chosenDevice == "*" || chosenDevice == disk.DeviceName { + tags := map[string]string{ + "device": disk.DeviceName, + "model": disk.Model, + "unit": disk.Unit, + "status": disk.Status, + } + + fields := map[string]interface{}{ + disk.DeviceName: disk.Temperature, + } + + acc.AddFields("hddtemp", fields, tags) + } + } + } + + return nil +} + +func init() { + inputs.Add("hddtemp", func() telegraf.Input { + return &HDDTemp{ + Address: defaultAddress, + Devices: []string{"*"}, + } + }) +} diff --git a/plugins/inputs/hddtemp/hddtemp_nocompile.go b/plugins/inputs/hddtemp/hddtemp_nocompile.go new file mode 100644 index 000000000..0c5801670 --- /dev/null +++ b/plugins/inputs/hddtemp/hddtemp_nocompile.go @@ -0,0 +1,3 @@ +// +build !linux + +package hddtemp From ee240a5599258473c193577ac2eeed0f5db12cf4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Harasimowicz?= Date: Thu, 9 Jun 2016 12:33:14 +0200 Subject: [PATCH 43/47] Added metrics for Mesos slaves and tasks running on them. 
closes #1356 --- plugins/inputs/mesos/README.md | 156 ++++++++- plugins/inputs/mesos/mesos.go | 497 ++++++++++++++++++++--------- plugins/inputs/mesos/mesos_test.go | 365 ++++++++++++++++++--- 3 files changed, 800 insertions(+), 218 deletions(-) diff --git a/plugins/inputs/mesos/README.md b/plugins/inputs/mesos/README.md index 20a6dd244..affb66463 100644 --- a/plugins/inputs/mesos/README.md +++ b/plugins/inputs/mesos/README.md @@ -1,6 +1,6 @@ # Mesos Input Plugin -This input plugin gathers metrics from Mesos (*currently only Mesos masters*). +This input plugin gathers metrics from Mesos. For more information, please check the [Mesos Observability Metrics](http://mesos.apache.org/documentation/latest/monitoring/) page. ### Configuration: @@ -8,14 +8,41 @@ For more information, please check the [Mesos Observability Metrics](http://meso ```toml # Telegraf plugin for gathering metrics from N Mesos masters [[inputs.mesos]] - # Timeout, in ms. + ## Timeout, in ms. timeout = 100 - # A list of Mesos masters, default value is localhost:5050. + ## A list of Mesos masters. masters = ["localhost:5050"] - # Metrics groups to be collected, by default, all enabled. - master_collections = ["resources","master","system","slaves","frameworks","messages","evqueue","registrar"] + ## Master metrics groups to be collected, by default, all enabled. + master_collections = [ + "resources", + "master", + "system", + "agents", + "frameworks", + "tasks", + "messages", + "evqueue", + "registrar", + ] + ## A list of Mesos slaves, default is [] + # slaves = [] + ## Slave metrics groups to be collected, by default, all enabled. + # slave_collections = [ + # "resources", + # "agent", + # "system", + # "executors", + # "tasks", + # "messages", + # ] + ## Include mesos tasks statistics, default is false + # slave_tasks = true ``` +By dafault this plugin is not configured to gather metrics from mesos. 
Since mesos cluster can be deployed in numerous ways it does not provide ane default +values in that matter. User needs to specify master/slave nodes this plugin will gather metrics from. Additionally by enabling `slave_tasks` will allow +agthering metrics from takss runing on specified slaves (this options is disabled by default). + ### Measurements & Fields: Mesos master metric groups @@ -33,6 +60,12 @@ Mesos master metric groups - master/disk_revocable_percent - master/disk_revocable_total - master/disk_revocable_used + - master/gpus_percent + - master/gpus_used + - master/gpus_total + - master/gpus_revocable_percent + - master/gpus_revocable_total + - master/gpus_revocable_used - master/mem_percent - master/mem_used - master/mem_total @@ -136,17 +169,111 @@ Mesos master metric groups - registrar/state_store_ms/p999 - registrar/state_store_ms/p9999 +Mesos slave metric groups +- resources + - slave/cpus_percent + - slave/cpus_used + - slave/cpus_total + - slave/cpus_revocable_percent + - slave/cpus_revocable_total + - slave/cpus_revocable_used + - slave/disk_percent + - slave/disk_used + - slave/disk_total + - slave/disk_revocable_percent + - slave/disk_revocable_total + - slave/disk_revocable_used + - slave/gpus_percent + - slave/gpus_used + - slave/gpus_total, + - slave/gpus_revocable_percent + - slave/gpus_revocable_total + - slave/gpus_revocable_used + - slave/mem_percent + - slave/mem_used + - slave/mem_total + - slave/mem_revocable_percent + - slave/mem_revocable_total + - slave/mem_revocable_used + +- agent + - slave/registered + - slave/uptime_secs + +- system + - system/cpus_total + - system/load_15min + - system/load_5min + - system/load_1min + - system/mem_free_bytes + - system/mem_total_bytes + +- executors + - containerizer/mesos/container_destroy_errors + - slave/container_launch_errors + - slave/executors_preempted + - slave/frameworks_active + - slave/executor_directory_max_allowed_age_secs + - slave/executors_registering + - 
slave/executors_running + - slave/executors_terminated + - slave/executors_terminating + - slave/recovery_errors + +- tasks + - slave/tasks_failed + - slave/tasks_finished + - slave/tasks_killed + - slave/tasks_lost + - slave/tasks_running + - slave/tasks_staging + - slave/tasks_starting + +- messages + - slave/invalid_framework_messages + - slave/invalid_status_updates + - slave/valid_framework_messages + - slave/valid_status_updates + +Mesos tasks metric groups + +- executor_id +- executor_name +- framework_id +- source +- statistics (all metrics below will have `statistics_` prefix included in their names + - cpus_limit + - cpus_system_time_secs + - cpus_user_time_secs + - mem_anon_bytes + - mem_cache_bytes + - mem_critical_pressure_counter + - mem_file_bytes + - mem_limit_bytes + - mem_low_pressure_counter + - mem_mapped_file_bytes + - mem_medium_pressure_counter + - mem_rss_bytes + - mem_swap_bytes + - mem_total_bytes + - mem_total_memsw_bytes + - mem_unevictable_bytes + - timestamp + ### Tags: -- All measurements have the following tags: +- All master/slave measurements have the following tags: + - server + - role (master/slave) + +- Tasks measurements have the following tags: - server ### Example Output: - ``` $ telegraf -config ~/mesos.conf -input-filter mesos -test * Plugin: mesos, Collection 1 -mesos,server=172.17.8.101 allocator/event_queue_dispatches=0,master/cpus_percent=0, +mesos,host=172.17.8.102,server=172.17.8.101 allocator/event_queue_dispatches=0,master/cpus_percent=0, master/cpus_revocable_percent=0,master/cpus_revocable_total=0, master/cpus_revocable_used=0,master/cpus_total=2, master/cpus_used=0,master/disk_percent=0,master/disk_revocable_percent=0, @@ -163,3 +290,16 @@ master/mem_revocable_used=0,master/mem_total=1002, master/mem_used=0,master/messages_authenticate=0, master/messages_deactivate_framework=0 ... 
``` + +Meoso tasks metrics (if enabled): +``` +mesos-tasks,host=172.17.8.102,server=172.17.8.101,task_id=hello-world.e4b5b497-2ccd-11e6-a659-0242fb222ce2 +statistics_cpus_limit=0.2,statistics_cpus_system_time_secs=142.49,statistics_cpus_user_time_secs=388.14, +statistics_mem_anon_bytes=359129088,statistics_mem_cache_bytes=3964928, +statistics_mem_critical_pressure_counter=0,statistics_mem_file_bytes=3964928, +statistics_mem_limit_bytes=767557632,statistics_mem_low_pressure_counter=0, +statistics_mem_mapped_file_bytes=114688,statistics_mem_medium_pressure_counter=0, +statistics_mem_rss_bytes=359129088,statistics_mem_swap_bytes=0,statistics_mem_total_bytes=363094016, +statistics_mem_total_memsw_bytes=363094016,statistics_mem_unevictable_bytes=0, +statistics_timestamp=1465486052.70525 1465486053052811792... +``` diff --git a/plugins/inputs/mesos/mesos.go b/plugins/inputs/mesos/mesos.go index b096a20d9..a719dc9f4 100644 --- a/plugins/inputs/mesos/mesos.go +++ b/plugins/inputs/mesos/mesos.go @@ -17,33 +17,57 @@ import ( jsonparser "github.com/influxdata/telegraf/plugins/parsers/json" ) +type Role string + +const ( + MASTER Role = "master" + SLAVE = "slave" +) + type Mesos struct { Timeout int Masters []string MasterCols []string `toml:"master_collections"` + Slaves []string + SlaveCols []string `toml:"slave_collections"` + SlaveTasks bool } -var defaultMetrics = []string{ - "resources", "master", "system", "slaves", "frameworks", - "tasks", "messages", "evqueue", "messages", "registrar", +var allMetrics = map[Role][]string{ + MASTER: []string{"resources", "master", "system", "agents", "frameworks", "tasks", "messages", "evqueue", "registrar"}, + SLAVE: []string{"resources", "agent", "system", "executors", "tasks", "messages"}, } var sampleConfig = ` - # Timeout, in ms. + ## Timeout, in ms. timeout = 100 - # A list of Mesos masters, default value is localhost:5050. + ## A list of Mesos masters. 
masters = ["localhost:5050"] - # Metrics groups to be collected, by default, all enabled. + ## Master metrics groups to be collected, by default, all enabled. master_collections = [ "resources", "master", "system", - "slaves", + "agents", "frameworks", + "tasks", "messages", "evqueue", "registrar", ] + ## A list of Mesos slaves, default is [] + # slaves = [] + ## Slave metrics groups to be collected, by default, all enabled. + # slave_collections = [ + # "resources", + # "agent", + # "system", + # "executors", + # "tasks", + # "messages", + # ] + ## Include mesos tasks statistics, default is false + # slave_tasks = true ` // SampleConfig returns a sample configuration block @@ -56,21 +80,54 @@ func (m *Mesos) Description() string { return "Telegraf plugin for gathering metrics from N Mesos masters" } +func (m *Mesos) SetDefaults() { + if len(m.MasterCols) == 0 { + m.MasterCols = allMetrics[MASTER] + } + + if len(m.SlaveCols) == 0 { + m.SlaveCols = allMetrics[SLAVE] + } + + if m.Timeout == 0 { + log.Println("[mesos] Missing timeout value, setting default value (100ms)") + m.Timeout = 100 + } +} + // Gather() metrics from given list of Mesos Masters func (m *Mesos) Gather(acc telegraf.Accumulator) error { var wg sync.WaitGroup var errorChannel chan error - if len(m.Masters) == 0 { - m.Masters = []string{"localhost:5050"} - } + m.SetDefaults() - errorChannel = make(chan error, len(m.Masters)*2) + errorChannel = make(chan error, len(m.Masters)+2*len(m.Slaves)) for _, v := range m.Masters { wg.Add(1) go func(c string) { - errorChannel <- m.gatherMetrics(c, acc) + errorChannel <- m.gatherMainMetrics(c, ":5050", MASTER, acc) + wg.Done() + return + }(v) + } + + for _, v := range m.Slaves { + wg.Add(1) + go func(c string) { + errorChannel <- m.gatherMainMetrics(c, ":5051", MASTER, acc) + wg.Done() + return + }(v) + + if !m.SlaveTasks { + continue + } + + wg.Add(1) + go func(c string) { + errorChannel <- m.gatherSlaveTaskMetrics(c, ":5051", acc) wg.Done() return }(v) @@ 
-94,7 +151,7 @@ func (m *Mesos) Gather(acc telegraf.Accumulator) error { } // metricsDiff() returns set names for removal -func metricsDiff(w []string) []string { +func metricsDiff(role Role, w []string) []string { b := []string{} s := make(map[string]bool) @@ -106,7 +163,7 @@ func metricsDiff(w []string) []string { s[v] = true } - for _, d := range defaultMetrics { + for _, d := range allMetrics[role] { if _, ok := s[d]; !ok { b = append(b, d) } @@ -116,156 +173,239 @@ func metricsDiff(w []string) []string { } // masterBlocks serves as kind of metrics registry groupping them in sets -func masterBlocks(g string) []string { +func getMetrics(role Role, group string) []string { var m map[string][]string m = make(map[string][]string) - m["resources"] = []string{ - "master/cpus_percent", - "master/cpus_used", - "master/cpus_total", - "master/cpus_revocable_percent", - "master/cpus_revocable_total", - "master/cpus_revocable_used", - "master/disk_percent", - "master/disk_used", - "master/disk_total", - "master/disk_revocable_percent", - "master/disk_revocable_total", - "master/disk_revocable_used", - "master/mem_percent", - "master/mem_used", - "master/mem_total", - "master/mem_revocable_percent", - "master/mem_revocable_total", - "master/mem_revocable_used", + if role == MASTER { + m["resources"] = []string{ + "master/cpus_percent", + "master/cpus_used", + "master/cpus_total", + "master/cpus_revocable_percent", + "master/cpus_revocable_total", + "master/cpus_revocable_used", + "master/disk_percent", + "master/disk_used", + "master/disk_total", + "master/disk_revocable_percent", + "master/disk_revocable_total", + "master/disk_revocable_used", + "master/gpus_percent", + "master/gpus_used", + "master/gpus_total", + "master/gpus_revocable_percent", + "master/gpus_revocable_total", + "master/gpus_revocable_used", + "master/mem_percent", + "master/mem_used", + "master/mem_total", + "master/mem_revocable_percent", + "master/mem_revocable_total", + "master/mem_revocable_used", + 
} + + m["master"] = []string{ + "master/elected", + "master/uptime_secs", + } + + m["system"] = []string{ + "system/cpus_total", + "system/load_15min", + "system/load_5min", + "system/load_1min", + "system/mem_free_bytes", + "system/mem_total_bytes", + } + + m["agents"] = []string{ + "master/slave_registrations", + "master/slave_removals", + "master/slave_reregistrations", + "master/slave_shutdowns_scheduled", + "master/slave_shutdowns_canceled", + "master/slave_shutdowns_completed", + "master/slaves_active", + "master/slaves_connected", + "master/slaves_disconnected", + "master/slaves_inactive", + } + + m["frameworks"] = []string{ + "master/frameworks_active", + "master/frameworks_connected", + "master/frameworks_disconnected", + "master/frameworks_inactive", + "master/outstanding_offers", + } + + m["tasks"] = []string{ + "master/tasks_error", + "master/tasks_failed", + "master/tasks_finished", + "master/tasks_killed", + "master/tasks_lost", + "master/tasks_running", + "master/tasks_staging", + "master/tasks_starting", + } + + m["messages"] = []string{ + "master/invalid_executor_to_framework_messages", + "master/invalid_framework_to_executor_messages", + "master/invalid_status_update_acknowledgements", + "master/invalid_status_updates", + "master/dropped_messages", + "master/messages_authenticate", + "master/messages_deactivate_framework", + "master/messages_decline_offers", + "master/messages_executor_to_framework", + "master/messages_exited_executor", + "master/messages_framework_to_executor", + "master/messages_kill_task", + "master/messages_launch_tasks", + "master/messages_reconcile_tasks", + "master/messages_register_framework", + "master/messages_register_slave", + "master/messages_reregister_framework", + "master/messages_reregister_slave", + "master/messages_resource_request", + "master/messages_revive_offers", + "master/messages_status_update", + "master/messages_status_update_acknowledgement", + "master/messages_unregister_framework", + 
"master/messages_unregister_slave", + "master/messages_update_slave", + "master/recovery_slave_removals", + "master/slave_removals/reason_registered", + "master/slave_removals/reason_unhealthy", + "master/slave_removals/reason_unregistered", + "master/valid_framework_to_executor_messages", + "master/valid_status_update_acknowledgements", + "master/valid_status_updates", + "master/task_lost/source_master/reason_invalid_offers", + "master/task_lost/source_master/reason_slave_removed", + "master/task_lost/source_slave/reason_executor_terminated", + "master/valid_executor_to_framework_messages", + } + + m["evqueue"] = []string{ + "master/event_queue_dispatches", + "master/event_queue_http_requests", + "master/event_queue_messages", + } + + m["registrar"] = []string{ + "registrar/state_fetch_ms", + "registrar/state_store_ms", + "registrar/state_store_ms/max", + "registrar/state_store_ms/min", + "registrar/state_store_ms/p50", + "registrar/state_store_ms/p90", + "registrar/state_store_ms/p95", + "registrar/state_store_ms/p99", + "registrar/state_store_ms/p999", + "registrar/state_store_ms/p9999", + } + } else if role == SLAVE { + m["resources"] = []string{ + "slave/cpus_percent", + "slave/cpus_used", + "slave/cpus_total", + "slave/cpus_revocable_percent", + "slave/cpus_revocable_total", + "slave/cpus_revocable_used", + "slave/disk_percent", + "slave/disk_used", + "slave/disk_total", + "slave/disk_revocable_percent", + "slave/disk_revocable_total", + "slave/disk_revocable_used", + "slave/gpus_percent", + "slave/gpus_used", + "slave/gpus_total", + "slave/gpus_revocable_percent", + "slave/gpus_revocable_total", + "slave/gpus_revocable_used", + "slave/mem_percent", + "slave/mem_used", + "slave/mem_total", + "slave/mem_revocable_percent", + "slave/mem_revocable_total", + "slave/mem_revocable_used", + } + + m["agent"] = []string{ + "slave/registered", + "slave/uptime_secs", + } + + m["system"] = []string{ + "system/cpus_total", + "system/load_15min", + "system/load_5min", + 
"system/load_1min", + "system/mem_free_bytes", + "system/mem_total_bytes", + } + + m["executors"] = []string{ + "containerizer/mesos/container_destroy_errors", + "slave/container_launch_errors", + "slave/executors_preempted", + "slave/frameworks_active", + "slave/executor_directory_max_allowed_age_secs", + "slave/executors_registering", + "slave/executors_running", + "slave/executors_terminated", + "slave/executors_terminating", + "slave/recovery_errors", + } + + m["tasks"] = []string{ + "slave/tasks_failed", + "slave/tasks_finished", + "slave/tasks_killed", + "slave/tasks_lost", + "slave/tasks_running", + "slave/tasks_staging", + "slave/tasks_starting", + } + + m["messages"] = []string{ + "slave/invalid_framework_messages", + "slave/invalid_status_updates", + "slave/valid_framework_messages", + "slave/valid_status_updates", + } } - m["master"] = []string{ - "master/elected", - "master/uptime_secs", - } - - m["system"] = []string{ - "system/cpus_total", - "system/load_15min", - "system/load_5min", - "system/load_1min", - "system/mem_free_bytes", - "system/mem_total_bytes", - } - - m["slaves"] = []string{ - "master/slave_registrations", - "master/slave_removals", - "master/slave_reregistrations", - "master/slave_shutdowns_scheduled", - "master/slave_shutdowns_canceled", - "master/slave_shutdowns_completed", - "master/slaves_active", - "master/slaves_connected", - "master/slaves_disconnected", - "master/slaves_inactive", - } - - m["frameworks"] = []string{ - "master/frameworks_active", - "master/frameworks_connected", - "master/frameworks_disconnected", - "master/frameworks_inactive", - "master/outstanding_offers", - } - - m["tasks"] = []string{ - "master/tasks_error", - "master/tasks_failed", - "master/tasks_finished", - "master/tasks_killed", - "master/tasks_lost", - "master/tasks_running", - "master/tasks_staging", - "master/tasks_starting", - } - - m["messages"] = []string{ - "master/invalid_executor_to_framework_messages", - 
"master/invalid_framework_to_executor_messages", - "master/invalid_status_update_acknowledgements", - "master/invalid_status_updates", - "master/dropped_messages", - "master/messages_authenticate", - "master/messages_deactivate_framework", - "master/messages_decline_offers", - "master/messages_executor_to_framework", - "master/messages_exited_executor", - "master/messages_framework_to_executor", - "master/messages_kill_task", - "master/messages_launch_tasks", - "master/messages_reconcile_tasks", - "master/messages_register_framework", - "master/messages_register_slave", - "master/messages_reregister_framework", - "master/messages_reregister_slave", - "master/messages_resource_request", - "master/messages_revive_offers", - "master/messages_status_update", - "master/messages_status_update_acknowledgement", - "master/messages_unregister_framework", - "master/messages_unregister_slave", - "master/messages_update_slave", - "master/recovery_slave_removals", - "master/slave_removals/reason_registered", - "master/slave_removals/reason_unhealthy", - "master/slave_removals/reason_unregistered", - "master/valid_framework_to_executor_messages", - "master/valid_status_update_acknowledgements", - "master/valid_status_updates", - "master/task_lost/source_master/reason_invalid_offers", - "master/task_lost/source_master/reason_slave_removed", - "master/task_lost/source_slave/reason_executor_terminated", - "master/valid_executor_to_framework_messages", - } - - m["evqueue"] = []string{ - "master/event_queue_dispatches", - "master/event_queue_http_requests", - "master/event_queue_messages", - } - - m["registrar"] = []string{ - "registrar/state_fetch_ms", - "registrar/state_store_ms", - "registrar/state_store_ms/max", - "registrar/state_store_ms/min", - "registrar/state_store_ms/p50", - "registrar/state_store_ms/p90", - "registrar/state_store_ms/p95", - "registrar/state_store_ms/p99", - "registrar/state_store_ms/p999", - "registrar/state_store_ms/p9999", - } - - ret, ok := m[g] + ret, 
ok := m[group] if !ok { - log.Println("[mesos] Unkown metrics group: ", g) + log.Printf("[mesos] Unkown %s metrics group: %s\n", role, group) return []string{} } return ret } -// removeGroup(), remove unwanted sets -func (m *Mesos) removeGroup(j *map[string]interface{}) { +func (m *Mesos) filterMetrics(role Role, metrics *map[string]interface{}) { var ok bool + var selectedMetrics []string - b := metricsDiff(m.MasterCols) + if role == MASTER { + selectedMetrics = m.MasterCols + } else if role == SLAVE { + selectedMetrics = m.SlaveCols + } - for _, k := range b { - for _, v := range masterBlocks(k) { - if _, ok = (*j)[v]; ok { - delete((*j), v) + for _, k := range metricsDiff(role, selectedMetrics) { + for _, v := range getMetrics(role, k) { + if _, ok = (*metrics)[v]; ok { + delete((*metrics), v) } } } @@ -280,23 +420,66 @@ var client = &http.Client{ Timeout: time.Duration(4 * time.Second), } -// This should not belong to the object -func (m *Mesos) gatherMetrics(a string, acc telegraf.Accumulator) error { - var jsonOut map[string]interface{} +func (m *Mesos) gatherSlaveTaskMetrics(address string, defaultPort string, acc telegraf.Accumulator) error { + var metrics []map[string]interface{} - host, _, err := net.SplitHostPort(a) + host, _, err := net.SplitHostPort(address) if err != nil { - host = a - a = a + ":5050" + host = address + address = address + defaultPort } tags := map[string]string{ "server": host, } - if m.Timeout == 0 { - log.Println("[mesos] Missing timeout value, setting default value (100ms)") - m.Timeout = 100 + ts := strconv.Itoa(m.Timeout) + "ms" + + resp, err := client.Get("http://" + address + "/monitor/statistics?timeout=" + ts) + + if err != nil { + return err + } + + data, err := ioutil.ReadAll(resp.Body) + resp.Body.Close() + if err != nil { + return err + } + + if err = json.Unmarshal([]byte(data), &metrics); err != nil { + return errors.New("Error decoding JSON response") + } + + for _, task := range metrics { + tags["task_id"] = 
task["executor_id"].(string) + + jf := jsonparser.JSONFlattener{} + err = jf.FlattenJSON("", task) + + if err != nil { + return err + } + + acc.AddFields("mesos-tasks", jf.Fields, tags) + } + + return nil +} + +// This should not belong to the object +func (m *Mesos) gatherMainMetrics(a string, defaultPort string, role Role, acc telegraf.Accumulator) error { + var jsonOut map[string]interface{} + + host, _, err := net.SplitHostPort(a) + if err != nil { + host = a + a = a + defaultPort + } + + tags := map[string]string{ + "server": host, + "role": string(role), } ts := strconv.Itoa(m.Timeout) + "ms" @@ -317,7 +500,7 @@ func (m *Mesos) gatherMetrics(a string, acc telegraf.Accumulator) error { return errors.New("Error decoding JSON response") } - m.removeGroup(&jsonOut) + m.filterMetrics(role, &jsonOut) jf := jsonparser.JSONFlattener{} diff --git a/plugins/inputs/mesos/mesos_test.go b/plugins/inputs/mesos/mesos_test.go index c56580649..062e23e4a 100644 --- a/plugins/inputs/mesos/mesos_test.go +++ b/plugins/inputs/mesos/mesos_test.go @@ -2,70 +2,275 @@ package mesos import ( "encoding/json" + "fmt" "math/rand" "net/http" "net/http/httptest" "os" "testing" + jsonparser "github.com/influxdata/telegraf/plugins/parsers/json" "github.com/influxdata/telegraf/testutil" ) -var mesosMetrics map[string]interface{} -var ts *httptest.Server +var masterMetrics map[string]interface{} +var masterTestServer *httptest.Server +var slaveMetrics map[string]interface{} +var slaveTaskMetrics map[string]interface{} +var slaveTestServer *httptest.Server + +func randUUID() string { + b := make([]byte, 16) + rand.Read(b) + return fmt.Sprintf("%x-%x-%x-%x-%x", b[0:4], b[4:6], b[6:8], b[8:10], b[10:]) +} func generateMetrics() { - mesosMetrics = make(map[string]interface{}) + masterMetrics = make(map[string]interface{}) - metricNames := []string{"master/cpus_percent", "master/cpus_used", "master/cpus_total", - "master/cpus_revocable_percent", "master/cpus_revocable_total", 
"master/cpus_revocable_used", - "master/disk_percent", "master/disk_used", "master/disk_total", "master/disk_revocable_percent", - "master/disk_revocable_total", "master/disk_revocable_used", "master/mem_percent", - "master/mem_used", "master/mem_total", "master/mem_revocable_percent", "master/mem_revocable_total", - "master/mem_revocable_used", "master/elected", "master/uptime_secs", "system/cpus_total", - "system/load_15min", "system/load_5min", "system/load_1min", "system/mem_free_bytes", - "system/mem_total_bytes", "master/slave_registrations", "master/slave_removals", - "master/slave_reregistrations", "master/slave_shutdowns_scheduled", "master/slave_shutdowns_canceled", - "master/slave_shutdowns_completed", "master/slaves_active", "master/slaves_connected", - "master/slaves_disconnected", "master/slaves_inactive", "master/frameworks_active", - "master/frameworks_connected", "master/frameworks_disconnected", "master/frameworks_inactive", - "master/outstanding_offers", "master/tasks_error", "master/tasks_failed", "master/tasks_finished", - "master/tasks_killed", "master/tasks_lost", "master/tasks_running", "master/tasks_staging", - "master/tasks_starting", "master/invalid_executor_to_framework_messages", "master/invalid_framework_to_executor_messages", - "master/invalid_status_update_acknowledgements", "master/invalid_status_updates", - "master/dropped_messages", "master/messages_authenticate", "master/messages_deactivate_framework", - "master/messages_decline_offers", "master/messages_executor_to_framework", "master/messages_exited_executor", - "master/messages_framework_to_executor", "master/messages_kill_task", "master/messages_launch_tasks", - "master/messages_reconcile_tasks", "master/messages_register_framework", "master/messages_register_slave", - "master/messages_reregister_framework", "master/messages_reregister_slave", "master/messages_resource_request", - "master/messages_revive_offers", "master/messages_status_update", 
"master/messages_status_update_acknowledgement", - "master/messages_unregister_framework", "master/messages_unregister_slave", "master/messages_update_slave", - "master/recovery_slave_removals", "master/slave_removals/reason_registered", "master/slave_removals/reason_unhealthy", - "master/slave_removals/reason_unregistered", "master/valid_framework_to_executor_messages", "master/valid_status_update_acknowledgements", - "master/valid_status_updates", "master/task_lost/source_master/reason_invalid_offers", - "master/task_lost/source_master/reason_slave_removed", "master/task_lost/source_slave/reason_executor_terminated", - "master/valid_executor_to_framework_messages", "master/event_queue_dispatches", - "master/event_queue_http_requests", "master/event_queue_messages", "registrar/state_fetch_ms", - "registrar/state_store_ms", "registrar/state_store_ms/max", "registrar/state_store_ms/min", - "registrar/state_store_ms/p50", "registrar/state_store_ms/p90", "registrar/state_store_ms/p95", - "registrar/state_store_ms/p99", "registrar/state_store_ms/p999", "registrar/state_store_ms/p9999"} + metricNames := []string{ + // resources + "master/cpus_percent", + "master/cpus_used", + "master/cpus_total", + "master/cpus_revocable_percent", + "master/cpus_revocable_total", + "master/cpus_revocable_used", + "master/disk_percent", + "master/disk_used", + "master/disk_total", + "master/disk_revocable_percent", + "master/disk_revocable_total", + "master/disk_revocable_used", + "master/gpus_percent", + "master/gpus_used", + "master/gpus_total", + "master/gpus_revocable_percent", + "master/gpus_revocable_total", + "master/gpus_revocable_used", + "master/mem_percent", + "master/mem_used", + "master/mem_total", + "master/mem_revocable_percent", + "master/mem_revocable_total", + "master/mem_revocable_used", + // master + "master/elected", + "master/uptime_secs", + // system + "system/cpus_total", + "system/load_15min", + "system/load_5min", + "system/load_1min", + "system/mem_free_bytes", 
+ "system/mem_total_bytes", + // agents + "master/slave_registrations", + "master/slave_removals", + "master/slave_reregistrations", + "master/slave_shutdowns_scheduled", + "master/slave_shutdowns_canceled", + "master/slave_shutdowns_completed", + "master/slaves_active", + "master/slaves_connected", + "master/slaves_disconnected", + "master/slaves_inactive", + // frameworks + "master/frameworks_active", + "master/frameworks_connected", + "master/frameworks_disconnected", + "master/frameworks_inactive", + "master/outstanding_offers", + // tasks + "master/tasks_error", + "master/tasks_failed", + "master/tasks_finished", + "master/tasks_killed", + "master/tasks_lost", + "master/tasks_running", + "master/tasks_staging", + "master/tasks_starting", + // messages + "master/invalid_executor_to_framework_messages", + "master/invalid_framework_to_executor_messages", + "master/invalid_status_update_acknowledgements", + "master/invalid_status_updates", + "master/dropped_messages", + "master/messages_authenticate", + "master/messages_deactivate_framework", + "master/messages_decline_offers", + "master/messages_executor_to_framework", + "master/messages_exited_executor", + "master/messages_framework_to_executor", + "master/messages_kill_task", + "master/messages_launch_tasks", + "master/messages_reconcile_tasks", + "master/messages_register_framework", + "master/messages_register_slave", + "master/messages_reregister_framework", + "master/messages_reregister_slave", + "master/messages_resource_request", + "master/messages_revive_offers", + "master/messages_status_update", + "master/messages_status_update_acknowledgement", + "master/messages_unregister_framework", + "master/messages_unregister_slave", + "master/messages_update_slave", + "master/recovery_slave_removals", + "master/slave_removals/reason_registered", + "master/slave_removals/reason_unhealthy", + "master/slave_removals/reason_unregistered", + "master/valid_framework_to_executor_messages", + 
"master/valid_status_update_acknowledgements", + "master/valid_status_updates", + "master/task_lost/source_master/reason_invalid_offers", + "master/task_lost/source_master/reason_slave_removed", + "master/task_lost/source_slave/reason_executor_terminated", + "master/valid_executor_to_framework_messages", + // evgqueue + "master/event_queue_dispatches", + "master/event_queue_http_requests", + "master/event_queue_messages", + // registrar + "registrar/state_fetch_ms", + "registrar/state_store_ms", + "registrar/state_store_ms/max", + "registrar/state_store_ms/min", + "registrar/state_store_ms/p50", + "registrar/state_store_ms/p90", + "registrar/state_store_ms/p95", + "registrar/state_store_ms/p99", + "registrar/state_store_ms/p999", + "registrar/state_store_ms/p9999", + } for _, k := range metricNames { - mesosMetrics[k] = rand.Float64() + masterMetrics[k] = rand.Float64() + } + + slaveMetrics = make(map[string]interface{}) + + metricNames = []string{ + // resources + "slave/cpus_percent", + "slave/cpus_used", + "slave/cpus_total", + "slave/cpus_revocable_percent", + "slave/cpus_revocable_total", + "slave/cpus_revocable_used", + "slave/disk_percent", + "slave/disk_used", + "slave/disk_total", + "slave/disk_revocable_percent", + "slave/disk_revocable_total", + "slave/disk_revocable_used", + "slave/gpus_percent", + "slave/gpus_used", + "slave/gpus_total", + "slave/gpus_revocable_percent", + "slave/gpus_revocable_total", + "slave/gpus_revocable_used", + "slave/mem_percent", + "slave/mem_used", + "slave/mem_total", + "slave/mem_revocable_percent", + "slave/mem_revocable_total", + "slave/mem_revocable_used", + // agent + "slave/registered", + "slave/uptime_secs", + // system + "system/cpus_total", + "system/load_15min", + "system/load_5min", + "system/load_1min", + "system/mem_free_bytes", + "system/mem_total_bytes", + // executors + "containerizer/mesos/container_destroy_errors", + "slave/container_launch_errors", + "slave/executors_preempted", + 
"slave/frameworks_active", + "slave/executor_directory_max_allowed_age_secs", + "slave/executors_registering", + "slave/executors_running", + "slave/executors_terminated", + "slave/executors_terminating", + "slave/recovery_errors", + // tasks + "slave/tasks_failed", + "slave/tasks_finished", + "slave/tasks_killed", + "slave/tasks_lost", + "slave/tasks_running", + "slave/tasks_staging", + "slave/tasks_starting", + // messages + "slave/invalid_framework_messages", + "slave/invalid_status_updates", + "slave/valid_framework_messages", + "slave/valid_status_updates", + } + + for _, k := range metricNames { + slaveMetrics[k] = rand.Float64() + } + + slaveTaskMetrics = map[string]interface{}{ + "executor_id": fmt.Sprintf("task_%s", randUUID()), + "executor_name": "Some task description", + "framework_id": randUUID(), + "source": fmt.Sprintf("task_source_%s", randUUID()), + "statistics": map[string]interface{}{ + "cpus_limit": rand.Float64(), + "cpus_system_time_secs": rand.Float64(), + "cpus_user_time_secs": rand.Float64(), + "mem_anon_bytes": float64(rand.Int63()), + "mem_cache_bytes": float64(rand.Int63()), + "mem_critical_pressure_counter": float64(rand.Int63()), + "mem_file_bytes": float64(rand.Int63()), + "mem_limit_bytes": float64(rand.Int63()), + "mem_low_pressure_counter": float64(rand.Int63()), + "mem_mapped_file_bytes": float64(rand.Int63()), + "mem_medium_pressure_counter": float64(rand.Int63()), + "mem_rss_bytes": float64(rand.Int63()), + "mem_swap_bytes": float64(rand.Int63()), + "mem_total_bytes": float64(rand.Int63()), + "mem_total_memsw_bytes": float64(rand.Int63()), + "mem_unevictable_bytes": float64(rand.Int63()), + "timestamp": rand.Float64(), + }, } } func TestMain(m *testing.M) { generateMetrics() - r := http.NewServeMux() - r.HandleFunc("/metrics/snapshot", func(w http.ResponseWriter, r *http.Request) { + + masterRouter := http.NewServeMux() + masterRouter.HandleFunc("/metrics/snapshot", func(w http.ResponseWriter, r *http.Request) { 
w.WriteHeader(http.StatusOK) w.Header().Set("Content-Type", "application/json") - json.NewEncoder(w).Encode(mesosMetrics) + json.NewEncoder(w).Encode(masterMetrics) }) - ts = httptest.NewServer(r) + masterTestServer = httptest.NewServer(masterRouter) + + slaveRouter := http.NewServeMux() + slaveRouter.HandleFunc("/metrics/snapshot", func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusOK) + w.Header().Set("Content-Type", "application/json") + json.NewEncoder(w).Encode(slaveMetrics) + }) + slaveRouter.HandleFunc("/monitor/statistics", func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusOK) + w.Header().Set("Content-Type", "application/json") + json.NewEncoder(w).Encode([]map[string]interface{}{slaveTaskMetrics}) + }) + slaveTestServer = httptest.NewServer(slaveRouter) + rc := m.Run() - ts.Close() + + masterTestServer.Close() + slaveTestServer.Close() os.Exit(rc) } @@ -73,7 +278,7 @@ func TestMesosMaster(t *testing.T) { var acc testutil.Accumulator m := Mesos{ - Masters: []string{ts.Listener.Addr().String()}, + Masters: []string{masterTestServer.Listener.Addr().String()}, Timeout: 10, } @@ -83,34 +288,88 @@ func TestMesosMaster(t *testing.T) { t.Errorf(err.Error()) } - acc.AssertContainsFields(t, "mesos", mesosMetrics) + acc.AssertContainsFields(t, "mesos", masterMetrics) } -func TestRemoveGroup(t *testing.T) { - generateMetrics() - +func TestMasterFilter(t *testing.T) { m := Mesos{ MasterCols: []string{ "resources", "master", "registrar", }, } b := []string{ - "system", "slaves", "frameworks", - "messages", "evqueue", + "system", "agents", "frameworks", + "messages", "evqueue", "tasks", } - m.removeGroup(&mesosMetrics) + m.filterMetrics(MASTER, &masterMetrics) for _, v := range b { - for _, x := range masterBlocks(v) { - if _, ok := mesosMetrics[x]; ok { + for _, x := range getMetrics(MASTER, v) { + if _, ok := masterMetrics[x]; ok { t.Errorf("Found key %s, it should be gone.", x) } } } for _, v := range m.MasterCols { - 
for _, x := range masterBlocks(v) { - if _, ok := mesosMetrics[x]; !ok { + for _, x := range getMetrics(MASTER, v) { + if _, ok := masterMetrics[x]; !ok { + t.Errorf("Didn't find key %s, it should present.", x) + } + } + } +} + +func TestMesosSlave(t *testing.T) { + var acc testutil.Accumulator + + m := Mesos{ + Masters: []string{}, + Slaves: []string{slaveTestServer.Listener.Addr().String()}, + SlaveTasks: true, + Timeout: 10, + } + + err := m.Gather(&acc) + + if err != nil { + t.Errorf(err.Error()) + } + + acc.AssertContainsFields(t, "mesos", slaveMetrics) + + jf := jsonparser.JSONFlattener{} + err = jf.FlattenJSON("", slaveTaskMetrics) + + if err != nil { + t.Errorf(err.Error()) + } + + acc.AssertContainsFields(t, "mesos-tasks", jf.Fields) +} + +func TestSlaveFilter(t *testing.T) { + m := Mesos{ + SlaveCols: []string{ + "resources", "agent", "tasks", + }, + } + b := []string{ + "system", "executors", "messages", + } + + m.filterMetrics(SLAVE, &slaveMetrics) + + for _, v := range b { + for _, x := range getMetrics(SLAVE, v) { + if _, ok := slaveMetrics[x]; ok { + t.Errorf("Found key %s, it should be gone.", x) + } + } + } + for _, v := range m.MasterCols { + for _, x := range getMetrics(SLAVE, v) { + if _, ok := slaveMetrics[x]; !ok { t.Errorf("Didn't find key %s, it should present.", x) } } From 8acda0da8f36fd863f3bea40c3733fbd4803766e Mon Sep 17 00:00:00 2001 From: Cameron Sparr Date: Thu, 21 Jul 2016 17:53:41 +0100 Subject: [PATCH 44/47] Update etc/telegraf.conf --- etc/telegraf.conf | 75 ++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 61 insertions(+), 14 deletions(-) diff --git a/etc/telegraf.conf b/etc/telegraf.conf index c667c4c9f..5189d2e3f 100644 --- a/etc/telegraf.conf +++ b/etc/telegraf.conf @@ -197,7 +197,7 @@ # # Configuration for Graphite server to send metrics to # [[outputs.graphite]] # ## TCP endpoint for your graphite instance. -# ## If multiple endpoints are configured, the output will be load balanced. 
+# ## If multiple endpoints are configured, output will be load balanced. # ## Only one of the endpoints will be written to with each iteration. # servers = ["localhost:2003"] # ## Prefix metrics name @@ -436,8 +436,8 @@ ## disk partitions. ## Setting devices will restrict the stats to the specified devices. # devices = ["sda", "sdb"] - ## Uncomment the following line if you do not need disk serial numbers. - # skip_serial_number = true + ## Uncomment the following line if you need disk serial numbers. + # skip_serial_number = false # Get kernel statistics from /proc/stat @@ -465,7 +465,7 @@ # no configuration -# # Read stats from an aerospike server +# # Read stats from aerospike server(s) # [[inputs.aerospike]] # ## Aerospike servers to connect to (with port) # ## This plugin will query all namespaces the aerospike @@ -666,11 +666,13 @@ # container_names = [] # ## Timeout for docker list, info, and stats commands # timeout = "5s" +# # ## Whether to report for each container per-device blkio (8:0, 8:1...) and # ## network (eth0, eth1, ...) stats or not # perdevice = true # ## Whether to report for each container total blkio and network stats or not # total = false +# # # Read statistics from one or many dovecot servers @@ -787,9 +789,11 @@ # [[inputs.haproxy]] # ## An array of address to gather stats about. Specify an ip on hostname # ## with optional port. ie localhost, 10.10.3.33:1936, etc. 
-# -# ## If no servers are specified, then default to 127.0.0.1:1936 -# servers = ["http://myhaproxy.com:1936", "http://anotherhaproxy.com:1936"] +# ## Make sure you specify the complete path to the stats endpoint +# ## ie 10.10.3.33:1936/haproxy?stats +# # +# ## If no servers are specified, then default to 127.0.0.1:1936/haproxy?stats +# servers = ["http://myhaproxy.com:1936/haproxy?stats"] # ## Or you can also use local socket # ## servers = ["socket:/run/haproxy/admin.sock"] @@ -975,21 +979,35 @@ # # Telegraf plugin for gathering metrics from N Mesos masters # [[inputs.mesos]] -# # Timeout, in ms. +# ## Timeout, in ms. # timeout = 100 -# # A list of Mesos masters, default value is localhost:5050. +# ## A list of Mesos masters. # masters = ["localhost:5050"] -# # Metrics groups to be collected, by default, all enabled. +# ## Master metrics groups to be collected, by default, all enabled. # master_collections = [ # "resources", # "master", # "system", -# "slaves", +# "agents", # "frameworks", +# "tasks", # "messages", # "evqueue", # "registrar", # ] +# ## A list of Mesos slaves, default is [] +# # slaves = [] +# ## Slave metrics groups to be collected, by default, all enabled. +# # slave_collections = [ +# # "resources", +# # "agent", +# # "system", +# # "executors", +# # "tasks", +# # "messages", +# # ] +# ## Include mesos tasks statistics, default is false +# # slave_tasks = true # # Read metrics from one or many MongoDB servers @@ -1000,6 +1018,7 @@ # ## mongodb://10.10.3.33:18832, # ## 10.0.0.1:10000, etc. # servers = ["127.0.0.1:27017"] +# gather_perdb_stats = false # # Read metrics from one or many mysql servers @@ -1106,9 +1125,9 @@ # ## file paths for proc files. If empty default paths will be used: # ## /proc/net/netstat, /proc/net/snmp, /proc/net/snmp6 # ## These can also be overridden with env variables, see README. 
-# proc_net_netstat = "" -# proc_net_snmp = "" -# proc_net_snmp6 = "" +# proc_net_netstat = "/proc/net/netstat" +# proc_net_snmp = "/proc/net/snmp" +# proc_net_snmp6 = "/proc/net/snmp6" # ## dump metrics with 0 values too # dump_zeros = true @@ -1310,6 +1329,13 @@ # # username = "guest" # # password = "guest" # +# ## Optional SSL Config +# # ssl_ca = "/etc/telegraf/ca.pem" +# # ssl_cert = "/etc/telegraf/cert.pem" +# # ssl_key = "/etc/telegraf/key.pem" +# ## Use SSL but skip chain & host verification +# # insecure_skip_verify = false +# # ## A list of nodes to pull metrics about. If not specified, metrics for # ## all nodes are gathered. # # nodes = ["rabbit@node1", "rabbit@node2"] @@ -1328,6 +1354,7 @@ # ## e.g. # ## tcp://localhost:6379 # ## tcp://:password@192.168.99.100 +# ## unix:///var/run/redis.sock # ## # ## If no servers are specified, then localhost is used as the host. # ## If no port is specified, 6379 is used @@ -1564,6 +1591,8 @@ # ## %{COMMON_LOG_FORMAT} (plain apache & nginx access logs) # ## %{COMBINED_LOG_FORMAT} (access logs + referrer & agent) # patterns = ["%{INFLUXDB_HTTPD_LOG}"] +# ## Name of the outputted measurement name. +# measurement = "influxdb_log" # ## Full path(s) to custom pattern files. # custom_pattern_files = [] # ## Custom patterns can also be defined here. Put one pattern per line. @@ -1627,6 +1656,21 @@ # data_format = "influx" +# # Read NSQ topic for metrics. +# [[inputs.nsq_consumer]] +# ## An string representing the NSQD TCP Endpoint +# server = "localhost:4150" +# topic = "telegraf" +# channel = "consumer" +# max_in_flight = 100 +# +# ## Data format to consume. 
+# ## Each data format has it's own unique set of configuration options, read +# ## more about them here: +# ## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md +# data_format = "influx" + + # # Statsd Server # [[inputs.statsd]] # ## Address and port to host UDP listener on @@ -1730,6 +1774,9 @@ # [inputs.webhooks.github] # path = "/github" # +# [inputs.webhooks.mandrill] +# path = "/mandrill" +# # [inputs.webhooks.rollbar] # path = "/rollbar" From 1be6ea5696bab27048f96c00f194191160efd56d Mon Sep 17 00:00:00 2001 From: Patrick Hemmer Date: Fri, 22 Jul 2016 04:22:52 -0400 Subject: [PATCH 45/47] remove unused accumulator.prefix (#1535) --- agent/accumulator.go | 6 ------ 1 file changed, 6 deletions(-) diff --git a/agent/accumulator.go b/agent/accumulator.go index 504731720..8b0987c41 100644 --- a/agent/accumulator.go +++ b/agent/accumulator.go @@ -32,8 +32,6 @@ type accumulator struct { inputConfig *internal_models.InputConfig - prefix string - precision time.Duration } @@ -146,10 +144,6 @@ func (ac *accumulator) AddFields( } timestamp = timestamp.Round(ac.precision) - if ac.prefix != "" { - measurement = ac.prefix + measurement - } - m, err := telegraf.NewMetric(measurement, tags, result, timestamp) if err != nil { log.Printf("Error adding point [%s]: %s\n", measurement, err.Error()) From 4363eebc1b2da873a76f770abcf5e9aad2d1879a Mon Sep 17 00:00:00 2001 From: Patrick Hemmer Date: Fri, 22 Jul 2016 04:23:45 -0400 Subject: [PATCH 46/47] update gopsutil for FreeBSD disk time metrics (#1534) Results in adding the io_time metric to FreeBSD, and adjusts the read_time and write_time metrics to be in milliseconds like linux. --- CHANGELOG.md | 1 + Godeps | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7480bbb59..4c39f6c53 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ - [#1477](https://github.com/influxdata/telegraf/issues/1477): nstat: fix inaccurate config panic. 
- [#1481](https://github.com/influxdata/telegraf/issues/1481): jolokia: fix handling multiple multi-dimensional attributes. - [#1430](https://github.com/influxdata/telegraf/issues/1430): Fix prometheus character sanitizing. Sanitize more win_perf_counters characters. +- [#1534](https://github.com/influxdata/telegraf/pull/1534): Add diskio io_time to FreeBSD & report timing metrics as ms (as linux does). ## v1.0 beta 3 [2016-07-18] diff --git a/Godeps b/Godeps index 5caa6a9e2..2b4fce555 100644 --- a/Godeps +++ b/Godeps @@ -44,7 +44,7 @@ github.com/prometheus/client_model fa8ad6fec33561be4280a8f0514318c79d7f6cb6 github.com/prometheus/common e8eabff8812b05acf522b45fdcd725a785188e37 github.com/prometheus/procfs 406e5b7bfd8201a36e2bb5f7bdae0b03380c2ce8 github.com/samuel/go-zookeeper 218e9c81c0dd8b3b18172b2bbfad92cc7d6db55f -github.com/shirou/gopsutil 586bb697f3ec9f8ec08ffefe18f521a64534037c +github.com/shirou/gopsutil ee66bc560c366dd33b9a4046ba0b644caba46bed github.com/soniah/gosnmp b1b4f885b12c5dcbd021c5cee1c904110de6db7d github.com/sparrc/aerospike-client-go d4bb42d2c2d39dae68e054116f4538af189e05d5 github.com/streadway/amqp b4f3ceab0337f013208d31348b578d83c0064744 From 986735234b68359812f4ab65fb26f6a926874e31 Mon Sep 17 00:00:00 2001 From: Jason Gardner Date: Fri, 22 Jul 2016 10:05:53 -0500 Subject: [PATCH 47/47] Fix output config typo. (#1527) --- internal/config/config.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internal/config/config.go b/internal/config/config.go index 8f7821624..9408d9efd 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -139,7 +139,7 @@ func (c *Config) InputNames() []string { return name } -// Outputs returns a list of strings of the configured inputs. +// Outputs returns a list of strings of the configured outputs. func (c *Config) OutputNames() []string { var name []string for _, output := range c.Outputs {