From f76739cb1bd13d50ceb2d931c21cecd8e6d909f8 Mon Sep 17 00:00:00 2001 From: Cameron Sparr Date: Thu, 14 Apr 2016 16:16:26 -0600 Subject: [PATCH 01/84] Release 0.12.1 --- CHANGELOG.md | 4 +++- README.md | 24 ++++++++++++------------ circle.yml | 6 +++--- 3 files changed, 18 insertions(+), 16 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0d07fb9f9..3df83322a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,6 @@ -## v0.12.1 [unreleased] +## v0.13 [unreleased] + +## v0.12.1 [2016-04-14] ### Release Notes - Breaking change in the dovecot input plugin. See Features section below. diff --git a/README.md b/README.md index 57cff90e7..e731dad0a 100644 --- a/README.md +++ b/README.md @@ -20,12 +20,12 @@ new plugins. ### Linux deb and rpm Packages: Latest: -* http://get.influxdb.org/telegraf/telegraf_0.12.0-1_amd64.deb -* http://get.influxdb.org/telegraf/telegraf-0.12.0-1.x86_64.rpm +* http://get.influxdb.org/telegraf/telegraf_0.12.1-1_amd64.deb +* http://get.influxdb.org/telegraf/telegraf-0.12.1-1.x86_64.rpm Latest (arm): -* http://get.influxdb.org/telegraf/telegraf_0.12.0-1_armhf.deb -* http://get.influxdb.org/telegraf/telegraf-0.12.0-1.armhf.rpm +* http://get.influxdb.org/telegraf/telegraf_0.12.1-1_armhf.deb +* http://get.influxdb.org/telegraf/telegraf-0.12.1-1.armhf.rpm ##### Package Instructions: @@ -46,28 +46,28 @@ to use this repo to install & update telegraf. ### Linux tarballs: Latest: -* http://get.influxdb.org/telegraf/telegraf-0.12.0-1_linux_amd64.tar.gz -* http://get.influxdb.org/telegraf/telegraf-0.12.0-1_linux_i386.tar.gz -* http://get.influxdb.org/telegraf/telegraf-0.12.0-1_linux_armhf.tar.gz +* http://get.influxdb.org/telegraf/telegraf-0.12.1-1_linux_amd64.tar.gz +* http://get.influxdb.org/telegraf/telegraf-0.12.1-1_linux_i386.tar.gz +* http://get.influxdb.org/telegraf/telegraf-0.12.1-1_linux_armhf.tar.gz ##### tarball Instructions: To install the full directory structure with config file, run: ``` -sudo tar -C / -zxvf ./telegraf-0.12.0-1_linux_amd64.tar.gz +sudo tar -C / -zxvf ./telegraf-0.12.1-1_linux_amd64.tar.gz ``` To extract only the binary, run: ``` -tar -zxvf telegraf-0.12.0-1_linux_amd64.tar.gz --strip-components=3 ./usr/bin/telegraf +tar -zxvf telegraf-0.12.1-1_linux_amd64.tar.gz --strip-components=3 ./usr/bin/telegraf ``` ### FreeBSD tarball: Latest: -* http://get.influxdb.org/telegraf/telegraf-0.12.0-1_freebsd_amd64.tar.gz +* http://get.influxdb.org/telegraf/telegraf-0.12.1-1_freebsd_amd64.tar.gz ##### tarball Instructions: @@ -87,8 +87,8 @@ brew install telegraf ### Windows Binaries (EXPERIMENTAL) Latest: -* http://get.influxdb.org/telegraf/telegraf-0.12.0-1_windows_amd64.zip -* http://get.influxdb.org/telegraf/telegraf-0.12.0-1_windows_i386.zip +* http://get.influxdb.org/telegraf/telegraf-0.12.1-1_windows_amd64.zip +* http://get.influxdb.org/telegraf/telegraf-0.12.1-1_windows_i386.zip ### From Source: diff --git a/circle.yml b/circle.yml index e7b711f9d..cbfdb225c 100644 --- a/circle.yml +++ b/circle.yml @@ -4,9 +4,9 @@ machine: post: - sudo service zookeeper stop - go version - - go version | grep 1.6 || sudo rm -rf /usr/local/go - - wget https://storage.googleapis.com/golang/go1.6.linux-amd64.tar.gz - - sudo tar -C /usr/local -xzf go1.6.linux-amd64.tar.gz + - go version | grep 1.6.1 || sudo rm -rf /usr/local/go + - wget https://storage.googleapis.com/golang/go1.6.1.linux-amd64.tar.gz + - sudo tar -C /usr/local -xzf go1.6.1.linux-amd64.tar.gz - go version dependencies: From 81d0a64d46716817df7e68c21c7198ca8541b3e6 Mon Sep 17 00:00:00 2001 From: 
Cameron Sparr Date: Tue, 12 Apr 2016 17:06:27 -0600 Subject: [PATCH 02/84] Adds support for removing/keeping tags from metrics closes #706 --- CHANGELOG.md | 15 +++ Godeps | 1 + agent/accumulator.go | 1 + agent/accumulator_test.go | 32 +++++ docs/CONFIGURATION.md | 87 ++++++++----- docs/LICENSE_OF_DEPENDENCIES.md | 3 +- internal/config/config.go | 52 ++++++-- internal/config/config_test.go | 120 +++++++++--------- internal/internal.go | 56 --------- internal/internal_test.go | 41 ------- internal/models/filter.go | 163 ++++++++++++++++------- internal/models/filter_test.go | 123 +++++++++++++++++++ internal/models/running_output.go | 13 ++ internal/models/running_output_test.go | 140 +++++++++++++++++++++ 14 files changed, 611 insertions(+), 236 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3df83322a..f1fd5ed43 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,20 @@ ## v0.13 [unreleased] +### Release Notes +- `taginclude` and `tagexclude` are now available, which can be used to remove +tags from measurements on inputs and outputs. See +[the configuration doc](https://github.com/influxdata/telegraf/blob/master/docs/CONFIGURATION.md) +for more details. +- **Measurement filtering:** All measurement filters now match based on glob +only. Previously there was an undocumented behavior where filters would match +based on _prefix_ in addition to globs. This means that a filter like +`fielddrop = ["time_"]` will need to be changed to `fielddrop = ["time_*"]` + +### Features +- [#1017](https://github.com/influxdata/telegraf/pull/1017): taginclude and tagexclude arguments. + +### Bugfixes + ## v0.12.1 [2016-04-14] ### Release Notes diff --git a/Godeps b/Godeps index 14430ea5d..71057f497 100644 --- a/Godeps +++ b/Godeps @@ -16,6 +16,7 @@ github.com/eapache/go-resiliency b86b1ec0dd4209a588dc1285cdd471e73525c0b3 github.com/eapache/queue ded5959c0d4e360646dc9e9908cff48666781367 github.com/eclipse/paho.mqtt.golang 0f7a459f04f13a41b7ed752d47944528d4bf9a86 github.com/go-sql-driver/mysql 1fca743146605a172a266e1654e01e5cd5669bee +github.com/gobwas/glob d877f6352135181470c40c73ebb81aefa22115fa github.com/golang/protobuf 552c7b9542c194800fd493123b3798ef0a832032 github.com/golang/snappy 427fb6fc07997f43afa32f35e850833760e489a7 github.com/gonuts/go-shellquote e842a11b24c6abfb3dd27af69a17f482e4b483c2 diff --git a/agent/accumulator.go b/agent/accumulator.go index 7ec22cd7f..6b2ffde2d 100644 --- a/agent/accumulator.go +++ b/agent/accumulator.go @@ -96,6 +96,7 @@ func (ac *accumulator) AddFields( tags[k] = v } } + ac.inputConfig.Filter.FilterTags(tags) result := make(map[string]interface{}) for k, v := range fields { diff --git a/agent/accumulator_test.go b/agent/accumulator_test.go index 05f9b02aa..ee8f65e48 100644 --- a/agent/accumulator_test.go +++ b/agent/accumulator_test.go @@ -300,3 +300,35 @@ func TestAddBools(t *testing.T) { fmt.Sprintf("acctest,acc=test,default=tag value=false %d", now.UnixNano()), actual) } + +// Test that tag filters get applied to metrics.
+func TestAccFilterTags(t *testing.T) { + a := accumulator{} + now := time.Now() + a.metrics = make(chan telegraf.Metric, 10) + defer close(a.metrics) + filter := internal_models.Filter{ + TagExclude: []string{"acc"}, + } + assert.NoError(t, filter.CompileFilter()) + a.inputConfig = &internal_models.InputConfig{} + a.inputConfig.Filter = filter + + a.Add("acctest", float64(101), map[string]string{}) + a.Add("acctest", float64(101), map[string]string{"acc": "test"}) + a.Add("acctest", float64(101), map[string]string{"acc": "test"}, now) + + testm := <-a.metrics + actual := testm.String() + assert.Contains(t, actual, "acctest value=101") + + testm = <-a.metrics + actual = testm.String() + assert.Contains(t, actual, "acctest value=101") + + testm = <-a.metrics + actual = testm.String() + assert.Equal(t, + fmt.Sprintf("acctest value=101 %d", now.UnixNano()), + actual) +} diff --git a/docs/CONFIGURATION.md b/docs/CONFIGURATION.md index 0afaa120f..9f783f87a 100644 --- a/docs/CONFIGURATION.md +++ b/docs/CONFIGURATION.md @@ -3,11 +3,20 @@ ## Generating a Configuration File A default Telegraf config file can be generated using the -sample-config flag: -`telegraf -sample-config > telegraf.conf` + +``` +telegraf -sample-config > telegraf.conf +``` To generate a file with specific inputs and outputs, you can use the -input-filter and -output-filter flags: -`telegraf -sample-config -input-filter cpu:mem:net:swap -output-filter influxdb:kafka` + +``` +telegraf -sample-config -input-filter cpu:mem:net:swap -output-filter influxdb:kafka +``` + +You can see the latest config file with all available plugins +[here](https://github.com/influxdata/telegraf/blob/master/etc/telegraf.conf) ## Environment Variables @@ -17,8 +26,8 @@ for numbers and booleans they should be plain (ie, $INT_VAR, $BOOL_VAR) ## `[global_tags]` Configuration -Global tags can be specific in the `[global_tags]` section of the config file in -key="value" format. All metrics being gathered on this host will be tagged +Global tags can be specified in the `[global_tags]` section of the config file +in key="value" format. All metrics being gathered on this host will be tagged with the tags specified here. ## `[agent]` Configuration @@ -47,9 +56,35 @@ ie, a jitter of 5s and flush_interval 10s means flushes will happen every 10-15s * **quiet**: Run telegraf in quiet mode. * **hostname**: Override default hostname, if empty use os.Hostname(). -## `[inputs.xxx]` Configuration +#### Measurement Filtering -There are some configuration options that are configurable per input: +Filters can be configured per input or output, see below for examples. + +* **namepass**: An array of strings that is used to filter metrics generated by the +current input. Each string in the array is tested as a glob match against +measurement names and if it matches, the field is emitted. +* **namedrop**: The inverse of pass, if a measurement name matches, it is not emitted. +* **fieldpass**: An array of strings that is used to filter metrics generated by the +current input. Each string in the array is tested as a glob match against field names +and if it matches, the field is emitted. fieldpass is not available for outputs. +* **fielddrop**: The inverse of pass, if a field name matches, it is not emitted. +fielddrop is not available for outputs. +* **tagpass**: tag names and arrays of strings that are used to filter +measurements by the current input. Each string in the array is tested as a glob +match against the tag name, and if it matches the measurement is emitted. 
+* **tagdrop**: The inverse of tagpass. If a tag matches, the measurement is not +emitted. This is tested on measurements that have passed the tagpass test. +* **tagexclude**: tagexclude can be used to exclude a tag from measurement(s). +As opposed to tagdrop, which will drop an entire measurement based on it's +tags, tagexclude simply strips the given tag keys from the measurement. This +can be used on inputs & outputs, but it is _recommended_ to be used on inputs, +as it is more efficient to filter out tags at the ingestion point. +* **taginclude**: taginclude is the inverse of tagexclude. It will only include +the tag keys in the final measurement. + +## Input Configuration + +Some configuration options are configurable per input: * **name_override**: Override the base name of the measurement. (Default is the name of the input). @@ -60,24 +95,6 @@ There are some configuration options that are configurable per input: global interval, but if one particular input should be run less or more often, you can configure that here. -#### Input Filters - -There are also filters that can be configured per input: - -* **namepass**: An array of strings that is used to filter metrics generated by the -current input. Each string in the array is tested as a glob match against -measurement names and if it matches, the field is emitted. -* **namedrop**: The inverse of pass, if a measurement name matches, it is not emitted. -* **fieldpass**: An array of strings that is used to filter metrics generated by the -current input. Each string in the array is tested as a glob match against field names -and if it matches, the field is emitted. -* **fielddrop**: The inverse of pass, if a field name matches, it is not emitted. -* **tagpass**: tag names and arrays of strings that are used to filter -measurements by the current input. Each string in the array is tested as a glob -match against the tag name, and if it matches the measurement is emitted. -* **tagdrop**: The inverse of tagpass. If a tag matches, the measurement is not -emitted. This is tested on measurements that have passed the tagpass test. - #### Input Configuration Examples This is a full working config that will output CPU data to an InfluxDB instance @@ -155,6 +172,20 @@ fields which begin with `time_`. namepass = ["rest_client_*"] ``` +#### Input Config: taginclude and tagexclude + +```toml +# Only include the "cpu" tag in the measurements for the cpu plugin. +[[inputs.cpu]] + percpu = true + totalcpu = true + taginclude = ["cpu"] + +# Exclude the "fstype" tag from the measurements for the disk plugin. +[[inputs.disk]] + tagexclude = ["fstype"] +``` + #### Input config: prefix, suffix, and override This plugin will emit measurements with the name `cpu_total` @@ -180,6 +211,9 @@ This will emit measurements with the name `foobar` This plugin will emit measurements with two additional tags: `tag1=foo` and `tag2=bar` +NOTE: Order matters, the `[inputs.cpu.tags]` table must be at the _end_ of the +plugin definition. + ```toml [[inputs.cpu]] percpu = false @@ -208,15 +242,12 @@ to avoid measurement collisions: fielddrop = ["cpu_time*"] ``` -## `[outputs.xxx]` Configuration +## Output Configuration Telegraf also supports specifying multiple output sinks to send data to, configuring each output sink is different, but examples can be found by running `telegraf -sample-config`. 
-Outputs also support the same configurable options as inputs -(namepass, namedrop, tagpass, tagdrop) - ```toml [[outputs.influxdb]] urls = [ "http://localhost:8086" ] diff --git a/docs/LICENSE_OF_DEPENDENCIES.md b/docs/LICENSE_OF_DEPENDENCIES.md index c8f3b0926..d448872f6 100644 --- a/docs/LICENSE_OF_DEPENDENCIES.md +++ b/docs/LICENSE_OF_DEPENDENCIES.md @@ -28,6 +28,5 @@ - github.com/wvanbergen/kazoo-go [MIT LICENSE](https://github.com/wvanbergen/kazoo-go/blob/master/MIT-LICENSE) - gopkg.in/dancannon/gorethink.v1 [APACHE LICENSE](https://github.com/dancannon/gorethink/blob/v1.1.2/LICENSE) - gopkg.in/mgo.v2 [BSD LICENSE](https://github.com/go-mgo/mgo/blob/v2/LICENSE) -- golang.org/x/crypto/* [BSD LICENSE](https://github.com/golang/crypto/blob/master/LICENSE) -- internal Glob function [MIT LICENSE](https://github.com/ryanuber/go-glob/blob/master/LICENSE) +- golang.org/x/crypto/ [BSD LICENSE](https://github.com/golang/crypto/blob/master/LICENSE) diff --git a/internal/config/config.go b/internal/config/config.go index cfd6c9593..5d0836964 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -580,9 +580,9 @@ func (c *Config) addInput(name string, table *ast.Table) error { // buildFilter builds a Filter // (tagpass/tagdrop/namepass/namedrop/fieldpass/fielddrop) to -// be inserted into the internal_models.OutputConfig/internal_models.InputConfig to be used for prefix -// filtering on tags and measurements -func buildFilter(tbl *ast.Table) internal_models.Filter { +// be inserted into the internal_models.OutputConfig/internal_models.InputConfig +// to be used for glob filtering on tags and measurements +func buildFilter(tbl *ast.Table) (internal_models.Filter, error) { f := internal_models.Filter{} if node, ok := tbl.Fields["namepass"]; ok { @@ -681,6 +681,33 @@ func buildFilter(tbl *ast.Table) internal_models.Filter { } } + if node, ok := tbl.Fields["tagexclude"]; ok { + if kv, ok := node.(*ast.KeyValue); ok { + if ary, ok := kv.Value.(*ast.Array); ok { + for _, elem := range ary.Value { + if str, ok := elem.(*ast.String); ok { + f.TagExclude = append(f.TagExclude, str.Value) + } + } + } + } + } + + if node, ok := tbl.Fields["taginclude"]; ok { + if kv, ok := node.(*ast.KeyValue); ok { + if ary, ok := kv.Value.(*ast.Array); ok { + for _, elem := range ary.Value { + if str, ok := elem.(*ast.String); ok { + f.TagInclude = append(f.TagInclude, str.Value) + } + } + } + } + } + if err := f.CompileFilter(); err != nil { + return f, err + } + delete(tbl.Fields, "namedrop") delete(tbl.Fields, "namepass") delete(tbl.Fields, "fielddrop") @@ -689,7 +716,9 @@ func buildFilter(tbl *ast.Table) internal_models.Filter { delete(tbl.Fields, "pass") delete(tbl.Fields, "tagdrop") delete(tbl.Fields, "tagpass") - return f + delete(tbl.Fields, "tagexclude") + delete(tbl.Fields, "taginclude") + return f, nil } // buildInput parses input specific items from the ast.Table, @@ -748,7 +777,11 @@ func buildInput(name string, tbl *ast.Table) (*internal_models.InputConfig, erro delete(tbl.Fields, "name_override") delete(tbl.Fields, "interval") delete(tbl.Fields, "tags") - cp.Filter = buildFilter(tbl) + var err error + cp.Filter, err = buildFilter(tbl) + if err != nil { + return cp, err + } return cp, nil } @@ -864,13 +897,18 @@ func buildSerializer(name string, tbl *ast.Table) (serializers.Serializer, error return serializers.NewSerializer(c) } -// buildOutput parses output specific items from the ast.Table, builds the filter and returns an +// buildOutput parses output specific items from the ast.Table, 
+// builds the filter and returns an // internal_models.OutputConfig to be inserted into internal_models.RunningInput // Note: error exists in the return for future calls that might require error func buildOutput(name string, tbl *ast.Table) (*internal_models.OutputConfig, error) { + filter, err := buildFilter(tbl) + if err != nil { + return nil, err + } oc := &internal_models.OutputConfig{ Name: name, - Filter: buildFilter(tbl), + Filter: filter, } // Outputs don't support FieldDrop/FieldPass, so set to NameDrop/NamePass if len(oc.Filter.FieldDrop) > 0 { diff --git a/internal/config/config_test.go b/internal/config/config_test.go index d78a8d6b8..1659cd6ec 100644 --- a/internal/config/config_test.go +++ b/internal/config/config_test.go @@ -26,27 +26,29 @@ func TestConfig_LoadSingleInputWithEnvVars(t *testing.T) { memcached := inputs.Inputs["memcached"]().(*memcached.Memcached) memcached.Servers = []string{"192.168.1.1"} - mConfig := &internal_models.InputConfig{ - Name: "memcached", - Filter: internal_models.Filter{ - NameDrop: []string{"metricname2"}, - NamePass: []string{"metricname1"}, - FieldDrop: []string{"other", "stuff"}, - FieldPass: []string{"some", "strings"}, - TagDrop: []internal_models.TagFilter{ - internal_models.TagFilter{ - Name: "badtag", - Filter: []string{"othertag"}, - }, + filter := internal_models.Filter{ + NameDrop: []string{"metricname2"}, + NamePass: []string{"metricname1"}, + FieldDrop: []string{"other", "stuff"}, + FieldPass: []string{"some", "strings"}, + TagDrop: []internal_models.TagFilter{ + internal_models.TagFilter{ + Name: "badtag", + Filter: []string{"othertag"}, }, - TagPass: []internal_models.TagFilter{ - internal_models.TagFilter{ - Name: "goodtag", - Filter: []string{"mytag"}, - }, - }, - IsActive: true, }, + TagPass: []internal_models.TagFilter{ + internal_models.TagFilter{ + Name: "goodtag", + Filter: []string{"mytag"}, + }, + }, + IsActive: true, + } + assert.NoError(t, filter.CompileFilter()) + mConfig := &internal_models.InputConfig{ + Name: "memcached", + Filter: filter, Interval: 10 * time.Second, } mConfig.Tags = make(map[string]string) @@ -64,27 +66,29 @@ func TestConfig_LoadSingleInput(t *testing.T) { memcached := inputs.Inputs["memcached"]().(*memcached.Memcached) memcached.Servers = []string{"localhost"} - mConfig := &internal_models.InputConfig{ - Name: "memcached", - Filter: internal_models.Filter{ - NameDrop: []string{"metricname2"}, - NamePass: []string{"metricname1"}, - FieldDrop: []string{"other", "stuff"}, - FieldPass: []string{"some", "strings"}, - TagDrop: []internal_models.TagFilter{ - internal_models.TagFilter{ - Name: "badtag", - Filter: []string{"othertag"}, - }, + filter := internal_models.Filter{ + NameDrop: []string{"metricname2"}, + NamePass: []string{"metricname1"}, + FieldDrop: []string{"other", "stuff"}, + FieldPass: []string{"some", "strings"}, + TagDrop: []internal_models.TagFilter{ + internal_models.TagFilter{ + Name: "badtag", + Filter: []string{"othertag"}, }, - TagPass: []internal_models.TagFilter{ - internal_models.TagFilter{ - Name: "goodtag", - Filter: []string{"mytag"}, - }, - }, - IsActive: true, }, + TagPass: []internal_models.TagFilter{ + internal_models.TagFilter{ + Name: "goodtag", + Filter: []string{"mytag"}, + }, + }, + IsActive: true, + } + assert.NoError(t, filter.CompileFilter()) + mConfig := &internal_models.InputConfig{ + Name: "memcached", + Filter: filter, Interval: 5 * time.Second, } mConfig.Tags = make(map[string]string) @@ -109,27 +113,29 @@ func TestConfig_LoadDirectory(t *testing.T) { 
memcached := inputs.Inputs["memcached"]().(*memcached.Memcached) memcached.Servers = []string{"localhost"} - mConfig := &internal_models.InputConfig{ - Name: "memcached", - Filter: internal_models.Filter{ - NameDrop: []string{"metricname2"}, - NamePass: []string{"metricname1"}, - FieldDrop: []string{"other", "stuff"}, - FieldPass: []string{"some", "strings"}, - TagDrop: []internal_models.TagFilter{ - internal_models.TagFilter{ - Name: "badtag", - Filter: []string{"othertag"}, - }, + filter := internal_models.Filter{ + NameDrop: []string{"metricname2"}, + NamePass: []string{"metricname1"}, + FieldDrop: []string{"other", "stuff"}, + FieldPass: []string{"some", "strings"}, + TagDrop: []internal_models.TagFilter{ + internal_models.TagFilter{ + Name: "badtag", + Filter: []string{"othertag"}, }, - TagPass: []internal_models.TagFilter{ - internal_models.TagFilter{ - Name: "goodtag", - Filter: []string{"mytag"}, - }, - }, - IsActive: true, }, + TagPass: []internal_models.TagFilter{ + internal_models.TagFilter{ + Name: "goodtag", + Filter: []string{"mytag"}, + }, + }, + IsActive: true, + } + assert.NoError(t, filter.CompileFilter()) + mConfig := &internal_models.InputConfig{ + Name: "memcached", + Filter: filter, Interval: 5 * time.Second, } mConfig.Tags = make(map[string]string) diff --git a/internal/internal.go b/internal/internal.go index ff73aae84..4b8e1536f 100644 --- a/internal/internal.go +++ b/internal/internal.go @@ -139,59 +139,3 @@ func SnakeCase(in string) string { return string(out) } - -// Glob will test a string pattern, potentially containing globs, against a -// subject string. The result is a simple true/false, determining whether or -// not the glob pattern matched the subject text. -// -// Adapted from https://github.com/ryanuber/go-glob/blob/master/glob.go -// thanks Ryan Uber! -func Glob(pattern, measurement string) bool { - // Empty pattern can only match empty subject - if pattern == "" { - return measurement == pattern - } - - // If the pattern _is_ a glob, it matches everything - if pattern == "*" { - return true - } - - parts := strings.Split(pattern, "*") - - if len(parts) == 1 { - // No globs in pattern, so test for match - return pattern == measurement - } - - leadingGlob := strings.HasPrefix(pattern, "*") - trailingGlob := strings.HasSuffix(pattern, "*") - end := len(parts) - 1 - - for i, part := range parts { - switch i { - case 0: - if leadingGlob { - continue - } - if !strings.HasPrefix(measurement, part) { - return false - } - case end: - if len(measurement) > 0 { - return trailingGlob || strings.HasSuffix(measurement, part) - } - default: - if !strings.Contains(measurement, part) { - return false - } - } - - // Trim evaluated text from measurement as we loop over the pattern. 
- idx := strings.Index(measurement, part) + len(part) - measurement = measurement[idx:] - } - - // All parts of the pattern matched - return true -} diff --git a/internal/internal_test.go b/internal/internal_test.go index e4a5eed14..7ff64e87b 100644 --- a/internal/internal_test.go +++ b/internal/internal_test.go @@ -2,47 +2,6 @@ package internal import "testing" -func testGlobMatch(t *testing.T, pattern, subj string) { - if !Glob(pattern, subj) { - t.Errorf("%s should match %s", pattern, subj) - } -} - -func testGlobNoMatch(t *testing.T, pattern, subj string) { - if Glob(pattern, subj) { - t.Errorf("%s should not match %s", pattern, subj) - } -} - -func TestEmptyPattern(t *testing.T) { - testGlobMatch(t, "", "") - testGlobNoMatch(t, "", "test") -} - -func TestPatternWithoutGlobs(t *testing.T) { - testGlobMatch(t, "test", "test") -} - -func TestGlob(t *testing.T) { - for _, pattern := range []string{ - "*test", // Leading glob - "this*", // Trailing glob - "*is*a*", // Lots of globs - "**test**", // Double glob characters - "**is**a***test*", // Varying number of globs - } { - testGlobMatch(t, pattern, "this_is_a_test") - } - - for _, pattern := range []string{ - "test*", // Implicit substring match should fail - "*is", // Partial match should fail - "*no*", // Globs without a match between them should fail - } { - testGlobNoMatch(t, pattern, "this_is_a_test") - } -} - type SnakeTest struct { input string output string diff --git a/internal/models/filter.go b/internal/models/filter.go index e2b1377f4..d78492a5d 100644 --- a/internal/models/filter.go +++ b/internal/models/filter.go @@ -1,33 +1,104 @@ package internal_models import ( + "fmt" "strings" + "github.com/gobwas/glob" + "github.com/influxdata/telegraf" - "github.com/influxdata/telegraf/internal" ) // TagFilter is the name of a tag, and the values on which to filter type TagFilter struct { Name string Filter []string + filter glob.Glob } // Filter containing drop/pass and tagdrop/tagpass rules type Filter struct { NameDrop []string + nameDrop glob.Glob NamePass []string + namePass glob.Glob FieldDrop []string + fieldDrop glob.Glob FieldPass []string + fieldPass glob.Glob TagDrop []TagFilter TagPass []TagFilter + TagExclude []string + tagExclude glob.Glob + TagInclude []string + tagInclude glob.Glob + IsActive bool } -func (f Filter) ShouldMetricPass(metric telegraf.Metric) bool { +// Compile all Filter lists into glob.Glob objects. 
+func (f *Filter) CompileFilter() error { + var err error + f.nameDrop, err = compileFilter(f.NameDrop) + if err != nil { + return fmt.Errorf("Error compiling 'namedrop', %s", err) + } + f.namePass, err = compileFilter(f.NamePass) + if err != nil { + return fmt.Errorf("Error compiling 'namepass', %s", err) + } + + f.fieldDrop, err = compileFilter(f.FieldDrop) + if err != nil { + return fmt.Errorf("Error compiling 'fielddrop', %s", err) + } + f.fieldPass, err = compileFilter(f.FieldPass) + if err != nil { + return fmt.Errorf("Error compiling 'fieldpass', %s", err) + } + + f.tagExclude, err = compileFilter(f.TagExclude) + if err != nil { + return fmt.Errorf("Error compiling 'tagexclude', %s", err) + } + f.tagInclude, err = compileFilter(f.TagInclude) + if err != nil { + return fmt.Errorf("Error compiling 'taginclude', %s", err) + } + + for i, _ := range f.TagDrop { + f.TagDrop[i].filter, err = compileFilter(f.TagDrop[i].Filter) + if err != nil { + return fmt.Errorf("Error compiling 'tagdrop', %s", err) + } + } + for i, _ := range f.TagPass { + f.TagPass[i].filter, err = compileFilter(f.TagPass[i].Filter) + if err != nil { + return fmt.Errorf("Error compiling 'tagpass', %s", err) + } + } + return nil +} + +func compileFilter(filter []string) (glob.Glob, error) { + if len(filter) == 0 { + return nil, nil + } + var g glob.Glob + var err error + if len(filter) == 1 { + g, err = glob.Compile(filter[0]) + } else { + g, err = glob.Compile("{" + strings.Join(filter, ",") + "}") + } + return g, err +} + +func (f *Filter) ShouldMetricPass(metric telegraf.Metric) bool { if f.ShouldNamePass(metric.Name()) && f.ShouldTagsPass(metric.Tags()) { return true } @@ -36,70 +107,51 @@ func (f Filter) ShouldMetricPass(metric telegraf.Metric) bool { // ShouldFieldsPass returns true if the metric should pass, false if should drop // based on the drop/pass filter parameters -func (f Filter) ShouldNamePass(key string) bool { - if f.NamePass != nil { - for _, pat := range f.NamePass { - // TODO remove HasPrefix check, leaving it for now for legacy support. - // Cam, 2015-12-07 - if strings.HasPrefix(key, pat) || internal.Glob(pat, key) { - return true - } +func (f *Filter) ShouldNamePass(key string) bool { + if f.namePass != nil { + if f.namePass.Match(key) { + return true } return false } - if f.NameDrop != nil { - for _, pat := range f.NameDrop { - // TODO remove HasPrefix check, leaving it for now for legacy support. - // Cam, 2015-12-07 - if strings.HasPrefix(key, pat) || internal.Glob(pat, key) { - return false - } + if f.nameDrop != nil { + if f.nameDrop.Match(key) { + return false } - - return true } return true } // ShouldFieldsPass returns true if the metric should pass, false if should drop // based on the drop/pass filter parameters -func (f Filter) ShouldFieldsPass(key string) bool { - if f.FieldPass != nil { - for _, pat := range f.FieldPass { - // TODO remove HasPrefix check, leaving it for now for legacy support. - // Cam, 2015-12-07 - if strings.HasPrefix(key, pat) || internal.Glob(pat, key) { - return true - } +func (f *Filter) ShouldFieldsPass(key string) bool { + if f.fieldPass != nil { + if f.fieldPass.Match(key) { + return true } return false } - if f.FieldDrop != nil { - for _, pat := range f.FieldDrop { - // TODO remove HasPrefix check, leaving it for now for legacy support. 
- // Cam, 2015-12-07 - if strings.HasPrefix(key, pat) || internal.Glob(pat, key) { - return false - } + if f.fieldDrop != nil { + if f.fieldDrop.Match(key) { + return false } - - return true } return true } // ShouldTagsPass returns true if the metric should pass, false if should drop // based on the tagdrop/tagpass filter parameters -func (f Filter) ShouldTagsPass(tags map[string]string) bool { +func (f *Filter) ShouldTagsPass(tags map[string]string) bool { if f.TagPass != nil { for _, pat := range f.TagPass { + if pat.filter == nil { + continue + } if tagval, ok := tags[pat.Name]; ok { - for _, filter := range pat.Filter { - if internal.Glob(filter, tagval) { - return true - } + if pat.filter.Match(tagval) { + return true } } } @@ -108,11 +160,12 @@ func (f Filter) ShouldTagsPass(tags map[string]string) bool { if f.TagDrop != nil { for _, pat := range f.TagDrop { + if pat.filter == nil { + continue + } if tagval, ok := tags[pat.Name]; ok { - for _, filter := range pat.Filter { - if internal.Glob(filter, tagval) { - return false - } + if pat.filter.Match(tagval) { + return false } } } @@ -121,3 +174,23 @@ func (f Filter) ShouldTagsPass(tags map[string]string) bool { return true } + +// Apply TagInclude and TagExclude filters. +// modifies the tags map in-place. +func (f *Filter) FilterTags(tags map[string]string) { + if f.tagInclude != nil { + for k, _ := range tags { + if !f.tagInclude.Match(k) { + delete(tags, k) + } + } + } + + if f.tagExclude != nil { + for k, _ := range tags { + if f.tagExclude.Match(k) { + delete(tags, k) + } + } + } +} diff --git a/internal/models/filter_test.go b/internal/models/filter_test.go index c69398494..a37416095 100644 --- a/internal/models/filter_test.go +++ b/internal/models/filter_test.go @@ -2,6 +2,11 @@ package internal_models import ( "testing" + + "github.com/influxdata/telegraf/testutil" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" ) func TestFilter_Empty(t *testing.T) { @@ -28,6 +33,7 @@ func TestFilter_NamePass(t *testing.T) { f := Filter{ NamePass: []string{"foo*", "cpu_usage_idle"}, } + require.NoError(t, f.CompileFilter()) passes := []string{ "foo", @@ -61,6 +67,7 @@ func TestFilter_NameDrop(t *testing.T) { f := Filter{ NameDrop: []string{"foo*", "cpu_usage_idle"}, } + require.NoError(t, f.CompileFilter()) drops := []string{ "foo", @@ -94,6 +101,7 @@ func TestFilter_FieldPass(t *testing.T) { f := Filter{ FieldPass: []string{"foo*", "cpu_usage_idle"}, } + require.NoError(t, f.CompileFilter()) passes := []string{ "foo", @@ -127,6 +135,7 @@ func TestFilter_FieldDrop(t *testing.T) { f := Filter{ FieldDrop: []string{"foo*", "cpu_usage_idle"}, } + require.NoError(t, f.CompileFilter()) drops := []string{ "foo", @@ -169,6 +178,7 @@ func TestFilter_TagPass(t *testing.T) { f := Filter{ TagPass: filters, } + require.NoError(t, f.CompileFilter()) passes := []map[string]string{ {"cpu": "cpu-total"}, @@ -212,6 +222,7 @@ func TestFilter_TagDrop(t *testing.T) { f := Filter{ TagDrop: filters, } + require.NoError(t, f.CompileFilter()) drops := []map[string]string{ {"cpu": "cpu-total"}, @@ -241,3 +252,115 @@ func TestFilter_TagDrop(t *testing.T) { } } } + +func TestFilter_CompileFilterError(t *testing.T) { + f := Filter{ + NameDrop: []string{"", ""}, + } + assert.Error(t, f.CompileFilter()) + f = Filter{ + NamePass: []string{"", ""}, + } + assert.Error(t, f.CompileFilter()) + f = Filter{ + FieldDrop: []string{"", ""}, + } + assert.Error(t, f.CompileFilter()) + f = Filter{ + FieldPass: []string{"", ""}, + } + assert.Error(t, 
f.CompileFilter()) + f = Filter{ + TagExclude: []string{"", ""}, + } + assert.Error(t, f.CompileFilter()) + f = Filter{ + TagInclude: []string{"", ""}, + } + assert.Error(t, f.CompileFilter()) + filters := []TagFilter{ + TagFilter{ + Name: "cpu", + Filter: []string{"{foobar}"}, + }} + f = Filter{ + TagDrop: filters, + } + require.Error(t, f.CompileFilter()) + filters = []TagFilter{ + TagFilter{ + Name: "cpu", + Filter: []string{"{foobar}"}, + }} + f = Filter{ + TagPass: filters, + } + require.Error(t, f.CompileFilter()) +} + +func TestFilter_ShouldMetricsPass(t *testing.T) { + m := testutil.TestMetric(1, "testmetric") + f := Filter{ + NameDrop: []string{"foobar"}, + } + require.NoError(t, f.CompileFilter()) + require.True(t, f.ShouldMetricPass(m)) + + m = testutil.TestMetric(1, "foobar") + require.False(t, f.ShouldMetricPass(m)) +} + +func TestFilter_FilterTagsNoMatches(t *testing.T) { + pretags := map[string]string{ + "host": "localhost", + "mytag": "foobar", + } + f := Filter{ + TagExclude: []string{"nomatch"}, + } + require.NoError(t, f.CompileFilter()) + + f.FilterTags(pretags) + assert.Equal(t, map[string]string{ + "host": "localhost", + "mytag": "foobar", + }, pretags) + + f = Filter{ + TagInclude: []string{"nomatch"}, + } + require.NoError(t, f.CompileFilter()) + + f.FilterTags(pretags) + assert.Equal(t, map[string]string{}, pretags) +} + +func TestFilter_FilterTagsMatches(t *testing.T) { + pretags := map[string]string{ + "host": "localhost", + "mytag": "foobar", + } + f := Filter{ + TagExclude: []string{"ho*"}, + } + require.NoError(t, f.CompileFilter()) + + f.FilterTags(pretags) + assert.Equal(t, map[string]string{ + "mytag": "foobar", + }, pretags) + + pretags = map[string]string{ + "host": "localhost", + "mytag": "foobar", + } + f = Filter{ + TagInclude: []string{"my*"}, + } + require.NoError(t, f.CompileFilter()) + + f.FilterTags(pretags) + assert.Equal(t, map[string]string{ + "mytag": "foobar", + }, pretags) +} diff --git a/internal/models/running_output.go b/internal/models/running_output.go index 1e3d44a61..c76dffcdf 100644 --- a/internal/models/running_output.go +++ b/internal/models/running_output.go @@ -59,6 +59,19 @@ func (ro *RunningOutput) AddMetric(metric telegraf.Metric) { ro.Lock() defer ro.Unlock() + // Filter any tagexclude/taginclude parameters before adding metric + if len(ro.Config.Filter.TagExclude) != 0 || len(ro.Config.Filter.TagInclude) != 0 { + // In order to filter out tags, we need to create a new metric, since + // metrics are immutable once created. + tags := metric.Tags() + fields := metric.Fields() + t := metric.Time() + name := metric.Name() + ro.Config.Filter.FilterTags(tags) + // error is not possible if creating from another metric, so ignore. + metric, _ = telegraf.NewMetric(name, tags, fields, t) + } + if len(ro.metrics) < ro.MetricBufferLimit { ro.metrics = append(ro.metrics, metric) } else { diff --git a/internal/models/running_output_test.go b/internal/models/running_output_test.go index 6eee3bd11..9607f2417 100644 --- a/internal/models/running_output_test.go +++ b/internal/models/running_output_test.go @@ -29,6 +29,146 @@ var next5 = []telegraf.Metric{ testutil.TestMetric(101, "metric10"), } +// Test that NameDrop filters ger properly applied. 
+func TestRunningOutput_DropFilter(t *testing.T) { + conf := &OutputConfig{ + Filter: Filter{ + IsActive: true, + NameDrop: []string{"metric1", "metric2"}, + }, + } + assert.NoError(t, conf.Filter.CompileFilter()) + + m := &mockOutput{} + ro := NewRunningOutput("test", m, conf) + + for _, metric := range first5 { + ro.AddMetric(metric) + } + for _, metric := range next5 { + ro.AddMetric(metric) + } + assert.Len(t, m.Metrics(), 0) + + err := ro.Write() + assert.NoError(t, err) + assert.Len(t, m.Metrics(), 8) +} + +// Test that NameDrop filters without a match do nothing. +func TestRunningOutput_PassFilter(t *testing.T) { + conf := &OutputConfig{ + Filter: Filter{ + IsActive: true, + NameDrop: []string{"metric1000", "foo*"}, + }, + } + assert.NoError(t, conf.Filter.CompileFilter()) + + m := &mockOutput{} + ro := NewRunningOutput("test", m, conf) + + for _, metric := range first5 { + ro.AddMetric(metric) + } + for _, metric := range next5 { + ro.AddMetric(metric) + } + assert.Len(t, m.Metrics(), 0) + + err := ro.Write() + assert.NoError(t, err) + assert.Len(t, m.Metrics(), 10) +} + +// Test that tags are properly included +func TestRunningOutput_TagIncludeNoMatch(t *testing.T) { + conf := &OutputConfig{ + Filter: Filter{ + IsActive: true, + TagInclude: []string{"nothing*"}, + }, + } + assert.NoError(t, conf.Filter.CompileFilter()) + + m := &mockOutput{} + ro := NewRunningOutput("test", m, conf) + + ro.AddMetric(first5[0]) + assert.Len(t, m.Metrics(), 0) + + err := ro.Write() + assert.NoError(t, err) + assert.Len(t, m.Metrics(), 1) + assert.Empty(t, m.Metrics()[0].Tags()) +} + +// Test that tags are properly excluded +func TestRunningOutput_TagExcludeMatch(t *testing.T) { + conf := &OutputConfig{ + Filter: Filter{ + IsActive: true, + TagExclude: []string{"tag*"}, + }, + } + assert.NoError(t, conf.Filter.CompileFilter()) + + m := &mockOutput{} + ro := NewRunningOutput("test", m, conf) + + ro.AddMetric(first5[0]) + assert.Len(t, m.Metrics(), 0) + + err := ro.Write() + assert.NoError(t, err) + assert.Len(t, m.Metrics(), 1) + assert.Len(t, m.Metrics()[0].Tags(), 0) +} + +// Test that tags are properly Excluded +func TestRunningOutput_TagExcludeNoMatch(t *testing.T) { + conf := &OutputConfig{ + Filter: Filter{ + IsActive: true, + TagExclude: []string{"nothing*"}, + }, + } + assert.NoError(t, conf.Filter.CompileFilter()) + + m := &mockOutput{} + ro := NewRunningOutput("test", m, conf) + + ro.AddMetric(first5[0]) + assert.Len(t, m.Metrics(), 0) + + err := ro.Write() + assert.NoError(t, err) + assert.Len(t, m.Metrics(), 1) + assert.Len(t, m.Metrics()[0].Tags(), 1) +} + +// Test that tags are properly included +func TestRunningOutput_TagIncludeMatch(t *testing.T) { + conf := &OutputConfig{ + Filter: Filter{ + IsActive: true, + TagInclude: []string{"tag*"}, + }, + } + assert.NoError(t, conf.Filter.CompileFilter()) + + m := &mockOutput{} + ro := NewRunningOutput("test", m, conf) + + ro.AddMetric(first5[0]) + assert.Len(t, m.Metrics(), 0) + + err := ro.Write() + assert.NoError(t, err) + assert.Len(t, m.Metrics(), 1) + assert.Len(t, m.Metrics()[0].Tags(), 1) +} + // Test that we can write metrics with simple default setup. 
func TestRunningOutputDefault(t *testing.T) { conf := &OutputConfig{ From d638f6e4119b0b2dd76e3a0d6ed24ec1b94627e3 Mon Sep 17 00:00:00 2001 From: Shahzheeb Khan Date: Fri, 15 Apr 2016 11:10:30 -0400 Subject: [PATCH 03/84] mongodb readme and examples mongodb readme and examples closes #1039 --- plugins/inputs/mongodb/README.md | 53 ++++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) create mode 100644 plugins/inputs/mongodb/README.md diff --git a/plugins/inputs/mongodb/README.md b/plugins/inputs/mongodb/README.md new file mode 100644 index 000000000..56fe73840 --- /dev/null +++ b/plugins/inputs/mongodb/README.md @@ -0,0 +1,53 @@ +# Telegraf plugin: MongoDB + +#### Configuration + +```toml +[[inputs.mongodb]] + ## An array of URI to gather stats about. Specify an ip or hostname + ## with optional port and password. ie, + ## mongodb://user:auth_key@10.10.3.30:27017, + ## mongodb://10.10.3.33:18832, + ## 10.0.0.1:10000, etc. + servers = ["127.0.0.1:27017"] +``` + +For authenticated mongodb instances, use a mongodb connection URI: + +```toml +[[inputs.mongodb]] + servers = ["mongodb://username:password@10.XX.XX.XX:27101/mydatabase?authSource=admin"] +``` +This connection URI may be different based on your environment and mongodb +setup. If the user doesn't have the required privilege to execute the serverStatus +command, you will get this error from telegraf: + +``` +Error in input [mongodb]: not authorized on admin to execute command { serverStatus: 1, recordStats: 0 } +``` + +#### Description + +The telegraf plugin collects mongodb stats exposed by serverStatus and a few more, +and creates a single measurement containing values e.g. + * active_reads + * active_writes + * commands_per_sec + * deletes_per_sec + * flushes_per_sec + * getmores_per_sec + * inserts_per_sec + * net_in_bytes + * net_out_bytes + * open_connections + * percent_cache_dirty + * percent_cache_used + * queries_per_sec + * queued_reads + * queued_writes + * resident_megabytes + * updates_per_sec + * vsize_megabytes + + + From 75a9845d20c481fc70747911ad9433b07b1049d9 Mon Sep 17 00:00:00 2001 From: Thibault Cohen Date: Sun, 17 Apr 2016 22:08:56 -0400 Subject: [PATCH 04/84] SNMP Fix #995 closes #995 --- plugins/inputs/snmp/snmp.go | 7 ++++--- plugins/inputs/snmp/snmp_test.go | 8 +++++--- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/plugins/inputs/snmp/snmp.go b/plugins/inputs/snmp/snmp.go index 4c2de93c9..3a95e84fa 100644 --- a/plugins/inputs/snmp/snmp.go +++ b/plugins/inputs/snmp/snmp.go @@ -398,15 +398,16 @@ func (s *Snmp) Gather(acc telegraf.Accumulator) error { // only if len(s.OidInstanceMapping) == 0 if len(OidInstanceMapping) >= 0 { if err := host.SNMPMap(acc, s.nameToOid, s.subTableMap); err != nil { - return err + log.Printf("SNMP Mapping error for host '%s': %s", host.Address, err) + continue } } // Launch Get requests if err := host.SNMPGet(acc, s.initNode); err != nil { - return err + log.Printf("SNMP Error for host '%s': %s", host.Address, err) } if err := host.SNMPBulk(acc, s.initNode); err != nil { - return err + log.Printf("SNMP Error for host '%s': %s", host.Address, err) } } return nil diff --git a/plugins/inputs/snmp/snmp_test.go b/plugins/inputs/snmp/snmp_test.go index 22414fb79..5822926dd 100644 --- a/plugins/inputs/snmp/snmp_test.go +++ b/plugins/inputs/snmp/snmp_test.go @@ -5,7 +5,7 @@ import ( "github.com/influxdata/telegraf/testutil" - // "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) @@ -45,7 +45,8 @@
func TestSNMPErrorGet2(t *testing.T) { var acc testutil.Accumulator err := s.Gather(&acc) - require.Error(t, err) + require.NoError(t, err) + assert.Equal(t, 0, len(acc.Metrics)) } func TestSNMPErrorBulk(t *testing.T) { @@ -65,7 +66,8 @@ func TestSNMPErrorBulk(t *testing.T) { var acc testutil.Accumulator err := s.Gather(&acc) - require.Error(t, err) + require.NoError(t, err) + assert.Equal(t, 0, len(acc.Metrics)) } func TestSNMPGet1(t *testing.T) { From 21c7378b611303226d7d47f76c21732141d64ea5 Mon Sep 17 00:00:00 2001 From: Pascal Larin Date: Tue, 12 Apr 2016 12:36:43 -0400 Subject: [PATCH 05/84] Handle onConnect --- plugins/inputs/mqtt_consumer/mqtt_consumer.go | 26 ++++++++++++------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/plugins/inputs/mqtt_consumer/mqtt_consumer.go b/plugins/inputs/mqtt_consumer/mqtt_consumer.go index c64d2139b..0cba92eb6 100644 --- a/plugins/inputs/mqtt_consumer/mqtt_consumer.go +++ b/plugins/inputs/mqtt_consumer/mqtt_consumer.go @@ -116,6 +116,9 @@ func (m *MQTTConsumer) Start(acc telegraf.Accumulator) error { return err } + opts.OnConnect = onConnect + + m.client = mqtt.NewClient(opts) if token := m.client.Connect(); token.Wait() && token.Error() != nil { return token.Error() @@ -124,21 +127,24 @@ func (m *MQTTConsumer) Start(acc telegraf.Accumulator) error { m.in = make(chan mqtt.Message, 1000) m.done = make(chan struct{}) - topics := make(map[string]byte) - for _, topic := range m.Topics { - topics[topic] = byte(m.QoS) - } - subscribeToken := m.client.SubscribeMultiple(topics, m.recvMessage) - subscribeToken.Wait() - if subscribeToken.Error() != nil { - return subscribeToken.Error() - } + go m.receiver() return nil } - + func onConnect(c *MQTT.Client) { + topics := make(map[string]byte) + for _, topic := range m.Topics { + topics[topic] = byte(m.QoS) + } + subscribeToken := c.SubscribeMultiple(topics, m.recvMessage) + subscribeToken.Wait() + if subscribeToken.Error() != nil { + log.Printf("MQTT SUBSCRIBE ERROR\ntopics: %s\nerror: %s", + string(m.Topics), err.Error()) + } + } // receiver() reads all incoming messages from the consumer, and parses them into // influxdb metric points. 
func (m *MQTTConsumer) receiver() { From ce94e636bbdb5e589673a6736dc4d4cb9ac83f37 Mon Sep 17 00:00:00 2001 From: Pascal Larin Date: Tue, 12 Apr 2016 21:43:25 -0400 Subject: [PATCH 06/84] Resubscribe if not using persistent sessions --- plugins/inputs/mqtt_consumer/mqtt_consumer.go | 26 ++++++++++++------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/plugins/inputs/mqtt_consumer/mqtt_consumer.go b/plugins/inputs/mqtt_consumer/mqtt_consumer.go index 0cba92eb6..72e0d3c19 100644 --- a/plugins/inputs/mqtt_consumer/mqtt_consumer.go +++ b/plugins/inputs/mqtt_consumer/mqtt_consumer.go @@ -46,6 +46,8 @@ type MQTTConsumer struct { // keep the accumulator internally: acc telegraf.Accumulator + + started bool false } var sampleConfig = ` @@ -134,16 +136,19 @@ func (m *MQTTConsumer) Start(acc telegraf.Accumulator) error { return nil } func onConnect(c *MQTT.Client) { - topics := make(map[string]byte) - for _, topic := range m.Topics { - topics[topic] = byte(m.QoS) - } - subscribeToken := c.SubscribeMultiple(topics, m.recvMessage) - subscribeToken.Wait() - if subscribeToken.Error() != nil { - log.Printf("MQTT SUBSCRIBE ERROR\ntopics: %s\nerror: %s", - string(m.Topics), err.Error()) - } + if (!m.PersistentSession || !m.started) { + topics := make(map[string]byte) + for _, topic := range m.Topics { + topics[topic] = byte(m.QoS) + } + subscribeToken := c.SubscribeMultiple(topics, m.recvMessage) + subscribeToken.Wait() + if subscribeToken.Error() != nil { + log.Printf("MQTT SUBSCRIBE ERROR\ntopics: %s\nerror: %s", + string(m.Topics), err.Error()) + } + m.started = true; + } } // receiver() reads all incoming messages from the consumer, and parses them into // influxdb metric points. @@ -178,6 +183,7 @@ func (m *MQTTConsumer) Stop() { defer m.Unlock() close(m.done) m.client.Disconnect(200) + m.started = false } func (m *MQTTConsumer) Gather(acc telegraf.Accumulator) error { From 8ec8ae0587f17484f40fa2f6e36fc5d3ff5531b2 Mon Sep 17 00:00:00 2001 From: chaton78 Date: Wed, 13 Apr 2016 22:58:45 -0400 Subject: [PATCH 07/84] Added onConnection and connectionLost Handlers --- plugins/inputs/mqtt_consumer/mqtt_consumer.go | 32 +++++++++++-------- 1 file changed, 19 insertions(+), 13 deletions(-) diff --git a/plugins/inputs/mqtt_consumer/mqtt_consumer.go b/plugins/inputs/mqtt_consumer/mqtt_consumer.go index 72e0d3c19..9cb420a42 100644 --- a/plugins/inputs/mqtt_consumer/mqtt_consumer.go +++ b/plugins/inputs/mqtt_consumer/mqtt_consumer.go @@ -3,6 +3,7 @@ package mqtt_consumer import ( "fmt" "log" + "strings" "sync" "time" @@ -46,8 +47,8 @@ type MQTTConsumer struct { // keep the accumulator internally: acc telegraf.Accumulator - - started bool false + + started bool } var sampleConfig = ` @@ -102,6 +103,7 @@ func (m *MQTTConsumer) SetParser(parser parsers.Parser) { func (m *MQTTConsumer) Start(acc telegraf.Accumulator) error { m.Lock() defer m.Unlock() + m.started = false if m.PersistentSession && m.ClientID == "" { return fmt.Errorf("ERROR MQTT Consumer: When using persistent_session" + @@ -118,9 +120,6 @@ func (m *MQTTConsumer) Start(acc telegraf.Accumulator) error { return err } - opts.OnConnect = onConnect - - m.client = mqtt.NewClient(opts) if token := m.client.Connect(); token.Wait() && token.Error() != nil { return token.Error() @@ -129,14 +128,12 @@ func (m *MQTTConsumer) Start(acc telegraf.Accumulator) error { m.in = make(chan mqtt.Message, 1000) m.done = make(chan struct{}) - - go m.receiver() return nil } - func onConnect(c *MQTT.Client) { - if (!m.PersistentSession || !m.started) { +func (m 
*MQTTConsumer) onConnect(c mqtt.Client) { + if !m.PersistentSession || !m.started { topics := make(map[string]byte) for _, topic := range m.Topics { topics[topic] = byte(m.QoS) @@ -145,11 +142,18 @@ func (m *MQTTConsumer) Start(acc telegraf.Accumulator) error { subscribeToken.Wait() if subscribeToken.Error() != nil { log.Printf("MQTT SUBSCRIBE ERROR\ntopics: %s\nerror: %s", - string(m.Topics), err.Error()) + strings.Join(m.Topics[:], ","), subscribeToken.Error()) } - m.started = true; - } - } + m.started = true + } + return +} + +func (m *MQTTConsumer) onConnectionLost(c mqtt.Client, err error) { + log.Printf("MQTT Connection lost\nerror: %s\nClient should retry to reconnect", err.Error()) + return +} + // receiver() reads all incoming messages from the consumer, and parses them into // influxdb metric points. func (m *MQTTConsumer) receiver() { @@ -231,6 +235,8 @@ func (m *MQTTConsumer) createOpts() (*mqtt.ClientOptions, error) { opts.SetAutoReconnect(true) opts.SetKeepAlive(time.Second * 60) opts.SetCleanSession(!m.PersistentSession) + opts.SetOnConnectHandler(m.onConnect) + opts.SetConnectionLostHandler(m.onConnectionLost) return opts, nil } From 93f57edd3af607da2eae0da2edad460cfab04261 Mon Sep 17 00:00:00 2001 From: chaton78 Date: Wed, 13 Apr 2016 23:06:30 -0400 Subject: [PATCH 08/84] Better logging for MQTT consumer closes #1023 closes #921 --- CHANGELOG.md | 3 ++- plugins/inputs/mqtt_consumer/mqtt_consumer.go | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f1fd5ed43..cd5ea0625 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,7 +2,7 @@ ### Release Notes - `tagexclude` and `tagexclude` are now available, which can be used to remove -tags from measurements on inputs and outputs. See +tags from measurements on inputs and outputs. See [the configuration doc](https://github.com/influxdata/telegraf/blob/master/docs/CONFIGURATION.md) for more details. - **Measurement filtering:** All measurement filters now match based on glob @@ -14,6 +14,7 @@ based on _prefix_ in addition to globs. This means that a filter like - [#1017](https://github.com/influxdata/telegraf/pull/1017): taginclude and tagexclude arguments. ### Bugfixes +- [#921](https://github.com/influxdata/telegraf/pull/921): mqtt_consumer stops gathering metrics. Thanks @chaton78! 
## v0.12.1 [2016-04-14] diff --git a/plugins/inputs/mqtt_consumer/mqtt_consumer.go b/plugins/inputs/mqtt_consumer/mqtt_consumer.go index 9cb420a42..beebe00ce 100644 --- a/plugins/inputs/mqtt_consumer/mqtt_consumer.go +++ b/plugins/inputs/mqtt_consumer/mqtt_consumer.go @@ -133,6 +133,7 @@ func (m *MQTTConsumer) Start(acc telegraf.Accumulator) error { return nil } func (m *MQTTConsumer) onConnect(c mqtt.Client) { + log.Printf("MQTT Client Connected") if !m.PersistentSession || !m.started { topics := make(map[string]byte) for _, topic := range m.Topics { @@ -150,7 +151,7 @@ func (m *MQTTConsumer) onConnect(c mqtt.Client) { } func (m *MQTTConsumer) onConnectionLost(c mqtt.Client, err error) { - log.Printf("MQTT Connection lost\nerror: %s\nClient should retry to reconnect", err.Error()) + log.Printf("MQTT Connection lost\nerror: %s\nMQTT Client will try to reconnect", err.Error()) return } From 4d46589d398ca8b8b2df05916eed02bba58c57d5 Mon Sep 17 00:00:00 2001 From: Cameron Sparr Date: Mon, 18 Apr 2016 13:20:06 -0600 Subject: [PATCH 09/84] JSON input: make string ignores clear --- docs/DATA_FORMATS_INPUT.md | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/docs/DATA_FORMATS_INPUT.md b/docs/DATA_FORMATS_INPUT.md index 6a916711b..07134e979 100644 --- a/docs/DATA_FORMATS_INPUT.md +++ b/docs/DATA_FORMATS_INPUT.md @@ -75,14 +75,19 @@ metrics are parsed directly into Telegraf metrics. # JSON: -The JSON data format flattens JSON into metric _fields_. For example, this JSON: +The JSON data format flattens JSON into metric _fields_. +NOTE: Only numerical values are converted to fields, and they are converted +into a float. strings are ignored unless specified as a tag_key (see below). + +So for example, this JSON: ```json { "a": 5, "b": { "c": 6 - } + }, + "ignored": "I'm a string" } ``` From 36d330fea044597a6b3eb3ac9356432d3ea3588a Mon Sep 17 00:00:00 2001 From: Cameron Sparr Date: Tue, 12 Apr 2016 14:59:19 -0600 Subject: [PATCH 10/84] docker plugin schema refactor - renaming cont_name and cont_image to container_name and container_image. - cont_id is now a field, called container_id - docker_cpu, docker_mem, docker_net measurements have been renamed to docker_container_cpu, docker_container_mem, and docker_container_net closes #1014 closes #1052 --- CHANGELOG.md | 14 +++++++ plugins/inputs/docker/docker.go | 47 ++++++++++++---------- plugins/inputs/docker/docker_test.go | 59 ++++++++++++++-------------- 3 files changed, 71 insertions(+), 49 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index cd5ea0625..9d869325c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,19 @@ ## v0.13 [unreleased] ### Release Notes +- **Breaking Change**: docker plugin tags. The cont_id tag no longer exists, it +will now be a field, and be called container_id. Additionally, cont_image and +cont_name are being renamed to container_image and container_name. +- **Breaking Change**: docker plugin measurements. The `docker_cpu`, `docker_mem`, +and `docker_net` measurements are being renamed to `docker_container_cpu`, +`docker_container_mem`, and `docker_container_net`. Why? Because these metrics are +specifically tracking per-container stats. The problem with per-container stats, +in some use-cases, is that if containers are short-lived AND names are not +kept consistent, then the series cardinality will balloon very quickly. 
+So adding "container" to each metric will: +(1) make it more clear that these metrics are per-container, and +(2) allow users to easily drop per-container metrics if cardinality is an +issue (`namedrop = ["docker_container_*"]`) - `tagexclude` and `tagexclude` are now available, which can be used to remove tags from measurements on inputs and outputs. See [the configuration doc](https://github.com/influxdata/telegraf/blob/master/docs/CONFIGURATION.md) @@ -12,6 +25,7 @@ based on _prefix_ in addition to globs. This means that a filter like ### Features - [#1017](https://github.com/influxdata/telegraf/pull/1017): taginclude and tagexclude arguments. +- [#1015](https://github.com/influxdata/telegraf/pull/1015): Docker plugin schema refactor. ### Bugfixes - [#921](https://github.com/influxdata/telegraf/pull/921): mqtt_consumer stops gathering metrics. Thanks @chaton78! diff --git a/plugins/inputs/docker/docker.go b/plugins/inputs/docker/docker.go index 094bad8ca..897d4f1a5 100644 --- a/plugins/inputs/docker/docker.go +++ b/plugins/inputs/docker/docker.go @@ -111,7 +111,8 @@ func (d *Docker) Gather(acc telegraf.Accumulator) error { defer wg.Done() err := d.gatherContainer(c, acc) if err != nil { - fmt.Println(err.Error()) + log.Printf("Error gathering container %s stats: %s\n", + c.Names, err.Error()) } }(container) } @@ -200,9 +201,8 @@ func (d *Docker) gatherContainer( } tags := map[string]string{ - "cont_id": container.ID, - "cont_name": cname, - "cont_image": container.Image, + "container_name": cname, + "container_image": container.Image, } if len(d.ContainerNames) > 0 { if !sliceContains(cname, d.ContainerNames) { @@ -217,15 +217,18 @@ func (d *Docker) gatherContainer( defer r.Close() dec := json.NewDecoder(r) if err = dec.Decode(&v); err != nil { - log.Printf("Error decoding: %s\n", err.Error()) + if err == io.EOF { + return nil + } + return fmt.Errorf("Error decoding: %s", err.Error()) } // Add labels to tags - for k, v := range container.Labels { - tags[k] = v + for k, label := range container.Labels { + tags[k] = label } - gatherContainerStats(v, acc, tags) + gatherContainerStats(v, acc, tags, container.ID) return nil } @@ -234,6 +237,7 @@ func gatherContainerStats( stat *types.StatsJSON, acc telegraf.Accumulator, tags map[string]string, + id string, ) { now := stat.Read @@ -272,8 +276,9 @@ func gatherContainerStats( "inactive_file": stat.MemoryStats.Stats["inactive_file"], "total_pgpgin": stat.MemoryStats.Stats["total_pgpgin"], "usage_percent": calculateMemPercent(stat), + "container_id": id, } - acc.AddFields("docker_mem", memfields, tags, now) + acc.AddFields("docker_container_mem", memfields, tags, now) cpufields := map[string]interface{}{ "usage_total": stat.CPUStats.CPUUsage.TotalUsage, @@ -284,32 +289,34 @@ func gatherContainerStats( "throttling_throttled_periods": stat.CPUStats.ThrottlingData.ThrottledPeriods, "throttling_throttled_time": stat.CPUStats.ThrottlingData.ThrottledTime, "usage_percent": calculateCPUPercent(stat), + "container_id": id, } cputags := copyTags(tags) cputags["cpu"] = "cpu-total" - acc.AddFields("docker_cpu", cpufields, cputags, now) + acc.AddFields("docker_container_cpu", cpufields, cputags, now) for i, percpu := range stat.CPUStats.CPUUsage.PercpuUsage { percputags := copyTags(tags) percputags["cpu"] = fmt.Sprintf("cpu%d", i) - acc.AddFields("docker_cpu", map[string]interface{}{"usage_total": percpu}, percputags, now) + acc.AddFields("docker_container_cpu", map[string]interface{}{"usage_total": percpu}, percputags, now) } for network, netstats := range 
stat.Networks { netfields := map[string]interface{}{ - "rx_dropped": netstats.RxDropped, - "rx_bytes": netstats.RxBytes, - "rx_errors": netstats.RxErrors, - "tx_packets": netstats.TxPackets, - "tx_dropped": netstats.TxDropped, - "rx_packets": netstats.RxPackets, - "tx_errors": netstats.TxErrors, - "tx_bytes": netstats.TxBytes, + "rx_dropped": netstats.RxDropped, + "rx_bytes": netstats.RxBytes, + "rx_errors": netstats.RxErrors, + "tx_packets": netstats.TxPackets, + "tx_dropped": netstats.TxDropped, + "rx_packets": netstats.RxPackets, + "tx_errors": netstats.TxErrors, + "tx_bytes": netstats.TxBytes, + "container_id": id, } // Create a new network tag dictionary for the "network" tag nettags := copyTags(tags) nettags["network"] = network - acc.AddFields("docker_net", netfields, nettags, now) + acc.AddFields("docker_container_net", netfields, nettags, now) } gatherBlockIOMetrics(stat, acc, tags, now) diff --git a/plugins/inputs/docker/docker_test.go b/plugins/inputs/docker/docker_test.go index c9fe6cea1..05069ba53 100644 --- a/plugins/inputs/docker/docker_test.go +++ b/plugins/inputs/docker/docker_test.go @@ -21,26 +21,26 @@ func TestDockerGatherContainerStats(t *testing.T) { stats := testStats() tags := map[string]string{ - "cont_id": "foobarbaz", - "cont_name": "redis", - "cont_image": "redis/image", + "container_name": "redis", + "container_image": "redis/image", } - gatherContainerStats(stats, &acc, tags) + gatherContainerStats(stats, &acc, tags, "123456789") - // test docker_net measurement + // test docker_container_net measurement netfields := map[string]interface{}{ - "rx_dropped": uint64(1), - "rx_bytes": uint64(2), - "rx_errors": uint64(3), - "tx_packets": uint64(4), - "tx_dropped": uint64(1), - "rx_packets": uint64(2), - "tx_errors": uint64(3), - "tx_bytes": uint64(4), + "rx_dropped": uint64(1), + "rx_bytes": uint64(2), + "rx_errors": uint64(3), + "tx_packets": uint64(4), + "tx_dropped": uint64(1), + "rx_packets": uint64(2), + "tx_errors": uint64(3), + "tx_bytes": uint64(4), + "container_id": "123456789", } nettags := copyTags(tags) nettags["network"] = "eth0" - acc.AssertContainsTaggedFields(t, "docker_net", netfields, nettags) + acc.AssertContainsTaggedFields(t, "docker_container_net", netfields, nettags) // test docker_blkio measurement blkiotags := copyTags(tags) @@ -51,7 +51,7 @@ func TestDockerGatherContainerStats(t *testing.T) { } acc.AssertContainsTaggedFields(t, "docker_blkio", blkiofields, blkiotags) - // test docker_mem measurement + // test docker_container_mem measurement memfields := map[string]interface{}{ "max_usage": uint64(1001), "usage": uint64(1111), @@ -87,11 +87,12 @@ func TestDockerGatherContainerStats(t *testing.T) { "inactive_file": uint64(3), "total_pgpgin": uint64(4), "usage_percent": float64(55.55), + "container_id": "123456789", } - acc.AssertContainsTaggedFields(t, "docker_mem", memfields, tags) + acc.AssertContainsTaggedFields(t, "docker_container_mem", memfields, tags) - // test docker_cpu measurement + // test docker_container_cpu measurement cputags := copyTags(tags) cputags["cpu"] = "cpu-total" cpufields := map[string]interface{}{ @@ -103,20 +104,21 @@ func TestDockerGatherContainerStats(t *testing.T) { "throttling_throttled_periods": uint64(0), "throttling_throttled_time": uint64(0), "usage_percent": float64(400.0), + "container_id": "123456789", } - acc.AssertContainsTaggedFields(t, "docker_cpu", cpufields, cputags) + acc.AssertContainsTaggedFields(t, "docker_container_cpu", cpufields, cputags) cputags["cpu"] = "cpu0" cpu0fields := 
map[string]interface{}{ "usage_total": uint64(1), } - acc.AssertContainsTaggedFields(t, "docker_cpu", cpu0fields, cputags) + acc.AssertContainsTaggedFields(t, "docker_container_cpu", cpu0fields, cputags) cputags["cpu"] = "cpu1" cpu1fields := map[string]interface{}{ "usage_total": uint64(1002), } - acc.AssertContainsTaggedFields(t, "docker_cpu", cpu1fields, cputags) + acc.AssertContainsTaggedFields(t, "docker_container_cpu", cpu1fields, cputags) } func testStats() *types.StatsJSON { @@ -367,19 +369,18 @@ func TestDockerGatherInfo(t *testing.T) { }, ) acc.AssertContainsTaggedFields(t, - "docker_cpu", + "docker_container_cpu", map[string]interface{}{ "usage_total": uint64(1231652), }, map[string]string{ - "cont_id": "b7dfbb9478a6ae55e237d4d74f8bbb753f0817192b5081334dc78476296e2173", - "cont_name": "etcd2", - "cont_image": "quay.io/coreos/etcd:v2.2.2", - "cpu": "cpu3", + "container_name": "etcd2", + "container_image": "quay.io/coreos/etcd:v2.2.2", + "cpu": "cpu3", }, ) acc.AssertContainsTaggedFields(t, - "docker_mem", + "docker_container_mem", map[string]interface{}{ "total_pgpgout": uint64(0), "usage_percent": float64(0), @@ -415,11 +416,11 @@ func TestDockerGatherInfo(t *testing.T) { "pgfault": uint64(0), "usage": uint64(0), "limit": uint64(18935443456), + "container_id": "b7dfbb9478a6ae55e237d4d74f8bbb753f0817192b5081334dc78476296e2173", }, map[string]string{ - "cont_id": "b7dfbb9478a6ae55e237d4d74f8bbb753f0817192b5081334dc78476296e2173", - "cont_name": "etcd2", - "cont_image": "quay.io/coreos/etcd:v2.2.2", + "container_name": "etcd2", + "container_image": "quay.io/coreos/etcd:v2.2.2", }, ) From 7828bc09cf6cf18e7a3c3521381c08b42f6c8d7c Mon Sep 17 00:00:00 2001 From: Cameron Sparr Date: Mon, 18 Apr 2016 16:20:46 -0600 Subject: [PATCH 11/84] Fixup docker blkio container name & docker doc --- plugins/inputs/docker/README.md | 59 +++++++++++++++------------- plugins/inputs/docker/docker.go | 2 +- plugins/inputs/docker/docker_test.go | 2 +- 3 files changed, 33 insertions(+), 30 deletions(-) diff --git a/plugins/inputs/docker/README.md b/plugins/inputs/docker/README.md index c22e6af8e..e59b6f513 100644 --- a/plugins/inputs/docker/README.md +++ b/plugins/inputs/docker/README.md @@ -29,10 +29,10 @@ for the stat structure can be found Every effort was made to preserve the names based on the JSON response from the docker API. -Note that the docker_cpu metric may appear multiple times per collection, based -on the availability of per-cpu stats on your system. +Note that the docker_container_cpu metric may appear multiple times per collection, +based on the availability of per-cpu stats on your system. -- docker_mem +- docker_container_mem - total_pgmafault - cache - mapped_file @@ -66,7 +66,8 @@ on the availability of per-cpu stats on your system. - usage - failcnt - limit -- docker_cpu + - container_id +- docker_container_cpu - throttling_periods - throttling_throttled_periods - throttling_throttled_time @@ -75,7 +76,8 @@ on the availability of per-cpu stats on your system. - usage_system - usage_total - usage_percent -- docker_net + - container_id +- docker_container_net - rx_dropped - rx_bytes - rx_errors @@ -84,7 +86,8 @@ on the availability of per-cpu stats on your system. - rx_packets - tx_errors - tx_bytes -- docker_blkio + - container_id +- docker_container_blkio - io_service_bytes_recursive_async - io_service_bytes_recursive_read - io_service_bytes_recursive_sync @@ -125,20 +128,20 @@ on the availability of per-cpu stats on your system. 
- docker_metadata - unit=bytes -- docker_cpu specific: - - cont_id (container ID) - - cont_image (container image) - - cont_name (container name) +- docker_container_mem specific: + - container_image + - container_name +- docker_container_cpu specific: + - container_image + - container_name - cpu -- docker_net specific: - - cont_id (container ID) - - cont_image (container image) - - cont_name (container name) +- docker_container_net specific: + - container_image + - container_name - network -- docker_blkio specific: - - cont_id (container ID) - - cont_image (container image) - - cont_name (container name) +- docker_container_blkio specific: + - container_image + - container_name - device ### Example Output: @@ -156,8 +159,8 @@ on the availability of per-cpu stats on your system. > docker,unit=bytes pool_blocksize=65540i 1456926671065383978 > docker_data,unit=bytes available=24340000000i,total=107400000000i,used=14820000000i 1456926671065383978 > docker_metadata,unit=bytes available=2126999999i,total=2146999999i,used=20420000i 145692667106538 -> docker_mem,cont_id=5705ba8ed8fb47527410653d60a8bb2f3af5e62372297c419022a3cc6d45d848,\ -cont_image=spotify/kafka,cont_name=kafka \ +> docker_container_mem, +container_image=spotify/kafka,container_name=kafka \ active_anon=52568064i,active_file=6926336i,cache=12038144i,fail_count=0i,\ hierarchical_memory_limit=9223372036854771712i,inactive_anon=52707328i,\ inactive_file=5111808i,limit=1044578304i,mapped_file=10301440i,\ @@ -168,21 +171,21 @@ total_inactive_file=5111808i,total_mapped_file=10301440i,total_pgfault=63762i,\ total_pgmafault=0i,total_pgpgin=73355i,total_pgpgout=45736i,\ total_rss=105275392i,total_rss_huge=4194304i,total_unevictable=0i,\ total_writeback=0i,unevictable=0i,usage=117440512i,writeback=0i 1453409536840126713 -> docker_cpu,cont_id=5705ba8ed8fb47527410653d60a8bb2f3af5e62372297c419022a3cc6d45d848,\ -cont_image=spotify/kafka,cont_name=kafka,cpu=cpu-total \ +> docker_container_cpu, +container_image=spotify/kafka,container_name=kafka,cpu=cpu-total \ throttling_periods=0i,throttling_throttled_periods=0i,\ throttling_throttled_time=0i,usage_in_kernelmode=440000000i,\ usage_in_usermode=2290000000i,usage_system=84795360000000i,\ usage_total=6628208865i 1453409536840126713 -> docker_cpu,cont_id=5705ba8ed8fb47527410653d60a8bb2f3af5e62372297c419022a3cc6d45d848,\ -cont_image=spotify/kafka,cont_name=kafka,cpu=cpu0 \ +> docker_container_cpu, +container_image=spotify/kafka,container_name=kafka,cpu=cpu0 \ usage_total=6628208865i 1453409536840126713 -> docker_net,cont_id=5705ba8ed8fb47527410653d60a8bb2f3af5e62372297c419022a3cc6d45d848,\ -cont_image=spotify/kafka,cont_name=kafka,network=eth0 \ +> docker_container_net,\ +container_image=spotify/kafka,container_name=kafka,network=eth0 \ rx_bytes=7468i,rx_dropped=0i,rx_errors=0i,rx_packets=94i,tx_bytes=946i,\ tx_dropped=0i,tx_errors=0i,tx_packets=13i 1453409536840126713 -> docker_blkio,cont_id=5705ba8ed8fb47527410653d60a8bb2f3af5e62372297c419022a3cc6d45d848,\ -cont_image=spotify/kafka,cont_name=kafka,device=8:0 \ +> docker_container_blkio, +container_image=spotify/kafka,container_name=kafka,device=8:0 \ io_service_bytes_recursive_async=80216064i,io_service_bytes_recursive_read=79925248i,\ io_service_bytes_recursive_sync=77824i,io_service_bytes_recursive_total=80293888i,\ io_service_bytes_recursive_write=368640i,io_serviced_recursive_async=6562i,\ diff --git a/plugins/inputs/docker/docker.go b/plugins/inputs/docker/docker.go index 897d4f1a5..4241f6b5d 100644 --- a/plugins/inputs/docker/docker.go +++ 
b/plugins/inputs/docker/docker.go @@ -411,7 +411,7 @@ func gatherBlockIOMetrics( for device, fields := range deviceStatMap { iotags := copyTags(tags) iotags["device"] = device - acc.AddFields("docker_blkio", fields, iotags, now) + acc.AddFields("docker_container_blkio", fields, iotags, now) } } diff --git a/plugins/inputs/docker/docker_test.go b/plugins/inputs/docker/docker_test.go index 05069ba53..4ac05f93b 100644 --- a/plugins/inputs/docker/docker_test.go +++ b/plugins/inputs/docker/docker_test.go @@ -49,7 +49,7 @@ func TestDockerGatherContainerStats(t *testing.T) { "io_service_bytes_recursive_read": uint64(100), "io_serviced_recursive_write": uint64(101), } - acc.AssertContainsTaggedFields(t, "docker_blkio", blkiofields, blkiotags) + acc.AssertContainsTaggedFields(t, "docker_container_blkio", blkiofields, blkiotags) // test docker_container_mem measurement memfields := map[string]interface{}{ From 61d681a7c8f26c8eb8ec12caba6b7136f30aca51 Mon Sep 17 00:00:00 2001 From: Cameron Sparr Date: Mon, 18 Apr 2016 16:24:32 -0600 Subject: [PATCH 12/84] docker changelog update --- CHANGELOG.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9d869325c..6c8f611f0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,8 +5,9 @@ will now be a field, and be called container_id. Additionally, cont_image and cont_name are being renamed to container_image and container_name. - **Breaking Change**: docker plugin measurements. The `docker_cpu`, `docker_mem`, -and `docker_net` measurements are being renamed to `docker_container_cpu`, -`docker_container_mem`, and `docker_container_net`. Why? Because these metrics are +`docker_blkio` and `docker_net` measurements are being renamed to +`docker_container_cpu`, `docker_container_mem`, `docker_container_blkio` and +`docker_container_net`. Why? Because these metrics are specifically tracking per-container stats. The problem with per-container stats, in some use-cases, is that if containers are short-lived AND names are not kept consistent, then the series cardinality will balloon very quickly. 
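The cardinality concern described above is also why the renamed measurements all share the `docker_container_` prefix: a single glob in `namedrop` (for example `namedrop = ["docker_container_*"]`) can discard every per-container series at once. As a rough, hypothetical sketch of how that glob matching behaves — using the `github.com/gobwas/glob` library and an illustrative `dropByName` helper, not the plugin's actual filter code — consider:

```go
package main

import (
	"fmt"

	"github.com/gobwas/glob"
)

// dropByName reports whether a measurement name matches any namedrop pattern.
// Patterns are treated as plain globs, mirroring the glob-only matching
// described in the changelog above.
func dropByName(name string, patterns []string) bool {
	for _, p := range patterns {
		g, err := glob.Compile(p)
		if err != nil {
			continue // skip malformed patterns in this sketch
		}
		if g.Match(name) {
			return true
		}
	}
	return false
}

func main() {
	namedrop := []string{"docker_container_*"}
	for _, m := range []string{"docker_container_cpu", "docker_container_mem", "docker"} {
		fmt.Printf("%-22s dropped=%v\n", m, dropByName(m, namedrop))
	}
	// docker_container_cpu   dropped=true
	// docker_container_mem   dropped=true
	// docker                 dropped=false
}
```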
From c732abbda2f07d9c0f50a80382765a2ee5e72e87 Mon Sep 17 00:00:00 2001 From: maksadbek Date: Tue, 22 Dec 2015 12:06:39 +0500 Subject: [PATCH 13/84] Improved mysql plugin shows global variables shows slave statuses shows size and count of binary log files shows information_schema.processlist stats shows perf table stats shows auto increments stats from information schema shows perf index stats shows table lock waits summary by table shows time and operations of event waits shows file event statuses shows events statements stats from perf_schema shows schema statistics refactored plugin, provided multiple fields per insert --- plugins/inputs/mysql/mysql.go | 1156 +++++++++++++++++++++++++++- plugins/inputs/mysql/mysql_test.go | 45 ++ 2 files changed, 1170 insertions(+), 31 deletions(-) diff --git a/plugins/inputs/mysql/mysql.go b/plugins/inputs/mysql/mysql.go index 474067716..a4de7e602 100644 --- a/plugins/inputs/mysql/mysql.go +++ b/plugins/inputs/mysql/mysql.go @@ -1,7 +1,9 @@ package mysql import ( + "bytes" "database/sql" + "fmt" "net/url" "strconv" "strings" @@ -13,19 +15,35 @@ import ( ) type Mysql struct { - Servers []string + Servers []string + PerfEventsStatementsDigestTextLimit uint32 + PerfEventsStatementsLimit uint32 + PerfEventsStatementsTimeLimit uint32 + TableSchemaDatabases []string + GatherSlaveStatus bool + GatherBinaryLogs bool + GatherTableIOWaits bool + GatherIndexIOWaits bool } var sampleConfig = ` - ## specify servers via a url matching: - ## [username[:password]@][protocol[(address)]]/[?tls=[true|false|skip-verify]] - ## see https://github.com/go-sql-driver/mysql#dsn-data-source-name - ## e.g. - ## root:passwd@tcp(127.0.0.1:3306)/?tls=false - ## root@tcp(127.0.0.1:3306)/?tls=false - ## - ## If no servers are specified, then localhost is used as the host. + # specify servers via a url matching: + # [username[:password]@][protocol[(address)]]/[?tls=[true|false|skip-verify]] + # see https://github.com/go-sql-driver/mysql#dsn-data-source-name + # e.g. + # root:passwd@tcp(127.0.0.1:3306)/?tls=false + # root@tcp(127.0.0.1:3306)/?tls=false + # + # If no servers are specified, then localhost is used as the host. 
servers = ["tcp(127.0.0.1:3306)/"] + PerfEventsStatementsDigestTextLimit = 120 + PerfEventsStatementsLimit = 250 + PerfEventsStatementsTimeLimit = 86400 + TableSchemaDatabases = [] + GatherSlaveStatus = false + GatherBinaryLogs = false + GatherTableIOWaits = false + GatherIndexIOWaits = false ` var defaultTimeout = time.Second * time.Duration(5) @@ -118,18 +136,243 @@ var mappings = []*mapping{ }, } -func (m *Mysql) gatherServer(serv string, acc telegraf.Accumulator) error { - // If user forgot the '/', add it - if strings.HasSuffix(serv, ")") { - serv = serv + "/" - } else if serv == "localhost" { - serv = "" +var ( + // status counter + generalThreadStates = map[string]uint32{ + "after create": uint32(0), + "altering table": uint32(0), + "analyzing": uint32(0), + "checking permissions": uint32(0), + "checking table": uint32(0), + "cleaning up": uint32(0), + "closing tables": uint32(0), + "converting heap to myisam": uint32(0), + "copying to tmp table": uint32(0), + "creating sort index": uint32(0), + "creating table": uint32(0), + "creating tmp table": uint32(0), + "deleting": uint32(0), + "executing": uint32(0), + "execution of init_command": uint32(0), + "end": uint32(0), + "freeing items": uint32(0), + "flushing tables": uint32(0), + "fulltext initialization": uint32(0), + "idle": uint32(0), + "init": uint32(0), + "killed": uint32(0), + "waiting for lock": uint32(0), + "logging slow query": uint32(0), + "login": uint32(0), + "manage keys": uint32(0), + "opening tables": uint32(0), + "optimizing": uint32(0), + "preparing": uint32(0), + "reading from net": uint32(0), + "removing duplicates": uint32(0), + "removing tmp table": uint32(0), + "reopen tables": uint32(0), + "repair by sorting": uint32(0), + "repair done": uint32(0), + "repair with keycache": uint32(0), + "replication master": uint32(0), + "rolling back": uint32(0), + "searching rows for update": uint32(0), + "sending data": uint32(0), + "sorting for group": uint32(0), + "sorting for order": uint32(0), + "sorting index": uint32(0), + "sorting result": uint32(0), + "statistics": uint32(0), + "updating": uint32(0), + "waiting for tables": uint32(0), + "waiting for table flush": uint32(0), + "waiting on cond": uint32(0), + "writing to net": uint32(0), + "other": uint32(0), } + // plaintext statuses + stateStatusMappings = map[string]string{ + "user sleep": "idle", + "creating index": "altering table", + "committing alter table to storage engine": "altering table", + "discard or import tablespace": "altering table", + "rename": "altering table", + "setup": "altering table", + "renaming result table": "altering table", + "preparing for alter table": "altering table", + "copying to group table": "copying to tmp table", + "copy to tmp table": "copying to tmp table", + "query end": "end", + "update": "updating", + "updating main table": "updating", + "updating reference tables": "updating", + "system lock": "waiting for lock", + "user lock": "waiting for lock", + "table lock": "waiting for lock", + "deleting from main table": "deleting", + "deleting from reference tables": "deleting", + } +) +func dsnAddTimeout(dsn string) (string, error) { + + // DSN "?timeout=5s" is not valid, but "/?timeout=5s" is valid ("" and "/" + // are the same DSN) + if dsn == "" { + dsn = "/" + } + u, err := url.Parse(dsn) + if err != nil { + return "", err + } + v := u.Query() + + // Only override timeout if not already defined + if _, ok := v["timeout"]; ok == false { + v.Add("timeout", defaultTimeout.String()) + u.RawQuery = v.Encode() + } + return 
u.String(), nil +} + +// Math constants +const ( + picoSeconds = 1e12 +) + +// metric queries +const ( + globalStatusQuery = `SHOW GLOBAL STATUS` + globalVariablesQuery = `SHOW GLOBAL VARIABLES` + slaveStatusQuery = `SHOW SLAVE STATUS` + binaryLogsQuery = `SHOW BINARY LOGS` + infoSchemaProcessListQuery = ` + SELECT COALESCE(command,''),COALESCE(state,''),count(*) + FROM information_schema.processlist + WHERE ID != connection_id() + GROUP BY command,state + ORDER BY null` + infoSchemaAutoIncQuery = ` + SELECT table_schema, table_name, column_name, auto_increment, + pow(2, case data_type + when 'tinyint' then 7 + when 'smallint' then 15 + when 'mediumint' then 23 + when 'int' then 31 + when 'bigint' then 63 + end+(column_type like '% unsigned'))-1 as max_int + FROM information_schema.tables t + JOIN information_schema.columns c USING (table_schema,table_name) + WHERE c.extra = 'auto_increment' AND t.auto_increment IS NOT NULL + ` + perfTableIOWaitsQuery = ` + SELECT OBJECT_SCHEMA, OBJECT_NAME, COUNT_FETCH, COUNT_INSERT, COUNT_UPDATE, COUNT_DELETE, + SUM_TIMER_FETCH, SUM_TIMER_INSERT, SUM_TIMER_UPDATE, SUM_TIMER_DELETE + FROM performance_schema.table_io_waits_summary_by_table + WHERE OBJECT_SCHEMA NOT IN ('mysql', 'performance_schema') + ` + perfIndexIOWaitsQuery = ` + SELECT OBJECT_SCHEMA, OBJECT_NAME, ifnull(INDEX_NAME, 'NONE') as INDEX_NAME, + COUNT_FETCH, COUNT_INSERT, COUNT_UPDATE, COUNT_DELETE, + SUM_TIMER_FETCH, SUM_TIMER_INSERT, SUM_TIMER_UPDATE, SUM_TIMER_DELETE + FROM performance_schema.table_io_waits_summary_by_index_usage + WHERE OBJECT_SCHEMA NOT IN ('mysql', 'performance_schema') + ` + perfTableLockWaitsQuery = ` + SELECT + OBJECT_SCHEMA, + OBJECT_NAME, + COUNT_READ_NORMAL, + COUNT_READ_WITH_SHARED_LOCKS, + COUNT_READ_HIGH_PRIORITY, + COUNT_READ_NO_INSERT, + COUNT_READ_EXTERNAL, + COUNT_WRITE_ALLOW_WRITE, + COUNT_WRITE_CONCURRENT_INSERT, + COUNT_WRITE_DELAYED, + COUNT_WRITE_LOW_PRIORITY, + COUNT_WRITE_NORMAL, + COUNT_WRITE_EXTERNAL, + SUM_TIMER_READ_NORMAL, + SUM_TIMER_READ_WITH_SHARED_LOCKS, + SUM_TIMER_READ_HIGH_PRIORITY, + SUM_TIMER_READ_NO_INSERT, + SUM_TIMER_READ_EXTERNAL, + SUM_TIMER_WRITE_ALLOW_WRITE, + SUM_TIMER_WRITE_CONCURRENT_INSERT, + SUM_TIMER_WRITE_DELAYED, + SUM_TIMER_WRITE_LOW_PRIORITY, + SUM_TIMER_WRITE_NORMAL, + SUM_TIMER_WRITE_EXTERNAL + FROM performance_schema.table_lock_waits_summary_by_table + WHERE OBJECT_SCHEMA NOT IN ('mysql', 'performance_schema', 'information_schema') + ` + perfEventsStatementsQuery = ` + SELECT + ifnull(SCHEMA_NAME, 'NONE') as SCHEMA_NAME, + DIGEST, + LEFT(DIGEST_TEXT, %d) as DIGEST_TEXT, + COUNT_STAR, + SUM_TIMER_WAIT, + SUM_ERRORS, + SUM_WARNINGS, + SUM_ROWS_AFFECTED, + SUM_ROWS_SENT, + SUM_ROWS_EXAMINED, + SUM_CREATED_TMP_DISK_TABLES, + SUM_CREATED_TMP_TABLES, + SUM_SORT_MERGE_PASSES, + SUM_SORT_ROWS, + SUM_NO_INDEX_USED + FROM performance_schema.events_statements_summary_by_digest + WHERE SCHEMA_NAME NOT IN ('mysql', 'performance_schema', 'information_schema') + AND last_seen > DATE_SUB(NOW(), INTERVAL %d SECOND) + ORDER BY SUM_TIMER_WAIT DESC + LIMIT %d + ` + perfEventWaitsQuery = ` + SELECT EVENT_NAME, COUNT_STAR, SUM_TIMER_WAIT + FROM performance_schema.events_waits_summary_global_by_event_name + ` + perfFileEventsQuery = ` + SELECT + EVENT_NAME, + COUNT_READ, SUM_TIMER_READ, SUM_NUMBER_OF_BYTES_READ, + COUNT_WRITE, SUM_TIMER_WRITE, SUM_NUMBER_OF_BYTES_WRITE, + COUNT_MISC, SUM_TIMER_MISC + FROM performance_schema.file_summary_by_event_name + ` + tableSchemaQuery = ` + SELECT + TABLE_SCHEMA, + TABLE_NAME, + TABLE_TYPE, + 
ifnull(ENGINE, 'NONE') as ENGINE, + ifnull(VERSION, '0') as VERSION, + ifnull(ROW_FORMAT, 'NONE') as ROW_FORMAT, + ifnull(TABLE_ROWS, '0') as TABLE_ROWS, + ifnull(DATA_LENGTH, '0') as DATA_LENGTH, + ifnull(INDEX_LENGTH, '0') as INDEX_LENGTH, + ifnull(DATA_FREE, '0') as DATA_FREE, + ifnull(CREATE_OPTIONS, 'NONE') as CREATE_OPTIONS + FROM information_schema.tables + WHERE TABLE_SCHEMA = '%s' + ` + dbListQuery = ` + SELECT + SCHEMA_NAME + FROM information_schema.schemata + WHERE SCHEMA_NAME NOT IN ('mysql', 'performance_schema', 'information_schema') + ` +) + +func (m *Mysql) gatherServer(serv string, acc telegraf.Accumulator) error { serv, err := dsnAddTimeout(serv) if err != nil { return err } + db, err := sql.Open("mysql", serv) if err != nil { return err @@ -137,7 +380,206 @@ func (m *Mysql) gatherServer(serv string, acc telegraf.Accumulator) error { defer db.Close() - rows, err := db.Query(`SHOW /*!50002 GLOBAL */ STATUS`) + err = m.gatherGlobalStatuses(db, serv, acc) + if err != nil { + return err + } + + err = m.gatherGlobalVariables(db, serv, acc) + if err != nil { + return err + } + + if m.GatherSlaveStatus { + err = m.gatherBinaryLogs(db, serv, acc) + if err != nil { + return err + } + } + + err = m.GatherProcessListStatuses(db, serv, acc) + if err != nil { + return err + } + + if m.GatherSlaveStatus { + err = m.gatherSlaveStatuses(db, serv, acc) + if err != nil { + return err + } + } + + err = m.gatherInfoSchemaAutoIncStatuses(db, serv, acc) + if err != nil { + return err + } + + if m.GatherTableIOWaits { + err = m.gatherPerfTableIOWaits(db, serv, acc) + if err != nil { + return err + } + } + + if m.GatherIndexIOWaits { + err = m.gatherPerfIndexIOWaits(db, serv, acc) + if err != nil { + return err + } + } + + err = m.gatherPerfTableLockWaits(db, serv, acc) + if err != nil { + return err + } + + err = m.gatherPerfEventWaits(db, serv, acc) + if err != nil { + return err + } + + err = m.gatherPerfFileEventsStatuses(db, serv, acc) + if err != nil { + return err + } + + err = m.gatherPerfEventsStatements(db, serv, acc) + if err != nil { + return err + } + + err = m.gatherTableSchema(db, serv, acc) + if err != nil { + return err + } + + err = m.gatherTableSchema(db, serv, acc) + if err != nil { + return err + } + return nil +} + +func (m *Mysql) gatherGlobalVariables(db *sql.DB, serv string, acc telegraf.Accumulator) error { + rows, err := db.Query(globalVariablesQuery) + if err != nil { + return err + } + defer rows.Close() + var key string + var val sql.RawBytes + + servtag, err := parseDSN(serv) + if err != nil { + servtag = "localhost" + } + tags := map[string]string{"server": servtag} + fields := make(map[string]interface{}) + for rows.Next() { + if err := rows.Scan(&key, &val); err != nil { + return err + } + key = strings.ToLower(key) + if floatVal, ok := parseValue(val); ok { + fields[key] = floatVal + } + } + acc.Add("mysql_variables", fields, tags) + return nil +} + +// gatherSlaveStatuses can be used to get replication analytics +// When the server is slave, then it returns only one row. +// If the multi-source replication is set, then everything works differently +// This code does not work with multi-source replication. 
+func (m *Mysql) gatherSlaveStatuses(db *sql.DB, serv string, acc telegraf.Accumulator) error { + rows, err := db.Query(slaveStatusQuery) + + if err != nil { + return err + } + defer rows.Close() + + servtag, err := parseDSN(serv) + + if err != nil { + servtag = "localhost" + } + tags := map[string]string{"server": servtag} + fields := make(map[string]interface{}) + if rows.Next() { + cols, err := rows.Columns() + + if err != nil { + return err + } + vals := make([]interface{}, len(cols)) + + for i := range vals { + vals[i] = &sql.RawBytes{} + } + + if err = rows.Scan(vals...); err != nil { + return err + } + + for i, col := range cols { + // skip unparsable values + if value, ok := parseValue(*vals[i].(*sql.RawBytes)); ok { + //acc.Add("slave_"+col, value, tags) + fields["slave_"+col] = value + } + } + acc.AddFields("mysql", fields, tags) + } + + return nil +} + +// gatherBinaryLogs can be used to collect size and count of all binary files +func (m *Mysql) gatherBinaryLogs(db *sql.DB, serv string, acc telegraf.Accumulator) error { + rows, err := db.Query(binaryLogsQuery) + if err != nil { + return err + } + defer rows.Close() + + var servtag string + servtag, err = parseDSN(serv) + if err != nil { + servtag = "localhost" + } + tags := map[string]string{"server": servtag} + fields := make(map[string]interface{}) + var ( + size uint64 = 0 + count uint64 = 0 + fileSize uint64 + fileName string + ) + + for rows.Next() { + if err := rows.Scan(&fileName, &fileSize); err != nil { + return err + } + size += fileSize + count++ + } + fields["binary_size_bytes"] = size + fields["binary_files_count"] = count + acc.AddFields("mysql", fields, tags) + return nil +} + +func (m *Mysql) gatherGlobalStatuses(db *sql.DB, serv string, acc telegraf.Accumulator) error { + // If user forgot the '/', add it + if strings.HasSuffix(serv, ")") { + serv = serv + "/" + } else if serv == "localhost" { + serv = "" + } + + rows, err := db.Query(globalStatusQuery) if err != nil { return err } @@ -215,25 +657,677 @@ func (m *Mysql) gatherServer(serv string, acc telegraf.Accumulator) error { return nil } -func dsnAddTimeout(dsn string) (string, error) { - - // DSN "?timeout=5s" is not valid, but "/?timeout=5s" is valid ("" and "/" - // are the same DSN) - if dsn == "" { - dsn = "/" - } - u, err := url.Parse(dsn) +func (m *Mysql) GatherProcessListStatuses(db *sql.DB, serv string, acc telegraf.Accumulator) error { + rows, err := db.Query(infoSchemaProcessListQuery) if err != nil { - return "", err + return err } - v := u.Query() + defer rows.Close() + var ( + command string + state string + count uint32 + ) - // Only override timeout if not already defined - if _, ok := v["timeout"]; ok == false { - v.Add("timeout", defaultTimeout.String()) - u.RawQuery = v.Encode() + var servtag string + fields := make(map[string]interface{}) + servtag, err = parseDSN(serv) + if err != nil { + servtag = "localhost" } - return u.String(), nil + + // mapping of state with its counts + stateCounts := make(map[string]uint32, len(generalThreadStates)) + // set map with keys and default values + for k, v := range generalThreadStates { + stateCounts[k] = v + } + + for rows.Next() { + err = rows.Scan(&command, &state, &count) + if err != nil { + return err + } + foundState := findThreadState(command, state) + stateCounts[foundState] += count + } + + tags := map[string]string{"server": servtag} + for s, c := range stateCounts { + fields[newNamespace("threads", s)] = c + } + acc.AddFields("mysql_info_schema", fields, tags) + return nil +} + +// 
gatherPerfTableIOWaits can be used to get total count and time +// of I/O wait event for each table and process +func (m *Mysql) gatherPerfTableIOWaits(db *sql.DB, serv string, acc telegraf.Accumulator) error { + rows, err := db.Query(perfTableIOWaitsQuery) + if err != nil { + return err + } + + defer rows.Close() + var ( + objSchema, objName, servtag string + countFetch, countInsert, countUpdate, countDelete uint64 + timeFetch, timeInsert, timeUpdate, timeDelete uint64 + ) + + servtag, err = parseDSN(serv) + if err != nil { + servtag = "localhost" + } + + for rows.Next() { + err = rows.Scan(&objSchema, &objName, + &countFetch, &countInsert, &countUpdate, &countDelete, + &timeFetch, &timeInsert, &timeUpdate, &timeDelete, + ) + + if err != nil { + return err + } + + tags := map[string]string{ + "server": servtag, + "schema": objSchema, + "name": objName, + } + fields := make(map[string]interface{}) + fields["table_io_waits_total_fetch"] = float64(countFetch) + fields["table_io_waits_total_insert"] = float64(countInsert) + fields["table_io_waits_total_update"] = float64(countUpdate) + fields["table_io_waits_total_delete"] = float64(countDelete) + + fields["table_io_waits_seconds_total_fetch"] = float64(timeFetch) / picoSeconds + fields["table_io_waits_seconds_total_insert"] = float64(timeInsert) / picoSeconds + fields["table_io_waits_seconds_total_update"] = float64(timeUpdate) / picoSeconds + fields["table_io_waits_seconds_total_delete"] = float64(timeDelete) / picoSeconds + + acc.AddFields("mysql_perf_schema", fields, tags) + } + return nil +} + +// gatherPerfIndexIOWaits can be used to get total count and time +// of I/O wait event for each index and process +func (m *Mysql) gatherPerfIndexIOWaits(db *sql.DB, serv string, acc telegraf.Accumulator) error { + rows, err := db.Query(perfIndexIOWaitsQuery) + if err != nil { + return err + } + defer rows.Close() + + var ( + objSchema, objName, indexName, servtag string + countFetch, countInsert, countUpdate, countDelete uint64 + timeFetch, timeInsert, timeUpdate, timeDelete uint64 + ) + + servtag, err = parseDSN(serv) + if err != nil { + servtag = "localhost" + } + + for rows.Next() { + err = rows.Scan(&objSchema, &objName, &indexName, + &countFetch, &countInsert, &countUpdate, &countDelete, + &timeFetch, &timeInsert, &timeUpdate, &timeDelete, + ) + + if err != nil { + return err + } + + tags := map[string]string{ + "server": servtag, + "schema": objSchema, + "name": objName, + "index": indexName, + } + fields := make(map[string]interface{}) + fields["index_io_waits_total_fetch"] = float64(countFetch) + fields["index_io_waits_seconds_total_fetch"] = float64(timeFetch) / picoSeconds + + // update write columns only when index is NONE + if indexName == "NONE" { + fields["index_io_waits_total_insert"] = float64(countInsert) + fields["index_io_waits_total_update"] = float64(countUpdate) + fields["index_io_waits_total_delete"] = float64(countDelete) + + fields["index_io_waits_seconds_total_insert"] = float64(timeInsert) / picoSeconds + fields["index_io_waits_seconds_total_update"] = float64(timeUpdate) / picoSeconds + fields["index_io_waits_seconds_total_delete"] = float64(timeDelete) / picoSeconds + } + + acc.AddFields("mysql_perf_schema", fields, tags) + } + return nil +} + +// gatherInfoSchemaAutoIncStatuses can be used to get auto incremented value of the column +func (m *Mysql) gatherInfoSchemaAutoIncStatuses(db *sql.DB, serv string, acc telegraf.Accumulator) error { + rows, err := db.Query(infoSchemaAutoIncQuery) + if err != nil { + return err 
+ } + defer rows.Close() + + var ( + schema, table, column string + incValue, maxInt uint64 + ) + + servtag, err := parseDSN(serv) + if err != nil { + servtag = "localhost" + } + + for rows.Next() { + if err := rows.Scan(&schema, &table, &column, &incValue, &maxInt); err != nil { + return err + } + tags := map[string]string{ + "server": servtag, + "schema": schema, + "table": table, + "column": column, + } + fields := make(map[string]interface{}) + fields["auto_increment_column"] = incValue + fields["auto_increment_column_max"] = maxInt + + acc.AddFields("mysql_info_schema", fields, tags) + } + return nil +} + +// gatherPerfTableLockWaits can be used to get +// the total number and time for SQL and external lock wait events +// for each table and operation +func (m *Mysql) gatherPerfTableLockWaits(db *sql.DB, serv string, acc telegraf.Accumulator) error { + rows, err := db.Query(perfTableLockWaitsQuery) + if err != nil { + return err + } + defer rows.Close() + + servtag, err := parseDSN(serv) + if err != nil { + servtag = "localhost" + } + + var ( + objectSchema string + objectName string + countReadNormal uint64 + countReadWithSharedLocks uint64 + countReadHighPriority uint64 + countReadNoInsert uint64 + countReadExternal uint64 + countWriteAllowWrite uint64 + countWriteConcurrentInsert uint64 + countWriteDelayed uint64 + countWriteLowPriority uint64 + countWriteNormal uint64 + countWriteExternal uint64 + timeReadNormal uint64 + timeReadWithSharedLocks uint64 + timeReadHighPriority uint64 + timeReadNoInsert uint64 + timeReadExternal uint64 + timeWriteAllowWrite uint64 + timeWriteConcurrentInsert uint64 + timeWriteDelayed uint64 + timeWriteLowPriority uint64 + timeWriteNormal uint64 + timeWriteExternal uint64 + ) + + for rows.Next() { + err = rows.Scan( + &objectSchema, + &objectName, + &countReadNormal, + &countReadWithSharedLocks, + &countReadHighPriority, + &countReadNoInsert, + &countReadExternal, + &countWriteAllowWrite, + &countWriteConcurrentInsert, + &countWriteDelayed, + &countWriteLowPriority, + &countWriteNormal, + &countWriteExternal, + &timeReadNormal, + &timeReadWithSharedLocks, + &timeReadHighPriority, + &timeReadNoInsert, + &timeReadExternal, + &timeWriteAllowWrite, + &timeWriteConcurrentInsert, + &timeWriteDelayed, + &timeWriteLowPriority, + &timeWriteNormal, + &timeWriteExternal, + ) + + if err != nil { + return err + } + tags := map[string]string{ + "server": servtag, + "schema": objectSchema, + "table": objectName, + } + fields := make(map[string]interface{}) + + tags["operation"] = "read_normal" + fields["sql_lock_waits_total"] = float64(countReadNormal) + acc.AddFields("mysql_perf_schema", fields, tags) + + tags["operation"] = "read_with_shared_locks" + fields["sql_lock_waits_total"] = float64(countReadWithSharedLocks) + acc.AddFields("mysql_perf_schema", fields, tags) + + tags["operation"] = "read_high_priority" + fields["sql_lock_waits_total"] = float64(countReadHighPriority) + acc.AddFields("mysql_perf_schema", fields, tags) + + tags["operation"] = "read_no_insert" + fields["sql_lock_waits_total"] = float64(countReadNoInsert) + acc.AddFields("mysql_perf_schema", fields, tags) + + tags["operation"] = "write_normal" + fields["sql_lock_waits_total"] = float64(countWriteNormal) + acc.AddFields("mysql_perf_schema", fields, tags) + + tags["operation"] = "write_allow_write" + fields["sql_lock_waits_total"] = float64(countWriteAllowWrite) + acc.AddFields("mysql_perf_schema", fields, tags) + + tags["operation"] = "write_concurrent_insert" + fields["sql_lock_waits_total"] = 
float64(countWriteConcurrentInsert) + acc.AddFields("mysql_perf_schema", fields, tags) + + tags["operation"] = "write_delayed" + fields["sql_lock_waits_total"] = float64(countWriteDelayed) + acc.AddFields("mysql_perf_schema", fields, tags) + + tags["operation"] = "write_low_priority" + fields["sql_lock_waits_total"] = float64(countWriteLowPriority) + acc.AddFields("mysql_perf_schema", fields, tags) + + delete(fields, "sql_lock_waits_total") + + tags["operation"] = "read" + fields["external_lock_waits_total"] = float64(countReadExternal) + acc.AddFields("mysql_perf_schema", fields, tags) + + tags["operation"] = "write" + fields["external_lock_waits_total"] = float64(countWriteExternal) + acc.AddFields("mysql_perf_schema", fields, tags) + + delete(fields, "external_lock_waits_total") + + tags["operation"] = "read_normal" + fields["sql_lock_waits_seconds_total"] = float64(timeReadNormal / picoSeconds) + acc.AddFields("mysql_perf_schema", fields, tags) + + tags["operation"] = "read_with_shared_locks" + fields["sql_lock_waits_seconds_total"] = float64(timeReadWithSharedLocks / picoSeconds) + acc.AddFields("mysql_perf_schema", fields, tags) + + tags["operation"] = "read_high_priority" + fields["sql_lock_waits_seconds_total"] = float64(timeReadHighPriority / picoSeconds) + acc.AddFields("mysql_perf_schema", fields, tags) + + tags["operation"] = "read_no_insert" + fields["sql_lock_waits_seconds_total"] = float64(timeReadNoInsert / picoSeconds) + acc.AddFields("mysql_perf_schema", fields, tags) + + tags["operation"] = "write_normal" + fields["sql_lock_waits_seconds_total"] = float64(timeWriteNormal / picoSeconds) + acc.AddFields("mysql_perf_schema", fields, tags) + + tags["operation"] = "write_allow_write" + fields["sql_lock_waits_seconds_total"] = float64(timeWriteAllowWrite / picoSeconds) + acc.AddFields("mysql_perf_schema", fields, tags) + + tags["operation"] = "write_concurrent_insert" + fields["sql_lock_waits_seconds_total"] = float64(timeWriteConcurrentInsert / picoSeconds) + acc.AddFields("mysql_perf_schema", fields, tags) + + tags["operation"] = "write_delayed" + fields["sql_lock_waits_seconds_total"] = float64(timeWriteDelayed / picoSeconds) + acc.AddFields("mysql_perf_schema", fields, tags) + + tags["operation"] = "write_low_priority" + fields["sql_lock_waits_seconds_total"] = float64(timeWriteLowPriority / picoSeconds) + acc.AddFields("mysql_perf_schema", fields, tags) + + delete(fields, "sql_lock_waits_seconds_total") + + tags["operation"] = "read" + fields["external_lock_waits_seconds_total"] = float64(timeReadExternal / picoSeconds) + acc.AddFields("mysql_perf_schema", fields, tags) + + tags["operation"] = "write" + fields["external_lock_waits_seconds_total"] = float64(timeWriteExternal / picoSeconds) + acc.AddFields("mysql_perf_schema", fields, tags) + } + return nil +} + +// gatherPerfEventWaits can be used to get total time and number of event waits +func (m *Mysql) gatherPerfEventWaits(db *sql.DB, serv string, acc telegraf.Accumulator) error { + rows, err := db.Query(perfEventWaitsQuery) + if err != nil { + return err + } + defer rows.Close() + + var ( + event string + starCount, timeWait uint64 + ) + + servtag, err := parseDSN(serv) + if err != nil { + servtag = "localhost" + } + tags := map[string]string{ + "server": servtag, + } + for rows.Next() { + if err := rows.Scan(&event, &starCount, &timeWait); err != nil { + return err + } + tags["event_name"] = event + fields := make(map[string]interface{}) + fields["events_waits_total"] = float64(starCount) + 
fields["events_waits_seconds_total"] = float64(timeWait) / picoSeconds + + acc.AddFields("mysql_perf_schema", fields, tags) + } + return nil +} + +// gatherPerfFileEvents can be used to get stats on file events +func (m *Mysql) gatherPerfFileEventsStatuses(db *sql.DB, serv string, acc telegraf.Accumulator) error { + rows, err := db.Query(perfFileEventsQuery) + if err != nil { + return err + } + + defer rows.Close() + + var ( + eventName string + countRead, countWrite, countMisc uint64 + sumTimerRead, sumTimerWrite, sumTimerMisc uint64 + sumNumBytesRead, sumNumBytesWrite uint64 + ) + + servtag, err := parseDSN(serv) + if err != nil { + servtag = "localhost" + } + tags := map[string]string{ + "server": servtag, + } + for rows.Next() { + err = rows.Scan( + &eventName, + &countRead, &sumTimerRead, &sumNumBytesRead, + &countWrite, &sumTimerWrite, &sumNumBytesWrite, + &countMisc, &sumTimerMisc, + ) + if err != nil { + return err + } + + tags["event_name"] = eventName + fields := make(map[string]interface{}) + + tags["mode"] = "misc" + fields["file_events_total"] = float64(countWrite) + fields["file_events_seconds_total"] = float64(sumTimerMisc) / picoSeconds + acc.AddFields("mysql_perf_schema", fields, tags) + + tags["mode"] = "read" + fields["file_events_total"] = float64(countRead) + fields["file_events_seconds_total"] = float64(sumTimerRead) / picoSeconds + fields["file_events_bytes_totals"] = float64(sumNumBytesRead) + acc.AddFields("mysql_perf_schema", fields, tags) + + tags["mode"] = "write" + fields["file_events_total"] = float64(countWrite) + fields["file_events_seconds_total"] = float64(sumTimerWrite) / picoSeconds + fields["file_events_bytes_totals"] = float64(sumNumBytesWrite) + acc.AddFields("mysql_perf_schema", fields, tags) + + } + return nil +} + +// gatherPerfEventsStatements can be used to get attributes of each event +func (m *Mysql) gatherPerfEventsStatements(db *sql.DB, serv string, acc telegraf.Accumulator) error { + query := fmt.Sprintf( + perfEventsStatementsQuery, + m.PerfEventsStatementsDigestTextLimit, + m.PerfEventsStatementsTimeLimit, + m.PerfEventsStatementsLimit, + ) + + rows, err := db.Query(query) + if err != nil { + return err + } + + defer rows.Close() + + var ( + schemaName, digest, digest_text string + count, queryTime, errors, warnings uint64 + rowsAffected, rowsSent, rowsExamined uint64 + tmpTables, tmpDiskTables uint64 + sortMergePasses, sortRows uint64 + noIndexUsed uint64 + ) + + servtag, err := parseDSN(serv) + if err != nil { + servtag = "localhost" + } + tags := map[string]string{ + "server": servtag, + } + + for rows.Next() { + err = rows.Scan( + &schemaName, &digest, &digest_text, + &count, &queryTime, &errors, &warnings, + &rowsAffected, &rowsSent, &rowsExamined, + &tmpTables, &tmpDiskTables, + &sortMergePasses, &sortRows, + ) + + if err != nil { + return err + } + tags["schema"] = schemaName + tags["digest"] = digest + tags["digest_text"] = digest_text + + fields := make(map[string]interface{}) + + fields["events_statements_total"] = float64(count) + fields["events_statements_seconds_total"] = float64(queryTime) / picoSeconds + fields["events_statements_errors_total"] = float64(errors) + fields["events_statements_warnings_total"] = float64(warnings) + fields["events_statements_rows_affected_total"] = float64(rowsAffected) + fields["events_statements_rows_sent_total"] = float64(rowsSent) + fields["events_statements_rows_examined_total"] = float64(rowsExamined) + fields["events_statements_tmp_tables_total"] = float64(tmpTables) + 
fields["events_statements_tmp_disk_tables_total"] = float64(tmpDiskTables) + fields["events_statements_sort_merge_passes_total"] = float64(sortMergePasses) + fields["events_statements_sort_rows_total"] = float64(sortRows) + fields["events_statements_no_index_used_total"] = float64(noIndexUsed) + + acc.AddFields("mysql_perf_schema", fields, tags) + } + return nil +} + +func (m *Mysql) gatherTableSchema(db *sql.DB, serv string, acc telegraf.Accumulator) error { + var ( + dbList []string + servtag string + ) + servtag, err := parseDSN(serv) + if err != nil { + servtag = "localhost" + } + + // if the list of databases if empty, then get all databases + if len(m.TableSchemaDatabases) == 0 { + rows, err := db.Query(dbListQuery) + if err != nil { + return err + } + defer rows.Close() + + var database string + for rows.Next() { + err = rows.Scan(&database) + if err != nil { + return err + } + + dbList = append(dbList, database) + } + } else { + dbList = m.TableSchemaDatabases + } + + for _, database := range dbList { + rows, err := db.Query(fmt.Sprintf(tableSchemaQuery, database)) + if err != nil { + return err + } + defer rows.Close() + var ( + tableSchema string + tableName string + tableType string + engine string + version uint64 + rowFormat string + tableRows uint64 + dataLength uint64 + indexLength uint64 + dataFree uint64 + createOptions string + ) + for rows.Next() { + err = rows.Scan( + &tableSchema, + &tableName, + &tableType, + &engine, + &version, + &rowFormat, + &tableRows, + &dataLength, + &indexLength, + &dataFree, + &createOptions, + ) + if err != nil { + return err + } + tags := map[string]string{"server": servtag} + tags["schema"] = tableSchema + tags["table"] = tableName + versionTags := tags + + acc.Add(newNamespace("info_schema", "table_rows"), float64(tableRows), tags) + + tags["component"] = "data_length" + acc.Add(newNamespace("info_schema", "table_size", "data_length"), float64(dataLength), tags) + + tags["component"] = "index_length" + acc.Add(newNamespace("info_schema", "table_size", "index_length"), float64(indexLength), tags) + + tags["component"] = "data_free" + acc.Add(newNamespace("info_schema", "table_size", "data_free"), float64(dataFree), tags) + + versionTags["type"] = tableType + versionTags["engine"] = engine + versionTags["row_format"] = rowFormat + versionTags["create_options"] = createOptions + + acc.Add(newNamespace("info_schema", "table_version"), float64(version), versionTags) + } + } + return nil +} + +// parseValue can be used to convert values such as "ON","OFF","Yes","No" to 0,1 +func parseValue(value sql.RawBytes) (float64, bool) { + if bytes.Compare(value, []byte("Yes")) == 0 || bytes.Compare(value, []byte("ON")) == 0 { + return 1, true + } + + if bytes.Compare(value, []byte("No")) == 0 || bytes.Compare(value, []byte("OFF")) == 0 { + return 0, false + } + n, err := strconv.ParseFloat(string(value), 64) + return n, err == nil +} + +// findThreadState can be used to find thread state by command and plain state +func findThreadState(rawCommand, rawState string) string { + var ( + // replace '_' symbol with space + command = strings.Replace(strings.ToLower(rawCommand), "_", " ", -1) + state = strings.Replace(strings.ToLower(rawState), "_", " ", -1) + ) + // if the state is already valid, then return it + if _, ok := generalThreadStates[state]; ok { + return state + } + + // if state is plain, return the mapping + if mappedState, ok := stateStatusMappings[state]; ok { + return mappedState + } + // if the state is any lock, return the special state + if 
strings.Contains(state, "waiting for") && strings.Contains(state, "lock") { + return "waiting for lock" + } + + if command == "sleep" && state == "" { + return "idle" + } + + if command == "query" { + return "executing" + } + + if command == "binlog dump" { + return "replication master" + } + // if no mappings found and state is invalid, then return "other" state + return "other" +} + +// newNamespace can be used to make a namespace +func newNamespace(words ...string) string { + return strings.Replace(strings.Join(words, "_"), " ", "_", -1) } func init() { diff --git a/plugins/inputs/mysql/mysql_test.go b/plugins/inputs/mysql/mysql_test.go index 9e4073432..989c21722 100644 --- a/plugins/inputs/mysql/mysql_test.go +++ b/plugins/inputs/mysql/mysql_test.go @@ -1,6 +1,7 @@ package mysql import ( + "database/sql" "fmt" "testing" @@ -115,3 +116,47 @@ func TestMysqlDNSAddTimeout(t *testing.T) { } } } + +func TestParseValue(t *testing.T) { + testCases := []struct { + rawByte sql.RawBytes + value float64 + boolValue bool + }{ + {sql.RawBytes("Yes"), 1, true}, + {sql.RawBytes("No"), 0, false}, + {sql.RawBytes("ON"), 1, true}, + {sql.RawBytes("OFF"), 0, false}, + {sql.RawBytes("ABC"), 0, false}, + } + for _, cases := range testCases { + if value, ok := parseValue(cases.rawByte); value != cases.value && ok != cases.boolValue { + t.Errorf("want %d with %t, got %d with %t", int(cases.value), cases.boolValue, int(value), ok) + } + } +} + +func TestNewNamespace(t *testing.T) { + testCases := []struct { + words []string + namespace string + }{ + { + []string{"thread", "info_scheme", "query update"}, + "thread_info_scheme_query_update", + }, + { + []string{"thread", "info_scheme", "query_update"}, + "thread_info_scheme_query_update", + }, + { + []string{"thread", "info", "scheme", "query", "update"}, + "thread_info_scheme_query_update", + }, + } + for _, cases := range testCases { + if got := newNamespace(cases.words...); got != cases.namespace { + t.Errorf("want %s, got %s", cases.namespace, got) + } + } +} From b16bc3d2e35f9ef032b8b3f1aa8a31b6fd3fff0f Mon Sep 17 00:00:00 2001 From: Maksadbek Date: Fri, 26 Feb 2016 01:35:27 +0500 Subject: [PATCH 14/84] remove duplicate function; Mysql plugin GatherTableSchema is configurable --- plugins/inputs/mysql/mysql.go | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/plugins/inputs/mysql/mysql.go b/plugins/inputs/mysql/mysql.go index a4de7e602..0220394b2 100644 --- a/plugins/inputs/mysql/mysql.go +++ b/plugins/inputs/mysql/mysql.go @@ -24,6 +24,7 @@ type Mysql struct { GatherBinaryLogs bool GatherTableIOWaits bool GatherIndexIOWaits bool + GatherTableSchema bool } var sampleConfig = ` @@ -44,6 +45,7 @@ var sampleConfig = ` GatherBinaryLogs = false GatherTableIOWaits = false GatherIndexIOWaits = false + GatherTableSchema = false ` var defaultTimeout = time.Second * time.Duration(5) @@ -448,14 +450,11 @@ func (m *Mysql) gatherServer(serv string, acc telegraf.Accumulator) error { return err } - err = m.gatherTableSchema(db, serv, acc) - if err != nil { - return err - } - - err = m.gatherTableSchema(db, serv, acc) - if err != nil { - return err + if m.GatherTableSchema { + err = m.gatherTableSchema(db, serv, acc) + if err != nil { + return err + } } return nil } @@ -484,7 +483,7 @@ func (m *Mysql) gatherGlobalVariables(db *sql.DB, serv string, acc telegraf.Accu fields[key] = floatVal } } - acc.Add("mysql_variables", fields, tags) + acc.AddFields("mysql_variables", fields, tags) return nil } From c121e38da6d32813431c5e00ac2a4648a27d01df 
Mon Sep 17 00:00:00 2001 From: Maksadbek Date: Fri, 26 Feb 2016 03:16:25 +0500 Subject: [PATCH 15/84] mysql plugin, check for existence of table before scanning --- plugins/inputs/mysql/mysql.go | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/plugins/inputs/mysql/mysql.go b/plugins/inputs/mysql/mysql.go index 0220394b2..2e8e70a1a 100644 --- a/plugins/inputs/mysql/mysql.go +++ b/plugins/inputs/mysql/mysql.go @@ -367,6 +367,12 @@ const ( FROM information_schema.schemata WHERE SCHEMA_NAME NOT IN ('mysql', 'performance_schema', 'information_schema') ` + perfSchemaTablesQuery = ` + SELECT + table_name + FROM information_schema.tables + WHERE table_schema = 'performance_schema' AND table_name = ? + ` ) func (m *Mysql) gatherServer(serv string, acc telegraf.Accumulator) error { @@ -847,6 +853,18 @@ func (m *Mysql) gatherInfoSchemaAutoIncStatuses(db *sql.DB, serv string, acc tel // the total number and time for SQL and external lock wait events // for each table and operation func (m *Mysql) gatherPerfTableLockWaits(db *sql.DB, serv string, acc telegraf.Accumulator) error { + // check if table exists, + // if performance_schema is not enabled, tables do not exist + // then there is no need to scan them + var tableName string + err := db.QueryRow(perfSchemaTablesQuery, "table_lock_waits_summary_by_table").Scan(&tableName) + switch { + case err == sql.ErrNoRows: + return nil + case err != nil: + return err + } + rows, err := db.Query(perfTableLockWaitsQuery) if err != nil { return err From c5d31e75270c1a48a967d766e07083e340fd5804 Mon Sep 17 00:00:00 2001 From: Maksadbek Date: Thu, 3 Mar 2016 02:58:08 +0500 Subject: [PATCH 16/84] statics that lack on MySQL 5.5 is turned off by default --- plugins/inputs/mysql/mysql.go | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/plugins/inputs/mysql/mysql.go b/plugins/inputs/mysql/mysql.go index 2e8e70a1a..23f8e4762 100644 --- a/plugins/inputs/mysql/mysql.go +++ b/plugins/inputs/mysql/mysql.go @@ -25,6 +25,8 @@ type Mysql struct { GatherTableIOWaits bool GatherIndexIOWaits bool GatherTableSchema bool + GatherFileEventsStats bool + GatherPerfEventsStatements bool } var sampleConfig = ` @@ -46,6 +48,8 @@ var sampleConfig = ` GatherTableIOWaits = false GatherIndexIOWaits = false GatherTableSchema = false + GatherFileEventsStats = false + GatherPerfEventsStatements = false ` var defaultTimeout = time.Second * time.Duration(5) @@ -372,6 +376,7 @@ const ( table_name FROM information_schema.tables WHERE table_schema = 'performance_schema' AND table_name = ? 
+ ` ) @@ -446,14 +451,18 @@ func (m *Mysql) gatherServer(serv string, acc telegraf.Accumulator) error { return err } - err = m.gatherPerfFileEventsStatuses(db, serv, acc) - if err != nil { - return err + if m.GatherFileEventsStats { + err = m.gatherPerfFileEventsStatuses(db, serv, acc) + if err != nil { + return err + } } - err = m.gatherPerfEventsStatements(db, serv, acc) - if err != nil { - return err + if m.GatherPerfEventsStatements { + err = m.gatherPerfEventsStatements(db, serv, acc) + if err != nil { + return err + } } if m.GatherTableSchema { From d59999f51092a122988ef8d99eafe025378c64fc Mon Sep 17 00:00:00 2001 From: Maksadbek Date: Fri, 18 Mar 2016 14:15:54 +0500 Subject: [PATCH 17/84] improvements on queries and additional comments --- plugins/inputs/mysql/mysql.go | 288 +++++++++++++++++++++++++++++----- 1 file changed, 248 insertions(+), 40 deletions(-) diff --git a/plugins/inputs/mysql/mysql.go b/plugins/inputs/mysql/mysql.go index 23f8e4762..ea59113ee 100644 --- a/plugins/inputs/mysql/mysql.go +++ b/plugins/inputs/mysql/mysql.go @@ -20,6 +20,8 @@ type Mysql struct { PerfEventsStatementsLimit uint32 PerfEventsStatementsTimeLimit uint32 TableSchemaDatabases []string + GatherProcessList bool + GatherInfoSchemaAutoInc bool GatherSlaveStatus bool GatherBinaryLogs bool GatherTableIOWaits bool @@ -27,6 +29,7 @@ type Mysql struct { GatherTableSchema bool GatherFileEventsStats bool GatherPerfEventsStatements bool + IntervalSlow string } var sampleConfig = ` @@ -43,6 +46,8 @@ var sampleConfig = ` PerfEventsStatementsLimit = 250 PerfEventsStatementsTimeLimit = 86400 TableSchemaDatabases = [] + GatherProcessList = false + GatherInfoSchemaAutoInc = false GatherSlaveStatus = false GatherBinaryLogs = false GatherTableIOWaits = false @@ -50,6 +55,8 @@ var sampleConfig = ` GatherTableSchema = false GatherFileEventsStats = false GatherPerfEventsStatements = false + # Some queries we may want to run less often (such as SHOW GLOBAL VARIABLES) + IntervalSlow = "30m" ` var defaultTimeout = time.Second * time.Duration(5) @@ -62,7 +69,22 @@ func (m *Mysql) Description() string { return "Read metrics from one or many mysql servers" } -var localhost = "" +var ( + localhost = "" + lastT time.Time + initDone = false + scanIntervalSlow uint32 +) + +func (m *Mysql) InitMysql() { + if len(m.IntervalSlow) > 0 { + interval, err := time.ParseDuration(m.IntervalSlow) + if err == nil && interval.Seconds() >= 1.0 { + scanIntervalSlow = uint32(interval.Seconds()) + } + } + initDone = true +} func (m *Mysql) Gather(acc telegraf.Accumulator) error { if len(m.Servers) == 0 { @@ -72,6 +94,12 @@ func (m *Mysql) Gather(acc telegraf.Accumulator) error { return nil } + // Initialise additional query intervals + if !initDone { + m.InitMysql() + } + + // Loop through each server and collect metrics for _, serv := range m.Servers { err := m.gatherServer(serv, acc) if err != nil { @@ -140,6 +168,114 @@ var mappings = []*mapping{ onServer: "Threads_", inExport: "threads_", }, + { + onServer: "Access_", + inExport: "access_", + }, + { + onServer: "Aria__", + inExport: "aria_", + }, + { + onServer: "Binlog__", + inExport: "binlog_", + }, + { + onServer: "Busy_", + inExport: "busy_", + }, + { + onServer: "Connection_", + inExport: "connection_", + }, + { + onServer: "Delayed_", + inExport: "delayed_", + }, + { + onServer: "Empty_", + inExport: "empty_", + }, + { + onServer: "Executed_", + inExport: "executed_", + }, + { + onServer: "Executed_", + inExport: "executed_", + }, + { + onServer: "Feature_", + inExport: "feature_", + 
}, + { + onServer: "Flush_", + inExport: "flush_", + }, + { + onServer: "Last_", + inExport: "last_", + }, + { + onServer: "Master_", + inExport: "master_", + }, + { + onServer: "Max_", + inExport: "max_", + }, + { + onServer: "Memory_", + inExport: "memory_", + }, + { + onServer: "Not_", + inExport: "not_", + }, + { + onServer: "Performance_", + inExport: "performance_", + }, + { + onServer: "Prepared_", + inExport: "prepared_", + }, + { + onServer: "Rows_", + inExport: "rows_", + }, + { + onServer: "Rpl_", + inExport: "rpl_", + }, + { + onServer: "Select_", + inExport: "select_", + }, + { + onServer: "Slave_", + inExport: "slave_", + }, + { + onServer: "Slow_", + inExport: "slow_", + }, + { + onServer: "Sort_", + inExport: "sort_", + }, + { + onServer: "Subquery_", + inExport: "subquery_", + }, + { + onServer: "Tc_", + inExport: "tc_", + }, + { + onServer: "Threadpool_", + inExport: "threadpool_", + }, } var ( @@ -261,13 +397,13 @@ const ( ORDER BY null` infoSchemaAutoIncQuery = ` SELECT table_schema, table_name, column_name, auto_increment, - pow(2, case data_type + CAST(pow(2, case data_type when 'tinyint' then 7 when 'smallint' then 15 when 'mediumint' then 23 when 'int' then 31 when 'bigint' then 63 - end+(column_type like '% unsigned'))-1 as max_int + end+(column_type like '% unsigned'))-1 as decimal(19)) as max_int FROM information_schema.tables t JOIN information_schema.columns c USING (table_schema,table_name) WHERE c.extra = 'auto_increment' AND t.auto_increment IS NOT NULL @@ -398,21 +534,34 @@ func (m *Mysql) gatherServer(serv string, acc telegraf.Accumulator) error { return err } - err = m.gatherGlobalVariables(db, serv, acc) - if err != nil { - return err + // Global Variables may be gathered less often + if len(m.IntervalSlow) > 0 { + if uint32(time.Since(lastT).Seconds()) > scanIntervalSlow { + err = m.gatherGlobalVariables(db, serv, acc) + if err != nil { + return err + } + lastT = time.Now() + } else { + err = m.gatherGlobalVariables(db, serv, acc) + if err != nil { + return err + } + } } - if m.GatherSlaveStatus { + if m.GatherBinaryLogs { err = m.gatherBinaryLogs(db, serv, acc) if err != nil { return err } } - err = m.GatherProcessListStatuses(db, serv, acc) - if err != nil { - return err + if m.GatherProcessList { + err = m.GatherProcessListStatuses(db, serv, acc) + if err != nil { + return err + } } if m.GatherSlaveStatus { @@ -422,9 +571,11 @@ func (m *Mysql) gatherServer(serv string, acc telegraf.Accumulator) error { } } - err = m.gatherInfoSchemaAutoIncStatuses(db, serv, acc) - if err != nil { - return err + if m.GatherInfoSchemaAutoInc { + err = m.gatherInfoSchemaAutoIncStatuses(db, serv, acc) + if err != nil { + return err + } } if m.GatherTableIOWaits { @@ -474,15 +625,20 @@ func (m *Mysql) gatherServer(serv string, acc telegraf.Accumulator) error { return nil } +// gatherGlobalVariables can be used to fetch all global variables from +// MySQL environment. 
func (m *Mysql) gatherGlobalVariables(db *sql.DB, serv string, acc telegraf.Accumulator) error { + // run query rows, err := db.Query(globalVariablesQuery) if err != nil { return err } defer rows.Close() + var key string var val sql.RawBytes + // parse DSN and save server tag servtag, err := parseDSN(serv) if err != nil { servtag = "localhost" @@ -494,11 +650,20 @@ func (m *Mysql) gatherGlobalVariables(db *sql.DB, serv string, acc telegraf.Accu return err } key = strings.ToLower(key) + // parse value, if it is numeric then save, otherwise ignore if floatVal, ok := parseValue(val); ok { fields[key] = floatVal } + // Send 20 fields at a time + if len(fields) >= 20 { + acc.AddFields("mysql_variables", fields, tags) + fields = make(map[string]interface{}) + } + } + // Send any remaining fields + if len(fields) > 0 { + acc.AddFields("mysql_variables", fields, tags) } - acc.AddFields("mysql_variables", fields, tags) return nil } @@ -507,40 +672,41 @@ func (m *Mysql) gatherGlobalVariables(db *sql.DB, serv string, acc telegraf.Accu // If the multi-source replication is set, then everything works differently // This code does not work with multi-source replication. func (m *Mysql) gatherSlaveStatuses(db *sql.DB, serv string, acc telegraf.Accumulator) error { + // run query rows, err := db.Query(slaveStatusQuery) - if err != nil { return err } defer rows.Close() servtag, err := parseDSN(serv) - if err != nil { servtag = "localhost" } + tags := map[string]string{"server": servtag} fields := make(map[string]interface{}) - if rows.Next() { - cols, err := rows.Columns() + // to save the column names as a field key + // scanning keys and values separately + if rows.Next() { + // get columns names, and create an array with its length + cols, err := rows.Columns() if err != nil { return err } vals := make([]interface{}, len(cols)) - + // fill the array with sql.Rawbytes for i := range vals { vals[i] = &sql.RawBytes{} } - if err = rows.Scan(vals...); err != nil { return err } - + // range over columns, and try to parse values for i, col := range cols { // skip unparsable values if value, ok := parseValue(*vals[i].(*sql.RawBytes)); ok { - //acc.Add("slave_"+col, value, tags) fields["slave_"+col] = value } } @@ -551,13 +717,16 @@ func (m *Mysql) gatherSlaveStatuses(db *sql.DB, serv string, acc telegraf.Accumu } // gatherBinaryLogs can be used to collect size and count of all binary files +// binlogs metric requires the MySQL server to turn it on in configuration func (m *Mysql) gatherBinaryLogs(db *sql.DB, serv string, acc telegraf.Accumulator) error { + // run query rows, err := db.Query(binaryLogsQuery) if err != nil { return err } defer rows.Close() + // parse DSN and save host as a tag var servtag string servtag, err = parseDSN(serv) if err != nil { @@ -572,6 +741,7 @@ func (m *Mysql) gatherBinaryLogs(db *sql.DB, serv string, acc telegraf.Accumulat fileName string ) + // iterate over rows and count the size and count of files for rows.Next() { if err := rows.Scan(&fileName, &fileSize); err != nil { return err @@ -585,6 +755,9 @@ func (m *Mysql) gatherBinaryLogs(db *sql.DB, serv string, acc telegraf.Accumulat return nil } +// gatherGlobalStatuses can be used to get MySQL status metrics +// the mappings of actual names and names of each status to be exported +// to output is provided on mappings variable func (m *Mysql) gatherGlobalStatuses(db *sql.DB, serv string, acc telegraf.Accumulator) error { // If user forgot the '/', add it if strings.HasSuffix(serv, ")") { @@ -593,11 +766,13 @@ func (m *Mysql) 
gatherGlobalStatuses(db *sql.DB, serv string, acc telegraf.Accum serv = "" } + // run query rows, err := db.Query(globalStatusQuery) if err != nil { return err } + // parse the DSN and save host name as a tag var servtag string servtag, err = parseDSN(serv) if err != nil { @@ -616,18 +791,26 @@ func (m *Mysql) gatherGlobalStatuses(db *sql.DB, serv string, acc telegraf.Accum var found bool + // iterate over mappings and gather metrics that is provided on mapping for _, mapped := range mappings { if strings.HasPrefix(name, mapped.onServer) { + // convert numeric values to integer i, _ := strconv.Atoi(string(val.([]byte))) fields[mapped.inExport+name[len(mapped.onServer):]] = i found = true } } + // Send 20 fields at a time + if len(fields) >= 20 { + acc.AddFields("mysql", fields, tags) + fields = make(map[string]interface{}) + } if found { continue } + // search for specific values switch name { case "Queries": i, err := strconv.ParseInt(string(val.([]byte)), 10, 64) @@ -643,35 +826,56 @@ func (m *Mysql) gatherGlobalStatuses(db *sql.DB, serv string, acc telegraf.Accum } fields["slow_queries"] = i + case "Connections": + i, err := strconv.ParseInt(string(val.([]byte)), 10, 64) + if err != nil { + return err + } + fields["connections"] = i + case "Syncs": + i, err := strconv.ParseInt(string(val.([]byte)), 10, 64) + if err != nil { + return err + } + fields["syncs"] = i + } + // Send any remaining fields + if len(fields) > 0 { + acc.AddFields("mysql", fields, tags) } } - acc.AddFields("mysql", fields, tags) - conn_rows, err := db.Query("SELECT user, sum(1) FROM INFORMATION_SCHEMA.PROCESSLIST GROUP BY user") + // gather connection metrics from processlist for each user + if m.GatherProcessList { + conn_rows, err := db.Query("SELECT user, sum(1) FROM INFORMATION_SCHEMA.PROCESSLIST GROUP BY user") - for conn_rows.Next() { - var user string - var connections int64 + for conn_rows.Next() { + var user string + var connections int64 - err = conn_rows.Scan(&user, &connections) - if err != nil { - return err + err = conn_rows.Scan(&user, &connections) + if err != nil { + return err + } + + tags := map[string]string{"server": servtag, "user": user} + fields := make(map[string]interface{}) + + if err != nil { + return err + } + fields["connections"] = connections + acc.AddFields("mysql_users", fields, tags) } - - tags := map[string]string{"server": servtag, "user": user} - fields := make(map[string]interface{}) - - if err != nil { - return err - } - fields["connections"] = connections - acc.AddFields("mysql_users", fields, tags) } return nil } +// GatherProcessList can be used to collect metrics on each running command +// and its state with its running count func (m *Mysql) GatherProcessListStatuses(db *sql.DB, serv string, acc telegraf.Accumulator) error { + // run query rows, err := db.Query(infoSchemaProcessListQuery) if err != nil { return err @@ -702,7 +906,9 @@ func (m *Mysql) GatherProcessListStatuses(db *sql.DB, serv string, acc telegraf. 
if err != nil { return err } + // each state has its mapping foundState := findThreadState(command, state) + // count each state stateCounts[foundState] += count } @@ -821,7 +1027,7 @@ func (m *Mysql) gatherPerfIndexIOWaits(db *sql.DB, serv string, acc telegraf.Acc return nil } -// gatherInfoSchemaAutoIncStatuses can be used to get auto incremented value of the column +// gatherInfoSchemaAutoIncStatuses can be used to get auto incremented values of the column func (m *Mysql) gatherInfoSchemaAutoIncStatuses(db *sql.DB, serv string, acc telegraf.Accumulator) error { rows, err := db.Query(infoSchemaAutoIncQuery) if err != nil { @@ -861,6 +1067,7 @@ func (m *Mysql) gatherInfoSchemaAutoIncStatuses(db *sql.DB, serv string, acc tel // gatherPerfTableLockWaits can be used to get // the total number and time for SQL and external lock wait events // for each table and operation +// requires the MySQL server to be enabled to save this metric func (m *Mysql) gatherPerfTableLockWaits(db *sql.DB, serv string, acc telegraf.Accumulator) error { // check if table exists, // if performance_schema is not enabled, tables do not exist @@ -1208,6 +1415,7 @@ func (m *Mysql) gatherPerfEventsStatements(db *sql.DB, serv string, acc telegraf return nil } +// gatherTableSchema can be used to gather stats on each schema func (m *Mysql) gatherTableSchema(db *sql.DB, serv string, acc telegraf.Accumulator) error { var ( dbList []string From 059b601b1354eb02942772ea687227472a3abd25 Mon Sep 17 00:00:00 2001 From: Maksadbek Date: Wed, 23 Mar 2016 02:21:03 +0500 Subject: [PATCH 18/84] mysql plugin conf field names are lowercase-underscored --- plugins/inputs/mysql/mysql.go | 58 +++++++++++++++++------------------ 1 file changed, 29 insertions(+), 29 deletions(-) diff --git a/plugins/inputs/mysql/mysql.go b/plugins/inputs/mysql/mysql.go index ea59113ee..750165dea 100644 --- a/plugins/inputs/mysql/mysql.go +++ b/plugins/inputs/mysql/mysql.go @@ -15,21 +15,21 @@ import ( ) type Mysql struct { - Servers []string - PerfEventsStatementsDigestTextLimit uint32 - PerfEventsStatementsLimit uint32 - PerfEventsStatementsTimeLimit uint32 - TableSchemaDatabases []string - GatherProcessList bool - GatherInfoSchemaAutoInc bool - GatherSlaveStatus bool - GatherBinaryLogs bool - GatherTableIOWaits bool - GatherIndexIOWaits bool - GatherTableSchema bool - GatherFileEventsStats bool - GatherPerfEventsStatements bool - IntervalSlow string + Servers []string `toml:"servers"` + PerfEventsStatementsDigestTextLimit uint32 `toml:"perf_events_statements_digest_text_limit"` + PerfEventsStatementsLimit uint32 `toml:"perf_events_statements_limit"` + PerfEventsStatementsTimeLimit uint32 `toml:"perf_events_statemetns_time_limit"` + TableSchemaDatabases []string `toml:"table_schema_databases"` + GatherProcessList bool `toml:"gather_process_list"` + GatherInfoSchemaAutoInc bool `toml:"gather_info_schema_auto_inc"` + GatherSlaveStatus bool `toml:"gather_slave_status"` + GatherBinaryLogs bool `toml:"gather_binary_logs"` + GatherTableIOWaits bool `toml:"gather_table_io_waits"` + GatherIndexIOWaits bool `toml:"gather_index_io_waits"` + GatherTableSchema bool `toml:"gather_table_schema"` + GatherFileEventsStats bool `toml:"gather_file_events_stats"` + GatherPerfEventsStatements bool `toml:"gather_perf_events_statements"` + IntervalSlow string `toml:"interval_slow"` } var sampleConfig = ` @@ -42,21 +42,21 @@ var sampleConfig = ` # # If no servers are specified, then localhost is used as the host. 
servers = ["tcp(127.0.0.1:3306)/"] - PerfEventsStatementsDigestTextLimit = 120 - PerfEventsStatementsLimit = 250 - PerfEventsStatementsTimeLimit = 86400 - TableSchemaDatabases = [] - GatherProcessList = false - GatherInfoSchemaAutoInc = false - GatherSlaveStatus = false - GatherBinaryLogs = false - GatherTableIOWaits = false - GatherIndexIOWaits = false - GatherTableSchema = false - GatherFileEventsStats = false - GatherPerfEventsStatements = false + perf_events_statements_digest_text_limit = 120 + perf_events_statements_limit = 250 + perf_events_statements_time_limit = 86400 + table_schema_databases = [] + gather_process_list = true + gather_info_schema_auto_inc = true + gather_slave_status = true + gather_binary_logs = false + gather_table_io_waits = false + gather_index_io_waits = false + gather_table_schema = false + gather_file_events_stats = false + gather_perf_events_statements = false # Some queries we may want to run less often (such as SHOW GLOBAL VARIABLES) - IntervalSlow = "30m" + interval_slow = "30m" ` var defaultTimeout = time.Second * time.Duration(5) From 644ce9edab8957017f8336a1c12166e69f5230a1 Mon Sep 17 00:00:00 2001 From: Maksadbek Date: Fri, 25 Mar 2016 04:06:36 +0500 Subject: [PATCH 19/84] fixed code regarding needless type casting; single creation of map --- plugins/inputs/mysql/mysql.go | 77 ++++++++++++++++++----------------- 1 file changed, 39 insertions(+), 38 deletions(-) diff --git a/plugins/inputs/mysql/mysql.go b/plugins/inputs/mysql/mysql.go index 750165dea..79d2da3b4 100644 --- a/plugins/inputs/mysql/mysql.go +++ b/plugins/inputs/mysql/mysql.go @@ -733,7 +733,6 @@ func (m *Mysql) gatherBinaryLogs(db *sql.DB, serv string, acc telegraf.Accumulat servtag = "localhost" } tags := map[string]string{"server": servtag} - fields := make(map[string]interface{}) var ( size uint64 = 0 count uint64 = 0 @@ -749,8 +748,10 @@ func (m *Mysql) gatherBinaryLogs(db *sql.DB, serv string, acc telegraf.Accumulat size += fileSize count++ } - fields["binary_size_bytes"] = size - fields["binary_files_count"] = count + fields := map[string]interface{}{ + "binary_size_bytes": size, + "binary_files_count": count, + } acc.AddFields("mysql", fields, tags) return nil } @@ -839,12 +840,11 @@ func (m *Mysql) gatherGlobalStatuses(db *sql.DB, serv string, acc telegraf.Accum } fields["syncs"] = i } - // Send any remaining fields - if len(fields) > 0 { - acc.AddFields("mysql", fields, tags) - } } - + // Send any remaining fields + if len(fields) > 0 { + acc.AddFields("mysql", fields, tags) + } // gather connection metrics from processlist for each user if m.GatherProcessList { conn_rows, err := db.Query("SELECT user, sum(1) FROM INFORMATION_SCHEMA.PROCESSLIST GROUP BY user") @@ -931,8 +931,8 @@ func (m *Mysql) gatherPerfTableIOWaits(db *sql.DB, serv string, acc telegraf.Acc defer rows.Close() var ( objSchema, objName, servtag string - countFetch, countInsert, countUpdate, countDelete uint64 - timeFetch, timeInsert, timeUpdate, timeDelete uint64 + countFetch, countInsert, countUpdate, countDelete float64 + timeFetch, timeInsert, timeUpdate, timeDelete float64 ) servtag, err = parseDSN(serv) @@ -955,16 +955,17 @@ func (m *Mysql) gatherPerfTableIOWaits(db *sql.DB, serv string, acc telegraf.Acc "schema": objSchema, "name": objName, } - fields := make(map[string]interface{}) - fields["table_io_waits_total_fetch"] = float64(countFetch) - fields["table_io_waits_total_insert"] = float64(countInsert) - fields["table_io_waits_total_update"] = float64(countUpdate) - fields["table_io_waits_total_delete"] 
= float64(countDelete) - fields["table_io_waits_seconds_total_fetch"] = float64(timeFetch) / picoSeconds - fields["table_io_waits_seconds_total_insert"] = float64(timeInsert) / picoSeconds - fields["table_io_waits_seconds_total_update"] = float64(timeUpdate) / picoSeconds - fields["table_io_waits_seconds_total_delete"] = float64(timeDelete) / picoSeconds + fields := map[string]interface{}{ + "table_io_waits_total_fetch": countFetch, + "table_io_waits_total_insert": countInsert, + "table_io_waits_total_update": countUpdate, + "table_io_waits_total_delete": countDelete, + "table_io_waits_seconds_total_fetch": timeFetch / picoSeconds, + "table_io_waits_seconds_total_insert": timeInsert / picoSeconds, + "table_io_waits_seconds_total_update": timeUpdate / picoSeconds, + "table_io_waits_seconds_total_delete": timeDelete / picoSeconds, + } acc.AddFields("mysql_perf_schema", fields, tags) } @@ -1364,11 +1365,11 @@ func (m *Mysql) gatherPerfEventsStatements(db *sql.DB, serv string, acc telegraf var ( schemaName, digest, digest_text string - count, queryTime, errors, warnings uint64 - rowsAffected, rowsSent, rowsExamined uint64 - tmpTables, tmpDiskTables uint64 - sortMergePasses, sortRows uint64 - noIndexUsed uint64 + count, queryTime, errors, warnings float64 + rowsAffected, rowsSent, rowsExamined float64 + tmpTables, tmpDiskTables float64 + sortMergePasses, sortRows float64 + noIndexUsed float64 ) servtag, err := parseDSN(serv) @@ -1395,20 +1396,20 @@ func (m *Mysql) gatherPerfEventsStatements(db *sql.DB, serv string, acc telegraf tags["digest"] = digest tags["digest_text"] = digest_text - fields := make(map[string]interface{}) - - fields["events_statements_total"] = float64(count) - fields["events_statements_seconds_total"] = float64(queryTime) / picoSeconds - fields["events_statements_errors_total"] = float64(errors) - fields["events_statements_warnings_total"] = float64(warnings) - fields["events_statements_rows_affected_total"] = float64(rowsAffected) - fields["events_statements_rows_sent_total"] = float64(rowsSent) - fields["events_statements_rows_examined_total"] = float64(rowsExamined) - fields["events_statements_tmp_tables_total"] = float64(tmpTables) - fields["events_statements_tmp_disk_tables_total"] = float64(tmpDiskTables) - fields["events_statements_sort_merge_passes_total"] = float64(sortMergePasses) - fields["events_statements_sort_rows_total"] = float64(sortRows) - fields["events_statements_no_index_used_total"] = float64(noIndexUsed) + fields := map[string]interface{}{ + "events_statements_total": count, + "events_statements_seconds_total": queryTime / picoSeconds, + "events_statements_errors_total": errors, + "events_statements_warnings_total": warnings, + "events_statements_rows_affected_total": rowsAffected, + "events_statements_rows_sent_total": rowsSent, + "events_statements_rows_examined_total": rowsExamined, + "events_statements_tmp_tables_total": tmpTables, + "events_statements_tmp_disk_tables_total": tmpDiskTables, + "events_statements_sort_merge_passes_total": sortMergePasses, + "events_statements_sort_rows_total": sortRows, + "events_statements_no_index_used_total": noIndexUsed, + } acc.AddFields("mysql_perf_schema", fields, tags) } From 046cb6a5640a8fb0a35da794936d7aa8ddd88f26 Mon Sep 17 00:00:00 2001 From: Maksadbek Date: Sat, 26 Mar 2016 01:54:49 +0500 Subject: [PATCH 20/84] changed types to decrease needless uint64 to float64 casts --- plugins/inputs/mysql/mysql.go | 168 +++++++++++++++++----------------- 1 file changed, 85 insertions(+), 83 deletions(-) diff --git 
a/plugins/inputs/mysql/mysql.go b/plugins/inputs/mysql/mysql.go index 79d2da3b4..371ef46df 100644 --- a/plugins/inputs/mysql/mysql.go +++ b/plugins/inputs/mysql/mysql.go @@ -16,9 +16,9 @@ import ( type Mysql struct { Servers []string `toml:"servers"` - PerfEventsStatementsDigestTextLimit uint32 `toml:"perf_events_statements_digest_text_limit"` - PerfEventsStatementsLimit uint32 `toml:"perf_events_statements_limit"` - PerfEventsStatementsTimeLimit uint32 `toml:"perf_events_statemetns_time_limit"` + PerfEventsStatementsDigestTextLimit int64 `toml:"perf_events_statements_digest_text_limit"` + PerfEventsStatementsLimit int64 `toml:"perf_events_statements_limit"` + PerfEventsStatementsTimeLimit int64 `toml:"perf_events_statemetns_time_limit"` TableSchemaDatabases []string `toml:"table_schema_databases"` GatherProcessList bool `toml:"gather_process_list"` GatherInfoSchemaAutoInc bool `toml:"gather_info_schema_auto_inc"` @@ -983,8 +983,8 @@ func (m *Mysql) gatherPerfIndexIOWaits(db *sql.DB, serv string, acc telegraf.Acc var ( objSchema, objName, indexName, servtag string - countFetch, countInsert, countUpdate, countDelete uint64 - timeFetch, timeInsert, timeUpdate, timeDelete uint64 + countFetch, countInsert, countUpdate, countDelete float64 + timeFetch, timeInsert, timeUpdate, timeDelete float64 ) servtag, err = parseDSN(serv) @@ -1008,19 +1008,20 @@ func (m *Mysql) gatherPerfIndexIOWaits(db *sql.DB, serv string, acc telegraf.Acc "name": objName, "index": indexName, } - fields := make(map[string]interface{}) - fields["index_io_waits_total_fetch"] = float64(countFetch) - fields["index_io_waits_seconds_total_fetch"] = float64(timeFetch) / picoSeconds + fields := map[string]interface{}{ + "index_io_waits_total_fetch": countFetch, + "index_io_waits_seconds_total_fetch": timeFetch / picoSeconds, + } // update write columns only when index is NONE if indexName == "NONE" { - fields["index_io_waits_total_insert"] = float64(countInsert) - fields["index_io_waits_total_update"] = float64(countUpdate) - fields["index_io_waits_total_delete"] = float64(countDelete) + fields["index_io_waits_total_insert"] = countInsert + fields["index_io_waits_total_update"] = countUpdate + fields["index_io_waits_total_delete"] = countDelete - fields["index_io_waits_seconds_total_insert"] = float64(timeInsert) / picoSeconds - fields["index_io_waits_seconds_total_update"] = float64(timeUpdate) / picoSeconds - fields["index_io_waits_seconds_total_delete"] = float64(timeDelete) / picoSeconds + fields["index_io_waits_seconds_total_insert"] = timeInsert / picoSeconds + fields["index_io_waits_seconds_total_update"] = timeUpdate / picoSeconds + fields["index_io_waits_seconds_total_delete"] = timeDelete / picoSeconds } acc.AddFields("mysql_perf_schema", fields, tags) @@ -1096,28 +1097,28 @@ func (m *Mysql) gatherPerfTableLockWaits(db *sql.DB, serv string, acc telegraf.A var ( objectSchema string objectName string - countReadNormal uint64 - countReadWithSharedLocks uint64 - countReadHighPriority uint64 - countReadNoInsert uint64 - countReadExternal uint64 - countWriteAllowWrite uint64 - countWriteConcurrentInsert uint64 - countWriteDelayed uint64 - countWriteLowPriority uint64 - countWriteNormal uint64 - countWriteExternal uint64 - timeReadNormal uint64 - timeReadWithSharedLocks uint64 - timeReadHighPriority uint64 - timeReadNoInsert uint64 - timeReadExternal uint64 - timeWriteAllowWrite uint64 - timeWriteConcurrentInsert uint64 - timeWriteDelayed uint64 - timeWriteLowPriority uint64 - timeWriteNormal uint64 - timeWriteExternal uint64 
+ countReadNormal float64 + countReadWithSharedLocks float64 + countReadHighPriority float64 + countReadNoInsert float64 + countReadExternal float64 + countWriteAllowWrite float64 + countWriteConcurrentInsert float64 + countWriteDelayed float64 + countWriteLowPriority float64 + countWriteNormal float64 + countWriteExternal float64 + timeReadNormal float64 + timeReadWithSharedLocks float64 + timeReadHighPriority float64 + timeReadNoInsert float64 + timeReadExternal float64 + timeWriteAllowWrite float64 + timeWriteConcurrentInsert float64 + timeWriteDelayed float64 + timeWriteLowPriority float64 + timeWriteNormal float64 + timeWriteExternal float64 ) for rows.Next() { @@ -1159,97 +1160,97 @@ func (m *Mysql) gatherPerfTableLockWaits(db *sql.DB, serv string, acc telegraf.A fields := make(map[string]interface{}) tags["operation"] = "read_normal" - fields["sql_lock_waits_total"] = float64(countReadNormal) + fields["sql_lock_waits_total"] = countReadNormal acc.AddFields("mysql_perf_schema", fields, tags) tags["operation"] = "read_with_shared_locks" - fields["sql_lock_waits_total"] = float64(countReadWithSharedLocks) + fields["sql_lock_waits_total"] = countReadWithSharedLocks acc.AddFields("mysql_perf_schema", fields, tags) tags["operation"] = "read_high_priority" - fields["sql_lock_waits_total"] = float64(countReadHighPriority) + fields["sql_lock_waits_total"] = countReadHighPriority acc.AddFields("mysql_perf_schema", fields, tags) tags["operation"] = "read_no_insert" - fields["sql_lock_waits_total"] = float64(countReadNoInsert) + fields["sql_lock_waits_total"] = countReadNoInsert acc.AddFields("mysql_perf_schema", fields, tags) tags["operation"] = "write_normal" - fields["sql_lock_waits_total"] = float64(countWriteNormal) + fields["sql_lock_waits_total"] = countWriteNormal acc.AddFields("mysql_perf_schema", fields, tags) tags["operation"] = "write_allow_write" - fields["sql_lock_waits_total"] = float64(countWriteAllowWrite) + fields["sql_lock_waits_total"] = countWriteAllowWrite acc.AddFields("mysql_perf_schema", fields, tags) tags["operation"] = "write_concurrent_insert" - fields["sql_lock_waits_total"] = float64(countWriteConcurrentInsert) + fields["sql_lock_waits_total"] = countWriteConcurrentInsert acc.AddFields("mysql_perf_schema", fields, tags) tags["operation"] = "write_delayed" - fields["sql_lock_waits_total"] = float64(countWriteDelayed) + fields["sql_lock_waits_total"] = countWriteDelayed acc.AddFields("mysql_perf_schema", fields, tags) tags["operation"] = "write_low_priority" - fields["sql_lock_waits_total"] = float64(countWriteLowPriority) + fields["sql_lock_waits_total"] = countWriteLowPriority acc.AddFields("mysql_perf_schema", fields, tags) delete(fields, "sql_lock_waits_total") tags["operation"] = "read" - fields["external_lock_waits_total"] = float64(countReadExternal) + fields["external_lock_waits_total"] = countReadExternal acc.AddFields("mysql_perf_schema", fields, tags) tags["operation"] = "write" - fields["external_lock_waits_total"] = float64(countWriteExternal) + fields["external_lock_waits_total"] = countWriteExternal acc.AddFields("mysql_perf_schema", fields, tags) delete(fields, "external_lock_waits_total") tags["operation"] = "read_normal" - fields["sql_lock_waits_seconds_total"] = float64(timeReadNormal / picoSeconds) + fields["sql_lock_waits_seconds_total"] = timeReadNormal / picoSeconds acc.AddFields("mysql_perf_schema", fields, tags) tags["operation"] = "read_with_shared_locks" - fields["sql_lock_waits_seconds_total"] = float64(timeReadWithSharedLocks / 
picoSeconds) + fields["sql_lock_waits_seconds_total"] = timeReadWithSharedLocks / picoSeconds acc.AddFields("mysql_perf_schema", fields, tags) tags["operation"] = "read_high_priority" - fields["sql_lock_waits_seconds_total"] = float64(timeReadHighPriority / picoSeconds) + fields["sql_lock_waits_seconds_total"] = timeReadHighPriority / picoSeconds acc.AddFields("mysql_perf_schema", fields, tags) tags["operation"] = "read_no_insert" - fields["sql_lock_waits_seconds_total"] = float64(timeReadNoInsert / picoSeconds) + fields["sql_lock_waits_seconds_total"] = timeReadNoInsert / picoSeconds acc.AddFields("mysql_perf_schema", fields, tags) tags["operation"] = "write_normal" - fields["sql_lock_waits_seconds_total"] = float64(timeWriteNormal / picoSeconds) + fields["sql_lock_waits_seconds_total"] = timeWriteNormal / picoSeconds acc.AddFields("mysql_perf_schema", fields, tags) tags["operation"] = "write_allow_write" - fields["sql_lock_waits_seconds_total"] = float64(timeWriteAllowWrite / picoSeconds) + fields["sql_lock_waits_seconds_total"] = timeWriteAllowWrite / picoSeconds acc.AddFields("mysql_perf_schema", fields, tags) tags["operation"] = "write_concurrent_insert" - fields["sql_lock_waits_seconds_total"] = float64(timeWriteConcurrentInsert / picoSeconds) + fields["sql_lock_waits_seconds_total"] = timeWriteConcurrentInsert / picoSeconds acc.AddFields("mysql_perf_schema", fields, tags) tags["operation"] = "write_delayed" - fields["sql_lock_waits_seconds_total"] = float64(timeWriteDelayed / picoSeconds) + fields["sql_lock_waits_seconds_total"] = timeWriteDelayed / picoSeconds acc.AddFields("mysql_perf_schema", fields, tags) tags["operation"] = "write_low_priority" - fields["sql_lock_waits_seconds_total"] = float64(timeWriteLowPriority / picoSeconds) + fields["sql_lock_waits_seconds_total"] = timeWriteLowPriority / picoSeconds acc.AddFields("mysql_perf_schema", fields, tags) delete(fields, "sql_lock_waits_seconds_total") tags["operation"] = "read" - fields["external_lock_waits_seconds_total"] = float64(timeReadExternal / picoSeconds) + fields["external_lock_waits_seconds_total"] = timeReadExternal / picoSeconds acc.AddFields("mysql_perf_schema", fields, tags) tags["operation"] = "write" - fields["external_lock_waits_seconds_total"] = float64(timeWriteExternal / picoSeconds) + fields["external_lock_waits_seconds_total"] = timeWriteExternal / picoSeconds acc.AddFields("mysql_perf_schema", fields, tags) } return nil @@ -1265,7 +1266,7 @@ func (m *Mysql) gatherPerfEventWaits(db *sql.DB, serv string, acc telegraf.Accum var ( event string - starCount, timeWait uint64 + starCount, timeWait float64 ) servtag, err := parseDSN(serv) @@ -1280,9 +1281,10 @@ func (m *Mysql) gatherPerfEventWaits(db *sql.DB, serv string, acc telegraf.Accum return err } tags["event_name"] = event - fields := make(map[string]interface{}) - fields["events_waits_total"] = float64(starCount) - fields["events_waits_seconds_total"] = float64(timeWait) / picoSeconds + fields := map[string]interface{}{ + "events_waits_total": starCount, + "events_waits_seconds_total": timeWait / picoSeconds, + } acc.AddFields("mysql_perf_schema", fields, tags) } @@ -1300,9 +1302,9 @@ func (m *Mysql) gatherPerfFileEventsStatuses(db *sql.DB, serv string, acc telegr var ( eventName string - countRead, countWrite, countMisc uint64 - sumTimerRead, sumTimerWrite, sumTimerMisc uint64 - sumNumBytesRead, sumNumBytesWrite uint64 + countRead, countWrite, countMisc float64 + sumTimerRead, sumTimerWrite, sumTimerMisc float64 + sumNumBytesRead, sumNumBytesWrite float64 
) servtag, err := parseDSN(serv) @@ -1327,20 +1329,20 @@ func (m *Mysql) gatherPerfFileEventsStatuses(db *sql.DB, serv string, acc telegr fields := make(map[string]interface{}) tags["mode"] = "misc" - fields["file_events_total"] = float64(countWrite) - fields["file_events_seconds_total"] = float64(sumTimerMisc) / picoSeconds + fields["file_events_total"] = countWrite + fields["file_events_seconds_total"] = sumTimerMisc / picoSeconds acc.AddFields("mysql_perf_schema", fields, tags) tags["mode"] = "read" - fields["file_events_total"] = float64(countRead) - fields["file_events_seconds_total"] = float64(sumTimerRead) / picoSeconds - fields["file_events_bytes_totals"] = float64(sumNumBytesRead) + fields["file_events_total"] = countRead + fields["file_events_seconds_total"] = sumTimerRead / picoSeconds + fields["file_events_bytes_totals"] = sumNumBytesRead acc.AddFields("mysql_perf_schema", fields, tags) tags["mode"] = "write" - fields["file_events_total"] = float64(countWrite) - fields["file_events_seconds_total"] = float64(sumTimerWrite) / picoSeconds - fields["file_events_bytes_totals"] = float64(sumNumBytesWrite) + fields["file_events_total"] = countWrite + fields["file_events_seconds_total"] = sumTimerWrite / picoSeconds + fields["file_events_bytes_totals"] = sumNumBytesWrite acc.AddFields("mysql_perf_schema", fields, tags) } @@ -1459,12 +1461,12 @@ func (m *Mysql) gatherTableSchema(db *sql.DB, serv string, acc telegraf.Accumula tableName string tableType string engine string - version uint64 + version float64 rowFormat string - tableRows uint64 - dataLength uint64 - indexLength uint64 - dataFree uint64 + tableRows float64 + dataLength float64 + indexLength float64 + dataFree float64 createOptions string ) for rows.Next() { @@ -1489,23 +1491,23 @@ func (m *Mysql) gatherTableSchema(db *sql.DB, serv string, acc telegraf.Accumula tags["table"] = tableName versionTags := tags - acc.Add(newNamespace("info_schema", "table_rows"), float64(tableRows), tags) + acc.Add(newNamespace("info_schema", "table_rows"), tableRows, tags) tags["component"] = "data_length" - acc.Add(newNamespace("info_schema", "table_size", "data_length"), float64(dataLength), tags) + acc.Add(newNamespace("info_schema", "table_size", "data_length"), dataLength, tags) tags["component"] = "index_length" - acc.Add(newNamespace("info_schema", "table_size", "index_length"), float64(indexLength), tags) + acc.Add(newNamespace("info_schema", "table_size", "index_length"), indexLength, tags) tags["component"] = "data_free" - acc.Add(newNamespace("info_schema", "table_size", "data_free"), float64(dataFree), tags) + acc.Add(newNamespace("info_schema", "table_size", "data_free"), dataFree, tags) versionTags["type"] = tableType versionTags["engine"] = engine versionTags["row_format"] = rowFormat versionTags["create_options"] = createOptions - acc.Add(newNamespace("info_schema", "table_version"), float64(version), versionTags) + acc.Add(newNamespace("info_schema", "table_version"), version, versionTags) } } return nil From 7e64dc380f36c35907b717f84ab867e097c951b1 Mon Sep 17 00:00:00 2001 From: Maksadbek Date: Thu, 31 Mar 2016 01:42:10 +0500 Subject: [PATCH 21/84] preventing tags from mutation by creating new tag for each metric --- plugins/inputs/mysql/mysql.go | 202 ++++++++++++++++++++-------------- 1 file changed, 119 insertions(+), 83 deletions(-) diff --git a/plugins/inputs/mysql/mysql.go b/plugins/inputs/mysql/mysql.go index 371ef46df..21dd92de9 100644 --- a/plugins/inputs/mysql/mysql.go +++ b/plugins/inputs/mysql/mysql.go @@ -1157,101 
+1157,123 @@ func (m *Mysql) gatherPerfTableLockWaits(db *sql.DB, serv string, acc telegraf.A "schema": objectSchema, "table": objectName, } - fields := make(map[string]interface{}) + sqlLWFields := make(map[string]interface{}) - tags["operation"] = "read_normal" - fields["sql_lock_waits_total"] = countReadNormal - acc.AddFields("mysql_perf_schema", fields, tags) + rnTags := copyTags(tags) + rnTags["operation"] = "read_normal" + sqlLWFields["sql_lock_waits_total"] = countReadNormal + acc.AddFields("mysql_perf_schema", sqlLWFields, rnTags) - tags["operation"] = "read_with_shared_locks" - fields["sql_lock_waits_total"] = countReadWithSharedLocks - acc.AddFields("mysql_perf_schema", fields, tags) + rwslTags := copyTags(tags) + rwslTags["operation"] = "read_with_shared_locks" + sqlLWFields["sql_lock_waits_total"] = countReadWithSharedLocks + acc.AddFields("mysql_perf_schema", sqlLWFields, rwslTags) - tags["operation"] = "read_high_priority" - fields["sql_lock_waits_total"] = countReadHighPriority - acc.AddFields("mysql_perf_schema", fields, tags) + rhptTags := copyTags(tags) + rhptTags["operation"] = "read_high_priority" + sqlLWFields["sql_lock_waits_total"] = countReadHighPriority + acc.AddFields("mysql_perf_schema", sqlLWFields, rhptTags) - tags["operation"] = "read_no_insert" - fields["sql_lock_waits_total"] = countReadNoInsert - acc.AddFields("mysql_perf_schema", fields, tags) + rniTags := copyTags(tags) + rniTags["operation"] = "read_no_insert" + sqlLWFields["sql_lock_waits_total"] = countReadNoInsert + acc.AddFields("mysql_perf_schema", sqlLWFields, tags) - tags["operation"] = "write_normal" - fields["sql_lock_waits_total"] = countWriteNormal - acc.AddFields("mysql_perf_schema", fields, tags) + wnTags := copyTags(tags) + wnTags["operation"] = "write_normal" + sqlLWFields["sql_lock_waits_total"] = countWriteNormal + acc.AddFields("mysql_perf_schema", sqlLWFields, wnTags) - tags["operation"] = "write_allow_write" - fields["sql_lock_waits_total"] = countWriteAllowWrite - acc.AddFields("mysql_perf_schema", fields, tags) + wawTags := copyTags(tags) + wawTags["operation"] = "write_allow_write" + sqlLWFields["sql_lock_waits_total"] = countWriteAllowWrite + acc.AddFields("mysql_perf_schema", sqlLWFields, wawTags) - tags["operation"] = "write_concurrent_insert" - fields["sql_lock_waits_total"] = countWriteConcurrentInsert - acc.AddFields("mysql_perf_schema", fields, tags) + wciTags := copyTags(tags) + wciTags["operation"] = "write_concurrent_insert" + sqlLWFields["sql_lock_waits_total"] = countWriteConcurrentInsert + acc.AddFields("mysql_perf_schema", sqlLWFields, wciTags) - tags["operation"] = "write_delayed" - fields["sql_lock_waits_total"] = countWriteDelayed - acc.AddFields("mysql_perf_schema", fields, tags) + wdTags := copyTags(tags) + wdTags["operation"] = "write_delayed" + sqlLWFields["sql_lock_waits_total"] = countWriteDelayed + acc.AddFields("mysql_perf_schema", sqlLWFields, wdTags) - tags["operation"] = "write_low_priority" - fields["sql_lock_waits_total"] = countWriteLowPriority - acc.AddFields("mysql_perf_schema", fields, tags) + wlpTags := copyTags(tags) + wlpTags["operation"] = "write_low_priority" + sqlLWFields["sql_lock_waits_total"] = countWriteLowPriority + acc.AddFields("mysql_perf_schema", sqlLWFields, wlpTags) - delete(fields, "sql_lock_waits_total") + externalLWFields := make(map[string]interface{}) - tags["operation"] = "read" - fields["external_lock_waits_total"] = countReadExternal - acc.AddFields("mysql_perf_schema", fields, tags) + rTags := copyTags(tags) + 
rTags["operation"] = "read" + externalLWFields["external_lock_waits_total"] = countReadExternal + acc.AddFields("mysql_perf_schema", externalLWFields, rTags) - tags["operation"] = "write" - fields["external_lock_waits_total"] = countWriteExternal - acc.AddFields("mysql_perf_schema", fields, tags) + wTags := copyTags(tags) + wTags["operation"] = "write" + externalLWFields["external_lock_waits_total"] = countWriteExternal + acc.AddFields("mysql_perf_schema", externalLWFields, wTags) - delete(fields, "external_lock_waits_total") + sqlLWSecTotalFields := make(map[string]interface{}) - tags["operation"] = "read_normal" - fields["sql_lock_waits_seconds_total"] = timeReadNormal / picoSeconds - acc.AddFields("mysql_perf_schema", fields, tags) + rnstTags := copyTags(tags) + rnstTags["operation"] = "read_normal" + sqlLWSecTotalFields["sql_lock_waits_seconds_total"] = timeReadNormal / picoSeconds + acc.AddFields("mysql_perf_schema", sqlLWSecTotalFields, rnstTags) - tags["operation"] = "read_with_shared_locks" - fields["sql_lock_waits_seconds_total"] = timeReadWithSharedLocks / picoSeconds - acc.AddFields("mysql_perf_schema", fields, tags) + rwslstTags := copyTags(tags) + rwslstTags["operation"] = "read_with_shared_locks" + sqlLWSecTotalFields["sql_lock_waits_seconds_total"] = timeReadWithSharedLocks / picoSeconds + acc.AddFields("mysql_perf_schema", sqlLWSecTotalFields, rwslstTags) - tags["operation"] = "read_high_priority" - fields["sql_lock_waits_seconds_total"] = timeReadHighPriority / picoSeconds - acc.AddFields("mysql_perf_schema", fields, tags) + rhpTags := copyTags(tags) + rhpTags["operation"] = "read_high_priority" + sqlLWSecTotalFields["sql_lock_waits_seconds_total"] = timeReadHighPriority / picoSeconds + acc.AddFields("mysql_perf_schema", sqlLWSecTotalFields, rhpTags) - tags["operation"] = "read_no_insert" - fields["sql_lock_waits_seconds_total"] = timeReadNoInsert / picoSeconds - acc.AddFields("mysql_perf_schema", fields, tags) + rnistTags := copyTags(tags) + rnistTags["operation"] = "read_no_insert" + sqlLWSecTotalFields["sql_lock_waits_seconds_total"] = timeReadNoInsert / picoSeconds + acc.AddFields("mysql_perf_schema", sqlLWSecTotalFields, rnistTags) - tags["operation"] = "write_normal" - fields["sql_lock_waits_seconds_total"] = timeWriteNormal / picoSeconds - acc.AddFields("mysql_perf_schema", fields, tags) + wnstTags := copyTags(tags) + wnstTags["operation"] = "write_normal" + sqlLWSecTotalFields["sql_lock_waits_seconds_total"] = timeWriteNormal / picoSeconds + acc.AddFields("mysql_perf_schema", sqlLWSecTotalFields, wnstTags) - tags["operation"] = "write_allow_write" - fields["sql_lock_waits_seconds_total"] = timeWriteAllowWrite / picoSeconds - acc.AddFields("mysql_perf_schema", fields, tags) + wawstTags := copyTags(tags) + wawstTags["operation"] = "write_allow_write" + sqlLWSecTotalFields["sql_lock_waits_seconds_total"] = timeWriteAllowWrite / picoSeconds + acc.AddFields("mysql_perf_schema", sqlLWSecTotalFields, wawstTags) - tags["operation"] = "write_concurrent_insert" - fields["sql_lock_waits_seconds_total"] = timeWriteConcurrentInsert / picoSeconds - acc.AddFields("mysql_perf_schema", fields, tags) + wcistTags := copyTags(tags) + wcistTags["operation"] = "write_concurrent_insert" + sqlLWSecTotalFields["sql_lock_waits_seconds_total"] = timeWriteConcurrentInsert / picoSeconds + acc.AddFields("mysql_perf_schema", sqlLWSecTotalFields, wcistTags) - tags["operation"] = "write_delayed" - fields["sql_lock_waits_seconds_total"] = timeWriteDelayed / picoSeconds - 
acc.AddFields("mysql_perf_schema", fields, tags) + wdstTags := copyTags(tags) + wdstTags["operation"] = "write_delayed" + sqlLWSecTotalFields["sql_lock_waits_seconds_total"] = timeWriteDelayed / picoSeconds + acc.AddFields("mysql_perf_schema", sqlLWSecTotalFields, wdstTags) - tags["operation"] = "write_low_priority" - fields["sql_lock_waits_seconds_total"] = timeWriteLowPriority / picoSeconds - acc.AddFields("mysql_perf_schema", fields, tags) + wlpstTags := copyTags(tags) + wlpstTags["operation"] = "write_low_priority" + sqlLWSecTotalFields["sql_lock_waits_seconds_total"] = timeWriteLowPriority / picoSeconds + acc.AddFields("mysql_perf_schema", sqlLWSecTotalFields, wlpstTags) - delete(fields, "sql_lock_waits_seconds_total") + externalLWSecTotalFields := make(map[string]interface{}) - tags["operation"] = "read" - fields["external_lock_waits_seconds_total"] = timeReadExternal / picoSeconds - acc.AddFields("mysql_perf_schema", fields, tags) + rstTags := copyTags(tags) + rstTags["operation"] = "read" + externalLWSecTotalFields["external_lock_waits_seconds_total"] = timeReadExternal / picoSeconds + acc.AddFields("mysql_perf_schema", externalLWSecTotalFields, rstTags) - tags["operation"] = "write" - fields["external_lock_waits_seconds_total"] = timeWriteExternal / picoSeconds - acc.AddFields("mysql_perf_schema", fields, tags) + wstTags := copyTags(tags) + wstTags["operation"] = "write" + externalLWSecTotalFields["external_lock_waits_seconds_total"] = timeWriteExternal / picoSeconds + acc.AddFields("mysql_perf_schema", externalLWSecTotalFields, wstTags) } return nil } @@ -1328,22 +1350,25 @@ func (m *Mysql) gatherPerfFileEventsStatuses(db *sql.DB, serv string, acc telegr tags["event_name"] = eventName fields := make(map[string]interface{}) - tags["mode"] = "misc" + miscTags := copyTags(tags) + miscTags["mode"] = "misc" fields["file_events_total"] = countWrite fields["file_events_seconds_total"] = sumTimerMisc / picoSeconds - acc.AddFields("mysql_perf_schema", fields, tags) + acc.AddFields("mysql_perf_schema", fields, miscTags) - tags["mode"] = "read" + readTags := copyTags(tags) + readTags["mode"] = "read" fields["file_events_total"] = countRead fields["file_events_seconds_total"] = sumTimerRead / picoSeconds fields["file_events_bytes_totals"] = sumNumBytesRead - acc.AddFields("mysql_perf_schema", fields, tags) + acc.AddFields("mysql_perf_schema", fields, readTags) - tags["mode"] = "write" + writeTags := copyTags(tags) + writeTags["mode"] = "write" fields["file_events_total"] = countWrite fields["file_events_seconds_total"] = sumTimerWrite / picoSeconds fields["file_events_bytes_totals"] = sumNumBytesWrite - acc.AddFields("mysql_perf_schema", fields, tags) + acc.AddFields("mysql_perf_schema", fields, writeTags) } return nil @@ -1489,19 +1514,22 @@ func (m *Mysql) gatherTableSchema(db *sql.DB, serv string, acc telegraf.Accumula tags := map[string]string{"server": servtag} tags["schema"] = tableSchema tags["table"] = tableName - versionTags := tags acc.Add(newNamespace("info_schema", "table_rows"), tableRows, tags) - tags["component"] = "data_length" - acc.Add(newNamespace("info_schema", "table_size", "data_length"), dataLength, tags) + dlTags := copyTags(tags) + dlTags["component"] = "data_length" + acc.Add(newNamespace("info_schema", "table_size", "data_length"), dataLength, dlTags) - tags["component"] = "index_length" - acc.Add(newNamespace("info_schema", "table_size", "index_length"), indexLength, tags) + ilTags := copyTags(tags) + ilTags["component"] = "index_length" + 
acc.Add(newNamespace("info_schema", "table_size", "index_length"), indexLength, ilTags) - tags["component"] = "data_free" - acc.Add(newNamespace("info_schema", "table_size", "data_free"), dataFree, tags) + dfTags := copyTags(tags) + dfTags["component"] = "data_free" + acc.Add(newNamespace("info_schema", "table_size", "data_free"), dataFree, dfTags) + versionTags := copyTags(tags) versionTags["type"] = tableType versionTags["engine"] = engine versionTags["row_format"] = rowFormat @@ -1567,6 +1595,14 @@ func newNamespace(words ...string) string { return strings.Replace(strings.Join(words, "_"), " ", "_", -1) } +func copyTags(in map[string]string) map[string]string { + out := make(map[string]string) + for k, v := range in { + out[k] = v + } + return out +} + func init() { inputs.Add("mysql", func() telegraf.Input { return &Mysql{} From a2d1ee08d486e6b0556ab81c5f9bc354d1f4c135 Mon Sep 17 00:00:00 2001 From: Maksadbek Date: Mon, 11 Apr 2016 01:19:06 +0500 Subject: [PATCH 22/84] transposed the matrix of tags/fields for Lock Waits stats gathering --- plugins/inputs/mysql/mysql.go | 155 +++++++++------------------------- 1 file changed, 42 insertions(+), 113 deletions(-) diff --git a/plugins/inputs/mysql/mysql.go b/plugins/inputs/mysql/mysql.go index 21dd92de9..d228ccd74 100644 --- a/plugins/inputs/mysql/mysql.go +++ b/plugins/inputs/mysql/mysql.go @@ -1157,123 +1157,52 @@ func (m *Mysql) gatherPerfTableLockWaits(db *sql.DB, serv string, acc telegraf.A "schema": objectSchema, "table": objectName, } - sqlLWFields := make(map[string]interface{}) - rnTags := copyTags(tags) - rnTags["operation"] = "read_normal" - sqlLWFields["sql_lock_waits_total"] = countReadNormal - acc.AddFields("mysql_perf_schema", sqlLWFields, rnTags) + sqlLWTags := copyTags(tags) + sqlLWTags["perf_query"] = "sql_lock_waits_total" + sqlLWFields := map[string]interface{}{ + "read_normal": countReadNormal, + "read_with_shared_locks": countReadWithSharedLocks, + "read_high_priority": countReadHighPriority, + "read_no_insert": countReadNoInsert, + "write_normal": countWriteNormal, + "write_allow_write": countWriteAllowWrite, + "write_concurrent_insert": countWriteConcurrentInsert, + "write_delayed": countWriteDelayed, + "write_low_priority": countWriteLowPriority, + } + acc.AddFields("mysql_perf_schema", sqlLWFields, sqlLWTags) - rwslTags := copyTags(tags) - rwslTags["operation"] = "read_with_shared_locks" - sqlLWFields["sql_lock_waits_total"] = countReadWithSharedLocks - acc.AddFields("mysql_perf_schema", sqlLWFields, rwslTags) + externalLWTags := copyTags(tags) + externalLWTags["perf_query"] = "external_lock_waits_total" + externalLWFields := map[string]interface{}{ + "read": countReadExternal, + "write": countWriteExternal, + } + acc.AddFields("mysql_perf_schema", externalLWFields, externalLWTags) - rhptTags := copyTags(tags) - rhptTags["operation"] = "read_high_priority" - sqlLWFields["sql_lock_waits_total"] = countReadHighPriority - acc.AddFields("mysql_perf_schema", sqlLWFields, rhptTags) + sqlLWSecTotalTags := copyTags(tags) + sqlLWSecTotalTags["perf_query"] = "sql_lock_waits_seconds_total" + sqlLWSecTotalFields := map[string]interface{}{ + "read_normal": timeReadNormal / picoSeconds, + "read_with_shared_locks": timeReadWithSharedLocks / picoSeconds, + "read_high_priority": timeReadHighPriority / picoSeconds, + "read_no_insert": timeReadNoInsert / picoSeconds, + "write_normal": timeWriteNormal / picoSeconds, + "write_allow_write": timeWriteAllowWrite / picoSeconds, + "write_concurrent_insert": timeWriteConcurrentInsert / 
picoSeconds, + "write_delayed": timeWriteDelayed / picoSeconds, + "write_low_priority": timeWriteLowPriority / picoSeconds, + } + acc.AddFields("mysql_perf_schema", sqlLWSecTotalFields, sqlLWSecTotalTags) - rniTags := copyTags(tags) - rniTags["operation"] = "read_no_insert" - sqlLWFields["sql_lock_waits_total"] = countReadNoInsert - acc.AddFields("mysql_perf_schema", sqlLWFields, tags) - - wnTags := copyTags(tags) - wnTags["operation"] = "write_normal" - sqlLWFields["sql_lock_waits_total"] = countWriteNormal - acc.AddFields("mysql_perf_schema", sqlLWFields, wnTags) - - wawTags := copyTags(tags) - wawTags["operation"] = "write_allow_write" - sqlLWFields["sql_lock_waits_total"] = countWriteAllowWrite - acc.AddFields("mysql_perf_schema", sqlLWFields, wawTags) - - wciTags := copyTags(tags) - wciTags["operation"] = "write_concurrent_insert" - sqlLWFields["sql_lock_waits_total"] = countWriteConcurrentInsert - acc.AddFields("mysql_perf_schema", sqlLWFields, wciTags) - - wdTags := copyTags(tags) - wdTags["operation"] = "write_delayed" - sqlLWFields["sql_lock_waits_total"] = countWriteDelayed - acc.AddFields("mysql_perf_schema", sqlLWFields, wdTags) - - wlpTags := copyTags(tags) - wlpTags["operation"] = "write_low_priority" - sqlLWFields["sql_lock_waits_total"] = countWriteLowPriority - acc.AddFields("mysql_perf_schema", sqlLWFields, wlpTags) - - externalLWFields := make(map[string]interface{}) - - rTags := copyTags(tags) - rTags["operation"] = "read" - externalLWFields["external_lock_waits_total"] = countReadExternal - acc.AddFields("mysql_perf_schema", externalLWFields, rTags) - - wTags := copyTags(tags) - wTags["operation"] = "write" - externalLWFields["external_lock_waits_total"] = countWriteExternal - acc.AddFields("mysql_perf_schema", externalLWFields, wTags) - - sqlLWSecTotalFields := make(map[string]interface{}) - - rnstTags := copyTags(tags) - rnstTags["operation"] = "read_normal" - sqlLWSecTotalFields["sql_lock_waits_seconds_total"] = timeReadNormal / picoSeconds - acc.AddFields("mysql_perf_schema", sqlLWSecTotalFields, rnstTags) - - rwslstTags := copyTags(tags) - rwslstTags["operation"] = "read_with_shared_locks" - sqlLWSecTotalFields["sql_lock_waits_seconds_total"] = timeReadWithSharedLocks / picoSeconds - acc.AddFields("mysql_perf_schema", sqlLWSecTotalFields, rwslstTags) - - rhpTags := copyTags(tags) - rhpTags["operation"] = "read_high_priority" - sqlLWSecTotalFields["sql_lock_waits_seconds_total"] = timeReadHighPriority / picoSeconds - acc.AddFields("mysql_perf_schema", sqlLWSecTotalFields, rhpTags) - - rnistTags := copyTags(tags) - rnistTags["operation"] = "read_no_insert" - sqlLWSecTotalFields["sql_lock_waits_seconds_total"] = timeReadNoInsert / picoSeconds - acc.AddFields("mysql_perf_schema", sqlLWSecTotalFields, rnistTags) - - wnstTags := copyTags(tags) - wnstTags["operation"] = "write_normal" - sqlLWSecTotalFields["sql_lock_waits_seconds_total"] = timeWriteNormal / picoSeconds - acc.AddFields("mysql_perf_schema", sqlLWSecTotalFields, wnstTags) - - wawstTags := copyTags(tags) - wawstTags["operation"] = "write_allow_write" - sqlLWSecTotalFields["sql_lock_waits_seconds_total"] = timeWriteAllowWrite / picoSeconds - acc.AddFields("mysql_perf_schema", sqlLWSecTotalFields, wawstTags) - - wcistTags := copyTags(tags) - wcistTags["operation"] = "write_concurrent_insert" - sqlLWSecTotalFields["sql_lock_waits_seconds_total"] = timeWriteConcurrentInsert / picoSeconds - acc.AddFields("mysql_perf_schema", sqlLWSecTotalFields, wcistTags) - - wdstTags := copyTags(tags) - wdstTags["operation"] = 
"write_delayed" - sqlLWSecTotalFields["sql_lock_waits_seconds_total"] = timeWriteDelayed / picoSeconds - acc.AddFields("mysql_perf_schema", sqlLWSecTotalFields, wdstTags) - - wlpstTags := copyTags(tags) - wlpstTags["operation"] = "write_low_priority" - sqlLWSecTotalFields["sql_lock_waits_seconds_total"] = timeWriteLowPriority / picoSeconds - acc.AddFields("mysql_perf_schema", sqlLWSecTotalFields, wlpstTags) - - externalLWSecTotalFields := make(map[string]interface{}) - - rstTags := copyTags(tags) - rstTags["operation"] = "read" - externalLWSecTotalFields["external_lock_waits_seconds_total"] = timeReadExternal / picoSeconds - acc.AddFields("mysql_perf_schema", externalLWSecTotalFields, rstTags) - - wstTags := copyTags(tags) - wstTags["operation"] = "write" - externalLWSecTotalFields["external_lock_waits_seconds_total"] = timeWriteExternal / picoSeconds - acc.AddFields("mysql_perf_schema", externalLWSecTotalFields, wstTags) + externalLWSecTotalTags := copyTags(tags) + externalLWSecTotalTags["perf_query"] = "external_lock_waits_seconds_total" + externalLWSecTotalFields := map[string]interface{}{ + "read": timeReadExternal / picoSeconds, + "write": timeWriteExternal / picoSeconds, + } + acc.AddFields("mysql_perf_schema", externalLWSecTotalFields, externalLWSecTotalTags) } return nil } From b95a90dbd60ae2f658547efb6b5020d4d4824b1c Mon Sep 17 00:00:00 2001 From: Maksadbek Date: Thu, 14 Apr 2016 01:58:42 +0500 Subject: [PATCH 23/84] updated README for mysql input plugin closes #889 closes #403 --- CHANGELOG.md | 1 + plugins/inputs/mysql/README.md | 190 +++++++++++++++++++++++++++++++++ plugins/inputs/mysql/mysql.go | 66 ++++++++---- 3 files changed, 235 insertions(+), 22 deletions(-) create mode 100644 plugins/inputs/mysql/README.md diff --git a/CHANGELOG.md b/CHANGELOG.md index 6c8f611f0..06dd76969 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -27,6 +27,7 @@ based on _prefix_ in addition to globs. This means that a filter like ### Features - [#1017](https://github.com/influxdata/telegraf/pull/1017): taginclude and tagexclude arguments. - [#1015](https://github.com/influxdata/telegraf/pull/1015): Docker plugin schema refactor. +- [#889](https://github.com/influxdata/telegraf/pull/889): Improved MySQL plugin. Thanks @maksadbek! ### Bugfixes - [#921](https://github.com/influxdata/telegraf/pull/921): mqtt_consumer stops gathering metrics. Thanks @chaton78! diff --git a/plugins/inputs/mysql/README.md b/plugins/inputs/mysql/README.md new file mode 100644 index 000000000..b4af1de80 --- /dev/null +++ b/plugins/inputs/mysql/README.md @@ -0,0 +1,190 @@ +# MySQL Input plugin + +This plugin gathers the statistic data from MySQL server + +* Global statuses +* Global variables +* Slave statuses +* Binlog size +* Process list +* Info schema auto increment columns +* Table I/O waits +* Index I/O waits +* Perf Schema table lock waits +* Perf Schema event waits +* Perf Schema events statements +* File events statistics +* Table schema statistics + +## Configuration + +``` +# Read metrics from one or many mysql servers +[[inputs.mysql]] + ## specify servers via a url matching: + ## [username[:password]@][protocol[(address)]]/[?tls=[true|false|skip-verify]] + ## see https://github.com/go-sql-driver/mysql#dsn-data-source-name + ## e.g. + ## root:passwd@tcp(127.0.0.1:3306)/?tls=false + ## root@tcp(127.0.0.1:3306)/?tls=false + # + ## If no servers are specified, then localhost is used as the host. 
+ servers = ["tcp(127.0.0.1:3306)/"] + ## the limits for metrics form perf_events_statements + perf_events_statements_digest_text_limit = 120 + perf_events_statements_limit = 250 + perf_events_statements_time_limit = 86400 + # + ## if the list is empty, then metrics are gathered from all database tables + table_schema_databases = [] + # + ## gather metrics from INFORMATION_SCHEMA.TABLES for databases provided above list + gather_table_schema = false + # + ## gather thread state counts from INFORMATION_SCHEMA.PROCESSLIST + gather_process_list = true + # + ## gather auto_increment columns and max values from information schema + gather_info_schema_auto_inc = true + # + ## gather metrics from SHOW SLAVE STATUS command output + gather_slave_status = true + # + ## gather metrics from SHOW BINARY LOGS command output + gather_binary_logs = false + # + ## gather metrics from PERFORMANCE_SCHEMA.TABLE_IO_WAITS_SUMMART_BY_TABLE + gather_table_io_waits = false + # + ## gather metrics from PERFORMANCE_SCHEMA.TABLE_IO_WAITS_SUMMART_BY_INDEX_USAGE + gather_index_io_waits = false + # + ## gather metrics from PERFORMANCE_SCHEMA.FILE_SUMMARY_BY_EVENT_NAME + gather_file_events_stats = false + # + ## gather metrics from PERFORMANCE_SCHEMA.EVENTS_STATEMENTS_SUMMARY_BY_DIGEST + gather_perf_events_statements = false + # + ## Some queries we may want to run less often (such as SHOW GLOBAL VARIABLES) + interval_slow = "30m" +``` + +## Measurements & Fields +* Global statuses - all numeric and boolean values of `SHOW GLOBAL STATUSES` +* Global variables - all numeric and boolean values of `SHOW GLOBAL VARIABLES` +* Slave status - metrics from `SHOW SLAVE STATUS` the metrics are gathered when +the single-source replication is on. If the multi-source replication is set, +then everything works differently, this metric does not work with multi-source +replication. + * slave_[column name]() +* Binary logs - all metrics including size and count of all binary files. +Requires to be turned on in configuration. + * binary_size_bytes(int, number) + * binary_files_count(int, number) +* Process list - connection metrics from processlist for each user. It has the following tags + * connections(int, number) +* Perf Table IO waits - total count and time of I/O waits event for each table +and process. It has following fields: + * table_io_waits_total_fetch(float, number) + * table_io_waits_total_insert(float, number) + * table_io_waits_total_update(float, number) + * table_io_waits_total_delete(float, number) + * table_io_waits_seconds_total_fetch(float, milliseconds) + * table_io_waits_seconds_total_insert(float, milliseconds) + * table_io_waits_seconds_total_update(float, milliseconds) + * table_io_waits_seconds_total_delete(float, milliseconds) +* Perf index IO waits - total count and time of I/O waits event for each index +and process. It has following fields: + * index_io_waits_total_fetch(float, number) + * index_io_waits_seconds_total_fetch(float, milliseconds) + * index_io_waits_total_insert(float, number) + * index_io_waits_total_update(float, number) + * index_io_waits_total_delete(float, number) + * index_io_waits_seconds_total_insert(float, milliseconds) + * index_io_waits_seconds_total_update(float, milliseconds) + * index_io_waits_seconds_total_delete(float, milliseconds) +* Info schema autoincrement statuses - autoincrement fields and max values +for them. 
It has the following fields: + * auto_increment_column(int, number) + * auto_increment_column_max(int, number) +* Perf table lock waits - gathers the total number and time of SQL and external +lock wait events for each table and operation. It has the following fields; +the unit of each field varies by the tags. + * read_normal(float, number/milliseconds) + * read_with_shared_locks(float, number/milliseconds) + * read_high_priority(float, number/milliseconds) + * read_no_insert(float, number/milliseconds) + * write_normal(float, number/milliseconds) + * write_allow_write(float, number/milliseconds) + * write_concurrent_insert(float, number/milliseconds) + * write_delayed(float, number/milliseconds) + * write_low_priority(float, number/milliseconds) + * read(float, number/milliseconds) + * write(float, number/milliseconds) +* Perf events waits - gathers total time and number of event waits + * events_waits_total(float, number) + * events_waits_seconds_total(float, milliseconds) +* Perf file events statuses - gathers file events statuses + * file_events_total(float, number) + * file_events_seconds_total(float, milliseconds) + * file_events_bytes_total(float, bytes) +* Perf file events statements - gathers attributes of each event + * events_statements_total(float, number) + * events_statements_seconds_total(float, milliseconds) + * events_statements_errors_total(float, number) + * events_statements_warnings_total(float, number) + * events_statements_rows_affected_total(float, number) + * events_statements_rows_sent_total(float, number) + * events_statements_rows_examined_total(float, number) + * events_statements_tmp_tables_total(float, number) + * events_statements_tmp_disk_tables_total(float, number) + * events_statements_sort_merge_passes_total(float, number) + * events_statements_sort_rows_total(float, number) + * events_statements_no_index_used_total(float, number) +* Table schema - gathers statistics of each schema.
It has following measurements + * info_schema_table_rows(float, number) + * info_schema_table_size_data_length(float, number) + * info_schema_table_size_index_length(float, number) + * info_schema_table_size_data_free(float, number) + * info_schema_table_version(float, number) + +## Tags +* All measurements has following tags + * server (the host name from which the metrics are gathered) +* Process list measurement has following tags + * user (username for whom the metrics are gathered) +* Perf table IO waits measurement has following tags + * schema + * name (object name for event or process) +* Perf index IO waits has following tags + * schema + * name + * index +* Info schema autoincrement statuses has following tags + * schema + * table + * column +* Perf table lock waits has following tags + * schema + * table + * sql_lock_waits_total(fields including this tag have numeric unit) + * external_lock_waits_total(fields including this tag have numeric unit) + * sql_lock_waits_seconds_total(fields including this tag have millisecond unit) + * external_lock_waits_seconds_total(fields including this tag have millisecond unit) +* Perf events statements has following tags + * event_name +* Perf file events statuses has following tags + * event_name + * mode +* Perf file events statements has following tags + * schema + * digest + * digest_text +* Table schema has following tags + * schema + * table + * component + * type + * engine + * row_format + * create_options diff --git a/plugins/inputs/mysql/mysql.go b/plugins/inputs/mysql/mysql.go index d228ccd74..a7254e250 100644 --- a/plugins/inputs/mysql/mysql.go +++ b/plugins/inputs/mysql/mysql.go @@ -33,30 +33,52 @@ type Mysql struct { } var sampleConfig = ` - # specify servers via a url matching: - # [username[:password]@][protocol[(address)]]/[?tls=[true|false|skip-verify]] - # see https://github.com/go-sql-driver/mysql#dsn-data-source-name - # e.g. - # root:passwd@tcp(127.0.0.1:3306)/?tls=false - # root@tcp(127.0.0.1:3306)/?tls=false + ## specify servers via a url matching: + ## [username[:password]@][protocol[(address)]]/[?tls=[true|false|skip-verify]] + ## see https://github.com/go-sql-driver/mysql#dsn-data-source-name + ## e.g. + ## root:passwd@tcp(127.0.0.1:3306)/?tls=false + ## root@tcp(127.0.0.1:3306)/?tls=false # - # If no servers are specified, then localhost is used as the host. + ## If no servers are specified, then localhost is used as the host. 
servers = ["tcp(127.0.0.1:3306)/"] - perf_events_statements_digest_text_limit = 120 - perf_events_statements_limit = 250 - perf_events_statements_time_limit = 86400 - table_schema_databases = [] - gather_process_list = true - gather_info_schema_auto_inc = true - gather_slave_status = true - gather_binary_logs = false - gather_table_io_waits = false - gather_index_io_waits = false - gather_table_schema = false - gather_file_events_stats = false - gather_perf_events_statements = false - # Some queries we may want to run less often (such as SHOW GLOBAL VARIABLES) - interval_slow = "30m" + ## the limits for metrics form perf_events_statements + perf_events_statements_digest_text_limit = 120 + perf_events_statements_limit = 250 + perf_events_statements_time_limit = 86400 + # + ## if the list is empty, then metrics are gathered from all databasee tables + table_schema_databases = [] + # + ## gather metrics from INFORMATION_SCHEMA.TABLES for databases provided above list + gather_table_schema = false + # + ## gather thread state counts from INFORMATION_SCHEMA.PROCESSLIST + gather_process_list = true + # + ## gather auto_increment columns and max values from information schema + gather_info_schema_auto_inc = true + # + ## gather metrics from SHOW SLAVE STATUS command output + gather_slave_status = true + # + ## gather metrics from SHOW BINARY LOGS command output + gather_binary_logs = false + # + ## gather metrics from PERFORMANCE_SCHEMA.TABLE_IO_WAITS_SUMMART_BY_TABLE + gather_table_io_waits = false + # + ## gather metrics from PERFORMANCE_SCHEMA.TABLE_IO_WAITS_SUMMART_BY_INDEX_USAGE + gather_index_io_waits = false + # + ## gather metrics from PERFORMANCE_SCHEMA.FILE_SUMMARY_BY_EVENT_NAME + gather_file_events_stats = false + # + ## gather metrics from PERFORMANCE_SCHEMA.EVENTS_STATEMENTS_SUMMARY_BY_DIGEST + gather_perf_events_statements = false + # + ## Some queries we may want to run less often (such as SHOW GLOBAL VARIABLES) + interval_slow = "30m" ` var defaultTimeout = time.Second * time.Duration(5) From 708a97d77355559a43397556199f755dedea86a6 Mon Sep 17 00:00:00 2001 From: Eugene Chupriyanov Date: Tue, 12 Apr 2016 20:20:27 +0300 Subject: [PATCH 24/84] Try to reconnect to Riemann if metrics upload failed. Signed-off-by: Eugene Chupriyanov Error checks added Don't Close() nil client Signed-off-by: Eugene Chupriyanov --- plugins/outputs/riemann/riemann.go | 25 ++++++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) diff --git a/plugins/outputs/riemann/riemann.go b/plugins/outputs/riemann/riemann.go index c805bbd00..88c4b20de 100644 --- a/plugins/outputs/riemann/riemann.go +++ b/plugins/outputs/riemann/riemann.go @@ -1,7 +1,6 @@ package riemann import ( - "errors" "fmt" "os" "sort" @@ -33,6 +32,7 @@ func (r *Riemann) Connect() error { c, err := raidman.Dial(r.Transport, r.URL) if err != nil { + r.client = nil return err } @@ -41,7 +41,11 @@ func (r *Riemann) Connect() error { } func (r *Riemann) Close() error { + if r.client == nil { + return nil + } r.client.Close() + r.client = nil return nil } @@ -58,6 +62,13 @@ func (r *Riemann) Write(metrics []telegraf.Metric) error { return nil } + if r.client == nil { + err := r.Connect() + if err != nil { + return fmt.Errorf("FAILED to (re)connect to Riemann. 
Error: %s\n", err) + } + } + var events []*raidman.Event for _, p := range metrics { evs := buildEvents(p, r.Separator) @@ -68,8 +79,16 @@ func (r *Riemann) Write(metrics []telegraf.Metric) error { var senderr = r.client.SendMulti(events) if senderr != nil { - return errors.New(fmt.Sprintf("FAILED to send riemann message: %s\n", - senderr)) + r.Close() // always retuns nil + connerr := r.Connect() + if connerr != nil { + return fmt.Errorf("FAILED to (re)connect to Riemann. Error: %s\n", connerr) + } + senderr = r.client.SendMulti(events) + if senderr != nil { + return fmt.Errorf("FAILED to send riemann message (will try to reconnect). Error: %s\n", + senderr) + } } return nil From c737a19d9f6ab6094a4e3128ab170123b30f9c9e Mon Sep 17 00:00:00 2001 From: Eugene Chupriyanov Date: Tue, 19 Apr 2016 01:08:18 +0300 Subject: [PATCH 25/84] Just close Riemann client on send metrics failure Signed-off-by: Eugene Chupriyanov closes #1013 --- CHANGELOG.md | 1 + plugins/outputs/riemann/riemann.go | 11 ++--------- 2 files changed, 3 insertions(+), 9 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 06dd76969..30257749b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -31,6 +31,7 @@ based on _prefix_ in addition to globs. This means that a filter like ### Bugfixes - [#921](https://github.com/influxdata/telegraf/pull/921): mqtt_consumer stops gathering metrics. Thanks @chaton78! +- [#1013](https://github.com/influxdata/telegraf/pull/1013): Close dead riemann output connections. Thanks @echupriyanov! ## v0.12.1 [2016-04-14] diff --git a/plugins/outputs/riemann/riemann.go b/plugins/outputs/riemann/riemann.go index 88c4b20de..bc49a7191 100644 --- a/plugins/outputs/riemann/riemann.go +++ b/plugins/outputs/riemann/riemann.go @@ -80,15 +80,8 @@ func (r *Riemann) Write(metrics []telegraf.Metric) error { var senderr = r.client.SendMulti(events) if senderr != nil { r.Close() // always retuns nil - connerr := r.Connect() - if connerr != nil { - return fmt.Errorf("FAILED to (re)connect to Riemann. Error: %s\n", connerr) - } - senderr = r.client.SendMulti(events) - if senderr != nil { - return fmt.Errorf("FAILED to send riemann message (will try to reconnect). Error: %s\n", - senderr) - } + return fmt.Errorf("FAILED to send riemann message (will try to reconnect). Error: %s\n", + senderr) } return nil From 92e57ee06ca1dcd050572d3cfc195f8c3166a8ad Mon Sep 17 00:00:00 2001 From: Cameron Sparr Date: Mon, 18 Apr 2016 19:13:45 -0600 Subject: [PATCH 26/84] Set default tags in test accumulator closes #1012 --- CHANGELOG.md | 1 + agent/agent.go | 1 + 2 files changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 30257749b..4945a3bbd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -32,6 +32,7 @@ based on _prefix_ in addition to globs. This means that a filter like ### Bugfixes - [#921](https://github.com/influxdata/telegraf/pull/921): mqtt_consumer stops gathering metrics. Thanks @chaton78! - [#1013](https://github.com/influxdata/telegraf/pull/1013): Close dead riemann output connections. Thanks @echupriyanov! +- [#1012](https://github.com/influxdata/telegraf/pull/1012): Set default tags in test accumulator. 
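
Taken together, patches 24 and 25 leave the Riemann output with a lazy-reconnect pattern: `Connect()` clears the cached client when the dial fails, `Close()` tolerates a nil client, and `Write()` re-dials when no client is cached and simply drops the connection on a send failure so the next flush interval reconnects. A minimal sketch of that pattern — using a simplified client interface rather than the real raidman API — might look like this:

```go
package main

import (
	"errors"
	"fmt"
)

// sender models only the two calls the output relies on.
type sender interface {
	Send(events []string) error
	Close() error
}

type riemannOut struct {
	dial   func() (sender, error) // hypothetical dialer standing in for raidman.Dial
	client sender
}

// connect mirrors Connect(): clear the cached client on a failed dial.
func (r *riemannOut) connect() error {
	c, err := r.dial()
	if err != nil {
		r.client = nil
		return err
	}
	r.client = c
	return nil
}

// close mirrors Close(): safe to call with no live connection.
func (r *riemannOut) close() {
	if r.client == nil {
		return
	}
	r.client.Close()
	r.client = nil
}

// write mirrors the patched Write(): dial lazily, and on a send failure just
// drop the connection so the next write attempt reconnects.
func (r *riemannOut) write(events []string) error {
	if r.client == nil {
		if err := r.connect(); err != nil {
			return fmt.Errorf("failed to (re)connect to Riemann: %v", err)
		}
	}
	if err := r.client.Send(events); err != nil {
		r.close()
		return errors.New("failed to send riemann message, will reconnect on next write")
	}
	return nil
}

func main() {
	fmt.Println("sketch only; supply a real dialer before use")
}
```

The upshot of the follow-up patch is that no retry happens inside a single `Write()` call; the connection is simply discarded and rebuilt on the next one.
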
## v0.12.1 [2016-04-14] diff --git a/agent/agent.go b/agent/agent.go index fdd17a267..60f2d63c6 100644 --- a/agent/agent.go +++ b/agent/agent.go @@ -221,6 +221,7 @@ func (a *Agent) Test() error { for _, input := range a.Config.Inputs { acc := NewAccumulator(input.Config, metricC) acc.SetDebug(true) + acc.setDefaultTags(a.Config.Tags) fmt.Printf("* Plugin: %s, Collection 1\n", input.Name) if input.Config.Interval != 0 { From 8cc72368ca4de53610e9d891f6634b8d70385e7d Mon Sep 17 00:00:00 2001 From: Cameron Sparr Date: Mon, 18 Apr 2016 22:23:48 -0600 Subject: [PATCH 27/84] influxdb output: close connections & dont always overwrite closes #1058 closes #1059 also see https://github.com/influxdata/influxdb/pull/6425 --- CHANGELOG.md | 6 +++- Godeps | 2 +- plugins/outputs/influxdb/influxdb.go | 43 +++++++++++++++++++--------- 3 files changed, 35 insertions(+), 16 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4945a3bbd..fdd8cf98f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,7 @@ ## v0.13 [unreleased] ### Release Notes + - **Breaking Change**: docker plugin tags. The cont_id tag no longer exists, it will now be a field, and be called container_id. Additionally, cont_image and cont_name are being renamed to container_image and container_name. @@ -15,7 +16,7 @@ So adding "container" to each metric will: (1) make it more clear that these metrics are per-container, and (2) allow users to easily drop per-container metrics if cardinality is an issue (`namedrop = ["docker_container_*"]`) -- `tagexclude` and `tagexclude` are now available, which can be used to remove +- `tagexclude` and `taginclude` are now available, which can be used to remove tags from measurements on inputs and outputs. See [the configuration doc](https://github.com/influxdata/telegraf/blob/master/docs/CONFIGURATION.md) for more details. @@ -25,14 +26,17 @@ based on _prefix_ in addition to globs. This means that a filter like `fielddrop = ["time_"]` will need to be changed to `fielddrop = ["time_*"]` ### Features + - [#1017](https://github.com/influxdata/telegraf/pull/1017): taginclude and tagexclude arguments. - [#1015](https://github.com/influxdata/telegraf/pull/1015): Docker plugin schema refactor. - [#889](https://github.com/influxdata/telegraf/pull/889): Improved MySQL plugin. Thanks @maksadbek! ### Bugfixes + - [#921](https://github.com/influxdata/telegraf/pull/921): mqtt_consumer stops gathering metrics. Thanks @chaton78! - [#1013](https://github.com/influxdata/telegraf/pull/1013): Close dead riemann output connections. Thanks @echupriyanov! - [#1012](https://github.com/influxdata/telegraf/pull/1012): Set default tags in test accumulator. +- [#1058](https://github.com/influxdata/telegraf/issues/1058): Fix possible leaky TCP connections in influxdb output. 
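
The filtering change called out above is the one most likely to bite existing configurations: under glob-only matching, a bare prefix such as `time_` is an exact string and no longer matches `time_user` or `time_system`. A quick standalone way to sanity-check a pattern against glob semantics — shown here with the gobwas/glob library (the same one the new filestat plugin later in this series imports), not with Telegraf's internal filter code — is:

```go
package main

import (
	"fmt"

	"github.com/gobwas/glob"
)

func main() {
	oldPattern := glob.MustCompile("time_")  // old prefix-style entry, now an exact match only
	newPattern := glob.MustCompile("time_*") // glob form required after this change

	for _, field := range []string{"time_user", "time_system", "time_"} {
		fmt.Printf("%-12s old=%-5v new=%v\n",
			field, oldPattern.Match(field), newPattern.Match(field))
	}
	// time_user    old=false new=true
	// time_system  old=false new=true
	// time_        old=true  new=true
}
```
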
## v0.12.1 [2016-04-14] diff --git a/Godeps b/Godeps index 71057f497..926adcb74 100644 --- a/Godeps +++ b/Godeps @@ -24,7 +24,7 @@ github.com/gorilla/context 1ea25387ff6f684839d82767c1733ff4d4d15d0a github.com/gorilla/mux c9e326e2bdec29039a3761c07bece13133863e1e github.com/hailocab/go-hostpool e80d13ce29ede4452c43dea11e79b9bc8a15b478 github.com/influxdata/config b79f6829346b8d6e78ba73544b1e1038f1f1c9da -github.com/influxdata/influxdb e3fef5593c21644f2b43af55d6e17e70910b0e48 +github.com/influxdata/influxdb 21db76b3374c733f37ed16ad93f3484020034351 github.com/influxdata/toml af4df43894b16e3fd2b788d01bd27ad0776ef2d0 github.com/klauspost/crc32 19b0b332c9e4516a6370a0456e6182c3b5036720 github.com/lib/pq e182dc4027e2ded4b19396d638610f2653295f36 diff --git a/plugins/outputs/influxdb/influxdb.go b/plugins/outputs/influxdb/influxdb.go index 626635a3b..891c752bd 100644 --- a/plugins/outputs/influxdb/influxdb.go +++ b/plugins/outputs/influxdb/influxdb.go @@ -125,13 +125,9 @@ func (i *InfluxDB) Connect() error { return err } - // Create Database if it doesn't exist - _, e := c.Query(client.Query{ - Command: fmt.Sprintf("CREATE DATABASE IF NOT EXISTS \"%s\"", i.Database), - }) - - if e != nil { - log.Println("Database creation failed: " + e.Error()) + err = createDatabase(c, i.Database) + if err != nil { + log.Println("Database creation failed: " + err.Error()) continue } @@ -144,8 +140,24 @@ func (i *InfluxDB) Connect() error { return nil } +func createDatabase(c client.Client, database string) error { + // Create Database if it doesn't exist + _, err := c.Query(client.Query{ + Command: fmt.Sprintf("CREATE DATABASE IF NOT EXISTS \"%s\"", database), + }) + return err +} + func (i *InfluxDB) Close() error { - // InfluxDB client does not provide a Close() function + var errS string + for j, _ := range i.conns { + if err := i.conns[j].Close(); err != nil { + errS += err.Error() + } + } + if errS != "" { + return fmt.Errorf("output influxdb close failed: %s", errS) + } return nil } @@ -185,18 +197,21 @@ func (i *InfluxDB) Write(metrics []telegraf.Metric) error { p := rand.Perm(len(i.conns)) for _, n := range p { if e := i.conns[n].Write(bp); e != nil { - log.Println("ERROR: " + e.Error()) + // Log write failure + log.Printf("ERROR: %s", e) + // If the database was not found, try to recreate it + if strings.Contains(e.Error(), "database not found") { + if errc := createDatabase(i.conns[n], i.Database); errc != nil { + log.Printf("ERROR: Database %s not found and failed to recreate\n", + i.Database) + } + } } else { err = nil break } } - // If all of the writes failed, create a new connection array so that - // i.Connect() will be called on the next gather. - if err != nil { - i.conns = make([]client.Client, 0) - } return err } From a585119a67a529280a6a14d26ee49f3a7cbefe14 Mon Sep 17 00:00:00 2001 From: Cameron Sparr Date: Tue, 19 Apr 2016 14:46:37 -0600 Subject: [PATCH 28/84] Change prometheus doc to glob match --- plugins/inputs/prometheus/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plugins/inputs/prometheus/README.md b/plugins/inputs/prometheus/README.md index c5c952515..3aa8c8afd 100644 --- a/plugins/inputs/prometheus/README.md +++ b/plugins/inputs/prometheus/README.md @@ -22,7 +22,7 @@ to filter and some tags # An array of urls to scrape metrics from. 
urls = ["http://my-kube-apiserver:8080/metrics"] # Get only metrics with "apiserver_" string is in metric name - namepass = ["apiserver_"] + namepass = ["apiserver_*"] # Add a metric name prefix name_prefix = "k8s_" # Add tags to be able to make beautiful dashboards From 46543d632305c9fb83beae1db85849eb119c528c Mon Sep 17 00:00:00 2001 From: Larry Kim Date: Sun, 17 Apr 2016 02:00:52 +0900 Subject: [PATCH 29/84] Possible bug fix for oid_key collision closes #1044 --- CHANGELOG.md | 1 + plugins/inputs/snmp/snmp.go | 6 +++++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index fdd8cf98f..bcd0efa71 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -37,6 +37,7 @@ based on _prefix_ in addition to globs. This means that a filter like - [#1013](https://github.com/influxdata/telegraf/pull/1013): Close dead riemann output connections. Thanks @echupriyanov! - [#1012](https://github.com/influxdata/telegraf/pull/1012): Set default tags in test accumulator. - [#1058](https://github.com/influxdata/telegraf/issues/1058): Fix possible leaky TCP connections in influxdb output. +- [#1044](https://github.com/influxdata/telegraf/pull/1044): Fix SNMP OID possible collisions. Thanks @relip ## v0.12.1 [2016-04-14] diff --git a/plugins/inputs/snmp/snmp.go b/plugins/inputs/snmp/snmp.go index 3a95e84fa..8ccfe100b 100644 --- a/plugins/inputs/snmp/snmp.go +++ b/plugins/inputs/snmp/snmp.go @@ -733,7 +733,11 @@ func (h *Host) HandleResponse(oids map[string]Data, result *gosnmp.SnmpPacket, a break nextresult } } - if strings.HasPrefix(variable.Name, oid_key) { + // If variable.Name is the same as oid_key + // OR + // the result is SNMP table which "." comes right after oid_key. + // ex: oid_key: .1.3.6.1.2.1.2.2.1.16, variable.Name: .1.3.6.1.2.1.2.2.1.16.1 + if variable.Name == oid_key || strings.HasPrefix(variable.Name, oid_key+".") { switch variable.Type { // handle Metrics case gosnmp.Boolean, gosnmp.Integer, gosnmp.Counter32, gosnmp.Gauge32, From 46aaaa9b704eec7506433a131fd0242c6c02361f Mon Sep 17 00:00:00 2001 From: Victor Garcia Date: Tue, 19 Apr 2016 20:55:03 +0200 Subject: [PATCH 30/84] Adding TTL metrics data closes #1060 --- CHANGELOG.md | 1 + plugins/inputs/mongodb/README.md | 5 ++- plugins/inputs/mongodb/mongodb_data.go | 34 +++++++++--------- plugins/inputs/mongodb/mongodb_data_test.go | 40 +++++++++++---------- plugins/inputs/mongodb/mongostat.go | 20 +++++++++++ 5 files changed, 63 insertions(+), 37 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index bcd0efa71..35a649db5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -30,6 +30,7 @@ based on _prefix_ in addition to globs. This means that a filter like - [#1017](https://github.com/influxdata/telegraf/pull/1017): taginclude and tagexclude arguments. - [#1015](https://github.com/influxdata/telegraf/pull/1015): Docker plugin schema refactor. - [#889](https://github.com/influxdata/telegraf/pull/889): Improved MySQL plugin. Thanks @maksadbek! +- [#1060](https://github.com/influxdata/telegraf/pull/1060): TTL metrics added to MongoDB input plugin ### Bugfixes diff --git a/plugins/inputs/mongodb/README.md b/plugins/inputs/mongodb/README.md index 56fe73840..868b51528 100644 --- a/plugins/inputs/mongodb/README.md +++ b/plugins/inputs/mongodb/README.md @@ -48,6 +48,5 @@ and create a single measurement containing values e.g. 
* resident_megabytes * updates_per_sec * vsize_megabytes - - - + * ttl_deletes_per_sec + * ttl_passes_per_sec diff --git a/plugins/inputs/mongodb/mongodb_data.go b/plugins/inputs/mongodb/mongodb_data.go index 1a951806d..2bbeabd94 100644 --- a/plugins/inputs/mongodb/mongodb_data.go +++ b/plugins/inputs/mongodb/mongodb_data.go @@ -26,22 +26,24 @@ func NewMongodbData(statLine *StatLine, tags map[string]string) *MongodbData { } var DefaultStats = map[string]string{ - "inserts_per_sec": "Insert", - "queries_per_sec": "Query", - "updates_per_sec": "Update", - "deletes_per_sec": "Delete", - "getmores_per_sec": "GetMore", - "commands_per_sec": "Command", - "flushes_per_sec": "Flushes", - "vsize_megabytes": "Virtual", - "resident_megabytes": "Resident", - "queued_reads": "QueuedReaders", - "queued_writes": "QueuedWriters", - "active_reads": "ActiveReaders", - "active_writes": "ActiveWriters", - "net_in_bytes": "NetIn", - "net_out_bytes": "NetOut", - "open_connections": "NumConnections", + "inserts_per_sec": "Insert", + "queries_per_sec": "Query", + "updates_per_sec": "Update", + "deletes_per_sec": "Delete", + "getmores_per_sec": "GetMore", + "commands_per_sec": "Command", + "flushes_per_sec": "Flushes", + "vsize_megabytes": "Virtual", + "resident_megabytes": "Resident", + "queued_reads": "QueuedReaders", + "queued_writes": "QueuedWriters", + "active_reads": "ActiveReaders", + "active_writes": "ActiveWriters", + "net_in_bytes": "NetIn", + "net_out_bytes": "NetOut", + "open_connections": "NumConnections", + "ttl_deletes_per_sec": "DeletedDocuments", + "ttl_passes_per_sec": "Passes", } var DefaultReplStats = map[string]string{ diff --git a/plugins/inputs/mongodb/mongodb_data_test.go b/plugins/inputs/mongodb/mongodb_data_test.go index 3166ab018..e63d2025d 100644 --- a/plugins/inputs/mongodb/mongodb_data_test.go +++ b/plugins/inputs/mongodb/mongodb_data_test.go @@ -13,24 +13,26 @@ var tags = make(map[string]string) func TestAddNonReplStats(t *testing.T) { d := NewMongodbData( &StatLine{ - StorageEngine: "", - Time: time.Now(), - Insert: 0, - Query: 0, - Update: 0, - Delete: 0, - GetMore: 0, - Command: 0, - Flushes: 0, - Virtual: 0, - Resident: 0, - QueuedReaders: 0, - QueuedWriters: 0, - ActiveReaders: 0, - ActiveWriters: 0, - NetIn: 0, - NetOut: 0, - NumConnections: 0, + StorageEngine: "", + Time: time.Now(), + Insert: 0, + Query: 0, + Update: 0, + Delete: 0, + GetMore: 0, + Command: 0, + Flushes: 0, + Virtual: 0, + Resident: 0, + QueuedReaders: 0, + QueuedWriters: 0, + ActiveReaders: 0, + ActiveWriters: 0, + NetIn: 0, + NetOut: 0, + NumConnections: 0, + Passes: 0, + DeletedDocuments: 0, }, tags, ) @@ -128,6 +130,8 @@ func TestStateTag(t *testing.T) { "resident_megabytes": int64(0), "updates_per_sec": int64(0), "vsize_megabytes": int64(0), + "ttl_deletes_per_sec": int64(0), + "ttl_passes_per_sec": int64(0), } acc.AssertContainsTaggedFields(t, "mongodb", fields, stateTags) } diff --git a/plugins/inputs/mongodb/mongostat.go b/plugins/inputs/mongodb/mongostat.go index e12d797d0..33cd8254e 100644 --- a/plugins/inputs/mongodb/mongostat.go +++ b/plugins/inputs/mongodb/mongostat.go @@ -54,6 +54,7 @@ type ServerStatus struct { ShardCursorType map[string]interface{} `bson:"shardCursorType"` StorageEngine map[string]string `bson:"storageEngine"` WiredTiger *WiredTiger `bson:"wiredTiger"` + Metrics *MetricsStats `bson:"metrics"` } // WiredTiger stores information related to the WiredTiger storage engine. 
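
The two new per-second fields are rates derived from the cumulative `metrics.ttl` counters returned by consecutive `serverStatus` calls, which matches how the `NewStatLine` changes a little further down feed them through `diff()`. A rough sketch of that derivation, using a hypothetical helper rather than the plugin's own code:

```go
package main

import "fmt"

// ttlCounters holds the cumulative TTL counters from one serverStatus sample.
type ttlCounters struct {
	DeletedDocuments int64
	Passes           int64
}

// perSecond converts two cumulative samples taken sampleSecs apart into the
// ttl_deletes_per_sec / ttl_passes_per_sec style rates listed above.
func perSecond(prev, cur ttlCounters, sampleSecs int64) (deletes, passes int64) {
	if sampleSecs <= 0 {
		return 0, 0
	}
	return (cur.DeletedDocuments - prev.DeletedDocuments) / sampleSecs,
		(cur.Passes - prev.Passes) / sampleSecs
}

func main() {
	d, p := perSecond(ttlCounters{100, 10}, ttlCounters{160, 30}, 10)
	fmt.Println(d, p) // 6 2
}
```
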
@@ -194,6 +195,17 @@ type OpcountStats struct { Command int64 `bson:"command"` } +// MetricsStats stores information related to metrics +type MetricsStats struct { + TTL *TTLStats `bson:"ttl"` +} + +// TTLStats stores information related to documents with a ttl index. +type TTLStats struct { + DeletedDocuments int64 `bson:"deletedDocuments"` + Passes int64 `bson:"passes"` +} + // ReadWriteLockTimes stores time spent holding read/write locks. type ReadWriteLockTimes struct { Read int64 `bson:"R"` @@ -332,6 +344,9 @@ type StatLine struct { // Opcounter fields Insert, Query, Update, Delete, GetMore, Command int64 + // TTL fields + Passes, DeletedDocuments int64 + // Collection locks (3.0 mmap only) CollectionLocks *CollectionLockStatus @@ -423,6 +438,11 @@ func NewStatLine(oldStat, newStat ServerStatus, key string, all bool, sampleSecs returnVal.Command = diff(newStat.Opcounters.Command, oldStat.Opcounters.Command, sampleSecs) } + if newStat.Metrics.TTL != nil && oldStat.Metrics.TTL != nil { + returnVal.Passes = diff(newStat.Metrics.TTL.Passes, oldStat.Metrics.TTL.Passes, sampleSecs) + returnVal.DeletedDocuments = diff(newStat.Metrics.TTL.DeletedDocuments, oldStat.Metrics.TTL.DeletedDocuments, sampleSecs) + } + if newStat.OpcountersRepl != nil && oldStat.OpcountersRepl != nil { returnVal.InsertR = diff(newStat.OpcountersRepl.Insert, oldStat.OpcountersRepl.Insert, sampleSecs) returnVal.QueryR = diff(newStat.OpcountersRepl.Query, oldStat.OpcountersRepl.Query, sampleSecs) From 9aec58c6b82063cb277c4e1c342f951c730a0da5 Mon Sep 17 00:00:00 2001 From: Cameron Sparr Date: Mon, 18 Apr 2016 18:12:58 -0600 Subject: [PATCH 31/84] Don't allow inputs to overwrite host tag closes #1054 This affects tags in the following plugins: - cassandra - disque - rethinkdb --- CHANGELOG.md | 5 +++ agent/accumulator.go | 14 +++---- plugins/inputs/cassandra/cassandra.go | 20 ++-------- plugins/inputs/cassandra/cassandra_test.go | 40 +++++++++---------- plugins/inputs/disque/disque.go | 2 +- plugins/inputs/rethinkdb/rethinkdb_server.go | 4 +- .../inputs/rethinkdb/rethinkdb_server_test.go | 4 +- 7 files changed, 38 insertions(+), 51 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 35a649db5..ba75be262 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -24,6 +24,10 @@ for more details. only. Previously there was an undocumented behavior where filters would match based on _prefix_ in addition to globs. This means that a filter like `fielddrop = ["time_"]` will need to be changed to `fielddrop = ["time_*"]` +- The following plugins have changed their tags to _not_ overwrite the host tag: + - cassandra: `host -> cassandra_host` + - disque: `host -> disque_host` + - rethinkdb: `host -> rethinkdb_host` ### Features @@ -31,6 +35,7 @@ based on _prefix_ in addition to globs. This means that a filter like - [#1015](https://github.com/influxdata/telegraf/pull/1015): Docker plugin schema refactor. - [#889](https://github.com/influxdata/telegraf/pull/889): Improved MySQL plugin. Thanks @maksadbek! - [#1060](https://github.com/influxdata/telegraf/pull/1060): TTL metrics added to MongoDB input plugin +- [#1056](https://github.com/influxdata/telegraf/pull/1056): Don't allow inputs to overwrite host tags. 
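
The tag renames above matter because of the merge order the accompanying `accumulator.go` hunk (next in this patch) switches to: daemon-wide tags are applied first, then plugin-level tags, and both now overwrite whatever tags the input attached to the point. An input that kept emitting its own `host` tag would therefore collide with the agent's `host` tag, which is why these plugins move to prefixed names. A compressed sketch of that precedence, with made-up tag values:

```go
package main

import "fmt"

// mergeTags applies the same precedence as the patched accumulator:
// point tags first, then daemon-wide tags, then plugin-level tags,
// with later writes winning.
func mergeTags(point, daemon, plugin map[string]string) map[string]string {
	out := make(map[string]string)
	for k, v := range point {
		out[k] = v
	}
	for k, v := range daemon {
		out[k] = v
	}
	for k, v := range plugin {
		out[k] = v
	}
	return out
}

func main() {
	point := map[string]string{"cassandra_host": "10.10.10.10"} // renamed tag, no collision
	daemon := map[string]string{"host": "agent-01"}             // hypothetical agent-wide host tag
	plugin := map[string]string{"dc": "us-east"}                // hypothetical per-plugin tag
	fmt.Println(mergeTags(point, daemon, plugin))
	// all three tags survive; "host" now always comes from the agent
}
```
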
### Bugfixes diff --git a/agent/accumulator.go b/agent/accumulator.go index 6b2ffde2d..70744359f 100644 --- a/agent/accumulator.go +++ b/agent/accumulator.go @@ -84,17 +84,13 @@ func (ac *accumulator) AddFields( if tags == nil { tags = make(map[string]string) } - // Apply plugin-wide tags if set - for k, v := range ac.inputConfig.Tags { - if _, ok := tags[k]; !ok { - tags[k] = v - } - } // Apply daemon-wide tags if set for k, v := range ac.defaultTags { - if _, ok := tags[k]; !ok { - tags[k] = v - } + tags[k] = v + } + // Apply plugin-wide tags if set + for k, v := range ac.inputConfig.Tags { + tags[k] = v } ac.inputConfig.Filter.FilterTags(tags) diff --git a/plugins/inputs/cassandra/cassandra.go b/plugins/inputs/cassandra/cassandra.go index b7525de1a..f49a74ab0 100644 --- a/plugins/inputs/cassandra/cassandra.go +++ b/plugins/inputs/cassandra/cassandra.go @@ -9,17 +9,9 @@ import ( "io/ioutil" "net/http" "net/url" - //"reflect" "strings" ) -/*type Server struct { - Host string - Username string - Password string - Port string -}*/ - type JolokiaClient interface { MakeRequest(req *http.Request) (*http.Response, error) } @@ -55,12 +47,6 @@ type jmxMetric interface { addTagsFields(out map[string]interface{}) } -func addServerTags(host string, tags map[string]string) { - if host != "" && host != "localhost" && host != "127.0.0.1" { - tags["host"] = host - } -} - func newJavaMetric(host string, metric string, acc telegraf.Accumulator) *javaMetric { return &javaMetric{host: host, metric: metric, acc: acc} @@ -120,7 +106,7 @@ func (j javaMetric) addTagsFields(out map[string]interface{}) { tokens := parseJmxMetricRequest(mbean) addTokensToTags(tokens, tags) - addServerTags(j.host, tags) + tags["cassandra_host"] = j.host if _, ok := tags["mname"]; !ok { //Queries for a single value will not return a "name" tag in the response. @@ -148,7 +134,7 @@ func addCassandraMetric(mbean string, c cassandraMetric, fields := make(map[string]interface{}) tokens := parseJmxMetricRequest(mbean) addTokensToTags(tokens, tags) - addServerTags(c.host, tags) + tags["cassandra_host"] = c.host addValuesAsFields(values, fields, tags["mname"]) c.acc.AddFields(tokens["class"]+tokens["type"], fields, tags) @@ -192,7 +178,7 @@ func (j *Cassandra) SampleConfig() string { servers = ["myuser:mypassword@10.10.10.1:8778","10.10.10.2:8778",":8778"] ## List of metrics collected on above servers ## Each metric consists of a jmx path. - ## This will collect all heap memory usage metrics from the jvm and + ## This will collect all heap memory usage metrics from the jvm and ## ReadLatency metrics for all keyspaces and tables. ## "type=Table" in the query works with Cassandra3.0. 
Older versions might ## need to use "type=ColumnFamily" diff --git a/plugins/inputs/cassandra/cassandra_test.go b/plugins/inputs/cassandra/cassandra_test.go index 184fa3bbb..aa39017fe 100644 --- a/plugins/inputs/cassandra/cassandra_test.go +++ b/plugins/inputs/cassandra/cassandra_test.go @@ -58,7 +58,7 @@ const validCassandraNestedMultiValueJSON = ` "status": 200, "timestamp": 1458089184, "value": { - "org.apache.cassandra.metrics:keyspace=test_keyspace1,name=ReadLatency,scope=test_table1,type=Table": + "org.apache.cassandra.metrics:keyspace=test_keyspace1,name=ReadLatency,scope=test_table1,type=Table": { "999thPercentile": 1.0, "Count": 100, "DurationUnit": "microseconds", @@ -66,7 +66,7 @@ const validCassandraNestedMultiValueJSON = ` "RateUnit": "events/second", "StdDev": null }, - "org.apache.cassandra.metrics:keyspace=test_keyspace2,name=ReadLatency,scope=test_table2,type=Table": + "org.apache.cassandra.metrics:keyspace=test_keyspace2,name=ReadLatency,scope=test_table2,type=Table": { "999thPercentile": 2.0, "Count": 200, "DurationUnit": "microseconds", @@ -163,13 +163,13 @@ func TestHttpJsonJavaMultiValue(t *testing.T) { "HeapMemoryUsage_used": 203288528.0, } tags1 := map[string]string{ - "host": "10.10.10.10", - "mname": "HeapMemoryUsage", + "cassandra_host": "10.10.10.10", + "mname": "HeapMemoryUsage", } tags2 := map[string]string{ - "host": "10.10.10.11", - "mname": "HeapMemoryUsage", + "cassandra_host": "10.10.10.11", + "mname": "HeapMemoryUsage", } acc.AssertContainsTaggedFields(t, "javaMemory", fields, tags1) acc.AssertContainsTaggedFields(t, "javaMemory", fields, tags2) @@ -190,8 +190,8 @@ func TestHttpJsonJavaMultiType(t *testing.T) { } tags := map[string]string{ - "host": "10.10.10.10", - "mname": "ConcurrentMarkSweep", + "cassandra_host": "10.10.10.10", + "mname": "ConcurrentMarkSweep", } acc.AssertContainsTaggedFields(t, "javaGarbageCollector", fields, tags) } @@ -231,10 +231,10 @@ func TestHttpJsonCassandraMultiValue(t *testing.T) { } tags := map[string]string{ - "host": "10.10.10.10", - "mname": "ReadLatency", - "keyspace": "test_keyspace1", - "scope": "test_table", + "cassandra_host": "10.10.10.10", + "mname": "ReadLatency", + "keyspace": "test_keyspace1", + "scope": "test_table", } acc.AssertContainsTaggedFields(t, "cassandraTable", fields, tags) } @@ -268,17 +268,17 @@ func TestHttpJsonCassandraNestedMultiValue(t *testing.T) { } tags1 := map[string]string{ - "host": "10.10.10.10", - "mname": "ReadLatency", - "keyspace": "test_keyspace1", - "scope": "test_table1", + "cassandra_host": "10.10.10.10", + "mname": "ReadLatency", + "keyspace": "test_keyspace1", + "scope": "test_table1", } tags2 := map[string]string{ - "host": "10.10.10.10", - "mname": "ReadLatency", - "keyspace": "test_keyspace2", - "scope": "test_table2", + "cassandra_host": "10.10.10.10", + "mname": "ReadLatency", + "keyspace": "test_keyspace2", + "scope": "test_table2", } acc.AssertContainsTaggedFields(t, "cassandraTable", fields1, tags1) diff --git a/plugins/inputs/disque/disque.go b/plugins/inputs/disque/disque.go index d726590b4..0e4baf9cb 100644 --- a/plugins/inputs/disque/disque.go +++ b/plugins/inputs/disque/disque.go @@ -162,7 +162,7 @@ func (g *Disque) gatherServer(addr *url.URL, acc telegraf.Accumulator) error { var read int fields := make(map[string]interface{}) - tags := map[string]string{"host": addr.String()} + tags := map[string]string{"disque_host": addr.String()} for read < sz { line, err := r.ReadString('\n') if err != nil { diff --git a/plugins/inputs/rethinkdb/rethinkdb_server.go 
b/plugins/inputs/rethinkdb/rethinkdb_server.go index 98e2a35f0..f172717d1 100644 --- a/plugins/inputs/rethinkdb/rethinkdb_server.go +++ b/plugins/inputs/rethinkdb/rethinkdb_server.go @@ -97,8 +97,8 @@ func (s *Server) getServerStatus() error { func (s *Server) getDefaultTags() map[string]string { tags := make(map[string]string) - tags["host"] = s.Url.Host - tags["hostname"] = s.serverStatus.Network.Hostname + tags["rethinkdb_host"] = s.Url.Host + tags["rethinkdb_hostname"] = s.serverStatus.Network.Hostname return tags } diff --git a/plugins/inputs/rethinkdb/rethinkdb_server_test.go b/plugins/inputs/rethinkdb/rethinkdb_server_test.go index c4b644222..82ff29280 100644 --- a/plugins/inputs/rethinkdb/rethinkdb_server_test.go +++ b/plugins/inputs/rethinkdb/rethinkdb_server_test.go @@ -20,8 +20,8 @@ func TestGetDefaultTags(t *testing.T) { in string out string }{ - {"host", server.Url.Host}, - {"hostname", server.serverStatus.Network.Hostname}, + {"rethinkdb_host", server.Url.Host}, + {"rethinkdb_hostname", server.serverStatus.Network.Hostname}, } defaultTags := server.getDefaultTags() for _, tt := range tagTests { From f1c995dcb842cfe6bc0e85f36a2e585ae37d028b Mon Sep 17 00:00:00 2001 From: Cameron Sparr Date: Tue, 19 Apr 2016 18:01:41 -0600 Subject: [PATCH 32/84] Update etc/telegraf.conf --- etc/telegraf.conf | 41 +++++++++++++++++++++++++++++++++++++++-- 1 file changed, 39 insertions(+), 2 deletions(-) diff --git a/etc/telegraf.conf b/etc/telegraf.conf index f2bdace78..1623e5636 100644 --- a/etc/telegraf.conf +++ b/etc/telegraf.conf @@ -438,7 +438,7 @@ # servers = ["myuser:mypassword@10.10.10.1:8778","10.10.10.2:8778",":8778"] # ## List of metrics collected on above servers # ## Each metric consists of a jmx path. -# ## This will collect all heap memory usage metrics from the jvm and +# ## This will collect all heap memory usage metrics from the jvm and # ## ReadLatency metrics for all keyspaces and tables. # ## "type=Table" in the query works with Cassandra3.0. Older versions might # ## need to use "type=ColumnFamily" @@ -781,9 +781,46 @@ # ## e.g. # ## root:passwd@tcp(127.0.0.1:3306)/?tls=false # ## root@tcp(127.0.0.1:3306)/?tls=false -# ## +# # # ## If no servers are specified, then localhost is used as the host. 
# servers = ["tcp(127.0.0.1:3306)/"] +# ## the limits for metrics form perf_events_statements +# perf_events_statements_digest_text_limit = 120 +# perf_events_statements_limit = 250 +# perf_events_statements_time_limit = 86400 +# # +# ## if the list is empty, then metrics are gathered from all databasee tables +# table_schema_databases = [] +# # +# ## gather metrics from INFORMATION_SCHEMA.TABLES for databases provided above list +# gather_table_schema = false +# # +# ## gather thread state counts from INFORMATION_SCHEMA.PROCESSLIST +# gather_process_list = true +# # +# ## gather auto_increment columns and max values from information schema +# gather_info_schema_auto_inc = true +# # +# ## gather metrics from SHOW SLAVE STATUS command output +# gather_slave_status = true +# # +# ## gather metrics from SHOW BINARY LOGS command output +# gather_binary_logs = false +# # +# ## gather metrics from PERFORMANCE_SCHEMA.TABLE_IO_WAITS_SUMMART_BY_TABLE +# gather_table_io_waits = false +# # +# ## gather metrics from PERFORMANCE_SCHEMA.TABLE_IO_WAITS_SUMMART_BY_INDEX_USAGE +# gather_index_io_waits = false +# # +# ## gather metrics from PERFORMANCE_SCHEMA.FILE_SUMMARY_BY_EVENT_NAME +# gather_file_events_stats = false +# # +# ## gather metrics from PERFORMANCE_SCHEMA.EVENTS_STATEMENTS_SUMMARY_BY_DIGEST +# gather_perf_events_statements = false +# # +# ## Some queries we may want to run less often (such as SHOW GLOBAL VARIABLES) +# interval_slow = "30m" # # Read metrics about network interface usage From 2f7da03cce24e572253e97b6bf21d9d34efa2a0f Mon Sep 17 00:00:00 2001 From: Cameron Sparr Date: Tue, 19 Apr 2016 22:57:22 -0600 Subject: [PATCH 33/84] Do not log every tcp connect/disconnect leaving as comments for whenever I rig up global debug logging. closes #1062 --- plugins/inputs/tcp_listener/tcp_listener.go | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/plugins/inputs/tcp_listener/tcp_listener.go b/plugins/inputs/tcp_listener/tcp_listener.go index b7f5ef9ed..ce07a7601 100644 --- a/plugins/inputs/tcp_listener/tcp_listener.go +++ b/plugins/inputs/tcp_listener/tcp_listener.go @@ -150,8 +150,7 @@ func (t *TcpListener) tcpListen() error { if err != nil { return err } - - log.Printf("Received TCP Connection from %s", conn.RemoteAddr()) + // log.Printf("Received TCP Connection from %s", conn.RemoteAddr()) select { case <-t.accept: @@ -187,7 +186,7 @@ func (t *TcpListener) handler(conn *net.TCPConn, id string) { defer func() { t.wg.Done() conn.Close() - log.Printf("Closed TCP Connection from %s", conn.RemoteAddr()) + // log.Printf("Closed TCP Connection from %s", conn.RemoteAddr()) // Add one connection potential back to channel when this one closes t.accept <- true t.forget(id) From 4bcf157d88ac484b842c4b20081b7387fa9755d3 Mon Sep 17 00:00:00 2001 From: Cameron Sparr Date: Tue, 19 Apr 2016 22:50:22 -0600 Subject: [PATCH 34/84] Don't replace _ with . in datadog names closes #1024 --- CHANGELOG.md | 2 ++ plugins/outputs/datadog/datadog.go | 10 +++++----- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ba75be262..a652b38d7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -24,6 +24,7 @@ for more details. only. Previously there was an undocumented behavior where filters would match based on _prefix_ in addition to globs. 
This means that a filter like `fielddrop = ["time_"]` will need to be changed to `fielddrop = ["time_*"]` +- **datadog**: measurement and field names will no longer have `_` replaced by `.` - The following plugins have changed their tags to _not_ overwrite the host tag: - cassandra: `host -> cassandra_host` - disque: `host -> disque_host` @@ -42,6 +43,7 @@ based on _prefix_ in addition to globs. This means that a filter like - [#921](https://github.com/influxdata/telegraf/pull/921): mqtt_consumer stops gathering metrics. Thanks @chaton78! - [#1013](https://github.com/influxdata/telegraf/pull/1013): Close dead riemann output connections. Thanks @echupriyanov! - [#1012](https://github.com/influxdata/telegraf/pull/1012): Set default tags in test accumulator. +- [#1024](https://github.com/influxdata/telegraf/issues/1024): Don't replace `.` with `_` in datadog output. - [#1058](https://github.com/influxdata/telegraf/issues/1058): Fix possible leaky TCP connections in influxdb output. - [#1044](https://github.com/influxdata/telegraf/pull/1044): Fix SNMP OID possible collisions. Thanks @relip diff --git a/plugins/outputs/datadog/datadog.go b/plugins/outputs/datadog/datadog.go index 56fdc38e4..088568718 100644 --- a/plugins/outputs/datadog/datadog.go +++ b/plugins/outputs/datadog/datadog.go @@ -8,7 +8,6 @@ import ( "net/http" "net/url" "sort" - "strings" "github.com/influxdata/telegraf" "github.com/influxdata/telegraf/internal" @@ -71,21 +70,22 @@ func (d *Datadog) Write(metrics []telegraf.Metric) error { metricCounter := 0 for _, m := range metrics { - mname := strings.Replace(m.Name(), "_", ".", -1) if dogMs, err := buildMetrics(m); err == nil { for fieldName, dogM := range dogMs { // name of the datadog measurement var dname string if fieldName == "value" { // adding .value seems redundant here - dname = mname + dname = m.Name() } else { - dname = mname + "." + strings.Replace(fieldName, "_", ".", -1) + dname = m.Name() + "." + fieldName } + var host string + host, _ = m.Tags()["host"] metric := &Metric{ Metric: dname, Tags: buildTags(m.Tags()), - Host: m.Tags()["host"], + Host: host, } metric.Points[0] = dogM tempSeries = append(tempSeries, metric) From f1b7ecb2a2dd3a7c3585574008c4805f1fca35ad Mon Sep 17 00:00:00 2001 From: Cameron Sparr Date: Wed, 20 Apr 2016 13:18:07 -0600 Subject: [PATCH 35/84] procstat: Add user, pidfile, pattern & exe tags closes #1035 --- CHANGELOG.md | 1 + plugins/inputs/procstat/procstat.go | 53 ++++++++++++++++------- plugins/inputs/procstat/procstat_test.go | 1 + plugins/inputs/procstat/spec_processor.go | 3 +- 4 files changed, 40 insertions(+), 18 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a652b38d7..426c9118b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -37,6 +37,7 @@ based on _prefix_ in addition to globs. This means that a filter like - [#889](https://github.com/influxdata/telegraf/pull/889): Improved MySQL plugin. Thanks @maksadbek! - [#1060](https://github.com/influxdata/telegraf/pull/1060): TTL metrics added to MongoDB input plugin - [#1056](https://github.com/influxdata/telegraf/pull/1056): Don't allow inputs to overwrite host tags. +- [#1035](https://github.com/influxdata/telegraf/issues/1035): Add `user`, `exe`, `pidfile` tags to procstat plugin. 
### Bugfixes diff --git a/plugins/inputs/procstat/procstat.go b/plugins/inputs/procstat/procstat.go index a0e63fd6f..fbacb9069 100644 --- a/plugins/inputs/procstat/procstat.go +++ b/plugins/inputs/procstat/procstat.go @@ -21,12 +21,16 @@ type Procstat struct { Prefix string User string + // pidmap maps a pid to a process object, so we don't recreate every gather pidmap map[int32]*process.Process + // tagmap maps a pid to a map of tags for that pid + tagmap map[int32]map[string]string } func NewProcstat() *Procstat { return &Procstat{ pidmap: make(map[int32]*process.Process), + tagmap: make(map[int32]map[string]string), } } @@ -61,8 +65,8 @@ func (p *Procstat) Gather(acc telegraf.Accumulator) error { log.Printf("Error: procstat getting process, exe: [%s] pidfile: [%s] pattern: [%s] user: [%s] %s", p.Exe, p.PidFile, p.Pattern, p.User, err.Error()) } else { - for _, proc := range p.pidmap { - p := NewSpecProcessor(p.Prefix, acc, proc) + for pid, proc := range p.pidmap { + p := NewSpecProcessor(p.Prefix, acc, proc, p.tagmap[pid]) p.pushMetrics() } } @@ -103,45 +107,50 @@ func (p *Procstat) getAllPids() ([]int32, error) { var err error if p.PidFile != "" { - pids, err = pidsFromFile(p.PidFile) + pids, err = p.pidsFromFile() } else if p.Exe != "" { - pids, err = pidsFromExe(p.Exe) + pids, err = p.pidsFromExe() } else if p.Pattern != "" { - pids, err = pidsFromPattern(p.Pattern) + pids, err = p.pidsFromPattern() } else if p.User != "" { - pids, err = pidsFromUser(p.User) + pids, err = p.pidsFromUser() } else { - err = fmt.Errorf("Either exe, pid_file or pattern has to be specified") + err = fmt.Errorf("Either exe, pid_file, user, or pattern has to be specified") } return pids, err } -func pidsFromFile(file string) ([]int32, error) { +func (p *Procstat) pidsFromFile() ([]int32, error) { var out []int32 var outerr error - pidString, err := ioutil.ReadFile(file) + pidString, err := ioutil.ReadFile(p.PidFile) if err != nil { - outerr = fmt.Errorf("Failed to read pidfile '%s'. Error: '%s'", file, err) + outerr = fmt.Errorf("Failed to read pidfile '%s'. Error: '%s'", + p.PidFile, err) } else { pid, err := strconv.Atoi(strings.TrimSpace(string(pidString))) if err != nil { outerr = err } else { out = append(out, int32(pid)) + p.tagmap[int32(pid)] = map[string]string{ + "pidfile": p.PidFile, + "pid": string(pidString), + } } } return out, outerr } -func pidsFromExe(exe string) ([]int32, error) { +func (p *Procstat) pidsFromExe() ([]int32, error) { var out []int32 var outerr error bin, err := exec.LookPath("pgrep") if err != nil { return out, fmt.Errorf("Couldn't find pgrep binary: %s", err) } - pgrep, err := exec.Command(bin, exe).Output() + pgrep, err := exec.Command(bin, p.Exe).Output() if err != nil { return out, fmt.Errorf("Failed to execute %s. 
Error: '%s'", bin, err) } else { @@ -150,6 +159,10 @@ func pidsFromExe(exe string) ([]int32, error) { ipid, err := strconv.Atoi(pid) if err == nil { out = append(out, int32(ipid)) + p.tagmap[int32(ipid)] = map[string]string{ + "exe": p.Exe, + "pid": pid, + } } else { outerr = err } @@ -158,14 +171,14 @@ func pidsFromExe(exe string) ([]int32, error) { return out, outerr } -func pidsFromPattern(pattern string) ([]int32, error) { +func (p *Procstat) pidsFromPattern() ([]int32, error) { var out []int32 var outerr error bin, err := exec.LookPath("pgrep") if err != nil { return out, fmt.Errorf("Couldn't find pgrep binary: %s", err) } - pgrep, err := exec.Command(bin, "-f", pattern).Output() + pgrep, err := exec.Command(bin, "-f", p.Pattern).Output() if err != nil { return out, fmt.Errorf("Failed to execute %s. Error: '%s'", bin, err) } else { @@ -174,6 +187,10 @@ func pidsFromPattern(pattern string) ([]int32, error) { ipid, err := strconv.Atoi(pid) if err == nil { out = append(out, int32(ipid)) + p.tagmap[int32(ipid)] = map[string]string{ + "pattern": p.Pattern, + "pid": pid, + } } else { outerr = err } @@ -182,14 +199,14 @@ func pidsFromPattern(pattern string) ([]int32, error) { return out, outerr } -func pidsFromUser(user string) ([]int32, error) { +func (p *Procstat) pidsFromUser() ([]int32, error) { var out []int32 var outerr error bin, err := exec.LookPath("pgrep") if err != nil { return out, fmt.Errorf("Couldn't find pgrep binary: %s", err) } - pgrep, err := exec.Command(bin, "-u", user).Output() + pgrep, err := exec.Command(bin, "-u", p.User).Output() if err != nil { return out, fmt.Errorf("Failed to execute %s. Error: '%s'", bin, err) } else { @@ -198,6 +215,10 @@ func pidsFromUser(user string) ([]int32, error) { ipid, err := strconv.Atoi(pid) if err == nil { out = append(out, int32(ipid)) + p.tagmap[int32(ipid)] = map[string]string{ + "user": p.User, + "pid": pid, + } } else { outerr = err } diff --git a/plugins/inputs/procstat/procstat_test.go b/plugins/inputs/procstat/procstat_test.go index bf5790f67..ccc72bdbb 100644 --- a/plugins/inputs/procstat/procstat_test.go +++ b/plugins/inputs/procstat/procstat_test.go @@ -25,6 +25,7 @@ func TestGather(t *testing.T) { PidFile: file.Name(), Prefix: "foo", pidmap: make(map[int32]*process.Process), + tagmap: make(map[int32]map[string]string), } p.Gather(&acc) assert.True(t, acc.HasFloatField("procstat", "foo_cpu_time_user")) diff --git a/plugins/inputs/procstat/spec_processor.go b/plugins/inputs/procstat/spec_processor.go index bb248f003..d76538ef8 100644 --- a/plugins/inputs/procstat/spec_processor.go +++ b/plugins/inputs/procstat/spec_processor.go @@ -36,9 +36,8 @@ func NewSpecProcessor( prefix string, acc telegraf.Accumulator, p *process.Process, + tags map[string]string, ) *SpecProcessor { - tags := make(map[string]string) - tags["pid"] = fmt.Sprintf("%v", p.Pid) if name, err := p.Name(); err == nil { tags["process_name"] = name } From c0588926b826cab4ced76b9fe5bfcd69f9bc090c Mon Sep 17 00:00:00 2001 From: Cameron Sparr Date: Wed, 20 Apr 2016 18:22:04 -0600 Subject: [PATCH 36/84] Add n_cpu field to system plugin closes #1041 --- plugins/inputs/system/system.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/plugins/inputs/system/system.go b/plugins/inputs/system/system.go index 42b0310a4..55e606225 100644 --- a/plugins/inputs/system/system.go +++ b/plugins/inputs/system/system.go @@ -4,6 +4,7 @@ import ( "bufio" "bytes" "fmt" + "runtime" "github.com/shirou/gopsutil/host" "github.com/shirou/gopsutil/load" @@ -43,6 +44,7 @@ func (_ 
*SystemStats) Gather(acc telegraf.Accumulator) error { "uptime": hostinfo.Uptime, "n_users": len(users), "uptime_format": format_uptime(hostinfo.Uptime), + "n_cpus": runtime.NumCPU(), } acc.AddFields("system", fields, nil) From 8a395fdb4a607e12f07896e11f5fd3381bd22d75 Mon Sep 17 00:00:00 2001 From: Cameron Sparr Date: Wed, 20 Apr 2016 18:36:14 -0600 Subject: [PATCH 37/84] changelog update feature 1041 --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 426c9118b..2a1cdcd43 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -38,6 +38,7 @@ based on _prefix_ in addition to globs. This means that a filter like - [#1060](https://github.com/influxdata/telegraf/pull/1060): TTL metrics added to MongoDB input plugin - [#1056](https://github.com/influxdata/telegraf/pull/1056): Don't allow inputs to overwrite host tags. - [#1035](https://github.com/influxdata/telegraf/issues/1035): Add `user`, `exe`, `pidfile` tags to procstat plugin. +- [#1041](https://github.com/influxdata/telegraf/issues/1041): Add `n_cpus` field to the system plugin. ### Bugfixes From f818f4469382ed1100e6806e34ae86e439fb4eb4 Mon Sep 17 00:00:00 2001 From: Martin Gehrke Date: Thu, 21 Apr 2016 09:26:46 -0400 Subject: [PATCH 38/84] Added Network Interface Object block to Generic Queries examples in win_perf_counters/README.md Network metrics are pretty important and the block adds a couple with a link to the names for more. This adds a block with a few counters to the Generic Queries examples in plugins/inputs/win_perf_counters/README.md --- plugins/inputs/win_perf_counters/README.md | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/plugins/inputs/win_perf_counters/README.md b/plugins/inputs/win_perf_counters/README.md index 7a2b87a61..967714b48 100644 --- a/plugins/inputs/win_perf_counters/README.md +++ b/plugins/inputs/win_perf_counters/README.md @@ -156,6 +156,15 @@ if any of the combinations of ObjectName/Instances/Counters are invalid. Instances = ["------"] # Use 6 x - to remove the Instance bit from the query. Measurement = "win_mem" #IncludeTotal=false #Set to true to include _Total instance when querying for all (*). + + [[inputs.win_perf_counters.object]] + # more counters for the Network Interface Object can be found at + # https://msdn.microsoft.com/en-us/library/ms803962.aspx + ObjectName = "Network Interface" + Counters = ["Bytes Received/sec","Bytes Sent/sec","Packets Received/sec","Packets Sent/sec"] + Instances = ["*"] # Use 6 x - to remove the Instance bit from the query. + Measurement = "win_net" + #IncludeTotal=false #Set to true to include _Total instance when querying for all (*). 
``` ### Active Directory Domain Controller From 88def9b71b2cbf4886f13031644489f143da7ee5 Mon Sep 17 00:00:00 2001 From: Cameron Sparr Date: Wed, 20 Apr 2016 19:51:25 -0600 Subject: [PATCH 39/84] filestat input plugin closes #929 --- CHANGELOG.md | 1 + etc/telegraf.conf | 10 ++ plugins/inputs/all/all.go | 1 + plugins/inputs/filestat/README.md | 37 ++++ plugins/inputs/filestat/filestat.go | 185 +++++++++++++++++++ plugins/inputs/filestat/filestat_test.go | 197 +++++++++++++++++++++ plugins/inputs/filestat/testdata/log1.log | 0 plugins/inputs/filestat/testdata/log2.log | 0 plugins/inputs/filestat/testdata/test.conf | 5 + 9 files changed, 436 insertions(+) create mode 100644 plugins/inputs/filestat/README.md create mode 100644 plugins/inputs/filestat/filestat.go create mode 100644 plugins/inputs/filestat/filestat_test.go create mode 100644 plugins/inputs/filestat/testdata/log1.log create mode 100644 plugins/inputs/filestat/testdata/log2.log create mode 100644 plugins/inputs/filestat/testdata/test.conf diff --git a/CHANGELOG.md b/CHANGELOG.md index 2a1cdcd43..6b09616ab 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -39,6 +39,7 @@ based on _prefix_ in addition to globs. This means that a filter like - [#1056](https://github.com/influxdata/telegraf/pull/1056): Don't allow inputs to overwrite host tags. - [#1035](https://github.com/influxdata/telegraf/issues/1035): Add `user`, `exe`, `pidfile` tags to procstat plugin. - [#1041](https://github.com/influxdata/telegraf/issues/1041): Add `n_cpus` field to the system plugin. +- [#1072](https://github.com/influxdata/telegraf/pull/1072): New Input Plugin: filestat. ### Bugfixes diff --git a/etc/telegraf.conf b/etc/telegraf.conf index 1623e5636..627f6d64c 100644 --- a/etc/telegraf.conf +++ b/etc/telegraf.conf @@ -580,6 +580,16 @@ # data_format = "influx" +# # Read stats about given file(s) +# [[inputs.filestat]] +# ## Files to gather stats about. +# ## These accept standard unix glob matching rules, but with the addition of +# ## ** as a "super asterisk". See https://github.com/gobwas/glob. +# ["/etc/telegraf/telegraf.conf", "/var/log/**.log"] +# ## If true, read the entire file and calculate an md5 checksum. +# md5 = false + + # # Read metrics of haproxy, via socket or csv stats page # [[inputs.haproxy]] # ## An array of address to gather stats about. Specify an ip on hostname diff --git a/plugins/inputs/all/all.go b/plugins/inputs/all/all.go index 3f56ee541..36526f4d1 100644 --- a/plugins/inputs/all/all.go +++ b/plugins/inputs/all/all.go @@ -14,6 +14,7 @@ import ( _ "github.com/influxdata/telegraf/plugins/inputs/dovecot" _ "github.com/influxdata/telegraf/plugins/inputs/elasticsearch" _ "github.com/influxdata/telegraf/plugins/inputs/exec" + _ "github.com/influxdata/telegraf/plugins/inputs/filestat" _ "github.com/influxdata/telegraf/plugins/inputs/github_webhooks" _ "github.com/influxdata/telegraf/plugins/inputs/haproxy" _ "github.com/influxdata/telegraf/plugins/inputs/http_response" diff --git a/plugins/inputs/filestat/README.md b/plugins/inputs/filestat/README.md new file mode 100644 index 000000000..bfa51011c --- /dev/null +++ b/plugins/inputs/filestat/README.md @@ -0,0 +1,37 @@ +# filestat Input Plugin + +The filestat plugin gathers metrics about file existence, size, and other stats. + +### Configuration: + +```toml +# Read stats about given file(s) +[[inputs.filestat]] + ## Files to gather stats about. + ## These accept standard unix glob matching rules, but with the addition of + ## ** as a "super asterisk". See https://github.com/gobwas/glob. 
+ files = ["/etc/telegraf/telegraf.conf", "/var/log/**.log"] + ## If true, read the entire file and calculate an md5 checksum. + md5 = false +``` + +### Measurements & Fields: + +- filestat + - exists (int, 0 | 1) + - size_bytes (int, bytes) + - md5 (optional, string) + +### Tags: + +- All measurements have the following tags: + - file (the path the to file, as specified in the config) + +### Example Output: + +``` +$ telegraf -config /etc/telegraf/telegraf.conf -input-filter filestat -test +* Plugin: filestat, Collection 1 +> filestat,file=/tmp/foo/bar,host=tyrion exists=0i 1461203374493128216 +> filestat,file=/Users/sparrc/ws/telegraf.conf,host=tyrion exists=1i,size=47894i 1461203374493199335 +``` diff --git a/plugins/inputs/filestat/filestat.go b/plugins/inputs/filestat/filestat.go new file mode 100644 index 000000000..c0b70c878 --- /dev/null +++ b/plugins/inputs/filestat/filestat.go @@ -0,0 +1,185 @@ +package filestat + +import ( + "crypto/md5" + "fmt" + "io" + "os" + "path/filepath" + "strings" + + "github.com/gobwas/glob" + + "github.com/influxdata/telegraf" + "github.com/influxdata/telegraf/plugins/inputs" +) + +var sepStr = fmt.Sprintf("%v", string(os.PathSeparator)) + +const sampleConfig = ` + ## Files to gather stats about. + ## These accept standard unix glob matching rules, but with the addition of + ## ** as a "super asterisk". See https://github.com/gobwas/glob. + ["/etc/telegraf/telegraf.conf", "/var/log/**.log"] + ## If true, read the entire file and calculate an md5 checksum. + md5 = false +` + +type FileStat struct { + Md5 bool + Files []string + + // maps full file paths to glob obj + globs map[string]glob.Glob + // maps full file paths to their root dir + roots map[string]string +} + +func NewFileStat() *FileStat { + return &FileStat{ + globs: make(map[string]glob.Glob), + roots: make(map[string]string), + } +} + +func (_ *FileStat) Description() string { + return "Read stats about given file(s)" +} + +func (_ *FileStat) SampleConfig() string { return sampleConfig } + +func (f *FileStat) Gather(acc telegraf.Accumulator) error { + var errS string + var err error + + for _, filepath := range f.Files { + // Get the compiled glob object for this filepath + g, ok := f.globs[filepath] + if !ok { + if g, err = glob.Compile(filepath, os.PathSeparator); err != nil { + errS += err.Error() + " " + continue + } + f.globs[filepath] = g + } + // Get the root directory for this filepath + root, ok := f.roots[filepath] + if !ok { + root = findRootDir(filepath) + f.roots[filepath] = root + } + + var matches []string + // Do not walk file tree if we don't have to. 
+ if !hasMeta(filepath) { + matches = []string{filepath} + } else { + matches = walkFilePath(f.roots[filepath], f.globs[filepath]) + } + for _, file := range matches { + tags := map[string]string{ + "file": file, + } + fields := map[string]interface{}{ + "exists": int64(0), + } + // Get file stats + fileInfo, err := os.Stat(file) + if os.IsNotExist(err) { + // file doesn't exist, so move on to the next + acc.AddFields("filestat", fields, tags) + continue + } + if err != nil { + errS += err.Error() + " " + continue + } + + // file exists and no errors encountered + fields["exists"] = int64(1) + fields["size_bytes"] = fileInfo.Size() + + if f.Md5 { + md5, err := getMd5(file) + if err != nil { + errS += err.Error() + " " + } else { + fields["md5_sum"] = md5 + } + } + + acc.AddFields("filestat", fields, tags) + } + } + + if errS != "" { + return fmt.Errorf(errS) + } + return nil +} + +// walk the filepath from the given root and return a list of files that match +// the given glob. +func walkFilePath(root string, g glob.Glob) []string { + matchedFiles := []string{} + walkfn := func(path string, _ os.FileInfo, _ error) error { + if g.Match(path) { + matchedFiles = append(matchedFiles, path) + } + return nil + } + filepath.Walk(root, walkfn) + return matchedFiles +} + +// Read given file and calculate an md5 hash. +func getMd5(file string) (string, error) { + of, err := os.Open(file) + if err != nil { + return "", err + } + defer of.Close() + + hash := md5.New() + _, err = io.Copy(hash, of) + if err != nil { + // fatal error + return "", err + } + return fmt.Sprintf("%x", hash.Sum(nil)), nil +} + +// find the root dir of the given path (could include globs). +// ie: +// /var/log/telegraf.conf -> /var/log/ +// /home/** -> /home/ +// /home/*/** -> /home/ +// /lib/share/*/*/**.txt -> /lib/share/ +func findRootDir(path string) string { + pathItems := strings.Split(path, sepStr) + outpath := sepStr + for i, item := range pathItems { + if i == len(pathItems)-1 { + break + } + if item == "" { + continue + } + if hasMeta(item) { + break + } + outpath += item + sepStr + } + return outpath +} + +// hasMeta reports whether path contains any magic glob characters. 
+func hasMeta(path string) bool { + return strings.IndexAny(path, "*?[") >= 0 +} + +func init() { + inputs.Add("filestat", func() telegraf.Input { + return NewFileStat() + }) +} diff --git a/plugins/inputs/filestat/filestat_test.go b/plugins/inputs/filestat/filestat_test.go new file mode 100644 index 000000000..f8977c920 --- /dev/null +++ b/plugins/inputs/filestat/filestat_test.go @@ -0,0 +1,197 @@ +package filestat + +import ( + "runtime" + "strings" + "testing" + + "github.com/influxdata/telegraf/testutil" + "github.com/stretchr/testify/assert" +) + +func TestGatherNoMd5(t *testing.T) { + dir := getTestdataDir() + fs := NewFileStat() + fs.Files = []string{ + dir + "log1.log", + dir + "log2.log", + "/non/existant/file", + } + + acc := testutil.Accumulator{} + fs.Gather(&acc) + + tags1 := map[string]string{ + "file": dir + "log1.log", + } + fields1 := map[string]interface{}{ + "size_bytes": int64(0), + "exists": int64(1), + } + acc.AssertContainsTaggedFields(t, "filestat", fields1, tags1) + + tags2 := map[string]string{ + "file": dir + "log2.log", + } + fields2 := map[string]interface{}{ + "size_bytes": int64(0), + "exists": int64(1), + } + acc.AssertContainsTaggedFields(t, "filestat", fields2, tags2) + + tags3 := map[string]string{ + "file": "/non/existant/file", + } + fields3 := map[string]interface{}{ + "exists": int64(0), + } + acc.AssertContainsTaggedFields(t, "filestat", fields3, tags3) +} + +func TestGatherExplicitFiles(t *testing.T) { + dir := getTestdataDir() + fs := NewFileStat() + fs.Md5 = true + fs.Files = []string{ + dir + "log1.log", + dir + "log2.log", + "/non/existant/file", + } + + acc := testutil.Accumulator{} + fs.Gather(&acc) + + tags1 := map[string]string{ + "file": dir + "log1.log", + } + fields1 := map[string]interface{}{ + "size_bytes": int64(0), + "exists": int64(1), + "md5_sum": "d41d8cd98f00b204e9800998ecf8427e", + } + acc.AssertContainsTaggedFields(t, "filestat", fields1, tags1) + + tags2 := map[string]string{ + "file": dir + "log2.log", + } + fields2 := map[string]interface{}{ + "size_bytes": int64(0), + "exists": int64(1), + "md5_sum": "d41d8cd98f00b204e9800998ecf8427e", + } + acc.AssertContainsTaggedFields(t, "filestat", fields2, tags2) + + tags3 := map[string]string{ + "file": "/non/existant/file", + } + fields3 := map[string]interface{}{ + "exists": int64(0), + } + acc.AssertContainsTaggedFields(t, "filestat", fields3, tags3) +} + +func TestGatherGlob(t *testing.T) { + dir := getTestdataDir() + fs := NewFileStat() + fs.Md5 = true + fs.Files = []string{ + dir + "*.log", + } + + acc := testutil.Accumulator{} + fs.Gather(&acc) + + tags1 := map[string]string{ + "file": dir + "log1.log", + } + fields1 := map[string]interface{}{ + "size_bytes": int64(0), + "exists": int64(1), + "md5_sum": "d41d8cd98f00b204e9800998ecf8427e", + } + acc.AssertContainsTaggedFields(t, "filestat", fields1, tags1) + + tags2 := map[string]string{ + "file": dir + "log2.log", + } + fields2 := map[string]interface{}{ + "size_bytes": int64(0), + "exists": int64(1), + "md5_sum": "d41d8cd98f00b204e9800998ecf8427e", + } + acc.AssertContainsTaggedFields(t, "filestat", fields2, tags2) +} + +func TestGatherSuperAsterisk(t *testing.T) { + dir := getTestdataDir() + fs := NewFileStat() + fs.Md5 = true + fs.Files = []string{ + dir + "**", + } + + acc := testutil.Accumulator{} + fs.Gather(&acc) + + tags1 := map[string]string{ + "file": dir + "log1.log", + } + fields1 := map[string]interface{}{ + "size_bytes": int64(0), + "exists": int64(1), + "md5_sum": "d41d8cd98f00b204e9800998ecf8427e", + } + 
acc.AssertContainsTaggedFields(t, "filestat", fields1, tags1) + + tags2 := map[string]string{ + "file": dir + "log2.log", + } + fields2 := map[string]interface{}{ + "size_bytes": int64(0), + "exists": int64(1), + "md5_sum": "d41d8cd98f00b204e9800998ecf8427e", + } + acc.AssertContainsTaggedFields(t, "filestat", fields2, tags2) + + tags3 := map[string]string{ + "file": dir + "test.conf", + } + fields3 := map[string]interface{}{ + "size_bytes": int64(104), + "exists": int64(1), + "md5_sum": "5a7e9b77fa25e7bb411dbd17cf403c1f", + } + acc.AssertContainsTaggedFields(t, "filestat", fields3, tags3) +} + +func TestFindRootDir(t *testing.T) { + tests := []struct { + input string + output string + }{ + {"/var/log/telegraf.conf", "/var/log/"}, + {"/home/**", "/home/"}, + {"/home/*/**", "/home/"}, + {"/lib/share/*/*/**.txt", "/lib/share/"}, + } + + for _, test := range tests { + actual := findRootDir(test.input) + assert.Equal(t, test.output, actual) + } +} + +func TestGetMd5(t *testing.T) { + dir := getTestdataDir() + md5, err := getMd5(dir + "test.conf") + assert.NoError(t, err) + assert.Equal(t, "5a7e9b77fa25e7bb411dbd17cf403c1f", md5) + + md5, err = getMd5("/tmp/foo/bar/fooooo") + assert.Error(t, err) +} + +func getTestdataDir() string { + _, filename, _, _ := runtime.Caller(1) + return strings.Replace(filename, "filestat_test.go", "testdata/", 1) +} diff --git a/plugins/inputs/filestat/testdata/log1.log b/plugins/inputs/filestat/testdata/log1.log new file mode 100644 index 000000000..e69de29bb diff --git a/plugins/inputs/filestat/testdata/log2.log b/plugins/inputs/filestat/testdata/log2.log new file mode 100644 index 000000000..e69de29bb diff --git a/plugins/inputs/filestat/testdata/test.conf b/plugins/inputs/filestat/testdata/test.conf new file mode 100644 index 000000000..a06111991 --- /dev/null +++ b/plugins/inputs/filestat/testdata/test.conf @@ -0,0 +1,5 @@ +# this is a fake testing config file +# for testing the filestat plugin + +option1 = "foo" +option2 = "bar" From f9c8ed0dc3d0ef013412a8158a15371b0d050c54 Mon Sep 17 00:00:00 2001 From: Cameron Sparr Date: Thu, 21 Apr 2016 19:47:23 -0600 Subject: [PATCH 40/84] Add filestat plugin to README --- README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index e731dad0a..8084ba790 100644 --- a/README.md +++ b/README.md @@ -168,7 +168,8 @@ Currently implemented sources: * [docker](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/docker) * [dovecot](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/dovecot) * [elasticsearch](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/elasticsearch) -* [exec](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/exec ) (generic executable plugin, support JSON, influx, graphite and nagios) +* [exec](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/exec) (generic executable plugin, support JSON, influx, graphite and nagios) +* [filestat](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/filestat) * [haproxy](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/haproxy) * [http_response](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/http_response) * [httpjson](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/httpjson) (generic JSON-emitting http service plugin) From 194288c00e1c6424264b38d14f2b45769bdc6fb4 Mon Sep 17 00:00:00 2001 From: Victor Garcia Date: Wed, 20 Apr 2016 01:16:22 +0200 Subject: [PATCH 41/84] Adding replication 
lag metric closes #1066 --- CHANGELOG.md | 1 + plugins/inputs/mongodb/README.md | 1 + plugins/inputs/mongodb/mongodb_data.go | 1 + plugins/inputs/mongodb/mongodb_data_test.go | 1 + plugins/inputs/mongodb/mongodb_server.go | 18 +++++- plugins/inputs/mongodb/mongostat.go | 66 +++++++++++++++++++-- 6 files changed, 81 insertions(+), 7 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6b09616ab..659b318da 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -40,6 +40,7 @@ based on _prefix_ in addition to globs. This means that a filter like - [#1035](https://github.com/influxdata/telegraf/issues/1035): Add `user`, `exe`, `pidfile` tags to procstat plugin. - [#1041](https://github.com/influxdata/telegraf/issues/1041): Add `n_cpus` field to the system plugin. - [#1072](https://github.com/influxdata/telegraf/pull/1072): New Input Plugin: filestat. +- [#1066](https://github.com/influxdata/telegraf/pull/1066): Replication lag metrics for MongoDB input plugin ### Bugfixes diff --git a/plugins/inputs/mongodb/README.md b/plugins/inputs/mongodb/README.md index 868b51528..0d140b395 100644 --- a/plugins/inputs/mongodb/README.md +++ b/plugins/inputs/mongodb/README.md @@ -50,3 +50,4 @@ and create a single measurement containing values e.g. * vsize_megabytes * ttl_deletes_per_sec * ttl_passes_per_sec + * repl_lag diff --git a/plugins/inputs/mongodb/mongodb_data.go b/plugins/inputs/mongodb/mongodb_data.go index 2bbeabd94..f9bbc1d3b 100644 --- a/plugins/inputs/mongodb/mongodb_data.go +++ b/plugins/inputs/mongodb/mongodb_data.go @@ -54,6 +54,7 @@ var DefaultReplStats = map[string]string{ "repl_getmores_per_sec": "GetMoreR", "repl_commands_per_sec": "CommandR", "member_status": "NodeType", + "repl_lag": "ReplLag", } var MmapStats = map[string]string{ diff --git a/plugins/inputs/mongodb/mongodb_data_test.go b/plugins/inputs/mongodb/mongodb_data_test.go index e63d2025d..5619641fc 100644 --- a/plugins/inputs/mongodb/mongodb_data_test.go +++ b/plugins/inputs/mongodb/mongodb_data_test.go @@ -127,6 +127,7 @@ func TestStateTag(t *testing.T) { "repl_inserts_per_sec": int64(0), "repl_queries_per_sec": int64(0), "repl_updates_per_sec": int64(0), + "repl_lag": int64(0), "resident_megabytes": int64(0), "updates_per_sec": int64(0), "vsize_megabytes": int64(0), diff --git a/plugins/inputs/mongodb/mongodb_server.go b/plugins/inputs/mongodb/mongodb_server.go index 26aac2b63..86699a4d9 100644 --- a/plugins/inputs/mongodb/mongodb_server.go +++ b/plugins/inputs/mongodb/mongodb_server.go @@ -1,6 +1,7 @@ package mongodb import ( + "log" "net/url" "time" @@ -12,7 +13,7 @@ import ( type Server struct { Url *url.URL Session *mgo.Session - lastResult *ServerStatus + lastResult *MongoStatus } func (s *Server) getDefaultTags() map[string]string { @@ -24,11 +25,22 @@ func (s *Server) getDefaultTags() map[string]string { func (s *Server) gatherData(acc telegraf.Accumulator) error { s.Session.SetMode(mgo.Eventual, true) s.Session.SetSocketTimeout(0) - result := &ServerStatus{} - err := s.Session.DB("admin").Run(bson.D{{"serverStatus", 1}, {"recordStats", 0}}, result) + result_server := &ServerStatus{} + err := s.Session.DB("admin").Run(bson.D{{"serverStatus", 1}, {"recordStats", 0}}, result_server) if err != nil { return err } + result_repl := &ReplSetStatus{} + err = s.Session.DB("admin").Run(bson.D{{"replSetGetStatus", 1}}, result_repl) + if err != nil { + log.Println("Not gathering replica set status, member not in replica set") + } + + result := &MongoStatus{ + ServerStatus: result_server, + ReplSetStatus: result_repl, + } + defer 
func() { s.lastResult = result }() diff --git a/plugins/inputs/mongodb/mongostat.go b/plugins/inputs/mongodb/mongostat.go index 33cd8254e..b131bf9a0 100644 --- a/plugins/inputs/mongodb/mongostat.go +++ b/plugins/inputs/mongodb/mongostat.go @@ -11,6 +11,8 @@ import ( "sort" "strings" "time" + + "gopkg.in/mgo.v2/bson" ) const ( @@ -28,8 +30,13 @@ const ( WTOnly // only active if node has wiredtiger-specific fields ) +type MongoStatus struct { + SampleTime time.Time + ServerStatus *ServerStatus + ReplSetStatus *ReplSetStatus +} + type ServerStatus struct { - SampleTime time.Time `bson:""` Host string `bson:"host"` Version string `bson:"version"` Process string `bson:"process"` @@ -57,6 +64,19 @@ type ServerStatus struct { Metrics *MetricsStats `bson:"metrics"` } +// ReplSetStatus stores information from replSetGetStatus +type ReplSetStatus struct { + Members []ReplSetMember `bson:"members"` + MyState int64 `bson:"myState"` +} + +// ReplSetMember stores information related to a replica set member +type ReplSetMember struct { + Name string `bson:"name"` + State int64 `bson:"state"` + Optime *bson.MongoTimestamp `bson:"optime"` +} + // WiredTiger stores information related to the WiredTiger storage engine. type WiredTiger struct { Transaction TransactionStats `bson:"transaction"` @@ -356,6 +376,7 @@ type StatLine struct { // Replicated Opcounter fields InsertR, QueryR, UpdateR, DeleteR, GetMoreR, CommandR int64 + ReplLag int64 Flushes int64 Mapped, Virtual, Resident, NonMapped int64 Faults int64 @@ -410,8 +431,11 @@ func diff(newVal, oldVal, sampleTime int64) int64 { return d / sampleTime } -// NewStatLine constructs a StatLine object from two ServerStatus objects. -func NewStatLine(oldStat, newStat ServerStatus, key string, all bool, sampleSecs int64) *StatLine { +// NewStatLine constructs a StatLine object from two MongoStatus objects. 
+func NewStatLine(oldMongo, newMongo MongoStatus, key string, all bool, sampleSecs int64) *StatLine { + oldStat := *oldMongo.ServerStatus + newStat := *newMongo.ServerStatus + returnVal := &StatLine{ Key: key, Host: newStat.Host, @@ -462,7 +486,7 @@ func NewStatLine(oldStat, newStat ServerStatus, key string, all bool, sampleSecs returnVal.Flushes = newStat.BackgroundFlushing.Flushes - oldStat.BackgroundFlushing.Flushes } - returnVal.Time = newStat.SampleTime + returnVal.Time = newMongo.SampleTime returnVal.IsMongos = (newStat.ShardCursorType != nil || strings.HasPrefix(newStat.Process, MongosProcess)) @@ -607,5 +631,39 @@ func NewStatLine(oldStat, newStat ServerStatus, key string, all bool, sampleSecs returnVal.NumConnections = newStat.Connections.Current } + newReplStat := *newMongo.ReplSetStatus + + if newReplStat.Members != nil { + myName := newStat.Repl.Me + // Find the master and myself + master := ReplSetMember{} + me := ReplSetMember{} + for _, member := range newReplStat.Members { + if member.Name == myName { + if member.State == 1 { + // I'm the master + returnVal.ReplLag = 0 + break + } else { + // I'm secondary + me = member + } + } else if member.State == 1 { + // Master found + master = member + } + } + + if me.Optime != nil && master.Optime != nil && me.State == 2 { + // MongoTimestamp type is int64 where the first 32bits are the unix timestamp + lag := int64(*master.Optime>>32 - *me.Optime>>32) + if lag < 0 { + returnVal.ReplLag = 0 + } else { + returnVal.ReplLag = lag + } + } + } + return returnVal } From 23f2b475319cc847320a1260ccdbc1fa6eea6b90 Mon Sep 17 00:00:00 2001 From: Cameron Sparr Date: Mon, 18 Apr 2016 18:32:15 -0600 Subject: [PATCH 42/84] Ignore errors in systemd closes #1022 --- CHANGELOG.md | 1 + scripts/post-install.sh | 28 ++++++++++++++-------------- 2 files changed, 15 insertions(+), 14 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 659b318da..1e45895f4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -50,6 +50,7 @@ based on _prefix_ in addition to globs. This means that a filter like - [#1024](https://github.com/influxdata/telegraf/issues/1024): Don't replace `.` with `_` in datadog output. - [#1058](https://github.com/influxdata/telegraf/issues/1058): Fix possible leaky TCP connections in influxdb output. - [#1044](https://github.com/influxdata/telegraf/pull/1044): Fix SNMP OID possible collisions. Thanks @relip +- [#1022](https://github.com/influxdata/telegraf/issues/1022): Dont error deb/rpm install on systemd errors. ## v0.12.1 [2016-04-14] diff --git a/scripts/post-install.sh b/scripts/post-install.sh index 53d745ca9..9aec08543 100644 --- a/scripts/post-install.sh +++ b/scripts/post-install.sh @@ -12,7 +12,7 @@ function install_init { function install_systemd { cp -f $SCRIPT_DIR/telegraf.service /lib/systemd/system/telegraf.service - systemctl enable telegraf + systemctl enable telegraf || true systemctl daemon-reload || true } @@ -53,29 +53,29 @@ if [[ -f /etc/redhat-release ]]; then # RHEL-variant logic which systemctl &>/dev/null if [[ $? -eq 0 ]]; then - install_systemd + install_systemd else - # Assuming sysv - install_init - install_chkconfig + # Assuming sysv + install_init + install_chkconfig fi elif [[ -f /etc/debian_version ]]; then # Debian/Ubuntu logic which systemctl &>/dev/null if [[ $? -eq 0 ]]; then - install_systemd - systemctl restart telegraf + install_systemd + systemctl restart telegraf || echo "WARNING: systemd not running." 
else - # Assuming sysv - install_init - install_update_rcd - invoke-rc.d telegraf restart + # Assuming sysv + install_init + install_update_rcd + invoke-rc.d telegraf restart fi elif [[ -f /etc/os-release ]]; then source /etc/os-release if [[ $ID = "amzn" ]]; then - # Amazon Linux logic - install_init - install_chkconfig + # Amazon Linux logic + install_init + install_chkconfig fi fi From 1b083d63ab44d950204fe1166d4d1608f6328c90 Mon Sep 17 00:00:00 2001 From: Cameron Sparr Date: Fri, 22 Apr 2016 11:20:25 -0600 Subject: [PATCH 43/84] add gitattributes file --- .gitattributes | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 .gitattributes diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 000000000..80edb3f0c --- /dev/null +++ b/.gitattributes @@ -0,0 +1,2 @@ +CHANGELOG.md merge=union + From 7b27cad1baac8a6346ad13419fb1a0df81c21ab4 Mon Sep 17 00:00:00 2001 From: Cameron Sparr Date: Fri, 22 Apr 2016 10:23:00 -0600 Subject: [PATCH 44/84] Dont specify AWS credential chain, use default closes #1078 --- CHANGELOG.md | 1 + plugins/inputs/cloudwatch/cloudwatch.go | 13 ++----------- plugins/outputs/cloudwatch/cloudwatch.go | 9 --------- plugins/outputs/kinesis/kinesis.go | 9 --------- 4 files changed, 3 insertions(+), 29 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1e45895f4..c49d2b72c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -51,6 +51,7 @@ based on _prefix_ in addition to globs. This means that a filter like - [#1058](https://github.com/influxdata/telegraf/issues/1058): Fix possible leaky TCP connections in influxdb output. - [#1044](https://github.com/influxdata/telegraf/pull/1044): Fix SNMP OID possible collisions. Thanks @relip - [#1022](https://github.com/influxdata/telegraf/issues/1022): Dont error deb/rpm install on systemd errors. +- [#1078](https://github.com/influxdata/telegraf/issues/1078): Use default AWS credential chain. 
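For reference, a minimal sketch of the client construction that remains after this change: with no explicit Credentials set on aws.Config, the aws-sdk-go session falls back to its default provider chain (environment variables, shared credentials file, then the EC2 instance role). The region value and the standalone main wrapper below are illustrative only, not taken from the patch.

```
package main

import (
	"fmt"

	"github.com/aws/aws-sdk-go/aws"
	"github.com/aws/aws-sdk-go/aws/session"
	"github.com/aws/aws-sdk-go/service/cloudwatch"
)

func main() {
	// No Credentials field: the SDK resolves credentials via its default chain.
	config := &aws.Config{
		Region: aws.String("us-east-1"), // placeholder region, not from the patch
	}
	svc := cloudwatch.New(session.New(config))
	fmt.Printf("cloudwatch client ready: %T\n", svc)
}
```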
## v0.12.1 [2016-04-14] diff --git a/plugins/inputs/cloudwatch/cloudwatch.go b/plugins/inputs/cloudwatch/cloudwatch.go index e3fa74bad..8e6384180 100644 --- a/plugins/inputs/cloudwatch/cloudwatch.go +++ b/plugins/inputs/cloudwatch/cloudwatch.go @@ -6,9 +6,6 @@ import ( "time" "github.com/aws/aws-sdk-go/aws" - "github.com/aws/aws-sdk-go/aws/credentials" - "github.com/aws/aws-sdk-go/aws/credentials/ec2rolecreds" - "github.com/aws/aws-sdk-go/aws/ec2metadata" "github.com/aws/aws-sdk-go/aws/session" "github.com/aws/aws-sdk-go/service/cloudwatch" @@ -62,7 +59,7 @@ func (c *CloudWatch) SampleConfig() string { ## Collection Delay (required - must account for metrics availability via CloudWatch API) delay = '1m' - ## Recomended: use metric 'interval' that is a multiple of 'period' to avoid + ## Recomended: use metric 'interval' that is a multiple of 'period' to avoid ## gaps or overlap in pulled data interval = '1m' @@ -74,7 +71,7 @@ func (c *CloudWatch) SampleConfig() string { ## Refreshes Namespace available metrics every 1h #[[inputs.cloudwatch.metrics]] # names = ['Latency', 'RequestCount'] - # + # # ## Dimension filters for Metric (optional) # [[inputs.cloudwatch.metrics.dimensions]] # name = 'LoadBalancerName' @@ -154,12 +151,6 @@ func init() { func (c *CloudWatch) initializeCloudWatch() error { config := &aws.Config{ Region: aws.String(c.Region), - Credentials: credentials.NewChainCredentials( - []credentials.Provider{ - &ec2rolecreds.EC2RoleProvider{Client: ec2metadata.New(session.New())}, - &credentials.EnvProvider{}, - &credentials.SharedCredentialsProvider{}, - }), } c.client = cloudwatch.New(session.New(config)) diff --git a/plugins/outputs/cloudwatch/cloudwatch.go b/plugins/outputs/cloudwatch/cloudwatch.go index 42d98b5be..75b2addf4 100644 --- a/plugins/outputs/cloudwatch/cloudwatch.go +++ b/plugins/outputs/cloudwatch/cloudwatch.go @@ -8,9 +8,6 @@ import ( "time" "github.com/aws/aws-sdk-go/aws" - "github.com/aws/aws-sdk-go/aws/credentials" - "github.com/aws/aws-sdk-go/aws/credentials/ec2rolecreds" - "github.com/aws/aws-sdk-go/aws/ec2metadata" "github.com/aws/aws-sdk-go/aws/session" "github.com/aws/aws-sdk-go/service/cloudwatch" @@ -43,12 +40,6 @@ func (c *CloudWatch) Description() string { func (c *CloudWatch) Connect() error { Config := &aws.Config{ Region: aws.String(c.Region), - Credentials: credentials.NewChainCredentials( - []credentials.Provider{ - &ec2rolecreds.EC2RoleProvider{Client: ec2metadata.New(session.New())}, - &credentials.EnvProvider{}, - &credentials.SharedCredentialsProvider{}, - }), } svc := cloudwatch.New(session.New(Config)) diff --git a/plugins/outputs/kinesis/kinesis.go b/plugins/outputs/kinesis/kinesis.go index 01906a7f5..a4b5e6a7b 100644 --- a/plugins/outputs/kinesis/kinesis.go +++ b/plugins/outputs/kinesis/kinesis.go @@ -8,9 +8,6 @@ import ( "time" "github.com/aws/aws-sdk-go/aws" - "github.com/aws/aws-sdk-go/aws/credentials" - "github.com/aws/aws-sdk-go/aws/credentials/ec2rolecreds" - "github.com/aws/aws-sdk-go/aws/ec2metadata" "github.com/aws/aws-sdk-go/aws/session" "github.com/aws/aws-sdk-go/service/kinesis" @@ -67,12 +64,6 @@ func (k *KinesisOutput) Connect() error { } Config := &aws.Config{ Region: aws.String(k.Region), - Credentials: credentials.NewChainCredentials( - []credentials.Provider{ - &ec2rolecreds.EC2RoleProvider{Client: ec2metadata.New(session.New())}, - &credentials.EnvProvider{}, - &credentials.SharedCredentialsProvider{}, - }), } svc := kinesis.New(session.New(Config)) From 5b1e59a48c03133ee4bd2c06a676a41b107e58a5 Mon Sep 17 00:00:00 2001 
From: Cameron Sparr Date: Fri, 22 Apr 2016 19:15:07 -0600 Subject: [PATCH 45/84] filestat plugin config fixup --- etc/telegraf.conf | 6 +++--- plugins/inputs/filestat/filestat.go | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/etc/telegraf.conf b/etc/telegraf.conf index 627f6d64c..dc41eaa96 100644 --- a/etc/telegraf.conf +++ b/etc/telegraf.conf @@ -459,7 +459,7 @@ # ## Collection Delay (required - must account for metrics availability via CloudWatch API) # delay = '1m' # -# ## Recomended: use metric 'interval' that is a multiple of 'period' to avoid +# ## Recomended: use metric 'interval' that is a multiple of 'period' to avoid # ## gaps or overlap in pulled data # interval = '1m' # @@ -471,7 +471,7 @@ # ## Refreshes Namespace available metrics every 1h # #[[inputs.cloudwatch.metrics]] # # names = ['Latency', 'RequestCount'] -# # +# # # # ## Dimension filters for Metric (optional) # # [[inputs.cloudwatch.metrics.dimensions]] # # name = 'LoadBalancerName' @@ -585,7 +585,7 @@ # ## Files to gather stats about. # ## These accept standard unix glob matching rules, but with the addition of # ## ** as a "super asterisk". See https://github.com/gobwas/glob. -# ["/etc/telegraf/telegraf.conf", "/var/log/**.log"] +# files = ["/etc/telegraf/telegraf.conf", "/var/log/**.log"] # ## If true, read the entire file and calculate an md5 checksum. # md5 = false diff --git a/plugins/inputs/filestat/filestat.go b/plugins/inputs/filestat/filestat.go index c0b70c878..e280cbb1b 100644 --- a/plugins/inputs/filestat/filestat.go +++ b/plugins/inputs/filestat/filestat.go @@ -20,7 +20,7 @@ const sampleConfig = ` ## Files to gather stats about. ## These accept standard unix glob matching rules, but with the addition of ## ** as a "super asterisk". See https://github.com/gobwas/glob. - ["/etc/telegraf/telegraf.conf", "/var/log/**.log"] + files = ["/etc/telegraf/telegraf.conf", "/var/log/**.log"] ## If true, read the entire file and calculate an md5 checksum. md5 = false ` From 93f5b8cc4aaad22e8c2195a24c97884e5a5e5787 Mon Sep 17 00:00:00 2001 From: zensqlmonitor Date: Sat, 23 Apr 2016 09:14:04 +0200 Subject: [PATCH 46/84] Fix datatype conversion --- plugins/inputs/sqlserver/sqlserver.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plugins/inputs/sqlserver/sqlserver.go b/plugins/inputs/sqlserver/sqlserver.go index 58d61705f..f91e66c24 100644 --- a/plugins/inputs/sqlserver/sqlserver.go +++ b/plugins/inputs/sqlserver/sqlserver.go @@ -1052,7 +1052,7 @@ SELECT When 1073874176 Then IsNull(Cast(cc.cntr_value - pc.cntr_value as Money) / NullIf(cbc.cntr_value - pbc.cntr_value, 0), 0) -- Avg When 272696320 Then IsNull(Cast(cc.cntr_value - pc.cntr_value as Money) / NullIf(cbc.cntr_value - pbc.cntr_value, 0), 0) -- Avg/sec When 1073939712 Then cc.cntr_value - pc.cntr_value -- Base - Else cc.cntr_value End as int) + Else cc.cntr_value End as bigint) --, currentvalue= CAST(cc.cntr_value as bigint) FROM #CCounters cc INNER JOIN #PCounters pc On cc.object_name = pc.object_name From 1751c35f6941d60f0205fea78f15bb50cf74a164 Mon Sep 17 00:00:00 2001 From: zensqlmonitor Date: Sat, 23 Apr 2016 09:18:08 +0200 Subject: [PATCH 47/84] SQL Server input. Fix datatype conversion. --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index c49d2b72c..c2dded62e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -52,6 +52,7 @@ based on _prefix_ in addition to globs. 
This means that a filter like - [#1044](https://github.com/influxdata/telegraf/pull/1044): Fix SNMP OID possible collisions. Thanks @relip - [#1022](https://github.com/influxdata/telegraf/issues/1022): Dont error deb/rpm install on systemd errors. - [#1078](https://github.com/influxdata/telegraf/issues/1078): Use default AWS credential chain. +- [#1070](https://github.com/influxdata/telegraf/issues/1070): SQL Server input. Fix datatype conversion. ## v0.12.1 [2016-04-14] From d3a25e4dc1865f1e229d1629c68fcd5dc24d52a5 Mon Sep 17 00:00:00 2001 From: Cameron Sparr Date: Sat, 23 Apr 2016 11:42:28 -0600 Subject: [PATCH 48/84] globpath refactor into pkg separate from filestat --- internal/globpath/globpath.go | 93 ++++++++++++++++++++++++ internal/globpath/globpath_test.go | 49 +++++++++++++ internal/globpath/testdata/log1.log | 0 internal/globpath/testdata/log2.log | 0 internal/globpath/testdata/test.conf | 5 ++ plugins/inputs/filestat/filestat.go | 76 ++----------------- plugins/inputs/filestat/filestat_test.go | 17 ----- 7 files changed, 153 insertions(+), 87 deletions(-) create mode 100644 internal/globpath/globpath.go create mode 100644 internal/globpath/globpath_test.go create mode 100644 internal/globpath/testdata/log1.log create mode 100644 internal/globpath/testdata/log2.log create mode 100644 internal/globpath/testdata/test.conf diff --git a/internal/globpath/globpath.go b/internal/globpath/globpath.go new file mode 100644 index 000000000..729754063 --- /dev/null +++ b/internal/globpath/globpath.go @@ -0,0 +1,93 @@ +package globpath + +import ( + "fmt" + "os" + "path/filepath" + "strings" + + "github.com/gobwas/glob" +) + +var sepStr = fmt.Sprintf("%v", string(os.PathSeparator)) + +type GlobPath struct { + path string + hasMeta bool + g glob.Glob + root string +} + +func Compile(path string) (*GlobPath, error) { + out := GlobPath{ + hasMeta: hasMeta(path), + path: path, + } + + // if there are no glob meta characters in the path, don't bother compiling + // a glob object or finding the root directory. (see short-circuit in Match) + if !out.hasMeta { + return &out, nil + } + + var err error + if out.g, err = glob.Compile(path, os.PathSeparator); err != nil { + return nil, err + } + // Get the root directory for this filepath + out.root = findRootDir(path) + return &out, nil +} + +func (g *GlobPath) Match() []string { + if !g.hasMeta { + return []string{g.path} + } + return walkFilePath(g.root, g.g) +} + +// walk the filepath from the given root and return a list of files that match +// the given glob. +func walkFilePath(root string, g glob.Glob) []string { + matchedFiles := []string{} + walkfn := func(path string, _ os.FileInfo, _ error) error { + if g.Match(path) { + matchedFiles = append(matchedFiles, path) + } + return nil + } + filepath.Walk(root, walkfn) + return matchedFiles +} + +// find the root dir of the given path (could include globs). +// ie: +// /var/log/telegraf.conf -> /var/log +// /home/** -> /home +// /home/*/** -> /home +// /lib/share/*/*/**.txt -> /lib/share +func findRootDir(path string) string { + pathItems := strings.Split(path, sepStr) + out := sepStr + for i, item := range pathItems { + if i == len(pathItems)-1 { + break + } + if item == "" { + continue + } + if hasMeta(item) { + break + } + out += item + sepStr + } + if out != "/" { + out = strings.TrimSuffix(out, "/") + } + return out +} + +// hasMeta reports whether path contains any magic glob characters. 
+func hasMeta(path string) bool { + return strings.IndexAny(path, "*?[") >= 0 +} diff --git a/internal/globpath/globpath_test.go b/internal/globpath/globpath_test.go new file mode 100644 index 000000000..9c3fc16e0 --- /dev/null +++ b/internal/globpath/globpath_test.go @@ -0,0 +1,49 @@ +package globpath + +import ( + "runtime" + "strings" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestCompileAndMatch(t *testing.T) { + dir := getTestdataDir() + g1, err := Compile(dir + "/**") + require.NoError(t, err) + g2, err := Compile(dir + "/*.log") + require.NoError(t, err) + g3, err := Compile(dir + "/log1.log") + require.NoError(t, err) + + matches := g1.Match() + assert.Len(t, matches, 3) + matches = g2.Match() + assert.Len(t, matches, 2) + matches = g3.Match() + assert.Len(t, matches, 1) +} + +func TestFindRootDir(t *testing.T) { + tests := []struct { + input string + output string + }{ + {"/var/log/telegraf.conf", "/var/log"}, + {"/home/**", "/home"}, + {"/home/*/**", "/home"}, + {"/lib/share/*/*/**.txt", "/lib/share"}, + } + + for _, test := range tests { + actual := findRootDir(test.input) + assert.Equal(t, test.output, actual) + } +} + +func getTestdataDir() string { + _, filename, _, _ := runtime.Caller(1) + return strings.Replace(filename, "globpath_test.go", "testdata", 1) +} diff --git a/internal/globpath/testdata/log1.log b/internal/globpath/testdata/log1.log new file mode 100644 index 000000000..e69de29bb diff --git a/internal/globpath/testdata/log2.log b/internal/globpath/testdata/log2.log new file mode 100644 index 000000000..e69de29bb diff --git a/internal/globpath/testdata/test.conf b/internal/globpath/testdata/test.conf new file mode 100644 index 000000000..a06111991 --- /dev/null +++ b/internal/globpath/testdata/test.conf @@ -0,0 +1,5 @@ +# this is a fake testing config file +# for testing the filestat plugin + +option1 = "foo" +option2 = "bar" diff --git a/plugins/inputs/filestat/filestat.go b/plugins/inputs/filestat/filestat.go index e280cbb1b..6e61a0a5f 100644 --- a/plugins/inputs/filestat/filestat.go +++ b/plugins/inputs/filestat/filestat.go @@ -5,17 +5,12 @@ import ( "fmt" "io" "os" - "path/filepath" - "strings" - - "github.com/gobwas/glob" "github.com/influxdata/telegraf" + "github.com/influxdata/telegraf/internal/globpath" "github.com/influxdata/telegraf/plugins/inputs" ) -var sepStr = fmt.Sprintf("%v", string(os.PathSeparator)) - const sampleConfig = ` ## Files to gather stats about. 
## These accept standard unix glob matching rules, but with the addition of @@ -29,16 +24,13 @@ type FileStat struct { Md5 bool Files []string - // maps full file paths to glob obj - globs map[string]glob.Glob - // maps full file paths to their root dir - roots map[string]string + // maps full file paths to globmatch obj + globs map[string]*globpath.GlobPath } func NewFileStat() *FileStat { return &FileStat{ - globs: make(map[string]glob.Glob), - roots: make(map[string]string), + globs: make(map[string]*globpath.GlobPath), } } @@ -56,27 +48,14 @@ func (f *FileStat) Gather(acc telegraf.Accumulator) error { // Get the compiled glob object for this filepath g, ok := f.globs[filepath] if !ok { - if g, err = glob.Compile(filepath, os.PathSeparator); err != nil { + if g, err = globpath.Compile(filepath); err != nil { errS += err.Error() + " " continue } f.globs[filepath] = g } - // Get the root directory for this filepath - root, ok := f.roots[filepath] - if !ok { - root = findRootDir(filepath) - f.roots[filepath] = root - } - var matches []string - // Do not walk file tree if we don't have to. - if !hasMeta(filepath) { - matches = []string{filepath} - } else { - matches = walkFilePath(f.roots[filepath], f.globs[filepath]) - } - for _, file := range matches { + for _, file := range g.Match() { tags := map[string]string{ "file": file, } @@ -118,20 +97,6 @@ func (f *FileStat) Gather(acc telegraf.Accumulator) error { return nil } -// walk the filepath from the given root and return a list of files that match -// the given glob. -func walkFilePath(root string, g glob.Glob) []string { - matchedFiles := []string{} - walkfn := func(path string, _ os.FileInfo, _ error) error { - if g.Match(path) { - matchedFiles = append(matchedFiles, path) - } - return nil - } - filepath.Walk(root, walkfn) - return matchedFiles -} - // Read given file and calculate an md5 hash. func getMd5(file string) (string, error) { of, err := os.Open(file) @@ -149,35 +114,6 @@ func getMd5(file string) (string, error) { return fmt.Sprintf("%x", hash.Sum(nil)), nil } -// find the root dir of the given path (could include globs). -// ie: -// /var/log/telegraf.conf -> /var/log/ -// /home/** -> /home/ -// /home/*/** -> /home/ -// /lib/share/*/*/**.txt -> /lib/share/ -func findRootDir(path string) string { - pathItems := strings.Split(path, sepStr) - outpath := sepStr - for i, item := range pathItems { - if i == len(pathItems)-1 { - break - } - if item == "" { - continue - } - if hasMeta(item) { - break - } - outpath += item + sepStr - } - return outpath -} - -// hasMeta reports whether path contains any magic glob characters. 
-func hasMeta(path string) bool { - return strings.IndexAny(path, "*?[") >= 0 -} - func init() { inputs.Add("filestat", func() telegraf.Input { return NewFileStat() diff --git a/plugins/inputs/filestat/filestat_test.go b/plugins/inputs/filestat/filestat_test.go index f8977c920..a404869d9 100644 --- a/plugins/inputs/filestat/filestat_test.go +++ b/plugins/inputs/filestat/filestat_test.go @@ -164,23 +164,6 @@ func TestGatherSuperAsterisk(t *testing.T) { acc.AssertContainsTaggedFields(t, "filestat", fields3, tags3) } -func TestFindRootDir(t *testing.T) { - tests := []struct { - input string - output string - }{ - {"/var/log/telegraf.conf", "/var/log/"}, - {"/home/**", "/home/"}, - {"/home/*/**", "/home/"}, - {"/lib/share/*/*/**.txt", "/lib/share/"}, - } - - for _, test := range tests { - actual := findRootDir(test.input) - assert.Equal(t, test.output, actual) - } -} - func TestGetMd5(t *testing.T) { dir := getTestdataDir() md5, err := getMd5(dir + "test.conf") From 07728d742529c96dfcc4f77a3123cfca9b283938 Mon Sep 17 00:00:00 2001 From: Cameron Sparr Date: Sun, 24 Apr 2016 14:37:44 -0600 Subject: [PATCH 49/84] Refactor globpath pkg to return a map this is so that we don't call os.Stat twice for every file matched by Match(). Also changing the behavior to _not_ return the name of a file that doesn't exist if it's not a glob. --- internal/globpath/globpath.go | 17 +++++++++----- internal/globpath/globpath_test.go | 13 +++++++++++ plugins/inputs/filestat/filestat.go | 36 ++++++++++++++--------------- 3 files changed, 41 insertions(+), 25 deletions(-) diff --git a/internal/globpath/globpath.go b/internal/globpath/globpath.go index 729754063..6755e69b2 100644 --- a/internal/globpath/globpath.go +++ b/internal/globpath/globpath.go @@ -39,20 +39,25 @@ func Compile(path string) (*GlobPath, error) { return &out, nil } -func (g *GlobPath) Match() []string { +func (g *GlobPath) Match() map[string]os.FileInfo { if !g.hasMeta { - return []string{g.path} + out := make(map[string]os.FileInfo) + info, err := os.Stat(g.path) + if !os.IsNotExist(err) { + out[g.path] = info + } + return out } return walkFilePath(g.root, g.g) } // walk the filepath from the given root and return a list of files that match // the given glob. 
-func walkFilePath(root string, g glob.Glob) []string { - matchedFiles := []string{} - walkfn := func(path string, _ os.FileInfo, _ error) error { +func walkFilePath(root string, g glob.Glob) map[string]os.FileInfo { + matchedFiles := make(map[string]os.FileInfo) + walkfn := func(path string, info os.FileInfo, _ error) error { if g.Match(path) { - matchedFiles = append(matchedFiles, path) + matchedFiles[path] = info } return nil } diff --git a/internal/globpath/globpath_test.go b/internal/globpath/globpath_test.go index 9c3fc16e0..db72c94f4 100644 --- a/internal/globpath/globpath_test.go +++ b/internal/globpath/globpath_test.go @@ -11,12 +11,21 @@ import ( func TestCompileAndMatch(t *testing.T) { dir := getTestdataDir() + // test super asterisk g1, err := Compile(dir + "/**") require.NoError(t, err) + // test single asterisk g2, err := Compile(dir + "/*.log") require.NoError(t, err) + // test no meta characters (file exists) g3, err := Compile(dir + "/log1.log") require.NoError(t, err) + // test file that doesn't exist + g4, err := Compile(dir + "/i_dont_exist.log") + require.NoError(t, err) + // test super asterisk that doesn't exist + g5, err := Compile(dir + "/dir_doesnt_exist/**") + require.NoError(t, err) matches := g1.Match() assert.Len(t, matches, 3) @@ -24,6 +33,10 @@ func TestCompileAndMatch(t *testing.T) { assert.Len(t, matches, 2) matches = g3.Match() assert.Len(t, matches, 1) + matches = g4.Match() + assert.Len(t, matches, 0) + matches = g5.Match() + assert.Len(t, matches, 0) } func TestFindRootDir(t *testing.T) { diff --git a/plugins/inputs/filestat/filestat.go b/plugins/inputs/filestat/filestat.go index 6e61a0a5f..831d37444 100644 --- a/plugins/inputs/filestat/filestat.go +++ b/plugins/inputs/filestat/filestat.go @@ -55,31 +55,29 @@ func (f *FileStat) Gather(acc telegraf.Accumulator) error { f.globs[filepath] = g } - for _, file := range g.Match() { + files := g.Match() + if len(files) == 0 { + acc.AddFields("filestat", + map[string]interface{}{ + "exists": int64(0), + }, + map[string]string{ + "file": filepath, + }) + continue + } + + for fileName, fileInfo := range files { tags := map[string]string{ - "file": file, + "file": fileName, } fields := map[string]interface{}{ - "exists": int64(0), + "exists": int64(1), + "size_bytes": fileInfo.Size(), } - // Get file stats - fileInfo, err := os.Stat(file) - if os.IsNotExist(err) { - // file doesn't exist, so move on to the next - acc.AddFields("filestat", fields, tags) - continue - } - if err != nil { - errS += err.Error() + " " - continue - } - - // file exists and no errors encountered - fields["exists"] = int64(1) - fields["size_bytes"] = fileInfo.Size() if f.Md5 { - md5, err := getMd5(file) + md5, err := getMd5(fileName) if err != nil { errS += err.Error() + " " } else { From c159460b2cecdb382f4e5ebd9ad2aa5c4f7c4d3e Mon Sep 17 00:00:00 2001 From: Pierre Fersing Date: Sun, 24 Apr 2016 12:43:54 +0200 Subject: [PATCH 50/84] Refactor running_output buffering closes #1087 --- docs/CONFIGURATION.md | 4 ++ etc/telegraf.conf | 8 ++- internal/config/config.go | 19 +++++- internal/models/running_output.go | 90 +++++++++++++++----------- internal/models/running_output_test.go | 17 +++-- 5 files changed, 88 insertions(+), 50 deletions(-) diff --git a/docs/CONFIGURATION.md b/docs/CONFIGURATION.md index 9f783f87a..3e4e62adc 100644 --- a/docs/CONFIGURATION.md +++ b/docs/CONFIGURATION.md @@ -38,8 +38,12 @@ config. 
* **interval**: Default data collection interval for all inputs * **round_interval**: Rounds collection interval to 'interval' ie, if interval="10s" then always collect on :00, :10, :20, etc. +* **metric_batch_size**: Telegraf will send metrics to output in batch of at +most metric_batch_size metrics. * **metric_buffer_limit**: Telegraf will cache metric_buffer_limit metrics for each output, and will flush this buffer on a successful write. +This should be a multiple of metric_batch_size and could not be less +than 2 times metric_batch_size. * **collection_jitter**: Collection jitter is used to jitter the collection by a random amount. Each plugin will sleep for a random time within jitter before collecting. diff --git a/etc/telegraf.conf b/etc/telegraf.conf index dc41eaa96..46b422ffa 100644 --- a/etc/telegraf.conf +++ b/etc/telegraf.conf @@ -30,9 +30,13 @@ ## ie, if interval="10s" then always collect on :00, :10, :20, etc. round_interval = true + ## Telegraf will send metrics to output in batch of at + ## most metric_batch_size metrics. + metric_batch_size = 1000 ## Telegraf will cache metric_buffer_limit metrics for each output, and will - ## flush this buffer on a successful write. - metric_buffer_limit = 1000 + ## flush this buffer on a successful write. This should be a multiple of + ## metric_batch_size and could not be less than 2 times metric_batch_size + metric_buffer_limit = 10000 ## Flush the buffer whenever full, regardless of flush_interval. flush_buffer_when_full = true diff --git a/internal/config/config.go b/internal/config/config.go index 5d0836964..fcebd24e6 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -93,9 +93,15 @@ type AgentConfig struct { // ie, a jitter of 5s and interval 10s means flushes will happen every 10-15s FlushJitter internal.Duration + // MetricBatchSize is the maximum number of metrics that is wrote to an + // output plugin in one call. + MetricBatchSize int + // MetricBufferLimit is the max number of metrics that each output plugin // will cache. The buffer is cleared when a successful write occurs. When - // full, the oldest metrics will be overwritten. + // full, the oldest metrics will be overwritten. This number should be a + // multiple of MetricBatchSize. Due to current implementation, this could + // not be less than 2 times MetricBatchSize. MetricBufferLimit int // FlushBufferWhenFull tells Telegraf to flush the metric buffer whenever @@ -182,9 +188,13 @@ var header = `# Telegraf Configuration ## ie, if interval="10s" then always collect on :00, :10, :20, etc. round_interval = true + ## Telegraf will send metrics to output in batch of at + ## most metric_batch_size metrics. + metric_batch_size = 1000 ## Telegraf will cache metric_buffer_limit metrics for each output, and will - ## flush this buffer on a successful write. - metric_buffer_limit = 1000 + ## flush this buffer on a successful write. This should be a multiple of + ## metric_batch_size and could not be less than 2 times metric_batch_size + metric_buffer_limit = 10000 ## Flush the buffer whenever full, regardless of flush_interval. 
flush_buffer_when_full = true @@ -526,6 +536,9 @@ func (c *Config) addOutput(name string, table *ast.Table) error { } ro := internal_models.NewRunningOutput(name, output, outputConfig) + if c.Agent.MetricBatchSize > 0 { + ro.MetricBatchSize = c.Agent.MetricBatchSize + } if c.Agent.MetricBufferLimit > 0 { ro.MetricBufferLimit = c.Agent.MetricBufferLimit } diff --git a/internal/models/running_output.go b/internal/models/running_output.go index c76dffcdf..91b200799 100644 --- a/internal/models/running_output.go +++ b/internal/models/running_output.go @@ -9,25 +9,32 @@ import ( ) const ( - // Default number of metrics kept between flushes. - DEFAULT_METRIC_BUFFER_LIMIT = 1000 - // Limit how many full metric buffers are kept due to failed writes. - FULL_METRIC_BUFFERS_LIMIT = 100 + // Default size of metrics batch size. + DEFAULT_METRIC_BATCH_SIZE = 1000 + + // Default number of metrics kept. It should be a multiple of batch size. + DEFAULT_METRIC_BUFFER_LIMIT = 10000 ) +// tmpmetrics point to batch of metrics ready to be wrote to output. +// readI point to the oldest batch of metrics (the first to sent to output). It +// may point to nil value if tmpmetrics is empty. +// writeI point to the next slot to buffer a batch of metrics is output fail to +// write. type RunningOutput struct { Name string Output telegraf.Output Config *OutputConfig Quiet bool MetricBufferLimit int + MetricBatchSize int FlushBufferWhenFull bool metrics []telegraf.Metric - tmpmetrics map[int][]telegraf.Metric - overwriteI int - mapI int + tmpmetrics []([]telegraf.Metric) + writeI int + readI int sync.Mutex } @@ -40,10 +47,10 @@ func NewRunningOutput( ro := &RunningOutput{ Name: name, metrics: make([]telegraf.Metric, 0), - tmpmetrics: make(map[int][]telegraf.Metric), Output: output, Config: conf, MetricBufferLimit: DEFAULT_METRIC_BUFFER_LIMIT, + MetricBatchSize: DEFAULT_METRIC_BATCH_SIZE, } return ro } @@ -59,6 +66,17 @@ func (ro *RunningOutput) AddMetric(metric telegraf.Metric) { ro.Lock() defer ro.Unlock() + if ro.tmpmetrics == nil { + size := ro.MetricBufferLimit / ro.MetricBatchSize + // ro.metrics already contains one batch + size = size - 1 + + if size < 1 { + size = 1 + } + ro.tmpmetrics = make([]([]telegraf.Metric), size) + } + // Filter any tagexclude/taginclude parameters before adding metric if len(ro.Config.Filter.TagExclude) != 0 || len(ro.Config.Filter.TagInclude) != 0 { // In order to filter out tags, we need to create a new metric, since @@ -72,40 +90,32 @@ func (ro *RunningOutput) AddMetric(metric telegraf.Metric) { metric, _ = telegraf.NewMetric(name, tags, fields, t) } - if len(ro.metrics) < ro.MetricBufferLimit { + if len(ro.metrics) < ro.MetricBatchSize { ro.metrics = append(ro.metrics, metric) } else { + flushSuccess := true if ro.FlushBufferWhenFull { - ro.metrics = append(ro.metrics, metric) - tmpmetrics := make([]telegraf.Metric, len(ro.metrics)) - copy(tmpmetrics, ro.metrics) - ro.metrics = make([]telegraf.Metric, 0) - err := ro.write(tmpmetrics) + err := ro.write(ro.metrics) if err != nil { log.Printf("ERROR writing full metric buffer to output %s, %s", ro.Name, err) - if len(ro.tmpmetrics) == FULL_METRIC_BUFFERS_LIMIT { - ro.mapI = 0 - // overwrite one - ro.tmpmetrics[ro.mapI] = tmpmetrics - ro.mapI++ - } else { - ro.tmpmetrics[ro.mapI] = tmpmetrics - ro.mapI++ - } + flushSuccess = false } } else { - if ro.overwriteI == 0 { + flushSuccess = false + } + if !flushSuccess { + if ro.tmpmetrics[ro.writeI] != nil && ro.writeI == ro.readI { log.Printf("WARNING: overwriting cached metrics, you may want 
to " + "increase the metric_buffer_limit setting in your [agent] " + "config if you do not wish to overwrite metrics.\n") + ro.readI = (ro.readI + 1) % cap(ro.tmpmetrics) } - if ro.overwriteI == len(ro.metrics) { - ro.overwriteI = 0 - } - ro.metrics[ro.overwriteI] = metric - ro.overwriteI++ + ro.tmpmetrics[ro.writeI] = ro.metrics + ro.writeI = (ro.writeI + 1) % cap(ro.tmpmetrics) } + ro.metrics = make([]telegraf.Metric, 0) + ro.metrics = append(ro.metrics, metric) } } @@ -113,21 +123,23 @@ func (ro *RunningOutput) AddMetric(metric telegraf.Metric) { func (ro *RunningOutput) Write() error { ro.Lock() defer ro.Unlock() + + // Write any cached metric buffers before, as those metrics are the + // oldest + for ro.tmpmetrics[ro.readI] != nil { + if err := ro.write(ro.tmpmetrics[ro.readI]); err != nil { + return err + } else { + ro.tmpmetrics[ro.readI] = nil + ro.readI = (ro.readI + 1) % cap(ro.tmpmetrics) + } + } + err := ro.write(ro.metrics) if err != nil { return err } else { ro.metrics = make([]telegraf.Metric, 0) - ro.overwriteI = 0 - } - - // Write any cached metric buffers that failed previously - for i, tmpmetrics := range ro.tmpmetrics { - if err := ro.write(tmpmetrics); err != nil { - return err - } else { - delete(ro.tmpmetrics, i) - } } return nil diff --git a/internal/models/running_output_test.go b/internal/models/running_output_test.go index 9607f2417..ca7034b61 100644 --- a/internal/models/running_output_test.go +++ b/internal/models/running_output_test.go @@ -193,7 +193,7 @@ func TestRunningOutputDefault(t *testing.T) { assert.Len(t, m.Metrics(), 10) } -// Test that the first metric gets overwritten if there is a buffer overflow. +// Test that the first metrics batch gets overwritten if there is a buffer overflow. func TestRunningOutputOverwrite(t *testing.T) { conf := &OutputConfig{ Filter: Filter{ @@ -203,6 +203,7 @@ func TestRunningOutputOverwrite(t *testing.T) { m := &mockOutput{} ro := NewRunningOutput("test", m, conf) + ro.MetricBatchSize = 1 ro.MetricBufferLimit = 4 for _, metric := range first5 { @@ -236,6 +237,7 @@ func TestRunningOutputMultiOverwrite(t *testing.T) { m := &mockOutput{} ro := NewRunningOutput("test", m, conf) + ro.MetricBatchSize = 1 ro.MetricBufferLimit = 3 for _, metric := range first5 { @@ -274,7 +276,8 @@ func TestRunningOutputFlushWhenFull(t *testing.T) { m := &mockOutput{} ro := NewRunningOutput("test", m, conf) ro.FlushBufferWhenFull = true - ro.MetricBufferLimit = 5 + ro.MetricBatchSize = 5 + ro.MetricBufferLimit = 10 // Fill buffer to limit for _, metric := range first5 { @@ -286,7 +289,7 @@ func TestRunningOutputFlushWhenFull(t *testing.T) { // add one more metric ro.AddMetric(next5[0]) // now it flushed - assert.Len(t, m.Metrics(), 6) + assert.Len(t, m.Metrics(), 5) // add one more metric and write it manually ro.AddMetric(next5[1]) @@ -307,7 +310,8 @@ func TestRunningOutputMultiFlushWhenFull(t *testing.T) { m := &mockOutput{} ro := NewRunningOutput("test", m, conf) ro.FlushBufferWhenFull = true - ro.MetricBufferLimit = 4 + ro.MetricBatchSize = 4 + ro.MetricBufferLimit = 12 // Fill buffer past limit twive for _, metric := range first5 { @@ -317,7 +321,7 @@ func TestRunningOutputMultiFlushWhenFull(t *testing.T) { ro.AddMetric(metric) } // flushed twice - assert.Len(t, m.Metrics(), 10) + assert.Len(t, m.Metrics(), 8) } func TestRunningOutputWriteFail(t *testing.T) { @@ -331,7 +335,8 @@ func TestRunningOutputWriteFail(t *testing.T) { m.failWrite = true ro := NewRunningOutput("test", m, conf) ro.FlushBufferWhenFull = true - ro.MetricBufferLimit = 4 
+ ro.MetricBatchSize = 4 + ro.MetricBufferLimit = 12 // Fill buffer past limit twice for _, metric := range first5 { From 01b01c5969b934a5d2a5259ca42d65ec17ebab93 Mon Sep 17 00:00:00 2001 From: Hannu Valtonen Date: Sun, 24 Apr 2016 00:40:10 +0300 Subject: [PATCH 51/84] postgresql_extensible: Censor also other security related conn params While these aren't quite as sensitive as passwords, they do tend to be long filesystem paths that shouldn't be reported along with every measurement. --- plugins/inputs/postgresql_extensible/postgresql_extensible.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/plugins/inputs/postgresql_extensible/postgresql_extensible.go b/plugins/inputs/postgresql_extensible/postgresql_extensible.go index 4ebf752ff..341811870 100644 --- a/plugins/inputs/postgresql_extensible/postgresql_extensible.go +++ b/plugins/inputs/postgresql_extensible/postgresql_extensible.go @@ -184,7 +184,7 @@ type scanner interface { Scan(dest ...interface{}) error } -var passwordKVMatcher, _ = regexp.Compile("password=\\S+ ?") +var KVMatcher, _ = regexp.Compile("(password|sslcert|sslkey|sslmode|sslrootcert)=\\S+ ?") func (p *Postgresql) SanitizedAddress() (_ string, err error) { var canonicalizedAddress string @@ -196,7 +196,7 @@ func (p *Postgresql) SanitizedAddress() (_ string, err error) { } else { canonicalizedAddress = p.Address } - p.sanitizedAddress = passwordKVMatcher.ReplaceAllString(canonicalizedAddress, "") + p.sanitizedAddress = KVMatcher.ReplaceAllString(canonicalizedAddress, "") return p.sanitizedAddress, err } From ce7d852d2234f3979e68ce69c7a4600d960c6ccb Mon Sep 17 00:00:00 2001 From: Mika Eloranta Date: Mon, 25 Apr 2016 15:10:13 +0300 Subject: [PATCH 52/84] postgresql_extensible: configurable measurement name The output measurement name can be configured per query. --- .../postgresql_extensible.go | 40 ++++++++++++------- 1 file changed, 26 insertions(+), 14 deletions(-) diff --git a/plugins/inputs/postgresql_extensible/postgresql_extensible.go b/plugins/inputs/postgresql_extensible/postgresql_extensible.go index 341811870..cd6fa8cc4 100644 --- a/plugins/inputs/postgresql_extensible/postgresql_extensible.go +++ b/plugins/inputs/postgresql_extensible/postgresql_extensible.go @@ -21,18 +21,20 @@ type Postgresql struct { AdditionalTags []string sanitizedAddress string Query []struct { - Sqlquery string - Version int - Withdbname bool - Tagvalue string + Sqlquery string + Version int + Withdbname bool + Tagvalue string + Measurement string } } type query []struct { - Sqlquery string - Version int - Withdbname bool - Tagvalue string + Sqlquery string + Version int + Withdbname bool + Tagvalue string + Measurement string } var ignoredColumns = map[string]bool{"datid": true, "datname": true, "stats_reset": true} @@ -65,24 +67,28 @@ var sampleConfig = ` ## because the databases variable was set to ['postgres', 'pgbench' ] and the ## withdbname was true. Be careful that if the withdbname is set to false you ## don't have to define the where clause (aka with the dbname) the tagvalue - ## field is used to define custom tags (separated by comas) + ## field is used to define custom tags (separated by commas) + ## The optional "measurement" value can be used to override the default + ## output measurement name ("postgresql"). 
# ## Structure : ## [[inputs.postgresql_extensible.query]] ## sqlquery string ## version string ## withdbname boolean - ## tagvalue string (coma separated) + ## tagvalue string (comma separated) + ## measurement string [[inputs.postgresql_extensible.query]] sqlquery="SELECT * FROM pg_stat_database" version=901 withdbname=false tagvalue="" + measurement="" [[inputs.postgresql_extensible.query]] sqlquery="SELECT * FROM pg_stat_bgwriter" version=901 withdbname=false - tagvalue="" + tagvalue="postgresql.stats" ` func (p *Postgresql) SampleConfig() string { @@ -106,6 +112,7 @@ func (p *Postgresql) Gather(acc telegraf.Accumulator) error { var db_version int var query string var tag_value string + var meas_name string if p.Address == "" || p.Address == "localhost" { p.Address = localhost @@ -131,6 +138,11 @@ func (p *Postgresql) Gather(acc telegraf.Accumulator) error { for i := range p.Query { sql_query = p.Query[i].Sqlquery tag_value = p.Query[i].Tagvalue + if p.Query[i].Measurement != "" { + meas_name = p.Query[i].Measurement + } else { + meas_name = "postgresql" + } if p.Query[i].Withdbname { if len(p.Databases) != 0 { @@ -170,7 +182,7 @@ func (p *Postgresql) Gather(acc telegraf.Accumulator) error { } for rows.Next() { - err = p.accRow(rows, acc) + err = p.accRow(meas_name, rows, acc) if err != nil { return err } @@ -201,7 +213,7 @@ func (p *Postgresql) SanitizedAddress() (_ string, err error) { return p.sanitizedAddress, err } -func (p *Postgresql) accRow(row scanner, acc telegraf.Accumulator) error { +func (p *Postgresql) accRow(meas_name string, row scanner, acc telegraf.Accumulator) error { var columnVars []interface{} var dbname bytes.Buffer @@ -267,7 +279,7 @@ func (p *Postgresql) accRow(row scanner, acc telegraf.Accumulator) error { } } } - acc.AddFields("postgresql", fields, tags) + acc.AddFields(meas_name, fields, tags) return nil } From 1a7340bb0241b57023ecb672af19019cb6bb22e4 Mon Sep 17 00:00:00 2001 From: Mika Eloranta Date: Mon, 25 Apr 2016 16:27:23 +0300 Subject: [PATCH 53/84] postgresql_extensible: fix nil field values nil field values would break the output influxdb line procotol. Skip them from the output. --- .../inputs/postgresql_extensible/postgresql_extensible.go | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/plugins/inputs/postgresql_extensible/postgresql_extensible.go b/plugins/inputs/postgresql_extensible/postgresql_extensible.go index cd6fa8cc4..710e537c3 100644 --- a/plugins/inputs/postgresql_extensible/postgresql_extensible.go +++ b/plugins/inputs/postgresql_extensible/postgresql_extensible.go @@ -4,6 +4,7 @@ import ( "bytes" "database/sql" "fmt" + "log" "regexp" "strings" @@ -259,9 +260,11 @@ func (p *Postgresql) accRow(meas_name string, row scanner, acc telegraf.Accumula var isATag int fields := make(map[string]interface{}) for col, val := range columnMap { + if acc.Debug() { + log.Printf("postgresql_extensible: column: %s = %T: %s\n", col, *val, *val) + } _, ignore := ignoredColumns[col] - //if !ignore && *val != "" { - if !ignore { + if !ignore && *val != nil { isATag = 0 for tag := range p.AdditionalTags { if col == p.AdditionalTags[tag] { From 0367a39e1ff9fdd0ad6186d237fe26b3ef8b8ce4 Mon Sep 17 00:00:00 2001 From: Mika Eloranta Date: Mon, 25 Apr 2016 18:09:35 +0300 Subject: [PATCH 54/84] postgresql_extensible: custom address in metrics output Allow overriding the the metrics "server" tag with the specified value. Can be used to give a more user-friendly value for the server name. 
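Before the diffstat, a self-contained sketch of the behaviour these postgresql_extensible changes combine to produce: the connection-string scrubbing done by the KVMatcher regex, plus the new outputaddress short-circuit for the "server" tag. The DSN and the sanitizedAddress helper here are illustrative stand-ins, not code lifted from the plugin.

```
package main

import (
	"fmt"
	"regexp"
)

// Same pattern as the plugin's KVMatcher: strip security-related key=value pairs.
var kvMatcher = regexp.MustCompile(`(password|sslcert|sslkey|sslmode|sslrootcert)=\S+ ?`)

// sanitizedAddress mirrors the intent of SanitizedAddress: an explicit
// output address wins, otherwise sensitive parameters are censored.
func sanitizedAddress(address, outputAddress string) string {
	if outputAddress != "" {
		return outputAddress
	}
	return kvMatcher.ReplaceAllString(address, "")
}

func main() {
	dsn := "host=db01 user=telegraf password=secret sslmode=require dbname=postgres" // invented example
	fmt.Println(sanitizedAddress(dsn, ""))     // host=db01 user=telegraf dbname=postgres
	fmt.Println(sanitizedAddress(dsn, "db01")) // db01
}
```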
closes #1093 --- .../postgresql_extensible/postgresql_extensible.go | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/plugins/inputs/postgresql_extensible/postgresql_extensible.go b/plugins/inputs/postgresql_extensible/postgresql_extensible.go index 710e537c3..75bc6b936 100644 --- a/plugins/inputs/postgresql_extensible/postgresql_extensible.go +++ b/plugins/inputs/postgresql_extensible/postgresql_extensible.go @@ -16,6 +16,7 @@ import ( type Postgresql struct { Address string + Outputaddress string Databases []string OrderedColumns []string AllColumns []string @@ -58,6 +59,11 @@ var sampleConfig = ` ## databases are gathered. ## databases = ["app_production", "testing"] # + # outputaddress = "db01" + ## A custom name for the database that will be used as the "server" tag in the + ## measurement output. If not specified, a default one generated from + ## the connection address is used. + # ## Define the toml config where the sql queries are stored ## New queries can be added, if the withdbname is set to true and there is no ## databases defined in the 'databases field', the sql query is ended by a @@ -200,6 +206,9 @@ type scanner interface { var KVMatcher, _ = regexp.Compile("(password|sslcert|sslkey|sslmode|sslrootcert)=\\S+ ?") func (p *Postgresql) SanitizedAddress() (_ string, err error) { + if p.Outputaddress != "" { + return p.Outputaddress, nil + } var canonicalizedAddress string if strings.HasPrefix(p.Address, "postgres://") || strings.HasPrefix(p.Address, "postgresql://") { canonicalizedAddress, err = pq.ParseURL(p.Address) From 249a860c6fa966003152eb72e9a8102ab4454957 Mon Sep 17 00:00:00 2001 From: Cameron Sparr Date: Mon, 25 Apr 2016 19:57:38 -0600 Subject: [PATCH 55/84] procstat: fix newlines in tags --- plugins/inputs/procstat/procstat.go | 2 +- plugins/inputs/procstat/spec_processor.go | 114 +++++++--------------- 2 files changed, 34 insertions(+), 82 deletions(-) diff --git a/plugins/inputs/procstat/procstat.go b/plugins/inputs/procstat/procstat.go index fbacb9069..2a55f8618 100644 --- a/plugins/inputs/procstat/procstat.go +++ b/plugins/inputs/procstat/procstat.go @@ -136,7 +136,7 @@ func (p *Procstat) pidsFromFile() ([]int32, error) { out = append(out, int32(pid)) p.tagmap[int32(pid)] = map[string]string{ "pidfile": p.PidFile, - "pid": string(pidString), + "pid": strings.TrimSpace(string(pidString)), } } } diff --git a/plugins/inputs/procstat/spec_processor.go b/plugins/inputs/procstat/spec_processor.go index d76538ef8..d43b8ecc4 100644 --- a/plugins/inputs/procstat/spec_processor.go +++ b/plugins/inputs/procstat/spec_processor.go @@ -1,7 +1,6 @@ package procstat import ( - "fmt" "time" "github.com/shirou/gopsutil/process" @@ -17,21 +16,6 @@ type SpecProcessor struct { proc *process.Process } -func (p *SpecProcessor) add(metric string, value interface{}) { - var mname string - if p.Prefix == "" { - mname = metric - } else { - mname = p.Prefix + "_" + metric - } - p.fields[mname] = value -} - -func (p *SpecProcessor) flush() { - p.acc.AddFields("procstat", p.fields, p.tags) - p.fields = make(map[string]interface{}) -} - func NewSpecProcessor( prefix string, acc telegraf.Accumulator, @@ -51,90 +35,58 @@ func NewSpecProcessor( } func (p *SpecProcessor) pushMetrics() { - p.pushNThreadsStats() - p.pushFDStats() - p.pushCtxStats() - p.pushIOStats() - p.pushCPUStats() - p.pushMemoryStats() - p.flush() -} + fields := map[string]interface{}{} -func (p *SpecProcessor) pushNThreadsStats() error { numThreads, err := p.proc.NumThreads() - if err != nil { - return 
fmt.Errorf("NumThreads error: %s\n", err) + if err == nil { + fields["num_threads"] = numThreads } - p.add("num_threads", numThreads) - return nil -} -func (p *SpecProcessor) pushFDStats() error { fds, err := p.proc.NumFDs() - if err != nil { - return fmt.Errorf("NumFD error: %s\n", err) + if err == nil { + fields["num_fds"] = fds } - p.add("num_fds", fds) - return nil -} -func (p *SpecProcessor) pushCtxStats() error { ctx, err := p.proc.NumCtxSwitches() - if err != nil { - return fmt.Errorf("ContextSwitch error: %s\n", err) + if err == nil { + fields["voluntary_context_switches"] = ctx.Voluntary + fields["involuntary_context_switches"] = ctx.Involuntary } - p.add("voluntary_context_switches", ctx.Voluntary) - p.add("involuntary_context_switches", ctx.Involuntary) - return nil -} -func (p *SpecProcessor) pushIOStats() error { io, err := p.proc.IOCounters() - if err != nil { - return fmt.Errorf("IOCounters error: %s\n", err) + if err == nil { + fields["read_count"] = io.ReadCount + fields["write_count"] = io.WriteCount + fields["read_bytes"] = io.ReadBytes + fields["write_bytes"] = io.WriteCount } - p.add("read_count", io.ReadCount) - p.add("write_count", io.WriteCount) - p.add("read_bytes", io.ReadBytes) - p.add("write_bytes", io.WriteCount) - return nil -} -func (p *SpecProcessor) pushCPUStats() error { cpu_time, err := p.proc.CPUTimes() - if err != nil { - return err + if err == nil { + fields["cpu_time_user"] = cpu_time.User + fields["cpu_time_system"] = cpu_time.System + fields["cpu_time_idle"] = cpu_time.Idle + fields["cpu_time_nice"] = cpu_time.Nice + fields["cpu_time_iowait"] = cpu_time.Iowait + fields["cpu_time_irq"] = cpu_time.Irq + fields["cpu_time_soft_irq"] = cpu_time.Softirq + fields["cpu_time_steal"] = cpu_time.Steal + fields["cpu_time_stolen"] = cpu_time.Stolen + fields["cpu_time_guest"] = cpu_time.Guest + fields["cpu_time_guest_nice"] = cpu_time.GuestNice } - p.add("cpu_time_user", cpu_time.User) - p.add("cpu_time_system", cpu_time.System) - p.add("cpu_time_idle", cpu_time.Idle) - p.add("cpu_time_nice", cpu_time.Nice) - p.add("cpu_time_iowait", cpu_time.Iowait) - p.add("cpu_time_irq", cpu_time.Irq) - p.add("cpu_time_soft_irq", cpu_time.Softirq) - p.add("cpu_time_steal", cpu_time.Steal) - p.add("cpu_time_stolen", cpu_time.Stolen) - p.add("cpu_time_guest", cpu_time.Guest) - p.add("cpu_time_guest_nice", cpu_time.GuestNice) cpu_perc, err := p.proc.CPUPercent(time.Duration(0)) - if err != nil { - return err - } else if cpu_perc == 0 { - return nil + if err == nil && cpu_perc != 0 { + fields["cpu_usage"] = cpu_perc } - p.add("cpu_usage", cpu_perc) - return nil -} - -func (p *SpecProcessor) pushMemoryStats() error { mem, err := p.proc.MemoryInfo() - if err != nil { - return err + if err == nil { + fields["memory_rss"] = mem.RSS + fields["memory_vms"] = mem.VMS + fields["memory_swap"] = mem.Swap } - p.add("memory_rss", mem.RSS) - p.add("memory_vms", mem.VMS) - p.add("memory_swap", mem.Swap) - return nil + + p.acc.AddFields("procstat", fields, p.tags) } From 671b40df2acf62620ea61ed1d8a199d51f791baf Mon Sep 17 00:00:00 2001 From: Cameron Sparr Date: Mon, 25 Apr 2016 20:10:34 -0600 Subject: [PATCH 56/84] procstat: field prefix fixup --- plugins/inputs/procstat/spec_processor.go | 50 ++++++++++++----------- 1 file changed, 27 insertions(+), 23 deletions(-) diff --git a/plugins/inputs/procstat/spec_processor.go b/plugins/inputs/procstat/spec_processor.go index d43b8ecc4..90503f930 100644 --- a/plugins/inputs/procstat/spec_processor.go +++ b/plugins/inputs/procstat/spec_processor.go @@ 
-35,57 +35,61 @@ func NewSpecProcessor( } func (p *SpecProcessor) pushMetrics() { + var prefix string + if p.Prefix != "" { + prefix = p.Prefix + "_" + } fields := map[string]interface{}{} numThreads, err := p.proc.NumThreads() if err == nil { - fields["num_threads"] = numThreads + fields[prefix+"num_threads"] = numThreads } fds, err := p.proc.NumFDs() if err == nil { - fields["num_fds"] = fds + fields[prefix+"num_fds"] = fds } ctx, err := p.proc.NumCtxSwitches() if err == nil { - fields["voluntary_context_switches"] = ctx.Voluntary - fields["involuntary_context_switches"] = ctx.Involuntary + fields[prefix+"voluntary_context_switches"] = ctx.Voluntary + fields[prefix+"involuntary_context_switches"] = ctx.Involuntary } io, err := p.proc.IOCounters() if err == nil { - fields["read_count"] = io.ReadCount - fields["write_count"] = io.WriteCount - fields["read_bytes"] = io.ReadBytes - fields["write_bytes"] = io.WriteCount + fields[prefix+"read_count"] = io.ReadCount + fields[prefix+"write_count"] = io.WriteCount + fields[prefix+"read_bytes"] = io.ReadBytes + fields[prefix+"write_bytes"] = io.WriteCount } cpu_time, err := p.proc.CPUTimes() if err == nil { - fields["cpu_time_user"] = cpu_time.User - fields["cpu_time_system"] = cpu_time.System - fields["cpu_time_idle"] = cpu_time.Idle - fields["cpu_time_nice"] = cpu_time.Nice - fields["cpu_time_iowait"] = cpu_time.Iowait - fields["cpu_time_irq"] = cpu_time.Irq - fields["cpu_time_soft_irq"] = cpu_time.Softirq - fields["cpu_time_steal"] = cpu_time.Steal - fields["cpu_time_stolen"] = cpu_time.Stolen - fields["cpu_time_guest"] = cpu_time.Guest - fields["cpu_time_guest_nice"] = cpu_time.GuestNice + fields[prefix+"cpu_time_user"] = cpu_time.User + fields[prefix+"cpu_time_system"] = cpu_time.System + fields[prefix+"cpu_time_idle"] = cpu_time.Idle + fields[prefix+"cpu_time_nice"] = cpu_time.Nice + fields[prefix+"cpu_time_iowait"] = cpu_time.Iowait + fields[prefix+"cpu_time_irq"] = cpu_time.Irq + fields[prefix+"cpu_time_soft_irq"] = cpu_time.Softirq + fields[prefix+"cpu_time_steal"] = cpu_time.Steal + fields[prefix+"cpu_time_stolen"] = cpu_time.Stolen + fields[prefix+"cpu_time_guest"] = cpu_time.Guest + fields[prefix+"cpu_time_guest_nice"] = cpu_time.GuestNice } cpu_perc, err := p.proc.CPUPercent(time.Duration(0)) if err == nil && cpu_perc != 0 { - fields["cpu_usage"] = cpu_perc + fields[prefix+"cpu_usage"] = cpu_perc } mem, err := p.proc.MemoryInfo() if err == nil { - fields["memory_rss"] = mem.RSS - fields["memory_vms"] = mem.VMS - fields["memory_swap"] = mem.Swap + fields[prefix+"memory_rss"] = mem.RSS + fields[prefix+"memory_vms"] = mem.VMS + fields[prefix+"memory_swap"] = mem.Swap } p.acc.AddFields("procstat", fields, p.tags) From 411d8d74391b8764b53d18f6bd78188cf651d5e5 Mon Sep 17 00:00:00 2001 From: Cameron Sparr Date: Tue, 26 Apr 2016 09:24:32 -0600 Subject: [PATCH 57/84] Fix leaky tcp connections in phpfpm plugin closes #1089 --- CHANGELOG.md | 1 + plugins/inputs/phpfpm/phpfpm.go | 1 + 2 files changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index c2dded62e..320f11789 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -53,6 +53,7 @@ based on _prefix_ in addition to globs. This means that a filter like - [#1022](https://github.com/influxdata/telegraf/issues/1022): Dont error deb/rpm install on systemd errors. - [#1078](https://github.com/influxdata/telegraf/issues/1078): Use default AWS credential chain. - [#1070](https://github.com/influxdata/telegraf/issues/1070): SQL Server input. Fix datatype conversion. 
+- [#1089](https://github.com/influxdata/telegraf/issues/1089): Fix leaky TCP connections in phpfpm plugin. ## v0.12.1 [2016-04-14] diff --git a/plugins/inputs/phpfpm/phpfpm.go b/plugins/inputs/phpfpm/phpfpm.go index 169fe2194..3b23ef92c 100644 --- a/plugins/inputs/phpfpm/phpfpm.go +++ b/plugins/inputs/phpfpm/phpfpm.go @@ -184,6 +184,7 @@ func (g *phpfpm) gatherHttp(addr string, acc telegraf.Accumulator) error { return fmt.Errorf("Unable to connect to phpfpm status page '%s': %v", addr, err) } + defer res.Body.Close() if res.StatusCode != 200 { return fmt.Errorf("Unable to get valid stat result from '%s': %v", From b9c24189e466b76ab0e0184d58e6873a09840b77 Mon Sep 17 00:00:00 2001 From: Cameron Sparr Date: Fri, 22 Apr 2016 15:47:26 -0600 Subject: [PATCH 58/84] Tail input plugin --- Godeps | 1 + plugins/inputs/all/all.go | 1 + plugins/inputs/filestat/filestat.go | 10 +- plugins/inputs/tail/README.md | 29 ++++ plugins/inputs/tail/tail.go | 152 ++++++++++++++++++++ plugins/inputs/tail/tail_test.go | 1 + plugins/inputs/tcp_listener/tcp_listener.go | 5 +- plugins/inputs/udp_listener/udp_listener.go | 4 +- 8 files changed, 199 insertions(+), 4 deletions(-) create mode 100644 plugins/inputs/tail/README.md create mode 100644 plugins/inputs/tail/tail.go create mode 100644 plugins/inputs/tail/tail_test.go diff --git a/Godeps b/Godeps index 926adcb74..9dd6a83ed 100644 --- a/Godeps +++ b/Godeps @@ -23,6 +23,7 @@ github.com/gonuts/go-shellquote e842a11b24c6abfb3dd27af69a17f482e4b483c2 github.com/gorilla/context 1ea25387ff6f684839d82767c1733ff4d4d15d0a github.com/gorilla/mux c9e326e2bdec29039a3761c07bece13133863e1e github.com/hailocab/go-hostpool e80d13ce29ede4452c43dea11e79b9bc8a15b478 +github.com/hpcloud/tail b2940955ab8b26e19d43a43c4da0475dd81bdb56 github.com/influxdata/config b79f6829346b8d6e78ba73544b1e1038f1f1c9da github.com/influxdata/influxdb 21db76b3374c733f37ed16ad93f3484020034351 github.com/influxdata/toml af4df43894b16e3fd2b788d01bd27ad0776ef2d0 diff --git a/plugins/inputs/all/all.go b/plugins/inputs/all/all.go index 36526f4d1..93ea3e779 100644 --- a/plugins/inputs/all/all.go +++ b/plugins/inputs/all/all.go @@ -56,6 +56,7 @@ import ( _ "github.com/influxdata/telegraf/plugins/inputs/statsd" _ "github.com/influxdata/telegraf/plugins/inputs/sysstat" _ "github.com/influxdata/telegraf/plugins/inputs/system" + _ "github.com/influxdata/telegraf/plugins/inputs/tail" _ "github.com/influxdata/telegraf/plugins/inputs/tcp_listener" _ "github.com/influxdata/telegraf/plugins/inputs/trig" _ "github.com/influxdata/telegraf/plugins/inputs/twemproxy" diff --git a/plugins/inputs/filestat/filestat.go b/plugins/inputs/filestat/filestat.go index 831d37444..938c12e34 100644 --- a/plugins/inputs/filestat/filestat.go +++ b/plugins/inputs/filestat/filestat.go @@ -14,8 +14,14 @@ import ( const sampleConfig = ` ## Files to gather stats about. ## These accept standard unix glob matching rules, but with the addition of - ## ** as a "super asterisk". See https://github.com/gobwas/glob. - files = ["/etc/telegraf/telegraf.conf", "/var/log/**.log"] + ## ** as a "super asterisk". ie: + ## "/var/log/**.log" -> recursively find all .log files in /var/log + ## "/var/log/*/*.log" -> find all .log files with a parent dir in /var/log + ## "/var/log/apache.log" -> just tail the apache log file + ## + ## See https://github.com/gobwas/glob for more examples + ## + files = ["/var/log/**.log"] ## If true, read the entire file and calculate an md5 checksum. 
md5 = false ` diff --git a/plugins/inputs/tail/README.md b/plugins/inputs/tail/README.md new file mode 100644 index 000000000..3b1c50665 --- /dev/null +++ b/plugins/inputs/tail/README.md @@ -0,0 +1,29 @@ +# tail Input Plugin + +The tail plugin "tails" a logfile and parses each log message. + +By default, the tail plugin acts like the following unix tail command: + +``` +tail --follow=name --lines=0 --retry myfile.log +``` + +- `--follow=name` means that it will follow the _name_ of the given file, so +that it will be compatible with log-rotated files. +- `--lines=0` means that it will start at the end of the file (unless +the `from_beginning` option is set). +- `--retry` means it will retry on inaccessible files. + +see http://man7.org/linux/man-pages/man1/tail.1.html for more details. + +The plugin expects messages in one of the +[Telegraf Input Data Formats](https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md). + +### Configuration: + +```toml +# Stream a log file, like the tail -f command +[[inputs.tail]] + # SampleConfig +``` + diff --git a/plugins/inputs/tail/tail.go b/plugins/inputs/tail/tail.go new file mode 100644 index 000000000..cb99eff61 --- /dev/null +++ b/plugins/inputs/tail/tail.go @@ -0,0 +1,152 @@ +package tail + +import ( + "fmt" + "log" + "sync" + + "github.com/hpcloud/tail" + + "github.com/influxdata/telegraf" + "github.com/influxdata/telegraf/internal/globpath" + "github.com/influxdata/telegraf/plugins/inputs" + "github.com/influxdata/telegraf/plugins/parsers" +) + +type Tail struct { + Files []string + FromBeginning bool + + tailers []*tail.Tail + parser parsers.Parser + wg sync.WaitGroup + acc telegraf.Accumulator + + sync.Mutex +} + +const sampleConfig = ` + ## files to tail. + ## These accept standard unix glob matching rules, but with the addition of + ## ** as a "super asterisk". ie: + ## "/var/log/**.log" -> recursively find all .log files in /var/log + ## "/var/log/*/*.log" -> find all .log files with a parent dir in /var/log + ## "/var/log/apache.log" -> just tail the apache log file + ## + ## See https://github.com/gobwas/glob for more examples + ## + files = ["/var/mymetrics.out"] + ## Read file from beginning. + from_beginning = false + + ## Data format to consume. 
+ ## Each data format has it's own unique set of configuration options, read + ## more about them here: + ## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md + data_format = "influx" +` + +func (t *Tail) SampleConfig() string { + return sampleConfig +} + +func (t *Tail) Description() string { + return "Stream a log file, like the tail -f command" +} + +func (t *Tail) Gather(acc telegraf.Accumulator) error { + return nil +} + +func (t *Tail) Start(acc telegraf.Accumulator) error { + t.Lock() + defer t.Unlock() + + t.acc = acc + + var seek tail.SeekInfo + if !t.FromBeginning { + seek.Whence = 2 + seek.Offset = 0 + } + + var errS string + // Create a "tailer" for each file + for _, filepath := range t.Files { + g, err := globpath.Compile(filepath) + if err != nil { + log.Printf("ERROR Glob %s failed to compile, %s", filepath, err) + } + for file, _ := range g.Match() { + tailer, err := tail.TailFile(file, + tail.Config{ + ReOpen: true, + Follow: true, + Location: &seek, + }) + if err != nil { + errS += err.Error() + " " + continue + } + // create a goroutine for each "tailer" + go t.receiver(tailer) + t.tailers = append(t.tailers, tailer) + } + } + + if errS != "" { + return fmt.Errorf(errS) + } + return nil +} + +// this is launched as a goroutine to continuously watch a tailed logfile +// for changes, parse any incoming msgs, and add to the accumulator. +func (t *Tail) receiver(tailer *tail.Tail) { + t.wg.Add(1) + defer t.wg.Done() + + var m telegraf.Metric + var err error + var line *tail.Line + for line = range tailer.Lines { + if line.Err != nil { + log.Printf("ERROR tailing file %s, Error: %s\n", + tailer.Filename, err) + continue + } + m, err = t.parser.ParseLine(line.Text) + if err == nil { + t.acc.AddFields(m.Name(), m.Fields(), m.Tags(), m.Time()) + } else { + log.Printf("Malformed log line in %s: [%s], Error: %s\n", + tailer.Filename, line.Text, err) + } + } +} + +func (t *Tail) Stop() { + t.Lock() + defer t.Unlock() + + for _, t := range t.tailers { + err := t.Stop() + if err != nil { + log.Printf("ERROR stopping tail on file %s\n", t.Filename) + } + t.Cleanup() + } + t.wg.Wait() +} + +func (t *Tail) SetParser(parser parsers.Parser) { + t.parser = parser +} + +func init() { + inputs.Add("tail", func() telegraf.Input { + return &Tail{ + FromBeginning: false, + } + }) +} diff --git a/plugins/inputs/tail/tail_test.go b/plugins/inputs/tail/tail_test.go new file mode 100644 index 000000000..78e2dd578 --- /dev/null +++ b/plugins/inputs/tail/tail_test.go @@ -0,0 +1 @@ +package tail diff --git a/plugins/inputs/tcp_listener/tcp_listener.go b/plugins/inputs/tcp_listener/tcp_listener.go index ce07a7601..a420ed759 100644 --- a/plugins/inputs/tcp_listener/tcp_listener.go +++ b/plugins/inputs/tcp_listener/tcp_listener.go @@ -221,7 +221,10 @@ func (t *TcpListener) handler(conn *net.TCPConn, id string) { // tcpParser parses the incoming tcp byte packets func (t *TcpListener) tcpParser() error { defer t.wg.Done() + var packet []byte + var metrics []telegraf.Metric + var err error for { select { case <-t.done: @@ -230,7 +233,7 @@ func (t *TcpListener) tcpParser() error { if len(packet) == 0 { continue } - metrics, err := t.parser.Parse(packet) + metrics, err = t.parser.Parse(packet) if err == nil { t.storeMetrics(metrics) } else { diff --git a/plugins/inputs/udp_listener/udp_listener.go b/plugins/inputs/udp_listener/udp_listener.go index 39249de37..8e2637ce7 100644 --- a/plugins/inputs/udp_listener/udp_listener.go +++ b/plugins/inputs/udp_listener/udp_listener.go @@ 
-135,12 +135,14 @@ func (u *UdpListener) udpParser() error { defer u.wg.Done() var packet []byte + var metrics []telegraf.Metric + var err error for { select { case <-u.done: return nil case packet = <-u.in: - metrics, err := u.parser.Parse(packet) + metrics, err = u.parser.Parse(packet) if err == nil { u.storeMetrics(metrics) } else { From c7719ac36530dee844dbcaa57393a54436dc8f7f Mon Sep 17 00:00:00 2001 From: Cameron Sparr Date: Tue, 26 Apr 2016 10:25:04 -0600 Subject: [PATCH 59/84] buffers: fix bug when Write called before AddMetric --- internal/models/running_output.go | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/internal/models/running_output.go b/internal/models/running_output.go index 91b200799..cab5035f6 100644 --- a/internal/models/running_output.go +++ b/internal/models/running_output.go @@ -124,6 +124,17 @@ func (ro *RunningOutput) Write() error { ro.Lock() defer ro.Unlock() + if ro.tmpmetrics == nil { + size := ro.MetricBufferLimit / ro.MetricBatchSize + // ro.metrics already contains one batch + size = size - 1 + + if size < 1 { + size = 1 + } + ro.tmpmetrics = make([]([]telegraf.Metric), size) + } + // Write any cached metric buffers before, as those metrics are the // oldest for ro.tmpmetrics[ro.readI] != nil { From 44c945b9f59086e903cc7c37f5637c628c881b16 Mon Sep 17 00:00:00 2001 From: Cameron Sparr Date: Tue, 26 Apr 2016 10:43:41 -0600 Subject: [PATCH 60/84] Tail unit tests and README tweaks --- plugins/inputs/tail/README.md | 27 +++++++-- plugins/inputs/tail/tail.go | 10 +++- plugins/inputs/tail/tail_test.go | 100 +++++++++++++++++++++++++++++++ 3 files changed, 129 insertions(+), 8 deletions(-) diff --git a/plugins/inputs/tail/README.md b/plugins/inputs/tail/README.md index 3b1c50665..9ae120e91 100644 --- a/plugins/inputs/tail/README.md +++ b/plugins/inputs/tail/README.md @@ -5,14 +5,14 @@ The tail plugin "tails" a logfile and parses each log message. By default, the tail plugin acts like the following unix tail command: ``` -tail --follow=name --lines=0 --retry myfile.log +tail -F --lines=0 myfile.log ``` -- `--follow=name` means that it will follow the _name_ of the given file, so -that it will be compatible with log-rotated files. +- `-F` means that it will follow the _name_ of the given file, so +that it will be compatible with log-rotated files, and that it will retry on +inaccessible files. - `--lines=0` means that it will start at the end of the file (unless the `from_beginning` option is set). -- `--retry` means it will retry on inaccessible files. see http://man7.org/linux/man-pages/man1/tail.1.html for more details. @@ -24,6 +24,23 @@ The plugin expects messages in one of the ```toml # Stream a log file, like the tail -f command [[inputs.tail]] - # SampleConfig + ## files to tail. + ## These accept standard unix glob matching rules, but with the addition of + ## ** as a "super asterisk". ie: + ## "/var/log/**.log" -> recursively find all .log files in /var/log + ## "/var/log/*/*.log" -> find all .log files with a parent dir in /var/log + ## "/var/log/apache.log" -> just tail the apache log file + ## + ## See https://github.com/gobwas/glob for more examples + ## + files = ["/var/mymetrics.out"] + ## Read file from beginning. + from_beginning = false + + ## Data format to consume. 
+ ## Each data format has it's own unique set of configuration options, read + ## more about them here: + ## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md + data_format = "influx" ``` diff --git a/plugins/inputs/tail/tail.go b/plugins/inputs/tail/tail.go index cb99eff61..7cfca81e2 100644 --- a/plugins/inputs/tail/tail.go +++ b/plugins/inputs/tail/tail.go @@ -25,6 +25,12 @@ type Tail struct { sync.Mutex } +func NewTail() *Tail { + return &Tail{ + FromBeginning: false, + } +} + const sampleConfig = ` ## files to tail. ## These accept standard unix glob matching rules, but with the addition of @@ -145,8 +151,6 @@ func (t *Tail) SetParser(parser parsers.Parser) { func init() { inputs.Add("tail", func() telegraf.Input { - return &Tail{ - FromBeginning: false, - } + return NewTail() }) } diff --git a/plugins/inputs/tail/tail_test.go b/plugins/inputs/tail/tail_test.go index 78e2dd578..5d7c04a88 100644 --- a/plugins/inputs/tail/tail_test.go +++ b/plugins/inputs/tail/tail_test.go @@ -1 +1,101 @@ package tail + +import ( + "io/ioutil" + "os" + "testing" + "time" + + "github.com/influxdata/telegraf/plugins/parsers" + "github.com/influxdata/telegraf/testutil" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestTailFromBeginning(t *testing.T) { + tmpfile, err := ioutil.TempFile("", "") + require.NoError(t, err) + defer os.Remove(tmpfile.Name()) + + tt := NewTail() + tt.FromBeginning = true + tt.Files = []string{tmpfile.Name()} + p, _ := parsers.NewInfluxParser() + tt.SetParser(p) + defer tt.Stop() + defer tmpfile.Close() + + acc := testutil.Accumulator{} + require.NoError(t, tt.Start(&acc)) + + _, err = tmpfile.WriteString("cpu,mytag=foo usage_idle=100\n") + require.NoError(t, err) + require.NoError(t, tt.Gather(&acc)) + time.Sleep(time.Millisecond * 50) + + acc.AssertContainsTaggedFields(t, "cpu", + map[string]interface{}{ + "usage_idle": float64(100), + }, + map[string]string{ + "mytag": "foo", + }) +} + +func TestTailFromEnd(t *testing.T) { + tmpfile, err := ioutil.TempFile("", "") + require.NoError(t, err) + defer os.Remove(tmpfile.Name()) + _, err = tmpfile.WriteString("cpu,mytag=foo usage_idle=100\n") + require.NoError(t, err) + + tt := NewTail() + tt.Files = []string{tmpfile.Name()} + p, _ := parsers.NewInfluxParser() + tt.SetParser(p) + defer tt.Stop() + defer tmpfile.Close() + + acc := testutil.Accumulator{} + require.NoError(t, tt.Start(&acc)) + time.Sleep(time.Millisecond * 100) + + _, err = tmpfile.WriteString("cpu,othertag=foo usage_idle=100\n") + require.NoError(t, err) + require.NoError(t, tt.Gather(&acc)) + time.Sleep(time.Millisecond * 50) + + acc.AssertContainsTaggedFields(t, "cpu", + map[string]interface{}{ + "usage_idle": float64(100), + }, + map[string]string{ + "othertag": "foo", + }) + assert.Len(t, acc.Metrics, 1) +} + +func TestTailBadLine(t *testing.T) { + tmpfile, err := ioutil.TempFile("", "") + require.NoError(t, err) + defer os.Remove(tmpfile.Name()) + + tt := NewTail() + tt.FromBeginning = true + tt.Files = []string{tmpfile.Name()} + p, _ := parsers.NewInfluxParser() + tt.SetParser(p) + defer tt.Stop() + defer tmpfile.Close() + + acc := testutil.Accumulator{} + require.NoError(t, tt.Start(&acc)) + + _, err = tmpfile.WriteString("cpu mytag= foo usage_idle= 100\n") + require.NoError(t, err) + require.NoError(t, tt.Gather(&acc)) + time.Sleep(time.Millisecond * 50) + + assert.Len(t, acc.Metrics, 0) +} From 1c4043ab3981d77d4149ed86facc9b61f2104f7a Mon Sep 17 00:00:00 2001 From: John Engelman Date: Sat, 
23 Apr 2016 13:19:04 -0500 Subject: [PATCH 61/84] Closes #1085 - allow for specifying AWS credentials in config. closes #1085 closes #1086 --- CHANGELOG.md | 1 + plugins/inputs/cloudwatch/cloudwatch.go | 15 +++++++++++++++ plugins/outputs/cloudwatch/cloudwatch.go | 19 +++++++++++++++++-- plugins/outputs/kinesis/kinesis.go | 16 ++++++++++++++++ 4 files changed, 49 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 320f11789..d1f00ad0d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -41,6 +41,7 @@ based on _prefix_ in addition to globs. This means that a filter like - [#1041](https://github.com/influxdata/telegraf/issues/1041): Add `n_cpus` field to the system plugin. - [#1072](https://github.com/influxdata/telegraf/pull/1072): New Input Plugin: filestat. - [#1066](https://github.com/influxdata/telegraf/pull/1066): Replication lag metrics for MongoDB input plugin +- [#1086](https://github.com/influxdata/telegraf/pull/1086): Ability to specify AWS keys in config file. Thanks @johnrengleman! ### Bugfixes diff --git a/plugins/inputs/cloudwatch/cloudwatch.go b/plugins/inputs/cloudwatch/cloudwatch.go index 8e6384180..8edf2f895 100644 --- a/plugins/inputs/cloudwatch/cloudwatch.go +++ b/plugins/inputs/cloudwatch/cloudwatch.go @@ -6,6 +6,7 @@ import ( "time" "github.com/aws/aws-sdk-go/aws" + "github.com/aws/aws-sdk-go/aws/credentials" "github.com/aws/aws-sdk-go/aws/session" "github.com/aws/aws-sdk-go/service/cloudwatch" @@ -18,6 +19,8 @@ import ( type ( CloudWatch struct { Region string `toml:"region"` + AccessKey string `toml:"access_key"` + SecretKey string `toml:"secret_key"` Period internal.Duration `toml:"period"` Delay internal.Duration `toml:"delay"` Namespace string `toml:"namespace"` @@ -53,6 +56,15 @@ func (c *CloudWatch) SampleConfig() string { ## Amazon Region region = 'us-east-1' + ## Amazon Credentials + ## Credentials are loaded in the following order + ## 1) explicit credentials from 'access_key' and 'secret_key' + ## 2) environment variables + ## 3) shared credentials file + ## 4) EC2 Instance Profile + #access_key = "" + #secret_key = "" + ## Requested CloudWatch aggregation Period (required - must be a multiple of 60s) period = '1m' @@ -152,6 +164,9 @@ func (c *CloudWatch) initializeCloudWatch() error { config := &aws.Config{ Region: aws.String(c.Region), } + if c.AccessKey != "" || c.SecretKey != "" { + config.Credentials = credentials.NewStaticCredentials(c.AccessKey, c.SecretKey, "") + } c.client = cloudwatch.New(session.New(config)) return nil diff --git a/plugins/outputs/cloudwatch/cloudwatch.go b/plugins/outputs/cloudwatch/cloudwatch.go index 75b2addf4..e4bfa0666 100644 --- a/plugins/outputs/cloudwatch/cloudwatch.go +++ b/plugins/outputs/cloudwatch/cloudwatch.go @@ -8,6 +8,7 @@ import ( "time" "github.com/aws/aws-sdk-go/aws" + "github.com/aws/aws-sdk-go/aws/credentials" "github.com/aws/aws-sdk-go/aws/session" "github.com/aws/aws-sdk-go/service/cloudwatch" @@ -16,8 +17,10 @@ import ( ) type CloudWatch struct { - Region string // AWS Region - Namespace string // CloudWatch Metrics Namespace + Region string `toml:"region"` // AWS Region + AccessKey string `toml:"access_key"` // Explicit AWS Access Key ID + SecretKey string `toml:"secret_key"` // Explicit AWS Secret Access Key + Namespace string `toml:"namespace"` // CloudWatch Metrics Namespace svc *cloudwatch.CloudWatch } @@ -25,6 +28,15 @@ var sampleConfig = ` ## Amazon REGION region = 'us-east-1' + ## Amazon Credentials + ## Credentials are loaded in the following order + ## 1) explicit credentials 
from 'access_key' and 'secret_key' + ## 2) environment variables + ## 3) shared credentials file + ## 4) EC2 Instance Profile + #access_key = "" + #secret_key = "" + ## Namespace for the CloudWatch MetricDatums namespace = 'InfluxData/Telegraf' ` @@ -41,6 +53,9 @@ func (c *CloudWatch) Connect() error { Config := &aws.Config{ Region: aws.String(c.Region), } + if c.AccessKey != "" || c.SecretKey != "" { + Config.Credentials = credentials.NewStaticCredentials(c.AccessKey, c.SecretKey, "") + } svc := cloudwatch.New(session.New(Config)) diff --git a/plugins/outputs/kinesis/kinesis.go b/plugins/outputs/kinesis/kinesis.go index a4b5e6a7b..fabec2402 100644 --- a/plugins/outputs/kinesis/kinesis.go +++ b/plugins/outputs/kinesis/kinesis.go @@ -8,6 +8,7 @@ import ( "time" "github.com/aws/aws-sdk-go/aws" + "github.com/aws/aws-sdk-go/aws/credentials" "github.com/aws/aws-sdk-go/aws/session" "github.com/aws/aws-sdk-go/service/kinesis" @@ -17,6 +18,8 @@ import ( type KinesisOutput struct { Region string `toml:"region"` + AccessKey string `toml:"access_key"` + SecretKey string `toml:"secret_key"` StreamName string `toml:"streamname"` PartitionKey string `toml:"partitionkey"` Format string `toml:"format"` @@ -27,6 +30,16 @@ type KinesisOutput struct { var sampleConfig = ` ## Amazon REGION of kinesis endpoint. region = "ap-southeast-2" + + ## Amazon Credentials + ## Credentials are loaded in the following order + ## 1) explicit credentials from 'access_key' and 'secret_key' + ## 2) environment variables + ## 3) shared credentials file + ## 4) EC2 Instance Profile + #access_key = "" + #secret_key = "" + ## Kinesis StreamName must exist prior to starting telegraf. streamname = "StreamName" ## PartitionKey as used for sharding data. @@ -65,6 +78,9 @@ func (k *KinesisOutput) Connect() error { Config := &aws.Config{ Region: aws.String(k.Region), } + if k.AccessKey != "" || k.SecretKey != "" { + Config.Credentials = credentials.NewStaticCredentials(k.AccessKey, k.SecretKey, "") + } svc := kinesis.New(session.New(Config)) KinesisParams := &kinesis.ListStreamsInput{ From 4de75ce62175cf9af6c9da6d837676cf9f57d7b0 Mon Sep 17 00:00:00 2001 From: Cameron Sparr Date: Mon, 25 Apr 2016 17:49:06 -0600 Subject: [PATCH 62/84] Performance refactor of running_output buffers closes #914 closes #967 --- CHANGELOG.md | 16 ++ etc/telegraf.conf | 85 +++++- internal/buffer/buffer.go | 77 ++++++ internal/buffer/buffer_test.go | 94 +++++++ internal/config/config.go | 20 +- internal/models/running_output.go | 155 +++++------ internal/models/running_output_test.go | 346 ++++++++++++++++++------- 7 files changed, 587 insertions(+), 206 deletions(-) create mode 100644 internal/buffer/buffer.go create mode 100644 internal/buffer/buffer_test.go diff --git a/CHANGELOG.md b/CHANGELOG.md index d1f00ad0d..306083da0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,9 +2,18 @@ ### Release Notes +- New [agent] configuration option: `metric_batch_size`. This option tells +telegraf the maximum batch size to allow to accumulate before sending a flush +to the configured outputs. `metric_buffer_limit` now refers to the absolute +maximum number of metrics that will accumulate before metrics are dropped. + +- There is no longer an option to +`flush_buffer_when_full`, this is now the default and only behavior of telegraf. + - **Breaking Change**: docker plugin tags. The cont_id tag no longer exists, it will now be a field, and be called container_id. Additionally, cont_image and cont_name are being renamed to container_image and container_name. 
+ - **Breaking Change**: docker plugin measurements. The `docker_cpu`, `docker_mem`, `docker_blkio` and `docker_net` measurements are being renamed to `docker_container_cpu`, `docker_container_mem`, `docker_container_blkio` and @@ -16,15 +25,19 @@ So adding "container" to each metric will: (1) make it more clear that these metrics are per-container, and (2) allow users to easily drop per-container metrics if cardinality is an issue (`namedrop = ["docker_container_*"]`) + - `tagexclude` and `taginclude` are now available, which can be used to remove tags from measurements on inputs and outputs. See [the configuration doc](https://github.com/influxdata/telegraf/blob/master/docs/CONFIGURATION.md) for more details. + - **Measurement filtering:** All measurement filters now match based on glob only. Previously there was an undocumented behavior where filters would match based on _prefix_ in addition to globs. This means that a filter like `fielddrop = ["time_"]` will need to be changed to `fielddrop = ["time_*"]` + - **datadog**: measurement and field names will no longer have `_` replaced by `.` + - The following plugins have changed their tags to _not_ overwrite the host tag: - cassandra: `host -> cassandra_host` - disque: `host -> disque_host` @@ -42,6 +55,8 @@ based on _prefix_ in addition to globs. This means that a filter like - [#1072](https://github.com/influxdata/telegraf/pull/1072): New Input Plugin: filestat. - [#1066](https://github.com/influxdata/telegraf/pull/1066): Replication lag metrics for MongoDB input plugin - [#1086](https://github.com/influxdata/telegraf/pull/1086): Ability to specify AWS keys in config file. Thanks @johnrengleman! +- [#1096](https://github.com/influxdata/telegraf/pull/1096): Performance refactor of running output buffers. +- [#967](https://github.com/influxdata/telegraf/issues/967): Buffer logging improvements. ### Bugfixes @@ -55,6 +70,7 @@ based on _prefix_ in addition to globs. This means that a filter like - [#1078](https://github.com/influxdata/telegraf/issues/1078): Use default AWS credential chain. - [#1070](https://github.com/influxdata/telegraf/issues/1070): SQL Server input. Fix datatype conversion. - [#1089](https://github.com/influxdata/telegraf/issues/1089): Fix leaky TCP connections in phpfpm plugin. +- [#914](https://github.com/influxdata/telegraf/issues/914): Telegraf can drop metrics on full buffers. ## v0.12.1 [2016-04-14] diff --git a/etc/telegraf.conf b/etc/telegraf.conf index 46b422ffa..c855b94fe 100644 --- a/etc/telegraf.conf +++ b/etc/telegraf.conf @@ -30,15 +30,13 @@ ## ie, if interval="10s" then always collect on :00, :10, :20, etc. round_interval = true - ## Telegraf will send metrics to output in batch of at + ## Telegraf will send metrics to outputs in batches of at ## most metric_batch_size metrics. metric_batch_size = 1000 - ## Telegraf will cache metric_buffer_limit metrics for each output, and will - ## flush this buffer on a successful write. This should be a multiple of - ## metric_batch_size and could not be less than 2 times metric_batch_size + ## For failed writes, telegraf will cache metric_buffer_limit metrics for each + ## output, and will flush this buffer on a successful write. Oldest metrics + ## are dropped first when this buffer fills. metric_buffer_limit = 10000 - ## Flush the buffer whenever full, regardless of flush_interval. - flush_buffer_when_full = true ## Collection jitter is used to jitter the collection by a random amount. ## Each plugin will sleep for a random time within jitter before collecting. 
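For reference, a minimal sketch of the resulting `[agent]` buffer settings after this change, using only the default values already shown in the hunk above:

```toml
[agent]
  ## send metrics to each output in batches of at most this many metrics
  metric_batch_size = 1000
  ## on failed writes, cache up to this many metrics per output; the oldest
  ## metrics are dropped first once this buffer fills
  metric_buffer_limit = 10000
```

There is no `flush_buffer_when_full` setting any more; flushing a full batch is the default and only behavior.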
@@ -151,6 +149,15 @@ # ## Amazon REGION # region = 'us-east-1' # +# ## Amazon Credentials +# ## Credentials are loaded in the following order +# ## 1) explicit credentials from 'access_key' and 'secret_key' +# ## 2) environment variables +# ## 3) shared credentials file +# ## 4) EC2 Instance Profile +# #access_key = "" +# #secret_key = "" +# # ## Namespace for the CloudWatch MetricDatums # namespace = 'InfluxData/Telegraf' @@ -243,6 +250,16 @@ # [[outputs.kinesis]] # ## Amazon REGION of kinesis endpoint. # region = "ap-southeast-2" +# +# ## Amazon Credentials +# ## Credentials are loaded in the following order +# ## 1) explicit credentials from 'access_key' and 'secret_key' +# ## 2) environment variables +# ## 3) shared credentials file +# ## 4) EC2 Instance Profile +# #access_key = "" +# #secret_key = "" +# # ## Kinesis StreamName must exist prior to starting telegraf. # streamname = "StreamName" # ## PartitionKey as used for sharding data. @@ -457,6 +474,15 @@ # ## Amazon Region # region = 'us-east-1' # +# ## Amazon Credentials +# ## Credentials are loaded in the following order +# ## 1) explicit credentials from 'access_key' and 'secret_key' +# ## 2) environment variables +# ## 3) shared credentials file +# ## 4) EC2 Instance Profile +# #access_key = "" +# #secret_key = "" +# # ## Requested CloudWatch aggregation Period (required - must be a multiple of 60s) # period = '1m' # @@ -588,8 +614,14 @@ # [[inputs.filestat]] # ## Files to gather stats about. # ## These accept standard unix glob matching rules, but with the addition of -# ## ** as a "super asterisk". See https://github.com/gobwas/glob. -# files = ["/etc/telegraf/telegraf.conf", "/var/log/**.log"] +# ## ** as a "super asterisk". ie: +# ## "/var/log/**.log" -> recursively find all .log files in /var/log +# ## "/var/log/*/*.log" -> find all .log files with a parent dir in /var/log +# ## "/var/log/apache.log" -> just tail the apache log file +# ## +# ## See https://github.com/gobwas/glob for more examples +# ## +# files = ["/var/log/**.log"] # ## If true, read the entire file and calculate an md5 checksum. # md5 = false @@ -980,6 +1012,11 @@ # ## databases are gathered. # ## databases = ["app_production", "testing"] # # +# # outputaddress = "db01" +# ## A custom name for the database that will be used as the "server" tag in the +# ## measurement output. If not specified, a default one generated from +# ## the connection address is used. +# # # ## Define the toml config where the sql queries are stored # ## New queries can be added, if the withdbname is set to true and there is no # ## databases defined in the 'databases field', the sql query is ended by a @@ -990,24 +1027,28 @@ # ## because the databases variable was set to ['postgres', 'pgbench' ] and the # ## withdbname was true. Be careful that if the withdbname is set to false you # ## don't have to define the where clause (aka with the dbname) the tagvalue -# ## field is used to define custom tags (separated by comas) +# ## field is used to define custom tags (separated by commas) +# ## The optional "measurement" value can be used to override the default +# ## output measurement name ("postgresql"). 
# # # ## Structure : # ## [[inputs.postgresql_extensible.query]] # ## sqlquery string # ## version string # ## withdbname boolean -# ## tagvalue string (coma separated) +# ## tagvalue string (comma separated) +# ## measurement string # [[inputs.postgresql_extensible.query]] # sqlquery="SELECT * FROM pg_stat_database" # version=901 # withdbname=false # tagvalue="" +# measurement="" # [[inputs.postgresql_extensible.query]] # sqlquery="SELECT * FROM pg_stat_bgwriter" # version=901 # withdbname=false -# tagvalue="" +# tagvalue="postgresql.stats" # # Read metrics from one or many PowerDNS servers @@ -1379,6 +1420,28 @@ # percentile_limit = 1000 +# # Stream a log file, like the tail -f command +# [[inputs.tail]] +# ## files to tail. +# ## These accept standard unix glob matching rules, but with the addition of +# ## ** as a "super asterisk". ie: +# ## "/var/log/**.log" -> recursively find all .log files in /var/log +# ## "/var/log/*/*.log" -> find all .log files with a parent dir in /var/log +# ## "/var/log/apache.log" -> just tail the apache log file +# ## +# ## See https://github.com/gobwas/glob for more examples +# ## +# files = ["/var/mymetrics.out"] +# ## Read file from beginning. +# from_beginning = false +# +# ## Data format to consume. +# ## Each data format has it's own unique set of configuration options, read +# ## more about them here: +# ## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md +# data_format = "influx" + + # # Generic TCP listener # [[inputs.tcp_listener]] # ## Address and port to host TCP listener on diff --git a/internal/buffer/buffer.go b/internal/buffer/buffer.go new file mode 100644 index 000000000..b7a05bf03 --- /dev/null +++ b/internal/buffer/buffer.go @@ -0,0 +1,77 @@ +package buffer + +import ( + "github.com/influxdata/telegraf" +) + +// Buffer is an object for storing metrics in a circular buffer. +type Buffer struct { + buf chan telegraf.Metric + // total dropped metrics + drops int + // total metrics added + total int +} + +// NewBuffer returns a Buffer +// size is the maximum number of metrics that Buffer will cache. If Add is +// called when the buffer is full, then the oldest metric(s) will be dropped. +func NewBuffer(size int) *Buffer { + return &Buffer{ + buf: make(chan telegraf.Metric, size), + } +} + +// IsEmpty returns true if Buffer is empty. +func (b *Buffer) IsEmpty() bool { + return len(b.buf) == 0 +} + +// Len returns the current length of the buffer. +func (b *Buffer) Len() int { + return len(b.buf) +} + +// Drops returns the total number of dropped metrics that have occured in this +// buffer since instantiation. +func (b *Buffer) Drops() int { + return b.drops +} + +// Total returns the total number of metrics that have been added to this buffer. +func (b *Buffer) Total() int { + return b.total +} + +// Add adds metrics to the buffer. +func (b *Buffer) Add(metrics ...telegraf.Metric) { + for i, _ := range metrics { + b.total++ + select { + case b.buf <- metrics[i]: + default: + b.drops++ + <-b.buf + b.buf <- metrics[i] + } + } +} + +// Batch returns a batch of metrics of size batchSize. +// the batch will be of maximum length batchSize. It can be less than batchSize, +// if the length of Buffer is less than batchSize. 
+func (b *Buffer) Batch(batchSize int) []telegraf.Metric { + n := min(len(b.buf), batchSize) + out := make([]telegraf.Metric, n) + for i := 0; i < n; i++ { + out[i] = <-b.buf + } + return out +} + +func min(a, b int) int { + if b < a { + return b + } + return a +} diff --git a/internal/buffer/buffer_test.go b/internal/buffer/buffer_test.go new file mode 100644 index 000000000..9a36f4d84 --- /dev/null +++ b/internal/buffer/buffer_test.go @@ -0,0 +1,94 @@ +package buffer + +import ( + "testing" + + "github.com/influxdata/telegraf" + "github.com/influxdata/telegraf/testutil" + + "github.com/stretchr/testify/assert" +) + +var metricList = []telegraf.Metric{ + testutil.TestMetric(2, "mymetric1"), + testutil.TestMetric(1, "mymetric2"), + testutil.TestMetric(11, "mymetric3"), + testutil.TestMetric(15, "mymetric4"), + testutil.TestMetric(8, "mymetric5"), +} + +func BenchmarkAddMetrics(b *testing.B) { + buf := NewBuffer(10000) + m := testutil.TestMetric(1, "mymetric") + for n := 0; n < b.N; n++ { + buf.Add(m) + } +} + +func TestNewBufferBasicFuncs(t *testing.T) { + b := NewBuffer(10) + + assert.True(t, b.IsEmpty()) + assert.Zero(t, b.Len()) + assert.Zero(t, b.Drops()) + assert.Zero(t, b.Total()) + + m := testutil.TestMetric(1, "mymetric") + b.Add(m) + assert.False(t, b.IsEmpty()) + assert.Equal(t, b.Len(), 1) + assert.Equal(t, b.Drops(), 0) + assert.Equal(t, b.Total(), 1) + + b.Add(metricList...) + assert.False(t, b.IsEmpty()) + assert.Equal(t, b.Len(), 6) + assert.Equal(t, b.Drops(), 0) + assert.Equal(t, b.Total(), 6) +} + +func TestDroppingMetrics(t *testing.T) { + b := NewBuffer(10) + + // Add up to the size of the buffer + b.Add(metricList...) + b.Add(metricList...) + assert.False(t, b.IsEmpty()) + assert.Equal(t, b.Len(), 10) + assert.Equal(t, b.Drops(), 0) + assert.Equal(t, b.Total(), 10) + + // Add 5 more and verify they were dropped + b.Add(metricList...) + assert.False(t, b.IsEmpty()) + assert.Equal(t, b.Len(), 10) + assert.Equal(t, b.Drops(), 5) + assert.Equal(t, b.Total(), 15) +} + +func TestGettingBatches(t *testing.T) { + b := NewBuffer(20) + + // Verify that the buffer returned is smaller than requested when there are + // not as many items as requested. + b.Add(metricList...) + batch := b.Batch(10) + assert.Len(t, batch, 5) + + // Verify that the buffer is now empty + assert.True(t, b.IsEmpty()) + assert.Zero(t, b.Len()) + assert.Zero(t, b.Drops()) + assert.Equal(t, b.Total(), 5) + + // Verify that the buffer returned is not more than the size requested + b.Add(metricList...) + batch = b.Batch(3) + assert.Len(t, batch, 3) + + // Verify that buffer is not empty + assert.False(t, b.IsEmpty()) + assert.Equal(t, b.Len(), 2) + assert.Equal(t, b.Drops(), 0) + assert.Equal(t, b.Total(), 10) +} diff --git a/internal/config/config.go b/internal/config/config.go index fcebd24e6..2a34493ff 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -188,15 +188,13 @@ var header = `# Telegraf Configuration ## ie, if interval="10s" then always collect on :00, :10, :20, etc. round_interval = true - ## Telegraf will send metrics to output in batch of at + ## Telegraf will send metrics to outputs in batches of at ## most metric_batch_size metrics. metric_batch_size = 1000 - ## Telegraf will cache metric_buffer_limit metrics for each output, and will - ## flush this buffer on a successful write. 
This should be a multiple of - ## metric_batch_size and could not be less than 2 times metric_batch_size + ## For failed writes, telegraf will cache metric_buffer_limit metrics for each + ## output, and will flush this buffer on a successful write. Oldest metrics + ## are dropped first when this buffer fills. metric_buffer_limit = 10000 - ## Flush the buffer whenever full, regardless of flush_interval. - flush_buffer_when_full = true ## Collection jitter is used to jitter the collection by a random amount. ## Each plugin will sleep for a random time within jitter before collecting. @@ -535,14 +533,8 @@ func (c *Config) addOutput(name string, table *ast.Table) error { return err } - ro := internal_models.NewRunningOutput(name, output, outputConfig) - if c.Agent.MetricBatchSize > 0 { - ro.MetricBatchSize = c.Agent.MetricBatchSize - } - if c.Agent.MetricBufferLimit > 0 { - ro.MetricBufferLimit = c.Agent.MetricBufferLimit - } - ro.FlushBufferWhenFull = c.Agent.FlushBufferWhenFull + ro := internal_models.NewRunningOutput(name, output, outputConfig, + c.Agent.MetricBatchSize, c.Agent.MetricBufferLimit) c.Outputs = append(c.Outputs, ro) return nil } diff --git a/internal/models/running_output.go b/internal/models/running_output.go index cab5035f6..d0d2abbc1 100644 --- a/internal/models/running_output.go +++ b/internal/models/running_output.go @@ -2,14 +2,13 @@ package internal_models import ( "log" - "sync" "time" "github.com/influxdata/telegraf" + "github.com/influxdata/telegraf/internal/buffer" ) const ( - // Default size of metrics batch size. DEFAULT_METRIC_BATCH_SIZE = 1000 @@ -17,40 +16,40 @@ const ( DEFAULT_METRIC_BUFFER_LIMIT = 10000 ) -// tmpmetrics point to batch of metrics ready to be wrote to output. -// readI point to the oldest batch of metrics (the first to sent to output). It -// may point to nil value if tmpmetrics is empty. -// writeI point to the next slot to buffer a batch of metrics is output fail to -// write. 
+// RunningOutput contains the output configuration type RunningOutput struct { - Name string - Output telegraf.Output - Config *OutputConfig - Quiet bool - MetricBufferLimit int - MetricBatchSize int - FlushBufferWhenFull bool + Name string + Output telegraf.Output + Config *OutputConfig + Quiet bool + MetricBufferLimit int + MetricBatchSize int - metrics []telegraf.Metric - tmpmetrics []([]telegraf.Metric) - writeI int - readI int - - sync.Mutex + metrics *buffer.Buffer + failMetrics *buffer.Buffer } func NewRunningOutput( name string, output telegraf.Output, conf *OutputConfig, + batchSize int, + bufferLimit int, ) *RunningOutput { + if bufferLimit == 0 { + bufferLimit = DEFAULT_METRIC_BUFFER_LIMIT + } + if batchSize == 0 { + batchSize = DEFAULT_METRIC_BATCH_SIZE + } ro := &RunningOutput{ Name: name, - metrics: make([]telegraf.Metric, 0), + metrics: buffer.NewBuffer(batchSize), + failMetrics: buffer.NewBuffer(bufferLimit), Output: output, Config: conf, - MetricBufferLimit: DEFAULT_METRIC_BUFFER_LIMIT, - MetricBatchSize: DEFAULT_METRIC_BATCH_SIZE, + MetricBufferLimit: bufferLimit, + MetricBatchSize: batchSize, } return ro } @@ -63,19 +62,6 @@ func (ro *RunningOutput) AddMetric(metric telegraf.Metric) { return } } - ro.Lock() - defer ro.Unlock() - - if ro.tmpmetrics == nil { - size := ro.MetricBufferLimit / ro.MetricBatchSize - // ro.metrics already contains one batch - size = size - 1 - - if size < 1 { - size = 1 - } - ro.tmpmetrics = make([]([]telegraf.Metric), size) - } // Filter any tagexclude/taginclude parameters before adding metric if len(ro.Config.Filter.TagExclude) != 0 || len(ro.Config.Filter.TagInclude) != 0 { @@ -90,69 +76,64 @@ func (ro *RunningOutput) AddMetric(metric telegraf.Metric) { metric, _ = telegraf.NewMetric(name, tags, fields, t) } - if len(ro.metrics) < ro.MetricBatchSize { - ro.metrics = append(ro.metrics, metric) - } else { - flushSuccess := true - if ro.FlushBufferWhenFull { - err := ro.write(ro.metrics) - if err != nil { - log.Printf("ERROR writing full metric buffer to output %s, %s", - ro.Name, err) - flushSuccess = false - } - } else { - flushSuccess = false + ro.metrics.Add(metric) + if ro.metrics.Len() == ro.MetricBatchSize { + batch := ro.metrics.Batch(ro.MetricBatchSize) + err := ro.write(batch) + if err != nil { + ro.failMetrics.Add(batch...) } - if !flushSuccess { - if ro.tmpmetrics[ro.writeI] != nil && ro.writeI == ro.readI { - log.Printf("WARNING: overwriting cached metrics, you may want to " + - "increase the metric_buffer_limit setting in your [agent] " + - "config if you do not wish to overwrite metrics.\n") - ro.readI = (ro.readI + 1) % cap(ro.tmpmetrics) - } - ro.tmpmetrics[ro.writeI] = ro.metrics - ro.writeI = (ro.writeI + 1) % cap(ro.tmpmetrics) - } - ro.metrics = make([]telegraf.Metric, 0) - ro.metrics = append(ro.metrics, metric) } } // Write writes all cached points to this output. func (ro *RunningOutput) Write() error { - ro.Lock() - defer ro.Unlock() - - if ro.tmpmetrics == nil { - size := ro.MetricBufferLimit / ro.MetricBatchSize - // ro.metrics already contains one batch - size = size - 1 - - if size < 1 { - size = 1 - } - ro.tmpmetrics = make([]([]telegraf.Metric), size) + if !ro.Quiet { + log.Printf("Output [%s] buffer fullness: %d / %d metrics. "+ + "Total gathered metrics: %d. 
Total dropped metrics: %d.", + ro.Name, + ro.failMetrics.Len()+ro.metrics.Len(), + ro.MetricBufferLimit, + ro.metrics.Total(), + ro.metrics.Drops()+ro.failMetrics.Drops()) } - // Write any cached metric buffers before, as those metrics are the - // oldest - for ro.tmpmetrics[ro.readI] != nil { - if err := ro.write(ro.tmpmetrics[ro.readI]); err != nil { - return err - } else { - ro.tmpmetrics[ro.readI] = nil - ro.readI = (ro.readI + 1) % cap(ro.tmpmetrics) + var err error + if !ro.failMetrics.IsEmpty() { + bufLen := ro.failMetrics.Len() + // how many batches of failed writes we need to write. + nBatches := bufLen/ro.MetricBatchSize + 1 + batchSize := ro.MetricBatchSize + + for i := 0; i < nBatches; i++ { + // If it's the last batch, only grab the metrics that have not had + // a write attempt already (this is primarily to preserve order). + if i == nBatches-1 { + batchSize = bufLen % ro.MetricBatchSize + } + batch := ro.failMetrics.Batch(batchSize) + // If we've already failed previous writes, don't bother trying to + // write to this output again. We are not exiting the loop just so + // that we can rotate the metrics to preserve order. + if err == nil { + err = ro.write(batch) + } + if err != nil { + ro.failMetrics.Add(batch...) + } } } - err := ro.write(ro.metrics) + batch := ro.metrics.Batch(ro.MetricBatchSize) + // see comment above about not trying to write to an already failed output. + // if ro.failMetrics is empty then err will always be nil at this point. + if err == nil { + err = ro.write(batch) + } if err != nil { + ro.failMetrics.Add(batch...) return err - } else { - ro.metrics = make([]telegraf.Metric, 0) } - return nil } @@ -165,8 +146,8 @@ func (ro *RunningOutput) write(metrics []telegraf.Metric) error { elapsed := time.Since(start) if err == nil { if !ro.Quiet { - log.Printf("Wrote %d metrics to output %s in %s\n", - len(metrics), ro.Name, elapsed) + log.Printf("Output [%s] wrote batch of %d metrics in %s\n", + ro.Name, len(metrics), elapsed) } } return err diff --git a/internal/models/running_output_test.go b/internal/models/running_output_test.go index ca7034b61..d9238c5a4 100644 --- a/internal/models/running_output_test.go +++ b/internal/models/running_output_test.go @@ -2,7 +2,6 @@ package internal_models import ( "fmt" - "sort" "sync" "testing" @@ -29,6 +28,62 @@ var next5 = []telegraf.Metric{ testutil.TestMetric(101, "metric10"), } +// Benchmark adding metrics. +func BenchmarkRunningOutputAddWrite(b *testing.B) { + conf := &OutputConfig{ + Filter: Filter{ + IsActive: false, + }, + } + + m := &perfOutput{} + ro := NewRunningOutput("test", m, conf, 1000, 10000) + ro.Quiet = true + + for n := 0; n < b.N; n++ { + ro.AddMetric(first5[0]) + ro.Write() + } +} + +// Benchmark adding metrics. +func BenchmarkRunningOutputAddWriteEvery100(b *testing.B) { + conf := &OutputConfig{ + Filter: Filter{ + IsActive: false, + }, + } + + m := &perfOutput{} + ro := NewRunningOutput("test", m, conf, 1000, 10000) + ro.Quiet = true + + for n := 0; n < b.N; n++ { + ro.AddMetric(first5[0]) + if n%100 == 0 { + ro.Write() + } + } +} + +// Benchmark adding metrics. +func BenchmarkRunningOutputAddFailWrites(b *testing.B) { + conf := &OutputConfig{ + Filter: Filter{ + IsActive: false, + }, + } + + m := &perfOutput{} + m.failWrite = true + ro := NewRunningOutput("test", m, conf, 1000, 10000) + ro.Quiet = true + + for n := 0; n < b.N; n++ { + ro.AddMetric(first5[0]) + } +} + // Test that NameDrop filters ger properly applied. 
func TestRunningOutput_DropFilter(t *testing.T) { conf := &OutputConfig{ @@ -40,7 +95,7 @@ func TestRunningOutput_DropFilter(t *testing.T) { assert.NoError(t, conf.Filter.CompileFilter()) m := &mockOutput{} - ro := NewRunningOutput("test", m, conf) + ro := NewRunningOutput("test", m, conf, 1000, 10000) for _, metric := range first5 { ro.AddMetric(metric) @@ -66,7 +121,7 @@ func TestRunningOutput_PassFilter(t *testing.T) { assert.NoError(t, conf.Filter.CompileFilter()) m := &mockOutput{} - ro := NewRunningOutput("test", m, conf) + ro := NewRunningOutput("test", m, conf, 1000, 10000) for _, metric := range first5 { ro.AddMetric(metric) @@ -92,7 +147,7 @@ func TestRunningOutput_TagIncludeNoMatch(t *testing.T) { assert.NoError(t, conf.Filter.CompileFilter()) m := &mockOutput{} - ro := NewRunningOutput("test", m, conf) + ro := NewRunningOutput("test", m, conf, 1000, 10000) ro.AddMetric(first5[0]) assert.Len(t, m.Metrics(), 0) @@ -114,7 +169,7 @@ func TestRunningOutput_TagExcludeMatch(t *testing.T) { assert.NoError(t, conf.Filter.CompileFilter()) m := &mockOutput{} - ro := NewRunningOutput("test", m, conf) + ro := NewRunningOutput("test", m, conf, 1000, 10000) ro.AddMetric(first5[0]) assert.Len(t, m.Metrics(), 0) @@ -136,7 +191,7 @@ func TestRunningOutput_TagExcludeNoMatch(t *testing.T) { assert.NoError(t, conf.Filter.CompileFilter()) m := &mockOutput{} - ro := NewRunningOutput("test", m, conf) + ro := NewRunningOutput("test", m, conf, 1000, 10000) ro.AddMetric(first5[0]) assert.Len(t, m.Metrics(), 0) @@ -158,7 +213,7 @@ func TestRunningOutput_TagIncludeMatch(t *testing.T) { assert.NoError(t, conf.Filter.CompileFilter()) m := &mockOutput{} - ro := NewRunningOutput("test", m, conf) + ro := NewRunningOutput("test", m, conf, 1000, 10000) ro.AddMetric(first5[0]) assert.Len(t, m.Metrics(), 0) @@ -178,7 +233,7 @@ func TestRunningOutputDefault(t *testing.T) { } m := &mockOutput{} - ro := NewRunningOutput("test", m, conf) + ro := NewRunningOutput("test", m, conf, 1000, 10000) for _, metric := range first5 { ro.AddMetric(metric) @@ -193,77 +248,6 @@ func TestRunningOutputDefault(t *testing.T) { assert.Len(t, m.Metrics(), 10) } -// Test that the first metrics batch gets overwritten if there is a buffer overflow. -func TestRunningOutputOverwrite(t *testing.T) { - conf := &OutputConfig{ - Filter: Filter{ - IsActive: false, - }, - } - - m := &mockOutput{} - ro := NewRunningOutput("test", m, conf) - ro.MetricBatchSize = 1 - ro.MetricBufferLimit = 4 - - for _, metric := range first5 { - ro.AddMetric(metric) - } - require.Len(t, m.Metrics(), 0) - - err := ro.Write() - require.NoError(t, err) - require.Len(t, m.Metrics(), 4) - - var expected, actual []string - for i, exp := range first5[1:] { - expected = append(expected, exp.String()) - actual = append(actual, m.Metrics()[i].String()) - } - - sort.Strings(expected) - sort.Strings(actual) - - assert.Equal(t, expected, actual) -} - -// Test that multiple buffer overflows are handled properly. 
-func TestRunningOutputMultiOverwrite(t *testing.T) { - conf := &OutputConfig{ - Filter: Filter{ - IsActive: false, - }, - } - - m := &mockOutput{} - ro := NewRunningOutput("test", m, conf) - ro.MetricBatchSize = 1 - ro.MetricBufferLimit = 3 - - for _, metric := range first5 { - ro.AddMetric(metric) - } - for _, metric := range next5 { - ro.AddMetric(metric) - } - require.Len(t, m.Metrics(), 0) - - err := ro.Write() - require.NoError(t, err) - require.Len(t, m.Metrics(), 3) - - var expected, actual []string - for i, exp := range next5[2:] { - expected = append(expected, exp.String()) - actual = append(actual, m.Metrics()[i].String()) - } - - sort.Strings(expected) - sort.Strings(actual) - - assert.Equal(t, expected, actual) -} - // Test that running output doesn't flush until it's full when // FlushBufferWhenFull is set. func TestRunningOutputFlushWhenFull(t *testing.T) { @@ -274,12 +258,9 @@ func TestRunningOutputFlushWhenFull(t *testing.T) { } m := &mockOutput{} - ro := NewRunningOutput("test", m, conf) - ro.FlushBufferWhenFull = true - ro.MetricBatchSize = 5 - ro.MetricBufferLimit = 10 + ro := NewRunningOutput("test", m, conf, 6, 10) - // Fill buffer to limit + // Fill buffer to 1 under limit for _, metric := range first5 { ro.AddMetric(metric) } @@ -289,7 +270,7 @@ func TestRunningOutputFlushWhenFull(t *testing.T) { // add one more metric ro.AddMetric(next5[0]) // now it flushed - assert.Len(t, m.Metrics(), 5) + assert.Len(t, m.Metrics(), 6) // add one more metric and write it manually ro.AddMetric(next5[1]) @@ -308,10 +289,7 @@ func TestRunningOutputMultiFlushWhenFull(t *testing.T) { } m := &mockOutput{} - ro := NewRunningOutput("test", m, conf) - ro.FlushBufferWhenFull = true - ro.MetricBatchSize = 4 - ro.MetricBufferLimit = 12 + ro := NewRunningOutput("test", m, conf, 4, 12) // Fill buffer past limit twive for _, metric := range first5 { @@ -333,12 +311,9 @@ func TestRunningOutputWriteFail(t *testing.T) { m := &mockOutput{} m.failWrite = true - ro := NewRunningOutput("test", m, conf) - ro.FlushBufferWhenFull = true - ro.MetricBatchSize = 4 - ro.MetricBufferLimit = 12 + ro := NewRunningOutput("test", m, conf, 4, 12) - // Fill buffer past limit twice + // Fill buffer to limit twice for _, metric := range first5 { ro.AddMetric(metric) } @@ -361,6 +336,161 @@ func TestRunningOutputWriteFail(t *testing.T) { assert.Len(t, m.Metrics(), 10) } +// Verify that the order of points is preserved during a write failure. +func TestRunningOutputWriteFailOrder(t *testing.T) { + conf := &OutputConfig{ + Filter: Filter{ + IsActive: false, + }, + } + + m := &mockOutput{} + m.failWrite = true + ro := NewRunningOutput("test", m, conf, 100, 1000) + + // add 5 metrics + for _, metric := range first5 { + ro.AddMetric(metric) + } + // no successful flush yet + assert.Len(t, m.Metrics(), 0) + + // Write fails + err := ro.Write() + require.Error(t, err) + // no successful flush yet + assert.Len(t, m.Metrics(), 0) + + m.failWrite = false + // add 5 more metrics + for _, metric := range next5 { + ro.AddMetric(metric) + } + err = ro.Write() + require.NoError(t, err) + + // Verify that 10 metrics were written + assert.Len(t, m.Metrics(), 10) + // Verify that they are in order + expected := append(first5, next5...) + assert.Equal(t, expected, m.Metrics()) +} + +// Verify that the order of points is preserved during many write failures. 
+func TestRunningOutputWriteFailOrder2(t *testing.T) { + conf := &OutputConfig{ + Filter: Filter{ + IsActive: false, + }, + } + + m := &mockOutput{} + m.failWrite = true + ro := NewRunningOutput("test", m, conf, 5, 100) + + // add 5 metrics + for _, metric := range first5 { + ro.AddMetric(metric) + } + // Write fails + err := ro.Write() + require.Error(t, err) + // no successful flush yet + assert.Len(t, m.Metrics(), 0) + + // add 5 metrics + for _, metric := range next5 { + ro.AddMetric(metric) + } + // Write fails + err = ro.Write() + require.Error(t, err) + // no successful flush yet + assert.Len(t, m.Metrics(), 0) + + // add 5 metrics + for _, metric := range first5 { + ro.AddMetric(metric) + } + // Write fails + err = ro.Write() + require.Error(t, err) + // no successful flush yet + assert.Len(t, m.Metrics(), 0) + + // add 5 metrics + for _, metric := range next5 { + ro.AddMetric(metric) + } + // Write fails + err = ro.Write() + require.Error(t, err) + // no successful flush yet + assert.Len(t, m.Metrics(), 0) + + m.failWrite = false + err = ro.Write() + require.NoError(t, err) + + // Verify that 10 metrics were written + assert.Len(t, m.Metrics(), 20) + // Verify that they are in order + expected := append(first5, next5...) + expected = append(expected, first5...) + expected = append(expected, next5...) + assert.Equal(t, expected, m.Metrics()) +} + +// Verify that the order of points is preserved when there is a remainder +// of points for the batch. +// +// ie, with a batch size of 5: +// +// 1 2 3 4 5 6 <-- order, failed points +// 6 1 2 3 4 5 <-- order, after 1st write failure (1 2 3 4 5 was batch) +// 1 2 3 4 5 6 <-- order, after 2nd write failure, (6 was batch) +// +func TestRunningOutputWriteFailOrder3(t *testing.T) { + conf := &OutputConfig{ + Filter: Filter{ + IsActive: false, + }, + } + + m := &mockOutput{} + m.failWrite = true + ro := NewRunningOutput("test", m, conf, 5, 1000) + + // add 5 metrics + for _, metric := range first5 { + ro.AddMetric(metric) + } + // no successful flush yet + assert.Len(t, m.Metrics(), 0) + + // Write fails + err := ro.Write() + require.Error(t, err) + // no successful flush yet + assert.Len(t, m.Metrics(), 0) + + // add and attempt to write a single metric: + ro.AddMetric(next5[0]) + err = ro.Write() + require.Error(t, err) + + // unset fail and write metrics + m.failWrite = false + err = ro.Write() + require.NoError(t, err) + + // Verify that 6 metrics were written + assert.Len(t, m.Metrics(), 6) + // Verify that they are in order + expected := append(first5, next5[0]) + assert.Equal(t, expected, m.Metrics()) +} + type mockOutput struct { sync.Mutex @@ -408,3 +538,31 @@ func (m *mockOutput) Metrics() []telegraf.Metric { defer m.Unlock() return m.metrics } + +type perfOutput struct { + // if true, mock a write failure + failWrite bool +} + +func (m *perfOutput) Connect() error { + return nil +} + +func (m *perfOutput) Close() error { + return nil +} + +func (m *perfOutput) Description() string { + return "" +} + +func (m *perfOutput) SampleConfig() string { + return "" +} + +func (m *perfOutput) Write(metrics []telegraf.Metric) error { + if m.failWrite { + return fmt.Errorf("Failed Write!") + } + return nil +} From ee4f4d78005a386d55e9805374be75a096713982 Mon Sep 17 00:00:00 2001 From: Cameron Sparr Date: Wed, 27 Apr 2016 15:08:38 -0600 Subject: [PATCH 63/84] ping plugin: Set default timeout --- plugins/inputs/ping/ping.go | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/plugins/inputs/ping/ping.go 
b/plugins/inputs/ping/ping.go index 1798a5eb7..6c26acb8a 100644 --- a/plugins/inputs/ping/ping.go +++ b/plugins/inputs/ping/ping.go @@ -43,18 +43,18 @@ func (_ *Ping) Description() string { return "Ping given url(s) and return statistics" } -var sampleConfig = ` +const sampleConfig = ` ## NOTE: this plugin forks the ping command. You may need to set capabilities ## via setcap cap_net_raw+p /bin/ping - + # ## urls to ping urls = ["www.google.com"] # required - ## number of pings to send (ping -c ) + ## number of pings to send per collection (ping -c ) count = 1 # required ## interval, in s, at which to ping. 0 == default (ping -i ) ping_interval = 0.0 - ## ping timeout, in s. 0 == no timeout (ping -t ) - timeout = 0.0 + ## ping timeout, in s. 0 == no timeout (ping -W ) + timeout = 1.0 ## interface to send ping from (ping -I ) interface = "" ` @@ -71,16 +71,16 @@ func (p *Ping) Gather(acc telegraf.Accumulator) error { // Spin off a go routine for each url to ping for _, url := range p.Urls { wg.Add(1) - go func(url string, acc telegraf.Accumulator) { + go func(u string) { defer wg.Done() - args := p.args(url) + args := p.args(u) out, err := p.pingHost(args...) if err != nil { // Combine go err + stderr output errorChannel <- errors.New( strings.TrimSpace(out) + ", " + err.Error()) } - tags := map[string]string{"url": url} + tags := map[string]string{"url": u} trans, rec, avg, err := processPingOutput(out) if err != nil { // fatal error @@ -98,7 +98,7 @@ func (p *Ping) Gather(acc telegraf.Accumulator) error { fields["average_response_ms"] = avg } acc.AddFields("ping", fields, tags) - }(url, acc) + }(url) } wg.Wait() From 89f2c0b0a4cda76cea45cb40f0ddd79f12b48b55 Mon Sep 17 00:00:00 2001 From: Cameron Sparr Date: Wed, 27 Apr 2016 08:29:51 -0600 Subject: [PATCH 64/84] Cassandra: update plugin supported prefix & fix panic fixes #1102 --- plugins/inputs/cassandra/cassandra.go | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/plugins/inputs/cassandra/cassandra.go b/plugins/inputs/cassandra/cassandra.go index f49a74ab0..351232aca 100644 --- a/plugins/inputs/cassandra/cassandra.go +++ b/plugins/inputs/cassandra/cassandra.go @@ -7,6 +7,7 @@ import ( "github.com/influxdata/telegraf" "github.com/influxdata/telegraf/plugins/inputs" "io/ioutil" + "log" "net/http" "net/url" "strings" @@ -263,15 +264,19 @@ func (c *Cassandra) Gather(acc telegraf.Accumulator) error { for _, server := range servers { for _, metric := range metrics { - var m jmxMetric - serverTokens := parseServerTokens(server) + var m jmxMetric if strings.HasPrefix(metric, "/java.lang:") { m = newJavaMetric(serverTokens["host"], metric, acc) } else if strings.HasPrefix(metric, "/org.apache.cassandra.metrics:") { m = newCassandraMetric(serverTokens["host"], metric, acc) + } else { + // unsupported metric type + log.Printf("Unsupported Cassandra metric [%s], skipping", + metric) + continue } // Prepare URL From a7b0861436ff5fc29f5a23b145482cce47e9e96f Mon Sep 17 00:00:00 2001 From: Jesse Hanley Date: Wed, 27 Apr 2016 15:14:25 -0400 Subject: [PATCH 65/84] Adding Jobstats support to Lustre2 input plugin Lustre Jobstats allows for RPCs to be tagged with a value, such as a job's ID. This allows for per job statistics. This plugin collects statistics and tags the data with the jobid. 
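For illustration, the mapping this enables looks roughly like the following (the job_stats excerpt and the resulting tags/fields are taken from the test fixture added in this patch; the line-protocol rendering is approximate):

```
# /proc/fs/lustre/obdfilter/OST0001/job_stats (excerpt)
job_stats:
- job_id: testjob1
  read_bytes:  { samples: 1,  unit: bytes, min: 4096,    max: 4096,    sum: 4096 }
  write_bytes: { samples: 25, unit: bytes, min: 1048576, max: 1048576, sum: 26214400 }

# approximate resulting point, tagged with the jobid
lustre2,name=OST0001,jobid=testjob1 jobstats_read_calls=1i,jobstats_read_bytes=4096i,jobstats_write_calls=25i,jobstats_write_bytes=26214400i
```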
closes #1107 --- CHANGELOG.md | 1 + plugins/inputs/lustre2/lustre2.go | 266 ++++++++++++++++++++++++- plugins/inputs/lustre2/lustre2_test.go | 118 +++++++++++ 3 files changed, 378 insertions(+), 7 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 306083da0..39383be71 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -57,6 +57,7 @@ based on _prefix_ in addition to globs. This means that a filter like - [#1086](https://github.com/influxdata/telegraf/pull/1086): Ability to specify AWS keys in config file. Thanks @johnrengleman! - [#1096](https://github.com/influxdata/telegraf/pull/1096): Performance refactor of running output buffers. - [#967](https://github.com/influxdata/telegraf/issues/967): Buffer logging improvements. +- [#1107](https://github.com/influxdata/telegraf/issues/1107): Support lustre2 job stats. Thanks @hanleyja! ### Bugfixes diff --git a/plugins/inputs/lustre2/lustre2.go b/plugins/inputs/lustre2/lustre2.go index 6ac41d391..8ef9223b5 100644 --- a/plugins/inputs/lustre2/lustre2.go +++ b/plugins/inputs/lustre2/lustre2.go @@ -34,9 +34,13 @@ var sampleConfig = ` ## # ost_procfiles = [ # "/proc/fs/lustre/obdfilter/*/stats", - # "/proc/fs/lustre/osd-ldiskfs/*/stats" + # "/proc/fs/lustre/osd-ldiskfs/*/stats", + # "/proc/fs/lustre/obdfilter/*/job_stats", + # ] + # mds_procfiles = [ + # "/proc/fs/lustre/mdt/*/md_stats", + # "/proc/fs/lustre/mdt/*/job_stats", # ] - # mds_procfiles = ["/proc/fs/lustre/mdt/*/md_stats"] ` /* The wanted fields would be a []string if not for the @@ -82,6 +86,139 @@ var wanted_ost_fields = []*mapping{ }, } +var wanted_ost_jobstats_fields = []*mapping{ + { // The read line has several fields, so we need to differentiate what they are + inProc: "read", + field: 3, + reportAs: "jobstats_read_calls", + }, + { + inProc: "read", + field: 7, + reportAs: "jobstats_read_min_size", + }, + { + inProc: "read", + field: 9, + reportAs: "jobstats_read_max_size", + }, + { + inProc: "read", + field: 11, + reportAs: "jobstats_read_bytes", + }, + { // Different inProc for newer versions + inProc: "read_bytes", + field: 3, + reportAs: "jobstats_read_calls", + }, + { + inProc: "read_bytes", + field: 7, + reportAs: "jobstats_read_min_size", + }, + { + inProc: "read_bytes", + field: 9, + reportAs: "jobstats_read_max_size", + }, + { + inProc: "read_bytes", + field: 11, + reportAs: "jobstats_read_bytes", + }, + { // We need to do the same for the write fields + inProc: "write", + field: 3, + reportAs: "jobstats_write_calls", + }, + { + inProc: "write", + field: 7, + reportAs: "jobstats_write_min_size", + }, + { + inProc: "write", + field: 9, + reportAs: "jobstats_write_max_size", + }, + { + inProc: "write", + field: 11, + reportAs: "jobstats_write_bytes", + }, + { // Different inProc for newer versions + inProc: "write_bytes", + field: 3, + reportAs: "jobstats_write_calls", + }, + { + inProc: "write_bytes", + field: 7, + reportAs: "jobstats_write_min_size", + }, + { + inProc: "write_bytes", + field: 9, + reportAs: "jobstats_write_max_size", + }, + { + inProc: "write_bytes", + field: 11, + reportAs: "jobstats_write_bytes", + }, + { + inProc: "getattr", + field: 3, + reportAs: "jobstats_ost_getattr", + }, + { + inProc: "setattr", + field: 3, + reportAs: "jobstats_ost_setattr", + }, + { + inProc: "punch", + field: 3, + reportAs: "jobstats_punch", + }, + { + inProc: "sync", + field: 3, + reportAs: "jobstats_ost_sync", + }, + { + inProc: "destroy", + field: 3, + reportAs: "jobstats_destroy", + }, + { + inProc: "create", + field: 3, + reportAs: "jobstats_create", + }, + { + 
inProc: "statfs", + field: 3, + reportAs: "jobstats_ost_statfs", + }, + { + inProc: "get_info", + field: 3, + reportAs: "jobstats_get_info", + }, + { + inProc: "set_info", + field: 3, + reportAs: "jobstats_set_info", + }, + { + inProc: "quotactl", + field: 3, + reportAs: "jobstats_quotactl", + }, +} + var wanted_mds_fields = []*mapping{ { inProc: "open", @@ -133,6 +270,89 @@ var wanted_mds_fields = []*mapping{ }, } +var wanted_mdt_jobstats_fields = []*mapping{ + { + inProc: "open", + field: 3, + reportAs: "jobstats_open", + }, + { + inProc: "close", + field: 3, + reportAs: "jobstats_close", + }, + { + inProc: "mknod", + field: 3, + reportAs: "jobstats_mknod", + }, + { + inProc: "link", + field: 3, + reportAs: "jobstats_link", + }, + { + inProc: "unlink", + field: 3, + reportAs: "jobstats_unlink", + }, + { + inProc: "mkdir", + field: 3, + reportAs: "jobstats_mkdir", + }, + { + inProc: "rmdir", + field: 3, + reportAs: "jobstats_rmdir", + }, + { + inProc: "rename", + field: 3, + reportAs: "jobstats_rename", + }, + { + inProc: "getattr", + field: 3, + reportAs: "jobstats_getattr", + }, + { + inProc: "setattr", + field: 3, + reportAs: "jobstats_setattr", + }, + { + inProc: "getxattr", + field: 3, + reportAs: "jobstats_getxattr", + }, + { + inProc: "setxattr", + field: 3, + reportAs: "jobstats_setxattr", + }, + { + inProc: "statfs", + field: 3, + reportAs: "jobstats_statfs", + }, + { + inProc: "sync", + field: 3, + reportAs: "jobstats_sync", + }, + { + inProc: "samedir_rename", + field: 3, + reportAs: "jobstats_samedir_rename", + }, + { + inProc: "crossdir_rename", + field: 3, + reportAs: "jobstats_crossdir_rename", + }, +} + func (l *Lustre2) GetLustreProcStats(fileglob string, wanted_fields []*mapping, acc telegraf.Accumulator) error { files, err := filepath.Glob(fileglob) if err != nil { @@ -143,7 +363,7 @@ func (l *Lustre2) GetLustreProcStats(fileglob string, wanted_fields []*mapping, /* Turn /proc/fs/lustre/obdfilter//stats and similar * into just the object store target name * Assumpion: the target name is always second to last, - * which is true in Lustre 2.1->2.5 + * which is true in Lustre 2.1->2.8 */ path := strings.Split(file, "/") name := path[len(path)-2] @@ -161,16 +381,21 @@ func (l *Lustre2) GetLustreProcStats(fileglob string, wanted_fields []*mapping, for _, line := range lines { parts := strings.Fields(line) + if strings.HasPrefix(line, "- job_id:") { + // Set the job_id explicitly if present + fields["jobid"] = parts[2] + } + for _, wanted := range wanted_fields { var data uint64 - if parts[0] == wanted.inProc { + if strings.TrimSuffix(parts[0], ":") == wanted.inProc { wanted_field := wanted.field // if not set, assume field[1]. 
Shouldn't be field[0], as // that's a string if wanted_field == 0 { wanted_field = 1 } - data, err = strconv.ParseUint((parts[wanted_field]), 10, 64) + data, err = strconv.ParseUint(strings.TrimSuffix((parts[wanted_field]), ","), 10, 64) if err != nil { return err } @@ -213,6 +438,12 @@ func (l *Lustre2) Gather(acc telegraf.Accumulator) error { if err != nil { return err } + // per job statistics are in obdfilter//job_stats + err = l.GetLustreProcStats("/proc/fs/lustre/obdfilter/*/job_stats", + wanted_ost_jobstats_fields, acc) + if err != nil { + return err + } } if len(l.Mds_procfiles) == 0 { @@ -222,16 +453,31 @@ func (l *Lustre2) Gather(acc telegraf.Accumulator) error { if err != nil { return err } + + // Metadata target job stats + err = l.GetLustreProcStats("/proc/fs/lustre/mdt/*/job_stats", + wanted_mdt_jobstats_fields, acc) + if err != nil { + return err + } } for _, procfile := range l.Ost_procfiles { - err := l.GetLustreProcStats(procfile, wanted_ost_fields, acc) + ost_fields := wanted_ost_fields + if strings.HasSuffix(procfile, "job_stats") { + ost_fields = wanted_ost_jobstats_fields + } + err := l.GetLustreProcStats(procfile, ost_fields, acc) if err != nil { return err } } for _, procfile := range l.Mds_procfiles { - err := l.GetLustreProcStats(procfile, wanted_mds_fields, acc) + mdt_fields := wanted_mds_fields + if strings.HasSuffix(procfile, "job_stats") { + mdt_fields = wanted_mdt_jobstats_fields + } + err := l.GetLustreProcStats(procfile, mdt_fields, acc) if err != nil { return err } @@ -241,6 +487,12 @@ func (l *Lustre2) Gather(acc telegraf.Accumulator) error { tags := map[string]string{ "name": name, } + if _, ok := fields["jobid"]; ok { + if jobid, ok := fields["jobid"].(string); ok { + tags["jobid"] = jobid + } + delete(fields, "jobid") + } acc.AddFields("lustre2", fields, tags) } diff --git a/plugins/inputs/lustre2/lustre2_test.go b/plugins/inputs/lustre2/lustre2_test.go index 9e560df2c..5cc9c0e43 100644 --- a/plugins/inputs/lustre2/lustre2_test.go +++ b/plugins/inputs/lustre2/lustre2_test.go @@ -38,6 +38,23 @@ cache_hit 7393729777 samples [pages] 1 1 7393729777 cache_miss 11653333250 samples [pages] 1 1 11653333250 ` +const obdfilterJobStatsContents = `job_stats: +- job_id: testjob1 + snapshot_time: 1461772761 + read_bytes: { samples: 1, unit: bytes, min: 4096, max: 4096, sum: 4096 } + write_bytes: { samples: 25, unit: bytes, min: 1048576, max: 1048576, sum: 26214400 } + getattr: { samples: 0, unit: reqs } + setattr: { samples: 0, unit: reqs } + punch: { samples: 1, unit: reqs } + sync: { samples: 0, unit: reqs } + destroy: { samples: 0, unit: reqs } + create: { samples: 0, unit: reqs } + statfs: { samples: 0, unit: reqs } + get_info: { samples: 0, unit: reqs } + set_info: { samples: 0, unit: reqs } + quotactl: { samples: 0, unit: reqs } +` + const mdtProcContents = `snapshot_time 1438693238.20113 secs.usecs open 1024577037 samples [reqs] close 873243496 samples [reqs] @@ -57,6 +74,27 @@ samedir_rename 259625 samples [reqs] crossdir_rename 369571 samples [reqs] ` +const mdtJobStatsContents = `job_stats: +- job_id: testjob1 + snapshot_time: 1461772761 + open: { samples: 5, unit: reqs } + close: { samples: 4, unit: reqs } + mknod: { samples: 6, unit: reqs } + link: { samples: 8, unit: reqs } + unlink: { samples: 90, unit: reqs } + mkdir: { samples: 521, unit: reqs } + rmdir: { samples: 520, unit: reqs } + rename: { samples: 9, unit: reqs } + getattr: { samples: 11, unit: reqs } + setattr: { samples: 1, unit: reqs } + getxattr: { samples: 3, unit: reqs } + setxattr: { 
samples: 4, unit: reqs } + statfs: { samples: 1205, unit: reqs } + sync: { samples: 2, unit: reqs } + samedir_rename: { samples: 705, unit: reqs } + crossdir_rename: { samples: 200, unit: reqs } +` + func TestLustre2GeneratesMetrics(t *testing.T) { tempdir := os.TempDir() + "/telegraf/proc/fs/lustre/" @@ -83,6 +121,7 @@ func TestLustre2GeneratesMetrics(t *testing.T) { err = ioutil.WriteFile(obddir+"/"+ost_name+"/stats", []byte(obdfilterProcContents), 0644) require.NoError(t, err) + // Begin by testing standard Lustre stats m := &Lustre2{ Ost_procfiles: []string{obddir + "/*/stats", osddir + "/*/stats"}, Mds_procfiles: []string{mdtdir + "/*/md_stats"}, @@ -128,3 +167,82 @@ func TestLustre2GeneratesMetrics(t *testing.T) { err = os.RemoveAll(os.TempDir() + "/telegraf") require.NoError(t, err) } + +func TestLustre2GeneratesJobstatsMetrics(t *testing.T) { + + tempdir := os.TempDir() + "/telegraf/proc/fs/lustre/" + ost_name := "OST0001" + job_name := "testjob1" + + mdtdir := tempdir + "/mdt/" + err := os.MkdirAll(mdtdir+"/"+ost_name, 0755) + require.NoError(t, err) + + obddir := tempdir + "/obdfilter/" + err = os.MkdirAll(obddir+"/"+ost_name, 0755) + require.NoError(t, err) + + err = ioutil.WriteFile(mdtdir+"/"+ost_name+"/job_stats", []byte(mdtJobStatsContents), 0644) + require.NoError(t, err) + + err = ioutil.WriteFile(obddir+"/"+ost_name+"/job_stats", []byte(obdfilterJobStatsContents), 0644) + require.NoError(t, err) + + // Test Lustre Jobstats + m := &Lustre2{ + Ost_procfiles: []string{obddir + "/*/job_stats"}, + Mds_procfiles: []string{mdtdir + "/*/job_stats"}, + } + + var acc testutil.Accumulator + + err = m.Gather(&acc) + require.NoError(t, err) + + tags := map[string]string{ + "name": ost_name, + "jobid": job_name, + } + + fields := map[string]interface{}{ + "jobstats_read_calls": uint64(1), + "jobstats_read_min_size": uint64(4096), + "jobstats_read_max_size": uint64(4096), + "jobstats_read_bytes": uint64(4096), + "jobstats_write_calls": uint64(25), + "jobstats_write_min_size": uint64(1048576), + "jobstats_write_max_size": uint64(1048576), + "jobstats_write_bytes": uint64(26214400), + "jobstats_ost_getattr": uint64(0), + "jobstats_ost_setattr": uint64(0), + "jobstats_punch": uint64(1), + "jobstats_ost_sync": uint64(0), + "jobstats_destroy": uint64(0), + "jobstats_create": uint64(0), + "jobstats_ost_statfs": uint64(0), + "jobstats_get_info": uint64(0), + "jobstats_set_info": uint64(0), + "jobstats_quotactl": uint64(0), + "jobstats_open": uint64(5), + "jobstats_close": uint64(4), + "jobstats_mknod": uint64(6), + "jobstats_link": uint64(8), + "jobstats_unlink": uint64(90), + "jobstats_mkdir": uint64(521), + "jobstats_rmdir": uint64(520), + "jobstats_rename": uint64(9), + "jobstats_getattr": uint64(11), + "jobstats_setattr": uint64(1), + "jobstats_getxattr": uint64(3), + "jobstats_setxattr": uint64(4), + "jobstats_statfs": uint64(1205), + "jobstats_sync": uint64(2), + "jobstats_samedir_rename": uint64(705), + "jobstats_crossdir_rename": uint64(200), + } + + acc.AssertContainsTaggedFields(t, "lustre2", fields, tags) + + err = os.RemoveAll(os.TempDir() + "/telegraf") + require.NoError(t, err) +} From cf5980ace21a9a34d8e51f280b17e5cd6c483b6b Mon Sep 17 00:00:00 2001 From: Simone Aiello Date: Thu, 14 Apr 2016 23:00:41 +0200 Subject: [PATCH 66/84] jolokia: add proxy mode --- plugins/inputs/jolokia/README.md | 32 ++-- plugins/inputs/jolokia/jolokia.go | 222 +++++++++++++++++++------ plugins/inputs/jolokia/jolokia_test.go | 21 ++- 3 files changed, 212 insertions(+), 63 deletions(-) diff --git 
a/plugins/inputs/jolokia/README.md b/plugins/inputs/jolokia/README.md index 5c7db6230..cec3c95ce 100644 --- a/plugins/inputs/jolokia/README.md +++ b/plugins/inputs/jolokia/README.md @@ -5,13 +5,22 @@ ```toml [[inputs.jolokia]] ## This is the context root used to compose the jolokia url - context = "/jolokia/read" + context = "/jolokia" - ## List of servers exposing jolokia read service + # This specifies the mode used + # mode = "proxy" + # + # When in proxy mode this section is used to specify further proxy address configurations. + # Remember to change servers addresses + # [inputs.jolokia.proxy] + # host = "127.0.0.1" + # port = "8080" + + # List of servers exposing jolokia read service [[inputs.jolokia.servers]] - name = "stable" - host = "192.168.103.2" - port = "8180" + name = "as-server-01" + host = "127.0.0.1" + port = "8080" # username = "myuser" # password = "mypassword" @@ -21,17 +30,20 @@ ## This collect all heap memory usage metrics. [[inputs.jolokia.metrics]] name = "heap_memory_usage" - jmx = "/java.lang:type=Memory/HeapMemoryUsage" - + mbean = "java.lang:type=Memory" + attribute = "HeapMemoryUsage" + ## This collect thread counts metrics. [[inputs.jolokia.metrics]] name = "thread_count" - jmx = "/java.lang:type=Threading/TotalStartedThreadCount,ThreadCount,DaemonThreadCount,PeakThreadCount" - + mbean = "java.lang:type=Threading" + attribute = "TotalStartedThreadCount,ThreadCount,DaemonThreadCount,PeakThreadCount" + ## This collect number of class loaded/unloaded counts metrics. [[inputs.jolokia.metrics]] name = "class_count" - jmx = "/java.lang:type=ClassLoading/LoadedClassCount,UnloadedClassCount,TotalLoadedClassCount" + mbean = "java.lang:type=ClassLoading" + attribute = "LoadedClassCount,UnloadedClassCount,TotalLoadedClassCount" ``` #### Description diff --git a/plugins/inputs/jolokia/jolokia.go b/plugins/inputs/jolokia/jolokia.go index 15a01d5de..64835366e 100644 --- a/plugins/inputs/jolokia/jolokia.go +++ b/plugins/inputs/jolokia/jolokia.go @@ -8,6 +8,7 @@ import ( "net/http" "net/url" "time" + "bytes" "github.com/influxdata/telegraf" "github.com/influxdata/telegraf/plugins/inputs" @@ -23,7 +24,9 @@ type Server struct { type Metric struct { Name string - Jmx string + Mbean string + Attribute string + Path string } type JolokiaClient interface { @@ -41,20 +44,32 @@ func (c JolokiaClientImpl) MakeRequest(req *http.Request) (*http.Response, error type Jolokia struct { jClient JolokiaClient Context string + Mode string Servers []Server Metrics []Metric + Proxy Server } func (j *Jolokia) SampleConfig() string { return ` - ## This is the context root used to compose the jolokia url - context = "/jolokia/read" + # This is the context root used to compose the jolokia url + context = "/jolokia" - ## List of servers exposing jolokia read service + # This specifies the mode used + # mode = "proxy" + # + # When in proxy mode this section is used to specify further proxy address configurations. + # Remember to change servers addresses + # [inputs.jolokia.proxy] + # host = "127.0.0.1" + # port = "8080" + + + # List of servers exposing jolokia read service [[inputs.jolokia.servers]] - name = "stable" - host = "192.168.103.2" - port = "8180" + name = "as-server-01" + host = "127.0.0.1" + port = "8080" # username = "myuser" # password = "mypassword" @@ -64,17 +79,20 @@ func (j *Jolokia) SampleConfig() string { ## This collect all heap memory usage metrics. 
[[inputs.jolokia.metrics]] name = "heap_memory_usage" - jmx = "/java.lang:type=Memory/HeapMemoryUsage" - + mbean = "java.lang:type=Memory" + attribute = "HeapMemoryUsage" + ## This collect thread counts metrics. [[inputs.jolokia.metrics]] name = "thread_count" - jmx = "/java.lang:type=Threading/TotalStartedThreadCount,ThreadCount,DaemonThreadCount,PeakThreadCount" - + mbean = "java.lang:type=Threading" + attribute = "TotalStartedThreadCount,ThreadCount,DaemonThreadCount,PeakThreadCount" + ## This collect number of class loaded/unloaded counts metrics. [[inputs.jolokia.metrics]] name = "class_count" - jmx = "/java.lang:type=ClassLoading/LoadedClassCount,UnloadedClassCount,TotalLoadedClassCount" + mbean = "java.lang:type=ClassLoading" + attribute = "LoadedClassCount,UnloadedClassCount,TotalLoadedClassCount" ` } @@ -82,12 +100,7 @@ func (j *Jolokia) Description() string { return "Read JMX metrics through Jolokia" } -func (j *Jolokia) getAttr(requestUrl *url.URL) (map[string]interface{}, error) { - // Create + send request - req, err := http.NewRequest("GET", requestUrl.String(), nil) - if err != nil { - return nil, err - } +func (j *Jolokia) doRequest(req *http.Request) (map[string]interface{}, error) { resp, err := j.jClient.MakeRequest(req) if err != nil { @@ -98,7 +111,7 @@ func (j *Jolokia) getAttr(requestUrl *url.URL) (map[string]interface{}, error) { // Process response if resp.StatusCode != http.StatusOK { err = fmt.Errorf("Response from url \"%s\" has status code %d (%s), expected %d (%s)", - requestUrl, + req.RequestURI, resp.StatusCode, http.StatusText(resp.StatusCode), http.StatusOK, @@ -118,52 +131,159 @@ func (j *Jolokia) getAttr(requestUrl *url.URL) (map[string]interface{}, error) { return nil, errors.New("Error decoding JSON response") } + if status, ok := jsonOut["status"]; ok { + if status != float64(200) { + return nil, fmt.Errorf("Not expected status value in response body: %3.f", status) + } + } else { + return nil, fmt.Errorf("Missing status in response body") + } + return jsonOut, nil } +func (j *Jolokia) getAttr(requestUrl *url.URL) (map[string]interface{}, error) { + // Create + send request + req, err := http.NewRequest("GET", requestUrl.String(), nil) + if err != nil { + return nil, err + } + + return j.doRequest(req) +} + + +func (j *Jolokia) collectMeasurement(measurement string, out map[string]interface{}, fields map[string]interface{}) { + + if values, ok := out["value"]; ok { + switch t := values.(type) { + case map[string]interface{}: + for k, v := range t { + fields[measurement+"_"+k] = v + } + case interface{}: + fields[measurement] = t + } + } else { + fmt.Printf("Missing key 'value' in output response\n") + } + +} + + func (j *Jolokia) Gather(acc telegraf.Accumulator) error { - context := j.Context //"/jolokia/read" + context := j.Context // Usually "/jolokia" servers := j.Servers metrics := j.Metrics tags := make(map[string]string) + mode := j.Mode - for _, server := range servers { - tags["server"] = server.Name - tags["port"] = server.Port - tags["host"] = server.Host - fields := make(map[string]interface{}) - for _, metric := range metrics { + if( mode == "agent" || mode == ""){ - measurement := metric.Name - jmxPath := metric.Jmx + for _, server := range servers { + tags["server"] = server.Name + tags["port"] = server.Port + tags["host"] = server.Host + fields := make(map[string]interface{}) + for _, metric := range metrics { + + measurement := metric.Name + jmxPath := "/" + metric.Mbean + if metric.Attribute != "" { + jmxPath = jmxPath + "/" + 
metric.Attribute + + if metric.Path != "" { + jmxPath = jmxPath + "/" + metric.Path + } + } // Prepare URL - requestUrl, err := url.Parse("http://" + server.Host + ":" + - server.Port + context + jmxPath) - if err != nil { - return err - } - if server.Username != "" || server.Password != "" { - requestUrl.User = url.UserPassword(server.Username, server.Password) - } - - out, _ := j.getAttr(requestUrl) - - if values, ok := out["value"]; ok { - switch t := values.(type) { - case map[string]interface{}: - for k, v := range t { - fields[measurement+"_"+k] = v - } - case interface{}: - fields[measurement] = t + requestUrl, err := url.Parse("http://" + server.Host + ":" + + server.Port + context + "/read" + jmxPath) + if err != nil { + return err } - } else { - fmt.Printf("Missing key 'value' in '%s' output response\n", - requestUrl.String()) + if server.Username != "" || server.Password != "" { + requestUrl.User = url.UserPassword(server.Username, server.Password) + } + out, _ := j.getAttr(requestUrl) + j.collectMeasurement(measurement, out, fields) } + acc.AddFields("jolokia", fields, tags) } - acc.AddFields("jolokia", fields, tags) + + } else if ( mode == "proxy") { + + proxy := j.Proxy + + // Prepare ProxyURL + proxyURL, err := url.Parse("http://" + proxy.Host + ":" + + proxy.Port + context) + if err != nil { + return err + } + if proxy.Username != "" || proxy.Password != "" { + proxyURL.User = url.UserPassword(proxy.Username, proxy.Password) + } + + for _, server := range servers { + tags["server"] = server.Name + tags["port"] = server.Port + tags["host"] = server.Host + fields := make(map[string]interface{}) + for _, metric := range metrics { + + measurement := metric.Name + // Prepare URL + serviceUrl := fmt.Sprintf("service:jmx:rmi:///jndi/rmi://%s:%s/jmxrmi", server.Host, server.Port) + + target := map[string]string{ + "url": serviceUrl, + } + + if server.Username != "" { + target["user"] = server.Username + } + + if server.Password != "" { + target["password"] = server.Password + } + + // Create + send request + bodyContent := map[string]interface{}{ + "type": "read", + "mbean": metric.Mbean, + "target": target, + } + + if metric.Attribute != "" { + bodyContent["attribute"] = metric.Attribute + if metric.Path != "" { + bodyContent["path"] = metric.Path + } + } + + requestBody, err := json.Marshal(bodyContent) + + req, err := http.NewRequest("POST", proxyURL.String(), bytes.NewBuffer(requestBody)) + + if err != nil { + return err + } + + req.Header.Add("Content-type", "application/json") + + out, err := j.doRequest(req) + + if err != nil { + fmt.Printf("Error handling response: %s\n", err) + }else { + j.collectMeasurement(measurement, out, fields) + } + } + acc.AddFields("jolokia", fields, tags) + } + } return nil diff --git a/plugins/inputs/jolokia/jolokia_test.go b/plugins/inputs/jolokia/jolokia_test.go index 961ba7055..eb8fb12da 100644 --- a/plugins/inputs/jolokia/jolokia_test.go +++ b/plugins/inputs/jolokia/jolokia_test.go @@ -47,8 +47,10 @@ const invalidJSON = "I don't think this is JSON" const empty = "" var Servers = []Server{Server{Name: "as1", Host: "127.0.0.1", Port: "8080"}} -var HeapMetric = Metric{Name: "heap_memory_usage", Jmx: "/java.lang:type=Memory/HeapMemoryUsage"} -var UsedHeapMetric = Metric{Name: "heap_memory_usage", Jmx: "/java.lang:type=Memory/HeapMemoryUsage"} +var HeapMetric = Metric{Name: "heap_memory_usage", + Mbean: "java.lang:type=Memory", Attribute: "HeapMemoryUsage" } +var UsedHeapMetric = Metric{Name: "heap_memory_usage", + Mbean: "java.lang:type=Memory", 
Attribute: "HeapMemoryUsage"} type jolokiaClientStub struct { responseBody string @@ -114,3 +116,18 @@ func TestHttpJsonOn404(t *testing.T) { assert.Nil(t, err) assert.Equal(t, 0, len(acc.Metrics)) } + + +// Test that the proper values are ignored or collected +func TestHttpInvalidJson(t *testing.T) { + + jolokia := genJolokiaClientStub(invalidJSON, 200, Servers, + []Metric{UsedHeapMetric}) + + var acc testutil.Accumulator + acc.SetDebug(true) + err := jolokia.Gather(&acc) + + assert.Nil(t, err) + assert.Equal(t, 0, len(acc.Metrics)) +} From 18636ea628e76e70531698cc05dc80ca6af3bb06 Mon Sep 17 00:00:00 2001 From: Simone Aiello Date: Fri, 15 Apr 2016 00:38:46 +0200 Subject: [PATCH 67/84] jolokia: use always POST code refactor to use same prepareRequest method for both 'agent' and 'proxy' mode closes #1031 closes #1050 closes #473 --- CHANGELOG.md | 3 + plugins/inputs/jolokia/README.md | 2 +- plugins/inputs/jolokia/jolokia.go | 238 +++++++++++-------------- plugins/inputs/jolokia/jolokia_test.go | 9 +- 4 files changed, 116 insertions(+), 136 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 39383be71..a307b8764 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -82,8 +82,10 @@ https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_OUTPUT.md#g - Possible breaking change for the librato and graphite outputs. Telegraf will no longer insert field names when the field is simply named `value`. This is because the `value` field is redundant in the graphite/librato context. +- Breaking change in jolokia plugin. See https://github.com/influxdata/telegraf/blob/master/plugins/inputs/jolokia/README.md ### Features +- [#1031](https://github.com/influxdata/telegraf/pull/1031): Jolokia plugin proxy mode. Thanks @saiello! - [#1009](https://github.com/influxdata/telegraf/pull/1009): Cassandra input plugin. Thanks @subhachandrachandra! - [#976](https://github.com/influxdata/telegraf/pull/976): Reduce allocations in the UDP and statsd inputs. - [#979](https://github.com/influxdata/telegraf/pull/979): Reduce allocations in the TCP listener. @@ -96,6 +98,7 @@ because the `value` field is redundant in the graphite/librato context. - [#1008](https://github.com/influxdata/telegraf/pull/1008): Adding memstats metrics to the influxdb plugin. ### Bugfixes +- [#1050](https://github.com/influxdata/telegraf/issues/1050): jolokia plugin - do not overwrite host tag. Thanks @saiello! - [#968](https://github.com/influxdata/telegraf/issues/968): Processes plugin gets unknown state when spaces are in (command name) - [#969](https://github.com/influxdata/telegraf/pull/969): ipmi_sensors: allow : in password. Thanks @awaw! - [#972](https://github.com/influxdata/telegraf/pull/972): dovecot: remove extra newline in dovecot command. Thanks @mrannanj! diff --git a/plugins/inputs/jolokia/README.md b/plugins/inputs/jolokia/README.md index cec3c95ce..05ade3d01 100644 --- a/plugins/inputs/jolokia/README.md +++ b/plugins/inputs/jolokia/README.md @@ -54,4 +54,4 @@ are collected for each server configured. See: https://jolokia.org/ # Measurements: -Jolokia plugin produces one measure for each metric configured, adding Server's `name`, `host` and `port` as tags. +Jolokia plugin produces one measure for each metric configured, adding Server's `server_name`, `server_host` and `server_port` as tags. 
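For reference, the body of the POST request built for the heap-memory metric in the sample config above looks roughly like this (the shape mirrors the bodyContent map assembled in prepareRequest in the diff below; the target block is only present in proxy mode, and the host/port values are the sample-config placeholders):

```json
{
  "type": "read",
  "mbean": "java.lang:type=Memory",
  "attribute": "HeapMemoryUsage",
  "target": {
    "url": "service:jmx:rmi:///jndi/rmi://127.0.0.1:8080/jmxrmi"
  }
}
```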
diff --git a/plugins/inputs/jolokia/jolokia.go b/plugins/inputs/jolokia/jolokia.go index 64835366e..08256ce85 100644 --- a/plugins/inputs/jolokia/jolokia.go +++ b/plugins/inputs/jolokia/jolokia.go @@ -1,6 +1,7 @@ package jolokia import ( + "bytes" "encoding/json" "errors" "fmt" @@ -8,7 +9,6 @@ import ( "net/http" "net/url" "time" - "bytes" "github.com/influxdata/telegraf" "github.com/influxdata/telegraf/plugins/inputs" @@ -23,10 +23,10 @@ type Server struct { } type Metric struct { - Name string - Mbean string + Name string + Mbean string Attribute string - Path string + Path string } type JolokiaClient interface { @@ -44,28 +44,28 @@ func (c JolokiaClientImpl) MakeRequest(req *http.Request) (*http.Response, error type Jolokia struct { jClient JolokiaClient Context string - Mode string + Mode string Servers []Server Metrics []Metric - Proxy Server + Proxy Server } -func (j *Jolokia) SampleConfig() string { - return ` - # This is the context root used to compose the jolokia url +const sampleConfig = ` + ## This is the context root used to compose the jolokia url context = "/jolokia" - # This specifies the mode used + ## This specifies the mode used # mode = "proxy" # - # When in proxy mode this section is used to specify further proxy address configurations. - # Remember to change servers addresses + ## When in proxy mode this section is used to specify further + ## proxy address configurations. + ## Remember to change host address to fit your environment. # [inputs.jolokia.proxy] - # host = "127.0.0.1" - # port = "8080" + # host = "127.0.0.1" + # port = "8080" - # List of servers exposing jolokia read service + ## List of servers exposing jolokia read service [[inputs.jolokia.servers]] name = "as-server-01" host = "127.0.0.1" @@ -86,14 +86,17 @@ func (j *Jolokia) SampleConfig() string { [[inputs.jolokia.metrics]] name = "thread_count" mbean = "java.lang:type=Threading" - attribute = "TotalStartedThreadCount,ThreadCount,DaemonThreadCount,PeakThreadCount" + attribute = "TotalStartedThreadCount,ThreadCount,DaemonThreadCount,PeakThreadCount" ## This collect number of class loaded/unloaded counts metrics. 
[[inputs.jolokia.metrics]] name = "class_count" mbean = "java.lang:type=ClassLoading" - attribute = "LoadedClassCount,UnloadedClassCount,TotalLoadedClassCount" + attribute = "LoadedClassCount,UnloadedClassCount,TotalLoadedClassCount" ` + +func (j *Jolokia) SampleConfig() string { + return sampleConfig } func (j *Jolokia) Description() string { @@ -133,7 +136,8 @@ func (j *Jolokia) doRequest(req *http.Request) (map[string]interface{}, error) { if status, ok := jsonOut["status"]; ok { if status != float64(200) { - return nil, fmt.Errorf("Not expected status value in response body: %3.f", status) + return nil, fmt.Errorf("Not expected status value in response body: %3.f", + status) } } else { return nil, fmt.Errorf("Missing status in response body") @@ -142,148 +146,122 @@ func (j *Jolokia) doRequest(req *http.Request) (map[string]interface{}, error) { return jsonOut, nil } -func (j *Jolokia) getAttr(requestUrl *url.URL) (map[string]interface{}, error) { - // Create + send request - req, err := http.NewRequest("GET", requestUrl.String(), nil) - if err != nil { - return nil, err - } - - return j.doRequest(req) -} - - -func (j *Jolokia) collectMeasurement(measurement string, out map[string]interface{}, fields map[string]interface{}) { - - if values, ok := out["value"]; ok { - switch t := values.(type) { - case map[string]interface{}: - for k, v := range t { - fields[measurement+"_"+k] = v - } - case interface{}: - fields[measurement] = t - } - } else { - fmt.Printf("Missing key 'value' in output response\n") - } - -} - - -func (j *Jolokia) Gather(acc telegraf.Accumulator) error { +func (j *Jolokia) prepareRequest(server Server, metric Metric) (*http.Request, error) { + var jolokiaUrl *url.URL context := j.Context // Usually "/jolokia" - servers := j.Servers - metrics := j.Metrics - tags := make(map[string]string) - mode := j.Mode - if( mode == "agent" || mode == ""){ + // Create bodyContent + bodyContent := map[string]interface{}{ + "type": "read", + "mbean": metric.Mbean, + } - for _, server := range servers { - tags["server"] = server.Name - tags["port"] = server.Port - tags["host"] = server.Host - fields := make(map[string]interface{}) - for _, metric := range metrics { + if metric.Attribute != "" { + bodyContent["attribute"] = metric.Attribute + if metric.Path != "" { + bodyContent["path"] = metric.Path + } + } - measurement := metric.Name - jmxPath := "/" + metric.Mbean - if metric.Attribute != "" { - jmxPath = jmxPath + "/" + metric.Attribute + // Add target, only in proxy mode + if j.Mode == "proxy" { + serviceUrl := fmt.Sprintf("service:jmx:rmi:///jndi/rmi://%s:%s/jmxrmi", + server.Host, server.Port) - if metric.Path != "" { - jmxPath = jmxPath + "/" + metric.Path - } - } - - // Prepare URL - requestUrl, err := url.Parse("http://" + server.Host + ":" + - server.Port + context + "/read" + jmxPath) - if err != nil { - return err - } - if server.Username != "" || server.Password != "" { - requestUrl.User = url.UserPassword(server.Username, server.Password) - } - out, _ := j.getAttr(requestUrl) - j.collectMeasurement(measurement, out, fields) - } - acc.AddFields("jolokia", fields, tags) + target := map[string]string{ + "url": serviceUrl, } - } else if ( mode == "proxy") { + if server.Username != "" { + target["user"] = server.Username + } + + if server.Password != "" { + target["password"] = server.Password + } + + bodyContent["target"] = target proxy := j.Proxy // Prepare ProxyURL - proxyURL, err := url.Parse("http://" + proxy.Host + ":" + - proxy.Port + context) + proxyUrl, err := 
url.Parse("http://" + proxy.Host + ":" + proxy.Port + context) if err != nil { - return err + return nil, err } if proxy.Username != "" || proxy.Password != "" { - proxyURL.User = url.UserPassword(proxy.Username, proxy.Password) + proxyUrl.User = url.UserPassword(proxy.Username, proxy.Password) } - for _, server := range servers { - tags["server"] = server.Name - tags["port"] = server.Port - tags["host"] = server.Host - fields := make(map[string]interface{}) - for _, metric := range metrics { + jolokiaUrl = proxyUrl - measurement := metric.Name - // Prepare URL - serviceUrl := fmt.Sprintf("service:jmx:rmi:///jndi/rmi://%s:%s/jmxrmi", server.Host, server.Port) + } else { + serverUrl, err := url.Parse("http://" + server.Host + ":" + server.Port + context) + if err != nil { + return nil, err + } + if server.Username != "" || server.Password != "" { + serverUrl.User = url.UserPassword(server.Username, server.Password) + } - target := map[string]string{ - "url": serviceUrl, - } + jolokiaUrl = serverUrl + } - if server.Username != "" { - target["user"] = server.Username - } + requestBody, err := json.Marshal(bodyContent) - if server.Password != "" { - target["password"] = server.Password - } + req, err := http.NewRequest("POST", jolokiaUrl.String(), bytes.NewBuffer(requestBody)) - // Create + send request - bodyContent := map[string]interface{}{ - "type": "read", - "mbean": metric.Mbean, - "target": target, - } + if err != nil { + return nil, err + } - if metric.Attribute != "" { - bodyContent["attribute"] = metric.Attribute - if metric.Path != "" { - bodyContent["path"] = metric.Path - } - } + req.Header.Add("Content-type", "application/json") - requestBody, err := json.Marshal(bodyContent) + return req, nil +} - req, err := http.NewRequest("POST", proxyURL.String(), bytes.NewBuffer(requestBody)) +func (j *Jolokia) Gather(acc telegraf.Accumulator) error { + servers := j.Servers + metrics := j.Metrics + tags := make(map[string]string) - if err != nil { - return err - } + for _, server := range servers { + tags["server_name"] = server.Name + tags["server_port"] = server.Port + tags["server_host"] = server.Host + fields := make(map[string]interface{}) - req.Header.Add("Content-type", "application/json") + for _, metric := range metrics { + measurement := metric.Name + + req, err := j.prepareRequest(server, metric) + if err != nil { + return err + } + + out, err := j.doRequest(req) + + if err != nil { + fmt.Printf("Error handling response: %s\n", err) + } else { + + if values, ok := out["value"]; ok { + switch t := values.(type) { + case map[string]interface{}: + for k, v := range t { + fields[measurement+"_"+k] = v + } + case interface{}: + fields[measurement] = t + } + } else { + fmt.Printf("Missing key 'value' in output response\n") + } - out, err := j.doRequest(req) - - if err != nil { - fmt.Printf("Error handling response: %s\n", err) - }else { - j.collectMeasurement(measurement, out, fields) - } } - acc.AddFields("jolokia", fields, tags) } + acc.AddFields("jolokia", fields, tags) } return nil diff --git a/plugins/inputs/jolokia/jolokia_test.go b/plugins/inputs/jolokia/jolokia_test.go index eb8fb12da..ff0c0e49d 100644 --- a/plugins/inputs/jolokia/jolokia_test.go +++ b/plugins/inputs/jolokia/jolokia_test.go @@ -48,7 +48,7 @@ const empty = "" var Servers = []Server{Server{Name: "as1", Host: "127.0.0.1", Port: "8080"}} var HeapMetric = Metric{Name: "heap_memory_usage", - Mbean: "java.lang:type=Memory", Attribute: "HeapMemoryUsage" } + Mbean: "java.lang:type=Memory", Attribute: "HeapMemoryUsage"} 
var UsedHeapMetric = Metric{Name: "heap_memory_usage", Mbean: "java.lang:type=Memory", Attribute: "HeapMemoryUsage"} @@ -96,9 +96,9 @@ func TestHttpJsonMultiValue(t *testing.T) { "heap_memory_usage_used": 203288528.0, } tags := map[string]string{ - "host": "127.0.0.1", - "port": "8080", - "server": "as1", + "server_host": "127.0.0.1", + "server_port": "8080", + "server_name": "as1", } acc.AssertContainsTaggedFields(t, "jolokia", fields, tags) } @@ -117,7 +117,6 @@ func TestHttpJsonOn404(t *testing.T) { assert.Equal(t, 0, len(acc.Metrics)) } - // Test that the proper values are ignored or collected func TestHttpInvalidJson(t *testing.T) { From 59a297abe65f775e5f0d1acaca5cb19d0da79e54 Mon Sep 17 00:00:00 2001 From: Cameron Sparr Date: Wed, 27 Apr 2016 15:46:21 -0600 Subject: [PATCH 68/84] etc/telegraf.conf update --- etc/telegraf.conf | 48 ++++++++++++++++++++++++++++++++--------------- 1 file changed, 33 insertions(+), 15 deletions(-) diff --git a/etc/telegraf.conf b/etc/telegraf.conf index c855b94fe..40e126d94 100644 --- a/etc/telegraf.conf +++ b/etc/telegraf.conf @@ -722,13 +722,24 @@ # # Read JMX metrics through Jolokia # [[inputs.jolokia]] # ## This is the context root used to compose the jolokia url -# context = "/jolokia/read" +# context = "/jolokia" +# +# ## This specifies the mode used +# # mode = "proxy" +# # +# ## When in proxy mode this section is used to specify further +# ## proxy address configurations. +# ## Remember to change host address to fit your environment. +# # [inputs.jolokia.proxy] +# # host = "127.0.0.1" +# # port = "8080" +# # # ## List of servers exposing jolokia read service # [[inputs.jolokia.servers]] -# name = "stable" -# host = "192.168.103.2" -# port = "8180" +# name = "as-server-01" +# host = "127.0.0.1" +# port = "8080" # # username = "myuser" # # password = "mypassword" # @@ -738,17 +749,20 @@ # ## This collect all heap memory usage metrics. # [[inputs.jolokia.metrics]] # name = "heap_memory_usage" -# jmx = "/java.lang:type=Memory/HeapMemoryUsage" -# +# mbean = "java.lang:type=Memory" +# attribute = "HeapMemoryUsage" +# # ## This collect thread counts metrics. # [[inputs.jolokia.metrics]] # name = "thread_count" -# jmx = "/java.lang:type=Threading/TotalStartedThreadCount,ThreadCount,DaemonThreadCount,PeakThreadCount" -# +# mbean = "java.lang:type=Threading" +# attribute = "TotalStartedThreadCount,ThreadCount,DaemonThreadCount,PeakThreadCount" +# # ## This collect number of class loaded/unloaded counts metrics. # [[inputs.jolokia.metrics]] # name = "class_count" -# jmx = "/java.lang:type=ClassLoading/LoadedClassCount,UnloadedClassCount,TotalLoadedClassCount" +# mbean = "java.lang:type=ClassLoading" +# attribute = "LoadedClassCount,UnloadedClassCount,TotalLoadedClassCount" # # Read metrics from a LeoFS Server via SNMP @@ -765,9 +779,13 @@ # ## # # ost_procfiles = [ # # "/proc/fs/lustre/obdfilter/*/stats", -# # "/proc/fs/lustre/osd-ldiskfs/*/stats" +# # "/proc/fs/lustre/osd-ldiskfs/*/stats", +# # "/proc/fs/lustre/obdfilter/*/job_stats", +# # ] +# # mds_procfiles = [ +# # "/proc/fs/lustre/mdt/*/md_stats", +# # "/proc/fs/lustre/mdt/*/job_stats", # # ] -# # mds_procfiles = ["/proc/fs/lustre/mdt/*/md_stats"] # # Gathers metrics from the /3.0/reports MailChimp API @@ -958,15 +976,15 @@ # [[inputs.ping]] # ## NOTE: this plugin forks the ping command. 
You may need to set capabilities # ## via setcap cap_net_raw+p /bin/ping -# +# # # ## urls to ping # urls = ["www.google.com"] # required -# ## number of pings to send (ping -c ) +# ## number of pings to send per collection (ping -c ) # count = 1 # required # ## interval, in s, at which to ping. 0 == default (ping -i ) # ping_interval = 0.0 -# ## ping timeout, in s. 0 == no timeout (ping -t ) -# timeout = 0.0 +# ## ping timeout, in s. 0 == no timeout (ping -W ) +# timeout = 1.0 # ## interface to send ping from (ping -I ) # interface = "" From 8742ead58539581fd18fd5c0a606936546ead24b Mon Sep 17 00:00:00 2001 From: Cameron Sparr Date: Wed, 27 Apr 2016 15:50:55 -0600 Subject: [PATCH 69/84] Change server_ -> jolokia_ in tags and other formatting --- CHANGELOG.md | 10 +++++++--- plugins/inputs/jolokia/README.md | 26 +++++++++++++++----------- plugins/inputs/jolokia/jolokia.go | 6 +++--- plugins/inputs/jolokia/jolokia_test.go | 6 +++--- 4 files changed, 28 insertions(+), 20 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a307b8764..c1d309f82 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,11 @@ ### Release Notes +- **Breaking change** in jolokia plugin. See +https://github.com/influxdata/telegraf/blob/master/plugins/inputs/jolokia/README.md +for updated configuration. The plugin will now support proxy mode and will make +POST requests. + - New [agent] configuration option: `metric_batch_size`. This option tells telegraf the maximum batch size to allow to accumulate before sending a flush to the configured outputs. `metric_buffer_limit` now refers to the absolute @@ -45,6 +50,7 @@ based on _prefix_ in addition to globs. This means that a filter like ### Features +- [#1031](https://github.com/influxdata/telegraf/pull/1031): Jolokia plugin proxy mode. Thanks @saiello! - [#1017](https://github.com/influxdata/telegraf/pull/1017): taginclude and tagexclude arguments. - [#1015](https://github.com/influxdata/telegraf/pull/1015): Docker plugin schema refactor. - [#889](https://github.com/influxdata/telegraf/pull/889): Improved MySQL plugin. Thanks @maksadbek! @@ -61,6 +67,7 @@ based on _prefix_ in addition to globs. This means that a filter like ### Bugfixes +- [#1050](https://github.com/influxdata/telegraf/issues/1050): jolokia plugin - do not overwrite host tag. Thanks @saiello! - [#921](https://github.com/influxdata/telegraf/pull/921): mqtt_consumer stops gathering metrics. Thanks @chaton78! - [#1013](https://github.com/influxdata/telegraf/pull/1013): Close dead riemann output connections. Thanks @echupriyanov! - [#1012](https://github.com/influxdata/telegraf/pull/1012): Set default tags in test accumulator. @@ -82,10 +89,8 @@ https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_OUTPUT.md#g - Possible breaking change for the librato and graphite outputs. Telegraf will no longer insert field names when the field is simply named `value`. This is because the `value` field is redundant in the graphite/librato context. -- Breaking change in jolokia plugin. See https://github.com/influxdata/telegraf/blob/master/plugins/inputs/jolokia/README.md ### Features -- [#1031](https://github.com/influxdata/telegraf/pull/1031): Jolokia plugin proxy mode. Thanks @saiello! - [#1009](https://github.com/influxdata/telegraf/pull/1009): Cassandra input plugin. Thanks @subhachandrachandra! - [#976](https://github.com/influxdata/telegraf/pull/976): Reduce allocations in the UDP and statsd inputs. 
- [#979](https://github.com/influxdata/telegraf/pull/979): Reduce allocations in the TCP listener. @@ -98,7 +103,6 @@ because the `value` field is redundant in the graphite/librato context. - [#1008](https://github.com/influxdata/telegraf/pull/1008): Adding memstats metrics to the influxdb plugin. ### Bugfixes -- [#1050](https://github.com/influxdata/telegraf/issues/1050): jolokia plugin - do not overwrite host tag. Thanks @saiello! - [#968](https://github.com/influxdata/telegraf/issues/968): Processes plugin gets unknown state when spaces are in (command name) - [#969](https://github.com/influxdata/telegraf/pull/969): ipmi_sensors: allow : in password. Thanks @awaw! - [#972](https://github.com/influxdata/telegraf/pull/972): dovecot: remove extra newline in dovecot command. Thanks @mrannanj! diff --git a/plugins/inputs/jolokia/README.md b/plugins/inputs/jolokia/README.md index 05ade3d01..596dbed5f 100644 --- a/plugins/inputs/jolokia/README.md +++ b/plugins/inputs/jolokia/README.md @@ -3,20 +3,23 @@ #### Configuration ```toml +# Read JMX metrics through Jolokia [[inputs.jolokia]] ## This is the context root used to compose the jolokia url context = "/jolokia" - # This specifies the mode used + ## This specifies the mode used # mode = "proxy" # - # When in proxy mode this section is used to specify further proxy address configurations. - # Remember to change servers addresses + ## When in proxy mode this section is used to specify further + ## proxy address configurations. + ## Remember to change host address to fit your environment. # [inputs.jolokia.proxy] - # host = "127.0.0.1" - # port = "8080" + # host = "127.0.0.1" + # port = "8080" - # List of servers exposing jolokia read service + + ## List of servers exposing jolokia read service [[inputs.jolokia.servers]] name = "as-server-01" host = "127.0.0.1" @@ -37,21 +40,22 @@ [[inputs.jolokia.metrics]] name = "thread_count" mbean = "java.lang:type=Threading" - attribute = "TotalStartedThreadCount,ThreadCount,DaemonThreadCount,PeakThreadCount" + attribute = "TotalStartedThreadCount,ThreadCount,DaemonThreadCount,PeakThreadCount" ## This collect number of class loaded/unloaded counts metrics. [[inputs.jolokia.metrics]] name = "class_count" mbean = "java.lang:type=ClassLoading" - attribute = "LoadedClassCount,UnloadedClassCount,TotalLoadedClassCount" + attribute = "LoadedClassCount,UnloadedClassCount,TotalLoadedClassCount" ``` #### Description -The Jolokia plugin collects JVM metrics exposed as MBean's attributes through jolokia REST endpoint. All metrics -are collected for each server configured. +The Jolokia plugin collects JVM metrics exposed as MBean's attributes through +jolokia REST endpoint. All metrics are collected for each server configured. See: https://jolokia.org/ # Measurements: -Jolokia plugin produces one measure for each metric configured, adding Server's `server_name`, `server_host` and `server_port` as tags. +Jolokia plugin produces one measure for each metric configured, +adding Server's `jolokia_name`, `jolokia_host` and `jolokia_port` as tags. 
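As a rough illustration of the renamed tags, a point produced by the plugin now looks like this (tag values and the heap_memory_usage_used field are taken from the plugin's test fixtures; other fields and the timestamp are omitted):

```
jolokia,jolokia_name=as1,jolokia_host=127.0.0.1,jolokia_port=8080 heap_memory_usage_used=203288528
```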
diff --git a/plugins/inputs/jolokia/jolokia.go b/plugins/inputs/jolokia/jolokia.go index 08256ce85..244338559 100644 --- a/plugins/inputs/jolokia/jolokia.go +++ b/plugins/inputs/jolokia/jolokia.go @@ -226,9 +226,9 @@ func (j *Jolokia) Gather(acc telegraf.Accumulator) error { tags := make(map[string]string) for _, server := range servers { - tags["server_name"] = server.Name - tags["server_port"] = server.Port - tags["server_host"] = server.Host + tags["jolokia_name"] = server.Name + tags["jolokia_port"] = server.Port + tags["jolokia_host"] = server.Host fields := make(map[string]interface{}) for _, metric := range metrics { diff --git a/plugins/inputs/jolokia/jolokia_test.go b/plugins/inputs/jolokia/jolokia_test.go index ff0c0e49d..13724b937 100644 --- a/plugins/inputs/jolokia/jolokia_test.go +++ b/plugins/inputs/jolokia/jolokia_test.go @@ -96,9 +96,9 @@ func TestHttpJsonMultiValue(t *testing.T) { "heap_memory_usage_used": 203288528.0, } tags := map[string]string{ - "server_host": "127.0.0.1", - "server_port": "8080", - "server_name": "as1", + "jolokia_host": "127.0.0.1", + "jolokia_port": "8080", + "jolokia_name": "as1", } acc.AssertContainsTaggedFields(t, "jolokia", fields, tags) } From 1390c2200440f83bbcd900d720e9148a19a07c57 Mon Sep 17 00:00:00 2001 From: Bob Zoller Date: Wed, 27 Apr 2016 15:19:22 -0700 Subject: [PATCH 70/84] sanitize * to - in graphite serializer closes #1110 --- CHANGELOG.md | 1 + plugins/serializers/graphite/graphite.go | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c1d309f82..d66e4dd3a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -64,6 +64,7 @@ based on _prefix_ in addition to globs. This means that a filter like - [#1096](https://github.com/influxdata/telegraf/pull/1096): Performance refactor of running output buffers. - [#967](https://github.com/influxdata/telegraf/issues/967): Buffer logging improvements. - [#1107](https://github.com/influxdata/telegraf/issues/1107): Support lustre2 job stats. Thanks @hanleyja! +- [#1110](https://github.com/influxdata/telegraf/pull/1110): Sanitize * to - in graphite serializer. Thanks @goodeggs! 
### Bugfixes diff --git a/plugins/serializers/graphite/graphite.go b/plugins/serializers/graphite/graphite.go index 6484d3fee..bf2e75579 100644 --- a/plugins/serializers/graphite/graphite.go +++ b/plugins/serializers/graphite/graphite.go @@ -17,7 +17,7 @@ type GraphiteSerializer struct { Template string } -var sanitizedChars = strings.NewReplacer("/", "-", "@", "-", " ", "_", "..", ".") +var sanitizedChars = strings.NewReplacer("/", "-", "@", "-", "*", "-", " ", "_", "..", ".") func (s *GraphiteSerializer) Serialize(metric telegraf.Metric) ([]string, error) { out := []string{} From 08f4b1ae8a3aa1f1bed224442a109c695c9500a2 Mon Sep 17 00:00:00 2001 From: Cameron Sparr Date: Thu, 28 Apr 2016 11:41:16 -0600 Subject: [PATCH 71/84] Update build to go 1.6.2 --- circle.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/circle.yml b/circle.yml index cbfdb225c..7a269f29f 100644 --- a/circle.yml +++ b/circle.yml @@ -4,9 +4,9 @@ machine: post: - sudo service zookeeper stop - go version - - go version | grep 1.6.1 || sudo rm -rf /usr/local/go - - wget https://storage.googleapis.com/golang/go1.6.1.linux-amd64.tar.gz - - sudo tar -C /usr/local -xzf go1.6.1.linux-amd64.tar.gz + - go version | grep 1.6.2 || sudo rm -rf /usr/local/go + - wget https://storage.googleapis.com/golang/go1.6.2.linux-amd64.tar.gz + - sudo tar -C /usr/local -xzf go1.6.2.linux-amd64.tar.gz - go version dependencies: From 024e1088eb3e1edd3fe2eea4156f98f0bf1caa68 Mon Sep 17 00:00:00 2001 From: Adithya B Cherunilam Date: Tue, 26 Apr 2016 02:03:07 +0530 Subject: [PATCH 72/84] Ensure sure that the post install script is compatible with RHEL 5 closes #1091 closes #1094 --- scripts/post-install.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/post-install.sh b/scripts/post-install.sh index 9aec08543..fb0b441e8 100644 --- a/scripts/post-install.sh +++ b/scripts/post-install.sh @@ -26,7 +26,7 @@ function install_chkconfig { id telegraf &>/dev/null if [[ $? -ne 0 ]]; then - useradd --system -U -M telegraf -s /bin/false -d /etc/telegraf + useradd -r -K USERGROUPS_ENAB=yes -M telegraf -s /bin/false -d /etc/telegraf fi test -d $LOG_DIR || mkdir -p $LOG_DIR From 015261a524e8f335f844e7e3ffdd75ea3b4f2b57 Mon Sep 17 00:00:00 2001 From: G-regL Date: Thu, 28 Apr 2016 13:12:04 -0400 Subject: [PATCH 73/84] Sanitize Field name Replace '/[sS]ec' for '_persec' and spaces with underscores. closes #1118 --- CHANGELOG.md | 3 +++ plugins/inputs/win_perf_counters/win_perf_counters.go | 4 +++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d66e4dd3a..eadbed925 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -47,6 +47,8 @@ based on _prefix_ in addition to globs. This means that a filter like - cassandra: `host -> cassandra_host` - disque: `host -> disque_host` - rethinkdb: `host -> rethinkdb_host` + +- **Breaking Change**: The `win_perf_counters` input has been changed to sanitize field names, replacing `/Sec` and `/sec` with `_persec`, as well as spaces with underscores. This is needed because Graphite doesn't like slashes and spaces, and was failing to accept metrics that had them. The `/[sS]ec` -> `_persec` is just to make things clearer and uniform. ### Features @@ -65,6 +67,7 @@ based on _prefix_ in addition to globs. This means that a filter like - [#967](https://github.com/influxdata/telegraf/issues/967): Buffer logging improvements. - [#1107](https://github.com/influxdata/telegraf/issues/1107): Support lustre2 job stats. Thanks @hanleyja! 
- [#1110](https://github.com/influxdata/telegraf/pull/1110): Sanitize * to - in graphite serializer. Thanks @goodeggs! +- [#1118] (https://github.com/influxdata/telegraf/pull/1118): Sanitize Counter names for `win_perf_counters` input. ### Bugfixes diff --git a/plugins/inputs/win_perf_counters/win_perf_counters.go b/plugins/inputs/win_perf_counters/win_perf_counters.go index 8279f1c7a..4684289ee 100644 --- a/plugins/inputs/win_perf_counters/win_perf_counters.go +++ b/plugins/inputs/win_perf_counters/win_perf_counters.go @@ -107,6 +107,8 @@ type item struct { counterHandle win.PDH_HCOUNTER } +var sanitizedChars = strings.NewReplacer("/sec", "_persec", "/Sec", "_persec", " ", "_") + func (m *Win_PerfCounters) AddItem(metrics *itemList, query string, objectName string, counter string, instance string, measurement string, include_total bool) { @@ -297,7 +299,7 @@ func (m *Win_PerfCounters) Gather(acc telegraf.Accumulator) error { tags["instance"] = s } tags["objectname"] = metric.objectName - fields[string(metric.counter)] = float32(c.FmtValue.DoubleValue) + fields[sanitizedChars.Replace(string(metric.counter))] = float32(c.FmtValue.DoubleValue) var measurement string if metric.measurement == "" { From b59266249dbeff43ba21bfd3dcc854b12eefd9ca Mon Sep 17 00:00:00 2001 From: Cameron Sparr Date: Thu, 28 Apr 2016 13:11:41 -0600 Subject: [PATCH 74/84] README fixups for udp_listener, statsd inputs closes #1119 --- plugins/inputs/statsd/README.md | 10 +++------- plugins/inputs/tcp_listener/README.md | 3 +-- plugins/inputs/udp_listener/README.md | 8 +------- 3 files changed, 5 insertions(+), 16 deletions(-) diff --git a/plugins/inputs/statsd/README.md b/plugins/inputs/statsd/README.md index 8722ce1e9..ba0c8e746 100644 --- a/plugins/inputs/statsd/README.md +++ b/plugins/inputs/statsd/README.md @@ -18,10 +18,10 @@ ## Percentiles to calculate for timing & histogram stats percentiles = [90] - ## convert measurement names, "." to "_" and "-" to "__" - convert_names = true + ## separator to use between elements of a statsd metric + metric_separator = "_" - ## Parses tags in DataDog's dogstatsd format + ## Parses tags in the datadog statsd format ## http://docs.datadoghq.com/guides/dogstatsd/ parse_data_dog_tags = false @@ -39,10 +39,6 @@ ## calculation of percentiles. Raising this limit increases the accuracy ## of percentiles but also increases the memory usage and cpu time. percentile_limit = 1000 - - ## UDP packet size for the server to listen for. This will depend on the size - ## of the packets that the client is sending, which is usually 1500 bytes. - udp_packet_size = 1500 ``` ### Description diff --git a/plugins/inputs/tcp_listener/README.md b/plugins/inputs/tcp_listener/README.md index d2dfeb575..0066ea801 100644 --- a/plugins/inputs/tcp_listener/README.md +++ b/plugins/inputs/tcp_listener/README.md @@ -22,8 +22,7 @@ This is a sample configuration for the plugin. ## Maximum number of concurrent TCP connections to allow max_tcp_connections = 250 - ## Data format to consume. - + ## Data format to consume. ## Each data format has it's own unique set of configuration options, read ## more about them here: ## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md diff --git a/plugins/inputs/udp_listener/README.md b/plugins/inputs/udp_listener/README.md index 1dd03a2a7..ee675f535 100644 --- a/plugins/inputs/udp_listener/README.md +++ b/plugins/inputs/udp_listener/README.md @@ -18,13 +18,7 @@ This is a sample configuration for the plugin. ## UDP listener will start dropping packets. 
allowed_pending_messages = 10000 - ## UDP packet size for the server to listen for. This will depend - ## on the size of the packets that the client is sending, which is - ## usually 1500 bytes. - udp_packet_size = 1500 - - ## Data format to consume. - + ## Data format to consume. ## Each data format has it's own unique set of configuration options, read ## more about them here: ## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md From 3e1026286bb1b0c3e3eb4ab87ef3bc331397e571 Mon Sep 17 00:00:00 2001 From: Cameron Sparr Date: Thu, 28 Apr 2016 14:44:08 -0600 Subject: [PATCH 75/84] skip network-dependent unit tests in short mode --- plugins/inputs/dns_query/dns_query_test.go | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/plugins/inputs/dns_query/dns_query_test.go b/plugins/inputs/dns_query/dns_query_test.go index d7d267a59..aeeb7656f 100644 --- a/plugins/inputs/dns_query/dns_query_test.go +++ b/plugins/inputs/dns_query/dns_query_test.go @@ -15,6 +15,9 @@ var servers = []string{"8.8.8.8"} var domains = []string{"google.com"} func TestGathering(t *testing.T) { + if testing.Short() { + t.Skip("Skipping network-dependent test in short mode.") + } var dnsConfig = DnsQuery{ Servers: servers, Domains: domains, @@ -31,6 +34,9 @@ func TestGathering(t *testing.T) { } func TestGatheringMxRecord(t *testing.T) { + if testing.Short() { + t.Skip("Skipping network-dependent test in short mode.") + } var dnsConfig = DnsQuery{ Servers: servers, Domains: domains, @@ -48,6 +54,9 @@ func TestGatheringMxRecord(t *testing.T) { } func TestGatheringRootDomain(t *testing.T) { + if testing.Short() { + t.Skip("Skipping network-dependent test in short mode.") + } var dnsConfig = DnsQuery{ Servers: servers, Domains: []string{"."}, @@ -72,6 +81,9 @@ func TestGatheringRootDomain(t *testing.T) { } func TestMetricContainsServerAndDomainAndRecordTypeTags(t *testing.T) { + if testing.Short() { + t.Skip("Skipping network-dependent test in short mode.") + } var dnsConfig = DnsQuery{ Servers: servers, Domains: domains, @@ -95,6 +107,9 @@ func TestMetricContainsServerAndDomainAndRecordTypeTags(t *testing.T) { } func TestGatheringTimeout(t *testing.T) { + if testing.Short() { + t.Skip("Skipping network-dependent test in short mode.") + } var dnsConfig = DnsQuery{ Servers: servers, Domains: domains, From 3ed006d2166b93c8eebdcd505c675292d604625c Mon Sep 17 00:00:00 2001 From: Cameron Sparr Date: Thu, 28 Apr 2016 17:00:06 -0600 Subject: [PATCH 76/84] Sanitize invalid opentsdb characters closes #1098 --- plugins/outputs/opentsdb/opentsdb.go | 10 +++++++--- plugins/outputs/opentsdb/opentsdb_test.go | 22 ++++++++++++++++------ 2 files changed, 23 insertions(+), 9 deletions(-) diff --git a/plugins/outputs/opentsdb/opentsdb.go b/plugins/outputs/opentsdb/opentsdb.go index 83a3429e3..5e94ca340 100644 --- a/plugins/outputs/opentsdb/opentsdb.go +++ b/plugins/outputs/opentsdb/opentsdb.go @@ -21,6 +21,9 @@ type OpenTSDB struct { Debug bool } +var sanitizedChars = strings.NewReplacer("@", "-", "*", "-", " ", "_", + `%`, "-", "#", "-", "$", "-") + var sampleConfig = ` ## prefix for metrics keys prefix = "my.specific.prefix." 
@@ -94,8 +97,8 @@ func buildTags(mTags map[string]string) []string { tags := make([]string, len(mTags)) index := 0 for k, v := range mTags { - tags[index] = fmt.Sprintf("%s=%s", k, v) - index += 1 + tags[index] = sanitizedChars.Replace(fmt.Sprintf("%s=%s", k, v)) + index++ } sort.Strings(tags) return tags @@ -105,7 +108,8 @@ func buildMetrics(m telegraf.Metric, now time.Time, prefix string) []*MetricLine ret := []*MetricLine{} for fieldName, value := range m.Fields() { metric := &MetricLine{ - Metric: fmt.Sprintf("%s%s_%s", prefix, m.Name(), fieldName), + Metric: sanitizedChars.Replace(fmt.Sprintf("%s%s_%s", + prefix, m.Name(), fieldName)), Timestamp: now.Unix(), } diff --git a/plugins/outputs/opentsdb/opentsdb_test.go b/plugins/outputs/opentsdb/opentsdb_test.go index 900c9f123..30323725b 100644 --- a/plugins/outputs/opentsdb/opentsdb_test.go +++ b/plugins/outputs/opentsdb/opentsdb_test.go @@ -25,6 +25,10 @@ func TestBuildTagsTelnet(t *testing.T) { map[string]string{"one": "two", "aaa": "bbb"}, []string{"aaa=bbb", "one=two"}, }, + { + map[string]string{"Sp%ci@l Chars": "g$t repl#ced"}, + []string{"Sp-ci-l_Chars=g-t_repl-ced"}, + }, { map[string]string{}, []string{}, @@ -59,13 +63,19 @@ func TestWrite(t *testing.T) { // Verify postive and negative test cases of writing data metrics := testutil.MockMetrics() - metrics = append(metrics, testutil.TestMetric(float64(1.0), "justametric.float")) - metrics = append(metrics, testutil.TestMetric(int64(123456789), "justametric.int")) - metrics = append(metrics, testutil.TestMetric(uint64(123456789012345), "justametric.uint")) - metrics = append(metrics, testutil.TestMetric("Lorem Ipsum", "justametric.string")) - metrics = append(metrics, testutil.TestMetric(float64(42.0), "justametric.anotherfloat")) + metrics = append(metrics, testutil.TestMetric(float64(1.0), + "justametric.float")) + metrics = append(metrics, testutil.TestMetric(int64(123456789), + "justametric.int")) + metrics = append(metrics, testutil.TestMetric(uint64(123456789012345), + "justametric.uint")) + metrics = append(metrics, testutil.TestMetric("Lorem Ipsum", + "justametric.string")) + metrics = append(metrics, testutil.TestMetric(float64(42.0), + "justametric.anotherfloat")) + metrics = append(metrics, testutil.TestMetric(float64(42.0), + "metric w/ specialchars")) err = o.Write(metrics) require.NoError(t, err) - } From 5d3c582ecffb01f47d068001e9eb58fe196ef499 Mon Sep 17 00:00:00 2001 From: Cameron Sparr Date: Thu, 28 Apr 2016 17:34:37 -0600 Subject: [PATCH 77/84] changelog update --- CHANGELOG.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index eadbed925..40fb69452 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -47,7 +47,7 @@ based on _prefix_ in addition to globs. This means that a filter like - cassandra: `host -> cassandra_host` - disque: `host -> disque_host` - rethinkdb: `host -> rethinkdb_host` - + - **Breaking Change**: The `win_perf_counters` input has been changed to sanitize field names, replacing `/Sec` and `/sec` with `_persec`, as well as spaces with underscores. This is needed because Graphite doesn't like slashes and spaces, and was failing to accept metrics that had them. The `/[sS]ec` -> `_persec` is just to make things clearer and uniform. ### Features @@ -83,6 +83,7 @@ based on _prefix_ in addition to globs. This means that a filter like - [#1070](https://github.com/influxdata/telegraf/issues/1070): SQL Server input. Fix datatype conversion. 
- [#1089](https://github.com/influxdata/telegraf/issues/1089): Fix leaky TCP connections in phpfpm plugin. - [#914](https://github.com/influxdata/telegraf/issues/914): Telegraf can drop metrics on full buffers. +- [#1098](https://github.com/influxdata/telegraf/issues/1098): Sanitize invalid OpenTSDB characters. ## v0.12.1 [2016-04-14] From cbe32c74823a9efc9ba12b30d449778cfa578f31 Mon Sep 17 00:00:00 2001 From: Cameron Sparr Date: Thu, 28 Apr 2016 15:19:03 -0600 Subject: [PATCH 78/84] Support default config paths precedence will be: 1. --config command-line option 2. $TELEGRAF_CONFIG_PATH 3. $HOME/.telegraf/telegraf.conf 4. /etc/telegraf/telegraf.conf --- CHANGELOG.md | 5 +++-- cmd/telegraf/telegraf.go | 39 ++++++++++++++++++++------------------- docs/CONFIGURATION.md | 6 +++--- internal/config/config.go | 27 +++++++++++++++++++++++++++ 4 files changed, 53 insertions(+), 24 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 40fb69452..ae54ed7ca 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -66,8 +66,7 @@ based on _prefix_ in addition to globs. This means that a filter like - [#1096](https://github.com/influxdata/telegraf/pull/1096): Performance refactor of running output buffers. - [#967](https://github.com/influxdata/telegraf/issues/967): Buffer logging improvements. - [#1107](https://github.com/influxdata/telegraf/issues/1107): Support lustre2 job stats. Thanks @hanleyja! -- [#1110](https://github.com/influxdata/telegraf/pull/1110): Sanitize * to - in graphite serializer. Thanks @goodeggs! -- [#1118] (https://github.com/influxdata/telegraf/pull/1118): Sanitize Counter names for `win_perf_counters` input. +- [#1122](https://github.com/influxdata/telegraf/pull/1122): Support setting config path through env variable and default paths. ### Bugfixes @@ -84,6 +83,8 @@ based on _prefix_ in addition to globs. This means that a filter like - [#1089](https://github.com/influxdata/telegraf/issues/1089): Fix leaky TCP connections in phpfpm plugin. - [#914](https://github.com/influxdata/telegraf/issues/914): Telegraf can drop metrics on full buffers. - [#1098](https://github.com/influxdata/telegraf/issues/1098): Sanitize invalid OpenTSDB characters. +- [#1110](https://github.com/influxdata/telegraf/pull/1110): Sanitize * to - in graphite serializer. Thanks @goodeggs! +- [#1118](https://github.com/influxdata/telegraf/pull/1118): Sanitize Counter names for `win_perf_counters` input. ## v0.12.1 [2016-04-14] diff --git a/cmd/telegraf/telegraf.go b/cmd/telegraf/telegraf.go index be591829b..ad0174788 100644 --- a/cmd/telegraf/telegraf.go +++ b/cmd/telegraf/telegraf.go @@ -71,6 +71,13 @@ The flags are: -quiet run in quiet mode -version print the version to stdout +In addition to the -config flag, telegraf will also load the config file from +an environment variable or default location. Precedence is: + 1. -config flag + 2. $TELEGRAF_CONFIG_PATH environment variable + 3. $HOME/.telegraf/telegraf.conf + 4. 
/etc/telegraf/telegraf.conf + Examples: # generate a telegraf config file: @@ -98,12 +105,10 @@ func main() { flag.Parse() args := flag.Args() - if flag.NFlag() == 0 && len(args) == 0 { - usageExit(0) - } - var inputFilters []string if *fInputFiltersLegacy != "" { + fmt.Printf("WARNING '--filter' flag is deprecated, please use" + + " '--input-filter'") inputFilter := strings.TrimSpace(*fInputFiltersLegacy) inputFilters = strings.Split(":"+inputFilter+":", ":") } @@ -114,6 +119,8 @@ func main() { var outputFilters []string if *fOutputFiltersLegacy != "" { + fmt.Printf("WARNING '--outputfilter' flag is deprecated, please use" + + " '--output-filter'") outputFilter := strings.TrimSpace(*fOutputFiltersLegacy) outputFilters = strings.Split(":"+outputFilter+":", ":") } @@ -170,25 +177,19 @@ func main() { return } - var ( - c *config.Config - err error - ) - - if *fConfig != "" { - c = config.NewConfig() - c.OutputFilters = outputFilters - c.InputFilters = inputFilters - err = c.LoadConfig(*fConfig) - if err != nil { - log.Fatal(err) - } - } else { - fmt.Println("You must specify a config file. See telegraf --help") + // If no other options are specified, load the config file and run. + c := config.NewConfig() + c.OutputFilters = outputFilters + c.InputFilters = inputFilters + err := c.LoadConfig(*fConfig) + if err != nil { + fmt.Println(err) os.Exit(1) } if *fConfigDirectoryLegacy != "" { + fmt.Printf("WARNING '--configdirectory' flag is deprecated, please use" + + " '--config-directory'") err = c.LoadDirectory(*fConfigDirectoryLegacy) if err != nil { log.Fatal(err) diff --git a/docs/CONFIGURATION.md b/docs/CONFIGURATION.md index 3e4e62adc..a01178919 100644 --- a/docs/CONFIGURATION.md +++ b/docs/CONFIGURATION.md @@ -15,8 +15,8 @@ To generate a file with specific inputs and outputs, you can use the telegraf -sample-config -input-filter cpu:mem:net:swap -output-filter influxdb:kafka ``` -You can see the latest config file with all available plugins -[here](https://github.com/influxdata/telegraf/blob/master/etc/telegraf.conf) +You can see the latest config file with all available plugins here: +[telegraf.conf](https://github.com/influxdata/telegraf/blob/master/etc/telegraf.conf) ## Environment Variables @@ -79,7 +79,7 @@ match against the tag name, and if it matches the measurement is emitted. * **tagdrop**: The inverse of tagpass. If a tag matches, the measurement is not emitted. This is tested on measurements that have passed the tagpass test. * **tagexclude**: tagexclude can be used to exclude a tag from measurement(s). -As opposed to tagdrop, which will drop an entire measurement based on it's +As opposed to tagdrop, which will drop an entire measurement based on it's tags, tagexclude simply strips the given tag keys from the measurement. This can be used on inputs & outputs, but it is _recommended_ to be used on inputs, as it is more efficient to filter out tags at the ingestion point. diff --git a/internal/config/config.go b/internal/config/config.go index 2a34493ff..d580796fa 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -412,8 +412,35 @@ func (c *Config) LoadDirectory(path string) error { return nil } +// Try to find a default config file at these locations (in order): +// 1. $TELEGRAF_CONFIG_PATH +// 2. $HOME/.telegraf/telegraf.conf +// 3. 
/etc/telegraf/telegraf.conf +// +func getDefaultConfigPath() (string, error) { + envfile := os.Getenv("TELEGRAF_CONFIG_PATH") + homefile := os.ExpandEnv("${HOME}/.telegraf/telegraf.conf") + etcfile := "/etc/telegraf/telegraf.conf" + for _, path := range []string{envfile, homefile, etcfile} { + if _, err := os.Stat(path); err == nil { + log.Printf("Using config file: %s", path) + return path, nil + } + } + + // if we got here, we didn't find a file in a default location + return "", fmt.Errorf("No config file specified, and could not find one"+ + " in $TELEGRAF_CONFIG_PATH, %s, or %s", homefile, etcfile) +} + // LoadConfig loads the given config file and applies it to c func (c *Config) LoadConfig(path string) error { + var err error + if path == "" { + if path, err = getDefaultConfigPath(); err != nil { + return err + } + } tbl, err := parseFile(path) if err != nil { return fmt.Errorf("Error parsing %s, %s", path, err) From 3f807a9432641a96cfb0211092a2ab89d0baccbd Mon Sep 17 00:00:00 2001 From: Cameron Sparr Date: Thu, 28 Apr 2016 19:23:45 -0600 Subject: [PATCH 79/84] Implement timeouts for all exec command runners First is to write an internal CombinedOutput and Run function with a timeout. Second, the following instances of command runners need to have timeouts: plugins/inputs/ping/ping.go 125: out, err := c.CombinedOutput() plugins/inputs/exec/exec.go 91: if err := cmd.Run(); err != nil { plugins/inputs/ipmi_sensor/command.go 31: err := cmd.Run() plugins/inputs/sysstat/sysstat.go 194: out, err := cmd.CombinedOutput() plugins/inputs/leofs/leofs.go 185: defer cmd.Wait() plugins/inputs/sysstat/sysstat.go 282: if err := cmd.Wait(); err != nil { closes #1067 --- internal/internal.go | 56 ++++++++++++++++++- internal/internal_test.go | 78 ++++++++++++++++++++++++++- plugins/inputs/exec/exec.go | 17 ++++-- plugins/inputs/ipmi_sensor/command.go | 15 +++--- plugins/inputs/leofs/leofs.go | 15 ++++-- plugins/inputs/ping/ping.go | 11 ++-- plugins/inputs/ping/ping_test.go | 8 +-- plugins/inputs/sysstat/sysstat.go | 65 ++++++++++++---------- 8 files changed, 210 insertions(+), 55 deletions(-) diff --git a/internal/internal.go b/internal/internal.go index 4b8e1536f..ae1464925 100644 --- a/internal/internal.go +++ b/internal/internal.go @@ -2,13 +2,16 @@ package internal import ( "bufio" + "bytes" "crypto/rand" "crypto/tls" "crypto/x509" "errors" "fmt" "io/ioutil" + "log" "os" + "os/exec" "strings" "time" "unicode" @@ -16,6 +19,12 @@ import ( const alphanum string = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" +var ( + TimeoutErr = errors.New("Command timed out.") + + NotImplementedError = errors.New("not implemented yet") +) + // Duration just wraps time.Duration type Duration struct { Duration time.Duration @@ -33,8 +42,6 @@ func (d *Duration) UnmarshalTOML(b []byte) error { return nil } -var NotImplementedError = errors.New("not implemented yet") - // ReadLines reads contents from a file and splits them by new lines. // A convenience wrapper to ReadLinesOffsetN(filename, 0, -1). func ReadLines(filename string) ([]string, error) { @@ -139,3 +146,48 @@ func SnakeCase(in string) string { return string(out) } + +// CombinedOutputTimeout runs the given command with the given timeout and +// returns the combined output of stdout and stderr. +// If the command times out, it attempts to kill the process. 
+func CombinedOutputTimeout(c *exec.Cmd, timeout time.Duration) ([]byte, error) { + var b bytes.Buffer + c.Stdout = &b + c.Stderr = &b + if err := c.Start(); err != nil { + return nil, err + } + err := WaitTimeout(c, timeout) + return b.Bytes(), err +} + +// RunTimeout runs the given command with the given timeout. +// If the command times out, it attempts to kill the process. +func RunTimeout(c *exec.Cmd, timeout time.Duration) error { + if err := c.Start(); err != nil { + return err + } + return WaitTimeout(c, timeout) +} + +// WaitTimeout waits for the given command to finish with a timeout. +// It assumes the command has already been started. +// If the command times out, it attempts to kill the process. +func WaitTimeout(c *exec.Cmd, timeout time.Duration) error { + timer := time.NewTimer(timeout) + done := make(chan error) + go func() { done <- c.Wait() }() + select { + case err := <-done: + timer.Stop() + return err + case <-timer.C: + if err := c.Process.Kill(); err != nil { + log.Printf("FATAL error killing process: %s", err) + return err + } + // wait for the command to return after killing it + <-done + return TimeoutErr + } +} diff --git a/internal/internal_test.go b/internal/internal_test.go index 7ff64e87b..90e1badc1 100644 --- a/internal/internal_test.go +++ b/internal/internal_test.go @@ -1,6 +1,12 @@ package internal -import "testing" +import ( + "os/exec" + "testing" + "time" + + "github.com/stretchr/testify/assert" +) type SnakeTest struct { input string @@ -30,3 +36,73 @@ func TestSnakeCase(t *testing.T) { } } } + +var ( + sleepbin, _ = exec.LookPath("sleep") + echobin, _ = exec.LookPath("echo") +) + +func TestRunTimeout(t *testing.T) { + if sleepbin == "" { + t.Skip("'sleep' binary not available on OS, skipping.") + } + cmd := exec.Command(sleepbin, "10") + start := time.Now() + err := RunTimeout(cmd, time.Millisecond*20) + elapsed := time.Since(start) + + assert.Equal(t, TimeoutErr, err) + // Verify that command gets killed in 20ms, with some breathing room + assert.True(t, elapsed < time.Millisecond*75) +} + +func TestCombinedOutputTimeout(t *testing.T) { + if sleepbin == "" { + t.Skip("'sleep' binary not available on OS, skipping.") + } + cmd := exec.Command(sleepbin, "10") + start := time.Now() + _, err := CombinedOutputTimeout(cmd, time.Millisecond*20) + elapsed := time.Since(start) + + assert.Equal(t, TimeoutErr, err) + // Verify that command gets killed in 20ms, with some breathing room + assert.True(t, elapsed < time.Millisecond*75) +} + +func TestCombinedOutput(t *testing.T) { + if echobin == "" { + t.Skip("'echo' binary not available on OS, skipping.") + } + cmd := exec.Command(echobin, "foo") + out, err := CombinedOutputTimeout(cmd, time.Second) + + assert.NoError(t, err) + assert.Equal(t, "foo\n", string(out)) +} + +// test that CombinedOutputTimeout and exec.Cmd.CombinedOutput return +// the same output from a failed command. 
+func TestCombinedOutputError(t *testing.T) { + if sleepbin == "" { + t.Skip("'sleep' binary not available on OS, skipping.") + } + cmd := exec.Command(sleepbin, "foo") + expected, err := cmd.CombinedOutput() + + cmd2 := exec.Command(sleepbin, "foo") + actual, err := CombinedOutputTimeout(cmd2, time.Second) + + assert.Error(t, err) + assert.Equal(t, expected, actual) +} + +func TestRunError(t *testing.T) { + if sleepbin == "" { + t.Skip("'sleep' binary not available on OS, skipping.") + } + cmd := exec.Command(sleepbin, "foo") + err := RunTimeout(cmd, time.Second) + + assert.Error(t, err) +} diff --git a/plugins/inputs/exec/exec.go b/plugins/inputs/exec/exec.go index d2e09ccd0..c1b2092e8 100644 --- a/plugins/inputs/exec/exec.go +++ b/plugins/inputs/exec/exec.go @@ -6,10 +6,12 @@ import ( "os/exec" "sync" "syscall" + "time" "github.com/gonuts/go-shellquote" "github.com/influxdata/telegraf" + "github.com/influxdata/telegraf/internal" "github.com/influxdata/telegraf/plugins/inputs" "github.com/influxdata/telegraf/plugins/parsers" "github.com/influxdata/telegraf/plugins/parsers/nagios" @@ -19,6 +21,9 @@ const sampleConfig = ` ## Commands array commands = ["/tmp/test.sh", "/usr/bin/mycollector --foo=bar"] + ## Timeout for each command to complete. + timeout = "5s" + ## measurement name suffix (for separating different commands) name_suffix = "_mycollector" @@ -32,6 +37,7 @@ const sampleConfig = ` type Exec struct { Commands []string Command string + Timeout internal.Duration parser parsers.Parser @@ -43,7 +49,8 @@ type Exec struct { func NewExec() *Exec { return &Exec{ - runner: CommandRunner{}, + runner: CommandRunner{}, + Timeout: internal.Duration{Duration: time.Second * 5}, } } @@ -73,7 +80,11 @@ func AddNagiosState(exitCode error, acc telegraf.Accumulator) error { return nil } -func (c CommandRunner) Run(e *Exec, command string, acc telegraf.Accumulator) ([]byte, error) { +func (c CommandRunner) Run( + e *Exec, + command string, + acc telegraf.Accumulator, +) ([]byte, error) { split_cmd, err := shellquote.Split(command) if err != nil || len(split_cmd) == 0 { return nil, fmt.Errorf("exec: unable to parse command, %s", err) @@ -84,7 +95,7 @@ func (c CommandRunner) Run(e *Exec, command string, acc telegraf.Accumulator) ([ var out bytes.Buffer cmd.Stdout = &out - if err := cmd.Run(); err != nil { + if err := internal.RunTimeout(cmd, e.Timeout.Duration); err != nil { switch e.parser.(type) { case *nagios.NagiosParser: AddNagiosState(err, acc) diff --git a/plugins/inputs/ipmi_sensor/command.go b/plugins/inputs/ipmi_sensor/command.go index 353c27d36..76374c494 100644 --- a/plugins/inputs/ipmi_sensor/command.go +++ b/plugins/inputs/ipmi_sensor/command.go @@ -1,10 +1,12 @@ package ipmi_sensor import ( - "bytes" "fmt" "os/exec" "strings" + "time" + + "github.com/influxdata/telegraf/internal" ) type CommandRunner struct{} @@ -18,21 +20,16 @@ func (t CommandRunner) cmd(conn *Connection, args ...string) *exec.Cmd { } return exec.Command(path, opts...) - } func (t CommandRunner) Run(conn *Connection, args ...string) (string, error) { cmd := t.cmd(conn, args...) 
- var stdout bytes.Buffer - var stderr bytes.Buffer - cmd.Stdout = &stdout - cmd.Stderr = &stderr - err := cmd.Run() + output, err := internal.CombinedOutputTimeout(cmd, time.Second*5) if err != nil { return "", fmt.Errorf("run %s %s: %s (%s)", - cmd.Path, strings.Join(cmd.Args, " "), stderr.String(), err) + cmd.Path, strings.Join(cmd.Args, " "), string(output), err) } - return stdout.String(), err + return string(output), err } diff --git a/plugins/inputs/leofs/leofs.go b/plugins/inputs/leofs/leofs.go index f4910ad0c..06c71e932 100644 --- a/plugins/inputs/leofs/leofs.go +++ b/plugins/inputs/leofs/leofs.go @@ -3,13 +3,16 @@ package leofs import ( "bufio" "fmt" - "github.com/influxdata/telegraf" - "github.com/influxdata/telegraf/plugins/inputs" "net/url" "os/exec" "strconv" "strings" "sync" + "time" + + "github.com/influxdata/telegraf" + "github.com/influxdata/telegraf/internal" + "github.com/influxdata/telegraf/plugins/inputs" ) const oid = ".1.3.6.1.4.1.35450" @@ -175,14 +178,18 @@ func (l *LeoFS) Gather(acc telegraf.Accumulator) error { return outerr } -func (l *LeoFS) gatherServer(endpoint string, serverType ServerType, acc telegraf.Accumulator) error { +func (l *LeoFS) gatherServer( + endpoint string, + serverType ServerType, + acc telegraf.Accumulator, +) error { cmd := exec.Command("snmpwalk", "-v2c", "-cpublic", endpoint, oid) stdout, err := cmd.StdoutPipe() if err != nil { return err } cmd.Start() - defer cmd.Wait() + defer internal.WaitTimeout(cmd, time.Second*5) scanner := bufio.NewScanner(stdout) if !scanner.Scan() { return fmt.Errorf("Unable to retrieve the node name") diff --git a/plugins/inputs/ping/ping.go b/plugins/inputs/ping/ping.go index 6c26acb8a..dfe67dc3f 100644 --- a/plugins/inputs/ping/ping.go +++ b/plugins/inputs/ping/ping.go @@ -9,15 +9,17 @@ import ( "strconv" "strings" "sync" + "time" "github.com/influxdata/telegraf" + "github.com/influxdata/telegraf/internal" "github.com/influxdata/telegraf/plugins/inputs" ) // HostPinger is a function that runs the "ping" function using a list of // passed arguments. This can be easily switched with a mocked ping function // for unit test purposes (see ping_test.go) -type HostPinger func(args ...string) (string, error) +type HostPinger func(timeout float64, args ...string) (string, error) type Ping struct { // Interval at which to ping (ping -i ) @@ -74,7 +76,7 @@ func (p *Ping) Gather(acc telegraf.Accumulator) error { go func(u string) { defer wg.Done() args := p.args(u) - out, err := p.pingHost(args...) + out, err := p.pingHost(p.Timeout, args...) if err != nil { // Combine go err + stderr output errorChannel <- errors.New( @@ -116,13 +118,14 @@ func (p *Ping) Gather(acc telegraf.Accumulator) error { return errors.New(strings.Join(errorStrings, "\n")) } -func hostPinger(args ...string) (string, error) { +func hostPinger(timeout float64, args ...string) (string, error) { bin, err := exec.LookPath("ping") if err != nil { return "", err } c := exec.Command(bin, args...) 
- out, err := c.CombinedOutput() + out, err := internal.CombinedOutputTimeout(c, + time.Second*time.Duration(timeout+1)) return string(out), err } diff --git a/plugins/inputs/ping/ping_test.go b/plugins/inputs/ping/ping_test.go index cd61a4fb2..25ecdf2fa 100644 --- a/plugins/inputs/ping/ping_test.go +++ b/plugins/inputs/ping/ping_test.go @@ -124,7 +124,7 @@ func TestArgs(t *testing.T) { "Expected: %s Actual: %s", expected, actual) } -func mockHostPinger(args ...string) (string, error) { +func mockHostPinger(timeout float64, args ...string) (string, error) { return linuxPingOutput, nil } @@ -161,7 +161,7 @@ PING www.google.com (216.58.218.164) 56(84) bytes of data. rtt min/avg/max/mdev = 35.225/44.033/51.806/5.325 ms ` -func mockLossyHostPinger(args ...string) (string, error) { +func mockLossyHostPinger(timeout float64, args ...string) (string, error) { return lossyPingOutput, nil } @@ -192,7 +192,7 @@ Request timeout for icmp_seq 0 2 packets transmitted, 0 packets received, 100.0% packet loss ` -func mockErrorHostPinger(args ...string) (string, error) { +func mockErrorHostPinger(timeout float64, args ...string) (string, error) { return errorPingOutput, errors.New("No packets received") } @@ -215,7 +215,7 @@ func TestBadPingGather(t *testing.T) { acc.AssertContainsTaggedFields(t, "ping", fields, tags) } -func mockFatalHostPinger(args ...string) (string, error) { +func mockFatalHostPinger(timeout float64, args ...string) (string, error) { return fatalPingOutput, errors.New("So very bad") } diff --git a/plugins/inputs/sysstat/sysstat.go b/plugins/inputs/sysstat/sysstat.go index c8c17ac45..c55516716 100644 --- a/plugins/inputs/sysstat/sysstat.go +++ b/plugins/inputs/sysstat/sysstat.go @@ -17,6 +17,7 @@ import ( "time" "github.com/influxdata/telegraf" + "github.com/influxdata/telegraf/internal" "github.com/influxdata/telegraf/plugins/inputs" ) @@ -98,31 +99,34 @@ var sampleConfig = ` # group = true # # - ## Options for the sadf command. The values on the left represent the sadf options and - ## the values on the right their description (wich are used for grouping and prefixing metrics). + ## Options for the sadf command. The values on the left represent the sadf + ## options and the values on the right their description (wich are used for + ## grouping and prefixing metrics). ## - ## Run 'sar -h' or 'man sar' to find out the supported options for your sysstat version. + ## Run 'sar -h' or 'man sar' to find out the supported options for your + ## sysstat version. [inputs.sysstat.options] - -C = "cpu" - -B = "paging" - -b = "io" - -d = "disk" # requires DISK activity - "-n ALL" = "network" - "-P ALL" = "per_cpu" - -q = "queue" - -R = "mem" - -r = "mem_util" - -S = "swap_util" - -u = "cpu_util" - -v = "inode" - -W = "swap" - -w = "task" - # -H = "hugepages" # only available for newer linux distributions - # "-I ALL" = "interrupts" # requires INT activity + -C = "cpu" + -B = "paging" + -b = "io" + -d = "disk" # requires DISK activity + "-n ALL" = "network" + "-P ALL" = "per_cpu" + -q = "queue" + -R = "mem" + -r = "mem_util" + -S = "swap_util" + -u = "cpu_util" + -v = "inode" + -W = "swap" + -w = "task" + # -H = "hugepages" # only available for newer linux distributions + # "-I ALL" = "interrupts" # requires INT activity # # - ## Device tags can be used to add additional tags for devices. For example the configuration below - ## adds a tag vg with value rootvg for all metrics with sda devices. + ## Device tags can be used to add additional tags for devices. 
+ ## For example the configuration below adds a tag vg with value rootvg for + ## all metrics with sda devices. # [[inputs.sysstat.device_tags.sda]] # vg = "rootvg" ` @@ -174,24 +178,28 @@ func (s *Sysstat) Gather(acc telegraf.Accumulator) error { return errors.New(strings.Join(errorStrings, "\n")) } -// collect collects sysstat data with the collector utility sadc. It runs the following command: +// collect collects sysstat data with the collector utility sadc. +// It runs the following command: // Sadc -S -S ... 2 tmpFile -// The above command collects system metrics during and saves it in binary form to tmpFile. +// The above command collects system metrics during and +// saves it in binary form to tmpFile. func (s *Sysstat) collect() error { options := []string{} for _, act := range s.Activities { options = append(options, "-S", act) } s.tmpFile = path.Join("/tmp", fmt.Sprintf("sysstat-%d", time.Now().Unix())) - collectInterval := s.interval - parseInterval // collectInterval has to be smaller than the telegraf data collection interval + // collectInterval has to be smaller than the telegraf data collection interval + collectInterval := s.interval - parseInterval - if collectInterval < 0 { // If true, interval is not defined yet and Gather is run for the first time. + // If true, interval is not defined yet and Gather is run for the first time. + if collectInterval < 0 { collectInterval = 1 // In that case we only collect for 1 second. } options = append(options, strconv.Itoa(collectInterval), "2", s.tmpFile) cmd := execCommand(s.Sadc, options...) - out, err := cmd.CombinedOutput() + out, err := internal.CombinedOutputTimeout(cmd, time.Second*5) if err != nil { return fmt.Errorf("failed to run command %s: %s", strings.Join(cmd.Args, " "), string(out)) } @@ -279,8 +287,9 @@ func (s *Sysstat) parse(acc telegraf.Accumulator, option string, ts time.Time) e acc.AddFields(measurement, v.fields, v.tags, ts) } } - if err := cmd.Wait(); err != nil { - return fmt.Errorf("command %s failed with %s", strings.Join(cmd.Args, " "), err) + if err := internal.WaitTimeout(cmd, time.Second*5); err != nil { + return fmt.Errorf("command %s failed with %s", + strings.Join(cmd.Args, " "), err) } return nil } From 91460436cf07bcc844a3cfe450c14b17d9e20289 Mon Sep 17 00:00:00 2001 From: Cameron Sparr Date: Fri, 29 Apr 2016 12:32:04 -0600 Subject: [PATCH 80/84] Changelog update --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index ae54ed7ca..c9c550584 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -85,6 +85,7 @@ based on _prefix_ in addition to globs. This means that a filter like - [#1098](https://github.com/influxdata/telegraf/issues/1098): Sanitize invalid OpenTSDB characters. - [#1110](https://github.com/influxdata/telegraf/pull/1110): Sanitize * to - in graphite serializer. Thanks @goodeggs! - [#1118](https://github.com/influxdata/telegraf/pull/1118): Sanitize Counter names for `win_perf_counters` input. +- [#1125](https://github.com/influxdata/telegraf/pull/1125): Wrap all exec command runners with a timeout, so hung os processes don't halt Telegraf. 
## v0.12.1 [2016-04-14] From a30b1a394fa740ba89630043cea860c28647c56c Mon Sep 17 00:00:00 2001 From: Cameron Sparr Date: Fri, 29 Apr 2016 18:48:07 -0600 Subject: [PATCH 81/84] Kafka output: set max_retry=3 & required_acks=-1 as defaults closes #1113 --- CHANGELOG.md | 1 + plugins/outputs/kafka/kafka.go | 5 ++++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c9c550584..deca82235 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -86,6 +86,7 @@ based on _prefix_ in addition to globs. This means that a filter like - [#1110](https://github.com/influxdata/telegraf/pull/1110): Sanitize * to - in graphite serializer. Thanks @goodeggs! - [#1118](https://github.com/influxdata/telegraf/pull/1118): Sanitize Counter names for `win_perf_counters` input. - [#1125](https://github.com/influxdata/telegraf/pull/1125): Wrap all exec command runners with a timeout, so hung os processes don't halt Telegraf. +- [#1113](https://github.com/influxdata/telegraf/pull/1113): Set MaxRetry and RequiredAcks defaults in Kafka output. ## v0.12.1 [2016-04-14] diff --git a/plugins/outputs/kafka/kafka.go b/plugins/outputs/kafka/kafka.go index 1fafa1353..1b350ac6c 100644 --- a/plugins/outputs/kafka/kafka.go +++ b/plugins/outputs/kafka/kafka.go @@ -181,6 +181,9 @@ func (k *Kafka) Write(metrics []telegraf.Metric) error { func init() { outputs.Add("kafka", func() telegraf.Output { - return &Kafka{} + return &Kafka{ + MaxRetry: 3, + RequiredAcks: -1, + } }) } From 4e9798d0e6d8361fd5755c1ecc43b0b048b2c105 Mon Sep 17 00:00:00 2001 From: Cameron Sparr Date: Fri, 29 Apr 2016 16:12:15 -0600 Subject: [PATCH 82/84] agent and tags configs sometimes not applied closes #1090 --- CHANGELOG.md | 1 + internal/config/config.go | 38 ++++++++++++++++++++++++++++---------- 2 files changed, 29 insertions(+), 10 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index deca82235..ad2207a43 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -87,6 +87,7 @@ based on _prefix_ in addition to globs. This means that a filter like - [#1118](https://github.com/influxdata/telegraf/pull/1118): Sanitize Counter names for `win_perf_counters` input. - [#1125](https://github.com/influxdata/telegraf/pull/1125): Wrap all exec command runners with a timeout, so hung os processes don't halt Telegraf. - [#1113](https://github.com/influxdata/telegraf/pull/1113): Set MaxRetry and RequiredAcks defaults in Kafka output. +- [#1090](https://github.com/influxdata/telegraf/issues/1090): [agent] and [global_tags] config sometimes not getting applied. 
## v0.12.1 [2016-04-14] diff --git a/internal/config/config.go b/internal/config/config.go index d580796fa..daaaa10fc 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -446,6 +446,33 @@ func (c *Config) LoadConfig(path string) error { return fmt.Errorf("Error parsing %s, %s", path, err) } + // Parse tags tables first: + for _, tableName := range []string{"tags", "global_tags"} { + if val, ok := tbl.Fields[tableName]; ok { + subTable, ok := val.(*ast.Table) + if !ok { + return fmt.Errorf("%s: invalid configuration", path) + } + if err = config.UnmarshalTable(subTable, c.Tags); err != nil { + log.Printf("Could not parse [global_tags] config\n") + return fmt.Errorf("Error parsing %s, %s", path, err) + } + } + } + + // Parse agent table: + if val, ok := tbl.Fields["agent"]; ok { + subTable, ok := val.(*ast.Table) + if !ok { + return fmt.Errorf("%s: invalid configuration", path) + } + if err = config.UnmarshalTable(subTable, c.Agent); err != nil { + log.Printf("Could not parse [agent] config\n") + return fmt.Errorf("Error parsing %s, %s", path, err) + } + } + + // Parse all the rest of the plugins: for name, val := range tbl.Fields { subTable, ok := val.(*ast.Table) if !ok { @@ -453,16 +480,7 @@ func (c *Config) LoadConfig(path string) error { } switch name { - case "agent": - if err = config.UnmarshalTable(subTable, c.Agent); err != nil { - log.Printf("Could not parse [agent] config\n") - return fmt.Errorf("Error parsing %s, %s", path, err) - } - case "global_tags", "tags": - if err = config.UnmarshalTable(subTable, c.Tags); err != nil { - log.Printf("Could not parse [global_tags] config\n") - return fmt.Errorf("Error parsing %s, %s", path, err) - } + case "agent", "global_tags", "tags": case "outputs": for pluginName, pluginVal := range subTable.Fields { switch pluginSubTable := pluginVal.(type) { From c114849a314826ba074d843ec476f0b6c15cd048 Mon Sep 17 00:00:00 2001 From: Cameron Sparr Date: Sun, 1 May 2016 10:20:15 -0600 Subject: [PATCH 83/84] Use a timeout for docker list & stat cmds closes #1133 --- CHANGELOG.md | 2 ++ etc/telegraf.conf | 5 +++++ plugins/inputs/docker/docker.go | 17 +++++++++++++---- 3 files changed, 20 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ad2207a43..5e89bbd72 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -88,6 +88,8 @@ based on _prefix_ in addition to globs. This means that a filter like - [#1125](https://github.com/influxdata/telegraf/pull/1125): Wrap all exec command runners with a timeout, so hung os processes don't halt Telegraf. - [#1113](https://github.com/influxdata/telegraf/pull/1113): Set MaxRetry and RequiredAcks defaults in Kafka output. - [#1090](https://github.com/influxdata/telegraf/issues/1090): [agent] and [global_tags] config sometimes not getting applied. +- [#1133](https://github.com/influxdata/telegraf/issues/1133): Use a timeout for docker list & stat cmds. +- [#1052](https://github.com/influxdata/telegraf/issues/1052): Docker panic fix when decode fails. 
## v0.12.1 [2016-04-14] diff --git a/etc/telegraf.conf b/etc/telegraf.conf index 40e126d94..f57bd1410 100644 --- a/etc/telegraf.conf +++ b/etc/telegraf.conf @@ -565,6 +565,8 @@ # endpoint = "unix:///var/run/docker.sock" # ## Only collect metrics for these containers, collect all if empty # container_names = [] +# ## Timeout for docker list, info, and stats commands +# timeout = "5s" # # Read statistics from one or many dovecot servers @@ -600,6 +602,9 @@ # ## Commands array # commands = ["/tmp/test.sh", "/usr/bin/mycollector --foo=bar"] # +# ## Timeout for each command to complete. +# timeout = "5s" +# # ## measurement name suffix (for separating different commands) # name_suffix = "_mycollector" # diff --git a/plugins/inputs/docker/docker.go b/plugins/inputs/docker/docker.go index 4241f6b5d..8a680a8e8 100644 --- a/plugins/inputs/docker/docker.go +++ b/plugins/inputs/docker/docker.go @@ -16,6 +16,7 @@ import ( "github.com/docker/engine-api/client" "github.com/docker/engine-api/types" "github.com/influxdata/telegraf" + "github.com/influxdata/telegraf/internal" "github.com/influxdata/telegraf/plugins/inputs" ) @@ -23,6 +24,7 @@ import ( type Docker struct { Endpoint string ContainerNames []string + Timeout internal.Duration client DockerClient } @@ -54,6 +56,8 @@ var sampleConfig = ` endpoint = "unix:///var/run/docker.sock" ## Only collect metrics for these containers, collect all if empty container_names = [] + ## Timeout for docker list, info, and stats commands + timeout = "5s" ` // Description returns input description @@ -97,7 +101,9 @@ func (d *Docker) Gather(acc telegraf.Accumulator) error { // List containers opts := types.ContainerListOptions{} - containers, err := d.client.ContainerList(context.Background(), opts) + ctx, cancel := context.WithTimeout(context.Background(), d.Timeout.Duration) + defer cancel() + containers, err := d.client.ContainerList(ctx, opts) if err != nil { return err } @@ -106,7 +112,6 @@ func (d *Docker) Gather(acc telegraf.Accumulator) error { var wg sync.WaitGroup wg.Add(len(containers)) for _, container := range containers { - go func(c types.Container) { defer wg.Done() err := d.gatherContainer(c, acc) @@ -127,7 +132,9 @@ func (d *Docker) gatherInfo(acc telegraf.Accumulator) error { metadataFields := make(map[string]interface{}) now := time.Now() // Get info from docker daemon - info, err := d.client.Info(context.Background()) + ctx, cancel := context.WithTimeout(context.Background(), d.Timeout.Duration) + defer cancel() + info, err := d.client.Info(ctx) if err != nil { return err } @@ -210,7 +217,9 @@ func (d *Docker) gatherContainer( } } - r, err := d.client.ContainerStats(context.Background(), container.ID, false) + ctx, cancel := context.WithTimeout(context.Background(), d.Timeout.Duration) + defer cancel() + r, err := d.client.ContainerStats(ctx, container.ID, false) if err != nil { log.Printf("Error getting docker stats: %s\n", err.Error()) } From 40dccf5b2974b59eaf30ce6f1354a0702040466f Mon Sep 17 00:00:00 2001 From: Victor Garcia Date: Fri, 29 Apr 2016 18:40:26 +0200 Subject: [PATCH 84/84] Metric for MongoDB jumbo chunks closes #1128 --- CHANGELOG.md | 1 + plugins/inputs/mongodb/README.md | 1 + plugins/inputs/mongodb/mongodb_data.go | 5 +++++ plugins/inputs/mongodb/mongodb_data_test.go | 1 + plugins/inputs/mongodb/mongodb_server.go | 7 +++++++ plugins/inputs/mongodb/mongostat.go | 12 ++++++++++++ 6 files changed, 27 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5e89bbd72..f05121d06 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ 
-67,6 +67,7 @@ based on _prefix_ in addition to globs. This means that a filter like - [#967](https://github.com/influxdata/telegraf/issues/967): Buffer logging improvements. - [#1107](https://github.com/influxdata/telegraf/issues/1107): Support lustre2 job stats. Thanks @hanleyja! - [#1122](https://github.com/influxdata/telegraf/pull/1122): Support setting config path through env variable and default paths. +- [#1128](https://github.com/influxdata/telegraf/pull/1128): MongoDB jumbo chunks metric for MongoDB input plugin ### Bugfixes diff --git a/plugins/inputs/mongodb/README.md b/plugins/inputs/mongodb/README.md index 0d140b395..66ff2668e 100644 --- a/plugins/inputs/mongodb/README.md +++ b/plugins/inputs/mongodb/README.md @@ -51,3 +51,4 @@ and create a single measurement containing values e.g. * ttl_deletes_per_sec * ttl_passes_per_sec * repl_lag + * jumbo_chunks (only if mongos or mongo config) diff --git a/plugins/inputs/mongodb/mongodb_data.go b/plugins/inputs/mongodb/mongodb_data.go index f9bbc1d3b..7a52d650a 100644 --- a/plugins/inputs/mongodb/mongodb_data.go +++ b/plugins/inputs/mongodb/mongodb_data.go @@ -57,6 +57,10 @@ var DefaultReplStats = map[string]string{ "repl_lag": "ReplLag", } +var DefaultClusterStats = map[string]string{ + "jumbo_chunks": "JumboChunksCount", +} + var MmapStats = map[string]string{ "mapped_megabytes": "Mapped", "non-mapped_megabytes": "NonMapped", @@ -74,6 +78,7 @@ func (d *MongodbData) AddDefaultStats() { if d.StatLine.NodeType != "" { d.addStat(statLine, DefaultReplStats) } + d.addStat(statLine, DefaultClusterStats) if d.StatLine.StorageEngine == "mmapv1" { d.addStat(statLine, MmapStats) } else if d.StatLine.StorageEngine == "wiredTiger" { diff --git a/plugins/inputs/mongodb/mongodb_data_test.go b/plugins/inputs/mongodb/mongodb_data_test.go index 5619641fc..a08549cfd 100644 --- a/plugins/inputs/mongodb/mongodb_data_test.go +++ b/plugins/inputs/mongodb/mongodb_data_test.go @@ -133,6 +133,7 @@ func TestStateTag(t *testing.T) { "vsize_megabytes": int64(0), "ttl_deletes_per_sec": int64(0), "ttl_passes_per_sec": int64(0), + "jumbo_chunks": int64(0), } acc.AssertContainsTaggedFields(t, "mongodb", fields, stateTags) } diff --git a/plugins/inputs/mongodb/mongodb_server.go b/plugins/inputs/mongodb/mongodb_server.go index 86699a4d9..173391e2f 100644 --- a/plugins/inputs/mongodb/mongodb_server.go +++ b/plugins/inputs/mongodb/mongodb_server.go @@ -36,9 +36,16 @@ func (s *Server) gatherData(acc telegraf.Accumulator) error { log.Println("Not gathering replica set status, member not in replica set") } + jumbo_chunks, _ := s.Session.DB("config").C("chunks").Find(bson.M{"jumbo": true}).Count() + + result_cluster := &ClusterStatus{ + JumboChunksCount: int64(jumbo_chunks), + } + result := &MongoStatus{ ServerStatus: result_server, ReplSetStatus: result_repl, + ClusterStatus: result_cluster, } defer func() { diff --git a/plugins/inputs/mongodb/mongostat.go b/plugins/inputs/mongodb/mongostat.go index b131bf9a0..d564931d1 100644 --- a/plugins/inputs/mongodb/mongostat.go +++ b/plugins/inputs/mongodb/mongostat.go @@ -34,6 +34,7 @@ type MongoStatus struct { SampleTime time.Time ServerStatus *ServerStatus ReplSetStatus *ReplSetStatus + ClusterStatus *ClusterStatus } type ServerStatus struct { @@ -64,6 +65,11 @@ type ServerStatus struct { Metrics *MetricsStats `bson:"metrics"` } +// ClusterStatus stores information related to the whole cluster +type ClusterStatus struct { + JumboChunksCount int64 +} + // ReplSetStatus stores information from replSetGetStatus type ReplSetStatus 
struct { Members []ReplSetMember `bson:"members"` @@ -387,6 +393,9 @@ type StatLine struct { NumConnections int64 ReplSetName string NodeType string + + // Cluster fields + JumboChunksCount int64 } func parseLocks(stat ServerStatus) map[string]LockUsage { @@ -665,5 +674,8 @@ func NewStatLine(oldMongo, newMongo MongoStatus, key string, all bool, sampleSec } } + newClusterStat := *newMongo.ClusterStatus + returnVal.JumboChunksCount = newClusterStat.JumboChunksCount + return returnVal }
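For context on the final patch above: the new `jumbo_chunks` metric comes from a single count query against the cluster's `config` database, exactly as written in `Server.gatherData`. Below is a minimal standalone sketch (not part of the patch series) of running that same query on its own, assuming a mongos reachable at `localhost:27017` and the `gopkg.in/mgo.v2` driver that Telegraf vendors for the mongodb input; the address and output formatting are illustrative only.

```go
package main

import (
	"fmt"
	"log"

	"gopkg.in/mgo.v2"
	"gopkg.in/mgo.v2/bson"
)

func main() {
	// Hypothetical mongos address; a real deployment would use its own URL.
	session, err := mgo.Dial("localhost:27017")
	if err != nil {
		log.Fatal(err)
	}
	defer session.Close()

	// Same query the patch adds in Server.gatherData: count chunks that the
	// balancer has flagged as jumbo in the cluster's config database.
	n, err := session.DB("config").C("chunks").Find(bson.M{"jumbo": true}).Count()
	if err != nil {
		log.Fatal(err)
	}
	fmt.Printf("jumbo_chunks=%d\n", n)
}
```

On a standalone mongod (no `config.chunks` collection) the count is simply 0, which matches the patch's behavior of always emitting the field while noting in the README that it is only meaningful for mongos or config servers.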