Merge remote-tracking branch 'upstream/master'

This commit is contained in:
Luis I. Perez 2016-05-02 12:47:49 +02:00
commit b93f229ca9
95 changed files with 5401 additions and 1086 deletions

2
.gitattributes vendored Normal file
View File

@ -0,0 +1,2 @@
CHANGELOG.md merge=union

View File

@ -1,4 +1,98 @@
## v0.12.1 [unreleased]
## v0.13 [unreleased]
### Release Notes
- **Breaking change** in jolokia plugin. See
https://github.com/influxdata/telegraf/blob/master/plugins/inputs/jolokia/README.md
for updated configuration. The plugin will now support proxy mode and will make
POST requests.
- New [agent] configuration option: `metric_batch_size`. This option tells
telegraf the maximum batch size to allow to accumulate before sending a flush
to the configured outputs. `metric_buffer_limit` now refers to the absolute
maximum number of metrics that will accumulate before metrics are dropped.
- There is no longer an option to
`flush_buffer_when_full`, this is now the default and only behavior of telegraf.
- **Breaking Change**: docker plugin tags. The cont_id tag no longer exists, it
will now be a field, and be called container_id. Additionally, cont_image and
cont_name are being renamed to container_image and container_name.
- **Breaking Change**: docker plugin measurements. The `docker_cpu`, `docker_mem`,
`docker_blkio` and `docker_net` measurements are being renamed to
`docker_container_cpu`, `docker_container_mem`, `docker_container_blkio` and
`docker_container_net`. Why? Because these metrics are
specifically tracking per-container stats. The problem with per-container stats,
in some use-cases, is that if containers are short-lived AND names are not
kept consistent, then the series cardinality will balloon very quickly.
So adding "container" to each metric will:
(1) make it more clear that these metrics are per-container, and
(2) allow users to easily drop per-container metrics if cardinality is an
issue (`namedrop = ["docker_container_*"]`)
- `tagexclude` and `taginclude` are now available, which can be used to remove
tags from measurements on inputs and outputs. See
[the configuration doc](https://github.com/influxdata/telegraf/blob/master/docs/CONFIGURATION.md)
for more details.
- **Measurement filtering:** All measurement filters now match based on glob
only. Previously there was an undocumented behavior where filters would match
based on _prefix_ in addition to globs. This means that a filter like
`fielddrop = ["time_"]` will need to be changed to `fielddrop = ["time_*"]`
- **datadog**: measurement and field names will no longer have `_` replaced by `.`
- The following plugins have changed their tags to _not_ overwrite the host tag:
- cassandra: `host -> cassandra_host`
- disque: `host -> disque_host`
- rethinkdb: `host -> rethinkdb_host`
- **Breaking Change**: The `win_perf_counters` input has been changed to sanitize field names, replacing `/Sec` and `/sec` with `_persec`, as well as spaces with underscores. This is needed because Graphite doesn't like slashes and spaces, and was failing to accept metrics that had them. The `/[sS]ec` -> `_persec` is just to make things clearer and uniform.
### Features
- [#1031](https://github.com/influxdata/telegraf/pull/1031): Jolokia plugin proxy mode. Thanks @saiello!
- [#1017](https://github.com/influxdata/telegraf/pull/1017): taginclude and tagexclude arguments.
- [#1015](https://github.com/influxdata/telegraf/pull/1015): Docker plugin schema refactor.
- [#889](https://github.com/influxdata/telegraf/pull/889): Improved MySQL plugin. Thanks @maksadbek!
- [#1060](https://github.com/influxdata/telegraf/pull/1060): TTL metrics added to MongoDB input plugin
- [#1056](https://github.com/influxdata/telegraf/pull/1056): Don't allow inputs to overwrite host tags.
- [#1035](https://github.com/influxdata/telegraf/issues/1035): Add `user`, `exe`, `pidfile` tags to procstat plugin.
- [#1041](https://github.com/influxdata/telegraf/issues/1041): Add `n_cpus` field to the system plugin.
- [#1072](https://github.com/influxdata/telegraf/pull/1072): New Input Plugin: filestat.
- [#1066](https://github.com/influxdata/telegraf/pull/1066): Replication lag metrics for MongoDB input plugin
- [#1086](https://github.com/influxdata/telegraf/pull/1086): Ability to specify AWS keys in config file. Thanks @johnrengleman!
- [#1096](https://github.com/influxdata/telegraf/pull/1096): Performance refactor of running output buffers.
- [#967](https://github.com/influxdata/telegraf/issues/967): Buffer logging improvements.
- [#1107](https://github.com/influxdata/telegraf/issues/1107): Support lustre2 job stats. Thanks @hanleyja!
- [#1122](https://github.com/influxdata/telegraf/pull/1122): Support setting config path through env variable and default paths.
- [#1128](https://github.com/influxdata/telegraf/pull/1128): MongoDB jumbo chunks metric for MongoDB input plugin
### Bugfixes
- [#1050](https://github.com/influxdata/telegraf/issues/1050): jolokia plugin - do not overwrite host tag. Thanks @saiello!
- [#921](https://github.com/influxdata/telegraf/pull/921): mqtt_consumer stops gathering metrics. Thanks @chaton78!
- [#1013](https://github.com/influxdata/telegraf/pull/1013): Close dead riemann output connections. Thanks @echupriyanov!
- [#1012](https://github.com/influxdata/telegraf/pull/1012): Set default tags in test accumulator.
- [#1024](https://github.com/influxdata/telegraf/issues/1024): Don't replace `.` with `_` in datadog output.
- [#1058](https://github.com/influxdata/telegraf/issues/1058): Fix possible leaky TCP connections in influxdb output.
- [#1044](https://github.com/influxdata/telegraf/pull/1044): Fix SNMP OID possible collisions. Thanks @relip
- [#1022](https://github.com/influxdata/telegraf/issues/1022): Don't error deb/rpm install on systemd errors.
- [#1078](https://github.com/influxdata/telegraf/issues/1078): Use default AWS credential chain.
- [#1070](https://github.com/influxdata/telegraf/issues/1070): SQL Server input. Fix datatype conversion.
- [#1089](https://github.com/influxdata/telegraf/issues/1089): Fix leaky TCP connections in phpfpm plugin.
- [#914](https://github.com/influxdata/telegraf/issues/914): Telegraf can drop metrics on full buffers.
- [#1098](https://github.com/influxdata/telegraf/issues/1098): Sanitize invalid OpenTSDB characters.
- [#1110](https://github.com/influxdata/telegraf/pull/1110): Sanitize * to - in graphite serializer. Thanks @goodeggs!
- [#1118](https://github.com/influxdata/telegraf/pull/1118): Sanitize Counter names for `win_perf_counters` input.
- [#1125](https://github.com/influxdata/telegraf/pull/1125): Wrap all exec command runners with a timeout, so hung os processes don't halt Telegraf.
- [#1113](https://github.com/influxdata/telegraf/pull/1113): Set MaxRetry and RequiredAcks defaults in Kafka output.
- [#1090](https://github.com/influxdata/telegraf/issues/1090): [agent] and [global_tags] config sometimes not getting applied.
- [#1133](https://github.com/influxdata/telegraf/issues/1133): Use a timeout for docker list & stat cmds.
- [#1052](https://github.com/influxdata/telegraf/issues/1052): Docker panic fix when decode fails.
## v0.12.1 [2016-04-14]
### Release Notes
- Breaking change in the dovecot input plugin. See Features section below.

4
Godeps
View File

@ -16,14 +16,16 @@ github.com/eapache/go-resiliency b86b1ec0dd4209a588dc1285cdd471e73525c0b3
github.com/eapache/queue ded5959c0d4e360646dc9e9908cff48666781367
github.com/eclipse/paho.mqtt.golang 0f7a459f04f13a41b7ed752d47944528d4bf9a86
github.com/go-sql-driver/mysql 1fca743146605a172a266e1654e01e5cd5669bee
github.com/gobwas/glob d877f6352135181470c40c73ebb81aefa22115fa
github.com/golang/protobuf 552c7b9542c194800fd493123b3798ef0a832032
github.com/golang/snappy 427fb6fc07997f43afa32f35e850833760e489a7
github.com/gonuts/go-shellquote e842a11b24c6abfb3dd27af69a17f482e4b483c2
github.com/gorilla/context 1ea25387ff6f684839d82767c1733ff4d4d15d0a
github.com/gorilla/mux c9e326e2bdec29039a3761c07bece13133863e1e
github.com/hailocab/go-hostpool e80d13ce29ede4452c43dea11e79b9bc8a15b478
github.com/hpcloud/tail b2940955ab8b26e19d43a43c4da0475dd81bdb56
github.com/influxdata/config b79f6829346b8d6e78ba73544b1e1038f1f1c9da
github.com/influxdata/influxdb e3fef5593c21644f2b43af55d6e17e70910b0e48
github.com/influxdata/influxdb 21db76b3374c733f37ed16ad93f3484020034351
github.com/influxdata/toml af4df43894b16e3fd2b788d01bd27ad0776ef2d0
github.com/klauspost/crc32 19b0b332c9e4516a6370a0456e6182c3b5036720
github.com/lib/pq e182dc4027e2ded4b19396d638610f2653295f36

View File

@ -20,12 +20,12 @@ new plugins.
### Linux deb and rpm Packages:
Latest:
* http://get.influxdb.org/telegraf/telegraf_0.12.0-1_amd64.deb
* http://get.influxdb.org/telegraf/telegraf-0.12.0-1.x86_64.rpm
* http://get.influxdb.org/telegraf/telegraf_0.12.1-1_amd64.deb
* http://get.influxdb.org/telegraf/telegraf-0.12.1-1.x86_64.rpm
Latest (arm):
* http://get.influxdb.org/telegraf/telegraf_0.12.0-1_armhf.deb
* http://get.influxdb.org/telegraf/telegraf-0.12.0-1.armhf.rpm
* http://get.influxdb.org/telegraf/telegraf_0.12.1-1_armhf.deb
* http://get.influxdb.org/telegraf/telegraf-0.12.1-1.armhf.rpm
##### Package Instructions:
@ -46,28 +46,28 @@ to use this repo to install & update telegraf.
### Linux tarballs:
Latest:
* http://get.influxdb.org/telegraf/telegraf-0.12.0-1_linux_amd64.tar.gz
* http://get.influxdb.org/telegraf/telegraf-0.12.0-1_linux_i386.tar.gz
* http://get.influxdb.org/telegraf/telegraf-0.12.0-1_linux_armhf.tar.gz
* http://get.influxdb.org/telegraf/telegraf-0.12.1-1_linux_amd64.tar.gz
* http://get.influxdb.org/telegraf/telegraf-0.12.1-1_linux_i386.tar.gz
* http://get.influxdb.org/telegraf/telegraf-0.12.1-1_linux_armhf.tar.gz
##### tarball Instructions:
To install the full directory structure with config file, run:
```
sudo tar -C / -zxvf ./telegraf-0.12.0-1_linux_amd64.tar.gz
sudo tar -C / -zxvf ./telegraf-0.12.1-1_linux_amd64.tar.gz
```
To extract only the binary, run:
```
tar -zxvf telegraf-0.12.0-1_linux_amd64.tar.gz --strip-components=3 ./usr/bin/telegraf
tar -zxvf telegraf-0.12.1-1_linux_amd64.tar.gz --strip-components=3 ./usr/bin/telegraf
```
### FreeBSD tarball:
Latest:
* http://get.influxdb.org/telegraf/telegraf-0.12.0-1_freebsd_amd64.tar.gz
* http://get.influxdb.org/telegraf/telegraf-0.12.1-1_freebsd_amd64.tar.gz
##### tarball Instructions:
@ -87,8 +87,8 @@ brew install telegraf
### Windows Binaries (EXPERIMENTAL)
Latest:
* http://get.influxdb.org/telegraf/telegraf-0.12.0-1_windows_amd64.zip
* http://get.influxdb.org/telegraf/telegraf-0.12.0-1_windows_i386.zip
* http://get.influxdb.org/telegraf/telegraf-0.12.1-1_windows_amd64.zip
* http://get.influxdb.org/telegraf/telegraf-0.12.1-1_windows_i386.zip
### From Source:
@ -168,7 +168,8 @@ Currently implemented sources:
* [docker](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/docker)
* [dovecot](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/dovecot)
* [elasticsearch](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/elasticsearch)
* [exec](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/exec ) (generic executable plugin, support JSON, influx, graphite and nagios)
* [exec](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/exec) (generic executable plugin, support JSON, influx, graphite and nagios)
* [filestat](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/filestat)
* [haproxy](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/haproxy)
* [http_response](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/http_response)
* [httpjson](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/httpjson) (generic JSON-emitting http service plugin)

View File

@ -84,18 +84,15 @@ func (ac *accumulator) AddFields(
if tags == nil {
tags = make(map[string]string)
}
// Apply plugin-wide tags if set
for k, v := range ac.inputConfig.Tags {
if _, ok := tags[k]; !ok {
tags[k] = v
}
}
// Apply daemon-wide tags if set
for k, v := range ac.defaultTags {
if _, ok := tags[k]; !ok {
tags[k] = v
}
// Apply plugin-wide tags if set
for k, v := range ac.inputConfig.Tags {
tags[k] = v
}
ac.inputConfig.Filter.FilterTags(tags)
result := make(map[string]interface{})
for k, v := range fields {

View File

@ -300,3 +300,35 @@ func TestAddBools(t *testing.T) {
fmt.Sprintf("acctest,acc=test,default=tag value=false %d", now.UnixNano()),
actual)
}
// TestAccFilterTags verifies that tag filters configured on an input are
// applied to the metrics the accumulator emits: with TagExclude set to "acc",
// metrics added with an "acc" tag must come out without it.
func TestAccFilterTags(t *testing.T) {
a := accumulator{}
now := time.Now()
// Buffered channel so the three Add calls below can complete before anything
// is read back (assumes Add delivers each metric on a.metrics — confirm
// against the accumulator implementation).
a.metrics = make(chan telegraf.Metric, 10)
defer close(a.metrics)
// Exclude the "acc" tag key from every metric passing through this input.
filter := internal_models.Filter{
TagExclude: []string{"acc"},
}
// The filter is compiled before being attached; compilation must not fail.
assert.NoError(t, filter.CompileFilter())
a.inputConfig = &internal_models.InputConfig{}
a.inputConfig.Filter = filter
// Three cases: no tags at all, the excluded tag present, and the excluded
// tag present together with an explicit timestamp.
a.Add("acctest", float64(101), map[string]string{})
a.Add("acctest", float64(101), map[string]string{"acc": "test"})
a.Add("acctest", float64(101), map[string]string{"acc": "test"}, now)
// The first two metrics were added without a timestamp, so only the
// "name fields" prefix is checked. A surviving tag would render as
// "acctest,acc=test value=101" and fail the Contains check.
testm := <-a.metrics
actual := testm.String()
assert.Contains(t, actual, "acctest value=101")
testm = <-a.metrics
actual = testm.String()
assert.Contains(t, actual, "acctest value=101")
// The third metric carries a known timestamp, so the full line is compared.
testm = <-a.metrics
actual = testm.String()
assert.Equal(t,
fmt.Sprintf("acctest value=101 %d", now.UnixNano()),
actual)
}

View File

@ -221,6 +221,7 @@ func (a *Agent) Test() error {
for _, input := range a.Config.Inputs {
acc := NewAccumulator(input.Config, metricC)
acc.SetDebug(true)
acc.setDefaultTags(a.Config.Tags)
fmt.Printf("* Plugin: %s, Collection 1\n", input.Name)
if input.Config.Interval != 0 {

View File

@ -4,9 +4,9 @@ machine:
post:
- sudo service zookeeper stop
- go version
- go version | grep 1.6 || sudo rm -rf /usr/local/go
- wget https://storage.googleapis.com/golang/go1.6.linux-amd64.tar.gz
- sudo tar -C /usr/local -xzf go1.6.linux-amd64.tar.gz
- go version | grep 1.6.2 || sudo rm -rf /usr/local/go
- wget https://storage.googleapis.com/golang/go1.6.2.linux-amd64.tar.gz
- sudo tar -C /usr/local -xzf go1.6.2.linux-amd64.tar.gz
- go version
dependencies:

View File

@ -71,6 +71,13 @@ The flags are:
-quiet run in quiet mode
-version print the version to stdout
In addition to the -config flag, telegraf will also load the config file from
an environment variable or default location. Precedence is:
1. -config flag
2. $TELEGRAF_CONFIG_PATH environment variable
3. $HOME/.telegraf/telegraf.conf
4. /etc/telegraf/telegraf.conf
Examples:
# generate a telegraf config file:
@ -98,12 +105,10 @@ func main() {
flag.Parse()
args := flag.Args()
if flag.NFlag() == 0 && len(args) == 0 {
usageExit(0)
}
var inputFilters []string
if *fInputFiltersLegacy != "" {
fmt.Printf("WARNING '--filter' flag is deprecated, please use" +
" '--input-filter'")
inputFilter := strings.TrimSpace(*fInputFiltersLegacy)
inputFilters = strings.Split(":"+inputFilter+":", ":")
}
@ -114,6 +119,8 @@ func main() {
var outputFilters []string
if *fOutputFiltersLegacy != "" {
fmt.Printf("WARNING '--outputfilter' flag is deprecated, please use" +
" '--output-filter'")
outputFilter := strings.TrimSpace(*fOutputFiltersLegacy)
outputFilters = strings.Split(":"+outputFilter+":", ":")
}
@ -170,25 +177,19 @@ func main() {
return
}
var (
c *config.Config
err error
)
if *fConfig != "" {
c = config.NewConfig()
// If no other options are specified, load the config file and run.
c := config.NewConfig()
c.OutputFilters = outputFilters
c.InputFilters = inputFilters
err = c.LoadConfig(*fConfig)
err := c.LoadConfig(*fConfig)
if err != nil {
log.Fatal(err)
}
} else {
fmt.Println("You must specify a config file. See telegraf --help")
fmt.Println(err)
os.Exit(1)
}
if *fConfigDirectoryLegacy != "" {
fmt.Printf("WARNING '--configdirectory' flag is deprecated, please use" +
" '--config-directory'")
err = c.LoadDirectory(*fConfigDirectoryLegacy)
if err != nil {
log.Fatal(err)

View File

@ -3,11 +3,20 @@
## Generating a Configuration File
A default Telegraf config file can be generated using the -sample-config flag:
`telegraf -sample-config > telegraf.conf`
```
telegraf -sample-config > telegraf.conf
```
To generate a file with specific inputs and outputs, you can use the
-input-filter and -output-filter flags:
`telegraf -sample-config -input-filter cpu:mem:net:swap -output-filter influxdb:kafka`
```
telegraf -sample-config -input-filter cpu:mem:net:swap -output-filter influxdb:kafka
```
You can see the latest config file with all available plugins here:
[telegraf.conf](https://github.com/influxdata/telegraf/blob/master/etc/telegraf.conf)
## Environment Variables
@ -17,8 +26,8 @@ for numbers and booleans they should be plain (ie, $INT_VAR, $BOOL_VAR)
## `[global_tags]` Configuration
Global tags can be specific in the `[global_tags]` section of the config file in
key="value" format. All metrics being gathered on this host will be tagged
Global tags can be specified in the `[global_tags]` section of the config file
in key="value" format. All metrics being gathered on this host will be tagged
with the tags specified here.
## `[agent]` Configuration
@ -29,8 +38,12 @@ config.
* **interval**: Default data collection interval for all inputs
* **round_interval**: Rounds collection interval to 'interval'
ie, if interval="10s" then always collect on :00, :10, :20, etc.
* **metric_batch_size**: Telegraf will send metrics to outputs in batches of at
most metric_batch_size metrics.
* **metric_buffer_limit**: Telegraf will cache metric_buffer_limit metrics
for each output, and will flush this buffer on a successful write.
This should be a multiple of metric_batch_size and must not be less
than 2 times metric_batch_size.
* **collection_jitter**: Collection jitter is used to jitter
the collection by a random amount.
Each plugin will sleep for a random time within jitter before collecting.
@ -47,9 +60,35 @@ ie, a jitter of 5s and flush_interval 10s means flushes will happen every 10-15s
* **quiet**: Run telegraf in quiet mode.
* **hostname**: Override default hostname, if empty use os.Hostname().
## `[inputs.xxx]` Configuration
#### Measurement Filtering
There are some configuration options that are configurable per input:
Filters can be configured per input or output, see below for examples.
* **namepass**: An array of strings that is used to filter metrics generated by the
current input. Each string in the array is tested as a glob match against
measurement names and if it matches, the measurement is emitted.
* **namedrop**: The inverse of namepass, if a measurement name matches, it is not emitted.
* **fieldpass**: An array of strings that is used to filter metrics generated by the
current input. Each string in the array is tested as a glob match against field names
and if it matches, the field is emitted. fieldpass is not available for outputs.
* **fielddrop**: The inverse of fieldpass, if a field name matches, it is not emitted.
fielddrop is not available for outputs.
* **tagpass**: tag names and arrays of strings that are used to filter
measurements by the current input. Each string in the array is tested as a glob
match against the tag name, and if it matches the measurement is emitted.
* **tagdrop**: The inverse of tagpass. If a tag matches, the measurement is not
emitted. This is tested on measurements that have passed the tagpass test.
* **tagexclude**: tagexclude can be used to exclude a tag from measurement(s).
As opposed to tagdrop, which will drop an entire measurement based on its
tags, tagexclude simply strips the given tag keys from the measurement. This
can be used on inputs & outputs, but it is _recommended_ to be used on inputs,
as it is more efficient to filter out tags at the ingestion point.
* **taginclude**: taginclude is the inverse of tagexclude. It will only include
the tag keys in the final measurement.
## Input Configuration
Some configuration options are configurable per input:
* **name_override**: Override the base name of the measurement.
(Default is the name of the input).
@ -60,24 +99,6 @@ There are some configuration options that are configurable per input:
global interval, but if one particular input should be run less or more often,
you can configure that here.
#### Input Filters
There are also filters that can be configured per input:
* **namepass**: An array of strings that is used to filter metrics generated by the
current input. Each string in the array is tested as a glob match against
measurement names and if it matches, the field is emitted.
* **namedrop**: The inverse of pass, if a measurement name matches, it is not emitted.
* **fieldpass**: An array of strings that is used to filter metrics generated by the
current input. Each string in the array is tested as a glob match against field names
and if it matches, the field is emitted.
* **fielddrop**: The inverse of pass, if a field name matches, it is not emitted.
* **tagpass**: tag names and arrays of strings that are used to filter
measurements by the current input. Each string in the array is tested as a glob
match against the tag name, and if it matches the measurement is emitted.
* **tagdrop**: The inverse of tagpass. If a tag matches, the measurement is not
emitted. This is tested on measurements that have passed the tagpass test.
#### Input Configuration Examples
This is a full working config that will output CPU data to an InfluxDB instance
@ -155,6 +176,20 @@ fields which begin with `time_`.
namepass = ["rest_client_*"]
```
#### Input Config: taginclude and tagexclude
```toml
# Only include the "cpu" tag in the measurements for the cpu plugin.
[[inputs.cpu]]
percpu = true
totalcpu = true
taginclude = ["cpu"]
# Exclude the "fstype" tag from the measurements for the disk plugin.
[[inputs.disk]]
tagexclude = ["fstype"]
```
#### Input config: prefix, suffix, and override
This plugin will emit measurements with the name `cpu_total`
@ -180,6 +215,9 @@ This will emit measurements with the name `foobar`
This plugin will emit measurements with two additional tags: `tag1=foo` and
`tag2=bar`
NOTE: Order matters, the `[inputs.cpu.tags]` table must be at the _end_ of the
plugin definition.
```toml
[[inputs.cpu]]
percpu = false
@ -208,15 +246,12 @@ to avoid measurement collisions:
fielddrop = ["cpu_time*"]
```
## `[outputs.xxx]` Configuration
## Output Configuration
Telegraf also supports specifying multiple output sinks to send data to,
configuring each output sink is different, but examples can be
found by running `telegraf -sample-config`.
Outputs also support the same configurable options as inputs
(namepass, namedrop, tagpass, tagdrop)
```toml
[[outputs.influxdb]]
urls = [ "http://localhost:8086" ]

View File

@ -75,14 +75,19 @@ metrics are parsed directly into Telegraf metrics.
# JSON:
The JSON data format flattens JSON into metric _fields_. For example, this JSON:
The JSON data format flattens JSON into metric _fields_.
NOTE: Only numerical values are converted to fields, and they are converted
into a float. Strings are ignored unless specified as a tag_key (see below).
So for example, this JSON:
```json
{
"a": 5,
"b": {
"c": 6
}
},
"ignored": "I'm a string"
}
```

View File

@ -28,6 +28,5 @@
- github.com/wvanbergen/kazoo-go [MIT LICENSE](https://github.com/wvanbergen/kazoo-go/blob/master/MIT-LICENSE)
- gopkg.in/dancannon/gorethink.v1 [APACHE LICENSE](https://github.com/dancannon/gorethink/blob/v1.1.2/LICENSE)
- gopkg.in/mgo.v2 [BSD LICENSE](https://github.com/go-mgo/mgo/blob/v2/LICENSE)
- golang.org/x/crypto/* [BSD LICENSE](https://github.com/golang/crypto/blob/master/LICENSE)
- internal Glob function [MIT LICENSE](https://github.com/ryanuber/go-glob/blob/master/LICENSE)
- golang.org/x/crypto/ [BSD LICENSE](https://github.com/golang/crypto/blob/master/LICENSE)

View File

@ -30,11 +30,13 @@
## ie, if interval="10s" then always collect on :00, :10, :20, etc.
round_interval = true
## Telegraf will cache metric_buffer_limit metrics for each output, and will
## flush this buffer on a successful write.
metric_buffer_limit = 1000
## Flush the buffer whenever full, regardless of flush_interval.
flush_buffer_when_full = true
## Telegraf will send metrics to outputs in batches of at
## most metric_batch_size metrics.
metric_batch_size = 1000
## For failed writes, telegraf will cache metric_buffer_limit metrics for each
## output, and will flush this buffer on a successful write. Oldest metrics
## are dropped first when this buffer fills.
metric_buffer_limit = 10000
## Collection jitter is used to jitter the collection by a random amount.
## Each plugin will sleep for a random time within jitter before collecting.
@ -147,6 +149,15 @@
# ## Amazon REGION
# region = 'us-east-1'
#
# ## Amazon Credentials
# ## Credentials are loaded in the following order
# ## 1) explicit credentials from 'access_key' and 'secret_key'
# ## 2) environment variables
# ## 3) shared credentials file
# ## 4) EC2 Instance Profile
# #access_key = ""
# #secret_key = ""
#
# ## Namespace for the CloudWatch MetricDatums
# namespace = 'InfluxData/Telegraf'
@ -239,6 +250,16 @@
# [[outputs.kinesis]]
# ## Amazon REGION of kinesis endpoint.
# region = "ap-southeast-2"
#
# ## Amazon Credentials
# ## Credentials are loaded in the following order
# ## 1) explicit credentials from 'access_key' and 'secret_key'
# ## 2) environment variables
# ## 3) shared credentials file
# ## 4) EC2 Instance Profile
# #access_key = ""
# #secret_key = ""
#
# ## Kinesis StreamName must exist prior to starting telegraf.
# streamname = "StreamName"
# ## PartitionKey as used for sharding data.
@ -453,6 +474,15 @@
# ## Amazon Region
# region = 'us-east-1'
#
# ## Amazon Credentials
# ## Credentials are loaded in the following order
# ## 1) explicit credentials from 'access_key' and 'secret_key'
# ## 2) environment variables
# ## 3) shared credentials file
# ## 4) EC2 Instance Profile
# #access_key = ""
# #secret_key = ""
#
# ## Requested CloudWatch aggregation Period (required - must be a multiple of 60s)
# period = '1m'
#
@ -535,6 +565,8 @@
# endpoint = "unix:///var/run/docker.sock"
# ## Only collect metrics for these containers, collect all if empty
# container_names = []
# ## Timeout for docker list, info, and stats commands
# timeout = "5s"
# # Read statistics from one or many dovecot servers
@ -570,6 +602,9 @@
# ## Commands array
# commands = ["/tmp/test.sh", "/usr/bin/mycollector --foo=bar"]
#
# ## Timeout for each command to complete.
# timeout = "5s"
#
# ## measurement name suffix (for separating different commands)
# name_suffix = "_mycollector"
#
@ -580,6 +615,22 @@
# data_format = "influx"
# # Read stats about given file(s)
# [[inputs.filestat]]
# ## Files to gather stats about.
# ## These accept standard unix glob matching rules, but with the addition of
# ## ** as a "super asterisk". ie:
# ## "/var/log/**.log" -> recursively find all .log files in /var/log
# ## "/var/log/*/*.log" -> find all .log files with a parent dir in /var/log
# ## "/var/log/apache.log" -> just tail the apache log file
# ##
# ## See https://github.com/gobwas/glob for more examples
# ##
# files = ["/var/log/**.log"]
# ## If true, read the entire file and calculate an md5 checksum.
# md5 = false
# # Read metrics of haproxy, via socket or csv stats page
# [[inputs.haproxy]]
# ## An array of address to gather stats about. Specify an ip or hostname
@ -676,13 +727,24 @@
# # Read JMX metrics through Jolokia
# [[inputs.jolokia]]
# ## This is the context root used to compose the jolokia url
# context = "/jolokia/read"
# context = "/jolokia"
#
# ## This specifies the mode used
# # mode = "proxy"
# #
# ## When in proxy mode this section is used to specify further
# ## proxy address configurations.
# ## Remember to change host address to fit your environment.
# # [inputs.jolokia.proxy]
# # host = "127.0.0.1"
# # port = "8080"
#
#
# ## List of servers exposing jolokia read service
# [[inputs.jolokia.servers]]
# name = "stable"
# host = "192.168.103.2"
# port = "8180"
# name = "as-server-01"
# host = "127.0.0.1"
# port = "8080"
# # username = "myuser"
# # password = "mypassword"
#
@ -692,17 +754,20 @@
# ## This collect all heap memory usage metrics.
# [[inputs.jolokia.metrics]]
# name = "heap_memory_usage"
# jmx = "/java.lang:type=Memory/HeapMemoryUsage"
# mbean = "java.lang:type=Memory"
# attribute = "HeapMemoryUsage"
#
# ## This collect thread counts metrics.
# [[inputs.jolokia.metrics]]
# name = "thread_count"
# jmx = "/java.lang:type=Threading/TotalStartedThreadCount,ThreadCount,DaemonThreadCount,PeakThreadCount"
# mbean = "java.lang:type=Threading"
# attribute = "TotalStartedThreadCount,ThreadCount,DaemonThreadCount,PeakThreadCount"
#
# ## This collect number of class loaded/unloaded counts metrics.
# [[inputs.jolokia.metrics]]
# name = "class_count"
# jmx = "/java.lang:type=ClassLoading/LoadedClassCount,UnloadedClassCount,TotalLoadedClassCount"
# mbean = "java.lang:type=ClassLoading"
# attribute = "LoadedClassCount,UnloadedClassCount,TotalLoadedClassCount"
# # Read metrics from a LeoFS Server via SNMP
@ -719,9 +784,13 @@
# ##
# # ost_procfiles = [
# # "/proc/fs/lustre/obdfilter/*/stats",
# # "/proc/fs/lustre/osd-ldiskfs/*/stats"
# # "/proc/fs/lustre/osd-ldiskfs/*/stats",
# # "/proc/fs/lustre/obdfilter/*/job_stats",
# # ]
# # mds_procfiles = [
# # "/proc/fs/lustre/mdt/*/md_stats",
# # "/proc/fs/lustre/mdt/*/job_stats",
# # ]
# # mds_procfiles = ["/proc/fs/lustre/mdt/*/md_stats"]
# # Gathers metrics from the /3.0/reports MailChimp API
@ -781,9 +850,46 @@
# ## e.g.
# ## root:passwd@tcp(127.0.0.1:3306)/?tls=false
# ## root@tcp(127.0.0.1:3306)/?tls=false
# ##
# #
# ## If no servers are specified, then localhost is used as the host.
# servers = ["tcp(127.0.0.1:3306)/"]
# ## the limits for metrics from perf_events_statements
# perf_events_statements_digest_text_limit = 120
# perf_events_statements_limit = 250
# perf_events_statements_time_limit = 86400
# #
# ## if the list is empty, then metrics are gathered from all database tables
# table_schema_databases = []
# #
# ## gather metrics from INFORMATION_SCHEMA.TABLES for databases provided above list
# gather_table_schema = false
# #
# ## gather thread state counts from INFORMATION_SCHEMA.PROCESSLIST
# gather_process_list = true
# #
# ## gather auto_increment columns and max values from information schema
# gather_info_schema_auto_inc = true
# #
# ## gather metrics from SHOW SLAVE STATUS command output
# gather_slave_status = true
# #
# ## gather metrics from SHOW BINARY LOGS command output
# gather_binary_logs = false
# #
# ## gather metrics from PERFORMANCE_SCHEMA.TABLE_IO_WAITS_SUMMARY_BY_TABLE
# gather_table_io_waits = false
# #
# ## gather metrics from PERFORMANCE_SCHEMA.TABLE_IO_WAITS_SUMMARY_BY_INDEX_USAGE
# gather_index_io_waits = false
# #
# ## gather metrics from PERFORMANCE_SCHEMA.FILE_SUMMARY_BY_EVENT_NAME
# gather_file_events_stats = false
# #
# ## gather metrics from PERFORMANCE_SCHEMA.EVENTS_STATEMENTS_SUMMARY_BY_DIGEST
# gather_perf_events_statements = false
# #
# ## Some queries we may want to run less often (such as SHOW GLOBAL VARIABLES)
# interval_slow = "30m"
# # Read metrics about network interface usage
@ -875,15 +981,15 @@
# [[inputs.ping]]
# ## NOTE: this plugin forks the ping command. You may need to set capabilities
# ## via setcap cap_net_raw+p /bin/ping
#
# #
# ## urls to ping
# urls = ["www.google.com"] # required
# ## number of pings to send (ping -c <COUNT>)
# ## number of pings to send per collection (ping -c <COUNT>)
# count = 1 # required
# ## interval, in s, at which to ping. 0 == default (ping -i <PING_INTERVAL>)
# ping_interval = 0.0
# ## ping timeout, in s. 0 == no timeout (ping -t <TIMEOUT>)
# timeout = 0.0
# ## ping timeout, in s. 0 == no timeout (ping -W <TIMEOUT>)
# timeout = 1.0
# ## interface to send ping from (ping -I <INTERFACE>)
# interface = ""
@ -929,6 +1035,11 @@
# ## databases are gathered.
# ## databases = ["app_production", "testing"]
# #
# # outputaddress = "db01"
# ## A custom name for the database that will be used as the "server" tag in the
# ## measurement output. If not specified, a default one generated from
# ## the connection address is used.
# #
# ## Define the toml config where the sql queries are stored
# ## New queries can be added, if the withdbname is set to true and there is no
# ## databases defined in the 'databases field', the sql query is ended by a
@ -939,24 +1050,28 @@
# ## because the databases variable was set to ['postgres', 'pgbench' ] and the
# ## withdbname was true. Be careful that if the withdbname is set to false you
# ## don't have to define the where clause (aka with the dbname) the tagvalue
# ## field is used to define custom tags (separated by comas)
# ## field is used to define custom tags (separated by commas)
# ## The optional "measurement" value can be used to override the default
# ## output measurement name ("postgresql").
# #
# ## Structure :
# ## [[inputs.postgresql_extensible.query]]
# ## sqlquery string
# ## version string
# ## withdbname boolean
# ## tagvalue string (coma separated)
# ## tagvalue string (comma separated)
# ## measurement string
# [[inputs.postgresql_extensible.query]]
# sqlquery="SELECT * FROM pg_stat_database"
# version=901
# withdbname=false
# tagvalue=""
# measurement=""
# [[inputs.postgresql_extensible.query]]
# sqlquery="SELECT * FROM pg_stat_bgwriter"
# version=901
# withdbname=false
# tagvalue=""
# tagvalue="postgresql.stats"
# # Read metrics from one or many PowerDNS servers
@ -1328,6 +1443,28 @@
# percentile_limit = 1000
# # Stream a log file, like the tail -f command
# [[inputs.tail]]
# ## files to tail.
# ## These accept standard unix glob matching rules, but with the addition of
# ## ** as a "super asterisk". ie:
# ## "/var/log/**.log" -> recursively find all .log files in /var/log
# ## "/var/log/*/*.log" -> find all .log files with a parent dir in /var/log
# ## "/var/log/apache.log" -> just tail the apache log file
# ##
# ## See https://github.com/gobwas/glob for more examples
# ##
# files = ["/var/mymetrics.out"]
# ## Read file from beginning.
# from_beginning = false
#
# ## Data format to consume.
# ## Each data format has its own unique set of configuration options, read
# ## more about them here:
# ## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md
# data_format = "influx"
# # Generic TCP listener
# [[inputs.tcp_listener]]
# ## Address and port to host TCP listener on

77
internal/buffer/buffer.go Normal file
View File

@ -0,0 +1,77 @@
package buffer
import (
"github.com/influxdata/telegraf"
)
// Buffer is an object for storing metrics in a circular buffer.
//
// Metrics are queued on a buffered channel: Add sends to it and Batch
// receives from it, so metrics leave the Buffer in the order they were added.
// The drops and total counters are plain ints that Add increments without
// any locking.
type Buffer struct {
// buf is the channel holding the cached metrics
buf chan telegraf.Metric
// total dropped metrics
drops int
// total metrics added
total int
}
// NewBuffer allocates a Buffer able to cache up to size metrics.
// Once that many metrics are cached, each further Add discards the oldest
// cached metric to make room for the new one.
func NewBuffer(size int) *Buffer {
	queue := make(chan telegraf.Metric, size)
	return &Buffer{buf: queue}
}
// IsEmpty reports whether the Buffer currently holds no metrics.
func (b *Buffer) IsEmpty() bool {
	queued := len(b.buf)
	return queued == 0
}
// Len reports how many metrics are currently cached in the Buffer.
func (b *Buffer) Len() int {
	queued := len(b.buf)
	return queued
}
// Drops reports how many metrics this Buffer has dropped since it was
// created.
func (b *Buffer) Drops() int {
	dropped := b.drops
	return dropped
}
// Total reports how many metrics have been passed to Add on this Buffer,
// including the ones that were later dropped.
func (b *Buffer) Total() int {
	added := b.total
	return added
}
// Add adds metrics to the buffer.
//
// Every metric counts towards Total. When the buffer is full, the oldest
// cached metric is discarded to make room for the new one and Drops is
// incremented. A Buffer whose channel has no capacity (size 0) cannot cache
// anything, so each metric is counted as dropped instead of blocking forever.
func (b *Buffer) Add(metrics ...telegraf.Metric) {
	for _, m := range metrics {
		b.total++
		if cap(b.buf) == 0 {
			// Nothing can be queued or evicted: receiving from an empty
			// unbuffered channel would deadlock the caller.
			b.drops++
			continue
		}
		select {
		case b.buf <- m:
		default:
			// Full: evict the oldest metric, then queue the new one.
			b.drops++
			<-b.buf
			b.buf <- m
		}
	}
}
// Batch removes up to batchSize metrics from the buffer and returns them,
// oldest first.
// The returned slice is shorter than batchSize when the buffer holds fewer
// metrics than requested. A negative batchSize yields an empty batch rather
// than a panic from the slice allocation.
func (b *Buffer) Batch(batchSize int) []telegraf.Metric {
	n := min(len(b.buf), batchSize)
	if n < 0 {
		n = 0
	}
	out := make([]telegraf.Metric, n)
	for i := range out {
		out[i] = <-b.buf
	}
	return out
}
// min returns the smaller of a and b.
func min(a, b int) int {
	if a <= b {
		return a
	}
	return b
}

View File

@ -0,0 +1,94 @@
package buffer
import (
"testing"
"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/testutil"
"github.com/stretchr/testify/assert"
)
// metricList is a fixed set of five test metrics that the tests in this file
// add to a Buffer in one call.
var metricList = []telegraf.Metric{
testutil.TestMetric(2, "mymetric1"),
testutil.TestMetric(1, "mymetric2"),
testutil.TestMetric(11, "mymetric3"),
testutil.TestMetric(15, "mymetric4"),
testutil.TestMetric(8, "mymetric5"),
}
func BenchmarkAddMetrics(b *testing.B) {
buf := NewBuffer(10000)
m := testutil.TestMetric(1, "mymetric")
for n := 0; n < b.N; n++ {
buf.Add(m)
}
}
// TestNewBufferBasicFuncs checks IsEmpty, Len, Drops and Total on a fresh
// Buffer and after adding metrics without exceeding its size.
func TestNewBufferBasicFuncs(t *testing.T) {
	b := NewBuffer(10)

	assert.True(t, b.IsEmpty())
	assert.Zero(t, b.Len())
	assert.Zero(t, b.Drops())
	assert.Zero(t, b.Total())

	// assert.Equal takes (expected, actual); keeping that order makes the
	// failure messages label the two values correctly.
	m := testutil.TestMetric(1, "mymetric")
	b.Add(m)
	assert.False(t, b.IsEmpty())
	assert.Equal(t, 1, b.Len())
	assert.Equal(t, 0, b.Drops())
	assert.Equal(t, 1, b.Total())

	b.Add(metricList...)
	assert.False(t, b.IsEmpty())
	assert.Equal(t, 6, b.Len())
	assert.Equal(t, 0, b.Drops())
	assert.Equal(t, 6, b.Total())
}
// TestDroppingMetrics fills a Buffer exactly to its size and then verifies
// that adding more metrics keeps the length constant while Drops grows.
func TestDroppingMetrics(t *testing.T) {
	b := NewBuffer(10)

	// Add up to the size of the buffer
	b.Add(metricList...)
	b.Add(metricList...)
	assert.False(t, b.IsEmpty())
	// assert.Equal takes (expected, actual)
	assert.Equal(t, 10, b.Len())
	assert.Equal(t, 0, b.Drops())
	assert.Equal(t, 10, b.Total())

	// Add 5 more and verify that 5 drops were recorded
	b.Add(metricList...)
	assert.False(t, b.IsEmpty())
	assert.Equal(t, 10, b.Len())
	assert.Equal(t, 5, b.Drops())
	assert.Equal(t, 15, b.Total())
}
// TestGettingBatches verifies that Batch returns at most the requested
// number of metrics and removes the returned metrics from the Buffer.
func TestGettingBatches(t *testing.T) {
	b := NewBuffer(20)

	// Verify that the buffer returned is smaller than requested when there are
	// not as many items as requested.
	b.Add(metricList...)
	batch := b.Batch(10)
	assert.Len(t, batch, 5)

	// Verify that the buffer is now empty
	assert.True(t, b.IsEmpty())
	assert.Zero(t, b.Len())
	assert.Zero(t, b.Drops())
	// assert.Equal takes (expected, actual)
	assert.Equal(t, 5, b.Total())

	// Verify that the buffer returned is not more than the size requested
	b.Add(metricList...)
	batch = b.Batch(3)
	assert.Len(t, batch, 3)

	// Verify that buffer is not empty
	assert.False(t, b.IsEmpty())
	assert.Equal(t, 2, b.Len())
	assert.Equal(t, 0, b.Drops())
	assert.Equal(t, 10, b.Total())
}

View File

@ -93,9 +93,15 @@ type AgentConfig struct {
// ie, a jitter of 5s and interval 10s means flushes will happen every 10-15s
FlushJitter internal.Duration
// MetricBatchSize is the maximum number of metrics that is written to an
// output plugin in one call.
MetricBatchSize int
// MetricBufferLimit is the max number of metrics that each output plugin
// will cache. The buffer is cleared when a successful write occurs. When
// full, the oldest metrics will be overwritten.
// full, the oldest metrics will be overwritten. This number should be a
// multiple of MetricBatchSize. Due to the current implementation, it cannot
// be less than 2 times MetricBatchSize.
MetricBufferLimit int
// FlushBufferWhenFull tells Telegraf to flush the metric buffer whenever
@ -182,11 +188,13 @@ var header = `# Telegraf Configuration
## ie, if interval="10s" then always collect on :00, :10, :20, etc.
round_interval = true
## Telegraf will cache metric_buffer_limit metrics for each output, and will
## flush this buffer on a successful write.
metric_buffer_limit = 1000
## Flush the buffer whenever full, regardless of flush_interval.
flush_buffer_when_full = true
## Telegraf will send metrics to outputs in batches of at
## most metric_batch_size metrics.
metric_batch_size = 1000
## For failed writes, telegraf will cache metric_buffer_limit metrics for each
## output, and will flush this buffer on a successful write. Oldest metrics
## are dropped first when this buffer fills.
metric_buffer_limit = 10000
## Collection jitter is used to jitter the collection by a random amount.
## Each plugin will sleep for a random time within jitter before collecting.
@ -404,13 +412,67 @@ func (c *Config) LoadDirectory(path string) error {
return nil
}
// getDefaultConfigPath returns the first default config location that
// os.Stat can find, checking in this order:
//   1. $TELEGRAF_CONFIG_PATH
//   2. $HOME/.telegraf/telegraf.conf
//   3. /etc/telegraf/telegraf.conf
//
// The chosen path is logged. An error is returned when none of them exists.
func getDefaultConfigPath() (string, error) {
	homefile := os.ExpandEnv("${HOME}/.telegraf/telegraf.conf")
	etcfile := "/etc/telegraf/telegraf.conf"
	candidates := [...]string{os.Getenv("TELEGRAF_CONFIG_PATH"), homefile, etcfile}

	for i := range candidates {
		_, statErr := os.Stat(candidates[i])
		if statErr != nil {
			continue
		}
		log.Printf("Using config file: %s", candidates[i])
		return candidates[i], nil
	}

	// none of the default locations holds a file
	return "", fmt.Errorf("No config file specified, and could not find one"+
		" in $TELEGRAF_CONFIG_PATH, %s, or %s", homefile, etcfile)
}
// LoadConfig loads the given config file and applies it to c
func (c *Config) LoadConfig(path string) error {
var err error
if path == "" {
if path, err = getDefaultConfigPath(); err != nil {
return err
}
}
tbl, err := parseFile(path)
if err != nil {
return fmt.Errorf("Error parsing %s, %s", path, err)
}
// Parse tags tables first:
for _, tableName := range []string{"tags", "global_tags"} {
if val, ok := tbl.Fields[tableName]; ok {
subTable, ok := val.(*ast.Table)
if !ok {
return fmt.Errorf("%s: invalid configuration", path)
}
if err = config.UnmarshalTable(subTable, c.Tags); err != nil {
log.Printf("Could not parse [global_tags] config\n")
return fmt.Errorf("Error parsing %s, %s", path, err)
}
}
}
// Parse agent table:
if val, ok := tbl.Fields["agent"]; ok {
subTable, ok := val.(*ast.Table)
if !ok {
return fmt.Errorf("%s: invalid configuration", path)
}
if err = config.UnmarshalTable(subTable, c.Agent); err != nil {
log.Printf("Could not parse [agent] config\n")
return fmt.Errorf("Error parsing %s, %s", path, err)
}
}
// Parse all the rest of the plugins:
for name, val := range tbl.Fields {
subTable, ok := val.(*ast.Table)
if !ok {
@ -418,16 +480,7 @@ func (c *Config) LoadConfig(path string) error {
}
switch name {
case "agent":
if err = config.UnmarshalTable(subTable, c.Agent); err != nil {
log.Printf("Could not parse [agent] config\n")
return fmt.Errorf("Error parsing %s, %s", path, err)
}
case "global_tags", "tags":
if err = config.UnmarshalTable(subTable, c.Tags); err != nil {
log.Printf("Could not parse [global_tags] config\n")
return fmt.Errorf("Error parsing %s, %s", path, err)
}
case "agent", "global_tags", "tags":
case "outputs":
for pluginName, pluginVal := range subTable.Fields {
switch pluginSubTable := pluginVal.(type) {
@ -525,11 +578,8 @@ func (c *Config) addOutput(name string, table *ast.Table) error {
return err
}
ro := internal_models.NewRunningOutput(name, output, outputConfig)
if c.Agent.MetricBufferLimit > 0 {
ro.MetricBufferLimit = c.Agent.MetricBufferLimit
}
ro.FlushBufferWhenFull = c.Agent.FlushBufferWhenFull
ro := internal_models.NewRunningOutput(name, output, outputConfig,
c.Agent.MetricBatchSize, c.Agent.MetricBufferLimit)
c.Outputs = append(c.Outputs, ro)
return nil
}
@ -580,9 +630,9 @@ func (c *Config) addInput(name string, table *ast.Table) error {
// buildFilter builds a Filter
// (tagpass/tagdrop/namepass/namedrop/fieldpass/fielddrop) to
// be inserted into the internal_models.OutputConfig/internal_models.InputConfig to be used for prefix
// filtering on tags and measurements
func buildFilter(tbl *ast.Table) internal_models.Filter {
// be inserted into the internal_models.OutputConfig/internal_models.InputConfig
// to be used for glob filtering on tags and measurements
func buildFilter(tbl *ast.Table) (internal_models.Filter, error) {
f := internal_models.Filter{}
if node, ok := tbl.Fields["namepass"]; ok {
@ -681,6 +731,33 @@ func buildFilter(tbl *ast.Table) internal_models.Filter {
}
}
if node, ok := tbl.Fields["tagexclude"]; ok {
if kv, ok := node.(*ast.KeyValue); ok {
if ary, ok := kv.Value.(*ast.Array); ok {
for _, elem := range ary.Value {
if str, ok := elem.(*ast.String); ok {
f.TagExclude = append(f.TagExclude, str.Value)
}
}
}
}
}
if node, ok := tbl.Fields["taginclude"]; ok {
if kv, ok := node.(*ast.KeyValue); ok {
if ary, ok := kv.Value.(*ast.Array); ok {
for _, elem := range ary.Value {
if str, ok := elem.(*ast.String); ok {
f.TagInclude = append(f.TagInclude, str.Value)
}
}
}
}
}
if err := f.CompileFilter(); err != nil {
return f, err
}
delete(tbl.Fields, "namedrop")
delete(tbl.Fields, "namepass")
delete(tbl.Fields, "fielddrop")
@ -689,7 +766,9 @@ func buildFilter(tbl *ast.Table) internal_models.Filter {
delete(tbl.Fields, "pass")
delete(tbl.Fields, "tagdrop")
delete(tbl.Fields, "tagpass")
return f
delete(tbl.Fields, "tagexclude")
delete(tbl.Fields, "taginclude")
return f, nil
}
// buildInput parses input specific items from the ast.Table,
@ -748,7 +827,11 @@ func buildInput(name string, tbl *ast.Table) (*internal_models.InputConfig, erro
delete(tbl.Fields, "name_override")
delete(tbl.Fields, "interval")
delete(tbl.Fields, "tags")
cp.Filter = buildFilter(tbl)
var err error
cp.Filter, err = buildFilter(tbl)
if err != nil {
return cp, err
}
return cp, nil
}
@ -864,13 +947,18 @@ func buildSerializer(name string, tbl *ast.Table) (serializers.Serializer, error
return serializers.NewSerializer(c)
}
// buildOutput parses output specific items from the ast.Table, builds the filter and returns an
// buildOutput parses output specific items from the ast.Table,
// builds the filter and returns an
// internal_models.OutputConfig to be inserted into internal_models.RunningInput
// Note: error exists in the return for future calls that might require error
func buildOutput(name string, tbl *ast.Table) (*internal_models.OutputConfig, error) {
filter, err := buildFilter(tbl)
if err != nil {
return nil, err
}
oc := &internal_models.OutputConfig{
Name: name,
Filter: buildFilter(tbl),
Filter: filter,
}
// Outputs don't support FieldDrop/FieldPass, so set to NameDrop/NamePass
if len(oc.Filter.FieldDrop) > 0 {

View File

@ -26,9 +26,7 @@ func TestConfig_LoadSingleInputWithEnvVars(t *testing.T) {
memcached := inputs.Inputs["memcached"]().(*memcached.Memcached)
memcached.Servers = []string{"192.168.1.1"}
mConfig := &internal_models.InputConfig{
Name: "memcached",
Filter: internal_models.Filter{
filter := internal_models.Filter{
NameDrop: []string{"metricname2"},
NamePass: []string{"metricname1"},
FieldDrop: []string{"other", "stuff"},
@ -46,7 +44,11 @@ func TestConfig_LoadSingleInputWithEnvVars(t *testing.T) {
},
},
IsActive: true,
},
}
assert.NoError(t, filter.CompileFilter())
mConfig := &internal_models.InputConfig{
Name: "memcached",
Filter: filter,
Interval: 10 * time.Second,
}
mConfig.Tags = make(map[string]string)
@ -64,9 +66,7 @@ func TestConfig_LoadSingleInput(t *testing.T) {
memcached := inputs.Inputs["memcached"]().(*memcached.Memcached)
memcached.Servers = []string{"localhost"}
mConfig := &internal_models.InputConfig{
Name: "memcached",
Filter: internal_models.Filter{
filter := internal_models.Filter{
NameDrop: []string{"metricname2"},
NamePass: []string{"metricname1"},
FieldDrop: []string{"other", "stuff"},
@ -84,7 +84,11 @@ func TestConfig_LoadSingleInput(t *testing.T) {
},
},
IsActive: true,
},
}
assert.NoError(t, filter.CompileFilter())
mConfig := &internal_models.InputConfig{
Name: "memcached",
Filter: filter,
Interval: 5 * time.Second,
}
mConfig.Tags = make(map[string]string)
@ -109,9 +113,7 @@ func TestConfig_LoadDirectory(t *testing.T) {
memcached := inputs.Inputs["memcached"]().(*memcached.Memcached)
memcached.Servers = []string{"localhost"}
mConfig := &internal_models.InputConfig{
Name: "memcached",
Filter: internal_models.Filter{
filter := internal_models.Filter{
NameDrop: []string{"metricname2"},
NamePass: []string{"metricname1"},
FieldDrop: []string{"other", "stuff"},
@ -129,7 +131,11 @@ func TestConfig_LoadDirectory(t *testing.T) {
},
},
IsActive: true,
},
}
assert.NoError(t, filter.CompileFilter())
mConfig := &internal_models.InputConfig{
Name: "memcached",
Filter: filter,
Interval: 5 * time.Second,
}
mConfig.Tags = make(map[string]string)

View File

@ -0,0 +1,98 @@
package globpath
import (
"fmt"
"os"
"path/filepath"
"strings"
"github.com/gobwas/glob"
)
// sepStr is the OS path separator as a string; findRootDir splits paths on
// it and rebuilds the root directory with it.
var sepStr = fmt.Sprintf("%v", string(os.PathSeparator))
// GlobPath is a file path, possibly containing glob meta characters, that
// has been prepared by Compile and can be resolved to files with Match.
type GlobPath struct {
// path is the original, uncompiled path
path string
// hasMeta is true when path contains any of the characters "*?["
hasMeta bool
// g is the compiled glob; only set when hasMeta is true
g glob.Glob
// root is the directory Match starts walking from; only set when hasMeta
// is true
root string
}
// Compile prepares path for matching and returns the resulting GlobPath.
// A path without glob meta characters is stored as-is: no glob is compiled
// and no root directory is computed, because Match short-circuits for it.
// Otherwise the path is compiled with the OS path separator as the glob
// separator, and any compile error is returned.
func Compile(path string) (*GlobPath, error) {
	gp := &GlobPath{path: path, hasMeta: hasMeta(path)}
	if !gp.hasMeta {
		return gp, nil
	}

	compiled, err := glob.Compile(path, os.PathSeparator)
	if err != nil {
		return nil, err
	}
	gp.g = compiled
	// Get the root directory for this filepath
	gp.root = findRootDir(path)
	return gp, nil
}
// Match returns the files matched by the GlobPath, keyed by file path.
//
// For a path with glob meta characters the root directory is walked and every
// path matching the glob is returned. For a plain path the result holds that
// single path when os.Stat succeeds and is empty otherwise, so a path that
// cannot be stat'ed for any reason (not only "does not exist") never maps to
// a nil FileInfo.
func (g *GlobPath) Match() map[string]os.FileInfo {
	if g.hasMeta {
		return walkFilePath(g.root, g.g)
	}

	out := make(map[string]os.FileInfo)
	if info, err := os.Stat(g.path); err == nil {
		out[g.path] = info
	}
	return out
}
// walkFilePath walks the file tree from the given root and returns the paths
// that match the given glob, mapped to their FileInfo.
// Errors reported by the walk are ignored so that the rest of the tree is
// still visited, but a path for which no FileInfo is available is never added
// to the result.
func walkFilePath(root string, g glob.Glob) map[string]os.FileInfo {
	matchedFiles := make(map[string]os.FileInfo)
	walkfn := func(path string, info os.FileInfo, _ error) error {
		// filepath.Walk passes a nil info when it could not stat path;
		// storing it would hand callers a nil FileInfo.
		if info != nil && g.Match(path) {
			matchedFiles[path] = info
		}
		return nil
	}
	filepath.Walk(root, walkfn)
	return matchedFiles
}
// findRootDir returns the root dir of the given path (could include globs):
// the leading directories up to, but not including, the first path element
// that contains a glob meta character. The last path element is never part
// of the root.
// ie:
//   /var/log/telegraf.conf -> /var/log
//   /home/**               -> /home
//   /home/*/**             -> /home
//   /lib/share/*/*/**.txt  -> /lib/share
func findRootDir(path string) string {
	pathItems := strings.Split(path, sepStr)
	out := sepStr
	// strings.Split always yields at least one item, so dropping the last
	// one (the file name or final pattern) is safe.
	for _, item := range pathItems[:len(pathItems)-1] {
		if item == "" {
			continue
		}
		if hasMeta(item) {
			break
		}
		out += item + sepStr
	}
	// Trim the trailing separator with the same separator used to build the
	// path, unless the root is the separator itself.
	if out != sepStr {
		out = strings.TrimSuffix(out, sepStr)
	}
	return out
}
// hasMeta reports whether path contains at least one of the glob meta
// characters '*', '?' or '['.
func hasMeta(path string) bool {
	const metaChars = "*?["
	return strings.ContainsAny(path, metaChars)
}

View File

@ -0,0 +1,62 @@
package globpath
import (
"runtime"
"strings"
"testing"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
// TestCompileAndMatch compiles several paths under the testdata directory
// and checks how many files each one matches.
func TestCompileAndMatch(t *testing.T) {
	dir := getTestdataDir()
	cases := []struct {
		pattern string
		matches int
	}{
		// test super asterisk
		{dir + "/**", 3},
		// test single asterisk
		{dir + "/*.log", 2},
		// test no meta characters (file exists)
		{dir + "/log1.log", 1},
		// test file that doesn't exist
		{dir + "/i_dont_exist.log", 0},
		// test super asterisk that doesn't exist
		{dir + "/dir_doesnt_exist/**", 0},
	}

	// compile everything first; a compile failure aborts the test
	compiled := make([]*GlobPath, 0, len(cases))
	for _, c := range cases {
		g, err := Compile(c.pattern)
		require.NoError(t, err)
		compiled = append(compiled, g)
	}

	for i, c := range cases {
		assert.Len(t, compiled[i].Match(), c.matches)
	}
}
// TestFindRootDir checks the root directory computed for plain and globbed
// paths.
func TestFindRootDir(t *testing.T) {
	type rootCase struct {
		input  string
		output string
	}
	cases := []rootCase{
		{input: "/var/log/telegraf.conf", output: "/var/log"},
		{input: "/home/**", output: "/home"},
		{input: "/home/*/**", output: "/home"},
		{input: "/lib/share/*/*/**.txt", output: "/lib/share"},
	}

	for i := range cases {
		got := findRootDir(cases[i].input)
		assert.Equal(t, cases[i].output, got)
	}
}
// getTestdataDir derives the testdata directory from the source file of the
// calling function, by replacing the first "globpath_test.go" in that file
// name with "testdata".
func getTestdataDir() string {
	const testFile, dataDir = "globpath_test.go", "testdata"
	_, callerFile, _, _ := runtime.Caller(1)
	return strings.Replace(callerFile, testFile, dataDir, 1)
}

0
internal/globpath/testdata/log1.log vendored Normal file
View File

0
internal/globpath/testdata/log2.log vendored Normal file
View File

5
internal/globpath/testdata/test.conf vendored Normal file
View File

@ -0,0 +1,5 @@
# this is a fake testing config file
# for testing the filestat plugin
option1 = "foo"
option2 = "bar"

View File

@ -2,13 +2,16 @@ package internal
import (
"bufio"
"bytes"
"crypto/rand"
"crypto/tls"
"crypto/x509"
"errors"
"fmt"
"io/ioutil"
"log"
"os"
"os/exec"
"strings"
"time"
"unicode"
@ -16,6 +19,12 @@ import (
const alphanum string = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
var (
TimeoutErr = errors.New("Command timed out.")
NotImplementedError = errors.New("not implemented yet")
)
// Duration just wraps time.Duration
type Duration struct {
Duration time.Duration
@ -33,8 +42,6 @@ func (d *Duration) UnmarshalTOML(b []byte) error {
return nil
}
var NotImplementedError = errors.New("not implemented yet")
// ReadLines reads contents from a file and splits them by new lines.
// A convenience wrapper to ReadLinesOffsetN(filename, 0, -1).
func ReadLines(filename string) ([]string, error) {
@ -140,58 +147,47 @@ func SnakeCase(in string) string {
return string(out)
}
// Glob will test a string pattern, potentially containing globs, against a
// subject string. The result is a simple true/false, determining whether or
// not the glob pattern matched the subject text.
//
// Adapted from https://github.com/ryanuber/go-glob/blob/master/glob.go
// thanks Ryan Uber!
func Glob(pattern, measurement string) bool {
// Empty pattern can only match empty subject
if pattern == "" {
return measurement == pattern
// CombinedOutputTimeout starts the given command, waits for it with the
// given timeout via WaitTimeout, and returns whatever the command wrote to
// stdout and stderr, both collected in one buffer.
// If the command cannot be started, the start error is returned with no
// output.
func CombinedOutputTimeout(c *exec.Cmd, timeout time.Duration) ([]byte, error) {
	combined := new(bytes.Buffer)
	c.Stdout = combined
	c.Stderr = combined

	if startErr := c.Start(); startErr != nil {
		return nil, startErr
	}
	waitErr := WaitTimeout(c, timeout)
	return combined.Bytes(), waitErr
}
// RunTimeout starts the given command and waits for it with the given
// timeout via WaitTimeout.
// If the command cannot be started, the start error is returned and no wait
// takes place.
func RunTimeout(c *exec.Cmd, timeout time.Duration) error {
	err := c.Start()
	if err == nil {
		err = WaitTimeout(c, timeout)
	}
	return err
}
// WaitTimeout waits for the given command to finish with a timeout.
// It assumes the command has already been started.
// If the command times out, it attempts to kill the process.
func WaitTimeout(c *exec.Cmd, timeout time.Duration) error {
timer := time.NewTimer(timeout)
done := make(chan error)
go func() { done <- c.Wait() }()
select {
case err := <-done:
timer.Stop()
return err
case <-timer.C:
if err := c.Process.Kill(); err != nil {
log.Printf("FATAL error killing process: %s", err)
return err
}
// wait for the command to return after killing it
<-done
return TimeoutErr
}
// If the pattern _is_ a glob, it matches everything
if pattern == "*" {
return true
}
parts := strings.Split(pattern, "*")
if len(parts) == 1 {
// No globs in pattern, so test for match
return pattern == measurement
}
leadingGlob := strings.HasPrefix(pattern, "*")
trailingGlob := strings.HasSuffix(pattern, "*")
end := len(parts) - 1
for i, part := range parts {
switch i {
case 0:
if leadingGlob {
continue
}
if !strings.HasPrefix(measurement, part) {
return false
}
case end:
if len(measurement) > 0 {
return trailingGlob || strings.HasSuffix(measurement, part)
}
default:
if !strings.Contains(measurement, part) {
return false
}
}
// Trim evaluated text from measurement as we loop over the pattern.
idx := strings.Index(measurement, part) + len(part)
measurement = measurement[idx:]
}
// All parts of the pattern matched
return true
}

View File

@ -1,47 +1,12 @@
package internal
import "testing"
import (
"os/exec"
"testing"
"time"
func testGlobMatch(t *testing.T, pattern, subj string) {
if !Glob(pattern, subj) {
t.Errorf("%s should match %s", pattern, subj)
}
}
func testGlobNoMatch(t *testing.T, pattern, subj string) {
if Glob(pattern, subj) {
t.Errorf("%s should not match %s", pattern, subj)
}
}
func TestEmptyPattern(t *testing.T) {
testGlobMatch(t, "", "")
testGlobNoMatch(t, "", "test")
}
func TestPatternWithoutGlobs(t *testing.T) {
testGlobMatch(t, "test", "test")
}
func TestGlob(t *testing.T) {
for _, pattern := range []string{
"*test", // Leading glob
"this*", // Trailing glob
"*is*a*", // Lots of globs
"**test**", // Double glob characters
"**is**a***test*", // Varying number of globs
} {
testGlobMatch(t, pattern, "this_is_a_test")
}
for _, pattern := range []string{
"test*", // Implicit substring match should fail
"*is", // Partial match should fail
"*no*", // Globs without a match between them should fail
} {
testGlobNoMatch(t, pattern, "this_is_a_test")
}
}
"github.com/stretchr/testify/assert"
)
type SnakeTest struct {
input string
@ -71,3 +36,73 @@ func TestSnakeCase(t *testing.T) {
}
}
}
// Paths of the external binaries executed by the tests below. The lookup
// error is discarded; the tests skip themselves when a path is empty.
var (
sleepbin, _ = exec.LookPath("sleep")
echobin, _ = exec.LookPath("echo")
)
// TestRunTimeout runs "sleep 10" with a 20ms timeout and expects RunTimeout
// to return TimeoutErr shortly after the timeout.
func TestRunTimeout(t *testing.T) {
	if sleepbin == "" {
		t.Skip("'sleep' binary not available on OS, skipping.")
	}
	const timeout = 20 * time.Millisecond
	sleeper := exec.Command(sleepbin, "10")

	began := time.Now()
	err := RunTimeout(sleeper, timeout)
	took := time.Since(began)

	assert.Equal(t, TimeoutErr, err)
	// Verify that command gets killed in 20ms, with some breathing room
	assert.True(t, took < 75*time.Millisecond)
}
// TestCombinedOutputTimeout runs "sleep 10" with a 20ms timeout and expects
// CombinedOutputTimeout to return TimeoutErr shortly after the timeout.
func TestCombinedOutputTimeout(t *testing.T) {
	if sleepbin == "" {
		t.Skip("'sleep' binary not available on OS, skipping.")
	}
	const timeout = 20 * time.Millisecond
	sleeper := exec.Command(sleepbin, "10")

	began := time.Now()
	_, err := CombinedOutputTimeout(sleeper, timeout)
	took := time.Since(began)

	assert.Equal(t, TimeoutErr, err)
	// Verify that command gets killed in 20ms, with some breathing room
	assert.True(t, took < 75*time.Millisecond)
}
// TestCombinedOutput runs "echo foo" and expects its output back without an
// error.
func TestCombinedOutput(t *testing.T) {
	if echobin == "" {
		t.Skip("'echo' binary not available on OS, skipping.")
	}
	echo := exec.Command(echobin, "foo")
	output, err := CombinedOutputTimeout(echo, time.Second)

	assert.NoError(t, err)
	assert.Equal(t, "foo\n", string(output))
}
// TestCombinedOutputError verifies that CombinedOutputTimeout and
// exec.Cmd.CombinedOutput return the same output for a failing command
// ("sleep foo").
func TestCombinedOutputError(t *testing.T) {
	if sleepbin == "" {
		t.Skip("'sleep' binary not available on OS, skipping.")
	}
	// reference output from the standard library; its error is not checked
	reference := exec.Command(sleepbin, "foo")
	expected, _ := reference.CombinedOutput()

	underTest := exec.Command(sleepbin, "foo")
	actual, err := CombinedOutputTimeout(underTest, time.Second)

	assert.Error(t, err)
	assert.Equal(t, expected, actual)
}
// TestRunError expects RunTimeout to return an error for a failing command
// ("sleep foo").
func TestRunError(t *testing.T) {
	if sleepbin == "" {
		t.Skip("'sleep' binary not available on OS, skipping.")
	}
	failing := exec.Command(sleepbin, "foo")
	assert.Error(t, RunTimeout(failing, time.Second))
}

View File

@ -1,33 +1,104 @@
package internal_models
import (
"fmt"
"strings"
"github.com/gobwas/glob"
"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/internal"
)
// TagFilter is the name of a tag, and the values on which to filter
type TagFilter struct {
// Name is the tag key whose value is matched
Name string
// Filter is the list of patterns for the tag value
Filter []string
// filter is the glob compiled from Filter by Filter.CompileFilter
filter glob.Glob
}
// Filter contains the drop/pass, tagdrop/tagpass and tagexclude/taginclude
// rules.
// Each exported pattern list is followed by an unexported glob of the same
// name; CompileFilter compiles the list into that glob.
type Filter struct {
NameDrop []string
nameDrop glob.Glob
NamePass []string
namePass glob.Glob
FieldDrop []string
fieldDrop glob.Glob
FieldPass []string
fieldPass glob.Glob
// TagDrop and TagPass carry their compiled glob inside each TagFilter
TagDrop []TagFilter
TagPass []TagFilter
TagExclude []string
tagExclude glob.Glob
TagInclude []string
tagInclude glob.Glob
IsActive bool
}
func (f Filter) ShouldMetricPass(metric telegraf.Metric) bool {
// CompileFilter compiles every pattern list of the Filter into its glob.Glob
// counterpart, including the per-tag lists of TagDrop and TagPass.
// It stops at the first list that fails to compile and returns an error
// naming the offending option.
func (f *Filter) CompileFilter() error {
	lists := []struct {
		option   string
		patterns []string
		compiled *glob.Glob
	}{
		{"namedrop", f.NameDrop, &f.nameDrop},
		{"namepass", f.NamePass, &f.namePass},
		{"fielddrop", f.FieldDrop, &f.fieldDrop},
		{"fieldpass", f.FieldPass, &f.fieldPass},
		{"tagexclude", f.TagExclude, &f.tagExclude},
		{"taginclude", f.TagInclude, &f.tagInclude},
	}
	for _, l := range lists {
		g, err := compileFilter(l.patterns)
		*l.compiled = g
		if err != nil {
			return fmt.Errorf("Error compiling '%s', %s", l.option, err)
		}
	}

	for i := range f.TagDrop {
		g, err := compileFilter(f.TagDrop[i].Filter)
		f.TagDrop[i].filter = g
		if err != nil {
			return fmt.Errorf("Error compiling 'tagdrop', %s", err)
		}
	}
	for i := range f.TagPass {
		g, err := compileFilter(f.TagPass[i].Filter)
		f.TagPass[i].filter = g
		if err != nil {
			return fmt.Errorf("Error compiling 'tagpass', %s", err)
		}
	}
	return nil
}
// compileFilter compiles a list of patterns into a single glob.
// An empty list yields a nil glob and no error. A single pattern is compiled
// as-is; several patterns are joined with commas inside braces and compiled
// as one pattern.
func compileFilter(filter []string) (glob.Glob, error) {
	switch len(filter) {
	case 0:
		return nil, nil
	case 1:
		return glob.Compile(filter[0])
	default:
		return glob.Compile("{" + strings.Join(filter, ",") + "}")
	}
}
func (f *Filter) ShouldMetricPass(metric telegraf.Metric) bool {
if f.ShouldNamePass(metric.Name()) && f.ShouldTagsPass(metric.Tags()) {
return true
}
@ -36,70 +107,51 @@ func (f Filter) ShouldMetricPass(metric telegraf.Metric) bool {
// ShouldNamePass returns true if the metric should pass, false if should drop
// based on the drop/pass filter parameters
func (f Filter) ShouldNamePass(key string) bool {
if f.NamePass != nil {
for _, pat := range f.NamePass {
// TODO remove HasPrefix check, leaving it for now for legacy support.
// Cam, 2015-12-07
if strings.HasPrefix(key, pat) || internal.Glob(pat, key) {
func (f *Filter) ShouldNamePass(key string) bool {
if f.namePass != nil {
if f.namePass.Match(key) {
return true
}
}
return false
}
if f.NameDrop != nil {
for _, pat := range f.NameDrop {
// TODO remove HasPrefix check, leaving it for now for legacy support.
// Cam, 2015-12-07
if strings.HasPrefix(key, pat) || internal.Glob(pat, key) {
if f.nameDrop != nil {
if f.nameDrop.Match(key) {
return false
}
}
return true
}
return true
}
// ShouldFieldsPass returns true if the metric should pass, false if should drop
// based on the drop/pass filter parameters
func (f Filter) ShouldFieldsPass(key string) bool {
if f.FieldPass != nil {
for _, pat := range f.FieldPass {
// TODO remove HasPrefix check, leaving it for now for legacy support.
// Cam, 2015-12-07
if strings.HasPrefix(key, pat) || internal.Glob(pat, key) {
func (f *Filter) ShouldFieldsPass(key string) bool {
if f.fieldPass != nil {
if f.fieldPass.Match(key) {
return true
}
}
return false
}
if f.FieldDrop != nil {
for _, pat := range f.FieldDrop {
// TODO remove HasPrefix check, leaving it for now for legacy support.
// Cam, 2015-12-07
if strings.HasPrefix(key, pat) || internal.Glob(pat, key) {
if f.fieldDrop != nil {
if f.fieldDrop.Match(key) {
return false
}
}
return true
}
return true
}
// ShouldTagsPass returns true if the metric should pass, false if should drop
// based on the tagdrop/tagpass filter parameters
func (f Filter) ShouldTagsPass(tags map[string]string) bool {
func (f *Filter) ShouldTagsPass(tags map[string]string) bool {
if f.TagPass != nil {
for _, pat := range f.TagPass {
if tagval, ok := tags[pat.Name]; ok {
for _, filter := range pat.Filter {
if internal.Glob(filter, tagval) {
return true
if pat.filter == nil {
continue
}
if tagval, ok := tags[pat.Name]; ok {
if pat.filter.Match(tagval) {
return true
}
}
}
@ -108,11 +160,12 @@ func (f Filter) ShouldTagsPass(tags map[string]string) bool {
if f.TagDrop != nil {
for _, pat := range f.TagDrop {
if tagval, ok := tags[pat.Name]; ok {
for _, filter := range pat.Filter {
if internal.Glob(filter, tagval) {
return false
if pat.filter == nil {
continue
}
if tagval, ok := tags[pat.Name]; ok {
if pat.filter.Match(tagval) {
return false
}
}
}
@ -121,3 +174,23 @@ func (f Filter) ShouldTagsPass(tags map[string]string) bool {
return true
}
// FilterTags applies the TagInclude and TagExclude filters to tags,
// modifying the map in place.
// When an include glob is compiled, every tag key it does not match is
// deleted; then, when an exclude glob is compiled, every tag key it matches
// is deleted.
func (f *Filter) FilterTags(tags map[string]string) {
	if include := f.tagInclude; include != nil {
		for key := range tags {
			if include.Match(key) {
				continue
			}
			delete(tags, key)
		}
	}

	if exclude := f.tagExclude; exclude != nil {
		for key := range tags {
			if !exclude.Match(key) {
				continue
			}
			delete(tags, key)
		}
	}
}

View File

@ -2,6 +2,11 @@ package internal_models
import (
"testing"
"github.com/influxdata/telegraf/testutil"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
func TestFilter_Empty(t *testing.T) {
@ -28,6 +33,7 @@ func TestFilter_NamePass(t *testing.T) {
f := Filter{
NamePass: []string{"foo*", "cpu_usage_idle"},
}
require.NoError(t, f.CompileFilter())
passes := []string{
"foo",
@ -61,6 +67,7 @@ func TestFilter_NameDrop(t *testing.T) {
f := Filter{
NameDrop: []string{"foo*", "cpu_usage_idle"},
}
require.NoError(t, f.CompileFilter())
drops := []string{
"foo",
@ -94,6 +101,7 @@ func TestFilter_FieldPass(t *testing.T) {
f := Filter{
FieldPass: []string{"foo*", "cpu_usage_idle"},
}
require.NoError(t, f.CompileFilter())
passes := []string{
"foo",
@ -127,6 +135,7 @@ func TestFilter_FieldDrop(t *testing.T) {
f := Filter{
FieldDrop: []string{"foo*", "cpu_usage_idle"},
}
require.NoError(t, f.CompileFilter())
drops := []string{
"foo",
@ -169,6 +178,7 @@ func TestFilter_TagPass(t *testing.T) {
f := Filter{
TagPass: filters,
}
require.NoError(t, f.CompileFilter())
passes := []map[string]string{
{"cpu": "cpu-total"},
@ -212,6 +222,7 @@ func TestFilter_TagDrop(t *testing.T) {
f := Filter{
TagDrop: filters,
}
require.NoError(t, f.CompileFilter())
drops := []map[string]string{
{"cpu": "cpu-total"},
@ -241,3 +252,115 @@ func TestFilter_TagDrop(t *testing.T) {
}
}
}
// TestFilter_CompileFilterError expects CompileFilter to return an error for
// every filter kind when it is configured with the patterns used below.
func TestFilter_CompileFilterError(t *testing.T) {
	// Name, field and tag-key filters, each given two empty patterns.
	emptyPatternFilters := []Filter{
		{NameDrop: []string{"", ""}},
		{NamePass: []string{"", ""}},
		{FieldDrop: []string{"", ""}},
		{FieldPass: []string{"", ""}},
		{TagExclude: []string{"", ""}},
		{TagInclude: []string{"", ""}},
	}
	for i := range emptyPatternFilters {
		assert.Error(t, emptyPatternFilters[i].CompileFilter())
	}

	// Tag value filters with a pattern CompileFilter is expected to reject.
	badTagFilters := func() []TagFilter {
		return []TagFilter{{
			Name:   "cpu",
			Filter: []string{"{foobar}"},
		}}
	}

	dropFilter := Filter{TagDrop: badTagFilters()}
	require.Error(t, dropFilter.CompileFilter())

	passFilter := Filter{TagPass: badTagFilters()}
	require.Error(t, passFilter.CompileFilter())
}
func TestFilter_ShouldMetricsPass(t *testing.T) {
m := testutil.TestMetric(1, "testmetric")
f := Filter{
NameDrop: []string{"foobar"},
}
require.NoError(t, f.CompileFilter())
require.True(t, f.ShouldMetricPass(m))
m = testutil.TestMetric(1, "foobar")
require.False(t, f.ShouldMetricPass(m))
}
// TestFilter_FilterTagsNoMatches checks FilterTags with patterns that match
// no tag key: TagExclude must leave the map unchanged, while TagInclude must
// remove every tag.
func TestFilter_FilterTagsNoMatches(t *testing.T) {
	tags := map[string]string{"host": "localhost", "mytag": "foobar"}

	exclude := Filter{TagExclude: []string{"nomatch"}}
	require.NoError(t, exclude.CompileFilter())
	exclude.FilterTags(tags)
	assert.Equal(t,
		map[string]string{"host": "localhost", "mytag": "foobar"},
		tags)

	// The include filter runs on the same (still complete) map.
	include := Filter{TagInclude: []string{"nomatch"}}
	require.NoError(t, include.CompileFilter())
	include.FilterTags(tags)
	assert.Equal(t, map[string]string{}, tags)
}
// TestFilter_FilterTagsMatches checks FilterTags with glob patterns that
// match one of the two tag keys: TagExclude drops the matching key and
// TagInclude keeps only the matching key.
func TestFilter_FilterTagsMatches(t *testing.T) {
	// freshTags returns a new copy of the starting tag set for each case.
	freshTags := func() map[string]string {
		return map[string]string{"host": "localhost", "mytag": "foobar"}
	}

	tags := freshTags()
	exclude := Filter{TagExclude: []string{"ho*"}}
	require.NoError(t, exclude.CompileFilter())
	exclude.FilterTags(tags)
	assert.Equal(t, map[string]string{"mytag": "foobar"}, tags)

	tags = freshTags()
	include := Filter{TagInclude: []string{"my*"}}
	require.NoError(t, include.CompileFilter())
	include.FilterTags(tags)
	assert.Equal(t, map[string]string{"mytag": "foobar"}, tags)
}

View File

@ -2,48 +2,54 @@ package internal_models
import (
"log"
"sync"
"time"
"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/internal/buffer"
)
const (
// Default number of metrics kept between flushes.
DEFAULT_METRIC_BUFFER_LIMIT = 1000
// Default metric batch size.
DEFAULT_METRIC_BATCH_SIZE = 1000
// Limit how many full metric buffers are kept due to failed writes.
FULL_METRIC_BUFFERS_LIMIT = 100
// Default number of metrics kept. It should be a multiple of batch size.
DEFAULT_METRIC_BUFFER_LIMIT = 10000
)
// RunningOutput contains the output configuration
type RunningOutput struct {
Name string
Output telegraf.Output
Config *OutputConfig
Quiet bool
MetricBufferLimit int
FlushBufferWhenFull bool
MetricBatchSize int
metrics []telegraf.Metric
tmpmetrics map[int][]telegraf.Metric
overwriteI int
mapI int
sync.Mutex
metrics *buffer.Buffer
failMetrics *buffer.Buffer
}
func NewRunningOutput(
name string,
output telegraf.Output,
conf *OutputConfig,
batchSize int,
bufferLimit int,
) *RunningOutput {
if bufferLimit == 0 {
bufferLimit = DEFAULT_METRIC_BUFFER_LIMIT
}
if batchSize == 0 {
batchSize = DEFAULT_METRIC_BATCH_SIZE
}
ro := &RunningOutput{
Name: name,
metrics: make([]telegraf.Metric, 0),
tmpmetrics: make(map[int][]telegraf.Metric),
metrics: buffer.NewBuffer(batchSize),
failMetrics: buffer.NewBuffer(bufferLimit),
Output: output,
Config: conf,
MetricBufferLimit: DEFAULT_METRIC_BUFFER_LIMIT,
MetricBufferLimit: bufferLimit,
MetricBatchSize: batchSize,
}
return ro
}
@ -56,67 +62,78 @@ func (ro *RunningOutput) AddMetric(metric telegraf.Metric) {
return
}
}
ro.Lock()
defer ro.Unlock()
if len(ro.metrics) < ro.MetricBufferLimit {
ro.metrics = append(ro.metrics, metric)
} else {
if ro.FlushBufferWhenFull {
ro.metrics = append(ro.metrics, metric)
tmpmetrics := make([]telegraf.Metric, len(ro.metrics))
copy(tmpmetrics, ro.metrics)
ro.metrics = make([]telegraf.Metric, 0)
err := ro.write(tmpmetrics)
// Filter any tagexclude/taginclude parameters before adding metric
if len(ro.Config.Filter.TagExclude) != 0 || len(ro.Config.Filter.TagInclude) != 0 {
// In order to filter out tags, we need to create a new metric, since
// metrics are immutable once created.
tags := metric.Tags()
fields := metric.Fields()
t := metric.Time()
name := metric.Name()
ro.Config.Filter.FilterTags(tags)
// error is not possible if creating from another metric, so ignore.
metric, _ = telegraf.NewMetric(name, tags, fields, t)
}
ro.metrics.Add(metric)
if ro.metrics.Len() == ro.MetricBatchSize {
batch := ro.metrics.Batch(ro.MetricBatchSize)
err := ro.write(batch)
if err != nil {
log.Printf("ERROR writing full metric buffer to output %s, %s",
ro.Name, err)
if len(ro.tmpmetrics) == FULL_METRIC_BUFFERS_LIMIT {
ro.mapI = 0
// overwrite one
ro.tmpmetrics[ro.mapI] = tmpmetrics
ro.mapI++
} else {
ro.tmpmetrics[ro.mapI] = tmpmetrics
ro.mapI++
}
}
} else {
if ro.overwriteI == 0 {
log.Printf("WARNING: overwriting cached metrics, you may want to " +
"increase the metric_buffer_limit setting in your [agent] " +
"config if you do not wish to overwrite metrics.\n")
}
if ro.overwriteI == len(ro.metrics) {
ro.overwriteI = 0
}
ro.metrics[ro.overwriteI] = metric
ro.overwriteI++
ro.failMetrics.Add(batch...)
}
}
}
// Write writes all cached points to this output.
func (ro *RunningOutput) Write() error {
ro.Lock()
defer ro.Unlock()
err := ro.write(ro.metrics)
if !ro.Quiet {
log.Printf("Output [%s] buffer fullness: %d / %d metrics. "+
"Total gathered metrics: %d. Total dropped metrics: %d.",
ro.Name,
ro.failMetrics.Len()+ro.metrics.Len(),
ro.MetricBufferLimit,
ro.metrics.Total(),
ro.metrics.Drops()+ro.failMetrics.Drops())
}
var err error
if !ro.failMetrics.IsEmpty() {
bufLen := ro.failMetrics.Len()
// how many batches of failed writes we need to write.
nBatches := bufLen/ro.MetricBatchSize + 1
batchSize := ro.MetricBatchSize
for i := 0; i < nBatches; i++ {
// If it's the last batch, only grab the metrics that have not had
// a write attempt already (this is primarily to preserve order).
if i == nBatches-1 {
batchSize = bufLen % ro.MetricBatchSize
}
batch := ro.failMetrics.Batch(batchSize)
// If we've already failed previous writes, don't bother trying to
// write to this output again. We are not exiting the loop just so
// that we can rotate the metrics to preserve order.
if err == nil {
err = ro.write(batch)
}
if err != nil {
return err
} else {
ro.metrics = make([]telegraf.Metric, 0)
ro.overwriteI = 0
ro.failMetrics.Add(batch...)
}
// Write any cached metric buffers that failed previously
for i, tmpmetrics := range ro.tmpmetrics {
if err := ro.write(tmpmetrics); err != nil {
return err
} else {
delete(ro.tmpmetrics, i)
}
}
batch := ro.metrics.Batch(ro.MetricBatchSize)
// see comment above about not trying to write to an already failed output.
// if ro.failMetrics is empty then err will always be nil at this point.
if err == nil {
err = ro.write(batch)
}
if err != nil {
ro.failMetrics.Add(batch...)
return err
}
return nil
}
@ -129,8 +146,8 @@ func (ro *RunningOutput) write(metrics []telegraf.Metric) error {
elapsed := time.Since(start)
if err == nil {
if !ro.Quiet {
log.Printf("Wrote %d metrics to output %s in %s\n",
len(metrics), ro.Name, elapsed)
log.Printf("Output [%s] wrote batch of %d metrics in %s\n",
ro.Name, len(metrics), elapsed)
}
}
return err

View File

@ -2,7 +2,6 @@ package internal_models
import (
"fmt"
"sort"
"sync"
"testing"
@ -29,16 +28,100 @@ var next5 = []telegraf.Metric{
testutil.TestMetric(101, "metric10"),
}
// Test that we can write metrics with simple default setup.
func TestRunningOutputDefault(t *testing.T) {
// Benchmark adding metrics.
func BenchmarkRunningOutputAddWrite(b *testing.B) {
conf := &OutputConfig{
Filter: Filter{
IsActive: false,
},
}
m := &perfOutput{}
ro := NewRunningOutput("test", m, conf, 1000, 10000)
ro.Quiet = true
for n := 0; n < b.N; n++ {
ro.AddMetric(first5[0])
ro.Write()
}
}
// Benchmark adding metrics.
func BenchmarkRunningOutputAddWriteEvery100(b *testing.B) {
conf := &OutputConfig{
Filter: Filter{
IsActive: false,
},
}
m := &perfOutput{}
ro := NewRunningOutput("test", m, conf, 1000, 10000)
ro.Quiet = true
for n := 0; n < b.N; n++ {
ro.AddMetric(first5[0])
if n%100 == 0 {
ro.Write()
}
}
}
// Benchmark adding metrics.
func BenchmarkRunningOutputAddFailWrites(b *testing.B) {
conf := &OutputConfig{
Filter: Filter{
IsActive: false,
},
}
m := &perfOutput{}
m.failWrite = true
ro := NewRunningOutput("test", m, conf, 1000, 10000)
ro.Quiet = true
for n := 0; n < b.N; n++ {
ro.AddMetric(first5[0])
}
}
// Test that NameDrop filters get properly applied.
func TestRunningOutput_DropFilter(t *testing.T) {
conf := &OutputConfig{
Filter: Filter{
IsActive: true,
NameDrop: []string{"metric1", "metric2"},
},
}
assert.NoError(t, conf.Filter.CompileFilter())
m := &mockOutput{}
ro := NewRunningOutput("test", m, conf)
ro := NewRunningOutput("test", m, conf, 1000, 10000)
for _, metric := range first5 {
ro.AddMetric(metric)
}
for _, metric := range next5 {
ro.AddMetric(metric)
}
assert.Len(t, m.Metrics(), 0)
err := ro.Write()
assert.NoError(t, err)
assert.Len(t, m.Metrics(), 8)
}
// Test that NameDrop filters without a match do nothing.
func TestRunningOutput_PassFilter(t *testing.T) {
conf := &OutputConfig{
Filter: Filter{
IsActive: true,
NameDrop: []string{"metric1000", "foo*"},
},
}
assert.NoError(t, conf.Filter.CompileFilter())
m := &mockOutput{}
ro := NewRunningOutput("test", m, conf, 1000, 10000)
for _, metric := range first5 {
ro.AddMetric(metric)
@ -53,41 +136,96 @@ func TestRunningOutputDefault(t *testing.T) {
assert.Len(t, m.Metrics(), 10)
}
// Test that the first metric gets overwritten if there is a buffer overflow.
func TestRunningOutputOverwrite(t *testing.T) {
// Test that tags are properly included
func TestRunningOutput_TagIncludeNoMatch(t *testing.T) {
conf := &OutputConfig{
Filter: Filter{
IsActive: false,
IsActive: true,
TagInclude: []string{"nothing*"},
},
}
assert.NoError(t, conf.Filter.CompileFilter())
m := &mockOutput{}
ro := NewRunningOutput("test", m, conf)
ro.MetricBufferLimit = 4
ro := NewRunningOutput("test", m, conf, 1000, 10000)
for _, metric := range first5 {
ro.AddMetric(metric)
}
require.Len(t, m.Metrics(), 0)
ro.AddMetric(first5[0])
assert.Len(t, m.Metrics(), 0)
err := ro.Write()
require.NoError(t, err)
require.Len(t, m.Metrics(), 4)
var expected, actual []string
for i, exp := range first5[1:] {
expected = append(expected, exp.String())
actual = append(actual, m.Metrics()[i].String())
}
sort.Strings(expected)
sort.Strings(actual)
assert.Equal(t, expected, actual)
assert.NoError(t, err)
assert.Len(t, m.Metrics(), 1)
assert.Empty(t, m.Metrics()[0].Tags())
}
// Test that multiple buffer overflows are handled properly.
func TestRunningOutputMultiOverwrite(t *testing.T) {
// TestRunningOutput_TagExcludeMatch checks that a TagExclude pattern which
// matches the metric's tag leaves the written metric with no tags.
func TestRunningOutput_TagExcludeMatch(t *testing.T) {
	conf := &OutputConfig{
		Filter: Filter{IsActive: true, TagExclude: []string{"tag*"}},
	}
	assert.NoError(t, conf.Filter.CompileFilter())

	out := &mockOutput{}
	ro := NewRunningOutput("test", out, conf, 1000, 10000)

	ro.AddMetric(first5[0])
	// Nothing has been written to the output before Write is called.
	assert.Len(t, out.Metrics(), 0)

	assert.NoError(t, ro.Write())
	assert.Len(t, out.Metrics(), 1)
	assert.Len(t, out.Metrics()[0].Tags(), 0)
}
// TestRunningOutput_TagExcludeNoMatch checks that a TagExclude pattern which
// matches nothing leaves the written metric's single tag in place.
func TestRunningOutput_TagExcludeNoMatch(t *testing.T) {
	conf := &OutputConfig{
		Filter: Filter{IsActive: true, TagExclude: []string{"nothing*"}},
	}
	assert.NoError(t, conf.Filter.CompileFilter())

	out := &mockOutput{}
	ro := NewRunningOutput("test", out, conf, 1000, 10000)

	ro.AddMetric(first5[0])
	// Nothing has been written to the output before Write is called.
	assert.Len(t, out.Metrics(), 0)

	assert.NoError(t, ro.Write())
	assert.Len(t, out.Metrics(), 1)
	assert.Len(t, out.Metrics()[0].Tags(), 1)
}
// TestRunningOutput_TagIncludeMatch checks that a TagInclude pattern which
// matches the metric's tag keeps that tag on the written metric.
func TestRunningOutput_TagIncludeMatch(t *testing.T) {
	conf := &OutputConfig{
		Filter: Filter{IsActive: true, TagInclude: []string{"tag*"}},
	}
	assert.NoError(t, conf.Filter.CompileFilter())

	out := &mockOutput{}
	ro := NewRunningOutput("test", out, conf, 1000, 10000)

	ro.AddMetric(first5[0])
	// Nothing has been written to the output before Write is called.
	assert.Len(t, out.Metrics(), 0)

	assert.NoError(t, ro.Write())
	assert.Len(t, out.Metrics(), 1)
	assert.Len(t, out.Metrics()[0].Tags(), 1)
}
// Test that we can write metrics with simple default setup.
func TestRunningOutputDefault(t *testing.T) {
conf := &OutputConfig{
Filter: Filter{
IsActive: false,
@ -95,8 +233,7 @@ func TestRunningOutputMultiOverwrite(t *testing.T) {
}
m := &mockOutput{}
ro := NewRunningOutput("test", m, conf)
ro.MetricBufferLimit = 3
ro := NewRunningOutput("test", m, conf, 1000, 10000)
for _, metric := range first5 {
ro.AddMetric(metric)
@ -104,22 +241,11 @@ func TestRunningOutputMultiOverwrite(t *testing.T) {
for _, metric := range next5 {
ro.AddMetric(metric)
}
require.Len(t, m.Metrics(), 0)
assert.Len(t, m.Metrics(), 0)
err := ro.Write()
require.NoError(t, err)
require.Len(t, m.Metrics(), 3)
var expected, actual []string
for i, exp := range next5[2:] {
expected = append(expected, exp.String())
actual = append(actual, m.Metrics()[i].String())
}
sort.Strings(expected)
sort.Strings(actual)
assert.Equal(t, expected, actual)
assert.NoError(t, err)
assert.Len(t, m.Metrics(), 10)
}
// Test that running output doesn't flush until it's full when
@ -132,11 +258,9 @@ func TestRunningOutputFlushWhenFull(t *testing.T) {
}
m := &mockOutput{}
ro := NewRunningOutput("test", m, conf)
ro.FlushBufferWhenFull = true
ro.MetricBufferLimit = 5
ro := NewRunningOutput("test", m, conf, 6, 10)
// Fill buffer to limit
// Fill buffer to 1 under limit
for _, metric := range first5 {
ro.AddMetric(metric)
}
@ -165,9 +289,7 @@ func TestRunningOutputMultiFlushWhenFull(t *testing.T) {
}
m := &mockOutput{}
ro := NewRunningOutput("test", m, conf)
ro.FlushBufferWhenFull = true
ro.MetricBufferLimit = 4
ro := NewRunningOutput("test", m, conf, 4, 12)
// Fill buffer past limit twice
for _, metric := range first5 {
@ -177,7 +299,7 @@ func TestRunningOutputMultiFlushWhenFull(t *testing.T) {
ro.AddMetric(metric)
}
// flushed twice
assert.Len(t, m.Metrics(), 10)
assert.Len(t, m.Metrics(), 8)
}
func TestRunningOutputWriteFail(t *testing.T) {
@ -189,11 +311,9 @@ func TestRunningOutputWriteFail(t *testing.T) {
m := &mockOutput{}
m.failWrite = true
ro := NewRunningOutput("test", m, conf)
ro.FlushBufferWhenFull = true
ro.MetricBufferLimit = 4
ro := NewRunningOutput("test", m, conf, 4, 12)
// Fill buffer past limit twice
// Fill buffer to limit twice
for _, metric := range first5 {
ro.AddMetric(metric)
}
@ -216,6 +336,161 @@ func TestRunningOutputWriteFail(t *testing.T) {
assert.Len(t, m.Metrics(), 10)
}
// Verify that the order of points is preserved during a write failure.
func TestRunningOutputWriteFailOrder(t *testing.T) {
conf := &OutputConfig{
Filter: Filter{
IsActive: false,
},
}
m := &mockOutput{}
m.failWrite = true
ro := NewRunningOutput("test", m, conf, 100, 1000)
// add 5 metrics
for _, metric := range first5 {
ro.AddMetric(metric)
}
// no successful flush yet
assert.Len(t, m.Metrics(), 0)
// Write fails
err := ro.Write()
require.Error(t, err)
// no successful flush yet
assert.Len(t, m.Metrics(), 0)
m.failWrite = false
// add 5 more metrics
for _, metric := range next5 {
ro.AddMetric(metric)
}
err = ro.Write()
require.NoError(t, err)
// Verify that 10 metrics were written
assert.Len(t, m.Metrics(), 10)
// Verify that they are in order
expected := append(first5, next5...)
assert.Equal(t, expected, m.Metrics())
}
// Verify that the order of points is preserved during many write failures.
func TestRunningOutputWriteFailOrder2(t *testing.T) {
conf := &OutputConfig{
Filter: Filter{
IsActive: false,
},
}
m := &mockOutput{}
m.failWrite = true
ro := NewRunningOutput("test", m, conf, 5, 100)
// add 5 metrics
for _, metric := range first5 {
ro.AddMetric(metric)
}
// Write fails
err := ro.Write()
require.Error(t, err)
// no successful flush yet
assert.Len(t, m.Metrics(), 0)
// add 5 metrics
for _, metric := range next5 {
ro.AddMetric(metric)
}
// Write fails
err = ro.Write()
require.Error(t, err)
// no successful flush yet
assert.Len(t, m.Metrics(), 0)
// add 5 metrics
for _, metric := range first5 {
ro.AddMetric(metric)
}
// Write fails
err = ro.Write()
require.Error(t, err)
// no successful flush yet
assert.Len(t, m.Metrics(), 0)
// add 5 metrics
for _, metric := range next5 {
ro.AddMetric(metric)
}
// Write fails
err = ro.Write()
require.Error(t, err)
// no successful flush yet
assert.Len(t, m.Metrics(), 0)
m.failWrite = false
err = ro.Write()
require.NoError(t, err)
// Verify that 10 metrics were written
assert.Len(t, m.Metrics(), 20)
// Verify that they are in order
expected := append(first5, next5...)
expected = append(expected, first5...)
expected = append(expected, next5...)
assert.Equal(t, expected, m.Metrics())
}
// Verify that the order of points is preserved when there is a remainder
// of points for the batch.
//
// ie, with a batch size of 5:
//
// 1 2 3 4 5 6 <-- order, failed points
// 6 1 2 3 4 5 <-- order, after 1st write failure (1 2 3 4 5 was batch)
// 1 2 3 4 5 6 <-- order, after 2nd write failure, (6 was batch)
//
func TestRunningOutputWriteFailOrder3(t *testing.T) {
conf := &OutputConfig{
Filter: Filter{
IsActive: false,
},
}
m := &mockOutput{}
m.failWrite = true
ro := NewRunningOutput("test", m, conf, 5, 1000)
// add 5 metrics
for _, metric := range first5 {
ro.AddMetric(metric)
}
// no successful flush yet
assert.Len(t, m.Metrics(), 0)
// Write fails
err := ro.Write()
require.Error(t, err)
// no successful flush yet
assert.Len(t, m.Metrics(), 0)
// add and attempt to write a single metric:
ro.AddMetric(next5[0])
err = ro.Write()
require.Error(t, err)
// unset fail and write metrics
m.failWrite = false
err = ro.Write()
require.NoError(t, err)
// Verify that 6 metrics were written
assert.Len(t, m.Metrics(), 6)
// Verify that they are in order
expected := append(first5, next5[0])
assert.Equal(t, expected, m.Metrics())
}
type mockOutput struct {
sync.Mutex
@ -263,3 +538,31 @@ func (m *mockOutput) Metrics() []telegraf.Metric {
defer m.Unlock()
return m.metrics
}
// perfOutput is a minimal output used by the benchmarks. It stores nothing:
// Write ignores the metrics it is given and only reports success or failure.
type perfOutput struct {
	// failWrite makes every Write call return an error when true.
	failWrite bool
}

// Connect is a no-op that always succeeds.
func (p *perfOutput) Connect() error { return nil }

// Close is a no-op that always succeeds.
func (p *perfOutput) Close() error { return nil }

// Description returns an empty description.
func (p *perfOutput) Description() string { return "" }

// SampleConfig returns an empty sample configuration.
func (p *perfOutput) SampleConfig() string { return "" }

// Write ignores metrics and returns an error only when failWrite is set.
func (p *perfOutput) Write(metrics []telegraf.Metric) error {
	if !p.failWrite {
		return nil
	}
	return fmt.Errorf("Failed Write!")
}

View File

@ -14,6 +14,7 @@ import (
_ "github.com/influxdata/telegraf/plugins/inputs/dovecot"
_ "github.com/influxdata/telegraf/plugins/inputs/elasticsearch"
_ "github.com/influxdata/telegraf/plugins/inputs/exec"
_ "github.com/influxdata/telegraf/plugins/inputs/filestat"
_ "github.com/influxdata/telegraf/plugins/inputs/github_webhooks"
_ "github.com/influxdata/telegraf/plugins/inputs/haproxy"
_ "github.com/influxdata/telegraf/plugins/inputs/http_response"
@ -55,6 +56,7 @@ import (
_ "github.com/influxdata/telegraf/plugins/inputs/statsd"
_ "github.com/influxdata/telegraf/plugins/inputs/sysstat"
_ "github.com/influxdata/telegraf/plugins/inputs/system"
_ "github.com/influxdata/telegraf/plugins/inputs/tail"
_ "github.com/influxdata/telegraf/plugins/inputs/tcp_listener"
_ "github.com/influxdata/telegraf/plugins/inputs/trig"
_ "github.com/influxdata/telegraf/plugins/inputs/twemproxy"

View File

@ -7,19 +7,12 @@ import (
"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/plugins/inputs"
"io/ioutil"
"log"
"net/http"
"net/url"
//"reflect"
"strings"
)
/*type Server struct {
Host string
Username string
Password string
Port string
}*/
type JolokiaClient interface {
MakeRequest(req *http.Request) (*http.Response, error)
}
@ -55,12 +48,6 @@ type jmxMetric interface {
addTagsFields(out map[string]interface{})
}
func addServerTags(host string, tags map[string]string) {
if host != "" && host != "localhost" && host != "127.0.0.1" {
tags["host"] = host
}
}
func newJavaMetric(host string, metric string,
acc telegraf.Accumulator) *javaMetric {
return &javaMetric{host: host, metric: metric, acc: acc}
@ -120,7 +107,7 @@ func (j javaMetric) addTagsFields(out map[string]interface{}) {
tokens := parseJmxMetricRequest(mbean)
addTokensToTags(tokens, tags)
addServerTags(j.host, tags)
tags["cassandra_host"] = j.host
if _, ok := tags["mname"]; !ok {
//Queries for a single value will not return a "name" tag in the response.
@ -148,7 +135,7 @@ func addCassandraMetric(mbean string, c cassandraMetric,
fields := make(map[string]interface{})
tokens := parseJmxMetricRequest(mbean)
addTokensToTags(tokens, tags)
addServerTags(c.host, tags)
tags["cassandra_host"] = c.host
addValuesAsFields(values, fields, tags["mname"])
c.acc.AddFields(tokens["class"]+tokens["type"], fields, tags)
@ -277,15 +264,19 @@ func (c *Cassandra) Gather(acc telegraf.Accumulator) error {
for _, server := range servers {
for _, metric := range metrics {
var m jmxMetric
serverTokens := parseServerTokens(server)
var m jmxMetric
if strings.HasPrefix(metric, "/java.lang:") {
m = newJavaMetric(serverTokens["host"], metric, acc)
} else if strings.HasPrefix(metric,
"/org.apache.cassandra.metrics:") {
m = newCassandraMetric(serverTokens["host"], metric, acc)
} else {
// unsupported metric type
log.Printf("Unsupported Cassandra metric [%s], skipping",
metric)
continue
}
// Prepare URL

View File

@ -163,12 +163,12 @@ func TestHttpJsonJavaMultiValue(t *testing.T) {
"HeapMemoryUsage_used": 203288528.0,
}
tags1 := map[string]string{
"host": "10.10.10.10",
"cassandra_host": "10.10.10.10",
"mname": "HeapMemoryUsage",
}
tags2 := map[string]string{
"host": "10.10.10.11",
"cassandra_host": "10.10.10.11",
"mname": "HeapMemoryUsage",
}
acc.AssertContainsTaggedFields(t, "javaMemory", fields, tags1)
@ -190,7 +190,7 @@ func TestHttpJsonJavaMultiType(t *testing.T) {
}
tags := map[string]string{
"host": "10.10.10.10",
"cassandra_host": "10.10.10.10",
"mname": "ConcurrentMarkSweep",
}
acc.AssertContainsTaggedFields(t, "javaGarbageCollector", fields, tags)
@ -231,7 +231,7 @@ func TestHttpJsonCassandraMultiValue(t *testing.T) {
}
tags := map[string]string{
"host": "10.10.10.10",
"cassandra_host": "10.10.10.10",
"mname": "ReadLatency",
"keyspace": "test_keyspace1",
"scope": "test_table",
@ -268,14 +268,14 @@ func TestHttpJsonCassandraNestedMultiValue(t *testing.T) {
}
tags1 := map[string]string{
"host": "10.10.10.10",
"cassandra_host": "10.10.10.10",
"mname": "ReadLatency",
"keyspace": "test_keyspace1",
"scope": "test_table1",
}
tags2 := map[string]string{
"host": "10.10.10.10",
"cassandra_host": "10.10.10.10",
"mname": "ReadLatency",
"keyspace": "test_keyspace2",
"scope": "test_table2",

View File

@ -7,8 +7,6 @@ import (
"github.com/aws/aws-sdk-go/aws"
"github.com/aws/aws-sdk-go/aws/credentials"
"github.com/aws/aws-sdk-go/aws/credentials/ec2rolecreds"
"github.com/aws/aws-sdk-go/aws/ec2metadata"
"github.com/aws/aws-sdk-go/aws/session"
"github.com/aws/aws-sdk-go/service/cloudwatch"
@ -21,6 +19,8 @@ import (
type (
CloudWatch struct {
Region string `toml:"region"`
AccessKey string `toml:"access_key"`
SecretKey string `toml:"secret_key"`
Period internal.Duration `toml:"period"`
Delay internal.Duration `toml:"delay"`
Namespace string `toml:"namespace"`
@ -56,6 +56,15 @@ func (c *CloudWatch) SampleConfig() string {
## Amazon Region
region = 'us-east-1'
## Amazon Credentials
## Credentials are loaded in the following order
## 1) explicit credentials from 'access_key' and 'secret_key'
## 2) environment variables
## 3) shared credentials file
## 4) EC2 Instance Profile
#access_key = ""
#secret_key = ""
## Requested CloudWatch aggregation Period (required - must be a multiple of 60s)
period = '1m'
@ -154,12 +163,9 @@ func init() {
func (c *CloudWatch) initializeCloudWatch() error {
config := &aws.Config{
Region: aws.String(c.Region),
Credentials: credentials.NewChainCredentials(
[]credentials.Provider{
&ec2rolecreds.EC2RoleProvider{Client: ec2metadata.New(session.New())},
&credentials.EnvProvider{},
&credentials.SharedCredentialsProvider{},
}),
}
if c.AccessKey != "" || c.SecretKey != "" {
config.Credentials = credentials.NewStaticCredentials(c.AccessKey, c.SecretKey, "")
}
c.client = cloudwatch.New(session.New(config))

View File

@ -162,7 +162,7 @@ func (g *Disque) gatherServer(addr *url.URL, acc telegraf.Accumulator) error {
var read int
fields := make(map[string]interface{})
tags := map[string]string{"host": addr.String()}
tags := map[string]string{"disque_host": addr.String()}
for read < sz {
line, err := r.ReadString('\n')
if err != nil {

View File

@ -15,6 +15,9 @@ var servers = []string{"8.8.8.8"}
var domains = []string{"google.com"}
func TestGathering(t *testing.T) {
if testing.Short() {
t.Skip("Skipping network-dependent test in short mode.")
}
var dnsConfig = DnsQuery{
Servers: servers,
Domains: domains,
@ -31,6 +34,9 @@ func TestGathering(t *testing.T) {
}
func TestGatheringMxRecord(t *testing.T) {
if testing.Short() {
t.Skip("Skipping network-dependent test in short mode.")
}
var dnsConfig = DnsQuery{
Servers: servers,
Domains: domains,
@ -48,6 +54,9 @@ func TestGatheringMxRecord(t *testing.T) {
}
func TestGatheringRootDomain(t *testing.T) {
if testing.Short() {
t.Skip("Skipping network-dependent test in short mode.")
}
var dnsConfig = DnsQuery{
Servers: servers,
Domains: []string{"."},
@ -72,6 +81,9 @@ func TestGatheringRootDomain(t *testing.T) {
}
func TestMetricContainsServerAndDomainAndRecordTypeTags(t *testing.T) {
if testing.Short() {
t.Skip("Skipping network-dependent test in short mode.")
}
var dnsConfig = DnsQuery{
Servers: servers,
Domains: domains,
@ -95,6 +107,9 @@ func TestMetricContainsServerAndDomainAndRecordTypeTags(t *testing.T) {
}
func TestGatheringTimeout(t *testing.T) {
if testing.Short() {
t.Skip("Skipping network-dependent test in short mode.")
}
var dnsConfig = DnsQuery{
Servers: servers,
Domains: domains,

View File

@ -29,10 +29,10 @@ for the stat structure can be found
Every effort was made to preserve the names based on the JSON response from the
docker API.
Note that the docker_cpu metric may appear multiple times per collection, based
on the availability of per-cpu stats on your system.
Note that the docker_container_cpu metric may appear multiple times per collection,
based on the availability of per-cpu stats on your system.
- docker_mem
- docker_container_mem
- total_pgmafault
- cache
- mapped_file
@ -66,7 +66,8 @@ on the availability of per-cpu stats on your system.
- usage
- failcnt
- limit
- docker_cpu
- container_id
- docker_container_cpu
- throttling_periods
- throttling_throttled_periods
- throttling_throttled_time
@ -75,7 +76,8 @@ on the availability of per-cpu stats on your system.
- usage_system
- usage_total
- usage_percent
- docker_net
- container_id
- docker_container_net
- rx_dropped
- rx_bytes
- rx_errors
@ -84,7 +86,8 @@ on the availability of per-cpu stats on your system.
- rx_packets
- tx_errors
- tx_bytes
- docker_blkio
- container_id
- docker_container_blkio
- io_service_bytes_recursive_async
- io_service_bytes_recursive_read
- io_service_bytes_recursive_sync
@ -125,20 +128,20 @@ on the availability of per-cpu stats on your system.
- docker_metadata
- unit=bytes
- docker_cpu specific:
- cont_id (container ID)
- cont_image (container image)
- cont_name (container name)
- docker_container_mem specific:
- container_image
- container_name
- docker_container_cpu specific:
- container_image
- container_name
- cpu
- docker_net specific:
- cont_id (container ID)
- cont_image (container image)
- cont_name (container name)
- docker_container_net specific:
- container_image
- container_name
- network
- docker_blkio specific:
- cont_id (container ID)
- cont_image (container image)
- cont_name (container name)
- docker_container_blkio specific:
- container_image
- container_name
- device
### Example Output:
@ -156,8 +159,8 @@ on the availability of per-cpu stats on your system.
> docker,unit=bytes pool_blocksize=65540i 1456926671065383978
> docker_data,unit=bytes available=24340000000i,total=107400000000i,used=14820000000i 1456926671065383978
> docker_metadata,unit=bytes available=2126999999i,total=2146999999i,used=20420000i 145692667106538
> docker_mem,cont_id=5705ba8ed8fb47527410653d60a8bb2f3af5e62372297c419022a3cc6d45d848,\
cont_image=spotify/kafka,cont_name=kafka \
> docker_container_mem,\
container_image=spotify/kafka,container_name=kafka \
active_anon=52568064i,active_file=6926336i,cache=12038144i,fail_count=0i,\
hierarchical_memory_limit=9223372036854771712i,inactive_anon=52707328i,\
inactive_file=5111808i,limit=1044578304i,mapped_file=10301440i,\
@ -168,21 +171,21 @@ total_inactive_file=5111808i,total_mapped_file=10301440i,total_pgfault=63762i,\
total_pgmafault=0i,total_pgpgin=73355i,total_pgpgout=45736i,\
total_rss=105275392i,total_rss_huge=4194304i,total_unevictable=0i,\
total_writeback=0i,unevictable=0i,usage=117440512i,writeback=0i 1453409536840126713
> docker_cpu,cont_id=5705ba8ed8fb47527410653d60a8bb2f3af5e62372297c419022a3cc6d45d848,\
cont_image=spotify/kafka,cont_name=kafka,cpu=cpu-total \
> docker_container_cpu,\
container_image=spotify/kafka,container_name=kafka,cpu=cpu-total \
throttling_periods=0i,throttling_throttled_periods=0i,\
throttling_throttled_time=0i,usage_in_kernelmode=440000000i,\
usage_in_usermode=2290000000i,usage_system=84795360000000i,\
usage_total=6628208865i 1453409536840126713
> docker_cpu,cont_id=5705ba8ed8fb47527410653d60a8bb2f3af5e62372297c419022a3cc6d45d848,\
cont_image=spotify/kafka,cont_name=kafka,cpu=cpu0 \
> docker_container_cpu,\
container_image=spotify/kafka,container_name=kafka,cpu=cpu0 \
usage_total=6628208865i 1453409536840126713
> docker_net,cont_id=5705ba8ed8fb47527410653d60a8bb2f3af5e62372297c419022a3cc6d45d848,\
cont_image=spotify/kafka,cont_name=kafka,network=eth0 \
> docker_container_net,\
container_image=spotify/kafka,container_name=kafka,network=eth0 \
rx_bytes=7468i,rx_dropped=0i,rx_errors=0i,rx_packets=94i,tx_bytes=946i,\
tx_dropped=0i,tx_errors=0i,tx_packets=13i 1453409536840126713
> docker_blkio,cont_id=5705ba8ed8fb47527410653d60a8bb2f3af5e62372297c419022a3cc6d45d848,\
cont_image=spotify/kafka,cont_name=kafka,device=8:0 \
> docker_container_blkio,\
container_image=spotify/kafka,container_name=kafka,device=8:0 \
io_service_bytes_recursive_async=80216064i,io_service_bytes_recursive_read=79925248i,\
io_service_bytes_recursive_sync=77824i,io_service_bytes_recursive_total=80293888i,\
io_service_bytes_recursive_write=368640i,io_serviced_recursive_async=6562i,\

View File

@ -16,6 +16,7 @@ import (
"github.com/docker/engine-api/client"
"github.com/docker/engine-api/types"
"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/internal"
"github.com/influxdata/telegraf/plugins/inputs"
)
@ -23,6 +24,7 @@ import (
type Docker struct {
Endpoint string
ContainerNames []string
Timeout internal.Duration
client DockerClient
}
@ -54,6 +56,8 @@ var sampleConfig = `
endpoint = "unix:///var/run/docker.sock"
## Only collect metrics for these containers, collect all if empty
container_names = []
## Timeout for docker list, info, and stats commands
timeout = "5s"
`
// Description returns input description
@ -97,7 +101,9 @@ func (d *Docker) Gather(acc telegraf.Accumulator) error {
// List containers
opts := types.ContainerListOptions{}
containers, err := d.client.ContainerList(context.Background(), opts)
ctx, cancel := context.WithTimeout(context.Background(), d.Timeout.Duration)
defer cancel()
containers, err := d.client.ContainerList(ctx, opts)
if err != nil {
return err
}
@ -106,12 +112,12 @@ func (d *Docker) Gather(acc telegraf.Accumulator) error {
var wg sync.WaitGroup
wg.Add(len(containers))
for _, container := range containers {
go func(c types.Container) {
defer wg.Done()
err := d.gatherContainer(c, acc)
if err != nil {
fmt.Println(err.Error())
log.Printf("Error gathering container %s stats: %s\n",
c.Names, err.Error())
}
}(container)
}
@ -126,7 +132,9 @@ func (d *Docker) gatherInfo(acc telegraf.Accumulator) error {
metadataFields := make(map[string]interface{})
now := time.Now()
// Get info from docker daemon
info, err := d.client.Info(context.Background())
ctx, cancel := context.WithTimeout(context.Background(), d.Timeout.Duration)
defer cancel()
info, err := d.client.Info(ctx)
if err != nil {
return err
}
@ -200,9 +208,8 @@ func (d *Docker) gatherContainer(
}
tags := map[string]string{
"cont_id": container.ID,
"cont_name": cname,
"cont_image": container.Image,
"container_name": cname,
"container_image": container.Image,
}
if len(d.ContainerNames) > 0 {
if !sliceContains(cname, d.ContainerNames) {
@ -210,22 +217,27 @@ func (d *Docker) gatherContainer(
}
}
r, err := d.client.ContainerStats(context.Background(), container.ID, false)
ctx, cancel := context.WithTimeout(context.Background(), d.Timeout.Duration)
defer cancel()
r, err := d.client.ContainerStats(ctx, container.ID, false)
if err != nil {
log.Printf("Error getting docker stats: %s\n", err.Error())
}
defer r.Close()
dec := json.NewDecoder(r)
if err = dec.Decode(&v); err != nil {
log.Printf("Error decoding: %s\n", err.Error())
if err == io.EOF {
return nil
}
return fmt.Errorf("Error decoding: %s", err.Error())
}
// Add labels to tags
for k, v := range container.Labels {
tags[k] = v
for k, label := range container.Labels {
tags[k] = label
}
gatherContainerStats(v, acc, tags)
gatherContainerStats(v, acc, tags, container.ID)
return nil
}
@ -234,6 +246,7 @@ func gatherContainerStats(
stat *types.StatsJSON,
acc telegraf.Accumulator,
tags map[string]string,
id string,
) {
now := stat.Read
@ -272,8 +285,9 @@ func gatherContainerStats(
"inactive_file": stat.MemoryStats.Stats["inactive_file"],
"total_pgpgin": stat.MemoryStats.Stats["total_pgpgin"],
"usage_percent": calculateMemPercent(stat),
"container_id": id,
}
acc.AddFields("docker_mem", memfields, tags, now)
acc.AddFields("docker_container_mem", memfields, tags, now)
cpufields := map[string]interface{}{
"usage_total": stat.CPUStats.CPUUsage.TotalUsage,
@ -284,15 +298,16 @@ func gatherContainerStats(
"throttling_throttled_periods": stat.CPUStats.ThrottlingData.ThrottledPeriods,
"throttling_throttled_time": stat.CPUStats.ThrottlingData.ThrottledTime,
"usage_percent": calculateCPUPercent(stat),
"container_id": id,
}
cputags := copyTags(tags)
cputags["cpu"] = "cpu-total"
acc.AddFields("docker_cpu", cpufields, cputags, now)
acc.AddFields("docker_container_cpu", cpufields, cputags, now)
for i, percpu := range stat.CPUStats.CPUUsage.PercpuUsage {
percputags := copyTags(tags)
percputags["cpu"] = fmt.Sprintf("cpu%d", i)
acc.AddFields("docker_cpu", map[string]interface{}{"usage_total": percpu}, percputags, now)
acc.AddFields("docker_container_cpu", map[string]interface{}{"usage_total": percpu}, percputags, now)
}
for network, netstats := range stat.Networks {
@ -305,11 +320,12 @@ func gatherContainerStats(
"rx_packets": netstats.RxPackets,
"tx_errors": netstats.TxErrors,
"tx_bytes": netstats.TxBytes,
"container_id": id,
}
// Create a new network tag dictionary for the "network" tag
nettags := copyTags(tags)
nettags["network"] = network
acc.AddFields("docker_net", netfields, nettags, now)
acc.AddFields("docker_container_net", netfields, nettags, now)
}
gatherBlockIOMetrics(stat, acc, tags, now)
@ -404,7 +420,7 @@ func gatherBlockIOMetrics(
for device, fields := range deviceStatMap {
iotags := copyTags(tags)
iotags["device"] = device
acc.AddFields("docker_blkio", fields, iotags, now)
acc.AddFields("docker_container_blkio", fields, iotags, now)
}
}

View File

@ -21,13 +21,12 @@ func TestDockerGatherContainerStats(t *testing.T) {
stats := testStats()
tags := map[string]string{
"cont_id": "foobarbaz",
"cont_name": "redis",
"cont_image": "redis/image",
"container_name": "redis",
"container_image": "redis/image",
}
gatherContainerStats(stats, &acc, tags)
gatherContainerStats(stats, &acc, tags, "123456789")
// test docker_net measurement
// test docker_container_net measurement
netfields := map[string]interface{}{
"rx_dropped": uint64(1),
"rx_bytes": uint64(2),
@ -37,10 +36,11 @@ func TestDockerGatherContainerStats(t *testing.T) {
"rx_packets": uint64(2),
"tx_errors": uint64(3),
"tx_bytes": uint64(4),
"container_id": "123456789",
}
nettags := copyTags(tags)
nettags["network"] = "eth0"
acc.AssertContainsTaggedFields(t, "docker_net", netfields, nettags)
acc.AssertContainsTaggedFields(t, "docker_container_net", netfields, nettags)
// test docker_container_blkio measurement
blkiotags := copyTags(tags)
@ -49,9 +49,9 @@ func TestDockerGatherContainerStats(t *testing.T) {
"io_service_bytes_recursive_read": uint64(100),
"io_serviced_recursive_write": uint64(101),
}
acc.AssertContainsTaggedFields(t, "docker_blkio", blkiofields, blkiotags)
acc.AssertContainsTaggedFields(t, "docker_container_blkio", blkiofields, blkiotags)
// test docker_mem measurement
// test docker_container_mem measurement
memfields := map[string]interface{}{
"max_usage": uint64(1001),
"usage": uint64(1111),
@ -87,11 +87,12 @@ func TestDockerGatherContainerStats(t *testing.T) {
"inactive_file": uint64(3),
"total_pgpgin": uint64(4),
"usage_percent": float64(55.55),
"container_id": "123456789",
}
acc.AssertContainsTaggedFields(t, "docker_mem", memfields, tags)
acc.AssertContainsTaggedFields(t, "docker_container_mem", memfields, tags)
// test docker_cpu measurement
// test docker_container_cpu measurement
cputags := copyTags(tags)
cputags["cpu"] = "cpu-total"
cpufields := map[string]interface{}{
@ -103,20 +104,21 @@ func TestDockerGatherContainerStats(t *testing.T) {
"throttling_throttled_periods": uint64(0),
"throttling_throttled_time": uint64(0),
"usage_percent": float64(400.0),
"container_id": "123456789",
}
acc.AssertContainsTaggedFields(t, "docker_cpu", cpufields, cputags)
acc.AssertContainsTaggedFields(t, "docker_container_cpu", cpufields, cputags)
cputags["cpu"] = "cpu0"
cpu0fields := map[string]interface{}{
"usage_total": uint64(1),
}
acc.AssertContainsTaggedFields(t, "docker_cpu", cpu0fields, cputags)
acc.AssertContainsTaggedFields(t, "docker_container_cpu", cpu0fields, cputags)
cputags["cpu"] = "cpu1"
cpu1fields := map[string]interface{}{
"usage_total": uint64(1002),
}
acc.AssertContainsTaggedFields(t, "docker_cpu", cpu1fields, cputags)
acc.AssertContainsTaggedFields(t, "docker_container_cpu", cpu1fields, cputags)
}
func testStats() *types.StatsJSON {
@ -367,19 +369,18 @@ func TestDockerGatherInfo(t *testing.T) {
},
)
acc.AssertContainsTaggedFields(t,
"docker_cpu",
"docker_container_cpu",
map[string]interface{}{
"usage_total": uint64(1231652),
},
map[string]string{
"cont_id": "b7dfbb9478a6ae55e237d4d74f8bbb753f0817192b5081334dc78476296e2173",
"cont_name": "etcd2",
"cont_image": "quay.io/coreos/etcd:v2.2.2",
"container_name": "etcd2",
"container_image": "quay.io/coreos/etcd:v2.2.2",
"cpu": "cpu3",
},
)
acc.AssertContainsTaggedFields(t,
"docker_mem",
"docker_container_mem",
map[string]interface{}{
"total_pgpgout": uint64(0),
"usage_percent": float64(0),
@ -415,11 +416,11 @@ func TestDockerGatherInfo(t *testing.T) {
"pgfault": uint64(0),
"usage": uint64(0),
"limit": uint64(18935443456),
"container_id": "b7dfbb9478a6ae55e237d4d74f8bbb753f0817192b5081334dc78476296e2173",
},
map[string]string{
"cont_id": "b7dfbb9478a6ae55e237d4d74f8bbb753f0817192b5081334dc78476296e2173",
"cont_name": "etcd2",
"cont_image": "quay.io/coreos/etcd:v2.2.2",
"container_name": "etcd2",
"container_image": "quay.io/coreos/etcd:v2.2.2",
},
)

View File

@ -6,10 +6,12 @@ import (
"os/exec"
"sync"
"syscall"
"time"
"github.com/gonuts/go-shellquote"
"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/internal"
"github.com/influxdata/telegraf/plugins/inputs"
"github.com/influxdata/telegraf/plugins/parsers"
"github.com/influxdata/telegraf/plugins/parsers/nagios"
@ -19,6 +21,9 @@ const sampleConfig = `
## Commands array
commands = ["/tmp/test.sh", "/usr/bin/mycollector --foo=bar"]
## Timeout for each command to complete.
timeout = "5s"
## measurement name suffix (for separating different commands)
name_suffix = "_mycollector"
@ -32,6 +37,7 @@ const sampleConfig = `
type Exec struct {
Commands []string
Command string
Timeout internal.Duration
parser parsers.Parser
@ -44,6 +50,7 @@ type Exec struct {
func NewExec() *Exec {
return &Exec{
runner: CommandRunner{},
Timeout: internal.Duration{Duration: time.Second * 5},
}
}
@ -73,7 +80,11 @@ func AddNagiosState(exitCode error, acc telegraf.Accumulator) error {
return nil
}
func (c CommandRunner) Run(e *Exec, command string, acc telegraf.Accumulator) ([]byte, error) {
func (c CommandRunner) Run(
e *Exec,
command string,
acc telegraf.Accumulator,
) ([]byte, error) {
split_cmd, err := shellquote.Split(command)
if err != nil || len(split_cmd) == 0 {
return nil, fmt.Errorf("exec: unable to parse command, %s", err)
@ -84,7 +95,7 @@ func (c CommandRunner) Run(e *Exec, command string, acc telegraf.Accumulator) ([
var out bytes.Buffer
cmd.Stdout = &out
if err := cmd.Run(); err != nil {
if err := internal.RunTimeout(cmd, e.Timeout.Duration); err != nil {
switch e.parser.(type) {
case *nagios.NagiosParser:
AddNagiosState(err, acc)

View File

@ -0,0 +1,37 @@
# filestat Input Plugin
The filestat plugin gathers metrics about file existence, size, and other stats.
### Configuration:
```toml
# Read stats about given file(s)
[[inputs.filestat]]
## Files to gather stats about.
## These accept standard unix glob matching rules, but with the addition of
## ** as a "super asterisk". See https://github.com/gobwas/glob.
files = ["/etc/telegraf/telegraf.conf", "/var/log/**.log"]
## If true, read the entire file and calculate an md5 checksum.
md5 = false
```
### Measurements & Fields:
- filestat
- exists (int, 0 | 1)
- size_bytes (int, bytes)
- md5_sum (optional, string)
### Tags:
- All measurements have the following tags:
- file (the path to the file, as specified in the config)
### Example Output:
```
$ telegraf -config /etc/telegraf/telegraf.conf -input-filter filestat -test
* Plugin: filestat, Collection 1
> filestat,file=/tmp/foo/bar,host=tyrion exists=0i 1461203374493128216
> filestat,file=/Users/sparrc/ws/telegraf.conf,host=tyrion exists=1i,size_bytes=47894i 1461203374493199335
```

View File

@ -0,0 +1,125 @@
package filestat
import (
"crypto/md5"
"fmt"
"io"
"os"
"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/internal/globpath"
"github.com/influxdata/telegraf/plugins/inputs"
)
// sampleConfig is the example configuration returned by SampleConfig. It
// documents the two options of the plugin: the list of files/globs to stat and
// whether an md5 checksum is calculated for each matched file.
const sampleConfig = `
## Files to gather stats about.
## These accept standard unix glob matching rules, but with the addition of
## ** as a "super asterisk". ie:
## "/var/log/**.log" -> recursively find all .log files in /var/log
## "/var/log/*/*.log" -> find all .log files with a parent dir in /var/log
## "/var/log/apache.log" -> just stat the apache log file
##
## See https://github.com/gobwas/glob for more examples
##
files = ["/var/log/**.log"]
## If true, read the entire file and calculate an md5 checksum.
md5 = false
`
// FileStat holds the configuration and cached state of the filestat input.
type FileStat struct {
// Md5, when true, makes Gather add an "md5_sum" field for every matched file.
Md5 bool
// Files lists the paths / glob patterns that Gather reports on.
Files []string

// globs caches the compiled glob for each entry of Files, keyed by the
// entry string exactly as configured, so a pattern is compiled only once.
globs map[string]*globpath.GlobPath
}
// NewFileStat returns a FileStat with an empty, ready-to-use glob cache.
func NewFileStat() *FileStat {
	fs := new(FileStat)
	fs.globs = map[string]*globpath.GlobPath{}
	return fs
}
// Description returns a one-line summary of the plugin.
func (*FileStat) Description() string {
	const summary = "Read stats about given file(s)"
	return summary
}
// SampleConfig returns the example configuration text for the plugin.
func (*FileStat) SampleConfig() string {
	return sampleConfig
}
// Gather adds one "filestat" point per file matched by each entry of f.Files.
//
// For every matched file the point is tagged with the matched file name and
// carries "exists" = 1 and "size_bytes"; when f.Md5 is set an "md5_sum" field
// is added as well. An entry that matches nothing produces a single point
// tagged with the entry itself and "exists" = 0.
//
// Glob compilation and checksum failures do not abort the run: their messages
// are accumulated and returned together as one error after every entry has
// been processed. The returned error is nil when nothing failed.
func (f *FileStat) Gather(acc telegraf.Accumulator) error {
	// Tolerate a FileStat that was not built with NewFileStat: writing to a
	// nil map would panic.
	if f.globs == nil {
		f.globs = make(map[string]*globpath.GlobPath)
	}

	var errS string
	for _, pattern := range f.Files {
		// Compile each configured pattern once and reuse it on later runs.
		g, ok := f.globs[pattern]
		if !ok {
			var err error
			if g, err = globpath.Compile(pattern); err != nil {
				errS += err.Error() + " "
				continue
			}
			f.globs[pattern] = g
		}

		files := g.Match()
		if len(files) == 0 {
			// Nothing matched: report the configured entry as missing.
			acc.AddFields("filestat",
				map[string]interface{}{
					"exists": int64(0),
				},
				map[string]string{
					"file": pattern,
				})
			continue
		}

		for fileName, fileInfo := range files {
			tags := map[string]string{
				"file": fileName,
			}
			fields := map[string]interface{}{
				"exists":     int64(1),
				"size_bytes": fileInfo.Size(),
			}

			if f.Md5 {
				sum, err := getMd5(fileName)
				if err != nil {
					// Still report existence and size for this file.
					errS += err.Error() + " "
				} else {
					fields["md5_sum"] = sum
				}
			}

			acc.AddFields("filestat", fields, tags)
		}
	}

	if errS != "" {
		// errS is runtime text (it can contain file paths), so it must never
		// be used as the format string itself.
		return fmt.Errorf("%s", errS)
	}
	return nil
}
// getMd5 streams the named file through an md5 hasher and returns the digest
// formatted as lowercase hexadecimal. Any error opening or reading the file is
// returned together with an empty string.
func getMd5(file string) (string, error) {
	src, openErr := os.Open(file)
	if openErr != nil {
		return "", openErr
	}
	defer src.Close()

	digest := md5.New()
	if _, copyErr := io.Copy(digest, src); copyErr != nil {
		// reading failed part-way; a partial digest is useless
		return "", copyErr
	}

	return fmt.Sprintf("%x", digest.Sum(nil)), nil
}
func init() {
inputs.Add("filestat", func() telegraf.Input {
return NewFileStat()
})
}

View File

@ -0,0 +1,180 @@
package filestat
import (
"runtime"
"strings"
"testing"
"github.com/influxdata/telegraf/testutil"
"github.com/stretchr/testify/assert"
)
// TestGatherNoMd5 gathers two explicit fixture files and one missing path with
// checksums disabled. The fixture logs must be reported as existing with size
// 0 and no md5 field; the missing path must be reported with exists = 0.
func TestGatherNoMd5(t *testing.T) {
	dir := getTestdataDir()
	fs := NewFileStat()
	fs.Files = []string{
		dir + "log1.log",
		dir + "log2.log",
		"/non/existant/file",
	}

	acc := testutil.Accumulator{}
	// A missing file is reported as exists = 0, not as an error.
	assert.NoError(t, fs.Gather(&acc))

	for _, name := range []string{"log1.log", "log2.log"} {
		tags := map[string]string{
			"file": dir + name,
		}
		fields := map[string]interface{}{
			"size_bytes": int64(0),
			"exists":     int64(1),
		}
		acc.AssertContainsTaggedFields(t, "filestat", fields, tags)
	}

	missingTags := map[string]string{
		"file": "/non/existant/file",
	}
	missingFields := map[string]interface{}{
		"exists": int64(0),
	}
	acc.AssertContainsTaggedFields(t, "filestat", missingFields, missingTags)
}
// TestGatherExplicitFiles gathers two explicit fixture files and one missing
// path with checksums enabled. The fixture logs must carry the md5 of empty
// content; the missing path must be reported with exists = 0 only.
func TestGatherExplicitFiles(t *testing.T) {
	// md5 of zero bytes: both fixture logs are expected to be empty.
	const emptyMd5 = "d41d8cd98f00b204e9800998ecf8427e"

	dir := getTestdataDir()
	fs := NewFileStat()
	fs.Md5 = true
	fs.Files = []string{
		dir + "log1.log",
		dir + "log2.log",
		"/non/existant/file",
	}

	acc := testutil.Accumulator{}
	// A missing file is reported as exists = 0, not as an error.
	assert.NoError(t, fs.Gather(&acc))

	for _, name := range []string{"log1.log", "log2.log"} {
		tags := map[string]string{
			"file": dir + name,
		}
		fields := map[string]interface{}{
			"size_bytes": int64(0),
			"exists":     int64(1),
			"md5_sum":    emptyMd5,
		}
		acc.AssertContainsTaggedFields(t, "filestat", fields, tags)
	}

	missingTags := map[string]string{
		"file": "/non/existant/file",
	}
	missingFields := map[string]interface{}{
		"exists": int64(0),
	}
	acc.AssertContainsTaggedFields(t, "filestat", missingFields, missingTags)
}
// TestGatherGlob checks that a single-level "*.log" glob over the testdata
// directory reports both fixture logs, each tagged with its own file name.
func TestGatherGlob(t *testing.T) {
	// md5 of zero bytes: both fixture logs are expected to be empty.
	const emptyMd5 = "d41d8cd98f00b204e9800998ecf8427e"

	dir := getTestdataDir()
	fs := NewFileStat()
	fs.Md5 = true
	fs.Files = []string{
		dir + "*.log",
	}

	acc := testutil.Accumulator{}
	assert.NoError(t, fs.Gather(&acc))

	for _, name := range []string{"log1.log", "log2.log"} {
		tags := map[string]string{
			"file": dir + name,
		}
		fields := map[string]interface{}{
			"size_bytes": int64(0),
			"exists":     int64(1),
			"md5_sum":    emptyMd5,
		}
		acc.AssertContainsTaggedFields(t, "filestat", fields, tags)
	}
}
// TestGatherSuperAsterisk checks that the "**" super-asterisk glob over the
// testdata directory reports every fixture: the two empty logs and test.conf.
func TestGatherSuperAsterisk(t *testing.T) {
	// md5 of zero bytes: both fixture logs are expected to be empty.
	const emptyMd5 = "d41d8cd98f00b204e9800998ecf8427e"

	dir := getTestdataDir()
	fs := NewFileStat()
	fs.Md5 = true
	fs.Files = []string{
		dir + "**",
	}

	acc := testutil.Accumulator{}
	assert.NoError(t, fs.Gather(&acc))

	for _, name := range []string{"log1.log", "log2.log"} {
		tags := map[string]string{
			"file": dir + name,
		}
		fields := map[string]interface{}{
			"size_bytes": int64(0),
			"exists":     int64(1),
			"md5_sum":    emptyMd5,
		}
		acc.AssertContainsTaggedFields(t, "filestat", fields, tags)
	}

	confTags := map[string]string{
		"file": dir + "test.conf",
	}
	confFields := map[string]interface{}{
		"size_bytes": int64(104),
		"exists":     int64(1),
		"md5_sum":    "5a7e9b77fa25e7bb411dbd17cf403c1f",
	}
	acc.AssertContainsTaggedFields(t, "filestat", confFields, confTags)
}
// TestGetMd5 checks the checksum of the test.conf fixture and that a path
// that cannot be opened yields an error.
func TestGetMd5(t *testing.T) {
	fixture := getTestdataDir() + "test.conf"

	sum, err := getMd5(fixture)
	assert.NoError(t, err)
	assert.Equal(t, "5a7e9b77fa25e7bb411dbd17cf403c1f", sum)

	// only the error matters for the unreadable path
	_, err = getMd5("/tmp/foo/bar/fooooo")
	assert.Error(t, err)
}
// getTestdataDir derives the testdata directory (with trailing slash) from the
// source file path of the calling function, by swapping the test file name for
// "testdata/". It must be called directly from a function in filestat_test.go
// for the substitution to apply.
func getTestdataDir() string {
	const (
		testFile = "filestat_test.go"
		dataDir  = "testdata/"
	)
	// skip = 1: report the file of our caller, not of this helper
	_, callerFile, _, _ := runtime.Caller(1)
	return strings.Replace(callerFile, testFile, dataDir, 1)
}

View File

View File

View File

@ -0,0 +1,5 @@
# this is a fake testing config file
# for testing the filestat plugin
option1 = "foo"
option2 = "bar"

View File

@ -1,10 +1,12 @@
package ipmi_sensor
import (
"bytes"
"fmt"
"os/exec"
"strings"
"time"
"github.com/influxdata/telegraf/internal"
)
type CommandRunner struct{}
@ -18,21 +20,16 @@ func (t CommandRunner) cmd(conn *Connection, args ...string) *exec.Cmd {
}
return exec.Command(path, opts...)
}
func (t CommandRunner) Run(conn *Connection, args ...string) (string, error) {
cmd := t.cmd(conn, args...)
var stdout bytes.Buffer
var stderr bytes.Buffer
cmd.Stdout = &stdout
cmd.Stderr = &stderr
err := cmd.Run()
output, err := internal.CombinedOutputTimeout(cmd, time.Second*5)
if err != nil {
return "", fmt.Errorf("run %s %s: %s (%s)",
cmd.Path, strings.Join(cmd.Args, " "), stderr.String(), err)
cmd.Path, strings.Join(cmd.Args, " "), string(output), err)
}
return stdout.String(), err
return string(output), err
}

View File

@ -3,15 +3,27 @@
#### Configuration
```toml
# Read JMX metrics through Jolokia
[[inputs.jolokia]]
## This is the context root used to compose the jolokia url
context = "/jolokia/read"
context = "/jolokia"
## This specifies the mode used
# mode = "proxy"
#
## When in proxy mode this section is used to specify further
## proxy address configurations.
## Remember to change host address to fit your environment.
# [inputs.jolokia.proxy]
# host = "127.0.0.1"
# port = "8080"
## List of servers exposing jolokia read service
[[inputs.jolokia.servers]]
name = "stable"
host = "192.168.103.2"
port = "8180"
name = "as-server-01"
host = "127.0.0.1"
port = "8080"
# username = "myuser"
# password = "mypassword"
@ -21,25 +33,29 @@
## This collect all heap memory usage metrics.
[[inputs.jolokia.metrics]]
name = "heap_memory_usage"
jmx = "/java.lang:type=Memory/HeapMemoryUsage"
mbean = "java.lang:type=Memory"
attribute = "HeapMemoryUsage"
## This collect thread counts metrics.
[[inputs.jolokia.metrics]]
name = "thread_count"
jmx = "/java.lang:type=Threading/TotalStartedThreadCount,ThreadCount,DaemonThreadCount,PeakThreadCount"
mbean = "java.lang:type=Threading"
attribute = "TotalStartedThreadCount,ThreadCount,DaemonThreadCount,PeakThreadCount"
## This collect number of class loaded/unloaded counts metrics.
[[inputs.jolokia.metrics]]
name = "class_count"
jmx = "/java.lang:type=ClassLoading/LoadedClassCount,UnloadedClassCount,TotalLoadedClassCount"
mbean = "java.lang:type=ClassLoading"
attribute = "LoadedClassCount,UnloadedClassCount,TotalLoadedClassCount"
```
#### Description
The Jolokia plugin collects JVM metrics exposed as MBean's attributes through jolokia REST endpoint. All metrics
are collected for each server configured.
The Jolokia plugin collects JVM metrics exposed as MBean's attributes through
jolokia REST endpoint. All metrics are collected for each server configured.
See: https://jolokia.org/
# Measurements:
Jolokia plugin produces one measure for each metric configured, adding Server's `name`, `host` and `port` as tags.
Jolokia plugin produces one measure for each metric configured,
adding Server's `jolokia_name`, `jolokia_host` and `jolokia_port` as tags.

View File

@ -1,6 +1,7 @@
package jolokia
import (
"bytes"
"encoding/json"
"errors"
"fmt"
@ -23,7 +24,9 @@ type Server struct {
type Metric struct {
Name string
Jmx string
Mbean string
Attribute string
Path string
}
type JolokiaClient interface {
@ -41,20 +44,32 @@ func (c JolokiaClientImpl) MakeRequest(req *http.Request) (*http.Response, error
type Jolokia struct {
jClient JolokiaClient
Context string
Mode string
Servers []Server
Metrics []Metric
Proxy Server
}
func (j *Jolokia) SampleConfig() string {
return `
const sampleConfig = `
## This is the context root used to compose the jolokia url
context = "/jolokia/read"
context = "/jolokia"
## This specifies the mode used
# mode = "proxy"
#
## When in proxy mode this section is used to specify further
## proxy address configurations.
## Remember to change host address to fit your environment.
# [inputs.jolokia.proxy]
# host = "127.0.0.1"
# port = "8080"
## List of servers exposing jolokia read service
[[inputs.jolokia.servers]]
name = "stable"
host = "192.168.103.2"
port = "8180"
name = "as-server-01"
host = "127.0.0.1"
port = "8080"
# username = "myuser"
# password = "mypassword"
@ -64,30 +79,31 @@ func (j *Jolokia) SampleConfig() string {
## This collect all heap memory usage metrics.
[[inputs.jolokia.metrics]]
name = "heap_memory_usage"
jmx = "/java.lang:type=Memory/HeapMemoryUsage"
mbean = "java.lang:type=Memory"
attribute = "HeapMemoryUsage"
## This collect thread counts metrics.
[[inputs.jolokia.metrics]]
name = "thread_count"
jmx = "/java.lang:type=Threading/TotalStartedThreadCount,ThreadCount,DaemonThreadCount,PeakThreadCount"
mbean = "java.lang:type=Threading"
attribute = "TotalStartedThreadCount,ThreadCount,DaemonThreadCount,PeakThreadCount"
## This collect number of class loaded/unloaded counts metrics.
[[inputs.jolokia.metrics]]
name = "class_count"
jmx = "/java.lang:type=ClassLoading/LoadedClassCount,UnloadedClassCount,TotalLoadedClassCount"
mbean = "java.lang:type=ClassLoading"
attribute = "LoadedClassCount,UnloadedClassCount,TotalLoadedClassCount"
`
func (j *Jolokia) SampleConfig() string {
return sampleConfig
}
func (j *Jolokia) Description() string {
return "Read JMX metrics through Jolokia"
}
func (j *Jolokia) getAttr(requestUrl *url.URL) (map[string]interface{}, error) {
// Create + send request
req, err := http.NewRequest("GET", requestUrl.String(), nil)
if err != nil {
return nil, err
}
func (j *Jolokia) doRequest(req *http.Request) (map[string]interface{}, error) {
resp, err := j.jClient.MakeRequest(req)
if err != nil {
@ -98,7 +114,7 @@ func (j *Jolokia) getAttr(requestUrl *url.URL) (map[string]interface{}, error) {
// Process response
if resp.StatusCode != http.StatusOK {
err = fmt.Errorf("Response from url \"%s\" has status code %d (%s), expected %d (%s)",
requestUrl,
req.RequestURI,
resp.StatusCode,
http.StatusText(resp.StatusCode),
http.StatusOK,
@ -118,36 +134,116 @@ func (j *Jolokia) getAttr(requestUrl *url.URL) (map[string]interface{}, error) {
return nil, errors.New("Error decoding JSON response")
}
if status, ok := jsonOut["status"]; ok {
if status != float64(200) {
return nil, fmt.Errorf("Not expected status value in response body: %3.f",
status)
}
} else {
return nil, fmt.Errorf("Missing status in response body")
}
return jsonOut, nil
}
// prepareRequest builds the Jolokia "read" POST request for one metric on one
// server.
//
// The JSON body always carries "type" = "read" and "mbean". "attribute" is
// added when metric.Attribute is set, and "path" only when both Attribute and
// Path are set.
//
// When j.Mode is "proxy" the request is addressed to j.Proxy and the body
// gains a "target" object holding the JMX service URL of the server plus its
// optional user/password. Otherwise the request goes to the server itself.
// In both cases the username/password of the addressed host, if any, are
// embedded in the request URL.
//
// It returns an error if the URL cannot be parsed, the body cannot be encoded
// as JSON, or the request cannot be created.
func (j *Jolokia) prepareRequest(server Server, metric Metric) (*http.Request, error) {
	// Create bodyContent
	bodyContent := map[string]interface{}{
		"type":  "read",
		"mbean": metric.Mbean,
	}

	if metric.Attribute != "" {
		bodyContent["attribute"] = metric.Attribute
		// A path is only meaningful relative to an attribute.
		if metric.Path != "" {
			bodyContent["path"] = metric.Path
		}
	}

	// endpoint is the host that receives the HTTP request: the monitored
	// server itself, or the proxy when running in proxy mode.
	endpoint := server

	// Add target, only in proxy mode
	if j.Mode == "proxy" {
		serviceUrl := fmt.Sprintf("service:jmx:rmi:///jndi/rmi://%s:%s/jmxrmi",
			server.Host, server.Port)

		target := map[string]string{
			"url": serviceUrl,
		}
		if server.Username != "" {
			target["user"] = server.Username
		}
		if server.Password != "" {
			target["password"] = server.Password
		}
		bodyContent["target"] = target

		endpoint = j.Proxy
	}

	// j.Context is usually "/jolokia"
	jolokiaUrl, err := url.Parse(
		"http://" + endpoint.Host + ":" + endpoint.Port + j.Context)
	if err != nil {
		return nil, err
	}
	if endpoint.Username != "" || endpoint.Password != "" {
		jolokiaUrl.User = url.UserPassword(endpoint.Username, endpoint.Password)
	}

	requestBody, err := json.Marshal(bodyContent)
	if err != nil {
		// Do not send a request with an empty or partial body.
		return nil, err
	}

	req, err := http.NewRequest("POST", jolokiaUrl.String(), bytes.NewBuffer(requestBody))
	if err != nil {
		return nil, err
	}

	req.Header.Add("Content-type", "application/json")

	return req, nil
}
func (j *Jolokia) Gather(acc telegraf.Accumulator) error {
context := j.Context //"/jolokia/read"
servers := j.Servers
metrics := j.Metrics
tags := make(map[string]string)
for _, server := range servers {
tags["server"] = server.Name
tags["port"] = server.Port
tags["host"] = server.Host
tags["jolokia_name"] = server.Name
tags["jolokia_port"] = server.Port
tags["jolokia_host"] = server.Host
fields := make(map[string]interface{})
for _, metric := range metrics {
measurement := metric.Name
jmxPath := metric.Jmx
// Prepare URL
requestUrl, err := url.Parse("http://" + server.Host + ":" +
server.Port + context + jmxPath)
req, err := j.prepareRequest(server, metric)
if err != nil {
return err
}
if server.Username != "" || server.Password != "" {
requestUrl.User = url.UserPassword(server.Username, server.Password)
}
out, _ := j.getAttr(requestUrl)
out, err := j.doRequest(req)
if err != nil {
fmt.Printf("Error handling response: %s\n", err)
} else {
if values, ok := out["value"]; ok {
switch t := values.(type) {
@ -159,10 +255,12 @@ func (j *Jolokia) Gather(acc telegraf.Accumulator) error {
fields[measurement] = t
}
} else {
fmt.Printf("Missing key 'value' in '%s' output response\n",
requestUrl.String())
fmt.Printf("Missing key 'value' in output response\n")
}
}
}
acc.AddFields("jolokia", fields, tags)
}

View File

@ -47,8 +47,10 @@ const invalidJSON = "I don't think this is JSON"
const empty = ""
var Servers = []Server{Server{Name: "as1", Host: "127.0.0.1", Port: "8080"}}
var HeapMetric = Metric{Name: "heap_memory_usage", Jmx: "/java.lang:type=Memory/HeapMemoryUsage"}
var UsedHeapMetric = Metric{Name: "heap_memory_usage", Jmx: "/java.lang:type=Memory/HeapMemoryUsage"}
var HeapMetric = Metric{Name: "heap_memory_usage",
Mbean: "java.lang:type=Memory", Attribute: "HeapMemoryUsage"}
var UsedHeapMetric = Metric{Name: "heap_memory_usage",
Mbean: "java.lang:type=Memory", Attribute: "HeapMemoryUsage"}
type jolokiaClientStub struct {
responseBody string
@ -94,9 +96,9 @@ func TestHttpJsonMultiValue(t *testing.T) {
"heap_memory_usage_used": 203288528.0,
}
tags := map[string]string{
"host": "127.0.0.1",
"port": "8080",
"server": "as1",
"jolokia_host": "127.0.0.1",
"jolokia_port": "8080",
"jolokia_name": "as1",
}
acc.AssertContainsTaggedFields(t, "jolokia", fields, tags)
}
@ -114,3 +116,17 @@ func TestHttpJsonOn404(t *testing.T) {
assert.Nil(t, err)
assert.Equal(t, 0, len(acc.Metrics))
}
// Test that the proper values are ignored or collected
func TestHttpInvalidJson(t *testing.T) {
jolokia := genJolokiaClientStub(invalidJSON, 200, Servers,
[]Metric{UsedHeapMetric})
var acc testutil.Accumulator
acc.SetDebug(true)
err := jolokia.Gather(&acc)
assert.Nil(t, err)
assert.Equal(t, 0, len(acc.Metrics))
}

View File

@ -3,13 +3,16 @@ package leofs
import (
"bufio"
"fmt"
"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/plugins/inputs"
"net/url"
"os/exec"
"strconv"
"strings"
"sync"
"time"
"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/internal"
"github.com/influxdata/telegraf/plugins/inputs"
)
const oid = ".1.3.6.1.4.1.35450"
@ -175,14 +178,18 @@ func (l *LeoFS) Gather(acc telegraf.Accumulator) error {
return outerr
}
func (l *LeoFS) gatherServer(endpoint string, serverType ServerType, acc telegraf.Accumulator) error {
func (l *LeoFS) gatherServer(
endpoint string,
serverType ServerType,
acc telegraf.Accumulator,
) error {
cmd := exec.Command("snmpwalk", "-v2c", "-cpublic", endpoint, oid)
stdout, err := cmd.StdoutPipe()
if err != nil {
return err
}
cmd.Start()
defer cmd.Wait()
defer internal.WaitTimeout(cmd, time.Second*5)
scanner := bufio.NewScanner(stdout)
if !scanner.Scan() {
return fmt.Errorf("Unable to retrieve the node name")

View File

@ -34,9 +34,13 @@ var sampleConfig = `
##
# ost_procfiles = [
# "/proc/fs/lustre/obdfilter/*/stats",
# "/proc/fs/lustre/osd-ldiskfs/*/stats"
# "/proc/fs/lustre/osd-ldiskfs/*/stats",
# "/proc/fs/lustre/obdfilter/*/job_stats",
# ]
# mds_procfiles = [
# "/proc/fs/lustre/mdt/*/md_stats",
# "/proc/fs/lustre/mdt/*/job_stats",
# ]
# mds_procfiles = ["/proc/fs/lustre/mdt/*/md_stats"]
`
/* The wanted fields would be a []string if not for the
@ -82,6 +86,139 @@ var wanted_ost_fields = []*mapping{
},
}
// wanted_ost_jobstats_fields lists the counters extracted from the per-job
// statistics of object storage targets; Gather pairs this table with
// "/proc/fs/lustre/obdfilter/*/job_stats".
//
// For each entry, GetLustreProcStats looks for a line whose first
// whitespace-separated token, minus a trailing ":", equals inProc, and parses
// the token at index `field` (minus a trailing ",") as an unsigned integer.
// reportAs is presumably the name the value is reported under — confirm
// against the part of GetLustreProcStats that consumes it.
//
// The "read"/"read_bytes" and "write"/"write_bytes" entries deliberately share
// their reportAs names: they cover the different line labels used by older and
// newer Lustre versions.
var wanted_ost_jobstats_fields = []*mapping{
{ // The read line has several fields, so we need to differentiate what they are
inProc: "read",
field: 3,
reportAs: "jobstats_read_calls",
},
{
inProc: "read",
field: 7,
reportAs: "jobstats_read_min_size",
},
{
inProc: "read",
field: 9,
reportAs: "jobstats_read_max_size",
},
{
inProc: "read",
field: 11,
reportAs: "jobstats_read_bytes",
},
{ // Different inProc for newer versions
inProc: "read_bytes",
field: 3,
reportAs: "jobstats_read_calls",
},
{
inProc: "read_bytes",
field: 7,
reportAs: "jobstats_read_min_size",
},
{
inProc: "read_bytes",
field: 9,
reportAs: "jobstats_read_max_size",
},
{
inProc: "read_bytes",
field: 11,
reportAs: "jobstats_read_bytes",
},
{ // We need to do the same for the write fields
inProc: "write",
field: 3,
reportAs: "jobstats_write_calls",
},
{
inProc: "write",
field: 7,
reportAs: "jobstats_write_min_size",
},
{
inProc: "write",
field: 9,
reportAs: "jobstats_write_max_size",
},
{
inProc: "write",
field: 11,
reportAs: "jobstats_write_bytes",
},
{ // Different inProc for newer versions
inProc: "write_bytes",
field: 3,
reportAs: "jobstats_write_calls",
},
{
inProc: "write_bytes",
field: 7,
reportAs: "jobstats_write_min_size",
},
{
inProc: "write_bytes",
field: 9,
reportAs: "jobstats_write_max_size",
},
{
inProc: "write_bytes",
field: 11,
reportAs: "jobstats_write_bytes",
},
{
inProc: "getattr",
field: 3,
reportAs: "jobstats_ost_getattr",
},
{
inProc: "setattr",
field: 3,
reportAs: "jobstats_ost_setattr",
},
{
inProc: "punch",
field: 3,
reportAs: "jobstats_punch",
},
{
inProc: "sync",
field: 3,
reportAs: "jobstats_ost_sync",
},
{
inProc: "destroy",
field: 3,
reportAs: "jobstats_destroy",
},
{
inProc: "create",
field: 3,
reportAs: "jobstats_create",
},
{
inProc: "statfs",
field: 3,
reportAs: "jobstats_ost_statfs",
},
{
inProc: "get_info",
field: 3,
reportAs: "jobstats_get_info",
},
{
inProc: "set_info",
field: 3,
reportAs: "jobstats_set_info",
},
{
inProc: "quotactl",
field: 3,
reportAs: "jobstats_quotactl",
},
}
var wanted_mds_fields = []*mapping{
{
inProc: "open",
@ -133,6 +270,89 @@ var wanted_mds_fields = []*mapping{
},
}
// wanted_mdt_jobstats_fields lists the counters extracted from the per-job
// statistics of metadata targets; Gather pairs this table with
// "/proc/fs/lustre/mdt/*/job_stats".
//
// For each entry, GetLustreProcStats looks for a line whose first
// whitespace-separated token, minus a trailing ":", equals inProc, and parses
// the token at index `field` (minus a trailing ",") as an unsigned integer.
// reportAs is presumably the name the value is reported under — confirm
// against the part of GetLustreProcStats that consumes it.
var wanted_mdt_jobstats_fields = []*mapping{
{
inProc: "open",
field: 3,
reportAs: "jobstats_open",
},
{
inProc: "close",
field: 3,
reportAs: "jobstats_close",
},
{
inProc: "mknod",
field: 3,
reportAs: "jobstats_mknod",
},
{
inProc: "link",
field: 3,
reportAs: "jobstats_link",
},
{
inProc: "unlink",
field: 3,
reportAs: "jobstats_unlink",
},
{
inProc: "mkdir",
field: 3,
reportAs: "jobstats_mkdir",
},
{
inProc: "rmdir",
field: 3,
reportAs: "jobstats_rmdir",
},
{
inProc: "rename",
field: 3,
reportAs: "jobstats_rename",
},
{
inProc: "getattr",
field: 3,
reportAs: "jobstats_getattr",
},
{
inProc: "setattr",
field: 3,
reportAs: "jobstats_setattr",
},
{
inProc: "getxattr",
field: 3,
reportAs: "jobstats_getxattr",
},
{
inProc: "setxattr",
field: 3,
reportAs: "jobstats_setxattr",
},
{
inProc: "statfs",
field: 3,
reportAs: "jobstats_statfs",
},
{
inProc: "sync",
field: 3,
reportAs: "jobstats_sync",
},
{
inProc: "samedir_rename",
field: 3,
reportAs: "jobstats_samedir_rename",
},
{
inProc: "crossdir_rename",
field: 3,
reportAs: "jobstats_crossdir_rename",
},
}
func (l *Lustre2) GetLustreProcStats(fileglob string, wanted_fields []*mapping, acc telegraf.Accumulator) error {
files, err := filepath.Glob(fileglob)
if err != nil {
@ -143,7 +363,7 @@ func (l *Lustre2) GetLustreProcStats(fileglob string, wanted_fields []*mapping,
/* Turn /proc/fs/lustre/obdfilter/<ost_name>/stats and similar
* into just the object store target name
* Assumpion: the target name is always second to last,
* which is true in Lustre 2.1->2.5
* which is true in Lustre 2.1->2.8
*/
path := strings.Split(file, "/")
name := path[len(path)-2]
@ -161,16 +381,21 @@ func (l *Lustre2) GetLustreProcStats(fileglob string, wanted_fields []*mapping,
for _, line := range lines {
parts := strings.Fields(line)
if strings.HasPrefix(line, "- job_id:") {
// Set the job_id explicitly if present
fields["jobid"] = parts[2]
}
for _, wanted := range wanted_fields {
var data uint64
if parts[0] == wanted.inProc {
if strings.TrimSuffix(parts[0], ":") == wanted.inProc {
wanted_field := wanted.field
// if not set, assume field[1]. Shouldn't be field[0], as
// that's a string
if wanted_field == 0 {
wanted_field = 1
}
data, err = strconv.ParseUint((parts[wanted_field]), 10, 64)
data, err = strconv.ParseUint(strings.TrimSuffix((parts[wanted_field]), ","), 10, 64)
if err != nil {
return err
}
@ -213,6 +438,12 @@ func (l *Lustre2) Gather(acc telegraf.Accumulator) error {
if err != nil {
return err
}
// per job statistics are in obdfilter/<ost_name>/job_stats
err = l.GetLustreProcStats("/proc/fs/lustre/obdfilter/*/job_stats",
wanted_ost_jobstats_fields, acc)
if err != nil {
return err
}
}
if len(l.Mds_procfiles) == 0 {
@ -222,16 +453,31 @@ func (l *Lustre2) Gather(acc telegraf.Accumulator) error {
if err != nil {
return err
}
// Metadata target job stats
err = l.GetLustreProcStats("/proc/fs/lustre/mdt/*/job_stats",
wanted_mdt_jobstats_fields, acc)
if err != nil {
return err
}
}
for _, procfile := range l.Ost_procfiles {
err := l.GetLustreProcStats(procfile, wanted_ost_fields, acc)
ost_fields := wanted_ost_fields
if strings.HasSuffix(procfile, "job_stats") {
ost_fields = wanted_ost_jobstats_fields
}
err := l.GetLustreProcStats(procfile, ost_fields, acc)
if err != nil {
return err
}
}
for _, procfile := range l.Mds_procfiles {
err := l.GetLustreProcStats(procfile, wanted_mds_fields, acc)
mdt_fields := wanted_mds_fields
if strings.HasSuffix(procfile, "job_stats") {
mdt_fields = wanted_mdt_jobstats_fields
}
err := l.GetLustreProcStats(procfile, mdt_fields, acc)
if err != nil {
return err
}
@ -241,6 +487,12 @@ func (l *Lustre2) Gather(acc telegraf.Accumulator) error {
tags := map[string]string{
"name": name,
}
if _, ok := fields["jobid"]; ok {
if jobid, ok := fields["jobid"].(string); ok {
tags["jobid"] = jobid
}
delete(fields, "jobid")
}
acc.AddFields("lustre2", fields, tags)
}

View File

@ -38,6 +38,23 @@ cache_hit 7393729777 samples [pages] 1 1 7393729777
cache_miss 11653333250 samples [pages] 1 1 11653333250
`
// obdfilterJobStatsContents is the sample obdfilter job_stats text that
// TestLustre2GeneratesJobstatsMetrics writes to <obddir>/<ost_name>/job_stats.
// It describes a single job ("testjob1"); the numbers in it are the values the
// test expects back as jobstats_* fields.
const obdfilterJobStatsContents = `job_stats:
- job_id: testjob1
snapshot_time: 1461772761
read_bytes: { samples: 1, unit: bytes, min: 4096, max: 4096, sum: 4096 }
write_bytes: { samples: 25, unit: bytes, min: 1048576, max: 1048576, sum: 26214400 }
getattr: { samples: 0, unit: reqs }
setattr: { samples: 0, unit: reqs }
punch: { samples: 1, unit: reqs }
sync: { samples: 0, unit: reqs }
destroy: { samples: 0, unit: reqs }
create: { samples: 0, unit: reqs }
statfs: { samples: 0, unit: reqs }
get_info: { samples: 0, unit: reqs }
set_info: { samples: 0, unit: reqs }
quotactl: { samples: 0, unit: reqs }
`
const mdtProcContents = `snapshot_time 1438693238.20113 secs.usecs
open 1024577037 samples [reqs]
close 873243496 samples [reqs]
@ -57,6 +74,27 @@ samedir_rename 259625 samples [reqs]
crossdir_rename 369571 samples [reqs]
`
// mdtJobStatsContents is the sample metadata-target job_stats text that
// TestLustre2GeneratesJobstatsMetrics writes to <mdtdir>/<ost_name>/job_stats.
// It describes a single job ("testjob1"); each "samples" count is the value the
// test expects back for the matching jobstats_* field.
const mdtJobStatsContents = `job_stats:
- job_id: testjob1
snapshot_time: 1461772761
open: { samples: 5, unit: reqs }
close: { samples: 4, unit: reqs }
mknod: { samples: 6, unit: reqs }
link: { samples: 8, unit: reqs }
unlink: { samples: 90, unit: reqs }
mkdir: { samples: 521, unit: reqs }
rmdir: { samples: 520, unit: reqs }
rename: { samples: 9, unit: reqs }
getattr: { samples: 11, unit: reqs }
setattr: { samples: 1, unit: reqs }
getxattr: { samples: 3, unit: reqs }
setxattr: { samples: 4, unit: reqs }
statfs: { samples: 1205, unit: reqs }
sync: { samples: 2, unit: reqs }
samedir_rename: { samples: 705, unit: reqs }
crossdir_rename: { samples: 200, unit: reqs }
`
func TestLustre2GeneratesMetrics(t *testing.T) {
tempdir := os.TempDir() + "/telegraf/proc/fs/lustre/"
@ -83,6 +121,7 @@ func TestLustre2GeneratesMetrics(t *testing.T) {
err = ioutil.WriteFile(obddir+"/"+ost_name+"/stats", []byte(obdfilterProcContents), 0644)
require.NoError(t, err)
// Begin by testing standard Lustre stats
m := &Lustre2{
Ost_procfiles: []string{obddir + "/*/stats", osddir + "/*/stats"},
Mds_procfiles: []string{mdtdir + "/*/md_stats"},
@ -128,3 +167,82 @@ func TestLustre2GeneratesMetrics(t *testing.T) {
err = os.RemoveAll(os.TempDir() + "/telegraf")
require.NoError(t, err)
}
func TestLustre2GeneratesJobstatsMetrics(t *testing.T) {
tempdir := os.TempDir() + "/telegraf/proc/fs/lustre/"
ost_name := "OST0001"
job_name := "testjob1"
mdtdir := tempdir + "/mdt/"
err := os.MkdirAll(mdtdir+"/"+ost_name, 0755)
require.NoError(t, err)
obddir := tempdir + "/obdfilter/"
err = os.MkdirAll(obddir+"/"+ost_name, 0755)
require.NoError(t, err)
err = ioutil.WriteFile(mdtdir+"/"+ost_name+"/job_stats", []byte(mdtJobStatsContents), 0644)
require.NoError(t, err)
err = ioutil.WriteFile(obddir+"/"+ost_name+"/job_stats", []byte(obdfilterJobStatsContents), 0644)
require.NoError(t, err)
// Test Lustre Jobstats
m := &Lustre2{
Ost_procfiles: []string{obddir + "/*/job_stats"},
Mds_procfiles: []string{mdtdir + "/*/job_stats"},
}
var acc testutil.Accumulator
err = m.Gather(&acc)
require.NoError(t, err)
tags := map[string]string{
"name": ost_name,
"jobid": job_name,
}
fields := map[string]interface{}{
"jobstats_read_calls": uint64(1),
"jobstats_read_min_size": uint64(4096),
"jobstats_read_max_size": uint64(4096),
"jobstats_read_bytes": uint64(4096),
"jobstats_write_calls": uint64(25),
"jobstats_write_min_size": uint64(1048576),
"jobstats_write_max_size": uint64(1048576),
"jobstats_write_bytes": uint64(26214400),
"jobstats_ost_getattr": uint64(0),
"jobstats_ost_setattr": uint64(0),
"jobstats_punch": uint64(1),
"jobstats_ost_sync": uint64(0),
"jobstats_destroy": uint64(0),
"jobstats_create": uint64(0),
"jobstats_ost_statfs": uint64(0),
"jobstats_get_info": uint64(0),
"jobstats_set_info": uint64(0),
"jobstats_quotactl": uint64(0),
"jobstats_open": uint64(5),
"jobstats_close": uint64(4),
"jobstats_mknod": uint64(6),
"jobstats_link": uint64(8),
"jobstats_unlink": uint64(90),
"jobstats_mkdir": uint64(521),
"jobstats_rmdir": uint64(520),
"jobstats_rename": uint64(9),
"jobstats_getattr": uint64(11),
"jobstats_setattr": uint64(1),
"jobstats_getxattr": uint64(3),
"jobstats_setxattr": uint64(4),
"jobstats_statfs": uint64(1205),
"jobstats_sync": uint64(2),
"jobstats_samedir_rename": uint64(705),
"jobstats_crossdir_rename": uint64(200),
}
acc.AssertContainsTaggedFields(t, "lustre2", fields, tags)
err = os.RemoveAll(os.TempDir() + "/telegraf")
require.NoError(t, err)
}

View File

@ -0,0 +1,54 @@
# Telegraf plugin: MongoDB
#### Configuration
```toml
[[inputs.mongodb]]
## An array of URI to gather stats about. Specify an ip or hostname
## with optional port and password, e.g.
## mongodb://user:auth_key@10.10.3.30:27017,
## mongodb://10.10.3.33:18832,
## 10.0.0.1:10000, etc.
servers = ["127.0.0.1:27017"]
```
For authenticated MongoDB instances, use a `mongodb://` connection URI:
```toml
[[inputs.mongodb]]
servers = ["mongodb://username:password@10.XX.XX.XX:27101/mydatabase?authSource=admin"]
```
This connection URI may be different depending on your environment and MongoDB
setup. If the user doesn't have the required privilege to execute the
serverStatus command, then you will get this error from telegraf:
```
Error in input [mongodb]: not authorized on admin to execute command { serverStatus: 1, recordStats: 0 }
```
#### Description
The telegraf plugin collects MongoDB stats exposed by serverStatus, plus a few
more, and creates a single measurement containing values such as:
* active_reads
* active_writes
* commands_per_sec
* deletes_per_sec
* flushes_per_sec
* getmores_per_sec
* inserts_per_sec
* net_in_bytes
* net_out_bytes
* open_connections
* percent_cache_dirty
* percent_cache_used
* queries_per_sec
* queued_reads
* queued_writes
* resident_megabytes
* updates_per_sec
* vsize_megabytes
* ttl_deletes_per_sec
* ttl_passes_per_sec
* repl_lag
* jumbo_chunks (only if mongos or mongo config)

View File

@ -42,6 +42,8 @@ var DefaultStats = map[string]string{
"net_in_bytes": "NetIn",
"net_out_bytes": "NetOut",
"open_connections": "NumConnections",
"ttl_deletes_per_sec": "DeletedDocuments",
"ttl_passes_per_sec": "Passes",
}
var DefaultReplStats = map[string]string{
@ -52,6 +54,11 @@ var DefaultReplStats = map[string]string{
"repl_getmores_per_sec": "GetMoreR",
"repl_commands_per_sec": "CommandR",
"member_status": "NodeType",
"repl_lag": "ReplLag",
}
// DefaultClusterStats maps the reported field name for cluster-wide stats to
// the name of the StatLine field that holds the value. AddDefaultStats passes
// it to addStat unconditionally, i.e. regardless of node type or storage
// engine.
var DefaultClusterStats = map[string]string{
"jumbo_chunks": "JumboChunksCount",
}
var MmapStats = map[string]string{
@ -71,6 +78,7 @@ func (d *MongodbData) AddDefaultStats() {
if d.StatLine.NodeType != "" {
d.addStat(statLine, DefaultReplStats)
}
d.addStat(statLine, DefaultClusterStats)
if d.StatLine.StorageEngine == "mmapv1" {
d.addStat(statLine, MmapStats)
} else if d.StatLine.StorageEngine == "wiredTiger" {

View File

@ -31,6 +31,8 @@ func TestAddNonReplStats(t *testing.T) {
NetIn: 0,
NetOut: 0,
NumConnections: 0,
Passes: 0,
DeletedDocuments: 0,
},
tags,
)
@ -125,9 +127,13 @@ func TestStateTag(t *testing.T) {
"repl_inserts_per_sec": int64(0),
"repl_queries_per_sec": int64(0),
"repl_updates_per_sec": int64(0),
"repl_lag": int64(0),
"resident_megabytes": int64(0),
"updates_per_sec": int64(0),
"vsize_megabytes": int64(0),
"ttl_deletes_per_sec": int64(0),
"ttl_passes_per_sec": int64(0),
"jumbo_chunks": int64(0),
}
acc.AssertContainsTaggedFields(t, "mongodb", fields, stateTags)
}

View File

@ -1,6 +1,7 @@
package mongodb
import (
"log"
"net/url"
"time"
@ -12,7 +13,7 @@ import (
type Server struct {
Url *url.URL
Session *mgo.Session
lastResult *ServerStatus
lastResult *MongoStatus
}
func (s *Server) getDefaultTags() map[string]string {
@ -24,11 +25,29 @@ func (s *Server) getDefaultTags() map[string]string {
func (s *Server) gatherData(acc telegraf.Accumulator) error {
s.Session.SetMode(mgo.Eventual, true)
s.Session.SetSocketTimeout(0)
result := &ServerStatus{}
err := s.Session.DB("admin").Run(bson.D{{"serverStatus", 1}, {"recordStats", 0}}, result)
result_server := &ServerStatus{}
err := s.Session.DB("admin").Run(bson.D{{"serverStatus", 1}, {"recordStats", 0}}, result_server)
if err != nil {
return err
}
result_repl := &ReplSetStatus{}
err = s.Session.DB("admin").Run(bson.D{{"replSetGetStatus", 1}}, result_repl)
if err != nil {
log.Println("Not gathering replica set status, member not in replica set")
}
jumbo_chunks, _ := s.Session.DB("config").C("chunks").Find(bson.M{"jumbo": true}).Count()
result_cluster := &ClusterStatus{
JumboChunksCount: int64(jumbo_chunks),
}
result := &MongoStatus{
ServerStatus: result_server,
ReplSetStatus: result_repl,
ClusterStatus: result_cluster,
}
defer func() {
s.lastResult = result
}()

View File

@ -11,6 +11,8 @@ import (
"sort"
"strings"
"time"
"gopkg.in/mgo.v2/bson"
)
const (
@ -28,8 +30,14 @@ const (
WTOnly // only active if node has wiredtiger-specific fields
)
// MongoStatus groups everything gathered from one server in a single poll:
// the serverStatus result, the replSetGetStatus result and the cluster-wide
// counters. NewStatLine compares two of these to build a StatLine.
type MongoStatus struct {
// SampleTime is copied into StatLine.Time by NewStatLine.
SampleTime time.Time
// ServerStatus is dereferenced unconditionally by NewStatLine, so it must
// be non-nil.
ServerStatus *ServerStatus
// ReplSetStatus is dereferenced unconditionally by NewStatLine as well; its
// Members slice is nil when no replica set data was decoded.
ReplSetStatus *ReplSetStatus
// ClusterStatus is dereferenced unconditionally by NewStatLine, so it must
// be non-nil.
ClusterStatus *ClusterStatus
}
type ServerStatus struct {
SampleTime time.Time `bson:""`
Host string `bson:"host"`
Version string `bson:"version"`
Process string `bson:"process"`
@ -54,6 +62,25 @@ type ServerStatus struct {
ShardCursorType map[string]interface{} `bson:"shardCursorType"`
StorageEngine map[string]string `bson:"storageEngine"`
WiredTiger *WiredTiger `bson:"wiredTiger"`
Metrics *MetricsStats `bson:"metrics"`
}
// ClusterStatus stores information related to the whole cluster
type ClusterStatus struct {
// JumboChunksCount is filled by gatherData with the number of documents in
// the config database's "chunks" collection that have jumbo set to true.
JumboChunksCount int64
}
// ReplSetStatus stores information from replSetGetStatus
type ReplSetStatus struct {
// Members is the "members" array of the reply. NewStatLine skips the
// replication-lag computation when it is nil.
Members []ReplSetMember `bson:"members"`
// MyState is the "myState" value of the reply.
MyState int64 `bson:"myState"`
}
// ReplSetMember stores information related to a replica set member
type ReplSetMember struct {
// Name is compared against the server's own Repl.Me value to find the
// entry that describes this server.
Name string `bson:"name"`
// State is the member state code; NewStatLine treats 1 as the master and
// 2 as a secondary.
State int64 `bson:"state"`
// Optime is the member's last operation timestamp. Its upper 32 bits are
// used as a unix timestamp when computing replication lag; it may be nil.
Optime *bson.MongoTimestamp `bson:"optime"`
}
// WiredTiger stores information related to the WiredTiger storage engine.
@ -194,6 +221,17 @@ type OpcountStats struct {
Command int64 `bson:"command"`
}
// MetricsStats stores information related to metrics
// (the "metrics" section of a serverStatus reply).
type MetricsStats struct {
// TTL holds the "ttl" sub-document; NewStatLine checks it for nil before
// reading the counters.
TTL *TTLStats `bson:"ttl"`
}
// TTLStats stores information related to documents with a ttl index.
// NewStatLine converts both counters to rates over the sample interval using
// diff.
type TTLStats struct {
// DeletedDocuments is the "deletedDocuments" counter.
DeletedDocuments int64 `bson:"deletedDocuments"`
// Passes is the "passes" counter.
Passes int64 `bson:"passes"`
}
// ReadWriteLockTimes stores time spent holding read/write locks.
type ReadWriteLockTimes struct {
Read int64 `bson:"R"`
@ -332,6 +370,9 @@ type StatLine struct {
// Opcounter fields
Insert, Query, Update, Delete, GetMore, Command int64
// TTL fields
Passes, DeletedDocuments int64
// Collection locks (3.0 mmap only)
CollectionLocks *CollectionLockStatus
@ -341,6 +382,7 @@ type StatLine struct {
// Replicated Opcounter fields
InsertR, QueryR, UpdateR, DeleteR, GetMoreR, CommandR int64
ReplLag int64
Flushes int64
Mapped, Virtual, Resident, NonMapped int64
Faults int64
@ -351,6 +393,9 @@ type StatLine struct {
NumConnections int64
ReplSetName string
NodeType string
// Cluster fields
JumboChunksCount int64
}
func parseLocks(stat ServerStatus) map[string]LockUsage {
@ -395,8 +440,11 @@ func diff(newVal, oldVal, sampleTime int64) int64 {
return d / sampleTime
}
// NewStatLine constructs a StatLine object from two ServerStatus objects.
func NewStatLine(oldStat, newStat ServerStatus, key string, all bool, sampleSecs int64) *StatLine {
// NewStatLine constructs a StatLine object from two MongoStatus objects.
func NewStatLine(oldMongo, newMongo MongoStatus, key string, all bool, sampleSecs int64) *StatLine {
oldStat := *oldMongo.ServerStatus
newStat := *newMongo.ServerStatus
returnVal := &StatLine{
Key: key,
Host: newStat.Host,
@ -423,6 +471,11 @@ func NewStatLine(oldStat, newStat ServerStatus, key string, all bool, sampleSecs
returnVal.Command = diff(newStat.Opcounters.Command, oldStat.Opcounters.Command, sampleSecs)
}
// Metrics is a pointer and stays nil when the serverStatus reply carries no
// "metrics" section, so check it before reaching for TTL; otherwise the
// selector below would panic with a nil pointer dereference.
if newStat.Metrics != nil && newStat.Metrics.TTL != nil &&
	oldStat.Metrics != nil && oldStat.Metrics.TTL != nil {
	returnVal.Passes = diff(newStat.Metrics.TTL.Passes, oldStat.Metrics.TTL.Passes, sampleSecs)
	returnVal.DeletedDocuments = diff(newStat.Metrics.TTL.DeletedDocuments, oldStat.Metrics.TTL.DeletedDocuments, sampleSecs)
}
if newStat.OpcountersRepl != nil && oldStat.OpcountersRepl != nil {
returnVal.InsertR = diff(newStat.OpcountersRepl.Insert, oldStat.OpcountersRepl.Insert, sampleSecs)
returnVal.QueryR = diff(newStat.OpcountersRepl.Query, oldStat.OpcountersRepl.Query, sampleSecs)
@ -442,7 +495,7 @@ func NewStatLine(oldStat, newStat ServerStatus, key string, all bool, sampleSecs
returnVal.Flushes = newStat.BackgroundFlushing.Flushes - oldStat.BackgroundFlushing.Flushes
}
returnVal.Time = newStat.SampleTime
returnVal.Time = newMongo.SampleTime
returnVal.IsMongos =
(newStat.ShardCursorType != nil || strings.HasPrefix(newStat.Process, MongosProcess))
@ -587,5 +640,42 @@ func NewStatLine(oldStat, newStat ServerStatus, key string, all bool, sampleSecs
returnVal.NumConnections = newStat.Connections.Current
}
newReplStat := *newMongo.ReplSetStatus
if newReplStat.Members != nil {
myName := newStat.Repl.Me
// Find the master and myself
master := ReplSetMember{}
me := ReplSetMember{}
for _, member := range newReplStat.Members {
if member.Name == myName {
if member.State == 1 {
// I'm the master
returnVal.ReplLag = 0
break
} else {
// I'm secondary
me = member
}
} else if member.State == 1 {
// Master found
master = member
}
}
if me.Optime != nil && master.Optime != nil && me.State == 2 {
// MongoTimestamp type is int64 where the first 32bits are the unix timestamp
lag := int64(*master.Optime>>32 - *me.Optime>>32)
if lag < 0 {
returnVal.ReplLag = 0
} else {
returnVal.ReplLag = lag
}
}
}
newClusterStat := *newMongo.ClusterStatus
returnVal.JumboChunksCount = newClusterStat.JumboChunksCount
return returnVal
}

View File

@ -3,6 +3,7 @@ package mqtt_consumer
import (
"fmt"
"log"
"strings"
"sync"
"time"
@ -46,6 +47,8 @@ type MQTTConsumer struct {
// keep the accumulator internally:
acc telegraf.Accumulator
started bool
}
var sampleConfig = `
@ -100,6 +103,7 @@ func (m *MQTTConsumer) SetParser(parser parsers.Parser) {
func (m *MQTTConsumer) Start(acc telegraf.Accumulator) error {
m.Lock()
defer m.Unlock()
m.started = false
if m.PersistentSession && m.ClientID == "" {
return fmt.Errorf("ERROR MQTT Consumer: When using persistent_session" +
@ -124,19 +128,31 @@ func (m *MQTTConsumer) Start(acc telegraf.Accumulator) error {
m.in = make(chan mqtt.Message, 1000)
m.done = make(chan struct{})
go m.receiver()
return nil
}
func (m *MQTTConsumer) onConnect(c mqtt.Client) {
log.Printf("MQTT Client Connected")
if !m.PersistentSession || !m.started {
topics := make(map[string]byte)
for _, topic := range m.Topics {
topics[topic] = byte(m.QoS)
}
subscribeToken := m.client.SubscribeMultiple(topics, m.recvMessage)
subscribeToken := c.SubscribeMultiple(topics, m.recvMessage)
subscribeToken.Wait()
if subscribeToken.Error() != nil {
return subscribeToken.Error()
log.Printf("MQTT SUBSCRIBE ERROR\ntopics: %s\nerror: %s",
strings.Join(m.Topics[:], ","), subscribeToken.Error())
}
m.started = true
}
return
}
go m.receiver()
return nil
// onConnectionLost is installed as the client's connection-lost handler in
// createOpts. It only logs the reason; no reconnect is attempted here (the
// client options enable auto-reconnect).
func (m *MQTTConsumer) onConnectionLost(c mqtt.Client, err error) {
	reason := err.Error()
	log.Printf("MQTT Connection lost\nerror: %s\nMQTT Client will try to reconnect", reason)
}
// receiver() reads all incoming messages from the consumer, and parses them into
@ -172,6 +188,7 @@ func (m *MQTTConsumer) Stop() {
defer m.Unlock()
close(m.done)
m.client.Disconnect(200)
m.started = false
}
func (m *MQTTConsumer) Gather(acc telegraf.Accumulator) error {
@ -219,6 +236,8 @@ func (m *MQTTConsumer) createOpts() (*mqtt.ClientOptions, error) {
opts.SetAutoReconnect(true)
opts.SetKeepAlive(time.Second * 60)
opts.SetCleanSession(!m.PersistentSession)
opts.SetOnConnectHandler(m.onConnect)
opts.SetConnectionLostHandler(m.onConnectionLost)
return opts, nil
}

View File

@ -0,0 +1,190 @@
# MySQL Input plugin
This plugin gathers the statistic data from MySQL server
* Global statuses
* Global variables
* Slave statuses
* Binlog size
* Process list
* Info schema auto increment columns
* Table I/O waits
* Index I/O waits
* Perf Schema table lock waits
* Perf Schema event waits
* Perf Schema events statements
* File events statistics
* Table schema statistics
## Configuration
```
# Read metrics from one or many mysql servers
[[inputs.mysql]]
## specify servers via a url matching:
## [username[:password]@][protocol[(address)]]/[?tls=[true|false|skip-verify]]
## see https://github.com/go-sql-driver/mysql#dsn-data-source-name
## e.g.
## root:passwd@tcp(127.0.0.1:3306)/?tls=false
## root@tcp(127.0.0.1:3306)/?tls=false
#
## If no servers are specified, then localhost is used as the host.
servers = ["tcp(127.0.0.1:3306)/"]
## the limits for metrics from perf_events_statements
perf_events_statements_digest_text_limit = 120
perf_events_statements_limit = 250
perf_events_statements_time_limit = 86400
#
## if the list is empty, then metrics are gathered from all database tables
table_schema_databases = []
#
## gather metrics from INFORMATION_SCHEMA.TABLES for the databases provided in the list above
gather_table_schema = false
#
## gather thread state counts from INFORMATION_SCHEMA.PROCESSLIST
gather_process_list = true
#
## gather auto_increment columns and max values from information schema
gather_info_schema_auto_inc = true
#
## gather metrics from SHOW SLAVE STATUS command output
gather_slave_status = true
#
## gather metrics from SHOW BINARY LOGS command output
gather_binary_logs = false
#
## gather metrics from PERFORMANCE_SCHEMA.TABLE_IO_WAITS_SUMMARY_BY_TABLE
gather_table_io_waits = false
#
## gather metrics from PERFORMANCE_SCHEMA.TABLE_IO_WAITS_SUMMARY_BY_INDEX_USAGE
gather_index_io_waits = false
#
## gather metrics from PERFORMANCE_SCHEMA.FILE_SUMMARY_BY_EVENT_NAME
gather_file_events_stats = false
#
## gather metrics from PERFORMANCE_SCHEMA.EVENTS_STATEMENTS_SUMMARY_BY_DIGEST
gather_perf_events_statements = false
#
## Some queries we may want to run less often (such as SHOW GLOBAL VARIABLES)
interval_slow = "30m"
```
## Measurements & Fields
* Global statuses - all numeric and boolean values of `SHOW GLOBAL STATUS`
* Global variables - all numeric and boolean values of `SHOW GLOBAL VARIABLES`
* Slave status - metrics from `SHOW SLAVE STATUS`. These metrics are gathered
when single-source replication is on. With multi-source replication the
command output works differently, so this metric does not work with
multi-source replication.
* slave_[column name]()
* Binary logs - all metrics including size and count of all binary files.
Requires to be turned on in configuration.
* binary_size_bytes(int, number)
* binary_files_count(int, number)
* Process list - connection metrics from processlist for each user. It has the following fields
* connections(int, number)
* Perf Table IO waits - total count and time of I/O waits event for each table
and process. It has following fields:
* table_io_waits_total_fetch(float, number)
* table_io_waits_total_insert(float, number)
* table_io_waits_total_update(float, number)
* table_io_waits_total_delete(float, number)
* table_io_waits_seconds_total_fetch(float, milliseconds)
* table_io_waits_seconds_total_insert(float, milliseconds)
* table_io_waits_seconds_total_update(float, milliseconds)
* table_io_waits_seconds_total_delete(float, milliseconds)
* Perf index IO waits - total count and time of I/O waits event for each index
and process. It has following fields:
* index_io_waits_total_fetch(float, number)
* index_io_waits_seconds_total_fetch(float, milliseconds)
* index_io_waits_total_insert(float, number)
* index_io_waits_total_update(float, number)
* index_io_waits_total_delete(float, number)
* index_io_waits_seconds_total_insert(float, milliseconds)
* index_io_waits_seconds_total_update(float, milliseconds)
* index_io_waits_seconds_total_delete(float, milliseconds)
* Info schema autoincrement statuses - autoincrement fields and max values
for them. It has following fields:
* auto_increment_column(int, number)
* auto_increment_column_max(int, number)
* Perf table lock waits - gathers total number and time for SQL and external
lock waits events for each table and operation. It has following fields.
The unit of fields varies by the tags.
* read_normal(float, number/milliseconds)
* read_with_shared_locks(float, number/milliseconds)
* read_high_priority(float, number/milliseconds)
* read_no_insert(float, number/milliseconds)
* write_normal(float, number/milliseconds)
* write_allow_write(float, number/milliseconds)
* write_concurrent_insert(float, number/milliseconds)
* write_delayed(float, number/milliseconds)
* write_low_priority(float, number/milliseconds)
* read(float, number/milliseconds)
* write(float, number/milliseconds)
* Perf events waits - gathers total time and number of event waits
* events_waits_total(float, number)
* events_waits_seconds_total(float, milliseconds)
* Perf file events statuses - gathers file events statuses
* file_events_total(float,number)
* file_events_seconds_total(float, milliseconds)
* file_events_bytes_total(float, bytes)
* Perf file events statements - gathers attributes of each event
* events_statements_total(float, number)
* events_statements_seconds_total(float, milliseconds)
* events_statements_errors_total(float, number)
* events_statements_warnings_total(float, number)
* events_statements_rows_affected_total(float, number)
* events_statements_rows_sent_total(float, number)
* events_statements_rows_examined_total(float, number)
* events_statements_tmp_tables_total(float, number)
* events_statements_tmp_disk_tables_total(float, number)
* events_statements_sort_merge_passes_total(float, number)
* events_statements_sort_rows_total(float, number)
* events_statements_no_index_used_total(float, number)
* Table schema - gathers statistics of each schema. It has following measurements
* info_schema_table_rows(float, number)
* info_schema_table_size_data_length(float, number)
* info_schema_table_size_index_length(float, number)
* info_schema_table_size_data_free(float, number)
* info_schema_table_version(float, number)
## Tags
* All measurements have the following tags
* server (the host name from which the metrics are gathered)
* Process list measurement has following tags
* user (username for whom the metrics are gathered)
* Perf table IO waits measurement has following tags
* schema
* name (object name for event or process)
* Perf index IO waits has following tags
* schema
* name
* index
* Info schema autoincrement statuses has following tags
* schema
* table
* column
* Perf table lock waits has following tags
* schema
* table
* sql_lock_waits_total(fields including this tag have numeric unit)
* external_lock_waits_total(fields including this tag have numeric unit)
* sql_lock_waits_seconds_total(fields including this tag have millisecond unit)
* external_lock_waits_seconds_total(fields including this tag have millisecond unit)
* Perf events statements has following tags
* event_name
* Perf file events statuses has following tags
* event_name
* mode
* Perf file events statements has following tags
* schema
* digest
* digest_text
* Table schema has following tags
* schema
* table
* component
* type
* engine
* row_format
* create_options

File diff suppressed because it is too large Load Diff

View File

@ -1,6 +1,7 @@
package mysql
import (
"database/sql"
"fmt"
"testing"
@ -115,3 +116,47 @@ func TestMysqlDNSAddTimeout(t *testing.T) {
}
}
}
// TestParseValue feeds parseValue the textual booleans "Yes"/"No" and
// "ON"/"OFF" plus one unrecognised string, and checks both results: the
// numeric value and the accompanying bool.
func TestParseValue(t *testing.T) {
	testCases := []struct {
		rawByte   sql.RawBytes
		value     float64
		boolValue bool
	}{
		{sql.RawBytes("Yes"), 1, true},
		{sql.RawBytes("No"), 0, false},
		{sql.RawBytes("ON"), 1, true},
		{sql.RawBytes("OFF"), 0, false},
		{sql.RawBytes("ABC"), 0, false},
	}
	for _, tc := range testCases {
		value, ok := parseValue(tc.rawByte)
		// Fail when EITHER result is wrong. The previous "&&" only reported a
		// failure when the value and the bool were both wrong, which let a
		// single bad result slip through.
		if value != tc.value || ok != tc.boolValue {
			t.Errorf("want %d with %t, got %d with %t", int(tc.value), tc.boolValue, int(value), ok)
		}
	}
}
// TestNewNamespace checks that newNamespace produces the same
// underscore-separated name for three different ways of splitting the words,
// including a word that contains a space and one that already contains an
// underscore.
func TestNewNamespace(t *testing.T) {
	const want = "thread_info_scheme_query_update"

	inputs := [][]string{
		{"thread", "info_scheme", "query update"},
		{"thread", "info_scheme", "query_update"},
		{"thread", "info", "scheme", "query", "update"},
	}

	for _, words := range inputs {
		got := newNamespace(words...)
		if got == want {
			continue
		}
		t.Errorf("want %s, got %s", want, got)
	}
}

View File

@ -184,6 +184,7 @@ func (g *phpfpm) gatherHttp(addr string, acc telegraf.Accumulator) error {
return fmt.Errorf("Unable to connect to phpfpm status page '%s': %v",
addr, err)
}
defer res.Body.Close()
if res.StatusCode != 200 {
return fmt.Errorf("Unable to get valid stat result from '%s': %v",

View File

@ -9,15 +9,17 @@ import (
"strconv"
"strings"
"sync"
"time"
"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/internal"
"github.com/influxdata/telegraf/plugins/inputs"
)
// HostPinger is a function that runs the "ping" function using a list of
// passed arguments. This can be easily switched with a mocked ping function
// for unit test purposes (see ping_test.go)
type HostPinger func(args ...string) (string, error)
type HostPinger func(timeout float64, args ...string) (string, error)
type Ping struct {
// Interval at which to ping (ping -i <INTERVAL>)
@ -43,18 +45,18 @@ func (_ *Ping) Description() string {
return "Ping given url(s) and return statistics"
}
var sampleConfig = `
const sampleConfig = `
## NOTE: this plugin forks the ping command. You may need to set capabilities
## via setcap cap_net_raw+p /bin/ping
#
## urls to ping
urls = ["www.google.com"] # required
## number of pings to send (ping -c <COUNT>)
## number of pings to send per collection (ping -c <COUNT>)
count = 1 # required
## interval, in s, at which to ping. 0 == default (ping -i <PING_INTERVAL>)
ping_interval = 0.0
## ping timeout, in s. 0 == no timeout (ping -t <TIMEOUT>)
timeout = 0.0
## ping timeout, in s. 0 == no timeout (ping -W <TIMEOUT>)
timeout = 1.0
## interface to send ping from (ping -I <INTERFACE>)
interface = ""
`
@ -71,16 +73,16 @@ func (p *Ping) Gather(acc telegraf.Accumulator) error {
// Spin off a go routine for each url to ping
for _, url := range p.Urls {
wg.Add(1)
go func(url string, acc telegraf.Accumulator) {
go func(u string) {
defer wg.Done()
args := p.args(url)
out, err := p.pingHost(args...)
args := p.args(u)
out, err := p.pingHost(p.Timeout, args...)
if err != nil {
// Combine go err + stderr output
errorChannel <- errors.New(
strings.TrimSpace(out) + ", " + err.Error())
}
tags := map[string]string{"url": url}
tags := map[string]string{"url": u}
trans, rec, avg, err := processPingOutput(out)
if err != nil {
// fatal error
@ -98,7 +100,7 @@ func (p *Ping) Gather(acc telegraf.Accumulator) error {
fields["average_response_ms"] = avg
}
acc.AddFields("ping", fields, tags)
}(url, acc)
}(url)
}
wg.Wait()
@ -116,13 +118,14 @@ func (p *Ping) Gather(acc telegraf.Accumulator) error {
return errors.New(strings.Join(errorStrings, "\n"))
}
func hostPinger(args ...string) (string, error) {
func hostPinger(timeout float64, args ...string) (string, error) {
bin, err := exec.LookPath("ping")
if err != nil {
return "", err
}
c := exec.Command(bin, args...)
out, err := c.CombinedOutput()
out, err := internal.CombinedOutputTimeout(c,
time.Second*time.Duration(timeout+1))
return string(out), err
}

View File

@ -124,7 +124,7 @@ func TestArgs(t *testing.T) {
"Expected: %s Actual: %s", expected, actual)
}
func mockHostPinger(args ...string) (string, error) {
func mockHostPinger(timeout float64, args ...string) (string, error) {
return linuxPingOutput, nil
}
@ -161,7 +161,7 @@ PING www.google.com (216.58.218.164) 56(84) bytes of data.
rtt min/avg/max/mdev = 35.225/44.033/51.806/5.325 ms
`
func mockLossyHostPinger(args ...string) (string, error) {
func mockLossyHostPinger(timeout float64, args ...string) (string, error) {
return lossyPingOutput, nil
}
@ -192,7 +192,7 @@ Request timeout for icmp_seq 0
2 packets transmitted, 0 packets received, 100.0% packet loss
`
func mockErrorHostPinger(args ...string) (string, error) {
func mockErrorHostPinger(timeout float64, args ...string) (string, error) {
return errorPingOutput, errors.New("No packets received")
}
@ -215,7 +215,7 @@ func TestBadPingGather(t *testing.T) {
acc.AssertContainsTaggedFields(t, "ping", fields, tags)
}
func mockFatalHostPinger(args ...string) (string, error) {
func mockFatalHostPinger(timeout float64, args ...string) (string, error) {
return fatalPingOutput, errors.New("So very bad")
}

View File

@ -4,6 +4,7 @@ import (
"bytes"
"database/sql"
"fmt"
"log"
"regexp"
"strings"
@ -15,6 +16,7 @@ import (
type Postgresql struct {
Address string
Outputaddress string
Databases []string
OrderedColumns []string
AllColumns []string
@ -25,6 +27,7 @@ type Postgresql struct {
Version int
Withdbname bool
Tagvalue string
Measurement string
}
}
@ -33,6 +36,7 @@ type query []struct {
Version int
Withdbname bool
Tagvalue string
Measurement string
}
var ignoredColumns = map[string]bool{"datid": true, "datname": true, "stats_reset": true}
@ -55,6 +59,11 @@ var sampleConfig = `
## databases are gathered.
## databases = ["app_production", "testing"]
#
# outputaddress = "db01"
## A custom name for the database that will be used as the "server" tag in the
## measurement output. If not specified, a default one generated from
## the connection address is used.
#
## Define the toml config where the sql queries are stored
## New queries can be added, if the withdbname is set to true and there is no
## databases defined in the 'databases field', the sql query is ended by a
@ -65,24 +74,28 @@ var sampleConfig = `
## because the databases variable was set to ['postgres', 'pgbench' ] and the
## withdbname was true. Be careful that if the withdbname is set to false you
## don't have to define the where clause (aka with the dbname) the tagvalue
## field is used to define custom tags (separated by comas)
## field is used to define custom tags (separated by commas)
## The optional "measurement" value can be used to override the default
## output measurement name ("postgresql").
#
## Structure :
## [[inputs.postgresql_extensible.query]]
## sqlquery string
## version string
## withdbname boolean
## tagvalue string (coma separated)
## tagvalue string (comma separated)
## measurement string
[[inputs.postgresql_extensible.query]]
sqlquery="SELECT * FROM pg_stat_database"
version=901
withdbname=false
tagvalue=""
measurement=""
[[inputs.postgresql_extensible.query]]
sqlquery="SELECT * FROM pg_stat_bgwriter"
version=901
withdbname=false
tagvalue=""
tagvalue="postgresql.stats"
`
func (p *Postgresql) SampleConfig() string {
@ -106,6 +119,7 @@ func (p *Postgresql) Gather(acc telegraf.Accumulator) error {
var db_version int
var query string
var tag_value string
var meas_name string
if p.Address == "" || p.Address == "localhost" {
p.Address = localhost
@ -131,6 +145,11 @@ func (p *Postgresql) Gather(acc telegraf.Accumulator) error {
for i := range p.Query {
sql_query = p.Query[i].Sqlquery
tag_value = p.Query[i].Tagvalue
if p.Query[i].Measurement != "" {
meas_name = p.Query[i].Measurement
} else {
meas_name = "postgresql"
}
if p.Query[i].Withdbname {
if len(p.Databases) != 0 {
@ -170,7 +189,7 @@ func (p *Postgresql) Gather(acc telegraf.Accumulator) error {
}
for rows.Next() {
err = p.accRow(rows, acc)
err = p.accRow(meas_name, rows, acc)
if err != nil {
return err
}
@ -184,9 +203,12 @@ type scanner interface {
Scan(dest ...interface{}) error
}
var passwordKVMatcher, _ = regexp.Compile("password=\\S+ ?")
var KVMatcher, _ = regexp.Compile("(password|sslcert|sslkey|sslmode|sslrootcert)=\\S+ ?")
func (p *Postgresql) SanitizedAddress() (_ string, err error) {
if p.Outputaddress != "" {
return p.Outputaddress, nil
}
var canonicalizedAddress string
if strings.HasPrefix(p.Address, "postgres://") || strings.HasPrefix(p.Address, "postgresql://") {
canonicalizedAddress, err = pq.ParseURL(p.Address)
@ -196,12 +218,12 @@ func (p *Postgresql) SanitizedAddress() (_ string, err error) {
} else {
canonicalizedAddress = p.Address
}
p.sanitizedAddress = passwordKVMatcher.ReplaceAllString(canonicalizedAddress, "")
p.sanitizedAddress = KVMatcher.ReplaceAllString(canonicalizedAddress, "")
return p.sanitizedAddress, err
}
func (p *Postgresql) accRow(row scanner, acc telegraf.Accumulator) error {
func (p *Postgresql) accRow(meas_name string, row scanner, acc telegraf.Accumulator) error {
var columnVars []interface{}
var dbname bytes.Buffer
@ -247,9 +269,11 @@ func (p *Postgresql) accRow(row scanner, acc telegraf.Accumulator) error {
var isATag int
fields := make(map[string]interface{})
for col, val := range columnMap {
if acc.Debug() {
log.Printf("postgresql_extensible: column: %s = %T: %s\n", col, *val, *val)
}
_, ignore := ignoredColumns[col]
//if !ignore && *val != "" {
if !ignore {
if !ignore && *val != nil {
isATag = 0
for tag := range p.AdditionalTags {
if col == p.AdditionalTags[tag] {
@ -267,7 +291,7 @@ func (p *Postgresql) accRow(row scanner, acc telegraf.Accumulator) error {
}
}
}
acc.AddFields("postgresql", fields, tags)
acc.AddFields(meas_name, fields, tags)
return nil
}

View File

@ -21,12 +21,16 @@ type Procstat struct {
Prefix string
User string
// pidmap maps a pid to a process object, so we don't recreate every gather
pidmap map[int32]*process.Process
// tagmap maps a pid to a map of tags for that pid
tagmap map[int32]map[string]string
}
func NewProcstat() *Procstat {
return &Procstat{
pidmap: make(map[int32]*process.Process),
tagmap: make(map[int32]map[string]string),
}
}
@ -61,8 +65,8 @@ func (p *Procstat) Gather(acc telegraf.Accumulator) error {
log.Printf("Error: procstat getting process, exe: [%s] pidfile: [%s] pattern: [%s] user: [%s] %s",
p.Exe, p.PidFile, p.Pattern, p.User, err.Error())
} else {
for _, proc := range p.pidmap {
p := NewSpecProcessor(p.Prefix, acc, proc)
for pid, proc := range p.pidmap {
p := NewSpecProcessor(p.Prefix, acc, proc, p.tagmap[pid])
p.pushMetrics()
}
}
@ -103,45 +107,50 @@ func (p *Procstat) getAllPids() ([]int32, error) {
var err error
if p.PidFile != "" {
pids, err = pidsFromFile(p.PidFile)
pids, err = p.pidsFromFile()
} else if p.Exe != "" {
pids, err = pidsFromExe(p.Exe)
pids, err = p.pidsFromExe()
} else if p.Pattern != "" {
pids, err = pidsFromPattern(p.Pattern)
pids, err = p.pidsFromPattern()
} else if p.User != "" {
pids, err = pidsFromUser(p.User)
pids, err = p.pidsFromUser()
} else {
err = fmt.Errorf("Either exe, pid_file or pattern has to be specified")
err = fmt.Errorf("Either exe, pid_file, user, or pattern has to be specified")
}
return pids, err
}
func pidsFromFile(file string) ([]int32, error) {
func (p *Procstat) pidsFromFile() ([]int32, error) {
var out []int32
var outerr error
pidString, err := ioutil.ReadFile(file)
pidString, err := ioutil.ReadFile(p.PidFile)
if err != nil {
outerr = fmt.Errorf("Failed to read pidfile '%s'. Error: '%s'", file, err)
outerr = fmt.Errorf("Failed to read pidfile '%s'. Error: '%s'",
p.PidFile, err)
} else {
pid, err := strconv.Atoi(strings.TrimSpace(string(pidString)))
if err != nil {
outerr = err
} else {
out = append(out, int32(pid))
p.tagmap[int32(pid)] = map[string]string{
"pidfile": p.PidFile,
"pid": strings.TrimSpace(string(pidString)),
}
}
}
return out, outerr
}
func pidsFromExe(exe string) ([]int32, error) {
func (p *Procstat) pidsFromExe() ([]int32, error) {
var out []int32
var outerr error
bin, err := exec.LookPath("pgrep")
if err != nil {
return out, fmt.Errorf("Couldn't find pgrep binary: %s", err)
}
pgrep, err := exec.Command(bin, exe).Output()
pgrep, err := exec.Command(bin, p.Exe).Output()
if err != nil {
return out, fmt.Errorf("Failed to execute %s. Error: '%s'", bin, err)
} else {
@ -150,6 +159,10 @@ func pidsFromExe(exe string) ([]int32, error) {
ipid, err := strconv.Atoi(pid)
if err == nil {
out = append(out, int32(ipid))
p.tagmap[int32(ipid)] = map[string]string{
"exe": p.Exe,
"pid": pid,
}
} else {
outerr = err
}
@ -158,14 +171,14 @@ func pidsFromExe(exe string) ([]int32, error) {
return out, outerr
}
func pidsFromPattern(pattern string) ([]int32, error) {
func (p *Procstat) pidsFromPattern() ([]int32, error) {
var out []int32
var outerr error
bin, err := exec.LookPath("pgrep")
if err != nil {
return out, fmt.Errorf("Couldn't find pgrep binary: %s", err)
}
pgrep, err := exec.Command(bin, "-f", pattern).Output()
pgrep, err := exec.Command(bin, "-f", p.Pattern).Output()
if err != nil {
return out, fmt.Errorf("Failed to execute %s. Error: '%s'", bin, err)
} else {
@ -174,6 +187,10 @@ func pidsFromPattern(pattern string) ([]int32, error) {
ipid, err := strconv.Atoi(pid)
if err == nil {
out = append(out, int32(ipid))
p.tagmap[int32(ipid)] = map[string]string{
"pattern": p.Pattern,
"pid": pid,
}
} else {
outerr = err
}
@ -182,14 +199,14 @@ func pidsFromPattern(pattern string) ([]int32, error) {
return out, outerr
}
func pidsFromUser(user string) ([]int32, error) {
func (p *Procstat) pidsFromUser() ([]int32, error) {
var out []int32
var outerr error
bin, err := exec.LookPath("pgrep")
if err != nil {
return out, fmt.Errorf("Couldn't find pgrep binary: %s", err)
}
pgrep, err := exec.Command(bin, "-u", user).Output()
pgrep, err := exec.Command(bin, "-u", p.User).Output()
if err != nil {
return out, fmt.Errorf("Failed to execute %s. Error: '%s'", bin, err)
} else {
@ -198,6 +215,10 @@ func pidsFromUser(user string) ([]int32, error) {
ipid, err := strconv.Atoi(pid)
if err == nil {
out = append(out, int32(ipid))
p.tagmap[int32(ipid)] = map[string]string{
"user": p.User,
"pid": pid,
}
} else {
outerr = err
}

View File

@ -25,6 +25,7 @@ func TestGather(t *testing.T) {
PidFile: file.Name(),
Prefix: "foo",
pidmap: make(map[int32]*process.Process),
tagmap: make(map[int32]map[string]string),
}
p.Gather(&acc)
assert.True(t, acc.HasFloatField("procstat", "foo_cpu_time_user"))

View File

@ -1,7 +1,6 @@
package procstat
import (
"fmt"
"time"
"github.com/shirou/gopsutil/process"
@ -17,28 +16,12 @@ type SpecProcessor struct {
proc *process.Process
}
func (p *SpecProcessor) add(metric string, value interface{}) {
var mname string
if p.Prefix == "" {
mname = metric
} else {
mname = p.Prefix + "_" + metric
}
p.fields[mname] = value
}
func (p *SpecProcessor) flush() {
p.acc.AddFields("procstat", p.fields, p.tags)
p.fields = make(map[string]interface{})
}
func NewSpecProcessor(
prefix string,
acc telegraf.Accumulator,
p *process.Process,
tags map[string]string,
) *SpecProcessor {
tags := make(map[string]string)
tags["pid"] = fmt.Sprintf("%v", p.Pid)
if name, err := p.Name(); err == nil {
tags["process_name"] = name
}
@ -52,90 +35,62 @@ func NewSpecProcessor(
}
func (p *SpecProcessor) pushMetrics() {
p.pushNThreadsStats()
p.pushFDStats()
p.pushCtxStats()
p.pushIOStats()
p.pushCPUStats()
p.pushMemoryStats()
p.flush()
}
var prefix string
if p.Prefix != "" {
prefix = p.Prefix + "_"
}
fields := map[string]interface{}{}
func (p *SpecProcessor) pushNThreadsStats() error {
numThreads, err := p.proc.NumThreads()
if err != nil {
return fmt.Errorf("NumThreads error: %s\n", err)
if err == nil {
fields[prefix+"num_threads"] = numThreads
}
p.add("num_threads", numThreads)
return nil
}
func (p *SpecProcessor) pushFDStats() error {
fds, err := p.proc.NumFDs()
if err != nil {
return fmt.Errorf("NumFD error: %s\n", err)
if err == nil {
fields[prefix+"num_fds"] = fds
}
p.add("num_fds", fds)
return nil
}
func (p *SpecProcessor) pushCtxStats() error {
ctx, err := p.proc.NumCtxSwitches()
if err != nil {
return fmt.Errorf("ContextSwitch error: %s\n", err)
if err == nil {
fields[prefix+"voluntary_context_switches"] = ctx.Voluntary
fields[prefix+"involuntary_context_switches"] = ctx.Involuntary
}
p.add("voluntary_context_switches", ctx.Voluntary)
p.add("involuntary_context_switches", ctx.Involuntary)
return nil
}
func (p *SpecProcessor) pushIOStats() error {
io, err := p.proc.IOCounters()
if err != nil {
return fmt.Errorf("IOCounters error: %s\n", err)
if err == nil {
fields[prefix+"read_count"] = io.ReadCount
fields[prefix+"write_count"] = io.WriteCount
fields[prefix+"read_bytes"] = io.ReadBytes
fields[prefix+"write_bytes"] = io.WriteCount
}
p.add("read_count", io.ReadCount)
p.add("write_count", io.WriteCount)
p.add("read_bytes", io.ReadBytes)
p.add("write_bytes", io.WriteCount)
return nil
}
func (p *SpecProcessor) pushCPUStats() error {
cpu_time, err := p.proc.CPUTimes()
if err != nil {
return err
if err == nil {
fields[prefix+"cpu_time_user"] = cpu_time.User
fields[prefix+"cpu_time_system"] = cpu_time.System
fields[prefix+"cpu_time_idle"] = cpu_time.Idle
fields[prefix+"cpu_time_nice"] = cpu_time.Nice
fields[prefix+"cpu_time_iowait"] = cpu_time.Iowait
fields[prefix+"cpu_time_irq"] = cpu_time.Irq
fields[prefix+"cpu_time_soft_irq"] = cpu_time.Softirq
fields[prefix+"cpu_time_steal"] = cpu_time.Steal
fields[prefix+"cpu_time_stolen"] = cpu_time.Stolen
fields[prefix+"cpu_time_guest"] = cpu_time.Guest
fields[prefix+"cpu_time_guest_nice"] = cpu_time.GuestNice
}
p.add("cpu_time_user", cpu_time.User)
p.add("cpu_time_system", cpu_time.System)
p.add("cpu_time_idle", cpu_time.Idle)
p.add("cpu_time_nice", cpu_time.Nice)
p.add("cpu_time_iowait", cpu_time.Iowait)
p.add("cpu_time_irq", cpu_time.Irq)
p.add("cpu_time_soft_irq", cpu_time.Softirq)
p.add("cpu_time_steal", cpu_time.Steal)
p.add("cpu_time_stolen", cpu_time.Stolen)
p.add("cpu_time_guest", cpu_time.Guest)
p.add("cpu_time_guest_nice", cpu_time.GuestNice)
cpu_perc, err := p.proc.CPUPercent(time.Duration(0))
if err != nil {
return err
} else if cpu_perc == 0 {
return nil
if err == nil && cpu_perc != 0 {
fields[prefix+"cpu_usage"] = cpu_perc
}
p.add("cpu_usage", cpu_perc)
return nil
}
func (p *SpecProcessor) pushMemoryStats() error {
mem, err := p.proc.MemoryInfo()
if err != nil {
return err
if err == nil {
fields[prefix+"memory_rss"] = mem.RSS
fields[prefix+"memory_vms"] = mem.VMS
fields[prefix+"memory_swap"] = mem.Swap
}
p.add("memory_rss", mem.RSS)
p.add("memory_vms", mem.VMS)
p.add("memory_swap", mem.Swap)
return nil
p.acc.AddFields("procstat", fields, p.tags)
}

View File

@ -22,7 +22,7 @@ to filter and some tags
# An array of urls to scrape metrics from.
urls = ["http://my-kube-apiserver:8080/metrics"]
# Get only metrics with "apiserver_" string is in metric name
namepass = ["apiserver_"]
namepass = ["apiserver_*"]
# Add a metric name prefix
name_prefix = "k8s_"
# Add tags to be able to make beautiful dashboards

View File

@ -97,8 +97,8 @@ func (s *Server) getServerStatus() error {
func (s *Server) getDefaultTags() map[string]string {
tags := make(map[string]string)
tags["host"] = s.Url.Host
tags["hostname"] = s.serverStatus.Network.Hostname
tags["rethinkdb_host"] = s.Url.Host
tags["rethinkdb_hostname"] = s.serverStatus.Network.Hostname
return tags
}

View File

@ -20,8 +20,8 @@ func TestGetDefaultTags(t *testing.T) {
in string
out string
}{
{"host", server.Url.Host},
{"hostname", server.serverStatus.Network.Hostname},
{"rethinkdb_host", server.Url.Host},
{"rethinkdb_hostname", server.serverStatus.Network.Hostname},
}
defaultTags := server.getDefaultTags()
for _, tt := range tagTests {

View File

@ -398,15 +398,16 @@ func (s *Snmp) Gather(acc telegraf.Accumulator) error {
// only if len(s.OidInstanceMapping) == 0
if len(OidInstanceMapping) >= 0 {
if err := host.SNMPMap(acc, s.nameToOid, s.subTableMap); err != nil {
return err
log.Printf("SNMP Mapping error for host '%s': %s", host.Address, err)
continue
}
}
// Launch Get requests
if err := host.SNMPGet(acc, s.initNode); err != nil {
return err
log.Printf("SNMP Error for host '%s': %s", host.Address, err)
}
if err := host.SNMPBulk(acc, s.initNode); err != nil {
return err
log.Printf("SNMP Error for host '%s': %s", host.Address, err)
}
}
return nil
@ -732,7 +733,11 @@ func (h *Host) HandleResponse(oids map[string]Data, result *gosnmp.SnmpPacket, a
break nextresult
}
}
if strings.HasPrefix(variable.Name, oid_key) {
// If variable.Name is the same as oid_key
// OR
// the result is SNMP table which "." comes right after oid_key.
// ex: oid_key: .1.3.6.1.2.1.2.2.1.16, variable.Name: .1.3.6.1.2.1.2.2.1.16.1
if variable.Name == oid_key || strings.HasPrefix(variable.Name, oid_key+".") {
switch variable.Type {
// handle Metrics
case gosnmp.Boolean, gosnmp.Integer, gosnmp.Counter32, gosnmp.Gauge32,

View File

@ -5,7 +5,7 @@ import (
"github.com/influxdata/telegraf/testutil"
// "github.com/stretchr/testify/assert"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
@ -45,7 +45,8 @@ func TestSNMPErrorGet2(t *testing.T) {
var acc testutil.Accumulator
err := s.Gather(&acc)
require.Error(t, err)
require.NoError(t, err)
assert.Equal(t, 0, len(acc.Metrics))
}
func TestSNMPErrorBulk(t *testing.T) {
@ -65,7 +66,8 @@ func TestSNMPErrorBulk(t *testing.T) {
var acc testutil.Accumulator
err := s.Gather(&acc)
require.Error(t, err)
require.NoError(t, err)
assert.Equal(t, 0, len(acc.Metrics))
}
func TestSNMPGet1(t *testing.T) {

View File

@ -1052,7 +1052,7 @@ SELECT
When 1073874176 Then IsNull(Cast(cc.cntr_value - pc.cntr_value as Money) / NullIf(cbc.cntr_value - pbc.cntr_value, 0), 0) -- Avg
When 272696320 Then IsNull(Cast(cc.cntr_value - pc.cntr_value as Money) / NullIf(cbc.cntr_value - pbc.cntr_value, 0), 0) -- Avg/sec
When 1073939712 Then cc.cntr_value - pc.cntr_value -- Base
Else cc.cntr_value End as int)
Else cc.cntr_value End as bigint)
--, currentvalue= CAST(cc.cntr_value as bigint)
FROM #CCounters cc
INNER JOIN #PCounters pc On cc.object_name = pc.object_name

View File

@ -18,10 +18,10 @@
## Percentiles to calculate for timing & histogram stats
percentiles = [90]
## convert measurement names, "." to "_" and "-" to "__"
convert_names = true
## separator to use between elements of a statsd metric
metric_separator = "_"
## Parses tags in DataDog's dogstatsd format
## Parses tags in the datadog statsd format
## http://docs.datadoghq.com/guides/dogstatsd/
parse_data_dog_tags = false
@ -39,10 +39,6 @@
## calculation of percentiles. Raising this limit increases the accuracy
## of percentiles but also increases the memory usage and cpu time.
percentile_limit = 1000
## UDP packet size for the server to listen for. This will depend on the size
## of the packets that the client is sending, which is usually 1500 bytes.
udp_packet_size = 1500
```
### Description

View File

@ -17,6 +17,7 @@ import (
"time"
"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/internal"
"github.com/influxdata/telegraf/plugins/inputs"
)
@ -98,10 +99,12 @@ var sampleConfig = `
# group = true
#
#
## Options for the sadf command. The values on the left represent the sadf options and
## the values on the right their description (wich are used for grouping and prefixing metrics).
## Options for the sadf command. The values on the left represent the sadf
## options and the values on the right their description (wich are used for
## grouping and prefixing metrics).
##
## Run 'sar -h' or 'man sar' to find out the supported options for your sysstat version.
## Run 'sar -h' or 'man sar' to find out the supported options for your
## sysstat version.
[inputs.sysstat.options]
-C = "cpu"
-B = "paging"
@ -121,8 +124,9 @@ var sampleConfig = `
# "-I ALL" = "interrupts" # requires INT activity
#
#
## Device tags can be used to add additional tags for devices. For example the configuration below
## adds a tag vg with value rootvg for all metrics with sda devices.
## Device tags can be used to add additional tags for devices.
## For example the configuration below adds a tag vg with value rootvg for
## all metrics with sda devices.
# [[inputs.sysstat.device_tags.sda]]
# vg = "rootvg"
`
@ -174,24 +178,28 @@ func (s *Sysstat) Gather(acc telegraf.Accumulator) error {
return errors.New(strings.Join(errorStrings, "\n"))
}
// collect collects sysstat data with the collector utility sadc. It runs the following command:
// collect collects sysstat data with the collector utility sadc.
// It runs the following command:
// Sadc -S <Activity1> -S <Activity2> ... <collectInterval> 2 tmpFile
// The above command collects system metrics during <collectInterval> and saves it in binary form to tmpFile.
// The above command collects system metrics during <collectInterval> and
// saves it in binary form to tmpFile.
func (s *Sysstat) collect() error {
options := []string{}
for _, act := range s.Activities {
options = append(options, "-S", act)
}
s.tmpFile = path.Join("/tmp", fmt.Sprintf("sysstat-%d", time.Now().Unix()))
collectInterval := s.interval - parseInterval // collectInterval has to be smaller than the telegraf data collection interval
// collectInterval has to be smaller than the telegraf data collection interval
collectInterval := s.interval - parseInterval
if collectInterval < 0 { // If true, interval is not defined yet and Gather is run for the first time.
// If true, interval is not defined yet and Gather is run for the first time.
if collectInterval < 0 {
collectInterval = 1 // In that case we only collect for 1 second.
}
options = append(options, strconv.Itoa(collectInterval), "2", s.tmpFile)
cmd := execCommand(s.Sadc, options...)
out, err := cmd.CombinedOutput()
out, err := internal.CombinedOutputTimeout(cmd, time.Second*5)
if err != nil {
return fmt.Errorf("failed to run command %s: %s", strings.Join(cmd.Args, " "), string(out))
}
@ -279,8 +287,9 @@ func (s *Sysstat) parse(acc telegraf.Accumulator, option string, ts time.Time) e
acc.AddFields(measurement, v.fields, v.tags, ts)
}
}
if err := cmd.Wait(); err != nil {
return fmt.Errorf("command %s failed with %s", strings.Join(cmd.Args, " "), err)
if err := internal.WaitTimeout(cmd, time.Second*5); err != nil {
return fmt.Errorf("command %s failed with %s",
strings.Join(cmd.Args, " "), err)
}
return nil
}

View File

@ -4,6 +4,7 @@ import (
"bufio"
"bytes"
"fmt"
"runtime"
"github.com/shirou/gopsutil/host"
"github.com/shirou/gopsutil/load"
@ -43,6 +44,7 @@ func (_ *SystemStats) Gather(acc telegraf.Accumulator) error {
"uptime": hostinfo.Uptime,
"n_users": len(users),
"uptime_format": format_uptime(hostinfo.Uptime),
"n_cpus": runtime.NumCPU(),
}
acc.AddFields("system", fields, nil)

View File

@ -0,0 +1,46 @@
# tail Input Plugin
The tail plugin "tails" a logfile and parses each log message.
By default, the tail plugin acts like the following unix tail command:
```
tail -F --lines=0 myfile.log
```
- `-F` means that it will follow the _name_ of the given file, so
that it will be compatible with log-rotated files, and that it will retry on
inaccessible files.
- `--lines=0` means that it will start at the end of the file (unless
the `from_beginning` option is set).
See http://man7.org/linux/man-pages/man1/tail.1.html for more details.
The plugin expects messages in one of the
[Telegraf Input Data Formats](https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md).
### Configuration:
```toml
# Stream a log file, like the tail -f command
[[inputs.tail]]
## files to tail.
## These accept standard unix glob matching rules, but with the addition of
## ** as a "super asterisk". ie:
## "/var/log/**.log" -> recursively find all .log files in /var/log
## "/var/log/*/*.log" -> find all .log files with a parent dir in /var/log
## "/var/log/apache.log" -> just tail the apache log file
##
## See https://github.com/gobwas/glob for more examples
##
files = ["/var/mymetrics.out"]
## Read file from beginning.
from_beginning = false
## Data format to consume.
## Each data format has its own unique set of configuration options, read
## more about them here:
## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md
data_format = "influx"
```

156
plugins/inputs/tail/tail.go Normal file
View File

@ -0,0 +1,156 @@
package tail
import (
"fmt"
"log"
"sync"
"github.com/hpcloud/tail"
"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/internal/globpath"
"github.com/influxdata/telegraf/plugins/inputs"
"github.com/influxdata/telegraf/plugins/parsers"
)
// Tail is an input plugin that follows ("tails") a set of files and parses
// each line read from them into a metric.
type Tail struct {
// Files lists the paths or glob patterns of the files to tail.
Files []string
// FromBeginning, when true, leaves the initial seek position at its zero
// value; otherwise Start sets it to offset 0 relative to whence 2.
FromBeginning bool
// tailers holds one tailer for every file successfully opened by Start.
tailers []*tail.Tail
// parser converts a line of text into a metric; it is set via SetParser.
parser parsers.Parser
// wg is waited on by Stop and marked done by each receiver goroutine.
wg sync.WaitGroup
// acc is the accumulator handed to Start; receivers add parsed metrics to it.
acc telegraf.Accumulator
// Mutex is held for the duration of Start and of Stop.
sync.Mutex
}
// NewTail returns a new Tail plugin whose FromBeginning option is disabled.
func NewTail() *Tail {
	plugin := new(Tail)
	plugin.FromBeginning = false
	return plugin
}
// sampleConfig is the example TOML configuration returned by SampleConfig.
const sampleConfig = `
## files to tail.
## These accept standard unix glob matching rules, but with the addition of
## ** as a "super asterisk". ie:
## "/var/log/**.log" -> recursively find all .log files in /var/log
## "/var/log/*/*.log" -> find all .log files with a parent dir in /var/log
## "/var/log/apache.log" -> just tail the apache log file
##
## See https://github.com/gobwas/glob for more examples
##
files = ["/var/mymetrics.out"]
## Read file from beginning.
from_beginning = false
## Data format to consume.
## Each data format has it's own unique set of configuration options, read
## more about them here:
## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md
data_format = "influx"
`
// SampleConfig returns the sample configuration text for the tail plugin.
func (t *Tail) SampleConfig() string {
return sampleConfig
}
// Description returns a one-line description of the tail plugin.
func (t *Tail) Description() string {
return "Stream a log file, like the tail -f command"
}
// Gather does nothing and always returns nil; metrics are added to the
// accumulator by the receiver goroutines launched from Start.
func (t *Tail) Gather(acc telegraf.Accumulator) error {
return nil
}
// Start begins tailing every file matched by t.Files and launches one
// receiver goroutine per file, which feeds parsed metrics into acc.
//
// A glob that fails to compile is logged and skipped. A file that cannot be
// tailed is skipped as well; the messages of all such failures are joined and
// returned as a single error once the remaining files have been started.
func (t *Tail) Start(acc telegraf.Accumulator) error {
	t.Lock()
	defer t.Unlock()

	t.acc = acc

	var seek tail.SeekInfo
	if !t.FromBeginning {
		// Whence 2 has the same value as io.SeekEnd: position relative to
		// the end of the file.
		seek.Whence = 2
		seek.Offset = 0
	}

	var errS string
	// Create a "tailer" for each file
	for _, filepath := range t.Files {
		g, err := globpath.Compile(filepath)
		if err != nil {
			log.Printf("ERROR Glob %s failed to compile, %s", filepath, err)
			// Never call Match on a glob that failed to compile.
			continue
		}
		for file := range g.Match() {
			tailer, err := tail.TailFile(file,
				tail.Config{
					ReOpen:   true,
					Follow:   true,
					Location: &seek,
				})
			if err != nil {
				errS += err.Error() + " "
				continue
			}
			// Register the receiver with the WaitGroup before spawning it.
			// If the Add happened inside the goroutine, a Stop that runs
			// before the goroutine is scheduled could return from wg.Wait
			// while the receiver is still starting.
			t.wg.Add(1)
			// create a goroutine for each "tailer"
			go t.receiver(tailer)
			t.tailers = append(t.tailers, tailer)
		}
	}

	if errS != "" {
		// errS is data, not a format string: a '%' in a file name or error
		// text must not be interpreted as a formatting verb.
		return fmt.Errorf("%s", errS)
	}
	return nil
}

// receiver is launched as a goroutine to continuously watch a tailed logfile
// for changes, parse any incoming msgs, and add them to the accumulator.
//
// The caller must have called t.wg.Add(1) before starting the goroutine;
// receiver calls t.wg.Done when the tailer's Lines channel is drained.
func (t *Tail) receiver(tailer *tail.Tail) {
	defer t.wg.Done()

	for line := range tailer.Lines {
		if line.Err != nil {
			// Report the error carried by the line itself, not a stale
			// parse error from an earlier iteration.
			log.Printf("ERROR tailing file %s, Error: %s\n",
				tailer.Filename, line.Err)
			continue
		}
		m, err := t.parser.ParseLine(line.Text)
		if err != nil {
			log.Printf("Malformed log line in %s: [%s], Error: %s\n",
				tailer.Filename, line.Text, err)
			continue
		}
		t.acc.AddFields(m.Name(), m.Fields(), m.Tags(), m.Time())
	}
}
// Stop stops and cleans up every tailer recorded in t.tailers, then waits on
// the plugin's WaitGroup, which each receiver goroutine marks done on exit.
func (t *Tail) Stop() {
	t.Lock()
	defer t.Unlock()

	// The loop variable is deliberately not named t, so it cannot shadow
	// the receiver.
	for _, tailer := range t.tailers {
		if err := tailer.Stop(); err != nil {
			// Include the underlying error so the failure can be diagnosed.
			log.Printf("ERROR stopping tail on file %s: %s\n", tailer.Filename, err)
		}
		tailer.Cleanup()
	}
	t.wg.Wait()
}
// SetParser sets the parser that the receiver goroutines use to turn each
// line of text into a metric.
func (t *Tail) SetParser(parser parsers.Parser) {
t.parser = parser
}
// init calls inputs.Add to associate the name "tail" with a factory function
// that returns a new Tail from NewTail.
func init() {
inputs.Add("tail", func() telegraf.Input {
return NewTail()
})
}

View File

@ -0,0 +1,101 @@
package tail
import (
"io/ioutil"
"os"
"testing"
"time"
"github.com/influxdata/telegraf/plugins/parsers"
"github.com/influxdata/telegraf/testutil"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
// TestTailFromBeginning checks that, with FromBeginning enabled, a line
// written to the file after Start is parsed and added to the accumulator.
func TestTailFromBeginning(t *testing.T) {
	tmpfile, err := ioutil.TempFile("", "")
	require.NoError(t, err)
	defer os.Remove(tmpfile.Name())

	tt := NewTail()
	tt.FromBeginning = true
	tt.Files = []string{tmpfile.Name()}
	// Fail immediately if the parser cannot be built instead of running the
	// plugin with whatever value came back alongside the error.
	p, err := parsers.NewInfluxParser()
	require.NoError(t, err)
	tt.SetParser(p)
	defer tt.Stop()
	defer tmpfile.Close()

	acc := testutil.Accumulator{}
	require.NoError(t, tt.Start(&acc))

	_, err = tmpfile.WriteString("cpu,mytag=foo usage_idle=100\n")
	require.NoError(t, err)
	require.NoError(t, tt.Gather(&acc))
	// Give the receiver goroutine time to pick up and parse the new line.
	time.Sleep(time.Millisecond * 50)

	acc.AssertContainsTaggedFields(t, "cpu",
		map[string]interface{}{
			"usage_idle": float64(100),
		},
		map[string]string{
			"mytag": "foo",
		})
}
// TestTailFromEnd checks that, with the default settings, a line already in
// the file before Start is ignored and only the line written afterwards is
// added to the accumulator.
func TestTailFromEnd(t *testing.T) {
	tmpfile, err := ioutil.TempFile("", "")
	require.NoError(t, err)
	defer os.Remove(tmpfile.Name())
	_, err = tmpfile.WriteString("cpu,mytag=foo usage_idle=100\n")
	require.NoError(t, err)

	tt := NewTail()
	tt.Files = []string{tmpfile.Name()}
	// Fail immediately if the parser cannot be built instead of running the
	// plugin with whatever value came back alongside the error.
	p, err := parsers.NewInfluxParser()
	require.NoError(t, err)
	tt.SetParser(p)
	defer tt.Stop()
	defer tmpfile.Close()

	acc := testutil.Accumulator{}
	require.NoError(t, tt.Start(&acc))
	// Let the tailer settle at the end of the file before appending.
	time.Sleep(time.Millisecond * 100)

	_, err = tmpfile.WriteString("cpu,othertag=foo usage_idle=100\n")
	require.NoError(t, err)
	require.NoError(t, tt.Gather(&acc))
	// Give the receiver goroutine time to pick up and parse the new line.
	time.Sleep(time.Millisecond * 50)

	acc.AssertContainsTaggedFields(t, "cpu",
		map[string]interface{}{
			"usage_idle": float64(100),
		},
		map[string]string{
			"othertag": "foo",
		})
	assert.Len(t, acc.Metrics, 1)
}
// TestTailBadLine checks that a line the parser rejects produces no metrics.
func TestTailBadLine(t *testing.T) {
	tmpfile, err := ioutil.TempFile("", "")
	require.NoError(t, err)
	defer os.Remove(tmpfile.Name())

	tt := NewTail()
	tt.FromBeginning = true
	tt.Files = []string{tmpfile.Name()}
	// Fail immediately if the parser cannot be built instead of running the
	// plugin with whatever value came back alongside the error.
	p, err := parsers.NewInfluxParser()
	require.NoError(t, err)
	tt.SetParser(p)
	defer tt.Stop()
	defer tmpfile.Close()

	acc := testutil.Accumulator{}
	require.NoError(t, tt.Start(&acc))

	_, err = tmpfile.WriteString("cpu mytag= foo usage_idle= 100\n")
	require.NoError(t, err)
	require.NoError(t, tt.Gather(&acc))
	// Give the receiver goroutine time to read and reject the line.
	time.Sleep(time.Millisecond * 50)

	assert.Len(t, acc.Metrics, 0)
}

View File

@ -23,7 +23,6 @@ This is a sample configuration for the plugin.
max_tcp_connections = 250
## Data format to consume.
## Each data format has it's own unique set of configuration options, read
## more about them here:
## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md

View File

@ -150,8 +150,7 @@ func (t *TcpListener) tcpListen() error {
if err != nil {
return err
}
log.Printf("Received TCP Connection from %s", conn.RemoteAddr())
// log.Printf("Received TCP Connection from %s", conn.RemoteAddr())
select {
case <-t.accept:
@ -187,7 +186,7 @@ func (t *TcpListener) handler(conn *net.TCPConn, id string) {
defer func() {
t.wg.Done()
conn.Close()
log.Printf("Closed TCP Connection from %s", conn.RemoteAddr())
// log.Printf("Closed TCP Connection from %s", conn.RemoteAddr())
// Add one connection potential back to channel when this one closes
t.accept <- true
t.forget(id)
@ -222,7 +221,10 @@ func (t *TcpListener) handler(conn *net.TCPConn, id string) {
// tcpParser parses the incoming tcp byte packets
func (t *TcpListener) tcpParser() error {
defer t.wg.Done()
var packet []byte
var metrics []telegraf.Metric
var err error
for {
select {
case <-t.done:
@ -231,7 +233,7 @@ func (t *TcpListener) tcpParser() error {
if len(packet) == 0 {
continue
}
metrics, err := t.parser.Parse(packet)
metrics, err = t.parser.Parse(packet)
if err == nil {
t.storeMetrics(metrics)
} else {

View File

@ -18,13 +18,7 @@ This is a sample configuration for the plugin.
## UDP listener will start dropping packets.
allowed_pending_messages = 10000
## UDP packet size for the server to listen for. This will depend
## on the size of the packets that the client is sending, which is
## usually 1500 bytes.
udp_packet_size = 1500
## Data format to consume.
## Each data format has it's own unique set of configuration options, read
## more about them here:
## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md

View File

@ -135,12 +135,14 @@ func (u *UdpListener) udpParser() error {
defer u.wg.Done()
var packet []byte
var metrics []telegraf.Metric
var err error
for {
select {
case <-u.done:
return nil
case packet = <-u.in:
metrics, err := u.parser.Parse(packet)
metrics, err = u.parser.Parse(packet)
if err == nil {
u.storeMetrics(metrics)
} else {

View File

@ -156,6 +156,15 @@ if any of the combinations of ObjectName/Instances/Counters are invalid.
Instances = ["------"] # Use 6 x - to remove the Instance bit from the query.
Measurement = "win_mem"
#IncludeTotal=false #Set to true to include _Total instance when querying for all (*).
[[inputs.win_perf_counters.object]]
# more counters for the Network Interface Object can be found at
# https://msdn.microsoft.com/en-us/library/ms803962.aspx
ObjectName = "Network Interface"
Counters = ["Bytes Received/sec","Bytes Sent/sec","Packets Received/sec","Packets Sent/sec"]
Instances = ["*"] # Use 6 x - to remove the Instance bit from the query.
Measurement = "win_net"
#IncludeTotal=false #Set to true to include _Total instance when querying for all (*).
```
### Active Directory Domain Controller

View File

@ -107,6 +107,8 @@ type item struct {
counterHandle win.PDH_HCOUNTER
}
var sanitizedChars = strings.NewReplacer("/sec", "_persec", "/Sec", "_persec", " ", "_")
func (m *Win_PerfCounters) AddItem(metrics *itemList, query string, objectName string, counter string, instance string,
measurement string, include_total bool) {
@ -297,7 +299,7 @@ func (m *Win_PerfCounters) Gather(acc telegraf.Accumulator) error {
tags["instance"] = s
}
tags["objectname"] = metric.objectName
fields[string(metric.counter)] = float32(c.FmtValue.DoubleValue)
fields[sanitizedChars.Replace(string(metric.counter))] = float32(c.FmtValue.DoubleValue)
var measurement string
if metric.measurement == "" {

View File

@ -9,8 +9,6 @@ import (
"github.com/aws/aws-sdk-go/aws"
"github.com/aws/aws-sdk-go/aws/credentials"
"github.com/aws/aws-sdk-go/aws/credentials/ec2rolecreds"
"github.com/aws/aws-sdk-go/aws/ec2metadata"
"github.com/aws/aws-sdk-go/aws/session"
"github.com/aws/aws-sdk-go/service/cloudwatch"
@ -19,8 +17,10 @@ import (
)
type CloudWatch struct {
Region string // AWS Region
Namespace string // CloudWatch Metrics Namespace
Region string `toml:"region"` // AWS Region
AccessKey string `toml:"access_key"` // Explicit AWS Access Key ID
SecretKey string `toml:"secret_key"` // Explicit AWS Secret Access Key
Namespace string `toml:"namespace"` // CloudWatch Metrics Namespace
svc *cloudwatch.CloudWatch
}
@ -28,6 +28,15 @@ var sampleConfig = `
## Amazon REGION
region = 'us-east-1'
## Amazon Credentials
## Credentials are loaded in the following order
## 1) explicit credentials from 'access_key' and 'secret_key'
## 2) environment variables
## 3) shared credentials file
## 4) EC2 Instance Profile
#access_key = ""
#secret_key = ""
## Namespace for the CloudWatch MetricDatums
namespace = 'InfluxData/Telegraf'
`
@ -43,12 +52,9 @@ func (c *CloudWatch) Description() string {
func (c *CloudWatch) Connect() error {
Config := &aws.Config{
Region: aws.String(c.Region),
Credentials: credentials.NewChainCredentials(
[]credentials.Provider{
&ec2rolecreds.EC2RoleProvider{Client: ec2metadata.New(session.New())},
&credentials.EnvProvider{},
&credentials.SharedCredentialsProvider{},
}),
}
if c.AccessKey != "" || c.SecretKey != "" {
Config.Credentials = credentials.NewStaticCredentials(c.AccessKey, c.SecretKey, "")
}
svc := cloudwatch.New(session.New(Config))

View File

@ -8,7 +8,6 @@ import (
"net/http"
"net/url"
"sort"
"strings"
"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/internal"
@ -71,21 +70,22 @@ func (d *Datadog) Write(metrics []telegraf.Metric) error {
metricCounter := 0
for _, m := range metrics {
mname := strings.Replace(m.Name(), "_", ".", -1)
if dogMs, err := buildMetrics(m); err == nil {
for fieldName, dogM := range dogMs {
// name of the datadog measurement
var dname string
if fieldName == "value" {
// adding .value seems redundant here
dname = mname
dname = m.Name()
} else {
dname = mname + "." + strings.Replace(fieldName, "_", ".", -1)
dname = m.Name() + "." + fieldName
}
var host string
host, _ = m.Tags()["host"]
metric := &Metric{
Metric: dname,
Tags: buildTags(m.Tags()),
Host: m.Tags()["host"],
Host: host,
}
metric.Points[0] = dogM
tempSeries = append(tempSeries, metric)

View File

@ -125,13 +125,9 @@ func (i *InfluxDB) Connect() error {
return err
}
// Create Database if it doesn't exist
_, e := c.Query(client.Query{
Command: fmt.Sprintf("CREATE DATABASE IF NOT EXISTS \"%s\"", i.Database),
})
if e != nil {
log.Println("Database creation failed: " + e.Error())
err = createDatabase(c, i.Database)
if err != nil {
log.Println("Database creation failed: " + err.Error())
continue
}
@ -144,8 +140,24 @@ func (i *InfluxDB) Connect() error {
return nil
}
// createDatabase sends a CREATE DATABASE IF NOT EXISTS query for the named
// database through c and returns whatever error the query call reports.
func createDatabase(c client.Client, database string) error {
	// The statement is a no-op on the server side when the database exists.
	createQuery := client.Query{
		Command: fmt.Sprintf("CREATE DATABASE IF NOT EXISTS \"%s\"", database),
	}
	_, err := c.Query(createQuery)
	return err
}
// Close closes every client connection held in i.conns.
//
// All connections are attempted even when an earlier one fails. If any Close
// call returns an error, the individual messages are joined with "; " and
// returned as a single error; otherwise Close returns nil.
func (i *InfluxDB) Close() error {
	var errS string
	for _, conn := range i.conns {
		if err := conn.Close(); err != nil {
			// Separate messages so multiple failures stay readable.
			if errS != "" {
				errS += "; "
			}
			errS += err.Error()
		}
	}
	if errS != "" {
		return fmt.Errorf("output influxdb close failed: %s", errS)
	}
	return nil
}
@ -185,18 +197,21 @@ func (i *InfluxDB) Write(metrics []telegraf.Metric) error {
p := rand.Perm(len(i.conns))
for _, n := range p {
if e := i.conns[n].Write(bp); e != nil {
log.Println("ERROR: " + e.Error())
// Log write failure
log.Printf("ERROR: %s", e)
// If the database was not found, try to recreate it
if strings.Contains(e.Error(), "database not found") {
if errc := createDatabase(i.conns[n], i.Database); errc != nil {
log.Printf("ERROR: Database %s not found and failed to recreate\n",
i.Database)
}
}
} else {
err = nil
break
}
}
// If all of the writes failed, create a new connection array so that
// i.Connect() will be called on the next gather.
if err != nil {
i.conns = make([]client.Client, 0)
}
return err
}

View File

@ -181,6 +181,9 @@ func (k *Kafka) Write(metrics []telegraf.Metric) error {
func init() {
outputs.Add("kafka", func() telegraf.Output {
return &Kafka{}
return &Kafka{
MaxRetry: 3,
RequiredAcks: -1,
}
})
}

View File

@ -9,8 +9,6 @@ import (
"github.com/aws/aws-sdk-go/aws"
"github.com/aws/aws-sdk-go/aws/credentials"
"github.com/aws/aws-sdk-go/aws/credentials/ec2rolecreds"
"github.com/aws/aws-sdk-go/aws/ec2metadata"
"github.com/aws/aws-sdk-go/aws/session"
"github.com/aws/aws-sdk-go/service/kinesis"
@ -20,6 +18,8 @@ import (
type KinesisOutput struct {
Region string `toml:"region"`
AccessKey string `toml:"access_key"`
SecretKey string `toml:"secret_key"`
StreamName string `toml:"streamname"`
PartitionKey string `toml:"partitionkey"`
Format string `toml:"format"`
@ -30,6 +30,16 @@ type KinesisOutput struct {
var sampleConfig = `
## Amazon REGION of kinesis endpoint.
region = "ap-southeast-2"
## Amazon Credentials
## Credentials are loaded in the following order
## 1) explicit credentials from 'access_key' and 'secret_key'
## 2) environment variables
## 3) shared credentials file
## 4) EC2 Instance Profile
#access_key = ""
#secret_key = ""
## Kinesis StreamName must exist prior to starting telegraf.
streamname = "StreamName"
## PartitionKey as used for sharding data.
@ -67,12 +77,9 @@ func (k *KinesisOutput) Connect() error {
}
Config := &aws.Config{
Region: aws.String(k.Region),
Credentials: credentials.NewChainCredentials(
[]credentials.Provider{
&ec2rolecreds.EC2RoleProvider{Client: ec2metadata.New(session.New())},
&credentials.EnvProvider{},
&credentials.SharedCredentialsProvider{},
}),
}
if k.AccessKey != "" || k.SecretKey != "" {
Config.Credentials = credentials.NewStaticCredentials(k.AccessKey, k.SecretKey, "")
}
svc := kinesis.New(session.New(Config))

View File

@ -21,6 +21,9 @@ type OpenTSDB struct {
Debug bool
}
// sanitizedChars is applied to metric names and to "key=value" tag strings
// before they are emitted: "@", "*", "%", "#" and "$" each become "-", and a
// space becomes "_".
var sanitizedChars = strings.NewReplacer(
	"@", "-",
	"*", "-",
	" ", "_",
	"%", "-",
	"#", "-",
	"$", "-",
)
var sampleConfig = `
## prefix for metrics keys
prefix = "my.specific.prefix."
@ -94,8 +97,8 @@ func buildTags(mTags map[string]string) []string {
tags := make([]string, len(mTags))
index := 0
for k, v := range mTags {
tags[index] = fmt.Sprintf("%s=%s", k, v)
index += 1
tags[index] = sanitizedChars.Replace(fmt.Sprintf("%s=%s", k, v))
index++
}
sort.Strings(tags)
return tags
@ -105,7 +108,8 @@ func buildMetrics(m telegraf.Metric, now time.Time, prefix string) []*MetricLine
ret := []*MetricLine{}
for fieldName, value := range m.Fields() {
metric := &MetricLine{
Metric: fmt.Sprintf("%s%s_%s", prefix, m.Name(), fieldName),
Metric: sanitizedChars.Replace(fmt.Sprintf("%s%s_%s",
prefix, m.Name(), fieldName)),
Timestamp: now.Unix(),
}

View File

@ -25,6 +25,10 @@ func TestBuildTagsTelnet(t *testing.T) {
map[string]string{"one": "two", "aaa": "bbb"},
[]string{"aaa=bbb", "one=two"},
},
{
map[string]string{"Sp%ci@l Chars": "g$t repl#ced"},
[]string{"Sp-ci-l_Chars=g-t_repl-ced"},
},
{
map[string]string{},
[]string{},
@ -59,13 +63,19 @@ func TestWrite(t *testing.T) {
// Verify positive and negative test cases of writing data
metrics := testutil.MockMetrics()
metrics = append(metrics, testutil.TestMetric(float64(1.0), "justametric.float"))
metrics = append(metrics, testutil.TestMetric(int64(123456789), "justametric.int"))
metrics = append(metrics, testutil.TestMetric(uint64(123456789012345), "justametric.uint"))
metrics = append(metrics, testutil.TestMetric("Lorem Ipsum", "justametric.string"))
metrics = append(metrics, testutil.TestMetric(float64(42.0), "justametric.anotherfloat"))
metrics = append(metrics, testutil.TestMetric(float64(1.0),
"justametric.float"))
metrics = append(metrics, testutil.TestMetric(int64(123456789),
"justametric.int"))
metrics = append(metrics, testutil.TestMetric(uint64(123456789012345),
"justametric.uint"))
metrics = append(metrics, testutil.TestMetric("Lorem Ipsum",
"justametric.string"))
metrics = append(metrics, testutil.TestMetric(float64(42.0),
"justametric.anotherfloat"))
metrics = append(metrics, testutil.TestMetric(float64(42.0),
"metric w/ specialchars"))
err = o.Write(metrics)
require.NoError(t, err)
}

View File

@ -1,7 +1,6 @@
package riemann
import (
"errors"
"fmt"
"os"
"sort"
@ -33,6 +32,7 @@ func (r *Riemann) Connect() error {
c, err := raidman.Dial(r.Transport, r.URL)
if err != nil {
r.client = nil
return err
}
@ -41,7 +41,11 @@ func (r *Riemann) Connect() error {
}
// Close closes the current Riemann client, if one is set, and clears r.client
// so that a later call sees no active client. It always returns nil.
func (r *Riemann) Close() error {
	if r.client != nil {
		r.client.Close()
		r.client = nil
	}
	return nil
}
@ -58,6 +62,13 @@ func (r *Riemann) Write(metrics []telegraf.Metric) error {
return nil
}
if r.client == nil {
err := r.Connect()
if err != nil {
return fmt.Errorf("FAILED to (re)connect to Riemann. Error: %s\n", err)
}
}
var events []*raidman.Event
for _, p := range metrics {
evs := buildEvents(p, r.Separator)
@ -68,8 +79,9 @@ func (r *Riemann) Write(metrics []telegraf.Metric) error {
var senderr = r.client.SendMulti(events)
if senderr != nil {
return errors.New(fmt.Sprintf("FAILED to send riemann message: %s\n",
senderr))
r.Close() // always returns nil
return fmt.Errorf("FAILED to send riemann message (will try to reconnect). Error: %s\n",
senderr)
}
return nil

View File

@ -17,7 +17,7 @@ type GraphiteSerializer struct {
Template string
}
var sanitizedChars = strings.NewReplacer("/", "-", "@", "-", " ", "_", "..", ".")
var sanitizedChars = strings.NewReplacer("/", "-", "@", "-", "*", "-", " ", "_", "..", ".")
func (s *GraphiteSerializer) Serialize(metric telegraf.Metric) ([]string, error) {
out := []string{}

View File

@ -12,7 +12,7 @@ function install_init {
function install_systemd {
cp -f $SCRIPT_DIR/telegraf.service /lib/systemd/system/telegraf.service
systemctl enable telegraf
systemctl enable telegraf || true
systemctl daemon-reload || true
}
@ -26,7 +26,7 @@ function install_chkconfig {
id telegraf &>/dev/null
if [[ $? -ne 0 ]]; then
useradd --system -U -M telegraf -s /bin/false -d /etc/telegraf
useradd -r -K USERGROUPS_ENAB=yes -M telegraf -s /bin/false -d /etc/telegraf
fi
test -d $LOG_DIR || mkdir -p $LOG_DIR
@ -64,7 +64,7 @@ elif [[ -f /etc/debian_version ]]; then
which systemctl &>/dev/null
if [[ $? -eq 0 ]]; then
install_systemd
systemctl restart telegraf
systemctl restart telegraf || echo "WARNING: systemd not running."
else
# Assuming sysv
install_init