diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md
index 1520c7aa0..b59da651a 100644
--- a/.github/ISSUE_TEMPLATE.md
+++ b/.github/ISSUE_TEMPLATE.md
@@ -11,6 +11,8 @@ Erase the other section and everything on and above this line.
 
 ## Bug report
 
+### Relevant telegraf.conf:
+
 ### System info:
 
 [Include Telegraf version, operating system name, and other relevant details]
diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md
index 19bb38765..2e838a8e4 100644
--- a/.github/PULL_REQUEST_TEMPLATE.md
+++ b/.github/PULL_REQUEST_TEMPLATE.md
@@ -1,5 +1,5 @@
 ### Required for all PRs:
 
-- [ ] CHANGELOG.md updated
+- [ ] CHANGELOG.md updated (we recommend not updating this until the PR has been approved by a maintainer)
 - [ ] Sign [CLA](https://influxdata.com/community/cla/) (if not already signed)
 - [ ] README.md updated (if adding a new plugin)
diff --git a/.gitignore b/.gitignore
index 7d27d694e..8269337df 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,4 @@
+build
 tivan
 .vagrant
 /telegraf
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 9e6fc7ac8..5bdd04142 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,31 +2,80 @@
 
 ### Release Notes
 
+**Breaking Change** The SNMP plugin is being deprecated in its current form.
+There is a [new SNMP plugin](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/snmp)
+which fixes many of the issues and confusion
+of its predecessor. Users wanting to continue using the deprecated SNMP
+plugin will need to change their config file from `[[inputs.snmp]]` to
+`[[inputs.snmp_legacy]]`. The configuration of the new SNMP plugin is _not_
+backwards-compatible.
+
+- Telegraf now supports being installed as an official Windows service,
+which can be installed via
+`> "C:\Program Files\Telegraf\telegraf.exe" --service install`
+
+**Breaking Change**: Aerospike main server node measurements have been renamed
+to aerospike_node. Aerospike namespace measurements have been renamed to
+aerospike_namespace. They will also now be tagged with the node_name
+that they correspond to. This has been done to differentiate measurements
+that pertain to node vs. namespace statistics.
+
+**Breaking Change**: users of github_webhooks must change to the new
+`[[inputs.webhooks]]` plugin.
+
+This means that the default github_webhooks config:
+
+```
+# A Github Webhook Event collector
+[[inputs.github_webhooks]]
+  ## Address and port to host Webhook listener on
+  service_address = ":1618"
+```
+
+should now look like:
+
+```
+# A Webhooks Event collector
+[[inputs.webhooks]]
+  ## Address and port to host Webhook listener on
+  service_address = ":1618"
+
+  [inputs.webhooks.github]
+    path = "/"
+```
+
 - `flush_jitter` behavior has been changed. The random jitter will now be
 evaluated at every flush interval, rather than once at startup. This makes it
 consistent with the behavior of `collection_jitter`.
 
-- All AWS plugins now utilize a standard mechanism for evaluating credentials.
-This allows all AWS plugins to support environment variables, shared credential
-files & profiles, and role assumptions. See the specific plugin README for
-details.
-
-- The AWS CloudWatch input plugin can now declare a wildcard value for a metric
-dimension. This causes the plugin to read all metrics that contain the specified
-dimension key regardless of value. This is used to export collections of metrics
-without having to know the dimension values ahead of time.
-
-- The AWS CloudWatch input plugin can now be configured with the `cache_ttl`
-attribute. This configures the TTL of the internal metric cache. This is useful
-in conjunction with wildcard dimension values as it will control the amount of
-time before a new metric is included by the plugin.
-
 ### Features
 
+- [#1413](https://github.com/influxdata/telegraf/issues/1413): Separate container_version from container_image tag.
+- [#1525](https://github.com/influxdata/telegraf/pull/1525): Support setting per-device and total metrics for Docker network and blockio.
+- [#1466](https://github.com/influxdata/telegraf/pull/1466): MongoDB input plugin: adding per DB stats from db.stats()
+- [#1503](https://github.com/influxdata/telegraf/pull/1503): Add tls support for certs to RabbitMQ input plugin.
+- [#1289](https://github.com/influxdata/telegraf/pull/1289): webhooks input plugin. Thanks @francois2metz and @cduez!
+- [#1247](https://github.com/influxdata/telegraf/pull/1247): rollbar webhook plugin.
+- [#1408](https://github.com/influxdata/telegraf/pull/1408): mandrill webhook plugin.
+- [#1402](https://github.com/influxdata/telegraf/pull/1402): docker-machine/boot2docker no longer required for unit tests.
+- [#1350](https://github.com/influxdata/telegraf/pull/1350): cgroup input plugin.
+- [#1369](https://github.com/influxdata/telegraf/pull/1369): Add input plugin for consuming metrics from NSQD.
+- [#1369](https://github.com/influxdata/telegraf/pull/1480): Add ability to read redis from a socket.
+- [#1387](https://github.com/influxdata/telegraf/pull/1387): **Breaking Change** - Redis `role` tag renamed to `replication_role` to avoid global_tags override.
+- [#1437](https://github.com/influxdata/telegraf/pull/1437): Fetching Galera status metrics in MySQL.
+- [#1500](https://github.com/influxdata/telegraf/pull/1500): Aerospike plugin refactored to use official client lib.
+- [#1434](https://github.com/influxdata/telegraf/pull/1434): Add measurement name arg to logparser plugin.
+- [#1479](https://github.com/influxdata/telegraf/pull/1479): logparser: change resp_code from a field to a tag.
+- [#1411](https://github.com/influxdata/telegraf/pull/1411): Implement support for fetching hddtemp data.
+- [#1340](https://github.com/influxdata/telegraf/issues/1340): statsd: do not log every dropped metric.
+- [#1368](https://github.com/influxdata/telegraf/pull/1368): Add precision rounding to all metrics on collection.
+- [#1390](https://github.com/influxdata/telegraf/pull/1390): Add support for Tengine.
+- [#1320](https://github.com/influxdata/telegraf/pull/1320): Logparser input plugin for parsing grok-style log patterns.
+- [#1397](https://github.com/influxdata/telegraf/issues/1397): ElasticSearch: now supports connecting to ElasticSearch via SSL.
 - [#1262](https://github.com/influxdata/telegraf/pull/1261): Add graylog input plugin.
 - [#1294](https://github.com/influxdata/telegraf/pull/1294): consul input plugin. Thanks @harnash
 - [#1164](https://github.com/influxdata/telegraf/pull/1164): conntrack input plugin. Thanks @robinpercy!
 - [#1165](https://github.com/influxdata/telegraf/pull/1165): vmstat input plugin. Thanks @jshim-xm!
-- [#1247](https://github.com/influxdata/telegraf/pull/1247): rollbar input plugin. Thanks @francois2metz and @cduez!
 - [#1208](https://github.com/influxdata/telegraf/pull/1208): Standardized AWS credentials evaluation & wildcard CloudWatch dimensions. Thanks @johnrengelman!
 - [#1264](https://github.com/influxdata/telegraf/pull/1264): Add SSL config options to http_response plugin.
 - [#1272](https://github.com/influxdata/telegraf/pull/1272): graphite parser: add ability to specify multiple tag keys, for consistency with influxdb parser.
@@ -38,9 +87,44 @@ time before a new metric is included by the plugin.
 - [#1278](https://github.com/influxdata/telegraf/pull/1278) & [#1288](https://github.com/influxdata/telegraf/pull/1288) & [#1295](https://github.com/influxdata/telegraf/pull/1295): RabbitMQ/Apache/InfluxDB inputs: made url(s) parameter optional by using reasonable input defaults if not specified.
 - [#1296](https://github.com/influxdata/telegraf/issues/1296): Refactor of flush_jitter argument.
 - [#1213](https://github.com/influxdata/telegraf/issues/1213): Add inactive & active memory to mem plugin.
+- [#1543](https://github.com/influxdata/telegraf/pull/1543): Official Windows service.
+- [#1414](https://github.com/influxdata/telegraf/pull/1414): Forking sensors command to remove C package dependency.
 
 ### Bugfixes
 
+- [#1619](https://github.com/influxdata/telegraf/issues/1619): Fix `make windows` build target.
+- [#1519](https://github.com/influxdata/telegraf/pull/1519): Fix error race conditions and partial failures.
+- [#1477](https://github.com/influxdata/telegraf/issues/1477): nstat: fix inaccurate config panic.
+- [#1481](https://github.com/influxdata/telegraf/issues/1481): jolokia: fix handling of multiple multi-dimensional attributes.
+- [#1430](https://github.com/influxdata/telegraf/issues/1430): Fix prometheus character sanitizing. Sanitize more win_perf_counters characters.
+- [#1534](https://github.com/influxdata/telegraf/pull/1534): Add diskio io_time to FreeBSD & report timing metrics as ms (as Linux does).
+- [#1379](https://github.com/influxdata/telegraf/issues/1379): Cover Amazon Linux in the post-remove flow.
+- [#1584](https://github.com/influxdata/telegraf/issues/1584): procstat: add missing fields (read/write bytes & count).
+- [#1472](https://github.com/influxdata/telegraf/pull/1472): diskio input plugin: set 'skip_serial_number = true' by default to avoid high cardinality.
+- [#1426](https://github.com/influxdata/telegraf/pull/1426): nil metrics panic fix.
+- [#1384](https://github.com/influxdata/telegraf/pull/1384): Fix data race in apache input plugin.
+- [#1399](https://github.com/influxdata/telegraf/issues/1399): Add `read_repairs` statistics to riak plugin.
+- [#1405](https://github.com/influxdata/telegraf/issues/1405): Fix memory/connection leak in prometheus input plugin.
+- [#1378](https://github.com/influxdata/telegraf/issues/1378): Trim BOM from config file for Windows support.
+- [#1339](https://github.com/influxdata/telegraf/issues/1339): Prometheus client output panic on service reload.
+- [#1461](https://github.com/influxdata/telegraf/pull/1461): Prometheus parser, protobuf format header fix.
+- [#1334](https://github.com/influxdata/telegraf/issues/1334): Prometheus output, metric refresh and caching fixes.
+- [#1432](https://github.com/influxdata/telegraf/issues/1432): Panic fix for multiple graphite outputs under very high load.
+- [#1412](https://github.com/influxdata/telegraf/pull/1412): Instrumental output has better reconnect behavior.
+- [#1460](https://github.com/influxdata/telegraf/issues/1460): Remove PID from procstat plugin to fix cardinality issues.
+- [#1427](https://github.com/influxdata/telegraf/issues/1427): Cassandra input: version 2.x "column family" fix.
+- [#1463](https://github.com/influxdata/telegraf/issues/1463): Shared WaitGroup in Exec plugin.
+- [#1436](https://github.com/influxdata/telegraf/issues/1436): logparser: honor modifiers in "pattern" config.
+- [#1418](https://github.com/influxdata/telegraf/issues/1418): logparser: error and exit on file permission/missing-file errors.
+- [#1499](https://github.com/influxdata/telegraf/pull/1499): Allow the user to specify the full path for HAproxy stats.
+- [#1521](https://github.com/influxdata/telegraf/pull/1521): Fix Redis URL; an extra "tcp://" was added.
+- [#1330](https://github.com/influxdata/telegraf/issues/1330): Fix exec plugin panic when using single binary.
+- [#1336](https://github.com/influxdata/telegraf/issues/1336): Fixed incorrect prometheus metrics source selection.
+- [#1112](https://github.com/influxdata/telegraf/issues/1112): Set default Zookeeper chroot to empty string.
+- [#1335](https://github.com/influxdata/telegraf/issues/1335): Fix overall ping timeout to be calculated based on per-ping timeout.
+- [#1374](https://github.com/influxdata/telegraf/pull/1374): Change "default" retention policy to "".
+- [#1377](https://github.com/influxdata/telegraf/issues/1377): Graphite output mangling '%' character.
+- [#1396](https://github.com/influxdata/telegraf/pull/1396): Prometheus input plugin now supports x509 cert authentication.
 - [#1252](https://github.com/influxdata/telegraf/pull/1252) & [#1279](https://github.com/influxdata/telegraf/pull/1279): Fix systemd service. Thanks @zbindenren & @PierreF!
 - [#1221](https://github.com/influxdata/telegraf/pull/1221): Fix influxdb n_shards counter.
 - [#1258](https://github.com/influxdata/telegraf/pull/1258): Fix potential kernel plugin integer parse error.
@@ -50,6 +134,11 @@ time before a new metric is included by the plugin.
 - [#1316](https://github.com/influxdata/telegraf/pull/1316): Removed leaked "database" tag on redis metrics. Thanks @PierreF!
 - [#1323](https://github.com/influxdata/telegraf/issues/1323): Processes plugin: fix potential error with /proc/net/stat directory.
 - [#1322](https://github.com/influxdata/telegraf/issues/1322): Fix rare RHEL 5.2 panic in gopsutil diskio gathering function.
+- [#1586](https://github.com/influxdata/telegraf/pull/1586): Remove IF NOT EXISTS from influxdb output database creation.
+- [#1600](https://github.com/influxdata/telegraf/issues/1600): Fix quoting with text values in postgresql_extensible plugin.
+- [#1425](https://github.com/influxdata/telegraf/issues/1425): Fix win_perf_counter "index out of range" panic.
+- [#1634](https://github.com/influxdata/telegraf/issues/1634): Fix ntpq panic when field is missing.
+- [#1637](https://github.com/influxdata/telegraf/issues/1637): Sanitize graphite output field names.
 
 ## v0.13.1 [2016-05-24]
 
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 6792abaa4..8aeb3a614 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -11,6 +11,8 @@ Output plugins READMEs are less structured,
 but any information you can provide on how the data will look is appreciated.
 See the [OpenTSDB output](https://github.com/influxdata/telegraf/tree/master/plugins/outputs/opentsdb)
 for a good example.
+1. **Optional:** Help users of your plugin by including example queries for populating dashboards. Include these sample queries in the `README.md` for the plugin.
+1. **Optional:** Write a [tickscript](https://docs.influxdata.com/kapacitor/v1.0/tick/syntax/) for your plugin and add it to [Kapacitor](https://github.com/influxdata/kapacitor/tree/master/examples/telegraf).
+Or mention @jackzampolin in a PR comment with some common queries that you
+would want to alert on and he will write one for you.
 
 ## GoDoc
 
@@ -114,7 +116,7 @@ creating the `Parser` object.
 You should also add the following to your SampleConfig() return:
 
 ```toml
-  ## Data format to consume. 
+  ## Data format to consume.
   ## Each data format has its own unique set of configuration options, read
   ## more about them here:
   ## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md
@@ -244,7 +246,7 @@ instantiating and creating the `Serializer` object.
 You should also add the following to your SampleConfig() return:
 
 ```toml
-  ## Data format to output. 
+  ## Data format to output.
  ## Each data format has its own unique set of configuration options, read
  ## more about them here:
  ## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_OUTPUT.md
@@ -290,10 +292,6 @@ To execute Telegraf tests follow these simple steps:
   instructions
 - execute `make test`
 
-**OSX users**: you will need to install `boot2docker` or `docker-machine`.
-The Makefile will assume that you have a `docker-machine` box called `default` to
-get the IP address.
-
 ### Unit test troubleshooting
 
 Try cleaning up your test environment by executing `make docker-kill` and
diff --git a/Godeps b/Godeps
index 2ac95a904..3c70bcaf8 100644
--- a/Godeps
+++ b/Godeps
@@ -1,5 +1,6 @@
 github.com/Shopify/sarama 8aadb476e66ca998f2f6bb3c993e9a2daa3666b9
 github.com/Sirupsen/logrus 219c8cb75c258c552e999735be6df753ffc7afdc
+github.com/aerospike/aerospike-client-go 45863b7fd8640dc12f7fdd397104d97e1986f25a
 github.com/amir/raidman 53c1b967405155bfc8758557863bf2e14f814687
 github.com/aws/aws-sdk-go 13a12060f716145019378a10e2806c174356b857
 github.com/beorn7/perks 3ac7bf7a47d159a033b107610db8a1b6575507a4
@@ -28,6 +29,8 @@ github.com/hpcloud/tail b2940955ab8b26e19d43a43c4da0475dd81bdb56
 github.com/influxdata/config b79f6829346b8d6e78ba73544b1e1038f1f1c9da
 github.com/influxdata/influxdb e094138084855d444195b252314dfee9eae34cab
 github.com/influxdata/toml af4df43894b16e3fd2b788d01bd27ad0776ef2d0
+github.com/kardianos/osext 29ae4ffbc9a6fe9fb2bc5029050ce6996ea1d3bc
+github.com/kardianos/service 5e335590050d6d00f3aa270217d288dda1c94d0a
 github.com/klauspost/crc32 19b0b332c9e4516a6370a0456e6182c3b5036720
 github.com/lib/pq e182dc4027e2ded4b19396d638610f2653295f36
 github.com/matttproud/golang_protobuf_extensions d0c3fe89de86839aecf2e0579c40ba3bb336a453
@@ -43,12 +46,15 @@ github.com/prometheus/client_model fa8ad6fec33561be4280a8f0514318c79d7f6cb6
 github.com/prometheus/common e8eabff8812b05acf522b45fdcd725a785188e37
 github.com/prometheus/procfs 406e5b7bfd8201a36e2bb5f7bdae0b03380c2ce8
 github.com/samuel/go-zookeeper 218e9c81c0dd8b3b18172b2bbfad92cc7d6db55f
-github.com/shirou/gopsutil 586bb697f3ec9f8ec08ffefe18f521a64534037c
+github.com/shirou/gopsutil 4d0c402af66c78735c5ccf820dc2ca7de5e4ff08
 github.com/soniah/gosnmp b1b4f885b12c5dcbd021c5cee1c904110de6db7d
+github.com/sparrc/aerospike-client-go d4bb42d2c2d39dae68e054116f4538af189e05d5
 github.com/streadway/amqp b4f3ceab0337f013208d31348b578d83c0064744
 github.com/stretchr/testify 1f4a1643a57e798696635ea4c126e9127adb7d3c
+github.com/vjeantet/grok 83bfdfdfd1a8146795b28e547a8e3c8b28a466c2
 github.com/wvanbergen/kafka 46f9a1cf3f670edec492029fadded9c2d9e18866
 github.com/wvanbergen/kazoo-go 0f768712ae6f76454f987c3356177e138df258f8
+github.com/yuin/gopher-lua bf3808abd44b1e55143a2d7f08571aaa80db1808
 github.com/zensqlmonitor/go-mssqldb ffe5510c6fa5e15e6d983210ab501c815b56b363
 golang.org/x/crypto 5dc8cb4b8a8eb076cbb5a06bc3b8682c15bdbbd3
 golang.org/x/net 6acef71eb69611914f7a30939ea9f6e194c78172
diff --git a/Godeps_windows b/Godeps_windows
index cc3077fd4..067c98c1c 100644
--- a/Godeps_windows
+++ b/Godeps_windows
@@ -1,59 +1,12 @@
-github.com/Microsoft/go-winio 9f57cbbcbcb41dea496528872a4f0e37a4f7ae98
-github.com/Shopify/sarama 8aadb476e66ca998f2f6bb3c993e9a2daa3666b9
-github.com/Sirupsen/logrus 219c8cb75c258c552e999735be6df753ffc7afdc
+github.com/Microsoft/go-winio ce2922f643c8fd76b46cadc7f404a06282678b34
 github.com/StackExchange/wmi f3e2bae1e0cb5aef83e319133eabfee30013a4a5
-github.com/amir/raidman 53c1b967405155bfc8758557863bf2e14f814687
-github.com/aws/aws-sdk-go 13a12060f716145019378a10e2806c174356b857
-github.com/beorn7/perks 3ac7bf7a47d159a033b107610db8a1b6575507a4
-github.com/cenkalti/backoff 4dc77674aceaabba2c7e3da25d4c823edfb73f99
-github.com/couchbase/go-couchbase cb664315a324d87d19c879d9cc67fda6be8c2ac1
-github.com/couchbase/gomemcached a5ea6356f648fec6ab89add00edd09151455b4b2
-github.com/couchbase/goutils 5823a0cbaaa9008406021dc5daf80125ea30bba6
-github.com/dancannon/gorethink e7cac92ea2bc52638791a021f212145acfedb1fc
-github.com/davecgh/go-spew 5215b55f46b2b919f50a1df0eaa5886afe4e3b3d
-github.com/docker/engine-api 8924d6900370b4c7e7984be5adc61f50a80d7537
-github.com/docker/go-connections f549a9393d05688dff0992ef3efd8bbe6c628aeb
-github.com/docker/go-units 5d2041e26a699eaca682e2ea41c8f891e1060444
-github.com/eapache/go-resiliency b86b1ec0dd4209a588dc1285cdd471e73525c0b3
-github.com/eapache/queue ded5959c0d4e360646dc9e9908cff48666781367
-github.com/eclipse/paho.mqtt.golang 0f7a459f04f13a41b7ed752d47944528d4bf9a86
-github.com/go-ole/go-ole 50055884d646dd9434f16bbb5c9801749b9bafe4
-github.com/go-sql-driver/mysql 1fca743146605a172a266e1654e01e5cd5669bee
-github.com/golang/protobuf 552c7b9542c194800fd493123b3798ef0a832032
-github.com/golang/snappy 427fb6fc07997f43afa32f35e850833760e489a7
-github.com/gonuts/go-shellquote e842a11b24c6abfb3dd27af69a17f482e4b483c2
-github.com/gorilla/context 1ea25387ff6f684839d82767c1733ff4d4d15d0a
-github.com/gorilla/mux c9e326e2bdec29039a3761c07bece13133863e1e
-github.com/hailocab/go-hostpool e80d13ce29ede4452c43dea11e79b9bc8a15b478
-github.com/influxdata/config b79f6829346b8d6e78ba73544b1e1038f1f1c9da
-github.com/influxdata/influxdb e3fef5593c21644f2b43af55d6e17e70910b0e48
-github.com/influxdata/toml af4df43894b16e3fd2b788d01bd27ad0776ef2d0
-github.com/klauspost/crc32 19b0b332c9e4516a6370a0456e6182c3b5036720
-github.com/lib/pq e182dc4027e2ded4b19396d638610f2653295f36
-github.com/lxn/win 9a7734ea4db26bc593d52f6a8a957afdad39c5c1
-github.com/matttproud/golang_protobuf_extensions d0c3fe89de86839aecf2e0579c40ba3bb336a453
-github.com/miekg/dns cce6c130cdb92c752850880fd285bea1d64439dd
-github.com/mreiferson/go-snappystream 028eae7ab5c4c9e2d1cb4c4ca1e53259bbe7e504
-github.com/naoina/go-stringutil 6b638e95a32d0c1131db0e7fe83775cbea4a0d0b
-github.com/nats-io/nats b13fc9d12b0b123ebc374e6b808c6228ae4234a3
-github.com/nats-io/nuid 4f84f5f3b2786224e336af2e13dba0a0a80b76fa
-github.com/nsqio/go-nsq 0b80d6f05e15ca1930e0c5e1d540ed627e299980
-github.com/prometheus/client_golang 18acf9993a863f4c4b40612e19cdd243e7c86831
-github.com/prometheus/client_model fa8ad6fec33561be4280a8f0514318c79d7f6cb6
-github.com/prometheus/common e8eabff8812b05acf522b45fdcd725a785188e37
-github.com/prometheus/procfs 406e5b7bfd8201a36e2bb5f7bdae0b03380c2ce8
-github.com/samuel/go-zookeeper 218e9c81c0dd8b3b18172b2bbfad92cc7d6db55f
-github.com/shirou/gopsutil 1f32ce1bb380845be7f5d174ac641a2c592c0c42
-github.com/shirou/w32 ada3ba68f000aa1b58580e45c9d308fe0b7fc5c5
-github.com/soniah/gosnmp b1b4f885b12c5dcbd021c5cee1c904110de6db7d
-github.com/streadway/amqp b4f3ceab0337f013208d31348b578d83c0064744
-github.com/stretchr/testify 1f4a1643a57e798696635ea4c126e9127adb7d3c
-github.com/wvanbergen/kafka 46f9a1cf3f670edec492029fadded9c2d9e18866
-github.com/wvanbergen/kazoo-go 0f768712ae6f76454f987c3356177e138df258f8
-github.com/zensqlmonitor/go-mssqldb ffe5510c6fa5e15e6d983210ab501c815b56b363
-golang.org/x/net 6acef71eb69611914f7a30939ea9f6e194c78172
-golang.org/x/text a71fd10341b064c10f4a81ceac72bcf70f26ea34
-gopkg.in/dancannon/gorethink.v1 7d1af5be49cb5ecc7b177bf387d232050299d6ef
-gopkg.in/fatih/pool.v2 cba550ebf9bce999a02e963296d4bc7a486cb715
-gopkg.in/mgo.v2 d90005c5262a3463800497ea5a89aed5fe22c886
-gopkg.in/yaml.v2 a83829b6f1293c91addabc89d0571c246397bbf4
+github.com/go-ole/go-ole be49f7c07711fcb603cff39e1de7c67926dc0ba7
+github.com/lxn/win 950a0e81e7678e63d8e6cd32412bdecb325ccd88
+github.com/shirou/w32 3c9377fc6748f222729a8270fe2775d149a249ad
+golang.org/x/sys a646d33e2ee3172a661fc09bca23bb4889a41bc8
+github.com/go-ini/ini 9144852efba7c4daf409943ee90767da62d55438
+github.com/jmespath/go-jmespath bd40a432e4c76585ef6b72d3fd96fb9b6dc7b68d
+github.com/pmezard/go-difflib/difflib 792786c7400a136282c1664665ae0a8db921c6c2
+github.com/stretchr/objx 1a9d0bb9f541897e62256577b352fdbc1fb4fd94
+gopkg.in/fsnotify.v1 a8a77c9133d2d6fd8334f3260d06f60e8d80a5fb
+gopkg.in/tomb.v1 dd632973f1e7218eb1089048e0798ec9ae7dceb8
diff --git a/Makefile b/Makefile
index c2bcc121d..2951e175a 100644
--- a/Makefile
+++ b/Makefile
@@ -1,4 +1,3 @@
-UNAME := $(shell sh -c 'uname')
 VERSION := $(shell sh -c 'git describe --always --tags')
 ifdef GOBIN
 PATH := $(GOBIN):$(PATH)
@@ -17,7 +16,7 @@ build:
 	go install -ldflags "-X main.version=$(VERSION)" ./...
 
 build-windows:
-	go build -o telegraf.exe -ldflags \
+	GOOS=windows GOARCH=amd64 go build -o telegraf.exe -ldflags \
 		"-X main.version=$(VERSION)" \
 		./cmd/telegraf/telegraf.go
 
@@ -26,10 +25,6 @@ build-for-docker:
 		"-s -X main.version=$(VERSION)" \
 		./cmd/telegraf/telegraf.go
 
-# Build with race detector
-dev: prepare
-	go build -race -ldflags "-X main.version=$(VERSION)" ./...
-
 # run package script
 package:
 	./scripts/build.py --package --version="$(VERSION)" --platform=linux --arch=all --upload
@@ -42,31 +37,22 @@ prepare:
 
 # Use the windows godeps file to prepare dependencies
 prepare-windows:
 	go get github.com/sparrc/gdm
+	gdm restore
 	gdm restore -f Godeps_windows
 
 # Run all docker containers necessary for unit tests
 docker-run:
-ifeq ($(UNAME), Darwin)
-	docker run --name kafka \
-		-e ADVERTISED_HOST=$(shell sh -c 'boot2docker ip || docker-machine ip default') \
-		-e ADVERTISED_PORT=9092 \
-		-p "2181:2181" -p "9092:9092" \
-		-d spotify/kafka
-endif
-ifeq ($(UNAME), Linux)
 	docker run --name kafka \
 		-e ADVERTISED_HOST=localhost \
 		-e ADVERTISED_PORT=9092 \
 		-p "2181:2181" -p "9092:9092" \
 		-d spotify/kafka
-endif
 	docker run --name mysql -p "3306:3306" -e MYSQL_ALLOW_EMPTY_PASSWORD=yes -d mysql
 	docker run --name memcached -p "11211:11211" -d memcached
 	docker run --name postgres -p "5432:5432" -d postgres
 	docker run --name rabbitmq -p "15672:15672" -p "5672:5672" -d rabbitmq:3-management
-	docker run --name opentsdb -p "4242:4242" -d petergrace/opentsdb-docker
 	docker run --name redis -p "6379:6379" -d redis
-	docker run --name aerospike -p "3000:3000" -d aerospike
+	docker run --name aerospike -p "3000:3000" -d aerospike/aerospike-server
 	docker run --name nsq -p "4150:4150" -d nsqio/nsq /nsqd
 	docker run --name mqtt -p "1883:1883" -d ncarlier/mqtt
 	docker run --name riemann -p "5555:5555" -d blalor/riemann
@@ -79,8 +65,7 @@ docker-run-circle:
 		-e ADVERTISED_PORT=9092 \
 		-p "2181:2181" -p "9092:9092" \
 		-d spotify/kafka
-	docker run --name opentsdb -p "4242:4242" -d petergrace/opentsdb-docker
-	docker run --name aerospike -p "3000:3000" -d aerospike
+	docker run --name aerospike -p "3000:3000" -d aerospike/aerospike-server
 	docker run --name nsq -p "4150:4150" -d nsqio/nsq /nsqd
 	docker run --name mqtt -p "1883:1883" -d ncarlier/mqtt
 	docker run --name riemann -p "5555:5555" -d blalor/riemann
@@ -88,8 +73,8 @@ docker-run-circle:
 
 # Kill all docker containers, ignore errors
 docker-kill:
-	-docker kill nsq aerospike redis opentsdb rabbitmq postgres memcached mysql kafka mqtt riemann snmp
-	-docker rm nsq aerospike redis opentsdb rabbitmq postgres memcached mysql kafka mqtt riemann snmp
+	-docker kill nsq aerospike redis rabbitmq postgres memcached mysql kafka mqtt riemann snmp
+	-docker rm nsq aerospike redis rabbitmq postgres memcached mysql kafka mqtt riemann snmp
 
 # Run full unit tests using docker containers (includes setup and teardown)
 test: vet docker-kill docker-run
diff --git a/README.md b/README.md
index eb684f23f..74bbf2a4f 100644
--- a/README.md
+++ b/README.md
@@ -20,12 +20,12 @@ new plugins.
 ### Linux deb and rpm Packages:
 
 Latest:
-* https://dl.influxdata.com/telegraf/releases/telegraf_0.13.1_amd64.deb
-* https://dl.influxdata.com/telegraf/releases/telegraf-0.13.1.x86_64.rpm
+* https://dl.influxdata.com/telegraf/releases/telegraf_1.0.0-beta3_amd64.deb
+* https://dl.influxdata.com/telegraf/releases/telegraf-1.0.0_beta3.x86_64.rpm
 
 Latest (arm):
-* https://dl.influxdata.com/telegraf/releases/telegraf_0.13.1_armhf.deb
-* https://dl.influxdata.com/telegraf/releases/telegraf-0.13.1.armhf.rpm
+* https://dl.influxdata.com/telegraf/releases/telegraf_1.0.0-beta3_armhf.deb
+* https://dl.influxdata.com/telegraf/releases/telegraf-1.0.0_beta3.armhf.rpm
 
 ##### Package Instructions:
 
@@ -46,14 +46,14 @@ to use this repo to install & update telegraf.
 ### Linux tarballs:
 
 Latest:
-* https://dl.influxdata.com/telegraf/releases/telegraf-0.13.1_linux_amd64.tar.gz
-* https://dl.influxdata.com/telegraf/releases/telegraf-0.13.1_linux_i386.tar.gz
-* https://dl.influxdata.com/telegraf/releases/telegraf-0.13.1_linux_armhf.tar.gz
+* https://dl.influxdata.com/telegraf/releases/telegraf-1.0.0-beta3_linux_amd64.tar.gz
+* https://dl.influxdata.com/telegraf/releases/telegraf-1.0.0-beta3_linux_i386.tar.gz
+* https://dl.influxdata.com/telegraf/releases/telegraf-1.0.0-beta3_linux_armhf.tar.gz
 
 ### FreeBSD tarball:
 
 Latest:
-* https://dl.influxdata.com/telegraf/releases/telegraf-0.13.1_freebsd_amd64.tar.gz
+* https://dl.influxdata.com/telegraf/releases/telegraf-1.0.0-beta3_freebsd_amd64.tar.gz
 
 ### Ansible Role:
 
@@ -69,8 +69,7 @@ brew install telegraf
 ### Windows Binaries (EXPERIMENTAL)
 
 Latest:
-* https://dl.influxdata.com/telegraf/releases/telegraf-0.13.1_windows_amd64.zip
-* https://dl.influxdata.com/telegraf/releases/telegraf-0.13.1_windows_i386.zip
+* https://dl.influxdata.com/telegraf/releases/telegraf-1.0.0-beta3_windows_amd64.zip
 
 ### From Source:
 
@@ -157,6 +156,7 @@ Currently implemented sources:
 * [exec](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/exec) (generic executable plugin, supports JSON, influx, graphite and nagios)
 * [filestat](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/filestat)
 * [haproxy](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/haproxy)
+* [hddtemp](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/hddtemp)
 * [http_response](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/http_response)
 * [httpjson](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/httpjson) (generic JSON-emitting http service plugin)
 * [influxdb](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/influxdb)
@@ -188,7 +188,7 @@ Currently implemented sources:
 * [redis](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/redis)
 * [rethinkdb](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/rethinkdb)
 * [riak](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/riak)
-* [sensors ](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/sensors) (only available if built from source)
+* [sensors](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/sensors)
 * [snmp](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/snmp)
 * [sql server](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/sqlserver) (microsoft)
 * [twemproxy](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/twemproxy)
@@ -218,8 +218,11 @@ Telegraf can also collect metrics via the following service plugins:
 * [mqtt_consumer](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/mqtt_consumer)
 * [kafka_consumer](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/kafka_consumer)
 * [nats_consumer](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/nats_consumer)
-* [github_webhooks](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/github_webhooks)
-* [rollbar_webhooks](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/rollbar_webhooks)
+* [webhooks](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/webhooks)
+  * [github](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/webhooks/github)
+  * [mandrill](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/webhooks/mandrill)
+  * [rollbar](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/webhooks/rollbar)
+* [nsq_consumer](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/nsq_consumer)
 
 We'll be adding support for many more over the coming months. Read on if you
 want to add support for another service or third-party API.
diff --git a/accumulator.go b/accumulator.go
index cbea58ebf..1fdba8f99 100644
--- a/accumulator.go
+++ b/accumulator.go
@@ -16,6 +16,12 @@ type Accumulator interface {
 		tags map[string]string,
 		t ...time.Time)
 
+	AddError(err error)
+
 	Debug() bool
 	SetDebug(enabled bool)
+
+	SetPrecision(precision, interval time.Duration)
+
+	DisablePrecision()
 }
diff --git a/agent/accumulator.go b/agent/accumulator.go
index d6ff8de60..f6863b745 100644
--- a/agent/accumulator.go
+++ b/agent/accumulator.go
@@ -4,6 +4,7 @@ import (
 	"fmt"
 	"log"
 	"math"
+	"sync/atomic"
 	"time"
 
 	"github.com/influxdata/telegraf"
@@ -11,12 +12,13 @@ import (
 )
 
 func NewAccumulator(
-	inputConfig *internal_models.InputConfig,
+	inputConfig *models.InputConfig,
 	metrics chan telegraf.Metric,
 ) *accumulator {
 	acc := accumulator{}
 	acc.metrics = metrics
 	acc.inputConfig = inputConfig
+	acc.precision = time.Nanosecond
 	return &acc
 }
 
@@ -29,9 +31,11 @@ type accumulator struct {
 	// print every point added to the accumulator
 	trace bool
 
-	inputConfig *internal_models.InputConfig
+	inputConfig *models.InputConfig
 
-	prefix string
+	precision time.Duration
+
+	errCount uint64
 }
 
 func (ac *accumulator) Add(
@@ -141,10 +145,7 @@ func (ac *accumulator) AddFields(
 	} else {
 		timestamp = time.Now()
 	}
-
-	if ac.prefix != "" {
-		measurement = ac.prefix + measurement
-	}
+	timestamp = timestamp.Round(ac.precision)
 
 	m, err := telegraf.NewMetric(measurement, tags, result, timestamp)
 	if err != nil {
@@ -157,6 +158,17 @@ func (ac *accumulator) AddFields(
 	ac.metrics <- m
 }
 
+// AddError passes a runtime error to the accumulator.
+// The error will be tagged with the plugin name and written to the log.
+func (ac *accumulator) AddError(err error) {
+	if err == nil {
+		return
+	}
+	atomic.AddUint64(&ac.errCount, 1)
+	// TODO: suppress/throttle consecutive duplicate errors?
+	log.Printf("ERROR in input [%s]: %s", ac.inputConfig.Name, err)
+}
+
 func (ac *accumulator) Debug() bool {
 	return ac.debug
 }
@@ -173,6 +185,31 @@ func (ac *accumulator) SetTrace(trace bool) {
 	ac.trace = trace
 }
 
+// SetPrecision takes two time.Duration objects. If the first is non-zero,
+// it sets that as the precision. Otherwise, it takes the second argument
+// as the order of time that the metrics should be rounded to, with the
+// maximum being 1s.
+func (ac *accumulator) SetPrecision(precision, interval time.Duration) {
+	if precision > 0 {
+		ac.precision = precision
+		return
+	}
+	switch {
+	case interval >= time.Second:
+		ac.precision = time.Second
+	case interval >= time.Millisecond:
+		ac.precision = time.Millisecond
+	case interval >= time.Microsecond:
+		ac.precision = time.Microsecond
+	default:
+		ac.precision = time.Nanosecond
+	}
+}
+
+func (ac *accumulator) DisablePrecision() {
+	ac.precision = time.Nanosecond
+}
+
 func (ac *accumulator) setDefaultTags(tags map[string]string) {
 	ac.defaultTags = tags
 }
diff --git a/agent/accumulator_test.go b/agent/accumulator_test.go
index ee8f65e48..4dd69985f 100644
--- a/agent/accumulator_test.go
+++ b/agent/accumulator_test.go
@@ -1,8 +1,11 @@
 package agent
 
 import (
+	"bytes"
 	"fmt"
+	"log"
 	"math"
+	"os"
 	"testing"
 	"time"
 
@@ -10,6 +13,7 @@ import (
 	"github.com/influxdata/telegraf/internal/models"
 
 	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
 )
 
 func TestAdd(t *testing.T) {
@@ -17,7 +21,7 @@ func TestAdd(t *testing.T) {
 	now := time.Now()
 	a.metrics = make(chan telegraf.Metric, 10)
 	defer close(a.metrics)
-	a.inputConfig = &internal_models.InputConfig{}
+	a.inputConfig = &models.InputConfig{}
 
 	a.Add("acctest", float64(101), map[string]string{})
 	a.Add("acctest", float64(101), map[string]string{"acc": "test"})
@@ -38,13 +42,135 @@ func TestAdd(t *testing.T) {
 		actual)
 }
 
+func TestAddNoPrecisionWithInterval(t *testing.T) {
+	a := accumulator{}
+	now := time.Date(2006, time.February, 10, 12, 0, 0, 82912748, time.UTC)
+	a.metrics = make(chan telegraf.Metric, 10)
+	defer close(a.metrics)
+	a.inputConfig = &models.InputConfig{}
+
+	a.SetPrecision(0, time.Second)
+	a.Add("acctest", float64(101), map[string]string{})
+	a.Add("acctest", float64(101), map[string]string{"acc": "test"})
+	a.Add("acctest", float64(101), map[string]string{"acc": "test"}, now)
+
+	testm := <-a.metrics
+	actual := testm.String()
+	assert.Contains(t, actual, "acctest value=101")
+
+	testm = <-a.metrics
+	actual = testm.String()
+	assert.Contains(t, actual, "acctest,acc=test value=101")
+
+	testm = <-a.metrics
+	actual = testm.String()
+	assert.Equal(t,
+		fmt.Sprintf("acctest,acc=test value=101 %d", int64(1139572800000000000)),
+		actual)
+}
+
+func TestAddNoIntervalWithPrecision(t *testing.T) {
+	a := accumulator{}
+	now := time.Date(2006, time.February, 10, 12, 0, 0, 82912748, time.UTC)
+	a.metrics = make(chan telegraf.Metric, 10)
+	defer close(a.metrics)
+	a.inputConfig = &models.InputConfig{}
+
+	a.SetPrecision(time.Second, time.Millisecond)
+	a.Add("acctest", float64(101), map[string]string{})
+	a.Add("acctest", float64(101), map[string]string{"acc": "test"})
+	a.Add("acctest", float64(101), map[string]string{"acc": "test"}, now)
+
+	testm := <-a.metrics
+	actual := testm.String()
+	assert.Contains(t, actual, "acctest value=101")
+
+	testm = <-a.metrics
+	actual = testm.String()
+	assert.Contains(t, actual, "acctest,acc=test value=101")
+
+	testm = <-a.metrics
+	actual = testm.String()
+	assert.Equal(t,
+		fmt.Sprintf("acctest,acc=test value=101 %d", int64(1139572800000000000)),
+		actual)
+}
+
+func TestAddDisablePrecision(t *testing.T) {
+	a := accumulator{}
+	now := time.Date(2006, time.February, 10, 12, 0, 0, 82912748, time.UTC)
+	a.metrics = make(chan telegraf.Metric, 10)
+	defer close(a.metrics)
+	a.inputConfig = &models.InputConfig{}
+
+	a.SetPrecision(time.Second, time.Millisecond)
+	a.DisablePrecision()
+	a.Add("acctest", float64(101), map[string]string{})
+	a.Add("acctest", float64(101), map[string]string{"acc": "test"})
+	a.Add("acctest", float64(101), map[string]string{"acc": "test"}, now)
+
+	testm := <-a.metrics
+	actual := testm.String()
+	assert.Contains(t, actual, "acctest value=101")
+
+	testm = <-a.metrics
+	actual = testm.String()
+	assert.Contains(t, actual, "acctest,acc=test value=101")
+
+	testm = <-a.metrics
+	actual = testm.String()
+	assert.Equal(t,
+		fmt.Sprintf("acctest,acc=test value=101 %d", int64(1139572800082912748)),
+		actual)
+}
+
+func TestDifferentPrecisions(t *testing.T) {
+	a := accumulator{}
+	now := time.Date(2006, time.February, 10, 12, 0, 0, 82912748, time.UTC)
+	a.metrics = make(chan telegraf.Metric, 10)
+	defer close(a.metrics)
+	a.inputConfig = &models.InputConfig{}
+
+	a.SetPrecision(0, time.Second)
+	a.Add("acctest", float64(101), map[string]string{"acc": "test"}, now)
+	testm := <-a.metrics
+	actual := testm.String()
+	assert.Equal(t,
+		fmt.Sprintf("acctest,acc=test value=101 %d", int64(1139572800000000000)),
+		actual)
+
+	a.SetPrecision(0, time.Millisecond)
+	a.Add("acctest", float64(101), map[string]string{"acc": "test"}, now)
+	testm = <-a.metrics
+	actual = testm.String()
+	assert.Equal(t,
+		fmt.Sprintf("acctest,acc=test value=101 %d", int64(1139572800083000000)),
+		actual)
+
+	a.SetPrecision(0, time.Microsecond)
+	a.Add("acctest", float64(101), map[string]string{"acc": "test"}, now)
+	testm = <-a.metrics
+	actual = testm.String()
+	assert.Equal(t,
+		fmt.Sprintf("acctest,acc=test value=101 %d", int64(1139572800082913000)),
+		actual)
+
+	a.SetPrecision(0, time.Nanosecond)
+	a.Add("acctest", float64(101), map[string]string{"acc": "test"}, now)
+	testm = <-a.metrics
+	actual = testm.String()
+	assert.Equal(t,
+		fmt.Sprintf("acctest,acc=test value=101 %d", int64(1139572800082912748)),
+		actual)
+}
+
 func TestAddDefaultTags(t *testing.T) {
 	a := accumulator{}
 	a.addDefaultTag("default", "tag")
 	now := time.Now()
 	a.metrics = make(chan telegraf.Metric, 10)
 	defer close(a.metrics)
-	a.inputConfig = &internal_models.InputConfig{}
+	a.inputConfig = &models.InputConfig{}
 
 	a.Add("acctest", float64(101), map[string]string{})
 	a.Add("acctest", float64(101), map[string]string{"acc": "test"})
@@ -70,7 +196,7 @@ func TestAddFields(t *testing.T) {
 	now := time.Now()
 	a.metrics = make(chan telegraf.Metric, 10)
 	defer close(a.metrics)
-	a.inputConfig = &internal_models.InputConfig{}
+	a.inputConfig = &models.InputConfig{}
 
 	fields := map[string]interface{}{
 		"usage": float64(99),
@@ -103,7 +229,7 @@ func TestAddInfFields(t *testing.T) {
 	now := time.Now()
 	a.metrics = make(chan telegraf.Metric, 10)
 	defer close(a.metrics)
-	a.inputConfig = &internal_models.InputConfig{}
+	a.inputConfig = &models.InputConfig{}
 
 	fields := map[string]interface{}{
 		"usage": inf,
@@ -131,7 +257,7 @@ func TestAddNaNFields(t *testing.T) {
 	now := time.Now()
 	a.metrics = make(chan telegraf.Metric, 10)
 	defer close(a.metrics)
-	a.inputConfig = &internal_models.InputConfig{}
+	a.inputConfig = &models.InputConfig{}
 
 	fields := map[string]interface{}{
 		"usage": nan,
@@ -155,7 +281,7 @@ func TestAddUint64Fields(t *testing.T) {
 	now := time.Now()
 	a.metrics = make(chan telegraf.Metric, 10)
 	defer close(a.metrics)
-	a.inputConfig = &internal_models.InputConfig{}
+	a.inputConfig = &models.InputConfig{}
 
 	fields := map[string]interface{}{
 		"usage": uint64(99),
@@ -184,7 +310,7 @@ func TestAddUint64Overflow(t *testing.T) {
 	now := time.Now()
 	a.metrics = make(chan telegraf.Metric, 10)
 	defer close(a.metrics)
-	a.inputConfig = &internal_models.InputConfig{}
+	a.inputConfig = &models.InputConfig{}
 
 	fields := map[string]interface{}{
 		"usage": uint64(9223372036854775808),
@@ -214,7 +340,7 @@ func TestAddInts(t *testing.T) {
 	now := time.Now()
 	a.metrics = make(chan telegraf.Metric, 10)
 	defer close(a.metrics)
-	a.inputConfig = &internal_models.InputConfig{}
+	a.inputConfig = &models.InputConfig{}
 
 	a.Add("acctest", int(101), map[string]string{})
 	a.Add("acctest", int32(101), map[string]string{"acc": "test"})
@@ -241,7 +367,7 @@ func TestAddFloats(t *testing.T) {
 	now := time.Now()
 	a.metrics = make(chan telegraf.Metric, 10)
 	defer close(a.metrics)
-	a.inputConfig = &internal_models.InputConfig{}
+	a.inputConfig = &models.InputConfig{}
 
 	a.Add("acctest", float32(101), map[string]string{"acc": "test"})
 	a.Add("acctest", float64(101), map[string]string{"acc": "test"}, now)
@@ -263,7 +389,7 @@ func TestAddStrings(t *testing.T) {
 	now := time.Now()
 	a.metrics = make(chan telegraf.Metric, 10)
 	defer close(a.metrics)
-	a.inputConfig = &internal_models.InputConfig{}
+	a.inputConfig = &models.InputConfig{}
 
 	a.Add("acctest", "test", map[string]string{"acc": "test"})
 	a.Add("acctest", "foo", map[string]string{"acc": "test"}, now)
@@ -285,7 +411,7 @@ func TestAddBools(t *testing.T) {
 	now := time.Now()
 	a.metrics = make(chan telegraf.Metric, 10)
 	defer close(a.metrics)
-	a.inputConfig = &internal_models.InputConfig{}
+	a.inputConfig = &models.InputConfig{}
 
 	a.Add("acctest", true, map[string]string{"acc": "test"})
 	a.Add("acctest", false, map[string]string{"acc": "test"}, now)
@@ -307,11 +433,11 @@ func TestAccFilterTags(t *testing.T) {
 	now := time.Now()
 	a.metrics = make(chan telegraf.Metric, 10)
 	defer close(a.metrics)
-	filter := internal_models.Filter{
+	filter := models.Filter{
 		TagExclude: []string{"acc"},
 	}
 	assert.NoError(t, filter.CompileFilter())
-	a.inputConfig = &internal_models.InputConfig{}
+	a.inputConfig = &models.InputConfig{}
 	a.inputConfig.Filter = filter
 
 	a.Add("acctest", float64(101), map[string]string{})
@@ -332,3 +458,27 @@ func TestAccFilterTags(t *testing.T) {
 		fmt.Sprintf("acctest value=101 %d", now.UnixNano()),
 		actual)
 }
+
+func TestAccAddError(t *testing.T) {
+	errBuf := bytes.NewBuffer(nil)
+	log.SetOutput(errBuf)
+	defer log.SetOutput(os.Stderr)
+
+	a := accumulator{}
+	a.inputConfig = &models.InputConfig{}
+	a.inputConfig.Name = "mock_plugin"
+
+	a.AddError(fmt.Errorf("foo"))
+	a.AddError(fmt.Errorf("bar"))
+	a.AddError(fmt.Errorf("baz"))
+
+	errs := bytes.Split(errBuf.Bytes(), []byte{'\n'})
+	assert.EqualValues(t, 3, a.errCount)
+	require.Len(t, errs, 4) // 4 because of trailing newline
+	assert.Contains(t, string(errs[0]), "mock_plugin")
+	assert.Contains(t, string(errs[0]), "foo")
+	assert.Contains(t, string(errs[1]), "mock_plugin")
+	assert.Contains(t, string(errs[1]), "bar")
+	assert.Contains(t, string(errs[2]), "mock_plugin")
+	assert.Contains(t, string(errs[2]), "baz")
+}
diff --git a/agent/agent.go b/agent/agent.go
index 1423ef773..d86037e79 100644
--- a/agent/agent.go
+++ b/agent/agent.go
@@ -88,7 +88,7 @@ func (a *Agent) Close() error {
 	return err
 }
 
-func panicRecover(input *internal_models.RunningInput) {
+func panicRecover(input *models.RunningInput) {
 	if err := recover(); err != nil {
 		trace := make([]byte, 2048)
 		runtime.Stack(trace, true)
@@ -104,7 +104,7 @@
 // reporting interval.
 func (a *Agent) gatherer(
 	shutdown chan struct{},
-	input *internal_models.RunningInput,
+	input *models.RunningInput,
 	interval time.Duration,
 	metricC chan telegraf.Metric,
 ) error {
@@ -118,6 +118,8 @@ func (a *Agent) gatherer(
 		acc := NewAccumulator(input.Config, metricC)
 		acc.SetDebug(a.Config.Agent.Debug)
+		acc.SetPrecision(a.Config.Agent.Precision.Duration,
+			a.Config.Agent.Interval.Duration)
 		acc.setDefaultTags(a.Config.Tags)
 
 		internal.RandomSleep(a.Config.Agent.CollectionJitter.Duration, shutdown)
@@ -150,7 +152,7 @@
 // over.
 func gatherWithTimeout(
 	shutdown chan struct{},
-	input *internal_models.RunningInput,
+	input *models.RunningInput,
 	acc *accumulator,
 	timeout time.Duration,
 ) {
@@ -201,6 +203,8 @@ func (a *Agent) Test() error {
 	for _, input := range a.Config.Inputs {
 		acc := NewAccumulator(input.Config, metricC)
 		acc.SetTrace(true)
+		acc.SetPrecision(a.Config.Agent.Precision.Duration,
+			a.Config.Agent.Interval.Duration)
 		acc.setDefaultTags(a.Config.Tags)
 
 		fmt.Printf("* Plugin: %s, Collection 1\n", input.Name)
@@ -211,6 +215,9 @@ func (a *Agent) Test() error {
 		if err := input.Input.Gather(acc); err != nil {
 			return err
 		}
+		if acc.errCount > 0 {
+			return fmt.Errorf("Errors encountered during processing")
+		}
 
 		// Special instructions for some inputs. cpu, for example, needs to be
 		// run twice in order to return cpu usage percentages.
@@ -233,7 +240,7 @@ func (a *Agent) flush() {
 	wg.Add(len(a.Config.Outputs))
 	for _, o := range a.Config.Outputs {
-		go func(output *internal_models.RunningOutput) {
+		go func(output *models.RunningOutput) {
 			defer wg.Done()
 			err := output.Write()
 			if err != nil {
@@ -264,13 +271,33 @@ func (a *Agent) flusher(shutdown chan struct{}, metricC chan telegraf.Metric) er
 			internal.RandomSleep(a.Config.Agent.FlushJitter.Duration, shutdown)
 			a.flush()
 		case m := <-metricC:
-			for _, o := range a.Config.Outputs {
-				o.AddMetric(m)
+			for i, o := range a.Config.Outputs {
+				if i == len(a.Config.Outputs)-1 {
+					o.AddMetric(m)
+				} else {
+					o.AddMetric(copyMetric(m))
+				}
 			}
 		}
 	}
 }
 
+func copyMetric(m telegraf.Metric) telegraf.Metric {
+	t := time.Time(m.Time())
+
+	tags := make(map[string]string)
+	fields := make(map[string]interface{})
+	for k, v := range m.Tags() {
+		tags[k] = v
+	}
+	for k, v := range m.Fields() {
+		fields[k] = v
+	}
+
+	out, _ := telegraf.NewMetric(m.Name(), tags, fields, t)
+	return out
+}
+
 // Run runs the agent daemon, gathering every Interval
 func (a *Agent) Run(shutdown chan struct{}) error {
 	var wg sync.WaitGroup
@@ -289,6 +316,9 @@ func (a *Agent) Run(shutdown chan struct{}) error {
 		case telegraf.ServiceInput:
 			acc := NewAccumulator(input.Config, metricC)
 			acc.SetDebug(a.Config.Agent.Debug)
+			// Service input plugins should set the precision of their own
+			// metrics.
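+			// (The agent-level `precision` setting is documented as not
+			// applying to service inputs such as statsd and logparser, so
+			// rounding is disabled for them here.)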
+			acc.DisablePrecision()
 			acc.setDefaultTags(a.Config.Tags)
 			if err := p.Start(acc); err != nil {
 				log.Printf("Service for input %s failed to start, exiting\n%s\n",
@@ -321,7 +351,7 @@ func (a *Agent) Run(shutdown chan struct{}) error {
 		if input.Config.Interval != 0 {
 			interval = input.Config.Interval
 		}
-		go func(in *internal_models.RunningInput, interv time.Duration) {
+		go func(in *models.RunningInput, interv time.Duration) {
 			defer wg.Done()
 			if err := a.gatherer(shutdown, in, interv, metricC); err != nil {
 				log.Printf(err.Error())
diff --git a/cmd/telegraf/telegraf.go b/cmd/telegraf/telegraf.go
index 6681ad073..f19b127a8 100644
--- a/cmd/telegraf/telegraf.go
+++ b/cmd/telegraf/telegraf.go
@@ -6,6 +6,7 @@ import (
 	"log"
 	"os"
 	"os/signal"
+	"runtime"
 	"strings"
 	"syscall"
 
@@ -15,6 +16,7 @@ import (
 	_ "github.com/influxdata/telegraf/plugins/inputs/all"
 	"github.com/influxdata/telegraf/plugins/outputs"
 	_ "github.com/influxdata/telegraf/plugins/outputs/all"
+	"github.com/kardianos/service"
 )
 
 var fDebug = flag.Bool("debug", false,
@@ -39,12 +41,8 @@ var fOutputList = flag.Bool("output-list", false,
 	"print available output plugins.")
 var fUsage = flag.String("usage", "",
 	"print usage for a plugin, ie, 'telegraf -usage mysql'")
-var fInputFiltersLegacy = flag.String("filter", "",
	"filter the inputs to enable, separator is :")
-var fOutputFiltersLegacy = flag.String("outputfilter", "",
	"filter the outputs to enable, separator is :")
-var fConfigDirectoryLegacy = flag.String("configdirectory", "",
	"directory containing additional *.conf files")
+var fService = flag.String("service", "",
	"operate on the service")
 
 // Telegraf version, populated by the linker.
 // ie, -ldflags "-X main.version=`git describe --always --tags`"
@@ -74,6 +72,7 @@ The flags are:
   -debug             print metrics as they're generated to stdout
   -quiet             run in quiet mode
   -version           print the version to stdout
+  -service           Control the service, ie, 'telegraf -service install' (windows only)
 
 In addition to the -config flag, telegraf will also load the config file
 from an environment variable or default location. Precedence is:
@@ -100,7 +99,22 @@ Examples:
   telegraf -config telegraf.conf -input-filter cpu:mem -output-filter influxdb
 `
 
-func main() {
+var logger service.Logger
+
+var stop chan struct{}
+
+var srvc service.Service
+var svcConfig *service.Config
+
+type program struct{}
+
+func reloadLoop(stop chan struct{}, s service.Service) {
+	defer func() {
+		if service.Interactive() {
+			os.Exit(0)
+		}
+		return
+	}()
 	reload := make(chan bool, 1)
 	reload <- true
 	for <-reload {
@@ -110,24 +124,11 @@ func main() {
 		args := flag.Args()
 
 		var inputFilters []string
-		if *fInputFiltersLegacy != "" {
-			fmt.Printf("WARNING '--filter' flag is deprecated, please use" +
-				" '--input-filter'")
-			inputFilter := strings.TrimSpace(*fInputFiltersLegacy)
-			inputFilters = strings.Split(":"+inputFilter+":", ":")
-		}
 		if *fInputFilters != "" {
 			inputFilter := strings.TrimSpace(*fInputFilters)
 			inputFilters = strings.Split(":"+inputFilter+":", ":")
 		}
 
-		var outputFilters []string
-		if *fOutputFiltersLegacy != "" {
-			fmt.Printf("WARNING '--outputfilter' flag is deprecated, please use" +
-				" '--output-filter'")
-			outputFilter := strings.TrimSpace(*fOutputFiltersLegacy)
-			outputFilters = strings.Split(":"+outputFilter+":", ":")
-		}
 		if *fOutputFilters != "" {
 			outputFilter := strings.TrimSpace(*fOutputFilters)
 			outputFilters = strings.Split(":"+outputFilter+":", ":")
@@ -145,40 +146,43 @@ func main() {
 			}
 		}
 
-		if *fOutputList {
+		// switch for flags which just do something and exit immediately
+		switch {
+		case *fOutputList:
 			fmt.Println("Available Output Plugins:")
 			for k, _ := range outputs.Outputs {
 				fmt.Printf("  %s\n", k)
 			}
 			return
-		}
-
-		if *fInputList {
+		case *fInputList:
 			fmt.Println("Available Input Plugins:")
 			for k, _ := range inputs.Inputs {
 				fmt.Printf("  %s\n", k)
 			}
 			return
-		}
-
-		if *fVersion {
+		case *fVersion:
 			v := fmt.Sprintf("Telegraf - version %s", version)
 			fmt.Println(v)
 			return
-		}
-
-		if *fSampleConfig {
+		case *fSampleConfig:
 			config.PrintSampleConfig(inputFilters, outputFilters)
 			return
-		}
-
-		if *fUsage != "" {
+		case *fUsage != "":
 			if err := config.PrintInputConfig(*fUsage); err != nil {
 				if err2 := config.PrintOutputConfig(*fUsage); err2 != nil {
 					log.Fatalf("%s and %s", err, err2)
 				}
 			}
 			return
+		case *fService != "" && runtime.GOOS == "windows":
+			if *fConfig != "" {
+				(*svcConfig).Arguments = []string{"-config", *fConfig}
+			}
+			err := service.Control(s, *fService)
+			if err != nil {
+				log.Fatal(err)
+			}
+			return
 		}
 
 		// If no other options are specified, load the config file and run.
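For reference, the `--service` case above passes the flag value straight to `service.Control` from the kardianos/service library, so the accepted actions are the ones listed in the table in docs/WINDOWS_SERVICE.md further down. A hypothetical session, assuming the default install path:

```
> "C:\Program Files\Telegraf\telegraf.exe" --service install
> net start telegraf
> net stop telegraf
> "C:\Program Files\Telegraf\telegraf.exe" --service uninstall
```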
@@ -191,15 +195,6 @@
 		os.Exit(1)
 	}
 
-	if *fConfigDirectoryLegacy != "" {
-		fmt.Printf("WARNING '--configdirectory' flag is deprecated, please use" +
-			" '--config-directory'")
-		err = c.LoadDirectory(*fConfigDirectoryLegacy)
-		if err != nil {
-			log.Fatal(err)
-		}
-	}
-
 	if *fConfigDirectory != "" {
 		err = c.LoadDirectory(*fConfigDirectory)
 		if err != nil {
@@ -243,14 +238,18 @@
 	signals := make(chan os.Signal)
 	signal.Notify(signals, os.Interrupt, syscall.SIGHUP)
 	go func() {
-		sig := <-signals
-		if sig == os.Interrupt {
-			close(shutdown)
-		}
-		if sig == syscall.SIGHUP {
-			log.Printf("Reloading Telegraf config\n")
-			<-reload
-			reload <- true
+		select {
+		case sig := <-signals:
+			if sig == os.Interrupt {
+				close(shutdown)
+			}
+			if sig == syscall.SIGHUP {
+				log.Printf("Reloading Telegraf config\n")
+				<-reload
+				reload <- true
+				close(shutdown)
+			}
+		case <-stop:
 			close(shutdown)
 		}
 	}()
@@ -279,3 +278,46 @@ func usageExit(rc int) {
 	fmt.Println(usage)
 	os.Exit(rc)
 }
+
+func (p *program) Start(s service.Service) error {
+	srvc = s
+	go p.run()
+	return nil
+}
+func (p *program) run() {
+	stop = make(chan struct{})
+	reloadLoop(stop, srvc)
+}
+func (p *program) Stop(s service.Service) error {
+	close(stop)
+	return nil
+}
+
+func main() {
+	if runtime.GOOS == "windows" {
+		svcConfig = &service.Config{
+			Name:        "telegraf",
+			DisplayName: "Telegraf Data Collector Service",
+			Description: "Collects data using a series of plugins and publishes it to " +
+				"another series of plugins.",
+			Arguments: []string{"-config", "C:\\Program Files\\Telegraf\\telegraf.conf"},
+		}
+
+		prg := &program{}
+		s, err := service.New(prg, svcConfig)
+		if err != nil {
+			log.Fatal(err)
+		}
+		logger, err = s.Logger(nil)
+		if err != nil {
+			log.Fatal(err)
+		}
+		err = s.Run()
+		if err != nil {
+			logger.Error(err)
+		}
+	} else {
+		stop = make(chan struct{})
+		reloadLoop(stop, nil)
+	}
+}
diff --git a/docs/LICENSE_OF_DEPENDENCIES.md b/docs/LICENSE_OF_DEPENDENCIES.md
index d448872f6..5553fda70 100644
--- a/docs/LICENSE_OF_DEPENDENCIES.md
+++ b/docs/LICENSE_OF_DEPENDENCIES.md
@@ -16,6 +16,7 @@
 - github.com/hashicorp/go-msgpack [BSD LICENSE](https://github.com/hashicorp/go-msgpack/blob/master/LICENSE)
 - github.com/hashicorp/raft [MPL LICENSE](https://github.com/hashicorp/raft/blob/master/LICENSE)
 - github.com/hashicorp/raft-boltdb [MPL LICENSE](https://github.com/hashicorp/raft-boltdb/blob/master/LICENSE)
+- github.com/kardianos/service [ZLIB LICENSE](https://github.com/kardianos/service/blob/master/LICENSE) (license not named, but matches zlib word for word)
 - github.com/lib/pq [MIT LICENSE](https://github.com/lib/pq/blob/master/LICENSE.md)
 - github.com/matttproud/golang_protobuf_extensions [APACHE LICENSE](https://github.com/matttproud/golang_protobuf_extensions/blob/master/LICENSE)
 - github.com/naoina/go-stringutil [MIT LICENSE](https://github.com/naoina/go-stringutil/blob/master/LICENSE)
diff --git a/docs/WINDOWS_SERVICE.md b/docs/WINDOWS_SERVICE.md
index 679a41527..0ef218350 100644
--- a/docs/WINDOWS_SERVICE.md
+++ b/docs/WINDOWS_SERVICE.md
@@ -1,36 +1,40 @@
 # Running Telegraf as a Windows Service
 
-If you have tried to install Go binaries as Windows Services with the **sc.exe**
-tool you may have seen that the service errors and stops running after a while.
+Telegraf natively supports running as a Windows Service. Outlined below are
-**NSSM** (the Non-Sucking Service Manager) is a tool that helps you in a -[number of scenarios](http://nssm.cc/scenarios) including running Go binaries -that were not specifically designed to run only in Windows platforms. +1. Obtain the telegraf windows distribution +2. Create the directory `C:\Program Files\Telegraf` (if you install in a different + location simply specify the `-config` parameter with the desired location) +3. Place the telegraf.exe and the config file into `C:\Program Files\Telegraf` +4. To install the service into the Windows Service Manager, run (as an + administrator): -## NSSM Installation via Chocolatey + ``` + > C:\Program Files\Telegraf\telegraf.exe --service install + ``` -You can install [Chocolatey](https://chocolatey.org/) and [NSSM](http://nssm.cc/) -with these commands +5. Edit the configuration file to meet your needs +6. To check that it works, run: -```powershell -iex ((new-object net.webclient).DownloadString('https://chocolatey.org/install.ps1')) -choco install -y nssm -``` + ``` + > C:\Program Files\Telegraf\telegraf.exe --config C:\Program Files\Telegraf\telegraf.conf --test + ``` -## Installing Telegraf as a Windows Service with NSSM +7. To start collecting data, run: -You can download the latest Telegraf Windows binaries (still Experimental at -the moment) from [the Telegraf Github repo](https://github.com/influxdata/telegraf). + ``` + > net start telegraf + ``` -Then you can create a C:\telegraf folder, unzip the binary there and modify the -**telegraf.conf** sample to allocate the metrics you want to send to **InfluxDB**. +## Other supported operations -Once you have NSSM installed in your system, the process is quite straightforward. -You only need to type this command in your Windows shell +Telegraf can manage its own service through the --service flag: -```powershell -nssm install Telegraf c:\telegraf\telegraf.exe -config c:\telegraf\telegraf.config -``` +| Command | Effect | +|------------------------------------|-------------------------------| +| `telegraf.exe --service install` | Install telegraf as a service | +| `telegraf.exe --service uninstall` | Remove the telegraf service | +| `telegraf.exe --service start` | Start the telegraf service | +| `telegraf.exe --service stop` | Stop the telegraf service | -And now your service will be installed in Windows and you will be able to start and -stop it gracefully \ No newline at end of file diff --git a/etc/telegraf.conf b/etc/telegraf.conf index 176b32f0f..902c7f7fb 100644 --- a/etc/telegraf.conf +++ b/etc/telegraf.conf @@ -52,6 +52,11 @@ ## ie, a jitter of 5s and interval 10s means flushes will happen every 10-15s flush_jitter = "0s" + ## By default, precision will be set to the same timestamp order as the + ## collection interval, with the maximum being 1s. + ## Precision will NOT be used for service inputs, such as logparser and statsd. + ## Valid values are "ns", "us" (or "µs"), "ms", "s". + precision = "" ## Run telegraf in debug mode debug = false ## Run telegraf in quiet mode @@ -75,13 +80,10 @@ urls = ["http://localhost:8086"] # required ## The target database for metrics (telegraf will create it if not exists). database = "telegraf" # required - ## Precision of writes, valid values are "ns", "us" (or "µs"), "ms", "s", "m", "h". - ## note: using "s" precision greatly improves InfluxDB compression. - precision = "s" - ## Retention policy to write to. 
- retention_policy = "default" - ## Write consistency (clusters only), can be: "any", "one", "quorom", "all" + ## Retention policy to write to. Empty string writes to the default rp. + retention_policy = "" + ## Write consistency (clusters only), can be: "any", "one", "quorum", "all" write_consistency = "any" ## Write timeout (for the InfluxDB client), formatted as a string. @@ -195,6 +197,8 @@ # # Configuration for Graphite server to send metrics to # [[outputs.graphite]] # ## TCP endpoint for your graphite instance. +# ## If multiple endpoints are configured, output will be load balanced. +# ## Only one of the endpoints will be written to with each iteration. # servers = ["localhost:2003"] # ## Prefix metrics name # prefix = "" @@ -317,14 +321,13 @@ # api_token = "my-secret-token" # required. # ## Debug # # debug = false -# ## Tag Field to populate source attribute (optional) -# ## This is typically the _hostname_ from which the metric was obtained. -# source_tag = "host" # ## Connection timeout. # # timeout = "5s" -# ## Output Name Template (same as graphite buckets) +# ## Output source Template (same as graphite buckets) # ## see https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_OUTPUT.md#graphite -# template = "host.tags.measurement.field" +# ## This template is used in librato's source (not metric's name) +# template = "host" +# # # Configuration for MQTT server to send metrics to @@ -432,8 +435,8 @@ ## disk partitions. ## Setting devices will restrict the stats to the specified devices. # devices = ["sda", "sdb"] - ## Uncomment the following line if you do not need disk serial numbers. - # skip_serial_number = true + ## Uncomment the following line if you need disk serial numbers. + # skip_serial_number = false # Get kernel statistics from /proc/stat @@ -461,7 +464,7 @@ # no configuration -# # Read stats from an aerospike server +# # Read stats from aerospike server(s) # [[inputs.aerospike]] # ## Aerospike servers to connect to (with port) # ## This plugin will query all namespaces the aerospike @@ -524,6 +527,19 @@ # socket_suffix = "asok" +# # Read specific statistics per cgroup +# [[inputs.cgroup]] +# ## Directories in which to look for files, globs are supported. +# # paths = [ +# # "/cgroup/memory", +# # "/cgroup/memory/child1", +# # "/cgroup/memory/child2/*", +# # ] +# ## cgroup stat fields, as file names, globs are supported. +# ## these file names are appended to each path from above. +# # files = ["memory.*usage*", "memory.limit_in_bytes"] + + # # Pull Metric Statistics from Amazon CloudWatch # [[inputs.cloudwatch]] # ## Amazon Region @@ -649,6 +665,13 @@ # container_names = [] # ## Timeout for docker list, info, and stats commands # timeout = "5s" +# +# ## Whether to report for each container per-device blkio (8:0, 8:1...) and +# ## network (eth0, eth1, ...) stats or not +# perdevice = true +# ## Whether to report for each container total blkio and network stats or not +# total = false +# # # Read statistics from one or many dovecot servers @@ -677,6 +700,13 @@ # # ## set cluster_health to true when you want to also obtain cluster level stats # cluster_health = false +# +# ## Optional SSL Config +# # ssl_ca = "/etc/telegraf/ca.pem" +# # ssl_cert = "/etc/telegraf/cert.pem" +# # ssl_key = "/etc/telegraf/key.pem" +# ## Use SSL but skip chain & host verification +# # insecure_skip_verify = false # # Read metrics from one or more commands that can output to stdout @@ -758,9 +788,11 @@ # [[inputs.haproxy]] # ## An array of address to gather stats about. 
Specify an ip or hostname # ## with optional port. ie localhost, 10.10.3.33:1936, etc. -# -# ## If no servers are specified, then default to 127.0.0.1:1936 -# servers = ["http://myhaproxy.com:1936", "http://anotherhaproxy.com:1936"] +# ## Make sure you specify the complete path to the stats endpoint +# ## ie 10.10.3.33:1936/haproxy?stats +# # +# ## If no servers are specified, then default to 127.0.0.1:1936/haproxy?stats +# servers = ["http://myhaproxy.com:1936/haproxy?stats"] # ## Or you can also use local socket # ## servers = ["socket:/run/haproxy/admin.sock"] @@ -946,21 +978,35 @@ # # Telegraf plugin for gathering metrics from N Mesos masters # [[inputs.mesos]] -# # Timeout, in ms. +# ## Timeout, in ms. # timeout = 100 -# # A list of Mesos masters, default value is localhost:5050. +# ## A list of Mesos masters. # masters = ["localhost:5050"] -# # Metrics groups to be collected, by default, all enabled. +# ## Master metrics groups to be collected, by default, all enabled. # master_collections = [ # "resources", # "master", # "system", -# "slaves", +# "agents", # "frameworks", +# "tasks", # "messages", # "evqueue", # "registrar", # ] +# ## A list of Mesos slaves, default is [] +# # slaves = [] +# ## Slave metrics groups to be collected, by default, all enabled. +# # slave_collections = [ +# # "resources", +# # "agent", +# # "system", +# # "executors", +# # "tasks", +# # "messages", +# # ] +# ## Include mesos tasks statistics, default is false +# # slave_tasks = true # # Read metrics from one or many MongoDB servers @@ -971,6 +1017,7 @@ # ## mongodb://10.10.3.33:18832, # ## 10.0.0.1:10000, etc. # servers = ["127.0.0.1:27017"] +# gather_perdb_stats = false # # Read metrics from one or many mysql servers @@ -1077,9 +1124,9 @@ # ## file paths for proc files. If empty default paths will be used: # ## /proc/net/netstat, /proc/net/snmp, /proc/net/snmp6 # ## These can also be overridden with env variables, see README. -# proc_net_netstat = "" -# proc_net_snmp = "" -# proc_net_snmp6 = "" +# proc_net_netstat = "/proc/net/netstat" +# proc_net_snmp = "/proc/net/snmp" +# proc_net_snmp6 = "/proc/net/snmp6" # ## dump metrics with 0 values too # dump_zeros = true @@ -1103,6 +1150,23 @@ # command = "passenger-status -v --show=xml" +# # Read metrics from one or many pgbouncer servers +# [[inputs.pgbouncer]] +# ## specify address via a url matching: +# ## postgres://[pqgotest[:password]]@localhost:port[/dbname]\ +# ## ?sslmode=[disable|verify-ca|verify-full] +# ## or a simple string: +# ## host=localhost user=pqotest port=6432 password=... sslmode=... dbname=pgbouncer +# ## +# ## All connection parameters are optional, except for dbname, +# ## which must always be set to pgbouncer. +# address = "host=localhost user=postgres port=6432 sslmode=disable dbname=pgbouncer" +# +# ## A list of databases to pull metrics about. If not specified, metrics for all +# ## databases are gathered. +# # databases = ["app_production", "testing"] + + # # Read metrics of phpfpm, via HTTP status page or socket # [[inputs.phpfpm]] # ## An array of addresses to gather stats about. Specify an ip or hostname @@ -1138,7 +1202,7 @@ # count = 1 # required # ## interval, in s, at which to ping. 0 == default (ping -i ) # ping_interval = 0.0 -# ## ping timeout, in s. 0 == no timeout (ping -W ) +# ## per-ping timeout, in s. 0 == no timeout (ping -W ) # timeout = 1.0 # ## interface to send ping from (ping -I ) # interface = "" @@ -1257,10 +1321,15 @@ # ## An array of urls to scrape metrics from. 
# urls = ["http://localhost:9100/metrics"] # -# ## Use SSL but skip chain & host verification -# # insecure_skip_verify = false # ## Use bearer token for authorization # # bearer_token = /path/to/bearer/token +# +# ## Optional SSL Config +# # ssl_ca = /path/to/cafile +# # ssl_cert = /path/to/certfile +# # ssl_key = /path/to/keyfile +# ## Use SSL but skip chain & host verification +# # insecure_skip_verify = false # # Reads last_run_summary.yaml file and converts to measurments @@ -1276,6 +1345,13 @@ # # username = "guest" # # password = "guest" # +# ## Optional SSL Config +# # ssl_ca = "/etc/telegraf/ca.pem" +# # ssl_cert = "/etc/telegraf/cert.pem" +# # ssl_key = "/etc/telegraf/key.pem" +# ## Use SSL but skip chain & host verification +# # insecure_skip_verify = false +# # ## A list of nodes to pull metrics about. If not specified, metrics for # ## all nodes are gathered. # # nodes = ["rabbit@node1", "rabbit@node2"] @@ -1294,6 +1370,7 @@ # ## e.g. # ## tcp://localhost:6379 # ## tcp://:password@192.168.99.100 +# ## unix:///var/run/redis.sock # ## # ## If no servers are specified, then localhost is used as the host. # ## If no port is specified, 6379 is used @@ -1316,8 +1393,8 @@ # servers = ["http://localhost:8098"] -# # Reads oids value from one or many snmp agents -# [[inputs.snmp]] +# # DEPRECATED! PLEASE USE inputs.snmp INSTEAD. +# [[inputs.snmp_legacy]] # ## Use 'oids.txt' file to translate oids to names # ## To generate 'oids.txt' you need to run: # ## snmptranslate -m all -Tz -On | sed -e 's/"//g' > /tmp/oids.txt @@ -1488,12 +1565,6 @@ # SERVICE INPUT PLUGINS # ############################################################################### -# # A Github Webhook Event collector -# [[inputs.github_webhooks]] -# ## Address and port to host Webhook listener on -# service_address = ":1618" - - # # Read metrics from Kafka topic(s) # [[inputs.kafka_consumer]] # ## topic(s) to consume @@ -1501,7 +1572,7 @@ # ## an array of Zookeeper connection strings # zookeeper_peers = ["localhost:2181"] # ## Zookeeper Chroot -# zookeeper_chroot = "/" +# zookeeper_chroot = "" # ## the name of the consumer group # consumer_group = "telegraf_metrics_consumers" # ## Offset (must be either "oldest" or "newest") @@ -1514,6 +1585,37 @@ # data_format = "influx" +# # Stream and parse log file(s). +# [[inputs.logparser]] +# ## Log files to parse. +# ## These accept standard unix glob matching rules, but with the addition of +# ## ** as a "super asterisk". ie: +# ## /var/log/**.log -> recursively find all .log files in /var/log +# ## /var/log/*/*.log -> find all .log files with a parent dir in /var/log +# ## /var/log/apache.log -> only tail the apache log file +# files = ["/var/log/apache/access.log"] +# ## Read file from beginning. +# from_beginning = false +# +# ## Parse logstash-style "grok" patterns: +# ## Telegraf built-in parsing patterns: https://goo.gl/dkay10 +# [inputs.logparser.grok] +# ## This is a list of patterns to check the given log file(s) for. +# ## Note that adding patterns here increases processing time. The most +# ## efficient configuration is to have one pattern per logparser. +# ## Other common built-in patterns are: +# ## %{COMMON_LOG_FORMAT} (plain apache & nginx access logs) +# ## %{COMBINED_LOG_FORMAT} (access logs + referrer & agent) +# patterns = ["%{COMBINED_LOG_FORMAT}"] +# ## Name of the outputted measurement name. +# measurement = "apache_access_log" +# ## Full path(s) to custom pattern files. +# custom_pattern_files = [] +# ## Custom patterns can also be defined here. 
Put one pattern per line. +# custom_patterns = ''' +# ''' + + # # Read metrics from MQTT topic(s) # [[inputs.mqtt_consumer]] # servers = ["localhost:1883"] @@ -1570,10 +1672,19 @@ # data_format = "influx" -# # A Rollbar Webhook Event collector -# [[inputs.rollbar_webhooks]] -# ## Address and port to host Webhook listener on -# service_address = ":1619" +# # Read NSQ topic for metrics. +# [[inputs.nsq_consumer]] +# ## A string representing the NSQD TCP Endpoint +# server = "localhost:4150" +# topic = "telegraf" +# channel = "consumer" +# max_in_flight = 100 +# +# ## Data format to consume. +# ## Each data format has its own unique set of configuration options, read +# ## more about them here: +# ## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md +# data_format = "influx" # # Statsd Server @@ -1670,3 +1781,18 @@ # ## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md # data_format = "influx" + +# # A Webhooks Event collector +# [[inputs.webhooks]] +# ## Address and port to host Webhook listener on +# service_address = ":1619" +# +# [inputs.webhooks.github] +# path = "/github" +# +# [inputs.webhooks.mandrill] +# path = "/mandrill" +# +# [inputs.webhooks.rollbar] +# path = "/rollbar" + diff --git a/filter/filter.go b/filter/filter.go new file mode 100644 index 000000000..85eed17ac --- /dev/null +++ b/filter/filter.go @@ -0,0 +1,79 @@ +package filter + +import ( + "strings" + + "github.com/gobwas/glob" +) + +type Filter interface { + Match(string) bool +} + +// CompileFilter takes a list of string filters and returns a Filter interface +// for matching a given string against the filter list. The filter list +// supports glob matching too, ie: +// +// f, _ := CompileFilter([]string{"cpu", "mem", "net*"}) +// f.Match("cpu") // true +// f.Match("network") // true +// f.Match("memory") // false +// +func CompileFilter(filters []string) (Filter, error) { + // return if there is nothing to compile + if len(filters) == 0 { + return nil, nil + } + + // check if we can compile a non-glob filter + noGlob := true + for _, filter := range filters { + if hasMeta(filter) { + noGlob = false + break + } + } + + switch { + case noGlob: + // return non-globbing filter if not needed. + return compileFilterNoGlob(filters), nil + case len(filters) == 1: + return glob.Compile(filters[0]) + default: + return glob.Compile("{" + strings.Join(filters, ",") + "}") + } +} + +// hasMeta reports whether s contains any magic glob characters. 
+func hasMeta(s string) bool { + return strings.IndexAny(s, "*?[") >= 0 +} + +type filter struct { + m map[string]struct{} +} + +func (f *filter) Match(s string) bool { + _, ok := f.m[s] + return ok +} + +type filtersingle struct { + s string +} + +func (f *filtersingle) Match(s string) bool { + return f.s == s +} + +func compileFilterNoGlob(filters []string) Filter { + if len(filters) == 1 { + return &filtersingle{s: filters[0]} + } + out := filter{m: make(map[string]struct{})} + for _, filter := range filters { + out.m[filter] = struct{}{} + } + return &out +} diff --git a/filter/filter_test.go b/filter/filter_test.go new file mode 100644 index 000000000..85072e2ac --- /dev/null +++ b/filter/filter_test.go @@ -0,0 +1,96 @@ +package filter + +import ( + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestCompileFilter(t *testing.T) { + f, err := CompileFilter([]string{}) + assert.NoError(t, err) + assert.Nil(t, f) + + f, err = CompileFilter([]string{"cpu"}) + assert.NoError(t, err) + assert.True(t, f.Match("cpu")) + assert.False(t, f.Match("cpu0")) + assert.False(t, f.Match("mem")) + + f, err = CompileFilter([]string{"cpu*"}) + assert.NoError(t, err) + assert.True(t, f.Match("cpu")) + assert.True(t, f.Match("cpu0")) + assert.False(t, f.Match("mem")) + + f, err = CompileFilter([]string{"cpu", "mem"}) + assert.NoError(t, err) + assert.True(t, f.Match("cpu")) + assert.False(t, f.Match("cpu0")) + assert.True(t, f.Match("mem")) + + f, err = CompileFilter([]string{"cpu", "mem", "net*"}) + assert.NoError(t, err) + assert.True(t, f.Match("cpu")) + assert.False(t, f.Match("cpu0")) + assert.True(t, f.Match("mem")) + assert.True(t, f.Match("network")) +} + +var benchbool bool + +func BenchmarkFilterSingleNoGlobFalse(b *testing.B) { + f, _ := CompileFilter([]string{"cpu"}) + var tmp bool + for n := 0; n < b.N; n++ { + tmp = f.Match("network") + } + benchbool = tmp +} + +func BenchmarkFilterSingleNoGlobTrue(b *testing.B) { + f, _ := CompileFilter([]string{"cpu"}) + var tmp bool + for n := 0; n < b.N; n++ { + tmp = f.Match("cpu") + } + benchbool = tmp +} + +func BenchmarkFilter(b *testing.B) { + f, _ := CompileFilter([]string{"cpu", "mem", "net*"}) + var tmp bool + for n := 0; n < b.N; n++ { + tmp = f.Match("network") + } + benchbool = tmp +} + +func BenchmarkFilterNoGlob(b *testing.B) { + f, _ := CompileFilter([]string{"cpu", "mem", "net"}) + var tmp bool + for n := 0; n < b.N; n++ { + tmp = f.Match("net") + } + benchbool = tmp +} + +func BenchmarkFilter2(b *testing.B) { + f, _ := CompileFilter([]string{"aa", "bb", "c", "ad", "ar", "at", "aq", + "aw", "az", "axxx", "ab", "cpu", "mem", "net*"}) + var tmp bool + for n := 0; n < b.N; n++ { + tmp = f.Match("network") + } + benchbool = tmp +} + +func BenchmarkFilter2NoGlob(b *testing.B) { + f, _ := CompileFilter([]string{"aa", "bb", "c", "ad", "ar", "at", "aq", + "aw", "az", "axxx", "ab", "cpu", "mem", "net"}) + var tmp bool + for n := 0; n < b.N; n++ { + tmp = f.Match("net") + } + benchbool = tmp +} diff --git a/internal/config/config.go b/internal/config/config.go index fdc9a8753..24c1af3fa 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -9,6 +9,7 @@ import ( "os" "path/filepath" "regexp" + "runtime" "sort" "strings" "time" @@ -47,8 +48,8 @@ type Config struct { OutputFilters []string Agent *AgentConfig - Inputs []*internal_models.RunningInput - Outputs []*internal_models.RunningOutput + Inputs []*models.RunningInput + Outputs []*models.RunningOutput } func NewConfig() *Config { @@ -61,8 +62,8 @@ func NewConfig() 
*Config { }, Tags: make(map[string]string), - Inputs: make([]*internal_models.RunningInput, 0), - Outputs: make([]*internal_models.RunningOutput, 0), + Inputs: make([]*models.RunningInput, 0), + Outputs: make([]*models.RunningOutput, 0), InputFilters: make([]string, 0), OutputFilters: make([]string, 0), } @@ -77,6 +78,14 @@ type AgentConfig struct { // ie, if Interval=10s then always collect on :00, :10, :20, etc. RoundInterval bool + // By default, precision will be set to the same timestamp order as the + // collection interval, with the maximum being 1s. + // ie, when interval = "10s", precision will be "1s" + // when interval = "250ms", precision will be "1ms" + // Precision will NOT be used for service inputs. It is up to each individual + // service input to set the timestamp at the appropriate precision. + Precision internal.Duration + // CollectionJitter is used to jitter the collection by a random amount. // Each plugin will sleep for a random time within jitter before collecting. // This can be used to avoid many plugins querying things like sysfs at the @@ -108,11 +117,10 @@ type AgentConfig struct { // does _not_ deactivate FlushInterval. FlushBufferWhenFull bool - // TODO(cam): Remove UTC and Precision parameters, they are no longer + // TODO(cam): Remove the UTC parameter, it is no longer // valid for the agent config. Leaving it here for now for backwards- // compatibility - UTC bool `toml:"utc"` - Precision string + UTC bool `toml:"utc"` // Debug is the option for running in debug mode Debug bool @@ -132,7 +140,7 @@ func (c *Config) InputNames() []string { return name } -// Outputs returns a list of strings of the configured inputs. +// Outputs returns a list of strings of the configured outputs. func (c *Config) OutputNames() []string { var name []string for _, output := range c.Outputs { @@ -209,6 +217,11 @@ var header = `# Telegraf Configuration ## ie, a jitter of 5s and interval 10s means flushes will happen every 10-15s flush_jitter = "0s" + ## By default, precision will be set to the same timestamp order as the + ## collection interval, with the maximum being 1s. + ## Precision will NOT be used for service inputs, such as logparser and statsd. + ## Valid values are "ns", "us" (or "µs"), "ms", "s". + precision = "" ## Run telegraf in debug mode debug = false ## Run telegraf in quiet mode @@ -420,6 +433,9 @@ func getDefaultConfigPath() (string, error) { envfile := os.Getenv("TELEGRAF_CONFIG_PATH") homefile := os.ExpandEnv("${HOME}/.telegraf/telegraf.conf") etcfile := "/etc/telegraf/telegraf.conf" + if runtime.GOOS == "windows" { + etcfile = `C:\Program Files\Telegraf\telegraf.conf` + } for _, path := range []string{envfile, homefile, etcfile} { if _, err := os.Stat(path); err == nil { log.Printf("Using config file: %s", path) @@ -527,6 +543,13 @@ func (c *Config) LoadConfig(path string) error { return nil } +// trimBOM trims the Byte-Order-Mark from the beginning of the file. +// this is for Windows compatibility only. +// see https://github.com/influxdata/telegraf/issues/1378 +func trimBOM(f []byte) []byte { + return bytes.TrimPrefix(f, []byte("\xef\xbb\xbf")) +} + // parseFile loads a TOML configuration from a provided path and // returns the AST produced from the TOML parser. When loading the file, it // will find environment variables and replace them. 
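The `Precision` field documented above follows the order of magnitude of the collection interval, capped at one second (an interval of "10s" gives "1s", "250ms" gives "1ms"). The helper below is a hypothetical illustration of that truncation rule, not the config package's actual API:

```go
package main

import (
	"fmt"
	"time"
)

// precisionFor is a hypothetical helper mirroring the rule described in the
// AgentConfig comment: precision matches the timestamp order of the
// collection interval, with a maximum of one second.
func precisionFor(interval time.Duration) time.Duration {
	switch {
	case interval >= time.Second:
		return time.Second
	case interval >= time.Millisecond:
		return time.Millisecond
	case interval >= time.Microsecond:
		return time.Microsecond
	default:
		return time.Nanosecond
	}
}

func main() {
	fmt.Println(precisionFor(10 * time.Second))       // 1s
	fmt.Println(precisionFor(250 * time.Millisecond)) // 1ms
}
```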
@@ -535,6 +558,8 @@ func parseFile(fpath string) (*ast.Table, error) { if err != nil { return nil, err } + // ugh windows why + contents = trimBOM(contents) env_vars := envVarRe.FindAll(contents, -1) for _, env_var := range env_vars { @@ -577,7 +602,7 @@ func (c *Config) addOutput(name string, table *ast.Table) error { return err } - ro := internal_models.NewRunningOutput(name, output, outputConfig, + ro := models.NewRunningOutput(name, output, outputConfig, c.Agent.MetricBatchSize, c.Agent.MetricBufferLimit) c.Outputs = append(c.Outputs, ro) return nil @@ -618,7 +643,7 @@ func (c *Config) addInput(name string, table *ast.Table) error { return err } - rp := &internal_models.RunningInput{ + rp := &models.RunningInput{ Name: name, Input: input, Config: pluginConfig, @@ -629,10 +654,10 @@ func (c *Config) addInput(name string, table *ast.Table) error { // buildFilter builds a Filter // (tagpass/tagdrop/namepass/namedrop/fieldpass/fielddrop) to -// be inserted into the internal_models.OutputConfig/internal_models.InputConfig +// be inserted into the models.OutputConfig/models.InputConfig // to be used for glob filtering on tags and measurements -func buildFilter(tbl *ast.Table) (internal_models.Filter, error) { - f := internal_models.Filter{} +func buildFilter(tbl *ast.Table) (models.Filter, error) { + f := models.Filter{} if node, ok := tbl.Fields["namepass"]; ok { if kv, ok := node.(*ast.KeyValue); ok { @@ -696,7 +721,7 @@ func buildFilter(tbl *ast.Table) (internal_models.Filter, error) { if subtbl, ok := node.(*ast.Table); ok { for name, val := range subtbl.Fields { if kv, ok := val.(*ast.KeyValue); ok { - tagfilter := &internal_models.TagFilter{Name: name} + tagfilter := &models.TagFilter{Name: name} if ary, ok := kv.Value.(*ast.Array); ok { for _, elem := range ary.Value { if str, ok := elem.(*ast.String); ok { @@ -715,7 +740,7 @@ func buildFilter(tbl *ast.Table) (internal_models.Filter, error) { if subtbl, ok := node.(*ast.Table); ok { for name, val := range subtbl.Fields { if kv, ok := val.(*ast.KeyValue); ok { - tagfilter := &internal_models.TagFilter{Name: name} + tagfilter := &models.TagFilter{Name: name} if ary, ok := kv.Value.(*ast.Array); ok { for _, elem := range ary.Value { if str, ok := elem.(*ast.String); ok { @@ -772,9 +797,9 @@ func buildFilter(tbl *ast.Table) (internal_models.Filter, error) { // buildInput parses input specific items from the ast.Table, // builds the filter and returns a -// internal_models.InputConfig to be inserted into internal_models.RunningInput -func buildInput(name string, tbl *ast.Table) (*internal_models.InputConfig, error) { - cp := &internal_models.InputConfig{Name: name} +// models.InputConfig to be inserted into models.RunningInput +func buildInput(name string, tbl *ast.Table) (*models.InputConfig, error) { + cp := &models.InputConfig{Name: name} if node, ok := tbl.Fields["interval"]; ok { if kv, ok := node.(*ast.KeyValue); ok { if str, ok := kv.Value.(*ast.String); ok { @@ -948,14 +973,14 @@ func buildSerializer(name string, tbl *ast.Table) (serializers.Serializer, error // buildOutput parses output specific items from the ast.Table, // builds the filter and returns an -// internal_models.OutputConfig to be inserted into internal_models.RunningInput +// models.OutputConfig to be inserted into models.RunningInput // Note: error exists in the return for future calls that might require error -func buildOutput(name string, tbl *ast.Table) (*internal_models.OutputConfig, error) { +func buildOutput(name string, tbl *ast.Table) (*models.OutputConfig, 
error) { filter, err := buildFilter(tbl) if err != nil { return nil, err } - oc := &internal_models.OutputConfig{ + oc := &models.OutputConfig{ Name: name, Filter: filter, } diff --git a/internal/config/config_test.go b/internal/config/config_test.go index 1659cd6ec..cb8c9192c 100644 --- a/internal/config/config_test.go +++ b/internal/config/config_test.go @@ -26,19 +26,19 @@ func TestConfig_LoadSingleInputWithEnvVars(t *testing.T) { memcached := inputs.Inputs["memcached"]().(*memcached.Memcached) memcached.Servers = []string{"192.168.1.1"} - filter := internal_models.Filter{ + filter := models.Filter{ NameDrop: []string{"metricname2"}, NamePass: []string{"metricname1"}, FieldDrop: []string{"other", "stuff"}, FieldPass: []string{"some", "strings"}, - TagDrop: []internal_models.TagFilter{ - internal_models.TagFilter{ + TagDrop: []models.TagFilter{ + models.TagFilter{ Name: "badtag", Filter: []string{"othertag"}, }, }, - TagPass: []internal_models.TagFilter{ - internal_models.TagFilter{ + TagPass: []models.TagFilter{ + models.TagFilter{ Name: "goodtag", Filter: []string{"mytag"}, }, @@ -46,7 +46,7 @@ func TestConfig_LoadSingleInputWithEnvVars(t *testing.T) { IsActive: true, } assert.NoError(t, filter.CompileFilter()) - mConfig := &internal_models.InputConfig{ + mConfig := &models.InputConfig{ Name: "memcached", Filter: filter, Interval: 10 * time.Second, @@ -66,19 +66,19 @@ func TestConfig_LoadSingleInput(t *testing.T) { memcached := inputs.Inputs["memcached"]().(*memcached.Memcached) memcached.Servers = []string{"localhost"} - filter := internal_models.Filter{ + filter := models.Filter{ NameDrop: []string{"metricname2"}, NamePass: []string{"metricname1"}, FieldDrop: []string{"other", "stuff"}, FieldPass: []string{"some", "strings"}, - TagDrop: []internal_models.TagFilter{ - internal_models.TagFilter{ + TagDrop: []models.TagFilter{ + models.TagFilter{ Name: "badtag", Filter: []string{"othertag"}, }, }, - TagPass: []internal_models.TagFilter{ - internal_models.TagFilter{ + TagPass: []models.TagFilter{ + models.TagFilter{ Name: "goodtag", Filter: []string{"mytag"}, }, @@ -86,7 +86,7 @@ func TestConfig_LoadSingleInput(t *testing.T) { IsActive: true, } assert.NoError(t, filter.CompileFilter()) - mConfig := &internal_models.InputConfig{ + mConfig := &models.InputConfig{ Name: "memcached", Filter: filter, Interval: 5 * time.Second, @@ -113,19 +113,19 @@ func TestConfig_LoadDirectory(t *testing.T) { memcached := inputs.Inputs["memcached"]().(*memcached.Memcached) memcached.Servers = []string{"localhost"} - filter := internal_models.Filter{ + filter := models.Filter{ NameDrop: []string{"metricname2"}, NamePass: []string{"metricname1"}, FieldDrop: []string{"other", "stuff"}, FieldPass: []string{"some", "strings"}, - TagDrop: []internal_models.TagFilter{ - internal_models.TagFilter{ + TagDrop: []models.TagFilter{ + models.TagFilter{ Name: "badtag", Filter: []string{"othertag"}, }, }, - TagPass: []internal_models.TagFilter{ - internal_models.TagFilter{ + TagPass: []models.TagFilter{ + models.TagFilter{ Name: "goodtag", Filter: []string{"mytag"}, }, @@ -133,7 +133,7 @@ func TestConfig_LoadDirectory(t *testing.T) { IsActive: true, } assert.NoError(t, filter.CompileFilter()) - mConfig := &internal_models.InputConfig{ + mConfig := &models.InputConfig{ Name: "memcached", Filter: filter, Interval: 5 * time.Second, @@ -150,7 +150,7 @@ func TestConfig_LoadDirectory(t *testing.T) { assert.NoError(t, err) ex.SetParser(p) ex.Command = "/usr/bin/myothercollector --foo=bar" - eConfig := 
&internal_models.InputConfig{ + eConfig := &models.InputConfig{ Name: "exec", MeasurementSuffix: "_myothercollector", } @@ -169,7 +169,7 @@ func TestConfig_LoadDirectory(t *testing.T) { pstat := inputs.Inputs["procstat"]().(*procstat.Procstat) pstat.PidFile = "/var/run/grafana-server.pid" - pConfig := &internal_models.InputConfig{Name: "procstat"} + pConfig := &models.InputConfig{Name: "procstat"} pConfig.Tags = make(map[string]string) assert.Equal(t, pstat, c.Inputs[3].Input, diff --git a/internal/internal.go b/internal/internal.go index 27a24f021..58a1200e0 100644 --- a/internal/internal.go +++ b/internal/internal.go @@ -17,8 +17,6 @@ import ( "strings" "time" "unicode" - - "github.com/gobwas/glob" ) const alphanum string = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" @@ -135,8 +133,8 @@ func GetTLSConfig( cert, err := tls.LoadX509KeyPair(SSLCert, SSLKey) if err != nil { return nil, errors.New(fmt.Sprintf( - "Could not load TLS client key/certificate: %s", - err)) + "Could not load TLS client key/certificate from %s:%s: %s", + SSLKey, SSLCert, err)) } t.Certificates = []tls.Certificate{cert} @@ -209,27 +207,6 @@ func WaitTimeout(c *exec.Cmd, timeout time.Duration) error { } } -// CompileFilter takes a list of glob "filters", ie: -// ["MAIN.*", "CPU.*", "NET"] -// and compiles them into a glob object. This glob object can -// then be used to match keys to the filter. -func CompileFilter(filters []string) (glob.Glob, error) { - var out glob.Glob - - // return if there is nothing to compile - if len(filters) == 0 { - return out, nil - } - - var err error - if len(filters) == 1 { - out, err = glob.Compile(filters[0]) - } else { - out, err = glob.Compile("{" + strings.Join(filters, ",") + "}") - } - return out, err -} - // RandomSleep will sleep for a random amount of time up to max. // If the shutdown channel is closed, it will return before it has finished // sleeping. 
diff --git a/internal/internal_test.go b/internal/internal_test.go index 31bb5ec61..213e94d3d 100644 --- a/internal/internal_test.go +++ b/internal/internal_test.go @@ -107,37 +107,6 @@ func TestRunError(t *testing.T) { assert.Error(t, err) } -func TestCompileFilter(t *testing.T) { - f, err := CompileFilter([]string{}) - assert.NoError(t, err) - assert.Nil(t, f) - - f, err = CompileFilter([]string{"cpu"}) - assert.NoError(t, err) - assert.True(t, f.Match("cpu")) - assert.False(t, f.Match("cpu0")) - assert.False(t, f.Match("mem")) - - f, err = CompileFilter([]string{"cpu*"}) - assert.NoError(t, err) - assert.True(t, f.Match("cpu")) - assert.True(t, f.Match("cpu0")) - assert.False(t, f.Match("mem")) - - f, err = CompileFilter([]string{"cpu", "mem"}) - assert.NoError(t, err) - assert.True(t, f.Match("cpu")) - assert.False(t, f.Match("cpu0")) - assert.True(t, f.Match("mem")) - - f, err = CompileFilter([]string{"cpu", "mem", "net*"}) - assert.NoError(t, err) - assert.True(t, f.Match("cpu")) - assert.False(t, f.Match("cpu0")) - assert.True(t, f.Match("mem")) - assert.True(t, f.Match("network")) -} - func TestRandomSleep(t *testing.T) { // test that zero max returns immediately s := time.Now() diff --git a/internal/models/filter.go b/internal/models/filter.go index 71d71c23e..9ad4c0049 100644 --- a/internal/models/filter.go +++ b/internal/models/filter.go @@ -1,82 +1,80 @@ -package internal_models +package models import ( "fmt" - "github.com/gobwas/glob" - "github.com/influxdata/telegraf" - "github.com/influxdata/telegraf/internal" + "github.com/influxdata/telegraf/filter" ) // TagFilter is the name of a tag, and the values on which to filter type TagFilter struct { Name string Filter []string - filter glob.Glob + filter filter.Filter } // Filter containing drop/pass and tagdrop/tagpass rules type Filter struct { NameDrop []string - nameDrop glob.Glob + nameDrop filter.Filter NamePass []string - namePass glob.Glob + namePass filter.Filter FieldDrop []string - fieldDrop glob.Glob + fieldDrop filter.Filter FieldPass []string - fieldPass glob.Glob + fieldPass filter.Filter TagDrop []TagFilter TagPass []TagFilter TagExclude []string - tagExclude glob.Glob + tagExclude filter.Filter TagInclude []string - tagInclude glob.Glob + tagInclude filter.Filter IsActive bool } -// Compile all Filter lists into glob.Glob objects. +// Compile all Filter lists into filter.Filter objects. 
func (f *Filter) CompileFilter() error { var err error - f.nameDrop, err = internal.CompileFilter(f.NameDrop) + f.nameDrop, err = filter.CompileFilter(f.NameDrop) if err != nil { return fmt.Errorf("Error compiling 'namedrop', %s", err) } - f.namePass, err = internal.CompileFilter(f.NamePass) + f.namePass, err = filter.CompileFilter(f.NamePass) if err != nil { return fmt.Errorf("Error compiling 'namepass', %s", err) } - f.fieldDrop, err = internal.CompileFilter(f.FieldDrop) + f.fieldDrop, err = filter.CompileFilter(f.FieldDrop) if err != nil { return fmt.Errorf("Error compiling 'fielddrop', %s", err) } - f.fieldPass, err = internal.CompileFilter(f.FieldPass) + f.fieldPass, err = filter.CompileFilter(f.FieldPass) if err != nil { return fmt.Errorf("Error compiling 'fieldpass', %s", err) } - f.tagExclude, err = internal.CompileFilter(f.TagExclude) + f.tagExclude, err = filter.CompileFilter(f.TagExclude) if err != nil { return fmt.Errorf("Error compiling 'tagexclude', %s", err) } - f.tagInclude, err = internal.CompileFilter(f.TagInclude) + f.tagInclude, err = filter.CompileFilter(f.TagInclude) if err != nil { return fmt.Errorf("Error compiling 'taginclude', %s", err) } for i, _ := range f.TagDrop { - f.TagDrop[i].filter, err = internal.CompileFilter(f.TagDrop[i].Filter) + f.TagDrop[i].filter, err = filter.CompileFilter(f.TagDrop[i].Filter) if err != nil { return fmt.Errorf("Error compiling 'tagdrop', %s", err) } } for i, _ := range f.TagPass { - f.TagPass[i].filter, err = internal.CompileFilter(f.TagPass[i].Filter) + f.TagPass[i].filter, err = filter.CompileFilter(f.TagPass[i].Filter) if err != nil { return fmt.Errorf("Error compiling 'tagpass', %s", err) } diff --git a/internal/models/filter_test.go b/internal/models/filter_test.go index a37416095..497d08532 100644 --- a/internal/models/filter_test.go +++ b/internal/models/filter_test.go @@ -1,4 +1,4 @@ -package internal_models +package models import ( "testing" @@ -253,51 +253,6 @@ func TestFilter_TagDrop(t *testing.T) { } } -func TestFilter_CompileFilterError(t *testing.T) { - f := Filter{ - NameDrop: []string{"", ""}, - } - assert.Error(t, f.CompileFilter()) - f = Filter{ - NamePass: []string{"", ""}, - } - assert.Error(t, f.CompileFilter()) - f = Filter{ - FieldDrop: []string{"", ""}, - } - assert.Error(t, f.CompileFilter()) - f = Filter{ - FieldPass: []string{"", ""}, - } - assert.Error(t, f.CompileFilter()) - f = Filter{ - TagExclude: []string{"", ""}, - } - assert.Error(t, f.CompileFilter()) - f = Filter{ - TagInclude: []string{"", ""}, - } - assert.Error(t, f.CompileFilter()) - filters := []TagFilter{ - TagFilter{ - Name: "cpu", - Filter: []string{"{foobar}"}, - }} - f = Filter{ - TagDrop: filters, - } - require.Error(t, f.CompileFilter()) - filters = []TagFilter{ - TagFilter{ - Name: "cpu", - Filter: []string{"{foobar}"}, - }} - f = Filter{ - TagPass: filters, - } - require.Error(t, f.CompileFilter()) -} - func TestFilter_ShouldMetricsPass(t *testing.T) { m := testutil.TestMetric(1, "testmetric") f := Filter{ diff --git a/internal/models/running_input.go b/internal/models/running_input.go index cffaf336c..445c5ee96 100644 --- a/internal/models/running_input.go +++ b/internal/models/running_input.go @@ -1,4 +1,4 @@ -package internal_models +package models import ( "time" diff --git a/internal/models/running_output.go b/internal/models/running_output.go index d0d2abbc1..82a6885d5 100644 --- a/internal/models/running_output.go +++ b/internal/models/running_output.go @@ -1,4 +1,4 @@ -package internal_models +package models import ( "log" 
@@ -138,7 +138,7 @@ func (ro *RunningOutput) Write() error { } func (ro *RunningOutput) write(metrics []telegraf.Metric) error { - if len(metrics) == 0 { + if metrics == nil || len(metrics) == 0 { return nil } start := time.Now() diff --git a/internal/models/running_output_test.go b/internal/models/running_output_test.go index d9238c5a4..a552629e9 100644 --- a/internal/models/running_output_test.go +++ b/internal/models/running_output_test.go @@ -1,4 +1,4 @@ -package internal_models +package models import ( "fmt" diff --git a/metric.go b/metric.go index 574565c22..0d186784a 100644 --- a/metric.go +++ b/metric.go @@ -45,14 +45,9 @@ func NewMetric( name string, tags map[string]string, fields map[string]interface{}, - t ...time.Time, + t time.Time, ) (Metric, error) { - var T time.Time - if len(t) > 0 { - T = t[0] - } - - pt, err := client.NewPoint(name, tags, fields, T) + pt, err := client.NewPoint(name, tags, fields, t) if err != nil { return nil, err } diff --git a/metric_test.go b/metric_test.go index 1177ab494..4182c9cc1 100644 --- a/metric_test.go +++ b/metric_test.go @@ -51,23 +51,6 @@ func TestNewMetricString(t *testing.T) { assert.Equal(t, lineProtoPrecision, m.PrecisionString("s")) } -func TestNewMetricStringNoTime(t *testing.T) { - tags := map[string]string{ - "host": "localhost", - } - fields := map[string]interface{}{ - "usage_idle": float64(99), - } - m, err := NewMetric("cpu", tags, fields) - assert.NoError(t, err) - - lineProto := fmt.Sprintf("cpu,host=localhost usage_idle=99") - assert.Equal(t, lineProto, m.String()) - - lineProtoPrecision := fmt.Sprintf("cpu,host=localhost usage_idle=99") - assert.Equal(t, lineProtoPrecision, m.PrecisionString("s")) -} - func TestNewMetricFailNaN(t *testing.T) { now := time.Now() diff --git a/plugins/inputs/EXAMPLE_README.md b/plugins/inputs/EXAMPLE_README.md index 6bebf1e88..d6fcfdb91 100644 --- a/plugins/inputs/EXAMPLE_README.md +++ b/plugins/inputs/EXAMPLE_README.md @@ -27,6 +27,14 @@ The example plugin gathers metrics about example things - tag2 - measurement2 has the following tags: - tag3 + +### Sample Queries: + +These are some useful queries (to generate dashboards or other visualizations) to run against data from this plugin: + +``` +SELECT max(field1), mean(field1), min(field1) FROM measurement1 WHERE tag1='bar' AND time > now() - 1h GROUP BY tag1 +``` ### Example Output: diff --git a/plugins/inputs/aerospike/README.md b/plugins/inputs/aerospike/README.md index 6fb6bb189..60c470cd3 100644 --- a/plugins/inputs/aerospike/README.md +++ b/plugins/inputs/aerospike/README.md @@ -1,265 +1,55 @@ -## Telegraf Plugin: Aerospike +# Aerospike Input Plugin -#### Plugin arguments: -- **servers** string array: List of aerospike servers to query (def: 127.0.0.1:3000) - -#### Description - -The aerospike plugin queries aerospike server(s) and get node statistics. It also collects stats for +The aerospike plugin queries aerospike server(s) and gets node statistics & stats for all the configured namespaces. For what the measurements mean, please consult the [Aerospike Metrics Reference Docs](http://www.aerospike.com/docs/reference/metrics). The metric names, to make it less complicated in querying, have replaced all `-` with `_` as Aerospike metrics come in both forms (no idea why). -# Measurements: -#### Aerospike Statistics [values]: +All metric values are cast to integers where possible, then booleans, and otherwise left as strings. 
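The cast order described above matches the plugin's `parseValue` function, which appears later in this patch. A standalone sketch of that parse order:

```go
package main

import (
	"fmt"
	"strconv"
)

// parseValue mirrors the cast order described above: try an integer first,
// then a boolean, and otherwise keep the raw string.
func parseValue(v string) interface{} {
	if i, err := strconv.ParseInt(v, 10, 64); err == nil {
		return i
	}
	if b, err := strconv.ParseBool(v); err == nil {
		return b
	}
	return v
}

func main() {
	fmt.Println(parseValue("92"))               // int64(92)
	fmt.Println(parseValue("true"))             // bool(true)
	fmt.Println(parseValue("8AF422E05281249E")) // left as a string
}
```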
-Meta: -- units: Integer +### Measurements: -Measurement names: -- batch_index_queue -- batch_index_unused_buffers -- batch_queue -- batch_tree_count -- client_connections -- data_used_bytes_memory -- index_used_bytes_memory -- info_queue -- migrate_progress_recv -- migrate_progress_send -- migrate_rx_objs -- migrate_tx_objs -- objects -- ongoing_write_reqs -- partition_absent -- partition_actual -- partition_desync -- partition_object_count -- partition_ref_count -- partition_replica -- proxy_in_progress -- query_agg_avg_rec_count -- query_avg_rec_count -- query_lookup_avg_rec_count -- queue -- record_locks -- record_refs -- sindex_used_bytes_memory -- sindex_gc_garbage_cleaned -- system_free_mem_pct -- total_bytes_disk -- total_bytes_memory -- tree_count -- scans_active -- uptime -- used_bytes_disk -- used_bytes_memory -- cluster_size -- waiting_transactions +The aerospike metrics are under two measurement names: -#### Aerospike Statistics [cumulative]: +***aerospike_node***: These are the aerospike **node** measurements, which are +available from the aerospike `statistics` command. -Meta: -- units: Integer + ie, + ``` + telnet localhost 3003 + statistics + ... + ``` -Measurement names: -- batch_errors -- batch_index_complete -- batch_index_errors -- batch_index_initiate -- batch_index_timeout -- batch_initiate -- batch_timeout -- err_duplicate_proxy_request -- err_out_of_space -- err_replica_non_null_node -- err_replica_null_node -- err_rw_cant_put_unique -- err_rw_pending_limit -- err_rw_request_not_found -- err_storage_queue_full -- err_sync_copy_null_master -- err_sync_copy_null_node -- err_tsvc_requests -- err_write_fail_bin_exists -- err_write_fail_generation -- err_write_fail_generation_xdr -- err_write_fail_incompatible_type -- err_write_fail_key_exists -- err_write_fail_key_mismatch -- err_write_fail_not_found -- err_write_fail_noxdr -- err_write_fail_parameter -- err_write_fail_prole_delete -- err_write_fail_prole_generation -- err_write_fail_prole_unknown -- err_write_fail_unknown -- fabric_msgs_rcvd -- fabric_msgs_sent -- heartbeat_received_foreign -- heartbeat_received_self -- migrate_msgs_recv -- migrate_msgs_sent -- migrate_num_incoming_accepted -- migrate_num_incoming_refused -- proxy_action -- proxy_initiate -- proxy_retry -- proxy_retry_new_dest -- proxy_retry_q_full -- proxy_retry_same_dest -- proxy_unproxy -- query_abort -- query_agg -- query_agg_abort -- query_agg_err -- query_agg_success -- query_bad_records -- query_fail -- query_long_queue_full -- query_long_running -- query_lookup_abort -- query_lookup_err -- query_lookups -- query_lookup_success -- query_reqs -- query_short_queue_full -- query_short_running -- query_success -- query_tracked -- read_dup_prole -- reaped_fds -- rw_err_ack_badnode -- rw_err_ack_internal -- rw_err_ack_nomatch -- rw_err_dup_cluster_key -- rw_err_dup_internal -- rw_err_dup_send -- rw_err_write_cluster_key -- rw_err_write_internal -- rw_err_write_send -- sindex_ucgarbage_found -- sindex_gc_locktimedout -- sindex_gc_inactivity_dur -- sindex_gc_activity_dur -- sindex_gc_list_creation_time -- sindex_gc_list_deletion_time -- sindex_gc_objects_validated -- sindex_gc_garbage_found -- stat_cluster_key_err_ack_dup_trans_reenqueue -- stat_cluster_key_err_ack_rw_trans_reenqueue -- stat_cluster_key_prole_retry -- stat_cluster_key_regular_processed -- stat_cluster_key_trans_to_proxy_retry -- stat_deleted_set_object -- stat_delete_success -- stat_duplicate_operation -- stat_evicted_objects -- stat_evicted_objects_time -- stat_evicted_set_objects -- 
stat_expired_objects -- stat_nsup_deletes_not_shipped -- stat_proxy_errs -- stat_proxy_reqs -- stat_proxy_reqs_xdr -- stat_proxy_success -- stat_read_errs_notfound -- stat_read_errs_other -- stat_read_reqs -- stat_read_reqs_xdr -- stat_read_success -- stat_rw_timeout -- stat_slow_trans_queue_batch_pop -- stat_slow_trans_queue_pop -- stat_slow_trans_queue_push -- stat_write_errs -- stat_write_errs_notfound -- stat_write_errs_other -- stat_write_reqs -- stat_write_reqs_xdr -- stat_write_success -- stat_xdr_pipe_miss -- stat_xdr_pipe_writes -- stat_zero_bin_records -- storage_defrag_corrupt_record -- storage_defrag_wait -- transactions -- basic_scans_succeeded -- basic_scans_failed -- aggr_scans_succeeded -- aggr_scans_failed -- udf_bg_scans_succeeded -- udf_bg_scans_failed -- udf_delete_err_others -- udf_delete_reqs -- udf_delete_success -- udf_lua_errs -- udf_query_rec_reqs -- udf_read_errs_other -- udf_read_reqs -- udf_read_success -- udf_replica_writes -- udf_scan_rec_reqs -- udf_write_err_others -- udf_write_reqs -- udf_write_success -- write_master -- write_prole +***aerospike_namespace***: These are aerospike namespace measurements, which +are available from the aerospike `namespace/` command. -#### Aerospike Statistics [percentage]: + ie, + ``` + telnet localhost 3003 + namespaces + ;;etc. + namespace/ + ... + ``` -Meta: -- units: percent (out of 100) +### Tags: -Measurement names: -- free_pct_disk -- free_pct_memory +All measurements have tags: -# Measurements: -#### Aerospike Namespace Statistics [values]: +- aerospike_host -Meta: -- units: Integer -- tags: `namespace=` +Namespace metrics have tags: -Measurement names: -- available_bin_names -- available_pct -- current_time -- data_used_bytes_memory -- index_used_bytes_memory -- master_objects -- max_evicted_ttl -- max_void_time -- non_expirable_objects -- objects -- prole_objects -- sindex_used_bytes_memory -- total_bytes_disk -- total_bytes_memory -- used_bytes_disk -- used_bytes_memory +- namespace_name -#### Aerospike Namespace Statistics [cumulative]: +### Example Output: -Meta: -- units: Integer -- tags: `namespace=` - -Measurement names: -- evicted_objects -- expired_objects -- set_deleted_objects -- set_evicted_objects - -#### Aerospike Namespace Statistics [percentage]: - -Meta: -- units: percent (out of 100) -- tags: `namespace=` - -Measurement names: -- free_pct_disk -- free_pct_memory +``` +% telegraf --config ~/db/ws/telegraf.conf --input-filter aerospike --test +* Plugin: aerospike, Collection 1 +> aerospike_node,aerospike_host=localhost:3000,host=tars 
batch_error=0i,batch_index_complete=0i,batch_index_created_buffers=0i,batch_index_destroyed_buffers=0i,batch_index_error=0i,batch_index_huge_buffers=0i,batch_index_initiate=0i,batch_index_queue="0:0,0:0,0:0,0:0",batch_index_timeout=0i,batch_index_unused_buffers=0i,batch_initiate=0i,batch_queue=0i,batch_timeout=0i,client_connections=6i,cluster_integrity=true,cluster_key="8AF422E05281249E",cluster_size=1i,delete_queue=0i,demarshal_error=0i,early_tsvc_batch_sub_error=0i,early_tsvc_client_error=0i,early_tsvc_udf_sub_error=0i,fabric_connections=16i,fabric_msgs_rcvd=0i,fabric_msgs_sent=0i,heartbeat_connections=0i,heartbeat_received_foreign=0i,heartbeat_received_self=0i,info_complete=47i,info_queue=0i,migrate_allowed=true,migrate_partitions_remaining=0i,migrate_progress_recv=0i,migrate_progress_send=0i,node_name="BB9020011AC4202",objects=0i,paxos_principal="BB9020011AC4202",proxy_in_progress=0i,proxy_retry=0i,query_long_running=0i,query_short_running=0i,reaped_fds=0i,record_refs=0i,rw_in_progress=0i,scans_active=0i,sindex_gc_activity_dur=0i,sindex_gc_garbage_cleaned=0i,sindex_gc_garbage_found=0i,sindex_gc_inactivity_dur=0i,sindex_gc_list_creation_time=0i,sindex_gc_list_deletion_time=0i,sindex_gc_locktimedout=0i,sindex_gc_objects_validated=0i,sindex_ucgarbage_found=0i,sub_objects=0i,system_free_mem_pct=92i,system_swapping=false,tsvc_queue=0i,uptime=1457i 1468923222000000000 +> aerospike_namespace,aerospike_host=localhost:3000,host=tars,namespace=test allow_nonxdr_writes=true,allow_xdr_writes=true,available_bin_names=32768i,batch_sub_proxy_complete=0i,batch_sub_proxy_error=0i,batch_sub_proxy_timeout=0i,batch_sub_read_error=0i,batch_sub_read_not_found=0i,batch_sub_read_success=0i,batch_sub_read_timeout=0i,batch_sub_tsvc_error=0i,batch_sub_tsvc_timeout=0i,client_delete_error=0i,client_delete_not_found=0i,client_delete_success=0i,client_delete_timeout=0i,client_lang_delete_success=0i,client_lang_error=0i,client_lang_read_success=0i,client_lang_write_success=0i,client_proxy_complete=0i,client_proxy_error=0i,client_proxy_timeout=0i,client_read_error=0i,client_read_not_found=0i,client_read_success=0i,client_read_timeout=0i,client_tsvc_error=0i,client_tsvc_timeout=0i,client_udf_complete=0i,client_udf_error=0i,client_udf_timeout=0i,client_write_error=0i,client_write_success=0i,client_write_timeout=0i,cold_start_evict_ttl=4294967295i,conflict_resolution_policy="generation",current_time=206619222i,data_in_index=false,default_ttl=432000i,device_available_pct=99i,device_free_pct=100i,device_total_bytes=4294967296i,device_used_bytes=0i,disallow_null_setname=false,enable_benchmarks_batch_sub=false,enable_benchmarks_read=false,enable_benchmarks_storage=false,enable_benchmarks_udf=false,enable_benchmarks_udf_sub=false,enable_benchmarks_write=false,enable_hist_proxy=false,enable_xdr=false,evict_hist_buckets=10000i,evict_tenths_pct=5i,evict_ttl=0i,evicted_objects=0i,expired_objects=0i,fail_generation=0i,fail_key_busy=0i,fail_record_too_big=0i,fail_xdr_forbidden=0i,geo2dsphere_within.earth_radius_meters=6371000i,geo2dsphere_within.level_mod=1i,geo2dsphere_within.max_cells=12i,geo2dsphere_within.max_level=30i,geo2dsphere_within.min_level=1i,geo2dsphere_within.strict=true,geo_region_query_cells=0i,geo_region_query_falsepos=0i,geo_region_query_points=0i,geo_region_query_reqs=0i,high_water_disk_pct=50i,high_water_memory_pct=60i,hwm_breached=false,ldt_enabled=false,ldt_gc_rate=0i,ldt_page_size=8192i,master_objects=0i,master_sub_objects=0i,max_ttl=315360000i,max_void_time=0i,memory_free_pct=100i,memory_size=1073741824i,memo
ry_used_bytes=0i,memory_used_data_bytes=0i,memory_used_index_bytes=0i,memory_used_sindex_bytes=0i,migrate_order=5i,migrate_record_receives=0i,migrate_record_retransmits=0i,migrate_records_skipped=0i,migrate_records_transmitted=0i,migrate_rx_instances=0i,migrate_rx_partitions_active=0i,migrate_rx_partitions_initial=0i,migrate_rx_partitions_remaining=0i,migrate_sleep=1i,migrate_tx_instances=0i,migrate_tx_partitions_active=0i,migrate_tx_partitions_imbalance=0i,migrate_tx_partitions_initial=0i,migrate_tx_partitions_remaining=0i,node_name="BB9020011AC4202",non_expirable_objects=0i,ns_forward_xdr_writes=false,nsup_cycle_duration=0i,nsup_cycle_sleep_pct=0i,objects=0i,prole_objects=0i,prole_sub_objects=0i,query_agg=0i,query_agg_abort=0i,query_agg_avg_rec_count=0i,query_agg_error=0i,query_agg_success=0i,query_fail=0i,query_long_queue_full=0i,query_long_reqs=0i,query_lookup_abort=0i,query_lookup_avg_rec_count=0i,query_lookup_error=0i,query_lookup_success=0i,query_lookups=0i,query_reqs=0i,query_short_queue_full=0i,query_short_reqs=0i,query_udf_bg_failure=0i,query_udf_bg_success=0i,read_consistency_level_override="off",repl_factor=1i,scan_aggr_abort=0i,scan_aggr_complete=0i,scan_aggr_error=0i,scan_basic_abort=0i,scan_basic_complete=0i,scan_basic_error=0i,scan_udf_bg_abort=0i,scan_udf_bg_complete=0i,scan_udf_bg_error=0i,set_deleted_objects=0i,sets_enable_xdr=true,sindex.data_max_memory="ULONG_MAX",sindex.num_partitions=32i,single_bin=false,stop_writes=false,stop_writes_pct=90i,storage_engine="device",storage_engine.cold_start_empty=false,storage_engine.data_in_memory=true,storage_engine.defrag_lwm_pct=50i,storage_engine.defrag_queue_min=0i,storage_engine.defrag_sleep=1000i,storage_engine.defrag_startup_minimum=10i,storage_engine.disable_odirect=false,storage_engine.enable_osync=false,storage_engine.file="/opt/aerospike/data/test.dat",storage_engine.filesize=4294967296i,storage_engine.flush_max_ms=1000i,storage_engine.fsync_max_sec=0i,storage_engine.max_write_cache=67108864i,storage_engine.min_avail_pct=5i,storage_engine.post_write_queue=0i,storage_engine.scheduler_mode="null",storage_engine.write_block_size=1048576i,storage_engine.write_threads=1i,sub_objects=0i,udf_sub_lang_delete_success=0i,udf_sub_lang_error=0i,udf_sub_lang_read_success=0i,udf_sub_lang_write_success=0i,udf_sub_tsvc_error=0i,udf_sub_tsvc_timeout=0i,udf_sub_udf_complete=0i,udf_sub_udf_error=0i,udf_sub_udf_timeout=0i,write_commit_level_override="off",xdr_write_error=0i,xdr_write_success=0i,xdr_write_timeout=0i,{test}_query_hist_track_back=300i,{test}_query_hist_track_slice=10i,{test}_query_hist_track_thresholds="1,8,64",{test}_read_hist_track_back=300i,{test}_read_hist_track_slice=10i,{test}_read_hist_track_thresholds="1,8,64",{test}_udf_hist_track_back=300i,{test}_udf_hist_track_slice=10i,{test}_udf_hist_track_thresholds="1,8,64",{test}_write_hist_track_back=300i,{test}_write_hist_track_slice=10i,{test}_write_hist_track_thresholds="1,8,64" 1468923222000000000 +``` \ No newline at end of file diff --git a/plugins/inputs/aerospike/aerospike.go b/plugins/inputs/aerospike/aerospike.go index cd2ebe25c..eb608723e 100644 --- a/plugins/inputs/aerospike/aerospike.go +++ b/plugins/inputs/aerospike/aerospike.go @@ -1,104 +1,19 @@ package aerospike import ( - "bytes" - "encoding/binary" - "fmt" - "github.com/influxdata/telegraf" - "github.com/influxdata/telegraf/plugins/inputs" "net" "strconv" "strings" "sync" + "time" + + "github.com/influxdata/telegraf" + "github.com/influxdata/telegraf/internal/errchan" + 
"github.com/influxdata/telegraf/plugins/inputs" + + as "github.com/sparrc/aerospike-client-go" ) -const ( - MSG_HEADER_SIZE = 8 - MSG_TYPE = 1 // Info is 1 - MSG_VERSION = 2 -) - -var ( - STATISTICS_COMMAND = []byte("statistics\n") - NAMESPACES_COMMAND = []byte("namespaces\n") -) - -type aerospikeMessageHeader struct { - Version uint8 - Type uint8 - DataLen [6]byte -} - -type aerospikeMessage struct { - aerospikeMessageHeader - Data []byte -} - -// Taken from aerospike-client-go/types/message.go -func (msg *aerospikeMessage) Serialize() []byte { - msg.DataLen = msgLenToBytes(int64(len(msg.Data))) - buf := bytes.NewBuffer([]byte{}) - binary.Write(buf, binary.BigEndian, msg.aerospikeMessageHeader) - binary.Write(buf, binary.BigEndian, msg.Data[:]) - return buf.Bytes() -} - -type aerospikeInfoCommand struct { - msg *aerospikeMessage -} - -// Taken from aerospike-client-go/info.go -func (nfo *aerospikeInfoCommand) parseMultiResponse() (map[string]string, error) { - responses := make(map[string]string) - offset := int64(0) - begin := int64(0) - - dataLen := int64(len(nfo.msg.Data)) - - // Create reusable StringBuilder for performance. - for offset < dataLen { - b := nfo.msg.Data[offset] - - if b == '\t' { - name := nfo.msg.Data[begin:offset] - offset++ - begin = offset - - // Parse field value. - for offset < dataLen { - if nfo.msg.Data[offset] == '\n' { - break - } - offset++ - } - - if offset > begin { - value := nfo.msg.Data[begin:offset] - responses[string(name)] = string(value) - } else { - responses[string(name)] = "" - } - offset++ - begin = offset - } else if b == '\n' { - if offset > begin { - name := nfo.msg.Data[begin:offset] - responses[string(name)] = "" - } - offset++ - begin = offset - } else { - offset++ - } - } - - if offset > begin { - name := nfo.msg.Data[begin:offset] - responses[string(name)] = "" - } - return responses, nil -} - type Aerospike struct { Servers []string } @@ -115,7 +30,7 @@ func (a *Aerospike) SampleConfig() string { } func (a *Aerospike) Description() string { - return "Read stats from an aerospike server" + return "Read stats from aerospike server(s)" } func (a *Aerospike) Gather(acc telegraf.Accumulator) error { @@ -124,214 +39,101 @@ func (a *Aerospike) Gather(acc telegraf.Accumulator) error { } var wg sync.WaitGroup - - var outerr error - + errChan := errchan.New(len(a.Servers)) + wg.Add(len(a.Servers)) for _, server := range a.Servers { - wg.Add(1) - go func(server string) { + go func(serv string) { defer wg.Done() - outerr = a.gatherServer(server, acc) + errChan.C <- a.gatherServer(serv, acc) }(server) } wg.Wait() - return outerr + return errChan.Error() } -func (a *Aerospike) gatherServer(host string, acc telegraf.Accumulator) error { - aerospikeInfo, err := getMap(STATISTICS_COMMAND, host) +func (a *Aerospike) gatherServer(hostport string, acc telegraf.Accumulator) error { + host, port, err := net.SplitHostPort(hostport) if err != nil { - return fmt.Errorf("Aerospike info failed: %s", err) + return err } - readAerospikeStats(aerospikeInfo, acc, host, "") - namespaces, err := getList(NAMESPACES_COMMAND, host) + + iport, err := strconv.Atoi(port) if err != nil { - return fmt.Errorf("Aerospike namespace list failed: %s", err) + iport = 3000 } - for ix := range namespaces { - nsInfo, err := getMap([]byte("namespace/"+namespaces[ix]+"\n"), host) - if err != nil { - return fmt.Errorf("Aerospike namespace '%s' query failed: %s", namespaces[ix], err) + + c, err := as.NewClient(host, iport) + if err != nil { + return err + } + defer c.Close() + + nodes := 
c.GetNodes() + for _, n := range nodes { + tags := map[string]string{ + "aerospike_host": hostport, + } + fields := map[string]interface{}{ + "node_name": n.GetName(), + } + stats, err := as.RequestNodeStats(n) + if err != nil { + return err + } + for k, v := range stats { + fields[strings.Replace(k, "-", "_", -1)] = parseValue(v) + } + acc.AddFields("aerospike_node", fields, tags, time.Now()) + + info, err := as.RequestNodeInfo(n, "namespaces") + if err != nil { + return err + } + namespaces := strings.Split(info["namespaces"], ";") + + for _, namespace := range namespaces { + nTags := map[string]string{ + "aerospike_host": hostport, + } + nTags["namespace"] = namespace + nFields := map[string]interface{}{ + "node_name": n.GetName(), + } + info, err := as.RequestNodeInfo(n, "namespace/"+namespace) + if err != nil { + continue + } + stats := strings.Split(info["namespace/"+namespace], ";") + for _, stat := range stats { + parts := strings.Split(stat, "=") + if len(parts) < 2 { + continue + } + nFields[strings.Replace(parts[0], "-", "_", -1)] = parseValue(parts[1]) + } + acc.AddFields("aerospike_namespace", nFields, nTags, time.Now()) } - readAerospikeStats(nsInfo, acc, host, namespaces[ix]) } return nil } -func getMap(key []byte, host string) (map[string]string, error) { - data, err := get(key, host) - if err != nil { - return nil, fmt.Errorf("Failed to get data: %s", err) +func parseValue(v string) interface{} { + if parsed, err := strconv.ParseInt(v, 10, 64); err == nil { + return parsed + } else if parsed, err := strconv.ParseBool(v); err == nil { + return parsed + } else { + return v } - parsed, err := unmarshalMapInfo(data, string(key)) - if err != nil { - return nil, fmt.Errorf("Failed to unmarshal data: %s", err) - } - - return parsed, nil } -func getList(key []byte, host string) ([]string, error) { - data, err := get(key, host) - if err != nil { - return nil, fmt.Errorf("Failed to get data: %s", err) +func copyTags(m map[string]string) map[string]string { + out := make(map[string]string) + for k, v := range m { + out[k] = v } - parsed, err := unmarshalListInfo(data, string(key)) - if err != nil { - return nil, fmt.Errorf("Failed to unmarshal data: %s", err) - } - - return parsed, nil -} - -func get(key []byte, host string) (map[string]string, error) { - var err error - var data map[string]string - - asInfo := &aerospikeInfoCommand{ - msg: &aerospikeMessage{ - aerospikeMessageHeader: aerospikeMessageHeader{ - Version: uint8(MSG_VERSION), - Type: uint8(MSG_TYPE), - DataLen: msgLenToBytes(int64(len(key))), - }, - Data: key, - }, - } - - cmd := asInfo.msg.Serialize() - addr, err := net.ResolveTCPAddr("tcp", host) - if err != nil { - return data, fmt.Errorf("Lookup failed for '%s': %s", host, err) - } - - conn, err := net.DialTCP("tcp", nil, addr) - if err != nil { - return data, fmt.Errorf("Connection failed for '%s': %s", host, err) - } - defer conn.Close() - - _, err = conn.Write(cmd) - if err != nil { - return data, fmt.Errorf("Failed to send to '%s': %s", host, err) - } - - msgHeader := bytes.NewBuffer(make([]byte, MSG_HEADER_SIZE)) - _, err = readLenFromConn(conn, msgHeader.Bytes(), MSG_HEADER_SIZE) - if err != nil { - return data, fmt.Errorf("Failed to read header: %s", err) - } - err = binary.Read(msgHeader, binary.BigEndian, &asInfo.msg.aerospikeMessageHeader) - if err != nil { - return data, fmt.Errorf("Failed to unmarshal header: %s", err) - } - - msgLen := msgLenFromBytes(asInfo.msg.aerospikeMessageHeader.DataLen) - - if int64(len(asInfo.msg.Data)) != msgLen { - 
asInfo.msg.Data = make([]byte, msgLen) - } - - _, err = readLenFromConn(conn, asInfo.msg.Data, len(asInfo.msg.Data)) - if err != nil { - return data, fmt.Errorf("Failed to read from connection to '%s': %s", host, err) - } - - data, err = asInfo.parseMultiResponse() - if err != nil { - return data, fmt.Errorf("Failed to parse response from '%s': %s", host, err) - } - - return data, err -} - -func readAerospikeStats( - stats map[string]string, - acc telegraf.Accumulator, - host string, - namespace string, -) { - fields := make(map[string]interface{}) - tags := map[string]string{ - "aerospike_host": host, - "namespace": "_service", - } - - if namespace != "" { - tags["namespace"] = namespace - } - for key, value := range stats { - // We are going to ignore all string based keys - val, err := strconv.ParseInt(value, 10, 64) - if err == nil { - if strings.Contains(key, "-") { - key = strings.Replace(key, "-", "_", -1) - } - fields[key] = val - } - } - acc.AddFields("aerospike", fields, tags) -} - -func unmarshalMapInfo(infoMap map[string]string, key string) (map[string]string, error) { - key = strings.TrimSuffix(key, "\n") - res := map[string]string{} - - v, exists := infoMap[key] - if !exists { - return res, fmt.Errorf("Key '%s' missing from info", key) - } - - values := strings.Split(v, ";") - for i := range values { - kv := strings.Split(values[i], "=") - if len(kv) > 1 { - res[kv[0]] = kv[1] - } - } - - return res, nil -} - -func unmarshalListInfo(infoMap map[string]string, key string) ([]string, error) { - key = strings.TrimSuffix(key, "\n") - - v, exists := infoMap[key] - if !exists { - return []string{}, fmt.Errorf("Key '%s' missing from info", key) - } - - values := strings.Split(v, ";") - return values, nil -} - -func readLenFromConn(c net.Conn, buffer []byte, length int) (total int, err error) { - var r int - for total < length { - r, err = c.Read(buffer[total:length]) - total += r - if err != nil { - break - } - } - return -} - -// Taken from aerospike-client-go/types/message.go -func msgLenToBytes(DataLen int64) [6]byte { - b := make([]byte, 8) - binary.BigEndian.PutUint64(b, uint64(DataLen)) - res := [6]byte{} - copy(res[:], b[2:]) - return res -} - -// Taken from aerospike-client-go/types/message.go -func msgLenFromBytes(buf [6]byte) int64 { - nbytes := append([]byte{0, 0}, buf[:]...) 
- DataLen := binary.BigEndian.Uint64(nbytes) - return int64(DataLen) + return out } func init() { diff --git a/plugins/inputs/aerospike/aerospike_test.go b/plugins/inputs/aerospike/aerospike_test.go index 74b70eb1d..8463432f5 100644 --- a/plugins/inputs/aerospike/aerospike_test.go +++ b/plugins/inputs/aerospike/aerospike_test.go @@ -1,7 +1,6 @@ package aerospike import ( - "reflect" "testing" "github.com/influxdata/telegraf/testutil" @@ -23,96 +22,29 @@ func TestAerospikeStatistics(t *testing.T) { err := a.Gather(&acc) require.NoError(t, err) - // Only use a few of the metrics - asMetrics := []string{ - "transactions", - "stat_write_errs", - "stat_read_reqs", - "stat_write_reqs", - } - - for _, metric := range asMetrics { - assert.True(t, acc.HasIntField("aerospike", metric), metric) - } - + assert.True(t, acc.HasMeasurement("aerospike_node")) + assert.True(t, acc.HasMeasurement("aerospike_namespace")) + assert.True(t, acc.HasIntField("aerospike_node", "batch_error")) } -func TestAerospikeMsgLenFromToBytes(t *testing.T) { - var i int64 = 8 - assert.True(t, i == msgLenFromBytes(msgLenToBytes(i))) -} +func TestAerospikeStatisticsPartialErr(t *testing.T) { + if testing.Short() { + t.Skip("Skipping integration test in short mode") + } + + a := &Aerospike{ + Servers: []string{ + testutil.GetLocalHost() + ":3000", + testutil.GetLocalHost() + ":9999", + }, + } -func TestReadAerospikeStatsNoNamespace(t *testing.T) { - // Also test for re-writing var acc testutil.Accumulator - stats := map[string]string{ - "stat-write-errs": "12345", - "stat_read_reqs": "12345", - } - readAerospikeStats(stats, &acc, "host1", "") - fields := map[string]interface{}{ - "stat_write_errs": int64(12345), - "stat_read_reqs": int64(12345), - } - tags := map[string]string{ - "aerospike_host": "host1", - "namespace": "_service", - } - acc.AssertContainsTaggedFields(t, "aerospike", fields, tags) -} - -func TestReadAerospikeStatsNamespace(t *testing.T) { - var acc testutil.Accumulator - stats := map[string]string{ - "stat_write_errs": "12345", - "stat_read_reqs": "12345", - } - readAerospikeStats(stats, &acc, "host1", "test") - - fields := map[string]interface{}{ - "stat_write_errs": int64(12345), - "stat_read_reqs": int64(12345), - } - tags := map[string]string{ - "aerospike_host": "host1", - "namespace": "test", - } - acc.AssertContainsTaggedFields(t, "aerospike", fields, tags) -} - -func TestAerospikeUnmarshalList(t *testing.T) { - i := map[string]string{ - "test": "one;two;three", - } - - expected := []string{"one", "two", "three"} - - list, err := unmarshalListInfo(i, "test2") - assert.True(t, err != nil) - - list, err = unmarshalListInfo(i, "test") - assert.True(t, err == nil) - equal := true - for ix := range expected { - if list[ix] != expected[ix] { - equal = false - break - } - } - assert.True(t, equal) -} - -func TestAerospikeUnmarshalMap(t *testing.T) { - i := map[string]string{ - "test": "key1=value1;key2=value2", - } - - expected := map[string]string{ - "key1": "value1", - "key2": "value2", - } - m, err := unmarshalMapInfo(i, "test") - assert.True(t, err == nil) - assert.True(t, reflect.DeepEqual(m, expected)) + err := a.Gather(&acc) + require.Error(t, err) + + assert.True(t, acc.HasMeasurement("aerospike_node")) + assert.True(t, acc.HasMeasurement("aerospike_namespace")) + assert.True(t, acc.HasIntField("aerospike_node", "batch_error")) } diff --git a/plugins/inputs/all/all.go b/plugins/inputs/all/all.go index 1a386d97c..af759aac8 100644 --- a/plugins/inputs/all/all.go +++ b/plugins/inputs/all/all.go @@ -6,6 
+6,7 @@ import ( _ "github.com/influxdata/telegraf/plugins/inputs/bcache" _ "github.com/influxdata/telegraf/plugins/inputs/cassandra" _ "github.com/influxdata/telegraf/plugins/inputs/ceph" + _ "github.com/influxdata/telegraf/plugins/inputs/cgroup" _ "github.com/influxdata/telegraf/plugins/inputs/chrony" _ "github.com/influxdata/telegraf/plugins/inputs/cloudwatch" _ "github.com/influxdata/telegraf/plugins/inputs/conntrack" @@ -19,9 +20,9 @@ import ( _ "github.com/influxdata/telegraf/plugins/inputs/elasticsearch" _ "github.com/influxdata/telegraf/plugins/inputs/exec" _ "github.com/influxdata/telegraf/plugins/inputs/filestat" - _ "github.com/influxdata/telegraf/plugins/inputs/github_webhooks" _ "github.com/influxdata/telegraf/plugins/inputs/graylog" _ "github.com/influxdata/telegraf/plugins/inputs/haproxy" + _ "github.com/influxdata/telegraf/plugins/inputs/hddtemp" _ "github.com/influxdata/telegraf/plugins/inputs/http_response" _ "github.com/influxdata/telegraf/plugins/inputs/httpjson" _ "github.com/influxdata/telegraf/plugins/inputs/influxdb" @@ -29,6 +30,7 @@ import ( _ "github.com/influxdata/telegraf/plugins/inputs/jolokia" _ "github.com/influxdata/telegraf/plugins/inputs/kafka_consumer" _ "github.com/influxdata/telegraf/plugins/inputs/leofs" + _ "github.com/influxdata/telegraf/plugins/inputs/logparser" _ "github.com/influxdata/telegraf/plugins/inputs/lustre2" _ "github.com/influxdata/telegraf/plugins/inputs/mailchimp" _ "github.com/influxdata/telegraf/plugins/inputs/memcached" @@ -40,6 +42,7 @@ import ( _ "github.com/influxdata/telegraf/plugins/inputs/net_response" _ "github.com/influxdata/telegraf/plugins/inputs/nginx" _ "github.com/influxdata/telegraf/plugins/inputs/nsq" + _ "github.com/influxdata/telegraf/plugins/inputs/nsq_consumer" _ "github.com/influxdata/telegraf/plugins/inputs/nstat" _ "github.com/influxdata/telegraf/plugins/inputs/ntpq" _ "github.com/influxdata/telegraf/plugins/inputs/passenger" @@ -56,9 +59,8 @@ import ( _ "github.com/influxdata/telegraf/plugins/inputs/redis" _ "github.com/influxdata/telegraf/plugins/inputs/rethinkdb" _ "github.com/influxdata/telegraf/plugins/inputs/riak" - _ "github.com/influxdata/telegraf/plugins/inputs/rollbar_webhooks" _ "github.com/influxdata/telegraf/plugins/inputs/sensors" - _ "github.com/influxdata/telegraf/plugins/inputs/snmp" + _ "github.com/influxdata/telegraf/plugins/inputs/snmp_legacy" _ "github.com/influxdata/telegraf/plugins/inputs/sqlserver" _ "github.com/influxdata/telegraf/plugins/inputs/statsd" _ "github.com/influxdata/telegraf/plugins/inputs/sysstat" @@ -69,6 +71,7 @@ import ( _ "github.com/influxdata/telegraf/plugins/inputs/twemproxy" _ "github.com/influxdata/telegraf/plugins/inputs/udp_listener" _ "github.com/influxdata/telegraf/plugins/inputs/varnish" + _ "github.com/influxdata/telegraf/plugins/inputs/webhooks" _ "github.com/influxdata/telegraf/plugins/inputs/win_perf_counters" _ "github.com/influxdata/telegraf/plugins/inputs/zfs" _ "github.com/influxdata/telegraf/plugins/inputs/zookeeper" diff --git a/plugins/inputs/apache/apache.go b/plugins/inputs/apache/apache.go index dc5dddb9d..be891bb31 100644 --- a/plugins/inputs/apache/apache.go +++ b/plugins/inputs/apache/apache.go @@ -8,7 +8,6 @@ import ( "net/url" "strconv" "strings" - "sync" "time" "github.com/influxdata/telegraf" @@ -38,8 +37,8 @@ func (n *Apache) Gather(acc telegraf.Accumulator) error { n.Urls = []string{"http://localhost/server-status?auto"} } - var wg sync.WaitGroup var outerr error + var errch = make(chan error) for _, u := range n.Urls { addr, err := 
url.Parse(u) @@ -47,14 +46,17 @@ func (n *Apache) Gather(acc telegraf.Accumulator) error { return fmt.Errorf("Unable to parse address '%s': %s", u, err) } - wg.Add(1) go func(addr *url.URL) { - defer wg.Done() - outerr = n.gatherUrl(addr, acc) + errch <- n.gatherUrl(addr, acc) }(addr) } - wg.Wait() + // Drain channel, waiting for all requests to finish and save last error. + for range n.Urls { + if err := <-errch; err != nil { + outerr = err + } + } return outerr } diff --git a/plugins/inputs/apache/apache_test.go b/plugins/inputs/apache/apache_test.go index 8eed61ca6..2a80b3868 100644 --- a/plugins/inputs/apache/apache_test.go +++ b/plugins/inputs/apache/apache_test.go @@ -36,7 +36,8 @@ func TestHTTPApache(t *testing.T) { defer ts.Close() a := Apache{ - Urls: []string{ts.URL}, + // Fetch it 2 times to catch possible data races. + Urls: []string{ts.URL, ts.URL}, } var acc testutil.Accumulator diff --git a/plugins/inputs/cassandra/cassandra.go b/plugins/inputs/cassandra/cassandra.go index 351232aca..e7edf7153 100644 --- a/plugins/inputs/cassandra/cassandra.go +++ b/plugins/inputs/cassandra/cassandra.go @@ -148,7 +148,7 @@ func (c cassandraMetric) addTagsFields(out map[string]interface{}) { tokens := parseJmxMetricRequest(r.(map[string]interface{})["mbean"].(string)) // Requests with wildcards for keyspace or table names will return nested // maps in the json response - if tokens["type"] == "Table" && (tokens["keyspace"] == "*" || + if (tokens["type"] == "Table" || tokens["type"] == "ColumnFamily") && (tokens["keyspace"] == "*" || tokens["scope"] == "*") { if valuesMap, ok := out["value"]; ok { for k, v := range valuesMap.(map[string]interface{}) { diff --git a/plugins/inputs/ceph/README.md b/plugins/inputs/ceph/README.md index 61b275650..ab358daaa 100644 --- a/plugins/inputs/ceph/README.md +++ b/plugins/inputs/ceph/README.md @@ -1,18 +1,18 @@ # Ceph Storage Input Plugin -Collects performance metrics from the MON and OSD nodes in a Ceph storage cluster. +Collects performance metrics from the MON and OSD nodes in a Ceph storage cluster. The plugin works by scanning the configured SocketDir for OSD and MON socket files. When it finds -a MON socket, it runs **ceph --admin-daemon $file perfcounters_dump**. For OSDs it runs **ceph --admin-daemon $file perf dump** +a MON socket, it runs **ceph --admin-daemon $file perfcounters_dump**. For OSDs it runs **ceph --admin-daemon $file perf dump** The resulting JSON is parsed and grouped into collections, based on top-level key. Top-level keys are used as collection tags, and all sub-keys are flattened. For example: ``` - { - "paxos": { + { + "paxos": { "refresh": 9363435, - "refresh_latency": { + "refresh_latency": { "avgcount": 9363435, "sum": 5378.794002000 } @@ -50,7 +50,7 @@ Would be parsed into the following metrics, all of which would be tagged with co ### Measurements & Fields: -All fields are collected under the **ceph** measurement and stored as float64s. For a full list of fields, see the sample perf dumps in ceph_test.go. +All fields are collected under the **ceph** measurement and stored as float64s. For a full list of fields, see the sample perf dumps in ceph_test.go. 
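+
+To make the flattening concrete, here is a small illustrative sketch (not the
+plugin's actual code) of how a nested perf-dump subtree collapses into dotted
+field names:
+
+```
+package main
+
+import (
+	"encoding/json"
+	"fmt"
+)
+
+// flatten collapses nested JSON objects into dotted keys, so
+// {"refresh_latency": {"avgcount": 9363435}} becomes
+// "refresh_latency.avgcount" = 9363435.
+// Illustrative only: the plugin's real implementation differs.
+func flatten(prefix string, in, out map[string]interface{}) {
+	for k, v := range in {
+		key := k
+		if prefix != "" {
+			key = prefix + "." + k
+		}
+		if m, ok := v.(map[string]interface{}); ok {
+			flatten(key, m, out)
+		} else {
+			out[key] = v
+		}
+	}
+}
+
+func main() {
+	raw := []byte(`{"refresh": 9363435, "refresh_latency": {"avgcount": 9363435, "sum": 5378.794002}}`)
+	var in map[string]interface{}
+	json.Unmarshal(raw, &in)
+	out := map[string]interface{}{}
+	flatten("", in, out)
+	fmt.Println(out) // refresh, refresh_latency.avgcount, refresh_latency.sum
+}
+```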
 ### Tags:
 
@@ -95,7 +95,7 @@ All measurements will have the following tags:
 - throttle-objecter_ops
 - throttle-osd_client_bytes
 - throttle-osd_client_messages
-
+
 ### Example Output:
 
diff --git a/plugins/inputs/cgroup/README.md b/plugins/inputs/cgroup/README.md
new file mode 100644
index 000000000..feb332dd9
--- /dev/null
+++ b/plugins/inputs/cgroup/README.md
@@ -0,0 +1,59 @@
+# CGroup Input Plugin For Telegraf Agent
+
+This input plugin will capture specific statistics per cgroup.
+
+The following file formats are supported:
+
+* Single value
+
+```
+VAL\n
+```
+
+* New line separated values
+
+```
+VAL0\n
+VAL1\n
+```
+
+* Space separated values
+
+```
+VAL0 VAL1 ...\n
+```
+
+* New line separated key-space-value pairs
+
+```
+KEY0 VAL0\n
+KEY1 VAL1\n
+```
+
+
+### Tags:
+
+Measurements don't have any specific tags unless you define them at the telegraf level (defaults). We
+used to have the path listed as a tag, but to keep cardinality in check it's easier to move this
+value to a field. Thanks @sebito91!
+
+
+### Configuration:
+
+```
+# [[inputs.cgroup]]
+  # paths = [
+  #   "/cgroup/memory",          # root cgroup
+  #   "/cgroup/memory/child1",   # container cgroup
+  #   "/cgroup/memory/child2/*", # all children cgroups under child2, but not child2 itself
+  # ]
+  # files = ["memory.*usage*", "memory.limit_in_bytes"]
+
+# [[inputs.cgroup]]
+  # paths = [
+  #   "/cgroup/cpu",             # root cgroup
+  #   "/cgroup/cpu/*",           # all container cgroups
+  #   "/cgroup/cpu/*/*",         # all children cgroups under each container cgroup
+  # ]
+  # files = ["cpuacct.usage", "cpu.cfs_period_us", "cpu.cfs_quota_us"]
+```
diff --git a/plugins/inputs/cgroup/cgroup.go b/plugins/inputs/cgroup/cgroup.go
new file mode 100644
index 000000000..e38b6a4c1
--- /dev/null
+++ b/plugins/inputs/cgroup/cgroup.go
@@ -0,0 +1,35 @@
+package cgroup
+
+import (
+	"github.com/influxdata/telegraf"
+	"github.com/influxdata/telegraf/plugins/inputs"
+)
+
+type CGroup struct {
+	Paths []string `toml:"paths"`
+	Files []string `toml:"files"`
+}
+
+var sampleConfig = `
+  ## Directories in which to look for files, globs are supported.
+  # paths = [
+  #   "/cgroup/memory",
+  #   "/cgroup/memory/child1",
+  #   "/cgroup/memory/child2/*",
+  # ]
+  ## cgroup stat fields, as file names, globs are supported.
+  ## these file names are appended to each path from above.
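+  ## Globs are matched using Go's filepath.Match syntax.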
+ # files = ["memory.*usage*", "memory.limit_in_bytes"] +` + +func (g *CGroup) SampleConfig() string { + return sampleConfig +} + +func (g *CGroup) Description() string { + return "Read specific statistics per cgroup" +} + +func init() { + inputs.Add("cgroup", func() telegraf.Input { return &CGroup{} }) +} diff --git a/plugins/inputs/cgroup/cgroup_linux.go b/plugins/inputs/cgroup/cgroup_linux.go new file mode 100644 index 000000000..ecaf8126d --- /dev/null +++ b/plugins/inputs/cgroup/cgroup_linux.go @@ -0,0 +1,243 @@ +// +build linux + +package cgroup + +import ( + "fmt" + "io/ioutil" + "os" + "path" + "path/filepath" + "regexp" + "strconv" + + "github.com/influxdata/telegraf" +) + +const metricName = "cgroup" + +func (g *CGroup) Gather(acc telegraf.Accumulator) error { + list := make(chan pathInfo) + go g.generateDirs(list) + + for dir := range list { + if dir.err != nil { + return dir.err + } + if err := g.gatherDir(dir.path, acc); err != nil { + return err + } + } + + return nil +} + +func (g *CGroup) gatherDir(dir string, acc telegraf.Accumulator) error { + fields := make(map[string]interface{}) + + list := make(chan pathInfo) + go g.generateFiles(dir, list) + + for file := range list { + if file.err != nil { + return file.err + } + + raw, err := ioutil.ReadFile(file.path) + if err != nil { + return err + } + if len(raw) == 0 { + continue + } + + fd := fileData{data: raw, path: file.path} + if err := fd.parse(fields); err != nil { + return err + } + } + fields["path"] = dir + + acc.AddFields(metricName, fields, nil) + + return nil +} + +// ====================================================================== + +type pathInfo struct { + path string + err error +} + +func isDir(path string) (bool, error) { + result, err := os.Stat(path) + if err != nil { + return false, err + } + return result.IsDir(), nil +} + +func (g *CGroup) generateDirs(list chan<- pathInfo) { + for _, dir := range g.Paths { + // getting all dirs that match the pattern 'dir' + items, err := filepath.Glob(dir) + if err != nil { + list <- pathInfo{err: err} + return + } + + for _, item := range items { + ok, err := isDir(item) + if err != nil { + list <- pathInfo{err: err} + return + } + // supply only dirs + if ok { + list <- pathInfo{path: item} + } + } + } + close(list) +} + +func (g *CGroup) generateFiles(dir string, list chan<- pathInfo) { + for _, file := range g.Files { + // getting all file paths that match the pattern 'dir + file' + // path.Base make sure that file variable does not contains part of path + items, err := filepath.Glob(path.Join(dir, path.Base(file))) + if err != nil { + list <- pathInfo{err: err} + return + } + + for _, item := range items { + ok, err := isDir(item) + if err != nil { + list <- pathInfo{err: err} + return + } + // supply only files not dirs + if !ok { + list <- pathInfo{path: item} + } + } + } + close(list) +} + +// ====================================================================== + +type fileData struct { + data []byte + path string +} + +func (fd *fileData) format() (*fileFormat, error) { + for _, ff := range fileFormats { + ok, err := ff.match(fd.data) + if err != nil { + return nil, err + } + if ok { + return &ff, nil + } + } + + return nil, fmt.Errorf("%v: unknown file format", fd.path) +} + +func (fd *fileData) parse(fields map[string]interface{}) error { + format, err := fd.format() + if err != nil { + return err + } + + format.parser(filepath.Base(fd.path), fields, fd.data) + return nil +} + +// 
====================================================================== + +type fileFormat struct { + name string + pattern string + parser func(measurement string, fields map[string]interface{}, b []byte) +} + +const keyPattern = "[[:alpha:]_]+" +const valuePattern = "[\\d-]+" + +var fileFormats = [...]fileFormat{ + // VAL\n + fileFormat{ + name: "Single value", + pattern: "^" + valuePattern + "\n$", + parser: func(measurement string, fields map[string]interface{}, b []byte) { + re := regexp.MustCompile("^(" + valuePattern + ")\n$") + matches := re.FindAllStringSubmatch(string(b), -1) + fields[measurement] = numberOrString(matches[0][1]) + }, + }, + // VAL0\n + // VAL1\n + // ... + fileFormat{ + name: "New line separated values", + pattern: "^(" + valuePattern + "\n){2,}$", + parser: func(measurement string, fields map[string]interface{}, b []byte) { + re := regexp.MustCompile("(" + valuePattern + ")\n") + matches := re.FindAllStringSubmatch(string(b), -1) + for i, v := range matches { + fields[measurement+"."+strconv.Itoa(i)] = numberOrString(v[1]) + } + }, + }, + // VAL0 VAL1 ...\n + fileFormat{ + name: "Space separated values", + pattern: "^(" + valuePattern + " )+\n$", + parser: func(measurement string, fields map[string]interface{}, b []byte) { + re := regexp.MustCompile("(" + valuePattern + ") ") + matches := re.FindAllStringSubmatch(string(b), -1) + for i, v := range matches { + fields[measurement+"."+strconv.Itoa(i)] = numberOrString(v[1]) + } + }, + }, + // KEY0 VAL0\n + // KEY1 VAL1\n + // ... + fileFormat{ + name: "New line separated key-space-value's", + pattern: "^(" + keyPattern + " " + valuePattern + "\n)+$", + parser: func(measurement string, fields map[string]interface{}, b []byte) { + re := regexp.MustCompile("(" + keyPattern + ") (" + valuePattern + ")\n") + matches := re.FindAllStringSubmatch(string(b), -1) + for _, v := range matches { + fields[measurement+"."+v[1]] = numberOrString(v[2]) + } + }, + }, +} + +func numberOrString(s string) interface{} { + i, err := strconv.Atoi(s) + if err == nil { + return i + } + + return s +} + +func (f fileFormat) match(b []byte) (bool, error) { + ok, err := regexp.Match(f.pattern, b) + if err != nil { + return false, err + } + if ok { + return true, nil + } + return false, nil +} diff --git a/plugins/inputs/cgroup/cgroup_notlinux.go b/plugins/inputs/cgroup/cgroup_notlinux.go new file mode 100644 index 000000000..2bc227410 --- /dev/null +++ b/plugins/inputs/cgroup/cgroup_notlinux.go @@ -0,0 +1,11 @@ +// +build !linux + +package cgroup + +import ( + "github.com/influxdata/telegraf" +) + +func (g *CGroup) Gather(acc telegraf.Accumulator) error { + return nil +} diff --git a/plugins/inputs/cgroup/cgroup_test.go b/plugins/inputs/cgroup/cgroup_test.go new file mode 100644 index 000000000..ff9b8d7a8 --- /dev/null +++ b/plugins/inputs/cgroup/cgroup_test.go @@ -0,0 +1,194 @@ +// +build linux + +package cgroup + +import ( + "fmt" + "testing" + + "github.com/influxdata/telegraf/testutil" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "reflect" +) + +var cg1 = &CGroup{ + Paths: []string{"testdata/memory"}, + Files: []string{ + "memory.empty", + "memory.max_usage_in_bytes", + "memory.limit_in_bytes", + "memory.stat", + "memory.use_hierarchy", + "notify_on_release", + }, +} + +func assertContainsFields(a *testutil.Accumulator, t *testing.T, measurement string, fieldSet []map[string]interface{}) { + a.Lock() + defer a.Unlock() + + numEquals := 0 + for _, p := range a.Metrics { + if p.Measurement == measurement { + 
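+			// Compare this metric's fields against every expected field set.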
for _, fields := range fieldSet { + if reflect.DeepEqual(fields, p.Fields) { + numEquals++ + } + } + } + } + + if numEquals != len(fieldSet) { + assert.Fail(t, fmt.Sprintf("only %d of %d are equal", numEquals, len(fieldSet))) + } +} + +func TestCgroupStatistics_1(t *testing.T) { + var acc testutil.Accumulator + + err := cg1.Gather(&acc) + require.NoError(t, err) + + fields := map[string]interface{}{ + "memory.stat.cache": 1739362304123123123, + "memory.stat.rss": 1775325184, + "memory.stat.rss_huge": 778043392, + "memory.stat.mapped_file": 421036032, + "memory.stat.dirty": -307200, + "memory.max_usage_in_bytes.0": 0, + "memory.max_usage_in_bytes.1": -1, + "memory.max_usage_in_bytes.2": 2, + "memory.limit_in_bytes": 223372036854771712, + "memory.use_hierarchy": "12-781", + "notify_on_release": 0, + "path": "testdata/memory", + } + assertContainsFields(&acc, t, "cgroup", []map[string]interface{}{fields}) +} + +// ====================================================================== + +var cg2 = &CGroup{ + Paths: []string{"testdata/cpu"}, + Files: []string{"cpuacct.usage_percpu"}, +} + +func TestCgroupStatistics_2(t *testing.T) { + var acc testutil.Accumulator + + err := cg2.Gather(&acc) + require.NoError(t, err) + + fields := map[string]interface{}{ + "cpuacct.usage_percpu.0": -1452543795404, + "cpuacct.usage_percpu.1": 1376681271659, + "cpuacct.usage_percpu.2": 1450950799997, + "cpuacct.usage_percpu.3": -1473113374257, + "path": "testdata/cpu", + } + assertContainsFields(&acc, t, "cgroup", []map[string]interface{}{fields}) +} + +// ====================================================================== + +var cg3 = &CGroup{ + Paths: []string{"testdata/memory/*"}, + Files: []string{"memory.limit_in_bytes"}, +} + +func TestCgroupStatistics_3(t *testing.T) { + var acc testutil.Accumulator + + err := cg3.Gather(&acc) + require.NoError(t, err) + + fields := map[string]interface{}{ + "memory.limit_in_bytes": 223372036854771712, + "path": "testdata/memory/group_1", + } + + fieldsTwo := map[string]interface{}{ + "memory.limit_in_bytes": 223372036854771712, + "path": "testdata/memory/group_2", + } + assertContainsFields(&acc, t, "cgroup", []map[string]interface{}{fields, fieldsTwo}) +} + +// ====================================================================== + +var cg4 = &CGroup{ + Paths: []string{"testdata/memory/*/*", "testdata/memory/group_2"}, + Files: []string{"memory.limit_in_bytes"}, +} + +func TestCgroupStatistics_4(t *testing.T) { + var acc testutil.Accumulator + + err := cg4.Gather(&acc) + require.NoError(t, err) + + fields := map[string]interface{}{ + "memory.limit_in_bytes": 223372036854771712, + "path": "testdata/memory/group_1/group_1_1", + } + + fieldsTwo := map[string]interface{}{ + "memory.limit_in_bytes": 223372036854771712, + "path": "testdata/memory/group_1/group_1_2", + } + + fieldsThree := map[string]interface{}{ + "memory.limit_in_bytes": 223372036854771712, + "path": "testdata/memory/group_2", + } + + assertContainsFields(&acc, t, "cgroup", []map[string]interface{}{fields, fieldsTwo, fieldsThree}) +} + +// ====================================================================== + +var cg5 = &CGroup{ + Paths: []string{"testdata/memory/*/group_1_1"}, + Files: []string{"memory.limit_in_bytes"}, +} + +func TestCgroupStatistics_5(t *testing.T) { + var acc testutil.Accumulator + + err := cg5.Gather(&acc) + require.NoError(t, err) + + fields := map[string]interface{}{ + "memory.limit_in_bytes": 223372036854771712, + "path": "testdata/memory/group_1/group_1_1", + } + + fieldsTwo := 
map[string]interface{}{ + "memory.limit_in_bytes": 223372036854771712, + "path": "testdata/memory/group_2/group_1_1", + } + assertContainsFields(&acc, t, "cgroup", []map[string]interface{}{fields, fieldsTwo}) +} + +// ====================================================================== + +var cg6 = &CGroup{ + Paths: []string{"testdata/memory"}, + Files: []string{"memory.us*", "*/memory.kmem.*"}, +} + +func TestCgroupStatistics_6(t *testing.T) { + var acc testutil.Accumulator + + err := cg6.Gather(&acc) + require.NoError(t, err) + + fields := map[string]interface{}{ + "memory.usage_in_bytes": 3513667584, + "memory.use_hierarchy": "12-781", + "memory.kmem.limit_in_bytes": 9223372036854771712, + "path": "testdata/memory", + } + assertContainsFields(&acc, t, "cgroup", []map[string]interface{}{fields}) +} diff --git a/plugins/inputs/cgroup/testdata/blkio/blkio.io_serviced b/plugins/inputs/cgroup/testdata/blkio/blkio.io_serviced new file mode 100644 index 000000000..4b28cf721 --- /dev/null +++ b/plugins/inputs/cgroup/testdata/blkio/blkio.io_serviced @@ -0,0 +1 @@ +Total 0 diff --git a/plugins/inputs/cgroup/testdata/blkio/blkio.throttle.io_serviced b/plugins/inputs/cgroup/testdata/blkio/blkio.throttle.io_serviced new file mode 100644 index 000000000..519480715 --- /dev/null +++ b/plugins/inputs/cgroup/testdata/blkio/blkio.throttle.io_serviced @@ -0,0 +1,131 @@ +11:0 Read 0 +11:0 Write 0 +11:0 Sync 0 +11:0 Async 0 +11:0 Total 0 +8:0 Read 49134 +8:0 Write 216703 +8:0 Sync 177906 +8:0 Async 87931 +8:0 Total 265837 +7:7 Read 0 +7:7 Write 0 +7:7 Sync 0 +7:7 Async 0 +7:7 Total 0 +7:6 Read 0 +7:6 Write 0 +7:6 Sync 0 +7:6 Async 0 +7:6 Total 0 +7:5 Read 0 +7:5 Write 0 +7:5 Sync 0 +7:5 Async 0 +7:5 Total 0 +7:4 Read 0 +7:4 Write 0 +7:4 Sync 0 +7:4 Async 0 +7:4 Total 0 +7:3 Read 0 +7:3 Write 0 +7:3 Sync 0 +7:3 Async 0 +7:3 Total 0 +7:2 Read 0 +7:2 Write 0 +7:2 Sync 0 +7:2 Async 0 +7:2 Total 0 +7:1 Read 0 +7:1 Write 0 +7:1 Sync 0 +7:1 Async 0 +7:1 Total 0 +7:0 Read 0 +7:0 Write 0 +7:0 Sync 0 +7:0 Async 0 +7:0 Total 0 +1:15 Read 3 +1:15 Write 0 +1:15 Sync 0 +1:15 Async 3 +1:15 Total 3 +1:14 Read 3 +1:14 Write 0 +1:14 Sync 0 +1:14 Async 3 +1:14 Total 3 +1:13 Read 3 +1:13 Write 0 +1:13 Sync 0 +1:13 Async 3 +1:13 Total 3 +1:12 Read 3 +1:12 Write 0 +1:12 Sync 0 +1:12 Async 3 +1:12 Total 3 +1:11 Read 3 +1:11 Write 0 +1:11 Sync 0 +1:11 Async 3 +1:11 Total 3 +1:10 Read 3 +1:10 Write 0 +1:10 Sync 0 +1:10 Async 3 +1:10 Total 3 +1:9 Read 3 +1:9 Write 0 +1:9 Sync 0 +1:9 Async 3 +1:9 Total 3 +1:8 Read 3 +1:8 Write 0 +1:8 Sync 0 +1:8 Async 3 +1:8 Total 3 +1:7 Read 3 +1:7 Write 0 +1:7 Sync 0 +1:7 Async 3 +1:7 Total 3 +1:6 Read 3 +1:6 Write 0 +1:6 Sync 0 +1:6 Async 3 +1:6 Total 3 +1:5 Read 3 +1:5 Write 0 +1:5 Sync 0 +1:5 Async 3 +1:5 Total 3 +1:4 Read 3 +1:4 Write 0 +1:4 Sync 0 +1:4 Async 3 +1:4 Total 3 +1:3 Read 3 +1:3 Write 0 +1:3 Sync 0 +1:3 Async 3 +1:3 Total 3 +1:2 Read 3 +1:2 Write 0 +1:2 Sync 0 +1:2 Async 3 +1:2 Total 3 +1:1 Read 3 +1:1 Write 0 +1:1 Sync 0 +1:1 Async 3 +1:1 Total 3 +1:0 Read 3 +1:0 Write 0 +1:0 Sync 0 +1:0 Async 3 +1:0 Total 3 +Total 265885 diff --git a/plugins/inputs/cgroup/testdata/cpu/cpu.cfs_quota_us b/plugins/inputs/cgroup/testdata/cpu/cpu.cfs_quota_us new file mode 100644 index 000000000..3a2e3f498 --- /dev/null +++ b/plugins/inputs/cgroup/testdata/cpu/cpu.cfs_quota_us @@ -0,0 +1 @@ +-1 diff --git a/plugins/inputs/cgroup/testdata/cpu/cpuacct.usage_percpu b/plugins/inputs/cgroup/testdata/cpu/cpuacct.usage_percpu new file mode 100644 index 000000000..36737768a --- /dev/null +++ 
b/plugins/inputs/cgroup/testdata/cpu/cpuacct.usage_percpu @@ -0,0 +1 @@ +-1452543795404 1376681271659 1450950799997 -1473113374257 diff --git a/plugins/inputs/cgroup/testdata/memory/group_1/group_1_1/memory.limit_in_bytes b/plugins/inputs/cgroup/testdata/memory/group_1/group_1_1/memory.limit_in_bytes new file mode 100644 index 000000000..78169435f --- /dev/null +++ b/plugins/inputs/cgroup/testdata/memory/group_1/group_1_1/memory.limit_in_bytes @@ -0,0 +1 @@ +223372036854771712 diff --git a/plugins/inputs/cgroup/testdata/memory/group_1/group_1_1/memory.stat b/plugins/inputs/cgroup/testdata/memory/group_1/group_1_1/memory.stat new file mode 100644 index 000000000..a5493b9b2 --- /dev/null +++ b/plugins/inputs/cgroup/testdata/memory/group_1/group_1_1/memory.stat @@ -0,0 +1,5 @@ +cache 1739362304123123123 +rss 1775325184 +rss_huge 778043392 +mapped_file 421036032 +dirty -307200 diff --git a/plugins/inputs/cgroup/testdata/memory/group_1/group_1_2/memory.limit_in_bytes b/plugins/inputs/cgroup/testdata/memory/group_1/group_1_2/memory.limit_in_bytes new file mode 100644 index 000000000..78169435f --- /dev/null +++ b/plugins/inputs/cgroup/testdata/memory/group_1/group_1_2/memory.limit_in_bytes @@ -0,0 +1 @@ +223372036854771712 diff --git a/plugins/inputs/cgroup/testdata/memory/group_1/group_1_2/memory.stat b/plugins/inputs/cgroup/testdata/memory/group_1/group_1_2/memory.stat new file mode 100644 index 000000000..a5493b9b2 --- /dev/null +++ b/plugins/inputs/cgroup/testdata/memory/group_1/group_1_2/memory.stat @@ -0,0 +1,5 @@ +cache 1739362304123123123 +rss 1775325184 +rss_huge 778043392 +mapped_file 421036032 +dirty -307200 diff --git a/plugins/inputs/cgroup/testdata/memory/group_1/memory.kmem.limit_in_bytes b/plugins/inputs/cgroup/testdata/memory/group_1/memory.kmem.limit_in_bytes new file mode 100644 index 000000000..564113cfa --- /dev/null +++ b/plugins/inputs/cgroup/testdata/memory/group_1/memory.kmem.limit_in_bytes @@ -0,0 +1 @@ +9223372036854771712 diff --git a/plugins/inputs/cgroup/testdata/memory/group_1/memory.kmem.max_usage_in_bytes b/plugins/inputs/cgroup/testdata/memory/group_1/memory.kmem.max_usage_in_bytes new file mode 100644 index 000000000..573541ac9 --- /dev/null +++ b/plugins/inputs/cgroup/testdata/memory/group_1/memory.kmem.max_usage_in_bytes @@ -0,0 +1 @@ +0 diff --git a/plugins/inputs/cgroup/testdata/memory/group_1/memory.limit_in_bytes b/plugins/inputs/cgroup/testdata/memory/group_1/memory.limit_in_bytes new file mode 100644 index 000000000..78169435f --- /dev/null +++ b/plugins/inputs/cgroup/testdata/memory/group_1/memory.limit_in_bytes @@ -0,0 +1 @@ +223372036854771712 diff --git a/plugins/inputs/cgroup/testdata/memory/group_1/memory.stat b/plugins/inputs/cgroup/testdata/memory/group_1/memory.stat new file mode 100644 index 000000000..a5493b9b2 --- /dev/null +++ b/plugins/inputs/cgroup/testdata/memory/group_1/memory.stat @@ -0,0 +1,5 @@ +cache 1739362304123123123 +rss 1775325184 +rss_huge 778043392 +mapped_file 421036032 +dirty -307200 diff --git a/plugins/inputs/cgroup/testdata/memory/group_2/group_1_1/memory.limit_in_bytes b/plugins/inputs/cgroup/testdata/memory/group_2/group_1_1/memory.limit_in_bytes new file mode 100644 index 000000000..78169435f --- /dev/null +++ b/plugins/inputs/cgroup/testdata/memory/group_2/group_1_1/memory.limit_in_bytes @@ -0,0 +1 @@ +223372036854771712 diff --git a/plugins/inputs/cgroup/testdata/memory/group_2/group_1_1/memory.stat b/plugins/inputs/cgroup/testdata/memory/group_2/group_1_1/memory.stat new file mode 100644 index 000000000..a5493b9b2 
--- /dev/null +++ b/plugins/inputs/cgroup/testdata/memory/group_2/group_1_1/memory.stat @@ -0,0 +1,5 @@ +cache 1739362304123123123 +rss 1775325184 +rss_huge 778043392 +mapped_file 421036032 +dirty -307200 diff --git a/plugins/inputs/cgroup/testdata/memory/group_2/memory.limit_in_bytes b/plugins/inputs/cgroup/testdata/memory/group_2/memory.limit_in_bytes new file mode 100644 index 000000000..78169435f --- /dev/null +++ b/plugins/inputs/cgroup/testdata/memory/group_2/memory.limit_in_bytes @@ -0,0 +1 @@ +223372036854771712 diff --git a/plugins/inputs/cgroup/testdata/memory/group_2/memory.stat b/plugins/inputs/cgroup/testdata/memory/group_2/memory.stat new file mode 100644 index 000000000..a5493b9b2 --- /dev/null +++ b/plugins/inputs/cgroup/testdata/memory/group_2/memory.stat @@ -0,0 +1,5 @@ +cache 1739362304123123123 +rss 1775325184 +rss_huge 778043392 +mapped_file 421036032 +dirty -307200 diff --git a/plugins/inputs/cgroup/testdata/memory/memory.empty b/plugins/inputs/cgroup/testdata/memory/memory.empty new file mode 100644 index 000000000..e69de29bb diff --git a/plugins/inputs/cgroup/testdata/memory/memory.kmem.limit_in_bytes b/plugins/inputs/cgroup/testdata/memory/memory.kmem.limit_in_bytes new file mode 100644 index 000000000..564113cfa --- /dev/null +++ b/plugins/inputs/cgroup/testdata/memory/memory.kmem.limit_in_bytes @@ -0,0 +1 @@ +9223372036854771712 diff --git a/plugins/inputs/cgroup/testdata/memory/memory.limit_in_bytes b/plugins/inputs/cgroup/testdata/memory/memory.limit_in_bytes new file mode 100644 index 000000000..78169435f --- /dev/null +++ b/plugins/inputs/cgroup/testdata/memory/memory.limit_in_bytes @@ -0,0 +1 @@ +223372036854771712 diff --git a/plugins/inputs/cgroup/testdata/memory/memory.max_usage_in_bytes b/plugins/inputs/cgroup/testdata/memory/memory.max_usage_in_bytes new file mode 100644 index 000000000..712313d3d --- /dev/null +++ b/plugins/inputs/cgroup/testdata/memory/memory.max_usage_in_bytes @@ -0,0 +1,3 @@ +0 +-1 +2 diff --git a/plugins/inputs/cgroup/testdata/memory/memory.numa_stat b/plugins/inputs/cgroup/testdata/memory/memory.numa_stat new file mode 100644 index 000000000..e7c54ebb5 --- /dev/null +++ b/plugins/inputs/cgroup/testdata/memory/memory.numa_stat @@ -0,0 +1,8 @@ +total=858067 N0=858067 +file=406254 N0=406254 +anon=451792 N0=451792 +unevictable=21 N0=21 +hierarchical_total=858067 N0=858067 +hierarchical_file=406254 N0=406254 +hierarchical_anon=451792 N0=451792 +hierarchical_unevictable=21 N0=21 diff --git a/plugins/inputs/cgroup/testdata/memory/memory.stat b/plugins/inputs/cgroup/testdata/memory/memory.stat new file mode 100644 index 000000000..a5493b9b2 --- /dev/null +++ b/plugins/inputs/cgroup/testdata/memory/memory.stat @@ -0,0 +1,5 @@ +cache 1739362304123123123 +rss 1775325184 +rss_huge 778043392 +mapped_file 421036032 +dirty -307200 diff --git a/plugins/inputs/cgroup/testdata/memory/memory.usage_in_bytes b/plugins/inputs/cgroup/testdata/memory/memory.usage_in_bytes new file mode 100644 index 000000000..661151f51 --- /dev/null +++ b/plugins/inputs/cgroup/testdata/memory/memory.usage_in_bytes @@ -0,0 +1 @@ +3513667584 diff --git a/plugins/inputs/cgroup/testdata/memory/memory.use_hierarchy b/plugins/inputs/cgroup/testdata/memory/memory.use_hierarchy new file mode 100644 index 000000000..07cbc8fc6 --- /dev/null +++ b/plugins/inputs/cgroup/testdata/memory/memory.use_hierarchy @@ -0,0 +1 @@ +12-781 diff --git a/plugins/inputs/cgroup/testdata/memory/notify_on_release b/plugins/inputs/cgroup/testdata/memory/notify_on_release new file mode 100644 index 
000000000..573541ac9 --- /dev/null +++ b/plugins/inputs/cgroup/testdata/memory/notify_on_release @@ -0,0 +1 @@ +0 diff --git a/plugins/inputs/dns_query/dns_query.go b/plugins/inputs/dns_query/dns_query.go index 2231f2921..1bccc52c0 100644 --- a/plugins/inputs/dns_query/dns_query.go +++ b/plugins/inputs/dns_query/dns_query.go @@ -3,12 +3,14 @@ package dns_query import ( "errors" "fmt" - "github.com/influxdata/telegraf" - "github.com/influxdata/telegraf/plugins/inputs" "github.com/miekg/dns" "net" "strconv" "time" + + "github.com/influxdata/telegraf" + "github.com/influxdata/telegraf/internal/errchan" + "github.com/influxdata/telegraf/plugins/inputs" ) type DnsQuery struct { @@ -55,12 +57,12 @@ func (d *DnsQuery) Description() string { } func (d *DnsQuery) Gather(acc telegraf.Accumulator) error { d.setDefaultValues() + + errChan := errchan.New(len(d.Domains) * len(d.Servers)) for _, domain := range d.Domains { for _, server := range d.Servers { dnsQueryTime, err := d.getDnsQueryTime(domain, server) - if err != nil { - return err - } + errChan.C <- err tags := map[string]string{ "server": server, "domain": domain, @@ -72,7 +74,7 @@ func (d *DnsQuery) Gather(acc telegraf.Accumulator) error { } } - return nil + return errChan.Error() } func (d *DnsQuery) setDefaultValues() { diff --git a/plugins/inputs/docker/docker.go b/plugins/inputs/docker/docker.go index 0af7820e1..e3876bd64 100644 --- a/plugins/inputs/docker/docker.go +++ b/plugins/inputs/docker/docker.go @@ -25,6 +25,8 @@ type Docker struct { Endpoint string ContainerNames []string Timeout internal.Duration + PerDevice bool `toml:"perdevice"` + Total bool `toml:"total"` client DockerClient } @@ -58,6 +60,13 @@ var sampleConfig = ` container_names = [] ## Timeout for docker list, info, and stats commands timeout = "5s" + + ## Whether to report for each container per-device blkio (8:0, 8:1...) and + ## network (eth0, eth1, ...) stats or not + perdevice = true + ## Whether to report for each container total blkio and network stats or not + total = false + ` // Description returns input description @@ -207,9 +216,18 @@ func (d *Docker) gatherContainer( cname = strings.TrimPrefix(container.Names[0], "/") } + // the image name sometimes has a version part. 
+ // ie, rabbitmq:3-management + imageParts := strings.Split(container.Image, ":") + imageName := imageParts[0] + imageVersion := "unknown" + if len(imageParts) > 1 { + imageVersion = imageParts[1] + } tags := map[string]string{ - "container_name": cname, - "container_image": container.Image, + "container_name": cname, + "container_image": imageName, + "container_version": imageVersion, } if len(d.ContainerNames) > 0 { if !sliceContains(cname, d.ContainerNames) { @@ -237,7 +255,7 @@ func (d *Docker) gatherContainer( tags[k] = label } - gatherContainerStats(v, acc, tags, container.ID) + gatherContainerStats(v, acc, tags, container.ID, d.PerDevice, d.Total) return nil } @@ -247,6 +265,8 @@ func gatherContainerStats( acc telegraf.Accumulator, tags map[string]string, id string, + perDevice bool, + total bool, ) { now := stat.Read @@ -314,6 +334,7 @@ func gatherContainerStats( acc.AddFields("docker_container_cpu", fields, percputags, now) } + totalNetworkStatMap := make(map[string]interface{}) for network, netstats := range stat.Networks { netfields := map[string]interface{}{ "rx_dropped": netstats.RxDropped, @@ -327,12 +348,35 @@ func gatherContainerStats( "container_id": id, } // Create a new network tag dictionary for the "network" tag - nettags := copyTags(tags) - nettags["network"] = network - acc.AddFields("docker_container_net", netfields, nettags, now) + if perDevice { + nettags := copyTags(tags) + nettags["network"] = network + acc.AddFields("docker_container_net", netfields, nettags, now) + } + if total { + for field, value := range netfields { + if field == "container_id" { + continue + } + _, ok := totalNetworkStatMap[field] + if ok { + totalNetworkStatMap[field] = totalNetworkStatMap[field].(uint64) + value.(uint64) + } else { + totalNetworkStatMap[field] = value + } + } + } } - gatherBlockIOMetrics(stat, acc, tags, now, id) + // totalNetworkStatMap could be empty if container is running with --net=host. 
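+	// When it is empty, skip emitting the aggregate "total" metric.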
+ if total && len(totalNetworkStatMap) != 0 { + nettags := copyTags(tags) + nettags["network"] = "total" + totalNetworkStatMap["container_id"] = id + acc.AddFields("docker_container_net", totalNetworkStatMap, nettags, now) + } + + gatherBlockIOMetrics(stat, acc, tags, now, id, perDevice, total) } func calculateMemPercent(stat *types.StatsJSON) float64 { @@ -361,6 +405,8 @@ func gatherBlockIOMetrics( tags map[string]string, now time.Time, id string, + perDevice bool, + total bool, ) { blkioStats := stat.BlkioStats // Make a map of devices to their block io stats @@ -422,11 +468,33 @@ func gatherBlockIOMetrics( deviceStatMap[device]["sectors_recursive"] = metric.Value } + totalStatMap := make(map[string]interface{}) for device, fields := range deviceStatMap { - iotags := copyTags(tags) - iotags["device"] = device fields["container_id"] = id - acc.AddFields("docker_container_blkio", fields, iotags, now) + if perDevice { + iotags := copyTags(tags) + iotags["device"] = device + acc.AddFields("docker_container_blkio", fields, iotags, now) + } + if total { + for field, value := range fields { + if field == "container_id" { + continue + } + _, ok := totalStatMap[field] + if ok { + totalStatMap[field] = totalStatMap[field].(uint64) + value.(uint64) + } else { + totalStatMap[field] = value + } + } + } + } + if total { + totalStatMap["container_id"] = id + iotags := copyTags(tags) + iotags["device"] = "total" + acc.AddFields("docker_container_blkio", totalStatMap, iotags, now) } } @@ -471,7 +539,8 @@ func parseSize(sizeStr string) (int64, error) { func init() { inputs.Add("docker", func() telegraf.Input { return &Docker{ - Timeout: internal.Duration{Duration: time.Second * 5}, + PerDevice: true, + Timeout: internal.Duration{Duration: time.Second * 5}, } }) } diff --git a/plugins/inputs/docker/docker_test.go b/plugins/inputs/docker/docker_test.go index 1574009b8..9f2e97f73 100644 --- a/plugins/inputs/docker/docker_test.go +++ b/plugins/inputs/docker/docker_test.go @@ -24,7 +24,7 @@ func TestDockerGatherContainerStats(t *testing.T) { "container_name": "redis", "container_image": "redis/image", } - gatherContainerStats(stats, &acc, tags, "123456789") + gatherContainerStats(stats, &acc, tags, "123456789", true, true) // test docker_container_net measurement netfields := map[string]interface{}{ @@ -42,6 +42,21 @@ func TestDockerGatherContainerStats(t *testing.T) { nettags["network"] = "eth0" acc.AssertContainsTaggedFields(t, "docker_container_net", netfields, nettags) + netfields = map[string]interface{}{ + "rx_dropped": uint64(6), + "rx_bytes": uint64(8), + "rx_errors": uint64(10), + "tx_packets": uint64(12), + "tx_dropped": uint64(6), + "rx_packets": uint64(8), + "tx_errors": uint64(10), + "tx_bytes": uint64(12), + "container_id": "123456789", + } + nettags = copyTags(tags) + nettags["network"] = "total" + acc.AssertContainsTaggedFields(t, "docker_container_net", netfields, nettags) + // test docker_blkio measurement blkiotags := copyTags(tags) blkiotags["device"] = "6:0" @@ -52,6 +67,15 @@ func TestDockerGatherContainerStats(t *testing.T) { } acc.AssertContainsTaggedFields(t, "docker_container_blkio", blkiofields, blkiotags) + blkiotags = copyTags(tags) + blkiotags["device"] = "total" + blkiofields = map[string]interface{}{ + "io_service_bytes_recursive_read": uint64(100), + "io_serviced_recursive_write": uint64(302), + "container_id": "123456789", + } + acc.AssertContainsTaggedFields(t, "docker_container_blkio", blkiofields, blkiotags) + // test docker_container_mem measurement memfields := 
map[string]interface{}{ "max_usage": uint64(1001), @@ -186,6 +210,17 @@ func testStats() *types.StatsJSON { TxBytes: 4, } + stats.Networks["eth1"] = types.NetworkStats{ + RxDropped: 5, + RxBytes: 6, + RxErrors: 7, + TxPackets: 8, + TxDropped: 5, + RxPackets: 6, + TxErrors: 7, + TxBytes: 8, + } + sbr := types.BlkioStatEntry{ Major: 6, Minor: 0, @@ -198,11 +233,19 @@ func testStats() *types.StatsJSON { Op: "write", Value: 101, } + sr2 := types.BlkioStatEntry{ + Major: 6, + Minor: 1, + Op: "write", + Value: 201, + } stats.BlkioStats.IoServiceBytesRecursive = append( stats.BlkioStats.IoServiceBytesRecursive, sbr) stats.BlkioStats.IoServicedRecursive = append( stats.BlkioStats.IoServicedRecursive, sr) + stats.BlkioStats.IoServicedRecursive = append( + stats.BlkioStats.IoServicedRecursive, sr2) return stats } @@ -378,9 +421,10 @@ func TestDockerGatherInfo(t *testing.T) { "container_id": "b7dfbb9478a6ae55e237d4d74f8bbb753f0817192b5081334dc78476296e2173", }, map[string]string{ - "container_name": "etcd2", - "container_image": "quay.io/coreos/etcd:v2.2.2", - "cpu": "cpu3", + "container_name": "etcd2", + "container_image": "quay.io/coreos/etcd", + "cpu": "cpu3", + "container_version": "v2.2.2", }, ) acc.AssertContainsTaggedFields(t, @@ -423,8 +467,9 @@ func TestDockerGatherInfo(t *testing.T) { "container_id": "b7dfbb9478a6ae55e237d4d74f8bbb753f0817192b5081334dc78476296e2173", }, map[string]string{ - "container_name": "etcd2", - "container_image": "quay.io/coreos/etcd:v2.2.2", + "container_name": "etcd2", + "container_image": "quay.io/coreos/etcd", + "container_version": "v2.2.2", }, ) diff --git a/plugins/inputs/dovecot/dovecot.go b/plugins/inputs/dovecot/dovecot.go index 0347016d1..56290e759 100644 --- a/plugins/inputs/dovecot/dovecot.go +++ b/plugins/inputs/dovecot/dovecot.go @@ -12,6 +12,7 @@ import ( "time" "github.com/influxdata/telegraf" + "github.com/influxdata/telegraf/internal/errchan" "github.com/influxdata/telegraf/plugins/inputs" ) @@ -51,7 +52,6 @@ const defaultPort = "24242" // Reads stats from all configured servers. 
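+// Each server/filter pair is gathered in its own goroutine; errors are
+// collected and merged via the errchan helper.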
func (d *Dovecot) Gather(acc telegraf.Accumulator) error { - if !validQuery[d.Type] { return fmt.Errorf("Error: %s is not a valid query type\n", d.Type) @@ -61,31 +61,27 @@ func (d *Dovecot) Gather(acc telegraf.Accumulator) error { d.Servers = append(d.Servers, "127.0.0.1:24242") } - var wg sync.WaitGroup - - var outerr error - if len(d.Filters) <= 0 { d.Filters = append(d.Filters, "") } - for _, serv := range d.Servers { + var wg sync.WaitGroup + errChan := errchan.New(len(d.Servers) * len(d.Filters)) + for _, server := range d.Servers { for _, filter := range d.Filters { wg.Add(1) - go func(serv string, filter string) { + go func(s string, f string) { defer wg.Done() - outerr = d.gatherServer(serv, acc, d.Type, filter) - }(serv, filter) + errChan.C <- d.gatherServer(s, acc, d.Type, f) + }(server, filter) } } wg.Wait() - - return outerr + return errChan.Error() } func (d *Dovecot) gatherServer(addr string, acc telegraf.Accumulator, qtype string, filter string) error { - _, _, err := net.SplitHostPort(addr) if err != nil { return fmt.Errorf("Error: %s on url %s\n", err, addr) diff --git a/plugins/inputs/elasticsearch/README.md b/plugins/inputs/elasticsearch/README.md index 88f08bd93..526bc3f39 100644 --- a/plugins/inputs/elasticsearch/README.md +++ b/plugins/inputs/elasticsearch/README.md @@ -11,6 +11,13 @@ and optionally [cluster](https://www.elastic.co/guide/en/elasticsearch/reference servers = ["http://localhost:9200"] local = true cluster_health = true + + ## Optional SSL Config + # ssl_ca = "/etc/telegraf/ca.pem" + # ssl_cert = "/etc/telegraf/cert.pem" + # ssl_key = "/etc/telegraf/key.pem" + ## Use SSL but skip chain & host verification + # insecure_skip_verify = false ``` ### Measurements & Fields: diff --git a/plugins/inputs/elasticsearch/elasticsearch.go b/plugins/inputs/elasticsearch/elasticsearch.go index 3839f6df6..ef0a4c199 100644 --- a/plugins/inputs/elasticsearch/elasticsearch.go +++ b/plugins/inputs/elasticsearch/elasticsearch.go @@ -8,6 +8,7 @@ import ( "time" "github.com/influxdata/telegraf" + "github.com/influxdata/telegraf/internal" "github.com/influxdata/telegraf/internal/errchan" "github.com/influxdata/telegraf/plugins/inputs" jsonparser "github.com/influxdata/telegraf/plugins/parsers/json" @@ -67,25 +68,31 @@ const sampleConfig = ` ## set cluster_health to true when you want to also obtain cluster level stats cluster_health = false + + ## Optional SSL Config + # ssl_ca = "/etc/telegraf/ca.pem" + # ssl_cert = "/etc/telegraf/cert.pem" + # ssl_key = "/etc/telegraf/key.pem" + ## Use SSL but skip chain & host verification + # insecure_skip_verify = false ` // Elasticsearch is a plugin to read stats from one or many Elasticsearch // servers. type Elasticsearch struct { - Local bool - Servers []string - ClusterHealth bool - client *http.Client + Local bool + Servers []string + ClusterHealth bool + SSLCA string `toml:"ssl_ca"` // Path to CA file + SSLCert string `toml:"ssl_cert"` // Path to host cert file + SSLKey string `toml:"ssl_key"` // Path to cert key file + InsecureSkipVerify bool // Use SSL but skip chain & host verification + client *http.Client } // NewElasticsearch return a new instance of Elasticsearch func NewElasticsearch() *Elasticsearch { - tr := &http.Transport{ResponseHeaderTimeout: time.Duration(3 * time.Second)} - client := &http.Client{ - Transport: tr, - Timeout: time.Duration(4 * time.Second), - } - return &Elasticsearch{client: client} + return &Elasticsearch{} } // SampleConfig returns sample configuration for this plugin. 
@@ -101,6 +108,15 @@ func (e *Elasticsearch) Description() string { // Gather reads the stats from Elasticsearch and writes it to the // Accumulator. func (e *Elasticsearch) Gather(acc telegraf.Accumulator) error { + if e.client == nil { + client, err := e.createHttpClient() + + if err != nil { + return err + } + e.client = client + } + errChan := errchan.New(len(e.Servers)) var wg sync.WaitGroup wg.Add(len(e.Servers)) @@ -128,6 +144,23 @@ func (e *Elasticsearch) Gather(acc telegraf.Accumulator) error { return errChan.Error() } +func (e *Elasticsearch) createHttpClient() (*http.Client, error) { + tlsCfg, err := internal.GetTLSConfig(e.SSLCert, e.SSLKey, e.SSLCA, e.InsecureSkipVerify) + if err != nil { + return nil, err + } + tr := &http.Transport{ + ResponseHeaderTimeout: time.Duration(3 * time.Second), + TLSClientConfig: tlsCfg, + } + client := &http.Client{ + Transport: tr, + Timeout: time.Duration(4 * time.Second), + } + + return client, nil +} + func (e *Elasticsearch) gatherNodeStats(url string, acc telegraf.Accumulator) error { nodeStats := &struct { ClusterName string `json:"cluster_name"` diff --git a/plugins/inputs/elasticsearch/elasticsearch_test.go b/plugins/inputs/elasticsearch/elasticsearch_test.go index f29857507..760ac921b 100644 --- a/plugins/inputs/elasticsearch/elasticsearch_test.go +++ b/plugins/inputs/elasticsearch/elasticsearch_test.go @@ -38,7 +38,7 @@ func (t *transportMock) CancelRequest(_ *http.Request) { } func TestElasticsearch(t *testing.T) { - es := NewElasticsearch() + es := newElasticsearchWithClient() es.Servers = []string{"http://example.com:9200"} es.client.Transport = newTransportMock(http.StatusOK, statsResponse) @@ -67,7 +67,7 @@ func TestElasticsearch(t *testing.T) { } func TestGatherClusterStats(t *testing.T) { - es := NewElasticsearch() + es := newElasticsearchWithClient() es.Servers = []string{"http://example.com:9200"} es.ClusterHealth = true es.client.Transport = newTransportMock(http.StatusOK, clusterResponse) @@ -87,3 +87,9 @@ func TestGatherClusterStats(t *testing.T) { v2IndexExpected, map[string]string{"index": "v2"}) } + +func newElasticsearchWithClient() *Elasticsearch { + es := NewElasticsearch() + es.client = &http.Client{} + return es +} diff --git a/plugins/inputs/exec/exec.go b/plugins/inputs/exec/exec.go index 415831960..060a4f308 100644 --- a/plugins/inputs/exec/exec.go +++ b/plugins/inputs/exec/exec.go @@ -48,8 +48,6 @@ type Exec struct { parser parsers.Parser - wg sync.WaitGroup - runner Runner errChan chan error } @@ -119,8 +117,8 @@ func (c CommandRunner) Run( return out.Bytes(), nil } -func (e *Exec) ProcessCommand(command string, acc telegraf.Accumulator) { - defer e.wg.Done() +func (e *Exec) ProcessCommand(command string, acc telegraf.Accumulator, wg *sync.WaitGroup) { + defer wg.Done() out, err := e.runner.Run(e, command, acc) if err != nil { @@ -151,6 +149,7 @@ func (e *Exec) SetParser(parser parsers.Parser) { } func (e *Exec) Gather(acc telegraf.Accumulator) error { + var wg sync.WaitGroup // Legacy single command support if e.Command != "" { e.Commands = append(e.Commands, e.Command) @@ -177,8 +176,12 @@ func (e *Exec) Gather(acc telegraf.Accumulator) error { // There were matches, so we'll append each match together with // the arguments to the commands slice for _, match := range matches { - commands = append( - commands, strings.Join([]string{match, cmdAndArgs[1]}, " ")) + if len(cmdAndArgs) == 1 { + commands = append(commands, match) + } else { + commands = append(commands, + strings.Join([]string{match, cmdAndArgs[1]}, 
" ")) + } } } } @@ -186,11 +189,11 @@ func (e *Exec) Gather(acc telegraf.Accumulator) error { errChan := errchan.New(len(commands)) e.errChan = errChan.C - e.wg.Add(len(commands)) + wg.Add(len(commands)) for _, command := range commands { - go e.ProcessCommand(command, acc) + go e.ProcessCommand(command, acc, &wg) } - e.wg.Wait() + wg.Wait() return errChan.Error() } diff --git a/plugins/inputs/haproxy/haproxy.go b/plugins/inputs/haproxy/haproxy.go index 0a0b3da82..9529bad3f 100644 --- a/plugins/inputs/haproxy/haproxy.go +++ b/plugins/inputs/haproxy/haproxy.go @@ -92,9 +92,11 @@ type haproxy struct { var sampleConfig = ` ## An array of address to gather stats about. Specify an ip on hostname ## with optional port. ie localhost, 10.10.3.33:1936, etc. - - ## If no servers are specified, then default to 127.0.0.1:1936 - servers = ["http://myhaproxy.com:1936", "http://anotherhaproxy.com:1936"] + ## Make sure you specify the complete path to the stats endpoint + ## ie 10.10.3.33:1936/haproxy?stats + # + ## If no servers are specified, then default to 127.0.0.1:1936/haproxy?stats + servers = ["http://myhaproxy.com:1936/haproxy?stats"] ## Or you can also use local socket ## servers = ["socket:/run/haproxy/admin.sock"] ` @@ -111,7 +113,7 @@ func (r *haproxy) Description() string { // Returns one of the errors encountered while gather stats (if any). func (g *haproxy) Gather(acc telegraf.Accumulator) error { if len(g.Servers) == 0 { - return g.gatherServer("http://127.0.0.1:1936", acc) + return g.gatherServer("http://127.0.0.1:1936/haproxy?stats", acc) } var wg sync.WaitGroup @@ -167,12 +169,16 @@ func (g *haproxy) gatherServer(addr string, acc telegraf.Accumulator) error { g.client = client } + if !strings.HasSuffix(addr, ";csv") { + addr += "/;csv" + } + u, err := url.Parse(addr) if err != nil { return fmt.Errorf("Unable parse server address '%s': %s", addr, err) } - req, err := http.NewRequest("GET", fmt.Sprintf("%s://%s%s/;csv", u.Scheme, u.Host, u.Path), nil) + req, err := http.NewRequest("GET", addr, nil) if u.User != nil { p, _ := u.User.Password() req.SetBasicAuth(u.User.Username(), p) @@ -184,7 +190,7 @@ func (g *haproxy) gatherServer(addr string, acc telegraf.Accumulator) error { } if res.StatusCode != 200 { - return fmt.Errorf("Unable to get valid stat result from '%s': %s", addr, err) + return fmt.Errorf("Unable to get valid stat result from '%s', http response code : %d", addr, res.StatusCode) } return importCsvResult(res.Body, acc, u.Host) diff --git a/plugins/inputs/haproxy/haproxy_test.go b/plugins/inputs/haproxy/haproxy_test.go index f9057e0cd..befcabd97 100644 --- a/plugins/inputs/haproxy/haproxy_test.go +++ b/plugins/inputs/haproxy/haproxy_test.go @@ -243,7 +243,7 @@ func TestHaproxyDefaultGetFromLocalhost(t *testing.T) { err := r.Gather(&acc) require.Error(t, err) - assert.Contains(t, err.Error(), "127.0.0.1:1936/;csv") + assert.Contains(t, err.Error(), "127.0.0.1:1936/haproxy?stats/;csv") } const csvOutputSample = ` diff --git a/plugins/inputs/hddtemp/README.md b/plugins/inputs/hddtemp/README.md new file mode 100644 index 000000000..d87ae625d --- /dev/null +++ b/plugins/inputs/hddtemp/README.md @@ -0,0 +1,22 @@ +# Hddtemp Input Plugin + +This plugin reads data from hddtemp daemon + +## Requirements + +Hddtemp should be installed and its daemon running + +## Configuration + +``` +[[inputs.hddtemp]] +## By default, telegraf gathers temps data from all disks detected by the +## hddtemp. +## +## Only collect temps from the selected disks. 
+## +## A * as the device name will return the temperature values of all disks. +## +# address = "127.0.0.1:7634" +# devices = ["sda", "*"] +``` diff --git a/plugins/inputs/hddtemp/go-hddtemp/LICENSE b/plugins/inputs/hddtemp/go-hddtemp/LICENSE new file mode 100644 index 000000000..d5aed19c6 --- /dev/null +++ b/plugins/inputs/hddtemp/go-hddtemp/LICENSE @@ -0,0 +1,21 @@ +The MIT License (MIT) + +Copyright (c) 2016 Mendelson Gusmão + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/plugins/inputs/hddtemp/go-hddtemp/hddtemp.go b/plugins/inputs/hddtemp/go-hddtemp/hddtemp.go new file mode 100644 index 000000000..d7d650b79 --- /dev/null +++ b/plugins/inputs/hddtemp/go-hddtemp/hddtemp.go @@ -0,0 +1,61 @@ +package hddtemp + +import ( + "bytes" + "io" + "net" + "strconv" + "strings" +) + +type disk struct { + DeviceName string + Model string + Temperature int32 + Unit string + Status string +} + +func Fetch(address string) ([]disk, error) { + var ( + err error + conn net.Conn + buffer bytes.Buffer + disks []disk + ) + + if conn, err = net.Dial("tcp", address); err != nil { + return nil, err + } + + if _, err = io.Copy(&buffer, conn); err != nil { + return nil, err + } + + fields := strings.Split(buffer.String(), "|") + + for index := 0; index < len(fields)/5; index++ { + status := "" + offset := index * 5 + device := fields[offset+1] + device = device[strings.LastIndex(device, "/")+1:] + + temperatureField := fields[offset+3] + temperature, err := strconv.ParseInt(temperatureField, 10, 32) + + if err != nil { + temperature = 0 + status = temperatureField + } + + disks = append(disks, disk{ + DeviceName: device, + Model: fields[offset+2], + Temperature: int32(temperature), + Unit: fields[offset+4], + Status: status, + }) + } + + return disks, nil +} diff --git a/plugins/inputs/hddtemp/go-hddtemp/hddtemp_test.go b/plugins/inputs/hddtemp/go-hddtemp/hddtemp_test.go new file mode 100644 index 000000000..858e91a90 --- /dev/null +++ b/plugins/inputs/hddtemp/go-hddtemp/hddtemp_test.go @@ -0,0 +1,116 @@ +package hddtemp + +import ( + "net" + "reflect" + "testing" +) + +func TestFetch(t *testing.T) { + l := serve(t, []byte("|/dev/sda|foobar|36|C|")) + defer l.Close() + + disks, err := Fetch(l.Addr().String()) + + if err != nil { + t.Error("expecting err to be nil") + } + + expected := []disk{ + { + DeviceName: "sda", + Model: "foobar", + Temperature: 36, + Unit: "C", + }, + } + + if !reflect.DeepEqual(expected, disks) { + t.Error("disks' slice is different from expected") + } +} + +func TestFetchWrongAddress(t *testing.T) { 
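+	// Nothing is expected to be listening on port 1, so the dial inside Fetch should fail.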
+ _, err := Fetch("127.0.0.1:1") + + if err == nil { + t.Error("expecting err to be non-nil") + } +} + +func TestFetchStatus(t *testing.T) { + l := serve(t, []byte("|/dev/sda|foobar|SLP|C|")) + defer l.Close() + + disks, err := Fetch(l.Addr().String()) + + if err != nil { + t.Error("expecting err to be nil") + } + + expected := []disk{ + { + DeviceName: "sda", + Model: "foobar", + Temperature: 0, + Unit: "C", + Status: "SLP", + }, + } + + if !reflect.DeepEqual(expected, disks) { + t.Error("disks' slice is different from expected") + } +} + +func TestFetchTwoDisks(t *testing.T) { + l := serve(t, []byte("|/dev/hda|ST380011A|46|C||/dev/hdd|ST340016A|SLP|*|")) + defer l.Close() + + disks, err := Fetch(l.Addr().String()) + + if err != nil { + t.Error("expecting err to be nil") + } + + expected := []disk{ + { + DeviceName: "hda", + Model: "ST380011A", + Temperature: 46, + Unit: "C", + }, + { + DeviceName: "hdd", + Model: "ST340016A", + Temperature: 0, + Unit: "*", + Status: "SLP", + }, + } + + if !reflect.DeepEqual(expected, disks) { + t.Error("disks' slice is different from expected") + } +} + +func serve(t *testing.T, data []byte) net.Listener { + l, err := net.Listen("tcp", "127.0.0.1:0") + + if err != nil { + t.Fatal(err) + } + + go func(t *testing.T) { + conn, err := l.Accept() + + if err != nil { + t.Fatal(err) + } + + conn.Write(data) + conn.Close() + }(t) + + return l +} diff --git a/plugins/inputs/hddtemp/hddtemp.go b/plugins/inputs/hddtemp/hddtemp.go new file mode 100644 index 000000000..c1e01c3c6 --- /dev/null +++ b/plugins/inputs/hddtemp/hddtemp.go @@ -0,0 +1,74 @@ +// +build linux + +package hddtemp + +import ( + "github.com/influxdata/telegraf" + "github.com/influxdata/telegraf/plugins/inputs" + gohddtemp "github.com/influxdata/telegraf/plugins/inputs/hddtemp/go-hddtemp" +) + +const defaultAddress = "127.0.0.1:7634" + +type HDDTemp struct { + Address string + Devices []string +} + +func (_ *HDDTemp) Description() string { + return "Monitor disks' temperatures using hddtemp" +} + +var hddtempSampleConfig = ` + ## By default, telegraf gathers temps data from all disks detected by the + ## hddtemp. + ## + ## Only collect temps from the selected disks. + ## + ## A * as the device name will return the temperature values of all disks. 
+ ## + # address = "127.0.0.1:7634" + # devices = ["sda", "*"] +` + +func (_ *HDDTemp) SampleConfig() string { + return hddtempSampleConfig +} + +func (h *HDDTemp) Gather(acc telegraf.Accumulator) error { + disks, err := gohddtemp.Fetch(h.Address) + + if err != nil { + return err + } + + for _, disk := range disks { + for _, chosenDevice := range h.Devices { + if chosenDevice == "*" || chosenDevice == disk.DeviceName { + tags := map[string]string{ + "device": disk.DeviceName, + "model": disk.Model, + "unit": disk.Unit, + "status": disk.Status, + } + + fields := map[string]interface{}{ + disk.DeviceName: disk.Temperature, + } + + acc.AddFields("hddtemp", fields, tags) + } + } + } + + return nil +} + +func init() { + inputs.Add("hddtemp", func() telegraf.Input { + return &HDDTemp{ + Address: defaultAddress, + Devices: []string{"*"}, + } + }) +} diff --git a/plugins/inputs/hddtemp/hddtemp_nocompile.go b/plugins/inputs/hddtemp/hddtemp_nocompile.go new file mode 100644 index 000000000..0c5801670 --- /dev/null +++ b/plugins/inputs/hddtemp/hddtemp_nocompile.go @@ -0,0 +1,3 @@ +// +build !linux + +package hddtemp diff --git a/plugins/inputs/jolokia/jolokia.go b/plugins/inputs/jolokia/jolokia.go index 244338559..53bb65fd0 100644 --- a/plugins/inputs/jolokia/jolokia.go +++ b/plugins/inputs/jolokia/jolokia.go @@ -249,7 +249,14 @@ func (j *Jolokia) Gather(acc telegraf.Accumulator) error { switch t := values.(type) { case map[string]interface{}: for k, v := range t { - fields[measurement+"_"+k] = v + switch t2 := v.(type) { + case map[string]interface{}: + for k2, v2 := range t2 { + fields[measurement+"_"+k+"_"+k2] = v2 + } + case interface{}: + fields[measurement+"_"+k] = t2 + } } case interface{}: fields[measurement] = t diff --git a/plugins/inputs/kafka_consumer/README.md b/plugins/inputs/kafka_consumer/README.md index f5f6a359e..afdb51e32 100644 --- a/plugins/inputs/kafka_consumer/README.md +++ b/plugins/inputs/kafka_consumer/README.md @@ -22,7 +22,7 @@ from the same topic in parallel. ## Offset (must be either "oldest" or "newest") offset = "oldest" - ## Data format to consume. + ## Data format to consume. ## Each data format has it's own unique set of configuration options, read ## more about them here: @@ -32,11 +32,5 @@ from the same topic in parallel. ## Testing -Running integration tests requires running Zookeeper & Kafka. The following -commands assume you're on OS X & using [boot2docker](http://boot2docker.io/) or docker-machine through [Docker Toolbox](https://www.docker.com/docker-toolbox). - -To start Kafka & Zookeeper: - -``` -docker run -d -p 2181:2181 -p 9092:9092 --env ADVERTISED_HOST=`boot2docker ip || docker-machine ip ` --env ADVERTISED_PORT=9092 spotify/kafka -``` +Running integration tests requires running Zookeeper & Kafka. See Makefile +for kafka container command. 
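The jolokia change above flattens one level of nested values in a Jolokia response into underscore-joined field names. A minimal, self-contained sketch of that flattening behavior (the function name and sample data are illustrative, not part of the plugin):

```go
package main

import "fmt"

// flatten mirrors the switch added to jolokia's Gather above: a nested
// map[string]interface{} value produces "<measurement>_<key>_<subkey>"
// fields, while scalar values keep producing "<measurement>_<key>".
func flatten(measurement string, values map[string]interface{}) map[string]interface{} {
	fields := make(map[string]interface{})
	for k, v := range values {
		switch t2 := v.(type) {
		case map[string]interface{}:
			for k2, v2 := range t2 {
				fields[measurement+"_"+k+"_"+k2] = v2
			}
		default:
			fields[measurement+"_"+k] = t2
		}
	}
	return fields
}

func main() {
	// A composite JMX attribute such as HeapMemoryUsage is flattened into
	// two usable numeric fields instead of being stored as a whole map.
	fmt.Println(flatten("heap", map[string]interface{}{
		"HeapMemoryUsage": map[string]interface{}{"used": 203888928, "max": 466092032},
		"Verbose":         false,
	}))
}
```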
diff --git a/plugins/inputs/kafka_consumer/kafka_consumer.go b/plugins/inputs/kafka_consumer/kafka_consumer.go
index a2cda43d6..5600d82a4 100644
--- a/plugins/inputs/kafka_consumer/kafka_consumer.go
+++ b/plugins/inputs/kafka_consumer/kafka_consumer.go
@@ -50,7 +50,7 @@ var sampleConfig = `
   ## an array of Zookeeper connection strings
   zookeeper_peers = ["localhost:2181"]
   ## Zookeeper Chroot
-  zookeeper_chroot = "/"
+  zookeeper_chroot = ""
   ## the name of the consumer group
   consumer_group = "telegraf_metrics_consumers"
   ## Offset (must be either "oldest" or "newest")
diff --git a/plugins/inputs/logparser/README.md b/plugins/inputs/logparser/README.md
new file mode 100644
index 000000000..1affcd811
--- /dev/null
+++ b/plugins/inputs/logparser/README.md
@@ -0,0 +1,95 @@
+# logparser Input Plugin
+
+The logparser plugin streams and parses the given logfiles. Currently it only
+supports parsing "grok" patterns, which can also include arbitrary regular
+expressions.
+
+### Configuration:
+
+```toml
+[[inputs.logparser]]
+  ## Log files to parse.
+  ## These accept standard unix glob matching rules, but with the addition of
+  ## ** as a "super asterisk". ie:
+  ##   /var/log/**.log     -> recursively find all .log files in /var/log
+  ##   /var/log/*/*.log    -> find all .log files with a parent dir in /var/log
+  ##   /var/log/apache.log -> only tail the apache log file
+  files = ["/var/log/apache/access.log"]
+  ## Read file from beginning.
+  from_beginning = false
+
+  ## Parse logstash-style "grok" patterns:
+  ##   Telegraf built-in parsing patterns: https://goo.gl/dkay10
+  [inputs.logparser.grok]
+    ## This is a list of patterns to check the given log file(s) for.
+    ## Note that adding patterns here increases processing time. The most
+    ## efficient configuration is to have one pattern per logparser.
+    ## Other common built-in patterns are:
+    ##   %{COMMON_LOG_FORMAT}   (plain apache & nginx access logs)
+    ##   %{COMBINED_LOG_FORMAT} (access logs + referrer & agent)
+    patterns = ["%{COMBINED_LOG_FORMAT}"]
+    ## Name of the output measurement.
+    measurement = "apache_access_log"
+    ## Full path(s) to custom pattern files.
+    custom_pattern_files = []
+    ## Custom patterns can also be defined here. Put one pattern per line.
+    custom_patterns = '''
+    '''
+```
+
+## Grok Parser
+
+The grok parser uses a slightly modified version of logstash "grok" patterns,
+with the format `%{<capture_syntax>[:<semantic_name>][:<modifier>]}`
+
+
+Telegraf has many of its own
+[built-in patterns](https://github.com/influxdata/telegraf/blob/master/plugins/inputs/logparser/grok/patterns/influx-patterns),
+as well as supporting
+[logstash's builtin patterns](https://github.com/logstash-plugins/logstash-patterns-core/blob/master/patterns/grok-patterns).
+
+
+The best way to get acquainted with grok patterns is to read the logstash docs,
+which are available here:
+  https://www.elastic.co/guide/en/logstash/current/plugins-filters-grok.html
+
+
+If you need help building patterns to match your logs,
+you will find the http://grokdebug.herokuapp.com application quite useful!
+
+
+By default all named captures are converted into string fields.
+Modifiers can be used to convert captures to other types or tags.
+Timestamp modifiers can be used to convert captures to the timestamp of the
+parsed metric.
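+
+For example, this illustrative pattern (not one of the built-ins) combines a
+tag modifier, a type modifier, and a timestamp modifier:
+
+```
+%{IPORHOST:clientip:tag} %{NUMBER:bytes:int} \[%{HTTPDATE:ts:ts-httpd}\]
+```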
+ + +- Available modifiers: + - string (default if nothing is specified) + - int + - float + - duration (ie, 5.23ms gets converted to int nanoseconds) + - tag (converts the field into a tag) + - drop (drops the field completely) +- Timestamp modifiers: + - ts (This will auto-learn the timestamp format) + - ts-ansic ("Mon Jan _2 15:04:05 2006") + - ts-unix ("Mon Jan _2 15:04:05 MST 2006") + - ts-ruby ("Mon Jan 02 15:04:05 -0700 2006") + - ts-rfc822 ("02 Jan 06 15:04 MST") + - ts-rfc822z ("02 Jan 06 15:04 -0700") + - ts-rfc850 ("Monday, 02-Jan-06 15:04:05 MST") + - ts-rfc1123 ("Mon, 02 Jan 2006 15:04:05 MST") + - ts-rfc1123z ("Mon, 02 Jan 2006 15:04:05 -0700") + - ts-rfc3339 ("2006-01-02T15:04:05Z07:00") + - ts-rfc3339nano ("2006-01-02T15:04:05.999999999Z07:00") + - ts-httpd ("02/Jan/2006:15:04:05 -0700") + - ts-epoch (seconds since unix epoch) + - ts-epochnano (nanoseconds since unix epoch) + - ts-"CUSTOM" + + +CUSTOM time layouts must be within quotes and be the representation of the +"reference time", which is `Mon Jan 2 15:04:05 -0700 MST 2006` +See https://golang.org/pkg/time/#Parse for more details. + diff --git a/plugins/inputs/logparser/grok/grok.go b/plugins/inputs/logparser/grok/grok.go new file mode 100644 index 000000000..70b759826 --- /dev/null +++ b/plugins/inputs/logparser/grok/grok.go @@ -0,0 +1,440 @@ +package grok + +import ( + "bufio" + "fmt" + "log" + "os" + "regexp" + "strconv" + "strings" + "time" + + "github.com/vjeantet/grok" + + "github.com/influxdata/telegraf" +) + +var timeLayouts = map[string]string{ + "ts-ansic": "Mon Jan _2 15:04:05 2006", + "ts-unix": "Mon Jan _2 15:04:05 MST 2006", + "ts-ruby": "Mon Jan 02 15:04:05 -0700 2006", + "ts-rfc822": "02 Jan 06 15:04 MST", + "ts-rfc822z": "02 Jan 06 15:04 -0700", // RFC822 with numeric zone + "ts-rfc850": "Monday, 02-Jan-06 15:04:05 MST", + "ts-rfc1123": "Mon, 02 Jan 2006 15:04:05 MST", + "ts-rfc1123z": "Mon, 02 Jan 2006 15:04:05 -0700", // RFC1123 with numeric zone + "ts-rfc3339": "2006-01-02T15:04:05Z07:00", + "ts-rfc3339nano": "2006-01-02T15:04:05.999999999Z07:00", + "ts-httpd": "02/Jan/2006:15:04:05 -0700", + // These three are not exactly "layouts", but they are special cases that + // will get handled in the ParseLine function. + "ts-epoch": "EPOCH", + "ts-epochnano": "EPOCH_NANO", + "ts": "GENERIC_TIMESTAMP", // try parsing all known timestamp layouts. +} + +const ( + INT = "int" + TAG = "tag" + FLOAT = "float" + STRING = "string" + DURATION = "duration" + DROP = "drop" + EPOCH = "EPOCH" + EPOCH_NANO = "EPOCH_NANO" + GENERIC_TIMESTAMP = "GENERIC_TIMESTAMP" +) + +var ( + // matches named captures that contain a modifier. + // ie, + // %{NUMBER:bytes:int} + // %{IPORHOST:clientip:tag} + // %{HTTPDATE:ts1:ts-http} + // %{HTTPDATE:ts2:ts-"02 Jan 06 15:04"} + modifierRe = regexp.MustCompile(`%{\w+:(\w+):(ts-".+"|t?s?-?\w+)}`) + // matches a plain pattern name. ie, %{NUMBER} + patternOnlyRe = regexp.MustCompile(`%{(\w+)}`) +) + +type Parser struct { + Patterns []string + // namedPatterns is a list of internally-assigned names to the patterns + // specified by the user in Patterns. + // They will look like: + // GROK_INTERNAL_PATTERN_0, GROK_INTERNAL_PATTERN_1, etc. 
+ namedPatterns []string + CustomPatterns string + CustomPatternFiles []string + Measurement string + + // typeMap is a map of patterns -> capture name -> modifier, + // ie, { + // "%{TESTLOG}": + // { + // "bytes": "int", + // "clientip": "tag" + // } + // } + typeMap map[string]map[string]string + // tsMap is a map of patterns -> capture name -> timestamp layout. + // ie, { + // "%{TESTLOG}": + // { + // "httptime": "02/Jan/2006:15:04:05 -0700" + // } + // } + tsMap map[string]map[string]string + // patterns is a map of all of the parsed patterns from CustomPatterns + // and CustomPatternFiles. + // ie, { + // "DURATION": "%{NUMBER}[nuµm]?s" + // "RESPONSE_CODE": "%{NUMBER:rc:tag}" + // } + patterns map[string]string + // foundTsLayouts is a slice of timestamp patterns that have been found + // in the log lines. This slice gets updated if the user uses the generic + // 'ts' modifier for timestamps. This slice is checked first for matches, + // so that previously-matched layouts get priority over all other timestamp + // layouts. + foundTsLayouts []string + + g *grok.Grok + tsModder *tsModder +} + +func (p *Parser) Compile() error { + p.typeMap = make(map[string]map[string]string) + p.tsMap = make(map[string]map[string]string) + p.patterns = make(map[string]string) + p.tsModder = &tsModder{} + var err error + p.g, err = grok.NewWithConfig(&grok.Config{NamedCapturesOnly: true}) + if err != nil { + return err + } + + // Give Patterns fake names so that they can be treated as named + // "custom patterns" + p.namedPatterns = make([]string, len(p.Patterns)) + for i, pattern := range p.Patterns { + name := fmt.Sprintf("GROK_INTERNAL_PATTERN_%d", i) + p.CustomPatterns += "\n" + name + " " + pattern + "\n" + p.namedPatterns[i] = "%{" + name + "}" + } + + // Combine user-supplied CustomPatterns with DEFAULT_PATTERNS and parse + // them together as the same type of pattern. + p.CustomPatterns = DEFAULT_PATTERNS + p.CustomPatterns + if len(p.CustomPatterns) != 0 { + scanner := bufio.NewScanner(strings.NewReader(p.CustomPatterns)) + p.addCustomPatterns(scanner) + } + + // Parse any custom pattern files supplied. 
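+	// Each non-comment line in a pattern file uses the same "NAME PATTERN"
+	// syntax that addCustomPatterns parses below.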
+	for _, filename := range p.CustomPatternFiles {
+		file, err := os.Open(filename)
+		if err != nil {
+			return err
+		}
+
+		scanner := bufio.NewScanner(bufio.NewReader(file))
+		p.addCustomPatterns(scanner)
+		file.Close() // the file is fully consumed by the scanner; release it
+	}
+
+	if p.Measurement == "" {
+		p.Measurement = "logparser_grok"
+	}
+
+	return p.compileCustomPatterns()
+}
+
+func (p *Parser) ParseLine(line string) (telegraf.Metric, error) {
+	var err error
+	// values are the parsed fields from the log line
+	var values map[string]string
+	// the matching pattern string
+	var patternName string
+	for _, pattern := range p.namedPatterns {
+		if values, err = p.g.Parse(pattern, line); err != nil {
+			return nil, err
+		}
+		if len(values) != 0 {
+			patternName = pattern
+			break
+		}
+	}
+
+	if len(values) == 0 {
+		return nil, nil
+	}
+
+	fields := make(map[string]interface{})
+	tags := make(map[string]string)
+	timestamp := time.Now()
+	for k, v := range values {
+		if k == "" || v == "" {
+			continue
+		}
+
+		// t is the modifier of the field
+		var t string
+		// check if pattern has some modifiers
+		if types, ok := p.typeMap[patternName]; ok {
+			t = types[k]
+		}
+		// if we didn't find a modifier, check if we have a timestamp layout
+		if t == "" {
+			if ts, ok := p.tsMap[patternName]; ok {
+				// check if the modifier is a timestamp layout
+				if layout, ok := ts[k]; ok {
+					t = layout
+				}
+			}
+		}
+		// if we didn't find a type OR timestamp modifier, assume string
+		if t == "" {
+			t = STRING
+		}
+
+		switch t {
+		case INT:
+			iv, err := strconv.ParseInt(v, 10, 64)
+			if err != nil {
+				log.Printf("ERROR parsing %s to int: %s", v, err)
+			} else {
+				fields[k] = iv
+			}
+		case FLOAT:
+			fv, err := strconv.ParseFloat(v, 64)
+			if err != nil {
+				log.Printf("ERROR parsing %s to float: %s", v, err)
+			} else {
+				fields[k] = fv
+			}
+		case DURATION:
+			d, err := time.ParseDuration(v)
+			if err != nil {
+				log.Printf("ERROR parsing %s to duration: %s", v, err)
+			} else {
+				fields[k] = int64(d)
+			}
+		case TAG:
+			tags[k] = v
+		case STRING:
+			fields[k] = strings.Trim(v, `"`)
+		case EPOCH:
+			iv, err := strconv.ParseInt(v, 10, 64)
+			if err != nil {
+				log.Printf("ERROR parsing %s to int: %s", v, err)
+			} else {
+				timestamp = time.Unix(iv, 0)
+			}
+		case EPOCH_NANO:
+			iv, err := strconv.ParseInt(v, 10, 64)
+			if err != nil {
+				log.Printf("ERROR parsing %s to int: %s", v, err)
+			} else {
+				timestamp = time.Unix(0, iv)
+			}
+		case GENERIC_TIMESTAMP:
+			var foundTs bool
+			// first try timestamp layouts that we've already found
+			for _, layout := range p.foundTsLayouts {
+				ts, err := time.Parse(layout, v)
+				if err == nil {
+					timestamp = ts
+					foundTs = true
+					break
+				}
+			}
+			// if we haven't found a timestamp layout yet, try all timestamp
+			// layouts.
+			if !foundTs {
+				for _, layout := range timeLayouts {
+					ts, err := time.Parse(layout, v)
+					if err == nil {
+						timestamp = ts
+						foundTs = true
+						p.foundTsLayouts = append(p.foundTsLayouts, layout)
+						break
+					}
+				}
+			}
+			// if we still haven't found a timestamp layout, log it and we will
+			// just use time.Now()
+			if !foundTs {
+				log.Printf("ERROR parsing timestamp [%s], could not find any "+
+					"suitable time layouts.", v)
+			}
+		case DROP:
+			// goodbye!
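+		// any other value of t is assumed to be a custom time layout
+		// (from a ts-"LAYOUT" modifier) and is tried with time.Parse below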
+ default: + ts, err := time.Parse(t, v) + if err == nil { + timestamp = ts + } else { + log.Printf("ERROR parsing %s to time layout [%s]: %s", v, t, err) + } + } + } + + return telegraf.NewMetric(p.Measurement, tags, fields, p.tsModder.tsMod(timestamp)) +} + +func (p *Parser) addCustomPatterns(scanner *bufio.Scanner) { + for scanner.Scan() { + line := strings.TrimSpace(scanner.Text()) + if len(line) > 0 && line[0] != '#' { + names := strings.SplitN(line, " ", 2) + p.patterns[names[0]] = names[1] + } + } +} + +func (p *Parser) compileCustomPatterns() error { + var err error + // check if the pattern contains a subpattern that is already defined + // replace it with the subpattern for modifier inheritance. + for i := 0; i < 2; i++ { + for name, pattern := range p.patterns { + subNames := patternOnlyRe.FindAllStringSubmatch(pattern, -1) + for _, subName := range subNames { + if subPattern, ok := p.patterns[subName[1]]; ok { + pattern = strings.Replace(pattern, subName[0], subPattern, 1) + } + } + p.patterns[name] = pattern + } + } + + // check if pattern contains modifiers. Parse them out if it does. + for name, pattern := range p.patterns { + if modifierRe.MatchString(pattern) { + // this pattern has modifiers, so parse out the modifiers + pattern, err = p.parseTypedCaptures(name, pattern) + if err != nil { + return err + } + p.patterns[name] = pattern + } + } + + return p.g.AddPatternsFromMap(p.patterns) +} + +// parseTypedCaptures parses the capture modifiers, and then deletes the +// modifier from the line so that it is a valid "grok" pattern again. +// ie, +// %{NUMBER:bytes:int} => %{NUMBER:bytes} (stores %{NUMBER}->bytes->int) +// %{IPORHOST:clientip:tag} => %{IPORHOST:clientip} (stores %{IPORHOST}->clientip->tag) +func (p *Parser) parseTypedCaptures(name, pattern string) (string, error) { + matches := modifierRe.FindAllStringSubmatch(pattern, -1) + + // grab the name of the capture pattern + patternName := "%{" + name + "}" + // create type map for this pattern + p.typeMap[patternName] = make(map[string]string) + p.tsMap[patternName] = make(map[string]string) + + // boolean to verify that each pattern only has a single ts- data type. + hasTimestamp := false + for _, match := range matches { + // regex capture 1 is the name of the capture + // regex capture 2 is the modifier of the capture + if strings.HasPrefix(match[2], "ts") { + if hasTimestamp { + return pattern, fmt.Errorf("logparser pattern compile error: "+ + "Each pattern is allowed only one named "+ + "timestamp data type. pattern: %s", pattern) + } + if layout, ok := timeLayouts[match[2]]; ok { + // built-in time format + p.tsMap[patternName][match[1]] = layout + } else { + // custom time format + p.tsMap[patternName][match[1]] = strings.TrimSuffix(strings.TrimPrefix(match[2], `ts-"`), `"`) + } + hasTimestamp = true + } else { + p.typeMap[patternName][match[1]] = match[2] + } + + // the modifier is not a valid part of a "grok" pattern, so remove it + // from the pattern. + pattern = strings.Replace(pattern, ":"+match[2]+"}", "}", 1) + } + + return pattern, nil +} + +// tsModder is a struct for incrementing identical timestamps of log lines +// so that we don't push identical metrics that will get overwritten. +type tsModder struct { + dupe time.Time + last time.Time + incr time.Duration + incrn time.Duration + rollover time.Duration +} + +// tsMod increments the given timestamp one unit more from the previous +// duplicate timestamp. 
+// the increment unit is determined as the next smallest time unit below the +// most significant time unit of ts. +// ie, if the input is at ms precision, it will increment it 1µs. +func (t *tsModder) tsMod(ts time.Time) time.Time { + defer func() { t.last = ts }() + // don't mod the time if we don't need to + if t.last.IsZero() || ts.IsZero() { + t.incrn = 0 + t.rollover = 0 + return ts + } + if !ts.Equal(t.last) && !ts.Equal(t.dupe) { + t.incr = 0 + t.incrn = 0 + t.rollover = 0 + return ts + } + + if ts.Equal(t.last) { + t.dupe = ts + } + + if ts.Equal(t.dupe) && t.incr == time.Duration(0) { + tsNano := ts.UnixNano() + + d := int64(10) + counter := 1 + for { + a := tsNano % d + if a > 0 { + break + } + d = d * 10 + counter++ + } + + switch { + case counter <= 6: + t.incr = time.Nanosecond + case counter <= 9: + t.incr = time.Microsecond + case counter > 9: + t.incr = time.Millisecond + } + } + + t.incrn++ + if t.incrn == 999 && t.incr > time.Nanosecond { + t.rollover = t.incr * t.incrn + t.incrn = 1 + t.incr = t.incr / 1000 + if t.incr < time.Nanosecond { + t.incr = time.Nanosecond + } + } + return ts.Add(t.incr*t.incrn + t.rollover) +} diff --git a/plugins/inputs/logparser/grok/grok_test.go b/plugins/inputs/logparser/grok/grok_test.go new file mode 100644 index 000000000..bc8d980f2 --- /dev/null +++ b/plugins/inputs/logparser/grok/grok_test.go @@ -0,0 +1,587 @@ +package grok + +import ( + "testing" + "time" + + "github.com/influxdata/telegraf" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +var benchM telegraf.Metric + +func Benchmark_ParseLine_CommonLogFormat(b *testing.B) { + p := &Parser{ + Patterns: []string{"%{COMMON_LOG_FORMAT}"}, + } + p.Compile() + + var m telegraf.Metric + for n := 0; n < b.N; n++ { + m, _ = p.ParseLine(`127.0.0.1 user-identifier frank [10/Oct/2000:13:55:36 -0700] "GET /apache_pb.gif HTTP/1.0" 200 2326`) + } + benchM = m +} + +func Benchmark_ParseLine_CombinedLogFormat(b *testing.B) { + p := &Parser{ + Patterns: []string{"%{COMBINED_LOG_FORMAT}"}, + } + p.Compile() + + var m telegraf.Metric + for n := 0; n < b.N; n++ { + m, _ = p.ParseLine(`127.0.0.1 user-identifier frank [10/Oct/2000:13:55:36 -0700] "GET /apache_pb.gif HTTP/1.0" 200 2326 "-" "Mozilla"`) + } + benchM = m +} + +func Benchmark_ParseLine_CustomPattern(b *testing.B) { + p := &Parser{ + Patterns: []string{"%{TEST_LOG_A}", "%{TEST_LOG_B}"}, + CustomPatterns: ` + DURATION %{NUMBER}[nuµm]?s + RESPONSE_CODE %{NUMBER:response_code:tag} + RESPONSE_TIME %{DURATION:response_time:duration} + TEST_LOG_A %{NUMBER:myfloat:float} %{RESPONSE_CODE} %{IPORHOST:clientip} %{RESPONSE_TIME} + `, + } + p.Compile() + + var m telegraf.Metric + for n := 0; n < b.N; n++ { + m, _ = p.ParseLine(`[04/Jun/2016:12:41:45 +0100] 1.25 200 192.168.1.1 5.432µs 101`) + } + benchM = m +} + +func TestMeasurementName(t *testing.T) { + p := &Parser{ + Measurement: "my_web_log", + Patterns: []string{"%{COMMON_LOG_FORMAT}"}, + } + assert.NoError(t, p.Compile()) + + // Parse an influxdb POST request + m, err := p.ParseLine(`127.0.0.1 user-identifier frank [10/Oct/2000:13:55:36 -0700] "GET /apache_pb.gif HTTP/1.0" 200 2326`) + require.NotNil(t, m) + assert.NoError(t, err) + assert.Equal(t, + map[string]interface{}{ + "resp_bytes": int64(2326), + "auth": "frank", + "client_ip": "127.0.0.1", + "http_version": float64(1.0), + "ident": "user-identifier", + "request": "/apache_pb.gif", + }, + m.Fields()) + assert.Equal(t, map[string]string{"verb": "GET", "resp_code": "200"}, m.Tags()) + assert.Equal(t, 
"my_web_log", m.Name()) +} + +func TestCustomInfluxdbHttpd(t *testing.T) { + p := &Parser{ + Patterns: []string{`\[httpd\] %{COMBINED_LOG_FORMAT} %{UUID:uuid:drop} %{NUMBER:response_time_us:int}`}, + } + assert.NoError(t, p.Compile()) + + // Parse an influxdb POST request + m, err := p.ParseLine(`[httpd] ::1 - - [14/Jun/2016:11:33:29 +0100] "POST /write?consistency=any&db=telegraf&precision=ns&rp= HTTP/1.1" 204 0 "-" "InfluxDBClient" 6f61bc44-321b-11e6-8050-000000000000 2513`) + require.NotNil(t, m) + assert.NoError(t, err) + assert.Equal(t, + map[string]interface{}{ + "resp_bytes": int64(0), + "auth": "-", + "client_ip": "::1", + "http_version": float64(1.1), + "ident": "-", + "referrer": "-", + "request": "/write?consistency=any&db=telegraf&precision=ns&rp=", + "response_time_us": int64(2513), + "agent": "InfluxDBClient", + }, + m.Fields()) + assert.Equal(t, map[string]string{"verb": "POST", "resp_code": "204"}, m.Tags()) + + // Parse an influxdb GET request + m, err = p.ParseLine(`[httpd] ::1 - - [14/Jun/2016:12:10:02 +0100] "GET /query?db=telegraf&q=SELECT+bytes%2Cresponse_time_us+FROM+logparser_grok+WHERE+http_method+%3D+%27GET%27+AND+response_time_us+%3E+0+AND+time+%3E+now%28%29+-+1h HTTP/1.1" 200 578 "http://localhost:8083/" "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.84 Safari/537.36" 8a3806f1-3220-11e6-8006-000000000000 988`) + require.NotNil(t, m) + assert.NoError(t, err) + assert.Equal(t, + map[string]interface{}{ + "resp_bytes": int64(578), + "auth": "-", + "client_ip": "::1", + "http_version": float64(1.1), + "ident": "-", + "referrer": "http://localhost:8083/", + "request": "/query?db=telegraf&q=SELECT+bytes%2Cresponse_time_us+FROM+logparser_grok+WHERE+http_method+%3D+%27GET%27+AND+response_time_us+%3E+0+AND+time+%3E+now%28%29+-+1h", + "response_time_us": int64(988), + "agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.84 Safari/537.36", + }, + m.Fields()) + assert.Equal(t, map[string]string{"verb": "GET", "resp_code": "200"}, m.Tags()) +} + +// common log format +// 127.0.0.1 user-identifier frank [10/Oct/2000:13:55:36 -0700] "GET /apache_pb.gif HTTP/1.0" 200 2326 +func TestBuiltinCommonLogFormat(t *testing.T) { + p := &Parser{ + Patterns: []string{"%{COMMON_LOG_FORMAT}"}, + } + assert.NoError(t, p.Compile()) + + // Parse an influxdb POST request + m, err := p.ParseLine(`127.0.0.1 user-identifier frank [10/Oct/2000:13:55:36 -0700] "GET /apache_pb.gif HTTP/1.0" 200 2326`) + require.NotNil(t, m) + assert.NoError(t, err) + assert.Equal(t, + map[string]interface{}{ + "resp_bytes": int64(2326), + "auth": "frank", + "client_ip": "127.0.0.1", + "http_version": float64(1.0), + "ident": "user-identifier", + "request": "/apache_pb.gif", + }, + m.Fields()) + assert.Equal(t, map[string]string{"verb": "GET", "resp_code": "200"}, m.Tags()) +} + +// combined log format +// 127.0.0.1 user-identifier frank [10/Oct/2000:13:55:36 -0700] "GET /apache_pb.gif HTTP/1.0" 200 2326 "-" "Mozilla" +func TestBuiltinCombinedLogFormat(t *testing.T) { + p := &Parser{ + Patterns: []string{"%{COMBINED_LOG_FORMAT}"}, + } + assert.NoError(t, p.Compile()) + + // Parse an influxdb POST request + m, err := p.ParseLine(`127.0.0.1 user-identifier frank [10/Oct/2000:13:55:36 -0700] "GET /apache_pb.gif HTTP/1.0" 200 2326 "-" "Mozilla"`) + require.NotNil(t, m) + assert.NoError(t, err) + assert.Equal(t, + map[string]interface{}{ + "resp_bytes": int64(2326), + "auth": "frank", + "client_ip": 
"127.0.0.1", + "http_version": float64(1.0), + "ident": "user-identifier", + "request": "/apache_pb.gif", + "referrer": "-", + "agent": "Mozilla", + }, + m.Fields()) + assert.Equal(t, map[string]string{"verb": "GET", "resp_code": "200"}, m.Tags()) +} + +func TestCompileStringAndParse(t *testing.T) { + p := &Parser{ + Patterns: []string{"%{TEST_LOG_A}"}, + CustomPatterns: ` + DURATION %{NUMBER}[nuµm]?s + RESPONSE_CODE %{NUMBER:response_code:tag} + RESPONSE_TIME %{DURATION:response_time:duration} + TEST_LOG_A %{NUMBER:myfloat:float} %{RESPONSE_CODE} %{IPORHOST:clientip} %{RESPONSE_TIME} + `, + } + assert.NoError(t, p.Compile()) + + metricA, err := p.ParseLine(`1.25 200 192.168.1.1 5.432µs`) + require.NotNil(t, metricA) + assert.NoError(t, err) + assert.Equal(t, + map[string]interface{}{ + "clientip": "192.168.1.1", + "myfloat": float64(1.25), + "response_time": int64(5432), + }, + metricA.Fields()) + assert.Equal(t, map[string]string{"response_code": "200"}, metricA.Tags()) +} + +func TestCompileErrorsOnInvalidPattern(t *testing.T) { + p := &Parser{ + Patterns: []string{"%{TEST_LOG_A}", "%{TEST_LOG_B}"}, + CustomPatterns: ` + DURATION %{NUMBER}[nuµm]?s + RESPONSE_CODE %{NUMBER:response_code:tag} + RESPONSE_TIME %{DURATION:response_time:duration} + TEST_LOG_A %{NUMBER:myfloat:float} %{RESPONSE_CODE} %{IPORHOST:clientip} %{RESPONSE_TIME} + `, + } + assert.Error(t, p.Compile()) + + metricA, _ := p.ParseLine(`1.25 200 192.168.1.1 5.432µs`) + require.Nil(t, metricA) +} + +func TestParsePatternsWithoutCustom(t *testing.T) { + p := &Parser{ + Patterns: []string{"%{POSINT:ts:ts-epochnano} response_time=%{POSINT:response_time:int} mymetric=%{NUMBER:metric:float}"}, + } + assert.NoError(t, p.Compile()) + + metricA, err := p.ParseLine(`1466004605359052000 response_time=20821 mymetric=10890.645`) + require.NotNil(t, metricA) + assert.NoError(t, err) + assert.Equal(t, + map[string]interface{}{ + "response_time": int64(20821), + "metric": float64(10890.645), + }, + metricA.Fields()) + assert.Equal(t, map[string]string{}, metricA.Tags()) + assert.Equal(t, time.Unix(0, 1466004605359052000), metricA.Time()) +} + +func TestParseEpochNano(t *testing.T) { + p := &Parser{ + Patterns: []string{"%{MYAPP}"}, + CustomPatterns: ` + MYAPP %{POSINT:ts:ts-epochnano} response_time=%{POSINT:response_time:int} mymetric=%{NUMBER:metric:float} + `, + } + assert.NoError(t, p.Compile()) + + metricA, err := p.ParseLine(`1466004605359052000 response_time=20821 mymetric=10890.645`) + require.NotNil(t, metricA) + assert.NoError(t, err) + assert.Equal(t, + map[string]interface{}{ + "response_time": int64(20821), + "metric": float64(10890.645), + }, + metricA.Fields()) + assert.Equal(t, map[string]string{}, metricA.Tags()) + assert.Equal(t, time.Unix(0, 1466004605359052000), metricA.Time()) +} + +func TestParseEpoch(t *testing.T) { + p := &Parser{ + Patterns: []string{"%{MYAPP}"}, + CustomPatterns: ` + MYAPP %{POSINT:ts:ts-epoch} response_time=%{POSINT:response_time:int} mymetric=%{NUMBER:metric:float} + `, + } + assert.NoError(t, p.Compile()) + + metricA, err := p.ParseLine(`1466004605 response_time=20821 mymetric=10890.645`) + require.NotNil(t, metricA) + assert.NoError(t, err) + assert.Equal(t, + map[string]interface{}{ + "response_time": int64(20821), + "metric": float64(10890.645), + }, + metricA.Fields()) + assert.Equal(t, map[string]string{}, metricA.Tags()) + assert.Equal(t, time.Unix(1466004605, 0), metricA.Time()) +} + +func TestParseEpochErrors(t *testing.T) { + p := &Parser{ + Patterns: []string{"%{MYAPP}"}, + 
CustomPatterns: ` + MYAPP %{WORD:ts:ts-epoch} response_time=%{POSINT:response_time:int} mymetric=%{NUMBER:metric:float} + `, + } + assert.NoError(t, p.Compile()) + + _, err := p.ParseLine(`foobar response_time=20821 mymetric=10890.645`) + assert.NoError(t, err) + + p = &Parser{ + Patterns: []string{"%{MYAPP}"}, + CustomPatterns: ` + MYAPP %{WORD:ts:ts-epochnano} response_time=%{POSINT:response_time:int} mymetric=%{NUMBER:metric:float} + `, + } + assert.NoError(t, p.Compile()) + + _, err = p.ParseLine(`foobar response_time=20821 mymetric=10890.645`) + assert.NoError(t, err) +} + +func TestParseGenericTimestamp(t *testing.T) { + p := &Parser{ + Patterns: []string{`\[%{HTTPDATE:ts:ts}\] response_time=%{POSINT:response_time:int} mymetric=%{NUMBER:metric:float}`}, + } + assert.NoError(t, p.Compile()) + + metricA, err := p.ParseLine(`[09/Jun/2016:03:37:03 +0000] response_time=20821 mymetric=10890.645`) + require.NotNil(t, metricA) + assert.NoError(t, err) + assert.Equal(t, + map[string]interface{}{ + "response_time": int64(20821), + "metric": float64(10890.645), + }, + metricA.Fields()) + assert.Equal(t, map[string]string{}, metricA.Tags()) + assert.Equal(t, time.Unix(1465443423, 0).UTC(), metricA.Time().UTC()) + + metricB, err := p.ParseLine(`[09/Jun/2016:03:37:04 +0000] response_time=20821 mymetric=10890.645`) + require.NotNil(t, metricB) + assert.NoError(t, err) + assert.Equal(t, + map[string]interface{}{ + "response_time": int64(20821), + "metric": float64(10890.645), + }, + metricB.Fields()) + assert.Equal(t, map[string]string{}, metricB.Tags()) + assert.Equal(t, time.Unix(1465443424, 0).UTC(), metricB.Time().UTC()) +} + +func TestParseGenericTimestampNotFound(t *testing.T) { + p := &Parser{ + Patterns: []string{`\[%{NOTSPACE:ts:ts}\] response_time=%{POSINT:response_time:int} mymetric=%{NUMBER:metric:float}`}, + } + assert.NoError(t, p.Compile()) + + metricA, err := p.ParseLine(`[foobar] response_time=20821 mymetric=10890.645`) + require.NotNil(t, metricA) + assert.NoError(t, err) + assert.Equal(t, + map[string]interface{}{ + "response_time": int64(20821), + "metric": float64(10890.645), + }, + metricA.Fields()) + assert.Equal(t, map[string]string{}, metricA.Tags()) +} + +func TestCompileFileAndParse(t *testing.T) { + p := &Parser{ + Patterns: []string{"%{TEST_LOG_A}", "%{TEST_LOG_B}"}, + CustomPatternFiles: []string{"./testdata/test-patterns"}, + } + assert.NoError(t, p.Compile()) + + metricA, err := p.ParseLine(`[04/Jun/2016:12:41:45 +0100] 1.25 200 192.168.1.1 5.432µs 101`) + require.NotNil(t, metricA) + assert.NoError(t, err) + assert.Equal(t, + map[string]interface{}{ + "clientip": "192.168.1.1", + "myfloat": float64(1.25), + "response_time": int64(5432), + "myint": int64(101), + }, + metricA.Fields()) + assert.Equal(t, map[string]string{"response_code": "200"}, metricA.Tags()) + assert.Equal(t, + time.Date(2016, time.June, 4, 12, 41, 45, 0, time.FixedZone("foo", 60*60)).Nanosecond(), + metricA.Time().Nanosecond()) + + metricB, err := p.ParseLine(`[04/06/2016--12:41:45] 1.25 mystring dropme nomodifier`) + require.NotNil(t, metricB) + assert.NoError(t, err) + assert.Equal(t, + map[string]interface{}{ + "myfloat": 1.25, + "mystring": "mystring", + "nomodifier": "nomodifier", + }, + metricB.Fields()) + assert.Equal(t, map[string]string{}, metricB.Tags()) + assert.Equal(t, + time.Date(2016, time.June, 4, 12, 41, 45, 0, time.FixedZone("foo", 60*60)).Nanosecond(), + metricB.Time().Nanosecond()) +} + +func TestCompileNoModifiersAndParse(t *testing.T) { + p := &Parser{ + Patterns: 
[]string{"%{TEST_LOG_C}"}, + CustomPatterns: ` + DURATION %{NUMBER}[nuµm]?s + TEST_LOG_C %{NUMBER:myfloat} %{NUMBER} %{IPORHOST:clientip} %{DURATION:rt} + `, + } + assert.NoError(t, p.Compile()) + + metricA, err := p.ParseLine(`1.25 200 192.168.1.1 5.432µs`) + require.NotNil(t, metricA) + assert.NoError(t, err) + assert.Equal(t, + map[string]interface{}{ + "clientip": "192.168.1.1", + "myfloat": "1.25", + "rt": "5.432µs", + }, + metricA.Fields()) + assert.Equal(t, map[string]string{}, metricA.Tags()) +} + +func TestCompileNoNamesAndParse(t *testing.T) { + p := &Parser{ + Patterns: []string{"%{TEST_LOG_C}"}, + CustomPatterns: ` + DURATION %{NUMBER}[nuµm]?s + TEST_LOG_C %{NUMBER} %{NUMBER} %{IPORHOST} %{DURATION} + `, + } + assert.NoError(t, p.Compile()) + + metricA, err := p.ParseLine(`1.25 200 192.168.1.1 5.432µs`) + require.Nil(t, metricA) + assert.NoError(t, err) +} + +func TestParseNoMatch(t *testing.T) { + p := &Parser{ + Patterns: []string{"%{TEST_LOG_A}", "%{TEST_LOG_B}"}, + CustomPatternFiles: []string{"./testdata/test-patterns"}, + } + assert.NoError(t, p.Compile()) + + metricA, err := p.ParseLine(`[04/Jun/2016:12:41:45 +0100] notnumber 200 192.168.1.1 5.432µs 101`) + assert.NoError(t, err) + assert.Nil(t, metricA) +} + +func TestCompileErrors(t *testing.T) { + // Compile fails because there are multiple timestamps: + p := &Parser{ + Patterns: []string{"%{TEST_LOG_A}", "%{TEST_LOG_B}"}, + CustomPatterns: ` + TEST_LOG_A %{HTTPDATE:ts1:ts-httpd} %{HTTPDATE:ts2:ts-httpd} %{NUMBER:mynum:int} + `, + } + assert.Error(t, p.Compile()) + + // Compile fails because file doesn't exist: + p = &Parser{ + Patterns: []string{"%{TEST_LOG_A}", "%{TEST_LOG_B}"}, + CustomPatternFiles: []string{"/tmp/foo/bar/baz"}, + } + assert.Error(t, p.Compile()) +} + +func TestParseErrors(t *testing.T) { + // Parse fails because the pattern doesn't exist + p := &Parser{ + Patterns: []string{"%{TEST_LOG_B}"}, + CustomPatterns: ` + TEST_LOG_A %{HTTPDATE:ts:ts-httpd} %{WORD:myword:int} %{} + `, + } + assert.Error(t, p.Compile()) + _, err := p.ParseLine(`[04/Jun/2016:12:41:45 +0100] notnumber 200 192.168.1.1 5.432µs 101`) + assert.Error(t, err) + + // Parse fails because myword is not an int + p = &Parser{ + Patterns: []string{"%{TEST_LOG_A}"}, + CustomPatterns: ` + TEST_LOG_A %{HTTPDATE:ts:ts-httpd} %{WORD:myword:int} + `, + } + assert.NoError(t, p.Compile()) + _, err = p.ParseLine(`04/Jun/2016:12:41:45 +0100 notnumber`) + assert.Error(t, err) + + // Parse fails because myword is not a float + p = &Parser{ + Patterns: []string{"%{TEST_LOG_A}"}, + CustomPatterns: ` + TEST_LOG_A %{HTTPDATE:ts:ts-httpd} %{WORD:myword:float} + `, + } + assert.NoError(t, p.Compile()) + _, err = p.ParseLine(`04/Jun/2016:12:41:45 +0100 notnumber`) + assert.Error(t, err) + + // Parse fails because myword is not a duration + p = &Parser{ + Patterns: []string{"%{TEST_LOG_A}"}, + CustomPatterns: ` + TEST_LOG_A %{HTTPDATE:ts:ts-httpd} %{WORD:myword:duration} + `, + } + assert.NoError(t, p.Compile()) + _, err = p.ParseLine(`04/Jun/2016:12:41:45 +0100 notnumber`) + assert.Error(t, err) + + // Parse fails because the time layout is wrong. 
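+	// (the HTTPDATE value doesn't match the ts-unix layout, and "notnumber"
+	// is not a valid duration)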
+	p = &Parser{
+		Patterns: []string{"%{TEST_LOG_A}"},
+		CustomPatterns: `
+			TEST_LOG_A %{HTTPDATE:ts:ts-unix} %{WORD:myword:duration}
+		`,
+	}
+	assert.NoError(t, p.Compile())
+	_, err = p.ParseLine(`04/Jun/2016:12:41:45 +0100 notnumber`)
+	assert.Error(t, err)
+}
+
+func TestTsModder(t *testing.T) {
+	tsm := &tsModder{}
+
+	reftime := time.Date(2006, time.December, 1, 1, 1, 1, int(time.Millisecond), time.UTC)
+	modt := tsm.tsMod(reftime)
+	assert.Equal(t, reftime, modt)
+	modt = tsm.tsMod(reftime)
+	assert.Equal(t, reftime.Add(time.Microsecond*1), modt)
+	modt = tsm.tsMod(reftime)
+	assert.Equal(t, reftime.Add(time.Microsecond*2), modt)
+	modt = tsm.tsMod(reftime)
+	assert.Equal(t, reftime.Add(time.Microsecond*3), modt)
+
+	reftime = time.Date(2006, time.December, 1, 1, 1, 1, int(time.Microsecond), time.UTC)
+	modt = tsm.tsMod(reftime)
+	assert.Equal(t, reftime, modt)
+	modt = tsm.tsMod(reftime)
+	assert.Equal(t, reftime.Add(time.Nanosecond*1), modt)
+	modt = tsm.tsMod(reftime)
+	assert.Equal(t, reftime.Add(time.Nanosecond*2), modt)
+	modt = tsm.tsMod(reftime)
+	assert.Equal(t, reftime.Add(time.Nanosecond*3), modt)
+
+	reftime = time.Date(2006, time.December, 1, 1, 1, 1, int(time.Microsecond)*999, time.UTC)
+	modt = tsm.tsMod(reftime)
+	assert.Equal(t, reftime, modt)
+	modt = tsm.tsMod(reftime)
+	assert.Equal(t, reftime.Add(time.Nanosecond*1), modt)
+	modt = tsm.tsMod(reftime)
+	assert.Equal(t, reftime.Add(time.Nanosecond*2), modt)
+	modt = tsm.tsMod(reftime)
+	assert.Equal(t, reftime.Add(time.Nanosecond*3), modt)
+
+	reftime = time.Date(2006, time.December, 1, 1, 1, 1, 0, time.UTC)
+	modt = tsm.tsMod(reftime)
+	assert.Equal(t, reftime, modt)
+	modt = tsm.tsMod(reftime)
+	assert.Equal(t, reftime.Add(time.Millisecond*1), modt)
+	modt = tsm.tsMod(reftime)
+	assert.Equal(t, reftime.Add(time.Millisecond*2), modt)
+	modt = tsm.tsMod(reftime)
+	assert.Equal(t, reftime.Add(time.Millisecond*3), modt)
+
+	reftime = time.Time{}
+	modt = tsm.tsMod(reftime)
+	assert.Equal(t, reftime, modt)
+}
+
+func TestTsModder_Rollover(t *testing.T) {
+	tsm := &tsModder{}
+
+	reftime := time.Date(2006, time.December, 1, 1, 1, 1, int(time.Millisecond), time.UTC)
+	modt := tsm.tsMod(reftime)
+	for i := 1; i < 1000; i++ {
+		modt = tsm.tsMod(reftime)
+	}
+	assert.Equal(t, reftime.Add(time.Microsecond*999+time.Nanosecond), modt)
+
+	reftime = time.Date(2006, time.December, 1, 1, 1, 1, int(time.Microsecond), time.UTC)
+	modt = tsm.tsMod(reftime)
+	for i := 1; i < 1001; i++ {
+		modt = tsm.tsMod(reftime)
+	}
+	assert.Equal(t, reftime.Add(time.Nanosecond*1000), modt)
+}
diff --git a/plugins/inputs/logparser/grok/influx_patterns.go b/plugins/inputs/logparser/grok/influx_patterns.go
new file mode 100644
index 000000000..ff9d60ebf
--- /dev/null
+++ b/plugins/inputs/logparser/grok/influx_patterns.go
@@ -0,0 +1,78 @@
+package grok
+
+// THIS SHOULD BE KEPT IN-SYNC WITH patterns/influx-patterns
+const DEFAULT_PATTERNS = `
+# Captures are a slightly modified version of logstash "grok" patterns, with
+# the format %{<capture_syntax>[:<semantic_name>][:<modifier>]}
+# By default all named captures are converted into string fields.
+# Modifiers can be used to convert captures to other types or tags.
+# Timestamp modifiers can be used to convert captures to the timestamp of the
+# parsed metric.
+ +# View logstash grok pattern docs here: +# https://www.elastic.co/guide/en/logstash/current/plugins-filters-grok.html +# All default logstash patterns are supported, these can be viewed here: +# https://github.com/logstash-plugins/logstash-patterns-core/blob/master/patterns/grok-patterns + +# Available modifiers: +# string (default if nothing is specified) +# int +# float +# duration (ie, 5.23ms gets converted to int nanoseconds) +# tag (converts the field into a tag) +# drop (drops the field completely) +# Timestamp modifiers: +# ts-ansic ("Mon Jan _2 15:04:05 2006") +# ts-unix ("Mon Jan _2 15:04:05 MST 2006") +# ts-ruby ("Mon Jan 02 15:04:05 -0700 2006") +# ts-rfc822 ("02 Jan 06 15:04 MST") +# ts-rfc822z ("02 Jan 06 15:04 -0700") +# ts-rfc850 ("Monday, 02-Jan-06 15:04:05 MST") +# ts-rfc1123 ("Mon, 02 Jan 2006 15:04:05 MST") +# ts-rfc1123z ("Mon, 02 Jan 2006 15:04:05 -0700") +# ts-rfc3339 ("2006-01-02T15:04:05Z07:00") +# ts-rfc3339nano ("2006-01-02T15:04:05.999999999Z07:00") +# ts-httpd ("02/Jan/2006:15:04:05 -0700") +# ts-epoch (seconds since unix epoch) +# ts-epochnano (nanoseconds since unix epoch) +# ts-"CUSTOM" +# CUSTOM time layouts must be within quotes and be the representation of the +# "reference time", which is Mon Jan 2 15:04:05 -0700 MST 2006 +# See https://golang.org/pkg/time/#Parse for more details. + +# Example log file pattern, example log looks like this: +# [04/Jun/2016:12:41:45 +0100] 1.25 200 192.168.1.1 5.432µs +# Breakdown of the DURATION pattern below: +# NUMBER is a builtin logstash grok pattern matching float & int numbers. +# [nuµm]? is a regex specifying 0 or 1 of the characters within brackets. +# s is also regex, this pattern must end in "s". +# so DURATION will match something like '5.324ms' or '6.1µs' or '10s' +DURATION %{NUMBER}[nuµm]?s +RESPONSE_CODE %{NUMBER:response_code:tag} +RESPONSE_TIME %{DURATION:response_time_ns:duration} +EXAMPLE_LOG \[%{HTTPDATE:ts:ts-httpd}\] %{NUMBER:myfloat:float} %{RESPONSE_CODE} %{IPORHOST:clientip} %{RESPONSE_TIME} + +# Wider-ranging username matching vs. logstash built-in %{USER} +NGUSERNAME [a-zA-Z\.\@\-\+_%]+ +NGUSER %{NGUSERNAME} +# Wider-ranging client IP matching +CLIENT (?:%{IPORHOST}|%{HOSTPORT}|::1) + +## +## COMMON LOG PATTERNS +## + +# apache & nginx logs, this is also known as the "common log format" +# see https://en.wikipedia.org/wiki/Common_Log_Format +COMMON_LOG_FORMAT %{CLIENT:client_ip} %{NGUSER:ident} %{NGUSER:auth} \[%{HTTPDATE:ts:ts-httpd}\] "(?:%{WORD:verb:tag} %{NOTSPACE:request}(?: HTTP/%{NUMBER:http_version:float})?|%{DATA})" %{NUMBER:resp_code:tag} (?:%{NUMBER:resp_bytes:int}|-) + +# Combined log format is the same as the common log format but with the addition +# of two quoted strings at the end for "referrer" and "agent" +# See Examples at http://httpd.apache.org/docs/current/mod/mod_log_config.html +COMBINED_LOG_FORMAT %{COMMON_LOG_FORMAT} %{QS:referrer} %{QS:agent} + +# HTTPD log formats +HTTPD20_ERRORLOG \[%{HTTPDERROR_DATE:timestamp}\] \[%{LOGLEVEL:loglevel:tag}\] (?:\[client %{IPORHOST:clientip}\] ){0,1}%{GREEDYDATA:errormsg} +HTTPD24_ERRORLOG \[%{HTTPDERROR_DATE:timestamp}\] \[%{WORD:module}:%{LOGLEVEL:loglevel:tag}\] \[pid %{POSINT:pid:int}:tid %{NUMBER:tid:int}\]( \(%{POSINT:proxy_errorcode:int}\)%{DATA:proxy_errormessage}:)?( \[client %{IPORHOST:client}:%{POSINT:clientport}\])? 
%{DATA:errorcode}: %{GREEDYDATA:message}
+HTTPD_ERRORLOG %{HTTPD20_ERRORLOG}|%{HTTPD24_ERRORLOG}
+`
diff --git a/plugins/inputs/logparser/grok/patterns/influx-patterns b/plugins/inputs/logparser/grok/patterns/influx-patterns
new file mode 100644
index 000000000..6f4d81f89
--- /dev/null
+++ b/plugins/inputs/logparser/grok/patterns/influx-patterns
@@ -0,0 +1,73 @@
+# Captures are a slightly modified version of logstash "grok" patterns, with
+# the format %{<capture_syntax>[:<semantic_name>][:<modifier>]}
+# By default all named captures are converted into string fields.
+# Modifiers can be used to convert captures to other types or tags.
+# Timestamp modifiers can be used to convert captures to the timestamp of the
+# parsed metric.
+
+# View logstash grok pattern docs here:
+#   https://www.elastic.co/guide/en/logstash/current/plugins-filters-grok.html
+# All default logstash patterns are supported, these can be viewed here:
+#   https://github.com/logstash-plugins/logstash-patterns-core/blob/master/patterns/grok-patterns
+
+# Available modifiers:
+#   string   (default if nothing is specified)
+#   int
+#   float
+#   duration (ie, 5.23ms gets converted to int nanoseconds)
+#   tag      (converts the field into a tag)
+#   drop     (drops the field completely)
+# Timestamp modifiers:
+#   ts-ansic         ("Mon Jan _2 15:04:05 2006")
+#   ts-unix          ("Mon Jan _2 15:04:05 MST 2006")
+#   ts-ruby          ("Mon Jan 02 15:04:05 -0700 2006")
+#   ts-rfc822        ("02 Jan 06 15:04 MST")
+#   ts-rfc822z       ("02 Jan 06 15:04 -0700")
+#   ts-rfc850        ("Monday, 02-Jan-06 15:04:05 MST")
+#   ts-rfc1123       ("Mon, 02 Jan 2006 15:04:05 MST")
+#   ts-rfc1123z      ("Mon, 02 Jan 2006 15:04:05 -0700")
+#   ts-rfc3339       ("2006-01-02T15:04:05Z07:00")
+#   ts-rfc3339nano   ("2006-01-02T15:04:05.999999999Z07:00")
+#   ts-httpd         ("02/Jan/2006:15:04:05 -0700")
+#   ts-epoch         (seconds since unix epoch)
+#   ts-epochnano     (nanoseconds since unix epoch)
+#   ts-"CUSTOM"
+# CUSTOM time layouts must be within quotes and be the representation of the
+# "reference time", which is Mon Jan 2 15:04:05 -0700 MST 2006
+# See https://golang.org/pkg/time/#Parse for more details.
+
+# Example log file pattern, example log looks like this:
+#   [04/Jun/2016:12:41:45 +0100] 1.25 200 192.168.1.1 5.432µs
+# Breakdown of the DURATION pattern below:
+#   NUMBER is a builtin logstash grok pattern matching float & int numbers.
+#   [nuµm]? is a regex specifying 0 or 1 of the characters within brackets.
+#   s is also regex, this pattern must end in "s".
+#   so DURATION will match something like '5.324ms' or '6.1µs' or '10s'
+DURATION %{NUMBER}[nuµm]?s
+RESPONSE_CODE %{NUMBER:response_code:tag}
+RESPONSE_TIME %{DURATION:response_time_ns:duration}
+EXAMPLE_LOG \[%{HTTPDATE:ts:ts-httpd}\] %{NUMBER:myfloat:float} %{RESPONSE_CODE} %{IPORHOST:clientip} %{RESPONSE_TIME}
+
+# Wider-ranging username matching vs.
logstash built-in %{USER} +NGUSERNAME [a-zA-Z\.\@\-\+_%]+ +NGUSER %{NGUSERNAME} +# Wider-ranging client IP matching +CLIENT (?:%{IPORHOST}|%{HOSTPORT}|::1) + +## +## COMMON LOG PATTERNS +## + +# apache & nginx logs, this is also known as the "common log format" +# see https://en.wikipedia.org/wiki/Common_Log_Format +COMMON_LOG_FORMAT %{CLIENT:client_ip} %{NGUSER:ident} %{NGUSER:auth} \[%{HTTPDATE:ts:ts-httpd}\] "(?:%{WORD:verb:tag} %{NOTSPACE:request}(?: HTTP/%{NUMBER:http_version:float})?|%{DATA})" %{NUMBER:resp_code:tag} (?:%{NUMBER:resp_bytes:int}|-) + +# Combined log format is the same as the common log format but with the addition +# of two quoted strings at the end for "referrer" and "agent" +# See Examples at http://httpd.apache.org/docs/current/mod/mod_log_config.html +COMBINED_LOG_FORMAT %{COMMON_LOG_FORMAT} %{QS:referrer} %{QS:agent} + +# HTTPD log formats +HTTPD20_ERRORLOG \[%{HTTPDERROR_DATE:timestamp}\] \[%{LOGLEVEL:loglevel:tag}\] (?:\[client %{IPORHOST:clientip}\] ){0,1}%{GREEDYDATA:errormsg} +HTTPD24_ERRORLOG \[%{HTTPDERROR_DATE:timestamp}\] \[%{WORD:module}:%{LOGLEVEL:loglevel:tag}\] \[pid %{POSINT:pid:int}:tid %{NUMBER:tid:int}\]( \(%{POSINT:proxy_errorcode:int}\)%{DATA:proxy_errormessage}:)?( \[client %{IPORHOST:client}:%{POSINT:clientport}\])? %{DATA:errorcode}: %{GREEDYDATA:message} +HTTPD_ERRORLOG %{HTTPD20_ERRORLOG}|%{HTTPD24_ERRORLOG} diff --git a/plugins/inputs/logparser/grok/testdata/test-patterns b/plugins/inputs/logparser/grok/testdata/test-patterns new file mode 100644 index 000000000..ba995fbd1 --- /dev/null +++ b/plugins/inputs/logparser/grok/testdata/test-patterns @@ -0,0 +1,14 @@ +# Test A log line: +# [04/Jun/2016:12:41:45 +0100] 1.25 200 192.168.1.1 5.432µs 101 +DURATION %{NUMBER}[nuµm]?s +RESPONSE_CODE %{NUMBER:response_code:tag} +RESPONSE_TIME %{DURATION:response_time:duration} +TEST_LOG_A \[%{HTTPDATE:timestamp:ts-httpd}\] %{NUMBER:myfloat:float} %{RESPONSE_CODE} %{IPORHOST:clientip} %{RESPONSE_TIME} %{NUMBER:myint:int} + +# Test B log line: +# [04/06/2016--12:41:45] 1.25 mystring dropme nomodifier +TEST_TIMESTAMP %{MONTHDAY}/%{MONTHNUM}/%{YEAR}--%{TIME} +TEST_LOG_B \[%{TEST_TIMESTAMP:timestamp:ts-"02/01/2006--15:04:05"}\] %{NUMBER:myfloat:float} %{WORD:mystring:string} %{WORD:dropme:drop} %{WORD:nomodifier} + +TEST_TIMESTAMP %{MONTHDAY}/%{MONTHNUM}/%{YEAR}--%{TIME} +TEST_LOG_BAD \[%{TEST_TIMESTAMP:timestamp:ts-"02/01/2006--15:04:05"}\] %{NUMBER:myfloat:float} %{WORD:mystring:int} %{WORD:dropme:drop} %{WORD:nomodifier} diff --git a/plugins/inputs/logparser/grok/testdata/test_a.log b/plugins/inputs/logparser/grok/testdata/test_a.log new file mode 100644 index 000000000..a44d72fdf --- /dev/null +++ b/plugins/inputs/logparser/grok/testdata/test_a.log @@ -0,0 +1 @@ +[04/Jun/2016:12:41:45 +0100] 1.25 200 192.168.1.1 5.432µs 101 diff --git a/plugins/inputs/logparser/grok/testdata/test_b.log b/plugins/inputs/logparser/grok/testdata/test_b.log new file mode 100644 index 000000000..49e2983e8 --- /dev/null +++ b/plugins/inputs/logparser/grok/testdata/test_b.log @@ -0,0 +1 @@ +[04/06/2016--12:41:45] 1.25 mystring dropme nomodifier diff --git a/plugins/inputs/logparser/logparser.go b/plugins/inputs/logparser/logparser.go new file mode 100644 index 000000000..8ded03edc --- /dev/null +++ b/plugins/inputs/logparser/logparser.go @@ -0,0 +1,231 @@ +package logparser + +import ( + "fmt" + "log" + "reflect" + "sync" + + "github.com/hpcloud/tail" + + "github.com/influxdata/telegraf" + "github.com/influxdata/telegraf/internal/errchan" + 
"github.com/influxdata/telegraf/internal/globpath" + "github.com/influxdata/telegraf/plugins/inputs" + + // Parsers + "github.com/influxdata/telegraf/plugins/inputs/logparser/grok" +) + +type LogParser interface { + ParseLine(line string) (telegraf.Metric, error) + Compile() error +} + +type LogParserPlugin struct { + Files []string + FromBeginning bool + + tailers []*tail.Tail + lines chan string + done chan struct{} + wg sync.WaitGroup + acc telegraf.Accumulator + parsers []LogParser + + sync.Mutex + + GrokParser *grok.Parser `toml:"grok"` +} + +const sampleConfig = ` + ## Log files to parse. + ## These accept standard unix glob matching rules, but with the addition of + ## ** as a "super asterisk". ie: + ## /var/log/**.log -> recursively find all .log files in /var/log + ## /var/log/*/*.log -> find all .log files with a parent dir in /var/log + ## /var/log/apache.log -> only tail the apache log file + files = ["/var/log/apache/access.log"] + ## Read file from beginning. + from_beginning = false + + ## Parse logstash-style "grok" patterns: + ## Telegraf built-in parsing patterns: https://goo.gl/dkay10 + [inputs.logparser.grok] + ## This is a list of patterns to check the given log file(s) for. + ## Note that adding patterns here increases processing time. The most + ## efficient configuration is to have one pattern per logparser. + ## Other common built-in patterns are: + ## %{COMMON_LOG_FORMAT} (plain apache & nginx access logs) + ## %{COMBINED_LOG_FORMAT} (access logs + referrer & agent) + patterns = ["%{COMBINED_LOG_FORMAT}"] + ## Name of the outputted measurement name. + measurement = "apache_access_log" + ## Full path(s) to custom pattern files. + custom_pattern_files = [] + ## Custom patterns can also be defined here. Put one pattern per line. + custom_patterns = ''' + ''' +` + +func (l *LogParserPlugin) SampleConfig() string { + return sampleConfig +} + +func (l *LogParserPlugin) Description() string { + return "Stream and parse log file(s)." 
+} + +func (l *LogParserPlugin) Gather(acc telegraf.Accumulator) error { + return nil +} + +func (l *LogParserPlugin) Start(acc telegraf.Accumulator) error { + l.Lock() + defer l.Unlock() + + l.acc = acc + l.lines = make(chan string, 1000) + l.done = make(chan struct{}) + + // Looks for fields which implement LogParser interface + l.parsers = []LogParser{} + s := reflect.ValueOf(l).Elem() + for i := 0; i < s.NumField(); i++ { + f := s.Field(i) + + if !f.CanInterface() { + continue + } + + if lpPlugin, ok := f.Interface().(LogParser); ok { + if reflect.ValueOf(lpPlugin).IsNil() { + continue + } + l.parsers = append(l.parsers, lpPlugin) + } + } + + if len(l.parsers) == 0 { + return fmt.Errorf("ERROR: logparser input plugin: no parser defined.") + } + + // compile log parser patterns: + errChan := errchan.New(len(l.parsers)) + for _, parser := range l.parsers { + if err := parser.Compile(); err != nil { + errChan.C <- err + } + } + if err := errChan.Error(); err != nil { + return err + } + + var seek tail.SeekInfo + if !l.FromBeginning { + seek.Whence = 2 + seek.Offset = 0 + } + + l.wg.Add(1) + go l.parser() + + // Create a "tailer" for each file + for _, filepath := range l.Files { + g, err := globpath.Compile(filepath) + if err != nil { + log.Printf("ERROR Glob %s failed to compile, %s", filepath, err) + continue + } + files := g.Match() + errChan = errchan.New(len(files)) + for file, _ := range files { + tailer, err := tail.TailFile(file, + tail.Config{ + ReOpen: true, + Follow: true, + Location: &seek, + MustExist: true, + }) + errChan.C <- err + + // create a goroutine for each "tailer" + l.wg.Add(1) + go l.receiver(tailer) + l.tailers = append(l.tailers, tailer) + } + } + + return errChan.Error() +} + +// receiver is launched as a goroutine to continuously watch a tailed logfile +// for changes and send any log lines down the l.lines channel. +func (l *LogParserPlugin) receiver(tailer *tail.Tail) { + defer l.wg.Done() + + var line *tail.Line + for line = range tailer.Lines { + if line.Err != nil { + log.Printf("ERROR tailing file %s, Error: %s\n", + tailer.Filename, line.Err) + continue + } + + select { + case <-l.done: + case l.lines <- line.Text: + } + } +} + +// parser is launched as a goroutine to watch the l.lines channel. +// when a line is available, parser parses it and adds the metric(s) to the +// accumulator. 
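+// Lines that match no pattern (nil metric) or that fail to parse are
+// dropped silently.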
+func (l *LogParserPlugin) parser() { + defer l.wg.Done() + + var m telegraf.Metric + var err error + var line string + for { + select { + case <-l.done: + return + case line = <-l.lines: + if line == "" || line == "\n" { + continue + } + } + + for _, parser := range l.parsers { + m, err = parser.ParseLine(line) + if err == nil { + if m != nil { + l.acc.AddFields(m.Name(), m.Fields(), m.Tags(), m.Time()) + } + } + } + } +} + +func (l *LogParserPlugin) Stop() { + l.Lock() + defer l.Unlock() + + for _, t := range l.tailers { + err := t.Stop() + if err != nil { + log.Printf("ERROR stopping tail on file %s\n", t.Filename) + } + t.Cleanup() + } + close(l.done) + l.wg.Wait() +} + +func init() { + inputs.Add("logparser", func() telegraf.Input { + return &LogParserPlugin{} + }) +} diff --git a/plugins/inputs/logparser/logparser_test.go b/plugins/inputs/logparser/logparser_test.go new file mode 100644 index 000000000..97f33067e --- /dev/null +++ b/plugins/inputs/logparser/logparser_test.go @@ -0,0 +1,119 @@ +package logparser + +import ( + "runtime" + "strings" + "testing" + "time" + + "github.com/influxdata/telegraf/testutil" + + "github.com/influxdata/telegraf/plugins/inputs/logparser/grok" + + "github.com/stretchr/testify/assert" +) + +func TestStartNoParsers(t *testing.T) { + logparser := &LogParserPlugin{ + FromBeginning: true, + Files: []string{"grok/testdata/*.log"}, + } + + acc := testutil.Accumulator{} + assert.Error(t, logparser.Start(&acc)) +} + +func TestGrokParseLogFilesNonExistPattern(t *testing.T) { + thisdir := getCurrentDir() + p := &grok.Parser{ + Patterns: []string{"%{FOOBAR}"}, + CustomPatternFiles: []string{thisdir + "grok/testdata/test-patterns"}, + } + + logparser := &LogParserPlugin{ + FromBeginning: true, + Files: []string{thisdir + "grok/testdata/*.log"}, + GrokParser: p, + } + + acc := testutil.Accumulator{} + assert.Error(t, logparser.Start(&acc)) + + time.Sleep(time.Millisecond * 500) + logparser.Stop() +} + +func TestGrokParseLogFiles(t *testing.T) { + thisdir := getCurrentDir() + p := &grok.Parser{ + Patterns: []string{"%{TEST_LOG_A}", "%{TEST_LOG_B}"}, + CustomPatternFiles: []string{thisdir + "grok/testdata/test-patterns"}, + } + + logparser := &LogParserPlugin{ + FromBeginning: true, + Files: []string{thisdir + "grok/testdata/*.log"}, + GrokParser: p, + } + + acc := testutil.Accumulator{} + assert.NoError(t, logparser.Start(&acc)) + + time.Sleep(time.Millisecond * 500) + logparser.Stop() + + acc.AssertContainsTaggedFields(t, "logparser_grok", + map[string]interface{}{ + "clientip": "192.168.1.1", + "myfloat": float64(1.25), + "response_time": int64(5432), + "myint": int64(101), + }, + map[string]string{"response_code": "200"}) + + acc.AssertContainsTaggedFields(t, "logparser_grok", + map[string]interface{}{ + "myfloat": 1.25, + "mystring": "mystring", + "nomodifier": "nomodifier", + }, + map[string]string{}) +} + +// Test that test_a.log line gets parsed even though we don't have the correct +// pattern available for test_b.log +func TestGrokParseLogFilesOneBad(t *testing.T) { + thisdir := getCurrentDir() + p := &grok.Parser{ + Patterns: []string{"%{TEST_LOG_A}", "%{TEST_LOG_BAD}"}, + CustomPatternFiles: []string{thisdir + "grok/testdata/test-patterns"}, + } + assert.NoError(t, p.Compile()) + + logparser := &LogParserPlugin{ + FromBeginning: true, + Files: []string{thisdir + "grok/testdata/test_a.log"}, + GrokParser: p, + } + + acc := testutil.Accumulator{} + acc.SetDebug(true) + assert.NoError(t, logparser.Start(&acc)) + + time.Sleep(time.Millisecond * 500) + 
logparser.Stop()
+
+	acc.AssertContainsTaggedFields(t, "logparser_grok",
+		map[string]interface{}{
+			"clientip":      "192.168.1.1",
+			"myfloat":       float64(1.25),
+			"response_time": int64(5432),
+			"myint":         int64(101),
+		},
+		map[string]string{"response_code": "200"})
+}
+
+func getCurrentDir() string {
+	_, filename, _, _ := runtime.Caller(1)
+	return strings.Replace(filename, "logparser_test.go", "", 1)
+}
diff --git a/plugins/inputs/memcached/memcached.go b/plugins/inputs/memcached/memcached.go
index c631a1ed1..5ee538e93 100644
--- a/plugins/inputs/memcached/memcached.go
+++ b/plugins/inputs/memcached/memcached.go
@@ -9,6 +9,7 @@ import (
 	"time"
 
 	"github.com/influxdata/telegraf"
+	"github.com/influxdata/telegraf/internal/errchan"
 	"github.com/influxdata/telegraf/plugins/inputs"
 )
 
@@ -73,19 +74,16 @@ func (m *Memcached) Gather(acc telegraf.Accumulator) error {
 		return m.gatherServer(":11211", false, acc)
 	}
 
+	errChan := errchan.New(len(m.Servers) + len(m.UnixSockets))
 	for _, serverAddress := range m.Servers {
-		if err := m.gatherServer(serverAddress, false, acc); err != nil {
-			return err
-		}
+		errChan.C <- m.gatherServer(serverAddress, false, acc)
 	}
 
 	for _, unixAddress := range m.UnixSockets {
-		if err := m.gatherServer(unixAddress, true, acc); err != nil {
-			return err
-		}
+		errChan.C <- m.gatherServer(unixAddress, true, acc)
 	}
 
-	return nil
+	return errChan.Error()
 }
 
 func (m *Memcached) gatherServer(
diff --git a/plugins/inputs/mesos/README.md b/plugins/inputs/mesos/README.md
index 20a6dd244..1d3a5f7bf 100644
--- a/plugins/inputs/mesos/README.md
+++ b/plugins/inputs/mesos/README.md
@@ -1,6 +1,6 @@
 # Mesos Input Plugin
 
-This input plugin gathers metrics from Mesos (*currently only Mesos masters*).
+This input plugin gathers metrics from Mesos.
 For more information, please check the [Mesos Observability Metrics](http://mesos.apache.org/documentation/latest/monitoring/) page.
 
 ### Configuration:
@@ -8,14 +8,41 @@ For more information, please check the [Mesos Observability Metrics](http://meso
 ```toml
 # Telegraf plugin for gathering metrics from N Mesos masters
 [[inputs.mesos]]
-  # Timeout, in ms.
+  ## Timeout, in ms.
   timeout = 100
-  # A list of Mesos masters, default value is localhost:5050.
+  ## A list of Mesos masters.
   masters = ["localhost:5050"]
-  # Metrics groups to be collected, by default, all enabled.
-  master_collections = ["resources","master","system","slaves","frameworks","messages","evqueue","registrar"]
+  ## Master metrics groups to be collected, by default, all enabled.
+  master_collections = [
+    "resources",
+    "master",
+    "system",
+    "agents",
+    "frameworks",
+    "tasks",
+    "messages",
+    "evqueue",
+    "registrar",
+  ]
+  ## A list of Mesos slaves, default is []
+  # slaves = []
+  ## Slave metrics groups to be collected, by default, all enabled.
+  # slave_collections = [
+  #   "resources",
+  #   "agent",
+  #   "system",
+  #   "executors",
+  #   "tasks",
+  #   "messages",
+  # ]
+  ## Include mesos tasks statistics, default is false
+  # slave_tasks = true
 ```
 
+By default this plugin is not configured to gather metrics from Mesos. Since a Mesos cluster can be deployed in numerous ways, it does not provide any default
+values; users need to specify the master/slave nodes this plugin will gather metrics from. Additionally, enabling `slave_tasks` allows
+gathering metrics from tasks running on the specified slaves (this option is disabled by default). A minimal example follows.
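For orientation, here is a minimal, hedged example of such a configuration; the endpoints below are illustrative placeholders, not defaults shipped by the plugin:

```toml
[[inputs.mesos]]
  ## HTTP timeout, in ms (100 is also the value the plugin falls back to)
  timeout = 100
  ## one master and one slave, on the conventional Mesos ports
  masters = ["localhost:5050"]
  slaves = ["localhost:5051"]
  ## opt in to per-task statistics from the slaves listed above
  slave_tasks = true
```

Port 5050 (master) and port 5051 (slave) match the defaults the plugin appends when an address is given without a port.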
+
 ### Measurements & Fields:
 
 Mesos master metric groups
 
@@ -33,6 +60,12 @@ Mesos master metric groups
 - master/disk_revocable_percent
 - master/disk_revocable_total
 - master/disk_revocable_used
+- master/gpus_percent
+- master/gpus_used
+- master/gpus_total
+- master/gpus_revocable_percent
+- master/gpus_revocable_total
+- master/gpus_revocable_used
 - master/mem_percent
 - master/mem_used
 - master/mem_total
@@ -136,17 +169,111 @@ Mesos master metric groups
 - registrar/state_store_ms/p999
 - registrar/state_store_ms/p9999
 
+Mesos slave metric groups
+- resources
+  - slave/cpus_percent
+  - slave/cpus_used
+  - slave/cpus_total
+  - slave/cpus_revocable_percent
+  - slave/cpus_revocable_total
+  - slave/cpus_revocable_used
+  - slave/disk_percent
+  - slave/disk_used
+  - slave/disk_total
+  - slave/disk_revocable_percent
+  - slave/disk_revocable_total
+  - slave/disk_revocable_used
+  - slave/gpus_percent
+  - slave/gpus_used
+  - slave/gpus_total
+  - slave/gpus_revocable_percent
+  - slave/gpus_revocable_total
+  - slave/gpus_revocable_used
+  - slave/mem_percent
+  - slave/mem_used
+  - slave/mem_total
+  - slave/mem_revocable_percent
+  - slave/mem_revocable_total
+  - slave/mem_revocable_used
+
+- agent
+  - slave/registered
+  - slave/uptime_secs
+
+- system
+  - system/cpus_total
+  - system/load_15min
+  - system/load_5min
+  - system/load_1min
+  - system/mem_free_bytes
+  - system/mem_total_bytes
+
+- executors
+  - containerizer/mesos/container_destroy_errors
+  - slave/container_launch_errors
+  - slave/executors_preempted
+  - slave/frameworks_active
+  - slave/executor_directory_max_allowed_age_secs
+  - slave/executors_registering
+  - slave/executors_running
+  - slave/executors_terminated
+  - slave/executors_terminating
+  - slave/recovery_errors
+
+- tasks
+  - slave/tasks_failed
+  - slave/tasks_finished
+  - slave/tasks_killed
+  - slave/tasks_lost
+  - slave/tasks_running
+  - slave/tasks_staging
+  - slave/tasks_starting
+
+- messages
+  - slave/invalid_framework_messages
+  - slave/invalid_status_updates
+  - slave/valid_framework_messages
+  - slave/valid_status_updates
+
+Mesos tasks metric groups
+
+- executor_id
+- executor_name
+- framework_id
+- source
+- statistics (all metrics below will have the `statistics_` prefix included in their names)
+  - cpus_limit
+  - cpus_system_time_secs
+  - cpus_user_time_secs
+  - mem_anon_bytes
+  - mem_cache_bytes
+  - mem_critical_pressure_counter
+  - mem_file_bytes
+  - mem_limit_bytes
+  - mem_low_pressure_counter
+  - mem_mapped_file_bytes
+  - mem_medium_pressure_counter
+  - mem_rss_bytes
+  - mem_swap_bytes
+  - mem_total_bytes
+  - mem_total_memsw_bytes
+  - mem_unevictable_bytes
+  - timestamp
+
 ### Tags:
 
-- All measurements have the following tags:
+- All master/slave measurements have the following tags:
+  - server
+  - role (master/slave)
+
+- Tasks measurements have the following tags:
   - server
 
 ### Example Output:
-
 ```
 $ telegraf -config ~/mesos.conf -input-filter mesos -test
 * Plugin: mesos, Collection 1
-mesos,server=172.17.8.101 allocator/event_queue_dispatches=0,master/cpus_percent=0,
+mesos,host=172.17.8.102,server=172.17.8.101 allocator/event_queue_dispatches=0,master/cpus_percent=0,
 master/cpus_revocable_percent=0,master/cpus_revocable_total=0,
 master/cpus_revocable_used=0,master/cpus_total=2,
 master/cpus_used=0,master/disk_percent=0,master/disk_revocable_percent=0,
@@ -163,3 +290,16 @@ master/mem_revocable_used=0,master/mem_total=1002,
 master/mem_used=0,master/messages_authenticate=0,
 master/messages_deactivate_framework=0
 ...
```
+
+Mesos tasks metrics (if enabled):
+```
+mesos-tasks,host=172.17.8.102,server=172.17.8.101,task_id=hello-world.e4b5b497-2ccd-11e6-a659-0242fb222ce2
+statistics_cpus_limit=0.2,statistics_cpus_system_time_secs=142.49,statistics_cpus_user_time_secs=388.14,
+statistics_mem_anon_bytes=359129088,statistics_mem_cache_bytes=3964928,
+statistics_mem_critical_pressure_counter=0,statistics_mem_file_bytes=3964928,
+statistics_mem_limit_bytes=767557632,statistics_mem_low_pressure_counter=0,
+statistics_mem_mapped_file_bytes=114688,statistics_mem_medium_pressure_counter=0,
+statistics_mem_rss_bytes=359129088,statistics_mem_swap_bytes=0,statistics_mem_total_bytes=363094016,
+statistics_mem_total_memsw_bytes=363094016,statistics_mem_unevictable_bytes=0,
+statistics_timestamp=1465486052.70525 1465486053052811792...
+```
diff --git a/plugins/inputs/mesos/mesos.go b/plugins/inputs/mesos/mesos.go
index b096a20d9..a719dc9f4 100644
--- a/plugins/inputs/mesos/mesos.go
+++ b/plugins/inputs/mesos/mesos.go
@@ -17,33 +17,57 @@ import (
 	jsonparser "github.com/influxdata/telegraf/plugins/parsers/json"
 )
 
+type Role string
+
+const (
+	MASTER Role = "master"
+	SLAVE       = "slave"
+)
+
 type Mesos struct {
 	Timeout    int
 	Masters    []string
 	MasterCols []string `toml:"master_collections"`
+	Slaves     []string
+	SlaveCols  []string `toml:"slave_collections"`
+	SlaveTasks bool
 }
 
-var defaultMetrics = []string{
-	"resources", "master", "system", "slaves", "frameworks",
-	"tasks", "messages", "evqueue", "messages", "registrar",
+var allMetrics = map[Role][]string{
+	MASTER: []string{"resources", "master", "system", "agents", "frameworks", "tasks", "messages", "evqueue", "registrar"},
+	SLAVE:  []string{"resources", "agent", "system", "executors", "tasks", "messages"},
 }
 
 var sampleConfig = `
-  # Timeout, in ms.
+  ## Timeout, in ms.
   timeout = 100
-  # A list of Mesos masters, default value is localhost:5050.
+  ## A list of Mesos masters.
   masters = ["localhost:5050"]
-  # Metrics groups to be collected, by default, all enabled.
+  ## Master metrics groups to be collected, by default, all enabled.
   master_collections = [
     "resources",
     "master",
     "system",
-    "slaves",
+    "agents",
     "frameworks",
+    "tasks",
     "messages",
     "evqueue",
     "registrar",
   ]
+  ## A list of Mesos slaves, default is []
+  # slaves = []
+  ## Slave metrics groups to be collected, by default, all enabled.
+  # slave_collections = [
+  #   "resources",
+  #   "agent",
+  #   "system",
+  #   "executors",
+  #   "tasks",
+  #   "messages",
+  # ]
+  ## Include mesos tasks statistics, default is false
+  # slave_tasks = true
 `
 
 // SampleConfig returns a sample configuration block
@@ -56,21 +80,54 @@ func (m *Mesos) Description() string {
 	return "Telegraf plugin for gathering metrics from N Mesos masters"
 }
 
+func (m *Mesos) SetDefaults() {
+	if len(m.MasterCols) == 0 {
+		m.MasterCols = allMetrics[MASTER]
+	}
+
+	if len(m.SlaveCols) == 0 {
+		m.SlaveCols = allMetrics[SLAVE]
+	}
+
+	if m.Timeout == 0 {
+		log.Println("[mesos] Missing timeout value, setting default value (100ms)")
+		m.Timeout = 100
+	}
+}
+
 // Gather() metrics from given list of Mesos Masters
 func (m *Mesos) Gather(acc telegraf.Accumulator) error {
 	var wg sync.WaitGroup
 	var errorChannel chan error
 
-	if len(m.Masters) == 0 {
-		m.Masters = []string{"localhost:5050"}
-	}
+	m.SetDefaults()
 
-	errorChannel = make(chan error, len(m.Masters)*2)
+	errorChannel = make(chan error, len(m.Masters)+2*len(m.Slaves))
 
 	for _, v := range m.Masters {
 		wg.Add(1)
 		go func(c string) {
-			errorChannel <- m.gatherMetrics(c, acc)
+			errorChannel <- m.gatherMainMetrics(c, ":5050", MASTER, acc)
+			wg.Done()
+			return
+		}(v)
+	}
+
+	for _, v := range m.Slaves {
+		wg.Add(1)
+		go func(c string) {
+			errorChannel <- m.gatherMainMetrics(c, ":5051", SLAVE, acc)
+			wg.Done()
+			return
+		}(v)
+
+		if !m.SlaveTasks {
+			continue
+		}
+
+		wg.Add(1)
+		go func(c string) {
+			errorChannel <- m.gatherSlaveTaskMetrics(c, ":5051", acc)
 			wg.Done()
 			return
 		}(v)
@@ -94,7 +151,7 @@ func (m *Mesos) Gather(acc telegraf.Accumulator) error {
 }
 
 // metricsDiff() returns set names for removal
-func metricsDiff(w []string) []string {
+func metricsDiff(role Role, w []string) []string {
 	b := []string{}
 	s := make(map[string]bool)
 
@@ -106,7 +163,7 @@ func metricsDiff(w []string) []string {
 		s[v] = true
 	}
 
-	for _, d := range defaultMetrics {
+	for _, d := range allMetrics[role] {
 		if _, ok := s[d]; !ok {
 			b = append(b, d)
 		}
@@ -116,156 +173,239 @@ func metricsDiff(w []string) []string {
 }
 
 // masterBlocks serves as a kind of metrics registry, grouping them in sets
-func masterBlocks(g string) []string {
+func getMetrics(role Role, group string) []string {
 	var m map[string][]string
 
 	m = make(map[string][]string)
 
-	m["resources"] = []string{
-		"master/cpus_percent",
-		"master/cpus_used",
-		"master/cpus_total",
-		"master/cpus_revocable_percent",
-		"master/cpus_revocable_total",
-		"master/cpus_revocable_used",
-		"master/disk_percent",
-		"master/disk_used",
-		"master/disk_total",
-		"master/disk_revocable_percent",
-		"master/disk_revocable_total",
-		"master/disk_revocable_used",
-		"master/mem_percent",
-		"master/mem_used",
-		"master/mem_total",
-		"master/mem_revocable_percent",
-		"master/mem_revocable_total",
-		"master/mem_revocable_used",
+	if role == MASTER {
+		m["resources"] = []string{
+			"master/cpus_percent",
+			"master/cpus_used",
+			"master/cpus_total",
+			"master/cpus_revocable_percent",
+			"master/cpus_revocable_total",
+			"master/cpus_revocable_used",
+			"master/disk_percent",
+			"master/disk_used",
+			"master/disk_total",
+			"master/disk_revocable_percent",
+			"master/disk_revocable_total",
+			"master/disk_revocable_used",
+			"master/gpus_percent",
+			"master/gpus_used",
+			"master/gpus_total",
+			"master/gpus_revocable_percent",
+			"master/gpus_revocable_total",
+			"master/gpus_revocable_used",
+			"master/mem_percent",
+			"master/mem_used",
+			"master/mem_total",
+			"master/mem_revocable_percent",
+			"master/mem_revocable_total",
+
"master/mem_revocable_used", + } + + m["master"] = []string{ + "master/elected", + "master/uptime_secs", + } + + m["system"] = []string{ + "system/cpus_total", + "system/load_15min", + "system/load_5min", + "system/load_1min", + "system/mem_free_bytes", + "system/mem_total_bytes", + } + + m["agents"] = []string{ + "master/slave_registrations", + "master/slave_removals", + "master/slave_reregistrations", + "master/slave_shutdowns_scheduled", + "master/slave_shutdowns_canceled", + "master/slave_shutdowns_completed", + "master/slaves_active", + "master/slaves_connected", + "master/slaves_disconnected", + "master/slaves_inactive", + } + + m["frameworks"] = []string{ + "master/frameworks_active", + "master/frameworks_connected", + "master/frameworks_disconnected", + "master/frameworks_inactive", + "master/outstanding_offers", + } + + m["tasks"] = []string{ + "master/tasks_error", + "master/tasks_failed", + "master/tasks_finished", + "master/tasks_killed", + "master/tasks_lost", + "master/tasks_running", + "master/tasks_staging", + "master/tasks_starting", + } + + m["messages"] = []string{ + "master/invalid_executor_to_framework_messages", + "master/invalid_framework_to_executor_messages", + "master/invalid_status_update_acknowledgements", + "master/invalid_status_updates", + "master/dropped_messages", + "master/messages_authenticate", + "master/messages_deactivate_framework", + "master/messages_decline_offers", + "master/messages_executor_to_framework", + "master/messages_exited_executor", + "master/messages_framework_to_executor", + "master/messages_kill_task", + "master/messages_launch_tasks", + "master/messages_reconcile_tasks", + "master/messages_register_framework", + "master/messages_register_slave", + "master/messages_reregister_framework", + "master/messages_reregister_slave", + "master/messages_resource_request", + "master/messages_revive_offers", + "master/messages_status_update", + "master/messages_status_update_acknowledgement", + "master/messages_unregister_framework", + "master/messages_unregister_slave", + "master/messages_update_slave", + "master/recovery_slave_removals", + "master/slave_removals/reason_registered", + "master/slave_removals/reason_unhealthy", + "master/slave_removals/reason_unregistered", + "master/valid_framework_to_executor_messages", + "master/valid_status_update_acknowledgements", + "master/valid_status_updates", + "master/task_lost/source_master/reason_invalid_offers", + "master/task_lost/source_master/reason_slave_removed", + "master/task_lost/source_slave/reason_executor_terminated", + "master/valid_executor_to_framework_messages", + } + + m["evqueue"] = []string{ + "master/event_queue_dispatches", + "master/event_queue_http_requests", + "master/event_queue_messages", + } + + m["registrar"] = []string{ + "registrar/state_fetch_ms", + "registrar/state_store_ms", + "registrar/state_store_ms/max", + "registrar/state_store_ms/min", + "registrar/state_store_ms/p50", + "registrar/state_store_ms/p90", + "registrar/state_store_ms/p95", + "registrar/state_store_ms/p99", + "registrar/state_store_ms/p999", + "registrar/state_store_ms/p9999", + } + } else if role == SLAVE { + m["resources"] = []string{ + "slave/cpus_percent", + "slave/cpus_used", + "slave/cpus_total", + "slave/cpus_revocable_percent", + "slave/cpus_revocable_total", + "slave/cpus_revocable_used", + "slave/disk_percent", + "slave/disk_used", + "slave/disk_total", + "slave/disk_revocable_percent", + "slave/disk_revocable_total", + "slave/disk_revocable_used", + "slave/gpus_percent", + 
"slave/gpus_used", + "slave/gpus_total", + "slave/gpus_revocable_percent", + "slave/gpus_revocable_total", + "slave/gpus_revocable_used", + "slave/mem_percent", + "slave/mem_used", + "slave/mem_total", + "slave/mem_revocable_percent", + "slave/mem_revocable_total", + "slave/mem_revocable_used", + } + + m["agent"] = []string{ + "slave/registered", + "slave/uptime_secs", + } + + m["system"] = []string{ + "system/cpus_total", + "system/load_15min", + "system/load_5min", + "system/load_1min", + "system/mem_free_bytes", + "system/mem_total_bytes", + } + + m["executors"] = []string{ + "containerizer/mesos/container_destroy_errors", + "slave/container_launch_errors", + "slave/executors_preempted", + "slave/frameworks_active", + "slave/executor_directory_max_allowed_age_secs", + "slave/executors_registering", + "slave/executors_running", + "slave/executors_terminated", + "slave/executors_terminating", + "slave/recovery_errors", + } + + m["tasks"] = []string{ + "slave/tasks_failed", + "slave/tasks_finished", + "slave/tasks_killed", + "slave/tasks_lost", + "slave/tasks_running", + "slave/tasks_staging", + "slave/tasks_starting", + } + + m["messages"] = []string{ + "slave/invalid_framework_messages", + "slave/invalid_status_updates", + "slave/valid_framework_messages", + "slave/valid_status_updates", + } } - m["master"] = []string{ - "master/elected", - "master/uptime_secs", - } - - m["system"] = []string{ - "system/cpus_total", - "system/load_15min", - "system/load_5min", - "system/load_1min", - "system/mem_free_bytes", - "system/mem_total_bytes", - } - - m["slaves"] = []string{ - "master/slave_registrations", - "master/slave_removals", - "master/slave_reregistrations", - "master/slave_shutdowns_scheduled", - "master/slave_shutdowns_canceled", - "master/slave_shutdowns_completed", - "master/slaves_active", - "master/slaves_connected", - "master/slaves_disconnected", - "master/slaves_inactive", - } - - m["frameworks"] = []string{ - "master/frameworks_active", - "master/frameworks_connected", - "master/frameworks_disconnected", - "master/frameworks_inactive", - "master/outstanding_offers", - } - - m["tasks"] = []string{ - "master/tasks_error", - "master/tasks_failed", - "master/tasks_finished", - "master/tasks_killed", - "master/tasks_lost", - "master/tasks_running", - "master/tasks_staging", - "master/tasks_starting", - } - - m["messages"] = []string{ - "master/invalid_executor_to_framework_messages", - "master/invalid_framework_to_executor_messages", - "master/invalid_status_update_acknowledgements", - "master/invalid_status_updates", - "master/dropped_messages", - "master/messages_authenticate", - "master/messages_deactivate_framework", - "master/messages_decline_offers", - "master/messages_executor_to_framework", - "master/messages_exited_executor", - "master/messages_framework_to_executor", - "master/messages_kill_task", - "master/messages_launch_tasks", - "master/messages_reconcile_tasks", - "master/messages_register_framework", - "master/messages_register_slave", - "master/messages_reregister_framework", - "master/messages_reregister_slave", - "master/messages_resource_request", - "master/messages_revive_offers", - "master/messages_status_update", - "master/messages_status_update_acknowledgement", - "master/messages_unregister_framework", - "master/messages_unregister_slave", - "master/messages_update_slave", - "master/recovery_slave_removals", - "master/slave_removals/reason_registered", - "master/slave_removals/reason_unhealthy", - "master/slave_removals/reason_unregistered", - 
"master/valid_framework_to_executor_messages", - "master/valid_status_update_acknowledgements", - "master/valid_status_updates", - "master/task_lost/source_master/reason_invalid_offers", - "master/task_lost/source_master/reason_slave_removed", - "master/task_lost/source_slave/reason_executor_terminated", - "master/valid_executor_to_framework_messages", - } - - m["evqueue"] = []string{ - "master/event_queue_dispatches", - "master/event_queue_http_requests", - "master/event_queue_messages", - } - - m["registrar"] = []string{ - "registrar/state_fetch_ms", - "registrar/state_store_ms", - "registrar/state_store_ms/max", - "registrar/state_store_ms/min", - "registrar/state_store_ms/p50", - "registrar/state_store_ms/p90", - "registrar/state_store_ms/p95", - "registrar/state_store_ms/p99", - "registrar/state_store_ms/p999", - "registrar/state_store_ms/p9999", - } - - ret, ok := m[g] + ret, ok := m[group] if !ok { - log.Println("[mesos] Unkown metrics group: ", g) + log.Printf("[mesos] Unkown %s metrics group: %s\n", role, group) return []string{} } return ret } -// removeGroup(), remove unwanted sets -func (m *Mesos) removeGroup(j *map[string]interface{}) { +func (m *Mesos) filterMetrics(role Role, metrics *map[string]interface{}) { var ok bool + var selectedMetrics []string - b := metricsDiff(m.MasterCols) + if role == MASTER { + selectedMetrics = m.MasterCols + } else if role == SLAVE { + selectedMetrics = m.SlaveCols + } - for _, k := range b { - for _, v := range masterBlocks(k) { - if _, ok = (*j)[v]; ok { - delete((*j), v) + for _, k := range metricsDiff(role, selectedMetrics) { + for _, v := range getMetrics(role, k) { + if _, ok = (*metrics)[v]; ok { + delete((*metrics), v) } } } @@ -280,23 +420,66 @@ var client = &http.Client{ Timeout: time.Duration(4 * time.Second), } -// This should not belong to the object -func (m *Mesos) gatherMetrics(a string, acc telegraf.Accumulator) error { - var jsonOut map[string]interface{} +func (m *Mesos) gatherSlaveTaskMetrics(address string, defaultPort string, acc telegraf.Accumulator) error { + var metrics []map[string]interface{} - host, _, err := net.SplitHostPort(a) + host, _, err := net.SplitHostPort(address) if err != nil { - host = a - a = a + ":5050" + host = address + address = address + defaultPort } tags := map[string]string{ "server": host, } - if m.Timeout == 0 { - log.Println("[mesos] Missing timeout value, setting default value (100ms)") - m.Timeout = 100 + ts := strconv.Itoa(m.Timeout) + "ms" + + resp, err := client.Get("http://" + address + "/monitor/statistics?timeout=" + ts) + + if err != nil { + return err + } + + data, err := ioutil.ReadAll(resp.Body) + resp.Body.Close() + if err != nil { + return err + } + + if err = json.Unmarshal([]byte(data), &metrics); err != nil { + return errors.New("Error decoding JSON response") + } + + for _, task := range metrics { + tags["task_id"] = task["executor_id"].(string) + + jf := jsonparser.JSONFlattener{} + err = jf.FlattenJSON("", task) + + if err != nil { + return err + } + + acc.AddFields("mesos-tasks", jf.Fields, tags) + } + + return nil +} + +// This should not belong to the object +func (m *Mesos) gatherMainMetrics(a string, defaultPort string, role Role, acc telegraf.Accumulator) error { + var jsonOut map[string]interface{} + + host, _, err := net.SplitHostPort(a) + if err != nil { + host = a + a = a + defaultPort + } + + tags := map[string]string{ + "server": host, + "role": string(role), } ts := strconv.Itoa(m.Timeout) + "ms" @@ -317,7 +500,7 @@ func (m *Mesos) gatherMetrics(a string, 
acc telegraf.Accumulator) error { return errors.New("Error decoding JSON response") } - m.removeGroup(&jsonOut) + m.filterMetrics(role, &jsonOut) jf := jsonparser.JSONFlattener{} diff --git a/plugins/inputs/mesos/mesos_test.go b/plugins/inputs/mesos/mesos_test.go index c56580649..062e23e4a 100644 --- a/plugins/inputs/mesos/mesos_test.go +++ b/plugins/inputs/mesos/mesos_test.go @@ -2,70 +2,275 @@ package mesos import ( "encoding/json" + "fmt" "math/rand" "net/http" "net/http/httptest" "os" "testing" + jsonparser "github.com/influxdata/telegraf/plugins/parsers/json" "github.com/influxdata/telegraf/testutil" ) -var mesosMetrics map[string]interface{} -var ts *httptest.Server +var masterMetrics map[string]interface{} +var masterTestServer *httptest.Server +var slaveMetrics map[string]interface{} +var slaveTaskMetrics map[string]interface{} +var slaveTestServer *httptest.Server + +func randUUID() string { + b := make([]byte, 16) + rand.Read(b) + return fmt.Sprintf("%x-%x-%x-%x-%x", b[0:4], b[4:6], b[6:8], b[8:10], b[10:]) +} func generateMetrics() { - mesosMetrics = make(map[string]interface{}) + masterMetrics = make(map[string]interface{}) - metricNames := []string{"master/cpus_percent", "master/cpus_used", "master/cpus_total", - "master/cpus_revocable_percent", "master/cpus_revocable_total", "master/cpus_revocable_used", - "master/disk_percent", "master/disk_used", "master/disk_total", "master/disk_revocable_percent", - "master/disk_revocable_total", "master/disk_revocable_used", "master/mem_percent", - "master/mem_used", "master/mem_total", "master/mem_revocable_percent", "master/mem_revocable_total", - "master/mem_revocable_used", "master/elected", "master/uptime_secs", "system/cpus_total", - "system/load_15min", "system/load_5min", "system/load_1min", "system/mem_free_bytes", - "system/mem_total_bytes", "master/slave_registrations", "master/slave_removals", - "master/slave_reregistrations", "master/slave_shutdowns_scheduled", "master/slave_shutdowns_canceled", - "master/slave_shutdowns_completed", "master/slaves_active", "master/slaves_connected", - "master/slaves_disconnected", "master/slaves_inactive", "master/frameworks_active", - "master/frameworks_connected", "master/frameworks_disconnected", "master/frameworks_inactive", - "master/outstanding_offers", "master/tasks_error", "master/tasks_failed", "master/tasks_finished", - "master/tasks_killed", "master/tasks_lost", "master/tasks_running", "master/tasks_staging", - "master/tasks_starting", "master/invalid_executor_to_framework_messages", "master/invalid_framework_to_executor_messages", - "master/invalid_status_update_acknowledgements", "master/invalid_status_updates", - "master/dropped_messages", "master/messages_authenticate", "master/messages_deactivate_framework", - "master/messages_decline_offers", "master/messages_executor_to_framework", "master/messages_exited_executor", - "master/messages_framework_to_executor", "master/messages_kill_task", "master/messages_launch_tasks", - "master/messages_reconcile_tasks", "master/messages_register_framework", "master/messages_register_slave", - "master/messages_reregister_framework", "master/messages_reregister_slave", "master/messages_resource_request", - "master/messages_revive_offers", "master/messages_status_update", "master/messages_status_update_acknowledgement", - "master/messages_unregister_framework", "master/messages_unregister_slave", "master/messages_update_slave", - "master/recovery_slave_removals", "master/slave_removals/reason_registered", 
"master/slave_removals/reason_unhealthy", - "master/slave_removals/reason_unregistered", "master/valid_framework_to_executor_messages", "master/valid_status_update_acknowledgements", - "master/valid_status_updates", "master/task_lost/source_master/reason_invalid_offers", - "master/task_lost/source_master/reason_slave_removed", "master/task_lost/source_slave/reason_executor_terminated", - "master/valid_executor_to_framework_messages", "master/event_queue_dispatches", - "master/event_queue_http_requests", "master/event_queue_messages", "registrar/state_fetch_ms", - "registrar/state_store_ms", "registrar/state_store_ms/max", "registrar/state_store_ms/min", - "registrar/state_store_ms/p50", "registrar/state_store_ms/p90", "registrar/state_store_ms/p95", - "registrar/state_store_ms/p99", "registrar/state_store_ms/p999", "registrar/state_store_ms/p9999"} + metricNames := []string{ + // resources + "master/cpus_percent", + "master/cpus_used", + "master/cpus_total", + "master/cpus_revocable_percent", + "master/cpus_revocable_total", + "master/cpus_revocable_used", + "master/disk_percent", + "master/disk_used", + "master/disk_total", + "master/disk_revocable_percent", + "master/disk_revocable_total", + "master/disk_revocable_used", + "master/gpus_percent", + "master/gpus_used", + "master/gpus_total", + "master/gpus_revocable_percent", + "master/gpus_revocable_total", + "master/gpus_revocable_used", + "master/mem_percent", + "master/mem_used", + "master/mem_total", + "master/mem_revocable_percent", + "master/mem_revocable_total", + "master/mem_revocable_used", + // master + "master/elected", + "master/uptime_secs", + // system + "system/cpus_total", + "system/load_15min", + "system/load_5min", + "system/load_1min", + "system/mem_free_bytes", + "system/mem_total_bytes", + // agents + "master/slave_registrations", + "master/slave_removals", + "master/slave_reregistrations", + "master/slave_shutdowns_scheduled", + "master/slave_shutdowns_canceled", + "master/slave_shutdowns_completed", + "master/slaves_active", + "master/slaves_connected", + "master/slaves_disconnected", + "master/slaves_inactive", + // frameworks + "master/frameworks_active", + "master/frameworks_connected", + "master/frameworks_disconnected", + "master/frameworks_inactive", + "master/outstanding_offers", + // tasks + "master/tasks_error", + "master/tasks_failed", + "master/tasks_finished", + "master/tasks_killed", + "master/tasks_lost", + "master/tasks_running", + "master/tasks_staging", + "master/tasks_starting", + // messages + "master/invalid_executor_to_framework_messages", + "master/invalid_framework_to_executor_messages", + "master/invalid_status_update_acknowledgements", + "master/invalid_status_updates", + "master/dropped_messages", + "master/messages_authenticate", + "master/messages_deactivate_framework", + "master/messages_decline_offers", + "master/messages_executor_to_framework", + "master/messages_exited_executor", + "master/messages_framework_to_executor", + "master/messages_kill_task", + "master/messages_launch_tasks", + "master/messages_reconcile_tasks", + "master/messages_register_framework", + "master/messages_register_slave", + "master/messages_reregister_framework", + "master/messages_reregister_slave", + "master/messages_resource_request", + "master/messages_revive_offers", + "master/messages_status_update", + "master/messages_status_update_acknowledgement", + "master/messages_unregister_framework", + "master/messages_unregister_slave", + "master/messages_update_slave", + "master/recovery_slave_removals", + 
"master/slave_removals/reason_registered", + "master/slave_removals/reason_unhealthy", + "master/slave_removals/reason_unregistered", + "master/valid_framework_to_executor_messages", + "master/valid_status_update_acknowledgements", + "master/valid_status_updates", + "master/task_lost/source_master/reason_invalid_offers", + "master/task_lost/source_master/reason_slave_removed", + "master/task_lost/source_slave/reason_executor_terminated", + "master/valid_executor_to_framework_messages", + // evgqueue + "master/event_queue_dispatches", + "master/event_queue_http_requests", + "master/event_queue_messages", + // registrar + "registrar/state_fetch_ms", + "registrar/state_store_ms", + "registrar/state_store_ms/max", + "registrar/state_store_ms/min", + "registrar/state_store_ms/p50", + "registrar/state_store_ms/p90", + "registrar/state_store_ms/p95", + "registrar/state_store_ms/p99", + "registrar/state_store_ms/p999", + "registrar/state_store_ms/p9999", + } for _, k := range metricNames { - mesosMetrics[k] = rand.Float64() + masterMetrics[k] = rand.Float64() + } + + slaveMetrics = make(map[string]interface{}) + + metricNames = []string{ + // resources + "slave/cpus_percent", + "slave/cpus_used", + "slave/cpus_total", + "slave/cpus_revocable_percent", + "slave/cpus_revocable_total", + "slave/cpus_revocable_used", + "slave/disk_percent", + "slave/disk_used", + "slave/disk_total", + "slave/disk_revocable_percent", + "slave/disk_revocable_total", + "slave/disk_revocable_used", + "slave/gpus_percent", + "slave/gpus_used", + "slave/gpus_total", + "slave/gpus_revocable_percent", + "slave/gpus_revocable_total", + "slave/gpus_revocable_used", + "slave/mem_percent", + "slave/mem_used", + "slave/mem_total", + "slave/mem_revocable_percent", + "slave/mem_revocable_total", + "slave/mem_revocable_used", + // agent + "slave/registered", + "slave/uptime_secs", + // system + "system/cpus_total", + "system/load_15min", + "system/load_5min", + "system/load_1min", + "system/mem_free_bytes", + "system/mem_total_bytes", + // executors + "containerizer/mesos/container_destroy_errors", + "slave/container_launch_errors", + "slave/executors_preempted", + "slave/frameworks_active", + "slave/executor_directory_max_allowed_age_secs", + "slave/executors_registering", + "slave/executors_running", + "slave/executors_terminated", + "slave/executors_terminating", + "slave/recovery_errors", + // tasks + "slave/tasks_failed", + "slave/tasks_finished", + "slave/tasks_killed", + "slave/tasks_lost", + "slave/tasks_running", + "slave/tasks_staging", + "slave/tasks_starting", + // messages + "slave/invalid_framework_messages", + "slave/invalid_status_updates", + "slave/valid_framework_messages", + "slave/valid_status_updates", + } + + for _, k := range metricNames { + slaveMetrics[k] = rand.Float64() + } + + slaveTaskMetrics = map[string]interface{}{ + "executor_id": fmt.Sprintf("task_%s", randUUID()), + "executor_name": "Some task description", + "framework_id": randUUID(), + "source": fmt.Sprintf("task_source_%s", randUUID()), + "statistics": map[string]interface{}{ + "cpus_limit": rand.Float64(), + "cpus_system_time_secs": rand.Float64(), + "cpus_user_time_secs": rand.Float64(), + "mem_anon_bytes": float64(rand.Int63()), + "mem_cache_bytes": float64(rand.Int63()), + "mem_critical_pressure_counter": float64(rand.Int63()), + "mem_file_bytes": float64(rand.Int63()), + "mem_limit_bytes": float64(rand.Int63()), + "mem_low_pressure_counter": float64(rand.Int63()), + "mem_mapped_file_bytes": float64(rand.Int63()), + 
"mem_medium_pressure_counter": float64(rand.Int63()), + "mem_rss_bytes": float64(rand.Int63()), + "mem_swap_bytes": float64(rand.Int63()), + "mem_total_bytes": float64(rand.Int63()), + "mem_total_memsw_bytes": float64(rand.Int63()), + "mem_unevictable_bytes": float64(rand.Int63()), + "timestamp": rand.Float64(), + }, } } func TestMain(m *testing.M) { generateMetrics() - r := http.NewServeMux() - r.HandleFunc("/metrics/snapshot", func(w http.ResponseWriter, r *http.Request) { + + masterRouter := http.NewServeMux() + masterRouter.HandleFunc("/metrics/snapshot", func(w http.ResponseWriter, r *http.Request) { w.WriteHeader(http.StatusOK) w.Header().Set("Content-Type", "application/json") - json.NewEncoder(w).Encode(mesosMetrics) + json.NewEncoder(w).Encode(masterMetrics) }) - ts = httptest.NewServer(r) + masterTestServer = httptest.NewServer(masterRouter) + + slaveRouter := http.NewServeMux() + slaveRouter.HandleFunc("/metrics/snapshot", func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusOK) + w.Header().Set("Content-Type", "application/json") + json.NewEncoder(w).Encode(slaveMetrics) + }) + slaveRouter.HandleFunc("/monitor/statistics", func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusOK) + w.Header().Set("Content-Type", "application/json") + json.NewEncoder(w).Encode([]map[string]interface{}{slaveTaskMetrics}) + }) + slaveTestServer = httptest.NewServer(slaveRouter) + rc := m.Run() - ts.Close() + + masterTestServer.Close() + slaveTestServer.Close() os.Exit(rc) } @@ -73,7 +278,7 @@ func TestMesosMaster(t *testing.T) { var acc testutil.Accumulator m := Mesos{ - Masters: []string{ts.Listener.Addr().String()}, + Masters: []string{masterTestServer.Listener.Addr().String()}, Timeout: 10, } @@ -83,34 +288,88 @@ func TestMesosMaster(t *testing.T) { t.Errorf(err.Error()) } - acc.AssertContainsFields(t, "mesos", mesosMetrics) + acc.AssertContainsFields(t, "mesos", masterMetrics) } -func TestRemoveGroup(t *testing.T) { - generateMetrics() - +func TestMasterFilter(t *testing.T) { m := Mesos{ MasterCols: []string{ "resources", "master", "registrar", }, } b := []string{ - "system", "slaves", "frameworks", - "messages", "evqueue", + "system", "agents", "frameworks", + "messages", "evqueue", "tasks", } - m.removeGroup(&mesosMetrics) + m.filterMetrics(MASTER, &masterMetrics) for _, v := range b { - for _, x := range masterBlocks(v) { - if _, ok := mesosMetrics[x]; ok { + for _, x := range getMetrics(MASTER, v) { + if _, ok := masterMetrics[x]; ok { t.Errorf("Found key %s, it should be gone.", x) } } } for _, v := range m.MasterCols { - for _, x := range masterBlocks(v) { - if _, ok := mesosMetrics[x]; !ok { + for _, x := range getMetrics(MASTER, v) { + if _, ok := masterMetrics[x]; !ok { + t.Errorf("Didn't find key %s, it should present.", x) + } + } + } +} + +func TestMesosSlave(t *testing.T) { + var acc testutil.Accumulator + + m := Mesos{ + Masters: []string{}, + Slaves: []string{slaveTestServer.Listener.Addr().String()}, + SlaveTasks: true, + Timeout: 10, + } + + err := m.Gather(&acc) + + if err != nil { + t.Errorf(err.Error()) + } + + acc.AssertContainsFields(t, "mesos", slaveMetrics) + + jf := jsonparser.JSONFlattener{} + err = jf.FlattenJSON("", slaveTaskMetrics) + + if err != nil { + t.Errorf(err.Error()) + } + + acc.AssertContainsFields(t, "mesos-tasks", jf.Fields) +} + +func TestSlaveFilter(t *testing.T) { + m := Mesos{ + SlaveCols: []string{ + "resources", "agent", "tasks", + }, + } + b := []string{ + "system", "executors", "messages", + } + + 
m.filterMetrics(SLAVE, &slaveMetrics)
+
+	for _, v := range b {
+		for _, x := range getMetrics(SLAVE, v) {
+			if _, ok := slaveMetrics[x]; ok {
+				t.Errorf("Found key %s, it should be gone.", x)
+			}
+		}
+	}
+	for _, v := range m.SlaveCols {
+		for _, x := range getMetrics(SLAVE, v) {
+			if _, ok := slaveMetrics[x]; !ok {
 				t.Errorf("Didn't find key %s, it should be present.", x)
 			}
 		}
 	}
diff --git a/plugins/inputs/mock_Plugin.go b/plugins/inputs/mock_Plugin.go
index caf30f72f..4dec121bc 100644
--- a/plugins/inputs/mock_Plugin.go
+++ b/plugins/inputs/mock_Plugin.go
@@ -6,10 +6,22 @@ import (
 	"github.com/stretchr/testify/mock"
 )
 
+// MockPlugin struct should be named the same as the Plugin
 type MockPlugin struct {
 	mock.Mock
 }
 
+// Description will appear directly above the plugin definition in the config file
+func (m *MockPlugin) Description() string {
+	return `This is an example plugin`
+}
+
+// SampleConfig will populate the sample configuration portion of the plugin's configuration
+func (m *MockPlugin) SampleConfig() string {
+	return `  sampleVar = 'foo'`
+}
+
+// Gather defines what data the plugin will gather.
 func (m *MockPlugin) Gather(_a0 telegraf.Accumulator) error {
 	ret := m.Called(_a0)
 
diff --git a/plugins/inputs/mongodb/README.md b/plugins/inputs/mongodb/README.md
index 66ff2668e..72f87feb8 100644
--- a/plugins/inputs/mongodb/README.md
+++ b/plugins/inputs/mongodb/README.md
@@ -10,6 +10,7 @@
   ##   mongodb://10.10.3.33:18832,
   ##   10.0.0.1:10000, etc.
   servers = ["127.0.0.1:27017"]
+  gather_perdb_stats = false
 ```
 
 For authenticated MongoDB instances, use a MongoDB connection URI
@@ -52,3 +53,15 @@ and create a single measurement containing values e.g.
 * ttl_passes_per_sec
 * repl_lag
 * jumbo_chunks (only if mongos or mongo config)
+
+If gather_perdb_stats is set to true, it will also collect per-database stats exposed by db.stats(),
+creating another measurement called mongodb_db_stats containing values:
+ * collections
+ * objects
+ * avg_obj_size
+ * data_size
+ * storage_size
+ * num_extents
+ * indexes
+ * index_size
+ * ok
diff --git a/plugins/inputs/mongodb/mongodb.go b/plugins/inputs/mongodb/mongodb.go
index f38fa31ef..a4bdabd96 100644
--- a/plugins/inputs/mongodb/mongodb.go
+++ b/plugins/inputs/mongodb/mongodb.go
@@ -10,14 +10,16 @@ import (
 	"time"
 
 	"github.com/influxdata/telegraf"
+	"github.com/influxdata/telegraf/internal/errchan"
 	"github.com/influxdata/telegraf/plugins/inputs"
 
 	"gopkg.in/mgo.v2"
 )
 
 type MongoDB struct {
-	Servers []string
-	Ssl     Ssl
-	mongos  map[string]*Server
+	Servers          []string
+	Ssl              Ssl
+	mongos           map[string]*Server
+	GatherPerdbStats bool
 }
 
 type Ssl struct {
@@ -32,6 +34,7 @@ var sampleConfig = `
   ##   mongodb://10.10.3.33:18832,
   ##   10.0.0.1:10000, etc.
servers = ["127.0.0.1:27017"] + gather_perdb_stats = false ` func (m *MongoDB) SampleConfig() string { @@ -53,9 +56,7 @@ func (m *MongoDB) Gather(acc telegraf.Accumulator) error { } var wg sync.WaitGroup - - var outerr error - + errChan := errchan.New(len(m.Servers)) for _, serv := range m.Servers { u, err := url.Parse(serv) if err != nil { @@ -71,13 +72,12 @@ func (m *MongoDB) Gather(acc telegraf.Accumulator) error { wg.Add(1) go func(srv *Server) { defer wg.Done() - outerr = m.gatherServer(srv, acc) + errChan.C <- m.gatherServer(srv, acc) }(m.getMongoServer(u)) } wg.Wait() - - return outerr + return errChan.Error() } func (m *MongoDB) getMongoServer(url *url.URL) *Server { @@ -135,7 +135,7 @@ func (m *MongoDB) gatherServer(server *Server, acc telegraf.Accumulator) error { } server.Session = sess } - return server.gatherData(acc) + return server.gatherData(acc, m.GatherPerdbStats) } func init() { diff --git a/plugins/inputs/mongodb/mongodb_data.go b/plugins/inputs/mongodb/mongodb_data.go index 7a52d650a..afa4ddd2f 100644 --- a/plugins/inputs/mongodb/mongodb_data.go +++ b/plugins/inputs/mongodb/mongodb_data.go @@ -12,6 +12,12 @@ type MongodbData struct { StatLine *StatLine Fields map[string]interface{} Tags map[string]string + DbData []DbData +} + +type DbData struct { + Name string + Fields map[string]interface{} } func NewMongodbData(statLine *StatLine, tags map[string]string) *MongodbData { @@ -22,6 +28,7 @@ func NewMongodbData(statLine *StatLine, tags map[string]string) *MongodbData { StatLine: statLine, Tags: tags, Fields: make(map[string]interface{}), + DbData: []DbData{}, } } @@ -72,6 +79,34 @@ var WiredTigerStats = map[string]string{ "percent_cache_used": "CacheUsedPercent", } +var DbDataStats = map[string]string{ + "collections": "Collections", + "objects": "Objects", + "avg_obj_size": "AvgObjSize", + "data_size": "DataSize", + "storage_size": "StorageSize", + "num_extents": "NumExtents", + "indexes": "Indexes", + "index_size": "IndexSize", + "ok": "Ok", +} + +func (d *MongodbData) AddDbStats() { + for _, dbstat := range d.StatLine.DbStatsLines { + dbStatLine := reflect.ValueOf(&dbstat).Elem() + newDbData := &DbData{ + Name: dbstat.Name, + Fields: make(map[string]interface{}), + } + newDbData.Fields["type"] = "db_stat" + for key, value := range DbDataStats { + val := dbStatLine.FieldByName(value).Interface() + newDbData.Fields[key] = val + } + d.DbData = append(d.DbData, *newDbData) + } +} + func (d *MongodbData) AddDefaultStats() { statLine := reflect.ValueOf(d.StatLine).Elem() d.addStat(statLine, DefaultStats) @@ -113,4 +148,15 @@ func (d *MongodbData) flush(acc telegraf.Accumulator) { d.StatLine.Time, ) d.Fields = make(map[string]interface{}) + + for _, db := range d.DbData { + d.Tags["db_name"] = db.Name + acc.AddFields( + "mongodb_db_stats", + db.Fields, + d.Tags, + d.StatLine.Time, + ) + db.Fields = make(map[string]interface{}) + } } diff --git a/plugins/inputs/mongodb/mongodb_server.go b/plugins/inputs/mongodb/mongodb_server.go index e4213bbaf..e797fd6ab 100644 --- a/plugins/inputs/mongodb/mongodb_server.go +++ b/plugins/inputs/mongodb/mongodb_server.go @@ -22,7 +22,7 @@ func (s *Server) getDefaultTags() map[string]string { return tags } -func (s *Server) gatherData(acc telegraf.Accumulator) error { +func (s *Server) gatherData(acc telegraf.Accumulator, gatherDbStats bool) error { s.Session.SetMode(mgo.Eventual, true) s.Session.SetSocketTimeout(0) result_server := &ServerStatus{} @@ -42,10 +42,34 @@ func (s *Server) gatherData(acc telegraf.Accumulator) error { 
JumboChunksCount: int64(jumbo_chunks), } + result_db_stats := &DbStats{} + + if gatherDbStats == true { + names := []string{} + names, err = s.Session.DatabaseNames() + if err != nil { + log.Println("Error getting database names (" + err.Error() + ")") + } + for _, db_name := range names { + db_stat_line := &DbStatsData{} + err = s.Session.DB(db_name).Run(bson.D{{"dbStats", 1}}, db_stat_line) + if err != nil { + log.Println("Error getting db stats from " + db_name + "(" + err.Error() + ")") + } + db := &Db{ + Name: db_name, + DbStatsData: db_stat_line, + } + + result_db_stats.Dbs = append(result_db_stats.Dbs, *db) + } + } + result := &MongoStatus{ ServerStatus: result_server, ReplSetStatus: result_repl, ClusterStatus: result_cluster, + DbStats: result_db_stats, } defer func() { @@ -64,6 +88,7 @@ func (s *Server) gatherData(acc telegraf.Accumulator) error { s.getDefaultTags(), ) data.AddDefaultStats() + data.AddDbStats() data.flush(acc) } return nil diff --git a/plugins/inputs/mongodb/mongodb_server_test.go b/plugins/inputs/mongodb/mongodb_server_test.go index 52869724c..7ad0f38a2 100644 --- a/plugins/inputs/mongodb/mongodb_server_test.go +++ b/plugins/inputs/mongodb/mongodb_server_test.go @@ -29,12 +29,12 @@ func TestGetDefaultTags(t *testing.T) { func TestAddDefaultStats(t *testing.T) { var acc testutil.Accumulator - err := server.gatherData(&acc) + err := server.gatherData(&acc, false) require.NoError(t, err) time.Sleep(time.Duration(1) * time.Second) // need to call this twice so it can perform the diff - err = server.gatherData(&acc) + err = server.gatherData(&acc, false) require.NoError(t, err) for key, _ := range DefaultStats { diff --git a/plugins/inputs/mongodb/mongostat.go b/plugins/inputs/mongodb/mongostat.go index 23bd05f72..50f65333e 100644 --- a/plugins/inputs/mongodb/mongostat.go +++ b/plugins/inputs/mongodb/mongostat.go @@ -35,6 +35,7 @@ type MongoStatus struct { ServerStatus *ServerStatus ReplSetStatus *ReplSetStatus ClusterStatus *ClusterStatus + DbStats *DbStats } type ServerStatus struct { @@ -65,6 +66,32 @@ type ServerStatus struct { Metrics *MetricsStats `bson:"metrics"` } +// DbStats stores stats from all dbs +type DbStats struct { + Dbs []Db +} + +// Db represent a single DB +type Db struct { + Name string + DbStatsData *DbStatsData +} + +// DbStatsData stores stats from a db +type DbStatsData struct { + Db string `bson:"db"` + Collections int64 `bson:"collections"` + Objects int64 `bson:"objects"` + AvgObjSize float64 `bson:"avgObjSize"` + DataSize int64 `bson:"dataSize"` + StorageSize int64 `bson:"storageSize"` + NumExtents int64 `bson:"numExtents"` + Indexes int64 `bson:"indexes"` + IndexSize int64 `bson:"indexSize"` + Ok int64 `bson:"ok"` + GleStats interface{} `bson:"gleStats"` +} + // ClusterStatus stores information related to the whole cluster type ClusterStatus struct { JumboChunksCount int64 @@ -396,6 +423,22 @@ type StatLine struct { // Cluster fields JumboChunksCount int64 + + // DB stats field + DbStatsLines []DbStatLine +} + +type DbStatLine struct { + Name string + Collections int64 + Objects int64 + AvgObjSize float64 + DataSize int64 + StorageSize int64 + NumExtents int64 + Indexes int64 + IndexSize int64 + Ok int64 } func parseLocks(stat ServerStatus) map[string]LockUsage { @@ -677,5 +720,27 @@ func NewStatLine(oldMongo, newMongo MongoStatus, key string, all bool, sampleSec newClusterStat := *newMongo.ClusterStatus returnVal.JumboChunksCount = newClusterStat.JumboChunksCount + newDbStats := *newMongo.DbStats + for _, db := range newDbStats.Dbs { + 
dbStatsData := db.DbStatsData + // mongos doesn't have the db key, so setting the db name + if dbStatsData.Db == "" { + dbStatsData.Db = db.Name + } + dbStatLine := &DbStatLine{ + Name: dbStatsData.Db, + Collections: dbStatsData.Collections, + Objects: dbStatsData.Objects, + AvgObjSize: dbStatsData.AvgObjSize, + DataSize: dbStatsData.DataSize, + StorageSize: dbStatsData.StorageSize, + NumExtents: dbStatsData.NumExtents, + Indexes: dbStatsData.Indexes, + IndexSize: dbStatsData.IndexSize, + Ok: dbStatsData.Ok, + } + returnVal.DbStatsLines = append(returnVal.DbStatsLines, *dbStatLine) + } + return returnVal } diff --git a/plugins/inputs/mysql/README.md b/plugins/inputs/mysql/README.md index 730caab91..20dd73e02 100644 --- a/plugins/inputs/mysql/README.md +++ b/plugins/inputs/mysql/README.md @@ -53,13 +53,13 @@ This plugin gathers the statistic data from MySQL server ## gather metrics from SHOW BINARY LOGS command output gather_binary_logs = false # - ## gather metrics from PERFORMANCE_SCHEMA.TABLE_IO_WAITS_SUMMART_BY_TABLE + ## gather metrics from PERFORMANCE_SCHEMA.TABLE_IO_WAITS_SUMMARY_BY_TABLE gather_table_io_waits = false # ## gather metrics from PERFORMANCE_SCHEMA.TABLE_LOCK_WAITS gather_table_lock_waits = false # - ## gather metrics from PERFORMANCE_SCHEMA.TABLE_IO_WAITS_SUMMART_BY_INDEX_USAGE + ## gather metrics from PERFORMANCE_SCHEMA.TABLE_IO_WAITS_SUMMARY_BY_INDEX_USAGE gather_index_io_waits = false # ## gather metrics from PERFORMANCE_SCHEMA.EVENT_WAITS diff --git a/plugins/inputs/mysql/mysql.go b/plugins/inputs/mysql/mysql.go index b8ff3945a..10b8c2f75 100644 --- a/plugins/inputs/mysql/mysql.go +++ b/plugins/inputs/mysql/mysql.go @@ -7,10 +7,12 @@ import ( "net/url" "strconv" "strings" + "sync" "time" _ "github.com/go-sql-driver/mysql" "github.com/influxdata/telegraf" + "github.com/influxdata/telegraf/internal/errchan" "github.com/influxdata/telegraf/plugins/inputs" ) @@ -118,26 +120,27 @@ func (m *Mysql) InitMysql() { func (m *Mysql) Gather(acc telegraf.Accumulator) error { if len(m.Servers) == 0 { - // if we can't get stats in this case, thats fine, don't report - // an error. - m.gatherServer(localhost, acc) - return nil + // default to localhost if nothing specified. 
+ return m.gatherServer(localhost, acc) } - // Initialise additional query intervals if !initDone { m.InitMysql() } + var wg sync.WaitGroup + errChan := errchan.New(len(m.Servers)) // Loop through each server and collect metrics - for _, serv := range m.Servers { - err := m.gatherServer(serv, acc) - if err != nil { - return err - } + for _, server := range m.Servers { + wg.Add(1) + go func(s string) { + defer wg.Done() + errChan.C <- m.gatherServer(s, acc) + }(server) } - return nil + wg.Wait() + return errChan.Error() } type mapping struct { @@ -306,6 +309,10 @@ var mappings = []*mapping{ onServer: "Threadpool_", inExport: "threadpool_", }, + { + onServer: "wsrep_", + inExport: "wsrep_", + }, } var ( diff --git a/plugins/inputs/mysql/mysql_test.go b/plugins/inputs/mysql/mysql_test.go index 989c21722..3ab9187b5 100644 --- a/plugins/inputs/mysql/mysql_test.go +++ b/plugins/inputs/mysql/mysql_test.go @@ -20,7 +20,6 @@ func TestMysqlDefaultsToLocal(t *testing.T) { } var acc testutil.Accumulator - err := m.Gather(&acc) require.NoError(t, err) diff --git a/plugins/inputs/nginx/nginx.go b/plugins/inputs/nginx/nginx.go index c13ba39f3..3fe8c04d1 100644 --- a/plugins/inputs/nginx/nginx.go +++ b/plugins/inputs/nginx/nginx.go @@ -12,6 +12,7 @@ import ( "time" "github.com/influxdata/telegraf" + "github.com/influxdata/telegraf/internal/errchan" "github.com/influxdata/telegraf/plugins/inputs" ) @@ -34,7 +35,7 @@ func (n *Nginx) Description() string { func (n *Nginx) Gather(acc telegraf.Accumulator) error { var wg sync.WaitGroup - var outerr error + errChan := errchan.New(len(n.Urls)) for _, u := range n.Urls { addr, err := url.Parse(u) @@ -45,13 +46,12 @@ func (n *Nginx) Gather(acc telegraf.Accumulator) error { wg.Add(1) go func(addr *url.URL) { defer wg.Done() - outerr = n.gatherUrl(addr, acc) + errChan.C <- n.gatherUrl(addr, acc) }(addr) } wg.Wait() - - return outerr + return errChan.Error() } var tr = &http.Transport{ @@ -97,11 +97,12 @@ func (n *Nginx) gatherUrl(addr *url.URL, acc telegraf.Accumulator) error { if err != nil { return err } - data := strings.SplitN(strings.TrimSpace(line), " ", 3) + data := strings.Fields(line) accepts, err := strconv.ParseUint(data[0], 10, 64) if err != nil { return err } + handled, err := strconv.ParseUint(data[1], 10, 64) if err != nil { return err @@ -116,7 +117,7 @@ func (n *Nginx) gatherUrl(addr *url.URL, acc telegraf.Accumulator) error { if err != nil { return err } - data = strings.SplitN(strings.TrimSpace(line), " ", 6) + data = strings.Fields(line) reading, err := strconv.ParseUint(data[1], 10, 64) if err != nil { return err diff --git a/plugins/inputs/nginx/nginx_test.go b/plugins/inputs/nginx/nginx_test.go index 895e3e583..4c8fabfe0 100644 --- a/plugins/inputs/nginx/nginx_test.go +++ b/plugins/inputs/nginx/nginx_test.go @@ -13,12 +13,18 @@ import ( "github.com/stretchr/testify/require" ) -const sampleResponse = ` +const nginxSampleResponse = ` Active connections: 585 server accepts handled requests 85340 85340 35085 Reading: 4 Writing: 135 Waiting: 446 ` +const tengineSampleResponse = ` +Active connections: 403 +server accepts handled requests request_time + 853 8533 3502 1546565864 +Reading: 8 Writing: 125 Waiting: 946 +` // Verify that nginx tags are properly parsed based on the server func TestNginxTags(t *testing.T) { @@ -36,7 +42,9 @@ func TestNginxGeneratesMetrics(t *testing.T) { var rsp string if r.URL.Path == "/stub_status" { - rsp = sampleResponse + rsp = nginxSampleResponse + } else if r.URL.Path == "/tengine_status" { + rsp = 
tengineSampleResponse
 	} else {
 		panic("Cannot handle request")
 	}
@@ -49,12 +57,20 @@ func TestNginxGeneratesMetrics(t *testing.T) {
 		Urls: []string{fmt.Sprintf("%s/stub_status", ts.URL)},
 	}
 
-	var acc testutil.Accumulator
+	nt := &Nginx{
+		Urls: []string{fmt.Sprintf("%s/tengine_status", ts.URL)},
+	}
 
-	err := n.Gather(&acc)
-	require.NoError(t, err)
+	var acc_nginx testutil.Accumulator
+	var acc_tengine testutil.Accumulator
 
-	fields := map[string]interface{}{
+	err_nginx := n.Gather(&acc_nginx)
+	err_tengine := nt.Gather(&acc_tengine)
+
+	require.NoError(t, err_nginx)
+	require.NoError(t, err_tengine)
+
+	fields_nginx := map[string]interface{}{
 		"active":   uint64(585),
 		"accepts":  uint64(85340),
 		"handled":  uint64(85340),
@@ -63,6 +79,17 @@ func TestNginxGeneratesMetrics(t *testing.T) {
 		"writing":  uint64(135),
 		"waiting":  uint64(446),
 	}
+
+	fields_tengine := map[string]interface{}{
+		"active":   uint64(403),
+		"accepts":  uint64(853),
+		"handled":  uint64(8533),
+		"requests": uint64(3502),
+		"reading":  uint64(8),
+		"writing":  uint64(125),
+		"waiting":  uint64(946),
+	}
+
 	addr, err := url.Parse(ts.URL)
 	if err != nil {
 		panic(err)
@@ -81,5 +108,6 @@ func TestNginxGeneratesMetrics(t *testing.T) {
 	}
 
 	tags := map[string]string{"server": host, "port": port}
-	acc.AssertContainsTaggedFields(t, "nginx", fields, tags)
+	acc_nginx.AssertContainsTaggedFields(t, "nginx", fields_nginx, tags)
+	acc_tengine.AssertContainsTaggedFields(t, "nginx", fields_tengine, tags)
 }
diff --git a/plugins/inputs/nsq/nsq.go b/plugins/inputs/nsq/nsq.go
index 35ba76866..8bfd72788 100644
--- a/plugins/inputs/nsq/nsq.go
+++ b/plugins/inputs/nsq/nsq.go
@@ -32,6 +32,7 @@ import (
 	"time"
 
 	"github.com/influxdata/telegraf"
+	"github.com/influxdata/telegraf/internal/errchan"
 	"github.com/influxdata/telegraf/plugins/inputs"
 )
 
@@ -65,19 +66,17 @@ func (n *NSQ) Description() string {
 
 func (n *NSQ) Gather(acc telegraf.Accumulator) error {
 	var wg sync.WaitGroup
-	var outerr error
-
+	errChan := errchan.New(len(n.Endpoints))
 	for _, e := range n.Endpoints {
 		wg.Add(1)
 		go func(e string) {
 			defer wg.Done()
-			outerr = n.gatherEndpoint(e, acc)
+			errChan.C <- n.gatherEndpoint(e, acc)
 		}(e)
 	}
 
 	wg.Wait()
-
-	return outerr
+	return errChan.Error()
 }
 
 var tr = &http.Transport{
diff --git a/plugins/inputs/nsq_consumer/README.md b/plugins/inputs/nsq_consumer/README.md
new file mode 100644
index 000000000..eac494ccb
--- /dev/null
+++ b/plugins/inputs/nsq_consumer/README.md
@@ -0,0 +1,25 @@
+# NSQ Consumer Input Plugin
+
+The [NSQ](http://nsq.io/) consumer plugin consumes messages from a specified NSQD
+topic and parses them into Telegraf metrics. This plugin allows a message to be in any of the supported `data_format` types.
+
+## Configuration
+
+```toml
+# Read metrics from NSQD topic(s)
+[[inputs.nsq_consumer]]
+  ## The NSQD TCP endpoint to consume from
+  server = "localhost:4150"
+  topic = "telegraf"
+  channel = "consumer"
+  max_in_flight = 100
+
+  ## Data format to consume.
+  ## Each data format has its own unique set of configuration options, read
+  ## more about them here:
+  ## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md
+  data_format = "influx"
+```
+
+## Testing
+The `nsq_consumer_test` mocks out the interaction with `NSQD`. It requires no outside dependencies.
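Since the README above stops at configuration, here is a minimal, self-contained sketch of the same consume-and-parse flow. It is illustrative rather than part of this patch; the endpoint, topic, and channel are assumed placeholders mirroring the sample config:

```go
package main

import (
	"log"

	"github.com/influxdata/telegraf/plugins/parsers"
	"github.com/nsqio/go-nsq"
)

func main() {
	// Illustrative values; they mirror the sample config above.
	cfg := nsq.NewConfig()
	cfg.MaxInFlight = 100

	consumer, err := nsq.NewConsumer("telegraf", "consumer", cfg)
	if err != nil {
		log.Fatal(err)
	}

	// Parse each message body with the influx data_format parser, the same
	// step the plugin performs before handing metrics to its accumulator.
	parser, err := parsers.NewInfluxParser()
	if err != nil {
		log.Fatal(err)
	}

	consumer.AddHandler(nsq.HandlerFunc(func(m *nsq.Message) error {
		metrics, err := parser.Parse(m.Body)
		if err != nil {
			log.Printf("parse error: %s", err)
			return nil // finish unparseable messages instead of requeueing them
		}
		for _, metric := range metrics {
			log.Printf("parsed metric: %v", metric)
		}
		return nil
	}))

	if err := consumer.ConnectToNSQD("localhost:4150"); err != nil {
		log.Fatal(err)
	}
	<-consumer.StopChan
}
```

Returning nil from the handler finishes (FINs) the message rather than requeueing it, which matches how the plugin itself treats unparseable payloads.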
diff --git a/plugins/inputs/nsq_consumer/nsq_consumer.go b/plugins/inputs/nsq_consumer/nsq_consumer.go
new file mode 100644
index 000000000..b227b7e50
--- /dev/null
+++ b/plugins/inputs/nsq_consumer/nsq_consumer.go
@@ -0,0 +1,99 @@
+package nsq_consumer
+
+import (
+	"log"
+
+	"github.com/influxdata/telegraf"
+	"github.com/influxdata/telegraf/plugins/inputs"
+	"github.com/influxdata/telegraf/plugins/parsers"
+	"github.com/nsqio/go-nsq"
+)
+
+// NSQConsumer represents the configuration of the plugin
+type NSQConsumer struct {
+	Server      string
+	Topic       string
+	Channel     string
+	MaxInFlight int
+	parser      parsers.Parser
+	consumer    *nsq.Consumer
+	acc         telegraf.Accumulator
+}
+
+var sampleConfig = `
+  ## A string representing the NSQD TCP endpoint
+  server = "localhost:4150"
+  topic = "telegraf"
+  channel = "consumer"
+  max_in_flight = 100
+
+  ## Data format to consume.
+  ## Each data format has its own unique set of configuration options, read
+  ## more about them here:
+  ## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md
+  data_format = "influx"
+`
+
+func init() {
+	inputs.Add("nsq_consumer", func() telegraf.Input {
+		return &NSQConsumer{}
+	})
+}
+
+// SetParser takes the data_format from the config and finds the right parser for that format
+func (n *NSQConsumer) SetParser(parser parsers.Parser) {
+	n.parser = parser
+}
+
+// SampleConfig returns config values for generating a sample configuration file
+func (n *NSQConsumer) SampleConfig() string {
+	return sampleConfig
+}
+
+// Description prints description string
+func (n *NSQConsumer) Description() string {
+	return "Read NSQ topic for metrics."
+}
+
+// Start pulls data from nsq
+func (n *NSQConsumer) Start(acc telegraf.Accumulator) error {
+	n.acc = acc
+	if err := n.connect(); err != nil {
+		return err
+	}
+	n.consumer.AddConcurrentHandlers(nsq.HandlerFunc(func(message *nsq.Message) error {
+		metrics, err := n.parser.Parse(message.Body)
+		if err != nil {
+			log.Printf("NSQConsumer Parse Error\nmessage:%s\nerror:%s", string(message.Body), err.Error())
+			return nil
+		}
+		for _, metric := range metrics {
+			n.acc.AddFields(metric.Name(), metric.Fields(), metric.Tags(), metric.Time())
+		}
+		message.Finish()
+		return nil
+	}), n.MaxInFlight)
+	return n.consumer.ConnectToNSQD(n.Server)
+}
+
+// Stop processing messages
+func (n *NSQConsumer) Stop() {
+	n.consumer.Stop()
+}
+
+// Gather is a noop
+func (n *NSQConsumer) Gather(acc telegraf.Accumulator) error {
+	return nil
+}
+
+func (n *NSQConsumer) connect() error {
+	if n.consumer == nil {
+		config := nsq.NewConfig()
+		config.MaxInFlight = n.MaxInFlight
+		consumer, err := nsq.NewConsumer(n.Topic, n.Channel, config)
+		if err != nil {
+			return err
+		}
+		n.consumer = consumer
+	}
+	return nil
+}
diff --git a/plugins/inputs/nsq_consumer/nsq_consumer_test.go b/plugins/inputs/nsq_consumer/nsq_consumer_test.go
new file mode 100644
index 000000000..59db675a5
--- /dev/null
+++ b/plugins/inputs/nsq_consumer/nsq_consumer_test.go
@@ -0,0 +1,245 @@
+package nsq_consumer
+
+import (
+	"bufio"
+	"bytes"
+	"encoding/binary"
+	"io"
+	"log"
+	"net"
+	"strconv"
+	"testing"
+	"time"
+
+	"github.com/influxdata/telegraf/plugins/parsers"
+	"github.com/influxdata/telegraf/testutil"
+	"github.com/nsqio/go-nsq"
+	"github.com/stretchr/testify/assert"
+)
+
+// This test is modeled after the kafka consumer integration test
+func TestReadsMetricsFromNSQ(t *testing.T) {
+	msgID := nsq.MessageID{'1', '2', '3', '4', '5', '6', '7', '8', '9', '0', 'a', 's', 'd', 'f', 'g', 'h'}
+	msg := nsq.NewMessage(msgID,
[]byte("cpu_load_short,direction=in,host=server01,region=us-west value=23422.0 1422568543702900257")) + + script := []instruction{ + // SUB + instruction{0, nsq.FrameTypeResponse, []byte("OK")}, + // IDENTIFY + instruction{0, nsq.FrameTypeResponse, []byte("OK")}, + instruction{20 * time.Millisecond, nsq.FrameTypeMessage, frameMessage(msg)}, + // needed to exit test + instruction{100 * time.Millisecond, -1, []byte("exit")}, + } + + addr, _ := net.ResolveTCPAddr("tcp", "127.0.0.1:4155") + newMockNSQD(script, addr.String()) + + consumer := &NSQConsumer{ + Server: "127.0.0.1:4155", + Topic: "telegraf", + Channel: "consume", + MaxInFlight: 1, + } + + p, _ := parsers.NewInfluxParser() + consumer.SetParser(p) + var acc testutil.Accumulator + assert.Equal(t, 0, len(acc.Metrics), "There should not be any points") + if err := consumer.Start(&acc); err != nil { + t.Fatal(err.Error()) + } else { + defer consumer.Stop() + } + + waitForPoint(&acc, t) + + if len(acc.Metrics) == 1 { + point := acc.Metrics[0] + assert.Equal(t, "cpu_load_short", point.Measurement) + assert.Equal(t, map[string]interface{}{"value": 23422.0}, point.Fields) + assert.Equal(t, map[string]string{ + "host": "server01", + "direction": "in", + "region": "us-west", + }, point.Tags) + assert.Equal(t, time.Unix(0, 1422568543702900257).Unix(), point.Time.Unix()) + } else { + t.Errorf("No points found in accumulator, expected 1") + } + +} + +// Waits for the metric that was sent to the kafka broker to arrive at the kafka +// consumer +func waitForPoint(acc *testutil.Accumulator, t *testing.T) { + // Give the kafka container up to 2 seconds to get the point to the consumer + ticker := time.NewTicker(5 * time.Millisecond) + defer ticker.Stop() + counter := 0 + for { + select { + case <-ticker.C: + counter++ + if counter > 1000 { + t.Fatal("Waited for 5s, point never arrived to consumer") + } else if acc.NFields() == 1 { + return + } + } + } +} + +func newMockNSQD(script []instruction, addr string) *mockNSQD { + n := &mockNSQD{ + script: script, + exitChan: make(chan int), + } + + tcpListener, err := net.Listen("tcp", addr) + if err != nil { + log.Fatalf("FATAL: listen (%s) failed - %s", n.tcpAddr.String(), err) + } + n.tcpListener = tcpListener + n.tcpAddr = tcpListener.Addr().(*net.TCPAddr) + + go n.listen() + + return n +} + +// The code below allows us to mock the interactions with nsqd. 
This is taken from: +// https://github.com/nsqio/go-nsq/blob/master/mock_test.go +type instruction struct { + delay time.Duration + frameType int32 + body []byte +} + +type mockNSQD struct { + script []instruction + got [][]byte + tcpAddr *net.TCPAddr + tcpListener net.Listener + exitChan chan int +} + +func (n *mockNSQD) listen() { + for { + conn, err := n.tcpListener.Accept() + if err != nil { + break + } + go n.handle(conn) + } + close(n.exitChan) +} + +func (n *mockNSQD) handle(conn net.Conn) { + var idx int + buf := make([]byte, 4) + _, err := io.ReadFull(conn, buf) + if err != nil { + log.Fatalf("ERROR: failed to read protocol version - %s", err) + } + + readChan := make(chan []byte) + readDoneChan := make(chan int) + scriptTime := time.After(n.script[0].delay) + rdr := bufio.NewReader(conn) + + go func() { + for { + line, err := rdr.ReadBytes('\n') + if err != nil { + return + } + // trim the '\n' + line = line[:len(line)-1] + readChan <- line + <-readDoneChan + } + }() + + var rdyCount int + for idx < len(n.script) { + select { + case line := <-readChan: + n.got = append(n.got, line) + params := bytes.Split(line, []byte(" ")) + switch { + case bytes.Equal(params[0], []byte("IDENTIFY")): + l := make([]byte, 4) + _, err := io.ReadFull(rdr, l) + if err != nil { + log.Printf(err.Error()) + goto exit + } + size := int32(binary.BigEndian.Uint32(l)) + b := make([]byte, size) + _, err = io.ReadFull(rdr, b) + if err != nil { + log.Printf(err.Error()) + goto exit + } + case bytes.Equal(params[0], []byte("RDY")): + rdy, _ := strconv.Atoi(string(params[1])) + rdyCount = rdy + case bytes.Equal(params[0], []byte("FIN")): + case bytes.Equal(params[0], []byte("REQ")): + } + readDoneChan <- 1 + case <-scriptTime: + inst := n.script[idx] + if bytes.Equal(inst.body, []byte("exit")) { + goto exit + } + if inst.frameType == nsq.FrameTypeMessage { + if rdyCount == 0 { + scriptTime = time.After(n.script[idx+1].delay) + continue + } + rdyCount-- + } + _, err := conn.Write(framedResponse(inst.frameType, inst.body)) + if err != nil { + log.Printf(err.Error()) + goto exit + } + scriptTime = time.After(n.script[idx+1].delay) + idx++ + } + } + +exit: + n.tcpListener.Close() + conn.Close() +} + +func framedResponse(frameType int32, data []byte) []byte { + var w bytes.Buffer + + beBuf := make([]byte, 4) + size := uint32(len(data)) + 4 + + binary.BigEndian.PutUint32(beBuf, size) + _, err := w.Write(beBuf) + if err != nil { + return nil + } + + binary.BigEndian.PutUint32(beBuf, uint32(frameType)) + _, err = w.Write(beBuf) + if err != nil { + return nil + } + + w.Write(data) + return w.Bytes() +} + +func frameMessage(m *nsq.Message) []byte { + var b bytes.Buffer + m.WriteTo(&b) + return b.Bytes() +} diff --git a/plugins/inputs/nstat/nstat.go b/plugins/inputs/nstat/nstat.go index d32ef004c..5096d7b03 100644 --- a/plugins/inputs/nstat/nstat.go +++ b/plugins/inputs/nstat/nstat.go @@ -43,9 +43,9 @@ var sampleConfig = ` ## file paths for proc files. If empty default paths will be used: ## /proc/net/netstat, /proc/net/snmp, /proc/net/snmp6 ## These can also be overridden with env variables, see README. 
-  proc_net_netstat = ""
-  proc_net_snmp = ""
-  proc_net_snmp6 = ""
+  proc_net_netstat = "/proc/net/netstat"
+  proc_net_snmp = "/proc/net/snmp"
+  proc_net_snmp6 = "/proc/net/snmp6"
   ## dump metrics with 0 values too
   dump_zeros = true
 `
@@ -141,7 +141,7 @@ func (ns *Nstat) loadPaths() {
 		ns.ProcNetSNMP = proc(ENV_SNMP, NET_SNMP)
 	}
 	if ns.ProcNetSNMP6 == "" {
-		ns.ProcNetSNMP = proc(ENV_SNMP6, NET_SNMP6)
+		ns.ProcNetSNMP6 = proc(ENV_SNMP6, NET_SNMP6)
 	}
 }
diff --git a/plugins/inputs/ntpq/ntpq.go b/plugins/inputs/ntpq/ntpq.go
index e9dc1cc14..0bcaa04e5 100644
--- a/plugins/inputs/ntpq/ntpq.go
+++ b/plugins/inputs/ntpq/ntpq.go
@@ -119,7 +119,7 @@ func (n *NTPQ) Gather(acc telegraf.Accumulator) error {
 	// Get integer metrics from output
 	for key, index := range intI {
-		if index == -1 {
+		if index == -1 || index >= len(fields) {
 			continue
 		}
 		if fields[index] == "-" {
@@ -169,7 +169,7 @@ func (n *NTPQ) Gather(acc telegraf.Accumulator) error {
 	// get float metrics from output
 	for key, index := range floatI {
-		if index == -1 {
+		if index == -1 || index >= len(fields) {
 			continue
 		}
 		if fields[index] == "-" {
diff --git a/plugins/inputs/ntpq/ntpq_test.go b/plugins/inputs/ntpq/ntpq_test.go
index 7e83243c0..4b6489949 100644
--- a/plugins/inputs/ntpq/ntpq_test.go
+++ b/plugins/inputs/ntpq/ntpq_test.go
@@ -41,6 +41,35 @@ func TestSingleNTPQ(t *testing.T) {
 	acc.AssertContainsTaggedFields(t, "ntpq", fields, tags)
 }

+func TestMissingJitterField(t *testing.T) {
+	tt := tester{
+		ret: []byte(missingJitterField),
+		err: nil,
+	}
+	n := &NTPQ{
+		runQ: tt.runqTest,
+	}
+
+	acc := testutil.Accumulator{}
+	assert.NoError(t, n.Gather(&acc))
+
+	fields := map[string]interface{}{
+		"when":   int64(101),
+		"poll":   int64(256),
+		"reach":  int64(37),
+		"delay":  float64(51.016),
+		"offset": float64(233.010),
+	}
+	tags := map[string]string{
+		"remote":       "uschi5-ntp-002.",
+		"state_prefix": "*",
+		"refid":        "10.177.80.46",
+		"stratum":      "2",
+		"type":         "u",
+	}
+	acc.AssertContainsTaggedFields(t, "ntpq", fields, tags)
+}
+
 func TestBadIntNTPQ(t *testing.T) {
 	tt := tester{
 		ret: []byte(badIntParseNTPQ),
@@ -381,6 +410,11 @@ var singleNTPQ = ` remote refid st t when poll reach delay
*uschi5-ntp-002. 10.177.80.46 2 u 101 256 37 51.016 233.010 17.462
`
+var missingJitterField = ` remote refid st t when poll reach delay offset jitter
+==============================================================================
+*uschi5-ntp-002. 10.177.80.46 2 u 101 256 37 51.016 233.010
+`
+
 var badHeaderNTPQ = `remote refid foobar t when poll reach delay offset jitter
==============================================================================
*uschi5-ntp-002. 10.177.80.46 2 u 101 256 37 51.016 233.010 17.462
diff --git a/plugins/inputs/ping/README.md b/plugins/inputs/ping/README.md
new file mode 100644
index 000000000..1f087c774
--- /dev/null
+++ b/plugins/inputs/ping/README.md
@@ -0,0 +1,36 @@
+# Ping input plugin
+
+This input plugin measures the round-trip time by running the system `ping` command.
+
+## Windows:
+### Configuration:
+```
+	## urls to ping
+	urls = ["www.google.com"] # required
+
+	## number of pings to send per collection (ping -n )
+	count = 4 # required
+
+	## Ping timeout, in seconds. 0 means default timeout (ping -w )
+	Timeout = 0
+```
+### Measurements & Fields:
+- packets_transmitted ( from ping output )
+- reply_received ( incremented only on a valid metric from an echo reply; e.g. a 'Destination net unreachable' reply will increment packets_received but not reply_received )
+- packets_received ( from ping output )
+- percent_reply_loss ( computed from packets_transmitted and reply_received )
+- percent_packet_loss ( computed from packets_transmitted and packets_received )
+- errors ( when the host cannot be found or wrong parameters are passed to the application )
+- response time
+  - average_response_ms ( from ping output )
+  - minimum_response_ms ( from ping output )
+  - maximum_response_ms ( from ping output )
+
+### Tags:
+- server
+
+### Example Output:
+```
+* Plugin: ping, Collection 1
+ping,host=WIN-PBAPLP511R7,url=www.google.com average_response_ms=7i,maximum_response_ms=9i,minimum_response_ms=7i,packets_received=4i,packets_transmitted=4i,percent_packet_loss=0,percent_reply_loss=0,reply_received=4i 1469879119000000000
+```
\ No newline at end of file
diff --git a/plugins/inputs/ping/ping.go b/plugins/inputs/ping/ping.go
index dfe67dc3f..b6baa7d99 100644
--- a/plugins/inputs/ping/ping.go
+++ b/plugins/inputs/ping/ping.go
@@ -28,7 +28,7 @@ type Ping struct {
 	// Number of pings to send (ping -c )
 	Count int

-	// Ping timeout, in seconds. 0 means no timeout (ping -t )
+	// Ping timeout, in seconds. 0 means no timeout (ping -W )
 	Timeout float64

 	// Interface to send ping from (ping -I )
@@ -55,7 +55,7 @@ const sampleConfig = `
 	count = 1 # required
 	## interval, in s, at which to ping. 0 == default (ping -i )
 	ping_interval = 0.0
-	## ping timeout, in s. 0 == no timeout (ping -W )
+	## per-ping timeout, in s. 0 == no timeout (ping -W )
 	timeout = 1.0
 	## interface to send ping from (ping -I )
 	interface = ""
@@ -76,7 +76,8 @@ func (p *Ping) Gather(acc telegraf.Accumulator) error {
 		go func(u string) {
 			defer wg.Done()
 			args := p.args(u)
-			out, err := p.pingHost(p.Timeout, args...)
+			totalTimeout := float64(p.Count)*p.Timeout + float64(p.Count-1)*p.PingInterval
+			out, err := p.pingHost(totalTimeout, args...)
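+			// illustrative numbers (not the defaults): count=5, timeout=1.0s and
+			// ping_interval=0.5s cap the whole run at 5*1.0 + 4*0.5 = 7.0 seconds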
if err != nil { // Combine go err + stderr output errorChannel <- errors.New( @@ -138,8 +139,8 @@ func (p *Ping) args(url string) []string { } if p.Timeout > 0 { switch runtime.GOOS { - case "darwin", "freebsd": - args = append(args, "-t", strconv.FormatFloat(p.Timeout, 'f', 1, 64)) + case "darwin": + args = append(args, "-W", strconv.FormatFloat(p.Timeout*1000, 'f', 1, 64)) case "linux": args = append(args, "-W", strconv.FormatFloat(p.Timeout, 'f', 1, 64)) default: diff --git a/plugins/inputs/ping/ping_test.go b/plugins/inputs/ping/ping_test.go index 25ecdf2fa..b5d0d16e7 100644 --- a/plugins/inputs/ping/ping_test.go +++ b/plugins/inputs/ping/ping_test.go @@ -95,7 +95,10 @@ func TestArgs(t *testing.T) { p.Timeout = 12.0 actual = p.args("www.google.com") switch runtime.GOOS { - case "darwin", "freebsd": + case "darwin": + expected = []string{"-c", "2", "-n", "-s", "16", "-I", "eth0", "-W", + "12000.0", "www.google.com"} + case "freebsd": expected = []string{"-c", "2", "-n", "-s", "16", "-I", "eth0", "-t", "12.0", "www.google.com"} default: @@ -111,7 +114,10 @@ func TestArgs(t *testing.T) { p.PingInterval = 1.2 actual = p.args("www.google.com") switch runtime.GOOS { - case "darwin", "freebsd": + case "darwin": + expected = []string{"-c", "2", "-n", "-s", "16", "-I", "eth0", "-W", + "12000.0", "-i", "1.2", "www.google.com"} + case "freebsd": expected = []string{"-c", "2", "-n", "-s", "16", "-I", "eth0", "-t", "12.0", "-i", "1.2", "www.google.com"} default: diff --git a/plugins/inputs/ping/ping_windows.go b/plugins/inputs/ping/ping_windows.go index b1d3ef06f..7fb112810 100644 --- a/plugins/inputs/ping/ping_windows.go +++ b/plugins/inputs/ping/ping_windows.go @@ -1,3 +1,223 @@ // +build windows - package ping + +import ( + "errors" + "github.com/influxdata/telegraf" + "github.com/influxdata/telegraf/internal" + "github.com/influxdata/telegraf/plugins/inputs" + "os/exec" + "regexp" + "strconv" + "strings" + "sync" + "time" +) + +// HostPinger is a function that runs the "ping" function using a list of +// passed arguments. This can be easily switched with a mocked ping function +// for unit test purposes (see ping_test.go) +type HostPinger func(timeout float64, args ...string) (string, error) + +type Ping struct { + // Number of pings to send (ping -c ) + Count int + + // Ping timeout, in seconds. 0 means no timeout (ping -W ) + Timeout float64 + + // URLs to ping + Urls []string + + // host ping function + pingHost HostPinger +} + +func (s *Ping) Description() string { + return "Ping given url(s) and return statistics" +} + +const sampleConfig = ` + ## urls to ping + urls = ["www.google.com"] # required + + ## number of pings to send per collection (ping -n ) + count = 4 # required + + ## Ping timeout, in seconds. 0 means default timeout (ping -w ) + Timeout = 0 +` + +func (s *Ping) SampleConfig() string { + return sampleConfig +} + +func hostPinger(timeout float64, args ...string) (string, error) { + bin, err := exec.LookPath("ping") + if err != nil { + return "", err + } + c := exec.Command(bin, args...) 
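+	// give the process one extra second of grace beyond the requested timeout
+	// before CombinedOutputTimeout kills it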
+	out, err := internal.CombinedOutputTimeout(c,
+		time.Second*time.Duration(timeout+1))
+	return string(out), err
+}
+
+// processPingOutput takes in a string output from the ping command, based on
+// the linux implementation but using regexes for multilanguage support.
+// It returns (transmitted, replyReceived, packetsReceived, avg, min, max, err)
+func processPingOutput(out string) (int, int, int, int, int, int, error) {
+	// Find a line containing 3 numbers, excluding the reply lines
+	var stats, aproxs []string = nil, nil
+	err := errors.New("Fatal error processing ping output")
+	stat := regexp.MustCompile(`=\W*(\d+)\D*=\W*(\d+)\D*=\W*(\d+)`)
+	aprox := regexp.MustCompile(`=\W*(\d+)\D*ms\D*=\W*(\d+)\D*ms\D*=\W*(\d+)\D*ms`)
+	tttLine := regexp.MustCompile(`TTL=\d+`)
+	lines := strings.Split(out, "\n")
+	var receivedReply int = 0
+	for _, line := range lines {
+		if tttLine.MatchString(line) {
+			receivedReply++
+		} else {
+			if stats == nil {
+				stats = stat.FindStringSubmatch(line)
+			}
+			if stats != nil && aproxs == nil {
+				aproxs = aprox.FindStringSubmatch(line)
+			}
+		}
+	}
+
+	// stats data should contain 4 members: entireExpression + ( Send, Receive, Lost )
+	if len(stats) != 4 {
+		return 0, 0, 0, 0, 0, 0, err
+	}
+	trans, err := strconv.Atoi(stats[1])
+	if err != nil {
+		return 0, 0, 0, 0, 0, 0, err
+	}
+	receivedPacket, err := strconv.Atoi(stats[2])
+	if err != nil {
+		return 0, 0, 0, 0, 0, 0, err
+	}
+
+	// aproxs data should contain 4 members: entireExpression + ( min, max, avg )
+	if len(aproxs) != 4 {
+		return trans, receivedReply, receivedPacket, 0, 0, 0, err
+	}
+	min, err := strconv.Atoi(aproxs[1])
+	if err != nil {
+		return trans, receivedReply, receivedPacket, 0, 0, 0, err
+	}
+	max, err := strconv.Atoi(aproxs[2])
+	if err != nil {
+		return trans, receivedReply, receivedPacket, 0, 0, 0, err
+	}
+	avg, err := strconv.Atoi(aproxs[3])
+	if err != nil {
+		return trans, receivedReply, receivedPacket, 0, 0, 0, err
+	}
+
+	return trans, receivedReply, receivedPacket, avg, min, max, err
+}
+
+func (p *Ping) timeout() float64 {
+	// According to MSDN, the default ping timeout for windows is 4 seconds
+	// Add also one second as a grace interval
+	if p.Timeout > 0 {
+		return p.Timeout + 1
+	}
+	return 4 + 1
+}
+
+// args returns the arguments for the 'ping' executable
+func (p *Ping) args(url string) []string {
+	args := []string{"-n", strconv.Itoa(p.Count)}
+
+	if p.Timeout > 0 {
+		args = append(args, "-w", strconv.FormatFloat(p.Timeout*1000, 'f', 0, 64))
+	}
+
+	args = append(args, url)
+
+	return args
+}
+
+func (p *Ping) Gather(acc telegraf.Accumulator) error {
+	var wg sync.WaitGroup
+	errorChannel := make(chan error, len(p.Urls)*2)
+	var pendingError error = nil
+	// Spin off a go routine for each url to ping
+	for _, url := range p.Urls {
+		wg.Add(1)
+		go func(u string) {
+			defer wg.Done()
+			args := p.args(u)
+			totalTimeout := p.timeout() * float64(p.Count)
+			out, err := p.pingHost(totalTimeout, args...)
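+			// illustrative: with the 4s Windows default (timeout() = 4+1 = 5s)
+			// and count = 4, the overall cap is 5*4 = 20 seconds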
+			// ping returns exit code != 0 also when there was no response from
+			// the host, even though the command itself executed successfully
+			if err != nil {
+				// Combine go err + stderr output
+				pendingError = errors.New(strings.TrimSpace(out) + ", " + err.Error())
+			}
+			tags := map[string]string{"url": u}
+			trans, recReply, receivePacket, avg, min, max, err := processPingOutput(out)
+			if err != nil {
+				// fatal error
+				if pendingError != nil {
+					errorChannel <- pendingError
+				}
+				errorChannel <- err
+				fields := map[string]interface{}{
+					"errors": 100.0,
+				}
+
+				acc.AddFields("ping", fields, tags)
+
+				return
+			}
+			// Calculate packet loss percentage
+			lossReply := float64(trans-recReply) / float64(trans) * 100.0
+			lossPackets := float64(trans-receivePacket) / float64(trans) * 100.0
+			fields := map[string]interface{}{
+				"packets_transmitted": trans,
+				"reply_received":      recReply,
+				"packets_received":    receivePacket,
+				"percent_packet_loss": lossPackets,
+				"percent_reply_loss":  lossReply,
+			}
+			if avg > 0 {
+				fields["average_response_ms"] = avg
+			}
+			if min > 0 {
+				fields["minimum_response_ms"] = min
+			}
+			if max > 0 {
+				fields["maximum_response_ms"] = max
+			}
+			acc.AddFields("ping", fields, tags)
+		}(url)
+	}
+
+	wg.Wait()
+	close(errorChannel)
+
+	// Get all errors and return them as one combined error
+	errorStrings := []string{}
+	for err := range errorChannel {
+		errorStrings = append(errorStrings, err.Error())
+	}
+
+	if len(errorStrings) == 0 {
+		return nil
+	}
+	return errors.New(strings.Join(errorStrings, "\n"))
+}
+
+func init() {
+	inputs.Add("ping", func() telegraf.Input {
+		return &Ping{pingHost: hostPinger}
+	})
+}
diff --git a/plugins/inputs/ping/ping_windows_test.go b/plugins/inputs/ping/ping_windows_test.go
new file mode 100644
index 000000000..34428b814
--- /dev/null
+++ b/plugins/inputs/ping/ping_windows_test.go
@@ -0,0 +1,328 @@
+// +build windows
+package ping
+
+import (
+	"errors"
+	"github.com/influxdata/telegraf/testutil"
+	"github.com/stretchr/testify/assert"
+	"testing"
+)
+
+// Windows ping output, Polish locale (exercises multilanguage support)
+var winPLPingOutput = `
+Badanie 8.8.8.8 z 32 bajtami danych:
+Odpowiedz z 8.8.8.8: bajtow=32 czas=49ms TTL=43
+Odpowiedz z 8.8.8.8: bajtow=32 czas=46ms TTL=43
+Odpowiedz z 8.8.8.8: bajtow=32 czas=48ms TTL=43
+Odpowiedz z 8.8.8.8: bajtow=32 czas=57ms TTL=43
+
+Statystyka badania ping dla 8.8.8.8:
+    Pakiety: Wyslane = 4, Odebrane = 4, Utracone = 0
+             (0% straty),
+Szacunkowy czas bladzenia pakietww w millisekundach:
+    Minimum = 46 ms, Maksimum = 57 ms, Czas sredni = 50 ms
+`
+
+// Windows ping output, English locale
+var winENPingOutput = ` +Pinging 8.8.8.8 with 32 bytes of data: +Reply from 8.8.8.8: bytes=32 time=52ms TTL=43 +Reply from 8.8.8.8: bytes=32 time=50ms TTL=43 +Reply from 8.8.8.8: bytes=32 time=50ms TTL=43 +Reply from 8.8.8.8: bytes=32 time=51ms TTL=43 + +Ping statistics for 8.8.8.8: + Packets: Sent = 4, Received = 4, Lost = 0 (0% loss), +Approximate round trip times in milli-seconds: + Minimum = 50ms, Maximum = 52ms, Average = 50ms +` + +func TestHost(t *testing.T) { + trans, recReply, recPacket, avg, min, max, err := processPingOutput(winPLPingOutput) + assert.NoError(t, err) + assert.Equal(t, 4, trans, "4 packets were transmitted") + assert.Equal(t, 4, recReply, "4 packets were reply") + assert.Equal(t, 4, recPacket, "4 packets were received") + assert.Equal(t, 50, avg, "Average 50") + assert.Equal(t, 46, min, "Min 46") + assert.Equal(t, 57, max, "max 57") + + trans, recReply, recPacket, avg, min, max, err = processPingOutput(winENPingOutput) + assert.NoError(t, err) + assert.Equal(t, 4, trans, "4 packets were transmitted") + assert.Equal(t, 4, recReply, "4 packets were reply") + assert.Equal(t, 4, recPacket, "4 packets were received") + assert.Equal(t, 50, avg, "Average 50") + assert.Equal(t, 50, min, "Min 50") + assert.Equal(t, 52, max, "Max 52") +} + +func mockHostPinger(timeout float64, args ...string) (string, error) { + return winENPingOutput, nil +} + +// Test that Gather function works on a normal ping +func TestPingGather(t *testing.T) { + var acc testutil.Accumulator + p := Ping{ + Urls: []string{"www.google.com", "www.reddit.com"}, + pingHost: mockHostPinger, + } + + p.Gather(&acc) + tags := map[string]string{"url": "www.google.com"} + fields := map[string]interface{}{ + "packets_transmitted": 4, + "packets_received": 4, + "reply_received": 4, + "percent_packet_loss": 0.0, + "percent_reply_loss": 0.0, + "average_response_ms": 50, + "minimum_response_ms": 50, + "maximum_response_ms": 52, + } + acc.AssertContainsTaggedFields(t, "ping", fields, tags) + + tags = map[string]string{"url": "www.reddit.com"} + acc.AssertContainsTaggedFields(t, "ping", fields, tags) +} + +var errorPingOutput = ` +Badanie nask.pl [195.187.242.157] z 32 bajtami danych: +Upłynął limit czasu żądania. +Upłynął limit czasu żądania. +Upłynął limit czasu żądania. +Upłynął limit czasu żądania. + +Statystyka badania ping dla 195.187.242.157: + Pakiety: Wysłane = 4, Odebrane = 0, Utracone = 4 + (100% straty), +` + +func mockErrorHostPinger(timeout float64, args ...string) (string, error) { + return errorPingOutput, errors.New("No packets received") +} + +// Test that Gather works on a ping with no transmitted packets, even though the +// command returns an error +func TestBadPingGather(t *testing.T) { + var acc testutil.Accumulator + p := Ping{ + Urls: []string{"www.amazon.com"}, + pingHost: mockErrorHostPinger, + } + + p.Gather(&acc) + tags := map[string]string{"url": "www.amazon.com"} + fields := map[string]interface{}{ + "packets_transmitted": 4, + "packets_received": 0, + "reply_received": 0, + "percent_packet_loss": 100.0, + "percent_reply_loss": 100.0, + } + acc.AssertContainsTaggedFields(t, "ping", fields, tags) +} + +var lossyPingOutput = ` +Badanie thecodinglove.com [66.6.44.4] z 9800 bajtami danych: +Upłynął limit czasu żądania. 
+Odpowiedź z 66.6.44.4: bajtów=9800 czas=114ms TTL=48 +Odpowiedź z 66.6.44.4: bajtów=9800 czas=114ms TTL=48 +Odpowiedź z 66.6.44.4: bajtów=9800 czas=118ms TTL=48 +Odpowiedź z 66.6.44.4: bajtów=9800 czas=114ms TTL=48 +Odpowiedź z 66.6.44.4: bajtów=9800 czas=114ms TTL=48 +Upłynął limit czasu żądania. +Odpowiedź z 66.6.44.4: bajtów=9800 czas=119ms TTL=48 +Odpowiedź z 66.6.44.4: bajtów=9800 czas=116ms TTL=48 + +Statystyka badania ping dla 66.6.44.4: + Pakiety: Wysłane = 9, Odebrane = 7, Utracone = 2 + (22% straty), +Szacunkowy czas błądzenia pakietów w millisekundach: + Minimum = 114 ms, Maksimum = 119 ms, Czas średni = 115 ms +` + +func mockLossyHostPinger(timeout float64, args ...string) (string, error) { + return lossyPingOutput, nil +} + +// Test that Gather works on a ping with lossy packets +func TestLossyPingGather(t *testing.T) { + var acc testutil.Accumulator + p := Ping{ + Urls: []string{"www.google.com"}, + pingHost: mockLossyHostPinger, + } + + p.Gather(&acc) + tags := map[string]string{"url": "www.google.com"} + fields := map[string]interface{}{ + "packets_transmitted": 9, + "packets_received": 7, + "reply_received": 7, + "percent_packet_loss": 22.22222222222222, + "percent_reply_loss": 22.22222222222222, + "average_response_ms": 115, + "minimum_response_ms": 114, + "maximum_response_ms": 119, + } + acc.AssertContainsTaggedFields(t, "ping", fields, tags) +} + +// Fatal ping output (invalid argument) +var fatalPingOutput = ` +Bad option -d. + + +Usage: ping [-t] [-a] [-n count] [-l size] [-f] [-i TTL] [-v TOS] + [-r count] [-s count] [[-j host-list] | [-k host-list]] + [-w timeout] [-R] [-S srcaddr] [-4] [-6] target_name + +Options: + -t Ping the specified host until stopped. + To see statistics and continue - type Control-Break; + To stop - type Control-C. + -a Resolve addresses to hostnames. + -n count Number of echo requests to send. + -l size Send buffer size. + -f Set Don't Fragment flag in packet (IPv4-only). + -i TTL Time To Live. + -v TOS Type Of Service (IPv4-only. This setting has been deprecated + and has no effect on the type of service field in the IP Header). + -r count Record route for count hops (IPv4-only). + -s count Timestamp for count hops (IPv4-only). + -j host-list Loose source route along host-list (IPv4-only). + -k host-list Strict source route along host-list (IPv4-only). + -w timeout Timeout in milliseconds to wait for each reply. + -R Use routing header to test reverse route also (IPv6-only). + -S srcaddr Source address to use. + -4 Force using IPv4. + -6 Force using IPv6. + +` + +func mockFatalHostPinger(timeout float64, args ...string) (string, error) { + return fatalPingOutput, errors.New("So very bad") +} + +// Test that a fatal ping command does not gather any statistics. 
+func TestFatalPingGather(t *testing.T) {
+	var acc testutil.Accumulator
+	p := Ping{
+		Urls:     []string{"www.amazon.com"},
+		pingHost: mockFatalHostPinger,
+	}
+
+	p.Gather(&acc)
+	assert.True(t, acc.HasFloatField("ping", "errors"),
+		"Fatal ping should have an errors field")
+	assert.False(t, acc.HasIntField("ping", "packets_transmitted"),
+		"Fatal ping should not have packet measurements")
+	assert.False(t, acc.HasIntField("ping", "packets_received"),
+		"Fatal ping should not have packet measurements")
+	assert.False(t, acc.HasFloatField("ping", "percent_packet_loss"),
+		"Fatal ping should not have packet measurements")
+	assert.False(t, acc.HasFloatField("ping", "percent_reply_loss"),
+		"Fatal ping should not have packet measurements")
+	assert.False(t, acc.HasIntField("ping", "average_response_ms"),
+		"Fatal ping should not have packet measurements")
+	assert.False(t, acc.HasIntField("ping", "maximum_response_ms"),
+		"Fatal ping should not have packet measurements")
+	assert.False(t, acc.HasIntField("ping", "minimum_response_ms"),
+		"Fatal ping should not have packet measurements")
+}
+
+var UnreachablePingOutput = `
+Pinging www.google.pl [8.8.8.8] with 32 bytes of data:
+Request timed out.
+Request timed out.
+Reply from 194.204.175.50: Destination net unreachable.
+Request timed out.
+
+Ping statistics for 8.8.8.8:
+    Packets: Sent = 4, Received = 1, Lost = 3 (75% loss),
+`
+
+func mockUnreachableHostPinger(timeout float64, args ...string) (string, error) {
+	return UnreachablePingOutput, errors.New("So very bad")
+}
+
+//Reply from 185.28.251.217: TTL expired in transit.
+
+// When the reply is 'Destination net unreachable', ping reports a received
+// packet, which is not what we need: it contains no valid metric, so treat
+// it as lost
+func TestUnreachablePingGather(t *testing.T) {
+	var acc testutil.Accumulator
+	p := Ping{
+		Urls:     []string{"www.google.com"},
+		pingHost: mockUnreachableHostPinger,
+	}
+
+	p.Gather(&acc)
+
+	tags := map[string]string{"url": "www.google.com"}
+	fields := map[string]interface{}{
+		"packets_transmitted": 4,
+		"packets_received":    1,
+		"reply_received":      0,
+		"percent_packet_loss": 75.0,
+		"percent_reply_loss":  100.0,
+	}
+	acc.AssertContainsTaggedFields(t, "ping", fields, tags)
+
+	assert.False(t, acc.HasFloatField("ping", "errors"),
+		"Fatal ping should not have packet measurements")
+	assert.False(t, acc.HasIntField("ping", "average_response_ms"),
+		"Fatal ping should not have packet measurements")
+	assert.False(t, acc.HasIntField("ping", "maximum_response_ms"),
+		"Fatal ping should not have packet measurements")
+	assert.False(t, acc.HasIntField("ping", "minimum_response_ms"),
+		"Fatal ping should not have packet measurements")
+}
+
+var TTLExpiredPingOutput = `
+Pinging www.google.pl [8.8.8.8] with 32 bytes of data:
+Request timed out.
+Request timed out.
+Reply from 185.28.251.217: TTL expired in transit.
+Request timed out.
+
+Ping statistics for 8.8.8.8:
+    Packets: Sent = 4, Received = 1, Lost = 3 (75% loss),
+`
+
+func mockTTLExpiredPinger(timeout float64, args ...string) (string, error) {
+	return TTLExpiredPingOutput, errors.New("So very bad")
+}
+
+// When the reply is 'TTL expired in transit', ping likewise reports a received
+// packet with no valid metric, so treat it as lost
+func TestTTLExpiredPingGather(t *testing.T) {
+	var acc testutil.Accumulator
+	p := Ping{
+		Urls:     []string{"www.google.com"},
+		pingHost: mockTTLExpiredPinger,
+	}
+
+	p.Gather(&acc)
+
+	tags := map[string]string{"url": "www.google.com"}
+	fields := map[string]interface{}{
+		"packets_transmitted": 4,
+		"packets_received":    1,
+		"reply_received":      0,
+		"percent_packet_loss": 75.0,
+		"percent_reply_loss":  100.0,
+	}
+	acc.AssertContainsTaggedFields(t, "ping", fields, tags)
+
+	assert.False(t, acc.HasFloatField("ping", "errors"),
+		"Fatal ping should not have packet measurements")
+	assert.False(t, acc.HasIntField("ping", "average_response_ms"),
+		"Fatal ping should not have packet measurements")
+	assert.False(t, acc.HasIntField("ping", "maximum_response_ms"),
+		"Fatal ping should not have packet measurements")
+	assert.False(t, acc.HasIntField("ping", "minimum_response_ms"),
+		"Fatal ping should not have packet measurements")
+}
diff --git a/plugins/inputs/postgresql_extensible/postgresql_extensible.go b/plugins/inputs/postgresql_extensible/postgresql_extensible.go
index 75bc6b936..ec281fca2 100644
--- a/plugins/inputs/postgresql_extensible/postgresql_extensible.go
+++ b/plugins/inputs/postgresql_extensible/postgresql_extensible.go
@@ -266,29 +266,33 @@ func (p *Postgresql) accRow(meas_name string, row scanner, acc telegraf.Accumula
 	tags := map[string]string{}
 	tags["server"] = tagAddress
 	tags["db"] = dbname.String()
-	var isATag int
 	fields := make(map[string]interface{})
+COLUMN:
 	for col, val := range columnMap {
 		if acc.Debug() {
 			log.Printf("postgresql_extensible: column: %s = %T: %s\n", col, *val, *val)
 		}
 		_, ignore := ignoredColumns[col]
-		if !ignore && *val != nil {
-			isATag = 0
-			for tag := range p.AdditionalTags {
-				if col == p.AdditionalTags[tag] {
-					isATag = 1
-					value_type_p := fmt.Sprintf(`%T`, *val)
-					if value_type_p == "[]uint8" {
-						tags[col] = fmt.Sprintf(`%s`, *val)
-					} else if value_type_p == "int64" {
-						tags[col] = fmt.Sprintf(`%v`, *val)
-					}
-				}
+		if ignore || *val == nil {
+			continue
+		}
+		for _, tag := range p.AdditionalTags {
+			if col != tag {
+				continue
 			}
-			if isATag == 0 {
-				fields[col] = *val
+			switch v := (*val).(type) {
+			case []byte:
+				tags[col] = string(v)
+			case int64:
+				tags[col] = fmt.Sprintf("%d", v)
 			}
+			continue COLUMN
+		}
+
+		if v, ok := (*val).([]byte); ok {
+			fields[col] = string(v)
+		} else {
+			fields[col] = *val
 		}
 	}
 	acc.AddFields(meas_name, fields, tags)
diff --git a/plugins/inputs/procstat/procstat.go b/plugins/inputs/procstat/procstat.go
index 3b9f0f76c..358dc4c0f 100644
--- a/plugins/inputs/procstat/procstat.go
+++ b/plugins/inputs/procstat/procstat.go
@@ -70,7 +70,7 @@ func (p *Procstat) Gather(acc telegraf.Accumulator) error {
 			p.Exe, p.PidFile, p.Pattern, p.User, err.Error())
 	} else {
 		for pid, proc := range p.pidmap {
-			p := NewSpecProcessor(p.ProcessName, p.Prefix, acc, proc, p.tagmap[pid])
+			p := NewSpecProcessor(p.ProcessName, p.Prefix, pid, acc, proc, p.tagmap[pid])
 			p.pushMetrics()
 		}
 	}
@@ -140,7 +140,6 @@ func (p *Procstat) pidsFromFile() ([]int32, error) {
 				out = append(out, int32(pid))
 				p.tagmap[int32(pid)] = map[string]string{
 					"pidfile": p.PidFile,
-					"pid":
strings.TrimSpace(string(pidString)), } } } @@ -165,7 +164,6 @@ func (p *Procstat) pidsFromExe() ([]int32, error) { out = append(out, int32(ipid)) p.tagmap[int32(ipid)] = map[string]string{ "exe": p.Exe, - "pid": pid, } } else { outerr = err @@ -193,7 +191,6 @@ func (p *Procstat) pidsFromPattern() ([]int32, error) { out = append(out, int32(ipid)) p.tagmap[int32(ipid)] = map[string]string{ "pattern": p.Pattern, - "pid": pid, } } else { outerr = err @@ -221,7 +218,6 @@ func (p *Procstat) pidsFromUser() ([]int32, error) { out = append(out, int32(ipid)) p.tagmap[int32(ipid)] = map[string]string{ "user": p.User, - "pid": pid, } } else { outerr = err diff --git a/plugins/inputs/procstat/spec_processor.go b/plugins/inputs/procstat/spec_processor.go index 0e73b60e9..5143d8bcc 100644 --- a/plugins/inputs/procstat/spec_processor.go +++ b/plugins/inputs/procstat/spec_processor.go @@ -10,6 +10,7 @@ import ( type SpecProcessor struct { Prefix string + pid int32 tags map[string]string fields map[string]interface{} acc telegraf.Accumulator @@ -19,6 +20,7 @@ type SpecProcessor struct { func NewSpecProcessor( processName string, prefix string, + pid int32, acc telegraf.Accumulator, p *process.Process, tags map[string]string, @@ -33,6 +35,7 @@ func NewSpecProcessor( } return &SpecProcessor{ Prefix: prefix, + pid: pid, tags: tags, fields: make(map[string]interface{}), acc: acc, @@ -45,7 +48,7 @@ func (p *SpecProcessor) pushMetrics() { if p.Prefix != "" { prefix = p.Prefix + "_" } - fields := map[string]interface{}{} + fields := map[string]interface{}{"pid": p.pid} numThreads, err := p.proc.NumThreads() if err == nil { @@ -68,7 +71,7 @@ func (p *SpecProcessor) pushMetrics() { fields[prefix+"read_count"] = io.ReadCount fields[prefix+"write_count"] = io.WriteCount fields[prefix+"read_bytes"] = io.ReadBytes - fields[prefix+"write_bytes"] = io.WriteCount + fields[prefix+"write_bytes"] = io.WriteBytes } cpu_time, err := p.proc.Times() diff --git a/plugins/inputs/prometheus/README.md b/plugins/inputs/prometheus/README.md index 3aa8c8afd..8298b9d27 100644 --- a/plugins/inputs/prometheus/README.md +++ b/plugins/inputs/prometheus/README.md @@ -30,6 +30,26 @@ to filter and some tags kubeservice = "kube-apiserver" ``` +```toml +# Authorize with a bearer token skipping cert verification +[[inputs.prometheus]] + # An array of urls to scrape metrics from. + urls = ["http://my-kube-apiserver:8080/metrics"] + bearer_token = '/path/to/bearer/token' + insecure_skip_verify = true +``` + +```toml +# Authorize using x509 certs +[[inputs.prometheus]] + # An array of urls to scrape metrics from. + urls = ["https://my-kube-apiserver:8080/metrics"] + + ssl_ca = '/path/to/cafile' + ssl_cert = '/path/to/certfile' + ssl_key = '/path/to/keyfile' +``` + ### Measurements & Fields & Tags: Measurements and fields could be any thing. diff --git a/plugins/inputs/prometheus/parser.go b/plugins/inputs/prometheus/parser.go index c6ad211f8..3c9ddc503 100644 --- a/plugins/inputs/prometheus/parser.go +++ b/plugins/inputs/prometheus/parser.go @@ -10,6 +10,8 @@ import ( "io" "math" "mime" + "net/http" + "time" "github.com/influxdata/telegraf" @@ -18,17 +20,9 @@ import ( "github.com/prometheus/common/expfmt" ) -// PrometheusParser is an object for Parsing incoming metrics. 
-type PrometheusParser struct { - // PromFormat - PromFormat map[string]string - // DefaultTags will be added to every parsed metric - // DefaultTags map[string]string -} - // Parse returns a slice of Metrics from a text representation of a // metrics -func (p *PrometheusParser) Parse(buf []byte) ([]telegraf.Metric, error) { +func Parse(buf []byte, header http.Header) ([]telegraf.Metric, error) { var metrics []telegraf.Metric var parser expfmt.TextParser // parse even if the buffer begins with a newline @@ -37,91 +31,71 @@ func (p *PrometheusParser) Parse(buf []byte) ([]telegraf.Metric, error) { buffer := bytes.NewBuffer(buf) reader := bufio.NewReader(buffer) - // Get format - mediatype, params, err := mime.ParseMediaType(p.PromFormat["Content-Type"]) + mediatype, params, err := mime.ParseMediaType(header.Get("Content-Type")) // Prepare output metricFamilies := make(map[string]*dto.MetricFamily) + if err == nil && mediatype == "application/vnd.google.protobuf" && params["encoding"] == "delimited" && params["proto"] == "io.prometheus.client.MetricFamily" { for { - metricFamily := &dto.MetricFamily{} - if _, err = pbutil.ReadDelimited(reader, metricFamily); err != nil { - if err == io.EOF { + mf := &dto.MetricFamily{} + if _, ierr := pbutil.ReadDelimited(reader, mf); ierr != nil { + if ierr == io.EOF { break } - return nil, fmt.Errorf("reading metric family protocol buffer failed: %s", err) + return nil, fmt.Errorf("reading metric family protocol buffer failed: %s", ierr) } - metricFamilies[metricFamily.GetName()] = metricFamily + metricFamilies[mf.GetName()] = mf } } else { metricFamilies, err = parser.TextToMetricFamilies(reader) if err != nil { return nil, fmt.Errorf("reading text format failed: %s", err) } - // read metrics - for metricName, mf := range metricFamilies { - for _, m := range mf.Metric { - // reading tags - tags := makeLabels(m) - /* - for key, value := range p.DefaultTags { - tags[key] = value - } - */ - // reading fields - fields := make(map[string]interface{}) - if mf.GetType() == dto.MetricType_SUMMARY { - // summary metric - fields = makeQuantiles(m) - fields["count"] = float64(m.GetHistogram().GetSampleCount()) - fields["sum"] = float64(m.GetSummary().GetSampleSum()) - } else if mf.GetType() == dto.MetricType_HISTOGRAM { - // historgram metric - fields = makeBuckets(m) - fields["count"] = float64(m.GetHistogram().GetSampleCount()) - fields["sum"] = float64(m.GetSummary().GetSampleSum()) + } + // read metrics + for metricName, mf := range metricFamilies { + for _, m := range mf.Metric { + // reading tags + tags := makeLabels(m) + // reading fields + fields := make(map[string]interface{}) + if mf.GetType() == dto.MetricType_SUMMARY { + // summary metric + fields = makeQuantiles(m) + fields["count"] = float64(m.GetSummary().GetSampleCount()) + fields["sum"] = float64(m.GetSummary().GetSampleSum()) + } else if mf.GetType() == dto.MetricType_HISTOGRAM { + // historgram metric + fields = makeBuckets(m) + fields["count"] = float64(m.GetHistogram().GetSampleCount()) + fields["sum"] = float64(m.GetHistogram().GetSampleSum()) + + } else { + // standard metric + fields = getNameAndValue(m) + } + // converting to telegraf metric + if len(fields) > 0 { + var t time.Time + if m.TimestampMs != nil && *m.TimestampMs > 0 { + t = time.Unix(0, *m.TimestampMs*1000000) } else { - // standard metric - fields = getNameAndValue(m) + t = time.Now() } - // converting to telegraf metric - if len(fields) > 0 { - metric, err := telegraf.NewMetric(metricName, tags, fields) - if err == nil { - 
metrics = append(metrics, metric) - } + metric, err := telegraf.NewMetric(metricName, tags, fields, t) + if err == nil { + metrics = append(metrics, metric) } } } } + return metrics, err } -// Parse one line -func (p *PrometheusParser) ParseLine(line string) (telegraf.Metric, error) { - metrics, err := p.Parse([]byte(line + "\n")) - - if err != nil { - return nil, err - } - - if len(metrics) < 1 { - return nil, fmt.Errorf( - "Can not parse the line: %s, for data format: prometheus", line) - } - - return metrics[0], nil -} - -/* -// Set default tags -func (p *PrometheusParser) SetDefaultTags(tags map[string]string) { - p.DefaultTags = tags -} -*/ - // Get Quantiles from summary metric func makeQuantiles(m *dto.Metric) map[string]interface{} { fields := make(map[string]interface{}) diff --git a/plugins/inputs/prometheus/parser_test.go b/plugins/inputs/prometheus/parser_test.go index 5c33260be..4f2a8516f 100644 --- a/plugins/inputs/prometheus/parser_test.go +++ b/plugins/inputs/prometheus/parser_test.go @@ -1,6 +1,7 @@ package prometheus import ( + "net/http" "testing" "time" @@ -101,10 +102,8 @@ cpu,host=foo,datacenter=us-east usage_idle=99,usage_busy=1 ` func TestParseValidPrometheus(t *testing.T) { - parser := PrometheusParser{} - // Gauge value - metrics, err := parser.Parse([]byte(validUniqueGauge)) + metrics, err := Parse([]byte(validUniqueGauge), http.Header{}) assert.NoError(t, err) assert.Len(t, metrics, 1) assert.Equal(t, "cadvisor_version_info", metrics[0].Name()) @@ -118,8 +117,7 @@ func TestParseValidPrometheus(t *testing.T) { }, metrics[0].Tags()) // Counter value - //parser.SetDefaultTags(map[string]string{"mytag": "mytagvalue"}) - metrics, err = parser.Parse([]byte(validUniqueCounter)) + metrics, err = Parse([]byte(validUniqueCounter), http.Header{}) assert.NoError(t, err) assert.Len(t, metrics, 1) assert.Equal(t, "get_token_fail_count", metrics[0].Name()) @@ -129,8 +127,8 @@ func TestParseValidPrometheus(t *testing.T) { assert.Equal(t, map[string]string{}, metrics[0].Tags()) // Summary data - //parser.SetDefaultTags(map[string]string{}) - metrics, err = parser.Parse([]byte(validUniqueSummary)) + //SetDefaultTags(map[string]string{}) + metrics, err = Parse([]byte(validUniqueSummary), http.Header{}) assert.NoError(t, err) assert.Len(t, metrics, 1) assert.Equal(t, "http_request_duration_microseconds", metrics[0].Name()) @@ -138,20 +136,20 @@ func TestParseValidPrometheus(t *testing.T) { "0.5": 552048.506, "0.9": 5.876804288e+06, "0.99": 5.876804288e+06, - "count": 0.0, + "count": 9.0, "sum": 1.8909097205e+07, }, metrics[0].Fields()) assert.Equal(t, map[string]string{"handler": "prometheus"}, metrics[0].Tags()) // histogram data - metrics, err = parser.Parse([]byte(validUniqueHistogram)) + metrics, err = Parse([]byte(validUniqueHistogram), http.Header{}) assert.NoError(t, err) assert.Len(t, metrics, 1) assert.Equal(t, "apiserver_request_latencies", metrics[0].Name()) assert.Equal(t, map[string]interface{}{ "500000": 2000.0, "count": 2025.0, - "sum": 0.0, + "sum": 1.02726334e+08, "250000": 1997.0, "2e+06": 2012.0, "4e+06": 2017.0, @@ -165,11 +163,3 @@ func TestParseValidPrometheus(t *testing.T) { metrics[0].Tags()) } - -func TestParseLineInvalidPrometheus(t *testing.T) { - parser := PrometheusParser{} - metric, err := parser.ParseLine(validUniqueLine) - assert.NotNil(t, err) - assert.Nil(t, metric) - -} diff --git a/plugins/inputs/prometheus/prometheus.go b/plugins/inputs/prometheus/prometheus.go index 1c60a363e..12f7fd38e 100644 --- a/plugins/inputs/prometheus/prometheus.go +++ 
b/plugins/inputs/prometheus/prometheus.go
@@ -1,10 +1,10 @@
 package prometheus

 import (
-	"crypto/tls"
 	"errors"
 	"fmt"
 	"github.com/influxdata/telegraf"
+	"github.com/influxdata/telegraf/internal"
 	"github.com/influxdata/telegraf/plugins/inputs"
 	"io/ioutil"
 	"net"
@@ -13,23 +13,37 @@ import (
 	"time"
 )

+const acceptHeader = `application/vnd.google.protobuf;proto=io.prometheus.client.MetricFamily;encoding=delimited;q=0.7,text/plain;version=0.0.4;q=0.3`
+
 type Prometheus struct {
 	Urls []string

-	// Use SSL but skip chain & host verification
-	InsecureSkipVerify bool
 	// Bearer Token authorization file path
 	BearerToken string `toml:"bearer_token"`
+
+	// Path to CA file
+	SSLCA string `toml:"ssl_ca"`
+	// Path to host cert file
+	SSLCert string `toml:"ssl_cert"`
+	// Path to cert key file
+	SSLKey string `toml:"ssl_key"`
+	// Use SSL but skip chain & host verification
+	InsecureSkipVerify bool
 }

 var sampleConfig = `
 	## An array of urls to scrape metrics from.
 	urls = ["http://localhost:9100/metrics"]

-	## Use SSL but skip chain & host verification
-	# insecure_skip_verify = false
 	## Use bearer token for authorization
 	# bearer_token = /path/to/bearer/token
+
+	## Optional SSL Config
+	# ssl_ca = /path/to/cafile
+	# ssl_cert = /path/to/certfile
+	# ssl_key = /path/to/keyfile
+	## Use SSL but skip chain & host verification
+	# insecure_skip_verify = false
`

 func (p *Prometheus) SampleConfig() string {
@@ -74,20 +88,25 @@ var client = &http.Client{
 func (p *Prometheus) gatherURL(url string, acc telegraf.Accumulator) error {
 	collectDate := time.Now()
 	var req, err = http.NewRequest("GET", url, nil)
-	req.Header = make(http.Header)
+	req.Header.Add("Accept", acceptHeader)
 	var token []byte
 	var resp *http.Response

+	tlsCfg, err := internal.GetTLSConfig(
+		p.SSLCert, p.SSLKey, p.SSLCA, p.InsecureSkipVerify)
+	if err != nil {
+		return err
+	}
+
 	var rt http.RoundTripper = &http.Transport{
 		Dial: (&net.Dialer{
 			Timeout:   5 * time.Second,
 			KeepAlive: 30 * time.Second,
 		}).Dial,
-		TLSHandshakeTimeout: 5 * time.Second,
-		TLSClientConfig: &tls.Config{
-			InsecureSkipVerify: p.InsecureSkipVerify,
-		},
+		TLSHandshakeTimeout:   5 * time.Second,
+		TLSClientConfig:       tlsCfg,
 		ResponseHeaderTimeout: time.Duration(3 * time.Second),
+		DisableKeepAlives:     true,
 	}

 	if p.BearerToken != "" {
@@ -112,20 +131,9 @@ func (p *Prometheus) gatherURL(url string, acc telegraf.Accumulator) error {
 		return fmt.Errorf("error reading body: %s", err)
 	}

-	// Headers
-	headers := make(map[string]string)
-	for key, value := range headers {
-		headers[key] = value
-	}
-
-	// Prepare Prometheus parser config
-	promparser := PrometheusParser{
-		PromFormat: headers,
-	}
-
-	metrics, err := promparser.Parse(body)
+	metrics, err := Parse(body, resp.Header)
 	if err != nil {
-		return fmt.Errorf("error getting processing samples for %s: %s",
+		return fmt.Errorf("error reading metrics for %s: %s",
 			url, err)
 	}
 	// Add (or not) collected metrics
diff --git a/plugins/inputs/rabbitmq/rabbitmq.go b/plugins/inputs/rabbitmq/rabbitmq.go
index 18d666a08..8a879d179 100644
--- a/plugins/inputs/rabbitmq/rabbitmq.go
+++ b/plugins/inputs/rabbitmq/rabbitmq.go
@@ -9,35 +9,59 @@ import (
 	"time"

 	"github.com/influxdata/telegraf"
+	"github.com/influxdata/telegraf/internal"
 	"github.com/influxdata/telegraf/internal/errchan"
 	"github.com/influxdata/telegraf/plugins/inputs"
 )

+// DefaultUsername will set a default value that corresponds to the default
+// value used by RabbitMQ
 const DefaultUsername = "guest"
+
+// DefaultPassword will set a default value that corresponds to the default
+// value used by RabbitMQ
 const DefaultPassword = "guest"
+
+// DefaultURL will set a default value that corresponds to the default value
+// used by RabbitMQ
 const DefaultURL = "http://localhost:15672"

+// RabbitMQ defines the configuration necessary for gathering metrics,
+// see the sample config for further details
 type RabbitMQ struct {
 	URL      string
 	Name     string
 	Username string
 	Password string
-	Nodes    []string
-	Queues   []string

+	// Path to CA file
+	SSLCA string `toml:"ssl_ca"`
+	// Path to host cert file
+	SSLCert string `toml:"ssl_cert"`
+	// Path to cert key file
+	SSLKey string `toml:"ssl_key"`
+	// Use SSL but skip chain & host verification
+	InsecureSkipVerify bool
+
+	// InsecureSkipVerify bool
+	Nodes  []string
+	Queues []string

 	Client *http.Client
 }

+// OverviewResponse ...
 type OverviewResponse struct {
 	MessageStats *MessageStats `json:"message_stats"`
 	ObjectTotals *ObjectTotals `json:"object_totals"`
 	QueueTotals  *QueueTotals  `json:"queue_totals"`
 }

+// Details ...
 type Details struct {
 	Rate float64
 }

+// MessageStats ...
 type MessageStats struct {
 	Ack        int64
 	AckDetails Details `json:"ack_details"`
@@ -51,6 +75,7 @@ type MessageStats struct {
 	RedeliverDetails Details `json:"redeliver_details"`
 }

+// ObjectTotals ...
 type ObjectTotals struct {
 	Channels    int64
 	Connections int64
@@ -59,6 +84,7 @@ type ObjectTotals struct {
 	Queues int64
 }

+// QueueTotals ...
 type QueueTotals struct {
 	Messages       int64
 	MessagesReady  int64 `json:"messages_ready"`
@@ -66,10 +92,11 @@ type QueueTotals struct {
 	MessageBytes               int64 `json:"message_bytes"`
 	MessageBytesReady          int64 `json:"message_bytes_ready"`
 	MessageBytesUnacknowledged int64 `json:"message_bytes_unacknowledged"`
-	MessageRam                 int64 `json:"message_bytes_ram"`
+	MessageRAM                 int64 `json:"message_bytes_ram"`
 	MessagePersistent          int64 `json:"message_bytes_persistent"`
 }

+// Queue ...
 type Queue struct {
 	QueueTotals  // just to not repeat the same code
 	MessageStats `json:"message_stats"`
@@ -83,6 +110,7 @@ type Queue struct {
 	AutoDelete bool `json:"auto_delete"`
 }

+// Node ...
 type Node struct {
 	Name string
@@ -99,6 +127,7 @@ type Node struct {
 	SocketsUsed int64 `json:"sockets_used"`
 }

+// gatherFunc ...
 type gatherFunc func(r *RabbitMQ, acc telegraf.Accumulator, errChan chan error)

 var gatherFunctions = []gatherFunc{gatherOverview, gatherNodes, gatherQueues}
@@ -109,22 +138,40 @@ var sampleConfig = `
 	# username = "guest"
 	# password = "guest"

+	## Optional SSL Config
+	# ssl_ca = "/etc/telegraf/ca.pem"
+	# ssl_cert = "/etc/telegraf/cert.pem"
+	# ssl_key = "/etc/telegraf/key.pem"
+	## Use SSL but skip chain & host verification
+	# insecure_skip_verify = false
+
 	## A list of nodes to pull metrics about. If not specified, metrics for
 	## all nodes are gathered.
 	# nodes = ["rabbit@node1", "rabbit@node2"]
`

+// SampleConfig ...
 func (r *RabbitMQ) SampleConfig() string {
 	return sampleConfig
 }

+// Description ...
 func (r *RabbitMQ) Description() string {
 	return "Read metrics from one or many RabbitMQ servers via the management API"
 }

+// Gather ...
func (r *RabbitMQ) Gather(acc telegraf.Accumulator) error { if r.Client == nil { - tr := &http.Transport{ResponseHeaderTimeout: time.Duration(3 * time.Second)} + tlsCfg, err := internal.GetTLSConfig( + r.SSLCert, r.SSLKey, r.SSLCA, r.InsecureSkipVerify) + if err != nil { + return err + } + tr := &http.Transport{ + ResponseHeaderTimeout: time.Duration(3 * time.Second), + TLSClientConfig: tlsCfg, + } r.Client = &http.Client{ Transport: tr, Timeout: time.Duration(4 * time.Second), @@ -286,7 +333,7 @@ func gatherQueues(r *RabbitMQ, acc telegraf.Accumulator, errChan chan error) { "message_bytes": queue.MessageBytes, "message_bytes_ready": queue.MessageBytesReady, "message_bytes_unacked": queue.MessageBytesUnacknowledged, - "message_bytes_ram": queue.MessageRam, + "message_bytes_ram": queue.MessageRAM, "message_bytes_persist": queue.MessagePersistent, "messages": queue.Messages, "messages_ready": queue.MessagesReady, diff --git a/plugins/inputs/redis/README.md b/plugins/inputs/redis/README.md index 1cbaea0ca..51b596aa0 100644 --- a/plugins/inputs/redis/README.md +++ b/plugins/inputs/redis/README.md @@ -43,6 +43,7 @@ - latest_fork_usec - connected_slaves - master_repl_offset + - master_last_io_seconds_ago - repl_backlog_active - repl_backlog_size - repl_backlog_histlen @@ -57,6 +58,7 @@ - All measurements have the following tags: - port - server + - replication role ### Example Output: diff --git a/plugins/inputs/redis/redis.go b/plugins/inputs/redis/redis.go index 94f562471..b08eedee3 100644 --- a/plugins/inputs/redis/redis.go +++ b/plugins/inputs/redis/redis.go @@ -12,6 +12,7 @@ import ( "time" "github.com/influxdata/telegraf" + "github.com/influxdata/telegraf/internal/errchan" "github.com/influxdata/telegraf/plugins/inputs" ) @@ -25,6 +26,7 @@ var sampleConfig = ` ## e.g. ## tcp://localhost:6379 ## tcp://:password@192.168.99.100 + ## unix:///var/run/redis.sock ## ## If no servers are specified, then localhost is used as the host. ## If no port is specified, 6379 is used @@ -66,6 +68,7 @@ var Tracking = map[string]string{ "latest_fork_usec": "latest_fork_usec", "connected_slaves": "connected_slaves", "master_repl_offset": "master_repl_offset", + "master_last_io_seconds_ago": "master_last_io_seconds_ago", "repl_backlog_active": "repl_backlog_active", "repl_backlog_size": "repl_backlog_size", "repl_backlog_histlen": "repl_backlog_histlen", @@ -74,27 +77,32 @@ var Tracking = map[string]string{ "used_cpu_user": "used_cpu_user", "used_cpu_sys_children": "used_cpu_sys_children", "used_cpu_user_children": "used_cpu_user_children", - "role": "role", + "role": "replication_role", } var ErrProtocolError = errors.New("redis protocol error") +const defaultPort = "6379" + // Reads stats from all configured servers accumulates stats. // Returns one of the errors encountered while gather stats (if any). 
func (r *Redis) Gather(acc telegraf.Accumulator) error { if len(r.Servers) == 0 { url := &url.URL{ - Host: ":6379", + Scheme: "tcp", + Host: ":6379", } r.gatherServer(url, acc) return nil } var wg sync.WaitGroup - - var outerr error - + errChan := errchan.New(len(r.Servers)) for _, serv := range r.Servers { + if !strings.HasPrefix(serv, "tcp://") && !strings.HasPrefix(serv, "unix://") { + serv = "tcp://" + serv + } + u, err := url.Parse(serv) if err != nil { return fmt.Errorf("Unable to parse to address '%s': %s", serv, err) @@ -104,29 +112,35 @@ func (r *Redis) Gather(acc telegraf.Accumulator) error { u.Host = serv u.Path = "" } + if u.Scheme == "tcp" { + _, _, err := net.SplitHostPort(u.Host) + if err != nil { + u.Host = u.Host + ":" + defaultPort + } + } + wg.Add(1) go func(serv string) { defer wg.Done() - outerr = r.gatherServer(u, acc) + errChan.C <- r.gatherServer(u, acc) }(serv) } wg.Wait() - - return outerr + return errChan.Error() } -const defaultPort = "6379" - func (r *Redis) gatherServer(addr *url.URL, acc telegraf.Accumulator) error { - _, _, err := net.SplitHostPort(addr.Host) - if err != nil { - addr.Host = addr.Host + ":" + defaultPort - } + var address string - c, err := net.DialTimeout("tcp", addr.Host, defaultTimeout) + if addr.Scheme == "unix" { + address = addr.Path + } else { + address = addr.Host + } + c, err := net.DialTimeout(addr.Scheme, address, defaultTimeout) if err != nil { - return fmt.Errorf("Unable to connect to redis server '%s': %s", addr.Host, err) + return fmt.Errorf("Unable to connect to redis server '%s': %s", address, err) } defer c.Close() @@ -154,12 +168,17 @@ func (r *Redis) gatherServer(addr *url.URL, acc telegraf.Accumulator) error { c.Write([]byte("EOF\r\n")) rdr := bufio.NewReader(c) - // Setup tags for all redis metrics - host, port := "unknown", "unknown" - // If there's an error, ignore and use 'unknown' tags - host, port, _ = net.SplitHostPort(addr.Host) - tags := map[string]string{"server": host, "port": port} + var tags map[string]string + if addr.Scheme == "unix" { + tags = map[string]string{"socket": addr.Path} + } else { + // Setup tags for all redis metrics + host, port := "unknown", "unknown" + // If there's an error, ignore and use 'unknown' tags + host, port, _ = net.SplitHostPort(addr.Host) + tags = map[string]string{"server": host, "port": port} + } return gatherInfoOutput(rdr, acc, tags) } @@ -208,7 +227,7 @@ func gatherInfoOutput( } if name == "role" { - tags["role"] = val + tags["replication_role"] = val continue } diff --git a/plugins/inputs/redis/redis_test.go b/plugins/inputs/redis/redis_test.go index b12950ee4..2e2fc1e37 100644 --- a/plugins/inputs/redis/redis_test.go +++ b/plugins/inputs/redis/redis_test.go @@ -35,7 +35,7 @@ func TestRedis_ParseMetrics(t *testing.T) { err := gatherInfoOutput(rdr, &acc, tags) require.NoError(t, err) - tags = map[string]string{"host": "redis.net", "role": "master"} + tags = map[string]string{"host": "redis.net", "replication_role": "master"} fields := map[string]interface{}{ "uptime": uint64(238), "clients": uint64(1), @@ -71,7 +71,7 @@ func TestRedis_ParseMetrics(t *testing.T) { "used_cpu_user_children": float64(0.00), "keyspace_hitrate": float64(0.50), } - keyspaceTags := map[string]string{"host": "redis.net", "role": "master", "database": "db0"} + keyspaceTags := map[string]string{"host": "redis.net", "replication_role": "master", "database": "db0"} keyspaceFields := map[string]interface{}{ "avg_ttl": uint64(0), "expires": uint64(0), diff --git a/plugins/inputs/riak/README.md 
b/plugins/inputs/riak/README.md index 07f2eb09d..7832feecf 100644 --- a/plugins/inputs/riak/README.md +++ b/plugins/inputs/riak/README.md @@ -58,6 +58,8 @@ Riak provides one measurement named "riak", with the following fields: - vnode_index_writes_total - vnode_puts - vnode_puts_total +- read_repairs +- read_repairs_total Measurements of time (such as node_get_fsm_time_mean) are measured in nanoseconds. @@ -72,5 +74,5 @@ All measurements have the following tags: ``` $ ./telegraf -config telegraf.conf -input-filter riak -test -> riak,nodename=riak@127.0.0.1,server=localhost:8098 cpu_avg1=31i,cpu_avg15=69i,cpu_avg5=51i,memory_code=11563738i,memory_ets=5925872i,memory_processes=30236069i,memory_system=93074971i,memory_total=123311040i,node_get_fsm_objsize_100=0i,node_get_fsm_objsize_95=0i,node_get_fsm_objsize_99=0i,node_get_fsm_objsize_mean=0i,node_get_fsm_objsize_median=0i,node_get_fsm_siblings_100=0i,node_get_fsm_siblings_95=0i,node_get_fsm_siblings_99=0i,node_get_fsm_siblings_mean=0i,node_get_fsm_siblings_median=0i,node_get_fsm_time_100=0i,node_get_fsm_time_95=0i,node_get_fsm_time_99=0i,node_get_fsm_time_mean=0i,node_get_fsm_time_median=0i,node_gets=0i,node_gets_total=19i,node_put_fsm_time_100=0i,node_put_fsm_time_95=0i,node_put_fsm_time_99=0i,node_put_fsm_time_mean=0i,node_put_fsm_time_median=0i,node_puts=0i,node_puts_total=0i,pbc_active=0i,pbc_connects=0i,pbc_connects_total=20i,vnode_gets=0i,vnode_gets_total=57i,vnode_index_reads=0i,vnode_index_reads_total=0i,vnode_index_writes=0i,vnode_index_writes_total=0i,vnode_puts=0i,vnode_puts_total=0i 1455913392622482332 -``` \ No newline at end of file +> riak,nodename=riak@127.0.0.1,server=localhost:8098 cpu_avg1=31i,cpu_avg15=69i,cpu_avg5=51i,memory_code=11563738i,memory_ets=5925872i,memory_processes=30236069i,memory_system=93074971i,memory_total=123311040i,node_get_fsm_objsize_100=0i,node_get_fsm_objsize_95=0i,node_get_fsm_objsize_99=0i,node_get_fsm_objsize_mean=0i,node_get_fsm_objsize_median=0i,node_get_fsm_siblings_100=0i,node_get_fsm_siblings_95=0i,node_get_fsm_siblings_99=0i,node_get_fsm_siblings_mean=0i,node_get_fsm_siblings_median=0i,node_get_fsm_time_100=0i,node_get_fsm_time_95=0i,node_get_fsm_time_99=0i,node_get_fsm_time_mean=0i,node_get_fsm_time_median=0i,node_gets=0i,node_gets_total=19i,node_put_fsm_time_100=0i,node_put_fsm_time_95=0i,node_put_fsm_time_99=0i,node_put_fsm_time_mean=0i,node_put_fsm_time_median=0i,node_puts=0i,node_puts_total=0i,pbc_active=0i,pbc_connects=0i,pbc_connects_total=20i,vnode_gets=0i,vnode_gets_total=57i,vnode_index_reads=0i,vnode_index_reads_total=0i,vnode_index_writes=0i,vnode_index_writes_total=0i,vnode_puts=0i,vnode_puts_total=0i,read_repair=0i,read_repairs_total=0i 1455913392622482332 +``` diff --git a/plugins/inputs/riak/riak.go b/plugins/inputs/riak/riak.go index 56231176b..19bf7df04 100644 --- a/plugins/inputs/riak/riak.go +++ b/plugins/inputs/riak/riak.go @@ -75,6 +75,8 @@ type riakStats struct { VnodeIndexWritesTotal int64 `json:"vnode_index_writes_total"` VnodePuts int64 `json:"vnode_puts"` VnodePutsTotal int64 `json:"vnode_puts_total"` + ReadRepairs int64 `json:"read_repairs"` + ReadRepairsTotal int64 `json:"read_repairs_total"` } // A sample configuration to only gather stats from localhost, default port. 
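Not part of the patch: a minimal, self-contained sketch of how the two new fields decode from the Riak `/stats` JSON, using the `json` struct tags added above (the sample values are taken from the test below).

```go
package main

import (
	"encoding/json"
	"fmt"
)

// Subset of riakStats, limited to the two fields added by this patch.
type riakStats struct {
	ReadRepairs      int64 `json:"read_repairs"`
	ReadRepairsTotal int64 `json:"read_repairs_total"`
}

func main() {
	// Truncated example of a /stats response body.
	body := []byte(`{"read_repairs": 2, "read_repairs_total": 7918375}`)

	var stats riakStats
	if err := json.Unmarshal(body, &stats); err != nil {
		panic(err)
	}
	fmt.Println(stats.ReadRepairs, stats.ReadRepairsTotal) // 2 7918375
}
```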
@@ -187,6 +189,8 @@ func (r *Riak) gatherServer(s string, acc telegraf.Accumulator) error { "vnode_index_writes_total": stats.VnodeIndexWritesTotal, "vnode_puts": stats.VnodePuts, "vnode_puts_total": stats.VnodePutsTotal, + "read_repairs": stats.ReadRepairs, + "read_repairs_total": stats.ReadRepairsTotal, } // Accumulate the tags and values diff --git a/plugins/inputs/riak/riak_test.go b/plugins/inputs/riak/riak_test.go index 49da4e7ea..09f9a961f 100644 --- a/plugins/inputs/riak/riak_test.go +++ b/plugins/inputs/riak/riak_test.go @@ -66,6 +66,8 @@ func TestRiak(t *testing.T) { "node_put_fsm_time_99": int64(84422), "node_put_fsm_time_mean": int64(10832), "node_put_fsm_time_median": int64(4085), + "read_repairs": int64(2), + "read_repairs_total": int64(7918375), "node_puts": int64(1155), "node_puts_total": int64(444895769), "pbc_active": int64(360), diff --git a/plugins/inputs/rollbar_webhooks/rollbar_webhooks.go b/plugins/inputs/rollbar_webhooks/rollbar_webhooks.go deleted file mode 100644 index 5e7dc8847..000000000 --- a/plugins/inputs/rollbar_webhooks/rollbar_webhooks.go +++ /dev/null @@ -1,119 +0,0 @@ -package rollbar_webhooks - -import ( - "encoding/json" - "errors" - "fmt" - "io/ioutil" - "log" - "net/http" - "sync" - "time" - - "github.com/gorilla/mux" - "github.com/influxdata/telegraf" - "github.com/influxdata/telegraf/plugins/inputs" -) - -func init() { - inputs.Add("rollbar_webhooks", func() telegraf.Input { return NewRollbarWebhooks() }) -} - -type RollbarWebhooks struct { - ServiceAddress string - // Lock for the struct - sync.Mutex - // Events buffer to store events between Gather calls - events []Event -} - -func NewRollbarWebhooks() *RollbarWebhooks { - return &RollbarWebhooks{} -} - -func (rb *RollbarWebhooks) SampleConfig() string { - return ` - ## Address and port to host Webhook listener on - service_address = ":1619" -` -} - -func (rb *RollbarWebhooks) Description() string { - return "A Rollbar Webhook Event collector" -} - -func (rb *RollbarWebhooks) Gather(acc telegraf.Accumulator) error { - rb.Lock() - defer rb.Unlock() - for _, event := range rb.events { - acc.AddFields("rollbar_webhooks", event.Fields(), event.Tags(), time.Now()) - } - rb.events = make([]Event, 0) - return nil -} - -func (rb *RollbarWebhooks) Listen() { - r := mux.NewRouter() - r.HandleFunc("/", rb.eventHandler).Methods("POST") - err := http.ListenAndServe(fmt.Sprintf("%s", rb.ServiceAddress), r) - if err != nil { - log.Printf("Error starting server: %v", err) - } -} - -func (rb *RollbarWebhooks) Start(_ telegraf.Accumulator) error { - go rb.Listen() - log.Printf("Started the rollbar_webhooks service on %s\n", rb.ServiceAddress) - return nil -} - -func (rb *RollbarWebhooks) Stop() { - log.Println("Stopping the rbWebhooks service") -} - -func (rb *RollbarWebhooks) eventHandler(w http.ResponseWriter, r *http.Request) { - defer r.Body.Close() - data, err := ioutil.ReadAll(r.Body) - if err != nil { - w.WriteHeader(http.StatusBadRequest) - return - } - - dummyEvent := &DummyEvent{} - err = json.Unmarshal(data, dummyEvent) - if err != nil { - w.WriteHeader(http.StatusBadRequest) - return - } - - event, err := NewEvent(dummyEvent, data) - if err != nil { - w.WriteHeader(http.StatusOK) - return - } - - rb.Lock() - rb.events = append(rb.events, event) - rb.Unlock() - - w.WriteHeader(http.StatusOK) -} - -func generateEvent(event Event, data []byte) (Event, error) { - err := json.Unmarshal(data, event) - if err != nil { - return nil, err - } - return event, nil -} - -func NewEvent(dummyEvent *DummyEvent, data 
[]byte) (Event, error) {
-	switch dummyEvent.EventName {
-	case "new_item":
-		return generateEvent(&NewItem{}, data)
-	case "deploy":
-		return generateEvent(&Deploy{}, data)
-	default:
-		return nil, errors.New("Not implemented type: " + dummyEvent.EventName)
-	}
-}
diff --git a/plugins/inputs/sensors/README.md b/plugins/inputs/sensors/README.md
new file mode 100644
index 000000000..237a9b789
--- /dev/null
+++ b/plugins/inputs/sensors/README.md
@@ -0,0 +1,47 @@
+# sensors Input Plugin
+
+Collect [lm-sensors](https://en.wikipedia.org/wiki/Lm_sensors) metrics - requires the lm-sensors
+package installed.
+
+This plugin collects sensor metrics with the `sensors` executable from the lm-sensors package.
+
+### Configuration:
+```
+# Monitor sensors, requires lm-sensors package
+[[inputs.sensors]]
+  ## Remove numbers from field names.
+  ## If true, a field name like 'temp1_input' will be changed to 'temp_input'.
+  # remove_numbers = true
+```
+
+### Measurements & Fields:
+Fields are created dynamically depending on the sensors. All fields are floats.
+
+### Tags:
+
+- All measurements have the following tags:
+  - chip
+  - feature
+
+### Example Output:
+
+#### Default
+```
+$ telegraf -config telegraf.conf -input-filter sensors -test
+* Plugin: sensors, Collection 1
+> sensors,chip=power_meter-acpi-0,feature=power1 power_average=0,power_average_interval=300 1466751326000000000
+> sensors,chip=k10temp-pci-00c3,feature=temp1 temp_crit=70,temp_crit_hyst=65,temp_input=29,temp_max=70 1466751326000000000
+> sensors,chip=k10temp-pci-00cb,feature=temp1 temp_input=29,temp_max=70 1466751326000000000
+> sensors,chip=k10temp-pci-00d3,feature=temp1 temp_input=27.5,temp_max=70 1466751326000000000
+> sensors,chip=k10temp-pci-00db,feature=temp1 temp_crit=70,temp_crit_hyst=65,temp_input=29.5,temp_max=70 1466751326000000000
+```
+
+#### With remove_numbers=false
+```
+* Plugin: sensors, Collection 1
+> sensors,chip=power_meter-acpi-0,feature=power1 power1_average=0,power1_average_interval=300 1466753424000000000
+> sensors,chip=k10temp-pci-00c3,feature=temp1 temp1_crit=70,temp1_crit_hyst=65,temp1_input=29.125,temp1_max=70 1466753424000000000
+> sensors,chip=k10temp-pci-00cb,feature=temp1 temp1_input=29,temp1_max=70 1466753424000000000
+> sensors,chip=k10temp-pci-00d3,feature=temp1 temp1_input=29.5,temp1_max=70 1466753424000000000
+> sensors,chip=k10temp-pci-00db,feature=temp1 temp1_crit=70,temp1_crit_hyst=65,temp1_input=30,temp1_max=70 1466753424000000000
+```
diff --git a/plugins/inputs/sensors/sensors.go b/plugins/inputs/sensors/sensors.go
index dbb304b71..6e165e4cb 100644
--- a/plugins/inputs/sensors/sensors.go
+++ b/plugins/inputs/sensors/sensors.go
@@ -1,91 +1,118 @@
-// +build linux,sensors
+// +build linux
 
 package sensors
 
 import (
+	"errors"
+	"fmt"
+	"os/exec"
+	"regexp"
+	"strconv"
 	"strings"
-
-	"github.com/md14454/gosensors"
+	"time"
 
 	"github.com/influxdata/telegraf"
+	"github.com/influxdata/telegraf/internal"
+	"github.com/influxdata/telegraf/plugins/inputs"
 )
 
+var (
+	execCommand = exec.Command // execCommand is used to mock commands in tests.
+	numberRegp  = regexp.MustCompile("[0-9]+")
+)
+
 type Sensors struct {
-	Sensors []string
+	RemoveNumbers bool `toml:"remove_numbers"`
+	path          string
 }
 
-func (_ *Sensors) Description() string {
-	return "Monitor sensors using lm-sensors package"
+func (*Sensors) Description() string {
+	return "Monitor sensors, requires lm-sensors package"
 }
 
-var sensorsSampleConfig = `
-  ## By default, telegraf gathers stats from all sensors detected by the
-  ## lm-sensors module.
- ## - ## Only collect stats from the selected sensors. Sensors are listed as - ## :. This information can be found by running the - ## sensors command, e.g. sensors -u - ## - ## A * as the feature name will return all features of the chip - ## - # sensors = ["coretemp-isa-0000:Core 0", "coretemp-isa-0001:*"] +func (*Sensors) SampleConfig() string { + return ` + ## Remove numbers from field names. + ## If true, a field name like 'temp1_input' will be changed to 'temp_input'. + # remove_numbers = true ` -func (_ *Sensors) SampleConfig() string { - return sensorsSampleConfig } func (s *Sensors) Gather(acc telegraf.Accumulator) error { - gosensors.Init() - defer gosensors.Cleanup() - - for _, chip := range gosensors.GetDetectedChips() { - for _, feature := range chip.GetFeatures() { - chipName := chip.String() - featureLabel := feature.GetLabel() - - if len(s.Sensors) != 0 { - var found bool - - for _, sensor := range s.Sensors { - parts := strings.SplitN(sensor, ":", 2) - - if parts[0] == chipName { - if parts[1] == "*" || parts[1] == featureLabel { - found = true - break - } - } - } - - if !found { - continue - } - } - - tags := map[string]string{ - "chip": chipName, - "adapter": chip.AdapterName(), - "feature-name": feature.Name, - "feature-label": featureLabel, - } - - fieldName := chipName + ":" + featureLabel - - fields := map[string]interface{}{ - fieldName: feature.GetValue(), - } - - acc.AddFields("sensors", fields, tags) - } + if len(s.path) == 0 { + return errors.New("sensors not found: verify that lm-sensors package is installed and that sensors is in your PATH") } + return s.parse(acc) +} + +// parse forks the command: +// sensors -u -A +// and parses the output to add it to the telegraf.Accumulator. +func (s *Sensors) parse(acc telegraf.Accumulator) error { + tags := map[string]string{} + fields := map[string]interface{}{} + chip := "" + cmd := execCommand(s.path, "-A", "-u") + out, err := internal.CombinedOutputTimeout(cmd, time.Second*5) + if err != nil { + return fmt.Errorf("failed to run command %s: %s - %s", strings.Join(cmd.Args, " "), err, string(out)) + } + lines := strings.Split(strings.TrimSpace(string(out)), "\n") + for _, line := range lines { + if len(line) == 0 { + acc.AddFields("sensors", fields, tags) + chip = "" + tags = map[string]string{} + fields = map[string]interface{}{} + continue + } + if len(chip) == 0 { + chip = line + tags["chip"] = chip + continue + } + if !strings.HasPrefix(line, " ") { + if len(tags) > 1 { + acc.AddFields("sensors", fields, tags) + } + fields = map[string]interface{}{} + tags = map[string]string{ + "chip": chip, + "feature": strings.TrimRight(snake(line), ":"), + } + } else { + splitted := strings.Split(line, ":") + fieldName := strings.TrimSpace(splitted[0]) + if s.RemoveNumbers { + fieldName = numberRegp.ReplaceAllString(fieldName, "") + } + fieldValue, err := strconv.ParseFloat(strings.TrimSpace(splitted[1]), 64) + if err != nil { + return err + } + fields[fieldName] = fieldValue + } + } + acc.AddFields("sensors", fields, tags) return nil } func init() { + s := Sensors{ + RemoveNumbers: true, + } + path, _ := exec.LookPath("sensors") + if len(path) > 0 { + s.path = path + } inputs.Add("sensors", func() telegraf.Input { - return &Sensors{} + return &s }) } + +// snake converts string to snake case +func snake(input string) string { + return strings.ToLower(strings.Replace(input, " ", "_", -1)) +} diff --git a/plugins/inputs/sensors/sensors_nocompile.go b/plugins/inputs/sensors/sensors_nocompile.go deleted file mode 100644 index 
5c38a437b..000000000 --- a/plugins/inputs/sensors/sensors_nocompile.go +++ /dev/null @@ -1,3 +0,0 @@ -// +build !linux !sensors - -package sensors diff --git a/plugins/inputs/sensors/sensors_notlinux.go b/plugins/inputs/sensors/sensors_notlinux.go new file mode 100644 index 000000000..62a621159 --- /dev/null +++ b/plugins/inputs/sensors/sensors_notlinux.go @@ -0,0 +1,3 @@ +// +build !linux + +package sensors diff --git a/plugins/inputs/sensors/sensors_test.go b/plugins/inputs/sensors/sensors_test.go new file mode 100644 index 000000000..01d27abcf --- /dev/null +++ b/plugins/inputs/sensors/sensors_test.go @@ -0,0 +1,328 @@ +// +build linux + +package sensors + +import ( + "fmt" + "os" + "os/exec" + "testing" + + "github.com/influxdata/telegraf/testutil" +) + +func TestGatherDefault(t *testing.T) { + s := Sensors{ + RemoveNumbers: true, + path: "sensors", + } + // overwriting exec commands with mock commands + execCommand = fakeExecCommand + defer func() { execCommand = exec.Command }() + var acc testutil.Accumulator + + err := s.Gather(&acc) + if err != nil { + t.Fatal(err) + } + + var tests = []struct { + tags map[string]string + fields map[string]interface{} + }{ + { + map[string]string{ + "chip": "acpitz-virtual-0", + "feature": "temp1", + }, + map[string]interface{}{ + "temp_input": 8.3, + "temp_crit": 31.3, + }, + }, + { + map[string]string{ + "chip": "power_meter-acpi-0", + "feature": "power1", + }, + map[string]interface{}{ + "power_average": 0.0, + "power_average_interval": 300.0, + }, + }, + { + map[string]string{ + "chip": "coretemp-isa-0000", + "feature": "physical_id_0", + }, + map[string]interface{}{ + "temp_input": 77.0, + "temp_max": 82.0, + "temp_crit": 92.0, + "temp_crit_alarm": 0.0, + }, + }, + { + map[string]string{ + "chip": "coretemp-isa-0000", + "feature": "core_0", + }, + map[string]interface{}{ + "temp_input": 75.0, + "temp_max": 82.0, + "temp_crit": 92.0, + "temp_crit_alarm": 0.0, + }, + }, + { + map[string]string{ + "chip": "coretemp-isa-0000", + "feature": "core_1", + }, + map[string]interface{}{ + "temp_input": 77.0, + "temp_max": 82.0, + "temp_crit": 92.0, + "temp_crit_alarm": 0.0, + }, + }, + { + map[string]string{ + "chip": "coretemp-isa-0001", + "feature": "physical_id_1", + }, + map[string]interface{}{ + "temp_input": 70.0, + "temp_max": 82.0, + "temp_crit": 92.0, + "temp_crit_alarm": 0.0, + }, + }, + { + map[string]string{ + "chip": "coretemp-isa-0001", + "feature": "core_0", + }, + map[string]interface{}{ + "temp_input": 66.0, + "temp_max": 82.0, + "temp_crit": 92.0, + "temp_crit_alarm": 0.0, + }, + }, + { + map[string]string{ + "chip": "coretemp-isa-0001", + "feature": "core_1", + }, + map[string]interface{}{ + "temp_input": 70.0, + "temp_max": 82.0, + "temp_crit": 92.0, + "temp_crit_alarm": 0.0, + }, + }, + } + + for _, test := range tests { + acc.AssertContainsTaggedFields(t, "sensors", test.fields, test.tags) + } +} + +func TestGatherNotRemoveNumbers(t *testing.T) { + s := Sensors{ + RemoveNumbers: false, + path: "sensors", + } + // overwriting exec commands with mock commands + execCommand = fakeExecCommand + defer func() { execCommand = exec.Command }() + var acc testutil.Accumulator + + err := s.Gather(&acc) + if err != nil { + t.Fatal(err) + } + + var tests = []struct { + tags map[string]string + fields map[string]interface{} + }{ + { + map[string]string{ + "chip": "acpitz-virtual-0", + "feature": "temp1", + }, + map[string]interface{}{ + "temp1_input": 8.3, + "temp1_crit": 31.3, + }, + }, + { + map[string]string{ + "chip": "power_meter-acpi-0", + 
"feature": "power1", + }, + map[string]interface{}{ + "power1_average": 0.0, + "power1_average_interval": 300.0, + }, + }, + { + map[string]string{ + "chip": "coretemp-isa-0000", + "feature": "physical_id_0", + }, + map[string]interface{}{ + "temp1_input": 77.0, + "temp1_max": 82.0, + "temp1_crit": 92.0, + "temp1_crit_alarm": 0.0, + }, + }, + { + map[string]string{ + "chip": "coretemp-isa-0000", + "feature": "core_0", + }, + map[string]interface{}{ + "temp2_input": 75.0, + "temp2_max": 82.0, + "temp2_crit": 92.0, + "temp2_crit_alarm": 0.0, + }, + }, + { + map[string]string{ + "chip": "coretemp-isa-0000", + "feature": "core_1", + }, + map[string]interface{}{ + "temp3_input": 77.0, + "temp3_max": 82.0, + "temp3_crit": 92.0, + "temp3_crit_alarm": 0.0, + }, + }, + { + map[string]string{ + "chip": "coretemp-isa-0001", + "feature": "physical_id_1", + }, + map[string]interface{}{ + "temp1_input": 70.0, + "temp1_max": 82.0, + "temp1_crit": 92.0, + "temp1_crit_alarm": 0.0, + }, + }, + { + map[string]string{ + "chip": "coretemp-isa-0001", + "feature": "core_0", + }, + map[string]interface{}{ + "temp2_input": 66.0, + "temp2_max": 82.0, + "temp2_crit": 92.0, + "temp2_crit_alarm": 0.0, + }, + }, + { + map[string]string{ + "chip": "coretemp-isa-0001", + "feature": "core_1", + }, + map[string]interface{}{ + "temp3_input": 70.0, + "temp3_max": 82.0, + "temp3_crit": 92.0, + "temp3_crit_alarm": 0.0, + }, + }, + } + + for _, test := range tests { + acc.AssertContainsTaggedFields(t, "sensors", test.fields, test.tags) + } +} + +// fackeExecCommand is a helper function that mock +// the exec.Command call (and call the test binary) +func fakeExecCommand(command string, args ...string) *exec.Cmd { + cs := []string{"-test.run=TestHelperProcess", "--", command} + cs = append(cs, args...) + cmd := exec.Command(os.Args[0], cs...) + cmd.Env = []string{"GO_WANT_HELPER_PROCESS=1"} + return cmd +} + +// TestHelperProcess isn't a real test. It's used to mock exec.Command +// For example, if you run: +// GO_WANT_HELPER_PROCESS=1 go test -test.run=TestHelperProcess -- chrony tracking +// it returns below mockData. 
+func TestHelperProcess(t *testing.T) {
+	if os.Getenv("GO_WANT_HELPER_PROCESS") != "1" {
+		return
+	}
+
+	mockData := `acpitz-virtual-0
+temp1:
+  temp1_input: 8.300
+  temp1_crit: 31.300
+
+power_meter-acpi-0
+power1:
+  power1_average: 0.000
+  power1_average_interval: 300.000
+
+coretemp-isa-0000
+Physical id 0:
+  temp1_input: 77.000
+  temp1_max: 82.000
+  temp1_crit: 92.000
+  temp1_crit_alarm: 0.000
+Core 0:
+  temp2_input: 75.000
+  temp2_max: 82.000
+  temp2_crit: 92.000
+  temp2_crit_alarm: 0.000
+Core 1:
+  temp3_input: 77.000
+  temp3_max: 82.000
+  temp3_crit: 92.000
+  temp3_crit_alarm: 0.000
+
+coretemp-isa-0001
+Physical id 1:
+  temp1_input: 70.000
+  temp1_max: 82.000
+  temp1_crit: 92.000
+  temp1_crit_alarm: 0.000
+Core 0:
+  temp2_input: 66.000
+  temp2_max: 82.000
+  temp2_crit: 92.000
+  temp2_crit_alarm: 0.000
+Core 1:
+  temp3_input: 70.000
+  temp3_max: 82.000
+  temp3_crit: 92.000
+  temp3_crit_alarm: 0.000
+`
+
+	args := os.Args
+
+	// The preceding arguments are test-runner arguments, which look like:
+	// /tmp/go-build970079519/…/_test/integration.test -test.run=TestHelperProcess --
+	cmd, args := args[3], args[4:]
+
+	if cmd == "sensors" {
+		fmt.Fprint(os.Stdout, mockData)
+	} else {
+		fmt.Fprint(os.Stdout, "command not found")
+		os.Exit(1)
+
+	}
+	os.Exit(0)
+}
diff --git a/plugins/inputs/snmp/README.md b/plugins/inputs/snmp_legacy/README.md
similarity index 100%
rename from plugins/inputs/snmp/README.md
rename to plugins/inputs/snmp_legacy/README.md
diff --git a/plugins/inputs/snmp/snmp.go b/plugins/inputs/snmp_legacy/snmp_legacy.go
similarity index 99%
rename from plugins/inputs/snmp/snmp.go
rename to plugins/inputs/snmp_legacy/snmp_legacy.go
index 3cbfa0db1..b8b9a1232 100644
--- a/plugins/inputs/snmp/snmp.go
+++ b/plugins/inputs/snmp_legacy/snmp_legacy.go
@@ -1,4 +1,4 @@
-package snmp
+package snmp_legacy
 
 import (
 	"io/ioutil"
@@ -225,7 +225,7 @@ func (s *Snmp) SampleConfig() string {
 // Description returns description of Zookeeper plugin
 func (s *Snmp) Description() string {
-	return `Reads oids value from one or many snmp agents`
+	return `DEPRECATED!
PLEASE USE inputs.snmp INSTEAD.` } func fillnode(parentNode Node, oid_name string, ids []string) { @@ -812,7 +812,7 @@ func (h *Host) HandleResponse( } func init() { - inputs.Add("snmp", func() telegraf.Input { + inputs.Add("snmp_legacy", func() telegraf.Input { return &Snmp{} }) } diff --git a/plugins/inputs/snmp/snmp_test.go b/plugins/inputs/snmp_legacy/snmp_legacy_test.go similarity index 99% rename from plugins/inputs/snmp/snmp_test.go rename to plugins/inputs/snmp_legacy/snmp_legacy_test.go index 2faaa1408..a6bf2922b 100644 --- a/plugins/inputs/snmp/snmp_test.go +++ b/plugins/inputs/snmp_legacy/snmp_legacy_test.go @@ -1,4 +1,4 @@ -package snmp +package snmp_legacy import ( "testing" diff --git a/plugins/inputs/snmp/testdata/oids.txt b/plugins/inputs/snmp_legacy/testdata/oids.txt similarity index 100% rename from plugins/inputs/snmp/testdata/oids.txt rename to plugins/inputs/snmp_legacy/testdata/oids.txt diff --git a/plugins/inputs/sqlserver/sqlserver.go b/plugins/inputs/sqlserver/sqlserver.go index f91e66c24..5b754d772 100644 --- a/plugins/inputs/sqlserver/sqlserver.go +++ b/plugins/inputs/sqlserver/sqlserver.go @@ -400,6 +400,8 @@ IF OBJECT_ID('tempdb..#baseline') IS NOT NULL DROP TABLE #baseline; SELECT DB_NAME(mf.database_id) AS database_name , + mf.size as database_size_8k_pages, + mf.max_size as database_max_size_8k_pages, size_on_disk_bytes , type_desc as datafile_type, GETDATE() AS baselineDate @@ -435,6 +437,50 @@ FROM #baseline WHERE datafile_type = ''ROWS'' ) as V PIVOT(SUM(size_on_disk_bytes) FOR database_name IN (' + @ColumnName + ')) AS PVTTable + +UNION ALL + +SELECT measurement = ''Rows size (8KB pages)'', servername = REPLACE(@@SERVERNAME, ''\'', '':''), type = ''Database size'' +, ' + @ColumnName + ' FROM +( +SELECT database_name, database_size_8k_pages +FROM #baseline +WHERE datafile_type = ''ROWS'' +) as V +PIVOT(SUM(database_size_8k_pages) FOR database_name IN (' + @ColumnName + ')) AS PVTTable + +UNION ALL + +SELECT measurement = ''Log size (8KB pages)'', servername = REPLACE(@@SERVERNAME, ''\'', '':''), type = ''Database size'' +, ' + @ColumnName + ' FROM +( +SELECT database_name, database_size_8k_pages +FROM #baseline +WHERE datafile_type = ''LOG'' +) as V +PIVOT(SUM(database_size_8k_pages) FOR database_name IN (' + @ColumnName + ')) AS PVTTable + +UNION ALL + +SELECT measurement = ''Rows max size (8KB pages)'', servername = REPLACE(@@SERVERNAME, ''\'', '':''), type = ''Database size'' +, ' + @ColumnName + ' FROM +( +SELECT database_name, database_max_size_8k_pages +FROM #baseline +WHERE datafile_type = ''ROWS'' +) as V +PIVOT(SUM(database_max_size_8k_pages) FOR database_name IN (' + @ColumnName + ')) AS PVTTable + +UNION ALL + +SELECT measurement = ''Logs max size (8KB pages)'', servername = REPLACE(@@SERVERNAME, ''\'', '':''), type = ''Database size'' +, ' + @ColumnName + ' FROM +( +SELECT database_name, database_max_size_8k_pages +FROM #baseline +WHERE datafile_type = ''LOG'' +) as V +PIVOT(SUM(database_max_size_8k_pages) FOR database_name IN (' + @ColumnName + ')) AS PVTTable ' --PRINT @DynamicPivotQuery EXEC sp_executesql @DynamicPivotQuery; diff --git a/plugins/inputs/statsd/statsd.go b/plugins/inputs/statsd/statsd.go index 69638af06..fb191974f 100644 --- a/plugins/inputs/statsd/statsd.go +++ b/plugins/inputs/statsd/statsd.go @@ -27,7 +27,8 @@ const ( defaultSeparator = "_" ) -var dropwarn = "ERROR: Message queue full. Discarding line [%s] " + +var dropwarn = "ERROR: statsd message queue full. " + + "We have dropped %d messages so far. 
" + "You may want to increase allowed_pending_messages in the config\n" var prevInstance *Statsd @@ -65,6 +66,8 @@ type Statsd struct { sync.Mutex wg sync.WaitGroup + // drops tracks the number of dropped metrics. + drops int // Channel for all incoming statsd packets in chan []byte @@ -291,7 +294,10 @@ func (s *Statsd) udpListen() error { select { case s.in <- bufCopy: default: - log.Printf(dropwarn, string(buf[:n])) + s.drops++ + if s.drops == 1 || s.drops%s.AllowedPendingMessages == 0 { + log.Printf(dropwarn, s.drops) + } } } } diff --git a/plugins/inputs/system/disk.go b/plugins/inputs/system/disk.go index 5784a7322..f79295294 100644 --- a/plugins/inputs/system/disk.go +++ b/plugins/inputs/system/disk.go @@ -92,8 +92,8 @@ var diskIoSampleConfig = ` ## disk partitions. ## Setting devices will restrict the stats to the specified devices. # devices = ["sda", "sdb"] - ## Uncomment the following line if you do not need disk serial numbers. - # skip_serial_number = true + ## Uncomment the following line if you need disk serial numbers. + # skip_serial_number = false ` func (_ *DiskIOStats) SampleConfig() string { @@ -151,6 +151,6 @@ func init() { }) inputs.Add("diskio", func() telegraf.Input { - return &DiskIOStats{ps: &systemPS{}} + return &DiskIOStats{ps: &systemPS{}, SkipSerialNumber: true} }) } diff --git a/plugins/inputs/tail/tail.go b/plugins/inputs/tail/tail.go index 7386e053d..942fd6bae 100644 --- a/plugins/inputs/tail/tail.go +++ b/plugins/inputs/tail/tail.go @@ -86,9 +86,10 @@ func (t *Tail) Start(acc telegraf.Accumulator) error { for file, _ := range g.Match() { tailer, err := tail.TailFile(file, tail.Config{ - ReOpen: true, - Follow: true, - Location: &seek, + ReOpen: true, + Follow: true, + Location: &seek, + MustExist: true, }) if err != nil { errS += err.Error() + " " diff --git a/plugins/inputs/tail/tail_test.go b/plugins/inputs/tail/tail_test.go index f9f6bff28..31ecfbf30 100644 --- a/plugins/inputs/tail/tail_test.go +++ b/plugins/inputs/tail/tail_test.go @@ -17,6 +17,8 @@ func TestTailFromBeginning(t *testing.T) { tmpfile, err := ioutil.TempFile("", "") require.NoError(t, err) defer os.Remove(tmpfile.Name()) + _, err = tmpfile.WriteString("cpu,mytag=foo usage_idle=100\n") + require.NoError(t, err) tt := NewTail() tt.FromBeginning = true @@ -28,12 +30,10 @@ func TestTailFromBeginning(t *testing.T) { acc := testutil.Accumulator{} require.NoError(t, tt.Start(&acc)) - - _, err = tmpfile.WriteString("cpu,mytag=foo usage_idle=100\n") - require.NoError(t, err) + time.Sleep(time.Millisecond * 100) require.NoError(t, tt.Gather(&acc)) // arbitrary sleep to wait for message to show up - time.Sleep(time.Millisecond * 250) + time.Sleep(time.Millisecond * 150) acc.AssertContainsTaggedFields(t, "cpu", map[string]interface{}{ diff --git a/plugins/inputs/tcp_listener/tcp_listener.go b/plugins/inputs/tcp_listener/tcp_listener.go index a420ed759..b8bea2bd6 100644 --- a/plugins/inputs/tcp_listener/tcp_listener.go +++ b/plugins/inputs/tcp_listener/tcp_listener.go @@ -29,6 +29,10 @@ type TcpListener struct { // is an available bool in accept, then we are below the maximum and can // accept the connection accept chan bool + // drops tracks the number of dropped metrics. + drops int + // malformed tracks the number of malformed packets + malformed int // track the listener here so we can close it in Stop() listener *net.TCPListener @@ -39,9 +43,13 @@ type TcpListener struct { acc telegraf.Accumulator } -var dropwarn = "ERROR: Message queue full. 
Discarding metric [%s], " + +var dropwarn = "ERROR: tcp_listener message queue full. " + + "We have dropped %d messages so far. " + "You may want to increase allowed_pending_messages in the config\n" +var malformedwarn = "WARNING: tcp_listener has received %d malformed packets" + + " thus far." + const sampleConfig = ` ## Address and port to host TCP listener on service_address = ":8094" @@ -150,7 +158,6 @@ func (t *TcpListener) tcpListen() error { if err != nil { return err } - // log.Printf("Received TCP Connection from %s", conn.RemoteAddr()) select { case <-t.accept: @@ -186,7 +193,6 @@ func (t *TcpListener) handler(conn *net.TCPConn, id string) { defer func() { t.wg.Done() conn.Close() - // log.Printf("Closed TCP Connection from %s", conn.RemoteAddr()) // Add one connection potential back to channel when this one closes t.accept <- true t.forget(id) @@ -212,7 +218,10 @@ func (t *TcpListener) handler(conn *net.TCPConn, id string) { select { case t.in <- bufCopy: default: - log.Printf(dropwarn, scanner.Text()) + t.drops++ + if t.drops == 1 || t.drops%t.AllowedPendingMessages == 0 { + log.Printf(dropwarn, t.drops) + } } } } @@ -228,31 +237,29 @@ func (t *TcpListener) tcpParser() error { for { select { case <-t.done: - return nil + // drain input packets before finishing: + if len(t.in) == 0 { + return nil + } case packet = <-t.in: if len(packet) == 0 { continue } metrics, err = t.parser.Parse(packet) if err == nil { - t.storeMetrics(metrics) + for _, m := range metrics { + t.acc.AddFields(m.Name(), m.Fields(), m.Tags(), m.Time()) + } } else { - log.Printf("Malformed packet: [%s], Error: %s\n", - string(packet), err) + t.malformed++ + if t.malformed == 1 || t.malformed%1000 == 0 { + log.Printf(malformedwarn, t.malformed) + } } } } } -func (t *TcpListener) storeMetrics(metrics []telegraf.Metric) error { - t.Lock() - defer t.Unlock() - for _, m := range metrics { - t.acc.AddFields(m.Name(), m.Fields(), m.Tags(), m.Time()) - } - return nil -} - // forget a TCP connection func (t *TcpListener) forget(id string) { t.cleanup.Lock() diff --git a/plugins/inputs/tcp_listener/tcp_listener_test.go b/plugins/inputs/tcp_listener/tcp_listener_test.go index b4aec9dd2..f7e5784d3 100644 --- a/plugins/inputs/tcp_listener/tcp_listener_test.go +++ b/plugins/inputs/tcp_listener/tcp_listener_test.go @@ -37,6 +37,62 @@ func newTestTcpListener() (*TcpListener, chan []byte) { return listener, in } +// benchmark how long it takes to accept & process 100,000 metrics: +func BenchmarkTCP(b *testing.B) { + listener := TcpListener{ + ServiceAddress: ":8198", + AllowedPendingMessages: 100000, + MaxTCPConnections: 250, + } + listener.parser, _ = parsers.NewInfluxParser() + acc := &testutil.Accumulator{Discard: true} + + // send multiple messages to socket + for n := 0; n < b.N; n++ { + err := listener.Start(acc) + if err != nil { + panic(err) + } + + time.Sleep(time.Millisecond * 25) + conn, err := net.Dial("tcp", "127.0.0.1:8198") + if err != nil { + panic(err) + } + for i := 0; i < 100000; i++ { + fmt.Fprintf(conn, testMsg) + } + // wait for 100,000 metrics to get added to accumulator + time.Sleep(time.Millisecond) + listener.Stop() + } +} + +func TestHighTrafficTCP(t *testing.T) { + listener := TcpListener{ + ServiceAddress: ":8199", + AllowedPendingMessages: 100000, + MaxTCPConnections: 250, + } + listener.parser, _ = parsers.NewInfluxParser() + acc := &testutil.Accumulator{} + + // send multiple messages to socket + err := listener.Start(acc) + require.NoError(t, err) + + time.Sleep(time.Millisecond * 25) + conn, 
err := net.Dial("tcp", "127.0.0.1:8199") + require.NoError(t, err) + for i := 0; i < 100000; i++ { + fmt.Fprintf(conn, testMsg) + } + time.Sleep(time.Millisecond) + listener.Stop() + + assert.Equal(t, 100000, len(acc.Metrics)) +} + func TestConnectTCP(t *testing.T) { listener := TcpListener{ ServiceAddress: ":8194", diff --git a/plugins/inputs/udp_listener/udp_listener.go b/plugins/inputs/udp_listener/udp_listener.go index 8e2637ce7..fa773f624 100644 --- a/plugins/inputs/udp_listener/udp_listener.go +++ b/plugins/inputs/udp_listener/udp_listener.go @@ -3,8 +3,8 @@ package udp_listener import ( "log" "net" - "strings" "sync" + "time" "github.com/influxdata/telegraf" "github.com/influxdata/telegraf/plugins/inputs" @@ -25,6 +25,10 @@ type UdpListener struct { in chan []byte done chan struct{} + // drops tracks the number of dropped metrics. + drops int + // malformed tracks the number of malformed packets + malformed int parser parsers.Parser @@ -38,9 +42,13 @@ type UdpListener struct { // https://en.wikipedia.org/wiki/User_Datagram_Protocol#Packet_structure const UDP_MAX_PACKET_SIZE int = 64 * 1024 -var dropwarn = "ERROR: Message queue full. Discarding line [%s] " + +var dropwarn = "ERROR: udp_listener message queue full. " + + "We have dropped %d messages so far. " + "You may want to increase allowed_pending_messages in the config\n" +var malformedwarn = "WARNING: udp_listener has received %d malformed packets" + + " thus far." + const sampleConfig = ` ## Address and port to host UDP listener on service_address = ":8092" @@ -91,9 +99,11 @@ func (u *UdpListener) Start(acc telegraf.Accumulator) error { } func (u *UdpListener) Stop() { + u.Lock() + defer u.Unlock() close(u.done) - u.listener.Close() u.wg.Wait() + u.listener.Close() close(u.in) log.Println("Stopped UDP listener service on ", u.ServiceAddress) } @@ -114,9 +124,13 @@ func (u *UdpListener) udpListen() error { case <-u.done: return nil default: + u.listener.SetReadDeadline(time.Now().Add(time.Second)) n, _, err := u.listener.ReadFromUDP(buf) - if err != nil && !strings.Contains(err.Error(), "closed network") { - log.Printf("ERROR: %s\n", err.Error()) + if err != nil { + if err, ok := err.(net.Error); ok && err.Timeout() { + } else { + log.Printf("ERROR: %s\n", err.Error()) + } continue } bufCopy := make([]byte, n) @@ -125,7 +139,10 @@ func (u *UdpListener) udpListen() error { select { case u.in <- bufCopy: default: - log.Printf(dropwarn, string(bufCopy)) + u.drops++ + if u.drops == 1 || u.drops%u.AllowedPendingMessages == 0 { + log.Printf(dropwarn, u.drops) + } } } } @@ -140,27 +157,25 @@ func (u *UdpListener) udpParser() error { for { select { case <-u.done: - return nil + if len(u.in) == 0 { + return nil + } case packet = <-u.in: metrics, err = u.parser.Parse(packet) if err == nil { - u.storeMetrics(metrics) + for _, m := range metrics { + u.acc.AddFields(m.Name(), m.Fields(), m.Tags(), m.Time()) + } } else { - log.Printf("Malformed packet: [%s], Error: %s\n", packet, err) + u.malformed++ + if u.malformed == 1 || u.malformed%1000 == 0 { + log.Printf(malformedwarn, u.malformed) + } } } } } -func (u *UdpListener) storeMetrics(metrics []telegraf.Metric) error { - u.Lock() - defer u.Unlock() - for _, m := range metrics { - u.acc.AddFields(m.Name(), m.Fields(), m.Tags(), m.Time()) - } - return nil -} - func init() { inputs.Add("udp_listener", func() telegraf.Input { return &UdpListener{} diff --git a/plugins/inputs/udp_listener/udp_listener_test.go b/plugins/inputs/udp_listener/udp_listener_test.go index bdbab318b..fa9980682 100644 
--- a/plugins/inputs/udp_listener/udp_listener_test.go +++ b/plugins/inputs/udp_listener/udp_listener_test.go @@ -1,20 +1,36 @@ package udp_listener import ( + "fmt" "io/ioutil" "log" + "net" "testing" "time" "github.com/influxdata/telegraf/plugins/parsers" "github.com/influxdata/telegraf/testutil" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +const ( + testMsg = "cpu_load_short,host=server01 value=12.0 1422568543702900257\n" + + testMsgs = ` +cpu_load_short,host=server02 value=12.0 1422568543702900257 +cpu_load_short,host=server03 value=12.0 1422568543702900257 +cpu_load_short,host=server04 value=12.0 1422568543702900257 +cpu_load_short,host=server05 value=12.0 1422568543702900257 +cpu_load_short,host=server06 value=12.0 1422568543702900257 +` ) func newTestUdpListener() (*UdpListener, chan []byte) { in := make(chan []byte, 1500) listener := &UdpListener{ ServiceAddress: ":8125", - UDPPacketSize: 1500, AllowedPendingMessages: 10000, in: in, done: make(chan struct{}), @@ -22,6 +38,72 @@ func newTestUdpListener() (*UdpListener, chan []byte) { return listener, in } +func TestHighTrafficUDP(t *testing.T) { + listener := UdpListener{ + ServiceAddress: ":8126", + AllowedPendingMessages: 100000, + } + listener.parser, _ = parsers.NewInfluxParser() + acc := &testutil.Accumulator{} + + // send multiple messages to socket + err := listener.Start(acc) + require.NoError(t, err) + + time.Sleep(time.Millisecond * 25) + conn, err := net.Dial("udp", "127.0.0.1:8126") + require.NoError(t, err) + for i := 0; i < 20000; i++ { + // arbitrary, just to give the OS buffer some slack handling the + // packet storm. + time.Sleep(time.Microsecond) + fmt.Fprintf(conn, testMsgs) + } + time.Sleep(time.Millisecond) + listener.Stop() + + // this is not an exact science, since UDP packets can easily get lost or + // dropped, but assume that the OS will be able to + // handle at least 90% of the sent UDP packets. 
+	assert.InDelta(t, 100000, len(acc.Metrics), 10000)
+}
+
+func TestConnectUDP(t *testing.T) {
+	listener := UdpListener{
+		ServiceAddress:         ":8127",
+		AllowedPendingMessages: 10000,
+	}
+	listener.parser, _ = parsers.NewInfluxParser()
+
+	acc := &testutil.Accumulator{}
+	require.NoError(t, listener.Start(acc))
+	defer listener.Stop()
+
+	time.Sleep(time.Millisecond * 25)
+	conn, err := net.Dial("udp", "127.0.0.1:8127")
+	require.NoError(t, err)
+
+	// send single message to socket
+	fmt.Fprintf(conn, testMsg)
+	time.Sleep(time.Millisecond * 15)
+	acc.AssertContainsTaggedFields(t, "cpu_load_short",
+		map[string]interface{}{"value": float64(12)},
+		map[string]string{"host": "server01"},
+	)
+
+	// send multiple messages to socket
+	fmt.Fprintf(conn, testMsgs)
+	time.Sleep(time.Millisecond * 15)
+	hostTags := []string{"server02", "server03",
+		"server04", "server05", "server06"}
+	for _, hostTag := range hostTags {
+		acc.AssertContainsTaggedFields(t, "cpu_load_short",
+			map[string]interface{}{"value": float64(12)},
+			map[string]string{"host": hostTag},
+		)
+	}
+}
+
 func TestRunParser(t *testing.T) {
 	log.SetOutput(ioutil.Discard)
 	var testmsg = []byte("cpu_load_short,host=server01 value=12.0 1422568543702900257")
diff --git a/plugins/inputs/varnish/varnish.go b/plugins/inputs/varnish/varnish.go
index 1a3e4c558..2b0e84514 100644
--- a/plugins/inputs/varnish/varnish.go
+++ b/plugins/inputs/varnish/varnish.go
@@ -12,9 +12,8 @@ import (
 	"strings"
 	"time"
 
-	"github.com/gobwas/glob"
-
 	"github.com/influxdata/telegraf"
+	"github.com/influxdata/telegraf/filter"
 	"github.com/influxdata/telegraf/internal"
 	"github.com/influxdata/telegraf/plugins/inputs"
 )
@@ -26,7 +25,7 @@ type Varnish struct {
 	Stats  []string
 	Binary string
 
-	filter glob.Glob
+	filter filter.Filter
 	run    runner
 }
@@ -78,13 +77,13 @@ func (s *Varnish) Gather(acc telegraf.Accumulator) error {
 	if s.filter == nil {
 		var err error
 		if len(s.Stats) == 0 {
-			s.filter, err = internal.CompileFilter(defaultStats)
+			s.filter, err = filter.CompileFilter(defaultStats)
 		} else {
 			// legacy support, change "all" -> "*":
 			if s.Stats[0] == "all" {
 				s.Stats[0] = "*"
 			}
-			s.filter, err = internal.CompileFilter(s.Stats)
+			s.filter, err = filter.CompileFilter(s.Stats)
 		}
 		if err != nil {
 			return err
 		}
diff --git a/plugins/inputs/webhooks/README.md b/plugins/inputs/webhooks/README.md
new file mode 100644
index 000000000..86e6685b8
--- /dev/null
+++ b/plugins/inputs/webhooks/README.md
@@ -0,0 +1,28 @@
+# Webhooks
+
+This is a Telegraf service plugin that starts an HTTP server and registers multiple webhook listeners.
+
+```sh
+$ telegraf -sample-config -input-filter webhooks -output-filter influxdb > config.conf.new
+```
+
+Change the config file to point to the InfluxDB server you are using and adjust the settings to match your environment. Once that is complete:
+
+```sh
+$ cp config.conf.new /etc/telegraf/telegraf.conf
+$ sudo service telegraf start
+```
+
+## Available webhooks
+
+- [Github](github/)
+- [Mandrill](mandrill/)
+- [Rollbar](rollbar/)
+
+## Adding a new webhook plugin
+
+1. Add your webhook plugin inside the `webhooks` folder
+1. Your plugin must implement the `Webhook` interface
+1. Import your plugin in the `webhooks.go` file and add it to the `Webhooks` struct
+
+Both [Github](github/) and [Rollbar](rollbar/) are good examples to follow; a minimal sketch follows below.
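A minimal sketch of such a plugin, assuming nothing beyond the `Webhook` interface defined in `webhooks.go` below; the `ExampleWebhook` name, its measurement, and its fields are hypothetical placeholders:

```go
package example

import (
	"net/http"

	"github.com/gorilla/mux"
	"github.com/influxdata/telegraf"
)

// ExampleWebhook is a hypothetical webhook plugin; real ones live in their
// own folder under plugins/inputs/webhooks/.
type ExampleWebhook struct {
	Path string
	acc  telegraf.Accumulator
}

// Register satisfies the Webhook interface by attaching the handler to the
// shared router and keeping the accumulator for later writes.
func (e *ExampleWebhook) Register(router *mux.Router, acc telegraf.Accumulator) {
	router.HandleFunc(e.Path, e.eventHandler).Methods("POST")
	e.acc = acc
}

func (e *ExampleWebhook) eventHandler(w http.ResponseWriter, r *http.Request) {
	defer r.Body.Close()
	// Decode the provider's payload here, then hand fields and tags
	// straight to the accumulator.
	e.acc.AddFields("example_webhooks",
		map[string]interface{}{"events": 1},
		map[string]string{"source": "example"},
	)
	w.WriteHeader(http.StatusOK)
}
```

The remaining step is wiring: add a field such as `Example *example.ExampleWebhook` to the `Webhooks` struct so that `AvailableWebhooks` can discover it via reflection.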
diff --git a/plugins/inputs/github_webhooks/README.md b/plugins/inputs/webhooks/github/README.md
similarity index 91%
rename from plugins/inputs/github_webhooks/README.md
rename to plugins/inputs/webhooks/github/README.md
index 230e5366b..68594cd78 100644
--- a/plugins/inputs/github_webhooks/README.md
+++ b/plugins/inputs/webhooks/github/README.md
@@ -1,15 +1,6 @@
-# github_webhooks
+# github webhooks
-This is a Telegraf service plugin that listens for events kicked off by Github's Webhooks service and persists data from them into configured outputs. To set up the listener first generate the proper configuration:
-```sh
-$ telegraf -sample-config -input-filter github_webhooks -output-filter influxdb > config.conf.new
-```
-Change the config file to point to the InfluxDB server you are using and adjust the settings to match your environment. Once that is complete:
-```sh
-$ cp config.conf.new /etc/telegraf/telegraf.conf
-$ sudo service telegraf start
-```
-Once the server is running you should configure your Organization's Webhooks to point at the `github_webhooks` service. To do this go to `github.com/{my_organization}` and click `Settings > Webhooks > Add webhook`. In the resulting menu set `Payload URL` to `http://:1618`, `Content type` to `application/json` and under the section `Which events would you like to trigger this webhook?` select 'Send me everything'. By default all of the events will write to the `github_webhooks` measurement, this is configurable by setting the `measurement_name` in the config file.
+You should configure your Organization's Webhooks to point at the `webhooks` service. To do this, go to `github.com/{my_organization}` and click `Settings > Webhooks > Add webhook`. In the resulting menu set `Payload URL` to `http://:1619/github`, `Content type` to `application/json` and under the section `Which events would you like to trigger this webhook?` select 'Send me everything'. By default all of the events will write to the `github_webhooks` measurement; this is configurable by setting the `measurement_name` in the config file.
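For a quick local check of the endpoint, the handler can be driven directly in a test, following the `GithubWebhookRequest` pattern from `github_webhooks_test.go` further down; the `CommitCommentEventJSON` fixture and the `commit_comment` event name are assumed to match `github_webhooks_mock_json.go`:

```go
package github

import (
	"net/http"
	"net/http/httptest"
	"strings"
	"testing"

	"github.com/influxdata/telegraf/testutil"
)

// A sketch only: posts one mock commit_comment payload and expects a 200.
func TestCommitCommentExample(t *testing.T) {
	var acc testutil.Accumulator
	gh := &GithubWebhook{Path: "/github", acc: &acc}

	req, _ := http.NewRequest("POST", "/github", strings.NewReader(CommitCommentEventJSON()))
	req.Header.Add("X-Github-Event", "commit_comment")

	w := httptest.NewRecorder()
	gh.eventHandler(w, req)

	if w.Code != http.StatusOK {
		t.Errorf("POST commit_comment returned HTTP status code %v.\nExpected %v", w.Code, http.StatusOK)
	}
}
```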
## Events diff --git a/plugins/inputs/github_webhooks/github_webhooks.go b/plugins/inputs/webhooks/github/github_webhooks.go similarity index 58% rename from plugins/inputs/github_webhooks/github_webhooks.go rename to plugins/inputs/webhooks/github/github_webhooks.go index 9e8fc22cd..5327363f4 100644 --- a/plugins/inputs/github_webhooks/github_webhooks.go +++ b/plugins/inputs/webhooks/github/github_webhooks.go @@ -1,78 +1,27 @@ -package github_webhooks +package github import ( "encoding/json" - "fmt" "io/ioutil" "log" "net/http" - "sync" "github.com/gorilla/mux" "github.com/influxdata/telegraf" - "github.com/influxdata/telegraf/plugins/inputs" ) -func init() { - inputs.Add("github_webhooks", func() telegraf.Input { return &GithubWebhooks{} }) +type GithubWebhook struct { + Path string + acc telegraf.Accumulator } -type GithubWebhooks struct { - ServiceAddress string - // Lock for the struct - sync.Mutex - // Events buffer to store events between Gather calls - events []Event +func (gh *GithubWebhook) Register(router *mux.Router, acc telegraf.Accumulator) { + router.HandleFunc(gh.Path, gh.eventHandler).Methods("POST") + log.Printf("Started the webhooks_github on %s\n", gh.Path) + gh.acc = acc } -func NewGithubWebhooks() *GithubWebhooks { - return &GithubWebhooks{} -} - -func (gh *GithubWebhooks) SampleConfig() string { - return ` - ## Address and port to host Webhook listener on - service_address = ":1618" -` -} - -func (gh *GithubWebhooks) Description() string { - return "A Github Webhook Event collector" -} - -// Writes the points from <-gh.in to the Accumulator -func (gh *GithubWebhooks) Gather(acc telegraf.Accumulator) error { - gh.Lock() - defer gh.Unlock() - for _, event := range gh.events { - p := event.NewMetric() - acc.AddFields("github_webhooks", p.Fields(), p.Tags(), p.Time()) - } - gh.events = make([]Event, 0) - return nil -} - -func (gh *GithubWebhooks) Listen() { - r := mux.NewRouter() - r.HandleFunc("/", gh.eventHandler).Methods("POST") - err := http.ListenAndServe(fmt.Sprintf("%s", gh.ServiceAddress), r) - if err != nil { - log.Printf("Error starting server: %v", err) - } -} - -func (gh *GithubWebhooks) Start(_ telegraf.Accumulator) error { - go gh.Listen() - log.Printf("Started the github_webhooks service on %s\n", gh.ServiceAddress) - return nil -} - -func (gh *GithubWebhooks) Stop() { - log.Println("Stopping the ghWebhooks service") -} - -// Handles the / route -func (gh *GithubWebhooks) eventHandler(w http.ResponseWriter, r *http.Request) { +func (gh *GithubWebhook) eventHandler(w http.ResponseWriter, r *http.Request) { defer r.Body.Close() eventType := r.Header["X-Github-Event"][0] data, err := ioutil.ReadAll(r.Body) @@ -85,9 +34,10 @@ func (gh *GithubWebhooks) eventHandler(w http.ResponseWriter, r *http.Request) { w.WriteHeader(http.StatusBadRequest) return } - gh.Lock() - gh.events = append(gh.events, e) - gh.Unlock() + + p := e.NewMetric() + gh.acc.AddFields("github_webhooks", p.Fields(), p.Tags(), p.Time()) + w.WriteHeader(http.StatusOK) } diff --git a/plugins/inputs/github_webhooks/github_webhooks_mock_json.go b/plugins/inputs/webhooks/github/github_webhooks_mock_json.go similarity index 99% rename from plugins/inputs/github_webhooks/github_webhooks_mock_json.go rename to plugins/inputs/webhooks/github/github_webhooks_mock_json.go index 386d62e65..91af9a330 100644 --- a/plugins/inputs/github_webhooks/github_webhooks_mock_json.go +++ b/plugins/inputs/webhooks/github/github_webhooks_mock_json.go @@ -1,4 +1,4 @@ -package github_webhooks +package github func 
CommitCommentEventJSON() string {
 	return `{
diff --git a/plugins/inputs/github_webhooks/github_webhooks_models.go b/plugins/inputs/webhooks/github/github_webhooks_models.go
similarity index 99%
rename from plugins/inputs/github_webhooks/github_webhooks_models.go
rename to plugins/inputs/webhooks/github/github_webhooks_models.go
index 2902708c2..9cbcef9f4 100644
--- a/plugins/inputs/github_webhooks/github_webhooks_models.go
+++ b/plugins/inputs/webhooks/github/github_webhooks_models.go
@@ -1,4 +1,4 @@
-package github_webhooks
+package github
 
 import (
 	"fmt"
diff --git a/plugins/inputs/github_webhooks/github_webhooks_test.go b/plugins/inputs/webhooks/github/github_webhooks_test.go
similarity index 91%
rename from plugins/inputs/github_webhooks/github_webhooks_test.go
rename to plugins/inputs/webhooks/github/github_webhooks_test.go
index a71d68548..7bee5372d 100644
--- a/plugins/inputs/github_webhooks/github_webhooks_test.go
+++ b/plugins/inputs/webhooks/github/github_webhooks_test.go
@@ -1,15 +1,18 @@
-package github_webhooks
+package github
 
 import (
 	"net/http"
 	"net/http/httptest"
 	"strings"
 	"testing"
+
+	"github.com/influxdata/telegraf/testutil"
 )
 
 func GithubWebhookRequest(event string, jsonString string, t *testing.T) {
-	gh := NewGithubWebhooks()
-	req, _ := http.NewRequest("POST", "/", strings.NewReader(jsonString))
+	var acc testutil.Accumulator
+	gh := &GithubWebhook{Path: "/github", acc: &acc}
+	req, _ := http.NewRequest("POST", "/github", strings.NewReader(jsonString))
 	req.Header.Add("X-Github-Event", event)
 	w := httptest.NewRecorder()
 	gh.eventHandler(w, req)
diff --git a/plugins/inputs/webhooks/mandrill/README.md b/plugins/inputs/webhooks/mandrill/README.md
new file mode 100644
index 000000000..2fb4914e1
--- /dev/null
+++ b/plugins/inputs/webhooks/mandrill/README.md
@@ -0,0 +1,15 @@
+# mandrill webhook
+
+You should configure your Mandrill's Webhooks to point at the `webhooks` service. To do this, go to `mandrillapp.com/` and click `Settings > Webhooks`. In the resulting page, click on `Add a Webhook`, select all events, and set the `URL` to `http://:1619/mandrill`, and click on `Create Webhook`.
+
+## Events
+
+See the [webhook doc](https://mandrill.zendesk.com/hc/en-us/articles/205583307-Message-Event-Webhook-format).
+
+All events log the original timestamp, the event name, and the unique identifier of the message that generated the event.
+ +**Tags:** +* 'event' = `event.event` string + +**Fields:** +* 'id' = `event._id` string diff --git a/plugins/inputs/webhooks/mandrill/mandrill_webhooks.go b/plugins/inputs/webhooks/mandrill/mandrill_webhooks.go new file mode 100644 index 000000000..e9d4a6de4 --- /dev/null +++ b/plugins/inputs/webhooks/mandrill/mandrill_webhooks.go @@ -0,0 +1,56 @@ +package mandrill + +import ( + "encoding/json" + "io/ioutil" + "log" + "net/http" + "net/url" + "time" + + "github.com/gorilla/mux" + "github.com/influxdata/telegraf" +) + +type MandrillWebhook struct { + Path string + acc telegraf.Accumulator +} + +func (md *MandrillWebhook) Register(router *mux.Router, acc telegraf.Accumulator) { + router.HandleFunc(md.Path, md.returnOK).Methods("HEAD") + router.HandleFunc(md.Path, md.eventHandler).Methods("POST") + + log.Printf("Started the webhooks_mandrill on %s\n", md.Path) + md.acc = acc +} + +func (md *MandrillWebhook) returnOK(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusOK) +} + +func (md *MandrillWebhook) eventHandler(w http.ResponseWriter, r *http.Request) { + defer r.Body.Close() + body, err := ioutil.ReadAll(r.Body) + if err != nil { + w.WriteHeader(http.StatusBadRequest) + return + } + data, err := url.ParseQuery(string(body)) + if err != nil { + w.WriteHeader(http.StatusBadRequest) + return + } + var events []MandrillEvent + err = json.Unmarshal([]byte(data.Get("mandrill_events")), &events) + if err != nil { + w.WriteHeader(http.StatusBadRequest) + return + } + + for _, event := range events { + md.acc.AddFields("mandrill_webhooks", event.Fields(), event.Tags(), time.Unix(event.TimeStamp, 0)) + } + + w.WriteHeader(http.StatusOK) +} diff --git a/plugins/inputs/webhooks/mandrill/mandrill_webhooks_events.go b/plugins/inputs/webhooks/mandrill/mandrill_webhooks_events.go new file mode 100644 index 000000000..b36b13e54 --- /dev/null +++ b/plugins/inputs/webhooks/mandrill/mandrill_webhooks_events.go @@ -0,0 +1,24 @@ +package mandrill + +type Event interface { + Tags() map[string]string + Fields() map[string]interface{} +} + +type MandrillEvent struct { + EventName string `json:"event"` + TimeStamp int64 `json:"ts"` + Id string `json:"_id"` +} + +func (me *MandrillEvent) Tags() map[string]string { + return map[string]string{ + "event": me.EventName, + } +} + +func (me *MandrillEvent) Fields() map[string]interface{} { + return map[string]interface{}{ + "id": me.Id, + } +} diff --git a/plugins/inputs/webhooks/mandrill/mandrill_webhooks_events_json_test.go b/plugins/inputs/webhooks/mandrill/mandrill_webhooks_events_json_test.go new file mode 100644 index 000000000..4ab385e18 --- /dev/null +++ b/plugins/inputs/webhooks/mandrill/mandrill_webhooks_events_json_test.go @@ -0,0 +1,58 @@ +package mandrill + +func SendEventJSON() string { + return ` + { + "event": "send", + "msg": { + "ts": 1365109999, + "subject": "This an example webhook message", + "email": "example.webhook@mandrillapp.com", + "sender": "example.sender@mandrillapp.com", + "tags": [ + "webhook-example" + ], + "opens": [ + + ], + "clicks": [ + + ], + "state": "sent", + "metadata": { + "user_id": 111 + }, + "_id": "exampleaaaaaaaaaaaaaaaaaaaaaaaaa", + "_version": "exampleaaaaaaaaaaaaaaa" + }, + "_id": "id1", + "ts": 1384954004 + }` +} + +func HardBounceEventJSON() string { + return ` + { + "event": "hard_bounce", + "msg": { + "ts": 1365109999, + "subject": "This an example webhook message", + "email": "example.webhook@mandrillapp.com", + "sender": "example.sender@mandrillapp.com", + "tags": [ + "webhook-example" + ], + 
"state": "bounced", + "metadata": { + "user_id": 111 + }, + "_id": "exampleaaaaaaaaaaaaaaaaaaaaaaaaa2", + "_version": "exampleaaaaaaaaaaaaaaa", + "bounce_description": "bad_mailbox", + "bgtools_code": 10, + "diag": "smtp;550 5.1.1 The email account that you tried to reach does not exist. Please try double-checking the recipient's email address for typos or unnecessary spaces." + }, + "_id": "id2", + "ts": 1384954004 + }` +} diff --git a/plugins/inputs/webhooks/mandrill/mandrill_webhooks_test.go b/plugins/inputs/webhooks/mandrill/mandrill_webhooks_test.go new file mode 100644 index 000000000..94ac68684 --- /dev/null +++ b/plugins/inputs/webhooks/mandrill/mandrill_webhooks_test.go @@ -0,0 +1,85 @@ +package mandrill + +import ( + "github.com/influxdata/telegraf/testutil" + "net/http" + "net/http/httptest" + "net/url" + "strings" + "testing" +) + +func postWebhooks(md *MandrillWebhook, eventBody string) *httptest.ResponseRecorder { + body := url.Values{} + body.Set("mandrill_events", eventBody) + req, _ := http.NewRequest("POST", "/mandrill", strings.NewReader(body.Encode())) + w := httptest.NewRecorder() + + md.eventHandler(w, req) + + return w +} + +func headRequest(md *MandrillWebhook) *httptest.ResponseRecorder { + req, _ := http.NewRequest("HEAD", "/mandrill", strings.NewReader("")) + w := httptest.NewRecorder() + + md.returnOK(w, req) + + return w +} + +func TestHead(t *testing.T) { + md := &MandrillWebhook{Path: "/mandrill"} + resp := headRequest(md) + if resp.Code != http.StatusOK { + t.Errorf("HEAD returned HTTP status code %v.\nExpected %v", resp.Code, http.StatusOK) + } +} + +func TestSendEvent(t *testing.T) { + var acc testutil.Accumulator + md := &MandrillWebhook{Path: "/mandrill", acc: &acc} + resp := postWebhooks(md, "["+SendEventJSON()+"]") + if resp.Code != http.StatusOK { + t.Errorf("POST send returned HTTP status code %v.\nExpected %v", resp.Code, http.StatusOK) + } + + fields := map[string]interface{}{ + "id": "id1", + } + + tags := map[string]string{ + "event": "send", + } + + acc.AssertContainsTaggedFields(t, "mandrill_webhooks", fields, tags) +} + +func TestMultipleEvents(t *testing.T) { + var acc testutil.Accumulator + md := &MandrillWebhook{Path: "/mandrill", acc: &acc} + resp := postWebhooks(md, "["+SendEventJSON()+","+HardBounceEventJSON()+"]") + if resp.Code != http.StatusOK { + t.Errorf("POST send returned HTTP status code %v.\nExpected %v", resp.Code, http.StatusOK) + } + + fields := map[string]interface{}{ + "id": "id1", + } + + tags := map[string]string{ + "event": "send", + } + + acc.AssertContainsTaggedFields(t, "mandrill_webhooks", fields, tags) + + fields = map[string]interface{}{ + "id": "id2", + } + + tags = map[string]string{ + "event": "hard_bounce", + } + acc.AssertContainsTaggedFields(t, "mandrill_webhooks", fields, tags) +} diff --git a/plugins/inputs/rollbar_webhooks/README.md b/plugins/inputs/webhooks/rollbar/README.md similarity index 53% rename from plugins/inputs/rollbar_webhooks/README.md rename to plugins/inputs/webhooks/rollbar/README.md index d6938df28..f6c871a07 100644 --- a/plugins/inputs/rollbar_webhooks/README.md +++ b/plugins/inputs/webhooks/rollbar/README.md @@ -1,15 +1,6 @@ -# rollbar_webhooks +# rollbar webhooks -This is a Telegraf service plugin that listens for events kicked off by Rollbar Webhooks service and persists data from them into configured outputs. 
To set up the listener first generate the proper configuration: -```sh -$ telegraf -sample-config -input-filter rollbar_webhooks -output-filter influxdb > config.conf.new -``` -Change the config file to point to the InfluxDB server you are using and adjust the settings to match your environment. Once that is complete: -```sh -$ cp config.conf.new /etc/telegraf/telegraf.conf -$ sudo service telegraf start -``` -Once the server is running you should configure your Rollbar's Webhooks to point at the `rollbar_webhooks` service. To do this go to `rollbar.com/` and click `Settings > Notifications > Webhook`. In the resulting page set `URL` to `http://:1619`, and click on `Enable Webhook Integration`. +You should configure your Rollbar's Webhooks to point at the `webhooks` service. To do this go to `rollbar.com/` and click `Settings > Notifications > Webhook`. In the resulting page set `URL` to `http://:1619/rollbar`, and click on `Enable Webhook Integration`. ## Events diff --git a/plugins/inputs/webhooks/rollbar/rollbar_webhooks.go b/plugins/inputs/webhooks/rollbar/rollbar_webhooks.go new file mode 100644 index 000000000..8b8dada50 --- /dev/null +++ b/plugins/inputs/webhooks/rollbar/rollbar_webhooks.go @@ -0,0 +1,69 @@ +package rollbar + +import ( + "encoding/json" + "errors" + "io/ioutil" + "log" + "net/http" + "time" + + "github.com/gorilla/mux" + "github.com/influxdata/telegraf" +) + +type RollbarWebhook struct { + Path string + acc telegraf.Accumulator +} + +func (rb *RollbarWebhook) Register(router *mux.Router, acc telegraf.Accumulator) { + router.HandleFunc(rb.Path, rb.eventHandler).Methods("POST") + log.Printf("Started the webhooks_rollbar on %s\n", rb.Path) + rb.acc = acc +} + +func (rb *RollbarWebhook) eventHandler(w http.ResponseWriter, r *http.Request) { + defer r.Body.Close() + data, err := ioutil.ReadAll(r.Body) + if err != nil { + w.WriteHeader(http.StatusBadRequest) + return + } + + dummyEvent := &DummyEvent{} + err = json.Unmarshal(data, dummyEvent) + if err != nil { + w.WriteHeader(http.StatusBadRequest) + return + } + + event, err := NewEvent(dummyEvent, data) + if err != nil { + w.WriteHeader(http.StatusOK) + return + } + + rb.acc.AddFields("rollbar_webhooks", event.Fields(), event.Tags(), time.Now()) + + w.WriteHeader(http.StatusOK) +} + +func generateEvent(event Event, data []byte) (Event, error) { + err := json.Unmarshal(data, event) + if err != nil { + return nil, err + } + return event, nil +} + +func NewEvent(dummyEvent *DummyEvent, data []byte) (Event, error) { + switch dummyEvent.EventName { + case "new_item": + return generateEvent(&NewItem{}, data) + case "deploy": + return generateEvent(&Deploy{}, data) + default: + return nil, errors.New("Not implemented type: " + dummyEvent.EventName) + } +} diff --git a/plugins/inputs/rollbar_webhooks/rollbar_webhooks_events.go b/plugins/inputs/webhooks/rollbar/rollbar_webhooks_events.go similarity index 98% rename from plugins/inputs/rollbar_webhooks/rollbar_webhooks_events.go rename to plugins/inputs/webhooks/rollbar/rollbar_webhooks_events.go index 8cccec336..e40e95858 100644 --- a/plugins/inputs/rollbar_webhooks/rollbar_webhooks_events.go +++ b/plugins/inputs/webhooks/rollbar/rollbar_webhooks_events.go @@ -1,4 +1,4 @@ -package rollbar_webhooks +package rollbar import "strconv" diff --git a/plugins/inputs/rollbar_webhooks/rollbar_webhooks_events_json_test.go b/plugins/inputs/webhooks/rollbar/rollbar_webhooks_events_json_test.go similarity index 98% rename from 
plugins/inputs/rollbar_webhooks/rollbar_webhooks_events_json_test.go rename to plugins/inputs/webhooks/rollbar/rollbar_webhooks_events_json_test.go index 99a6db8ff..5244a9d2f 100644 --- a/plugins/inputs/rollbar_webhooks/rollbar_webhooks_events_json_test.go +++ b/plugins/inputs/webhooks/rollbar/rollbar_webhooks_events_json_test.go @@ -1,4 +1,4 @@ -package rollbar_webhooks +package rollbar func NewItemJSON() string { return ` diff --git a/plugins/inputs/rollbar_webhooks/rollbar_webhooks_test.go b/plugins/inputs/webhooks/rollbar/rollbar_webhooks_test.go similarity index 85% rename from plugins/inputs/rollbar_webhooks/rollbar_webhooks_test.go rename to plugins/inputs/webhooks/rollbar/rollbar_webhooks_test.go index e0b183a8c..9b54a8281 100644 --- a/plugins/inputs/rollbar_webhooks/rollbar_webhooks_test.go +++ b/plugins/inputs/webhooks/rollbar/rollbar_webhooks_test.go @@ -1,4 +1,4 @@ -package rollbar_webhooks +package rollbar import ( "net/http" @@ -9,7 +9,7 @@ import ( "github.com/influxdata/telegraf/testutil" ) -func postWebhooks(rb *RollbarWebhooks, eventBody string) *httptest.ResponseRecorder { +func postWebhooks(rb *RollbarWebhook, eventBody string) *httptest.ResponseRecorder { req, _ := http.NewRequest("POST", "/", strings.NewReader(eventBody)) w := httptest.NewRecorder() w.Code = 500 @@ -21,12 +21,11 @@ func postWebhooks(rb *RollbarWebhooks, eventBody string) *httptest.ResponseRecor func TestNewItem(t *testing.T) { var acc testutil.Accumulator - rb := NewRollbarWebhooks() + rb := &RollbarWebhook{Path: "/rollbar", acc: &acc} resp := postWebhooks(rb, NewItemJSON()) if resp.Code != http.StatusOK { t.Errorf("POST new_item returned HTTP status code %v.\nExpected %v", resp.Code, http.StatusOK) } - rb.Gather(&acc) fields := map[string]interface{}{ "id": 272716944, @@ -45,12 +44,11 @@ func TestNewItem(t *testing.T) { func TestDeploy(t *testing.T) { var acc testutil.Accumulator - rb := NewRollbarWebhooks() + rb := &RollbarWebhook{Path: "/rollbar", acc: &acc} resp := postWebhooks(rb, DeployJSON()) if resp.Code != http.StatusOK { t.Errorf("POST deploy returned HTTP status code %v.\nExpected %v", resp.Code, http.StatusOK) } - rb.Gather(&acc) fields := map[string]interface{}{ "id": 187585, @@ -66,7 +64,7 @@ func TestDeploy(t *testing.T) { } func TestUnknowItem(t *testing.T) { - rb := NewRollbarWebhooks() + rb := &RollbarWebhook{Path: "/rollbar"} resp := postWebhooks(rb, UnknowJSON()) if resp.Code != http.StatusOK { t.Errorf("POST unknow returned HTTP status code %v.\nExpected %v", resp.Code, http.StatusOK) diff --git a/plugins/inputs/webhooks/webhooks.go b/plugins/inputs/webhooks/webhooks.go new file mode 100644 index 000000000..884435c36 --- /dev/null +++ b/plugins/inputs/webhooks/webhooks.go @@ -0,0 +1,104 @@ +package webhooks + +import ( + "fmt" + "log" + "net/http" + "reflect" + + "github.com/gorilla/mux" + "github.com/influxdata/telegraf" + "github.com/influxdata/telegraf/plugins/inputs" + + "github.com/influxdata/telegraf/plugins/inputs/webhooks/github" + "github.com/influxdata/telegraf/plugins/inputs/webhooks/mandrill" + "github.com/influxdata/telegraf/plugins/inputs/webhooks/rollbar" +) + +type Webhook interface { + Register(router *mux.Router, acc telegraf.Accumulator) +} + +func init() { + inputs.Add("webhooks", func() telegraf.Input { return NewWebhooks() }) +} + +type Webhooks struct { + ServiceAddress string + + Github *github.GithubWebhook + Mandrill *mandrill.MandrillWebhook + Rollbar *rollbar.RollbarWebhook +} + +func NewWebhooks() *Webhooks { + return &Webhooks{} +} + +func (wb 
*Webhooks) SampleConfig() string { + return ` + ## Address and port to host Webhook listener on + service_address = ":1619" + + [inputs.webhooks.github] + path = "/github" + + [inputs.webhooks.mandrill] + path = "/mandrill" + + [inputs.webhooks.rollbar] + path = "/rollbar" + ` +} + +func (wb *Webhooks) Description() string { + return "A Webhooks Event collector" +} + +func (wb *Webhooks) Gather(_ telegraf.Accumulator) error { + return nil +} + +func (wb *Webhooks) Listen(acc telegraf.Accumulator) { + r := mux.NewRouter() + + for _, webhook := range wb.AvailableWebhooks() { + webhook.Register(r, acc) + } + + err := http.ListenAndServe(fmt.Sprintf("%s", wb.ServiceAddress), r) + if err != nil { + log.Printf("Error starting server: %v", err) + } +} + +// Looks for fields which implement Webhook interface +func (wb *Webhooks) AvailableWebhooks() []Webhook { + webhooks := make([]Webhook, 0) + s := reflect.ValueOf(wb).Elem() + for i := 0; i < s.NumField(); i++ { + f := s.Field(i) + + if !f.CanInterface() { + continue + } + + if wbPlugin, ok := f.Interface().(Webhook); ok { + if !reflect.ValueOf(wbPlugin).IsNil() { + webhooks = append(webhooks, wbPlugin) + } + } + } + + return webhooks +} + +func (wb *Webhooks) Start(acc telegraf.Accumulator) error { + go wb.Listen(acc) + log.Printf("Started the webhooks service on %s\n", wb.ServiceAddress) + return nil +} + +func (rb *Webhooks) Stop() { + log.Println("Stopping the Webhooks service") +} diff --git a/plugins/inputs/webhooks/webhooks_test.go b/plugins/inputs/webhooks/webhooks_test.go new file mode 100644 index 000000000..85d359e1c --- /dev/null +++ b/plugins/inputs/webhooks/webhooks_test.go @@ -0,0 +1,29 @@ +package webhooks + +import ( + "reflect" + "testing" + + "github.com/influxdata/telegraf/plugins/inputs/webhooks/github" + "github.com/influxdata/telegraf/plugins/inputs/webhooks/rollbar" +) + +func TestAvailableWebhooks(t *testing.T) { + wb := NewWebhooks() + expected := make([]Webhook, 0) + if !reflect.DeepEqual(wb.AvailableWebhooks(), expected) { + t.Errorf("expected to %v.\nGot %v", expected, wb.AvailableWebhooks()) + } + + wb.Github = &github.GithubWebhook{Path: "/github"} + expected = append(expected, wb.Github) + if !reflect.DeepEqual(wb.AvailableWebhooks(), expected) { + t.Errorf("expected to be %v.\nGot %v", expected, wb.AvailableWebhooks()) + } + + wb.Rollbar = &rollbar.RollbarWebhook{Path: "/rollbar"} + expected = append(expected, wb.Rollbar) + if !reflect.DeepEqual(wb.AvailableWebhooks(), expected) { + t.Errorf("expected to be %v.\nGot %v", expected, wb.AvailableWebhooks()) + } +} diff --git a/plugins/inputs/win_perf_counters/win_perf_counters.go b/plugins/inputs/win_perf_counters/win_perf_counters.go index 4684289ee..60b9ff55d 100644 --- a/plugins/inputs/win_perf_counters/win_perf_counters.go +++ b/plugins/inputs/win_perf_counters/win_perf_counters.go @@ -107,7 +107,8 @@ type item struct { counterHandle win.PDH_HCOUNTER } -var sanitizedChars = strings.NewReplacer("/sec", "_persec", "/Sec", "_persec", " ", "_") +var sanitizedChars = strings.NewReplacer("/sec", "_persec", "/Sec", "_persec", + " ", "_", "%", "Percent", `\`, "") func (m *Win_PerfCounters) AddItem(metrics *itemList, query string, objectName string, counter string, instance string, measurement string, include_total bool) { @@ -271,6 +272,9 @@ func (m *Win_PerfCounters) Gather(acc telegraf.Accumulator) error { &bufCount, &emptyBuf[0]) // uses null ptr here according to MSDN. 
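// The call above deliberately probes with an empty buffer so PDH can
// report the size it needs; PDH_MORE_DATA means "allocate bufCount*size
// items and call again". The new len(filledBuf) == 0 guard below skips
// counters that report zero items, since taking &filledBuf[0] on an
// empty slice would panic.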
if ret == win.PDH_MORE_DATA { filledBuf := make([]win.PDH_FMT_COUNTERVALUE_ITEM_DOUBLE, bufCount*size) + if len(filledBuf) == 0 { + continue + } ret = win.PdhGetFormattedCounterArrayDouble(metric.counterHandle, &bufSize, &bufCount, &filledBuf[0]) for i := 0; i < int(bufCount); i++ { @@ -299,13 +303,12 @@ func (m *Win_PerfCounters) Gather(acc telegraf.Accumulator) error { tags["instance"] = s } tags["objectname"] = metric.objectName - fields[sanitizedChars.Replace(string(metric.counter))] = float32(c.FmtValue.DoubleValue) + fields[sanitizedChars.Replace(metric.counter)] = + float32(c.FmtValue.DoubleValue) - var measurement string - if metric.measurement == "" { + measurement := sanitizedChars.Replace(metric.measurement) + if measurement == "" { measurement = "win_perf_counters" - } else { - measurement = metric.measurement } acc.AddFields(measurement, fields, tags) } diff --git a/plugins/inputs/zookeeper/README.md b/plugins/inputs/zookeeper/README.md index fe7a8a4ad..80281a87d 100644 --- a/plugins/inputs/zookeeper/README.md +++ b/plugins/inputs/zookeeper/README.md @@ -27,36 +27,39 @@ echo mntr | nc localhost 2181 zk_max_file_descriptor_count 1024 - only available on Unix platforms ``` -## Measurements: -#### Zookeeper measurements: +## Configuration -Meta: -- units: int64 -- tags: `server= port=` +``` +# Reads 'mntr' stats from one or many zookeeper servers +[[inputs.zookeeper]] + ## An array of addresses to gather stats about. Specify an IP or hostname + ## with port, e.g. localhost:2181, 10.0.0.1:2181, etc. -Measurement names: -- zookeeper_avg_latency -- zookeeper_max_latency -- zookeeper_min_latency -- zookeeper_packets_received -- zookeeper_packets_sent -- zookeeper_outstanding_requests -- zookeeper_znode_count -- zookeeper_watch_count -- zookeeper_ephemerals_count -- zookeeper_approximate_data_size -- zookeeper_followers #only exposed by the Leader -- zookeeper_synced_followers #only exposed by the Leader -- zookeeper_pending_syncs #only exposed by the Leader -- zookeeper_open_file_descriptor_count -- zookeeper_max_file_descriptor_count + ## If no servers are specified, then localhost is used as the host. 
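+ ## For example (hypothetical addresses):
+ ## servers = ["10.0.0.1:2181", "10.0.0.2:2181"]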
+ ## If no port is specified, 2181 is used + servers = [":2181"] +``` -#### Zookeeper string measurements: +## InfluxDB Measurement: -Meta: -- units: string -- tags: `server= port=` - -Measurement names: -- zookeeper_version -- zookeeper_server_state \ No newline at end of file +``` +M zookeeper + T host + T port + T state + + F approximate_data_size integer + F avg_latency integer + F ephemerals_count integer + F max_file_descriptor_count integer + F max_latency integer + F min_latency integer + F num_alive_connections integer + F open_file_descriptor_count integer + F outstanding_requests integer + F packets_received integer + F packets_sent integer + F version string + F watch_count integer + F znode_count integer +``` \ No newline at end of file diff --git a/plugins/inputs/zookeeper/zookeeper.go b/plugins/inputs/zookeeper/zookeeper.go index 54defc56f..c11b55f68 100644 --- a/plugins/inputs/zookeeper/zookeeper.go +++ b/plugins/inputs/zookeeper/zookeeper.go @@ -55,6 +55,7 @@ func (z *Zookeeper) Gather(acc telegraf.Accumulator) error { } func (z *Zookeeper) gatherServer(address string, acc telegraf.Accumulator) error { + var zookeeper_state string _, _, err := net.SplitHostPort(address) if err != nil { address = address + ":2181" @@ -78,7 +79,6 @@ func (z *Zookeeper) gatherServer(address string, acc telegraf.Accumulator) error if len(service) != 2 { return fmt.Errorf("Invalid service address: %s", address) } - tags := map[string]string{"server": service[0], "port": service[1]} fields := make(map[string]interface{}) for scanner.Scan() { @@ -92,15 +92,24 @@ func (z *Zookeeper) gatherServer(address string, acc telegraf.Accumulator) error } measurement := strings.TrimPrefix(parts[1], "zk_") - sValue := string(parts[2]) - - iVal, err := strconv.ParseInt(sValue, 10, 64) - if err == nil { - fields[measurement] = iVal + if measurement == "server_state" { + zookeeper_state = parts[2] } else { - fields[measurement] = sValue + sValue := string(parts[2]) + + iVal, err := strconv.ParseInt(sValue, 10, 64) + if err == nil { + fields[measurement] = iVal + } else { + fields[measurement] = sValue + } } } + tags := map[string]string{ + "server": service[0], + "port": service[1], + "state": zookeeper_state, + } acc.AddFields("zookeeper", fields, tags) return nil diff --git a/plugins/outputs/graphite/README.md b/plugins/outputs/graphite/README.md index 2de699dea..3e2369e21 100644 --- a/plugins/outputs/graphite/README.md +++ b/plugins/outputs/graphite/README.md @@ -9,6 +9,8 @@ via raw TCP. # Configuration for Graphite server to send metrics to [[outputs.graphite]] ## TCP endpoint for your graphite instance. + ## If multiple endpoints are configured, the output will be load balanced. + ## Only one of the endpoints will be written to with each iteration. servers = ["localhost:2003"] ## Prefix metrics name prefix = "" diff --git a/plugins/outputs/graphite/graphite.go b/plugins/outputs/graphite/graphite.go index 2a573e345..fb95aff83 100644 --- a/plugins/outputs/graphite/graphite.go +++ b/plugins/outputs/graphite/graphite.go @@ -2,7 +2,6 @@ package graphite import ( "errors" - "fmt" "log" "math/rand" "net" @@ -25,6 +24,8 @@ type Graphite struct { var sampleConfig = ` ## TCP endpoint for your graphite instance. + ## If multiple endpoints are configured, output will be load balanced. + ## Only one of the endpoints will be written to with each iteration. 
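+ ## e.g. servers = ["graphite1:2003", "graphite2:2003"] (hypothetical hosts)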
servers = ["localhost:2003"] ## Prefix metrics name prefix = "" @@ -96,9 +97,12 @@ func (g *Graphite) Write(metrics []telegraf.Metric) error { // Send data to a random server p := rand.Perm(len(g.conns)) for _, n := range p { - if _, e := fmt.Fprintf(g.conns[n], graphitePoints); e != nil { + if g.Timeout > 0 { + g.conns[n].SetWriteDeadline(time.Now().Add(time.Duration(g.Timeout) * time.Second)) + } + if _, e := g.conns[n].Write([]byte(graphitePoints)); e != nil { // Error - log.Println("ERROR: " + err.Error()) + log.Println("ERROR: " + e.Error()) // Let's try the next one } else { // Success diff --git a/plugins/outputs/influxdb/README.md b/plugins/outputs/influxdb/README.md index b55a2c4c9..864177a36 100644 --- a/plugins/outputs/influxdb/README.md +++ b/plugins/outputs/influxdb/README.md @@ -2,6 +2,42 @@ This plugin writes to [InfluxDB](https://www.influxdb.com) via HTTP or UDP. +### Configuration: + +```toml +# Configuration for influxdb server to send metrics to +[[outputs.influxdb]] + ## The full HTTP or UDP endpoint URL for your InfluxDB instance. + ## Multiple urls can be specified as part of the same cluster, + ## this means that only ONE of the urls will be written to each interval. + # urls = ["udp://localhost:8089"] # UDP endpoint example + urls = ["http://localhost:8086"] # required + ## The target database for metrics (telegraf will create it if not exists). + database = "telegraf" # required + + ## Retention policy to write to. Empty string writes to the default rp. + retention_policy = "" + ## Write consistency (clusters only), can be: "any", "one", "quorum", "all" + write_consistency = "any" + + ## Write timeout (for the InfluxDB client), formatted as a string. + ## If not provided, will default to 5s. 0s means no timeout (not recommended). + timeout = "5s" + # username = "telegraf" + # password = "metricsmetricsmetricsmetrics" + ## Set the user agent for HTTP POSTs (can be useful for log differentiation) + # user_agent = "telegraf" + ## Set UDP payload size, defaults to InfluxDB UDP Client default (512 bytes) + # udp_payload = 512 + + ## Optional SSL Config + # ssl_ca = "/etc/telegraf/ca.pem" + # ssl_cert = "/etc/telegraf/cert.pem" + # ssl_key = "/etc/telegraf/key.pem" + ## Use SSL but skip chain & host verification + # insecure_skip_verify = false +``` + ### Required parameters: * `urls`: List of strings, this is for InfluxDB clustering @@ -12,16 +48,14 @@ to write to. Each URL should start with either `http://` or `udp://` ### Optional parameters: +* `write_consistency`: Write consistency (clusters only), can be: "any", "one", "quorum", "all". * `retention_policy`: Retention policy to write to. -* `precision`: Precision of writes, valid values are "ns", "us" (or "µs"), "ms", "s", "m", "h". note: using "s" precision greatly improves InfluxDB compression. * `timeout`: Write timeout (for the InfluxDB client), formatted as a string. If not provided, will default to 5s. 0s means no timeout (not recommended). 
* `username`: Username for influxdb * `password`: Password for influxdb * `user_agent`: Set the user agent for HTTP POSTs (can be useful for log differentiation) * `udp_payload`: Set UDP payload size, defaults to InfluxDB UDP Client default (512 bytes) - ## Optional SSL Config * `ssl_ca`: SSL CA * `ssl_cert`: SSL CERT * `ssl_key`: SSL key * `insecure_skip_verify`: Use SSL but skip chain & host verification (default: false) -* `write_consistency`: Write consistency for clusters only, can be: "any", "one", "quorom", "all" diff --git a/plugins/outputs/influxdb/influxdb.go b/plugins/outputs/influxdb/influxdb.go index f359b8fab..1d6110b34 100644 --- a/plugins/outputs/influxdb/influxdb.go +++ b/plugins/outputs/influxdb/influxdb.go @@ -24,7 +24,6 @@ type InfluxDB struct { Password string Database string UserAgent string - Precision string RetentionPolicy string WriteConsistency string Timeout internal.Duration @@ -39,6 +38,9 @@ type InfluxDB struct { // Use SSL but skip chain & host verification InsecureSkipVerify bool + // Precision is only here for legacy support. It will be ignored. + Precision string + conns []client.Client } @@ -50,13 +52,10 @@ var sampleConfig = ` urls = ["http://localhost:8086"] # required ## The target database for metrics (telegraf will create it if not exists). database = "telegraf" # required - ## Precision of writes, valid values are "ns", "us" (or "µs"), "ms", "s", "m", "h". - ## note: using "s" precision greatly improves InfluxDB compression. - precision = "s" - ## Retention policy to write to. - retention_policy = "default" - ## Write consistency (clusters only), can be: "any", "one", "quorom", "all" + ## Retention policy to write to. Empty string writes to the default rp. + retention_policy = "" + ## Write consistency (clusters only), can be: "any", "one", "quorum", "all" write_consistency = "any" ## Write timeout (for the InfluxDB client), formatted as a string. 
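The README above notes that only ONE of the configured `urls` is written to each interval. Assuming a `conns []client.Client` slice like the one the plugin keeps (`client` being the InfluxDB Go client, `rand` being math/rand), the write path can be sketched as follows; this is a minimal sketch, not the plugin's exact code:

```go
// Minimal sketch: send one batch to exactly one endpoint, trying the
// configured endpoints in random order until a write succeeds.
func writeToOne(conns []client.Client, bp client.BatchPoints) error {
	var err error
	for _, n := range rand.Perm(len(conns)) {
		if err = conns[n].Write(bp); err == nil {
			return nil
		}
	}
	// every endpoint failed; return the last error so the batch is retried
	return err
}
```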
@@ -147,7 +146,7 @@ func (i *InfluxDB) Connect() error { func createDatabase(c client.Client, database string) error { // Create Database if it doesn't exist _, err := c.Query(client.Query{ - Command: fmt.Sprintf("CREATE DATABASE IF NOT EXISTS \"%s\"", database), + Command: fmt.Sprintf("CREATE DATABASE \"%s\"", database), }) return err } @@ -184,7 +183,6 @@ func (i *InfluxDB) Write(metrics []telegraf.Metric) error { } bp, err := client.NewBatchPoints(client.BatchPointsConfig{ Database: i.Database, - Precision: i.Precision, RetentionPolicy: i.RetentionPolicy, WriteConsistency: i.WriteConsistency, }) diff --git a/plugins/outputs/instrumental/instrumental.go b/plugins/outputs/instrumental/instrumental.go index 461ba9d9e..2fcc28cc0 100644 --- a/plugins/outputs/instrumental/instrumental.go +++ b/plugins/outputs/instrumental/instrumental.go @@ -28,8 +28,10 @@ type Instrumental struct { } const ( - DefaultHost = "collector.instrumentalapp.com" - AuthFormat = "hello version go/telegraf/1.0\nauthenticate %s\n" + DefaultHost = "collector.instrumentalapp.com" + HelloMessage = "hello version go/telegraf/1.1\n" + AuthFormat = "authenticate %s\n" + HandshakeFormat = HelloMessage + AuthFormat ) var ( @@ -52,6 +54,7 @@ var sampleConfig = ` func (i *Instrumental) Connect() error { connection, err := net.DialTimeout("tcp", i.Host+":8000", i.Timeout.Duration) + if err != nil { i.conn = nil return err @@ -151,6 +154,11 @@ func (i *Instrumental) Write(metrics []telegraf.Metric) error { return err } + // force the connection closed after sending data + // to deal with various disconnection scenarios and eschew holding + // open idle connections en masse + i.Close() + return nil } @@ -163,7 +171,7 @@ func (i *Instrumental) SampleConfig() string { } func (i *Instrumental) authenticate(conn net.Conn) error { - _, err := fmt.Fprintf(conn, AuthFormat, i.ApiToken) + _, err := fmt.Fprintf(conn, HandshakeFormat, i.ApiToken) if err != nil { return err } diff --git a/plugins/outputs/instrumental/instrumental_test.go b/plugins/outputs/instrumental/instrumental_test.go index ceb53bac6..9708a2590 100644 --- a/plugins/outputs/instrumental/instrumental_test.go +++ b/plugins/outputs/instrumental/instrumental_test.go @@ -24,7 +24,6 @@ func TestWrite(t *testing.T) { ApiToken: "abc123token", Prefix: "my.prefix", } - i.Connect() // Default to gauge m1, _ := telegraf.NewMetric( @@ -40,10 +39,8 @@ func TestWrite(t *testing.T) { time.Date(2010, time.November, 10, 23, 0, 0, 0, time.UTC), ) - // Simulate a connection close and reconnect. 
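// (The close/reconnect simulation that used to live here is no longer
// needed: Write() now force-closes the connection after each send, so
// the second Write below exercises the reconnect path on its own.)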
metrics := []telegraf.Metric{m1, m2} i.Write(metrics) - i.Close() // Counter and Histogram are increments m3, _ := telegraf.NewMetric( @@ -70,7 +67,6 @@ func TestWrite(t *testing.T) { i.Write(metrics) wg.Wait() - i.Close() } func TCPServer(t *testing.T, wg *sync.WaitGroup) { @@ -82,10 +78,9 @@ func TCPServer(t *testing.T, wg *sync.WaitGroup) { tp := textproto.NewReader(reader) hello, _ := tp.ReadLine() - assert.Equal(t, "hello version go/telegraf/1.0", hello) + assert.Equal(t, "hello version go/telegraf/1.1", hello) auth, _ := tp.ReadLine() assert.Equal(t, "authenticate abc123token", auth) - conn.Write([]byte("ok\nok\n")) data1, _ := tp.ReadLine() @@ -99,10 +94,9 @@ func TCPServer(t *testing.T, wg *sync.WaitGroup) { tp = textproto.NewReader(reader) hello, _ = tp.ReadLine() - assert.Equal(t, "hello version go/telegraf/1.0", hello) + assert.Equal(t, "hello version go/telegraf/1.1", hello) auth, _ = tp.ReadLine() assert.Equal(t, "authenticate abc123token", auth) - conn.Write([]byte("ok\nok\n")) data3, _ := tp.ReadLine() diff --git a/plugins/outputs/kafka/README.md b/plugins/outputs/kafka/README.md new file mode 100644 index 000000000..390407e14 --- /dev/null +++ b/plugins/outputs/kafka/README.md @@ -0,0 +1,67 @@ +# Kafka Producer Output Plugin + +This plugin writes to a [Kafka Broker](http://kafka.apache.org/07/quickstart.html) acting as a Kafka Producer. + +``` +[[outputs.kafka]] + ## URLs of kafka brokers + brokers = ["localhost:9092"] + ## Kafka topic for producer messages + topic = "telegraf" + ## Telegraf tag to use as a routing key + ## i.e., if this tag exists, its value will be used as the routing key + routing_tag = "host" + + ## CompressionCodec represents the various compression codecs recognized by + ## Kafka in messages. + ## 0 : No compression + ## 1 : Gzip compression + ## 2 : Snappy compression + compression_codec = 0 + + ## RequiredAcks is used in Produce Requests to tell the broker how many + ## replica acknowledgements it must see before responding + ## 0 : the producer never waits for an acknowledgement from the broker. + ## This option provides the lowest latency but the weakest durability + ## guarantees (some data will be lost when a server fails). + ## 1 : the producer gets an acknowledgement after the leader replica has + ## received the data. This option provides better durability as the + ## client waits until the server acknowledges the request as successful + ## (only messages that were written to the now-dead leader but not yet + ## replicated will be lost). + ## -1: the producer gets an acknowledgement after all in-sync replicas have + ## received the data. This option provides the best durability, we + ## guarantee that no messages will be lost as long as at least one in + ## sync replica remains. + required_acks = -1 + + ## The total number of times to retry sending a message + max_retry = 3 + + ## Optional SSL Config + # ssl_ca = "/etc/telegraf/ca.pem" + # ssl_cert = "/etc/telegraf/cert.pem" + # ssl_key = "/etc/telegraf/key.pem" + ## Use SSL but skip chain & host verification + # insecure_skip_verify = false + + data_format = "influx" +``` + +### Required parameters: + +* `brokers`: List of strings; this is for speaking to a cluster of `kafka` brokers. On each flush interval, Telegraf will randomly choose one of the urls to write to. Each URL should just include host and port, e.g. `["{host}:{port}","{host2}:{port2}"]` +* `topic`: The `kafka` topic to publish to. 
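As a rough illustration of the `routing_tag` option documented above (a hypothetical sketch, not the plugin's code; `k.RoutingTag` and `metric` stand in for the plugin's config field and a `telegraf.Metric`):

```go
// Use the configured tag's value as the Kafka message key when present;
// metrics without the tag get an empty key. Kafka producers typically
// hash the key to choose a partition, so metrics that share a tag value
// land on the same partition.
key := ""
if v, ok := metric.Tags()[k.RoutingTag]; ok {
	key = v
}
```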
+ + +### Optional parameters: + +* `routing_tag`: if this tag exists, its value will be used as the routing key +* `compression_codec`: What level of compression to use: `0` -> no compression, `1` -> gzip compression, `2` -> snappy compression +* `required_acks`: a setting for how many `acks` are required from the `kafka` broker cluster. +* `max_retry`: Max number of times to retry failed write +* `ssl_ca`: SSL CA +* `ssl_cert`: SSL CERT +* `ssl_key`: SSL key +* `insecure_skip_verify`: Use SSL but skip chain & host verification (default: false) +* `data_format`: [About Telegraf data formats](https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_OUTPUT.md) diff --git a/plugins/outputs/librato/librato.go b/plugins/outputs/librato/librato.go index 15d6adbb2..17d0d4c6a 100644 --- a/plugins/outputs/librato/librato.go +++ b/plugins/outputs/librato/librato.go @@ -7,6 +7,7 @@ import ( "io/ioutil" "log" "net/http" + "regexp" "github.com/influxdata/telegraf" "github.com/influxdata/telegraf/internal" @@ -14,19 +15,22 @@ import ( "github.com/influxdata/telegraf/plugins/serializers/graphite" ) +// Librato structure for configuration and client type Librato struct { - ApiUser string - ApiToken string - Debug bool - NameFromTags bool - SourceTag string - Timeout internal.Duration - Template string + APIUser string + APIToken string + Debug bool + SourceTag string // Deprecated, keeping for backward-compatibility + Timeout internal.Duration + Template string - apiUrl string + APIUrl string client *http.Client } +// https://www.librato.com/docs/kb/faq/best_practices/naming_convention_metrics_sources.html#naming-limitations-for-sources-and-metrics +var reUnacceptedChar = regexp.MustCompile("[^.a-zA-Z0-9_-]") + var sampleConfig = ` ## Librato API Docs ## http://dev.librato.com/v1/metrics-authentication @@ -36,20 +40,21 @@ var sampleConfig = ` api_token = "my-secret-token" # required. ## Debug # debug = false - ## Tag Field to populate source attribute (optional) - ## This is typically the _hostname_ from which the metric was obtained. - source_tag = "host" ## Connection timeout. 
# timeout = "5s" - ## Output Name Template (same as graphite buckets) + ## Output source Template (same as graphite buckets) ## see https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_OUTPUT.md#graphite - template = "host.tags.measurement.field" + ## This template is used in librato's source (not metric's name) + template = "host" + ` +// LMetrics is the default struct for Librato's API fromat type LMetrics struct { Gauges []*Gauge `json:"gauges"` } +// Gauge is the gauge format for Librato's API fromat type Gauge struct { Name string `json:"name"` Value float64 `json:"value"` @@ -57,17 +62,22 @@ type Gauge struct { MeasureTime int64 `json:"measure_time"` } -const librato_api = "https://metrics-api.librato.com/v1/metrics" +const libratoAPI = "https://metrics-api.librato.com/v1/metrics" -func NewLibrato(apiUrl string) *Librato { +// NewLibrato is the main constructor for librato output plugins +func NewLibrato(apiURL string) *Librato { return &Librato{ - apiUrl: apiUrl, + APIUrl: apiURL, + Template: "host", } } +// Connect is the default output plugin connection function who make sure it +// can connect to the endpoint func (l *Librato) Connect() error { - if l.ApiUser == "" || l.ApiToken == "" { - return fmt.Errorf("api_user and api_token are required fields for librato output") + if l.APIUser == "" || l.APIToken == "" { + return fmt.Errorf( + "api_user and api_token are required fields for librato output") } l.client = &http.Client{ Timeout: l.Timeout.Duration, @@ -76,18 +86,23 @@ func (l *Librato) Connect() error { } func (l *Librato) Write(metrics []telegraf.Metric) error { + if len(metrics) == 0 { return nil } - lmetrics := LMetrics{} + if l.Template == "" { + l.Template = "host" + } + if l.SourceTag != "" { + l.Template = l.SourceTag + } + tempGauges := []*Gauge{} - metricCounter := 0 for _, m := range metrics { if gauges, err := l.buildGauges(m); err == nil { for _, gauge := range gauges { tempGauges = append(tempGauges, gauge) - metricCounter++ if l.Debug { log.Printf("[DEBUG] Got a gauge: %v\n", gauge) } @@ -100,82 +115,115 @@ func (l *Librato) Write(metrics []telegraf.Metric) error { } } - lmetrics.Gauges = make([]*Gauge, metricCounter) - copy(lmetrics.Gauges, tempGauges[0:]) - metricsBytes, err := json.Marshal(lmetrics) - if err != nil { - return fmt.Errorf("unable to marshal Metrics, %s\n", err.Error()) - } else { + metricCounter := len(tempGauges) + // make sur we send a batch of maximum 300 + sizeBatch := 300 + for start := 0; start < metricCounter; start += sizeBatch { + lmetrics := LMetrics{} + end := start + sizeBatch + if end > metricCounter { + end = metricCounter + sizeBatch = end - start + } + lmetrics.Gauges = make([]*Gauge, sizeBatch) + copy(lmetrics.Gauges, tempGauges[start:end]) + metricsBytes, err := json.Marshal(lmetrics) + if err != nil { + return fmt.Errorf("unable to marshal Metrics, %s\n", err.Error()) + } + if l.Debug { log.Printf("[DEBUG] Librato request: %v\n", string(metricsBytes)) } - } - req, err := http.NewRequest("POST", l.apiUrl, bytes.NewBuffer(metricsBytes)) - if err != nil { - return fmt.Errorf("unable to create http.Request, %s\n", err.Error()) - } - req.Header.Add("Content-Type", "application/json") - req.SetBasicAuth(l.ApiUser, l.ApiToken) - resp, err := l.client.Do(req) - if err != nil { - if l.Debug { - log.Printf("[DEBUG] Error POSTing metrics: %v\n", err.Error()) + req, err := http.NewRequest( + "POST", + l.APIUrl, + bytes.NewBuffer(metricsBytes)) + if err != nil { + return fmt.Errorf( + "unable to create http.Request, 
%s\n", + err.Error()) } - return fmt.Errorf("error POSTing metrics, %s\n", err.Error()) - } else { - if l.Debug { + req.Header.Add("Content-Type", "application/json") + req.SetBasicAuth(l.APIUser, l.APIToken) + + resp, err := l.client.Do(req) + if err != nil { + if l.Debug { + log.Printf("[DEBUG] Error POSTing metrics: %v\n", err.Error()) + } + return fmt.Errorf("error POSTing metrics, %s\n", err.Error()) + } + defer resp.Body.Close() + + if resp.StatusCode != 200 || l.Debug { htmlData, err := ioutil.ReadAll(resp.Body) if err != nil { log.Printf("[DEBUG] Couldn't get response! (%v)\n", err) - } else { + } + if resp.StatusCode != 200 { + return fmt.Errorf( + "received bad status code, %d\n %s", + resp.StatusCode, + string(htmlData)) + } + if l.Debug { log.Printf("[DEBUG] Librato response: %v\n", string(htmlData)) } } } - defer resp.Body.Close() - - if resp.StatusCode != 200 { - return fmt.Errorf("received bad status code, %d\n", resp.StatusCode) - } - return nil } +// SampleConfig is function who return the default configuration for this +// output func (l *Librato) SampleConfig() string { return sampleConfig } +// Description is function who return the Description of this output func (l *Librato) Description() string { return "Configuration for Librato API to send metrics to." } func (l *Librato) buildGauges(m telegraf.Metric) ([]*Gauge, error) { + gauges := []*Gauge{} - serializer := graphite.GraphiteSerializer{Template: l.Template} - bucket := serializer.SerializeBucketName(m.Name(), m.Tags()) + if m.Time().Unix() == 0 { + return gauges, fmt.Errorf( + "Measure time must not be zero\n <%s> \n", + m.String()) + } + metricSource := graphite.InsertField( + graphite.SerializeBucketName("", m.Tags(), l.Template, ""), + "value") + if metricSource == "" { + return gauges, + fmt.Errorf("undeterminable Source type from Field, %s\n", + l.Template) + } for fieldName, value := range m.Fields() { + + metricName := m.Name() + if fieldName != "value" { + metricName = fmt.Sprintf("%s.%s", m.Name(), fieldName) + } + gauge := &Gauge{ - Name: graphite.InsertField(bucket, fieldName), + Source: reUnacceptedChar.ReplaceAllString(metricSource, "-"), + Name: reUnacceptedChar.ReplaceAllString(metricName, "-"), MeasureTime: m.Time().Unix(), } - if !gauge.verifyValue(value) { + if !verifyValue(value) { continue } if err := gauge.setValue(value); err != nil { - return gauges, fmt.Errorf("unable to extract value from Fields, %s\n", + return gauges, fmt.Errorf( + "unable to extract value from Fields, %s\n", err.Error()) } - if l.SourceTag != "" { - if source, ok := m.Tags()[l.SourceTag]; ok { - gauge.Source = source - } else { - return gauges, - fmt.Errorf("undeterminable Source type from Field, %s\n", - l.SourceTag) - } - } gauges = append(gauges, gauge) } if l.Debug { @@ -184,7 +232,7 @@ func (l *Librato) buildGauges(m telegraf.Metric) ([]*Gauge, error) { return gauges, nil } -func (g *Gauge) verifyValue(v interface{}) bool { +func verifyValue(v interface{}) bool { switch v.(type) { case string: return false @@ -210,12 +258,13 @@ func (g *Gauge) setValue(v interface{}) error { return nil } +//Close is used to close the connection to librato Output func (l *Librato) Close() error { return nil } func init() { outputs.Add("librato", func() telegraf.Output { - return NewLibrato(librato_api) + return NewLibrato(libratoAPI) }) } diff --git a/plugins/outputs/librato/librato_test.go b/plugins/outputs/librato/librato_test.go index e90339928..dd5755a8c 100644 --- a/plugins/outputs/librato/librato_test.go +++ 
b/plugins/outputs/librato/librato_test.go @@ -1,7 +1,6 @@ package librato import ( - "encoding/json" "fmt" "net/http" "net/http/httptest" @@ -10,141 +9,137 @@ import ( "time" "github.com/influxdata/telegraf" - "github.com/influxdata/telegraf/plugins/serializers/graphite" - "github.com/influxdata/telegraf/testutil" "github.com/stretchr/testify/require" ) var ( - fakeUrl = "http://test.librato.com" + fakeURL = "http://test.librato.com" fakeUser = "telegraf@influxdb.com" fakeToken = "123456" ) func fakeLibrato() *Librato { - l := NewLibrato(fakeUrl) - l.ApiUser = fakeUser - l.ApiToken = fakeToken + l := NewLibrato(fakeURL) + l.APIUser = fakeUser + l.APIToken = fakeToken return l } -func BuildTags(t *testing.T) { - testMetric := testutil.TestMetric(0.0, "test1") - graphiteSerializer := graphite.GraphiteSerializer{} - tags, err := graphiteSerializer.Serialize(testMetric) - fmt.Printf("Tags: %v", tags) - require.NoError(t, err) -} - func TestUriOverride(t *testing.T) { - ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { - w.WriteHeader(http.StatusOK) - })) + ts := httptest.NewServer( + http.HandlerFunc( + func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusOK) + })) defer ts.Close() l := NewLibrato(ts.URL) - l.ApiUser = "telegraf@influxdb.com" - l.ApiToken = "123456" + l.APIUser = "telegraf@influxdb.com" + l.APIToken = "123456" err := l.Connect() require.NoError(t, err) - err = l.Write(testutil.MockMetrics()) + err = l.Write([]telegraf.Metric{newHostMetric(int32(0), "name", "host")}) require.NoError(t, err) } func TestBadStatusCode(t *testing.T) { - ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { - w.WriteHeader(http.StatusServiceUnavailable) - json.NewEncoder(w).Encode(`{ - "errors": { - "system": [ - "The API is currently down for maintenance. It'll be back shortly." 
- ] - } - }`) - })) + ts := httptest.NewServer( + http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusServiceUnavailable) + })) defer ts.Close() l := NewLibrato(ts.URL) - l.ApiUser = "telegraf@influxdb.com" - l.ApiToken = "123456" + l.APIUser = "telegraf@influxdb.com" + l.APIToken = "123456" err := l.Connect() require.NoError(t, err) - err = l.Write(testutil.MockMetrics()) + err = l.Write([]telegraf.Metric{newHostMetric(int32(0), "name", "host")}) if err == nil { t.Errorf("error expected but none returned") } else { - require.EqualError(t, fmt.Errorf("received bad status code, 503\n"), err.Error()) + require.EqualError( + t, + fmt.Errorf("received bad status code, 503\n "), err.Error()) } } func TestBuildGauge(t *testing.T) { + + mtime := time.Date(2009, time.November, 10, 23, 0, 0, 0, time.UTC).Unix() var gaugeTests = []struct { ptIn telegraf.Metric outGauge *Gauge err error }{ { - testutil.TestMetric(0.0, "test1"), + newHostMetric(0.0, "test1", "host1"), &Gauge{ - Name: "value1.test1", - MeasureTime: time.Date(2009, time.November, 10, 23, 0, 0, 0, time.UTC).Unix(), + Name: "test1", + MeasureTime: mtime, Value: 0.0, + Source: "host1", }, nil, }, { - testutil.TestMetric(1.0, "test2"), + newHostMetric(1.0, "test2", "host2"), &Gauge{ - Name: "value1.test2", - MeasureTime: time.Date(2009, time.November, 10, 23, 0, 0, 0, time.UTC).Unix(), + Name: "test2", + MeasureTime: mtime, Value: 1.0, + Source: "host2", }, nil, }, { - testutil.TestMetric(10, "test3"), + newHostMetric(10, "test3", "host3"), &Gauge{ - Name: "value1.test3", - MeasureTime: time.Date(2009, time.November, 10, 23, 0, 0, 0, time.UTC).Unix(), + Name: "test3", + MeasureTime: mtime, Value: 10.0, + Source: "host3", }, nil, }, { - testutil.TestMetric(int32(112345), "test4"), + newHostMetric(int32(112345), "test4", "host4"), &Gauge{ - Name: "value1.test4", - MeasureTime: time.Date(2009, time.November, 10, 23, 0, 0, 0, time.UTC).Unix(), + Name: "test4", + MeasureTime: mtime, Value: 112345.0, + Source: "host4", }, nil, }, { - testutil.TestMetric(int64(112345), "test5"), + newHostMetric(int64(112345), "test5", "host5"), &Gauge{ - Name: "value1.test5", - MeasureTime: time.Date(2009, time.November, 10, 23, 0, 0, 0, time.UTC).Unix(), + Name: "test5", + MeasureTime: mtime, Value: 112345.0, + Source: "host5", }, nil, }, { - testutil.TestMetric(float32(11234.5), "test6"), + newHostMetric(float32(11234.5), "test6", "host6"), &Gauge{ - Name: "value1.test6", - MeasureTime: time.Date(2009, time.November, 10, 23, 0, 0, 0, time.UTC).Unix(), + Name: "test6", + MeasureTime: mtime, Value: 11234.5, + Source: "host6", }, nil, }, { - testutil.TestMetric("11234.5", "test7"), + newHostMetric("11234.5", "test7", "host7"), nil, nil, }, } - l := NewLibrato(fakeUrl) + l := NewLibrato(fakeURL) for _, gt := range gaugeTests { gauges, err := l.buildGauges(gt.ptIn) if err != nil && gt.err == nil { @@ -167,61 +162,121 @@ func TestBuildGauge(t *testing.T) { } } +func newHostMetric(value interface{}, name, host string) (metric telegraf.Metric) { + metric, _ = telegraf.NewMetric( + name, + map[string]string{"host": host}, + map[string]interface{}{"value": value}, + time.Date(2009, time.November, 10, 23, 0, 0, 0, time.UTC), + ) + return +} + func TestBuildGaugeWithSource(t *testing.T) { + mtime := time.Date(2009, time.November, 10, 23, 0, 0, 0, time.UTC) pt1, _ := telegraf.NewMetric( "test1", map[string]string{"hostname": "192.168.0.1", "tag1": "value1"}, map[string]interface{}{"value": 0.0}, - time.Date(2010, time.November, 10, 23, 
0, 0, 0, time.UTC), + mtime, ) pt2, _ := telegraf.NewMetric( "test2", map[string]string{"hostnam": "192.168.0.1", "tag1": "value1"}, map[string]interface{}{"value": 1.0}, - time.Date(2010, time.December, 10, 23, 0, 0, 0, time.UTC), + mtime, + ) + pt3, _ := telegraf.NewMetric( + "test3", + map[string]string{ + "hostname": "192.168.0.1", + "tag2": "value2", + "tag1": "value1"}, + map[string]interface{}{"value": 1.0}, + mtime, + ) + pt4, _ := telegraf.NewMetric( + "test4", + map[string]string{ + "hostname": "192.168.0.1", + "tag2": "value2", + "tag1": "value1"}, + map[string]interface{}{"value": 1.0}, + mtime, ) var gaugeTests = []struct { ptIn telegraf.Metric + template string outGauge *Gauge err error }{ { pt1, + "hostname", &Gauge{ - Name: "192_168_0_1.value1.test1", - MeasureTime: time.Date(2010, time.November, 10, 23, 0, 0, 0, time.UTC).Unix(), + Name: "test1", + MeasureTime: mtime.Unix(), Value: 0.0, - Source: "192.168.0.1", + Source: "192_168_0_1", }, nil, }, { pt2, + "hostname", &Gauge{ - Name: "192_168_0_1.value1.test1", - MeasureTime: time.Date(2010, time.December, 10, 23, 0, 0, 0, time.UTC).Unix(), + Name: "test2", + MeasureTime: mtime.Unix(), Value: 1.0, }, fmt.Errorf("undeterminable Source type from Field, hostname"), }, + { + pt3, + "tags", + &Gauge{ + Name: "test3", + MeasureTime: mtime.Unix(), + Value: 1.0, + Source: "192_168_0_1.value1.value2", + }, + nil, + }, + { + pt4, + "hostname.tag2", + &Gauge{ + Name: "test4", + MeasureTime: mtime.Unix(), + Value: 1.0, + Source: "192_168_0_1.value2", + }, + nil, + }, } - l := NewLibrato(fakeUrl) - l.SourceTag = "hostname" + l := NewLibrato(fakeURL) for _, gt := range gaugeTests { + l.Template = gt.template gauges, err := l.buildGauges(gt.ptIn) if err != nil && gt.err == nil { t.Errorf("%s: unexpected error, %+v\n", gt.ptIn.Name(), err) } if gt.err != nil && err == nil { - t.Errorf("%s: expected an error (%s) but none returned", gt.ptIn.Name(), gt.err.Error()) + t.Errorf( + "%s: expected an error (%s) but none returned", + gt.ptIn.Name(), + gt.err.Error()) } if len(gauges) == 0 { continue } if gt.err == nil && !reflect.DeepEqual(gauges[0], gt.outGauge) { - t.Errorf("%s: \nexpected %+v\ngot %+v\n", gt.ptIn.Name(), gt.outGauge, gauges[0]) + t.Errorf( + "%s: \nexpected %+v\ngot %+v\n", + gt.ptIn.Name(), + gt.outGauge, gauges[0]) } } } diff --git a/plugins/outputs/opentsdb/opentsdb_test.go b/plugins/outputs/opentsdb/opentsdb_test.go index 30323725b..6c141d463 100644 --- a/plugins/outputs/opentsdb/opentsdb_test.go +++ b/plugins/outputs/opentsdb/opentsdb_test.go @@ -3,9 +3,8 @@ package opentsdb import ( "reflect" "testing" - - "github.com/influxdata/telegraf/testutil" - "github.com/stretchr/testify/require" + // "github.com/influxdata/telegraf/testutil" + // "github.com/stretchr/testify/require" ) func TestBuildTagsTelnet(t *testing.T) { @@ -42,40 +41,40 @@ func TestBuildTagsTelnet(t *testing.T) { } } -func TestWrite(t *testing.T) { - if testing.Short() { - t.Skip("Skipping integration test in short mode") - } +// func TestWrite(t *testing.T) { +// if testing.Short() { +// t.Skip("Skipping integration test in short mode") +// } - o := &OpenTSDB{ - Host: testutil.GetLocalHost(), - Port: 4242, - Prefix: "prefix.test.", - } +// o := &OpenTSDB{ +// Host: testutil.GetLocalHost(), +// Port: 4242, +// Prefix: "prefix.test.", +// } - // Verify that we can connect to the OpenTSDB instance - err := o.Connect() - require.NoError(t, err) +// // Verify that we can connect to the OpenTSDB instance +// err := o.Connect() +// require.NoError(t, err) - // 
Verify that we can successfully write data to OpenTSDB - err = o.Write(testutil.MockMetrics()) - require.NoError(t, err) +// // Verify that we can successfully write data to OpenTSDB +// err = o.Write(testutil.MockMetrics()) +// require.NoError(t, err) - // Verify postive and negative test cases of writing data - metrics := testutil.MockMetrics() - metrics = append(metrics, testutil.TestMetric(float64(1.0), - "justametric.float")) - metrics = append(metrics, testutil.TestMetric(int64(123456789), - "justametric.int")) - metrics = append(metrics, testutil.TestMetric(uint64(123456789012345), - "justametric.uint")) - metrics = append(metrics, testutil.TestMetric("Lorem Ipsum", - "justametric.string")) - metrics = append(metrics, testutil.TestMetric(float64(42.0), - "justametric.anotherfloat")) - metrics = append(metrics, testutil.TestMetric(float64(42.0), - "metric w/ specialchars")) +// // Verify positive and negative test cases of writing data +// metrics := testutil.MockMetrics() +// metrics = append(metrics, testutil.TestMetric(float64(1.0), +// "justametric.float")) +// metrics = append(metrics, testutil.TestMetric(int64(123456789), +// "justametric.int")) +// metrics = append(metrics, testutil.TestMetric(uint64(123456789012345), +// "justametric.uint")) +// metrics = append(metrics, testutil.TestMetric("Lorem Ipsum", +// "justametric.string")) +// metrics = append(metrics, testutil.TestMetric(float64(42.0), +// "justametric.anotherfloat")) +// metrics = append(metrics, testutil.TestMetric(float64(42.0), +// "metric w/ specialchars")) - err = o.Write(metrics) - require.NoError(t, err) -} +// err = o.Write(metrics) +// require.NoError(t, err) +// } diff --git a/plugins/outputs/prometheus_client/prometheus_client.go b/plugins/outputs/prometheus_client/prometheus_client.go index d5e3f1ced..ce6dc1f57 100644 --- a/plugins/outputs/prometheus_client/prometheus_client.go +++ b/plugins/outputs/prometheus_client/prometheus_client.go @@ -5,28 +5,21 @@ import ( "log" "net/http" "regexp" - "strings" + "sync" "github.com/influxdata/telegraf" "github.com/influxdata/telegraf/plugins/outputs" "github.com/prometheus/client_golang/prometheus" ) -var ( - sanitizedChars = strings.NewReplacer("/", "_", "@", "_", " ", "_", "-", "_", ".", "_") - - // Prometheus metric names must match this regex - // see https://prometheus.io/docs/concepts/data_model/#metric-names-and-labels - metricName = regexp.MustCompile("^[a-zA-Z_:][a-zA-Z0-9_:]*$") - - // Prometheus labels must match this regex - // see https://prometheus.io/docs/concepts/data_model/#metric-names-and-labels - labelName = regexp.MustCompile("^[a-zA-Z_][a-zA-Z0-9_]*$") -) +var invalidNameCharRE = regexp.MustCompile(`[^a-zA-Z0-9_]`) type PrometheusClient struct { - Listen string - metrics map[string]*prometheus.UntypedVec + Listen string + + metrics map[string]prometheus.Metric + + sync.Mutex } var sampleConfig = ` @@ -35,6 +28,15 @@ var sampleConfig = ` ` func (p *PrometheusClient) Start() error { + prometheus.MustRegister(p) + defer func() { + if r := recover(); r != nil { + // recovering from panic here because there is no way to stop a + // running http go server except by a kill signal. Since the server + // does not stop on SIGHUP, Start() will panic when the process + // is reloaded. 
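// (prometheus.MustRegister panics if the same collector is registered
// twice, which is exactly what happens to this Start() on a reload.)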
+ } + }() if p.Listen == "" { p.Listen = "localhost:9126" } @@ -44,7 +46,6 @@ func (p *PrometheusClient) Start() error { Addr: p.Listen, } - p.metrics = make(map[string]*prometheus.UntypedVec) go server.ListenAndServe() return nil } @@ -72,25 +73,42 @@ func (p *PrometheusClient) Description() string { return "Configuration for the Prometheus client to spawn" } +// Implements prometheus.Collector +func (p *PrometheusClient) Describe(ch chan<- *prometheus.Desc) { + prometheus.NewGauge(prometheus.GaugeOpts{Name: "Dummy", Help: "Dummy"}).Describe(ch) +} + +// Implements prometheus.Collector +func (p *PrometheusClient) Collect(ch chan<- prometheus.Metric) { + p.Lock() + defer p.Unlock() + + for _, m := range p.metrics { + ch <- m + } +} + func (p *PrometheusClient) Write(metrics []telegraf.Metric) error { + p.Lock() + defer p.Unlock() + + p.metrics = make(map[string]prometheus.Metric) + if len(metrics) == 0 { return nil } for _, point := range metrics { key := point.Name() - key = sanitizedChars.Replace(key) + key = invalidNameCharRE.ReplaceAllString(key, "_") var labels []string l := prometheus.Labels{} for k, v := range point.Tags() { - k = sanitizedChars.Replace(k) + k = invalidNameCharRE.ReplaceAllString(k, "_") if len(k) == 0 { continue } - if !labelName.MatchString(k) { - continue - } labels = append(labels, k) l[k] = v } @@ -105,7 +123,7 @@ func (p *PrometheusClient) Write(metrics []telegraf.Metric) error { } // sanitize the measurement name - n = sanitizedChars.Replace(n) + n = invalidNameCharRE.ReplaceAllString(n, "_") var mname string if n == "value" { mname = key @@ -113,48 +131,23 @@ func (p *PrometheusClient) Write(metrics []telegraf.Metric) error { mname = fmt.Sprintf("%s_%s", key, n) } - // verify that it is a valid measurement name - if !metricName.MatchString(mname) { - continue - } - - // Create a new metric if it hasn't been created yet. 
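// (All of the per-metric registration bookkeeping below goes away: the
// output now rebuilds const metrics on every Write and serves them via
// the Collect method above.)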
- if _, ok := p.metrics[mname]; !ok { - p.metrics[mname] = prometheus.NewUntypedVec( - prometheus.UntypedOpts{ - Name: mname, - Help: "Telegraf collected metric", - }, - labels, - ) - if err := prometheus.Register(p.metrics[mname]); err != nil { - log.Printf("prometheus_client: Metric failed to register with prometheus, %s", err) - continue - } - } - + desc := prometheus.NewDesc(mname, "Telegraf collected metric", nil, l) + var metric prometheus.Metric + var err error switch val := val.(type) { case int64: - m, err := p.metrics[mname].GetMetricWith(l) - if err != nil { - log.Printf("ERROR Getting metric in Prometheus output, "+ - "key: %s, labels: %v,\nerr: %s\n", - mname, l, err.Error()) - continue - } - m.Set(float64(val)) + metric, err = prometheus.NewConstMetric(desc, prometheus.UntypedValue, float64(val)) case float64: - m, err := p.metrics[mname].GetMetricWith(l) - if err != nil { - log.Printf("ERROR Getting metric in Prometheus output, "+ - "key: %s, labels: %v,\nerr: %s\n", - mname, l, err.Error()) - continue - } - m.Set(val) + metric, err = prometheus.NewConstMetric(desc, prometheus.UntypedValue, val) default: continue } + if err != nil { + log.Printf("ERROR creating prometheus metric, "+ + "key: %s, labels: %v,\nerr: %s\n", + mname, l, err.Error()) + } + p.metrics[desc.String()] = metric } } return nil diff --git a/plugins/outputs/prometheus_client/prometheus_client_test.go b/plugins/outputs/prometheus_client/prometheus_client_test.go index 15ed7b7e4..14aee13d9 100644 --- a/plugins/outputs/prometheus_client/prometheus_client_test.go +++ b/plugins/outputs/prometheus_client/prometheus_client_test.go @@ -17,6 +17,7 @@ func TestPrometheusWritePointEmptyTag(t *testing.T) { if testing.Short() { t.Skip("Skipping integration test in short mode") } + now := time.Now() pTesting = &PrometheusClient{Listen: "localhost:9127"} err := pTesting.Start() time.Sleep(time.Millisecond * 200) @@ -30,11 +31,13 @@ func TestPrometheusWritePointEmptyTag(t *testing.T) { pt1, _ := telegraf.NewMetric( "test_point_1", tags, - map[string]interface{}{"value": 0.0}) + map[string]interface{}{"value": 0.0}, + now) pt2, _ := telegraf.NewMetric( "test_point_2", tags, - map[string]interface{}{"value": 1.0}) + map[string]interface{}{"value": 1.0}, + now) var metrics = []telegraf.Metric{ pt1, pt2, @@ -63,11 +66,13 @@ func TestPrometheusWritePointEmptyTag(t *testing.T) { pt3, _ := telegraf.NewMetric( "test_point_3", tags, - map[string]interface{}{"value": 0.0}) + map[string]interface{}{"value": 0.0}, + now) pt4, _ := telegraf.NewMetric( "test_point_4", tags, - map[string]interface{}{"value": 1.0}) + map[string]interface{}{"value": 1.0}, + now) metrics = []telegraf.Metric{ pt3, pt4, diff --git a/plugins/serializers/graphite/graphite.go b/plugins/serializers/graphite/graphite.go index bf2e75579..6a6fd9cac 100644 --- a/plugins/serializers/graphite/graphite.go +++ b/plugins/serializers/graphite/graphite.go @@ -10,30 +10,34 @@ import ( const DEFAULT_TEMPLATE = "host.tags.measurement.field" -var fieldDeleter = strings.NewReplacer(".FIELDNAME", "", "FIELDNAME.", "") +var ( + fieldDeleter = strings.NewReplacer(".FIELDNAME", "", "FIELDNAME.", "") + sanitizedChars = strings.NewReplacer("/", "-", "@", "-", "*", "-", " ", "_", "..", ".", `\`, "") +) type GraphiteSerializer struct { Prefix string Template string } -var sanitizedChars = strings.NewReplacer("/", "-", "@", "-", "*", "-", " ", "_", "..", ".") - func (s *GraphiteSerializer) Serialize(metric telegraf.Metric) ([]string, error) { out := []string{} // Convert UnixNano to 
Unix timestamps timestamp := metric.UnixNano() / 1000000000 - bucket := s.SerializeBucketName(metric.Name(), metric.Tags()) + bucket := SerializeBucketName(metric.Name(), metric.Tags(), s.Template, s.Prefix) + if bucket == "" { + return out, nil + } for fieldName, value := range metric.Fields() { // Convert value to string valueS := fmt.Sprintf("%#v", value) point := fmt.Sprintf("%s %s %d", // insert "field" section of template - InsertField(bucket, fieldName), - valueS, + sanitizedChars.Replace(InsertField(bucket, fieldName)), + sanitizedChars.Replace(valueS), timestamp) out = append(out, point) } @@ -48,12 +52,14 @@ func (s *GraphiteSerializer) Serialize(metric telegraf.Metric) ([]string, error) // FIELDNAME. It is up to the user to replace this. This is so that // SerializeBucketName can be called just once per measurement, rather than // once per field. See GraphiteSerializer.InsertField() function. -func (s *GraphiteSerializer) SerializeBucketName( +func SerializeBucketName( measurement string, tags map[string]string, + template string, + prefix string, ) string { - if s.Template == "" { - s.Template = DEFAULT_TEMPLATE + if template == "" { + template = DEFAULT_TEMPLATE } tagsCopy := make(map[string]string) for k, v := range tags { @@ -61,7 +67,7 @@ func (s *GraphiteSerializer) SerializeBucketName( } var out []string - templateParts := strings.Split(s.Template, ".") + templateParts := strings.Split(template, ".") for _, templatePart := range templateParts { switch templatePart { case "measurement": @@ -89,10 +95,14 @@ func (s *GraphiteSerializer) SerializeBucketName( } } - if s.Prefix == "" { - return sanitizedChars.Replace(strings.Join(out, ".")) + if len(out) == 0 { + return "" } - return sanitizedChars.Replace(s.Prefix + "." + strings.Join(out, ".")) + + if prefix == "" { + return strings.Join(out, ".") + } + return prefix + "." + strings.Join(out, ".") } // InsertField takes the bucket string from SerializeBucketName and replaces the diff --git a/plugins/serializers/graphite/graphite_test.go b/plugins/serializers/graphite/graphite_test.go index 64c65d16b..57196b861 100644 --- a/plugins/serializers/graphite/graphite_test.go +++ b/plugins/serializers/graphite/graphite_test.go @@ -160,6 +160,58 @@ func TestSerializeValueField2(t *testing.T) { assert.Equal(t, expS, mS) } +// test that fields with spaces get fixed. +func TestSerializeFieldWithSpaces(t *testing.T) { + now := time.Now() + tags := map[string]string{ + "host": "localhost", + "cpu": "cpu0", + "datacenter": "us-west-2", + } + fields := map[string]interface{}{ + `field\ with\ spaces`: float64(91.5), + } + m, err := telegraf.NewMetric("cpu", tags, fields, now) + assert.NoError(t, err) + + s := GraphiteSerializer{ + Template: "host.tags.measurement.field", + } + mS, err := s.Serialize(m) + assert.NoError(t, err) + + expS := []string{ + fmt.Sprintf("localhost.cpu0.us-west-2.cpu.field_with_spaces 91.5 %d", now.Unix()), + } + assert.Equal(t, expS, mS) +} + +// test that tags with spaces get fixed. 
+func TestSerializeTagWithSpaces(t *testing.T) { + now := time.Now() + tags := map[string]string{ + "host": "localhost", + "cpu": `cpu\ 0`, + "datacenter": "us-west-2", + } + fields := map[string]interface{}{ + `field_with_spaces`: float64(91.5), + } + m, err := telegraf.NewMetric("cpu", tags, fields, now) + assert.NoError(t, err) + + s := GraphiteSerializer{ + Template: "host.tags.measurement.field", + } + mS, err := s.Serialize(m) + assert.NoError(t, err) + + expS := []string{ + fmt.Sprintf("localhost.cpu_0.us-west-2.cpu.field_with_spaces 91.5 %d", now.Unix()), + } + assert.Equal(t, expS, mS) +} + // test that a field named "value" gets ignored at beginning of template. func TestSerializeValueField3(t *testing.T) { now := time.Now() @@ -186,6 +238,32 @@ func TestSerializeValueField3(t *testing.T) { assert.Equal(t, expS, mS) } +// test that a field named "value" gets ignored at beginning of template. +func TestSerializeValueField5(t *testing.T) { + now := time.Now() + tags := map[string]string{ + "host": "localhost", + "cpu": "cpu0", + "datacenter": "us-west-2", + } + fields := map[string]interface{}{ + "value": float64(91.5), + } + m, err := telegraf.NewMetric("cpu", tags, fields, now) + assert.NoError(t, err) + + s := GraphiteSerializer{ + Template: template5, + } + mS, err := s.Serialize(m) + assert.NoError(t, err) + + expS := []string{ + fmt.Sprintf("localhost.us-west-2.cpu0.cpu 91.5 %d", now.Unix()), + } + assert.Equal(t, expS, mS) +} + func TestSerializeMetricPrefix(t *testing.T) { now := time.Now() tags := map[string]string{ @@ -225,8 +303,7 @@ func TestSerializeBucketNameNoHost(t *testing.T) { m, err := telegraf.NewMetric("cpu", tags, fields, now) assert.NoError(t, err) - s := GraphiteSerializer{} - mS := s.SerializeBucketName(m.Name(), m.Tags()) + mS := SerializeBucketName(m.Name(), m.Tags(), "", "") expS := "cpu0.us-west-2.cpu.FIELDNAME" assert.Equal(t, expS, mS) @@ -240,8 +317,7 @@ func TestSerializeBucketNameHost(t *testing.T) { m, err := telegraf.NewMetric("cpu", defaultTags, fields, now) assert.NoError(t, err) - s := GraphiteSerializer{} - mS := s.SerializeBucketName(m.Name(), m.Tags()) + mS := SerializeBucketName(m.Name(), m.Tags(), "", "") expS := "localhost.cpu0.us-west-2.cpu.FIELDNAME" assert.Equal(t, expS, mS) @@ -255,8 +331,7 @@ func TestSerializeBucketNamePrefix(t *testing.T) { m, err := telegraf.NewMetric("cpu", defaultTags, fields, now) assert.NoError(t, err) - s := GraphiteSerializer{Prefix: "prefix"} - mS := s.SerializeBucketName(m.Name(), m.Tags()) + mS := SerializeBucketName(m.Name(), m.Tags(), "", "prefix") expS := "prefix.localhost.cpu0.us-west-2.cpu.FIELDNAME" assert.Equal(t, expS, mS) @@ -270,8 +345,7 @@ func TestTemplate1(t *testing.T) { m, err := telegraf.NewMetric("cpu", defaultTags, fields, now) assert.NoError(t, err) - s := GraphiteSerializer{Template: template1} - mS := s.SerializeBucketName(m.Name(), m.Tags()) + mS := SerializeBucketName(m.Name(), m.Tags(), template1, "") expS := "cpu0.us-west-2.localhost.cpu.FIELDNAME" assert.Equal(t, expS, mS) @@ -285,8 +359,7 @@ func TestTemplate2(t *testing.T) { m, err := telegraf.NewMetric("cpu", defaultTags, fields, now) assert.NoError(t, err) - s := GraphiteSerializer{Template: template2} - mS := s.SerializeBucketName(m.Name(), m.Tags()) + mS := SerializeBucketName(m.Name(), m.Tags(), template2, "") expS := "localhost.cpu.FIELDNAME" assert.Equal(t, expS, mS) @@ -300,8 +373,7 @@ func TestTemplate3(t *testing.T) { m, err := telegraf.NewMetric("cpu", defaultTags, fields, now) assert.NoError(t, err) - s := 
GraphiteSerializer{Template: template3} - mS := s.SerializeBucketName(m.Name(), m.Tags()) + mS := SerializeBucketName(m.Name(), m.Tags(), template3, "") expS := "localhost.cpu0.us-west-2.FIELDNAME" assert.Equal(t, expS, mS) @@ -315,28 +387,12 @@ func TestTemplate4(t *testing.T) { m, err := telegraf.NewMetric("cpu", defaultTags, fields, now) assert.NoError(t, err) - s := GraphiteSerializer{Template: template4} - mS := s.SerializeBucketName(m.Name(), m.Tags()) + mS := SerializeBucketName(m.Name(), m.Tags(), template4, "") expS := "localhost.cpu0.us-west-2.cpu" assert.Equal(t, expS, mS) } -func TestTemplate5(t *testing.T) { - now := time.Now() - fields := map[string]interface{}{ - "usage_idle": float64(91.5), - } - m, err := telegraf.NewMetric("cpu", defaultTags, fields, now) - assert.NoError(t, err) - - s := GraphiteSerializer{Template: template5} - mS := s.SerializeBucketName(m.Name(), m.Tags()) - - expS := "localhost.us-west-2.cpu0.cpu.FIELDNAME" - assert.Equal(t, expS, mS) -} - func TestTemplate6(t *testing.T) { now := time.Now() fields := map[string]interface{}{ @@ -345,8 +401,7 @@ func TestTemplate6(t *testing.T) { m, err := telegraf.NewMetric("cpu", defaultTags, fields, now) assert.NoError(t, err) - s := GraphiteSerializer{Template: template6} - mS := s.SerializeBucketName(m.Name(), m.Tags()) + mS := SerializeBucketName(m.Name(), m.Tags(), template6, "") expS := "localhost.cpu0.us-west-2.cpu.FIELDNAME" assert.Equal(t, expS, mS) diff --git a/scripts/build.py b/scripts/build.py index 426aa87bb..77befd599 100755 --- a/scripts/build.py +++ b/scripts/build.py @@ -83,29 +83,17 @@ targets = { } supported_builds = { - "darwin": [ "amd64" ], "windows": [ "amd64" ], "linux": [ "amd64", "i386", "armhf", "armel", "arm64", "static_amd64" ], "freebsd": [ "amd64" ] } supported_packages = { - "darwin": [ "tar" ], "linux": [ "deb", "rpm", "tar" ], "windows": [ "zip" ], "freebsd": [ "tar" ] } -supported_tags = { - # "linux": { - # "amd64": ["sensors"] - # } -} - -prereq_cmds = { - # "linux": "sudo apt-get install lm-sensors libsensors4-dev" -} - ################ #### Telegraf Functions ################ diff --git a/scripts/circle-test.sh b/scripts/circle-test.sh index 2333b5b73..93bafe320 100755 --- a/scripts/circle-test.sh +++ b/scripts/circle-test.sh @@ -69,6 +69,8 @@ exit_if_fail telegraf -config $tmpdir/config.toml \ -test -input-filter cpu:mem cat $GOPATH/bin/telegraf | gzip > $CIRCLE_ARTIFACTS/telegraf.gz +go build -o telegraf-race -race -ldflags "-X main.version=${VERSION}-RACE" cmd/telegraf/telegraf.go +cat telegraf-race | gzip > $CIRCLE_ARTIFACTS/telegraf-race.gz eval "git describe --exact-match HEAD" if [ $? -eq 0 ]; then diff --git a/scripts/post-install.sh b/scripts/post-install.sh index fb0b441e8..95045be1f 100644 --- a/scripts/post-install.sh +++ b/scripts/post-install.sh @@ -37,6 +37,10 @@ chmod 755 $LOG_DIR if [[ -L /etc/init.d/telegraf ]]; then rm -f /etc/init.d/telegraf fi +# Remove legacy symlink, if it exists +if [[ -L /etc/systemd/system/telegraf.service ]]; then + rm -f /etc/systemd/system/telegraf.service +fi # Add defaults file, if it doesn't exist if [[ ! 
-f /etc/default/telegraf ]]; then diff --git a/scripts/post-remove.sh b/scripts/post-remove.sh index 96b178f4d..0f262d225 100644 --- a/scripts/post-remove.sh +++ b/scripts/post-remove.sh @@ -15,32 +15,28 @@ function disable_chkconfig { rm -f /etc/init.d/telegraf } -if [[ -f /etc/redhat-release ]]; then - # RHEL-variant logic - if [[ "$1" = "0" ]]; then - # InfluxDB is no longer installed, remove from init system - rm -f /etc/default/telegraf - - which systemctl &>/dev/null - if [[ $? -eq 0 ]]; then - disable_systemd - else - # Assuming sysv - disable_chkconfig - fi +if [[ "$1" == "0" ]]; then + # RHEL and any distribution that follow RHEL, Amazon Linux covered + # telegraf is no longer installed, remove from init system + rm -f /etc/default/telegraf + + which systemctl &>/dev/null + if [[ $? -eq 0 ]]; then + disable_systemd + else + # Assuming sysv + disable_chkconfig fi -elif [[ -f /etc/debian_version ]]; then +elif [ "$1" == "remove" -o "$1" == "purge" ]; then # Debian/Ubuntu logic - if [[ "$1" != "upgrade" ]]; then - # Remove/purge - rm -f /etc/default/telegraf - - which systemctl &>/dev/null - if [[ $? -eq 0 ]]; then - disable_systemd - else - # Assuming sysv - disable_update_rcd - fi + # Remove/purge + rm -f /etc/default/telegraf + + which systemctl &>/dev/null + if [[ $? -eq 0 ]]; then + disable_systemd + else + # Assuming sysv + disable_update_rcd fi fi diff --git a/scripts/telegraf.service b/scripts/telegraf.service index a7824c9a7..81c9b5408 100644 --- a/scripts/telegraf.service +++ b/scripts/telegraf.service @@ -15,4 +15,3 @@ KillMode=control-group [Install] WantedBy=multi-user.target -Alias=telegraf.service diff --git a/testutil/accumulator.go b/testutil/accumulator.go index 9b6fb2373..62b765a3c 100644 --- a/testutil/accumulator.go +++ b/testutil/accumulator.go @@ -5,6 +5,7 @@ import ( "fmt" "reflect" "sync" + "sync/atomic" "testing" "time" @@ -27,8 +28,11 @@ func (p *Metric) String() string { type Accumulator struct { sync.Mutex - Metrics []*Metric - debug bool + Metrics []*Metric + nMetrics uint64 + Discard bool + Errors []error + debug bool } // Add adds a measurement point to the accumulator @@ -42,6 +46,10 @@ func (a *Accumulator) Add( a.AddFields(measurement, fields, tags, t...) } +func (a *Accumulator) NMetrics() uint64 { + return atomic.LoadUint64(&a.nMetrics) +} + // AddFields adds a measurement point with a specified timestamp. func (a *Accumulator) AddFields( measurement string, @@ -49,6 +57,10 @@ func (a *Accumulator) AddFields( tags map[string]string, timestamp ...time.Time, ) { + atomic.AddUint64(&a.nMetrics, 1) + if a.Discard { + return + } a.Lock() defer a.Unlock() if tags == nil { @@ -84,6 +96,24 @@ func (a *Accumulator) AddFields( a.Metrics = append(a.Metrics, p) } +// AddError appends the given error to Accumulator.Errors. +func (a *Accumulator) AddError(err error) { + if err == nil { + return + } + a.Lock() + a.Errors = append(a.Errors, err) + a.Unlock() +} + +func (a *Accumulator) SetPrecision(precision, interval time.Duration) { + return +} + +func (a *Accumulator) DisablePrecision() { + return +} + func (a *Accumulator) Debug() bool { // stub for implementing Accumulator interface. return a.debug
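The testutil Accumulator changes above (the atomic `nMetrics` counter plus the new `Discard` and `AddError` fields) let a test observe a concurrently running input without holding the accumulator lock. A minimal usage sketch follows; the `waitForMetrics` helper is hypothetical, not part of this change:

```go
// Poll the lock-free NMetrics() counter until the input under test has
// produced at least n metrics, or give up after the timeout elapses.
func waitForMetrics(acc *testutil.Accumulator, n uint64, timeout time.Duration) bool {
	deadline := time.Now().Add(timeout)
	for acc.NMetrics() < n {
		if time.Now().After(deadline) {
			return false
		}
		time.Sleep(10 * time.Millisecond)
	}
	return true
}
```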