diff --git a/CHANGELOG.md b/CHANGELOG.md index dc83441b2..09a00f069 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,10 +1,104 @@ -## v0.10.5 [unreleased] +## v0.12.1 [unreleased] + +### Features +- [#976](https://github.com/influxdata/telegraf/pull/976): Reduce allocations in the UDP and statsd inputs. +- [#979](https://github.com/influxdata/telegraf/pull/979): Reduce allocations in the TCP listener. +- [#935](https://github.com/influxdata/telegraf/pull/935): AWS Cloudwatch input plugin. Thanks @joshhardy & @ljosa! + +### Bugfixes +- [#968](https://github.com/influxdata/telegraf/issues/968): Processes plugin gets unknown state when spaces are in (command name) +- [#969](https://github.com/influxdata/telegraf/pull/969): ipmi_sensors: allow : in password. Thanks @awaw! +- [#972](https://github.com/influxdata/telegraf/pull/972): dovecot: remove extra newline in dovecot command. Thanks @mrannanj! +- [#645](https://github.com/influxdata/telegraf/issues/645): docker plugin i/o error on closed pipe. Thanks @tripledes! + +## v0.12.0 [2016-04-05] + +### Features +- [#951](https://github.com/influxdata/telegraf/pull/951): Parse environment variables in the config file. +- [#948](https://github.com/influxdata/telegraf/pull/948): Cleanup config file and make default package version include all plugins (but commented). +- [#927](https://github.com/influxdata/telegraf/pull/927): Adds parsing of tags to the statsd input when using DataDog's dogstatsd extension +- [#863](https://github.com/influxdata/telegraf/pull/863): AMQP output: allow external auth. Thanks @ekini! +- [#707](https://github.com/influxdata/telegraf/pull/707): Improved prometheus plugin. Thanks @titilambert! +- [#878](https://github.com/influxdata/telegraf/pull/878): Added json serializer. Thanks @ch3lo! +- [#880](https://github.com/influxdata/telegraf/pull/880): Add the ability to specify the bearer token to the prometheus plugin. Thanks @jchauncey! +- [#882](https://github.com/influxdata/telegraf/pull/882): Fixed SQL Server Plugin issues +- [#849](https://github.com/influxdata/telegraf/issues/849): Adding ability to parse single values as an input data type. +- [#844](https://github.com/influxdata/telegraf/pull/844): postgres_extensible plugin added. Thanks @menardorama! +- [#866](https://github.com/influxdata/telegraf/pull/866): couchbase input plugin. Thanks @ljosa! +- [#789](https://github.com/influxdata/telegraf/pull/789): Support multiple field specification and `field*` in graphite templates. Thanks @chrusty! +- [#762](https://github.com/influxdata/telegraf/pull/762): Nagios parser for the exec plugin. Thanks @titilambert! +- [#848](https://github.com/influxdata/telegraf/issues/848): Provide option to omit host tag from telegraf agent. +- [#928](https://github.com/influxdata/telegraf/pull/928): Deprecating the statsd "convert_names" options, expose separator config. +- [#919](https://github.com/influxdata/telegraf/pull/919): ipmi_sensor input plugin. Thanks @ebookbug! +- [#945](https://github.com/influxdata/telegraf/pull/945): KAFKA output: codec, acks, and retry configuration. Thanks @framiere! + +### Bugfixes +- [#890](https://github.com/influxdata/telegraf/issues/890): Create TLS config even if only ssl_ca is provided. +- [#884](https://github.com/influxdata/telegraf/issues/884): Do not call write method if there are 0 metrics to write. +- [#898](https://github.com/influxdata/telegraf/issues/898): Put database name in quotes, fixes special characters in the database name. 
+- [#656](https://github.com/influxdata/telegraf/issues/656): No longer run `lsof` on linux to get netstat data, fixes permissions issue. +- [#907](https://github.com/influxdata/telegraf/issues/907): Fix prometheus invalid label/measurement name key. +- [#841](https://github.com/influxdata/telegraf/issues/841): Fix memcached unix socket panic. +- [#873](https://github.com/influxdata/telegraf/issues/873): Fix SNMP plugin sometimes not returning metrics. Thanks @titilambert! +- [#934](https://github.com/influxdata/telegraf/pull/934): phpfpm: Fix fcgi uri path. Thanks @rudenkovk! +- [#805](https://github.com/influxdata/telegraf/issues/805): Kafka consumer stops gathering after i/o timeout. +- [#959](https://github.com/influxdata/telegraf/pull/959): reduce mongodb & prometheus collection timeouts. Thanks @PierreF! + +## v0.11.1 [2016-03-17] + +### Release Notes +- Primarily this release was cut to fix [#859](https://github.com/influxdata/telegraf/issues/859) + +### Features +- [#747](https://github.com/influxdata/telegraf/pull/747): Start telegraf on install & remove on uninstall. Thanks @PierreF! +- [#794](https://github.com/influxdata/telegraf/pull/794): Add service reload ability. Thanks @entertainyou! + +### Bugfixes +- [#852](https://github.com/influxdata/telegraf/issues/852): Windows zip package fix +- [#859](https://github.com/influxdata/telegraf/issues/859): httpjson plugin panic + +## v0.11.0 [2016-03-15] ### Release Notes ### Features +- [#692](https://github.com/influxdata/telegraf/pull/770): Support InfluxDB retention policies +- [#771](https://github.com/influxdata/telegraf/pull/771): Default timeouts for input plugins. Thanks @PierreF! +- [#758](https://github.com/influxdata/telegraf/pull/758): UDP Listener input plugin, thanks @whatyouhide! +- [#769](https://github.com/influxdata/telegraf/issues/769): httpjson plugin: allow specifying SSL configuration. +- [#735](https://github.com/influxdata/telegraf/pull/735): SNMP Table feature. Thanks @titilambert! +- [#754](https://github.com/influxdata/telegraf/pull/754): docker plugin: adding `docker info` metrics to output. Thanks @titilambert! +- [#788](https://github.com/influxdata/telegraf/pull/788): -input-list and -output-list command-line options. Thanks @ebookbug! +- [#778](https://github.com/influxdata/telegraf/pull/778): Adding a TCP input listener. +- [#797](https://github.com/influxdata/telegraf/issues/797): Provide option for persistent MQTT consumer client sessions. +- [#799](https://github.com/influxdata/telegraf/pull/799): Add number of threads for procstat input plugin. Thanks @titilambert! +- [#776](https://github.com/influxdata/telegraf/pull/776): Add Zookeeper chroot option to kafka_consumer. Thanks @prune998! +- [#811](https://github.com/influxdata/telegraf/pull/811): Add processes plugin for classifying total procs on system. Thanks @titilambert! +- [#235](https://github.com/influxdata/telegraf/issues/235): Add number of users to the `system` input plugin. +- [#826](https://github.com/influxdata/telegraf/pull/826): "kernel" linux plugin for /proc/stat metrics (context switches, interrupts, etc.) +- [#847](https://github.com/influxdata/telegraf/pull/847): `ntpq`: Input plugin for running ntp query executable and gathering metrics. ### Bugfixes +- [#748](https://github.com/influxdata/telegraf/issues/748): Fix sensor plugin split on ":" +- [#722](https://github.com/influxdata/telegraf/pull/722): Librato output plugin fixes. Thanks @chrusty! 
+- [#745](https://github.com/influxdata/telegraf/issues/745): Fix Telegraf toml parse panic on large config files. Thanks @titilambert! +- [#781](https://github.com/influxdata/telegraf/pull/781): Fix mqtt_consumer username not being set. Thanks @chaton78! +- [#786](https://github.com/influxdata/telegraf/pull/786): Fix mqtt output username not being set. Thanks @msangoi! +- [#773](https://github.com/influxdata/telegraf/issues/773): Fix duplicate measurements in snmp plugin. Thanks @titilambert! +- [#708](https://github.com/influxdata/telegraf/issues/708): packaging: build ARM package +- [#713](https://github.com/influxdata/telegraf/issues/713): packaging: insecure permissions error on log directory +- [#816](https://github.com/influxdata/telegraf/issues/816): Fix phpfpm panic if fcgi endpoint unreachable. +- [#828](https://github.com/influxdata/telegraf/issues/828): fix net_response plugin overwriting host tag. +- [#821](https://github.com/influxdata/telegraf/issues/821): Remove postgres password from server tag. Thanks @menardorama! + +## v0.10.4.1 + +### Release Notes +- Bug in the build script broke deb and rpm packages. + +### Bugfixes +- [#750](https://github.com/influxdata/telegraf/issues/750): deb package broken +- [#752](https://github.com/influxdata/telegraf/issues/752): rpm package broken ## v0.10.4 [2016-02-24] diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index afbfbf088..3997a448e 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -80,7 +80,7 @@ func (s *Simple) SampleConfig() string { return "ok = true # indicate if everything is fine" } -func (s *Simple) Gather(acc inputs.Accumulator) error { +func (s *Simple) Gather(acc telegraf.Accumulator) error { if s.Ok { acc.Add("state", "pretty good", nil) } else { @@ -114,7 +114,7 @@ creating the `Parser` object. You should also add the following to your SampleConfig() return: ```toml - ## Data format to consume. This can be "json", "influx" or "graphite" + ## Data format to consume. ## Each data format has its own unique set of configuration options, read ## more about them here: ## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md @@ -244,7 +244,7 @@ instantiating and creating the `Serializer` object. You should also add the following to your SampleConfig() return: ```toml - ## Data format to output. 
## Each data format has it's own unique set of configuration options, read ## more about them here: ## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_OUTPUT.md diff --git a/Godeps b/Godeps index d2ac1857f..aa5be999d 100644 --- a/Godeps +++ b/Godeps @@ -1,53 +1,56 @@ -git.eclipse.org/gitroot/paho/org.eclipse.paho.mqtt.golang.git 617c801af238c3af2d9e72c5d4a0f02edad03ce5 -github.com/Shopify/sarama d37c73f2b2bce85f7fa16b6a550d26c5372892ef -github.com/Sirupsen/logrus f7f79f729e0fbe2fcc061db48a9ba0263f588252 -github.com/amir/raidman 6a8e089bbe32e6b907feae5ba688841974b3c339 -github.com/aws/aws-sdk-go 87b1e60a50b09e4812dee560b33a238f67305804 -github.com/beorn7/perks b965b613227fddccbfffe13eae360ed3fa822f8d +github.com/Shopify/sarama 8aadb476e66ca998f2f6bb3c993e9a2daa3666b9 +github.com/Sirupsen/logrus 219c8cb75c258c552e999735be6df753ffc7afdc +github.com/amir/raidman 53c1b967405155bfc8758557863bf2e14f814687 +github.com/aws/aws-sdk-go 13a12060f716145019378a10e2806c174356b857 +github.com/beorn7/perks 3ac7bf7a47d159a033b107610db8a1b6575507a4 github.com/cenkalti/backoff 4dc77674aceaabba2c7e3da25d4c823edfb73f99 -github.com/dancannon/gorethink 6f088135ff288deb9d5546f4c71919207f891a70 +github.com/couchbase/go-couchbase cb664315a324d87d19c879d9cc67fda6be8c2ac1 +github.com/couchbase/gomemcached a5ea6356f648fec6ab89add00edd09151455b4b2 +github.com/couchbase/goutils 5823a0cbaaa9008406021dc5daf80125ea30bba6 +github.com/dancannon/gorethink e7cac92ea2bc52638791a021f212145acfedb1fc github.com/davecgh/go-spew 5215b55f46b2b919f50a1df0eaa5886afe4e3b3d +github.com/docker/engine-api 8924d6900370b4c7e7984be5adc61f50a80d7537 +github.com/docker/go-connections f549a9393d05688dff0992ef3efd8bbe6c628aeb +github.com/docker/go-units 5d2041e26a699eaca682e2ea41c8f891e1060444 github.com/eapache/go-resiliency b86b1ec0dd4209a588dc1285cdd471e73525c0b3 github.com/eapache/queue ded5959c0d4e360646dc9e9908cff48666781367 -github.com/fsouza/go-dockerclient 7b651349f9479f5114913eefbfd3c4eeddd79ab4 -github.com/go-ini/ini afbd495e5aaea13597b5e14fe514ddeaa4d76fc3 -github.com/go-sql-driver/mysql 7c7f556282622f94213bc028b4d0a7b6151ba239 -github.com/golang/protobuf 6aaa8d47701fa6cf07e914ec01fde3d4a1fe79c3 -github.com/golang/snappy 723cc1e459b8eea2dea4583200fd60757d40097a +github.com/eclipse/paho.mqtt.golang 4ab3e867810d1ec5f35157c59e965054dbf43a0d +github.com/go-sql-driver/mysql 1fca743146605a172a266e1654e01e5cd5669bee +github.com/golang/protobuf 552c7b9542c194800fd493123b3798ef0a832032 +github.com/golang/snappy 427fb6fc07997f43afa32f35e850833760e489a7 github.com/gonuts/go-shellquote e842a11b24c6abfb3dd27af69a17f482e4b483c2 -github.com/gorilla/context 1c83b3eabd45b6d76072b66b746c20815fb2872d -github.com/gorilla/mux 26a6070f849969ba72b72256e9f14cf519751690 +github.com/gorilla/context 1ea25387ff6f684839d82767c1733ff4d4d15d0a +github.com/gorilla/mux c9e326e2bdec29039a3761c07bece13133863e1e github.com/hailocab/go-hostpool e80d13ce29ede4452c43dea11e79b9bc8a15b478 -github.com/influxdata/config bae7cb98197d842374d3b8403905924094930f24 -github.com/influxdata/influxdb ef571fc104dc24b77cd3710c156cd95e5cfd7aa5 -github.com/jmespath/go-jmespath c01cf91b011868172fdcd9f41838e80c9d716264 -github.com/klauspost/crc32 999f3125931f6557b991b2f8472172bdfa578d38 -github.com/lib/pq 8ad2b298cadd691a77015666a5372eae5dbfac8f +github.com/influxdata/config b79f6829346b8d6e78ba73544b1e1038f1f1c9da +github.com/influxdata/influxdb e3fef5593c21644f2b43af55d6e17e70910b0e48 +github.com/influxdata/toml af4df43894b16e3fd2b788d01bd27ad0776ef2d0 
+github.com/klauspost/crc32 19b0b332c9e4516a6370a0456e6182c3b5036720 +github.com/lib/pq e182dc4027e2ded4b19396d638610f2653295f36 github.com/matttproud/golang_protobuf_extensions d0c3fe89de86839aecf2e0579c40ba3bb336a453 +github.com/miekg/dns cce6c130cdb92c752850880fd285bea1d64439dd github.com/mreiferson/go-snappystream 028eae7ab5c4c9e2d1cb4c4ca1e53259bbe7e504 github.com/naoina/go-stringutil 6b638e95a32d0c1131db0e7fe83775cbea4a0d0b -github.com/naoina/toml 751171607256bb66e64c9f0220c00662420c38e9 -github.com/nats-io/nats 6a83f1a633cfbfd90aa648ac99fb38c06a8b40df -github.com/nsqio/go-nsq 2118015c120962edc5d03325c680daf3163a8b5f -github.com/pmezard/go-difflib 792786c7400a136282c1664665ae0a8db921c6c2 -github.com/prometheus/client_golang 67994f177195311c3ea3d4407ed0175e34a4256f +github.com/nats-io/nats b13fc9d12b0b123ebc374e6b808c6228ae4234a3 +github.com/nats-io/nuid 4f84f5f3b2786224e336af2e13dba0a0a80b76fa +github.com/nsqio/go-nsq 0b80d6f05e15ca1930e0c5e1d540ed627e299980 +github.com/opencontainers/runc 89ab7f2ccc1e45ddf6485eaa802c35dcf321dfc8 +github.com/prometheus/client_golang 18acf9993a863f4c4b40612e19cdd243e7c86831 github.com/prometheus/client_model fa8ad6fec33561be4280a8f0514318c79d7f6cb6 -github.com/prometheus/common 14ca1097bbe21584194c15e391a9dab95ad42a59 +github.com/prometheus/common e8eabff8812b05acf522b45fdcd725a785188e37 github.com/prometheus/procfs 406e5b7bfd8201a36e2bb5f7bdae0b03380c2ce8 github.com/samuel/go-zookeeper 218e9c81c0dd8b3b18172b2bbfad92cc7d6db55f -github.com/shirou/gopsutil e77438504d45b9985c99a75730fe65220ceea00e +github.com/shirou/gopsutil 1f32ce1bb380845be7f5d174ac641a2c592c0c42 github.com/soniah/gosnmp b1b4f885b12c5dcbd021c5cee1c904110de6db7d github.com/streadway/amqp b4f3ceab0337f013208d31348b578d83c0064744 -github.com/stretchr/objx 1a9d0bb9f541897e62256577b352fdbc1fb4fd94 -github.com/stretchr/testify f390dcf405f7b83c997eac1b06768bb9f44dec18 -github.com/wvanbergen/kafka 1a8639a45164fcc245d5c7b4bd3ccfbd1a0ffbf3 +github.com/stretchr/testify 1f4a1643a57e798696635ea4c126e9127adb7d3c +github.com/wvanbergen/kafka 46f9a1cf3f670edec492029fadded9c2d9e18866 github.com/wvanbergen/kazoo-go 0f768712ae6f76454f987c3356177e138df258f8 github.com/zensqlmonitor/go-mssqldb ffe5510c6fa5e15e6d983210ab501c815b56b363 -golang.org/x/crypto 1f22c0103821b9390939b6776727195525381532 -golang.org/x/net 04b9de9b512f58addf28c9853d50ebef61c3953e -golang.org/x/text 6d3c22c4525a4da167968fa2479be5524d2e8bd0 -gopkg.in/dancannon/gorethink.v1 6f088135ff288deb9d5546f4c71919207f891a70 +golang.org/x/crypto 5dc8cb4b8a8eb076cbb5a06bc3b8682c15bdbbd3 +golang.org/x/net 6acef71eb69611914f7a30939ea9f6e194c78172 +golang.org/x/text a71fd10341b064c10f4a81ceac72bcf70f26ea34 +gopkg.in/dancannon/gorethink.v1 7d1af5be49cb5ecc7b177bf387d232050299d6ef gopkg.in/fatih/pool.v2 cba550ebf9bce999a02e963296d4bc7a486cb715 -gopkg.in/mgo.v2 03c9f3ee4c14c8e51ee521a6a7d0425658dd6f64 -gopkg.in/yaml.v2 f7716cbe52baa25d2e9b0d0da546fcf909fc16b4 -github.com/miekg/dns e0d84d97e59bcb6561eae269c4e94d25b66822cb \ No newline at end of file +gopkg.in/mgo.v2 d90005c5262a3463800497ea5a89aed5fe22c886 +gopkg.in/yaml.v2 a83829b6f1293c91addabc89d0571c246397bbf4 diff --git a/Godeps_windows b/Godeps_windows index dd46184ec..f499fa915 100644 --- a/Godeps_windows +++ b/Godeps_windows @@ -1,56 +1,60 @@ -git.eclipse.org/gitroot/paho/org.eclipse.paho.mqtt.golang.git 617c801af238c3af2d9e72c5d4a0f02edad03ce5 -github.com/Shopify/sarama d37c73f2b2bce85f7fa16b6a550d26c5372892ef -github.com/Sirupsen/logrus f7f79f729e0fbe2fcc061db48a9ba0263f588252 
+github.com/Shopify/sarama 8aadb476e66ca998f2f6bb3c993e9a2daa3666b9 +github.com/Sirupsen/logrus 219c8cb75c258c552e999735be6df753ffc7afdc github.com/StackExchange/wmi f3e2bae1e0cb5aef83e319133eabfee30013a4a5 -github.com/amir/raidman 6a8e089bbe32e6b907feae5ba688841974b3c339 -github.com/aws/aws-sdk-go 87b1e60a50b09e4812dee560b33a238f67305804 -github.com/beorn7/perks b965b613227fddccbfffe13eae360ed3fa822f8d +github.com/amir/raidman 53c1b967405155bfc8758557863bf2e14f814687 +github.com/aws/aws-sdk-go 13a12060f716145019378a10e2806c174356b857 +github.com/beorn7/perks 3ac7bf7a47d159a033b107610db8a1b6575507a4 github.com/cenkalti/backoff 4dc77674aceaabba2c7e3da25d4c823edfb73f99 -github.com/dancannon/gorethink 6f088135ff288deb9d5546f4c71919207f891a70 -github.com/davecgh/go-spew 5215b55f46b2b919f50a1df0eaa5886afe4e3b3d +github.com/couchbase/go-couchbase cb664315a324d87d19c879d9cc67fda6be8c2ac1 +github.com/couchbase/gomemcached a5ea6356f648fec6ab89add00edd09151455b4b2 +github.com/couchbase/goutils 5823a0cbaaa9008406021dc5daf80125ea30bba6 +github.com/dancannon/gorethink e7cac92ea2bc52638791a021f212145acfedb1fc +github.com/davecgh/go-spew fc32781af5e85e548d3f1abaf0fa3dbe8a72495c github.com/eapache/go-resiliency b86b1ec0dd4209a588dc1285cdd471e73525c0b3 github.com/eapache/queue ded5959c0d4e360646dc9e9908cff48666781367 -github.com/fsouza/go-dockerclient 7b651349f9479f5114913eefbfd3c4eeddd79ab4 -github.com/go-ini/ini afbd495e5aaea13597b5e14fe514ddeaa4d76fc3 +github.com/eclipse/paho.mqtt.golang 4ab3e867810d1ec5f35157c59e965054dbf43a0d +github.com/fsouza/go-dockerclient a49c8269a6899cae30da1f8a4b82e0ce945f9967 +github.com/go-ini/ini 776aa739ce9373377cd16f526cdf06cb4c89b40f github.com/go-ole/go-ole 50055884d646dd9434f16bbb5c9801749b9bafe4 -github.com/go-sql-driver/mysql 7c7f556282622f94213bc028b4d0a7b6151ba239 -github.com/golang/protobuf 6aaa8d47701fa6cf07e914ec01fde3d4a1fe79c3 -github.com/golang/snappy 723cc1e459b8eea2dea4583200fd60757d40097a +github.com/go-sql-driver/mysql 1fca743146605a172a266e1654e01e5cd5669bee +github.com/golang/protobuf 552c7b9542c194800fd493123b3798ef0a832032 +github.com/golang/snappy 5979233c5d6225d4a8e438cdd0b411888449ddab github.com/gonuts/go-shellquote e842a11b24c6abfb3dd27af69a17f482e4b483c2 -github.com/gorilla/context 1c83b3eabd45b6d76072b66b746c20815fb2872d -github.com/gorilla/mux 26a6070f849969ba72b72256e9f14cf519751690 +github.com/gorilla/context 1ea25387ff6f684839d82767c1733ff4d4d15d0a +github.com/gorilla/mux c9e326e2bdec29039a3761c07bece13133863e1e github.com/hailocab/go-hostpool e80d13ce29ede4452c43dea11e79b9bc8a15b478 -github.com/influxdata/config bae7cb98197d842374d3b8403905924094930f24 -github.com/influxdata/influxdb ef571fc104dc24b77cd3710c156cd95e5cfd7aa5 -github.com/jmespath/go-jmespath c01cf91b011868172fdcd9f41838e80c9d716264 -github.com/klauspost/crc32 999f3125931f6557b991b2f8472172bdfa578d38 -github.com/lib/pq 8ad2b298cadd691a77015666a5372eae5dbfac8f +github.com/influxdata/config b79f6829346b8d6e78ba73544b1e1038f1f1c9da +github.com/influxdata/influxdb c190778997f4154294e6160c41b90140641ac915 +github.com/influxdata/toml af4df43894b16e3fd2b788d01bd27ad0776ef2d0 +github.com/jmespath/go-jmespath 0b12d6b521d83fc7f755e7cfc1b1fbdd35a01a74 +github.com/klauspost/crc32 19b0b332c9e4516a6370a0456e6182c3b5036720 +github.com/lib/pq e182dc4027e2ded4b19396d638610f2653295f36 github.com/lxn/win 9a7734ea4db26bc593d52f6a8a957afdad39c5c1 github.com/matttproud/golang_protobuf_extensions d0c3fe89de86839aecf2e0579c40ba3bb336a453 -github.com/miekg/dns e0d84d97e59bcb6561eae269c4e94d25b66822cb 
+github.com/miekg/dns cce6c130cdb92c752850880fd285bea1d64439dd github.com/mreiferson/go-snappystream 028eae7ab5c4c9e2d1cb4c4ca1e53259bbe7e504 github.com/naoina/go-stringutil 6b638e95a32d0c1131db0e7fe83775cbea4a0d0b -github.com/naoina/toml 751171607256bb66e64c9f0220c00662420c38e9 -github.com/nats-io/nats 6a83f1a633cfbfd90aa648ac99fb38c06a8b40df -github.com/nsqio/go-nsq 2118015c120962edc5d03325c680daf3163a8b5f +github.com/nats-io/nats b13fc9d12b0b123ebc374e6b808c6228ae4234a3 +github.com/nats-io/nuid 4f84f5f3b2786224e336af2e13dba0a0a80b76fa +github.com/nsqio/go-nsq 0b80d6f05e15ca1930e0c5e1d540ed627e299980 github.com/pmezard/go-difflib 792786c7400a136282c1664665ae0a8db921c6c2 -github.com/prometheus/client_golang 67994f177195311c3ea3d4407ed0175e34a4256f +github.com/prometheus/client_golang 18acf9993a863f4c4b40612e19cdd243e7c86831 github.com/prometheus/client_model fa8ad6fec33561be4280a8f0514318c79d7f6cb6 -github.com/prometheus/common 14ca1097bbe21584194c15e391a9dab95ad42a59 +github.com/prometheus/common e8eabff8812b05acf522b45fdcd725a785188e37 github.com/prometheus/procfs 406e5b7bfd8201a36e2bb5f7bdae0b03380c2ce8 github.com/samuel/go-zookeeper 218e9c81c0dd8b3b18172b2bbfad92cc7d6db55f -github.com/shirou/gopsutil e77438504d45b9985c99a75730fe65220ceea00e +github.com/shirou/gopsutil 1f32ce1bb380845be7f5d174ac641a2c592c0c42 github.com/shirou/w32 ada3ba68f000aa1b58580e45c9d308fe0b7fc5c5 github.com/soniah/gosnmp b1b4f885b12c5dcbd021c5cee1c904110de6db7d github.com/streadway/amqp b4f3ceab0337f013208d31348b578d83c0064744 github.com/stretchr/objx 1a9d0bb9f541897e62256577b352fdbc1fb4fd94 -github.com/stretchr/testify f390dcf405f7b83c997eac1b06768bb9f44dec18 +github.com/stretchr/testify 1f4a1643a57e798696635ea4c126e9127adb7d3c github.com/wvanbergen/kafka 1a8639a45164fcc245d5c7b4bd3ccfbd1a0ffbf3 github.com/wvanbergen/kazoo-go 0f768712ae6f76454f987c3356177e138df258f8 github.com/zensqlmonitor/go-mssqldb ffe5510c6fa5e15e6d983210ab501c815b56b363 -golang.org/x/net 04b9de9b512f58addf28c9853d50ebef61c3953e -golang.org/x/text 6d3c22c4525a4da167968fa2479be5524d2e8bd0 -gopkg.in/dancannon/gorethink.v1 6f088135ff288deb9d5546f4c71919207f891a70 +golang.org/x/net 6acef71eb69611914f7a30939ea9f6e194c78172 +golang.org/x/text a71fd10341b064c10f4a81ceac72bcf70f26ea34 +gopkg.in/dancannon/gorethink.v1 7d1af5be49cb5ecc7b177bf387d232050299d6ef gopkg.in/fatih/pool.v2 cba550ebf9bce999a02e963296d4bc7a486cb715 -gopkg.in/mgo.v2 03c9f3ee4c14c8e51ee521a6a7d0425658dd6f64 -gopkg.in/yaml.v2 f7716cbe52baa25d2e9b0d0da546fcf909fc16b4 +gopkg.in/mgo.v2 d90005c5262a3463800497ea5a89aed5fe22c886 +gopkg.in/yaml.v2 a83829b6f1293c91addabc89d0571c246397bbf4 diff --git a/Makefile b/Makefile index ef316bd03..c87f78b55 100644 --- a/Makefile +++ b/Makefile @@ -22,8 +22,8 @@ build-windows: ./cmd/telegraf/telegraf.go build-for-docker: - CGO_ENABLED=0 GOOS=linux go -o telegraf -ldflags \ - "-X main.Version=$(VERSION)" \ + CGO_ENABLED=0 GOOS=linux go build -installsuffix cgo -o telegraf -ldflags \ + "-s -X main.Version=$(VERSION)" \ ./cmd/telegraf/telegraf.go # Build with race detector diff --git a/README.md b/README.md index 0f3f81ebf..caa562a6d 100644 --- a/README.md +++ b/README.md @@ -17,26 +17,15 @@ new plugins. ## Installation: -NOTE: Telegraf 0.10.x is **not** backwards-compatible with previous versions -of telegraf, both in the database layout and the configuration file. 0.2.x -will continue to be supported, see below for download links. 
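Returning to the `build-for-docker` target in the Makefile hunk above: the fix adds the missing `build` verb, disables cgo for a statically linked binary, and passes `-s` to strip debug symbols alongside the existing `-X main.Version=$(VERSION)` injection. A minimal sketch of the Go side of that link-time contract, matching the `Version` variable used in `cmd/telegraf/telegraf.go` (the `unknown` fallback is illustrative, not telegraf's actual behavior):

```go
package main

import "fmt"

// Version is populated at link time, e.g.:
//   go build -ldflags "-s -X main.Version=0.12.0" ./cmd/telegraf/telegraf.go
// When built without the flag it remains the empty string.
var Version string

func main() {
	if Version == "" {
		Version = "unknown" // illustrative fallback only
	}
	fmt.Printf("Telegraf - Version %s\n", Version)
}
```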
- -For more details on the differences between Telegraf 0.2.x and 0.10.x, see -the [release blog post](https://influxdata.com/blog/announcing-telegraf-0-10-0/). - ### Linux deb and rpm Packages: Latest: -* http://get.influxdb.org/telegraf/telegraf_0.10.4-1_amd64.deb -* http://get.influxdb.org/telegraf/telegraf-0.10.4-1.x86_64.rpm +* http://get.influxdb.org/telegraf/telegraf_0.12.0-1_amd64.deb +* http://get.influxdb.org/telegraf/telegraf-0.12.0-1.x86_64.rpm Latest (arm): -* http://get.influxdb.org/telegraf/telegraf_0.10.4-1_arm.deb -* http://get.influxdb.org/telegraf/telegraf-0.10.4-1.arm.rpm - -0.2.x: -* http://get.influxdb.org/telegraf/telegraf_0.2.4_amd64.deb -* http://get.influxdb.org/telegraf/telegraf-0.2.4-1.x86_64.rpm +* http://get.influxdb.org/telegraf/telegraf_0.12.0-1_armhf.deb +* http://get.influxdb.org/telegraf/telegraf-0.12.0-1.armhf.rpm ##### Package Instructions: @@ -50,35 +39,40 @@ controlled via `systemctl [action] telegraf` ### yum/apt Repositories: There is a yum/apt repo available for the whole InfluxData stack, see -[here](https://docs.influxdata.com/influxdb/v0.9/introduction/installation/#installation) -for instructions, replacing the `influxdb` package name with `telegraf`. +[here](https://docs.influxdata.com/influxdb/v0.10/introduction/installation/#installation) +for instructions on setting up the repo. Once it is configured, you will be able +to use this repo to install & update telegraf. ### Linux tarballs: Latest: -* http://get.influxdb.org/telegraf/telegraf-0.10.4-1_linux_amd64.tar.gz -* http://get.influxdb.org/telegraf/telegraf-0.10.4-1_linux_i386.tar.gz -* http://get.influxdb.org/telegraf/telegraf-0.10.4-1_linux_arm.tar.gz - -0.2.x: -* http://get.influxdb.org/telegraf/telegraf_linux_amd64_0.2.4.tar.gz -* http://get.influxdb.org/telegraf/telegraf_linux_386_0.2.4.tar.gz -* http://get.influxdb.org/telegraf/telegraf_linux_arm_0.2.4.tar.gz +* http://get.influxdb.org/telegraf/telegraf-0.12.0-1_linux_amd64.tar.gz +* http://get.influxdb.org/telegraf/telegraf-0.12.0-1_linux_i386.tar.gz +* http://get.influxdb.org/telegraf/telegraf-0.12.0-1_linux_armhf.tar.gz ##### tarball Instructions: To install the full directory structure with config file, run: ``` -sudo tar -C / -zxvf ./telegraf-0.10.4-1_linux_amd64.tar.gz +sudo tar -C / -zxvf ./telegraf-0.12.0-1_linux_amd64.tar.gz ``` To extract only the binary, run: ``` -tar -zxvf telegraf-0.10.4-1_linux_amd64.tar.gz --strip-components=3 ./usr/bin/telegraf +tar -zxvf telegraf-0.12.0-1_linux_amd64.tar.gz --strip-components=3 ./usr/bin/telegraf ``` +### FreeBSD tarball: + +Latest: +* http://get.influxdb.org/telegraf/telegraf-0.12.0-1_freebsd_amd64.tar.gz + +##### tarball Instructions: + +See linux instructions above. 
+ ### Ansible Role: Ansible role: https://github.com/rossmcdonald/telegraf @@ -93,8 +87,8 @@ brew install telegraf ### Windows Binaries (EXPERIMENTAL) Latest: -* http://get.influxdb.org/telegraf/telegraf-0.10.4-1_windows_amd64.zip -* http://get.influxdb.org/telegraf/telegraf-0.10.4-1_windows_i386.zip +* http://get.influxdb.org/telegraf/telegraf-0.12.0-1_windows_amd64.zip +* http://get.influxdb.org/telegraf/telegraf-0.12.0-1_windows_i386.zip ### From Source: @@ -162,51 +156,56 @@ more information on each, please look at the directory of the same name in Currently implemented sources: -* aerospike -* apache -* bcache -* couchdb -* disque -* dns query time -* docker -* dovecot -* elasticsearch -* exec (generic executable plugin, support JSON, influx and graphite) -* haproxy -* httpjson (generic JSON-emitting http service plugin) -* influxdb -* jolokia -* leofs -* lustre2 -* mailchimp -* memcached -* mesos -* mongodb -* mysql -* net_response -* nginx -* nsq -* phpfpm -* phusion passenger -* ping -* postgresql -* powerdns -* procstat -* prometheus -* puppetagent -* rabbitmq -* raindrops -* redis -* rethinkdb -* riak -* sensors (only available if built from source) -* snmp -* sql server (microsoft) -* twemproxy -* zfs -* zookeeper -* win_perf_counters (windows performance counters) -* system +* [aws cloudwatch](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/cloudwatch) +* [aerospike](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/aerospike) +* [apache](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/apache) +* [bcache](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/bcache) +* [couchbase](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/couchbase) +* [couchdb](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/couchdb) +* [disque](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/disque) +* [dns query time](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/dns_query) +* [docker](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/docker) +* [dovecot](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/dovecot) +* [elasticsearch](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/elasticsearch) +* [exec](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/exec ) (generic executable plugin, support JSON, influx, graphite and nagios) +* [haproxy](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/haproxy) +* [httpjson ](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/httpjson ) (generic JSON-emitting http service plugin) +* [influxdb](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/influxdb) +* [ipmi_sensor](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/ipmi_sensor) +* [jolokia](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/jolokia) +* [leofs](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/leofs) +* [lustre2](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/lustre2) +* [mailchimp](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/mailchimp) +* [memcached](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/memcached) +* [mesos](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/mesos) +* [mongodb](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/mongodb) +* 
[mysql](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/mysql) +* [net_response](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/net_response) +* [nginx](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/nginx) +* [nsq](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/nsq) +* [ntpq](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/ntpq) +* [phpfpm](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/phpfpm) +* [phusion passenger](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/passenger) +* [ping](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/ping) +* [postgresql](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/postgresql) +* [postgresql_extensible](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/postgresql_extensible) +* [powerdns](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/powerdns) +* [procstat](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/procstat) +* [prometheus](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/prometheus) +* [puppetagent](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/puppetagent) +* [rabbitmq](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/rabbitmq) +* [raindrops](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/raindrops) +* [redis](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/redis) +* [rethinkdb](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/rethinkdb) +* [riak](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/riak) +* [sensors ](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/sensors) (only available if built from source) +* [snmp](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/snmp) +* [sql server](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/sqlserver) (microsoft) +* [twemproxy](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/twemproxy) +* [zfs](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/zfs) +* [zookeeper](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/zookeeper) +* [win_perf_counters ](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/win_perf_counters) (windows performance counters) +* [system](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/system) * cpu * mem * net @@ -214,34 +213,38 @@ Currently implemented sources: * disk * diskio * swap + * processes + * kernel (/proc/stat) Telegraf can also collect metrics via the following service plugins: -* statsd -* mqtt_consumer -* kafka_consumer -* nats_consumer -* github_webhooks +* [statsd](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/statsd) +* [udp_listener](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/udp_listener) +* [tcp_listener](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/tcp_listener) +* [mqtt_consumer](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/mqtt_consumer) +* [kafka_consumer](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/kafka_consumer) +* [nats_consumer](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/nats_consumer) +* [github_webhooks](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/github_webhooks) We'll be adding support for many more over the coming months. 
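Each of these sources implements the input-plugin interface whose `Gather` signature changes in the CONTRIBUTING.md hunk above (from `inputs.Accumulator` to `telegraf.Accumulator`). A minimal sketch adapted from that `Simple` example; the `Description` method and the `inputs.Add` registration call are assumed from the surrounding codebase (the `inputs.Inputs` registry is referenced in `cmd/telegraf/telegraf.go` below), not shown in this diff:

```go
package simple

import (
	"github.com/influxdata/telegraf"
	"github.com/influxdata/telegraf/plugins/inputs"
)

type Simple struct {
	Ok bool
}

// Description and SampleConfig feed `telegraf -usage simple` style output.
func (s *Simple) Description() string { return "a demo plugin" }

func (s *Simple) SampleConfig() string {
	return "ok = true # indicate if everything is fine"
}

// Gather is called once per collection interval; metrics are handed to the
// accumulator rather than returned.
func (s *Simple) Gather(acc telegraf.Accumulator) error {
	if s.Ok {
		acc.Add("state", "pretty good", nil)
	} else {
		acc.Add("state", "not great", nil)
	}
	return nil
}

func init() {
	// Assumed registration pattern: this is what makes a plugin appear in
	// the inputs.Inputs map that -input-list iterates over.
	inputs.Add("simple", func() telegraf.Input { return &Simple{} })
}
```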
Read on if you want to add support for another service or third-party API. ## Supported Output Plugins -* influxdb -* amon -* amqp -* aws kinesis -* aws cloudwatch -* datadog -* graphite -* kafka -* librato -* mqtt -* nsq -* opentsdb -* prometheus -* riemann +* [influxdb](https://github.com/influxdata/telegraf/tree/master/plugins/outputs/influxdb) +* [amon](https://github.com/influxdata/telegraf/tree/master/plugins/outputs/amon) +* [amqp](https://github.com/influxdata/telegraf/tree/master/plugins/outputs/amqp) +* [aws kinesis](https://github.com/influxdata/telegraf/tree/master/plugins/outputs/kinesis) +* [aws cloudwatch](https://github.com/influxdata/telegraf/tree/master/plugins/outputs/cloudwatch) +* [datadog](https://github.com/influxdata/telegraf/tree/master/plugins/outputs/datadog) +* [graphite](https://github.com/influxdata/telegraf/tree/master/plugins/outputs/graphite) +* [kafka](https://github.com/influxdata/telegraf/tree/master/plugins/outputs/kafka) +* [librato](https://github.com/influxdata/telegraf/tree/master/plugins/outputs/librato) +* [mqtt](https://github.com/influxdata/telegraf/tree/master/plugins/outputs/mqtt) +* [nsq](https://github.com/influxdata/telegraf/tree/master/plugins/outputs/nsq) +* [opentsdb](https://github.com/influxdata/telegraf/tree/master/plugins/outputs/opentsdb) +* [prometheus](https://github.com/influxdata/telegraf/tree/master/plugins/outputs/prometheus_client) +* [riemann](https://github.com/influxdata/telegraf/tree/master/plugins/outputs/riemann) ## Contributing diff --git a/agent/accumulator.go b/agent/accumulator.go index b04ff2b53..7ec22cd7f 100644 --- a/agent/accumulator.go +++ b/agent/accumulator.go @@ -105,7 +105,6 @@ func (ac *accumulator) AddFields( continue } } - result[k] = v // Validate uint64 and float64 fields switch val := v.(type) { @@ -116,6 +115,7 @@ func (ac *accumulator) AddFields( } else { result[k] = int64(9223372036854775807) } + continue case float64: // NaNs are invalid values in influxdb, skip measurement if math.IsNaN(val) || math.IsInf(val, 0) { @@ -127,6 +127,8 @@ func (ac *accumulator) AddFields( continue } } + + result[k] = v } fields = nil if len(result) == 0 { @@ -168,5 +170,8 @@ func (ac *accumulator) setDefaultTags(tags map[string]string) { } func (ac *accumulator) addDefaultTag(key, value string) { + if ac.defaultTags == nil { + ac.defaultTags = make(map[string]string) + } ac.defaultTags[key] = value } diff --git a/agent/accumulator_test.go b/agent/accumulator_test.go new file mode 100644 index 000000000..05f9b02aa --- /dev/null +++ b/agent/accumulator_test.go @@ -0,0 +1,302 @@ +package agent + +import ( + "fmt" + "math" + "testing" + "time" + + "github.com/influxdata/telegraf" + "github.com/influxdata/telegraf/internal/models" + + "github.com/stretchr/testify/assert" +) + +func TestAdd(t *testing.T) { + a := accumulator{} + now := time.Now() + a.metrics = make(chan telegraf.Metric, 10) + defer close(a.metrics) + a.inputConfig = &internal_models.InputConfig{} + + a.Add("acctest", float64(101), map[string]string{}) + a.Add("acctest", float64(101), map[string]string{"acc": "test"}) + a.Add("acctest", float64(101), map[string]string{"acc": "test"}, now) + + testm := <-a.metrics + actual := testm.String() + assert.Contains(t, actual, "acctest value=101") + + testm = <-a.metrics + actual = testm.String() + assert.Contains(t, actual, "acctest,acc=test value=101") + + testm = <-a.metrics + actual = testm.String() + assert.Equal(t, + fmt.Sprintf("acctest,acc=test value=101 %d", now.UnixNano()), + actual) +} + +func 
TestAddDefaultTags(t *testing.T) { + a := accumulator{} + a.addDefaultTag("default", "tag") + now := time.Now() + a.metrics = make(chan telegraf.Metric, 10) + defer close(a.metrics) + a.inputConfig = &internal_models.InputConfig{} + + a.Add("acctest", float64(101), map[string]string{}) + a.Add("acctest", float64(101), map[string]string{"acc": "test"}) + a.Add("acctest", float64(101), map[string]string{"acc": "test"}, now) + + testm := <-a.metrics + actual := testm.String() + assert.Contains(t, actual, "acctest,default=tag value=101") + + testm = <-a.metrics + actual = testm.String() + assert.Contains(t, actual, "acctest,acc=test,default=tag value=101") + + testm = <-a.metrics + actual = testm.String() + assert.Equal(t, + fmt.Sprintf("acctest,acc=test,default=tag value=101 %d", now.UnixNano()), + actual) +} + +func TestAddFields(t *testing.T) { + a := accumulator{} + now := time.Now() + a.metrics = make(chan telegraf.Metric, 10) + defer close(a.metrics) + a.inputConfig = &internal_models.InputConfig{} + + fields := map[string]interface{}{ + "usage": float64(99), + } + a.AddFields("acctest", fields, map[string]string{}) + a.AddFields("acctest", fields, map[string]string{"acc": "test"}) + a.AddFields("acctest", fields, map[string]string{"acc": "test"}, now) + + testm := <-a.metrics + actual := testm.String() + assert.Contains(t, actual, "acctest usage=99") + + testm = <-a.metrics + actual = testm.String() + assert.Contains(t, actual, "acctest,acc=test usage=99") + + testm = <-a.metrics + actual = testm.String() + assert.Equal(t, + fmt.Sprintf("acctest,acc=test usage=99 %d", now.UnixNano()), + actual) +} + +// Test that all Inf fields get dropped, and not added to metrics channel +func TestAddInfFields(t *testing.T) { + inf := math.Inf(1) + ninf := math.Inf(-1) + + a := accumulator{} + now := time.Now() + a.metrics = make(chan telegraf.Metric, 10) + defer close(a.metrics) + a.inputConfig = &internal_models.InputConfig{} + + fields := map[string]interface{}{ + "usage": inf, + "nusage": ninf, + } + a.AddFields("acctest", fields, map[string]string{}) + a.AddFields("acctest", fields, map[string]string{"acc": "test"}) + a.AddFields("acctest", fields, map[string]string{"acc": "test"}, now) + + assert.Len(t, a.metrics, 0) + + // test that non-inf fields are kept and not dropped + fields["notinf"] = float64(100) + a.AddFields("acctest", fields, map[string]string{}) + testm := <-a.metrics + actual := testm.String() + assert.Contains(t, actual, "acctest notinf=100") +} + +// Test that nan fields are dropped and not added +func TestAddNaNFields(t *testing.T) { + nan := math.NaN() + + a := accumulator{} + now := time.Now() + a.metrics = make(chan telegraf.Metric, 10) + defer close(a.metrics) + a.inputConfig = &internal_models.InputConfig{} + + fields := map[string]interface{}{ + "usage": nan, + } + a.AddFields("acctest", fields, map[string]string{}) + a.AddFields("acctest", fields, map[string]string{"acc": "test"}) + a.AddFields("acctest", fields, map[string]string{"acc": "test"}, now) + + assert.Len(t, a.metrics, 0) + + // test that non-nan fields are kept and not dropped + fields["notnan"] = float64(100) + a.AddFields("acctest", fields, map[string]string{}) + testm := <-a.metrics + actual := testm.String() + assert.Contains(t, actual, "acctest notnan=100") +} + +func TestAddUint64Fields(t *testing.T) { + a := accumulator{} + now := time.Now() + a.metrics = make(chan telegraf.Metric, 10) + defer close(a.metrics) + a.inputConfig = &internal_models.InputConfig{} + + fields := map[string]interface{}{ + 
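// This test exercises the accumulator's uint64 handling: AddFields converts
// uint64 fields to int64 before emitting them, since the line protocol used
// here has no unsigned integer type. Values that fit are cast directly (the
// uint64(99) below comes out as 99i), while anything above the int64 maximum
// is clamped to 9223372036854775807, as TestAddUint64Overflow asserts further
// down. NaN and +/-Inf float fields are instead dropped before they reach the
// metrics channel (see TestAddNaNFields and TestAddInfFields above).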
"usage": uint64(99), + } + a.AddFields("acctest", fields, map[string]string{}) + a.AddFields("acctest", fields, map[string]string{"acc": "test"}) + a.AddFields("acctest", fields, map[string]string{"acc": "test"}, now) + + testm := <-a.metrics + actual := testm.String() + assert.Contains(t, actual, "acctest usage=99i") + + testm = <-a.metrics + actual = testm.String() + assert.Contains(t, actual, "acctest,acc=test usage=99i") + + testm = <-a.metrics + actual = testm.String() + assert.Equal(t, + fmt.Sprintf("acctest,acc=test usage=99i %d", now.UnixNano()), + actual) +} + +func TestAddUint64Overflow(t *testing.T) { + a := accumulator{} + now := time.Now() + a.metrics = make(chan telegraf.Metric, 10) + defer close(a.metrics) + a.inputConfig = &internal_models.InputConfig{} + + fields := map[string]interface{}{ + "usage": uint64(9223372036854775808), + } + a.AddFields("acctest", fields, map[string]string{}) + a.AddFields("acctest", fields, map[string]string{"acc": "test"}) + a.AddFields("acctest", fields, map[string]string{"acc": "test"}, now) + + testm := <-a.metrics + actual := testm.String() + assert.Contains(t, actual, "acctest usage=9223372036854775807i") + + testm = <-a.metrics + actual = testm.String() + assert.Contains(t, actual, "acctest,acc=test usage=9223372036854775807i") + + testm = <-a.metrics + actual = testm.String() + assert.Equal(t, + fmt.Sprintf("acctest,acc=test usage=9223372036854775807i %d", now.UnixNano()), + actual) +} + +func TestAddInts(t *testing.T) { + a := accumulator{} + a.addDefaultTag("default", "tag") + now := time.Now() + a.metrics = make(chan telegraf.Metric, 10) + defer close(a.metrics) + a.inputConfig = &internal_models.InputConfig{} + + a.Add("acctest", int(101), map[string]string{}) + a.Add("acctest", int32(101), map[string]string{"acc": "test"}) + a.Add("acctest", int64(101), map[string]string{"acc": "test"}, now) + + testm := <-a.metrics + actual := testm.String() + assert.Contains(t, actual, "acctest,default=tag value=101i") + + testm = <-a.metrics + actual = testm.String() + assert.Contains(t, actual, "acctest,acc=test,default=tag value=101i") + + testm = <-a.metrics + actual = testm.String() + assert.Equal(t, + fmt.Sprintf("acctest,acc=test,default=tag value=101i %d", now.UnixNano()), + actual) +} + +func TestAddFloats(t *testing.T) { + a := accumulator{} + a.addDefaultTag("default", "tag") + now := time.Now() + a.metrics = make(chan telegraf.Metric, 10) + defer close(a.metrics) + a.inputConfig = &internal_models.InputConfig{} + + a.Add("acctest", float32(101), map[string]string{"acc": "test"}) + a.Add("acctest", float64(101), map[string]string{"acc": "test"}, now) + + testm := <-a.metrics + actual := testm.String() + assert.Contains(t, actual, "acctest,acc=test,default=tag value=101") + + testm = <-a.metrics + actual = testm.String() + assert.Equal(t, + fmt.Sprintf("acctest,acc=test,default=tag value=101 %d", now.UnixNano()), + actual) +} + +func TestAddStrings(t *testing.T) { + a := accumulator{} + a.addDefaultTag("default", "tag") + now := time.Now() + a.metrics = make(chan telegraf.Metric, 10) + defer close(a.metrics) + a.inputConfig = &internal_models.InputConfig{} + + a.Add("acctest", "test", map[string]string{"acc": "test"}) + a.Add("acctest", "foo", map[string]string{"acc": "test"}, now) + + testm := <-a.metrics + actual := testm.String() + assert.Contains(t, actual, "acctest,acc=test,default=tag value=\"test\"") + + testm = <-a.metrics + actual = testm.String() + assert.Equal(t, + fmt.Sprintf("acctest,acc=test,default=tag value=\"foo\" %d", 
now.UnixNano()), + actual) +} + +func TestAddBools(t *testing.T) { + a := accumulator{} + a.addDefaultTag("default", "tag") + now := time.Now() + a.metrics = make(chan telegraf.Metric, 10) + defer close(a.metrics) + a.inputConfig = &internal_models.InputConfig{} + + a.Add("acctest", true, map[string]string{"acc": "test"}) + a.Add("acctest", false, map[string]string{"acc": "test"}, now) + + testm := <-a.metrics + actual := testm.String() + assert.Contains(t, actual, "acctest,acc=test,default=tag value=true") + + testm = <-a.metrics + actual = testm.String() + assert.Equal(t, + fmt.Sprintf("acctest,acc=test,default=tag value=false %d", now.UnixNano()), + actual) +} diff --git a/agent/agent.go b/agent/agent.go index 8a8800cc2..fdd17a267 100644 --- a/agent/agent.go +++ b/agent/agent.go @@ -27,17 +27,19 @@ func NewAgent(config *config.Config) (*Agent, error) { Config: config, } - if a.Config.Agent.Hostname == "" { - hostname, err := os.Hostname() - if err != nil { - return nil, err + if !a.Config.Agent.OmitHostname { + if a.Config.Agent.Hostname == "" { + hostname, err := os.Hostname() + if err != nil { + return nil, err + } + + a.Config.Agent.Hostname = hostname } - a.Config.Agent.Hostname = hostname + config.Tags["host"] = a.Config.Agent.Hostname } - config.Tags["host"] = a.Config.Agent.Hostname - return a, nil } diff --git a/agent/agent_test.go b/agent/agent_test.go index 8bf8a150b..adbde9a13 100644 --- a/agent/agent_test.go +++ b/agent/agent_test.go @@ -1,7 +1,6 @@ package agent import ( - "github.com/stretchr/testify/assert" "testing" "time" @@ -11,8 +10,18 @@ import ( _ "github.com/influxdata/telegraf/plugins/inputs/all" // needing to load the outputs _ "github.com/influxdata/telegraf/plugins/outputs/all" + + "github.com/stretchr/testify/assert" ) +func TestAgent_OmitHostname(t *testing.T) { + c := config.NewConfig() + c.Agent.OmitHostname = true + _, err := NewAgent(c) + assert.NoError(t, err) + assert.NotContains(t, c.Tags, "host") +} + func TestAgent_LoadPlugin(t *testing.T) { c := config.NewConfig() c.InputFilters = []string{"mysql"} diff --git a/circle.yml b/circle.yml index 8fd255a78..e7b711f9d 100644 --- a/circle.yml +++ b/circle.yml @@ -4,9 +4,9 @@ machine: post: - sudo service zookeeper stop - go version - - go version | grep 1.5.3 || sudo rm -rf /usr/local/go - - wget https://storage.googleapis.com/golang/go1.5.3.linux-amd64.tar.gz - - sudo tar -C /usr/local -xzf go1.5.3.linux-amd64.tar.gz + - go version | grep 1.6 || sudo rm -rf /usr/local/go + - wget https://storage.googleapis.com/golang/go1.6.linux-amd64.tar.gz + - sudo tar -C /usr/local -xzf go1.6.linux-amd64.tar.gz - go version dependencies: diff --git a/cmd/telegraf/telegraf.go b/cmd/telegraf/telegraf.go index a65c5607c..be591829b 100644 --- a/cmd/telegraf/telegraf.go +++ b/cmd/telegraf/telegraf.go @@ -11,8 +11,9 @@ import ( "github.com/influxdata/telegraf/agent" "github.com/influxdata/telegraf/internal/config" - + "github.com/influxdata/telegraf/plugins/inputs" _ "github.com/influxdata/telegraf/plugins/inputs/all" + "github.com/influxdata/telegraf/plugins/outputs" _ "github.com/influxdata/telegraf/plugins/outputs/all" ) @@ -30,11 +31,14 @@ var fSampleConfig = flag.Bool("sample-config", false, var fPidfile = flag.String("pidfile", "", "file to write our pid to") var fInputFilters = flag.String("input-filter", "", "filter the inputs to enable, separator is :") +var fInputList = flag.Bool("input-list", false, + "print available input plugins.") var fOutputFilters = flag.String("output-filter", "", "filter the outputs to 
enable, separator is :") +var fOutputList = flag.Bool("output-list", false, + "print available output plugins.") var fUsage = flag.String("usage", "", "print usage for a plugin, ie, 'telegraf -usage mysql'") - var fInputFiltersLegacy = flag.String("filter", "", "filter the inputs to enable, separator is :") var fOutputFiltersLegacy = flag.String("outputfilter", "", @@ -59,7 +63,9 @@ The flags are: -sample-config print out full sample configuration to stdout -config-directory directory containing additional *.conf files -input-filter filter the input plugins to enable, separator is : + -input-list print all the available inputs -output-filter filter the output plugins to enable, separator is : + -output-list print all the available outputs -usage print usage for a plugin, ie, 'telegraf -usage mysql' -debug print metrics as they're generated to stdout -quiet run in quiet mode @@ -90,8 +96,9 @@ func main() { reload <- false flag.Usage = func() { usageExit(0) } flag.Parse() + args := flag.Args() - if flag.NFlag() == 0 { + if flag.NFlag() == 0 && len(args) == 0 { usageExit(0) } @@ -115,6 +122,34 @@ func main() { outputFilters = strings.Split(":"+outputFilter+":", ":") } + if len(args) > 0 { + switch args[0] { + case "version": + v := fmt.Sprintf("Telegraf - Version %s", Version) + fmt.Println(v) + return + case "config": + config.PrintSampleConfig(inputFilters, outputFilters) + return + } + } + + if *fOutputList { + fmt.Println("Available Output Plugins:") + for k, _ := range outputs.Outputs { + fmt.Printf(" %s\n", k) + } + return + } + + if *fInputList { + fmt.Println("Available Input Plugins:") + for k, _ := range inputs.Inputs { + fmt.Printf(" %s\n", k) + } + return + } + if *fVersion { v := fmt.Sprintf("Telegraf - Version %s", Version) fmt.Println(v) diff --git a/docs/CONFIGURATION.md b/docs/CONFIGURATION.md index 58dbdf261..0afaa120f 100644 --- a/docs/CONFIGURATION.md +++ b/docs/CONFIGURATION.md @@ -9,6 +9,12 @@ To generate a file with specific inputs and outputs, you can use the -input-filter and -output-filter flags: `telegraf -sample-config -input-filter cpu:mem:net:swap -output-filter influxdb:kafka` +## Environment Variables + +Environment variables can be used anywhere in the config file; simply prepend +them with $. For strings the variable must be within quotes (ie, "$STR_VAR"); +for numbers and booleans they should be plain (ie, $INT_VAR, $BOOL_VAR). + ## `[global_tags]` Configuration Global tags can be specified in the `[global_tags]` section of the config file in @@ -97,7 +103,7 @@ fields which begin with `time_`. percpu = true totalcpu = false # filter all fields beginning with 'time_' - drop = ["time_*"] + fielddrop = ["time_*"] ``` #### Input Config: tagpass and tagdrop @@ -106,7 +112,7 @@ fields which begin with `time_`. [[inputs.cpu]] percpu = true totalcpu = false - drop = ["cpu_time"] + fielddrop = ["cpu_time"] # Don't collect CPU data for cpu6 & cpu7 [inputs.cpu.tagdrop] cpu = [ "cpu6", "cpu7" ] @@ -141,12 +147,12 @@ fields which begin with `time_`. 
# Drop all metrics about containers for kubelet [[inputs.prometheus]] urls = ["http://kube-node-1:4194/metrics"] - namedrop = ["container_"] + namedrop = ["container_*"] # Only store rest client related metrics for kubelet [[inputs.prometheus]] urls = ["http://kube-node-1:4194/metrics"] - namepass = ["rest_client_"] + namepass = ["rest_client_*"] ``` #### Input config: prefix, suffix, and override @@ -199,7 +205,7 @@ to avoid measurement collisions: percpu = true totalcpu = false name_override = "percpu_usage" - drop = ["cpu_time*"] + fielddrop = ["cpu_time*"] ``` ## `[outputs.xxx]` Configuration diff --git a/docs/DATA_FORMATS_INPUT.md b/docs/DATA_FORMATS_INPUT.md index 79528a962..6a916711b 100644 --- a/docs/DATA_FORMATS_INPUT.md +++ b/docs/DATA_FORMATS_INPUT.md @@ -1,5 +1,13 @@ # Telegraf Input Data Formats +Telegraf is able to parse the following input data formats into metrics: + +1. [InfluxDB Line Protocol](https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md#influx) +1. [JSON](https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md#json) +1. [Graphite](https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md#graphite) +1. [Value](https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md#value), e.g. 45 or "booyah" +1. [Nagios](https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md#nagios) (exec input only) + Telegraf metrics, like InfluxDB [points](https://docs.influxdata.com/influxdb/v0.10/write_protocols/line/), are a combination of four basic parts: @@ -31,7 +39,7 @@ example, in the exec plugin: ## measurement name suffix (for separating different commands) name_suffix = "_mycollector" - ## Data format to consume. This can be "json", "influx" or "graphite" + ## Data format to consume. ## Each data format has its own unique set of configuration options, read ## more about them here: ## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md @@ -43,7 +51,7 @@ example, in the exec plugin: Each data_format has an additional set of configuration options available, which I'll go over below. -## Influx: +# Influx: There are no additional configuration options for InfluxDB line-protocol. The metrics are parsed directly into Telegraf metrics. @@ -58,14 +66,14 @@ metrics are parsed directly into Telegraf metrics. ## measurement name suffix (for separating different commands) name_suffix = "_mycollector" - ## Data format to consume. This can be "json", "influx" or "graphite" + ## Data format to consume. ## Each data format has its own unique set of configuration options, read ## more about them here: ## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md data_format = "influx" ``` -## JSON: +# JSON: The JSON data format flattens JSON into metric _fields_. For example, this JSON: @@ -103,7 +111,7 @@ For example, if you had this configuration: ## measurement name suffix (for separating different commands) name_suffix = "_mycollector" - ## Data format to consume. This can be "json", "influx" or "graphite" + ## Data format to consume. ## Each data format has its own unique set of configuration options, read ## more about them here: ## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md @@ -134,7 +142,38 @@ Your Telegraf metrics would get tagged with "my_tag_1" exec_mycollector,my_tag_1=foo a=5,b_c=6 ``` -## Graphite: +# Value: + +The "value" data format translates single values into Telegraf metrics. 
This +is done by assigning a measurement name and setting a single field ("value") +as the parsed metric. + +#### Value Configuration: + +You **must** tell Telegraf what type of metric to collect by using the +`data_type` configuration option. + +**Note:** It is also recommended that you set `name_override` to a measurement +name that makes sense for your metric, otherwise it will just be set to the +name of the plugin. + +```toml +[[inputs.exec]] + ## Commands array + commands = ["cat /proc/sys/kernel/random/entropy_avail"] + + ## override the default metric name of "exec" + name_override = "entropy_available" + + ## Data format to consume. + ## Each data format has its own unique set of configuration options, read + ## more about them here: + ## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md + data_format = "value" + data_type = "integer" # required +``` + +# Graphite: The Graphite data format translates graphite _dot_ buckets directly into telegraf measurement names, with a single value field, and without any tags. For @@ -181,17 +220,32 @@ So the following template: ```toml templates = [ - "measurement.measurement.field.region" + "measurement.measurement.field.field.region" ] ``` would result in the following Graphite -> Telegraf transformation. ``` -cpu.usage.idle.us-west 100 -=> cpu_usage,region=us-west idle=100 +cpu.usage.idle.percent.us-west 100 +=> cpu_usage,region=us-west idle_percent=100 ``` +The field key can also be derived from the second "half" of the input metric name by specifying `field*`: +```toml +templates = [ + "measurement.measurement.region.field*" +] +``` + +would result in the following Graphite -> Telegraf transformation. + +``` +cpu.usage.us-west.idle.percentage 100 +=> cpu_usage,region=us-west idle_percentage=100 +``` +(This cannot be used in conjunction with "measurement*"!) + #### Filter Templates: Users can also filter the template(s) to use based on the name of the bucket, @@ -247,7 +301,7 @@ There are many more options available, ## measurement name suffix (for separating different commands) name_suffix = "_mycollector" - ## Data format to consume. This can be "json", "influx" or "graphite" (line-protocol) + ## Data format to consume. ## Each data format has its own unique set of configuration options, read ## more about them here: ## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md @@ -272,3 +326,27 @@ There are many more options available, "measurement*" ] ``` + +# Nagios: + +There are no additional configuration options for the Nagios data format. The +metrics are parsed directly into Telegraf metrics. + +Note: the Nagios data format is currently only supported by the `exec` input plugin. + +#### Nagios Configuration: + +```toml +[[inputs.exec]] + ## Commands array + commands = ["/usr/lib/nagios/plugins/check_load", "-w 5,6,7 -c 7,8,9"] + + ## measurement name suffix (for separating different commands) + name_suffix = "_mycollector" + + ## Data format to consume. + ## Each data format has its own unique set of configuration options, read + ## more about them here: + ## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md + data_format = "nagios" +```
files = ["stdout"] - ## Data format to output. This can be "influx" or "graphite" + ## Data format to output. + ## Each data format has it's own unique set of configuration options, read ## more about them here: ## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_OUTPUT.md @@ -53,7 +54,8 @@ metrics are serialized directly into InfluxDB line-protocol. ## Files to write to, "stdout" is a specially handled file. files = ["stdout", "/tmp/metrics.out"] - ## Data format to output. This can be "influx" or "graphite" + ## Data format to output. + ## Each data format has it's own unique set of configuration options, read ## more about them here: ## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_OUTPUT.md @@ -87,7 +89,8 @@ tars.cpu-total.us-east-1.cpu.usage_idle 98.09 1455320690 ## Files to write to, "stdout" is a specially handled file. files = ["stdout", "/tmp/metrics.out"] - ## Data format to output. This can be "influx" or "graphite" + ## Data format to output. + ## Each data format has it's own unique set of configuration options, read ## more about them here: ## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_OUTPUT.md @@ -95,3 +98,38 @@ tars.cpu-total.us-east-1.cpu.usage_idle 98.09 1455320690 prefix = "telegraf" ``` + +## Json: + +The Json data format serialized Telegraf metrics in json format. The format is: + +```json +{ + "fields":{ + "field_1":30, + "field_2":4, + "field_N":59, + "n_images":660 + }, + "name":"docker", + "tags":{ + "host":"raynor" + }, + "timestamp":1458229140 +} +``` + +#### Json Configuration: + +```toml +[[outputs.file]] + ## Files to write to, "stdout" is a specially handled file. + files = ["stdout", "/tmp/metrics.out"] + + ## Data format to output. + + ## Each data format has it's own unique set of configuration options, read + ## more about them here: + ## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_OUTPUT.md + data_format = "json" +``` diff --git a/docs/WINDOWS_SERVICE.md b/docs/WINDOWS_SERVICE.md new file mode 100644 index 000000000..679a41527 --- /dev/null +++ b/docs/WINDOWS_SERVICE.md @@ -0,0 +1,36 @@ +# Running Telegraf as a Windows Service + +If you have tried to install Go binaries as Windows Services with the **sc.exe** +tool you may have seen that the service errors and stops running after a while. + +**NSSM** (the Non-Sucking Service Manager) is a tool that helps you in a +[number of scenarios](http://nssm.cc/scenarios) including running Go binaries +that were not specifically designed to run only in Windows platforms. + +## NSSM Installation via Chocolatey + +You can install [Chocolatey](https://chocolatey.org/) and [NSSM](http://nssm.cc/) +with these commands + +```powershell +iex ((new-object net.webclient).DownloadString('https://chocolatey.org/install.ps1')) +choco install -y nssm +``` + +## Installing Telegraf as a Windows Service with NSSM + +You can download the latest Telegraf Windows binaries (still Experimental at +the moment) from [the Telegraf Github repo](https://github.com/influxdata/telegraf). + +Then you can create a C:\telegraf folder, unzip the binary there and modify the +**telegraf.conf** sample to allocate the metrics you want to send to **InfluxDB**. + +Once you have NSSM installed in your system, the process is quite straightforward. 
+You only need to type this command in your Windows shell + +```powershell +nssm install Telegraf c:\telegraf\telegraf.exe -config c:\telegraf\telegraf.config +``` + +And now your service will be installed in Windows and you will be able to start and +stop it gracefully \ No newline at end of file diff --git a/etc/telegraf.conf b/etc/telegraf.conf index d8a295442..1b534d888 100644 --- a/etc/telegraf.conf +++ b/etc/telegraf.conf @@ -1,18 +1,26 @@ -# Telegraf configuration - +# Telegraf Configuration +# # Telegraf is entirely plugin driven. All metrics are gathered from the # declared inputs, and sent to the declared outputs. - +# # Plugins must be declared in here to be active. # To deactivate a plugin, comment out the name and any variables. - +# # Use 'telegraf -config telegraf.conf -test' to see what metrics a config # file would generate. +# +# Environment variables can be used anywhere in this config file, simply prepend +# them with $. For strings the variable must be within quotes (ie, "$STR_VAR"), +# for numbers and booleans they should be plain (ie, $INT_VAR, $BOOL_VAR) + # Global tags can be specified here in key="value" format. [global_tags] # dc = "us-east-1" # will tag all metrics with dc=us-east-1 # rack = "1a" + ## Environment variables can be used as tags, and throughout the config file + # user = "$USER" + # Configuration for telegraf agent [agent] @@ -24,7 +32,7 @@ ## Telegraf will cache metric_buffer_limit metrics for each output, and will ## flush this buffer on a successful write. - metric_buffer_limit = 10000 + metric_buffer_limit = 1000 ## Flush the buffer whenever full, regardless of flush_interval. flush_buffer_when_full = true @@ -48,23 +56,27 @@ quiet = false ## Override default hostname, if empty use os.Hostname() hostname = "" + ## If set to true, do no set the "host" tag in the telegraf agent. + omit_hostname = false ############################################################################### -# OUTPUTS # +# OUTPUT PLUGINS # ############################################################################### # Configuration for influxdb server to send metrics to [[outputs.influxdb]] - # The full HTTP or UDP endpoint URL for your InfluxDB instance. - # Multiple urls can be specified but it is assumed that they are part of the same - # cluster, this means that only ONE of the urls will be written to each interval. + ## The full HTTP or UDP endpoint URL for your InfluxDB instance. + ## Multiple urls can be specified as part of the same cluster, + ## this means that only ONE of the urls will be written to each interval. # urls = ["udp://localhost:8089"] # UDP endpoint example urls = ["http://localhost:8086"] # required - # The target database for metrics (telegraf will create it if not exists) + ## The target database for metrics (telegraf will create it if not exists). database = "telegraf" # required - # Precision of writes, valid values are "ns", "us" (or "µs"), "ms", "s", "m", "h". - # note: using second precision greatly helps InfluxDB compression + ## Retention policy to write to. + retention_policy = "default" + ## Precision of writes, valid values are "ns", "us" (or "µs"), "ms", "s", "m", "h". + ## note: using "s" precision greatly improves InfluxDB compression. precision = "s" ## Write timeout (for the InfluxDB client), formatted as a string. 
@@ -72,57 +84,1230 @@ timeout = "5s" # username = "telegraf" # password = "metricsmetricsmetricsmetrics" - # Set the user agent for HTTP POSTs (can be useful for log differentiation) + ## Set the user agent for HTTP POSTs (can be useful for log differentiation) # user_agent = "telegraf" - # Set UDP payload size, defaults to InfluxDB UDP Client default (512 bytes) + ## Set UDP payload size, defaults to InfluxDB UDP Client default (512 bytes) # udp_payload = 512 + ## Optional SSL Config + # ssl_ca = "/etc/telegraf/ca.pem" + # ssl_cert = "/etc/telegraf/cert.pem" + # ssl_key = "/etc/telegraf/key.pem" + ## Use SSL but skip chain & host verification + # insecure_skip_verify = false + + +# # Configuration for Amon Server to send metrics to. +# [[outputs.amon]] +# ## Amon Server Key +# server_key = "my-server-key" # required. +# +# ## Amon Instance URL +# amon_instance = "https://youramoninstance" # required +# +# ## Connection timeout. +# # timeout = "5s" + + +# # Configuration for the AMQP server to send metrics to +# [[outputs.amqp]] +# ## AMQP url +# url = "amqp://localhost:5672/influxdb" +# ## AMQP exchange +# exchange = "telegraf" +# ## Auth method. PLAIN and EXTERNAL are supported +# # auth_method = "PLAIN" +# ## Telegraf tag to use as a routing key +# ## ie, if this tag exists, it's value will be used as the routing key +# routing_tag = "host" +# +# ## InfluxDB retention policy +# # retention_policy = "default" +# ## InfluxDB database +# # database = "telegraf" +# ## InfluxDB precision +# # precision = "s" +# +# ## Optional SSL Config +# # ssl_ca = "/etc/telegraf/ca.pem" +# # ssl_cert = "/etc/telegraf/cert.pem" +# # ssl_key = "/etc/telegraf/key.pem" +# ## Use SSL but skip chain & host verification +# # insecure_skip_verify = false +# +# ## Data format to output. +# ## Each data format has it's own unique set of configuration options, read +# ## more about them here: +# ## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_OUTPUT.md +# data_format = "influx" + + +# # Configuration for AWS CloudWatch output. +# [[outputs.cloudwatch]] +# ## Amazon REGION +# region = 'us-east-1' +# +# ## Namespace for the CloudWatch MetricDatums +# namespace = 'InfluxData/Telegraf' + + +# # Configuration for DataDog API to send metrics to. +# [[outputs.datadog]] +# ## Datadog API key +# apikey = "my-secret-key" # required. +# +# ## Connection timeout. +# # timeout = "5s" + + +# # Send telegraf metrics to file(s) +# [[outputs.file]] +# ## Files to write to, "stdout" is a specially handled file. +# files = ["stdout", "/tmp/metrics.out"] +# +# ## Data format to output. +# ## Each data format has it's own unique set of configuration options, read +# ## more about them here: +# ## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_OUTPUT.md +# data_format = "influx" + + +# # Configuration for Graphite server to send metrics to +# [[outputs.graphite]] +# ## TCP endpoint for your graphite instance. 
+# servers = ["localhost:2003"] +# ## Prefix metrics name +# prefix = "" +# ## timeout in seconds for the write connection to graphite +# timeout = 2 + + +# # Configuration for the Kafka server to send metrics to +# [[outputs.kafka]] +# ## URLs of kafka brokers +# brokers = ["localhost:9092"] +# ## Kafka topic for producer messages +# topic = "telegraf" +# ## Telegraf tag to use as a routing key +# ## ie, if this tag exists, it's value will be used as the routing key +# routing_tag = "host" +# +# ## CompressionCodec represents the various compression codecs recognized by +# ## Kafka in messages. +# ## 0 : No compression +# ## 1 : Gzip compression +# ## 2 : Snappy compression +# compression_codec = 0 +# +# ## RequiredAcks is used in Produce Requests to tell the broker how many +# ## replica acknowledgements it must see before responding +# ## 0 : the producer never waits for an acknowledgement from the broker. +# ## This option provides the lowest latency but the weakest durability +# ## guarantees (some data will be lost when a server fails). +# ## 1 : the producer gets an acknowledgement after the leader replica has +# ## received the data. This option provides better durability as the +# ## client waits until the server acknowledges the request as successful +# ## (only messages that were written to the now-dead leader but not yet +# ## replicated will be lost). +# ## -1: the producer gets an acknowledgement after all in-sync replicas have +# ## received the data. This option provides the best durability, we +# ## guarantee that no messages will be lost as long as at least one in +# ## sync replica remains. +# required_acks = -1 +# +# ## The total number of times to retry sending a message +# max_retry = 3 +# +# ## Optional SSL Config +# # ssl_ca = "/etc/telegraf/ca.pem" +# # ssl_cert = "/etc/telegraf/cert.pem" +# # ssl_key = "/etc/telegraf/key.pem" +# ## Use SSL but skip chain & host verification +# # insecure_skip_verify = false +# +# ## Data format to output. +# ## Each data format has it's own unique set of configuration options, read +# ## more about them here: +# ## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_OUTPUT.md +# data_format = "influx" + + +# # Configuration for the AWS Kinesis output. +# [[outputs.kinesis]] +# ## Amazon REGION of kinesis endpoint. +# region = "ap-southeast-2" +# ## Kinesis StreamName must exist prior to starting telegraf. +# streamname = "StreamName" +# ## PartitionKey as used for sharding data. +# partitionkey = "PartitionKey" +# ## format of the Data payload in the kinesis PutRecord, supported +# ## String and Custom. +# format = "string" +# ## debug will show upstream aws messages. +# debug = false + + +# # Configuration for Librato API to send metrics to. +# [[outputs.librato]] +# ## Librator API Docs +# ## http://dev.librato.com/v1/metrics-authentication +# +# ## Librato API user +# api_user = "telegraf@influxdb.com" # required. +# +# ## Librato API token +# api_token = "my-secret-token" # required. +# +# ### Debug +# # debug = false +# +# ### Tag Field to populate source attribute (optional) +# ### This is typically the _hostname_ from which the metric was obtained. +# source_tag = "host" +# +# ## Connection timeout. +# # timeout = "5s" + + +# # Configuration for MQTT server to send metrics to +# [[outputs.mqtt]] +# servers = ["localhost:1883"] # required. 
+# +# ## MQTT outputs send metrics to this topic format +# ## "///" +# ## ex: prefix/web01.example.com/mem +# topic_prefix = "telegraf" +# +# ## username and password to connect MQTT server. +# # username = "telegraf" +# # password = "metricsmetricsmetricsmetrics" +# +# ## Optional SSL Config +# # ssl_ca = "/etc/telegraf/ca.pem" +# # ssl_cert = "/etc/telegraf/cert.pem" +# # ssl_key = "/etc/telegraf/key.pem" +# ## Use SSL but skip chain & host verification +# # insecure_skip_verify = false +# +# ## Data format to output. +# ## Each data format has it's own unique set of configuration options, read +# ## more about them here: +# ## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_OUTPUT.md +# data_format = "influx" + + +# # Send telegraf measurements to NSQD +# [[outputs.nsq]] +# ## Location of nsqd instance listening on TCP +# server = "localhost:4150" +# ## NSQ topic for producer messages +# topic = "telegraf" +# +# ## Data format to output. +# ## Each data format has it's own unique set of configuration options, read +# ## more about them here: +# ## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_OUTPUT.md +# data_format = "influx" + + +# # Configuration for OpenTSDB server to send metrics to +# [[outputs.opentsdb]] +# ## prefix for metrics keys +# prefix = "my.specific.prefix." +# +# ## Telnet Mode ## +# ## DNS name of the OpenTSDB server in telnet mode +# host = "opentsdb.example.com" +# +# ## Port of the OpenTSDB server in telnet mode +# port = 4242 +# +# ## Debug true - Prints OpenTSDB communication +# debug = false + + +# # Configuration for the Prometheus client to spawn +# [[outputs.prometheus_client]] +# ## Address to listen on +# # listen = ":9126" + + +# # Configuration for the Riemann server to send metrics to +# [[outputs.riemann]] +# ## URL of server +# url = "localhost:5555" +# ## transport protocol to use either tcp or udp +# transport = "tcp" +# ## separator to use between input name and field name in Riemann service name +# separator = " " + + ############################################################################### -# INPUTS # +# INPUT PLUGINS # ############################################################################### # Read metrics about cpu usage [[inputs.cpu]] - # Whether to report per-cpu stats or not + ## Whether to report per-cpu stats or not percpu = true - # Whether to report total system cpu stats or not + ## Whether to report total system cpu stats or not totalcpu = true - # Comment this line if you want the raw CPU time metrics + ## Comment this line if you want the raw CPU time metrics fielddrop = ["time_*"] + # Read metrics about disk usage by mount point [[inputs.disk]] - # By default, telegraf gather stats for all mountpoints. - # Setting mountpoints will restrict the stats to the specified mountpoints. - # mount_points=["/"] + ## By default, telegraf gather stats for all mountpoints. + ## Setting mountpoints will restrict the stats to the specified mountpoints. + # mount_points = ["/"] - # Ignore some mountpoints by filesystem type. For example (dev)tmpfs (usually - # present on /run, /var/run, /dev/shm or /dev). + ## Ignore some mountpoints by filesystem type. For example (dev)tmpfs (usually + ## present on /run, /var/run, /dev/shm or /dev). ignore_fs = ["tmpfs", "devtmpfs"] + # Read metrics about disk IO by device [[inputs.diskio]] - # By default, telegraf will gather stats for all devices including - # disk partitions. - # Setting devices will restrict the stats to the specified devices. 
+ ## By default, telegraf will gather stats for all devices including + ## disk partitions. + ## Setting devices will restrict the stats to the specified devices. # devices = ["sda", "sdb"] - # Uncomment the following line if you do not need disk serial numbers. + ## Uncomment the following line if you do not need disk serial numbers. # skip_serial_number = true + +# Get kernel statistics from /proc/stat +[[inputs.kernel]] + # no configuration + + # Read metrics about memory usage [[inputs.mem]] # no configuration + +# Get the number of processes and group them by status +[[inputs.processes]] + # no configuration + + # Read metrics about swap memory usage [[inputs.swap]] # no configuration + # Read metrics about system load & uptime [[inputs.system]] # no configuration +# # Read stats from an aerospike server +# [[inputs.aerospike]] +# ## Aerospike servers to connect to (with port) +# ## This plugin will query all namespaces the aerospike +# ## server has configured and get stats for them. +# servers = ["localhost:3000"] + + +# # Read Apache status information (mod_status) +# [[inputs.apache]] +# ## An array of Apache status URI to gather stats. +# urls = ["http://localhost/server-status?auto"] + + +# # Read metrics of bcache from stats_total and dirty_data +# [[inputs.bcache]] +# ## Bcache sets path +# ## If not specified, then default is: +# bcachePath = "/sys/fs/bcache" +# +# ## By default, telegraf gather stats for all bcache devices +# ## Setting devices will restrict the stats to the specified +# ## bcache devices. +# bcacheDevs = ["bcache0"] + + +# # Pull Metric Statistics from Amazon CloudWatch +# [[inputs.cloudwatch]] +# ## Amazon Region +# region = 'us-east-1' +# +# ## Requested CloudWatch aggregation Period (required - must be a multiple of 60s) +# period = '1m' +# +# ## Collection Delay (required - must account for metrics availability via CloudWatch API) +# delay = '1m' +# +# ## Recomended: use metric 'interval' that is a multiple of 'period' to avoid +# ## gaps or overlap in pulled data +# interval = '1m' +# +# ## Metric Statistic Namespace (required) +# namespace = 'AWS/ELB' +# +# ## Metrics to Pull (optional) +# ## Defaults to all Metrics in Namespace if nothing is provided +# ## Refreshes Namespace available metrics every 1h +# #[[inputs.cloudwatch.metrics]] +# # names = ['Latency', 'RequestCount'] +# # +# # ## Dimension filters for Metric (optional) +# # [[inputs.cloudwatch.metrics.dimensions]] +# # name = 'LoadBalancerName' +# # value = 'p-example' + + +# # Read metrics from one or many couchbase clusters +# [[inputs.couchbase]] +# ## specify servers via a url matching: +# ## [protocol://][:password]@address[:port] +# ## e.g. +# ## http://couchbase-0.example.com/ +# ## http://admin:secret@couchbase-0.example.com:8091/ +# ## +# ## If no servers are specified, then localhost is used as the host. +# ## If no protocol is specifed, HTTP is used. +# ## If no port is specified, 8091 is used. +# servers = ["http://localhost:8091"] + + +# # Read CouchDB Stats from one or more servers +# [[inputs.couchdb]] +# ## Works with CouchDB stats endpoints out of the box +# ## Multiple HOSTs from which to read CouchDB stats: +# hosts = ["http://localhost:8086/_stats"] + + +# # Read metrics from one or many disque servers +# [[inputs.disque]] +# ## An array of URI to gather stats about. Specify an ip or hostname +# ## with optional port and password. +# ## ie disque://localhost, disque://10.10.3.33:18832, 10.0.0.1:10000, etc. 
+# ## If no servers are specified, then localhost is used as the host. +# servers = ["localhost"] + + +# # Query given DNS server and gives statistics +# [[inputs.dns_query]] +# ## servers to query +# servers = ["8.8.8.8"] # required +# +# ## Domains or subdomains to query. "."(root) is default +# domains = ["."] # optional +# +# ## Query record type. Default is "A" +# ## Posible values: A, AAAA, CNAME, MX, NS, PTR, TXT, SOA, SPF, SRV. +# record_type = "A" # optional +# +# ## Dns server port. 53 is default +# port = 53 # optional +# +# ## Query timeout in seconds. Default is 2 seconds +# timeout = 2 # optional + + +# # Read metrics about docker containers +# [[inputs.docker]] +# ## Docker Endpoint +# ## To use TCP, set endpoint = "tcp://[ip]:[port]" +# ## To use environment variables (ie, docker-machine), set endpoint = "ENV" +# endpoint = "unix:///var/run/docker.sock" +# ## Only collect metrics for these containers, collect all if empty +# container_names = [] + + +# # Read statistics from one or many dovecot servers +# [[inputs.dovecot]] +# ## specify dovecot servers via an address:port list +# ## e.g. +# ## localhost:24242 +# ## +# ## If no servers are specified, then localhost is used as the host. +# servers = ["localhost:24242"] +# ## Only collect metrics for these domains, collect all if empty +# domains = [] + + +# # Read stats from one or more Elasticsearch servers or clusters +# [[inputs.elasticsearch]] +# ## specify a list of one or more Elasticsearch servers +# servers = ["http://localhost:9200"] +# +# ## set local to false when you want to read the indices stats from all nodes +# ## within the cluster +# local = true +# +# ## set cluster_health to true when you want to also obtain cluster level stats +# cluster_health = false + + +# # Read metrics from one or more commands that can output to stdout +# [[inputs.exec]] +# ## Commands array +# commands = ["/tmp/test.sh", "/usr/bin/mycollector --foo=bar"] +# +# ## measurement name suffix (for separating different commands) +# name_suffix = "_mycollector" +# +# ## Data format to consume. +# ## Each data format has it's own unique set of configuration options, read +# ## more about them here: +# ## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md +# data_format = "influx" + + +# # Read metrics of haproxy, via socket or csv stats page +# [[inputs.haproxy]] +# ## An array of address to gather stats about. Specify an ip on hostname +# ## with optional port. ie localhost, 10.10.3.33:1936, etc. +# +# ## If no servers are specified, then default to 127.0.0.1:1936 +# servers = ["http://myhaproxy.com:1936", "http://anotherhaproxy.com:1936"] +# ## Or you can also use local socket(not work yet) +# ## servers = ["socket://run/haproxy/admin.sock"] + + +# # Read flattened metrics from one or more JSON HTTP endpoints +# [[inputs.httpjson]] +# ## NOTE This plugin only reads numerical measurements, strings and booleans +# ## will be ignored. 
+# +# ## a name for the service being polled +# name = "webserver_stats" +# +# ## URL of each server in the service's cluster +# servers = [ +# "http://localhost:9999/stats/", +# "http://localhost:9998/stats/", +# ] +# +# ## HTTP method to use: GET or POST (case-sensitive) +# method = "GET" +# +# ## List of tag names to extract from top-level of JSON server response +# # tag_keys = [ +# # "my_tag_1", +# # "my_tag_2" +# # ] +# +# ## HTTP parameters (all values must be strings) +# [inputs.httpjson.parameters] +# event_type = "cpu_spike" +# threshold = "0.75" +# +# ## HTTP Header parameters (all values must be strings) +# # [inputs.httpjson.headers] +# # X-Auth-Token = "my-xauth-token" +# # apiVersion = "v1" +# +# ## Optional SSL Config +# # ssl_ca = "/etc/telegraf/ca.pem" +# # ssl_cert = "/etc/telegraf/cert.pem" +# # ssl_key = "/etc/telegraf/key.pem" +# ## Use SSL but skip chain & host verification +# # insecure_skip_verify = false + + +# # Read InfluxDB-formatted JSON metrics from one or more HTTP endpoints +# [[inputs.influxdb]] +# ## Works with InfluxDB debug endpoints out of the box, +# ## but other services can use this format too. +# ## See the influxdb plugin's README for more details. +# +# ## Multiple URLs from which to read InfluxDB-formatted JSON +# urls = [ +# "http://localhost:8086/debug/vars" +# ] + + +# # Read metrics from one or many bare metal servers +# [[inputs.ipmi_sensor]] +# ## specify servers via a url matching: +# ## [username[:password]@][protocol[(address)]] +# ## e.g. +# ## root:passwd@lan(127.0.0.1) +# ## +# servers = ["USERID:PASSW0RD@lan(192.168.1.1)"] + + +# # Read JMX metrics through Jolokia +# [[inputs.jolokia]] +# ## This is the context root used to compose the jolokia url +# context = "/jolokia/read" +# +# ## List of servers exposing jolokia read service +# [[inputs.jolokia.servers]] +# name = "stable" +# host = "192.168.103.2" +# port = "8180" +# # username = "myuser" +# # password = "mypassword" +# +# ## List of metrics collected on above servers +# ## Each metric consists in a name, a jmx path and either +# ## a pass or drop slice attribute. +# ## This collect all heap memory usage metrics. +# [[inputs.jolokia.metrics]] +# name = "heap_memory_usage" +# jmx = "/java.lang:type=Memory/HeapMemoryUsage" + + +# # Read metrics from a LeoFS Server via SNMP +# [[inputs.leofs]] +# ## An array of URI to gather stats about LeoFS. +# ## Specify an ip or hostname with port. ie 127.0.0.1:4020 +# servers = ["127.0.0.1:4021"] + + +# # Read metrics from local Lustre service on OST, MDS +# [[inputs.lustre2]] +# ## An array of /proc globs to search for Lustre stats +# ## If not specified, the default will work on Lustre 2.5.x +# ## +# # ost_procfiles = [ +# # "/proc/fs/lustre/obdfilter/*/stats", +# # "/proc/fs/lustre/osd-ldiskfs/*/stats" +# # ] +# # mds_procfiles = ["/proc/fs/lustre/mdt/*/md_stats"] + + +# # Gathers metrics from the /3.0/reports MailChimp API +# [[inputs.mailchimp]] +# ## MailChimp API key +# ## get from https://admin.mailchimp.com/account/api/ +# api_key = "" # required +# ## Reports for campaigns sent more than days_old ago will not be collected. +# ## 0 means collect all. +# days_old = 0 +# ## Campaign ID to get, if empty gets all campaigns, this option overrides days_old +# # campaign_id = "" + + +# # Read metrics from one or many memcached servers +# [[inputs.memcached]] +# ## An array of address to gather stats about. Specify an ip on hostname +# ## with optional port. ie localhost, 10.0.0.1:11211, etc. 
+# servers = ["localhost:11211"] +# # unix_sockets = ["/var/run/memcached.sock"] + + +# # Telegraf plugin for gathering metrics from N Mesos masters +# [[inputs.mesos]] +# # Timeout, in ms. +# timeout = 100 +# # A list of Mesos masters, default value is localhost:5050. +# masters = ["localhost:5050"] +# # Metrics groups to be collected, by default, all enabled. +# master_collections = [ +# "resources", +# "master", +# "system", +# "slaves", +# "frameworks", +# "messages", +# "evqueue", +# "registrar", +# ] + + +# # Read metrics from one or many MongoDB servers +# [[inputs.mongodb]] +# ## An array of URI to gather stats about. Specify an ip or hostname +# ## with optional port add password. ie, +# ## mongodb://user:auth_key@10.10.3.30:27017, +# ## mongodb://10.10.3.33:18832, +# ## 10.0.0.1:10000, etc. +# servers = ["127.0.0.1:27017"] + + +# # Read metrics from one or many mysql servers +# [[inputs.mysql]] +# ## specify servers via a url matching: +# ## [username[:password]@][protocol[(address)]]/[?tls=[true|false|skip-verify]] +# ## see https://github.com/go-sql-driver/mysql#dsn-data-source-name +# ## e.g. +# ## root:passwd@tcp(127.0.0.1:3306)/?tls=false +# ## root@tcp(127.0.0.1:3306)/?tls=false +# ## +# ## If no servers are specified, then localhost is used as the host. +# servers = ["tcp(127.0.0.1:3306)/"] + + +# # Read metrics about network interface usage +# [[inputs.net]] +# ## By default, telegraf gathers stats from any up interface (excluding loopback) +# ## Setting interfaces will tell it to gather these explicit interfaces, +# ## regardless of status. +# ## +# # interfaces = ["eth0"] + + +# # TCP or UDP 'ping' given url and collect response time in seconds +# [[inputs.net_response]] +# ## Protocol, must be "tcp" or "udp" +# protocol = "tcp" +# ## Server address (default localhost) +# address = "github.com:80" +# ## Set timeout (default 1.0 seconds) +# timeout = 1.0 +# ## Set read timeout (default 1.0 seconds) +# read_timeout = 1.0 +# ## Optional string sent to the server +# # send = "ssh" +# ## Optional expected string in answer +# # expect = "ssh" + + +# # Read TCP metrics such as established, time wait and sockets counts. +# [[inputs.netstat]] +# # no configuration + + +# # Read Nginx's basic status information (ngx_http_stub_status_module) +# [[inputs.nginx]] +# ## An array of Nginx stub_status URI to gather stats. +# urls = ["http://localhost/status"] + + +# # Read NSQ topic and channel statistics. +# [[inputs.nsq]] +# ## An array of NSQD HTTP API endpoints +# endpoints = ["http://localhost:4151"] + + +# # Get standard NTP query metrics, requires ntpq executable. +# [[inputs.ntpq]] +# ## If false, set the -n ntpq flag. Can reduce metric gather time. +# dns_lookup = true + + +# # Read metrics of passenger using passenger-status +# [[inputs.passenger]] +# ## Path of passenger-status. +# ## +# ## Plugin gather metric via parsing XML output of passenger-status +# ## More information about the tool: +# ## https://www.phusionpassenger.com/library/admin/apache/overall_status_report.html +# ## +# ## If no path is specified, then the plugin simply execute passenger-status +# ## hopefully it can be found in your PATH +# command = "passenger-status -v --show=xml" + + +# # Read metrics of phpfpm, via HTTP status page or socket +# [[inputs.phpfpm]] +# ## An array of addresses to gather stats about. 
Specify an ip or hostname +# ## with optional port and path +# ## +# ## Plugin can be configured in three modes (either can be used): +# ## - http: the URL must start with http:// or https://, ie: +# ## "http://localhost/status" +# ## "http://192.168.130.1/status?full" +# ## +# ## - unixsocket: path to fpm socket, ie: +# ## "/var/run/php5-fpm.sock" +# ## or using a custom fpm status path: +# ## "/var/run/php5-fpm.sock:fpm-custom-status-path" +# ## +# ## - fcgi: the URL must start with fcgi:// or cgi://, and port must be present, ie: +# ## "fcgi://10.0.0.12:9000/status" +# ## "cgi://10.0.10.12:9001/status" +# ## +# ## Example of multiple gathering from local socket and remove host +# ## urls = ["http://192.168.1.20/status", "/tmp/fpm.sock"] +# urls = ["http://localhost/status"] + + +# # Ping given url(s) and return statistics +# [[inputs.ping]] +# ## NOTE: this plugin forks the ping command. You may need to set capabilities +# ## via setcap cap_net_raw+p /bin/ping +# +# ## urls to ping +# urls = ["www.google.com"] # required +# ## number of pings to send (ping -c ) +# count = 1 # required +# ## interval, in s, at which to ping. 0 == default (ping -i ) +# ping_interval = 0.0 +# ## ping timeout, in s. 0 == no timeout (ping -t ) +# timeout = 0.0 +# ## interface to send ping from (ping -I ) +# interface = "" + + +# # Read metrics from one or many postgresql servers +# [[inputs.postgresql]] +# ## specify address via a url matching: +# ## postgres://[pqgotest[:password]]@localhost[/dbname]\ +# ## ?sslmode=[disable|verify-ca|verify-full] +# ## or a simple string: +# ## host=localhost user=pqotest password=... sslmode=... dbname=app_production +# ## +# ## All connection parameters are optional. +# ## +# ## Without the dbname parameter, the driver will default to a database +# ## with the same name as the user. This dbname is just for instantiating a +# ## connection with the server and doesn't restrict the databases we are trying +# ## to grab metrics for. +# ## +# address = "host=localhost user=postgres sslmode=disable" +# +# ## A list of databases to pull metrics about. If not specified, metrics for all +# ## databases are gathered. +# # databases = ["app_production", "testing"] + + +# # Read metrics from one or many postgresql servers +# [[inputs.postgresql_extensible]] +# ## specify address via a url matching: +# ## postgres://[pqgotest[:password]]@localhost[/dbname]\ +# ## ?sslmode=[disable|verify-ca|verify-full] +# ## or a simple string: +# ## host=localhost user=pqotest password=... sslmode=... dbname=app_production +# # +# ## All connection parameters are optional. # +# ## Without the dbname parameter, the driver will default to a database +# ## with the same name as the user. This dbname is just for instantiating a +# ## connection with the server and doesn't restrict the databases we are trying +# ## to grab metrics for. +# # +# address = "host=localhost user=postgres sslmode=disable" +# ## A list of databases to pull metrics about. If not specified, metrics for all +# ## databases are gathered. +# ## databases = ["app_production", "testing"] +# # +# ## Define the toml config where the sql queries are stored +# ## New queries can be added, if the withdbname is set to true and there is no +# ## databases defined in the 'databases field', the sql query is ended by a +# ## 'is not null' in order to make the query succeed. 
+# ## Example : +# ## The sqlquery : "SELECT * FROM pg_stat_database where datname" become +# ## "SELECT * FROM pg_stat_database where datname IN ('postgres', 'pgbench')" +# ## because the databases variable was set to ['postgres', 'pgbench' ] and the +# ## withdbname was true. Be careful that if the withdbname is set to false you +# ## don't have to define the where clause (aka with the dbname) the tagvalue +# ## field is used to define custom tags (separated by comas) +# # +# ## Structure : +# ## [[inputs.postgresql_extensible.query]] +# ## sqlquery string +# ## version string +# ## withdbname boolean +# ## tagvalue string (coma separated) +# [[inputs.postgresql_extensible.query]] +# sqlquery="SELECT * FROM pg_stat_database" +# version=901 +# withdbname=false +# tagvalue="" +# [[inputs.postgresql_extensible.query]] +# sqlquery="SELECT * FROM pg_stat_bgwriter" +# version=901 +# withdbname=false +# tagvalue="" + + +# # Read metrics from one or many PowerDNS servers +# [[inputs.powerdns]] +# ## An array of sockets to gather stats about. +# ## Specify a path to unix socket. +# unix_sockets = ["/var/run/pdns.controlsocket"] + + +# # Monitor process cpu and memory usage +# [[inputs.procstat]] +# ## Must specify one of: pid_file, exe, or pattern +# ## PID file to monitor process +# pid_file = "/var/run/nginx.pid" +# ## executable name (ie, pgrep ) +# # exe = "nginx" +# ## pattern as argument for pgrep (ie, pgrep -f ) +# # pattern = "nginx" +# ## user as argument for pgrep (ie, pgrep -u ) +# # user = "nginx" +# +# ## Field name prefix +# prefix = "" +# ## comment this out if you want raw cpu_time stats +# fielddrop = ["cpu_time_*"] + + +# # Read metrics from one or many prometheus clients +# [[inputs.prometheus]] +# ## An array of urls to scrape metrics from. +# urls = ["http://localhost:9100/metrics"] +# +# ## Use SSL but skip chain & host verification +# # insecure_skip_verify = false +# ## Use bearer token for authorization +# # bearer_token = /path/to/bearer/token + + +# # Reads last_run_summary.yaml file and converts to measurments +# [[inputs.puppetagent]] +# ## Location of puppet last run summary file +# location = "/var/lib/puppet/state/last_run_summary.yaml" + + +# # Read metrics from one or many RabbitMQ servers via the management API +# [[inputs.rabbitmq]] +# url = "http://localhost:15672" # required +# # name = "rmq-server-1" # optional tag +# # username = "guest" +# # password = "guest" +# +# ## A list of nodes to pull metrics about. If not specified, metrics for +# ## all nodes are gathered. +# # nodes = ["rabbit@node1", "rabbit@node2"] + + +# # Read raindrops stats (raindrops - real-time stats for preforking Rack servers) +# [[inputs.raindrops]] +# ## An array of raindrops middleware URI to gather stats. +# urls = ["http://localhost:8080/_raindrops"] + + +# # Read metrics from one or many redis servers +# [[inputs.redis]] +# ## specify servers via a url matching: +# ## [protocol://][:password]@address[:port] +# ## e.g. +# ## tcp://localhost:6379 +# ## tcp://:password@192.168.99.100 +# ## +# ## If no servers are specified, then localhost is used as the host. +# ## If no port is specified, 6379 is used +# servers = ["tcp://localhost:6379"] + + +# # Read metrics from one or many RethinkDB servers +# [[inputs.rethinkdb]] +# ## An array of URI to gather stats about. Specify an ip or hostname +# ## with optional port add password. ie, +# ## rethinkdb://user:auth_key@10.10.3.30:28105, +# ## rethinkdb://10.10.3.33:18832, +# ## 10.0.0.1:10000, etc. 
+# servers = ["127.0.0.1:28015"] + + +# # Read metrics one or many Riak servers +# [[inputs.riak]] +# # Specify a list of one or more riak http servers +# servers = ["http://localhost:8098"] + + +# # Reads oids value from one or many snmp agents +# [[inputs.snmp]] +# ## Use 'oids.txt' file to translate oids to names +# ## To generate 'oids.txt' you need to run: +# ## snmptranslate -m all -Tz -On | sed -e 's/"//g' > /tmp/oids.txt +# ## Or if you have an other MIB folder with custom MIBs +# ## snmptranslate -M /mycustommibfolder -Tz -On -m all | sed -e 's/"//g' > oids.txt +# snmptranslate_file = "/tmp/oids.txt" +# [[inputs.snmp.host]] +# address = "192.168.2.2:161" +# # SNMP community +# community = "public" # default public +# # SNMP version (1, 2 or 3) +# # Version 3 not supported yet +# version = 2 # default 2 +# # SNMP response timeout +# timeout = 2.0 # default 2.0 +# # SNMP request retries +# retries = 2 # default 2 +# # Which get/bulk do you want to collect for this host +# collect = ["mybulk", "sysservices", "sysdescr"] +# # Simple list of OIDs to get, in addition to "collect" +# get_oids = [] +# +# [[inputs.snmp.host]] +# address = "192.168.2.3:161" +# community = "public" +# version = 2 +# timeout = 2.0 +# retries = 2 +# collect = ["mybulk"] +# get_oids = [ +# "ifNumber", +# ".1.3.6.1.2.1.1.3.0", +# ] +# +# [[inputs.snmp.get]] +# name = "ifnumber" +# oid = "ifNumber" +# +# [[inputs.snmp.get]] +# name = "interface_speed" +# oid = "ifSpeed" +# instance = "0" +# +# [[inputs.snmp.get]] +# name = "sysuptime" +# oid = ".1.3.6.1.2.1.1.3.0" +# unit = "second" +# +# [[inputs.snmp.bulk]] +# name = "mybulk" +# max_repetition = 127 +# oid = ".1.3.6.1.2.1.1" +# +# [[inputs.snmp.bulk]] +# name = "ifoutoctets" +# max_repetition = 127 +# oid = "ifOutOctets" +# +# [[inputs.snmp.host]] +# address = "192.168.2.13:161" +# #address = "127.0.0.1:161" +# community = "public" +# version = 2 +# timeout = 2.0 +# retries = 2 +# #collect = ["mybulk", "sysservices", "sysdescr", "systype"] +# collect = ["sysuptime" ] +# [[inputs.snmp.host.table]] +# name = "iftable3" +# include_instances = ["enp5s0", "eth1"] +# +# # SNMP TABLEs +# # table without mapping neither subtables +# [[inputs.snmp.table]] +# name = "iftable1" +# oid = ".1.3.6.1.2.1.31.1.1.1" +# +# # table without mapping but with subtables +# [[inputs.snmp.table]] +# name = "iftable2" +# oid = ".1.3.6.1.2.1.31.1.1.1" +# sub_tables = [".1.3.6.1.2.1.2.2.1.13"] +# +# # table with mapping but without subtables +# [[inputs.snmp.table]] +# name = "iftable3" +# oid = ".1.3.6.1.2.1.31.1.1.1" +# # if empty. get all instances +# mapping_table = ".1.3.6.1.2.1.31.1.1.1.1" +# # if empty, get all subtables +# +# # table with both mapping and subtables +# [[inputs.snmp.table]] +# name = "iftable4" +# oid = ".1.3.6.1.2.1.31.1.1.1" +# # if empty get all instances +# mapping_table = ".1.3.6.1.2.1.31.1.1.1.1" +# # if empty get all subtables +# # sub_tables could be not "real subtables" +# sub_tables=[".1.3.6.1.2.1.2.2.1.13", "bytes_recv", "bytes_send"] + + +# # Read metrics from Microsoft SQL Server +# [[inputs.sqlserver]] +# ## Specify instances to monitor with a list of connection strings. +# ## All connection parameters are optional. +# ## By default, the host is localhost, listening on default port, TCP 1433. +# ## for Windows, the user is the currently running AD user (SSO). +# ## See https://github.com/denisenkom/go-mssqldb for detailed connection +# ## parameters. 
+# # servers = [ +# # "Server=192.168.1.10;Port=1433;User Id=;Password=;app name=telegraf;log=1;", +# # ] + + +# # Inserts sine and cosine waves for demonstration purposes +# [[inputs.trig]] +# ## Set the amplitude +# amplitude = 10.0 + + +# # Read Twemproxy stats data +# [[inputs.twemproxy]] +# ## Twemproxy stats address and port (no scheme) +# addr = "localhost:22222" +# ## Monitor pool name +# pools = ["redis_pool", "mc_pool"] + + +# # Read metrics of ZFS from arcstats, zfetchstats and vdev_cache_stats +# [[inputs.zfs]] +# ## ZFS kstat path +# ## If not specified, then default is: +# kstatPath = "/proc/spl/kstat/zfs" +# +# ## By default, telegraf gather all zfs stats +# ## If not specified, then default is: +# kstatMetrics = ["arcstats", "zfetchstats", "vdev_cache_stats"] +# +# ## By default, don't gather zpool stats +# poolMetrics = false + + +# # Reads 'mntr' stats from one or many zookeeper servers +# [[inputs.zookeeper]] +# ## An array of address to gather stats about. Specify an ip or hostname +# ## with port. ie localhost:2181, 10.0.0.1:2181, etc. +# +# ## If no servers are specified, then localhost is used as the host. +# ## If no port is specified, 2181 is used +# servers = [":2181"] + + + ############################################################################### -# SERVICE INPUTS # +# SERVICE INPUT PLUGINS # ############################################################################### + +# # A Github Webhook Event collector +# [[inputs.github_webhooks]] +# ## Address and port to host Webhook listener on +# service_address = ":1618" + + +# # Read metrics from Kafka topic(s) +# [[inputs.kafka_consumer]] +# ## topic(s) to consume +# topics = ["telegraf"] +# ## an array of Zookeeper connection strings +# zookeeper_peers = ["localhost:2181"] +# ## Zookeeper Chroot +# zookeeper_chroot = "/" +# ## the name of the consumer group +# consumer_group = "telegraf_metrics_consumers" +# ## Offset (must be either "oldest" or "newest") +# offset = "oldest" +# +# ## Data format to consume. +# ## Each data format has it's own unique set of configuration options, read +# ## more about them here: +# ## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md +# data_format = "influx" + + +# # Read metrics from MQTT topic(s) +# [[inputs.mqtt_consumer]] +# servers = ["localhost:1883"] +# ## MQTT QoS, must be 0, 1, or 2 +# qos = 0 +# +# ## Topics to subscribe to +# topics = [ +# "telegraf/host01/cpu", +# "telegraf/+/mem", +# "sensors/#", +# ] +# +# # if true, messages that can't be delivered while the subscriber is offline +# # will be delivered when it comes back (such as on service restart). +# # NOTE: if true, client_id MUST be set +# persistent_session = false +# # If empty, a random client ID will be generated. +# client_id = "" +# +# ## username and password to connect MQTT server. +# # username = "telegraf" +# # password = "metricsmetricsmetricsmetrics" +# +# ## Optional SSL Config +# # ssl_ca = "/etc/telegraf/ca.pem" +# # ssl_cert = "/etc/telegraf/cert.pem" +# # ssl_key = "/etc/telegraf/key.pem" +# ## Use SSL but skip chain & host verification +# # insecure_skip_verify = false +# +# ## Data format to consume. 
+# ## Each data format has it's own unique set of configuration options, read +# ## more about them here: +# ## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md +# data_format = "influx" + + +# # Read metrics from NATS subject(s) +# [[inputs.nats_consumer]] +# ## urls of NATS servers +# servers = ["nats://localhost:4222"] +# ## Use Transport Layer Security +# secure = false +# ## subject(s) to consume +# subjects = ["telegraf"] +# ## name a queue group +# queue_group = "telegraf_consumers" +# +# ## Data format to consume. +# ## Each data format has it's own unique set of configuration options, read +# ## more about them here: +# ## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md +# data_format = "influx" + + +# # Statsd Server +# [[inputs.statsd]] +# ## Address and port to host UDP listener on +# service_address = ":8125" +# ## Delete gauges every interval (default=false) +# delete_gauges = false +# ## Delete counters every interval (default=false) +# delete_counters = false +# ## Delete sets every interval (default=false) +# delete_sets = false +# ## Delete timings & histograms every interval (default=true) +# delete_timings = true +# ## Percentiles to calculate for timing & histogram stats +# percentiles = [90] +# +# ## separator to use between elements of a statsd metric +# metric_separator = "_" +# +# ## Parses tags in the datadog statsd format +# ## http://docs.datadoghq.com/guides/dogstatsd/ +# parse_data_dog_tags = false +# +# ## Statsd data translation templates, more info can be read here: +# ## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md#graphite +# # templates = [ +# # "cpu.* measurement*" +# # ] +# +# ## Number of UDP messages allowed to queue up, once filled, +# ## the statsd server will start dropping packets +# allowed_pending_messages = 10000 +# +# ## Number of timing/histogram values to track per-measurement in the +# ## calculation of percentiles. Raising this limit increases the accuracy +# ## of percentiles but also increases the memory usage and cpu time. +# percentile_limit = 1000 + + +# # Generic TCP listener +# [[inputs.tcp_listener]] +# ## Address and port to host TCP listener on +# service_address = ":8094" +# +# ## Number of TCP messages allowed to queue up. Once filled, the +# ## TCP listener will start dropping packets. +# allowed_pending_messages = 10000 +# +# ## Maximum number of concurrent TCP connections to allow +# max_tcp_connections = 250 +# +# ## Data format to consume. +# ## Each data format has it's own unique set of configuration options, read +# ## more about them here: +# ## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md +# data_format = "influx" + + +# # Generic UDP listener +# [[inputs.udp_listener]] +# ## Address and port to host UDP listener on +# service_address = ":8092" +# +# ## Number of UDP messages allowed to queue up. Once filled, the +# ## UDP listener will start dropping packets. +# allowed_pending_messages = 10000 +# +# ## Data format to consume. 
+# ## Each data format has it's own unique set of configuration options, read +# ## more about them here: +# ## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md +# data_format = "influx" + diff --git a/etc/telegraf_windows.conf b/etc/telegraf_windows.conf index 7e66cb209..9ce067c39 100644 --- a/etc/telegraf_windows.conf +++ b/etc/telegraf_windows.conf @@ -24,7 +24,7 @@ ## Telegraf will cache metric_buffer_limit metrics for each output, and will ## flush this buffer on a successful write. - metric_buffer_limit = 10000 + metric_buffer_limit = 1000 ## Flush the buffer whenever full, regardless of flush_interval. flush_buffer_when_full = true diff --git a/internal/config/config.go b/internal/config/config.go index b5b73e06e..1e07234e8 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -1,11 +1,14 @@ package config import ( + "bytes" "errors" "fmt" "io/ioutil" "log" + "os" "path/filepath" + "regexp" "sort" "strings" "time" @@ -19,7 +22,20 @@ import ( "github.com/influxdata/telegraf/plugins/serializers" "github.com/influxdata/config" - "github.com/naoina/toml/ast" + "github.com/influxdata/toml" + "github.com/influxdata/toml/ast" +) + +var ( + // Default input plugins + inputDefaults = []string{"cpu", "mem", "swap", "system", "kernel", + "processes", "disk", "diskio"} + + // Default output plugins + outputDefaults = []string{"influxdb"} + + // envVarRe is a regex to find environment variables in the config file + envVarRe = regexp.MustCompile(`\$\w+`) ) // Config specifies the URL/user/password for the database that telegraf @@ -97,8 +113,9 @@ type AgentConfig struct { Debug bool // Quiet is the option for running in quiet mode - Quiet bool - Hostname string + Quiet bool + Hostname string + OmitHostname bool } // Inputs returns a list of strings of the configured inputs. @@ -134,20 +151,28 @@ func (c *Config) ListTags() string { } var header = `# Telegraf Configuration - +# # Telegraf is entirely plugin driven. All metrics are gathered from the # declared inputs, and sent to the declared outputs. - +# # Plugins must be declared in here to be active. # To deactivate a plugin, comment out the name and any variables. - +# # Use 'telegraf -config telegraf.conf -test' to see what metrics a config # file would generate. +# +# Environment variables can be used anywhere in this config file, simply prepend +# them with $. For strings the variable must be within quotes (ie, "$STR_VAR"), +# for numbers and booleans they should be plain (ie, $INT_VAR, $BOOL_VAR) + # Global tags can be specified here in key="value" format. [global_tags] # dc = "us-east-1" # will tag all metrics with dc=us-east-1 # rack = "1a" + ## Environment variables can be used as tags, and throughout the config file + # user = "$USER" + # Configuration for telegraf agent [agent] @@ -159,7 +184,7 @@ var header = `# Telegraf Configuration ## Telegraf will cache metric_buffer_limit metrics for each output, and will ## flush this buffer on a successful write. - metric_buffer_limit = 10000 + metric_buffer_limit = 1000 ## Flush the buffer whenever full, regardless of flush_interval. flush_buffer_when_full = true @@ -183,34 +208,111 @@ var header = `# Telegraf Configuration quiet = false ## Override default hostname, if empty use os.Hostname() hostname = "" + ## If set to true, do no set the "host" tag in the telegraf agent. 
+ omit_hostname = false -# -# OUTPUTS: -# - +############################################################################### +# OUTPUT PLUGINS # +############################################################################### ` -var pluginHeader = ` -# -# INPUTS: -# +var inputHeader = ` + +############################################################################### +# INPUT PLUGINS # +############################################################################### ` var serviceInputHeader = ` -# -# SERVICE INPUTS: -# + +############################################################################### +# SERVICE INPUT PLUGINS # +############################################################################### ` // PrintSampleConfig prints the sample config -func PrintSampleConfig(pluginFilters []string, outputFilters []string) { +func PrintSampleConfig(inputFilters []string, outputFilters []string) { fmt.Printf(header) + if len(outputFilters) != 0 { + printFilteredOutputs(outputFilters, false) + } else { + printFilteredOutputs(outputDefaults, false) + // Print non-default outputs, commented + var pnames []string + for pname := range outputs.Outputs { + if !sliceContains(pname, outputDefaults) { + pnames = append(pnames, pname) + } + } + sort.Strings(pnames) + printFilteredOutputs(pnames, true) + } + + fmt.Printf(inputHeader) + if len(inputFilters) != 0 { + printFilteredInputs(inputFilters, false) + } else { + printFilteredInputs(inputDefaults, false) + // Print non-default inputs, commented + var pnames []string + for pname := range inputs.Inputs { + if !sliceContains(pname, inputDefaults) { + pnames = append(pnames, pname) + } + } + sort.Strings(pnames) + printFilteredInputs(pnames, true) + } +} + +func printFilteredInputs(inputFilters []string, commented bool) { + // Filter inputs + var pnames []string + for pname := range inputs.Inputs { + if sliceContains(pname, inputFilters) { + pnames = append(pnames, pname) + } + } + sort.Strings(pnames) + + // cache service inputs to print them at the end + servInputs := make(map[string]telegraf.ServiceInput) + // for alphabetical looping: + servInputNames := []string{} + + // Print Inputs + for _, pname := range pnames { + creator := inputs.Inputs[pname] + input := creator() + + switch p := input.(type) { + case telegraf.ServiceInput: + servInputs[pname] = p + servInputNames = append(servInputNames, pname) + continue + } + + printConfig(pname, input, "inputs", commented) + } + + // Print Service Inputs + if len(servInputs) == 0 { + return + } + sort.Strings(servInputNames) + fmt.Printf(serviceInputHeader) + for _, name := range servInputNames { + printConfig(name, servInputs[name], "inputs", commented) + } +} + +func printFilteredOutputs(outputFilters []string, commented bool) { // Filter outputs var onames []string for oname := range outputs.Outputs { - if len(outputFilters) == 0 || sliceContains(oname, outputFilters) { + if sliceContains(oname, outputFilters) { onames = append(onames, oname) } } @@ -220,38 +322,7 @@ func PrintSampleConfig(pluginFilters []string, outputFilters []string) { for _, oname := range onames { creator := outputs.Outputs[oname] output := creator() - printConfig(oname, output, "outputs") - } - - // Filter inputs - var pnames []string - for pname := range inputs.Inputs { - if len(pluginFilters) == 0 || sliceContains(pname, pluginFilters) { - pnames = append(pnames, pname) - } - } - sort.Strings(pnames) - - // Print Inputs - fmt.Printf(pluginHeader) - servInputs := make(map[string]telegraf.ServiceInput) - for _, pname := range 
pnames { - creator := inputs.Inputs[pname] - input := creator() - - switch p := input.(type) { - case telegraf.ServiceInput: - servInputs[pname] = p - continue - } - - printConfig(pname, input, "inputs") - } - - // Print Service Inputs - fmt.Printf(serviceInputHeader) - for name, input := range servInputs { - printConfig(name, input, "inputs") + printConfig(oname, output, "outputs", commented) } } @@ -260,13 +331,26 @@ type printer interface { SampleConfig() string } -func printConfig(name string, p printer, op string) { - fmt.Printf("\n# %s\n[[%s.%s]]", p.Description(), op, name) +func printConfig(name string, p printer, op string, commented bool) { + comment := "" + if commented { + comment = "# " + } + fmt.Printf("\n%s# %s\n%s[[%s.%s]]", comment, p.Description(), comment, + op, name) + config := p.SampleConfig() if config == "" { - fmt.Printf("\n # no configuration\n") + fmt.Printf("\n%s # no configuration\n\n", comment) } else { - fmt.Printf(config) + lines := strings.Split(config, "\n") + for i, line := range lines { + if i == 0 || i == len(lines)-1 { + fmt.Print("\n") + continue + } + fmt.Print(comment + line + "\n") + } } } @@ -282,7 +366,7 @@ func sliceContains(name string, list []string) bool { // PrintInputConfig prints the config usage of a single input. func PrintInputConfig(name string) error { if creator, ok := inputs.Inputs[name]; ok { - printConfig(name, creator(), "inputs") + printConfig(name, creator(), "inputs", false) } else { return errors.New(fmt.Sprintf("Input %s not found", name)) } @@ -292,7 +376,7 @@ func PrintInputConfig(name string) error { // PrintOutputConfig prints the config usage of a single output. func PrintOutputConfig(name string) error { if creator, ok := outputs.Outputs[name]; ok { - printConfig(name, creator(), "outputs") + printConfig(name, creator(), "outputs", false) } else { return errors.New(fmt.Sprintf("Output %s not found", name)) } @@ -322,44 +406,44 @@ func (c *Config) LoadDirectory(path string) error { // LoadConfig loads the given config file and applies it to c func (c *Config) LoadConfig(path string) error { - tbl, err := config.ParseFile(path) + tbl, err := parseFile(path) if err != nil { - return err + return fmt.Errorf("Error parsing %s, %s", path, err) } for name, val := range tbl.Fields { subTable, ok := val.(*ast.Table) if !ok { - return errors.New("invalid configuration") + return fmt.Errorf("%s: invalid configuration", path) } switch name { case "agent": if err = config.UnmarshalTable(subTable, c.Agent); err != nil { log.Printf("Could not parse [agent] config\n") - return err + return fmt.Errorf("Error parsing %s, %s", path, err) } case "global_tags", "tags": if err = config.UnmarshalTable(subTable, c.Tags); err != nil { log.Printf("Could not parse [global_tags] config\n") - return err + return fmt.Errorf("Error parsing %s, %s", path, err) } case "outputs": for pluginName, pluginVal := range subTable.Fields { switch pluginSubTable := pluginVal.(type) { case *ast.Table: if err = c.addOutput(pluginName, pluginSubTable); err != nil { - return err + return fmt.Errorf("Error parsing %s, %s", path, err) } case []*ast.Table: for _, t := range pluginSubTable { if err = c.addOutput(pluginName, t); err != nil { - return err + return fmt.Errorf("Error parsing %s, %s", path, err) } } default: - return fmt.Errorf("Unsupported config format: %s", - pluginName) + return fmt.Errorf("Unsupported config format: %s, file %s", + pluginName, path) } } case "inputs", "plugins": @@ -367,30 +451,50 @@ func (c *Config) LoadConfig(path string) error { 
switch pluginSubTable := pluginVal.(type) { case *ast.Table: if err = c.addInput(pluginName, pluginSubTable); err != nil { - return err + return fmt.Errorf("Error parsing %s, %s", path, err) } case []*ast.Table: for _, t := range pluginSubTable { if err = c.addInput(pluginName, t); err != nil { - return err + return fmt.Errorf("Error parsing %s, %s", path, err) } } default: - return fmt.Errorf("Unsupported config format: %s", - pluginName) + return fmt.Errorf("Unsupported config format: %s, file %s", + pluginName, path) } } // Assume it's an input for legacy config file support if no other // identifiers are present default: if err = c.addInput(name, subTable); err != nil { - return err + return fmt.Errorf("Error parsing %s, %s", path, err) } } } return nil } +// parseFile loads a TOML configuration from a provided path and +// returns the AST produced from the TOML parser. When loading the file, it +// will find environment variables and replace them. +func parseFile(fpath string) (*ast.Table, error) { + contents, err := ioutil.ReadFile(fpath) + if err != nil { + return nil, err + } + + env_vars := envVarRe.FindAll(contents, -1) + for _, env_var := range env_vars { + env_val := os.Getenv(strings.TrimPrefix(string(env_var), "$")) + if env_val != "" { + contents = bytes.Replace(contents, env_var, []byte(env_val), 1) + } + } + + return toml.Parse(contents) +} + func (c *Config) addOutput(name string, table *ast.Table) error { if len(c.OutputFilters) > 0 && !sliceContains(name, c.OutputFilters) { return nil @@ -701,12 +805,21 @@ func buildParser(name string, tbl *ast.Table) (parsers.Parser, error) { } } + if node, ok := tbl.Fields["data_type"]; ok { + if kv, ok := node.(*ast.KeyValue); ok { + if str, ok := kv.Value.(*ast.String); ok { + c.DataType = str.Value + } + } + } + c.MetricName = name delete(tbl.Fields, "data_format") delete(tbl.Fields, "separator") delete(tbl.Fields, "templates") delete(tbl.Fields, "tag_keys") + delete(tbl.Fields, "data_type") return parsers.NewParser(c) } diff --git a/internal/config/config_test.go b/internal/config/config_test.go index f0add8b98..d78a8d6b8 100644 --- a/internal/config/config_test.go +++ b/internal/config/config_test.go @@ -1,6 +1,7 @@ package config import ( + "os" "testing" "time" @@ -10,9 +11,52 @@ import ( "github.com/influxdata/telegraf/plugins/inputs/memcached" "github.com/influxdata/telegraf/plugins/inputs/procstat" "github.com/influxdata/telegraf/plugins/parsers" + "github.com/stretchr/testify/assert" ) +func TestConfig_LoadSingleInputWithEnvVars(t *testing.T) { + c := NewConfig() + err := os.Setenv("MY_TEST_SERVER", "192.168.1.1") + assert.NoError(t, err) + err = os.Setenv("TEST_INTERVAL", "10s") + assert.NoError(t, err) + c.LoadConfig("./testdata/single_plugin_env_vars.toml") + + memcached := inputs.Inputs["memcached"]().(*memcached.Memcached) + memcached.Servers = []string{"192.168.1.1"} + + mConfig := &internal_models.InputConfig{ + Name: "memcached", + Filter: internal_models.Filter{ + NameDrop: []string{"metricname2"}, + NamePass: []string{"metricname1"}, + FieldDrop: []string{"other", "stuff"}, + FieldPass: []string{"some", "strings"}, + TagDrop: []internal_models.TagFilter{ + internal_models.TagFilter{ + Name: "badtag", + Filter: []string{"othertag"}, + }, + }, + TagPass: []internal_models.TagFilter{ + internal_models.TagFilter{ + Name: "goodtag", + Filter: []string{"mytag"}, + }, + }, + IsActive: true, + }, + Interval: 10 * time.Second, + } + mConfig.Tags = make(map[string]string) + + assert.Equal(t, memcached,
c.Inputs[0].Input, + "Testdata did not produce a correct memcached struct.") + assert.Equal(t, mConfig, c.Inputs[0].Config, + "Testdata did not produce correct memcached metadata.") +} + func TestConfig_LoadSingleInput(t *testing.T) { c := NewConfig() c.LoadConfig("./testdata/single_plugin.toml") diff --git a/internal/config/testdata/single_plugin_env_vars.toml b/internal/config/testdata/single_plugin_env_vars.toml new file mode 100644 index 000000000..6600a77b3 --- /dev/null +++ b/internal/config/testdata/single_plugin_env_vars.toml @@ -0,0 +1,11 @@ +[[inputs.memcached]] + servers = ["$MY_TEST_SERVER"] + namepass = ["metricname1"] + namedrop = ["metricname2"] + fieldpass = ["some", "strings"] + fielddrop = ["other", "stuff"] + interval = "$TEST_INTERVAL" + [inputs.memcached.tagpass] + goodtag = ["mytag"] + [inputs.memcached.tagdrop] + badtag = ["othertag"] diff --git a/internal/internal.go b/internal/internal.go index 82758e5e8..ff73aae84 100644 --- a/internal/internal.go +++ b/internal/internal.go @@ -11,6 +11,7 @@ import ( "os" "strings" "time" + "unicode" ) const alphanum string = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" @@ -86,15 +87,15 @@ func GetTLSConfig( SSLCert, SSLKey, SSLCA string, InsecureSkipVerify bool, ) (*tls.Config, error) { - t := &tls.Config{} - if SSLCert != "" && SSLKey != "" && SSLCA != "" { - cert, err := tls.LoadX509KeyPair(SSLCert, SSLKey) - if err != nil { - return nil, errors.New(fmt.Sprintf( - "Could not load TLS client key/certificate: %s", - err)) - } + if SSLCert == "" && SSLKey == "" && SSLCA == "" && !InsecureSkipVerify { + return nil, nil + } + t := &tls.Config{ + InsecureSkipVerify: InsecureSkipVerify, + } + + if SSLCA != "" { caCert, err := ioutil.ReadFile(SSLCA) if err != nil { return nil, errors.New(fmt.Sprintf("Could not load TLS CA: %s", @@ -103,23 +104,42 @@ func GetTLSConfig( caCertPool := x509.NewCertPool() caCertPool.AppendCertsFromPEM(caCert) - - t = &tls.Config{ - Certificates: []tls.Certificate{cert}, - RootCAs: caCertPool, - InsecureSkipVerify: InsecureSkipVerify, - } - } else { - if InsecureSkipVerify { - t.InsecureSkipVerify = true - } else { - return nil, nil - } + t.RootCAs = caCertPool } + + if SSLCert != "" && SSLKey != "" { + cert, err := tls.LoadX509KeyPair(SSLCert, SSLKey) + if err != nil { + return nil, errors.New(fmt.Sprintf( + "Could not load TLS client key/certificate: %s", + err)) + } + + t.Certificates = []tls.Certificate{cert} + t.BuildNameToCertificate() + } + // will be nil by default if nothing is provided return t, nil } +// SnakeCase converts the given string to snake case following the Golang format: +// acronyms are converted to lower-case and preceded by an underscore. +func SnakeCase(in string) string { + runes := []rune(in) + length := len(runes) + + var out []rune + for i := 0; i < length; i++ { + if i > 0 && unicode.IsUpper(runes[i]) && ((i+1 < length && unicode.IsLower(runes[i+1])) || unicode.IsLower(runes[i-1])) { + out = append(out, '_') + } + out = append(out, unicode.ToLower(runes[i])) + } + + return string(out) +} + // Glob will test a string pattern, potentially containing globs, against a // subject string. The result is a simple true/false, determining whether or // not the glob pattern matched the subject text. 
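
The refactored `GetTLSConfig` above now yields a usable `*tls.Config` when only `ssl_ca` is supplied (the fix for #890) and still returns `nil, nil` when no TLS options are set. A minimal sketch of how a plugin might consume it; the CA path and server URL below are placeholders, not values from this patch:

```go
package main

import (
	"log"
	"net/http"

	"github.com/influxdata/telegraf/internal"
)

func main() {
	// Only ssl_ca is provided: with the refactor this builds a config that
	// verifies the server against the CA instead of returning nil.
	tlsCfg, err := internal.GetTLSConfig("", "", "/etc/telegraf/ca.pem", false)
	if err != nil {
		log.Fatal(err)
	}

	// tlsCfg may legitimately be nil (no TLS options at all); both cases
	// are safe to hand to an http.Transport.
	client := &http.Client{
		Transport: &http.Transport{TLSClientConfig: tlsCfg},
	}
	resp, err := client.Get("https://localhost:8086/ping")
	if err != nil {
		log.Println("request failed:", err)
		return
	}
	resp.Body.Close()
}
```
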
diff --git a/internal/internal_test.go b/internal/internal_test.go index 7f0c687a8..e4a5eed14 100644 --- a/internal/internal_test.go +++ b/internal/internal_test.go @@ -42,3 +42,32 @@ func TestGlob(t *testing.T) { testGlobNoMatch(t, pattern, "this_is_a_test") } } + +type SnakeTest struct { + input string + output string +} + +var tests = []SnakeTest{ + {"a", "a"}, + {"snake", "snake"}, + {"A", "a"}, + {"ID", "id"}, + {"MOTD", "motd"}, + {"Snake", "snake"}, + {"SnakeTest", "snake_test"}, + {"APIResponse", "api_response"}, + {"SnakeID", "snake_id"}, + {"SnakeIDGoogle", "snake_id_google"}, + {"LinuxMOTD", "linux_motd"}, + {"OMGWTFBBQ", "omgwtfbbq"}, + {"omg_wtf_bbq", "omg_wtf_bbq"}, +} + +func TestSnakeCase(t *testing.T) { + for _, test := range tests { + if SnakeCase(test.input) != test.output { + t.Errorf(`SnakeCase("%s"), wanted "%s", got "%s"`, test.input, test.output, SnakeCase(test.input)) + } + } +} diff --git a/internal/models/running_output.go b/internal/models/running_output.go index 37b479dfb..1e3d44a61 100644 --- a/internal/models/running_output.go +++ b/internal/models/running_output.go @@ -10,7 +10,7 @@ import ( const ( // Default number of metrics kept between flushes. - DEFAULT_METRIC_BUFFER_LIMIT = 10000 + DEFAULT_METRIC_BUFFER_LIMIT = 1000 // Limit how many full metric buffers are kept due to failed writes. FULL_METRIC_BUFFERS_LIMIT = 100 @@ -82,9 +82,11 @@ func (ro *RunningOutput) AddMetric(metric telegraf.Metric) { } } } else { - log.Printf("WARNING: overwriting cached metrics, you may want to " + - "increase the metric_buffer_limit setting in your [agent] " + - "config if you do not wish to overwrite metrics.\n") + if ro.overwriteI == 0 { + log.Printf("WARNING: overwriting cached metrics, you may want to " + + "increase the metric_buffer_limit setting in your [agent] " + + "config if you do not wish to overwrite metrics.\n") + } if ro.overwriteI == len(ro.metrics) { ro.overwriteI = 0 } @@ -119,6 +121,9 @@ func (ro *RunningOutput) Write() error { } func (ro *RunningOutput) write(metrics []telegraf.Metric) error { + if len(metrics) == 0 { + return nil + } start := time.Now() err := ro.Output.Write(metrics) elapsed := time.Since(start) diff --git a/plugins/inputs/EXAMPLE_README.md b/plugins/inputs/EXAMPLE_README.md index 9207cd2ab..6bebf1e88 100644 --- a/plugins/inputs/EXAMPLE_README.md +++ b/plugins/inputs/EXAMPLE_README.md @@ -30,8 +30,6 @@ The example plugin gathers metrics about example things ### Example Output: -Give an example `-test` output here - ``` $ ./telegraf -config telegraf.conf -input-filter example -test measurement1,tag1=foo,tag2=bar field1=1i,field2=2.1 1453831884664956455 diff --git a/plugins/inputs/all/all.go b/plugins/inputs/all/all.go index 5af18fcff..52ee6c13d 100644 --- a/plugins/inputs/all/all.go +++ b/plugins/inputs/all/all.go @@ -4,6 +4,8 @@ import ( _ "github.com/influxdata/telegraf/plugins/inputs/aerospike" _ "github.com/influxdata/telegraf/plugins/inputs/apache" _ "github.com/influxdata/telegraf/plugins/inputs/bcache" + _ "github.com/influxdata/telegraf/plugins/inputs/cloudwatch" + _ "github.com/influxdata/telegraf/plugins/inputs/couchbase" _ "github.com/influxdata/telegraf/plugins/inputs/couchdb" _ "github.com/influxdata/telegraf/plugins/inputs/disque" _ "github.com/influxdata/telegraf/plugins/inputs/dns_query" @@ -15,6 +17,7 @@ import ( _ "github.com/influxdata/telegraf/plugins/inputs/haproxy" _ "github.com/influxdata/telegraf/plugins/inputs/httpjson" _ "github.com/influxdata/telegraf/plugins/inputs/influxdb" + _
"github.com/influxdata/telegraf/plugins/inputs/ipmi_sensor" _ "github.com/influxdata/telegraf/plugins/inputs/jolokia" _ "github.com/influxdata/telegraf/plugins/inputs/kafka_consumer" _ "github.com/influxdata/telegraf/plugins/inputs/leofs" @@ -29,10 +32,12 @@ import ( _ "github.com/influxdata/telegraf/plugins/inputs/net_response" _ "github.com/influxdata/telegraf/plugins/inputs/nginx" _ "github.com/influxdata/telegraf/plugins/inputs/nsq" + _ "github.com/influxdata/telegraf/plugins/inputs/ntpq" _ "github.com/influxdata/telegraf/plugins/inputs/passenger" _ "github.com/influxdata/telegraf/plugins/inputs/phpfpm" _ "github.com/influxdata/telegraf/plugins/inputs/ping" _ "github.com/influxdata/telegraf/plugins/inputs/postgresql" + _ "github.com/influxdata/telegraf/plugins/inputs/postgresql_extensible" _ "github.com/influxdata/telegraf/plugins/inputs/powerdns" _ "github.com/influxdata/telegraf/plugins/inputs/procstat" _ "github.com/influxdata/telegraf/plugins/inputs/prometheus" @@ -47,8 +52,10 @@ import ( _ "github.com/influxdata/telegraf/plugins/inputs/sqlserver" _ "github.com/influxdata/telegraf/plugins/inputs/statsd" _ "github.com/influxdata/telegraf/plugins/inputs/system" + _ "github.com/influxdata/telegraf/plugins/inputs/tcp_listener" _ "github.com/influxdata/telegraf/plugins/inputs/trig" _ "github.com/influxdata/telegraf/plugins/inputs/twemproxy" + _ "github.com/influxdata/telegraf/plugins/inputs/udp_listener" _ "github.com/influxdata/telegraf/plugins/inputs/win_perf_counters" _ "github.com/influxdata/telegraf/plugins/inputs/zfs" _ "github.com/influxdata/telegraf/plugins/inputs/zookeeper" diff --git a/plugins/inputs/apache/apache.go b/plugins/inputs/apache/apache.go index b6e3e50f1..eba5a1188 100644 --- a/plugins/inputs/apache/apache.go +++ b/plugins/inputs/apache/apache.go @@ -58,7 +58,10 @@ var tr = &http.Transport{ ResponseHeaderTimeout: time.Duration(3 * time.Second), } -var client = &http.Client{Transport: tr} +var client = &http.Client{ + Transport: tr, + Timeout: time.Duration(4 * time.Second), +} func (n *Apache) gatherUrl(addr *url.URL, acc telegraf.Accumulator) error { resp, err := client.Get(addr.String()) diff --git a/plugins/inputs/cloudwatch/README.md b/plugins/inputs/cloudwatch/README.md new file mode 100644 index 000000000..04501161d --- /dev/null +++ b/plugins/inputs/cloudwatch/README.md @@ -0,0 +1,86 @@ +# Amazon CloudWatch Statistics Input + +This plugin will pull Metric Statistics from Amazon CloudWatch. + +### Amazon Authentication + +This plugin uses a credential chain for Authentication with the CloudWatch +API endpoint. In the following order the plugin will attempt to authenticate. +1. [IAMS Role](http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/iam-roles-for-amazon-ec2.html) +2. [Environment Variables](https://github.com/aws/aws-sdk-go/wiki/configuring-sdk#environment-variables) +3. 
[Shared Credentials](https://github.com/aws/aws-sdk-go/wiki/configuring-sdk#shared-credentials-file) + +### Configuration: + +```toml +[[inputs.cloudwatch]] + ## Amazon Region (required) + region = 'us-east-1' + + ## Requested CloudWatch aggregation Period (required - must be a multiple of 60s) + period = '1m' + + ## Collection Delay (required - must account for metrics availability via CloudWatch API) + delay = '1m' + + ## Override global run interval (optional - defaults to global interval) + ## Recommended: use metric 'interval' that is a multiple of 'period' to avoid + ## gaps or overlap in pulled data + interval = '1m' + + ## Metric Statistic Namespace (required) + namespace = 'AWS/ELB' + + ## Metrics to Pull (optional) + ## Defaults to all Metrics in Namespace if nothing is provided + ## Refreshes Namespace available metrics every 1h + [[inputs.cloudwatch.metrics]] + names = ['Latency', 'RequestCount'] + + ## Dimension filters for Metric (optional) + [[inputs.cloudwatch.metrics.dimensions]] + name = 'LoadBalancerName' + value = 'p-example' +``` +#### Requirements and Terminology + +Plugin Configuration utilizes [CloudWatch concepts](http://docs.aws.amazon.com/AmazonCloudWatch/latest/DeveloperGuide/cloudwatch_concepts.html) and access patterns to allow monitoring of any CloudWatch Metric. + +- `region` must be a valid AWS [Region](http://docs.aws.amazon.com/AmazonCloudWatch/latest/DeveloperGuide/cloudwatch_concepts.html#CloudWatchRegions) value +- `period` must be a valid CloudWatch [Period](http://docs.aws.amazon.com/AmazonCloudWatch/latest/DeveloperGuide/cloudwatch_concepts.html#CloudWatchPeriods) value +- `namespace` must be a valid CloudWatch [Namespace](http://docs.aws.amazon.com/AmazonCloudWatch/latest/DeveloperGuide/cloudwatch_concepts.html#Namespace) value +- `names` must be valid CloudWatch [Metric](http://docs.aws.amazon.com/AmazonCloudWatch/latest/DeveloperGuide/cloudwatch_concepts.html#Metric) names +- `dimensions` must be valid CloudWatch [Dimension](http://docs.aws.amazon.com/AmazonCloudWatch/latest/DeveloperGuide/cloudwatch_concepts.html#Dimension) name/value pairs + +#### Restrictions and Limitations +- CloudWatch metrics are not available instantly via the CloudWatch API.
You should adjust your collection `delay` to account for this lag in metrics availability based on your [monitoring subscription level](http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/using-cloudwatch-new.html) +- CloudWatch API usage incurs cost - see [GetMetricStatistics Pricing](https://aws.amazon.com/cloudwatch/pricing/) + +### Measurements & Fields: + +Each CloudWatch Namespace monitored records a measurement with fields for each available Metric Statistic. +Namespace and Metrics are represented in [snake case](https://en.wikipedia.org/wiki/Snake_case). + +- cloudwatch_{namespace} + - {metric}_sum (metric Sum value) + - {metric}_average (metric Average value) + - {metric}_minimum (metric Minimum value) + - {metric}_maximum (metric Maximum value) + - {metric}_sample_count (metric SampleCount value) + + +### Tags: +Each measurement is tagged with the following identifiers to uniquely identify the associated metric. +Tag Dimension names are represented in [snake case](https://en.wikipedia.org/wiki/Snake_case). + +- All measurements have the following tags: + - region (CloudWatch Region) + - unit (CloudWatch Metric Unit) + - {dimension-name} (CloudWatch Dimension value - one for each metric dimension) + +### Example Output: + +``` +$ ./telegraf -config telegraf.conf -input-filter cloudwatch -test +> cloudwatch_aws_elb,load_balancer_name=p-example,region=us-east-1,unit=seconds latency_average=0.004810798017284538,latency_maximum=0.1100282669067383,latency_minimum=0.0006084442138671875,latency_sample_count=4029,latency_sum=19.382705211639404 1459542420000000000 +``` diff --git a/plugins/inputs/cloudwatch/cloudwatch.go b/plugins/inputs/cloudwatch/cloudwatch.go new file mode 100644 index 000000000..e3fa74bad --- /dev/null +++ b/plugins/inputs/cloudwatch/cloudwatch.go @@ -0,0 +1,305 @@ +package cloudwatch + +import ( + "fmt" + "strings" + "time" + + "github.com/aws/aws-sdk-go/aws" + "github.com/aws/aws-sdk-go/aws/credentials" + "github.com/aws/aws-sdk-go/aws/credentials/ec2rolecreds" + "github.com/aws/aws-sdk-go/aws/ec2metadata" + "github.com/aws/aws-sdk-go/aws/session" + + "github.com/aws/aws-sdk-go/service/cloudwatch" + + "github.com/influxdata/telegraf" + "github.com/influxdata/telegraf/internal" + "github.com/influxdata/telegraf/plugins/inputs" +) + +type ( + CloudWatch struct { + Region string `toml:"region"` + Period internal.Duration `toml:"period"` + Delay internal.Duration `toml:"delay"` + Namespace string `toml:"namespace"` + Metrics []*Metric `toml:"metrics"` + client cloudwatchClient + metricCache *MetricCache + } + + Metric struct { + MetricNames []string `toml:"names"` + Dimensions []*Dimension `toml:"dimensions"` + } + + Dimension struct { + Name string `toml:"name"` + Value string `toml:"value"` + } + + MetricCache struct { + TTL time.Duration + Fetched time.Time + Metrics []*cloudwatch.Metric + } + + cloudwatchClient interface { + ListMetrics(*cloudwatch.ListMetricsInput) (*cloudwatch.ListMetricsOutput, error) + GetMetricStatistics(*cloudwatch.GetMetricStatisticsInput) (*cloudwatch.GetMetricStatisticsOutput, error) + } +) + +func (c *CloudWatch) SampleConfig() string { + return ` + ## Amazon Region + region = 'us-east-1' + + ## Requested CloudWatch aggregation Period (required - must be a multiple of 60s) + period = '1m' + + ## Collection Delay (required - must account for metrics availability via CloudWatch API) + delay = '1m' + + ## Recommended: use metric 'interval' that is a multiple of 'period' to avoid + ## gaps or overlap in pulled data + interval = '1m' + + ## Metric
Statistic Namespace (required) + namespace = 'AWS/ELB' + + ## Metrics to Pull (optional) + ## Defaults to all Metrics in Namespace if nothing is provided + ## Refreshes Namespace available metrics every 1h + #[[inputs.cloudwatch.metrics]] + # names = ['Latency', 'RequestCount'] + # + # ## Dimension filters for Metric (optional) + # [[inputs.cloudwatch.metrics.dimensions]] + # name = 'LoadBalancerName' + # value = 'p-example' +` +} + +func (c *CloudWatch) Description() string { + return "Pull Metric Statistics from Amazon CloudWatch" +} + +func (c *CloudWatch) Gather(acc telegraf.Accumulator) error { + if c.client == nil { + c.initializeCloudWatch() + } + + var metrics []*cloudwatch.Metric + + // check for provided metric filter + if c.Metrics != nil { + metrics = []*cloudwatch.Metric{} + for _, m := range c.Metrics { + dimensions := make([]*cloudwatch.Dimension, len(m.Dimensions)) + for k, d := range m.Dimensions { + dimensions[k] = &cloudwatch.Dimension{ + Name: aws.String(d.Name), + Value: aws.String(d.Value), + } + } + for _, name := range m.MetricNames { + metrics = append(metrics, &cloudwatch.Metric{ + Namespace: aws.String(c.Namespace), + MetricName: aws.String(name), + Dimensions: dimensions, + }) + } + } + } else { + var err error + metrics, err = c.fetchNamespaceMetrics() + if err != nil { + return err + } + } + + metricCount := len(metrics) + var errChan = make(chan error, metricCount) + + now := time.Now() + + // limit concurrency or we can easily exhaust user connection limit + semaphore := make(chan byte, 64) + + for _, m := range metrics { + semaphore <- 0x1 + go c.gatherMetric(acc, m, now, semaphore, errChan) + } + + for i := 1; i <= metricCount; i++ { + err := <-errChan + if err != nil { + return err + } + } + return nil +} + +func init() { + inputs.Add("cloudwatch", func() telegraf.Input { + return &CloudWatch{} + }) +} + +/* + * Initialize CloudWatch client + */ +func (c *CloudWatch) initializeCloudWatch() error { + config := &aws.Config{ + Region: aws.String(c.Region), + Credentials: credentials.NewChainCredentials( + []credentials.Provider{ + &ec2rolecreds.EC2RoleProvider{Client: ec2metadata.New(session.New())}, + &credentials.EnvProvider{}, + &credentials.SharedCredentialsProvider{}, + }), + } + + c.client = cloudwatch.New(session.New(config)) + return nil +} + +/* + * Fetch available metrics for given CloudWatch Namespace + */ +func (c *CloudWatch) fetchNamespaceMetrics() (metrics []*cloudwatch.Metric, err error) { + if c.metricCache != nil && c.metricCache.IsValid() { + metrics = c.metricCache.Metrics + return + } + + metrics = []*cloudwatch.Metric{} + + var token *string + for more := true; more; { + params := &cloudwatch.ListMetricsInput{ + Namespace: aws.String(c.Namespace), + Dimensions: []*cloudwatch.DimensionFilter{}, + NextToken: token, + MetricName: nil, + } + + resp, err := c.client.ListMetrics(params) + if err != nil { + return nil, err + } + + metrics = append(metrics, resp.Metrics...) 
+ + token = resp.NextToken + more = token != nil + } + + cacheTTL, _ := time.ParseDuration("1h") + c.metricCache = &MetricCache{ + Metrics: metrics, + Fetched: time.Now(), + TTL: cacheTTL, + } + + return +} + +/* + * Gather given Metric and emit any error + */ +func (c *CloudWatch) gatherMetric(acc telegraf.Accumulator, metric *cloudwatch.Metric, now time.Time, semaphore chan byte, errChan chan error) { + params := c.getStatisticsInput(metric, now) + resp, err := c.client.GetMetricStatistics(params) + if err != nil { + errChan <- err + <-semaphore + return + } + + for _, point := range resp.Datapoints { + tags := map[string]string{ + "region": c.Region, + "unit": snakeCase(*point.Unit), + } + + for _, d := range metric.Dimensions { + tags[snakeCase(*d.Name)] = *d.Value + } + + // record field for each statistic + fields := map[string]interface{}{} + + if point.Average != nil { + fields[formatField(*metric.MetricName, cloudwatch.StatisticAverage)] = *point.Average + } + if point.Maximum != nil { + fields[formatField(*metric.MetricName, cloudwatch.StatisticMaximum)] = *point.Maximum + } + if point.Minimum != nil { + fields[formatField(*metric.MetricName, cloudwatch.StatisticMinimum)] = *point.Minimum + } + if point.SampleCount != nil { + fields[formatField(*metric.MetricName, cloudwatch.StatisticSampleCount)] = *point.SampleCount + } + if point.Sum != nil { + fields[formatField(*metric.MetricName, cloudwatch.StatisticSum)] = *point.Sum + } + + acc.AddFields(formatMeasurement(c.Namespace), fields, tags, *point.Timestamp) + } + + errChan <- nil + <-semaphore +} + +/* + * Formatting helpers + */ +func formatField(metricName string, statistic string) string { + return fmt.Sprintf("%s_%s", snakeCase(metricName), snakeCase(statistic)) +} + +func formatMeasurement(namespace string) string { + namespace = strings.Replace(namespace, "/", "_", -1) + namespace = snakeCase(namespace) + return fmt.Sprintf("cloudwatch_%s", namespace) +} + +func snakeCase(s string) string { + s = internal.SnakeCase(s) + s = strings.Replace(s, "__", "_", -1) + return s +} + +/* + * Map Metric to *cloudwatch.GetMetricStatisticsInput for given timeframe + */ +func (c *CloudWatch) getStatisticsInput(metric *cloudwatch.Metric, now time.Time) *cloudwatch.GetMetricStatisticsInput { + end := now.Add(-c.Delay.Duration) + + input := &cloudwatch.GetMetricStatisticsInput{ + StartTime: aws.Time(end.Add(-c.Period.Duration)), + EndTime: aws.Time(end), + MetricName: metric.MetricName, + Namespace: metric.Namespace, + Period: aws.Int64(int64(c.Period.Duration.Seconds())), + Dimensions: metric.Dimensions, + Statistics: []*string{ + aws.String(cloudwatch.StatisticAverage), + aws.String(cloudwatch.StatisticMaximum), + aws.String(cloudwatch.StatisticMinimum), + aws.String(cloudwatch.StatisticSum), + aws.String(cloudwatch.StatisticSampleCount)}, + } + return input +} + +/* + * Check Metric Cache validity + */ +func (c *MetricCache) IsValid() bool { + return c.Metrics != nil && time.Since(c.Fetched) < c.TTL +} diff --git a/plugins/inputs/cloudwatch/cloudwatch_test.go b/plugins/inputs/cloudwatch/cloudwatch_test.go new file mode 100644 index 000000000..8f8a3ad0b --- /dev/null +++ b/plugins/inputs/cloudwatch/cloudwatch_test.go @@ -0,0 +1,131 @@ +package cloudwatch + +import ( + "testing" + "time" + + "github.com/aws/aws-sdk-go/aws" + "github.com/aws/aws-sdk-go/service/cloudwatch" + "github.com/influxdata/telegraf/internal" + "github.com/influxdata/telegraf/testutil" + "github.com/stretchr/testify/assert" +) + +type mockCloudWatchClient struct{} +
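
Gather above fans out one goroutine per metric but caps in-flight requests with a buffered channel (the plugin uses 64 slots) so the AWS connection limit is not exhausted, then collects exactly one result per job from errChan. A standalone sketch of that pattern; the names doWork and jobs are illustrative, not part of the plugin:

```go
package main

import (
	"fmt"
	"time"
)

// doWork stands in for a remote call such as GetMetricStatistics.
func doWork(id int) error {
	time.Sleep(10 * time.Millisecond)
	return nil
}

func main() {
	jobs := []int{1, 2, 3, 4, 5}

	// Buffered channel used as a counting semaphore: sends block once two
	// workers are in flight (the plugin's capacity is 64).
	semaphore := make(chan byte, 2)
	errChan := make(chan error, len(jobs))

	for _, id := range jobs {
		semaphore <- 0x1 // acquire a slot before spawning
		go func(id int) {
			errChan <- doWork(id)
			<-semaphore // release the slot
		}(id)
	}

	// Drain exactly one result per job, surfacing the first error,
	// mirroring the receive loop in Gather.
	for range jobs {
		if err := <-errChan; err != nil {
			fmt.Println("error:", err)
			return
		}
	}
	fmt.Println("all jobs done")
}
```
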
+func (m *mockCloudWatchClient) ListMetrics(params *cloudwatch.ListMetricsInput) (*cloudwatch.ListMetricsOutput, error) { + metric := &cloudwatch.Metric{ + Namespace: params.Namespace, + MetricName: aws.String("Latency"), + Dimensions: []*cloudwatch.Dimension{ + &cloudwatch.Dimension{ + Name: aws.String("LoadBalancerName"), + Value: aws.String("p-example"), + }, + }, + } + + result := &cloudwatch.ListMetricsOutput{ + Metrics: []*cloudwatch.Metric{metric}, + } + return result, nil +} + +func (m *mockCloudWatchClient) GetMetricStatistics(params *cloudwatch.GetMetricStatisticsInput) (*cloudwatch.GetMetricStatisticsOutput, error) { + dataPoint := &cloudwatch.Datapoint{ + Timestamp: params.EndTime, + Minimum: aws.Float64(0.1), + Maximum: aws.Float64(0.3), + Average: aws.Float64(0.2), + Sum: aws.Float64(123), + SampleCount: aws.Float64(100), + Unit: aws.String("Seconds"), + } + result := &cloudwatch.GetMetricStatisticsOutput{ + Label: aws.String("Latency"), + Datapoints: []*cloudwatch.Datapoint{dataPoint}, + } + return result, nil +} + +func TestGather(t *testing.T) { + duration, _ := time.ParseDuration("1m") + internalDuration := internal.Duration{ + Duration: duration, + } + c := &CloudWatch{ + Region: "us-east-1", + Namespace: "AWS/ELB", + Delay: internalDuration, + Period: internalDuration, + } + + var acc testutil.Accumulator + c.client = &mockCloudWatchClient{} + + c.Gather(&acc) + + fields := map[string]interface{}{} + fields["latency_minimum"] = 0.1 + fields["latency_maximum"] = 0.3 + fields["latency_average"] = 0.2 + fields["latency_sum"] = 123.0 + fields["latency_sample_count"] = 100.0 + + tags := map[string]string{} + tags["unit"] = "seconds" + tags["region"] = "us-east-1" + tags["load_balancer_name"] = "p-example" + + assert.True(t, acc.HasMeasurement("cloudwatch_aws_elb")) + acc.AssertContainsTaggedFields(t, "cloudwatch_aws_elb", fields, tags) + +} + +func TestGenerateStatisticsInputParams(t *testing.T) { + d := &cloudwatch.Dimension{ + Name: aws.String("LoadBalancerName"), + Value: aws.String("p-example"), + } + + m := &cloudwatch.Metric{ + MetricName: aws.String("Latency"), + Dimensions: []*cloudwatch.Dimension{d}, + } + + duration, _ := time.ParseDuration("1m") + internalDuration := internal.Duration{ + Duration: duration, + } + + c := &CloudWatch{ + Namespace: "AWS/ELB", + Delay: internalDuration, + Period: internalDuration, + } + + c.initializeCloudWatch() + + now := time.Now() + + params := c.getStatisticsInput(m, now) + + assert.EqualValues(t, *params.EndTime, now.Add(-c.Delay.Duration)) + assert.EqualValues(t, *params.StartTime, now.Add(-c.Period.Duration).Add(-c.Delay.Duration)) + assert.Len(t, params.Dimensions, 1) + assert.Len(t, params.Statistics, 5) + assert.EqualValues(t, *params.Period, 60) +} + +func TestMetricsCacheTimeout(t *testing.T) { + ttl, _ := time.ParseDuration("5ms") + cache := &MetricCache{ + Metrics: []*cloudwatch.Metric{}, + Fetched: time.Now(), + TTL: ttl, + } + + assert.True(t, cache.IsValid()) + time.Sleep(ttl) + assert.False(t, cache.IsValid()) +} diff --git a/plugins/inputs/couchbase/README.md b/plugins/inputs/couchbase/README.md new file mode 100644 index 000000000..6d654a0e2 --- /dev/null +++ b/plugins/inputs/couchbase/README.md @@ -0,0 +1,63 @@ +# Telegraf Plugin: Couchbase + +## Configuration: + +``` +# Read per-node and per-bucket metrics from Couchbase +[[inputs.couchbase]] + ## specify servers via a url matching: + ## [protocol://][username:password@]address[:port] + ## e.g.
+ ## http://couchbase-0.example.com/ + ## http://admin:secret@couchbase-0.example.com:8091/ + ## + ## If no servers are specified, then localhost is used as the host. + ## If no protocol is specified, HTTP is used. + ## If no port is specified, 8091 is used. + servers = ["http://localhost:8091"] +``` + +## Measurements: + +### couchbase_node + +Tags: +- cluster: whatever you called it in `servers` in the configuration, e.g.: `http://couchbase-0.example.com/` +- hostname: Couchbase's name for the node and port, e.g., `172.16.10.187:8091` + +Fields: +- memory_free (unit: bytes, example: 23181365248.0) +- memory_total (unit: bytes, example: 64424656896.0) + +### couchbase_bucket + +Tags: +- cluster: whatever you called it in `servers` in the configuration, e.g.: `http://couchbase-0.example.com/` +- bucket: the name of the couchbase bucket, e.g., `blastro-df` + +Fields: +- quota_percent_used (unit: percent, example: 68.85424936294555) +- ops_per_sec (unit: count, example: 5686.789686789687) +- disk_fetches (unit: count, example: 0.0) +- item_count (unit: count, example: 943239752.0) +- disk_used (unit: bytes, example: 409178772321.0) +- data_used (unit: bytes, example: 212179309111.0) +- mem_used (unit: bytes, example: 202156957464.0) + + +## Example output + +``` +$ telegraf -config telegraf.conf -input-filter couchbase -test +* Plugin: couchbase, Collection 1 +> couchbase_node,cluster=https://couchbase-0.example.com/,hostname=172.16.10.187:8091 memory_free=22927384576,memory_total=64424656896 1458381183695864929 +> couchbase_node,cluster=https://couchbase-0.example.com/,hostname=172.16.10.65:8091 memory_free=23520161792,memory_total=64424656896 1458381183695972112 +> couchbase_node,cluster=https://couchbase-0.example.com/,hostname=172.16.13.105:8091 memory_free=23531704320,memory_total=64424656896 1458381183695995259 +> couchbase_node,cluster=https://couchbase-0.example.com/,hostname=172.16.13.173:8091 memory_free=23628767232,memory_total=64424656896 1458381183696010870 +> couchbase_node,cluster=https://couchbase-0.example.com/,hostname=172.16.15.120:8091 memory_free=23616692224,memory_total=64424656896 1458381183696027406 +> couchbase_node,cluster=https://couchbase-0.example.com/,hostname=172.16.8.127:8091 memory_free=23431770112,memory_total=64424656896 1458381183696041040 +> couchbase_node,cluster=https://couchbase-0.example.com/,hostname=172.16.8.148:8091 memory_free=23811371008,memory_total=64424656896 1458381183696059060 +> couchbase_bucket,bucket=default,cluster=https://couchbase-0.example.com/ data_used=25743360,disk_fetches=0,disk_used=31744886,item_count=0,mem_used=77729224,ops_per_sec=0,quota_percent_used=10.58976636614118 1458381183696210074 +> couchbase_bucket,bucket=demoncat,cluster=https://couchbase-0.example.com/ data_used=38157584951,disk_fetches=0,disk_used=62730302441,item_count=14662532,mem_used=24015304256,ops_per_sec=1207.753207753208,quota_percent_used=79.87855353525707 1458381183696242695 +> couchbase_bucket,bucket=blastro-df,cluster=https://couchbase-0.example.com/ data_used=212552491622,disk_fetches=0,disk_used=413323157621,item_count=944655680,mem_used=202421103760,ops_per_sec=1692.176692176692,quota_percent_used=68.9442170551845 1458381183696272206 +``` diff --git a/plugins/inputs/couchbase/couchbase.go b/plugins/inputs/couchbase/couchbase.go new file mode 100644 index 000000000..48e0c1a75 --- /dev/null +++ b/plugins/inputs/couchbase/couchbase.go @@ -0,0 +1,104 @@ +package couchbase + +import ( + couchbase "github.com/couchbase/go-couchbase" +
"github.com/influxdata/telegraf" + "github.com/influxdata/telegraf/plugins/inputs" + "sync" +) + +type Couchbase struct { + Servers []string +} + +var sampleConfig = ` + ## specify servers via a url matching: + ## [protocol://][:password]@address[:port] + ## e.g. + ## http://couchbase-0.example.com/ + ## http://admin:secret@couchbase-0.example.com:8091/ + ## + ## If no servers are specified, then localhost is used as the host. + ## If no protocol is specifed, HTTP is used. + ## If no port is specified, 8091 is used. + servers = ["http://localhost:8091"] +` + +func (r *Couchbase) SampleConfig() string { + return sampleConfig +} + +func (r *Couchbase) Description() string { + return "Read metrics from one or many couchbase clusters" +} + +// Reads stats from all configured clusters. Accumulates stats. +// Returns one of the errors encountered while gathering stats (if any). +func (r *Couchbase) Gather(acc telegraf.Accumulator) error { + if len(r.Servers) == 0 { + r.gatherServer("http://localhost:8091/", acc, nil) + return nil + } + + var wg sync.WaitGroup + + var outerr error + + for _, serv := range r.Servers { + wg.Add(1) + go func(serv string) { + defer wg.Done() + outerr = r.gatherServer(serv, acc, nil) + }(serv) + } + + wg.Wait() + + return outerr +} + +func (r *Couchbase) gatherServer(addr string, acc telegraf.Accumulator, pool *couchbase.Pool) error { + if pool == nil { + client, err := couchbase.Connect(addr) + if err != nil { + return err + } + + // `default` is the only possible pool name. It's a + // placeholder for a possible future Couchbase feature. See + // http://stackoverflow.com/a/16990911/17498. + p, err := client.GetPool("default") + if err != nil { + return err + } + pool = &p + } + for i := 0; i < len(pool.Nodes); i++ { + node := pool.Nodes[i] + tags := map[string]string{"cluster": addr, "hostname": node.Hostname} + fields := make(map[string]interface{}) + fields["memory_free"] = node.MemoryFree + fields["memory_total"] = node.MemoryTotal + acc.AddFields("couchbase_node", fields, tags) + } + for bucketName, _ := range pool.BucketMap { + tags := map[string]string{"cluster": addr, "bucket": bucketName} + bs := pool.BucketMap[bucketName].BasicStats + fields := make(map[string]interface{}) + fields["quota_percent_used"] = bs["quotaPercentUsed"] + fields["ops_per_sec"] = bs["opsPerSec"] + fields["disk_fetches"] = bs["diskFetches"] + fields["item_count"] = bs["itemCount"] + fields["disk_used"] = bs["diskUsed"] + fields["data_used"] = bs["dataUsed"] + fields["mem_used"] = bs["memUsed"] + acc.AddFields("couchbase_bucket", fields, tags) + } + return nil +} + +func init() { + inputs.Add("couchbase", func() telegraf.Input { + return &Couchbase{} + }) +} diff --git a/plugins/inputs/couchbase/couchbase_test.go b/plugins/inputs/couchbase/couchbase_test.go new file mode 100644 index 000000000..8fda04d41 --- /dev/null +++ b/plugins/inputs/couchbase/couchbase_test.go @@ -0,0 +1,50 @@ +package couchbase + +import ( + "encoding/json" + couchbase "github.com/couchbase/go-couchbase" + "github.com/influxdata/telegraf/testutil" + "testing" +) + +func TestGatherServer(t *testing.T) { + var pool couchbase.Pool + if err := json.Unmarshal([]byte(poolsDefaultResponse), &pool); err != nil { + t.Fatal("parse poolsDefaultResponse", err) + } + + var bucket couchbase.Bucket + if err := json.Unmarshal([]byte(bucketResponse), &bucket); err != nil { + t.Fatal("parse bucketResponse", err) + } + pool.BucketMap = map[string]couchbase.Bucket{ + bucket.Name: bucket, + } + var cb Couchbase + var acc 
testutil.Accumulator + cb.gatherServer("mycluster", &acc, &pool) + acc.AssertContainsTaggedFields(t, "couchbase_node", + map[string]interface{}{"memory_free": 23181365248.0, "memory_total": 64424656896.0}, + map[string]string{"cluster": "mycluster", "hostname": "172.16.10.187:8091"}) + acc.AssertContainsTaggedFields(t, "couchbase_node", + map[string]interface{}{"memory_free": 23665811456.0, "memory_total": 64424656896.0}, + map[string]string{"cluster": "mycluster", "hostname": "172.16.10.65:8091"}) + acc.AssertContainsTaggedFields(t, "couchbase_bucket", + map[string]interface{}{ + "quota_percent_used": 68.85424936294555, + "ops_per_sec": 5686.789686789687, + "disk_fetches": 0.0, + "item_count": 943239752.0, + "disk_used": 409178772321.0, + "data_used": 212179309111.0, + "mem_used": 202156957464.0, + }, + map[string]string{"cluster": "mycluster", "bucket": "blastro-df"}) + +} + +// From `/pools/default` on a real cluster +const poolsDefaultResponse string = `{"storageTotals":{"ram":{"total":450972598272,"quotaTotal":360777252864,"quotaUsed":360777252864,"used":446826622976,"usedByData":255061495696,"quotaUsedPerNode":51539607552,"quotaTotalPerNode":51539607552},"hdd":{"total":1108766539776,"quotaTotal":1108766539776,"used":559135126484,"usedByData":515767865143,"free":498944942902}},"serverGroupsUri":"/pools/default/serverGroups?v=98656394","name":"default","alerts":["Metadata overhead warning. Over 63% of RAM allocated to bucket \"blastro-df\" on node \"172.16.8.148\" is taken up by keys and metadata.","Metadata overhead warning. Over 65% of RAM allocated to bucket \"blastro-df\" on node \"172.16.10.65\" is taken up by keys and metadata.","Metadata overhead warning. Over 64% of RAM allocated to bucket \"blastro-df\" on node \"172.16.13.173\" is taken up by keys and metadata.","Metadata overhead warning. Over 65% of RAM allocated to bucket \"blastro-df\" on node \"172.16.15.75\" is taken up by keys and metadata.","Metadata overhead warning. Over 65% of RAM allocated to bucket \"blastro-df\" on node \"172.16.13.105\" is taken up by keys and metadata.","Metadata overhead warning. Over 64% of RAM allocated to bucket \"blastro-df\" on node \"172.16.8.127\" is taken up by keys and metadata.","Metadata overhead warning. Over 63% of RAM allocated to bucket \"blastro-df\" on node \"172.16.15.120\" is taken up by keys and metadata.","Metadata overhead warning. 
Over 66% of RAM allocated to bucket \"blastro-df\" on node \"172.16.10.187\" is taken up by keys and metadata."],"alertsSilenceURL":"/controller/resetAlerts?token=2814&uuid=2bec87861652b990cf6aa5c7ee58c253","nodes":[{"systemStats":{"cpu_utilization_rate":35.43307086614173,"swap_total":0,"swap_used":0,"mem_total":64424656896,"mem_free":23181365248},"interestingStats":{"cmd_get":17.98201798201798,"couch_docs_actual_disk_size":68506048063,"couch_docs_data_size":38718796110,"couch_views_actual_disk_size":0,"couch_views_data_size":0,"curr_items":140158886,"curr_items_tot":279374646,"ep_bg_fetched":0.999000999000999,"get_hits":10.98901098901099,"mem_used":36497390640,"ops":829.1708291708292,"vb_replica_curr_items":139215760},"uptime":"341236","memoryTotal":64424656896,"memoryFree":23181365248,"mcdMemoryReserved":49152,"mcdMemoryAllocated":49152,"couchApiBase":"http://172.16.10.187:8092/","clusterMembership":"active","recoveryType":"none","status":"healthy","otpNode":"ns_1@172.16.10.187","thisNode":true,"hostname":"172.16.10.187:8091","clusterCompatibility":196608,"version":"3.0.1-1444-rel-community","os":"x86_64-unknown-linux-gnu","ports":{"proxy":11211,"direct":11210}},{"systemStats":{"cpu_utilization_rate":47.38255033557047,"swap_total":0,"swap_used":0,"mem_total":64424656896,"mem_free":23665811456},"interestingStats":{"cmd_get":172.8271728271728,"couch_docs_actual_disk_size":79360565405,"couch_docs_data_size":38736382876,"couch_views_actual_disk_size":0,"couch_views_data_size":0,"curr_items":140174377,"curr_items_tot":279383025,"ep_bg_fetched":0.999000999000999,"get_hits":167.8321678321678,"mem_used":36650059656,"ops":1685.314685314685,"vb_replica_curr_items":139208648},"uptime":"341210","memoryTotal":64424656896,"memoryFree":23665811456,"mcdMemoryReserved":49152,"mcdMemoryAllocated":49152,"couchApiBase":"http://172.16.10.65:8092/","clusterMembership":"active","recoveryType":"none","status":"healthy","otpNode":"ns_1@172.16.10.65","hostname":"172.16.10.65:8091","clusterCompatibility":196608,"version":"3.0.1-1444-rel-community","os":"x86_64-unknown-linux-gnu","ports":{"proxy":11211,"direct":11210}},{"systemStats":{"cpu_utilization_rate":25.5586592178771,"swap_total":0,"swap_used":0,"mem_total":64424656896,"mem_free":23726600192},"interestingStats":{"cmd_get":63.06306306306306,"couch_docs_actual_disk_size":79345105217,"couch_docs_data_size":38728086130,"couch_views_actual_disk_size":0,"couch_views_data_size":0,"curr_items":139195268,"curr_items_tot":279349113,"ep_bg_fetched":0,"get_hits":53.05305305305306,"mem_used":36476665576,"ops":1878.878878878879,"vb_replica_curr_items":140153845},"uptime":"341210","memoryTotal":64424656896,"memoryFree":23726600192,"mcdMemoryReserved":49152,"mcdMemoryAllocated":49152,"couchApiBase":"http://172.16.13.105:8092/","clusterMembership":"active","recoveryType":"none","status":"healthy","otpNode":"ns_1@172.16.13.105","hostname":"172.16.13.105:8091","clusterCompatibility":196608,"version":"3.0.1-1444-rel-community","os":"x86_64-unknown-linux-gnu","ports":{"proxy":11211,"direct":11210}},{"systemStats":{"cpu_utilization_rate":26.45803698435277,"swap_total":0,"swap_used":0,"mem_total":64424656896,"mem_free":23854841856},"interestingStats":{"cmd_get":51.05105105105105,"couch_docs_actual_disk_size":74465931949,"couch_docs_data_size":38723830730,"couch_views_actual_disk_size":0,"couch_views_data_size":0,"curr_items":139209869,"curr_items_tot":279380019,"ep_bg_fetched":0,"get_hits":47.04704704704704,"mem_used":36471784896,"ops":1831.831831831832,"vb_replica_curr_items":1401
70150},"uptime":"340526","memoryTotal":64424656896,"memoryFree":23854841856,"mcdMemoryReserved":49152,"mcdMemoryAllocated":49152,"couchApiBase":"http://172.16.13.173:8092/","clusterMembership":"active","recoveryType":"none","status":"healthy","otpNode":"ns_1@172.16.13.173","hostname":"172.16.13.173:8091","clusterCompatibility":196608,"version":"3.0.1-1444-rel-community","os":"x86_64-unknown-linux-gnu","ports":{"proxy":11211,"direct":11210}},{"systemStats":{"cpu_utilization_rate":47.31034482758621,"swap_total":0,"swap_used":0,"mem_total":64424656896,"mem_free":23773573120},"interestingStats":{"cmd_get":77.07707707707708,"couch_docs_actual_disk_size":74743093945,"couch_docs_data_size":38594660087,"couch_views_actual_disk_size":0,"couch_views_data_size":0,"curr_items":139215932,"curr_items_tot":278427644,"ep_bg_fetched":0,"get_hits":53.05305305305305,"mem_used":36306500344,"ops":1981.981981981982,"vb_replica_curr_items":139211712},"uptime":"340495","memoryTotal":64424656896,"memoryFree":23773573120,"mcdMemoryReserved":49152,"mcdMemoryAllocated":49152,"couchApiBase":"http://172.16.15.120:8092/","clusterMembership":"active","recoveryType":"none","status":"healthy","otpNode":"ns_1@172.16.15.120","hostname":"172.16.15.120:8091","clusterCompatibility":196608,"version":"3.0.1-1444-rel-community","os":"x86_64-unknown-linux-gnu","ports":{"proxy":11211,"direct":11210}},{"systemStats":{"cpu_utilization_rate":17.60660247592847,"swap_total":0,"swap_used":0,"mem_total":64424656896,"mem_free":23662190592},"interestingStats":{"cmd_get":146.8531468531468,"couch_docs_actual_disk_size":72932847344,"couch_docs_data_size":38581771457,"couch_views_actual_disk_size":0,"couch_views_data_size":0,"curr_items":139226879,"curr_items_tot":278436540,"ep_bg_fetched":0,"get_hits":144.8551448551448,"mem_used":36421860496,"ops":1495.504495504495,"vb_replica_curr_items":139209661},"uptime":"337174","memoryTotal":64424656896,"memoryFree":23662190592,"mcdMemoryReserved":49152,"mcdMemoryAllocated":49152,"couchApiBase":"http://172.16.8.127:8092/","clusterMembership":"active","recoveryType":"none","status":"healthy","otpNode":"ns_1@172.16.8.127","hostname":"172.16.8.127:8091","clusterCompatibility":196608,"version":"3.0.1-1444-rel-community","os":"x86_64-unknown-linux-gnu","ports":{"proxy":11211,"direct":11210}},{"systemStats":{"cpu_utilization_rate":21.68831168831169,"swap_total":0,"swap_used":0,"mem_total":64424656896,"mem_free":24049729536},"interestingStats":{"cmd_get":11.98801198801199,"couch_docs_actual_disk_size":66414273220,"couch_docs_data_size":38587642702,"couch_views_actual_disk_size":0,"couch_views_data_size":0,"curr_items":139193759,"curr_items_tot":278398926,"ep_bg_fetched":0,"get_hits":9.990009990009991,"mem_used":36237234088,"ops":883.1168831168832,"vb_replica_curr_items":139205167},"uptime":"341228","memoryTotal":64424656896,"memoryFree":24049729536,"mcdMemoryReserved":49152,"mcdMemoryAllocated":49152,"couchApiBase":"http://172.16.8.148:8092/","clusterMembership":"active","recoveryType":"none","status":"healthy","otpNode":"ns_1@172.16.8.148","hostname":"172.16.8.148:8091","clusterCompatibility":196608,"version":"3.0.1-1444-rel-community","os":"x86_64-unknown-linux-gnu","ports":{"proxy":11211,"direct":11210}}],"buckets":{"uri":"/pools/default/buckets?v=74117050&uuid=2bec87861652b990cf6aa5c7ee58c253","terseBucketsBase":"/pools/default/b/","terseStreamingBucketsBase":"/pools/default/bs/"},"remoteClusters":{"uri":"/pools/default/remoteClusters?uuid=2bec87861652b990cf6aa5c7ee58c253","validateURI":"/pools/default/remote
Clusters?just_validate=1"},"controllers":{"addNode":{"uri":"/controller/addNode?uuid=2bec87861652b990cf6aa5c7ee58c253"},"rebalance":{"uri":"/controller/rebalance?uuid=2bec87861652b990cf6aa5c7ee58c253"},"failOver":{"uri":"/controller/failOver?uuid=2bec87861652b990cf6aa5c7ee58c253"},"startGracefulFailover":{"uri":"/controller/startGracefulFailover?uuid=2bec87861652b990cf6aa5c7ee58c253"},"reAddNode":{"uri":"/controller/reAddNode?uuid=2bec87861652b990cf6aa5c7ee58c253"},"reFailOver":{"uri":"/controller/reFailOver?uuid=2bec87861652b990cf6aa5c7ee58c253"},"ejectNode":{"uri":"/controller/ejectNode?uuid=2bec87861652b990cf6aa5c7ee58c253"},"setRecoveryType":{"uri":"/controller/setRecoveryType?uuid=2bec87861652b990cf6aa5c7ee58c253"},"setAutoCompaction":{"uri":"/controller/setAutoCompaction?uuid=2bec87861652b990cf6aa5c7ee58c253","validateURI":"/controller/setAutoCompaction?just_validate=1"},"clusterLogsCollection":{"startURI":"/controller/startLogsCollection?uuid=2bec87861652b990cf6aa5c7ee58c253","cancelURI":"/controller/cancelLogsCollection?uuid=2bec87861652b990cf6aa5c7ee58c253"},"replication":{"createURI":"/controller/createReplication?uuid=2bec87861652b990cf6aa5c7ee58c253","validateURI":"/controller/createReplication?just_validate=1"},"setFastWarmup":{"uri":"/controller/setFastWarmup?uuid=2bec87861652b990cf6aa5c7ee58c253","validateURI":"/controller/setFastWarmup?just_validate=1"}},"rebalanceStatus":"none","rebalanceProgressUri":"/pools/default/rebalanceProgress","stopRebalanceUri":"/controller/stopRebalance?uuid=2bec87861652b990cf6aa5c7ee58c253","nodeStatusesUri":"/nodeStatuses","maxBucketCount":10,"autoCompactionSettings":{"parallelDBAndViewCompaction":false,"databaseFragmentationThreshold":{"percentage":50,"size":"undefined"},"viewFragmentationThreshold":{"percentage":50,"size":"undefined"}},"fastWarmupSettings":{"fastWarmupEnabled":true,"minMemoryThreshold":10,"minItemsThreshold":10},"tasks":{"uri":"/pools/default/tasks?v=97479372"},"visualSettingsUri":"/internalSettings/visual?v=7111573","counters":{"rebalance_success":4,"rebalance_start":6,"rebalance_stop":2}}` + +// From `/pools/default/buckets/blastro-df` on a real cluster +const bucketResponse string = 
`{"name":"blastro-df","bucketType":"membase","authType":"sasl","saslPassword":"","proxyPort":0,"replicaIndex":false,"uri":"/pools/default/buckets/blastro-df?bucket_uuid=2e6b9dc4c278300ce3a4f27ad540323f","streamingUri":"/pools/default/bucketsStreaming/blastro-df?bucket_uuid=2e6b9dc4c278300ce3a4f27ad540323f","localRandomKeyUri":"/pools/default/buckets/blastro-df/localRandomKey","controllers":{"compactAll":"/pools/default/buckets/blastro-df/controller/compactBucket","compactDB":"/pools/default/buckets/default/controller/compactDatabases","purgeDeletes":"/pools/default/buckets/blastro-df/controller/unsafePurgeBucket","startRecovery":"/pools/default/buckets/blastro-df/controller/startRecovery"},"nodes":[{"couchApiBase":"http://172.16.8.148:8092/blastro-df%2B2e6b9dc4c278300ce3a4f27ad540323f","systemStats":{"cpu_utilization_rate":18.39557399723375,"swap_total":0,"swap_used":0,"mem_total":64424656896,"mem_free":23791935488},"interestingStats":{"cmd_get":10.98901098901099,"couch_docs_actual_disk_size":79525832077,"couch_docs_data_size":38633186946,"couch_views_actual_disk_size":0,"couch_views_data_size":0,"curr_items":139229304,"curr_items_tot":278470058,"ep_bg_fetched":0,"get_hits":5.994005994005994,"mem_used":36284362960,"ops":1275.724275724276,"vb_replica_curr_items":139240754},"uptime":"343968","memoryTotal":64424656896,"memoryFree":23791935488,"mcdMemoryReserved":49152,"mcdMemoryAllocated":49152,"replication":1,"clusterMembership":"active","recoveryType":"none","status":"healthy","otpNode":"ns_1@172.16.8.148","hostname":"172.16.8.148:8091","clusterCompatibility":196608,"version":"3.0.1-1444-rel-community","os":"x86_64-unknown-linux-gnu","ports":{"proxy":11211,"direct":11210}},{"couchApiBase":"http://172.16.8.127:8092/blastro-df%2B2e6b9dc4c278300ce3a4f27ad540323f","systemStats":{"cpu_utilization_rate":21.97183098591549,"swap_total":0,"swap_used":0,"mem_total":64424656896,"mem_free":23533023232},"interestingStats":{"cmd_get":39.96003996003996,"couch_docs_actual_disk_size":63322357663,"couch_docs_data_size":38603481061,"couch_views_actual_disk_size":0,"couch_views_data_size":0,"curr_items":139262616,"curr_items_tot":278508069,"ep_bg_fetched":0.999000999000999,"get_hits":30.96903096903097,"mem_used":36475078736,"ops":1370.629370629371,"vb_replica_curr_items":139245453},"uptime":"339914","memoryTotal":64424656896,"memoryFree":23533023232,"mcdMemoryReserved":49152,"mcdMemoryAllocated":49152,"replication":1,"clusterMembership":"active","recoveryType":"none","status":"healthy","otpNode":"ns_1@172.16.8.127","hostname":"172.16.8.127:8091","clusterCompatibility":196608,"version":"3.0.1-1444-rel-community","os":"x86_64-unknown-linux-gnu","ports":{"proxy":11211,"direct":11210}},{"couchApiBase":"http://172.16.15.120:8092/blastro-df%2B2e6b9dc4c278300ce3a4f27ad540323f","systemStats":{"cpu_utilization_rate":23.38028169014084,"swap_total":0,"swap_used":0,"mem_total":64424656896,"mem_free":23672963072},"interestingStats":{"cmd_get":88.08808808808809,"couch_docs_actual_disk_size":80260594761,"couch_docs_data_size":38632863189,"couch_views_actual_disk_size":0,"couch_views_data_size":0,"curr_items":139251563,"curr_items_tot":278498913,"ep_bg_fetched":0,"get_hits":74.07407407407408,"mem_used":36348663000,"ops":1707.707707707708,"vb_replica_curr_items":139247350},"uptime":"343235","memoryTotal":64424656896,"memoryFree":23672963072,"mcdMemoryReserved":49152,"mcdMemoryAllocated":49152,"replication":1,"clusterMembership":"active","recoveryType":"none","status":"healthy","otpNode":"ns_1@172.16.15.120","hostname":"172.16.1
5.120:8091","clusterCompatibility":196608,"version":"3.0.1-1444-rel-community","os":"x86_64-unknown-linux-gnu","ports":{"proxy":11211,"direct":11210}},{"couchApiBase":"http://172.16.13.173:8092/blastro-df%2B2e6b9dc4c278300ce3a4f27ad540323f","systemStats":{"cpu_utilization_rate":22.15988779803646,"swap_total":0,"swap_used":0,"mem_total":64424656896,"mem_free":23818825728},"interestingStats":{"cmd_get":103.1031031031031,"couch_docs_actual_disk_size":68247785524,"couch_docs_data_size":38747583467,"couch_views_actual_disk_size":0,"couch_views_data_size":0,"curr_items":139245453,"curr_items_tot":279451313,"ep_bg_fetched":1.001001001001001,"get_hits":86.08608608608608,"mem_used":36524715864,"ops":1749.74974974975,"vb_replica_curr_items":140205860},"uptime":"343266","memoryTotal":64424656896,"memoryFree":23818825728,"mcdMemoryReserved":49152,"mcdMemoryAllocated":49152,"replication":1,"clusterMembership":"active","recoveryType":"none","status":"healthy","otpNode":"ns_1@172.16.13.173","hostname":"172.16.13.173:8091","clusterCompatibility":196608,"version":"3.0.1-1444-rel-community","os":"x86_64-unknown-linux-gnu","ports":{"proxy":11211,"direct":11210}},{"couchApiBase":"http://172.16.13.105:8092/blastro-df%2B2e6b9dc4c278300ce3a4f27ad540323f","systemStats":{"cpu_utilization_rate":21.94444444444444,"swap_total":0,"swap_used":0,"mem_total":64424656896,"mem_free":23721426944},"interestingStats":{"cmd_get":113.1131131131131,"couch_docs_actual_disk_size":68102832275,"couch_docs_data_size":38747477407,"couch_views_actual_disk_size":0,"couch_views_data_size":0,"curr_items":139230887,"curr_items_tot":279420530,"ep_bg_fetched":0,"get_hits":106.1061061061061,"mem_used":36524887624,"ops":1799.7997997998,"vb_replica_curr_items":140189643},"uptime":"343950","memoryTotal":64424656896,"memoryFree":23721426944,"mcdMemoryReserved":49152,"mcdMemoryAllocated":49152,"replication":1,"clusterMembership":"active","recoveryType":"none","status":"healthy","otpNode":"ns_1@172.16.13.105","hostname":"172.16.13.105:8091","clusterCompatibility":196608,"version":"3.0.1-1444-rel-community","os":"x86_64-unknown-linux-gnu","ports":{"proxy":11211,"direct":11210}},{"couchApiBase":"http://172.16.10.65:8092/blastro-df%2B2e6b9dc4c278300ce3a4f27ad540323f","systemStats":{"cpu_utilization_rate":60.62176165803109,"swap_total":0,"swap_used":0,"mem_total":64424656896,"mem_free":23618203648},"interestingStats":{"cmd_get":30.96903096903097,"couch_docs_actual_disk_size":69052175561,"couch_docs_data_size":38755695030,"couch_views_actual_disk_size":0,"couch_views_data_size":0,"curr_items":140210194,"curr_items_tot":279454253,"ep_bg_fetched":0,"get_hits":26.97302697302698,"mem_used":36543072472,"ops":1337.662337662338,"vb_replica_curr_items":139244059},"uptime":"343950","memoryTotal":64424656896,"memoryFree":23618203648,"mcdMemoryReserved":49152,"mcdMemoryAllocated":49152,"replication":1,"clusterMembership":"active","recoveryType":"none","status":"healthy","otpNode":"ns_1@172.16.10.65","hostname":"172.16.10.65:8091","clusterCompatibility":196608,"version":"3.0.1-1444-rel-community","os":"x86_64-unknown-linux-gnu","ports":{"proxy":11211,"direct":11210}},{"couchApiBase":"http://172.16.10.187:8092/blastro-df%2B2e6b9dc4c278300ce3a4f27ad540323f","systemStats":{"cpu_utilization_rate":21.83588317107093,"swap_total":0,"swap_used":0,"mem_total":64424656896,"mem_free":23062269952},"interestingStats":{"cmd_get":33.03303303303304,"couch_docs_actual_disk_size":74422029546,"couch_docs_data_size":38758172837,"couch_views_actual_disk_size":0,"couch_views_data_size":0
,"curr_items":140194321,"curr_items_tot":279445526,"ep_bg_fetched":0,"get_hits":21.02102102102102,"mem_used":36527676832,"ops":1088.088088088088,"vb_replica_curr_items":139251205},"uptime":"343971","memoryTotal":64424656896,"memoryFree":23062269952,"mcdMemoryReserved":49152,"mcdMemoryAllocated":49152,"replication":1,"clusterMembership":"active","recoveryType":"none","status":"healthy","otpNode":"ns_1@172.16.10.187","thisNode":true,"hostname":"172.16.10.187:8091","clusterCompatibility":196608,"version":"3.0.1-1444-rel-community","os":"x86_64-unknown-linux-gnu","ports":{"proxy":11211,"direct":11210}}],"stats":{"uri":"/pools/default/buckets/blastro-df/stats","directoryURI":"/pools/default/buckets/blastro-df/statsDirectory","nodeStatsListURI":"/pools/default/buckets/blastro-df/nodes"},"ddocs":{"uri":"/pools/default/buckets/blastro-df/ddocs"},"nodeLocator":"vbucket","fastWarmupSettings":false,"autoCompactionSettings":false,"uuid":"2e6b9dc4c278300ce3a4f27ad540323f","vBucketServerMap":{"hashAlgorithm":"CRC","numReplicas":1,"serverList":["172.16.10.187:11210","172.16.10.65:11210","172.16.13.105:11210","172.16.13.173:11210","172.16.15.120:11210","172.16.8.127:11210","172.16.8.148:11210"],"vBucketMap":[[0,1],[0,1],[0,1],[0,1],[0,1],[0,1],[0,1],[0,1],[0,1],[0,1],[0,1],[0,1],[0,1],[0,1],[0,1],[0,6],[0,6],[0,6],[0,6],[0,6],[1,3],[1,3],[1,3],[1,4],[1,4],[1,5],[1,5],[1,5],[1,5],[1,5],[1,5],[1,5],[1,5],[1,5],[1,5],[0,2],[0,2],[0,2],[0,2],[0,2],[0,2],[0,2],[0,2],[0,2],[0,2],[0,2],[0,2],[0,2],[0,2],[1,6],[1,6],[1,6],[1,6],[1,6],[1,6],[1,6],[1,6],[1,6],[2,3],[2,3],[2,5],[2,5],[2,5],[2,5],[2,5],[2,5],[2,5],[2,5],[2,5],[0,3],[0,3],[0,3],[0,3],[0,3],[0,3],[0,3],[0,3],[0,3],[0,3],[0,3],[0,3],[0,3],[0,3],[2,5],[2,5],[2,6],[2,6],[2,6],[2,6],[2,6],[2,6],[2,6],[2,6],[3,5],[3,5],[3,5],[3,5],[3,5],[3,5],[3,5],[3,5],[3,5],[3,5],[0,4],[0,4],[0,4],[0,4],[0,4],[0,4],[0,4],[0,4],[0,4],[0,4],[0,4],[0,4],[0,4],[0,4],[3,5],[3,5],[3,5],[3,5],[3,5],[3,5],[3,6],[3,6],[3,6],[3,6],[4,5],[4,5],[4,5],[4,5],[4,5],[4,5],[4,5],[4,5],[4,5],[4,5],[0,6],[0,6],[0,6],[0,6],[0,6],[0,6],[0,6],[0,6],[0,6],[0,6],[0,6],[0,6],[0,6],[0,6],[0,6],[5,3],[5,4],[5,4],[5,4],[5,4],[5,4],[5,4],[5,4],[5,4],[5,4],[6,5],[6,5],[6,5],[6,5],[6,5],[6,5],[6,5],[6,5],[6,5],[1,0],[1,0],[1,0],[1,0],[1,0],[1,0],[1,0],[1,0],[1,0],[1,0],[1,0],[1,0],[1,0],[1,0],[1,0],[0,3],[0,3],[0,3],[0,4],[0,4],[0,4],[0,4],[0,4],[0,4],[0,4],[0,5],[0,5],[0,5],[0,5],[0,5],[0,5],[0,5],[0,5],[0,5],[0,5],[1,2],[1,2],[1,2],[1,2],[1,2],[1,2],[1,2],[1,2],[1,2],[1,2],[1,2],[1,2],[1,2],[1,2],[2,4],[2,4],[2,4],[2,4],[2,4],[2,4],[2,4],[2,4],[2,4],[2,4],[2,5],[2,5],[2,5],[2,5],[2,5],[2,5],[4,5],[4,5],[4,5],[4,5],[1,3],[1,3],[1,3],[1,3],[1,3],[1,3],[1,3],[1,3],[1,3],[1,3],[1,3],[1,3],[1,3],[1,3],[2,6],[2,6],[3,2],[3,2],[3,4],[3,4],[3,4],[3,4],[3,4],[3,4],[3,4],[3,5],[3,5],[3,5],[3,5],[2,0],[2,0],[2,0],[2,0],[2,0],[1,4],[1,4],[1,4],[1,4],[1,4],[1,4],[1,4],[1,4],[1,4],[1,4],[1,4],[1,4],[1,4],[1,4],[3,6],[3,6],[3,6],[3,6],[3,6],[3,6],[4,2],[4,3],[4,3],[4,3],[4,5],[4,5],[4,5],[4,5],[3,0],[3,0],[3,0],[3,0],[3,0],[3,0],[1,6],[1,6],[1,6],[1,6],[1,6],[1,6],[1,6],[1,6],[1,6],[1,6],[1,6],[1,6],[1,6],[1,6],[5,4],[5,4],[5,6],[5,6],[5,6],[5,6],[5,6],[5,6],[5,6],[5,6],[6,5],[6,5],[6,5],[6,5],[6,5],[4,0],[4,0],[4,0],[4,0],[4,0],[2,0],[2,0],[2,0],[2,0],[2,0],[2,0],[2,0],[2,0],[2,0],[2,0],[2,0],[2,0],[2,0],[2,0],[0,4],[0,4],[0,4],[0,5],[0,5],[0,5],[0,5],[0,5],[0,5],[0,5],[0,5],[0,5],[0,5],[0,5],[4,5],[4,5],[4,5],[4,5],[4,5],[4,6],[2,1],[2,1],[2,1],[2,1],[2,1],[2,1],[2,1],[2,1],[2,1],[2,1],[2,1],[2,1],[2,1],[2,1],[1,
4],[1,4],[1,4],[1,4],[1,4],[1,4],[1,4],[1,4],[1,5],[1,5],[1,5],[1,5],[1,5],[1,5],[1,5],[4,6],[4,6],[4,6],[4,6],[4,6],[2,3],[2,3],[2,3],[2,3],[2,3],[2,3],[2,3],[2,3],[2,3],[2,3],[2,3],[2,3],[2,3],[2,3],[2,3],[3,4],[3,4],[3,4],[3,5],[3,5],[3,5],[3,5],[5,0],[5,0],[5,0],[2,0],[2,0],[3,0],[3,0],[3,0],[5,3],[5,3],[5,3],[5,3],[5,3],[2,4],[2,4],[2,4],[2,4],[2,4],[2,4],[2,4],[2,4],[2,4],[2,4],[2,4],[2,4],[2,4],[2,4],[4,3],[4,3],[4,3],[4,3],[4,3],[4,3],[4,3],[4,5],[4,5],[1,0],[3,0],[3,1],[3,1],[3,1],[3,1],[5,4],[5,4],[5,4],[5,4],[5,4],[2,6],[2,6],[2,6],[2,6],[2,6],[2,6],[2,6],[2,6],[2,6],[2,6],[2,6],[2,6],[2,6],[2,6],[5,6],[5,6],[5,6],[6,2],[6,2],[6,3],[6,3],[6,3],[4,0],[4,0],[4,0],[4,0],[4,0],[4,1],[4,1],[4,1],[5,6],[5,6],[5,6],[5,6],[3,0],[3,0],[3,0],[3,0],[3,0],[3,0],[3,0],[3,0],[3,0],[3,0],[3,0],[3,0],[3,0],[3,0],[0,5],[0,5],[0,5],[0,6],[0,6],[0,6],[0,6],[0,6],[0,1],[0,1],[4,6],[4,6],[4,6],[4,6],[5,0],[5,0],[5,0],[5,0],[5,0],[5,0],[3,1],[3,1],[3,1],[3,1],[3,1],[3,1],[3,1],[3,1],[3,1],[3,1],[3,1],[3,1],[3,1],[3,1],[1,5],[1,5],[1,5],[1,5],[1,5],[1,5],[1,5],[1,6],[2,0],[2,0],[5,2],[5,3],[5,3],[5,3],[5,3],[5,1],[5,1],[5,1],[5,1],[5,1],[3,2],[3,2],[3,2],[3,2],[3,2],[3,2],[3,2],[3,2],[3,2],[3,2],[3,2],[3,2],[3,2],[3,2],[3,2],[2,5],[2,5],[2,5],[2,5],[2,5],[2,5],[2,5],[4,1],[4,1],[4,1],[5,3],[5,3],[5,3],[5,3],[5,3],[2,0],[5,2],[5,2],[5,2],[5,2],[3,4],[3,4],[3,4],[3,4],[3,4],[3,4],[3,4],[3,4],[3,4],[3,4],[3,4],[3,4],[3,4],[3,4],[3,4],[1,0],[1,0],[1,0],[1,0],[1,0],[1,0],[1,0],[1,0],[1,0],[1,2],[5,4],[5,4],[5,4],[5,4],[5,4],[5,4],[5,4],[5,4],[5,4],[3,6],[3,6],[3,6],[3,6],[3,6],[3,6],[3,6],[3,6],[3,6],[3,6],[3,6],[3,6],[3,6],[3,6],[4,1],[4,1],[5,0],[5,0],[5,0],[5,0],[5,0],[5,0],[5,0],[5,1],[5,6],[5,6],[5,6],[5,6],[5,6],[5,6],[5,6],[5,6],[5,6],[5,6],[4,0],[4,0],[4,0],[4,0],[4,0],[4,0],[4,0],[4,0],[4,0],[4,0],[4,0],[4,0],[4,0],[4,0],[0,1],[0,1],[0,1],[0,1],[0,1],[0,1],[0,1],[0,1],[0,2],[0,2],[5,0],[5,0],[5,0],[5,0],[5,0],[5,0],[5,0],[5,0],[0,2],[0,2],[4,1],[4,1],[4,1],[4,1],[4,1],[4,1],[4,1],[4,1],[4,1],[4,1],[4,1],[4,1],[4,1],[4,1],[2,1],[2,1],[2,1],[2,1],[2,1],[2,1],[2,1],[2,1],[2,1],[2,1],[5,1],[5,1],[5,1],[5,1],[5,1],[5,1],[5,1],[5,1],[5,1],[3,1],[4,2],[4,2],[4,2],[4,2],[4,2],[4,2],[4,2],[4,2],[4,2],[4,2],[4,2],[4,2],[4,2],[4,2],[4,1],[4,1],[4,2],[4,2],[4,2],[6,3],[6,3],[6,3],[6,3],[6,3],[5,2],[5,2],[5,2],[5,2],[5,2],[5,2],[5,2],[5,2],[5,2],[5,2],[4,3],[4,3],[4,3],[4,3],[4,3],[4,3],[4,3],[4,3],[4,3],[4,3],[4,3],[4,3],[4,3],[4,3],[4,3],[6,0],[6,0],[6,0],[6,0],[6,0],[6,0],[6,0],[6,0],[6,0],[6,0],[5,3],[5,3],[5,3],[5,3],[5,3],[5,3],[5,3],[5,3],[5,3],[4,6],[4,6],[4,6],[4,6],[4,6],[4,6],[4,6],[4,6],[4,6],[4,6],[4,6],[4,6],[4,6],[4,6],[5,1],[5,1],[5,1],[5,1],[5,1],[5,1],[5,1],[6,1],[6,1],[6,1],[6,1],[6,1],[6,1],[6,1],[6,1],[6,1],[6,1],[6,2],[6,2],[6,2],[6,0],[6,0],[6,0],[6,0],[6,0],[6,0],[6,0],[6,0],[6,0],[6,0],[6,0],[6,0],[6,0],[6,0],[6,0],[1,2],[1,2],[1,2],[1,2],[1,2],[1,2],[1,2],[2,1],[2,3],[2,3],[1,2],[1,2],[1,2],[1,3],[1,3],[1,3],[1,3],[1,3],[3,1],[6,1],[6,1],[6,1],[6,1],[6,1],[6,1],[6,1],[6,1],[6,1],[6,1],[6,1],[6,1],[6,1],[6,1],[3,1],[3,1],[3,1],[3,1],[4,2],[4,2],[4,2],[4,2],[4,2],[4,2],[3,2],[3,2],[3,2],[3,2],[3,2],[3,2],[3,2],[3,2],[6,3],[6,3],[6,2],[6,2],[6,2],[6,2],[6,2],[6,2],[6,2],[6,2],[6,2],[6,2],[6,2],[6,2],[6,2],[6,2],[5,1],[5,1],[5,2],[5,2],[5,2],[5,2],[5,2],[5,2],[5,2],[5,2],[6,4],[6,4],[6,4],[6,4],[6,4],[6,4],[6,4],[5,2],[6,2],[6,2],[6,3],[6,3],[6,3],[6,3],[6,3],[6,3],[6,3],[6,3],[6,3],[6,3],[6,3],[6,3],[6,3],[6,3],[0,2],[0,2],[0,2],[0,2],[0,2],[0,2],[0,2],[0,3],[1,3],[1,3],[6,2],[6,2],[0,3],
[0,3],[0,3],[0,3],[0,3],[0,3],[0,3],[1,3],[6,4],[6,4],[6,4],[6,4],[6,4],[6,4],[6,4],[6,4],[6,4],[6,4],[6,4],[6,4],[6,4],[6,4],[6,4],[6,4],[6,4],[6,5],[6,5],[2,3],[2,3],[2,3],[2,3],[2,3],[2,3],[6,5],[6,5],[6,5],[6,5],[6,5],[6,5],[6,5],[6,5],[6,5],[6,2]]},"replicaNumber":1,"threadsNumber":3,"quota":{"ram":293601280000,"rawRAM":41943040000},"basicStats":{"quotaPercentUsed":68.85424936294555,"opsPerSec":5686.789686789687,"diskFetches":0,"itemCount":943239752,"diskUsed":409178772321,"dataUsed":212179309111,"memUsed":202156957464},"evictionPolicy":"valueOnly","bucketCapabilitiesVer":"","bucketCapabilities":["cbhello","touch","couchapi","cccp","xdcrCheckpointing","nodesExt"]}` diff --git a/plugins/inputs/couchdb/couchdb.go b/plugins/inputs/couchdb/couchdb.go index ba64e4a6d..bf241649a 100644 --- a/plugins/inputs/couchdb/couchdb.go +++ b/plugins/inputs/couchdb/couchdb.go @@ -10,6 +10,7 @@ import ( "reflect" "strings" "sync" + "time" ) // Schema: @@ -112,9 +113,18 @@ func (c *CouchDB) Gather(accumulator telegraf.Accumulator) error { } +var tr = &http.Transport{ + ResponseHeaderTimeout: time.Duration(3 * time.Second), +} + +var client = &http.Client{ + Transport: tr, + Timeout: time.Duration(4 * time.Second), +} + func (c *CouchDB) fetchAndInsertData(accumulator telegraf.Accumulator, host string) error { - response, error := http.Get(host) + response, error := client.Get(host) if error != nil { return error } diff --git a/plugins/inputs/disque/disque.go b/plugins/inputs/disque/disque.go index a311b6739..d726590b4 100644 --- a/plugins/inputs/disque/disque.go +++ b/plugins/inputs/disque/disque.go @@ -9,6 +9,7 @@ import ( "strconv" "strings" "sync" + "time" "github.com/influxdata/telegraf" "github.com/influxdata/telegraf/plugins/inputs" @@ -23,13 +24,14 @@ type Disque struct { var sampleConfig = ` ## An array of URI to gather stats about. Specify an ip or hostname - ## with optional port and password. ie disque://localhost, disque://10.10.3.33:18832, - ## 10.0.0.1:10000, etc. - + ## with optional port and password. + ## ie disque://localhost, disque://10.10.3.33:18832, 10.0.0.1:10000, etc. ## If no servers are specified, then localhost is used as the host. servers = ["localhost"] ` +var defaultTimeout = 5 * time.Second + func (r *Disque) SampleConfig() string { return sampleConfig } @@ -107,7 +109,7 @@ func (g *Disque) gatherServer(addr *url.URL, acc telegraf.Accumulator) error { addr.Host = addr.Host + ":" + defaultPort } - c, err := net.Dial("tcp", addr.Host) + c, err := net.DialTimeout("tcp", addr.Host, defaultTimeout) if err != nil { return fmt.Errorf("Unable to connect to disque server '%s': %s", addr.Host, err) } @@ -132,6 +134,9 @@ func (g *Disque) gatherServer(addr *url.URL, acc telegraf.Accumulator) error { g.c = c } + // Extend connection + g.c.SetDeadline(time.Now().Add(defaultTimeout)) + g.c.Write([]byte("info\r\n")) r := bufio.NewReader(g.c) diff --git a/plugins/inputs/dns_query/dns_query.go b/plugins/inputs/dns_query/dns_query.go index 397482a98..2231f2921 100644 --- a/plugins/inputs/dns_query/dns_query.go +++ b/plugins/inputs/dns_query/dns_query.go @@ -35,7 +35,8 @@ var sampleConfig = ` ## Domains or subdomains to query. "."(root) is default domains = ["."] # optional - ## Query record type. Posible values: A, AAAA, CNAME, MX, NS, PTR, TXT, SOA, SPF, SRV. Default is "NS" + ## Query record type. Default is "A" + ## Possible values: A, AAAA, CNAME, MX, NS, PTR, TXT, SOA, SPF, SRV. record_type = "A" # optional ## Dns server port. 
53 is default diff --git a/plugins/inputs/dns_query/dns_query_test.go b/plugins/inputs/dns_query/dns_query_test.go index 076db5fab..d7d267a59 100644 --- a/plugins/inputs/dns_query/dns_query_test.go +++ b/plugins/inputs/dns_query/dns_query_test.go @@ -1,11 +1,14 @@ package dns_query import ( - "github.com/influxdata/telegraf/testutil" - "github.com/miekg/dns" - "github.com/stretchr/testify/assert" "testing" "time" + + "github.com/influxdata/telegraf/testutil" + + "github.com/miekg/dns" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" ) var servers = []string{"8.8.8.8"} @@ -21,7 +24,7 @@ func TestGathering(t *testing.T) { err := dnsConfig.Gather(&acc) assert.NoError(t, err) metric, ok := acc.Get("dns_query") - assert.True(t, ok) + require.True(t, ok) queryTime, _ := metric.Fields["query_time_ms"].(float64) assert.NotEqual(t, 0, queryTime) @@ -38,7 +41,7 @@ func TestGatheringMxRecord(t *testing.T) { err := dnsConfig.Gather(&acc) assert.NoError(t, err) metric, ok := acc.Get("dns_query") - assert.True(t, ok) + require.True(t, ok) queryTime, _ := metric.Fields["query_time_ms"].(float64) assert.NotEqual(t, 0, queryTime) @@ -61,7 +64,7 @@ func TestGatheringRootDomain(t *testing.T) { err := dnsConfig.Gather(&acc) assert.NoError(t, err) metric, ok := acc.Get("dns_query") - assert.True(t, ok) + require.True(t, ok) queryTime, _ := metric.Fields["query_time_ms"].(float64) fields["query_time_ms"] = queryTime @@ -84,7 +87,7 @@ func TestMetricContainsServerAndDomainAndRecordTypeTags(t *testing.T) { err := dnsConfig.Gather(&acc) assert.NoError(t, err) metric, ok := acc.Get("dns_query") - assert.True(t, ok) + require.True(t, ok) queryTime, _ := metric.Fields["query_time_ms"].(float64) fields["query_time_ms"] = queryTime diff --git a/plugins/inputs/docker/README.md b/plugins/inputs/docker/README.md index fa662ca80..c22e6af8e 100644 --- a/plugins/inputs/docker/README.md +++ b/plugins/inputs/docker/README.md @@ -5,11 +5,11 @@ docker containers. You can read Docker's documentation for their remote API [here](https://docs.docker.com/engine/reference/api/docker_remote_api_v1.20/#get-container-stats-based-on-resource-usage) The docker plugin uses the excellent -[fsouza go-dockerclient](https://github.com/fsouza/go-dockerclient) library to +[docker engine-api](https://github.com/docker/engine-api) library to gather stats. Documentation for the library can be found -[here](https://godoc.org/github.com/fsouza/go-dockerclient) and documentation +[here](https://godoc.org/github.com/docker/engine-api) and documentation for the stat structure can be found -[here](https://godoc.org/github.com/fsouza/go-dockerclient#Stats) +[here](https://godoc.org/github.com/docker/engine-api/types#Stats) ### Configuration: @@ -74,6 +74,7 @@ on the availability of per-cpu stats on your system. - usage_in_usermode - usage_system - usage_total + - usage_percent - docker_net - rx_dropped - rx_bytes @@ -94,18 +95,50 @@ on the availability of per-cpu stats on your system. 
- io_serviced_recursive_sync - io_serviced_recursive_total - io_serviced_recursive_write +- docker_ + - n_used_file_descriptors + - n_cpus + - n_containers + - n_images + - n_goroutines + - n_listener_events + - memory_total + - pool_blocksize +- docker_data + - available + - total + - used +- docker_metadata + - available + - total + - used + ### Tags: -- All stats have the following tags: +- docker (memory_total) + - unit=bytes +- docker (pool_blocksize) + - unit=bytes +- docker_data + - unit=bytes +- docker_metadata + - unit=bytes + +- docker_cpu specific: - cont_id (container ID) - cont_image (container image) - cont_name (container name) -- docker_cpu specific: - cpu - docker_net specific: + - cont_id (container ID) + - cont_image (container image) + - cont_name (container name) - network - docker_blkio specific: + - cont_id (container ID) + - cont_image (container image) + - cont_name (container name) - device ### Example Output: @@ -113,6 +146,16 @@ on the availability of per-cpu stats on your system. ``` % ./telegraf -config ~/ws/telegraf.conf -input-filter docker -test * Plugin: docker, Collection 1 +> docker n_cpus=8i 1456926671065383978 +> docker n_used_file_descriptors=15i 1456926671065383978 +> docker n_containers=7i 1456926671065383978 +> docker n_images=152i 1456926671065383978 +> docker n_goroutines=36i 1456926671065383978 +> docker n_listener_events=0i 1456926671065383978 +> docker,unit=bytes memory_total=18935443456i 1456926671065383978 +> docker,unit=bytes pool_blocksize=65540i 1456926671065383978 +> docker_data,unit=bytes available=24340000000i,total=107400000000i,used=14820000000i 1456926671065383978 +> docker_metadata,unit=bytes available=2126999999i,total=2146999999i,used=20420000i 145692667106538 > docker_mem,cont_id=5705ba8ed8fb47527410653d60a8bb2f3af5e62372297c419022a3cc6d45d848,\ cont_image=spotify/kafka,cont_name=kafka \ active_anon=52568064i,active_file=6926336i,cache=12038144i,fail_count=0i,\ diff --git a/plugins/inputs/docker/docker.go b/plugins/inputs/docker/docker.go index 0d89979c1..094bad8ca 100644 --- a/plugins/inputs/docker/docker.go +++ b/plugins/inputs/docker/docker.go @@ -1,25 +1,52 @@ package system import ( + "encoding/json" "fmt" + "io" "log" + "regexp" + "strconv" "strings" "sync" "time" + "golang.org/x/net/context" + + "github.com/docker/engine-api/client" + "github.com/docker/engine-api/types" "github.com/influxdata/telegraf" "github.com/influxdata/telegraf/plugins/inputs" - - "github.com/fsouza/go-dockerclient" ) +// Docker object type Docker struct { Endpoint string ContainerNames []string - client *docker.Client + client DockerClient } +// DockerClient interface, useful for testing +type DockerClient interface { + Info(ctx context.Context) (types.Info, error) + ContainerList(ctx context.Context, options types.ContainerListOptions) ([]types.Container, error) + ContainerStats(ctx context.Context, containerID string, stream bool) (io.ReadCloser, error) +} + +// KB, MB, GB, TB, PB...human friendly +const ( + KB = 1000 + MB = 1000 * KB + GB = 1000 * MB + TB = 1000 * GB + PB = 1000 * TB +) + +var ( + sizeRegex = regexp.MustCompile(`^(\d+(\.\d+)*) ?([kKmMgGtTpP])?[bB]?$`) +) + var sampleConfig = ` ## Docker Endpoint ## To use TCP, set endpoint = "tcp://[ip]:[port]" @@ -29,28 +56,32 @@ var sampleConfig = ` container_names = [] ` +// Description returns input description func (d *Docker) Description() string { return "Read metrics about docker containers" } +// SampleConfig prints sampleConfig func (d *Docker) SampleConfig() string { return 
sampleConfig } +// Gather starts stats collection func (d *Docker) Gather(acc telegraf.Accumulator) error { if d.client == nil { - var c *docker.Client + var c *client.Client var err error + defaultHeaders := map[string]string{"User-Agent": "engine-api-cli-1.0"} if d.Endpoint == "ENV" { - c, err = docker.NewClientFromEnv() + c, err = client.NewEnvClient() if err != nil { return err } } else if d.Endpoint == "" { - c, err = docker.NewClient("unix:///var/run/docker.sock") + c, err = client.NewClient("unix:///var/run/docker.sock", "", nil, defaultHeaders) if err != nil { return err } } else { - c, err = docker.NewClient(d.Endpoint) + c, err = client.NewClient(d.Endpoint, "", nil, defaultHeaders) if err != nil { return err } @@ -58,17 +89,25 @@ func (d *Docker) Gather(acc telegraf.Accumulator) error { d.client = c } - opts := docker.ListContainersOptions{} - containers, err := d.client.ListContainers(opts) + // Get daemon info + err := d.gatherInfo(acc) + if err != nil { + fmt.Println(err.Error()) + } + + // List containers + opts := types.ContainerListOptions{} + containers, err := d.client.ContainerList(context.Background(), opts) if err != nil { return err } + // Get container data var wg sync.WaitGroup wg.Add(len(containers)) for _, container := range containers { - go func(c docker.APIContainers) { + go func(c types.Container) { defer wg.Done() err := d.gatherContainer(c, acc) if err != nil { @@ -81,10 +120,78 @@ func (d *Docker) Gather(acc telegraf.Accumulator) error { return nil } +func (d *Docker) gatherInfo(acc telegraf.Accumulator) error { + // Init vars + dataFields := make(map[string]interface{}) + metadataFields := make(map[string]interface{}) + now := time.Now() + // Get info from docker daemon + info, err := d.client.Info(context.Background()) + if err != nil { + return err + } + + fields := map[string]interface{}{ + "n_cpus": info.NCPU, + "n_used_file_descriptors": info.NFd, + "n_containers": info.Containers, + "n_images": info.Images, + "n_goroutines": info.NGoroutines, + "n_listener_events": info.NEventsListener, + } + // Add metrics + acc.AddFields("docker", + fields, + nil, + now) + acc.AddFields("docker", + map[string]interface{}{"memory_total": info.MemTotal}, + map[string]string{"unit": "bytes"}, + now) + // Get storage metrics + for _, rawData := range info.DriverStatus { + // Try to convert string to int (bytes) + value, err := parseSize(rawData[1]) + if err != nil { + continue + } + name := strings.ToLower(strings.Replace(rawData[0], " ", "_", -1)) + if name == "pool_blocksize" { + // pool blocksize + acc.AddFields("docker", + map[string]interface{}{"pool_blocksize": value}, + map[string]string{"unit": "bytes"}, + now) + } else if strings.HasPrefix(name, "data_space_") { + // data space + fieldName := strings.TrimPrefix(name, "data_space_") + dataFields[fieldName] = value + } else if strings.HasPrefix(name, "metadata_space_") { + // metadata space + fieldName := strings.TrimPrefix(name, "metadata_space_") + metadataFields[fieldName] = value + } + } + if len(dataFields) > 0 { + acc.AddFields("docker_data", + dataFields, + map[string]string{"unit": "bytes"}, + now) + } + if len(metadataFields) > 0 { + acc.AddFields("docker_metadata", + metadataFields, + map[string]string{"unit": "bytes"}, + now) + } + return nil +} + func (d *Docker) gatherContainer( - container docker.APIContainers, + container types.Container, acc telegraf.Accumulator, ) error { + var v *types.StatsJSON // Parse container name cname := "unknown" if len(container.Names) > 0 { @@ -103,28 +210,14 @@ func 
(d *Docker) gatherContainer( } } - statChan := make(chan *docker.Stats) - done := make(chan bool) - statOpts := docker.StatsOptions{ - Stream: false, - ID: container.ID, - Stats: statChan, - Done: done, - Timeout: time.Duration(time.Second * 5), + r, err := d.client.ContainerStats(context.Background(), container.ID, false) + if err != nil { + log.Printf("Error getting docker stats: %s\n", err.Error()) } - - go func() { - err := d.client.Stats(statOpts) - if err != nil { - log.Printf("Error getting docker stats: %s\n", err.Error()) - } - }() - - stat := <-statChan - close(done) - - if stat == nil { - return nil + defer r.Close() + dec := json.NewDecoder(r) + if err = dec.Decode(&v); err != nil { + log.Printf("Error decoding: %s\n", err.Error()) } // Add labels to tags @@ -132,13 +225,13 @@ func (d *Docker) gatherContainer( tags[k] = v } - gatherContainerStats(stat, acc, tags) + gatherContainerStats(v, acc, tags) return nil } func gatherContainerStats( - stat *docker.Stats, + stat *types.StatsJSON, acc telegraf.Accumulator, tags map[string]string, ) { @@ -149,35 +242,35 @@ func gatherContainerStats( "usage": stat.MemoryStats.Usage, "fail_count": stat.MemoryStats.Failcnt, "limit": stat.MemoryStats.Limit, - "total_pgmafault": stat.MemoryStats.Stats.TotalPgmafault, - "cache": stat.MemoryStats.Stats.Cache, - "mapped_file": stat.MemoryStats.Stats.MappedFile, - "total_inactive_file": stat.MemoryStats.Stats.TotalInactiveFile, - "pgpgout": stat.MemoryStats.Stats.Pgpgout, - "rss": stat.MemoryStats.Stats.Rss, - "total_mapped_file": stat.MemoryStats.Stats.TotalMappedFile, - "writeback": stat.MemoryStats.Stats.Writeback, - "unevictable": stat.MemoryStats.Stats.Unevictable, - "pgpgin": stat.MemoryStats.Stats.Pgpgin, - "total_unevictable": stat.MemoryStats.Stats.TotalUnevictable, - "pgmajfault": stat.MemoryStats.Stats.Pgmajfault, - "total_rss": stat.MemoryStats.Stats.TotalRss, - "total_rss_huge": stat.MemoryStats.Stats.TotalRssHuge, - "total_writeback": stat.MemoryStats.Stats.TotalWriteback, - "total_inactive_anon": stat.MemoryStats.Stats.TotalInactiveAnon, - "rss_huge": stat.MemoryStats.Stats.RssHuge, - "hierarchical_memory_limit": stat.MemoryStats.Stats.HierarchicalMemoryLimit, - "total_pgfault": stat.MemoryStats.Stats.TotalPgfault, - "total_active_file": stat.MemoryStats.Stats.TotalActiveFile, - "active_anon": stat.MemoryStats.Stats.ActiveAnon, - "total_active_anon": stat.MemoryStats.Stats.TotalActiveAnon, - "total_pgpgout": stat.MemoryStats.Stats.TotalPgpgout, - "total_cache": stat.MemoryStats.Stats.TotalCache, - "inactive_anon": stat.MemoryStats.Stats.InactiveAnon, - "active_file": stat.MemoryStats.Stats.ActiveFile, - "pgfault": stat.MemoryStats.Stats.Pgfault, - "inactive_file": stat.MemoryStats.Stats.InactiveFile, - "total_pgpgin": stat.MemoryStats.Stats.TotalPgpgin, + "total_pgmafault": stat.MemoryStats.Stats["total_pgmajfault"], + "cache": stat.MemoryStats.Stats["cache"], + "mapped_file": stat.MemoryStats.Stats["mapped_file"], + "total_inactive_file": stat.MemoryStats.Stats["total_inactive_file"], + "pgpgout": stat.MemoryStats.Stats["pgpgout"], + "rss": stat.MemoryStats.Stats["rss"], + "total_mapped_file": stat.MemoryStats.Stats["total_mapped_file"], + "writeback": stat.MemoryStats.Stats["writeback"], + "unevictable": stat.MemoryStats.Stats["unevictable"], + "pgpgin": stat.MemoryStats.Stats["pgpgin"], + "total_unevictable": stat.MemoryStats.Stats["total_unevictable"], + "pgmajfault": stat.MemoryStats.Stats["pgmajfault"], + "total_rss": stat.MemoryStats.Stats["total_rss"], + "total_rss_huge": 
stat.MemoryStats.Stats["total_rss_huge"], + "total_writeback": stat.MemoryStats.Stats["total_writeback"], + "total_inactive_anon": stat.MemoryStats.Stats["total_inactive_anon"], + "rss_huge": stat.MemoryStats.Stats["rss_huge"], + "hierarchical_memory_limit": stat.MemoryStats.Stats["hierarchical_memory_limit"], + "total_pgfault": stat.MemoryStats.Stats["total_pgfault"], + "total_active_file": stat.MemoryStats.Stats["total_active_file"], + "active_anon": stat.MemoryStats.Stats["active_anon"], + "total_active_anon": stat.MemoryStats.Stats["total_active_anon"], + "total_pgpgout": stat.MemoryStats.Stats["total_pgpgout"], + "total_cache": stat.MemoryStats.Stats["total_cache"], + "inactive_anon": stat.MemoryStats.Stats["inactive_anon"], + "active_file": stat.MemoryStats.Stats["active_file"], + "pgfault": stat.MemoryStats.Stats["pgfault"], + "inactive_file": stat.MemoryStats.Stats["inactive_file"], + "total_pgpgin": stat.MemoryStats.Stats["total_pgpgin"], "usage_percent": calculateMemPercent(stat), } acc.AddFields("docker_mem", memfields, tags, now) @@ -186,7 +279,7 @@ func gatherContainerStats( "usage_total": stat.CPUStats.CPUUsage.TotalUsage, "usage_in_usermode": stat.CPUStats.CPUUsage.UsageInUsermode, "usage_in_kernelmode": stat.CPUStats.CPUUsage.UsageInKernelmode, - "usage_system": stat.CPUStats.SystemCPUUsage, + "usage_system": stat.CPUStats.SystemUsage, "throttling_periods": stat.CPUStats.ThrottlingData.Periods, "throttling_throttled_periods": stat.CPUStats.ThrottlingData.ThrottledPeriods, "throttling_throttled_time": stat.CPUStats.ThrottlingData.ThrottledTime, @@ -222,7 +315,7 @@ func gatherContainerStats( gatherBlockIOMetrics(stat, acc, tags, now) } -func calculateMemPercent(stat *docker.Stats) float64 { +func calculateMemPercent(stat *types.StatsJSON) float64 { var memPercent = 0.0 if stat.MemoryStats.Limit > 0 { memPercent = float64(stat.MemoryStats.Usage) / float64(stat.MemoryStats.Limit) * 100.0 @@ -230,11 +323,11 @@ func calculateMemPercent(stat *docker.Stats) float64 { return memPercent } -func calculateCPUPercent(stat *docker.Stats) float64 { +func calculateCPUPercent(stat *types.StatsJSON) float64 { var cpuPercent = 0.0 // calculate the change for the cpu and system usage of the container in between readings cpuDelta := float64(stat.CPUStats.CPUUsage.TotalUsage) - float64(stat.PreCPUStats.CPUUsage.TotalUsage) - systemDelta := float64(stat.CPUStats.SystemCPUUsage) - float64(stat.PreCPUStats.SystemCPUUsage) + systemDelta := float64(stat.CPUStats.SystemUsage) - float64(stat.PreCPUStats.SystemUsage) if systemDelta > 0.0 && cpuDelta > 0.0 { cpuPercent = (cpuDelta / systemDelta) * float64(len(stat.CPUStats.CPUUsage.PercpuUsage)) * 100.0 @@ -243,7 +336,7 @@ func calculateCPUPercent(stat *docker.Stats) float64 { } func gatherBlockIOMetrics( - stat *docker.Stats, + stat *types.StatsJSON, acc telegraf.Accumulator, tags map[string]string, now time.Time, @@ -252,7 +345,7 @@ func gatherBlockIOMetrics( // Make a map of devices to their block io stats deviceStatMap := make(map[string]map[string]interface{}) - for _, metric := range blkioStats.IOServiceBytesRecursive { + for _, metric := range blkioStats.IoServiceBytesRecursive { device := fmt.Sprintf("%d:%d", metric.Major, metric.Minor) _, ok := deviceStatMap[device] if !ok { @@ -263,7 +356,7 @@ func gatherBlockIOMetrics( deviceStatMap[device][field] = metric.Value } - for _, metric := range blkioStats.IOServicedRecursive { + for _, metric := range blkioStats.IoServicedRecursive { device := fmt.Sprintf("%d:%d", metric.Major, metric.Minor) _, ok 
:= deviceStatMap[device] if !ok { @@ -274,40 +367,38 @@ func gatherBlockIOMetrics( deviceStatMap[device][field] = metric.Value } - for _, metric := range blkioStats.IOQueueRecursive { + for _, metric := range blkioStats.IoQueuedRecursive { device := fmt.Sprintf("%d:%d", metric.Major, metric.Minor) field := fmt.Sprintf("io_queue_recursive_%s", strings.ToLower(metric.Op)) deviceStatMap[device][field] = metric.Value } - for _, metric := range blkioStats.IOServiceTimeRecursive { + for _, metric := range blkioStats.IoServiceTimeRecursive { device := fmt.Sprintf("%d:%d", metric.Major, metric.Minor) field := fmt.Sprintf("io_service_time_recursive_%s", strings.ToLower(metric.Op)) deviceStatMap[device][field] = metric.Value } - for _, metric := range blkioStats.IOWaitTimeRecursive { + for _, metric := range blkioStats.IoWaitTimeRecursive { device := fmt.Sprintf("%d:%d", metric.Major, metric.Minor) field := fmt.Sprintf("io_wait_time_%s", strings.ToLower(metric.Op)) deviceStatMap[device][field] = metric.Value } - for _, metric := range blkioStats.IOMergedRecursive { + for _, metric := range blkioStats.IoMergedRecursive { device := fmt.Sprintf("%d:%d", metric.Major, metric.Minor) field := fmt.Sprintf("io_merged_recursive_%s", strings.ToLower(metric.Op)) deviceStatMap[device][field] = metric.Value } - for _, metric := range blkioStats.IOTimeRecursive { + for _, metric := range blkioStats.IoTimeRecursive { device := fmt.Sprintf("%d:%d", metric.Major, metric.Minor) - field := fmt.Sprintf("io_time_recursive_%s", strings.ToLower(metric.Op)) - deviceStatMap[device][field] = metric.Value + deviceStatMap[device]["io_time_recursive"] = metric.Value } for _, metric := range blkioStats.SectorsRecursive { device := fmt.Sprintf("%d:%d", metric.Major, metric.Minor) - field := fmt.Sprintf("sectors_recursive_%s", strings.ToLower(metric.Op)) - deviceStatMap[device][field] = metric.Value + deviceStatMap[device]["sectors_recursive"] = metric.Value } for device, fields := range deviceStatMap { @@ -334,6 +425,27 @@ func sliceContains(in string, sl []string) bool { return false } +// Parses the human-readable size string into the amount it represents. 
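+// Illustrative examples (decimal units, as in the devicemapper DriverStatus strings reported by `docker info`): "65.54 kB" -> 65540, "107.4 GB" -> 107400000000.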
+func parseSize(sizeStr string) (int64, error) { + matches := sizeRegex.FindStringSubmatch(sizeStr) + if len(matches) != 4 { + return -1, fmt.Errorf("invalid size: '%s'", sizeStr) + } + + size, err := strconv.ParseFloat(matches[1], 64) + if err != nil { + return -1, err + } + + uMap := map[string]int64{"k": KB, "m": MB, "g": GB, "t": TB, "p": PB} + unitPrefix := strings.ToLower(matches[3]) + if mul, ok := uMap[unitPrefix]; ok { + size *= float64(mul) + } + + return int64(size), nil +} + func init() { inputs.Add("docker", func() telegraf.Input { return &Docker{} diff --git a/plugins/inputs/docker/docker_test.go b/plugins/inputs/docker/docker_test.go index aebe8102e..c9fe6cea1 100644 --- a/plugins/inputs/docker/docker_test.go +++ b/plugins/inputs/docker/docker_test.go @@ -1,12 +1,19 @@ package system import ( + "io" + "io/ioutil" + "strings" "testing" "time" + "golang.org/x/net/context" + + "github.com/docker/engine-api/types" + "github.com/docker/engine-api/types/registry" "github.com/influxdata/telegraf/testutil" - "github.com/fsouza/go-dockerclient" + "github.com/stretchr/testify/require" ) func TestDockerGatherContainerStats(t *testing.T) { @@ -112,58 +119,58 @@ func TestDockerGatherContainerStats(t *testing.T) { acc.AssertContainsTaggedFields(t, "docker_cpu", cpu1fields, cputags) } -func testStats() *docker.Stats { - stats := &docker.Stats{ - Read: time.Now(), - Networks: make(map[string]docker.NetworkStats), - } +func testStats() *types.StatsJSON { + stats := &types.StatsJSON{} + stats.Read = time.Now() + stats.Networks = make(map[string]types.NetworkStats) stats.CPUStats.CPUUsage.PercpuUsage = []uint64{1, 1002} stats.CPUStats.CPUUsage.UsageInUsermode = 100 stats.CPUStats.CPUUsage.TotalUsage = 500 stats.CPUStats.CPUUsage.UsageInKernelmode = 200 - stats.CPUStats.SystemCPUUsage = 100 + stats.CPUStats.SystemUsage = 100 stats.CPUStats.ThrottlingData.Periods = 1 stats.PreCPUStats.CPUUsage.TotalUsage = 400 - stats.PreCPUStats.SystemCPUUsage = 50 + stats.PreCPUStats.SystemUsage = 50 - stats.MemoryStats.Stats.TotalPgmafault = 0 - stats.MemoryStats.Stats.Cache = 0 - stats.MemoryStats.Stats.MappedFile = 0 - stats.MemoryStats.Stats.TotalInactiveFile = 0 - stats.MemoryStats.Stats.Pgpgout = 0 - stats.MemoryStats.Stats.Rss = 0 - stats.MemoryStats.Stats.TotalMappedFile = 0 - stats.MemoryStats.Stats.Writeback = 0 - stats.MemoryStats.Stats.Unevictable = 0 - stats.MemoryStats.Stats.Pgpgin = 0 - stats.MemoryStats.Stats.TotalUnevictable = 0 - stats.MemoryStats.Stats.Pgmajfault = 0 - stats.MemoryStats.Stats.TotalRss = 44 - stats.MemoryStats.Stats.TotalRssHuge = 444 - stats.MemoryStats.Stats.TotalWriteback = 55 - stats.MemoryStats.Stats.TotalInactiveAnon = 0 - stats.MemoryStats.Stats.RssHuge = 0 - stats.MemoryStats.Stats.HierarchicalMemoryLimit = 0 - stats.MemoryStats.Stats.TotalPgfault = 0 - stats.MemoryStats.Stats.TotalActiveFile = 0 - stats.MemoryStats.Stats.ActiveAnon = 0 - stats.MemoryStats.Stats.TotalActiveAnon = 0 - stats.MemoryStats.Stats.TotalPgpgout = 0 - stats.MemoryStats.Stats.TotalCache = 0 - stats.MemoryStats.Stats.InactiveAnon = 0 - stats.MemoryStats.Stats.ActiveFile = 1 - stats.MemoryStats.Stats.Pgfault = 2 - stats.MemoryStats.Stats.InactiveFile = 3 - stats.MemoryStats.Stats.TotalPgpgin = 4 + stats.MemoryStats.Stats = make(map[string]uint64) + stats.MemoryStats.Stats["total_pgmajfault"] = 0 + stats.MemoryStats.Stats["cache"] = 0 + stats.MemoryStats.Stats["mapped_file"] = 0 + stats.MemoryStats.Stats["total_inactive_file"] = 0 + stats.MemoryStats.Stats["pgpgout"] = 0 + 
stats.MemoryStats.Stats["rss"] = 0 + stats.MemoryStats.Stats["total_mapped_file"] = 0 + stats.MemoryStats.Stats["writeback"] = 0 + stats.MemoryStats.Stats["unevictable"] = 0 + stats.MemoryStats.Stats["pgpgin"] = 0 + stats.MemoryStats.Stats["total_unevictable"] = 0 + stats.MemoryStats.Stats["pgmajfault"] = 0 + stats.MemoryStats.Stats["total_rss"] = 44 + stats.MemoryStats.Stats["total_rss_huge"] = 444 + stats.MemoryStats.Stats["total_writeback"] = 55 + stats.MemoryStats.Stats["total_inactive_anon"] = 0 + stats.MemoryStats.Stats["rss_huge"] = 0 + stats.MemoryStats.Stats["hierarchical_memory_limit"] = 0 + stats.MemoryStats.Stats["total_pgfault"] = 0 + stats.MemoryStats.Stats["total_active_file"] = 0 + stats.MemoryStats.Stats["active_anon"] = 0 + stats.MemoryStats.Stats["total_active_anon"] = 0 + stats.MemoryStats.Stats["total_pgpgout"] = 0 + stats.MemoryStats.Stats["total_cache"] = 0 + stats.MemoryStats.Stats["inactive_anon"] = 0 + stats.MemoryStats.Stats["active_file"] = 1 + stats.MemoryStats.Stats["pgfault"] = 2 + stats.MemoryStats.Stats["inactive_file"] = 3 + stats.MemoryStats.Stats["total_pgpgin"] = 4 stats.MemoryStats.MaxUsage = 1001 stats.MemoryStats.Usage = 1111 stats.MemoryStats.Failcnt = 1 stats.MemoryStats.Limit = 2000 - stats.Networks["eth0"] = docker.NetworkStats{ + stats.Networks["eth0"] = types.NetworkStats{ RxDropped: 1, RxBytes: 2, RxErrors: 3, @@ -174,23 +181,247 @@ func testStats() *docker.Stats { TxBytes: 4, } - sbr := docker.BlkioStatsEntry{ + sbr := types.BlkioStatEntry{ Major: 6, Minor: 0, Op: "read", Value: 100, } - sr := docker.BlkioStatsEntry{ + sr := types.BlkioStatEntry{ Major: 6, Minor: 0, Op: "write", Value: 101, } - stats.BlkioStats.IOServiceBytesRecursive = append( - stats.BlkioStats.IOServiceBytesRecursive, sbr) - stats.BlkioStats.IOServicedRecursive = append( - stats.BlkioStats.IOServicedRecursive, sr) + stats.BlkioStats.IoServiceBytesRecursive = append( + stats.BlkioStats.IoServiceBytesRecursive, sbr) + stats.BlkioStats.IoServicedRecursive = append( + stats.BlkioStats.IoServicedRecursive, sr) return stats } + +type FakeDockerClient struct { +} + +func (d FakeDockerClient) Info(ctx context.Context) (types.Info, error) { + env := types.Info{ + Containers: 108, + OomKillDisable: false, + SystemTime: "2016-02-24T00:55:09.15073105-05:00", + NEventsListener: 0, + ID: "5WQQ:TFWR:FDNG:OKQ3:37Y4:FJWG:QIKK:623T:R3ME:QTKB:A7F7:OLHD", + Debug: false, + LoggingDriver: "json-file", + KernelVersion: "4.3.0-1-amd64", + IndexServerAddress: "https://index.docker.io/v1/", + MemTotal: 3840757760, + Images: 199, + CPUCfsQuota: true, + Name: "absol", + SwapLimit: false, + IPv4Forwarding: true, + ExecutionDriver: "native-0.2", + ExperimentalBuild: false, + CPUCfsPeriod: true, + RegistryConfig: &registry.ServiceConfig{ + IndexConfigs: map[string]*registry.IndexInfo{ + "docker.io": { + Name: "docker.io", + Mirrors: []string{}, + Official: true, + Secure: true, + }, + }, InsecureRegistryCIDRs: []*registry.NetIPNet{{IP: []byte{127, 0, 0, 0}, Mask: []byte{255, 0, 0, 0}}}, Mirrors: []string{}}, + OperatingSystem: "Linux Mint LMDE (containerized)", + BridgeNfIptables: true, + HTTPSProxy: "", + Labels: []string{}, + MemoryLimit: false, + DriverStatus: [][2]string{{"Pool Name", "docker-8:1-1182287-pool"}, {"Pool Blocksize", "65.54 kB"}, {"Backing Filesystem", "extfs"}, {"Data file", "/dev/loop0"}, {"Metadata file", "/dev/loop1"}, {"Data Space Used", "17.3 GB"}, {"Data Space Total", "107.4 GB"}, {"Data Space Available", "36.53 GB"}, {"Metadata Space Used", "20.97 MB"}, {"Metadata Space Total", 
"2.147 GB"}, {"Metadata Space Available", "2.127 GB"}, {"Udev Sync Supported", "true"}, {"Deferred Removal Enabled", "false"}, {"Data loop file", "/var/lib/docker/devicemapper/devicemapper/data"}, {"Metadata loop file", "/var/lib/docker/devicemapper/devicemapper/metadata"}, {"Library Version", "1.02.115 (2016-01-25)"}}, + NFd: 19, + HTTPProxy: "", + Driver: "devicemapper", + NGoroutines: 39, + NCPU: 4, + DockerRootDir: "/var/lib/docker", + NoProxy: "", + BridgeNfIP6tables: true, + } + return env, nil +} + +func (d FakeDockerClient) ContainerList(octx context.Context, options types.ContainerListOptions) ([]types.Container, error) { + container1 := types.Container{ + ID: "e2173b9478a6ae55e237d4d74f8bbb753f0817192b5081334dc78476296b7dfb", + Names: []string{"/etcd"}, + Image: "quay.io/coreos/etcd:v2.2.2", + Command: "/etcd -name etcd0 -advertise-client-urls http://localhost:2379 -listen-client-urls http://0.0.0.0:2379", + Created: 1455941930, + Status: "Up 4 hours", + Ports: []types.Port{ + types.Port{ + PrivatePort: 7001, + PublicPort: 0, + Type: "tcp", + }, + types.Port{ + PrivatePort: 4001, + PublicPort: 0, + Type: "tcp", + }, + types.Port{ + PrivatePort: 2380, + PublicPort: 0, + Type: "tcp", + }, + types.Port{ + PrivatePort: 2379, + PublicPort: 2379, + Type: "tcp", + IP: "0.0.0.0", + }, + }, + SizeRw: 0, + SizeRootFs: 0, + } + container2 := types.Container{ + ID: "b7dfbb9478a6ae55e237d4d74f8bbb753f0817192b5081334dc78476296e2173", + Names: []string{"/etcd2"}, + Image: "quay.io/coreos/etcd:v2.2.2", + Command: "/etcd -name etcd2 -advertise-client-urls http://localhost:2379 -listen-client-urls http://0.0.0.0:2379", + Created: 1455941933, + Status: "Up 4 hours", + Ports: []types.Port{ + types.Port{ + PrivatePort: 7002, + PublicPort: 0, + Type: "tcp", + }, + types.Port{ + PrivatePort: 4002, + PublicPort: 0, + Type: "tcp", + }, + types.Port{ + PrivatePort: 2381, + PublicPort: 0, + Type: "tcp", + }, + types.Port{ + PrivatePort: 2382, + PublicPort: 2382, + Type: "tcp", + IP: "0.0.0.0", + }, + }, + SizeRw: 0, + SizeRootFs: 0, + } + + containers := []types.Container{container1, container2} + return containers, nil + + //#{e6a96c84ca91a5258b7cb752579fb68826b68b49ff957487695cd4d13c343b44 titilambert/snmpsim /bin/sh -c 'snmpsimd --agent-udpv4-endpoint=0.0.0.0:31161 --process-user=root --process-group=user' 1455724831 Up 4 hours [{31161 31161 udp 0.0.0.0}] 0 0 [/snmp] map[]}]2016/02/24 01:05:01 Gathered metrics, (3s interval), from 1 inputs in 1.233836656s +} + +func (d FakeDockerClient) ContainerStats(ctx context.Context, containerID string, stream bool) (io.ReadCloser, error) { + var stat io.ReadCloser + jsonStat := 
`{"read":"2016-02-24T11:42:27.472459608-05:00","memory_stats":{"stats":{},"limit":18935443456},"blkio_stats":{"io_service_bytes_recursive":[{"major":252,"minor":1,"op":"Read","value":753664},{"major":252,"minor":1,"op":"Write"},{"major":252,"minor":1,"op":"Sync"},{"major":252,"minor":1,"op":"Async","value":753664},{"major":252,"minor":1,"op":"Total","value":753664}],"io_serviced_recursive":[{"major":252,"minor":1,"op":"Read","value":26},{"major":252,"minor":1,"op":"Write"},{"major":252,"minor":1,"op":"Sync"},{"major":252,"minor":1,"op":"Async","value":26},{"major":252,"minor":1,"op":"Total","value":26}]},"cpu_stats":{"cpu_usage":{"percpu_usage":[17871,4959158,1646137,1231652,11829401,244656,369972,0],"usage_in_usermode":10000000,"total_usage":20298847},"system_cpu_usage":24052607520000000,"throttling_data":{}},"precpu_stats":{"cpu_usage":{"percpu_usage":[17871,4959158,1646137,1231652,11829401,244656,369972,0],"usage_in_usermode":10000000,"total_usage":20298847},"system_cpu_usage":24052599550000000,"throttling_data":{}}}` + stat = ioutil.NopCloser(strings.NewReader(jsonStat)) + return stat, nil +} + +func TestDockerGatherInfo(t *testing.T) { + var acc testutil.Accumulator + client := FakeDockerClient{} + d := Docker{client: client} + + err := d.Gather(&acc) + + require.NoError(t, err) + + acc.AssertContainsTaggedFields(t, + "docker", + map[string]interface{}{ + "n_listener_events": int(0), + "n_cpus": int(4), + "n_used_file_descriptors": int(19), + "n_containers": int(108), + "n_images": int(199), + "n_goroutines": int(39), + }, + map[string]string{}, + ) + + acc.AssertContainsTaggedFields(t, + "docker_data", + map[string]interface{}{ + "used": int64(17300000000), + "total": int64(107400000000), + "available": int64(36530000000), + }, + map[string]string{ + "unit": "bytes", + }, + ) + acc.AssertContainsTaggedFields(t, + "docker_cpu", + map[string]interface{}{ + "usage_total": uint64(1231652), + }, + map[string]string{ + "cont_id": "b7dfbb9478a6ae55e237d4d74f8bbb753f0817192b5081334dc78476296e2173", + "cont_name": "etcd2", + "cont_image": "quay.io/coreos/etcd:v2.2.2", + "cpu": "cpu3", + }, + ) + acc.AssertContainsTaggedFields(t, + "docker_mem", + map[string]interface{}{ + "total_pgpgout": uint64(0), + "usage_percent": float64(0), + "rss": uint64(0), + "total_writeback": uint64(0), + "active_anon": uint64(0), + "total_pgmafault": uint64(0), + "total_rss": uint64(0), + "total_unevictable": uint64(0), + "active_file": uint64(0), + "total_mapped_file": uint64(0), + "pgpgin": uint64(0), + "total_active_file": uint64(0), + "total_active_anon": uint64(0), + "total_cache": uint64(0), + "inactive_anon": uint64(0), + "pgmajfault": uint64(0), + "total_inactive_anon": uint64(0), + "total_rss_huge": uint64(0), + "rss_huge": uint64(0), + "hierarchical_memory_limit": uint64(0), + "pgpgout": uint64(0), + "unevictable": uint64(0), + "total_inactive_file": uint64(0), + "writeback": uint64(0), + "total_pgfault": uint64(0), + "total_pgpgin": uint64(0), + "cache": uint64(0), + "mapped_file": uint64(0), + "inactive_file": uint64(0), + "max_usage": uint64(0), + "fail_count": uint64(0), + "pgfault": uint64(0), + "usage": uint64(0), + "limit": uint64(18935443456), + }, + map[string]string{ + "cont_id": "b7dfbb9478a6ae55e237d4d74f8bbb753f0817192b5081334dc78476296e2173", + "cont_name": "etcd2", + "cont_image": "quay.io/coreos/etcd:v2.2.2", + }, + ) + + //fmt.Print(info) +} diff --git a/plugins/inputs/dovecot/dovecot.go b/plugins/inputs/dovecot/dovecot.go index 75829f595..bf1b20269 100644 --- 
a/plugins/inputs/dovecot/dovecot.go +++ b/plugins/inputs/dovecot/dovecot.go @@ -34,6 +34,8 @@ var sampleConfig = ` domains = [] ` +var defaultTimeout = time.Second * time.Duration(5) + func (d *Dovecot) SampleConfig() string { return sampleConfig } const defaultPort = "24242" @@ -74,13 +76,16 @@ func (d *Dovecot) gatherServer(addr string, acc telegraf.Accumulator, doms map[s return fmt.Errorf("Error: %s on url %s\n", err, addr) } - c, err := net.Dial("tcp", addr) + c, err := net.DialTimeout("tcp", addr, defaultTimeout) if err != nil { return fmt.Errorf("Unable to connect to dovecot server '%s': %s", addr, err) } defer c.Close() - c.Write([]byte("EXPORT\tdomain\n\n")) + // Extend connection + c.SetDeadline(time.Now().Add(defaultTimeout)) + + c.Write([]byte("EXPORT\tdomain\n")) var buf bytes.Buffer io.Copy(&buf, c) // buf := bufio.NewReader(c) diff --git a/plugins/inputs/elasticsearch/elasticsearch.go b/plugins/inputs/elasticsearch/elasticsearch.go index aae97f4d7..32bd58516 100644 --- a/plugins/inputs/elasticsearch/elasticsearch.go +++ b/plugins/inputs/elasticsearch/elasticsearch.go @@ -81,7 +81,12 @@ type Elasticsearch struct { // NewElasticsearch return a new instance of Elasticsearch func NewElasticsearch() *Elasticsearch { - return &Elasticsearch{client: http.DefaultClient} + tr := &http.Transport{ResponseHeaderTimeout: time.Duration(3 * time.Second)} + client := &http.Client{ + Transport: tr, + Timeout: time.Duration(4 * time.Second), + } + return &Elasticsearch{client: client} } // SampleConfig returns sample configuration for this plugin. diff --git a/plugins/inputs/elasticsearch/elasticsearch_test.go b/plugins/inputs/elasticsearch/elasticsearch_test.go index f94d3f9ac..f29857507 100644 --- a/plugins/inputs/elasticsearch/elasticsearch_test.go +++ b/plugins/inputs/elasticsearch/elasticsearch_test.go @@ -34,6 +34,9 @@ func (t *transportMock) RoundTrip(r *http.Request) (*http.Response, error) { return res, nil } +func (t *transportMock) CancelRequest(_ *http.Request) { +} + func TestElasticsearch(t *testing.T) { es := NewElasticsearch() es.Servers = []string{"http://example.com:9200"} diff --git a/plugins/inputs/exec/README.md b/plugins/inputs/exec/README.md index eddc86ada..a75ae7856 100644 --- a/plugins/inputs/exec/README.md +++ b/plugins/inputs/exec/README.md @@ -1,28 +1,13 @@ # Exec Input Plugin -The exec plugin can execute arbitrary commands which output: +Please also see: [Telegraf Input Data Formats](https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md) -* JSON -* InfluxDB [line-protocol](https://docs.influxdata.com/influxdb/v0.9/write_protocols/line/) -* Graphite [graphite-protocol](http://graphite.readthedocs.org/en/latest/feeding-carbon.html) +### Example 1 - JSON -> Graphite understands messages with this format: +#### Configuration -> ``` -metric_path value timestamp\n -``` - -> __metric_path__ is the metric namespace that you want to populate. - -> __value__ is the value that you want to assign to the metric at this time. - -> __timestamp__ is the unix epoch time. - - -If using JSON, only numeric values are parsed and turned into floats. Booleans -and strings will be ignored. - -### Configuration +In this example a script called ```/tmp/test.sh``` and a script called ```/tmp/test2.sh``` +are configured for ```[[inputs.exec]]``` in JSON format. ``` # Read flattened metrics from one or more commands that output JSON to stdout @@ -30,7 +15,7 @@ and strings will be ignored. 
# Shell/commands array commands = ["/tmp/test.sh", "/tmp/test2.sh"] - # Data format to consume. This can be "json", "influx" or "graphite" (line-protocol) + # Data format to consume. # NOTE json only reads numerical measurements, strings and booleans are ignored. data_format = "json" @@ -64,8 +49,6 @@ Other options for modifying the measurement names are: name_prefix = "prefix_" ``` -### Example 1 - Let's say that we have the above configuration, and mycollector outputs the following JSON: @@ -85,10 +68,16 @@ The collected metrics will be stored as fields under the measurement ``` exec_mycollector a=0.5,b_c=0.1,b_d=5 1452815002357578567 ``` +If using JSON, only numeric values are parsed and turned into floats. Booleans +and strings will be ignored. -### Example 2 +### Example 2 - Influx Line-Protocol -Now let's say we have the following configuration: +In this example an application called ```/usr/bin/line_protocol_collector``` +and a script called ```/tmp/test2.sh``` are configured for ```[[inputs.exec]]``` +in influx line-protocol format. + +#### Configuration ``` [[inputs.exec]] @@ -98,12 +87,12 @@ Now let's say we have the following configuration: # command = "/usr/bin/line_protocol_collector" commands = ["/usr/bin/line_protocol_collector","/tmp/test2.sh"] - # Data format to consume. This can be "json" or "influx" (line-protocol) + # Data format to consume. # NOTE json only reads numerical measurements, strings and booleans are ignored. data_format = "influx" ``` -And line_protocol_collector outputs the following line protocol: +The line_protocol_collector application outputs the following line protocol: ``` cpu,cpu=cpu0,host=foo,datacenter=us-east usage_idle=99,usage_busy=1 @@ -118,14 +107,17 @@ cpu,cpu=cpu6,host=foo,datacenter=us-east usage_idle=99,usage_busy=1 You will get data in InfluxDB exactly as it is defined above, tags are cpu=cpuN, host=foo, and datacenter=us-east with fields usage_idle and usage_busy. They will receive a timestamp at collection time. +Each line must end in \n, just as the Influx line protocol does. -### Example 3 +### Example 3 - Graphite We can also change the data_format to "graphite" to use the metrics collecting scripts such as (compatible with graphite): -* Nagios [Mertics Plugins] (https://exchange.nagios.org/directory/Plugins) -* Sensu [Mertics Plugins] (https://github.com/sensu-plugins) +* Nagios [Metrics Plugins](https://exchange.nagios.org/directory/Plugins) +* Sensu [Metrics Plugins](https://github.com/sensu-plugins) + +In this example a script called /tmp/test.sh and a script called /tmp/test2.sh are configured for [[inputs.exec]] in graphite format. #### Configuration ``` @@ -134,7 +126,7 @@ We can also change the data_format to "graphite" to use the metrics collecting s # Shell/commands array commands = ["/tmp/test.sh","/tmp/test2.sh"] - # Data format to consume. This can be "json", "influx" or "graphite" (line-protocol) + # Data format to consume. # NOTE json only reads numerical measurements, strings and booleans are ignored. data_format = "graphite" @@ -161,6 +153,17 @@ We can also change the data_format to "graphite" to use the metrics collecting s "measurement*" ] ``` +Graphite messages are in this format: + +``` +metric_path value timestamp\n +``` + +__metric_path__ is the metric namespace that you want to populate. + +__value__ is the value that you want to assign to the metric at this time. + +__timestamp__ is the unix epoch time. 
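+For example, a hypothetical line +``` +sensu.metric.net.server0.eth0.rx_packets 461 1444234982 +``` +populates the metric path `sensu.metric.net.server0.eth0.rx_packets` with the value `461` at unix epoch `1444234982`.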
And test.sh/test2.sh will output: @@ -176,5 +179,5 @@ sensu.metric.net.server0.eth0.rx_dropped 0 1444234982 The templates configuration will be used to parse the graphite metrics to support influxdb/opentsdb tagging store engines. -More detail information about templates, please refer to [The graphite Input] (https://github.com/influxdata/influxdb/blob/master/services/graphite/README.md) +For more detailed information about templates, please refer to [The graphite Input](https://github.com/influxdata/influxdb/blob/master/services/graphite/README.md) diff --git a/plugins/inputs/exec/exec.go b/plugins/inputs/exec/exec.go index 5231fd013..d2e09ccd0 100644 --- a/plugins/inputs/exec/exec.go +++ b/plugins/inputs/exec/exec.go @@ -5,12 +5,14 @@ import ( "fmt" "os/exec" "sync" + "syscall" "github.com/gonuts/go-shellquote" "github.com/influxdata/telegraf" "github.com/influxdata/telegraf/plugins/inputs" "github.com/influxdata/telegraf/plugins/parsers" + "github.com/influxdata/telegraf/plugins/parsers/nagios" ) const sampleConfig = ` @@ -20,7 +22,7 @@ const sampleConfig = ` ## measurement name suffix (for separating different commands) name_suffix = "_mycollector" - ## Data format to consume. This can be "json", "influx" or "graphite" + ## Data format to consume. ## Each data format has its own unique set of configuration options, read ## more about them here: ## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md @@ -46,12 +48,32 @@ func NewExec() *Exec { } type Runner interface { - Run(*Exec, string) ([]byte, error) + Run(*Exec, string, telegraf.Accumulator) ([]byte, error) } type CommandRunner struct{} -func (c CommandRunner) Run(e *Exec, command string) ([]byte, error) { +func AddNagiosState(exitCode error, acc telegraf.Accumulator) error { + nagiosState := 0 + if exitCode != nil { + exiterr, ok := exitCode.(*exec.ExitError) + if ok { + status, ok := exiterr.Sys().(syscall.WaitStatus) + if ok { + nagiosState = status.ExitStatus() + } else { + return fmt.Errorf("exec: unable to get nagios plugin exit code") + } + } else { + return fmt.Errorf("exec: unable to get nagios plugin exit code") + } + } + fields := map[string]interface{}{"state": nagiosState} + acc.AddFields("nagios_state", fields, nil) + return nil +} + +func (c CommandRunner) Run(e *Exec, command string, acc telegraf.Accumulator) ([]byte, error) { split_cmd, err := shellquote.Split(command) if err != nil || len(split_cmd) == 0 { return nil, fmt.Errorf("exec: unable to parse command, %s", err) @@ -63,7 +85,17 @@ func (c CommandRunner) Run(e *Exec, command string) ([]byte, error) { cmd.Stdout = &out if err := cmd.Run(); err != nil { - return nil, fmt.Errorf("exec: %s for command '%s'", err, command) + switch e.parser.(type) { + case *nagios.NagiosParser: + AddNagiosState(err, acc) + default: + return nil, fmt.Errorf("exec: %s for command '%s'", err, command) + } + } else { + switch e.parser.(type) { + case *nagios.NagiosParser: + AddNagiosState(nil, acc) + } } return out.Bytes(), nil @@ -72,7 +104,7 @@ func (c CommandRunner) Run(e *Exec, command string) ([]byte, error) { func (e *Exec) ProcessCommand(command string, acc telegraf.Accumulator) { defer e.wg.Done() - out, err := e.runner.Run(e, command) + out, err := e.runner.Run(e, command, acc) if err != nil { e.errChan <- err return diff --git a/plugins/inputs/exec/exec_test.go b/plugins/inputs/exec/exec_test.go index da55ef9d3..9c75857cf 100644 --- a/plugins/inputs/exec/exec_test.go +++ b/plugins/inputs/exec/exec_test.go @@ -4,6 +4,7 @@ import ( "fmt" "testing" + 
"github.com/influxdata/telegraf" "github.com/influxdata/telegraf/plugins/parsers" "github.com/influxdata/telegraf/testutil" @@ -57,7 +58,7 @@ func newRunnerMock(out []byte, err error) Runner { } } -func (r runnerMock) Run(e *Exec, command string) ([]byte, error) { +func (r runnerMock) Run(e *Exec, command string, acc telegraf.Accumulator) ([]byte, error) { if r.err != nil { return nil, r.err } diff --git a/plugins/inputs/github_webhooks/github_webhooks.go b/plugins/inputs/github_webhooks/github_webhooks.go index bc3f184be..726eef037 100644 --- a/plugins/inputs/github_webhooks/github_webhooks.go +++ b/plugins/inputs/github_webhooks/github_webhooks.go @@ -73,14 +73,17 @@ func (gh *GithubWebhooks) Stop() { // Handles the / route func (gh *GithubWebhooks) eventHandler(w http.ResponseWriter, r *http.Request) { + defer r.Body.Close() eventType := r.Header["X-Github-Event"][0] data, err := ioutil.ReadAll(r.Body) if err != nil { w.WriteHeader(http.StatusBadRequest) + return } e, err := NewEvent(data, eventType) if err != nil { w.WriteHeader(http.StatusBadRequest) + return } gh.Lock() gh.events = append(gh.events, e) diff --git a/plugins/inputs/haproxy/haproxy.go b/plugins/inputs/haproxy/haproxy.go index 233cd8481..b1402d8ec 100644 --- a/plugins/inputs/haproxy/haproxy.go +++ b/plugins/inputs/haproxy/haproxy.go @@ -129,8 +129,11 @@ func (g *haproxy) Gather(acc telegraf.Accumulator) error { func (g *haproxy) gatherServer(addr string, acc telegraf.Accumulator) error { if g.client == nil { - - client := &http.Client{} + tr := &http.Transport{ResponseHeaderTimeout: time.Duration(3 * time.Second)} + client := &http.Client{ + Transport: tr, + Timeout: time.Duration(4 * time.Second), + } g.client = client } diff --git a/plugins/inputs/httpjson/README.md b/plugins/inputs/httpjson/README.md index fc45dd567..707b256df 100644 --- a/plugins/inputs/httpjson/README.md +++ b/plugins/inputs/httpjson/README.md @@ -6,7 +6,7 @@ For example, if you have a service called _mycollector_, which has HTTP endpoint plugin like this: ``` -[[httpjson.services]] +[[inputs.httpjson]] name = "mycollector" servers = [ @@ -24,7 +24,7 @@ plugin like this: You can also specify which keys from server response should be considered tags: ``` -[[httpjson.services]] +[[inputs.httpjson]] ... tag_keys = [ @@ -36,10 +36,10 @@ You can also specify which keys from server response should be considered tags: You can also specify additional request parameters for the service: ``` -[[httpjson.services]] +[[inputs.httpjson]] ... - [httpjson.services.parameters] + [inputs.httpjson.parameters] event_type = "cpu_spike" threshold = "0.75" @@ -48,10 +48,10 @@ You can also specify additional request parameters for the service: You can also specify additional request header parameters for the service: ``` -[[httpjson.services]] +[[inputs.httpjson]] ... 
- [httpjson.services.headers] + [inputs.httpjson.headers] X-Auth-Token = "my-xauth-token" apiVersion = "v1" ``` @@ -61,18 +61,14 @@ You can also specify additional request header parameters for the service: Let's say that we have a service named "mycollector" configured like this: ``` -[httpjson] - [[httpjson.services]] - name = "mycollector" - - servers = [ - "http://my.service.com/_stats" - ] - - # HTTP method to use (case-sensitive) - method = "GET" - - tag_keys = ["service"] +[[inputs.httpjson]] + name = "mycollector" + servers = [ + "http://my.service.com/_stats" + ] + # HTTP method to use (case-sensitive) + method = "GET" + tag_keys = ["service"] ``` which responds with the following JSON: @@ -102,26 +98,21 @@ There is also the option to collect JSON from multiple services, here is an example doing that. ``` -[httpjson] - [[httpjson.services]] - name = "mycollector1" +[[inputs.httpjson]] + name = "mycollector1" + servers = [ + "http://my.service1.com/_stats" + ] + # HTTP method to use (case-sensitive) + method = "GET" - servers = [ - "http://my.service1.com/_stats" - ] - - # HTTP method to use (case-sensitive) - method = "GET" - - [[httpjson.services]] - name = "mycollector2" - - servers = [ - "http://service.net/json/stats" - ] - - # HTTP method to use (case-sensitive) - method = "POST" +[[inputs.httpjson]] + name = "mycollector2" + servers = [ + "http://service.net/json/stats" + ] + # HTTP method to use (case-sensitive) + method = "POST" ``` The services respond with the following JSON: diff --git a/plugins/inputs/httpjson/httpjson.go b/plugins/inputs/httpjson/httpjson.go index c055f66de..6fe4da1e5 100644 --- a/plugins/inputs/httpjson/httpjson.go +++ b/plugins/inputs/httpjson/httpjson.go @@ -11,6 +11,7 @@ import ( "time" "github.com/influxdata/telegraf" + "github.com/influxdata/telegraf/internal" "github.com/influxdata/telegraf/plugins/inputs" "github.com/influxdata/telegraf/plugins/parsers" ) @@ -23,6 +24,15 @@ type HttpJson struct { Parameters map[string]string Headers map[string]string + // Path to CA file + SSLCA string `toml:"ssl_ca"` + // Path to host cert file + SSLCert string `toml:"ssl_cert"` + // Path to cert key file + SSLKey string `toml:"ssl_key"` + // Use SSL but skip chain & host verification + InsecureSkipVerify bool + client HTTPClient } @@ -36,16 +46,27 @@ type HTTPClient interface { // http.Response: HTTP respons object // error : Any error that may have occurred MakeRequest(req *http.Request) (*http.Response, error) + + SetHTTPClient(client *http.Client) + HTTPClient() *http.Client } type RealHTTPClient struct { client *http.Client } -func (c RealHTTPClient) MakeRequest(req *http.Request) (*http.Response, error) { +func (c *RealHTTPClient) MakeRequest(req *http.Request) (*http.Response, error) { return c.client.Do(req) } +func (c *RealHTTPClient) SetHTTPClient(client *http.Client) { + c.client = client +} + +func (c *RealHTTPClient) HTTPClient() *http.Client { + return c.client +} + var sampleConfig = ` ## NOTE This plugin only reads numerical measurements, strings and booleans ## will be ignored. 
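The couchdb, elasticsearch, haproxy, and influxdb diffs above, and the httpjson change below, all build their HTTP client the same way: a `Transport` with a `ResponseHeaderTimeout` plus an overall client-level `Timeout`. A minimal standalone sketch of that shared pattern (the helper name and endpoint URL are illustrative, not taken from any plugin):

```go
package main

import (
	"fmt"
	"net/http"
	"time"
)

// newTimeoutClient mirrors the hardened client construction these diffs
// introduce: ResponseHeaderTimeout bounds the wait for the first response
// header, while Timeout caps the entire request (connect, headers, body),
// so a hung endpoint cannot stall an input's Gather cycle indefinitely.
func newTimeoutClient() *http.Client {
	tr := &http.Transport{
		ResponseHeaderTimeout: 3 * time.Second,
	}
	return &http.Client{
		Transport: tr,
		Timeout:   4 * time.Second,
	}
}

func main() {
	resp, err := newTimeoutClient().Get("http://127.0.0.1:8086/debug/vars")
	if err != nil {
		fmt.Println("request failed:", err)
		return
	}
	defer resp.Body.Close()
	fmt.Println("status:", resp.Status)
}
```

Note that the transport timeout alone would not bound a slow body read; the client-level `Timeout` covers that case, which is why both are set.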
@@ -77,6 +98,13 @@ var sampleConfig = ` # [inputs.httpjson.headers] # X-Auth-Token = "my-xauth-token" # apiVersion = "v1" + + ## Optional SSL Config + # ssl_ca = "/etc/telegraf/ca.pem" + # ssl_cert = "/etc/telegraf/cert.pem" + # ssl_key = "/etc/telegraf/key.pem" + ## Use SSL but skip chain & host verification + # insecure_skip_verify = false ` func (h *HttpJson) SampleConfig() string { @@ -91,6 +119,23 @@ func (h *HttpJson) Description() string { func (h *HttpJson) Gather(acc telegraf.Accumulator) error { var wg sync.WaitGroup + if h.client.HTTPClient() == nil { + tlsCfg, err := internal.GetTLSConfig( + h.SSLCert, h.SSLKey, h.SSLCA, h.InsecureSkipVerify) + if err != nil { + return err + } + tr := &http.Transport{ + ResponseHeaderTimeout: time.Duration(3 * time.Second), + TLSClientConfig: tlsCfg, + } + client := &http.Client{ + Transport: tr, + Timeout: time.Duration(4 * time.Second), + } + h.client.SetHTTPClient(client) + } + errorChannel := make(chan error, len(h.Servers)) for _, server := range h.Servers { @@ -244,6 +289,8 @@ func (h *HttpJson) sendRequest(serverURL string) (string, float64, error) { func init() { inputs.Add("httpjson", func() telegraf.Input { - return &HttpJson{client: RealHTTPClient{client: &http.Client{}}} + return &HttpJson{ + client: &RealHTTPClient{}, + } }) } diff --git a/plugins/inputs/httpjson/httpjson_test.go b/plugins/inputs/httpjson/httpjson_test.go index b6b57a167..31447b307 100644 --- a/plugins/inputs/httpjson/httpjson_test.go +++ b/plugins/inputs/httpjson/httpjson_test.go @@ -125,7 +125,7 @@ type mockHTTPClient struct { // Mock implementation of MakeRequest. Usually returns an http.Response with // hard-coded responseBody and statusCode. However, if the request uses a // nonstandard method, it uses status code 405 (method not allowed) -func (c mockHTTPClient) MakeRequest(req *http.Request) (*http.Response, error) { +func (c *mockHTTPClient) MakeRequest(req *http.Request) (*http.Response, error) { resp := http.Response{} resp.StatusCode = c.statusCode @@ -147,6 +147,13 @@ func (c mockHTTPClient) MakeRequest(req *http.Request) (*http.Response, error) { return &resp, nil } +func (c *mockHTTPClient) SetHTTPClient(_ *http.Client) { +} + +func (c *mockHTTPClient) HTTPClient() *http.Client { + return nil +} + // Generates a pointer to an HttpJson object that uses a mock HTTP client. 
// Parameters: // response : Body of the response that the mock HTTP client should return @@ -157,7 +164,7 @@ func (c mockHTTPClient) MakeRequest(req *http.Request) (*http.Response, error) { func genMockHttpJson(response string, statusCode int) []*HttpJson { return []*HttpJson{ &HttpJson{ - client: mockHTTPClient{responseBody: response, statusCode: statusCode}, + client: &mockHTTPClient{responseBody: response, statusCode: statusCode}, Servers: []string{ "http://server1.example.com/metrics/", "http://server2.example.com/metrics/", @@ -174,7 +181,7 @@ func genMockHttpJson(response string, statusCode int) []*HttpJson { }, }, &HttpJson{ - client: mockHTTPClient{responseBody: response, statusCode: statusCode}, + client: &mockHTTPClient{responseBody: response, statusCode: statusCode}, Servers: []string{ "http://server3.example.com/metrics/", "http://server4.example.com/metrics/", @@ -234,7 +241,7 @@ func TestHttpJsonGET_URL(t *testing.T) { Servers: []string{ts.URL + "?api_key=mykey"}, Name: "", Method: "GET", - client: RealHTTPClient{client: &http.Client{}}, + client: &RealHTTPClient{client: &http.Client{}}, } var acc testutil.Accumulator @@ -307,7 +314,7 @@ func TestHttpJsonGET(t *testing.T) { Name: "", Method: "GET", Parameters: params, - client: RealHTTPClient{client: &http.Client{}}, + client: &RealHTTPClient{client: &http.Client{}}, } var acc testutil.Accumulator @@ -381,7 +388,7 @@ func TestHttpJsonPOST(t *testing.T) { Name: "", Method: "POST", Parameters: params, - client: RealHTTPClient{client: &http.Client{}}, + client: &RealHTTPClient{client: &http.Client{}}, } var acc testutil.Accumulator diff --git a/plugins/inputs/influxdb/influxdb.go b/plugins/inputs/influxdb/influxdb.go index 63a3c1854..5af9a0731 100644 --- a/plugins/inputs/influxdb/influxdb.go +++ b/plugins/inputs/influxdb/influxdb.go @@ -7,6 +7,7 @@ import ( "net/http" "strings" "sync" + "time" "github.com/influxdata/telegraf" "github.com/influxdata/telegraf/plugins/inputs" @@ -70,6 +71,15 @@ type point struct { Values map[string]interface{} `json:"values"` } +var tr = &http.Transport{ + ResponseHeaderTimeout: time.Duration(3 * time.Second), +} + +var client = &http.Client{ + Transport: tr, + Timeout: time.Duration(4 * time.Second), +} + // Gathers data from a particular URL // Parameters: // acc : The telegraf Accumulator to use @@ -81,7 +91,7 @@ func (i *InfluxDB) gatherURL( acc telegraf.Accumulator, url string, ) error { - resp, err := http.Get(url) + resp, err := client.Get(url) if err != nil { return err } diff --git a/plugins/inputs/ipmi_sensor/README.md b/plugins/inputs/ipmi_sensor/README.md new file mode 100644 index 000000000..2ece4ea20 --- /dev/null +++ b/plugins/inputs/ipmi_sensor/README.md @@ -0,0 +1,42 @@ +# Telegraf ipmi plugin + +Get bare metal metrics using the command line utility `ipmitool` + +see ipmitool(https://sourceforge.net/projects/ipmitool/files/ipmitool/) + +The plugin will use the following command to collect remote host sensor stats: + +ipmitool -I lan -H 192.168.1.1 -U USERID -P PASSW0RD sdr + +## Measurements + +- ipmi_sensor: + + * Tags: `name`, `server`, `unit` + * Fields: + - status + - value + +## Configuration + +```toml +[[inputs.ipmi]] + ## specify servers via a url matching: + ## [username[:password]@][protocol[(address)]] + ## e.g. 
+  ## root:passwd@lan(127.0.0.1)
+  ##
+  servers = ["USERID:PASSW0RD@lan(10.20.2.203)"]
+```
+
+## Output
+
+```
+> ipmi_sensor,server=10.20.2.203,unit=degrees_c,name=ambient_temp status=1i,value=20 1458488465012559455
+> ipmi_sensor,server=10.20.2.203,unit=feet,name=altitude status=1i,value=80 1458488465012688613
+> ipmi_sensor,server=10.20.2.203,unit=watts,name=avg_power status=1i,value=220 1458488465012776511
+> ipmi_sensor,server=10.20.2.203,unit=volts,name=planar_3.3v status=1i,value=3.28 1458488465012861875
+> ipmi_sensor,server=10.20.2.203,unit=volts,name=planar_vbat status=1i,value=3.04 1458488465013072508
+> ipmi_sensor,server=10.20.2.203,unit=rpm,name=fan_1a_tach status=1i,value=2610 1458488465013137932
+> ipmi_sensor,server=10.20.2.203,unit=rpm,name=fan_1b_tach status=1i,value=1775 1458488465013279896
+```
diff --git a/plugins/inputs/ipmi_sensor/command.go b/plugins/inputs/ipmi_sensor/command.go
new file mode 100644
index 000000000..353c27d36
--- /dev/null
+++ b/plugins/inputs/ipmi_sensor/command.go
@@ -0,0 +1,38 @@
+package ipmi_sensor
+
+import (
+	"bytes"
+	"fmt"
+	"os/exec"
+	"strings"
+)
+
+type CommandRunner struct{}
+
+func (t CommandRunner) cmd(conn *Connection, args ...string) *exec.Cmd {
+	path := conn.Path
+	opts := append(conn.options(), args...)
+
+	if path == "" {
+		path = "ipmitool"
+	}
+
+	return exec.Command(path, opts...)
+
+}
+
+func (t CommandRunner) Run(conn *Connection, args ...string) (string, error) {
+	cmd := t.cmd(conn, args...)
+	var stdout bytes.Buffer
+	var stderr bytes.Buffer
+	cmd.Stdout = &stdout
+	cmd.Stderr = &stderr
+
+	err := cmd.Run()
+	if err != nil {
+		return "", fmt.Errorf("run %s %s: %s (%s)",
+			cmd.Path, strings.Join(cmd.Args, " "), stderr.String(), err)
+	}
+
+	return stdout.String(), err
+}
diff --git a/plugins/inputs/ipmi_sensor/connection.go b/plugins/inputs/ipmi_sensor/connection.go
new file mode 100644
index 000000000..1e9bfbdcb
--- /dev/null
+++ b/plugins/inputs/ipmi_sensor/connection.go
@@ -0,0 +1,89 @@
+package ipmi_sensor
+
+import (
+	"fmt"
+	"net"
+	"strconv"
+	"strings"
+)
+
+// Connection properties for a Client
+type Connection struct {
+	Hostname  string
+	Username  string
+	Password  string
+	Path      string
+	Port      int
+	Interface string
+}
+
+func NewConnection(server string) *Connection {
+	conn := &Connection{}
+	inx1 := strings.Index(server, "@")
+	inx2 := strings.Index(server, "(")
+	inx3 := strings.Index(server, ")")
+
+	connstr := server
+
+	if inx1 > 0 {
+		security := server[0:inx1]
+		connstr = server[inx1+1 : len(server)]
+		up := strings.SplitN(security, ":", 2)
+		conn.Username = up[0]
+		// the password is optional in the server string
+		if len(up) > 1 {
+			conn.Password = up[1]
+		}
+	}
+
+	if inx2 > 0 {
+		inx2 = strings.Index(connstr, "(")
+		inx3 = strings.Index(connstr, ")")
+
+		conn.Interface = connstr[0:inx2]
+		conn.Hostname = connstr[inx2+1 : inx3]
+	}
+
+	return conn
+}
+
+func (t *Connection) options() []string {
+	intf := t.Interface
+	if intf == "" {
+		intf = "lan"
+	}
+
+	options := []string{
+		"-H", t.Hostname,
+		"-U", t.Username,
+		"-P", t.Password,
+		"-I", intf,
+	}
+
+	if t.Port != 0 {
+		options = append(options, "-p", strconv.Itoa(t.Port))
+	}
+
+	return options
+}
+
+// RemoteIP returns the remote (bmc) IP address of the Connection
+func (c *Connection) RemoteIP() string {
+	if net.ParseIP(c.Hostname) == nil {
+		addrs, err := net.LookupHost(c.Hostname)
+		if err == nil && len(addrs) > 0 {
+			return addrs[0]
+		}
+	}
+	return c.Hostname
+}
+
+// LocalIP returns the local (client) IP address of the Connection
+func (c *Connection) LocalIP() string {
+	conn, err := net.Dial("udp",
fmt.Sprintf("%s:%d", c.Hostname, c.Port)) + if err != nil { + // don't bother returning an error, since this value will never + // make it to the bmc if we can't connect to it. + return c.Hostname + } + _ = conn.Close() + host, _, _ := net.SplitHostPort(conn.LocalAddr().String()) + return host +} diff --git a/plugins/inputs/ipmi_sensor/ipmi.go b/plugins/inputs/ipmi_sensor/ipmi.go new file mode 100644 index 000000000..aec56a0e4 --- /dev/null +++ b/plugins/inputs/ipmi_sensor/ipmi.go @@ -0,0 +1,129 @@ +package ipmi_sensor + +import ( + "strconv" + "strings" + "time" + + "github.com/influxdata/telegraf" + "github.com/influxdata/telegraf/plugins/inputs" +) + +type Ipmi struct { + Servers []string + runner Runner +} + +var sampleConfig = ` + ## specify servers via a url matching: + ## [username[:password]@][protocol[(address)]] + ## e.g. + ## root:passwd@lan(127.0.0.1) + ## + servers = ["USERID:PASSW0RD@lan(192.168.1.1)"] +` + +func NewIpmi() *Ipmi { + return &Ipmi{ + runner: CommandRunner{}, + } +} + +func (m *Ipmi) SampleConfig() string { + return sampleConfig +} + +func (m *Ipmi) Description() string { + return "Read metrics from one or many bare metal servers" +} + +func (m *Ipmi) Gather(acc telegraf.Accumulator) error { + if m.runner == nil { + m.runner = CommandRunner{} + } + for _, serv := range m.Servers { + err := m.gatherServer(serv, acc) + if err != nil { + return err + } + } + + return nil +} + +func (m *Ipmi) gatherServer(serv string, acc telegraf.Accumulator) error { + conn := NewConnection(serv) + + res, err := m.runner.Run(conn, "sdr") + if err != nil { + return err + } + + // each line will look something like + // Planar VBAT | 3.05 Volts | ok + lines := strings.Split(res, "\n") + for i := 0; i < len(lines); i++ { + vals := strings.Split(lines[i], "|") + if len(vals) != 3 { + continue + } + + tags := map[string]string{ + "server": conn.Hostname, + "name": transform(vals[0]), + } + + fields := make(map[string]interface{}) + if strings.EqualFold("ok", trim(vals[2])) { + fields["status"] = 1 + } else { + fields["status"] = 0 + } + + val1 := trim(vals[1]) + + if strings.Index(val1, " ") > 0 { + // split middle column into value and unit + valunit := strings.SplitN(val1, " ", 2) + fields["value"] = Atofloat(valunit[0]) + if len(valunit) > 1 { + tags["unit"] = transform(valunit[1]) + } + } else { + fields["value"] = 0.0 + } + + acc.AddFields("ipmi_sensor", fields, tags, time.Now()) + } + + return nil +} + +type Runner interface { + Run(conn *Connection, args ...string) (string, error) +} + +func Atofloat(val string) float64 { + f, err := strconv.ParseFloat(val, 64) + if err != nil { + return 0.0 + } else { + return f + } +} + +func trim(s string) string { + return strings.TrimSpace(s) +} + +func transform(s string) string { + s = trim(s) + s = strings.ToLower(s) + return strings.Replace(s, " ", "_", -1) +} + +func init() { + inputs.Add("ipmi_sensor", func() telegraf.Input { + return &Ipmi{} + }) +} diff --git a/plugins/inputs/ipmi_sensor/ipmi_test.go b/plugins/inputs/ipmi_sensor/ipmi_test.go new file mode 100644 index 000000000..c62447e39 --- /dev/null +++ b/plugins/inputs/ipmi_sensor/ipmi_test.go @@ -0,0 +1,275 @@ +package ipmi_sensor + +import ( + "testing" + + "github.com/influxdata/telegraf/testutil" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +const serv = "USERID:PASSW0RD@lan(192.168.1.1)" + +const cmdReturn = ` +Ambient Temp | 20 degrees C | ok +Altitude | 80 feet | ok +Avg Power | 210 Watts | ok +Planar 3.3V | 3.29 Volts | ok +Planar 5V 
| 4.90 Volts | ok +Planar 12V | 12.04 Volts | ok +Planar VBAT | 3.05 Volts | ok +Fan 1A Tach | 2610 RPM | ok +Fan 1B Tach | 1775 RPM | ok +Fan 2A Tach | 2001 RPM | ok +Fan 2B Tach | 1275 RPM | ok +Fan 3A Tach | 2929 RPM | ok +Fan 3B Tach | 2125 RPM | ok +Fan 1 | 0x00 | ok +Fan 2 | 0x00 | ok +Fan 3 | 0x00 | ok +Front Panel | 0x00 | ok +Video USB | 0x00 | ok +DASD Backplane 1 | 0x00 | ok +SAS Riser | 0x00 | ok +PCI Riser 1 | 0x00 | ok +PCI Riser 2 | 0x00 | ok +CPU 1 | 0x00 | ok +CPU 2 | 0x00 | ok +All CPUs | 0x00 | ok +One of The CPUs | 0x00 | ok +IOH Temp Status | 0x00 | ok +CPU 1 OverTemp | 0x00 | ok +CPU 2 OverTemp | 0x00 | ok +CPU Fault Reboot | 0x00 | ok +Aux Log | 0x00 | ok +NMI State | 0x00 | ok +ABR Status | 0x00 | ok +Firmware Error | 0x00 | ok +PCIs | 0x00 | ok +CPUs | 0x00 | ok +DIMMs | 0x00 | ok +Sys Board Fault | 0x00 | ok +Power Supply 1 | 0x00 | ok +Power Supply 2 | 0x00 | ok +PS 1 Fan Fault | 0x00 | ok +PS 2 Fan Fault | 0x00 | ok +VT Fault | 0x00 | ok +Pwr Rail A Fault | 0x00 | ok +Pwr Rail B Fault | 0x00 | ok +Pwr Rail C Fault | 0x00 | ok +Pwr Rail D Fault | 0x00 | ok +Pwr Rail E Fault | 0x00 | ok +PS 1 Therm Fault | 0x00 | ok +PS 2 Therm Fault | 0x00 | ok +PS1 12V OV Fault | 0x00 | ok +PS2 12V OV Fault | 0x00 | ok +PS1 12V UV Fault | 0x00 | ok +PS2 12V UV Fault | 0x00 | ok +PS1 12V OC Fault | 0x00 | ok +PS2 12V OC Fault | 0x00 | ok +PS 1 VCO Fault | 0x00 | ok +PS 2 VCO Fault | 0x00 | ok +Power Unit | 0x00 | ok +Cooling Zone 1 | 0x00 | ok +Cooling Zone 2 | 0x00 | ok +Cooling Zone 3 | 0x00 | ok +Drive 0 | 0x00 | ok +Drive 1 | 0x00 | ok +Drive 2 | 0x00 | ok +Drive 3 | 0x00 | ok +Drive 4 | 0x00 | ok +Drive 5 | 0x00 | ok +Drive 6 | 0x00 | ok +Drive 7 | 0x00 | ok +Drive 8 | 0x00 | ok +Drive 9 | 0x00 | ok +Drive 10 | 0x00 | ok +Drive 11 | 0x00 | ok +Drive 12 | 0x00 | ok +Drive 13 | 0x00 | ok +Drive 14 | 0x00 | ok +Drive 15 | 0x00 | ok +All DIMMS | 0x00 | ok +One of the DIMMs | 0x00 | ok +DIMM 1 | 0x00 | ok +DIMM 2 | 0x00 | ok +DIMM 3 | 0x00 | ok +DIMM 4 | 0x00 | ok +DIMM 5 | 0x00 | ok +DIMM 6 | 0x00 | ok +DIMM 7 | 0x00 | ok +DIMM 8 | 0x00 | ok +DIMM 9 | 0x00 | ok +DIMM 10 | 0x00 | ok +DIMM 11 | 0x00 | ok +DIMM 12 | 0x00 | ok +DIMM 13 | 0x00 | ok +DIMM 14 | 0x00 | ok +DIMM 15 | 0x00 | ok +DIMM 16 | 0x00 | ok +DIMM 17 | 0x00 | ok +DIMM 18 | 0x00 | ok +DIMM 1 Temp | 0x00 | ok +DIMM 2 Temp | 0x00 | ok +DIMM 3 Temp | 0x00 | ok +DIMM 4 Temp | 0x00 | ok +DIMM 5 Temp | 0x00 | ok +DIMM 6 Temp | 0x00 | ok +DIMM 7 Temp | 0x00 | ok +DIMM 8 Temp | 0x00 | ok +DIMM 9 Temp | 0x00 | ok +DIMM 10 Temp | 0x00 | ok +DIMM 11 Temp | 0x00 | ok +DIMM 12 Temp | 0x00 | ok +DIMM 13 Temp | 0x00 | ok +DIMM 14 Temp | 0x00 | ok +DIMM 15 Temp | 0x00 | ok +DIMM 16 Temp | 0x00 | ok +DIMM 17 Temp | 0x00 | ok +DIMM 18 Temp | 0x00 | ok +PCI 1 | 0x00 | ok +PCI 2 | 0x00 | ok +PCI 3 | 0x00 | ok +PCI 4 | 0x00 | ok +All PCI Error | 0x00 | ok +One of PCI Error | 0x00 | ok +IPMI Watchdog | 0x00 | ok +Host Power | 0x00 | ok +DASD Backplane 2 | 0x00 | ok +DASD Backplane 3 | Not Readable | ns +DASD Backplane 4 | Not Readable | ns +Backup Memory | 0x00 | ok +Progress | 0x00 | ok +Planar Fault | 0x00 | ok +SEL Fullness | 0x00 | ok +PCI 5 | 0x00 | ok +OS RealTime Mod | 0x00 | ok +` + +type runnerMock struct { + out string + err error +} + +func newRunnerMock(out string, err error) Runner { + return &runnerMock{ + out: out, + err: err, + } +} + +func (r runnerMock) Run(conn *Connection, args ...string) (out string, err error) { + if r.err != nil { + return out, r.err + } + return r.out, nil +} + +func TestIpmi(t *testing.T) { + i 
:= &Ipmi{ + Servers: []string{"USERID:PASSW0RD@lan(192.168.1.1)"}, + runner: newRunnerMock(cmdReturn, nil), + } + + var acc testutil.Accumulator + + err := i.Gather(&acc) + + require.NoError(t, err) + + assert.Equal(t, acc.NFields(), 266, "non-numeric measurements should be ignored") + + var tests = []struct { + fields map[string]interface{} + tags map[string]string + }{ + { + map[string]interface{}{ + "value": float64(20), + "status": int(1), + }, + map[string]string{ + "name": "ambient_temp", + "server": "192.168.1.1", + "unit": "degrees_c", + }, + }, + { + map[string]interface{}{ + "value": float64(80), + "status": int(1), + }, + map[string]string{ + "name": "altitude", + "server": "192.168.1.1", + "unit": "feet", + }, + }, + { + map[string]interface{}{ + "value": float64(210), + "status": int(1), + }, + map[string]string{ + "name": "avg_power", + "server": "192.168.1.1", + "unit": "watts", + }, + }, + { + map[string]interface{}{ + "value": float64(4.9), + "status": int(1), + }, + map[string]string{ + "name": "planar_5v", + "server": "192.168.1.1", + "unit": "volts", + }, + }, + { + map[string]interface{}{ + "value": float64(3.05), + "status": int(1), + }, + map[string]string{ + "name": "planar_vbat", + "server": "192.168.1.1", + "unit": "volts", + }, + }, + { + map[string]interface{}{ + "value": float64(2610), + "status": int(1), + }, + map[string]string{ + "name": "fan_1a_tach", + "server": "192.168.1.1", + "unit": "rpm", + }, + }, + { + map[string]interface{}{ + "value": float64(1775), + "status": int(1), + }, + map[string]string{ + "name": "fan_1b_tach", + "server": "192.168.1.1", + "unit": "rpm", + }, + }, + } + + for _, test := range tests { + acc.AssertContainsTaggedFields(t, "ipmi_sensor", test.fields, test.tags) + } +} + +func TestIpmiConnection(t *testing.T) { + conn := NewConnection(serv) + assert.Equal(t, "USERID", conn.Username) + assert.Equal(t, "lan", conn.Interface) + +} diff --git a/plugins/inputs/jolokia/README.md b/plugins/inputs/jolokia/README.md index bda0c5f93..3a528b33f 100644 --- a/plugins/inputs/jolokia/README.md +++ b/plugins/inputs/jolokia/README.md @@ -1,16 +1,28 @@ # Telegraf plugin: Jolokia -#### Plugin arguments: -- **context** string: Context root used of jolokia url -- **servers** []Server: List of servers - + **name** string: Server's logical name - + **host** string: Server's ip address or hostname - + **port** string: Server's listening port -- **metrics** []Metric - + **name** string: Name of the measure - + **jmx** string: Jmx path that identifies mbeans attributes - + **pass** []string: Attributes to retain when collecting values - + **drop** []string: Attributes to drop when collecting values +#### Configuration + +```toml +[[inputs.jolokia]] + ## This is the context root used to compose the jolokia url + context = "/jolokia/read" + + ## List of servers exposing jolokia read service + [[inputs.jolokia.servers]] + name = "stable" + host = "192.168.103.2" + port = "8180" + # username = "myuser" + # password = "mypassword" + + ## List of metrics collected on above servers + ## Each metric consists in a name, a jmx path and either + ## a pass or drop slice attribute. + ## This collect all heap memory usage metrics. + [[inputs.jolokia.metrics]] + name = "heap_memory_usage" + jmx = "/java.lang:type=Memory/HeapMemoryUsage" +``` #### Description @@ -21,31 +33,3 @@ See: https://jolokia.org/ # Measurements: Jolokia plugin produces one measure for each metric configured, adding Server's `name`, `host` and `port` as tags. 
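Returning to the ipmi_sensor plugin added above: `NewConnection` splits server strings of the form `[username[:password]@][protocol[(address)]]`. A small worked sketch of that parse (simplified from the code above, with the missing-password guard spelled out):

```go
package main

import (
	"fmt"
	"strings"
)

// parse splits "user:pass@iface(host)" into its parts, following the
// same index-based approach as NewConnection.
func parse(server string) (user, pass, iface, host string) {
	rest := server
	if at := strings.Index(server, "@"); at > 0 {
		// SplitN with a limit of 2 keeps any ":" inside the password intact.
		up := strings.SplitN(server[:at], ":", 2)
		user = up[0]
		if len(up) > 1 {
			pass = up[1]
		}
		rest = server[at+1:]
	}
	if open := strings.Index(rest, "("); open > 0 {
		if end := strings.Index(rest, ")"); end > open {
			iface = rest[:open]
			host = rest[open+1 : end]
		}
	}
	return
}

func main() {
	user, pass, iface, host := parse("USERID:PASSW0RD@lan(10.20.2.203)")
	fmt.Println(user, pass, iface, host) // USERID PASSW0RD lan 10.20.2.203
}
```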
- -Given a configuration like: - -```ini -[jolokia] - -[[jolokia.servers]] - name = "as-service-1" - host = "127.0.0.1" - port = "8080" - -[[jolokia.servers]] - name = "as-service-2" - host = "127.0.0.1" - port = "8180" - -[[jolokia.metrics]] - name = "heap_memory_usage" - jmx = "/java.lang:type=Memory/HeapMemoryUsage" - pass = ["used", "max"] -``` - -The collected metrics will be: - -``` -jolokia_heap_memory_usage name=as-service-1,host=127.0.0.1,port=8080 used=xxx,max=yyy -jolokia_heap_memory_usage name=as-service-2,host=127.0.0.1,port=8180 used=vvv,max=zzz -``` diff --git a/plugins/inputs/jolokia/jolokia.go b/plugins/inputs/jolokia/jolokia.go index 2e0bba6d5..a65f5ff8f 100644 --- a/plugins/inputs/jolokia/jolokia.go +++ b/plugins/inputs/jolokia/jolokia.go @@ -7,6 +7,7 @@ import ( "io/ioutil" "net/http" "net/url" + "time" "github.com/influxdata/telegraf" "github.com/influxdata/telegraf/plugins/inputs" @@ -160,6 +161,11 @@ func (j *Jolokia) Gather(acc telegraf.Accumulator) error { func init() { inputs.Add("jolokia", func() telegraf.Input { - return &Jolokia{jClient: &JolokiaClientImpl{client: &http.Client{}}} + tr := &http.Transport{ResponseHeaderTimeout: time.Duration(3 * time.Second)} + client := &http.Client{ + Transport: tr, + Timeout: time.Duration(4 * time.Second), + } + return &Jolokia{jClient: &JolokiaClientImpl{client: client}} }) } diff --git a/plugins/inputs/kafka_consumer/README.md b/plugins/inputs/kafka_consumer/README.md index 885c67a28..f5f6a359e 100644 --- a/plugins/inputs/kafka_consumer/README.md +++ b/plugins/inputs/kafka_consumer/README.md @@ -22,7 +22,8 @@ from the same topic in parallel. ## Offset (must be either "oldest" or "newest") offset = "oldest" - ## Data format to consume. This can be "json", "influx" or "graphite" + ## Data format to consume. + ## Each data format has it's own unique set of configuration options, read ## more about them here: ## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md diff --git a/plugins/inputs/kafka_consumer/kafka_consumer.go b/plugins/inputs/kafka_consumer/kafka_consumer.go index bc0d225c6..a2cda43d6 100644 --- a/plugins/inputs/kafka_consumer/kafka_consumer.go +++ b/plugins/inputs/kafka_consumer/kafka_consumer.go @@ -14,10 +14,11 @@ import ( ) type Kafka struct { - ConsumerGroup string - Topics []string - ZookeeperPeers []string - Consumer *consumergroup.ConsumerGroup + ConsumerGroup string + Topics []string + ZookeeperPeers []string + ZookeeperChroot string + Consumer *consumergroup.ConsumerGroup // Legacy metric buffer support MetricBuffer int @@ -48,12 +49,14 @@ var sampleConfig = ` topics = ["telegraf"] ## an array of Zookeeper connection strings zookeeper_peers = ["localhost:2181"] + ## Zookeeper Chroot + zookeeper_chroot = "/" ## the name of the consumer group consumer_group = "telegraf_metrics_consumers" ## Offset (must be either "oldest" or "newest") offset = "oldest" - ## Data format to consume. This can be "json", "influx" or "graphite" + ## Data format to consume. 
## Each data format has it's own unique set of configuration options, read ## more about them here: ## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md @@ -80,6 +83,7 @@ func (k *Kafka) Start(acc telegraf.Accumulator) error { k.acc = acc config := consumergroup.NewConfig() + config.Zookeeper.Chroot = k.ZookeeperChroot switch strings.ToLower(k.Offset) { case "oldest", "": config.Offsets.Initial = sarama.OffsetOldest diff --git a/plugins/inputs/mailchimp/chimp_api.go b/plugins/inputs/mailchimp/chimp_api.go index fe2c56d0c..75c9a30d7 100644 --- a/plugins/inputs/mailchimp/chimp_api.go +++ b/plugins/inputs/mailchimp/chimp_api.go @@ -10,6 +10,7 @@ import ( "net/url" "regexp" "sync" + "time" ) const ( @@ -120,7 +121,10 @@ func (a *ChimpAPI) GetReport(campaignID string) (Report, error) { } func runChimp(api *ChimpAPI, params ReportsParams) ([]byte, error) { - client := &http.Client{Transport: api.Transport} + client := &http.Client{ + Transport: api.Transport, + Timeout: time.Duration(4 * time.Second), + } var b bytes.Buffer req, err := http.NewRequest("GET", api.url.String(), &b) diff --git a/plugins/inputs/memcached/memcached.go b/plugins/inputs/memcached/memcached.go index 24ff09d77..c631a1ed1 100644 --- a/plugins/inputs/memcached/memcached.go +++ b/plugins/inputs/memcached/memcached.go @@ -94,14 +94,15 @@ func (m *Memcached) gatherServer( acc telegraf.Accumulator, ) error { var conn net.Conn + var err error if unix { - conn, err := net.DialTimeout("unix", address, defaultTimeout) + conn, err = net.DialTimeout("unix", address, defaultTimeout) if err != nil { return err } defer conn.Close() } else { - _, _, err := net.SplitHostPort(address) + _, _, err = net.SplitHostPort(address) if err != nil { address = address + ":11211" } @@ -113,6 +114,10 @@ func (m *Memcached) gatherServer( defer conn.Close() } + if conn == nil { + return fmt.Errorf("Failed to create net connection") + } + // Extend connection conn.SetDeadline(time.Now().Add(defaultTimeout)) diff --git a/plugins/inputs/mesos/mesos.go b/plugins/inputs/mesos/mesos.go index 5bcda7970..b096a20d9 100644 --- a/plugins/inputs/mesos/mesos.go +++ b/plugins/inputs/mesos/mesos.go @@ -10,6 +10,7 @@ import ( "strconv" "strings" "sync" + "time" "github.com/influxdata/telegraf" "github.com/influxdata/telegraf/plugins/inputs" @@ -33,7 +34,16 @@ var sampleConfig = ` # A list of Mesos masters, default value is localhost:5050. masters = ["localhost:5050"] # Metrics groups to be collected, by default, all enabled. 
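The memcached change above fixes a classic `:=` shadowing bug: inside the branch, `conn, err := net.DialTimeout(...)` declared new variables scoped to the block, so the outer `conn` stayed nil. A minimal illustration of the corrected shape (hypothetical `dial` helper):

```go
package main

import (
	"fmt"
	"net"
)

func dial(unix bool, address string) (net.Conn, error) {
	var conn net.Conn
	var err error
	if unix {
		// Buggy form would be `conn, err := net.Dial(...)`: the := declares
		// fresh variables inside this block, leaving the outer conn nil
		// once the branch exits. Plain assignment fixes it.
		conn, err = net.Dial("unix", address)
	} else {
		conn, err = net.Dial("tcp", address)
	}
	if err != nil {
		return nil, err
	}
	return conn, nil
}

func main() {
	conn, err := dial(false, "127.0.0.1:11211")
	fmt.Println(conn, err)
}
```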
- master_collections = ["resources","master","system","slaves","frameworks","messages","evqueue","registrar"] + master_collections = [ + "resources", + "master", + "system", + "slaves", + "frameworks", + "messages", + "evqueue", + "registrar", + ] ` // SampleConfig returns a sample configuration block @@ -261,6 +271,15 @@ func (m *Mesos) removeGroup(j *map[string]interface{}) { } } +var tr = &http.Transport{ + ResponseHeaderTimeout: time.Duration(3 * time.Second), +} + +var client = &http.Client{ + Transport: tr, + Timeout: time.Duration(4 * time.Second), +} + // This should not belong to the object func (m *Mesos) gatherMetrics(a string, acc telegraf.Accumulator) error { var jsonOut map[string]interface{} @@ -282,7 +301,7 @@ func (m *Mesos) gatherMetrics(a string, acc telegraf.Accumulator) error { ts := strconv.Itoa(m.Timeout) + "ms" - resp, err := http.Get("http://" + a + "/metrics/snapshot?timeout=" + ts) + resp, err := client.Get("http://" + a + "/metrics/snapshot?timeout=" + ts) if err != nil { return err diff --git a/plugins/inputs/mongodb/mongodb.go b/plugins/inputs/mongodb/mongodb.go index 3be04477b..381814531 100644 --- a/plugins/inputs/mongodb/mongodb.go +++ b/plugins/inputs/mongodb/mongodb.go @@ -103,7 +103,7 @@ func (m *MongoDB) gatherServer(server *Server, acc telegraf.Accumulator) error { dialAddrs[0], err.Error()) } dialInfo.Direct = true - dialInfo.Timeout = time.Duration(10) * time.Second + dialInfo.Timeout = 5 * time.Second if m.Ssl.Enabled { tlsConfig := &tls.Config{} diff --git a/plugins/inputs/mongodb/mongodb_test.go b/plugins/inputs/mongodb/mongodb_test.go index 174128d19..73e68ed37 100644 --- a/plugins/inputs/mongodb/mongodb_test.go +++ b/plugins/inputs/mongodb/mongodb_test.go @@ -43,7 +43,7 @@ func testSetup(m *testing.M) { log.Fatalf("Unable to parse URL (%s), %s\n", dialAddrs[0], err.Error()) } dialInfo.Direct = true - dialInfo.Timeout = time.Duration(10) * time.Second + dialInfo.Timeout = 5 * time.Second sess, err := mgo.DialWithInfo(dialInfo) if err != nil { log.Fatalf("Unable to connect to MongoDB, %s\n", err.Error()) diff --git a/plugins/inputs/mqtt_consumer/README.md b/plugins/inputs/mqtt_consumer/README.md index 787494975..d5518b632 100644 --- a/plugins/inputs/mqtt_consumer/README.md +++ b/plugins/inputs/mqtt_consumer/README.md @@ -35,7 +35,7 @@ The plugin expects messages in the ## Use SSL but skip chain & host verification # insecure_skip_verify = false - ## Data format to consume. This can be "json", "influx" or "graphite" + ## Data format to consume. 
## Each data format has it's own unique set of configuration options, read ## more about them here: ## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md diff --git a/plugins/inputs/mqtt_consumer/mqtt_consumer.go b/plugins/inputs/mqtt_consumer/mqtt_consumer.go index 2d0fbef06..c64d2139b 100644 --- a/plugins/inputs/mqtt_consumer/mqtt_consumer.go +++ b/plugins/inputs/mqtt_consumer/mqtt_consumer.go @@ -11,7 +11,7 @@ import ( "github.com/influxdata/telegraf/plugins/inputs" "github.com/influxdata/telegraf/plugins/parsers" - "git.eclipse.org/gitroot/paho/org.eclipse.paho.mqtt.golang.git" + "github.com/eclipse/paho.mqtt.golang" ) type MQTTConsumer struct { @@ -26,6 +26,9 @@ type MQTTConsumer struct { // Legacy metric buffer support MetricBuffer int + PersistentSession bool + ClientID string `toml:"client_id"` + // Path to CA file SSLCA string `toml:"ssl_ca"` // Path to host cert file @@ -36,7 +39,7 @@ type MQTTConsumer struct { InsecureSkipVerify bool sync.Mutex - client *mqtt.Client + client mqtt.Client // channel of all incoming raw mqtt messages in chan mqtt.Message done chan struct{} @@ -57,6 +60,13 @@ var sampleConfig = ` "sensors/#", ] + # if true, messages that can't be delivered while the subscriber is offline + # will be delivered when it comes back (such as on service restart). + # NOTE: if true, client_id MUST be set + persistent_session = false + # If empty, a random client ID will be generated. + client_id = "" + ## username and password to connect MQTT server. # username = "telegraf" # password = "metricsmetricsmetricsmetrics" @@ -68,7 +78,7 @@ var sampleConfig = ` ## Use SSL but skip chain & host verification # insecure_skip_verify = false - ## Data format to consume. This can be "json", "influx" or "graphite" + ## Data format to consume. 
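Behind the `persistent_session` and `client_id` options above (enforced in the `Start` hunk below): an MQTT broker keys a stored session by client ID, so persistence with a random per-process ID would orphan the queued messages. A sketch of the validation and the clean-session inversion, using the paho client imported above (simplified, not the plugin's exact code):

```go
package main

import (
	"fmt"

	mqtt "github.com/eclipse/paho.mqtt.golang"
)

func consumerOpts(persistent bool, clientID string) (*mqtt.ClientOptions, error) {
	if persistent && clientID == "" {
		// Without a stable ID, the broker cannot match this client to the
		// session that holds its undelivered messages.
		return nil, fmt.Errorf("persistent_session requires client_id")
	}
	opts := mqtt.NewClientOptions().AddBroker("tcp://localhost:1883")
	if clientID != "" {
		opts.SetClientID(clientID)
	}
	// CleanSession is the inverse of the plugin's persistent_session flag.
	opts.SetCleanSession(!persistent)
	return opts, nil
}

func main() {
	if _, err := consumerOpts(true, ""); err != nil {
		fmt.Println(err) // persistent_session requires client_id
	}
}
```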
## Each data format has it's own unique set of configuration options, read ## more about them here: ## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md @@ -91,6 +101,11 @@ func (m *MQTTConsumer) Start(acc telegraf.Accumulator) error { m.Lock() defer m.Unlock() + if m.PersistentSession && m.ClientID == "" { + return fmt.Errorf("ERROR MQTT Consumer: When using persistent_session" + + " = true, you MUST also set client_id") + } + m.acc = acc if m.QoS > 2 || m.QoS < 0 { return fmt.Errorf("MQTT Consumer, invalid QoS value: %d", m.QoS) @@ -148,7 +163,7 @@ func (m *MQTTConsumer) receiver() { } } -func (m *MQTTConsumer) recvMessage(_ *mqtt.Client, msg mqtt.Message) { +func (m *MQTTConsumer) recvMessage(_ mqtt.Client, msg mqtt.Message) { m.in <- msg } @@ -166,7 +181,11 @@ func (m *MQTTConsumer) Gather(acc telegraf.Accumulator) error { func (m *MQTTConsumer) createOpts() (*mqtt.ClientOptions, error) { opts := mqtt.NewClientOptions() - opts.SetClientID("Telegraf-Consumer-" + internal.RandomString(5)) + if m.ClientID == "" { + opts.SetClientID("Telegraf-Consumer-" + internal.RandomString(5)) + } else { + opts.SetClientID(m.ClientID) + } tlsCfg, err := internal.GetTLSConfig( m.SSLCert, m.SSLKey, m.SSLCA, m.InsecureSkipVerify) @@ -181,7 +200,7 @@ func (m *MQTTConsumer) createOpts() (*mqtt.ClientOptions, error) { } user := m.Username - if user == "" { + if user != "" { opts.SetUsername(user) } password := m.Password @@ -199,6 +218,7 @@ func (m *MQTTConsumer) createOpts() (*mqtt.ClientOptions, error) { } opts.SetAutoReconnect(true) opts.SetKeepAlive(time.Second * 60) + opts.SetCleanSession(!m.PersistentSession) return opts, nil } diff --git a/plugins/inputs/mqtt_consumer/mqtt_consumer_test.go b/plugins/inputs/mqtt_consumer/mqtt_consumer_test.go index b1dd59bcf..7090a46c3 100644 --- a/plugins/inputs/mqtt_consumer/mqtt_consumer_test.go +++ b/plugins/inputs/mqtt_consumer/mqtt_consumer_test.go @@ -7,7 +7,9 @@ import ( "github.com/influxdata/telegraf/plugins/parsers" "github.com/influxdata/telegraf/testutil" - "git.eclipse.org/gitroot/paho/org.eclipse.paho.mqtt.golang.git" + "github.com/stretchr/testify/assert" + + "github.com/eclipse/paho.mqtt.golang" ) const ( @@ -28,6 +30,52 @@ func newTestMQTTConsumer() (*MQTTConsumer, chan mqtt.Message) { return n, in } +// Test that default client has random ID +func TestRandomClientID(t *testing.T) { + m1 := &MQTTConsumer{ + Servers: []string{"localhost:1883"}} + opts, err := m1.createOpts() + assert.NoError(t, err) + + m2 := &MQTTConsumer{ + Servers: []string{"localhost:1883"}} + opts2, err2 := m2.createOpts() + assert.NoError(t, err2) + + assert.NotEqual(t, opts.ClientID, opts2.ClientID) +} + +// Test that default client has random ID +func TestClientID(t *testing.T) { + m1 := &MQTTConsumer{ + Servers: []string{"localhost:1883"}, + ClientID: "telegraf-test", + } + opts, err := m1.createOpts() + assert.NoError(t, err) + + m2 := &MQTTConsumer{ + Servers: []string{"localhost:1883"}, + ClientID: "telegraf-test", + } + opts2, err2 := m2.createOpts() + assert.NoError(t, err2) + + assert.Equal(t, "telegraf-test", opts2.ClientID) + assert.Equal(t, "telegraf-test", opts.ClientID) +} + +// Test that Start() fails if client ID is not set but persistent is +func TestPersistentClientIDFail(t *testing.T) { + m1 := &MQTTConsumer{ + Servers: []string{"localhost:1883"}, + PersistentSession: true, + } + acc := testutil.Accumulator{} + err := m1.Start(&acc) + assert.Error(t, err) +} + // Test that the parser parses NATS messages into metrics func 
TestRunParser(t *testing.T) { n, in := newTestMQTTConsumer() diff --git a/plugins/inputs/mysql/mysql.go b/plugins/inputs/mysql/mysql.go index b2e2729a9..474067716 100644 --- a/plugins/inputs/mysql/mysql.go +++ b/plugins/inputs/mysql/mysql.go @@ -2,8 +2,10 @@ package mysql import ( "database/sql" + "net/url" "strconv" "strings" + "time" _ "github.com/go-sql-driver/mysql" "github.com/influxdata/telegraf" @@ -26,6 +28,8 @@ var sampleConfig = ` servers = ["tcp(127.0.0.1:3306)/"] ` +var defaultTimeout = time.Second * time.Duration(5) + func (m *Mysql) SampleConfig() string { return sampleConfig } @@ -122,6 +126,10 @@ func (m *Mysql) gatherServer(serv string, acc telegraf.Accumulator) error { serv = "" } + serv, err := dsnAddTimeout(serv) + if err != nil { + return err + } db, err := sql.Open("mysql", serv) if err != nil { return err @@ -207,6 +215,27 @@ func (m *Mysql) gatherServer(serv string, acc telegraf.Accumulator) error { return nil } +func dsnAddTimeout(dsn string) (string, error) { + + // DSN "?timeout=5s" is not valid, but "/?timeout=5s" is valid ("" and "/" + // are the same DSN) + if dsn == "" { + dsn = "/" + } + u, err := url.Parse(dsn) + if err != nil { + return "", err + } + v := u.Query() + + // Only override timeout if not already defined + if _, ok := v["timeout"]; ok == false { + v.Add("timeout", defaultTimeout.String()) + u.RawQuery = v.Encode() + } + return u.String(), nil +} + func init() { inputs.Add("mysql", func() telegraf.Input { return &Mysql{} diff --git a/plugins/inputs/mysql/mysql_test.go b/plugins/inputs/mysql/mysql_test.go index 855e8ba52..9e4073432 100644 --- a/plugins/inputs/mysql/mysql_test.go +++ b/plugins/inputs/mysql/mysql_test.go @@ -84,3 +84,34 @@ func TestMysqlParseDSN(t *testing.T) { } } } + +func TestMysqlDNSAddTimeout(t *testing.T) { + tests := []struct { + input string + output string + }{ + { + "", + "/?timeout=5s", + }, + { + "tcp(192.168.1.1:3306)/", + "tcp(192.168.1.1:3306)/?timeout=5s", + }, + { + "root:passwd@tcp(192.168.1.1:3306)/?tls=false", + "root:passwd@tcp(192.168.1.1:3306)/?timeout=5s&tls=false", + }, + { + "root:passwd@tcp(192.168.1.1:3306)/?tls=false&timeout=10s", + "root:passwd@tcp(192.168.1.1:3306)/?tls=false&timeout=10s", + }, + } + + for _, test := range tests { + output, _ := dsnAddTimeout(test.input) + if output != test.output { + t.Errorf("Expected %s, got %s\n", test.output, output) + } + } +} diff --git a/plugins/inputs/nats_consumer/README.md b/plugins/inputs/nats_consumer/README.md index 90563ff55..31d947588 100644 --- a/plugins/inputs/nats_consumer/README.md +++ b/plugins/inputs/nats_consumer/README.md @@ -23,7 +23,8 @@ from a NATS cluster in parallel. ## Maximum number of metrics to buffer between collection intervals metric_buffer = 100000 - ## Data format to consume. This can be "json", "influx" or "graphite" + ## Data format to consume. + ## Each data format has it's own unique set of configuration options, read ## more about them here: ## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md diff --git a/plugins/inputs/nats_consumer/nats_consumer.go b/plugins/inputs/nats_consumer/nats_consumer.go index 235601100..232d5740f 100644 --- a/plugins/inputs/nats_consumer/nats_consumer.go +++ b/plugins/inputs/nats_consumer/nats_consumer.go @@ -55,7 +55,7 @@ var sampleConfig = ` ## name a queue group queue_group = "telegraf_consumers" - ## Data format to consume. This can be "json", "influx" or "graphite" + ## Data format to consume. 
## Each data format has it's own unique set of configuration options, read ## more about them here: ## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md diff --git a/plugins/inputs/net_response/README.md b/plugins/inputs/net_response/README.md index 69e72a379..d6a0e1278 100644 --- a/plugins/inputs/net_response/README.md +++ b/plugins/inputs/net_response/README.md @@ -52,7 +52,7 @@ It can also check response text. ### Tags: - All measurements have the following tags: - - host + - server - port - protocol @@ -60,7 +60,7 @@ It can also check response text. ``` $ ./telegraf -config telegraf.conf -input-filter net_response -test -net_response,host=127.0.0.1,port=22,protocol=tcp response_time=0.18070360500000002,string_found=true 1454785464182527094 -net_response,host=127.0.0.1,port=2222,protocol=tcp response_time=1.090124776,string_found=false 1454784433658942325 +net_response,server=192.168.2.2,port=22,protocol=tcp response_time=0.18070360500000002,string_found=true 1454785464182527094 +net_response,server=192.168.2.2,port=2222,protocol=tcp response_time=1.090124776,string_found=false 1454784433658942325 ``` diff --git a/plugins/inputs/net_response/net_response.go b/plugins/inputs/net_response/net_response.go index 66bf2ae7b..7b5cfa785 100644 --- a/plugins/inputs/net_response/net_response.go +++ b/plugins/inputs/net_response/net_response.go @@ -169,7 +169,7 @@ func (c *NetResponse) Gather(acc telegraf.Accumulator) error { return errors.New("Bad port") } // Prepare data - tags := map[string]string{"host": host, "port": port} + tags := map[string]string{"server": host, "port": port} var fields map[string]interface{} // Gather data if c.Protocol == "tcp" { diff --git a/plugins/inputs/net_response/net_response_test.go b/plugins/inputs/net_response/net_response_test.go index 538d059c0..a6dfbcc94 100644 --- a/plugins/inputs/net_response/net_response_test.go +++ b/plugins/inputs/net_response/net_response_test.go @@ -69,7 +69,7 @@ func TestTCPOK1(t *testing.T) { "string_found": true, "response_time": 1.0, }, - map[string]string{"host": "127.0.0.1", + map[string]string{"server": "127.0.0.1", "port": "2004", "protocol": "tcp", }, @@ -109,7 +109,7 @@ func TestTCPOK2(t *testing.T) { "string_found": false, "response_time": 1.0, }, - map[string]string{"host": "127.0.0.1", + map[string]string{"server": "127.0.0.1", "port": "2004", "protocol": "tcp", }, @@ -164,7 +164,7 @@ func TestUDPOK1(t *testing.T) { "string_found": true, "response_time": 1.0, }, - map[string]string{"host": "127.0.0.1", + map[string]string{"server": "127.0.0.1", "port": "2004", "protocol": "udp", }, diff --git a/plugins/inputs/nginx/README.md b/plugins/inputs/nginx/README.md new file mode 100644 index 000000000..918ee08ad --- /dev/null +++ b/plugins/inputs/nginx/README.md @@ -0,0 +1,47 @@ +# Telegraf Plugin: Nginx + +### Configuration: + +``` +# Read Nginx's basic status information (ngx_http_stub_status_module) +[[inputs.nginx]] + ## An array of Nginx stub_status URI to gather stats. + urls = ["http://localhost/server_status"] +``` + +### Measurements & Fields: + +- Measurement + - accepts + - active + - handled + - reading + - requests + - waiting + - writing + +### Tags: + +- All measurements have the following tags: + - port + - server + +### Example Output: + +Using this configuration: +``` +[[inputs.nginx]] + ## An array of Nginx stub_status URI to gather stats. 
+  urls = ["http://localhost/status"]
+```
+
+When run with:
+```
+./telegraf -config telegraf.conf -input-filter nginx -test
+```
+
+It produces:
+```
+* Plugin: nginx, Collection 1
+> nginx,port=80,server=localhost accepts=605i,active=2i,handled=605i,reading=0i,requests=12132i,waiting=1i,writing=1i 1456690994701784331
+```
diff --git a/plugins/inputs/nginx/nginx.go b/plugins/inputs/nginx/nginx.go
index 3b008fbf3..c13ba39f3 100644
--- a/plugins/inputs/nginx/nginx.go
+++ b/plugins/inputs/nginx/nginx.go
@@ -58,7 +58,10 @@ var tr = &http.Transport{
 	ResponseHeaderTimeout: time.Duration(3 * time.Second),
 }
 
-var client = &http.Client{Transport: tr}
+var client = &http.Client{
+	Transport: tr,
+	Timeout:   time.Duration(4 * time.Second),
+}
 
 func (n *Nginx) gatherUrl(addr *url.URL, acc telegraf.Accumulator) error {
 	resp, err := client.Get(addr.String())
diff --git a/plugins/inputs/nsq/nsq.go b/plugins/inputs/nsq/nsq.go
index 6b3be66f2..35ba76866 100644
--- a/plugins/inputs/nsq/nsq.go
+++ b/plugins/inputs/nsq/nsq.go
@@ -84,7 +84,10 @@ var tr = &http.Transport{
 	ResponseHeaderTimeout: time.Duration(3 * time.Second),
 }
 
-var client = &http.Client{Transport: tr}
+var client = &http.Client{
+	Transport: tr,
+	Timeout:   time.Duration(4 * time.Second),
+}
 
 func (n *NSQ) gatherEndpoint(e string, acc telegraf.Accumulator) error {
 	u, err := buildURL(e)
diff --git a/plugins/inputs/ntpq/README.md b/plugins/inputs/ntpq/README.md
new file mode 100644
index 000000000..80bf80f39
--- /dev/null
+++ b/plugins/inputs/ntpq/README.md
@@ -0,0 +1,60 @@
+# ntpq Input Plugin
+
+Get standard NTP query metrics, requires ntpq executable.
+
+Below is the documentation of the various headers returned from the NTP query
+command when running `ntpq -p`.
+
+- remote – The remote peer or server being synced to. “LOCAL” is this local host
+(included in case there are no remote peers or servers available);
+- refid – Where or what the remote peer or server is itself synchronised to;
+- st (stratum) – The remote peer or server Stratum;
+- t (type) – Type (u: unicast or manycast client, b: broadcast or multicast client,
+l: local reference clock, s: symmetric peer, A: manycast server,
+B: broadcast server, M: multicast server, see “Automatic Server Discovery”);
+- when – When last polled (seconds ago, “h” hours ago, or “d” days ago);
+- poll – Polling frequency: rfc5905 suggests this ranges in NTPv4 from 4 (16s)
+to 17 (36h) (log2 seconds), however observation suggests the actual displayed
+value is seconds for a much smaller range of 64 (2^6) to 1024 (2^10) seconds;
+- reach – An 8-bit left-shift shift register value recording polls (bit set =
+successful, bit reset = fail) displayed in octal;
+- delay – Round trip communication delay to the remote peer or server (milliseconds);
+- offset – Mean offset (phase) in the times reported between this local host and
+the remote peer or server (RMS, milliseconds);
+- jitter – Mean deviation (jitter) in the time reported for that remote peer or
+server (RMS of difference of multiple time samples, milliseconds);
+
+### Configuration:
+
+```toml
+# Get standard NTP query metrics, requires ntpq executable
+[[inputs.ntpq]]
+  ## If false, set the -n ntpq flag. Can reduce metric gather times.
+ dns_lookup = true +``` + +### Measurements & Fields: + +- ntpq + - delay (float, milliseconds) + - jitter (float, milliseconds) + - offset (float, milliseconds) + - poll (int, seconds) + - reach (int) + - when (int, seconds) + +### Tags: + +- All measurements have the following tags: + - refid + - remote + - type + - stratum + +### Example Output: + +``` +$ telegraf -config ~/ws/telegraf.conf -input-filter ntpq -test +* Plugin: ntpq, Collection 1 +> ntpq,refid=.GPSs.,remote=*time.apple.com,stratum=1,type=u delay=91.797,jitter=3.735,offset=12.841,poll=64i,reach=377i,when=35i 1457960478909556134 +``` diff --git a/plugins/inputs/ntpq/ntpq.go b/plugins/inputs/ntpq/ntpq.go new file mode 100644 index 000000000..5e8ff6536 --- /dev/null +++ b/plugins/inputs/ntpq/ntpq.go @@ -0,0 +1,202 @@ +// +build !windows + +package ntpq + +import ( + "bufio" + "bytes" + "log" + "os/exec" + "strconv" + "strings" + + "github.com/influxdata/telegraf" + "github.com/influxdata/telegraf/plugins/inputs" +) + +// Mapping of ntpq header names to tag keys +var tagHeaders map[string]string = map[string]string{ + "remote": "remote", + "refid": "refid", + "st": "stratum", + "t": "type", +} + +// Mapping of the ntpq tag key to the index in the command output +var tagI map[string]int = map[string]int{ + "remote": -1, + "refid": -1, + "stratum": -1, + "type": -1, +} + +// Mapping of float metrics to their index in the command output +var floatI map[string]int = map[string]int{ + "delay": -1, + "offset": -1, + "jitter": -1, +} + +// Mapping of int metrics to their index in the command output +var intI map[string]int = map[string]int{ + "when": -1, + "poll": -1, + "reach": -1, +} + +type NTPQ struct { + runQ func() ([]byte, error) + + DNSLookup bool `toml:"dns_lookup"` +} + +func (n *NTPQ) Description() string { + return "Get standard NTP query metrics, requires ntpq executable." +} + +func (n *NTPQ) SampleConfig() string { + return ` + ## If false, set the -n ntpq flag. Can reduce metric gather time. 
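The `Gather` code below resolves column indexes from the header line via the tables above, then normalizes the `when` column, which ntpq prints either as bare seconds or with m/h/d suffixes. A standalone sketch of just that normalization (hypothetical helper; the plugin inlines the logic):

```go
package main

import (
	"fmt"
	"strconv"
	"strings"
)

// whenToSeconds converts ntpq's `when` values ("101", "2m", "2h", "2d")
// to seconds, mirroring the suffix handling in Gather below.
func whenToSeconds(when string) (int64, error) {
	mult := int64(1)
	switch {
	case strings.HasSuffix(when, "m"):
		mult, when = 60, strings.TrimSuffix(when, "m") // seconds in a minute
	case strings.HasSuffix(when, "h"):
		mult, when = 3600, strings.TrimSuffix(when, "h") // seconds in an hour
	case strings.HasSuffix(when, "d"):
		mult, when = 86400, strings.TrimSuffix(when, "d") // seconds in a day
	}
	n, err := strconv.Atoi(when)
	if err != nil {
		return 0, err
	}
	return int64(n) * mult, nil
}

func main() {
	for _, w := range []string{"101", "2m", "2h", "2d"} {
		s, _ := whenToSeconds(w)
		fmt.Printf("%s -> %ds\n", w, s)
	}
}
```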
+	dns_lookup = true
+`
+}
+
+func (n *NTPQ) Gather(acc telegraf.Accumulator) error {
+	out, err := n.runQ()
+	if err != nil {
+		return err
+	}
+
+	lineCounter := 0
+	scanner := bufio.NewScanner(bytes.NewReader(out))
+	for scanner.Scan() {
+		fields := strings.Fields(scanner.Text())
+		if len(fields) < 2 {
+			continue
+		}
+
+		// If lineCounter == 0, then this is the header line
+		if lineCounter == 0 {
+			for i, field := range fields {
+				// Check if field is a tag:
+				if tagKey, ok := tagHeaders[field]; ok {
+					tagI[tagKey] = i
+					continue
+				}
+
+				// check if field is a float metric:
+				if _, ok := floatI[field]; ok {
+					floatI[field] = i
+					continue
+				}
+
+				// check if field is an int metric:
+				if _, ok := intI[field]; ok {
+					intI[field] = i
+					continue
+				}
+			}
+		} else {
+			tags := make(map[string]string)
+			mFields := make(map[string]interface{})
+
+			// Get tags from output
+			for key, index := range tagI {
+				if index == -1 {
+					continue
+				}
+				tags[key] = fields[index]
+			}
+
+			// Get integer metrics from output
+			for key, index := range intI {
+				if index == -1 {
+					continue
+				}
+
+				if key == "when" {
+					when := fields[index]
+					switch {
+					case strings.HasSuffix(when, "h"):
+						m, err := strconv.Atoi(strings.TrimSuffix(fields[index], "h"))
+						if err != nil {
+							log.Printf("ERROR ntpq: parsing int: %s", fields[index])
+							continue
+						}
+						// seconds in an hour
+						mFields[key] = int64(m) * 3600
+						continue
+					case strings.HasSuffix(when, "d"):
+						m, err := strconv.Atoi(strings.TrimSuffix(fields[index], "d"))
+						if err != nil {
+							log.Printf("ERROR ntpq: parsing int: %s", fields[index])
+							continue
+						}
+						// seconds in a day
+						mFields[key] = int64(m) * 86400
+						continue
+					case strings.HasSuffix(when, "m"):
+						m, err := strconv.Atoi(strings.TrimSuffix(fields[index], "m"))
+						if err != nil {
+							log.Printf("ERROR ntpq: parsing int: %s", fields[index])
+							continue
+						}
+						// seconds in a minute
+						mFields[key] = int64(m) * 60
+						continue
+					}
+				}
+
+				m, err := strconv.Atoi(fields[index])
+				if err != nil {
+					log.Printf("ERROR ntpq: parsing int: %s", fields[index])
+					continue
+				}
+				mFields[key] = int64(m)
+			}
+
+			// get float metrics from output
+			for key, index := range floatI {
+				if index == -1 {
+					continue
+				}
+
+				m, err := strconv.ParseFloat(fields[index], 64)
+				if err != nil {
+					log.Printf("ERROR ntpq: parsing float: %s", fields[index])
+					continue
+				}
+				mFields[key] = m
+			}
+
+			acc.AddFields("ntpq", mFields, tags)
+		}
+
+		lineCounter++
+	}
+	return nil
+}
+
+func (n *NTPQ) runq() ([]byte, error) {
+	bin, err := exec.LookPath("ntpq")
+	if err != nil {
+		return nil, err
+	}
+
+	var cmd *exec.Cmd
+	if n.DNSLookup {
+		cmd = exec.Command(bin, "-p")
+	} else {
+		cmd = exec.Command(bin, "-p", "-n")
+	}
+
+	return cmd.Output()
+}
+
+func init() {
+	inputs.Add("ntpq", func() telegraf.Input {
+		n := &NTPQ{}
+		n.runQ = n.runq
+		return n
+	})
+}
diff --git a/plugins/inputs/ntpq/ntpq_test.go b/plugins/inputs/ntpq/ntpq_test.go
new file mode 100644
index 000000000..228eddc62
--- /dev/null
+++ b/plugins/inputs/ntpq/ntpq_test.go
@@ -0,0 +1,422 @@
+// +build !windows
+
+package ntpq
+
+import (
+	"fmt"
+	"testing"
+
+	"github.com/influxdata/telegraf/testutil"
+
+	"github.com/stretchr/testify/assert"
+)
+
+func TestSingleNTPQ(t *testing.T) {
+	tt := tester{
+		ret: []byte(singleNTPQ),
+		err: nil,
+	}
+	n := &NTPQ{
+		runQ: tt.runqTest,
+	}
+
+	acc := testutil.Accumulator{}
+	assert.NoError(t, n.Gather(&acc))
+
+	fields := map[string]interface{}{
+		"when":   int64(101),
+		"poll":   int64(256),
+		"reach":  int64(37),
+		"delay":  float64(51.016),
+		"offset": float64(233.010),
+		"jitter": float64(17.462),
+	}
+	tags := map[string]string{
+		"remote":  "*uschi5-ntp-002.",
+		"refid":   "10.177.80.46",
+		"stratum": "2",
+		"type":    "u",
+	}
+	acc.AssertContainsTaggedFields(t, "ntpq", fields, tags)
+}
+
+func TestBadIntNTPQ(t *testing.T) {
+	tt := tester{
+		ret: []byte(badIntParseNTPQ),
+		err: nil,
+	}
+	n := &NTPQ{
+		runQ: tt.runqTest,
+	}
+
+	acc := testutil.Accumulator{}
+	assert.NoError(t, n.Gather(&acc))
+
+	fields := map[string]interface{}{
+		"when":   int64(101),
+		"reach":  int64(37),
+		"delay":  float64(51.016),
+		"offset": float64(233.010),
+		"jitter": float64(17.462),
+	}
+	tags := map[string]string{
+		"remote":  "*uschi5-ntp-002.",
+		"refid":   "10.177.80.46",
+		"stratum": "2",
+		"type":    "u",
+	}
+	acc.AssertContainsTaggedFields(t, "ntpq", fields, tags)
+}
+
+func TestBadFloatNTPQ(t *testing.T) {
+	tt := tester{
+		ret: []byte(badFloatParseNTPQ),
+		err: nil,
+	}
+	n := &NTPQ{
+		runQ: tt.runqTest,
+	}
+
+	acc := testutil.Accumulator{}
+	assert.NoError(t, n.Gather(&acc))
+
+	fields := map[string]interface{}{
+		"when":   int64(2),
+		"poll":   int64(256),
+		"reach":  int64(37),
+		"delay":  float64(51.016),
+		"jitter": float64(17.462),
+	}
+	tags := map[string]string{
+		"remote":  "*uschi5-ntp-002.",
+		"refid":   "10.177.80.46",
+		"stratum": "2",
+		"type":    "u",
+	}
+	acc.AssertContainsTaggedFields(t, "ntpq", fields, tags)
+}
+
+func TestDaysNTPQ(t *testing.T) {
+	tt := tester{
+		ret: []byte(whenDaysNTPQ),
+		err: nil,
+	}
+	n := &NTPQ{
+		runQ: tt.runqTest,
+	}
+
+	acc := testutil.Accumulator{}
+	assert.NoError(t, n.Gather(&acc))
+
+	fields := map[string]interface{}{
+		"when":   int64(172800),
+		"poll":   int64(256),
+		"reach":  int64(37),
+		"delay":  float64(51.016),
+		"offset": float64(233.010),
+		"jitter": float64(17.462),
+	}
+	tags := map[string]string{
+		"remote":  "*uschi5-ntp-002.",
+		"refid":   "10.177.80.46",
+		"stratum": "2",
+		"type":    "u",
+	}
+	acc.AssertContainsTaggedFields(t, "ntpq", fields, tags)
+}
+
+func TestHoursNTPQ(t *testing.T) {
+	tt := tester{
+		ret: []byte(whenHoursNTPQ),
+		err: nil,
+	}
+	n := &NTPQ{
+		runQ: tt.runqTest,
+	}
+
+	acc := testutil.Accumulator{}
+	assert.NoError(t, n.Gather(&acc))
+
+	fields := map[string]interface{}{
+		"when":   int64(7200),
+		"poll":   int64(256),
+		"reach":  int64(37),
+		"delay":  float64(51.016),
+		"offset": float64(233.010),
+		"jitter": float64(17.462),
+	}
+	tags := map[string]string{
+		"remote":  "*uschi5-ntp-002.",
+		"refid":   "10.177.80.46",
+		"stratum": "2",
+		"type":    "u",
+	}
+	acc.AssertContainsTaggedFields(t, "ntpq", fields, tags)
+}
+
+func TestMinutesNTPQ(t *testing.T) {
+	tt := tester{
+		ret: []byte(whenMinutesNTPQ),
+		err: nil,
+	}
+	n := &NTPQ{
+		runQ: tt.runqTest,
+	}
+
+	acc := testutil.Accumulator{}
+	assert.NoError(t, n.Gather(&acc))
+
+	fields := map[string]interface{}{
+		"when":   int64(120),
+		"poll":   int64(256),
+		"reach":  int64(37),
+		"delay":  float64(51.016),
+		"offset": float64(233.010),
+		"jitter": float64(17.462),
+	}
+	tags := map[string]string{
+		"remote":  "*uschi5-ntp-002.",
+		"refid":   "10.177.80.46",
+		"stratum": "2",
+		"type":    "u",
+	}
+	acc.AssertContainsTaggedFields(t, "ntpq", fields, tags)
+}
+
+func TestBadWhenNTPQ(t *testing.T) {
+	tt := tester{
+		ret: []byte(whenBadNTPQ),
+		err: nil,
+	}
+	n := &NTPQ{
+		runQ: tt.runqTest,
+	}
+
+	acc := testutil.Accumulator{}
+	assert.NoError(t, n.Gather(&acc))
+
+	fields := map[string]interface{}{
+		"poll":   int64(256),
+		"reach":  int64(37),
+		"delay":  float64(51.016),
+		"offset": float64(233.010),
+		"jitter": float64(17.462),
+	}
+	tags :=
map[string]string{ + "remote": "*uschi5-ntp-002.", + "refid": "10.177.80.46", + "stratum": "2", + "type": "u", + } + acc.AssertContainsTaggedFields(t, "ntpq", fields, tags) +} + +func TestMultiNTPQ(t *testing.T) { + tt := tester{ + ret: []byte(multiNTPQ), + err: nil, + } + n := &NTPQ{ + runQ: tt.runqTest, + } + + acc := testutil.Accumulator{} + assert.NoError(t, n.Gather(&acc)) + + fields := map[string]interface{}{ + "delay": float64(54.033), + "jitter": float64(449514), + "offset": float64(243.426), + "poll": int64(1024), + "reach": int64(377), + "when": int64(740), + } + tags := map[string]string{ + "refid": "10.177.80.37", + "remote": "83.137.98.96", + "stratum": "2", + "type": "u", + } + acc.AssertContainsTaggedFields(t, "ntpq", fields, tags) + + fields = map[string]interface{}{ + "delay": float64(60.785), + "jitter": float64(449539), + "offset": float64(232.597), + "poll": int64(1024), + "reach": int64(377), + "when": int64(739), + } + tags = map[string]string{ + "refid": "10.177.80.37", + "remote": "81.7.16.52", + "stratum": "2", + "type": "u", + } + acc.AssertContainsTaggedFields(t, "ntpq", fields, tags) +} + +func TestBadHeaderNTPQ(t *testing.T) { + resetVars() + tt := tester{ + ret: []byte(badHeaderNTPQ), + err: nil, + } + n := &NTPQ{ + runQ: tt.runqTest, + } + + acc := testutil.Accumulator{} + assert.NoError(t, n.Gather(&acc)) + + fields := map[string]interface{}{ + "when": int64(101), + "poll": int64(256), + "reach": int64(37), + "delay": float64(51.016), + "offset": float64(233.010), + "jitter": float64(17.462), + } + tags := map[string]string{ + "remote": "*uschi5-ntp-002.", + "refid": "10.177.80.46", + "type": "u", + } + acc.AssertContainsTaggedFields(t, "ntpq", fields, tags) +} + +func TestMissingDelayColumnNTPQ(t *testing.T) { + resetVars() + tt := tester{ + ret: []byte(missingDelayNTPQ), + err: nil, + } + n := &NTPQ{ + runQ: tt.runqTest, + } + + acc := testutil.Accumulator{} + assert.NoError(t, n.Gather(&acc)) + + fields := map[string]interface{}{ + "when": int64(101), + "poll": int64(256), + "reach": int64(37), + "offset": float64(233.010), + "jitter": float64(17.462), + } + tags := map[string]string{ + "remote": "*uschi5-ntp-002.", + "refid": "10.177.80.46", + "type": "u", + } + acc.AssertContainsTaggedFields(t, "ntpq", fields, tags) +} + +func TestFailedNTPQ(t *testing.T) { + tt := tester{ + ret: []byte(singleNTPQ), + err: fmt.Errorf("Test failure"), + } + n := &NTPQ{ + runQ: tt.runqTest, + } + + acc := testutil.Accumulator{} + assert.Error(t, n.Gather(&acc)) +} + +type tester struct { + ret []byte + err error +} + +func (t *tester) runqTest() ([]byte, error) { + return t.ret, t.err +} + +func resetVars() { + // Mapping of ntpq header names to tag keys + tagHeaders = map[string]string{ + "remote": "remote", + "refid": "refid", + "st": "stratum", + "t": "type", + } + + // Mapping of the ntpq tag key to the index in the command output + tagI = map[string]int{ + "remote": -1, + "refid": -1, + "stratum": -1, + "type": -1, + } + + // Mapping of float metrics to their index in the command output + floatI = map[string]int{ + "delay": -1, + "offset": -1, + "jitter": -1, + } + + // Mapping of int metrics to their index in the command output + intI = map[string]int{ + "when": -1, + "poll": -1, + "reach": -1, + } +} + +var singleNTPQ = ` remote refid st t when poll reach delay offset jitter +============================================================================== +*uschi5-ntp-002. 
10.177.80.46 2 u 101 256 37 51.016 233.010 17.462 +` + +var badHeaderNTPQ = `remote refid foobar t when poll reach delay offset jitter +============================================================================== +*uschi5-ntp-002. 10.177.80.46 2 u 101 256 37 51.016 233.010 17.462 +` + +var missingDelayNTPQ = `remote refid foobar t when poll reach offset jitter +============================================================================== +*uschi5-ntp-002. 10.177.80.46 2 u 101 256 37 233.010 17.462 +` + +var whenDaysNTPQ = ` remote refid st t when poll reach delay offset jitter +============================================================================== +*uschi5-ntp-002. 10.177.80.46 2 u 2d 256 37 51.016 233.010 17.462 +` + +var whenHoursNTPQ = ` remote refid st t when poll reach delay offset jitter +============================================================================== +*uschi5-ntp-002. 10.177.80.46 2 u 2h 256 37 51.016 233.010 17.462 +` + +var whenMinutesNTPQ = ` remote refid st t when poll reach delay offset jitter +============================================================================== +*uschi5-ntp-002. 10.177.80.46 2 u 2m 256 37 51.016 233.010 17.462 +` + +var whenBadNTPQ = ` remote refid st t when poll reach delay offset jitter +============================================================================== +*uschi5-ntp-002. 10.177.80.46 2 u 2q 256 37 51.016 233.010 17.462 +` + +var badFloatParseNTPQ = ` remote refid st t when poll reach delay offset jitter +============================================================================== +*uschi5-ntp-002. 10.177.80.46 2 u 2 256 37 51.016 foobar 17.462 +` + +var badIntParseNTPQ = ` remote refid st t when poll reach delay offset jitter +============================================================================== +*uschi5-ntp-002. 10.177.80.46 2 u 101 foobar 37 51.016 233.010 17.462 +` + +var multiNTPQ = ` remote refid st t when poll reach delay offset jitter +============================================================================== + 83.137.98.96 10.177.80.37 2 u 740 1024 377 54.033 243.426 449514. + 81.7.16.52 10.177.80.37 2 u 739 1024 377 60.785 232.597 449539. + 131.188.3.221 10.177.80.37 2 u 783 1024 377 111.820 261.921 449528. + 5.9.29.107 10.177.80.37 2 u 703 1024 377 205.704 160.406 449602. + 91.189.94.4 10.177.80.37 2 u 673 1024 377 143.047 274.726 449445. +` diff --git a/plugins/inputs/ntpq/ntpq_windows.go b/plugins/inputs/ntpq/ntpq_windows.go new file mode 100644 index 000000000..a1f1a55fa --- /dev/null +++ b/plugins/inputs/ntpq/ntpq_windows.go @@ -0,0 +1,3 @@ +// +build windows + +package ntpq diff --git a/plugins/inputs/phpfpm/child.go b/plugins/inputs/phpfpm/child.go new file mode 100644 index 000000000..2ebdf2ffb --- /dev/null +++ b/plugins/inputs/phpfpm/child.go @@ -0,0 +1,331 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package phpfpm + +// This file implements FastCGI from the perspective of a child process. + +import ( + "errors" + "fmt" + "io" + "io/ioutil" + "net" + "net/http" + "net/http/cgi" + "os" + "strings" + "sync" + "time" +) + +// request holds the state for an in-progress request. As soon as it's complete, +// it's converted to an http.Request. 
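`parseParams` below walks the FCGI_PARAMS byte stream, where every key and value is preceded by its length: one byte for lengths under 128, otherwise four bytes with the top bit of the first one set. A standalone sketch of that length rule (the vendored fcgi.go is assumed to carry its own `readSize` equivalent):

```go
package main

import "fmt"

// readSize decodes a FastCGI name-value length. Lengths below 128 fit in
// a single byte; longer values use four bytes, big-endian, with the high
// bit of the first byte set as the marker. It returns the decoded length
// and how many bytes it consumed (0 on truncated input).
func readSize(s []byte) (uint32, int) {
	if len(s) == 0 {
		return 0, 0
	}
	size, n := uint32(s[0]), 1
	if size&0x80 != 0 {
		if len(s) < 4 {
			return 0, 0
		}
		size = (size&0x7f)<<24 + uint32(s[1])<<16 + uint32(s[2])<<8 + uint32(s[3])
		n = 4
	}
	return size, n
}

func main() {
	fmt.Println(readSize([]byte{11}))                     // 11 1
	fmt.Println(readSize([]byte{0x80, 0x00, 0x01, 0x00})) // 256 4
}
```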
+type request struct { + pw *io.PipeWriter + reqId uint16 + params map[string]string + buf [1024]byte + rawParams []byte + keepConn bool +} + +func newRequest(reqId uint16, flags uint8) *request { + r := &request{ + reqId: reqId, + params: map[string]string{}, + keepConn: flags&flagKeepConn != 0, + } + r.rawParams = r.buf[:0] + return r +} + +// parseParams reads an encoded []byte into Params. +func (r *request) parseParams() { + text := r.rawParams + r.rawParams = nil + for len(text) > 0 { + keyLen, n := readSize(text) + if n == 0 { + return + } + text = text[n:] + valLen, n := readSize(text) + if n == 0 { + return + } + text = text[n:] + if int(keyLen)+int(valLen) > len(text) { + return + } + key := readString(text, keyLen) + text = text[keyLen:] + val := readString(text, valLen) + text = text[valLen:] + r.params[key] = val + } +} + +// response implements http.ResponseWriter. +type response struct { + req *request + header http.Header + w *bufWriter + wroteHeader bool +} + +func newResponse(c *child, req *request) *response { + return &response{ + req: req, + header: http.Header{}, + w: newWriter(c.conn, typeStdout, req.reqId), + } +} + +func (r *response) Header() http.Header { + return r.header +} + +func (r *response) Write(data []byte) (int, error) { + if !r.wroteHeader { + r.WriteHeader(http.StatusOK) + } + return r.w.Write(data) +} + +func (r *response) WriteHeader(code int) { + if r.wroteHeader { + return + } + r.wroteHeader = true + if code == http.StatusNotModified { + // Must not have body. + r.header.Del("Content-Type") + r.header.Del("Content-Length") + r.header.Del("Transfer-Encoding") + } else if r.header.Get("Content-Type") == "" { + r.header.Set("Content-Type", "text/html; charset=utf-8") + } + + if r.header.Get("Date") == "" { + r.header.Set("Date", time.Now().UTC().Format(http.TimeFormat)) + } + + fmt.Fprintf(r.w, "Status: %d %s\r\n", code, http.StatusText(code)) + r.header.Write(r.w) + r.w.WriteString("\r\n") +} + +func (r *response) Flush() { + if !r.wroteHeader { + r.WriteHeader(http.StatusOK) + } + r.w.Flush() +} + +func (r *response) Close() error { + r.Flush() + return r.w.Close() +} + +type child struct { + conn *conn + handler http.Handler + + mu sync.Mutex // protects requests: + requests map[uint16]*request // keyed by request ID +} + +func newChild(rwc io.ReadWriteCloser, handler http.Handler) *child { + return &child{ + conn: newConn(rwc), + handler: handler, + requests: make(map[uint16]*request), + } +} + +func (c *child) serve() { + defer c.conn.Close() + defer c.cleanUp() + var rec record + for { + if err := rec.read(c.conn.rwc); err != nil { + return + } + if err := c.handleRecord(&rec); err != nil { + return + } + } +} + +var errCloseConn = errors.New("fcgi: connection should be closed") + +var emptyBody = ioutil.NopCloser(strings.NewReader("")) + +// ErrRequestAborted is returned by Read when a handler attempts to read the +// body of a request that has been aborted by the web server. +var ErrRequestAborted = errors.New("fcgi: request aborted by web server") + +// ErrConnClosed is returned by Read when a handler attempts to read the body of +// a request after the connection to the web server has been closed. +var ErrConnClosed = errors.New("fcgi: connection to web server closed") + +func (c *child) handleRecord(rec *record) error { + c.mu.Lock() + req, ok := c.requests[rec.h.Id] + c.mu.Unlock() + if !ok && rec.h.Type != typeBeginRequest && rec.h.Type != typeGetValues { + // The spec says to ignore unknown request IDs. 
+ return nil + } + + switch rec.h.Type { + case typeBeginRequest: + if req != nil { + // The server is trying to begin a request with the same ID + // as an in-progress request. This is an error. + return errors.New("fcgi: received ID that is already in-flight") + } + + var br beginRequest + if err := br.read(rec.content()); err != nil { + return err + } + if br.role != roleResponder { + c.conn.writeEndRequest(rec.h.Id, 0, statusUnknownRole) + return nil + } + req = newRequest(rec.h.Id, br.flags) + c.mu.Lock() + c.requests[rec.h.Id] = req + c.mu.Unlock() + return nil + case typeParams: + // NOTE(eds): Technically a key-value pair can straddle the boundary + // between two packets. We buffer until we've received all parameters. + if len(rec.content()) > 0 { + req.rawParams = append(req.rawParams, rec.content()...) + return nil + } + req.parseParams() + return nil + case typeStdin: + content := rec.content() + if req.pw == nil { + var body io.ReadCloser + if len(content) > 0 { + // body could be an io.LimitReader, but it shouldn't matter + // as long as both sides are behaving. + body, req.pw = io.Pipe() + } else { + body = emptyBody + } + go c.serveRequest(req, body) + } + if len(content) > 0 { + // TODO(eds): This blocks until the handler reads from the pipe. + // If the handler takes a long time, it might be a problem. + req.pw.Write(content) + } else if req.pw != nil { + req.pw.Close() + } + return nil + case typeGetValues: + values := map[string]string{"FCGI_MPXS_CONNS": "1"} + c.conn.writePairs(typeGetValuesResult, 0, values) + return nil + case typeData: + // If the filter role is implemented, read the data stream here. + return nil + case typeAbortRequest: + c.mu.Lock() + delete(c.requests, rec.h.Id) + c.mu.Unlock() + c.conn.writeEndRequest(rec.h.Id, 0, statusRequestComplete) + if req.pw != nil { + req.pw.CloseWithError(ErrRequestAborted) + } + if !req.keepConn { + // connection will close upon return + return errCloseConn + } + return nil + default: + b := make([]byte, 8) + b[0] = byte(rec.h.Type) + c.conn.writeRecord(typeUnknownType, 0, b) + return nil + } +} + +func (c *child) serveRequest(req *request, body io.ReadCloser) { + r := newResponse(c, req) + httpReq, err := cgi.RequestFromMap(req.params) + if err != nil { + // there was an error reading the request + r.WriteHeader(http.StatusInternalServerError) + c.conn.writeRecord(typeStderr, req.reqId, []byte(err.Error())) + } else { + httpReq.Body = body + c.handler.ServeHTTP(r, httpReq) + } + r.Close() + c.mu.Lock() + delete(c.requests, req.reqId) + c.mu.Unlock() + c.conn.writeEndRequest(req.reqId, 0, statusRequestComplete) + + // Consume the entire body, so the host isn't still writing to + // us when we close the socket below in the !keepConn case, + // otherwise we'd send a RST. (golang.org/issue/4183) + // TODO(bradfitz): also bound this copy in time. Or send + // some sort of abort request to the host, so the host + // can properly cut off the client sending all the data. + // For now just bound it a little and + io.CopyN(ioutil.Discard, body, 100<<20) + body.Close() + + if !req.keepConn { + c.conn.Close() + } +} + +func (c *child) cleanUp() { + c.mu.Lock() + defer c.mu.Unlock() + for _, req := range c.requests { + if req.pw != nil { + // race with call to Close in c.serveRequest doesn't matter because + // Pipe(Reader|Writer).Close are idempotent + req.pw.CloseWithError(ErrConnClosed) + } + } +} + +// Serve accepts incoming FastCGI connections on the listener l, creating a new +// goroutine for each. 
The goroutine reads requests and then calls handler +// to reply to them. +// If l is nil, Serve accepts connections from os.Stdin. +// If handler is nil, http.DefaultServeMux is used. +func Serve(l net.Listener, handler http.Handler) error { + if l == nil { + var err error + l, err = net.FileListener(os.Stdin) + if err != nil { + return err + } + defer l.Close() + } + if handler == nil { + handler = http.DefaultServeMux + } + for { + rw, err := l.Accept() + if err != nil { + return err + } + c := newChild(rw, handler) + go c.serve() + } +} diff --git a/plugins/inputs/phpfpm/phpfpm_fcgi.go b/plugins/inputs/phpfpm/fcgi.go similarity index 79% rename from plugins/inputs/phpfpm/phpfpm_fcgi.go rename to plugins/inputs/phpfpm/fcgi.go index 03aac7634..689660ea0 100644 --- a/plugins/inputs/phpfpm/phpfpm_fcgi.go +++ b/plugins/inputs/phpfpm/fcgi.go @@ -17,11 +17,6 @@ import ( "errors" "io" "sync" - - "net" - "strconv" - - "strings" ) // recType is a record type, as defined by @@ -277,74 +272,3 @@ func (w *streamWriter) Close() error { // send empty record to close the stream return w.c.writeRecord(w.recType, w.reqId, nil) } - -func NewClient(h string, args ...interface{}) (fcgi *conn, err error) { - var con net.Conn - if len(args) != 1 { - err = errors.New("fcgi: not enough params") - return - } - switch args[0].(type) { - case int: - addr := h + ":" + strconv.FormatInt(int64(args[0].(int)), 10) - con, err = net.Dial("tcp", addr) - case string: - laddr := net.UnixAddr{Name: args[0].(string), Net: h} - con, err = net.DialUnix(h, nil, &laddr) - default: - err = errors.New("fcgi: we only accept int (port) or string (socket) params.") - } - fcgi = &conn{ - rwc: con, - } - return -} - -func (client *conn) Request(env map[string]string, requestData string) (retout []byte, reterr []byte, err error) { - defer client.rwc.Close() - var reqId uint16 = 1 - - err = client.writeBeginRequest(reqId, uint16(roleResponder), 0) - if err != nil { - return - } - - err = client.writePairs(typeParams, reqId, env) - if err != nil { - return - } - - if len(requestData) > 0 { - if err = client.writeRecord(typeStdin, reqId, []byte(requestData)); err != nil { - return - } - } - - rec := &record{} - var err1 error - - // recive untill EOF or FCGI_END_REQUEST -READ_LOOP: - for { - err1 = rec.read(client.rwc) - if err1 != nil && strings.Contains(err1.Error(), "use of closed network connection") { - if err1 != io.EOF { - err = err1 - } - break - } - - switch { - case rec.h.Type == typeStdout: - retout = append(retout, rec.content()...) - case rec.h.Type == typeStderr: - reterr = append(reterr, rec.content()...) 
-		case rec.h.Type == typeEndRequest:
-			fallthrough
-		default:
-			break READ_LOOP
-		}
-	}
-
-	return
-}
diff --git a/plugins/inputs/phpfpm/fcgi_client.go b/plugins/inputs/phpfpm/fcgi_client.go
new file mode 100644
index 000000000..56978ad3a
--- /dev/null
+++ b/plugins/inputs/phpfpm/fcgi_client.go
@@ -0,0 +1,86 @@
+package phpfpm
+
+import (
+	"errors"
+	"io"
+	"net"
+	"strconv"
+)
+
+// newFcgiClient creates a FastCGI client connected either to the given TCP
+// host and port, or to a unix socket.
+func newFcgiClient(h string, args ...interface{}) (*conn, error) {
+	var con net.Conn
+	if len(args) != 1 {
+		return nil, errors.New("fcgi: not enough params")
+	}
+
+	var err error
+	switch args[0].(type) {
+	case int:
+		addr := h + ":" + strconv.FormatInt(int64(args[0].(int)), 10)
+		con, err = net.Dial("tcp", addr)
+	case string:
+		laddr := net.UnixAddr{Name: args[0].(string), Net: h}
+		con, err = net.DialUnix(h, nil, &laddr)
+	default:
+		err = errors.New("fcgi: only int (port) or string (socket) params are accepted")
+	}
+	fcgi := &conn{
+		rwc: con,
+	}
+
+	return fcgi, err
+}
+
+func (client *conn) Request(
+	env map[string]string,
+	requestData string,
+) (retout []byte, reterr []byte, err error) {
+	defer client.rwc.Close()
+	var reqId uint16 = 1
+
+	err = client.writeBeginRequest(reqId, uint16(roleResponder), 0)
+	if err != nil {
+		return
+	}
+
+	err = client.writePairs(typeParams, reqId, env)
+	if err != nil {
+		return
+	}
+
+	if len(requestData) > 0 {
+		if err = client.writeRecord(typeStdin, reqId, []byte(requestData)); err != nil {
+			return
+		}
+	}
+
+	rec := &record{}
+	var err1 error
+
+	// receive until EOF or FCGI_END_REQUEST
+READ_LOOP:
+	for {
+		err1 = rec.read(client.rwc)
+		if err1 != nil {
+			// EOF just means the response is complete; any other read
+			// error is reported to the caller
+			if err1 != io.EOF {
+				err = err1
+			}
+			break
+		}
+
+		switch {
+		case rec.h.Type == typeStdout:
+			retout = append(retout, rec.content()...)
+		case rec.h.Type == typeStderr:
+			reterr = append(reterr, rec.content()...)
+		case rec.h.Type == typeEndRequest:
+			fallthrough
+		default:
+			break READ_LOOP
+		}
+	}
+
+	return
+}
diff --git a/plugins/inputs/phpfpm/fcgi_test.go b/plugins/inputs/phpfpm/fcgi_test.go
new file mode 100644
index 000000000..15e0030a7
--- /dev/null
+++ b/plugins/inputs/phpfpm/fcgi_test.go
@@ -0,0 +1,280 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
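+//
+// These tests exercise the FastCGI record encoding and decoding shared by
+// the phpfpm child and client code.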
+ +package phpfpm + +import ( + "bytes" + "errors" + "io" + "io/ioutil" + "net/http" + "testing" +) + +var sizeTests = []struct { + size uint32 + bytes []byte +}{ + {0, []byte{0x00}}, + {127, []byte{0x7F}}, + {128, []byte{0x80, 0x00, 0x00, 0x80}}, + {1000, []byte{0x80, 0x00, 0x03, 0xE8}}, + {33554431, []byte{0x81, 0xFF, 0xFF, 0xFF}}, +} + +func TestSize(t *testing.T) { + b := make([]byte, 4) + for i, test := range sizeTests { + n := encodeSize(b, test.size) + if !bytes.Equal(b[:n], test.bytes) { + t.Errorf("%d expected %x, encoded %x", i, test.bytes, b) + } + size, n := readSize(test.bytes) + if size != test.size { + t.Errorf("%d expected %d, read %d", i, test.size, size) + } + if len(test.bytes) != n { + t.Errorf("%d did not consume all the bytes", i) + } + } +} + +var streamTests = []struct { + desc string + recType recType + reqId uint16 + content []byte + raw []byte +}{ + {"single record", typeStdout, 1, nil, + []byte{1, byte(typeStdout), 0, 1, 0, 0, 0, 0}, + }, + // this data will have to be split into two records + {"two records", typeStdin, 300, make([]byte, 66000), + bytes.Join([][]byte{ + // header for the first record + {1, byte(typeStdin), 0x01, 0x2C, 0xFF, 0xFF, 1, 0}, + make([]byte, 65536), + // header for the second + {1, byte(typeStdin), 0x01, 0x2C, 0x01, 0xD1, 7, 0}, + make([]byte, 472), + // header for the empty record + {1, byte(typeStdin), 0x01, 0x2C, 0, 0, 0, 0}, + }, + nil), + }, +} + +type nilCloser struct { + io.ReadWriter +} + +func (c *nilCloser) Close() error { return nil } + +func TestStreams(t *testing.T) { + var rec record +outer: + for _, test := range streamTests { + buf := bytes.NewBuffer(test.raw) + var content []byte + for buf.Len() > 0 { + if err := rec.read(buf); err != nil { + t.Errorf("%s: error reading record: %v", test.desc, err) + continue outer + } + content = append(content, rec.content()...) + } + if rec.h.Type != test.recType { + t.Errorf("%s: got type %d expected %d", test.desc, rec.h.Type, test.recType) + continue + } + if rec.h.Id != test.reqId { + t.Errorf("%s: got request ID %d expected %d", test.desc, rec.h.Id, test.reqId) + continue + } + if !bytes.Equal(content, test.content) { + t.Errorf("%s: read wrong content", test.desc) + continue + } + buf.Reset() + c := newConn(&nilCloser{buf}) + w := newWriter(c, test.recType, test.reqId) + if _, err := w.Write(test.content); err != nil { + t.Errorf("%s: error writing record: %v", test.desc, err) + continue + } + if err := w.Close(); err != nil { + t.Errorf("%s: error closing stream: %v", test.desc, err) + continue + } + if !bytes.Equal(buf.Bytes(), test.raw) { + t.Errorf("%s: wrote wrong content", test.desc) + } + } +} + +type writeOnlyConn struct { + buf []byte +} + +func (c *writeOnlyConn) Write(p []byte) (int, error) { + c.buf = append(c.buf, p...) 
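+	// everything the child writes is captured in buf so tests such as
+	// TestGetValues can assert on the exact bytes sent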
+ return len(p), nil +} + +func (c *writeOnlyConn) Read(p []byte) (int, error) { + return 0, errors.New("conn is write-only") +} + +func (c *writeOnlyConn) Close() error { + return nil +} + +func TestGetValues(t *testing.T) { + var rec record + rec.h.Type = typeGetValues + + wc := new(writeOnlyConn) + c := newChild(wc, nil) + err := c.handleRecord(&rec) + if err != nil { + t.Fatalf("handleRecord: %v", err) + } + + const want = "\x01\n\x00\x00\x00\x12\x06\x00" + + "\x0f\x01FCGI_MPXS_CONNS1" + + "\x00\x00\x00\x00\x00\x00\x01\n\x00\x00\x00\x00\x00\x00" + if got := string(wc.buf); got != want { + t.Errorf(" got: %q\nwant: %q\n", got, want) + } +} + +func nameValuePair11(nameData, valueData string) []byte { + return bytes.Join( + [][]byte{ + {byte(len(nameData)), byte(len(valueData))}, + []byte(nameData), + []byte(valueData), + }, + nil, + ) +} + +func makeRecord( + recordType recType, + requestId uint16, + contentData []byte, +) []byte { + requestIdB1 := byte(requestId >> 8) + requestIdB0 := byte(requestId) + + contentLength := len(contentData) + contentLengthB1 := byte(contentLength >> 8) + contentLengthB0 := byte(contentLength) + return bytes.Join([][]byte{ + {1, byte(recordType), requestIdB1, requestIdB0, contentLengthB1, + contentLengthB0, 0, 0}, + contentData, + }, + nil) +} + +// a series of FastCGI records that start a request and begin sending the +// request body +var streamBeginTypeStdin = bytes.Join([][]byte{ + // set up request 1 + makeRecord(typeBeginRequest, 1, + []byte{0, byte(roleResponder), 0, 0, 0, 0, 0, 0}), + // add required parameters to request 1 + makeRecord(typeParams, 1, nameValuePair11("REQUEST_METHOD", "GET")), + makeRecord(typeParams, 1, nameValuePair11("SERVER_PROTOCOL", "HTTP/1.1")), + makeRecord(typeParams, 1, nil), + // begin sending body of request 1 + makeRecord(typeStdin, 1, []byte("0123456789abcdef")), +}, + nil) + +var cleanUpTests = []struct { + input []byte + err error +}{ + // confirm that child.handleRecord closes req.pw after aborting req + { + bytes.Join([][]byte{ + streamBeginTypeStdin, + makeRecord(typeAbortRequest, 1, nil), + }, + nil), + ErrRequestAborted, + }, + // confirm that child.serve closes all pipes after error reading record + { + bytes.Join([][]byte{ + streamBeginTypeStdin, + nil, + }, + nil), + ErrConnClosed, + }, +} + +type nopWriteCloser struct { + io.ReadWriter +} + +func (nopWriteCloser) Close() error { + return nil +} + +// Test that child.serve closes the bodies of aborted requests and closes the +// bodies of all requests before returning. Causes deadlock if either condition +// isn't met. See issue 6934. +func TestChildServeCleansUp(t *testing.T) { + for _, tt := range cleanUpTests { + input := make([]byte, len(tt.input)) + copy(input, tt.input) + rc := nopWriteCloser{bytes.NewBuffer(input)} + done := make(chan bool) + c := newChild(rc, http.HandlerFunc(func( + w http.ResponseWriter, + r *http.Request, + ) { + // block on reading body of request + _, err := io.Copy(ioutil.Discard, r.Body) + if err != tt.err { + t.Errorf("Expected %#v, got %#v", tt.err, err) + } + // not reached if body of request isn't closed + done <- true + })) + go c.serve() + // wait for body of request to be closed or all goroutines to block + <-done + } +} + +type rwNopCloser struct { + io.Reader + io.Writer +} + +func (rwNopCloser) Close() error { + return nil +} + +// Verifies it doesn't crash. Issue 11824. 
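+// The params record below declares a 50-byte key and a 50-byte value but
+// only carries 10 bytes of content, so parseParams has to give up cleanly
+// instead of reading past the end of the buffer.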
+func TestMalformedParams(t *testing.T) { + input := []byte{ + // beginRequest, requestId=1, contentLength=8, role=1, keepConn=1 + 1, 1, 0, 1, 0, 8, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, + // params, requestId=1, contentLength=10, k1Len=50, v1Len=50 (malformed, wrong length) + 1, 4, 0, 1, 0, 10, 0, 0, 50, 50, 3, 4, 5, 6, 7, 8, 9, 10, + // end of params + 1, 4, 0, 1, 0, 0, 0, 0, + } + rw := rwNopCloser{bytes.NewReader(input), ioutil.Discard} + c := newChild(rw, http.DefaultServeMux) + c.serve() +} diff --git a/plugins/inputs/phpfpm/phpfpm.go b/plugins/inputs/phpfpm/phpfpm.go index c07262342..169fe2194 100644 --- a/plugins/inputs/phpfpm/phpfpm.go +++ b/plugins/inputs/phpfpm/phpfpm.go @@ -112,6 +112,7 @@ func (g *phpfpm) gatherServer(addr string, acc telegraf.Accumulator) error { statusPath string ) + var err error if strings.HasPrefix(addr, "fcgi://") || strings.HasPrefix(addr, "cgi://") { u, err := url.Parse(addr) if err != nil { @@ -120,7 +121,12 @@ func (g *phpfpm) gatherServer(addr string, acc telegraf.Accumulator) error { socketAddr := strings.Split(u.Host, ":") fcgiIp := socketAddr[0] fcgiPort, _ := strconv.Atoi(socketAddr[1]) - fcgi, _ = NewClient(fcgiIp, fcgiPort) + fcgi, err = newFcgiClient(fcgiIp, fcgiPort) + if len(u.Path) > 1 { + statusPath = strings.Trim(u.Path, "/") + } else { + statusPath = "status" + } } else { socketAddr := strings.Split(addr, ":") if len(socketAddr) >= 2 { @@ -134,8 +140,13 @@ func (g *phpfpm) gatherServer(addr string, acc telegraf.Accumulator) error { if _, err := os.Stat(socketPath); os.IsNotExist(err) { return fmt.Errorf("Socket doesn't exist '%s': %s", socketPath, err) } - fcgi, _ = NewClient("unix", socketPath) + fcgi, err = newFcgiClient("unix", socketPath) } + + if err != nil { + return err + } + return g.gatherFcgi(fcgi, statusPath, acc) } diff --git a/plugins/inputs/postgresql/postgresql.go b/plugins/inputs/postgresql/postgresql.go index fe2a56576..da8ee8001 100644 --- a/plugins/inputs/postgresql/postgresql.go +++ b/plugins/inputs/postgresql/postgresql.go @@ -4,27 +4,30 @@ import ( "bytes" "database/sql" "fmt" + "regexp" "sort" "strings" "github.com/influxdata/telegraf" "github.com/influxdata/telegraf/plugins/inputs" - _ "github.com/lib/pq" + "github.com/lib/pq" ) type Postgresql struct { - Address string - Databases []string - OrderedColumns []string - AllColumns []string + Address string + Databases []string + OrderedColumns []string + AllColumns []string + sanitizedAddress string } var ignoredColumns = map[string]bool{"datid": true, "datname": true, "stats_reset": true} var sampleConfig = ` ## specify address via a url matching: - ## postgres://[pqgotest[:password]]@localhost[/dbname]?sslmode=[disable|verify-ca|verify-full] + ## postgres://[pqgotest[:password]]@localhost[/dbname]\ + ## ?sslmode=[disable|verify-ca|verify-full] ## or a simple string: ## host=localhost user=pqotest password=... sslmode=... 
dbname=app_production
 ##
@@ -133,6 +136,23 @@ type scanner interface {
 	Scan(dest ...interface{}) error
 }
 
+// matches password=... key/value pairs so they can be scrubbed from the address
+var passwordKVMatcher = regexp.MustCompile("password=\\S+ ?")
+
+func (p *Postgresql) SanitizedAddress() (_ string, err error) {
+	var canonicalizedAddress string
+	if strings.HasPrefix(p.Address, "postgres://") || strings.HasPrefix(p.Address, "postgresql://") {
+		canonicalizedAddress, err = pq.ParseURL(p.Address)
+		if err != nil {
+			return p.sanitizedAddress, err
+		}
+	} else {
+		canonicalizedAddress = p.Address
+	}
+	p.sanitizedAddress = passwordKVMatcher.ReplaceAllString(canonicalizedAddress, "")
+
+	return p.sanitizedAddress, err
+}
+
 func (p *Postgresql) accRow(row scanner, acc telegraf.Accumulator) error {
 	var columnVars []interface{}
 	var dbname bytes.Buffer
@@ -165,7 +185,13 @@ func (p *Postgresql) accRow(row scanner, acc telegraf.Accumulator) error {
 		dbname.WriteString("postgres")
 	}
 
-	tags := map[string]string{"server": p.Address, "db": dbname.String()}
+	var tagAddress string
+	tagAddress, err = p.SanitizedAddress()
+	if err != nil {
+		return err
+	}
+
+	tags := map[string]string{"server": tagAddress, "db": dbname.String()}
 
 	fields := make(map[string]interface{})
 	for col, val := range columnMap {
diff --git a/plugins/inputs/postgresql_extensible/README.md b/plugins/inputs/postgresql_extensible/README.md
new file mode 100644
index 000000000..e9fbc571c
--- /dev/null
+++ b/plugins/inputs/postgresql_extensible/README.md
@@ -0,0 +1,231 @@
+# PostgreSQL plugin
+
+This postgresql plugin provides metrics for your postgres database. It has been
+designed to parse the SQL queries in the plugin section of your telegraf.conf.
+
+For now only two queries are specified, and it's up to you to add more; several
+per-query parameters are available:
+
+* The SQL query itself
+* The minimum PostgreSQL version supported (in the numeric form visible in
+  pg_settings)
+* A boolean to define whether the query has to be run against specific
+  databases (defined in the databases variable of the plugin section)
+* The list of the columns that have to be defined as tags
+
+```
+[[inputs.postgresql_extensible]]
+  # specify address via a url matching:
+  # postgres://[pqgotest[:password]]@localhost[/dbname]?sslmode=...
+  # or a simple string:
+  # host=localhost user=pqotest password=... sslmode=... dbname=app_production
+  #
+  # All connection parameters are optional.  #
+  # Without the dbname parameter, the driver will default to a database
+  # with the same name as the user. This dbname is just for instantiating a
+  # connection with the server and doesn't restrict the databases we are trying
+  # to grab metrics for.
+  #
+  address = "host=localhost user=postgres sslmode=disable"
+  # A list of databases to pull metrics about. If not specified, metrics for all
+  # databases are gathered.
+  # databases = ["app_production", "testing"]
+  #
+  # Define the toml config where the sql queries are stored
+  # New queries can be added; if withdbname is set to true and no databases
+  # are defined in the 'databases' field, the sql query is suffixed with
+  # 'is not null' in order to make the query succeed.
+  # Be careful that the sqlquery must contain the where clause with a part of
+  # the filtering; the plugin will add a 'IN (dbname list)' clause if
+  # withdbname is set to true
+  # Example :
+  # The sqlquery : "SELECT * FROM pg_stat_database where datname" becomes
+  # "SELECT * FROM pg_stat_database where datname IN ('postgres', 'pgbench')"
+  # because the databases variable was set to ['postgres', 'pgbench'] and
+  # withdbname was true.
+  # Be careful that if withdbname is set to false you don't have to define
+  # the where clause (aka with the dbname)
+  # the tagvalue field is used to define custom tags (separated by commas)
+  #
+  # Structure :
+  # [[inputs.postgresql_extensible.query]]
+  #   sqlquery string
+  #   version string
+  #   withdbname boolean
+  #   tagvalue string (comma separated)
+  [[inputs.postgresql_extensible.query]]
+    sqlquery="SELECT * FROM pg_stat_database where datname"
+    version=901
+    withdbname=false
+    tagvalue=""
+  [[inputs.postgresql_extensible.query]]
+    sqlquery="SELECT * FROM pg_stat_bgwriter"
+    version=901
+    withdbname=false
+    tagvalue=""
+```
+
+The system can be easily extended using homemade metrics collection tools or
+using postgresql extensions ([pg_stat_statements](http://www.postgresql.org/docs/current/static/pgstatstatements.html), [pg_proctab](https://github.com/markwkm/pg_proctab), [powa](http://dalibo.github.io/powa/)...)
+
+# Sample Queries
+- telegraf.conf postgresql_extensible queries (assuming that you have
+  configured your connection correctly)
+```
+[[inputs.postgresql_extensible.query]]
+  sqlquery="SELECT * FROM pg_stat_database"
+  version=901
+  withdbname=false
+  tagvalue=""
+[[inputs.postgresql_extensible.query]]
+  sqlquery="SELECT * FROM pg_stat_bgwriter"
+  version=901
+  withdbname=false
+  tagvalue=""
+[[inputs.postgresql_extensible.query]]
+  sqlquery="select * from sessions"
+  version=901
+  withdbname=false
+  tagvalue="db,username,state"
+[[inputs.postgresql_extensible.query]]
+  sqlquery="select setting as max_connections from pg_settings where \
+    name='max_connections'"
+  version=801
+  withdbname=false
+  tagvalue=""
+[[inputs.postgresql_extensible.query]]
+  sqlquery="select * from pg_stat_kcache"
+  version=901
+  withdbname=false
+  tagvalue=""
+[[inputs.postgresql_extensible.query]]
+  sqlquery="select setting as shared_buffers from pg_settings where \
+    name='shared_buffers'"
+  version=801
+  withdbname=false
+  tagvalue=""
+[[inputs.postgresql_extensible.query]]
+  sqlquery="SELECT db, count( distinct blocking_pid ) AS num_blocking_sessions,\
+    count( distinct blocked_pid) AS num_blocked_sessions FROM \
+    public.blocking_procs group by db"
+  version=901
+  withdbname=false
+  tagvalue="db"
+```
+
+# Postgresql Side
+postgresql.conf:
+```
+shared_preload_libraries = 'pg_stat_statements,pg_stat_kcache'
+```
+
+Please follow the requirements to set up those extensions.
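+
+As a quick sanity check (a standard PostgreSQL statement, nothing specific to
+this plugin), you can confirm after a restart that the libraries were loaded:
+```
+show shared_preload_libraries;
+```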
+
+In the database (can be a specific monitoring db)
+```
+create extension pg_stat_statements;
+create extension pg_stat_kcache;
+create extension pg_proctab;
+```
+(assuming that the extensions are installed at the OS layer)
+
+ - pg_stat_kcache is available on the postgresql.org yum repo
+ - pg_proctab is available at https://github.com/markwkm/pg_proctab
+
+## Views
+ - Blocking sessions
+```
+CREATE OR REPLACE VIEW public.blocking_procs AS
+ SELECT a.datname AS db,
+    kl.pid AS blocking_pid,
+    ka.usename AS blocking_user,
+    ka.query AS blocking_query,
+    bl.pid AS blocked_pid,
+    a.usename AS blocked_user,
+    a.query AS blocked_query,
+    to_char(age(now(), a.query_start), 'HH24h:MIm:SSs'::text) AS age
+   FROM pg_locks bl
+     JOIN pg_stat_activity a ON bl.pid = a.pid
+     JOIN pg_locks kl ON bl.locktype = kl.locktype AND NOT bl.database IS
+       DISTINCT FROM kl.database AND NOT bl.relation IS DISTINCT FROM kl.relation
+       AND NOT bl.page IS DISTINCT FROM kl.page AND NOT bl.tuple IS DISTINCT FROM
+       kl.tuple AND NOT bl.virtualxid IS DISTINCT FROM kl.virtualxid AND NOT
+       bl.transactionid IS DISTINCT FROM kl.transactionid AND NOT bl.classid IS
+       DISTINCT FROM kl.classid AND NOT bl.objid IS DISTINCT FROM kl.objid AND
+       NOT bl.objsubid IS DISTINCT FROM kl.objsubid AND bl.pid <> kl.pid
+     JOIN pg_stat_activity ka ON kl.pid = ka.pid
+  WHERE kl.granted AND NOT bl.granted
+  ORDER BY a.query_start;
+```
+ - Sessions Statistics
+```
+CREATE OR REPLACE VIEW public.sessions AS
+ WITH proctab AS (
+         SELECT pg_proctab.pid,
+            CASE
+                WHEN pg_proctab.state::text = 'R'::bpchar::text
+                  THEN 'running'::text
+                WHEN pg_proctab.state::text = 'D'::bpchar::text
+                  THEN 'sleep-io'::text
+                WHEN pg_proctab.state::text = 'S'::bpchar::text
+                  THEN 'sleep-waiting'::text
+                WHEN pg_proctab.state::text = 'Z'::bpchar::text
+                  THEN 'zombie'::text
+                WHEN pg_proctab.state::text = 'T'::bpchar::text
+                  THEN 'stopped'::text
+                ELSE NULL::text
+            END AS proc_state,
+            pg_proctab.ppid,
+            pg_proctab.utime,
+            pg_proctab.stime,
+            pg_proctab.vsize,
+            pg_proctab.rss,
+            pg_proctab.processor,
+            pg_proctab.rchar,
+            pg_proctab.wchar,
+            pg_proctab.syscr,
+            pg_proctab.syscw,
+            pg_proctab.reads,
+            pg_proctab.writes,
+            pg_proctab.cwrites
+           FROM pg_proctab() pg_proctab(pid, comm, fullcomm, state, ppid, pgrp,
+            session, tty_nr, tpgid, flags, minflt, cminflt, majflt, cmajflt,
+            utime, stime, cutime, cstime, priority, nice, num_threads,
+            itrealvalue, starttime, vsize, rss, exit_signal, processor,
+            rt_priority, policy, delayacct_blkio_ticks, uid, username, rchar,
+            wchar, syscr, syscw, reads, writes, cwrites)
+        ), stat_activity AS (
+         SELECT pg_stat_activity.datname,
+            pg_stat_activity.pid,
+            pg_stat_activity.usename,
+            CASE
+                WHEN pg_stat_activity.query IS NULL THEN 'no query'::text
+                WHEN pg_stat_activity.query IS NOT NULL AND
+                  pg_stat_activity.state = 'idle'::text THEN 'no query'::text
+                ELSE regexp_replace(pg_stat_activity.query, '[\n\r]+'::text,
+                  ' '::text, 'g'::text)
+            END AS query
+           FROM pg_stat_activity
+        )
+ SELECT stat.datname::name AS db,
+    stat.usename::name AS username,
+    stat.pid,
+    proc.proc_state::text AS state,
+    ('"'::text || stat.query) || '"'::text AS query,
+    (proc.utime/1000)::bigint AS session_usertime,
+    (proc.stime/1000)::bigint AS session_systemtime,
+    proc.vsize AS session_virtual_memory_size,
+    proc.rss AS session_resident_memory_size,
+    proc.processor AS session_processor_number,
+    proc.rchar AS session_bytes_read,
+    proc.rchar-proc.reads AS session_logical_bytes_read,
+    proc.wchar AS session_bytes_written,
+    proc.wchar-proc.writes AS
session_logical_bytes_writes, + proc.syscr AS session_read_io, + proc.syscw AS session_write_io, + proc.reads AS session_physical_reads, + proc.writes AS session_physical_writes, + proc.cwrites AS session_cancel_writes + FROM proctab proc, + stat_activity stat + WHERE proc.pid = stat.pid; +``` diff --git a/plugins/inputs/postgresql_extensible/postgresql_extensible.go b/plugins/inputs/postgresql_extensible/postgresql_extensible.go new file mode 100644 index 000000000..4ebf752ff --- /dev/null +++ b/plugins/inputs/postgresql_extensible/postgresql_extensible.go @@ -0,0 +1,278 @@ +package postgresql_extensible + +import ( + "bytes" + "database/sql" + "fmt" + "regexp" + "strings" + + "github.com/influxdata/telegraf" + "github.com/influxdata/telegraf/plugins/inputs" + + "github.com/lib/pq" +) + +type Postgresql struct { + Address string + Databases []string + OrderedColumns []string + AllColumns []string + AdditionalTags []string + sanitizedAddress string + Query []struct { + Sqlquery string + Version int + Withdbname bool + Tagvalue string + } +} + +type query []struct { + Sqlquery string + Version int + Withdbname bool + Tagvalue string +} + +var ignoredColumns = map[string]bool{"datid": true, "datname": true, "stats_reset": true} + +var sampleConfig = ` + ## specify address via a url matching: + ## postgres://[pqgotest[:password]]@localhost[/dbname]\ + ## ?sslmode=[disable|verify-ca|verify-full] + ## or a simple string: + ## host=localhost user=pqotest password=... sslmode=... dbname=app_production + # + ## All connection parameters are optional. # + ## Without the dbname parameter, the driver will default to a database + ## with the same name as the user. This dbname is just for instantiating a + ## connection with the server and doesn't restrict the databases we are trying + ## to grab metrics for. + # + address = "host=localhost user=postgres sslmode=disable" + ## A list of databases to pull metrics about. If not specified, metrics for all + ## databases are gathered. + ## databases = ["app_production", "testing"] + # + ## Define the toml config where the sql queries are stored + ## New queries can be added, if the withdbname is set to true and there is no + ## databases defined in the 'databases field', the sql query is ended by a + ## 'is not null' in order to make the query succeed. + ## Example : + ## The sqlquery : "SELECT * FROM pg_stat_database where datname" become + ## "SELECT * FROM pg_stat_database where datname IN ('postgres', 'pgbench')" + ## because the databases variable was set to ['postgres', 'pgbench' ] and the + ## withdbname was true. 
Be careful that if the withdbname is set to false you
+  ## don't have to define the where clause (aka with the dbname); the tagvalue
+  ## field is used to define custom tags (separated by commas)
+  #
+  ## Structure :
+  ## [[inputs.postgresql_extensible.query]]
+  ##   sqlquery string
+  ##   version string
+  ##   withdbname boolean
+  ##   tagvalue string (comma separated)
+  [[inputs.postgresql_extensible.query]]
+    sqlquery="SELECT * FROM pg_stat_database"
+    version=901
+    withdbname=false
+    tagvalue=""
+  [[inputs.postgresql_extensible.query]]
+    sqlquery="SELECT * FROM pg_stat_bgwriter"
+    version=901
+    withdbname=false
+    tagvalue=""
+`
+
+func (p *Postgresql) SampleConfig() string {
+	return sampleConfig
+}
+
+func (p *Postgresql) Description() string {
+	return "Read metrics from one or many postgresql servers"
+}
+
+func (p *Postgresql) IgnoredColumns() map[string]bool {
+	return ignoredColumns
+}
+
+var localhost = "host=localhost sslmode=disable"
+
+func (p *Postgresql) Gather(acc telegraf.Accumulator) error {
+
+	var sql_query string
+	var query_addon string
+	var db_version int
+	var query string
+	var tag_value string
+
+	if p.Address == "" || p.Address == "localhost" {
+		p.Address = localhost
+	}
+
+	db, err := sql.Open("postgres", p.Address)
+	if err != nil {
+		return err
+	}
+
+	defer db.Close()
+
+	// Retrieve the database version
+
+	query = `select substring(setting from 1 for 3) as version from pg_settings where name='server_version_num'`
+	err = db.QueryRow(query).Scan(&db_version)
+	if err != nil {
+		return err
+	}
+	// We loop in order to process each query.
+	// A query is not run if the database version is below the query version.
+
+	for i := range p.Query {
+		sql_query = p.Query[i].Sqlquery
+		tag_value = p.Query[i].Tagvalue
+
+		if p.Query[i].Withdbname {
+			if len(p.Databases) != 0 {
+				query_addon = fmt.Sprintf(` IN ('%s')`,
+					strings.Join(p.Databases, "','"))
+			} else {
+				query_addon = " is not null"
+			}
+		} else {
+			query_addon = ""
+		}
+		sql_query += query_addon
+
+		if p.Query[i].Version <= db_version {
+			rows, err := db.Query(sql_query)
+			if err != nil {
+				return err
+			}
+
+			defer rows.Close()
+
+			// grab the column information from the result
+			p.OrderedColumns, err = rows.Columns()
+			if err != nil {
+				return err
+			} else {
+				for _, v := range p.OrderedColumns {
+					p.AllColumns = append(p.AllColumns, v)
+				}
+			}
+			p.AdditionalTags = nil
+			if tag_value != "" {
+				tag_list := strings.Split(tag_value, ",")
+				for t := range tag_list {
+					p.AdditionalTags = append(p.AdditionalTags, tag_list[t])
+				}
+			}
+
+			for rows.Next() {
+				err = p.accRow(rows, acc)
+				if err != nil {
+					return err
+				}
+			}
+		}
+	}
+	return nil
+}
+
+type scanner interface {
+	Scan(dest ...interface{}) error
+}
+
+// matches password=... key/value pairs so they can be scrubbed from the address
+var passwordKVMatcher = regexp.MustCompile("password=\\S+ ?")
+
+func (p *Postgresql) SanitizedAddress() (_ string, err error) {
+	var canonicalizedAddress string
+	if strings.HasPrefix(p.Address, "postgres://") || strings.HasPrefix(p.Address, "postgresql://") {
+		canonicalizedAddress, err = pq.ParseURL(p.Address)
+		if err != nil {
+			return p.sanitizedAddress, err
+		}
+	} else {
+		canonicalizedAddress = p.Address
+	}
+	p.sanitizedAddress = passwordKVMatcher.ReplaceAllString(canonicalizedAddress, "")
+
+	return p.sanitizedAddress, err
+}
+
+func (p *Postgresql) accRow(row scanner, acc telegraf.Accumulator) error {
+	var columnVars []interface{}
+	var dbname bytes.Buffer
+
+	// this is where we'll store the column name with its *interface{}
+	columnMap := make(map[string]*interface{})
+
+	for _, column := range
p.OrderedColumns { + columnMap[column] = new(interface{}) + } + + // populate the array of interface{} with the pointers in the right order + for i := 0; i < len(columnMap); i++ { + columnVars = append(columnVars, columnMap[p.OrderedColumns[i]]) + } + + // deconstruct array of variables and send to Scan + err := row.Scan(columnVars...) + + if err != nil { + return err + } + if columnMap["datname"] != nil { + // extract the database name from the column map + dbnameChars := (*columnMap["datname"]).([]uint8) + for i := 0; i < len(dbnameChars); i++ { + dbname.WriteString(string(dbnameChars[i])) + } + } else { + dbname.WriteString("postgres") + } + + var tagAddress string + tagAddress, err = p.SanitizedAddress() + if err != nil { + return err + } + + // Process the additional tags + + tags := map[string]string{} + tags["server"] = tagAddress + tags["db"] = dbname.String() + var isATag int + fields := make(map[string]interface{}) + for col, val := range columnMap { + _, ignore := ignoredColumns[col] + //if !ignore && *val != "" { + if !ignore { + isATag = 0 + for tag := range p.AdditionalTags { + if col == p.AdditionalTags[tag] { + isATag = 1 + value_type_p := fmt.Sprintf(`%T`, *val) + if value_type_p == "[]uint8" { + tags[col] = fmt.Sprintf(`%s`, *val) + } else if value_type_p == "int64" { + tags[col] = fmt.Sprintf(`%v`, *val) + } + } + } + if isATag == 0 { + fields[col] = *val + } + } + } + acc.AddFields("postgresql", fields, tags) + return nil +} + +func init() { + inputs.Add("postgresql_extensible", func() telegraf.Input { + return &Postgresql{} + }) +} diff --git a/plugins/inputs/postgresql_extensible/postgresql_extensible_test.go b/plugins/inputs/postgresql_extensible/postgresql_extensible_test.go new file mode 100644 index 000000000..7fd907102 --- /dev/null +++ b/plugins/inputs/postgresql_extensible/postgresql_extensible_test.go @@ -0,0 +1,98 @@ +package postgresql_extensible + +import ( + "fmt" + "testing" + + "github.com/influxdata/telegraf/testutil" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestPostgresqlGeneratesMetrics(t *testing.T) { + if testing.Short() { + t.Skip("Skipping integration test in short mode") + } + + p := &Postgresql{ + Address: fmt.Sprintf("host=%s user=postgres sslmode=disable", + testutil.GetLocalHost()), + Databases: []string{"postgres"}, + Query: query{ + {Sqlquery: "select * from pg_stat_database", + Version: 901, + Withdbname: false, + Tagvalue: ""}, + }, + } + var acc testutil.Accumulator + err := p.Gather(&acc) + require.NoError(t, err) + + availableColumns := make(map[string]bool) + for _, col := range p.AllColumns { + availableColumns[col] = true + } + intMetrics := []string{ + "xact_commit", + "xact_rollback", + "blks_read", + "blks_hit", + "tup_returned", + "tup_fetched", + "tup_inserted", + "tup_updated", + "tup_deleted", + "conflicts", + "temp_files", + "temp_bytes", + "deadlocks", + "numbackends", + } + + floatMetrics := []string{ + "blk_read_time", + "blk_write_time", + } + + metricsCounted := 0 + + for _, metric := range intMetrics { + _, ok := availableColumns[metric] + if ok { + assert.True(t, acc.HasIntField("postgresql", metric)) + metricsCounted++ + } + } + + for _, metric := range floatMetrics { + _, ok := availableColumns[metric] + if ok { + assert.True(t, acc.HasFloatField("postgresql", metric)) + metricsCounted++ + } + } + + assert.True(t, metricsCounted > 0) + assert.Equal(t, len(availableColumns)-len(p.IgnoredColumns()), metricsCounted) +} + +func TestPostgresqlIgnoresUnwantedColumns(t 
*testing.T) {
+	if testing.Short() {
+		t.Skip("Skipping integration test in short mode")
+	}
+
+	p := &Postgresql{
+		Address: fmt.Sprintf("host=%s user=postgres sslmode=disable",
+			testutil.GetLocalHost()),
+	}
+
+	var acc testutil.Accumulator
+
+	err := p.Gather(&acc)
+	require.NoError(t, err)
+
+	for col := range p.IgnoredColumns() {
+		assert.False(t, acc.HasMeasurement(col))
+	}
+}
diff --git a/plugins/inputs/procstat/README.md b/plugins/inputs/procstat/README.md
index 90552c2a6..ef96500a3 100644
--- a/plugins/inputs/procstat/README.md
+++ b/plugins/inputs/procstat/README.md
@@ -35,6 +35,10 @@ The above configuration would result in output like:
 
 # Measurements
 Note: prefix can be set by the user, per process.
+
+Thread-related measurement names:
+- procstat_[prefix_]num_threads value=5
+
 File descriptor related measurement names:
 - procstat_[prefix_]num_fds value=4
 
diff --git a/plugins/inputs/procstat/procstat.go b/plugins/inputs/procstat/procstat.go
index e5ae207fe..a0e63fd6f 100644
--- a/plugins/inputs/procstat/procstat.go
+++ b/plugins/inputs/procstat/procstat.go
@@ -43,6 +43,8 @@ var sampleConfig = `
 
   ## Field name prefix
   prefix = ""
+
+  ## comment this out if you want raw cpu_time stats
+  fielddrop = ["cpu_time_*"]
 `
 
 func (_ *Procstat) SampleConfig() string {
diff --git a/plugins/inputs/procstat/spec_processor.go b/plugins/inputs/procstat/spec_processor.go
index b09ed4f21..bb248f003 100644
--- a/plugins/inputs/procstat/spec_processor.go
+++ b/plugins/inputs/procstat/spec_processor.go
@@ -52,6 +52,7 @@ func NewSpecProcessor(
 }
 
 func (p *SpecProcessor) pushMetrics() {
+	p.pushNThreadsStats()
 	p.pushFDStats()
 	p.pushCtxStats()
 	p.pushIOStats()
@@ -60,6 +61,15 @@ func (p *SpecProcessor) pushMetrics() {
 	p.flush()
 }
 
+func (p *SpecProcessor) pushNThreadsStats() error {
+	numThreads, err := p.proc.NumThreads()
+	if err != nil {
+		return fmt.Errorf("NumThreads error: %s\n", err)
+	}
+	p.add("num_threads", numThreads)
+	return nil
+}
+
 func (p *SpecProcessor) pushFDStats() error {
 	fds, err := p.proc.NumFDs()
 	if err != nil {
diff --git a/plugins/inputs/prometheus/README.md b/plugins/inputs/prometheus/README.md
new file mode 100644
index 000000000..c5c952515
--- /dev/null
+++ b/plugins/inputs/prometheus/README.md
@@ -0,0 +1,75 @@
+# Prometheus Input Plugin
+
+The prometheus input plugin gathers metrics from any web endpoint
+exposing metrics in the Prometheus format.
+
+### Configuration:
+
+Example for the Kubernetes apiserver:
+```toml
+# Get all metrics from Kube-apiserver
+[[inputs.prometheus]]
+  # An array of urls to scrape metrics from.
+  urls = ["http://my-kube-apiserver:8080/metrics"]
+```
+
+You can use a more complex configuration
+to filter metrics and add some tags:
+
+```toml
+# Get all metrics from Kube-apiserver
+[[inputs.prometheus]]
+  # An array of urls to scrape metrics from.
+  urls = ["http://my-kube-apiserver:8080/metrics"]
+  # Get only metrics whose name contains the "apiserver_" string
+  namepass = ["apiserver_"]
+  # Add a metric name prefix
+  name_prefix = "k8s_"
+  # Add tags to be able to make beautiful dashboards
+  [inputs.prometheus.tags]
+    kubeservice = "kube-apiserver"
+```
+
+### Measurements & Fields & Tags:
+
+Measurements and fields can be anything;
+it all depends on what you're querying.
+
+Example:
+
+```
+# HELP go_gc_duration_seconds A summary of the GC invocation durations.
+# TYPE go_gc_duration_seconds summary
+go_gc_duration_seconds{quantile="0"} 0.00010425500000000001
+go_gc_duration_seconds{quantile="0.25"} 0.000139108
+go_gc_duration_seconds{quantile="0.5"} 0.00015749400000000002
+go_gc_duration_seconds{quantile="0.75"} 0.000331463
+go_gc_duration_seconds{quantile="1"} 0.000667154
+go_gc_duration_seconds_sum 0.0018183950000000002
+go_gc_duration_seconds_count 7
+# HELP go_goroutines Number of goroutines that currently exist.
+# TYPE go_goroutines gauge
+go_goroutines 15
+```
+
+- go_goroutines
+  - gauge (float)
+- go_gc_duration_seconds
+  - 0, 0.25, 0.5, 0.75, 1 (the GC pause quantiles, float)
+  - count, sum (float)
+
+- All measurements have the following tags:
+  - url=http://my-kube-apiserver:8080/metrics
+- go_goroutines has the following tags:
+  - kubeservice=kube-apiserver
+- go_gc_duration_seconds has the following tags:
+  - kubeservice=kube-apiserver
+
+### Example Output:
+
+Example output with the configuration given above:
+
+```
+$ ./telegraf -config telegraf.conf -test
+k8s_go_goroutines,kubeservice=kube-apiserver,url=http://my-kube-apiserver:8080/metrics gauge=536 1456857329391929813
+k8s_go_gc_duration_seconds,kubeservice=kube-apiserver,url=http://my-kube-apiserver:8080/metrics 0=0.038002142,0.25=0.041732467,0.5=0.04336492,0.75=0.047271799,1=0.058295811,count=0,sum=208.334617406 1456857329391929813
+```
diff --git a/plugins/inputs/prometheus/parser.go b/plugins/inputs/prometheus/parser.go
new file mode 100644
index 000000000..c6ad211f8
--- /dev/null
+++ b/plugins/inputs/prometheus/parser.go
@@ -0,0 +1,171 @@
+package prometheus
+
+// Parser inspired from
+// https://github.com/prometheus/prom2json/blob/master/main.go
+
+import (
+	"bufio"
+	"bytes"
+	"fmt"
+	"io"
+	"math"
+	"mime"
+
+	"github.com/influxdata/telegraf"
+
+	"github.com/matttproud/golang_protobuf_extensions/pbutil"
+	dto "github.com/prometheus/client_model/go"
+	"github.com/prometheus/common/expfmt"
+)
+
+// PrometheusParser is an object for parsing incoming metrics.
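+// Gauges map to a "gauge" field, counters to "counter" and untyped values
+// to "value"; summaries yield one field per quantile plus "count" and "sum",
+// and histograms one field per bucket upper bound plus "count" and "sum".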
+type PrometheusParser struct {
+	// PromFormat
+	PromFormat map[string]string
+	// DefaultTags will be added to every parsed metric
+	// DefaultTags map[string]string
+}
+
+// Parse returns a slice of Metrics from a text or delimited-protobuf
+// representation of metrics
+func (p *PrometheusParser) Parse(buf []byte) ([]telegraf.Metric, error) {
+	var metrics []telegraf.Metric
+	var parser expfmt.TextParser
+	// parse even if the buffer begins with a newline
+	buf = bytes.TrimPrefix(buf, []byte("\n"))
+	// Read raw data
+	buffer := bytes.NewBuffer(buf)
+	reader := bufio.NewReader(buffer)
+
+	// Get format
+	mediatype, params, err := mime.ParseMediaType(p.PromFormat["Content-Type"])
+	// Prepare output
+	metricFamilies := make(map[string]*dto.MetricFamily)
+	if err == nil && mediatype == "application/vnd.google.protobuf" &&
+		params["encoding"] == "delimited" &&
+		params["proto"] == "io.prometheus.client.MetricFamily" {
+		for {
+			metricFamily := &dto.MetricFamily{}
+			if _, err = pbutil.ReadDelimited(reader, metricFamily); err != nil {
+				if err == io.EOF {
+					break
+				}
+				return nil, fmt.Errorf("reading metric family protocol buffer failed: %s", err)
+			}
+			metricFamilies[metricFamily.GetName()] = metricFamily
+		}
+	} else {
+		metricFamilies, err = parser.TextToMetricFamilies(reader)
+		if err != nil {
+			return nil, fmt.Errorf("reading text format failed: %s", err)
+		}
+	}
+	// read metrics, whichever format they came from
+	for metricName, mf := range metricFamilies {
+		for _, m := range mf.Metric {
+			// reading tags
+			tags := makeLabels(m)
+			/*
+				for key, value := range p.DefaultTags {
+					tags[key] = value
+				}
+			*/
+			// reading fields
+			fields := make(map[string]interface{})
+			if mf.GetType() == dto.MetricType_SUMMARY {
+				// summary metric: quantiles plus the count and sum
+				// carried by the Summary message
+				fields = makeQuantiles(m)
+				fields["count"] = float64(m.GetSummary().GetSampleCount())
+				fields["sum"] = float64(m.GetSummary().GetSampleSum())
+			} else if mf.GetType() == dto.MetricType_HISTOGRAM {
+				// histogram metric: buckets plus the count and sum
+				// carried by the Histogram message
+				fields = makeBuckets(m)
+				fields["count"] = float64(m.GetHistogram().GetSampleCount())
+				fields["sum"] = float64(m.GetHistogram().GetSampleSum())
+			} else {
+				// standard metric
+				fields = getNameAndValue(m)
+			}
+			// converting to telegraf metric
+			if len(fields) > 0 {
+				metric, err := telegraf.NewMetric(metricName, tags, fields)
+				if err == nil {
+					metrics = append(metrics, metric)
+				}
+			}
+		}
+	}
+	return metrics, nil
+}
+
+// Parse one line
+func (p *PrometheusParser) ParseLine(line string) (telegraf.Metric, error) {
+	metrics, err := p.Parse([]byte(line + "\n"))
+
+	if err != nil {
+		return nil, err
+	}
+
+	if len(metrics) < 1 {
+		return nil, fmt.Errorf(
+			"can not parse the line: %s, for data format: prometheus", line)
+	}
+
+	return metrics[0], nil
+}
+
+/*
+// Set default tags
+func (p *PrometheusParser) SetDefaultTags(tags map[string]string) {
+	p.DefaultTags = tags
+}
+*/
+
+// Get Quantiles from summary metric
+func makeQuantiles(m *dto.Metric) map[string]interface{} {
+	fields := make(map[string]interface{})
+	for _, q := range m.GetSummary().Quantile {
+		if !math.IsNaN(q.GetValue()) {
+			fields[fmt.Sprint(q.GetQuantile())] = float64(q.GetValue())
+		}
+	}
+	return fields
+}
+
+// Get Buckets from histogram metric
+func makeBuckets(m *dto.Metric) map[string]interface{} {
+	fields := make(map[string]interface{})
+	for _, b := range m.GetHistogram().Bucket {
+		fields[fmt.Sprint(b.GetUpperBound())] = float64(b.GetCumulativeCount())
+	}
+	return fields
+}
+
+// Get labels from metric
+func makeLabels(m *dto.Metric) map[string]string {
+	result := map[string]string{}
+	for _, lp := range m.Label {
+		result[lp.GetName()] = lp.GetValue()
+	}
+	return result
+}
+
+// Get name and value from metric
+func getNameAndValue(m *dto.Metric) map[string]interface{} {
+	fields := make(map[string]interface{})
+	if m.Gauge != nil {
+		if !math.IsNaN(m.GetGauge().GetValue()) {
+			fields["gauge"] = float64(m.GetGauge().GetValue())
+		}
+	} else if m.Counter != nil {
+		if !math.IsNaN(m.GetCounter().GetValue()) {
+			fields["counter"] = float64(m.GetCounter().GetValue())
+		}
+	} else if m.Untyped != nil {
+		if !math.IsNaN(m.GetUntyped().GetValue()) {
+			fields["value"] = float64(m.GetUntyped().GetValue())
+		}
+	}
+	return fields
+}
diff --git a/plugins/inputs/prometheus/parser_test.go b/plugins/inputs/prometheus/parser_test.go
new file mode 100644
index 000000000..5c33260be
--- /dev/null
+++ b/plugins/inputs/prometheus/parser_test.go
@@ -0,0 +1,175 @@
+package prometheus
+
+import (
+	"testing"
+	"time"
+
+	"github.com/stretchr/testify/assert"
+)
+
+var exptime = time.Date(2009, time.November, 10, 23, 0, 0, 0, time.UTC)
+
+const validUniqueGauge = `# HELP cadvisor_version_info A metric with a constant '1' value labeled by kernel version, OS version, docker version, cadvisor version & cadvisor revision.
+# TYPE cadvisor_version_info gauge
+cadvisor_version_info{cadvisorRevision="",cadvisorVersion="",dockerVersion="1.8.2",kernelVersion="3.10.0-229.20.1.el7.x86_64",osVersion="CentOS Linux 7 (Core)"} 1
+`
+
+const validUniqueCounter = `# HELP get_token_fail_count Counter of failed Token() requests to the alternate token source
+# TYPE get_token_fail_count counter
+get_token_fail_count 0
+`
+
+const validUniqueLine = `# HELP get_token_fail_count Counter of failed Token() requests to the alternate token source
+`
+
+const validUniqueSummary = `# HELP http_request_duration_microseconds The HTTP request latencies in microseconds.
+# TYPE http_request_duration_microseconds summary
+http_request_duration_microseconds{handler="prometheus",quantile="0.5"} 552048.506
+http_request_duration_microseconds{handler="prometheus",quantile="0.9"} 5.876804288e+06
+http_request_duration_microseconds{handler="prometheus",quantile="0.99"} 5.876804288e+06
+http_request_duration_microseconds_sum{handler="prometheus"} 1.8909097205e+07
+http_request_duration_microseconds_count{handler="prometheus"} 9
+`
+
+const validUniqueHistogram = `# HELP apiserver_request_latencies Response latency distribution in microseconds for each verb, resource and client.
+# TYPE apiserver_request_latencies histogram
+apiserver_request_latencies_bucket{resource="bindings",verb="POST",le="125000"} 1994
+apiserver_request_latencies_bucket{resource="bindings",verb="POST",le="250000"} 1997
+apiserver_request_latencies_bucket{resource="bindings",verb="POST",le="500000"} 2000
+apiserver_request_latencies_bucket{resource="bindings",verb="POST",le="1e+06"} 2005
+apiserver_request_latencies_bucket{resource="bindings",verb="POST",le="2e+06"} 2012
+apiserver_request_latencies_bucket{resource="bindings",verb="POST",le="4e+06"} 2017
+apiserver_request_latencies_bucket{resource="bindings",verb="POST",le="8e+06"} 2024
+apiserver_request_latencies_bucket{resource="bindings",verb="POST",le="+Inf"} 2025
+apiserver_request_latencies_sum{resource="bindings",verb="POST"} 1.02726334e+08
+apiserver_request_latencies_count{resource="bindings",verb="POST"} 2025
+`
+
+const validData = `# HELP cadvisor_version_info A metric with a constant '1' value labeled by kernel version, OS version, docker version, cadvisor version & cadvisor revision.
+# TYPE cadvisor_version_info gauge +cadvisor_version_info{cadvisorRevision="",cadvisorVersion="",dockerVersion="1.8.2",kernelVersion="3.10.0-229.20.1.el7.x86_64",osVersion="CentOS Linux 7 (Core)"} 1 +# HELP go_gc_duration_seconds A summary of the GC invocation durations. +# TYPE go_gc_duration_seconds summary +go_gc_duration_seconds{quantile="0"} 0.013534896000000001 +go_gc_duration_seconds{quantile="0.25"} 0.02469263 +go_gc_duration_seconds{quantile="0.5"} 0.033727822000000005 +go_gc_duration_seconds{quantile="0.75"} 0.03840335 +go_gc_duration_seconds{quantile="1"} 0.049956604 +go_gc_duration_seconds_sum 1970.341293002 +go_gc_duration_seconds_count 65952 +# HELP http_request_duration_microseconds The HTTP request latencies in microseconds. +# TYPE http_request_duration_microseconds summary +http_request_duration_microseconds{handler="prometheus",quantile="0.5"} 552048.506 +http_request_duration_microseconds{handler="prometheus",quantile="0.9"} 5.876804288e+06 +http_request_duration_microseconds{handler="prometheus",quantile="0.99"} 5.876804288e+06 +http_request_duration_microseconds_sum{handler="prometheus"} 1.8909097205e+07 +http_request_duration_microseconds_count{handler="prometheus"} 9 +# HELP get_token_fail_count Counter of failed Token() requests to the alternate token source +# TYPE get_token_fail_count counter +get_token_fail_count 0 +# HELP apiserver_request_latencies Response latency distribution in microseconds for each verb, resource and client. +# TYPE apiserver_request_latencies histogram +apiserver_request_latencies_bucket{resource="bindings",verb="POST",le="125000"} 1994 +apiserver_request_latencies_bucket{resource="bindings",verb="POST",le="250000"} 1997 +apiserver_request_latencies_bucket{resource="bindings",verb="POST",le="500000"} 2000 +apiserver_request_latencies_bucket{resource="bindings",verb="POST",le="1e+06"} 2005 +apiserver_request_latencies_bucket{resource="bindings",verb="POST",le="2e+06"} 2012 +apiserver_request_latencies_bucket{resource="bindings",verb="POST",le="4e+06"} 2017 +apiserver_request_latencies_bucket{resource="bindings",verb="POST",le="8e+06"} 2024 +apiserver_request_latencies_bucket{resource="bindings",verb="POST",le="+Inf"} 2025 +apiserver_request_latencies_sum{resource="bindings",verb="POST"} 1.02726334e+08 +apiserver_request_latencies_count{resource="bindings",verb="POST"} 2025 +` + +const prometheusMulti = ` +cpu,host=foo,datacenter=us-east usage_idle=99,usage_busy=1 +cpu,host=foo,datacenter=us-east usage_idle=99,usage_busy=1 +cpu,host=foo,datacenter=us-east usage_idle=99,usage_busy=1 +cpu,host=foo,datacenter=us-east usage_idle=99,usage_busy=1 +cpu,host=foo,datacenter=us-east usage_idle=99,usage_busy=1 +cpu,host=foo,datacenter=us-east usage_idle=99,usage_busy=1 +cpu,host=foo,datacenter=us-east usage_idle=99,usage_busy=1 +` + +const prometheusMultiSomeInvalid = ` +cpu,host=foo,datacenter=us-east usage_idle=99,usage_busy=1 +cpu,host=foo,datacenter=us-east usage_idle=99,usage_busy=1 +cpu,host=foo,datacenter=us-east usage_idle=99,usage_busy=1 +cpu,cpu=cpu3, host=foo,datacenter=us-east usage_idle=99,usage_busy=1 +cpu,cpu=cpu4 , usage_idle=99,usage_busy=1 +cpu,host=foo,datacenter=us-east usage_idle=99,usage_busy=1 +` + +func TestParseValidPrometheus(t *testing.T) { + parser := PrometheusParser{} + + // Gauge value + metrics, err := parser.Parse([]byte(validUniqueGauge)) + assert.NoError(t, err) + assert.Len(t, metrics, 1) + assert.Equal(t, "cadvisor_version_info", metrics[0].Name()) + assert.Equal(t, map[string]interface{}{ + "gauge": float64(1), + 
}, metrics[0].Fields())
+	assert.Equal(t, map[string]string{
+		"osVersion":     "CentOS Linux 7 (Core)",
+		"dockerVersion": "1.8.2",
+		"kernelVersion": "3.10.0-229.20.1.el7.x86_64",
+	}, metrics[0].Tags())
+
+	// Counter value
+	//parser.SetDefaultTags(map[string]string{"mytag": "mytagvalue"})
+	metrics, err = parser.Parse([]byte(validUniqueCounter))
+	assert.NoError(t, err)
+	assert.Len(t, metrics, 1)
+	assert.Equal(t, "get_token_fail_count", metrics[0].Name())
+	assert.Equal(t, map[string]interface{}{
+		"counter": float64(0),
+	}, metrics[0].Fields())
+	assert.Equal(t, map[string]string{}, metrics[0].Tags())
+
+	// Summary data
+	//parser.SetDefaultTags(map[string]string{})
+	metrics, err = parser.Parse([]byte(validUniqueSummary))
+	assert.NoError(t, err)
+	assert.Len(t, metrics, 1)
+	assert.Equal(t, "http_request_duration_microseconds", metrics[0].Name())
+	assert.Equal(t, map[string]interface{}{
+		"0.5":   552048.506,
+		"0.9":   5.876804288e+06,
+		"0.99":  5.876804288e+06,
+		"count": 9.0,
+		"sum":   1.8909097205e+07,
+	}, metrics[0].Fields())
+	assert.Equal(t, map[string]string{"handler": "prometheus"}, metrics[0].Tags())
+
+	// histogram data
+	metrics, err = parser.Parse([]byte(validUniqueHistogram))
+	assert.NoError(t, err)
+	assert.Len(t, metrics, 1)
+	assert.Equal(t, "apiserver_request_latencies", metrics[0].Name())
+	assert.Equal(t, map[string]interface{}{
+		"500000": 2000.0,
+		"count":  2025.0,
+		"sum":    1.02726334e+08,
+		"250000": 1997.0,
+		"2e+06":  2012.0,
+		"4e+06":  2017.0,
+		"8e+06":  2024.0,
+		"+Inf":   2025.0,
+		"125000": 1994.0,
+		"1e+06":  2005.0,
+	}, metrics[0].Fields())
+	assert.Equal(t,
+		map[string]string{"verb": "POST", "resource": "bindings"},
+		metrics[0].Tags())
+
+}
+
+func TestParseLineInvalidPrometheus(t *testing.T) {
+	parser := PrometheusParser{}
+	metric, err := parser.ParseLine(validUniqueLine)
+	assert.NotNil(t, err)
+	assert.Nil(t, metric)
+
+}
diff --git a/plugins/inputs/prometheus/prometheus.go b/plugins/inputs/prometheus/prometheus.go
index 188e6b914..1c60a363e 100644
--- a/plugins/inputs/prometheus/prometheus.go
+++ b/plugins/inputs/prometheus/prometheus.go
@@ -1,31 +1,42 @@
 package prometheus
 
 import (
+	"crypto/tls"
 	"errors"
 	"fmt"
 	"github.com/influxdata/telegraf"
 	"github.com/influxdata/telegraf/plugins/inputs"
-	"github.com/prometheus/common/expfmt"
-	"github.com/prometheus/common/model"
-	"io"
+	"io/ioutil"
+	"net"
 	"net/http"
 	"sync"
+	"time"
 )
 
 type Prometheus struct {
 	Urls []string
+
+	// Use SSL but skip chain & host verification
+	InsecureSkipVerify bool
+	// Bearer Token authorization file path
+	BearerToken string `toml:"bearer_token"`
 }
 
 var sampleConfig = `
   ## An array of urls to scrape metrics from.
  urls = ["http://localhost:9100/metrics"]
+
+  ## Use SSL but skip chain & host verification
+  # insecure_skip_verify = false
+  ## Use bearer token for authorization
+  # bearer_token = /path/to/bearer/token
 `
 
-func (r *Prometheus) SampleConfig() string {
+func (p *Prometheus) SampleConfig() string {
 	return sampleConfig
 }
 
-func (r *Prometheus) Description() string {
+func (p *Prometheus) Description() string {
 	return "Read metrics from one or many prometheus clients"
 }
 
@@ -33,16 +44,16 @@ var ErrProtocolError = errors.New("prometheus protocol error")
 
 // Reads stats from all configured servers accumulates stats.
 // Returns one of the errors encountered while gather stats (if any).
-func (g *Prometheus) Gather(acc telegraf.Accumulator) error {
+func (p *Prometheus) Gather(acc telegraf.Accumulator) error {
 	var wg sync.WaitGroup
 	var outerr error
 
-	for _, serv := range g.Urls {
+	for _, serv := range p.Urls {
 		wg.Add(1)
 		go func(serv string) {
 			defer wg.Done()
-			outerr = g.gatherURL(serv, acc)
+			outerr = p.gatherURL(serv, acc)
 		}(serv)
 	}
 
@@ -51,8 +62,43 @@ func (g *Prometheus) Gather(acc telegraf.Accumulator) error {
 	return outerr
 }
 
-func (g *Prometheus) gatherURL(url string, acc telegraf.Accumulator) error {
-	resp, err := http.Get(url)
+var tr = &http.Transport{
+	ResponseHeaderTimeout: time.Duration(3 * time.Second),
+}
+
+var client = &http.Client{
+	Transport: tr,
+	Timeout:   time.Duration(4 * time.Second),
+}
+
+func (p *Prometheus) gatherURL(url string, acc telegraf.Accumulator) error {
+	collectDate := time.Now()
+	var req, err = http.NewRequest("GET", url, nil)
+	req.Header = make(http.Header)
+	var token []byte
+	var resp *http.Response
+
+	var rt http.RoundTripper = &http.Transport{
+		Dial: (&net.Dialer{
+			Timeout:   5 * time.Second,
+			KeepAlive: 30 * time.Second,
+		}).Dial,
+		TLSHandshakeTimeout: 5 * time.Second,
+		TLSClientConfig: &tls.Config{
+			InsecureSkipVerify: p.InsecureSkipVerify,
+		},
+		ResponseHeaderTimeout: time.Duration(3 * time.Second),
+	}
+
+	if p.BearerToken != "" {
+		token, err = ioutil.ReadFile(p.BearerToken)
+		if err != nil {
+			return err
+		}
+		req.Header.Set("Authorization", "Bearer "+string(token))
+	}
+
+	resp, err = rt.RoundTrip(req)
 	if err != nil {
 		return fmt.Errorf("error making HTTP request to %s: %s", url, err)
 	}
@@ -60,38 +106,33 @@ func (g *Prometheus) gatherURL(url string, acc telegraf.Accumulator) error {
 	if resp.StatusCode != http.StatusOK {
 		return fmt.Errorf("%s returned HTTP status %s", url, resp.Status)
 	}
-	format := expfmt.ResponseFormat(resp.Header)
-	decoder := expfmt.NewDecoder(resp.Body, format)
-
-	options := &expfmt.DecodeOptions{
-		Timestamp: model.Now(),
-	}
-	sampleDecoder := &expfmt.SampleDecoder{
-		Dec:  decoder,
-		Opts: options,
+	body, err := ioutil.ReadAll(resp.Body)
+	if err != nil {
+		return fmt.Errorf("error reading body: %s", err)
 	}
 
-	for {
-		var samples model.Vector
-		err := sampleDecoder.Decode(&samples)
-		if err == io.EOF {
-			break
-		} else if err != nil {
-			return fmt.Errorf("error getting processing samples for %s: %s",
-				url, err)
-		}
-		for _, sample := range samples {
-			tags := make(map[string]string)
-			for key, value := range sample.Metric {
-				if key == model.MetricNameLabel {
-					continue
-				}
-				tags[string(key)] = string(value)
-			}
-			acc.Add("prometheus_"+string(sample.Metric[model.MetricNameLabel]),
-				float64(sample.Value), tags)
-		}
+	// Copy the response headers so the parser can detect the exposition format
+	headers := make(map[string]string)
+	for key, value := range resp.Header {
+		headers[key] = value[0] // first value is enough for format detection
+	}
+
+	// Prepare Prometheus parser config
+	promparser := PrometheusParser{
+		PromFormat: headers,
+	}
+
+	metrics, err := promparser.Parse(body)
+	if err != nil {
+		return fmt.Errorf("error getting processing samples for %s: %s",
+			url, err)
+	}
+	// Add (or not) collected metrics
+	for _, metric := range metrics {
+		tags := metric.Tags()
+		tags["url"] = url
+		acc.AddFields(metric.Name(), metric.Fields(), tags, collectDate)
 	}
 
 	return nil
diff --git a/plugins/inputs/prometheus/prometheus_test.go b/plugins/inputs/prometheus/prometheus_test.go
index 2009cbb11..8a8fea9e3 100644
--- a/plugins/inputs/prometheus/prometheus_test.go
+++ b/plugins/inputs/prometheus/prometheus_test.go
@@ -40,16 +40,6 @@ func TestPrometheusGeneratesMetrics(t *testing.T) {
 	err := p.Gather(&acc)
require.NoError(t, err) - expected := []struct { - name string - value float64 - tags map[string]string - }{ - {"prometheus_go_gc_duration_seconds_count", 7, map[string]string{}}, - {"prometheus_go_goroutines", 15, map[string]string{}}, - } - - for _, e := range expected { - assert.True(t, acc.HasFloatField(e.name, "value")) - } + assert.True(t, acc.HasFloatField("go_gc_duration_seconds", "count")) + assert.True(t, acc.HasFloatField("go_goroutines", "gauge")) } diff --git a/plugins/inputs/rabbitmq/rabbitmq.go b/plugins/inputs/rabbitmq/rabbitmq.go index e51d65e15..4d119282d 100644 --- a/plugins/inputs/rabbitmq/rabbitmq.go +++ b/plugins/inputs/rabbitmq/rabbitmq.go @@ -122,7 +122,11 @@ func (r *RabbitMQ) Description() string { func (r *RabbitMQ) Gather(acc telegraf.Accumulator) error { if r.Client == nil { - r.Client = &http.Client{} + tr := &http.Transport{ResponseHeaderTimeout: time.Duration(3 * time.Second)} + r.Client = &http.Client{ + Transport: tr, + Timeout: time.Duration(4 * time.Second), + } } var errChan = make(chan error, len(gatherFunctions)) diff --git a/plugins/inputs/raindrops/raindrops.go b/plugins/inputs/raindrops/raindrops.go index fed22b693..6851f5d93 100644 --- a/plugins/inputs/raindrops/raindrops.go +++ b/plugins/inputs/raindrops/raindrops.go @@ -177,8 +177,11 @@ func (r *Raindrops) getTags(addr *url.URL) map[string]string { func init() { inputs.Add("raindrops", func() telegraf.Input { - return &Raindrops{http_client: &http.Client{Transport: &http.Transport{ - ResponseHeaderTimeout: time.Duration(3 * time.Second), - }}} + return &Raindrops{http_client: &http.Client{ + Transport: &http.Transport{ + ResponseHeaderTimeout: time.Duration(3 * time.Second), + }, + Timeout: time.Duration(4 * time.Second), + }} }) } diff --git a/plugins/inputs/redis/README.md b/plugins/inputs/redis/README.md new file mode 100644 index 000000000..1cbaea0ca --- /dev/null +++ b/plugins/inputs/redis/README.md @@ -0,0 +1,86 @@ +# Telegraf Plugin: Redis + +### Configuration: + +``` +# Read Redis's basic status information +[[inputs.redis]] + ## specify servers via a url matching: + ## [protocol://][:password]@address[:port] + ## e.g. + ## tcp://localhost:6379 + ## tcp://:password@192.168.99.100 + ## + ## If no servers are specified, then localhost is used as the host. + ## If no port is specified, 6379 is used + servers = ["tcp://localhost:6379"] +``` + +### Measurements & Fields: + +- Measurement + - uptime_in_seconds + - connected_clients + - used_memory + - used_memory_rss + - used_memory_peak + - used_memory_lua + - rdb_changes_since_last_save + - total_connections_received + - total_commands_processed + - instantaneous_ops_per_sec + - instantaneous_input_kbps + - instantaneous_output_kbps + - sync_full + - sync_partial_ok + - sync_partial_err + - expired_keys + - evicted_keys + - keyspace_hits + - keyspace_misses + - pubsub_channels + - pubsub_patterns + - latest_fork_usec + - connected_slaves + - master_repl_offset + - repl_backlog_active + - repl_backlog_size + - repl_backlog_histlen + - mem_fragmentation_ratio + - used_cpu_sys + - used_cpu_user + - used_cpu_sys_children + - used_cpu_user_children + +### Tags: + +- All measurements have the following tags: + - port + - server + +### Example Output: + +Using this configuration: +``` +[[inputs.redis]] + ## specify servers via a url matching: + ## [protocol://][:password]@address[:port] + ## e.g. + ## tcp://localhost:6379 + ## tcp://:password@192.168.99.100 + ## + ## If no servers are specified, then localhost is used as the host. 
+ ## If no port is specified, 6379 is used + servers = ["tcp://localhost:6379"] +``` + +When run with: +``` +./telegraf -config telegraf.conf -input-filter redis -test +``` + +It produces: +``` +* Plugin: redis, Collection 1 +> redis,port=6379,server=localhost clients=1i,connected_slaves=0i,evicted_keys=0i,expired_keys=0i,instantaneous_ops_per_sec=0i,keyspace_hitrate=0,keyspace_hits=0i,keyspace_misses=2i,latest_fork_usec=0i,master_repl_offset=0i,mem_fragmentation_ratio=3.58,pubsub_channels=0i,pubsub_patterns=0i,rdb_changes_since_last_save=0i,repl_backlog_active=0i,repl_backlog_histlen=0i,repl_backlog_size=1048576i,sync_full=0i,sync_partial_err=0i,sync_partial_ok=0i,total_commands_processed=4i,total_connections_received=2i,uptime=869i,used_cpu_sys=0.07,used_cpu_sys_children=0,used_cpu_user=0.1,used_cpu_user_children=0,used_memory=502048i,used_memory_lua=33792i,used_memory_peak=501128i,used_memory_rss=1798144i 1457052084987848383 +``` diff --git a/plugins/inputs/redis/redis.go b/plugins/inputs/redis/redis.go index b8862f6bc..859b23a22 100644 --- a/plugins/inputs/redis/redis.go +++ b/plugins/inputs/redis/redis.go @@ -9,6 +9,7 @@ import ( "strconv" "strings" "sync" + "time" "github.com/influxdata/telegraf" "github.com/influxdata/telegraf/plugins/inputs" @@ -30,6 +31,8 @@ var sampleConfig = ` servers = ["tcp://localhost:6379"] ` +var defaultTimeout = 5 * time.Second + func (r *Redis) SampleConfig() string { return sampleConfig } @@ -120,12 +123,15 @@ func (r *Redis) gatherServer(addr *url.URL, acc telegraf.Accumulator) error { addr.Host = addr.Host + ":" + defaultPort } - c, err := net.Dial("tcp", addr.Host) + c, err := net.DialTimeout("tcp", addr.Host, defaultTimeout) if err != nil { return fmt.Errorf("Unable to connect to redis server '%s': %s", addr.Host, err) } defer c.Close() + // Extend connection + c.SetDeadline(time.Now().Add(defaultTimeout)) + if addr.User != nil { pwd, set := addr.User.Password() if set && pwd != "" { diff --git a/plugins/inputs/riak/riak.go b/plugins/inputs/riak/riak.go index 6750c75a0..56231176b 100644 --- a/plugins/inputs/riak/riak.go +++ b/plugins/inputs/riak/riak.go @@ -5,6 +5,7 @@ import ( "fmt" "net/http" "net/url" + "time" "github.com/influxdata/telegraf" "github.com/influxdata/telegraf/plugins/inputs" @@ -20,7 +21,12 @@ type Riak struct { // NewRiak return a new instance of Riak with a default http client func NewRiak() *Riak { - return &Riak{client: http.DefaultClient} + tr := &http.Transport{ResponseHeaderTimeout: time.Duration(3 * time.Second)} + client := &http.Client{ + Transport: tr, + Timeout: time.Duration(4 * time.Second), + } + return &Riak{client: client} } // Type riakStats represents the data that is received from Riak diff --git a/plugins/inputs/sensors/sensors.go b/plugins/inputs/sensors/sensors.go index b2c2919cc..dbb304b71 100644 --- a/plugins/inputs/sensors/sensors.go +++ b/plugins/inputs/sensors/sensors.go @@ -49,7 +49,7 @@ func (s *Sensors) Gather(acc telegraf.Accumulator) error { var found bool for _, sensor := range s.Sensors { - parts := strings.SplitN(":", sensor, 2) + parts := strings.SplitN(sensor, ":", 2) if parts[0] == chipName { if parts[1] == "*" || parts[1] == featureLabel { diff --git a/plugins/inputs/snmp/README.md b/plugins/inputs/snmp/README.md new file mode 100644 index 000000000..bee783228 --- /dev/null +++ b/plugins/inputs/snmp/README.md @@ -0,0 +1,549 @@ +# SNMP Input Plugin + +The SNMP input plugin gathers metrics from SNMP agents + +### Configuration: + + +#### Very simple example + +In this example, the plugin will 
gather the value of this OID:
+
+ - `.1.3.6.1.2.1.2.2.1.4.1`
+
+```toml
+# Very Simple Example
+[[inputs.snmp]]
+
+  [[inputs.snmp.host]]
+    address = "127.0.0.1:161"
+    # SNMP community
+    community = "public" # default public
+    # SNMP version (1, 2 or 3)
+    # Version 3 not supported yet
+    version = 2 # default 2
+    # Simple list of OIDs to get, in addition to "collect"
+    get_oids = [".1.3.6.1.2.1.2.2.1.4.1"]
+```
+
+
+#### Simple example
+
+In this example, Telegraf gathers the values of two OIDs:
+
+ - named **ifnumber**
+ - named **interface_speed**
+
+With the **inputs.snmp.get** section, the plugin looks up the OID numbers:
+
+ - **ifnumber** => `.1.3.6.1.2.1.2.1.0`
+ - **interface_speed** => *ifSpeed*
+
+As you can see, *ifSpeed* is not a valid OID. In order to get
+the valid OID, the plugin uses `snmptranslate_file` to match it:
+
+ - **ifnumber** => `.1.3.6.1.2.1.2.1.0`
+ - **interface_speed** => *ifSpeed* => `.1.3.6.1.2.1.2.2.1.5`
+
+The plugin will also append the `instance` to the corresponding OID:
+
+ - **ifnumber** => `.1.3.6.1.2.1.2.1.0`
+ - **interface_speed** => *ifSpeed* => `.1.3.6.1.2.1.2.2.1.5.1`
+
+In this example, the plugin will gather the values of these OIDs:
+
+- `.1.3.6.1.2.1.2.1.0`
+- `.1.3.6.1.2.1.2.2.1.5.1`
+
+
+```toml
+# Simple example
+[[inputs.snmp]]
+  ## Use 'oids.txt' file to translate oids to names
+  ## To generate 'oids.txt' you need to run:
+  ## snmptranslate -m all -Tz -On | sed -e 's/"//g' > /tmp/oids.txt
+  ## Or, if you have another MIB folder with custom MIBs:
+  ## snmptranslate -M /mycustommibfolder -Tz -On -m all | sed -e 's/"//g' > oids.txt
+  snmptranslate_file = "/tmp/oids.txt"
+  [[inputs.snmp.host]]
+    address = "127.0.0.1:161"
+    # SNMP community
+    community = "public" # default public
+    # SNMP version (1, 2 or 3)
+    # Version 3 not supported yet
+    version = 2 # default 2
+    # Which get/bulk do you want to collect for this host
+    collect = ["ifnumber", "interface_speed"]
+
+    [[inputs.snmp.get]]
+      name = "ifnumber"
+      oid = ".1.3.6.1.2.1.2.1.0"
+
+    [[inputs.snmp.get]]
+      name = "interface_speed"
+      oid = "ifSpeed"
+      instance = "1"
+
+```
+
+
+#### Simple bulk example
+
+In this example, Telegraf gathers the values of three OIDs:
+
+ - named **ifnumber**
+ - named **interface_speed**
+ - named **if_out_octets**
+
+With the **inputs.snmp.get** section, the plugin looks up the OID numbers:
+
+ - **ifnumber** => `.1.3.6.1.2.1.2.1.0`
+ - **interface_speed** => *ifSpeed*
+
+With the **inputs.snmp.bulk** section, the plugin looks up the OID number:
+
+ - **if_out_octets** => *ifOutOctets*
+
+As you can see, *ifSpeed* and *ifOutOctets* are not valid OIDs.
+In order to get the valid OIDs, the plugin uses `snmptranslate_file`
+to match them:
+
+ - **ifnumber** => `.1.3.6.1.2.1.2.1.0`
+ - **interface_speed** => *ifSpeed* => `.1.3.6.1.2.1.2.2.1.5`
+ - **if_out_octets** => *ifOutOctets* => `.1.3.6.1.2.1.2.2.1.16`
+
+The plugin will also append the `instance` to the corresponding OID:
+
+ - **ifnumber** => `.1.3.6.1.2.1.2.1.0`
+ - **interface_speed** => *ifSpeed* => `.1.3.6.1.2.1.2.2.1.5.1`
+
+Since **if_out_octets** is a bulk request, the plugin will gather all
+OIDs in the table:
+
+- `.1.3.6.1.2.1.2.2.1.16.1`
+- `.1.3.6.1.2.1.2.2.1.16.2`
+- `.1.3.6.1.2.1.2.2.1.16.3`
+- `.1.3.6.1.2.1.2.2.1.16.4`
+- `.1.3.6.1.2.1.2.2.1.16.5`
+- `...`
+
+In this example, the plugin will gather the values of these OIDs:
+
+- `.1.3.6.1.2.1.2.1.0`
+- `.1.3.6.1.2.1.2.2.1.5.1`
+- `.1.3.6.1.2.1.2.2.1.16.1`
+- `.1.3.6.1.2.1.2.2.1.16.2`
+- `.1.3.6.1.2.1.2.2.1.16.3`
+- `.1.3.6.1.2.1.2.2.1.16.4`
+- `.1.3.6.1.2.1.2.2.1.16.5`
+- `...`
+
+
+```toml
+# Simple bulk example
+[[inputs.snmp]]
+  ## Use 'oids.txt' file to translate oids to names
+  ## To generate 'oids.txt' you need to run:
+  ## snmptranslate -m all -Tz -On | sed -e 's/"//g' > /tmp/oids.txt
+  ## Or, if you have another MIB folder with custom MIBs:
+  ## snmptranslate -M /mycustommibfolder -Tz -On -m all | sed -e 's/"//g' > oids.txt
+  snmptranslate_file = "/tmp/oids.txt"
+  [[inputs.snmp.host]]
+    address = "127.0.0.1:161"
+    # SNMP community
+    community = "public" # default public
+    # SNMP version (1, 2 or 3)
+    # Version 3 not supported yet
+    version = 2 # default 2
+    # Which get/bulk do you want to collect for this host
+    collect = ["interface_speed", "if_number", "if_out_octets"]
+
+    [[inputs.snmp.get]]
+      name = "interface_speed"
+      oid = "ifSpeed"
+      instance = "1"
+
+    [[inputs.snmp.get]]
+      name = "if_number"
+      oid = "ifNumber"
+
+    [[inputs.snmp.bulk]]
+      name = "if_out_octets"
+      oid = "ifOutOctets"
+```
+
+
+#### Table example
+
+In this example, we removed the `collect` attribute from the host
+section, but you can still use it in combination with the following
+parts.
+
+Note: this example behaves like a bulk request, but uses a different
+configuration.
+
+Telegraf gathers the values of the OIDs of the table:
+
+ - named **iftable1**
+
+With the **inputs.snmp.table** section, the plugin looks up the OID number:
+
+ - **iftable1** => `.1.3.6.1.2.1.31.1.1.1`
+
+Because **iftable1** is a table, the plugin will gather all
+OIDs in the table and in its subtables:
+
+- `.1.3.6.1.2.1.31.1.1.1.1`
+- `.1.3.6.1.2.1.31.1.1.1.1.1`
+- `.1.3.6.1.2.1.31.1.1.1.1.2`
+- `.1.3.6.1.2.1.31.1.1.1.1.3`
+- `.1.3.6.1.2.1.31.1.1.1.1.4`
+- `.1.3.6.1.2.1.31.1.1.1.1....`
+- `.1.3.6.1.2.1.31.1.1.1.2`
+- `.1.3.6.1.2.1.31.1.1.1.2....`
+- `.1.3.6.1.2.1.31.1.1.1.3`
+- `.1.3.6.1.2.1.31.1.1.1.3....`
+- `.1.3.6.1.2.1.31.1.1.1.4`
+- `.1.3.6.1.2.1.31.1.1.1.4....`
+- `.1.3.6.1.2.1.31.1.1.1.5`
+- `.1.3.6.1.2.1.31.1.1.1.5....`
+- `.1.3.6.1.2.1.31.1.1.1.6....`
+- `...`
+
+```toml
+# Table example
+[[inputs.snmp]]
+  ## Use 'oids.txt' file to translate oids to names
+  ## To generate 'oids.txt' you need to run:
+  ## snmptranslate -m all -Tz -On | sed -e 's/"//g' > /tmp/oids.txt
+  ## Or, if you have another MIB folder with custom MIBs:
+  ## snmptranslate -M /mycustommibfolder -Tz -On -m all | sed -e 's/"//g' > oids.txt
+  snmptranslate_file = "/tmp/oids.txt"
+  [[inputs.snmp.host]]
+    address = "127.0.0.1:161"
+    # SNMP community
+    community = "public" # default public
+    # SNMP version (1, 2 or 3)
+    # Version 3 not supported yet
+    version = 2 # default 2
+    # Which table do you want to collect
+    [[inputs.snmp.host.table]]
+      name = "iftable1"
+
+  # table without mapping nor subtables
+  # This is like a bulk request
+  [[inputs.snmp.table]]
+    name = "iftable1"
+    oid = ".1.3.6.1.2.1.31.1.1.1"
+```
+
+
+#### Table with subtable example
+
+In this example, we removed the `collect` attribute from the host
+section, but you can still use it in combination with the following
+parts.
+
+Note: this example behaves like a bulk request, but uses a different
+configuration.
+
+Telegraf gathers the values of the OIDs of the table:
+
+ - named **iftable2**
+
+With the **inputs.snmp.table** section *AND* the **sub_tables** attribute,
+the plugin will get OIDs from the subtables:
+
+ - **iftable2** => `.1.3.6.1.2.1.2.2.1.13`
+
+Because **iftable2** is a table, the plugin will gather all
+OIDs in its subtables:
+
+- `.1.3.6.1.2.1.2.2.1.13.1`
+- `.1.3.6.1.2.1.2.2.1.13.2`
+- `.1.3.6.1.2.1.2.2.1.13.3`
+- `.1.3.6.1.2.1.2.2.1.13.4`
+- `.1.3.6.1.2.1.2.2.1.13....`
+
+
+```toml
+# Table with subtable example
+[[inputs.snmp]]
+  ## Use 'oids.txt' file to translate oids to names
+  ## To generate 'oids.txt' you need to run:
+  ## snmptranslate -m all -Tz -On | sed -e 's/"//g' > /tmp/oids.txt
+  ## Or, if you have another MIB folder with custom MIBs:
+  ## snmptranslate -M /mycustommibfolder -Tz -On -m all | sed -e 's/"//g' > oids.txt
+  snmptranslate_file = "/tmp/oids.txt"
+  [[inputs.snmp.host]]
+    address = "127.0.0.1:161"
+    # SNMP community
+    community = "public" # default public
+    # SNMP version (1, 2 or 3)
+    # Version 3 not supported yet
+    version = 2 # default 2
+    # Which table do you want to collect
+    [[inputs.snmp.host.table]]
+      name = "iftable2"
+
+  # table without mapping but with subtables
+  [[inputs.snmp.table]]
+    name = "iftable2"
+    sub_tables = [".1.3.6.1.2.1.2.2.1.13"]
+    # note: the oid attribute is not used here
+```
+
+
+#### Table with mapping example
+
+In this example, we removed the `collect` attribute from the host
+section, but you can still use it in combination with the following
+parts.
+
+Telegraf gathers the values of the OIDs of the table:
+
+ - named **iftable3**
+
+With the **inputs.snmp.table** section, the plugin looks up the OID number:
+
+ - **iftable3** => `.1.3.6.1.2.1.31.1.1.1`
+
+Because **iftable3** is a table, the plugin will gather all
+OIDs in the table and in its subtables:
+
+- `.1.3.6.1.2.1.31.1.1.1.1`
+- `.1.3.6.1.2.1.31.1.1.1.1.1`
+- `.1.3.6.1.2.1.31.1.1.1.1.2`
+- `.1.3.6.1.2.1.31.1.1.1.1.3`
+- `.1.3.6.1.2.1.31.1.1.1.1.4`
+- `.1.3.6.1.2.1.31.1.1.1.1....`
+- `.1.3.6.1.2.1.31.1.1.1.2`
+- `.1.3.6.1.2.1.31.1.1.1.2....`
+- `.1.3.6.1.2.1.31.1.1.1.3`
+- `.1.3.6.1.2.1.31.1.1.1.3....`
+- `.1.3.6.1.2.1.31.1.1.1.4`
+- `.1.3.6.1.2.1.31.1.1.1.4....`
+- `.1.3.6.1.2.1.31.1.1.1.5`
+- `.1.3.6.1.2.1.31.1.1.1.5....`
+- `.1.3.6.1.2.1.31.1.1.1.6....`
+- `...`
+
+The **include_instances** attribute filters which OIDs will be
+gathered. As you can see, there is another attribute, `mapping_table`.
+Together, `include_instances` and `mapping_table` build a hash table
+that keeps only the OIDs you want; the sketch below illustrates this
+mapping step.
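+
+The following is a rough, illustrative sketch (not part of the plugin's
+API) of how such a mapping table can be walked with the `gosnmp` client
+the plugin already uses; the `walkMappingTable` helper name is invented
+for this example:
+
+```go
+package example
+
+import (
+	"strings"
+
+	"github.com/soniah/gosnmp" // SNMP library used by the plugin
+)
+
+// walkMappingTable bulk-walks mappingOid and returns a map of
+// instance name -> instance id, mirroring the hash table described
+// in this README.
+func walkMappingTable(client *gosnmp.GoSNMP, mappingOid string) (map[string]string, error) {
+	instances := make(map[string]string)
+	next := mappingOid
+	for {
+		result, err := client.GetBulk([]string{next}, 0, 32)
+		if err != nil {
+			return nil, err
+		}
+		if len(result.Variables) == 0 {
+			return instances, nil
+		}
+		lastOid := ""
+		for _, v := range result.Variables {
+			lastOid = v.Name
+			if !strings.HasPrefix(v.Name, mappingOid) {
+				// we walked past the end of the mapping table
+				return instances, nil
+			}
+			if v.Type == gosnmp.OctetString {
+				// ".1.3.6.1.2.1.31.1.1.1.1.4" -> instance id "4"
+				id := strings.Trim(strings.Replace(v.Name, mappingOid, "", 1), ".")
+				instances[string(v.Value.([]byte))] = id
+			}
+		}
+		// keep walking from the last OID returned
+		next = lastOid
+	}
+}
+```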
+Let's say we have the following data on the SNMP server:
+ - OID: `.1.3.6.1.2.1.31.1.1.1.1.1` has the value `enp5s0`
+ - OID: `.1.3.6.1.2.1.31.1.1.1.1.2` has the value `enp5s1`
+ - OID: `.1.3.6.1.2.1.31.1.1.1.1.3` has the value `enp5s2`
+ - OID: `.1.3.6.1.2.1.31.1.1.1.1.4` has the value `eth0`
+ - OID: `.1.3.6.1.2.1.31.1.1.1.1.5` has the value `eth1`
+
+The plugin will build the following hash table:
+
+| instance name | instance id |
+|---------------|-------------|
+| `enp5s0`      | `1`         |
+| `enp5s1`      | `2`         |
+| `enp5s2`      | `3`         |
+| `eth0`        | `4`         |
+| `eth1`        | `5`         |
+
+With the **include_instances** attribute, the plugin will gather
+the following OIDs:
+
+- `.1.3.6.1.2.1.31.1.1.1.1.1`
+- `.1.3.6.1.2.1.31.1.1.1.1.5`
+- `.1.3.6.1.2.1.31.1.1.1.2.1`
+- `.1.3.6.1.2.1.31.1.1.1.2.5`
+- `.1.3.6.1.2.1.31.1.1.1.3.1`
+- `.1.3.6.1.2.1.31.1.1.1.3.5`
+- `.1.3.6.1.2.1.31.1.1.1.4.1`
+- `.1.3.6.1.2.1.31.1.1.1.4.5`
+- `.1.3.6.1.2.1.31.1.1.1.5.1`
+- `.1.3.6.1.2.1.31.1.1.1.5.5`
+- `.1.3.6.1.2.1.31.1.1.1.6.1`
+- `.1.3.6.1.2.1.31.1.1.1.6.5`
+- `...`
+
+Note: the plugin will add the instance name as the *instance* tag.
+
+```toml
+# Simple table with mapping example
+[[inputs.snmp]]
+  ## Use 'oids.txt' file to translate oids to names
+  ## To generate 'oids.txt' you need to run:
+  ## snmptranslate -m all -Tz -On | sed -e 's/"//g' > /tmp/oids.txt
+  ## Or, if you have another MIB folder with custom MIBs:
+  ## snmptranslate -M /mycustommibfolder -Tz -On -m all | sed -e 's/"//g' > oids.txt
+  snmptranslate_file = "/tmp/oids.txt"
+  [[inputs.snmp.host]]
+    address = "127.0.0.1:161"
+    # SNMP community
+    community = "public" # default public
+    # SNMP version (1, 2 or 3)
+    # Version 3 not supported yet
+    version = 2 # default 2
+    # Which table do you want to collect
+    [[inputs.snmp.host.table]]
+      name = "iftable3"
+      include_instances = ["enp5s0", "eth1"]
+
+  # table with mapping but without subtables
+  [[inputs.snmp.table]]
+    name = "iftable3"
+    oid = ".1.3.6.1.2.1.31.1.1.1"
+    # if empty, get all instances
+    mapping_table = ".1.3.6.1.2.1.31.1.1.1.1"
+    # if empty, get all subtables
+```
+
+
+#### Table with both mapping and subtable example
+
+In this example, we removed the `collect` attribute from the host
+section, but you can still use it in combination with the following
+parts.
+
+Telegraf gathers the values of the OIDs of the table:
+
+ - named **iftable4**
+
+With the **inputs.snmp.table** section *AND* the **sub_tables** attribute,
+the plugin will get OIDs from the subtables:
+
+ - **iftable4** => `.1.3.6.1.2.1.31.1.1.1`
+
+Because **iftable4** is a table, the plugin will gather all
+OIDs in the table and in its subtables:
+
+- `.1.3.6.1.2.1.31.1.1.1.6.1`
+- `.1.3.6.1.2.1.31.1.1.1.6.2`
+- `.1.3.6.1.2.1.31.1.1.1.6.3`
+- `.1.3.6.1.2.1.31.1.1.1.6.4`
+- `.1.3.6.1.2.1.31.1.1.1.6....`
+- `.1.3.6.1.2.1.31.1.1.1.10.1`
+- `.1.3.6.1.2.1.31.1.1.1.10.2`
+- `.1.3.6.1.2.1.31.1.1.1.10.3`
+- `.1.3.6.1.2.1.31.1.1.1.10.4`
+- `.1.3.6.1.2.1.31.1.1.1.10....`
+
+The **include_instances** attribute filters which OIDs will be
+gathered. As you can see, there is another attribute, `mapping_table`.
+Together, `include_instances` and `mapping_table` build a hash table
+that keeps only the OIDs you want, exactly as in the previous example;
+the sketch below shows the include/exclude decision itself.
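+
+A minimal sketch of that include/exclude decision (the `shouldGather`
+helper is hypothetical; the plugin's own logic in `SNMPMap` is
+equivalent when only one of the two lists is set):
+
+```go
+package example
+
+// shouldGather reports whether an instance should be collected:
+// if include_instances is set, only listed instances pass;
+// otherwise everything passes unless listed in exclude_instances.
+func shouldGather(instance string, include, exclude []string) bool {
+	if len(include) > 0 {
+		for _, name := range include {
+			if name == instance {
+				return true
+			}
+		}
+		return false
+	}
+	for _, name := range exclude {
+		if name == instance {
+			return false
+		}
+	}
+	return true
+}
+```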
+Let's say we have the following data on the SNMP server:
+ - OID: `.1.3.6.1.2.1.31.1.1.1.1.1` has the value `enp5s0`
+ - OID: `.1.3.6.1.2.1.31.1.1.1.1.2` has the value `enp5s1`
+ - OID: `.1.3.6.1.2.1.31.1.1.1.1.3` has the value `enp5s2`
+ - OID: `.1.3.6.1.2.1.31.1.1.1.1.4` has the value `eth0`
+ - OID: `.1.3.6.1.2.1.31.1.1.1.1.5` has the value `eth1`
+
+The plugin will build the following hash table:
+
+| instance name | instance id |
+|---------------|-------------|
+| `enp5s0`      | `1`         |
+| `enp5s1`      | `2`         |
+| `enp5s2`      | `3`         |
+| `eth0`        | `4`         |
+| `eth1`        | `5`         |
+
+With the **include_instances** attribute, the plugin will gather
+the following OIDs:
+
+- `.1.3.6.1.2.1.31.1.1.1.6.1`
+- `.1.3.6.1.2.1.31.1.1.1.6.5`
+- `.1.3.6.1.2.1.31.1.1.1.10.1`
+- `.1.3.6.1.2.1.31.1.1.1.10.5`
+
+Note: the plugin will add the instance name as the *instance* tag.
+
+```toml
+# Table with both mapping and subtable example
+[[inputs.snmp]]
+  ## Use 'oids.txt' file to translate oids to names
+  ## To generate 'oids.txt' you need to run:
+  ## snmptranslate -m all -Tz -On | sed -e 's/"//g' > /tmp/oids.txt
+  ## Or, if you have another MIB folder with custom MIBs:
+  ## snmptranslate -M /mycustommibfolder -Tz -On -m all | sed -e 's/"//g' > oids.txt
+  snmptranslate_file = "/tmp/oids.txt"
+  [[inputs.snmp.host]]
+    address = "127.0.0.1:161"
+    # SNMP community
+    community = "public" # default public
+    # SNMP version (1, 2 or 3)
+    # Version 3 not supported yet
+    version = 2 # default 2
+    # Which table do you want to collect
+    [[inputs.snmp.host.table]]
+      name = "iftable4"
+      include_instances = ["enp5s0", "eth1"]
+
+  # table with both mapping and subtables
+  [[inputs.snmp.table]]
+    name = "iftable4"
+    # if empty, get all instances
+    mapping_table = ".1.3.6.1.2.1.31.1.1.1.1"
+    # if empty, get all subtables
+    # sub_tables do not have to be "real" subtables
+    sub_tables = [".1.3.6.1.2.1.2.2.1.13", "bytes_recv", "bytes_send"]
+    # note: the oid attribute is not used here
+
+  # SNMP SUBTABLES
+  [[inputs.snmp.subtable]]
+    name = "bytes_recv"
+    oid = ".1.3.6.1.2.1.31.1.1.1.6"
+    unit = "octets"
+
+  [[inputs.snmp.subtable]]
+    name = "bytes_send"
+    oid = ".1.3.6.1.2.1.31.1.1.1.10"
+    unit = "octets"
+```
+
+#### Configuration notes
+
+- In the **inputs.snmp.table** section, the `oid` attribute is useless if
+  the `sub_tables` attribute is defined
+
+- In the **inputs.snmp.subtable** section, you can put a name from `snmptranslate_file`
+  as the `oid` attribute instead of a valid OID
+
+### Measurements & Fields:
+
+With the last example (Table with both mapping and subtable example):
+
+- ifHCOutOctets
+  - ifHCOutOctets
+- ifInDiscards
+  - ifInDiscards
+- ifHCInOctets
+  - ifHCInOctets
+
+### Tags:
+
+With the last example (Table with both mapping and subtable example):
+
+- ifHCOutOctets
+  - host
+  - instance
+  - unit
+- ifInDiscards
+  - host
+  - instance
+- ifHCInOctets
+  - host
+  - instance
+  - unit
+
+### Example Output:
+
+With the last example (Table with both mapping and subtable example):
+
+```
+ifHCOutOctets,host=127.0.0.1,instance=enp5s0,unit=octets ifHCOutOctets=10565628i 1456878706044462901
+ifInDiscards,host=127.0.0.1,instance=enp5s0 ifInDiscards=0i 1456878706044510264
+ifHCInOctets,host=127.0.0.1,instance=enp5s0,unit=octets ifHCInOctets=76351777i 1456878706044531312
+```
diff --git a/plugins/inputs/snmp/snmp.go b/plugins/inputs/snmp/snmp.go
index 371bc2ad9..4c2de93c9 100644
--- a/plugins/inputs/snmp/snmp.go
+++ b/plugins/inputs/snmp/snmp.go
@@ -4,7 +4,6 @@ import (
 	"io/ioutil"
 	"log"
 	"net"
-	"regexp"
 	"strconv"
 	"strings"
 	"time"
@@ -20,7 +19,16 @@ type Snmp
struct { Host []Host Get []Data Bulk []Data + Table []Table + Subtable []Subtable SnmptranslateFile string + + nameToOid map[string]string + initNode Node + subTableMap map[string]Subtable + + // TODO change as unexportable + //OidInstanceMapping map[string]map[string]string } type Host struct { @@ -36,9 +44,54 @@ type Host struct { Collect []string // easy get oids GetOids []string + // Table + Table []HostTable // Oids getOids []Data bulkOids []Data + tables []HostTable + // array of processed oids + // to skip oid duplication + processedOids []string +} + +type Table struct { + // name = "iftable" + Name string + // oid = ".1.3.6.1.2.1.31.1.1.1" + Oid string + //if empty get all instances + //mapping_table = ".1.3.6.1.2.1.31.1.1.1.1" + MappingTable string + // if empty get all subtables + // sub_tables could be not "real subtables" + //sub_tables=[".1.3.6.1.2.1.2.2.1.13", "bytes_recv", "bytes_send"] + SubTables []string +} + +type HostTable struct { + // name = "iftable" + Name string + // Includes only these instances + // include_instances = ["eth0", "eth1"] + IncludeInstances []string + // Excludes only these instances + // exclude_instances = ["eth20", "eth21"] + ExcludeInstances []string + // From Table struct + oid string + mappingTable string + subTables []string +} + +// TODO find better names +type Subtable struct { + //name = "bytes_send" + Name string + //oid = ".1.3.6.1.2.1.31.1.1.1.10" + Oid string + //unit = "octets" + Unit string } type Data struct { @@ -63,13 +116,8 @@ type Node struct { subnodes map[string]Node } -var initNode = Node{ - id: "1", - name: "", - subnodes: make(map[string]Node), -} - -var NameToOid = make(map[string]string) +// TODO move this var to snmp struct +var OidInstanceMapping = make(map[string]map[string]string) var sampleConfig = ` ## Use 'oids.txt' file to translate oids to names @@ -113,7 +161,7 @@ var sampleConfig = ` [[inputs.snmp.get]] name = "interface_speed" oid = "ifSpeed" - instance = 0 + instance = "0" [[inputs.snmp.get]] name = "sysuptime" @@ -129,6 +177,49 @@ var sampleConfig = ` name = "ifoutoctets" max_repetition = 127 oid = "ifOutOctets" + + [[inputs.snmp.host]] + address = "192.168.2.13:161" + #address = "127.0.0.1:161" + community = "public" + version = 2 + timeout = 2.0 + retries = 2 + #collect = ["mybulk", "sysservices", "sysdescr", "systype"] + collect = ["sysuptime" ] + [[inputs.snmp.host.table]] + name = "iftable3" + include_instances = ["enp5s0", "eth1"] + + # SNMP TABLEs + # table without mapping neither subtables + [[inputs.snmp.table]] + name = "iftable1" + oid = ".1.3.6.1.2.1.31.1.1.1" + + # table without mapping but with subtables + [[inputs.snmp.table]] + name = "iftable2" + oid = ".1.3.6.1.2.1.31.1.1.1" + sub_tables = [".1.3.6.1.2.1.2.2.1.13"] + + # table with mapping but without subtables + [[inputs.snmp.table]] + name = "iftable3" + oid = ".1.3.6.1.2.1.31.1.1.1" + # if empty. 
get all instances + mapping_table = ".1.3.6.1.2.1.31.1.1.1.1" + # if empty, get all subtables + + # table with both mapping and subtables + [[inputs.snmp.table]] + name = "iftable4" + oid = ".1.3.6.1.2.1.31.1.1.1" + # if empty get all instances + mapping_table = ".1.3.6.1.2.1.31.1.1.1.1" + # if empty get all subtables + # sub_tables could be not "real subtables" + sub_tables=[".1.3.6.1.2.1.2.2.1.13", "bytes_recv", "bytes_send"] ` // SampleConfig returns sample configuration message @@ -189,21 +280,36 @@ func findnodename(node Node, ids []string) (string, string) { } func (s *Snmp) Gather(acc telegraf.Accumulator) error { + // TODO put this in cache on first run + // Create subtables mapping + if len(s.subTableMap) == 0 { + s.subTableMap = make(map[string]Subtable) + for _, sb := range s.Subtable { + s.subTableMap[sb.Name] = sb + } + } + // TODO put this in cache on first run // Create oid tree - if s.SnmptranslateFile != "" && len(initNode.subnodes) == 0 { + if s.SnmptranslateFile != "" && len(s.initNode.subnodes) == 0 { + s.nameToOid = make(map[string]string) + s.initNode = Node{ + id: "1", + name: "", + subnodes: make(map[string]Node), + } + data, err := ioutil.ReadFile(s.SnmptranslateFile) if err != nil { log.Printf("Reading SNMPtranslate file error: %s", err) return err } else { for _, line := range strings.Split(string(data), "\n") { - oidsRegEx := regexp.MustCompile(`([^\t]*)\t*([^\t]*)`) - oids := oidsRegEx.FindStringSubmatch(string(line)) - if oids[2] != "" { - oid_name := oids[1] - oid := oids[2] - fillnode(initNode, oid_name, strings.Split(string(oid), ".")) - NameToOid[oid_name] = oid + oids := strings.Fields(string(line)) + if len(oids) == 2 && oids[1] != "" { + oid_name := oids[0] + oid := oids[1] + fillnode(s.initNode, oid_name, strings.Split(string(oid), ".")) + s.nameToOid[oid_name] = oid } } } @@ -227,7 +333,7 @@ func (s *Snmp) Gather(acc telegraf.Accumulator) error { // Get Easy GET oids for _, oidstring := range host.GetOids { oid := Data{} - if val, ok := NameToOid[oidstring]; ok { + if val, ok := s.nameToOid[oidstring]; ok { // TODO should we add the 0 instance ? oid.Name = oidstring oid.Oid = val @@ -248,7 +354,7 @@ func (s *Snmp) Gather(acc telegraf.Accumulator) error { // Get GET oids for _, oid := range s.Get { if oid.Name == oid_name { - if val, ok := NameToOid[oid.Oid]; ok { + if val, ok := s.nameToOid[oid.Oid]; ok { // TODO should we add the 0 instance ? if oid.Instance != "" { oid.rawOid = "." + val + "." + oid.Instance @@ -264,7 +370,7 @@ func (s *Snmp) Gather(acc telegraf.Accumulator) error { // Get GETBULK oids for _, oid := range s.Bulk { if oid.Name == oid_name { - if val, ok := NameToOid[oid.Oid]; ok { + if val, ok := s.nameToOid[oid.Oid]; ok { oid.rawOid = "." 
+ val } else { oid.rawOid = oid.Oid @@ -273,18 +379,219 @@ func (s *Snmp) Gather(acc telegraf.Accumulator) error { } } } + // Table + for _, hostTable := range host.Table { + for _, snmpTable := range s.Table { + if hostTable.Name == snmpTable.Name { + table := hostTable + table.oid = snmpTable.Oid + table.mappingTable = snmpTable.MappingTable + table.subTables = snmpTable.SubTables + host.tables = append(host.tables, table) + } + } + } + // Launch Mapping + // TODO put this in cache on first run + // TODO save mapping and computed oids + // to do it only the first time + // only if len(s.OidInstanceMapping) == 0 + if len(OidInstanceMapping) >= 0 { + if err := host.SNMPMap(acc, s.nameToOid, s.subTableMap); err != nil { + return err + } + } // Launch Get requests - if err := host.SNMPGet(acc); err != nil { + if err := host.SNMPGet(acc, s.initNode); err != nil { return err } - if err := host.SNMPBulk(acc); err != nil { + if err := host.SNMPBulk(acc, s.initNode); err != nil { return err } } return nil } -func (h *Host) SNMPGet(acc telegraf.Accumulator) error { +func (h *Host) SNMPMap(acc telegraf.Accumulator, nameToOid map[string]string, subTableMap map[string]Subtable) error { + // Get snmp client + snmpClient, err := h.GetSNMPClient() + if err != nil { + return err + } + // Deconnection + defer snmpClient.Conn.Close() + // Prepare OIDs + for _, table := range h.tables { + // We don't have mapping + if table.mappingTable == "" { + if len(table.subTables) == 0 { + // If We don't have mapping table + // neither subtables list + // This is just a bulk request + oid := Data{} + oid.Oid = table.oid + if val, ok := nameToOid[oid.Oid]; ok { + oid.rawOid = "." + val + } else { + oid.rawOid = oid.Oid + } + h.bulkOids = append(h.bulkOids, oid) + } else { + // If We don't have mapping table + // but we have subtables + // This is a bunch of bulk requests + // For each subtable ... + for _, sb := range table.subTables { + // ... 
we create a new Data (oid) object + oid := Data{} + // Looking for more information about this subtable + ssb, exists := subTableMap[sb] + if exists { + // We found a subtable section in config files + oid.Oid = ssb.Oid + oid.rawOid = ssb.Oid + oid.Unit = ssb.Unit + } else { + // We did NOT find a subtable section in config files + oid.Oid = sb + oid.rawOid = sb + } + // TODO check oid validity + + // Add the new oid to getOids list + h.bulkOids = append(h.bulkOids, oid) + } + } + } else { + // We have a mapping table + // We need to query this table + // To get mapping between instance id + // and instance name + oid_asked := table.mappingTable + oid_next := oid_asked + need_more_requests := true + // Set max repetition + maxRepetition := uint8(32) + // Launch requests + for need_more_requests { + // Launch request + result, err3 := snmpClient.GetBulk([]string{oid_next}, 0, maxRepetition) + if err3 != nil { + return err3 + } + + lastOid := "" + for _, variable := range result.Variables { + lastOid = variable.Name + if strings.HasPrefix(variable.Name, oid_asked) { + switch variable.Type { + // handle instance names + case gosnmp.OctetString: + // Check if instance is in includes instances + getInstances := true + if len(table.IncludeInstances) > 0 { + getInstances = false + for _, instance := range table.IncludeInstances { + if instance == string(variable.Value.([]byte)) { + getInstances = true + } + } + } + // Check if instance is in excludes instances + if len(table.ExcludeInstances) > 0 { + getInstances = true + for _, instance := range table.ExcludeInstances { + if instance == string(variable.Value.([]byte)) { + getInstances = false + } + } + } + // We don't want this instance + if !getInstances { + continue + } + + // remove oid table from the complete oid + // in order to get the current instance id + key := strings.Replace(variable.Name, oid_asked, "", 1) + + if len(table.subTables) == 0 { + // We have a mapping table + // but no subtables + // This is just a bulk request + + // Building mapping table + mapping := map[string]string{strings.Trim(key, "."): string(variable.Value.([]byte))} + _, exists := OidInstanceMapping[table.oid] + if exists { + OidInstanceMapping[table.oid][strings.Trim(key, ".")] = string(variable.Value.([]byte)) + } else { + OidInstanceMapping[table.oid] = mapping + } + + // Add table oid in bulk oid list + oid := Data{} + oid.Oid = table.oid + if val, ok := nameToOid[oid.Oid]; ok { + oid.rawOid = "." + val + } else { + oid.rawOid = oid.Oid + } + h.bulkOids = append(h.bulkOids, oid) + } else { + // We have a mapping table + // and some subtables + // This is a bunch of get requests + // This is the best case :) + + // For each subtable ... + for _, sb := range table.subTables { + // ... 
we create a new Data (oid) object + oid := Data{} + // Looking for more information about this subtable + ssb, exists := subTableMap[sb] + if exists { + // We found a subtable section in config files + oid.Oid = ssb.Oid + key + oid.rawOid = ssb.Oid + key + oid.Unit = ssb.Unit + oid.Instance = string(variable.Value.([]byte)) + } else { + // We did NOT find a subtable section in config files + oid.Oid = sb + key + oid.rawOid = sb + key + oid.Instance = string(variable.Value.([]byte)) + } + // TODO check oid validity + + // Add the new oid to getOids list + h.getOids = append(h.getOids, oid) + } + } + default: + } + } else { + break + } + } + // Determine if we need more requests + if strings.HasPrefix(lastOid, oid_asked) { + need_more_requests = true + oid_next = lastOid + } else { + need_more_requests = false + } + } + } + } + // Mapping finished + + // Create newoids based on mapping + + return nil +} + +func (h *Host) SNMPGet(acc telegraf.Accumulator, initNode Node) error { // Get snmp client snmpClient, err := h.GetSNMPClient() if err != nil { @@ -317,7 +624,7 @@ func (h *Host) SNMPGet(acc telegraf.Accumulator) error { return err3 } // Handle response - _, err = h.HandleResponse(oidsList, result, acc) + _, err = h.HandleResponse(oidsList, result, acc, initNode) if err != nil { return err } @@ -325,7 +632,7 @@ func (h *Host) SNMPGet(acc telegraf.Accumulator) error { return nil } -func (h *Host) SNMPBulk(acc telegraf.Accumulator) error { +func (h *Host) SNMPBulk(acc telegraf.Accumulator, initNode Node) error { // Get snmp client snmpClient, err := h.GetSNMPClient() if err != nil { @@ -360,7 +667,7 @@ func (h *Host) SNMPBulk(acc telegraf.Accumulator) error { return err3 } // Handle response - last_oid, err := h.HandleResponse(oidsList, result, acc) + last_oid, err := h.HandleResponse(oidsList, result, acc, initNode) if err != nil { return err } @@ -412,12 +719,19 @@ func (h *Host) GetSNMPClient() (*gosnmp.GoSNMP, error) { return snmpClient, nil } -func (h *Host) HandleResponse(oids map[string]Data, result *gosnmp.SnmpPacket, acc telegraf.Accumulator) (string, error) { +func (h *Host) HandleResponse(oids map[string]Data, result *gosnmp.SnmpPacket, acc telegraf.Accumulator, initNode Node) (string, error) { var lastOid string for _, variable := range result.Variables { lastOid = variable.Name - // Remove unwanted oid + nextresult: + // Get only oid wanted for oid_key, oid := range oids { + // Skip oids already processed + for _, processedOid := range h.processedOids { + if variable.Name == processedOid { + break nextresult + } + } if strings.HasPrefix(variable.Name, oid_key) { switch variable.Type { // handle Metrics @@ -431,11 +745,27 @@ func (h *Host) HandleResponse(oids map[string]Data, result *gosnmp.SnmpPacket, a // Get name and instance var oid_name string var instance string - // Get oidname and instannce from translate file + // Get oidname and instance from translate file oid_name, instance = findnodename(initNode, strings.Split(string(variable.Name[1:]), ".")) - - if instance != "" { + // Set instance tag + // From mapping table + mapping, inMappingNoSubTable := OidInstanceMapping[oid_key] + if inMappingNoSubTable { + // filter if the instance in not in + // OidInstanceMapping mapping map + if instance_name, exists := mapping[instance]; exists { + tags["instance"] = instance_name + } else { + continue + } + } else if oid.Instance != "" { + // From config files + tags["instance"] = oid.Instance + } else if instance != "" { + // Using last id of the current oid, ie: + // with 
.1.3.6.1.2.1.31.1.1.1.10.3 + // instance is 3 tags["instance"] = instance } @@ -453,6 +783,7 @@ func (h *Host) HandleResponse(oids map[string]Data, result *gosnmp.SnmpPacket, a fields := make(map[string]interface{}) fields[string(field_name)] = variable.Value + h.processedOids = append(h.processedOids, variable.Name) acc.AddFields(field_name, fields, tags) case gosnmp.NoSuchObject, gosnmp.NoSuchInstance: // Oid not found diff --git a/plugins/inputs/sqlserver/sqlserver.go b/plugins/inputs/sqlserver/sqlserver.go index 3b29a32c1..58d61705f 100644 --- a/plugins/inputs/sqlserver/sqlserver.go +++ b/plugins/inputs/sqlserver/sqlserver.go @@ -283,30 +283,75 @@ EXEC sp_executesql @DynamicPivotQuery; const sqlMemoryClerk string = `SET NOCOUNT ON; SET TRANSACTION ISOLATION LEVEL READ UNCOMMITTED; -DECLARE @w TABLE (ClerkCategory nvarchar(64) NOT NULL, UsedPercent decimal(9,2), UsedBytes bigint) -INSERT @w (ClerkCategory, UsedPercent, UsedBytes) -SELECT ClerkCategory -, UsedPercent = SUM(UsedPercent) -, UsedBytes = SUM(UsedBytes) -FROM -( -SELECT ClerkCategory = CASE MC.[type] - WHEN 'MEMORYCLERK_SQLBUFFERPOOL' THEN 'Buffer pool' - WHEN 'CACHESTORE_SQLCP' THEN 'Cache (sql plans)' - WHEN 'CACHESTORE_OBJCP' THEN 'Cache (objects)' - ELSE 'Other' END -, SUM(pages_kb * 1024) AS UsedBytes -, Cast(100 * Sum(pages_kb)*1.0/(Select Sum(pages_kb) From sys.dm_os_memory_clerks) as Decimal(7, 4)) UsedPercent -FROM sys.dm_os_memory_clerks MC -WHERE pages_kb > 0 -GROUP BY CASE MC.[type] - WHEN 'MEMORYCLERK_SQLBUFFERPOOL' THEN 'Buffer pool' - WHEN 'CACHESTORE_SQLCP' THEN 'Cache (sql plans)' - WHEN 'CACHESTORE_OBJCP' THEN 'Cache (objects)' - ELSE 'Other' END -) as T -GROUP BY ClerkCategory +DECLARE @sqlVers numeric(4,2) +SELECT @sqlVers = LEFT(CAST(SERVERPROPERTY('productversion') as varchar), 4) +IF OBJECT_ID('tempdb..#clerk') IS NOT NULL + DROP TABLE #clerk; + +CREATE TABLE #clerk ( + ClerkCategory nvarchar(64) NOT NULL, + UsedPercent decimal(9,2), + UsedBytes bigint +); + +DECLARE @DynamicClerkQuery AS NVARCHAR(MAX) + +IF @sqlVers < 11 +BEGIN + SET @DynamicClerkQuery = N' + INSERT #clerk (ClerkCategory, UsedPercent, UsedBytes) + SELECT ClerkCategory + , UsedPercent = SUM(UsedPercent) + , UsedBytes = SUM(UsedBytes) + FROM + ( + SELECT ClerkCategory = CASE MC.[type] + WHEN ''MEMORYCLERK_SQLBUFFERPOOL'' THEN ''Buffer pool'' + WHEN ''CACHESTORE_SQLCP'' THEN ''Cache (sql plans)'' + WHEN ''CACHESTORE_OBJCP'' THEN ''Cache (objects)'' + ELSE ''Other'' END + , SUM((single_pages_kb + multi_pages_kb) * 1024) AS UsedBytes + , Cast(100 * Sum((single_pages_kb + multi_pages_kb))*1.0/(Select Sum((single_pages_kb + multi_pages_kb)) From sys.dm_os_memory_clerks) as Decimal(7, 4)) UsedPercent + FROM sys.dm_os_memory_clerks MC + WHERE (single_pages_kb + multi_pages_kb) > 0 + GROUP BY CASE MC.[type] + WHEN ''MEMORYCLERK_SQLBUFFERPOOL'' THEN ''Buffer pool'' + WHEN ''CACHESTORE_SQLCP'' THEN ''Cache (sql plans)'' + WHEN ''CACHESTORE_OBJCP'' THEN ''Cache (objects)'' + ELSE ''Other'' END + ) as T + GROUP BY ClerkCategory; + ' +END +ELSE +BEGIN + SET @DynamicClerkQuery = N' + INSERT #clerk (ClerkCategory, UsedPercent, UsedBytes) + SELECT ClerkCategory + , UsedPercent = SUM(UsedPercent) + , UsedBytes = SUM(UsedBytes) + FROM + ( + SELECT ClerkCategory = CASE MC.[type] + WHEN ''MEMORYCLERK_SQLBUFFERPOOL'' THEN ''Buffer pool'' + WHEN ''CACHESTORE_SQLCP'' THEN ''Cache (sql plans)'' + WHEN ''CACHESTORE_OBJCP'' THEN ''Cache (objects)'' + ELSE ''Other'' END + , SUM(pages_kb * 1024) AS UsedBytes + , Cast(100 * Sum(pages_kb)*1.0/(Select Sum(pages_kb) 
From sys.dm_os_memory_clerks) as Decimal(7, 4)) UsedPercent + FROM sys.dm_os_memory_clerks MC + WHERE pages_kb > 0 + GROUP BY CASE MC.[type] + WHEN ''MEMORYCLERK_SQLBUFFERPOOL'' THEN ''Buffer pool'' + WHEN ''CACHESTORE_SQLCP'' THEN ''Cache (sql plans)'' + WHEN ''CACHESTORE_OBJCP'' THEN ''Cache (objects)'' + ELSE ''Other'' END + ) as T + GROUP BY ClerkCategory; + ' +END +EXEC sp_executesql @DynamicClerkQuery; SELECT -- measurement measurement @@ -325,7 +370,7 @@ SELECT measurement = 'Memory breakdown (%)' , [Cache (objects)] = ISNULL(ROUND([Cache (objects)], 1), 0) , [Cache (sql plans)] = ISNULL(ROUND([Cache (sql plans)], 1), 0) , [Other] = ISNULL(ROUND([Other], 1), 0) -FROM (SELECT ClerkCategory, UsedPercent FROM @w) as G1 +FROM (SELECT ClerkCategory, UsedPercent FROM #clerk) as G1 PIVOT ( SUM(UsedPercent) @@ -339,7 +384,7 @@ SELECT measurement = 'Memory breakdown (bytes)' , [Cache (objects)] = ISNULL(ROUND([Cache (objects)], 1), 0) , [Cache (sql plans)] = ISNULL(ROUND([Cache (sql plans)], 1), 0) , [Other] = ISNULL(ROUND([Other], 1), 0) -FROM (SELECT ClerkCategory, UsedBytes FROM @w) as G2 +FROM (SELECT ClerkCategory, UsedBytes FROM #clerk) as G2 PIVOT ( SUM(UsedBytes) @@ -698,7 +743,7 @@ IF OBJECT_ID('tempdb..#Databases') IS NOT NULL CREATE TABLE #Databases ( Measurement nvarchar(64) NOT NULL, - DatabaseName nvarchar(64) NOT NULL, + DatabaseName nvarchar(128) NOT NULL, Value tinyint NOT NULL Primary Key(DatabaseName, Measurement) ); diff --git a/plugins/inputs/statsd/README.md b/plugins/inputs/statsd/README.md index 5bb18657c..8722ce1e9 100644 --- a/plugins/inputs/statsd/README.md +++ b/plugins/inputs/statsd/README.md @@ -21,6 +21,10 @@ ## convert measurement names, "." to "_" and "-" to "__" convert_names = true + ## Parses tags in DataDog's dogstatsd format + ## http://docs.datadoghq.com/guides/dogstatsd/ + parse_data_dog_tags = false + ## Statsd data translation templates, more info can be read here: ## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md#graphite # templates = [ @@ -155,6 +159,7 @@ per-measurement in the calculation of percentiles. Raising this limit increases the accuracy of percentiles but also increases the memory usage and cpu time. - **templates** []string: Templates for transforming statsd buckets into influx measurements and tags. +- **parse_data_dog_tags** boolean: Enable parsing of tags in DataDog's dogstatsd format (http://docs.datadoghq.com/guides/dogstatsd/) ### Statsd bucket -> InfluxDB line-protocol Templates @@ -198,4 +203,4 @@ mem.cached.localhost:256|g ``` There are many more options available, -[More details can be found here](https://github.com/influxdata/influxdb/tree/master/services/graphite#templates) +[More details can be found here](https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md#graphite) diff --git a/plugins/inputs/statsd/statsd.go b/plugins/inputs/statsd/statsd.go index a16e78b5c..84687511e 100644 --- a/plugins/inputs/statsd/statsd.go +++ b/plugins/inputs/statsd/statsd.go @@ -18,14 +18,20 @@ import ( ) const ( - UDP_PACKET_SIZE int = 1500 + // UDP packet limit, see + // https://en.wikipedia.org/wiki/User_Datagram_Protocol#Packet_structure + UDP_PACKET_SIZE int = 65507 defaultFieldName = "value" + + defaultSeparator = "_" ) var dropwarn = "ERROR: Message queue full. 
Discarding line [%s] " + "You may want to increase allowed_pending_messages in the config\n" +var prevInstance *Statsd + type Statsd struct { // Address & Port to serve from ServiceAddress string @@ -45,11 +51,18 @@ type Statsd struct { DeleteTimings bool ConvertNames bool + // MetricSeparator is the separator between parts of the metric name. + MetricSeparator string + // This flag enables parsing of tags in the dogstatsd extention to the + // statsd protocol (http://docs.datadoghq.com/guides/dogstatsd/) + ParseDataDogTags bool + // UDPPacketSize is the size of the read packets for the server listening // for statsd UDP packets. This will default to 1500 bytes. UDPPacketSize int `toml:"udp_packet_size"` sync.Mutex + wg sync.WaitGroup // Channel for all incoming statsd packets in chan []byte @@ -65,23 +78,8 @@ type Statsd struct { // bucket -> influx templates Templates []string -} -func NewStatsd() *Statsd { - s := Statsd{} - - // Make data structures - s.done = make(chan struct{}) - s.in = make(chan []byte, s.AllowedPendingMessages) - s.gauges = make(map[string]cachedgauge) - s.counters = make(map[string]cachedcounter) - s.sets = make(map[string]cachedset) - s.timings = make(map[string]cachedtimings) - - s.ConvertNames = true - s.UDPPacketSize = UDP_PACKET_SIZE - - return &s + listener *net.UDPConn } // One statsd metric, form is :||@ @@ -140,8 +138,12 @@ const sampleConfig = ` ## Percentiles to calculate for timing & histogram stats percentiles = [90] - ## convert measurement names, "." to "_" and "-" to "__" - convert_names = true + ## separator to use between elements of a statsd metric + metric_separator = "_" + + ## Parses tags in the datadog statsd format + ## http://docs.datadoghq.com/guides/dogstatsd/ + parse_data_dog_tags = false ## Statsd data translation templates, more info can be read here: ## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md#graphite @@ -157,10 +159,6 @@ const sampleConfig = ` ## calculation of percentiles. Raising this limit increases the accuracy ## of percentiles but also increases the memory usage and cpu time. percentile_limit = 1000 - - ## UDP packet size for the server to listen for. This will depend on the size - ## of the packets that the client is sending, which is usually 1500 bytes. - udp_packet_size = 1500 ` func (_ *Statsd) SampleConfig() string { @@ -231,38 +229,59 @@ func (s *Statsd) Start(_ telegraf.Accumulator) error { // Make data structures s.done = make(chan struct{}) s.in = make(chan []byte, s.AllowedPendingMessages) - s.gauges = make(map[string]cachedgauge) - s.counters = make(map[string]cachedcounter) - s.sets = make(map[string]cachedset) - s.timings = make(map[string]cachedtimings) + if prevInstance == nil { + s.gauges = make(map[string]cachedgauge) + s.counters = make(map[string]cachedcounter) + s.sets = make(map[string]cachedset) + s.timings = make(map[string]cachedtimings) + } else { + s.gauges = prevInstance.gauges + s.counters = prevInstance.counters + s.sets = prevInstance.sets + s.timings = prevInstance.timings + } + + if s.ConvertNames { + log.Printf("WARNING statsd: convert_names config option is deprecated," + + " please use metric_separator instead") + } + + if s.MetricSeparator == "" { + s.MetricSeparator = defaultSeparator + } + + s.wg.Add(2) // Start the UDP listener go s.udpListen() // Start the line parser go s.parser() log.Printf("Started the statsd service on %s\n", s.ServiceAddress) + prevInstance = s return nil } // udpListen starts listening for udp packets on the configured port. 
func (s *Statsd) udpListen() error { + defer s.wg.Done() + var err error address, _ := net.ResolveUDPAddr("udp", s.ServiceAddress) - listener, err := net.ListenUDP("udp", address) + s.listener, err = net.ListenUDP("udp", address) if err != nil { log.Fatalf("ERROR: ListenUDP - %s", err) } - defer listener.Close() - log.Println("Statsd listener listening on: ", listener.LocalAddr().String()) + log.Println("Statsd listener listening on: ", s.listener.LocalAddr().String()) + buf := make([]byte, s.UDPPacketSize) for { select { case <-s.done: return nil default: - buf := make([]byte, s.UDPPacketSize) - n, _, err := listener.ReadFromUDP(buf) - if err != nil { - log.Printf("ERROR: %s\n", err.Error()) + n, _, err := s.listener.ReadFromUDP(buf) + if err != nil && !strings.Contains(err.Error(), "closed network") { + log.Printf("ERROR READ: %s\n", err.Error()) + continue } select { @@ -278,11 +297,13 @@ func (s *Statsd) udpListen() error { // packet into statsd strings and then calls parseStatsdLine, which parses a // single statsd metric into a struct. func (s *Statsd) parser() error { + defer s.wg.Done() + var packet []byte for { select { case <-s.done: return nil - case packet := <-s.in: + case packet = <-s.in: lines := strings.Split(string(packet), "\n") for _, line := range lines { line = strings.TrimSpace(line) @@ -300,6 +321,43 @@ func (s *Statsd) parseStatsdLine(line string) error { s.Lock() defer s.Unlock() + lineTags := make(map[string]string) + if s.ParseDataDogTags { + recombinedSegments := make([]string, 0) + // datadog tags look like this: + // users.online:1|c|@0.5|#country:china,environment:production + // users.online:1|c|#sometagwithnovalue + // we will split on the pipe and remove any elements that are datadog + // tags, parse them, and rebuild the line sans the datadog tags + pipesplit := strings.Split(line, "|") + for _, segment := range pipesplit { + if len(segment) > 0 && segment[0] == '#' { + // we have ourselves a tag; they are comma separated + tagstr := segment[1:] + tags := strings.Split(tagstr, ",") + for _, tag := range tags { + ts := strings.Split(tag, ":") + var k, v string + switch len(ts) { + case 1: + // just a tag + k = ts[0] + v = "" + case 2: + k = ts[0] + v = ts[1] + } + if k != "" { + lineTags[k] = v + } + } + } else { + recombinedSegments = append(recombinedSegments, segment) + } + } + line = strings.Join(recombinedSegments, "|") + } + // Validate splitting the line on ":" bits := strings.Split(line, ":") if len(bits) < 2 { @@ -397,6 +455,12 @@ func (s *Statsd) parseStatsdLine(line string) error { m.tags["metric_type"] = "histogram" } + if len(lineTags) > 0 { + for k, v := range lineTags { + m.tags[k] = v + } + } + // Make a unique key for the measurement name/tags var tg []string for k, v := range m.tags { @@ -431,7 +495,7 @@ func (s *Statsd) parseName(bucket string) (string, string, map[string]string) { var field string name := bucketparts[0] - p, err := graphite.NewGraphiteParser(".", s.Templates, nil) + p, err := graphite.NewGraphiteParser(s.MetricSeparator, s.Templates, nil) if err == nil { p.DefaultTags = tags name, tags, field, _ = p.ApplyTemplate(name) @@ -558,14 +622,16 @@ func (s *Statsd) Stop() { defer s.Unlock() log.Println("Stopping the statsd service") close(s.done) + s.listener.Close() + s.wg.Wait() close(s.in) } func init() { inputs.Add("statsd", func() telegraf.Input { return &Statsd{ - ConvertNames: true, - UDPPacketSize: UDP_PACKET_SIZE, + MetricSeparator: "_", + UDPPacketSize: UDP_PACKET_SIZE, } }) } diff --git 
a/plugins/inputs/statsd/statsd_test.go b/plugins/inputs/statsd/statsd_test.go index 3a87f00aa..743e80135 100644 --- a/plugins/inputs/statsd/statsd_test.go +++ b/plugins/inputs/statsd/statsd_test.go @@ -8,9 +8,26 @@ import ( "github.com/influxdata/telegraf/testutil" ) +func NewTestStatsd() *Statsd { + s := Statsd{} + + // Make data structures + s.done = make(chan struct{}) + s.in = make(chan []byte, s.AllowedPendingMessages) + s.gauges = make(map[string]cachedgauge) + s.counters = make(map[string]cachedcounter) + s.sets = make(map[string]cachedset) + s.timings = make(map[string]cachedtimings) + + s.MetricSeparator = "_" + s.UDPPacketSize = UDP_PACKET_SIZE + + return &s +} + // Invalid lines should return an error func TestParse_InvalidLines(t *testing.T) { - s := NewStatsd() + s := NewTestStatsd() invalid_lines := []string{ "i.dont.have.a.pipe:45g", "i.dont.have.a.colon45|c", @@ -34,7 +51,7 @@ func TestParse_InvalidLines(t *testing.T) { // Invalid sample rates should be ignored and not applied func TestParse_InvalidSampleRate(t *testing.T) { - s := NewStatsd() + s := NewTestStatsd() invalid_lines := []string{ "invalid.sample.rate:45|c|0.1", "invalid.sample.rate.2:45|c|@foo", @@ -84,9 +101,9 @@ func TestParse_InvalidSampleRate(t *testing.T) { } } -// Names should be parsed like . -> _ and - -> __ +// Names should be parsed like . -> _ func TestParse_DefaultNameParsing(t *testing.T) { - s := NewStatsd() + s := NewTestStatsd() valid_lines := []string{ "valid:1|c", "valid.foo-bar:11|c", @@ -108,7 +125,7 @@ func TestParse_DefaultNameParsing(t *testing.T) { 1, }, { - "valid_foo__bar", + "valid_foo-bar", 11, }, } @@ -123,7 +140,7 @@ func TestParse_DefaultNameParsing(t *testing.T) { // Test that template name transformation works func TestParse_Template(t *testing.T) { - s := NewStatsd() + s := NewTestStatsd() s.Templates = []string{ "measurement.measurement.host.service", } @@ -165,7 +182,7 @@ func TestParse_Template(t *testing.T) { // Test that template filters properly func TestParse_TemplateFilter(t *testing.T) { - s := NewStatsd() + s := NewTestStatsd() s.Templates = []string{ "cpu.idle.* measurement.measurement.host", } @@ -207,7 +224,7 @@ func TestParse_TemplateFilter(t *testing.T) { // Test that most specific template is chosen func TestParse_TemplateSpecificity(t *testing.T) { - s := NewStatsd() + s := NewTestStatsd() s.Templates = []string{ "cpu.* measurement.foo.host", "cpu.idle.* measurement.measurement.host", @@ -245,7 +262,7 @@ func TestParse_TemplateSpecificity(t *testing.T) { // Test that most specific template is chosen func TestParse_TemplateFields(t *testing.T) { - s := NewStatsd() + s := NewTestStatsd() s.Templates = []string{ "* measurement.measurement.field", } @@ -359,7 +376,7 @@ func TestParse_Fields(t *testing.T) { // Test that tags within the bucket are parsed correctly func TestParse_Tags(t *testing.T) { - s := NewStatsd() + s := NewTestStatsd() tests := []struct { bucket string @@ -410,9 +427,87 @@ func TestParse_Tags(t *testing.T) { } } +// Test that DataDog tags are parsed +func TestParse_DataDogTags(t *testing.T) { + s := NewTestStatsd() + s.ParseDataDogTags = true + + lines := []string{ + "my_counter:1|c|#host:localhost,environment:prod", + "my_gauge:10.1|g|#live", + "my_set:1|s|#host:localhost", + "my_timer:3|ms|@0.1|#live,host:localhost", + } + + testTags := map[string]map[string]string{ + "my_counter": map[string]string{ + "host": "localhost", + "environment": "prod", + }, + + "my_gauge": map[string]string{ + "live": "", + }, + + "my_set": map[string]string{ + 
"host": "localhost", + }, + + "my_timer": map[string]string{ + "live": "", + "host": "localhost", + }, + } + + for _, line := range lines { + err := s.parseStatsdLine(line) + if err != nil { + t.Errorf("Parsing line %s should not have resulted in an error\n", line) + } + } + + sourceTags := map[string]map[string]string{ + "my_gauge": tagsForItem(s.gauges), + "my_counter": tagsForItem(s.counters), + "my_set": tagsForItem(s.sets), + "my_timer": tagsForItem(s.timings), + } + + for statName, tags := range testTags { + for k, v := range tags { + otherValue := sourceTags[statName][k] + if sourceTags[statName][k] != v { + t.Errorf("Error with %s, tag %s: %s != %s", statName, k, v, otherValue) + } + } + } +} + +func tagsForItem(m interface{}) map[string]string { + switch m.(type) { + case map[string]cachedcounter: + for _, v := range m.(map[string]cachedcounter) { + return v.tags + } + case map[string]cachedgauge: + for _, v := range m.(map[string]cachedgauge) { + return v.tags + } + case map[string]cachedset: + for _, v := range m.(map[string]cachedset) { + return v.tags + } + case map[string]cachedtimings: + for _, v := range m.(map[string]cachedtimings) { + return v.tags + } + } + return nil +} + // Test that statsd buckets are parsed to measurement names properly func TestParseName(t *testing.T) { - s := NewStatsd() + s := NewTestStatsd() tests := []struct { in_name string @@ -428,7 +523,7 @@ func TestParseName(t *testing.T) { }, { "foo.bar-baz", - "foo_bar__baz", + "foo_bar-baz", }, } @@ -439,8 +534,8 @@ func TestParseName(t *testing.T) { } } - // Test with ConvertNames = false - s.ConvertNames = false + // Test with separator == "." + s.MetricSeparator = "." tests = []struct { in_name string @@ -471,7 +566,7 @@ func TestParseName(t *testing.T) { // Test that measurements with the same name, but different tags, are treated // as different outputs func TestParse_MeasurementsWithSameName(t *testing.T) { - s := NewStatsd() + s := NewTestStatsd() // Test that counters work valid_lines := []string{ @@ -529,8 +624,8 @@ func TestParse_MeasurementsWithMultipleValues(t *testing.T) { "valid.multiple.mixed:1|c:1|ms:2|s:1|g", } - s_single := NewStatsd() - s_multiple := NewStatsd() + s_single := NewTestStatsd() + s_multiple := NewTestStatsd() for _, line := range single_lines { err := s_single.parseStatsdLine(line) @@ -623,7 +718,7 @@ func TestParse_MeasurementsWithMultipleValues(t *testing.T) { // Valid lines should be parsed and their values should be cached func TestParse_ValidLines(t *testing.T) { - s := NewStatsd() + s := NewTestStatsd() valid_lines := []string{ "valid:45|c", "valid:45|s", @@ -642,7 +737,7 @@ func TestParse_ValidLines(t *testing.T) { // Tests low-level functionality of gauges func TestParse_Gauges(t *testing.T) { - s := NewStatsd() + s := NewTestStatsd() // Test that gauge +- values work valid_lines := []string{ @@ -708,7 +803,7 @@ func TestParse_Gauges(t *testing.T) { // Tests low-level functionality of sets func TestParse_Sets(t *testing.T) { - s := NewStatsd() + s := NewTestStatsd() // Test that sets work valid_lines := []string{ @@ -756,7 +851,7 @@ func TestParse_Sets(t *testing.T) { // Tests low-level functionality of counters func TestParse_Counters(t *testing.T) { - s := NewStatsd() + s := NewTestStatsd() // Test that counters work valid_lines := []string{ @@ -810,7 +905,7 @@ func TestParse_Counters(t *testing.T) { // Tests low-level functionality of timings func TestParse_Timings(t *testing.T) { - s := NewStatsd() + s := NewTestStatsd() s.Percentiles = []int{90} acc := 
&testutil.Accumulator{} @@ -847,7 +942,7 @@ func TestParse_Timings(t *testing.T) { // Tests low-level functionality of timings when multiple fields is enabled // and a measurement template has been defined which can parse field names func TestParse_Timings_MultipleFieldsWithTemplate(t *testing.T) { - s := NewStatsd() + s := NewTestStatsd() s.Templates = []string{"measurement.field"} s.Percentiles = []int{90} acc := &testutil.Accumulator{} @@ -896,7 +991,7 @@ func TestParse_Timings_MultipleFieldsWithTemplate(t *testing.T) { // but a measurement template hasn't been defined so we can't parse field names // In this case the behaviour should be the same as normal behaviour func TestParse_Timings_MultipleFieldsWithoutTemplate(t *testing.T) { - s := NewStatsd() + s := NewTestStatsd() s.Templates = []string{} s.Percentiles = []int{90} acc := &testutil.Accumulator{} @@ -944,7 +1039,7 @@ func TestParse_Timings_MultipleFieldsWithoutTemplate(t *testing.T) { } func TestParse_Timings_Delete(t *testing.T) { - s := NewStatsd() + s := NewTestStatsd() s.DeleteTimings = true fakeacc := &testutil.Accumulator{} var err error @@ -968,7 +1063,7 @@ func TestParse_Timings_Delete(t *testing.T) { // Tests the delete_gauges option func TestParse_Gauges_Delete(t *testing.T) { - s := NewStatsd() + s := NewTestStatsd() s.DeleteGauges = true fakeacc := &testutil.Accumulator{} var err error @@ -994,7 +1089,7 @@ func TestParse_Gauges_Delete(t *testing.T) { // Tests the delete_sets option func TestParse_Sets_Delete(t *testing.T) { - s := NewStatsd() + s := NewTestStatsd() s.DeleteSets = true fakeacc := &testutil.Accumulator{} var err error @@ -1020,7 +1115,7 @@ func TestParse_Sets_Delete(t *testing.T) { // Tests the delete_counters option func TestParse_Counters_Delete(t *testing.T) { - s := NewStatsd() + s := NewTestStatsd() s.DeleteCounters = true fakeacc := &testutil.Accumulator{} var err error diff --git a/plugins/inputs/system/KERNEL_README.md b/plugins/inputs/system/KERNEL_README.md new file mode 100644 index 000000000..3285e59ef --- /dev/null +++ b/plugins/inputs/system/KERNEL_README.md @@ -0,0 +1,64 @@ +# Kernel Input Plugin + +This plugin is only available on Linux. + +The kernel plugin gathers info about the kernel that doesn't fit into other +plugins. In general, it is the statistics available in `/proc/stat` that are +not covered by other plugins. + +The metrics are documented in `man proc` under the `/proc/stat` section. + +``` +/proc/stat +kernel/system statistics. Varies with architecture. Common entries include: + +page 5741 1808 +The number of pages the system paged in and the number that were paged out (from disk). + +swap 1 0 +The number of swap pages that have been brought in and out. + +intr 1462898 +This line shows counts of interrupts serviced since boot time, for each of +the possible system interrupts. The first column is the total of all +interrupts serviced; each subsequent column is the total for a particular interrupt. + +ctxt 115315 +The number of context switches that the system underwent. + +btime 769041601 +boot time, in seconds since the Epoch, 1970-01-01 00:00:00 +0000 (UTC). + +processes 86031 +Number of forks since boot. 
+```
+
+### Configuration:
+
+```toml
+# Get kernel statistics from /proc/stat
+[[inputs.kernel]]
+  # no configuration
+```
+
+### Measurements & Fields:
+
+- kernel
+    - boot_time (integer, seconds since epoch, `btime`)
+    - context_switches (integer, `ctxt`)
+    - disk_pages_in (integer, `page (0)`)
+    - disk_pages_out (integer, `page (1)`)
+    - interrupts (integer, `intr`)
+    - processes_forked (integer, `processes`)
+
+### Tags:
+
+None
+
+### Example Output:
+
+```
+$ telegraf -config ~/ws/telegraf.conf -input-filter kernel -test
+* Plugin: kernel, Collection 1
+> kernel boot_time=1457505775i,context_switches=2626618i,disk_pages_in=5741i,disk_pages_out=1808i,interrupts=1472736i,processes_forked=10673i 1457613402960879816
+```
diff --git a/plugins/inputs/system/PROCESSES_README.md b/plugins/inputs/system/PROCESSES_README.md
new file mode 100644
index 000000000..006e043fb
--- /dev/null
+++ b/plugins/inputs/system/PROCESSES_README.md
@@ -0,0 +1,58 @@
+# Processes Input Plugin
+
+This plugin gathers info about the total number of processes and groups
+them by status (zombie, sleeping, running, etc.)
+
+On linux this plugin requires access to procfs (/proc); on other OSes
+it requires the ability to execute `ps`.
+
+### Configuration:
+
+```toml
+# Get the number of processes and group them by status
+[[inputs.processes]]
+  # no configuration
+```
+
+### Measurements & Fields:
+
+- processes
+    - blocked (aka disk sleep or uninterruptible sleep)
+    - running
+    - sleeping
+    - stopped
+    - total
+    - zombies
+    - wait (freebsd only)
+    - idle (bsd only)
+    - paging (linux only)
+    - total_threads (linux only)
+
+### Process State Mappings
+
+Different OSes use slightly different state codes for their processes; these
+codes are documented in `man ps`. The table below maps the major OS state
+codes to telegraf metrics:
+
+```
+Linux  FreeBSD  Darwin  meaning
+  R       R       R     running
+  S       S       S     sleeping
+  Z       Z       Z     zombie
+  T       T       T     stopped
+  none    I       I     idle (sleeping for longer than about 20 seconds)
+  D      D,L      U     blocked (waiting in uninterruptible sleep, or locked)
+  W       W      none   paging (linux kernel < 2.6 only), wait (freebsd)
+```
+
+### Tags:
+
+None
+
+### Example Output:
+
+```
+$ telegraf -config ~/ws/telegraf.conf -input-filter processes -test
+* Plugin: processes, Collection 1
+> processes blocked=8i,running=1i,sleeping=265i,stopped=0i,total=274i,zombies=0i,paging=0i,total_threads=687i 1457478636980905042
+```
diff --git a/plugins/inputs/system/SYSTEM_README.md b/plugins/inputs/system/SYSTEM_README.md
new file mode 100644
index 000000000..fc873c7e8
--- /dev/null
+++ b/plugins/inputs/system/SYSTEM_README.md
@@ -0,0 +1,35 @@
+# System Input Plugin
+
+The system plugin gathers general stats on system load, uptime,
+and number of users logged in. It is basically equivalent
+to the unix `uptime` command.
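+
+For a quick look at where `n_users` comes from, here is a minimal standalone
+sketch built on gopsutil's `host.Users()`, the same call the plugin makes (a
+sketch for exploration, not part of the plugin):
+
+```go
+package main
+
+import (
+	"fmt"
+
+	"github.com/shirou/gopsutil/host"
+)
+
+func main() {
+	// host.Users() lists the sessions of currently logged-in users;
+	// the plugin reports len(users) as the n_users field.
+	users, err := host.Users()
+	if err != nil {
+		panic(err)
+	}
+	fmt.Println("n_users =", len(users))
+}
+```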
+ +### Configuration: + +```toml +# Read metrics about system load & uptime +[[inputs.system]] + # no configuration +``` + +### Measurements & Fields: + +- system + - load1 (float) + - load15 (float) + - load5 (float) + - n_users (integer) + - uptime (integer, seconds) + - uptime_format (string) + +### Tags: + +None + +### Example Output: + +``` +$ telegraf -config ~/ws/telegraf.conf -input-filter system -test +* Plugin: system, Collection 1 +> system load1=2.05,load15=2.38,load5=2.03,n_users=4i,uptime=239043i,uptime_format="2 days, 18:24" 1457546165399253452 +``` diff --git a/plugins/inputs/system/kernel.go b/plugins/inputs/system/kernel.go new file mode 100644 index 000000000..900400146 --- /dev/null +++ b/plugins/inputs/system/kernel.go @@ -0,0 +1,110 @@ +// +build linux + +package system + +import ( + "bytes" + "fmt" + "io/ioutil" + "os" + "strconv" + + "github.com/influxdata/telegraf" + "github.com/influxdata/telegraf/plugins/inputs" +) + +// /proc/stat file line prefixes to gather stats on: +var ( + interrupts = []byte("intr") + context_switches = []byte("ctxt") + processes_forked = []byte("processes") + disk_pages = []byte("page") + boot_time = []byte("btime") +) + +type Kernel struct { + statFile string +} + +func (k *Kernel) Description() string { + return "Get kernel statistics from /proc/stat" +} + +func (k *Kernel) SampleConfig() string { return "" } + +func (k *Kernel) Gather(acc telegraf.Accumulator) error { + data, err := k.getProcStat() + if err != nil { + return err + } + + fields := make(map[string]interface{}) + + dataFields := bytes.Fields(data) + for i, field := range dataFields { + switch { + case bytes.Equal(field, interrupts): + m, err := strconv.Atoi(string(dataFields[i+1])) + if err != nil { + return err + } + fields["interrupts"] = int64(m) + case bytes.Equal(field, context_switches): + m, err := strconv.Atoi(string(dataFields[i+1])) + if err != nil { + return err + } + fields["context_switches"] = int64(m) + case bytes.Equal(field, processes_forked): + m, err := strconv.Atoi(string(dataFields[i+1])) + if err != nil { + return err + } + fields["processes_forked"] = int64(m) + case bytes.Equal(field, boot_time): + m, err := strconv.Atoi(string(dataFields[i+1])) + if err != nil { + return err + } + fields["boot_time"] = int64(m) + case bytes.Equal(field, disk_pages): + in, err := strconv.Atoi(string(dataFields[i+1])) + if err != nil { + return err + } + out, err := strconv.Atoi(string(dataFields[i+2])) + if err != nil { + return err + } + fields["disk_pages_in"] = int64(in) + fields["disk_pages_out"] = int64(out) + } + } + + acc.AddFields("kernel", fields, map[string]string{}) + + return nil +} + +func (k *Kernel) getProcStat() ([]byte, error) { + if _, err := os.Stat(k.statFile); os.IsNotExist(err) { + return nil, fmt.Errorf("kernel: %s does not exist!", k.statFile) + } else if err != nil { + return nil, err + } + + data, err := ioutil.ReadFile(k.statFile) + if err != nil { + return nil, err + } + + return data, nil +} + +func init() { + inputs.Add("kernel", func() telegraf.Input { + return &Kernel{ + statFile: "/proc/stat", + } + }) +} diff --git a/plugins/inputs/system/kernel_notlinux.go b/plugins/inputs/system/kernel_notlinux.go new file mode 100644 index 000000000..9053b5c04 --- /dev/null +++ b/plugins/inputs/system/kernel_notlinux.go @@ -0,0 +1,27 @@ +// +build !linux + +package system + +import ( + "github.com/influxdata/telegraf" + "github.com/influxdata/telegraf/plugins/inputs" +) + +type Kernel struct { +} + +func (k *Kernel) Description() string { + 
return "Get kernel statistics from /proc/stat" +} + +func (k *Kernel) SampleConfig() string { return "" } + +func (k *Kernel) Gather(acc telegraf.Accumulator) error { + return nil +} + +func init() { + inputs.Add("kernel", func() telegraf.Input { + return &Kernel{} + }) +} diff --git a/plugins/inputs/system/kernel_test.go b/plugins/inputs/system/kernel_test.go new file mode 100644 index 000000000..398cba4cc --- /dev/null +++ b/plugins/inputs/system/kernel_test.go @@ -0,0 +1,164 @@ +// +build linux + +package system + +import ( + "io/ioutil" + "os" + "testing" + + "github.com/influxdata/telegraf/testutil" + + "github.com/stretchr/testify/assert" +) + +func TestFullProcFile(t *testing.T) { + tmpfile := makeFakeStatFile([]byte(statFile_Full)) + defer os.Remove(tmpfile) + + k := Kernel{ + statFile: tmpfile, + } + + acc := testutil.Accumulator{} + err := k.Gather(&acc) + assert.NoError(t, err) + + fields := map[string]interface{}{ + "boot_time": int64(1457505775), + "context_switches": int64(2626618), + "disk_pages_in": int64(5741), + "disk_pages_out": int64(1808), + "interrupts": int64(1472736), + "processes_forked": int64(10673), + } + acc.AssertContainsFields(t, "kernel", fields) +} + +func TestPartialProcFile(t *testing.T) { + tmpfile := makeFakeStatFile([]byte(statFile_Partial)) + defer os.Remove(tmpfile) + + k := Kernel{ + statFile: tmpfile, + } + + acc := testutil.Accumulator{} + err := k.Gather(&acc) + assert.NoError(t, err) + + fields := map[string]interface{}{ + "boot_time": int64(1457505775), + "context_switches": int64(2626618), + "disk_pages_in": int64(5741), + "disk_pages_out": int64(1808), + "interrupts": int64(1472736), + } + acc.AssertContainsFields(t, "kernel", fields) +} + +func TestInvalidProcFile1(t *testing.T) { + tmpfile := makeFakeStatFile([]byte(statFile_Invalid)) + defer os.Remove(tmpfile) + + k := Kernel{ + statFile: tmpfile, + } + + acc := testutil.Accumulator{} + err := k.Gather(&acc) + assert.Error(t, err) +} + +func TestInvalidProcFile2(t *testing.T) { + tmpfile := makeFakeStatFile([]byte(statFile_Invalid2)) + defer os.Remove(tmpfile) + + k := Kernel{ + statFile: tmpfile, + } + + acc := testutil.Accumulator{} + err := k.Gather(&acc) + assert.Error(t, err) +} + +func TestNoProcFile(t *testing.T) { + tmpfile := makeFakeStatFile([]byte(statFile_Invalid2)) + os.Remove(tmpfile) + + k := Kernel{ + statFile: tmpfile, + } + + acc := testutil.Accumulator{} + err := k.Gather(&acc) + assert.Error(t, err) + assert.Contains(t, err.Error(), "does not exist") +} + +const statFile_Full = `cpu 6796 252 5655 10444977 175 0 101 0 0 0 +cpu0 6796 252 5655 10444977 175 0 101 0 0 0 +intr 1472736 57 10 0 0 0 0 0 0 0 0 0 0 156 0 0 0 0 0 0 111551 42541 12356 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +ctxt 2626618 +btime 1457505775 +processes 10673 +procs_running 2 +procs_blocked 0 +softirq 1031662 0 649485 20946 111071 11620 0 1 0 994 237545 +page 5741 1808 +swap 1 0 +` + +const statFile_Partial = `cpu 6796 252 5655 10444977 175 0 101 0 0 0 +cpu0 6796 252 5655 10444977 175 0 101 0 0 0 +intr 1472736 57 10 0 0 0 0 0 0 0 0 0 0 156 0 0 0 0 0 0 111551 42541 12356 0 0 0 0 0 0 0 
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +ctxt 2626618 +btime 1457505775 +procs_running 2 +procs_blocked 0 +softirq 1031662 0 649485 20946 111071 11620 0 1 0 994 237545 +page 5741 1808 +` + +// missing btime measurement +const statFile_Invalid = `cpu 6796 252 5655 10444977 175 0 101 0 0 0 +cpu0 6796 252 5655 10444977 175 0 101 0 0 0 +intr 1472736 57 10 0 0 0 0 0 0 0 0 0 0 156 0 0 0 0 0 0 111551 42541 12356 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +ctxt 2626618 +btime +processes 10673 +procs_running 2 +procs_blocked 0 +softirq 1031662 0 649485 20946 111071 11620 0 1 0 994 237545 +page 5741 1808 +swap 1 0 +` + +// missing second page measurement +const statFile_Invalid2 = `cpu 6796 252 5655 10444977 175 0 101 0 0 0 +cpu0 6796 252 5655 10444977 175 0 101 0 0 0 +intr 1472736 57 10 0 0 0 0 0 0 0 0 0 0 156 0 0 0 0 0 0 111551 42541 12356 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +ctxt 2626618 +processes 10673 +procs_running 2 +page 5741 +procs_blocked 0 +softirq 1031662 0 649485 20946 111071 11620 0 1 0 994 237545 +` + +func makeFakeStatFile(content []byte) string { + tmpfile, err := ioutil.TempFile("", "kerneltest") + if err != nil { + panic(err) + } + + if _, err := tmpfile.Write(content); err != nil { + panic(err) + } + if err := tmpfile.Close(); err != nil { + panic(err) + } + + return tmpfile.Name() +} diff --git a/plugins/inputs/system/processes.go b/plugins/inputs/system/processes.go new file mode 100644 index 000000000..8c50a4ebd --- /dev/null +++ b/plugins/inputs/system/processes.go @@ -0,0 +1,223 @@ +// +build !windows + +package system + +import ( + "bytes" + "fmt" + "io/ioutil" + "log" + "os" + "os/exec" + "path" + "runtime" + "strconv" + + "github.com/influxdata/telegraf" + "github.com/influxdata/telegraf/plugins/inputs" +) + +type Processes struct { + execPS func() ([]byte, error) + readProcFile func(statFile string) ([]byte, error) + + forcePS bool + forceProc bool +} + +func (p *Processes) Description() string { + return "Get the number of processes and group them by status" +} + +func (p *Processes) SampleConfig() string { return "" } + +func (p *Processes) Gather(acc telegraf.Accumulator) error { + // Get an empty map of metric fields + fields := getEmptyFields() + + // Decide if we will use 'ps' to get stats (use procfs otherwise) + 
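+	// (forcePS and forceProc are unexported test hooks; the unit tests set
+	// them to pin one gather path regardless of the host OS)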
usePS := true + if runtime.GOOS == "linux" { + usePS = false + } + if p.forcePS { + usePS = true + } else if p.forceProc { + usePS = false + } + + // Gather stats from 'ps' or procfs + if usePS { + if err := p.gatherFromPS(fields); err != nil { + return err + } + } else { + if err := p.gatherFromProc(fields); err != nil { + return err + } + } + + acc.AddFields("processes", fields, nil) + return nil +} + +// Gets empty fields of metrics based on the OS +func getEmptyFields() map[string]interface{} { + fields := map[string]interface{}{ + "blocked": int64(0), + "zombies": int64(0), + "stopped": int64(0), + "running": int64(0), + "sleeping": int64(0), + "total": int64(0), + } + switch runtime.GOOS { + case "freebsd": + fields["idle"] = int64(0) + fields["wait"] = int64(0) + case "darwin": + fields["idle"] = int64(0) + case "openbsd": + fields["idle"] = int64(0) + case "linux": + fields["paging"] = int64(0) + fields["total_threads"] = int64(0) + } + return fields +} + +// exec `ps` to get all process states +func (p *Processes) gatherFromPS(fields map[string]interface{}) error { + out, err := p.execPS() + if err != nil { + return err + } + + for i, status := range bytes.Fields(out) { + if i == 0 && string(status) == "STAT" { + // This is a header, skip it + continue + } + switch status[0] { + case 'W': + fields["wait"] = fields["wait"].(int64) + int64(1) + case 'U', 'D', 'L': + // Also known as uninterruptible sleep or disk sleep + fields["blocked"] = fields["blocked"].(int64) + int64(1) + case 'Z': + fields["zombies"] = fields["zombies"].(int64) + int64(1) + case 'T': + fields["stopped"] = fields["stopped"].(int64) + int64(1) + case 'R': + fields["running"] = fields["running"].(int64) + int64(1) + case 'S': + fields["sleeping"] = fields["sleeping"].(int64) + int64(1) + case 'I': + fields["idle"] = fields["idle"].(int64) + int64(1) + default: + log.Printf("processes: Unknown state [ %s ] from ps", + string(status[0])) + } + fields["total"] = fields["total"].(int64) + int64(1) + } + return nil +} + +// get process states from /proc/(pid)/stat files +func (p *Processes) gatherFromProc(fields map[string]interface{}) error { + files, err := ioutil.ReadDir("/proc") + if err != nil { + return err + } + + for _, file := range files { + if !file.IsDir() { + continue + } + + statFile := path.Join("/proc", file.Name(), "stat") + data, err := p.readProcFile(statFile) + if err != nil { + return err + } + if data == nil { + continue + } + + // Parse out data after () + i := bytes.LastIndex(data, []byte(")")) + if i == -1 { + continue + } + data = data[i+2:] + + stats := bytes.Fields(data) + if len(stats) < 3 { + return fmt.Errorf("Something is terribly wrong with %s", statFile) + } + switch stats[0][0] { + case 'R': + fields["running"] = fields["running"].(int64) + int64(1) + case 'S': + fields["sleeping"] = fields["sleeping"].(int64) + int64(1) + case 'D': + fields["blocked"] = fields["blocked"].(int64) + int64(1) + case 'Z': + fields["zombies"] = fields["zombies"].(int64) + int64(1) + case 'T', 't': + fields["stopped"] = fields["stopped"].(int64) + int64(1) + case 'W': + fields["paging"] = fields["paging"].(int64) + int64(1) + default: + log.Printf("processes: Unknown state [ %s ] in file %s", + string(stats[0][0]), statFile) + } + fields["total"] = fields["total"].(int64) + int64(1) + + threads, err := strconv.Atoi(string(stats[17])) + if err != nil { + log.Printf("processes: Error parsing thread count: %s", err) + continue + } + fields["total_threads"] = fields["total_threads"].(int64) + int64(threads) 
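+		// (field indexes are offset here: everything up to the trailing ")"
+		// of the command name was sliced off above, so stats[0] is the state
+		// field and stats[17] is num_threads, field 20 of /proc/[pid]/stat
+		// in `man proc`)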
+ } + return nil +} + +func readProcFile(statFile string) ([]byte, error) { + if _, err := os.Stat(statFile); os.IsNotExist(err) { + return nil, nil + } else if err != nil { + return nil, err + } + + data, err := ioutil.ReadFile(statFile) + if err != nil { + return nil, err + } + + return data, nil +} + +func execPS() ([]byte, error) { + bin, err := exec.LookPath("ps") + if err != nil { + return nil, err + } + + out, err := exec.Command(bin, "axo", "state").Output() + if err != nil { + return nil, err + } + + return out, err +} + +func init() { + inputs.Add("processes", func() telegraf.Input { + return &Processes{ + execPS: execPS, + readProcFile: readProcFile, + } + }) +} diff --git a/plugins/inputs/system/processes_test.go b/plugins/inputs/system/processes_test.go new file mode 100644 index 000000000..eef52cd67 --- /dev/null +++ b/plugins/inputs/system/processes_test.go @@ -0,0 +1,181 @@ +package system + +import ( + "fmt" + "runtime" + "testing" + + "github.com/influxdata/telegraf/testutil" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestProcesses(t *testing.T) { + processes := &Processes{ + execPS: execPS, + readProcFile: readProcFile, + } + var acc testutil.Accumulator + + err := processes.Gather(&acc) + require.NoError(t, err) + + assert.True(t, acc.HasIntField("processes", "running")) + assert.True(t, acc.HasIntField("processes", "sleeping")) + assert.True(t, acc.HasIntField("processes", "stopped")) + assert.True(t, acc.HasIntField("processes", "total")) + total, ok := acc.Get("processes") + require.True(t, ok) + assert.True(t, total.Fields["total"].(int64) > 0) +} + +func TestFromPS(t *testing.T) { + processes := &Processes{ + execPS: testExecPS, + forcePS: true, + } + + var acc testutil.Accumulator + err := processes.Gather(&acc) + require.NoError(t, err) + + fields := getEmptyFields() + fields["blocked"] = int64(4) + fields["zombies"] = int64(1) + fields["running"] = int64(4) + fields["sleeping"] = int64(34) + fields["total"] = int64(43) + + acc.AssertContainsTaggedFields(t, "processes", fields, map[string]string{}) +} + +func TestFromPSError(t *testing.T) { + processes := &Processes{ + execPS: testExecPSError, + forcePS: true, + } + + var acc testutil.Accumulator + err := processes.Gather(&acc) + require.Error(t, err) +} + +func TestFromProcFiles(t *testing.T) { + if runtime.GOOS != "linux" { + t.Skip("This test only runs on linux") + } + tester := tester{} + processes := &Processes{ + readProcFile: tester.testProcFile, + forceProc: true, + } + + var acc testutil.Accumulator + err := processes.Gather(&acc) + require.NoError(t, err) + + fields := getEmptyFields() + fields["sleeping"] = tester.calls + fields["total_threads"] = tester.calls * 2 + fields["total"] = tester.calls + + acc.AssertContainsTaggedFields(t, "processes", fields, map[string]string{}) +} + +func TestFromProcFilesWithSpaceInCmd(t *testing.T) { + if runtime.GOOS != "linux" { + t.Skip("This test only runs on linux") + } + tester := tester{} + processes := &Processes{ + readProcFile: tester.testProcFile2, + forceProc: true, + } + + var acc testutil.Accumulator + err := processes.Gather(&acc) + require.NoError(t, err) + + fields := getEmptyFields() + fields["sleeping"] = tester.calls + fields["total_threads"] = tester.calls * 2 + fields["total"] = tester.calls + + acc.AssertContainsTaggedFields(t, "processes", fields, map[string]string{}) +} + +func testExecPS() ([]byte, error) { + return []byte(testPSOut), nil +} + +// struct for counting calls to testProcFile +type 
tester struct { + calls int64 +} + +func (t *tester) testProcFile(_ string) ([]byte, error) { + t.calls++ + return []byte(fmt.Sprintf(testProcStat, "S", "2")), nil +} + +func (t *tester) testProcFile2(_ string) ([]byte, error) { + t.calls++ + return []byte(fmt.Sprintf(testProcStat2, "S", "2")), nil +} + +func testExecPSError() ([]byte, error) { + return []byte(testPSOut), fmt.Errorf("ERROR!") +} + +const testPSOut = ` +STAT +S +S +S +S +R +R +S +S +Ss +Ss +S +SNs +Ss +Ss +S +R+ +S +U +S +S +S +S +Ss +S+ +Ss +S +S+ +S+ +Ss +S+ +Ss +S +R+ +Ss +S +S+ +S+ +Ss +L +U +Z +D +S+ +` + +const testProcStat = `10 (rcuob/0) %s 2 0 0 0 -1 2129984 0 0 0 0 0 0 0 0 20 0 %s 0 11 0 0 18446744073709551615 0 0 0 0 0 0 0 2147483647 0 18446744073709551615 0 0 17 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +` + +const testProcStat2 = `10 (rcuob 0) %s 2 0 0 0 -1 2129984 0 0 0 0 0 0 0 0 20 0 %s 0 11 0 0 18446744073709551615 0 0 0 0 0 0 0 2147483647 0 18446744073709551615 0 0 17 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +` diff --git a/plugins/inputs/system/system.go b/plugins/inputs/system/system.go index 9922d5a92..42b0310a4 100644 --- a/plugins/inputs/system/system.go +++ b/plugins/inputs/system/system.go @@ -31,11 +31,17 @@ func (_ *SystemStats) Gather(acc telegraf.Accumulator) error { return err } + users, err := host.Users() + if err != nil { + return err + } + fields := map[string]interface{}{ "load1": loadavg.Load1, "load5": loadavg.Load5, "load15": loadavg.Load15, "uptime": hostinfo.Uptime, + "n_users": len(users), "uptime_format": format_uptime(hostinfo.Uptime), } acc.AddFields("system", fields, nil) diff --git a/plugins/inputs/tcp_listener/README.md b/plugins/inputs/tcp_listener/README.md new file mode 100644 index 000000000..d2dfeb575 --- /dev/null +++ b/plugins/inputs/tcp_listener/README.md @@ -0,0 +1,31 @@ +# TCP listener service input plugin + +The TCP listener is a service input plugin that listens for messages on a TCP +socket and adds those messages to InfluxDB. +The plugin expects messages in the +[Telegraf Input Data Formats](https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md). + +### Configuration: + +This is a sample configuration for the plugin. + +```toml +# Generic TCP listener +[[inputs.tcp_listener]] + ## Address and port to host TCP listener on + service_address = ":8094" + + ## Number of TCP messages allowed to queue up. Once filled, the + ## TCP listener will start dropping packets. + allowed_pending_messages = 10000 + + ## Maximum number of concurrent TCP connections to allow + max_tcp_connections = 250 + + ## Data format to consume. 
+
+  ## Each data format has its own unique set of configuration options, read
+  ## more about them here:
+  ## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md
+  data_format = "influx"
+```
diff --git a/plugins/inputs/tcp_listener/tcp_listener.go b/plugins/inputs/tcp_listener/tcp_listener.go
new file mode 100644
index 000000000..4559a3bf5
--- /dev/null
+++ b/plugins/inputs/tcp_listener/tcp_listener.go
@@ -0,0 +1,264 @@
+package tcp_listener
+
+import (
+	"bufio"
+	"fmt"
+	"log"
+	"net"
+	"sync"
+
+	"github.com/influxdata/telegraf"
+	"github.com/influxdata/telegraf/internal"
+	"github.com/influxdata/telegraf/plugins/inputs"
+	"github.com/influxdata/telegraf/plugins/parsers"
+)
+
+type TcpListener struct {
+	ServiceAddress         string
+	AllowedPendingMessages int
+	MaxTCPConnections      int `toml:"max_tcp_connections"`
+
+	sync.Mutex
+	// Lock for preventing a data race during resource cleanup
+	cleanup sync.Mutex
+	wg      sync.WaitGroup
+
+	in   chan []byte
+	done chan struct{}
+	// accept channel tracks how many active connections there are; if there
+	// is an available bool in accept, then we are below the maximum and can
+	// accept the connection
+	accept chan bool
+
+	// track the listener here so we can close it in Stop()
+	listener *net.TCPListener
+	// track current connections so we can close them in Stop()
+	conns map[string]*net.TCPConn
+
+	parser parsers.Parser
+	acc    telegraf.Accumulator
+}
+
+var dropwarn = "ERROR: Message queue full. Discarding metric. " +
+	"You may want to increase allowed_pending_messages in the config\n"
+
+const sampleConfig = `
+  ## Address and port to host TCP listener on
+  service_address = ":8094"
+
+  ## Number of TCP messages allowed to queue up. Once filled, the
+  ## TCP listener will start dropping packets.
+  allowed_pending_messages = 10000
+
+  ## Maximum number of concurrent TCP connections to allow
+  max_tcp_connections = 250
+
+  ## Data format to consume.
+  ## Each data format has its own unique set of configuration options, read
+  ## more about them here:
+  ## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md
+  data_format = "influx"
+`
+
+func (t *TcpListener) SampleConfig() string {
+	return sampleConfig
+}
+
+func (t *TcpListener) Description() string {
+	return "Generic TCP listener"
+}
+
+// All the work is done in the Start() function, so this is just a dummy
+// function.
+func (t *TcpListener) Gather(_ telegraf.Accumulator) error {
+	return nil
+}
+
+func (t *TcpListener) SetParser(parser parsers.Parser) {
+	t.parser = parser
+}
+
+// Start starts the tcp listener service.
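+// It binds the socket, primes the connection-limit semaphore with
+// MaxTCPConnections tokens, and starts the listener and parser goroutines.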
+func (t *TcpListener) Start(acc telegraf.Accumulator) error {
+	t.Lock()
+	defer t.Unlock()
+
+	t.acc = acc
+	t.in = make(chan []byte, t.AllowedPendingMessages)
+	t.done = make(chan struct{})
+	t.accept = make(chan bool, t.MaxTCPConnections)
+	t.conns = make(map[string]*net.TCPConn)
+	for i := 0; i < t.MaxTCPConnections; i++ {
+		t.accept <- true
+	}
+
+	// Start listener
+	var err error
+	address, _ := net.ResolveTCPAddr("tcp", t.ServiceAddress)
+	t.listener, err = net.ListenTCP("tcp", address)
+	if err != nil {
+		log.Fatalf("ERROR: ListenTCP - %s", err)
+		return err
+	}
+	log.Println("TCP server listening on: ", t.listener.Addr().String())
+
+	t.wg.Add(2)
+	go t.tcpListen()
+	go t.tcpParser()
+
+	log.Printf("Started TCP listener service on %s\n", t.ServiceAddress)
+	return nil
+}
+
+// Stop cleans up all resources
+func (t *TcpListener) Stop() {
+	t.Lock()
+	defer t.Unlock()
+	close(t.done)
+	t.listener.Close()
+
+	// Close all open TCP connections
+	//  - get all conns from the t.conns map and put into slice
+	//  - this is so the forget() function doesn't conflict with looping
+	//    over the t.conns map
+	var conns []*net.TCPConn
+	t.cleanup.Lock()
+	for _, conn := range t.conns {
+		conns = append(conns, conn)
+	}
+	t.cleanup.Unlock()
+	for _, conn := range conns {
+		conn.Close()
+	}
+
+	t.wg.Wait()
+	close(t.in)
+	log.Println("Stopped TCP listener service on ", t.ServiceAddress)
+}
+
+// tcpListen listens for incoming TCP connections.
+func (t *TcpListener) tcpListen() error {
+	defer t.wg.Done()
+
+	for {
+		select {
+		case <-t.done:
+			return nil
+		default:
+			// Accept connection:
+			conn, err := t.listener.AcceptTCP()
+			if err != nil {
+				return err
+			}
+
+			log.Printf("Received TCP Connection from %s", conn.RemoteAddr())
+
+			select {
+			case <-t.accept:
+				// not over connection limit, handle the connection properly.
+				t.wg.Add(1)
+				// generate a random id for this TCPConn
+				id := internal.RandomString(6)
+				t.remember(id, conn)
+				go t.handler(conn, id)
+			default:
+				// We are over the connection limit, refuse & close.
+				t.refuser(conn)
+			}
+		}
+	}
+}
+
+// refuser refuses a TCP connection
+func (t *TcpListener) refuser(conn *net.TCPConn) {
+	// Tell the connection why we are closing.
+ fmt.Fprintf(conn, "Telegraf maximum concurrent TCP connections (%d)"+ + " reached, closing.\nYou may want to increase max_tcp_connections in"+ + " the Telegraf tcp listener configuration.\n", t.MaxTCPConnections) + conn.Close() + log.Printf("Refused TCP Connection from %s", conn.RemoteAddr()) + log.Printf("WARNING: Maximum TCP Connections reached, you may want to" + + " adjust max_tcp_connections") +} + +// handler handles a single TCP Connection +func (t *TcpListener) handler(conn *net.TCPConn, id string) { + // connection cleanup function + defer func() { + t.wg.Done() + conn.Close() + log.Printf("Closed TCP Connection from %s", conn.RemoteAddr()) + // Add one connection potential back to channel when this one closes + t.accept <- true + t.forget(id) + }() + + scanner := bufio.NewScanner(conn) + for { + select { + case <-t.done: + return + default: + if !scanner.Scan() { + return + } + select { + case t.in <- scanner.Bytes(): + default: + log.Printf(dropwarn) + } + } + } +} + +// tcpParser parses the incoming tcp byte packets +func (t *TcpListener) tcpParser() error { + defer t.wg.Done() + var packet []byte + for { + select { + case <-t.done: + return nil + case packet = <-t.in: + if len(packet) == 0 { + continue + } + metrics, err := t.parser.Parse(packet) + if err == nil { + t.storeMetrics(metrics) + } else { + log.Printf("Malformed packet: [%s], Error: %s\n", + string(packet), err) + } + } + } +} + +func (t *TcpListener) storeMetrics(metrics []telegraf.Metric) error { + t.Lock() + defer t.Unlock() + for _, m := range metrics { + t.acc.AddFields(m.Name(), m.Fields(), m.Tags(), m.Time()) + } + return nil +} + +// forget a TCP connection +func (t *TcpListener) forget(id string) { + t.cleanup.Lock() + defer t.cleanup.Unlock() + delete(t.conns, id) +} + +// remember a TCP connection +func (t *TcpListener) remember(id string, conn *net.TCPConn) { + t.cleanup.Lock() + defer t.cleanup.Unlock() + t.conns[id] = conn +} + +func init() { + inputs.Add("tcp_listener", func() telegraf.Input { + return &TcpListener{} + }) +} diff --git a/plugins/inputs/tcp_listener/tcp_listener_test.go b/plugins/inputs/tcp_listener/tcp_listener_test.go new file mode 100644 index 000000000..b4aec9dd2 --- /dev/null +++ b/plugins/inputs/tcp_listener/tcp_listener_test.go @@ -0,0 +1,259 @@ +package tcp_listener + +import ( + "fmt" + "net" + "testing" + "time" + + "github.com/influxdata/telegraf/plugins/parsers" + "github.com/influxdata/telegraf/testutil" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +const ( + testMsg = "cpu_load_short,host=server01 value=12.0 1422568543702900257\n" + + testMsgs = ` +cpu_load_short,host=server02 value=12.0 1422568543702900257 +cpu_load_short,host=server03 value=12.0 1422568543702900257 +cpu_load_short,host=server04 value=12.0 1422568543702900257 +cpu_load_short,host=server05 value=12.0 1422568543702900257 +cpu_load_short,host=server06 value=12.0 1422568543702900257 +` +) + +func newTestTcpListener() (*TcpListener, chan []byte) { + in := make(chan []byte, 1500) + listener := &TcpListener{ + ServiceAddress: ":8194", + AllowedPendingMessages: 10000, + MaxTCPConnections: 250, + in: in, + done: make(chan struct{}), + } + return listener, in +} + +func TestConnectTCP(t *testing.T) { + listener := TcpListener{ + ServiceAddress: ":8194", + AllowedPendingMessages: 10000, + MaxTCPConnections: 250, + } + listener.parser, _ = parsers.NewInfluxParser() + + acc := &testutil.Accumulator{} + require.NoError(t, listener.Start(acc)) + defer listener.Stop() + + 
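+	// give the listener goroutine a moment to start accepting before dialing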
+	time.Sleep(time.Millisecond * 25)
+	conn, err := net.Dial("tcp", "127.0.0.1:8194")
+	require.NoError(t, err)
+
+	// send single message to socket
+	fmt.Fprintf(conn, testMsg)
+	time.Sleep(time.Millisecond * 15)
+	acc.AssertContainsTaggedFields(t, "cpu_load_short",
+		map[string]interface{}{"value": float64(12)},
+		map[string]string{"host": "server01"},
+	)
+
+	// send multiple messages to socket
+	fmt.Fprintf(conn, testMsgs)
+	time.Sleep(time.Millisecond * 15)
+	hostTags := []string{"server02", "server03",
+		"server04", "server05", "server06"}
+	for _, hostTag := range hostTags {
+		acc.AssertContainsTaggedFields(t, "cpu_load_short",
+			map[string]interface{}{"value": float64(12)},
+			map[string]string{"host": hostTag},
+		)
+	}
+}
+
+// Test that MaxTCPConnections is respected
+func TestConcurrentConns(t *testing.T) {
+	listener := TcpListener{
+		ServiceAddress:         ":8195",
+		AllowedPendingMessages: 10000,
+		MaxTCPConnections:      2,
+	}
+	listener.parser, _ = parsers.NewInfluxParser()
+
+	acc := &testutil.Accumulator{}
+	require.NoError(t, listener.Start(acc))
+	defer listener.Stop()
+
+	time.Sleep(time.Millisecond * 25)
+	_, err := net.Dial("tcp", "127.0.0.1:8195")
+	assert.NoError(t, err)
+	_, err = net.Dial("tcp", "127.0.0.1:8195")
+	assert.NoError(t, err)
+
+	// Connection over the limit:
+	conn, err := net.Dial("tcp", "127.0.0.1:8195")
+	assert.NoError(t, err)
+	net.Dial("tcp", "127.0.0.1:8195")
+	buf := make([]byte, 1500)
+	n, err := conn.Read(buf)
+	assert.NoError(t, err)
+	assert.Equal(t,
+		"Telegraf maximum concurrent TCP connections (2) reached, closing.\n"+
+			"You may want to increase max_tcp_connections in"+
+			" the Telegraf tcp listener configuration.\n",
+		string(buf[:n]))
+
+	_, err = conn.Write([]byte(testMsg))
+	assert.NoError(t, err)
+	time.Sleep(time.Millisecond * 10)
+	assert.Zero(t, acc.NFields())
+}
+
+// Test that MaxTCPConnections is respected when max==1
+func TestConcurrentConns1(t *testing.T) {
+	listener := TcpListener{
+		ServiceAddress:         ":8196",
+		AllowedPendingMessages: 10000,
+		MaxTCPConnections:      1,
+	}
+	listener.parser, _ = parsers.NewInfluxParser()
+
+	acc := &testutil.Accumulator{}
+	require.NoError(t, listener.Start(acc))
+	defer listener.Stop()
+
+	time.Sleep(time.Millisecond * 25)
+	_, err := net.Dial("tcp", "127.0.0.1:8196")
+	assert.NoError(t, err)
+
+	// Connection over the limit:
+	conn, err := net.Dial("tcp", "127.0.0.1:8196")
+	assert.NoError(t, err)
+	net.Dial("tcp", "127.0.0.1:8196")
+	buf := make([]byte, 1500)
+	n, err := conn.Read(buf)
+	assert.NoError(t, err)
+	assert.Equal(t,
+		"Telegraf maximum concurrent TCP connections (1) reached, closing.\n"+
+			"You may want to increase max_tcp_connections in"+
+			" the Telegraf tcp listener configuration.\n",
+		string(buf[:n]))
+
+	_, err = conn.Write([]byte(testMsg))
+	assert.NoError(t, err)
+	time.Sleep(time.Millisecond * 10)
+	assert.Zero(t, acc.NFields())
+}
+
+// Test that open connections are closed cleanly when the service stops
+func TestCloseConcurrentConns(t *testing.T) {
+	listener := TcpListener{
+		ServiceAddress:         ":8195",
+		AllowedPendingMessages: 10000,
+		MaxTCPConnections:      2,
+	}
+	listener.parser, _ = parsers.NewInfluxParser()
+
+	acc := &testutil.Accumulator{}
+	require.NoError(t, listener.Start(acc))
+
+	time.Sleep(time.Millisecond * 25)
+	_, err := net.Dial("tcp", "127.0.0.1:8195")
+	assert.NoError(t, err)
+	_, err = net.Dial("tcp", "127.0.0.1:8195")
+	assert.NoError(t, err)
+
+	listener.Stop()
+}
+
+func TestRunParser(t *testing.T) {
+	var testmsg = []byte(testMsg)
+
+	listener, in := newTestTcpListener()
+	acc := testutil.Accumulator{}
+	listener.acc = &acc
+	defer close(listener.done)
+
+	listener.parser, _ = parsers.NewInfluxParser()
+	listener.wg.Add(1)
+	go listener.tcpParser()
+
+	in <- testmsg
+	time.Sleep(time.Millisecond * 25)
+	listener.Gather(&acc)
+
+	if a := acc.NFields(); a != 1 {
+		t.Errorf("got %v, expected %v", a, 1)
+	}
+
+	acc.AssertContainsTaggedFields(t, "cpu_load_short",
+		map[string]interface{}{"value": float64(12)},
+		map[string]string{"host": "server01"},
+	)
+}
+
+func TestRunParserInvalidMsg(t *testing.T) {
+	var testmsg = []byte("cpu_load_short")
+
+	listener, in := newTestTcpListener()
+	acc := testutil.Accumulator{}
+	listener.acc = &acc
+	defer close(listener.done)
+
+	listener.parser, _ = parsers.NewInfluxParser()
+	listener.wg.Add(1)
+	go listener.tcpParser()
+
+	in <- testmsg
+	time.Sleep(time.Millisecond * 25)
+
+	if a := acc.NFields(); a != 0 {
+		t.Errorf("got %v, expected %v", a, 0)
+	}
+}
+
+func TestRunParserGraphiteMsg(t *testing.T) {
+	var testmsg = []byte("cpu.load.graphite 12 1454780029")
+
+	listener, in := newTestTcpListener()
+	acc := testutil.Accumulator{}
+	listener.acc = &acc
+	defer close(listener.done)
+
+	listener.parser, _ = parsers.NewGraphiteParser("_", []string{}, nil)
+	listener.wg.Add(1)
+	go listener.tcpParser()
+
+	in <- testmsg
+	time.Sleep(time.Millisecond * 25)
+	listener.Gather(&acc)
+
+	acc.AssertContainsFields(t, "cpu_load_graphite",
+		map[string]interface{}{"value": float64(12)})
+}
+
+func TestRunParserJSONMsg(t *testing.T) {
+	var testmsg = []byte("{\"a\": 5, \"b\": {\"c\": 6}}\n")
+
+	listener, in := newTestTcpListener()
+	acc := testutil.Accumulator{}
+	listener.acc = &acc
+	defer close(listener.done)
+
+	listener.parser, _ = parsers.NewJSONParser("udp_json_test", []string{}, nil)
+	listener.wg.Add(1)
+	go listener.tcpParser()
+
+	in <- testmsg
+	time.Sleep(time.Millisecond * 25)
+	listener.Gather(&acc)
+
+	acc.AssertContainsFields(t, "udp_json_test",
+		map[string]interface{}{
+			"a":   float64(5),
+			"b_c": float64(6),
+		})
+}
diff --git a/plugins/inputs/udp_listener/README.md b/plugins/inputs/udp_listener/README.md
new file mode 100644
index 000000000..1dd03a2a7
--- /dev/null
+++ b/plugins/inputs/udp_listener/README.md
@@ -0,0 +1,92 @@
+# UDP listener service input plugin
+
+The UDP listener is a service input plugin that listens for messages on a UDP
+socket and adds those messages to InfluxDB.
+The plugin expects messages in the
+[Telegraf Input Data Formats](https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md).
+
+### Configuration:
+
+This is a sample configuration for the plugin.
+
+```toml
+[[inputs.udp_listener]]
+  ## Address and port to host UDP listener on
+  service_address = ":8092"
+
+  ## Number of UDP messages allowed to queue up. Once filled, the
+  ## UDP listener will start dropping packets.
+  allowed_pending_messages = 10000
+
+  ## UDP packet size for the server to listen for. This will depend
+  ## on the size of the packets that the client is sending, which is
+  ## usually 1500 bytes.
+  udp_packet_size = 1500
+
+  ## Data format to consume.
+
+  ## Each data format has its own unique set of configuration options, read
+  ## more about them here:
+  ## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md
+  data_format = "influx"
+```
+
+## A Note on UDP OS Buffer Sizes
+
+Some OSes (most notably, Linux) place very restrictive limits on the performance
+of UDP protocols. It is _highly_ recommended that you increase these OS limits to
+at least 8MB before trying to run large amounts of UDP traffic to your instance.
+8MB is just a recommendation, and can be adjusted higher.
+
+### Linux
+Check the current UDP/IP receive buffer limit & default by typing the following
+commands:
+
+```
+sysctl net.core.rmem_max
+sysctl net.core.rmem_default
+```
+
+If the values are less than 8388608 bytes you should add the following lines to
+the /etc/sysctl.conf file:
+
+```
+net.core.rmem_max=8388608
+net.core.rmem_default=8388608
+```
+
+Changes to /etc/sysctl.conf do not take effect until reboot.
+To update the values immediately, type the following commands as root:
+
+```
+sysctl -w net.core.rmem_max=8388608
+sysctl -w net.core.rmem_default=8388608
+```
+
+### BSD/Darwin
+
+On BSD/Darwin systems you need to add about 15% padding to the kernel socket
+buffer limit. That is, if you want an 8MB buffer (8388608 bytes) you need to set
+the kernel limit to `8388608*1.15 = 9646900`. This is not documented anywhere, but
+happens
+[in the kernel here.](https://github.com/freebsd/freebsd/blob/master/sys/kern/uipc_sockbuf.c#L63-L64)
+
+Check the current UDP/IP buffer limit by typing the following command:
+
+```
+sysctl kern.ipc.maxsockbuf
+```
+
+If the value is less than 9646900 bytes you should add the following line
+to the /etc/sysctl.conf file (create it if necessary):
+
+```
+kern.ipc.maxsockbuf=9646900
+```
+
+Changes to /etc/sysctl.conf do not take effect until reboot.
+To update the value immediately, type the following command as root:
+
+```
+sysctl -w kern.ipc.maxsockbuf=9646900
+```
diff --git a/plugins/inputs/udp_listener/udp_listener.go b/plugins/inputs/udp_listener/udp_listener.go
new file mode 100644
index 000000000..442cf98b3
--- /dev/null
+++ b/plugins/inputs/udp_listener/udp_listener.go
@@ -0,0 +1,162 @@
+package udp_listener
+
+import (
+	"log"
+	"net"
+	"strings"
+	"sync"
+
+	"github.com/influxdata/telegraf"
+	"github.com/influxdata/telegraf/plugins/inputs"
+	"github.com/influxdata/telegraf/plugins/parsers"
+)
+
+type UdpListener struct {
+	ServiceAddress         string
+	UDPPacketSize          int `toml:"udp_packet_size"`
+	AllowedPendingMessages int
+
+	sync.Mutex
+	wg sync.WaitGroup
+
+	in   chan []byte
+	done chan struct{}
+
+	parser parsers.Parser
+
+	// Keep the accumulator in this struct
+	acc telegraf.Accumulator
+
+	listener *net.UDPConn
+}
+
+// UDP packet limit, see
+// https://en.wikipedia.org/wiki/User_Datagram_Protocol#Packet_structure
+const UDP_PACKET_SIZE int = 65507
+
+var dropwarn = "ERROR: Message queue full. Discarding line [%s] " +
+	"You may want to increase allowed_pending_messages in the config\n"
+
+const sampleConfig = `
+  ## Address and port to host UDP listener on
+  service_address = ":8092"
+
+  ## Number of UDP messages allowed to queue up. Once filled, the
+  ## UDP listener will start dropping packets.
+  allowed_pending_messages = 10000
+
+  ## Data format to consume.
+  ## Each data format has its own unique set of configuration options, read
+  ## more about them here:
+  ## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md
+  data_format = "influx"
+`
+
+func (u *UdpListener) SampleConfig() string {
+	return sampleConfig
+}
+
+func (u *UdpListener) Description() string {
+	return "Generic UDP listener"
+}
+
+// All the work is done in the Start() function, so this is just a dummy
+// function.
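+// Metrics are pushed to the accumulator by the parser goroutine, so there is
+// nothing for Gather to collect.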
+func (u *UdpListener) Gather(_ telegraf.Accumulator) error { + return nil +} + +func (u *UdpListener) SetParser(parser parsers.Parser) { + u.parser = parser +} + +func (u *UdpListener) Start(acc telegraf.Accumulator) error { + u.Lock() + defer u.Unlock() + + u.acc = acc + u.in = make(chan []byte, u.AllowedPendingMessages) + u.done = make(chan struct{}) + + u.wg.Add(2) + go u.udpListen() + go u.udpParser() + + log.Printf("Started UDP listener service on %s\n", u.ServiceAddress) + return nil +} + +func (u *UdpListener) Stop() { + close(u.done) + u.listener.Close() + u.wg.Wait() + close(u.in) + log.Println("Stopped UDP listener service on ", u.ServiceAddress) +} + +func (u *UdpListener) udpListen() error { + defer u.wg.Done() + var err error + address, _ := net.ResolveUDPAddr("udp", u.ServiceAddress) + u.listener, err = net.ListenUDP("udp", address) + if err != nil { + log.Fatalf("ERROR: ListenUDP - %s", err) + } + log.Println("UDP server listening on: ", u.listener.LocalAddr().String()) + + buf := make([]byte, u.UDPPacketSize) + for { + select { + case <-u.done: + return nil + default: + n, _, err := u.listener.ReadFromUDP(buf) + if err != nil && !strings.Contains(err.Error(), "closed network") { + log.Printf("ERROR: %s\n", err.Error()) + continue + } + + select { + case u.in <- buf[:n]: + default: + log.Printf(dropwarn, string(buf[:n])) + } + } + } +} + +func (u *UdpListener) udpParser() error { + defer u.wg.Done() + + var packet []byte + for { + select { + case <-u.done: + return nil + case packet = <-u.in: + metrics, err := u.parser.Parse(packet) + if err == nil { + u.storeMetrics(metrics) + } else { + log.Printf("Malformed packet: [%s], Error: %s\n", packet, err) + } + } + } +} + +func (u *UdpListener) storeMetrics(metrics []telegraf.Metric) error { + u.Lock() + defer u.Unlock() + for _, m := range metrics { + u.acc.AddFields(m.Name(), m.Fields(), m.Tags(), m.Time()) + } + return nil +} + +func init() { + inputs.Add("udp_listener", func() telegraf.Input { + return &UdpListener{ + UDPPacketSize: UDP_PACKET_SIZE, + } + }) +} diff --git a/plugins/inputs/udp_listener/udp_listener_test.go b/plugins/inputs/udp_listener/udp_listener_test.go new file mode 100644 index 000000000..bdbab318b --- /dev/null +++ b/plugins/inputs/udp_listener/udp_listener_test.go @@ -0,0 +1,116 @@ +package udp_listener + +import ( + "io/ioutil" + "log" + "testing" + "time" + + "github.com/influxdata/telegraf/plugins/parsers" + "github.com/influxdata/telegraf/testutil" +) + +func newTestUdpListener() (*UdpListener, chan []byte) { + in := make(chan []byte, 1500) + listener := &UdpListener{ + ServiceAddress: ":8125", + UDPPacketSize: 1500, + AllowedPendingMessages: 10000, + in: in, + done: make(chan struct{}), + } + return listener, in +} + +func TestRunParser(t *testing.T) { + log.SetOutput(ioutil.Discard) + var testmsg = []byte("cpu_load_short,host=server01 value=12.0 1422568543702900257") + + listener, in := newTestUdpListener() + acc := testutil.Accumulator{} + listener.acc = &acc + defer close(listener.done) + + listener.parser, _ = parsers.NewInfluxParser() + listener.wg.Add(1) + go listener.udpParser() + + in <- testmsg + time.Sleep(time.Millisecond * 25) + listener.Gather(&acc) + + if a := acc.NFields(); a != 1 { + t.Errorf("got %v, expected %v", a, 1) + } + + acc.AssertContainsTaggedFields(t, "cpu_load_short", + map[string]interface{}{"value": float64(12)}, + map[string]string{"host": "server01"}, + ) +} + +func TestRunParserInvalidMsg(t *testing.T) { + log.SetOutput(ioutil.Discard) + var testmsg = 
[]byte("cpu_load_short") + + listener, in := newTestUdpListener() + acc := testutil.Accumulator{} + listener.acc = &acc + defer close(listener.done) + + listener.parser, _ = parsers.NewInfluxParser() + listener.wg.Add(1) + go listener.udpParser() + + in <- testmsg + time.Sleep(time.Millisecond * 25) + + if a := acc.NFields(); a != 0 { + t.Errorf("got %v, expected %v", a, 0) + } +} + +func TestRunParserGraphiteMsg(t *testing.T) { + log.SetOutput(ioutil.Discard) + var testmsg = []byte("cpu.load.graphite 12 1454780029") + + listener, in := newTestUdpListener() + acc := testutil.Accumulator{} + listener.acc = &acc + defer close(listener.done) + + listener.parser, _ = parsers.NewGraphiteParser("_", []string{}, nil) + listener.wg.Add(1) + go listener.udpParser() + + in <- testmsg + time.Sleep(time.Millisecond * 25) + listener.Gather(&acc) + + acc.AssertContainsFields(t, "cpu_load_graphite", + map[string]interface{}{"value": float64(12)}) +} + +func TestRunParserJSONMsg(t *testing.T) { + log.SetOutput(ioutil.Discard) + var testmsg = []byte("{\"a\": 5, \"b\": {\"c\": 6}}\n") + + listener, in := newTestUdpListener() + acc := testutil.Accumulator{} + listener.acc = &acc + defer close(listener.done) + + listener.parser, _ = parsers.NewJSONParser("udp_json_test", []string{}, nil) + listener.wg.Add(1) + go listener.udpParser() + + in <- testmsg + time.Sleep(time.Millisecond * 25) + listener.Gather(&acc) + + acc.AssertContainsFields(t, "udp_json_test", + map[string]interface{}{ + "a": float64(5), + "b_c": float64(6), + }) +} diff --git a/plugins/inputs/zfs/zfs_test.go b/plugins/inputs/zfs/zfs_test.go index 514bad3d4..03179ba59 100644 --- a/plugins/inputs/zfs/zfs_test.go +++ b/plugins/inputs/zfs/zfs_test.go @@ -212,22 +212,22 @@ func TestZfsGeneratesMetrics(t *testing.T) { } z = &Zfs{KstatPath: testKstatPath} - acc = testutil.Accumulator{} - err = z.Gather(&acc) + acc2 := testutil.Accumulator{} + err = z.Gather(&acc2) require.NoError(t, err) - acc.AssertContainsTaggedFields(t, "zfs", intMetrics, tags) - acc.Metrics = nil + acc2.AssertContainsTaggedFields(t, "zfs", intMetrics, tags) + acc2.Metrics = nil intMetrics = getKstatMetricsArcOnly() //two pools, one metric z = &Zfs{KstatPath: testKstatPath, KstatMetrics: []string{"arcstats"}} - acc = testutil.Accumulator{} - err = z.Gather(&acc) + acc3 := testutil.Accumulator{} + err = z.Gather(&acc3) require.NoError(t, err) - acc.AssertContainsTaggedFields(t, "zfs", intMetrics, tags) + acc3.AssertContainsTaggedFields(t, "zfs", intMetrics, tags) err = os.RemoveAll(os.TempDir() + "/telegraf") require.NoError(t, err) diff --git a/plugins/inputs/zookeeper/zookeeper.go b/plugins/inputs/zookeeper/zookeeper.go index 0f2b2e06f..54defc56f 100644 --- a/plugins/inputs/zookeeper/zookeeper.go +++ b/plugins/inputs/zookeeper/zookeeper.go @@ -67,6 +67,9 @@ func (z *Zookeeper) gatherServer(address string, acc telegraf.Accumulator) error } defer c.Close() + // Extend connection + c.SetDeadline(time.Now().Add(defaultTimeout)) + fmt.Fprintf(c, "%s\n", "mntr") rdr := bufio.NewReader(c) scanner := bufio.NewScanner(rdr) diff --git a/plugins/outputs/amqp/amqp.go b/plugins/outputs/amqp/amqp.go index 948007117..bf9353d6e 100644 --- a/plugins/outputs/amqp/amqp.go +++ b/plugins/outputs/amqp/amqp.go @@ -4,6 +4,7 @@ import ( "bytes" "fmt" "log" + "strings" "sync" "time" @@ -20,6 +21,8 @@ type AMQP struct { URL string // AMQP exchange Exchange string + // AMQP Auth method + AuthMethod string // Routing Key Tag RoutingTag string `toml:"routing_tag"` // InfluxDB database @@ -45,7 +48,17 @@ type 
AMQP struct { serializer serializers.Serializer } +type externalAuth struct{} + +func (a *externalAuth) Mechanism() string { + return "EXTERNAL" +} +func (a *externalAuth) Response() string { + return fmt.Sprintf("\000") +} + const ( + DefaultAuthMethod = "PLAIN" DefaultRetentionPolicy = "default" DefaultDatabase = "telegraf" DefaultPrecision = "s" @@ -56,6 +69,8 @@ var sampleConfig = ` url = "amqp://localhost:5672/influxdb" ## AMQP exchange exchange = "telegraf" + ## Auth method. PLAIN and EXTERNAL are supported + # auth_method = "PLAIN" ## Telegraf tag to use as a routing key ## ie, if this tag exists, it's value will be used as the routing key routing_tag = "host" @@ -74,7 +89,7 @@ var sampleConfig = ` ## Use SSL but skip chain & host verification # insecure_skip_verify = false - ## Data format to output. This can be "influx" or "graphite" + ## Data format to output. ## Each data format has it's own unique set of configuration options, read ## more about them here: ## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_OUTPUT.md @@ -103,11 +118,19 @@ func (q *AMQP) Connect() error { return err } - if tls != nil { - connection, err = amqp.DialTLS(q.URL, tls) - } else { - connection, err = amqp.Dial(q.URL) + // parse auth method + var sasl []amqp.Authentication // nil by default + + if strings.ToUpper(q.AuthMethod) == "EXTERNAL" { + sasl = []amqp.Authentication{&externalAuth{}} } + + amqpConf := amqp.Config{ + TLSClientConfig: tls, + SASL: sasl, // if nil, it will be PLAIN + } + + connection, err = amqp.DialConfig(q.URL, amqpConf) if err != nil { return err } @@ -200,6 +223,7 @@ func (q *AMQP) Write(metrics []telegraf.Metric) error { func init() { outputs.Add("amqp", func() telegraf.Output { return &AMQP{ + AuthMethod: DefaultAuthMethod, Database: DefaultDatabase, Precision: DefaultPrecision, RetentionPolicy: DefaultRetentionPolicy, diff --git a/plugins/outputs/datadog/datadog.go b/plugins/outputs/datadog/datadog.go index 5d6fab165..56fdc38e4 100644 --- a/plugins/outputs/datadog/datadog.go +++ b/plugins/outputs/datadog/datadog.go @@ -139,6 +139,9 @@ func (d *Datadog) authenticatedUrl() string { func buildMetrics(m telegraf.Metric) (map[string]Point, error) { ms := make(map[string]Point) for k, v := range m.Fields() { + if !verifyValue(v) { + continue + } var p Point if err := p.setValue(v); err != nil { return ms, fmt.Errorf("unable to extract value from Fields, %s", err.Error()) @@ -160,6 +163,14 @@ func buildTags(mTags map[string]string) []string { return tags } +func verifyValue(v interface{}) bool { + switch v.(type) { + case string: + return false + } + return true +} + func (p *Point) setValue(v interface{}) error { switch d := v.(type) { case int: diff --git a/plugins/outputs/datadog/datadog_test.go b/plugins/outputs/datadog/datadog_test.go index 30495a044..2d3095be1 100644 --- a/plugins/outputs/datadog/datadog_test.go +++ b/plugins/outputs/datadog/datadog_test.go @@ -152,14 +152,6 @@ func TestBuildPoint(t *testing.T) { }, nil, }, - { - testutil.TestMetric("11234.5", "test7"), - Point{ - float64(time.Date(2009, time.November, 10, 23, 0, 0, 0, time.UTC).Unix()), - 11234.5, - }, - fmt.Errorf("unable to extract value from Fields, undeterminable type"), - }, } for _, tt := range tagtests { pt, err := buildMetrics(tt.ptIn) @@ -175,3 +167,25 @@ func TestBuildPoint(t *testing.T) { } } } + +func TestVerifyValue(t *testing.T) { + var tagtests = []struct { + ptIn telegraf.Metric + validMetric bool + }{ + { + testutil.TestMetric(float32(11234.5), "test1"), + true, + }, + { + 
testutil.TestMetric("11234.5", "test2"), + false, + }, + } + for _, tt := range tagtests { + ok := verifyValue(tt.ptIn.Fields()["value"]) + if tt.validMetric != ok { + t.Errorf("%s: verification failed\n", tt.ptIn.Name()) + } + } +} diff --git a/plugins/outputs/file/file.go b/plugins/outputs/file/file.go index 743c0f03f..1d47642b2 100644 --- a/plugins/outputs/file/file.go +++ b/plugins/outputs/file/file.go @@ -23,7 +23,7 @@ var sampleConfig = ` ## Files to write to, "stdout" is a specially handled file. files = ["stdout", "/tmp/metrics.out"] - ## Data format to output. This can be "influx" or "graphite" + ## Data format to output. ## Each data format has it's own unique set of configuration options, read ## more about them here: ## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_OUTPUT.md diff --git a/plugins/outputs/influxdb/README.md b/plugins/outputs/influxdb/README.md index f9a8f7217..cfa960b37 100644 --- a/plugins/outputs/influxdb/README.md +++ b/plugins/outputs/influxdb/README.md @@ -2,7 +2,7 @@ This plugin writes to [InfluxDB](https://www.influxdb.com) via HTTP or UDP. -Required parameters: +### Required parameters: * `urls`: List of strings, this is for InfluxDB clustering support. On each flush interval, Telegraf will randomly choose one of the urls @@ -10,3 +10,17 @@ to write to. Each URL should start with either `http://` or `udp://` * `database`: The name of the database to write to. +### Optional parameters: + +* `retention_policy`: Retention policy to write to. +* `precision`: Precision of writes, valid values are "ns", "us" (or "µs"), "ms", "s", "m", "h". note: using "s" precision greatly improves InfluxDB compression. +* `timeout`: Write timeout (for the InfluxDB client), formatted as a string. If not provided, will default to 5s. 0s means no timeout (not recommended). +* `username`: Username for influxdb +* `password`: Password for influxdb +* `user_agent`: Set the user agent for HTTP POSTs (can be useful for log differentiation) +* `udp_payload`: Set UDP payload size, defaults to InfluxDB UDP Client default (512 bytes) + ## Optional SSL Config +* `ssl_ca`: SSL CA +* `ssl_cert`: SSL CERT +* `ssl_key`: SSL key +* `insecure_skip_verify`: Use SSL but skip chain & host verification (default: false) diff --git a/plugins/outputs/influxdb/influxdb.go b/plugins/outputs/influxdb/influxdb.go index 60d235511..626635a3b 100644 --- a/plugins/outputs/influxdb/influxdb.go +++ b/plugins/outputs/influxdb/influxdb.go @@ -18,15 +18,16 @@ import ( type InfluxDB struct { // URL is only for backwards compatability - URL string - URLs []string `toml:"urls"` - Username string - Password string - Database string - UserAgent string - Precision string - Timeout internal.Duration - UDPPayload int `toml:"udp_payload"` + URL string + URLs []string `toml:"urls"` + Username string + Password string + Database string + UserAgent string + Precision string + RetentionPolicy string + Timeout internal.Duration + UDPPayload int `toml:"udp_payload"` // Path to CA file SSLCA string `toml:"ssl_ca"` @@ -46,10 +47,12 @@ var sampleConfig = ` ## this means that only ONE of the urls will be written to each interval. # urls = ["udp://localhost:8089"] # UDP endpoint example urls = ["http://localhost:8086"] # required - ## The target database for metrics (telegraf will create it if not exists) + ## The target database for metrics (telegraf will create it if not exists). database = "telegraf" # required + ## Retention policy to write to. 
+ retention_policy = "default" ## Precision of writes, valid values are "ns", "us" (or "µs"), "ms", "s", "m", "h". - ## note: using "s" precision greatly improves InfluxDB compression + ## note: using "s" precision greatly improves InfluxDB compression. precision = "s" ## Write timeout (for the InfluxDB client), formatted as a string. @@ -124,11 +127,12 @@ func (i *InfluxDB) Connect() error { // Create Database if it doesn't exist _, e := c.Query(client.Query{ - Command: fmt.Sprintf("CREATE DATABASE IF NOT EXISTS %s", i.Database), + Command: fmt.Sprintf("CREATE DATABASE IF NOT EXISTS \"%s\"", i.Database), }) if e != nil { log.Println("Database creation failed: " + e.Error()) + continue } conns = append(conns, c) @@ -156,9 +160,16 @@ func (i *InfluxDB) Description() string { // Choose a random server in the cluster to write to until a successful write // occurs, logging each unsuccessful. If all servers fail, return error. func (i *InfluxDB) Write(metrics []telegraf.Metric) error { + if len(i.conns) == 0 { + err := i.Connect() + if err != nil { + return err + } + } bp, err := client.NewBatchPoints(client.BatchPointsConfig{ - Database: i.Database, - Precision: i.Precision, + Database: i.Database, + Precision: i.Precision, + RetentionPolicy: i.RetentionPolicy, }) if err != nil { return err @@ -180,6 +191,12 @@ func (i *InfluxDB) Write(metrics []telegraf.Metric) error { break } } + + // If all of the writes failed, create a new connection array so that + // i.Connect() will be called on the next gather. + if err != nil { + i.conns = make([]client.Client, 0) + } return err } diff --git a/plugins/outputs/kafka/kafka.go b/plugins/outputs/kafka/kafka.go index 8dea2b2a1..1fafa1353 100644 --- a/plugins/outputs/kafka/kafka.go +++ b/plugins/outputs/kafka/kafka.go @@ -19,6 +19,12 @@ type Kafka struct { Topic string // Routing Key Tag RoutingTag string `toml:"routing_tag"` + // Compression Codec Tag + CompressionCodec int + // RequiredAcks Tag + RequiredAcks int + // MaxRetry Tag + MaxRetry int // Legacy SSL config options // TLS client certificate @@ -53,6 +59,32 @@ var sampleConfig = ` ## ie, if this tag exists, it's value will be used as the routing key routing_tag = "host" + ## CompressionCodec represents the various compression codecs recognized by + ## Kafka in messages. + ## 0 : No compression + ## 1 : Gzip compression + ## 2 : Snappy compression + compression_codec = 0 + + ## RequiredAcks is used in Produce Requests to tell the broker how many + ## replica acknowledgements it must see before responding + ## 0 : the producer never waits for an acknowledgement from the broker. + ## This option provides the lowest latency but the weakest durability + ## guarantees (some data will be lost when a server fails). + ## 1 : the producer gets an acknowledgement after the leader replica has + ## received the data. This option provides better durability as the + ## client waits until the server acknowledges the request as successful + ## (only messages that were written to the now-dead leader but not yet + ## replicated will be lost). + ## -1: the producer gets an acknowledgement after all in-sync replicas have + ## received the data. This option provides the best durability, we + ## guarantee that no messages will be lost as long as at least one in + ## sync replica remains. 
+ required_acks = -1 + + ## The total number of times to retry sending a message + max_retry = 3 + ## Optional SSL Config # ssl_ca = "/etc/telegraf/ca.pem" # ssl_cert = "/etc/telegraf/cert.pem" @@ -60,7 +92,7 @@ var sampleConfig = ` ## Use SSL but skip chain & host verification # insecure_skip_verify = false - ## Data format to output. This can be "influx" or "graphite" + ## Data format to output. ## Each data format has it's own unique set of configuration options, read ## more about them here: ## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_OUTPUT.md @@ -73,10 +105,10 @@ func (k *Kafka) SetSerializer(serializer serializers.Serializer) { func (k *Kafka) Connect() error { config := sarama.NewConfig() - // Wait for all in-sync replicas to ack the message - config.Producer.RequiredAcks = sarama.WaitForAll - // Retry up to 10 times to produce the message - config.Producer.Retry.Max = 10 + + config.Producer.RequiredAcks = sarama.RequiredAcks(k.RequiredAcks) + config.Producer.Compression = sarama.CompressionCodec(k.CompressionCodec) + config.Producer.Retry.Max = k.MaxRetry // Legacy support ssl config if k.Certificate != "" { diff --git a/plugins/outputs/librato/librato.go b/plugins/outputs/librato/librato.go index 3897e0b4f..f0f03400e 100644 --- a/plugins/outputs/librato/librato.go +++ b/plugins/outputs/librato/librato.go @@ -4,19 +4,23 @@ import ( "bytes" "encoding/json" "fmt" + "io/ioutil" "log" "net/http" "github.com/influxdata/telegraf" "github.com/influxdata/telegraf/internal" "github.com/influxdata/telegraf/plugins/outputs" + "github.com/influxdata/telegraf/plugins/serializers/graphite" ) type Librato struct { - ApiUser string - ApiToken string - SourceTag string - Timeout internal.Duration + ApiUser string + ApiToken string + Debug bool + NameFromTags bool + SourceTag string + Timeout internal.Duration apiUrl string client *http.Client @@ -32,9 +36,12 @@ var sampleConfig = ` ## Librato API token api_token = "my-secret-token" # required. - ## Tag Field to populate source attribute (optional) - ## This is typically the _hostname_ from which the metric was obtained. - source_tag = "hostname" + ### Debug + # debug = false + + ### Tag Field to populate source attribute (optional) + ### This is typically the _hostname_ from which the metric was obtained. + source_tag = "host" ## Connection timeout. 
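+  ## (applies to the HTTP client used to POST to the Librato API)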
# timeout = "5s" @@ -82,17 +89,27 @@ func (l *Librato) Write(metrics []telegraf.Metric) error { for _, gauge := range gauges { tempGauges = append(tempGauges, gauge) metricCounter++ + if l.Debug { + log.Printf("[DEBUG] Got a gauge: %v\n", gauge) + } } } else { log.Printf("unable to build Gauge for %s, skipping\n", m.Name()) + if l.Debug { + log.Printf("[DEBUG] Couldn't build gauge: %v\n", err) + } } } lmetrics.Gauges = make([]*Gauge, metricCounter) copy(lmetrics.Gauges, tempGauges[0:]) - metricsBytes, err := json.Marshal(metrics) + metricsBytes, err := json.Marshal(lmetrics) if err != nil { return fmt.Errorf("unable to marshal Metrics, %s\n", err.Error()) + } else { + if l.Debug { + log.Printf("[DEBUG] Librato request: %v\n", string(metricsBytes)) + } } req, err := http.NewRequest("POST", l.apiUrl, bytes.NewBuffer(metricsBytes)) if err != nil { @@ -103,8 +120,21 @@ func (l *Librato) Write(metrics []telegraf.Metric) error { resp, err := l.client.Do(req) if err != nil { + if l.Debug { + log.Printf("[DEBUG] Error POSTing metrics: %v\n", err.Error()) + } return fmt.Errorf("error POSTing metrics, %s\n", err.Error()) + } else { + if l.Debug { + htmlData, err := ioutil.ReadAll(resp.Body) + if err != nil { + log.Printf("[DEBUG] Couldn't get response! (%v)\n", err) + } else { + log.Printf("[DEBUG] Librato response: %v\n", string(htmlData)) + } + } } + defer resp.Body.Close() if resp.StatusCode != 200 { @@ -122,13 +152,22 @@ func (l *Librato) Description() string { return "Configuration for Librato API to send metrics to." } +func (l *Librato) buildGaugeName(m telegraf.Metric, fieldName string) string { + // Use the GraphiteSerializer + graphiteSerializer := graphite.GraphiteSerializer{} + return graphiteSerializer.SerializeBucketName(m, fieldName) +} + func (l *Librato) buildGauges(m telegraf.Metric) ([]*Gauge, error) { gauges := []*Gauge{} for fieldName, value := range m.Fields() { gauge := &Gauge{ - Name: m.Name() + "_" + fieldName, + Name: l.buildGaugeName(m, fieldName), MeasureTime: m.Time().Unix(), } + if !gauge.verifyValue(value) { + continue + } if err := gauge.setValue(value); err != nil { return gauges, fmt.Errorf("unable to extract value from Fields, %s\n", err.Error()) @@ -142,10 +181,22 @@ func (l *Librato) buildGauges(m telegraf.Metric) ([]*Gauge, error) { l.SourceTag) } } + gauges = append(gauges, gauge) + } + if l.Debug { + fmt.Printf("[DEBUG] Built gauges: %v\n", gauges) } return gauges, nil } +func (g *Gauge) verifyValue(v interface{}) bool { + switch v.(type) { + case string: + return false + } + return true +} + func (g *Gauge) setValue(v interface{}) error { switch d := v.(type) { case int: diff --git a/plugins/outputs/librato/librato_test.go b/plugins/outputs/librato/librato_test.go index c0b6ba021..3aa5b8748 100644 --- a/plugins/outputs/librato/librato_test.go +++ b/plugins/outputs/librato/librato_test.go @@ -9,9 +9,9 @@ import ( "testing" "time" - "github.com/influxdata/telegraf/testutil" - "github.com/influxdata/telegraf" + "github.com/influxdata/telegraf/plugins/serializers/graphite" + "github.com/influxdata/telegraf/testutil" "github.com/stretchr/testify/require" ) @@ -28,6 +28,14 @@ func fakeLibrato() *Librato { return l } +func BuildTags(t *testing.T) { + testMetric := testutil.TestMetric(0.0, "test1") + graphiteSerializer := graphite.GraphiteSerializer{} + tags, err := graphiteSerializer.Serialize(testMetric) + fmt.Printf("Tags: %v", tags) + require.NoError(t, err) +} + func TestUriOverride(t *testing.T) { ts := httptest.NewServer(http.HandlerFunc(func(w 
http.ResponseWriter, r *http.Request) { w.WriteHeader(http.StatusOK) @@ -78,7 +86,7 @@ func TestBuildGauge(t *testing.T) { { testutil.TestMetric(0.0, "test1"), &Gauge{ - Name: "test1", + Name: "value1.test1.value", MeasureTime: time.Date(2009, time.November, 10, 23, 0, 0, 0, time.UTC).Unix(), Value: 0.0, }, @@ -87,7 +95,7 @@ func TestBuildGauge(t *testing.T) { { testutil.TestMetric(1.0, "test2"), &Gauge{ - Name: "test2", + Name: "value1.test2.value", MeasureTime: time.Date(2009, time.November, 10, 23, 0, 0, 0, time.UTC).Unix(), Value: 1.0, }, @@ -96,7 +104,7 @@ func TestBuildGauge(t *testing.T) { { testutil.TestMetric(10, "test3"), &Gauge{ - Name: "test3", + Name: "value1.test3.value", MeasureTime: time.Date(2009, time.November, 10, 23, 0, 0, 0, time.UTC).Unix(), Value: 10.0, }, @@ -105,7 +113,7 @@ func TestBuildGauge(t *testing.T) { { testutil.TestMetric(int32(112345), "test4"), &Gauge{ - Name: "test4", + Name: "value1.test4.value", MeasureTime: time.Date(2009, time.November, 10, 23, 0, 0, 0, time.UTC).Unix(), Value: 112345.0, }, @@ -114,7 +122,7 @@ func TestBuildGauge(t *testing.T) { { testutil.TestMetric(int64(112345), "test5"), &Gauge{ - Name: "test5", + Name: "value1.test5.value", MeasureTime: time.Date(2009, time.November, 10, 23, 0, 0, 0, time.UTC).Unix(), Value: 112345.0, }, @@ -123,7 +131,7 @@ func TestBuildGauge(t *testing.T) { { testutil.TestMetric(float32(11234.5), "test6"), &Gauge{ - Name: "test6", + Name: "value1.test6.value", MeasureTime: time.Date(2009, time.November, 10, 23, 0, 0, 0, time.UTC).Unix(), Value: 11234.5, }, @@ -131,12 +139,8 @@ func TestBuildGauge(t *testing.T) { }, { testutil.TestMetric("11234.5", "test7"), - &Gauge{ - Name: "test7", - MeasureTime: time.Date(2009, time.November, 10, 23, 0, 0, 0, time.UTC).Unix(), - Value: 11234.5, - }, - fmt.Errorf("unable to extract value from Fields, undeterminable type"), + nil, + nil, }, } @@ -150,6 +154,9 @@ func TestBuildGauge(t *testing.T) { t.Errorf("%s: expected an error (%s) but none returned", gt.ptIn.Name(), gt.err.Error()) } + if len(gauges) != 0 && gt.outGauge == nil { + t.Errorf("%s: unexpected gauge, %+v\n", gt.ptIn.Name(), gt.outGauge) + } if len(gauges) == 0 { continue } @@ -163,13 +170,13 @@ func TestBuildGauge(t *testing.T) { func TestBuildGaugeWithSource(t *testing.T) { pt1, _ := telegraf.NewMetric( "test1", - map[string]string{"hostname": "192.168.0.1"}, + map[string]string{"hostname": "192.168.0.1", "tag1": "value1"}, map[string]interface{}{"value": 0.0}, time.Date(2010, time.November, 10, 23, 0, 0, 0, time.UTC), ) pt2, _ := telegraf.NewMetric( "test2", - map[string]string{"hostnam": "192.168.0.1"}, + map[string]string{"hostnam": "192.168.0.1", "tag1": "value1"}, map[string]interface{}{"value": 1.0}, time.Date(2010, time.December, 10, 23, 0, 0, 0, time.UTC), ) @@ -182,7 +189,7 @@ func TestBuildGaugeWithSource(t *testing.T) { { pt1, &Gauge{ - Name: "test1", + Name: "192_168_0_1.value1.test1.value", MeasureTime: time.Date(2010, time.November, 10, 23, 0, 0, 0, time.UTC).Unix(), Value: 0.0, Source: "192.168.0.1", @@ -192,7 +199,7 @@ func TestBuildGaugeWithSource(t *testing.T) { { pt2, &Gauge{ - Name: "test2", + Name: "192_168_0_1.value1.test1.value", MeasureTime: time.Date(2010, time.December, 10, 23, 0, 0, 0, time.UTC).Unix(), Value: 1.0, }, diff --git a/plugins/outputs/mqtt/mqtt.go b/plugins/outputs/mqtt/mqtt.go index 6f8abe954..c57ee8cd0 100644 --- a/plugins/outputs/mqtt/mqtt.go +++ b/plugins/outputs/mqtt/mqtt.go @@ -10,7 +10,7 @@ import ( "github.com/influxdata/telegraf/plugins/outputs" 
"github.com/influxdata/telegraf/plugins/serializers" - paho "git.eclipse.org/gitroot/paho/org.eclipse.paho.mqtt.golang.git" + paho "github.com/eclipse/paho.mqtt.golang" ) var sampleConfig = ` @@ -32,7 +32,7 @@ var sampleConfig = ` ## Use SSL but skip chain & host verification # insecure_skip_verify = false - ## Data format to output. This can be "influx" or "graphite" + ## Data format to output. ## Each data format has it's own unique set of configuration options, read ## more about them here: ## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_OUTPUT.md @@ -57,7 +57,7 @@ type MQTT struct { // Use SSL but skip chain & host verification InsecureSkipVerify bool - client *paho.Client + client paho.Client opts *paho.ClientOptions serializer serializers.Serializer @@ -172,7 +172,7 @@ func (m *MQTT) createOpts() (*paho.ClientOptions, error) { } user := m.Username - if user == "" { + if user != "" { opts.SetUsername(user) } password := m.Password diff --git a/plugins/outputs/nsq/nsq.go b/plugins/outputs/nsq/nsq.go index 75b998484..fd4053222 100644 --- a/plugins/outputs/nsq/nsq.go +++ b/plugins/outputs/nsq/nsq.go @@ -24,7 +24,7 @@ var sampleConfig = ` ## NSQ topic for producer messages topic = "telegraf" - ## Data format to output. This can be "influx" or "graphite" + ## Data format to output. ## Each data format has it's own unique set of configuration options, read ## more about them here: ## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_OUTPUT.md diff --git a/plugins/outputs/prometheus_client/prometheus_client.go b/plugins/outputs/prometheus_client/prometheus_client.go index df546c192..d5e3f1ced 100644 --- a/plugins/outputs/prometheus_client/prometheus_client.go +++ b/plugins/outputs/prometheus_client/prometheus_client.go @@ -4,12 +4,26 @@ import ( "fmt" "log" "net/http" + "regexp" + "strings" "github.com/influxdata/telegraf" "github.com/influxdata/telegraf/plugins/outputs" "github.com/prometheus/client_golang/prometheus" ) +var ( + sanitizedChars = strings.NewReplacer("/", "_", "@", "_", " ", "_", "-", "_", ".", "_") + + // Prometheus metric names must match this regex + // see https://prometheus.io/docs/concepts/data_model/#metric-names-and-labels + metricName = regexp.MustCompile("^[a-zA-Z_:][a-zA-Z0-9_:]*$") + + // Prometheus labels must match this regex + // see https://prometheus.io/docs/concepts/data_model/#metric-names-and-labels + labelName = regexp.MustCompile("^[a-zA-Z_][a-zA-Z0-9_]*$") +) + type PrometheusClient struct { Listen string metrics map[string]*prometheus.UntypedVec @@ -64,54 +78,82 @@ func (p *PrometheusClient) Write(metrics []telegraf.Metric) error { } for _, point := range metrics { - var labels []string key := point.Name() + key = sanitizedChars.Replace(key) - for k, _ := range point.Tags() { - if len(k) > 0 { - labels = append(labels, k) - } - } - - if _, ok := p.metrics[key]; !ok { - p.metrics[key] = prometheus.NewUntypedVec( - prometheus.UntypedOpts{ - Name: key, - Help: fmt.Sprintf("Telegraf collected point '%s'", key), - }, - labels, - ) - prometheus.MustRegister(p.metrics[key]) - } - + var labels []string l := prometheus.Labels{} - for tk, tv := range point.Tags() { - l[tk] = tv + for k, v := range point.Tags() { + k = sanitizedChars.Replace(k) + if len(k) == 0 { + continue + } + if !labelName.MatchString(k) { + continue + } + labels = append(labels, k) + l[k] = v } - for _, val := range point.Fields() { + for n, val := range point.Fields() { + // Ignore string and bool fields. 
+ switch val.(type) { + case string: + continue + case bool: + continue + } + + // sanitize the measurement name + n = sanitizedChars.Replace(n) + var mname string + if n == "value" { + mname = key + } else { + mname = fmt.Sprintf("%s_%s", key, n) + } + + // verify that it is a valid measurement name + if !metricName.MatchString(mname) { + continue + } + + // Create a new metric if it hasn't been created yet. + if _, ok := p.metrics[mname]; !ok { + p.metrics[mname] = prometheus.NewUntypedVec( + prometheus.UntypedOpts{ + Name: mname, + Help: "Telegraf collected metric", + }, + labels, + ) + if err := prometheus.Register(p.metrics[mname]); err != nil { + log.Printf("prometheus_client: Metric failed to register with prometheus, %s", err) + continue + } + } + switch val := val.(type) { - default: - log.Printf("Prometheus output, unsupported type. key: %s, type: %T\n", - key, val) case int64: - m, err := p.metrics[key].GetMetricWith(l) + m, err := p.metrics[mname].GetMetricWith(l) if err != nil { log.Printf("ERROR Getting metric in Prometheus output, "+ "key: %s, labels: %v,\nerr: %s\n", - key, l, err.Error()) + mname, l, err.Error()) continue } m.Set(float64(val)) case float64: - m, err := p.metrics[key].GetMetricWith(l) + m, err := p.metrics[mname].GetMetricWith(l) if err != nil { log.Printf("ERROR Getting metric in Prometheus output, "+ "key: %s, labels: %v,\nerr: %s\n", - key, l, err.Error()) + mname, l, err.Error()) continue } m.Set(val) + default: + continue } } } diff --git a/plugins/outputs/prometheus_client/prometheus_client_test.go b/plugins/outputs/prometheus_client/prometheus_client_test.go index 16414a8e4..15ed7b7e4 100644 --- a/plugins/outputs/prometheus_client/prometheus_client_test.go +++ b/plugins/outputs/prometheus_client/prometheus_client_test.go @@ -54,7 +54,7 @@ func TestPrometheusWritePointEmptyTag(t *testing.T) { require.NoError(t, p.Gather(&acc)) for _, e := range expected { - acc.AssertContainsFields(t, "prometheus_"+e.name, + acc.AssertContainsFields(t, e.name, map[string]interface{}{"value": e.value}) } @@ -84,7 +84,7 @@ func TestPrometheusWritePointEmptyTag(t *testing.T) { require.NoError(t, p.Gather(&acc)) for _, e := range expected2 { - acc.AssertContainsFields(t, "prometheus_"+e.name, + acc.AssertContainsFields(t, e.name, map[string]interface{}{"value": e.value}) } } diff --git a/plugins/parsers/graphite/parser.go b/plugins/parsers/graphite/parser.go index 5e8815064..8c31cd760 100644 --- a/plugins/parsers/graphite/parser.go +++ b/plugins/parsers/graphite/parser.go @@ -231,6 +231,7 @@ func (p *GraphiteParser) ApplyTemplate(line string) (string, map[string]string, type template struct { tags []string defaultTags map[string]string + greedyField bool greedyMeasurement bool separator string } @@ -248,6 +249,8 @@ func NewTemplate(pattern string, defaultTags map[string]string, separator string } if tag == "measurement*" { template.greedyMeasurement = true + } else if tag == "field*" { + template.greedyField = true } } @@ -265,7 +268,7 @@ func (t *template) Apply(line string) (string, map[string]string, string, error) var ( measurement []string tags = make(map[string]string) - field string + field []string ) // Set any default tags @@ -273,6 +276,18 @@ func (t *template) Apply(line string) (string, map[string]string, string, error) tags[k] = v } + // See if an invalid combination has been specified in the template: + for _, tag := range t.tags { + if tag == "measurement*" { + t.greedyMeasurement = true + } else if tag == "field*" { + t.greedyField = true + } + } + if 
t.greedyField && t.greedyMeasurement { + return "", nil, "", fmt.Errorf("either 'field*' or 'measurement*' can be used in each template (but not both together): %q", strings.Join(t.tags, t.separator)) + } + for i, tag := range t.tags { if i >= len(fields) { continue @@ -281,10 +296,10 @@ func (t *template) Apply(line string) (string, map[string]string, string, error) if tag == "measurement" { measurement = append(measurement, fields[i]) } else if tag == "field" { - if len(field) != 0 { - return "", nil, "", fmt.Errorf("'field' can only be used once in each template: %q", line) - } - field = fields[i] + field = append(field, fields[i]) + } else if tag == "field*" { + field = append(field, fields[i:]...) + break } else if tag == "measurement*" { measurement = append(measurement, fields[i:]...) break @@ -293,7 +308,7 @@ func (t *template) Apply(line string) (string, map[string]string, string, error) } } - return strings.Join(measurement, t.separator), tags, field, nil + return strings.Join(measurement, t.separator), tags, strings.Join(field, t.separator), nil } // matcher determines which template should be applied to a given metric diff --git a/plugins/parsers/graphite/parser_test.go b/plugins/parsers/graphite/parser_test.go index ccf478c7a..5200cfbdd 100644 --- a/plugins/parsers/graphite/parser_test.go +++ b/plugins/parsers/graphite/parser_test.go @@ -94,6 +94,20 @@ func TestTemplateApply(t *testing.T) { measurement: "cpu.load", tags: map[string]string{"zone": "us-west"}, }, + { + test: "conjoined fields", + input: "prod.us-west.server01.cpu.util.idle.percent", + template: "env.zone.host.measurement.measurement.field*", + measurement: "cpu.util", + tags: map[string]string{"env": "prod", "zone": "us-west", "host": "server01"}, + }, + { + test: "multiple fields", + input: "prod.us-west.server01.cpu.util.idle.percent.free", + template: "env.zone.host.measurement.measurement.field.field.reading", + measurement: "cpu.util", + tags: map[string]string{"env": "prod", "zone": "us-west", "host": "server01", "reading": "free"}, + }, } for _, test := range tests { @@ -187,6 +201,12 @@ func TestParse(t *testing.T) { template: "measurement", err: `field "cpu" time: strconv.ParseFloat: parsing "14199724z57825": invalid syntax`, }, + { + test: "measurement* and field* (invalid)", + input: `prod.us-west.server01.cpu.util.idle.percent 99.99 1419972457825`, + template: "env.zone.host.measurement*.field*", + err: `either 'field*' or 'measurement*' can be used in each template (but not both together): "env.zone.host.measurement*.field*"`, + }, } for _, test := range tests { @@ -574,15 +594,48 @@ func TestApplyTemplateField(t *testing.T) { } } -func TestApplyTemplateFieldError(t *testing.T) { +func TestApplyTemplateMultipleFieldsTogether(t *testing.T) { p, err := NewGraphiteParser("_", - []string{"current.* measurement.field.field"}, nil) + []string{"current.* measurement.measurement.field.field"}, nil) assert.NoError(t, err) - _, _, _, err = p.ApplyTemplate("current.users.logged_in") - if err == nil { - t.Errorf("Parser.ApplyTemplate unexpected result. got %s, exp %s", err, - "'field' can only be used once in each template: current.users.logged_in") + measurement, _, field, err := p.ApplyTemplate("current.users.logged_in.ssh") + + assert.Equal(t, "current_users", measurement) + + if field != "logged_in_ssh" { + t.Errorf("Parser.ApplyTemplate unexpected result. 
got %s, exp %s", + field, "logged_in_ssh") + } +} + +func TestApplyTemplateMultipleFieldsApart(t *testing.T) { + p, err := NewGraphiteParser("_", + []string{"current.* measurement.measurement.field.method.field"}, nil) + assert.NoError(t, err) + + measurement, _, field, err := p.ApplyTemplate("current.users.logged_in.ssh.total") + + assert.Equal(t, "current_users", measurement) + + if field != "logged_in_total" { + t.Errorf("Parser.ApplyTemplate unexpected result. got %s, exp %s", + field, "logged_in_total") + } +} + +func TestApplyTemplateGreedyField(t *testing.T) { + p, err := NewGraphiteParser("_", + []string{"current.* measurement.measurement.field*"}, nil) + assert.NoError(t, err) + + measurement, _, field, err := p.ApplyTemplate("current.users.logged_in") + + assert.Equal(t, "current_users", measurement) + + if field != "logged_in" { + t.Errorf("Parser.ApplyTemplate unexpected result. got %s, exp %s", + field, "logged_in") } } diff --git a/plugins/parsers/nagios/parser.go b/plugins/parsers/nagios/parser.go new file mode 100644 index 000000000..305c3af11 --- /dev/null +++ b/plugins/parsers/nagios/parser.go @@ -0,0 +1,102 @@ +package nagios + +import ( + "regexp" + "strings" + "time" + + "github.com/influxdata/telegraf" +) + +type NagiosParser struct { + MetricName string + DefaultTags map[string]string +} + +// Got from Alignak +// https://github.com/Alignak-monitoring/alignak/blob/develop/alignak/misc/perfdata.py +var perfSplitRegExp, _ = regexp.Compile(`([^=]+=\S+)`) +var nagiosRegExp, _ = regexp.Compile(`^([^=]+)=([\d\.\-\+eE]+)([\w\/%]*);?([\d\.\-\+eE:~@]+)?;?([\d\.\-\+eE:~@]+)?;?([\d\.\-\+eE]+)?;?([\d\.\-\+eE]+)?;?\s*`) + +func (p *NagiosParser) ParseLine(line string) (telegraf.Metric, error) { + metrics, err := p.Parse([]byte(line)) + return metrics[0], err +} + +func (p *NagiosParser) SetDefaultTags(tags map[string]string) { + p.DefaultTags = tags +} + +//> rta,host=absol,unit=ms critical=6000,min=0,value=0.332,warning=4000 1456374625003628099 +//> pl,host=absol,unit=% critical=90,min=0,value=0,warning=80 1456374625003693967 + +func (p *NagiosParser) Parse(buf []byte) ([]telegraf.Metric, error) { + metrics := make([]telegraf.Metric, 0) + // Convert to string + out := string(buf) + // Prepare output for splitting + // Delete escaped pipes + out = strings.Replace(out, `\|`, "___PROTECT_PIPE___", -1) + // Split lines and get the first one + lines := strings.Split(out, "\n") + // Split output and perfdatas + data_splitted := strings.Split(lines[0], "|") + if len(data_splitted) <= 1 { + // No pipe == no perf data + return nil, nil + } + // Get perfdatas + perfdatas := data_splitted[1] + // Add escaped pipes + perfdatas = strings.Replace(perfdatas, "___PROTECT_PIPE___", `\|`, -1) + // Split perfs + unParsedPerfs := perfSplitRegExp.FindAllSubmatch([]byte(perfdatas), -1) + // Iterate on all perfs + for _, unParsedPerfs := range unParsedPerfs { + // Get metrics + // Trim perf + trimedPerf := strings.Trim(string(unParsedPerfs[0]), " ") + // Parse perf + perf := nagiosRegExp.FindAllSubmatch([]byte(trimedPerf), -1) + // Bad string + if len(perf) == 0 { + continue + } + if len(perf[0]) <= 2 { + continue + } + if perf[0][1] == nil || perf[0][2] == nil { + continue + } + fieldName := string(perf[0][1]) + tags := make(map[string]string) + if perf[0][3] != nil { + tags["unit"] = string(perf[0][3]) + } + fields := make(map[string]interface{}) + fields["value"] = perf[0][2] + // TODO should we set empty field + // if metric if there is no data ? 
+        if perf[0][4] != nil {
+            if warning, err := strconv.ParseFloat(string(perf[0][4]), 64); err == nil {
+                fields["warning"] = warning
+            }
+        }
+        if perf[0][5] != nil {
+            if critical, err := strconv.ParseFloat(string(perf[0][5]), 64); err == nil {
+                fields["critical"] = critical
+            }
+        }
+        if perf[0][6] != nil {
+            if min, err := strconv.ParseFloat(string(perf[0][6]), 64); err == nil {
+                fields["min"] = min
+            }
+        }
+        if perf[0][7] != nil {
+            if max, err := strconv.ParseFloat(string(perf[0][7]), 64); err == nil {
+                fields["max"] = max
+            }
+        }
+        // Create metric
+        metric, err := telegraf.NewMetric(fieldName, tags, fields, time.Now().UTC())
+        if err != nil {
+            return nil, err
+        }
+        // Add Metric
+        metrics = append(metrics, metric)
+    }
+
+    return metrics, nil
+}
diff --git a/plugins/parsers/nagios/parser_test.go b/plugins/parsers/nagios/parser_test.go
new file mode 100644
index 000000000..49502a021
--- /dev/null
+++ b/plugins/parsers/nagios/parser_test.go
@@ -0,0 +1,89 @@
+package nagios
+
+import (
+    "testing"
+
+    "github.com/stretchr/testify/assert"
+    "github.com/stretchr/testify/require"
+)
+
+const validOutput1 = `PING OK - Packet loss = 0%, RTA = 0.30 ms|rta=0.298000ms;4000.000000;6000.000000;0.000000 pl=0%;80;90;0;100
+This is a long output
+with three lines
+`
+const validOutput2 = "TCP OK - 0.008 second response time on port 80|time=0.008457s;;;0.000000;10.000000"
+const validOutput3 = "TCP OK - 0.008 second response time on port 80|time=0.008457"
+const invalidOutput3 = "PING OK - Packet loss = 0%, RTA = 0.30 ms"
+const invalidOutput4 = "PING OK - Packet loss = 0%, RTA = 0.30 ms| =3;;;; dgasdg =;;;; sff=;;;;"
+
+func TestParseValidOutput(t *testing.T) {
+    parser := NagiosParser{
+        MetricName: "nagios_test",
+    }
+
+    // Output1
+    metrics, err := parser.Parse([]byte(validOutput1))
+    require.NoError(t, err)
+    assert.Len(t, metrics, 2)
+    // rta
+    assert.Equal(t, "rta", metrics[0].Name())
+    assert.Equal(t, map[string]interface{}{
+        "value":    float64(0.298),
+        "warning":  float64(4000),
+        "critical": float64(6000),
+        "min":      float64(0),
+    }, metrics[0].Fields())
+    assert.Equal(t, map[string]string{"unit": "ms"}, metrics[0].Tags())
+    // pl
+    assert.Equal(t, "pl", metrics[1].Name())
+    assert.Equal(t, map[string]interface{}{
+        "value":    float64(0),
+        "warning":  float64(80),
+        "critical": float64(90),
+        "min":      float64(0),
+        "max":      float64(100),
+    }, metrics[1].Fields())
+    assert.Equal(t, map[string]string{"unit": "%"}, metrics[1].Tags())
+
+    // Output2
+    metrics, err = parser.Parse([]byte(validOutput2))
+    require.NoError(t, err)
+    assert.Len(t, metrics, 1)
+    // time
+    assert.Equal(t, "time", metrics[0].Name())
+    assert.Equal(t, map[string]interface{}{
+        "value": float64(0.008457),
+        "min":   float64(0),
+        "max":   float64(10),
+    }, metrics[0].Fields())
+    assert.Equal(t, map[string]string{"unit": "s"}, metrics[0].Tags())
+
+    // Output3
+    metrics, err = parser.Parse([]byte(validOutput3))
+    require.NoError(t, err)
+    assert.Len(t, metrics, 1)
+    // time
+    assert.Equal(t, "time", metrics[0].Name())
+    assert.Equal(t, map[string]interface{}{
+        "value": float64(0.008457),
+    }, metrics[0].Fields())
+    assert.Equal(t, map[string]string{}, metrics[0].Tags())
+
+}
+
+func TestParseInvalidOutput(t *testing.T) {
+    parser := NagiosParser{
+        MetricName: "nagios_test",
+    }
+
+    // invalidOutput3
+    metrics, err := parser.Parse([]byte(invalidOutput3))
+    require.NoError(t, err)
+    assert.Len(t, metrics, 0)
+
+    // invalidOutput4
+    metrics, err = parser.Parse([]byte(invalidOutput4))
+    require.NoError(t, err)
+    assert.Len(t, metrics, 0)
+
+}
diff --git a/plugins/parsers/registry.go b/plugins/parsers/registry.go
index 982b6bb80..360d795bc 100644
--- a/plugins/parsers/registry.go
+++ b/plugins/parsers/registry.go
@@ -8,6 +8,8 @@ import (
 	"github.com/influxdata/telegraf/plugins/parsers/graphite"
"github.com/influxdata/telegraf/plugins/parsers/influx" "github.com/influxdata/telegraf/plugins/parsers/json" + "github.com/influxdata/telegraf/plugins/parsers/nagios" + "github.com/influxdata/telegraf/plugins/parsers/value" ) // ParserInput is an interface for input plugins that are able to parse @@ -38,7 +40,7 @@ type Parser interface { // Config is a struct that covers the data types needed for all parser types, // and can be used to instantiate _any_ of the parsers. type Config struct { - // Dataformat can be one of: json, influx, graphite + // Dataformat can be one of: json, influx, graphite, value, nagios DataFormat string // Separator only applied to Graphite data. @@ -48,9 +50,12 @@ type Config struct { // TagKeys only apply to JSON data TagKeys []string - // MetricName only applies to JSON data. This will be the name of the measurement. + // MetricName applies to JSON & value. This will be the name of the measurement. MetricName string + // DataType only applies to value, this will be the type to parse value to + DataType string + // DefaultTags are the default tags that will be added to all parsed metrics. DefaultTags map[string]string } @@ -63,8 +68,13 @@ func NewParser(config *Config) (Parser, error) { case "json": parser, err = NewJSONParser(config.MetricName, config.TagKeys, config.DefaultTags) + case "value": + parser, err = NewValueParser(config.MetricName, + config.DataType, config.DefaultTags) case "influx": parser, err = NewInfluxParser() + case "nagios": + parser, err = NewNagiosParser() case "graphite": parser, err = NewGraphiteParser(config.Separator, config.Templates, config.DefaultTags) @@ -87,6 +97,10 @@ func NewJSONParser( return parser, nil } +func NewNagiosParser() (Parser, error) { + return &nagios.NagiosParser{}, nil +} + func NewInfluxParser() (Parser, error) { return &influx.InfluxParser{}, nil } @@ -98,3 +112,15 @@ func NewGraphiteParser( ) (Parser, error) { return graphite.NewGraphiteParser(separator, templates, defaultTags) } + +func NewValueParser( + metricName string, + dataType string, + defaultTags map[string]string, +) (Parser, error) { + return &value.ValueParser{ + MetricName: metricName, + DataType: dataType, + DefaultTags: defaultTags, + }, nil +} diff --git a/plugins/parsers/value/parser.go b/plugins/parsers/value/parser.go new file mode 100644 index 000000000..00673eced --- /dev/null +++ b/plugins/parsers/value/parser.go @@ -0,0 +1,68 @@ +package value + +import ( + "bytes" + "fmt" + "strconv" + "time" + + "github.com/influxdata/telegraf" +) + +type ValueParser struct { + MetricName string + DataType string + DefaultTags map[string]string +} + +func (v *ValueParser) Parse(buf []byte) ([]telegraf.Metric, error) { + // separate out any fields in the buffer, ignore anything but the last. 
+ values := bytes.Fields(buf) + if len(values) < 1 { + return []telegraf.Metric{}, nil + } + valueStr := string(values[len(values)-1]) + + var value interface{} + var err error + switch v.DataType { + case "", "int", "integer": + value, err = strconv.Atoi(valueStr) + case "float", "long": + value, err = strconv.ParseFloat(valueStr, 64) + case "str", "string": + value = valueStr + case "bool", "boolean": + value, err = strconv.ParseBool(valueStr) + } + if err != nil { + return nil, err + } + + fields := map[string]interface{}{"value": value} + metric, err := telegraf.NewMetric(v.MetricName, v.DefaultTags, + fields, time.Now().UTC()) + if err != nil { + return nil, err + } + + return []telegraf.Metric{metric}, nil +} + +func (v *ValueParser) ParseLine(line string) (telegraf.Metric, error) { + metrics, err := v.Parse([]byte(line)) + + if err != nil { + return nil, err + } + + if len(metrics) < 1 { + return nil, fmt.Errorf("Can not parse the line: %s, for data format: value", line) + } + + return metrics[0], nil +} + +func (v *ValueParser) SetDefaultTags(tags map[string]string) { + v.DefaultTags = tags +} diff --git a/plugins/parsers/value/parser_test.go b/plugins/parsers/value/parser_test.go new file mode 100644 index 000000000..f60787491 --- /dev/null +++ b/plugins/parsers/value/parser_test.go @@ -0,0 +1,238 @@ +package value + +import ( + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestParseValidValues(t *testing.T) { + parser := ValueParser{ + MetricName: "value_test", + DataType: "integer", + } + metrics, err := parser.Parse([]byte("55")) + assert.NoError(t, err) + assert.Len(t, metrics, 1) + assert.Equal(t, "value_test", metrics[0].Name()) + assert.Equal(t, map[string]interface{}{ + "value": int64(55), + }, metrics[0].Fields()) + assert.Equal(t, map[string]string{}, metrics[0].Tags()) + + parser = ValueParser{ + MetricName: "value_test", + DataType: "float", + } + metrics, err = parser.Parse([]byte("64")) + assert.NoError(t, err) + assert.Len(t, metrics, 1) + assert.Equal(t, "value_test", metrics[0].Name()) + assert.Equal(t, map[string]interface{}{ + "value": float64(64), + }, metrics[0].Fields()) + assert.Equal(t, map[string]string{}, metrics[0].Tags()) + + parser = ValueParser{ + MetricName: "value_test", + DataType: "string", + } + metrics, err = parser.Parse([]byte("foobar")) + assert.NoError(t, err) + assert.Len(t, metrics, 1) + assert.Equal(t, "value_test", metrics[0].Name()) + assert.Equal(t, map[string]interface{}{ + "value": "foobar", + }, metrics[0].Fields()) + assert.Equal(t, map[string]string{}, metrics[0].Tags()) + + parser = ValueParser{ + MetricName: "value_test", + DataType: "boolean", + } + metrics, err = parser.Parse([]byte("true")) + assert.NoError(t, err) + assert.Len(t, metrics, 1) + assert.Equal(t, "value_test", metrics[0].Name()) + assert.Equal(t, map[string]interface{}{ + "value": true, + }, metrics[0].Fields()) + assert.Equal(t, map[string]string{}, metrics[0].Tags()) +} + +func TestParseMultipleValues(t *testing.T) { + parser := ValueParser{ + MetricName: "value_test", + DataType: "integer", + } + metrics, err := parser.Parse([]byte(`55 +45 +223 +12 +999 +`)) + assert.NoError(t, err) + assert.Len(t, metrics, 1) + assert.Equal(t, "value_test", metrics[0].Name()) + assert.Equal(t, map[string]interface{}{ + "value": int64(999), + }, metrics[0].Fields()) + assert.Equal(t, map[string]string{}, metrics[0].Tags()) +} + +func TestParseLineValidValues(t *testing.T) { + parser := ValueParser{ + MetricName: "value_test", + DataType: "integer", + } + 
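// ParseLine should return the same single metric that Parse would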
metric, err := parser.ParseLine("55") + assert.NoError(t, err) + assert.Equal(t, "value_test", metric.Name()) + assert.Equal(t, map[string]interface{}{ + "value": int64(55), + }, metric.Fields()) + assert.Equal(t, map[string]string{}, metric.Tags()) + + parser = ValueParser{ + MetricName: "value_test", + DataType: "float", + } + metric, err = parser.ParseLine("64") + assert.NoError(t, err) + assert.Equal(t, "value_test", metric.Name()) + assert.Equal(t, map[string]interface{}{ + "value": float64(64), + }, metric.Fields()) + assert.Equal(t, map[string]string{}, metric.Tags()) + + parser = ValueParser{ + MetricName: "value_test", + DataType: "string", + } + metric, err = parser.ParseLine("foobar") + assert.NoError(t, err) + assert.Equal(t, "value_test", metric.Name()) + assert.Equal(t, map[string]interface{}{ + "value": "foobar", + }, metric.Fields()) + assert.Equal(t, map[string]string{}, metric.Tags()) + + parser = ValueParser{ + MetricName: "value_test", + DataType: "boolean", + } + metric, err = parser.ParseLine("true") + assert.NoError(t, err) + assert.Equal(t, "value_test", metric.Name()) + assert.Equal(t, map[string]interface{}{ + "value": true, + }, metric.Fields()) + assert.Equal(t, map[string]string{}, metric.Tags()) +} + +func TestParseInvalidValues(t *testing.T) { + parser := ValueParser{ + MetricName: "value_test", + DataType: "integer", + } + metrics, err := parser.Parse([]byte("55.0")) + assert.Error(t, err) + assert.Len(t, metrics, 0) + + parser = ValueParser{ + MetricName: "value_test", + DataType: "float", + } + metrics, err = parser.Parse([]byte("foobar")) + assert.Error(t, err) + assert.Len(t, metrics, 0) + + parser = ValueParser{ + MetricName: "value_test", + DataType: "boolean", + } + metrics, err = parser.Parse([]byte("213")) + assert.Error(t, err) + assert.Len(t, metrics, 0) +} + +func TestParseLineInvalidValues(t *testing.T) { + parser := ValueParser{ + MetricName: "value_test", + DataType: "integer", + } + _, err := parser.ParseLine("55.0") + assert.Error(t, err) + + parser = ValueParser{ + MetricName: "value_test", + DataType: "float", + } + _, err = parser.ParseLine("foobar") + assert.Error(t, err) + + parser = ValueParser{ + MetricName: "value_test", + DataType: "boolean", + } + _, err = parser.ParseLine("213") + assert.Error(t, err) +} + +func TestParseValidValuesDefaultTags(t *testing.T) { + parser := ValueParser{ + MetricName: "value_test", + DataType: "integer", + } + parser.SetDefaultTags(map[string]string{"test": "tag"}) + metrics, err := parser.Parse([]byte("55")) + assert.NoError(t, err) + assert.Len(t, metrics, 1) + assert.Equal(t, "value_test", metrics[0].Name()) + assert.Equal(t, map[string]interface{}{ + "value": int64(55), + }, metrics[0].Fields()) + assert.Equal(t, map[string]string{"test": "tag"}, metrics[0].Tags()) + + parser = ValueParser{ + MetricName: "value_test", + DataType: "float", + } + parser.SetDefaultTags(map[string]string{"test": "tag"}) + metrics, err = parser.Parse([]byte("64")) + assert.NoError(t, err) + assert.Len(t, metrics, 1) + assert.Equal(t, "value_test", metrics[0].Name()) + assert.Equal(t, map[string]interface{}{ + "value": float64(64), + }, metrics[0].Fields()) + assert.Equal(t, map[string]string{"test": "tag"}, metrics[0].Tags()) + + parser = ValueParser{ + MetricName: "value_test", + DataType: "string", + } + parser.SetDefaultTags(map[string]string{"test": "tag"}) + metrics, err = parser.Parse([]byte("foobar")) + assert.NoError(t, err) + assert.Len(t, metrics, 1) + assert.Equal(t, "value_test", metrics[0].Name()) + 
assert.Equal(t, map[string]interface{}{ + "value": "foobar", + }, metrics[0].Fields()) + assert.Equal(t, map[string]string{"test": "tag"}, metrics[0].Tags()) + + parser = ValueParser{ + MetricName: "value_test", + DataType: "boolean", + } + parser.SetDefaultTags(map[string]string{"test": "tag"}) + metrics, err = parser.Parse([]byte("true")) + assert.NoError(t, err) + assert.Len(t, metrics, 1) + assert.Equal(t, "value_test", metrics[0].Name()) + assert.Equal(t, map[string]interface{}{ + "value": true, + }, metrics[0].Fields()) + assert.Equal(t, map[string]string{"test": "tag"}, metrics[0].Tags()) +} diff --git a/plugins/serializers/graphite/graphite.go b/plugins/serializers/graphite/graphite.go index d04f756c1..7a7fec2f1 100644 --- a/plugins/serializers/graphite/graphite.go +++ b/plugins/serializers/graphite/graphite.go @@ -12,41 +12,53 @@ type GraphiteSerializer struct { Prefix string } +var sanitizedChars = strings.NewReplacer("/", "-", "@", "-", " ", "_") + func (s *GraphiteSerializer) Serialize(metric telegraf.Metric) ([]string, error) { out := []string{} - // Get name - name := metric.Name() + // Convert UnixNano to Unix timestamps timestamp := metric.UnixNano() / 1000000000 - tag_str := buildTags(metric) for field_name, value := range metric.Fields() { // Convert value value_str := fmt.Sprintf("%#v", value) // Write graphite metric var graphitePoint string - if name == field_name { - graphitePoint = fmt.Sprintf("%s.%s %s %d", - tag_str, - strings.Replace(name, ".", "_", -1), - value_str, - timestamp) - } else { - graphitePoint = fmt.Sprintf("%s.%s.%s %s %d", - tag_str, - strings.Replace(name, ".", "_", -1), - strings.Replace(field_name, ".", "_", -1), - value_str, - timestamp) - } - if s.Prefix != "" { - graphitePoint = fmt.Sprintf("%s.%s", s.Prefix, graphitePoint) - } + graphitePoint = fmt.Sprintf("%s %s %d", + s.SerializeBucketName(metric, field_name), + value_str, + timestamp) out = append(out, graphitePoint) } return out, nil } +func (s *GraphiteSerializer) SerializeBucketName(metric telegraf.Metric, field_name string) string { + // Get the metric name + name := metric.Name() + + // Convert UnixNano to Unix timestamps + tag_str := buildTags(metric) + + // Write graphite metric + var serializedBucketName string + if name == field_name { + serializedBucketName = fmt.Sprintf("%s.%s", + tag_str, + strings.Replace(name, ".", "_", -1)) + } else { + serializedBucketName = fmt.Sprintf("%s.%s.%s", + tag_str, + strings.Replace(name, ".", "_", -1), + strings.Replace(field_name, ".", "_", -1)) + } + if s.Prefix != "" { + serializedBucketName = fmt.Sprintf("%s.%s", s.Prefix, serializedBucketName) + } + return serializedBucketName +} + func buildTags(metric telegraf.Metric) string { var keys []string tags := metric.Tags() @@ -75,5 +87,5 @@ func buildTags(metric telegraf.Metric) string { tag_str += "." 
+ tag_value } } - return tag_str + return sanitizedChars.Replace(tag_str) } diff --git a/plugins/serializers/graphite/graphite_test.go b/plugins/serializers/graphite/graphite_test.go index 72b203b7a..8d25bf937 100644 --- a/plugins/serializers/graphite/graphite_test.go +++ b/plugins/serializers/graphite/graphite_test.go @@ -119,3 +119,62 @@ func TestSerializeMetricPrefix(t *testing.T) { sort.Strings(expS) assert.Equal(t, expS, mS) } + +func TestSerializeBucketNameNoHost(t *testing.T) { + now := time.Now() + tags := map[string]string{ + "cpu": "cpu0", + "datacenter": "us-west-2", + } + fields := map[string]interface{}{ + "usage_idle": float64(91.5), + } + m, err := telegraf.NewMetric("cpu", tags, fields, now) + assert.NoError(t, err) + + s := GraphiteSerializer{} + mS := s.SerializeBucketName(m, "usage_idle") + + expS := fmt.Sprintf("cpu0.us-west-2.cpu.usage_idle") + assert.Equal(t, expS, mS) +} + +func TestSerializeBucketNameHost(t *testing.T) { + now := time.Now() + tags := map[string]string{ + "host": "localhost", + "cpu": "cpu0", + "datacenter": "us-west-2", + } + fields := map[string]interface{}{ + "usage_idle": float64(91.5), + } + m, err := telegraf.NewMetric("cpu", tags, fields, now) + assert.NoError(t, err) + + s := GraphiteSerializer{} + mS := s.SerializeBucketName(m, "usage_idle") + + expS := fmt.Sprintf("localhost.cpu0.us-west-2.cpu.usage_idle") + assert.Equal(t, expS, mS) +} + +func TestSerializeBucketNamePrefix(t *testing.T) { + now := time.Now() + tags := map[string]string{ + "host": "localhost", + "cpu": "cpu0", + "datacenter": "us-west-2", + } + fields := map[string]interface{}{ + "usage_idle": float64(91.5), + } + m, err := telegraf.NewMetric("cpu", tags, fields, now) + assert.NoError(t, err) + + s := GraphiteSerializer{Prefix: "prefix"} + mS := s.SerializeBucketName(m, "usage_idle") + + expS := fmt.Sprintf("prefix.localhost.cpu0.us-west-2.cpu.usage_idle") + assert.Equal(t, expS, mS) +} diff --git a/plugins/serializers/json/json.go b/plugins/serializers/json/json.go new file mode 100644 index 000000000..e27aa400f --- /dev/null +++ b/plugins/serializers/json/json.go @@ -0,0 +1,27 @@ +package json + +import ( + ejson "encoding/json" + + "github.com/influxdata/telegraf" +) + +type JsonSerializer struct { +} + +func (s *JsonSerializer) Serialize(metric telegraf.Metric) ([]string, error) { + out := []string{} + + m := make(map[string]interface{}) + m["tags"] = metric.Tags() + m["fields"] = metric.Fields() + m["name"] = metric.Name() + m["timestamp"] = metric.UnixNano() / 1000000000 + serialized, err := ejson.Marshal(m) + if err != nil { + return []string{}, err + } + out = append(out, string(serialized)) + + return out, nil +} diff --git a/plugins/serializers/json/json_test.go b/plugins/serializers/json/json_test.go new file mode 100644 index 000000000..127bf237a --- /dev/null +++ b/plugins/serializers/json/json_test.go @@ -0,0 +1,87 @@ +package json + +import ( + "fmt" + "testing" + "time" + + "github.com/stretchr/testify/assert" + + "github.com/influxdata/telegraf" +) + +func TestSerializeMetricFloat(t *testing.T) { + now := time.Now() + tags := map[string]string{ + "cpu": "cpu0", + } + fields := map[string]interface{}{ + "usage_idle": float64(91.5), + } + m, err := telegraf.NewMetric("cpu", tags, fields, now) + assert.NoError(t, err) + + s := JsonSerializer{} + mS, err := s.Serialize(m) + assert.NoError(t, err) + expS := []string{fmt.Sprintf("{\"fields\":{\"usage_idle\":91.5},\"name\":\"cpu\",\"tags\":{\"cpu\":\"cpu0\"},\"timestamp\":%d}", now.Unix())} + assert.Equal(t, expS, 
mS) +} + +func TestSerializeMetricInt(t *testing.T) { + now := time.Now() + tags := map[string]string{ + "cpu": "cpu0", + } + fields := map[string]interface{}{ + "usage_idle": int64(90), + } + m, err := telegraf.NewMetric("cpu", tags, fields, now) + assert.NoError(t, err) + + s := JsonSerializer{} + mS, err := s.Serialize(m) + assert.NoError(t, err) + + expS := []string{fmt.Sprintf("{\"fields\":{\"usage_idle\":90},\"name\":\"cpu\",\"tags\":{\"cpu\":\"cpu0\"},\"timestamp\":%d}", now.Unix())} + assert.Equal(t, expS, mS) +} + +func TestSerializeMetricString(t *testing.T) { + now := time.Now() + tags := map[string]string{ + "cpu": "cpu0", + } + fields := map[string]interface{}{ + "usage_idle": "foobar", + } + m, err := telegraf.NewMetric("cpu", tags, fields, now) + assert.NoError(t, err) + + s := JsonSerializer{} + mS, err := s.Serialize(m) + assert.NoError(t, err) + + expS := []string{fmt.Sprintf("{\"fields\":{\"usage_idle\":\"foobar\"},\"name\":\"cpu\",\"tags\":{\"cpu\":\"cpu0\"},\"timestamp\":%d}", now.Unix())} + assert.Equal(t, expS, mS) +} + +func TestSerializeMultiFields(t *testing.T) { + now := time.Now() + tags := map[string]string{ + "cpu": "cpu0", + } + fields := map[string]interface{}{ + "usage_idle": int64(90), + "usage_total": 8559615, + } + m, err := telegraf.NewMetric("cpu", tags, fields, now) + assert.NoError(t, err) + + s := JsonSerializer{} + mS, err := s.Serialize(m) + assert.NoError(t, err) + + expS := []string{fmt.Sprintf("{\"fields\":{\"usage_idle\":90,\"usage_total\":8559615},\"name\":\"cpu\",\"tags\":{\"cpu\":\"cpu0\"},\"timestamp\":%d}", now.Unix())} + assert.Equal(t, expS, mS) +} diff --git a/plugins/serializers/registry.go b/plugins/serializers/registry.go index 2fedfbeaf..ebf79bc59 100644 --- a/plugins/serializers/registry.go +++ b/plugins/serializers/registry.go @@ -5,6 +5,7 @@ import ( "github.com/influxdata/telegraf/plugins/serializers/graphite" "github.com/influxdata/telegraf/plugins/serializers/influx" + "github.com/influxdata/telegraf/plugins/serializers/json" ) // SerializerOutput is an interface for output plugins that are able to @@ -40,10 +41,16 @@ func NewSerializer(config *Config) (Serializer, error) { serializer, err = NewInfluxSerializer() case "graphite": serializer, err = NewGraphiteSerializer(config.Prefix) + case "json": + serializer, err = NewJsonSerializer() } return serializer, err } +func NewJsonSerializer() (Serializer, error) { + return &json.JsonSerializer{}, nil +} + func NewInfluxSerializer() (Serializer, error) { return &influx.InfluxSerializer{}, nil } diff --git a/scripts/build.py b/scripts/build.py index 1465e36f3..0998bb7df 100755 --- a/scripts/build.py +++ b/scripts/build.py @@ -1,11 +1,4 @@ -#!/usr/bin/env python -# -# This is the Telegraf build script. 
-# -# Current caveats: -# - Does not checkout the correct commit/branch (for now, you will need to do so manually) -# - Has external dependencies for packaging (fpm) and uploading (boto) -# +#!/usr/bin/python -u import sys import os @@ -19,7 +12,12 @@ import re debug = False -# PACKAGING VARIABLES +################ +#### Telegraf Variables +################ + +# Packaging variables +PACKAGE_NAME = "telegraf" INSTALL_ROOT_DIR = "/usr/bin" LOG_DIR = "/var/log/telegraf" SCRIPT_DIR = "/usr/lib/telegraf/scripts" @@ -33,6 +31,16 @@ DEFAULT_CONFIG = "etc/telegraf.conf" DEFAULT_WINDOWS_CONFIG = "etc/telegraf_windows.conf" POSTINST_SCRIPT = "scripts/post-install.sh" PREINST_SCRIPT = "scripts/pre-install.sh" +POSTREMOVE_SCRIPT = "scripts/post-remove.sh" +PREREMOVE_SCRIPT = "scripts/pre-remove.sh" + +# Default AWS S3 bucket for uploads +DEFAULT_BUCKET = "get.influxdb.org/telegraf" + +CONFIGURATION_FILES = [ + CONFIG_DIR + '/telegraf.conf', + LOGROTATE_DIR + '/telegraf', +] # META-PACKAGE VARIABLES PACKAGE_LICENSE = "MIT" @@ -43,7 +51,8 @@ DESCRIPTION = "Plugin-driven server agent for reporting metrics into InfluxDB." # SCRIPT START prereqs = [ 'git', 'go' ] -optional_prereqs = [ 'fpm', 'rpmbuild' ] +go_vet_command = "go tool vet -composites=true ./" +optional_prereqs = [ 'gvm', 'fpm', 'rpmbuild' ] fpm_common_args = "-f -s dir --log error \ --vendor {} \ @@ -54,6 +63,8 @@ fpm_common_args = "-f -s dir --log error \ --config-files {} \ --after-install {} \ --before-install {} \ + --after-remove {} \ + --before-remove {} \ --description \"{}\"".format( VENDOR, PACKAGE_URL, @@ -63,31 +74,88 @@ fpm_common_args = "-f -s dir --log error \ LOGROTATE_DIR + '/telegraf', POSTINST_SCRIPT, PREINST_SCRIPT, + POSTREMOVE_SCRIPT, + PREREMOVE_SCRIPT, DESCRIPTION) targets = { - 'telegraf' : './cmd/telegraf/telegraf.go', + 'telegraf' : './cmd/telegraf', } supported_builds = { - 'darwin': [ "amd64", "i386" ], - 'windows': [ "amd64", "i386" ], - 'linux': [ "amd64", "i386", "arm" ] + "darwin": [ "amd64" ], + "windows": [ "amd64", "i386" ], + "linux": [ "amd64", "i386", "armhf", "armel", "arm64" ], + "freebsd": [ "amd64" ] } + supported_packages = { "darwin": [ "tar", "zip" ], - "linux": [ "deb", "rpm", "tar", "zip" ], + "linux": [ "deb", "rpm", "tar" ], "windows": [ "zip" ], + "freebsd": [ "tar" ] } + supported_tags = { # "linux": { # "amd64": ["sensors"] # } } + prereq_cmds = { # "linux": "sudo apt-get install lm-sensors libsensors4-dev" } +################ +#### Telegraf Functions +################ + +def create_package_fs(build_root): + print("Creating a filesystem hierarchy from directory: {}".format(build_root)) + # Using [1:] for the path names due to them being absolute + # (will overwrite previous paths, per 'os.path.join' documentation) + dirs = [ INSTALL_ROOT_DIR[1:], LOG_DIR[1:], SCRIPT_DIR[1:], CONFIG_DIR[1:], LOGROTATE_DIR[1:] ] + for d in dirs: + create_dir(os.path.join(build_root, d)) + os.chmod(os.path.join(build_root, d), 0o755) + +def package_scripts(build_root, windows=False): + print("Copying scripts and sample configuration to build directory") + if windows: + shutil.copyfile(DEFAULT_WINDOWS_CONFIG, os.path.join(build_root, "telegraf.conf")) + os.chmod(os.path.join(build_root, "telegraf.conf"), 0o644) + else: + shutil.copyfile(INIT_SCRIPT, os.path.join(build_root, SCRIPT_DIR[1:], INIT_SCRIPT.split('/')[1])) + os.chmod(os.path.join(build_root, SCRIPT_DIR[1:], INIT_SCRIPT.split('/')[1]), 0o644) + shutil.copyfile(SYSTEMD_SCRIPT, os.path.join(build_root, SCRIPT_DIR[1:], SYSTEMD_SCRIPT.split('/')[1])) + 
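# unit files and configs are installed world-readable (0o644)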
os.chmod(os.path.join(build_root, SCRIPT_DIR[1:], SYSTEMD_SCRIPT.split('/')[1]), 0o644) + shutil.copyfile(LOGROTATE_SCRIPT, os.path.join(build_root, LOGROTATE_DIR[1:], "telegraf")) + os.chmod(os.path.join(build_root, LOGROTATE_DIR[1:], "telegraf"), 0o644) + shutil.copyfile(DEFAULT_CONFIG, os.path.join(build_root, CONFIG_DIR[1:], "telegraf.conf")) + os.chmod(os.path.join(build_root, CONFIG_DIR[1:], "telegraf.conf"), 0o644) + +def run_generate(): + # NOOP for Telegraf + return True + +def go_get(branch, update=False, no_stash=False): + if not check_path_for("gdm"): + print("Downloading `gdm`...") + get_command = "go get github.com/sparrc/gdm" + run(get_command) + print("Retrieving dependencies with `gdm`...") + run("{}/bin/gdm restore -f Godeps_windows".format(os.environ.get("GOPATH"))) + run("{}/bin/gdm restore".format(os.environ.get("GOPATH"))) + return True + +def run_tests(race, parallel, timeout, no_vet): + # Currently a NOOP for Telegraf + return True + +################ +#### All Telegraf-specific content above this line +################ + def run(command, allow_failure=False, shell=False): out = None if debug: @@ -98,6 +166,8 @@ def run(command, allow_failure=False, shell=False): else: out = subprocess.check_output(command.split(), stderr=subprocess.STDOUT) out = out.decode("utf8") + if debug: + print("[DEBUG] command output: {}".format(out)) except subprocess.CalledProcessError as e: print("") print("") @@ -127,16 +197,32 @@ def run(command, allow_failure=False, shell=False): else: return out -def create_temp_dir(prefix=None): +def create_temp_dir(prefix = None): if prefix is None: - return tempfile.mkdtemp(prefix="telegraf-build.") + return tempfile.mkdtemp(prefix="{}-build.".format(PACKAGE_NAME)) else: return tempfile.mkdtemp(prefix=prefix) +def get_current_version_tag(): + version = run("git describe --always --tags --abbrev=0").strip() + return version + def get_current_version(): - command = "git describe --always --tags --abbrev=0" - out = run(command) - return out.strip() + version_tag = get_current_version_tag() + if version_tag[0] == 'v': + # Remove leading 'v' and possible '-rc\d+' + version = re.sub(r'-rc\d+', '', version_tag[1:]) + else: + version = re.sub(r'-rc\d+', '', version_tag) + return version + +def get_current_rc(): + rc = None + version_tag = get_current_version_tag() + matches = re.match(r'.*-rc(\d+)', version_tag) + if matches: + rc, = matches.groups(1) + return rc def get_current_commit(short=False): command = None @@ -181,56 +267,61 @@ def check_path_for(b): if os.path.isfile(full_path) and os.access(full_path, os.X_OK): return full_path -def check_environ(build_dir = None): - print("\nChecking environment:") +def check_environ(build_dir=None): + print("") + print("Checking environment:") for v in [ "GOPATH", "GOBIN", "GOROOT" ]: - print("\t- {} -> {}".format(v, os.environ.get(v))) + print("- {} -> {}".format(v, os.environ.get(v))) cwd = os.getcwd() - if build_dir == None and os.environ.get("GOPATH") and os.environ.get("GOPATH") not in cwd: - print("\n!! WARNING: Your current directory is not under your GOPATH. This may lead to build failures.") + if build_dir is None and os.environ.get("GOPATH") and os.environ.get("GOPATH") not in cwd: + print("!! WARNING: Your current directory is not under your GOPATH. This may lead to build failures.") def check_prereqs(): - print("\nChecking for dependencies:") + print("") + print("Checking for dependencies:") for req in prereqs: path = check_path_for(req) - if path is None: - path = '?' 
- print("\t- {} -> {}".format(req, path)) + if path: + print("- {} -> {}".format(req, path)) + else: + print("- {} -> ?".format(req)) for req in optional_prereqs: path = check_path_for(req) - if path is None: - path = '?' - print("\t- {} (optional) -> {}".format(req, path)) + if path: + print("- {} (optional) -> {}".format(req, path)) + else: + print("- {} (optional) -> ?".format(req)) print("") + return True def upload_packages(packages, bucket_name=None, nightly=False): if debug: - print("[DEBUG] upload_packags: {}".format(packages)) + print("[DEBUG] upload_packages: {}".format(packages)) try: import boto from boto.s3.key import Key except ImportError: - print "!! Cannot upload packages without the 'boto' python library." + print("!! Cannot upload packages without the 'boto' Python library.") return 1 - print("Uploading packages to S3...") - print("") + print("Connecting to S3...".format(bucket_name)) c = boto.connect_s3() if bucket_name is None: - bucket_name = 'get.influxdb.org/telegraf' + bucket_name = DEFAULT_BUCKET bucket = c.get_bucket(bucket_name.split('/')[0]) - print("\t - Using bucket: {}".format(bucket_name)) + print("Using bucket: {}".format(bucket_name)) for p in packages: if '/' in bucket_name: # Allow for nested paths within the bucket name (ex: - # bucket/telegraf). Assuming forward-slashes as path + # bucket/folder). Assuming forward-slashes as path # delimiter. name = os.path.join('/'.join(bucket_name.split('/')[1:]), os.path.basename(p)) else: name = os.path.basename(p) if bucket.get_key(name) is None or nightly: - print("\t - Uploading {} to {}...".format(name, bucket_name)) + print("Uploading {}...".format(name)) + sys.stdout.flush() k = Key(bucket) k.key = name if nightly: @@ -239,8 +330,10 @@ def upload_packages(packages, bucket_name=None, nightly=False): n = k.set_contents_from_filename(p, replace=False) k.make_public() else: - print("\t - Not uploading {}, already exists.".format(p)) + print("!! Not uploading package {}, as it already exists.".format(p)) print("") + return 0 + def build(version=None, branch=None, @@ -251,22 +344,18 @@ def build(version=None, rc=None, race=False, clean=False, - outdir=".", - goarm_version="6"): - print("-------------------------") - print("") - print("Build plan:") - print("\t- version: {}".format(version)) + outdir="."): + print("\n-------------------------\n") + print("Build Plan:") + print("- version: {}".format(version)) if rc: - print("\t- release candidate: {}".format(rc)) - print("\t- commit: {}".format(commit)) - print("\t- branch: {}".format(branch)) - print("\t- platform: {}".format(platform)) - print("\t- arch: {}".format(arch)) - if arch == 'arm' and goarm_version: - print("\t- ARM version: {}".format(goarm_version)) - print("\t- nightly? {}".format(str(nightly).lower())) - print("\t- race enabled? {}".format(str(race).lower())) + print("- release candidate: {}".format(rc)) + print("- commit: {}".format(get_current_commit(short=True))) + print("- branch: {}".format(get_current_branch())) + print("- platform: {}".format(platform)) + print("- arch: {}".format(arch)) + print("- nightly? {}".format(str(nightly).lower())) + print("- race enabled? 
{}".format(str(race).lower())) print("") if not os.path.exists(outdir): @@ -280,45 +369,49 @@ def build(version=None, # If a release candidate, update the version information accordingly version = "{}rc{}".format(version, rc) - # Set architecture to something that Go expects - if arch == 'i386': - arch = '386' - elif arch == 'x86_64': - arch = 'amd64' - print("Starting build...") + tmp_build_dir = create_temp_dir() for b, c in targets.items(): - if platform == 'windows': - b = b + '.exe' - print("\t- Building '{}'...".format(os.path.join(outdir, b))) + print("Building '{}'...".format(os.path.join(outdir, b))) build_command = "" - build_command += "GOOS={} GOARCH={} ".format(platform, arch) - if arch == "arm" and goarm_version: - if goarm_version not in ["5", "6", "7", "arm64"]: - print("!! Invalid ARM build version: {}".format(goarm_version)) - build_command += "GOARM={} ".format(goarm_version) - build_command += "go build -o {} ".format(os.path.join(outdir, b)) + if "arm" in arch: + build_command += "GOOS={} GOARCH={} ".format(platform, "arm") + else: + if arch == 'i386': + arch = '386' + elif arch == 'x86_64': + arch = 'amd64' + build_command += "GOOS={} GOARCH={} ".format(platform, arch) + if "arm" in arch: + if arch == "armel": + build_command += "GOARM=5 " + elif arch == "armhf" or arch == "arm": + build_command += "GOARM=6 " + elif arch == "arm64": + build_command += "GOARM=7 " + else: + print("!! Invalid ARM architecture specifed: {}".format(arch)) + print("Please specify either 'armel', 'armhf', or 'arm64'") + return 1 + if platform == 'windows': + build_command += "go build -o {} ".format(os.path.join(outdir, b + '.exe')) + else: + build_command += "go build -o {} ".format(os.path.join(outdir, b)) if race: build_command += "-race " - if platform in supported_tags: - if arch in supported_tags[platform]: - build_tags = supported_tags[platform][arch] - for build_tag in build_tags: - build_command += "-tags "+build_tag+" " go_version = get_go_version() if "1.4" in go_version: - build_command += "-ldflags=\"-X main.buildTime '{}' ".format(datetime.datetime.utcnow().isoformat()) - build_command += "-X main.Version {} ".format(version) - build_command += "-X main.Branch {} ".format(get_current_branch()) - build_command += "-X main.Commit {}\" ".format(get_current_commit()) + build_command += "-ldflags=\"-X main.Version {} -X main.Branch {} -X main.Commit {}\" ".format(version, + get_current_branch(), + get_current_commit()) else: - build_command += "-ldflags=\"-X main.buildTime='{}' ".format(datetime.datetime.utcnow().isoformat()) - build_command += "-X main.Version={} ".format(version) - build_command += "-X main.Branch={} ".format(get_current_branch()) - build_command += "-X main.Commit={}\" ".format(get_current_commit()) + # With Go 1.5, the linker flag arguments changed to 'name=value' from 'name value' + build_command += "-ldflags=\"-X main.Version={} -X main.Branch={} -X main.Commit={}\" ".format(version, + get_current_branch(), + get_current_commit()) build_command += c run(build_command, shell=True) - print("") + return 0 def create_dir(path): try: @@ -343,35 +436,12 @@ def copy_file(fr, to): except OSError as e: print(e) -def create_package_fs(build_root): - print("\t- Creating a filesystem hierarchy from directory: {}".format(build_root)) - # Using [1:] for the path names due to them being absolute - # (will overwrite previous paths, per 'os.path.join' documentation) - dirs = [ INSTALL_ROOT_DIR[1:], LOG_DIR[1:], SCRIPT_DIR[1:], CONFIG_DIR[1:], LOGROTATE_DIR[1:] ] - for d in 
-        create_dir(os.path.join(build_root, d))
-        os.chmod(os.path.join(build_root, d), 0o755)
-
-def package_scripts(build_root, windows=False):
-    print("\t- Copying scripts and sample configuration to build directory")
-    if windows:
-        shutil.copyfile(DEFAULT_WINDOWS_CONFIG, os.path.join(build_root, "telegraf.conf"))
-        os.chmod(os.path.join(build_root, "telegraf.conf"), 0o644)
-    else:
-        shutil.copyfile(INIT_SCRIPT, os.path.join(build_root, SCRIPT_DIR[1:], INIT_SCRIPT.split('/')[1]))
-        os.chmod(os.path.join(build_root, SCRIPT_DIR[1:], INIT_SCRIPT.split('/')[1]), 0o644)
-        shutil.copyfile(SYSTEMD_SCRIPT, os.path.join(build_root, SCRIPT_DIR[1:], SYSTEMD_SCRIPT.split('/')[1]))
-        os.chmod(os.path.join(build_root, SCRIPT_DIR[1:], SYSTEMD_SCRIPT.split('/')[1]), 0o644)
-        shutil.copyfile(LOGROTATE_SCRIPT, os.path.join(build_root, LOGROTATE_DIR[1:], "telegraf"))
-        os.chmod(os.path.join(build_root, LOGROTATE_DIR[1:], "telegraf"), 0o644)
-        shutil.copyfile(DEFAULT_CONFIG, os.path.join(build_root, CONFIG_DIR[1:], "telegraf.conf"))
-        os.chmod(os.path.join(build_root, CONFIG_DIR[1:], "telegraf.conf"), 0o644)
-
-def go_get():
-    print("Retrieving Go dependencies...")
-    run("go get github.com/sparrc/gdm")
-    run("gdm restore -f Godeps_windows")
-    run("gdm restore")
+def generate_md5_from_file(path):
+    m = hashlib.md5()
+    with open(path, 'rb') as f:
+        for chunk in iter(lambda: f.read(4096), b""):
+            m.update(chunk)
+    return m.hexdigest()
 
 def generate_md5_from_file(path):
     m = hashlib.md5()
@@ -383,107 +453,135 @@
         m.update(data)
     return m.hexdigest()
 
-def build_packages(build_output, version, pkg_arch, nightly=False, rc=None, iteration=1):
+def build_packages(build_output, version, nightly=False, rc=None, iteration=1):
     outfiles = []
     tmp_build_dir = create_temp_dir()
     if debug:
         print("[DEBUG] build_output = {}".format(build_output))
     try:
-        print("-------------------------")
-        print("")
+        print("-------------------------\n")
         print("Packaging...")
-        for p in build_output:
+        for platform in build_output:
             # Create top-level folder displaying which platform (linux, etc)
-            create_dir(os.path.join(tmp_build_dir, p))
-            for a in build_output[p]:
-                current_location = build_output[p][a]
-                # Create second-level directory displaying the architecture (amd64, etc)p
-                build_root = os.path.join(tmp_build_dir, p, a)
+            create_dir(os.path.join(tmp_build_dir, platform))
+            for arch in build_output[platform]:
+                # Create second-level directory displaying the architecture (amd64, etc)
+                current_location = build_output[platform][arch]
+                # Create directory tree to mimic file system of package
+                build_root = os.path.join(tmp_build_dir,
+                                          platform,
+                                          arch,
+                                          '{}-{}-{}'.format(PACKAGE_NAME, version, iteration))
                 create_dir(build_root)
-                if p == 'windows':
+
+                # Copy packaging scripts to build directory
+                if platform == 'windows':
                     package_scripts(build_root, windows=True)
                 else:
                     create_package_fs(build_root)
-                # Copy in packaging and miscellaneous scripts
                     package_scripts(build_root)
-                # Copy newly-built binaries to packaging directory
-                for b in targets:
-                    if p == 'windows':
-                        b = b + '.exe'
-                    fr = os.path.join(current_location, b)
-                    to = os.path.join(build_root, b)
-                    print("\t- [{}][{}] - Moving from '{}' to '{}'".format(p, a, fr, to))
+
+                for binary in targets:
+                    if platform == 'windows':
+                        # For windows, we just want to copy the binary into the root directory
+                        binary = binary + '.exe'
+                        # Where the binary should go in the package filesystem
+                        to = os.path.join(build_root, binary)
+                        # Where the binary currently is located
+                        fr = os.path.join(current_location, binary)
+                    else:
+                        # Where the binary currently is located
+                        fr = os.path.join(current_location, binary)
+                        # Where the binary should go in the package filesystem
+                        to = os.path.join(build_root, INSTALL_ROOT_DIR[1:], binary)
+
+                    if debug:
+                        print("[{}][{}] - Moving from '{}' to '{}'".format(platform,
+                                                                           arch,
+                                                                           fr,
+                                                                           to))
                     copy_file(fr, to)
-                # Package the directory structure
-                for package_type in supported_packages[p]:
-                    print("\t- Packaging directory '{}' as '{}'...".format(build_root, package_type))
-                    name = "telegraf"
+
+                for package_type in supported_packages[platform]:
+                    # Package the directory structure for each package type for the platform
+                    print("Packaging directory '{}' as '{}'...".format(build_root, package_type))
+                    name = PACKAGE_NAME
                     # Reset version, iteration, and current location on each run
                     # since they may be modified below.
                     package_version = version
                     package_iteration = iteration
-                    current_location = build_output[p][a]
+                    package_build_root = build_root
+                    current_location = build_output[platform][arch]
+
+                    if rc is not None:
+                        # Set iteration to 0 since it's a release candidate
+                        package_iteration = "0.rc{}".format(rc)
+
                     if package_type in ['zip', 'tar']:
+                        # For tars and zips, start the packaging one folder above
+                        # the build root (to include the package name)
+                        package_build_root = os.path.join('/', '/'.join(build_root.split('/')[:-1]))
                         if nightly:
-                            name = '{}-nightly_{}_{}'.format(name, p, a)
+                            name = '{}-nightly_{}_{}'.format(name,
+                                                             platform,
+                                                             arch)
                         else:
-                            name = '{}-{}-{}_{}_{}'.format(name, package_version, package_iteration, p, a)
-                    if package_type == 'tar':
-                        # Add `tar.gz` to path to reduce package size
-                        current_location = os.path.join(current_location, name + '.tar.gz')
-                    if rc is not None:
-                        package_iteration = "0.rc{}".format(rc)
-                    saved_a = a
-                    if pkg_arch is not None:
-                        a = pkg_arch
-                    if a == '386':
-                        a = 'i386'
-                    if package_type == 'zip':
-                        zip_command = "cd {} && zip {}.zip ./*".format(
-                            build_root,
-                            name)
-                        run(zip_command, shell=True)
-                        run("mv {}.zip {}".format(os.path.join(build_root, name), current_location), shell=True)
-                        outfile = os.path.join(current_location, name+".zip")
-                        outfiles.append(outfile)
-                        print("\t\tMD5 = {}".format(generate_md5_from_file(outfile)))
+                            name = '{}-{}-{}_{}_{}'.format(name,
+                                                           package_version,
+                                                           package_iteration,
+                                                           platform,
+                                                           arch)
+
+                    current_location = os.path.join(os.getcwd(), current_location)
+                    if package_type == 'tar':
+                        tar_command = "cd {} && tar -cvzf {}.tar.gz ./*".format(build_root, name)
+                        run(tar_command, shell=True)
+                        run("mv {}.tar.gz {}".format(os.path.join(build_root, name), current_location), shell=True)
+                        outfile = os.path.join(current_location, name + ".tar.gz")
+                        outfiles.append(outfile)
+                        print("MD5({}) = {}".format(outfile, generate_md5_from_file(outfile)))
+                    elif package_type == 'zip':
+                        zip_command = "cd {} && zip -r {}.zip ./*".format(build_root, name)
+                        run(zip_command, shell=True)
+                        run("mv {}.zip {}".format(os.path.join(build_root, name), current_location), shell=True)
+                        outfile = os.path.join(current_location, name + ".zip")
+                        outfiles.append(outfile)
+                        print("MD5({}) = {}".format(outfile, generate_md5_from_file(outfile)))
                     else:
+                        fpm_command = "fpm {} --name {} -a {} -t {} --version {} --iteration {} -C {} -p {} ".format(fpm_common_args,
+                                                                                                                     name,
+                                                                                                                     arch,
+                                                                                                                     package_type,
+                                                                                                                     package_version,
+                                                                                                                     package_iteration,
+                                                                                                                     package_build_root,
+                                                                                                                     current_location)
+                        if debug:
+                            fpm_command += "--verbose "
                         if package_type == "rpm":
                             fpm_command += "--depends coreutils "
-                            fpm_command += "--depends lsof"
+                            fpm_command += "--depends lsof "
                         out = run(fpm_command, shell=True)
                         matches = re.search(':path=>"(.*)"', out)
                         outfile = None
                         if matches is not None:
                             outfile = matches.groups()[0]
                         if outfile is None:
-                            print("[ COULD NOT DETERMINE OUTPUT ]")
+                            print("!! Could not determine output from packaging command.")
                         else:
                             # Strip nightly version (the unix epoch) from filename
-                            if nightly and package_type in ['deb', 'rpm']:
+                            if nightly:
                                 outfile = rename_file(outfile,
                                                       outfile.replace("{}-{}".format(version, iteration), "nightly"))
                             outfiles.append(os.path.join(os.getcwd(), outfile))
                             # Display MD5 hash for generated package
-                            print("\t\tMD5 = {}".format(generate_md5_from_file(outfile)))
+                            print("MD5({}) = {}".format(outfile, generate_md5_from_file(outfile)))
         print("")
         if debug:
             print("[DEBUG] package outfiles: {}".format(outfiles))
         return outfiles
     finally:
         # Cleanup
+        print("Cleaning up build dir: {}".format(tmp_build_dir))
         shutil.rmtree(tmp_build_dir)
 
 def print_usage():
@@ -491,11 +589,9 @@
     print("")
     print("Options:")
     print("\t --outdir= \n\t\t- Send build output to a specified path. Defaults to ./build.")
-    print("\t --arch= \n\t\t- Build for specified architecture. Acceptable values: x86_64|amd64, 386, arm, or all")
-    print("\t --goarm= \n\t\t- Build for specified ARM version (when building for ARM). Default value is: 6")
+    print("\t --arch= \n\t\t- Build for specified architecture. Acceptable values: x86_64|amd64, 386|i386, arm|armel|armhf|arm64, or all")
     print("\t --platform= \n\t\t- Build for specified platform. Acceptable values: linux, windows, darwin, or all")
     print("\t --version= \n\t\t- Version information to apply to build metadata. If not specified, will be pulled from repo tag.")
-    print("\t --pkgarch= \n\t\t- Package architecture if different from ")
     print("\t --commit= \n\t\t- Use specific commit for build (currently a NOOP).")
     print("\t --branch= \n\t\t- Build from a specific branch (currently a NOOP).")
     print("\t --rc= \n\t\t- Whether or not the build is a release candidate (affects version information).")
@@ -503,9 +599,13 @@
     print("\t --race \n\t\t- Whether the produced build should have race detection enabled.")
     print("\t --package \n\t\t- Whether the produced builds should be packaged for the target platform(s).")
     print("\t --nightly \n\t\t- Whether the produced build is a nightly (affects version information).")
+    print("\t --update \n\t\t- Whether dependencies should be updated prior to building.")
+    print("\t --test \n\t\t- Run Go tests. Will not produce a build.")
     print("\t --parallel \n\t\t- Run Go tests in parallel up to the count specified.")
+    print("\t --generate \n\t\t- Run `go generate`.")
     print("\t --timeout \n\t\t- Timeout for Go tests. Defaults to 480s.")
     print("\t --clean \n\t\t- Clean the build output directory prior to creating build.")
+    print("\t --no-get \n\t\t- Do not run `go get` before building.")
     print("\t --bucket=\n\t\t- Full path of the bucket to upload packages to (must also specify --upload).")
     print("\t --debug \n\t\t- Displays debug output.")
     print("")
 
@@ -514,17 +614,18 @@ def print_package_summary(packages):
     print(packages)
 
 def main():
+    global debug
+
     # Command-line arguments
     outdir = "build"
     commit = None
     target_platform = None
     target_arch = None
-    package_arch = None
     nightly = False
     race = False
     branch = None
     version = get_current_version()
-    rc = None
+    rc = get_current_rc()
     package = False
     update = False
     clean = False
@@ -534,15 +635,15 @@
     timeout = None
     iteration = 1
     no_vet = False
-    goarm_version = "6"
     run_get = True
     upload_bucket = None
-    global debug
+    generate = False
+    no_stash = False
 
     for arg in sys.argv[1:]:
         if '--outdir' in arg:
             # Output directory. If none is specified, then builds will be placed in the same directory.
-            output_dir = arg.split("=")[1]
+            outdir = arg.split("=")[1]
         if '--commit' in arg:
             # Commit to build from. If none is specified, then it will build from the most recent commit.
             commit = arg.split("=")[1]
@@ -558,9 +659,6 @@
         elif '--version' in arg:
             # Version to assign to this build (0.9.5, etc)
             version = arg.split("=")[1]
-        elif '--pkgarch' in arg:
-            # Package architecture if different from (armhf, etc)
-            package_arch = arg.split("=")[1]
         elif '--rc' in arg:
             # Signifies that this is a release candidate build.
             rc = arg.split("=")[1]
@@ -570,12 +668,20 @@
         elif '--package' in arg:
             # Signifies that packages should be built.
             package = True
+            # If packaging, do not allow stashing of local changes
+            no_stash = True
         elif '--nightly' in arg:
             # Signifies that this is a nightly build.
             nightly = True
+        elif '--update' in arg:
+            # Signifies that dependencies should be updated.
+            update = True
         elif '--upload' in arg:
             # Signifies that the resulting packages should be uploaded to S3
             upload = True
+        elif '--test' in arg:
+            # Run tests and exit
+            test = True
         elif '--parallel' in arg:
             # Set parallel for tests.
             parallel = int(arg.split("=")[1])
@@ -589,14 +695,19 @@
             iteration = arg.split("=")[1]
         elif '--no-vet' in arg:
             no_vet = True
-        elif '--goarm' in arg:
-            # Signifies GOARM flag to pass to build command when compiling for ARM
-            goarm_version = arg.split("=")[1]
+        elif '--no-get' in arg:
+            run_get = False
         elif '--bucket' in arg:
             # The bucket to upload the packages to, relies on boto
             upload_bucket = arg.split("=")[1]
+        elif '--no-stash' in arg:
+            # Do not stash uncommitted changes
+            # Fail if uncommitted changes exist
+            no_stash = True
+        elif '--generate' in arg:
+            generate = True
         elif '--debug' in arg:
-            print "[DEBUG] Using debug output"
+            print("[DEBUG] Using debug output")
             debug = True
         elif '--help' in arg:
             print_usage()
@@ -606,54 +717,69 @@ def main():
             print_usage()
             return 1
 
+    if nightly and rc:
+        print("!! Cannot be both nightly and a release candidate! Stopping.")
+        return 1
+
     if nightly:
-        if rc:
-            print("!! Cannot be both nightly and a release candidate! Stopping.")
-            return 1
-        # In order to support nightly builds on the repository, we are adding the epoch timestamp
+        # In order to cleanly delineate nightly versions, we are adding the epoch timestamp
         # to the version so that version numbers are always greater than the previous nightly.
-        version = "{}.n{}".format(version, int(time.time()))
+        version = "{}~n{}".format(version, int(time.time()))
+        iteration = 0
+    elif rc:
+        iteration = 0
 
     # Pre-build checks
     check_environ()
-    check_prereqs()
+    if not check_prereqs():
+        return 1
 
     if not commit:
         commit = get_current_commit(short=True)
     if not branch:
         branch = get_current_branch()
     if not target_arch:
-        if 'arm' in get_system_arch():
+        system_arch = get_system_arch()
+        if 'arm' in system_arch:
             # Prevent uname from reporting ARM arch (eg 'armv7l')
             target_arch = "arm"
         else:
-            target_arch = get_system_arch()
-    if not target_platform:
+            target_arch = system_arch
+            if target_arch == '386':
+                target_arch = 'i386'
+            elif target_arch == 'x86_64':
+                target_arch = 'amd64'
+    if target_platform:
+        if target_platform not in supported_builds and target_platform != 'all':
+            print("!! Invalid build platform: {}".format(target_platform))
+            return 1
+    else:
         target_platform = get_system_platform()
-    if rc or nightly:
-        # If a release candidate or nightly, set iteration to 0 (instead of 1)
-        iteration = 0
-
-    if target_arch == '386':
-        target_arch = 'i386'
-    elif target_arch == 'x86_64':
-        target_arch = 'amd64'
 
     build_output = {}
-    go_get()
 
+    if generate:
+        if not run_generate():
+            return 1
+
+    if run_get:
+        if not go_get(branch, update=update, no_stash=no_stash):
+            return 1
+
+    if test:
+        if not run_tests(race, parallel, timeout, no_vet):
+            return 1
+        return 0
 
     platforms = []
     single_build = True
     if target_platform == 'all':
-        platforms = list(supported_builds.keys())
+        platforms = supported_builds.keys()
         single_build = False
     else:
        platforms = [target_platform]
 
     for platform in platforms:
-        if platform in prereq_cmds:
-            run(prereq_cmds[platform])
         build_output.update( { platform : {} } )
         archs = []
         if target_arch == "all":
@@ -661,32 +787,34 @@
             archs = supported_builds.get(platform)
         else:
             archs = [target_arch]
+
         for arch in archs:
             od = outdir
             if not single_build:
                 od = os.path.join(outdir, platform, arch)
-            build(version=version,
-                  branch=branch,
-                  commit=commit,
-                  platform=platform,
-                  arch=arch,
-                  nightly=nightly,
-                  rc=rc,
-                  race=race,
-                  clean=clean,
-                  outdir=od,
-                  goarm_version=goarm_version)
+            if build(version=version,
+                     branch=branch,
+                     commit=commit,
+                     platform=platform,
+                     arch=arch,
+                     nightly=nightly,
+                     rc=rc,
+                     race=race,
+                     clean=clean,
+                     outdir=od):
+                return 1
             build_output.get(platform).update( { arch : od } )
 
     # Build packages
     if package:
         if not check_path_for("fpm"):
-            print("!! Cannot package without command 'fpm'. Stopping.")
+            print("!! Cannot package without command 'fpm'.")
             return 1
-        packages = build_packages(build_output, version, package_arch, nightly=nightly, rc=rc, iteration=iteration)
-        # Optionally upload to S3
+
+        packages = build_packages(build_output, version, nightly=nightly, rc=rc, iteration=iteration)
         if upload:
             upload_packages(packages, bucket_name=upload_bucket, nightly=nightly)
+    print("Done!")
     return 0
 
 if __name__ == '__main__':
diff --git a/scripts/circle-test.sh b/scripts/circle-test.sh
index 91511b050..f0288c73e 100755
--- a/scripts/circle-test.sh
+++ b/scripts/circle-test.sh
@@ -68,7 +68,7 @@ telegraf -sample-config > $tmpdir/config.toml
 exit_if_fail telegraf -config $tmpdir/config.toml \
     -test -input-filter cpu:mem
 
-mv $GOPATH/bin/telegraf $CIRCLE_ARTIFACTS
+gzip -c $GOPATH/bin/telegraf > $CIRCLE_ARTIFACTS/telegraf.gz
 
 eval "git describe --exact-match HEAD"
 if [ $? -eq 0 ]; then
@@ -77,5 +77,6 @@ if [ $? -eq 0 ]; then
     echo $tag
     exit_if_fail ./scripts/build.py --package --version=$tag --platform=linux --arch=all --upload
     exit_if_fail ./scripts/build.py --package --version=$tag --platform=windows --arch=all --upload
+    exit_if_fail ./scripts/build.py --package --version=$tag --platform=freebsd --arch=all --upload
     mv build $CIRCLE_ARTIFACTS
 fi
diff --git a/scripts/init.sh b/scripts/init.sh
index 81932bb48..09a4d24bd 100755
--- a/scripts/init.sh
+++ b/scripts/init.sh
@@ -159,6 +159,22 @@ case $1 in
         fi
         ;;
 
+    reload)
+        # Reload the daemon.
+        if [ -e $pidfile ]; then
+            pidofproc -p $pidfile $daemon > /dev/null 2>&1 && status="0" || status="$?"
+            if [ "$status" = 0 ]; then
+                if killproc -p $pidfile SIGHUP; then
+                    log_success_msg "$name process was reloaded"
+                else
+                    log_failure_msg "$name failed to reload service"
+                fi
+            fi
+        else
+            log_failure_msg "$name process is not running"
+        fi
+        ;;
+
     restart)
         # Restart the daemon.
         $0 stop && sleep 2 && $0 start
diff --git a/scripts/post-install.sh b/scripts/post-install.sh
index 4f11fe8f6..53d745ca9 100644
--- a/scripts/post-install.sh
+++ b/scripts/post-install.sh
@@ -13,6 +13,7 @@ function install_init {
 function install_systemd {
     cp -f $SCRIPT_DIR/telegraf.service /lib/systemd/system/telegraf.service
     systemctl enable telegraf
+    systemctl daemon-reload || true
 }
 
 function install_update_rcd {
@@ -28,7 +29,9 @@ if [[ $? -ne 0 ]]; then
     useradd --system -U -M telegraf -s /bin/false -d /etc/telegraf
 fi
 
+test -d $LOG_DIR || mkdir -p $LOG_DIR
 chown -R -L telegraf:telegraf $LOG_DIR
+chmod 755 $LOG_DIR
 
 # Remove legacy symlink, if it exists
 if [[ -L /etc/init.d/telegraf ]]; then
@@ -61,10 +64,12 @@ elif [[ -f /etc/debian_version ]]; then
     which systemctl &>/dev/null
     if [[ $? -eq 0 ]]; then
         install_systemd
+        systemctl restart telegraf
     else
         # Assuming sysv
         install_init
         install_update_rcd
+        invoke-rc.d telegraf restart
     fi
 elif [[ -f /etc/os-release ]]; then
     source /etc/os-release
diff --git a/scripts/post-remove.sh b/scripts/post-remove.sh
new file mode 100644
index 000000000..96b178f4d
--- /dev/null
+++ b/scripts/post-remove.sh
@@ -0,0 +1,46 @@
+#!/bin/bash
+
+function disable_systemd {
+    systemctl disable telegraf
+    rm -f /lib/systemd/system/telegraf.service
+}
+
+function disable_update_rcd {
+    update-rc.d -f telegraf remove
+    rm -f /etc/init.d/telegraf
+}
+
+function disable_chkconfig {
+    chkconfig --del telegraf
+    rm -f /etc/init.d/telegraf
+}
+
+if [[ -f /etc/redhat-release ]]; then
+    # RHEL-variant logic
+    if [[ "$1" = "0" ]]; then
+        # Telegraf is no longer installed, remove from init system
+        rm -f /etc/default/telegraf
+
+        which systemctl &>/dev/null
+        if [[ $? -eq 0 ]]; then
+            disable_systemd
+        else
+            # Assuming sysv
+            disable_chkconfig
+        fi
+    fi
+elif [[ -f /etc/debian_version ]]; then
+    # Debian/Ubuntu logic
+    if [[ "$1" != "upgrade" ]]; then
+        # Remove/purge
+        rm -f /etc/default/telegraf
+
+        which systemctl &>/dev/null
+        if [[ $? -eq 0 ]]; then
+            disable_systemd
+        else
+            # Assuming sysv
+            disable_update_rcd
+        fi
+    fi
+fi
diff --git a/scripts/pre-remove.sh b/scripts/pre-remove.sh
new file mode 100644
index 000000000..a57184630
--- /dev/null
+++ b/scripts/pre-remove.sh
@@ -0,0 +1,15 @@
+#!/bin/bash
+
+BIN_DIR=/usr/bin
+
+# Distribution-specific logic
+if [[ -f /etc/debian_version ]]; then
+    # Debian/Ubuntu logic
+    which systemctl &>/dev/null
+    if [[ $? -eq 0 ]]; then
+        deb-systemd-invoke stop telegraf.service
+    else
+        # Assuming sysv
+        invoke-rc.d telegraf stop
+    fi
+fi
diff --git a/scripts/telegraf.service b/scripts/telegraf.service
index 6f4450402..dcc2b9713 100644
--- a/scripts/telegraf.service
+++ b/scripts/telegraf.service
@@ -7,6 +7,7 @@ After=network.target
 EnvironmentFile=-/etc/default/telegraf
 User=telegraf
 ExecStart=/usr/bin/telegraf -config /etc/telegraf/telegraf.conf -config-directory /etc/telegraf/telegraf.d ${TELEGRAF_OPTS}
+ExecReload=/bin/kill -HUP $MAINPID
 Restart=on-failure
 KillMode=process
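
For reference, the tag-handling helpers introduced in scripts/build.py above (`get_current_version_tag`, `get_current_version`, `get_current_rc`) all derive their answers from a single `git describe` tag. The following standalone sketch mirrors that parsing for illustration only; `parse_version` and `parse_rc` are hypothetical names, not functions from the patch:

import re

def parse_version(tag):
    # Mirrors get_current_version(): drop a leading 'v' and any '-rc<N>' suffix.
    if tag.startswith('v'):
        tag = tag[1:]
    return re.sub(r'-rc\d+', '', tag)

def parse_rc(tag):
    # Mirrors get_current_rc(): extract the release-candidate number, if present.
    matches = re.match(r'.*-rc(\d+)', tag)
    return matches.group(1) if matches else None

assert parse_version("v0.12.1-rc2") == "0.12.1"
assert parse_rc("v0.12.1-rc2") == "2"
assert parse_rc("v0.12.1") is None

Under this scheme a tag such as `v0.12.1-rc2` is packaged as version `0.12.1` with iteration `0.rc2`, while nightly builds append `~n<epoch>` to the version; since deb/rpm version comparison sorts a tilde suffix before an empty one, each nightly ranks below the tagged release it leads up to while still ranking above the previous nightly.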