Compare commits

...

159 Commits

Author SHA1 Message Date
Cameron Sparr
37ae3956c1 Space Igloo 2016-05-04 11:16:18 -06:00
Lukasz Jagiello
4c28f15b35 Fix #1148 - chatty MySQL
Two additional config options to reduce MySQL metrics

With:
 - gather_table_lock_waits = false
 - gather_event_waits = false

```
| wc -l
34
```

With:
 - gather_table_lock_waits = true
 - gather_event_waits = true

```
| wc -l
50040
```

closes #1148
closes #1149
2016-05-04 10:23:54 -06:00
Marko Crnic
095ef04c04 Fix formatting for haproxy tests
closes #1146
2016-05-04 09:47:39 -06:00
Marko Crnic
7d49979658 Update haproxy input plugin sample configuration 2016-05-04 09:46:44 -06:00
Marko Crnic
7a36695a21 Add tests for haproxy stats socket 2016-05-04 09:46:44 -06:00
Marko Crnic
5865587bd0 Add stats socket support to haproxy plugin 2016-05-04 09:46:44 -06:00
Jörg Thalheim
219bf93566 docker: add container_id to blkio metrics
all container metrics except blkio have this field

closes #1150
2016-05-04 09:28:13 -06:00
Ross McDonald
8371546a66 Disable circle uploads to S3 until more testing can be done for external PR's. 2016-05-03 11:26:52 -05:00
John Engelman
0b9b7bddd7 Specify host mount prefix with envvar.
closes #1120
2016-05-02 15:53:30 -06:00
Rene Zbinden
4c8449f4bc fix sysstat timeout error
closes #1134
2016-05-02 15:17:11 -06:00
Ross McDonald
36d7b5c9ab Improvements to build.py:
- Now uses Python argparse for cleaner handling of arguments
- Added function documentation
- Removed a few unneeded functions
- Updated nightly logic to incremement minor version
- Added support for building from specific branch or commit
- Changed --no-stash option to --no-uncommitted for clarity
- Added a --release flag, default package output will now contain the branch and commit hash in the version number
- Static builds are now listed as an architecture
- Changed default upload bucket to dl.influxdata.com/telegraf
- Don't include iteration in package name

closes #1040
2016-05-02 14:37:29 -06:00
Cameron Sparr
f2b0ea6722 value parser: doc & string handling 2016-05-02 12:17:20 -06:00
Cameron Sparr
46f4be88a6 Revert "exec plugin: allow using glob pattern in command list"
This reverts commit 6381efa7ce.
2016-05-02 12:07:17 -06:00
Jari Sukanen
6381efa7ce exec plugin: allow using glob pattern in command list
Allow using glob pattern in the command list in configuration. This enables for
example placing all commands in a single directory and using /path/to/dir/*.sh
as one of the commands to run all shell scripts in that directory.

Glob patterns are applied on every run of the commands, so matching commands can
be added without restarting telegraf.

closes #1127
2016-05-02 11:36:15 -06:00
Pierre Fersing
85ee66efb9 "DELAYED" Inserts were deprecated in MySQL 5.6.6
closes #1136
2016-05-02 11:23:28 -06:00
Victor Garcia
40dccf5b29 Metric for MongoDB jumbo chunks
closes #1128
2016-05-01 14:27:27 -06:00
Cameron Sparr
c114849a31 Use a timeout for docker list & stat cmds
closes #1133
2016-05-01 10:26:46 -06:00
Cameron Sparr
4e9798d0e6 agent and tags configs sometimes not applied
closes #1090
2016-04-29 19:44:01 -06:00
Cameron Sparr
a30b1a394f Kafka output: set max_retry=3 & required_acks=-1 as defaults
closes #1113
2016-04-29 18:51:45 -06:00
Cameron Sparr
91460436cf Changelog update 2016-04-29 12:32:04 -06:00
Cameron Sparr
3f807a9432 Implement timeouts for all exec command runners
First is to write an internal CombinedOutput and Run function with a
timeout.

Second, the following instances of command runners need to have timeouts:

    plugins/inputs/ping/ping.go
    125:	out, err := c.CombinedOutput()

    plugins/inputs/exec/exec.go
    91:	if err := cmd.Run(); err != nil {

    plugins/inputs/ipmi_sensor/command.go
    31:	err := cmd.Run()

    plugins/inputs/sysstat/sysstat.go
    194:	out, err := cmd.CombinedOutput()

    plugins/inputs/leofs/leofs.go
    185:	defer cmd.Wait()

    plugins/inputs/sysstat/sysstat.go
    282:	if err := cmd.Wait(); err != nil {

closes #1067
2016-04-29 12:06:22 -06:00
Cameron Sparr
cbe32c7482 Support default config paths
precedence will be:

 1. --config command-line option
 2. $TELEGRAF_CONFIG_PATH
 3. $HOME/.telegraf/telegraf.conf
 4. /etc/telegraf/telegraf.conf
2016-04-28 17:48:24 -06:00
Cameron Sparr
5d3c582ecf changelog update 2016-04-28 17:34:37 -06:00
Cameron Sparr
3ed006d216 Sanitize invalid opentsdb characters
closes #1098
2016-04-28 17:01:50 -06:00
Cameron Sparr
3e1026286b skip network-dependent unit tests in short mode 2016-04-28 14:44:08 -06:00
Cameron Sparr
b59266249d README fixups for udp_listener, statsd inputs
closes #1119
2016-04-28 13:11:41 -06:00
G-regL
015261a524 Sanitize Field name
Replace '/[sS]ec' for '_persec' and spaces with underscores.

closes #1118
2016-04-28 12:21:28 -06:00
Adithya B Cherunilam
024e1088eb Ensure sure that the post install script is compatible with RHEL 5
closes #1091
closes #1094
2016-04-28 11:58:06 -06:00
Cameron Sparr
08f4b1ae8a Update build to go 1.6.2 2016-04-28 11:41:16 -06:00
Bob Zoller
1390c22004 sanitize * to - in graphite serializer
closes #1110
2016-04-27 18:05:44 -06:00
Cameron Sparr
8742ead585 Change server_ -> jolokia_ in tags and other formatting 2016-04-27 16:00:58 -06:00
Cameron Sparr
59a297abe6 etc/telegraf.conf update 2016-04-27 15:46:21 -06:00
Simone Aiello
18636ea628 jolokia: use always POST
code refactor to use same prepareRequest method
for both 'agent' and 'proxy' mode

closes #1031
closes #1050
closes #473
2016-04-27 15:45:37 -06:00
Simone Aiello
cf5980ace2 jolokia: add proxy mode 2016-04-27 15:39:55 -06:00
Jesse Hanley
a7b0861436 Adding Jobstats support to Lustre2 input plugin
Lustre Jobstats allows for RPCs to be tagged with a value, such
as a job's ID.  This allows for per job statistics. This plugin
collects statistics and tags the data with the jobid.

closes #1107
2016-04-27 15:35:24 -06:00
Cameron Sparr
89f2c0b0a4 Cassandra: update plugin supported prefix & fix panic
fixes #1102
2016-04-27 15:23:05 -06:00
Cameron Sparr
ee4f4d7800 ping plugin: Set default timeout 2016-04-27 15:08:38 -06:00
Cameron Sparr
4de75ce621 Performance refactor of running_output buffers
closes #914
closes #967
2016-04-27 13:40:05 -06:00
John Engelman
1c4043ab39 Closes #1085 - allow for specifying AWS credentials in config.
closes #1085
closes #1086
2016-04-26 17:24:05 -06:00
Cameron Sparr
44c945b9f5 Tail unit tests and README tweaks 2016-04-26 10:43:41 -06:00
Cameron Sparr
c7719ac365 buffers: fix bug when Write called before AddMetric 2016-04-26 10:25:04 -06:00
Cameron Sparr
b9c24189e4 Tail input plugin 2016-04-26 09:42:06 -06:00
Cameron Sparr
411d8d7439 Fix leaky tcp connections in phpfpm plugin
closes #1089
2016-04-26 09:24:32 -06:00
Cameron Sparr
671b40df2a procstat: field prefix fixup 2016-04-25 20:10:34 -06:00
Cameron Sparr
249a860c6f procstat: fix newlines in tags 2016-04-25 19:57:38 -06:00
Mika Eloranta
0367a39e1f postgresql_extensible: custom address in metrics output
Allow overriding the the metrics "server" tag with the specified
value. Can be used to give a more user-friendly value for the server
name.

closes #1093
2016-04-25 16:33:35 -06:00
Mika Eloranta
1a7340bb02 postgresql_extensible: fix nil field values
nil field values would break the output influxdb line procotol.
Skip them from the output.
2016-04-25 16:33:33 -06:00
Mika Eloranta
ce7d852d22 postgresql_extensible: configurable measurement name
The output measurement name can be configured per query.
2016-04-25 16:33:33 -06:00
Hannu Valtonen
01b01c5969 postgresql_extensible: Censor also other security related conn params
While these aren't quite as sensitive as passwords, they do tend to be
long filesystem paths that shouldn't be reported along with
every measurement.
2016-04-25 16:33:33 -06:00
Pierre Fersing
c159460b2c Refactor running_output buffering
closes #1087
2016-04-25 16:32:41 -06:00
Cameron Sparr
07728d7425 Refactor globpath pkg to return a map
this is so that we don't call os.Stat twice for every file matched
by Match(). Also changing the behavior to _not_ return the name of a
file that doesn't exist if it's not a glob.
2016-04-24 14:37:44 -06:00
Cameron Sparr
d3a25e4dc1 globpath refactor into pkg separate from filestat 2016-04-23 11:56:33 -06:00
zensqlmonitor
1751c35f69 SQL Server input. Fix datatype conversion. 2016-04-23 09:18:08 +02:00
zensqlmonitor
93f5b8cc4a Fix datatype conversion 2016-04-23 09:14:04 +02:00
Cameron Sparr
5b1e59a48c filestat plugin config fixup 2016-04-22 19:15:07 -06:00
Cameron Sparr
7b27cad1ba Dont specify AWS credential chain, use default
closes #1078
2016-04-22 11:43:20 -06:00
Cameron Sparr
1b083d63ab add gitattributes file 2016-04-22 11:42:22 -06:00
Cameron Sparr
23f2b47531 Ignore errors in systemd
closes #1022
2016-04-22 11:23:24 -06:00
Victor Garcia
194288c00e Adding replication lag metric
closes #1066
2016-04-22 11:07:32 -06:00
Cameron Sparr
f9c8ed0dc3 Add filestat plugin to README 2016-04-21 19:47:23 -06:00
Cameron Sparr
88def9b71b filestat input plugin
closes #929
2016-04-21 16:53:02 -06:00
Martin Gehrke
f818f44693 Added Network Interface Object block to Generic Queries examples in win_perf_counters/README.md
Network metrics are pretty important and the block adds a couple with a link to the names for more. This adds a block with a few counters to the Generic Queries examples in plugins/inputs/win_perf_counters/README.md
2016-04-21 09:26:46 -04:00
Cameron Sparr
8a395fdb4a changelog update feature 1041 2016-04-20 18:36:14 -06:00
Cameron Sparr
c0588926b8 Add n_cpu field to system plugin
closes #1041
2016-04-20 18:22:04 -06:00
Cameron Sparr
f1b7ecb2a2 procstat: Add user, pidfile, pattern & exe tags
closes #1035
2016-04-20 13:18:07 -06:00
Cameron Sparr
4bcf157d88 Don't replace _ with . in datadog names
closes #1024
2016-04-20 09:06:13 -06:00
Cameron Sparr
2f7da03cce Do not log every tcp connect/disconnect
leaving as comments for whenever I rig up global debug logging.

closes #1062
2016-04-19 22:58:24 -06:00
Cameron Sparr
f1c995dcb8 Update etc/telegraf.conf 2016-04-19 18:01:41 -06:00
Cameron Sparr
9aec58c6b8 Don't allow inputs to overwrite host tag
closes #1054

This affects tags in the following plugins:

- cassandra
- disque
- rethinkdb
2016-04-19 17:44:33 -06:00
Victor Garcia
46aaaa9b70 Adding TTL metrics data
closes #1060
2016-04-19 17:02:25 -06:00
Larry Kim
46543d6323 Possible bug fix for oid_key collision
closes #1044
2016-04-19 17:00:04 -06:00
Cameron Sparr
a585119a67 Change prometheus doc to glob match 2016-04-19 14:46:37 -06:00
Cameron Sparr
8cc72368ca influxdb output: close connections & dont always overwrite
closes #1058
closes #1059

also see https://github.com/influxdata/influxdb/pull/6425
2016-04-19 13:40:08 -06:00
Cameron Sparr
92e57ee06c Set default tags in test accumulator
closes #1012
2016-04-18 19:24:17 -06:00
Eugene Chupriyanov
c737a19d9f Just close Riemann client on send metrics failure
Signed-off-by: Eugene Chupriyanov <e.chupriyanov@cpm.ru>

closes #1013
2016-04-18 17:25:36 -06:00
Eugene Chupriyanov
708a97d773 Try to reconnect to Riemann if metrics upload failed.
Signed-off-by: Eugene Chupriyanov <tchu@tchu.ru>

Error checks added

Don't Close() nil client

Signed-off-by: Eugene Chupriyanov <e.chupriyanov@cpm.ru>
2016-04-18 17:25:19 -06:00
Maksadbek
b95a90dbd6 updated README for mysql input plugin
closes #889
closes #403
2016-04-18 17:21:25 -06:00
Maksadbek
a2d1ee08d4 transposed the matrix of tags/fields for Lock Waits stats gathering 2016-04-18 17:11:26 -06:00
Maksadbek
7e64dc380f preventing tags from mutation by creating new tag for each metric 2016-04-18 17:11:26 -06:00
Maksadbek
046cb6a564 changed types to decrease needless uint64 to float64 casts 2016-04-18 17:11:26 -06:00
Maksadbek
644ce9edab fixed code regarding needless type casting; single creation of map 2016-04-18 17:11:26 -06:00
Maksadbek
059b601b13 mysql plugin conf field names are lowercase-underscored 2016-04-18 17:11:26 -06:00
Maksadbek
d59999f510 improvements on queries and additional comments 2016-04-18 17:11:26 -06:00
Maksadbek
c5d31e7527 statics that lack on MySQL 5.5 is turned off by default 2016-04-18 17:11:26 -06:00
Maksadbek
c121e38da6 mysql plugin, check for existence of table before scanning 2016-04-18 17:11:26 -06:00
Maksadbek
b16bc3d2e3 remove duplicate function; Mysql plugin GatherTableSchema is configurable 2016-04-18 17:11:26 -06:00
maksadbek
c732abbda2 Improved mysql plugin
shows global variables
	shows slave statuses
	shows size and count of binary log files
	shows information_schema.processlist stats
	shows perf table stats
	shows auto increments stats from information schema
	shows perf index stats
	shows table lock waits summary by table
	shows time and operations of event waits
	shows file event statuses
	shows events statements stats from perf_schema
	shows schema statistics
	refactored plugin, provided multiple fields per insert
2016-04-18 17:11:26 -06:00
Cameron Sparr
61d681a7c8 docker changelog update 2016-04-18 16:24:32 -06:00
Cameron Sparr
7828bc09cf Fixup docker blkio container name & docker doc 2016-04-18 16:20:46 -06:00
Cameron Sparr
36d330fea0 docker plugin schema refactor
- renaming cont_name and cont_image to container_name and
container_image.
- cont_id is now a field, called container_id
- docker_cpu, docker_mem, docker_net measurements have been renamed to
  docker_container_cpu, docker_container_mem, and docker_container_net

closes #1014
closes #1052
2016-04-18 15:16:59 -06:00
Cameron Sparr
4d46589d39 JSON input: make string ignores clear 2016-04-18 13:20:06 -06:00
chaton78
93f57edd3a Better logging for MQTT consumer
closes #1023
closes #921
2016-04-18 11:27:50 -06:00
chaton78
8ec8ae0587 Added onConnection and connectionLost Handlers 2016-04-18 11:25:03 -06:00
Pascal Larin
ce94e636bb Resubscribe if not using persistent sessions 2016-04-18 11:25:03 -06:00
Pascal Larin
21c7378b61 Handle onConnect 2016-04-18 11:25:03 -06:00
Thibault Cohen
75a9845d20 SNMP Fix #995
closes #995
2016-04-18 11:19:16 -06:00
Shahzheeb Khan
d638f6e411 mongodb readme and examples
mongodb readme and examples

closes #1039
2016-04-16 15:53:04 -06:00
Cameron Sparr
81d0a64d46 Adds support for removing/keeping tags from metrics
closes #706
2016-04-16 15:13:38 -06:00
Cameron Sparr
f76739cb1b Release 0.12.1 2016-04-14 16:16:26 -06:00
Ross McDonald
7f992fd321 Changed nohup fallback command to use 'sudo -u' so that Telegraf doesn't run as the root user. 2016-04-14 13:56:20 -06:00
Cameron Sparr
e428d11add Update etc/telegraf.conf, README 2016-04-12 14:50:56 -06:00
Subhachandra Chandra
0ed5b75a14 Fixed CHANGELOG with the correct pull request id.
closes #681
closes #1009
2016-04-12 10:57:34 -06:00
subhachandrachandra
b1b4adec74 Added cassandra plugin to access metrics using jolokia and push them to influxdb. 2016-04-12 10:55:59 -06:00
Cameron Sparr
1934cc2e62 Add memstats to the influxdb input plugin
closes #958
2016-04-12 10:13:11 -06:00
Lukasz Jagiello
ae8cf8c35e Fix plugin name in README 2016-04-11 19:43:30 -06:00
Cameron Sparr
f5878eafb9 Create a template system for the graphite serializer
closes #925
closes #879
2016-04-11 16:30:18 -06:00
Cameron Sparr
27fe4f7062 Update changelog, etc/telegraf.conf
closes #998
2016-04-08 12:00:41 -06:00
Michele Fadda
7ad8b26297 dovecot: enabled global, user and ip queries 2016-04-08 11:56:08 -06:00
Cameron Sparr
1a383b7d90 Telegraf no longer depends on lsof
so remove it as a dependency from the linux packages.

closes #974
2016-04-08 11:27:33 -06:00
Shahzheeb Khan
445946792e Adding few metrics example in jolokia plugin
Adding few metrics example in jolokia plugin

closes #993
2016-04-08 11:20:47 -06:00
Shahzheeb Khan
de82c7d5ac Adding few metrics example
Adding more metrics in example to make it easier to better understand the plugin
2016-04-08 11:20:42 -06:00
Cameron Sparr
07f0d561dc Eliminate byte buffer, copy scanner.Bytes directly 2016-04-08 09:50:03 -06:00
Cameron Sparr
be379f3dac Refactor UDP & TCP input buffers
closes #991
2016-04-08 09:50:03 -06:00
Cameron Sparr
1bf904fe60 Godeps: update paho mqtt client dep
this might fix #921

see https://github.com/eclipse/paho.mqtt.golang/issues/32
2016-04-07 17:32:28 -06:00
Cameron Sparr
c6faf005cb Add sysstat dummy file for non-linux builds 2016-04-07 12:08:26 -06:00
Rene Zbinden
b534b58542 fix tests
closes #939
2016-04-07 11:54:41 -06:00
Rene Zbinden
920711533e move pathe lookup for sadf to init() 2016-04-07 10:45:20 -06:00
Rene Zbinden
194110433e change sadf options so that it also works on older linux distributions 2016-04-07 10:45:20 -06:00
Rene Zbinden
7926396d2a cleanup code, set dfltActivities in init() function, this leads to an if less in collect() method 2016-04-07 10:45:20 -06:00
Rene Zbinden
797522e8ca change group=true by default 2016-04-07 10:45:20 -06:00
Rene Zbinden
64b5d1a269 add documentation about sadc path on different linux distributions 2016-04-07 10:45:20 -06:00
Rene Zbinden
d00d3802c9 fix build tags 2016-04-07 10:45:20 -06:00
Rene Zbinden
46fff13341 disable TestInterval with -race test option 2016-04-07 10:45:20 -06:00
Rene Zbinden
be3374a3ef remove interval configuration 2016-04-07 10:45:20 -06:00
Rene Zbinden
264ac0b017 fix race condition 2016-04-07 10:45:20 -06:00
Rene Zbinden
17033b3c6c change readme 2016-04-07 10:45:20 -06:00
Rene Zbinden
c4ea122d66 add sysstat plugin 2016-04-07 10:45:20 -06:00
Cameron Sparr
90185dc6b3 cleanup & comment http_response def config
closes #332
2016-04-07 10:37:52 -06:00
Luke Swithenbank
377b030d88 update to 5 second default and string map for headers 2016-04-07 10:28:39 -06:00
Luke Swithenbank
437bd87d7c added tests and did some refactoring 2016-04-07 10:28:39 -06:00
Luke Swithenbank
73a7916ce3 take a request body as a param 2016-04-07 10:28:39 -06:00
Luke Swithenbank
f947fa86e3 update to allow for following redirects 2016-04-07 10:28:39 -06:00
Luke Swithenbank
7219efbdb7 add the ability to parse http headers 2016-04-07 10:28:39 -06:00
Luke Swithenbank
b7435b9cd1 fmt 2016-04-07 10:28:39 -06:00
Luke Swithenbank
207ab5a0d1 update to make a working sample_config 2016-04-07 10:28:39 -06:00
Luke Swithenbank
70aa0ef85d add plugin to all 2016-04-07 10:28:39 -06:00
Luke Swithenbank
dfbe231a51 add http_response plugin 2016-04-07 10:28:39 -06:00
Cameron Sparr
cce35da366 Godeps_windows: update file 2016-04-07 10:00:10 -06:00
Cameron Sparr
1a612bcae9 Update README and etc/telegraf.conf 2016-04-06 13:49:50 -06:00
Josh Hardy
d5b9e003fe Add CloudWatch input plugin
Rebased commit of previously reviewed branch.
Added cloudwatch client Mock and more rich unit tests.

closes #935
closes #936
2016-04-06 13:45:06 -06:00
Sergio Jimenez
e19c474a92 input(docker): Cleanup
* Removed leftovers, unused code

closes #957
fixes #645
2016-04-06 12:01:36 -06:00
Sergio Jimenez
8274798499 fix(Godeps): Added github.com/opencontainers/runc 2016-04-06 11:55:30 -06:00
Sergio Jimenez
9f68a32934 fix(): Last link on README 2016-04-06 11:55:30 -06:00
Sergio Jimenez
5c688daff1 input(docker): Updated README
* Replaced links to fsouza/go-dockerclient by docker/engine-api
2016-04-06 11:55:30 -06:00
Sergio Jimenez
741fb1181f godeps(): Updated Godeps file for engine-api
* Added required deps for engine-api
* Removed fsouza/go-dockerclient
2016-04-06 11:55:30 -06:00
Sergio Jimenez
fd1f05c8e0 input(docker): Fixed io sectors/io_time recursive
* On engine-api sectors_recursive and io_time_recursive have no Op
2016-04-06 11:55:30 -06:00
Sergio Jimenez
708cbf937f input(docker): Fixed tests to work with engine-api
* Modified tests to work with engine-api
2016-04-06 11:55:30 -06:00
Sergio Jimenez
32213cad01 input(docker): docker/engine-api
* Made required changes to get it to compile
* First manual tests looking good, still unit tests need fixing
* Made go linter happier
2016-04-06 11:55:30 -06:00
Ricard Clau
9320a6e115 windows service docs
closes #954
2016-04-06 11:50:36 -06:00
Cameron Sparr
0f16c0f4cf Reduce TCP listener allocations 2016-04-05 17:37:09 -06:00
Cameron Sparr
30464396d9 Make the UDP input buffer only once 2016-04-05 17:35:43 -06:00
Cameron Sparr
64066c4ea8 Update input data format readme 2016-04-05 17:27:04 -06:00
Cameron Sparr
7e97787d9d More readme fixups 2016-04-05 16:17:45 -06:00
Cameron Sparr
40f2dd8c6c Readme fixup for exec plugin 2016-04-05 15:22:58 -06:00
Cameron Sparr
4dd364e1c3 Update all readme instances of data formats 2016-04-05 14:42:20 -06:00
Cameron Sparr
03f2a35b31 Update jolokia plugin readme 2016-04-05 13:54:02 -06:00
Martti Rannanjärvi
73bd98df57 dovecot: remove extra newline from stats query
Extra newline in the stats query is interpreted as an empty query
which is an error for dovecot.

closes #972
2016-04-05 10:54:27 -06:00
Armin Wolfermann
bcf1fc658d ipmi_sensors: Allow : in password
closes #969
2016-04-05 10:52:41 -06:00
Cameron Sparr
863cbe512d processes plugin: fix case where there are spaces in cmd name
fixes #968
2016-04-05 10:27:30 -06:00
142 changed files with 10557 additions and 1952 deletions

2
.gitattributes vendored Normal file
View File

@@ -0,0 +1,2 @@
CHANGELOG.md merge=union

View File

@@ -1,3 +1,130 @@
## v0.13 [unreleased]
### Release Notes
- **Breaking change** in jolokia plugin. See
https://github.com/influxdata/telegraf/blob/master/plugins/inputs/jolokia/README.md
for updated configuration. The plugin will now support proxy mode and will make
POST requests.
- New [agent] configuration option: `metric_batch_size`. This option tells
telegraf the maximum batch size to allow to accumulate before sending a flush
to the configured outputs. `metric_buffer_limit` now refers to the absolute
maximum number of metrics that will accumulate before metrics are dropped.
- There is no longer an option to
`flush_buffer_when_full`, this is now the default and only behavior of telegraf.
- **Breaking Change**: docker plugin tags. The cont_id tag no longer exists, it
will now be a field, and be called container_id. Additionally, cont_image and
cont_name are being renamed to container_image and container_name.
- **Breaking Change**: docker plugin measurements. The `docker_cpu`, `docker_mem`,
`docker_blkio` and `docker_net` measurements are being renamed to
`docker_container_cpu`, `docker_container_mem`, `docker_container_blkio` and
`docker_container_net`. Why? Because these metrics are
specifically tracking per-container stats. The problem with per-container stats,
in some use-cases, is that if containers are short-lived AND names are not
kept consistent, then the series cardinality will balloon very quickly.
So adding "container" to each metric will:
(1) make it more clear that these metrics are per-container, and
(2) allow users to easily drop per-container metrics if cardinality is an
issue (`namedrop = ["docker_container_*"]`)
- `tagexclude` and `taginclude` are now available, which can be used to remove
tags from measurements on inputs and outputs. See
[the configuration doc](https://github.com/influxdata/telegraf/blob/master/docs/CONFIGURATION.md)
for more details.
- **Measurement filtering:** All measurement filters now match based on glob
only. Previously there was an undocumented behavior where filters would match
based on _prefix_ in addition to globs. This means that a filter like
`fielddrop = ["time_"]` will need to be changed to `fielddrop = ["time_*"]`
- **datadog**: measurement and field names will no longer have `_` replaced by `.`
- The following plugins have changed their tags to _not_ overwrite the host tag:
- cassandra: `host -> cassandra_host`
- disque: `host -> disque_host`
- rethinkdb: `host -> rethinkdb_host`
- **Breaking Change**: The `win_perf_counters` input has been changed to sanitize field names, replacing `/Sec` and `/sec` with `_persec`, as well as spaces with underscores. This is needed because Graphite doesn't like slashes and spaces, and was failing to accept metrics that had them. The `/[sS]ec` -> `_persec` is just to make things clearer and uniform.
- The `disk` input plugin can now be configured with the `HOST_MOUNT_PREFIX` environment variable.
This value is prepended to any mountpaths discovered before retrieving stats.
It is not included on the report path. This is necessary for reporting host disk stats when running from within a container.
### Features
- [#1031](https://github.com/influxdata/telegraf/pull/1031): Jolokia plugin proxy mode. Thanks @saiello!
- [#1017](https://github.com/influxdata/telegraf/pull/1017): taginclude and tagexclude arguments.
- [#1015](https://github.com/influxdata/telegraf/pull/1015): Docker plugin schema refactor.
- [#889](https://github.com/influxdata/telegraf/pull/889): Improved MySQL plugin. Thanks @maksadbek!
- [#1060](https://github.com/influxdata/telegraf/pull/1060): TTL metrics added to MongoDB input plugin
- [#1056](https://github.com/influxdata/telegraf/pull/1056): Don't allow inputs to overwrite host tags.
- [#1035](https://github.com/influxdata/telegraf/issues/1035): Add `user`, `exe`, `pidfile` tags to procstat plugin.
- [#1041](https://github.com/influxdata/telegraf/issues/1041): Add `n_cpus` field to the system plugin.
- [#1072](https://github.com/influxdata/telegraf/pull/1072): New Input Plugin: filestat.
- [#1066](https://github.com/influxdata/telegraf/pull/1066): Replication lag metrics for MongoDB input plugin
- [#1086](https://github.com/influxdata/telegraf/pull/1086): Ability to specify AWS keys in config file. Thanks @johnrengleman!
- [#1096](https://github.com/influxdata/telegraf/pull/1096): Performance refactor of running output buffers.
- [#967](https://github.com/influxdata/telegraf/issues/967): Buffer logging improvements.
- [#1107](https://github.com/influxdata/telegraf/issues/1107): Support lustre2 job stats. Thanks @hanleyja!
- [#1122](https://github.com/influxdata/telegraf/pull/1122): Support setting config path through env variable and default paths.
- [#1128](https://github.com/influxdata/telegraf/pull/1128): MongoDB jumbo chunks metric for MongoDB input plugin
- [#1146](https://github.com/influxdata/telegraf/pull/1146): HAProxy socket support. Thanks weshmashian!
### Bugfixes
- [#1050](https://github.com/influxdata/telegraf/issues/1050): jolokia plugin - do not overwrite host tag. Thanks @saiello!
- [#921](https://github.com/influxdata/telegraf/pull/921): mqtt_consumer stops gathering metrics. Thanks @chaton78!
- [#1013](https://github.com/influxdata/telegraf/pull/1013): Close dead riemann output connections. Thanks @echupriyanov!
- [#1012](https://github.com/influxdata/telegraf/pull/1012): Set default tags in test accumulator.
- [#1024](https://github.com/influxdata/telegraf/issues/1024): Don't replace `.` with `_` in datadog output.
- [#1058](https://github.com/influxdata/telegraf/issues/1058): Fix possible leaky TCP connections in influxdb output.
- [#1044](https://github.com/influxdata/telegraf/pull/1044): Fix SNMP OID possible collisions. Thanks @relip
- [#1022](https://github.com/influxdata/telegraf/issues/1022): Dont error deb/rpm install on systemd errors.
- [#1078](https://github.com/influxdata/telegraf/issues/1078): Use default AWS credential chain.
- [#1070](https://github.com/influxdata/telegraf/issues/1070): SQL Server input. Fix datatype conversion.
- [#1089](https://github.com/influxdata/telegraf/issues/1089): Fix leaky TCP connections in phpfpm plugin.
- [#914](https://github.com/influxdata/telegraf/issues/914): Telegraf can drop metrics on full buffers.
- [#1098](https://github.com/influxdata/telegraf/issues/1098): Sanitize invalid OpenTSDB characters.
- [#1110](https://github.com/influxdata/telegraf/pull/1110): Sanitize * to - in graphite serializer. Thanks @goodeggs!
- [#1118](https://github.com/influxdata/telegraf/pull/1118): Sanitize Counter names for `win_perf_counters` input.
- [#1125](https://github.com/influxdata/telegraf/pull/1125): Wrap all exec command runners with a timeout, so hung os processes don't halt Telegraf.
- [#1113](https://github.com/influxdata/telegraf/pull/1113): Set MaxRetry and RequiredAcks defaults in Kafka output.
- [#1090](https://github.com/influxdata/telegraf/issues/1090): [agent] and [global_tags] config sometimes not getting applied.
- [#1133](https://github.com/influxdata/telegraf/issues/1133): Use a timeout for docker list & stat cmds.
- [#1052](https://github.com/influxdata/telegraf/issues/1052): Docker panic fix when decode fails.
- [#1136](https://github.com/influxdata/telegraf/pull/1136): "DELAYED" Inserts were deprecated in MySQL 5.6.6. Thanks @PierreF
## v0.12.1 [2016-04-14]
### Release Notes
- Breaking change in the dovecot input plugin. See Features section below.
- Graphite output templates are now supported. See
https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_OUTPUT.md#graphite
- Possible breaking change for the librato and graphite outputs. Telegraf will
no longer insert field names when the field is simply named `value`. This is
because the `value` field is redundant in the graphite/librato context.
### Features
- [#1009](https://github.com/influxdata/telegraf/pull/1009): Cassandra input plugin. Thanks @subhachandrachandra!
- [#976](https://github.com/influxdata/telegraf/pull/976): Reduce allocations in the UDP and statsd inputs.
- [#979](https://github.com/influxdata/telegraf/pull/979): Reduce allocations in the TCP listener.
- [#992](https://github.com/influxdata/telegraf/pull/992): Refactor allocations in TCP/UDP listeners.
- [#935](https://github.com/influxdata/telegraf/pull/935): AWS Cloudwatch input plugin. Thanks @joshhardy & @ljosa!
- [#943](https://github.com/influxdata/telegraf/pull/943): http_response input plugin. Thanks @Lswith!
- [#939](https://github.com/influxdata/telegraf/pull/939): sysstat input plugin. Thanks @zbindenren!
- [#998](https://github.com/influxdata/telegraf/pull/998): **breaking change** enabled global, user and ip queries in dovecot plugin. Thanks @mikif70!
- [#1001](https://github.com/influxdata/telegraf/pull/1001): Graphite serializer templates.
- [#1008](https://github.com/influxdata/telegraf/pull/1008): Adding memstats metrics to the influxdb plugin.
### Bugfixes
- [#968](https://github.com/influxdata/telegraf/issues/968): Processes plugin gets unknown state when spaces are in (command name)
- [#969](https://github.com/influxdata/telegraf/pull/969): ipmi_sensors: allow : in password. Thanks @awaw!
- [#972](https://github.com/influxdata/telegraf/pull/972): dovecot: remove extra newline in dovecot command. Thanks @mrannanj!
- [#645](https://github.com/influxdata/telegraf/issues/645): docker plugin i/o error on closed pipe. Thanks @tripledes!
## v0.12.0 [2016-04-05]
### Features

View File

@@ -114,7 +114,7 @@ creating the `Parser` object.
You should also add the following to your SampleConfig() return:
```toml
## Data format to consume. This can be "json", "influx" or "graphite"
## Data format to consume.
## Each data format has it's own unique set of configuration options, read
## more about them here:
## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md
@@ -244,7 +244,7 @@ instantiating and creating the `Serializer` object.
You should also add the following to your SampleConfig() return:
```toml
## Data format to output. This can be "influx" or "graphite"
## Data format to output.
## Each data format has it's own unique set of configuration options, read
## more about them here:
## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_OUTPUT.md

11
Godeps
View File

@@ -9,19 +9,23 @@ github.com/couchbase/gomemcached a5ea6356f648fec6ab89add00edd09151455b4b2
github.com/couchbase/goutils 5823a0cbaaa9008406021dc5daf80125ea30bba6
github.com/dancannon/gorethink e7cac92ea2bc52638791a021f212145acfedb1fc
github.com/davecgh/go-spew 5215b55f46b2b919f50a1df0eaa5886afe4e3b3d
github.com/docker/engine-api 8924d6900370b4c7e7984be5adc61f50a80d7537
github.com/docker/go-connections f549a9393d05688dff0992ef3efd8bbe6c628aeb
github.com/docker/go-units 5d2041e26a699eaca682e2ea41c8f891e1060444
github.com/eapache/go-resiliency b86b1ec0dd4209a588dc1285cdd471e73525c0b3
github.com/eapache/queue ded5959c0d4e360646dc9e9908cff48666781367
github.com/eclipse/paho.mqtt.golang 4ab3e867810d1ec5f35157c59e965054dbf43a0d
github.com/fsouza/go-dockerclient a49c8269a6899cae30da1f8a4b82e0ce945f9967
github.com/eclipse/paho.mqtt.golang 0f7a459f04f13a41b7ed752d47944528d4bf9a86
github.com/go-sql-driver/mysql 1fca743146605a172a266e1654e01e5cd5669bee
github.com/gobwas/glob d877f6352135181470c40c73ebb81aefa22115fa
github.com/golang/protobuf 552c7b9542c194800fd493123b3798ef0a832032
github.com/golang/snappy 427fb6fc07997f43afa32f35e850833760e489a7
github.com/gonuts/go-shellquote e842a11b24c6abfb3dd27af69a17f482e4b483c2
github.com/gorilla/context 1ea25387ff6f684839d82767c1733ff4d4d15d0a
github.com/gorilla/mux c9e326e2bdec29039a3761c07bece13133863e1e
github.com/hailocab/go-hostpool e80d13ce29ede4452c43dea11e79b9bc8a15b478
github.com/hpcloud/tail b2940955ab8b26e19d43a43c4da0475dd81bdb56
github.com/influxdata/config b79f6829346b8d6e78ba73544b1e1038f1f1c9da
github.com/influxdata/influxdb e3fef5593c21644f2b43af55d6e17e70910b0e48
github.com/influxdata/influxdb 21db76b3374c733f37ed16ad93f3484020034351
github.com/influxdata/toml af4df43894b16e3fd2b788d01bd27ad0776ef2d0
github.com/klauspost/crc32 19b0b332c9e4516a6370a0456e6182c3b5036720
github.com/lib/pq e182dc4027e2ded4b19396d638610f2653295f36
@@ -32,6 +36,7 @@ github.com/naoina/go-stringutil 6b638e95a32d0c1131db0e7fe83775cbea4a0d0b
github.com/nats-io/nats b13fc9d12b0b123ebc374e6b808c6228ae4234a3
github.com/nats-io/nuid 4f84f5f3b2786224e336af2e13dba0a0a80b76fa
github.com/nsqio/go-nsq 0b80d6f05e15ca1930e0c5e1d540ed627e299980
github.com/opencontainers/runc 89ab7f2ccc1e45ddf6485eaa802c35dcf321dfc8
github.com/prometheus/client_golang 18acf9993a863f4c4b40612e19cdd243e7c86831
github.com/prometheus/client_model fa8ad6fec33561be4280a8f0514318c79d7f6cb6
github.com/prometheus/common e8eabff8812b05acf522b45fdcd725a785188e37

View File

@@ -1,3 +1,4 @@
github.com/Microsoft/go-winio 9f57cbbcbcb41dea496528872a4f0e37a4f7ae98
github.com/Shopify/sarama 8aadb476e66ca998f2f6bb3c993e9a2daa3666b9
github.com/Sirupsen/logrus 219c8cb75c258c552e999735be6df753ffc7afdc
github.com/StackExchange/wmi f3e2bae1e0cb5aef83e319133eabfee30013a4a5
@@ -9,24 +10,24 @@ github.com/couchbase/go-couchbase cb664315a324d87d19c879d9cc67fda6be8c2ac1
github.com/couchbase/gomemcached a5ea6356f648fec6ab89add00edd09151455b4b2
github.com/couchbase/goutils 5823a0cbaaa9008406021dc5daf80125ea30bba6
github.com/dancannon/gorethink e7cac92ea2bc52638791a021f212145acfedb1fc
github.com/davecgh/go-spew fc32781af5e85e548d3f1abaf0fa3dbe8a72495c
github.com/davecgh/go-spew 5215b55f46b2b919f50a1df0eaa5886afe4e3b3d
github.com/docker/engine-api 8924d6900370b4c7e7984be5adc61f50a80d7537
github.com/docker/go-connections f549a9393d05688dff0992ef3efd8bbe6c628aeb
github.com/docker/go-units 5d2041e26a699eaca682e2ea41c8f891e1060444
github.com/eapache/go-resiliency b86b1ec0dd4209a588dc1285cdd471e73525c0b3
github.com/eapache/queue ded5959c0d4e360646dc9e9908cff48666781367
github.com/eclipse/paho.mqtt.golang 4ab3e867810d1ec5f35157c59e965054dbf43a0d
github.com/fsouza/go-dockerclient a49c8269a6899cae30da1f8a4b82e0ce945f9967
github.com/go-ini/ini 776aa739ce9373377cd16f526cdf06cb4c89b40f
github.com/eclipse/paho.mqtt.golang 0f7a459f04f13a41b7ed752d47944528d4bf9a86
github.com/go-ole/go-ole 50055884d646dd9434f16bbb5c9801749b9bafe4
github.com/go-sql-driver/mysql 1fca743146605a172a266e1654e01e5cd5669bee
github.com/golang/protobuf 552c7b9542c194800fd493123b3798ef0a832032
github.com/golang/snappy 5979233c5d6225d4a8e438cdd0b411888449ddab
github.com/golang/snappy 427fb6fc07997f43afa32f35e850833760e489a7
github.com/gonuts/go-shellquote e842a11b24c6abfb3dd27af69a17f482e4b483c2
github.com/gorilla/context 1ea25387ff6f684839d82767c1733ff4d4d15d0a
github.com/gorilla/mux c9e326e2bdec29039a3761c07bece13133863e1e
github.com/hailocab/go-hostpool e80d13ce29ede4452c43dea11e79b9bc8a15b478
github.com/influxdata/config b79f6829346b8d6e78ba73544b1e1038f1f1c9da
github.com/influxdata/influxdb c190778997f4154294e6160c41b90140641ac915
github.com/influxdata/influxdb e3fef5593c21644f2b43af55d6e17e70910b0e48
github.com/influxdata/toml af4df43894b16e3fd2b788d01bd27ad0776ef2d0
github.com/jmespath/go-jmespath 0b12d6b521d83fc7f755e7cfc1b1fbdd35a01a74
github.com/klauspost/crc32 19b0b332c9e4516a6370a0456e6182c3b5036720
github.com/lib/pq e182dc4027e2ded4b19396d638610f2653295f36
github.com/lxn/win 9a7734ea4db26bc593d52f6a8a957afdad39c5c1
@@ -37,7 +38,6 @@ github.com/naoina/go-stringutil 6b638e95a32d0c1131db0e7fe83775cbea4a0d0b
github.com/nats-io/nats b13fc9d12b0b123ebc374e6b808c6228ae4234a3
github.com/nats-io/nuid 4f84f5f3b2786224e336af2e13dba0a0a80b76fa
github.com/nsqio/go-nsq 0b80d6f05e15ca1930e0c5e1d540ed627e299980
github.com/pmezard/go-difflib 792786c7400a136282c1664665ae0a8db921c6c2
github.com/prometheus/client_golang 18acf9993a863f4c4b40612e19cdd243e7c86831
github.com/prometheus/client_model fa8ad6fec33561be4280a8f0514318c79d7f6cb6
github.com/prometheus/common e8eabff8812b05acf522b45fdcd725a785188e37
@@ -47,9 +47,8 @@ github.com/shirou/gopsutil 1f32ce1bb380845be7f5d174ac641a2c592c0c42
github.com/shirou/w32 ada3ba68f000aa1b58580e45c9d308fe0b7fc5c5
github.com/soniah/gosnmp b1b4f885b12c5dcbd021c5cee1c904110de6db7d
github.com/streadway/amqp b4f3ceab0337f013208d31348b578d83c0064744
github.com/stretchr/objx 1a9d0bb9f541897e62256577b352fdbc1fb4fd94
github.com/stretchr/testify 1f4a1643a57e798696635ea4c126e9127adb7d3c
github.com/wvanbergen/kafka 1a8639a45164fcc245d5c7b4bd3ccfbd1a0ffbf3
github.com/wvanbergen/kafka 46f9a1cf3f670edec492029fadded9c2d9e18866
github.com/wvanbergen/kazoo-go 0f768712ae6f76454f987c3356177e138df258f8
github.com/zensqlmonitor/go-mssqldb ffe5510c6fa5e15e6d983210ab501c815b56b363
golang.org/x/net 6acef71eb69611914f7a30939ea9f6e194c78172

View File

@@ -20,12 +20,12 @@ new plugins.
### Linux deb and rpm Packages:
Latest:
* http://get.influxdb.org/telegraf/telegraf_0.12.0-1_amd64.deb
* http://get.influxdb.org/telegraf/telegraf-0.12.0-1.x86_64.rpm
* http://get.influxdb.org/telegraf/telegraf_0.12.1-1_amd64.deb
* http://get.influxdb.org/telegraf/telegraf-0.12.1-1.x86_64.rpm
Latest (arm):
* http://get.influxdb.org/telegraf/telegraf_0.12.0-1_armhf.deb
* http://get.influxdb.org/telegraf/telegraf-0.12.0-1.armhf.rpm
* http://get.influxdb.org/telegraf/telegraf_0.12.1-1_armhf.deb
* http://get.influxdb.org/telegraf/telegraf-0.12.1-1.armhf.rpm
##### Package Instructions:
@@ -46,28 +46,28 @@ to use this repo to install & update telegraf.
### Linux tarballs:
Latest:
* http://get.influxdb.org/telegraf/telegraf-0.12.0-1_linux_amd64.tar.gz
* http://get.influxdb.org/telegraf/telegraf-0.12.0-1_linux_i386.tar.gz
* http://get.influxdb.org/telegraf/telegraf-0.12.0-1_linux_armhf.tar.gz
* http://get.influxdb.org/telegraf/telegraf-0.12.1-1_linux_amd64.tar.gz
* http://get.influxdb.org/telegraf/telegraf-0.12.1-1_linux_i386.tar.gz
* http://get.influxdb.org/telegraf/telegraf-0.12.1-1_linux_armhf.tar.gz
##### tarball Instructions:
To install the full directory structure with config file, run:
```
sudo tar -C / -zxvf ./telegraf-0.12.0-1_linux_amd64.tar.gz
sudo tar -C / -zxvf ./telegraf-0.12.1-1_linux_amd64.tar.gz
```
To extract only the binary, run:
```
tar -zxvf telegraf-0.12.0-1_linux_amd64.tar.gz --strip-components=3 ./usr/bin/telegraf
tar -zxvf telegraf-0.12.1-1_linux_amd64.tar.gz --strip-components=3 ./usr/bin/telegraf
```
### FreeBSD tarball:
Latest:
* http://get.influxdb.org/telegraf/telegraf-0.12.0-1_freebsd_amd64.tar.gz
* http://get.influxdb.org/telegraf/telegraf-0.12.1-1_freebsd_amd64.tar.gz
##### tarball Instructions:
@@ -87,8 +87,8 @@ brew install telegraf
### Windows Binaries (EXPERIMENTAL)
Latest:
* http://get.influxdb.org/telegraf/telegraf-0.12.0-1_windows_amd64.zip
* http://get.influxdb.org/telegraf/telegraf-0.12.0-1_windows_i386.zip
* http://get.influxdb.org/telegraf/telegraf-0.12.1-1_windows_amd64.zip
* http://get.influxdb.org/telegraf/telegraf-0.12.1-1_windows_i386.zip
### From Source:
@@ -156,9 +156,11 @@ more information on each, please look at the directory of the same name in
Currently implemented sources:
* [aws cloudwatch](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/cloudwatch)
* [aerospike](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/aerospike)
* [apache](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/apache)
* [bcache](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/bcache)
* [cassandra](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/cassandra)
* [couchbase](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/couchbase)
* [couchdb](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/couchdb)
* [disque](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/disque)
@@ -166,9 +168,11 @@ Currently implemented sources:
* [docker](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/docker)
* [dovecot](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/dovecot)
* [elasticsearch](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/elasticsearch)
* [exec](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/exec ) (generic executable plugin, support JSON, influx, graphite and nagios)
* [exec](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/exec) (generic executable plugin, support JSON, influx, graphite and nagios)
* [filestat](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/filestat)
* [haproxy](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/haproxy)
* [httpjson ](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/httpjson ) (generic JSON-emitting http service plugin)
* [http_response](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/http_response)
* [httpjson](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/httpjson) (generic JSON-emitting http service plugin)
* [influxdb](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/influxdb)
* [ipmi_sensor](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/ipmi_sensor)
* [jolokia](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/jolokia)
@@ -204,6 +208,7 @@ Currently implemented sources:
* [zfs](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/zfs)
* [zookeeper](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/zookeeper)
* [win_perf_counters ](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/win_perf_counters) (windows performance counters)
* [sysstat](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/sysstat)
* [system](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/system)
* cpu
* mem
@@ -236,6 +241,7 @@ want to add support for another service or third-party API.
* [aws kinesis](https://github.com/influxdata/telegraf/tree/master/plugins/outputs/kinesis)
* [aws cloudwatch](https://github.com/influxdata/telegraf/tree/master/plugins/outputs/cloudwatch)
* [datadog](https://github.com/influxdata/telegraf/tree/master/plugins/outputs/datadog)
* [file](https://github.com/influxdata/telegraf/tree/master/plugins/outputs/file)
* [graphite](https://github.com/influxdata/telegraf/tree/master/plugins/outputs/graphite)
* [kafka](https://github.com/influxdata/telegraf/tree/master/plugins/outputs/kafka)
* [librato](https://github.com/influxdata/telegraf/tree/master/plugins/outputs/librato)

View File

@@ -84,18 +84,15 @@ func (ac *accumulator) AddFields(
if tags == nil {
tags = make(map[string]string)
}
// Apply plugin-wide tags if set
for k, v := range ac.inputConfig.Tags {
if _, ok := tags[k]; !ok {
tags[k] = v
}
}
// Apply daemon-wide tags if set
for k, v := range ac.defaultTags {
if _, ok := tags[k]; !ok {
tags[k] = v
}
tags[k] = v
}
// Apply plugin-wide tags if set
for k, v := range ac.inputConfig.Tags {
tags[k] = v
}
ac.inputConfig.Filter.FilterTags(tags)
result := make(map[string]interface{})
for k, v := range fields {

View File

@@ -300,3 +300,35 @@ func TestAddBools(t *testing.T) {
fmt.Sprintf("acctest,acc=test,default=tag value=false %d", now.UnixNano()),
actual)
}
// Test that tag filters get applied to metrics.
func TestAccFilterTags(t *testing.T) {
a := accumulator{}
now := time.Now()
a.metrics = make(chan telegraf.Metric, 10)
defer close(a.metrics)
filter := internal_models.Filter{
TagExclude: []string{"acc"},
}
assert.NoError(t, filter.CompileFilter())
a.inputConfig = &internal_models.InputConfig{}
a.inputConfig.Filter = filter
a.Add("acctest", float64(101), map[string]string{})
a.Add("acctest", float64(101), map[string]string{"acc": "test"})
a.Add("acctest", float64(101), map[string]string{"acc": "test"}, now)
testm := <-a.metrics
actual := testm.String()
assert.Contains(t, actual, "acctest value=101")
testm = <-a.metrics
actual = testm.String()
assert.Contains(t, actual, "acctest value=101")
testm = <-a.metrics
actual = testm.String()
assert.Equal(t,
fmt.Sprintf("acctest value=101 %d", now.UnixNano()),
actual)
}

View File

@@ -221,6 +221,7 @@ func (a *Agent) Test() error {
for _, input := range a.Config.Inputs {
acc := NewAccumulator(input.Config, metricC)
acc.SetDebug(true)
acc.setDefaultTags(a.Config.Tags)
fmt.Printf("* Plugin: %s, Collection 1\n", input.Name)
if input.Config.Interval != 0 {

View File

@@ -4,9 +4,9 @@ machine:
post:
- sudo service zookeeper stop
- go version
- go version | grep 1.6 || sudo rm -rf /usr/local/go
- wget https://storage.googleapis.com/golang/go1.6.linux-amd64.tar.gz
- sudo tar -C /usr/local -xzf go1.6.linux-amd64.tar.gz
- go version | grep 1.6.2 || sudo rm -rf /usr/local/go
- wget https://storage.googleapis.com/golang/go1.6.2.linux-amd64.tar.gz
- sudo tar -C /usr/local -xzf go1.6.2.linux-amd64.tar.gz
- go version
dependencies:

View File

@@ -71,6 +71,13 @@ The flags are:
-quiet run in quiet mode
-version print the version to stdout
In addition to the -config flag, telegraf will also load the config file from
an environment variable or default location. Precedence is:
1. -config flag
2. $TELEGRAF_CONFIG_PATH environment variable
3. $HOME/.telegraf/telegraf.conf
4. /etc/telegraf/telegraf.conf
Examples:
# generate a telegraf config file:
@@ -98,12 +105,10 @@ func main() {
flag.Parse()
args := flag.Args()
if flag.NFlag() == 0 && len(args) == 0 {
usageExit(0)
}
var inputFilters []string
if *fInputFiltersLegacy != "" {
fmt.Printf("WARNING '--filter' flag is deprecated, please use" +
" '--input-filter'")
inputFilter := strings.TrimSpace(*fInputFiltersLegacy)
inputFilters = strings.Split(":"+inputFilter+":", ":")
}
@@ -114,6 +119,8 @@ func main() {
var outputFilters []string
if *fOutputFiltersLegacy != "" {
fmt.Printf("WARNING '--outputfilter' flag is deprecated, please use" +
" '--output-filter'")
outputFilter := strings.TrimSpace(*fOutputFiltersLegacy)
outputFilters = strings.Split(":"+outputFilter+":", ":")
}
@@ -170,25 +177,19 @@ func main() {
return
}
var (
c *config.Config
err error
)
if *fConfig != "" {
c = config.NewConfig()
c.OutputFilters = outputFilters
c.InputFilters = inputFilters
err = c.LoadConfig(*fConfig)
if err != nil {
log.Fatal(err)
}
} else {
fmt.Println("You must specify a config file. See telegraf --help")
// If no other options are specified, load the config file and run.
c := config.NewConfig()
c.OutputFilters = outputFilters
c.InputFilters = inputFilters
err := c.LoadConfig(*fConfig)
if err != nil {
fmt.Println(err)
os.Exit(1)
}
if *fConfigDirectoryLegacy != "" {
fmt.Printf("WARNING '--configdirectory' flag is deprecated, please use" +
" '--config-directory'")
err = c.LoadDirectory(*fConfigDirectoryLegacy)
if err != nil {
log.Fatal(err)

View File

@@ -3,11 +3,20 @@
## Generating a Configuration File
A default Telegraf config file can be generated using the -sample-config flag:
`telegraf -sample-config > telegraf.conf`
```
telegraf -sample-config > telegraf.conf
```
To generate a file with specific inputs and outputs, you can use the
-input-filter and -output-filter flags:
`telegraf -sample-config -input-filter cpu:mem:net:swap -output-filter influxdb:kafka`
```
telegraf -sample-config -input-filter cpu:mem:net:swap -output-filter influxdb:kafka
```
You can see the latest config file with all available plugins here:
[telegraf.conf](https://github.com/influxdata/telegraf/blob/master/etc/telegraf.conf)
## Environment Variables
@@ -17,8 +26,8 @@ for numbers and booleans they should be plain (ie, $INT_VAR, $BOOL_VAR)
## `[global_tags]` Configuration
Global tags can be specific in the `[global_tags]` section of the config file in
key="value" format. All metrics being gathered on this host will be tagged
Global tags can be specified in the `[global_tags]` section of the config file
in key="value" format. All metrics being gathered on this host will be tagged
with the tags specified here.
## `[agent]` Configuration
@@ -29,8 +38,12 @@ config.
* **interval**: Default data collection interval for all inputs
* **round_interval**: Rounds collection interval to 'interval'
ie, if interval="10s" then always collect on :00, :10, :20, etc.
* **metric_batch_size**: Telegraf will send metrics to output in batch of at
most metric_batch_size metrics.
* **metric_buffer_limit**: Telegraf will cache metric_buffer_limit metrics
for each output, and will flush this buffer on a successful write.
This should be a multiple of metric_batch_size and could not be less
than 2 times metric_batch_size.
* **collection_jitter**: Collection jitter is used to jitter
the collection by a random amount.
Each plugin will sleep for a random time within jitter before collecting.
@@ -47,9 +60,35 @@ ie, a jitter of 5s and flush_interval 10s means flushes will happen every 10-15s
* **quiet**: Run telegraf in quiet mode.
* **hostname**: Override default hostname, if empty use os.Hostname().
## `[inputs.xxx]` Configuration
#### Measurement Filtering
There are some configuration options that are configurable per input:
Filters can be configured per input or output, see below for examples.
* **namepass**: An array of strings that is used to filter metrics generated by the
current input. Each string in the array is tested as a glob match against
measurement names and if it matches, the field is emitted.
* **namedrop**: The inverse of pass, if a measurement name matches, it is not emitted.
* **fieldpass**: An array of strings that is used to filter metrics generated by the
current input. Each string in the array is tested as a glob match against field names
and if it matches, the field is emitted. fieldpass is not available for outputs.
* **fielddrop**: The inverse of pass, if a field name matches, it is not emitted.
fielddrop is not available for outputs.
* **tagpass**: tag names and arrays of strings that are used to filter
measurements by the current input. Each string in the array is tested as a glob
match against the tag name, and if it matches the measurement is emitted.
* **tagdrop**: The inverse of tagpass. If a tag matches, the measurement is not
emitted. This is tested on measurements that have passed the tagpass test.
* **tagexclude**: tagexclude can be used to exclude a tag from measurement(s).
As opposed to tagdrop, which will drop an entire measurement based on it's
tags, tagexclude simply strips the given tag keys from the measurement. This
can be used on inputs & outputs, but it is _recommended_ to be used on inputs,
as it is more efficient to filter out tags at the ingestion point.
* **taginclude**: taginclude is the inverse of tagexclude. It will only include
the tag keys in the final measurement.
## Input Configuration
Some configuration options are configurable per input:
* **name_override**: Override the base name of the measurement.
(Default is the name of the input).
@@ -60,24 +99,6 @@ There are some configuration options that are configurable per input:
global interval, but if one particular input should be run less or more often,
you can configure that here.
#### Input Filters
There are also filters that can be configured per input:
* **namepass**: An array of strings that is used to filter metrics generated by the
current input. Each string in the array is tested as a glob match against
measurement names and if it matches, the field is emitted.
* **namedrop**: The inverse of pass, if a measurement name matches, it is not emitted.
* **fieldpass**: An array of strings that is used to filter metrics generated by the
current input. Each string in the array is tested as a glob match against field names
and if it matches, the field is emitted.
* **fielddrop**: The inverse of pass, if a field name matches, it is not emitted.
* **tagpass**: tag names and arrays of strings that are used to filter
measurements by the current input. Each string in the array is tested as a glob
match against the tag name, and if it matches the measurement is emitted.
* **tagdrop**: The inverse of tagpass. If a tag matches, the measurement is not
emitted. This is tested on measurements that have passed the tagpass test.
#### Input Configuration Examples
This is a full working config that will output CPU data to an InfluxDB instance
@@ -155,6 +176,20 @@ fields which begin with `time_`.
namepass = ["rest_client_*"]
```
#### Input Config: taginclude and tagexclude
```toml
# Only include the "cpu" tag in the measurements for the cpu plugin.
[[inputs.cpu]]
percpu = true
totalcpu = true
taginclude = ["cpu"]
# Exclude the "fstype" tag from the measurements for the disk plugin.
[[inputs.disk]]
tagexclude = ["fstype"]
```
#### Input config: prefix, suffix, and override
This plugin will emit measurements with the name `cpu_total`
@@ -180,6 +215,9 @@ This will emit measurements with the name `foobar`
This plugin will emit measurements with two additional tags: `tag1=foo` and
`tag2=bar`
NOTE: Order matters, the `[inputs.cpu.tags]` table must be at the _end_ of the
plugin definition.
```toml
[[inputs.cpu]]
percpu = false
@@ -208,15 +246,12 @@ to avoid measurement collisions:
fielddrop = ["cpu_time*"]
```
## `[outputs.xxx]` Configuration
## Output Configuration
Telegraf also supports specifying multiple output sinks to send data to,
configuring each output sink is different, but examples can be
found by running `telegraf -sample-config`.
Outputs also support the same configurable options as inputs
(namepass, namedrop, tagpass, tagdrop)
```toml
[[outputs.influxdb]]
urls = [ "http://localhost:8086" ]

View File

@@ -2,10 +2,11 @@
Telegraf is able to parse the following input data formats into metrics:
1. InfluxDB Line Protocol
1. JSON
1. Graphite
1. Value, ie 45 or "booyah"
1. [InfluxDB Line Protocol](https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md#influx)
1. [JSON](https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md#json)
1. [Graphite](https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md#graphite)
1. [Value](https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md#value), ie: 45 or "booyah"
1. [Nagios](https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md#nagios) (exec input only)
Telegraf metrics, like InfluxDB
[points](https://docs.influxdata.com/influxdb/v0.10/write_protocols/line/),
@@ -38,7 +39,7 @@ example, in the exec plugin:
## measurement name suffix (for separating different commands)
name_suffix = "_mycollector"
## Data format to consume. This can be "json", "influx" or "graphite"
## Data format to consume.
## Each data format has it's own unique set of configuration options, read
## more about them here:
## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md
@@ -50,7 +51,7 @@ example, in the exec plugin:
Each data_format has an additional set of configuration options available, which
I'll go over below.
## Influx:
# Influx:
There are no additional configuration options for InfluxDB line-protocol. The
metrics are parsed directly into Telegraf metrics.
@@ -65,23 +66,28 @@ metrics are parsed directly into Telegraf metrics.
## measurement name suffix (for separating different commands)
name_suffix = "_mycollector"
## Data format to consume. This can be "json", "influx" or "graphite"
## Data format to consume.
## Each data format has it's own unique set of configuration options, read
## more about them here:
## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md
data_format = "influx"
```
## JSON:
# JSON:
The JSON data format flattens JSON into metric _fields_. For example, this JSON:
The JSON data format flattens JSON into metric _fields_.
NOTE: Only numerical values are converted to fields, and they are converted
into a float. strings are ignored unless specified as a tag_key (see below).
So for example, this JSON:
```json
{
"a": 5,
"b": {
"c": 6
}
},
"ignored": "I'm a string"
}
```
@@ -110,7 +116,7 @@ For example, if you had this configuration:
## measurement name suffix (for separating different commands)
name_suffix = "_mycollector"
## Data format to consume. This can be "json", "influx" or "graphite"
## Data format to consume.
## Each data format has it's own unique set of configuration options, read
## more about them here:
## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md
@@ -141,21 +147,25 @@ Your Telegraf metrics would get tagged with "my_tag_1"
exec_mycollector,my_tag_1=foo a=5,b_c=6
```
## Value:
# Value:
The "value" data format translates single values into Telegraf metrics. This
is done by assigning a measurement name (which can be overridden using the
`name_override` config option), and setting a single field ("value") as the
parsed metric.
is done by assigning a measurement name and setting a single field ("value")
as the parsed metric.
#### Value Configuration:
You can tell Telegraf what type of metric to collect by using the `data_type`
configuration option.
You **must** tell Telegraf what type of metric to collect by using the
`data_type` configuration option. Available options are:
It is also recommended that you set `name_override` to a measurement name that
makes sense for your metric, otherwise it will just be set to the name of the
plugin.
1. integer
2. float or long
3. string
4. boolean
**Note:** It is also recommended that you set `name_override` to a measurement
name that makes sense for your metric, otherwise it will just be set to the
name of the plugin.
```toml
[[inputs.exec]]
@@ -165,15 +175,15 @@ plugin.
## override the default metric name of "exec"
name_override = "entropy_available"
## Data format to consume. This can be "json", "value", influx" or "graphite"
## Data format to consume.
## Each data format has it's own unique set of configuration options, read
## more about them here:
## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md
data_format = "value"
data_type = "integer"
data_type = "integer" # required
```
## Graphite:
# Graphite:
The Graphite data format translates graphite _dot_ buckets directly into
telegraf measurement names, with a single value field, and without any tags. For
@@ -301,7 +311,7 @@ There are many more options available,
## measurement name suffix (for separating different commands)
name_suffix = "_mycollector"
## Data format to consume. This can be "json", "influx" or "graphite" (line-protocol)
## Data format to consume.
## Each data format has it's own unique set of configuration options, read
## more about them here:
## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md
@@ -327,7 +337,7 @@ There are many more options available,
]
```
## Nagios:
# Nagios:
There are no additional configuration options for Nagios line-protocol. The
metrics are parsed directly into Telegraf metrics.
@@ -344,7 +354,7 @@ Note: Nagios Input Data Formats is only supported in `exec` input plugin.
## measurement name suffix (for separating different commands)
name_suffix = "_mycollector"
## Data format to consume. This can be "json", "influx", "graphite" or "nagios"
## Data format to consume.
## Each data format has it's own unique set of configuration options, read
## more about them here:
## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md

View File

@@ -1,5 +1,11 @@
# Telegraf Output Data Formats
Telegraf is able to serialize metrics into the following output data formats:
1. [InfluxDB Line Protocol](https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_OUTPUT.md#influx)
1. [JSON](https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_OUTPUT.md#json)
1. [Graphite](https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_OUTPUT.md#graphite)
Telegraf metrics, like InfluxDB
[points](https://docs.influxdata.com/influxdb/v0.10/write_protocols/line/),
are a combination of four basic parts:
@@ -29,7 +35,7 @@ config option, for example, in the `file` output plugin:
## Files to write to, "stdout" is a specially handled file.
files = ["stdout"]
## Data format to output. This can be "influx" or "graphite"
## Data format to output.
## Each data format has it's own unique set of configuration options, read
## more about them here:
## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_OUTPUT.md
@@ -41,34 +47,46 @@ config option, for example, in the `file` output plugin:
Each data_format has an additional set of configuration options available, which
I'll go over below.
## Influx:
# Influx:
There are no additional configuration options for InfluxDB line-protocol. The
metrics are serialized directly into InfluxDB line-protocol.
#### Influx Configuration:
### Influx Configuration:
```toml
[[outputs.file]]
## Files to write to, "stdout" is a specially handled file.
files = ["stdout", "/tmp/metrics.out"]
## Data format to output. This can be "influx", "json" or "graphite"
## Data format to output.
## Each data format has it's own unique set of configuration options, read
## more about them here:
## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_OUTPUT.md
data_format = "influx"
```
## Graphite:
# Graphite:
The Graphite data format translates Telegraf metrics into _dot_ buckets.
The format is:
The Graphite data format translates Telegraf metrics into _dot_ buckets. A
template can be specified for the output of Telegraf metrics into Graphite
buckets. The default template is:
```
[prefix].[host tag].[all tags (alphabetical)].[measurement name].[field name] value timestamp
template = "host.tags.measurement.field"
```
In the above template, we have four parts:
1. _host_ is a tag key. This can be any tag key that is in the Telegraf
metric(s). If the key doesn't exist, it will be ignored. If it does exist, the
tag value will be filled in.
1. _tags_ is a special keyword that outputs all remaining tag values, separated
by dots and in alphabetical order (by tag key). These will be filled after all
tag keys are filled.
1. _measurement_ is a special keyword that outputs the measurement name.
1. _field_ is a special keyword that outputs the field name.
Which means the following influx metric -> graphite conversion would happen:
```
@@ -78,27 +96,28 @@ tars.cpu-total.us-east-1.cpu.usage_user 0.89 1455320690
tars.cpu-total.us-east-1.cpu.usage_idle 98.09 1455320690
```
`prefix` is a configuration option when using the graphite output data format.
#### Graphite Configuration:
### Graphite Configuration:
```toml
[[outputs.file]]
## Files to write to, "stdout" is a specially handled file.
files = ["stdout", "/tmp/metrics.out"]
## Data format to output. This can be "influx", "json" or "graphite"
## Data format to output.
## Each data format has it's own unique set of configuration options, read
## more about them here:
## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_OUTPUT.md
data_format = "influx"
data_format = "graphite"
# prefix each graphite bucket
prefix = "telegraf"
# graphite template
template = "host.tags.measurement.field"
```
## Json:
# JSON:
The Json data format serialized Telegraf metrics in json format. The format is:
The JSON data format serialized Telegraf metrics in json format. The format is:
```json
{
@@ -116,14 +135,14 @@ The Json data format serialized Telegraf metrics in json format. The format is:
}
```
#### Json Configuration:
### JSON Configuration:
```toml
[[outputs.file]]
## Files to write to, "stdout" is a specially handled file.
files = ["stdout", "/tmp/metrics.out"]
## Data format to output. This can be "influx", "json" or "graphite"
## Data format to output.
## Each data format has it's own unique set of configuration options, read
## more about them here:
## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_OUTPUT.md

View File

@@ -28,6 +28,5 @@
- github.com/wvanbergen/kazoo-go [MIT LICENSE](https://github.com/wvanbergen/kazoo-go/blob/master/MIT-LICENSE)
- gopkg.in/dancannon/gorethink.v1 [APACHE LICENSE](https://github.com/dancannon/gorethink/blob/v1.1.2/LICENSE)
- gopkg.in/mgo.v2 [BSD LICENSE](https://github.com/go-mgo/mgo/blob/v2/LICENSE)
- golang.org/x/crypto/* [BSD LICENSE](https://github.com/golang/crypto/blob/master/LICENSE)
- internal Glob function [MIT LICENSE](https://github.com/ryanuber/go-glob/blob/master/LICENSE)
- golang.org/x/crypto/ [BSD LICENSE](https://github.com/golang/crypto/blob/master/LICENSE)

36
docs/WINDOWS_SERVICE.md Normal file
View File

@@ -0,0 +1,36 @@
# Running Telegraf as a Windows Service
If you have tried to install Go binaries as Windows Services with the **sc.exe**
tool you may have seen that the service errors and stops running after a while.
**NSSM** (the Non-Sucking Service Manager) is a tool that helps you in a
[number of scenarios](http://nssm.cc/scenarios) including running Go binaries
that were not specifically designed to run only in Windows platforms.
## NSSM Installation via Chocolatey
You can install [Chocolatey](https://chocolatey.org/) and [NSSM](http://nssm.cc/)
with these commands
```powershell
iex ((new-object net.webclient).DownloadString('https://chocolatey.org/install.ps1'))
choco install -y nssm
```
## Installing Telegraf as a Windows Service with NSSM
You can download the latest Telegraf Windows binaries (still Experimental at
the moment) from [the Telegraf Github repo](https://github.com/influxdata/telegraf).
Then you can create a C:\telegraf folder, unzip the binary there and modify the
**telegraf.conf** sample to allocate the metrics you want to send to **InfluxDB**.
Once you have NSSM installed in your system, the process is quite straightforward.
You only need to type this command in your Windows shell
```powershell
nssm install Telegraf c:\telegraf\telegraf.exe -config c:\telegraf\telegraf.config
```
And now your service will be installed in Windows and you will be able to start and
stop it gracefully

View File

@@ -30,11 +30,13 @@
## ie, if interval="10s" then always collect on :00, :10, :20, etc.
round_interval = true
## Telegraf will cache metric_buffer_limit metrics for each output, and will
## flush this buffer on a successful write.
metric_buffer_limit = 1000
## Flush the buffer whenever full, regardless of flush_interval.
flush_buffer_when_full = true
## Telegraf will send metrics to outputs in batches of at
## most metric_batch_size metrics.
metric_batch_size = 1000
## For failed writes, telegraf will cache metric_buffer_limit metrics for each
## output, and will flush this buffer on a successful write. Oldest metrics
## are dropped first when this buffer fills.
metric_buffer_limit = 10000
## Collection jitter is used to jitter the collection by a random amount.
## Each plugin will sleep for a random time within jitter before collecting.
@@ -147,6 +149,15 @@
# ## Amazon REGION
# region = 'us-east-1'
#
# ## Amazon Credentials
# ## Credentials are loaded in the following order
# ## 1) explicit credentials from 'access_key' and 'secret_key'
# ## 2) environment variables
# ## 3) shared credentials file
# ## 4) EC2 Instance Profile
# #access_key = ""
# #secret_key = ""
#
# ## Namespace for the CloudWatch MetricDatums
# namespace = 'InfluxData/Telegraf'
@@ -178,6 +189,9 @@
# servers = ["localhost:2003"]
# ## Prefix metrics name
# prefix = ""
# ## Graphite output template
# ## see https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_OUTPUT.md
# template = "host.tags.measurement.field"
# ## timeout in seconds for the write connection to graphite
# timeout = 2
@@ -236,6 +250,16 @@
# [[outputs.kinesis]]
# ## Amazon REGION of kinesis endpoint.
# region = "ap-southeast-2"
#
# ## Amazon Credentials
# ## Credentials are loaded in the following order
# ## 1) explicit credentials from 'access_key' and 'secret_key'
# ## 2) environment variables
# ## 3) shared credentials file
# ## 4) EC2 Instance Profile
# #access_key = ""
# #secret_key = ""
#
# ## Kinesis StreamName must exist prior to starting telegraf.
# streamname = "StreamName"
# ## PartitionKey as used for sharding data.
@@ -251,22 +275,20 @@
# [[outputs.librato]]
# ## Librator API Docs
# ## http://dev.librato.com/v1/metrics-authentication
#
# ## Librato API user
# api_user = "telegraf@influxdb.com" # required.
#
# ## Librato API token
# api_token = "my-secret-token" # required.
#
# ### Debug
# ## Debug
# # debug = false
#
# ### Tag Field to populate source attribute (optional)
# ### This is typically the _hostname_ from which the metric was obtained.
# ## Tag Field to populate source attribute (optional)
# ## This is typically the _hostname_ from which the metric was obtained.
# source_tag = "host"
#
# ## Connection timeout.
# # timeout = "5s"
# ## Output Name Template (same as graphite buckets)
# ## see https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_OUTPUT.md#graphite
# template = "host.tags.measurement.field"
# # Configuration for MQTT server to send metrics to
@@ -429,6 +451,63 @@
# bcacheDevs = ["bcache0"]
# # Read Cassandra metrics through Jolokia
# [[inputs.cassandra]]
# # This is the context root used to compose the jolokia url
# context = "/jolokia/read"
# ## List of cassandra servers exposing jolokia read service
# servers = ["myuser:mypassword@10.10.10.1:8778","10.10.10.2:8778",":8778"]
# ## List of metrics collected on above servers
# ## Each metric consists of a jmx path.
# ## This will collect all heap memory usage metrics from the jvm and
# ## ReadLatency metrics for all keyspaces and tables.
# ## "type=Table" in the query works with Cassandra3.0. Older versions might
# ## need to use "type=ColumnFamily"
# metrics = [
# "/java.lang:type=Memory/HeapMemoryUsage",
# "/org.apache.cassandra.metrics:type=Table,keyspace=*,scope=*,name=ReadLatency"
# ]
# # Pull Metric Statistics from Amazon CloudWatch
# [[inputs.cloudwatch]]
# ## Amazon Region
# region = 'us-east-1'
#
# ## Amazon Credentials
# ## Credentials are loaded in the following order
# ## 1) explicit credentials from 'access_key' and 'secret_key'
# ## 2) environment variables
# ## 3) shared credentials file
# ## 4) EC2 Instance Profile
# #access_key = ""
# #secret_key = ""
#
# ## Requested CloudWatch aggregation Period (required - must be a multiple of 60s)
# period = '1m'
#
# ## Collection Delay (required - must account for metrics availability via CloudWatch API)
# delay = '1m'
#
# ## Recomended: use metric 'interval' that is a multiple of 'period' to avoid
# ## gaps or overlap in pulled data
# interval = '1m'
#
# ## Metric Statistic Namespace (required)
# namespace = 'AWS/ELB'
#
# ## Metrics to Pull (optional)
# ## Defaults to all Metrics in Namespace if nothing is provided
# ## Refreshes Namespace available metrics every 1h
# #[[inputs.cloudwatch.metrics]]
# # names = ['Latency', 'RequestCount']
# #
# # ## Dimension filters for Metric (optional)
# # [[inputs.cloudwatch.metrics.dimensions]]
# # name = 'LoadBalancerName'
# # value = 'p-example'
# # Read metrics from one or many couchbase clusters
# [[inputs.couchbase]]
# ## specify servers via a url matching:
@@ -486,6 +565,8 @@
# endpoint = "unix:///var/run/docker.sock"
# ## Only collect metrics for these containers, collect all if empty
# container_names = []
# ## Timeout for docker list, info, and stats commands
# timeout = "5s"
# # Read statistics from one or many dovecot servers
@@ -496,8 +577,11 @@
# ##
# ## If no servers are specified, then localhost is used as the host.
# servers = ["localhost:24242"]
# ## Only collect metrics for these domains, collect all if empty
# domains = []
# ## Type is one of "user", "domain", "ip", or "global"
# type = "global"
# ## Wildcard matches like "*.com". An empty string "" is same as "*"
# ## If type = "ip" filters should be <IP/network>
# filters = [""]
# # Read stats from one or more Elasticsearch servers or clusters
@@ -518,6 +602,9 @@
# ## Commands array
# commands = ["/tmp/test.sh", "/usr/bin/mycollector --foo=bar"]
#
# ## Timeout for each command to complete.
# timeout = "5s"
#
# ## measurement name suffix (for separating different commands)
# name_suffix = "_mycollector"
#
@@ -528,6 +615,22 @@
# data_format = "influx"
# # Read stats about given file(s)
# [[inputs.filestat]]
# ## Files to gather stats about.
# ## These accept standard unix glob matching rules, but with the addition of
# ## ** as a "super asterisk". ie:
# ## "/var/log/**.log" -> recursively find all .log files in /var/log
# ## "/var/log/*/*.log" -> find all .log files with a parent dir in /var/log
# ## "/var/log/apache.log" -> just tail the apache log file
# ##
# ## See https://github.com/gobwas/glob for more examples
# ##
# files = ["/var/log/**.log"]
# ## If true, read the entire file and calculate an md5 checksum.
# md5 = false
# # Read metrics of haproxy, via socket or csv stats page
# [[inputs.haproxy]]
# ## An array of address to gather stats about. Specify an ip on hostname
@@ -539,6 +642,25 @@
# ## servers = ["socket://run/haproxy/admin.sock"]
# # HTTP/HTTPS request given an address a method and a timeout
# [[inputs.http_response]]
# ## Server address (default http://localhost)
# address = "http://github.com"
# ## Set response_timeout (default 5 seconds)
# response_timeout = 5
# ## HTTP Request Method
# method = "GET"
# ## Whether to follow redirects from the server (defaults to false)
# follow_redirects = true
# ## HTTP Request Headers (all values must be strings)
# # [inputs.http_response.headers]
# # Host = "github.com"
# ## Optional HTTP Request Body
# # body = '''
# # {'fake':'data'}
# # '''
# # Read flattened metrics from one or more JSON HTTP endpoints
# [[inputs.httpjson]]
# ## NOTE This plugin only reads numerical measurements, strings and booleans
@@ -605,13 +727,24 @@
# # Read JMX metrics through Jolokia
# [[inputs.jolokia]]
# ## This is the context root used to compose the jolokia url
# context = "/jolokia/read"
# context = "/jolokia"
#
# ## This specifies the mode used
# # mode = "proxy"
# #
# ## When in proxy mode this section is used to specify further
# ## proxy address configurations.
# ## Remember to change host address to fit your environment.
# # [inputs.jolokia.proxy]
# # host = "127.0.0.1"
# # port = "8080"
#
#
# ## List of servers exposing jolokia read service
# [[inputs.jolokia.servers]]
# name = "stable"
# host = "192.168.103.2"
# port = "8180"
# name = "as-server-01"
# host = "127.0.0.1"
# port = "8080"
# # username = "myuser"
# # password = "mypassword"
#
@@ -621,7 +754,20 @@
# ## This collect all heap memory usage metrics.
# [[inputs.jolokia.metrics]]
# name = "heap_memory_usage"
# jmx = "/java.lang:type=Memory/HeapMemoryUsage"
# mbean = "java.lang:type=Memory"
# attribute = "HeapMemoryUsage"
#
# ## This collect thread counts metrics.
# [[inputs.jolokia.metrics]]
# name = "thread_count"
# mbean = "java.lang:type=Threading"
# attribute = "TotalStartedThreadCount,ThreadCount,DaemonThreadCount,PeakThreadCount"
#
# ## This collect number of class loaded/unloaded counts metrics.
# [[inputs.jolokia.metrics]]
# name = "class_count"
# mbean = "java.lang:type=ClassLoading"
# attribute = "LoadedClassCount,UnloadedClassCount,TotalLoadedClassCount"
# # Read metrics from a LeoFS Server via SNMP
@@ -638,9 +784,13 @@
# ##
# # ost_procfiles = [
# # "/proc/fs/lustre/obdfilter/*/stats",
# # "/proc/fs/lustre/osd-ldiskfs/*/stats"
# # "/proc/fs/lustre/osd-ldiskfs/*/stats",
# # "/proc/fs/lustre/obdfilter/*/job_stats",
# # ]
# # mds_procfiles = [
# # "/proc/fs/lustre/mdt/*/md_stats",
# # "/proc/fs/lustre/mdt/*/job_stats",
# # ]
# # mds_procfiles = ["/proc/fs/lustre/mdt/*/md_stats"]
# # Gathers metrics from the /3.0/reports MailChimp API
@@ -700,9 +850,52 @@
# ## e.g.
# ## root:passwd@tcp(127.0.0.1:3306)/?tls=false
# ## root@tcp(127.0.0.1:3306)/?tls=false
# ##
# #
# ## If no servers are specified, then localhost is used as the host.
# servers = ["tcp(127.0.0.1:3306)/"]
# ## the limits for metrics form perf_events_statements
# perf_events_statements_digest_text_limit = 120
# perf_events_statements_limit = 250
# perf_events_statements_time_limit = 86400
# #
# ## if the list is empty, then metrics are gathered from all databasee tables
# table_schema_databases = []
# #
# ## gather metrics from INFORMATION_SCHEMA.TABLES for databases provided above list
# gather_table_schema = false
# #
# ## gather thread state counts from INFORMATION_SCHEMA.PROCESSLIST
# gather_process_list = true
# #
# ## gather auto_increment columns and max values from information schema
# gather_info_schema_auto_inc = true
# #
# ## gather metrics from SHOW SLAVE STATUS command output
# gather_slave_status = true
# #
# ## gather metrics from SHOW BINARY LOGS command output
# gather_binary_logs = false
# #
# ## gather metrics from PERFORMANCE_SCHEMA.TABLE_IO_WAITS_SUMMART_BY_TABLE
# gather_table_io_waits = false
# #
# ## gather metrics from PERFORMANCE_SCHEMA.TABLE_LOCK_WAITS
# gather_table_lock_waits = false
# #
# ## gather metrics from PERFORMANCE_SCHEMA.TABLE_IO_WAITS_SUMMART_BY_INDEX_USAGE
# gather_index_io_waits = false
# #
# ## gather metrics from PERFORMANCE_SCHEMA.EVENT_WAITS
# gather_event_waits = false
# #
# ## gather metrics from PERFORMANCE_SCHEMA.FILE_SUMMARY_BY_EVENT_NAME
# gather_file_events_stats = false
# #
# ## gather metrics from PERFORMANCE_SCHEMA.EVENTS_STATEMENTS_SUMMARY_BY_DIGEST
# gather_perf_events_statements = false
# #
# ## Some queries we may want to run less often (such as SHOW GLOBAL VARIABLES)
# interval_slow = "30m"
# # Read metrics about network interface usage
@@ -794,15 +987,15 @@
# [[inputs.ping]]
# ## NOTE: this plugin forks the ping command. You may need to set capabilities
# ## via setcap cap_net_raw+p /bin/ping
#
# #
# ## urls to ping
# urls = ["www.google.com"] # required
# ## number of pings to send (ping -c <COUNT>)
# ## number of pings to send per collection (ping -c <COUNT>)
# count = 1 # required
# ## interval, in s, at which to ping. 0 == default (ping -i <PING_INTERVAL>)
# ping_interval = 0.0
# ## ping timeout, in s. 0 == no timeout (ping -t <TIMEOUT>)
# timeout = 0.0
# ## ping timeout, in s. 0 == no timeout (ping -W <TIMEOUT>)
# timeout = 1.0
# ## interface to send ping from (ping -I <INTERFACE>)
# interface = ""
@@ -848,6 +1041,11 @@
# ## databases are gathered.
# ## databases = ["app_production", "testing"]
# #
# # outputaddress = "db01"
# ## A custom name for the database that will be used as the "server" tag in the
# ## measurement output. If not specified, a default one generated from
# ## the connection address is used.
# #
# ## Define the toml config where the sql queries are stored
# ## New queries can be added, if the withdbname is set to true and there is no
# ## databases defined in the 'databases field', the sql query is ended by a
@@ -858,24 +1056,28 @@
# ## because the databases variable was set to ['postgres', 'pgbench' ] and the
# ## withdbname was true. Be careful that if the withdbname is set to false you
# ## don't have to define the where clause (aka with the dbname) the tagvalue
# ## field is used to define custom tags (separated by comas)
# ## field is used to define custom tags (separated by commas)
# ## The optional "measurement" value can be used to override the default
# ## output measurement name ("postgresql").
# #
# ## Structure :
# ## [[inputs.postgresql_extensible.query]]
# ## sqlquery string
# ## version string
# ## withdbname boolean
# ## tagvalue string (coma separated)
# ## tagvalue string (comma separated)
# ## measurement string
# [[inputs.postgresql_extensible.query]]
# sqlquery="SELECT * FROM pg_stat_database"
# version=901
# withdbname=false
# tagvalue=""
# measurement=""
# [[inputs.postgresql_extensible.query]]
# sqlquery="SELECT * FROM pg_stat_bgwriter"
# version=901
# withdbname=false
# tagvalue=""
# tagvalue="postgresql.stats"
# # Read metrics from one or many PowerDNS servers
@@ -1245,10 +1447,28 @@
# ## calculation of percentiles. Raising this limit increases the accuracy
# ## of percentiles but also increases the memory usage and cpu time.
# percentile_limit = 1000
# # Stream a log file, like the tail -f command
# [[inputs.tail]]
# ## files to tail.
# ## These accept standard unix glob matching rules, but with the addition of
# ## ** as a "super asterisk". ie:
# ## "/var/log/**.log" -> recursively find all .log files in /var/log
# ## "/var/log/*/*.log" -> find all .log files with a parent dir in /var/log
# ## "/var/log/apache.log" -> just tail the apache log file
# ##
# ## See https://github.com/gobwas/glob for more examples
# ##
# files = ["/var/mymetrics.out"]
# ## Read file from beginning.
# from_beginning = false
#
# ## UDP packet size for the server to listen for. This will depend on the size
# ## of the packets that the client is sending, which is usually 1500 bytes.
# udp_packet_size = 1500
# ## Data format to consume.
# ## Each data format has it's own unique set of configuration options, read
# ## more about them here:
# ## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md
# data_format = "influx"
# # Generic TCP listener
@@ -1279,11 +1499,6 @@
# ## UDP listener will start dropping packets.
# allowed_pending_messages = 10000
#
# ## UDP packet size for the server to listen for. This will depend
# ## on the size of the packets that the client is sending, which is
# ## usually 1500 bytes, but can be as large as 65,535 bytes.
# udp_packet_size = 1500
#
# ## Data format to consume.
# ## Each data format has it's own unique set of configuration options, read
# ## more about them here:

77
internal/buffer/buffer.go Normal file
View File

@@ -0,0 +1,77 @@
package buffer
import (
"github.com/influxdata/telegraf"
)
// Buffer is an object for storing metrics in a circular buffer.
type Buffer struct {
buf chan telegraf.Metric
// total dropped metrics
drops int
// total metrics added
total int
}
// NewBuffer returns a Buffer
// size is the maximum number of metrics that Buffer will cache. If Add is
// called when the buffer is full, then the oldest metric(s) will be dropped.
func NewBuffer(size int) *Buffer {
return &Buffer{
buf: make(chan telegraf.Metric, size),
}
}
// IsEmpty returns true if Buffer is empty.
func (b *Buffer) IsEmpty() bool {
return len(b.buf) == 0
}
// Len returns the current length of the buffer.
func (b *Buffer) Len() int {
return len(b.buf)
}
// Drops returns the total number of dropped metrics that have occured in this
// buffer since instantiation.
func (b *Buffer) Drops() int {
return b.drops
}
// Total returns the total number of metrics that have been added to this buffer.
func (b *Buffer) Total() int {
return b.total
}
// Add adds metrics to the buffer.
func (b *Buffer) Add(metrics ...telegraf.Metric) {
for i, _ := range metrics {
b.total++
select {
case b.buf <- metrics[i]:
default:
b.drops++
<-b.buf
b.buf <- metrics[i]
}
}
}
// Batch returns a batch of metrics of size batchSize.
// the batch will be of maximum length batchSize. It can be less than batchSize,
// if the length of Buffer is less than batchSize.
func (b *Buffer) Batch(batchSize int) []telegraf.Metric {
n := min(len(b.buf), batchSize)
out := make([]telegraf.Metric, n)
for i := 0; i < n; i++ {
out[i] = <-b.buf
}
return out
}
func min(a, b int) int {
if b < a {
return b
}
return a
}

View File

@@ -0,0 +1,94 @@
package buffer
import (
"testing"
"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/testutil"
"github.com/stretchr/testify/assert"
)
var metricList = []telegraf.Metric{
testutil.TestMetric(2, "mymetric1"),
testutil.TestMetric(1, "mymetric2"),
testutil.TestMetric(11, "mymetric3"),
testutil.TestMetric(15, "mymetric4"),
testutil.TestMetric(8, "mymetric5"),
}
func BenchmarkAddMetrics(b *testing.B) {
buf := NewBuffer(10000)
m := testutil.TestMetric(1, "mymetric")
for n := 0; n < b.N; n++ {
buf.Add(m)
}
}
func TestNewBufferBasicFuncs(t *testing.T) {
b := NewBuffer(10)
assert.True(t, b.IsEmpty())
assert.Zero(t, b.Len())
assert.Zero(t, b.Drops())
assert.Zero(t, b.Total())
m := testutil.TestMetric(1, "mymetric")
b.Add(m)
assert.False(t, b.IsEmpty())
assert.Equal(t, b.Len(), 1)
assert.Equal(t, b.Drops(), 0)
assert.Equal(t, b.Total(), 1)
b.Add(metricList...)
assert.False(t, b.IsEmpty())
assert.Equal(t, b.Len(), 6)
assert.Equal(t, b.Drops(), 0)
assert.Equal(t, b.Total(), 6)
}
func TestDroppingMetrics(t *testing.T) {
b := NewBuffer(10)
// Add up to the size of the buffer
b.Add(metricList...)
b.Add(metricList...)
assert.False(t, b.IsEmpty())
assert.Equal(t, b.Len(), 10)
assert.Equal(t, b.Drops(), 0)
assert.Equal(t, b.Total(), 10)
// Add 5 more and verify they were dropped
b.Add(metricList...)
assert.False(t, b.IsEmpty())
assert.Equal(t, b.Len(), 10)
assert.Equal(t, b.Drops(), 5)
assert.Equal(t, b.Total(), 15)
}
func TestGettingBatches(t *testing.T) {
b := NewBuffer(20)
// Verify that the buffer returned is smaller than requested when there are
// not as many items as requested.
b.Add(metricList...)
batch := b.Batch(10)
assert.Len(t, batch, 5)
// Verify that the buffer is now empty
assert.True(t, b.IsEmpty())
assert.Zero(t, b.Len())
assert.Zero(t, b.Drops())
assert.Equal(t, b.Total(), 5)
// Verify that the buffer returned is not more than the size requested
b.Add(metricList...)
batch = b.Batch(3)
assert.Len(t, batch, 3)
// Verify that buffer is not empty
assert.False(t, b.IsEmpty())
assert.Equal(t, b.Len(), 2)
assert.Equal(t, b.Drops(), 0)
assert.Equal(t, b.Total(), 10)
}

View File

@@ -93,9 +93,15 @@ type AgentConfig struct {
// ie, a jitter of 5s and interval 10s means flushes will happen every 10-15s
FlushJitter internal.Duration
// MetricBatchSize is the maximum number of metrics that is wrote to an
// output plugin in one call.
MetricBatchSize int
// MetricBufferLimit is the max number of metrics that each output plugin
// will cache. The buffer is cleared when a successful write occurs. When
// full, the oldest metrics will be overwritten.
// full, the oldest metrics will be overwritten. This number should be a
// multiple of MetricBatchSize. Due to current implementation, this could
// not be less than 2 times MetricBatchSize.
MetricBufferLimit int
// FlushBufferWhenFull tells Telegraf to flush the metric buffer whenever
@@ -182,11 +188,13 @@ var header = `# Telegraf Configuration
## ie, if interval="10s" then always collect on :00, :10, :20, etc.
round_interval = true
## Telegraf will cache metric_buffer_limit metrics for each output, and will
## flush this buffer on a successful write.
metric_buffer_limit = 1000
## Flush the buffer whenever full, regardless of flush_interval.
flush_buffer_when_full = true
## Telegraf will send metrics to outputs in batches of at
## most metric_batch_size metrics.
metric_batch_size = 1000
## For failed writes, telegraf will cache metric_buffer_limit metrics for each
## output, and will flush this buffer on a successful write. Oldest metrics
## are dropped first when this buffer fills.
metric_buffer_limit = 10000
## Collection jitter is used to jitter the collection by a random amount.
## Each plugin will sleep for a random time within jitter before collecting.
@@ -404,13 +412,67 @@ func (c *Config) LoadDirectory(path string) error {
return nil
}
// Try to find a default config file at these locations (in order):
// 1. $TELEGRAF_CONFIG_PATH
// 2. $HOME/.telegraf/telegraf.conf
// 3. /etc/telegraf/telegraf.conf
//
func getDefaultConfigPath() (string, error) {
envfile := os.Getenv("TELEGRAF_CONFIG_PATH")
homefile := os.ExpandEnv("${HOME}/.telegraf/telegraf.conf")
etcfile := "/etc/telegraf/telegraf.conf"
for _, path := range []string{envfile, homefile, etcfile} {
if _, err := os.Stat(path); err == nil {
log.Printf("Using config file: %s", path)
return path, nil
}
}
// if we got here, we didn't find a file in a default location
return "", fmt.Errorf("No config file specified, and could not find one"+
" in $TELEGRAF_CONFIG_PATH, %s, or %s", homefile, etcfile)
}
// LoadConfig loads the given config file and applies it to c
func (c *Config) LoadConfig(path string) error {
var err error
if path == "" {
if path, err = getDefaultConfigPath(); err != nil {
return err
}
}
tbl, err := parseFile(path)
if err != nil {
return fmt.Errorf("Error parsing %s, %s", path, err)
}
// Parse tags tables first:
for _, tableName := range []string{"tags", "global_tags"} {
if val, ok := tbl.Fields[tableName]; ok {
subTable, ok := val.(*ast.Table)
if !ok {
return fmt.Errorf("%s: invalid configuration", path)
}
if err = config.UnmarshalTable(subTable, c.Tags); err != nil {
log.Printf("Could not parse [global_tags] config\n")
return fmt.Errorf("Error parsing %s, %s", path, err)
}
}
}
// Parse agent table:
if val, ok := tbl.Fields["agent"]; ok {
subTable, ok := val.(*ast.Table)
if !ok {
return fmt.Errorf("%s: invalid configuration", path)
}
if err = config.UnmarshalTable(subTable, c.Agent); err != nil {
log.Printf("Could not parse [agent] config\n")
return fmt.Errorf("Error parsing %s, %s", path, err)
}
}
// Parse all the rest of the plugins:
for name, val := range tbl.Fields {
subTable, ok := val.(*ast.Table)
if !ok {
@@ -418,16 +480,7 @@ func (c *Config) LoadConfig(path string) error {
}
switch name {
case "agent":
if err = config.UnmarshalTable(subTable, c.Agent); err != nil {
log.Printf("Could not parse [agent] config\n")
return fmt.Errorf("Error parsing %s, %s", path, err)
}
case "global_tags", "tags":
if err = config.UnmarshalTable(subTable, c.Tags); err != nil {
log.Printf("Could not parse [global_tags] config\n")
return fmt.Errorf("Error parsing %s, %s", path, err)
}
case "agent", "global_tags", "tags":
case "outputs":
for pluginName, pluginVal := range subTable.Fields {
switch pluginSubTable := pluginVal.(type) {
@@ -525,11 +578,8 @@ func (c *Config) addOutput(name string, table *ast.Table) error {
return err
}
ro := internal_models.NewRunningOutput(name, output, outputConfig)
if c.Agent.MetricBufferLimit > 0 {
ro.MetricBufferLimit = c.Agent.MetricBufferLimit
}
ro.FlushBufferWhenFull = c.Agent.FlushBufferWhenFull
ro := internal_models.NewRunningOutput(name, output, outputConfig,
c.Agent.MetricBatchSize, c.Agent.MetricBufferLimit)
c.Outputs = append(c.Outputs, ro)
return nil
}
@@ -580,9 +630,9 @@ func (c *Config) addInput(name string, table *ast.Table) error {
// buildFilter builds a Filter
// (tagpass/tagdrop/namepass/namedrop/fieldpass/fielddrop) to
// be inserted into the internal_models.OutputConfig/internal_models.InputConfig to be used for prefix
// filtering on tags and measurements
func buildFilter(tbl *ast.Table) internal_models.Filter {
// be inserted into the internal_models.OutputConfig/internal_models.InputConfig
// to be used for glob filtering on tags and measurements
func buildFilter(tbl *ast.Table) (internal_models.Filter, error) {
f := internal_models.Filter{}
if node, ok := tbl.Fields["namepass"]; ok {
@@ -681,6 +731,33 @@ func buildFilter(tbl *ast.Table) internal_models.Filter {
}
}
if node, ok := tbl.Fields["tagexclude"]; ok {
if kv, ok := node.(*ast.KeyValue); ok {
if ary, ok := kv.Value.(*ast.Array); ok {
for _, elem := range ary.Value {
if str, ok := elem.(*ast.String); ok {
f.TagExclude = append(f.TagExclude, str.Value)
}
}
}
}
}
if node, ok := tbl.Fields["taginclude"]; ok {
if kv, ok := node.(*ast.KeyValue); ok {
if ary, ok := kv.Value.(*ast.Array); ok {
for _, elem := range ary.Value {
if str, ok := elem.(*ast.String); ok {
f.TagInclude = append(f.TagInclude, str.Value)
}
}
}
}
}
if err := f.CompileFilter(); err != nil {
return f, err
}
delete(tbl.Fields, "namedrop")
delete(tbl.Fields, "namepass")
delete(tbl.Fields, "fielddrop")
@@ -689,7 +766,9 @@ func buildFilter(tbl *ast.Table) internal_models.Filter {
delete(tbl.Fields, "pass")
delete(tbl.Fields, "tagdrop")
delete(tbl.Fields, "tagpass")
return f
delete(tbl.Fields, "tagexclude")
delete(tbl.Fields, "taginclude")
return f, nil
}
// buildInput parses input specific items from the ast.Table,
@@ -748,7 +827,11 @@ func buildInput(name string, tbl *ast.Table) (*internal_models.InputConfig, erro
delete(tbl.Fields, "name_override")
delete(tbl.Fields, "interval")
delete(tbl.Fields, "tags")
cp.Filter = buildFilter(tbl)
var err error
cp.Filter, err = buildFilter(tbl)
if err != nil {
return cp, err
}
return cp, nil
}
@@ -850,18 +933,32 @@ func buildSerializer(name string, tbl *ast.Table) (serializers.Serializer, error
}
}
if node, ok := tbl.Fields["template"]; ok {
if kv, ok := node.(*ast.KeyValue); ok {
if str, ok := kv.Value.(*ast.String); ok {
c.Template = str.Value
}
}
}
delete(tbl.Fields, "data_format")
delete(tbl.Fields, "prefix")
delete(tbl.Fields, "template")
return serializers.NewSerializer(c)
}
// buildOutput parses output specific items from the ast.Table, builds the filter and returns an
// buildOutput parses output specific items from the ast.Table,
// builds the filter and returns an
// internal_models.OutputConfig to be inserted into internal_models.RunningInput
// Note: error exists in the return for future calls that might require error
func buildOutput(name string, tbl *ast.Table) (*internal_models.OutputConfig, error) {
filter, err := buildFilter(tbl)
if err != nil {
return nil, err
}
oc := &internal_models.OutputConfig{
Name: name,
Filter: buildFilter(tbl),
Filter: filter,
}
// Outputs don't support FieldDrop/FieldPass, so set to NameDrop/NamePass
if len(oc.Filter.FieldDrop) > 0 {

View File

@@ -26,27 +26,29 @@ func TestConfig_LoadSingleInputWithEnvVars(t *testing.T) {
memcached := inputs.Inputs["memcached"]().(*memcached.Memcached)
memcached.Servers = []string{"192.168.1.1"}
mConfig := &internal_models.InputConfig{
Name: "memcached",
Filter: internal_models.Filter{
NameDrop: []string{"metricname2"},
NamePass: []string{"metricname1"},
FieldDrop: []string{"other", "stuff"},
FieldPass: []string{"some", "strings"},
TagDrop: []internal_models.TagFilter{
internal_models.TagFilter{
Name: "badtag",
Filter: []string{"othertag"},
},
filter := internal_models.Filter{
NameDrop: []string{"metricname2"},
NamePass: []string{"metricname1"},
FieldDrop: []string{"other", "stuff"},
FieldPass: []string{"some", "strings"},
TagDrop: []internal_models.TagFilter{
internal_models.TagFilter{
Name: "badtag",
Filter: []string{"othertag"},
},
TagPass: []internal_models.TagFilter{
internal_models.TagFilter{
Name: "goodtag",
Filter: []string{"mytag"},
},
},
IsActive: true,
},
TagPass: []internal_models.TagFilter{
internal_models.TagFilter{
Name: "goodtag",
Filter: []string{"mytag"},
},
},
IsActive: true,
}
assert.NoError(t, filter.CompileFilter())
mConfig := &internal_models.InputConfig{
Name: "memcached",
Filter: filter,
Interval: 10 * time.Second,
}
mConfig.Tags = make(map[string]string)
@@ -64,27 +66,29 @@ func TestConfig_LoadSingleInput(t *testing.T) {
memcached := inputs.Inputs["memcached"]().(*memcached.Memcached)
memcached.Servers = []string{"localhost"}
mConfig := &internal_models.InputConfig{
Name: "memcached",
Filter: internal_models.Filter{
NameDrop: []string{"metricname2"},
NamePass: []string{"metricname1"},
FieldDrop: []string{"other", "stuff"},
FieldPass: []string{"some", "strings"},
TagDrop: []internal_models.TagFilter{
internal_models.TagFilter{
Name: "badtag",
Filter: []string{"othertag"},
},
filter := internal_models.Filter{
NameDrop: []string{"metricname2"},
NamePass: []string{"metricname1"},
FieldDrop: []string{"other", "stuff"},
FieldPass: []string{"some", "strings"},
TagDrop: []internal_models.TagFilter{
internal_models.TagFilter{
Name: "badtag",
Filter: []string{"othertag"},
},
TagPass: []internal_models.TagFilter{
internal_models.TagFilter{
Name: "goodtag",
Filter: []string{"mytag"},
},
},
IsActive: true,
},
TagPass: []internal_models.TagFilter{
internal_models.TagFilter{
Name: "goodtag",
Filter: []string{"mytag"},
},
},
IsActive: true,
}
assert.NoError(t, filter.CompileFilter())
mConfig := &internal_models.InputConfig{
Name: "memcached",
Filter: filter,
Interval: 5 * time.Second,
}
mConfig.Tags = make(map[string]string)
@@ -109,27 +113,29 @@ func TestConfig_LoadDirectory(t *testing.T) {
memcached := inputs.Inputs["memcached"]().(*memcached.Memcached)
memcached.Servers = []string{"localhost"}
mConfig := &internal_models.InputConfig{
Name: "memcached",
Filter: internal_models.Filter{
NameDrop: []string{"metricname2"},
NamePass: []string{"metricname1"},
FieldDrop: []string{"other", "stuff"},
FieldPass: []string{"some", "strings"},
TagDrop: []internal_models.TagFilter{
internal_models.TagFilter{
Name: "badtag",
Filter: []string{"othertag"},
},
filter := internal_models.Filter{
NameDrop: []string{"metricname2"},
NamePass: []string{"metricname1"},
FieldDrop: []string{"other", "stuff"},
FieldPass: []string{"some", "strings"},
TagDrop: []internal_models.TagFilter{
internal_models.TagFilter{
Name: "badtag",
Filter: []string{"othertag"},
},
TagPass: []internal_models.TagFilter{
internal_models.TagFilter{
Name: "goodtag",
Filter: []string{"mytag"},
},
},
IsActive: true,
},
TagPass: []internal_models.TagFilter{
internal_models.TagFilter{
Name: "goodtag",
Filter: []string{"mytag"},
},
},
IsActive: true,
}
assert.NoError(t, filter.CompileFilter())
mConfig := &internal_models.InputConfig{
Name: "memcached",
Filter: filter,
Interval: 5 * time.Second,
}
mConfig.Tags = make(map[string]string)

View File

@@ -0,0 +1,98 @@
package globpath
import (
"fmt"
"os"
"path/filepath"
"strings"
"github.com/gobwas/glob"
)
var sepStr = fmt.Sprintf("%v", string(os.PathSeparator))
type GlobPath struct {
path string
hasMeta bool
g glob.Glob
root string
}
func Compile(path string) (*GlobPath, error) {
out := GlobPath{
hasMeta: hasMeta(path),
path: path,
}
// if there are no glob meta characters in the path, don't bother compiling
// a glob object or finding the root directory. (see short-circuit in Match)
if !out.hasMeta {
return &out, nil
}
var err error
if out.g, err = glob.Compile(path, os.PathSeparator); err != nil {
return nil, err
}
// Get the root directory for this filepath
out.root = findRootDir(path)
return &out, nil
}
func (g *GlobPath) Match() map[string]os.FileInfo {
if !g.hasMeta {
out := make(map[string]os.FileInfo)
info, err := os.Stat(g.path)
if !os.IsNotExist(err) {
out[g.path] = info
}
return out
}
return walkFilePath(g.root, g.g)
}
// walk the filepath from the given root and return a list of files that match
// the given glob.
func walkFilePath(root string, g glob.Glob) map[string]os.FileInfo {
matchedFiles := make(map[string]os.FileInfo)
walkfn := func(path string, info os.FileInfo, _ error) error {
if g.Match(path) {
matchedFiles[path] = info
}
return nil
}
filepath.Walk(root, walkfn)
return matchedFiles
}
// find the root dir of the given path (could include globs).
// ie:
// /var/log/telegraf.conf -> /var/log
// /home/** -> /home
// /home/*/** -> /home
// /lib/share/*/*/**.txt -> /lib/share
func findRootDir(path string) string {
pathItems := strings.Split(path, sepStr)
out := sepStr
for i, item := range pathItems {
if i == len(pathItems)-1 {
break
}
if item == "" {
continue
}
if hasMeta(item) {
break
}
out += item + sepStr
}
if out != "/" {
out = strings.TrimSuffix(out, "/")
}
return out
}
// hasMeta reports whether path contains any magic glob characters.
func hasMeta(path string) bool {
return strings.IndexAny(path, "*?[") >= 0
}

View File

@@ -0,0 +1,62 @@
package globpath
import (
"runtime"
"strings"
"testing"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
func TestCompileAndMatch(t *testing.T) {
dir := getTestdataDir()
// test super asterisk
g1, err := Compile(dir + "/**")
require.NoError(t, err)
// test single asterisk
g2, err := Compile(dir + "/*.log")
require.NoError(t, err)
// test no meta characters (file exists)
g3, err := Compile(dir + "/log1.log")
require.NoError(t, err)
// test file that doesn't exist
g4, err := Compile(dir + "/i_dont_exist.log")
require.NoError(t, err)
// test super asterisk that doesn't exist
g5, err := Compile(dir + "/dir_doesnt_exist/**")
require.NoError(t, err)
matches := g1.Match()
assert.Len(t, matches, 3)
matches = g2.Match()
assert.Len(t, matches, 2)
matches = g3.Match()
assert.Len(t, matches, 1)
matches = g4.Match()
assert.Len(t, matches, 0)
matches = g5.Match()
assert.Len(t, matches, 0)
}
func TestFindRootDir(t *testing.T) {
tests := []struct {
input string
output string
}{
{"/var/log/telegraf.conf", "/var/log"},
{"/home/**", "/home"},
{"/home/*/**", "/home"},
{"/lib/share/*/*/**.txt", "/lib/share"},
}
for _, test := range tests {
actual := findRootDir(test.input)
assert.Equal(t, test.output, actual)
}
}
func getTestdataDir() string {
_, filename, _, _ := runtime.Caller(1)
return strings.Replace(filename, "globpath_test.go", "testdata", 1)
}

0
internal/globpath/testdata/log1.log vendored Normal file
View File

0
internal/globpath/testdata/log2.log vendored Normal file
View File

5
internal/globpath/testdata/test.conf vendored Normal file
View File

@@ -0,0 +1,5 @@
# this is a fake testing config file
# for testing the filestat plugin
option1 = "foo"
option2 = "bar"

View File

@@ -2,13 +2,16 @@ package internal
import (
"bufio"
"bytes"
"crypto/rand"
"crypto/tls"
"crypto/x509"
"errors"
"fmt"
"io/ioutil"
"log"
"os"
"os/exec"
"strings"
"time"
"unicode"
@@ -16,6 +19,12 @@ import (
const alphanum string = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
var (
TimeoutErr = errors.New("Command timed out.")
NotImplementedError = errors.New("not implemented yet")
)
// Duration just wraps time.Duration
type Duration struct {
Duration time.Duration
@@ -33,8 +42,6 @@ func (d *Duration) UnmarshalTOML(b []byte) error {
return nil
}
var NotImplementedError = errors.New("not implemented yet")
// ReadLines reads contents from a file and splits them by new lines.
// A convenience wrapper to ReadLinesOffsetN(filename, 0, -1).
func ReadLines(filename string) ([]string, error) {
@@ -140,58 +147,47 @@ func SnakeCase(in string) string {
return string(out)
}
// Glob will test a string pattern, potentially containing globs, against a
// subject string. The result is a simple true/false, determining whether or
// not the glob pattern matched the subject text.
//
// Adapted from https://github.com/ryanuber/go-glob/blob/master/glob.go
// thanks Ryan Uber!
func Glob(pattern, measurement string) bool {
// Empty pattern can only match empty subject
if pattern == "" {
return measurement == pattern
// CombinedOutputTimeout runs the given command with the given timeout and
// returns the combined output of stdout and stderr.
// If the command times out, it attempts to kill the process.
func CombinedOutputTimeout(c *exec.Cmd, timeout time.Duration) ([]byte, error) {
var b bytes.Buffer
c.Stdout = &b
c.Stderr = &b
if err := c.Start(); err != nil {
return nil, err
}
err := WaitTimeout(c, timeout)
return b.Bytes(), err
}
// RunTimeout runs the given command with the given timeout.
// If the command times out, it attempts to kill the process.
func RunTimeout(c *exec.Cmd, timeout time.Duration) error {
if err := c.Start(); err != nil {
return err
}
return WaitTimeout(c, timeout)
}
// WaitTimeout waits for the given command to finish with a timeout.
// It assumes the command has already been started.
// If the command times out, it attempts to kill the process.
func WaitTimeout(c *exec.Cmd, timeout time.Duration) error {
timer := time.NewTimer(timeout)
done := make(chan error)
go func() { done <- c.Wait() }()
select {
case err := <-done:
timer.Stop()
return err
case <-timer.C:
if err := c.Process.Kill(); err != nil {
log.Printf("FATAL error killing process: %s", err)
return err
}
// wait for the command to return after killing it
<-done
return TimeoutErr
}
// If the pattern _is_ a glob, it matches everything
if pattern == "*" {
return true
}
parts := strings.Split(pattern, "*")
if len(parts) == 1 {
// No globs in pattern, so test for match
return pattern == measurement
}
leadingGlob := strings.HasPrefix(pattern, "*")
trailingGlob := strings.HasSuffix(pattern, "*")
end := len(parts) - 1
for i, part := range parts {
switch i {
case 0:
if leadingGlob {
continue
}
if !strings.HasPrefix(measurement, part) {
return false
}
case end:
if len(measurement) > 0 {
return trailingGlob || strings.HasSuffix(measurement, part)
}
default:
if !strings.Contains(measurement, part) {
return false
}
}
// Trim evaluated text from measurement as we loop over the pattern.
idx := strings.Index(measurement, part) + len(part)
measurement = measurement[idx:]
}
// All parts of the pattern matched
return true
}

View File

@@ -1,47 +1,12 @@
package internal
import "testing"
import (
"os/exec"
"testing"
"time"
func testGlobMatch(t *testing.T, pattern, subj string) {
if !Glob(pattern, subj) {
t.Errorf("%s should match %s", pattern, subj)
}
}
func testGlobNoMatch(t *testing.T, pattern, subj string) {
if Glob(pattern, subj) {
t.Errorf("%s should not match %s", pattern, subj)
}
}
func TestEmptyPattern(t *testing.T) {
testGlobMatch(t, "", "")
testGlobNoMatch(t, "", "test")
}
func TestPatternWithoutGlobs(t *testing.T) {
testGlobMatch(t, "test", "test")
}
func TestGlob(t *testing.T) {
for _, pattern := range []string{
"*test", // Leading glob
"this*", // Trailing glob
"*is*a*", // Lots of globs
"**test**", // Double glob characters
"**is**a***test*", // Varying number of globs
} {
testGlobMatch(t, pattern, "this_is_a_test")
}
for _, pattern := range []string{
"test*", // Implicit substring match should fail
"*is", // Partial match should fail
"*no*", // Globs without a match between them should fail
} {
testGlobNoMatch(t, pattern, "this_is_a_test")
}
}
"github.com/stretchr/testify/assert"
)
type SnakeTest struct {
input string
@@ -71,3 +36,73 @@ func TestSnakeCase(t *testing.T) {
}
}
}
var (
sleepbin, _ = exec.LookPath("sleep")
echobin, _ = exec.LookPath("echo")
)
func TestRunTimeout(t *testing.T) {
if sleepbin == "" {
t.Skip("'sleep' binary not available on OS, skipping.")
}
cmd := exec.Command(sleepbin, "10")
start := time.Now()
err := RunTimeout(cmd, time.Millisecond*20)
elapsed := time.Since(start)
assert.Equal(t, TimeoutErr, err)
// Verify that command gets killed in 20ms, with some breathing room
assert.True(t, elapsed < time.Millisecond*75)
}
func TestCombinedOutputTimeout(t *testing.T) {
if sleepbin == "" {
t.Skip("'sleep' binary not available on OS, skipping.")
}
cmd := exec.Command(sleepbin, "10")
start := time.Now()
_, err := CombinedOutputTimeout(cmd, time.Millisecond*20)
elapsed := time.Since(start)
assert.Equal(t, TimeoutErr, err)
// Verify that command gets killed in 20ms, with some breathing room
assert.True(t, elapsed < time.Millisecond*75)
}
func TestCombinedOutput(t *testing.T) {
if echobin == "" {
t.Skip("'echo' binary not available on OS, skipping.")
}
cmd := exec.Command(echobin, "foo")
out, err := CombinedOutputTimeout(cmd, time.Second)
assert.NoError(t, err)
assert.Equal(t, "foo\n", string(out))
}
// test that CombinedOutputTimeout and exec.Cmd.CombinedOutput return
// the same output from a failed command.
func TestCombinedOutputError(t *testing.T) {
if sleepbin == "" {
t.Skip("'sleep' binary not available on OS, skipping.")
}
cmd := exec.Command(sleepbin, "foo")
expected, err := cmd.CombinedOutput()
cmd2 := exec.Command(sleepbin, "foo")
actual, err := CombinedOutputTimeout(cmd2, time.Second)
assert.Error(t, err)
assert.Equal(t, expected, actual)
}
func TestRunError(t *testing.T) {
if sleepbin == "" {
t.Skip("'sleep' binary not available on OS, skipping.")
}
cmd := exec.Command(sleepbin, "foo")
err := RunTimeout(cmd, time.Second)
assert.Error(t, err)
}

View File

@@ -1,33 +1,104 @@
package internal_models
import (
"fmt"
"strings"
"github.com/gobwas/glob"
"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/internal"
)
// TagFilter is the name of a tag, and the values on which to filter
type TagFilter struct {
Name string
Filter []string
filter glob.Glob
}
// Filter containing drop/pass and tagdrop/tagpass rules
type Filter struct {
NameDrop []string
nameDrop glob.Glob
NamePass []string
namePass glob.Glob
FieldDrop []string
fieldDrop glob.Glob
FieldPass []string
fieldPass glob.Glob
TagDrop []TagFilter
TagPass []TagFilter
TagExclude []string
tagExclude glob.Glob
TagInclude []string
tagInclude glob.Glob
IsActive bool
}
func (f Filter) ShouldMetricPass(metric telegraf.Metric) bool {
// Compile all Filter lists into glob.Glob objects.
func (f *Filter) CompileFilter() error {
var err error
f.nameDrop, err = compileFilter(f.NameDrop)
if err != nil {
return fmt.Errorf("Error compiling 'namedrop', %s", err)
}
f.namePass, err = compileFilter(f.NamePass)
if err != nil {
return fmt.Errorf("Error compiling 'namepass', %s", err)
}
f.fieldDrop, err = compileFilter(f.FieldDrop)
if err != nil {
return fmt.Errorf("Error compiling 'fielddrop', %s", err)
}
f.fieldPass, err = compileFilter(f.FieldPass)
if err != nil {
return fmt.Errorf("Error compiling 'fieldpass', %s", err)
}
f.tagExclude, err = compileFilter(f.TagExclude)
if err != nil {
return fmt.Errorf("Error compiling 'tagexclude', %s", err)
}
f.tagInclude, err = compileFilter(f.TagInclude)
if err != nil {
return fmt.Errorf("Error compiling 'taginclude', %s", err)
}
for i, _ := range f.TagDrop {
f.TagDrop[i].filter, err = compileFilter(f.TagDrop[i].Filter)
if err != nil {
return fmt.Errorf("Error compiling 'tagdrop', %s", err)
}
}
for i, _ := range f.TagPass {
f.TagPass[i].filter, err = compileFilter(f.TagPass[i].Filter)
if err != nil {
return fmt.Errorf("Error compiling 'tagpass', %s", err)
}
}
return nil
}
func compileFilter(filter []string) (glob.Glob, error) {
if len(filter) == 0 {
return nil, nil
}
var g glob.Glob
var err error
if len(filter) == 1 {
g, err = glob.Compile(filter[0])
} else {
g, err = glob.Compile("{" + strings.Join(filter, ",") + "}")
}
return g, err
}
func (f *Filter) ShouldMetricPass(metric telegraf.Metric) bool {
if f.ShouldNamePass(metric.Name()) && f.ShouldTagsPass(metric.Tags()) {
return true
}
@@ -36,70 +107,51 @@ func (f Filter) ShouldMetricPass(metric telegraf.Metric) bool {
// ShouldFieldsPass returns true if the metric should pass, false if should drop
// based on the drop/pass filter parameters
func (f Filter) ShouldNamePass(key string) bool {
if f.NamePass != nil {
for _, pat := range f.NamePass {
// TODO remove HasPrefix check, leaving it for now for legacy support.
// Cam, 2015-12-07
if strings.HasPrefix(key, pat) || internal.Glob(pat, key) {
return true
}
func (f *Filter) ShouldNamePass(key string) bool {
if f.namePass != nil {
if f.namePass.Match(key) {
return true
}
return false
}
if f.NameDrop != nil {
for _, pat := range f.NameDrop {
// TODO remove HasPrefix check, leaving it for now for legacy support.
// Cam, 2015-12-07
if strings.HasPrefix(key, pat) || internal.Glob(pat, key) {
return false
}
if f.nameDrop != nil {
if f.nameDrop.Match(key) {
return false
}
return true
}
return true
}
// ShouldFieldsPass returns true if the metric should pass, false if should drop
// based on the drop/pass filter parameters
func (f Filter) ShouldFieldsPass(key string) bool {
if f.FieldPass != nil {
for _, pat := range f.FieldPass {
// TODO remove HasPrefix check, leaving it for now for legacy support.
// Cam, 2015-12-07
if strings.HasPrefix(key, pat) || internal.Glob(pat, key) {
return true
}
func (f *Filter) ShouldFieldsPass(key string) bool {
if f.fieldPass != nil {
if f.fieldPass.Match(key) {
return true
}
return false
}
if f.FieldDrop != nil {
for _, pat := range f.FieldDrop {
// TODO remove HasPrefix check, leaving it for now for legacy support.
// Cam, 2015-12-07
if strings.HasPrefix(key, pat) || internal.Glob(pat, key) {
return false
}
if f.fieldDrop != nil {
if f.fieldDrop.Match(key) {
return false
}
return true
}
return true
}
// ShouldTagsPass returns true if the metric should pass, false if should drop
// based on the tagdrop/tagpass filter parameters
func (f Filter) ShouldTagsPass(tags map[string]string) bool {
func (f *Filter) ShouldTagsPass(tags map[string]string) bool {
if f.TagPass != nil {
for _, pat := range f.TagPass {
if pat.filter == nil {
continue
}
if tagval, ok := tags[pat.Name]; ok {
for _, filter := range pat.Filter {
if internal.Glob(filter, tagval) {
return true
}
if pat.filter.Match(tagval) {
return true
}
}
}
@@ -108,11 +160,12 @@ func (f Filter) ShouldTagsPass(tags map[string]string) bool {
if f.TagDrop != nil {
for _, pat := range f.TagDrop {
if pat.filter == nil {
continue
}
if tagval, ok := tags[pat.Name]; ok {
for _, filter := range pat.Filter {
if internal.Glob(filter, tagval) {
return false
}
if pat.filter.Match(tagval) {
return false
}
}
}
@@ -121,3 +174,23 @@ func (f Filter) ShouldTagsPass(tags map[string]string) bool {
return true
}
// Apply TagInclude and TagExclude filters.
// modifies the tags map in-place.
func (f *Filter) FilterTags(tags map[string]string) {
if f.tagInclude != nil {
for k, _ := range tags {
if !f.tagInclude.Match(k) {
delete(tags, k)
}
}
}
if f.tagExclude != nil {
for k, _ := range tags {
if f.tagExclude.Match(k) {
delete(tags, k)
}
}
}
}

View File

@@ -2,6 +2,11 @@ package internal_models
import (
"testing"
"github.com/influxdata/telegraf/testutil"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
func TestFilter_Empty(t *testing.T) {
@@ -28,6 +33,7 @@ func TestFilter_NamePass(t *testing.T) {
f := Filter{
NamePass: []string{"foo*", "cpu_usage_idle"},
}
require.NoError(t, f.CompileFilter())
passes := []string{
"foo",
@@ -61,6 +67,7 @@ func TestFilter_NameDrop(t *testing.T) {
f := Filter{
NameDrop: []string{"foo*", "cpu_usage_idle"},
}
require.NoError(t, f.CompileFilter())
drops := []string{
"foo",
@@ -94,6 +101,7 @@ func TestFilter_FieldPass(t *testing.T) {
f := Filter{
FieldPass: []string{"foo*", "cpu_usage_idle"},
}
require.NoError(t, f.CompileFilter())
passes := []string{
"foo",
@@ -127,6 +135,7 @@ func TestFilter_FieldDrop(t *testing.T) {
f := Filter{
FieldDrop: []string{"foo*", "cpu_usage_idle"},
}
require.NoError(t, f.CompileFilter())
drops := []string{
"foo",
@@ -169,6 +178,7 @@ func TestFilter_TagPass(t *testing.T) {
f := Filter{
TagPass: filters,
}
require.NoError(t, f.CompileFilter())
passes := []map[string]string{
{"cpu": "cpu-total"},
@@ -212,6 +222,7 @@ func TestFilter_TagDrop(t *testing.T) {
f := Filter{
TagDrop: filters,
}
require.NoError(t, f.CompileFilter())
drops := []map[string]string{
{"cpu": "cpu-total"},
@@ -241,3 +252,115 @@ func TestFilter_TagDrop(t *testing.T) {
}
}
}
func TestFilter_CompileFilterError(t *testing.T) {
f := Filter{
NameDrop: []string{"", ""},
}
assert.Error(t, f.CompileFilter())
f = Filter{
NamePass: []string{"", ""},
}
assert.Error(t, f.CompileFilter())
f = Filter{
FieldDrop: []string{"", ""},
}
assert.Error(t, f.CompileFilter())
f = Filter{
FieldPass: []string{"", ""},
}
assert.Error(t, f.CompileFilter())
f = Filter{
TagExclude: []string{"", ""},
}
assert.Error(t, f.CompileFilter())
f = Filter{
TagInclude: []string{"", ""},
}
assert.Error(t, f.CompileFilter())
filters := []TagFilter{
TagFilter{
Name: "cpu",
Filter: []string{"{foobar}"},
}}
f = Filter{
TagDrop: filters,
}
require.Error(t, f.CompileFilter())
filters = []TagFilter{
TagFilter{
Name: "cpu",
Filter: []string{"{foobar}"},
}}
f = Filter{
TagPass: filters,
}
require.Error(t, f.CompileFilter())
}
func TestFilter_ShouldMetricsPass(t *testing.T) {
m := testutil.TestMetric(1, "testmetric")
f := Filter{
NameDrop: []string{"foobar"},
}
require.NoError(t, f.CompileFilter())
require.True(t, f.ShouldMetricPass(m))
m = testutil.TestMetric(1, "foobar")
require.False(t, f.ShouldMetricPass(m))
}
func TestFilter_FilterTagsNoMatches(t *testing.T) {
pretags := map[string]string{
"host": "localhost",
"mytag": "foobar",
}
f := Filter{
TagExclude: []string{"nomatch"},
}
require.NoError(t, f.CompileFilter())
f.FilterTags(pretags)
assert.Equal(t, map[string]string{
"host": "localhost",
"mytag": "foobar",
}, pretags)
f = Filter{
TagInclude: []string{"nomatch"},
}
require.NoError(t, f.CompileFilter())
f.FilterTags(pretags)
assert.Equal(t, map[string]string{}, pretags)
}
func TestFilter_FilterTagsMatches(t *testing.T) {
pretags := map[string]string{
"host": "localhost",
"mytag": "foobar",
}
f := Filter{
TagExclude: []string{"ho*"},
}
require.NoError(t, f.CompileFilter())
f.FilterTags(pretags)
assert.Equal(t, map[string]string{
"mytag": "foobar",
}, pretags)
pretags = map[string]string{
"host": "localhost",
"mytag": "foobar",
}
f = Filter{
TagInclude: []string{"my*"},
}
require.NoError(t, f.CompileFilter())
f.FilterTags(pretags)
assert.Equal(t, map[string]string{
"mytag": "foobar",
}, pretags)
}

View File

@@ -2,48 +2,54 @@ package internal_models
import (
"log"
"sync"
"time"
"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/internal/buffer"
)
const (
// Default number of metrics kept between flushes.
DEFAULT_METRIC_BUFFER_LIMIT = 1000
// Default size of metrics batch size.
DEFAULT_METRIC_BATCH_SIZE = 1000
// Limit how many full metric buffers are kept due to failed writes.
FULL_METRIC_BUFFERS_LIMIT = 100
// Default number of metrics kept. It should be a multiple of batch size.
DEFAULT_METRIC_BUFFER_LIMIT = 10000
)
// RunningOutput contains the output configuration
type RunningOutput struct {
Name string
Output telegraf.Output
Config *OutputConfig
Quiet bool
MetricBufferLimit int
FlushBufferWhenFull bool
Name string
Output telegraf.Output
Config *OutputConfig
Quiet bool
MetricBufferLimit int
MetricBatchSize int
metrics []telegraf.Metric
tmpmetrics map[int][]telegraf.Metric
overwriteI int
mapI int
sync.Mutex
metrics *buffer.Buffer
failMetrics *buffer.Buffer
}
func NewRunningOutput(
name string,
output telegraf.Output,
conf *OutputConfig,
batchSize int,
bufferLimit int,
) *RunningOutput {
if bufferLimit == 0 {
bufferLimit = DEFAULT_METRIC_BUFFER_LIMIT
}
if batchSize == 0 {
batchSize = DEFAULT_METRIC_BATCH_SIZE
}
ro := &RunningOutput{
Name: name,
metrics: make([]telegraf.Metric, 0),
tmpmetrics: make(map[int][]telegraf.Metric),
metrics: buffer.NewBuffer(batchSize),
failMetrics: buffer.NewBuffer(bufferLimit),
Output: output,
Config: conf,
MetricBufferLimit: DEFAULT_METRIC_BUFFER_LIMIT,
MetricBufferLimit: bufferLimit,
MetricBatchSize: batchSize,
}
return ro
}
@@ -56,67 +62,78 @@ func (ro *RunningOutput) AddMetric(metric telegraf.Metric) {
return
}
}
ro.Lock()
defer ro.Unlock()
if len(ro.metrics) < ro.MetricBufferLimit {
ro.metrics = append(ro.metrics, metric)
} else {
if ro.FlushBufferWhenFull {
ro.metrics = append(ro.metrics, metric)
tmpmetrics := make([]telegraf.Metric, len(ro.metrics))
copy(tmpmetrics, ro.metrics)
ro.metrics = make([]telegraf.Metric, 0)
err := ro.write(tmpmetrics)
if err != nil {
log.Printf("ERROR writing full metric buffer to output %s, %s",
ro.Name, err)
if len(ro.tmpmetrics) == FULL_METRIC_BUFFERS_LIMIT {
ro.mapI = 0
// overwrite one
ro.tmpmetrics[ro.mapI] = tmpmetrics
ro.mapI++
} else {
ro.tmpmetrics[ro.mapI] = tmpmetrics
ro.mapI++
}
}
} else {
if ro.overwriteI == 0 {
log.Printf("WARNING: overwriting cached metrics, you may want to " +
"increase the metric_buffer_limit setting in your [agent] " +
"config if you do not wish to overwrite metrics.\n")
}
if ro.overwriteI == len(ro.metrics) {
ro.overwriteI = 0
}
ro.metrics[ro.overwriteI] = metric
ro.overwriteI++
// Filter any tagexclude/taginclude parameters before adding metric
if len(ro.Config.Filter.TagExclude) != 0 || len(ro.Config.Filter.TagInclude) != 0 {
// In order to filter out tags, we need to create a new metric, since
// metrics are immutable once created.
tags := metric.Tags()
fields := metric.Fields()
t := metric.Time()
name := metric.Name()
ro.Config.Filter.FilterTags(tags)
// error is not possible if creating from another metric, so ignore.
metric, _ = telegraf.NewMetric(name, tags, fields, t)
}
ro.metrics.Add(metric)
if ro.metrics.Len() == ro.MetricBatchSize {
batch := ro.metrics.Batch(ro.MetricBatchSize)
err := ro.write(batch)
if err != nil {
ro.failMetrics.Add(batch...)
}
}
}
// Write writes all cached points to this output.
func (ro *RunningOutput) Write() error {
ro.Lock()
defer ro.Unlock()
err := ro.write(ro.metrics)
if err != nil {
return err
} else {
ro.metrics = make([]telegraf.Metric, 0)
ro.overwriteI = 0
if !ro.Quiet {
log.Printf("Output [%s] buffer fullness: %d / %d metrics. "+
"Total gathered metrics: %d. Total dropped metrics: %d.",
ro.Name,
ro.failMetrics.Len()+ro.metrics.Len(),
ro.MetricBufferLimit,
ro.metrics.Total(),
ro.metrics.Drops()+ro.failMetrics.Drops())
}
// Write any cached metric buffers that failed previously
for i, tmpmetrics := range ro.tmpmetrics {
if err := ro.write(tmpmetrics); err != nil {
return err
} else {
delete(ro.tmpmetrics, i)
var err error
if !ro.failMetrics.IsEmpty() {
bufLen := ro.failMetrics.Len()
// how many batches of failed writes we need to write.
nBatches := bufLen/ro.MetricBatchSize + 1
batchSize := ro.MetricBatchSize
for i := 0; i < nBatches; i++ {
// If it's the last batch, only grab the metrics that have not had
// a write attempt already (this is primarily to preserve order).
if i == nBatches-1 {
batchSize = bufLen % ro.MetricBatchSize
}
batch := ro.failMetrics.Batch(batchSize)
// If we've already failed previous writes, don't bother trying to
// write to this output again. We are not exiting the loop just so
// that we can rotate the metrics to preserve order.
if err == nil {
err = ro.write(batch)
}
if err != nil {
ro.failMetrics.Add(batch...)
}
}
}
batch := ro.metrics.Batch(ro.MetricBatchSize)
// see comment above about not trying to write to an already failed output.
// if ro.failMetrics is empty then err will always be nil at this point.
if err == nil {
err = ro.write(batch)
}
if err != nil {
ro.failMetrics.Add(batch...)
return err
}
return nil
}
@@ -129,8 +146,8 @@ func (ro *RunningOutput) write(metrics []telegraf.Metric) error {
elapsed := time.Since(start)
if err == nil {
if !ro.Quiet {
log.Printf("Wrote %d metrics to output %s in %s\n",
len(metrics), ro.Name, elapsed)
log.Printf("Output [%s] wrote batch of %d metrics in %s\n",
ro.Name, len(metrics), elapsed)
}
}
return err

View File

@@ -2,7 +2,6 @@ package internal_models
import (
"fmt"
"sort"
"sync"
"testing"
@@ -29,16 +28,100 @@ var next5 = []telegraf.Metric{
testutil.TestMetric(101, "metric10"),
}
// Test that we can write metrics with simple default setup.
func TestRunningOutputDefault(t *testing.T) {
// Benchmark adding metrics.
func BenchmarkRunningOutputAddWrite(b *testing.B) {
conf := &OutputConfig{
Filter: Filter{
IsActive: false,
},
}
m := &perfOutput{}
ro := NewRunningOutput("test", m, conf, 1000, 10000)
ro.Quiet = true
for n := 0; n < b.N; n++ {
ro.AddMetric(first5[0])
ro.Write()
}
}
// Benchmark adding metrics.
func BenchmarkRunningOutputAddWriteEvery100(b *testing.B) {
conf := &OutputConfig{
Filter: Filter{
IsActive: false,
},
}
m := &perfOutput{}
ro := NewRunningOutput("test", m, conf, 1000, 10000)
ro.Quiet = true
for n := 0; n < b.N; n++ {
ro.AddMetric(first5[0])
if n%100 == 0 {
ro.Write()
}
}
}
// Benchmark adding metrics.
func BenchmarkRunningOutputAddFailWrites(b *testing.B) {
conf := &OutputConfig{
Filter: Filter{
IsActive: false,
},
}
m := &perfOutput{}
m.failWrite = true
ro := NewRunningOutput("test", m, conf, 1000, 10000)
ro.Quiet = true
for n := 0; n < b.N; n++ {
ro.AddMetric(first5[0])
}
}
// Test that NameDrop filters ger properly applied.
func TestRunningOutput_DropFilter(t *testing.T) {
conf := &OutputConfig{
Filter: Filter{
IsActive: true,
NameDrop: []string{"metric1", "metric2"},
},
}
assert.NoError(t, conf.Filter.CompileFilter())
m := &mockOutput{}
ro := NewRunningOutput("test", m, conf)
ro := NewRunningOutput("test", m, conf, 1000, 10000)
for _, metric := range first5 {
ro.AddMetric(metric)
}
for _, metric := range next5 {
ro.AddMetric(metric)
}
assert.Len(t, m.Metrics(), 0)
err := ro.Write()
assert.NoError(t, err)
assert.Len(t, m.Metrics(), 8)
}
// Test that NameDrop filters without a match do nothing.
func TestRunningOutput_PassFilter(t *testing.T) {
conf := &OutputConfig{
Filter: Filter{
IsActive: true,
NameDrop: []string{"metric1000", "foo*"},
},
}
assert.NoError(t, conf.Filter.CompileFilter())
m := &mockOutput{}
ro := NewRunningOutput("test", m, conf, 1000, 10000)
for _, metric := range first5 {
ro.AddMetric(metric)
@@ -53,41 +136,96 @@ func TestRunningOutputDefault(t *testing.T) {
assert.Len(t, m.Metrics(), 10)
}
// Test that the first metric gets overwritten if there is a buffer overflow.
func TestRunningOutputOverwrite(t *testing.T) {
// Test that tags are properly included
func TestRunningOutput_TagIncludeNoMatch(t *testing.T) {
conf := &OutputConfig{
Filter: Filter{
IsActive: false,
IsActive: true,
TagInclude: []string{"nothing*"},
},
}
assert.NoError(t, conf.Filter.CompileFilter())
m := &mockOutput{}
ro := NewRunningOutput("test", m, conf)
ro.MetricBufferLimit = 4
ro := NewRunningOutput("test", m, conf, 1000, 10000)
for _, metric := range first5 {
ro.AddMetric(metric)
}
require.Len(t, m.Metrics(), 0)
ro.AddMetric(first5[0])
assert.Len(t, m.Metrics(), 0)
err := ro.Write()
require.NoError(t, err)
require.Len(t, m.Metrics(), 4)
var expected, actual []string
for i, exp := range first5[1:] {
expected = append(expected, exp.String())
actual = append(actual, m.Metrics()[i].String())
}
sort.Strings(expected)
sort.Strings(actual)
assert.Equal(t, expected, actual)
assert.NoError(t, err)
assert.Len(t, m.Metrics(), 1)
assert.Empty(t, m.Metrics()[0].Tags())
}
// Test that multiple buffer overflows are handled properly.
func TestRunningOutputMultiOverwrite(t *testing.T) {
// Test that tags are properly excluded
func TestRunningOutput_TagExcludeMatch(t *testing.T) {
conf := &OutputConfig{
Filter: Filter{
IsActive: true,
TagExclude: []string{"tag*"},
},
}
assert.NoError(t, conf.Filter.CompileFilter())
m := &mockOutput{}
ro := NewRunningOutput("test", m, conf, 1000, 10000)
ro.AddMetric(first5[0])
assert.Len(t, m.Metrics(), 0)
err := ro.Write()
assert.NoError(t, err)
assert.Len(t, m.Metrics(), 1)
assert.Len(t, m.Metrics()[0].Tags(), 0)
}
// Test that tags are properly Excluded
func TestRunningOutput_TagExcludeNoMatch(t *testing.T) {
conf := &OutputConfig{
Filter: Filter{
IsActive: true,
TagExclude: []string{"nothing*"},
},
}
assert.NoError(t, conf.Filter.CompileFilter())
m := &mockOutput{}
ro := NewRunningOutput("test", m, conf, 1000, 10000)
ro.AddMetric(first5[0])
assert.Len(t, m.Metrics(), 0)
err := ro.Write()
assert.NoError(t, err)
assert.Len(t, m.Metrics(), 1)
assert.Len(t, m.Metrics()[0].Tags(), 1)
}
// Test that tags are properly included
func TestRunningOutput_TagIncludeMatch(t *testing.T) {
conf := &OutputConfig{
Filter: Filter{
IsActive: true,
TagInclude: []string{"tag*"},
},
}
assert.NoError(t, conf.Filter.CompileFilter())
m := &mockOutput{}
ro := NewRunningOutput("test", m, conf, 1000, 10000)
ro.AddMetric(first5[0])
assert.Len(t, m.Metrics(), 0)
err := ro.Write()
assert.NoError(t, err)
assert.Len(t, m.Metrics(), 1)
assert.Len(t, m.Metrics()[0].Tags(), 1)
}
// Test that we can write metrics with simple default setup.
func TestRunningOutputDefault(t *testing.T) {
conf := &OutputConfig{
Filter: Filter{
IsActive: false,
@@ -95,8 +233,7 @@ func TestRunningOutputMultiOverwrite(t *testing.T) {
}
m := &mockOutput{}
ro := NewRunningOutput("test", m, conf)
ro.MetricBufferLimit = 3
ro := NewRunningOutput("test", m, conf, 1000, 10000)
for _, metric := range first5 {
ro.AddMetric(metric)
@@ -104,22 +241,11 @@ func TestRunningOutputMultiOverwrite(t *testing.T) {
for _, metric := range next5 {
ro.AddMetric(metric)
}
require.Len(t, m.Metrics(), 0)
assert.Len(t, m.Metrics(), 0)
err := ro.Write()
require.NoError(t, err)
require.Len(t, m.Metrics(), 3)
var expected, actual []string
for i, exp := range next5[2:] {
expected = append(expected, exp.String())
actual = append(actual, m.Metrics()[i].String())
}
sort.Strings(expected)
sort.Strings(actual)
assert.Equal(t, expected, actual)
assert.NoError(t, err)
assert.Len(t, m.Metrics(), 10)
}
// Test that running output doesn't flush until it's full when
@@ -132,11 +258,9 @@ func TestRunningOutputFlushWhenFull(t *testing.T) {
}
m := &mockOutput{}
ro := NewRunningOutput("test", m, conf)
ro.FlushBufferWhenFull = true
ro.MetricBufferLimit = 5
ro := NewRunningOutput("test", m, conf, 6, 10)
// Fill buffer to limit
// Fill buffer to 1 under limit
for _, metric := range first5 {
ro.AddMetric(metric)
}
@@ -165,9 +289,7 @@ func TestRunningOutputMultiFlushWhenFull(t *testing.T) {
}
m := &mockOutput{}
ro := NewRunningOutput("test", m, conf)
ro.FlushBufferWhenFull = true
ro.MetricBufferLimit = 4
ro := NewRunningOutput("test", m, conf, 4, 12)
// Fill buffer past limit twive
for _, metric := range first5 {
@@ -177,7 +299,7 @@ func TestRunningOutputMultiFlushWhenFull(t *testing.T) {
ro.AddMetric(metric)
}
// flushed twice
assert.Len(t, m.Metrics(), 10)
assert.Len(t, m.Metrics(), 8)
}
func TestRunningOutputWriteFail(t *testing.T) {
@@ -189,11 +311,9 @@ func TestRunningOutputWriteFail(t *testing.T) {
m := &mockOutput{}
m.failWrite = true
ro := NewRunningOutput("test", m, conf)
ro.FlushBufferWhenFull = true
ro.MetricBufferLimit = 4
ro := NewRunningOutput("test", m, conf, 4, 12)
// Fill buffer past limit twice
// Fill buffer to limit twice
for _, metric := range first5 {
ro.AddMetric(metric)
}
@@ -216,6 +336,161 @@ func TestRunningOutputWriteFail(t *testing.T) {
assert.Len(t, m.Metrics(), 10)
}
// Verify that the order of points is preserved during a write failure.
func TestRunningOutputWriteFailOrder(t *testing.T) {
conf := &OutputConfig{
Filter: Filter{
IsActive: false,
},
}
m := &mockOutput{}
m.failWrite = true
ro := NewRunningOutput("test", m, conf, 100, 1000)
// add 5 metrics
for _, metric := range first5 {
ro.AddMetric(metric)
}
// no successful flush yet
assert.Len(t, m.Metrics(), 0)
// Write fails
err := ro.Write()
require.Error(t, err)
// no successful flush yet
assert.Len(t, m.Metrics(), 0)
m.failWrite = false
// add 5 more metrics
for _, metric := range next5 {
ro.AddMetric(metric)
}
err = ro.Write()
require.NoError(t, err)
// Verify that 10 metrics were written
assert.Len(t, m.Metrics(), 10)
// Verify that they are in order
expected := append(first5, next5...)
assert.Equal(t, expected, m.Metrics())
}
// Verify that the order of points is preserved during many write failures.
func TestRunningOutputWriteFailOrder2(t *testing.T) {
conf := &OutputConfig{
Filter: Filter{
IsActive: false,
},
}
m := &mockOutput{}
m.failWrite = true
ro := NewRunningOutput("test", m, conf, 5, 100)
// add 5 metrics
for _, metric := range first5 {
ro.AddMetric(metric)
}
// Write fails
err := ro.Write()
require.Error(t, err)
// no successful flush yet
assert.Len(t, m.Metrics(), 0)
// add 5 metrics
for _, metric := range next5 {
ro.AddMetric(metric)
}
// Write fails
err = ro.Write()
require.Error(t, err)
// no successful flush yet
assert.Len(t, m.Metrics(), 0)
// add 5 metrics
for _, metric := range first5 {
ro.AddMetric(metric)
}
// Write fails
err = ro.Write()
require.Error(t, err)
// no successful flush yet
assert.Len(t, m.Metrics(), 0)
// add 5 metrics
for _, metric := range next5 {
ro.AddMetric(metric)
}
// Write fails
err = ro.Write()
require.Error(t, err)
// no successful flush yet
assert.Len(t, m.Metrics(), 0)
m.failWrite = false
err = ro.Write()
require.NoError(t, err)
// Verify that 10 metrics were written
assert.Len(t, m.Metrics(), 20)
// Verify that they are in order
expected := append(first5, next5...)
expected = append(expected, first5...)
expected = append(expected, next5...)
assert.Equal(t, expected, m.Metrics())
}
// Verify that the order of points is preserved when there is a remainder
// of points for the batch.
//
// ie, with a batch size of 5:
//
// 1 2 3 4 5 6 <-- order, failed points
// 6 1 2 3 4 5 <-- order, after 1st write failure (1 2 3 4 5 was batch)
// 1 2 3 4 5 6 <-- order, after 2nd write failure, (6 was batch)
//
func TestRunningOutputWriteFailOrder3(t *testing.T) {
conf := &OutputConfig{
Filter: Filter{
IsActive: false,
},
}
m := &mockOutput{}
m.failWrite = true
ro := NewRunningOutput("test", m, conf, 5, 1000)
// add 5 metrics
for _, metric := range first5 {
ro.AddMetric(metric)
}
// no successful flush yet
assert.Len(t, m.Metrics(), 0)
// Write fails
err := ro.Write()
require.Error(t, err)
// no successful flush yet
assert.Len(t, m.Metrics(), 0)
// add and attempt to write a single metric:
ro.AddMetric(next5[0])
err = ro.Write()
require.Error(t, err)
// unset fail and write metrics
m.failWrite = false
err = ro.Write()
require.NoError(t, err)
// Verify that 6 metrics were written
assert.Len(t, m.Metrics(), 6)
// Verify that they are in order
expected := append(first5, next5[0])
assert.Equal(t, expected, m.Metrics())
}
type mockOutput struct {
sync.Mutex
@@ -263,3 +538,31 @@ func (m *mockOutput) Metrics() []telegraf.Metric {
defer m.Unlock()
return m.metrics
}
type perfOutput struct {
// if true, mock a write failure
failWrite bool
}
func (m *perfOutput) Connect() error {
return nil
}
func (m *perfOutput) Close() error {
return nil
}
func (m *perfOutput) Description() string {
return ""
}
func (m *perfOutput) SampleConfig() string {
return ""
}
func (m *perfOutput) Write(metrics []telegraf.Metric) error {
if m.failWrite {
return fmt.Errorf("Failed Write!")
}
return nil
}

View File

@@ -4,6 +4,8 @@ import (
_ "github.com/influxdata/telegraf/plugins/inputs/aerospike"
_ "github.com/influxdata/telegraf/plugins/inputs/apache"
_ "github.com/influxdata/telegraf/plugins/inputs/bcache"
_ "github.com/influxdata/telegraf/plugins/inputs/cassandra"
_ "github.com/influxdata/telegraf/plugins/inputs/cloudwatch"
_ "github.com/influxdata/telegraf/plugins/inputs/couchbase"
_ "github.com/influxdata/telegraf/plugins/inputs/couchdb"
_ "github.com/influxdata/telegraf/plugins/inputs/disque"
@@ -12,9 +14,12 @@ import (
_ "github.com/influxdata/telegraf/plugins/inputs/dovecot"
_ "github.com/influxdata/telegraf/plugins/inputs/elasticsearch"
_ "github.com/influxdata/telegraf/plugins/inputs/exec"
_ "github.com/influxdata/telegraf/plugins/inputs/filestat"
_ "github.com/influxdata/telegraf/plugins/inputs/github_webhooks"
_ "github.com/influxdata/telegraf/plugins/inputs/haproxy"
_ "github.com/influxdata/telegraf/plugins/inputs/http_response"
_ "github.com/influxdata/telegraf/plugins/inputs/httpjson"
_ "github.com/influxdata/telegraf/plugins/inputs/igloo"
_ "github.com/influxdata/telegraf/plugins/inputs/influxdb"
_ "github.com/influxdata/telegraf/plugins/inputs/ipmi_sensor"
_ "github.com/influxdata/telegraf/plugins/inputs/jolokia"
@@ -50,7 +55,9 @@ import (
_ "github.com/influxdata/telegraf/plugins/inputs/snmp"
_ "github.com/influxdata/telegraf/plugins/inputs/sqlserver"
_ "github.com/influxdata/telegraf/plugins/inputs/statsd"
_ "github.com/influxdata/telegraf/plugins/inputs/sysstat"
_ "github.com/influxdata/telegraf/plugins/inputs/system"
_ "github.com/influxdata/telegraf/plugins/inputs/tail"
_ "github.com/influxdata/telegraf/plugins/inputs/tcp_listener"
_ "github.com/influxdata/telegraf/plugins/inputs/trig"
_ "github.com/influxdata/telegraf/plugins/inputs/twemproxy"

View File

@@ -0,0 +1,125 @@
# Telegraf plugin: Cassandra
#### Plugin arguments:
- **context** string: Context root used for jolokia url
- **servers** []string: List of servers with the format "<user:passwd@><host>:port"
- **metrics** []string: List of Jmx paths that identify mbeans attributes
#### Description
The Cassandra plugin collects Cassandra/JVM metrics exposed as MBean's attributes through jolokia REST endpoint. All metrics are collected for each server configured.
See: https://jolokia.org/ and [Cassandra Documentation](http://docs.datastax.com/en/cassandra/3.x/cassandra/operations/monitoringCassandraTOC.html)
# Measurements:
Cassandra plugin produces one or more measurements for each metric configured, adding Server's name as `host` tag. More than one measurement is generated when querying table metrics with a wildcard for the keyspace or table name.
Given a configuration like:
```toml
[[inputs.cassandra]]
context = "/jolokia/read"
servers = [":8778"]
metrics = ["/java.lang:type=Memory/HeapMemoryUsage"]
```
The collected metrics will be:
```
javaMemory,host=myHost,mname=HeapMemoryUsage HeapMemoryUsage_committed=1040187392,HeapMemoryUsage_init=1050673152,HeapMemoryUsage_max=1040187392,HeapMemoryUsage_used=368155000 1459551767230567084
```
# Useful Metrics:
Here is a list of metrics that might be useful to monitor your cassandra cluster. This was put together from multiple sources on the web.
- [How to monitor Cassandra performance metrics](https://www.datadoghq.com/blog/how-to-monitor-cassandra-performance-metrics)
- [Cassandra Documentation](http://docs.datastax.com/en/cassandra/3.x/cassandra/operations/monitoringCassandraTOC.html)
####measurement = javaGarbageCollector
- /java.lang:type=GarbageCollector,name=ConcurrentMarkSweep/CollectionTime
- /java.lang:type=GarbageCollector,name=ConcurrentMarkSweep/CollectionCount
- /java.lang:type=GarbageCollector,name=ParNew/CollectionTime
- /java.lang:type=GarbageCollector,name=ParNew/CollectionCount
####measurement = javaMemory
- /java.lang:type=Memory/HeapMemoryUsage
- /java.lang:type=Memory/NonHeapMemoryUsage
####measurement = cassandraCache
- /org.apache.cassandra.metrics:type=Cache,scope=KeyCache,name=Hit
- /org.apache.cassandra.metrics:type=Cache,scope=KeyCache,name=Requests
- /org.apache.cassandra.metrics:type=Cache,scope=KeyCache,name=Entries
- /org.apache.cassandra.metrics:type=Cache,scope=KeyCache,name=Size
- /org.apache.cassandra.metrics:type=Cache,scope=KeyCache,name=Capacity
- /org.apache.cassandra.metrics:type=Cache,scope=RowCache,name=Hit
- /org.apache.cassandra.metrics:type=Cache,scope=RowCache,name=Requests
- /org.apache.cassandra.metrics:type=Cache,scope=RowCache,name=Entries
- /org.apache.cassandra.metrics:type=Cache,scope=RowCache,name=Size
- /org.apache.cassandra.metrics:type=Cache,scope=RowCache,name=Capacity
####measurement = cassandraClient
- /org.apache.cassandra.metrics:type=Client,name=connectedNativeClients
####measurement = cassandraClientRequest
- /org.apache.cassandra.metrics:type=ClientRequest,scope=Read,name=TotalLatency
- /org.apache.cassandra.metrics:type=ClientRequest,scope=Write,name=TotalLatency
- /org.apache.cassandra.metrics:type=ClientRequest,scope=Read,name=Latency
- /org.apache.cassandra.metrics:type=ClientRequest,scope=Write,name=Latency
- /org.apache.cassandra.metrics:type=ClientRequest,scope=Read,name=Timeouts
- /org.apache.cassandra.metrics:type=ClientRequest,scope=Write,name=Timeouts
- /org.apache.cassandra.metrics:type=ClientRequest,scope=Read,name=Unavailables
- /org.apache.cassandra.metrics:type=ClientRequest,scope=Write,name=Unavailables
- /org.apache.cassandra.metrics:type=ClientRequest,scope=Read,name=Failures
- /org.apache.cassandra.metrics:type=ClientRequest,scope=Write,name=Failures
####measurement = cassandraCommitLog
- /org.apache.cassandra.metrics:type=CommitLog,name=PendingTasks
- /org.apache.cassandra.metrics:type=CommitLog,name=TotalCommitLogSize
####measurement = cassandraCompaction
- /org.apache.cassandra.metrics:type=Compaction,name=CompletedTask
- /org.apache.cassandra.metrics:type=Compaction,name=PendingTasks
- /org.apache.cassandra.metrics:type=Compaction,name=TotalCompactionsCompleted
- /org.apache.cassandra.metrics:type=Compaction,name=BytesCompacted
####measurement = cassandraStorage
- /org.apache.cassandra.metrics:type=Storage,name=Load
- /org.apache.cassandra.metrics:type=Storage,name=Exceptions
####measurement = cassandraTable
Using wildcards for "keyspace" and "scope" can create a lot of series as metrics will be reported for every table and keyspace including internal system tables. Specify a keyspace name and/or a table name to limit them.
- /org.apache.cassandra.metrics:type=Table,keyspace=\*,scope=\*,name=LiveDiskSpaceUsed
- /org.apache.cassandra.metrics:type=Table,keyspace=\*,scope=\*,name=TotalDiskSpaceUsed
- /org.apache.cassandra.metrics:type=Table,keyspace=\*,scope=\*,name=ReadLatency
- /org.apache.cassandra.metrics:type=Table,keyspace=\*,scope=\*,name=CoordinatorReadLatency
- /org.apache.cassandra.metrics:type=Table,keyspace=\*,scope=\*,name=WriteLatency
- /org.apache.cassandra.metrics:type=Table,keyspace=\*,scope=\*,name=ReadTotalLatency
- /org.apache.cassandra.metrics:type=Table,keyspace=\*,scope=\*,name=WriteTotalLatency
####measurement = cassandraThreadPools
- /org.apache.cassandra.metrics:type=ThreadPools,path=internal,scope=CompactionExecutor,name=ActiveTasks
- /org.apache.cassandra.metrics:type=ThreadPools,path=internal,scope=AntiEntropyStage,name=ActiveTasks
- /org.apache.cassandra.metrics:type=ThreadPools,path=request,scope=CounterMutationStage,name=PendingTasks
- /org.apache.cassandra.metrics:type=ThreadPools,path=request,scope=CounterMutationStage,name=CurrentlyBlockedTasks
- /org.apache.cassandra.metrics:type=ThreadPools,path=request,scope=MutationStage,name=PendingTasks
- /org.apache.cassandra.metrics:type=ThreadPools,path=request,scope=MutationStage,name=CurrentlyBlockedTasks
- /org.apache.cassandra.metrics:type=ThreadPools,path=request,scope=ReadRepairStage,name=PendingTasks
- /org.apache.cassandra.metrics:type=ThreadPools,path=request,scope=ReadRepairStage,name=CurrentlyBlockedTasks
- /org.apache.cassandra.metrics:type=ThreadPools,path=request,scope=ReadStage,name=PendingTasks
- /org.apache.cassandra.metrics:type=ThreadPools,path=request,scope=ReadStage,name=CurrentlyBlockedTasks
- /org.apache.cassandra.metrics:type=ThreadPools,path=request,scope=RequestResponseStage,name=PendingTasks
- /org.apache.cassandra.metrics:type=ThreadPools,path=request,scope=RequestResponseStage,name=CurrentlyBlockedTasks

View File

@@ -0,0 +1,309 @@
package cassandra
import (
"encoding/json"
"errors"
"fmt"
"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/plugins/inputs"
"io/ioutil"
"log"
"net/http"
"net/url"
"strings"
)
type JolokiaClient interface {
MakeRequest(req *http.Request) (*http.Response, error)
}
type JolokiaClientImpl struct {
client *http.Client
}
func (c JolokiaClientImpl) MakeRequest(req *http.Request) (*http.Response, error) {
return c.client.Do(req)
}
type Cassandra struct {
jClient JolokiaClient
Context string
Servers []string
Metrics []string
}
type javaMetric struct {
host string
metric string
acc telegraf.Accumulator
}
type cassandraMetric struct {
host string
metric string
acc telegraf.Accumulator
}
type jmxMetric interface {
addTagsFields(out map[string]interface{})
}
func newJavaMetric(host string, metric string,
acc telegraf.Accumulator) *javaMetric {
return &javaMetric{host: host, metric: metric, acc: acc}
}
func newCassandraMetric(host string, metric string,
acc telegraf.Accumulator) *cassandraMetric {
return &cassandraMetric{host: host, metric: metric, acc: acc}
}
func addValuesAsFields(values map[string]interface{}, fields map[string]interface{},
mname string) {
for k, v := range values {
if v != nil {
fields[mname+"_"+k] = v
}
}
}
func parseJmxMetricRequest(mbean string) map[string]string {
tokens := make(map[string]string)
classAndPairs := strings.Split(mbean, ":")
if classAndPairs[0] == "org.apache.cassandra.metrics" {
tokens["class"] = "cassandra"
} else if classAndPairs[0] == "java.lang" {
tokens["class"] = "java"
} else {
return tokens
}
pairs := strings.Split(classAndPairs[1], ",")
for _, pair := range pairs {
p := strings.Split(pair, "=")
tokens[p[0]] = p[1]
}
return tokens
}
func addTokensToTags(tokens map[string]string, tags map[string]string) {
for k, v := range tokens {
if k == "name" {
tags["mname"] = v // name seems to a reserved word in influxdb
} else if k == "class" || k == "type" {
continue // class and type are used in the metric name
} else {
tags[k] = v
}
}
}
func (j javaMetric) addTagsFields(out map[string]interface{}) {
tags := make(map[string]string)
fields := make(map[string]interface{})
a := out["request"].(map[string]interface{})
attribute := a["attribute"].(string)
mbean := a["mbean"].(string)
tokens := parseJmxMetricRequest(mbean)
addTokensToTags(tokens, tags)
tags["cassandra_host"] = j.host
if _, ok := tags["mname"]; !ok {
//Queries for a single value will not return a "name" tag in the response.
tags["mname"] = attribute
}
if values, ok := out["value"]; ok {
switch t := values.(type) {
case map[string]interface{}:
addValuesAsFields(values.(map[string]interface{}), fields, attribute)
case interface{}:
fields[attribute] = t
}
j.acc.AddFields(tokens["class"]+tokens["type"], fields, tags)
} else {
fmt.Printf("Missing key 'value' in '%s' output response\n%v\n",
j.metric, out)
}
}
func addCassandraMetric(mbean string, c cassandraMetric,
values map[string]interface{}) {
tags := make(map[string]string)
fields := make(map[string]interface{})
tokens := parseJmxMetricRequest(mbean)
addTokensToTags(tokens, tags)
tags["cassandra_host"] = c.host
addValuesAsFields(values, fields, tags["mname"])
c.acc.AddFields(tokens["class"]+tokens["type"], fields, tags)
}
func (c cassandraMetric) addTagsFields(out map[string]interface{}) {
r := out["request"]
tokens := parseJmxMetricRequest(r.(map[string]interface{})["mbean"].(string))
// Requests with wildcards for keyspace or table names will return nested
// maps in the json response
if tokens["type"] == "Table" && (tokens["keyspace"] == "*" ||
tokens["scope"] == "*") {
if valuesMap, ok := out["value"]; ok {
for k, v := range valuesMap.(map[string]interface{}) {
addCassandraMetric(k, c, v.(map[string]interface{}))
}
} else {
fmt.Printf("Missing key 'value' in '%s' output response\n%v\n",
c.metric, out)
return
}
} else {
if values, ok := out["value"]; ok {
addCassandraMetric(r.(map[string]interface{})["mbean"].(string),
c, values.(map[string]interface{}))
} else {
fmt.Printf("Missing key 'value' in '%s' output response\n%v\n",
c.metric, out)
return
}
}
}
func (j *Cassandra) SampleConfig() string {
return `
# This is the context root used to compose the jolokia url
context = "/jolokia/read"
## List of cassandra servers exposing jolokia read service
servers = ["myuser:mypassword@10.10.10.1:8778","10.10.10.2:8778",":8778"]
## List of metrics collected on above servers
## Each metric consists of a jmx path.
## This will collect all heap memory usage metrics from the jvm and
## ReadLatency metrics for all keyspaces and tables.
## "type=Table" in the query works with Cassandra3.0. Older versions might
## need to use "type=ColumnFamily"
metrics = [
"/java.lang:type=Memory/HeapMemoryUsage",
"/org.apache.cassandra.metrics:type=Table,keyspace=*,scope=*,name=ReadLatency"
]
`
}
func (j *Cassandra) Description() string {
return "Read Cassandra metrics through Jolokia"
}
func (j *Cassandra) getAttr(requestUrl *url.URL) (map[string]interface{}, error) {
// Create + send request
req, err := http.NewRequest("GET", requestUrl.String(), nil)
if err != nil {
return nil, err
}
resp, err := j.jClient.MakeRequest(req)
if err != nil {
return nil, err
}
defer resp.Body.Close()
// Process response
if resp.StatusCode != http.StatusOK {
err = fmt.Errorf("Response from url \"%s\" has status code %d (%s), expected %d (%s)",
requestUrl,
resp.StatusCode,
http.StatusText(resp.StatusCode),
http.StatusOK,
http.StatusText(http.StatusOK))
return nil, err
}
// read body
body, err := ioutil.ReadAll(resp.Body)
if err != nil {
return nil, err
}
// Unmarshal json
var jsonOut map[string]interface{}
if err = json.Unmarshal([]byte(body), &jsonOut); err != nil {
return nil, errors.New("Error decoding JSON response")
}
return jsonOut, nil
}
func parseServerTokens(server string) map[string]string {
serverTokens := make(map[string]string)
hostAndUser := strings.Split(server, "@")
hostPort := ""
userPasswd := ""
if len(hostAndUser) == 2 {
hostPort = hostAndUser[1]
userPasswd = hostAndUser[0]
} else {
hostPort = hostAndUser[0]
}
hostTokens := strings.Split(hostPort, ":")
serverTokens["host"] = hostTokens[0]
serverTokens["port"] = hostTokens[1]
if userPasswd != "" {
userTokens := strings.Split(userPasswd, ":")
serverTokens["user"] = userTokens[0]
serverTokens["passwd"] = userTokens[1]
}
return serverTokens
}
func (c *Cassandra) Gather(acc telegraf.Accumulator) error {
context := c.Context
servers := c.Servers
metrics := c.Metrics
for _, server := range servers {
for _, metric := range metrics {
serverTokens := parseServerTokens(server)
var m jmxMetric
if strings.HasPrefix(metric, "/java.lang:") {
m = newJavaMetric(serverTokens["host"], metric, acc)
} else if strings.HasPrefix(metric,
"/org.apache.cassandra.metrics:") {
m = newCassandraMetric(serverTokens["host"], metric, acc)
} else {
// unsupported metric type
log.Printf("Unsupported Cassandra metric [%s], skipping",
metric)
continue
}
// Prepare URL
requestUrl, err := url.Parse("http://" + serverTokens["host"] + ":" +
serverTokens["port"] + context + metric)
if err != nil {
return err
}
if serverTokens["user"] != "" && serverTokens["passwd"] != "" {
requestUrl.User = url.UserPassword(serverTokens["user"],
serverTokens["passwd"])
}
fmt.Printf("host %s url %s\n", serverTokens["host"], requestUrl)
out, err := c.getAttr(requestUrl)
if out["status"] != 200.0 {
fmt.Printf("URL returned with status %v\n", out["status"])
continue
}
m.addTagsFields(out)
}
}
return nil
}
func init() {
inputs.Add("cassandra", func() telegraf.Input {
return &Cassandra{jClient: &JolokiaClientImpl{client: &http.Client{}}}
})
}

View File

@@ -0,0 +1,286 @@
package cassandra
import (
_ "fmt"
"io/ioutil"
"net/http"
"strings"
"testing"
"github.com/influxdata/telegraf/testutil"
"github.com/stretchr/testify/assert"
_ "github.com/stretchr/testify/require"
)
const validJavaMultiValueJSON = `
{
"request":{
"mbean":"java.lang:type=Memory",
"attribute":"HeapMemoryUsage",
"type":"read"
},
"value":{
"init":67108864,
"committed":456130560,
"max":477626368,
"used":203288528
},
"timestamp":1446129191,
"status":200
}`
const validCassandraMultiValueJSON = `
{
"request": {
"mbean": "org.apache.cassandra.metrics:keyspace=test_keyspace1,name=ReadLatency,scope=test_table,type=Table",
"type": "read"},
"status": 200,
"timestamp": 1458089229,
"value": {
"999thPercentile": 20.0,
"99thPercentile": 10.0,
"Count": 400,
"DurationUnit": "microseconds",
"Max": 30.0,
"Mean": null,
"MeanRate": 3.0,
"Min": 1.0,
"RateUnit": "events/second",
"StdDev": null
}
}`
const validCassandraNestedMultiValueJSON = `
{
"request": {
"mbean": "org.apache.cassandra.metrics:keyspace=test_keyspace1,name=ReadLatency,scope=*,type=Table",
"type": "read"},
"status": 200,
"timestamp": 1458089184,
"value": {
"org.apache.cassandra.metrics:keyspace=test_keyspace1,name=ReadLatency,scope=test_table1,type=Table":
{ "999thPercentile": 1.0,
"Count": 100,
"DurationUnit": "microseconds",
"OneMinuteRate": 1.0,
"RateUnit": "events/second",
"StdDev": null
},
"org.apache.cassandra.metrics:keyspace=test_keyspace2,name=ReadLatency,scope=test_table2,type=Table":
{ "999thPercentile": 2.0,
"Count": 200,
"DurationUnit": "microseconds",
"OneMinuteRate": 2.0,
"RateUnit": "events/second",
"StdDev": null
}
}
}`
const validSingleValueJSON = `
{
"request":{
"path":"used",
"mbean":"java.lang:type=Memory",
"attribute":"HeapMemoryUsage",
"type":"read"
},
"value":209274376,
"timestamp":1446129256,
"status":200
}`
const validJavaMultiTypeJSON = `
{
"request":{
"mbean":"java.lang:name=ConcurrentMarkSweep,type=GarbageCollector",
"attribute":"CollectionCount",
"type":"read"
},
"value":1,
"timestamp":1459316570,
"status":200
}`
const invalidJSON = "I don't think this is JSON"
const empty = ""
var Servers = []string{"10.10.10.10:8778"}
var AuthServers = []string{"user:passwd@10.10.10.10:8778"}
var MultipleServers = []string{"10.10.10.10:8778", "10.10.10.11:8778"}
var HeapMetric = "/java.lang:type=Memory/HeapMemoryUsage"
var ReadLatencyMetric = "/org.apache.cassandra.metrics:type=Table,keyspace=test_keyspace1,scope=test_table,name=ReadLatency"
var NestedReadLatencyMetric = "/org.apache.cassandra.metrics:type=Table,keyspace=test_keyspace1,scope=*,name=ReadLatency"
var GarbageCollectorMetric1 = "/java.lang:type=GarbageCollector,name=ConcurrentMarkSweep/CollectionCount"
var GarbageCollectorMetric2 = "/java.lang:type=GarbageCollector,name=ConcurrentMarkSweep/CollectionTime"
var Context = "/jolokia/read"
type jolokiaClientStub struct {
responseBody string
statusCode int
}
func (c jolokiaClientStub) MakeRequest(req *http.Request) (*http.Response, error) {
resp := http.Response{}
resp.StatusCode = c.statusCode
resp.Body = ioutil.NopCloser(strings.NewReader(c.responseBody))
return &resp, nil
}
// Generates a pointer to an HttpJson object that uses a mock HTTP client.
// Parameters:
// response : Body of the response that the mock HTTP client should return
// statusCode: HTTP status code the mock HTTP client should return
//
// Returns:
// *HttpJson: Pointer to an HttpJson object that uses the generated mock HTTP client
func genJolokiaClientStub(response string, statusCode int, servers []string, metrics []string) *Cassandra {
return &Cassandra{
jClient: jolokiaClientStub{responseBody: response, statusCode: statusCode},
Context: Context,
Servers: servers,
Metrics: metrics,
}
}
// Test that the proper values are ignored or collected for class=Java
func TestHttpJsonJavaMultiValue(t *testing.T) {
cassandra := genJolokiaClientStub(validJavaMultiValueJSON, 200,
MultipleServers, []string{HeapMetric})
var acc testutil.Accumulator
acc.SetDebug(true)
err := cassandra.Gather(&acc)
assert.Nil(t, err)
assert.Equal(t, 2, len(acc.Metrics))
fields := map[string]interface{}{
"HeapMemoryUsage_init": 67108864.0,
"HeapMemoryUsage_committed": 456130560.0,
"HeapMemoryUsage_max": 477626368.0,
"HeapMemoryUsage_used": 203288528.0,
}
tags1 := map[string]string{
"cassandra_host": "10.10.10.10",
"mname": "HeapMemoryUsage",
}
tags2 := map[string]string{
"cassandra_host": "10.10.10.11",
"mname": "HeapMemoryUsage",
}
acc.AssertContainsTaggedFields(t, "javaMemory", fields, tags1)
acc.AssertContainsTaggedFields(t, "javaMemory", fields, tags2)
}
func TestHttpJsonJavaMultiType(t *testing.T) {
cassandra := genJolokiaClientStub(validJavaMultiTypeJSON, 200, AuthServers, []string{GarbageCollectorMetric1, GarbageCollectorMetric2})
var acc testutil.Accumulator
acc.SetDebug(true)
err := cassandra.Gather(&acc)
assert.Nil(t, err)
assert.Equal(t, 2, len(acc.Metrics))
fields := map[string]interface{}{
"CollectionCount": 1.0,
}
tags := map[string]string{
"cassandra_host": "10.10.10.10",
"mname": "ConcurrentMarkSweep",
}
acc.AssertContainsTaggedFields(t, "javaGarbageCollector", fields, tags)
}
// Test that the proper values are ignored or collected
func TestHttpJsonOn404(t *testing.T) {
jolokia := genJolokiaClientStub(validJavaMultiValueJSON, 404, Servers,
[]string{HeapMetric})
var acc testutil.Accumulator
err := jolokia.Gather(&acc)
assert.Nil(t, err)
assert.Equal(t, 0, len(acc.Metrics))
}
// Test that the proper values are ignored or collected for class=Cassandra
func TestHttpJsonCassandraMultiValue(t *testing.T) {
cassandra := genJolokiaClientStub(validCassandraMultiValueJSON, 200, Servers, []string{ReadLatencyMetric})
var acc testutil.Accumulator
err := cassandra.Gather(&acc)
assert.Nil(t, err)
assert.Equal(t, 1, len(acc.Metrics))
fields := map[string]interface{}{
"ReadLatency_999thPercentile": 20.0,
"ReadLatency_99thPercentile": 10.0,
"ReadLatency_Count": 400.0,
"ReadLatency_DurationUnit": "microseconds",
"ReadLatency_Max": 30.0,
"ReadLatency_MeanRate": 3.0,
"ReadLatency_Min": 1.0,
"ReadLatency_RateUnit": "events/second",
}
tags := map[string]string{
"cassandra_host": "10.10.10.10",
"mname": "ReadLatency",
"keyspace": "test_keyspace1",
"scope": "test_table",
}
acc.AssertContainsTaggedFields(t, "cassandraTable", fields, tags)
}
// Test that the proper values are ignored or collected for class=Cassandra with
// nested values
func TestHttpJsonCassandraNestedMultiValue(t *testing.T) {
cassandra := genJolokiaClientStub(validCassandraNestedMultiValueJSON, 200, Servers, []string{NestedReadLatencyMetric})
var acc testutil.Accumulator
acc.SetDebug(true)
err := cassandra.Gather(&acc)
assert.Nil(t, err)
assert.Equal(t, 2, len(acc.Metrics))
fields1 := map[string]interface{}{
"ReadLatency_999thPercentile": 1.0,
"ReadLatency_Count": 100.0,
"ReadLatency_DurationUnit": "microseconds",
"ReadLatency_OneMinuteRate": 1.0,
"ReadLatency_RateUnit": "events/second",
}
fields2 := map[string]interface{}{
"ReadLatency_999thPercentile": 2.0,
"ReadLatency_Count": 200.0,
"ReadLatency_DurationUnit": "microseconds",
"ReadLatency_OneMinuteRate": 2.0,
"ReadLatency_RateUnit": "events/second",
}
tags1 := map[string]string{
"cassandra_host": "10.10.10.10",
"mname": "ReadLatency",
"keyspace": "test_keyspace1",
"scope": "test_table1",
}
tags2 := map[string]string{
"cassandra_host": "10.10.10.10",
"mname": "ReadLatency",
"keyspace": "test_keyspace2",
"scope": "test_table2",
}
acc.AssertContainsTaggedFields(t, "cassandraTable", fields1, tags1)
acc.AssertContainsTaggedFields(t, "cassandraTable", fields2, tags2)
}

View File

@@ -0,0 +1,86 @@
# Amazon CloudWatch Statistics Input
This plugin will pull Metric Statistics from Amazon CloudWatch.
### Amazon Authentication
This plugin uses a credential chain for Authentication with the CloudWatch
API endpoint. In the following order the plugin will attempt to authenticate.
1. [IAMS Role](http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/iam-roles-for-amazon-ec2.html)
2. [Environment Variables](https://github.com/aws/aws-sdk-go/wiki/configuring-sdk#environment-variables)
3. [Shared Credentials](https://github.com/aws/aws-sdk-go/wiki/configuring-sdk#shared-credentials-file)
### Configuration:
```toml
[[inputs.cloudwatch]]
## Amazon Region (required)
region = 'us-east-1'
## Requested CloudWatch aggregation Period (required - must be a multiple of 60s)
period = '1m'
## Collection Delay (required - must account for metrics availability via CloudWatch API)
delay = '1m'
## Override global run interval (optional - defaults to global interval)
## Recomended: use metric 'interval' that is a multiple of 'period' to avoid
## gaps or overlap in pulled data
interval = '1m'
## Metric Statistic Namespace (required)
namespace = 'AWS/ELB'
## Metrics to Pull (optional)
## Defaults to all Metrics in Namespace if nothing is provided
## Refreshes Namespace available metrics every 1h
[[inputs.cloudwatch.metrics]]
names = ['Latency', 'RequestCount']
## Dimension filters for Metric (optional)
[[inputs.cloudwatch.metrics.dimensions]]
name = 'LoadBalancerName'
value = 'p-example'
```
#### Requirements and Terminology
Plugin Configuration utilizes [CloudWatch concepts](http://docs.aws.amazon.com/AmazonCloudWatch/latest/DeveloperGuide/cloudwatch_concepts.html) and access pattern to allow monitoring of any CloudWatch Metric.
- `region` must be a valid AWS [Region](http://docs.aws.amazon.com/AmazonCloudWatch/latest/DeveloperGuide/cloudwatch_concepts.html#CloudWatchRegions) value
- `period` must be a valid CloudWatch [Period](http://docs.aws.amazon.com/AmazonCloudWatch/latest/DeveloperGuide/cloudwatch_concepts.html#CloudWatchPeriods) value
- `namespace` must be a valid CloudWatch [Namespace](http://docs.aws.amazon.com/AmazonCloudWatch/latest/DeveloperGuide/cloudwatch_concepts.html#Namespace) value
- `names` must be valid CloudWatch [Metric](http://docs.aws.amazon.com/AmazonCloudWatch/latest/DeveloperGuide/cloudwatch_concepts.html#Metric) names
- `dimensions` must be valid CloudWatch [Dimension](http://docs.aws.amazon.com/AmazonCloudWatch/latest/DeveloperGuide/cloudwatch_concepts.html#Dimension) name/value pairs
#### Restrictions and Limitations
- CloudWatch metrics are not available instantly via the CloudWatch API. You should adjust your collection `delay` to account for this lag in metrics availability based on your [monitoring subscription level](http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/using-cloudwatch-new.html)
- CloudWatch API usage incurs cost - see [GetMetricStatistics Pricing](https://aws.amazon.com/cloudwatch/pricing/)
### Measurements & Fields:
Each CloudWatch Namespace monitored records a measurement with fields for each available Metric Statistic
Namespace and Metrics are represented in [snake case](https://en.wikipedia.org/wiki/Snake_case)
- cloudwatch_{namespace}
- {metric}_sum (metric Sum value)
- {metric}_average (metric Average value)
- {metric}_minimum (metric Minimum value)
- {metric}_maximum (metric Maximum value)
- {metric}_sample_count (metric SampleCount value)
### Tags:
Each measurement is tagged with the following identifiers to uniquely identify the associated metric
Tag Dimension names are represented in [snake case](https://en.wikipedia.org/wiki/Snake_case)
- All measurements have the following tags:
- region (CloudWatch Region)
- unit (CloudWatch Metric Unit)
- {dimension-name} (Cloudwatch Dimension value - one for each metric dimension)
### Example Output:
```
$ ./telegraf -config telegraf.conf -input-filter cloudwatch -test
> cloudwatch_aws_elb,load_balancer_name=p-example,region=us-east-1,unit=seconds latency_average=0.004810798017284538,latency_maximum=0.1100282669067383,latency_minimum=0.0006084442138671875,latency_sample_count=4029,latency_sum=19.382705211639404 1459542420000000000
```

View File

@@ -0,0 +1,311 @@
package cloudwatch
import (
"fmt"
"strings"
"time"
"github.com/aws/aws-sdk-go/aws"
"github.com/aws/aws-sdk-go/aws/credentials"
"github.com/aws/aws-sdk-go/aws/session"
"github.com/aws/aws-sdk-go/service/cloudwatch"
"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/internal"
"github.com/influxdata/telegraf/plugins/inputs"
)
type (
CloudWatch struct {
Region string `toml:"region"`
AccessKey string `toml:"access_key"`
SecretKey string `toml:"secret_key"`
Period internal.Duration `toml:"period"`
Delay internal.Duration `toml:"delay"`
Namespace string `toml:"namespace"`
Metrics []*Metric `toml:"metrics"`
client cloudwatchClient
metricCache *MetricCache
}
Metric struct {
MetricNames []string `toml:"names"`
Dimensions []*Dimension `toml:"dimensions"`
}
Dimension struct {
Name string `toml:"name"`
Value string `toml:"value"`
}
MetricCache struct {
TTL time.Duration
Fetched time.Time
Metrics []*cloudwatch.Metric
}
cloudwatchClient interface {
ListMetrics(*cloudwatch.ListMetricsInput) (*cloudwatch.ListMetricsOutput, error)
GetMetricStatistics(*cloudwatch.GetMetricStatisticsInput) (*cloudwatch.GetMetricStatisticsOutput, error)
}
)
func (c *CloudWatch) SampleConfig() string {
return `
## Amazon Region
region = 'us-east-1'
## Amazon Credentials
## Credentials are loaded in the following order
## 1) explicit credentials from 'access_key' and 'secret_key'
## 2) environment variables
## 3) shared credentials file
## 4) EC2 Instance Profile
#access_key = ""
#secret_key = ""
## Requested CloudWatch aggregation Period (required - must be a multiple of 60s)
period = '1m'
## Collection Delay (required - must account for metrics availability via CloudWatch API)
delay = '1m'
## Recomended: use metric 'interval' that is a multiple of 'period' to avoid
## gaps or overlap in pulled data
interval = '1m'
## Metric Statistic Namespace (required)
namespace = 'AWS/ELB'
## Metrics to Pull (optional)
## Defaults to all Metrics in Namespace if nothing is provided
## Refreshes Namespace available metrics every 1h
#[[inputs.cloudwatch.metrics]]
# names = ['Latency', 'RequestCount']
#
# ## Dimension filters for Metric (optional)
# [[inputs.cloudwatch.metrics.dimensions]]
# name = 'LoadBalancerName'
# value = 'p-example'
`
}
func (c *CloudWatch) Description() string {
return "Pull Metric Statistics from Amazon CloudWatch"
}
func (c *CloudWatch) Gather(acc telegraf.Accumulator) error {
if c.client == nil {
c.initializeCloudWatch()
}
var metrics []*cloudwatch.Metric
// check for provided metric filter
if c.Metrics != nil {
metrics = []*cloudwatch.Metric{}
for _, m := range c.Metrics {
dimensions := make([]*cloudwatch.Dimension, len(m.Dimensions))
for k, d := range m.Dimensions {
dimensions[k] = &cloudwatch.Dimension{
Name: aws.String(d.Name),
Value: aws.String(d.Value),
}
}
for _, name := range m.MetricNames {
metrics = append(metrics, &cloudwatch.Metric{
Namespace: aws.String(c.Namespace),
MetricName: aws.String(name),
Dimensions: dimensions,
})
}
}
} else {
var err error
metrics, err = c.fetchNamespaceMetrics()
if err != nil {
return err
}
}
metricCount := len(metrics)
var errChan = make(chan error, metricCount)
now := time.Now()
// limit concurrency or we can easily exhaust user connection limit
semaphore := make(chan byte, 64)
for _, m := range metrics {
semaphore <- 0x1
go c.gatherMetric(acc, m, now, semaphore, errChan)
}
for i := 1; i <= metricCount; i++ {
err := <-errChan
if err != nil {
return err
}
}
return nil
}
func init() {
inputs.Add("cloudwatch", func() telegraf.Input {
return &CloudWatch{}
})
}
/*
* Initialize CloudWatch client
*/
func (c *CloudWatch) initializeCloudWatch() error {
config := &aws.Config{
Region: aws.String(c.Region),
}
if c.AccessKey != "" || c.SecretKey != "" {
config.Credentials = credentials.NewStaticCredentials(c.AccessKey, c.SecretKey, "")
}
c.client = cloudwatch.New(session.New(config))
return nil
}
/*
* Fetch available metrics for given CloudWatch Namespace
*/
func (c *CloudWatch) fetchNamespaceMetrics() (metrics []*cloudwatch.Metric, err error) {
if c.metricCache != nil && c.metricCache.IsValid() {
metrics = c.metricCache.Metrics
return
}
metrics = []*cloudwatch.Metric{}
var token *string
for more := true; more; {
params := &cloudwatch.ListMetricsInput{
Namespace: aws.String(c.Namespace),
Dimensions: []*cloudwatch.DimensionFilter{},
NextToken: token,
MetricName: nil,
}
resp, err := c.client.ListMetrics(params)
if err != nil {
return nil, err
}
metrics = append(metrics, resp.Metrics...)
token = resp.NextToken
more = token != nil
}
cacheTTL, _ := time.ParseDuration("1hr")
c.metricCache = &MetricCache{
Metrics: metrics,
Fetched: time.Now(),
TTL: cacheTTL,
}
return
}
/*
* Gather given Metric and emit any error
*/
func (c *CloudWatch) gatherMetric(acc telegraf.Accumulator, metric *cloudwatch.Metric, now time.Time, semaphore chan byte, errChan chan error) {
params := c.getStatisticsInput(metric, now)
resp, err := c.client.GetMetricStatistics(params)
if err != nil {
errChan <- err
<-semaphore
return
}
for _, point := range resp.Datapoints {
tags := map[string]string{
"region": c.Region,
"unit": snakeCase(*point.Unit),
}
for _, d := range metric.Dimensions {
tags[snakeCase(*d.Name)] = *d.Value
}
// record field for each statistic
fields := map[string]interface{}{}
if point.Average != nil {
fields[formatField(*metric.MetricName, cloudwatch.StatisticAverage)] = *point.Average
}
if point.Maximum != nil {
fields[formatField(*metric.MetricName, cloudwatch.StatisticMaximum)] = *point.Maximum
}
if point.Minimum != nil {
fields[formatField(*metric.MetricName, cloudwatch.StatisticMinimum)] = *point.Minimum
}
if point.SampleCount != nil {
fields[formatField(*metric.MetricName, cloudwatch.StatisticSampleCount)] = *point.SampleCount
}
if point.Sum != nil {
fields[formatField(*metric.MetricName, cloudwatch.StatisticSum)] = *point.Sum
}
acc.AddFields(formatMeasurement(c.Namespace), fields, tags, *point.Timestamp)
}
errChan <- nil
<-semaphore
}
/*
* Formatting helpers
*/
func formatField(metricName string, statistic string) string {
return fmt.Sprintf("%s_%s", snakeCase(metricName), snakeCase(statistic))
}
func formatMeasurement(namespace string) string {
namespace = strings.Replace(namespace, "/", "_", -1)
namespace = snakeCase(namespace)
return fmt.Sprintf("cloudwatch_%s", namespace)
}
func snakeCase(s string) string {
s = internal.SnakeCase(s)
s = strings.Replace(s, "__", "_", -1)
return s
}
/*
* Map Metric to *cloudwatch.GetMetricStatisticsInput for given timeframe
*/
func (c *CloudWatch) getStatisticsInput(metric *cloudwatch.Metric, now time.Time) *cloudwatch.GetMetricStatisticsInput {
end := now.Add(-c.Delay.Duration)
input := &cloudwatch.GetMetricStatisticsInput{
StartTime: aws.Time(end.Add(-c.Period.Duration)),
EndTime: aws.Time(end),
MetricName: metric.MetricName,
Namespace: metric.Namespace,
Period: aws.Int64(int64(c.Period.Duration.Seconds())),
Dimensions: metric.Dimensions,
Statistics: []*string{
aws.String(cloudwatch.StatisticAverage),
aws.String(cloudwatch.StatisticMaximum),
aws.String(cloudwatch.StatisticMinimum),
aws.String(cloudwatch.StatisticSum),
aws.String(cloudwatch.StatisticSampleCount)},
}
return input
}
/*
* Check Metric Cache validity
*/
func (c *MetricCache) IsValid() bool {
return c.Metrics != nil && time.Since(c.Fetched) < c.TTL
}

View File

@@ -0,0 +1,131 @@
package cloudwatch
import (
"testing"
"time"
"github.com/aws/aws-sdk-go/aws"
"github.com/aws/aws-sdk-go/service/cloudwatch"
"github.com/influxdata/telegraf/internal"
"github.com/influxdata/telegraf/testutil"
"github.com/stretchr/testify/assert"
)
type mockCloudWatchClient struct{}
func (m *mockCloudWatchClient) ListMetrics(params *cloudwatch.ListMetricsInput) (*cloudwatch.ListMetricsOutput, error) {
metric := &cloudwatch.Metric{
Namespace: params.Namespace,
MetricName: aws.String("Latency"),
Dimensions: []*cloudwatch.Dimension{
&cloudwatch.Dimension{
Name: aws.String("LoadBalancerName"),
Value: aws.String("p-example"),
},
},
}
result := &cloudwatch.ListMetricsOutput{
Metrics: []*cloudwatch.Metric{metric},
}
return result, nil
}
func (m *mockCloudWatchClient) GetMetricStatistics(params *cloudwatch.GetMetricStatisticsInput) (*cloudwatch.GetMetricStatisticsOutput, error) {
dataPoint := &cloudwatch.Datapoint{
Timestamp: params.EndTime,
Minimum: aws.Float64(0.1),
Maximum: aws.Float64(0.3),
Average: aws.Float64(0.2),
Sum: aws.Float64(123),
SampleCount: aws.Float64(100),
Unit: aws.String("Seconds"),
}
result := &cloudwatch.GetMetricStatisticsOutput{
Label: aws.String("Latency"),
Datapoints: []*cloudwatch.Datapoint{dataPoint},
}
return result, nil
}
func TestGather(t *testing.T) {
duration, _ := time.ParseDuration("1m")
internalDuration := internal.Duration{
Duration: duration,
}
c := &CloudWatch{
Region: "us-east-1",
Namespace: "AWS/ELB",
Delay: internalDuration,
Period: internalDuration,
}
var acc testutil.Accumulator
c.client = &mockCloudWatchClient{}
c.Gather(&acc)
fields := map[string]interface{}{}
fields["latency_minimum"] = 0.1
fields["latency_maximum"] = 0.3
fields["latency_average"] = 0.2
fields["latency_sum"] = 123.0
fields["latency_sample_count"] = 100.0
tags := map[string]string{}
tags["unit"] = "seconds"
tags["region"] = "us-east-1"
tags["load_balancer_name"] = "p-example"
assert.True(t, acc.HasMeasurement("cloudwatch_aws_elb"))
acc.AssertContainsTaggedFields(t, "cloudwatch_aws_elb", fields, tags)
}
func TestGenerateStatisticsInputParams(t *testing.T) {
d := &cloudwatch.Dimension{
Name: aws.String("LoadBalancerName"),
Value: aws.String("p-example"),
}
m := &cloudwatch.Metric{
MetricName: aws.String("Latency"),
Dimensions: []*cloudwatch.Dimension{d},
}
duration, _ := time.ParseDuration("1m")
internalDuration := internal.Duration{
Duration: duration,
}
c := &CloudWatch{
Namespace: "AWS/ELB",
Delay: internalDuration,
Period: internalDuration,
}
c.initializeCloudWatch()
now := time.Now()
params := c.getStatisticsInput(m, now)
assert.EqualValues(t, *params.EndTime, now.Add(-c.Delay.Duration))
assert.EqualValues(t, *params.StartTime, now.Add(-c.Period.Duration).Add(-c.Delay.Duration))
assert.Len(t, params.Dimensions, 1)
assert.Len(t, params.Statistics, 5)
assert.EqualValues(t, *params.Period, 60)
}
func TestMetricsCacheTimeout(t *testing.T) {
ttl, _ := time.ParseDuration("5ms")
cache := &MetricCache{
Metrics: []*cloudwatch.Metric{},
Fetched: time.Now(),
TTL: ttl,
}
assert.True(t, cache.IsValid())
time.Sleep(ttl)
assert.False(t, cache.IsValid())
}

View File

@@ -162,7 +162,7 @@ func (g *Disque) gatherServer(addr *url.URL, acc telegraf.Accumulator) error {
var read int
fields := make(map[string]interface{})
tags := map[string]string{"host": addr.String()}
tags := map[string]string{"disque_host": addr.String()}
for read < sz {
line, err := r.ReadString('\n')
if err != nil {

View File

@@ -15,6 +15,9 @@ var servers = []string{"8.8.8.8"}
var domains = []string{"google.com"}
func TestGathering(t *testing.T) {
if testing.Short() {
t.Skip("Skipping network-dependent test in short mode.")
}
var dnsConfig = DnsQuery{
Servers: servers,
Domains: domains,
@@ -31,6 +34,9 @@ func TestGathering(t *testing.T) {
}
func TestGatheringMxRecord(t *testing.T) {
if testing.Short() {
t.Skip("Skipping network-dependent test in short mode.")
}
var dnsConfig = DnsQuery{
Servers: servers,
Domains: domains,
@@ -48,6 +54,9 @@ func TestGatheringMxRecord(t *testing.T) {
}
func TestGatheringRootDomain(t *testing.T) {
if testing.Short() {
t.Skip("Skipping network-dependent test in short mode.")
}
var dnsConfig = DnsQuery{
Servers: servers,
Domains: []string{"."},
@@ -72,6 +81,9 @@ func TestGatheringRootDomain(t *testing.T) {
}
func TestMetricContainsServerAndDomainAndRecordTypeTags(t *testing.T) {
if testing.Short() {
t.Skip("Skipping network-dependent test in short mode.")
}
var dnsConfig = DnsQuery{
Servers: servers,
Domains: domains,
@@ -95,6 +107,9 @@ func TestMetricContainsServerAndDomainAndRecordTypeTags(t *testing.T) {
}
func TestGatheringTimeout(t *testing.T) {
if testing.Short() {
t.Skip("Skipping network-dependent test in short mode.")
}
var dnsConfig = DnsQuery{
Servers: servers,
Domains: domains,

View File

@@ -5,11 +5,11 @@ docker containers. You can read Docker's documentation for their remote API
[here](https://docs.docker.com/engine/reference/api/docker_remote_api_v1.20/#get-container-stats-based-on-resource-usage)
The docker plugin uses the excellent
[fsouza go-dockerclient](https://github.com/fsouza/go-dockerclient) library to
[docker engine-api](https://github.com/docker/engine-api) library to
gather stats. Documentation for the library can be found
[here](https://godoc.org/github.com/fsouza/go-dockerclient) and documentation
[here](https://godoc.org/github.com/docker/engine-api) and documentation
for the stat structure can be found
[here](https://godoc.org/github.com/fsouza/go-dockerclient#Stats)
[here](https://godoc.org/github.com/docker/engine-api/types#Stats)
### Configuration:
@@ -29,10 +29,10 @@ for the stat structure can be found
Every effort was made to preserve the names based on the JSON response from the
docker API.
Note that the docker_cpu metric may appear multiple times per collection, based
on the availability of per-cpu stats on your system.
Note that the docker_container_cpu metric may appear multiple times per collection,
based on the availability of per-cpu stats on your system.
- docker_mem
- docker_container_mem
- total_pgmafault
- cache
- mapped_file
@@ -66,7 +66,8 @@ on the availability of per-cpu stats on your system.
- usage
- failcnt
- limit
- docker_cpu
- container_id
- docker_container_cpu
- throttling_periods
- throttling_throttled_periods
- throttling_throttled_time
@@ -75,7 +76,8 @@ on the availability of per-cpu stats on your system.
- usage_system
- usage_total
- usage_percent
- docker_net
- container_id
- docker_container_net
- rx_dropped
- rx_bytes
- rx_errors
@@ -84,7 +86,8 @@ on the availability of per-cpu stats on your system.
- rx_packets
- tx_errors
- tx_bytes
- docker_blkio
- container_id
- docker_container_blkio
- io_service_bytes_recursive_async
- io_service_bytes_recursive_read
- io_service_bytes_recursive_sync
@@ -95,6 +98,7 @@ on the availability of per-cpu stats on your system.
- io_serviced_recursive_sync
- io_serviced_recursive_total
- io_serviced_recursive_write
- container_id
- docker_
- n_used_file_descriptors
- n_cpus
@@ -125,20 +129,20 @@ on the availability of per-cpu stats on your system.
- docker_metadata
- unit=bytes
- docker_cpu specific:
- cont_id (container ID)
- cont_image (container image)
- cont_name (container name)
- docker_container_mem specific:
- container_image
- container_name
- docker_container_cpu specific:
- container_image
- container_name
- cpu
- docker_net specific:
- cont_id (container ID)
- cont_image (container image)
- cont_name (container name)
- docker_container_net specific:
- container_image
- container_name
- network
- docker_blkio specific:
- cont_id (container ID)
- cont_image (container image)
- cont_name (container name)
- docker_container_blkio specific:
- container_image
- container_name
- device
### Example Output:
@@ -156,8 +160,8 @@ on the availability of per-cpu stats on your system.
> docker,unit=bytes pool_blocksize=65540i 1456926671065383978
> docker_data,unit=bytes available=24340000000i,total=107400000000i,used=14820000000i 1456926671065383978
> docker_metadata,unit=bytes available=2126999999i,total=2146999999i,used=20420000i 145692667106538
> docker_mem,cont_id=5705ba8ed8fb47527410653d60a8bb2f3af5e62372297c419022a3cc6d45d848,\
cont_image=spotify/kafka,cont_name=kafka \
> docker_container_mem,
container_image=spotify/kafka,container_name=kafka \
active_anon=52568064i,active_file=6926336i,cache=12038144i,fail_count=0i,\
hierarchical_memory_limit=9223372036854771712i,inactive_anon=52707328i,\
inactive_file=5111808i,limit=1044578304i,mapped_file=10301440i,\
@@ -168,21 +172,21 @@ total_inactive_file=5111808i,total_mapped_file=10301440i,total_pgfault=63762i,\
total_pgmafault=0i,total_pgpgin=73355i,total_pgpgout=45736i,\
total_rss=105275392i,total_rss_huge=4194304i,total_unevictable=0i,\
total_writeback=0i,unevictable=0i,usage=117440512i,writeback=0i 1453409536840126713
> docker_cpu,cont_id=5705ba8ed8fb47527410653d60a8bb2f3af5e62372297c419022a3cc6d45d848,\
cont_image=spotify/kafka,cont_name=kafka,cpu=cpu-total \
> docker_container_cpu,
container_image=spotify/kafka,container_name=kafka,cpu=cpu-total \
throttling_periods=0i,throttling_throttled_periods=0i,\
throttling_throttled_time=0i,usage_in_kernelmode=440000000i,\
usage_in_usermode=2290000000i,usage_system=84795360000000i,\
usage_total=6628208865i 1453409536840126713
> docker_cpu,cont_id=5705ba8ed8fb47527410653d60a8bb2f3af5e62372297c419022a3cc6d45d848,\
cont_image=spotify/kafka,cont_name=kafka,cpu=cpu0 \
> docker_container_cpu,
container_image=spotify/kafka,container_name=kafka,cpu=cpu0 \
usage_total=6628208865i 1453409536840126713
> docker_net,cont_id=5705ba8ed8fb47527410653d60a8bb2f3af5e62372297c419022a3cc6d45d848,\
cont_image=spotify/kafka,cont_name=kafka,network=eth0 \
> docker_container_net,\
container_image=spotify/kafka,container_name=kafka,network=eth0 \
rx_bytes=7468i,rx_dropped=0i,rx_errors=0i,rx_packets=94i,tx_bytes=946i,\
tx_dropped=0i,tx_errors=0i,tx_packets=13i 1453409536840126713
> docker_blkio,cont_id=5705ba8ed8fb47527410653d60a8bb2f3af5e62372297c419022a3cc6d45d848,\
cont_image=spotify/kafka,cont_name=kafka,device=8:0 \
> docker_container_blkio,
container_image=spotify/kafka,container_name=kafka,device=8:0 \
io_service_bytes_recursive_async=80216064i,io_service_bytes_recursive_read=79925248i,\
io_service_bytes_recursive_sync=77824i,io_service_bytes_recursive_total=80293888i,\
io_service_bytes_recursive_write=368640i,io_serviced_recursive_async=6562i,\

View File

@@ -3,6 +3,7 @@ package system
import (
"encoding/json"
"fmt"
"io"
"log"
"regexp"
"strconv"
@@ -10,27 +11,32 @@ import (
"sync"
"time"
"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/plugins/inputs"
"golang.org/x/net/context"
"github.com/fsouza/go-dockerclient"
"github.com/docker/engine-api/client"
"github.com/docker/engine-api/types"
"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/internal"
"github.com/influxdata/telegraf/plugins/inputs"
)
// Docker object
type Docker struct {
Endpoint string
ContainerNames []string
Timeout internal.Duration
client DockerClient
}
// DockerClient interface, useful for testing
type DockerClient interface {
// Docker Client wrapper
// Useful for test
Info() (*docker.Env, error)
ListContainers(opts docker.ListContainersOptions) ([]docker.APIContainers, error)
Stats(opts docker.StatsOptions) error
Info(ctx context.Context) (types.Info, error)
ContainerList(ctx context.Context, options types.ContainerListOptions) ([]types.Container, error)
ContainerStats(ctx context.Context, containerID string, stream bool) (io.ReadCloser, error)
}
// KB, MB, GB, TB, PB...human friendly
const (
KB = 1000
MB = 1000 * KB
@@ -50,30 +56,36 @@ var sampleConfig = `
endpoint = "unix:///var/run/docker.sock"
## Only collect metrics for these containers, collect all if empty
container_names = []
## Timeout for docker list, info, and stats commands
timeout = "5s"
`
// Description returns input description
func (d *Docker) Description() string {
return "Read metrics about docker containers"
}
// SampleConfig prints sampleConfig
func (d *Docker) SampleConfig() string { return sampleConfig }
// Gather starts stats collection
func (d *Docker) Gather(acc telegraf.Accumulator) error {
if d.client == nil {
var c *docker.Client
var c *client.Client
var err error
defaultHeaders := map[string]string{"User-Agent": "engine-api-cli-1.0"}
if d.Endpoint == "ENV" {
c, err = docker.NewClientFromEnv()
c, err = client.NewEnvClient()
if err != nil {
return err
}
} else if d.Endpoint == "" {
c, err = docker.NewClient("unix:///var/run/docker.sock")
c, err = client.NewClient("unix:///var/run/docker.sock", "", nil, defaultHeaders)
if err != nil {
return err
}
} else {
c, err = docker.NewClient(d.Endpoint)
c, err = client.NewClient(d.Endpoint, "", nil, defaultHeaders)
if err != nil {
return err
}
@@ -88,8 +100,10 @@ func (d *Docker) Gather(acc telegraf.Accumulator) error {
}
// List containers
opts := docker.ListContainersOptions{}
containers, err := d.client.ListContainers(opts)
opts := types.ContainerListOptions{}
ctx, cancel := context.WithTimeout(context.Background(), d.Timeout.Duration)
defer cancel()
containers, err := d.client.ContainerList(ctx, opts)
if err != nil {
return err
}
@@ -98,12 +112,12 @@ func (d *Docker) Gather(acc telegraf.Accumulator) error {
var wg sync.WaitGroup
wg.Add(len(containers))
for _, container := range containers {
go func(c docker.APIContainers) {
go func(c types.Container) {
defer wg.Done()
err := d.gatherContainer(c, acc)
if err != nil {
fmt.Println(err.Error())
log.Printf("Error gathering container %s stats: %s\n",
c.Names, err.Error())
}
}(container)
}
@@ -114,23 +128,24 @@ func (d *Docker) Gather(acc telegraf.Accumulator) error {
func (d *Docker) gatherInfo(acc telegraf.Accumulator) error {
// Init vars
var driverStatus [][]string
dataFields := make(map[string]interface{})
metadataFields := make(map[string]interface{})
now := time.Now()
// Get info from docker daemon
info, err := d.client.Info()
ctx, cancel := context.WithTimeout(context.Background(), d.Timeout.Duration)
defer cancel()
info, err := d.client.Info(ctx)
if err != nil {
return err
}
fields := map[string]interface{}{
"n_cpus": info.GetInt64("NCPU"),
"n_used_file_descriptors": info.GetInt64("NFd"),
"n_containers": info.GetInt64("Containers"),
"n_images": info.GetInt64("Images"),
"n_goroutines": info.GetInt64("NGoroutines"),
"n_listener_events": info.GetInt64("NEventsListener"),
"n_cpus": info.NCPU,
"n_used_file_descriptors": info.NFd,
"n_containers": info.Containers,
"n_images": info.Images,
"n_goroutines": info.NGoroutines,
"n_listener_events": info.NEventsListener,
}
// Add metrics
acc.AddFields("docker",
@@ -138,13 +153,11 @@ func (d *Docker) gatherInfo(acc telegraf.Accumulator) error {
nil,
now)
acc.AddFields("docker",
map[string]interface{}{"memory_total": info.GetInt64("MemTotal")},
map[string]interface{}{"memory_total": info.MemTotal},
map[string]string{"unit": "bytes"},
now)
// Get storage metrics
driverStatusRaw := []byte(info.Get("DriverStatus"))
json.Unmarshal(driverStatusRaw, &driverStatus)
for _, rawData := range driverStatus {
for _, rawData := range info.DriverStatus {
// Try to convert string to int (bytes)
value, err := parseSize(rawData[1])
if err != nil {
@@ -159,12 +172,12 @@ func (d *Docker) gatherInfo(acc telegraf.Accumulator) error {
now)
} else if strings.HasPrefix(name, "data_space_") {
// data space
field_name := strings.TrimPrefix(name, "data_space_")
dataFields[field_name] = value
fieldName := strings.TrimPrefix(name, "data_space_")
dataFields[fieldName] = value
} else if strings.HasPrefix(name, "metadata_space_") {
// metadata space
field_name := strings.TrimPrefix(name, "metadata_space_")
metadataFields[field_name] = value
fieldName := strings.TrimPrefix(name, "metadata_space_")
metadataFields[fieldName] = value
}
}
if len(dataFields) > 0 {
@@ -183,9 +196,10 @@ func (d *Docker) gatherInfo(acc telegraf.Accumulator) error {
}
func (d *Docker) gatherContainer(
container docker.APIContainers,
container types.Container,
acc telegraf.Accumulator,
) error {
var v *types.StatsJSON
// Parse container name
cname := "unknown"
if len(container.Names) > 0 {
@@ -194,9 +208,8 @@ func (d *Docker) gatherContainer(
}
tags := map[string]string{
"cont_id": container.ID,
"cont_name": cname,
"cont_image": container.Image,
"container_name": cname,
"container_image": container.Image,
}
if len(d.ContainerNames) > 0 {
if !sliceContains(cname, d.ContainerNames) {
@@ -204,44 +217,36 @@ func (d *Docker) gatherContainer(
}
}
statChan := make(chan *docker.Stats)
done := make(chan bool)
statOpts := docker.StatsOptions{
Stream: false,
ID: container.ID,
Stats: statChan,
Done: done,
Timeout: time.Duration(time.Second * 5),
ctx, cancel := context.WithTimeout(context.Background(), d.Timeout.Duration)
defer cancel()
r, err := d.client.ContainerStats(ctx, container.ID, false)
if err != nil {
log.Printf("Error getting docker stats: %s\n", err.Error())
}
go func() {
err := d.client.Stats(statOpts)
if err != nil {
log.Printf("Error getting docker stats: %s\n", err.Error())
defer r.Close()
dec := json.NewDecoder(r)
if err = dec.Decode(&v); err != nil {
if err == io.EOF {
return nil
}
}()
stat := <-statChan
close(done)
if stat == nil {
return nil
return fmt.Errorf("Error decoding: %s", err.Error())
}
// Add labels to tags
for k, v := range container.Labels {
tags[k] = v
for k, label := range container.Labels {
tags[k] = label
}
gatherContainerStats(stat, acc, tags)
gatherContainerStats(v, acc, tags, container.ID)
return nil
}
func gatherContainerStats(
stat *docker.Stats,
stat *types.StatsJSON,
acc telegraf.Accumulator,
tags map[string]string,
id string,
) {
now := stat.Read
@@ -250,80 +255,83 @@ func gatherContainerStats(
"usage": stat.MemoryStats.Usage,
"fail_count": stat.MemoryStats.Failcnt,
"limit": stat.MemoryStats.Limit,
"total_pgmafault": stat.MemoryStats.Stats.TotalPgmafault,
"cache": stat.MemoryStats.Stats.Cache,
"mapped_file": stat.MemoryStats.Stats.MappedFile,
"total_inactive_file": stat.MemoryStats.Stats.TotalInactiveFile,
"pgpgout": stat.MemoryStats.Stats.Pgpgout,
"rss": stat.MemoryStats.Stats.Rss,
"total_mapped_file": stat.MemoryStats.Stats.TotalMappedFile,
"writeback": stat.MemoryStats.Stats.Writeback,
"unevictable": stat.MemoryStats.Stats.Unevictable,
"pgpgin": stat.MemoryStats.Stats.Pgpgin,
"total_unevictable": stat.MemoryStats.Stats.TotalUnevictable,
"pgmajfault": stat.MemoryStats.Stats.Pgmajfault,
"total_rss": stat.MemoryStats.Stats.TotalRss,
"total_rss_huge": stat.MemoryStats.Stats.TotalRssHuge,
"total_writeback": stat.MemoryStats.Stats.TotalWriteback,
"total_inactive_anon": stat.MemoryStats.Stats.TotalInactiveAnon,
"rss_huge": stat.MemoryStats.Stats.RssHuge,
"hierarchical_memory_limit": stat.MemoryStats.Stats.HierarchicalMemoryLimit,
"total_pgfault": stat.MemoryStats.Stats.TotalPgfault,
"total_active_file": stat.MemoryStats.Stats.TotalActiveFile,
"active_anon": stat.MemoryStats.Stats.ActiveAnon,
"total_active_anon": stat.MemoryStats.Stats.TotalActiveAnon,
"total_pgpgout": stat.MemoryStats.Stats.TotalPgpgout,
"total_cache": stat.MemoryStats.Stats.TotalCache,
"inactive_anon": stat.MemoryStats.Stats.InactiveAnon,
"active_file": stat.MemoryStats.Stats.ActiveFile,
"pgfault": stat.MemoryStats.Stats.Pgfault,
"inactive_file": stat.MemoryStats.Stats.InactiveFile,
"total_pgpgin": stat.MemoryStats.Stats.TotalPgpgin,
"total_pgmafault": stat.MemoryStats.Stats["total_pgmajfault"],
"cache": stat.MemoryStats.Stats["cache"],
"mapped_file": stat.MemoryStats.Stats["mapped_file"],
"total_inactive_file": stat.MemoryStats.Stats["total_inactive_file"],
"pgpgout": stat.MemoryStats.Stats["pagpgout"],
"rss": stat.MemoryStats.Stats["rss"],
"total_mapped_file": stat.MemoryStats.Stats["total_mapped_file"],
"writeback": stat.MemoryStats.Stats["writeback"],
"unevictable": stat.MemoryStats.Stats["unevictable"],
"pgpgin": stat.MemoryStats.Stats["pgpgin"],
"total_unevictable": stat.MemoryStats.Stats["total_unevictable"],
"pgmajfault": stat.MemoryStats.Stats["pgmajfault"],
"total_rss": stat.MemoryStats.Stats["total_rss"],
"total_rss_huge": stat.MemoryStats.Stats["total_rss_huge"],
"total_writeback": stat.MemoryStats.Stats["total_write_back"],
"total_inactive_anon": stat.MemoryStats.Stats["total_inactive_anon"],
"rss_huge": stat.MemoryStats.Stats["rss_huge"],
"hierarchical_memory_limit": stat.MemoryStats.Stats["hierarchical_memory_limit"],
"total_pgfault": stat.MemoryStats.Stats["total_pgfault"],
"total_active_file": stat.MemoryStats.Stats["total_active_file"],
"active_anon": stat.MemoryStats.Stats["active_anon"],
"total_active_anon": stat.MemoryStats.Stats["total_active_anon"],
"total_pgpgout": stat.MemoryStats.Stats["total_pgpgout"],
"total_cache": stat.MemoryStats.Stats["total_cache"],
"inactive_anon": stat.MemoryStats.Stats["inactive_anon"],
"active_file": stat.MemoryStats.Stats["active_file"],
"pgfault": stat.MemoryStats.Stats["pgfault"],
"inactive_file": stat.MemoryStats.Stats["inactive_file"],
"total_pgpgin": stat.MemoryStats.Stats["total_pgpgin"],
"usage_percent": calculateMemPercent(stat),
"container_id": id,
}
acc.AddFields("docker_mem", memfields, tags, now)
acc.AddFields("docker_container_mem", memfields, tags, now)
cpufields := map[string]interface{}{
"usage_total": stat.CPUStats.CPUUsage.TotalUsage,
"usage_in_usermode": stat.CPUStats.CPUUsage.UsageInUsermode,
"usage_in_kernelmode": stat.CPUStats.CPUUsage.UsageInKernelmode,
"usage_system": stat.CPUStats.SystemCPUUsage,
"usage_system": stat.CPUStats.SystemUsage,
"throttling_periods": stat.CPUStats.ThrottlingData.Periods,
"throttling_throttled_periods": stat.CPUStats.ThrottlingData.ThrottledPeriods,
"throttling_throttled_time": stat.CPUStats.ThrottlingData.ThrottledTime,
"usage_percent": calculateCPUPercent(stat),
"container_id": id,
}
cputags := copyTags(tags)
cputags["cpu"] = "cpu-total"
acc.AddFields("docker_cpu", cpufields, cputags, now)
acc.AddFields("docker_container_cpu", cpufields, cputags, now)
for i, percpu := range stat.CPUStats.CPUUsage.PercpuUsage {
percputags := copyTags(tags)
percputags["cpu"] = fmt.Sprintf("cpu%d", i)
acc.AddFields("docker_cpu", map[string]interface{}{"usage_total": percpu}, percputags, now)
acc.AddFields("docker_container_cpu", map[string]interface{}{"usage_total": percpu}, percputags, now)
}
for network, netstats := range stat.Networks {
netfields := map[string]interface{}{
"rx_dropped": netstats.RxDropped,
"rx_bytes": netstats.RxBytes,
"rx_errors": netstats.RxErrors,
"tx_packets": netstats.TxPackets,
"tx_dropped": netstats.TxDropped,
"rx_packets": netstats.RxPackets,
"tx_errors": netstats.TxErrors,
"tx_bytes": netstats.TxBytes,
"rx_dropped": netstats.RxDropped,
"rx_bytes": netstats.RxBytes,
"rx_errors": netstats.RxErrors,
"tx_packets": netstats.TxPackets,
"tx_dropped": netstats.TxDropped,
"rx_packets": netstats.RxPackets,
"tx_errors": netstats.TxErrors,
"tx_bytes": netstats.TxBytes,
"container_id": id,
}
// Create a new network tag dictionary for the "network" tag
nettags := copyTags(tags)
nettags["network"] = network
acc.AddFields("docker_net", netfields, nettags, now)
acc.AddFields("docker_container_net", netfields, nettags, now)
}
gatherBlockIOMetrics(stat, acc, tags, now)
gatherBlockIOMetrics(stat, acc, tags, now, id)
}
func calculateMemPercent(stat *docker.Stats) float64 {
func calculateMemPercent(stat *types.StatsJSON) float64 {
var memPercent = 0.0
if stat.MemoryStats.Limit > 0 {
memPercent = float64(stat.MemoryStats.Usage) / float64(stat.MemoryStats.Limit) * 100.0
@@ -331,11 +339,11 @@ func calculateMemPercent(stat *docker.Stats) float64 {
return memPercent
}
func calculateCPUPercent(stat *docker.Stats) float64 {
func calculateCPUPercent(stat *types.StatsJSON) float64 {
var cpuPercent = 0.0
// calculate the change for the cpu and system usage of the container in between readings
cpuDelta := float64(stat.CPUStats.CPUUsage.TotalUsage) - float64(stat.PreCPUStats.CPUUsage.TotalUsage)
systemDelta := float64(stat.CPUStats.SystemCPUUsage) - float64(stat.PreCPUStats.SystemCPUUsage)
systemDelta := float64(stat.CPUStats.SystemUsage) - float64(stat.PreCPUStats.SystemUsage)
if systemDelta > 0.0 && cpuDelta > 0.0 {
cpuPercent = (cpuDelta / systemDelta) * float64(len(stat.CPUStats.CPUUsage.PercpuUsage)) * 100.0
@@ -344,16 +352,17 @@ func calculateCPUPercent(stat *docker.Stats) float64 {
}
func gatherBlockIOMetrics(
stat *docker.Stats,
stat *types.StatsJSON,
acc telegraf.Accumulator,
tags map[string]string,
now time.Time,
id string,
) {
blkioStats := stat.BlkioStats
// Make a map of devices to their block io stats
deviceStatMap := make(map[string]map[string]interface{})
for _, metric := range blkioStats.IOServiceBytesRecursive {
for _, metric := range blkioStats.IoServiceBytesRecursive {
device := fmt.Sprintf("%d:%d", metric.Major, metric.Minor)
_, ok := deviceStatMap[device]
if !ok {
@@ -364,7 +373,7 @@ func gatherBlockIOMetrics(
deviceStatMap[device][field] = metric.Value
}
for _, metric := range blkioStats.IOServicedRecursive {
for _, metric := range blkioStats.IoServicedRecursive {
device := fmt.Sprintf("%d:%d", metric.Major, metric.Minor)
_, ok := deviceStatMap[device]
if !ok {
@@ -375,46 +384,45 @@ func gatherBlockIOMetrics(
deviceStatMap[device][field] = metric.Value
}
for _, metric := range blkioStats.IOQueueRecursive {
for _, metric := range blkioStats.IoQueuedRecursive {
device := fmt.Sprintf("%d:%d", metric.Major, metric.Minor)
field := fmt.Sprintf("io_queue_recursive_%s", strings.ToLower(metric.Op))
deviceStatMap[device][field] = metric.Value
}
for _, metric := range blkioStats.IOServiceTimeRecursive {
for _, metric := range blkioStats.IoServiceTimeRecursive {
device := fmt.Sprintf("%d:%d", metric.Major, metric.Minor)
field := fmt.Sprintf("io_service_time_recursive_%s", strings.ToLower(metric.Op))
deviceStatMap[device][field] = metric.Value
}
for _, metric := range blkioStats.IOWaitTimeRecursive {
for _, metric := range blkioStats.IoWaitTimeRecursive {
device := fmt.Sprintf("%d:%d", metric.Major, metric.Minor)
field := fmt.Sprintf("io_wait_time_%s", strings.ToLower(metric.Op))
deviceStatMap[device][field] = metric.Value
}
for _, metric := range blkioStats.IOMergedRecursive {
for _, metric := range blkioStats.IoMergedRecursive {
device := fmt.Sprintf("%d:%d", metric.Major, metric.Minor)
field := fmt.Sprintf("io_merged_recursive_%s", strings.ToLower(metric.Op))
deviceStatMap[device][field] = metric.Value
}
for _, metric := range blkioStats.IOTimeRecursive {
for _, metric := range blkioStats.IoTimeRecursive {
device := fmt.Sprintf("%d:%d", metric.Major, metric.Minor)
field := fmt.Sprintf("io_time_recursive_%s", strings.ToLower(metric.Op))
deviceStatMap[device][field] = metric.Value
deviceStatMap[device]["io_time_recursive"] = metric.Value
}
for _, metric := range blkioStats.SectorsRecursive {
device := fmt.Sprintf("%d:%d", metric.Major, metric.Minor)
field := fmt.Sprintf("sectors_recursive_%s", strings.ToLower(metric.Op))
deviceStatMap[device][field] = metric.Value
deviceStatMap[device]["sectors_recursive"] = metric.Value
}
for device, fields := range deviceStatMap {
iotags := copyTags(tags)
iotags["device"] = device
acc.AddFields("docker_blkio", fields, iotags, now)
fields["container_id"] = id
acc.AddFields("docker_container_blkio", fields, iotags, now)
}
}

View File

@@ -1,13 +1,18 @@
package system
import (
"encoding/json"
"io"
"io/ioutil"
"strings"
"testing"
"time"
"golang.org/x/net/context"
"github.com/docker/engine-api/types"
"github.com/docker/engine-api/types/registry"
"github.com/influxdata/telegraf/testutil"
"github.com/fsouza/go-dockerclient"
"github.com/stretchr/testify/require"
)
@@ -16,26 +21,26 @@ func TestDockerGatherContainerStats(t *testing.T) {
stats := testStats()
tags := map[string]string{
"cont_id": "foobarbaz",
"cont_name": "redis",
"cont_image": "redis/image",
"container_name": "redis",
"container_image": "redis/image",
}
gatherContainerStats(stats, &acc, tags)
gatherContainerStats(stats, &acc, tags, "123456789")
// test docker_net measurement
// test docker_container_net measurement
netfields := map[string]interface{}{
"rx_dropped": uint64(1),
"rx_bytes": uint64(2),
"rx_errors": uint64(3),
"tx_packets": uint64(4),
"tx_dropped": uint64(1),
"rx_packets": uint64(2),
"tx_errors": uint64(3),
"tx_bytes": uint64(4),
"rx_dropped": uint64(1),
"rx_bytes": uint64(2),
"rx_errors": uint64(3),
"tx_packets": uint64(4),
"tx_dropped": uint64(1),
"rx_packets": uint64(2),
"tx_errors": uint64(3),
"tx_bytes": uint64(4),
"container_id": "123456789",
}
nettags := copyTags(tags)
nettags["network"] = "eth0"
acc.AssertContainsTaggedFields(t, "docker_net", netfields, nettags)
acc.AssertContainsTaggedFields(t, "docker_container_net", netfields, nettags)
// test docker_blkio measurement
blkiotags := copyTags(tags)
@@ -43,10 +48,11 @@ func TestDockerGatherContainerStats(t *testing.T) {
blkiofields := map[string]interface{}{
"io_service_bytes_recursive_read": uint64(100),
"io_serviced_recursive_write": uint64(101),
"container_id": "123456789",
}
acc.AssertContainsTaggedFields(t, "docker_blkio", blkiofields, blkiotags)
acc.AssertContainsTaggedFields(t, "docker_container_blkio", blkiofields, blkiotags)
// test docker_mem measurement
// test docker_container_mem measurement
memfields := map[string]interface{}{
"max_usage": uint64(1001),
"usage": uint64(1111),
@@ -82,11 +88,12 @@ func TestDockerGatherContainerStats(t *testing.T) {
"inactive_file": uint64(3),
"total_pgpgin": uint64(4),
"usage_percent": float64(55.55),
"container_id": "123456789",
}
acc.AssertContainsTaggedFields(t, "docker_mem", memfields, tags)
acc.AssertContainsTaggedFields(t, "docker_container_mem", memfields, tags)
// test docker_cpu measurement
// test docker_container_cpu measurement
cputags := copyTags(tags)
cputags["cpu"] = "cpu-total"
cpufields := map[string]interface{}{
@@ -98,74 +105,75 @@ func TestDockerGatherContainerStats(t *testing.T) {
"throttling_throttled_periods": uint64(0),
"throttling_throttled_time": uint64(0),
"usage_percent": float64(400.0),
"container_id": "123456789",
}
acc.AssertContainsTaggedFields(t, "docker_cpu", cpufields, cputags)
acc.AssertContainsTaggedFields(t, "docker_container_cpu", cpufields, cputags)
cputags["cpu"] = "cpu0"
cpu0fields := map[string]interface{}{
"usage_total": uint64(1),
}
acc.AssertContainsTaggedFields(t, "docker_cpu", cpu0fields, cputags)
acc.AssertContainsTaggedFields(t, "docker_container_cpu", cpu0fields, cputags)
cputags["cpu"] = "cpu1"
cpu1fields := map[string]interface{}{
"usage_total": uint64(1002),
}
acc.AssertContainsTaggedFields(t, "docker_cpu", cpu1fields, cputags)
acc.AssertContainsTaggedFields(t, "docker_container_cpu", cpu1fields, cputags)
}
func testStats() *docker.Stats {
stats := &docker.Stats{
Read: time.Now(),
Networks: make(map[string]docker.NetworkStats),
}
func testStats() *types.StatsJSON {
stats := &types.StatsJSON{}
stats.Read = time.Now()
stats.Networks = make(map[string]types.NetworkStats)
stats.CPUStats.CPUUsage.PercpuUsage = []uint64{1, 1002}
stats.CPUStats.CPUUsage.UsageInUsermode = 100
stats.CPUStats.CPUUsage.TotalUsage = 500
stats.CPUStats.CPUUsage.UsageInKernelmode = 200
stats.CPUStats.SystemCPUUsage = 100
stats.CPUStats.SystemUsage = 100
stats.CPUStats.ThrottlingData.Periods = 1
stats.PreCPUStats.CPUUsage.TotalUsage = 400
stats.PreCPUStats.SystemCPUUsage = 50
stats.PreCPUStats.SystemUsage = 50
stats.MemoryStats.Stats.TotalPgmafault = 0
stats.MemoryStats.Stats.Cache = 0
stats.MemoryStats.Stats.MappedFile = 0
stats.MemoryStats.Stats.TotalInactiveFile = 0
stats.MemoryStats.Stats.Pgpgout = 0
stats.MemoryStats.Stats.Rss = 0
stats.MemoryStats.Stats.TotalMappedFile = 0
stats.MemoryStats.Stats.Writeback = 0
stats.MemoryStats.Stats.Unevictable = 0
stats.MemoryStats.Stats.Pgpgin = 0
stats.MemoryStats.Stats.TotalUnevictable = 0
stats.MemoryStats.Stats.Pgmajfault = 0
stats.MemoryStats.Stats.TotalRss = 44
stats.MemoryStats.Stats.TotalRssHuge = 444
stats.MemoryStats.Stats.TotalWriteback = 55
stats.MemoryStats.Stats.TotalInactiveAnon = 0
stats.MemoryStats.Stats.RssHuge = 0
stats.MemoryStats.Stats.HierarchicalMemoryLimit = 0
stats.MemoryStats.Stats.TotalPgfault = 0
stats.MemoryStats.Stats.TotalActiveFile = 0
stats.MemoryStats.Stats.ActiveAnon = 0
stats.MemoryStats.Stats.TotalActiveAnon = 0
stats.MemoryStats.Stats.TotalPgpgout = 0
stats.MemoryStats.Stats.TotalCache = 0
stats.MemoryStats.Stats.InactiveAnon = 0
stats.MemoryStats.Stats.ActiveFile = 1
stats.MemoryStats.Stats.Pgfault = 2
stats.MemoryStats.Stats.InactiveFile = 3
stats.MemoryStats.Stats.TotalPgpgin = 4
stats.MemoryStats.Stats = make(map[string]uint64)
stats.MemoryStats.Stats["total_pgmajfault"] = 0
stats.MemoryStats.Stats["cache"] = 0
stats.MemoryStats.Stats["mapped_file"] = 0
stats.MemoryStats.Stats["total_inactive_file"] = 0
stats.MemoryStats.Stats["pagpgout"] = 0
stats.MemoryStats.Stats["rss"] = 0
stats.MemoryStats.Stats["total_mapped_file"] = 0
stats.MemoryStats.Stats["writeback"] = 0
stats.MemoryStats.Stats["unevictable"] = 0
stats.MemoryStats.Stats["pgpgin"] = 0
stats.MemoryStats.Stats["total_unevictable"] = 0
stats.MemoryStats.Stats["pgmajfault"] = 0
stats.MemoryStats.Stats["total_rss"] = 44
stats.MemoryStats.Stats["total_rss_huge"] = 444
stats.MemoryStats.Stats["total_write_back"] = 55
stats.MemoryStats.Stats["total_inactive_anon"] = 0
stats.MemoryStats.Stats["rss_huge"] = 0
stats.MemoryStats.Stats["hierarchical_memory_limit"] = 0
stats.MemoryStats.Stats["total_pgfault"] = 0
stats.MemoryStats.Stats["total_active_file"] = 0
stats.MemoryStats.Stats["active_anon"] = 0
stats.MemoryStats.Stats["total_active_anon"] = 0
stats.MemoryStats.Stats["total_pgpgout"] = 0
stats.MemoryStats.Stats["total_cache"] = 0
stats.MemoryStats.Stats["inactive_anon"] = 0
stats.MemoryStats.Stats["active_file"] = 1
stats.MemoryStats.Stats["pgfault"] = 2
stats.MemoryStats.Stats["inactive_file"] = 3
stats.MemoryStats.Stats["total_pgpgin"] = 4
stats.MemoryStats.MaxUsage = 1001
stats.MemoryStats.Usage = 1111
stats.MemoryStats.Failcnt = 1
stats.MemoryStats.Limit = 2000
stats.Networks["eth0"] = docker.NetworkStats{
stats.Networks["eth0"] = types.NetworkStats{
RxDropped: 1,
RxBytes: 2,
RxErrors: 3,
@@ -176,23 +184,23 @@ func testStats() *docker.Stats {
TxBytes: 4,
}
sbr := docker.BlkioStatsEntry{
sbr := types.BlkioStatEntry{
Major: 6,
Minor: 0,
Op: "read",
Value: 100,
}
sr := docker.BlkioStatsEntry{
sr := types.BlkioStatEntry{
Major: 6,
Minor: 0,
Op: "write",
Value: 101,
}
stats.BlkioStats.IOServiceBytesRecursive = append(
stats.BlkioStats.IOServiceBytesRecursive, sbr)
stats.BlkioStats.IOServicedRecursive = append(
stats.BlkioStats.IOServicedRecursive, sr)
stats.BlkioStats.IoServiceBytesRecursive = append(
stats.BlkioStats.IoServiceBytesRecursive, sbr)
stats.BlkioStats.IoServicedRecursive = append(
stats.BlkioStats.IoServicedRecursive, sr)
return stats
}
@@ -200,35 +208,78 @@ func testStats() *docker.Stats {
type FakeDockerClient struct {
}
func (d FakeDockerClient) Info() (*docker.Env, error) {
env := docker.Env{"Containers=108", "OomKillDisable=false", "SystemTime=2016-02-24T00:55:09.15073105-05:00", "NEventsListener=0", "ID=5WQQ:TFWR:FDNG:OKQ3:37Y4:FJWG:QIKK:623T:R3ME:QTKB:A7F7:OLHD", "Debug=false", "LoggingDriver=json-file", "KernelVersion=4.3.0-1-amd64", "IndexServerAddress=https://index.docker.io/v1/", "MemTotal=3840757760", "Images=199", "CpuCfsQuota=true", "Name=absol", "SwapLimit=false", "IPv4Forwarding=true", "ExecutionDriver=native-0.2", "InitSha1=23a51f3c916d2b5a3bbb31caf301fd2d14edd518", "ExperimentalBuild=false", "CpuCfsPeriod=true", "RegistryConfig={\"IndexConfigs\":{\"docker.io\":{\"Mirrors\":null,\"Name\":\"docker.io\",\"Official\":true,\"Secure\":true}},\"InsecureRegistryCIDRs\":[\"127.0.0.0/8\"],\"Mirrors\":null}", "OperatingSystem=Linux Mint LMDE (containerized)", "BridgeNfIptables=true", "HttpsProxy=", "Labels=null", "MemoryLimit=false", "DriverStatus=[[\"Pool Name\",\"docker-8:1-1182287-pool\"],[\"Pool Blocksize\",\"65.54 kB\"],[\"Backing Filesystem\",\"extfs\"],[\"Data file\",\"/dev/loop0\"],[\"Metadata file\",\"/dev/loop1\"],[\"Data Space Used\",\"17.3 GB\"],[\"Data Space Total\",\"107.4 GB\"],[\"Data Space Available\",\"36.53 GB\"],[\"Metadata Space Used\",\"20.97 MB\"],[\"Metadata Space Total\",\"2.147 GB\"],[\"Metadata Space Available\",\"2.127 GB\"],[\"Udev Sync Supported\",\"true\"],[\"Deferred Removal Enabled\",\"false\"],[\"Data loop file\",\"/var/lib/docker/devicemapper/devicemapper/data\"],[\"Metadata loop file\",\"/var/lib/docker/devicemapper/devicemapper/metadata\"],[\"Library Version\",\"1.02.115 (2016-01-25)\"]]", "NFd=19", "HttpProxy=", "Driver=devicemapper", "NGoroutines=39", "InitPath=/usr/lib/docker.io/dockerinit", "NCPU=4", "DockerRootDir=/var/lib/docker", "NoProxy=", "BridgeNfIp6tables=true"}
return &env, nil
func (d FakeDockerClient) Info(ctx context.Context) (types.Info, error) {
env := types.Info{
Containers: 108,
OomKillDisable: false,
SystemTime: "2016-02-24T00:55:09.15073105-05:00",
NEventsListener: 0,
ID: "5WQQ:TFWR:FDNG:OKQ3:37Y4:FJWG:QIKK:623T:R3ME:QTKB:A7F7:OLHD",
Debug: false,
LoggingDriver: "json-file",
KernelVersion: "4.3.0-1-amd64",
IndexServerAddress: "https://index.docker.io/v1/",
MemTotal: 3840757760,
Images: 199,
CPUCfsQuota: true,
Name: "absol",
SwapLimit: false,
IPv4Forwarding: true,
ExecutionDriver: "native-0.2",
ExperimentalBuild: false,
CPUCfsPeriod: true,
RegistryConfig: &registry.ServiceConfig{
IndexConfigs: map[string]*registry.IndexInfo{
"docker.io": {
Name: "docker.io",
Mirrors: []string{},
Official: true,
Secure: true,
},
}, InsecureRegistryCIDRs: []*registry.NetIPNet{{IP: []byte{127, 0, 0, 0}, Mask: []byte{255, 0, 0, 0}}}, Mirrors: []string{}},
OperatingSystem: "Linux Mint LMDE (containerized)",
BridgeNfIptables: true,
HTTPSProxy: "",
Labels: []string{},
MemoryLimit: false,
DriverStatus: [][2]string{{"Pool Name", "docker-8:1-1182287-pool"}, {"Pool Blocksize", "65.54 kB"}, {"Backing Filesystem", "extfs"}, {"Data file", "/dev/loop0"}, {"Metadata file", "/dev/loop1"}, {"Data Space Used", "17.3 GB"}, {"Data Space Total", "107.4 GB"}, {"Data Space Available", "36.53 GB"}, {"Metadata Space Used", "20.97 MB"}, {"Metadata Space Total", "2.147 GB"}, {"Metadata Space Available", "2.127 GB"}, {"Udev Sync Supported", "true"}, {"Deferred Removal Enabled", "false"}, {"Data loop file", "/var/lib/docker/devicemapper/devicemapper/data"}, {"Metadata loop file", "/var/lib/docker/devicemapper/devicemapper/metadata"}, {"Library Version", "1.02.115 (2016-01-25)"}},
NFd: 19,
HTTPProxy: "",
Driver: "devicemapper",
NGoroutines: 39,
NCPU: 4,
DockerRootDir: "/var/lib/docker",
NoProxy: "",
BridgeNfIP6tables: true,
}
return env, nil
}
func (d FakeDockerClient) ListContainers(opts docker.ListContainersOptions) ([]docker.APIContainers, error) {
container1 := docker.APIContainers{
func (d FakeDockerClient) ContainerList(octx context.Context, options types.ContainerListOptions) ([]types.Container, error) {
container1 := types.Container{
ID: "e2173b9478a6ae55e237d4d74f8bbb753f0817192b5081334dc78476296b7dfb",
Names: []string{"/etcd"},
Image: "quay.io/coreos/etcd:v2.2.2",
Command: "/etcd -name etcd0 -advertise-client-urls http://localhost:2379 -listen-client-urls http://0.0.0.0:2379",
Created: 1455941930,
Status: "Up 4 hours",
Ports: []docker.APIPort{
docker.APIPort{
Ports: []types.Port{
types.Port{
PrivatePort: 7001,
PublicPort: 0,
Type: "tcp",
},
docker.APIPort{
types.Port{
PrivatePort: 4001,
PublicPort: 0,
Type: "tcp",
},
docker.APIPort{
types.Port{
PrivatePort: 2380,
PublicPort: 0,
Type: "tcp",
},
docker.APIPort{
types.Port{
PrivatePort: 2379,
PublicPort: 2379,
Type: "tcp",
@@ -237,31 +288,31 @@ func (d FakeDockerClient) ListContainers(opts docker.ListContainersOptions) ([]d
},
SizeRw: 0,
SizeRootFs: 0,
Names: []string{"/etcd"},
}
container2 := docker.APIContainers{
container2 := types.Container{
ID: "b7dfbb9478a6ae55e237d4d74f8bbb753f0817192b5081334dc78476296e2173",
Names: []string{"/etcd2"},
Image: "quay.io/coreos/etcd:v2.2.2",
Command: "/etcd -name etcd2 -advertise-client-urls http://localhost:2379 -listen-client-urls http://0.0.0.0:2379",
Created: 1455941933,
Status: "Up 4 hours",
Ports: []docker.APIPort{
docker.APIPort{
Ports: []types.Port{
types.Port{
PrivatePort: 7002,
PublicPort: 0,
Type: "tcp",
},
docker.APIPort{
types.Port{
PrivatePort: 4002,
PublicPort: 0,
Type: "tcp",
},
docker.APIPort{
types.Port{
PrivatePort: 2381,
PublicPort: 0,
Type: "tcp",
},
docker.APIPort{
types.Port{
PrivatePort: 2382,
PublicPort: 2382,
Type: "tcp",
@@ -270,21 +321,19 @@ func (d FakeDockerClient) ListContainers(opts docker.ListContainersOptions) ([]d
},
SizeRw: 0,
SizeRootFs: 0,
Names: []string{"/etcd2"},
}
containers := []docker.APIContainers{container1, container2}
containers := []types.Container{container1, container2}
return containers, nil
//#{e6a96c84ca91a5258b7cb752579fb68826b68b49ff957487695cd4d13c343b44 titilambert/snmpsim /bin/sh -c 'snmpsimd --agent-udpv4-endpoint=0.0.0.0:31161 --process-user=root --process-group=user' 1455724831 Up 4 hours [{31161 31161 udp 0.0.0.0}] 0 0 [/snmp] map[]}]2016/02/24 01:05:01 Gathered metrics, (3s interval), from 1 inputs in 1.233836656s
}
func (d FakeDockerClient) Stats(opts docker.StatsOptions) error {
func (d FakeDockerClient) ContainerStats(ctx context.Context, containerID string, stream bool) (io.ReadCloser, error) {
var stat io.ReadCloser
jsonStat := `{"read":"2016-02-24T11:42:27.472459608-05:00","memory_stats":{"stats":{},"limit":18935443456},"blkio_stats":{"io_service_bytes_recursive":[{"major":252,"minor":1,"op":"Read","value":753664},{"major":252,"minor":1,"op":"Write"},{"major":252,"minor":1,"op":"Sync"},{"major":252,"minor":1,"op":"Async","value":753664},{"major":252,"minor":1,"op":"Total","value":753664}],"io_serviced_recursive":[{"major":252,"minor":1,"op":"Read","value":26},{"major":252,"minor":1,"op":"Write"},{"major":252,"minor":1,"op":"Sync"},{"major":252,"minor":1,"op":"Async","value":26},{"major":252,"minor":1,"op":"Total","value":26}]},"cpu_stats":{"cpu_usage":{"percpu_usage":[17871,4959158,1646137,1231652,11829401,244656,369972,0],"usage_in_usermode":10000000,"total_usage":20298847},"system_cpu_usage":24052607520000000,"throttling_data":{}},"precpu_stats":{"cpu_usage":{"percpu_usage":[17871,4959158,1646137,1231652,11829401,244656,369972,0],"usage_in_usermode":10000000,"total_usage":20298847},"system_cpu_usage":24052599550000000,"throttling_data":{}}}`
var stat docker.Stats
json.Unmarshal([]byte(jsonStat), &stat)
opts.Stats <- &stat
return nil
stat = ioutil.NopCloser(strings.NewReader(jsonStat))
return stat, nil
}
func TestDockerGatherInfo(t *testing.T) {
@@ -299,12 +348,12 @@ func TestDockerGatherInfo(t *testing.T) {
acc.AssertContainsTaggedFields(t,
"docker",
map[string]interface{}{
"n_listener_events": int64(0),
"n_cpus": int64(4),
"n_used_file_descriptors": int64(19),
"n_containers": int64(108),
"n_images": int64(199),
"n_goroutines": int64(39),
"n_listener_events": int(0),
"n_cpus": int(4),
"n_used_file_descriptors": int(19),
"n_containers": int(108),
"n_images": int(199),
"n_goroutines": int(39),
},
map[string]string{},
)
@@ -321,19 +370,18 @@ func TestDockerGatherInfo(t *testing.T) {
},
)
acc.AssertContainsTaggedFields(t,
"docker_cpu",
"docker_container_cpu",
map[string]interface{}{
"usage_total": uint64(1231652),
},
map[string]string{
"cont_id": "b7dfbb9478a6ae55e237d4d74f8bbb753f0817192b5081334dc78476296e2173",
"cont_name": "etcd2",
"cont_image": "quay.io/coreos/etcd:v2.2.2",
"cpu": "cpu3",
"container_name": "etcd2",
"container_image": "quay.io/coreos/etcd:v2.2.2",
"cpu": "cpu3",
},
)
acc.AssertContainsTaggedFields(t,
"docker_mem",
"docker_container_mem",
map[string]interface{}{
"total_pgpgout": uint64(0),
"usage_percent": float64(0),
@@ -369,11 +417,11 @@ func TestDockerGatherInfo(t *testing.T) {
"pgfault": uint64(0),
"usage": uint64(0),
"limit": uint64(18935443456),
"container_id": "b7dfbb9478a6ae55e237d4d74f8bbb753f0817192b5081334dc78476296e2173",
},
map[string]string{
"cont_id": "b7dfbb9478a6ae55e237d4d74f8bbb753f0817192b5081334dc78476296e2173",
"cont_name": "etcd2",
"cont_image": "quay.io/coreos/etcd:v2.2.2",
"container_name": "etcd2",
"container_image": "quay.io/coreos/etcd:v2.2.2",
},
)

View File

@@ -10,20 +10,25 @@ domains. You can read Dovecot's documentation
```
# Read metrics about dovecot servers
[[inputs.dovecot]]
# Dovecot servers
# specify dovecot servers via an address:port list
# e.g.
# localhost:24242
#
# If no servers are specified, then localhost is used as the host.
## specify dovecot servers via an address:port list
## e.g.
## localhost:24242
##
## If no servers are specified, then localhost is used as the host.
servers = ["localhost:24242"]
# Only collect metrics for these domains, collect all if empty
domains = []
## Type is one of "user", "domain", "ip", or "global"
type = "global"
## Wildcard matches like "*.com". An empty string "" is same as "*"
## If type = "ip" filters should be <IP/network>
filters = [""]
```
### Tags:
server: hostname
type: query type
ip: ip addr
user: username
domain: domain name
@@ -33,7 +38,7 @@ domains. You can read Dovecot's documentation
last_update time.Time
num_logins int64
num_cmds int64
num_connected_sessions int64
num_connected_sessions int64 ## not in <user> type
user_cpu float32
sys_cpu float32
clock_time float64
@@ -57,11 +62,13 @@ domains. You can read Dovecot's documentation
### Example Output:
```
telegraf -config telegraf.cfg -input-filter dovecot -test
telegraf -config t.cfg -input-filter dovecot -test
* Plugin: dovecot, Collection 1
> dovecot,domain=xxxxx.it,server=dovecot--1.mail.sys clock_time=12105746411632.5,disk_input=115285225472i,disk_output=4885067755520i,invol_cs=169701886i,last_update="2016-02-09 08:49:47.000014113 +0100 CET",mail_cache_hits=441828i,mail_lookup_attr=0i,mail_lookup_path=25323i,mail_read_bytes=241188145i,mail_read_count=11719i,maj_faults=3168i,min_faults=321438988i,num_cmds=51635i,num_connected_sessions=2i,num_logins=17149i,read_bytes=7939026951110i,read_count=3716991752i,reset_timestamp="2016-01-28 09:34:36 +0100 CET",sys_cpu=222595.288,user_cpu=267468.08,vol_cs=3288715920i,write_bytes=4483648967059i,write_count=1640646952i 1455004219924838345
> dovecot,domain=yyyyy.com,server=dovecot-1.mail.sys clock_time=6650794455331782,disk_input=61957695569920i,disk_output=2638244004487168i,invol_cs=2004805041i,last_update="2016-02-09 08:49:49.000251296 +0100 CET",mail_cache_hits=2499112513i,mail_lookup_attr=506730i,mail_lookup_path=39128227i,mail_read_bytes=1076496874501i,mail_read_count=32615262i,maj_faults=1643304i,min_faults=4216116325i,num_cmds=85785559i,num_connected_sessions=1177i,num_logins=11658255i,read_bytes=4289150974554145i,read_count=1112000703i,reset_timestamp="2016-01-28 09:31:26 +0100 CET",sys_cpu=121125923.032,user_cpu=145561336.428,vol_cs=205451885i,write_bytes=2420130526835796i,write_count=2991367252i 1455004219925152529
> dovecot,domain=xxxxx.it,server=dovecot-2.mail.sys clock_time=10710826586999.143,disk_input=79792410624i,disk_output=4496066158592i,invol_cs=150426876i,last_update="2016-02-09 08:48:19.000209134 +0100 CET",mail_cache_hits=5480869i,mail_lookup_attr=0i,mail_lookup_path=122563i,mail_read_bytes=340746273i,mail_read_count=44275i,maj_faults=1722i,min_faults=288071875i,num_cmds=50098i,num_connected_sessions=0i,num_logins=16389i,read_bytes=7259551999517i,read_count=3396625369i,reset_timestamp="2016-01-28 09:31:29 +0100 CET",sys_cpu=200762.792,user_cpu=242477.664,vol_cs=2996657358i,write_bytes=4133381575263i,write_count=1497242759i 1455004219924888283
> dovecot,domain=yyyyy.com,server=dovecot-2.mail.sys clock_time=6522131245483702,disk_input=48259150004224i,disk_output=2754333359087616i,invol_cs=2294595260i,last_update="2016-02-09 08:49:49.000251919 +0100 CET",mail_cache_hits=2139113611i,mail_lookup_attr=520276i,mail_lookup_path=37940318i,mail_read_bytes=1088002215022i,mail_read_count=31350271i,maj_faults=994420i,min_faults=1486260543i,num_cmds=40414997i,num_connected_sessions=978i,num_logins=11259672i,read_bytes=4445546612487315i,read_count=1763534543i,reset_timestamp="2016-01-28 09:31:24 +0100 CET",sys_cpu=123655962.668,user_cpu=149259327.032,vol_cs=4215130546i,write_bytes=2531186030222761i,write_count=2186579650i 1455004219925398372
```
> dovecot,ip=192.168.0.1,server=dovecot-1.domain.test,type=ip clock_time=0,disk_input=0i,disk_output=0i,invol_cs=0i,last_update="2016-04-08 10:59:47.000208479 +0200 CEST",mail_cache_hits=0i,mail_lookup_attr=0i,mail_lookup_path=0i,mail_read_bytes=0i,mail_read_count=0i,maj_faults=0i,min_faults=0i,num_cmds=12i,num_connected_sessions=0i,num_logins=6i,read_bytes=0i,read_count=0i,reset_timestamp="2016-04-08 10:33:34 +0200 CEST",sys_cpu=0,user_cpu=0,vol_cs=0i,write_bytes=0i,write_count=0i 1460106251633824223
* Plugin: dovecot, Collection 1
> dovecot,server=dovecot-1.domain.test,type=user,user=user-1@domain.test clock_time=0.00006,disk_input=405504i,disk_output=77824i,invol_cs=67i,last_update="2016-04-08 11:02:55.000111634 +0200 CEST",mail_cache_hits=26i,mail_lookup_attr=0i,mail_lookup_path=6i,mail_read_bytes=86233i,mail_read_count=5i,maj_faults=0i,min_faults=975i,num_cmds=41i,num_logins=3i,read_bytes=368833i,read_count=394i,reset_timestamp="2016-04-08 11:01:32 +0200 CEST",sys_cpu=0.008,user_cpu=0.004,vol_cs=323i,write_bytes=105086i,write_count=176i 1460106256637049167
* Plugin: dovecot, Collection 1
> dovecot,domain=domain.test,server=dovecot-1.domain.test,type=domain clock_time=100896189179847.7,disk_input=6467588263936i,disk_output=17933680439296i,invol_cs=1194808498i,last_update="2016-04-08 11:04:08.000377367 +0200 CEST",mail_cache_hits=46455781i,mail_lookup_attr=0i,mail_lookup_path=571490i,mail_read_bytes=79287033067i,mail_read_count=491243i,maj_faults=16992i,min_faults=1278442541i,num_cmds=606005i,num_connected_sessions=6597i,num_logins=166381i,read_bytes=30231409780721i,read_count=1624912080i,reset_timestamp="2016-04-08 10:28:45 +0200 CEST",sys_cpu=156440.372,user_cpu=216676.476,vol_cs=2749291157i,write_bytes=17097106707594i,write_count=944448998i 1460106261639672622
* Plugin: dovecot, Collection 1
> dovecot,server=dovecot-1.domain.test,type=global clock_time=101196971074203.94,disk_input=6493168218112i,disk_output=17978638815232i,invol_cs=1198855447i,last_update="2016-04-08 11:04:13.000379245 +0200 CEST",mail_cache_hits=68192209i,mail_lookup_attr=0i,mail_lookup_path=653861i,mail_read_bytes=86705151847i,mail_read_count=566125i,maj_faults=17208i,min_faults=1286179702i,num_cmds=917469i,num_connected_sessions=8896i,num_logins=174827i,read_bytes=30327690466186i,read_count=1772396430i,reset_timestamp="2016-04-08 10:28:45 +0200 CEST",sys_cpu=157965.692,user_cpu=219337.48,vol_cs=2827615787i,write_bytes=17150837661940i,write_count=992653220i 1460106266642153907
```

View File

@@ -4,6 +4,7 @@ import (
"bytes"
"fmt"
"io"
// "log"
"net"
"strconv"
"strings"
@@ -15,8 +16,9 @@ import (
)
type Dovecot struct {
Type string
Filters []string
Servers []string
Domains []string
}
func (d *Dovecot) Description() string {
@@ -30,12 +32,19 @@ var sampleConfig = `
##
## If no servers are specified, then localhost is used as the host.
servers = ["localhost:24242"]
## Only collect metrics for these domains, collect all if empty
domains = []
## Type is one of "user", "domain", "ip", or "global"
type = "global"
## Wildcard matches like "*.com". An empty string "" is same as "*"
## If type = "ip" filters should be <IP/network>
filters = [""]
`
var defaultTimeout = time.Second * time.Duration(5)
var validQuery = map[string]bool{
"user": true, "domain": true, "global": true, "ip": true,
}
func (d *Dovecot) SampleConfig() string { return sampleConfig }
const defaultPort = "24242"
@@ -43,6 +52,11 @@ const defaultPort = "24242"
// Reads stats from all configured servers.
func (d *Dovecot) Gather(acc telegraf.Accumulator) error {
if !validQuery[d.Type] {
return fmt.Errorf("Error: %s is not a valid query type\n",
d.Type)
}
if len(d.Servers) == 0 {
d.Servers = append(d.Servers, "127.0.0.1:24242")
}
@@ -51,18 +65,18 @@ func (d *Dovecot) Gather(acc telegraf.Accumulator) error {
var outerr error
var domains = make(map[string]bool)
for _, dom := range d.Domains {
domains[dom] = true
if len(d.Filters) <= 0 {
d.Filters = append(d.Filters, "")
}
for _, serv := range d.Servers {
wg.Add(1)
go func(serv string) {
defer wg.Done()
outerr = d.gatherServer(serv, acc, domains)
}(serv)
for _, filter := range d.Filters {
wg.Add(1)
go func(serv string, filter string) {
defer wg.Done()
outerr = d.gatherServer(serv, acc, d.Type, filter)
}(serv, filter)
}
}
wg.Wait()
@@ -70,7 +84,8 @@ func (d *Dovecot) Gather(acc telegraf.Accumulator) error {
return outerr
}
func (d *Dovecot) gatherServer(addr string, acc telegraf.Accumulator, doms map[string]bool) error {
func (d *Dovecot) gatherServer(addr string, acc telegraf.Accumulator, qtype string, filter string) error {
_, _, err := net.SplitHostPort(addr)
if err != nil {
return fmt.Errorf("Error: %s on url %s\n", err, addr)
@@ -85,17 +100,22 @@ func (d *Dovecot) gatherServer(addr string, acc telegraf.Accumulator, doms map[s
// Extend connection
c.SetDeadline(time.Now().Add(defaultTimeout))
c.Write([]byte("EXPORT\tdomain\n\n"))
msg := fmt.Sprintf("EXPORT\t%s", qtype)
if len(filter) > 0 {
msg += fmt.Sprintf("\t%s=%s", qtype, filter)
}
msg += "\n"
c.Write([]byte(msg))
var buf bytes.Buffer
io.Copy(&buf, c)
// buf := bufio.NewReader(c)
host, _, _ := net.SplitHostPort(addr)
return gatherStats(&buf, acc, doms, host)
return gatherStats(&buf, acc, host, qtype)
}
func gatherStats(buf *bytes.Buffer, acc telegraf.Accumulator, doms map[string]bool, host string) error {
func gatherStats(buf *bytes.Buffer, acc telegraf.Accumulator, host string, qtype string) error {
lines := strings.Split(buf.String(), "\n")
head := strings.Split(lines[0], "\t")
@@ -106,16 +126,18 @@ func gatherStats(buf *bytes.Buffer, acc telegraf.Accumulator, doms map[string]bo
continue
}
val := strings.Split(vals[i], "\t")
fields := make(map[string]interface{})
if len(doms) > 0 && !doms[val[0]] {
continue
tags := map[string]string{"server": host, "type": qtype}
if qtype != "global" {
tags[qtype] = val[0]
}
tags := map[string]string{"server": host, "domain": val[0]}
for n := range val {
switch head[n] {
case "domain":
case qtype:
continue
// fields[head[n]] = val[n]
case "user_cpu", "sys_cpu", "clock_time":
fields[head[n]] = secParser(val[n])
case "reset_timestamp", "last_update":

View File

@@ -15,17 +15,6 @@ func TestDovecot(t *testing.T) {
t.Skip("Skipping integration test in short mode")
}
var acc testutil.Accumulator
tags := map[string]string{"server": "dovecot.test", "domain": "domain.test"}
buf := bytes.NewBufferString(sampleStats)
var doms = map[string]bool{
"domain.test": true,
}
err := gatherStats(buf, &acc, doms, "dovecot.test")
require.NoError(t, err)
fields := map[string]interface{}{
"reset_timestamp": time.Unix(1453969886, 0),
"last_update": time.Unix(1454603963, 39864),
@@ -52,10 +41,79 @@ func TestDovecot(t *testing.T) {
"mail_cache_hits": int64(1557255080),
}
var acc testutil.Accumulator
// Test type=global
tags := map[string]string{"server": "dovecot.test", "type": "global"}
buf := bytes.NewBufferString(sampleGlobal)
err := gatherStats(buf, &acc, "dovecot.test", "global")
require.NoError(t, err)
acc.AssertContainsTaggedFields(t, "dovecot", fields, tags)
// Test type=domain
tags = map[string]string{"server": "dovecot.test", "type": "domain", "domain": "domain.test"}
buf = bytes.NewBufferString(sampleDomain)
err = gatherStats(buf, &acc, "dovecot.test", "domain")
require.NoError(t, err)
acc.AssertContainsTaggedFields(t, "dovecot", fields, tags)
// Test type=ip
tags = map[string]string{"server": "dovecot.test", "type": "ip", "ip": "192.168.0.100"}
buf = bytes.NewBufferString(sampleIp)
err = gatherStats(buf, &acc, "dovecot.test", "ip")
require.NoError(t, err)
acc.AssertContainsTaggedFields(t, "dovecot", fields, tags)
// Test type=user
fields = map[string]interface{}{
"reset_timestamp": time.Unix(1453969886, 0),
"last_update": time.Unix(1454603963, 39864),
"num_logins": int64(7503897),
"num_cmds": int64(52595715),
"user_cpu": 1.00831175372e+08,
"sys_cpu": 8.3849071112e+07,
"clock_time": 4.3260019315281835e+15,
"min_faults": int64(763950011),
"maj_faults": int64(1112443),
"vol_cs": int64(4120386897),
"invol_cs": int64(3685239306),
"disk_input": int64(41679480946688),
"disk_output": int64(1819070669176832),
"read_count": int64(2368906465),
"read_bytes": int64(2957928122981169),
"write_count": int64(3545389615),
"write_bytes": int64(1666822498251286),
"mail_lookup_path": int64(24396105),
"mail_lookup_attr": int64(302845),
"mail_read_count": int64(20155768),
"mail_read_bytes": int64(669946617705),
"mail_cache_hits": int64(1557255080),
}
tags = map[string]string{"server": "dovecot.test", "type": "user", "user": "user.1@domain.test"}
buf = bytes.NewBufferString(sampleUser)
err = gatherStats(buf, &acc, "dovecot.test", "user")
require.NoError(t, err)
acc.AssertContainsTaggedFields(t, "dovecot", fields, tags)
}
const sampleStats = `domain reset_timestamp last_update num_logins num_cmds num_connected_sessions user_cpu sys_cpu clock_time min_faults maj_faults vol_cs invol_cs disk_input disk_output read_count read_bytes write_count write_bytes mail_lookup_path mail_lookup_attr mail_read_count mail_read_bytes mail_cache_hits
domain.bad 1453970076 1454603947.383029 10749 33828 0 177988.524000 148071.772000 7531838964717.193706 212491179 2125 2190386067 112779200 74487934976 3221808119808 2469948401 5237602841760 1091171292 2951966459802 15363 0 2922 136403379 334372
const sampleGlobal = `reset_timestamp last_update num_logins num_cmds num_connected_sessions user_cpu sys_cpu clock_time min_faults maj_faults vol_cs invol_cs disk_input disk_output read_count read_bytes write_count write_bytes mail_lookup_path mail_lookup_attr mail_read_count mail_read_bytes mail_cache_hits
1453969886 1454603963.039864 7503897 52595715 1204 100831175.372000 83849071.112000 4326001931528183.495762 763950011 1112443 4120386897 3685239306 41679480946688 1819070669176832 2368906465 2957928122981169 3545389615 1666822498251286 24396105 302845 20155768 669946617705 1557255080`
const sampleDomain = `domain reset_timestamp last_update num_logins num_cmds num_connected_sessions user_cpu sys_cpu clock_time min_faults maj_faults vol_cs invol_cs disk_input disk_output read_count read_bytes write_count write_bytes mail_lookup_path mail_lookup_attr mail_read_count mail_read_bytes mail_cache_hits
domain.test 1453969886 1454603963.039864 7503897 52595715 1204 100831175.372000 83849071.112000 4326001931528183.495762 763950011 1112443 4120386897 3685239306 41679480946688 1819070669176832 2368906465 2957928122981169 3545389615 1666822498251286 24396105 302845 20155768 669946617705 1557255080`
const sampleIp = `ip reset_timestamp last_update num_logins num_cmds num_connected_sessions user_cpu sys_cpu clock_time min_faults maj_faults vol_cs invol_cs disk_input disk_output read_count read_bytes write_count write_bytes mail_lookup_path mail_lookup_attr mail_read_count mail_read_bytes mail_cache_hits
192.168.0.100 1453969886 1454603963.039864 7503897 52595715 1204 100831175.372000 83849071.112000 4326001931528183.495762 763950011 1112443 4120386897 3685239306 41679480946688 1819070669176832 2368906465 2957928122981169 3545389615 1666822498251286 24396105 302845 20155768 669946617705 1557255080`
const sampleUser = `user reset_timestamp last_update num_logins num_cmds user_cpu sys_cpu clock_time min_faults maj_faults vol_cs invol_cs disk_input disk_output read_count read_bytes write_count write_bytes mail_lookup_path mail_lookup_attr mail_read_count mail_read_bytes mail_cache_hits
user.1@domain.test 1453969886 1454603963.039864 7503897 52595715 100831175.372000 83849071.112000 4326001931528183.495762 763950011 1112443 4120386897 3685239306 41679480946688 1819070669176832 2368906465 2957928122981169 3545389615 1666822498251286 24396105 302845 20155768 669946617705 1557255080`

View File

@@ -2,18 +2,11 @@
Please also see: [Telegraf Input Data Formats](https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md)
The exec input plugin can execute arbitrary commands which output:
* JSON [javascript object notation](http://www.json.org/)
* InfluxDB [line-protocol](https://docs.influxdata.com/influxdb/v0.10/write_protocols/line/)
* Graphite [graphite-protocol](http://graphite.readthedocs.org/en/latest/feeding-carbon.html)
### Example 1 - JSON
#### Configuration
In this example a script called ```/tmp/test.sh``` and a script called ```/tmp/test2.sh```
In this example a script called ```/tmp/test.sh``` and a script called ```/tmp/test2.sh```
are configured for ```[[inputs.exec]]``` in JSON format.
```
@@ -22,7 +15,7 @@ are configured for ```[[inputs.exec]]``` in JSON format.
# Shell/commands array
commands = ["/tmp/test.sh", "/tmp/test2.sh"]
# Data format to consume. This can be "json", "influx" or "graphite" (line-protocol)
# Data format to consume.
# NOTE json only reads numerical measurements, strings and booleans are ignored.
data_format = "json"
@@ -81,7 +74,7 @@ and strings will be ignored.
### Example 2 - Influx Line-Protocol
In this example an application called ```/usr/bin/line_protocol_collector```
and a script called ```/tmp/test2.sh``` are configured for ```[[inputs.exec]]```
and a script called ```/tmp/test2.sh``` are configured for ```[[inputs.exec]]```
in influx line-protocol format.
#### Configuration
@@ -94,7 +87,7 @@ in influx line-protocol format.
# command = "/usr/bin/line_protocol_collector"
commands = ["/usr/bin/line_protocol_collector","/tmp/test2.sh"]
# Data format to consume. This can be "json" or "influx" (line-protocol)
# Data format to consume.
# NOTE json only reads numerical measurements, strings and booleans are ignored.
data_format = "influx"
```
@@ -113,7 +106,7 @@ cpu,cpu=cpu6,host=foo,datacenter=us-east usage_idle=99,usage_busy=1
You will get data in InfluxDB exactly as it is defined above,
tags are cpu=cpuN, host=foo, and datacenter=us-east with fields usage_idle
and usage_busy. They will receive a timestamp at collection time.
and usage_busy. They will receive a timestamp at collection time.
Each line must end in \n, just as the Influx line protocol does.
@@ -121,8 +114,8 @@ Each line must end in \n, just as the Influx line protocol does.
We can also change the data_format to "graphite" to use the metrics collecting scripts such as (compatible with graphite):
* Nagios [Mertics Plugins] (https://exchange.nagios.org/directory/Plugins)
* Sensu [Mertics Plugins] (https://github.com/sensu-plugins)
* Nagios [Metrics Plugins](https://exchange.nagios.org/directory/Plugins)
* Sensu [Metrics Plugins](https://github.com/sensu-plugins)
In this example a script called /tmp/test.sh and a script called /tmp/test2.sh are configured for [[inputs.exec]] in graphite format.
@@ -133,7 +126,7 @@ In this example a script called /tmp/test.sh and a script called /tmp/test2.sh a
# Shell/commands array
commands = ["/tmp/test.sh","/tmp/test2.sh"]
# Data format to consume. This can be "json", "influx" or "graphite" (line-protocol)
# Data format to consume.
# NOTE json only reads numerical measurements, strings and booleans are ignored.
data_format = "graphite"
@@ -186,5 +179,5 @@ sensu.metric.net.server0.eth0.rx_dropped 0 1444234982
The templates configuration will be used to parse the graphite metrics to support influxdb/opentsdb tagging store engines.
More detail information about templates, please refer to [The graphite Input] (https://github.com/influxdata/influxdb/blob/master/services/graphite/README.md)
More detail information about templates, please refer to [The graphite Input](https://github.com/influxdata/influxdb/blob/master/services/graphite/README.md)

View File

@@ -6,10 +6,12 @@ import (
"os/exec"
"sync"
"syscall"
"time"
"github.com/gonuts/go-shellquote"
"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/internal"
"github.com/influxdata/telegraf/plugins/inputs"
"github.com/influxdata/telegraf/plugins/parsers"
"github.com/influxdata/telegraf/plugins/parsers/nagios"
@@ -19,6 +21,9 @@ const sampleConfig = `
## Commands array
commands = ["/tmp/test.sh", "/usr/bin/mycollector --foo=bar"]
## Timeout for each command to complete.
timeout = "5s"
## measurement name suffix (for separating different commands)
name_suffix = "_mycollector"
@@ -32,6 +37,7 @@ const sampleConfig = `
type Exec struct {
Commands []string
Command string
Timeout internal.Duration
parser parsers.Parser
@@ -43,7 +49,8 @@ type Exec struct {
func NewExec() *Exec {
return &Exec{
runner: CommandRunner{},
runner: CommandRunner{},
Timeout: internal.Duration{Duration: time.Second * 5},
}
}
@@ -73,7 +80,11 @@ func AddNagiosState(exitCode error, acc telegraf.Accumulator) error {
return nil
}
func (c CommandRunner) Run(e *Exec, command string, acc telegraf.Accumulator) ([]byte, error) {
func (c CommandRunner) Run(
e *Exec,
command string,
acc telegraf.Accumulator,
) ([]byte, error) {
split_cmd, err := shellquote.Split(command)
if err != nil || len(split_cmd) == 0 {
return nil, fmt.Errorf("exec: unable to parse command, %s", err)
@@ -84,7 +95,7 @@ func (c CommandRunner) Run(e *Exec, command string, acc telegraf.Accumulator) ([
var out bytes.Buffer
cmd.Stdout = &out
if err := cmd.Run(); err != nil {
if err := internal.RunTimeout(cmd, e.Timeout.Duration); err != nil {
switch e.parser.(type) {
case *nagios.NagiosParser:
AddNagiosState(err, acc)

View File

@@ -0,0 +1,37 @@
# filestat Input Plugin
The filestat plugin gathers metrics about file existence, size, and other stats.
### Configuration:
```toml
# Read stats about given file(s)
[[inputs.filestat]]
## Files to gather stats about.
## These accept standard unix glob matching rules, but with the addition of
## ** as a "super asterisk". See https://github.com/gobwas/glob.
files = ["/etc/telegraf/telegraf.conf", "/var/log/**.log"]
## If true, read the entire file and calculate an md5 checksum.
md5 = false
```
### Measurements & Fields:
- filestat
- exists (int, 0 | 1)
- size_bytes (int, bytes)
- md5 (optional, string)
### Tags:
- All measurements have the following tags:
- file (the path the to file, as specified in the config)
### Example Output:
```
$ telegraf -config /etc/telegraf/telegraf.conf -input-filter filestat -test
* Plugin: filestat, Collection 1
> filestat,file=/tmp/foo/bar,host=tyrion exists=0i 1461203374493128216
> filestat,file=/Users/sparrc/ws/telegraf.conf,host=tyrion exists=1i,size=47894i 1461203374493199335
```

View File

@@ -0,0 +1,125 @@
package filestat
import (
"crypto/md5"
"fmt"
"io"
"os"
"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/internal/globpath"
"github.com/influxdata/telegraf/plugins/inputs"
)
const sampleConfig = `
## Files to gather stats about.
## These accept standard unix glob matching rules, but with the addition of
## ** as a "super asterisk". ie:
## "/var/log/**.log" -> recursively find all .log files in /var/log
## "/var/log/*/*.log" -> find all .log files with a parent dir in /var/log
## "/var/log/apache.log" -> just tail the apache log file
##
## See https://github.com/gobwas/glob for more examples
##
files = ["/var/log/**.log"]
## If true, read the entire file and calculate an md5 checksum.
md5 = false
`
type FileStat struct {
Md5 bool
Files []string
// maps full file paths to globmatch obj
globs map[string]*globpath.GlobPath
}
func NewFileStat() *FileStat {
return &FileStat{
globs: make(map[string]*globpath.GlobPath),
}
}
func (_ *FileStat) Description() string {
return "Read stats about given file(s)"
}
func (_ *FileStat) SampleConfig() string { return sampleConfig }
func (f *FileStat) Gather(acc telegraf.Accumulator) error {
var errS string
var err error
for _, filepath := range f.Files {
// Get the compiled glob object for this filepath
g, ok := f.globs[filepath]
if !ok {
if g, err = globpath.Compile(filepath); err != nil {
errS += err.Error() + " "
continue
}
f.globs[filepath] = g
}
files := g.Match()
if len(files) == 0 {
acc.AddFields("filestat",
map[string]interface{}{
"exists": int64(0),
},
map[string]string{
"file": filepath,
})
continue
}
for fileName, fileInfo := range files {
tags := map[string]string{
"file": fileName,
}
fields := map[string]interface{}{
"exists": int64(1),
"size_bytes": fileInfo.Size(),
}
if f.Md5 {
md5, err := getMd5(fileName)
if err != nil {
errS += err.Error() + " "
} else {
fields["md5_sum"] = md5
}
}
acc.AddFields("filestat", fields, tags)
}
}
if errS != "" {
return fmt.Errorf(errS)
}
return nil
}
// Read given file and calculate an md5 hash.
func getMd5(file string) (string, error) {
of, err := os.Open(file)
if err != nil {
return "", err
}
defer of.Close()
hash := md5.New()
_, err = io.Copy(hash, of)
if err != nil {
// fatal error
return "", err
}
return fmt.Sprintf("%x", hash.Sum(nil)), nil
}
func init() {
inputs.Add("filestat", func() telegraf.Input {
return NewFileStat()
})
}

View File

@@ -0,0 +1,180 @@
package filestat
import (
"runtime"
"strings"
"testing"
"github.com/influxdata/telegraf/testutil"
"github.com/stretchr/testify/assert"
)
func TestGatherNoMd5(t *testing.T) {
dir := getTestdataDir()
fs := NewFileStat()
fs.Files = []string{
dir + "log1.log",
dir + "log2.log",
"/non/existant/file",
}
acc := testutil.Accumulator{}
fs.Gather(&acc)
tags1 := map[string]string{
"file": dir + "log1.log",
}
fields1 := map[string]interface{}{
"size_bytes": int64(0),
"exists": int64(1),
}
acc.AssertContainsTaggedFields(t, "filestat", fields1, tags1)
tags2 := map[string]string{
"file": dir + "log2.log",
}
fields2 := map[string]interface{}{
"size_bytes": int64(0),
"exists": int64(1),
}
acc.AssertContainsTaggedFields(t, "filestat", fields2, tags2)
tags3 := map[string]string{
"file": "/non/existant/file",
}
fields3 := map[string]interface{}{
"exists": int64(0),
}
acc.AssertContainsTaggedFields(t, "filestat", fields3, tags3)
}
func TestGatherExplicitFiles(t *testing.T) {
dir := getTestdataDir()
fs := NewFileStat()
fs.Md5 = true
fs.Files = []string{
dir + "log1.log",
dir + "log2.log",
"/non/existant/file",
}
acc := testutil.Accumulator{}
fs.Gather(&acc)
tags1 := map[string]string{
"file": dir + "log1.log",
}
fields1 := map[string]interface{}{
"size_bytes": int64(0),
"exists": int64(1),
"md5_sum": "d41d8cd98f00b204e9800998ecf8427e",
}
acc.AssertContainsTaggedFields(t, "filestat", fields1, tags1)
tags2 := map[string]string{
"file": dir + "log2.log",
}
fields2 := map[string]interface{}{
"size_bytes": int64(0),
"exists": int64(1),
"md5_sum": "d41d8cd98f00b204e9800998ecf8427e",
}
acc.AssertContainsTaggedFields(t, "filestat", fields2, tags2)
tags3 := map[string]string{
"file": "/non/existant/file",
}
fields3 := map[string]interface{}{
"exists": int64(0),
}
acc.AssertContainsTaggedFields(t, "filestat", fields3, tags3)
}
func TestGatherGlob(t *testing.T) {
dir := getTestdataDir()
fs := NewFileStat()
fs.Md5 = true
fs.Files = []string{
dir + "*.log",
}
acc := testutil.Accumulator{}
fs.Gather(&acc)
tags1 := map[string]string{
"file": dir + "log1.log",
}
fields1 := map[string]interface{}{
"size_bytes": int64(0),
"exists": int64(1),
"md5_sum": "d41d8cd98f00b204e9800998ecf8427e",
}
acc.AssertContainsTaggedFields(t, "filestat", fields1, tags1)
tags2 := map[string]string{
"file": dir + "log2.log",
}
fields2 := map[string]interface{}{
"size_bytes": int64(0),
"exists": int64(1),
"md5_sum": "d41d8cd98f00b204e9800998ecf8427e",
}
acc.AssertContainsTaggedFields(t, "filestat", fields2, tags2)
}
func TestGatherSuperAsterisk(t *testing.T) {
dir := getTestdataDir()
fs := NewFileStat()
fs.Md5 = true
fs.Files = []string{
dir + "**",
}
acc := testutil.Accumulator{}
fs.Gather(&acc)
tags1 := map[string]string{
"file": dir + "log1.log",
}
fields1 := map[string]interface{}{
"size_bytes": int64(0),
"exists": int64(1),
"md5_sum": "d41d8cd98f00b204e9800998ecf8427e",
}
acc.AssertContainsTaggedFields(t, "filestat", fields1, tags1)
tags2 := map[string]string{
"file": dir + "log2.log",
}
fields2 := map[string]interface{}{
"size_bytes": int64(0),
"exists": int64(1),
"md5_sum": "d41d8cd98f00b204e9800998ecf8427e",
}
acc.AssertContainsTaggedFields(t, "filestat", fields2, tags2)
tags3 := map[string]string{
"file": dir + "test.conf",
}
fields3 := map[string]interface{}{
"size_bytes": int64(104),
"exists": int64(1),
"md5_sum": "5a7e9b77fa25e7bb411dbd17cf403c1f",
}
acc.AssertContainsTaggedFields(t, "filestat", fields3, tags3)
}
func TestGetMd5(t *testing.T) {
dir := getTestdataDir()
md5, err := getMd5(dir + "test.conf")
assert.NoError(t, err)
assert.Equal(t, "5a7e9b77fa25e7bb411dbd17cf403c1f", md5)
md5, err = getMd5("/tmp/foo/bar/fooooo")
assert.Error(t, err)
}
func getTestdataDir() string {
_, filename, _, _ := runtime.Caller(1)
return strings.Replace(filename, "filestat_test.go", "testdata/", 1)
}

View File

View File

View File

@@ -0,0 +1,5 @@
# this is a fake testing config file
# for testing the filestat plugin
option1 = "foo"
option2 = "bar"

View File

@@ -6,9 +6,11 @@ import (
"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/plugins/inputs"
"io"
"net"
"net/http"
"net/url"
"strconv"
"strings"
"sync"
"time"
)
@@ -47,7 +49,7 @@ const (
HF_THROTTLE = 29 //29. throttle [...S]: current throttle percentage for the server, when slowstart is active, or no value if not in slowstart.
HF_LBTOT = 30 //30. lbtot [..BS]: total number of times a server was selected, either for new sessions, or when re-dispatching. The server counter is the number of times that server was selected.
HF_TRACKED = 31 //31. tracked [...S]: id of proxy/server if tracking is enabled.
HF_TYPE = 32 //32. type [LFBS]: (0 = frontend, 1 = backend, 2 = server, 3 = socket/listener)
HF_TYPE = 32 //32. type [LFBS]: (0 = frontend, 1 = backend, 2 = server, 3 = socket/listener)
HF_RATE = 33 //33. rate [.FBS]: number of sessions per second over last elapsed second
HF_RATE_LIM = 34 //34. rate_lim [.F..]: configured limit on new sessions per second
HF_RATE_MAX = 35 //35. rate_max [.FBS]: max number of new sessions per second
@@ -91,8 +93,8 @@ var sampleConfig = `
## If no servers are specified, then default to 127.0.0.1:1936
servers = ["http://myhaproxy.com:1936", "http://anotherhaproxy.com:1936"]
## Or you can also use local socket(not work yet)
## servers = ["socket://run/haproxy/admin.sock"]
## Or you can also use local socket
## servers = ["socket:/run/haproxy/admin.sock"]
`
func (r *haproxy) SampleConfig() string {
@@ -127,7 +129,36 @@ func (g *haproxy) Gather(acc telegraf.Accumulator) error {
return outerr
}
func (g *haproxy) gatherServerSocket(addr string, acc telegraf.Accumulator) error {
var socketPath string
socketAddr := strings.Split(addr, ":")
if len(socketAddr) >= 2 {
socketPath = socketAddr[1]
} else {
socketPath = socketAddr[0]
}
c, err := net.Dial("unix", socketPath)
if err != nil {
return fmt.Errorf("Could not connect to socket '%s': %s", addr, err)
}
_, errw := c.Write([]byte("show stat\n"))
if errw != nil {
return fmt.Errorf("Could not write to socket '%s': %s", addr, errw)
}
return importCsvResult(c, acc, socketPath)
}
func (g *haproxy) gatherServer(addr string, acc telegraf.Accumulator) error {
if !strings.HasPrefix(addr, "http") {
return g.gatherServerSocket(addr, acc)
}
if g.client == nil {
tr := &http.Transport{ResponseHeaderTimeout: time.Duration(3 * time.Second)}
client := &http.Client{

View File

@@ -1,17 +1,42 @@
package haproxy
import (
"crypto/rand"
"encoding/binary"
"fmt"
"net"
"net/http"
"net/http/httptest"
"strings"
"testing"
"github.com/influxdata/telegraf/testutil"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"net/http"
"net/http/httptest"
)
type statServer struct{}
func (s statServer) serverSocket(l net.Listener) {
for {
conn, err := l.Accept()
if err != nil {
return
}
go func(c net.Conn) {
buf := make([]byte, 1024)
n, _ := c.Read(buf)
data := buf[:n]
if string(data) == "show stat\n" {
c.Write([]byte(csvOutputSample))
c.Close()
}
}(conn)
}
}
func TestHaproxyGeneratesMetricsWithAuthentication(t *testing.T) {
//We create a fake server to return test data
ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
@@ -146,6 +171,69 @@ func TestHaproxyGeneratesMetricsWithoutAuthentication(t *testing.T) {
acc.AssertContainsTaggedFields(t, "haproxy", fields, tags)
}
func TestHaproxyGeneratesMetricsUsingSocket(t *testing.T) {
var randomNumber int64
binary.Read(rand.Reader, binary.LittleEndian, &randomNumber)
sock, err := net.Listen("unix", fmt.Sprintf("/tmp/test-haproxy%d.sock", randomNumber))
if err != nil {
t.Fatal("Cannot initialize socket ")
}
defer sock.Close()
s := statServer{}
go s.serverSocket(sock)
r := &haproxy{
Servers: []string{sock.Addr().String()},
}
var acc testutil.Accumulator
err = r.Gather(&acc)
require.NoError(t, err)
tags := map[string]string{
"proxy": "be_app",
"server": sock.Addr().String(),
"sv": "host0",
}
fields := map[string]interface{}{
"active_servers": uint64(1),
"backup_servers": uint64(0),
"bin": uint64(510913516),
"bout": uint64(2193856571),
"check_duration": uint64(10),
"cli_abort": uint64(73),
"ctime": uint64(2),
"downtime": uint64(0),
"dresp": uint64(0),
"econ": uint64(0),
"eresp": uint64(1),
"http_response.1xx": uint64(0),
"http_response.2xx": uint64(119534),
"http_response.3xx": uint64(48051),
"http_response.4xx": uint64(2345),
"http_response.5xx": uint64(1056),
"lbtot": uint64(171013),
"qcur": uint64(0),
"qmax": uint64(0),
"qtime": uint64(0),
"rate": uint64(3),
"rate_max": uint64(12),
"rtime": uint64(312),
"scur": uint64(1),
"smax": uint64(32),
"srv_abort": uint64(1),
"stot": uint64(171014),
"ttime": uint64(2341),
"wredis": uint64(0),
"wretr": uint64(1),
}
acc.AssertContainsTaggedFields(t, "haproxy", fields, tags)
}
//When not passing server config, we default to localhost
//We just want to make sure we did request stat from localhost
func TestHaproxyDefaultGetFromLocalhost(t *testing.T) {

View File

@@ -0,0 +1,44 @@
# Example Input Plugin
This input plugin will test HTTP/HTTPS connections.
### Configuration:
```
# List of UDP/TCP connections you want to check
[[inputs.http_response]]
## Server address (default http://localhost)
address = "http://github.com"
## Set response_timeout (default 5 seconds)
response_timeout = 5
## HTTP Request Method
method = "GET"
## HTTP Request Headers
[inputs.http_response.headers]
Host = github.com
## Whether to follow redirects from the server (defaults to false)
follow_redirects = true
## Optional HTTP Request Body
body = '''
{'fake':'data'}
'''
```
### Measurements & Fields:
- http_response
- response_time (float, seconds)
- http_response_code (int) #The code received
### Tags:
- All measurements have the following tags:
- server
- method
### Example Output:
```
$ ./telegraf -config telegraf.conf -input-filter http_response -test
http_response,method=GET,server=http://www.github.com http_response_code=200i,response_time=6.223266528 1459419354977857955
```

View File

@@ -0,0 +1,154 @@
package http_response
import (
"errors"
"io"
"net/http"
"net/url"
"strings"
"time"
"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/plugins/inputs"
)
// HTTPResponse struct
type HTTPResponse struct {
Address string
Body string
Method string
ResponseTimeout int
Headers map[string]string
FollowRedirects bool
}
// Description returns the plugin Description
func (h *HTTPResponse) Description() string {
return "HTTP/HTTPS request given an address a method and a timeout"
}
var sampleConfig = `
## Server address (default http://localhost)
address = "http://github.com"
## Set response_timeout (default 5 seconds)
response_timeout = 5
## HTTP Request Method
method = "GET"
## Whether to follow redirects from the server (defaults to false)
follow_redirects = true
## HTTP Request Headers (all values must be strings)
# [inputs.http_response.headers]
# Host = "github.com"
## Optional HTTP Request Body
# body = '''
# {'fake':'data'}
# '''
`
// SampleConfig returns the plugin SampleConfig
func (h *HTTPResponse) SampleConfig() string {
return sampleConfig
}
// ErrRedirectAttempted indicates that a redirect occurred
var ErrRedirectAttempted = errors.New("redirect")
// CreateHttpClient creates an http client which will timeout at the specified
// timeout period and can follow redirects if specified
func CreateHttpClient(followRedirects bool, ResponseTimeout time.Duration) *http.Client {
client := &http.Client{
Timeout: time.Second * ResponseTimeout,
}
if followRedirects == false {
client.CheckRedirect = func(req *http.Request, via []*http.Request) error {
return ErrRedirectAttempted
}
}
return client
}
// CreateHeaders takes a map of header strings and puts them
// into a http.Header Object
func CreateHeaders(headers map[string]string) http.Header {
httpHeaders := make(http.Header)
for key := range headers {
httpHeaders.Add(key, headers[key])
}
return httpHeaders
}
// HTTPGather gathers all fields and returns any errors it encounters
func (h *HTTPResponse) HTTPGather() (map[string]interface{}, error) {
// Prepare fields
fields := make(map[string]interface{})
client := CreateHttpClient(h.FollowRedirects, time.Duration(h.ResponseTimeout))
var body io.Reader
if h.Body != "" {
body = strings.NewReader(h.Body)
}
request, err := http.NewRequest(h.Method, h.Address, body)
if err != nil {
return nil, err
}
request.Header = CreateHeaders(h.Headers)
// Start Timer
start := time.Now()
resp, err := client.Do(request)
if err != nil {
if h.FollowRedirects {
return nil, err
}
if urlError, ok := err.(*url.Error); ok &&
urlError.Err == ErrRedirectAttempted {
err = nil
} else {
return nil, err
}
}
fields["response_time"] = time.Since(start).Seconds()
fields["http_response_code"] = resp.StatusCode
return fields, nil
}
// Gather gets all metric fields and tags and returns any errors it encounters
func (h *HTTPResponse) Gather(acc telegraf.Accumulator) error {
// Set default values
if h.ResponseTimeout < 1 {
h.ResponseTimeout = 5
}
// Check send and expected string
if h.Method == "" {
h.Method = "GET"
}
if h.Address == "" {
h.Address = "http://localhost"
}
addr, err := url.Parse(h.Address)
if err != nil {
return err
}
if addr.Scheme != "http" && addr.Scheme != "https" {
return errors.New("Only http and https are supported")
}
// Prepare data
tags := map[string]string{"server": h.Address, "method": h.Method}
var fields map[string]interface{}
// Gather data
fields, err = h.HTTPGather()
if err != nil {
return err
}
// Add metrics
acc.AddFields("http_response", fields, tags)
return nil
}
func init() {
inputs.Add("http_response", func() telegraf.Input {
return &HTTPResponse{}
})
}

View File

@@ -0,0 +1,241 @@
package http_response
import (
"fmt"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"io/ioutil"
"net/http"
"net/http/httptest"
"testing"
"time"
)
func TestCreateHeaders(t *testing.T) {
fakeHeaders := map[string]string{
"Accept": "text/plain",
"Content-Type": "application/json",
"Cache-Control": "no-cache",
}
headers := CreateHeaders(fakeHeaders)
testHeaders := make(http.Header)
testHeaders.Add("Accept", "text/plain")
testHeaders.Add("Content-Type", "application/json")
testHeaders.Add("Cache-Control", "no-cache")
assert.Equal(t, testHeaders, headers)
}
func setUpTestMux() http.Handler {
mux := http.NewServeMux()
mux.HandleFunc("/redirect", func(w http.ResponseWriter, req *http.Request) {
http.Redirect(w, req, "/good", http.StatusMovedPermanently)
})
mux.HandleFunc("/good", func(w http.ResponseWriter, req *http.Request) {
fmt.Fprintf(w, "hit the good page!")
})
mux.HandleFunc("/badredirect", func(w http.ResponseWriter, req *http.Request) {
http.Redirect(w, req, "/badredirect", http.StatusMovedPermanently)
})
mux.HandleFunc("/mustbepostmethod", func(w http.ResponseWriter, req *http.Request) {
if req.Method != "POST" {
http.Error(w, "method wasn't post", http.StatusMethodNotAllowed)
return
}
fmt.Fprintf(w, "used post correctly!")
})
mux.HandleFunc("/musthaveabody", func(w http.ResponseWriter, req *http.Request) {
body, err := ioutil.ReadAll(req.Body)
req.Body.Close()
if err != nil {
http.Error(w, "couldn't read request body", http.StatusBadRequest)
return
}
if string(body) == "" {
http.Error(w, "body was empty", http.StatusBadRequest)
return
}
fmt.Fprintf(w, "sent a body!")
})
mux.HandleFunc("/twosecondnap", func(w http.ResponseWriter, req *http.Request) {
time.Sleep(time.Second * 2)
return
})
return mux
}
func TestFields(t *testing.T) {
mux := setUpTestMux()
ts := httptest.NewServer(mux)
defer ts.Close()
h := &HTTPResponse{
Address: ts.URL + "/good",
Body: "{ 'test': 'data'}",
Method: "GET",
ResponseTimeout: 20,
Headers: map[string]string{
"Content-Type": "application/json",
},
FollowRedirects: true,
}
fields, err := h.HTTPGather()
require.NoError(t, err)
assert.NotEmpty(t, fields)
if assert.NotNil(t, fields["http_response_code"]) {
assert.Equal(t, http.StatusOK, fields["http_response_code"])
}
assert.NotNil(t, fields["response_time"])
}
func TestRedirects(t *testing.T) {
mux := setUpTestMux()
ts := httptest.NewServer(mux)
defer ts.Close()
h := &HTTPResponse{
Address: ts.URL + "/redirect",
Body: "{ 'test': 'data'}",
Method: "GET",
ResponseTimeout: 20,
Headers: map[string]string{
"Content-Type": "application/json",
},
FollowRedirects: true,
}
fields, err := h.HTTPGather()
require.NoError(t, err)
assert.NotEmpty(t, fields)
if assert.NotNil(t, fields["http_response_code"]) {
assert.Equal(t, http.StatusOK, fields["http_response_code"])
}
h = &HTTPResponse{
Address: ts.URL + "/badredirect",
Body: "{ 'test': 'data'}",
Method: "GET",
ResponseTimeout: 20,
Headers: map[string]string{
"Content-Type": "application/json",
},
FollowRedirects: true,
}
fields, err = h.HTTPGather()
require.Error(t, err)
}
func TestMethod(t *testing.T) {
mux := setUpTestMux()
ts := httptest.NewServer(mux)
defer ts.Close()
h := &HTTPResponse{
Address: ts.URL + "/mustbepostmethod",
Body: "{ 'test': 'data'}",
Method: "POST",
ResponseTimeout: 20,
Headers: map[string]string{
"Content-Type": "application/json",
},
FollowRedirects: true,
}
fields, err := h.HTTPGather()
require.NoError(t, err)
assert.NotEmpty(t, fields)
if assert.NotNil(t, fields["http_response_code"]) {
assert.Equal(t, http.StatusOK, fields["http_response_code"])
}
h = &HTTPResponse{
Address: ts.URL + "/mustbepostmethod",
Body: "{ 'test': 'data'}",
Method: "GET",
ResponseTimeout: 20,
Headers: map[string]string{
"Content-Type": "application/json",
},
FollowRedirects: true,
}
fields, err = h.HTTPGather()
require.NoError(t, err)
assert.NotEmpty(t, fields)
if assert.NotNil(t, fields["http_response_code"]) {
assert.Equal(t, http.StatusMethodNotAllowed, fields["http_response_code"])
}
//check that lowercase methods work correctly
h = &HTTPResponse{
Address: ts.URL + "/mustbepostmethod",
Body: "{ 'test': 'data'}",
Method: "head",
ResponseTimeout: 20,
Headers: map[string]string{
"Content-Type": "application/json",
},
FollowRedirects: true,
}
fields, err = h.HTTPGather()
require.NoError(t, err)
assert.NotEmpty(t, fields)
if assert.NotNil(t, fields["http_response_code"]) {
assert.Equal(t, http.StatusMethodNotAllowed, fields["http_response_code"])
}
}
func TestBody(t *testing.T) {
mux := setUpTestMux()
ts := httptest.NewServer(mux)
defer ts.Close()
h := &HTTPResponse{
Address: ts.URL + "/musthaveabody",
Body: "{ 'test': 'data'}",
Method: "GET",
ResponseTimeout: 20,
Headers: map[string]string{
"Content-Type": "application/json",
},
FollowRedirects: true,
}
fields, err := h.HTTPGather()
require.NoError(t, err)
assert.NotEmpty(t, fields)
if assert.NotNil(t, fields["http_response_code"]) {
assert.Equal(t, http.StatusOK, fields["http_response_code"])
}
h = &HTTPResponse{
Address: ts.URL + "/musthaveabody",
Method: "GET",
ResponseTimeout: 20,
Headers: map[string]string{
"Content-Type": "application/json",
},
FollowRedirects: true,
}
fields, err = h.HTTPGather()
require.NoError(t, err)
assert.NotEmpty(t, fields)
if assert.NotNil(t, fields["http_response_code"]) {
assert.Equal(t, http.StatusBadRequest, fields["http_response_code"])
}
}
func TestTimeout(t *testing.T) {
mux := setUpTestMux()
ts := httptest.NewServer(mux)
defer ts.Close()
h := &HTTPResponse{
Address: ts.URL + "/twosecondnap",
Body: "{ 'test': 'data'}",
Method: "GET",
ResponseTimeout: 1,
Headers: map[string]string{
"Content-Type": "application/json",
},
FollowRedirects: true,
}
_, err := h.HTTPGather()
require.Error(t, err)
}

View File

@@ -0,0 +1,23 @@
# igloo Input Plugin
The igloo plugin "tails" a logfile and parses each log message.
By default, the igloo plugin acts like the following unix tail command:
```
tail -F --lines=0 myfile.log
```
- `-F` means that it will follow the _name_ of the given file, so
that it will be compatible with log-rotated files, and that it will retry on
inaccessible files.
- `--lines=0` means that it will start at the end of the file (unless
the `from_beginning` option is set).
see http://man7.org/linux/man-pages/man1/tail.1.html for more details.
### Configuration:
```toml
```

View File

@@ -0,0 +1,331 @@
package igloo
import (
"fmt"
"log"
"regexp"
"sort"
"strconv"
"strings"
"sync"
"time"
"github.com/hpcloud/tail"
"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/internal/globpath"
"github.com/influxdata/telegraf/plugins/inputs"
)
// format of timestamps
const (
rfcFormat string = "%s-%s-%sT%s:%s:%s.%sZ"
)
var (
// regex for finding timestamps
tRe = regexp.MustCompile(`Timestamp=((\d{4})-(\d{2})-(\d{2}) (\d{2}):(\d{2}):(\d{2}),(\d+))`)
)
type Tail struct {
Files []string
FromBeginning bool
TagKeys []string
Counters []string
NumFields []string
StrFields []string
numfieldsRe map[string]*regexp.Regexp
strfieldsRe map[string]*regexp.Regexp
countersRe map[string]*regexp.Regexp
tagsRe map[string]*regexp.Regexp
counters map[string]map[string]int64
tailers []*tail.Tail
wg sync.WaitGroup
acc telegraf.Accumulator
sync.Mutex
}
func NewTail() *Tail {
return &Tail{
FromBeginning: false,
}
}
const sampleConfig = `
## logfiles to parse.
##
## These accept standard unix glob matching rules, but with the addition of
## ** as a "super asterisk". ie:
## "/var/log/**.log" -> recursively find all .log files in /var/log
## "/var/log/*/*.log" -> find all .log files with a parent dir in /var/log
## "/var/log/apache.log" -> just tail the apache log file
##
## See https://github.com/gobwas/glob for more examples
##
files = ["$HOME/sample.log"]
## Read file from beginning.
from_beginning = false
## Each log message is searched for these tag keys in TagKey=Value format.
## Any that are found will be tagged on the resulting influx measurements.
tag_keys = [
"HostLocal",
"ProductName",
"OperationName",
]
## counters are keys which are treated as counters.
## so if counters = ["Result"], then this means that the following ocurrence
## on a log line:
## Result=Success
## would be treated as a counter: Result_Success, and it will be incremented
## for every occurrence, until Telegraf is restarted.
counters = ["Result"]
## num_fields are log line occurrences that are translated into numerical
## fields. ie:
## Duration=1
num_fields = ["Duration", "Attempt"]
## str_fields are log line occurences that are translated into string fields,
## ie:
## ActivityGUID=0bb03bf4-ae1d-4487-bb6f-311653b35760
str_fields = ["ActivityGUID"]
`
func (t *Tail) SampleConfig() string {
return sampleConfig
}
func (t *Tail) Description() string {
return "Stream an igloo file, like the tail -f command"
}
func (t *Tail) Gather(acc telegraf.Accumulator) error {
return nil
}
func (t *Tail) buildRegexes() error {
t.numfieldsRe = make(map[string]*regexp.Regexp)
t.strfieldsRe = make(map[string]*regexp.Regexp)
t.tagsRe = make(map[string]*regexp.Regexp)
t.countersRe = make(map[string]*regexp.Regexp)
t.counters = make(map[string]map[string]int64)
for _, field := range t.NumFields {
re, err := regexp.Compile(field + `=([0-9\.]+)`)
if err != nil {
return err
}
t.numfieldsRe[field] = re
}
for _, field := range t.StrFields {
re, err := regexp.Compile(field + `=([0-9a-zA-Z\.\-]+)`)
if err != nil {
return err
}
t.strfieldsRe[field] = re
}
for _, field := range t.TagKeys {
re, err := regexp.Compile(field + `=([0-9a-zA-Z\.\-]+)`)
if err != nil {
return err
}
t.tagsRe[field] = re
}
for _, field := range t.Counters {
re, err := regexp.Compile("(" + field + ")" + `=([0-9a-zA-Z\.\-]+)`)
if err != nil {
return err
}
t.countersRe[field] = re
}
return nil
}
func (t *Tail) Start(acc telegraf.Accumulator) error {
t.Lock()
defer t.Unlock()
t.acc = acc
if err := t.buildRegexes(); err != nil {
return err
}
var seek tail.SeekInfo
if !t.FromBeginning {
seek.Whence = 2
seek.Offset = 0
}
var errS string
// Create a "tailer" for each file
for _, filepath := range t.Files {
g, err := globpath.Compile(filepath)
if err != nil {
log.Printf("ERROR Glob %s failed to compile, %s", filepath, err)
}
for file, _ := range g.Match() {
tailer, err := tail.TailFile(file,
tail.Config{
ReOpen: true,
Follow: true,
Location: &seek,
})
if err != nil {
errS += err.Error() + " "
continue
}
// create a goroutine for each "tailer"
go t.receiver(tailer)
t.tailers = append(t.tailers, tailer)
}
}
if errS != "" {
return fmt.Errorf(errS)
}
return nil
}
// this is launched as a goroutine to continuously watch a tailed logfile
// for changes, parse any incoming msgs, and add to the accumulator.
func (t *Tail) receiver(tailer *tail.Tail) {
t.wg.Add(1)
defer t.wg.Done()
var err error
var line *tail.Line
for line = range tailer.Lines {
if line.Err != nil {
log.Printf("ERROR tailing file %s, Error: %s\n",
tailer.Filename, err)
continue
}
err = t.Parse(line.Text)
if err != nil {
log.Printf("ERROR: %s", err)
}
}
}
func (t *Tail) Parse(line string) error {
// find the timestamp:
match := tRe.FindAllStringSubmatch(line, -1)
if len(match) < 1 {
return nil
}
if len(match[0]) < 9 {
return nil
}
// make an rfc3339 timestamp and parse it:
ts, err := time.Parse(time.RFC3339Nano,
fmt.Sprintf(rfcFormat, match[0][2], match[0][3], match[0][4], match[0][5], match[0][6], match[0][7], match[0][8]))
if err != nil {
return nil
}
fields := make(map[string]interface{})
tags := make(map[string]string)
// parse numerical fields:
for name, re := range t.numfieldsRe {
match := re.FindAllStringSubmatch(line, -1)
if len(match) < 1 {
continue
}
if len(match[0]) < 2 {
continue
}
num, err := strconv.ParseFloat(match[0][1], 64)
if err == nil {
fields[name] = num
}
}
// parse string fields:
for name, re := range t.strfieldsRe {
match := re.FindAllStringSubmatch(line, -1)
if len(match) < 1 {
continue
}
if len(match[0]) < 2 {
continue
}
fields[name] = match[0][1]
}
// parse tags:
for name, re := range t.tagsRe {
match := re.FindAllStringSubmatch(line, -1)
if len(match) < 1 {
continue
}
if len(match[0]) < 2 {
continue
}
tags[name] = match[0][1]
}
if len(t.countersRe) > 0 {
// Make a unique key for the measurement name/tags
var tg []string
for k, v := range tags {
tg = append(tg, fmt.Sprintf("%s=%s", k, v))
}
sort.Strings(tg)
hash := fmt.Sprintf("%s%s", strings.Join(tg, ""), "igloo")
// check if this hash already has a counter map
_, ok := t.counters[hash]
if !ok {
// doesnt have counter map, so make one
t.counters[hash] = make(map[string]int64)
}
// search for counter matches:
for _, re := range t.countersRe {
match := re.FindAllStringSubmatch(line, -1)
if len(match) < 1 {
continue
}
if len(match[0]) < 3 {
continue
}
counterName := match[0][1] + "_" + match[0][2]
// increment this counter
t.counters[hash][counterName] += 1
// add this counter to the output fields
fields[counterName] = t.counters[hash][counterName]
}
}
t.acc.AddFields("igloo", fields, tags, ts)
return nil
}
func (t *Tail) Stop() {
t.Lock()
defer t.Unlock()
for _, t := range t.tailers {
err := t.Stop()
if err != nil {
log.Printf("ERROR stopping tail on file %s\n", t.Filename)
}
t.Cleanup()
}
t.wg.Wait()
}
func init() {
inputs.Add("igloo", func() telegraf.Input {
return NewTail()
})
}

View File

@@ -1,6 +1,41 @@
# influxdb plugin
The influxdb plugin collects InfluxDB-formatted data from JSON endpoints.
The InfluxDB plugin will collect metrics on the given InfluxDB servers.
This plugin can also gather metrics from endpoints that expose
InfluxDB-formatted endpoints. See below for more information.
### Configuration:
```toml
# Read InfluxDB-formatted JSON metrics from one or more HTTP endpoints
[[inputs.influxdb]]
## Works with InfluxDB debug endpoints out of the box,
## but other services can use this format too.
## See the influxdb plugin's README for more details.
## Multiple URLs from which to read InfluxDB-formatted JSON
urls = [
"http://localhost:8086/debug/vars"
]
```
### Measurements & Fields
- influxdb_database
- influxdb_httpd
- influxdb_measurement
- influxdb_memstats
- influxdb_shard
- influxdb_subscriber
- influxdb_tsm1_cache
- influxdb_tsm1_wal
- influxdb_write
### InfluxDB-formatted endpoints
The influxdb plugin can collect InfluxDB-formatted data from JSON endpoints.
Whether associated with an Influx database or not.
With a configuration of:
@@ -65,8 +100,11 @@ influxdb_transactions,url='http://192.168.2.1:8086/debug/vars' total=100.0,balan
There are two important details to note about the collected metrics:
1. Even though the values in JSON are being displayed as integers, the metrics are reported as floats.
1. Even though the values in JSON are being displayed as integers,
the metrics are reported as floats.
JSON encoders usually don't print the fractional part for round floats.
Because you cannot change the type of an existing field in InfluxDB, we assume all numbers are floats.
Because you cannot change the type of an existing field in InfluxDB,
we assume all numbers are floats.
2. The top-level keys' names (in the example above, `"k1"`, `"k2"`, and `"k3"`) are not considered when recording the metrics.
2. The top-level keys' names (in the example above, `"k1"`, `"k2"`, and `"k3"`)
are not considered when recording the metrics.

View File

@@ -71,6 +71,35 @@ type point struct {
Values map[string]interface{} `json:"values"`
}
type memstats struct {
Alloc int64 `json:"Alloc"`
TotalAlloc int64 `json:"TotalAlloc"`
Sys int64 `json:"Sys"`
Lookups int64 `json:"Lookups"`
Mallocs int64 `json:"Mallocs"`
Frees int64 `json:"Frees"`
HeapAlloc int64 `json:"HeapAlloc"`
HeapSys int64 `json:"HeapSys"`
HeapIdle int64 `json:"HeapIdle"`
HeapInuse int64 `json:"HeapInuse"`
HeapReleased int64 `json:"HeapReleased"`
HeapObjects int64 `json:"HeapObjects"`
StackInuse int64 `json:"StackInuse"`
StackSys int64 `json:"StackSys"`
MSpanInuse int64 `json:"MSpanInuse"`
MSpanSys int64 `json:"MSpanSys"`
MCacheInuse int64 `json:"MCacheInuse"`
MCacheSys int64 `json:"MCacheSys"`
BuckHashSys int64 `json:"BuckHashSys"`
GCSys int64 `json:"GCSys"`
OtherSys int64 `json:"OtherSys"`
NextGC int64 `json:"NextGC"`
LastGC int64 `json:"LastGC"`
PauseTotalNs int64 `json:"PauseTotalNs"`
NumGC int64 `json:"NumGC"`
GCCPUFraction float64 `json:"GCCPUFraction"`
}
var tr = &http.Transport{
ResponseHeaderTimeout: time.Duration(3 * time.Second),
}
@@ -118,12 +147,52 @@ func (i *InfluxDB) gatherURL(
break
}
// Read in a string key. We don't do anything with the top-level keys, so it's discarded.
_, err := dec.Token()
// Read in a string key. We don't do anything with the top-level keys,
// so it's discarded.
key, err := dec.Token()
if err != nil {
return err
}
if key.(string) == "memstats" {
var m memstats
if err := dec.Decode(&m); err != nil {
continue
}
acc.AddFields("influxdb_memstats",
map[string]interface{}{
"alloc": m.Alloc,
"total_alloc": m.TotalAlloc,
"sys": m.Sys,
"lookups": m.Lookups,
"mallocs": m.Mallocs,
"frees": m.Frees,
"heap_alloc": m.HeapAlloc,
"heap_sys": m.HeapSys,
"heap_idle": m.HeapIdle,
"heap_inuse": m.HeapInuse,
"heap_released": m.HeapReleased,
"heap_objects": m.HeapObjects,
"stack_inuse": m.StackInuse,
"stack_sys": m.StackSys,
"mspan_inuse": m.MSpanInuse,
"mspan_sys": m.MSpanSys,
"mcache_inuse": m.MCacheInuse,
"mcache_sys": m.MCacheSys,
"buck_hash_sys": m.BuckHashSys,
"gc_sys": m.GCSys,
"other_sys": m.OtherSys,
"next_gc": m.NextGC,
"last_gc": m.LastGC,
"pause_total_ns": m.PauseTotalNs,
"num_gc": m.NumGC,
"gcc_pu_fraction": m.GCCPUFraction,
},
map[string]string{
"url": url,
})
}
// Attempt to parse a whole object into a point.
// It might be a non-object, like a string or array.
// If we fail to decode it into a point, ignore it and move on.
@@ -132,7 +201,8 @@ func (i *InfluxDB) gatherURL(
continue
}
// If the object was a point, but was not fully initialized, ignore it and move on.
// If the object was a point, but was not fully initialized,
// ignore it and move on.
if p.Name == "" || p.Tags == nil || p.Values == nil || len(p.Values) == 0 {
continue
}

View File

@@ -11,7 +11,138 @@ import (
)
func TestBasic(t *testing.T) {
js := `
fakeServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
if r.URL.Path == "/endpoint" {
_, _ = w.Write([]byte(basicJSON))
} else {
w.WriteHeader(http.StatusNotFound)
}
}))
defer fakeServer.Close()
plugin := &influxdb.InfluxDB{
URLs: []string{fakeServer.URL + "/endpoint"},
}
var acc testutil.Accumulator
require.NoError(t, plugin.Gather(&acc))
require.Len(t, acc.Metrics, 2)
fields := map[string]interface{}{
// JSON will truncate floats to integer representations.
// Since there's no distinction in JSON, we can't assume it's an int.
"i": -1.0,
"f": 0.5,
"b": true,
"s": "string",
}
tags := map[string]string{
"id": "ex1",
"url": fakeServer.URL + "/endpoint",
}
acc.AssertContainsTaggedFields(t, "influxdb_foo", fields, tags)
fields = map[string]interface{}{
"x": "x",
}
tags = map[string]string{
"id": "ex2",
"url": fakeServer.URL + "/endpoint",
}
acc.AssertContainsTaggedFields(t, "influxdb_bar", fields, tags)
}
func TestInfluxDB(t *testing.T) {
fakeInfluxServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
if r.URL.Path == "/endpoint" {
_, _ = w.Write([]byte(influxReturn))
} else {
w.WriteHeader(http.StatusNotFound)
}
}))
defer fakeInfluxServer.Close()
plugin := &influxdb.InfluxDB{
URLs: []string{fakeInfluxServer.URL + "/endpoint"},
}
var acc testutil.Accumulator
require.NoError(t, plugin.Gather(&acc))
require.Len(t, acc.Metrics, 33)
fields := map[string]interface{}{
"heap_inuse": int64(18046976),
"heap_released": int64(3473408),
"mspan_inuse": int64(97440),
"total_alloc": int64(201739016),
"sys": int64(38537464),
"mallocs": int64(570251),
"frees": int64(381008),
"heap_idle": int64(15802368),
"pause_total_ns": int64(5132914),
"lookups": int64(77),
"heap_sys": int64(33849344),
"mcache_sys": int64(16384),
"next_gc": int64(20843042),
"gcc_pu_fraction": float64(4.287178819113636e-05),
"other_sys": int64(1229737),
"alloc": int64(17034016),
"stack_inuse": int64(753664),
"stack_sys": int64(753664),
"buck_hash_sys": int64(1461583),
"gc_sys": int64(1112064),
"num_gc": int64(27),
"heap_alloc": int64(17034016),
"heap_objects": int64(189243),
"mspan_sys": int64(114688),
"mcache_inuse": int64(4800),
"last_gc": int64(1460434886475114239),
}
tags := map[string]string{
"url": fakeInfluxServer.URL + "/endpoint",
}
acc.AssertContainsTaggedFields(t, "influxdb_memstats", fields, tags)
}
func TestErrorHandling(t *testing.T) {
badServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
if r.URL.Path == "/endpoint" {
_, _ = w.Write([]byte("not json"))
} else {
w.WriteHeader(http.StatusNotFound)
}
}))
defer badServer.Close()
plugin := &influxdb.InfluxDB{
URLs: []string{badServer.URL + "/endpoint"},
}
var acc testutil.Accumulator
require.Error(t, plugin.Gather(&acc))
}
func TestErrorHandling404(t *testing.T) {
badServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
if r.URL.Path == "/endpoint" {
_, _ = w.Write([]byte(basicJSON))
} else {
w.WriteHeader(http.StatusNotFound)
}
}))
defer badServer.Close()
plugin := &influxdb.InfluxDB{
URLs: []string{badServer.URL},
}
var acc testutil.Accumulator
require.Error(t, plugin.Gather(&acc))
}
const basicJSON = `
{
"_1": {
"name": "foo",
@@ -55,43 +186,48 @@ func TestBasic(t *testing.T) {
}
}
`
fakeServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
if r.URL.Path == "/endpoint" {
_, _ = w.Write([]byte(js))
} else {
w.WriteHeader(http.StatusNotFound)
}
}))
defer fakeServer.Close()
plugin := &influxdb.InfluxDB{
URLs: []string{fakeServer.URL + "/endpoint"},
}
var acc testutil.Accumulator
require.NoError(t, plugin.Gather(&acc))
require.Len(t, acc.Metrics, 2)
fields := map[string]interface{}{
// JSON will truncate floats to integer representations.
// Since there's no distinction in JSON, we can't assume it's an int.
"i": -1.0,
"f": 0.5,
"b": true,
"s": "string",
}
tags := map[string]string{
"id": "ex1",
"url": fakeServer.URL + "/endpoint",
}
acc.AssertContainsTaggedFields(t, "influxdb_foo", fields, tags)
fields = map[string]interface{}{
"x": "x",
}
tags = map[string]string{
"id": "ex2",
"url": fakeServer.URL + "/endpoint",
}
acc.AssertContainsTaggedFields(t, "influxdb_bar", fields, tags)
}
const influxReturn = `
{
"cluster": {"name": "cluster", "tags": {}, "values": {}},
"cmdline": ["influxd"],
"cq": {"name": "cq", "tags": {}, "values": {}},
"database:_internal": {"name": "database", "tags": {"database": "_internal"}, "values": {"numMeasurements": 8, "numSeries": 12}},
"database:udp": {"name": "database", "tags": {"database": "udp"}, "values": {"numMeasurements": 14, "numSeries": 38}},
"hh:/Users/csparr/.influxdb/hh": {"name": "hh", "tags": {"path": "/Users/csparr/.influxdb/hh"}, "values": {}},
"httpd::8086": {"name": "httpd", "tags": {"bind": ":8086"}, "values": {"req": 7, "reqActive": 1, "reqDurationNs": 4488799}},
"measurement:cpu_idle.udp": {"name": "measurement", "tags": {"database": "udp", "measurement": "cpu_idle"}, "values": {"numSeries": 1}},
"measurement:cpu_usage.udp": {"name": "measurement", "tags": {"database": "udp", "measurement": "cpu_usage"}, "values": {"numSeries": 1}},
"measurement:database._internal": {"name": "measurement", "tags": {"database": "_internal", "measurement": "database"}, "values": {"numSeries": 2}},
"measurement:database.udp": {"name": "measurement", "tags": {"database": "udp", "measurement": "database"}, "values": {"numSeries": 2}},
"measurement:httpd.udp": {"name": "measurement", "tags": {"database": "udp", "measurement": "httpd"}, "values": {"numSeries": 1}},
"measurement:measurement.udp": {"name": "measurement", "tags": {"database": "udp", "measurement": "measurement"}, "values": {"numSeries": 22}},
"measurement:mem.udp": {"name": "measurement", "tags": {"database": "udp", "measurement": "mem"}, "values": {"numSeries": 1}},
"measurement:net.udp": {"name": "measurement", "tags": {"database": "udp", "measurement": "net"}, "values": {"numSeries": 1}},
"measurement:runtime._internal": {"name": "measurement", "tags": {"database": "_internal", "measurement": "runtime"}, "values": {"numSeries": 1}},
"measurement:runtime.udp": {"name": "measurement", "tags": {"database": "udp", "measurement": "runtime"}, "values": {"numSeries": 1}},
"measurement:shard._internal": {"name": "measurement", "tags": {"database": "_internal", "measurement": "shard"}, "values": {"numSeries": 2}},
"measurement:shard.udp": {"name": "measurement", "tags": {"database": "udp", "measurement": "shard"}, "values": {"numSeries": 1}},
"measurement:subscriber._internal": {"name": "measurement", "tags": {"database": "_internal", "measurement": "subscriber"}, "values": {"numSeries": 1}},
"measurement:subscriber.udp": {"name": "measurement", "tags": {"database": "udp", "measurement": "subscriber"}, "values": {"numSeries": 1}},
"measurement:swap_used.udp": {"name": "measurement", "tags": {"database": "udp", "measurement": "swap_used"}, "values": {"numSeries": 1}},
"measurement:tsm1_cache._internal": {"name": "measurement", "tags": {"database": "_internal", "measurement": "tsm1_cache"}, "values": {"numSeries": 2}},
"measurement:tsm1_cache.udp": {"name": "measurement", "tags": {"database": "udp", "measurement": "tsm1_cache"}, "values": {"numSeries": 2}},
"measurement:tsm1_wal._internal": {"name": "measurement", "tags": {"database": "_internal", "measurement": "tsm1_wal"}, "values": {"numSeries": 2}},
"measurement:tsm1_wal.udp": {"name": "measurement", "tags": {"database": "udp", "measurement": "tsm1_wal"}, "values": {"numSeries": 2}},
"measurement:udp._internal": {"name": "measurement", "tags": {"database": "_internal", "measurement": "udp"}, "values": {"numSeries": 1}},
"measurement:write._internal": {"name": "measurement", "tags": {"database": "_internal", "measurement": "write"}, "values": {"numSeries": 1}},
"measurement:write.udp": {"name": "measurement", "tags": {"database": "udp", "measurement": "write"}, "values": {"numSeries": 1}},
"memstats": {"Alloc":17034016,"TotalAlloc":201739016,"Sys":38537464,"Lookups":77,"Mallocs":570251,"Frees":381008,"HeapAlloc":17034016,"HeapSys":33849344,"HeapIdle":15802368,"HeapInuse":18046976,"HeapReleased":3473408,"HeapObjects":189243,"StackInuse":753664,"StackSys":753664,"MSpanInuse":97440,"MSpanSys":114688,"MCacheInuse":4800,"MCacheSys":16384,"BuckHashSys":1461583,"GCSys":1112064,"OtherSys":1229737,"NextGC":20843042,"LastGC":1460434886475114239,"PauseTotalNs":5132914,"PauseNs":[195052,117751,139370,156933,263089,165249,713747,103904,122015,294408,213753,170864,175845,114221,121563,122409,113098,162219,229257,126726,250774,254235,117206,293588,144279,124306,127053,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],"PauseEnd":[1460433856394860455,1460433856398162739,1460433856405888337,1460433856411784017,1460433856417924684,1460433856428385687,1460433856443782908,1460433856456522851,1460433857392743223,1460433866484394564,1460433866494076235,1460433896472438632,1460433957839825106,1460433976473440328,1460434016473413006,1460434096471892794,1460434126470792929,1460434246480428250,1460434366554468369,1460434396471249528,1460434456471205885,1460434476479487292,1460434536471435965,1460434616469784776,1460434736482078216,1460434856544251733,1460434886475114239,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],"NumGC":27,"GCCPUFraction":4.287178819113636e-05,"EnableGC":true,"DebugGC":false,"BySize":[{"Size":0,"Mallocs":0,"Frees":0},{"Size":8,"Mallocs":1031,"Frees":955},{"Size":16,"Mallocs":308485,"Frees":142064},{"Size":32,"Mallocs":64937,"Frees":54321},{"Size":48,"Mallocs":33012,"Frees":29754},{"Size":64,"Mallocs":20299,"Frees":18173},{"Size":80,"Mallocs":8186,"Frees":7597},{"Size":96,"Mallocs":9806,"Frees":8982},{"Size":112,"Mallocs":5671,"Frees":4850},{"Size":128,"Mallocs":2972,"Frees":2684},{"Size":144,"Mallocs":4106,"Frees":3719},{"Size":160,"Mallocs":1324,"Frees":911},{"Size":176,"Mallocs":2574,"Frees":2391},{"Size":192,"Mallocs":4053,"Frees":3863},{"Size":208,"Mallocs":442,"Frees":307},{"Size":224,"Mallocs":336,"Frees":172},{"Size":240,"Mallocs":143,"Frees":125},{"Size":256,"Mallocs":542,"Frees":497},{"Size":288,"Mallocs":15971,"Frees":14761},{"Size":320,"Mallocs":245,"Frees":30},{"Size":352,"Mallocs":1299,"Frees":1065},{"Size":384,"Mallocs":138,"Frees":2},{"Size":416,"Mallocs":54,"Frees":47},{"Size":448,"Mallocs":75,"Frees":29},{"Size":480,"Mallocs":6,"Frees":4},{"Size":512,"Mallocs":452,"Frees":422},{"Size":576,"Mallocs":486,"Frees":395},{"Size":640,"Mallocs":81,"Frees":67},{"Size":704,"Mallocs":421,"Frees":397},{"Size":768,"Mallocs":469,"Frees":468},{"Size":896,"Mallocs":1049,"Frees":1010},{"Size":1024,"Mallocs":1078,"Frees":960},{"Size":1152,"Mallocs":750,"Frees":498},{"Size":1280,"Mallocs":84,"Frees":72},{"Size":1408,"Mallocs":218,"Frees":187},{"Size":1536,"Mallocs":73,"Frees":48},{"Size":1664,"Mallocs":43,"Frees":30},{"Size":2048,"Mallocs":153,"Frees":57},{"Size":2304,"Mallocs":41,"Frees":30},{"Size":2560,"Mallocs":18,"Frees":15},{"Size":2816,"Mallocs":164,"Frees":157},{"Size":3072,"Mallocs":0,"Frees":0},{"Size":3328,"Mallocs":13,"Frees":6},{"Size":4096,"Mallocs":101,"Frees":82},{"Size":4608,"Mallocs":32,"Frees":26},{"Size":5376,"Mallocs":165,"Frees":151},{"Size":6144,"Mallocs":15,"Frees":9},{"Size":6400,"Mallocs":1,"Frees":1},{"Size":6656,"Mallocs":1,"Frees":0},{"Size":6912,"Mallocs":0,"Frees":0},{"Size":8192,"Mallocs":13,"Frees":13},{"Size":8448,"Mallocs":0,"Frees":0},{"Size":8704,"Mallocs":1,"Frees":1},{"Size":9472,"Mallocs":6,"Frees":4},{"Size":10496,"Mallocs":0,"Frees":0},{"Size":12288,"Mallocs":41,"Frees":35},{"Size":13568,"Mallocs":0,"Frees":0},{"Size":14080,"Mallocs":0,"Frees":0},{"Size":16384,"Mallocs":4,"Frees":4},{"Size":16640,"Mallocs":0,"Frees":0},{"Size":17664,"Mallocs":0,"Frees":0}]},
"queryExecutor": {"name": "queryExecutor", "tags": {}, "values": {}},
"shard:/Users/csparr/.influxdb/data/_internal/monitor/2:2": {"name": "shard", "tags": {"database": "_internal", "engine": "tsm1", "id": "2", "path": "/Users/csparr/.influxdb/data/_internal/monitor/2", "retentionPolicy": "monitor"}, "values": {}},
"shard:/Users/csparr/.influxdb/data/udp/default/1:1": {"name": "shard", "tags": {"database": "udp", "engine": "tsm1", "id": "1", "path": "/Users/csparr/.influxdb/data/udp/default/1", "retentionPolicy": "default"}, "values": {"fieldsCreate": 61, "seriesCreate": 33, "writePointsOk": 3613, "writeReq": 110}},
"subscriber": {"name": "subscriber", "tags": {}, "values": {"pointsWritten": 3613}},
"tsm1_cache:/Users/csparr/.influxdb/data/_internal/monitor/2": {"name": "tsm1_cache", "tags": {"database": "_internal", "path": "/Users/csparr/.influxdb/data/_internal/monitor/2", "retentionPolicy": "monitor"}, "values": {"WALCompactionTimeMs": 0, "cacheAgeMs": 1103932, "cachedBytes": 0, "diskBytes": 0, "memBytes": 40480, "snapshotCount": 0}},
"tsm1_cache:/Users/csparr/.influxdb/data/udp/default/1": {"name": "tsm1_cache", "tags": {"database": "udp", "path": "/Users/csparr/.influxdb/data/udp/default/1", "retentionPolicy": "default"}, "values": {"WALCompactionTimeMs": 0, "cacheAgeMs": 1103029, "cachedBytes": 0, "diskBytes": 0, "memBytes": 2359472, "snapshotCount": 0}},
"tsm1_filestore:/Users/csparr/.influxdb/data/_internal/monitor/2": {"name": "tsm1_filestore", "tags": {"database": "_internal", "path": "/Users/csparr/.influxdb/data/_internal/monitor/2", "retentionPolicy": "monitor"}, "values": {}},
"tsm1_filestore:/Users/csparr/.influxdb/data/udp/default/1": {"name": "tsm1_filestore", "tags": {"database": "udp", "path": "/Users/csparr/.influxdb/data/udp/default/1", "retentionPolicy": "default"}, "values": {}},
"tsm1_wal:/Users/csparr/.influxdb/wal/_internal/monitor/2": {"name": "tsm1_wal", "tags": {"database": "_internal", "path": "/Users/csparr/.influxdb/wal/_internal/monitor/2", "retentionPolicy": "monitor"}, "values": {"currentSegmentDiskBytes": 0, "oldSegmentsDiskBytes": 69532}},
"tsm1_wal:/Users/csparr/.influxdb/wal/udp/default/1": {"name": "tsm1_wal", "tags": {"database": "udp", "path": "/Users/csparr/.influxdb/wal/udp/default/1", "retentionPolicy": "default"}, "values": {"currentSegmentDiskBytes": 193728, "oldSegmentsDiskBytes": 1008330}},
"write": {"name": "write", "tags": {}, "values": {"pointReq": 3613, "pointReqLocal": 3613, "req": 110, "subWriteOk": 110, "writeOk": 110}}
}`

View File

@@ -20,7 +20,7 @@ ipmitool -I lan -H 192.168.1.1 -U USERID -P PASSW0RD sdr
## Configuration
```toml
[[inputs.ipmi]]
[[inputs.ipmi_sensor]]
## specify servers via a url matching:
## [username[:password]@][protocol[(address)]]
## e.g.

View File

@@ -1,10 +1,12 @@
package ipmi_sensor
import (
"bytes"
"fmt"
"os/exec"
"strings"
"time"
"github.com/influxdata/telegraf/internal"
)
type CommandRunner struct{}
@@ -18,21 +20,16 @@ func (t CommandRunner) cmd(conn *Connection, args ...string) *exec.Cmd {
}
return exec.Command(path, opts...)
}
func (t CommandRunner) Run(conn *Connection, args ...string) (string, error) {
cmd := t.cmd(conn, args...)
var stdout bytes.Buffer
var stderr bytes.Buffer
cmd.Stdout = &stdout
cmd.Stderr = &stderr
err := cmd.Run()
output, err := internal.CombinedOutputTimeout(cmd, time.Second*5)
if err != nil {
return "", fmt.Errorf("run %s %s: %s (%s)",
cmd.Path, strings.Join(cmd.Args, " "), stderr.String(), err)
cmd.Path, strings.Join(cmd.Args, " "), string(output), err)
}
return stdout.String(), err
return string(output), err
}

View File

@@ -28,7 +28,7 @@ func NewConnection(server string) *Connection {
if inx1 > 0 {
security := server[0:inx1]
connstr = server[inx1+1 : len(server)]
up := strings.Split(security, ":")
up := strings.SplitN(security, ":", 2)
conn.Username = up[0]
conn.Password = up[1]
}

View File

@@ -1,51 +1,61 @@
# Telegraf plugin: Jolokia
#### Plugin arguments:
- **context** string: Context root used of jolokia url
- **servers** []Server: List of servers
+ **name** string: Server's logical name
+ **host** string: Server's ip address or hostname
+ **port** string: Server's listening port
- **metrics** []Metric
+ **name** string: Name of the measure
+ **jmx** string: Jmx path that identifies mbeans attributes
+ **pass** []string: Attributes to retain when collecting values
+ **drop** []string: Attributes to drop when collecting values
#### Configuration
```toml
# Read JMX metrics through Jolokia
[[inputs.jolokia]]
## This is the context root used to compose the jolokia url
context = "/jolokia"
## This specifies the mode used
# mode = "proxy"
#
## When in proxy mode this section is used to specify further
## proxy address configurations.
## Remember to change host address to fit your environment.
# [inputs.jolokia.proxy]
# host = "127.0.0.1"
# port = "8080"
## List of servers exposing jolokia read service
[[inputs.jolokia.servers]]
name = "as-server-01"
host = "127.0.0.1"
port = "8080"
# username = "myuser"
# password = "mypassword"
## List of metrics collected on above servers
## Each metric consists in a name, a jmx path and either
## a pass or drop slice attribute.
## This collect all heap memory usage metrics.
[[inputs.jolokia.metrics]]
name = "heap_memory_usage"
mbean = "java.lang:type=Memory"
attribute = "HeapMemoryUsage"
## This collect thread counts metrics.
[[inputs.jolokia.metrics]]
name = "thread_count"
mbean = "java.lang:type=Threading"
attribute = "TotalStartedThreadCount,ThreadCount,DaemonThreadCount,PeakThreadCount"
## This collect number of class loaded/unloaded counts metrics.
[[inputs.jolokia.metrics]]
name = "class_count"
mbean = "java.lang:type=ClassLoading"
attribute = "LoadedClassCount,UnloadedClassCount,TotalLoadedClassCount"
```
#### Description
The Jolokia plugin collects JVM metrics exposed as MBean's attributes through jolokia REST endpoint. All metrics
are collected for each server configured.
The Jolokia plugin collects JVM metrics exposed as MBean's attributes through
jolokia REST endpoint. All metrics are collected for each server configured.
See: https://jolokia.org/
# Measurements:
Jolokia plugin produces one measure for each metric configured, adding Server's `name`, `host` and `port` as tags.
Given a configuration like:
```ini
[jolokia]
[[jolokia.servers]]
name = "as-service-1"
host = "127.0.0.1"
port = "8080"
[[jolokia.servers]]
name = "as-service-2"
host = "127.0.0.1"
port = "8180"
[[jolokia.metrics]]
name = "heap_memory_usage"
jmx = "/java.lang:type=Memory/HeapMemoryUsage"
pass = ["used", "max"]
```
The collected metrics will be:
```
jolokia_heap_memory_usage name=as-service-1,host=127.0.0.1,port=8080 used=xxx,max=yyy
jolokia_heap_memory_usage name=as-service-2,host=127.0.0.1,port=8180 used=vvv,max=zzz
```
Jolokia plugin produces one measure for each metric configured,
adding Server's `jolokia_name`, `jolokia_host` and `jolokia_port` as tags.

View File

@@ -1,6 +1,7 @@
package jolokia
import (
"bytes"
"encoding/json"
"errors"
"fmt"
@@ -22,8 +23,10 @@ type Server struct {
}
type Metric struct {
Name string
Jmx string
Name string
Mbean string
Attribute string
Path string
}
type JolokiaClient interface {
@@ -41,20 +44,32 @@ func (c JolokiaClientImpl) MakeRequest(req *http.Request) (*http.Response, error
type Jolokia struct {
jClient JolokiaClient
Context string
Mode string
Servers []Server
Metrics []Metric
Proxy Server
}
func (j *Jolokia) SampleConfig() string {
return `
const sampleConfig = `
## This is the context root used to compose the jolokia url
context = "/jolokia/read"
context = "/jolokia"
## This specifies the mode used
# mode = "proxy"
#
## When in proxy mode this section is used to specify further
## proxy address configurations.
## Remember to change host address to fit your environment.
# [inputs.jolokia.proxy]
# host = "127.0.0.1"
# port = "8080"
## List of servers exposing jolokia read service
[[inputs.jolokia.servers]]
name = "stable"
host = "192.168.103.2"
port = "8180"
name = "as-server-01"
host = "127.0.0.1"
port = "8080"
# username = "myuser"
# password = "mypassword"
@@ -64,20 +79,31 @@ func (j *Jolokia) SampleConfig() string {
## This collect all heap memory usage metrics.
[[inputs.jolokia.metrics]]
name = "heap_memory_usage"
jmx = "/java.lang:type=Memory/HeapMemoryUsage"
mbean = "java.lang:type=Memory"
attribute = "HeapMemoryUsage"
## This collect thread counts metrics.
[[inputs.jolokia.metrics]]
name = "thread_count"
mbean = "java.lang:type=Threading"
attribute = "TotalStartedThreadCount,ThreadCount,DaemonThreadCount,PeakThreadCount"
## This collect number of class loaded/unloaded counts metrics.
[[inputs.jolokia.metrics]]
name = "class_count"
mbean = "java.lang:type=ClassLoading"
attribute = "LoadedClassCount,UnloadedClassCount,TotalLoadedClassCount"
`
func (j *Jolokia) SampleConfig() string {
return sampleConfig
}
func (j *Jolokia) Description() string {
return "Read JMX metrics through Jolokia"
}
func (j *Jolokia) getAttr(requestUrl *url.URL) (map[string]interface{}, error) {
// Create + send request
req, err := http.NewRequest("GET", requestUrl.String(), nil)
if err != nil {
return nil, err
}
func (j *Jolokia) doRequest(req *http.Request) (map[string]interface{}, error) {
resp, err := j.jClient.MakeRequest(req)
if err != nil {
@@ -88,7 +114,7 @@ func (j *Jolokia) getAttr(requestUrl *url.URL) (map[string]interface{}, error) {
// Process response
if resp.StatusCode != http.StatusOK {
err = fmt.Errorf("Response from url \"%s\" has status code %d (%s), expected %d (%s)",
requestUrl,
req.RequestURI,
resp.StatusCode,
http.StatusText(resp.StatusCode),
http.StatusOK,
@@ -108,51 +134,133 @@ func (j *Jolokia) getAttr(requestUrl *url.URL) (map[string]interface{}, error) {
return nil, errors.New("Error decoding JSON response")
}
if status, ok := jsonOut["status"]; ok {
if status != float64(200) {
return nil, fmt.Errorf("Not expected status value in response body: %3.f",
status)
}
} else {
return nil, fmt.Errorf("Missing status in response body")
}
return jsonOut, nil
}
func (j *Jolokia) prepareRequest(server Server, metric Metric) (*http.Request, error) {
var jolokiaUrl *url.URL
context := j.Context // Usually "/jolokia"
// Create bodyContent
bodyContent := map[string]interface{}{
"type": "read",
"mbean": metric.Mbean,
}
if metric.Attribute != "" {
bodyContent["attribute"] = metric.Attribute
if metric.Path != "" {
bodyContent["path"] = metric.Path
}
}
// Add target, only in proxy mode
if j.Mode == "proxy" {
serviceUrl := fmt.Sprintf("service:jmx:rmi:///jndi/rmi://%s:%s/jmxrmi",
server.Host, server.Port)
target := map[string]string{
"url": serviceUrl,
}
if server.Username != "" {
target["user"] = server.Username
}
if server.Password != "" {
target["password"] = server.Password
}
bodyContent["target"] = target
proxy := j.Proxy
// Prepare ProxyURL
proxyUrl, err := url.Parse("http://" + proxy.Host + ":" + proxy.Port + context)
if err != nil {
return nil, err
}
if proxy.Username != "" || proxy.Password != "" {
proxyUrl.User = url.UserPassword(proxy.Username, proxy.Password)
}
jolokiaUrl = proxyUrl
} else {
serverUrl, err := url.Parse("http://" + server.Host + ":" + server.Port + context)
if err != nil {
return nil, err
}
if server.Username != "" || server.Password != "" {
serverUrl.User = url.UserPassword(server.Username, server.Password)
}
jolokiaUrl = serverUrl
}
requestBody, err := json.Marshal(bodyContent)
req, err := http.NewRequest("POST", jolokiaUrl.String(), bytes.NewBuffer(requestBody))
if err != nil {
return nil, err
}
req.Header.Add("Content-type", "application/json")
return req, nil
}
func (j *Jolokia) Gather(acc telegraf.Accumulator) error {
context := j.Context //"/jolokia/read"
servers := j.Servers
metrics := j.Metrics
tags := make(map[string]string)
for _, server := range servers {
tags["server"] = server.Name
tags["port"] = server.Port
tags["host"] = server.Host
tags["jolokia_name"] = server.Name
tags["jolokia_port"] = server.Port
tags["jolokia_host"] = server.Host
fields := make(map[string]interface{})
for _, metric := range metrics {
measurement := metric.Name
jmxPath := metric.Jmx
// Prepare URL
requestUrl, err := url.Parse("http://" + server.Host + ":" +
server.Port + context + jmxPath)
req, err := j.prepareRequest(server, metric)
if err != nil {
return err
}
if server.Username != "" || server.Password != "" {
requestUrl.User = url.UserPassword(server.Username, server.Password)
}
out, _ := j.getAttr(requestUrl)
out, err := j.doRequest(req)
if values, ok := out["value"]; ok {
switch t := values.(type) {
case map[string]interface{}:
for k, v := range t {
fields[measurement+"_"+k] = v
}
case interface{}:
fields[measurement] = t
}
if err != nil {
fmt.Printf("Error handling response: %s\n", err)
} else {
fmt.Printf("Missing key 'value' in '%s' output response\n",
requestUrl.String())
if values, ok := out["value"]; ok {
switch t := values.(type) {
case map[string]interface{}:
for k, v := range t {
fields[measurement+"_"+k] = v
}
case interface{}:
fields[measurement] = t
}
} else {
fmt.Printf("Missing key 'value' in output response\n")
}
}
}
acc.AddFields("jolokia", fields, tags)
}

View File

@@ -47,8 +47,10 @@ const invalidJSON = "I don't think this is JSON"
const empty = ""
var Servers = []Server{Server{Name: "as1", Host: "127.0.0.1", Port: "8080"}}
var HeapMetric = Metric{Name: "heap_memory_usage", Jmx: "/java.lang:type=Memory/HeapMemoryUsage"}
var UsedHeapMetric = Metric{Name: "heap_memory_usage", Jmx: "/java.lang:type=Memory/HeapMemoryUsage"}
var HeapMetric = Metric{Name: "heap_memory_usage",
Mbean: "java.lang:type=Memory", Attribute: "HeapMemoryUsage"}
var UsedHeapMetric = Metric{Name: "heap_memory_usage",
Mbean: "java.lang:type=Memory", Attribute: "HeapMemoryUsage"}
type jolokiaClientStub struct {
responseBody string
@@ -94,9 +96,9 @@ func TestHttpJsonMultiValue(t *testing.T) {
"heap_memory_usage_used": 203288528.0,
}
tags := map[string]string{
"host": "127.0.0.1",
"port": "8080",
"server": "as1",
"jolokia_host": "127.0.0.1",
"jolokia_port": "8080",
"jolokia_name": "as1",
}
acc.AssertContainsTaggedFields(t, "jolokia", fields, tags)
}
@@ -114,3 +116,17 @@ func TestHttpJsonOn404(t *testing.T) {
assert.Nil(t, err)
assert.Equal(t, 0, len(acc.Metrics))
}
// Test that the proper values are ignored or collected
func TestHttpInvalidJson(t *testing.T) {
jolokia := genJolokiaClientStub(invalidJSON, 200, Servers,
[]Metric{UsedHeapMetric})
var acc testutil.Accumulator
acc.SetDebug(true)
err := jolokia.Gather(&acc)
assert.Nil(t, err)
assert.Equal(t, 0, len(acc.Metrics))
}

View File

@@ -22,7 +22,8 @@ from the same topic in parallel.
## Offset (must be either "oldest" or "newest")
offset = "oldest"
## Data format to consume. This can be "json", "influx" or "graphite"
## Data format to consume.
## Each data format has it's own unique set of configuration options, read
## more about them here:
## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md

View File

@@ -3,13 +3,16 @@ package leofs
import (
"bufio"
"fmt"
"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/plugins/inputs"
"net/url"
"os/exec"
"strconv"
"strings"
"sync"
"time"
"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/internal"
"github.com/influxdata/telegraf/plugins/inputs"
)
const oid = ".1.3.6.1.4.1.35450"
@@ -175,14 +178,18 @@ func (l *LeoFS) Gather(acc telegraf.Accumulator) error {
return outerr
}
func (l *LeoFS) gatherServer(endpoint string, serverType ServerType, acc telegraf.Accumulator) error {
func (l *LeoFS) gatherServer(
endpoint string,
serverType ServerType,
acc telegraf.Accumulator,
) error {
cmd := exec.Command("snmpwalk", "-v2c", "-cpublic", endpoint, oid)
stdout, err := cmd.StdoutPipe()
if err != nil {
return err
}
cmd.Start()
defer cmd.Wait()
defer internal.WaitTimeout(cmd, time.Second*5)
scanner := bufio.NewScanner(stdout)
if !scanner.Scan() {
return fmt.Errorf("Unable to retrieve the node name")

View File

@@ -34,9 +34,13 @@ var sampleConfig = `
##
# ost_procfiles = [
# "/proc/fs/lustre/obdfilter/*/stats",
# "/proc/fs/lustre/osd-ldiskfs/*/stats"
# "/proc/fs/lustre/osd-ldiskfs/*/stats",
# "/proc/fs/lustre/obdfilter/*/job_stats",
# ]
# mds_procfiles = [
# "/proc/fs/lustre/mdt/*/md_stats",
# "/proc/fs/lustre/mdt/*/job_stats",
# ]
# mds_procfiles = ["/proc/fs/lustre/mdt/*/md_stats"]
`
/* The wanted fields would be a []string if not for the
@@ -82,6 +86,139 @@ var wanted_ost_fields = []*mapping{
},
}
var wanted_ost_jobstats_fields = []*mapping{
{ // The read line has several fields, so we need to differentiate what they are
inProc: "read",
field: 3,
reportAs: "jobstats_read_calls",
},
{
inProc: "read",
field: 7,
reportAs: "jobstats_read_min_size",
},
{
inProc: "read",
field: 9,
reportAs: "jobstats_read_max_size",
},
{
inProc: "read",
field: 11,
reportAs: "jobstats_read_bytes",
},
{ // Different inProc for newer versions
inProc: "read_bytes",
field: 3,
reportAs: "jobstats_read_calls",
},
{
inProc: "read_bytes",
field: 7,
reportAs: "jobstats_read_min_size",
},
{
inProc: "read_bytes",
field: 9,
reportAs: "jobstats_read_max_size",
},
{
inProc: "read_bytes",
field: 11,
reportAs: "jobstats_read_bytes",
},
{ // We need to do the same for the write fields
inProc: "write",
field: 3,
reportAs: "jobstats_write_calls",
},
{
inProc: "write",
field: 7,
reportAs: "jobstats_write_min_size",
},
{
inProc: "write",
field: 9,
reportAs: "jobstats_write_max_size",
},
{
inProc: "write",
field: 11,
reportAs: "jobstats_write_bytes",
},
{ // Different inProc for newer versions
inProc: "write_bytes",
field: 3,
reportAs: "jobstats_write_calls",
},
{
inProc: "write_bytes",
field: 7,
reportAs: "jobstats_write_min_size",
},
{
inProc: "write_bytes",
field: 9,
reportAs: "jobstats_write_max_size",
},
{
inProc: "write_bytes",
field: 11,
reportAs: "jobstats_write_bytes",
},
{
inProc: "getattr",
field: 3,
reportAs: "jobstats_ost_getattr",
},
{
inProc: "setattr",
field: 3,
reportAs: "jobstats_ost_setattr",
},
{
inProc: "punch",
field: 3,
reportAs: "jobstats_punch",
},
{
inProc: "sync",
field: 3,
reportAs: "jobstats_ost_sync",
},
{
inProc: "destroy",
field: 3,
reportAs: "jobstats_destroy",
},
{
inProc: "create",
field: 3,
reportAs: "jobstats_create",
},
{
inProc: "statfs",
field: 3,
reportAs: "jobstats_ost_statfs",
},
{
inProc: "get_info",
field: 3,
reportAs: "jobstats_get_info",
},
{
inProc: "set_info",
field: 3,
reportAs: "jobstats_set_info",
},
{
inProc: "quotactl",
field: 3,
reportAs: "jobstats_quotactl",
},
}
var wanted_mds_fields = []*mapping{
{
inProc: "open",
@@ -133,6 +270,89 @@ var wanted_mds_fields = []*mapping{
},
}
var wanted_mdt_jobstats_fields = []*mapping{
{
inProc: "open",
field: 3,
reportAs: "jobstats_open",
},
{
inProc: "close",
field: 3,
reportAs: "jobstats_close",
},
{
inProc: "mknod",
field: 3,
reportAs: "jobstats_mknod",
},
{
inProc: "link",
field: 3,
reportAs: "jobstats_link",
},
{
inProc: "unlink",
field: 3,
reportAs: "jobstats_unlink",
},
{
inProc: "mkdir",
field: 3,
reportAs: "jobstats_mkdir",
},
{
inProc: "rmdir",
field: 3,
reportAs: "jobstats_rmdir",
},
{
inProc: "rename",
field: 3,
reportAs: "jobstats_rename",
},
{
inProc: "getattr",
field: 3,
reportAs: "jobstats_getattr",
},
{
inProc: "setattr",
field: 3,
reportAs: "jobstats_setattr",
},
{
inProc: "getxattr",
field: 3,
reportAs: "jobstats_getxattr",
},
{
inProc: "setxattr",
field: 3,
reportAs: "jobstats_setxattr",
},
{
inProc: "statfs",
field: 3,
reportAs: "jobstats_statfs",
},
{
inProc: "sync",
field: 3,
reportAs: "jobstats_sync",
},
{
inProc: "samedir_rename",
field: 3,
reportAs: "jobstats_samedir_rename",
},
{
inProc: "crossdir_rename",
field: 3,
reportAs: "jobstats_crossdir_rename",
},
}
func (l *Lustre2) GetLustreProcStats(fileglob string, wanted_fields []*mapping, acc telegraf.Accumulator) error {
files, err := filepath.Glob(fileglob)
if err != nil {
@@ -143,7 +363,7 @@ func (l *Lustre2) GetLustreProcStats(fileglob string, wanted_fields []*mapping,
/* Turn /proc/fs/lustre/obdfilter/<ost_name>/stats and similar
* into just the object store target name
* Assumpion: the target name is always second to last,
* which is true in Lustre 2.1->2.5
* which is true in Lustre 2.1->2.8
*/
path := strings.Split(file, "/")
name := path[len(path)-2]
@@ -161,16 +381,21 @@ func (l *Lustre2) GetLustreProcStats(fileglob string, wanted_fields []*mapping,
for _, line := range lines {
parts := strings.Fields(line)
if strings.HasPrefix(line, "- job_id:") {
// Set the job_id explicitly if present
fields["jobid"] = parts[2]
}
for _, wanted := range wanted_fields {
var data uint64
if parts[0] == wanted.inProc {
if strings.TrimSuffix(parts[0], ":") == wanted.inProc {
wanted_field := wanted.field
// if not set, assume field[1]. Shouldn't be field[0], as
// that's a string
if wanted_field == 0 {
wanted_field = 1
}
data, err = strconv.ParseUint((parts[wanted_field]), 10, 64)
data, err = strconv.ParseUint(strings.TrimSuffix((parts[wanted_field]), ","), 10, 64)
if err != nil {
return err
}
@@ -213,6 +438,12 @@ func (l *Lustre2) Gather(acc telegraf.Accumulator) error {
if err != nil {
return err
}
// per job statistics are in obdfilter/<ost_name>/job_stats
err = l.GetLustreProcStats("/proc/fs/lustre/obdfilter/*/job_stats",
wanted_ost_jobstats_fields, acc)
if err != nil {
return err
}
}
if len(l.Mds_procfiles) == 0 {
@@ -222,16 +453,31 @@ func (l *Lustre2) Gather(acc telegraf.Accumulator) error {
if err != nil {
return err
}
// Metadata target job stats
err = l.GetLustreProcStats("/proc/fs/lustre/mdt/*/job_stats",
wanted_mdt_jobstats_fields, acc)
if err != nil {
return err
}
}
for _, procfile := range l.Ost_procfiles {
err := l.GetLustreProcStats(procfile, wanted_ost_fields, acc)
ost_fields := wanted_ost_fields
if strings.HasSuffix(procfile, "job_stats") {
ost_fields = wanted_ost_jobstats_fields
}
err := l.GetLustreProcStats(procfile, ost_fields, acc)
if err != nil {
return err
}
}
for _, procfile := range l.Mds_procfiles {
err := l.GetLustreProcStats(procfile, wanted_mds_fields, acc)
mdt_fields := wanted_mds_fields
if strings.HasSuffix(procfile, "job_stats") {
mdt_fields = wanted_mdt_jobstats_fields
}
err := l.GetLustreProcStats(procfile, mdt_fields, acc)
if err != nil {
return err
}
@@ -241,6 +487,12 @@ func (l *Lustre2) Gather(acc telegraf.Accumulator) error {
tags := map[string]string{
"name": name,
}
if _, ok := fields["jobid"]; ok {
if jobid, ok := fields["jobid"].(string); ok {
tags["jobid"] = jobid
}
delete(fields, "jobid")
}
acc.AddFields("lustre2", fields, tags)
}

View File

@@ -38,6 +38,23 @@ cache_hit 7393729777 samples [pages] 1 1 7393729777
cache_miss 11653333250 samples [pages] 1 1 11653333250
`
const obdfilterJobStatsContents = `job_stats:
- job_id: testjob1
snapshot_time: 1461772761
read_bytes: { samples: 1, unit: bytes, min: 4096, max: 4096, sum: 4096 }
write_bytes: { samples: 25, unit: bytes, min: 1048576, max: 1048576, sum: 26214400 }
getattr: { samples: 0, unit: reqs }
setattr: { samples: 0, unit: reqs }
punch: { samples: 1, unit: reqs }
sync: { samples: 0, unit: reqs }
destroy: { samples: 0, unit: reqs }
create: { samples: 0, unit: reqs }
statfs: { samples: 0, unit: reqs }
get_info: { samples: 0, unit: reqs }
set_info: { samples: 0, unit: reqs }
quotactl: { samples: 0, unit: reqs }
`
const mdtProcContents = `snapshot_time 1438693238.20113 secs.usecs
open 1024577037 samples [reqs]
close 873243496 samples [reqs]
@@ -57,6 +74,27 @@ samedir_rename 259625 samples [reqs]
crossdir_rename 369571 samples [reqs]
`
const mdtJobStatsContents = `job_stats:
- job_id: testjob1
snapshot_time: 1461772761
open: { samples: 5, unit: reqs }
close: { samples: 4, unit: reqs }
mknod: { samples: 6, unit: reqs }
link: { samples: 8, unit: reqs }
unlink: { samples: 90, unit: reqs }
mkdir: { samples: 521, unit: reqs }
rmdir: { samples: 520, unit: reqs }
rename: { samples: 9, unit: reqs }
getattr: { samples: 11, unit: reqs }
setattr: { samples: 1, unit: reqs }
getxattr: { samples: 3, unit: reqs }
setxattr: { samples: 4, unit: reqs }
statfs: { samples: 1205, unit: reqs }
sync: { samples: 2, unit: reqs }
samedir_rename: { samples: 705, unit: reqs }
crossdir_rename: { samples: 200, unit: reqs }
`
func TestLustre2GeneratesMetrics(t *testing.T) {
tempdir := os.TempDir() + "/telegraf/proc/fs/lustre/"
@@ -83,6 +121,7 @@ func TestLustre2GeneratesMetrics(t *testing.T) {
err = ioutil.WriteFile(obddir+"/"+ost_name+"/stats", []byte(obdfilterProcContents), 0644)
require.NoError(t, err)
// Begin by testing standard Lustre stats
m := &Lustre2{
Ost_procfiles: []string{obddir + "/*/stats", osddir + "/*/stats"},
Mds_procfiles: []string{mdtdir + "/*/md_stats"},
@@ -128,3 +167,82 @@ func TestLustre2GeneratesMetrics(t *testing.T) {
err = os.RemoveAll(os.TempDir() + "/telegraf")
require.NoError(t, err)
}
func TestLustre2GeneratesJobstatsMetrics(t *testing.T) {
tempdir := os.TempDir() + "/telegraf/proc/fs/lustre/"
ost_name := "OST0001"
job_name := "testjob1"
mdtdir := tempdir + "/mdt/"
err := os.MkdirAll(mdtdir+"/"+ost_name, 0755)
require.NoError(t, err)
obddir := tempdir + "/obdfilter/"
err = os.MkdirAll(obddir+"/"+ost_name, 0755)
require.NoError(t, err)
err = ioutil.WriteFile(mdtdir+"/"+ost_name+"/job_stats", []byte(mdtJobStatsContents), 0644)
require.NoError(t, err)
err = ioutil.WriteFile(obddir+"/"+ost_name+"/job_stats", []byte(obdfilterJobStatsContents), 0644)
require.NoError(t, err)
// Test Lustre Jobstats
m := &Lustre2{
Ost_procfiles: []string{obddir + "/*/job_stats"},
Mds_procfiles: []string{mdtdir + "/*/job_stats"},
}
var acc testutil.Accumulator
err = m.Gather(&acc)
require.NoError(t, err)
tags := map[string]string{
"name": ost_name,
"jobid": job_name,
}
fields := map[string]interface{}{
"jobstats_read_calls": uint64(1),
"jobstats_read_min_size": uint64(4096),
"jobstats_read_max_size": uint64(4096),
"jobstats_read_bytes": uint64(4096),
"jobstats_write_calls": uint64(25),
"jobstats_write_min_size": uint64(1048576),
"jobstats_write_max_size": uint64(1048576),
"jobstats_write_bytes": uint64(26214400),
"jobstats_ost_getattr": uint64(0),
"jobstats_ost_setattr": uint64(0),
"jobstats_punch": uint64(1),
"jobstats_ost_sync": uint64(0),
"jobstats_destroy": uint64(0),
"jobstats_create": uint64(0),
"jobstats_ost_statfs": uint64(0),
"jobstats_get_info": uint64(0),
"jobstats_set_info": uint64(0),
"jobstats_quotactl": uint64(0),
"jobstats_open": uint64(5),
"jobstats_close": uint64(4),
"jobstats_mknod": uint64(6),
"jobstats_link": uint64(8),
"jobstats_unlink": uint64(90),
"jobstats_mkdir": uint64(521),
"jobstats_rmdir": uint64(520),
"jobstats_rename": uint64(9),
"jobstats_getattr": uint64(11),
"jobstats_setattr": uint64(1),
"jobstats_getxattr": uint64(3),
"jobstats_setxattr": uint64(4),
"jobstats_statfs": uint64(1205),
"jobstats_sync": uint64(2),
"jobstats_samedir_rename": uint64(705),
"jobstats_crossdir_rename": uint64(200),
}
acc.AssertContainsTaggedFields(t, "lustre2", fields, tags)
err = os.RemoveAll(os.TempDir() + "/telegraf")
require.NoError(t, err)
}

View File

@@ -0,0 +1,54 @@
# Telegraf plugin: MongoDB
#### Configuration
```toml
[[inputs.mongodb]]
## An array of URI to gather stats about. Specify an ip or hostname
## with optional port add password. ie,
## mongodb://user:auth_key@10.10.3.30:27017,
## mongodb://10.10.3.33:18832,
## 10.0.0.1:10000, etc.
servers = ["127.0.0.1:27017"]
```
For authenticated mongodb istances use connection mongdb connection URI
```toml
[[inputs.mongodb]]
servers = ["mongodb://username:password@10.XX.XX.XX:27101/mydatabase?authSource=admin"]
```
This connection uri may be different based on your environement and mongodb
setup. If the user doesn't have the required privilege to execute serverStatus
command the you will get this error on telegraf
```
Error in input [mongodb]: not authorized on admin to execute command { serverStatus: 1, recordStats: 0 }
```
#### Description
The telegraf plugin collects mongodb stats exposed by serverStatus and few more
and create a single measurement containing values e.g.
* active_reads
* active_writes
* commands_per_sec
* deletes_per_sec
* flushes_per_sec
* getmores_per_sec
* inserts_per_sec
* net_in_bytes
* net_out_bytes
* open_connections
* percent_cache_dirty
* percent_cache_used
* queries_per_sec
* queued_reads
* queued_writes
* resident_megabytes
* updates_per_sec
* vsize_megabytes
* ttl_deletes_per_sec
* ttl_passes_per_sec
* repl_lag
* jumbo_chunks (only if mongos or mongo config)

View File

@@ -26,22 +26,24 @@ func NewMongodbData(statLine *StatLine, tags map[string]string) *MongodbData {
}
var DefaultStats = map[string]string{
"inserts_per_sec": "Insert",
"queries_per_sec": "Query",
"updates_per_sec": "Update",
"deletes_per_sec": "Delete",
"getmores_per_sec": "GetMore",
"commands_per_sec": "Command",
"flushes_per_sec": "Flushes",
"vsize_megabytes": "Virtual",
"resident_megabytes": "Resident",
"queued_reads": "QueuedReaders",
"queued_writes": "QueuedWriters",
"active_reads": "ActiveReaders",
"active_writes": "ActiveWriters",
"net_in_bytes": "NetIn",
"net_out_bytes": "NetOut",
"open_connections": "NumConnections",
"inserts_per_sec": "Insert",
"queries_per_sec": "Query",
"updates_per_sec": "Update",
"deletes_per_sec": "Delete",
"getmores_per_sec": "GetMore",
"commands_per_sec": "Command",
"flushes_per_sec": "Flushes",
"vsize_megabytes": "Virtual",
"resident_megabytes": "Resident",
"queued_reads": "QueuedReaders",
"queued_writes": "QueuedWriters",
"active_reads": "ActiveReaders",
"active_writes": "ActiveWriters",
"net_in_bytes": "NetIn",
"net_out_bytes": "NetOut",
"open_connections": "NumConnections",
"ttl_deletes_per_sec": "DeletedDocuments",
"ttl_passes_per_sec": "Passes",
}
var DefaultReplStats = map[string]string{
@@ -52,6 +54,11 @@ var DefaultReplStats = map[string]string{
"repl_getmores_per_sec": "GetMoreR",
"repl_commands_per_sec": "CommandR",
"member_status": "NodeType",
"repl_lag": "ReplLag",
}
var DefaultClusterStats = map[string]string{
"jumbo_chunks": "JumboChunksCount",
}
var MmapStats = map[string]string{
@@ -71,6 +78,7 @@ func (d *MongodbData) AddDefaultStats() {
if d.StatLine.NodeType != "" {
d.addStat(statLine, DefaultReplStats)
}
d.addStat(statLine, DefaultClusterStats)
if d.StatLine.StorageEngine == "mmapv1" {
d.addStat(statLine, MmapStats)
} else if d.StatLine.StorageEngine == "wiredTiger" {

View File

@@ -13,24 +13,26 @@ var tags = make(map[string]string)
func TestAddNonReplStats(t *testing.T) {
d := NewMongodbData(
&StatLine{
StorageEngine: "",
Time: time.Now(),
Insert: 0,
Query: 0,
Update: 0,
Delete: 0,
GetMore: 0,
Command: 0,
Flushes: 0,
Virtual: 0,
Resident: 0,
QueuedReaders: 0,
QueuedWriters: 0,
ActiveReaders: 0,
ActiveWriters: 0,
NetIn: 0,
NetOut: 0,
NumConnections: 0,
StorageEngine: "",
Time: time.Now(),
Insert: 0,
Query: 0,
Update: 0,
Delete: 0,
GetMore: 0,
Command: 0,
Flushes: 0,
Virtual: 0,
Resident: 0,
QueuedReaders: 0,
QueuedWriters: 0,
ActiveReaders: 0,
ActiveWriters: 0,
NetIn: 0,
NetOut: 0,
NumConnections: 0,
Passes: 0,
DeletedDocuments: 0,
},
tags,
)
@@ -125,9 +127,13 @@ func TestStateTag(t *testing.T) {
"repl_inserts_per_sec": int64(0),
"repl_queries_per_sec": int64(0),
"repl_updates_per_sec": int64(0),
"repl_lag": int64(0),
"resident_megabytes": int64(0),
"updates_per_sec": int64(0),
"vsize_megabytes": int64(0),
"ttl_deletes_per_sec": int64(0),
"ttl_passes_per_sec": int64(0),
"jumbo_chunks": int64(0),
}
acc.AssertContainsTaggedFields(t, "mongodb", fields, stateTags)
}

View File

@@ -1,6 +1,7 @@
package mongodb
import (
"log"
"net/url"
"time"
@@ -12,7 +13,7 @@ import (
type Server struct {
Url *url.URL
Session *mgo.Session
lastResult *ServerStatus
lastResult *MongoStatus
}
func (s *Server) getDefaultTags() map[string]string {
@@ -24,11 +25,29 @@ func (s *Server) getDefaultTags() map[string]string {
func (s *Server) gatherData(acc telegraf.Accumulator) error {
s.Session.SetMode(mgo.Eventual, true)
s.Session.SetSocketTimeout(0)
result := &ServerStatus{}
err := s.Session.DB("admin").Run(bson.D{{"serverStatus", 1}, {"recordStats", 0}}, result)
result_server := &ServerStatus{}
err := s.Session.DB("admin").Run(bson.D{{"serverStatus", 1}, {"recordStats", 0}}, result_server)
if err != nil {
return err
}
result_repl := &ReplSetStatus{}
err = s.Session.DB("admin").Run(bson.D{{"replSetGetStatus", 1}}, result_repl)
if err != nil {
log.Println("Not gathering replica set status, member not in replica set")
}
jumbo_chunks, _ := s.Session.DB("config").C("chunks").Find(bson.M{"jumbo": true}).Count()
result_cluster := &ClusterStatus{
JumboChunksCount: int64(jumbo_chunks),
}
result := &MongoStatus{
ServerStatus: result_server,
ReplSetStatus: result_repl,
ClusterStatus: result_cluster,
}
defer func() {
s.lastResult = result
}()

View File

@@ -11,6 +11,8 @@ import (
"sort"
"strings"
"time"
"gopkg.in/mgo.v2/bson"
)
const (
@@ -28,8 +30,14 @@ const (
WTOnly // only active if node has wiredtiger-specific fields
)
type MongoStatus struct {
SampleTime time.Time
ServerStatus *ServerStatus
ReplSetStatus *ReplSetStatus
ClusterStatus *ClusterStatus
}
type ServerStatus struct {
SampleTime time.Time `bson:""`
Host string `bson:"host"`
Version string `bson:"version"`
Process string `bson:"process"`
@@ -54,6 +62,25 @@ type ServerStatus struct {
ShardCursorType map[string]interface{} `bson:"shardCursorType"`
StorageEngine map[string]string `bson:"storageEngine"`
WiredTiger *WiredTiger `bson:"wiredTiger"`
Metrics *MetricsStats `bson:"metrics"`
}
// ClusterStatus stores information related to the whole cluster
type ClusterStatus struct {
JumboChunksCount int64
}
// ReplSetStatus stores information from replSetGetStatus
type ReplSetStatus struct {
Members []ReplSetMember `bson:"members"`
MyState int64 `bson:"myState"`
}
// ReplSetMember stores information related to a replica set member
type ReplSetMember struct {
Name string `bson:"name"`
State int64 `bson:"state"`
Optime *bson.MongoTimestamp `bson:"optime"`
}
// WiredTiger stores information related to the WiredTiger storage engine.
@@ -194,6 +221,17 @@ type OpcountStats struct {
Command int64 `bson:"command"`
}
// MetricsStats stores information related to metrics
type MetricsStats struct {
TTL *TTLStats `bson:"ttl"`
}
// TTLStats stores information related to documents with a ttl index.
type TTLStats struct {
DeletedDocuments int64 `bson:"deletedDocuments"`
Passes int64 `bson:"passes"`
}
// ReadWriteLockTimes stores time spent holding read/write locks.
type ReadWriteLockTimes struct {
Read int64 `bson:"R"`
@@ -332,6 +370,9 @@ type StatLine struct {
// Opcounter fields
Insert, Query, Update, Delete, GetMore, Command int64
// TTL fields
Passes, DeletedDocuments int64
// Collection locks (3.0 mmap only)
CollectionLocks *CollectionLockStatus
@@ -341,6 +382,7 @@ type StatLine struct {
// Replicated Opcounter fields
InsertR, QueryR, UpdateR, DeleteR, GetMoreR, CommandR int64
ReplLag int64
Flushes int64
Mapped, Virtual, Resident, NonMapped int64
Faults int64
@@ -351,6 +393,9 @@ type StatLine struct {
NumConnections int64
ReplSetName string
NodeType string
// Cluster fields
JumboChunksCount int64
}
func parseLocks(stat ServerStatus) map[string]LockUsage {
@@ -395,8 +440,11 @@ func diff(newVal, oldVal, sampleTime int64) int64 {
return d / sampleTime
}
// NewStatLine constructs a StatLine object from two ServerStatus objects.
func NewStatLine(oldStat, newStat ServerStatus, key string, all bool, sampleSecs int64) *StatLine {
// NewStatLine constructs a StatLine object from two MongoStatus objects.
func NewStatLine(oldMongo, newMongo MongoStatus, key string, all bool, sampleSecs int64) *StatLine {
oldStat := *oldMongo.ServerStatus
newStat := *newMongo.ServerStatus
returnVal := &StatLine{
Key: key,
Host: newStat.Host,
@@ -423,6 +471,11 @@ func NewStatLine(oldStat, newStat ServerStatus, key string, all bool, sampleSecs
returnVal.Command = diff(newStat.Opcounters.Command, oldStat.Opcounters.Command, sampleSecs)
}
if newStat.Metrics.TTL != nil && oldStat.Metrics.TTL != nil {
returnVal.Passes = diff(newStat.Metrics.TTL.Passes, oldStat.Metrics.TTL.Passes, sampleSecs)
returnVal.DeletedDocuments = diff(newStat.Metrics.TTL.DeletedDocuments, oldStat.Metrics.TTL.DeletedDocuments, sampleSecs)
}
if newStat.OpcountersRepl != nil && oldStat.OpcountersRepl != nil {
returnVal.InsertR = diff(newStat.OpcountersRepl.Insert, oldStat.OpcountersRepl.Insert, sampleSecs)
returnVal.QueryR = diff(newStat.OpcountersRepl.Query, oldStat.OpcountersRepl.Query, sampleSecs)
@@ -442,7 +495,7 @@ func NewStatLine(oldStat, newStat ServerStatus, key string, all bool, sampleSecs
returnVal.Flushes = newStat.BackgroundFlushing.Flushes - oldStat.BackgroundFlushing.Flushes
}
returnVal.Time = newStat.SampleTime
returnVal.Time = newMongo.SampleTime
returnVal.IsMongos =
(newStat.ShardCursorType != nil || strings.HasPrefix(newStat.Process, MongosProcess))
@@ -587,5 +640,42 @@ func NewStatLine(oldStat, newStat ServerStatus, key string, all bool, sampleSecs
returnVal.NumConnections = newStat.Connections.Current
}
newReplStat := *newMongo.ReplSetStatus
if newReplStat.Members != nil {
myName := newStat.Repl.Me
// Find the master and myself
master := ReplSetMember{}
me := ReplSetMember{}
for _, member := range newReplStat.Members {
if member.Name == myName {
if member.State == 1 {
// I'm the master
returnVal.ReplLag = 0
break
} else {
// I'm secondary
me = member
}
} else if member.State == 1 {
// Master found
master = member
}
}
if me.Optime != nil && master.Optime != nil && me.State == 2 {
// MongoTimestamp type is int64 where the first 32bits are the unix timestamp
lag := int64(*master.Optime>>32 - *me.Optime>>32)
if lag < 0 {
returnVal.ReplLag = 0
} else {
returnVal.ReplLag = lag
}
}
}
newClusterStat := *newMongo.ClusterStatus
returnVal.JumboChunksCount = newClusterStat.JumboChunksCount
return returnVal
}

View File

@@ -35,7 +35,7 @@ The plugin expects messages in the
## Use SSL but skip chain & host verification
# insecure_skip_verify = false
## Data format to consume. This can be "json", "influx" or "graphite"
## Data format to consume.
## Each data format has it's own unique set of configuration options, read
## more about them here:
## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md

View File

@@ -3,6 +3,7 @@ package mqtt_consumer
import (
"fmt"
"log"
"strings"
"sync"
"time"
@@ -46,6 +47,8 @@ type MQTTConsumer struct {
// keep the accumulator internally:
acc telegraf.Accumulator
started bool
}
var sampleConfig = `
@@ -100,6 +103,7 @@ func (m *MQTTConsumer) SetParser(parser parsers.Parser) {
func (m *MQTTConsumer) Start(acc telegraf.Accumulator) error {
m.Lock()
defer m.Unlock()
m.started = false
if m.PersistentSession && m.ClientID == "" {
return fmt.Errorf("ERROR MQTT Consumer: When using persistent_session" +
@@ -124,20 +128,32 @@ func (m *MQTTConsumer) Start(acc telegraf.Accumulator) error {
m.in = make(chan mqtt.Message, 1000)
m.done = make(chan struct{})
topics := make(map[string]byte)
for _, topic := range m.Topics {
topics[topic] = byte(m.QoS)
}
subscribeToken := m.client.SubscribeMultiple(topics, m.recvMessage)
subscribeToken.Wait()
if subscribeToken.Error() != nil {
return subscribeToken.Error()
}
go m.receiver()
return nil
}
func (m *MQTTConsumer) onConnect(c mqtt.Client) {
log.Printf("MQTT Client Connected")
if !m.PersistentSession || !m.started {
topics := make(map[string]byte)
for _, topic := range m.Topics {
topics[topic] = byte(m.QoS)
}
subscribeToken := c.SubscribeMultiple(topics, m.recvMessage)
subscribeToken.Wait()
if subscribeToken.Error() != nil {
log.Printf("MQTT SUBSCRIBE ERROR\ntopics: %s\nerror: %s",
strings.Join(m.Topics[:], ","), subscribeToken.Error())
}
m.started = true
}
return
}
func (m *MQTTConsumer) onConnectionLost(c mqtt.Client, err error) {
log.Printf("MQTT Connection lost\nerror: %s\nMQTT Client will try to reconnect", err.Error())
return
}
// receiver() reads all incoming messages from the consumer, and parses them into
// influxdb metric points.
@@ -172,6 +188,7 @@ func (m *MQTTConsumer) Stop() {
defer m.Unlock()
close(m.done)
m.client.Disconnect(200)
m.started = false
}
func (m *MQTTConsumer) Gather(acc telegraf.Accumulator) error {
@@ -219,6 +236,8 @@ func (m *MQTTConsumer) createOpts() (*mqtt.ClientOptions, error) {
opts.SetAutoReconnect(true)
opts.SetKeepAlive(time.Second * 60)
opts.SetCleanSession(!m.PersistentSession)
opts.SetOnConnectHandler(m.onConnect)
opts.SetConnectionLostHandler(m.onConnectionLost)
return opts, nil
}

View File

@@ -0,0 +1,195 @@
# MySQL Input plugin
This plugin gathers the statistic data from MySQL server
* Global statuses
* Global variables
* Slave statuses
* Binlog size
* Process list
* Info schema auto increment columns
* Table I/O waits
* Index I/O waits
* Perf Schema table lock waits
* Perf Schema event waits
* Perf Schema events statements
* File events statistics
* Table schema statistics
## Configuration
```
# Read metrics from one or many mysql servers
[[inputs.mysql]]
## specify servers via a url matching:
## [username[:password]@][protocol[(address)]]/[?tls=[true|false|skip-verify]]
## see https://github.com/go-sql-driver/mysql#dsn-data-source-name
## e.g.
## root:passwd@tcp(127.0.0.1:3306)/?tls=false
## root@tcp(127.0.0.1:3306)/?tls=false
#
## If no servers are specified, then localhost is used as the host.
servers = ["tcp(127.0.0.1:3306)/"]
## the limits for metrics form perf_events_statements
perf_events_statements_digest_text_limit = 120
perf_events_statements_limit = 250
perf_events_statements_time_limit = 86400
#
## if the list is empty, then metrics are gathered from all database tables
table_schema_databases = []
#
## gather metrics from INFORMATION_SCHEMA.TABLES for databases provided above list
gather_table_schema = false
#
## gather thread state counts from INFORMATION_SCHEMA.PROCESSLIST
gather_process_list = true
#
## gather auto_increment columns and max values from information schema
gather_info_schema_auto_inc = true
#
## gather metrics from SHOW SLAVE STATUS command output
gather_slave_status = true
#
## gather metrics from SHOW BINARY LOGS command output
gather_binary_logs = false
#
## gather metrics from PERFORMANCE_SCHEMA.TABLE_IO_WAITS_SUMMART_BY_TABLE
gather_table_io_waits = false
#
## gather metrics from PERFORMANCE_SCHEMA.TABLE_LOCK_WAITS
gather_table_lock_waits = false
#
## gather metrics from PERFORMANCE_SCHEMA.TABLE_IO_WAITS_SUMMART_BY_INDEX_USAGE
gather_index_io_waits = false
#
## gather metrics from PERFORMANCE_SCHEMA.EVENT_WAITS
gather_event_waits = false
#
## gather metrics from PERFORMANCE_SCHEMA.FILE_SUMMARY_BY_EVENT_NAME
gather_file_events_stats = false
#
## gather metrics from PERFORMANCE_SCHEMA.EVENTS_STATEMENTS_SUMMARY_BY_DIGEST
gather_perf_events_statements = false
#
## Some queries we may want to run less often (such as SHOW GLOBAL VARIABLES)
interval_slow = "30m"
```
## Measurements & Fields
* Global statuses - all numeric and boolean values of `SHOW GLOBAL STATUSES`
* Global variables - all numeric and boolean values of `SHOW GLOBAL VARIABLES`
* Slave status - metrics from `SHOW SLAVE STATUS` the metrics are gathered when
the single-source replication is on. If the multi-source replication is set,
then everything works differently, this metric does not work with multi-source
replication.
* slave_[column name]()
* Binary logs - all metrics including size and count of all binary files.
Requires to be turned on in configuration.
* binary_size_bytes(int, number)
* binary_files_count(int, number)
* Process list - connection metrics from processlist for each user. It has the following tags
* connections(int, number)
* Perf Table IO waits - total count and time of I/O waits event for each table
and process. It has following fields:
* table_io_waits_total_fetch(float, number)
* table_io_waits_total_insert(float, number)
* table_io_waits_total_update(float, number)
* table_io_waits_total_delete(float, number)
* table_io_waits_seconds_total_fetch(float, milliseconds)
* table_io_waits_seconds_total_insert(float, milliseconds)
* table_io_waits_seconds_total_update(float, milliseconds)
* table_io_waits_seconds_total_delete(float, milliseconds)
* Perf index IO waits - total count and time of I/O waits event for each index
and process. It has following fields:
* index_io_waits_total_fetch(float, number)
* index_io_waits_seconds_total_fetch(float, milliseconds)
* index_io_waits_total_insert(float, number)
* index_io_waits_total_update(float, number)
* index_io_waits_total_delete(float, number)
* index_io_waits_seconds_total_insert(float, milliseconds)
* index_io_waits_seconds_total_update(float, milliseconds)
* index_io_waits_seconds_total_delete(float, milliseconds)
* Info schema autoincrement statuses - autoincrement fields and max values
for them. It has following fields:
* auto_increment_column(int, number)
* auto_increment_column_max(int, number)
* Perf table lock waits - gathers total number and time for SQL and external
lock waits events for each table and operation. It has following fields.
The unit of fields varies by the tags.
* read_normal(float, number/milliseconds)
* read_with_shared_locks(float, number/milliseconds)
* read_high_priority(float, number/milliseconds)
* read_no_insert(float, number/milliseconds)
* write_normal(float, number/milliseconds)
* write_allow_write(float, number/milliseconds)
* write_concurrent_insert(float, number/milliseconds)
* write_low_priority(float, number/milliseconds)
* read(float, number/milliseconds)
* write(float, number/milliseconds)
* Perf events waits - gathers total time and number of event waits
* events_waits_total(float, number)
* events_waits_seconds_total(float, milliseconds)
* Perf file events statuses - gathers file events statuses
* file_events_total(float,number)
* file_events_seconds_total(float, milliseconds)
* file_events_bytes_total(float, bytes)
* Perf file events statements - gathers attributes of each event
* events_statements_total(float, number)
* events_statements_seconds_total(float, millieconds)
* events_statements_errors_total(float, number)
* events_statements_warnings_total(float, number)
* events_statements_rows_affected_total(float, number)
* events_statements_rows_sent_total(float, number)
* events_statements_rows_examined_total(float, number)
* events_statements_tmp_tables_total(float, number)
* events_statements_tmp_disk_tables_total(float, number)
* events_statements_sort_merge_passes_totales(float, number)
* events_statements_sort_rows_total(float, number)
* events_statements_no_index_used_total(float, number)
* Table schema - gathers statistics of each schema. It has following measurements
* info_schema_table_rows(float, number)
* info_schema_table_size_data_length(float, number)
* info_schema_table_size_index_length(float, number)
* info_schema_table_size_data_free(float, number)
* info_schema_table_version(float, number)
## Tags
* All measurements has following tags
* server (the host name from which the metrics are gathered)
* Process list measurement has following tags
* user (username for whom the metrics are gathered)
* Perf table IO waits measurement has following tags
* schema
* name (object name for event or process)
* Perf index IO waits has following tags
* schema
* name
* index
* Info schema autoincrement statuses has following tags
* schema
* table
* column
* Perf table lock waits has following tags
* schema
* table
* sql_lock_waits_total(fields including this tag have numeric unit)
* external_lock_waits_total(fields including this tag have numeric unit)
* sql_lock_waits_seconds_total(fields including this tag have millisecond unit)
* external_lock_waits_seconds_total(fields including this tag have millisecond unit)
* Perf events statements has following tags
* event_name
* Perf file events statuses has following tags
* event_name
* mode
* Perf file events statements has following tags
* schema
* digest
* digest_text
* Table schema has following tags
* schema
* table
* component
* type
* engine
* row_format
* create_options

File diff suppressed because it is too large Load Diff

View File

@@ -1,6 +1,7 @@
package mysql
import (
"database/sql"
"fmt"
"testing"
@@ -115,3 +116,47 @@ func TestMysqlDNSAddTimeout(t *testing.T) {
}
}
}
func TestParseValue(t *testing.T) {
testCases := []struct {
rawByte sql.RawBytes
value float64
boolValue bool
}{
{sql.RawBytes("Yes"), 1, true},
{sql.RawBytes("No"), 0, false},
{sql.RawBytes("ON"), 1, true},
{sql.RawBytes("OFF"), 0, false},
{sql.RawBytes("ABC"), 0, false},
}
for _, cases := range testCases {
if value, ok := parseValue(cases.rawByte); value != cases.value && ok != cases.boolValue {
t.Errorf("want %d with %t, got %d with %t", int(cases.value), cases.boolValue, int(value), ok)
}
}
}
func TestNewNamespace(t *testing.T) {
testCases := []struct {
words []string
namespace string
}{
{
[]string{"thread", "info_scheme", "query update"},
"thread_info_scheme_query_update",
},
{
[]string{"thread", "info_scheme", "query_update"},
"thread_info_scheme_query_update",
},
{
[]string{"thread", "info", "scheme", "query", "update"},
"thread_info_scheme_query_update",
},
}
for _, cases := range testCases {
if got := newNamespace(cases.words...); got != cases.namespace {
t.Errorf("want %s, got %s", cases.namespace, got)
}
}
}

View File

@@ -23,7 +23,8 @@ from a NATS cluster in parallel.
## Maximum number of metrics to buffer between collection intervals
metric_buffer = 100000
## Data format to consume. This can be "json", "influx" or "graphite"
## Data format to consume.
## Each data format has it's own unique set of configuration options, read
## more about them here:
## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md

View File

@@ -184,6 +184,7 @@ func (g *phpfpm) gatherHttp(addr string, acc telegraf.Accumulator) error {
return fmt.Errorf("Unable to connect to phpfpm status page '%s': %v",
addr, err)
}
defer res.Body.Close()
if res.StatusCode != 200 {
return fmt.Errorf("Unable to get valid stat result from '%s': %v",

View File

@@ -9,15 +9,17 @@ import (
"strconv"
"strings"
"sync"
"time"
"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/internal"
"github.com/influxdata/telegraf/plugins/inputs"
)
// HostPinger is a function that runs the "ping" function using a list of
// passed arguments. This can be easily switched with a mocked ping function
// for unit test purposes (see ping_test.go)
type HostPinger func(args ...string) (string, error)
type HostPinger func(timeout float64, args ...string) (string, error)
type Ping struct {
// Interval at which to ping (ping -i <INTERVAL>)
@@ -43,18 +45,18 @@ func (_ *Ping) Description() string {
return "Ping given url(s) and return statistics"
}
var sampleConfig = `
const sampleConfig = `
## NOTE: this plugin forks the ping command. You may need to set capabilities
## via setcap cap_net_raw+p /bin/ping
#
## urls to ping
urls = ["www.google.com"] # required
## number of pings to send (ping -c <COUNT>)
## number of pings to send per collection (ping -c <COUNT>)
count = 1 # required
## interval, in s, at which to ping. 0 == default (ping -i <PING_INTERVAL>)
ping_interval = 0.0
## ping timeout, in s. 0 == no timeout (ping -t <TIMEOUT>)
timeout = 0.0
## ping timeout, in s. 0 == no timeout (ping -W <TIMEOUT>)
timeout = 1.0
## interface to send ping from (ping -I <INTERFACE>)
interface = ""
`
@@ -71,16 +73,16 @@ func (p *Ping) Gather(acc telegraf.Accumulator) error {
// Spin off a go routine for each url to ping
for _, url := range p.Urls {
wg.Add(1)
go func(url string, acc telegraf.Accumulator) {
go func(u string) {
defer wg.Done()
args := p.args(url)
out, err := p.pingHost(args...)
args := p.args(u)
out, err := p.pingHost(p.Timeout, args...)
if err != nil {
// Combine go err + stderr output
errorChannel <- errors.New(
strings.TrimSpace(out) + ", " + err.Error())
}
tags := map[string]string{"url": url}
tags := map[string]string{"url": u}
trans, rec, avg, err := processPingOutput(out)
if err != nil {
// fatal error
@@ -98,7 +100,7 @@ func (p *Ping) Gather(acc telegraf.Accumulator) error {
fields["average_response_ms"] = avg
}
acc.AddFields("ping", fields, tags)
}(url, acc)
}(url)
}
wg.Wait()
@@ -116,13 +118,14 @@ func (p *Ping) Gather(acc telegraf.Accumulator) error {
return errors.New(strings.Join(errorStrings, "\n"))
}
func hostPinger(args ...string) (string, error) {
func hostPinger(timeout float64, args ...string) (string, error) {
bin, err := exec.LookPath("ping")
if err != nil {
return "", err
}
c := exec.Command(bin, args...)
out, err := c.CombinedOutput()
out, err := internal.CombinedOutputTimeout(c,
time.Second*time.Duration(timeout+1))
return string(out), err
}

View File

@@ -124,7 +124,7 @@ func TestArgs(t *testing.T) {
"Expected: %s Actual: %s", expected, actual)
}
func mockHostPinger(args ...string) (string, error) {
func mockHostPinger(timeout float64, args ...string) (string, error) {
return linuxPingOutput, nil
}
@@ -161,7 +161,7 @@ PING www.google.com (216.58.218.164) 56(84) bytes of data.
rtt min/avg/max/mdev = 35.225/44.033/51.806/5.325 ms
`
func mockLossyHostPinger(args ...string) (string, error) {
func mockLossyHostPinger(timeout float64, args ...string) (string, error) {
return lossyPingOutput, nil
}
@@ -192,7 +192,7 @@ Request timeout for icmp_seq 0
2 packets transmitted, 0 packets received, 100.0% packet loss
`
func mockErrorHostPinger(args ...string) (string, error) {
func mockErrorHostPinger(timeout float64, args ...string) (string, error) {
return errorPingOutput, errors.New("No packets received")
}
@@ -215,7 +215,7 @@ func TestBadPingGather(t *testing.T) {
acc.AssertContainsTaggedFields(t, "ping", fields, tags)
}
func mockFatalHostPinger(args ...string) (string, error) {
func mockFatalHostPinger(timeout float64, args ...string) (string, error) {
return fatalPingOutput, errors.New("So very bad")
}

View File

@@ -4,6 +4,7 @@ import (
"bytes"
"database/sql"
"fmt"
"log"
"regexp"
"strings"
@@ -15,24 +16,27 @@ import (
type Postgresql struct {
Address string
Outputaddress string
Databases []string
OrderedColumns []string
AllColumns []string
AdditionalTags []string
sanitizedAddress string
Query []struct {
Sqlquery string
Version int
Withdbname bool
Tagvalue string
Sqlquery string
Version int
Withdbname bool
Tagvalue string
Measurement string
}
}
type query []struct {
Sqlquery string
Version int
Withdbname bool
Tagvalue string
Sqlquery string
Version int
Withdbname bool
Tagvalue string
Measurement string
}
var ignoredColumns = map[string]bool{"datid": true, "datname": true, "stats_reset": true}
@@ -55,6 +59,11 @@ var sampleConfig = `
## databases are gathered.
## databases = ["app_production", "testing"]
#
# outputaddress = "db01"
## A custom name for the database that will be used as the "server" tag in the
## measurement output. If not specified, a default one generated from
## the connection address is used.
#
## Define the toml config where the sql queries are stored
## New queries can be added, if the withdbname is set to true and there is no
## databases defined in the 'databases field', the sql query is ended by a
@@ -65,24 +74,28 @@ var sampleConfig = `
## because the databases variable was set to ['postgres', 'pgbench' ] and the
## withdbname was true. Be careful that if the withdbname is set to false you
## don't have to define the where clause (aka with the dbname) the tagvalue
## field is used to define custom tags (separated by comas)
## field is used to define custom tags (separated by commas)
## The optional "measurement" value can be used to override the default
## output measurement name ("postgresql").
#
## Structure :
## [[inputs.postgresql_extensible.query]]
## sqlquery string
## version string
## withdbname boolean
## tagvalue string (coma separated)
## tagvalue string (comma separated)
## measurement string
[[inputs.postgresql_extensible.query]]
sqlquery="SELECT * FROM pg_stat_database"
version=901
withdbname=false
tagvalue=""
measurement=""
[[inputs.postgresql_extensible.query]]
sqlquery="SELECT * FROM pg_stat_bgwriter"
version=901
withdbname=false
tagvalue=""
tagvalue="postgresql.stats"
`
func (p *Postgresql) SampleConfig() string {
@@ -106,6 +119,7 @@ func (p *Postgresql) Gather(acc telegraf.Accumulator) error {
var db_version int
var query string
var tag_value string
var meas_name string
if p.Address == "" || p.Address == "localhost" {
p.Address = localhost
@@ -131,6 +145,11 @@ func (p *Postgresql) Gather(acc telegraf.Accumulator) error {
for i := range p.Query {
sql_query = p.Query[i].Sqlquery
tag_value = p.Query[i].Tagvalue
if p.Query[i].Measurement != "" {
meas_name = p.Query[i].Measurement
} else {
meas_name = "postgresql"
}
if p.Query[i].Withdbname {
if len(p.Databases) != 0 {
@@ -170,7 +189,7 @@ func (p *Postgresql) Gather(acc telegraf.Accumulator) error {
}
for rows.Next() {
err = p.accRow(rows, acc)
err = p.accRow(meas_name, rows, acc)
if err != nil {
return err
}
@@ -184,9 +203,12 @@ type scanner interface {
Scan(dest ...interface{}) error
}
var passwordKVMatcher, _ = regexp.Compile("password=\\S+ ?")
var KVMatcher, _ = regexp.Compile("(password|sslcert|sslkey|sslmode|sslrootcert)=\\S+ ?")
func (p *Postgresql) SanitizedAddress() (_ string, err error) {
if p.Outputaddress != "" {
return p.Outputaddress, nil
}
var canonicalizedAddress string
if strings.HasPrefix(p.Address, "postgres://") || strings.HasPrefix(p.Address, "postgresql://") {
canonicalizedAddress, err = pq.ParseURL(p.Address)
@@ -196,12 +218,12 @@ func (p *Postgresql) SanitizedAddress() (_ string, err error) {
} else {
canonicalizedAddress = p.Address
}
p.sanitizedAddress = passwordKVMatcher.ReplaceAllString(canonicalizedAddress, "")
p.sanitizedAddress = KVMatcher.ReplaceAllString(canonicalizedAddress, "")
return p.sanitizedAddress, err
}
func (p *Postgresql) accRow(row scanner, acc telegraf.Accumulator) error {
func (p *Postgresql) accRow(meas_name string, row scanner, acc telegraf.Accumulator) error {
var columnVars []interface{}
var dbname bytes.Buffer
@@ -247,9 +269,11 @@ func (p *Postgresql) accRow(row scanner, acc telegraf.Accumulator) error {
var isATag int
fields := make(map[string]interface{})
for col, val := range columnMap {
if acc.Debug() {
log.Printf("postgresql_extensible: column: %s = %T: %s\n", col, *val, *val)
}
_, ignore := ignoredColumns[col]
//if !ignore && *val != "" {
if !ignore {
if !ignore && *val != nil {
isATag = 0
for tag := range p.AdditionalTags {
if col == p.AdditionalTags[tag] {
@@ -267,7 +291,7 @@ func (p *Postgresql) accRow(row scanner, acc telegraf.Accumulator) error {
}
}
}
acc.AddFields("postgresql", fields, tags)
acc.AddFields(meas_name, fields, tags)
return nil
}

View File

@@ -21,12 +21,16 @@ type Procstat struct {
Prefix string
User string
// pidmap maps a pid to a process object, so we don't recreate every gather
pidmap map[int32]*process.Process
// tagmap maps a pid to a map of tags for that pid
tagmap map[int32]map[string]string
}
func NewProcstat() *Procstat {
return &Procstat{
pidmap: make(map[int32]*process.Process),
tagmap: make(map[int32]map[string]string),
}
}
@@ -61,8 +65,8 @@ func (p *Procstat) Gather(acc telegraf.Accumulator) error {
log.Printf("Error: procstat getting process, exe: [%s] pidfile: [%s] pattern: [%s] user: [%s] %s",
p.Exe, p.PidFile, p.Pattern, p.User, err.Error())
} else {
for _, proc := range p.pidmap {
p := NewSpecProcessor(p.Prefix, acc, proc)
for pid, proc := range p.pidmap {
p := NewSpecProcessor(p.Prefix, acc, proc, p.tagmap[pid])
p.pushMetrics()
}
}
@@ -103,45 +107,50 @@ func (p *Procstat) getAllPids() ([]int32, error) {
var err error
if p.PidFile != "" {
pids, err = pidsFromFile(p.PidFile)
pids, err = p.pidsFromFile()
} else if p.Exe != "" {
pids, err = pidsFromExe(p.Exe)
pids, err = p.pidsFromExe()
} else if p.Pattern != "" {
pids, err = pidsFromPattern(p.Pattern)
pids, err = p.pidsFromPattern()
} else if p.User != "" {
pids, err = pidsFromUser(p.User)
pids, err = p.pidsFromUser()
} else {
err = fmt.Errorf("Either exe, pid_file or pattern has to be specified")
err = fmt.Errorf("Either exe, pid_file, user, or pattern has to be specified")
}
return pids, err
}
func pidsFromFile(file string) ([]int32, error) {
func (p *Procstat) pidsFromFile() ([]int32, error) {
var out []int32
var outerr error
pidString, err := ioutil.ReadFile(file)
pidString, err := ioutil.ReadFile(p.PidFile)
if err != nil {
outerr = fmt.Errorf("Failed to read pidfile '%s'. Error: '%s'", file, err)
outerr = fmt.Errorf("Failed to read pidfile '%s'. Error: '%s'",
p.PidFile, err)
} else {
pid, err := strconv.Atoi(strings.TrimSpace(string(pidString)))
if err != nil {
outerr = err
} else {
out = append(out, int32(pid))
p.tagmap[int32(pid)] = map[string]string{
"pidfile": p.PidFile,
"pid": strings.TrimSpace(string(pidString)),
}
}
}
return out, outerr
}
func pidsFromExe(exe string) ([]int32, error) {
func (p *Procstat) pidsFromExe() ([]int32, error) {
var out []int32
var outerr error
bin, err := exec.LookPath("pgrep")
if err != nil {
return out, fmt.Errorf("Couldn't find pgrep binary: %s", err)
}
pgrep, err := exec.Command(bin, exe).Output()
pgrep, err := exec.Command(bin, p.Exe).Output()
if err != nil {
return out, fmt.Errorf("Failed to execute %s. Error: '%s'", bin, err)
} else {
@@ -150,6 +159,10 @@ func pidsFromExe(exe string) ([]int32, error) {
ipid, err := strconv.Atoi(pid)
if err == nil {
out = append(out, int32(ipid))
p.tagmap[int32(ipid)] = map[string]string{
"exe": p.Exe,
"pid": pid,
}
} else {
outerr = err
}
@@ -158,14 +171,14 @@ func pidsFromExe(exe string) ([]int32, error) {
return out, outerr
}
func pidsFromPattern(pattern string) ([]int32, error) {
func (p *Procstat) pidsFromPattern() ([]int32, error) {
var out []int32
var outerr error
bin, err := exec.LookPath("pgrep")
if err != nil {
return out, fmt.Errorf("Couldn't find pgrep binary: %s", err)
}
pgrep, err := exec.Command(bin, "-f", pattern).Output()
pgrep, err := exec.Command(bin, "-f", p.Pattern).Output()
if err != nil {
return out, fmt.Errorf("Failed to execute %s. Error: '%s'", bin, err)
} else {
@@ -174,6 +187,10 @@ func pidsFromPattern(pattern string) ([]int32, error) {
ipid, err := strconv.Atoi(pid)
if err == nil {
out = append(out, int32(ipid))
p.tagmap[int32(ipid)] = map[string]string{
"pattern": p.Pattern,
"pid": pid,
}
} else {
outerr = err
}
@@ -182,14 +199,14 @@ func pidsFromPattern(pattern string) ([]int32, error) {
return out, outerr
}
func pidsFromUser(user string) ([]int32, error) {
func (p *Procstat) pidsFromUser() ([]int32, error) {
var out []int32
var outerr error
bin, err := exec.LookPath("pgrep")
if err != nil {
return out, fmt.Errorf("Couldn't find pgrep binary: %s", err)
}
pgrep, err := exec.Command(bin, "-u", user).Output()
pgrep, err := exec.Command(bin, "-u", p.User).Output()
if err != nil {
return out, fmt.Errorf("Failed to execute %s. Error: '%s'", bin, err)
} else {
@@ -198,6 +215,10 @@ func pidsFromUser(user string) ([]int32, error) {
ipid, err := strconv.Atoi(pid)
if err == nil {
out = append(out, int32(ipid))
p.tagmap[int32(ipid)] = map[string]string{
"user": p.User,
"pid": pid,
}
} else {
outerr = err
}

View File

@@ -25,6 +25,7 @@ func TestGather(t *testing.T) {
PidFile: file.Name(),
Prefix: "foo",
pidmap: make(map[int32]*process.Process),
tagmap: make(map[int32]map[string]string),
}
p.Gather(&acc)
assert.True(t, acc.HasFloatField("procstat", "foo_cpu_time_user"))

View File

@@ -1,7 +1,6 @@
package procstat
import (
"fmt"
"time"
"github.com/shirou/gopsutil/process"
@@ -17,28 +16,12 @@ type SpecProcessor struct {
proc *process.Process
}
func (p *SpecProcessor) add(metric string, value interface{}) {
var mname string
if p.Prefix == "" {
mname = metric
} else {
mname = p.Prefix + "_" + metric
}
p.fields[mname] = value
}
func (p *SpecProcessor) flush() {
p.acc.AddFields("procstat", p.fields, p.tags)
p.fields = make(map[string]interface{})
}
func NewSpecProcessor(
prefix string,
acc telegraf.Accumulator,
p *process.Process,
tags map[string]string,
) *SpecProcessor {
tags := make(map[string]string)
tags["pid"] = fmt.Sprintf("%v", p.Pid)
if name, err := p.Name(); err == nil {
tags["process_name"] = name
}
@@ -52,90 +35,62 @@ func NewSpecProcessor(
}
func (p *SpecProcessor) pushMetrics() {
p.pushNThreadsStats()
p.pushFDStats()
p.pushCtxStats()
p.pushIOStats()
p.pushCPUStats()
p.pushMemoryStats()
p.flush()
}
var prefix string
if p.Prefix != "" {
prefix = p.Prefix + "_"
}
fields := map[string]interface{}{}
func (p *SpecProcessor) pushNThreadsStats() error {
numThreads, err := p.proc.NumThreads()
if err != nil {
return fmt.Errorf("NumThreads error: %s\n", err)
if err == nil {
fields[prefix+"num_threads"] = numThreads
}
p.add("num_threads", numThreads)
return nil
}
func (p *SpecProcessor) pushFDStats() error {
fds, err := p.proc.NumFDs()
if err != nil {
return fmt.Errorf("NumFD error: %s\n", err)
if err == nil {
fields[prefix+"num_fds"] = fds
}
p.add("num_fds", fds)
return nil
}
func (p *SpecProcessor) pushCtxStats() error {
ctx, err := p.proc.NumCtxSwitches()
if err != nil {
return fmt.Errorf("ContextSwitch error: %s\n", err)
if err == nil {
fields[prefix+"voluntary_context_switches"] = ctx.Voluntary
fields[prefix+"involuntary_context_switches"] = ctx.Involuntary
}
p.add("voluntary_context_switches", ctx.Voluntary)
p.add("involuntary_context_switches", ctx.Involuntary)
return nil
}
func (p *SpecProcessor) pushIOStats() error {
io, err := p.proc.IOCounters()
if err != nil {
return fmt.Errorf("IOCounters error: %s\n", err)
if err == nil {
fields[prefix+"read_count"] = io.ReadCount
fields[prefix+"write_count"] = io.WriteCount
fields[prefix+"read_bytes"] = io.ReadBytes
fields[prefix+"write_bytes"] = io.WriteCount
}
p.add("read_count", io.ReadCount)
p.add("write_count", io.WriteCount)
p.add("read_bytes", io.ReadBytes)
p.add("write_bytes", io.WriteCount)
return nil
}
func (p *SpecProcessor) pushCPUStats() error {
cpu_time, err := p.proc.CPUTimes()
if err != nil {
return err
if err == nil {
fields[prefix+"cpu_time_user"] = cpu_time.User
fields[prefix+"cpu_time_system"] = cpu_time.System
fields[prefix+"cpu_time_idle"] = cpu_time.Idle
fields[prefix+"cpu_time_nice"] = cpu_time.Nice
fields[prefix+"cpu_time_iowait"] = cpu_time.Iowait
fields[prefix+"cpu_time_irq"] = cpu_time.Irq
fields[prefix+"cpu_time_soft_irq"] = cpu_time.Softirq
fields[prefix+"cpu_time_steal"] = cpu_time.Steal
fields[prefix+"cpu_time_stolen"] = cpu_time.Stolen
fields[prefix+"cpu_time_guest"] = cpu_time.Guest
fields[prefix+"cpu_time_guest_nice"] = cpu_time.GuestNice
}
p.add("cpu_time_user", cpu_time.User)
p.add("cpu_time_system", cpu_time.System)
p.add("cpu_time_idle", cpu_time.Idle)
p.add("cpu_time_nice", cpu_time.Nice)
p.add("cpu_time_iowait", cpu_time.Iowait)
p.add("cpu_time_irq", cpu_time.Irq)
p.add("cpu_time_soft_irq", cpu_time.Softirq)
p.add("cpu_time_steal", cpu_time.Steal)
p.add("cpu_time_stolen", cpu_time.Stolen)
p.add("cpu_time_guest", cpu_time.Guest)
p.add("cpu_time_guest_nice", cpu_time.GuestNice)
cpu_perc, err := p.proc.CPUPercent(time.Duration(0))
if err != nil {
return err
} else if cpu_perc == 0 {
return nil
if err == nil && cpu_perc != 0 {
fields[prefix+"cpu_usage"] = cpu_perc
}
p.add("cpu_usage", cpu_perc)
return nil
}
func (p *SpecProcessor) pushMemoryStats() error {
mem, err := p.proc.MemoryInfo()
if err != nil {
return err
if err == nil {
fields[prefix+"memory_rss"] = mem.RSS
fields[prefix+"memory_vms"] = mem.VMS
fields[prefix+"memory_swap"] = mem.Swap
}
p.add("memory_rss", mem.RSS)
p.add("memory_vms", mem.VMS)
p.add("memory_swap", mem.Swap)
return nil
p.acc.AddFields("procstat", fields, p.tags)
}

View File

@@ -22,7 +22,7 @@ to filter and some tags
# An array of urls to scrape metrics from.
urls = ["http://my-kube-apiserver:8080/metrics"]
# Get only metrics with "apiserver_" string is in metric name
namepass = ["apiserver_"]
namepass = ["apiserver_*"]
# Add a metric name prefix
name_prefix = "k8s_"
# Add tags to be able to make beautiful dashboards

View File

@@ -97,8 +97,8 @@ func (s *Server) getServerStatus() error {
func (s *Server) getDefaultTags() map[string]string {
tags := make(map[string]string)
tags["host"] = s.Url.Host
tags["hostname"] = s.serverStatus.Network.Hostname
tags["rethinkdb_host"] = s.Url.Host
tags["rethinkdb_hostname"] = s.serverStatus.Network.Hostname
return tags
}

View File

@@ -20,8 +20,8 @@ func TestGetDefaultTags(t *testing.T) {
in string
out string
}{
{"host", server.Url.Host},
{"hostname", server.serverStatus.Network.Hostname},
{"rethinkdb_host", server.Url.Host},
{"rethinkdb_hostname", server.serverStatus.Network.Hostname},
}
defaultTags := server.getDefaultTags()
for _, tt := range tagTests {

View File

@@ -398,15 +398,16 @@ func (s *Snmp) Gather(acc telegraf.Accumulator) error {
// only if len(s.OidInstanceMapping) == 0
if len(OidInstanceMapping) >= 0 {
if err := host.SNMPMap(acc, s.nameToOid, s.subTableMap); err != nil {
return err
log.Printf("SNMP Mapping error for host '%s': %s", host.Address, err)
continue
}
}
// Launch Get requests
if err := host.SNMPGet(acc, s.initNode); err != nil {
return err
log.Printf("SNMP Error for host '%s': %s", host.Address, err)
}
if err := host.SNMPBulk(acc, s.initNode); err != nil {
return err
log.Printf("SNMP Error for host '%s': %s", host.Address, err)
}
}
return nil
@@ -732,7 +733,11 @@ func (h *Host) HandleResponse(oids map[string]Data, result *gosnmp.SnmpPacket, a
break nextresult
}
}
if strings.HasPrefix(variable.Name, oid_key) {
// If variable.Name is the same as oid_key
// OR
// the result is SNMP table which "." comes right after oid_key.
// ex: oid_key: .1.3.6.1.2.1.2.2.1.16, variable.Name: .1.3.6.1.2.1.2.2.1.16.1
if variable.Name == oid_key || strings.HasPrefix(variable.Name, oid_key+".") {
switch variable.Type {
// handle Metrics
case gosnmp.Boolean, gosnmp.Integer, gosnmp.Counter32, gosnmp.Gauge32,

View File

@@ -5,7 +5,7 @@ import (
"github.com/influxdata/telegraf/testutil"
// "github.com/stretchr/testify/assert"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
@@ -45,7 +45,8 @@ func TestSNMPErrorGet2(t *testing.T) {
var acc testutil.Accumulator
err := s.Gather(&acc)
require.Error(t, err)
require.NoError(t, err)
assert.Equal(t, 0, len(acc.Metrics))
}
func TestSNMPErrorBulk(t *testing.T) {
@@ -65,7 +66,8 @@ func TestSNMPErrorBulk(t *testing.T) {
var acc testutil.Accumulator
err := s.Gather(&acc)
require.Error(t, err)
require.NoError(t, err)
assert.Equal(t, 0, len(acc.Metrics))
}
func TestSNMPGet1(t *testing.T) {

View File

@@ -1052,7 +1052,7 @@ SELECT
When 1073874176 Then IsNull(Cast(cc.cntr_value - pc.cntr_value as Money) / NullIf(cbc.cntr_value - pbc.cntr_value, 0), 0) -- Avg
When 272696320 Then IsNull(Cast(cc.cntr_value - pc.cntr_value as Money) / NullIf(cbc.cntr_value - pbc.cntr_value, 0), 0) -- Avg/sec
When 1073939712 Then cc.cntr_value - pc.cntr_value -- Base
Else cc.cntr_value End as int)
Else cc.cntr_value End as bigint)
--, currentvalue= CAST(cc.cntr_value as bigint)
FROM #CCounters cc
INNER JOIN #PCounters pc On cc.object_name = pc.object_name

View File

@@ -18,10 +18,10 @@
## Percentiles to calculate for timing & histogram stats
percentiles = [90]
## convert measurement names, "." to "_" and "-" to "__"
convert_names = true
## separator to use between elements of a statsd metric
metric_separator = "_"
## Parses tags in DataDog's dogstatsd format
## Parses tags in the datadog statsd format
## http://docs.datadoghq.com/guides/dogstatsd/
parse_data_dog_tags = false
@@ -39,10 +39,6 @@
## calculation of percentiles. Raising this limit increases the accuracy
## of percentiles but also increases the memory usage and cpu time.
percentile_limit = 1000
## UDP packet size for the server to listen for. This will depend on the size
## of the packets that the client is sending, which is usually 1500 bytes.
udp_packet_size = 1500
```
### Description

Some files were not shown because too many files have changed in this diff Show More