Compare commits


556 Commits

Author SHA1 Message Date
root
f2eea71c1f Merge branch 'master' into kb-procstat-x 2016-10-04 15:23:15 +03:00
Cameron Sparr
ce5054c850 Changelog update 2016-10-03 18:20:10 +01:00
Kostas Botsas
da9b719889 update changelog.md 2016-10-03 19:20:16 +03:00
Cameron Sparr
c7834209d2 Major Logging Overhaul
in this commit:

- centralize logging output handler.
- set global Info/Debug/Error log levels based on config file or flags.
- remove per-plugin debug arg handling.
- add an I!, D!, or E! prefix to every log message.
- add configuration option to specify where to send logs.

closes #1786
2016-10-03 17:13:03 +01:00
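For illustration, a minimal sketch of how a centralized output handler could gate lines by the I!/D!/E! prefixes this commit introduces; the levelWriter type and its fields are hypothetical, not Telegraf's actual code:

```
package main

import (
	"bytes"
	"io"
	"log"
	"os"
)

// levelWriter drops lines below the configured level based on the
// single-character prefixes (D!, I!, E!) described above. Illustrative only.
type levelWriter struct {
	dst   io.Writer
	debug bool // when false, D! lines are dropped
	quiet bool // when true, only E! lines pass
}

func (w *levelWriter) Write(p []byte) (int, error) {
	isErr := bytes.Contains(p, []byte("E!"))
	isDbg := bytes.Contains(p, []byte("D!"))
	if (isDbg && !w.debug) || (w.quiet && !isErr) {
		return len(p), nil // swallow the line but report success
	}
	return w.dst.Write(p) // dst could just as well be a logfile chosen in config
}

func main() {
	log.SetOutput(&levelWriter{dst: os.Stderr})
	log.Println("I! starting agent")
	log.Println("D! dropped unless debug is enabled")
	log.Println("E! always shown")
}
```
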
root
07f57710df code formatting 2016-10-03 18:31:25 +03:00
root
dc84cce95c added -x option for pgrep in procstat 2016-09-30 18:48:17 +03:00
Cameron Sparr
78ced6bc30 Use a bufio.Scanner in http listener
this will prevent potentially very large allocations due to a very large
chunk size sent from a client.

fixes #1823
2016-09-29 16:07:51 +01:00
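A rough sketch of the idea, assuming a line-protocol write handler; the handler name, route, and buffer sizes are illustrative, not the plugin's actual values:

```
package main

import (
	"bufio"
	"fmt"
	"net/http"
)

// maxLineSize caps a single line (one line-protocol metric); illustrative value.
const maxLineSize = 64 * 1024

func handleWrite(w http.ResponseWriter, r *http.Request) {
	scanner := bufio.NewScanner(r.Body)
	// Give the scanner a bounded buffer so a huge chunk from a client
	// cannot force one giant allocation the way reading the whole body would.
	scanner.Buffer(make([]byte, 4*1024), maxLineSize)
	for scanner.Scan() {
		line := scanner.Bytes()
		// parse one line of line protocol here
		_ = line
	}
	if err := scanner.Err(); err != nil {
		http.Error(w, fmt.Sprintf("reading body: %v", err), http.StatusBadRequest)
		return
	}
	w.WriteHeader(http.StatusNoContent)
}

func main() {
	http.HandleFunc("/write", handleWrite)
	http.ListenAndServe(":8186", nil)
}
```
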
Cameron Sparr
ca8e512e5b Update changelog 2016-09-28 16:12:32 +01:00
zensqlmonitor
573628dbdd Fix collation issue 2016-09-28 16:11:00 +01:00
Peter Murray
e477620dc5 Making '-service' flags work from a non-interactive session, i.e. Ansible, related to #1760 2016-09-28 16:09:43 +01:00
Łukasz Harasimowicz
32268fb25b Disable mesos tasks statistics until we find a better way to deal with them.
Due to the very real problem of generating a vast number of data series through
mesos task metrics, this feature is disabled until a better solution is found.
2016-09-28 16:07:35 +01:00
Łukasz Harasimowicz
80391bfe1f Fixed tags on mesos_task metrics.
Tagging values by executor_id can create quite a lot of data series
in InfluxDB, so we should stick to framework_id and server.
2016-09-28 16:07:35 +01:00
Cameron Sparr
e19845c202 Load config directory using filepath.Walk
closes #1137
2016-09-28 16:01:52 +01:00
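A small sketch of directory loading via filepath.Walk, which also picks up *.conf files in nested sub-directories; the path in main is only an example:

```
package main

import (
	"fmt"
	"os"
	"path/filepath"
	"strings"
)

// loadDirectory walks a config directory and returns every *.conf file,
// including files in nested sub-directories, which a flat directory listing
// would miss.
func loadDirectory(dir string) ([]string, error) {
	var files []string
	err := filepath.Walk(dir, func(path string, info os.FileInfo, err error) error {
		if err != nil {
			return err
		}
		if info.IsDir() || !strings.HasSuffix(info.Name(), ".conf") {
			return nil
		}
		files = append(files, path)
		return nil
	})
	return files, err
}

func main() {
	files, err := loadDirectory("/etc/telegraf/telegraf.d")
	if err != nil {
		fmt.Println("error:", err)
		return
	}
	for _, f := range files {
		fmt.Println(f)
	}
}
```
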
Cameron Sparr
52134555d6 globpath: only walk tree if ** is defined
closes #1517
2016-09-28 15:44:29 +01:00
Cameron Sparr
e7e39df6a0 Default SNMP parameter changes
max-repetitions = 10 is the default of net-snmp utils according to
http://net-snmp.sourceforge.net/docs/man/snmpbulkwalk.html

retries = 3 is the default of gosnmp:
https://godoc.org/github.com/soniah/gosnmp#pkg-variables

Could deal with some parts of the performance issues reported
by #1665
2016-09-28 14:34:20 +01:00
Patrick Hemmer
055ef168ae add oid_index_suffix to snmp plugin 2016-09-27 11:30:25 +01:00
Patrick Hemmer
2778b7be30 add snmp conversions for MAC addresses & IPs 2016-09-27 11:30:25 +01:00
Patrick Hemmer
953db51b2c Adjust snmp translation to return conversion info.
Also consolidated the translation code to obtain all info with just 1 command execution.

Also split test command mocks out to their own file for cleanliness.
2016-09-27 11:30:25 +01:00
Cameron Sparr
c043461f6c Fix varnish plugin to use default values
closes #1752
2016-09-23 16:06:33 +01:00
Cameron Sparr
ddc07f9ef8 Fix powerdns integer parse error handling
closes #1751
2016-09-23 16:05:15 +01:00
lost_z
2cf1db0837 add mysql uptime (#1735) 2016-09-23 15:59:22 +01:00
Cameron Sparr
17e6496830 update changelog 2016-09-23 11:38:52 +01:00
Vinh Quốc Nguyễn
1d10eda84e Fix crash when allow pending message wasn't set (#1785)
The default is 0, so we hit a division-by-zero error and crash. This check
ensures we will not crash, and instead `log` and continue to let telegraf run

Also, we set the default allowed pending message number to 10000
2016-09-23 11:37:47 +01:00
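A hedged sketch of the guard described here; the consumerConfig struct, field name, and batchSize helper are hypothetical, only the zero-check-plus-default pattern is the point:

```
package main

import "log"

// consumerConfig is an illustrative stand-in for the plugin's config struct;
// the field name mirrors the option discussed in the commit.
type consumerConfig struct {
	MaxUndeliveredMessages int
}

// batchSize divides work by the pending-message allowance; guarding against
// zero keeps a missing config value from causing a divide-by-zero panic.
func batchSize(total int, cfg *consumerConfig) int {
	if cfg.MaxUndeliveredMessages <= 0 {
		log.Printf("I! allowed pending messages not set, defaulting to 10000")
		cfg.MaxUndeliveredMessages = 10000
	}
	return total / cfg.MaxUndeliveredMessages
}

func main() {
	cfg := &consumerConfig{} // zero value, as when the option is omitted
	log.Println(batchSize(50000, cfg))
}
```
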
Daniele Gozzi
9ea3dbeee8 Allow numeric and non-string values for tag_keys. (#1782)
* Allow numeric and non-string values for tag_keys.

According to the go documentation the JSON deserializer only produces these
base types in output:
- string
- bool
- float64
- nil
With this patch bool, float64 and nil values get converted to a string when
their field key is specified in tag_keys. Previously the field was simply
discarded.

* Updated handling of nil for passing tests.

The automated tests are less than trivial to reproduce locally for me,
so I hope CircleCI won't mind...

* Updated changelog entries with PR and issue links.
2016-09-21 18:07:35 +01:00
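A sketch of the conversion, assuming the standard encoding/json types listed above; the "null" string used for nil values is an assumption, not necessarily what the parser emits:

```
package main

import (
	"encoding/json"
	"fmt"
	"strconv"
)

// tagValue converts the types produced by encoding/json (string, bool,
// float64, nil) into a string so the field can be used as a tag, instead of
// being silently discarded when it is not already a string.
func tagValue(v interface{}) (string, bool) {
	switch t := v.(type) {
	case string:
		return t, true
	case bool:
		return strconv.FormatBool(t), true
	case float64:
		return strconv.FormatFloat(t, 'f', -1, 64), true
	case nil:
		return "null", true // assumed representation
	default:
		return "", false
	}
}

func main() {
	var parsed map[string]interface{}
	json.Unmarshal([]byte(`{"status": true, "code": 204, "region": null}`), &parsed)

	tagKeys := []string{"status", "code", "region"}
	tags := map[string]string{}
	for _, k := range tagKeys {
		if s, ok := tagValue(parsed[k]); ok {
			tags[k] = s
		}
	}
	fmt.Println(tags) // map[code:204 region:null status:true]
}
```
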
Rikaard Hosein
100501ba72 statsd input plugin correctly handles colons in data-dog tag values now (#1794)
* Code correctly handles colons in tag values now

* Modified existing datadog tag test to include a tag value containing a colon
2016-09-21 14:37:42 +01:00
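A sketch of the splitting logic, assuming the dogstatsd "#key:value,key:value" tag section; parseDataDogTags is illustrative, not the plugin's parser:

```
package main

import (
	"fmt"
	"strings"
)

// parseDataDogTags splits a "#key:value,key:value" suffix. Using SplitN with
// n=2 keeps any further colons inside the tag value (e.g. a URL or address)
// instead of truncating it.
func parseDataDogTags(segment string) map[string]string {
	tags := map[string]string{}
	for _, kv := range strings.Split(strings.TrimPrefix(segment, "#"), ",") {
		parts := strings.SplitN(kv, ":", 2)
		if len(parts) != 2 || parts[0] == "" {
			continue
		}
		tags[parts[0]] = parts[1]
	}
	return tags
}

func main() {
	fmt.Println(parseDataDogTags("#endpoint:http://localhost:8080,env:prod"))
	// map[endpoint:http://localhost:8080 env:prod]
}
```
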
Cameron Sparr
f12368698b Update etc/telegraf.conf
closes #1789
2016-09-21 11:53:06 +01:00
Ross McDonald
6b25a73629 Add container state metrics to docker plugin (#1791)
* Add container state metrics to docker plugin.

* Update changelog.
2016-09-21 10:37:49 +01:00
David Moravek
90c7475c68 Fix sysstat resource leak (#1792) 2016-09-21 10:19:59 +01:00
Cameron Sparr
6648c101dd Add configurable timeout to influxdb input
closes #1773
2016-09-16 16:50:39 +01:00
Cameron Sparr
8d3285522c Prometheus output: do not remake metrics map each write
closes #1775
2016-09-16 16:50:39 +01:00
David Norton
b613405f42 Merge pull request #1768 from influxdata/dgn-speedup-statsd-parser
speed up statsd parser
2016-09-15 10:46:56 -04:00
David Norton
e999298078 speed up statsd parser 2016-09-15 08:11:06 -04:00
David Norton
0f0ab953f6 Merge pull request #1766 from influxdata/dgn-statsd-parsing-benchmarks
add statsd parsing benchmarks
2016-09-15 07:10:18 -04:00
David Norton
aaddbd153e add statsd parsing benchmarks 2016-09-14 11:12:02 -04:00
Cameron Sparr
9b2e2cc41f kafka panic: Check that error is non-nil before
fixes #1764
2016-09-14 08:54:22 +01:00
Cameron Sparr
bc22309459 Add commit & branch to Makefile 2016-09-13 09:31:30 +01:00
Gunnar
b6f81b538a Add commit to Telegraf version string (#1756) 2016-09-13 08:41:02 +01:00
Cameron Sparr
c3aa43a6bd Fix prometheus output panic on reload
closes #1530
2016-09-12 10:46:37 +01:00
Rene Zbinden
b2ea39077e fix issue #1716 (#1749) 2016-09-12 10:30:35 +01:00
Cameron Sparr
811567a2f4 Update go version to 1.7, fix vet errors
closes #1728
2016-09-09 16:11:17 +01:00
Cameron Sparr
ca8fb440cc Fix statsd scientific notation parsing
closes #1733
2016-09-09 15:13:11 +01:00
Cameron Sparr
ac58a6bb3c Fix unmarshal of influxdb metrics with null tags
closes #1738
2016-09-09 14:49:21 +01:00
Sean Beckett
9757d39240 Update CHANGELOG.md 2016-09-08 09:11:24 -06:00
Cameron Sparr
5a9e7d77b8 Update readme & chglog for 1.0 2016-09-08 15:26:10 +01:00
Cameron Sparr
e963b7f01b alphabetize service inputs, add logparser 2016-09-07 15:55:21 +01:00
Nathan D Acuff
e7899d4dc5 Postgresql database blacklist configuration option (#1699)
* separate hello and authenticate functions, force connection close at end of write cycle so we don't hold open idle connections, which has the benefit of mostly removing the chance of the connection getting hopelessly lost

* update changelog, though this will need to be updated again to merge into telegraf master

* bump instrumental agent version

* fix test to deal with better connect/reconnect logic and changed ident & auth handshake

* Update CHANGELOG.md

correct URL from instrumental fork to origin and put the change in the correct part of the file

* go fmt

* Split out Instrumental tests for invalid metric and value.

* Ensure nothing remains on the wire after final test.

* Force valid metric names by replacing invalid parts with underscores.

* Multiple invalid characters being joined into a single underscore.

* Adjust comment to what happens.

* undo split hello and auth commands, to reduce roundtrips

* Add ignored_databases option to postgresql configuration files, to enable easy filtering of system databases without needing to whitelist all the databases on the server.  Add tests for database whitelist and blacklist.

* run go fmt on new postgresql database whitelist/blacklist code

* add postgresql database blacklist option to changelog

* remove a bad merge from the changelog
2016-09-07 09:39:55 +01:00
Cameron Sparr
301c79e57c Add a 404 and high-traffic test to http listener
also remove locking around adding metrics. Instead, keep a waitgroup on
the ServeHTTP function and wait for that to finish before returning from
the Stop() function

closes #1407
2016-09-06 17:21:01 +01:00
ncohensm
67c288abda initial http_listener implementation
fix incredibly stupid bugs

populate README

support query endpoint and change default listen port

set response headers for query endpoint

add unit tests

revert erroneous Godeps change

add plugin ref to top-level README

remove debug output and add empty post body test

fix linter errors

move stoppableListener into repo

use constants for http status codes

add CHANGELOG entry

address code review comments re. style/structure

address further code review comments

add note to README re. database creation calls per PR comments
2016-09-06 17:21:01 +01:00
Cameron Sparr
8dd2a8527a Refactor NATS ssl config 2016-09-06 13:52:29 +01:00
Cameron Sparr
2fe427b3b3 mongodb input: fix version 2.2 panic
closes #1628
2016-09-06 11:58:06 +01:00
Paulo Pires
6b1cc67664 Add NATS output plugin.
Added NATS server container needed for tests.

Added NATS output plug-in. Fixes #1487

NATS output plug-in use internal.GetTLSConfig to instrument TLS configuration.

Added NATS output plug-in to changelog.

closes #1487
closes #1697
2016-09-06 11:39:57 +01:00
Cameron Sparr
1271f9d71a jolokia input: add note about POST permissions
closes #1628
2016-09-06 11:11:27 +01:00
aaron jheng
49ea4e9f39 [Docker Plugin] add server hostname for each docker measurement (#1599)
* add server hostname for each docker measurement

* update CHANGELOG

* move feature to v1.1

* tweak docker_engine_host tag
2016-09-06 08:37:46 +01:00
Cameron Sparr
50ef3282b6 Refactor and code cleanup of filtering
started working on this with the idea of fixing #1623, although I
realized that this was actually just a documentation issue around
a toml eccentricity.

closes #1623
2016-09-05 16:30:18 +01:00
Phil
b63dedb74d sanitize parenthesis (#1701) 2016-09-05 14:30:40 +01:00
Denis Orlikhin
5628049440 Handle negative integers coming as unsigned integers from Aerospike (#1679)
* Handle negative integers coming as unsigned integers from Aerospike stats

* skip values with overflow

* aerospike stat values parsing tests

* better tests
2016-09-05 14:29:14 +01:00
Cameron Sparr
54c9ba7639 Update documentation for Gauge & Counters 2016-09-05 12:58:07 +01:00
Cameron Sparr
b18d375d6c Implement AddGauge & AddCounter functions
and utilize them in the 'system' input plugins.
2016-09-02 16:51:26 +01:00
Cameron Sparr
6dbbe65897 Remove Add() function from accumulator 2016-09-02 16:35:27 +01:00
Cameron Sparr
03d8abccdd Implement telegraf metric types
And use them in the prometheus output plugin.

Still need to test the prometheus output plugin.

Also need to actually create typed metrics in the system plugins.

closes #1683
2016-09-02 16:35:27 +01:00
David Caldwell
0f6d317a8e Fix MySQL plugin not sending 0 value fields (#1695)
closes #1695
2016-09-02 15:22:30 +01:00
Cameron Sparr
792682590c Remove snmp_legacy unit tests and docker image 2016-08-31 12:17:06 +01:00
François de Metz
2d3da343b3 Add basic filestack webhook.
closes #1542

Generalize event.

Add doc.

Update default config.

Add filestack to the list of plugins.

Check that video conversion event returns 400.

Update the readme.

Update the changelog.
2016-08-31 10:48:27 +01:00
Charles-Henri
094eda22c0 Add new iptables plugin
The iptables plugin aims at monitoring bytes and packet counters
matching a given set of iptables rules.

Typically the user would set a dedicated monitoring chain into a given
iptables table, and add the rules to monitor to this chain. The plugin
allows focusing on the counters for this particular table/chain.

closes #1471
2016-08-31 10:42:44 +01:00
Butitsnotme
4886109d9c Added option to remove all CRs from input stream
Added the option removecr to inputs.exec to remove all carriage returns
(CR, ASCII 0x0D, Unicode codepoint \u0D, ^M). The option is boolean and
not enabled if not present in the config file.

closes #1606

Updated CHANGELOG.md with information about removecr

Ran go fmt ./...

Moved removal of CRs to internal/internal.go

Moved the code to remove carriage returns from
plugins/inputs/exec/exec.go to internal/internal.go. Additionally
changed the conditional on which it gets applied from using a
configuration file option to checking if it is running on Windows.

Moved Carriage Return check to correct place

Moved the carriage return removal back to the exec plugin. Added unit
testing for it. Fixed a bug (removing too many characters).

Ran go fmt ./...

Reverted CHANGELOG to master

Updated Changelog
2016-08-31 10:32:33 +01:00
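A minimal sketch of the CR removal; per the commit the real plugin gates this on Windows, which is omitted here for brevity:

```
package main

import (
	"bytes"
	"fmt"
)

// removeCarriageReturns strips every CR (ASCII 0x0D) byte from command
// output, so "value\r\n" from a Windows tool parses the same as "value\n".
func removeCarriageReturns(b []byte) []byte {
	return bytes.Replace(b, []byte("\r"), nil, -1)
}

func main() {
	out := []byte("cpu,host=a value=1\r\ncpu,host=b value=2\r\n")
	fmt.Printf("%q\n", removeCarriageReturns(out))
	// "cpu,host=a value=1\ncpu,host=b value=2\n"
}
```
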
Cameron Sparr
2dc47285bd Move CloudWatch rate limit to config (#1673)
* Move CloudWatch rate limit to config
Reference #1670

* make that variable a string

* ahem, apparently limiter wants an int

* add the ratelimit to the sample config

* update the test to include the rate

* set a default value of 10 for ratelimit

* Move default ratelimit to init
2016-08-31 10:29:24 +01:00
Eric
6e33a6d62f OpenTSDB HTTP output
closes #1539

First version of http put working

Refactored code to separate http handling from opentsdb module. Added batching support.

Fixed tag cleaning in http output and refactored telnet output.

Removed useless struct.

Fixed current unittest and added a new one.

Added benchmark test to test json serialization. Made sure http client would reuse connection.

Ran go fmt on opentsdb sources.

Updated README file

Removed useHttp in favor of parsing host string to determine the right API to use for sending metrics. Also renamed BatchSize to HttpBatchSize to better convey that it is only used when using Http API.

Updated changelog

Fixed format issues.

Removed TagSet type to make it more explicit.

Fixed unittest after removing TagSet type.

Revert "Updated changelog"

This reverts commit 24dba5520008d876b5a8d266c34a53e8805cc5f5.

Added PR under 1.1 release.

add missing redis metrics

This makes sure that all redis metrics are present without having to use a hard-coded list of what metrics to pull in.
2016-08-31 10:27:08 +01:00
Cameron Sparr
a8f9eb23cc add missing redis metrics (#1689)
This makes sure that all redis metrics are present without having to use a hard-coded list of what metrics to pull in.
2016-08-31 08:44:47 +01:00
Patrick Hemmer
41a5ee6571 add missing redis metrics
This makes sure that all redis metrics are present without having to use a hard-coded list of what metrics to pull in.
2016-08-31 01:05:11 -04:00
Nathan Haneysmith
7d8de4b8e1 Move default ratelimit to init 2016-08-30 14:33:51 -07:00
Cameron Sparr
cc2b53abf4 Fix changelog for #1650 2016-08-30 16:24:07 +01:00
Yaser Alraddadi
32aa1cc814 httpjson: support configurable response_timeout (#1651)
* httpjson: support configurable response_timeout

* make default ResponseTimeout in init

* Update CHANGELOG.md
2016-08-30 16:23:15 +01:00
Simon Murray
38d877165a Ceph Cluster Performance Input Plugin
The existing ceph input plugin only has access to the local admin daemon socket
on the local host, and as such has access to a limited subset of data.  This
extends the plugin to use CLI commands to get access to the full spread of Ceph
data.  This patch collects global OSD map and IO statistics, PG state and per pool
IO and utilization statistics.

closes #1513
2016-08-30 15:43:07 +01:00
Cameron Sparr
5c5984bfe1 Changelog update 2016-08-30 15:26:27 +01:00
tuier
30cdc31a27 Some improvements in mesos input plugin, (#1572)
* Some improvements in mesos input plugin,
     Removing unneeded statistics prefix for task's metric,
     Adding framework id tags into each task's metric,
     Adding state (leader/follower) tags to master's metric,
     Make sure the slave's metrics are tagged with slave

* typo, replacing cpus_total with elected to determine leader

* Remove remaining statistics_ from sample

* using timestamp from mesos as metric timestamp

* change mesos-tasks to mesos_tasks, measurement

* change measurement name in test

* Replace follower by standby
2016-08-30 15:25:29 +01:00
Cameron Sparr
602a36e241 fix changelog #1607 2016-08-30 07:04:10 +01:00
Joel "The Merciless" Meador
b863ee1d65 [Instrumental] Underscore metric name output (#1607)
* separate hello and authenticate functions, force connection close at end of write cycle so we don't hold open idle connections, which has the benefit of mostly removing the chance of the connection getting hopelessly lost

* update changelog, though this will need to be updated again to merge into telegraf master

* bump instrumental agent version

* fix test to deal with better connect/reconnect logic and changed ident & auth handshake

* Update CHANGELOG.md

correct URL from instrumental fork to origin and put the change in the correct part of the file

* go fmt

* Split out Instrumental tests for invalid metric and value.

* Ensure nothing remains on the wire after final test.

* Force valid metric names by replacing invalid parts with underscores.

* Multiple invalid characters being joined into a single underscore.

* Adjust comment to what happens.

* undo split hello and auth commands, to reduce roundtrips

* Split out Instrumental tests for invalid metric and value.

* Ensure nothing remains on the wire after final test.

* Force valid metric names by replacing invalid parts with underscores.

* Multiple invalid characters being joined into a single underscore.

* add an entry to CHANGELOG for easy merging upstream

* go fmt variable alignment

* remove some bugfixes from changelog which now more properly are in a different section.

* remove headers and whitespace that should have been removed with the last commit
2016-08-30 07:03:32 +01:00
Nathan Haneysmith
ca49babf3a set a default value of 10 for ratelimit 2016-08-29 11:41:43 -07:00
SoleAngelus
cf37b5cdcf Update WINDOWS_SERVICE.md (#1669)
1. Added further clarification on running commands in PowerShell.
2. Added double quotes to file paths.
2016-08-29 17:36:05 +01:00
Cameron Sparr
969f388ef2 Make elasticsearch timeout configurable
closes #1674
2016-08-29 11:06:30 +01:00
Nathan Haneysmith
0589a1d0a5 update the test to include the rate 2016-08-25 18:17:33 -07:00
Nathan Haneysmith
4e019a176d add the ratelimit to the sample config 2016-08-25 18:04:29 -07:00
Nathan Haneysmith
a0e23d30fe ahem, apparently limiter wants an int 2016-08-25 17:56:33 -07:00
Nathan Haneysmith
e931706249 make that variable a string 2016-08-25 17:53:46 -07:00
Nathan Haneysmith
2457d95262 Move CloudWatch rate limit to config
Reference #1670
2016-08-25 17:46:38 -07:00
Cameron Sparr
e9d33726a9 start aerospike container 1st for unit tests
because it requires some time to initialize before it can respond to
metric requests.
2016-08-24 09:16:55 +01:00
Cameron Sparr
2462e04bf2 Rdme upd (#1660)
* Update README and CHANGELOG with 1.0 RC 1

* Increase circleci test docker sleep

* update aerospike dependency
2016-08-24 08:41:12 +01:00
Patrick Hemmer
7fac74919c Alternate SNMP plugin (#1389)
* Add a new and improved snmp plugin

* update gosnmp for duplicate packet fix

https://github.com/soniah/gosnmp/issues/68
https://github.com/soniah/gosnmp/pull/69
2016-08-22 16:37:53 +01:00
Robert Kánia
b022b5567d Added missing column, refs #1646 (#1647) 2016-08-22 15:35:39 +01:00
Cameron Sparr
dbf6380e4b update PR template with changelog note 2016-08-17 18:24:06 +01:00
Cameron Sparr
a0e42f8a61 Sanitize graphite characters in field names
also sanitize the names at a higher scope for better clarity

closes #1637
2016-08-17 16:56:31 +01:00
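A sketch of sanitizing with strings.NewReplacer; the replacement set here is illustrative and not the serializer's authoritative character list:

```
package main

import (
	"fmt"
	"strings"
)

// graphiteSanitizer replaces characters that break the dot-delimited graphite
// path; the exact character set is illustrative only.
var graphiteSanitizer = strings.NewReplacer(
	" ", "_",
	"/", "-",
	"..", ".",
	`\`, "",
)

func sanitize(name string) string {
	return graphiteSanitizer.Replace(name)
}

func main() {
	fmt.Println(sanitize("disk used/free %")) // disk_used-free_%
}
```
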
Cameron Sparr
94e673fe85 Revert "add pgbouncer plugin"
This reverts commit fec9760f72.
2016-08-17 16:50:11 +01:00
Cameron Sparr
7600757f16 ntpq: don't index ntp fields that dont exist
closes #1634
2016-08-16 15:16:42 +01:00
Cameron Sparr
4ce8dd5f9a Rename snmp plugin to snmp_legacy 2016-08-11 16:11:35 +01:00
politician
26315bfbea Defines GOOS and GOARCH for windows builds (#1621)
* defines GOOS and GOARCH for windows builds

* default to amd64 on windows

* windows: use latest versions of missing packages
2016-08-11 15:35:00 +01:00
David Bayendor
a282fb8524 Update README.md (#1622)
* Update README.md

Clean up minor typos and syntax.

* Update README.md

Fix typo in 'default'
2016-08-11 09:14:56 +01:00
Jack Zampolin
dee98612e2 Modernize zookeeper readme (#1615)
* Modernize zookeeper readme

* Add configuration
2016-08-10 22:58:47 +01:00
Ross McDonald
69e4e862a3 Fix typo of 'quorom' to 'quorum' when specifying write consistency. (#1618) 2016-08-10 17:51:21 +01:00
Cameron Sparr
c0e895c3a7 etc/telegraf.conf update 2016-08-10 15:16:01 +01:00
jsvisa
fec9760f72 add pgbouncer plugin
add pgbouncer docker for testing

add pgbouncer testcase

update changelog

closes #1400
2016-08-10 15:14:15 +01:00
Rene Zbinden
1989a5855d remove cgo dependency by forking sensors command
closes #1414
closes #649
2016-08-09 08:38:05 +01:00
Cameron Sparr
abcd19493e If win stat buffer is empty, do not try to index
closes #1425
2016-08-09 08:29:37 +01:00
tuier
e457b7a8df Source improvement for librato output (#1416)
* Source improvement for librato output

Build the source from the list of tags instead of a single,
configuration-specified tag

Graphite Serializer:
* make buildTags public
* make sure not to use empty tags

Librato output:
* Improve Error handling for librato API base on error or debug flag
* Send Metric per Batch (max 300)
* use Graphite BuildTag function to generate source

The change is made so that it should be backwards compatible

Metric sample:
server=127.0.0.1 port=80 state=leader env=test
measurement.metric_name value
service_n.metric_x

Metric before with source tags set as "server":
source=127.0.0.1
test.80.127_0_0_1.leader.measurement.metric_name
test.80.127_0_0_1.leader.service_n.metric_x

Metric now:
source=test.80.127.0.0.1.leader
measurement.metric_name
service_n.metric_x

As you can see, the source in the "new" version is much more precise.
That way, when filtering (only by source) you can filter by env or any other tag

* Using a template to specify which tags to use for the source; by default, concatenate all
tags

* revert change in graphite serializer

* better documentation, change default for template

* fmt

* test passing with new host as default tags

* use host tag in api integration test

* Limit 80 chars per line, change resolution to be an int in the sample

* fmt

* remove resolution, doc for template

* fmt
2016-08-09 08:29:15 +01:00
Mariusz Brzeski
3853d0d065 Fix problem with metrics when ping return Destination net unreachable ( windows ) (#1561)
* Fix problem with metrics when ping return Destination net unreachable
Add test case TestUnreachablePingGather
Add percent_reply_loss
Fix some other tests

* Add errors measurement

* fix problem with ping reply "TTL expired in transit" (use regex for a more specific condition - TTL in line but it's not a valid reply)
add test case for "TTL expired in transit" - TestTTLExpiredPingGather
2016-08-09 08:27:30 +01:00
Patrick Hemmer
53e31cf1b5 Fix postgres extensible text (#1601)
* convert postgresql_extensible byte slice values to strings

* code cleanup in postgresql_extensible
2016-08-09 08:25:59 +01:00
Cameron Sparr
c99c22534b influxdb output: config doc update 2016-08-09 07:50:35 +01:00
Cameron Sparr
8e22526756 Adding c:\program files\telegraf\telegraf.conf
this will now be the default config file location on windows, basically
it is the windows equivalent of /etc/telegraf/telegraf.conf

also updating the changelog

closes #1543
2016-08-08 23:17:27 +01:00
Dennis Bellinger
7b6713b094 Telegraf support for built-in windows service.
Updated windows dependencies

Updated the windows dependencies so that the versions matched the
dependencies for Mac OS and Linux. Additionally added some that were
reported as missing at compile time.

Incorporated kardianos/service for management

Incorporated the library github.com/kardianos/service to manage the
service on the various platforms (including Windows). This required an
alternate main function.

The original main function was renamed to reloadLoop (as that is what
the main loop in it does) (it also got a couple of parameters). The
service management library calls it as the main body of the program.

Merged service.go into telegraf.go

Due to compilation issues on Windows, moved the code from service.go
into telegraf.go and removed service.go entirely.

Updated dependencies and fixed Windows service

Updated the dependencies so that it builds properly on Windows,
additionally, fixed the registered command for starting it as
a service (needed to add the config file option). This currently
standardizes it as a C:\telegraf\telegraf.conf on Windows.

Added dependency for github.com/kardianos/service

Removed the common dependencies from _windows file

Removed all the common dependencies from the Godeps_windows file and
modified Makefile to load Godeps and then Godeps_windows when building
for Windows. This should reduce problems caused by the Godeps_windows
file being forgotten when updating dependencies.

Updated CHANGELOG.md with changes

Ran `go fmt ./...` to format code

Removed service library on all but Windows

The service library [kardianos/service](github.com/kardianos/service)
has been disabled on all platforms but windows, as there is already
existing infrastructure for other platforms.

Removed the dependency line for itself

It appears that gdm accidentally added the project itself to the
dependency list. This caused the dependency restoration to select an
earlier version of the project during build.

This only affected windows.
This only affected builds after 020b2c70

Updated documentation for Windows Service

Removed the documentation about using NSSM and added documentation on
installing telegraf directly as a Windows Service.

Added license info for kardianos/service

Added the license information for github.com/kardianos/service, which is
licensed under the ZLib license; although that name is never mentioned,
the license text matches word for word.

Changed the Windows Config file default location

Updated the default location of the configuration file on Windows from
C:\telegraf\telegraf.conf to C:\Program Files\Telegraf\telegraf.conf.
With this change includes updating the directions, including directing
that the executable be put into that same directory. Additionally, as
noted in the instructions, the location of the config file for the
service may be changed by specifying the location with the `-config`
flag at install time.

Fixed bug - Wrong data type: svcConfig

svcConfig service.Config => svcConfig *service.Config
(It needed to be a pointer)
2016-08-08 23:10:39 +01:00
Jack Zampolin
b0ef506a88 Add Kafka output readme (#1609) 2016-08-08 23:10:07 +01:00
Jack Zampolin
22c293de62 Add request for sample queries (#1608) 2016-08-08 23:06:03 +01:00
Cameron Sparr
d3bb1e7010 Rename internal_models package to models 2016-08-08 14:41:40 +01:00
Cameron Sparr
49988b15a3 Default config typo fix 2016-08-06 07:40:28 +01:00
Cameron Sparr
f0357b7a12 CHANGELOG formatting update
put all 1.0 beta releases into a single 1.0 release manifest

also add #1586 change
2016-08-05 14:51:19 +01:00
Cameron Sparr
9d3ad6309e Remove IF NOT EXISTS from influxdb output 2016-08-05 13:55:02 +01:00
Cameron Sparr
b55e9e78e3 gopsutil, fix /proc/pid/io naming issue
closes #1584
2016-08-05 09:53:14 +01:00
Cameron Sparr
4bc6fdb09e Removing INFLUXDB_HTTP_LOG from logparser usage/docs
this log format is likely soon going to be removed from a future
influxdb release, so we should not be recommending that users base any
of their log parsing infra on this.
2016-08-04 16:42:59 +01:00
Cameron Sparr
2b43b385de Begin implementing generic timestamp logparser capability 2016-08-04 16:08:55 +01:00
Cameron Sparr
13865f9e04 Disable darwin builds (#1571)
telegraf can't be cross-compiled for darwin; it has C dependencies and
thus many of the system plugins won't work.
2016-08-04 14:27:33 +01:00
Jack Zampolin
497353e586 add call to action for plugin contributors to write tickscripts (#1580) 2016-08-04 14:27:06 +01:00
Cameron Sparr
2d86dfba8b Removing deprecated flags
they are:
  -configdirectory
  -outputfilter
  -filter
2016-08-03 13:08:06 +01:00
Cameron Sparr
30dbfd9af8 Fix racy tail from beginning test 2016-07-28 14:08:12 +01:00
Cameron Sparr
c991b579d2 tcp/udp listeners, remove locks & improve test coverage 2016-07-28 13:42:34 +01:00
Srini Chebrolu
841729c0f9 RPM post remove script update for proper handle on all Linux distributions (#1381) 2016-07-28 08:34:57 +01:00
Victor Garcia
412f5b5acb Fixing changelog, MongoDB stats per db feature not release in 1.0beta3 (#1548) 2016-07-26 19:15:40 +01:00
Mariusz Brzeski
0b3958d3cd Ping windows (#1532)
* Ping for windows

* En ping output

* Code format

* Code review

* Default timeout

* Fix problem with std error when no data received ( exit status = 1 )
2016-07-25 13:17:41 +01:00
Patrick Hemmer
e68f251df7 add AddError method to accumulator (#1536) 2016-07-25 13:09:49 +01:00
Jason Gardner
986735234b Fix output config typo. (#1527) 2016-07-22 16:05:53 +01:00
Patrick Hemmer
4363eebc1b update gopsutil for FreeBSD disk time metrics (#1534)
Results in adding the io_time metric to FreeBSD, and adjusts the read_time and write_time metrics to be in milliseconds like linux.
2016-07-22 09:23:45 +01:00
Patrick Hemmer
1be6ea5696 remove unused accumulator.prefix (#1535) 2016-07-22 09:22:52 +01:00
Cameron Sparr
8acda0da8f Update etc/telegraf.conf 2016-07-21 17:53:41 +01:00
Łukasz Harasimowicz
ee240a5599 Added metrics for Mesos slaves and tasks running on them.
closes #1356
2016-07-21 17:13:00 +01:00
Mendelson Gusmão
29ea433763 Implement support for fetching hddtemp data (#1411) 2016-07-21 17:00:54 +01:00
Pierre Fersing
0462af164e Added option "total/perdevice" to Docker input (#1525)
Like the cpu plugin, add two options, "total" and "perdevice", to send network
and diskio metrics either per device and/or as the sum of all devices.
2016-07-21 16:50:12 +01:00
Cameron Sparr
1c24665b29 Prometheus client & win_perf_counters char changes
1. in prometheus client, do not check for invalid characters anymore,
because we are already replacing all invalid characters with regex
anyways.
2. in win_perf_counters, sanitize field name _and_ measurement name.
Also add '%' to the list of sanitized characters, because this character
is invalid for most output plugins, and can also easily cause string
formatting issues throughout the stack.
3. All '%' will now be translated to 'Percent'

closes #1430
2016-07-21 16:24:19 +01:00
Torsten Rehn
0af0fa7c2e jolokia: handle multiple multi-dimensional attributes (#1524)
fixes #1481
2016-07-20 14:47:04 +01:00
Cameron Sparr
191608041f Strip container_version from container_image tag
closes #1413
2016-07-19 17:57:40 +01:00
Pierre Fersing
42d9d5d237 Fix Redis url, an extra "tcp://" was added (#1521) 2016-07-19 15:24:10 +01:00
Cameron Sparr
d54b169d67 nstat: fix nstat setting path for snmp6
closes #1477
2016-07-19 14:51:36 +01:00
Cameron Sparr
82166a36d0 Fix err race condition and partial failure issues
closes #1439
closes #1440
closes #1441
closes #1442
closes #1443
closes #1444
closes #1445
2016-07-19 14:45:55 +01:00
Victor Garcia
cbf5a55c7d MongoDB input plugin: Adding per DB stats (#1466) 2016-07-19 12:47:12 +01:00
Cameron Sparr
5f14ad9fa1 clean up and finish aerospike refactor & readme 2016-07-19 11:36:41 +01:00
Timothée GERMAIN
0be69b8a44 Make the user able to specify full path for HAproxy stats
closes #1499
closes #1019

Do not try to guess the HAproxy stats url; just add ";csv" at the end of the
url if not present.

Signed-off-by: tgermain <timothee.germain@corp.ovh.com>
2016-07-19 11:35:15 +01:00
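A sketch of the new behaviour, only appending ";csv" when the user-supplied url does not already end with it:

```
package main

import (
	"fmt"
	"strings"
)

// statsURL no longer guesses where the stats page lives: the user supplies
// the full path and we only append ";csv" when it is missing.
func statsURL(addr string) string {
	if strings.HasSuffix(addr, ";csv") {
		return addr
	}
	return addr + ";csv"
}

func main() {
	fmt.Println(statsURL("http://10.0.0.1:1936/haproxy?stats"))
	fmt.Println(statsURL("http://10.0.0.1:1936/haproxy?stats;csv"))
}
```
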
Matt Jones
375710488d Add support for self-signed certs to RabbitMQ input plugin (#1503)
* add initial support to allow self-signed certs

When using self-signed certs the metrics collection will fail; this allows
the user to specify in the input configuration file whether they want to skip
certificate verification. This is functionally identical to `curl -k`

At some point this functionality should be moved to the agent as it is
already implemented identically in several different input plugins.

* Add initial comment strings to remove noise

These should be properly fleshed out at some point to ensure
code completeness

* refactor to use generic helper function

* fix import statement against fork

* update changelog
2016-07-19 10:24:06 +01:00
Cameron Sparr
03d02fa67a Telegraf v1.0 beta 3 2016-07-18 18:20:41 +01:00
Cameron Sparr
b58cd78c79 Use errchan in redis input plugin
this may address, or at least log, issue #1462
2016-07-18 17:26:44 +01:00
Cameron Sparr
dabb6f5466 Internally name all patterns for log parsing flexibility
closes #1436

This also fixes the bad behavior of waiting until runtime to return log
parsing pattern compile errors when a pattern was simply not found.

closes #1418

Also protect against user error when the telegraf user does not have
permission to open the provided file. We will now error and exit in this
case, rather than silently waiting to get permission to open it.
2016-07-18 15:44:58 +01:00
Cameron Sparr
281a4d5500 Change resp_code from field to tag in logparser
closes #1479
2016-07-18 13:33:11 +01:00
François de Metz
1c2965703d Webhooks plugin: add mandrill (#1408)
* Add mandrill webhook.

* Store the id of the msg as part of event.

Signed-off-by: Cyril Duez <cyril@stormz.me>
Signed-off-by: François de Metz <francois@stormz.me>

* Decode body to get the mandrill_events.

Signed-off-by: Cyril Duez <cyril@stormz.me>
Signed-off-by: François de Metz <francois@stormz.me>

* Handle HEAD request.

Signed-off-by: Cyril Duez <cyril@stormz.me>
Signed-off-by: François de Metz <francois@stormz.me>

* Add the README.

Signed-off-by: Cyril Duez <cyril@stormz.me>
Signed-off-by: François de Metz <francois@stormz.me>

* Add mandrill_webhooks to the README.

Signed-off-by: Cyril Duez <cyril@stormz.me>
Signed-off-by: François de Metz <francois@stormz.me>

* Update changelog.

Signed-off-by: Cyril Duez <cyril@stormz.me>
Signed-off-by: François de Metz <francois@stormz.me>

* Run gofmt.

Signed-off-by: Cyril Duez <cyril@stormz.me>
Signed-off-by: François de Metz <francois@stormz.me>
2016-07-18 12:41:13 +01:00
Cameron Sparr
5dc4cce157 Fixup adding 'measurement' to logparser grok
closes #1434
2016-07-18 12:28:55 +01:00
Nathaniel Cook
8c7edeb53b allow measurement to be defined for logparser_grok plugin 2016-07-18 12:20:24 +01:00
Tim Allen
1d9745ee98 Move exec WaitGroup from Exec instance level to Gather.
If Gather is run concurrently the shared WaitGroup variable never finishes.

closes #1463
closes #1464
2016-07-18 12:18:14 +01:00
Mark McKinstry
2d6c8767f7 add ability to read redis from a socket (#1480)
* add ability to read redis from a socket

* update CHANGELOG
2016-07-18 12:03:39 +01:00
Cameron Sparr
b4a6d9c647 Change prometheus replacer to reverse regex replacer
closes #1474
2016-07-18 11:50:22 +01:00
ashish
6afe9ceef1 cassandra plugin lower version support added
closes #1427
closes #1508
2016-07-18 09:22:20 +01:00
Cameron Sparr
704d9ad76c Refactor aerospike plugin to use client lib 2016-07-16 22:15:37 +01:00
tuier
300d9adbd0 Consider zookeeper's state as a tag (#1417)
This change will send the state of zookeeper (leader|follower) as a tag
and not a metric.
That way it will be easier to search and filter per state
2016-07-16 19:19:21 +01:00
Pierre Fersing
207c5498e7 Remove systemd Install alias (#1470)
Alias is a list of additional names. Adding its canonical name
causes systemctl enable telegraf to show a warning "Too many levels of
symbolic links"
2016-07-14 15:53:05 -06:00
Cameron Sparr
d5e7439343 procstat plugin: store PID as a field
closes #1460
2016-07-14 15:52:02 -06:00
Joel Meador
21add2c799 instrumental plugin, rewrite connection retries
closes #1412

separate hello and authenticate functions,
force connection close at end of write cycle so we don't
hold open idle connections,
which has the benefit of mostly removing
the chance of the connection getting hopelessly lost

bump instrumental agent version

fix test to deal with better connect/reconnect logic and changed ident & auth handshake

Update CHANGELOG.md

correct URL from instrumental fork to origin and put the change in the correct part of the file

go fmt

undo split hello and auth commands, to reduce roundtrips
2016-07-14 15:18:31 -06:00
Shashank Sahni
4651ab88ad Fetching galera status metrics in MySQL
These are useful for Percona Xtradb cluster.

closes #1437
2016-07-14 15:02:45 -06:00
Sebastian Borza
53f40063b3 Moving cgroup path name to field from tag to reduce cardinality (#1457)
adding assertContainsFields function to cgroup_test for custom validation
2016-07-14 14:18:55 -06:00
Andrei Burd
97d92bba67 Redis input enhancement (#1387)
master_last_io_seconds_ago added
role tag renamed to replication_role
2016-07-14 13:28:36 -06:00
Cameron Sparr
bfdd665435 Copy metrics for each configured output
This is for better thread-safety when running with multiple outputs,
which can cause very odd panics at very high loads

primarily this is to address #1432

closes #1432
2016-07-14 09:16:29 -06:00
Cameron Sparr
821d3fafa6 Refactor SerializeBucketName to be read-only for struct fields 2016-07-14 09:16:29 -06:00
Cameron Sparr
7c9b312cee Make race detector build in CI 2016-07-14 09:16:29 -06:00
Cameron Sparr
69ab8a645c graphite output: set write deadline on TCP connection 2016-07-14 09:16:29 -06:00
Kostas Botsas
7b550c11cb Documentation for load balancing on graphite output servers (#1469)
* Added documentation for load balancing on graphite output servers

* clarifications

* updates1

* updates2

* updates3
2016-07-14 09:06:00 -06:00
Cameron Sparr
bb4f18ca88 temp ci fix, aerospike changed their metrics
see http://www.aerospike.com/docs/operations/upgrade/stats_to_3_9

TODO change aerospike input plugin to use official go client library.
2016-07-14 08:52:37 -06:00
Cameron Sparr
6efe91ea9c prometheus_client, implement Collector interface
closes #1334
2016-07-13 06:52:18 -06:00
Vladimir S
5f0a63f554 fixes #1450 (#1472) 2016-07-10 13:17:53 +01:00
François de Metz
d14e7536ab Cleanup the list of plugins. (#1423)
Github and Rollbar are now part of the webhooks plugin.
2016-07-10 12:12:33 +02:00
Jack Zampolin
c873937356 Add note about influxdb compatability (#1465) 2016-07-10 12:11:43 +02:00
Cameron Sparr
e1c3800cd9 Prometheus parser fix, parse headers properly
closes #1458
2016-07-09 22:34:59 +02:00
Kostas Botsas
c046232425 Merge pull request #1426 from influxdata/metrics-panic
nil metric list panic fix
2016-06-29 13:50:11 +03:00
Cameron Sparr
2d4864e126 nil metric list panic fix 2016-06-29 12:08:36 +02:00
Rene Zbinden
048448aa93 add build directory to git ignore (#1415) 2016-06-25 11:17:51 +01:00
Cameron Sparr
755b2ec953 fixup: BOM Trim -> TrimPrefix 2016-06-24 08:47:31 +01:00
Cameron Sparr
f62c493c77 Recover from prometheus multiple handler panic
closes #1339
2016-06-23 14:29:35 +01:00
Jonathan Chauncey
a6365a6086 feat(nsq_consumer): Add input plugin
to consume metrics from an nsqd topic

closes #1347
closes #1369
2016-06-23 14:06:36 +01:00
Cameron Sparr
f7e057ec55 refactor cgroup build so non-linux systems see plugin
also updated the README for the fields->files change.
2016-06-23 11:47:25 +01:00
Cameron Sparr
30cc00d11b Update changelog, etc/telegraf.conf 2016-06-23 10:28:38 +01:00
Cameron Sparr
d641c42029 cgroup: change fields -> files
closes #1103
closes #1350
2016-06-23 10:23:59 +01:00
Vladimir Sagan
9c2ca805da Remove flush_scope logic 2016-06-23 10:13:31 +01:00
Vladimir Sagan
b0484d8a0c add cgroup plugin 2016-06-23 10:13:31 +01:00
Cameron Sparr
5ddd61d2e2 Trim BOM from config file for windows support
closes #1378
2016-06-23 09:00:51 +01:00
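A sketch of trimming the UTF-8 BOM (see also the 'BOM Trim -> TrimPrefix' fixup earlier in this list); the sample config text is only an example:

```
package main

import (
	"bytes"
	"fmt"
)

// trimBOM removes a UTF-8 byte-order mark that Windows editors such as
// Notepad prepend to telegraf.conf; TrimPrefix (rather than Trim) only
// touches the very start of the file.
func trimBOM(f []byte) []byte {
	return bytes.TrimPrefix(f, []byte("\xef\xbb\xbf"))
}

func main() {
	raw := append([]byte("\xef\xbb\xbf"), []byte("[agent]\n  interval = \"10s\"\n")...)
	fmt.Printf("%q\n", trimBOM(raw))
}
```
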
Victor Garcia
50ea7f4a9d x509 certs authentication now supported for Prometheus input plugin (#1396) 2016-06-23 08:59:44 +01:00
Thibault Cohen
b18134a4e3 Fix #1405 (#1406) 2016-06-23 08:59:14 +01:00
Cameron Sparr
7825df4771 Fix darwin ping tests 2016-06-22 18:21:07 +01:00
Cameron Sparr
d6951dacdc Remove docker-machine/boot2docker dependencies & references 2016-06-22 17:25:01 +01:00
François de Metz
e603825e37 Add new webhooks plugin that superseed github and rollbar plugins.
closes #1289

Signed-off-by: François de Metz <francois@stormz.me>
Signed-off-by: Cyril Duez <cyril@stormz.me>

Rename internals struct.

Signed-off-by: François de Metz <francois@stormz.me>
Signed-off-by: Cyril Duez <cyril@stormz.me>

Update changelog.

Signed-off-by: François de Metz <francois@stormz.me>
Signed-off-by: Cyril Duez <cyril@stormz.me>

Update READMEs and CHANGELOG.

Signed-off-by: François de Metz <francois@stormz.me>
Signed-off-by: Cyril Duez <cyril@stormz.me>

Update SampleConfig.

Update the config format.

Update telegraf config.

Update the webhooks README.

Update changelog.

Update the changelog with an upgrade path.

Update default ports.

Fix indent.

Check for nil value on AvailableWebhooks.

Check for CanInterface.
2016-06-22 17:18:14 +01:00
Mike Glazer
e3448153e1 Allow for TLS connections to ElasticSearch (#1398)
* Allow for TLS connections to ElasticSearch

Extremely similar implementation to the HTTP JSON module's
implementation of the same code.

* Changelog update
2016-06-22 16:23:49 +01:00
jsvisa
25848c545a Fix: riak with read_repairs available
closes #1399
2016-06-22 14:56:44 +01:00
Konstantin Kulikov
3098564896 fix datarace in input apache plugin
closes #1384
2016-06-22 14:42:47 +01:00
Stian Øvrevåge
4b6f9b93dd Updated sqlserver.go - Added Rows/Logs max size (#1380)
I added Rows/Logs max size counters for tracking databases that do not have autogrowth enabled. The counters return numbers in 8KB pages since there are a few special values (such as -1 for no max size) that can't directly be multiplied by 8192 to get size in bytes.

Also added Rows/Logs size in 8KB pages for comparison from the same system table. Even though it returns the same size as sizes from sys.dm_io_virtual_file_stats which are already collected.
2016-06-22 14:39:35 +01:00
Cameron Sparr
2beef21231 Beta 2 Release 1.0 2016-06-21 14:35:26 +01:00
Cameron Sparr
cb3c54a1ae logparser input plugin
closes #102
closes #328
2016-06-21 14:23:01 +01:00
Iiro Uusitalo
d50a1e83ac Added support for Tengine (#1390)
* Adds support for Tengine

* Added #1390 Tengine PR to changelog
2016-06-21 14:22:51 +01:00
Cameron Sparr
1f10639222 Fix Graphite output mangling '%' character.
closes #1377
2016-06-21 11:52:49 +01:00
Cameron Sparr
af0979cce5 change "default" retention policy to ""
closes #1374
2016-06-16 12:22:27 +01:00
Cameron Sparr
5b43901bd8 update issue_template.md 2016-06-14 18:17:11 +01:00
Cameron Sparr
d7efb7a71d Add precision rounding to accumulator
Adding precision rounding to the accumulator. This means that now every
input metric will get rounded at collection, rather than at write (and
only for the influxdb output).

This feature is disabled for service inputs, because service inputs
should be in control of their own timestamps & precisions.
2016-06-14 00:36:39 +01:00
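A sketch of timestamp rounding at collection time; roundTime and the zero-precision escape hatch for service inputs are illustrative, not the accumulator's actual API:

```
package main

import (
	"fmt"
	"time"
)

// roundTime rounds a metric timestamp to the configured precision at
// collection time; a zero precision leaves the timestamp untouched, which is
// how a service input that controls its own timestamps would be handled.
func roundTime(t time.Time, precision time.Duration) time.Time {
	if precision <= 0 {
		return t
	}
	return t.Round(precision)
}

func main() {
	t := time.Date(2016, 6, 14, 0, 36, 39, 123456789, time.UTC)
	fmt.Println(roundTime(t, time.Second))      // 2016-06-14 00:36:39 +0000 UTC
	fmt.Println(roundTime(t, time.Millisecond)) // 2016-06-14 00:36:39.123 +0000 UTC
	fmt.Println(roundTime(t, 0))                // unchanged
}
```
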
Adrian Moisey
4d242836ee Fix typo (#1367)
* Fix typo

* Fix another typo
2016-06-13 10:38:58 +01:00
Cameron Sparr
06cb5a041e statsd, udp, tcp: do not log every dropped metric.
also applying this change to the udp_listener and tcp_listener input
plugins

closes #1340
2016-06-10 13:47:33 +01:00
Cameron Sparr
ea2521bf27 Fixup ping change
fixes #1335
2016-06-10 13:05:28 +01:00
kodek
4cd1f7a104 Increase ping timeout based on ping count and interval 2016-06-10 12:49:37 +01:00
Cameron Sparr
137843b2f6 Change default zookeeper chroot to empty string
closes #1112
2016-06-10 12:07:36 +01:00
Cameron Sparr
008ed17a79 Fix exec plugin panic with single binary
fixes #1330
2016-06-10 11:27:46 +01:00
Tobias Schoknecht
75e6cb9064 Fixed incorrect prometheus metrics source selection (#1337)
Metrics of type summary should retrieve values via GetSummary,
while histogram should retrieve values via GetHistogram, for
both count and sum
2016-06-09 22:50:00 +01:00
Cameron Sparr
ad88a9421a Beta 1 Release 1.0 2016-06-07 10:48:17 +01:00
Cameron Sparr
346deb30a3 OpenTSDB test problems, disabling output integration tests 2016-06-07 10:39:25 +01:00
Cameron Sparr
8c3d7cd145 Fix rare panic in RHEL 5.2 diskio plugin (#1327)
closes #1322
2016-06-03 14:28:47 +01:00
Cameron Sparr
821b30eb92 Add timeout param to exec readme (#1325) 2016-06-03 13:32:16 +01:00
Cameron Sparr
a362352587 Use glob match for finding /proc/<pid>/stat files
closes #1323
2016-06-03 13:31:31 +01:00
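A sketch of the glob-based lookup; it works only on Linux hosts with a mounted /proc:

```
package main

import (
	"fmt"
	"path/filepath"
)

// pidStatFiles finds /proc/<pid>/stat entries with a glob instead of listing
// /proc and filtering, so non-numeric entries never need special casing.
func pidStatFiles() ([]string, error) {
	return filepath.Glob("/proc/[0-9]*/stat")
}

func main() {
	files, err := pidStatFiles()
	if err != nil {
		fmt.Println("error:", err)
		return
	}
	fmt.Println(len(files), "processes")
}
```
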
Ross McDonald
94f952787f Add statically-linked amd64 builds to default build targets.
Remove version and iteration from root packaging folder. (#1318)

closes #1201
2016-06-02 16:14:18 +01:00
Pierre Fersing
3ff184c061 Removed leaked "database" tag on redis metrics (#1316) 2016-06-02 14:25:23 +01:00
Meng Ye
80368e3936 fix used_percent Calculation formula (#1313) 2016-06-02 14:24:48 +01:00
Cameron Sparr
2c448e22e1 New object: ErrChan for concurrent err handling 2016-06-02 13:29:37 +01:00
Ali Alrahaleh
1aabd38eb2 Add graylog input pluging change log (#1309) 2016-06-02 13:13:17 +01:00
Cameron Sparr
675457873a haproxy input: fix potential race condition 2016-06-02 11:22:07 +01:00
Cameron Sparr
8173338f8a fix build flags 2016-06-01 18:58:54 +01:00
Cameron Sparr
c4841843a9 Create dummy zfs plugin file 2016-06-01 18:53:29 +01:00
Cameron Sparr
f08a27be5d graylog input doc tweaks
closes #1261
2016-06-01 18:44:18 +01:00
Ali Alrahahleh
a4b36d12dd add graylog plugin
add unit test for graylog
2016-06-01 18:21:09 +01:00
Cameron Sparr
c842724b61 Fix graylog test race 2016-06-01 16:32:38 +01:00
Cameron Sparr
fb5f40319e update gitattributes for easier fork mngmnt 2016-06-01 16:18:17 +01:00
Cameron Sparr
52b9fc837c Adding active & inactive memory to mem plugin
closes #1213
2016-06-01 16:04:20 +01:00
Cameron Sparr
6f991ec78a Sleep longer in tail test 2016-06-01 15:49:32 +01:00
Łukasz Harasimowicz
7921d87a45 Added Consul health checks state monitoring. (#1294) 2016-06-01 11:02:28 +01:00
Rickard von Essen
9f7a758bf9 RFR: Initial support for ZFS on FreeBSD (#1224)
* WIP: Initial support for ZFS on FreeBSD

* Added build directives

* Ignore 'kstatPath' config option on FreeBSD

* Added tests for ZFS FreeBSD input plugin.

* Updated the README to conform with the guidelines and added FreeBSD info

* Fixed indents

* Spell check
2016-05-31 17:49:56 +01:00
Cameron Sparr
0aff7a0bc1 Disk plugin: return immediately if usage fails
closes #1297
2016-05-31 17:17:06 +01:00
Cameron Sparr
c4cfdb8a25 Revert "Revert graylog output"
This reverts commit 4f27315720.
2016-05-31 16:45:14 +01:00
Cameron Sparr
342cfc4087 ReAdd gelf serializer & graylog output filter. (#1299)
This reverts commit 958ef2f872.
2016-05-31 16:41:27 +01:00
Cameron Sparr
bd1282eddf Don't print config with trailing whitespace 2016-05-31 16:25:02 +01:00
Cameron Sparr
892abec025 Refactor collection_jitter and flush_jitter
use a common function between collection_jitter and flush_jitter, which
creates the same behavior between the two options.

going forward, both jitters will be random sleeps that get re-evaluated
at runtime for every interval (previously only collection_jitter did
this)

also fixes behavior so that both jitters will exit in the event of a
process exit.

closes #1296
2016-05-31 14:24:32 +01:00
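A sketch of the shared jitter behaviour described here (random sleep, re-evaluated every interval, aborted on shutdown); sleepJitter is illustrative, not the actual shared helper:

```
package main

import (
	"fmt"
	"math/rand"
	"time"
)

// sleepJitter sleeps a random duration in [0, max), re-evaluated on every
// call, and returns early if shutdown closes, so a pending jitter sleep never
// delays process exit.
func sleepJitter(max time.Duration, shutdown chan struct{}) {
	if max <= 0 {
		return
	}
	jitter := time.Duration(rand.Int63n(int64(max)))
	select {
	case <-time.After(jitter):
	case <-shutdown:
	}
}

func main() {
	shutdown := make(chan struct{})
	start := time.Now()
	sleepJitter(500*time.Millisecond, shutdown)
	fmt.Println("slept", time.Since(start))
}
```
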
Martin Seener
e809c4e445 Also added reasonable default for influxdb input plugin
to simplify configuration for most users

closes #1295
2016-05-31 13:41:02 +01:00
Cameron Sparr
9ff536d94d Limit GetMetricStatistics to 10 per second
closes #1197
2016-05-31 11:26:52 +01:00
Cameron Sparr
4f27315720 Revert graylog output 2016-05-31 11:23:01 +01:00
Cameron Sparr
958ef2f872 Revert "Add gelf serializer & graylog output filter." (#1299) 2016-05-31 11:21:20 +01:00
Cameron Sparr
069764f05e Update README & etc/telegraf.conf 2016-05-31 11:02:10 +01:00
vanillahsu
eeeab5192b Add gelf serializer & graylog output filter. (#1167)
* add gelf serializer.

* change url.

* handle fields in correct format.

* add graylog.

* handle host field of graylog.

* 1: Add go-gelf entry to Godeps to fix ci.
2: switch to github.com/Graylog2/go-gelf.

* implement Close().

* Deprecated gelf serializer, and back to graylog-golang.

* Update graylog-golang's hash.

* move gelf related function to graylog.go.

* 1: remove unneeded deps on Godeps_windows.
2: add README.md
3: add unittest.

* Fix unittest on 'go test -race'
2016-05-31 10:58:35 +01:00
robinpercy-xm
a7dfbce3d3 Addressing PR feedback
- Updated README/CHANGELOG
- Added links to further info to input README
- Reduced lines to 80 chars

Removing input declaration from SampleConfig

Moved PR to unreleased section of changelog

closes #1165
2016-05-31 10:47:26 +01:00
Jan Shim
ed2d1d9bb7 Add kernel_vmstat input plugins 2016-05-31 10:46:34 +01:00
Robin Percy
0fb2d2ffae Adding a conntrack input plugin
- Collects conntrack stats from the configured directories and files.

Applying PR feedback:

- Rebased onto master
- Updated README/CHANGELOG
- Limited lines to 80 chars
- Improved plugin docs and README
- added a dummy notlinux build file

Fixed up CHANGELOG and README after rebase

closes #1164
2016-05-31 10:42:19 +01:00
Ranjib Dey
3af65e7abb Fix typo in output plugin example (#1290) 2016-05-27 17:44:41 +01:00
Martin
984b6cb0fb Made the apache input’s urls parameter optional by using a reasonable default for most users (#1288) 2016-05-27 16:12:36 +01:00
Björn Lichtblau
ca504a19ec Use optimeDate to get MongoTimestamp (mongo input plugin). (#1281) 2016-05-27 11:57:17 +01:00
Lukasz Jagiello
c2797c85d1 Updated documentation. (#1284) 2016-05-26 19:31:51 +01:00
Pierre Fersing
d5add07c0b processes: Don't return error if process exited (#1283) 2016-05-26 17:58:20 +01:00
Kostas Botsas
0ebf1c1ad7 write_consistency documentation (#1282)
Added write_consistency to InfluxDB output documentation
2016-05-26 17:23:01 +01:00
Martin Seener
42d7fc5e16 Use the DefaultURL parameter if no url is explicitly set by the user
closes #1278
closes #1277
2016-05-26 12:14:43 +01:00
Jari Sukanen
6828fc48e1 exec plugin: allow using glob pattern in command list
Allow using a glob pattern in the command list in configuration. This enables, for
example, placing all commands in a single directory and using /path/to/dir/*.sh
as one of the commands to run all shell scripts in that directory.

Glob patterns are applied on every run of the commands, so matching commands can
be added without restarting telegraf.

closes #1142
2016-05-26 11:38:15 +01:00
Pierre Fersing
98d91b1c89 Fix reloading Telegraf under systemd (#1279) 2016-05-26 11:32:05 +01:00
Cameron Sparr
9bbdb2d562 Allow wildcard filtering of varnish stats
closes #1275
2016-05-26 10:42:34 +01:00
Rene Zbinden
a8334c3261 add option to disable dns lookup for chronyc
closes #1265
2016-05-25 18:58:56 +01:00
Cameron Sparr
9144f9630b graphite parser: support multiple tag keys
closes #1272
2016-05-25 17:11:28 +01:00
Cameron Sparr
3e4a19539a http_response plugin: Add SSL config options
closes #1264
2016-05-25 13:44:36 +01:00
Cameron Sparr
5fe7e6e40e influxdb input: Use non-panicking type assertion
closes #1268
2016-05-25 13:32:10 +01:00
Cameron Sparr
58f2ba1247 kernel: use strconv.ParseInt instead of strconv.Atoi
closes #1258
2016-05-25 12:31:10 +01:00
John Engelman
5f3a91bffd Consolidate AWS credentials (#1208)
* Use shared AWS credential configuration.

*  Cloudwatch dimension wilcards 

* Allow configuring cache_ttl for cloudwatch metrics.

* Allow for wildcard in dimension values to select all available metrics.

* Use internal.Duration for CacheTTL and go fmt

* Refactor to not use embedded structs for config.

* Update AWS plugin READMEs with credentials details, update Changelog.

* Fix changelog after rebasing to master and 0.13.1 release.

* Fix changelog after rebase.
2016-05-25 12:30:39 +01:00
Cameron Sparr
6351aa5167 only count shard if it's non-empty
closes #1221
2016-05-25 12:05:14 +01:00
Nick
9966099d1a Replace ":" with "_" in tags. This should make the mysql plugin work with the opentsdb output (it uses a "server" tag like "127.0.0.1:3306") (#1256) 2016-05-25 11:37:48 +01:00
Cameron Sparr
1ef5599361 update changelog & etc/telegraf.conf 2016-05-24 15:34:56 +01:00
Cyril Duez
c78b6cdb4e Add input plugin for rollbar service. (#1247)
* Report rollbar events.

Signed-off-by: Cyril Duez <cyril@stormz.me>
Signed-off-by: François de Metz <francois@stormz.me>

* Fix indent with go fmt.

* Add test for rollbar webhooks.

* Report more data from new_item event.

* Handle new deploy webhook.

Signed-off-by: Cyril Duez <cyril@stormz.me>
Signed-off-by: François de Metz <francois@stormz.me>

* Update default port.

* Add readme.

* Add rollbar_webhooks to the readme.

* Add rollbar_webhooks to plugins list.

* Add tag level for new_item event.

* Update readme.

* Update changelog.
2016-05-24 15:32:42 +01:00
Cameron Sparr
d736c7235a prevent potential tail datarace (#1254) 2016-05-24 15:16:33 +01:00
Rene Zbinden
475252d873 fix telegraf service (#1252) 2016-05-24 15:14:58 +01:00
Cameron Sparr
e103923430 Release 0.13.1 2016-05-24 12:04:48 +01:00
Cameron Sparr
cb59517ceb Update etc/telegraf.conf 2016-05-24 11:16:21 +01:00
robinpercy-xm
1248934f3e Adding Varnish HTTP Cache input plugin (#1173)
* Adding Varnish HTTP Cache input plugin

* Applying PR feedback

- Linked to varnish in input README
- Updated README/CHANGELOG
- Cleaned up sampleConfig to remove formatting
- Shorted lines to 80 chars (except where test input requires long strings)
- Using internal.RunTimeout to wrap call to varnishtat
- Added dummy file for windows
2016-05-24 11:06:25 +01:00
Cameron Sparr
204ebf6bf6 influxdb output: write consistency parameter
closes #1249
2016-05-24 10:50:27 +01:00
Rene Zbinden
52d5b19219 add chrony support (#1238)
* add chrony support

* remove path definition

* add changelog
2016-05-24 09:55:25 +01:00
Cameron Sparr
8e92d3a4a0 Log to /var/log/telegraf/telegraf.log on systemd
closes #1243
2016-05-23 18:00:59 +01:00
Cameron Sparr
c44ecf54a5 Utilize timeout in net_response plugin.
Also changing the net_response and http_response plugins to only accept
duration strings for their timeout parameters. This is a breaking config
file change.

closes #1214
2016-05-23 15:59:23 +01:00
Klaudiusz Staniek
c6699c36d3 Add the OctetString OID value support (#1242)
This update adds support for string values. Not sure why this was missed.
2016-05-23 11:21:53 +01:00
François de Metz
d6ceae7005 Refactor GitHub webhooks (#1240)
* Fix a typo.

* Extract similar code to generateEvent function.

* Remove functions to use generateEvent in the switch.

* Refactor tests.
2016-05-23 11:21:34 +01:00
Rene Zbinden
4dcb82bf08 fix interval rounding error
closes #1190
2016-05-23 11:20:01 +01:00
Cameron Sparr
4f5d5926d9 Set a timeout for calls to input.Gather
Changing the internal behavior around running plugins. Each plugin
will now have its own goroutine with its own ticker. This means that a
hung plugin will not block any other plugins. When a plugin is hung, we
will log an error message every interval, letting users know which
plugin is hung.

Currently the input interface does not have any methods for killing a
running Gather call, so there is nothing we can do but log an "ERROR"
and move on. This will give some visibility into the plugin that is
acting up.

closes #1230
fixes #479
2016-05-21 21:39:01 +01:00
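A sketch of the per-plugin pattern described here: Gather runs in its own goroutine, and since it cannot be killed, the caller only logs an E! message each time the interval elapses; names and log wording are illustrative:

```
package main

import (
	"log"
	"time"
)

// gatherWithTimeout runs gather in its own goroutine and, because a hung
// Gather cannot be killed, simply logs an error every interval until it
// either finishes or shutdown closes.
func gatherWithTimeout(name string, gather func() error, interval time.Duration, shutdown chan struct{}) {
	done := make(chan error, 1)
	go func() { done <- gather() }()

	ticker := time.NewTicker(interval)
	defer ticker.Stop()
	for {
		select {
		case err := <-done:
			if err != nil {
				log.Printf("E! input [%s] gather error: %v", name, err)
			}
			return
		case <-ticker.C:
			log.Printf("E! input [%s] took longer than the collection interval (%s)", name, interval)
		case <-shutdown:
			return
		}
	}
}

func main() {
	shutdown := make(chan struct{})
	slow := func() error { time.Sleep(250 * time.Millisecond); return nil }
	gatherWithTimeout("slow_plugin", slow, 100*time.Millisecond, shutdown)
}
```
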
Cameron Sparr
3c5c3b98df update gopsutil to commit with timeout support
closes #1215
2016-05-21 21:00:51 +01:00
Cameron Sparr
56aee1ceee Update gopsutil dependency
closes #1233
2016-05-20 15:30:13 +01:00
Cameron Sparr
f176c28a56 http_response: override req.Host header properly
closes #1198
2016-05-19 13:19:51 +01:00
Cameron Sparr
2e68bd1412 don't overwrite host tags in plugins
closes #1227
closes #1210
2016-05-19 13:19:28 +01:00
Cameron Sparr
35eb65460d github issue and pr templates 2016-05-19 12:11:19 +01:00
Jared Biel
ab54064689 Procstat input plugin - functionality for overriding of process_name (#1192)
Being able to override the process_name in the procstat module
is useful for daemonized perl, ruby, erlang etc. processes. This
allows for manually setting process_name rather than it being set to
the interpreter/VM of the process.
2016-05-19 11:34:25 +01:00
Cameron Sparr
debf7bf149 ntpq input: ignore lines that are '-'
closes #1223
2016-05-18 22:20:47 +01:00
Kane Dou
1dbe3b8231 fix mongodb input concurrent map read/write
closes #1211
2016-05-18 21:23:39 +01:00
Cameron Sparr
b065573e23 influxdb input: Add shard counter
closes #1221
2016-05-18 19:31:36 +01:00
Cameron Sparr
e94e50181c update changelog, readme, telegraf.conf 2016-05-18 16:07:17 +01:00
robinpercy-xm
69dfe63809 Implemented ceph collector (#1172)
- records metrics from the output of mon and osd socket perf
  dumps.
2016-05-18 15:48:44 +01:00
Jason Roelofs
f32916a5bd Output stats to the Instrumental TCP Collector
closes #1139
2016-05-18 15:03:28 +01:00
Cameron Sparr
be7ca56872 Update README w/ tail plugin 2016-05-18 15:00:30 +01:00
Anthony Brodard
33cacc71b8 Add role tag to redis plugins (#1207)
fixes #1203

- Format code
2016-05-18 14:17:14 +01:00
Cameron Sparr
c292e3931a Remove ntpq state prefixes and make their own tag
closes #1161
2016-05-18 14:16:01 +01:00
François de Metz
a87d6f0545 Fix typo. (#1220) 2016-05-18 14:14:50 +01:00
Cameron Sparr
3a01b6d5b7 Update elasticsearch readme
closes #1145
2016-05-18 12:22:34 +01:00
Zdenek Styblik
39df2635bd Fix crash in Docker input plugin - Fixes #1195
Commit fixes crash in Docker input plugin caused by the fact that return value
might be nil when error occurs.

closes #1195
2016-05-18 11:27:06 +01:00
Jack Tench
08ecfb8a67 Replace sudo with su in init script
To avoid issues starting the service when 'Defaults requiretty' is enabled in the sudoers file.

Fixes #1204
closes #1205
2016-05-17 18:31:49 +01:00
Baptiste Mille-Mathias
a59bf7246a Don't use root user as example
Using root as user is a bad habit.

closes #1177
2016-05-17 18:30:15 +01:00
Cameron Sparr
281296cd3f changelog update 2016-05-17 17:05:27 +01:00
Jonathan A. Sternberg
61d190b1ae Add docker pull badge to the README 2016-05-17 17:02:42 +01:00
Cameron Sparr
dc89f029ad nstat plugin cleanups and formatting
closes #1104
closes #1138
2016-05-17 17:00:46 +01:00
Maksadbek
7557056a31 updated readme for nstat 2016-05-17 16:51:42 +01:00
Maksadbek
20c45a150c nstat plugin: reading files paths from env 2016-05-17 16:51:42 +01:00
Maksadbek
46bf0ef271 nstat input plugin 2016-05-17 16:51:42 +01:00
Brano Zarnovican
a7b632eb5e fix 0.13 download urls 2016-05-13 12:57:22 +02:00
Cameron Sparr
90a98c76a0 Finalize 0.13 release 2016-05-11 13:41:32 -07:00
Cameron Sparr
12357ee8c5 processes: add 'unknown' procs (?) 2016-05-11 11:52:29 -07:00
Cameron Sparr
bb254fc2b9 Default docker timeout in case one is not defined in config 2016-05-10 14:18:55 -07:00
Cameron Sparr
aeadc2c43a Update etc/telegraf.conf, mqtt_cons readme 2016-05-10 14:18:55 -07:00
Cameron Sparr
ed492fe950 update influxdb & gopsutil deps 2016-05-10 14:18:55 -07:00
Cameron Sparr
775daba8f5 Change Version->version for consistency w/ influxdb 2016-05-10 14:18:55 -07:00
Cameron Sparr
677dd7ad53 Release 0.13 2016-05-10 14:18:55 -07:00
Cameron Sparr
85dee02a3b snmp plugin: change host -> snmp_host
closes #1156
2016-05-10 14:18:00 -07:00
Cameron Sparr
afdebbc3a2 Make OidInstanceMapping a field of the snmp host
fixes #1171
2016-05-10 10:15:01 -07:00
Jörg Thalheim
5deb22a539 docker: add container_id also to per cpu stats
currently this field exists only for total cpu usage

closes #1168
2016-05-09 16:43:27 -07:00
Ross McDonald
36b9e2e077 Merge pull request #1157 from influxdata/ross-build-updates
Minor fixes to build script
2016-05-06 11:28:48 -05:00
Ross McDonald
5348937c3d Choose correct configuration when building for windows. 2016-05-06 10:46:29 -05:00
Ross McDonald
72fcacbbc7 Minor fixes to build script:
- Fix for --name build parameter
- Remove rc parameter from build script
- Fix regression on first-level tarball directory structure
- Convert any dashes/underscores in version tag to tilde
2016-05-05 14:02:34 -05:00
Lukasz Jagiello
4c28f15b35 Fix #1148 - chatty MySQL
Two additional config options to reduce MySQL metrics

With:
 - gather_table_lock_waits = false
 - gather_event_waits = false

```
| wc -l
34
```

With:
 - gather_table_lock_waits = true
 - gather_event_waits = true

```
| wc -l
50040
```

closes #1148
closes #1149
2016-05-04 10:23:54 -06:00
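A config sketch with the two new options disabled, assuming a single local server (the DSN is a placeholder):
```
[[inputs.mysql]]
  ## placeholder DSN
  servers = ["user:password@tcp(127.0.0.1:3306)/"]
  ## keep the metric volume down by skipping these perf_schema tables
  gather_table_lock_waits = false
  gather_event_waits = false
```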
Marko Crnic
095ef04c04 Fix formatting for haproxy tests
closes #1146
2016-05-04 09:47:39 -06:00
Marko Crnic
7d49979658 Update haproxy input plugin sample configuration 2016-05-04 09:46:44 -06:00
Marko Crnic
7a36695a21 Add tests for haproxy stats socket 2016-05-04 09:46:44 -06:00
Marko Crnic
5865587bd0 Add stats socket support to haproxy plugin 2016-05-04 09:46:44 -06:00
Jörg Thalheim
219bf93566 docker: add container_id to blkio metrics
all container metrics except blkio have this field

closes #1150
2016-05-04 09:28:13 -06:00
Ross McDonald
8371546a66 Disable circle uploads to S3 until more testing can be done for external PR's. 2016-05-03 11:26:52 -05:00
John Engelman
0b9b7bddd7 Specify host mount prefix with envvar.
closes #1120
2016-05-02 15:53:30 -06:00
Rene Zbinden
4c8449f4bc fix sysstat timeout error
closes #1134
2016-05-02 15:17:11 -06:00
Ross McDonald
36d7b5c9ab Improvements to build.py:
- Now uses Python argparse for cleaner handling of arguments
- Added function documentation
- Removed a few unneeded functions
- Updated nightly logic to increment minor version
- Added support for building from specific branch or commit
- Changed --no-stash option to --no-uncommitted for clarity
- Added a --release flag, default package output will now contain the branch and commit hash in the version number
- Static builds are now listed as an architecture
- Changed default upload bucket to dl.influxdata.com/telegraf
- Don't include iteration in package name

closes #1040
2016-05-02 14:37:29 -06:00
Cameron Sparr
f2b0ea6722 value parser: doc & string handling 2016-05-02 12:17:20 -06:00
Cameron Sparr
46f4be88a6 Revert "exec plugin: allow using glob pattern in command list"
This reverts commit 6381efa7ce.
2016-05-02 12:07:17 -06:00
Jari Sukanen
6381efa7ce exec plugin: allow using glob pattern in command list
Allow using glob pattern in the command list in configuration. This enables for
example placing all commands in a single directory and using /path/to/dir/*.sh
as one of the commands to run all shell scripts in that directory.

Glob patterns are applied on every run of the commands, so matching commands can
be added without restarting telegraf.

closes #1127
2016-05-02 11:36:15 -06:00
Pierre Fersing
85ee66efb9 "DELAYED" Inserts were deprecated in MySQL 5.6.6
closes #1136
2016-05-02 11:23:28 -06:00
Victor Garcia
40dccf5b29 Metric for MongoDB jumbo chunks
closes #1128
2016-05-01 14:27:27 -06:00
Cameron Sparr
c114849a31 Use a timeout for docker list & stat cmds
closes #1133
2016-05-01 10:26:46 -06:00
Cameron Sparr
4e9798d0e6 agent and tags configs sometimes not applied
closes #1090
2016-04-29 19:44:01 -06:00
Cameron Sparr
a30b1a394f Kafka output: set max_retry=3 & required_acks=-1 as defaults
closes #1113
2016-04-29 18:51:45 -06:00
Cameron Sparr
91460436cf Changelog update 2016-04-29 12:32:04 -06:00
Cameron Sparr
3f807a9432 Implement timeouts for all exec command runners
First is to write an internal CombinedOutput and Run function with a
timeout.

Second, the following instances of command runners need to have timeouts:

    plugins/inputs/ping/ping.go
    125:	out, err := c.CombinedOutput()

    plugins/inputs/exec/exec.go
    91:	if err := cmd.Run(); err != nil {

    plugins/inputs/ipmi_sensor/command.go
    31:	err := cmd.Run()

    plugins/inputs/sysstat/sysstat.go
    194:	out, err := cmd.CombinedOutput()

    plugins/inputs/leofs/leofs.go
    185:	defer cmd.Wait()

    plugins/inputs/sysstat/sysstat.go
    282:	if err := cmd.Wait(); err != nil {

closes #1067
2016-04-29 12:06:22 -06:00
Cameron Sparr
cbe32c7482 Support default config paths
precedence will be:

 1. --config command-line option
 2. $TELEGRAF_CONFIG_PATH
 3. $HOME/.telegraf/telegraf.conf
 4. /etc/telegraf/telegraf.conf
2016-04-28 17:48:24 -06:00
Cameron Sparr
5d3c582ecf changelog update 2016-04-28 17:34:37 -06:00
Cameron Sparr
3ed006d216 Sanitize invalid opentsdb characters
closes #1098
2016-04-28 17:01:50 -06:00
Cameron Sparr
3e1026286b skip network-dependent unit tests in short mode 2016-04-28 14:44:08 -06:00
Cameron Sparr
b59266249d README fixups for udp_listener, statsd inputs
closes #1119
2016-04-28 13:11:41 -06:00
G-regL
015261a524 Sanitize Field name
Replace '/[sS]ec' with '_persec' and spaces with underscores.

closes #1118
2016-04-28 12:21:28 -06:00
Adithya B Cherunilam
024e1088eb Ensure that the post install script is compatible with RHEL 5
closes #1091
closes #1094
2016-04-28 11:58:06 -06:00
Cameron Sparr
08f4b1ae8a Update build to go 1.6.2 2016-04-28 11:41:16 -06:00
Bob Zoller
1390c22004 sanitize * to - in graphite serializer
closes #1110
2016-04-27 18:05:44 -06:00
Cameron Sparr
8742ead585 Change server_ -> jolokia_ in tags and other formatting 2016-04-27 16:00:58 -06:00
Cameron Sparr
59a297abe6 etc/telegraf.conf update 2016-04-27 15:46:21 -06:00
Simone Aiello
18636ea628 jolokia: use always POST
code refactor to use same prepareRequest method
for both 'agent' and 'proxy' mode

closes #1031
closes #1050
closes #473
2016-04-27 15:45:37 -06:00
Simone Aiello
cf5980ace2 jolokia: add proxy mode 2016-04-27 15:39:55 -06:00
Jesse Hanley
a7b0861436 Adding Jobstats support to Lustre2 input plugin
Lustre Jobstats allows for RPCs to be tagged with a value, such
as a job's ID.  This allows for per job statistics. This plugin
collects statistics and tags the data with the jobid.

closes #1107
2016-04-27 15:35:24 -06:00
Cameron Sparr
89f2c0b0a4 Cassandra: update plugin supported prefix & fix panic
fixes #1102
2016-04-27 15:23:05 -06:00
Cameron Sparr
ee4f4d7800 ping plugin: Set default timeout 2016-04-27 15:08:38 -06:00
Cameron Sparr
4de75ce621 Performance refactor of running_output buffers
closes #914
closes #967
2016-04-27 13:40:05 -06:00
John Engelman
1c4043ab39 Closes #1085 - allow for specifying AWS credentials in config.
closes #1085
closes #1086
2016-04-26 17:24:05 -06:00
Cameron Sparr
44c945b9f5 Tail unit tests and README tweaks 2016-04-26 10:43:41 -06:00
Cameron Sparr
c7719ac365 buffers: fix bug when Write called before AddMetric 2016-04-26 10:25:04 -06:00
Cameron Sparr
b9c24189e4 Tail input plugin 2016-04-26 09:42:06 -06:00
Cameron Sparr
411d8d7439 Fix leaky tcp connections in phpfpm plugin
closes #1089
2016-04-26 09:24:32 -06:00
Cameron Sparr
671b40df2a procstat: field prefix fixup 2016-04-25 20:10:34 -06:00
Cameron Sparr
249a860c6f procstat: fix newlines in tags 2016-04-25 19:57:38 -06:00
Mika Eloranta
0367a39e1f postgresql_extensible: custom address in metrics output
Allow overriding the metrics "server" tag with the specified
value. Can be used to give a more user-friendly value for the server
name.

closes #1093
2016-04-25 16:33:35 -06:00
Mika Eloranta
1a7340bb02 postgresql_extensible: fix nil field values
nil field values would break the output influxdb line protocol.
Skip them from the output.
2016-04-25 16:33:33 -06:00
Mika Eloranta
ce7d852d22 postgresql_extensible: configurable measurement name
The output measurement name can be configured per query.
2016-04-25 16:33:33 -06:00
Hannu Valtonen
01b01c5969 postgresql_extensible: Censor also other security related conn params
While these aren't quite as sensitive as passwords, they do tend to be
long filesystem paths that shouldn't be reported along with
every measurement.
2016-04-25 16:33:33 -06:00
Pierre Fersing
c159460b2c Refactor running_output buffering
closes #1087
2016-04-25 16:32:41 -06:00
Cameron Sparr
07728d7425 Refactor globpath pkg to return a map
this is so that we don't call os.Stat twice for every file matched
by Match(). Also changing the behavior to _not_ return the name of a
file that doesn't exist if it's not a glob.
2016-04-24 14:37:44 -06:00
Cameron Sparr
d3a25e4dc1 globpath refactor into pkg separate from filestat 2016-04-23 11:56:33 -06:00
zensqlmonitor
1751c35f69 SQL Server input. Fix datatype conversion. 2016-04-23 09:18:08 +02:00
zensqlmonitor
93f5b8cc4a Fix datatype conversion 2016-04-23 09:14:04 +02:00
Cameron Sparr
5b1e59a48c filestat plugin config fixup 2016-04-22 19:15:07 -06:00
Cameron Sparr
7b27cad1ba Dont specify AWS credential chain, use default
closes #1078
2016-04-22 11:43:20 -06:00
Cameron Sparr
1b083d63ab add gitattributes file 2016-04-22 11:42:22 -06:00
Cameron Sparr
23f2b47531 Ignore errors in systemd
closes #1022
2016-04-22 11:23:24 -06:00
Victor Garcia
194288c00e Adding replication lag metric
closes #1066
2016-04-22 11:07:32 -06:00
Cameron Sparr
f9c8ed0dc3 Add filestat plugin to README 2016-04-21 19:47:23 -06:00
Cameron Sparr
88def9b71b filestat input plugin
closes #929
2016-04-21 16:53:02 -06:00
Martin Gehrke
f818f44693 Added Network Interface Object block to Generic Queries examples in win_perf_counters/README.md
Network metrics are important, so this adds a block with a few Network Interface counters, plus a link to the full list of counter names, to the Generic Queries examples in plugins/inputs/win_perf_counters/README.md
2016-04-21 09:26:46 -04:00
Cameron Sparr
8a395fdb4a changelog update feature 1041 2016-04-20 18:36:14 -06:00
Cameron Sparr
c0588926b8 Add n_cpu field to system plugin
closes #1041
2016-04-20 18:22:04 -06:00
Cameron Sparr
f1b7ecb2a2 procstat: Add user, pidfile, pattern & exe tags
closes #1035
2016-04-20 13:18:07 -06:00
Cameron Sparr
4bcf157d88 Don't replace _ with . in datadog names
closes #1024
2016-04-20 09:06:13 -06:00
Cameron Sparr
2f7da03cce Do not log every tcp connect/disconnect
leaving as comments for whenever I rig up global debug logging.

closes #1062
2016-04-19 22:58:24 -06:00
Cameron Sparr
f1c995dcb8 Update etc/telegraf.conf 2016-04-19 18:01:41 -06:00
Cameron Sparr
9aec58c6b8 Don't allow inputs to overwrite host tag
closes #1054

This affects tags in the following plugins:

- cassandra
- disque
- rethinkdb
2016-04-19 17:44:33 -06:00
Victor Garcia
46aaaa9b70 Adding TTL metrics data
closes #1060
2016-04-19 17:02:25 -06:00
Larry Kim
46543d6323 Possible bug fix for oid_key collision
closes #1044
2016-04-19 17:00:04 -06:00
Cameron Sparr
a585119a67 Change prometheus doc to glob match 2016-04-19 14:46:37 -06:00
Cameron Sparr
8cc72368ca influxdb output: close connections & dont always overwrite
closes #1058
closes #1059

also see https://github.com/influxdata/influxdb/pull/6425
2016-04-19 13:40:08 -06:00
Cameron Sparr
92e57ee06c Set default tags in test accumulator
closes #1012
2016-04-18 19:24:17 -06:00
Eugene Chupriyanov
c737a19d9f Just close Riemann client on send metrics failure
Signed-off-by: Eugene Chupriyanov <e.chupriyanov@cpm.ru>

closes #1013
2016-04-18 17:25:36 -06:00
Eugene Chupriyanov
708a97d773 Try to reconnect to Riemann if metrics upload failed.
Signed-off-by: Eugene Chupriyanov <tchu@tchu.ru>

Error checks added

Don't Close() nil client

Signed-off-by: Eugene Chupriyanov <e.chupriyanov@cpm.ru>
2016-04-18 17:25:19 -06:00
Maksadbek
b95a90dbd6 updated README for mysql input plugin
closes #889
closes #403
2016-04-18 17:21:25 -06:00
Maksadbek
a2d1ee08d4 transposed the matrix of tags/fields for Lock Waits stats gathering 2016-04-18 17:11:26 -06:00
Maksadbek
7e64dc380f preventing tags from mutation by creating new tag for each metric 2016-04-18 17:11:26 -06:00
Maksadbek
046cb6a564 changed types to decrease needless uint64 to float64 casts 2016-04-18 17:11:26 -06:00
Maksadbek
644ce9edab fixed code regarding needless type casting; single creation of map 2016-04-18 17:11:26 -06:00
Maksadbek
059b601b13 mysql plugin conf field names are lowercase-underscored 2016-04-18 17:11:26 -06:00
Maksadbek
d59999f510 improvements on queries and additional comments 2016-04-18 17:11:26 -06:00
Maksadbek
c5d31e7527 statistics that are missing on MySQL 5.5 are turned off by default 2016-04-18 17:11:26 -06:00
Maksadbek
c121e38da6 mysql plugin, check for existence of table before scanning 2016-04-18 17:11:26 -06:00
Maksadbek
b16bc3d2e3 remove duplicate function; Mysql plugin GatherTableSchema is configurable 2016-04-18 17:11:26 -06:00
maksadbek
c732abbda2 Improved mysql plugin
shows global variables
	shows slave statuses
	shows size and count of binary log files
	shows information_schema.processlist stats
	shows perf table stats
	shows auto increments stats from information schema
	shows perf index stats
	shows table lock waits summary by table
	shows time and operations of event waits
	shows file event statuses
	shows events statements stats from perf_schema
	shows schema statistics
	refactored plugin, provided multiple fields per insert
2016-04-18 17:11:26 -06:00
Cameron Sparr
61d681a7c8 docker changelog update 2016-04-18 16:24:32 -06:00
Cameron Sparr
7828bc09cf Fixup docker blkio container name & docker doc 2016-04-18 16:20:46 -06:00
Cameron Sparr
36d330fea0 docker plugin schema refactor
- renaming cont_name and cont_image to container_name and
container_image.
- cont_id is now a field, called container_id
- docker_cpu, docker_mem, docker_net measurements have been renamed to
  docker_container_cpu, docker_container_mem, and docker_container_net

closes #1014
closes #1052
2016-04-18 15:16:59 -06:00
Cameron Sparr
4d46589d39 JSON input: make string ignores clear 2016-04-18 13:20:06 -06:00
chaton78
93f57edd3a Better logging for MQTT consumer
closes #1023
closes #921
2016-04-18 11:27:50 -06:00
chaton78
8ec8ae0587 Added onConnection and connectionLost Handlers 2016-04-18 11:25:03 -06:00
Pascal Larin
ce94e636bb Resubscribe if not using persistent sessions 2016-04-18 11:25:03 -06:00
Pascal Larin
21c7378b61 Handle onConnect 2016-04-18 11:25:03 -06:00
Thibault Cohen
75a9845d20 SNMP Fix #995
closes #995
2016-04-18 11:19:16 -06:00
Shahzheeb Khan
d638f6e411 mongodb readme and examples
mongodb readme and examples

closes #1039
2016-04-16 15:53:04 -06:00
Cameron Sparr
81d0a64d46 Adds support for removing/keeping tags from metrics
closes #706
2016-04-16 15:13:38 -06:00
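A hedged sketch of the new tag filters on an input and an output (tag names are placeholders):
```
[[inputs.disk]]
  ## drop this tag from all disk measurements (placeholder tag name)
  tagexclude = ["fstype"]

[[outputs.influxdb]]
  ## only keep these tags on metrics written by this output (placeholders)
  taginclude = ["host", "dc"]
```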
Cameron Sparr
f76739cb1b Release 0.12.1 2016-04-14 16:16:26 -06:00
Ross McDonald
7f992fd321 Changed nohup fallback command to use 'sudo -u' so that Telegraf doesn't run as the root user. 2016-04-14 13:56:20 -06:00
Cameron Sparr
e428d11add Update etc/telegraf.conf, README 2016-04-12 14:50:56 -06:00
Subhachandra Chandra
0ed5b75a14 Fixed CHANGELOG with the correct pull request id.
closes #681
closes #1009
2016-04-12 10:57:34 -06:00
subhachandrachandra
b1b4adec74 Added cassandra plugin to access metrics using jolokia and push them to influxdb. 2016-04-12 10:55:59 -06:00
Cameron Sparr
1934cc2e62 Add memstats to the influxdb input plugin
closes #958
2016-04-12 10:13:11 -06:00
Lukasz Jagiello
ae8cf8c35e Fix plugin name in README 2016-04-11 19:43:30 -06:00
Cameron Sparr
f5878eafb9 Create a template system for the graphite serializer
closes #925
closes #879
2016-04-11 16:30:18 -06:00
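A sketch of what a serializer template could look like on the graphite output (the server and template string are placeholder values):
```
[[outputs.graphite]]
  ## placeholder graphite relay
  servers = ["localhost:2003"]
  ## template controlling how tags, measurement and field map onto the dot-separated graphite name
  template = "host.tags.measurement.field"
```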
Cameron Sparr
27fe4f7062 Update changelog, etc/telegraf.conf
closes #998
2016-04-08 12:00:41 -06:00
Michele Fadda
7ad8b26297 dovecot: enabled global, user and ip queries 2016-04-08 11:56:08 -06:00
Cameron Sparr
1a383b7d90 Telegraf no longer depends on lsof
so remove it as a dependency from the linux packages.

closes #974
2016-04-08 11:27:33 -06:00
Shahzheeb Khan
445946792e Adding a few metric examples in the jolokia plugin
Adding a few metric examples in the jolokia plugin

closes #993
2016-04-08 11:20:47 -06:00
Shahzheeb Khan
de82c7d5ac Adding a few metric examples
Adding more metrics to the example to make the plugin easier to understand
2016-04-08 11:20:42 -06:00
Cameron Sparr
07f0d561dc Eliminate byte buffer, copy scanner.Bytes directly 2016-04-08 09:50:03 -06:00
Cameron Sparr
be379f3dac Refactor UDP & TCP input buffers
closes #991
2016-04-08 09:50:03 -06:00
Cameron Sparr
1bf904fe60 Godeps: update paho mqtt client dep
this might fix #921

see https://github.com/eclipse/paho.mqtt.golang/issues/32
2016-04-07 17:32:28 -06:00
Cameron Sparr
c6faf005cb Add sysstat dummy file for non-linux builds 2016-04-07 12:08:26 -06:00
Rene Zbinden
b534b58542 fix tests
closes #939
2016-04-07 11:54:41 -06:00
Rene Zbinden
920711533e move path lookup for sadf to init() 2016-04-07 10:45:20 -06:00
Rene Zbinden
194110433e change sadf options so that it also works on older linux distributions 2016-04-07 10:45:20 -06:00
Rene Zbinden
7926396d2a cleanup code, set dfltActivities in init() function, which removes an if from the collect() method 2016-04-07 10:45:20 -06:00
Rene Zbinden
797522e8ca change group=true by default 2016-04-07 10:45:20 -06:00
Rene Zbinden
64b5d1a269 add documentation about sadc path on different linux distributions 2016-04-07 10:45:20 -06:00
Rene Zbinden
d00d3802c9 fix build tags 2016-04-07 10:45:20 -06:00
Rene Zbinden
46fff13341 disable TestInterval with -race test option 2016-04-07 10:45:20 -06:00
Rene Zbinden
be3374a3ef remove interval configuration 2016-04-07 10:45:20 -06:00
Rene Zbinden
264ac0b017 fix race condition 2016-04-07 10:45:20 -06:00
Rene Zbinden
17033b3c6c change readme 2016-04-07 10:45:20 -06:00
Rene Zbinden
c4ea122d66 add sysstat plugin 2016-04-07 10:45:20 -06:00
Cameron Sparr
90185dc6b3 cleanup & comment http_response def config
closes #332
2016-04-07 10:37:52 -06:00
Luke Swithenbank
377b030d88 update to 5 second default and string map for headers 2016-04-07 10:28:39 -06:00
Luke Swithenbank
437bd87d7c added tests and did some refactoring 2016-04-07 10:28:39 -06:00
Luke Swithenbank
73a7916ce3 take a request body as a param 2016-04-07 10:28:39 -06:00
Luke Swithenbank
f947fa86e3 update to allow for following redirects 2016-04-07 10:28:39 -06:00
Luke Swithenbank
7219efbdb7 add the ability to parse http headers 2016-04-07 10:28:39 -06:00
Luke Swithenbank
b7435b9cd1 fmt 2016-04-07 10:28:39 -06:00
Luke Swithenbank
207ab5a0d1 update to make a working sample_config 2016-04-07 10:28:39 -06:00
Luke Swithenbank
70aa0ef85d add plugin to all 2016-04-07 10:28:39 -06:00
Luke Swithenbank
dfbe231a51 add http_response plugin 2016-04-07 10:28:39 -06:00
Cameron Sparr
cce35da366 Godeps_windows: update file 2016-04-07 10:00:10 -06:00
Cameron Sparr
1a612bcae9 Update README and etc/telegraf.conf 2016-04-06 13:49:50 -06:00
Josh Hardy
d5b9e003fe Add CloudWatch input plugin
Rebased commit of previously reviewed branch.
Added cloudwatch client Mock and more rich unit tests.

closes #935
closes #936
2016-04-06 13:45:06 -06:00
Sergio Jimenez
e19c474a92 input(docker): Cleanup
* Removed leftovers, unused code

closes #957
fixes #645
2016-04-06 12:01:36 -06:00
Sergio Jimenez
8274798499 fix(Godeps): Added github.com/opencontainers/runc 2016-04-06 11:55:30 -06:00
Sergio Jimenez
9f68a32934 fix(): Last link on README 2016-04-06 11:55:30 -06:00
Sergio Jimenez
5c688daff1 input(docker): Updated README
* Replaced links to fsouza/go-dockerclient by docker/engine-api
2016-04-06 11:55:30 -06:00
Sergio Jimenez
741fb1181f godeps(): Updated Godeps file for engine-api
* Added required deps for engine-api
* Removed fsouza/go-dockerclient
2016-04-06 11:55:30 -06:00
Sergio Jimenez
fd1f05c8e0 input(docker): Fixed io sectors/io_time recursive
* On engine-api sectors_recursive and io_time_recursive have no Op
2016-04-06 11:55:30 -06:00
Sergio Jimenez
708cbf937f input(docker): Fixed tests to work with engine-api
* Modified tests to work with engine-api
2016-04-06 11:55:30 -06:00
Sergio Jimenez
32213cad01 input(docker): docker/engine-api
* Made required changes to get it to compile
* First manual tests looking good, still unit tests need fixing
* Made go linter happier
2016-04-06 11:55:30 -06:00
Ricard Clau
9320a6e115 windows service docs
closes #954
2016-04-06 11:50:36 -06:00
Cameron Sparr
0f16c0f4cf Reduce TCP listener allocations 2016-04-05 17:37:09 -06:00
Cameron Sparr
30464396d9 Make the UDP input buffer only once 2016-04-05 17:35:43 -06:00
Cameron Sparr
64066c4ea8 Update input data format readme 2016-04-05 17:27:04 -06:00
Cameron Sparr
7e97787d9d More readme fixups 2016-04-05 16:17:45 -06:00
Cameron Sparr
40f2dd8c6c Readme fixup for exec plugin 2016-04-05 15:22:58 -06:00
Cameron Sparr
4dd364e1c3 Update all readme instances of data formats 2016-04-05 14:42:20 -06:00
Cameron Sparr
03f2a35b31 Update jolokia plugin readme 2016-04-05 13:54:02 -06:00
Martti Rannanjärvi
73bd98df57 dovecot: remove extra newline from stats query
Extra newline in the stats query is interpreted as an empty query
which is an error for dovecot.

closes #972
2016-04-05 10:54:27 -06:00
Armin Wolfermann
bcf1fc658d ipmi_sensors: Allow : in password
closes #969
2016-04-05 10:52:41 -06:00
Cameron Sparr
863cbe512d processes plugin: fix case where there are spaces in cmd name
fixes #968
2016-04-05 10:27:30 -06:00
Cameron Sparr
d871e9aee7 Dummy kernel plugin added for consistent config generation 2016-04-04 17:43:53 -06:00
Cameron Sparr
70ef61ac6d Release 0.12 2016-04-04 16:34:41 -06:00
Cameron Sparr
a4a140bfad etc/telegraf.conf update for procstat change 2016-04-04 16:30:24 -06:00
Florent Ramière
d2d91e713a Add plugin links
closes #964
2016-04-04 16:28:36 -06:00
Cameron Sparr
d9bb1ceaec Changelog update 2016-04-04 16:12:50 -06:00
Pierre Fersing
5fe8903fd2 Use timeout smaller than 10 seconds
closes #959
2016-04-04 16:10:23 -06:00
Cameron Sparr
8509101b83 drop cpu_time_* from procstat by default
closes #963
2016-04-04 16:10:09 -06:00
Cameron Sparr
357849c348 Godeps: update wvanbergen/kafka dependency
see https://github.com/wvanbergen/kafka/pull/87

fixes #805
2016-04-02 13:45:24 -06:00
Nikhil Bafna
0f1b4e06f5 Update README.md
Fix redis input plugin name in configuration example
2016-04-02 10:13:21 +05:30
Cameron Sparr
8e041420cd config: parse environment variables in the config file
closes #663
2016-04-01 16:06:19 -06:00
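A sketch of the idea, assuming the variables are exported in telegraf's environment (the variable names are placeholders):
```
[global_tags]
  ## $DATACENTER is read from the environment when the config is parsed (placeholder variable)
  dc = "$DATACENTER"

[[outputs.influxdb]]
  ## credentials can likewise be kept out of the file (placeholder variables)
  username = "$INFLUX_USER"
  password = "$INFLUX_PASSWORD"
```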
Rubycut
9211d22b2b Add writing in documentation.
closes #950
2016-04-01 11:46:32 -06:00
Cameron Sparr
f5246eb167 Update changelog with config file PR 2016-04-01 11:45:09 -06:00
Cameron Sparr
e436b2d720 Cleanup & standardize config file
changes:

- -sample-config will now comment out all but a few default plugins.
- config file parse errors will output path to bad conf file.
- cleanup 80-char line-length and some other style issues.
- default package conf file will now have all plugins, but commented
  out.

closes #199
closes #944
2016-04-01 10:59:53 -06:00
Florent Ramière
51f4e9c0d3 Update changelog
closes #945
2016-04-01 10:09:04 -06:00
Florent Ramière
8c3371c4ac Use numerical codes instead of symbolic ones 2016-04-01 10:08:55 -06:00
Florent Ramière
6ff0fc6d83 Add compression/acks/retry conf to Kafka output plugin
The following configuration is now possible

	## CompressionCodec represents the various compression codecs recognized by Kafka in messages.
	##  "none" : No compression
	##  "gzip" : Gzip compression
	##  "snappy" : Snappy compression
	# compression_codec = "none"

	##  RequiredAcks is used in Produce Requests to tell the broker how many replica acknowledgements it must see before responding
	##  "none" : the producer never waits for an acknowledgement from the broker. This option provides the lowest latency but the weakest durability guarantees (some data will be lost when a server fails).
	##  "leader" : the producer gets an acknowledgement after the leader replica has received the data. This option provides better durability as the client waits until the server acknowledges the request as successful (only messages that were written to the now-dead leader but not yet replicated will be lost).
	##  "leader_and_replicas" : the producer gets an acknowledgement after all in-sync replicas have received the data. This option provides the best durability, we guarantee that no messages will be lost as long as at least one in-sync replica remains.
	# required_acks = "leader_and_replicas"

	##  The total number of times to retry sending a message
	# max_retry = "3"
2016-04-01 10:08:55 -06:00
Cameron Sparr
9347a70425 Fix httpjson README
closes #947
2016-03-31 20:37:04 -06:00
Cameron Sparr
91957f0848 Update Godeps_windows file to HEAD 2016-03-30 14:43:05 -06:00
Cameron Sparr
62105bb353 Use github paho mqtt client instead of gerrit
this might fix #921
2016-03-30 11:54:01 -06:00
Rudenkovk Konstantin
e03f684508 Fix parse fcgi URI path in php-fpm input module
closes #934
2016-03-30 10:34:48 -06:00
Ross McDonald
2f41ae24f8 Swap systemd command, as it was causing issues on Debian. 2016-03-30 10:17:31 -06:00
Cameron Sparr
4ad551be9a add '*' to metric prefixes for consistency 2016-03-29 17:00:51 -06:00
Cameron Sparr
bd640ae2c5 changelog fixup 2016-03-29 12:19:07 -06:00
Cameron Sparr
2cfc882c62 changelog & readme update 2016-03-29 12:18:23 -06:00
Cameron Sparr
21ece2d76d Convert ipmi stats/tags to underscore and lowercase
closes #888
2016-03-29 11:39:57 -06:00
张光权
d055d7f496 Add the ipmi plugin 2016-03-29 11:39:57 -06:00
Cameron Sparr
b1cfb1afe4 Deprecate statsd convert_names option, expose separator
closes #876
2016-03-28 12:13:15 -06:00
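A minimal sketch of the exposed separator option on the statsd listener (values shown are assumed defaults, not taken from this commit):
```
[[inputs.statsd]]
  service_address = ":8125"
  ## convert_names is deprecated; set the separator directly instead
  metric_separator = "_"
```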
Cameron Sparr
2f215356d6 Update statsd graphite parser link to telegraf version 2016-03-28 11:57:51 -06:00
Adam Argo
e07c79259b PR feedback changes
closes #927
2016-03-28 10:43:34 -06:00
Adam Argo
59085f072a adds ability to parse datadog-formatted tags in the statsd input 2016-03-28 10:43:26 -06:00
Cameron Sparr
474d6db42f Don't log every string metric that prometheus doesnt support 2016-03-23 16:01:06 -06:00
Thibault Cohen
a95710ed0c SNMP plugin fixes
fixes #873
2016-03-22 22:58:02 -06:00
JP
51d7724255 add verifyValue func for datadog and librato, bail if no good
closes #906
2016-03-22 15:22:57 -06:00
Cameron Sparr
276e7629bd memcached unix socket: fix panic. Do not recreate conn inside if
closes #841
2016-03-22 15:12:35 -06:00
Cameron Sparr
69606a45e0 Fix prometheus label names, and dont panic if invalid
fixes #907
2016-03-22 12:29:55 -06:00
Chris Goller
7f65ffcb15 Add optional parameters to influxdb output README 2016-03-22 09:14:25 -06:00
Cameron Sparr
4f5f6761f3 Update gopsutil dependency
closes #656
2016-03-22 09:13:31 -06:00
Cameron Sparr
f543dbb42f Allow users to tell telegraf Agent not to include host tag
closes #848
2016-03-21 15:51:10 -06:00
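For illustration, a sketch of the agent-level switch, assuming the option is spelled omit_hostname as in later telegraf configs:
```
[agent]
  ## don't add the host tag to collected metrics (assumed option name)
  omit_hostname = true
```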
Cameron Sparr
5917a42997 influxdb output: quote the database name
closes #898
2016-03-21 14:37:33 -06:00
Cameron Sparr
fbe1664214 README cleanup and update 2016-03-21 14:30:59 -06:00
david birdsong
d09bb13cb6 special case 'value'
it usually connotes a single-value metric, so appending it just adds clutter

closes #793
2016-03-21 13:49:34 -06:00
david birdsong
31c323c097 fix prometheus output
if i understand the prometheus data model correctly, the current output
for this plugin is unusable

prometheus only accepts a single value per measurement. prior to this change, the range loop
causes a measurement to end up w/ a random value

for instance:

net,dc=sjc1,grp_dashboard=1,grp_home=1,grp_hwy_fetcher=1,grp_web_admin=1,host=sjc1-b4-8,hw=app,interface=docker0,state=live
bytes_recv=477596i,bytes_sent=152963303i,drop_in=0i,drop_out=0i,err_in=0i,err_out=0i,packets_recv=7231i,packets_sent=11460i
1457121990003778992

this 'net' measurement would have all its tags copied to prometheus
labels, but any of 152963303, or 0, or 7231 as a value for
'net' depending on which field is last in the map iteration

this change expands the fields into new measurements by appending
the field name to the influxdb measurement name.

ie, the above example results with 'net' dropped and new measurements
to take its place:
	net_bytes_recv
	net_bytes_sent
	net_drop_in
	net_err_in
	net_packets_recv
	net_packets_sent

i hope this can be merged, i love telegraf's composability of tags and
filtering
2016-03-21 13:49:09 -06:00
Thibault Cohen
8f09aadfdf Add nagios parser for exec input plugin
closes #762
2016-03-21 13:34:47 -06:00
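A sketch of using the nagios data format with the exec input (the check command is a placeholder, and the plural commands list form is assumed):
```
[[inputs.exec]]
  ## placeholder nagios-style check command
  commands = ["/usr/lib/nagios/plugins/check_load -w 5,6,7 -c 10,15,20"]
  ## parse the nagios plugin output format
  data_format = "nagios"
```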
Chris H (CruftMaster)
20b4e8c779 GREEDY field templates for the graphite parser, and support for multiple specific field names
closes #789
2016-03-21 13:32:51 -06:00
Cameron Sparr
402a0108ae Merge pull request #896 from jipperinbham/graphite-tag-sanitizer
sanitize known issue characters from graphite tag name
2016-03-21 12:29:05 -06:00
Cameron Sparr
9de4a8efcf Update readme, changelog for couchbase plugin
closes #866
closes #482
2016-03-21 12:12:23 -06:00
Vebjorn Ljosa
077fa2e6b9 Improve README for couchabase input plugin
Proper terminology and case. Examples for tags. Example output.
2016-03-21 12:09:32 -06:00
Vebjorn Ljosa
2ae9316f48 Add examples in documentation for couchbase input plugin 2016-03-21 12:09:32 -06:00
Vebjorn Ljosa
9b5a90e3b9 Unit test couchbase input plugin 2016-03-21 12:09:32 -06:00
Vebjorn Ljosa
483942dc41 Comment on default pool name 2016-03-21 12:09:32 -06:00
Vebjorn Ljosa
2ddda6457f Convert measurement names to snake_case 2016-03-21 12:09:32 -06:00
Vebjorn Ljosa
681e695170 Don't copy lock when ranging over map
Make `go vet` happy.
2016-03-21 12:09:32 -06:00
Vebjorn Ljosa
a043664dc4 Couchbase input plugin 2016-03-21 12:09:32 -06:00
JP
e940f99646 sanitize known issue characters from graphite tag name 2016-03-21 10:01:51 -05:00
Cameron Sparr
22073042a9 Merge pull request #891 from jipperinbham/librato-serialize-fix
replace @ character with - for librato
2016-03-18 17:00:00 -06:00
Cameron Sparr
2634cc408a Update README 2016-03-18 11:26:05 -06:00
Thomas Menard
36446bcbc2 Remove the columns used as tag
closes #844
2016-03-18 11:25:04 -06:00
Thomas Menard
b371ec5cf6 Add the postgresql_extensible plugin
This plugin is intended to add an extended support of Postgresql
compared to the legacy postgres plugin.

Basically, the plugin doesn't have any metrics defined and it's up to the
user to define them in telegraf.conf (as a TOML structure).

Each query can have its own specific tags, and can be written specifically
using a where clause in order to optionally filter per database name.

To be more generic, a minimum postgresql version has been defined per
query in case you have 2 different versions of Postgresql running on the
same host.
2016-03-18 11:23:02 -06:00
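A hedged sketch of one user-defined query, with placeholder connection and query values (the field names are assumptions based on the plugin's config structure):
```
[[inputs.postgresql_extensible]]
  ## placeholder connection string
  address = "host=localhost user=postgres sslmode=disable"

  [[inputs.postgresql_extensible.query]]
    ## placeholder query
    sqlquery = "SELECT * FROM pg_stat_database"
    ## minimum postgresql version this query applies to
    version = 901
    withdbname = false
    tagvalue = ""
```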
HUANG Wei
18f4afb388 Inherit previous instance's stats in statsd plugin.
This way, after a reload, the stats won't restart again, at least for the
counter type.

closes #887
2016-03-18 11:20:35 -06:00
Cameron Sparr
77dcbe95c0 Do not write metrics if there are 0 to write
closes #884
2016-03-18 10:54:51 -06:00
Cameron Sparr
061b749041 TLS config: if only given ssl_ca, create tls config anyways
fixes #890
2016-03-18 10:53:55 -06:00
JP
5b0c3951f6 replace @ character with - for librato 2016-03-18 11:25:51 -05:00
Cameron Sparr
f2394b5a8d Merge pull request #886 from entertainyou/typo
Fix typo, should be input instead of output.
2016-03-17 21:36:26 -06:00
Cameron Sparr
fe7b884cc9 Update changelog 2016-03-17 20:40:22 -06:00
Cameron Sparr
5c1b635229 Value parser, for parsing a single value into a metric
closes #849
2016-03-17 20:08:21 -06:00
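A sketch of the value parser on an exec input (the command and measurement name are placeholders):
```
[[inputs.exec]]
  ## placeholder command that prints a single number
  commands = ["/usr/local/bin/open_files.sh"]
  name_override = "open_files"
  ## parse the single printed value as an integer field
  data_format = "value"
  data_type = "integer"
```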
HUANG Wei
63410491b7 Fix typo, should be input instead of output. 2016-03-18 10:06:44 +08:00
Cameron Sparr
26e0a4bbde Merge pull request #882 from VasuBalakrishnan/master
Fixed SQL Server Plugin issues #881
2016-03-17 19:41:42 -06:00
Balakrishnan
c356e56522 Updated Change log #881 2016-03-17 19:56:39 -04:00
Cameron Sparr
fd26bbbd0b Merge pull request #883 from ljagiello/minor-changelog-fix
Duplicated line
2016-03-17 17:09:08 -06:00
Lukasz Jagiello
7aa55371b5 Duplicate line 2016-03-17 15:54:22 -07:00
Balakrishnan
ba06533c3e Fixed SQL Server Plugin issues #881 2016-03-17 18:01:19 -04:00
Marcelo Salazar
d66d66e74b added json serializer
closes #878
2016-03-17 13:51:16 -06:00
Jonathan Chauncey
d6b5f3efe6 fix(prometheus): Add support for bearer token to prometheus input plugin
closes #864
merges #880
2016-03-17 13:47:22 -06:00
Cameron Sparr
b5a431624b Close UDP listener in udp_listener plugin
also adding waitgroups to udp_listener and statsd plugins to verify that
all goroutines have been cleaned up before Stop() exits.

closes #869
2016-03-17 10:51:35 -06:00
HUANG Wei
8e7284de5a fixup! Close the UDP connection in Stop() of statsd input plugin. 2016-03-17 10:51:35 -06:00
HUANG Wei
b2d38cd31c Close the UDP connection in Stop() of statsd input plugin.
If not, when doing a reload we may try to listen on the same port and get
an error about the address already being in use.
2016-03-17 10:51:35 -06:00
Cameron Sparr
a15fed35b7 Merge pull request #875 from Onefootball/feature/link-freebsd-package
Add FreeBSD tarball location to README
2016-03-17 10:51:21 -06:00
Dirk Pahl
eee6b0059c Add FreeBSD tarball location to README 2016-03-17 16:53:55 +01:00
Eugene Dementiev
530b4f3bee [amqp output] Allow external auth (cert-based tls auth)
closes #863
2016-03-16 19:03:41 -06:00
Thibault Cohen
bac1c223de Improve prometheus plugin
closes #707
2016-03-16 19:00:06 -06:00
marknmel
57f7582b4d Cleanup of Exec Inputs documentation - redux
Hi @sparrc

(Sorry for the noise - new pr)

closes #853

Please find some improvements to readability including the \n for the exec/telegraf line-protocol input.

I hope you (and others) find it easier to read.

/Mark

This is an amend
2016-03-16 18:55:48 -06:00
Cameron Sparr
5afe819ebd Changelog update for 0.11.1 2016-03-16 14:55:40 -06:00
402 changed files with 38412 additions and 8162 deletions

.gitattributes (new file, 4 additions)

@@ -0,0 +1,4 @@
CHANGELOG.md merge=union
README.md merge=union
plugins/inputs/all/all.go merge=union
plugins/outputs/all/all.go merge=union

.github/ISSUE_TEMPLATE.md (new file, 44 additions)

@@ -0,0 +1,44 @@
## Directions
GitHub Issues are reserved for actionable bug reports and feature requests.
General questions should be sent to the [InfluxDB mailing list](https://groups.google.com/forum/#!forum/influxdb).
Before opening an issue, search for similar bug reports or feature requests on GitHub Issues.
If no similar issue can be found, fill out either the "Bug Report" or the "Feature Request" section below.
Erase the other section and everything on and above this line.
*Please note, the quickest way to fix a bug is to open a Pull Request.*
## Bug report
### Relevant telegraf.conf:
### System info:
[Include Telegraf version, operating system name, and other relevant details]
### Steps to reproduce:
1. ...
2. ...
### Expected behavior:
### Actual behavior:
### Additional info:
[Include gist of relevant config, logs, etc.]
## Feature Request
Opening a feature request kicks off a discussion.
### Proposal:
### Current behavior:
### Desired behavior:
### Use case: [Why is this important (helps with prioritizing requests)]

.github/PULL_REQUEST_TEMPLATE.md (new file, 5 additions)

@@ -0,0 +1,5 @@
### Required for all PRs:
- [ ] CHANGELOG.md updated (we recommend not updating this until the PR has been approved by a maintainer)
- [ ] Sign [CLA](https://influxdata.com/community/cla/) (if not already signed)
- [ ] README.md updated (if adding a new plugin)

.gitignore (1 addition)

@@ -1,3 +1,4 @@
build
tivan
.vagrant
/telegraf

CHANGELOG.md

@@ -1,4 +1,409 @@
## v0.11.1 [unreleased]
## v1.1 [unreleased]
### Release Notes
### Features
- [#1782](https://github.com/influxdata/telegraf/pull/1782): Allow numeric and non-string values for tag_keys.
- [#1694](https://github.com/influxdata/telegraf/pull/1694): Adding Gauge and Counter metric types.
- [#1606](https://github.com/influxdata/telegraf/pull/1606): Remove carriage returns from exec plugin output on Windows
- [#1674](https://github.com/influxdata/telegraf/issues/1674): elasticsearch input: configurable timeout.
- [#1607](https://github.com/influxdata/telegraf/pull/1607): Massage metric names in Instrumental output plugin
- [#1572](https://github.com/influxdata/telegraf/pull/1572): mesos improvements.
- [#1513](https://github.com/influxdata/telegraf/issues/1513): Add Ceph Cluster Performance Statistics
- [#1650](https://github.com/influxdata/telegraf/issues/1650): Ability to configure response_timeout in httpjson input.
- [#1685](https://github.com/influxdata/telegraf/issues/1685): Add additional redis metrics.
- [#1539](https://github.com/influxdata/telegraf/pull/1539): Added capability to send metrics through Http API for OpenTSDB.
- [#1471](https://github.com/influxdata/telegraf/pull/1471): iptables input plugin.
- [#1542](https://github.com/influxdata/telegraf/pull/1542): Add filestack webhook plugin.
- [#1599](https://github.com/influxdata/telegraf/pull/1599): Add server hostname for each docker measurements.
- [#1697](https://github.com/influxdata/telegraf/pull/1697): Add NATS output plugin.
- [#1407](https://github.com/influxdata/telegraf/pull/1407): HTTP service listener input plugin.
- [#1699](https://github.com/influxdata/telegraf/pull/1699): Add database blacklist option for Postgresql
- [#1791](https://github.com/influxdata/telegraf/pull/1791): Add Docker container state metrics to Docker input plugin output
- [#1755](https://github.com/influxdata/telegraf/issues/1755): Add support to SNMP for IP & MAC address conversion.
- [#1729](https://github.com/influxdata/telegraf/issues/1729): Add support to SNMP for OID index suffixes.
- [#1813](https://github.com/influxdata/telegraf/pull/1813): Change default arguments for SNMP plugin.
- [#1686](https://github.com/influxdata/telegraf/pull/1686): Mesos input plugin: very high-cardinality mesos-task metrics removed.
- [#1839](https://github.com/influxdata/telegraf/pull/1839): Exact match with pgrep -x option in procstat
- [#1838](https://github.com/influxdata/telegraf/pull/1838): Logging overhaul to centralize the logger & log levels, & provide a logfile config option.
### Bugfixes
- [#1746](https://github.com/influxdata/telegraf/issues/1746): Fix handling of non-string values for JSON keys listed in tag_keys.
- [#1628](https://github.com/influxdata/telegraf/issues/1628): Fix mongodb input panic on version 2.2.
- [#1733](https://github.com/influxdata/telegraf/issues/1733): Fix statsd scientific notation parsing
- [#1716](https://github.com/influxdata/telegraf/issues/1716): Sensors plugin strconv.ParseFloat: parsing "": invalid syntax
- [#1530](https://github.com/influxdata/telegraf/issues/1530): Fix prometheus_client reload panic
- [#1764](https://github.com/influxdata/telegraf/issues/1764): Fix kafka consumer panic when nil error is returned down errs channel.
- [#1768](https://github.com/influxdata/telegraf/pull/1768): Speed up statsd parsing.
- [#1751](https://github.com/influxdata/telegraf/issues/1751): Fix powerdns integer parse error handling.
- [#1752](https://github.com/influxdata/telegraf/issues/1752): Fix varnish plugin defaults not being used.
- [#1517](https://github.com/influxdata/telegraf/issues/1517): Fix windows glob paths.
- [#1137](https://github.com/influxdata/telegraf/issues/1137): Fix issue loading config directory on windows.
- [#1772](https://github.com/influxdata/telegraf/pull/1772): Windows remote management interactive service fix.
- [#1702](https://github.com/influxdata/telegraf/issues/1702): sqlserver, fix issue when case sensitive collation is activated.
- [#1823](https://github.com/influxdata/telegraf/issues/1823): Fix huge allocations in http_listener when dealing with huge payloads.
## v1.0.1 [unreleased]
### Bugfixes
- [#1775](https://github.com/influxdata/telegraf/issues/1775): Prometheus output: Fix bug with multi-batch writes.
- [#1738](https://github.com/influxdata/telegraf/issues/1738): Fix unmarshal of influxdb metrics with null tags.
- [#1773](https://github.com/influxdata/telegraf/issues/1773): Add configurable timeout to influxdb input plugin.
- [#1785](https://github.com/influxdata/telegraf/pull/1785): Fix statsd no default value panic.
## v1.0 [2016-09-08]
### Release Notes
**Breaking Change** The SNMP plugin is being deprecated in its current form.
There is a [new SNMP plugin](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/snmp)
which fixes many of the issues and confusions
of its predecessor. For users wanting to continue to use the deprecated SNMP
plugin, you will need to change your config file from `[[inputs.snmp]]` to
`[[inputs.snmp_legacy]]`. The configuration of the new SNMP plugin is _not_
backwards-compatible.
**Breaking Change**: Aerospike main server node measurements have been renamed
aerospike_node. Aerospike namespace measurements have been renamed to
aerospike_namespace. They will also now be tagged with the node_name
that they correspond to. This has been done to differentiate measurements
that pertain to node vs. namespace statistics.
**Breaking Change**: users of github_webhooks must change to the new
`[[inputs.webhooks]]` plugin.
This means that the default github_webhooks config:
```
# A Github Webhook Event collector
[[inputs.github_webhooks]]
## Address and port to host Webhook listener on
service_address = ":1618"
```
should now look like:
```
# A Webhooks Event collector
[[inputs.webhooks]]
## Address and port to host Webhook listener on
service_address = ":1618"
[inputs.webhooks.github]
path = "/"
```
- Telegraf now supports being installed as an official windows service,
which can be installed via
`> C:\Program Files\Telegraf\telegraf.exe --service install`
- `flush_jitter` behavior has been changed. The random jitter will now be
evaluated at every flush interval, rather than once at startup. This makes it
consistent with the behavior of `collection_jitter`.
### Features
- [#1413](https://github.com/influxdata/telegraf/issues/1413): Separate container_version from container_image tag.
- [#1525](https://github.com/influxdata/telegraf/pull/1525): Support setting per-device and total metrics for Docker network and blockio.
- [#1466](https://github.com/influxdata/telegraf/pull/1466): MongoDB input plugin: adding per DB stats from db.stats()
- [#1503](https://github.com/influxdata/telegraf/pull/1503): Add tls support for certs to RabbitMQ input plugin
- [#1289](https://github.com/influxdata/telegraf/pull/1289): webhooks input plugin. Thanks @francois2metz and @cduez!
- [#1247](https://github.com/influxdata/telegraf/pull/1247): rollbar webhook plugin.
- [#1408](https://github.com/influxdata/telegraf/pull/1408): mandrill webhook plugin.
- [#1402](https://github.com/influxdata/telegraf/pull/1402): docker-machine/boot2docker no longer required for unit tests.
- [#1350](https://github.com/influxdata/telegraf/pull/1350): cgroup input plugin.
- [#1369](https://github.com/influxdata/telegraf/pull/1369): Add input plugin for consuming metrics from NSQD.
- [#1369](https://github.com/influxdata/telegraf/pull/1480): add ability to read redis from a socket.
- [#1387](https://github.com/influxdata/telegraf/pull/1387): **Breaking Change** - Redis `role` tag renamed to `replication_role` to avoid global_tags override
- [#1437](https://github.com/influxdata/telegraf/pull/1437): Fetching Galera status metrics in MySQL
- [#1500](https://github.com/influxdata/telegraf/pull/1500): Aerospike plugin refactored to use official client lib.
- [#1434](https://github.com/influxdata/telegraf/pull/1434): Add measurement name arg to logparser plugin.
- [#1479](https://github.com/influxdata/telegraf/pull/1479): logparser: change resp_code from a field to a tag.
- [#1411](https://github.com/influxdata/telegraf/pull/1411): Implement support for fetching hddtemp data
- [#1340](https://github.com/influxdata/telegraf/issues/1340): statsd: do not log every dropped metric.
- [#1368](https://github.com/influxdata/telegraf/pull/1368): Add precision rounding to all metrics on collection.
- [#1390](https://github.com/influxdata/telegraf/pull/1390): Add support for Tengine
- [#1320](https://github.com/influxdata/telegraf/pull/1320): Logparser input plugin for parsing grok-style log patterns.
- [#1397](https://github.com/influxdata/telegraf/issues/1397): ElasticSearch: now supports connecting to ElasticSearch via SSL
- [#1262](https://github.com/influxdata/telegraf/pull/1261): Add graylog input plugin.
- [#1294](https://github.com/influxdata/telegraf/pull/1294): consul input plugin. Thanks @harnash
- [#1164](https://github.com/influxdata/telegraf/pull/1164): conntrack input plugin. Thanks @robinpercy!
- [#1165](https://github.com/influxdata/telegraf/pull/1165): vmstat input plugin. Thanks @jshim-xm!
- [#1208](https://github.com/influxdata/telegraf/pull/1208): Standardized AWS credentials evaluation & wildcard CloudWatch dimensions. Thanks @johnrengelman!
- [#1264](https://github.com/influxdata/telegraf/pull/1264): Add SSL config options to http_response plugin.
- [#1272](https://github.com/influxdata/telegraf/pull/1272): graphite parser: add ability to specify multiple tag keys, for consistency with influxdb parser.
- [#1265](https://github.com/influxdata/telegraf/pull/1265): Make dns lookups for chrony configurable. Thanks @zbindenren!
- [#1275](https://github.com/influxdata/telegraf/pull/1275): Allow wildcard filtering of varnish stats.
- [#1142](https://github.com/influxdata/telegraf/pull/1142): Support for glob patterns in exec plugin commands configuration.
- [#1278](https://github.com/influxdata/telegraf/pull/1278): RabbitMQ input: made url parameter optional by using DefaultURL (http://localhost:15672) if not specified
- [#1197](https://github.com/influxdata/telegraf/pull/1197): Limit AWS GetMetricStatistics requests to 10 per second.
- [#1278](https://github.com/influxdata/telegraf/pull/1278) & [#1288](https://github.com/influxdata/telegraf/pull/1288) & [#1295](https://github.com/influxdata/telegraf/pull/1295): RabbitMQ/Apache/InfluxDB inputs: made url(s) parameter optional by using reasonable input defaults if not specified
- [#1296](https://github.com/influxdata/telegraf/issues/1296): Refactor of flush_jitter argument.
- [#1213](https://github.com/influxdata/telegraf/issues/1213): Add inactive & active memory to mem plugin.
- [#1543](https://github.com/influxdata/telegraf/pull/1543): Official Windows service.
- [#1414](https://github.com/influxdata/telegraf/pull/1414): Forking sensors command to remove C package dependency.
- [#1389](https://github.com/influxdata/telegraf/pull/1389): Add a new SNMP plugin.
### Bugfixes
- [#1619](https://github.com/influxdata/telegraf/issues/1619): Fix `make windows` build target
- [#1519](https://github.com/influxdata/telegraf/pull/1519): Fix error race conditions and partial failures.
- [#1477](https://github.com/influxdata/telegraf/issues/1477): nstat: fix inaccurate config panic.
- [#1481](https://github.com/influxdata/telegraf/issues/1481): jolokia: fix handling multiple multi-dimensional attributes.
- [#1430](https://github.com/influxdata/telegraf/issues/1430): Fix prometheus character sanitizing. Sanitize more win_perf_counters characters.
- [#1534](https://github.com/influxdata/telegraf/pull/1534): Add diskio io_time to FreeBSD & report timing metrics as ms (as linux does).
- [#1379](https://github.com/influxdata/telegraf/issues/1379): Fix covering Amazon Linux for post remove flow.
- [#1584](https://github.com/influxdata/telegraf/issues/1584): procstat missing fields: read/write bytes & count
- [#1472](https://github.com/influxdata/telegraf/pull/1472): diskio input plugin: set 'skip_serial_number = true' by default to avoid high cardinality.
- [#1426](https://github.com/influxdata/telegraf/pull/1426): nil metrics panic fix.
- [#1384](https://github.com/influxdata/telegraf/pull/1384): Fix datarace in apache input plugin.
- [#1399](https://github.com/influxdata/telegraf/issues/1399): Add `read_repairs` statistics to riak plugin.
- [#1405](https://github.com/influxdata/telegraf/issues/1405): Fix memory/connection leak in prometheus input plugin.
- [#1378](https://github.com/influxdata/telegraf/issues/1378): Trim BOM from config file for Windows support.
- [#1339](https://github.com/influxdata/telegraf/issues/1339): Prometheus client output panic on service reload.
- [#1461](https://github.com/influxdata/telegraf/pull/1461): Prometheus parser, protobuf format header fix.
- [#1334](https://github.com/influxdata/telegraf/issues/1334): Prometheus output, metric refresh and caching fixes.
- [#1432](https://github.com/influxdata/telegraf/issues/1432): Panic fix for multiple graphite outputs under very high load.
- [#1412](https://github.com/influxdata/telegraf/pull/1412): Instrumental output has better reconnect behavior
- [#1460](https://github.com/influxdata/telegraf/issues/1460): Remove PID from procstat plugin to fix cardinality issues.
- [#1427](https://github.com/influxdata/telegraf/issues/1427): Cassandra input: version 2.x "column family" fix.
- [#1463](https://github.com/influxdata/telegraf/issues/1463): Shared WaitGroup in Exec plugin
- [#1436](https://github.com/influxdata/telegraf/issues/1436): logparser: honor modifiers in "pattern" config.
- [#1418](https://github.com/influxdata/telegraf/issues/1418): logparser: error and exit on file permissions/missing errors.
- [#1499](https://github.com/influxdata/telegraf/pull/1499): Make the user able to specify full path for HAproxy stats
- [#1521](https://github.com/influxdata/telegraf/pull/1521): Fix Redis url, an extra "tcp://" was added.
- [#1330](https://github.com/influxdata/telegraf/issues/1330): Fix exec plugin panic when using single binary.
- [#1336](https://github.com/influxdata/telegraf/issues/1336): Fixed incorrect prometheus metrics source selection.
- [#1112](https://github.com/influxdata/telegraf/issues/1112): Set default Zookeeper chroot to empty string.
- [#1335](https://github.com/influxdata/telegraf/issues/1335): Fix overall ping timeout to be calculated based on per-ping timeout.
- [#1374](https://github.com/influxdata/telegraf/pull/1374): Change "default" retention policy to "".
- [#1377](https://github.com/influxdata/telegraf/issues/1377): Graphite output mangling '%' character.
- [#1396](https://github.com/influxdata/telegraf/pull/1396): Prometheus input plugin now supports x509 certs authentication
- [#1252](https://github.com/influxdata/telegraf/pull/1252) & [#1279](https://github.com/influxdata/telegraf/pull/1279): Fix systemd service. Thanks @zbindenren & @PierreF!
- [#1221](https://github.com/influxdata/telegraf/pull/1221): Fix influxdb n_shards counter.
- [#1258](https://github.com/influxdata/telegraf/pull/1258): Fix potential kernel plugin integer parse error.
- [#1268](https://github.com/influxdata/telegraf/pull/1268): Fix potential influxdb input type assertion panic.
- [#1283](https://github.com/influxdata/telegraf/pull/1283): Still send processes metrics if a process exited during metric collection.
- [#1297](https://github.com/influxdata/telegraf/issues/1297): disk plugin panic when usage grab fails.
- [#1316](https://github.com/influxdata/telegraf/pull/1316): Removed leaked "database" tag on redis metrics. Thanks @PierreF!
- [#1323](https://github.com/influxdata/telegraf/issues/1323): Processes plugin: fix potential error with /proc/net/stat directory.
- [#1322](https://github.com/influxdata/telegraf/issues/1322): Fix rare RHEL 5.2 panic in gopsutil diskio gathering function.
- [#1586](https://github.com/influxdata/telegraf/pull/1586): Remove IF NOT EXISTS from influxdb output database creation.
- [#1600](https://github.com/influxdata/telegraf/issues/1600): Fix quoting with text values in postgresql_extensible plugin.
- [#1425](https://github.com/influxdata/telegraf/issues/1425): Fix win_perf_counter "index out of range" panic.
- [#1634](https://github.com/influxdata/telegraf/issues/1634): Fix ntpq panic when field is missing.
- [#1637](https://github.com/influxdata/telegraf/issues/1637): Sanitize graphite output field names.
- [#1695](https://github.com/influxdata/telegraf/pull/1695): Fix MySQL plugin not sending 0 value fields.
## v0.13.1 [2016-05-24]
### Release Notes
- net_response and http_response plugins timeouts will now accept duration
strings, ie, "2s" or "500ms".
- Input plugin Gathers will no longer be logged by default, but a Gather for
_each_ plugin will be logged in Debug mode.
- Debug mode will no longer print every point added to the accumulator. This
functionality can be duplicated using the `file` output plugin and printing
to "stdout".
### Features
- [#1173](https://github.com/influxdata/telegraf/pull/1173): varnish input plugin. Thanks @sfox-xmatters!
- [#1138](https://github.com/influxdata/telegraf/pull/1138): nstat input plugin. Thanks @Maksadbek!
- [#1139](https://github.com/influxdata/telegraf/pull/1139): instrumental output plugin. Thanks @jasonroelofs!
- [#1172](https://github.com/influxdata/telegraf/pull/1172): Ceph storage stats. Thanks @robinpercy!
- [#1233](https://github.com/influxdata/telegraf/pull/1233): Updated golint gopsutil dependency.
- [#1238](https://github.com/influxdata/telegraf/pull/1238): chrony input plugin. Thanks @zbindenren!
- [#479](https://github.com/influxdata/telegraf/issues/479): per-plugin execution time added to debug output.
- [#1249](https://github.com/influxdata/telegraf/issues/1249): influxdb output: added write_consistency argument.
### Bugfixes
- [#1195](https://github.com/influxdata/telegraf/pull/1195): Docker panic on timeout. Thanks @zstyblik!
- [#1211](https://github.com/influxdata/telegraf/pull/1211): mongodb input. Fix possible panic. Thanks @kols!
- [#1215](https://github.com/influxdata/telegraf/pull/1215): Fix for possible gopsutil-dependent plugin hangs.
- [#1228](https://github.com/influxdata/telegraf/pull/1228): Fix service plugin host tag overwrite.
- [#1198](https://github.com/influxdata/telegraf/pull/1198): http_response: override request Host header properly
- [#1230](https://github.com/influxdata/telegraf/issues/1230): Fix Telegraf process hangup due to a single plugin hanging.
- [#1214](https://github.com/influxdata/telegraf/issues/1214): Use TCP timeout argument in net_response plugin.
- [#1243](https://github.com/influxdata/telegraf/pull/1243): Logfile not created on systemd.
## v0.13 [2016-05-11]
### Release Notes
- **Breaking change** in jolokia plugin. See
https://github.com/influxdata/telegraf/blob/master/plugins/inputs/jolokia/README.md
for updated configuration. The plugin will now support proxy mode and will make
POST requests.
- New [agent] configuration option: `metric_batch_size`. This option sets the
maximum number of metrics telegraf will batch before flushing to the
configured outputs. `metric_buffer_limit` now refers to the absolute maximum
number of metrics that can accumulate before metrics are dropped. See the
configuration sketch after this list.
- The `flush_buffer_when_full` option has been removed; this is now the
default and only behavior of telegraf.
- **Breaking Change**: docker plugin tags. The cont_id tag no longer exists;
it is now a field named container_id. Additionally, cont_image and
cont_name have been renamed to container_image and container_name.
- **Breaking Change**: docker plugin measurements. The `docker_cpu`, `docker_mem`,
`docker_blkio` and `docker_net` measurements are being renamed to
`docker_container_cpu`, `docker_container_mem`, `docker_container_blkio` and
`docker_container_net`. Why? Because these metrics are
specifically tracking per-container stats. The problem with per-container stats,
in some use-cases, is that if containers are short-lived AND names are not
kept consistent, then the series cardinality will balloon very quickly.
So adding "container" to each metric will:
(1) make it more clear that these metrics are per-container, and
(2) allow users to easily drop per-container metrics if cardinality is an
issue (`namedrop = ["docker_container_*"]`)
- `tagexclude` and `taginclude` are now available, which can be used to remove
tags from measurements on inputs and outputs. See
[the configuration doc](https://github.com/influxdata/telegraf/blob/master/docs/CONFIGURATION.md)
for more details.
- **Measurement filtering:** All measurement filters now match based on glob
only. Previously there was an undocumented behavior where filters would match
based on _prefix_ in addition to globs. This means that a filter like
`fielddrop = ["time_"]` will need to be changed to `fielddrop = ["time_*"]`
- **datadog**: measurement and field names will no longer have `_` replaced by `.`
- The following plugins have changed their tags to _not_ overwrite the host tag:
- cassandra: `host -> cassandra_host`
- disque: `host -> disque_host`
- rethinkdb: `host -> rethinkdb_host`
- **Breaking Change**: The `win_perf_counters` input has been changed to
sanitize field names, replacing `/Sec` and `/sec` with `_persec`, as well as
spaces with underscores. This is needed because Graphite doesn't like slashes
and spaces, and was failing to accept metrics that had them.
The `/[sS]ec` -> `_persec` is just to make things clearer and uniform.
- **Breaking Change**: snmp plugin. The `host` tag of the snmp plugin has been
changed to the `snmp_host` tag.
- The `disk` input plugin can now be configured with the `HOST_MOUNT_PREFIX` environment variable.
This value is prepended to any mountpaths discovered before retrieving stats,
but is not included in the reported path. This is necessary for reporting host disk stats when running from within a container.
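As referenced in the `metric_batch_size` note, several of the items above come
together in configuration like the following. This is a sketch only; the
numeric limits, the InfluxDB URL, and the tag name passed to `tagexclude` are
illustrative values, not recommendations:
```toml
[agent]
  ## Flush at most 1000 metrics per write to the outputs.
  metric_batch_size = 1000
  ## Drop metrics once 10000 have accumulated without being flushed.
  metric_buffer_limit = 10000

[[inputs.docker]]
  endpoint = "unix:///var/run/docker.sock"
  ## Remove a tag from every metric this input produces
  ## ("engine_host" is a hypothetical tag name used for illustration).
  tagexclude = ["engine_host"]

[[outputs.influxdb]]
  urls = ["http://localhost:8086"]
  ## Drop the per-container measurements if series cardinality is an issue.
  namedrop = ["docker_container_*"]
```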
### Features
- [#1031](https://github.com/influxdata/telegraf/pull/1031): Jolokia plugin proxy mode. Thanks @saiello!
- [#1017](https://github.com/influxdata/telegraf/pull/1017): taginclude and tagexclude arguments.
- [#1015](https://github.com/influxdata/telegraf/pull/1015): Docker plugin schema refactor.
- [#889](https://github.com/influxdata/telegraf/pull/889): Improved MySQL plugin. Thanks @maksadbek!
- [#1060](https://github.com/influxdata/telegraf/pull/1060): TTL metrics added to MongoDB input plugin
- [#1056](https://github.com/influxdata/telegraf/pull/1056): Don't allow inputs to overwrite host tags.
- [#1035](https://github.com/influxdata/telegraf/issues/1035): Add `user`, `exe`, `pidfile` tags to procstat plugin.
- [#1041](https://github.com/influxdata/telegraf/issues/1041): Add `n_cpus` field to the system plugin.
- [#1072](https://github.com/influxdata/telegraf/pull/1072): New Input Plugin: filestat.
- [#1066](https://github.com/influxdata/telegraf/pull/1066): Replication lag metrics for MongoDB input plugin
- [#1086](https://github.com/influxdata/telegraf/pull/1086): Ability to specify AWS keys in config file. Thanks @johnrengleman!
- [#1096](https://github.com/influxdata/telegraf/pull/1096): Performance refactor of running output buffers.
- [#967](https://github.com/influxdata/telegraf/issues/967): Buffer logging improvements.
- [#1107](https://github.com/influxdata/telegraf/issues/1107): Support lustre2 job stats. Thanks @hanleyja!
- [#1122](https://github.com/influxdata/telegraf/pull/1122): Support setting config path through env variable and default paths.
- [#1128](https://github.com/influxdata/telegraf/pull/1128): MongoDB jumbo chunks metric for MongoDB input plugin
- [#1146](https://github.com/influxdata/telegraf/pull/1146): HAProxy socket support. Thanks weshmashian!
### Bugfixes
- [#1050](https://github.com/influxdata/telegraf/issues/1050): jolokia plugin - do not overwrite host tag. Thanks @saiello!
- [#921](https://github.com/influxdata/telegraf/pull/921): mqtt_consumer stops gathering metrics. Thanks @chaton78!
- [#1013](https://github.com/influxdata/telegraf/pull/1013): Close dead riemann output connections. Thanks @echupriyanov!
- [#1012](https://github.com/influxdata/telegraf/pull/1012): Set default tags in test accumulator.
- [#1024](https://github.com/influxdata/telegraf/issues/1024): Don't replace `.` with `_` in datadog output.
- [#1058](https://github.com/influxdata/telegraf/issues/1058): Fix possible leaky TCP connections in influxdb output.
- [#1044](https://github.com/influxdata/telegraf/pull/1044): Fix SNMP OID possible collisions. Thanks @relip
- [#1022](https://github.com/influxdata/telegraf/issues/1022): Don't error deb/rpm install on systemd errors.
- [#1078](https://github.com/influxdata/telegraf/issues/1078): Use default AWS credential chain.
- [#1070](https://github.com/influxdata/telegraf/issues/1070): SQL Server input. Fix datatype conversion.
- [#1089](https://github.com/influxdata/telegraf/issues/1089): Fix leaky TCP connections in phpfpm plugin.
- [#914](https://github.com/influxdata/telegraf/issues/914): Telegraf can drop metrics on full buffers.
- [#1098](https://github.com/influxdata/telegraf/issues/1098): Sanitize invalid OpenTSDB characters.
- [#1110](https://github.com/influxdata/telegraf/pull/1110): Sanitize * to - in graphite serializer. Thanks @goodeggs!
- [#1118](https://github.com/influxdata/telegraf/pull/1118): Sanitize Counter names for `win_perf_counters` input.
- [#1125](https://github.com/influxdata/telegraf/pull/1125): Wrap all exec command runners with a timeout, so hung os processes don't halt Telegraf.
- [#1113](https://github.com/influxdata/telegraf/pull/1113): Set MaxRetry and RequiredAcks defaults in Kafka output.
- [#1090](https://github.com/influxdata/telegraf/issues/1090): [agent] and [global_tags] config sometimes not getting applied.
- [#1133](https://github.com/influxdata/telegraf/issues/1133): Use a timeout for docker list & stat cmds.
- [#1052](https://github.com/influxdata/telegraf/issues/1052): Docker panic fix when decode fails.
- [#1136](https://github.com/influxdata/telegraf/pull/1136): "DELAYED" Inserts were deprecated in MySQL 5.6.6. Thanks @PierreF
## v0.12.1 [2016-04-14]
### Release Notes
- Breaking change in the dovecot input plugin. See Features section below.
- Graphite output templates are now supported. See
https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_OUTPUT.md#graphite
- Possible breaking change for the librato and graphite outputs. Telegraf will
no longer insert field names when the field is simply named `value`. This is
because the `value` field is redundant in the graphite/librato context.
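For the graphite output, the new template support and the `value`-field
behavior look roughly like this. This is a sketch only; the server address is
illustrative, and the `template` value shown follows the
host.tags.measurement.field convention described in the linked
DATA_FORMATS_OUTPUT document:
```toml
[[outputs.graphite]]
  servers = ["localhost:2003"]   # illustrative address
  prefix = "telegraf"
  ## Template controlling how measurement, tags and fields map onto the
  ## graphite path, e.g. telegraf.<host>.cpu.usage_idle. A field literally
  ## named "value" no longer appears as a path segment.
  template = "host.tags.measurement.field"
```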
### Features
- [#1009](https://github.com/influxdata/telegraf/pull/1009): Cassandra input plugin. Thanks @subhachandrachandra!
- [#976](https://github.com/influxdata/telegraf/pull/976): Reduce allocations in the UDP and statsd inputs.
- [#979](https://github.com/influxdata/telegraf/pull/979): Reduce allocations in the TCP listener.
- [#992](https://github.com/influxdata/telegraf/pull/992): Refactor allocations in TCP/UDP listeners.
- [#935](https://github.com/influxdata/telegraf/pull/935): AWS Cloudwatch input plugin. Thanks @joshhardy & @ljosa!
- [#943](https://github.com/influxdata/telegraf/pull/943): http_response input plugin. Thanks @Lswith!
- [#939](https://github.com/influxdata/telegraf/pull/939): sysstat input plugin. Thanks @zbindenren!
- [#998](https://github.com/influxdata/telegraf/pull/998): **breaking change** enabled global, user and ip queries in dovecot plugin. Thanks @mikif70!
- [#1001](https://github.com/influxdata/telegraf/pull/1001): Graphite serializer templates.
- [#1008](https://github.com/influxdata/telegraf/pull/1008): Adding memstats metrics to the influxdb plugin.
### Bugfixes
- [#968](https://github.com/influxdata/telegraf/issues/968): Processes plugin: fix unknown state when the command name contains spaces.
- [#969](https://github.com/influxdata/telegraf/pull/969): ipmi_sensors: allow : in password. Thanks @awaw!
- [#972](https://github.com/influxdata/telegraf/pull/972): dovecot: remove extra newline in dovecot command. Thanks @mrannanj!
- [#645](https://github.com/influxdata/telegraf/issues/645): docker plugin i/o error on closed pipe. Thanks @tripledes!
## v0.12.0 [2016-04-05]
### Features
- [#951](https://github.com/influxdata/telegraf/pull/951): Parse environment variables in the config file.
- [#948](https://github.com/influxdata/telegraf/pull/948): Cleanup config file and make default package version include all plugins (but commented).
- [#927](https://github.com/influxdata/telegraf/pull/927): Adds parsing of tags to the statsd input when using DataDog's dogstatsd extension
- [#863](https://github.com/influxdata/telegraf/pull/863): AMQP output: allow external auth. Thanks @ekini!
- [#707](https://github.com/influxdata/telegraf/pull/707): Improved prometheus plugin. Thanks @titilambert!
- [#878](https://github.com/influxdata/telegraf/pull/878): Added json serializer. Thanks @ch3lo!
- [#880](https://github.com/influxdata/telegraf/pull/880): Add the ability to specify the bearer token to the prometheus plugin. Thanks @jchauncey!
- [#882](https://github.com/influxdata/telegraf/pull/882): Fixed SQL Server Plugin issues
- [#849](https://github.com/influxdata/telegraf/issues/849): Adding ability to parse single values as an input data type.
- [#844](https://github.com/influxdata/telegraf/pull/844): postgres_extensible plugin added. Thanks @menardorama!
- [#866](https://github.com/influxdata/telegraf/pull/866): couchbase input plugin. Thanks @ljosa!
- [#789](https://github.com/influxdata/telegraf/pull/789): Support multiple field specification and `field*` in graphite templates. Thanks @chrusty!
- [#762](https://github.com/influxdata/telegraf/pull/762): Nagios parser for the exec plugin. Thanks @titilambert!
- [#848](https://github.com/influxdata/telegraf/issues/848): Provide option to omit host tag from telegraf agent.
- [#928](https://github.com/influxdata/telegraf/pull/928): Deprecating the statsd "convert_names" options, expose separator config.
- [#919](https://github.com/influxdata/telegraf/pull/919): ipmi_sensor input plugin. Thanks @ebookbug!
- [#945](https://github.com/influxdata/telegraf/pull/945): KAFKA output: codec, acks, and retry configuration. Thanks @framiere!
### Bugfixes
- [#890](https://github.com/influxdata/telegraf/issues/890): Create TLS config even if only ssl_ca is provided.
- [#884](https://github.com/influxdata/telegraf/issues/884): Do not call write method if there are 0 metrics to write.
- [#898](https://github.com/influxdata/telegraf/issues/898): Put database name in quotes, fixes special characters in the database name.
- [#656](https://github.com/influxdata/telegraf/issues/656): No longer run `lsof` on linux to get netstat data, fixes permissions issue.
- [#907](https://github.com/influxdata/telegraf/issues/907): Fix prometheus invalid label/measurement name key.
- [#841](https://github.com/influxdata/telegraf/issues/841): Fix memcached unix socket panic.
- [#873](https://github.com/influxdata/telegraf/issues/873): Fix SNMP plugin sometimes not returning metrics. Thanks @titilambert!
- [#934](https://github.com/influxdata/telegraf/pull/934): phpfpm: Fix fcgi uri path. Thanks @rudenkovk!
- [#805](https://github.com/influxdata/telegraf/issues/805): Kafka consumer stops gathering after i/o timeout.
- [#959](https://github.com/influxdata/telegraf/pull/959): reduce mongodb & prometheus collection timeouts. Thanks @PierreF!
## v0.11.1 [2016-03-17]
### Release Notes
- Primarily this release was cut to fix [#859](https://github.com/influxdata/telegraf/issues/859)
### Features
- [#747](https://github.com/influxdata/telegraf/pull/747): Start telegraf on install & remove on uninstall. Thanks @pierref!


@@ -11,6 +11,8 @@ Output plugins READMEs are less structured,
but any information you can provide on how the data will look is appreciated.
See the [OpenTSDB output](https://github.com/influxdata/telegraf/tree/master/plugins/outputs/opentsdb)
for a good example.
1. **Optional:** Help users of your plugin by including example queries for populating dashboards. Include these sample queries in the `README.md` for the plugin.
1. **Optional:** Write a [tickscript](https://docs.influxdata.com/kapacitor/v1.0/tick/syntax/) for your plugin and add it to [Kapacitor](https://github.com/influxdata/kapacitor/tree/master/examples/telegraf). Or mention @jackzampolin in a PR comment with some common queries that you would want to alert on and he will write one for you.
## GoDoc
@@ -30,7 +32,7 @@ Assuming you can already build the project, run these in the telegraf directory:
1. `go get github.com/sparrc/gdm`
1. `gdm restore`
1. `gdm save`
1. `GOOS=linux gdm save`
## Input Plugins
@@ -82,9 +84,9 @@ func (s *Simple) SampleConfig() string {
func (s *Simple) Gather(acc telegraf.Accumulator) error {
if s.Ok {
acc.Add("state", "pretty good", nil)
acc.AddFields("state", map[string]interface{}{"value": "pretty good"}, nil)
} else {
acc.Add("state", "not great", nil)
acc.AddFields("state", map[string]interface{}{"value": "not great"}, nil)
}
return nil
@@ -95,6 +97,13 @@ func init() {
}
```
## Adding Typed Metrics
In addition to the `AddFields` function, the accumulator also supports
`AddGauge` and `AddCounter` functions. These functions are for adding _typed_
metrics. Metric types are ignored for the InfluxDB output, but can be used
for other outputs, such as [prometheus](https://prometheus.io/docs/concepts/metric_types/).
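For example, a Gather method might emit one gauge and one counter per
collection interval. This is a sketch, using a hypothetical `Example` plugin
type rather than an existing plugin:
```go
// Gather emits one gauge and one counter on each collection interval.
// Example is a hypothetical plugin type used only for illustration.
func (e *Example) Gather(acc telegraf.Accumulator) error {
	// A gauge is a point-in-time value that may go up or down.
	acc.AddGauge("example_temperature",
		map[string]interface{}{"celsius": 21.5},
		map[string]string{"sensor": "ambient"})

	// A counter is a monotonically increasing value.
	acc.AddCounter("example_requests",
		map[string]interface{}{"total": 1024},
		map[string]string{"handler": "root"})

	return nil
}
```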
## Input Plugins Accepting Arbitrary Data Formats
Some input plugins (such as
@@ -114,7 +123,7 @@ creating the `Parser` object.
You should also add the following to your SampleConfig() return:
```toml
## Data format to consume. This can be "json", "influx" or "graphite"
## Data format to consume.
## Each data format has its own unique set of configuration options, read
## more about them here:
## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md
@@ -212,8 +221,8 @@ func (s *Simple) Close() error {
}
func (s *Simple) Write(metrics []telegraf.Metric) error {
for _, pt := range points {
// write `pt` to the output sink here
for _, metric := range metrics {
// write `metric` to the output sink here
}
return nil
}
@@ -244,7 +253,7 @@ instantiating and creating the `Serializer` object.
You should also add the following to your SampleConfig() return:
```toml
## Data format to output. This can be "influx" or "graphite"
## Data format to output.
## Each data format has its own unique set of configuration options, read
## more about them here:
## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_OUTPUT.md
@@ -290,10 +299,6 @@ To execute Telegraf tests follow these simple steps:
instructions
- execute `make test`
**OSX users**: you will need to install `boot2docker` or `docker-machine`.
The Makefile will assume that you have a `docker-machine` box called `default` to
get the IP address.
### Unit test troubleshooting
Try cleaning up your test environment by executing `make docker-kill` and

Godeps

@@ -1,49 +1,60 @@
git.eclipse.org/gitroot/paho/org.eclipse.paho.mqtt.golang.git 617c801af238c3af2d9e72c5d4a0f02edad03ce5
github.com/Shopify/sarama 8aadb476e66ca998f2f6bb3c993e9a2daa3666b9
github.com/Sirupsen/logrus 219c8cb75c258c552e999735be6df753ffc7afdc
github.com/aerospike/aerospike-client-go 7f3a312c3b2a60ac083ec6da296091c52c795c63
github.com/amir/raidman 53c1b967405155bfc8758557863bf2e14f814687
github.com/aws/aws-sdk-go 13a12060f716145019378a10e2806c174356b857
github.com/beorn7/perks 3ac7bf7a47d159a033b107610db8a1b6575507a4
github.com/cenkalti/backoff 4dc77674aceaabba2c7e3da25d4c823edfb73f99
github.com/couchbase/go-couchbase cb664315a324d87d19c879d9cc67fda6be8c2ac1
github.com/couchbase/gomemcached a5ea6356f648fec6ab89add00edd09151455b4b2
github.com/couchbase/goutils 5823a0cbaaa9008406021dc5daf80125ea30bba6
github.com/dancannon/gorethink e7cac92ea2bc52638791a021f212145acfedb1fc
github.com/davecgh/go-spew 5215b55f46b2b919f50a1df0eaa5886afe4e3b3d
github.com/docker/engine-api 8924d6900370b4c7e7984be5adc61f50a80d7537
github.com/docker/go-connections f549a9393d05688dff0992ef3efd8bbe6c628aeb
github.com/docker/go-units 5d2041e26a699eaca682e2ea41c8f891e1060444
github.com/eapache/go-resiliency b86b1ec0dd4209a588dc1285cdd471e73525c0b3
github.com/eapache/queue ded5959c0d4e360646dc9e9908cff48666781367
github.com/fsouza/go-dockerclient a49c8269a6899cae30da1f8a4b82e0ce945f9967
github.com/go-ini/ini 776aa739ce9373377cd16f526cdf06cb4c89b40f
github.com/eclipse/paho.mqtt.golang 0f7a459f04f13a41b7ed752d47944528d4bf9a86
github.com/go-sql-driver/mysql 1fca743146605a172a266e1654e01e5cd5669bee
github.com/gobwas/glob 49571a1557cd20e6a2410adc6421f85b66c730b5
github.com/golang/protobuf 552c7b9542c194800fd493123b3798ef0a832032
github.com/golang/snappy 427fb6fc07997f43afa32f35e850833760e489a7
github.com/gonuts/go-shellquote e842a11b24c6abfb3dd27af69a17f482e4b483c2
github.com/gorilla/context 1ea25387ff6f684839d82767c1733ff4d4d15d0a
github.com/gorilla/mux c9e326e2bdec29039a3761c07bece13133863e1e
github.com/hailocab/go-hostpool e80d13ce29ede4452c43dea11e79b9bc8a15b478
github.com/hashicorp/consul 5aa90455ce78d4d41578bafc86305e6e6b28d7d2
github.com/hpcloud/tail b2940955ab8b26e19d43a43c4da0475dd81bdb56
github.com/influxdata/config b79f6829346b8d6e78ba73544b1e1038f1f1c9da
github.com/influxdata/influxdb e3fef5593c21644f2b43af55d6e17e70910b0e48
github.com/influxdata/influxdb e094138084855d444195b252314dfee9eae34cab
github.com/influxdata/toml af4df43894b16e3fd2b788d01bd27ad0776ef2d0
github.com/jmespath/go-jmespath 0b12d6b521d83fc7f755e7cfc1b1fbdd35a01a74
github.com/influxdata/wlog 7c63b0a71ef8300adc255344d275e10e5c3a71ec
github.com/kardianos/osext 29ae4ffbc9a6fe9fb2bc5029050ce6996ea1d3bc
github.com/kardianos/service 5e335590050d6d00f3aa270217d288dda1c94d0a
github.com/klauspost/crc32 19b0b332c9e4516a6370a0456e6182c3b5036720
github.com/lib/pq e182dc4027e2ded4b19396d638610f2653295f36
github.com/matttproud/golang_protobuf_extensions d0c3fe89de86839aecf2e0579c40ba3bb336a453
github.com/miekg/dns cce6c130cdb92c752850880fd285bea1d64439dd
github.com/mreiferson/go-snappystream 028eae7ab5c4c9e2d1cb4c4ca1e53259bbe7e504
github.com/naoina/go-stringutil 6b638e95a32d0c1131db0e7fe83775cbea4a0d0b
github.com/nats-io/nats b13fc9d12b0b123ebc374e6b808c6228ae4234a3
github.com/nats-io/nuid 4f84f5f3b2786224e336af2e13dba0a0a80b76fa
github.com/nats-io/nats ea8b4fd12ebb823073c0004b9f09ac8748f4f165
github.com/nats-io/nuid a5152d67cf63cbfb5d992a395458722a45194715
github.com/nsqio/go-nsq 0b80d6f05e15ca1930e0c5e1d540ed627e299980
github.com/pmezard/go-difflib 792786c7400a136282c1664665ae0a8db921c6c2
github.com/opencontainers/runc 89ab7f2ccc1e45ddf6485eaa802c35dcf321dfc8
github.com/prometheus/client_golang 18acf9993a863f4c4b40612e19cdd243e7c86831
github.com/prometheus/client_model fa8ad6fec33561be4280a8f0514318c79d7f6cb6
github.com/prometheus/common e8eabff8812b05acf522b45fdcd725a785188e37
github.com/prometheus/procfs 406e5b7bfd8201a36e2bb5f7bdae0b03380c2ce8
github.com/samuel/go-zookeeper 218e9c81c0dd8b3b18172b2bbfad92cc7d6db55f
github.com/shirou/gopsutil 1de1357e7737a536c7f4ff6be7bd27977db4d2cb
github.com/soniah/gosnmp b1b4f885b12c5dcbd021c5cee1c904110de6db7d
github.com/shirou/gopsutil 4d0c402af66c78735c5ccf820dc2ca7de5e4ff08
github.com/soniah/gosnmp eb32571c2410868d85849ad67d1e51d01273eb84
github.com/streadway/amqp b4f3ceab0337f013208d31348b578d83c0064744
github.com/stretchr/objx 1a9d0bb9f541897e62256577b352fdbc1fb4fd94
github.com/stretchr/testify 1f4a1643a57e798696635ea4c126e9127adb7d3c
github.com/wvanbergen/kafka 1a8639a45164fcc245d5c7b4bd3ccfbd1a0ffbf3
github.com/vjeantet/grok 83bfdfdfd1a8146795b28e547a8e3c8b28a466c2
github.com/wvanbergen/kafka 46f9a1cf3f670edec492029fadded9c2d9e18866
github.com/wvanbergen/kazoo-go 0f768712ae6f76454f987c3356177e138df258f8
github.com/yuin/gopher-lua bf3808abd44b1e55143a2d7f08571aaa80db1808
github.com/zensqlmonitor/go-mssqldb ffe5510c6fa5e15e6d983210ab501c815b56b363
golang.org/x/crypto 5dc8cb4b8a8eb076cbb5a06bc3b8682c15bdbbd3
golang.org/x/net 6acef71eb69611914f7a30939ea9f6e194c78172


@@ -1,53 +1,12 @@
git.eclipse.org/gitroot/paho/org.eclipse.paho.mqtt.golang.git 617c801af238c3af2d9e72c5d4a0f02edad03ce5
github.com/Shopify/sarama 8aadb476e66ca998f2f6bb3c993e9a2daa3666b9
github.com/Sirupsen/logrus 219c8cb75c258c552e999735be6df753ffc7afdc
github.com/Microsoft/go-winio ce2922f643c8fd76b46cadc7f404a06282678b34
github.com/StackExchange/wmi f3e2bae1e0cb5aef83e319133eabfee30013a4a5
github.com/amir/raidman 53c1b967405155bfc8758557863bf2e14f814687
github.com/aws/aws-sdk-go 13a12060f716145019378a10e2806c174356b857
github.com/beorn7/perks 3ac7bf7a47d159a033b107610db8a1b6575507a4
github.com/cenkalti/backoff 4dc77674aceaabba2c7e3da25d4c823edfb73f99
github.com/dancannon/gorethink e7cac92ea2bc52638791a021f212145acfedb1fc
github.com/davecgh/go-spew 5215b55f46b2b919f50a1df0eaa5886afe4e3b3d
github.com/eapache/go-resiliency b86b1ec0dd4209a588dc1285cdd471e73525c0b3
github.com/eapache/queue ded5959c0d4e360646dc9e9908cff48666781367
github.com/fsouza/go-dockerclient a49c8269a6899cae30da1f8a4b82e0ce945f9967
github.com/go-ole/go-ole 50055884d646dd9434f16bbb5c9801749b9bafe4
github.com/go-sql-driver/mysql 1fca743146605a172a266e1654e01e5cd5669bee
github.com/golang/protobuf 552c7b9542c194800fd493123b3798ef0a832032
github.com/golang/snappy 427fb6fc07997f43afa32f35e850833760e489a7
github.com/gonuts/go-shellquote e842a11b24c6abfb3dd27af69a17f482e4b483c2
github.com/gorilla/context 1ea25387ff6f684839d82767c1733ff4d4d15d0a
github.com/gorilla/mux c9e326e2bdec29039a3761c07bece13133863e1e
github.com/hailocab/go-hostpool e80d13ce29ede4452c43dea11e79b9bc8a15b478
github.com/influxdata/config b79f6829346b8d6e78ba73544b1e1038f1f1c9da
github.com/influxdata/influxdb e3fef5593c21644f2b43af55d6e17e70910b0e48
github.com/influxdata/toml af4df43894b16e3fd2b788d01bd27ad0776ef2d0
github.com/klauspost/crc32 19b0b332c9e4516a6370a0456e6182c3b5036720
github.com/lib/pq e182dc4027e2ded4b19396d638610f2653295f36
github.com/lxn/win 9a7734ea4db26bc593d52f6a8a957afdad39c5c1
github.com/matttproud/golang_protobuf_extensions d0c3fe89de86839aecf2e0579c40ba3bb336a453
github.com/miekg/dns cce6c130cdb92c752850880fd285bea1d64439dd
github.com/mreiferson/go-snappystream 028eae7ab5c4c9e2d1cb4c4ca1e53259bbe7e504
github.com/naoina/go-stringutil 6b638e95a32d0c1131db0e7fe83775cbea4a0d0b
github.com/nats-io/nats b13fc9d12b0b123ebc374e6b808c6228ae4234a3
github.com/nats-io/nuid 4f84f5f3b2786224e336af2e13dba0a0a80b76fa
github.com/nsqio/go-nsq 0b80d6f05e15ca1930e0c5e1d540ed627e299980
github.com/prometheus/client_golang 18acf9993a863f4c4b40612e19cdd243e7c86831
github.com/prometheus/client_model fa8ad6fec33561be4280a8f0514318c79d7f6cb6
github.com/prometheus/common e8eabff8812b05acf522b45fdcd725a785188e37
github.com/prometheus/procfs 406e5b7bfd8201a36e2bb5f7bdae0b03380c2ce8
github.com/samuel/go-zookeeper 218e9c81c0dd8b3b18172b2bbfad92cc7d6db55f
github.com/shirou/gopsutil 1de1357e7737a536c7f4ff6be7bd27977db4d2cb
github.com/shirou/w32 ada3ba68f000aa1b58580e45c9d308fe0b7fc5c5
github.com/soniah/gosnmp b1b4f885b12c5dcbd021c5cee1c904110de6db7d
github.com/streadway/amqp b4f3ceab0337f013208d31348b578d83c0064744
github.com/stretchr/testify 1f4a1643a57e798696635ea4c126e9127adb7d3c
github.com/wvanbergen/kafka 1a8639a45164fcc245d5c7b4bd3ccfbd1a0ffbf3
github.com/wvanbergen/kazoo-go 0f768712ae6f76454f987c3356177e138df258f8
github.com/zensqlmonitor/go-mssqldb ffe5510c6fa5e15e6d983210ab501c815b56b363
golang.org/x/net 6acef71eb69611914f7a30939ea9f6e194c78172
golang.org/x/text a71fd10341b064c10f4a81ceac72bcf70f26ea34
gopkg.in/dancannon/gorethink.v1 7d1af5be49cb5ecc7b177bf387d232050299d6ef
gopkg.in/fatih/pool.v2 cba550ebf9bce999a02e963296d4bc7a486cb715
gopkg.in/mgo.v2 d90005c5262a3463800497ea5a89aed5fe22c886
gopkg.in/yaml.v2 a83829b6f1293c91addabc89d0571c246397bbf4
github.com/go-ole/go-ole be49f7c07711fcb603cff39e1de7c67926dc0ba7
github.com/lxn/win 950a0e81e7678e63d8e6cd32412bdecb325ccd88
github.com/shirou/w32 3c9377fc6748f222729a8270fe2775d149a249ad
golang.org/x/sys a646d33e2ee3172a661fc09bca23bb4889a41bc8
github.com/go-ini/ini 9144852efba7c4daf409943ee90767da62d55438
github.com/jmespath/go-jmespath bd40a432e4c76585ef6b72d3fd96fb9b6dc7b68d
github.com/pmezard/go-difflib/difflib 792786c7400a136282c1664665ae0a8db921c6c2
github.com/stretchr/objx 1a9d0bb9f541897e62256577b352fdbc1fb4fd94
gopkg.in/fsnotify.v1 a8a77c9133d2d6fd8334f3260d06f60e8d80a5fb
gopkg.in/tomb.v1 dd632973f1e7218eb1089048e0798ec9ae7dceb8


@@ -1,5 +1,6 @@
UNAME := $(shell sh -c 'uname')
VERSION := $(shell sh -c 'git describe --always --tags')
BRANCH := $(shell sh -c 'git rev-parse --abbrev-ref HEAD')
COMMIT := $(shell sh -c 'git rev-parse HEAD')
ifdef GOBIN
PATH := $(GOBIN):$(PATH)
else
@@ -14,21 +15,18 @@ windows: prepare-windows build-windows
# Only run the build (no dependency grabbing)
build:
go install -ldflags "-X main.Version=$(VERSION)" ./...
go install -ldflags \
"-X main.version=$(VERSION) -X main.commit=$(COMMIT) -X main.branch=$(BRANCH)" ./...
build-windows:
go build -o telegraf.exe -ldflags \
"-X main.Version=$(VERSION)" \
GOOS=windows GOARCH=amd64 go build -o telegraf.exe -ldflags \
"-X main.version=$(VERSION) -X main.commit=$(COMMIT) -X main.branch=$(BRANCH)" \
./cmd/telegraf/telegraf.go
build-for-docker:
CGO_ENABLED=0 GOOS=linux go build -installsuffix cgo -o telegraf -ldflags \
"-s -X main.Version=$(VERSION)" \
./cmd/telegraf/telegraf.go
# Build with race detector
dev: prepare
go build -race -ldflags "-X main.Version=$(VERSION)" ./...
"-s -X main.version=$(VERSION) -X main.commit=$(COMMIT) -X main.branch=$(BRANCH)" \
./cmd/telegraf/telegraf.go
# run package script
package:
@@ -42,54 +40,44 @@ prepare:
# Use the windows godeps file to prepare dependencies
prepare-windows:
go get github.com/sparrc/gdm
gdm restore
gdm restore -f Godeps_windows
# Run all docker containers necessary for unit tests
docker-run:
ifeq ($(UNAME), Darwin)
docker run --name kafka \
-e ADVERTISED_HOST=$(shell sh -c 'boot2docker ip || docker-machine ip default') \
-e ADVERTISED_PORT=9092 \
-p "2181:2181" -p "9092:9092" \
-d spotify/kafka
endif
ifeq ($(UNAME), Linux)
docker run --name aerospike -p "3000:3000" -d aerospike/aerospike-server:3.9.0
docker run --name kafka \
-e ADVERTISED_HOST=localhost \
-e ADVERTISED_PORT=9092 \
-p "2181:2181" -p "9092:9092" \
-d spotify/kafka
endif
docker run --name mysql -p "3306:3306" -e MYSQL_ALLOW_EMPTY_PASSWORD=yes -d mysql
docker run --name memcached -p "11211:11211" -d memcached
docker run --name postgres -p "5432:5432" -d postgres
docker run --name rabbitmq -p "15672:15672" -p "5672:5672" -d rabbitmq:3-management
docker run --name opentsdb -p "4242:4242" -d petergrace/opentsdb-docker
docker run --name redis -p "6379:6379" -d redis
docker run --name aerospike -p "3000:3000" -d aerospike
docker run --name nsq -p "4150:4150" -d nsqio/nsq /nsqd
docker run --name mqtt -p "1883:1883" -d ncarlier/mqtt
docker run --name riemann -p "5555:5555" -d blalor/riemann
docker run --name snmp -p "31161:31161/udp" -d titilambert/snmpsim
docker run --name nats -p "4222:4222" -d nats
# Run docker containers necessary for CircleCI unit tests
docker-run-circle:
docker run --name aerospike -p "3000:3000" -d aerospike/aerospike-server:3.9.0
docker run --name kafka \
-e ADVERTISED_HOST=localhost \
-e ADVERTISED_PORT=9092 \
-p "2181:2181" -p "9092:9092" \
-d spotify/kafka
docker run --name opentsdb -p "4242:4242" -d petergrace/opentsdb-docker
docker run --name aerospike -p "3000:3000" -d aerospike
docker run --name nsq -p "4150:4150" -d nsqio/nsq /nsqd
docker run --name mqtt -p "1883:1883" -d ncarlier/mqtt
docker run --name riemann -p "5555:5555" -d blalor/riemann
docker run --name snmp -p "31161:31161/udp" -d titilambert/snmpsim
docker run --name nats -p "4222:4222" -d nats
# Kill all docker containers, ignore errors
docker-kill:
-docker kill nsq aerospike redis opentsdb rabbitmq postgres memcached mysql kafka mqtt riemann snmp
-docker rm nsq aerospike redis opentsdb rabbitmq postgres memcached mysql kafka mqtt riemann snmp
-docker kill nsq aerospike redis rabbitmq postgres memcached mysql kafka mqtt riemann nats
-docker rm nsq aerospike redis rabbitmq postgres memcached mysql kafka mqtt riemann nats
# Run full unit tests using docker containers (includes setup and teardown)
test: vet docker-kill docker-run

README.md

@@ -1,4 +1,4 @@
# Telegraf [![Circle CI](https://circleci.com/gh/influxdata/telegraf.svg?style=svg)](https://circleci.com/gh/influxdata/telegraf)
# Telegraf [![Circle CI](https://circleci.com/gh/influxdata/telegraf.svg?style=svg)](https://circleci.com/gh/influxdata/telegraf) [![Docker pulls](https://img.shields.io/docker/pulls/library/telegraf.svg)](https://hub.docker.com/_/telegraf/)
Telegraf is an agent written in Go for collecting metrics from the system it's
running on, or from other services, and writing them into InfluxDB or other
@@ -17,26 +17,15 @@ new plugins.
## Installation:
NOTE: Telegraf 0.10.x is **not** backwards-compatible with previous versions
of telegraf, both in the database layout and the configuration file. 0.2.x
will continue to be supported, see below for download links.
For more details on the differences between Telegraf 0.2.x and 0.10.x, see
the [release blog post](https://influxdata.com/blog/announcing-telegraf-0-10-0/).
### Linux deb and rpm Packages:
Latest:
* http://get.influxdb.org/telegraf/telegraf_0.11.1-1_amd64.deb
* http://get.influxdb.org/telegraf/telegraf-0.11.1-1.x86_64.rpm
* https://dl.influxdata.com/telegraf/releases/telegraf_1.0.0_amd64.deb
* https://dl.influxdata.com/telegraf/releases/telegraf-1.0.0.x86_64.rpm
Latest (arm):
* http://get.influxdb.org/telegraf/telegraf_0.11.1-1_armhf.deb
* http://get.influxdb.org/telegraf/telegraf-0.11.1-1.armhf.rpm
0.2.x:
* http://get.influxdb.org/telegraf/telegraf_0.2.4_amd64.deb
* http://get.influxdb.org/telegraf/telegraf-0.2.4-1.x86_64.rpm
* https://dl.influxdata.com/telegraf/releases/telegraf_1.0.0_armhf.deb
* https://dl.influxdata.com/telegraf/releases/telegraf-1.0.0.armhf.rpm
##### Package Instructions:
@@ -50,34 +39,21 @@ controlled via `systemctl [action] telegraf`
### yum/apt Repositories:
There is a yum/apt repo available for the whole InfluxData stack, see
[here](https://docs.influxdata.com/influxdb/v0.9/introduction/installation/#installation)
for instructions, replacing the `influxdb` package name with `telegraf`.
[here](https://docs.influxdata.com/influxdb/v0.10/introduction/installation/#installation)
for instructions on setting up the repo. Once it is configured, you will be able
to use this repo to install & update telegraf.
### Linux tarballs:
Latest:
* http://get.influxdb.org/telegraf/telegraf-0.11.1-1_linux_amd64.tar.gz
* http://get.influxdb.org/telegraf/telegraf-0.11.1-1_linux_i386.tar.gz
* http://get.influxdb.org/telegraf/telegraf-0.11.1-1_linux_armhf.tar.gz
* https://dl.influxdata.com/telegraf/releases/telegraf-1.0.0_linux_amd64.tar.gz
* https://dl.influxdata.com/telegraf/releases/telegraf-1.0.0_linux_i386.tar.gz
* https://dl.influxdata.com/telegraf/releases/telegraf-1.0.0_linux_armhf.tar.gz
0.2.x:
* http://get.influxdb.org/telegraf/telegraf_linux_amd64_0.2.4.tar.gz
* http://get.influxdb.org/telegraf/telegraf_linux_386_0.2.4.tar.gz
* http://get.influxdb.org/telegraf/telegraf_linux_arm_0.2.4.tar.gz
### FreeBSD tarball:
##### tarball Instructions:
To install the full directory structure with config file, run:
```
sudo tar -C / -zxvf ./telegraf-0.11.1-1_linux_amd64.tar.gz
```
To extract only the binary, run:
```
tar -zxvf telegraf-0.11.1-1_linux_amd64.tar.gz --strip-components=3 ./usr/bin/telegraf
```
Latest:
* https://dl.influxdata.com/telegraf/releases/telegraf-1.0.0_freebsd_amd64.tar.gz
### Ansible Role:
@@ -93,8 +69,7 @@ brew install telegraf
### Windows Binaries (EXPERIMENTAL)
Latest:
* http://get.influxdb.org/telegraf/telegraf-0.11.1-1_windows_amd64.zip
* http://get.influxdb.org/telegraf/telegraf-0.11.1-1_windows_i386.zip
* https://dl.influxdata.com/telegraf/releases/telegraf-1.0.0_windows_amd64.zip
### From Source:
@@ -162,52 +137,69 @@ more information on each, please look at the directory of the same name in
Currently implemented sources:
* aerospike
* apache
* bcache
* couchdb
* disque
* dns query time
* docker
* dovecot
* elasticsearch
* exec (generic executable plugin, support JSON, influx and graphite)
* haproxy
* httpjson (generic JSON-emitting http service plugin)
* influxdb
* jolokia
* leofs
* lustre2
* mailchimp
* memcached
* mesos
* mongodb
* mysql
* net_response
* nginx
* nsq
* ntpq
* phpfpm
* phusion passenger
* ping
* postgresql
* powerdns
* procstat
* prometheus
* puppetagent
* rabbitmq
* raindrops
* redis
* rethinkdb
* riak
* sensors (only available if built from source)
* snmp
* sql server (microsoft)
* twemproxy
* zfs
* zookeeper
* win_perf_counters (windows performance counters)
* system
* [aws cloudwatch](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/cloudwatch)
* [aerospike](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/aerospike)
* [apache](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/apache)
* [bcache](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/bcache)
* [cassandra](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/cassandra)
* [ceph](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/ceph)
* [chrony](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/chrony)
* [consul](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/consul)
* [conntrack](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/conntrack)
* [couchbase](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/couchbase)
* [couchdb](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/couchdb)
* [disque](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/disque)
* [dns query time](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/dns_query)
* [docker](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/docker)
* [dovecot](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/dovecot)
* [elasticsearch](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/elasticsearch)
* [exec](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/exec) (generic executable plugin, support JSON, influx, graphite and nagios)
* [filestat](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/filestat)
* [haproxy](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/haproxy)
* [hddtemp](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/hddtemp)
* [http_response](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/http_response)
* [httpjson](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/httpjson) (generic JSON-emitting http service plugin)
* [influxdb](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/influxdb)
* [ipmi_sensor](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/ipmi_sensor)
* [iptables](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/iptables)
* [jolokia](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/jolokia)
* [leofs](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/leofs)
* [lustre2](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/lustre2)
* [mailchimp](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/mailchimp)
* [memcached](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/memcached)
* [mesos](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/mesos)
* [mongodb](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/mongodb)
* [mysql](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/mysql)
* [net_response](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/net_response)
* [nginx](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/nginx)
* [nsq](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/nsq)
* [nstat](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/nstat)
* [ntpq](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/ntpq)
* [phpfpm](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/phpfpm)
* [phusion passenger](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/passenger)
* [ping](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/ping)
* [postgresql](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/postgresql)
* [postgresql_extensible](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/postgresql_extensible)
* [powerdns](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/powerdns)
* [procstat](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/procstat)
* [prometheus](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/prometheus)
* [puppetagent](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/puppetagent)
* [rabbitmq](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/rabbitmq)
* [raindrops](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/raindrops)
* [redis](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/redis)
* [rethinkdb](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/rethinkdb)
* [riak](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/riak)
* [sensors](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/sensors)
* [snmp](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/snmp)
* [snmp_legacy](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/snmp_legacy)
* [sql server](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/sqlserver) (microsoft)
* [twemproxy](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/twemproxy)
* [varnish](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/varnish)
* [zfs](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/zfs)
* [zookeeper](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/zookeeper)
* [win_perf_counters ](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/win_perf_counters) (windows performance counters)
* [sysstat](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/sysstat)
* [system](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/system)
* cpu
* mem
* net
@@ -217,36 +209,49 @@ Currently implemented sources:
* swap
* processes
* kernel (/proc/stat)
* kernel (/proc/vmstat)
Telegraf can also collect metrics via the following service plugins:
* statsd
* udp_listener
* tcp_listener
* mqtt_consumer
* kafka_consumer
* nats_consumer
* github_webhooks
* [http_listener](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/http_listener)
* [kafka_consumer](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/kafka_consumer)
* [mqtt_consumer](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/mqtt_consumer)
* [nats_consumer](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/nats_consumer)
* [nsq_consumer](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/nsq_consumer)
* [logparser](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/logparser)
* [statsd](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/statsd)
* [tail](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/tail)
* [tcp_listener](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/tcp_listener)
* [udp_listener](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/udp_listener)
* [webhooks](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/webhooks)
* [filestack](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/webhooks/filestack)
* [github](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/webhooks/github)
* [mandrill](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/webhooks/mandrill)
* [rollbar](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/webhooks/rollbar)
We'll be adding support for many more over the coming months. Read on if you
want to add support for another service or third-party API.
## Supported Output Plugins
* influxdb
* amon
* amqp
* aws kinesis
* aws cloudwatch
* datadog
* graphite
* kafka
* librato
* mqtt
* nsq
* opentsdb
* prometheus
* riemann
* [influxdb](https://github.com/influxdata/telegraf/tree/master/plugins/outputs/influxdb)
* [amon](https://github.com/influxdata/telegraf/tree/master/plugins/outputs/amon)
* [amqp](https://github.com/influxdata/telegraf/tree/master/plugins/outputs/amqp)
* [aws kinesis](https://github.com/influxdata/telegraf/tree/master/plugins/outputs/kinesis)
* [aws cloudwatch](https://github.com/influxdata/telegraf/tree/master/plugins/outputs/cloudwatch)
* [datadog](https://github.com/influxdata/telegraf/tree/master/plugins/outputs/datadog)
* [file](https://github.com/influxdata/telegraf/tree/master/plugins/outputs/file)
* [graphite](https://github.com/influxdata/telegraf/tree/master/plugins/outputs/graphite)
* [graylog](https://github.com/influxdata/telegraf/tree/master/plugins/outputs/graylog)
* [instrumental](https://github.com/influxdata/telegraf/tree/master/plugins/outputs/instrumental)
* [kafka](https://github.com/influxdata/telegraf/tree/master/plugins/outputs/kafka)
* [librato](https://github.com/influxdata/telegraf/tree/master/plugins/outputs/librato)
* [mqtt](https://github.com/influxdata/telegraf/tree/master/plugins/outputs/mqtt)
* [nats](https://github.com/influxdata/telegraf/tree/master/plugins/outputs/nats)
* [nsq](https://github.com/influxdata/telegraf/tree/master/plugins/outputs/nsq)
* [opentsdb](https://github.com/influxdata/telegraf/tree/master/plugins/outputs/opentsdb)
* [prometheus](https://github.com/influxdata/telegraf/tree/master/plugins/outputs/prometheus_client)
* [riemann](https://github.com/influxdata/telegraf/tree/master/plugins/outputs/riemann)
## Contributing


@@ -2,20 +2,39 @@ package telegraf
import "time"
// Accumulator is an interface for "accumulating" metrics from input plugin(s).
// The metrics are sent down a channel shared between all input plugins and then
// flushed on the configured flush_interval.
type Accumulator interface {
// AddFields adds a metric to the accumulator with the given measurement
// name, fields, and tags (and timestamp). If a timestamp is not provided,
// then the accumulator sets it to "now".
// Create a point with a value, decorating it with tags
// NOTE: tags is expected to be owned by the caller, don't mutate
// it after passing to Add.
Add(measurement string,
value interface{},
tags map[string]string,
t ...time.Time)
AddFields(measurement string,
fields map[string]interface{},
tags map[string]string,
t ...time.Time)
// AddGauge is the same as AddFields, but will add the metric as a "Gauge" type
AddGauge(measurement string,
fields map[string]interface{},
tags map[string]string,
t ...time.Time)
// AddCounter is the same as AddFields, but will add the metric as a "Counter" type
AddCounter(measurement string,
fields map[string]interface{},
tags map[string]string,
t ...time.Time)
AddError(err error)
Debug() bool
SetDebug(enabled bool)
SetPrecision(precision, interval time.Duration)
DisablePrecision()
}


@@ -4,7 +4,7 @@ import (
"fmt"
"log"
"math"
"sync"
"sync/atomic"
"time"
"github.com/influxdata/telegraf"
@@ -12,43 +12,30 @@ import (
)
func NewAccumulator(
inputConfig *internal_models.InputConfig,
inputConfig *models.InputConfig,
metrics chan telegraf.Metric,
) *accumulator {
acc := accumulator{}
acc.metrics = metrics
acc.inputConfig = inputConfig
acc.precision = time.Nanosecond
return &acc
}
type accumulator struct {
sync.Mutex
metrics chan telegraf.Metric
defaultTags map[string]string
debug bool
// print every point added to the accumulator
trace bool
inputConfig *internal_models.InputConfig
inputConfig *models.InputConfig
prefix string
}
precision time.Duration
func (ac *accumulator) Add(
measurement string,
value interface{},
tags map[string]string,
t ...time.Time,
) {
fields := make(map[string]interface{})
fields["value"] = value
if !ac.inputConfig.Filter.ShouldNamePass(measurement) {
return
}
ac.AddFields(measurement, fields, tags, t...)
errCount uint64
}
func (ac *accumulator) AddFields(
@@ -57,16 +44,47 @@ func (ac *accumulator) AddFields(
tags map[string]string,
t ...time.Time,
) {
if m := ac.makeMetric(measurement, fields, tags, telegraf.Untyped, t...); m != nil {
ac.metrics <- m
}
}
func (ac *accumulator) AddGauge(
measurement string,
fields map[string]interface{},
tags map[string]string,
t ...time.Time,
) {
if m := ac.makeMetric(measurement, fields, tags, telegraf.Gauge, t...); m != nil {
ac.metrics <- m
}
}
func (ac *accumulator) AddCounter(
measurement string,
fields map[string]interface{},
tags map[string]string,
t ...time.Time,
) {
if m := ac.makeMetric(measurement, fields, tags, telegraf.Counter, t...); m != nil {
ac.metrics <- m
}
}
// makeMetric either returns a metric, or returns nil if the metric doesn't
// need to be created (because of filtering, an error, etc.)
func (ac *accumulator) makeMetric(
measurement string,
fields map[string]interface{},
tags map[string]string,
mType telegraf.ValueType,
t ...time.Time,
) telegraf.Metric {
if len(fields) == 0 || len(measurement) == 0 {
return
return nil
}
if !ac.inputConfig.Filter.ShouldNamePass(measurement) {
return
}
if !ac.inputConfig.Filter.ShouldTagsPass(tags) {
return
if tags == nil {
tags = make(map[string]string)
}
// Override measurement name if set
@@ -81,9 +99,6 @@ func (ac *accumulator) AddFields(
measurement = measurement + ac.inputConfig.MeasurementSuffix
}
if tags == nil {
tags = make(map[string]string)
}
// Apply plugin-wide tags if set
for k, v := range ac.inputConfig.Tags {
if _, ok := tags[k]; !ok {
@@ -97,42 +112,36 @@ func (ac *accumulator) AddFields(
}
}
result := make(map[string]interface{})
for k, v := range fields {
// Filter out any filtered fields
if ac.inputConfig != nil {
if !ac.inputConfig.Filter.ShouldFieldsPass(k) {
continue
}
}
// Apply the metric filter(s)
if ok := ac.inputConfig.Filter.Apply(measurement, fields, tags); !ok {
return nil
}
for k, v := range fields {
// Validate uint64 and float64 fields
switch val := v.(type) {
case uint64:
// InfluxDB does not support writing uint64
if val < uint64(9223372036854775808) {
result[k] = int64(val)
fields[k] = int64(val)
} else {
result[k] = int64(9223372036854775807)
fields[k] = int64(9223372036854775807)
}
continue
case float64:
// NaNs are invalid values in influxdb, skip measurement
if math.IsNaN(val) || math.IsInf(val, 0) {
if ac.debug {
log.Printf("Measurement [%s] field [%s] has a NaN or Inf "+
log.Printf("I! Measurement [%s] field [%s] has a NaN or Inf "+
"field, skipping",
measurement, k)
}
delete(fields, k)
continue
}
}
result[k] = v
}
fields = nil
if len(result) == 0 {
return
fields[k] = v
}
var timestamp time.Time
@@ -141,20 +150,39 @@ func (ac *accumulator) AddFields(
} else {
timestamp = time.Now()
}
timestamp = timestamp.Round(ac.precision)
if ac.prefix != "" {
measurement = ac.prefix + measurement
var m telegraf.Metric
var err error
switch mType {
case telegraf.Counter:
m, err = telegraf.NewCounterMetric(measurement, tags, fields, timestamp)
case telegraf.Gauge:
m, err = telegraf.NewGaugeMetric(measurement, tags, fields, timestamp)
default:
m, err = telegraf.NewMetric(measurement, tags, fields, timestamp)
}
m, err := telegraf.NewMetric(measurement, tags, result, timestamp)
if err != nil {
log.Printf("Error adding point [%s]: %s\n", measurement, err.Error())
return
log.Printf("E! Error adding point [%s]: %s\n", measurement, err.Error())
return nil
}
if ac.debug {
if ac.trace {
fmt.Println("> " + m.String())
}
ac.metrics <- m
return m
}
// AddError passes a runtime error to the accumulator.
// The error will be tagged with the plugin name and written to the log.
func (ac *accumulator) AddError(err error) {
if err == nil {
return
}
atomic.AddUint64(&ac.errCount, 1)
//TODO suppress/throttle consecutive duplicate errors?
log.Printf("E! Error in input [%s]: %s", ac.inputConfig.Name, err)
}
func (ac *accumulator) Debug() bool {
@@ -165,6 +193,39 @@ func (ac *accumulator) SetDebug(debug bool) {
ac.debug = debug
}
func (ac *accumulator) Trace() bool {
return ac.trace
}
func (ac *accumulator) SetTrace(trace bool) {
ac.trace = trace
}
// SetPrecision takes two time.Duration objects. If the first is non-zero,
// it sets that as the precision. Otherwise, it takes the second argument
// as the order of time that the metrics should be rounded to, with the
// maximum being 1s.
func (ac *accumulator) SetPrecision(precision, interval time.Duration) {
if precision > 0 {
ac.precision = precision
return
}
switch {
case interval >= time.Second:
ac.precision = time.Second
case interval >= time.Millisecond:
ac.precision = time.Millisecond
case interval >= time.Microsecond:
ac.precision = time.Microsecond
default:
ac.precision = time.Nanosecond
}
}
func (ac *accumulator) DisablePrecision() {
ac.precision = time.Nanosecond
}
func (ac *accumulator) setDefaultTags(tags map[string]string) {
ac.defaultTags = tags
}


@@ -1,8 +1,11 @@
package agent
import (
"bytes"
"fmt"
"log"
"math"
"os"
"testing"
"time"
@@ -10,6 +13,7 @@ import (
"github.com/influxdata/telegraf/internal/models"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
func TestAdd(t *testing.T) {
@@ -17,11 +21,17 @@ func TestAdd(t *testing.T) {
now := time.Now()
a.metrics = make(chan telegraf.Metric, 10)
defer close(a.metrics)
a.inputConfig = &internal_models.InputConfig{}
a.inputConfig = &models.InputConfig{}
a.Add("acctest", float64(101), map[string]string{})
a.Add("acctest", float64(101), map[string]string{"acc": "test"})
a.Add("acctest", float64(101), map[string]string{"acc": "test"}, now)
a.AddFields("acctest",
map[string]interface{}{"value": float64(101)},
map[string]string{})
a.AddFields("acctest",
map[string]interface{}{"value": float64(101)},
map[string]string{"acc": "test"})
a.AddFields("acctest",
map[string]interface{}{"value": float64(101)},
map[string]string{"acc": "test"}, now)
testm := <-a.metrics
actual := testm.String()
@@ -38,17 +48,241 @@ func TestAdd(t *testing.T) {
actual)
}
func TestAddGauge(t *testing.T) {
a := accumulator{}
now := time.Now()
a.metrics = make(chan telegraf.Metric, 10)
defer close(a.metrics)
a.inputConfig = &models.InputConfig{}
a.AddGauge("acctest",
map[string]interface{}{"value": float64(101)},
map[string]string{})
a.AddGauge("acctest",
map[string]interface{}{"value": float64(101)},
map[string]string{"acc": "test"})
a.AddGauge("acctest",
map[string]interface{}{"value": float64(101)},
map[string]string{"acc": "test"}, now)
testm := <-a.metrics
actual := testm.String()
assert.Contains(t, actual, "acctest value=101")
assert.Equal(t, testm.Type(), telegraf.Gauge)
testm = <-a.metrics
actual = testm.String()
assert.Contains(t, actual, "acctest,acc=test value=101")
assert.Equal(t, testm.Type(), telegraf.Gauge)
testm = <-a.metrics
actual = testm.String()
assert.Equal(t,
fmt.Sprintf("acctest,acc=test value=101 %d", now.UnixNano()),
actual)
assert.Equal(t, testm.Type(), telegraf.Gauge)
}
func TestAddCounter(t *testing.T) {
a := accumulator{}
now := time.Now()
a.metrics = make(chan telegraf.Metric, 10)
defer close(a.metrics)
a.inputConfig = &models.InputConfig{}
a.AddCounter("acctest",
map[string]interface{}{"value": float64(101)},
map[string]string{})
a.AddCounter("acctest",
map[string]interface{}{"value": float64(101)},
map[string]string{"acc": "test"})
a.AddCounter("acctest",
map[string]interface{}{"value": float64(101)},
map[string]string{"acc": "test"}, now)
testm := <-a.metrics
actual := testm.String()
assert.Contains(t, actual, "acctest value=101")
assert.Equal(t, testm.Type(), telegraf.Counter)
testm = <-a.metrics
actual = testm.String()
assert.Contains(t, actual, "acctest,acc=test value=101")
assert.Equal(t, testm.Type(), telegraf.Counter)
testm = <-a.metrics
actual = testm.String()
assert.Equal(t,
fmt.Sprintf("acctest,acc=test value=101 %d", now.UnixNano()),
actual)
assert.Equal(t, testm.Type(), telegraf.Counter)
}
func TestAddNoPrecisionWithInterval(t *testing.T) {
a := accumulator{}
now := time.Date(2006, time.February, 10, 12, 0, 0, 82912748, time.UTC)
a.metrics = make(chan telegraf.Metric, 10)
defer close(a.metrics)
a.inputConfig = &models.InputConfig{}
a.SetPrecision(0, time.Second)
a.AddFields("acctest",
map[string]interface{}{"value": float64(101)},
map[string]string{})
a.AddFields("acctest",
map[string]interface{}{"value": float64(101)},
map[string]string{"acc": "test"})
a.AddFields("acctest",
map[string]interface{}{"value": float64(101)},
map[string]string{"acc": "test"}, now)
testm := <-a.metrics
actual := testm.String()
assert.Contains(t, actual, "acctest value=101")
testm = <-a.metrics
actual = testm.String()
assert.Contains(t, actual, "acctest,acc=test value=101")
testm = <-a.metrics
actual = testm.String()
assert.Equal(t,
fmt.Sprintf("acctest,acc=test value=101 %d", int64(1139572800000000000)),
actual)
}
func TestAddNoIntervalWithPrecision(t *testing.T) {
a := accumulator{}
now := time.Date(2006, time.February, 10, 12, 0, 0, 82912748, time.UTC)
a.metrics = make(chan telegraf.Metric, 10)
defer close(a.metrics)
a.inputConfig = &models.InputConfig{}
a.SetPrecision(time.Second, time.Millisecond)
a.AddFields("acctest",
map[string]interface{}{"value": float64(101)},
map[string]string{})
a.AddFields("acctest",
map[string]interface{}{"value": float64(101)},
map[string]string{"acc": "test"})
a.AddFields("acctest",
map[string]interface{}{"value": float64(101)},
map[string]string{"acc": "test"}, now)
testm := <-a.metrics
actual := testm.String()
assert.Contains(t, actual, "acctest value=101")
testm = <-a.metrics
actual = testm.String()
assert.Contains(t, actual, "acctest,acc=test value=101")
testm = <-a.metrics
actual = testm.String()
assert.Equal(t,
fmt.Sprintf("acctest,acc=test value=101 %d", int64(1139572800000000000)),
actual)
}
func TestAddDisablePrecision(t *testing.T) {
a := accumulator{}
now := time.Date(2006, time.February, 10, 12, 0, 0, 82912748, time.UTC)
a.metrics = make(chan telegraf.Metric, 10)
defer close(a.metrics)
a.inputConfig = &models.InputConfig{}
a.SetPrecision(time.Second, time.Millisecond)
a.DisablePrecision()
a.AddFields("acctest",
map[string]interface{}{"value": float64(101)},
map[string]string{})
a.AddFields("acctest",
map[string]interface{}{"value": float64(101)},
map[string]string{"acc": "test"})
a.AddFields("acctest",
map[string]interface{}{"value": float64(101)},
map[string]string{"acc": "test"}, now)
testm := <-a.metrics
actual := testm.String()
assert.Contains(t, actual, "acctest value=101")
testm = <-a.metrics
actual = testm.String()
assert.Contains(t, actual, "acctest,acc=test value=101")
testm = <-a.metrics
actual = testm.String()
assert.Equal(t,
fmt.Sprintf("acctest,acc=test value=101 %d", int64(1139572800082912748)),
actual)
}
func TestDifferentPrecisions(t *testing.T) {
a := accumulator{}
now := time.Date(2006, time.February, 10, 12, 0, 0, 82912748, time.UTC)
a.metrics = make(chan telegraf.Metric, 10)
defer close(a.metrics)
a.inputConfig = &models.InputConfig{}
a.SetPrecision(0, time.Second)
a.AddFields("acctest",
map[string]interface{}{"value": float64(101)},
map[string]string{"acc": "test"}, now)
testm := <-a.metrics
actual := testm.String()
assert.Equal(t,
fmt.Sprintf("acctest,acc=test value=101 %d", int64(1139572800000000000)),
actual)
a.SetPrecision(0, time.Millisecond)
a.AddFields("acctest",
map[string]interface{}{"value": float64(101)},
map[string]string{"acc": "test"}, now)
testm = <-a.metrics
actual = testm.String()
assert.Equal(t,
fmt.Sprintf("acctest,acc=test value=101 %d", int64(1139572800083000000)),
actual)
a.SetPrecision(0, time.Microsecond)
a.AddFields("acctest",
map[string]interface{}{"value": float64(101)},
map[string]string{"acc": "test"}, now)
testm = <-a.metrics
actual = testm.String()
assert.Equal(t,
fmt.Sprintf("acctest,acc=test value=101 %d", int64(1139572800082913000)),
actual)
a.SetPrecision(0, time.Nanosecond)
a.AddFields("acctest",
map[string]interface{}{"value": float64(101)},
map[string]string{"acc": "test"}, now)
testm = <-a.metrics
actual = testm.String()
assert.Equal(t,
fmt.Sprintf("acctest,acc=test value=101 %d", int64(1139572800082912748)),
actual)
}
func TestAddDefaultTags(t *testing.T) {
a := accumulator{}
a.addDefaultTag("default", "tag")
now := time.Now()
a.metrics = make(chan telegraf.Metric, 10)
defer close(a.metrics)
a.inputConfig = &internal_models.InputConfig{}
a.inputConfig = &models.InputConfig{}
a.Add("acctest", float64(101), map[string]string{})
a.Add("acctest", float64(101), map[string]string{"acc": "test"})
a.Add("acctest", float64(101), map[string]string{"acc": "test"}, now)
a.AddFields("acctest",
map[string]interface{}{"value": float64(101)},
map[string]string{})
a.AddFields("acctest",
map[string]interface{}{"value": float64(101)},
map[string]string{"acc": "test"})
a.AddFields("acctest",
map[string]interface{}{"value": float64(101)},
map[string]string{"acc": "test"}, now)
testm := <-a.metrics
actual := testm.String()
@@ -70,7 +304,7 @@ func TestAddFields(t *testing.T) {
now := time.Now()
a.metrics = make(chan telegraf.Metric, 10)
defer close(a.metrics)
a.inputConfig = &internal_models.InputConfig{}
a.inputConfig = &models.InputConfig{}
fields := map[string]interface{}{
"usage": float64(99),
@@ -103,7 +337,7 @@ func TestAddInfFields(t *testing.T) {
now := time.Now()
a.metrics = make(chan telegraf.Metric, 10)
defer close(a.metrics)
a.inputConfig = &internal_models.InputConfig{}
a.inputConfig = &models.InputConfig{}
fields := map[string]interface{}{
"usage": inf,
@@ -131,7 +365,7 @@ func TestAddNaNFields(t *testing.T) {
now := time.Now()
a.metrics = make(chan telegraf.Metric, 10)
defer close(a.metrics)
a.inputConfig = &internal_models.InputConfig{}
a.inputConfig = &models.InputConfig{}
fields := map[string]interface{}{
"usage": nan,
@@ -155,7 +389,7 @@ func TestAddUint64Fields(t *testing.T) {
now := time.Now()
a.metrics = make(chan telegraf.Metric, 10)
defer close(a.metrics)
a.inputConfig = &internal_models.InputConfig{}
a.inputConfig = &models.InputConfig{}
fields := map[string]interface{}{
"usage": uint64(99),
@@ -184,7 +418,7 @@ func TestAddUint64Overflow(t *testing.T) {
now := time.Now()
a.metrics = make(chan telegraf.Metric, 10)
defer close(a.metrics)
a.inputConfig = &internal_models.InputConfig{}
a.inputConfig = &models.InputConfig{}
fields := map[string]interface{}{
"usage": uint64(9223372036854775808),
@@ -214,11 +448,17 @@ func TestAddInts(t *testing.T) {
now := time.Now()
a.metrics = make(chan telegraf.Metric, 10)
defer close(a.metrics)
a.inputConfig = &internal_models.InputConfig{}
a.inputConfig = &models.InputConfig{}
a.Add("acctest", int(101), map[string]string{})
a.Add("acctest", int32(101), map[string]string{"acc": "test"})
a.Add("acctest", int64(101), map[string]string{"acc": "test"}, now)
a.AddFields("acctest",
map[string]interface{}{"value": int(101)},
map[string]string{})
a.AddFields("acctest",
map[string]interface{}{"value": int32(101)},
map[string]string{"acc": "test"})
a.AddFields("acctest",
map[string]interface{}{"value": int64(101)},
map[string]string{"acc": "test"}, now)
testm := <-a.metrics
actual := testm.String()
@@ -241,10 +481,14 @@ func TestAddFloats(t *testing.T) {
now := time.Now()
a.metrics = make(chan telegraf.Metric, 10)
defer close(a.metrics)
a.inputConfig = &internal_models.InputConfig{}
a.inputConfig = &models.InputConfig{}
a.Add("acctest", float32(101), map[string]string{"acc": "test"})
a.Add("acctest", float64(101), map[string]string{"acc": "test"}, now)
a.AddFields("acctest",
map[string]interface{}{"value": float32(101)},
map[string]string{"acc": "test"})
a.AddFields("acctest",
map[string]interface{}{"value": float64(101)},
map[string]string{"acc": "test"}, now)
testm := <-a.metrics
actual := testm.String()
@@ -263,10 +507,14 @@ func TestAddStrings(t *testing.T) {
now := time.Now()
a.metrics = make(chan telegraf.Metric, 10)
defer close(a.metrics)
a.inputConfig = &internal_models.InputConfig{}
a.inputConfig = &models.InputConfig{}
a.Add("acctest", "test", map[string]string{"acc": "test"})
a.Add("acctest", "foo", map[string]string{"acc": "test"}, now)
a.AddFields("acctest",
map[string]interface{}{"value": "test"},
map[string]string{"acc": "test"})
a.AddFields("acctest",
map[string]interface{}{"value": "foo"},
map[string]string{"acc": "test"}, now)
testm := <-a.metrics
actual := testm.String()
@@ -285,10 +533,12 @@ func TestAddBools(t *testing.T) {
now := time.Now()
a.metrics = make(chan telegraf.Metric, 10)
defer close(a.metrics)
a.inputConfig = &internal_models.InputConfig{}
a.inputConfig = &models.InputConfig{}
a.Add("acctest", true, map[string]string{"acc": "test"})
a.Add("acctest", false, map[string]string{"acc": "test"}, now)
a.AddFields("acctest",
map[string]interface{}{"value": true}, map[string]string{"acc": "test"})
a.AddFields("acctest",
map[string]interface{}{"value": false}, map[string]string{"acc": "test"}, now)
testm := <-a.metrics
actual := testm.String()
@@ -300,3 +550,65 @@ func TestAddBools(t *testing.T) {
fmt.Sprintf("acctest,acc=test,default=tag value=false %d", now.UnixNano()),
actual)
}
// Test that tag filters get applied to metrics.
func TestAccFilterTags(t *testing.T) {
a := accumulator{}
now := time.Now()
a.metrics = make(chan telegraf.Metric, 10)
defer close(a.metrics)
filter := models.Filter{
TagExclude: []string{"acc"},
}
assert.NoError(t, filter.Compile())
a.inputConfig = &models.InputConfig{}
a.inputConfig.Filter = filter
a.AddFields("acctest",
map[string]interface{}{"value": float64(101)},
map[string]string{})
a.AddFields("acctest",
map[string]interface{}{"value": float64(101)},
map[string]string{"acc": "test"})
a.AddFields("acctest",
map[string]interface{}{"value": float64(101)},
map[string]string{"acc": "test"}, now)
testm := <-a.metrics
actual := testm.String()
assert.Contains(t, actual, "acctest value=101")
testm = <-a.metrics
actual = testm.String()
assert.Contains(t, actual, "acctest value=101")
testm = <-a.metrics
actual = testm.String()
assert.Equal(t,
fmt.Sprintf("acctest value=101 %d", now.UnixNano()),
actual)
}
func TestAccAddError(t *testing.T) {
errBuf := bytes.NewBuffer(nil)
log.SetOutput(errBuf)
defer log.SetOutput(os.Stderr)
a := accumulator{}
a.inputConfig = &models.InputConfig{}
a.inputConfig.Name = "mock_plugin"
a.AddError(fmt.Errorf("foo"))
a.AddError(fmt.Errorf("bar"))
a.AddError(fmt.Errorf("baz"))
errs := bytes.Split(errBuf.Bytes(), []byte{'\n'})
assert.EqualValues(t, 3, a.errCount)
require.Len(t, errs, 4) // 4 because of trailing newline
assert.Contains(t, string(errs[0]), "mock_plugin")
assert.Contains(t, string(errs[0]), "foo")
assert.Contains(t, string(errs[1]), "mock_plugin")
assert.Contains(t, string(errs[1]), "bar")
assert.Contains(t, string(errs[2]), "mock_plugin")
assert.Contains(t, string(errs[2]), "baz")
}


@@ -1,17 +1,15 @@
package agent
import (
cryptorand "crypto/rand"
"fmt"
"log"
"math/big"
"math/rand"
"os"
"runtime"
"sync"
"time"
"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/internal"
"github.com/influxdata/telegraf/internal/config"
"github.com/influxdata/telegraf/internal/models"
)
@@ -27,17 +25,19 @@ func NewAgent(config *config.Config) (*Agent, error) {
Config: config,
}
if a.Config.Agent.Hostname == "" {
hostname, err := os.Hostname()
if err != nil {
return nil, err
if !a.Config.Agent.OmitHostname {
if a.Config.Agent.Hostname == "" {
hostname, err := os.Hostname()
if err != nil {
return nil, err
}
a.Config.Agent.Hostname = hostname
}
a.Config.Agent.Hostname = hostname
config.Tags["host"] = a.Config.Agent.Hostname
}
config.Tags["host"] = a.Config.Agent.Hostname
return a, nil
}
@@ -49,18 +49,16 @@ func (a *Agent) Connect() error {
switch ot := o.Output.(type) {
case telegraf.ServiceOutput:
if err := ot.Start(); err != nil {
log.Printf("Service for output %s failed to start, exiting\n%s\n",
log.Printf("E! Service for output %s failed to start, exiting\n%s\n",
o.Name, err.Error())
return err
}
}
if a.Config.Agent.Debug {
log.Printf("Attempting connection to output: %s\n", o.Name)
}
log.Printf("D! Attempting connection to output: %s\n", o.Name)
err := o.Output.Connect()
if err != nil {
log.Printf("Failed to connect to output %s, retrying in 15s, "+
log.Printf("E! Failed to connect to output %s, retrying in 15s, "+
"error was '%s' \n", o.Name, err)
time.Sleep(15 * time.Second)
err = o.Output.Connect()
@@ -68,9 +66,7 @@ func (a *Agent) Connect() error {
return err
}
}
if a.Config.Agent.Debug {
log.Printf("Successfully connected to output: %s\n", o.Name)
}
log.Printf("D! Successfully connected to output: %s\n", o.Name)
}
return nil
}
@@ -88,105 +84,50 @@ func (a *Agent) Close() error {
return err
}
func panicRecover(input *internal_models.RunningInput) {
func panicRecover(input *models.RunningInput) {
if err := recover(); err != nil {
trace := make([]byte, 2048)
runtime.Stack(trace, true)
log.Printf("FATAL: Input [%s] panicked: %s, Stack:\n%s\n",
log.Printf("E! FATAL: Input [%s] panicked: %s, Stack:\n%s\n",
input.Name, err, trace)
log.Println("PLEASE REPORT THIS PANIC ON GITHUB with " +
log.Println("E! PLEASE REPORT THIS PANIC ON GITHUB with " +
"stack trace, configuration, and OS information: " +
"https://github.com/influxdata/telegraf/issues/new")
}
}
// gatherParallel runs the inputs that are using the same reporting interval
// as the telegraf agent.
func (a *Agent) gatherParallel(metricC chan telegraf.Metric) error {
var wg sync.WaitGroup
start := time.Now()
counter := 0
jitter := a.Config.Agent.CollectionJitter.Duration.Nanoseconds()
for _, input := range a.Config.Inputs {
if input.Config.Interval != 0 {
continue
}
wg.Add(1)
counter++
go func(input *internal_models.RunningInput) {
defer panicRecover(input)
defer wg.Done()
acc := NewAccumulator(input.Config, metricC)
acc.SetDebug(a.Config.Agent.Debug)
acc.setDefaultTags(a.Config.Tags)
if jitter != 0 {
nanoSleep := rand.Int63n(jitter)
d, err := time.ParseDuration(fmt.Sprintf("%dns", nanoSleep))
if err != nil {
log.Printf("Jittering collection interval failed for plugin %s",
input.Name)
} else {
time.Sleep(d)
}
}
if err := input.Input.Gather(acc); err != nil {
log.Printf("Error in input [%s]: %s", input.Name, err)
}
}(input)
}
if counter == 0 {
return nil
}
wg.Wait()
elapsed := time.Since(start)
if !a.Config.Agent.Quiet {
log.Printf("Gathered metrics, (%s interval), from %d inputs in %s\n",
a.Config.Agent.Interval.Duration, counter, elapsed)
}
return nil
}
// gatherSeparate runs the inputs that have been configured with their own
// gatherer runs the inputs that have been configured with their own
// reporting interval.
func (a *Agent) gatherSeparate(
func (a *Agent) gatherer(
shutdown chan struct{},
input *internal_models.RunningInput,
input *models.RunningInput,
interval time.Duration,
metricC chan telegraf.Metric,
) error {
defer panicRecover(input)
ticker := time.NewTicker(input.Config.Interval)
ticker := time.NewTicker(interval)
defer ticker.Stop()
for {
var outerr error
start := time.Now()
acc := NewAccumulator(input.Config, metricC)
acc.SetDebug(a.Config.Agent.Debug)
acc.SetPrecision(a.Config.Agent.Precision.Duration,
a.Config.Agent.Interval.Duration)
acc.setDefaultTags(a.Config.Tags)
if err := input.Input.Gather(acc); err != nil {
log.Printf("Error in input [%s]: %s", input.Name, err)
}
internal.RandomSleep(a.Config.Agent.CollectionJitter.Duration, shutdown)
start := time.Now()
gatherWithTimeout(shutdown, input, acc, interval)
elapsed := time.Since(start)
if !a.Config.Agent.Quiet {
log.Printf("Gathered metrics, (separate %s interval), from %s in %s\n",
input.Config.Interval, input.Name, elapsed)
}
if outerr != nil {
return outerr
}
log.Printf("D! Input [%s] gathered metrics, (%s interval) in %s\n",
input.Name, interval, elapsed)
select {
case <-shutdown:
@@ -197,6 +138,42 @@ func (a *Agent) gatherSeparate(
}
}
// gatherWithTimeout gathers from the given input, with the given timeout.
// When the given timeout is reached, gatherWithTimeout logs an error message
// but continues waiting for it to return. This is to avoid leaving behind
// hung processes, and to prevent re-calling the same hung process over and
// over.
func gatherWithTimeout(
shutdown chan struct{},
input *models.RunningInput,
acc *accumulator,
timeout time.Duration,
) {
ticker := time.NewTicker(timeout)
defer ticker.Stop()
done := make(chan error)
go func() {
done <- input.Input.Gather(acc)
}()
for {
select {
case err := <-done:
if err != nil {
log.Printf("E! ERROR in input [%s]: %s", input.Name, err)
}
return
case <-ticker.C:
log.Printf("E! ERROR: input [%s] took longer to collect than "+
"collection interval (%s)",
input.Name, timeout)
continue
case <-shutdown:
return
}
}
}
// Test verifies that we can 'Gather' from all inputs with their configured
// Config struct
func (a *Agent) Test() error {
@@ -218,7 +195,10 @@ func (a *Agent) Test() error {
for _, input := range a.Config.Inputs {
acc := NewAccumulator(input.Config, metricC)
acc.SetDebug(true)
acc.SetTrace(true)
acc.SetPrecision(a.Config.Agent.Precision.Duration,
a.Config.Agent.Interval.Duration)
acc.setDefaultTags(a.Config.Tags)
fmt.Printf("* Plugin: %s, Collection 1\n", input.Name)
if input.Config.Interval != 0 {
@@ -228,6 +208,9 @@ func (a *Agent) Test() error {
if err := input.Input.Gather(acc); err != nil {
return err
}
if acc.errCount > 0 {
return fmt.Errorf("Errors encountered during processing")
}
// Special instructions for some inputs. cpu, for example, needs to be
// run twice in order to return cpu usage percentages.
@@ -250,11 +233,11 @@ func (a *Agent) flush() {
wg.Add(len(a.Config.Outputs))
for _, o := range a.Config.Outputs {
go func(output *internal_models.RunningOutput) {
go func(output *models.RunningOutput) {
defer wg.Done()
err := output.Write()
if err != nil {
log.Printf("Error writing to output [%s]: %s\n",
log.Printf("E! Error writing to output [%s]: %s\n",
output.Name, err.Error())
}
}(o)
@@ -274,51 +257,47 @@ func (a *Agent) flusher(shutdown chan struct{}, metricC chan telegraf.Metric) er
for {
select {
case <-shutdown:
log.Println("Hang on, flushing any cached metrics before shutdown")
log.Println("I! Hang on, flushing any cached metrics before shutdown")
a.flush()
return nil
case <-ticker.C:
internal.RandomSleep(a.Config.Agent.FlushJitter.Duration, shutdown)
a.flush()
case m := <-metricC:
for _, o := range a.Config.Outputs {
o.AddMetric(m)
for i, o := range a.Config.Outputs {
if i == len(a.Config.Outputs)-1 {
o.AddMetric(m)
} else {
o.AddMetric(copyMetric(m))
}
}
}
}
}
// jitterInterval applies the interval jitter to the flush interval using
// a crypto/rand number generator
func jitterInterval(ininterval, injitter time.Duration) time.Duration {
var jitter int64
outinterval := ininterval
if injitter.Nanoseconds() != 0 {
maxjitter := big.NewInt(injitter.Nanoseconds())
if j, err := cryptorand.Int(cryptorand.Reader, maxjitter); err == nil {
jitter = j.Int64()
}
outinterval = time.Duration(jitter + ininterval.Nanoseconds())
func copyMetric(m telegraf.Metric) telegraf.Metric {
t := time.Time(m.Time())
tags := make(map[string]string)
fields := make(map[string]interface{})
for k, v := range m.Tags() {
tags[k] = v
}
for k, v := range m.Fields() {
fields[k] = v
}
if outinterval.Nanoseconds() < time.Duration(500*time.Millisecond).Nanoseconds() {
log.Printf("Flush interval %s too low, setting to 500ms\n", outinterval)
outinterval = time.Duration(500 * time.Millisecond)
}
return outinterval
out, _ := telegraf.NewMetric(m.Name(), tags, fields, t)
return out
}
// Run runs the agent daemon, gathering every Interval
func (a *Agent) Run(shutdown chan struct{}) error {
var wg sync.WaitGroup
a.Config.Agent.FlushInterval.Duration = jitterInterval(
a.Config.Agent.FlushInterval.Duration,
a.Config.Agent.FlushJitter.Duration)
log.Printf("Agent Config: Interval:%s, Debug:%#v, Quiet:%#v, Hostname:%#v, "+
log.Printf("I! Agent Config: Interval:%s, Quiet:%#v, Hostname:%#v, "+
"Flush Interval:%s \n",
a.Config.Agent.Interval.Duration, a.Config.Agent.Debug, a.Config.Agent.Quiet,
a.Config.Agent.Interval.Duration, a.Config.Agent.Quiet,
a.Config.Agent.Hostname, a.Config.Agent.FlushInterval.Duration)
// channel shared between all input threads for accumulating metrics
@@ -329,10 +308,12 @@ func (a *Agent) Run(shutdown chan struct{}) error {
switch p := input.Input.(type) {
case telegraf.ServiceInput:
acc := NewAccumulator(input.Config, metricC)
acc.SetDebug(a.Config.Agent.Debug)
// Service input plugins should set their own precision of their
// metrics.
acc.DisablePrecision()
acc.setDefaultTags(a.Config.Tags)
if err := p.Start(acc); err != nil {
log.Printf("Service for input %s failed to start, exiting\n%s\n",
log.Printf("E! Service for input %s failed to start, exiting\n%s\n",
input.Name, err.Error())
return err
}
@@ -345,43 +326,31 @@ func (a *Agent) Run(shutdown chan struct{}) error {
i := int64(a.Config.Agent.Interval.Duration)
time.Sleep(time.Duration(i - (time.Now().UnixNano() % i)))
}
ticker := time.NewTicker(a.Config.Agent.Interval.Duration)
wg.Add(1)
go func() {
defer wg.Done()
if err := a.flusher(shutdown, metricC); err != nil {
log.Printf("Flusher routine failed, exiting: %s\n", err.Error())
log.Printf("E! Flusher routine failed, exiting: %s\n", err.Error())
close(shutdown)
}
}()
wg.Add(len(a.Config.Inputs))
for _, input := range a.Config.Inputs {
// Special handling for inputs that have their own collection interval
// configured. Default intervals are handled below with gatherParallel
interval := a.Config.Agent.Interval.Duration
// overwrite global interval if this plugin has its own.
if input.Config.Interval != 0 {
wg.Add(1)
go func(input *internal_models.RunningInput) {
defer wg.Done()
if err := a.gatherSeparate(shutdown, input, metricC); err != nil {
log.Printf(err.Error())
}
}(input)
interval = input.Config.Interval
}
go func(in *models.RunningInput, interv time.Duration) {
defer wg.Done()
if err := a.gatherer(shutdown, in, interv, metricC); err != nil {
log.Printf("E! " + err.Error())
}
}(input, interval)
}
defer wg.Wait()
for {
if err := a.gatherParallel(metricC); err != nil {
log.Printf(err.Error())
}
select {
case <-shutdown:
return nil
case <-ticker.C:
continue
}
}
wg.Wait()
return nil
}


@@ -1,9 +1,7 @@
package agent
import (
"github.com/stretchr/testify/assert"
"testing"
"time"
"github.com/influxdata/telegraf/internal/config"
@@ -11,8 +9,18 @@ import (
_ "github.com/influxdata/telegraf/plugins/inputs/all"
// needing to load the outputs
_ "github.com/influxdata/telegraf/plugins/outputs/all"
"github.com/stretchr/testify/assert"
)
func TestAgent_OmitHostname(t *testing.T) {
c := config.NewConfig()
c.Agent.OmitHostname = true
_, err := NewAgent(c)
assert.NoError(t, err)
assert.NotContains(t, c.Tags, "host")
}
func TestAgent_LoadPlugin(t *testing.T) {
c := config.NewConfig()
c.InputFilters = []string{"mysql"}
@@ -101,75 +109,3 @@ func TestAgent_LoadOutput(t *testing.T) {
a, _ = NewAgent(c)
assert.Equal(t, 3, len(a.Config.Outputs))
}
func TestAgent_ZeroJitter(t *testing.T) {
flushinterval := jitterInterval(time.Duration(10*time.Second),
time.Duration(0*time.Second))
actual := flushinterval.Nanoseconds()
exp := time.Duration(10 * time.Second).Nanoseconds()
if actual != exp {
t.Errorf("Actual %v, expected %v", actual, exp)
}
}
func TestAgent_ZeroInterval(t *testing.T) {
min := time.Duration(500 * time.Millisecond).Nanoseconds()
max := time.Duration(5 * time.Second).Nanoseconds()
for i := 0; i < 1000; i++ {
flushinterval := jitterInterval(time.Duration(0*time.Second),
time.Duration(5*time.Second))
actual := flushinterval.Nanoseconds()
if actual > max {
t.Errorf("Didn't expect interval %d to be > %d", actual, max)
break
}
if actual < min {
t.Errorf("Didn't expect interval %d to be < %d", actual, min)
break
}
}
}
func TestAgent_ZeroBoth(t *testing.T) {
flushinterval := jitterInterval(time.Duration(0*time.Second),
time.Duration(0*time.Second))
actual := flushinterval
exp := time.Duration(500 * time.Millisecond)
if actual != exp {
t.Errorf("Actual %v, expected %v", actual, exp)
}
}
func TestAgent_JitterMax(t *testing.T) {
max := time.Duration(32 * time.Second).Nanoseconds()
for i := 0; i < 1000; i++ {
flushinterval := jitterInterval(time.Duration(30*time.Second),
time.Duration(2*time.Second))
actual := flushinterval.Nanoseconds()
if actual > max {
t.Errorf("Didn't expect interval %d to be > %d", actual, max)
break
}
}
}
func TestAgent_JitterMin(t *testing.T) {
min := time.Duration(30 * time.Second).Nanoseconds()
for i := 0; i < 1000; i++ {
flushinterval := jitterInterval(time.Duration(30*time.Second),
time.Duration(2*time.Second))
actual := flushinterval.Nanoseconds()
if actual < min {
t.Errorf("Didn't expect interval %d to be < %d", actual, min)
break
}
}
}


@@ -4,9 +4,9 @@ machine:
post:
- sudo service zookeeper stop
- go version
- go version | grep 1.5.3 || sudo rm -rf /usr/local/go
- wget https://storage.googleapis.com/golang/go1.5.3.linux-amd64.tar.gz
- sudo tar -C /usr/local -xzf go1.5.3.linux-amd64.tar.gz
- go version | grep 1.7.1 || sudo rm -rf /usr/local/go
- wget https://storage.googleapis.com/golang/go1.7.1.linux-amd64.tar.gz
- sudo tar -C /usr/local -xzf go1.7.1.linux-amd64.tar.gz
- go version
dependencies:


@@ -6,19 +6,23 @@ import (
"log"
"os"
"os/signal"
"runtime"
"strings"
"syscall"
"github.com/influxdata/telegraf/agent"
"github.com/influxdata/telegraf/internal/config"
"github.com/influxdata/telegraf/logger"
"github.com/influxdata/telegraf/plugins/inputs"
_ "github.com/influxdata/telegraf/plugins/inputs/all"
"github.com/influxdata/telegraf/plugins/outputs"
_ "github.com/influxdata/telegraf/plugins/outputs/all"
"github.com/kardianos/service"
)
var fDebug = flag.Bool("debug", false,
"show metrics as they're generated to stdout")
"turn on debug logging")
var fQuiet = flag.Bool("quiet", false,
"run in quiet mode")
var fTest = flag.Bool("test", false, "gather metrics, print them out, and exit")
@@ -32,23 +36,33 @@ var fPidfile = flag.String("pidfile", "", "file to write our pid to")
var fInputFilters = flag.String("input-filter", "",
"filter the inputs to enable, separator is :")
var fInputList = flag.Bool("input-list", false,
"print available output plugins.")
"print available input plugins.")
var fOutputFilters = flag.String("output-filter", "",
"filter the outputs to enable, separator is :")
var fOutputList = flag.Bool("output-list", false,
"print available output plugins.")
var fUsage = flag.String("usage", "",
"print usage for a plugin, ie, 'telegraf -usage mysql'")
var fInputFiltersLegacy = flag.String("filter", "",
"filter the inputs to enable, separator is :")
var fOutputFiltersLegacy = flag.String("outputfilter", "",
"filter the outputs to enable, separator is :")
var fConfigDirectoryLegacy = flag.String("configdirectory", "",
"directory containing additional *.conf files")
var fService = flag.String("service", "",
"operate on the service")
// Telegraf version
// -ldflags "-X main.Version=`git describe --always --tags`"
var Version string
// Telegraf version, populated linker.
// ie, -ldflags "-X main.version=`git describe --always --tags`"
var (
version string
commit string
branch string
)
func init() {
// If commit or branch are not set, make that clear.
if commit == "" {
commit = "unknown"
}
if branch == "" {
branch = "unknown"
}
}
const usage = `Telegraf, The plugin-driven server agent for collecting and reporting metrics.
@@ -70,6 +84,14 @@ The flags are:
-debug print metrics as they're generated to stdout
-quiet run in quiet mode
-version print the version to stdout
-service Control the service, ie, 'telegraf -service install (windows only)'
In addition to the -config flag, telegraf will also load the config file from
an environment variable or default location. Precedence is:
1. -config flag
2. $TELEGRAF_CONFIG_PATH environment variable
3. $HOME/.telegraf/telegraf.conf
4. /etc/telegraf/telegraf.conf
Examples:
@@ -89,7 +111,19 @@ Examples:
telegraf -config telegraf.conf -input-filter cpu:mem -output-filter influxdb
`
func main() {
var stop chan struct{}
var srvc service.Service
type program struct{}
func reloadLoop(stop chan struct{}, s service.Service) {
defer func() {
if service.Interactive() {
os.Exit(0)
}
return
}()
reload := make(chan bool, 1)
reload <- true
for <-reload {
@@ -98,25 +132,12 @@ func main() {
flag.Parse()
args := flag.Args()
if flag.NFlag() == 0 && len(args) == 0 {
usageExit(0)
}
var inputFilters []string
if *fInputFiltersLegacy != "" {
inputFilter := strings.TrimSpace(*fInputFiltersLegacy)
inputFilters = strings.Split(":"+inputFilter+":", ":")
}
if *fInputFilters != "" {
inputFilter := strings.TrimSpace(*fInputFilters)
inputFilters = strings.Split(":"+inputFilter+":", ":")
}
var outputFilters []string
if *fOutputFiltersLegacy != "" {
outputFilter := strings.TrimSpace(*fOutputFiltersLegacy)
outputFilters = strings.Split(":"+outputFilter+":", ":")
}
if *fOutputFilters != "" {
outputFilter := strings.TrimSpace(*fOutputFilters)
outputFilters = strings.Split(":"+outputFilter+":", ":")
@@ -125,8 +146,7 @@ func main() {
if len(args) > 0 {
switch args[0] {
case "version":
v := fmt.Sprintf("Telegraf - Version %s", Version)
fmt.Println(v)
fmt.Printf("Telegraf v%s (git: %s %s)\n", version, branch, commit)
return
case "config":
config.PrintSampleConfig(inputFilters, outputFilters)
@@ -134,34 +154,27 @@ func main() {
}
}
if *fOutputList {
// switch for flags which just do something and exit immediately
switch {
case *fOutputList:
fmt.Println("Available Output Plugins:")
for k, _ := range outputs.Outputs {
fmt.Printf(" %s\n", k)
}
return
}
if *fInputList {
case *fInputList:
fmt.Println("Available Input Plugins:")
for k, _ := range inputs.Inputs {
fmt.Printf(" %s\n", k)
}
return
}
if *fVersion {
v := fmt.Sprintf("Telegraf - Version %s", Version)
fmt.Println(v)
case *fVersion:
fmt.Printf("Telegraf v%s (git: %s %s)\n", version, branch, commit)
return
}
if *fSampleConfig {
case *fSampleConfig:
config.PrintSampleConfig(inputFilters, outputFilters)
return
}
if *fUsage != "" {
case *fUsage != "":
if err := config.PrintInputConfig(*fUsage); err != nil {
if err2 := config.PrintOutputConfig(*fUsage); err2 != nil {
log.Fatalf("%s and %s", err, err2)
@@ -170,31 +183,16 @@ func main() {
return
}
var (
c *config.Config
err error
)
if *fConfig != "" {
c = config.NewConfig()
c.OutputFilters = outputFilters
c.InputFilters = inputFilters
err = c.LoadConfig(*fConfig)
if err != nil {
log.Fatal(err)
}
} else {
fmt.Println("You must specify a config file. See telegraf --help")
// If no other options are specified, load the config file and run.
c := config.NewConfig()
c.OutputFilters = outputFilters
c.InputFilters = inputFilters
err := c.LoadConfig(*fConfig)
if err != nil {
fmt.Println(err)
os.Exit(1)
}
if *fConfigDirectoryLegacy != "" {
err = c.LoadDirectory(*fConfigDirectoryLegacy)
if err != nil {
log.Fatal(err)
}
}
if *fConfigDirectory != "" {
err = c.LoadDirectory(*fConfigDirectory)
if err != nil {
@@ -213,13 +211,12 @@ func main() {
log.Fatal(err)
}
if *fDebug {
ag.Config.Agent.Debug = true
}
if *fQuiet {
ag.Config.Agent.Quiet = true
}
// Setup logging
logger.SetupLogging(
ag.Config.Agent.Debug || *fDebug,
ag.Config.Agent.Quiet || *fQuiet,
ag.Config.Agent.Logfile,
)
if *fTest {
err = ag.Test()
@@ -238,22 +235,26 @@ func main() {
signals := make(chan os.Signal)
signal.Notify(signals, os.Interrupt, syscall.SIGHUP)
go func() {
sig := <-signals
if sig == os.Interrupt {
close(shutdown)
}
if sig == syscall.SIGHUP {
log.Printf("Reloading Telegraf config\n")
<-reload
reload <- true
select {
case sig := <-signals:
if sig == os.Interrupt {
close(shutdown)
}
if sig == syscall.SIGHUP {
log.Printf("I! Reloading Telegraf config\n")
<-reload
reload <- true
close(shutdown)
}
case <-stop:
close(shutdown)
}
}()
log.Printf("Starting Telegraf (version %s)\n", Version)
log.Printf("Loaded outputs: %s", strings.Join(c.OutputNames(), " "))
log.Printf("Loaded inputs: %s", strings.Join(c.InputNames(), " "))
log.Printf("Tags enabled: %s", c.ListTags())
log.Printf("I! Starting Telegraf (version %s)\n", version)
log.Printf("I! Loaded outputs: %s", strings.Join(c.OutputNames(), " "))
log.Printf("I! Loaded inputs: %s", strings.Join(c.InputNames(), " "))
log.Printf("I! Tags enabled: %s", c.ListTags())
if *fPidfile != "" {
f, err := os.Create(*fPidfile)
@@ -274,3 +275,55 @@ func usageExit(rc int) {
fmt.Println(usage)
os.Exit(rc)
}
func (p *program) Start(s service.Service) error {
srvc = s
go p.run()
return nil
}
func (p *program) run() {
stop = make(chan struct{})
reloadLoop(stop, srvc)
}
func (p *program) Stop(s service.Service) error {
close(stop)
return nil
}
func main() {
flag.Parse()
if runtime.GOOS == "windows" {
svcConfig := &service.Config{
Name: "telegraf",
DisplayName: "Telegraf Data Collector Service",
Description: "Collects data using a series of plugins and publishes it to" +
"another series of plugins.",
Arguments: []string{"-config", "C:\\Program Files\\Telegraf\\telegraf.conf"},
}
prg := &program{}
s, err := service.New(prg, svcConfig)
if err != nil {
log.Fatal(err)
}
// Handle the -service flag here to prevent any issues with tooling that
// may not have an interactive session, e.g. installing from Ansible.
if *fService != "" {
if *fConfig != "" {
(*svcConfig).Arguments = []string{"-config", *fConfig}
}
err := service.Control(s, *fService)
if err != nil {
log.Fatal(err)
}
} else {
err = s.Run()
if err != nil {
log.Println("E! " + err.Error())
}
}
} else {
stop = make(chan struct{})
reloadLoop(stop, nil)
}
}


@@ -3,16 +3,31 @@
## Generating a Configuration File
A default Telegraf config file can be generated using the -sample-config flag:
`telegraf -sample-config > telegraf.conf`
```
telegraf -sample-config > telegraf.conf
```
To generate a file with specific inputs and outputs, you can use the
-input-filter and -output-filter flags:
`telegraf -sample-config -input-filter cpu:mem:net:swap -output-filter influxdb:kafka`
```
telegraf -sample-config -input-filter cpu:mem:net:swap -output-filter influxdb:kafka
```
You can see the latest config file with all available plugins here:
[telegraf.conf](https://github.com/influxdata/telegraf/blob/master/etc/telegraf.conf)
## Environment Variables
Environment variables can be used anywhere in the config file; simply prepend
them with $. For strings the variable must be within quotes (ie, "$STR_VAR");
for numbers and booleans it should be plain (ie, $INT_VAR, $BOOL_VAR).
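As a rough sketch of what that substitution amounts to, assuming the raw config
text is run through an expansion step such as Go's `os.Expand` before TOML
parsing (the actual loader may differ):
```go
package main

import (
	"fmt"
	"os"
)

// expandConfig replaces $VAR references in raw config text with values
// taken from the environment; unset variables expand to an empty string.
func expandConfig(raw string) string {
	return os.Expand(raw, os.Getenv)
}

func main() {
	os.Setenv("INFLUX_URL", "http://localhost:8086")
	os.Setenv("BATCH", "1000")

	raw := `[[outputs.influxdb]]
  urls = ["$INFLUX_URL"]     # string: keep the quotes

[agent]
  metric_batch_size = $BATCH # number: plain, no quotes
`
	fmt.Print(expandConfig(raw))
}
```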
## `[global_tags]` Configuration
Global tags can be specific in the `[global_tags]` section of the config file in
key="value" format. All metrics being gathered on this host will be tagged
Global tags can be specified in the `[global_tags]` section of the config file
in key="value" format. All metrics being gathered on this host will be tagged
with the tags specified here.
## `[agent]` Configuration
@@ -23,8 +38,12 @@ config.
* **interval**: Default data collection interval for all inputs
* **round_interval**: Rounds collection interval to 'interval'
ie, if interval="10s" then always collect on :00, :10, :20, etc.
* **metric_batch_size**: Telegraf will send metrics to outputs in batches of at
most metric_batch_size metrics.
* **metric_buffer_limit**: Telegraf will cache metric_buffer_limit metrics
for each output, and will flush this buffer on a successful write.
This should be a multiple of metric_batch_size and must not be less
than 2 times metric_batch_size (e.g., with metric_batch_size = 1000, set
metric_buffer_limit to at least 2000).
* **collection_jitter**: Collection jitter is used to jitter
the collection by a random amount.
Each plugin will sleep for a random time within jitter before collecting.
@@ -41,9 +60,39 @@ ie, a jitter of 5s and flush_interval 10s means flushes will happen every 10-15s
* **quiet**: Run telegraf in quiet mode.
* **hostname**: Override default hostname, if empty use os.Hostname().
## `[inputs.xxx]` Configuration
#### Measurement Filtering
There are some configuration options that are configurable per input:
Filters can be configured per input or output, see below for examples.
* **namepass**: An array of strings that is used to filter metrics generated by the
current input. Each string in the array is tested as a glob match against
measurement names and if it matches, the metric is emitted.
* **namedrop**: The inverse of pass, if a measurement name matches, it is not emitted.
* **fieldpass**: An array of strings that is used to filter metrics generated by the
current input. Each string in the array is tested as a glob match against field names
and if it matches, the field is emitted. fieldpass is not available for outputs.
* **fielddrop**: The inverse of pass, if a field name matches, it is not emitted.
fielddrop is not available for outputs.
* **tagpass**: tag names and arrays of strings that are used to filter
measurements by the current input. Each string in the array is tested as a glob
match against the tag name, and if it matches the measurement is emitted.
* **tagdrop**: The inverse of tagpass. If a tag matches, the measurement is not
emitted. This is tested on measurements that have passed the tagpass test.
* **tagexclude**: tagexclude can be used to exclude a tag from measurement(s).
As opposed to tagdrop, which will drop an entire measurement based on its
tags, tagexclude simply strips the given tag keys from the measurement. This
can be used on inputs & outputs, but it is _recommended_ to be used on inputs,
as it is more efficient to filter out tags at the ingestion point.
* **taginclude**: taginclude is the inverse of tagexclude. It will only include
the tag keys in the final measurement.
**NOTE** `tagpass` and `tagdrop` parameters must be defined at the _end_ of
the plugin definition, otherwise subsequent plugin config options will be
interpreted as part of the tagpass/tagdrop map.
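To make the glob matching above concrete, here is a small, self-contained
sketch using the github.com/gobwas/glob library (the same library the new
filter package in this changeset imports). It is only an illustration of
namedrop-style matching, not Telegraf's actual filter code:
```go
package main

import (
	"fmt"

	"github.com/gobwas/glob"
)

// matchesAny reports whether name matches any of the compiled glob patterns.
func matchesAny(name string, patterns []glob.Glob) bool {
	for _, g := range patterns {
		if g.Match(name) {
			return true
		}
	}
	return false
}

func main() {
	// namedrop = ["container_*"] from the prometheus example below.
	namedrop := []glob.Glob{glob.MustCompile("container_*")}

	for _, name := range []string{"container_cpu", "rest_client_requests"} {
		if matchesAny(name, namedrop) {
			fmt.Println("dropped:", name)
		} else {
			fmt.Println("kept:   ", name)
		}
	}
}
```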
## Input Configuration
Some configuration options are configurable per input:
* **name_override**: Override the base name of the measurement.
(Default is the name of the input).
@@ -54,24 +103,6 @@ There are some configuration options that are configurable per input:
global interval, but if one particular input should be run less or more often,
you can configure that here.
#### Input Filters
There are also filters that can be configured per input:
* **namepass**: An array of strings that is used to filter metrics generated by the
current input. Each string in the array is tested as a glob match against
measurement names and if it matches, the field is emitted.
* **namedrop**: The inverse of pass, if a measurement name matches, it is not emitted.
* **fieldpass**: An array of strings that is used to filter metrics generated by the
current input. Each string in the array is tested as a glob match against field names
and if it matches, the field is emitted.
* **fielddrop**: The inverse of pass, if a field name matches, it is not emitted.
* **tagpass**: tag names and arrays of strings that are used to filter
measurements by the current input. Each string in the array is tested as a glob
match against the tag name, and if it matches the measurement is emitted.
* **tagdrop**: The inverse of tagpass. If a tag matches, the measurement is not
emitted. This is tested on measurements that have passed the tagpass test.
#### Input Configuration Examples
This is a full working config that will output CPU data to an InfluxDB instance
@@ -102,6 +133,10 @@ fields which begin with `time_`.
#### Input Config: tagpass and tagdrop
**NOTE** `tagpass` and `tagdrop` parameters must be defined at the _end_ of
the plugin definition, otherwise subsequent plugin config options will be
interpreted as part of the tagpass/tagdrop map.
```toml
[[inputs.cpu]]
percpu = true
@@ -141,12 +176,26 @@ fields which begin with `time_`.
# Drop all metrics about containers for kubelet
[[inputs.prometheus]]
urls = ["http://kube-node-1:4194/metrics"]
namedrop = ["container_"]
namedrop = ["container_*"]
# Only store rest client related metrics for kubelet
[[inputs.prometheus]]
urls = ["http://kube-node-1:4194/metrics"]
namepass = ["rest_client_"]
namepass = ["rest_client_*"]
```
#### Input Config: taginclude and tagexclude
```toml
# Only include the "cpu" tag in the measurements for the cpu plugin.
[[inputs.cpu]]
percpu = true
totalcpu = true
taginclude = ["cpu"]
# Exclude the "fstype" tag from the measurements for the disk plugin.
[[inputs.disk]]
tagexclude = ["fstype"]
```
#### Input config: prefix, suffix, and override
@@ -174,6 +223,9 @@ This will emit measurements with the name `foobar`
This plugin will emit measurements with two additional tags: `tag1=foo` and
`tag2=bar`
NOTE: Order matters, the `[inputs.cpu.tags]` table must be at the _end_ of the
plugin definition.
```toml
[[inputs.cpu]]
percpu = false
@@ -202,15 +254,12 @@ to avoid measurement collisions:
fielddrop = ["cpu_time*"]
```
## `[outputs.xxx]` Configuration
## Output Configuration
Telegraf also supports specifying multiple output sinks to send data to,
configuring each output sink is different, but examples can be
found by running `telegraf -sample-config`.
Outputs also support the same configurable options as inputs
(namepass, namedrop, tagpass, tagdrop)
```toml
[[outputs.influxdb]]
urls = [ "http://localhost:8086" ]


@@ -1,5 +1,13 @@
# Telegraf Input Data Formats
Telegraf is able to parse the following input data formats into metrics:
1. [InfluxDB Line Protocol](https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md#influx)
1. [JSON](https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md#json)
1. [Graphite](https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md#graphite)
1. [Value](https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md#value), ie: 45 or "booyah"
1. [Nagios](https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md#nagios) (exec input only)
Telegraf metrics, like InfluxDB
[points](https://docs.influxdata.com/influxdb/v0.10/write_protocols/line/),
are a combination of four basic parts:
@@ -31,7 +39,7 @@ example, in the exec plugin:
## measurement name suffix (for separating different commands)
name_suffix = "_mycollector"
## Data format to consume. This can be "json", "influx" or "graphite"
## Data format to consume.
## Each data format has it's own unique set of configuration options, read
## more about them here:
## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md
@@ -43,7 +51,7 @@ example, in the exec plugin:
Each data_format has an additional set of configuration options available, which
I'll go over below.
## Influx:
# Influx:
There are no additional configuration options for InfluxDB line-protocol. The
metrics are parsed directly into Telegraf metrics.
@@ -58,23 +66,28 @@ metrics are parsed directly into Telegraf metrics.
## measurement name suffix (for separating different commands)
name_suffix = "_mycollector"
## Data format to consume. This can be "json", "influx" or "graphite"
## Data format to consume.
## Each data format has it's own unique set of configuration options, read
## more about them here:
## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md
data_format = "influx"
```
## JSON:
# JSON:
The JSON data format flattens JSON into metric _fields_. For example, this JSON:
The JSON data format flattens JSON into metric _fields_.
NOTE: Only numerical values are converted to fields, and they are converted
into floats. Strings are ignored unless specified as a tag_key (see below).
So for example, this JSON:
```json
{
"a": 5,
"b": {
"c": 6
}
},
"ignored": "I'm a string"
}
```
@@ -103,7 +116,7 @@ For example, if you had this configuration:
## measurement name suffix (for separating different commands)
name_suffix = "_mycollector"
## Data format to consume. This can be "json", "influx" or "graphite"
## Data format to consume.
## Each data format has it's own unique set of configuration options, read
## more about them here:
## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md
@@ -134,62 +147,125 @@ Your Telegraf metrics would get tagged with "my_tag_1"
exec_mycollector,my_tag_1=foo a=5,b_c=6
```
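As a rough illustration of the flattening rule described above (numeric values
become float fields, nested keys are joined with "_", strings are skipped),
here is a simplified sketch; the real parser additionally handles tag_keys and
other options:
```go
package main

import (
	"encoding/json"
	"fmt"
)

// flatten walks decoded JSON and collects numeric leaves as float fields,
// joining nested keys with "_". Non-numeric values are skipped.
func flatten(prefix string, v interface{}, fields map[string]float64) {
	switch t := v.(type) {
	case map[string]interface{}:
		for k, child := range t {
			key := k
			if prefix != "" {
				key = prefix + "_" + k
			}
			flatten(key, child, fields)
		}
	case float64: // encoding/json decodes all JSON numbers as float64
		fields[prefix] = t
	}
}

func main() {
	raw := []byte(`{"a": 5, "b": {"c": 6}, "ignored": "I'm a string"}`)

	var decoded map[string]interface{}
	if err := json.Unmarshal(raw, &decoded); err != nil {
		panic(err)
	}

	fields := map[string]float64{}
	flatten("", decoded, fields)
	fmt.Println(fields) // map[a:5 b_c:6]
}
```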
## Graphite:
# Value:
The "value" data format translates single values into Telegraf metrics. This
is done by assigning a measurement name and setting a single field ("value")
as the parsed metric.
#### Value Configuration:
You **must** tell Telegraf what type of metric to collect by using the
`data_type` configuration option. Available options are:
1. integer
2. float or long
3. string
4. boolean
**Note:** It is also recommended that you set `name_override` to a measurement
name that makes sense for your metric, otherwise it will just be set to the
name of the plugin.
```toml
[[inputs.exec]]
## Commands array
commands = ["cat /proc/sys/kernel/random/entropy_avail"]
## override the default metric name of "exec"
name_override = "entropy_available"
## Data format to consume.
## Each data format has it's own unique set of configuration options, read
## more about them here:
## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md
data_format = "value"
data_type = "integer" # required
```
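The `data_type` conversion can be pictured with standard library parsing; a
minimal sketch, assuming the command output has already been read into a
string and ignoring the "long" alias:
```go
package main

import (
	"fmt"
	"strconv"
	"strings"
)

// parseValue converts a single raw value according to dataType,
// mirroring the integer/float/string/boolean options above.
func parseValue(raw, dataType string) (interface{}, error) {
	raw = strings.TrimSpace(raw)
	switch dataType {
	case "integer":
		return strconv.ParseInt(raw, 10, 64)
	case "float":
		return strconv.ParseFloat(raw, 64)
	case "boolean":
		return strconv.ParseBool(raw)
	default:
		return raw, nil // "string"
	}
}

func main() {
	v, err := parseValue("3759\n", "integer")
	fmt.Println(v, err) // 3759 <nil>
}
```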
# Graphite:
The Graphite data format translates graphite _dot_ buckets directly into
telegraf measurement names, with a single value field, and without any tags. For
more advanced options, Telegraf supports specifying "templates" to translate
telegraf measurement names, with a single value field, and without any tags.
By default, the separator is left as ".", but this can be changed using the
"separator" argument. For more advanced options,
Telegraf supports specifying "templates" to translate
graphite buckets into Telegraf metrics.
#### Separator:
You can specify a separator to use for the parsed metrics.
By default, it will leave the metrics with a "." separator.
Setting `separator = "_"` will translate:
Templates are of the form:
```
cpu.usage.idle 99
=> cpu_usage_idle value=99
"host.mytag.mytag.measurement.measurement.field*"
```
#### Measurement/Tag Templates:
Where the following keywords exist:
1. `measurement`: specifies that this section of the graphite bucket corresponds
to the measurement name. This can be specified multiple times.
2. `field`: specifies that this section of the graphite bucket corresponds
to the field name. This can be specified multiple times.
3. `measurement*`: specifies that all remaining elements of the graphite bucket
correspond to the measurement name.
4. `field*`: specifies that all remaining elements of the graphite bucket
correspond to the field name.
Any part of the template that is not a keyword is treated as a tag key. This
can also be specified multiple times.
NOTE: `field*` cannot be used in conjunction with `measurement*`!
#### Measurement & Tag Templates:
The most basic template is to specify a single transformation to apply to all
incoming metrics. _measurement_ is a special keyword that tells Telegraf which
parts of the graphite bucket to combine into the measurement name. It can have a
trailing `*` to indicate that the remainder of the metric should be used.
Other words are considered tag keys. So the following template:
incoming metrics. So the following template:
```toml
templates = [
"region.measurement*"
"region.region.measurement*"
]
```
would result in the following Graphite -> Telegraf transformation.
```
us-west.cpu.load 100
=> cpu.load,region=us-west value=100
us.west.cpu.load 100
=> cpu.load,region=us.west value=100
```
#### Field Templates:
There is also a _field_ keyword, which can only be specified once.
The field keyword tells Telegraf to give the metric that field name.
So the following template:
```toml
separator = "_"
templates = [
"measurement.measurement.field.region"
"measurement.measurement.field.field.region"
]
```
would result in the following Graphite -> Telegraf transformation.
```
cpu.usage.idle.us-west 100
=> cpu_usage,region=us-west idle=100
cpu.usage.idle.percent.eu-east 100
=> cpu_usage,region=eu-east idle_percent=100
```
The field key can also be derived from all remaining elements of the graphite
bucket by specifying `field*`:
```toml
separator = "_"
templates = [
"measurement.measurement.region.field*"
]
```
which would result in the following Graphite -> Telegraf transformation.
```
cpu.usage.eu-east.idle.percentage 100
=> cpu_usage,region=eu-east idle_percentage=100
```
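Putting the template keywords together, the sketch below shows how one
template could be applied to a dotted bucket. It is a simplification for
illustration only; the real parser also handles filters, multiple templates,
and extra tags:
```go
package main

import (
	"fmt"
	"strings"
)

// applyTemplate maps the parts of a dotted graphite bucket onto a
// measurement name, tags, and a field name, following one template.
// separator joins repeated measurement/field sections.
func applyTemplate(template, bucket, separator string) (string, map[string]string, string) {
	tmplParts := strings.Split(template, ".")
	bucketParts := strings.Split(bucket, ".")
	tags := map[string]string{}

	var measurementParts, fieldParts []string
loop:
	for i, part := range tmplParts {
		if i >= len(bucketParts) {
			break
		}
		switch part {
		case "measurement":
			measurementParts = append(measurementParts, bucketParts[i])
		case "field":
			fieldParts = append(fieldParts, bucketParts[i])
		case "measurement*": // consume everything that is left
			measurementParts = append(measurementParts, bucketParts[i:]...)
			break loop
		case "field*": // consume everything that is left
			fieldParts = append(fieldParts, bucketParts[i:]...)
			break loop
		default: // any other word is a tag key
			tags[part] = bucketParts[i]
		}
	}
	return strings.Join(measurementParts, separator), tags, strings.Join(fieldParts, separator)
}

func main() {
	m, tags, f := applyTemplate(
		"measurement.measurement.region.field*",
		"cpu.usage.eu-east.idle.percentage",
		"_")
	fmt.Println(m, tags, f) // cpu_usage map[region:eu-east] idle_percentage
}
```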
#### Filter Templates:
@@ -207,8 +283,8 @@ templates = [
which would result in the following transformation:
```
cpu.load.us-west 100
=> cpu_load,region=us-west value=100
cpu.load.eu-east 100
=> cpu_load,region=eu-east value=100
mem.cached.localhost 256
=> mem_cached,host=localhost value=256
@@ -230,8 +306,8 @@ templates = [
would result in the following Graphite -> Telegraf transformation.
```
cpu.usage.idle.us-west 100
=> cpu_usage,region=us-west,datacenter=1a idle=100
cpu.usage.idle.eu-east 100
=> cpu_usage,region=eu-east,datacenter=1a idle=100
```
There are many more options available,
@@ -247,7 +323,7 @@ There are many more options available,
## measurement name suffix (for separating different commands)
name_suffix = "_mycollector"
## Data format to consume. This can be "json", "influx" or "graphite" (line-protocol)
## Data format to consume.
## Each data format has it's own unique set of configuration options, read
## more about them here:
## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md
@@ -262,13 +338,37 @@ There are many more options available,
## similar to the line protocol format. There can be only one default template.
## Templates support below format:
## 1. filter + template
## 2. filter + template + extra tag
## 2. filter + template + extra tag(s)
## 3. filter + template with field key
## 4. default template
templates = [
"*.app env.service.resource.measurement",
"stats.* .host.measurement* region=us-west,agent=sensu",
"stats.* .host.measurement* region=eu-east,agent=sensu",
"stats2.* .host.measurement.field",
"measurement*"
]
```
# Nagios:
There are no additional configuration options for Nagios line-protocol. The
metrics are parsed directly into Telegraf metrics.
Note: the Nagios input data format is only supported in the `exec` input plugin.
#### Nagios Configuration:
```toml
[[inputs.exec]]
## Commands array
commands = ["/usr/lib/nagios/plugins/check_load", "-w 5,6,7 -c 7,8,9"]
## measurement name suffix (for separating different commands)
name_suffix = "_mycollector"
## Data format to consume.
## Each data format has it's own unique set of configuration options, read
## more about them here:
## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md
data_format = "nagios"
```


@@ -1,5 +1,11 @@
# Telegraf Output Data Formats
Telegraf is able to serialize metrics into the following output data formats:
1. [InfluxDB Line Protocol](https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_OUTPUT.md#influx)
1. [JSON](https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_OUTPUT.md#json)
1. [Graphite](https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_OUTPUT.md#graphite)
Telegraf metrics, like InfluxDB
[points](https://docs.influxdata.com/influxdb/v0.10/write_protocols/line/),
are a combination of four basic parts:
@@ -29,7 +35,7 @@ config option, for example, in the `file` output plugin:
## Files to write to, "stdout" is a specially handled file.
files = ["stdout"]
## Data format to output. This can be "influx" or "graphite"
## Data format to output.
## Each data format has it's own unique set of configuration options, read
## more about them here:
## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_OUTPUT.md
@@ -41,34 +47,46 @@ config option, for example, in the `file` output plugin:
Each data_format has an additional set of configuration options available, which
I'll go over below.
## Influx:
# Influx:
There are no additional configuration options for InfluxDB line-protocol. The
metrics are serialized directly into InfluxDB line-protocol.
#### Influx Configuration:
### Influx Configuration:
```toml
[[outputs.file]]
## Files to write to, "stdout" is a specially handled file.
files = ["stdout", "/tmp/metrics.out"]
## Data format to output. This can be "influx" or "graphite"
## Data format to output.
## Each data format has it's own unique set of configuration options, read
## more about them here:
## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_OUTPUT.md
data_format = "influx"
```
## Graphite:
# Graphite:
The Graphite data format translates Telegraf metrics into _dot_ buckets.
The format is:
The Graphite data format translates Telegraf metrics into _dot_ buckets. A
template can be specified for the output of Telegraf metrics into Graphite
buckets. The default template is:
```
[prefix].[host tag].[all tags (alphabetical)].[measurement name].[field name] value timestamp
template = "host.tags.measurement.field"
```
In the above template, we have four parts:
1. _host_ is a tag key. This can be any tag key that is in the Telegraf
metric(s). If the key doesn't exist, it will be ignored. If it does exist, the
tag value will be filled in.
1. _tags_ is a special keyword that outputs all remaining tag values, separated
by dots and in alphabetical order (by tag key). These will be filled after all
tag keys are filled.
1. _measurement_ is a special keyword that outputs the measurement name.
1. _field_ is a special keyword that outputs the field name.
Which means the following influx metric -> graphite conversion would happen:
```
@@ -78,20 +96,55 @@ tars.cpu-total.us-east-1.cpu.usage_user 0.89 1455320690
tars.cpu-total.us-east-1.cpu.usage_idle 98.09 1455320690
```
`prefix` is a configuration option when using the graphite output data format.
#### Graphite Configuration:
### Graphite Configuration:
```toml
[[outputs.file]]
## Files to write to, "stdout" is a specially handled file.
files = ["stdout", "/tmp/metrics.out"]
## Data format to output. This can be "influx" or "graphite"
## Data format to output.
## Each data format has it's own unique set of configuration options, read
## more about them here:
## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_OUTPUT.md
data_format = "influx"
data_format = "graphite"
# prefix each graphite bucket
prefix = "telegraf"
# graphite template
template = "host.tags.measurement.field"
```
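A compact sketch of the default "host.tags.measurement.field" expansion
described above, assuming the remaining tags are sorted by key and joined with
dots; this is illustrative, not the serializer Telegraf ships:
```go
package main

import (
	"fmt"
	"sort"
	"strings"
)

// graphiteLine builds one graphite line for a single field, following the
// host.tags.measurement.field template with an optional prefix.
func graphiteLine(prefix, measurement string, tags map[string]string, field string, value float64, ts int64) string {
	// Remaining tag values, alphabetical by tag key, after the host tag.
	var keys []string
	for k := range tags {
		if k != "host" {
			keys = append(keys, k)
		}
	}
	sort.Strings(keys)

	parts := []string{}
	if prefix != "" {
		parts = append(parts, prefix)
	}
	parts = append(parts, tags["host"])
	for _, k := range keys {
		parts = append(parts, tags[k])
	}
	parts = append(parts, measurement, field)

	return fmt.Sprintf("%s %v %d", strings.Join(parts, "."), value, ts)
}

func main() {
	tags := map[string]string{"host": "tars", "cpu": "cpu-total", "datacenter": "us-east-1"}
	fmt.Println(graphiteLine("", "cpu", tags, "usage_idle", 98.09, 1455320690))
	// tars.cpu-total.us-east-1.cpu.usage_idle 98.09 1455320690
}
```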
# JSON:
The JSON data format serializes Telegraf metrics into JSON. The format is:
```json
{
"fields":{
"field_1":30,
"field_2":4,
"field_N":59,
"n_images":660
},
"name":"docker",
"tags":{
"host":"raynor"
},
"timestamp":1458229140
}
```
### JSON Configuration:
```toml
[[outputs.file]]
## Files to write to, "stdout" is a specially handled file.
files = ["stdout", "/tmp/metrics.out"]
## Data format to output.
## Each data format has it's own unique set of configuration options, read
## more about them here:
## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_OUTPUT.md
data_format = "json"
```
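For reference, the same shape can be produced with encoding/json; a small
sketch assuming a struct that mirrors the fields/name/tags/timestamp layout
shown above:
```go
package main

import (
	"encoding/json"
	"fmt"
)

// jsonMetric mirrors the serialized layout shown above.
type jsonMetric struct {
	Fields    map[string]interface{} `json:"fields"`
	Name      string                 `json:"name"`
	Tags      map[string]string      `json:"tags"`
	Timestamp int64                  `json:"timestamp"`
}

func main() {
	m := jsonMetric{
		Fields:    map[string]interface{}{"field_1": 30, "n_images": 660},
		Name:      "docker",
		Tags:      map[string]string{"host": "raynor"},
		Timestamp: 1458229140,
	}

	out, err := json.Marshal(m)
	if err != nil {
		panic(err)
	}
	fmt.Println(string(out))
	// {"fields":{"field_1":30,"n_images":660},"name":"docker","tags":{"host":"raynor"},"timestamp":1458229140}
}
```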


@@ -16,6 +16,7 @@
- github.com/hashicorp/go-msgpack [BSD LICENSE](https://github.com/hashicorp/go-msgpack/blob/master/LICENSE)
- github.com/hashicorp/raft [MPL LICENSE](https://github.com/hashicorp/raft/blob/master/LICENSE)
- github.com/hashicorp/raft-boltdb [MPL LICENSE](https://github.com/hashicorp/raft-boltdb/blob/master/LICENSE)
- github.com/kardianos/service [ZLIB LICENSE](https://github.com/kardianos/service/blob/master/LICENSE) (License not named but matches word for word with ZLib)
- github.com/lib/pq [MIT LICENSE](https://github.com/lib/pq/blob/master/LICENSE.md)
- github.com/matttproud/golang_protobuf_extensions [APACHE LICENSE](https://github.com/matttproud/golang_protobuf_extensions/blob/master/LICENSE)
- github.com/naoina/go-stringutil [MIT LICENSE](https://github.com/naoina/go-stringutil/blob/master/LICENSE)
@@ -28,6 +29,5 @@
- github.com/wvanbergen/kazoo-go [MIT LICENSE](https://github.com/wvanbergen/kazoo-go/blob/master/MIT-LICENSE)
- gopkg.in/dancannon/gorethink.v1 [APACHE LICENSE](https://github.com/dancannon/gorethink/blob/v1.1.2/LICENSE)
- gopkg.in/mgo.v2 [BSD LICENSE](https://github.com/go-mgo/mgo/blob/v2/LICENSE)
- golang.org/x/crypto/* [BSD LICENSE](https://github.com/golang/crypto/blob/master/LICENSE)
- internal Glob function [MIT LICENSE](https://github.com/ryanuber/go-glob/blob/master/LICENSE)
- golang.org/x/crypto/ [BSD LICENSE](https://github.com/golang/crypto/blob/master/LICENSE)

docs/WINDOWS_SERVICE.md Normal file

@@ -0,0 +1,39 @@
# Running Telegraf as a Windows Service
Telegraf natively supports running as a Windows Service. Outlined below are
the general steps to set it up.
1. Obtain the telegraf windows distribution
2. Create the directory `C:\Program Files\Telegraf` (if you install in a different
location simply specify the `-config` parameter with the desired location)
3. Place the telegraf.exe and the telegraf.conf config file into `C:\Program Files\Telegraf`
4. To install the service into the Windows Service Manager, run the following in PowerShell as an administrator (If necessary, you can wrap any spaces in the file paths in double quotes ""):
```
> C:\"Program Files"\Telegraf\telegraf.exe --service install
```
5. Edit the configuration file to meet your needs
6. To check that it works, run:
```
> C:\"Program Files"\Telegraf\telegraf.exe --config C:\"Program Files"\Telegraf\telegraf.conf --test
```
7. To start collecting data, run:
```
> net start telegraf
```
## Other supported operations
Telegraf can manage its own service through the --service flag:
| Command | Effect |
|------------------------------------|-------------------------------|
| `telegraf.exe --service install` | Install telegraf as a service |
| `telegraf.exe --service uninstall` | Remove the telegraf service |
| `telegraf.exe --service start` | Start the telegraf service |
| `telegraf.exe --service stop` | Stop the telegraf service |

File diff suppressed because it is too large


@@ -42,10 +42,14 @@
## ie, a jitter of 5s and interval 10s means flushes will happen every 10-15s
flush_jitter = "0s"
## Logging configuration:
## Run telegraf in debug mode
debug = false
## Run telegraf in quiet mode
quiet = false
## Specify the log file name. The empty string means to log to stdout.
logfile = "/Program Files/Telegraf/telegraf.log"
## Override default hostname, if empty use os.Hostname()
hostname = ""
@@ -85,7 +89,7 @@
# Windows Performance Counters plugin.
# These are the recommended method of monitoring system metrics on windows,
# as the regular system plugins (inputs.cpu, inputs.mem, etc.) rely on WMI,
# which utilizes a lot of system resources.
# which utilize more system resources.
#
# See more configuration examples at:
# https://github.com/influxdata/telegraf/tree/master/plugins/inputs/win_perf_counters
@@ -95,70 +99,104 @@
# Processor usage, alternative to native, reports on a per core.
ObjectName = "Processor"
Instances = ["*"]
Counters = ["% Idle Time", "% Interrupt Time", "% Privileged Time", "% User Time", "% Processor Time"]
Counters = [
"% Idle Time",
"% Interrupt Time",
"% Privileged Time",
"% User Time",
"% Processor Time",
]
Measurement = "win_cpu"
#IncludeTotal=false #Set to true to include _Total instance when querying for all (*).
# Set to true to include _Total instance when querying for all (*).
#IncludeTotal=false
[[inputs.win_perf_counters.object]]
# Disk times and queues
ObjectName = "LogicalDisk"
Instances = ["*"]
Counters = ["% Idle Time", "% Disk Time","% Disk Read Time", "% Disk Write Time", "% User Time", "Current Disk Queue Length"]
Counters = [
"% Idle Time",
"% Disk Time","% Disk Read Time",
"% Disk Write Time",
"% User Time",
"Current Disk Queue Length",
]
Measurement = "win_disk"
#IncludeTotal=false #Set to true to include _Total instance when querying for all (*).
# Set to true to include _Total instance when querying for all (*).
#IncludeTotal=false
[[inputs.win_perf_counters.object]]
ObjectName = "System"
Counters = ["Context Switches/sec","System Calls/sec"]
Counters = [
"Context Switches/sec",
"System Calls/sec",
]
Instances = ["------"]
Measurement = "win_system"
#IncludeTotal=false #Set to true to include _Total instance when querying for all (*).
# Set to true to include _Total instance when querying for all (*).
#IncludeTotal=false
[[inputs.win_perf_counters.object]]
# Example query where the Instance portion must be removed to get data back, such as from the Memory object.
# Example query where the Instance portion must be removed to get data back,
# such as from the Memory object.
ObjectName = "Memory"
Counters = ["Available Bytes","Cache Faults/sec","Demand Zero Faults/sec","Page Faults/sec","Pages/sec","Transition Faults/sec","Pool Nonpaged Bytes","Pool Paged Bytes"]
Instances = ["------"] # Use 6 x - to remove the Instance bit from the query.
Counters = [
"Available Bytes",
"Cache Faults/sec",
"Demand Zero Faults/sec",
"Page Faults/sec",
"Pages/sec",
"Transition Faults/sec",
"Pool Nonpaged Bytes",
"Pool Paged Bytes",
]
# Use 6 x - to remove the Instance bit from the query.
Instances = ["------"]
Measurement = "win_mem"
#IncludeTotal=false #Set to true to include _Total instance when querying for all (*).
# Set to true to include _Total instance when querying for all (*).
#IncludeTotal=false
# Windows system plugins using WMI (disabled by default, using
# win_perf_counters over WMI is recommended)
# Read metrics about cpu usage
#[[inputs.cpu]]
## Whether to report per-cpu stats or not
#percpu = true
## Whether to report total system cpu stats or not
#totalcpu = true
## Comment this line if you want the raw CPU time metrics
#fielddrop = ["time_*"]
# # Read metrics about cpu usage
# [[inputs.cpu]]
# ## Whether to report per-cpu stats or not
# percpu = true
# ## Whether to report total system cpu stats or not
# totalcpu = true
# ## Comment this line if you want the raw CPU time metrics
# fielddrop = ["time_*"]
# Read metrics about disk usage by mount point
#[[inputs.disk]]
## By default, telegraf gather stats for all mountpoints.
## Setting mountpoints will restrict the stats to the specified mountpoints.
## mount_points=["/"]
## Ignore some mountpoints by filesystem type. For example (dev)tmpfs (usually
## present on /run, /var/run, /dev/shm or /dev).
#ignore_fs = ["tmpfs", "devtmpfs"]
# # Read metrics about disk usage by mount point
# [[inputs.disk]]
# ## By default, telegraf gather stats for all mountpoints.
# ## Setting mountpoints will restrict the stats to the specified mountpoints.
# ## mount_points=["/"]
#
# ## Ignore some mountpoints by filesystem type. For example (dev)tmpfs (usually
# ## present on /run, /var/run, /dev/shm or /dev).
# # ignore_fs = ["tmpfs", "devtmpfs"]
# Read metrics about disk IO by device
#[[inputs.diskio]]
## By default, telegraf will gather stats for all devices including
## disk partitions.
## Setting devices will restrict the stats to the specified devices.
## devices = ["sda", "sdb"]
## Uncomment the following line if you do not need disk serial numbers.
## skip_serial_number = true
# Read metrics about memory usage
#[[inputs.mem]]
# no configuration
# # Read metrics about disk IO by device
# [[inputs.diskio]]
# ## By default, telegraf will gather stats for all devices including
# ## disk partitions.
# ## Setting devices will restrict the stats to the specified devices.
# ## devices = ["sda", "sdb"]
# ## Uncomment the following line if you do not need disk serial numbers.
# ## skip_serial_number = true
# Read metrics about swap memory usage
#[[inputs.swap]]
# no configuration
# # Read metrics about memory usage
# [[inputs.mem]]
# # no configuration
# # Read metrics about swap memory usage
# [[inputs.swap]]
# # no configuration

filter/filter.go (new file, 79 lines)

@@ -0,0 +1,79 @@
package filter
import (
"strings"
"github.com/gobwas/glob"
)
type Filter interface {
Match(string) bool
}
// Compile takes a list of string filters and returns a Filter interface
// for matching a given string against the filter list. The filter list
// supports glob matching too, ie:
//
// f, _ := Compile([]string{"cpu", "mem", "net*"})
// f.Match("cpu") // true
// f.Match("network") // true
// f.Match("memory") // false
//
func Compile(filters []string) (Filter, error) {
// return if there is nothing to compile
if len(filters) == 0 {
return nil, nil
}
// check if we can compile a non-glob filter
noGlob := true
for _, filter := range filters {
if hasMeta(filter) {
noGlob = false
break
}
}
switch {
case noGlob:
// return non-globbing filter if not needed.
return compileFilterNoGlob(filters), nil
case len(filters) == 1:
return glob.Compile(filters[0])
default:
return glob.Compile("{" + strings.Join(filters, ",") + "}")
}
}
// hasMeta reports whether path contains any magic glob characters.
func hasMeta(s string) bool {
return strings.IndexAny(s, "*?[") >= 0
}
type filter struct {
m map[string]struct{}
}
func (f *filter) Match(s string) bool {
_, ok := f.m[s]
return ok
}
type filtersingle struct {
s string
}
func (f *filtersingle) Match(s string) bool {
return f.s == s
}
func compileFilterNoGlob(filters []string) Filter {
if len(filters) == 1 {
return &filtersingle{s: filters[0]}
}
out := filter{m: make(map[string]struct{})}
for _, filter := range filters {
out.m[filter] = struct{}{}
}
return &out
}

filter/filter_test.go (new file, 96 lines)

@@ -0,0 +1,96 @@
package filter
import (
"testing"
"github.com/stretchr/testify/assert"
)
func TestCompile(t *testing.T) {
f, err := Compile([]string{})
assert.NoError(t, err)
assert.Nil(t, f)
f, err = Compile([]string{"cpu"})
assert.NoError(t, err)
assert.True(t, f.Match("cpu"))
assert.False(t, f.Match("cpu0"))
assert.False(t, f.Match("mem"))
f, err = Compile([]string{"cpu*"})
assert.NoError(t, err)
assert.True(t, f.Match("cpu"))
assert.True(t, f.Match("cpu0"))
assert.False(t, f.Match("mem"))
f, err = Compile([]string{"cpu", "mem"})
assert.NoError(t, err)
assert.True(t, f.Match("cpu"))
assert.False(t, f.Match("cpu0"))
assert.True(t, f.Match("mem"))
f, err = Compile([]string{"cpu", "mem", "net*"})
assert.NoError(t, err)
assert.True(t, f.Match("cpu"))
assert.False(t, f.Match("cpu0"))
assert.True(t, f.Match("mem"))
assert.True(t, f.Match("network"))
}
var benchbool bool
func BenchmarkFilterSingleNoGlobFalse(b *testing.B) {
f, _ := Compile([]string{"cpu"})
var tmp bool
for n := 0; n < b.N; n++ {
tmp = f.Match("network")
}
benchbool = tmp
}
func BenchmarkFilterSingleNoGlobTrue(b *testing.B) {
f, _ := Compile([]string{"cpu"})
var tmp bool
for n := 0; n < b.N; n++ {
tmp = f.Match("cpu")
}
benchbool = tmp
}
func BenchmarkFilter(b *testing.B) {
f, _ := Compile([]string{"cpu", "mem", "net*"})
var tmp bool
for n := 0; n < b.N; n++ {
tmp = f.Match("network")
}
benchbool = tmp
}
func BenchmarkFilterNoGlob(b *testing.B) {
f, _ := Compile([]string{"cpu", "mem", "net"})
var tmp bool
for n := 0; n < b.N; n++ {
tmp = f.Match("net")
}
benchbool = tmp
}
func BenchmarkFilter2(b *testing.B) {
f, _ := Compile([]string{"aa", "bb", "c", "ad", "ar", "at", "aq",
"aw", "az", "axxx", "ab", "cpu", "mem", "net*"})
var tmp bool
for n := 0; n < b.N; n++ {
tmp = f.Match("network")
}
benchbool = tmp
}
func BenchmarkFilter2NoGlob(b *testing.B) {
f, _ := Compile([]string{"aa", "bb", "c", "ad", "ar", "at", "aq",
"aw", "az", "axxx", "ab", "cpu", "mem", "net"})
var tmp bool
for n := 0; n < b.N; n++ {
tmp = f.Match("net")
}
benchbool = tmp
}

internal/buffer/buffer.go (new file, 77 lines)

@@ -0,0 +1,77 @@
package buffer
import (
"github.com/influxdata/telegraf"
)
// Buffer is an object for storing metrics in a circular buffer.
type Buffer struct {
buf chan telegraf.Metric
// total dropped metrics
drops int
// total metrics added
total int
}
// NewBuffer returns a Buffer
// size is the maximum number of metrics that Buffer will cache. If Add is
// called when the buffer is full, then the oldest metric(s) will be dropped.
func NewBuffer(size int) *Buffer {
return &Buffer{
buf: make(chan telegraf.Metric, size),
}
}
// IsEmpty returns true if Buffer is empty.
func (b *Buffer) IsEmpty() bool {
return len(b.buf) == 0
}
// Len returns the current length of the buffer.
func (b *Buffer) Len() int {
return len(b.buf)
}
// Drops returns the total number of dropped metrics that have occurred in this
// buffer since instantiation.
func (b *Buffer) Drops() int {
return b.drops
}
// Total returns the total number of metrics that have been added to this buffer.
func (b *Buffer) Total() int {
return b.total
}
// Add adds metrics to the buffer.
func (b *Buffer) Add(metrics ...telegraf.Metric) {
for i, _ := range metrics {
b.total++
select {
case b.buf <- metrics[i]:
default:
b.drops++
<-b.buf
b.buf <- metrics[i]
}
}
}
// Batch returns a batch of metrics of size batchSize.
// the batch will be of maximum length batchSize. It can be less than batchSize,
// if the length of Buffer is less than batchSize.
func (b *Buffer) Batch(batchSize int) []telegraf.Metric {
n := min(len(b.buf), batchSize)
out := make([]telegraf.Metric, n)
for i := 0; i < n; i++ {
out[i] = <-b.buf
}
return out
}
func min(a, b int) int {
if b < a {
return b
}
return a
}
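
To make the drop-oldest semantics concrete, here is a minimal in-package sketch (not part of the diff); `testutil.TestMetric` is the same helper the tests below use, and the sizes are illustrative:

```go
package buffer

import (
	"fmt"

	"github.com/influxdata/telegraf/testutil"
)

// exampleUsage is an illustrative sketch, not part of the real package:
// fill a small buffer past its capacity and watch the oldest metrics drop.
func exampleUsage() {
	b := NewBuffer(3) // cache at most 3 metrics
	for i := 0; i < 5; i++ {
		b.Add(testutil.TestMetric(i, "mymetric"))
	}
	fmt.Println(b.Len(), b.Drops(), b.Total()) // 3 2 5

	batch := b.Batch(10) // drain up to 10; only the 3 newest remain
	fmt.Println(len(batch))                    // 3
}
```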


@@ -0,0 +1,94 @@
package buffer
import (
"testing"
"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/testutil"
"github.com/stretchr/testify/assert"
)
var metricList = []telegraf.Metric{
testutil.TestMetric(2, "mymetric1"),
testutil.TestMetric(1, "mymetric2"),
testutil.TestMetric(11, "mymetric3"),
testutil.TestMetric(15, "mymetric4"),
testutil.TestMetric(8, "mymetric5"),
}
func BenchmarkAddMetrics(b *testing.B) {
buf := NewBuffer(10000)
m := testutil.TestMetric(1, "mymetric")
for n := 0; n < b.N; n++ {
buf.Add(m)
}
}
func TestNewBufferBasicFuncs(t *testing.T) {
b := NewBuffer(10)
assert.True(t, b.IsEmpty())
assert.Zero(t, b.Len())
assert.Zero(t, b.Drops())
assert.Zero(t, b.Total())
m := testutil.TestMetric(1, "mymetric")
b.Add(m)
assert.False(t, b.IsEmpty())
assert.Equal(t, b.Len(), 1)
assert.Equal(t, b.Drops(), 0)
assert.Equal(t, b.Total(), 1)
b.Add(metricList...)
assert.False(t, b.IsEmpty())
assert.Equal(t, b.Len(), 6)
assert.Equal(t, b.Drops(), 0)
assert.Equal(t, b.Total(), 6)
}
func TestDroppingMetrics(t *testing.T) {
b := NewBuffer(10)
// Add up to the size of the buffer
b.Add(metricList...)
b.Add(metricList...)
assert.False(t, b.IsEmpty())
assert.Equal(t, b.Len(), 10)
assert.Equal(t, b.Drops(), 0)
assert.Equal(t, b.Total(), 10)
// Add 5 more and verify they were dropped
b.Add(metricList...)
assert.False(t, b.IsEmpty())
assert.Equal(t, b.Len(), 10)
assert.Equal(t, b.Drops(), 5)
assert.Equal(t, b.Total(), 15)
}
func TestGettingBatches(t *testing.T) {
b := NewBuffer(20)
// Verify that the buffer returned is smaller than requested when there are
// not as many items as requested.
b.Add(metricList...)
batch := b.Batch(10)
assert.Len(t, batch, 5)
// Verify that the buffer is now empty
assert.True(t, b.IsEmpty())
assert.Zero(t, b.Len())
assert.Zero(t, b.Drops())
assert.Equal(t, b.Total(), 5)
// Verify that the buffer returned is not more than the size requested
b.Add(metricList...)
batch = b.Batch(3)
assert.Len(t, batch, 3)
// Verify that buffer is not empty
assert.False(t, b.IsEmpty())
assert.Equal(t, b.Len(), 2)
assert.Equal(t, b.Drops(), 0)
assert.Equal(t, b.Total(), 10)
}


@@ -0,0 +1,49 @@
package aws
import (
"github.com/aws/aws-sdk-go/aws"
"github.com/aws/aws-sdk-go/aws/client"
"github.com/aws/aws-sdk-go/aws/credentials"
"github.com/aws/aws-sdk-go/aws/credentials/stscreds"
"github.com/aws/aws-sdk-go/aws/session"
)
type CredentialConfig struct {
Region string
AccessKey string
SecretKey string
RoleARN string
Profile string
Filename string
Token string
}
func (c *CredentialConfig) Credentials() client.ConfigProvider {
if c.RoleARN != "" {
return c.assumeCredentials()
} else {
return c.rootCredentials()
}
}
func (c *CredentialConfig) rootCredentials() client.ConfigProvider {
config := &aws.Config{
Region: aws.String(c.Region),
}
if c.AccessKey != "" || c.SecretKey != "" {
config.Credentials = credentials.NewStaticCredentials(c.AccessKey, c.SecretKey, c.Token)
} else if c.Profile != "" || c.Filename != "" {
config.Credentials = credentials.NewSharedCredentials(c.Filename, c.Profile)
}
return session.New(config)
}
func (c *CredentialConfig) assumeCredentials() client.ConfigProvider {
rootCredentials := c.rootCredentials()
config := &aws.Config{
Region: aws.String(c.Region),
}
config.Credentials = stscreds.NewCredentials(rootCredentials, c.RoleARN)
return session.New(config)
}
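
A rough in-package sketch of how a plugin might consume this credential helper (not part of the diff); the region, role ARN, and profile values are hypothetical, and `cloudwatch.New` stands in for any aws-sdk-go service constructor:

```go
package aws

import "github.com/aws/aws-sdk-go/service/cloudwatch"

// exampleUsage is an illustrative sketch, not part of the real package:
// resolve credentials once and hand the resulting ConfigProvider to an
// aws-sdk-go service constructor. All field values here are hypothetical.
func exampleUsage() *cloudwatch.CloudWatch {
	cfg := &CredentialConfig{
		Region:  "us-east-1",
		RoleARN: "arn:aws:iam::123456789012:role/telegraf", // triggers STS assume-role
		Profile: "default",                                 // used for the root credentials
	}
	return cloudwatch.New(cfg.Credentials())
}
```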


@@ -1,11 +1,15 @@
package config
import (
"bytes"
"errors"
"fmt"
"io/ioutil"
"log"
"os"
"path/filepath"
"regexp"
"runtime"
"sort"
"strings"
"time"
@@ -19,9 +23,22 @@ import (
"github.com/influxdata/telegraf/plugins/serializers"
"github.com/influxdata/config"
"github.com/influxdata/toml"
"github.com/influxdata/toml/ast"
)
var (
// Default input plugins
inputDefaults = []string{"cpu", "mem", "swap", "system", "kernel",
"processes", "disk", "diskio"}
// Default output plugins
outputDefaults = []string{"influxdb"}
// envVarRe is a regex to find environment variables in the config file
envVarRe = regexp.MustCompile(`\$\w+`)
)
// Config specifies the URL/user/password for the database that telegraf
// will be logging to, as well as all the plugins that the user has
// specified
@@ -31,8 +48,8 @@ type Config struct {
OutputFilters []string
Agent *AgentConfig
Inputs []*internal_models.RunningInput
Outputs []*internal_models.RunningOutput
Inputs []*models.RunningInput
Outputs []*models.RunningOutput
}
func NewConfig() *Config {
@@ -42,12 +59,11 @@ func NewConfig() *Config {
Interval: internal.Duration{Duration: 10 * time.Second},
RoundInterval: true,
FlushInterval: internal.Duration{Duration: 10 * time.Second},
FlushJitter: internal.Duration{Duration: 5 * time.Second},
},
Tags: make(map[string]string),
Inputs: make([]*internal_models.RunningInput, 0),
Outputs: make([]*internal_models.RunningOutput, 0),
Inputs: make([]*models.RunningInput, 0),
Outputs: make([]*models.RunningOutput, 0),
InputFilters: make([]string, 0),
OutputFilters: make([]string, 0),
}
@@ -62,6 +78,14 @@ type AgentConfig struct {
// ie, if Interval=10s then always collect on :00, :10, :20, etc.
RoundInterval bool
// By default, precision will be set to the same timestamp order as the
// collection interval, with the maximum being 1s.
// ie, when interval = "10s", precision will be "1s"
// when interval = "250ms", precision will be "1ms"
// Precision will NOT be used for service inputs. It is up to each individual
// service input to set the timestamp at the appropriate precision.
Precision internal.Duration
// CollectionJitter is used to jitter the collection by a random amount.
// Each plugin will sleep for a random time within jitter before collecting.
// This can be used to avoid many plugins querying things like sysfs at the
@@ -77,9 +101,15 @@ type AgentConfig struct {
// ie, a jitter of 5s and interval 10s means flushes will happen every 10-15s
FlushJitter internal.Duration
// MetricBatchSize is the maximum number of metrics that are written to an
// output plugin in one call.
MetricBatchSize int
// MetricBufferLimit is the max number of metrics that each output plugin
// will cache. The buffer is cleared when a successful write occurs. When
// full, the oldest metrics will be overwritten.
// full, the oldest metrics will be overwritten. This number should be a
// multiple of MetricBatchSize. Due to the current implementation, it cannot
// be less than 2 times MetricBatchSize.
MetricBufferLimit int
// FlushBufferWhenFull tells Telegraf to flush the metric buffer whenever
@@ -87,18 +117,21 @@ type AgentConfig struct {
// does _not_ deactivate FlushInterval.
FlushBufferWhenFull bool
// TODO(cam): Remove UTC and Precision parameters, they are no longer
// TODO(cam): Remove the UTC parameter, it is no longer
// valid for the agent config. Leaving it here for now for backwards-
// compatibility
UTC bool `toml:"utc"`
Precision string
UTC bool `toml:"utc"`
// Debug is the option for running in debug mode
Debug bool
// Logfile specifies the file to send logs to
Logfile string
// Quiet is the option for running in quiet mode
Quiet bool
Hostname string
Quiet bool
Hostname string
OmitHostname bool
}
// Inputs returns a list of strings of the configured inputs.
@@ -110,7 +143,7 @@ func (c *Config) InputNames() []string {
return name
}
// Outputs returns a list of strings of the configured inputs.
// Outputs returns a list of strings of the configured outputs.
func (c *Config) OutputNames() []string {
var name []string
for _, output := range c.Outputs {
@@ -134,20 +167,28 @@ func (c *Config) ListTags() string {
}
var header = `# Telegraf Configuration
#
# Telegraf is entirely plugin driven. All metrics are gathered from the
# declared inputs, and sent to the declared outputs.
#
# Plugins must be declared in here to be active.
# To deactivate a plugin, comment out the name and any variables.
#
# Use 'telegraf -config telegraf.conf -test' to see what metrics a config
# file would generate.
#
# Environment variables can be used anywhere in this config file, simply prepend
# them with $. For strings the variable must be within quotes (ie, "$STR_VAR"),
# for numbers and booleans they should be plain (ie, $INT_VAR, $BOOL_VAR)
# Global tags can be specified here in key="value" format.
[global_tags]
# dc = "us-east-1" # will tag all metrics with dc=us-east-1
# rack = "1a"
## Environment variables can be used as tags, and throughout the config file
# user = "$USER"
# Configuration for telegraf agent
[agent]
@@ -157,11 +198,16 @@ var header = `# Telegraf Configuration
## ie, if interval="10s" then always collect on :00, :10, :20, etc.
round_interval = true
## Telegraf will cache metric_buffer_limit metrics for each output, and will
## flush this buffer on a successful write.
metric_buffer_limit = 1000
## Flush the buffer whenever full, regardless of flush_interval.
flush_buffer_when_full = true
## Telegraf will send metrics to outputs in batches of at most
## metric_batch_size metrics.
## This controls the size of writes that Telegraf sends to output plugins.
metric_batch_size = 1000
## For failed writes, telegraf will cache metric_buffer_limit metrics for each
## output, and will flush this buffer on a successful write. Oldest metrics
## are dropped first when this buffer fills.
## This buffer only fills when writes fail to output plugin(s).
metric_buffer_limit = 10000
## Collection jitter is used to jitter the collection by a random amount.
## Each plugin will sleep for a random time within jitter before collecting.
@@ -177,40 +223,127 @@ var header = `# Telegraf Configuration
## ie, a jitter of 5s and interval 10s means flushes will happen every 10-15s
flush_jitter = "0s"
## Run telegraf in debug mode
## By default, precision will be set to the same timestamp order as the
## collection interval, with the maximum being 1s.
## Precision will NOT be used for service inputs, such as logparser and statsd.
## Valid values are "ns", "us" (or "µs"), "ms", "s".
precision = ""
## Logging configuration:
## Run telegraf with debug log messages.
debug = false
## Run telegraf in quiet mode
## Run telegraf in quiet mode (error log messages only).
quiet = false
## Specify the log file name. The empty string means to log to stdout.
logfile = ""
## Override default hostname, if empty use os.Hostname()
hostname = ""
## If set to true, do not set the "host" tag in the telegraf agent.
omit_hostname = false
#
# OUTPUTS:
#
###############################################################################
# OUTPUT PLUGINS #
###############################################################################
`
var pluginHeader = `
#
# INPUTS:
#
var inputHeader = `
###############################################################################
# INPUT PLUGINS #
###############################################################################
`
var serviceInputHeader = `
#
# SERVICE INPUTS:
#
###############################################################################
# SERVICE INPUT PLUGINS #
###############################################################################
`
// PrintSampleConfig prints the sample config
func PrintSampleConfig(pluginFilters []string, outputFilters []string) {
func PrintSampleConfig(inputFilters []string, outputFilters []string) {
fmt.Printf(header)
if len(outputFilters) != 0 {
printFilteredOutputs(outputFilters, false)
} else {
printFilteredOutputs(outputDefaults, false)
// Print non-default outputs, commented
var pnames []string
for pname := range outputs.Outputs {
if !sliceContains(pname, outputDefaults) {
pnames = append(pnames, pname)
}
}
sort.Strings(pnames)
printFilteredOutputs(pnames, true)
}
fmt.Printf(inputHeader)
if len(inputFilters) != 0 {
printFilteredInputs(inputFilters, false)
} else {
printFilteredInputs(inputDefaults, false)
// Print non-default inputs, commented
var pnames []string
for pname := range inputs.Inputs {
if !sliceContains(pname, inputDefaults) {
pnames = append(pnames, pname)
}
}
sort.Strings(pnames)
printFilteredInputs(pnames, true)
}
}
func printFilteredInputs(inputFilters []string, commented bool) {
// Filter inputs
var pnames []string
for pname := range inputs.Inputs {
if sliceContains(pname, inputFilters) {
pnames = append(pnames, pname)
}
}
sort.Strings(pnames)
// cache service inputs to print them at the end
servInputs := make(map[string]telegraf.ServiceInput)
// for alphabetical looping:
servInputNames := []string{}
// Print Inputs
for _, pname := range pnames {
creator := inputs.Inputs[pname]
input := creator()
switch p := input.(type) {
case telegraf.ServiceInput:
servInputs[pname] = p
servInputNames = append(servInputNames, pname)
continue
}
printConfig(pname, input, "inputs", commented)
}
// Print Service Inputs
if len(servInputs) == 0 {
return
}
sort.Strings(servInputNames)
fmt.Printf(serviceInputHeader)
for _, name := range servInputNames {
printConfig(name, servInputs[name], "inputs", commented)
}
}
func printFilteredOutputs(outputFilters []string, commented bool) {
// Filter outputs
var onames []string
for oname := range outputs.Outputs {
if len(outputFilters) == 0 || sliceContains(oname, outputFilters) {
if sliceContains(oname, outputFilters) {
onames = append(onames, oname)
}
}
@@ -220,38 +353,7 @@ func PrintSampleConfig(pluginFilters []string, outputFilters []string) {
for _, oname := range onames {
creator := outputs.Outputs[oname]
output := creator()
printConfig(oname, output, "outputs")
}
// Filter inputs
var pnames []string
for pname := range inputs.Inputs {
if len(pluginFilters) == 0 || sliceContains(pname, pluginFilters) {
pnames = append(pnames, pname)
}
}
sort.Strings(pnames)
// Print Inputs
fmt.Printf(pluginHeader)
servInputs := make(map[string]telegraf.ServiceInput)
for _, pname := range pnames {
creator := inputs.Inputs[pname]
input := creator()
switch p := input.(type) {
case telegraf.ServiceInput:
servInputs[pname] = p
continue
}
printConfig(pname, input, "inputs")
}
// Print Service Inputs
fmt.Printf(serviceInputHeader)
for name, input := range servInputs {
printConfig(name, input, "inputs")
printConfig(oname, output, "outputs", commented)
}
}
@@ -260,13 +362,26 @@ type printer interface {
SampleConfig() string
}
func printConfig(name string, p printer, op string) {
fmt.Printf("\n# %s\n[[%s.%s]]", p.Description(), op, name)
func printConfig(name string, p printer, op string, commented bool) {
comment := ""
if commented {
comment = "# "
}
fmt.Printf("\n%s# %s\n%s[[%s.%s]]", comment, p.Description(), comment,
op, name)
config := p.SampleConfig()
if config == "" {
fmt.Printf("\n # no configuration\n")
fmt.Printf("\n%s # no configuration\n\n", comment)
} else {
fmt.Printf(config)
lines := strings.Split(config, "\n")
for i, line := range lines {
if i == 0 || i == len(lines)-1 {
fmt.Print("\n")
continue
}
fmt.Print(strings.TrimRight(comment+line, " ") + "\n")
}
}
}
@@ -282,7 +397,7 @@ func sliceContains(name string, list []string) bool {
// PrintInputConfig prints the config usage of a single input.
func PrintInputConfig(name string) error {
if creator, ok := inputs.Inputs[name]; ok {
printConfig(name, creator(), "inputs")
printConfig(name, creator(), "inputs", false)
} else {
return errors.New(fmt.Sprintf("Input %s not found", name))
}
@@ -292,7 +407,7 @@ func PrintInputConfig(name string) error {
// PrintOutputConfig prints the config usage of a single output.
func PrintOutputConfig(name string) error {
if creator, ok := outputs.Outputs[name]; ok {
printConfig(name, creator(), "outputs")
printConfig(name, creator(), "outputs", false)
} else {
return errors.New(fmt.Sprintf("Output %s not found", name))
}
@@ -300,66 +415,111 @@ func PrintOutputConfig(name string) error {
}
func (c *Config) LoadDirectory(path string) error {
directoryEntries, err := ioutil.ReadDir(path)
if err != nil {
return err
}
for _, entry := range directoryEntries {
if entry.IsDir() {
continue
walkfn := func(thispath string, info os.FileInfo, _ error) error {
if info.IsDir() {
return nil
}
name := entry.Name()
name := info.Name()
if len(name) < 6 || name[len(name)-5:] != ".conf" {
continue
return nil
}
err := c.LoadConfig(filepath.Join(path, name))
err := c.LoadConfig(thispath)
if err != nil {
return err
}
return nil
}
return nil
return filepath.Walk(path, walkfn)
}
// Try to find a default config file at these locations (in order):
// 1. $TELEGRAF_CONFIG_PATH
// 2. $HOME/.telegraf/telegraf.conf
// 3. /etc/telegraf/telegraf.conf
//
func getDefaultConfigPath() (string, error) {
envfile := os.Getenv("TELEGRAF_CONFIG_PATH")
homefile := os.ExpandEnv("${HOME}/.telegraf/telegraf.conf")
etcfile := "/etc/telegraf/telegraf.conf"
if runtime.GOOS == "windows" {
etcfile = `C:\Program Files\Telegraf\telegraf.conf`
}
for _, path := range []string{envfile, homefile, etcfile} {
if _, err := os.Stat(path); err == nil {
log.Printf("I! Using config file: %s", path)
return path, nil
}
}
// if we got here, we didn't find a file in a default location
return "", fmt.Errorf("No config file specified, and could not find one"+
" in $TELEGRAF_CONFIG_PATH, %s, or %s", homefile, etcfile)
}
// LoadConfig loads the given config file and applies it to c
func (c *Config) LoadConfig(path string) error {
tbl, err := config.ParseFile(path)
var err error
if path == "" {
if path, err = getDefaultConfigPath(); err != nil {
return err
}
}
tbl, err := parseFile(path)
if err != nil {
return err
return fmt.Errorf("Error parsing %s, %s", path, err)
}
// Parse tags tables first:
for _, tableName := range []string{"tags", "global_tags"} {
if val, ok := tbl.Fields[tableName]; ok {
subTable, ok := val.(*ast.Table)
if !ok {
return fmt.Errorf("%s: invalid configuration", path)
}
if err = config.UnmarshalTable(subTable, c.Tags); err != nil {
log.Printf("E! Could not parse [global_tags] config\n")
return fmt.Errorf("Error parsing %s, %s", path, err)
}
}
}
// Parse agent table:
if val, ok := tbl.Fields["agent"]; ok {
subTable, ok := val.(*ast.Table)
if !ok {
return fmt.Errorf("%s: invalid configuration", path)
}
if err = config.UnmarshalTable(subTable, c.Agent); err != nil {
log.Printf("E! Could not parse [agent] config\n")
return fmt.Errorf("Error parsing %s, %s", path, err)
}
}
// Parse all the rest of the plugins:
for name, val := range tbl.Fields {
subTable, ok := val.(*ast.Table)
if !ok {
return errors.New("invalid configuration")
return fmt.Errorf("%s: invalid configuration", path)
}
switch name {
case "agent":
if err = config.UnmarshalTable(subTable, c.Agent); err != nil {
log.Printf("Could not parse [agent] config\n")
return err
}
case "global_tags", "tags":
if err = config.UnmarshalTable(subTable, c.Tags); err != nil {
log.Printf("Could not parse [global_tags] config\n")
return err
}
case "agent", "global_tags", "tags":
case "outputs":
for pluginName, pluginVal := range subTable.Fields {
switch pluginSubTable := pluginVal.(type) {
case *ast.Table:
if err = c.addOutput(pluginName, pluginSubTable); err != nil {
return err
return fmt.Errorf("Error parsing %s, %s", path, err)
}
case []*ast.Table:
for _, t := range pluginSubTable {
if err = c.addOutput(pluginName, t); err != nil {
return err
return fmt.Errorf("Error parsing %s, %s", path, err)
}
}
default:
return fmt.Errorf("Unsupported config format: %s",
pluginName)
return fmt.Errorf("Unsupported config format: %s, file %s",
pluginName, path)
}
}
case "inputs", "plugins":
@@ -367,30 +527,59 @@ func (c *Config) LoadConfig(path string) error {
switch pluginSubTable := pluginVal.(type) {
case *ast.Table:
if err = c.addInput(pluginName, pluginSubTable); err != nil {
return err
return fmt.Errorf("Error parsing %s, %s", path, err)
}
case []*ast.Table:
for _, t := range pluginSubTable {
if err = c.addInput(pluginName, t); err != nil {
return err
return fmt.Errorf("Error parsing %s, %s", path, err)
}
}
default:
return fmt.Errorf("Unsupported config format: %s",
pluginName)
return fmt.Errorf("Unsupported config format: %s, file %s",
pluginName, path)
}
}
// Assume it's an input for legacy config file support if no other
// identifiers are present
default:
if err = c.addInput(name, subTable); err != nil {
return err
return fmt.Errorf("Error parsing %s, %s", path, err)
}
}
}
return nil
}
// trimBOM trims the Byte-Order-Marks from the beginning of the file.
// this is for Windows compatibility only.
// see https://github.com/influxdata/telegraf/issues/1378
func trimBOM(f []byte) []byte {
return bytes.TrimPrefix(f, []byte("\xef\xbb\xbf"))
}
// parseFile loads a TOML configuration from a provided path and
// returns the AST produced from the TOML parser. When loading the file, it
// will find environment variables and replace them.
func parseFile(fpath string) (*ast.Table, error) {
contents, err := ioutil.ReadFile(fpath)
if err != nil {
return nil, err
}
// ugh windows why
contents = trimBOM(contents)
env_vars := envVarRe.FindAll(contents, -1)
for _, env_var := range env_vars {
env_val := os.Getenv(strings.TrimPrefix(string(env_var), "$"))
if env_val != "" {
contents = bytes.Replace(contents, env_var, []byte(env_val), 1)
}
}
return toml.Parse(contents)
}
func (c *Config) addOutput(name string, table *ast.Table) error {
if len(c.OutputFilters) > 0 && !sliceContains(name, c.OutputFilters) {
return nil
@@ -421,11 +610,8 @@ func (c *Config) addOutput(name string, table *ast.Table) error {
return err
}
ro := internal_models.NewRunningOutput(name, output, outputConfig)
if c.Agent.MetricBufferLimit > 0 {
ro.MetricBufferLimit = c.Agent.MetricBufferLimit
}
ro.FlushBufferWhenFull = c.Agent.FlushBufferWhenFull
ro := models.NewRunningOutput(name, output, outputConfig,
c.Agent.MetricBatchSize, c.Agent.MetricBufferLimit)
c.Outputs = append(c.Outputs, ro)
return nil
}
@@ -465,7 +651,7 @@ func (c *Config) addInput(name string, table *ast.Table) error {
return err
}
rp := &internal_models.RunningInput{
rp := &models.RunningInput{
Name: name,
Input: input,
Config: pluginConfig,
@@ -476,10 +662,10 @@ func (c *Config) addInput(name string, table *ast.Table) error {
// buildFilter builds a Filter
// (tagpass/tagdrop/namepass/namedrop/fieldpass/fielddrop) to
// be inserted into the internal_models.OutputConfig/internal_models.InputConfig to be used for prefix
// filtering on tags and measurements
func buildFilter(tbl *ast.Table) internal_models.Filter {
f := internal_models.Filter{}
// be inserted into the models.OutputConfig/models.InputConfig
// to be used for glob filtering on tags and measurements
func buildFilter(tbl *ast.Table) (models.Filter, error) {
f := models.Filter{}
if node, ok := tbl.Fields["namepass"]; ok {
if kv, ok := node.(*ast.KeyValue); ok {
@@ -487,7 +673,6 @@ func buildFilter(tbl *ast.Table) internal_models.Filter {
for _, elem := range ary.Value {
if str, ok := elem.(*ast.String); ok {
f.NamePass = append(f.NamePass, str.Value)
f.IsActive = true
}
}
}
@@ -500,7 +685,6 @@ func buildFilter(tbl *ast.Table) internal_models.Filter {
for _, elem := range ary.Value {
if str, ok := elem.(*ast.String); ok {
f.NameDrop = append(f.NameDrop, str.Value)
f.IsActive = true
}
}
}
@@ -515,7 +699,6 @@ func buildFilter(tbl *ast.Table) internal_models.Filter {
for _, elem := range ary.Value {
if str, ok := elem.(*ast.String); ok {
f.FieldPass = append(f.FieldPass, str.Value)
f.IsActive = true
}
}
}
@@ -531,7 +714,6 @@ func buildFilter(tbl *ast.Table) internal_models.Filter {
for _, elem := range ary.Value {
if str, ok := elem.(*ast.String); ok {
f.FieldDrop = append(f.FieldDrop, str.Value)
f.IsActive = true
}
}
}
@@ -543,7 +725,7 @@ func buildFilter(tbl *ast.Table) internal_models.Filter {
if subtbl, ok := node.(*ast.Table); ok {
for name, val := range subtbl.Fields {
if kv, ok := val.(*ast.KeyValue); ok {
tagfilter := &internal_models.TagFilter{Name: name}
tagfilter := &models.TagFilter{Name: name}
if ary, ok := kv.Value.(*ast.Array); ok {
for _, elem := range ary.Value {
if str, ok := elem.(*ast.String); ok {
@@ -552,7 +734,6 @@ func buildFilter(tbl *ast.Table) internal_models.Filter {
}
}
f.TagPass = append(f.TagPass, *tagfilter)
f.IsActive = true
}
}
}
@@ -562,7 +743,7 @@ func buildFilter(tbl *ast.Table) internal_models.Filter {
if subtbl, ok := node.(*ast.Table); ok {
for name, val := range subtbl.Fields {
if kv, ok := val.(*ast.KeyValue); ok {
tagfilter := &internal_models.TagFilter{Name: name}
tagfilter := &models.TagFilter{Name: name}
if ary, ok := kv.Value.(*ast.Array); ok {
for _, elem := range ary.Value {
if str, ok := elem.(*ast.String); ok {
@@ -571,12 +752,38 @@ func buildFilter(tbl *ast.Table) internal_models.Filter {
}
}
f.TagDrop = append(f.TagDrop, *tagfilter)
f.IsActive = true
}
}
}
}
if node, ok := tbl.Fields["tagexclude"]; ok {
if kv, ok := node.(*ast.KeyValue); ok {
if ary, ok := kv.Value.(*ast.Array); ok {
for _, elem := range ary.Value {
if str, ok := elem.(*ast.String); ok {
f.TagExclude = append(f.TagExclude, str.Value)
}
}
}
}
}
if node, ok := tbl.Fields["taginclude"]; ok {
if kv, ok := node.(*ast.KeyValue); ok {
if ary, ok := kv.Value.(*ast.Array); ok {
for _, elem := range ary.Value {
if str, ok := elem.(*ast.String); ok {
f.TagInclude = append(f.TagInclude, str.Value)
}
}
}
}
}
if err := f.Compile(); err != nil {
return f, err
}
delete(tbl.Fields, "namedrop")
delete(tbl.Fields, "namepass")
delete(tbl.Fields, "fielddrop")
@@ -585,14 +792,16 @@ func buildFilter(tbl *ast.Table) internal_models.Filter {
delete(tbl.Fields, "pass")
delete(tbl.Fields, "tagdrop")
delete(tbl.Fields, "tagpass")
return f
delete(tbl.Fields, "tagexclude")
delete(tbl.Fields, "taginclude")
return f, nil
}
// buildInput parses input specific items from the ast.Table,
// builds the filter and returns a
// internal_models.InputConfig to be inserted into internal_models.RunningInput
func buildInput(name string, tbl *ast.Table) (*internal_models.InputConfig, error) {
cp := &internal_models.InputConfig{Name: name}
// models.InputConfig to be inserted into models.RunningInput
func buildInput(name string, tbl *ast.Table) (*models.InputConfig, error) {
cp := &models.InputConfig{Name: name}
if node, ok := tbl.Fields["interval"]; ok {
if kv, ok := node.(*ast.KeyValue); ok {
if str, ok := kv.Value.(*ast.String); ok {
@@ -634,7 +843,7 @@ func buildInput(name string, tbl *ast.Table) (*internal_models.InputConfig, erro
if node, ok := tbl.Fields["tags"]; ok {
if subtbl, ok := node.(*ast.Table); ok {
if err := config.UnmarshalTable(subtbl, cp.Tags); err != nil {
log.Printf("Could not parse tags for input %s\n", name)
log.Printf("E! Could not parse tags for input %s\n", name)
}
}
}
@@ -644,7 +853,11 @@ func buildInput(name string, tbl *ast.Table) (*internal_models.InputConfig, erro
delete(tbl.Fields, "name_override")
delete(tbl.Fields, "interval")
delete(tbl.Fields, "tags")
cp.Filter = buildFilter(tbl)
var err error
cp.Filter, err = buildFilter(tbl)
if err != nil {
return cp, err
}
return cp, nil
}
@@ -701,12 +914,21 @@ func buildParser(name string, tbl *ast.Table) (parsers.Parser, error) {
}
}
if node, ok := tbl.Fields["data_type"]; ok {
if kv, ok := node.(*ast.KeyValue); ok {
if str, ok := kv.Value.(*ast.String); ok {
c.DataType = str.Value
}
}
}
c.MetricName = name
delete(tbl.Fields, "data_format")
delete(tbl.Fields, "separator")
delete(tbl.Fields, "templates")
delete(tbl.Fields, "tag_keys")
delete(tbl.Fields, "data_type")
return parsers.NewParser(c)
}
@@ -737,18 +959,32 @@ func buildSerializer(name string, tbl *ast.Table) (serializers.Serializer, error
}
}
if node, ok := tbl.Fields["template"]; ok {
if kv, ok := node.(*ast.KeyValue); ok {
if str, ok := kv.Value.(*ast.String); ok {
c.Template = str.Value
}
}
}
delete(tbl.Fields, "data_format")
delete(tbl.Fields, "prefix")
delete(tbl.Fields, "template")
return serializers.NewSerializer(c)
}
// buildOutput parses output specific items from the ast.Table, builds the filter and returns an
// internal_models.OutputConfig to be inserted into internal_models.RunningInput
// buildOutput parses output specific items from the ast.Table,
// builds the filter and returns an
// models.OutputConfig to be inserted into models.RunningInput
// Note: error exists in the return for future calls that might require error
func buildOutput(name string, tbl *ast.Table) (*internal_models.OutputConfig, error) {
oc := &internal_models.OutputConfig{
func buildOutput(name string, tbl *ast.Table) (*models.OutputConfig, error) {
filter, err := buildFilter(tbl)
if err != nil {
return nil, err
}
oc := &models.OutputConfig{
Name: name,
Filter: buildFilter(tbl),
Filter: filter,
}
// Outputs don't support FieldDrop/FieldPass, so set to NameDrop/NamePass
if len(oc.Filter.FieldDrop) > 0 {


@@ -1,6 +1,7 @@
package config
import (
"os"
"testing"
"time"
@@ -10,9 +11,53 @@ import (
"github.com/influxdata/telegraf/plugins/inputs/memcached"
"github.com/influxdata/telegraf/plugins/inputs/procstat"
"github.com/influxdata/telegraf/plugins/parsers"
"github.com/stretchr/testify/assert"
)
func TestConfig_LoadSingleInputWithEnvVars(t *testing.T) {
c := NewConfig()
err := os.Setenv("MY_TEST_SERVER", "192.168.1.1")
assert.NoError(t, err)
err = os.Setenv("TEST_INTERVAL", "10s")
assert.NoError(t, err)
c.LoadConfig("./testdata/single_plugin_env_vars.toml")
memcached := inputs.Inputs["memcached"]().(*memcached.Memcached)
memcached.Servers = []string{"192.168.1.1"}
filter := models.Filter{
NameDrop: []string{"metricname2"},
NamePass: []string{"metricname1"},
FieldDrop: []string{"other", "stuff"},
FieldPass: []string{"some", "strings"},
TagDrop: []models.TagFilter{
models.TagFilter{
Name: "badtag",
Filter: []string{"othertag"},
},
},
TagPass: []models.TagFilter{
models.TagFilter{
Name: "goodtag",
Filter: []string{"mytag"},
},
},
}
assert.NoError(t, filter.Compile())
mConfig := &models.InputConfig{
Name: "memcached",
Filter: filter,
Interval: 10 * time.Second,
}
mConfig.Tags = make(map[string]string)
assert.Equal(t, memcached, c.Inputs[0].Input,
"Testdata did not produce a correct memcached struct.")
assert.Equal(t, mConfig, c.Inputs[0].Config,
"Testdata did not produce correct memcached metadata.")
}
func TestConfig_LoadSingleInput(t *testing.T) {
c := NewConfig()
c.LoadConfig("./testdata/single_plugin.toml")
@@ -20,27 +65,28 @@ func TestConfig_LoadSingleInput(t *testing.T) {
memcached := inputs.Inputs["memcached"]().(*memcached.Memcached)
memcached.Servers = []string{"localhost"}
mConfig := &internal_models.InputConfig{
Name: "memcached",
Filter: internal_models.Filter{
NameDrop: []string{"metricname2"},
NamePass: []string{"metricname1"},
FieldDrop: []string{"other", "stuff"},
FieldPass: []string{"some", "strings"},
TagDrop: []internal_models.TagFilter{
internal_models.TagFilter{
Name: "badtag",
Filter: []string{"othertag"},
},
filter := models.Filter{
NameDrop: []string{"metricname2"},
NamePass: []string{"metricname1"},
FieldDrop: []string{"other", "stuff"},
FieldPass: []string{"some", "strings"},
TagDrop: []models.TagFilter{
models.TagFilter{
Name: "badtag",
Filter: []string{"othertag"},
},
TagPass: []internal_models.TagFilter{
internal_models.TagFilter{
Name: "goodtag",
Filter: []string{"mytag"},
},
},
IsActive: true,
},
TagPass: []models.TagFilter{
models.TagFilter{
Name: "goodtag",
Filter: []string{"mytag"},
},
},
}
assert.NoError(t, filter.Compile())
mConfig := &models.InputConfig{
Name: "memcached",
Filter: filter,
Interval: 5 * time.Second,
}
mConfig.Tags = make(map[string]string)
@@ -65,27 +111,28 @@ func TestConfig_LoadDirectory(t *testing.T) {
memcached := inputs.Inputs["memcached"]().(*memcached.Memcached)
memcached.Servers = []string{"localhost"}
mConfig := &internal_models.InputConfig{
Name: "memcached",
Filter: internal_models.Filter{
NameDrop: []string{"metricname2"},
NamePass: []string{"metricname1"},
FieldDrop: []string{"other", "stuff"},
FieldPass: []string{"some", "strings"},
TagDrop: []internal_models.TagFilter{
internal_models.TagFilter{
Name: "badtag",
Filter: []string{"othertag"},
},
filter := models.Filter{
NameDrop: []string{"metricname2"},
NamePass: []string{"metricname1"},
FieldDrop: []string{"other", "stuff"},
FieldPass: []string{"some", "strings"},
TagDrop: []models.TagFilter{
models.TagFilter{
Name: "badtag",
Filter: []string{"othertag"},
},
TagPass: []internal_models.TagFilter{
internal_models.TagFilter{
Name: "goodtag",
Filter: []string{"mytag"},
},
},
IsActive: true,
},
TagPass: []models.TagFilter{
models.TagFilter{
Name: "goodtag",
Filter: []string{"mytag"},
},
},
}
assert.NoError(t, filter.Compile())
mConfig := &models.InputConfig{
Name: "memcached",
Filter: filter,
Interval: 5 * time.Second,
}
mConfig.Tags = make(map[string]string)
@@ -100,7 +147,7 @@ func TestConfig_LoadDirectory(t *testing.T) {
assert.NoError(t, err)
ex.SetParser(p)
ex.Command = "/usr/bin/myothercollector --foo=bar"
eConfig := &internal_models.InputConfig{
eConfig := &models.InputConfig{
Name: "exec",
MeasurementSuffix: "_myothercollector",
}
@@ -119,7 +166,7 @@ func TestConfig_LoadDirectory(t *testing.T) {
pstat := inputs.Inputs["procstat"]().(*procstat.Procstat)
pstat.PidFile = "/var/run/grafana-server.pid"
pConfig := &internal_models.InputConfig{Name: "procstat"}
pConfig := &models.InputConfig{Name: "procstat"}
pConfig.Tags = make(map[string]string)
assert.Equal(t, pstat, c.Inputs[3].Input,


@@ -0,0 +1,11 @@
[[inputs.memcached]]
servers = ["$MY_TEST_SERVER"]
namepass = ["metricname1"]
namedrop = ["metricname2"]
fieldpass = ["some", "strings"]
fielddrop = ["other", "stuff"]
interval = "$TEST_INTERVAL"
[inputs.memcached.tagpass]
goodtag = ["mytag"]
[inputs.memcached.tagdrop]
badtag = ["othertag"]


@@ -0,0 +1,37 @@
package errchan
import (
"fmt"
"strings"
)
type ErrChan struct {
C chan error
}
// New returns an error channel of max length 'n'
// errors can be sent to the ErrChan.C channel, and will be returned when
// ErrChan.Error() is called.
func New(n int) *ErrChan {
return &ErrChan{
C: make(chan error, n),
}
}
// Error closes the ErrChan.C channel and returns an error if there are any
// non-nil errors, otherwise returns nil.
func (e *ErrChan) Error() error {
close(e.C)
var out string
for err := range e.C {
if err != nil {
out += "[" + err.Error() + "], "
}
}
if out != "" {
return fmt.Errorf("Errors encountered: " + strings.TrimRight(out, ", "))
}
return nil
}
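
A brief in-package sketch of the intended usage pattern (not part of the diff): each worker sends exactly one error, the caller waits, then Error() folds the non-nil ones into a single value.

```go
package errchan

import (
	"fmt"
	"sync"
)

// exampleUsage is an illustrative sketch, not part of the real package:
// every worker sends exactly one (possibly nil) error, and Error() joins
// the non-nil ones into a single error after all sends have finished.
func exampleUsage(servers []string) error {
	errChan := New(len(servers)) // one buffered slot per worker
	var wg sync.WaitGroup
	for _, server := range servers {
		wg.Add(1)
		go func(s string) {
			defer wg.Done()
			errChan.C <- fmt.Errorf("gather failed for %s", s) // or nil on success
		}(server)
	}
	wg.Wait()
	return errChan.Error()
}
```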


@@ -0,0 +1,116 @@
package globpath
import (
"fmt"
"os"
"path/filepath"
"strings"
"github.com/gobwas/glob"
)
var sepStr = fmt.Sprintf("%v", string(os.PathSeparator))
type GlobPath struct {
path string
hasMeta bool
hasSuperMeta bool
g glob.Glob
root string
}
func Compile(path string) (*GlobPath, error) {
out := GlobPath{
hasMeta: hasMeta(path),
hasSuperMeta: hasSuperMeta(path),
path: path,
}
// if there are no glob meta characters in the path, don't bother compiling
// a glob object or finding the root directory. (see short-circuit in Match)
if !out.hasMeta || !out.hasSuperMeta {
return &out, nil
}
var err error
if out.g, err = glob.Compile(path, os.PathSeparator); err != nil {
return nil, err
}
// Get the root directory for this filepath
out.root = findRootDir(path)
return &out, nil
}
func (g *GlobPath) Match() map[string]os.FileInfo {
if !g.hasMeta {
out := make(map[string]os.FileInfo)
info, err := os.Stat(g.path)
if !os.IsNotExist(err) {
out[g.path] = info
}
return out
}
if !g.hasSuperMeta {
out := make(map[string]os.FileInfo)
files, _ := filepath.Glob(g.path)
for _, file := range files {
info, err := os.Stat(file)
if !os.IsNotExist(err) {
out[file] = info
}
}
return out
}
return walkFilePath(g.root, g.g)
}
// walk the filepath from the given root and return a list of files that match
// the given glob.
func walkFilePath(root string, g glob.Glob) map[string]os.FileInfo {
matchedFiles := make(map[string]os.FileInfo)
walkfn := func(path string, info os.FileInfo, _ error) error {
if g.Match(path) {
matchedFiles[path] = info
}
return nil
}
filepath.Walk(root, walkfn)
return matchedFiles
}
// find the root dir of the given path (could include globs).
// ie:
// /var/log/telegraf.conf -> /var/log
// /home/** -> /home
// /home/*/** -> /home
// /lib/share/*/*/**.txt -> /lib/share
func findRootDir(path string) string {
pathItems := strings.Split(path, sepStr)
out := sepStr
for i, item := range pathItems {
if i == len(pathItems)-1 {
break
}
if item == "" {
continue
}
if hasMeta(item) {
break
}
out += item + sepStr
}
if out != "/" {
out = strings.TrimSuffix(out, "/")
}
return out
}
// hasMeta reports whether path contains any magic glob characters.
func hasMeta(path string) bool {
return strings.IndexAny(path, "*?[") >= 0
}
// hasSuperMeta reports whether path contains any super magic glob characters (**).
func hasSuperMeta(path string) bool {
return strings.Index(path, "**") >= 0
}
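
A short in-package sketch of the globpath API (not part of the diff); the pattern shown is hypothetical:

```go
package globpath

import "fmt"

// exampleUsage is an illustrative sketch, not part of the real package.
// "**" routes Match through the directory walk, a single "*" stays on
// filepath.Glob, and a literal path is simply stat'd.
func exampleUsage() error {
	g, err := Compile("/var/log/telegraf/**.log")
	if err != nil {
		return err
	}
	// Match returns every matching path together with its os.FileInfo.
	for path, info := range g.Match() {
		fmt.Println(path, info.Size())
	}
	return nil
}
```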


@@ -0,0 +1,62 @@
package globpath
import (
"runtime"
"strings"
"testing"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
func TestCompileAndMatch(t *testing.T) {
dir := getTestdataDir()
// test super asterisk
g1, err := Compile(dir + "/**")
require.NoError(t, err)
// test single asterisk
g2, err := Compile(dir + "/*.log")
require.NoError(t, err)
// test no meta characters (file exists)
g3, err := Compile(dir + "/log1.log")
require.NoError(t, err)
// test file that doesn't exist
g4, err := Compile(dir + "/i_dont_exist.log")
require.NoError(t, err)
// test super asterisk that doesn't exist
g5, err := Compile(dir + "/dir_doesnt_exist/**")
require.NoError(t, err)
matches := g1.Match()
assert.Len(t, matches, 3)
matches = g2.Match()
assert.Len(t, matches, 2)
matches = g3.Match()
assert.Len(t, matches, 1)
matches = g4.Match()
assert.Len(t, matches, 0)
matches = g5.Match()
assert.Len(t, matches, 0)
}
func TestFindRootDir(t *testing.T) {
tests := []struct {
input string
output string
}{
{"/var/log/telegraf.conf", "/var/log"},
{"/home/**", "/home"},
{"/home/*/**", "/home"},
{"/lib/share/*/*/**.txt", "/lib/share"},
}
for _, test := range tests {
actual := findRootDir(test.input)
assert.Equal(t, test.output, actual)
}
}
func getTestdataDir() string {
_, filename, _, _ := runtime.Caller(1)
return strings.Replace(filename, "globpath_test.go", "testdata", 1)
}

internal/globpath/testdata/log1.log (new empty file, vendored)

internal/globpath/testdata/log2.log (new empty file, vendored)

internal/globpath/testdata/test.conf (new file, 5 lines, vendored)

@@ -0,0 +1,5 @@
# this is a fake testing config file
# for testing the filestat plugin
option1 = "foo"
option2 = "bar"


@@ -2,19 +2,31 @@ package internal
import (
"bufio"
"bytes"
"crypto/rand"
"crypto/tls"
"crypto/x509"
"errors"
"fmt"
"io/ioutil"
"log"
"math/big"
"os"
"os/exec"
"strconv"
"strings"
"time"
"unicode"
)
const alphanum string = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
var (
TimeoutErr = errors.New("Command timed out.")
NotImplementedError = errors.New("not implemented yet")
)
// Duration just wraps time.Duration
type Duration struct {
Duration time.Duration
@@ -22,18 +34,29 @@ type Duration struct {
// UnmarshalTOML parses the duration from the TOML config file
func (d *Duration) UnmarshalTOML(b []byte) error {
dur, err := time.ParseDuration(string(b[1 : len(b)-1]))
if err != nil {
return err
var err error
// Parse string duration, ie, "1s"
d.Duration, err = time.ParseDuration(string(b[1 : len(b)-1]))
if err == nil {
return nil
}
d.Duration = dur
// First try parsing as integer seconds
sI, err := strconv.ParseInt(string(b), 10, 64)
if err == nil {
d.Duration = time.Second * time.Duration(sI)
return nil
}
// Second try parsing as float seconds
sF, err := strconv.ParseFloat(string(b), 64)
if err == nil {
d.Duration = time.Second * time.Duration(sF)
return nil
}
return nil
}
var NotImplementedError = errors.New("not implemented yet")
// ReadLines reads contents from a file and splits them by new lines.
// A convenience wrapper to ReadLinesOffsetN(filename, 0, -1).
func ReadLines(filename string) ([]string, error) {
@@ -86,15 +109,15 @@ func GetTLSConfig(
SSLCert, SSLKey, SSLCA string,
InsecureSkipVerify bool,
) (*tls.Config, error) {
t := &tls.Config{}
if SSLCert != "" && SSLKey != "" && SSLCA != "" {
cert, err := tls.LoadX509KeyPair(SSLCert, SSLKey)
if err != nil {
return nil, errors.New(fmt.Sprintf(
"Could not load TLS client key/certificate: %s",
err))
}
if SSLCert == "" && SSLKey == "" && SSLCA == "" && !InsecureSkipVerify {
return nil, nil
}
t := &tls.Config{
InsecureSkipVerify: InsecureSkipVerify,
}
if SSLCA != "" {
caCert, err := ioutil.ReadFile(SSLCA)
if err != nil {
return nil, errors.New(fmt.Sprintf("Could not load TLS CA: %s",
@@ -103,75 +126,107 @@ func GetTLSConfig(
caCertPool := x509.NewCertPool()
caCertPool.AppendCertsFromPEM(caCert)
t = &tls.Config{
Certificates: []tls.Certificate{cert},
RootCAs: caCertPool,
InsecureSkipVerify: InsecureSkipVerify,
}
} else {
if InsecureSkipVerify {
t.InsecureSkipVerify = true
} else {
return nil, nil
}
t.RootCAs = caCertPool
}
if SSLCert != "" && SSLKey != "" {
cert, err := tls.LoadX509KeyPair(SSLCert, SSLKey)
if err != nil {
return nil, errors.New(fmt.Sprintf(
"Could not load TLS client key/certificate from %s:%s: %s",
SSLKey, SSLCert, err))
}
t.Certificates = []tls.Certificate{cert}
t.BuildNameToCertificate()
}
// will be nil by default if nothing is provided
return t, nil
}
// Glob will test a string pattern, potentially containing globs, against a
// subject string. The result is a simple true/false, determining whether or
// not the glob pattern matched the subject text.
//
// Adapted from https://github.com/ryanuber/go-glob/blob/master/glob.go
// thanks Ryan Uber!
func Glob(pattern, measurement string) bool {
// Empty pattern can only match empty subject
if pattern == "" {
return measurement == pattern
}
// SnakeCase converts the given string to snake case following the Golang format:
// acronyms are converted to lower-case and preceded by an underscore.
func SnakeCase(in string) string {
runes := []rune(in)
length := len(runes)
// If the pattern _is_ a glob, it matches everything
if pattern == "*" {
return true
}
parts := strings.Split(pattern, "*")
if len(parts) == 1 {
// No globs in pattern, so test for match
return pattern == measurement
}
leadingGlob := strings.HasPrefix(pattern, "*")
trailingGlob := strings.HasSuffix(pattern, "*")
end := len(parts) - 1
for i, part := range parts {
switch i {
case 0:
if leadingGlob {
continue
}
if !strings.HasPrefix(measurement, part) {
return false
}
case end:
if len(measurement) > 0 {
return trailingGlob || strings.HasSuffix(measurement, part)
}
default:
if !strings.Contains(measurement, part) {
return false
}
var out []rune
for i := 0; i < length; i++ {
if i > 0 && unicode.IsUpper(runes[i]) && ((i+1 < length && unicode.IsLower(runes[i+1])) || unicode.IsLower(runes[i-1])) {
out = append(out, '_')
}
// Trim evaluated text from measurement as we loop over the pattern.
idx := strings.Index(measurement, part) + len(part)
measurement = measurement[idx:]
out = append(out, unicode.ToLower(runes[i]))
}
// All parts of the pattern matched
return true
return string(out)
}
// CombinedOutputTimeout runs the given command with the given timeout and
// returns the combined output of stdout and stderr.
// If the command times out, it attempts to kill the process.
func CombinedOutputTimeout(c *exec.Cmd, timeout time.Duration) ([]byte, error) {
var b bytes.Buffer
c.Stdout = &b
c.Stderr = &b
if err := c.Start(); err != nil {
return nil, err
}
err := WaitTimeout(c, timeout)
return b.Bytes(), err
}
// RunTimeout runs the given command with the given timeout.
// If the command times out, it attempts to kill the process.
func RunTimeout(c *exec.Cmd, timeout time.Duration) error {
if err := c.Start(); err != nil {
return err
}
return WaitTimeout(c, timeout)
}
// WaitTimeout waits for the given command to finish with a timeout.
// It assumes the command has already been started.
// If the command times out, it attempts to kill the process.
func WaitTimeout(c *exec.Cmd, timeout time.Duration) error {
timer := time.NewTimer(timeout)
done := make(chan error)
go func() { done <- c.Wait() }()
select {
case err := <-done:
timer.Stop()
return err
case <-timer.C:
if err := c.Process.Kill(); err != nil {
log.Printf("E! FATAL error killing process: %s", err)
return err
}
// wait for the command to return after killing it
<-done
return TimeoutErr
}
}
// RandomSleep will sleep for a random amount of time up to max.
// If the shutdown channel is closed, it will return before it has finished
// sleeping.
func RandomSleep(max time.Duration, shutdown chan struct{}) {
if max == 0 {
return
}
maxSleep := big.NewInt(max.Nanoseconds())
var sleepns int64
if j, err := rand.Int(rand.Reader, maxSleep); err == nil {
sleepns = j.Int64()
}
t := time.NewTimer(time.Nanosecond * time.Duration(sleepns))
select {
case <-t.C:
return
case <-shutdown:
t.Stop()
return
}
}
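
To illustrate the reworked Duration.UnmarshalTOML above, a small in-package sketch (not part of the diff) feeding it the three accepted TOML forms: a quoted duration string, integer seconds, and float seconds. The raw values are illustrative.

```go
package internal

import "fmt"

// exampleDurations is an illustrative sketch, not part of the real package:
// UnmarshalTOML now accepts a quoted duration string as well as plain
// integer or float seconds.
func exampleDurations() {
	for _, raw := range []string{`"1m30s"`, `90`, `90.5`} {
		var d Duration
		if err := d.UnmarshalTOML([]byte(raw)); err != nil {
			fmt.Println(raw, "->", err)
			continue
		}
		fmt.Println(raw, "->", d.Duration) // each form resolves to 1m30s
	}
}
```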


@@ -1,44 +1,133 @@
package internal
import "testing"
import (
"os/exec"
"testing"
"time"
func testGlobMatch(t *testing.T, pattern, subj string) {
if !Glob(pattern, subj) {
t.Errorf("%s should match %s", pattern, subj)
"github.com/stretchr/testify/assert"
)
type SnakeTest struct {
input string
output string
}
var tests = []SnakeTest{
{"a", "a"},
{"snake", "snake"},
{"A", "a"},
{"ID", "id"},
{"MOTD", "motd"},
{"Snake", "snake"},
{"SnakeTest", "snake_test"},
{"APIResponse", "api_response"},
{"SnakeID", "snake_id"},
{"SnakeIDGoogle", "snake_id_google"},
{"LinuxMOTD", "linux_motd"},
{"OMGWTFBBQ", "omgwtfbbq"},
{"omg_wtf_bbq", "omg_wtf_bbq"},
}
func TestSnakeCase(t *testing.T) {
for _, test := range tests {
if SnakeCase(test.input) != test.output {
t.Errorf(`SnakeCase("%s"), wanted "%s", got "%s"`, test.input, test.output, SnakeCase(test.input))
}
}
}
func testGlobNoMatch(t *testing.T, pattern, subj string) {
if Glob(pattern, subj) {
t.Errorf("%s should not match %s", pattern, subj)
var (
sleepbin, _ = exec.LookPath("sleep")
echobin, _ = exec.LookPath("echo")
)
func TestRunTimeout(t *testing.T) {
if sleepbin == "" {
t.Skip("'sleep' binary not available on OS, skipping.")
}
cmd := exec.Command(sleepbin, "10")
start := time.Now()
err := RunTimeout(cmd, time.Millisecond*20)
elapsed := time.Since(start)
assert.Equal(t, TimeoutErr, err)
// Verify that command gets killed in 20ms, with some breathing room
assert.True(t, elapsed < time.Millisecond*75)
}
func TestEmptyPattern(t *testing.T) {
testGlobMatch(t, "", "")
testGlobNoMatch(t, "", "test")
}
func TestPatternWithoutGlobs(t *testing.T) {
testGlobMatch(t, "test", "test")
}
func TestGlob(t *testing.T) {
for _, pattern := range []string{
"*test", // Leading glob
"this*", // Trailing glob
"*is*a*", // Lots of globs
"**test**", // Double glob characters
"**is**a***test*", // Varying number of globs
} {
testGlobMatch(t, pattern, "this_is_a_test")
func TestCombinedOutputTimeout(t *testing.T) {
if sleepbin == "" {
t.Skip("'sleep' binary not available on OS, skipping.")
}
cmd := exec.Command(sleepbin, "10")
start := time.Now()
_, err := CombinedOutputTimeout(cmd, time.Millisecond*20)
elapsed := time.Since(start)
for _, pattern := range []string{
"test*", // Implicit substring match should fail
"*is", // Partial match should fail
"*no*", // Globs without a match between them should fail
} {
testGlobNoMatch(t, pattern, "this_is_a_test")
}
assert.Equal(t, TimeoutErr, err)
// Verify that command gets killed in 20ms, with some breathing room
assert.True(t, elapsed < time.Millisecond*75)
}
func TestCombinedOutput(t *testing.T) {
if echobin == "" {
t.Skip("'echo' binary not available on OS, skipping.")
}
cmd := exec.Command(echobin, "foo")
out, err := CombinedOutputTimeout(cmd, time.Second)
assert.NoError(t, err)
assert.Equal(t, "foo\n", string(out))
}
// test that CombinedOutputTimeout and exec.Cmd.CombinedOutput return
// the same output from a failed command.
func TestCombinedOutputError(t *testing.T) {
if sleepbin == "" {
t.Skip("'sleep' binary not available on OS, skipping.")
}
cmd := exec.Command(sleepbin, "foo")
expected, err := cmd.CombinedOutput()
cmd2 := exec.Command(sleepbin, "foo")
actual, err := CombinedOutputTimeout(cmd2, time.Second)
assert.Error(t, err)
assert.Equal(t, expected, actual)
}
func TestRunError(t *testing.T) {
if sleepbin == "" {
t.Skip("'sleep' binary not available on OS, skipping.")
}
cmd := exec.Command(sleepbin, "foo")
err := RunTimeout(cmd, time.Second)
assert.Error(t, err)
}
func TestRandomSleep(t *testing.T) {
// test that zero max returns immediately
s := time.Now()
RandomSleep(time.Duration(0), make(chan struct{}))
elapsed := time.Since(s)
assert.True(t, elapsed < time.Millisecond)
// test that max sleep is respected
s = time.Now()
RandomSleep(time.Millisecond*50, make(chan struct{}))
elapsed = time.Since(s)
assert.True(t, elapsed < time.Millisecond*100)
// test that shutdown is respected
s = time.Now()
shutdown := make(chan struct{})
go func() {
time.Sleep(time.Millisecond * 100)
close(shutdown)
}()
RandomSleep(time.Second, shutdown)
elapsed = time.Since(s)
assert.True(t, elapsed < time.Millisecond*150)
}


@@ -0,0 +1,59 @@
package limiter
import (
"sync"
"time"
)
// NewRateLimiter returns a rate limiter that will emit from the C
// channel only 'n' times every 'rate' seconds.
func NewRateLimiter(n int, rate time.Duration) *rateLimiter {
r := &rateLimiter{
C: make(chan bool),
rate: rate,
n: n,
shutdown: make(chan bool),
}
r.wg.Add(1)
go r.limiter()
return r
}
type rateLimiter struct {
C chan bool
rate time.Duration
n int
shutdown chan bool
wg sync.WaitGroup
}
func (r *rateLimiter) Stop() {
close(r.shutdown)
r.wg.Wait()
close(r.C)
}
func (r *rateLimiter) limiter() {
defer r.wg.Done()
ticker := time.NewTicker(r.rate)
defer ticker.Stop()
counter := 0
for {
select {
case <-r.shutdown:
return
case <-ticker.C:
counter = 0
default:
if counter < r.n {
select {
case r.C <- true:
counter++
case <-r.shutdown:
return
}
}
}
}
}
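
A minimal consumer sketch for the limiter above. The import path, deadline, and counter are illustrative assumptions; only `NewRateLimiter`, `C`, and `Stop` come from the code shown.

```go
package main

import (
	"fmt"
	"time"

	"github.com/influxdata/telegraf/internal/limiter" // assumed import path for this package
)

func main() {
	// Allow at most 3 emissions per rate interval (here: per second).
	r := limiter.NewRateLimiter(3, time.Second)
	defer r.Stop() // Stop closes r.C once the limiter goroutine exits

	deadline := time.After(2 * time.Second) // illustrative run time
	emitted := 0
	for {
		select {
		case <-r.C:
			emitted++ // rate-limited work would go here
		case <-deadline:
			fmt.Println("emissions:", emitted) // roughly n per rate interval
			return
		}
	}
}
```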


@@ -0,0 +1,54 @@
package limiter
import (
"testing"
"time"
"github.com/stretchr/testify/assert"
)
func TestRateLimiter(t *testing.T) {
r := NewRateLimiter(5, time.Second)
ticker := time.NewTicker(time.Millisecond * 75)
// test that we can only get 5 receives from the rate limiter
counter := 0
outer:
for {
select {
case <-r.C:
counter++
case <-ticker.C:
break outer
}
}
assert.Equal(t, 5, counter)
r.Stop()
// verify that the Stop function closes the channel.
_, ok := <-r.C
assert.False(t, ok)
}
func TestRateLimiterMultipleIterations(t *testing.T) {
r := NewRateLimiter(5, time.Millisecond*50)
ticker := time.NewTicker(time.Millisecond * 250)
// test that we can get 15 receives from the rate limiter
counter := 0
outer:
for {
select {
case <-ticker.C:
break outer
case <-r.C:
counter++
}
}
assert.True(t, counter > 10)
r.Stop()
// verify that the Stop function closes the channel.
_, ok := <-r.C
assert.False(t, ok)
}


@@ -1,105 +1,188 @@
package internal_models
package models
import (
"strings"
"fmt"
"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/internal"
"github.com/influxdata/telegraf/filter"
)
// TagFilter is the name of a tag, and the values on which to filter
type TagFilter struct {
Name string
Filter []string
filter filter.Filter
}
// Filter containing drop/pass and tagdrop/tagpass rules
type Filter struct {
NameDrop []string
nameDrop filter.Filter
NamePass []string
namePass filter.Filter
FieldDrop []string
fieldDrop filter.Filter
FieldPass []string
fieldPass filter.Filter
TagDrop []TagFilter
TagPass []TagFilter
IsActive bool
TagExclude []string
tagExclude filter.Filter
TagInclude []string
tagInclude filter.Filter
isActive bool
}
func (f Filter) ShouldMetricPass(metric telegraf.Metric) bool {
if f.ShouldNamePass(metric.Name()) && f.ShouldTagsPass(metric.Tags()) {
// Compile all Filter lists into filter.Filter objects.
func (f *Filter) Compile() error {
if len(f.NameDrop) == 0 &&
len(f.NamePass) == 0 &&
len(f.FieldDrop) == 0 &&
len(f.FieldPass) == 0 &&
len(f.TagInclude) == 0 &&
len(f.TagExclude) == 0 &&
len(f.TagPass) == 0 &&
len(f.TagDrop) == 0 {
return nil
}
f.isActive = true
var err error
f.nameDrop, err = filter.Compile(f.NameDrop)
if err != nil {
return fmt.Errorf("Error compiling 'namedrop', %s", err)
}
f.namePass, err = filter.Compile(f.NamePass)
if err != nil {
return fmt.Errorf("Error compiling 'namepass', %s", err)
}
f.fieldDrop, err = filter.Compile(f.FieldDrop)
if err != nil {
return fmt.Errorf("Error compiling 'fielddrop', %s", err)
}
f.fieldPass, err = filter.Compile(f.FieldPass)
if err != nil {
return fmt.Errorf("Error compiling 'fieldpass', %s", err)
}
f.tagExclude, err = filter.Compile(f.TagExclude)
if err != nil {
return fmt.Errorf("Error compiling 'tagexclude', %s", err)
}
f.tagInclude, err = filter.Compile(f.TagInclude)
if err != nil {
return fmt.Errorf("Error compiling 'taginclude', %s", err)
}
for i, _ := range f.TagDrop {
f.TagDrop[i].filter, err = filter.Compile(f.TagDrop[i].Filter)
if err != nil {
return fmt.Errorf("Error compiling 'tagdrop', %s", err)
}
}
for i, _ := range f.TagPass {
f.TagPass[i].filter, err = filter.Compile(f.TagPass[i].Filter)
if err != nil {
return fmt.Errorf("Error compiling 'tagpass', %s", err)
}
}
return nil
}
// Apply applies the filter to the given measurement name, fields map, and
// tags map. It will return false if the metric should be "filtered out", and
// true if the metric should "pass".
// It will modify tags in-place if they need to be deleted.
func (f *Filter) Apply(
measurement string,
fields map[string]interface{},
tags map[string]string,
) bool {
if !f.isActive {
return true
}
return false
// check if the measurement name should pass
if !f.shouldNamePass(measurement) {
return false
}
// check if the tags should pass
if !f.shouldTagsPass(tags) {
return false
}
// filter fields
for fieldkey, _ := range fields {
if !f.shouldFieldPass(fieldkey) {
delete(fields, fieldkey)
}
}
if len(fields) == 0 {
return false
}
// filter tags
f.filterTags(tags)
return true
}
// ShouldFieldsPass returns true if the metric should pass, false if should drop
func (f *Filter) IsActive() bool {
return f.isActive
}
// shouldNamePass returns true if the metric should pass, false if should drop
// based on the drop/pass filter parameters
func (f Filter) ShouldNamePass(key string) bool {
if f.NamePass != nil {
for _, pat := range f.NamePass {
// TODO remove HasPrefix check, leaving it for now for legacy support.
// Cam, 2015-12-07
if strings.HasPrefix(key, pat) || internal.Glob(pat, key) {
return true
}
func (f *Filter) shouldNamePass(key string) bool {
if f.namePass != nil {
if f.namePass.Match(key) {
return true
}
return false
}
if f.NameDrop != nil {
for _, pat := range f.NameDrop {
// TODO remove HasPrefix check, leaving it for now for legacy support.
// Cam, 2015-12-07
if strings.HasPrefix(key, pat) || internal.Glob(pat, key) {
return false
}
if f.nameDrop != nil {
if f.nameDrop.Match(key) {
return false
}
return true
}
return true
}
// ShouldFieldsPass returns true if the metric should pass, false if should drop
// shouldFieldPass returns true if the metric should pass, false if should drop
// based on the drop/pass filter parameters
func (f Filter) ShouldFieldsPass(key string) bool {
if f.FieldPass != nil {
for _, pat := range f.FieldPass {
// TODO remove HasPrefix check, leaving it for now for legacy support.
// Cam, 2015-12-07
if strings.HasPrefix(key, pat) || internal.Glob(pat, key) {
return true
}
func (f *Filter) shouldFieldPass(key string) bool {
if f.fieldPass != nil {
if f.fieldPass.Match(key) {
return true
}
return false
}
if f.FieldDrop != nil {
for _, pat := range f.FieldDrop {
// TODO remove HasPrefix check, leaving it for now for legacy support.
// Cam, 2015-12-07
if strings.HasPrefix(key, pat) || internal.Glob(pat, key) {
return false
}
if f.fieldDrop != nil {
if f.fieldDrop.Match(key) {
return false
}
return true
}
return true
}
// ShouldTagsPass returns true if the metric should pass, false if should drop
// shouldTagsPass returns true if the metric should pass, false if should drop
// based on the tagdrop/tagpass filter parameters
func (f Filter) ShouldTagsPass(tags map[string]string) bool {
func (f *Filter) shouldTagsPass(tags map[string]string) bool {
if f.TagPass != nil {
for _, pat := range f.TagPass {
if pat.filter == nil {
continue
}
if tagval, ok := tags[pat.Name]; ok {
for _, filter := range pat.Filter {
if internal.Glob(filter, tagval) {
return true
}
if pat.filter.Match(tagval) {
return true
}
}
}
@@ -108,11 +191,12 @@ func (f Filter) ShouldTagsPass(tags map[string]string) bool {
if f.TagDrop != nil {
for _, pat := range f.TagDrop {
if pat.filter == nil {
continue
}
if tagval, ok := tags[pat.Name]; ok {
for _, filter := range pat.Filter {
if internal.Glob(filter, tagval) {
return false
}
if pat.filter.Match(tagval) {
return false
}
}
}
@@ -121,3 +205,23 @@ func (f Filter) ShouldTagsPass(tags map[string]string) bool {
return true
}
// Apply TagInclude and TagExclude filters.
// modifies the tags map in-place.
func (f *Filter) filterTags(tags map[string]string) {
if f.tagInclude != nil {
for k, _ := range tags {
if !f.tagInclude.Match(k) {
delete(tags, k)
}
}
}
if f.tagExclude != nil {
for k, _ := range tags {
if f.tagExclude.Match(k) {
delete(tags, k)
}
}
}
}
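
A hedged end-to-end sketch of the Compile/Apply flow above. The import path, field values, and tag values are made up for illustration; `Filter`, `TagFilter`, `Compile`, and `Apply` are the exported pieces shown in this file.

```go
package main

import (
	"fmt"

	"github.com/influxdata/telegraf/internal/models" // assumed import path for this package
)

func main() {
	f := models.Filter{
		NamePass:   []string{"cpu*"},   // only measurement names matching cpu* pass
		FieldDrop:  []string{"time_*"}, // drop fields matching time_*
		TagExclude: []string{"host"},   // remove the host tag from passing metrics
		TagDrop: []models.TagFilter{
			{Name: "cpu", Filter: []string{"cpu-total"}}, // drop the aggregate series
		},
	}
	if err := f.Compile(); err != nil { // compile the glob lists once, up front
		panic(err)
	}

	fields := map[string]interface{}{"usage_idle": 99.0, "time_user": 1.2}
	tags := map[string]string{"host": "localhost", "cpu": "cpu0"}

	// Apply reports whether the metric passes, and deletes filtered
	// fields and tags from the maps in place.
	if f.Apply("cpu", fields, tags) {
		fmt.Println(fields, tags) // map[usage_idle:99] map[cpu:cpu0]
	}
}
```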


@@ -1,9 +1,64 @@
package internal_models
package models
import (
"testing"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
func TestFilter_ApplyEmpty(t *testing.T) {
f := Filter{}
require.NoError(t, f.Compile())
assert.False(t, f.IsActive())
assert.True(t, f.Apply("m", map[string]interface{}{"value": int64(1)}, map[string]string{}))
}
func TestFilter_ApplyTagsDontPass(t *testing.T) {
filters := []TagFilter{
TagFilter{
Name: "cpu",
Filter: []string{"cpu-*"},
},
}
f := Filter{
TagDrop: filters,
}
require.NoError(t, f.Compile())
require.NoError(t, f.Compile())
assert.True(t, f.IsActive())
assert.False(t, f.Apply("m",
map[string]interface{}{"value": int64(1)},
map[string]string{"cpu": "cpu-total"}))
}
func TestFilter_ApplyDeleteFields(t *testing.T) {
f := Filter{
FieldDrop: []string{"value"},
}
require.NoError(t, f.Compile())
require.NoError(t, f.Compile())
assert.True(t, f.IsActive())
fields := map[string]interface{}{"value": int64(1), "value2": int64(2)}
assert.True(t, f.Apply("m", fields, nil))
assert.Equal(t, map[string]interface{}{"value2": int64(2)}, fields)
}
func TestFilter_ApplyDeleteAllFields(t *testing.T) {
f := Filter{
FieldDrop: []string{"value*"},
}
require.NoError(t, f.Compile())
require.NoError(t, f.Compile())
assert.True(t, f.IsActive())
fields := map[string]interface{}{"value": int64(1), "value2": int64(2)}
assert.False(t, f.Apply("m", fields, nil))
}
func TestFilter_Empty(t *testing.T) {
f := Filter{}
@@ -18,7 +73,7 @@ func TestFilter_Empty(t *testing.T) {
}
for _, measurement := range measurements {
if !f.ShouldFieldsPass(measurement) {
if !f.shouldFieldPass(measurement) {
t.Errorf("Expected measurement %s to pass", measurement)
}
}
@@ -28,6 +83,7 @@ func TestFilter_NamePass(t *testing.T) {
f := Filter{
NamePass: []string{"foo*", "cpu_usage_idle"},
}
require.NoError(t, f.Compile())
passes := []string{
"foo",
@@ -45,13 +101,13 @@ func TestFilter_NamePass(t *testing.T) {
}
for _, measurement := range passes {
if !f.ShouldNamePass(measurement) {
if !f.shouldNamePass(measurement) {
t.Errorf("Expected measurement %s to pass", measurement)
}
}
for _, measurement := range drops {
if f.ShouldNamePass(measurement) {
if f.shouldNamePass(measurement) {
t.Errorf("Expected measurement %s to drop", measurement)
}
}
@@ -61,6 +117,7 @@ func TestFilter_NameDrop(t *testing.T) {
f := Filter{
NameDrop: []string{"foo*", "cpu_usage_idle"},
}
require.NoError(t, f.Compile())
drops := []string{
"foo",
@@ -78,13 +135,13 @@ func TestFilter_NameDrop(t *testing.T) {
}
for _, measurement := range passes {
if !f.ShouldNamePass(measurement) {
if !f.shouldNamePass(measurement) {
t.Errorf("Expected measurement %s to pass", measurement)
}
}
for _, measurement := range drops {
if f.ShouldNamePass(measurement) {
if f.shouldNamePass(measurement) {
t.Errorf("Expected measurement %s to drop", measurement)
}
}
@@ -94,6 +151,7 @@ func TestFilter_FieldPass(t *testing.T) {
f := Filter{
FieldPass: []string{"foo*", "cpu_usage_idle"},
}
require.NoError(t, f.Compile())
passes := []string{
"foo",
@@ -111,13 +169,13 @@ func TestFilter_FieldPass(t *testing.T) {
}
for _, measurement := range passes {
if !f.ShouldFieldsPass(measurement) {
if !f.shouldFieldPass(measurement) {
t.Errorf("Expected measurement %s to pass", measurement)
}
}
for _, measurement := range drops {
if f.ShouldFieldsPass(measurement) {
if f.shouldFieldPass(measurement) {
t.Errorf("Expected measurement %s to drop", measurement)
}
}
@@ -127,6 +185,7 @@ func TestFilter_FieldDrop(t *testing.T) {
f := Filter{
FieldDrop: []string{"foo*", "cpu_usage_idle"},
}
require.NoError(t, f.Compile())
drops := []string{
"foo",
@@ -144,13 +203,13 @@ func TestFilter_FieldDrop(t *testing.T) {
}
for _, measurement := range passes {
if !f.ShouldFieldsPass(measurement) {
if !f.shouldFieldPass(measurement) {
t.Errorf("Expected measurement %s to pass", measurement)
}
}
for _, measurement := range drops {
if f.ShouldFieldsPass(measurement) {
if f.shouldFieldPass(measurement) {
t.Errorf("Expected measurement %s to drop", measurement)
}
}
@@ -169,6 +228,7 @@ func TestFilter_TagPass(t *testing.T) {
f := Filter{
TagPass: filters,
}
require.NoError(t, f.Compile())
passes := []map[string]string{
{"cpu": "cpu-total"},
@@ -187,13 +247,13 @@ func TestFilter_TagPass(t *testing.T) {
}
for _, tags := range passes {
if !f.ShouldTagsPass(tags) {
if !f.shouldTagsPass(tags) {
t.Errorf("Expected tags %v to pass", tags)
}
}
for _, tags := range drops {
if f.ShouldTagsPass(tags) {
if f.shouldTagsPass(tags) {
t.Errorf("Expected tags %v to drop", tags)
}
}
@@ -212,6 +272,7 @@ func TestFilter_TagDrop(t *testing.T) {
f := Filter{
TagDrop: filters,
}
require.NoError(t, f.Compile())
drops := []map[string]string{
{"cpu": "cpu-total"},
@@ -230,14 +291,69 @@ func TestFilter_TagDrop(t *testing.T) {
}
for _, tags := range passes {
if !f.ShouldTagsPass(tags) {
if !f.shouldTagsPass(tags) {
t.Errorf("Expected tags %v to pass", tags)
}
}
for _, tags := range drops {
if f.ShouldTagsPass(tags) {
if f.shouldTagsPass(tags) {
t.Errorf("Expected tags %v to drop", tags)
}
}
}
func TestFilter_FilterTagsNoMatches(t *testing.T) {
pretags := map[string]string{
"host": "localhost",
"mytag": "foobar",
}
f := Filter{
TagExclude: []string{"nomatch"},
}
require.NoError(t, f.Compile())
f.filterTags(pretags)
assert.Equal(t, map[string]string{
"host": "localhost",
"mytag": "foobar",
}, pretags)
f = Filter{
TagInclude: []string{"nomatch"},
}
require.NoError(t, f.Compile())
f.filterTags(pretags)
assert.Equal(t, map[string]string{}, pretags)
}
func TestFilter_FilterTagsMatches(t *testing.T) {
pretags := map[string]string{
"host": "localhost",
"mytag": "foobar",
}
f := Filter{
TagExclude: []string{"ho*"},
}
require.NoError(t, f.Compile())
f.filterTags(pretags)
assert.Equal(t, map[string]string{
"mytag": "foobar",
}, pretags)
pretags = map[string]string{
"host": "localhost",
"mytag": "foobar",
}
f = Filter{
TagInclude: []string{"my*"},
}
require.NoError(t, f.Compile())
f.filterTags(pretags)
assert.Equal(t, map[string]string{
"mytag": "foobar",
}, pretags)
}


@@ -1,4 +1,4 @@
package internal_models
package models
import (
"time"


@@ -1,49 +1,55 @@
package internal_models
package models
import (
"log"
"sync"
"time"
"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/internal/buffer"
)
const (
// Default number of metrics kept between flushes.
DEFAULT_METRIC_BUFFER_LIMIT = 1000
// Default size of metrics batch size.
DEFAULT_METRIC_BATCH_SIZE = 1000
// Limit how many full metric buffers are kept due to failed writes.
FULL_METRIC_BUFFERS_LIMIT = 100
// Default number of metrics kept. It should be a multiple of batch size.
DEFAULT_METRIC_BUFFER_LIMIT = 10000
)
// RunningOutput contains the output configuration
type RunningOutput struct {
Name string
Output telegraf.Output
Config *OutputConfig
Quiet bool
MetricBufferLimit int
FlushBufferWhenFull bool
Name string
Output telegraf.Output
Config *OutputConfig
Quiet bool
MetricBufferLimit int
MetricBatchSize int
metrics []telegraf.Metric
tmpmetrics map[int][]telegraf.Metric
overwriteI int
mapI int
sync.Mutex
metrics *buffer.Buffer
failMetrics *buffer.Buffer
}
func NewRunningOutput(
name string,
output telegraf.Output,
conf *OutputConfig,
batchSize int,
bufferLimit int,
) *RunningOutput {
if bufferLimit == 0 {
bufferLimit = DEFAULT_METRIC_BUFFER_LIMIT
}
if batchSize == 0 {
batchSize = DEFAULT_METRIC_BATCH_SIZE
}
ro := &RunningOutput{
Name: name,
metrics: make([]telegraf.Metric, 0),
tmpmetrics: make(map[int][]telegraf.Metric),
metrics: buffer.NewBuffer(batchSize),
failMetrics: buffer.NewBuffer(bufferLimit),
Output: output,
Config: conf,
MetricBufferLimit: DEFAULT_METRIC_BUFFER_LIMIT,
MetricBufferLimit: bufferLimit,
MetricBatchSize: batchSize,
}
return ro
}
@@ -51,83 +57,93 @@ func NewRunningOutput(
// AddMetric adds a metric to the output. This function can also write cached
// points if FlushBufferWhenFull is true.
func (ro *RunningOutput) AddMetric(metric telegraf.Metric) {
if ro.Config.Filter.IsActive {
if !ro.Config.Filter.ShouldMetricPass(metric) {
// Filter any tagexclude/taginclude parameters before adding metric
if ro.Config.Filter.IsActive() {
// In order to filter out tags, we need to create a new metric, since
// metrics are immutable once created.
name := metric.Name()
tags := metric.Tags()
fields := metric.Fields()
t := metric.Time()
if ok := ro.Config.Filter.Apply(name, fields, tags); !ok {
return
}
// error is not possible if creating from another metric, so ignore.
metric, _ = telegraf.NewMetric(name, tags, fields, t)
}
ro.Lock()
defer ro.Unlock()
if len(ro.metrics) < ro.MetricBufferLimit {
ro.metrics = append(ro.metrics, metric)
} else {
if ro.FlushBufferWhenFull {
ro.metrics = append(ro.metrics, metric)
tmpmetrics := make([]telegraf.Metric, len(ro.metrics))
copy(tmpmetrics, ro.metrics)
ro.metrics = make([]telegraf.Metric, 0)
err := ro.write(tmpmetrics)
if err != nil {
log.Printf("ERROR writing full metric buffer to output %s, %s",
ro.Name, err)
if len(ro.tmpmetrics) == FULL_METRIC_BUFFERS_LIMIT {
ro.mapI = 0
// overwrite one
ro.tmpmetrics[ro.mapI] = tmpmetrics
ro.mapI++
} else {
ro.tmpmetrics[ro.mapI] = tmpmetrics
ro.mapI++
}
}
} else {
if ro.overwriteI == 0 {
log.Printf("WARNING: overwriting cached metrics, you may want to " +
"increase the metric_buffer_limit setting in your [agent] " +
"config if you do not wish to overwrite metrics.\n")
}
if ro.overwriteI == len(ro.metrics) {
ro.overwriteI = 0
}
ro.metrics[ro.overwriteI] = metric
ro.overwriteI++
ro.metrics.Add(metric)
if ro.metrics.Len() == ro.MetricBatchSize {
batch := ro.metrics.Batch(ro.MetricBatchSize)
err := ro.write(batch)
if err != nil {
ro.failMetrics.Add(batch...)
}
}
}
// Write writes all cached points to this output.
func (ro *RunningOutput) Write() error {
ro.Lock()
defer ro.Unlock()
err := ro.write(ro.metrics)
if err != nil {
return err
} else {
ro.metrics = make([]telegraf.Metric, 0)
ro.overwriteI = 0
if !ro.Quiet {
log.Printf("I! Output [%s] buffer fullness: %d / %d metrics. "+
"Total gathered metrics: %d. Total dropped metrics: %d.",
ro.Name,
ro.failMetrics.Len()+ro.metrics.Len(),
ro.MetricBufferLimit,
ro.metrics.Total(),
ro.metrics.Drops()+ro.failMetrics.Drops())
}
// Write any cached metric buffers that failed previously
for i, tmpmetrics := range ro.tmpmetrics {
if err := ro.write(tmpmetrics); err != nil {
return err
} else {
delete(ro.tmpmetrics, i)
var err error
if !ro.failMetrics.IsEmpty() {
bufLen := ro.failMetrics.Len()
// how many batches of failed writes we need to write.
nBatches := bufLen/ro.MetricBatchSize + 1
batchSize := ro.MetricBatchSize
for i := 0; i < nBatches; i++ {
// If it's the last batch, only grab the metrics that have not had
// a write attempt already (this is primarily to preserve order).
if i == nBatches-1 {
batchSize = bufLen % ro.MetricBatchSize
}
batch := ro.failMetrics.Batch(batchSize)
// If we've already failed previous writes, don't bother trying to
// write to this output again. We are not exiting the loop just so
// that we can rotate the metrics to preserve order.
if err == nil {
err = ro.write(batch)
}
if err != nil {
ro.failMetrics.Add(batch...)
}
}
}
batch := ro.metrics.Batch(ro.MetricBatchSize)
// see comment above about not trying to write to an already failed output.
// if ro.failMetrics is empty then err will always be nil at this point.
if err == nil {
err = ro.write(batch)
}
if err != nil {
ro.failMetrics.Add(batch...)
return err
}
return nil
}
func (ro *RunningOutput) write(metrics []telegraf.Metric) error {
if metrics == nil || len(metrics) == 0 {
return nil
}
start := time.Now()
err := ro.Output.Write(metrics)
elapsed := time.Since(start)
if err == nil {
if !ro.Quiet {
log.Printf("Wrote %d metrics to output %s in %s\n",
len(metrics), ro.Name, elapsed)
log.Printf("I! Output [%s] wrote batch of %d metrics in %s\n",
ro.Name, len(metrics), elapsed)
}
}
return err
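
For clarity, here is a standalone sketch of the retry-batching arithmetic used in Write above, with made-up counts (13 previously failed metrics, batch size 5): full batches are written first, and the final batch only takes the remainder so that metric order is preserved.

```go
package main

import "fmt"

func main() {
	bufLen, batchSize := 13, 5 // illustrative numbers
	// Same arithmetic as RunningOutput.Write for retrying failed metrics.
	nBatches := bufLen/batchSize + 1
	for i := 0; i < nBatches; i++ {
		size := batchSize
		if i == nBatches-1 {
			size = bufLen % batchSize // last batch holds only the remainder
		}
		fmt.Printf("batch %d: %d metrics\n", i, size) // 5, 5, 3
	}
}
```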


@@ -1,8 +1,7 @@
package internal_models
package models
import (
"fmt"
"sort"
"sync"
"testing"
@@ -29,16 +28,92 @@ var next5 = []telegraf.Metric{
testutil.TestMetric(101, "metric10"),
}
// Test that we can write metrics with simple default setup.
func TestRunningOutputDefault(t *testing.T) {
// Benchmark adding metrics.
func BenchmarkRunningOutputAddWrite(b *testing.B) {
conf := &OutputConfig{
Filter: Filter{
IsActive: false,
},
Filter: Filter{},
}
m := &perfOutput{}
ro := NewRunningOutput("test", m, conf, 1000, 10000)
ro.Quiet = true
for n := 0; n < b.N; n++ {
ro.AddMetric(first5[0])
ro.Write()
}
}
// Benchmark adding metrics.
func BenchmarkRunningOutputAddWriteEvery100(b *testing.B) {
conf := &OutputConfig{
Filter: Filter{},
}
m := &perfOutput{}
ro := NewRunningOutput("test", m, conf, 1000, 10000)
ro.Quiet = true
for n := 0; n < b.N; n++ {
ro.AddMetric(first5[0])
if n%100 == 0 {
ro.Write()
}
}
}
// Benchmark adding metrics.
func BenchmarkRunningOutputAddFailWrites(b *testing.B) {
conf := &OutputConfig{
Filter: Filter{},
}
m := &perfOutput{}
m.failWrite = true
ro := NewRunningOutput("test", m, conf, 1000, 10000)
ro.Quiet = true
for n := 0; n < b.N; n++ {
ro.AddMetric(first5[0])
}
}
// Test that NameDrop filters get properly applied.
func TestRunningOutput_DropFilter(t *testing.T) {
conf := &OutputConfig{
Filter: Filter{
NameDrop: []string{"metric1", "metric2"},
},
}
assert.NoError(t, conf.Filter.Compile())
m := &mockOutput{}
ro := NewRunningOutput("test", m, conf)
ro := NewRunningOutput("test", m, conf, 1000, 10000)
for _, metric := range first5 {
ro.AddMetric(metric)
}
for _, metric := range next5 {
ro.AddMetric(metric)
}
assert.Len(t, m.Metrics(), 0)
err := ro.Write()
assert.NoError(t, err)
assert.Len(t, m.Metrics(), 8)
}
// Test that NameDrop filters without a match do nothing.
func TestRunningOutput_PassFilter(t *testing.T) {
conf := &OutputConfig{
Filter: Filter{
NameDrop: []string{"metric1000", "foo*"},
},
}
assert.NoError(t, conf.Filter.Compile())
m := &mockOutput{}
ro := NewRunningOutput("test", m, conf, 1000, 10000)
for _, metric := range first5 {
ro.AddMetric(metric)
@@ -53,50 +128,102 @@ func TestRunningOutputDefault(t *testing.T) {
assert.Len(t, m.Metrics(), 10)
}
// Test that the first metric gets overwritten if there is a buffer overflow.
func TestRunningOutputOverwrite(t *testing.T) {
// Test that tags are properly included
func TestRunningOutput_TagIncludeNoMatch(t *testing.T) {
conf := &OutputConfig{
Filter: Filter{
IsActive: false,
TagInclude: []string{"nothing*"},
},
}
assert.NoError(t, conf.Filter.Compile())
m := &mockOutput{}
ro := NewRunningOutput("test", m, conf)
ro.MetricBufferLimit = 4
ro := NewRunningOutput("test", m, conf, 1000, 10000)
for _, metric := range first5 {
ro.AddMetric(metric)
}
require.Len(t, m.Metrics(), 0)
ro.AddMetric(first5[0])
assert.Len(t, m.Metrics(), 0)
err := ro.Write()
require.NoError(t, err)
require.Len(t, m.Metrics(), 4)
var expected, actual []string
for i, exp := range first5[1:] {
expected = append(expected, exp.String())
actual = append(actual, m.Metrics()[i].String())
}
sort.Strings(expected)
sort.Strings(actual)
assert.Equal(t, expected, actual)
assert.NoError(t, err)
assert.Len(t, m.Metrics(), 1)
assert.Empty(t, m.Metrics()[0].Tags())
}
// Test that multiple buffer overflows are handled properly.
func TestRunningOutputMultiOverwrite(t *testing.T) {
// Test that tags are properly excluded
func TestRunningOutput_TagExcludeMatch(t *testing.T) {
conf := &OutputConfig{
Filter: Filter{
IsActive: false,
TagExclude: []string{"tag*"},
},
}
assert.NoError(t, conf.Filter.Compile())
m := &mockOutput{}
ro := NewRunningOutput("test", m, conf, 1000, 10000)
ro.AddMetric(first5[0])
assert.Len(t, m.Metrics(), 0)
err := ro.Write()
assert.NoError(t, err)
assert.Len(t, m.Metrics(), 1)
assert.Len(t, m.Metrics()[0].Tags(), 0)
}
// Test that tags are properly excluded
func TestRunningOutput_TagExcludeNoMatch(t *testing.T) {
conf := &OutputConfig{
Filter: Filter{
TagExclude: []string{"nothing*"},
},
}
assert.NoError(t, conf.Filter.Compile())
m := &mockOutput{}
ro := NewRunningOutput("test", m, conf, 1000, 10000)
ro.AddMetric(first5[0])
assert.Len(t, m.Metrics(), 0)
err := ro.Write()
assert.NoError(t, err)
assert.Len(t, m.Metrics(), 1)
assert.Len(t, m.Metrics()[0].Tags(), 1)
}
// Test that tags are properly included
func TestRunningOutput_TagIncludeMatch(t *testing.T) {
conf := &OutputConfig{
Filter: Filter{
TagInclude: []string{"tag*"},
},
}
assert.NoError(t, conf.Filter.Compile())
m := &mockOutput{}
ro := NewRunningOutput("test", m, conf, 1000, 10000)
ro.AddMetric(first5[0])
assert.Len(t, m.Metrics(), 0)
err := ro.Write()
assert.NoError(t, err)
assert.Len(t, m.Metrics(), 1)
assert.Len(t, m.Metrics()[0].Tags(), 1)
}
// Test that we can write metrics with simple default setup.
func TestRunningOutputDefault(t *testing.T) {
conf := &OutputConfig{
Filter: Filter{},
}
m := &mockOutput{}
ro := NewRunningOutput("test", m, conf)
ro.MetricBufferLimit = 3
ro := NewRunningOutput("test", m, conf, 1000, 10000)
for _, metric := range first5 {
ro.AddMetric(metric)
@@ -104,39 +231,24 @@ func TestRunningOutputMultiOverwrite(t *testing.T) {
for _, metric := range next5 {
ro.AddMetric(metric)
}
require.Len(t, m.Metrics(), 0)
assert.Len(t, m.Metrics(), 0)
err := ro.Write()
require.NoError(t, err)
require.Len(t, m.Metrics(), 3)
var expected, actual []string
for i, exp := range next5[2:] {
expected = append(expected, exp.String())
actual = append(actual, m.Metrics()[i].String())
}
sort.Strings(expected)
sort.Strings(actual)
assert.Equal(t, expected, actual)
assert.NoError(t, err)
assert.Len(t, m.Metrics(), 10)
}
// Test that running output doesn't flush until it's full when
// FlushBufferWhenFull is set.
func TestRunningOutputFlushWhenFull(t *testing.T) {
conf := &OutputConfig{
Filter: Filter{
IsActive: false,
},
Filter: Filter{},
}
m := &mockOutput{}
ro := NewRunningOutput("test", m, conf)
ro.FlushBufferWhenFull = true
ro.MetricBufferLimit = 5
ro := NewRunningOutput("test", m, conf, 6, 10)
// Fill buffer to limit
// Fill buffer to 1 under limit
for _, metric := range first5 {
ro.AddMetric(metric)
}
@@ -159,15 +271,11 @@ func TestRunningOutputFlushWhenFull(t *testing.T) {
// FlushBufferWhenFull is set, twice.
func TestRunningOutputMultiFlushWhenFull(t *testing.T) {
conf := &OutputConfig{
Filter: Filter{
IsActive: false,
},
Filter: Filter{},
}
m := &mockOutput{}
ro := NewRunningOutput("test", m, conf)
ro.FlushBufferWhenFull = true
ro.MetricBufferLimit = 4
ro := NewRunningOutput("test", m, conf, 4, 12)
// Fill buffer past limit twice
for _, metric := range first5 {
@@ -177,23 +285,19 @@ func TestRunningOutputMultiFlushWhenFull(t *testing.T) {
ro.AddMetric(metric)
}
// flushed twice
assert.Len(t, m.Metrics(), 10)
assert.Len(t, m.Metrics(), 8)
}
func TestRunningOutputWriteFail(t *testing.T) {
conf := &OutputConfig{
Filter: Filter{
IsActive: false,
},
Filter: Filter{},
}
m := &mockOutput{}
m.failWrite = true
ro := NewRunningOutput("test", m, conf)
ro.FlushBufferWhenFull = true
ro.MetricBufferLimit = 4
ro := NewRunningOutput("test", m, conf, 4, 12)
// Fill buffer past limit twice
// Fill buffer to limit twice
for _, metric := range first5 {
ro.AddMetric(metric)
}
@@ -216,6 +320,155 @@ func TestRunningOutputWriteFail(t *testing.T) {
assert.Len(t, m.Metrics(), 10)
}
// Verify that the order of points is preserved during a write failure.
func TestRunningOutputWriteFailOrder(t *testing.T) {
conf := &OutputConfig{
Filter: Filter{},
}
m := &mockOutput{}
m.failWrite = true
ro := NewRunningOutput("test", m, conf, 100, 1000)
// add 5 metrics
for _, metric := range first5 {
ro.AddMetric(metric)
}
// no successful flush yet
assert.Len(t, m.Metrics(), 0)
// Write fails
err := ro.Write()
require.Error(t, err)
// no successful flush yet
assert.Len(t, m.Metrics(), 0)
m.failWrite = false
// add 5 more metrics
for _, metric := range next5 {
ro.AddMetric(metric)
}
err = ro.Write()
require.NoError(t, err)
// Verify that 10 metrics were written
assert.Len(t, m.Metrics(), 10)
// Verify that they are in order
expected := append(first5, next5...)
assert.Equal(t, expected, m.Metrics())
}
// Verify that the order of points is preserved during many write failures.
func TestRunningOutputWriteFailOrder2(t *testing.T) {
conf := &OutputConfig{
Filter: Filter{},
}
m := &mockOutput{}
m.failWrite = true
ro := NewRunningOutput("test", m, conf, 5, 100)
// add 5 metrics
for _, metric := range first5 {
ro.AddMetric(metric)
}
// Write fails
err := ro.Write()
require.Error(t, err)
// no successful flush yet
assert.Len(t, m.Metrics(), 0)
// add 5 metrics
for _, metric := range next5 {
ro.AddMetric(metric)
}
// Write fails
err = ro.Write()
require.Error(t, err)
// no successful flush yet
assert.Len(t, m.Metrics(), 0)
// add 5 metrics
for _, metric := range first5 {
ro.AddMetric(metric)
}
// Write fails
err = ro.Write()
require.Error(t, err)
// no successful flush yet
assert.Len(t, m.Metrics(), 0)
// add 5 metrics
for _, metric := range next5 {
ro.AddMetric(metric)
}
// Write fails
err = ro.Write()
require.Error(t, err)
// no successful flush yet
assert.Len(t, m.Metrics(), 0)
m.failWrite = false
err = ro.Write()
require.NoError(t, err)
// Verify that 10 metrics were written
assert.Len(t, m.Metrics(), 20)
// Verify that they are in order
expected := append(first5, next5...)
expected = append(expected, first5...)
expected = append(expected, next5...)
assert.Equal(t, expected, m.Metrics())
}
// Verify that the order of points is preserved when there is a remainder
// of points for the batch.
//
// ie, with a batch size of 5:
//
// 1 2 3 4 5 6 <-- order, failed points
// 6 1 2 3 4 5 <-- order, after 1st write failure (1 2 3 4 5 was batch)
// 1 2 3 4 5 6 <-- order, after 2nd write failure, (6 was batch)
//
func TestRunningOutputWriteFailOrder3(t *testing.T) {
conf := &OutputConfig{
Filter: Filter{},
}
m := &mockOutput{}
m.failWrite = true
ro := NewRunningOutput("test", m, conf, 5, 1000)
// add 5 metrics
for _, metric := range first5 {
ro.AddMetric(metric)
}
// no successful flush yet
assert.Len(t, m.Metrics(), 0)
// Write fails
err := ro.Write()
require.Error(t, err)
// no successful flush yet
assert.Len(t, m.Metrics(), 0)
// add and attempt to write a single metric:
ro.AddMetric(next5[0])
err = ro.Write()
require.Error(t, err)
// unset fail and write metrics
m.failWrite = false
err = ro.Write()
require.NoError(t, err)
// Verify that 6 metrics were written
assert.Len(t, m.Metrics(), 6)
// Verify that they are in order
expected := append(first5, next5[0])
assert.Equal(t, expected, m.Metrics())
}
type mockOutput struct {
sync.Mutex
@@ -263,3 +516,31 @@ func (m *mockOutput) Metrics() []telegraf.Metric {
defer m.Unlock()
return m.metrics
}
type perfOutput struct {
// if true, mock a write failure
failWrite bool
}
func (m *perfOutput) Connect() error {
return nil
}
func (m *perfOutput) Close() error {
return nil
}
func (m *perfOutput) Description() string {
return ""
}
func (m *perfOutput) SampleConfig() string {
return ""
}
func (m *perfOutput) Write(metrics []telegraf.Metric) error {
if m.failWrite {
return fmt.Errorf("Failed Write!")
}
return nil
}

logger/logger.go (new file, 58 lines)

@@ -0,0 +1,58 @@
package logger
import (
"io"
"log"
"os"
"github.com/influxdata/wlog"
)
// newTelegrafWriter returns a logging-wrapped writer.
func newTelegrafWriter(w io.Writer) io.Writer {
return &telegrafLog{
writer: wlog.NewWriter(w),
}
}
type telegrafLog struct {
writer io.Writer
}
func (t *telegrafLog) Write(p []byte) (n int, err error) {
return t.writer.Write(p)
}
// SetupLogging configures the logging output.
// debug will set the log level to DEBUG
// quiet will set the log level to ERROR
// logfile will direct the logging output to a file. Empty string is
// interpreted as stdout. If there is an error opening the file the
// logger will fall back to stdout.
func SetupLogging(debug, quiet bool, logfile string) {
if debug {
wlog.SetLevel(wlog.DEBUG)
}
if quiet {
wlog.SetLevel(wlog.ERROR)
}
var oFile *os.File
if logfile != "" {
if _, err := os.Stat(logfile); os.IsNotExist(err) {
if oFile, err = os.Create(logfile); err != nil {
log.Printf("E! Unable to create %s (%s), using stdout", logfile, err)
oFile = os.Stdout
}
} else {
if oFile, err = os.OpenFile(logfile, os.O_APPEND|os.O_WRONLY, os.ModeAppend); err != nil {
log.Printf("E! Unable to append to %s (%s), using stdout", logfile, err)
oFile = os.Stdout
}
}
} else {
oFile = os.Stdout
}
log.SetOutput(newTelegrafWriter(oFile))
}
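
A small usage sketch for the new logger. The file path and log messages are illustrative; the single-letter prefixes match the ones used throughout this changeset, and the wlog-wrapped writer filters lines by that prefix.

```go
package main

import (
	"log"

	"github.com/influxdata/telegraf/logger" // path taken from the logger/logger.go header above
)

func main() {
	// debug=true, quiet=false, log to a file (stdout is used if the file
	// cannot be created or appended to).
	logger.SetupLogging(true, false, "/tmp/telegraf.log") // illustrative path

	log.Printf("D! connecting to input")   // debug, shown because debug=true
	log.Printf("I! gathered 42 metrics")   // info
	log.Printf("E! write failed: timeout") // error, always shown
}
```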


@@ -6,6 +6,17 @@ import (
"github.com/influxdata/influxdb/client/v2"
)
// ValueType is an enumeration of metric types that represent a simple value.
type ValueType int
// Possible values for the ValueType enum.
const (
_ ValueType = iota
Counter
Gauge
Untyped
)
type Metric interface {
// Name returns the measurement name of the metric
Name() string
@@ -16,6 +27,9 @@ type Metric interface {
// Time return the timestamp for the metric
Time() time.Time
// Type returns the metric type. Can be either telegraf.Gauge or telegraf.Counter
Type() ValueType
// UnixNano returns the unix nano time of the metric
UnixNano() int64
@@ -35,29 +49,62 @@ type Metric interface {
// metric is a wrapper of the influxdb client.Point struct
type metric struct {
pt *client.Point
mType ValueType
}
// NewMetric returns a metric with the given timestamp. If a timestamp is not
// given, then data is sent to the database without a timestamp, in which case
// the server will assign local time upon reception. NOTE: it is recommended to
// send data with a timestamp.
// NewMetric returns an untyped metric.
func NewMetric(
name string,
tags map[string]string,
fields map[string]interface{},
t ...time.Time,
t time.Time,
) (Metric, error) {
var T time.Time
if len(t) > 0 {
T = t[0]
}
pt, err := client.NewPoint(name, tags, fields, T)
pt, err := client.NewPoint(name, tags, fields, t)
if err != nil {
return nil, err
}
return &metric{
pt: pt,
pt: pt,
mType: Untyped,
}, nil
}
// NewGaugeMetric returns a gauge metric.
// Gauge metrics should be used when the metric can arbitrarily go up and
// down. ie, temperature, memory usage, cpu usage, etc.
func NewGaugeMetric(
name string,
tags map[string]string,
fields map[string]interface{},
t time.Time,
) (Metric, error) {
pt, err := client.NewPoint(name, tags, fields, t)
if err != nil {
return nil, err
}
return &metric{
pt: pt,
mType: Gauge,
}, nil
}
// NewCounterMetric returns a Counter metric.
// Counter metrics should be used when the metric being created is an
// always-increasing counter. ie, net bytes received, requests served, errors, etc.
func NewCounterMetric(
name string,
tags map[string]string,
fields map[string]interface{},
t time.Time,
) (Metric, error) {
pt, err := client.NewPoint(name, tags, fields, t)
if err != nil {
return nil, err
}
return &metric{
pt: pt,
mType: Counter,
}, nil
}
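
A brief sketch contrasting the two typed constructors. The metric names and values are made up; only the constructors, Type(), Counter, and Gauge come from the code above.

```go
package main

import (
	"fmt"
	"time"

	"github.com/influxdata/telegraf"
)

func main() {
	now := time.Now()
	tags := map[string]string{"host": "localhost"}

	// Counter: an always-increasing value, e.g. total requests served.
	reqs, _ := telegraf.NewCounterMetric("http_requests",
		tags, map[string]interface{}{"total": int64(1027)}, now)

	// Gauge: a value that can go up and down, e.g. memory in use.
	mem, _ := telegraf.NewGaugeMetric("mem",
		tags, map[string]interface{}{"used_percent": 52.1}, now)

	fmt.Println(reqs.Type() == telegraf.Counter) // true
	fmt.Println(mem.Type() == telegraf.Gauge)    // true
}
```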
@@ -73,6 +120,10 @@ func (m *metric) Time() time.Time {
return m.pt.Time()
}
func (m *metric) Type() ValueType {
return m.mType
}
func (m *metric) UnixNano() int64 {
return m.pt.UnixNano()
}


@@ -23,6 +23,51 @@ func TestNewMetric(t *testing.T) {
m, err := NewMetric("cpu", tags, fields, now)
assert.NoError(t, err)
assert.Equal(t, Untyped, m.Type())
assert.Equal(t, tags, m.Tags())
assert.Equal(t, fields, m.Fields())
assert.Equal(t, "cpu", m.Name())
assert.Equal(t, now, m.Time())
assert.Equal(t, now.UnixNano(), m.UnixNano())
}
func TestNewGaugeMetric(t *testing.T) {
now := time.Now()
tags := map[string]string{
"host": "localhost",
"datacenter": "us-east-1",
}
fields := map[string]interface{}{
"usage_idle": float64(99),
"usage_busy": float64(1),
}
m, err := NewGaugeMetric("cpu", tags, fields, now)
assert.NoError(t, err)
assert.Equal(t, Gauge, m.Type())
assert.Equal(t, tags, m.Tags())
assert.Equal(t, fields, m.Fields())
assert.Equal(t, "cpu", m.Name())
assert.Equal(t, now, m.Time())
assert.Equal(t, now.UnixNano(), m.UnixNano())
}
func TestNewCounterMetric(t *testing.T) {
now := time.Now()
tags := map[string]string{
"host": "localhost",
"datacenter": "us-east-1",
}
fields := map[string]interface{}{
"usage_idle": float64(99),
"usage_busy": float64(1),
}
m, err := NewCounterMetric("cpu", tags, fields, now)
assert.NoError(t, err)
assert.Equal(t, Counter, m.Type())
assert.Equal(t, tags, m.Tags())
assert.Equal(t, fields, m.Fields())
assert.Equal(t, "cpu", m.Name())
@@ -51,23 +96,6 @@ func TestNewMetricString(t *testing.T) {
assert.Equal(t, lineProtoPrecision, m.PrecisionString("s"))
}
func TestNewMetricStringNoTime(t *testing.T) {
tags := map[string]string{
"host": "localhost",
}
fields := map[string]interface{}{
"usage_idle": float64(99),
}
m, err := NewMetric("cpu", tags, fields)
assert.NoError(t, err)
lineProto := fmt.Sprintf("cpu,host=localhost usage_idle=99")
assert.Equal(t, lineProto, m.String())
lineProtoPrecision := fmt.Sprintf("cpu,host=localhost usage_idle=99")
assert.Equal(t, lineProtoPrecision, m.PrecisionString("s"))
}
func TestNewMetricFailNaN(t *testing.T) {
now := time.Now()


@@ -27,6 +27,14 @@ The example plugin gathers metrics about example things
- tag2
- measurement2 has the following tags:
- tag3
### Sample Queries:
These are some useful queries (to generate dashboards or other visualizations) to run against data from this plugin:
```
SELECT max(field1), mean(field1), min(field1) FROM measurement1 WHERE tag1='bar' AND time > now() - 1h GROUP BY tag1
```
### Example Output:

File diff suppressed because one or more lines are too long


@@ -1,104 +1,21 @@
package aerospike
import (
"bytes"
"encoding/binary"
"fmt"
"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/plugins/inputs"
"errors"
"log"
"net"
"strconv"
"strings"
"sync"
"time"
"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/internal/errchan"
"github.com/influxdata/telegraf/plugins/inputs"
as "github.com/aerospike/aerospike-client-go"
)
const (
MSG_HEADER_SIZE = 8
MSG_TYPE = 1 // Info is 1
MSG_VERSION = 2
)
var (
STATISTICS_COMMAND = []byte("statistics\n")
NAMESPACES_COMMAND = []byte("namespaces\n")
)
type aerospikeMessageHeader struct {
Version uint8
Type uint8
DataLen [6]byte
}
type aerospikeMessage struct {
aerospikeMessageHeader
Data []byte
}
// Taken from aerospike-client-go/types/message.go
func (msg *aerospikeMessage) Serialize() []byte {
msg.DataLen = msgLenToBytes(int64(len(msg.Data)))
buf := bytes.NewBuffer([]byte{})
binary.Write(buf, binary.BigEndian, msg.aerospikeMessageHeader)
binary.Write(buf, binary.BigEndian, msg.Data[:])
return buf.Bytes()
}
type aerospikeInfoCommand struct {
msg *aerospikeMessage
}
// Taken from aerospike-client-go/info.go
func (nfo *aerospikeInfoCommand) parseMultiResponse() (map[string]string, error) {
responses := make(map[string]string)
offset := int64(0)
begin := int64(0)
dataLen := int64(len(nfo.msg.Data))
// Create reusable StringBuilder for performance.
for offset < dataLen {
b := nfo.msg.Data[offset]
if b == '\t' {
name := nfo.msg.Data[begin:offset]
offset++
begin = offset
// Parse field value.
for offset < dataLen {
if nfo.msg.Data[offset] == '\n' {
break
}
offset++
}
if offset > begin {
value := nfo.msg.Data[begin:offset]
responses[string(name)] = string(value)
} else {
responses[string(name)] = ""
}
offset++
begin = offset
} else if b == '\n' {
if offset > begin {
name := nfo.msg.Data[begin:offset]
responses[string(name)] = ""
}
offset++
begin = offset
} else {
offset++
}
}
if offset > begin {
name := nfo.msg.Data[begin:offset]
responses[string(name)] = ""
}
return responses, nil
}
type Aerospike struct {
Servers []string
}
@@ -115,7 +32,7 @@ func (a *Aerospike) SampleConfig() string {
}
func (a *Aerospike) Description() string {
return "Read stats from an aerospike server"
return "Read stats from aerospike server(s)"
}
func (a *Aerospike) Gather(acc telegraf.Accumulator) error {
@@ -124,214 +41,114 @@ func (a *Aerospike) Gather(acc telegraf.Accumulator) error {
}
var wg sync.WaitGroup
var outerr error
errChan := errchan.New(len(a.Servers))
wg.Add(len(a.Servers))
for _, server := range a.Servers {
wg.Add(1)
go func(server string) {
go func(serv string) {
defer wg.Done()
outerr = a.gatherServer(server, acc)
errChan.C <- a.gatherServer(serv, acc)
}(server)
}
wg.Wait()
return outerr
return errChan.Error()
}
func (a *Aerospike) gatherServer(host string, acc telegraf.Accumulator) error {
aerospikeInfo, err := getMap(STATISTICS_COMMAND, host)
func (a *Aerospike) gatherServer(hostport string, acc telegraf.Accumulator) error {
host, port, err := net.SplitHostPort(hostport)
if err != nil {
return fmt.Errorf("Aerospike info failed: %s", err)
return err
}
readAerospikeStats(aerospikeInfo, acc, host, "")
namespaces, err := getList(NAMESPACES_COMMAND, host)
iport, err := strconv.Atoi(port)
if err != nil {
return fmt.Errorf("Aerospike namespace list failed: %s", err)
iport = 3000
}
for ix := range namespaces {
nsInfo, err := getMap([]byte("namespace/"+namespaces[ix]+"\n"), host)
if err != nil {
return fmt.Errorf("Aerospike namespace '%s' query failed: %s", namespaces[ix], err)
c, err := as.NewClient(host, iport)
if err != nil {
return err
}
defer c.Close()
nodes := c.GetNodes()
for _, n := range nodes {
tags := map[string]string{
"aerospike_host": hostport,
}
fields := map[string]interface{}{
"node_name": n.GetName(),
}
stats, err := as.RequestNodeStats(n)
if err != nil {
return err
}
for k, v := range stats {
val, err := parseValue(v)
if err == nil {
fields[strings.Replace(k, "-", "_", -1)] = val
} else {
log.Printf("I! skipping aerospike field %v with int64 overflow", k)
}
}
acc.AddFields("aerospike_node", fields, tags, time.Now())
info, err := as.RequestNodeInfo(n, "namespaces")
if err != nil {
return err
}
namespaces := strings.Split(info["namespaces"], ";")
for _, namespace := range namespaces {
nTags := map[string]string{
"aerospike_host": hostport,
}
nTags["namespace"] = namespace
nFields := map[string]interface{}{
"node_name": n.GetName(),
}
info, err := as.RequestNodeInfo(n, "namespace/"+namespace)
if err != nil {
continue
}
stats := strings.Split(info["namespace/"+namespace], ";")
for _, stat := range stats {
parts := strings.Split(stat, "=")
if len(parts) < 2 {
continue
}
val, err := parseValue(parts[1])
if err == nil {
nFields[strings.Replace(parts[0], "-", "_", -1)] = val
} else {
log.Printf("I! skipping aerospike field %v with int64 overflow", parts[0])
}
}
acc.AddFields("aerospike_namespace", nFields, nTags, time.Now())
}
readAerospikeStats(nsInfo, acc, host, namespaces[ix])
}
return nil
}
func getMap(key []byte, host string) (map[string]string, error) {
data, err := get(key, host)
if err != nil {
return nil, fmt.Errorf("Failed to get data: %s", err)
func parseValue(v string) (interface{}, error) {
if parsed, err := strconv.ParseInt(v, 10, 64); err == nil {
return parsed, nil
} else if _, err := strconv.ParseUint(v, 10, 64); err == nil {
// int64 overflow, yet valid uint64
return nil, errors.New("Number is too large")
} else if parsed, err := strconv.ParseBool(v); err == nil {
return parsed, nil
} else {
return v, nil
}
parsed, err := unmarshalMapInfo(data, string(key))
if err != nil {
return nil, fmt.Errorf("Failed to unmarshal data: %s", err)
}
return parsed, nil
}
func getList(key []byte, host string) ([]string, error) {
data, err := get(key, host)
if err != nil {
return nil, fmt.Errorf("Failed to get data: %s", err)
func copyTags(m map[string]string) map[string]string {
out := make(map[string]string)
for k, v := range m {
out[k] = v
}
parsed, err := unmarshalListInfo(data, string(key))
if err != nil {
return nil, fmt.Errorf("Failed to unmarshal data: %s", err)
}
return parsed, nil
}
func get(key []byte, host string) (map[string]string, error) {
var err error
var data map[string]string
asInfo := &aerospikeInfoCommand{
msg: &aerospikeMessage{
aerospikeMessageHeader: aerospikeMessageHeader{
Version: uint8(MSG_VERSION),
Type: uint8(MSG_TYPE),
DataLen: msgLenToBytes(int64(len(key))),
},
Data: key,
},
}
cmd := asInfo.msg.Serialize()
addr, err := net.ResolveTCPAddr("tcp", host)
if err != nil {
return data, fmt.Errorf("Lookup failed for '%s': %s", host, err)
}
conn, err := net.DialTCP("tcp", nil, addr)
if err != nil {
return data, fmt.Errorf("Connection failed for '%s': %s", host, err)
}
defer conn.Close()
_, err = conn.Write(cmd)
if err != nil {
return data, fmt.Errorf("Failed to send to '%s': %s", host, err)
}
msgHeader := bytes.NewBuffer(make([]byte, MSG_HEADER_SIZE))
_, err = readLenFromConn(conn, msgHeader.Bytes(), MSG_HEADER_SIZE)
if err != nil {
return data, fmt.Errorf("Failed to read header: %s", err)
}
err = binary.Read(msgHeader, binary.BigEndian, &asInfo.msg.aerospikeMessageHeader)
if err != nil {
return data, fmt.Errorf("Failed to unmarshal header: %s", err)
}
msgLen := msgLenFromBytes(asInfo.msg.aerospikeMessageHeader.DataLen)
if int64(len(asInfo.msg.Data)) != msgLen {
asInfo.msg.Data = make([]byte, msgLen)
}
_, err = readLenFromConn(conn, asInfo.msg.Data, len(asInfo.msg.Data))
if err != nil {
return data, fmt.Errorf("Failed to read from connection to '%s': %s", host, err)
}
data, err = asInfo.parseMultiResponse()
if err != nil {
return data, fmt.Errorf("Failed to parse response from '%s': %s", host, err)
}
return data, err
}
func readAerospikeStats(
stats map[string]string,
acc telegraf.Accumulator,
host string,
namespace string,
) {
fields := make(map[string]interface{})
tags := map[string]string{
"aerospike_host": host,
"namespace": "_service",
}
if namespace != "" {
tags["namespace"] = namespace
}
for key, value := range stats {
// We are going to ignore all string based keys
val, err := strconv.ParseInt(value, 10, 64)
if err == nil {
if strings.Contains(key, "-") {
key = strings.Replace(key, "-", "_", -1)
}
fields[key] = val
}
}
acc.AddFields("aerospike", fields, tags)
}
func unmarshalMapInfo(infoMap map[string]string, key string) (map[string]string, error) {
key = strings.TrimSuffix(key, "\n")
res := map[string]string{}
v, exists := infoMap[key]
if !exists {
return res, fmt.Errorf("Key '%s' missing from info", key)
}
values := strings.Split(v, ";")
for i := range values {
kv := strings.Split(values[i], "=")
if len(kv) > 1 {
res[kv[0]] = kv[1]
}
}
return res, nil
}
func unmarshalListInfo(infoMap map[string]string, key string) ([]string, error) {
key = strings.TrimSuffix(key, "\n")
v, exists := infoMap[key]
if !exists {
return []string{}, fmt.Errorf("Key '%s' missing from info", key)
}
values := strings.Split(v, ";")
return values, nil
}
func readLenFromConn(c net.Conn, buffer []byte, length int) (total int, err error) {
var r int
for total < length {
r, err = c.Read(buffer[total:length])
total += r
if err != nil {
break
}
}
return
}
// Taken from aerospike-client-go/types/message.go
func msgLenToBytes(DataLen int64) [6]byte {
b := make([]byte, 8)
binary.BigEndian.PutUint64(b, uint64(DataLen))
res := [6]byte{}
copy(res[:], b[2:])
return res
}
// Taken from aerospike-client-go/types/message.go
func msgLenFromBytes(buf [6]byte) int64 {
nbytes := append([]byte{0, 0}, buf[:]...)
DataLen := binary.BigEndian.Uint64(nbytes)
return int64(DataLen)
return out
}
func init() {


@@ -1,7 +1,6 @@
package aerospike
import (
"reflect"
"testing"
"github.com/influxdata/telegraf/testutil"
@@ -11,7 +10,7 @@ import (
func TestAerospikeStatistics(t *testing.T) {
if testing.Short() {
t.Skip("Skipping integration test in short mode")
t.Skip("Skipping aerospike integration tests.")
}
a := &Aerospike{
@@ -23,96 +22,46 @@ func TestAerospikeStatistics(t *testing.T) {
err := a.Gather(&acc)
require.NoError(t, err)
// Only use a few of the metrics
asMetrics := []string{
"transactions",
"stat_write_errs",
"stat_read_reqs",
"stat_write_reqs",
}
for _, metric := range asMetrics {
assert.True(t, acc.HasIntField("aerospike", metric), metric)
}
assert.True(t, acc.HasMeasurement("aerospike_node"))
assert.True(t, acc.HasMeasurement("aerospike_namespace"))
assert.True(t, acc.HasIntField("aerospike_node", "batch_error"))
}
func TestAerospikeMsgLenFromToBytes(t *testing.T) {
var i int64 = 8
assert.True(t, i == msgLenFromBytes(msgLenToBytes(i)))
}
func TestAerospikeStatisticsPartialErr(t *testing.T) {
if testing.Short() {
t.Skip("Skipping aerospike integration tests.")
}
a := &Aerospike{
Servers: []string{
testutil.GetLocalHost() + ":3000",
testutil.GetLocalHost() + ":9999",
},
}
func TestReadAerospikeStatsNoNamespace(t *testing.T) {
// Also test for re-writing
var acc testutil.Accumulator
stats := map[string]string{
"stat-write-errs": "12345",
"stat_read_reqs": "12345",
}
readAerospikeStats(stats, &acc, "host1", "")
fields := map[string]interface{}{
"stat_write_errs": int64(12345),
"stat_read_reqs": int64(12345),
}
tags := map[string]string{
"aerospike_host": "host1",
"namespace": "_service",
}
acc.AssertContainsTaggedFields(t, "aerospike", fields, tags)
err := a.Gather(&acc)
require.Error(t, err)
assert.True(t, acc.HasMeasurement("aerospike_node"))
assert.True(t, acc.HasMeasurement("aerospike_namespace"))
assert.True(t, acc.HasIntField("aerospike_node", "batch_error"))
}
func TestReadAerospikeStatsNamespace(t *testing.T) {
var acc testutil.Accumulator
stats := map[string]string{
"stat_write_errs": "12345",
"stat_read_reqs": "12345",
}
readAerospikeStats(stats, &acc, "host1", "test")
func TestAerospikeParseValue(t *testing.T) {
// uint64 with value bigger than int64 max
val, err := parseValue("18446744041841121751")
assert.Nil(t, val)
assert.Error(t, err)
fields := map[string]interface{}{
"stat_write_errs": int64(12345),
"stat_read_reqs": int64(12345),
}
tags := map[string]string{
"aerospike_host": "host1",
"namespace": "test",
}
acc.AssertContainsTaggedFields(t, "aerospike", fields, tags)
}
func TestAerospikeUnmarshalList(t *testing.T) {
i := map[string]string{
"test": "one;two;three",
}
expected := []string{"one", "two", "three"}
list, err := unmarshalListInfo(i, "test2")
assert.True(t, err != nil)
list, err = unmarshalListInfo(i, "test")
assert.True(t, err == nil)
equal := true
for ix := range expected {
if list[ix] != expected[ix] {
equal = false
break
}
}
assert.True(t, equal)
}
func TestAerospikeUnmarshalMap(t *testing.T) {
i := map[string]string{
"test": "key1=value1;key2=value2",
}
expected := map[string]string{
"key1": "value1",
"key2": "value2",
}
m, err := unmarshalMapInfo(i, "test")
assert.True(t, err == nil)
assert.True(t, reflect.DeepEqual(m, expected))
// int values
val, err = parseValue("42")
assert.NoError(t, err)
assert.Equal(t, val, int64(42), "must be parsed as int")
// string values
val, err = parseValue("BB977942A2CA502")
assert.NoError(t, err)
assert.Equal(t, val, `BB977942A2CA502`, "must be left as string")
}


@@ -4,6 +4,14 @@ import (
_ "github.com/influxdata/telegraf/plugins/inputs/aerospike"
_ "github.com/influxdata/telegraf/plugins/inputs/apache"
_ "github.com/influxdata/telegraf/plugins/inputs/bcache"
_ "github.com/influxdata/telegraf/plugins/inputs/cassandra"
_ "github.com/influxdata/telegraf/plugins/inputs/ceph"
_ "github.com/influxdata/telegraf/plugins/inputs/cgroup"
_ "github.com/influxdata/telegraf/plugins/inputs/chrony"
_ "github.com/influxdata/telegraf/plugins/inputs/cloudwatch"
_ "github.com/influxdata/telegraf/plugins/inputs/conntrack"
_ "github.com/influxdata/telegraf/plugins/inputs/consul"
_ "github.com/influxdata/telegraf/plugins/inputs/couchbase"
_ "github.com/influxdata/telegraf/plugins/inputs/couchdb"
_ "github.com/influxdata/telegraf/plugins/inputs/disque"
_ "github.com/influxdata/telegraf/plugins/inputs/dns_query"
@@ -11,13 +19,20 @@ import (
_ "github.com/influxdata/telegraf/plugins/inputs/dovecot"
_ "github.com/influxdata/telegraf/plugins/inputs/elasticsearch"
_ "github.com/influxdata/telegraf/plugins/inputs/exec"
_ "github.com/influxdata/telegraf/plugins/inputs/github_webhooks"
_ "github.com/influxdata/telegraf/plugins/inputs/filestat"
_ "github.com/influxdata/telegraf/plugins/inputs/graylog"
_ "github.com/influxdata/telegraf/plugins/inputs/haproxy"
_ "github.com/influxdata/telegraf/plugins/inputs/hddtemp"
_ "github.com/influxdata/telegraf/plugins/inputs/http_listener"
_ "github.com/influxdata/telegraf/plugins/inputs/http_response"
_ "github.com/influxdata/telegraf/plugins/inputs/httpjson"
_ "github.com/influxdata/telegraf/plugins/inputs/influxdb"
_ "github.com/influxdata/telegraf/plugins/inputs/ipmi_sensor"
_ "github.com/influxdata/telegraf/plugins/inputs/iptables"
_ "github.com/influxdata/telegraf/plugins/inputs/jolokia"
_ "github.com/influxdata/telegraf/plugins/inputs/kafka_consumer"
_ "github.com/influxdata/telegraf/plugins/inputs/leofs"
_ "github.com/influxdata/telegraf/plugins/inputs/logparser"
_ "github.com/influxdata/telegraf/plugins/inputs/lustre2"
_ "github.com/influxdata/telegraf/plugins/inputs/mailchimp"
_ "github.com/influxdata/telegraf/plugins/inputs/memcached"
@@ -29,11 +44,14 @@ import (
_ "github.com/influxdata/telegraf/plugins/inputs/net_response"
_ "github.com/influxdata/telegraf/plugins/inputs/nginx"
_ "github.com/influxdata/telegraf/plugins/inputs/nsq"
_ "github.com/influxdata/telegraf/plugins/inputs/nsq_consumer"
_ "github.com/influxdata/telegraf/plugins/inputs/nstat"
_ "github.com/influxdata/telegraf/plugins/inputs/ntpq"
_ "github.com/influxdata/telegraf/plugins/inputs/passenger"
_ "github.com/influxdata/telegraf/plugins/inputs/phpfpm"
_ "github.com/influxdata/telegraf/plugins/inputs/ping"
_ "github.com/influxdata/telegraf/plugins/inputs/postgresql"
_ "github.com/influxdata/telegraf/plugins/inputs/postgresql_extensible"
_ "github.com/influxdata/telegraf/plugins/inputs/powerdns"
_ "github.com/influxdata/telegraf/plugins/inputs/procstat"
_ "github.com/influxdata/telegraf/plugins/inputs/prometheus"
@@ -45,13 +63,18 @@ import (
_ "github.com/influxdata/telegraf/plugins/inputs/riak"
_ "github.com/influxdata/telegraf/plugins/inputs/sensors"
_ "github.com/influxdata/telegraf/plugins/inputs/snmp"
_ "github.com/influxdata/telegraf/plugins/inputs/snmp_legacy"
_ "github.com/influxdata/telegraf/plugins/inputs/sqlserver"
_ "github.com/influxdata/telegraf/plugins/inputs/statsd"
_ "github.com/influxdata/telegraf/plugins/inputs/sysstat"
_ "github.com/influxdata/telegraf/plugins/inputs/system"
_ "github.com/influxdata/telegraf/plugins/inputs/tail"
_ "github.com/influxdata/telegraf/plugins/inputs/tcp_listener"
_ "github.com/influxdata/telegraf/plugins/inputs/trig"
_ "github.com/influxdata/telegraf/plugins/inputs/twemproxy"
_ "github.com/influxdata/telegraf/plugins/inputs/udp_listener"
_ "github.com/influxdata/telegraf/plugins/inputs/varnish"
_ "github.com/influxdata/telegraf/plugins/inputs/webhooks"
_ "github.com/influxdata/telegraf/plugins/inputs/win_perf_counters"
_ "github.com/influxdata/telegraf/plugins/inputs/zfs"
_ "github.com/influxdata/telegraf/plugins/inputs/zookeeper"


@@ -1,7 +1,7 @@
# Telegraf plugin: Apache
#### Plugin arguments:
- **urls** []string: List of apache-status URLs to collect from.
- **urls** []string: List of apache-status URLs to collect from. Default is "http://localhost/server-status?auto".
#### Description


@@ -8,7 +8,6 @@ import (
"net/url"
"strconv"
"strings"
"sync"
"time"
"github.com/influxdata/telegraf"
@@ -21,6 +20,7 @@ type Apache struct {
var sampleConfig = `
## An array of Apache status URI to gather stats.
## Default is "http://localhost/server-status?auto".
urls = ["http://localhost/server-status?auto"]
`
@@ -33,8 +33,12 @@ func (n *Apache) Description() string {
}
func (n *Apache) Gather(acc telegraf.Accumulator) error {
var wg sync.WaitGroup
if len(n.Urls) == 0 {
n.Urls = []string{"http://localhost/server-status?auto"}
}
var outerr error
var errch = make(chan error)
for _, u := range n.Urls {
addr, err := url.Parse(u)
@@ -42,14 +46,17 @@ func (n *Apache) Gather(acc telegraf.Accumulator) error {
return fmt.Errorf("Unable to parse address '%s': %s", u, err)
}
wg.Add(1)
go func(addr *url.URL) {
defer wg.Done()
outerr = n.gatherUrl(addr, acc)
errch <- n.gatherUrl(addr, acc)
}(addr)
}
wg.Wait()
// Drain channel, waiting for all requests to finish and save last error.
for range n.Urls {
if err := <-errch; err != nil {
outerr = err
}
}
return outerr
}


@@ -36,7 +36,8 @@ func TestHTTPApache(t *testing.T) {
defer ts.Close()
a := Apache{
Urls: []string{ts.URL},
// Fetch it 2 times to catch possible data races.
Urls: []string{ts.URL, ts.URL},
}
var acc testutil.Accumulator


@@ -0,0 +1,125 @@
# Telegraf plugin: Cassandra
#### Plugin arguments:
- **context** string: Context root used for the Jolokia URL
- **servers** []string: List of servers with the format "<user:passwd@><host>:port"
- **metrics** []string: List of JMX paths that identify MBean attributes
#### Description
The Cassandra plugin collects Cassandra/JVM metrics exposed as MBean attributes through the Jolokia REST endpoint. All metrics are collected for each configured server.
See: https://jolokia.org/ and [Cassandra Documentation](http://docs.datastax.com/en/cassandra/3.x/cassandra/operations/monitoringCassandraTOC.html)
# Measurements:
The Cassandra plugin produces one or more measurements for each configured metric, adding the server's name as the `host` tag. More than one measurement is generated when querying table metrics with a wildcard for the keyspace or table name (see the wildcard example below).
Given a configuration like:
```toml
[[inputs.cassandra]]
context = "/jolokia/read"
servers = [":8778"]
metrics = ["/java.lang:type=Memory/HeapMemoryUsage"]
```
The collected metrics will be:
```
javaMemory,host=myHost,mname=HeapMemoryUsage HeapMemoryUsage_committed=1040187392,HeapMemoryUsage_init=1050673152,HeapMemoryUsage_max=1040187392,HeapMemoryUsage_used=368155000 1459551767230567084
```
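For the wildcard case, a query such as `/org.apache.cassandra.metrics:type=Table,keyspace=*,scope=*,name=ReadLatency` returns one nested value per matching table, and the plugin emits one `cassandraTable` point per keyspace/table pair. An illustrative sketch (host name and timestamp are hypothetical; tag and field names follow the plugin's test expectations):
```
cassandraTable,cassandra_host=myHost,keyspace=test_keyspace1,mname=ReadLatency,scope=test_table1 ReadLatency_Count=100,ReadLatency_OneMinuteRate=1 1459551767230567084
cassandraTable,cassandra_host=myHost,keyspace=test_keyspace2,mname=ReadLatency,scope=test_table2 ReadLatency_Count=200,ReadLatency_OneMinuteRate=2 1459551767230567084
```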
# Useful Metrics:
Here is a list of metrics that may be useful for monitoring your Cassandra cluster, compiled from multiple sources on the web.
- [How to monitor Cassandra performance metrics](https://www.datadoghq.com/blog/how-to-monitor-cassandra-performance-metrics)
- [Cassandra Documentation](http://docs.datastax.com/en/cassandra/3.x/cassandra/operations/monitoringCassandraTOC.html)
#### measurement = javaGarbageCollector
- /java.lang:type=GarbageCollector,name=ConcurrentMarkSweep/CollectionTime
- /java.lang:type=GarbageCollector,name=ConcurrentMarkSweep/CollectionCount
- /java.lang:type=GarbageCollector,name=ParNew/CollectionTime
- /java.lang:type=GarbageCollector,name=ParNew/CollectionCount
#### measurement = javaMemory
- /java.lang:type=Memory/HeapMemoryUsage
- /java.lang:type=Memory/NonHeapMemoryUsage
#### measurement = cassandraCache
- /org.apache.cassandra.metrics:type=Cache,scope=KeyCache,name=Hit
- /org.apache.cassandra.metrics:type=Cache,scope=KeyCache,name=Requests
- /org.apache.cassandra.metrics:type=Cache,scope=KeyCache,name=Entries
- /org.apache.cassandra.metrics:type=Cache,scope=KeyCache,name=Size
- /org.apache.cassandra.metrics:type=Cache,scope=KeyCache,name=Capacity
- /org.apache.cassandra.metrics:type=Cache,scope=RowCache,name=Hit
- /org.apache.cassandra.metrics:type=Cache,scope=RowCache,name=Requests
- /org.apache.cassandra.metrics:type=Cache,scope=RowCache,name=Entries
- /org.apache.cassandra.metrics:type=Cache,scope=RowCache,name=Size
- /org.apache.cassandra.metrics:type=Cache,scope=RowCache,name=Capacity
#### measurement = cassandraClient
- /org.apache.cassandra.metrics:type=Client,name=connectedNativeClients
#### measurement = cassandraClientRequest
- /org.apache.cassandra.metrics:type=ClientRequest,scope=Read,name=TotalLatency
- /org.apache.cassandra.metrics:type=ClientRequest,scope=Write,name=TotalLatency
- /org.apache.cassandra.metrics:type=ClientRequest,scope=Read,name=Latency
- /org.apache.cassandra.metrics:type=ClientRequest,scope=Write,name=Latency
- /org.apache.cassandra.metrics:type=ClientRequest,scope=Read,name=Timeouts
- /org.apache.cassandra.metrics:type=ClientRequest,scope=Write,name=Timeouts
- /org.apache.cassandra.metrics:type=ClientRequest,scope=Read,name=Unavailables
- /org.apache.cassandra.metrics:type=ClientRequest,scope=Write,name=Unavailables
- /org.apache.cassandra.metrics:type=ClientRequest,scope=Read,name=Failures
- /org.apache.cassandra.metrics:type=ClientRequest,scope=Write,name=Failures
#### measurement = cassandraCommitLog
- /org.apache.cassandra.metrics:type=CommitLog,name=PendingTasks
- /org.apache.cassandra.metrics:type=CommitLog,name=TotalCommitLogSize
#### measurement = cassandraCompaction
- /org.apache.cassandra.metrics:type=Compaction,name=CompletedTask
- /org.apache.cassandra.metrics:type=Compaction,name=PendingTasks
- /org.apache.cassandra.metrics:type=Compaction,name=TotalCompactionsCompleted
- /org.apache.cassandra.metrics:type=Compaction,name=BytesCompacted
#### measurement = cassandraStorage
- /org.apache.cassandra.metrics:type=Storage,name=Load
- /org.apache.cassandra.metrics:type=Storage,name=Exceptions
#### measurement = cassandraTable
Using wildcards for "keyspace" and "scope" can create a lot of series, as metrics will be reported for every table and keyspace, including internal system tables. Specify a keyspace name and/or a table name to limit them.
- /org.apache.cassandra.metrics:type=Table,keyspace=\*,scope=\*,name=LiveDiskSpaceUsed
- /org.apache.cassandra.metrics:type=Table,keyspace=\*,scope=\*,name=TotalDiskSpaceUsed
- /org.apache.cassandra.metrics:type=Table,keyspace=\*,scope=\*,name=ReadLatency
- /org.apache.cassandra.metrics:type=Table,keyspace=\*,scope=\*,name=CoordinatorReadLatency
- /org.apache.cassandra.metrics:type=Table,keyspace=\*,scope=\*,name=WriteLatency
- /org.apache.cassandra.metrics:type=Table,keyspace=\*,scope=\*,name=ReadTotalLatency
- /org.apache.cassandra.metrics:type=Table,keyspace=\*,scope=\*,name=WriteTotalLatency
#### measurement = cassandraThreadPools
- /org.apache.cassandra.metrics:type=ThreadPools,path=internal,scope=CompactionExecutor,name=ActiveTasks
- /org.apache.cassandra.metrics:type=ThreadPools,path=internal,scope=AntiEntropyStage,name=ActiveTasks
- /org.apache.cassandra.metrics:type=ThreadPools,path=request,scope=CounterMutationStage,name=PendingTasks
- /org.apache.cassandra.metrics:type=ThreadPools,path=request,scope=CounterMutationStage,name=CurrentlyBlockedTasks
- /org.apache.cassandra.metrics:type=ThreadPools,path=request,scope=MutationStage,name=PendingTasks
- /org.apache.cassandra.metrics:type=ThreadPools,path=request,scope=MutationStage,name=CurrentlyBlockedTasks
- /org.apache.cassandra.metrics:type=ThreadPools,path=request,scope=ReadRepairStage,name=PendingTasks
- /org.apache.cassandra.metrics:type=ThreadPools,path=request,scope=ReadRepairStage,name=CurrentlyBlockedTasks
- /org.apache.cassandra.metrics:type=ThreadPools,path=request,scope=ReadStage,name=PendingTasks
- /org.apache.cassandra.metrics:type=ThreadPools,path=request,scope=ReadStage,name=CurrentlyBlockedTasks
- /org.apache.cassandra.metrics:type=ThreadPools,path=request,scope=RequestResponseStage,name=PendingTasks
- /org.apache.cassandra.metrics:type=ThreadPools,path=request,scope=RequestResponseStage,name=CurrentlyBlockedTasks

View File

@@ -0,0 +1,309 @@
package cassandra
import (
"encoding/json"
"errors"
"fmt"
"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/plugins/inputs"
"io/ioutil"
"log"
"net/http"
"net/url"
"strings"
)
type JolokiaClient interface {
MakeRequest(req *http.Request) (*http.Response, error)
}
type JolokiaClientImpl struct {
client *http.Client
}
func (c JolokiaClientImpl) MakeRequest(req *http.Request) (*http.Response, error) {
return c.client.Do(req)
}
type Cassandra struct {
jClient JolokiaClient
Context string
Servers []string
Metrics []string
}
type javaMetric struct {
host string
metric string
acc telegraf.Accumulator
}
type cassandraMetric struct {
host string
metric string
acc telegraf.Accumulator
}
type jmxMetric interface {
addTagsFields(out map[string]interface{})
}
func newJavaMetric(host string, metric string,
acc telegraf.Accumulator) *javaMetric {
return &javaMetric{host: host, metric: metric, acc: acc}
}
func newCassandraMetric(host string, metric string,
acc telegraf.Accumulator) *cassandraMetric {
return &cassandraMetric{host: host, metric: metric, acc: acc}
}
func addValuesAsFields(values map[string]interface{}, fields map[string]interface{},
mname string) {
for k, v := range values {
if v != nil {
fields[mname+"_"+k] = v
}
}
}
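// parseJmxMetricRequest splits an mbean name such as
// "org.apache.cassandra.metrics:keyspace=ks1,name=ReadLatency,scope=tbl1,type=Table"
// into a token map, classifying the metric class as "cassandra" or "java" and
// keeping each key=value pair as a token.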
func parseJmxMetricRequest(mbean string) map[string]string {
tokens := make(map[string]string)
classAndPairs := strings.Split(mbean, ":")
if classAndPairs[0] == "org.apache.cassandra.metrics" {
tokens["class"] = "cassandra"
} else if classAndPairs[0] == "java.lang" {
tokens["class"] = "java"
} else {
return tokens
}
pairs := strings.Split(classAndPairs[1], ",")
for _, pair := range pairs {
p := strings.Split(pair, "=")
tokens[p[0]] = p[1]
}
return tokens
}
func addTokensToTags(tokens map[string]string, tags map[string]string) {
for k, v := range tokens {
if k == "name" {
tags["mname"] = v // name seems to a reserved word in influxdb
} else if k == "class" || k == "type" {
continue // class and type are used in the metric name
} else {
tags[k] = v
}
}
}
func (j javaMetric) addTagsFields(out map[string]interface{}) {
tags := make(map[string]string)
fields := make(map[string]interface{})
a := out["request"].(map[string]interface{})
attribute := a["attribute"].(string)
mbean := a["mbean"].(string)
tokens := parseJmxMetricRequest(mbean)
addTokensToTags(tokens, tags)
tags["cassandra_host"] = j.host
if _, ok := tags["mname"]; !ok {
// Queries for a single value will not return a "name" tag in the response.
tags["mname"] = attribute
}
if values, ok := out["value"]; ok {
switch t := values.(type) {
case map[string]interface{}:
addValuesAsFields(values.(map[string]interface{}), fields, attribute)
case interface{}:
fields[attribute] = t
}
j.acc.AddFields(tokens["class"]+tokens["type"], fields, tags)
} else {
fmt.Printf("Missing key 'value' in '%s' output response\n%v\n",
j.metric, out)
}
}
func addCassandraMetric(mbean string, c cassandraMetric,
values map[string]interface{}) {
tags := make(map[string]string)
fields := make(map[string]interface{})
tokens := parseJmxMetricRequest(mbean)
addTokensToTags(tokens, tags)
tags["cassandra_host"] = c.host
addValuesAsFields(values, fields, tags["mname"])
c.acc.AddFields(tokens["class"]+tokens["type"], fields, tags)
}
func (c cassandraMetric) addTagsFields(out map[string]interface{}) {
r := out["request"]
tokens := parseJmxMetricRequest(r.(map[string]interface{})["mbean"].(string))
// Requests with wildcards for keyspace or table names will return nested
// maps in the json response
if (tokens["type"] == "Table" || tokens["type"] == "ColumnFamily") && (tokens["keyspace"] == "*" ||
tokens["scope"] == "*") {
if valuesMap, ok := out["value"]; ok {
for k, v := range valuesMap.(map[string]interface{}) {
addCassandraMetric(k, c, v.(map[string]interface{}))
}
} else {
fmt.Printf("Missing key 'value' in '%s' output response\n%v\n",
c.metric, out)
return
}
} else {
if values, ok := out["value"]; ok {
addCassandraMetric(r.(map[string]interface{})["mbean"].(string),
c, values.(map[string]interface{}))
} else {
fmt.Printf("Missing key 'value' in '%s' output response\n%v\n",
c.metric, out)
return
}
}
}
func (j *Cassandra) SampleConfig() string {
return `
# This is the context root used to compose the jolokia url
context = "/jolokia/read"
## List of cassandra servers exposing jolokia read service
servers = ["myuser:mypassword@10.10.10.1:8778","10.10.10.2:8778",":8778"]
## List of metrics collected on above servers
## Each metric consists of a jmx path.
## This will collect all heap memory usage metrics from the jvm and
## ReadLatency metrics for all keyspaces and tables.
## "type=Table" in the query works with Cassandra3.0. Older versions might
## need to use "type=ColumnFamily"
metrics = [
"/java.lang:type=Memory/HeapMemoryUsage",
"/org.apache.cassandra.metrics:type=Table,keyspace=*,scope=*,name=ReadLatency"
]
`
}
func (j *Cassandra) Description() string {
return "Read Cassandra metrics through Jolokia"
}
func (j *Cassandra) getAttr(requestUrl *url.URL) (map[string]interface{}, error) {
// Create + send request
req, err := http.NewRequest("GET", requestUrl.String(), nil)
if err != nil {
return nil, err
}
resp, err := j.jClient.MakeRequest(req)
if err != nil {
return nil, err
}
defer resp.Body.Close()
// Process response
if resp.StatusCode != http.StatusOK {
err = fmt.Errorf("Response from url \"%s\" has status code %d (%s), expected %d (%s)",
requestUrl,
resp.StatusCode,
http.StatusText(resp.StatusCode),
http.StatusOK,
http.StatusText(http.StatusOK))
return nil, err
}
// read body
body, err := ioutil.ReadAll(resp.Body)
if err != nil {
return nil, err
}
// Unmarshal json
var jsonOut map[string]interface{}
if err = json.Unmarshal([]byte(body), &jsonOut); err != nil {
return nil, errors.New("Error decoding JSON response")
}
return jsonOut, nil
}
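// parseServerTokens splits a server entry of the form "[user:passwd@]host:port"
// into host, port and, when present, user and passwd tokens.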
func parseServerTokens(server string) map[string]string {
serverTokens := make(map[string]string)
hostAndUser := strings.Split(server, "@")
hostPort := ""
userPasswd := ""
if len(hostAndUser) == 2 {
hostPort = hostAndUser[1]
userPasswd = hostAndUser[0]
} else {
hostPort = hostAndUser[0]
}
hostTokens := strings.Split(hostPort, ":")
serverTokens["host"] = hostTokens[0]
serverTokens["port"] = hostTokens[1]
if userPasswd != "" {
userTokens := strings.Split(userPasswd, ":")
serverTokens["user"] = userTokens[0]
serverTokens["passwd"] = userTokens[1]
}
return serverTokens
}
func (c *Cassandra) Gather(acc telegraf.Accumulator) error {
context := c.Context
servers := c.Servers
metrics := c.Metrics
for _, server := range servers {
for _, metric := range metrics {
serverTokens := parseServerTokens(server)
var m jmxMetric
if strings.HasPrefix(metric, "/java.lang:") {
m = newJavaMetric(serverTokens["host"], metric, acc)
} else if strings.HasPrefix(metric,
"/org.apache.cassandra.metrics:") {
m = newCassandraMetric(serverTokens["host"], metric, acc)
} else {
// unsupported metric type
log.Printf("I! Unsupported Cassandra metric [%s], skipping",
metric)
continue
}
// Prepare URL
requestUrl, err := url.Parse("http://" + serverTokens["host"] + ":" +
serverTokens["port"] + context + metric)
if err != nil {
return err
}
if serverTokens["user"] != "" && serverTokens["passwd"] != "" {
requestUrl.User = url.UserPassword(serverTokens["user"],
serverTokens["passwd"])
}
fmt.Printf("host %s url %s\n", serverTokens["host"], requestUrl)
out, err := c.getAttr(requestUrl)
if err != nil {
fmt.Printf("Error fetching from url %s: %v\n", requestUrl, err)
continue
}
if out["status"] != 200.0 {
fmt.Printf("URL returned with status %v\n", out["status"])
continue
}
m.addTagsFields(out)
}
}
return nil
}
func init() {
inputs.Add("cassandra", func() telegraf.Input {
return &Cassandra{jClient: &JolokiaClientImpl{client: &http.Client{}}}
})
}

View File

@@ -0,0 +1,286 @@
package cassandra
import (
_ "fmt"
"io/ioutil"
"net/http"
"strings"
"testing"
"github.com/influxdata/telegraf/testutil"
"github.com/stretchr/testify/assert"
_ "github.com/stretchr/testify/require"
)
const validJavaMultiValueJSON = `
{
"request":{
"mbean":"java.lang:type=Memory",
"attribute":"HeapMemoryUsage",
"type":"read"
},
"value":{
"init":67108864,
"committed":456130560,
"max":477626368,
"used":203288528
},
"timestamp":1446129191,
"status":200
}`
const validCassandraMultiValueJSON = `
{
"request": {
"mbean": "org.apache.cassandra.metrics:keyspace=test_keyspace1,name=ReadLatency,scope=test_table,type=Table",
"type": "read"},
"status": 200,
"timestamp": 1458089229,
"value": {
"999thPercentile": 20.0,
"99thPercentile": 10.0,
"Count": 400,
"DurationUnit": "microseconds",
"Max": 30.0,
"Mean": null,
"MeanRate": 3.0,
"Min": 1.0,
"RateUnit": "events/second",
"StdDev": null
}
}`
const validCassandraNestedMultiValueJSON = `
{
"request": {
"mbean": "org.apache.cassandra.metrics:keyspace=test_keyspace1,name=ReadLatency,scope=*,type=Table",
"type": "read"},
"status": 200,
"timestamp": 1458089184,
"value": {
"org.apache.cassandra.metrics:keyspace=test_keyspace1,name=ReadLatency,scope=test_table1,type=Table":
{ "999thPercentile": 1.0,
"Count": 100,
"DurationUnit": "microseconds",
"OneMinuteRate": 1.0,
"RateUnit": "events/second",
"StdDev": null
},
"org.apache.cassandra.metrics:keyspace=test_keyspace2,name=ReadLatency,scope=test_table2,type=Table":
{ "999thPercentile": 2.0,
"Count": 200,
"DurationUnit": "microseconds",
"OneMinuteRate": 2.0,
"RateUnit": "events/second",
"StdDev": null
}
}
}`
const validSingleValueJSON = `
{
"request":{
"path":"used",
"mbean":"java.lang:type=Memory",
"attribute":"HeapMemoryUsage",
"type":"read"
},
"value":209274376,
"timestamp":1446129256,
"status":200
}`
const validJavaMultiTypeJSON = `
{
"request":{
"mbean":"java.lang:name=ConcurrentMarkSweep,type=GarbageCollector",
"attribute":"CollectionCount",
"type":"read"
},
"value":1,
"timestamp":1459316570,
"status":200
}`
const invalidJSON = "I don't think this is JSON"
const empty = ""
var Servers = []string{"10.10.10.10:8778"}
var AuthServers = []string{"user:passwd@10.10.10.10:8778"}
var MultipleServers = []string{"10.10.10.10:8778", "10.10.10.11:8778"}
var HeapMetric = "/java.lang:type=Memory/HeapMemoryUsage"
var ReadLatencyMetric = "/org.apache.cassandra.metrics:type=Table,keyspace=test_keyspace1,scope=test_table,name=ReadLatency"
var NestedReadLatencyMetric = "/org.apache.cassandra.metrics:type=Table,keyspace=test_keyspace1,scope=*,name=ReadLatency"
var GarbageCollectorMetric1 = "/java.lang:type=GarbageCollector,name=ConcurrentMarkSweep/CollectionCount"
var GarbageCollectorMetric2 = "/java.lang:type=GarbageCollector,name=ConcurrentMarkSweep/CollectionTime"
var Context = "/jolokia/read"
type jolokiaClientStub struct {
responseBody string
statusCode int
}
func (c jolokiaClientStub) MakeRequest(req *http.Request) (*http.Response, error) {
resp := http.Response{}
resp.StatusCode = c.statusCode
resp.Body = ioutil.NopCloser(strings.NewReader(c.responseBody))
return &resp, nil
}
// Generates a pointer to a Cassandra object that uses a mock HTTP client.
// Parameters:
// response : Body of the response that the mock HTTP client should return
// statusCode: HTTP status code the mock HTTP client should return
//
// Returns:
// *Cassandra: Pointer to a Cassandra object that uses the generated mock HTTP client
func genJolokiaClientStub(response string, statusCode int, servers []string, metrics []string) *Cassandra {
return &Cassandra{
jClient: jolokiaClientStub{responseBody: response, statusCode: statusCode},
Context: Context,
Servers: servers,
Metrics: metrics,
}
}
// Test that the proper values are ignored or collected for class=Java
func TestHttpJsonJavaMultiValue(t *testing.T) {
cassandra := genJolokiaClientStub(validJavaMultiValueJSON, 200,
MultipleServers, []string{HeapMetric})
var acc testutil.Accumulator
acc.SetDebug(true)
err := cassandra.Gather(&acc)
assert.Nil(t, err)
assert.Equal(t, 2, len(acc.Metrics))
fields := map[string]interface{}{
"HeapMemoryUsage_init": 67108864.0,
"HeapMemoryUsage_committed": 456130560.0,
"HeapMemoryUsage_max": 477626368.0,
"HeapMemoryUsage_used": 203288528.0,
}
tags1 := map[string]string{
"cassandra_host": "10.10.10.10",
"mname": "HeapMemoryUsage",
}
tags2 := map[string]string{
"cassandra_host": "10.10.10.11",
"mname": "HeapMemoryUsage",
}
acc.AssertContainsTaggedFields(t, "javaMemory", fields, tags1)
acc.AssertContainsTaggedFields(t, "javaMemory", fields, tags2)
}
func TestHttpJsonJavaMultiType(t *testing.T) {
cassandra := genJolokiaClientStub(validJavaMultiTypeJSON, 200, AuthServers, []string{GarbageCollectorMetric1, GarbageCollectorMetric2})
var acc testutil.Accumulator
acc.SetDebug(true)
err := cassandra.Gather(&acc)
assert.Nil(t, err)
assert.Equal(t, 2, len(acc.Metrics))
fields := map[string]interface{}{
"CollectionCount": 1.0,
}
tags := map[string]string{
"cassandra_host": "10.10.10.10",
"mname": "ConcurrentMarkSweep",
}
acc.AssertContainsTaggedFields(t, "javaGarbageCollector", fields, tags)
}
// Test that the proper values are ignored or collected
func TestHttpJsonOn404(t *testing.T) {
jolokia := genJolokiaClientStub(validJavaMultiValueJSON, 404, Servers,
[]string{HeapMetric})
var acc testutil.Accumulator
err := jolokia.Gather(&acc)
assert.Nil(t, err)
assert.Equal(t, 0, len(acc.Metrics))
}
// Test that the proper values are ignored or collected for class=Cassandra
func TestHttpJsonCassandraMultiValue(t *testing.T) {
cassandra := genJolokiaClientStub(validCassandraMultiValueJSON, 200, Servers, []string{ReadLatencyMetric})
var acc testutil.Accumulator
err := cassandra.Gather(&acc)
assert.Nil(t, err)
assert.Equal(t, 1, len(acc.Metrics))
fields := map[string]interface{}{
"ReadLatency_999thPercentile": 20.0,
"ReadLatency_99thPercentile": 10.0,
"ReadLatency_Count": 400.0,
"ReadLatency_DurationUnit": "microseconds",
"ReadLatency_Max": 30.0,
"ReadLatency_MeanRate": 3.0,
"ReadLatency_Min": 1.0,
"ReadLatency_RateUnit": "events/second",
}
tags := map[string]string{
"cassandra_host": "10.10.10.10",
"mname": "ReadLatency",
"keyspace": "test_keyspace1",
"scope": "test_table",
}
acc.AssertContainsTaggedFields(t, "cassandraTable", fields, tags)
}
// Test that the proper values are ignored or collected for class=Cassandra with
// nested values
func TestHttpJsonCassandraNestedMultiValue(t *testing.T) {
cassandra := genJolokiaClientStub(validCassandraNestedMultiValueJSON, 200, Servers, []string{NestedReadLatencyMetric})
var acc testutil.Accumulator
acc.SetDebug(true)
err := cassandra.Gather(&acc)
assert.Nil(t, err)
assert.Equal(t, 2, len(acc.Metrics))
fields1 := map[string]interface{}{
"ReadLatency_999thPercentile": 1.0,
"ReadLatency_Count": 100.0,
"ReadLatency_DurationUnit": "microseconds",
"ReadLatency_OneMinuteRate": 1.0,
"ReadLatency_RateUnit": "events/second",
}
fields2 := map[string]interface{}{
"ReadLatency_999thPercentile": 2.0,
"ReadLatency_Count": 200.0,
"ReadLatency_DurationUnit": "microseconds",
"ReadLatency_OneMinuteRate": 2.0,
"ReadLatency_RateUnit": "events/second",
}
tags1 := map[string]string{
"cassandra_host": "10.10.10.10",
"mname": "ReadLatency",
"keyspace": "test_keyspace1",
"scope": "test_table1",
}
tags2 := map[string]string{
"cassandra_host": "10.10.10.10",
"mname": "ReadLatency",
"keyspace": "test_keyspace2",
"scope": "test_table2",
}
acc.AssertContainsTaggedFields(t, "cassandraTable", fields1, tags1)
acc.AssertContainsTaggedFields(t, "cassandraTable", fields2, tags2)
}

View File

@@ -0,0 +1,222 @@
# Ceph Storage Input Plugin
Collects performance metrics from the MON and OSD nodes in a Ceph storage cluster.
*Admin Socket Stats*
This gatherer works by scanning the configured SocketDir for OSD and MON socket files. When it finds
a MON socket, it runs **ceph --admin-daemon $file perfcounters_dump**. For OSDs it runs **ceph --admin-daemon $file perf dump**.
The resulting JSON is parsed and grouped into collections based on the top-level key. Top-level keys are
used as collection tags, and all sub-keys are flattened. For example:
```
{
"paxos": {
"refresh": 9363435,
"refresh_latency": {
"avgcount": 9363435,
"sum": 5378.794002000
}
}
}
```
Would be parsed into the following metrics, all of which would be tagged with collection=paxos:
- refresh = 9363435
- refresh_latency.avgcount = 9363435
- refresh_latency.sum = 5378.794002000
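Emitted through the accumulator, that example would appear as a single `ceph` point, roughly like the following (illustrative only; id, type and timestamp values are hypothetical):
```
ceph,collection=paxos,id=node-2,type=mon refresh=9363435,refresh_latency.avgcount=9363435,refresh_latency.sum=5378.794002 1462821234814535148
```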
*Cluster Stats*
This gatherer works by invoking ceph commands against the cluster, and thus only requires the ceph client, a valid
ceph configuration and an access key to function (the ceph_config and ceph_user configuration variables work
in conjunction to specify these prerequisites). It may be run on any server with access to
the cluster. The currently supported commands are listed below; the equivalent CLI invocations are sketched after the list:
* ceph status
* ceph df
* ceph osd pool stats
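Under the hood each command is run with the configured ceph_config and ceph_user, and JSON output is requested. A sketch of the equivalent CLI invocations, using the default settings shown in the configuration below:
```
ceph --conf /etc/ceph/ceph.conf --name client.admin --format json status
ceph --conf /etc/ceph/ceph.conf --name client.admin --format json df
ceph --conf /etc/ceph/ceph.conf --name client.admin --format json osd pool stats
```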
### Configuration:
```
# Collects performance metrics from the MON and OSD nodes in a Ceph storage cluster.
[[inputs.ceph]]
## This is the recommended interval to poll. Too frequent and you will lose
## data points due to timeouts during rebalancing and recovery
interval = '1m'
## All configuration values are optional, defaults are shown below
## location of ceph binary
ceph_binary = "/usr/bin/ceph"
## directory in which to look for socket files
socket_dir = "/var/run/ceph"
## prefix of MON and OSD socket files, used to determine socket type
mon_prefix = "ceph-mon"
osd_prefix = "ceph-osd"
## suffix used to identify socket files
socket_suffix = "asok"
## Ceph user to authenticate as, ceph will search for the corresponding keyring
## e.g. client.admin.keyring in /etc/ceph, or the explicit path defined in the
## client section of ceph.conf for example:
##
## [client.telegraf]
## keyring = /etc/ceph/client.telegraf.keyring
##
## Consult the ceph documentation for more detail on keyring generation.
ceph_user = "client.admin"
## Ceph configuration to use to locate the cluster
ceph_config = "/etc/ceph/ceph.conf"
## Whether to gather statistics via the admin socket
gather_admin_socket_stats = true
## Whether to gather statistics via ceph commands, requires ceph_user and ceph_config
## to be specified
gather_cluster_stats = true
```
### Measurements & Fields:
*Admin Socket Stats*
All fields are collected under the **ceph** measurement and stored as float64s. For a full list of fields, see the sample perf dumps in ceph_test.go.
*Cluster Stats*
* ceph\_osdmap
* epoch (float)
* full (boolean)
* nearfull (boolean)
* num\_in\_osds (float)
* num\_osds (float)
* num\_remapped\_pgs (float)
* num\_up\_osds (float)
* ceph\_pgmap
* bytes\_avail (float)
* bytes\_total (float)
* bytes\_used (float)
* data\_bytes (float)
* num\_pgs (float)
* op\_per\_sec (float)
* read\_bytes\_sec (float)
* version (float)
* write\_bytes\_sec (float)
* recovering\_bytes\_per\_sec (float)
* recovering\_keys\_per\_sec (float)
* recovering\_objects\_per\_sec (float)
* ceph\_pgmap\_state
* state name e.g. active+clean (float)
* ceph\_usage
* bytes\_used (float)
* kb\_used (float)
* max\_avail (float)
* objects (float)
* ceph\_pool\_usage
* bytes\_used (float)
* kb\_used (float)
* max\_avail (float)
* objects (float)
* ceph\_pool\_stats
* op\_per\_sec (float)
* read\_bytes\_sec (float)
* write\_bytes\_sec (float)
* recovering\_object\_per\_sec (float)
* recovering\_bytes\_per\_sec (float)
* recovering\_keys\_per\_sec (float)
### Tags:
*Admin Socket Stats*
All measurements will have the following tags:
- type: either 'osd' or 'mon' to indicate which type of node was queried
- id: a unique string identifier, parsed from the socket file name for the node
- collection: the top-level key under which these fields were reported. Possible values are:
- for MON nodes:
- cluster
- leveldb
- mon
- paxos
- throttle-mon_client_bytes
- throttle-mon_daemon_bytes
- throttle-msgr_dispatch_throttler-mon
- for OSD nodes:
- WBThrottle
- filestore
- leveldb
- mutex-FileJournal::completions_lock
- mutex-FileJournal::finisher_lock
- mutex-FileJournal::write_lock
- mutex-FileJournal::writeq_lock
- mutex-JOS::ApplyManager::apply_lock
- mutex-JOS::ApplyManager::com_lock
- mutex-JOS::SubmitManager::lock
- mutex-WBThrottle::lock
- objecter
- osd
- recoverystate_perf
- throttle-filestore_bytes
- throttle-filestore_ops
- throttle-msgr_dispatch_throttler-client
- throttle-msgr_dispatch_throttler-cluster
- throttle-msgr_dispatch_throttler-hb_back_server
- throttle-msgr_dispatch_throttler-hb_front_server
- throttle-msgr_dispatch_throttler-hbclient
- throttle-msgr_dispatch_throttler-ms_objecter
- throttle-objecter_bytes
- throttle-objecter_ops
- throttle-osd_client_bytes
- throttle-osd_client_messages
*Cluster Stats*
* ceph\_pg\_state has the following tags:
* state (state for which the value applies e.g. active+clean, active+remapped+backfill)
* ceph\_pool\_usage has the following tags:
* id
* name
* ceph\_pool\_stats has the following tags:
* id
* name
### Example Output:
*Admin Socket Stats*
<pre>
telegraf -test -config /etc/telegraf/telegraf.conf -config-directory /etc/telegraf/telegraf.d -input-filter ceph
* Plugin: ceph, Collection 1
> ceph,collection=paxos, id=node-2,role=openstack,type=mon accept_timeout=0,begin=14931264,begin_bytes.avgcount=14931264,begin_bytes.sum=180309683362,begin_keys.avgcount=0,begin_keys.sum=0,begin_latency.avgcount=14931264,begin_latency.sum=9293.29589,collect=1,collect_bytes.avgcount=1,collect_bytes.sum=24,collect_keys.avgcount=1,collect_keys.sum=1,collect_latency.avgcount=1,collect_latency.sum=0.00028,collect_timeout=0,collect_uncommitted=0,commit=14931264,commit_bytes.avgcount=0,commit_bytes.sum=0,commit_keys.avgcount=0,commit_keys.sum=0,commit_latency.avgcount=0,commit_latency.sum=0,lease_ack_timeout=0,lease_timeout=0,new_pn=0,new_pn_latency.avgcount=0,new_pn_latency.sum=0,refresh=14931264,refresh_latency.avgcount=14931264,refresh_latency.sum=8706.98498,restart=4,share_state=0,share_state_bytes.avgcount=0,share_state_bytes.sum=0,share_state_keys.avgcount=0,share_state_keys.sum=0,start_leader=0,start_peon=1,store_state=14931264,store_state_bytes.avgcount=14931264,store_state_bytes.sum=353119959211,store_state_keys.avgcount=14931264,store_state_keys.sum=289807523,store_state_latency.avgcount=14931264,store_state_latency.sum=10952.835724 1462821234814535148
> ceph,collection=throttle-mon_client_bytes,id=node-2,type=mon get=1413017,get_or_fail_fail=0,get_or_fail_success=0,get_sum=71211705,max=104857600,put=1413013,put_sum=71211459,take=0,take_sum=0,val=246,wait.avgcount=0,wait.sum=0 1462821234814737219
> ceph,collection=throttle-mon_daemon_bytes,id=node-2,type=mon get=4058121,get_or_fail_fail=0,get_or_fail_success=0,get_sum=6027348117,max=419430400,put=4058121,put_sum=6027348117,take=0,take_sum=0,val=0,wait.avgcount=0,wait.sum=0 1462821234814815661
> ceph,collection=throttle-msgr_dispatch_throttler-mon,id=node-2,type=mon get=54276277,get_or_fail_fail=0,get_or_fail_success=0,get_sum=370232877040,max=104857600,put=54276277,put_sum=370232877040,take=0,take_sum=0,val=0,wait.avgcount=0,wait.sum=0 1462821234814872064
</pre>
*Cluster Stats*
<pre>
> ceph_osdmap,host=ceph-mon-0 epoch=170772,full=false,nearfull=false,num_in_osds=340,num_osds=340,num_remapped_pgs=0,num_up_osds=340 1468841037000000000
> ceph_pgmap,host=ceph-mon-0 bytes_avail=634895531270144,bytes_total=812117151809536,bytes_used=177221620539392,data_bytes=56979991615058,num_pgs=22952,op_per_sec=15869,read_bytes_sec=43956026,version=39387592,write_bytes_sec=165344818 1468841037000000000
> ceph_pgmap_state,host=ceph-mon-0 active+clean=22952 1468928660000000000
> ceph_usage,host=ceph-mon-0 total_avail_bytes=634895514791936,total_bytes=812117151809536,total_used_bytes=177221637017600 1468841037000000000
> ceph_pool_usage,host=ceph-mon-0,id=150,name=cinder.volumes bytes_used=12648553794802,kb_used=12352103316,max_avail=154342562489244,objects=3026295 1468841037000000000
> ceph_pool_usage,host=ceph-mon-0,id=182,name=cinder.volumes.flash bytes_used=8541308223964,kb_used=8341121313,max_avail=39388593563936,objects=2075066 1468841037000000000
> ceph_pool_stats,host=ceph-mon-0,id=150,name=cinder.volumes op_per_sec=1706,read_bytes_sec=28671674,write_bytes_sec=29994541 1468841037000000000
> ceph_pool_stats,host=ceph-mon-0,id=182,name=cinder.volumes.flash op_per_sec=9748,read_bytes_sec=9605524,write_bytes_sec=45593310 1468841037000000000
</pre>

plugins/inputs/ceph/ceph.go (new file, 489 lines)
View File

@@ -0,0 +1,489 @@
package ceph
import (
"bytes"
"encoding/json"
"fmt"
"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/plugins/inputs"
"io/ioutil"
"log"
"os/exec"
"path/filepath"
"strings"
)
const (
measurement = "ceph"
typeMon = "monitor"
typeOsd = "osd"
osdPrefix = "ceph-osd"
monPrefix = "ceph-mon"
sockSuffix = "asok"
)
type Ceph struct {
CephBinary string
OsdPrefix string
MonPrefix string
SocketDir string
SocketSuffix string
CephUser string
CephConfig string
GatherAdminSocketStats bool
GatherClusterStats bool
}
func (c *Ceph) Description() string {
return "Collects performance metrics from the MON and OSD nodes in a Ceph storage cluster."
}
var sampleConfig = `
## This is the recommended interval to poll. Too frequent and you will lose
## data points due to timeouts during rebalancing and recovery
interval = '1m'
## All configuration values are optional, defaults are shown below
## location of ceph binary
ceph_binary = "/usr/bin/ceph"
## directory in which to look for socket files
socket_dir = "/var/run/ceph"
## prefix of MON and OSD socket files, used to determine socket type
mon_prefix = "ceph-mon"
osd_prefix = "ceph-osd"
## suffix used to identify socket files
socket_suffix = "asok"
## Ceph user to authenticate as
ceph_user = "client.admin"
## Ceph configuration to use to locate the cluster
ceph_config = "/etc/ceph/ceph.conf"
## Whether to gather statistics via the admin socket
gather_admin_socket_stats = true
## Whether to gather statistics via ceph commands
gather_cluster_stats = true
`
func (c *Ceph) SampleConfig() string {
return sampleConfig
}
func (c *Ceph) Gather(acc telegraf.Accumulator) error {
if c.GatherAdminSocketStats {
if err := c.gatherAdminSocketStats(acc); err != nil {
return err
}
}
if c.GatherClusterStats {
if err := c.gatherClusterStats(acc); err != nil {
return err
}
}
return nil
}
func (c *Ceph) gatherAdminSocketStats(acc telegraf.Accumulator) error {
sockets, err := findSockets(c)
if err != nil {
return fmt.Errorf("failed to find sockets at path '%s': %v", c.SocketDir, err)
}
for _, s := range sockets {
dump, err := perfDump(c.CephBinary, s)
if err != nil {
log.Printf("E! error reading from socket '%s': %v", s.socket, err)
continue
}
data, err := parseDump(dump)
if err != nil {
log.Printf("E! error parsing dump from socket '%s': %v", s.socket, err)
continue
}
for tag, metrics := range *data {
acc.AddFields(measurement,
map[string]interface{}(metrics),
map[string]string{"type": s.sockType, "id": s.sockId, "collection": tag})
}
}
return nil
}
func (c *Ceph) gatherClusterStats(acc telegraf.Accumulator) error {
jobs := []struct {
command string
parser func(telegraf.Accumulator, string) error
}{
{"status", decodeStatus},
{"df", decodeDf},
{"osd pool stats", decodeOsdPoolStats},
}
// For each job, execute against the cluster, parse and accumulate the data points
for _, job := range jobs {
output, err := c.exec(job.command)
if err != nil {
return fmt.Errorf("error executing command: %v", err)
}
err = job.parser(acc, output)
if err != nil {
return fmt.Errorf("error parsing output: %v", err)
}
}
return nil
}
func init() {
c := Ceph{
CephBinary: "/usr/bin/ceph",
OsdPrefix: osdPrefix,
MonPrefix: monPrefix,
SocketDir: "/var/run/ceph",
SocketSuffix: sockSuffix,
CephUser: "client.admin",
CephConfig: "/etc/ceph/ceph.conf",
GatherAdminSocketStats: true,
GatherClusterStats: false,
}
inputs.Add(measurement, func() telegraf.Input { return &c })
}
var perfDump = func(binary string, socket *socket) (string, error) {
cmdArgs := []string{"--admin-daemon", socket.socket}
if socket.sockType == typeOsd {
cmdArgs = append(cmdArgs, "perf", "dump")
} else if socket.sockType == typeMon {
cmdArgs = append(cmdArgs, "perfcounters_dump")
} else {
return "", fmt.Errorf("ignoring unknown socket type: %s", socket.sockType)
}
cmd := exec.Command(binary, cmdArgs...)
var out bytes.Buffer
cmd.Stdout = &out
err := cmd.Run()
if err != nil {
return "", fmt.Errorf("error running ceph dump: %s", err)
}
return out.String(), nil
}
var findSockets = func(c *Ceph) ([]*socket, error) {
listing, err := ioutil.ReadDir(c.SocketDir)
if err != nil {
return []*socket{}, fmt.Errorf("Failed to read socket directory '%s': %v", c.SocketDir, err)
}
sockets := make([]*socket, 0, len(listing))
for _, info := range listing {
f := info.Name()
var sockType string
var sockPrefix string
if strings.HasPrefix(f, c.MonPrefix) {
sockType = typeMon
sockPrefix = monPrefix
}
if strings.HasPrefix(f, c.OsdPrefix) {
sockType = typeOsd
sockPrefix = osdPrefix
}
if sockType == typeOsd || sockType == typeMon {
path := filepath.Join(c.SocketDir, f)
sockets = append(sockets, &socket{parseSockId(f, sockPrefix, c.SocketSuffix), sockType, path})
}
}
return sockets, nil
}
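// parseSockId derives the socket id from a socket file name by trimming the
// type prefix, the suffix and any separating '.', '-' or '_' characters.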
func parseSockId(fname, prefix, suffix string) string {
s := fname
s = strings.TrimPrefix(s, prefix)
s = strings.TrimSuffix(s, suffix)
s = strings.Trim(s, ".-_")
return s
}
type socket struct {
sockId string
sockType string
socket string
}
type metric struct {
pathStack []string // lifo stack of name components
value float64
}
// Pops names of pathStack to build the flattened name for a metric
func (m *metric) name() string {
buf := bytes.Buffer{}
for i := len(m.pathStack) - 1; i >= 0; i-- {
if buf.Len() > 0 {
buf.WriteString(".")
}
buf.WriteString(m.pathStack[i])
}
return buf.String()
}
type metricMap map[string]interface{}
type taggedMetricMap map[string]metricMap
// Parses a raw JSON string into a taggedMetricMap
// Delegates the actual parsing to newTaggedMetricMap(..)
func parseDump(dump string) (*taggedMetricMap, error) {
data := make(map[string]interface{})
err := json.Unmarshal([]byte(dump), &data)
if err != nil {
return nil, fmt.Errorf("failed to parse json: '%s': %v", dump, err)
}
tmm := newTaggedMetricMap(data)
if err != nil {
return nil, fmt.Errorf("failed to tag dataset: '%v': %v", tmm, err)
}
return tmm, nil
}
// Builds a TaggedMetricMap out of a generic string map.
// The top-level key is used as a tag and all sub-keys are flattened into metrics
func newTaggedMetricMap(data map[string]interface{}) *taggedMetricMap {
tmm := make(taggedMetricMap)
for tag, datapoints := range data {
mm := make(metricMap)
for _, m := range flatten(datapoints) {
mm[m.name()] = m.value
}
tmm[tag] = mm
}
return &tmm
}
// Recursively flattens any k-v hierarchy present in data.
// Nested keys are flattened into ordered slices associated with a metric value.
// The key slices are treated as stacks, and are expected to be reversed and concatenated
// when passed as metrics to the accumulator. (see (*metric).name())
func flatten(data interface{}) []*metric {
var metrics []*metric
switch val := data.(type) {
case float64:
metrics = []*metric{&metric{make([]string, 0, 1), val}}
case map[string]interface{}:
metrics = make([]*metric, 0, len(val))
for k, v := range val {
for _, m := range flatten(v) {
m.pathStack = append(m.pathStack, k)
metrics = append(metrics, m)
}
}
default:
log.Printf("I! Ignoring unexpected type '%T' for value %v", val, val)
}
return metrics
}
func (c *Ceph) exec(command string) (string, error) {
cmdArgs := []string{"--conf", c.CephConfig, "--name", c.CephUser, "--format", "json"}
cmdArgs = append(cmdArgs, strings.Split(command, " ")...)
cmd := exec.Command(c.CephBinary, cmdArgs...)
var out bytes.Buffer
cmd.Stdout = &out
err := cmd.Run()
if err != nil {
return "", fmt.Errorf("error running ceph %v: %s", command, err)
}
output := out.String()
// Ceph doesn't sanitize its output, and may return invalid JSON. Patch this
// up for them, as having some inaccurate data is better than none.
output = strings.Replace(output, "-inf", "0", -1)
output = strings.Replace(output, "inf", "0", -1)
return output, nil
}
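// decodeStatus parses the JSON output of 'ceph status' and emits the
// ceph_osdmap, ceph_pgmap and ceph_pgmap_state measurements.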
func decodeStatus(acc telegraf.Accumulator, input string) error {
data := make(map[string]interface{})
err := json.Unmarshal([]byte(input), &data)
if err != nil {
return fmt.Errorf("failed to parse json: '%s': %v", input, err)
}
err = decodeStatusOsdmap(acc, data)
if err != nil {
return err
}
err = decodeStatusPgmap(acc, data)
if err != nil {
return err
}
err = decodeStatusPgmapState(acc, data)
if err != nil {
return err
}
return nil
}
func decodeStatusOsdmap(acc telegraf.Accumulator, data map[string]interface{}) error {
osdmap, ok := data["osdmap"].(map[string]interface{})
if !ok {
return fmt.Errorf("WARNING %s - unable to decode osdmap", measurement)
}
fields, ok := osdmap["osdmap"].(map[string]interface{})
if !ok {
return fmt.Errorf("WARNING %s - unable to decode osdmap", measurement)
}
acc.AddFields("ceph_osdmap", fields, map[string]string{})
return nil
}
func decodeStatusPgmap(acc telegraf.Accumulator, data map[string]interface{}) error {
pgmap, ok := data["pgmap"].(map[string]interface{})
if !ok {
return fmt.Errorf("WARNING %s - unable to decode pgmap", measurement)
}
fields := make(map[string]interface{})
for key, value := range pgmap {
switch value.(type) {
case float64:
fields[key] = value
}
}
acc.AddFields("ceph_pgmap", fields, map[string]string{})
return nil
}
func decodeStatusPgmapState(acc telegraf.Accumulator, data map[string]interface{}) error {
pgmap, ok := data["pgmap"].(map[string]interface{})
if !ok {
return fmt.Errorf("WARNING %s - unable to decode pgmap", measurement)
}
fields := make(map[string]interface{})
for key, value := range pgmap {
switch value.(type) {
case []interface{}:
if key != "pgs_by_state" {
continue
}
for _, state := range value.([]interface{}) {
state_map, ok := state.(map[string]interface{})
if !ok {
return fmt.Errorf("WARNING %s - unable to decode pg state", measurement)
}
state_name, ok := state_map["state_name"].(string)
if !ok {
return fmt.Errorf("WARNING %s - unable to decode pg state name", measurement)
}
state_count, ok := state_map["count"].(float64)
if !ok {
return fmt.Errorf("WARNING %s - unable to decode pg state count", measurement)
}
fields[state_name] = state_count
}
}
}
acc.AddFields("ceph_pgmap_state", fields, map[string]string{})
return nil
}
func decodeDf(acc telegraf.Accumulator, input string) error {
data := make(map[string]interface{})
err := json.Unmarshal([]byte(input), &data)
if err != nil {
return fmt.Errorf("failed to parse json: '%s': %v", input, err)
}
// ceph.usage: records global utilization and number of objects
stats_fields, ok := data["stats"].(map[string]interface{})
if !ok {
return fmt.Errorf("WARNING %s - unable to decode df stats", measurement)
}
acc.AddFields("ceph_usage", stats_fields, map[string]string{})
// ceph.pool.usage: records per pool utilization and number of objects
pools, ok := data["pools"].([]interface{})
if !ok {
return fmt.Errorf("WARNING %s - unable to decode df pools", measurement)
}
for _, pool := range pools {
pool_map, ok := pool.(map[string]interface{})
if !ok {
return fmt.Errorf("WARNING %s - unable to decode df pool", measurement)
}
pool_name, ok := pool_map["name"].(string)
if !ok {
return fmt.Errorf("WARNING %s - unable to decode df pool name", measurement)
}
fields, ok := pool_map["stats"].(map[string]interface{})
if !ok {
return fmt.Errorf("WARNING %s - unable to decode df pool stats", measurement)
}
tags := map[string]string{
"name": pool_name,
}
acc.AddFields("ceph_pool_usage", fields, tags)
}
return nil
}
func decodeOsdPoolStats(acc telegraf.Accumulator, input string) error {
data := make([]map[string]interface{}, 0)
err := json.Unmarshal([]byte(input), &data)
if err != nil {
return fmt.Errorf("failed to parse json: '%s': %v", input, err)
}
// ceph.pool.stats: records per pool IO and recovery throughput
for _, pool := range data {
pool_name, ok := pool["pool_name"].(string)
if !ok {
return fmt.Errorf("WARNING %s - unable to decode osd pool stats name", measurement)
}
// Note: the 'recovery' object looks broken (in hammer), so it's omitted
objects := []string{
"client_io_rate",
"recovery_rate",
}
fields := make(map[string]interface{})
for _, object := range objects {
perfdata, ok := pool[object].(map[string]interface{})
if !ok {
return fmt.Errorf("WARNING %s - unable to decode osd pool stats", measurement)
}
for key, value := range perfdata {
fields[key] = value
}
}
tags := map[string]string{
"name": pool_name,
}
acc.AddFields("ceph_pool_stats", fields, tags)
}
return nil
}

View File

@@ -0,0 +1,687 @@
package ceph
import (
"fmt"
"github.com/influxdata/telegraf/testutil"
"github.com/stretchr/testify/assert"
"io/ioutil"
"os"
"path"
"strconv"
"strings"
"testing"
)
const (
epsilon = float64(0.00000001)
)
func TestParseSockId(t *testing.T) {
s := parseSockId(sockFile(osdPrefix, 1), osdPrefix, sockSuffix)
assert.Equal(t, s, "1")
}
func TestParseMonDump(t *testing.T) {
dump, err := parseDump(monPerfDump)
assert.NoError(t, err)
assert.InEpsilon(t, 5678670180, (*dump)["cluster"]["osd_kb_used"], epsilon)
assert.InEpsilon(t, 6866.540527000, (*dump)["paxos"]["store_state_latency.sum"], epsilon)
}
func TestParseOsdDump(t *testing.T) {
dump, err := parseDump(osdPerfDump)
assert.NoError(t, err)
assert.InEpsilon(t, 552132.109360000, (*dump)["filestore"]["commitcycle_interval.sum"], epsilon)
assert.Equal(t, float64(0), (*dump)["mutex-FileJournal::finisher_lock"]["wait.avgcount"])
}
func TestGather(t *testing.T) {
saveFind := findSockets
saveDump := perfDump
defer func() {
findSockets = saveFind
perfDump = saveDump
}()
findSockets = func(c *Ceph) ([]*socket, error) {
return []*socket{&socket{"osd.1", typeOsd, ""}}, nil
}
perfDump = func(binary string, s *socket) (string, error) {
return osdPerfDump, nil
}
acc := &testutil.Accumulator{}
c := &Ceph{}
c.Gather(acc)
}
func TestFindSockets(t *testing.T) {
tmpdir, err := ioutil.TempDir("", "socktest")
assert.NoError(t, err)
defer func() {
err := os.Remove(tmpdir)
assert.NoError(t, err)
}()
c := &Ceph{
CephBinary: "foo",
OsdPrefix: "ceph-osd",
MonPrefix: "ceph-mon",
SocketDir: tmpdir,
SocketSuffix: "asok",
CephUser: "client.admin",
CephConfig: "/etc/ceph/ceph.conf",
GatherAdminSocketStats: true,
GatherClusterStats: false,
}
for _, st := range sockTestParams {
createTestFiles(tmpdir, st)
sockets, err := findSockets(c)
assert.NoError(t, err)
for i := 1; i <= st.osds; i++ {
assertFoundSocket(t, tmpdir, typeOsd, i, sockets)
}
for i := 1; i <= st.mons; i++ {
assertFoundSocket(t, tmpdir, typeMon, i, sockets)
}
cleanupTestFiles(tmpdir, st)
}
}
func assertFoundSocket(t *testing.T, dir, sockType string, i int, sockets []*socket) {
var prefix string
if sockType == typeOsd {
prefix = osdPrefix
} else {
prefix = monPrefix
}
expected := path.Join(dir, sockFile(prefix, i))
found := false
for _, s := range sockets {
fmt.Printf("Checking %s\n", s.socket)
if s.socket == expected {
found = true
assert.Equal(t, s.sockType, sockType, "Unexpected socket type for '%s'", s)
assert.Equal(t, s.sockId, strconv.Itoa(i))
}
}
assert.True(t, found, "Did not find socket: %s", expected)
}
func sockFile(prefix string, i int) string {
return strings.Join([]string{prefix, strconv.Itoa(i), sockSuffix}, ".")
}
func createTestFiles(dir string, st *SockTest) {
writeFile := func(prefix string, i int) {
f := sockFile(prefix, i)
fpath := path.Join(dir, f)
ioutil.WriteFile(fpath, []byte(""), 0777)
}
tstFileApply(st, writeFile)
}
func cleanupTestFiles(dir string, st *SockTest) {
rmFile := func(prefix string, i int) {
f := sockFile(prefix, i)
fpath := path.Join(dir, f)
err := os.Remove(fpath)
if err != nil {
fmt.Printf("Error removing test file %s: %v\n", fpath, err)
}
}
tstFileApply(st, rmFile)
}
func tstFileApply(st *SockTest, fn func(prefix string, i int)) {
for i := 1; i <= st.osds; i++ {
fn(osdPrefix, i)
}
for i := 1; i <= st.mons; i++ {
fn(monPrefix, i)
}
}
type SockTest struct {
osds int
mons int
}
var sockTestParams = []*SockTest{
&SockTest{
osds: 2,
mons: 2,
},
&SockTest{
mons: 1,
},
&SockTest{
osds: 1,
},
&SockTest{},
}
var monPerfDump = `
{ "cluster": { "num_mon": 2,
"num_mon_quorum": 2,
"num_osd": 26,
"num_osd_up": 26,
"num_osd_in": 26,
"osd_epoch": 3306,
"osd_kb": 11487846448,
"osd_kb_used": 5678670180,
"osd_kb_avail": 5809176268,
"num_pool": 12,
"num_pg": 768,
"num_pg_active_clean": 768,
"num_pg_active": 768,
"num_pg_peering": 0,
"num_object": 397616,
"num_object_degraded": 0,
"num_object_unfound": 0,
"num_bytes": 2917848227467,
"num_mds_up": 0,
"num_mds_in": 0,
"num_mds_failed": 0,
"mds_epoch": 1},
"leveldb": { "leveldb_get": 321950312,
"leveldb_transaction": 18729922,
"leveldb_compact": 0,
"leveldb_compact_range": 74141,
"leveldb_compact_queue_merge": 0,
"leveldb_compact_queue_len": 0},
"mon": {},
"paxos": { "start_leader": 0,
"start_peon": 1,
"restart": 4,
"refresh": 9363435,
"refresh_latency": { "avgcount": 9363435,
"sum": 5378.794002000},
"begin": 9363435,
"begin_keys": { "avgcount": 0,
"sum": 0},
"begin_bytes": { "avgcount": 9363435,
"sum": 110468605489},
"begin_latency": { "avgcount": 9363435,
"sum": 5850.060682000},
"commit": 9363435,
"commit_keys": { "avgcount": 0,
"sum": 0},
"commit_bytes": { "avgcount": 0,
"sum": 0},
"commit_latency": { "avgcount": 0,
"sum": 0.000000000},
"collect": 1,
"collect_keys": { "avgcount": 1,
"sum": 1},
"collect_bytes": { "avgcount": 1,
"sum": 24},
"collect_latency": { "avgcount": 1,
"sum": 0.000280000},
"collect_uncommitted": 0,
"collect_timeout": 0,
"accept_timeout": 0,
"lease_ack_timeout": 0,
"lease_timeout": 0,
"store_state": 9363435,
"store_state_keys": { "avgcount": 9363435,
"sum": 176572789},
"store_state_bytes": { "avgcount": 9363435,
"sum": 216355887217},
"store_state_latency": { "avgcount": 9363435,
"sum": 6866.540527000},
"share_state": 0,
"share_state_keys": { "avgcount": 0,
"sum": 0},
"share_state_bytes": { "avgcount": 0,
"sum": 0},
"new_pn": 0,
"new_pn_latency": { "avgcount": 0,
"sum": 0.000000000}},
"throttle-mon_client_bytes": { "val": 246,
"max": 104857600,
"get": 896030,
"get_sum": 45854374,
"get_or_fail_fail": 0,
"get_or_fail_success": 0,
"take": 0,
"take_sum": 0,
"put": 896026,
"put_sum": 45854128,
"wait": { "avgcount": 0,
"sum": 0.000000000}},
"throttle-mon_daemon_bytes": { "val": 0,
"max": 419430400,
"get": 2773768,
"get_sum": 3627676976,
"get_or_fail_fail": 0,
"get_or_fail_success": 0,
"take": 0,
"take_sum": 0,
"put": 2773768,
"put_sum": 3627676976,
"wait": { "avgcount": 0,
"sum": 0.000000000}},
"throttle-msgr_dispatch_throttler-mon": { "val": 0,
"max": 104857600,
"get": 34504949,
"get_sum": 226860281124,
"get_or_fail_fail": 0,
"get_or_fail_success": 0,
"take": 0,
"take_sum": 0,
"put": 34504949,
"put_sum": 226860281124,
"wait": { "avgcount": 0,
"sum": 0.000000000}}}
`
var osdPerfDump = `
{ "WBThrottle": { "bytes_dirtied": 28405539,
"bytes_wb": 0,
"ios_dirtied": 93,
"ios_wb": 0,
"inodes_dirtied": 86,
"inodes_wb": 0},
"filestore": { "journal_queue_max_ops": 0,
"journal_queue_ops": 0,
"journal_ops": 1108008,
"journal_queue_max_bytes": 0,
"journal_queue_bytes": 0,
"journal_bytes": 73233416196,
"journal_latency": { "avgcount": 1108008,
"sum": 290.981036000},
"journal_wr": 1091866,
"journal_wr_bytes": { "avgcount": 1091866,
"sum": 74925682688},
"journal_full": 0,
"committing": 0,
"commitcycle": 110389,
"commitcycle_interval": { "avgcount": 110389,
"sum": 552132.109360000},
"commitcycle_latency": { "avgcount": 110389,
"sum": 178.657804000},
"op_queue_max_ops": 50,
"op_queue_ops": 0,
"ops": 1108008,
"op_queue_max_bytes": 104857600,
"op_queue_bytes": 0,
"bytes": 73226768148,
"apply_latency": { "avgcount": 1108008,
"sum": 947.742722000},
"queue_transaction_latency_avg": { "avgcount": 1108008,
"sum": 0.511327000}},
"leveldb": { "leveldb_get": 4361221,
"leveldb_transaction": 4351276,
"leveldb_compact": 0,
"leveldb_compact_range": 0,
"leveldb_compact_queue_merge": 0,
"leveldb_compact_queue_len": 0},
"mutex-FileJournal::completions_lock": { "wait": { "avgcount": 0,
"sum": 0.000000000}},
"mutex-FileJournal::finisher_lock": { "wait": { "avgcount": 0,
"sum": 0.000000000}},
"mutex-FileJournal::write_lock": { "wait": { "avgcount": 0,
"sum": 0.000000000}},
"mutex-FileJournal::writeq_lock": { "wait": { "avgcount": 0,
"sum": 0.000000000}},
"mutex-JOS::ApplyManager::apply_lock": { "wait": { "avgcount": 0,
"sum": 0.000000000}},
"mutex-JOS::ApplyManager::com_lock": { "wait": { "avgcount": 0,
"sum": 0.000000000}},
"mutex-JOS::SubmitManager::lock": { "wait": { "avgcount": 0,
"sum": 0.000000000}},
"mutex-WBThrottle::lock": { "wait": { "avgcount": 0,
"sum": 0.000000000}},
"objecter": { "op_active": 0,
"op_laggy": 0,
"op_send": 0,
"op_send_bytes": 0,
"op_resend": 0,
"op_ack": 0,
"op_commit": 0,
"op": 0,
"op_r": 0,
"op_w": 0,
"op_rmw": 0,
"op_pg": 0,
"osdop_stat": 0,
"osdop_create": 0,
"osdop_read": 0,
"osdop_write": 0,
"osdop_writefull": 0,
"osdop_append": 0,
"osdop_zero": 0,
"osdop_truncate": 0,
"osdop_delete": 0,
"osdop_mapext": 0,
"osdop_sparse_read": 0,
"osdop_clonerange": 0,
"osdop_getxattr": 0,
"osdop_setxattr": 0,
"osdop_cmpxattr": 0,
"osdop_rmxattr": 0,
"osdop_resetxattrs": 0,
"osdop_tmap_up": 0,
"osdop_tmap_put": 0,
"osdop_tmap_get": 0,
"osdop_call": 0,
"osdop_watch": 0,
"osdop_notify": 0,
"osdop_src_cmpxattr": 0,
"osdop_pgls": 0,
"osdop_pgls_filter": 0,
"osdop_other": 0,
"linger_active": 0,
"linger_send": 0,
"linger_resend": 0,
"poolop_active": 0,
"poolop_send": 0,
"poolop_resend": 0,
"poolstat_active": 0,
"poolstat_send": 0,
"poolstat_resend": 0,
"statfs_active": 0,
"statfs_send": 0,
"statfs_resend": 0,
"command_active": 0,
"command_send": 0,
"command_resend": 0,
"map_epoch": 3300,
"map_full": 0,
"map_inc": 3293,
"osd_sessions": 0,
"osd_session_open": 0,
"osd_session_close": 0,
"osd_laggy": 0},
"osd": { "opq": 0,
"op_wip": 0,
"op": 23939,
"op_in_bytes": 1245903961,
"op_out_bytes": 29103083856,
"op_latency": { "avgcount": 23939,
"sum": 440.192015000},
"op_process_latency": { "avgcount": 23939,
"sum": 30.170685000},
"op_r": 23112,
"op_r_out_bytes": 29103056146,
"op_r_latency": { "avgcount": 23112,
"sum": 19.373526000},
"op_r_process_latency": { "avgcount": 23112,
"sum": 14.625928000},
"op_w": 549,
"op_w_in_bytes": 1245804358,
"op_w_rlat": { "avgcount": 549,
"sum": 17.022299000},
"op_w_latency": { "avgcount": 549,
"sum": 418.494610000},
"op_w_process_latency": { "avgcount": 549,
"sum": 13.316555000},
"op_rw": 278,
"op_rw_in_bytes": 99603,
"op_rw_out_bytes": 27710,
"op_rw_rlat": { "avgcount": 278,
"sum": 2.213785000},
"op_rw_latency": { "avgcount": 278,
"sum": 2.323879000},
"op_rw_process_latency": { "avgcount": 278,
"sum": 2.228202000},
"subop": 1074774,
"subop_in_bytes": 26841811636,
"subop_latency": { "avgcount": 1074774,
"sum": 745.509160000},
"subop_w": 0,
"subop_w_in_bytes": 26841811636,
"subop_w_latency": { "avgcount": 1074774,
"sum": 745.509160000},
"subop_pull": 0,
"subop_pull_latency": { "avgcount": 0,
"sum": 0.000000000},
"subop_push": 0,
"subop_push_in_bytes": 0,
"subop_push_latency": { "avgcount": 0,
"sum": 0.000000000},
"pull": 0,
"push": 28,
"push_out_bytes": 103483392,
"push_in": 0,
"push_in_bytes": 0,
"recovery_ops": 15,
"loadavg": 202,
"buffer_bytes": 0,
"numpg": 18,
"numpg_primary": 8,
"numpg_replica": 10,
"numpg_stray": 0,
"heartbeat_to_peers": 10,
"heartbeat_from_peers": 0,
"map_messages": 7413,
"map_message_epochs": 9792,
"map_message_epoch_dups": 10105,
"messages_delayed_for_map": 83,
"stat_bytes": 102123175936,
"stat_bytes_used": 49961820160,
"stat_bytes_avail": 52161355776,
"copyfrom": 0,
"tier_promote": 0,
"tier_flush": 0,
"tier_flush_fail": 0,
"tier_try_flush": 0,
"tier_try_flush_fail": 0,
"tier_evict": 0,
"tier_whiteout": 0,
"tier_dirty": 230,
"tier_clean": 0,
"tier_delay": 0,
"agent_wake": 0,
"agent_skip": 0,
"agent_flush": 0,
"agent_evict": 0},
"recoverystate_perf": { "initial_latency": { "avgcount": 473,
"sum": 0.027207000},
"started_latency": { "avgcount": 1480,
"sum": 9854902.397648000},
"reset_latency": { "avgcount": 1953,
"sum": 0.096206000},
"start_latency": { "avgcount": 1953,
"sum": 0.059947000},
"primary_latency": { "avgcount": 765,
"sum": 4688922.186935000},
"peering_latency": { "avgcount": 704,
"sum": 1668.652135000},
"backfilling_latency": { "avgcount": 0,
"sum": 0.000000000},
"waitremotebackfillreserved_latency": { "avgcount": 0,
"sum": 0.000000000},
"waitlocalbackfillreserved_latency": { "avgcount": 0,
"sum": 0.000000000},
"notbackfilling_latency": { "avgcount": 0,
"sum": 0.000000000},
"repnotrecovering_latency": { "avgcount": 462,
"sum": 5158922.114600000},
"repwaitrecoveryreserved_latency": { "avgcount": 15,
"sum": 0.008275000},
"repwaitbackfillreserved_latency": { "avgcount": 1,
"sum": 0.000095000},
"RepRecovering_latency": { "avgcount": 16,
"sum": 2274.944727000},
"activating_latency": { "avgcount": 514,
"sum": 261.008520000},
"waitlocalrecoveryreserved_latency": { "avgcount": 20,
"sum": 0.175422000},
"waitremoterecoveryreserved_latency": { "avgcount": 20,
"sum": 0.682778000},
"recovering_latency": { "avgcount": 20,
"sum": 0.697551000},
"recovered_latency": { "avgcount": 511,
"sum": 0.011038000},
"clean_latency": { "avgcount": 503,
"sum": 4686961.154278000},
"active_latency": { "avgcount": 506,
"sum": 4687223.640464000},
"replicaactive_latency": { "avgcount": 446,
"sum": 5161197.078966000},
"stray_latency": { "avgcount": 794,
"sum": 4805.105128000},
"getinfo_latency": { "avgcount": 704,
"sum": 1138.477937000},
"getlog_latency": { "avgcount": 678,
"sum": 0.036393000},
"waitactingchange_latency": { "avgcount": 69,
"sum": 59.172893000},
"incomplete_latency": { "avgcount": 0,
"sum": 0.000000000},
"getmissing_latency": { "avgcount": 609,
"sum": 0.012288000},
"waitupthru_latency": { "avgcount": 576,
"sum": 530.106999000}},
"throttle-filestore_bytes": { "val": 0,
"max": 0,
"get": 0,
"get_sum": 0,
"get_or_fail_fail": 0,
"get_or_fail_success": 0,
"take": 0,
"take_sum": 0,
"put": 0,
"put_sum": 0,
"wait": { "avgcount": 0,
"sum": 0.000000000}},
"throttle-filestore_ops": { "val": 0,
"max": 0,
"get": 0,
"get_sum": 0,
"get_or_fail_fail": 0,
"get_or_fail_success": 0,
"take": 0,
"take_sum": 0,
"put": 0,
"put_sum": 0,
"wait": { "avgcount": 0,
"sum": 0.000000000}},
"throttle-msgr_dispatch_throttler-client": { "val": 0,
"max": 104857600,
"get": 130730,
"get_sum": 1246039872,
"get_or_fail_fail": 0,
"get_or_fail_success": 0,
"take": 0,
"take_sum": 0,
"put": 130730,
"put_sum": 1246039872,
"wait": { "avgcount": 0,
"sum": 0.000000000}},
"throttle-msgr_dispatch_throttler-cluster": { "val": 0,
"max": 104857600,
"get": 1108033,
"get_sum": 71277949992,
"get_or_fail_fail": 0,
"get_or_fail_success": 0,
"take": 0,
"take_sum": 0,
"put": 1108033,
"put_sum": 71277949992,
"wait": { "avgcount": 0,
"sum": 0.000000000}},
"throttle-msgr_dispatch_throttler-hb_back_server": { "val": 0,
"max": 104857600,
"get": 18320575,
"get_sum": 861067025,
"get_or_fail_fail": 0,
"get_or_fail_success": 0,
"take": 0,
"take_sum": 0,
"put": 18320575,
"put_sum": 861067025,
"wait": { "avgcount": 0,
"sum": 0.000000000}},
"throttle-msgr_dispatch_throttler-hb_front_server": { "val": 0,
"max": 104857600,
"get": 18320575,
"get_sum": 861067025,
"get_or_fail_fail": 0,
"get_or_fail_success": 0,
"take": 0,
"take_sum": 0,
"put": 18320575,
"put_sum": 861067025,
"wait": { "avgcount": 0,
"sum": 0.000000000}},
"throttle-msgr_dispatch_throttler-hbclient": { "val": 0,
"max": 104857600,
"get": 40479394,
"get_sum": 1902531518,
"get_or_fail_fail": 0,
"get_or_fail_success": 0,
"take": 0,
"take_sum": 0,
"put": 40479394,
"put_sum": 1902531518,
"wait": { "avgcount": 0,
"sum": 0.000000000}},
"throttle-msgr_dispatch_throttler-ms_objecter": { "val": 0,
"max": 104857600,
"get": 0,
"get_sum": 0,
"get_or_fail_fail": 0,
"get_or_fail_success": 0,
"take": 0,
"take_sum": 0,
"put": 0,
"put_sum": 0,
"wait": { "avgcount": 0,
"sum": 0.000000000}},
"throttle-objecter_bytes": { "val": 0,
"max": 104857600,
"get": 0,
"get_sum": 0,
"get_or_fail_fail": 0,
"get_or_fail_success": 0,
"take": 0,
"take_sum": 0,
"put": 0,
"put_sum": 0,
"wait": { "avgcount": 0,
"sum": 0.000000000}},
"throttle-objecter_ops": { "val": 0,
"max": 1024,
"get": 0,
"get_sum": 0,
"get_or_fail_fail": 0,
"get_or_fail_success": 0,
"take": 0,
"take_sum": 0,
"put": 0,
"put_sum": 0,
"wait": { "avgcount": 0,
"sum": 0.000000000}},
"throttle-osd_client_bytes": { "val": 0,
"max": 524288000,
"get": 24241,
"get_sum": 1241992581,
"get_or_fail_fail": 0,
"get_or_fail_success": 0,
"take": 0,
"take_sum": 0,
"put": 25958,
"put_sum": 1241992581,
"wait": { "avgcount": 0,
"sum": 0.000000000}},
"throttle-osd_client_messages": { "val": 0,
"max": 100,
"get": 49214,
"get_sum": 49214,
"get_or_fail_fail": 0,
"get_or_fail_success": 0,
"take": 0,
"take_sum": 0,
"put": 49214,
"put_sum": 49214,
"wait": { "avgcount": 0,
"sum": 0.000000000}}}
`

View File

@@ -0,0 +1,59 @@
# CGroup Input Plugin For Telegraf Agent
This input plugin captures specific statistics per cgroup.
The following file formats are supported (a minimal parsing sketch follows the list):
* Single value
```
VAL\n
```
* New line separated values
```
VAL0\n
VAL1\n
```
* Space separated values
```
VAL0 VAL1 ...\n
```
* New line separated key-space-value pairs
```
KEY0 VAL0\n
KEY1 VAL1\n
```
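For orientation, here is a minimal, hypothetical sketch (not the plugin's actual code) of how a key-space-value file such as `memory.stat` is turned into fields: each key becomes a field named `<file name>.<key>`, and values that parse as integers are stored as numbers.
```go
package main

import (
	"fmt"
	"regexp"
	"strconv"
)

func main() {
	// Hypothetical stand-ins for a stat file name and its contents.
	file := "memory.stat"
	data := "cache 1739362304\nrss 1775325184\n"

	fields := map[string]interface{}{}
	re := regexp.MustCompile(`([[:alpha:]_]+) ([\d-]+)\n`)
	for _, m := range re.FindAllStringSubmatch(data, -1) {
		// Field name is "<file name>.<key>", e.g. "memory.stat.cache".
		if v, err := strconv.Atoi(m[2]); err == nil {
			fields[file+"."+m[1]] = v
		} else {
			fields[file+"."+m[1]] = m[2]
		}
	}
	fmt.Println(fields) // map[memory.stat.cache:1739362304 memory.stat.rss:1775325184]
}
```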
### Tags:
Measurements don't have any specific tags unless you define them at the Telegraf level (defaults). The
path used to be reported as a tag, but to keep series cardinality in check it is now reported as a
field. Thanks @sebito91!
### Configuration:
```
# [[inputs.cgroup]]
# paths = [
# "/cgroup/memory", # root cgroup
# "/cgroup/memory/child1", # container cgroup
# "/cgroup/memory/child2/*", # all children cgroups under child2, but not child2 itself
# ]
# files = ["memory.*usage*", "memory.limit_in_bytes"]
# [[inputs.cgroup]]
# paths = [
# "/cgroup/cpu", # root cgroup
# "/cgroup/cpu/*", # all container cgroups
# "/cgroup/cpu/*/*", # all children cgroups under each container cgroup
# ]
# files = ["cpuacct.usage", "cpu.cfs_period_us", "cpu.cfs_quota_us"]
```

View File

@@ -0,0 +1,35 @@
package cgroup
import (
"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/plugins/inputs"
)
type CGroup struct {
Paths []string `toml:"paths"`
Files []string `toml:"files"`
}
var sampleConfig = `
## Directories in which to look for files, globs are supported.
# paths = [
# "/cgroup/memory",
# "/cgroup/memory/child1",
# "/cgroup/memory/child2/*",
# ]
## cgroup stat fields, as file names, globs are supported.
## these file names are appended to each path from above.
# files = ["memory.*usage*", "memory.limit_in_bytes"]
`
func (g *CGroup) SampleConfig() string {
return sampleConfig
}
func (g *CGroup) Description() string {
return "Read specific statistics per cgroup"
}
func init() {
inputs.Add("cgroup", func() telegraf.Input { return &CGroup{} })
}

View File

@@ -0,0 +1,243 @@
// +build linux
package cgroup
import (
"fmt"
"io/ioutil"
"os"
"path"
"path/filepath"
"regexp"
"strconv"
"github.com/influxdata/telegraf"
)
const metricName = "cgroup"
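// Gather walks every configured path, reads the matching stat files found in
// each directory, and adds one "cgroup" measurement per directory.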
func (g *CGroup) Gather(acc telegraf.Accumulator) error {
list := make(chan pathInfo)
go g.generateDirs(list)
for dir := range list {
if dir.err != nil {
return dir.err
}
if err := g.gatherDir(dir.path, acc); err != nil {
return err
}
}
return nil
}
func (g *CGroup) gatherDir(dir string, acc telegraf.Accumulator) error {
fields := make(map[string]interface{})
list := make(chan pathInfo)
go g.generateFiles(dir, list)
for file := range list {
if file.err != nil {
return file.err
}
raw, err := ioutil.ReadFile(file.path)
if err != nil {
return err
}
if len(raw) == 0 {
continue
}
fd := fileData{data: raw, path: file.path}
if err := fd.parse(fields); err != nil {
return err
}
}
fields["path"] = dir
acc.AddFields(metricName, fields, nil)
return nil
}
// ======================================================================
type pathInfo struct {
path string
err error
}
func isDir(path string) (bool, error) {
result, err := os.Stat(path)
if err != nil {
return false, err
}
return result.IsDir(), nil
}
func (g *CGroup) generateDirs(list chan<- pathInfo) {
for _, dir := range g.Paths {
// getting all dirs that match the pattern 'dir'
items, err := filepath.Glob(dir)
if err != nil {
list <- pathInfo{err: err}
return
}
for _, item := range items {
ok, err := isDir(item)
if err != nil {
list <- pathInfo{err: err}
return
}
// supply only dirs
if ok {
list <- pathInfo{path: item}
}
}
}
close(list)
}
func (g *CGroup) generateFiles(dir string, list chan<- pathInfo) {
for _, file := range g.Files {
// getting all file paths that match the pattern 'dir + file'
// path.Base makes sure that the file variable does not contain a path component
items, err := filepath.Glob(path.Join(dir, path.Base(file)))
if err != nil {
list <- pathInfo{err: err}
return
}
for _, item := range items {
ok, err := isDir(item)
if err != nil {
list <- pathInfo{err: err}
return
}
// supply only files not dirs
if !ok {
list <- pathInfo{path: item}
}
}
}
close(list)
}
// ======================================================================
type fileData struct {
data []byte
path string
}
func (fd *fileData) format() (*fileFormat, error) {
for _, ff := range fileFormats {
ok, err := ff.match(fd.data)
if err != nil {
return nil, err
}
if ok {
return &ff, nil
}
}
return nil, fmt.Errorf("%v: unknown file format", fd.path)
}
func (fd *fileData) parse(fields map[string]interface{}) error {
format, err := fd.format()
if err != nil {
return err
}
format.parser(filepath.Base(fd.path), fields, fd.data)
return nil
}
// ======================================================================
type fileFormat struct {
name string
pattern string
parser func(measurement string, fields map[string]interface{}, b []byte)
}
const keyPattern = "[[:alpha:]_]+"
const valuePattern = "[\\d-]+"
var fileFormats = [...]fileFormat{
// VAL\n
fileFormat{
name: "Single value",
pattern: "^" + valuePattern + "\n$",
parser: func(measurement string, fields map[string]interface{}, b []byte) {
re := regexp.MustCompile("^(" + valuePattern + ")\n$")
matches := re.FindAllStringSubmatch(string(b), -1)
fields[measurement] = numberOrString(matches[0][1])
},
},
// VAL0\n
// VAL1\n
// ...
fileFormat{
name: "New line separated values",
pattern: "^(" + valuePattern + "\n){2,}$",
parser: func(measurement string, fields map[string]interface{}, b []byte) {
re := regexp.MustCompile("(" + valuePattern + ")\n")
matches := re.FindAllStringSubmatch(string(b), -1)
for i, v := range matches {
fields[measurement+"."+strconv.Itoa(i)] = numberOrString(v[1])
}
},
},
// VAL0 VAL1 ...\n
fileFormat{
name: "Space separated values",
pattern: "^(" + valuePattern + " )+\n$",
parser: func(measurement string, fields map[string]interface{}, b []byte) {
re := regexp.MustCompile("(" + valuePattern + ") ")
matches := re.FindAllStringSubmatch(string(b), -1)
for i, v := range matches {
fields[measurement+"."+strconv.Itoa(i)] = numberOrString(v[1])
}
},
},
// KEY0 VAL0\n
// KEY1 VAL1\n
// ...
fileFormat{
name: "New line separated key-space-value's",
pattern: "^(" + keyPattern + " " + valuePattern + "\n)+$",
parser: func(measurement string, fields map[string]interface{}, b []byte) {
re := regexp.MustCompile("(" + keyPattern + ") (" + valuePattern + ")\n")
matches := re.FindAllStringSubmatch(string(b), -1)
for _, v := range matches {
fields[measurement+"."+v[1]] = numberOrString(v[2])
}
},
},
}
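// numberOrString returns the value as an int when it parses cleanly,
// otherwise the original string (e.g. "12-781" stays a string field).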
func numberOrString(s string) interface{} {
i, err := strconv.Atoi(s)
if err == nil {
return i
}
return s
}
func (f fileFormat) match(b []byte) (bool, error) {
ok, err := regexp.Match(f.pattern, b)
if err != nil {
return false, err
}
if ok {
return true, nil
}
return false, nil
}

View File

@@ -0,0 +1,11 @@
// +build !linux
package cgroup
import (
"github.com/influxdata/telegraf"
)
func (g *CGroup) Gather(acc telegraf.Accumulator) error {
return nil
}

View File

@@ -0,0 +1,194 @@
// +build linux
package cgroup
import (
"fmt"
"testing"
"github.com/influxdata/telegraf/testutil"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"reflect"
)
var cg1 = &CGroup{
Paths: []string{"testdata/memory"},
Files: []string{
"memory.empty",
"memory.max_usage_in_bytes",
"memory.limit_in_bytes",
"memory.stat",
"memory.use_hierarchy",
"notify_on_release",
},
}
func assertContainsFields(a *testutil.Accumulator, t *testing.T, measurement string, fieldSet []map[string]interface{}) {
a.Lock()
defer a.Unlock()
numEquals := 0
for _, p := range a.Metrics {
if p.Measurement == measurement {
for _, fields := range fieldSet {
if reflect.DeepEqual(fields, p.Fields) {
numEquals++
}
}
}
}
if numEquals != len(fieldSet) {
assert.Fail(t, fmt.Sprintf("only %d of %d are equal", numEquals, len(fieldSet)))
}
}
func TestCgroupStatistics_1(t *testing.T) {
var acc testutil.Accumulator
err := cg1.Gather(&acc)
require.NoError(t, err)
fields := map[string]interface{}{
"memory.stat.cache": 1739362304123123123,
"memory.stat.rss": 1775325184,
"memory.stat.rss_huge": 778043392,
"memory.stat.mapped_file": 421036032,
"memory.stat.dirty": -307200,
"memory.max_usage_in_bytes.0": 0,
"memory.max_usage_in_bytes.1": -1,
"memory.max_usage_in_bytes.2": 2,
"memory.limit_in_bytes": 223372036854771712,
"memory.use_hierarchy": "12-781",
"notify_on_release": 0,
"path": "testdata/memory",
}
assertContainsFields(&acc, t, "cgroup", []map[string]interface{}{fields})
}
// ======================================================================
var cg2 = &CGroup{
Paths: []string{"testdata/cpu"},
Files: []string{"cpuacct.usage_percpu"},
}
func TestCgroupStatistics_2(t *testing.T) {
var acc testutil.Accumulator
err := cg2.Gather(&acc)
require.NoError(t, err)
fields := map[string]interface{}{
"cpuacct.usage_percpu.0": -1452543795404,
"cpuacct.usage_percpu.1": 1376681271659,
"cpuacct.usage_percpu.2": 1450950799997,
"cpuacct.usage_percpu.3": -1473113374257,
"path": "testdata/cpu",
}
assertContainsFields(&acc, t, "cgroup", []map[string]interface{}{fields})
}
// ======================================================================
var cg3 = &CGroup{
Paths: []string{"testdata/memory/*"},
Files: []string{"memory.limit_in_bytes"},
}
func TestCgroupStatistics_3(t *testing.T) {
var acc testutil.Accumulator
err := cg3.Gather(&acc)
require.NoError(t, err)
fields := map[string]interface{}{
"memory.limit_in_bytes": 223372036854771712,
"path": "testdata/memory/group_1",
}
fieldsTwo := map[string]interface{}{
"memory.limit_in_bytes": 223372036854771712,
"path": "testdata/memory/group_2",
}
assertContainsFields(&acc, t, "cgroup", []map[string]interface{}{fields, fieldsTwo})
}
// ======================================================================
var cg4 = &CGroup{
Paths: []string{"testdata/memory/*/*", "testdata/memory/group_2"},
Files: []string{"memory.limit_in_bytes"},
}
func TestCgroupStatistics_4(t *testing.T) {
var acc testutil.Accumulator
err := cg4.Gather(&acc)
require.NoError(t, err)
fields := map[string]interface{}{
"memory.limit_in_bytes": 223372036854771712,
"path": "testdata/memory/group_1/group_1_1",
}
fieldsTwo := map[string]interface{}{
"memory.limit_in_bytes": 223372036854771712,
"path": "testdata/memory/group_1/group_1_2",
}
fieldsThree := map[string]interface{}{
"memory.limit_in_bytes": 223372036854771712,
"path": "testdata/memory/group_2",
}
assertContainsFields(&acc, t, "cgroup", []map[string]interface{}{fields, fieldsTwo, fieldsThree})
}
// ======================================================================
var cg5 = &CGroup{
Paths: []string{"testdata/memory/*/group_1_1"},
Files: []string{"memory.limit_in_bytes"},
}
func TestCgroupStatistics_5(t *testing.T) {
var acc testutil.Accumulator
err := cg5.Gather(&acc)
require.NoError(t, err)
fields := map[string]interface{}{
"memory.limit_in_bytes": 223372036854771712,
"path": "testdata/memory/group_1/group_1_1",
}
fieldsTwo := map[string]interface{}{
"memory.limit_in_bytes": 223372036854771712,
"path": "testdata/memory/group_2/group_1_1",
}
assertContainsFields(&acc, t, "cgroup", []map[string]interface{}{fields, fieldsTwo})
}
// ======================================================================
var cg6 = &CGroup{
Paths: []string{"testdata/memory"},
Files: []string{"memory.us*", "*/memory.kmem.*"},
}
func TestCgroupStatistics_6(t *testing.T) {
var acc testutil.Accumulator
err := cg6.Gather(&acc)
require.NoError(t, err)
fields := map[string]interface{}{
"memory.usage_in_bytes": 3513667584,
"memory.use_hierarchy": "12-781",
"memory.kmem.limit_in_bytes": 9223372036854771712,
"path": "testdata/memory",
}
assertContainsFields(&acc, t, "cgroup", []map[string]interface{}{fields})
}

View File

@@ -0,0 +1 @@
Total 0

View File

@@ -0,0 +1,131 @@
11:0 Read 0
11:0 Write 0
11:0 Sync 0
11:0 Async 0
11:0 Total 0
8:0 Read 49134
8:0 Write 216703
8:0 Sync 177906
8:0 Async 87931
8:0 Total 265837
7:7 Read 0
7:7 Write 0
7:7 Sync 0
7:7 Async 0
7:7 Total 0
7:6 Read 0
7:6 Write 0
7:6 Sync 0
7:6 Async 0
7:6 Total 0
7:5 Read 0
7:5 Write 0
7:5 Sync 0
7:5 Async 0
7:5 Total 0
7:4 Read 0
7:4 Write 0
7:4 Sync 0
7:4 Async 0
7:4 Total 0
7:3 Read 0
7:3 Write 0
7:3 Sync 0
7:3 Async 0
7:3 Total 0
7:2 Read 0
7:2 Write 0
7:2 Sync 0
7:2 Async 0
7:2 Total 0
7:1 Read 0
7:1 Write 0
7:1 Sync 0
7:1 Async 0
7:1 Total 0
7:0 Read 0
7:0 Write 0
7:0 Sync 0
7:0 Async 0
7:0 Total 0
1:15 Read 3
1:15 Write 0
1:15 Sync 0
1:15 Async 3
1:15 Total 3
1:14 Read 3
1:14 Write 0
1:14 Sync 0
1:14 Async 3
1:14 Total 3
1:13 Read 3
1:13 Write 0
1:13 Sync 0
1:13 Async 3
1:13 Total 3
1:12 Read 3
1:12 Write 0
1:12 Sync 0
1:12 Async 3
1:12 Total 3
1:11 Read 3
1:11 Write 0
1:11 Sync 0
1:11 Async 3
1:11 Total 3
1:10 Read 3
1:10 Write 0
1:10 Sync 0
1:10 Async 3
1:10 Total 3
1:9 Read 3
1:9 Write 0
1:9 Sync 0
1:9 Async 3
1:9 Total 3
1:8 Read 3
1:8 Write 0
1:8 Sync 0
1:8 Async 3
1:8 Total 3
1:7 Read 3
1:7 Write 0
1:7 Sync 0
1:7 Async 3
1:7 Total 3
1:6 Read 3
1:6 Write 0
1:6 Sync 0
1:6 Async 3
1:6 Total 3
1:5 Read 3
1:5 Write 0
1:5 Sync 0
1:5 Async 3
1:5 Total 3
1:4 Read 3
1:4 Write 0
1:4 Sync 0
1:4 Async 3
1:4 Total 3
1:3 Read 3
1:3 Write 0
1:3 Sync 0
1:3 Async 3
1:3 Total 3
1:2 Read 3
1:2 Write 0
1:2 Sync 0
1:2 Async 3
1:2 Total 3
1:1 Read 3
1:1 Write 0
1:1 Sync 0
1:1 Async 3
1:1 Total 3
1:0 Read 3
1:0 Write 0
1:0 Sync 0
1:0 Async 3
1:0 Total 3
Total 265885

View File

@@ -0,0 +1 @@
-1

View File

@@ -0,0 +1 @@
-1452543795404 1376681271659 1450950799997 -1473113374257

View File

@@ -0,0 +1 @@
223372036854771712

View File

@@ -0,0 +1,5 @@
cache 1739362304123123123
rss 1775325184
rss_huge 778043392
mapped_file 421036032
dirty -307200

View File

@@ -0,0 +1 @@
223372036854771712

View File

@@ -0,0 +1,5 @@
cache 1739362304123123123
rss 1775325184
rss_huge 778043392
mapped_file 421036032
dirty -307200

View File

@@ -0,0 +1 @@
9223372036854771712

View File

@@ -0,0 +1 @@
0

View File

@@ -0,0 +1 @@
223372036854771712

View File

@@ -0,0 +1,5 @@
cache 1739362304123123123
rss 1775325184
rss_huge 778043392
mapped_file 421036032
dirty -307200

View File

@@ -0,0 +1 @@
223372036854771712

View File

@@ -0,0 +1,5 @@
cache 1739362304123123123
rss 1775325184
rss_huge 778043392
mapped_file 421036032
dirty -307200

View File

@@ -0,0 +1 @@
223372036854771712

View File

@@ -0,0 +1,5 @@
cache 1739362304123123123
rss 1775325184
rss_huge 778043392
mapped_file 421036032
dirty -307200

View File

View File

@@ -0,0 +1 @@
9223372036854771712

View File

@@ -0,0 +1 @@
223372036854771712

View File

@@ -0,0 +1,3 @@
0
-1
2

View File

@@ -0,0 +1,8 @@
total=858067 N0=858067
file=406254 N0=406254
anon=451792 N0=451792
unevictable=21 N0=21
hierarchical_total=858067 N0=858067
hierarchical_file=406254 N0=406254
hierarchical_anon=451792 N0=451792
hierarchical_unevictable=21 N0=21

View File

@@ -0,0 +1,5 @@
cache 1739362304123123123
rss 1775325184
rss_huge 778043392
mapped_file 421036032
dirty -307200

View File

@@ -0,0 +1 @@
3513667584

View File

@@ -0,0 +1 @@
12-781

View File

@@ -0,0 +1 @@
0

View File

@@ -0,0 +1,92 @@
# chrony Input Plugin
Get standard chrony metrics, requires chronyc executable.
Below is documentation for the various fields returned by `chronyc tracking`.
- Reference ID - This is the refid and name (or IP address), if available, of the
server to which the computer is currently synchronised. If this is 127.127.1.1
it means the computer is not synchronised to any external source and is
operating in local mode (via the local command in chronyc or the local directive
in the /etc/chrony.conf file).
- Stratum - The stratum indicates how many hops away from a computer with an attached
reference clock we are. Such a computer is a stratum-1 computer, so the computer in the
example is two hops away (i.e. a.b.c is a stratum-2 and is synchronised from a stratum-1).
- Ref time - This is the time (UTC) at which the last measurement from the reference
source was processed.
- System time - In normal operation, chronyd never steps the system clock, because any
jump in the timescale can have adverse consequences for certain application programs.
Instead, any error in the system clock is corrected by slightly speeding up or slowing
down the system clock until the error has been removed, and then returning to the system
clock's normal speed. A consequence of this is that there will be a period when the
system clock (as read by other programs using the gettimeofday() system call, or by the
date command in the shell) will be different from chronyd's estimate of the current true
time (which it reports to NTP clients when it is operating in server mode). The value
reported on this line is the difference due to this effect.
- Last offset - This is the estimated local offset on the last clock update.
- RMS offset - This is a long-term average of the offset value.
- Frequency - The frequency is the rate by which the system's clock would be
wrong if chronyd were not correcting it. It is expressed in ppm (parts per million).
For example, a value of 1ppm would mean that when the system's clock thinks it has
advanced 1 second, it has actually advanced by 1.000001 seconds relative to true time.
- Residual freq - This shows the residual frequency for the currently selected
reference source. This reflects any difference between what the measurements from the
reference source indicate the frequency should be and the frequency currently being used.
The reason this is not always zero is that a smoothing procedure is applied to the
frequency. Each time a measurement from the reference source is obtained and a new
residual frequency computed, the estimated accuracy of this residual is compared with the
estimated accuracy (see skew next) of the existing frequency value. A weighted average
is computed for the new frequency, with weights depending on these accuracies. If the
measurements from the reference source follow a consistent trend, the residual will be
driven to zero over time.
- Skew - This is the estimated error bound on the frequency.
- Root delay - This is the total of the network path delays to the stratum-1 computer
from which the computer is ultimately synchronised. In certain extreme situations, this
value can be negative. (This can arise in a symmetric peer arrangement where the computers'
frequencies are not tracking each other and the network delay is very short relative to the
turn-around time at each computer.)
- Root dispersion - This is the total dispersion accumulated through all the computers
back to the stratum-1 computer from which the computer is ultimately synchronised.
Dispersion is due to system clock resolution, statistical measurement variations etc.
- Leap status - This is the leap status, which can be Normal, Insert second,
Delete second or Not synchronised.
### Configuration:
```toml
# Get standard chrony metrics, requires chronyc executable.
[[inputs.chrony]]
## If true, chronyc tries to perform a DNS lookup for the time server.
# dns_lookup = false
```
### Measurements & Fields:
- chrony
- last_offset (float, seconds)
- rms_offset (float, seconds)
- frequency (float, ppm)
- residual_freq (float, ppm)
- skew (float, ppm)
- root_delay (float, seconds)
- root_dispersion (float, seconds)
- update_interval (float, seconds)
### Tags:
- All measurements have the following tags:
- reference_id
- stratum
- leap_status
### Example Output:
```
$ telegraf -config telegraf.conf -input-filter chrony -test
* Plugin: chrony, Collection 1
> chrony,leap_status=normal,reference_id=192.168.1.1,stratum=3 frequency=-35.657,last_offset=-0.000013616,residual_freq=-0,rms_offset=0.000027073,root_delay=0.000644,root_dispersion=0.003444,skew=0.001,update_interval=1031.2 1463750789687639161
```

View File

@@ -0,0 +1,129 @@
// +build linux
package chrony
import (
"errors"
"fmt"
"os/exec"
"strconv"
"strings"
"time"
"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/internal"
"github.com/influxdata/telegraf/plugins/inputs"
)
var (
execCommand = exec.Command // execCommand is used to mock commands in tests.
)
type Chrony struct {
DNSLookup bool `toml:"dns_lookup"`
path string
}
func (*Chrony) Description() string {
return "Get standard chrony metrics, requires chronyc executable."
}
func (*Chrony) SampleConfig() string {
return `
## If true, chronyc tries to perform a DNS lookup for the time server.
# dns_lookup = false
`
}
func (c *Chrony) Gather(acc telegraf.Accumulator) error {
if len(c.path) == 0 {
return errors.New("chronyc not found: verify that chrony is installed and that chronyc is in your PATH")
}
flags := []string{}
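// chronyc's -n flag reports IP addresses instead of resolving them via DNS.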
if !c.DNSLookup {
flags = append(flags, "-n")
}
flags = append(flags, "tracking")
cmd := execCommand(c.path, flags...)
out, err := internal.CombinedOutputTimeout(cmd, time.Second*5)
if err != nil {
return fmt.Errorf("failed to run command %s: %s - %s", strings.Join(cmd.Args, " "), err, string(out))
}
fields, tags, err := processChronycOutput(string(out))
if err != nil {
return err
}
acc.AddFields("chrony", fields, tags)
return nil
}
// processChronycOutput takes in a string output from the chronyc command, like:
//
// Reference ID : 192.168.1.22 (ntp.example.com)
// Stratum : 3
// Ref time (UTC) : Thu May 12 14:27:07 2016
// System time : 0.000020390 seconds fast of NTP time
// Last offset : +0.000012651 seconds
// RMS offset : 0.000025577 seconds
// Frequency : 16.001 ppm slow
// Residual freq : -0.000 ppm
// Skew : 0.006 ppm
// Root delay : 0.001655 seconds
// Root dispersion : 0.003307 seconds
// Update interval : 507.2 seconds
// Leap status : Normal
//
// The name on the left side of the colon is used as the field name if the first token on
// the right side parses as a float. If it cannot be parsed as a float, it becomes a tag.
//
// Ref time is ignored and all names are converted to snake case.
//
// It returns (<fields>, <tags>)
func processChronycOutput(out string) (map[string]interface{}, map[string]string, error) {
tags := map[string]string{}
fields := map[string]interface{}{}
lines := strings.Split(strings.TrimSpace(out), "\n")
for _, line := range lines {
stats := strings.Split(line, ":")
if len(stats) < 2 {
return nil, nil, fmt.Errorf("unexpected output from chronyc, expected ':' in %s", out)
}
name := strings.ToLower(strings.Replace(strings.TrimSpace(stats[0]), " ", "_", -1))
// ignore reference time
if strings.Contains(name, "time") {
continue
}
valueFields := strings.Fields(stats[1])
if len(valueFields) == 0 {
return nil, nil, fmt.Errorf("unexpected output from chronyc: %s", out)
}
if strings.Contains(strings.ToLower(name), "stratum") {
tags["stratum"] = valueFields[0]
continue
}
value, err := strconv.ParseFloat(valueFields[0], 64)
if err != nil {
tags[name] = strings.ToLower(valueFields[0])
continue
}
if strings.Contains(stats[1], "slow") {
value = -value
}
fields[name] = value
}
return fields, tags, nil
}
func init() {
c := Chrony{}
path, _ := exec.LookPath("chronyc")
if len(path) > 0 {
c.path = path
}
inputs.Add("chrony", func() telegraf.Input {
return &c
})
}

View File

@@ -0,0 +1,3 @@
// +build !linux
package chrony

View File

@@ -0,0 +1,109 @@
// +build linux
package chrony
import (
"fmt"
"os"
"os/exec"
"testing"
"github.com/influxdata/telegraf/testutil"
)
func TestGather(t *testing.T) {
c := Chrony{
path: "chronyc",
}
// overwriting exec commands with mock commands
execCommand = fakeExecCommand
defer func() { execCommand = exec.Command }()
var acc testutil.Accumulator
err := c.Gather(&acc)
if err != nil {
t.Fatal(err)
}
tags := map[string]string{
"reference_id": "192.168.1.22",
"leap_status": "normal",
"stratum": "3",
}
fields := map[string]interface{}{
"last_offset": 0.000012651,
"rms_offset": 0.000025577,
"frequency": -16.001,
"residual_freq": 0.0,
"skew": 0.006,
"root_delay": 0.001655,
"root_dispersion": 0.003307,
"update_interval": 507.2,
}
acc.AssertContainsTaggedFields(t, "chrony", fields, tags)
// test with dns lookup
c.DNSLookup = true
err = c.Gather(&acc)
if err != nil {
t.Fatal(err)
}
acc.AssertContainsTaggedFields(t, "chrony", fields, tags)
}
// fakeExecCommand is a helper function that mocks
// the exec.Command call (and calls the test binary instead)
func fakeExecCommand(command string, args ...string) *exec.Cmd {
cs := []string{"-test.run=TestHelperProcess", "--", command}
cs = append(cs, args...)
cmd := exec.Command(os.Args[0], cs...)
cmd.Env = []string{"GO_WANT_HELPER_PROCESS=1"}
return cmd
}
// TestHelperProcess isn't a real test. It's used to mock exec.Command.
// For example, if you run:
// GO_WANT_HELPER_PROCESS=1 go test -test.run=TestHelperProcess -- chrony tracking
// it prints the mockData defined below.
func TestHelperProcess(t *testing.T) {
if os.Getenv("GO_WANT_HELPER_PROCESS") != "1" {
return
}
lookup := "Reference ID : 192.168.1.22 (ntp.example.com)\n"
noLookup := "Reference ID : 192.168.1.22 (192.168.1.22)\n"
mockData := `Stratum : 3
Ref time (UTC) : Thu May 12 14:27:07 2016
System time : 0.000020390 seconds fast of NTP time
Last offset : +0.000012651 seconds
RMS offset : 0.000025577 seconds
Frequency : 16.001 ppm slow
Residual freq : -0.000 ppm
Skew : 0.006 ppm
Root delay : 0.001655 seconds
Root dispersion : 0.003307 seconds
Update interval : 507.2 seconds
Leap status : Normal
`
args := os.Args
// The preceding arguments are test-framework arguments, which look like:
// /tmp/go-build970079519/…/_test/integration.test -test.run=TestHelperProcess --
cmd, args := args[3], args[4:]
if cmd == "chronyc" {
if args[0] == "tracking" {
fmt.Fprint(os.Stdout, lookup+mockData)
} else {
fmt.Fprint(os.Stdout, noLookup+mockData)
}
} else {
fmt.Fprint(os.Stdout, "command not found")
os.Exit(1)
}
os.Exit(0)
}

View File

@@ -0,0 +1,131 @@
# Amazon CloudWatch Statistics Input
This plugin will pull Metric Statistics from Amazon CloudWatch.
### Amazon Authentication
This plugin uses a credential chain to authenticate with the CloudWatch
API endpoint. The plugin attempts to authenticate in the following order
(a standalone sketch follows the list):
1. Assumed credentials via STS if `role_arn` attribute is specified (source credentials are evaluated from subsequent rules)
2. Explicit credentials from `access_key`, `secret_key`, and `token` attributes
3. Shared profile from `profile` attribute
4. [Environment Variables](https://github.com/aws/aws-sdk-go/wiki/configuring-sdk#environment-variables)
5. [Shared Credentials](https://github.com/aws/aws-sdk-go/wiki/configuring-sdk#shared-credentials-file)
6. [EC2 Instance Profile](http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/iam-roles-for-amazon-ec2.html)
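For orientation, the standalone sketch below illustrates a credential chain of this kind using the aws-sdk-go v1 `credentials` package; the plugin builds its own chain internally (including the STS role case, not shown here), so the provider list and placeholder keys are illustrative assumptions only.
```go
package main

import (
	"fmt"

	"github.com/aws/aws-sdk-go/aws/credentials"
)

func main() {
	// Providers are tried in order until one returns credentials, mirroring
	// the chain described above (explicit keys, environment variables,
	// shared credentials file, ...). The keys below are placeholders.
	chain := credentials.NewChainCredentials([]credentials.Provider{
		&credentials.StaticProvider{Value: credentials.Value{
			AccessKeyID:     "EXAMPLE_ACCESS_KEY",
			SecretAccessKey: "example-secret-key",
		}},
		&credentials.EnvProvider{},
		&credentials.SharedCredentialsProvider{Profile: "default"},
	})

	val, err := chain.Get()
	if err != nil {
		fmt.Println("no credentials resolved:", err)
		return
	}
	fmt.Println("credentials supplied by provider:", val.ProviderName)
}
```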
### Configuration:
```toml
[[inputs.cloudwatch]]
## Amazon Region (required)
region = 'us-east-1'
## Requested CloudWatch aggregation Period (required - must be a multiple of 60s)
period = '1m'
## Collection Delay (required - must account for metrics availability via CloudWatch API)
delay = '1m'
## Override global run interval (optional - defaults to global interval)
## Recommended: use a metric 'interval' that is a multiple of 'period' to avoid
## gaps or overlap in pulled data
interval = '1m'
## Metric Statistic Namespace (required)
namespace = 'AWS/ELB'
## Maximum requests per second. Note that the global default AWS rate limit is
## 10 reqs/sec, so if you define multiple namespaces, these should add up to a
## maximum of 10. Optional - default value is 10.
ratelimit = 10
## Metrics to Pull (optional)
## Defaults to all Metrics in Namespace if nothing is provided
## Refreshes Namespace available metrics every 1h
[[inputs.cloudwatch.metrics]]
names = ['Latency', 'RequestCount']
## Dimension filters for Metric (optional)
[[inputs.cloudwatch.metrics.dimensions]]
name = 'LoadBalancerName'
value = 'p-example'
[[inputs.cloudwatch.metrics.dimensions]]
name = 'AvailabilityZone'
value = '*'
```
#### Requirements and Terminology
Plugin configuration utilizes [CloudWatch concepts](http://docs.aws.amazon.com/AmazonCloudWatch/latest/DeveloperGuide/cloudwatch_concepts.html) and access patterns to allow monitoring of any CloudWatch Metric.
- `region` must be a valid AWS [Region](http://docs.aws.amazon.com/AmazonCloudWatch/latest/DeveloperGuide/cloudwatch_concepts.html#CloudWatchRegions) value
- `period` must be a valid CloudWatch [Period](http://docs.aws.amazon.com/AmazonCloudWatch/latest/DeveloperGuide/cloudwatch_concepts.html#CloudWatchPeriods) value
- `namespace` must be a valid CloudWatch [Namespace](http://docs.aws.amazon.com/AmazonCloudWatch/latest/DeveloperGuide/cloudwatch_concepts.html#Namespace) value
- `names` must be valid CloudWatch [Metric](http://docs.aws.amazon.com/AmazonCloudWatch/latest/DeveloperGuide/cloudwatch_concepts.html#Metric) names
- `dimensions` must be valid CloudWatch [Dimension](http://docs.aws.amazon.com/AmazonCloudWatch/latest/DeveloperGuide/cloudwatch_concepts.html#Dimension) name/value pairs
Omitting or specifying a value of `'*'` for a dimension value configures all available metrics that contain a dimension with the specified name
to be retrieved. If more than one dimension is specified, then the metric must contain *all* the configured dimensions, where the value of the
wildcard dimension is ignored.
Example:
```
[[inputs.cloudwatch.metrics]]
names = ['Latency']
## Dimension filters for Metric (optional)
[[inputs.cloudwatch.metrics.dimensions]]
name = 'LoadBalancerName'
value = 'p-example'
[[inputs.cloudwatch.metrics.dimensions]]
name = 'AvailabilityZone'
value = '*'
```
If the following ELBs are available:
- name: `p-example`, availabilityZone: `us-east-1a`
- name: `p-example`, availabilityZone: `us-east-1b`
- name: `q-example`, availabilityZone: `us-east-1a`
- name: `q-example`, availabilityZone: `us-east-1b`
Then 2 metrics will be output:
- name: `p-example`, availabilityZone: `us-east-1a`
- name: `p-example`, availabilityZone: `us-east-1b`
If the `AvailabilityZone` wildcard dimension was omitted, then a single metric (name: `p-example`)
would be exported containing the aggregate values of the ELB across availability zones.
#### Restrictions and Limitations
- CloudWatch metrics are not available instantly via the CloudWatch API. You should adjust your collection `delay` to account for this lag in metrics availability based on your [monitoring subscription level](http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/using-cloudwatch-new.html)
- CloudWatch API usage incurs cost - see [GetMetricStatistics Pricing](https://aws.amazon.com/cloudwatch/pricing/)
### Measurements & Fields:
Each CloudWatch Namespace monitored records a measurement with fields for each available Metric Statistic
Namespace and Metrics are represented in [snake case](https://en.wikipedia.org/wiki/Snake_case)
- cloudwatch_{namespace}
- {metric}_sum (metric Sum value)
- {metric}_average (metric Average value)
- {metric}_minimum (metric Minimum value)
- {metric}_maximum (metric Maximum value)
- {metric}_sample_count (metric SampleCount value)
### Tags:
Each measurement is tagged with the following identifiers to uniquely identify the associated metric
Tag Dimension names are represented in [snake case](https://en.wikipedia.org/wiki/Snake_case)
- All measurements have the following tags:
- region (CloudWatch Region)
- unit (CloudWatch Metric Unit)
- {dimension-name} (Cloudwatch Dimension value - one for each metric dimension)
### Example Output:
```
$ ./telegraf -config telegraf.conf -input-filter cloudwatch -test
> cloudwatch_aws_elb,load_balancer_name=p-example,region=us-east-1,unit=seconds latency_average=0.004810798017284538,latency_maximum=0.1100282669067383,latency_minimum=0.0006084442138671875,latency_sample_count=4029,latency_sum=19.382705211639404 1459542420000000000
```

View File

@@ -0,0 +1,396 @@
package cloudwatch
import (
"fmt"
"strings"
"sync"
"time"
"github.com/aws/aws-sdk-go/aws"
"github.com/aws/aws-sdk-go/service/cloudwatch"
"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/internal"
internalaws "github.com/influxdata/telegraf/internal/config/aws"
"github.com/influxdata/telegraf/internal/errchan"
"github.com/influxdata/telegraf/internal/limiter"
"github.com/influxdata/telegraf/plugins/inputs"
)
type (
CloudWatch struct {
Region string `toml:"region"`
AccessKey string `toml:"access_key"`
SecretKey string `toml:"secret_key"`
RoleARN string `toml:"role_arn"`
Profile string `toml:"profile"`
Filename string `toml:"shared_credential_file"`
Token string `toml:"token"`
Period internal.Duration `toml:"period"`
Delay internal.Duration `toml:"delay"`
Namespace string `toml:"namespace"`
Metrics []*Metric `toml:"metrics"`
CacheTTL internal.Duration `toml:"cache_ttl"`
RateLimit int `toml:"ratelimit"`
client cloudwatchClient
metricCache *MetricCache
}
Metric struct {
MetricNames []string `toml:"names"`
Dimensions []*Dimension `toml:"dimensions"`
}
Dimension struct {
Name string `toml:"name"`
Value string `toml:"value"`
}
MetricCache struct {
TTL time.Duration
Fetched time.Time
Metrics []*cloudwatch.Metric
}
cloudwatchClient interface {
ListMetrics(*cloudwatch.ListMetricsInput) (*cloudwatch.ListMetricsOutput, error)
GetMetricStatistics(*cloudwatch.GetMetricStatisticsInput) (*cloudwatch.GetMetricStatisticsOutput, error)
}
)
func (c *CloudWatch) SampleConfig() string {
return `
## Amazon Region
region = 'us-east-1'
## Amazon Credentials
## Credentials are loaded in the following order
## 1) Assumed credentials via STS if role_arn is specified
## 2) explicit credentials from 'access_key' and 'secret_key'
## 3) shared profile from 'profile'
## 4) environment variables
## 5) shared credentials file
## 6) EC2 Instance Profile
#access_key = ""
#secret_key = ""
#token = ""
#role_arn = ""
#profile = ""
#shared_credential_file = ""
## Requested CloudWatch aggregation Period (required - must be a multiple of 60s)
period = '1m'
## Collection Delay (required - must account for metrics availability via CloudWatch API)
delay = '1m'
## Recommended: use a metric 'interval' that is a multiple of 'period' to avoid
## gaps or overlap in pulled data
interval = '1m'
## Configure the TTL for the internal cache of metrics.
## Defaults to 1 hr if not specified
#cache_ttl = '10m'
## Metric Statistic Namespace (required)
namespace = 'AWS/ELB'
## Maximum requests per second. Note that the global default AWS rate limit is
## 10 reqs/sec, so if you define multiple namespaces, these should add up to a
## maximum of 10. Optional - default value is 10.
ratelimit = 10
## Metrics to Pull (optional)
## Defaults to all Metrics in Namespace if nothing is provided
## Refreshes Namespace available metrics every 1h
#[[inputs.cloudwatch.metrics]]
# names = ['Latency', 'RequestCount']
#
# ## Dimension filters for Metric (optional)
# [[inputs.cloudwatch.metrics.dimensions]]
# name = 'LoadBalancerName'
# value = 'p-example'
`
}
func (c *CloudWatch) Description() string {
return "Pull Metric Statistics from Amazon CloudWatch"
}
func (c *CloudWatch) Gather(acc telegraf.Accumulator) error {
if c.client == nil {
c.initializeCloudWatch()
}
var metrics []*cloudwatch.Metric
// check for provided metric filter
if c.Metrics != nil {
metrics = []*cloudwatch.Metric{}
for _, m := range c.Metrics {
if !hasWilcard(m.Dimensions) {
dimensions := make([]*cloudwatch.Dimension, len(m.Dimensions))
for k, d := range m.Dimensions {
fmt.Printf("Dimension [%s]:[%s]\n", d.Name, d.Value)
dimensions[k] = &cloudwatch.Dimension{
Name: aws.String(d.Name),
Value: aws.String(d.Value),
}
}
for _, name := range m.MetricNames {
metrics = append(metrics, &cloudwatch.Metric{
Namespace: aws.String(c.Namespace),
MetricName: aws.String(name),
Dimensions: dimensions,
})
}
} else {
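// Wildcard ('*' or empty) dimension value: list every metric in the
// namespace and keep only those whose dimensions match the filter.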
allMetrics, err := c.fetchNamespaceMetrics()
if err != nil {
return err
}
for _, name := range m.MetricNames {
for _, metric := range allMetrics {
if isSelected(metric, m.Dimensions) {
metrics = append(metrics, &cloudwatch.Metric{
Namespace: aws.String(c.Namespace),
MetricName: aws.String(name),
Dimensions: metric.Dimensions,
})
}
}
}
}
}
} else {
var err error
metrics, err = c.fetchNamespaceMetrics()
if err != nil {
return err
}
}
metricCount := len(metrics)
errChan := errchan.New(metricCount)
now := time.Now()
// limit concurrency or we can easily exhaust user connection limit
// see cloudwatch API request limits:
// http://docs.aws.amazon.com/AmazonCloudWatch/latest/DeveloperGuide/cloudwatch_limits.html
lmtr := limiter.NewRateLimiter(c.RateLimit, time.Second)
defer lmtr.Stop()
var wg sync.WaitGroup
wg.Add(len(metrics))
for _, m := range metrics {
<-lmtr.C
go func(inm *cloudwatch.Metric) {
defer wg.Done()
c.gatherMetric(acc, inm, now, errChan.C)
}(m)
}
wg.Wait()
return errChan.Error()
}
func init() {
inputs.Add("cloudwatch", func() telegraf.Input {
ttl, _ := time.ParseDuration("1h")
return &CloudWatch{
CacheTTL: internal.Duration{Duration: ttl},
RateLimit: 10,
}
})
}
/*
* Initialize CloudWatch client
*/
func (c *CloudWatch) initializeCloudWatch() error {
credentialConfig := &internalaws.CredentialConfig{
Region: c.Region,
AccessKey: c.AccessKey,
SecretKey: c.SecretKey,
RoleARN: c.RoleARN,
Profile: c.Profile,
Filename: c.Filename,
Token: c.Token,
}
configProvider := credentialConfig.Credentials()
c.client = cloudwatch.New(configProvider)
return nil
}
/*
* Fetch available metrics for given CloudWatch Namespace
*/
func (c *CloudWatch) fetchNamespaceMetrics() (metrics []*cloudwatch.Metric, err error) {
if c.metricCache != nil && c.metricCache.IsValid() {
metrics = c.metricCache.Metrics
return
}
metrics = []*cloudwatch.Metric{}
var token *string
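// Page through ListMetrics results until NextToken comes back nil.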
for more := true; more; {
params := &cloudwatch.ListMetricsInput{
Namespace: aws.String(c.Namespace),
Dimensions: []*cloudwatch.DimensionFilter{},
NextToken: token,
MetricName: nil,
}
resp, err := c.client.ListMetrics(params)
if err != nil {
return nil, err
}
metrics = append(metrics, resp.Metrics...)
token = resp.NextToken
more = token != nil
}
c.metricCache = &MetricCache{
Metrics: metrics,
Fetched: time.Now(),
TTL: c.CacheTTL.Duration,
}
return
}
/*
* Gather given Metric and emit any error
*/
func (c *CloudWatch) gatherMetric(
acc telegraf.Accumulator,
metric *cloudwatch.Metric,
now time.Time,
errChan chan error,
) {
params := c.getStatisticsInput(metric, now)
resp, err := c.client.GetMetricStatistics(params)
if err != nil {
errChan <- err
return
}
for _, point := range resp.Datapoints {
tags := map[string]string{
"region": c.Region,
"unit": snakeCase(*point.Unit),
}
for _, d := range metric.Dimensions {
tags[snakeCase(*d.Name)] = *d.Value
}
// record field for each statistic
fields := map[string]interface{}{}
if point.Average != nil {
fields[formatField(*metric.MetricName, cloudwatch.StatisticAverage)] = *point.Average
}
if point.Maximum != nil {
fields[formatField(*metric.MetricName, cloudwatch.StatisticMaximum)] = *point.Maximum
}
if point.Minimum != nil {
fields[formatField(*metric.MetricName, cloudwatch.StatisticMinimum)] = *point.Minimum
}
if point.SampleCount != nil {
fields[formatField(*metric.MetricName, cloudwatch.StatisticSampleCount)] = *point.SampleCount
}
if point.Sum != nil {
fields[formatField(*metric.MetricName, cloudwatch.StatisticSum)] = *point.Sum
}
acc.AddFields(formatMeasurement(c.Namespace), fields, tags, *point.Timestamp)
}
errChan <- nil
}
/*
* Formatting helpers
*/
func formatField(metricName string, statistic string) string {
return fmt.Sprintf("%s_%s", snakeCase(metricName), snakeCase(statistic))
}
func formatMeasurement(namespace string) string {
namespace = strings.Replace(namespace, "/", "_", -1)
namespace = snakeCase(namespace)
return fmt.Sprintf("cloudwatch_%s", namespace)
}
func snakeCase(s string) string {
s = internal.SnakeCase(s)
s = strings.Replace(s, "__", "_", -1)
return s
}
/*
* Map Metric to *cloudwatch.GetMetricStatisticsInput for given timeframe
*/
func (c *CloudWatch) getStatisticsInput(metric *cloudwatch.Metric, now time.Time) *cloudwatch.GetMetricStatisticsInput {
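// The request window ends 'delay' ago and spans one 'period':
// [now - delay - period, now - delay]. The delay accounts for CloudWatch's
// own lag in making datapoints queryable.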
end := now.Add(-c.Delay.Duration)
input := &cloudwatch.GetMetricStatisticsInput{
StartTime: aws.Time(end.Add(-c.Period.Duration)),
EndTime: aws.Time(end),
MetricName: metric.MetricName,
Namespace: metric.Namespace,
Period: aws.Int64(int64(c.Period.Duration.Seconds())),
Dimensions: metric.Dimensions,
Statistics: []*string{
aws.String(cloudwatch.StatisticAverage),
aws.String(cloudwatch.StatisticMaximum),
aws.String(cloudwatch.StatisticMinimum),
aws.String(cloudwatch.StatisticSum),
aws.String(cloudwatch.StatisticSampleCount)},
}
return input
}
/*
* Check Metric Cache validity
*/
func (c *MetricCache) IsValid() bool {
return c.Metrics != nil && time.Since(c.Fetched) < c.TTL
}
func hasWilcard(dimensions []*Dimension) bool {
for _, d := range dimensions {
if d.Value == "" || d.Value == "*" {
return true
}
}
return false
}
func isSelected(metric *cloudwatch.Metric, dimensions []*Dimension) bool {
if len(metric.Dimensions) != len(dimensions) {
return false
}
for _, d := range dimensions {
selected := false
for _, d2 := range metric.Dimensions {
if d.Name == *d2.Name {
if d.Value == "" || d.Value == "*" || d.Value == *d2.Value {
selected = true
}
}
}
if !selected {
return false
}
}
return true
}

Some files were not shown because too many files have changed in this diff.