From 3af65e7abb68ef4e6785ed36fbae5c412f25b64c Mon Sep 17 00:00:00 2001 From: Ranjib Dey Date: Fri, 27 May 2016 09:44:41 -0700 Subject: [PATCH 01/34] Fix typo in output plugin example (#1290) --- CONTRIBUTING.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 3997a448e..6792abaa4 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -212,8 +212,8 @@ func (s *Simple) Close() error { } func (s *Simple) Write(metrics []telegraf.Metric) error { - for _, pt := range points { - // write `pt` to the output sink here + for _, metric := range metrics { + // write `metric` to the output sink here } return nil } From 0fb2d2ffaecd18afae30c91365b340a6f23e98cb Mon Sep 17 00:00:00 2001 From: Robin Percy Date: Tue, 29 Mar 2016 21:39:50 -0700 Subject: [PATCH 02/34] Adding a conntrack input plugin - Collects conntrack stats from the configured directories and files. Applying PR feedback: - Rebased onto master - Updated README/CHANGELOG - Limited lines to 80 chars - Improved plugin docs and README - added a dummy notlinux build file Fixed up CHANGELOG and README after rebase closes #1164 --- CHANGELOG.md | 1 + README.md | 1 + etc/telegraf.conf | 10 ++ plugins/inputs/all/all.go | 1 + plugins/inputs/conntrack/README.md | 56 +++++++++ plugins/inputs/conntrack/conntrack.go | 119 ++++++++++++++++++ .../inputs/conntrack/conntrack_notlinux.go | 3 + plugins/inputs/conntrack/conntrack_test.go | 90 +++++++++++++ 8 files changed, 281 insertions(+) create mode 100644 plugins/inputs/conntrack/README.md create mode 100644 plugins/inputs/conntrack/conntrack.go create mode 100644 plugins/inputs/conntrack/conntrack_notlinux.go create mode 100644 plugins/inputs/conntrack/conntrack_test.go diff --git a/CHANGELOG.md b/CHANGELOG.md index faa36cf38..0f66771af 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -19,6 +19,7 @@ time before a new metric is included by the plugin. ### Features +- [#1164](https://github.com/influxdata/telegraf/pull/1164): conntrack input plugin. Thanks @robinpercy! - [#1247](https://github.com/influxdata/telegraf/pull/1247): rollbar input plugin. Thanks @francois2metz and @cduez! - [#1208](https://github.com/influxdata/telegraf/pull/1208): Standardized AWS credentials evaluation & wildcard CloudWatch dimensions. Thanks @johnrengelman! - [#1264](https://github.com/influxdata/telegraf/pull/1264): Add SSL config options to http_response plugin. diff --git a/README.md b/README.md index 1a6a04382..5adcdb39d 100644 --- a/README.md +++ b/README.md @@ -145,6 +145,7 @@ Currently implemented sources: * [cassandra](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/cassandra) * [ceph](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/ceph) * [chrony](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/chrony) +* [conntrack](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/conntrack) * [couchbase](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/couchbase) * [couchdb](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/couchdb) * [disque](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/disque) diff --git a/etc/telegraf.conf b/etc/telegraf.conf index 4081cf484..f0d8a3361 100644 --- a/etc/telegraf.conf +++ b/etc/telegraf.conf @@ -1575,3 +1575,13 @@ # ## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md # data_format = "influx" +# # Collects conntrack stats from the configured directories and files. 
+# [[inputs.conntrack]]
+#   ## The following defaults would work with multiple versions of conntrack. Note the nf_ and ip_
+#   ## filename prefixes are mutually exclusive across conntrack versions, as are the directory locations.
+#
+#   ## Superset of filenames to look for within the conntrack dirs. Missing files will be ignored.
+#   files = ["ip_conntrack_count","ip_conntrack_max","nf_conntrack_count","nf_conntrack_max"]
+#
+#   ## Directories to search within for the conntrack files above. Missing directories will be ignored.
+#   dirs = ["/proc/sys/net/ipv4/netfilter","/proc/sys/net/netfilter"]
diff --git a/plugins/inputs/all/all.go b/plugins/inputs/all/all.go
index c2322c436..8c12e0858 100644
--- a/plugins/inputs/all/all.go
+++ b/plugins/inputs/all/all.go
@@ -8,6 +8,7 @@ import (
 	_ "github.com/influxdata/telegraf/plugins/inputs/ceph"
 	_ "github.com/influxdata/telegraf/plugins/inputs/chrony"
 	_ "github.com/influxdata/telegraf/plugins/inputs/cloudwatch"
+	_ "github.com/influxdata/telegraf/plugins/inputs/conntrack"
 	_ "github.com/influxdata/telegraf/plugins/inputs/couchbase"
 	_ "github.com/influxdata/telegraf/plugins/inputs/couchdb"
 	_ "github.com/influxdata/telegraf/plugins/inputs/disque"
diff --git a/plugins/inputs/conntrack/README.md b/plugins/inputs/conntrack/README.md
new file mode 100644
index 000000000..dff20337e
--- /dev/null
+++ b/plugins/inputs/conntrack/README.md
@@ -0,0 +1,56 @@
+# Conntrack Plugin
+
+Collects stats from Netfilter's conntrack-tools.
+
+The conntrack-tools provide a mechanism for tracking various aspects of
+network connections as they are processed by netfilter. At runtime,
+conntrack exposes many of those connection statistics within /proc/sys/net.
+Depending on your kernel version, these files can be found in either
+/proc/sys/net/ipv4/netfilter or /proc/sys/net/netfilter and will be
+prefixed with either ip_ or nf_. This plugin reads the files specified
+in its configuration and publishes each one as a field, with the prefix
+normalized to ip_.
+
+In order to simplify configuration in a heterogeneous environment, a superset
+of directory and filenames can be specified. Any locations that don't exist
+will be ignored.
+
+For more information on conntrack-tools, see the
+[Netfilter Documentation](http://conntrack-tools.netfilter.org/).
+
+
+### Configuration:
+
+```toml
+  # Collects conntrack stats from the configured directories and files.
+  [[inputs.conntrack]]
+  ## The following defaults would work with multiple versions of conntrack.
+  ## Note the nf_ and ip_ filename prefixes are mutually exclusive across
+  ## kernel versions, as are the directory locations.
+
+  ## Superset of filenames to look for within the conntrack dirs.
+  ## Missing files will be ignored.
+  files = ["ip_conntrack_count","ip_conntrack_max",
+           "nf_conntrack_count","nf_conntrack_max"]
+
+  ## Directories to search within for the conntrack files above.
+  ## Missing directories will be ignored.
+  dirs = ["/proc/sys/net/ipv4/netfilter","/proc/sys/net/netfilter"]
+```
+
+### Measurements & Fields:
+
+- conntrack
+  - ip_conntrack_count (int, count): the number of entries in the conntrack table
+  - ip_conntrack_max (int, size): the max capacity of the conntrack table
+
+### Tags:
+
+This input does not use tags.
+
+### Example Output:
+
+```
+$ ./telegraf -config telegraf.conf -input-filter conntrack -test
+conntrack,host=myhost ip_conntrack_count=2,ip_conntrack_max=262144 1461620427667995735
+```
diff --git a/plugins/inputs/conntrack/conntrack.go b/plugins/inputs/conntrack/conntrack.go
new file mode 100644
index 000000000..68bf8adba
--- /dev/null
+++ b/plugins/inputs/conntrack/conntrack.go
@@ -0,0 +1,119 @@
+// +build linux
+
+package conntrack
+
+import (
+	"fmt"
+	"io/ioutil"
+	"os"
+	"strconv"
+	"strings"
+
+	"github.com/influxdata/telegraf"
+	"github.com/influxdata/telegraf/plugins/inputs"
+	"log"
+	"path/filepath"
+)
+
+type Conntrack struct {
+	Path  string
+	Dirs  []string
+	Files []string
+}
+
+const (
+	inputName = "conntrack"
+)
+
+var dfltDirs = []string{
+	"/proc/sys/net/ipv4/netfilter",
+	"/proc/sys/net/netfilter",
+}
+
+var dfltFiles = []string{
+	"ip_conntrack_count",
+	"ip_conntrack_max",
+	"nf_conntrack_count",
+	"nf_conntrack_max",
+}
+
+func (c *Conntrack) setDefaults() {
+	if len(c.Dirs) == 0 {
+		c.Dirs = dfltDirs
+	}
+
+	if len(c.Files) == 0 {
+		c.Files = dfltFiles
+	}
+}
+
+func (c *Conntrack) Description() string {
+	return "Collects conntrack stats from the configured directories and files."
+}
+
+var sampleConfig = `
+  ## The following defaults would work with multiple versions of conntrack.
+  ## Note the nf_ and ip_ filename prefixes are mutually exclusive across
+  ## kernel versions, as are the directory locations.
+
+  ## Superset of filenames to look for within the conntrack dirs.
+  ## Missing files will be ignored.
+  files = ["ip_conntrack_count","ip_conntrack_max",
+           "nf_conntrack_count","nf_conntrack_max"]
+
+  ## Directories to search within for the conntrack files above.
+  ## Missing directories will be ignored.
+  dirs = ["/proc/sys/net/ipv4/netfilter","/proc/sys/net/netfilter"]
+`
+
+func (c *Conntrack) SampleConfig() string {
+	return sampleConfig
+}
+
+func (c *Conntrack) Gather(acc telegraf.Accumulator) error {
+	c.setDefaults()
+
+	var metricKey string
+	fields := make(map[string]interface{})
+
+	for _, dir := range c.Dirs {
+		for _, file := range c.Files {
+			// NOTE: no system will have both nf_ and ip_ prefixes,
+			// so we're safe to branch on suffix only.
+			parts := strings.SplitN(file, "_", 2)
+			if len(parts) < 2 {
+				continue
+			}
+			metricKey = "ip_" + parts[1]
+
+			fName := filepath.Join(dir, file)
+			if _, err := os.Stat(fName); err != nil {
+				continue
+			}
+
+			contents, err := ioutil.ReadFile(fName)
+			if err != nil {
+				log.Printf("failed to read file '%s': %v", fName, err)
+				continue
+			}
+
+			v := strings.TrimSpace(string(contents))
+			val, err := strconv.ParseFloat(v, 64)
+			if err != nil {
+				log.Printf("failed to parse metric, expected number but "+
+					"found '%s': %v", v, err)
+				continue
+			}
+			fields[metricKey] = val
+		}
+	}
+
+	if len(fields) == 0 {
+		return fmt.Errorf("Conntrack input failed to collect metrics. " +
+			"Is the conntrack kernel module loaded?")
+	}
+
+	acc.AddFields(inputName, fields, nil)
+	return nil
+}
+
+func init() {
+	inputs.Add(inputName, func() telegraf.Input { return &Conntrack{} })
+}
diff --git a/plugins/inputs/conntrack/conntrack_notlinux.go b/plugins/inputs/conntrack/conntrack_notlinux.go
new file mode 100644
index 000000000..11948731b
--- /dev/null
+++ b/plugins/inputs/conntrack/conntrack_notlinux.go
@@ -0,0 +1,3 @@
+// +build !linux
+
+package conntrack
diff --git a/plugins/inputs/conntrack/conntrack_test.go b/plugins/inputs/conntrack/conntrack_test.go
new file mode 100644
index 000000000..c457006ac
--- /dev/null
+++ b/plugins/inputs/conntrack/conntrack_test.go
@@ -0,0 +1,90 @@
+// +build linux
+
+package conntrack
+
+import (
+	"github.com/influxdata/telegraf/testutil"
+	"github.com/stretchr/testify/assert"
+	"io/ioutil"
+	"os"
+	"path"
+	"strconv"
+	"strings"
+	"testing"
+)
+
+func restoreDflts(savedFiles, savedDirs []string) {
+	dfltFiles = savedFiles
+	dfltDirs = savedDirs
+}
+
+func TestNoFilesFound(t *testing.T) {
+	defer restoreDflts(dfltFiles, dfltDirs)
+
+	dfltFiles = []string{"baz.txt"}
+	dfltDirs = []string{"./foo/bar"}
+	c := &Conntrack{}
+	acc := &testutil.Accumulator{}
+	err := c.Gather(acc)
+
+	assert.EqualError(t, err, "Conntrack input failed to collect metrics. "+
+		"Is the conntrack kernel module loaded?")
+}
+
+func TestDefaultsUsed(t *testing.T) {
+	defer restoreDflts(dfltFiles, dfltDirs)
+	tmpdir, err := ioutil.TempDir("", "tmp1")
+	assert.NoError(t, err)
+	defer os.RemoveAll(tmpdir)
+
+	tmpFile, err := ioutil.TempFile(tmpdir, "ip_conntrack_count")
+	assert.NoError(t, err)
+
+	dfltDirs = []string{tmpdir}
+	fname := path.Base(tmpFile.Name())
+	dfltFiles = []string{fname}
+
+	count := 1234321
+	ioutil.WriteFile(tmpFile.Name(), []byte(strconv.Itoa(count)), 0660)
+	c := &Conntrack{}
+	acc := &testutil.Accumulator{}
+
+	c.Gather(acc)
+	acc.AssertContainsFields(t, inputName, map[string]interface{}{
+		fname: float64(count)})
+}
+
+func TestConfigsUsed(t *testing.T) {
+	defer restoreDflts(dfltFiles, dfltDirs)
+	tmpdir, err := ioutil.TempDir("", "tmp1")
+	assert.NoError(t, err)
+	defer os.RemoveAll(tmpdir)
+
+	cntFile, err := ioutil.TempFile(tmpdir, "nf_conntrack_count")
+	assert.NoError(t, err)
+	maxFile, err := ioutil.TempFile(tmpdir, "nf_conntrack_max")
+	assert.NoError(t, err)
+
+	dfltDirs = []string{tmpdir}
+	cntFname := path.Base(cntFile.Name())
+	maxFname := path.Base(maxFile.Name())
+	dfltFiles = []string{cntFname, maxFname}
+
+	count := 1234321
+	max := 9999999
+	ioutil.WriteFile(cntFile.Name(), []byte(strconv.Itoa(count)), 0660)
+	ioutil.WriteFile(maxFile.Name(), []byte(strconv.Itoa(max)), 0660)
+	c := &Conntrack{}
+	acc := &testutil.Accumulator{}
+
+	c.Gather(acc)
+
+	fix := func(s string) string {
+		return strings.Replace(s, "nf_", "ip_", 1)
+	}
+
+	acc.AssertContainsFields(t, inputName,
+		map[string]interface{}{
+			fix(cntFname): float64(count),
+			fix(maxFname): float64(max),
+		})
+}
From ed2d1d9bb7f46d82a108de063855d0e8e534ab17 Mon Sep 17 00:00:00 2001
From: Jan Shim
Date: Wed, 6 Apr 2016 11:35:33 -0700
Subject: [PATCH 03/34] Add kernel_vmstat input plugins

---
 etc/telegraf.conf                             |   3 +
 plugins/inputs/system/KERNEL_VMSTAT_README.md | 226 +++++++++++++
 plugins/inputs/system/kernel_vmstat.go        | 173 ++++++++++
 plugins/inputs/system/kernel_vmstat_test.go   | 315 ++++++++++++++++++
 4 files changed, 717 insertions(+)
 create mode 100644 plugins/inputs/system/KERNEL_VMSTAT_README.md
 create mode 100644 plugins/inputs/system/kernel_vmstat.go
 create mode 100644 
plugins/inputs/system/kernel_vmstat_test.go diff --git a/etc/telegraf.conf b/etc/telegraf.conf index f0d8a3361..302bb3be5 100644 --- a/etc/telegraf.conf +++ b/etc/telegraf.conf @@ -422,6 +422,9 @@ [[inputs.kernel]] # no configuration +# # Get kernel statistics from /proc/vmstat +# [[inputs.kernel_vmstat]] +# # no configuration # Read metrics about memory usage [[inputs.mem]] diff --git a/plugins/inputs/system/KERNEL_VMSTAT_README.md b/plugins/inputs/system/KERNEL_VMSTAT_README.md new file mode 100644 index 000000000..c16f23aab --- /dev/null +++ b/plugins/inputs/system/KERNEL_VMSTAT_README.md @@ -0,0 +1,226 @@ +# Kernel VMStat Input Plugin + +This plugin is only available on Linux. + +The kernel_vmstat plugin gathers info about the kernel that doesn't fit into other +plugins. In general, it is the statistics available in `/proc/vmstat` that are +not covered by other plugins. + +The metrics are documented in `man proc` under the `/proc/vmstat` section. + +``` +/proc/vmstat +kernel/system statistics. Common entries include (from http://www.linuxinsight.com/proc_vmstat.html): + +Number of pages that are dirty, under writeback or unstable: + +nr_dirty 1550 +nr_writeback 0 +nr_unstable 0 + +Number of pages allocated to page tables, mapped by files or allocated by the kernel slab allocator: + +nr_page_table_pages 699 +nr_mapped 139596 +nr_slab 42723 + +Number of pageins and pageouts (since the last boot): + +pgpgin 33754195 +pgpgout 38985992 + +Number of swapins and swapouts (since the last boot): + +pswpin 2473 +pswpout 2995 + +Number of page allocations per zone (since the last boot): + +pgalloc_high 0 +pgalloc_normal 110123213 +pgalloc_dma32 0 +pgalloc_dma 415219 + +Number of page frees, activations and deactivations (since the last boot): + +pgfree 110549163 +pgactivate 4509729 +pgdeactivate 2136215 + +Number of minor and major page faults (since the last boot): + +pgfault 80663722 +pgmajfault 49813 + +Number of page refills (per zone, since the last boot): + +pgrefill_high 0 +pgrefill_normal 5817500 +pgrefill_dma32 0 +pgrefill_dma 149176 + +Number of page steals (per zone, since the last boot): + +pgsteal_high 0 +pgsteal_normal 10421346 +pgsteal_dma32 0 +pgsteal_dma 142196 + +Number of pages scanned by the kswapd daemon (per zone, since the last boot): + +pgscan_kswapd_high 0 +pgscan_kswapd_normal 10491424 +pgscan_kswapd_dma32 0 +pgscan_kswapd_dma 156130 + +Number of pages reclaimed directly (per zone, since the last boot): + +pgscan_direct_high 0 +pgscan_direct_normal 11904 +pgscan_direct_dma32 0 +pgscan_direct_dma 225 + +Number of pages reclaimed via inode freeing (since the last boot): + +pginodesteal 11 + +Number of slab objects scanned (since the last boot): + +slabs_scanned 8926976 + +Number of pages reclaimed by kswapd (since the last boot): + +kswapd_steal 10551674 + +Number of pages reclaimed by kswapd via inode freeing (since the last boot): + +kswapd_inodesteal 338730 + +Number of kswapd's calls to page reclaim (since the last boot): + +pageoutrun 181908 + +Number of direct reclaim calls (since the last boot): + +allocstall 160 + +Miscellaneous statistics: + +pgrotated 3781 +nr_bounce 0 +``` + +### Configuration: + +```toml +# Get kernel statistics from /proc/vmstat +[[inputs.kernel_vmstat]] + # no configuration +``` + +### Measurements & Fields: + +- kernel_vmstat + - nr_free_pages (integer, `nr_free_pages`) + - nr_inactive_anon (integer, `nr_inactive_anon`) + - nr_active_anon (integer, `nr_active_anon`) + - nr_inactive_file (integer, `nr_inactive_file`) + - nr_active_file 
(integer, `nr_active_file`) + - nr_unevictable (integer, `nr_unevictable`) + - nr_mlock (integer, `nr_mlock`) + - nr_anon_pages (integer, `nr_anon_pages`) + - nr_mapped (integer, `nr_mapped`) + - nr_file_pages (integer, `nr_file_pages`) + - nr_dirty (integer, `nr_dirty`) + - nr_writeback (integer, `nr_writeback`) + - nr_slab_reclaimable (integer, `nr_slab_reclaimable`) + - nr_slab_unreclaimable (integer, `nr_slab_unreclaimable`) + - nr_page_table_pages (integer, `nr_page_table_pages`) + - nr_kernel_stack (integer, `nr_kernel_stack`) + - nr_unstable (integer, `nr_unstable`) + - nr_bounce (integer, `nr_bounce`) + - nr_vmscan_write (integer, `nr_vmscan_write`) + - nr_writeback_temp (integer, `nr_writeback_temp`) + - nr_isolated_anon (integer, `nr_isolated_anon`) + - nr_isolated_file (integer, `nr_isolated_file`) + - nr_shmem (integer, `nr_shmem`) + - numa_hit (integer, `numa_hit`) + - numa_miss (integer, `numa_miss`) + - numa_foreign (integer, `numa_foreign`) + - numa_interleave (integer, `numa_interleave`) + - numa_local (integer, `numa_local`) + - numa_other (integer, `numa_other`) + - nr_anon_transparent_hugepages (integer, `nr_anon_transparent_hugepages`) + - pgpgin (integer, `pgpgin`) + - pgpgout (integer, `pgpgout`) + - pswpin (integer, `pswpin`) + - pswpout (integer, `pswpout`) + - pgalloc_dma (integer, `pgalloc_dma`) + - pgalloc_dma32 (integer, `pgalloc_dma32`) + - pgalloc_normal (integer, `pgalloc_normal`) + - pgalloc_movable (integer, `pgalloc_movable`) + - pgfree (integer, `pgfree`) + - pgactivate (integer, `pgactivate`) + - pgdeactivate (integer, `pgdeactivate`) + - pgfault (integer, `pgfault`) + - pgmajfault (integer, `pgmajfault`) + - pgrefill_dma (integer, `pgrefill_dma`) + - pgrefill_dma32 (integer, `pgrefill_dma32`) + - pgrefill_normal (integer, `pgrefill_normal`) + - pgrefill_movable (integer, `pgrefill_movable`) + - pgsteal_dma (integer, `pgsteal_dma`) + - pgsteal_dma32 (integer, `pgsteal_dma32`) + - pgsteal_normal (integer, `pgsteal_normal`) + - pgsteal_movable (integer, `pgsteal_movable`) + - pgscan_kswapd_dma (integer, `pgscan_kswapd_dma`) + - pgscan_kswapd_dma32 (integer, `pgscan_kswapd_dma32`) + - pgscan_kswapd_normal (integer, `pgscan_kswapd_normal`) + - pgscan_kswapd_movable (integer, `pgscan_kswapd_movable`) + - pgscan_direct_dma (integer, `pgscan_direct_dma`) + - pgscan_direct_dma32 (integer, `pgscan_direct_dma32`) + - pgscan_direct_normal (integer, `pgscan_direct_normal`) + - pgscan_direct_movable (integer, `pgscan_direct_movable`) + - zone_reclaim_failed (integer, `zone_reclaim_failed`) + - pginodesteal (integer, `pginodesteal`) + - slabs_scanned (integer, `slabs_scanned`) + - kswapd_steal (integer, `kswapd_steal`) + - kswapd_inodesteal (integer, `kswapd_inodesteal`) + - kswapd_low_wmark_hit_quickly (integer, `kswapd_low_wmark_hit_quickly`) + - kswapd_high_wmark_hit_quickly (integer, `kswapd_high_wmark_hit_quickly`) + - kswapd_skip_congestion_wait (integer, `kswapd_skip_congestion_wait`) + - pageoutrun (integer, `pageoutrun`) + - allocstall (integer, `allocstall`) + - pgrotated (integer, `pgrotated`) + - compact_blocks_moved (integer, `compact_blocks_moved`) + - compact_pages_moved (integer, `compact_pages_moved`) + - compact_pagemigrate_failed (integer, `compact_pagemigrate_failed`) + - compact_stall (integer, `compact_stall`) + - compact_fail (integer, `compact_fail`) + - compact_success (integer, `compact_success`) + - htlb_buddy_alloc_success (integer, `htlb_buddy_alloc_success`) + - htlb_buddy_alloc_fail (integer, `htlb_buddy_alloc_fail`) + - 
unevictable_pgs_culled (integer, `unevictable_pgs_culled`) + - unevictable_pgs_scanned (integer, `unevictable_pgs_scanned`) + - unevictable_pgs_rescued (integer, `unevictable_pgs_rescued`) + - unevictable_pgs_mlocked (integer, `unevictable_pgs_mlocked`) + - unevictable_pgs_munlocked (integer, `unevictable_pgs_munlocked`) + - unevictable_pgs_cleared (integer, `unevictable_pgs_cleared`) + - unevictable_pgs_stranded (integer, `unevictable_pgs_stranded`) + - unevictable_pgs_mlockfreed (integer, `unevictable_pgs_mlockfreed`) + - thp_fault_alloc (integer, `thp_fault_alloc`) + - thp_fault_fallback (integer, `thp_fault_fallback`) + - thp_collapse_alloc (integer, `thp_collapse_alloc`) + - thp_collapse_alloc_failed (integer, `thp_collapse_alloc_failed`) + - thp_split (integer, `thp_split`) + +### Tags: + +None + +### Example Output: + +``` +$ telegraf -config ~/ws/telegraf.conf -input-filter kernel_vmstat -test +* Plugin: kernel_vmstat, Collection 1 +> kernel_vmstat allocstall=81496i,compact_blocks_moved=238196i,compact_fail=135220i,compact_pagemigrate_failed=0i,compact_pages_moved=6370588i,compact_stall=142092i,compact_success=6872i,htlb_buddy_alloc_fail=0i,htlb_buddy_alloc_success=0i,kswapd_high_wmark_hit_quickly=25439i,kswapd_inodesteal=29770874i,kswapd_low_wmark_hit_quickly=8756i,kswapd_skip_congestion_wait=0i,kswapd_steal=291534428i,nr_active_anon=2515657i,nr_active_file=2244914i,nr_anon_pages=1358675i,nr_anon_transparent_hugepages=2034i,nr_bounce=0i,nr_dirty=5690i,nr_file_pages=5153546i,nr_free_pages=78730i,nr_inactive_anon=426259i,nr_inactive_file=2366791i,nr_isolated_anon=0i,nr_isolated_file=0i,nr_kernel_stack=579i,nr_mapped=558821i,nr_mlock=0i,nr_page_table_pages=11115i,nr_shmem=541689i,nr_slab_reclaimable=459806i,nr_slab_unreclaimable=47859i,nr_unevictable=0i,nr_unstable=0i,nr_vmscan_write=6206i,nr_writeback=0i,nr_writeback_temp=0i,numa_foreign=0i,numa_hit=5113399878i,numa_interleave=35793i,numa_local=5113399878i,numa_miss=0i,numa_other=0i,pageoutrun=505006i,pgactivate=375664931i,pgalloc_dma=0i,pgalloc_dma32=122480220i,pgalloc_movable=0i,pgalloc_normal=5233176719i,pgdeactivate=122735906i,pgfault=8699921410i,pgfree=5359765021i,pginodesteal=9188431i,pgmajfault=122210i,pgpgin=219717626i,pgpgout=3495885510i,pgrefill_dma=0i,pgrefill_dma32=1180010i,pgrefill_movable=0i,pgrefill_normal=119866676i,pgrotated=60620i,pgscan_direct_dma=0i,pgscan_direct_dma32=12256i,pgscan_direct_movable=0i,pgscan_direct_normal=31501600i,pgscan_kswapd_dma=0i,pgscan_kswapd_dma32=4480608i,pgscan_kswapd_movable=0i,pgscan_kswapd_normal=287857984i,pgsteal_dma=0i,pgsteal_dma32=4466436i,pgsteal_movable=0i,pgsteal_normal=318463755i,pswpin=2092i,pswpout=6206i,slabs_scanned=93775616i,thp_collapse_alloc=24857i,thp_collapse_alloc_failed=102214i,thp_fault_alloc=346219i,thp_fault_fallback=895453i,thp_split=9817i,unevictable_pgs_cleared=0i,unevictable_pgs_culled=1531i,unevictable_pgs_mlocked=6988i,unevictable_pgs_mlockfreed=0i,unevictable_pgs_munlocked=6988i,unevictable_pgs_rescued=5426i,unevictable_pgs_scanned=0i,unevictable_pgs_stranded=0i,zone_reclaim_failed=0i 1459455200071462843 +``` diff --git a/plugins/inputs/system/kernel_vmstat.go b/plugins/inputs/system/kernel_vmstat.go new file mode 100644 index 000000000..c40a437c0 --- /dev/null +++ b/plugins/inputs/system/kernel_vmstat.go @@ -0,0 +1,173 @@ +// +build linux + +package system + +import ( + "bytes" + "fmt" + "io/ioutil" + "os" + "strconv" + + "github.com/influxdata/telegraf" + "github.com/influxdata/telegraf/plugins/inputs" +) + +// /proc/vmstat file line prefixes to gather 
stats on. +// This is currently not being used as we are retrieving all the stats. Left here for references. +var ( + nr_free_pages = []byte("nr_free_pages") + nr_inactive_anon = []byte("nr_inactive_anon") + nr_active_anon = []byte("nr_active_anon") + nr_inactive_file = []byte("nr_inactive_file") + nr_active_file = []byte("nr_active_file") + nr_unevictable = []byte("nr_unevictable") + nr_mlock = []byte("nr_mlock") + nr_anon_pages = []byte("nr_anon_pages") + nr_mapped = []byte("nr_mapped") + nr_file_pages = []byte("nr_file_pages") + nr_dirty = []byte("nr_dirty") + nr_writeback = []byte("nr_writeback") + nr_slab_reclaimable = []byte("nr_slab_reclaimable") + nr_slab_unreclaimable = []byte("nr_slab_unreclaimable") + nr_page_table_pages = []byte("nr_page_table_pages") + nr_kernel_stack = []byte("nr_kernel_stack") + nr_unstable = []byte("nr_unstable") + nr_bounce = []byte("nr_bounce") + nr_vmscan_write = []byte("nr_vmscan_write") + nr_writeback_temp = []byte("nr_writeback_temp") + nr_isolated_anon = []byte("nr_isolated_anon") + nr_isolated_file = []byte("nr_isolated_file") + nr_shmem = []byte("nr_shmem") + numa_hit = []byte("numa_hit") + numa_miss = []byte("numa_miss") + numa_foreign = []byte("numa_foreign") + numa_interleave = []byte("numa_interleave") + numa_local = []byte("numa_local") + numa_other = []byte("numa_other") + nr_anon_transparent_hugepages = []byte("nr_anon_transparent_hugepages") + pgpgin = []byte("pgpgin") + pgpgout = []byte("pgpgout") + pswpin = []byte("pswpin") + pswpout = []byte("pswpout") + pgalloc_dma = []byte("pgalloc_dma") + pgalloc_dma32 = []byte("pgalloc_dma32") + pgalloc_normal = []byte("pgalloc_normal") + pgalloc_movable = []byte("pgalloc_movable") + pgfree = []byte("pgfree") + pgactivate = []byte("pgactivate") + pgdeactivate = []byte("pgdeactivate") + pgfault = []byte("pgfault") + pgmajfault = []byte("pgmajfault") + pgrefill_dma = []byte("pgrefill_dma") + pgrefill_dma32 = []byte("pgrefill_dma32") + pgrefill_normal = []byte("pgrefill_normal") + pgrefill_movable = []byte("pgrefill_movable") + pgsteal_dma = []byte("pgsteal_dma") + pgsteal_dma32 = []byte("pgsteal_dma32") + pgsteal_normal = []byte("pgsteal_normal") + pgsteal_movable = []byte("pgsteal_movable") + pgscan_kswapd_dma = []byte("pgscan_kswapd_dma") + pgscan_kswapd_dma32 = []byte("pgscan_kswapd_dma32") + pgscan_kswapd_normal = []byte("pgscan_kswapd_normal") + pgscan_kswapd_movable = []byte("pgscan_kswapd_movable") + pgscan_direct_dma = []byte("pgscan_direct_dma") + pgscan_direct_dma32 = []byte("pgscan_direct_dma32") + pgscan_direct_normal = []byte("pgscan_direct_normal") + pgscan_direct_movable = []byte("pgscan_direct_movable") + zone_reclaim_failed = []byte("zone_reclaim_failed") + pginodesteal = []byte("pginodesteal") + slabs_scanned = []byte("slabs_scanned") + kswapd_steal = []byte("kswapd_steal") + kswapd_inodesteal = []byte("kswapd_inodesteal") + kswapd_low_wmark_hit_quickly = []byte("kswapd_low_wmark_hit_quickly") + kswapd_high_wmark_hit_quickly = []byte("kswapd_high_wmark_hit_quickly") + kswapd_skip_congestion_wait = []byte("kswapd_skip_congestion_wait") + pageoutrun = []byte("pageoutrun") + allocstall = []byte("allocstall") + pgrotated = []byte("pgrotated") + compact_blocks_moved = []byte("compact_blocks_moved") + compact_pages_moved = []byte("compact_pages_moved") + compact_pagemigrate_failed = []byte("compact_pagemigrate_failed") + compact_stall = []byte("compact_stall") + compact_fail = []byte("compact_fail") + compact_success = []byte("compact_success") + htlb_buddy_alloc_success = 
[]byte("htlb_buddy_alloc_success") + htlb_buddy_alloc_fail = []byte("htlb_buddy_alloc_fail") + unevictable_pgs_culled = []byte("unevictable_pgs_culled") + unevictable_pgs_scanned = []byte("unevictable_pgs_scanned") + unevictable_pgs_rescued = []byte("unevictable_pgs_rescued") + unevictable_pgs_mlocked = []byte("unevictable_pgs_mlocked") + unevictable_pgs_munlocked = []byte("unevictable_pgs_munlocked") + unevictable_pgs_cleared = []byte("unevictable_pgs_cleared") + unevictable_pgs_stranded = []byte("unevictable_pgs_stranded") + unevictable_pgs_mlockfreed = []byte("unevictable_pgs_mlockfreed") + thp_fault_alloc = []byte("thp_fault_alloc") + thp_fault_fallback = []byte("thp_fault_fallback") + thp_collapse_alloc = []byte("thp_collapse_alloc") + thp_collapse_alloc_failed = []byte("thp_collapse_alloc_failed") + thp_split = []byte("thp_split") +) + +type KernelVmstat struct { + statFile string +} + +func (k *KernelVmstat) Description() string { + return "Get kernel statistics from /proc/vmstat" +} + +func (k *KernelVmstat) SampleConfig() string { + return `[[inputs.kernel_vmstat]]` +} + +func (k *KernelVmstat) Gather(acc telegraf.Accumulator) error { + data, err := k.getProcVmstat() + if err != nil { + return err + } + + fields := make(map[string]interface{}) + + dataFields := bytes.Fields(data) + for i, field := range dataFields { + + // dataFields is an array of {"stat1_name", "stat1_value", "stat2_name", "stat2_value", ...} + // We only want the even number index as that contain the stat name. + if i%2 == 0 { + // Convert the stat value into an integer. + m, err := strconv.Atoi(string(dataFields[i+1])) + if err != nil { + return err + } + + fields[string(field)] = int64(m) + } + } + + acc.AddFields("kernel_vmstat", fields, map[string]string{}) + return nil +} + +func (k *KernelVmstat) getProcVmstat() ([]byte, error) { + if _, err := os.Stat(k.statFile); os.IsNotExist(err) { + return nil, fmt.Errorf("kernel_vmstat: %s does not exist!", k.statFile) + } else if err != nil { + return nil, err + } + + data, err := ioutil.ReadFile(k.statFile) + if err != nil { + return nil, err + } + + return data, nil +} + +func init() { + inputs.Add("kernel_vmstat", func() telegraf.Input { + return &KernelVmstat{ + statFile: "/proc/vmstat", + } + }) +} diff --git a/plugins/inputs/system/kernel_vmstat_test.go b/plugins/inputs/system/kernel_vmstat_test.go new file mode 100644 index 000000000..963cf7f4a --- /dev/null +++ b/plugins/inputs/system/kernel_vmstat_test.go @@ -0,0 +1,315 @@ +// +build linux + +package system + +import ( + "io/ioutil" + "os" + "testing" + + "github.com/influxdata/telegraf/testutil" + + "github.com/stretchr/testify/assert" +) + +func TestFullVmStatProcFile(t *testing.T) { + tmpfile := makeFakeStatFile([]byte(vmStatFile_Full)) + defer os.Remove(tmpfile) + + k := KernelVmstat{ + statFile: tmpfile, + } + + acc := testutil.Accumulator{} + err := k.Gather(&acc) + assert.NoError(t, err) + + fields := map[string]interface{}{ + "nr_free_pages": int64(78730), + "nr_inactive_anon": int64(426259), + "nr_active_anon": int64(2515657), + "nr_inactive_file": int64(2366791), + "nr_active_file": int64(2244914), + "nr_unevictable": int64(0), + "nr_mlock": int64(0), + "nr_anon_pages": int64(1358675), + "nr_mapped": int64(558821), + "nr_file_pages": int64(5153546), + "nr_dirty": int64(5690), + "nr_writeback": int64(0), + "nr_slab_reclaimable": int64(459806), + "nr_slab_unreclaimable": int64(47859), + "nr_page_table_pages": int64(11115), + "nr_kernel_stack": int64(579), + "nr_unstable": int64(0), + "nr_bounce": 
int64(0), + "nr_vmscan_write": int64(6206), + "nr_writeback_temp": int64(0), + "nr_isolated_anon": int64(0), + "nr_isolated_file": int64(0), + "nr_shmem": int64(541689), + "numa_hit": int64(5113399878), + "numa_miss": int64(0), + "numa_foreign": int64(0), + "numa_interleave": int64(35793), + "numa_local": int64(5113399878), + "numa_other": int64(0), + "nr_anon_transparent_hugepages": int64(2034), + "pgpgin": int64(219717626), + "pgpgout": int64(3495885510), + "pswpin": int64(2092), + "pswpout": int64(6206), + "pgalloc_dma": int64(0), + "pgalloc_dma32": int64(122480220), + "pgalloc_normal": int64(5233176719), + "pgalloc_movable": int64(0), + "pgfree": int64(5359765021), + "pgactivate": int64(375664931), + "pgdeactivate": int64(122735906), + "pgfault": int64(8699921410), + "pgmajfault": int64(122210), + "pgrefill_dma": int64(0), + "pgrefill_dma32": int64(1180010), + "pgrefill_normal": int64(119866676), + "pgrefill_movable": int64(0), + "pgsteal_dma": int64(0), + "pgsteal_dma32": int64(4466436), + "pgsteal_normal": int64(318463755), + "pgsteal_movable": int64(0), + "pgscan_kswapd_dma": int64(0), + "pgscan_kswapd_dma32": int64(4480608), + "pgscan_kswapd_normal": int64(287857984), + "pgscan_kswapd_movable": int64(0), + "pgscan_direct_dma": int64(0), + "pgscan_direct_dma32": int64(12256), + "pgscan_direct_normal": int64(31501600), + "pgscan_direct_movable": int64(0), + "zone_reclaim_failed": int64(0), + "pginodesteal": int64(9188431), + "slabs_scanned": int64(93775616), + "kswapd_steal": int64(291534428), + "kswapd_inodesteal": int64(29770874), + "kswapd_low_wmark_hit_quickly": int64(8756), + "kswapd_high_wmark_hit_quickly": int64(25439), + "kswapd_skip_congestion_wait": int64(0), + "pageoutrun": int64(505006), + "allocstall": int64(81496), + "pgrotated": int64(60620), + "compact_blocks_moved": int64(238196), + "compact_pages_moved": int64(6370588), + "compact_pagemigrate_failed": int64(0), + "compact_stall": int64(142092), + "compact_fail": int64(135220), + "compact_success": int64(6872), + "htlb_buddy_alloc_success": int64(0), + "htlb_buddy_alloc_fail": int64(0), + "unevictable_pgs_culled": int64(1531), + "unevictable_pgs_scanned": int64(0), + "unevictable_pgs_rescued": int64(5426), + "unevictable_pgs_mlocked": int64(6988), + "unevictable_pgs_munlocked": int64(6988), + "unevictable_pgs_cleared": int64(0), + "unevictable_pgs_stranded": int64(0), + "unevictable_pgs_mlockfreed": int64(0), + "thp_fault_alloc": int64(346219), + "thp_fault_fallback": int64(895453), + "thp_collapse_alloc": int64(24857), + "thp_collapse_alloc_failed": int64(102214), + "thp_split": int64(9817), + } + acc.AssertContainsFields(t, "kernel_vmstat", fields) +} + +func TestPartialVmStatProcFile(t *testing.T) { + tmpfile := makeFakeStatFile([]byte(vmStatFile_Partial)) + defer os.Remove(tmpfile) + + k := KernelVmstat{ + statFile: tmpfile, + } + + acc := testutil.Accumulator{} + err := k.Gather(&acc) + assert.NoError(t, err) + + fields := map[string]interface{}{ + "unevictable_pgs_culled": int64(1531), + "unevictable_pgs_scanned": int64(0), + "unevictable_pgs_rescued": int64(5426), + "unevictable_pgs_mlocked": int64(6988), + "unevictable_pgs_munlocked": int64(6988), + "unevictable_pgs_cleared": int64(0), + "unevictable_pgs_stranded": int64(0), + "unevictable_pgs_mlockfreed": int64(0), + "thp_fault_alloc": int64(346219), + "thp_fault_fallback": int64(895453), + "thp_collapse_alloc": int64(24857), + "thp_collapse_alloc_failed": int64(102214), + "thp_split": int64(9817), + } + acc.AssertContainsFields(t, "kernel_vmstat", fields) 
+} + +func TestInvalidVmStatProcFile1(t *testing.T) { + tmpfile := makeFakeStatFile([]byte(vmStatFile_Invalid)) + defer os.Remove(tmpfile) + + k := KernelVmstat{ + statFile: tmpfile, + } + + acc := testutil.Accumulator{} + err := k.Gather(&acc) + assert.Error(t, err) +} + +func TestNoVmStatProcFile(t *testing.T) { + tmpfile := makeFakeStatFile([]byte(vmStatFile_Invalid)) + os.Remove(tmpfile) + + k := KernelVmstat{ + statFile: tmpfile, + } + + acc := testutil.Accumulator{} + err := k.Gather(&acc) + assert.Error(t, err) + assert.Contains(t, err.Error(), "does not exist") +} + +const vmStatFile_Full = `nr_free_pages 78730 +nr_inactive_anon 426259 +nr_active_anon 2515657 +nr_inactive_file 2366791 +nr_active_file 2244914 +nr_unevictable 0 +nr_mlock 0 +nr_anon_pages 1358675 +nr_mapped 558821 +nr_file_pages 5153546 +nr_dirty 5690 +nr_writeback 0 +nr_slab_reclaimable 459806 +nr_slab_unreclaimable 47859 +nr_page_table_pages 11115 +nr_kernel_stack 579 +nr_unstable 0 +nr_bounce 0 +nr_vmscan_write 6206 +nr_writeback_temp 0 +nr_isolated_anon 0 +nr_isolated_file 0 +nr_shmem 541689 +numa_hit 5113399878 +numa_miss 0 +numa_foreign 0 +numa_interleave 35793 +numa_local 5113399878 +numa_other 0 +nr_anon_transparent_hugepages 2034 +pgpgin 219717626 +pgpgout 3495885510 +pswpin 2092 +pswpout 6206 +pgalloc_dma 0 +pgalloc_dma32 122480220 +pgalloc_normal 5233176719 +pgalloc_movable 0 +pgfree 5359765021 +pgactivate 375664931 +pgdeactivate 122735906 +pgfault 8699921410 +pgmajfault 122210 +pgrefill_dma 0 +pgrefill_dma32 1180010 +pgrefill_normal 119866676 +pgrefill_movable 0 +pgsteal_dma 0 +pgsteal_dma32 4466436 +pgsteal_normal 318463755 +pgsteal_movable 0 +pgscan_kswapd_dma 0 +pgscan_kswapd_dma32 4480608 +pgscan_kswapd_normal 287857984 +pgscan_kswapd_movable 0 +pgscan_direct_dma 0 +pgscan_direct_dma32 12256 +pgscan_direct_normal 31501600 +pgscan_direct_movable 0 +zone_reclaim_failed 0 +pginodesteal 9188431 +slabs_scanned 93775616 +kswapd_steal 291534428 +kswapd_inodesteal 29770874 +kswapd_low_wmark_hit_quickly 8756 +kswapd_high_wmark_hit_quickly 25439 +kswapd_skip_congestion_wait 0 +pageoutrun 505006 +allocstall 81496 +pgrotated 60620 +compact_blocks_moved 238196 +compact_pages_moved 6370588 +compact_pagemigrate_failed 0 +compact_stall 142092 +compact_fail 135220 +compact_success 6872 +htlb_buddy_alloc_success 0 +htlb_buddy_alloc_fail 0 +unevictable_pgs_culled 1531 +unevictable_pgs_scanned 0 +unevictable_pgs_rescued 5426 +unevictable_pgs_mlocked 6988 +unevictable_pgs_munlocked 6988 +unevictable_pgs_cleared 0 +unevictable_pgs_stranded 0 +unevictable_pgs_mlockfreed 0 +thp_fault_alloc 346219 +thp_fault_fallback 895453 +thp_collapse_alloc 24857 +thp_collapse_alloc_failed 102214 +thp_split 9817` + +const vmStatFile_Partial = `unevictable_pgs_culled 1531 +unevictable_pgs_scanned 0 +unevictable_pgs_rescued 5426 +unevictable_pgs_mlocked 6988 +unevictable_pgs_munlocked 6988 +unevictable_pgs_cleared 0 +unevictable_pgs_stranded 0 +unevictable_pgs_mlockfreed 0 +thp_fault_alloc 346219 +thp_fault_fallback 895453 +thp_collapse_alloc 24857 +thp_collapse_alloc_failed 102214 +thp_split 9817` + +// invalid thp_split measurement +const vmStatFile_Invalid = `unevictable_pgs_culled 1531 +unevictable_pgs_scanned 0 +unevictable_pgs_rescued 5426 +unevictable_pgs_mlocked 6988 +unevictable_pgs_munlocked 6988 +unevictable_pgs_cleared 0 +unevictable_pgs_stranded 0 +unevictable_pgs_mlockfreed 0 +thp_fault_alloc 346219 +thp_fault_fallback 895453 +thp_collapse_alloc 24857 +thp_collapse_alloc_failed 102214 +thp_split abcd` + +func 
makeFakeVmStatFile(content []byte) string { + tmpfile, err := ioutil.TempFile("", "kernel_vmstat_test") + if err != nil { + panic(err) + } + + if _, err := tmpfile.Write(content); err != nil { + panic(err) + } + if err := tmpfile.Close(); err != nil { + panic(err) + } + + return tmpfile.Name() +} From a7dfbce3d3040ba4790c57290cec9fb87907b0e3 Mon Sep 17 00:00:00 2001 From: robinpercy-xm Date: Sat, 21 May 2016 15:48:02 -0700 Subject: [PATCH 04/34] Addressing PR feedback - Updated README/CHANGELOG - Added links to further info to input README - Reduced lines to 80 chars Removing input declaration from SampleConfig Moved PR to unreleased section of changelog closes #1165 --- CHANGELOG.md | 1 + README.md | 1 + plugins/inputs/system/KERNEL_VMSTAT_README.md | 11 +- plugins/inputs/system/kernel_vmstat.go | 101 +----------------- 4 files changed, 10 insertions(+), 104 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0f66771af..da2b41a5f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -20,6 +20,7 @@ time before a new metric is included by the plugin. ### Features - [#1164](https://github.com/influxdata/telegraf/pull/1164): conntrack input plugin. Thanks @robinpercy! +- [#1165](https://github.com/influxdata/telegraf/pull/1165): vmstat input plugin. Thanks @jshim-xm! - [#1247](https://github.com/influxdata/telegraf/pull/1247): rollbar input plugin. Thanks @francois2metz and @cduez! - [#1208](https://github.com/influxdata/telegraf/pull/1208): Standardized AWS credentials evaluation & wildcard CloudWatch dimensions. Thanks @johnrengelman! - [#1264](https://github.com/influxdata/telegraf/pull/1264): Add SSL config options to http_response plugin. diff --git a/README.md b/README.md index 5adcdb39d..7366d5986 100644 --- a/README.md +++ b/README.md @@ -206,6 +206,7 @@ Currently implemented sources: * swap * processes * kernel (/proc/stat) + * kernel (/proc/vmstat) Telegraf can also collect metrics via the following service plugins: diff --git a/plugins/inputs/system/KERNEL_VMSTAT_README.md b/plugins/inputs/system/KERNEL_VMSTAT_README.md index c16f23aab..7235e2f2b 100644 --- a/plugins/inputs/system/KERNEL_VMSTAT_README.md +++ b/plugins/inputs/system/KERNEL_VMSTAT_README.md @@ -1,12 +1,11 @@ # Kernel VMStat Input Plugin -This plugin is only available on Linux. +The kernel_vmstat plugin gathers virtual memory statistics +by reading /proc/vmstat. For a full list of available fields see the +/proc/vmstat section of the [proc man page](http://man7.org/linux/man-pages/man5/proc.5.html). +For a better idea of what each field represents, see the +[vmstat man page](http://linux.die.net/man/8/vmstat). -The kernel_vmstat plugin gathers info about the kernel that doesn't fit into other -plugins. In general, it is the statistics available in `/proc/vmstat` that are -not covered by other plugins. - -The metrics are documented in `man proc` under the `/proc/vmstat` section. ``` /proc/vmstat diff --git a/plugins/inputs/system/kernel_vmstat.go b/plugins/inputs/system/kernel_vmstat.go index c40a437c0..93c822789 100644 --- a/plugins/inputs/system/kernel_vmstat.go +++ b/plugins/inputs/system/kernel_vmstat.go @@ -13,102 +13,6 @@ import ( "github.com/influxdata/telegraf/plugins/inputs" ) -// /proc/vmstat file line prefixes to gather stats on. -// This is currently not being used as we are retrieving all the stats. Left here for references. 
-var ( - nr_free_pages = []byte("nr_free_pages") - nr_inactive_anon = []byte("nr_inactive_anon") - nr_active_anon = []byte("nr_active_anon") - nr_inactive_file = []byte("nr_inactive_file") - nr_active_file = []byte("nr_active_file") - nr_unevictable = []byte("nr_unevictable") - nr_mlock = []byte("nr_mlock") - nr_anon_pages = []byte("nr_anon_pages") - nr_mapped = []byte("nr_mapped") - nr_file_pages = []byte("nr_file_pages") - nr_dirty = []byte("nr_dirty") - nr_writeback = []byte("nr_writeback") - nr_slab_reclaimable = []byte("nr_slab_reclaimable") - nr_slab_unreclaimable = []byte("nr_slab_unreclaimable") - nr_page_table_pages = []byte("nr_page_table_pages") - nr_kernel_stack = []byte("nr_kernel_stack") - nr_unstable = []byte("nr_unstable") - nr_bounce = []byte("nr_bounce") - nr_vmscan_write = []byte("nr_vmscan_write") - nr_writeback_temp = []byte("nr_writeback_temp") - nr_isolated_anon = []byte("nr_isolated_anon") - nr_isolated_file = []byte("nr_isolated_file") - nr_shmem = []byte("nr_shmem") - numa_hit = []byte("numa_hit") - numa_miss = []byte("numa_miss") - numa_foreign = []byte("numa_foreign") - numa_interleave = []byte("numa_interleave") - numa_local = []byte("numa_local") - numa_other = []byte("numa_other") - nr_anon_transparent_hugepages = []byte("nr_anon_transparent_hugepages") - pgpgin = []byte("pgpgin") - pgpgout = []byte("pgpgout") - pswpin = []byte("pswpin") - pswpout = []byte("pswpout") - pgalloc_dma = []byte("pgalloc_dma") - pgalloc_dma32 = []byte("pgalloc_dma32") - pgalloc_normal = []byte("pgalloc_normal") - pgalloc_movable = []byte("pgalloc_movable") - pgfree = []byte("pgfree") - pgactivate = []byte("pgactivate") - pgdeactivate = []byte("pgdeactivate") - pgfault = []byte("pgfault") - pgmajfault = []byte("pgmajfault") - pgrefill_dma = []byte("pgrefill_dma") - pgrefill_dma32 = []byte("pgrefill_dma32") - pgrefill_normal = []byte("pgrefill_normal") - pgrefill_movable = []byte("pgrefill_movable") - pgsteal_dma = []byte("pgsteal_dma") - pgsteal_dma32 = []byte("pgsteal_dma32") - pgsteal_normal = []byte("pgsteal_normal") - pgsteal_movable = []byte("pgsteal_movable") - pgscan_kswapd_dma = []byte("pgscan_kswapd_dma") - pgscan_kswapd_dma32 = []byte("pgscan_kswapd_dma32") - pgscan_kswapd_normal = []byte("pgscan_kswapd_normal") - pgscan_kswapd_movable = []byte("pgscan_kswapd_movable") - pgscan_direct_dma = []byte("pgscan_direct_dma") - pgscan_direct_dma32 = []byte("pgscan_direct_dma32") - pgscan_direct_normal = []byte("pgscan_direct_normal") - pgscan_direct_movable = []byte("pgscan_direct_movable") - zone_reclaim_failed = []byte("zone_reclaim_failed") - pginodesteal = []byte("pginodesteal") - slabs_scanned = []byte("slabs_scanned") - kswapd_steal = []byte("kswapd_steal") - kswapd_inodesteal = []byte("kswapd_inodesteal") - kswapd_low_wmark_hit_quickly = []byte("kswapd_low_wmark_hit_quickly") - kswapd_high_wmark_hit_quickly = []byte("kswapd_high_wmark_hit_quickly") - kswapd_skip_congestion_wait = []byte("kswapd_skip_congestion_wait") - pageoutrun = []byte("pageoutrun") - allocstall = []byte("allocstall") - pgrotated = []byte("pgrotated") - compact_blocks_moved = []byte("compact_blocks_moved") - compact_pages_moved = []byte("compact_pages_moved") - compact_pagemigrate_failed = []byte("compact_pagemigrate_failed") - compact_stall = []byte("compact_stall") - compact_fail = []byte("compact_fail") - compact_success = []byte("compact_success") - htlb_buddy_alloc_success = []byte("htlb_buddy_alloc_success") - htlb_buddy_alloc_fail = []byte("htlb_buddy_alloc_fail") - unevictable_pgs_culled = 
[]byte("unevictable_pgs_culled") - unevictable_pgs_scanned = []byte("unevictable_pgs_scanned") - unevictable_pgs_rescued = []byte("unevictable_pgs_rescued") - unevictable_pgs_mlocked = []byte("unevictable_pgs_mlocked") - unevictable_pgs_munlocked = []byte("unevictable_pgs_munlocked") - unevictable_pgs_cleared = []byte("unevictable_pgs_cleared") - unevictable_pgs_stranded = []byte("unevictable_pgs_stranded") - unevictable_pgs_mlockfreed = []byte("unevictable_pgs_mlockfreed") - thp_fault_alloc = []byte("thp_fault_alloc") - thp_fault_fallback = []byte("thp_fault_fallback") - thp_collapse_alloc = []byte("thp_collapse_alloc") - thp_collapse_alloc_failed = []byte("thp_collapse_alloc_failed") - thp_split = []byte("thp_split") -) - type KernelVmstat struct { statFile string } @@ -118,7 +22,7 @@ func (k *KernelVmstat) Description() string { } func (k *KernelVmstat) SampleConfig() string { - return `[[inputs.kernel_vmstat]]` + return "" } func (k *KernelVmstat) Gather(acc telegraf.Accumulator) error { @@ -132,7 +36,8 @@ func (k *KernelVmstat) Gather(acc telegraf.Accumulator) error { dataFields := bytes.Fields(data) for i, field := range dataFields { - // dataFields is an array of {"stat1_name", "stat1_value", "stat2_name", "stat2_value", ...} + // dataFields is an array of {"stat1_name", "stat1_value", "stat2_name", + // "stat2_value", ...} // We only want the even number index as that contain the stat name. if i%2 == 0 { // Convert the stat value into an integer. From eeeab5192b21677766474729397fa1406f5f5caa Mon Sep 17 00:00:00 2001 From: vanillahsu Date: Tue, 31 May 2016 17:58:35 +0800 Subject: [PATCH 05/34] Add gelf serializer & graylog output filter. (#1167) * add gelf serializer. * change url. * handle fields in correct format. * add graylog. * handle host field of graylog. * 1: Add go-gelf entry to Godeps to fix ci. 2: switch to github.com/Graylog2/go-gelf. * implement Close(). * Deprecated gelf serializer, and back to graylog-golang. * Update graylog-golang's hash. * move gelf related function to graylog.go. * 1: remove uneeded deps on Godeps_windows. 2: add README.md 3: add unittest. * Fix unittest on 'go test -race' --- plugins/outputs/all/all.go | 1 + plugins/outputs/graylog/README.md | 5 + plugins/outputs/graylog/graylog.go | 247 ++++++++++++++++++++++++ plugins/outputs/graylog/graylog_test.go | 55 ++++++ 4 files changed, 308 insertions(+) create mode 100644 plugins/outputs/graylog/README.md create mode 100644 plugins/outputs/graylog/graylog.go create mode 100644 plugins/outputs/graylog/graylog_test.go diff --git a/plugins/outputs/all/all.go b/plugins/outputs/all/all.go index 5b223529c..27f8958fe 100644 --- a/plugins/outputs/all/all.go +++ b/plugins/outputs/all/all.go @@ -7,6 +7,7 @@ import ( _ "github.com/influxdata/telegraf/plugins/outputs/datadog" _ "github.com/influxdata/telegraf/plugins/outputs/file" _ "github.com/influxdata/telegraf/plugins/outputs/graphite" + _ "github.com/influxdata/telegraf/plugins/outputs/graylog" _ "github.com/influxdata/telegraf/plugins/outputs/influxdb" _ "github.com/influxdata/telegraf/plugins/outputs/instrumental" _ "github.com/influxdata/telegraf/plugins/outputs/kafka" diff --git a/plugins/outputs/graylog/README.md b/plugins/outputs/graylog/README.md new file mode 100644 index 000000000..26b8d8fc6 --- /dev/null +++ b/plugins/outputs/graylog/README.md @@ -0,0 +1,5 @@ +# Graylog Output Plugin + +This plugin writes to a Graylog instance using the "gelf" format. + +It requires a `servers` name. 
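+
+A minimal configuration, mirroring the plugin's sample config in
+graylog.go below, looks like this (the endpoint addresses are
+examples only):
+
+```toml
+# Send telegraf metrics to graylog(s)
+[[outputs.graylog]]
+  ## UDP endpoint for your graylog instance.
+  servers = ["127.0.0.1:12201", "192.168.1.1:12201"]
+```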
diff --git a/plugins/outputs/graylog/graylog.go b/plugins/outputs/graylog/graylog.go new file mode 100644 index 000000000..7f2480134 --- /dev/null +++ b/plugins/outputs/graylog/graylog.go @@ -0,0 +1,247 @@ +package graylog + +import ( + "bytes" + "compress/zlib" + "crypto/rand" + "encoding/binary" + ejson "encoding/json" + "fmt" + "github.com/influxdata/telegraf" + "github.com/influxdata/telegraf/plugins/outputs" + "io" + "math" + "net" + "os" +) + +const ( + defaultGraylogEndpoint = "127.0.0.1:12201" + defaultConnection = "wan" + defaultMaxChunkSizeWan = 1420 + defaultMaxChunkSizeLan = 8154 +) + +type GelfConfig struct { + GraylogEndpoint string + Connection string + MaxChunkSizeWan int + MaxChunkSizeLan int +} + +type Gelf struct { + GelfConfig +} + +func NewGelfWriter(config GelfConfig) *Gelf { + if config.GraylogEndpoint == "" { + config.GraylogEndpoint = defaultGraylogEndpoint + } + + if config.Connection == "" { + config.Connection = defaultConnection + } + + if config.MaxChunkSizeWan == 0 { + config.MaxChunkSizeWan = defaultMaxChunkSizeWan + } + + if config.MaxChunkSizeLan == 0 { + config.MaxChunkSizeLan = defaultMaxChunkSizeLan + } + + g := &Gelf{GelfConfig: config} + + return g +} + +func (g *Gelf) Write(message []byte) (n int, err error) { + compressed := g.compress(message) + + chunksize := g.GelfConfig.MaxChunkSizeWan + length := compressed.Len() + + if length > chunksize { + + chunkCountInt := int(math.Ceil(float64(length) / float64(chunksize))) + + id := make([]byte, 8) + rand.Read(id) + + for i, index := 0, 0; i < length; i, index = i+chunksize, index+1 { + packet := g.createChunkedMessage(index, chunkCountInt, id, &compressed) + _, err = g.send(packet.Bytes()) + if err != nil { + return 0, err + } + } + } else { + _, err = g.send(compressed.Bytes()) + if err != nil { + return 0, err + } + } + + n = len(message) + + return +} + +func (g *Gelf) createChunkedMessage(index int, chunkCountInt int, id []byte, compressed *bytes.Buffer) bytes.Buffer { + var packet bytes.Buffer + + chunksize := g.getChunksize() + + packet.Write(g.intToBytes(30)) + packet.Write(g.intToBytes(15)) + packet.Write(id) + + packet.Write(g.intToBytes(index)) + packet.Write(g.intToBytes(chunkCountInt)) + + packet.Write(compressed.Next(chunksize)) + + return packet +} + +func (g *Gelf) getChunksize() int { + if g.GelfConfig.Connection == "wan" { + return g.GelfConfig.MaxChunkSizeWan + } + + if g.GelfConfig.Connection == "lan" { + return g.GelfConfig.MaxChunkSizeLan + } + + return g.GelfConfig.MaxChunkSizeWan +} + +func (g *Gelf) intToBytes(i int) []byte { + buf := new(bytes.Buffer) + + binary.Write(buf, binary.LittleEndian, int8(i)) + return buf.Bytes() +} + +func (g *Gelf) compress(b []byte) bytes.Buffer { + var buf bytes.Buffer + comp := zlib.NewWriter(&buf) + + comp.Write(b) + comp.Close() + + return buf +} + +func (g *Gelf) send(b []byte) (n int, err error) { + udpAddr, err := net.ResolveUDPAddr("udp", g.GelfConfig.GraylogEndpoint) + if err != nil { + return + } + + conn, err := net.DialUDP("udp", nil, udpAddr) + if err != nil { + return + } + + n, err = conn.Write(b) + return +} + +type Graylog struct { + Servers []string + writer io.Writer +} + +var sampleConfig = ` + ## Udp endpoint for your graylog instance. 
+ servers = ["127.0.0.1:12201", "192.168.1.1:12201"] +` + +func (g *Graylog) Connect() error { + writers := []io.Writer{} + + if len(g.Servers) == 0 { + g.Servers = append(g.Servers, "localhost:12201") + } + + for _, server := range g.Servers { + w := NewGelfWriter(GelfConfig{GraylogEndpoint: server}) + writers = append(writers, w) + } + + g.writer = io.MultiWriter(writers...) + return nil +} + +func (g *Graylog) Close() error { + return nil +} + +func (g *Graylog) SampleConfig() string { + return sampleConfig +} + +func (g *Graylog) Description() string { + return "Send telegraf metrics to graylog(s)" +} + +func (g *Graylog) Write(metrics []telegraf.Metric) error { + if len(metrics) == 0 { + return nil + } + + for _, metric := range metrics { + values, err := serialize(metric) + if err != nil { + return err + } + + for _, value := range values { + _, err := g.writer.Write([]byte(value)) + if err != nil { + return fmt.Errorf("FAILED to write message: %s, %s", value, err) + } + } + } + return nil +} + +func serialize(metric telegraf.Metric) ([]string, error) { + out := []string{} + + m := make(map[string]interface{}) + m["version"] = "1.1" + m["timestamp"] = metric.UnixNano() / 1000000000 + m["short_message"] = " " + m["name"] = metric.Name() + + if host, ok := metric.Tags()["host"]; ok { + m["host"] = host + } else { + host, err := os.Hostname() + if err != nil { + return []string{}, err + } + m["host"] = host + } + + for key, value := range metric.Fields() { + nkey := fmt.Sprintf("_%s", key) + m[nkey] = value + } + + serialized, err := ejson.Marshal(m) + if err != nil { + return []string{}, err + } + out = append(out, string(serialized)) + + return out, nil +} + +func init() { + outputs.Add("graylog", func() telegraf.Output { + return &Graylog{} + }) +} diff --git a/plugins/outputs/graylog/graylog_test.go b/plugins/outputs/graylog/graylog_test.go new file mode 100644 index 000000000..521f83dc1 --- /dev/null +++ b/plugins/outputs/graylog/graylog_test.go @@ -0,0 +1,55 @@ +package graylog + +import ( + "bytes" + "compress/zlib" + "encoding/json" + "io" + "net" + "sync" + "testing" + + "github.com/influxdata/telegraf/testutil" + "github.com/stretchr/testify/assert" +) + +func TestWrite(t *testing.T) { + var wg sync.WaitGroup + wg.Add(1) + go UDPServer(t, &wg) + + i := Graylog{ + Servers: []string{"127.0.0.1:12201"}, + } + i.Connect() + + metrics := testutil.MockMetrics() + metrics = append(metrics, testutil.TestMetric(int64(1234567890))) + + i.Write(metrics) + + wg.Wait() + i.Close() +} + +type GelfObject map[string]interface{} + +func UDPServer(t *testing.T, wg *sync.WaitGroup) { + serverAddr, _ := net.ResolveUDPAddr("udp", "127.0.0.1:12201") + udpServer, _ := net.ListenUDP("udp", serverAddr) + defer wg.Done() + + bufR := make([]byte, 1024) + n, _, _ := udpServer.ReadFromUDP(bufR) + + b := bytes.NewReader(bufR[0:n]) + r, _ := zlib.NewReader(b) + + bufW := bytes.NewBuffer(nil) + io.Copy(bufW, r) + r.Close() + + var obj GelfObject + json.Unmarshal(bufW.Bytes(), &obj) + assert.Equal(t, obj["_value"], float64(1)) +} From 069764f05edeea26469464d0c7ddcbefc33c9f83 Mon Sep 17 00:00:00 2001 From: Cameron Sparr Date: Tue, 31 May 2016 11:02:10 +0100 Subject: [PATCH 06/34] Update README & etc/telegraf.conf --- README.md | 1 + etc/telegraf.conf | 275 +++++++++++++++++++++++++--------------------- 2 files changed, 152 insertions(+), 124 deletions(-) diff --git a/README.md b/README.md index 7366d5986..7890fa113 100644 --- a/README.md +++ b/README.md @@ -233,6 +233,7 @@ want to add support for another 
service or third-party API. * [datadog](https://github.com/influxdata/telegraf/tree/master/plugins/outputs/datadog) * [file](https://github.com/influxdata/telegraf/tree/master/plugins/outputs/file) * [graphite](https://github.com/influxdata/telegraf/tree/master/plugins/outputs/graphite) +* [graylog](https://github.com/influxdata/telegraf/tree/master/plugins/outputs/graylog) * [instrumental](https://github.com/influxdata/telegraf/tree/master/plugins/outputs/instrumental) * [kafka](https://github.com/influxdata/telegraf/tree/master/plugins/outputs/kafka) * [librato](https://github.com/influxdata/telegraf/tree/master/plugins/outputs/librato) diff --git a/etc/telegraf.conf b/etc/telegraf.conf index 302bb3be5..2d03b0f2b 100644 --- a/etc/telegraf.conf +++ b/etc/telegraf.conf @@ -106,10 +106,10 @@ # [[outputs.amon]] # ## Amon Server Key # server_key = "my-server-key" # required. -# +# # ## Amon Instance URL # amon_instance = "https://youramoninstance" # required -# +# # ## Connection timeout. # # timeout = "5s" @@ -125,21 +125,21 @@ # ## Telegraf tag to use as a routing key # ## ie, if this tag exists, it's value will be used as the routing key # routing_tag = "host" -# +# # ## InfluxDB retention policy # # retention_policy = "default" # ## InfluxDB database # # database = "telegraf" # ## InfluxDB precision # # precision = "s" -# +# # ## Optional SSL Config # # ssl_ca = "/etc/telegraf/ca.pem" # # ssl_cert = "/etc/telegraf/cert.pem" # # ssl_key = "/etc/telegraf/key.pem" # ## Use SSL but skip chain & host verification # # insecure_skip_verify = false -# +# # ## Data format to output. # ## Each data format has it's own unique set of configuration options, read # ## more about them here: @@ -151,16 +151,22 @@ # [[outputs.cloudwatch]] # ## Amazon REGION # region = 'us-east-1' -# +# # ## Amazon Credentials # ## Credentials are loaded in the following order -# ## 1) explicit credentials from 'access_key' and 'secret_key' -# ## 2) environment variables -# ## 3) shared credentials file -# ## 4) EC2 Instance Profile +# ## 1) Assumed credentials via STS if role_arn is specified +# ## 2) explicit credentials from 'access_key' and 'secret_key' +# ## 3) shared profile from 'profile' +# ## 4) environment variables +# ## 5) shared credentials file +# ## 6) EC2 Instance Profile # #access_key = "" # #secret_key = "" -# +# #token = "" +# #role_arn = "" +# #profile = "" +# #shared_credential_file = "" +# # ## Namespace for the CloudWatch MetricDatums # namespace = 'InfluxData/Telegraf' @@ -169,7 +175,7 @@ # [[outputs.datadog]] # ## Datadog API key # apikey = "my-secret-key" # required. -# +# # ## Connection timeout. # # timeout = "5s" @@ -178,7 +184,7 @@ # [[outputs.file]] # ## Files to write to, "stdout" is a specially handled file. # files = ["stdout", "/tmp/metrics.out"] -# +# # ## Data format to output. # ## Each data format has it's own unique set of configuration options, read # ## more about them here: @@ -199,6 +205,12 @@ # timeout = 2 +# # Send telegraf metrics to graylog(s) +# [[outputs.graylog]] +# ## Udp endpoint for your graylog instance. +# servers = ["127.0.0.1:12201", "192.168.1.1:12201"] + + # # Configuration for sending metrics to an Instrumental project # [[outputs.instrumental]] # ## Project API Token (required) @@ -223,14 +235,14 @@ # ## Telegraf tag to use as a routing key # ## ie, if this tag exists, it's value will be used as the routing key # routing_tag = "host" -# +# # ## CompressionCodec represents the various compression codecs recognized by # ## Kafka in messages. 
# ## 0 : No compression # ## 1 : Gzip compression # ## 2 : Snappy compression # compression_codec = 0 -# +# # ## RequiredAcks is used in Produce Requests to tell the broker how many # ## replica acknowledgements it must see before responding # ## 0 : the producer never waits for an acknowledgement from the broker. @@ -246,17 +258,17 @@ # ## guarantee that no messages will be lost as long as at least one in # ## sync replica remains. # required_acks = -1 -# +# # ## The total number of times to retry sending a message # max_retry = 3 -# +# # ## Optional SSL Config # # ssl_ca = "/etc/telegraf/ca.pem" # # ssl_cert = "/etc/telegraf/cert.pem" # # ssl_key = "/etc/telegraf/key.pem" # ## Use SSL but skip chain & host verification # # insecure_skip_verify = false -# +# # ## Data format to output. # ## Each data format has it's own unique set of configuration options, read # ## more about them here: @@ -268,16 +280,22 @@ # [[outputs.kinesis]] # ## Amazon REGION of kinesis endpoint. # region = "ap-southeast-2" -# +# # ## Amazon Credentials # ## Credentials are loaded in the following order -# ## 1) explicit credentials from 'access_key' and 'secret_key' -# ## 2) environment variables -# ## 3) shared credentials file -# ## 4) EC2 Instance Profile +# ## 1) Assumed credentials via STS if role_arn is specified +# ## 2) explicit credentials from 'access_key' and 'secret_key' +# ## 3) shared profile from 'profile' +# ## 4) environment variables +# ## 5) shared credentials file +# ## 6) EC2 Instance Profile # #access_key = "" # #secret_key = "" -# +# #token = "" +# #role_arn = "" +# #profile = "" +# #shared_credential_file = "" +# # ## Kinesis StreamName must exist prior to starting telegraf. # streamname = "StreamName" # ## PartitionKey as used for sharding data. @@ -312,23 +330,23 @@ # # Configuration for MQTT server to send metrics to # [[outputs.mqtt]] # servers = ["localhost:1883"] # required. -# +# # ## MQTT outputs send metrics to this topic format # ## "///" # ## ex: prefix/web01.example.com/mem # topic_prefix = "telegraf" -# +# # ## username and password to connect MQTT server. # # username = "telegraf" # # password = "metricsmetricsmetricsmetrics" -# +# # ## Optional SSL Config # # ssl_ca = "/etc/telegraf/ca.pem" # # ssl_cert = "/etc/telegraf/cert.pem" # # ssl_key = "/etc/telegraf/key.pem" # ## Use SSL but skip chain & host verification # # insecure_skip_verify = false -# +# # ## Data format to output. # ## Each data format has it's own unique set of configuration options, read # ## more about them here: @@ -342,7 +360,7 @@ # server = "localhost:4150" # ## NSQ topic for producer messages # topic = "telegraf" -# +# # ## Data format to output. # ## Each data format has it's own unique set of configuration options, read # ## more about them here: @@ -354,14 +372,14 @@ # [[outputs.opentsdb]] # ## prefix for metrics keys # prefix = "my.specific.prefix." 
-# +# # ## Telnet Mode ## # ## DNS name of the OpenTSDB server in telnet mode # host = "opentsdb.example.com" -# +# # ## Port of the OpenTSDB server in telnet mode # port = 4242 -# +# # ## Debug true - Prints OpenTSDB communication # debug = false @@ -422,9 +440,6 @@ [[inputs.kernel]] # no configuration -# # Get kernel statistics from /proc/vmstat -# [[inputs.kernel_vmstat]] -# # no configuration # Read metrics about memory usage [[inputs.mem]] @@ -466,7 +481,7 @@ # ## Bcache sets path # ## If not specified, then default is: # bcachePath = "/sys/fs/bcache" -# +# # ## By default, telegraf gather stats for all bcache devices # ## Setting devices will restrict the stats to the specified # ## bcache devices. @@ -494,17 +509,17 @@ # # Collects performance metrics from the MON and OSD nodes in a Ceph storage cluster. # [[inputs.ceph]] # ## All configuration values are optional, defaults are shown below -# +# # ## location of ceph binary # ceph_binary = "/usr/bin/ceph" -# +# # ## directory in which to look for socket files # socket_dir = "/var/run/ceph" -# +# # ## prefix of MON and OSD socket files, used to determine socket type # mon_prefix = "ceph-mon" # osd_prefix = "ceph-osd" -# +# # ## suffix used to identify socket files # socket_suffix = "asok" @@ -513,29 +528,39 @@ # [[inputs.cloudwatch]] # ## Amazon Region # region = 'us-east-1' -# +# # ## Amazon Credentials # ## Credentials are loaded in the following order -# ## 1) explicit credentials from 'access_key' and 'secret_key' -# ## 2) environment variables -# ## 3) shared credentials file -# ## 4) EC2 Instance Profile +# ## 1) Assumed credentials via STS if role_arn is specified +# ## 2) explicit credentials from 'access_key' and 'secret_key' +# ## 3) shared profile from 'profile' +# ## 4) environment variables +# ## 5) shared credentials file +# ## 6) EC2 Instance Profile # #access_key = "" # #secret_key = "" -# +# #token = "" +# #role_arn = "" +# #profile = "" +# #shared_credential_file = "" +# # ## Requested CloudWatch aggregation Period (required - must be a multiple of 60s) # period = '1m' -# +# # ## Collection Delay (required - must account for metrics availability via CloudWatch API) # delay = '1m' -# +# # ## Recomended: use metric 'interval' that is a multiple of 'period' to avoid # ## gaps or overlap in pulled data # interval = '1m' -# +# +# ## Configure the TTL for the internal cache of metrics. +# ## Defaults to 1 hr if not specified +# #cache_ttl = '10m' +# # ## Metric Statistic Namespace (required) # namespace = 'AWS/ELB' -# +# # ## Metrics to Pull (optional) # ## Defaults to all Metrics in Namespace if nothing is provided # ## Refreshes Namespace available metrics every 1h @@ -582,17 +607,17 @@ # [[inputs.dns_query]] # ## servers to query # servers = ["8.8.8.8"] # required -# +# # ## Domains or subdomains to query. "."(root) is default # domains = ["."] # optional -# +# # ## Query record type. Default is "A" # ## Posible values: A, AAAA, CNAME, MX, NS, PTR, TXT, SOA, SPF, SRV. # record_type = "A" # optional -# +# # ## Dns server port. 53 is default # port = 53 # optional -# +# # ## Query timeout in seconds. 
Default is 2 seconds # timeout = 2 # optional @@ -628,11 +653,11 @@ # [[inputs.elasticsearch]] # ## specify a list of one or more Elasticsearch servers # servers = ["http://localhost:9200"] -# +# # ## set local to false when you want to read the indices stats from all nodes # ## within the cluster # local = true -# +# # ## set cluster_health to true when you want to also obtain cluster level stats # cluster_health = false @@ -640,14 +665,18 @@ # # Read metrics from one or more commands that can output to stdout # [[inputs.exec]] # ## Commands array -# commands = ["/tmp/test.sh", "/usr/bin/mycollector --foo=bar"] -# +# commands = [ +# "/tmp/test.sh", +# "/usr/bin/mycollector --foo=bar", +# "/tmp/collect_*.sh" +# ] +# # ## Timeout for each command to complete. # timeout = "5s" -# +# # ## measurement name suffix (for separating different commands) # name_suffix = "_mycollector" -# +# # ## Data format to consume. # ## Each data format has it's own unique set of configuration options, read # ## more about them here: @@ -675,7 +704,7 @@ # [[inputs.haproxy]] # ## An array of address to gather stats about. Specify an ip on hostname # ## with optional port. ie localhost, 10.10.3.33:1936, etc. -# +# # ## If no servers are specified, then default to 127.0.0.1:1936 # servers = ["http://myhaproxy.com:1936", "http://anotherhaproxy.com:1936"] # ## Or you can also use local socket @@ -699,41 +728,48 @@ # # body = ''' # # {'fake':'data'} # # ''' +# +# ## Optional SSL Config +# # ssl_ca = "/etc/telegraf/ca.pem" +# # ssl_cert = "/etc/telegraf/cert.pem" +# # ssl_key = "/etc/telegraf/key.pem" +# ## Use SSL but skip chain & host verification +# # insecure_skip_verify = false # # Read flattened metrics from one or more JSON HTTP endpoints # [[inputs.httpjson]] # ## NOTE This plugin only reads numerical measurements, strings and booleans # ## will be ignored. -# +# # ## a name for the service being polled # name = "webserver_stats" -# +# # ## URL of each server in the service's cluster # servers = [ # "http://localhost:9999/stats/", # "http://localhost:9998/stats/", # ] -# +# # ## HTTP method to use: GET or POST (case-sensitive) # method = "GET" -# +# # ## List of tag names to extract from top-level of JSON server response # # tag_keys = [ # # "my_tag_1", # # "my_tag_2" # # ] -# +# # ## HTTP parameters (all values must be strings) # [inputs.httpjson.parameters] # event_type = "cpu_spike" # threshold = "0.75" -# +# # ## HTTP Header parameters (all values must be strings) # # [inputs.httpjson.headers] # # X-Auth-Token = "my-xauth-token" # # apiVersion = "v1" -# +# # ## Optional SSL Config # # ssl_ca = "/etc/telegraf/ca.pem" # # ssl_cert = "/etc/telegraf/cert.pem" @@ -747,7 +783,7 @@ # ## Works with InfluxDB debug endpoints out of the box, # ## but other services can use this format too. # ## See the influxdb plugin's README for more details. 
-# +# # ## Multiple URLs from which to read InfluxDB-formatted JSON # urls = [ # "http://localhost:8086/debug/vars" @@ -768,7 +804,7 @@ # [[inputs.jolokia]] # ## This is the context root used to compose the jolokia url # context = "/jolokia" -# +# # ## This specifies the mode used # # mode = "proxy" # # @@ -778,8 +814,8 @@ # # [inputs.jolokia.proxy] # # host = "127.0.0.1" # # port = "8080" -# -# +# +# # ## List of servers exposing jolokia read service # [[inputs.jolokia.servers]] # name = "as-server-01" @@ -787,7 +823,7 @@ # port = "8080" # # username = "myuser" # # password = "mypassword" -# +# # ## List of metrics collected on above servers # ## Each metric consists in a name, a jmx path and either # ## a pass or drop slice attribute. @@ -796,13 +832,13 @@ # name = "heap_memory_usage" # mbean = "java.lang:type=Memory" # attribute = "HeapMemoryUsage" -# +# # ## This collect thread counts metrics. # [[inputs.jolokia.metrics]] # name = "thread_count" # mbean = "java.lang:type=Threading" # attribute = "TotalStartedThreadCount,ThreadCount,DaemonThreadCount,PeakThreadCount" -# +# # ## This collect number of class loaded/unloaded counts metrics. # [[inputs.jolokia.metrics]] # name = "class_count" @@ -955,7 +991,7 @@ # address = "github.com:80" # ## Set timeout # timeout = "1s" -# +# # ## Optional string sent to the server # # send = "ssh" # ## Optional expected string in answer @@ -1069,7 +1105,7 @@ # ## to grab metrics for. # ## # address = "host=localhost user=postgres sslmode=disable" -# +# # ## A list of databases to pull metrics about. If not specified, metrics for all # ## databases are gathered. # # databases = ["app_production", "testing"] @@ -1151,7 +1187,7 @@ # # pattern = "nginx" # ## user as argument for pgrep (ie, pgrep -u ) # # user = "nginx" -# +# # ## override for process_name # ## This is optional; default is sourced from /proc//status # # process_name = "bar" @@ -1165,7 +1201,7 @@ # [[inputs.prometheus]] # ## An array of urls to scrape metrics from. # urls = ["http://localhost:9100/metrics"] -# +# # ## Use SSL but skip chain & host verification # # insecure_skip_verify = false # ## Use bearer token for authorization @@ -1184,7 +1220,7 @@ # # name = "rmq-server-1" # optional tag # # username = "guest" # # password = "guest" -# +# # ## A list of nodes to pull metrics about. If not specified, metrics for # ## all nodes are gathered. 
# # nodes = ["rabbit@node1", "rabbit@node2"] @@ -1248,7 +1284,7 @@ # collect = ["mybulk", "sysservices", "sysdescr"] # # Simple list of OIDs to get, in addition to "collect" # get_oids = [] -# +# # [[inputs.snmp.host]] # address = "192.168.2.3:161" # community = "public" @@ -1260,31 +1296,31 @@ # "ifNumber", # ".1.3.6.1.2.1.1.3.0", # ] -# +# # [[inputs.snmp.get]] # name = "ifnumber" # oid = "ifNumber" -# +# # [[inputs.snmp.get]] # name = "interface_speed" # oid = "ifSpeed" # instance = "0" -# +# # [[inputs.snmp.get]] # name = "sysuptime" # oid = ".1.3.6.1.2.1.1.3.0" # unit = "second" -# +# # [[inputs.snmp.bulk]] # name = "mybulk" # max_repetition = 127 # oid = ".1.3.6.1.2.1.1" -# +# # [[inputs.snmp.bulk]] # name = "ifoutoctets" # max_repetition = 127 # oid = "ifOutOctets" -# +# # [[inputs.snmp.host]] # address = "192.168.2.13:161" # #address = "127.0.0.1:161" @@ -1297,19 +1333,19 @@ # [[inputs.snmp.host.table]] # name = "iftable3" # include_instances = ["enp5s0", "eth1"] -# +# # # SNMP TABLEs # # table without mapping neither subtables # [[inputs.snmp.table]] # name = "iftable1" # oid = ".1.3.6.1.2.1.31.1.1.1" -# +# # # table without mapping but with subtables # [[inputs.snmp.table]] # name = "iftable2" # oid = ".1.3.6.1.2.1.31.1.1.1" # sub_tables = [".1.3.6.1.2.1.2.2.1.13"] -# +# # # table with mapping but without subtables # [[inputs.snmp.table]] # name = "iftable3" @@ -1317,7 +1353,7 @@ # # if empty. get all instances # mapping_table = ".1.3.6.1.2.1.31.1.1.1.1" # # if empty, get all subtables -# +# # # table with both mapping and subtables # [[inputs.snmp.table]] # name = "iftable4" @@ -1360,10 +1396,11 @@ # [[inputs.varnish]] # ## The default location of the varnishstat binary can be overridden with: # binary = "/usr/bin/varnishstat" -# +# # ## By default, telegraf gather stats for 3 metric points. # ## Setting stats will override the defaults shown below. -# ## stats may also be set to ["all"], which will collect all stats +# ## Glob matching can be used, ie, stats = ["MAIN.*"] +# ## stats may also be set to ["*"], which will collect all stats # stats = ["MAIN.cache_hit", "MAIN.cache_miss", "MAIN.uptime"] @@ -1372,11 +1409,11 @@ # ## ZFS kstat path # ## If not specified, then default is: # kstatPath = "/proc/spl/kstat/zfs" -# +# # ## By default, telegraf gather all zfs stats # ## If not specified, then default is: # kstatMetrics = ["arcstats", "zfetchstats", "vdev_cache_stats"] -# +# # ## By default, don't gather zpool stats # poolMetrics = false @@ -1385,7 +1422,7 @@ # [[inputs.zookeeper]] # ## An array of address to gather stats about. Specify an ip or hostname # ## with port. ie localhost:2181, 10.0.0.1:2181, etc. -# +# # ## If no servers are specified, then localhost is used as the host. # ## If no port is specified, 2181 is used # servers = [":2181"] @@ -1414,7 +1451,7 @@ # consumer_group = "telegraf_metrics_consumers" # ## Offset (must be either "oldest" or "newest") # offset = "oldest" -# +# # ## Data format to consume. # ## Each data format has it's own unique set of configuration options, read # ## more about them here: @@ -1427,32 +1464,32 @@ # servers = ["localhost:1883"] # ## MQTT QoS, must be 0, 1, or 2 # qos = 0 -# +# # ## Topics to subscribe to # topics = [ # "telegraf/host01/cpu", # "telegraf/+/mem", # "sensors/#", # ] -# +# # # if true, messages that can't be delivered while the subscriber is offline # # will be delivered when it comes back (such as on service restart). 
# # NOTE: if true, client_id MUST be set # persistent_session = false # # If empty, a random client ID will be generated. # client_id = "" -# +# # ## username and password to connect MQTT server. # # username = "telegraf" # # password = "metricsmetricsmetricsmetrics" -# +# # ## Optional SSL Config # # ssl_ca = "/etc/telegraf/ca.pem" # # ssl_cert = "/etc/telegraf/cert.pem" # # ssl_key = "/etc/telegraf/key.pem" # ## Use SSL but skip chain & host verification # # insecure_skip_verify = false -# +# # ## Data format to consume. # ## Each data format has it's own unique set of configuration options, read # ## more about them here: @@ -1470,7 +1507,7 @@ # subjects = ["telegraf"] # ## name a queue group # queue_group = "telegraf_consumers" -# +# # ## Data format to consume. # ## Each data format has it's own unique set of configuration options, read # ## more about them here: @@ -1498,24 +1535,24 @@ # delete_timings = true # ## Percentiles to calculate for timing & histogram stats # percentiles = [90] -# +# # ## separator to use between elements of a statsd metric # metric_separator = "_" -# +# # ## Parses tags in the datadog statsd format # ## http://docs.datadoghq.com/guides/dogstatsd/ # parse_data_dog_tags = false -# +# # ## Statsd data translation templates, more info can be read here: # ## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md#graphite # # templates = [ # # "cpu.* measurement*" # # ] -# +# # ## Number of UDP messages allowed to queue up, once filled, # ## the statsd server will start dropping packets # allowed_pending_messages = 10000 -# +# # ## Number of timing/histogram values to track per-measurement in the # ## calculation of percentiles. Raising this limit increases the accuracy # ## of percentiles but also increases the memory usage and cpu time. @@ -1536,7 +1573,7 @@ # files = ["/var/mymetrics.out"] # ## Read file from beginning. # from_beginning = false -# +# # ## Data format to consume. # ## Each data format has it's own unique set of configuration options, read # ## more about them here: @@ -1548,14 +1585,14 @@ # [[inputs.tcp_listener]] # ## Address and port to host TCP listener on # service_address = ":8094" -# +# # ## Number of TCP messages allowed to queue up. Once filled, the # ## TCP listener will start dropping packets. # allowed_pending_messages = 10000 -# +# # ## Maximum number of concurrent TCP connections to allow # max_tcp_connections = 250 -# +# # ## Data format to consume. # ## Each data format has it's own unique set of configuration options, read # ## more about them here: @@ -1567,24 +1604,14 @@ # [[inputs.udp_listener]] # ## Address and port to host UDP listener on # service_address = ":8092" -# +# # ## Number of UDP messages allowed to queue up. Once filled, the # ## UDP listener will start dropping packets. # allowed_pending_messages = 10000 -# +# # ## Data format to consume. # ## Each data format has it's own unique set of configuration options, read # ## more about them here: # ## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md # data_format = "influx" -# # Collects conntrack stats from the configured directories and files. -# [[inputs.conntrack]] -# ## The following defaults would work with multiple versions of contrack. Note the nf_ and ip_ -# ## filename prefixes are mutually exclusive across conntrack versions, as are the directory locations. -# -# ## Superset of filenames to look for within the conntrack dirs. Missing files will be ignored. 
-# files = ["ip_conntrack_count","ip_conntrack_max","nf_conntrack_count","nf_conntrack_max"] -# -# ## Directories to search within for the conntrack files above. Missing directrories will be ignored. -# dirs = ["/proc/sys/net/ipv4/netfilter","/proc/sys/net/netfilter"] From 958ef2f87238afc5eb8a2601af738a2f3e7bd5c8 Mon Sep 17 00:00:00 2001 From: Cameron Sparr Date: Tue, 31 May 2016 11:21:20 +0100 Subject: [PATCH 07/34] Revert "Add gelf serializer & graylog output filter." (#1299) --- plugins/outputs/all/all.go | 1 - plugins/outputs/graylog/README.md | 5 - plugins/outputs/graylog/graylog.go | 247 ------------------------ plugins/outputs/graylog/graylog_test.go | 55 ------ 4 files changed, 308 deletions(-) delete mode 100644 plugins/outputs/graylog/README.md delete mode 100644 plugins/outputs/graylog/graylog.go delete mode 100644 plugins/outputs/graylog/graylog_test.go diff --git a/plugins/outputs/all/all.go b/plugins/outputs/all/all.go index 27f8958fe..5b223529c 100644 --- a/plugins/outputs/all/all.go +++ b/plugins/outputs/all/all.go @@ -7,7 +7,6 @@ import ( _ "github.com/influxdata/telegraf/plugins/outputs/datadog" _ "github.com/influxdata/telegraf/plugins/outputs/file" _ "github.com/influxdata/telegraf/plugins/outputs/graphite" - _ "github.com/influxdata/telegraf/plugins/outputs/graylog" _ "github.com/influxdata/telegraf/plugins/outputs/influxdb" _ "github.com/influxdata/telegraf/plugins/outputs/instrumental" _ "github.com/influxdata/telegraf/plugins/outputs/kafka" diff --git a/plugins/outputs/graylog/README.md b/plugins/outputs/graylog/README.md deleted file mode 100644 index 26b8d8fc6..000000000 --- a/plugins/outputs/graylog/README.md +++ /dev/null @@ -1,5 +0,0 @@ -# Graylog Output Plugin - -This plugin writes to a Graylog instance using the "gelf" format. - -It requires a `servers` name. 
diff --git a/plugins/outputs/graylog/graylog.go b/plugins/outputs/graylog/graylog.go deleted file mode 100644 index 7f2480134..000000000 --- a/plugins/outputs/graylog/graylog.go +++ /dev/null @@ -1,247 +0,0 @@ -package graylog - -import ( - "bytes" - "compress/zlib" - "crypto/rand" - "encoding/binary" - ejson "encoding/json" - "fmt" - "github.com/influxdata/telegraf" - "github.com/influxdata/telegraf/plugins/outputs" - "io" - "math" - "net" - "os" -) - -const ( - defaultGraylogEndpoint = "127.0.0.1:12201" - defaultConnection = "wan" - defaultMaxChunkSizeWan = 1420 - defaultMaxChunkSizeLan = 8154 -) - -type GelfConfig struct { - GraylogEndpoint string - Connection string - MaxChunkSizeWan int - MaxChunkSizeLan int -} - -type Gelf struct { - GelfConfig -} - -func NewGelfWriter(config GelfConfig) *Gelf { - if config.GraylogEndpoint == "" { - config.GraylogEndpoint = defaultGraylogEndpoint - } - - if config.Connection == "" { - config.Connection = defaultConnection - } - - if config.MaxChunkSizeWan == 0 { - config.MaxChunkSizeWan = defaultMaxChunkSizeWan - } - - if config.MaxChunkSizeLan == 0 { - config.MaxChunkSizeLan = defaultMaxChunkSizeLan - } - - g := &Gelf{GelfConfig: config} - - return g -} - -func (g *Gelf) Write(message []byte) (n int, err error) { - compressed := g.compress(message) - - chunksize := g.GelfConfig.MaxChunkSizeWan - length := compressed.Len() - - if length > chunksize { - - chunkCountInt := int(math.Ceil(float64(length) / float64(chunksize))) - - id := make([]byte, 8) - rand.Read(id) - - for i, index := 0, 0; i < length; i, index = i+chunksize, index+1 { - packet := g.createChunkedMessage(index, chunkCountInt, id, &compressed) - _, err = g.send(packet.Bytes()) - if err != nil { - return 0, err - } - } - } else { - _, err = g.send(compressed.Bytes()) - if err != nil { - return 0, err - } - } - - n = len(message) - - return -} - -func (g *Gelf) createChunkedMessage(index int, chunkCountInt int, id []byte, compressed *bytes.Buffer) bytes.Buffer { - var packet bytes.Buffer - - chunksize := g.getChunksize() - - packet.Write(g.intToBytes(30)) - packet.Write(g.intToBytes(15)) - packet.Write(id) - - packet.Write(g.intToBytes(index)) - packet.Write(g.intToBytes(chunkCountInt)) - - packet.Write(compressed.Next(chunksize)) - - return packet -} - -func (g *Gelf) getChunksize() int { - if g.GelfConfig.Connection == "wan" { - return g.GelfConfig.MaxChunkSizeWan - } - - if g.GelfConfig.Connection == "lan" { - return g.GelfConfig.MaxChunkSizeLan - } - - return g.GelfConfig.MaxChunkSizeWan -} - -func (g *Gelf) intToBytes(i int) []byte { - buf := new(bytes.Buffer) - - binary.Write(buf, binary.LittleEndian, int8(i)) - return buf.Bytes() -} - -func (g *Gelf) compress(b []byte) bytes.Buffer { - var buf bytes.Buffer - comp := zlib.NewWriter(&buf) - - comp.Write(b) - comp.Close() - - return buf -} - -func (g *Gelf) send(b []byte) (n int, err error) { - udpAddr, err := net.ResolveUDPAddr("udp", g.GelfConfig.GraylogEndpoint) - if err != nil { - return - } - - conn, err := net.DialUDP("udp", nil, udpAddr) - if err != nil { - return - } - - n, err = conn.Write(b) - return -} - -type Graylog struct { - Servers []string - writer io.Writer -} - -var sampleConfig = ` - ## Udp endpoint for your graylog instance. 
- servers = ["127.0.0.1:12201", "192.168.1.1:12201"] -` - -func (g *Graylog) Connect() error { - writers := []io.Writer{} - - if len(g.Servers) == 0 { - g.Servers = append(g.Servers, "localhost:12201") - } - - for _, server := range g.Servers { - w := NewGelfWriter(GelfConfig{GraylogEndpoint: server}) - writers = append(writers, w) - } - - g.writer = io.MultiWriter(writers...) - return nil -} - -func (g *Graylog) Close() error { - return nil -} - -func (g *Graylog) SampleConfig() string { - return sampleConfig -} - -func (g *Graylog) Description() string { - return "Send telegraf metrics to graylog(s)" -} - -func (g *Graylog) Write(metrics []telegraf.Metric) error { - if len(metrics) == 0 { - return nil - } - - for _, metric := range metrics { - values, err := serialize(metric) - if err != nil { - return err - } - - for _, value := range values { - _, err := g.writer.Write([]byte(value)) - if err != nil { - return fmt.Errorf("FAILED to write message: %s, %s", value, err) - } - } - } - return nil -} - -func serialize(metric telegraf.Metric) ([]string, error) { - out := []string{} - - m := make(map[string]interface{}) - m["version"] = "1.1" - m["timestamp"] = metric.UnixNano() / 1000000000 - m["short_message"] = " " - m["name"] = metric.Name() - - if host, ok := metric.Tags()["host"]; ok { - m["host"] = host - } else { - host, err := os.Hostname() - if err != nil { - return []string{}, err - } - m["host"] = host - } - - for key, value := range metric.Fields() { - nkey := fmt.Sprintf("_%s", key) - m[nkey] = value - } - - serialized, err := ejson.Marshal(m) - if err != nil { - return []string{}, err - } - out = append(out, string(serialized)) - - return out, nil -} - -func init() { - outputs.Add("graylog", func() telegraf.Output { - return &Graylog{} - }) -} diff --git a/plugins/outputs/graylog/graylog_test.go b/plugins/outputs/graylog/graylog_test.go deleted file mode 100644 index 521f83dc1..000000000 --- a/plugins/outputs/graylog/graylog_test.go +++ /dev/null @@ -1,55 +0,0 @@ -package graylog - -import ( - "bytes" - "compress/zlib" - "encoding/json" - "io" - "net" - "sync" - "testing" - - "github.com/influxdata/telegraf/testutil" - "github.com/stretchr/testify/assert" -) - -func TestWrite(t *testing.T) { - var wg sync.WaitGroup - wg.Add(1) - go UDPServer(t, &wg) - - i := Graylog{ - Servers: []string{"127.0.0.1:12201"}, - } - i.Connect() - - metrics := testutil.MockMetrics() - metrics = append(metrics, testutil.TestMetric(int64(1234567890))) - - i.Write(metrics) - - wg.Wait() - i.Close() -} - -type GelfObject map[string]interface{} - -func UDPServer(t *testing.T, wg *sync.WaitGroup) { - serverAddr, _ := net.ResolveUDPAddr("udp", "127.0.0.1:12201") - udpServer, _ := net.ListenUDP("udp", serverAddr) - defer wg.Done() - - bufR := make([]byte, 1024) - n, _, _ := udpServer.ReadFromUDP(bufR) - - b := bytes.NewReader(bufR[0:n]) - r, _ := zlib.NewReader(b) - - bufW := bytes.NewBuffer(nil) - io.Copy(bufW, r) - r.Close() - - var obj GelfObject - json.Unmarshal(bufW.Bytes(), &obj) - assert.Equal(t, obj["_value"], float64(1)) -} From 4f27315720b443671937a8dbd16cf4feb0a5c388 Mon Sep 17 00:00:00 2001 From: Cameron Sparr Date: Tue, 31 May 2016 11:23:01 +0100 Subject: [PATCH 08/34] Revert graylog output --- README.md | 1 - etc/telegraf.conf | 6 ------ 2 files changed, 7 deletions(-) diff --git a/README.md b/README.md index 7890fa113..7366d5986 100644 --- a/README.md +++ b/README.md @@ -233,7 +233,6 @@ want to add support for another service or third-party API. 
 * [datadog](https://github.com/influxdata/telegraf/tree/master/plugins/outputs/datadog)
 * [file](https://github.com/influxdata/telegraf/tree/master/plugins/outputs/file)
 * [graphite](https://github.com/influxdata/telegraf/tree/master/plugins/outputs/graphite)
-* [graylog](https://github.com/influxdata/telegraf/tree/master/plugins/outputs/graylog)
 * [instrumental](https://github.com/influxdata/telegraf/tree/master/plugins/outputs/instrumental)
 * [kafka](https://github.com/influxdata/telegraf/tree/master/plugins/outputs/kafka)
 * [librato](https://github.com/influxdata/telegraf/tree/master/plugins/outputs/librato)
diff --git a/etc/telegraf.conf b/etc/telegraf.conf
index 2d03b0f2b..824564c0f 100644
--- a/etc/telegraf.conf
+++ b/etc/telegraf.conf
@@ -205,12 +205,6 @@
 # timeout = 2
 
 
-# # Send telegraf metrics to graylog(s)
-# [[outputs.graylog]]
-# ## Udp endpoint for your graylog instance.
-# servers = ["127.0.0.1:12201", "192.168.1.1:12201"]
-
-
 # # Configuration for sending metrics to an Instrumental project
 # [[outputs.instrumental]]
 # ## Project API Token (required)
From 9ff536d94d4803c8639a50cead9ef046914f70d3 Mon Sep 17 00:00:00 2001
From: Cameron Sparr
Date: Tue, 24 May 2016 14:50:01 +0100
Subject: [PATCH 09/34] Limit GetMetricStatistics to 10 per second

closes #1197
---
 CHANGELOG.md                            |  1 +
 internal/limiter/limiter.go             | 59 +++++++++++++++++++++++++
 internal/limiter/limiter_test.go        | 54 ++++++++++++++++++++++
 plugins/inputs/cloudwatch/cloudwatch.go | 20 ++++++---
 4 files changed, 127 insertions(+), 7 deletions(-)
 create mode 100644 internal/limiter/limiter.go
 create mode 100644 internal/limiter/limiter_test.go
diff --git a/CHANGELOG.md b/CHANGELOG.md
index da2b41a5f..f81375479 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -29,6 +29,7 @@ time before a new metric is included by the plugin.
 - [#1275](https://github.com/influxdata/telegraf/pull/1275): Allow wildcard filtering of varnish stats.
 - [#1142](https://github.com/influxdata/telegraf/pull/1142): Support for glob patterns in exec plugin commands configuration.
 - [#1278](https://github.com/influxdata/telegraf/pull/1278): RabbitMQ input: made url parameter optional by using DefaultURL (http://localhost:15672) if not specified
+- [#1197](https://github.com/influxdata/telegraf/pull/1197): Limit AWS GetMetricStatistics requests to 10 per second.
 
 ### Bugfixes
 
diff --git a/internal/limiter/limiter.go b/internal/limiter/limiter.go
new file mode 100644
index 000000000..c5689751d
--- /dev/null
+++ b/internal/limiter/limiter.go
@@ -0,0 +1,59 @@
+package limiter
+
+import (
+	"sync"
+	"time"
+)
+
+// NewRateLimiter returns a rate limiter that will emit from the C
+// channel only 'n' times every 'rate' seconds.
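+//
+// A minimal usage sketch (hypothetical caller; jobs and process are
+// placeholder names, not part of this package):
+//
+//	lmtr := NewRateLimiter(10, time.Second)
+//	defer lmtr.Stop()
+//	for _, job := range jobs {
+//		<-lmtr.C // blocks until the limiter emits one of its 'n' per-interval slots
+//		go process(job)
+//	}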
+func NewRateLimiter(n int, rate time.Duration) *rateLimiter { + r := &rateLimiter{ + C: make(chan bool), + rate: rate, + n: n, + shutdown: make(chan bool), + } + r.wg.Add(1) + go r.limiter() + return r +} + +type rateLimiter struct { + C chan bool + rate time.Duration + n int + + shutdown chan bool + wg sync.WaitGroup +} + +func (r *rateLimiter) Stop() { + close(r.shutdown) + r.wg.Wait() + close(r.C) +} + +func (r *rateLimiter) limiter() { + defer r.wg.Done() + ticker := time.NewTicker(r.rate) + defer ticker.Stop() + counter := 0 + for { + select { + case <-r.shutdown: + return + case <-ticker.C: + counter = 0 + default: + if counter < r.n { + select { + case r.C <- true: + counter++ + case <-r.shutdown: + return + } + } + } + } +} diff --git a/internal/limiter/limiter_test.go b/internal/limiter/limiter_test.go new file mode 100644 index 000000000..83c9d86f1 --- /dev/null +++ b/internal/limiter/limiter_test.go @@ -0,0 +1,54 @@ +package limiter + +import ( + "testing" + "time" + + "github.com/stretchr/testify/assert" +) + +func TestRateLimiter(t *testing.T) { + r := NewRateLimiter(5, time.Second) + ticker := time.NewTicker(time.Millisecond * 75) + + // test that we can only get 5 receives from the rate limiter + counter := 0 +outer: + for { + select { + case <-r.C: + counter++ + case <-ticker.C: + break outer + } + } + + assert.Equal(t, 5, counter) + r.Stop() + // verify that the Stop function closes the channel. + _, ok := <-r.C + assert.False(t, ok) +} + +func TestRateLimiterMultipleIterations(t *testing.T) { + r := NewRateLimiter(5, time.Millisecond*50) + ticker := time.NewTicker(time.Millisecond * 250) + + // test that we can get 15 receives from the rate limiter + counter := 0 +outer: + for { + select { + case <-ticker.C: + break outer + case <-r.C: + counter++ + } + } + + assert.True(t, counter > 10) + r.Stop() + // verify that the Stop function closes the channel. 
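+	// a receive on the closed channel returns immediately with the zero value
+	// and ok == false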
+ _, ok := <-r.C + assert.False(t, ok) +} diff --git a/plugins/inputs/cloudwatch/cloudwatch.go b/plugins/inputs/cloudwatch/cloudwatch.go index e6671a3bf..1bd2d5c07 100644 --- a/plugins/inputs/cloudwatch/cloudwatch.go +++ b/plugins/inputs/cloudwatch/cloudwatch.go @@ -12,6 +12,7 @@ import ( "github.com/influxdata/telegraf" "github.com/influxdata/telegraf/internal" internalaws "github.com/influxdata/telegraf/internal/config/aws" + "github.com/influxdata/telegraf/internal/limiter" "github.com/influxdata/telegraf/plugins/inputs" ) @@ -170,11 +171,13 @@ func (c *CloudWatch) Gather(acc telegraf.Accumulator) error { now := time.Now() // limit concurrency or we can easily exhaust user connection limit - semaphore := make(chan byte, 64) - + // see cloudwatch API request limits: + // http://docs.aws.amazon.com/AmazonCloudWatch/latest/DeveloperGuide/cloudwatch_limits.html + lmtr := limiter.NewRateLimiter(10, time.Second) + defer lmtr.Stop() for _, m := range metrics { - semaphore <- 0x1 - go c.gatherMetric(acc, m, now, semaphore, errChan) + <-lmtr.C + go c.gatherMetric(acc, m, now, errChan) } for i := 1; i <= metricCount; i++ { @@ -257,12 +260,16 @@ func (c *CloudWatch) fetchNamespaceMetrics() (metrics []*cloudwatch.Metric, err /* * Gather given Metric and emit any error */ -func (c *CloudWatch) gatherMetric(acc telegraf.Accumulator, metric *cloudwatch.Metric, now time.Time, semaphore chan byte, errChan chan error) { +func (c *CloudWatch) gatherMetric( + acc telegraf.Accumulator, + metric *cloudwatch.Metric, + now time.Time, + errChan chan error, +) { params := c.getStatisticsInput(metric, now) resp, err := c.client.GetMetricStatistics(params) if err != nil { errChan <- err - <-semaphore return } @@ -299,7 +306,6 @@ func (c *CloudWatch) gatherMetric(acc telegraf.Accumulator, metric *cloudwatch.M } errChan <- nil - <-semaphore } /* From e809c4e445bdf1faa62d6e4d400d6e9b032322bf Mon Sep 17 00:00:00 2001 From: Martin Seener Date: Mon, 30 May 2016 14:45:21 +0200 Subject: [PATCH 10/34] Also added reasonable default for influxdb input plugin to simplify configuration for most users closes #1295 --- CHANGELOG.md | 1 + etc/telegraf.conf | 1 + plugins/inputs/influxdb/README.md | 1 + plugins/inputs/influxdb/influxdb.go | 4 ++++ 4 files changed, 7 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index f81375479..5fbcaf018 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -30,6 +30,7 @@ time before a new metric is included by the plugin. - [#1142](https://github.com/influxdata/telegraf/pull/1142): Support for glob patterns in exec plugin commands configuration. - [#1278](https://github.com/influxdata/telegraf/pull/1278): RabbitMQ input: made url parameter optional by using DefaultURL (http://localhost:15672) if not specified - [#1197](https://github.com/influxdata/telegraf/pull/1197): Limit AWS GetMetricStatistics requests to 10 per second. +- [#1278](https://github.com/influxdata/telegraf/pull/1278) & [#1288](https://github.com/influxdata/telegraf/pull/1288) & [#1295](https://github.com/influxdata/telegraf/pull/1295): RabbitMQ/Apache/InfluxDB inputs: made url(s) parameter optional by using reasonable input defaults if not specified ### Bugfixes diff --git a/etc/telegraf.conf b/etc/telegraf.conf index 824564c0f..d8fe3b865 100644 --- a/etc/telegraf.conf +++ b/etc/telegraf.conf @@ -779,6 +779,7 @@ # ## See the influxdb plugin's README for more details. # # ## Multiple URLs from which to read InfluxDB-formatted JSON +# ## Default is "http://localhost:8086/debug/vars". 
 # urls = [
 #   "http://localhost:8086/debug/vars"
 # ]
diff --git a/plugins/inputs/influxdb/README.md b/plugins/inputs/influxdb/README.md
index 433191495..c983e9749 100644
--- a/plugins/inputs/influxdb/README.md
+++ b/plugins/inputs/influxdb/README.md
@@ -15,6 +15,7 @@ InfluxDB-formatted endpoints. See below for more information.
   ## See the influxdb plugin's README for more details.
 
   ## Multiple URLs from which to read InfluxDB-formatted JSON
+  ## Default is "http://localhost:8086/debug/vars".
   urls = [
     "http://localhost:8086/debug/vars"
   ]
diff --git a/plugins/inputs/influxdb/influxdb.go b/plugins/inputs/influxdb/influxdb.go
index 6a53ecdcb..974a1b9e7 100644
--- a/plugins/inputs/influxdb/influxdb.go
+++ b/plugins/inputs/influxdb/influxdb.go
@@ -28,6 +28,7 @@ func (*InfluxDB) SampleConfig() string {
   ## See the influxdb plugin's README for more details.
 
   ## Multiple URLs from which to read InfluxDB-formatted JSON
+  ## Default is "http://localhost:8086/debug/vars".
   urls = [
     "http://localhost:8086/debug/vars"
   ]
@@ -35,6 +36,9 @@ func (*InfluxDB) SampleConfig() string {
 }
 
 func (i *InfluxDB) Gather(acc telegraf.Accumulator) error {
+	if len(i.URLs) == 0 {
+		i.URLs = []string{"http://localhost:8086/debug/vars"}
+	}
 	errorChannel := make(chan error, len(i.URLs))
 
 	var wg sync.WaitGroup
From 892abec025dd596cdcd69a03a0556b52301125cb Mon Sep 17 00:00:00 2001
From: Cameron Sparr
Date: Mon, 30 May 2016 23:24:42 +0100
Subject: [PATCH 11/34] Refactor collection_jitter and flush_jitter

Use a common function between collection_jitter and flush_jitter, which
creates the same behavior between the two options.

Going forward, both jitters will be random sleeps that get re-evaluated
at runtime for every interval (previously only collection_jitter did
this).

This also fixes behavior so that both jitters will exit in the event of
a process exit.

closes #1296
---
 CHANGELOG.md              |  5 +++
 agent/agent.go            | 45 +++----------------------
 agent/agent_test.go       | 73 ---------------------------------------
 internal/config/config.go |  1 -
 internal/internal.go      | 25 ++++++++++++++
 internal/internal_test.go | 25 ++++++++++++++
 6 files changed, 59 insertions(+), 115 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 5fbcaf018..71d70e976 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,6 +2,10 @@
 
 ### Release Notes
 
+- `flush_jitter` behavior has been changed. The random jitter will now be
+evaluated at every flush interval, rather than once at startup. This makes it
+consistent with the behavior of `collection_jitter`.
+
 - All AWS plugins now utilize a standard mechanism for evaluating credentials.
 This allows all AWS plugins to support environment variables, shared credential
 files & profiles, and role assumptions. See the specific plugin README for
@@ -30,6 +31,7 @@ time before a new metric is included by the plugin.
 - [#1278](https://github.com/influxdata/telegraf/pull/1278): RabbitMQ input: made url parameter optional by using DefaultURL (http://localhost:15672) if not specified
 - [#1197](https://github.com/influxdata/telegraf/pull/1197): Limit AWS GetMetricStatistics requests to 10 per second.
 - [#1278](https://github.com/influxdata/telegraf/pull/1278) & [#1288](https://github.com/influxdata/telegraf/pull/1288) & [#1295](https://github.com/influxdata/telegraf/pull/1295): RabbitMQ/Apache/InfluxDB inputs: made url(s) parameter optional by using reasonable input defaults if not specified
+- [#1296](https://github.com/influxdata/telegraf/issues/1296): Refactor of flush_jitter argument.
### Bugfixes diff --git a/agent/agent.go b/agent/agent.go index 6b6714760..1423ef773 100644 --- a/agent/agent.go +++ b/agent/agent.go @@ -1,17 +1,15 @@ package agent import ( - cryptorand "crypto/rand" "fmt" "log" - "math/big" - "math/rand" "os" "runtime" "sync" "time" "github.com/influxdata/telegraf" + "github.com/influxdata/telegraf/internal" "github.com/influxdata/telegraf/internal/config" "github.com/influxdata/telegraf/internal/models" ) @@ -115,27 +113,16 @@ func (a *Agent) gatherer( ticker := time.NewTicker(interval) defer ticker.Stop() - jitter := a.Config.Agent.CollectionJitter.Duration.Nanoseconds() - for { var outerr error - start := time.Now() acc := NewAccumulator(input.Config, metricC) acc.SetDebug(a.Config.Agent.Debug) acc.setDefaultTags(a.Config.Tags) - if jitter != 0 { - nanoSleep := rand.Int63n(jitter) - d, err := time.ParseDuration(fmt.Sprintf("%dns", nanoSleep)) - if err != nil { - log.Printf("Jittering collection interval failed for plugin %s", - input.Name) - } else { - time.Sleep(d) - } - } + internal.RandomSleep(a.Config.Agent.CollectionJitter.Duration, shutdown) + start := time.Now() gatherWithTimeout(shutdown, input, acc, interval) elapsed := time.Since(start) @@ -274,6 +261,7 @@ func (a *Agent) flusher(shutdown chan struct{}, metricC chan telegraf.Metric) er a.flush() return nil case <-ticker.C: + internal.RandomSleep(a.Config.Agent.FlushJitter.Duration, shutdown) a.flush() case m := <-metricC: for _, o := range a.Config.Outputs { @@ -283,35 +271,10 @@ func (a *Agent) flusher(shutdown chan struct{}, metricC chan telegraf.Metric) er } } -// jitterInterval applies the the interval jitter to the flush interval using -// crypto/rand number generator -func jitterInterval(ininterval, injitter time.Duration) time.Duration { - var jitter int64 - outinterval := ininterval - if injitter.Nanoseconds() != 0 { - maxjitter := big.NewInt(injitter.Nanoseconds()) - if j, err := cryptorand.Int(cryptorand.Reader, maxjitter); err == nil { - jitter = j.Int64() - } - outinterval = time.Duration(jitter + ininterval.Nanoseconds()) - } - - if outinterval.Nanoseconds() < time.Duration(500*time.Millisecond).Nanoseconds() { - log.Printf("Flush interval %s too low, setting to 500ms\n", outinterval) - outinterval = time.Duration(500 * time.Millisecond) - } - - return outinterval -} - // Run runs the agent daemon, gathering every Interval func (a *Agent) Run(shutdown chan struct{}) error { var wg sync.WaitGroup - a.Config.Agent.FlushInterval.Duration = jitterInterval( - a.Config.Agent.FlushInterval.Duration, - a.Config.Agent.FlushJitter.Duration) - log.Printf("Agent Config: Interval:%s, Debug:%#v, Quiet:%#v, Hostname:%#v, "+ "Flush Interval:%s \n", a.Config.Agent.Interval.Duration, a.Config.Agent.Debug, a.Config.Agent.Quiet, diff --git a/agent/agent_test.go b/agent/agent_test.go index adbde9a13..a5920ce1c 100644 --- a/agent/agent_test.go +++ b/agent/agent_test.go @@ -2,7 +2,6 @@ package agent import ( "testing" - "time" "github.com/influxdata/telegraf/internal/config" @@ -110,75 +109,3 @@ func TestAgent_LoadOutput(t *testing.T) { a, _ = NewAgent(c) assert.Equal(t, 3, len(a.Config.Outputs)) } - -func TestAgent_ZeroJitter(t *testing.T) { - flushinterval := jitterInterval(time.Duration(10*time.Second), - time.Duration(0*time.Second)) - - actual := flushinterval.Nanoseconds() - exp := time.Duration(10 * time.Second).Nanoseconds() - - if actual != exp { - t.Errorf("Actual %v, expected %v", actual, exp) - } -} - -func TestAgent_ZeroInterval(t *testing.T) { - min := time.Duration(500 * 
time.Millisecond).Nanoseconds() - max := time.Duration(5 * time.Second).Nanoseconds() - - for i := 0; i < 1000; i++ { - flushinterval := jitterInterval(time.Duration(0*time.Second), - time.Duration(5*time.Second)) - actual := flushinterval.Nanoseconds() - - if actual > max { - t.Errorf("Didn't expect interval %d to be > %d", actual, max) - break - } - if actual < min { - t.Errorf("Didn't expect interval %d to be < %d", actual, min) - break - } - } -} - -func TestAgent_ZeroBoth(t *testing.T) { - flushinterval := jitterInterval(time.Duration(0*time.Second), - time.Duration(0*time.Second)) - - actual := flushinterval - exp := time.Duration(500 * time.Millisecond) - - if actual != exp { - t.Errorf("Actual %v, expected %v", actual, exp) - } -} - -func TestAgent_JitterMax(t *testing.T) { - max := time.Duration(32 * time.Second).Nanoseconds() - - for i := 0; i < 1000; i++ { - flushinterval := jitterInterval(time.Duration(30*time.Second), - time.Duration(2*time.Second)) - actual := flushinterval.Nanoseconds() - if actual > max { - t.Errorf("Didn't expect interval %d to be > %d", actual, max) - break - } - } -} - -func TestAgent_JitterMin(t *testing.T) { - min := time.Duration(30 * time.Second).Nanoseconds() - - for i := 0; i < 1000; i++ { - flushinterval := jitterInterval(time.Duration(30*time.Second), - time.Duration(2*time.Second)) - actual := flushinterval.Nanoseconds() - if actual < min { - t.Errorf("Didn't expect interval %d to be < %d", actual, min) - break - } - } -} diff --git a/internal/config/config.go b/internal/config/config.go index daaaa10fc..545cec84d 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -58,7 +58,6 @@ func NewConfig() *Config { Interval: internal.Duration{Duration: 10 * time.Second}, RoundInterval: true, FlushInterval: internal.Duration{Duration: 10 * time.Second}, - FlushJitter: internal.Duration{Duration: 5 * time.Second}, }, Tags: make(map[string]string), diff --git a/internal/internal.go b/internal/internal.go index 33ee40a26..27a24f021 100644 --- a/internal/internal.go +++ b/internal/internal.go @@ -10,6 +10,7 @@ import ( "fmt" "io/ioutil" "log" + "math/big" "os" "os/exec" "strconv" @@ -228,3 +229,27 @@ func CompileFilter(filters []string) (glob.Glob, error) { } return out, err } + +// RandomSleep will sleep for a random amount of time up to max. +// If the shutdown channel is closed, it will return before it has finished +// sleeping. 
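+//
+// A minimal usage sketch (hypothetical caller; the 5s max is an arbitrary
+// example value):
+//
+//	shutdown := make(chan struct{})
+//	// ... arrange for close(shutdown) elsewhere on process exit ...
+//	RandomSleep(5*time.Second, shutdown) // sleeps up to 5s, or returns early on shutdown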
+func RandomSleep(max time.Duration, shutdown chan struct{}) { + if max == 0 { + return + } + maxSleep := big.NewInt(max.Nanoseconds()) + + var sleepns int64 + if j, err := rand.Int(rand.Reader, maxSleep); err == nil { + sleepns = j.Int64() + } + + t := time.NewTimer(time.Nanosecond * time.Duration(sleepns)) + select { + case <-t.C: + return + case <-shutdown: + t.Stop() + return + } +} diff --git a/internal/internal_test.go b/internal/internal_test.go index 341fdd370..31bb5ec61 100644 --- a/internal/internal_test.go +++ b/internal/internal_test.go @@ -137,3 +137,28 @@ func TestCompileFilter(t *testing.T) { assert.True(t, f.Match("mem")) assert.True(t, f.Match("network")) } + +func TestRandomSleep(t *testing.T) { + // test that zero max returns immediately + s := time.Now() + RandomSleep(time.Duration(0), make(chan struct{})) + elapsed := time.Since(s) + assert.True(t, elapsed < time.Millisecond) + + // test that max sleep is respected + s = time.Now() + RandomSleep(time.Millisecond*50, make(chan struct{})) + elapsed = time.Since(s) + assert.True(t, elapsed < time.Millisecond*50) + + // test that shutdown is respected + s = time.Now() + shutdown := make(chan struct{}) + go func() { + time.Sleep(time.Millisecond * 100) + close(shutdown) + }() + RandomSleep(time.Second, shutdown) + elapsed = time.Since(s) + assert.True(t, elapsed < time.Millisecond*150) +} From bd1282eddf31c060bc25893cc3e4ae4fbfb2238e Mon Sep 17 00:00:00 2001 From: Cameron Sparr Date: Tue, 31 May 2016 16:25:02 +0100 Subject: [PATCH 12/34] Don't print config with trailing whitespace --- internal/config/config.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internal/config/config.go b/internal/config/config.go index 545cec84d..fdc9a8753 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -356,7 +356,7 @@ func printConfig(name string, p printer, op string, commented bool) { fmt.Print("\n") continue } - fmt.Print(comment + line + "\n") + fmt.Print(strings.TrimRight(comment+line, " ") + "\n") } } } From 342cfc4087da846584442dd5d9c1e83f2cc5fe7c Mon Sep 17 00:00:00 2001 From: Cameron Sparr Date: Tue, 31 May 2016 16:41:27 +0100 Subject: [PATCH 13/34] ReAdd gelf serializer & graylog output filter. (#1299) This reverts commit 958ef2f87238afc5eb8a2601af738a2f3e7bd5c8. 
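
For reference, the gelf serializer emits one JSON document per metric. A
metric named "mem" carrying a host tag and a single "value" field would
serialize along these lines (illustrative example only; the timestamp and
key order are not significant):

    {"version":"1.1","timestamp":1464700800,"short_message":" ","name":"mem","host":"web01","_value":42}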
--- plugins/outputs/all/all.go | 1 + plugins/outputs/graylog/README.md | 5 + plugins/outputs/graylog/graylog.go | 247 ++++++++++++++++++++++++ plugins/outputs/graylog/graylog_test.go | 55 ++++++ 4 files changed, 308 insertions(+) create mode 100644 plugins/outputs/graylog/README.md create mode 100644 plugins/outputs/graylog/graylog.go create mode 100644 plugins/outputs/graylog/graylog_test.go diff --git a/plugins/outputs/all/all.go b/plugins/outputs/all/all.go index 5b223529c..27f8958fe 100644 --- a/plugins/outputs/all/all.go +++ b/plugins/outputs/all/all.go @@ -7,6 +7,7 @@ import ( _ "github.com/influxdata/telegraf/plugins/outputs/datadog" _ "github.com/influxdata/telegraf/plugins/outputs/file" _ "github.com/influxdata/telegraf/plugins/outputs/graphite" + _ "github.com/influxdata/telegraf/plugins/outputs/graylog" _ "github.com/influxdata/telegraf/plugins/outputs/influxdb" _ "github.com/influxdata/telegraf/plugins/outputs/instrumental" _ "github.com/influxdata/telegraf/plugins/outputs/kafka" diff --git a/plugins/outputs/graylog/README.md b/plugins/outputs/graylog/README.md new file mode 100644 index 000000000..26b8d8fc6 --- /dev/null +++ b/plugins/outputs/graylog/README.md @@ -0,0 +1,5 @@ +# Graylog Output Plugin + +This plugin writes to a Graylog instance using the "gelf" format. + +It requires a `servers` name. diff --git a/plugins/outputs/graylog/graylog.go b/plugins/outputs/graylog/graylog.go new file mode 100644 index 000000000..7f2480134 --- /dev/null +++ b/plugins/outputs/graylog/graylog.go @@ -0,0 +1,247 @@ +package graylog + +import ( + "bytes" + "compress/zlib" + "crypto/rand" + "encoding/binary" + ejson "encoding/json" + "fmt" + "github.com/influxdata/telegraf" + "github.com/influxdata/telegraf/plugins/outputs" + "io" + "math" + "net" + "os" +) + +const ( + defaultGraylogEndpoint = "127.0.0.1:12201" + defaultConnection = "wan" + defaultMaxChunkSizeWan = 1420 + defaultMaxChunkSizeLan = 8154 +) + +type GelfConfig struct { + GraylogEndpoint string + Connection string + MaxChunkSizeWan int + MaxChunkSizeLan int +} + +type Gelf struct { + GelfConfig +} + +func NewGelfWriter(config GelfConfig) *Gelf { + if config.GraylogEndpoint == "" { + config.GraylogEndpoint = defaultGraylogEndpoint + } + + if config.Connection == "" { + config.Connection = defaultConnection + } + + if config.MaxChunkSizeWan == 0 { + config.MaxChunkSizeWan = defaultMaxChunkSizeWan + } + + if config.MaxChunkSizeLan == 0 { + config.MaxChunkSizeLan = defaultMaxChunkSizeLan + } + + g := &Gelf{GelfConfig: config} + + return g +} + +func (g *Gelf) Write(message []byte) (n int, err error) { + compressed := g.compress(message) + + chunksize := g.GelfConfig.MaxChunkSizeWan + length := compressed.Len() + + if length > chunksize { + + chunkCountInt := int(math.Ceil(float64(length) / float64(chunksize))) + + id := make([]byte, 8) + rand.Read(id) + + for i, index := 0, 0; i < length; i, index = i+chunksize, index+1 { + packet := g.createChunkedMessage(index, chunkCountInt, id, &compressed) + _, err = g.send(packet.Bytes()) + if err != nil { + return 0, err + } + } + } else { + _, err = g.send(compressed.Bytes()) + if err != nil { + return 0, err + } + } + + n = len(message) + + return +} + +func (g *Gelf) createChunkedMessage(index int, chunkCountInt int, id []byte, compressed *bytes.Buffer) bytes.Buffer { + var packet bytes.Buffer + + chunksize := g.getChunksize() + + packet.Write(g.intToBytes(30)) + packet.Write(g.intToBytes(15)) + packet.Write(id) + + packet.Write(g.intToBytes(index)) + 
packet.Write(g.intToBytes(chunkCountInt)) + + packet.Write(compressed.Next(chunksize)) + + return packet +} + +func (g *Gelf) getChunksize() int { + if g.GelfConfig.Connection == "wan" { + return g.GelfConfig.MaxChunkSizeWan + } + + if g.GelfConfig.Connection == "lan" { + return g.GelfConfig.MaxChunkSizeLan + } + + return g.GelfConfig.MaxChunkSizeWan +} + +func (g *Gelf) intToBytes(i int) []byte { + buf := new(bytes.Buffer) + + binary.Write(buf, binary.LittleEndian, int8(i)) + return buf.Bytes() +} + +func (g *Gelf) compress(b []byte) bytes.Buffer { + var buf bytes.Buffer + comp := zlib.NewWriter(&buf) + + comp.Write(b) + comp.Close() + + return buf +} + +func (g *Gelf) send(b []byte) (n int, err error) { + udpAddr, err := net.ResolveUDPAddr("udp", g.GelfConfig.GraylogEndpoint) + if err != nil { + return + } + + conn, err := net.DialUDP("udp", nil, udpAddr) + if err != nil { + return + } + + n, err = conn.Write(b) + return +} + +type Graylog struct { + Servers []string + writer io.Writer +} + +var sampleConfig = ` + ## Udp endpoint for your graylog instance. + servers = ["127.0.0.1:12201", "192.168.1.1:12201"] +` + +func (g *Graylog) Connect() error { + writers := []io.Writer{} + + if len(g.Servers) == 0 { + g.Servers = append(g.Servers, "localhost:12201") + } + + for _, server := range g.Servers { + w := NewGelfWriter(GelfConfig{GraylogEndpoint: server}) + writers = append(writers, w) + } + + g.writer = io.MultiWriter(writers...) + return nil +} + +func (g *Graylog) Close() error { + return nil +} + +func (g *Graylog) SampleConfig() string { + return sampleConfig +} + +func (g *Graylog) Description() string { + return "Send telegraf metrics to graylog(s)" +} + +func (g *Graylog) Write(metrics []telegraf.Metric) error { + if len(metrics) == 0 { + return nil + } + + for _, metric := range metrics { + values, err := serialize(metric) + if err != nil { + return err + } + + for _, value := range values { + _, err := g.writer.Write([]byte(value)) + if err != nil { + return fmt.Errorf("FAILED to write message: %s, %s", value, err) + } + } + } + return nil +} + +func serialize(metric telegraf.Metric) ([]string, error) { + out := []string{} + + m := make(map[string]interface{}) + m["version"] = "1.1" + m["timestamp"] = metric.UnixNano() / 1000000000 + m["short_message"] = " " + m["name"] = metric.Name() + + if host, ok := metric.Tags()["host"]; ok { + m["host"] = host + } else { + host, err := os.Hostname() + if err != nil { + return []string{}, err + } + m["host"] = host + } + + for key, value := range metric.Fields() { + nkey := fmt.Sprintf("_%s", key) + m[nkey] = value + } + + serialized, err := ejson.Marshal(m) + if err != nil { + return []string{}, err + } + out = append(out, string(serialized)) + + return out, nil +} + +func init() { + outputs.Add("graylog", func() telegraf.Output { + return &Graylog{} + }) +} diff --git a/plugins/outputs/graylog/graylog_test.go b/plugins/outputs/graylog/graylog_test.go new file mode 100644 index 000000000..521f83dc1 --- /dev/null +++ b/plugins/outputs/graylog/graylog_test.go @@ -0,0 +1,55 @@ +package graylog + +import ( + "bytes" + "compress/zlib" + "encoding/json" + "io" + "net" + "sync" + "testing" + + "github.com/influxdata/telegraf/testutil" + "github.com/stretchr/testify/assert" +) + +func TestWrite(t *testing.T) { + var wg sync.WaitGroup + wg.Add(1) + go UDPServer(t, &wg) + + i := Graylog{ + Servers: []string{"127.0.0.1:12201"}, + } + i.Connect() + + metrics := testutil.MockMetrics() + metrics = append(metrics, testutil.TestMetric(int64(1234567890))) 
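+	// the UDP server below asserts that the first metric's value field
+	// arrives as the GELF field _value with value 1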
+ + i.Write(metrics) + + wg.Wait() + i.Close() +} + +type GelfObject map[string]interface{} + +func UDPServer(t *testing.T, wg *sync.WaitGroup) { + serverAddr, _ := net.ResolveUDPAddr("udp", "127.0.0.1:12201") + udpServer, _ := net.ListenUDP("udp", serverAddr) + defer wg.Done() + + bufR := make([]byte, 1024) + n, _, _ := udpServer.ReadFromUDP(bufR) + + b := bytes.NewReader(bufR[0:n]) + r, _ := zlib.NewReader(b) + + bufW := bytes.NewBuffer(nil) + io.Copy(bufW, r) + r.Close() + + var obj GelfObject + json.Unmarshal(bufW.Bytes(), &obj) + assert.Equal(t, obj["_value"], float64(1)) +} From c4cfdb8a25cc99f444f055cb9c68b7fe1310e116 Mon Sep 17 00:00:00 2001 From: Cameron Sparr Date: Tue, 31 May 2016 16:42:07 +0100 Subject: [PATCH 14/34] Revert "Revert graylog output" This reverts commit 4f27315720b443671937a8dbd16cf4feb0a5c388. --- README.md | 1 + etc/telegraf.conf | 6 ++++++ plugins/outputs/graylog/README.md | 9 +++++++++ plugins/outputs/graylog/graylog_test.go | 1 - 4 files changed, 16 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 7366d5986..7890fa113 100644 --- a/README.md +++ b/README.md @@ -233,6 +233,7 @@ want to add support for another service or third-party API. * [datadog](https://github.com/influxdata/telegraf/tree/master/plugins/outputs/datadog) * [file](https://github.com/influxdata/telegraf/tree/master/plugins/outputs/file) * [graphite](https://github.com/influxdata/telegraf/tree/master/plugins/outputs/graphite) +* [graylog](https://github.com/influxdata/telegraf/tree/master/plugins/outputs/graylog) * [instrumental](https://github.com/influxdata/telegraf/tree/master/plugins/outputs/instrumental) * [kafka](https://github.com/influxdata/telegraf/tree/master/plugins/outputs/kafka) * [librato](https://github.com/influxdata/telegraf/tree/master/plugins/outputs/librato) diff --git a/etc/telegraf.conf b/etc/telegraf.conf index d8fe3b865..e1ae2cdd7 100644 --- a/etc/telegraf.conf +++ b/etc/telegraf.conf @@ -205,6 +205,12 @@ # timeout = 2 +# # Send telegraf metrics to graylog(s) +# [[outputs.graylog]] +# ## Udp endpoint for your graylog instance. +# servers = ["127.0.0.1:12201", "192.168.1.1:12201"] + + # # Configuration for sending metrics to an Instrumental project # [[outputs.instrumental]] # ## Project API Token (required) diff --git a/plugins/outputs/graylog/README.md b/plugins/outputs/graylog/README.md index 26b8d8fc6..833482047 100644 --- a/plugins/outputs/graylog/README.md +++ b/plugins/outputs/graylog/README.md @@ -3,3 +3,12 @@ This plugin writes to a Graylog instance using the "gelf" format. It requires a `servers` name. + +### Configuration: + +```toml +# Send telegraf metrics to graylog(s) +[[outputs.graylog]] + ## Udp endpoint for your graylog instance. 
+ servers = ["127.0.0.1:12201", "192.168.1.1:12201"] +``` diff --git a/plugins/outputs/graylog/graylog_test.go b/plugins/outputs/graylog/graylog_test.go index 521f83dc1..fd8a418f4 100644 --- a/plugins/outputs/graylog/graylog_test.go +++ b/plugins/outputs/graylog/graylog_test.go @@ -24,7 +24,6 @@ func TestWrite(t *testing.T) { i.Connect() metrics := testutil.MockMetrics() - metrics = append(metrics, testutil.TestMetric(int64(1234567890))) i.Write(metrics) From 0aff7a0bc15670cfa5d71e5ce8cb50a61057a6d2 Mon Sep 17 00:00:00 2001 From: Cameron Sparr Date: Tue, 31 May 2016 16:58:41 +0100 Subject: [PATCH 15/34] Disk plugin: return immediately if usage fails closes #1297 --- CHANGELOG.md | 1 + plugins/inputs/system/ps.go | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 71d70e976..cc8f5a349 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -44,6 +44,7 @@ time before a new metric is included by the plugin. - [#1258](https://github.com/influxdata/telegraf/pull/1258): Fix potential kernel plugin integer parse error. - [#1268](https://github.com/influxdata/telegraf/pull/1268): Fix potential influxdb input type assertion panic. - [#1283](https://github.com/influxdata/telegraf/pull/1283): Still send processes metrics if a process exited during metric collection. +- [#1297](https://github.com/influxdata/telegraf/issues/1297): disk plugin panic when usage grab fails. ## v0.13.1 [2016-05-24] diff --git a/plugins/inputs/system/ps.go b/plugins/inputs/system/ps.go index 23bfe1628..3ed123d15 100644 --- a/plugins/inputs/system/ps.go +++ b/plugins/inputs/system/ps.go @@ -84,10 +84,10 @@ func (s *systemPS) DiskUsage( mountpoint := os.Getenv("HOST_MOUNT_PREFIX") + p.Mountpoint if _, err := os.Stat(mountpoint); err == nil { du, err := disk.Usage(mountpoint) - du.Path = p.Mountpoint if err != nil { return nil, err } + du.Path = p.Mountpoint // If the mount point is a member of the exclude set, // don't gather info on it. _, ok := fstypeExcludeSet[p.Fstype] From 9f7a758bf94db6c4a1e80e29034698371885a943 Mon Sep 17 00:00:00 2001 From: Rickard von Essen Date: Tue, 31 May 2016 18:49:56 +0200 Subject: [PATCH 16/34] RFR: Initial support for ZFS on FreeBSD (#1224) * WIP: Initial support for ZFS on FreeBSD * Added build directives * Ignore 'kstatPath' config option on FreeBSD * Added tests for ZFS FreeBSD input plugin. * Updated the README to confrom with the guidelines and added FreeBSD info * Fixed indents * Spell check --- plugins/inputs/zfs/README.md | 409 ++++++++++-------- plugins/inputs/zfs/zfs.go | 141 +----- plugins/inputs/zfs/zfs_freebsd.go | 140 ++++++ plugins/inputs/zfs/zfs_freebsd_test.go | 148 +++++++ plugins/inputs/zfs/zfs_linux.go | 131 ++++++ .../zfs/{zfs_test.go => zfs_linux_test.go} | 2 + 6 files changed, 668 insertions(+), 303 deletions(-) create mode 100644 plugins/inputs/zfs/zfs_freebsd.go create mode 100644 plugins/inputs/zfs/zfs_freebsd_test.go create mode 100644 plugins/inputs/zfs/zfs_linux.go rename plugins/inputs/zfs/{zfs_test.go => zfs_linux_test.go} (99%) diff --git a/plugins/inputs/zfs/README.md b/plugins/inputs/zfs/README.md index 72510d45b..c6abc0600 100644 --- a/plugins/inputs/zfs/README.md +++ b/plugins/inputs/zfs/README.md @@ -1,227 +1,294 @@ -# Telegraf plugin: zfs +# ZFS plugin -Get ZFS stat from /proc/spl/kstat/zfs +This ZFS plugin provides metrics from your ZFS filesystems. It supports ZFS on +Linux and FreeBSD. It gets ZFS stat from `/proc/spl/kstat/zfs` on Linux and +from `sysctl` and `zpool` on FreeBSD. 
-# Measurements +### Configuration: -Meta: +```toml +[[inputs.zfs]] + ## ZFS kstat path. Ignored on FreeBSD + ## If not specified, then default is: + # kstatPath = "/proc/spl/kstat/zfs" -- tags: `pools=POOL1::POOL2` + ## By default, telegraf gather all zfs stats + ## If not specified, then default is: + # kstatMetrics = ["arcstats", "zfetchstats", "vdev_cache_stats"] -Measurement names: + ## By default, don't gather zpool stats + # poolMetrics = false +``` -- arcstats_hits -- arcstats_misses +### Measurements & Fields: + +By default this plugin collects metrics about **Arc**, **Zfetch**, and +**Vdev cache**. All these metrics are either counters or measure sizes +in bytes. These metrics will be in the `zfs` measurement with the field +names listed bellow. + +If `poolMetrics` is enabled then additional metrics will be gathered for +each pool. + +- zfs + With fields listed bellow. + +#### Arc Stats + +- arcstats_allocated (FreeBSD only) +- arcstats_anon_evict_data (Linux only) +- arcstats_anon_evict_metadata (Linux only) +- arcstats_anon_evictable_data (FreeBSD only) +- arcstats_anon_evictable_metadata (FreeBSD only) +- arcstats_anon_size +- arcstats_arc_loaned_bytes (Linux only) +- arcstats_arc_meta_limit +- arcstats_arc_meta_max +- arcstats_arc_meta_min (FreeBSD only) +- arcstats_arc_meta_used +- arcstats_arc_no_grow (Linux only) +- arcstats_arc_prune (Linux only) +- arcstats_arc_tempreserve (Linux only) +- arcstats_c +- arcstats_c_max +- arcstats_c_min +- arcstats_data_size +- arcstats_deleted - arcstats_demand_data_hits - arcstats_demand_data_misses +- arcstats_demand_hit_predictive_prefetch (FreeBSD only) - arcstats_demand_metadata_hits - arcstats_demand_metadata_misses +- arcstats_duplicate_buffers +- arcstats_duplicate_buffers_size +- arcstats_duplicate_reads +- arcstats_evict_l2_cached +- arcstats_evict_l2_eligible +- arcstats_evict_l2_ineligible +- arcstats_evict_l2_skip (FreeBSD only) +- arcstats_evict_not_enough (FreeBSD only) +- arcstats_evict_skip +- arcstats_hash_chain_max +- arcstats_hash_chains +- arcstats_hash_collisions +- arcstats_hash_elements +- arcstats_hash_elements_max +- arcstats_hdr_size +- arcstats_hits +- arcstats_l2_abort_lowmem +- arcstats_l2_asize +- arcstats_l2_cdata_free_on_write +- arcstats_l2_cksum_bad +- arcstats_l2_compress_failures +- arcstats_l2_compress_successes +- arcstats_l2_compress_zeros +- arcstats_l2_evict_l1cached (FreeBSD only) +- arcstats_l2_evict_lock_retry +- arcstats_l2_evict_reading +- arcstats_l2_feeds +- arcstats_l2_free_on_write +- arcstats_l2_hdr_size +- arcstats_l2_hits +- arcstats_l2_io_error +- arcstats_l2_misses +- arcstats_l2_read_bytes +- arcstats_l2_rw_clash +- arcstats_l2_size +- arcstats_l2_write_buffer_bytes_scanned (FreeBSD only) +- arcstats_l2_write_buffer_iter (FreeBSD only) +- arcstats_l2_write_buffer_list_iter (FreeBSD only) +- arcstats_l2_write_buffer_list_null_iter (FreeBSD only) +- arcstats_l2_write_bytes +- arcstats_l2_write_full (FreeBSD only) +- arcstats_l2_write_in_l2 (FreeBSD only) +- arcstats_l2_write_io_in_progress (FreeBSD only) +- arcstats_l2_write_not_cacheable (FreeBSD only) +- arcstats_l2_write_passed_headroom (FreeBSD only) +- arcstats_l2_write_pios (FreeBSD only) +- arcstats_l2_write_spa_mismatch (FreeBSD only) +- arcstats_l2_write_trylock_fail (FreeBSD only) +- arcstats_l2_writes_done +- arcstats_l2_writes_error +- arcstats_l2_writes_hdr_miss (Linux only) +- arcstats_l2_writes_lock_retry (FreeBSD only) +- arcstats_l2_writes_sent +- arcstats_memory_direct_count (Linux only) +- 
arcstats_memory_indirect_count (Linux only) +- arcstats_memory_throttle_count +- arcstats_meta_size (Linux only) +- arcstats_mfu_evict_data (Linux only) +- arcstats_mfu_evict_metadata (Linux only) +- arcstats_mfu_ghost_evict_data (Linux only) +- arcstats_mfu_ghost_evict_metadata (Linux only) +- arcstats_metadata_size (FreeBSD only) +- arcstats_mfu_evictable_data (FreeBSD only) +- arcstats_mfu_evictable_metadata (FreeBSD only) +- arcstats_mfu_ghost_evictable_data (FreeBSD only) +- arcstats_mfu_ghost_evictable_metadata (FreeBSD only) +- arcstats_mfu_ghost_hits +- arcstats_mfu_ghost_size +- arcstats_mfu_hits +- arcstats_mfu_size +- arcstats_misses +- arcstats_mru_evict_data (Linux only) +- arcstats_mru_evict_metadata (Linux only) +- arcstats_mru_ghost_evict_data (Linux only) +- arcstats_mru_ghost_evict_metadata (Linux only) +- arcstats_mru_evictable_data (FreeBSD only) +- arcstats_mru_evictable_metadata (FreeBSD only) +- arcstats_mru_ghost_evictable_data (FreeBSD only) +- arcstats_mru_ghost_evictable_metadata (FreeBSD only) +- arcstats_mru_ghost_hits +- arcstats_mru_ghost_size +- arcstats_mru_hits +- arcstats_mru_size +- arcstats_mutex_miss +- arcstats_other_size +- arcstats_p - arcstats_prefetch_data_hits - arcstats_prefetch_data_misses - arcstats_prefetch_metadata_hits - arcstats_prefetch_metadata_misses -- arcstats_mru_hits -- arcstats_mru_ghost_hits -- arcstats_mfu_hits -- arcstats_mfu_ghost_hits -- arcstats_deleted -- arcstats_recycle_miss -- arcstats_mutex_miss -- arcstats_evict_skip -- arcstats_evict_l2_cached -- arcstats_evict_l2_eligible -- arcstats_evict_l2_ineligible -- arcstats_hash_elements -- arcstats_hash_elements_max -- arcstats_hash_collisions -- arcstats_hash_chains -- arcstats_hash_chain_max -- arcstats_p -- arcstats_c -- arcstats_c_min -- arcstats_c_max +- arcstats_recycle_miss (Linux only) - arcstats_size -- arcstats_hdr_size -- arcstats_data_size -- arcstats_meta_size -- arcstats_other_size -- arcstats_anon_size -- arcstats_anon_evict_data -- arcstats_anon_evict_metadata -- arcstats_mru_size -- arcstats_mru_evict_data -- arcstats_mru_evict_metadata -- arcstats_mru_ghost_size -- arcstats_mru_ghost_evict_data -- arcstats_mru_ghost_evict_metadata -- arcstats_mfu_size -- arcstats_mfu_evict_data -- arcstats_mfu_evict_metadata -- arcstats_mfu_ghost_size -- arcstats_mfu_ghost_evict_data -- arcstats_mfu_ghost_evict_metadata -- arcstats_l2_hits -- arcstats_l2_misses -- arcstats_l2_feeds -- arcstats_l2_rw_clash -- arcstats_l2_read_bytes -- arcstats_l2_write_bytes -- arcstats_l2_writes_sent -- arcstats_l2_writes_done -- arcstats_l2_writes_error -- arcstats_l2_writes_hdr_miss -- arcstats_l2_evict_lock_retry -- arcstats_l2_evict_reading -- arcstats_l2_free_on_write -- arcstats_l2_cdata_free_on_write -- arcstats_l2_abort_lowmem -- arcstats_l2_cksum_bad -- arcstats_l2_io_error -- arcstats_l2_size -- arcstats_l2_asize -- arcstats_l2_hdr_size -- arcstats_l2_compress_successes -- arcstats_l2_compress_zeros -- arcstats_l2_compress_failures -- arcstats_memory_throttle_count -- arcstats_duplicate_buffers -- arcstats_duplicate_buffers_size -- arcstats_duplicate_reads -- arcstats_memory_direct_count -- arcstats_memory_indirect_count -- arcstats_arc_no_grow -- arcstats_arc_tempreserve -- arcstats_arc_loaned_bytes -- arcstats_arc_prune -- arcstats_arc_meta_used -- arcstats_arc_meta_limit -- arcstats_arc_meta_max +- arcstats_sync_wait_for_async (FreeBSD only) + +#### Zfetch Stats + +- zfetchstats_bogus_streams (Linux only) +- zfetchstats_colinear_hits (Linux only) +- zfetchstats_colinear_misses 
(Linux only) - zfetchstats_hits +- zfetchstats_max_streams (FreeBSD only) - zfetchstats_misses -- zfetchstats_colinear_hits -- zfetchstats_colinear_misses -- zfetchstats_stride_hits -- zfetchstats_stride_misses -- zfetchstats_reclaim_successes -- zfetchstats_reclaim_failures -- zfetchstats_streams_resets -- zfetchstats_streams_noresets -- zfetchstats_bogus_streams +- zfetchstats_reclaim_failures (Linux only) +- zfetchstats_reclaim_successes (Linux only) +- zfetchstats_streams_noresets (Linux only) +- zfetchstats_streams_resets (Linux only) +- zfetchstats_stride_hits (Linux only) +- zfetchstats_stride_misses (Linux only) + +#### Vdev Cache Stats + - vdev_cache_stats_delegations - vdev_cache_stats_hits - vdev_cache_stats_misses +#### Pool Metrics (optional) + +On Linux: + +- zfs_pool + - nread (integer, ) + - nwritten (integer, ) + - reads (integer, ) + - writes (integer, ) + - wtime (integer, ) + - wlentime (integer, ) + - wupdate (integer, ) + - rtime (integer, ) + - rlentime (integer, ) + - rupdate (integer, ) + - wcnt (integer, ) + - rcnt (integer, ) + +On FreeBSD: + +- zfs_pool + - allocated (integer, bytes) + - capacity (integer, bytes) + - dedupratio (float, ratio) + - free (integer, bytes) + - size (integer, bytes) + - fragmentation (integer, percent) + +### Tags: + +- ZFS stats (`zfs`) will have the following tag: + - pools - A `::` concatenated list of all ZFS pools on the machine. + +- Pool metrics (`zfs_pool`) will have the following tag: + - pool - with the name of the pool which the metrics are for. + - health - the health status of the pool. (FreeBSD only) + +### Example Output: + +``` +$ ./telegraf -config telegraf.conf -input-filter zfs -test +* Plugin: zfs, Collection 1 +> zfs_pool,health=ONLINE,pool=zroot allocated=1578590208i,capacity=2i,dedupratio=1,fragmentation=1i,free=64456531968i,size=66035122176i 1464473103625653908 +> zfs,pools=zroot 
arcstats_allocated=4167764i,arcstats_anon_evictable_data=0i,arcstats_anon_evictable_metadata=0i,arcstats_anon_size=16896i,arcstats_arc_meta_limit=10485760i,arcstats_arc_meta_max=115269568i,arcstats_arc_meta_min=8388608i,arcstats_arc_meta_used=51977456i,arcstats_c=16777216i,arcstats_c_max=41943040i,arcstats_c_min=16777216i,arcstats_data_size=0i,arcstats_deleted=1699340i,arcstats_demand_data_hits=14836131i,arcstats_demand_data_misses=2842945i,arcstats_demand_hit_predictive_prefetch=0i,arcstats_demand_metadata_hits=1655006i,arcstats_demand_metadata_misses=830074i,arcstats_duplicate_buffers=0i,arcstats_duplicate_buffers_size=0i,arcstats_duplicate_reads=123i,arcstats_evict_l2_cached=0i,arcstats_evict_l2_eligible=332172623872i,arcstats_evict_l2_ineligible=6168576i,arcstats_evict_l2_skip=0i,arcstats_evict_not_enough=12189444i,arcstats_evict_skip=195190764i,arcstats_hash_chain_max=2i,arcstats_hash_chains=10i,arcstats_hash_collisions=43134i,arcstats_hash_elements=2268i,arcstats_hash_elements_max=6136i,arcstats_hdr_size=565632i,arcstats_hits=16515778i,arcstats_l2_abort_lowmem=0i,arcstats_l2_asize=0i,arcstats_l2_cdata_free_on_write=0i,arcstats_l2_cksum_bad=0i,arcstats_l2_compress_failures=0i,arcstats_l2_compress_successes=0i,arcstats_l2_compress_zeros=0i,arcstats_l2_evict_l1cached=0i,arcstats_l2_evict_lock_retry=0i,arcstats_l2_evict_reading=0i,arcstats_l2_feeds=0i,arcstats_l2_free_on_write=0i,arcstats_l2_hdr_size=0i,arcstats_l2_hits=0i,arcstats_l2_io_error=0i,arcstats_l2_misses=0i,arcstats_l2_read_bytes=0i,arcstats_l2_rw_clash=0i,arcstats_l2_size=0i,arcstats_l2_write_buffer_bytes_scanned=0i,arcstats_l2_write_buffer_iter=0i,arcstats_l2_write_buffer_list_iter=0i,arcstats_l2_write_buffer_list_null_iter=0i,arcstats_l2_write_bytes=0i,arcstats_l2_write_full=0i,arcstats_l2_write_in_l2=0i,arcstats_l2_write_io_in_progress=0i,arcstats_l2_write_not_cacheable=380i,arcstats_l2_write_passed_headroom=0i,arcstats_l2_write_pios=0i,arcstats_l2_write_spa_mismatch=0i,arcstats_l2_write_trylock_fail=0i,arcstats_l2_writes_done=0i,arcstats_l2_writes_error=0i,arcstats_l2_writes_lock_retry=0i,arcstats_l2_writes_sent=0i,arcstats_memory_throttle_count=0i,arcstats_metadata_size=17014784i,arcstats_mfu_evictable_data=0i,arcstats_mfu_evictable_metadata=16384i,arcstats_mfu_ghost_evictable_data=5723648i,arcstats_mfu_ghost_evictable_metadata=10709504i,arcstats_mfu_ghost_hits=1315619i,arcstats_mfu_ghost_size=16433152i,arcstats_mfu_hits=7646611i,arcstats_mfu_size=305152i,arcstats_misses=3676993i,arcstats_mru_evictable_data=0i,arcstats_mru_evictable_metadata=0i,arcstats_mru_ghost_evictable_data=0i,arcstats_mru_ghost_evictable_metadata=80896i,arcstats_mru_ghost_hits=324250i,arcstats_mru_ghost_size=80896i,arcstats_mru_hits=8844526i,arcstats_mru_size=16693248i,arcstats_mutex_miss=354023i,arcstats_other_size=34397040i,arcstats_p=4172800i,arcstats_prefetch_data_hits=0i,arcstats_prefetch_data_misses=0i,arcstats_prefetch_metadata_hits=24641i,arcstats_prefetch_metadata_misses=3974i,arcstats_size=51977456i,arcstats_sync_wait_for_async=0i,vdev_cache_stats_delegations=779i,vdev_cache_stats_hits=323123i,vdev_cache_stats_misses=59929i,zfetchstats_hits=0i,zfetchstats_max_streams=0i,zfetchstats_misses=0i 1464473103634124908 +``` + ### Description -``` -arcstats_hits - Total amount of cache hits in the arc. +A short description for some of the metrics. -arcstats_misses - Total amount of cache misses in the arc. +#### Arc Stats -arcstats_demand_data_hits - Amount of cache hits for demand data, this is what matters (is good) for your application/share. 
+`arcstats_hits` Total amount of cache hits in the arc. -arcstats_demand_data_misses - Amount of cache misses for demand data, this is what matters (is bad) for your application/share. +`arcstats_misses` Total amount of cache misses in the arc. -arcstats_demand_metadata_hits - Ammount of cache hits for demand metadata, this matters (is good) for getting filesystem data (ls,find,…) +`arcstats_demand_data_hits` Amount of cache hits for demand data, this is what matters (is good) for your application/share. -arcstats_demand_metadata_misses - Ammount of cache misses for demand metadata, this matters (is bad) for getting filesystem data (ls,find,…) +`arcstats_demand_data_misses` Amount of cache misses for demand data, this is what matters (is bad) for your application/share. -arcstats_prefetch_data_hits - The zfs prefetcher tried to prefetch somethin, but it was allready cached (boring) +`arcstats_demand_metadata_hits` Amount of cache hits for demand metadata, this matters (is good) for getting filesystem data (ls,find,…) -arcstats_prefetch_data_misses - The zfs prefetcher prefetched something which was not in the cache (good job, could become a demand hit in the future) +`arcstats_demand_metadata_misses` Amount of cache misses for demand metadata, this matters (is bad) for getting filesystem data (ls,find,…) -arcstats_prefetch_metadata_hits - Same as above, but for metadata +`arcstats_prefetch_data_hits` The zfs prefetcher tried to prefetch something, but it was already cached (boring) -arcstats_prefetch_metadata_misses - Same as above, but for metadata +`arcstats_prefetch_data_misses` The zfs prefetcher prefetched something which was not in the cache (good job, could become a demand hit in the future) -arcstats_mru_hits - Cache hit in the “most recently used cache”, we move this to the mfu cache. +`arcstats_prefetch_metadata_hits` Same as above, but for metadata -arcstats_mru_ghost_hits - Cache hit in the “most recently used ghost list” we had this item in the cache, but evicted it, maybe we should increase the mru cache size. +`arcstats_prefetch_metadata_misses` Same as above, but for metadata -arcstats_mfu_hits - Cache hit in the “most freqently used cache” we move this to the begining of the mfu cache. +`arcstats_mru_hits` Cache hit in the “most recently used cache”, we move this to the mfu cache. -arcstats_mfu_ghost_hits - Cache hit in the “most frequently used ghost list” we had this item in the cache, but evicted it, maybe we should increase the mfu cache size. +`arcstats_mru_ghost_hits` Cache hit in the “most recently used ghost list” we had this item in the cache, but evicted it, maybe we should increase the mru cache size. -arcstats_allocated - New data is written to the cache. +`arcstats_mfu_hits` Cache hit in the “most frequently used cache” we move this to the beginning of the mfu cache. -arcstats_deleted - Old data is evicted (deleted) from the cache. +`arcstats_mfu_ghost_hits` Cache hit in the “most frequently used ghost list” we had this item in the cache, but evicted it, maybe we should increase the mfu cache size. -arcstats_evict_l2_cached - We evicted something from the arc, but its still cached in the l2 if we need it. +`arcstats_allocated` New data is written to the cache. -arcstats_evict_l2_eligible - We evicted something from the arc, and it’s not in the l2 this is sad. (maybe we hadn’t had enough time to store it there) +`arcstats_deleted` Old data is evicted (deleted) from the cache. -arcstats_evict_l2_ineligible - We evicted something which cannot be stored in the l2. 
-   Reasons could be:
-     - We have multiple pools, we evicted something from a pool whithot an l2 device.
-     - The zfs property secondarycache.
+`arcstats_evict_l2_cached` We evicted something from the arc, but it’s still cached in the l2 if we need it.

-arcstats_c - Arc target size, this is the size the system thinks the arc should have.
+`arcstats_evict_l2_eligible` We evicted something from the arc, and it’s not in the l2; this is sad. (maybe we hadn’t had enough time to store it there)

-arcstats_size - Total size of the arc.
+`arcstats_evict_l2_ineligible` We evicted something which cannot be stored in the l2.
+  Reasons could be:
+  - We have multiple pools, we evicted something from a pool without an l2 device.
+  - The zfs property secondarycache.

-arcstats_l2_hits - Hits to the L2 cache. (It was not in the arc, but in the l2 cache)
+`arcstats_c` Arc target size, this is the size the system thinks the arc should have.

-arcstats_l2_misses - Miss to the L2 cache. (It was not in the arc, and not in the l2 cache)
+`arcstats_size` Total size of the arc.

-arcstats_l2_size - Size of the l2 cache.
+`arcstats_l2_hits` Hits to the L2 cache. (It was not in the arc, but in the l2 cache)

-arcstats_l2_hdr_size - Size of the metadata in the arc (ram) used to manage (lookup if someting is in the l2) the l2 cache.
+`arcstats_l2_misses` Misses to the L2 cache. (It was not in the arc, and not in the l2 cache)

+`arcstats_l2_size` Size of the l2 cache.

+`arcstats_l2_hdr_size` Size of the metadata in the arc (ram) used to manage (lookup if something is in the l2) the l2 cache.

-zfetchstats_hits - Counts the number of cache hits, to items wich are in the cache because of the prefetcher.
+#### Zfetch Stats

-zfetchstats_colinear_hits - Counts the number of cache hits, to items wich are in the cache because of the prefetcher (prefetched linear reads)
+`zfetchstats_hits` Counts the number of cache hits, to items which are in the cache because of the prefetcher.

-zfetchstats_stride_hits - Counts the number of cache hits, to items wich are in the cache because of the prefetcher (prefetched stride reads)
+`zfetchstats_colinear_hits` Counts the number of cache hits, to items which are in the cache because of the prefetcher (prefetched linear reads)

+`zfetchstats_stride_hits` Counts the number of cache hits, to items which are in the cache because of the prefetcher (prefetched stride reads)

-vdev_cache_stats_hits - Hits to the vdev (device level) cache.
+#### Vdev Cache Stats

-vdev_cache_stats_misses - Misses to the vdev (device level) cache.
-```
-
-# Default config
-
-```
-[zfs]
-  # ZFS kstat path
-  # If not specified, then default is:
-  # kstatPath = "/proc/spl/kstat/zfs"
-  #
-  # By default, telegraf gather all zfs stats
-  # If not specified, then default is:
-  # kstatMetrics = ["arcstats", "zfetchstats", "vdev_cache_stats"]
-```
+`vdev_cache_stats_hits` Hits to the vdev (device level) cache.
+`vdev_cache_stats_misses` Misses to the vdev (device level) cache.
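+
+On FreeBSD, the raw data can be spot-checked with the same commands the
+plugin shells out to (shown for manual verification; exact output varies
+by system):
+
+```
+$ sysctl -q kstat.zfs.misc.arcstats
+$ zpool list -Hp
+```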
diff --git a/plugins/inputs/zfs/zfs.go b/plugins/inputs/zfs/zfs.go index bcbe03e95..05ca346b0 100644 --- a/plugins/inputs/zfs/zfs.go +++ b/plugins/inputs/zfs/zfs.go @@ -1,38 +1,27 @@ package zfs -import ( - "fmt" - "path/filepath" - "strconv" - "strings" - - "github.com/influxdata/telegraf" - "github.com/influxdata/telegraf/internal" - "github.com/influxdata/telegraf/plugins/inputs" -) +type Sysctl func(metric string) ([]string, error) +type Zpool func() ([]string, error) type Zfs struct { KstatPath string KstatMetrics []string PoolMetrics bool -} - -type poolInfo struct { - name string - ioFilename string + sysctl Sysctl + zpool Zpool } var sampleConfig = ` - ## ZFS kstat path + ## ZFS kstat path. Ignored on FreeBSD ## If not specified, then default is: - kstatPath = "/proc/spl/kstat/zfs" + # kstatPath = "/proc/spl/kstat/zfs" ## By default, telegraf gather all zfs stats ## If not specified, then default is: - kstatMetrics = ["arcstats", "zfetchstats", "vdev_cache_stats"] + # kstatMetrics = ["arcstats", "zfetchstats", "vdev_cache_stats"] ## By default, don't gather zpool stats - poolMetrics = false + # poolMetrics = false ` func (z *Zfs) SampleConfig() string { @@ -40,117 +29,5 @@ func (z *Zfs) SampleConfig() string { } func (z *Zfs) Description() string { - return "Read metrics of ZFS from arcstats, zfetchstats and vdev_cache_stats" -} - -func getPools(kstatPath string) []poolInfo { - pools := make([]poolInfo, 0) - poolsDirs, _ := filepath.Glob(kstatPath + "/*/io") - - for _, poolDir := range poolsDirs { - poolDirSplit := strings.Split(poolDir, "/") - pool := poolDirSplit[len(poolDirSplit)-2] - pools = append(pools, poolInfo{name: pool, ioFilename: poolDir}) - } - - return pools -} - -func getTags(pools []poolInfo) map[string]string { - var poolNames string - - for _, pool := range pools { - if len(poolNames) != 0 { - poolNames += "::" - } - poolNames += pool.name - } - - return map[string]string{"pools": poolNames} -} - -func gatherPoolStats(pool poolInfo, acc telegraf.Accumulator) error { - lines, err := internal.ReadLines(pool.ioFilename) - if err != nil { - return err - } - - if len(lines) != 3 { - return err - } - - keys := strings.Fields(lines[1]) - values := strings.Fields(lines[2]) - - keyCount := len(keys) - - if keyCount != len(values) { - return fmt.Errorf("Key and value count don't match Keys:%v Values:%v", keys, values) - } - - tag := map[string]string{"pool": pool.name} - fields := make(map[string]interface{}) - for i := 0; i < keyCount; i++ { - value, err := strconv.ParseInt(values[i], 10, 64) - if err != nil { - return err - } - fields[keys[i]] = value - } - acc.AddFields("zfs_pool", fields, tag) - - return nil -} - -func (z *Zfs) Gather(acc telegraf.Accumulator) error { - kstatMetrics := z.KstatMetrics - if len(kstatMetrics) == 0 { - kstatMetrics = []string{"arcstats", "zfetchstats", "vdev_cache_stats"} - } - - kstatPath := z.KstatPath - if len(kstatPath) == 0 { - kstatPath = "/proc/spl/kstat/zfs" - } - - pools := getPools(kstatPath) - tags := getTags(pools) - - if z.PoolMetrics { - for _, pool := range pools { - err := gatherPoolStats(pool, acc) - if err != nil { - return err - } - } - } - - fields := make(map[string]interface{}) - for _, metric := range kstatMetrics { - lines, err := internal.ReadLines(kstatPath + "/" + metric) - if err != nil { - return err - } - for i, line := range lines { - if i == 0 || i == 1 { - continue - } - if len(line) < 1 { - continue - } - rawData := strings.Split(line, " ") - key := metric + "_" + rawData[0] - rawValue := 
rawData[len(rawData)-1] - value, _ := strconv.ParseInt(rawValue, 10, 64) - fields[key] = value - } - } - acc.AddFields("zfs", fields, tags) - return nil -} - -func init() { - inputs.Add("zfs", func() telegraf.Input { - return &Zfs{} - }) + return "Read metrics of ZFS from arcstats, zfetchstats, vdev_cache_stats, and pools" } diff --git a/plugins/inputs/zfs/zfs_freebsd.go b/plugins/inputs/zfs/zfs_freebsd.go new file mode 100644 index 000000000..7ee72a140 --- /dev/null +++ b/plugins/inputs/zfs/zfs_freebsd.go @@ -0,0 +1,140 @@ +// +build freebsd + +package zfs + +import ( + "bytes" + "fmt" + "os/exec" + "strconv" + "strings" + + "github.com/influxdata/telegraf" + "github.com/influxdata/telegraf/plugins/inputs" +) + +func (z *Zfs) gatherPoolStats(acc telegraf.Accumulator) (string, error) { + + lines, err := z.zpool() + if err != nil { + return "", err + } + + pools := []string{} + for _, line := range lines { + col := strings.Split(line, "\t") + + pools = append(pools, col[0]) + } + + if z.PoolMetrics { + for _, line := range lines { + col := strings.Split(line, "\t") + tags := map[string]string{"pool": col[0], "health": col[8]} + fields := map[string]interface{}{} + + size, err := strconv.ParseInt(col[1], 10, 64) + if err != nil { + return "", fmt.Errorf("Error parsing size: %s", err) + } + fields["size"] = size + + alloc, err := strconv.ParseInt(col[2], 10, 64) + if err != nil { + return "", fmt.Errorf("Error parsing allocation: %s", err) + } + fields["allocated"] = alloc + + free, err := strconv.ParseInt(col[3], 10, 64) + if err != nil { + return "", fmt.Errorf("Error parsing free: %s", err) + } + fields["free"] = free + + frag, err := strconv.ParseInt(strings.TrimSuffix(col[5], "%"), 10, 0) + if err != nil { // This might be - for RO devs + frag = 0 + } + fields["fragmentation"] = frag + + capval, err := strconv.ParseInt(col[6], 10, 0) + if err != nil { + return "", fmt.Errorf("Error parsing capacity: %s", err) + } + fields["capacity"] = capval + + dedup, err := strconv.ParseFloat(strings.TrimSuffix(col[7], "x"), 32) + if err != nil { + return "", fmt.Errorf("Error parsing dedupratio: %s", err) + } + fields["dedupratio"] = dedup + + acc.AddFields("zfs_pool", fields, tags) + } + } + + return strings.Join(pools, "::"), nil +} + +func (z *Zfs) Gather(acc telegraf.Accumulator) error { + kstatMetrics := z.KstatMetrics + if len(kstatMetrics) == 0 { + kstatMetrics = []string{"arcstats", "zfetchstats", "vdev_cache_stats"} + } + + tags := map[string]string{} + poolNames, err := z.gatherPoolStats(acc) + if err != nil { + return err + } + tags["pools"] = poolNames + + fields := make(map[string]interface{}) + for _, metric := range kstatMetrics { + stdout, err := z.sysctl(metric) + if err != nil { + return err + } + for _, line := range stdout { + rawData := strings.Split(line, ": ") + key := metric + "_" + strings.Split(rawData[0], ".")[4] + value, _ := strconv.ParseInt(rawData[1], 10, 64) + fields[key] = value + } + } + acc.AddFields("zfs", fields, tags) + return nil +} + +func run(command string, args ...string) ([]string, error) { + cmd := exec.Command(command, args...) + var outbuf, errbuf bytes.Buffer + cmd.Stdout = &outbuf + cmd.Stderr = &errbuf + err := cmd.Run() + + stdout := strings.TrimSpace(outbuf.String()) + stderr := strings.TrimSpace(errbuf.String()) + + if _, ok := err.(*exec.ExitError); ok { + return nil, fmt.Errorf("%s error: %s", command, stderr) + } + return strings.Split(stdout, "\n"), nil +} + +func zpool() ([]string, error) { + return run("zpool", []string{"list", "-Hp"}...) 
+} + +func sysctl(metric string) ([]string, error) { + return run("sysctl", []string{"-q", fmt.Sprintf("kstat.zfs.misc.%s", metric)}...) +} + +func init() { + inputs.Add("zfs", func() telegraf.Input { + return &Zfs{ + sysctl: sysctl, + zpool: zpool, + } + }) +} diff --git a/plugins/inputs/zfs/zfs_freebsd_test.go b/plugins/inputs/zfs/zfs_freebsd_test.go new file mode 100644 index 000000000..193c2816f --- /dev/null +++ b/plugins/inputs/zfs/zfs_freebsd_test.go @@ -0,0 +1,148 @@ +// +build freebsd + +package zfs + +import ( + "fmt" + "testing" + + "github.com/influxdata/telegraf/testutil" + "github.com/stretchr/testify/require" +) + +// $ zpool list -Hp +var zpool_output = []string{ + "freenas-boot 30601641984 2022177280 28579464704 - - 6 1.00x ONLINE -", + "red1 8933531975680 1126164848640 7807367127040 - 8% 12 1.83x ONLINE /mnt", + "temp1 2989297238016 1626309320704 1362987917312 - 38% 54 1.28x ONLINE /mnt", + "temp2 2989297238016 626958278656 2362338959360 - 12% 20 1.00x ONLINE /mnt", +} + +func mock_zpool() ([]string, error) { + return zpool_output, nil +} + +// sysctl -q kstat.zfs.misc.arcstats + +// sysctl -q kstat.zfs.misc.vdev_cache_stats +var kstat_vdev_cache_stats_output = []string{ + "kstat.zfs.misc.vdev_cache_stats.misses: 87789", + "kstat.zfs.misc.vdev_cache_stats.hits: 465583", + "kstat.zfs.misc.vdev_cache_stats.delegations: 6952", +} + +// sysctl -q kstat.zfs.misc.zfetchstats +var kstat_zfetchstats_output = []string{ + "kstat.zfs.misc.zfetchstats.max_streams: 0", + "kstat.zfs.misc.zfetchstats.misses: 0", + "kstat.zfs.misc.zfetchstats.hits: 0", +} + +func mock_sysctl(metric string) ([]string, error) { + if metric == "vdev_cache_stats" { + return kstat_vdev_cache_stats_output, nil + } + if metric == "zfetchstats" { + return kstat_zfetchstats_output, nil + } + return []string{}, fmt.Errorf("Invalid arg") +} + +func TestZfsPoolMetrics(t *testing.T) { + var acc testutil.Accumulator + + z := &Zfs{ + KstatMetrics: []string{"vdev_cache_stats"}, + sysctl: mock_sysctl, + zpool: mock_zpool, + } + err := z.Gather(&acc) + require.NoError(t, err) + + require.False(t, acc.HasMeasurement("zfs_pool")) + acc.Metrics = nil + + z = &Zfs{ + KstatMetrics: []string{"vdev_cache_stats"}, + PoolMetrics: true, + sysctl: mock_sysctl, + zpool: mock_zpool, + } + err = z.Gather(&acc) + require.NoError(t, err) + + //one pool, all metrics + tags := map[string]string{ + "pool": "freenas-boot", + "health": "ONLINE", + } + + poolMetrics := getFreeNasBootPoolMetrics() + + acc.AssertContainsTaggedFields(t, "zfs_pool", poolMetrics, tags) +} + +func TestZfsGeneratesMetrics(t *testing.T) { + var acc testutil.Accumulator + + z := &Zfs{ + KstatMetrics: []string{"vdev_cache_stats"}, + sysctl: mock_sysctl, + zpool: mock_zpool, + } + err := z.Gather(&acc) + require.NoError(t, err) + + //four pool, vdev_cache_stats metrics + tags := map[string]string{ + "pools": "freenas-boot::red1::temp1::temp2", + } + intMetrics := getKstatMetricsVdevOnly() + + acc.AssertContainsTaggedFields(t, "zfs", intMetrics, tags) + + acc.Metrics = nil + + z = &Zfs{ + KstatMetrics: []string{"zfetchstats", "vdev_cache_stats"}, + sysctl: mock_sysctl, + zpool: mock_zpool, + } + err = z.Gather(&acc) + require.NoError(t, err) + + //four pool, vdev_cache_stats and zfetchstatus metrics + intMetrics = getKstatMetricsVdevAndZfetch() + + acc.AssertContainsTaggedFields(t, "zfs", intMetrics, tags) +} + +func getFreeNasBootPoolMetrics() map[string]interface{} { + return map[string]interface{}{ + "allocated": int64(2022177280), + "capacity": int64(6), + 
"dedupratio": float64(1), + "free": int64(28579464704), + "size": int64(30601641984), + "fragmentation": int64(0), + } +} + +func getKstatMetricsVdevOnly() map[string]interface{} { + return map[string]interface{}{ + "vdev_cache_stats_misses": int64(87789), + "vdev_cache_stats_hits": int64(465583), + "vdev_cache_stats_delegations": int64(6952), + } +} + +func getKstatMetricsVdevAndZfetch() map[string]interface{} { + return map[string]interface{}{ + "vdev_cache_stats_misses": int64(87789), + "vdev_cache_stats_hits": int64(465583), + "vdev_cache_stats_delegations": int64(6952), + "zfetchstats_max_streams": int64(0), + "zfetchstats_misses": int64(0), + "zfetchstats_hits": int64(0), + } +} diff --git a/plugins/inputs/zfs/zfs_linux.go b/plugins/inputs/zfs/zfs_linux.go new file mode 100644 index 000000000..71ec7e5dc --- /dev/null +++ b/plugins/inputs/zfs/zfs_linux.go @@ -0,0 +1,131 @@ +// +build linux + +package zfs + +import ( + "fmt" + "path/filepath" + "strconv" + "strings" + + "github.com/influxdata/telegraf" + "github.com/influxdata/telegraf/internal" + "github.com/influxdata/telegraf/plugins/inputs" +) + +type poolInfo struct { + name string + ioFilename string +} + +func getPools(kstatPath string) []poolInfo { + pools := make([]poolInfo, 0) + poolsDirs, _ := filepath.Glob(kstatPath + "/*/io") + + for _, poolDir := range poolsDirs { + poolDirSplit := strings.Split(poolDir, "/") + pool := poolDirSplit[len(poolDirSplit)-2] + pools = append(pools, poolInfo{name: pool, ioFilename: poolDir}) + } + + return pools +} + +func getTags(pools []poolInfo) map[string]string { + var poolNames string + + for _, pool := range pools { + if len(poolNames) != 0 { + poolNames += "::" + } + poolNames += pool.name + } + + return map[string]string{"pools": poolNames} +} + +func gatherPoolStats(pool poolInfo, acc telegraf.Accumulator) error { + lines, err := internal.ReadLines(pool.ioFilename) + if err != nil { + return err + } + + if len(lines) != 3 { + return err + } + + keys := strings.Fields(lines[1]) + values := strings.Fields(lines[2]) + + keyCount := len(keys) + + if keyCount != len(values) { + return fmt.Errorf("Key and value count don't match Keys:%v Values:%v", keys, values) + } + + tag := map[string]string{"pool": pool.name} + fields := make(map[string]interface{}) + for i := 0; i < keyCount; i++ { + value, err := strconv.ParseInt(values[i], 10, 64) + if err != nil { + return err + } + fields[keys[i]] = value + } + acc.AddFields("zfs_pool", fields, tag) + + return nil +} + +func (z *Zfs) Gather(acc telegraf.Accumulator) error { + kstatMetrics := z.KstatMetrics + if len(kstatMetrics) == 0 { + kstatMetrics = []string{"arcstats", "zfetchstats", "vdev_cache_stats"} + } + + kstatPath := z.KstatPath + if len(kstatPath) == 0 { + kstatPath = "/proc/spl/kstat/zfs" + } + + pools := getPools(kstatPath) + tags := getTags(pools) + + if z.PoolMetrics { + for _, pool := range pools { + err := gatherPoolStats(pool, acc) + if err != nil { + return err + } + } + } + + fields := make(map[string]interface{}) + for _, metric := range kstatMetrics { + lines, err := internal.ReadLines(kstatPath + "/" + metric) + if err != nil { + return err + } + for i, line := range lines { + if i == 0 || i == 1 { + continue + } + if len(line) < 1 { + continue + } + rawData := strings.Split(line, " ") + key := metric + "_" + rawData[0] + rawValue := rawData[len(rawData)-1] + value, _ := strconv.ParseInt(rawValue, 10, 64) + fields[key] = value + } + } + acc.AddFields("zfs", fields, tags) + return nil +} + +func init() { + inputs.Add("zfs", 
func() telegraf.Input { + return &Zfs{} + }) +} diff --git a/plugins/inputs/zfs/zfs_test.go b/plugins/inputs/zfs/zfs_linux_test.go similarity index 99% rename from plugins/inputs/zfs/zfs_test.go rename to plugins/inputs/zfs/zfs_linux_test.go index 03179ba59..c4db75ff5 100644 --- a/plugins/inputs/zfs/zfs_test.go +++ b/plugins/inputs/zfs/zfs_linux_test.go @@ -1,3 +1,5 @@ +// +build linux + package zfs import ( From 7921d87a455f7c93d16e4bad901cfb585f472202 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Harasimowicz?= Date: Wed, 1 Jun 2016 12:02:28 +0200 Subject: [PATCH 17/34] Added Consul health checks state monitoring. (#1294) --- CHANGELOG.md | 2 +- Godeps | 1 + README.md | 1 + plugins/inputs/all/all.go | 1 + plugins/inputs/consul/README.md | 46 +++++++++ plugins/inputs/consul/consul.go | 136 +++++++++++++++++++++++++++ plugins/inputs/consul/consul_test.go | 42 +++++++++ 7 files changed, 228 insertions(+), 1 deletion(-) create mode 100644 plugins/inputs/consul/README.md create mode 100644 plugins/inputs/consul/consul.go create mode 100644 plugins/inputs/consul/consul_test.go diff --git a/CHANGELOG.md b/CHANGELOG.md index cc8f5a349..90332b881 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,7 +22,7 @@ in conjunction with wildcard dimension values as it will control the amount of time before a new metric is included by the plugin. ### Features - +- [#1294](https://github.com/influxdata/telegraf/pull/1294): consul input plugin. Thanks @harnash - [#1164](https://github.com/influxdata/telegraf/pull/1164): conntrack input plugin. Thanks @robinpercy! - [#1165](https://github.com/influxdata/telegraf/pull/1165): vmstat input plugin. Thanks @jshim-xm! - [#1247](https://github.com/influxdata/telegraf/pull/1247): rollbar input plugin. Thanks @francois2metz and @cduez! 
diff --git a/Godeps b/Godeps
index ed28e1470..05bd7ef16 100644
--- a/Godeps
+++ b/Godeps
@@ -23,6 +23,7 @@ github.com/gonuts/go-shellquote e842a11b24c6abfb3dd27af69a17f482e4b483c2
 github.com/gorilla/context 1ea25387ff6f684839d82767c1733ff4d4d15d0a
 github.com/gorilla/mux c9e326e2bdec29039a3761c07bece13133863e1e
 github.com/hailocab/go-hostpool e80d13ce29ede4452c43dea11e79b9bc8a15b478
+github.com/hashicorp/consul 5aa90455ce78d4d41578bafc86305e6e6b28d7d2
 github.com/hpcloud/tail b2940955ab8b26e19d43a43c4da0475dd81bdb56
 github.com/influxdata/config b79f6829346b8d6e78ba73544b1e1038f1f1c9da
 github.com/influxdata/influxdb e094138084855d444195b252314dfee9eae34cab
diff --git a/README.md b/README.md
index 7890fa113..eb684f23f 100644
--- a/README.md
+++ b/README.md
@@ -145,6 +145,7 @@ Currently implemented sources:
 * [cassandra](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/cassandra)
 * [ceph](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/ceph)
 * [chrony](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/chrony)
+* [consul](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/consul)
 * [conntrack](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/conntrack)
 * [couchbase](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/couchbase)
 * [couchdb](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/couchdb)
diff --git a/plugins/inputs/all/all.go b/plugins/inputs/all/all.go
index 8c12e0858..0dbbb613d 100644
--- a/plugins/inputs/all/all.go
+++ b/plugins/inputs/all/all.go
@@ -9,6 +9,7 @@ import (
 	_ "github.com/influxdata/telegraf/plugins/inputs/chrony"
 	_ "github.com/influxdata/telegraf/plugins/inputs/cloudwatch"
 	_ "github.com/influxdata/telegraf/plugins/inputs/conntrack"
+	_ "github.com/influxdata/telegraf/plugins/inputs/consul"
 	_ "github.com/influxdata/telegraf/plugins/inputs/couchbase"
 	_ "github.com/influxdata/telegraf/plugins/inputs/couchdb"
 	_ "github.com/influxdata/telegraf/plugins/inputs/disque"
diff --git a/plugins/inputs/consul/README.md b/plugins/inputs/consul/README.md
new file mode 100644
index 000000000..a2685e2bf
--- /dev/null
+++ b/plugins/inputs/consul/README.md
@@ -0,0 +1,46 @@
+# Telegraf Input Plugin: Consul
+
+This plugin collects statistics about all health checks registered in Consul. It uses the [Consul API](https://www.consul.io/docs/agent/http/health.html#health_state)
+to query the data. It does not report Consul's [telemetry](https://www.consul.io/docs/agent/telemetry.html), but Consul can already report those stats over the StatsD protocol if needed.
+
+## Configuration:
+
+```
+# Gather health check statuses from services registered in Consul
+[[inputs.consul]]
+  ## Most of these values default to those configured at the Consul agent level.
+  ## Optional Consul server address (default: "")
+  # address = ""
+  ## Optional URI scheme for the Consul server (default: "")
+  # scheme = ""
+  ## Optional ACL token used in every request (default: "")
+  # token = ""
+  ## Optional username used for request HTTP Basic Authentication (default: "")
+  # username = ""
+  ## Optional password used for HTTP Basic Authentication (default: "")
+  # password = ""
+  ## Optional data centre to query the health checks from (default: "")
+  # datacentre = ""
+```
+
+## Measurements:
+
+### Consul:
+Tags:
+- node: the node the check/service is registered on
+- service_name: name of the service (this is the service name, not the service ID)
+
+Fields:
+- check_id
+- check_name
+- service_id
+- status
+
+## Example output
+
+```
+$ telegraf --config ./telegraf.conf -input-filter consul -test
+* Plugin: consul, Collection 1
+> consul_health_checks,host=wolfpit,node=consul-server-node check_id="serfHealth",check_name="Serf Health Status",service_id="",status="passing" 1464698464486439902
+> consul_health_checks,host=wolfpit,node=consul-server-node,service_name=www.example.com check_id="service:www-example-com.test01",check_name="Service 'www.example.com' check",service_id="www-example-com.test01",status="critical" 1464698464486519036
+```
diff --git a/plugins/inputs/consul/consul.go b/plugins/inputs/consul/consul.go
new file mode 100644
index 000000000..eaeae73c1
--- /dev/null
+++ b/plugins/inputs/consul/consul.go
@@ -0,0 +1,136 @@
+package consul
+
+import (
+	"net/http"
+
+	"github.com/hashicorp/consul/api"
+	"github.com/influxdata/telegraf"
+	"github.com/influxdata/telegraf/internal"
+	"github.com/influxdata/telegraf/plugins/inputs"
+)
+
+type Consul struct {
+	Address    string
+	Scheme     string
+	Token      string
+	Username   string
+	Password   string
+	Datacentre string
+
+	// Path to CA file
+	SSLCA string `toml:"ssl_ca"`
+	// Path to host cert file
+	SSLCert string `toml:"ssl_cert"`
+	// Path to cert key file
+	SSLKey string `toml:"ssl_key"`
+	// Use SSL but skip chain & host verification
+	InsecureSkipVerify bool
+
+	// client used to connect to the Consul agent
+	client *api.Client
+}
+
+var sampleConfig = `
+  ## Most of these values default to those configured at the Consul agent level.
+ ## Optional Consul server address (default: "localhost") + # address = "localhost" + ## Optional URI scheme for the Consul server (default: "http") + # scheme = "http" + ## Optional ACL token used in every request (default: "") + # token = "" + ## Optional username used for request HTTP Basic Authentication (default: "") + # username = "" + ## Optional password used for HTTP Basic Authentication (default: "") + # password = "" + ## Optional data centre to query the health checks from (default: "") + # datacentre = "" +` + +func (c *Consul) Description() string { + return "Gather health check statuses from services registered in Consul" +} + +func (c *Consul) SampleConfig() string { + return sampleConfig +} + +func (c *Consul) createAPIClient() (*api.Client, error) { + config := api.DefaultConfig() + + if c.Address != "" { + config.Address = c.Address + } + + if c.Scheme != "" { + config.Scheme = c.Scheme + } + + if c.Datacentre != "" { + config.Datacenter = c.Datacentre + } + + if c.Username != "" { + config.HttpAuth = &api.HttpBasicAuth{ + Username: c.Username, + Password: c.Password, + } + } + + tlsCfg, err := internal.GetTLSConfig( + c.SSLCert, c.SSLKey, c.SSLCA, c.InsecureSkipVerify) + + if err != nil { + return nil, err + } + + config.HttpClient.Transport = &http.Transport{ + TLSClientConfig: tlsCfg, + } + + return api.NewClient(config) +} + +func (c *Consul) GatherHealthCheck(acc telegraf.Accumulator, checks []*api.HealthCheck) { + for _, check := range checks { + record := make(map[string]interface{}) + tags := make(map[string]string) + + record["check_id"] = check.CheckID + record["check_name"] = check.Name + record["service_id"] = check.ServiceID + record["status"] = check.Status + + tags["node"] = check.Node + tags["service_name"] = check.ServiceName + + acc.AddFields("consul_health_checks", record, tags) + } +} + +func (c *Consul) Gather(acc telegraf.Accumulator) error { + if c.client == nil { + newClient, err := c.createAPIClient() + + if err != nil { + return err + } + + c.client = newClient + } + + checks, _, err := c.client.Health().State("any", nil) + + if err != nil { + return err + } + + c.GatherHealthCheck(acc, checks) + + return nil +} + +func init() { + inputs.Add("consul", func() telegraf.Input { + return &Consul{} + }) +} diff --git a/plugins/inputs/consul/consul_test.go b/plugins/inputs/consul/consul_test.go new file mode 100644 index 000000000..772ccba91 --- /dev/null +++ b/plugins/inputs/consul/consul_test.go @@ -0,0 +1,42 @@ +package consul + +import ( + "testing" + + "github.com/hashicorp/consul/api" + "github.com/influxdata/telegraf/testutil" +) + +var sampleChecks = []*api.HealthCheck{ + &api.HealthCheck{ + Node: "localhost", + CheckID: "foo.health123", + Name: "foo.health", + Status: "passing", + Notes: "lorem ipsum", + Output: "OK", + ServiceID: "foo.123", + ServiceName: "foo", + }, +} + +func TestGatherHealtCheck(t *testing.T) { + expectedFields := map[string]interface{}{ + "check_id": "foo.health123", + "check_name": "foo.health", + "status": "passing", + "service_id": "foo.123", + } + + expectedTags := map[string]string{ + "node": "localhost", + "service_name": "foo", + } + + var acc testutil.Accumulator + + consul := &Consul{} + consul.GatherHealthCheck(&acc, sampleChecks) + + acc.AssertContainsTaggedFields(t, "consul_health_checks", expectedFields, expectedTags) +} From 6f991ec78a3f021a17176b8eaedd8514ae009229 Mon Sep 17 00:00:00 2001 From: Cameron Sparr Date: Wed, 1 Jun 2016 15:49:32 +0100 Subject: [PATCH 18/34] Sleep longer in tail test --- 
plugins/inputs/tail/tail_test.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/plugins/inputs/tail/tail_test.go b/plugins/inputs/tail/tail_test.go index 5d7c04a88..f9f6bff28 100644 --- a/plugins/inputs/tail/tail_test.go +++ b/plugins/inputs/tail/tail_test.go @@ -32,7 +32,8 @@ func TestTailFromBeginning(t *testing.T) { _, err = tmpfile.WriteString("cpu,mytag=foo usage_idle=100\n") require.NoError(t, err) require.NoError(t, tt.Gather(&acc)) - time.Sleep(time.Millisecond * 50) + // arbitrary sleep to wait for message to show up + time.Sleep(time.Millisecond * 250) acc.AssertContainsTaggedFields(t, "cpu", map[string]interface{}{ From 52b9fc837c599ba04abcb3e8d38188cfffb7bee5 Mon Sep 17 00:00:00 2001 From: Cameron Sparr Date: Wed, 1 Jun 2016 15:32:47 +0100 Subject: [PATCH 19/34] Adding active & inactive memory to mem plugin closes #1213 --- CHANGELOG.md | 1 + plugins/inputs/system/memory.go | 2 ++ plugins/inputs/system/memory_test.go | 6 ++++-- 3 files changed, 7 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 90332b881..9f9caaa07 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -36,6 +36,7 @@ time before a new metric is included by the plugin. - [#1197](https://github.com/influxdata/telegraf/pull/1197): Limit AWS GetMetricStatistics requests to 10 per second. - [#1278](https://github.com/influxdata/telegraf/pull/1278) & [#1288](https://github.com/influxdata/telegraf/pull/1288) & [#1295](https://github.com/influxdata/telegraf/pull/1295): RabbitMQ/Apache/InfluxDB inputs: made url(s) parameter optional by using reasonable input defaults if not specified - [#1296](https://github.com/influxdata/telegraf/issues/1296): Refactor of flush_jitter argument. +- [#1213](https://github.com/influxdata/telegraf/issues/1213): Add inactive & active memory to mem plugin. 
### Bugfixes diff --git a/plugins/inputs/system/memory.go b/plugins/inputs/system/memory.go index 82ce9c9c8..c6dbff45e 100644 --- a/plugins/inputs/system/memory.go +++ b/plugins/inputs/system/memory.go @@ -30,6 +30,8 @@ func (s *MemStats) Gather(acc telegraf.Accumulator) error { "free": vm.Free, "cached": vm.Cached, "buffered": vm.Buffers, + "active": vm.Active, + "inactive": vm.Inactive, "used_percent": 100 * float64(vm.Used) / float64(vm.Total), "available_percent": 100 * float64(vm.Available) / float64(vm.Total), } diff --git a/plugins/inputs/system/memory_test.go b/plugins/inputs/system/memory_test.go index a7f7905f9..1fced6918 100644 --- a/plugins/inputs/system/memory_test.go +++ b/plugins/inputs/system/memory_test.go @@ -19,8 +19,8 @@ func TestMemStats(t *testing.T) { Available: 7600, Used: 5000, Free: 1235, - // Active: 8134, - // Inactive: 1124, + Active: 8134, + Inactive: 1124, // Buffers: 771, // Cached: 4312, // Wired: 134, @@ -52,6 +52,8 @@ func TestMemStats(t *testing.T) { "free": uint64(1235), "cached": uint64(0), "buffered": uint64(0), + "active": uint64(8134), + "inactive": uint64(1124), } acc.AssertContainsTaggedFields(t, "mem", memfields, make(map[string]string)) From fb5f40319ee9b7896abc6fb91b39403645ef8ce3 Mon Sep 17 00:00:00 2001 From: Cameron Sparr Date: Wed, 1 Jun 2016 15:10:57 +0100 Subject: [PATCH 20/34] update gitattributes for easier fork mngmnt --- .gitattributes | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.gitattributes b/.gitattributes index 80edb3f0c..276cc7709 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1,2 +1,4 @@ CHANGELOG.md merge=union - +README.md merge=union +plugins/inputs/all/all.go merge=union +plugins/outputs/all/all.go merge=union From c842724b611bda16bac294509c2fa7fcdd428c6f Mon Sep 17 00:00:00 2001 From: Cameron Sparr Date: Wed, 1 Jun 2016 16:00:55 +0100 Subject: [PATCH 21/34] Fix graylog test race --- plugins/outputs/graylog/graylog_test.go | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/plugins/outputs/graylog/graylog_test.go b/plugins/outputs/graylog/graylog_test.go index fd8a418f4..37816a7a2 100644 --- a/plugins/outputs/graylog/graylog_test.go +++ b/plugins/outputs/graylog/graylog_test.go @@ -15,8 +15,11 @@ import ( func TestWrite(t *testing.T) { var wg sync.WaitGroup + var wg2 sync.WaitGroup wg.Add(1) - go UDPServer(t, &wg) + wg2.Add(1) + go UDPServer(t, &wg, &wg2) + wg2.Wait() i := Graylog{ Servers: []string{"127.0.0.1:12201"}, @@ -33,12 +36,13 @@ func TestWrite(t *testing.T) { type GelfObject map[string]interface{} -func UDPServer(t *testing.T, wg *sync.WaitGroup) { +func UDPServer(t *testing.T, wg *sync.WaitGroup, wg2 *sync.WaitGroup) { serverAddr, _ := net.ResolveUDPAddr("udp", "127.0.0.1:12201") udpServer, _ := net.ListenUDP("udp", serverAddr) defer wg.Done() bufR := make([]byte, 1024) + wg2.Done() n, _, _ := udpServer.ReadFromUDP(bufR) b := bytes.NewReader(bufR[0:n]) From a4b36d12ddc160e65eb993de271c6c44fed8abe9 Mon Sep 17 00:00:00 2001 From: Ali Alrahahleh Date: Tue, 24 May 2016 15:25:23 -0700 Subject: [PATCH 22/34] add graylog plugin add unit test for graylog --- plugins/inputs/all/all.go | 1 + plugins/inputs/graylog/README.md | 46 ++++ plugins/inputs/graylog/graylog.go | 307 +++++++++++++++++++++++++ plugins/inputs/graylog/graylog_test.go | 199 ++++++++++++++++ 4 files changed, 553 insertions(+) create mode 100644 plugins/inputs/graylog/README.md create mode 100644 plugins/inputs/graylog/graylog.go create mode 100644 plugins/inputs/graylog/graylog_test.go diff --git 
a/plugins/inputs/all/all.go b/plugins/inputs/all/all.go
index 0dbbb613d..1a386d97c 100644
--- a/plugins/inputs/all/all.go
+++ b/plugins/inputs/all/all.go
@@ -20,6 +20,7 @@ import (
 	_ "github.com/influxdata/telegraf/plugins/inputs/exec"
 	_ "github.com/influxdata/telegraf/plugins/inputs/filestat"
 	_ "github.com/influxdata/telegraf/plugins/inputs/github_webhooks"
+	_ "github.com/influxdata/telegraf/plugins/inputs/graylog"
 	_ "github.com/influxdata/telegraf/plugins/inputs/haproxy"
 	_ "github.com/influxdata/telegraf/plugins/inputs/http_response"
 	_ "github.com/influxdata/telegraf/plugins/inputs/httpjson"
diff --git a/plugins/inputs/graylog/README.md b/plugins/inputs/graylog/README.md
new file mode 100644
index 000000000..9d033e20a
--- /dev/null
+++ b/plugins/inputs/graylog/README.md
@@ -0,0 +1,46 @@
+# GrayLog plugin
+
+The Graylog plugin can collect data from remote Graylog service URLs.
+
+The plugin currently supports two types of endpoints:
+
+- multiple (Ex http://[graylog-server-ip]:12900/system/metrics/multiple)
+- namespace (Ex http://[graylog-server-ip]:12900/system/metrics/namespace/{namespace})
+
+The endpoint list can be a mix of one multiple endpoint and several namespace endpoints.
+
+Note: if a namespace endpoint is specified, the metrics array will be ignored for that call.
+
+Sample configuration:
+```
+[[inputs.graylog]]
+  ## API endpoint, currently supported API:
+  ## - multiple (Ex http://[graylog-server-ip]:12900/system/metrics/multiple)
+  ## - namespace (Ex http://[graylog-server-ip]:12900/system/metrics/namespace/{namespace})
+  ## Note if a namespace endpoint is specified, the metrics array will be ignored for that call.
+  ## The endpoint list can contain both namespace and multiple type calls.
+  ## Please check http://[graylog-server-ip]:12900/api-browser for the full list of endpoints.
+
+  servers = [
+    "http://10.224.162.16:12900/system/metrics/multiple"
+  ]
+
+  # Metrics to be pulled from Graylog and reported to the configured outputs
+  metrics = [
+    "jvm.cl.loaded",
+    "jvm.memory.pools.Metaspace.committed"
+  ]
+  ## User name and password
+  username = "put-username-here"
+  password = "put-password-here"
+
+  ## Optional SSL Config
+  # ssl_ca = "/etc/telegraf/ca.pem"
+  # ssl_cert = "/etc/telegraf/cert.pem"
+  # ssl_key = "/etc/telegraf/key.pem"
+  ## Use SSL but skip chain & host verification
+  # insecure_skip_verify = false
+```
+
+Please refer to the Graylog metrics API browser for the full list of metric endpoints: http://10.224.162.16:12900/api-browser
diff --git a/plugins/inputs/graylog/graylog.go b/plugins/inputs/graylog/graylog.go
new file mode 100644
index 000000000..b9b5ade25
--- /dev/null
+++ b/plugins/inputs/graylog/graylog.go
@@ -0,0 +1,307 @@
+package graylog
+
+import (
+	"bytes"
+	"encoding/base64"
+	"encoding/json"
+	"errors"
+	"fmt"
+	"io/ioutil"
+	"net"
+	"net/http"
+	"net/url"
+	"strings"
+	"sync"
+	"time"
+
+	"github.com/influxdata/telegraf"
+	"github.com/influxdata/telegraf/internal"
+	"github.com/influxdata/telegraf/plugins/inputs"
+)
+
+type ResponseMetrics struct {
+	total   int
+	Metrics []Metric `json:"metrics"`
+}
+
+type Metric struct {
+	FullName string                 `json:"full_name"`
+	Name     string                 `json:"name"`
+	Type     string                 `json:"type"`
+	Fields   map[string]interface{} `json:"metric"`
+}
+
+type GrayLog struct {
+	Servers  []string
+	Metrics  []string
+	Username string
+	Password string
+
+	// Path to CA file
+	SSLCA string `toml:"ssl_ca"`
+	// Path to host cert file
+	SSLCert string `toml:"ssl_cert"`
+	// Path to cert key file
+	SSLKey string `toml:"ssl_key"`
+	// Use SSL but skip chain & host verification
+	InsecureSkipVerify bool
+
+	client HTTPClient
+}
+
+type HTTPClient interface {
+	// Returns the result of an http request
+	//
+	// Parameters:
+	// req: HTTP request object
+	//
+	// Returns:
+	// http.Response: HTTP response object
+	// error : Any error that may have occurred
+	MakeRequest(req *http.Request) (*http.Response, error)
+
+	SetHTTPClient(client *http.Client)
+	HTTPClient() *http.Client
+}
+
+type Messagebody struct {
+	Metrics []string `json:"metrics"`
+}
+
+type RealHTTPClient struct {
+	client *http.Client
+}
+
+func (c *RealHTTPClient) MakeRequest(req *http.Request) (*http.Response, error) {
+	return c.client.Do(req)
+}
+
+func (c *RealHTTPClient) SetHTTPClient(client *http.Client) {
+	c.client = client
+}
+
+func (c *RealHTTPClient) HTTPClient() *http.Client {
+	return c.client
+}
+
+var sampleConfig = `
+  ## API endpoint, currently supported API:
+  ## - multiple (Ex http://[graylog-server-ip]:12900/system/metrics/multiple)
+  ## - namespace (Ex http://[graylog-server-ip]:12900/system/metrics/namespace/{namespace})
+  ## Note if a namespace endpoint is specified, the metrics array will be ignored for that call.
+  ## The endpoint list can contain both namespace and multiple type calls.
+  ## Please check http://[graylog-server-ip]:12900/api-browser for the full list of endpoints.
+  servers = [
+    "http://[graylog-server-ip]:12900/system/metrics/multiple",
+  ]
+
+  ## metrics list
+  ## The list of metrics can be found in the Graylog webservice documentation,
+  ## or by hitting the web service API at http://[graylog-host]:12900/system/metrics
+  metrics = [
+    "jvm.cl.loaded",
+    "jvm.memory.pools.Metaspace.committed"
+  ]
+
+  ## User name and password
+  username = "put-username-here"
+  password = "put-password-here"
+
+  ## Optional SSL Config
+  # ssl_ca = "/etc/telegraf/ca.pem"
+  # ssl_cert = "/etc/telegraf/cert.pem"
+  # ssl_key = "/etc/telegraf/key.pem"
+  ## Use SSL but skip chain & host verification
+  # insecure_skip_verify = false
+`
+
+func (h *GrayLog) SampleConfig() string {
+	return sampleConfig
+}
+
+func (h *GrayLog) Description() string {
+	return "Read flattened metrics from one or more GrayLog HTTP endpoints"
+}
+
+// Gathers data for all servers.
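+// Each configured server is queried concurrently in its own goroutine; any
+// per-server errors are collected and returned joined into a single error.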
+func (h *GrayLog) Gather(acc telegraf.Accumulator) error {
+	var wg sync.WaitGroup
+
+	if h.client.HTTPClient() == nil {
+		tlsCfg, err := internal.GetTLSConfig(
+			h.SSLCert, h.SSLKey, h.SSLCA, h.InsecureSkipVerify)
+		if err != nil {
+			return err
+		}
+		tr := &http.Transport{
+			ResponseHeaderTimeout: time.Duration(3 * time.Second),
+			TLSClientConfig:       tlsCfg,
+		}
+		client := &http.Client{
+			Transport: tr,
+			Timeout:   time.Duration(4 * time.Second),
+		}
+		h.client.SetHTTPClient(client)
+	}
+
+	errorChannel := make(chan error, len(h.Servers))
+
+	for _, server := range h.Servers {
+		wg.Add(1)
+		go func(server string) {
+			defer wg.Done()
+			if err := h.gatherServer(acc, server); err != nil {
+				errorChannel <- err
+			}
+		}(server)
+	}
+
+	wg.Wait()
+	close(errorChannel)
+
+	// Get all errors and return them as one giant error
+	errorStrings := []string{}
+	for err := range errorChannel {
+		errorStrings = append(errorStrings, err.Error())
+	}
+
+	if len(errorStrings) == 0 {
+		return nil
+	}
+	return errors.New(strings.Join(errorStrings, "\n"))
+}
+
+// Gathers data from a particular server
+// Parameters:
+//     acc      : The telegraf Accumulator to use
+//     serverURL: endpoint to send request to
+//
+// Returns:
+//     error: Any error that may have occurred
+func (h *GrayLog) gatherServer(
+	acc telegraf.Accumulator,
+	serverURL string,
+) error {
+	resp, _, err := h.sendRequest(serverURL)
+	if err != nil {
+		return err
+	}
+	requestURL, err := url.Parse(serverURL)
+	if err != nil {
+		return err
+	}
+	host, port, _ := net.SplitHostPort(requestURL.Host)
+	var dat ResponseMetrics
+	if err := json.Unmarshal([]byte(resp), &dat); err != nil {
+		return err
+	}
+	for _, m_item := range dat.Metrics {
+		fields := make(map[string]interface{})
+		tags := map[string]string{
+			"server": host,
+			"port":   port,
+			"name":   m_item.Name,
+			"type":   m_item.Type,
+		}
+		h.flatten(m_item.Fields, fields, "")
+		acc.AddFields(m_item.FullName, fields, tags)
+	}
+	return nil
+}
+
+// Flatten JSON hierarchy to produce field name and field value
+// Parameters:
+//    item: Item map to flatten
+//    fields: Map to store generated fields.
+//    id: Prefix for top level metric (empty string "")
+// Returns:
+//    void
+func (h *GrayLog) flatten(item map[string]interface{}, fields map[string]interface{}, id string) {
+	if id != "" {
+		id = id + "_"
+	}
+	for k, i := range item {
+		switch i.(type) {
+		case int:
+			// convert rather than assert: an int is not a float64
+			fields[id+k] = float64(i.(int))
+		case float64:
+			fields[id+k] = i.(float64)
+		case map[string]interface{}:
+			h.flatten(i.(map[string]interface{}), fields, id+k)
+		default:
+		}
+	}
+}
+
+// Sends an HTTP request to the server using the GrayLog object's HTTPClient.
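+// For "multiple" endpoints the configured metrics list is marshalled to JSON
+// and sent in a POST body; all other endpoints are queried with a plain GET.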
+// Parameters: +// serverURL: endpoint to send request to +// +// Returns: +// string: body of the response +// error : Any error that may have occurred +func (h *GrayLog) sendRequest(serverURL string) (string, float64, error) { + headers := map[string]string{ + "Content-Type": "application/json", + "Accept": "application/json", + } + method := "GET" + content := bytes.NewBufferString("") + headers["Authorization"] = "Basic " + base64.URLEncoding.EncodeToString([]byte(h.Username+":"+h.Password)) + // Prepare URL + requestURL, err := url.Parse(serverURL) + if err != nil { + return "", -1, fmt.Errorf("Invalid server URL \"%s\"", serverURL) + } + if strings.Contains(requestURL.String(), "multiple") { + m := &Messagebody{Metrics: h.Metrics} + http_body, err := json.Marshal(m) + if err != nil { + return "", -1, fmt.Errorf("Invalid list of Metrics %s", h.Metrics) + } + method = "POST" + content = bytes.NewBuffer(http_body) + } + req, err := http.NewRequest(method, requestURL.String(), content) + if err != nil { + return "", -1, err + } + // Add header parameters + for k, v := range headers { + req.Header.Add(k, v) + } + start := time.Now() + resp, err := h.client.MakeRequest(req) + if err != nil { + return "", -1, err + } + + defer resp.Body.Close() + responseTime := time.Since(start).Seconds() + + body, err := ioutil.ReadAll(resp.Body) + if err != nil { + return string(body), responseTime, err + } + + // Process response + if resp.StatusCode != http.StatusOK { + err = fmt.Errorf("Response from url \"%s\" has status code %d (%s), expected %d (%s)", + requestURL.String(), + resp.StatusCode, + http.StatusText(resp.StatusCode), + http.StatusOK, + http.StatusText(http.StatusOK)) + return string(body), responseTime, err + } + return string(body), responseTime, err +} + +func init() { + inputs.Add("graylog", func() telegraf.Input { + return &GrayLog{ + client: &RealHTTPClient{}, + } + }) +} diff --git a/plugins/inputs/graylog/graylog_test.go b/plugins/inputs/graylog/graylog_test.go new file mode 100644 index 000000000..09bca454d --- /dev/null +++ b/plugins/inputs/graylog/graylog_test.go @@ -0,0 +1,199 @@ +package graylog + +import ( + "io/ioutil" + "net/http" + "strings" + "testing" + + "github.com/influxdata/telegraf/testutil" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +const validJSON = ` + { + "total": 3, + "metrics": [ + { + "full_name": "jvm.cl.loaded", + "metric": { + "value": 18910 + }, + "name": "loaded", + "type": "gauge" + }, + { + "full_name": "jvm.memory.pools.Metaspace.committed", + "metric": { + "value": 108040192 + }, + "name": "committed", + "type": "gauge" + }, + { + "full_name": "org.graylog2.shared.journal.KafkaJournal.writeTime", + "metric": { + "time": { + "min": 99 + }, + "rate": { + "total": 10, + "mean": 2 + }, + "duration_unit": "microseconds", + "rate_unit": "events/second" + }, + "name": "writeTime", + "type": "hdrtimer" + } + ] + }` + +var validTags = map[string]map[string]string{ + "jvm.cl.loaded": { + "name": "loaded", + "type": "gauge", + "port": "12900", + "server": "localhost", + }, + "jvm.memory.pools.Metaspace.committed": { + "name": "committed", + "type": "gauge", + "port": "12900", + "server": "localhost", + }, + "org.graylog2.shared.journal.KafkaJournal.writeTime": { + "name": "writeTime", + "type": "hdrtimer", + "port": "12900", + "server": "localhost", + }, +} + +var expectedFields = map[string]map[string]interface{}{ + "jvm.cl.loaded": { + "value": float64(18910), + }, + "jvm.memory.pools.Metaspace.committed": { + 
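+		// committed bytes reported for the Metaspace pool in validJSON above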
"value": float64(108040192), + }, + "org.graylog2.shared.journal.KafkaJournal.writeTime": { + "time_min": float64(99), + "rate_total": float64(10), + "rate_mean": float64(2), + }, +} + +const invalidJSON = "I don't think this is JSON" + +const empty = "" + +type mockHTTPClient struct { + responseBody string + statusCode int +} + +// Mock implementation of MakeRequest. Usually returns an http.Response with +// hard-coded responseBody and statusCode. However, if the request uses a +// nonstandard method, it uses status code 405 (method not allowed) +func (c *mockHTTPClient) MakeRequest(req *http.Request) (*http.Response, error) { + resp := http.Response{} + resp.StatusCode = c.statusCode + + // basic error checking on request method + allowedMethods := []string{"GET", "HEAD", "POST", "PUT", "DELETE", "TRACE", "CONNECT"} + methodValid := false + for _, method := range allowedMethods { + if req.Method == method { + methodValid = true + break + } + } + + if !methodValid { + resp.StatusCode = 405 // Method not allowed + } + + resp.Body = ioutil.NopCloser(strings.NewReader(c.responseBody)) + return &resp, nil +} + +func (c *mockHTTPClient) SetHTTPClient(_ *http.Client) { +} + +func (c *mockHTTPClient) HTTPClient() *http.Client { + return nil +} + +// Generates a pointer to an HttpJson object that uses a mock HTTP client. +// Parameters: +// response : Body of the response that the mock HTTP client should return +// statusCode: HTTP status code the mock HTTP client should return +// +// Returns: +// *HttpJson: Pointer to an HttpJson object that uses the generated mock HTTP client +func genMockGrayLog(response string, statusCode int) []*GrayLog { + return []*GrayLog{ + &GrayLog{ + client: &mockHTTPClient{responseBody: response, statusCode: statusCode}, + Servers: []string{ + "http://localhost:12900/system/metrics/multiple", + }, + Metrics: []string{ + "jvm.memory.pools.Metaspace.committed", + "jvm.cl.loaded", + "org.graylog2.shared.journal.KafkaJournal.writeTime", + }, + Username: "test", + Password: "test", + }, + } +} + +// Test that the proper values are ignored or collected +func TestNormalResponse(t *testing.T) { + graylog := genMockGrayLog(validJSON, 200) + + for _, service := range graylog { + var acc testutil.Accumulator + err := service.Gather(&acc) + require.NoError(t, err) + for k, v := range expectedFields { + acc.AssertContainsTaggedFields(t, k, v, validTags[k]) + } + } +} + +// Test response to HTTP 500 +func TestHttpJson500(t *testing.T) { + graylog := genMockGrayLog(validJSON, 500) + + var acc testutil.Accumulator + err := graylog[0].Gather(&acc) + + assert.NotNil(t, err) + assert.Equal(t, 0, acc.NFields()) +} + +// Test response to malformed JSON +func TestHttpJsonBadJson(t *testing.T) { + graylog := genMockGrayLog(invalidJSON, 200) + + var acc testutil.Accumulator + err := graylog[0].Gather(&acc) + + assert.NotNil(t, err) + assert.Equal(t, 0, acc.NFields()) +} + +// Test response to empty string as response objectgT +func TestHttpJsonEmptyResponse(t *testing.T) { + graylog := genMockGrayLog(empty, 200) + + var acc testutil.Accumulator + err := graylog[0].Gather(&acc) + + assert.NotNil(t, err) + assert.Equal(t, 0, acc.NFields()) +} From f08a27be5de482f70ac047f975bc23f81630370d Mon Sep 17 00:00:00 2001 From: Cameron Sparr Date: Wed, 1 Jun 2016 18:44:18 +0100 Subject: [PATCH 23/34] graylog input doc tweaks closes #1261 --- etc/telegraf.conf | 68 ++++++++++++++++++++++++------- plugins/inputs/graylog/README.md | 41 +++++++++++-------- plugins/inputs/graylog/graylog.go | 31 
++++++++------ 3 files changed, 97 insertions(+), 43 deletions(-) diff --git a/etc/telegraf.conf b/etc/telegraf.conf index e1ae2cdd7..b0715a441 100644 --- a/etc/telegraf.conf +++ b/etc/telegraf.conf @@ -573,6 +573,23 @@ # # value = 'p-example' +# # Gather health check statuses from services registered in Consul +# [[inputs.consul]] +# ## Most of these values defaults to the one configured on a Consul's agent level. +# ## Optional Consul server address (default: "localhost") +# # address = "localhost" +# ## Optional URI scheme for the Consul server (default: "http") +# # scheme = "http" +# ## Optional ACL token used in every request (default: "") +# # token = "" +# ## Optional username used for request HTTP Basic Authentication (default: "") +# # username = "" +# ## Optional password used for HTTP Basic Authentication (default: "") +# # password = "" +# ## Optional data centre to query the health checks from (default: "") +# # datacentre = "" + + # # Read metrics from one or many couchbase clusters # [[inputs.couchbase]] # ## specify servers via a url matching: @@ -700,6 +717,43 @@ # md5 = false +# # Read flattened metrics from one or more GrayLog HTTP endpoints +# [[inputs.graylog]] +# ## API endpoint, currently supported API: +# ## +# ## - multiple (Ex http://:12900/system/metrics/multiple) +# ## - namespace (Ex http://:12900/system/metrics/namespace/{namespace}) +# ## +# ## For namespace endpoint, the metrics array will be ignored for that call. +# ## Endpoint can contain namespace and multiple type calls. +# ## +# ## Please check http://[graylog-server-ip]:12900/api-browser for full list +# ## of endpoints +# servers = [ +# "http://[graylog-server-ip]:12900/system/metrics/multiple", +# ] +# +# ## Metrics list +# ## List of metrics can be found on Graylog webservice documentation. +# ## Or by hitting the the web service api at: +# ## http://[graylog-host]:12900/system/metrics +# metrics = [ +# "jvm.cl.loaded", +# "jvm.memory.pools.Metaspace.committed" +# ] +# +# ## Username and password +# username = "" +# password = "" +# +# ## Optional SSL Config +# # ssl_ca = "/etc/telegraf/ca.pem" +# # ssl_cert = "/etc/telegraf/cert.pem" +# # ssl_key = "/etc/telegraf/key.pem" +# ## Use SSL but skip chain & host verification +# # insecure_skip_verify = false + + # # Read metrics of haproxy, via socket or csv stats page # [[inputs.haproxy]] # ## An array of address to gather stats about. Specify an ip on hostname @@ -1405,20 +1459,6 @@ # stats = ["MAIN.cache_hit", "MAIN.cache_miss", "MAIN.uptime"] -# # Read metrics of ZFS from arcstats, zfetchstats and vdev_cache_stats -# [[inputs.zfs]] -# ## ZFS kstat path -# ## If not specified, then default is: -# kstatPath = "/proc/spl/kstat/zfs" -# -# ## By default, telegraf gather all zfs stats -# ## If not specified, then default is: -# kstatMetrics = ["arcstats", "zfetchstats", "vdev_cache_stats"] -# -# ## By default, don't gather zpool stats -# poolMetrics = false - - # # Reads 'mntr' stats from one or many zookeeper servers # [[inputs.zookeeper]] # ## An array of address to gather stats about. Specify an ip or hostname diff --git a/plugins/inputs/graylog/README.md b/plugins/inputs/graylog/README.md index 9d033e20a..6d4aa6131 100644 --- a/plugins/inputs/graylog/README.md +++ b/plugins/inputs/graylog/README.md @@ -1,6 +1,6 @@ # GrayLog plugin -The Graylog plugin can collect data from remote Graylog service URLs. +The Graylog plugin can collect data from remote Graylog service URLs. 
Plugin currently support two type of end points:- @@ -12,28 +12,37 @@ End Point can be a mixe of one multiple end point and several namespaces end p Note: if namespace end point specified metrics array will be ignored for that call. -Sample configration -``` -[[inputs.graylog]] - ## API End Point, currently supported API: - ## - multiple (Ex http://[graylog-server-ip]:12900/system/metrics/multiple) - ## - namespace (Ex http://[graylog-server-ip]:12900/system/metrics/namespace/{namespace}) - ## Note if namespace end point specified metrics array will be ignored for that call. - ## End point can contain namespace and multiple type calls - ## Please check http://[graylog-server-ip]:12900/api-browser for full list end points +### Configuration: +```toml +# Read flattened metrics from one or more GrayLog HTTP endpoints +[[inputs.graylog]] + ## API endpoint, currently supported API: + ## + ## - multiple (Ex http://:12900/system/metrics/multiple) + ## - namespace (Ex http://:12900/system/metrics/namespace/{namespace}) + ## + ## For namespace endpoint, the metrics array will be ignored for that call. + ## Endpoint can contain namespace and multiple type calls. + ## + ## Please check http://[graylog-server-ip]:12900/api-browser for full list + ## of endpoints servers = [ - "http://10.224.162.16:12900/system/metrics/multiple" + "http://[graylog-server-ip]:12900/system/metrics/multiple", ] - #Metrics define metric which will be pulled from GrayLog and reported to the defined Output + ## Metrics list + ## List of metrics can be found on Graylog webservice documentation. + ## Or by hitting the the web service api at: + ## http://[graylog-host]:12900/system/metrics metrics = [ "jvm.cl.loaded", "jvm.memory.pools.Metaspace.committed" ] - ## User name and password - username = "put-username-here" - password = "put-password-here" + + ## Username and password + username = "" + password = "" ## Optional SSL Config # ssl_ca = "/etc/telegraf/ca.pem" @@ -43,4 +52,4 @@ Sample configration # insecure_skip_verify = false ``` -Please refer to GrayLog metrics api browser for full metric end points http://10.224.162.16:12900/api-browser +Please refer to GrayLog metrics api browser for full metric end points http://host:12900/api-browser diff --git a/plugins/inputs/graylog/graylog.go b/plugins/inputs/graylog/graylog.go index b9b5ade25..52e2ef42a 100644 --- a/plugins/inputs/graylog/graylog.go +++ b/plugins/inputs/graylog/graylog.go @@ -85,27 +85,32 @@ func (c *RealHTTPClient) HTTPClient() *http.Client { } var sampleConfig = ` - ## API End Point, currently supported API: - ## - multiple (Ex http://[graylog-server-ip]:12900/system/metrics/multiple) - ## - namespace (Ex http://[graylog-server-ip]:12900/system/metrics/namespace/{namespace}) - ## Note if namespace end point specified metrics array will be ignored for that call. - ## End point can contain namespace and multiple type calls - ## Please check http://[graylog-server-ip]:12900/api-browser for full list end points + ## API endpoint, currently supported API: + ## + ## - multiple (Ex http://:12900/system/metrics/multiple) + ## - namespace (Ex http://:12900/system/metrics/namespace/{namespace}) + ## + ## For namespace endpoint, the metrics array will be ignored for that call. + ## Endpoint can contain namespace and multiple type calls. 
+ ## + ## Please check http://[graylog-server-ip]:12900/api-browser for full list + ## of endpoints servers = [ "http://[graylog-server-ip]:12900/system/metrics/multiple", ] - - ## metrics list - ## List of metrics can be found on Graylog webservice documentation - ## Or by hitting the the web service api http://[graylog-host]:12900/system/metrics + + ## Metrics list + ## List of metrics can be found on Graylog webservice documentation. + ## Or by hitting the the web service api at: + ## http://[graylog-host]:12900/system/metrics metrics = [ "jvm.cl.loaded", "jvm.memory.pools.Metaspace.committed" ] - ## User name and password - username = "put-username-here" - password = "put-password-here" + ## Username and password + username = "" + password = "" ## Optional SSL Config # ssl_ca = "/etc/telegraf/ca.pem" From c4841843a9a86f2f7cb515b73ab71fd9728c7f22 Mon Sep 17 00:00:00 2001 From: Cameron Sparr Date: Wed, 1 Jun 2016 18:53:29 +0100 Subject: [PATCH 24/34] Create dummy zfs plugin file --- etc/telegraf.conf | 14 ++++++++++++++ plugins/inputs/zfs/zfs_other.go | 18 ++++++++++++++++++ 2 files changed, 32 insertions(+) create mode 100644 plugins/inputs/zfs/zfs_other.go diff --git a/etc/telegraf.conf b/etc/telegraf.conf index b0715a441..176b32f0f 100644 --- a/etc/telegraf.conf +++ b/etc/telegraf.conf @@ -1459,6 +1459,20 @@ # stats = ["MAIN.cache_hit", "MAIN.cache_miss", "MAIN.uptime"] +# # Read metrics of ZFS from arcstats, zfetchstats, vdev_cache_stats, and pools +# [[inputs.zfs]] +# ## ZFS kstat path. Ignored on FreeBSD +# ## If not specified, then default is: +# # kstatPath = "/proc/spl/kstat/zfs" +# +# ## By default, telegraf gather all zfs stats +# ## If not specified, then default is: +# # kstatMetrics = ["arcstats", "zfetchstats", "vdev_cache_stats"] +# +# ## By default, don't gather zpool stats +# # poolMetrics = false + + # # Reads 'mntr' stats from one or many zookeeper servers # [[inputs.zookeeper]] # ## An array of address to gather stats about. 
Specify an ip or hostname diff --git a/plugins/inputs/zfs/zfs_other.go b/plugins/inputs/zfs/zfs_other.go new file mode 100644 index 000000000..3f6ad3ef6 --- /dev/null +++ b/plugins/inputs/zfs/zfs_other.go @@ -0,0 +1,18 @@ +// +build !linux !freebsd + +package zfs + +import ( + "github.com/influxdata/telegraf" + "github.com/influxdata/telegraf/plugins/inputs" +) + +func (z *Zfs) Gather(acc telegraf.Accumulator) error { + return nil +} + +func init() { + inputs.Add("zfs", func() telegraf.Input { + return &Zfs{} + }) +} From 8173338f8a77b7df7a3da1b66a14929db681d09a Mon Sep 17 00:00:00 2001 From: Cameron Sparr Date: Wed, 1 Jun 2016 18:58:54 +0100 Subject: [PATCH 25/34] fix build flags --- plugins/inputs/zfs/zfs_other.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plugins/inputs/zfs/zfs_other.go b/plugins/inputs/zfs/zfs_other.go index 3f6ad3ef6..98de02be9 100644 --- a/plugins/inputs/zfs/zfs_other.go +++ b/plugins/inputs/zfs/zfs_other.go @@ -1,4 +1,4 @@ -// +build !linux !freebsd +// +build !linux,!freebsd package zfs From 675457873aebe550d151582978ddc63bdc5050d9 Mon Sep 17 00:00:00 2001 From: Cameron Sparr Date: Thu, 2 Jun 2016 11:22:07 +0100 Subject: [PATCH 26/34] haproxy input: fix potential race condition --- plugins/inputs/haproxy/haproxy.go | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/plugins/inputs/haproxy/haproxy.go b/plugins/inputs/haproxy/haproxy.go index d3f3d0696..396e3c934 100644 --- a/plugins/inputs/haproxy/haproxy.go +++ b/plugins/inputs/haproxy/haproxy.go @@ -6,6 +6,7 @@ import ( "github.com/influxdata/telegraf" "github.com/influxdata/telegraf/plugins/inputs" "io" + "log" "net" "net/http" "net/url" @@ -113,20 +114,18 @@ func (g *haproxy) Gather(acc telegraf.Accumulator) error { } var wg sync.WaitGroup - - var outerr error - - for _, serv := range g.Servers { + for _, server := range g.Servers { wg.Add(1) go func(serv string) { defer wg.Done() - outerr = g.gatherServer(serv, acc) - }(serv) + if err := g.gatherServer(serv, acc); err != nil { + log.Printf("HAProxy error gathering server: %s, %s", serv, err) + } + }(server) } wg.Wait() - - return outerr + return nil } func (g *haproxy) gatherServerSocket(addr string, acc telegraf.Accumulator) error { From 1aabd38eb2cdac1c50037dd2a4db624d69f2dee6 Mon Sep 17 00:00:00 2001 From: Ali Alrahaleh Date: Thu, 2 Jun 2016 05:13:17 -0700 Subject: [PATCH 27/34] Add graylog input pluging change log (#1309) --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9f9caaa07..f9c1bb871 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,6 +22,7 @@ in conjunction with wildcard dimension values as it will control the amount of time before a new metric is included by the plugin. ### Features +- [#1262](https://github.com/influxdata/telegraf/pull/1261): Add graylog input pluging. - [#1294](https://github.com/influxdata/telegraf/pull/1294): consul input plugin. Thanks @harnash - [#1164](https://github.com/influxdata/telegraf/pull/1164): conntrack input plugin. Thanks @robinpercy! - [#1165](https://github.com/influxdata/telegraf/pull/1165): vmstat input plugin. Thanks @jshim-xm! 
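The haproxy fix above and the graylog `Gather` function earlier in this series use the same fan-out pattern, which the next patch extracts into a reusable `ErrChan` helper: one goroutine per server, each sending into an error channel buffered to the goroutine count so no sender can block, with the results joined after `wg.Wait()`. Below is a minimal, self-contained sketch of that pattern; `gatherServer` and the server list are hypothetical stand-ins, not Telegraf code.

```go
package main

import (
	"errors"
	"fmt"
	"strings"
	"sync"
)

// gatherServer stands in for a per-server collection function
// (hypothetical; the real plugins also take a telegraf.Accumulator).
func gatherServer(server string) error {
	if server == "" {
		return errors.New("empty server address")
	}
	return nil
}

// gatherAll polls every server concurrently and joins any failures
// into a single error, as the plugins above do.
func gatherAll(servers []string) error {
	var wg sync.WaitGroup
	// One buffered slot per goroutine so sends never block.
	errC := make(chan error, len(servers))

	for _, server := range servers {
		wg.Add(1)
		go func(serv string) {
			defer wg.Done()
			if err := gatherServer(serv); err != nil {
				errC <- err
			}
		}(server)
	}

	wg.Wait()
	close(errC)

	var msgs []string
	for err := range errC {
		msgs = append(msgs, err.Error())
	}
	if len(msgs) == 0 {
		return nil
	}
	return errors.New(strings.Join(msgs, "\n"))
}

func main() {
	fmt.Println(gatherAll([]string{"http://localhost:12900", ""}))
}
```

Buffering the channel to `len(servers)` is what makes the pattern race-free: every goroutine can complete its send even before the collector starts draining, unlike the shared `outerr` variable the haproxy fix removes.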
From 2c448e22e1cde9c9a5b4d0adc22914a1c3d5a54e Mon Sep 17 00:00:00 2001 From: Cameron Sparr Date: Thu, 2 Jun 2016 12:34:03 +0100 Subject: [PATCH 28/34] New object: ErrChan for concurrent err handling --- internal/errchan/errchan.go | 37 +++++++++++++++++++ plugins/inputs/cloudwatch/cloudwatch.go | 20 +++++----- plugins/inputs/elasticsearch/elasticsearch.go | 19 ++-------- plugins/inputs/exec/exec.go | 15 ++------ plugins/inputs/haproxy/haproxy.go | 16 ++++---- plugins/inputs/rabbitmq/rabbitmq.go | 22 +++++------ 6 files changed, 75 insertions(+), 54 deletions(-) create mode 100644 internal/errchan/errchan.go diff --git a/internal/errchan/errchan.go b/internal/errchan/errchan.go new file mode 100644 index 000000000..467a0f4a7 --- /dev/null +++ b/internal/errchan/errchan.go @@ -0,0 +1,37 @@ +package errchan + +import ( + "fmt" + "strings" +) + +type ErrChan struct { + C chan error +} + +// New returns an error channel of max length 'n' +// errors can be sent to the ErrChan.C channel, and will be returned when +// ErrChan.Error() is called. +func New(n int) *ErrChan { + return &ErrChan{ + C: make(chan error, n), + } +} + +// Error closes the ErrChan.C channel and returns an error if there are any +// non-nil errors, otherwise returns nil. +func (e *ErrChan) Error() error { + close(e.C) + + var out string + for err := range e.C { + if err != nil { + out += "[" + err.Error() + "], " + } + } + + if out != "" { + return fmt.Errorf("Errors encountered: " + strings.TrimRight(out, ", ")) + } + return nil +} diff --git a/plugins/inputs/cloudwatch/cloudwatch.go b/plugins/inputs/cloudwatch/cloudwatch.go index 1bd2d5c07..f3019eb4b 100644 --- a/plugins/inputs/cloudwatch/cloudwatch.go +++ b/plugins/inputs/cloudwatch/cloudwatch.go @@ -3,6 +3,7 @@ package cloudwatch import ( "fmt" "strings" + "sync" "time" "github.com/aws/aws-sdk-go/aws" @@ -12,6 +13,7 @@ import ( "github.com/influxdata/telegraf" "github.com/influxdata/telegraf/internal" internalaws "github.com/influxdata/telegraf/internal/config/aws" + "github.com/influxdata/telegraf/internal/errchan" "github.com/influxdata/telegraf/internal/limiter" "github.com/influxdata/telegraf/plugins/inputs" ) @@ -166,7 +168,7 @@ func (c *CloudWatch) Gather(acc telegraf.Accumulator) error { } metricCount := len(metrics) - var errChan = make(chan error, metricCount) + errChan := errchan.New(metricCount) now := time.Now() @@ -175,18 +177,18 @@ func (c *CloudWatch) Gather(acc telegraf.Accumulator) error { // http://docs.aws.amazon.com/AmazonCloudWatch/latest/DeveloperGuide/cloudwatch_limits.html lmtr := limiter.NewRateLimiter(10, time.Second) defer lmtr.Stop() + var wg sync.WaitGroup + wg.Add(len(metrics)) for _, m := range metrics { <-lmtr.C - go c.gatherMetric(acc, m, now, errChan) + go func(inm *cloudwatch.Metric) { + defer wg.Done() + c.gatherMetric(acc, inm, now, errChan.C) + }(m) } + wg.Wait() - for i := 1; i <= metricCount; i++ { - err := <-errChan - if err != nil { - return err - } - } - return nil + return errChan.Error() } func init() { diff --git a/plugins/inputs/elasticsearch/elasticsearch.go b/plugins/inputs/elasticsearch/elasticsearch.go index 32bd58516..3839f6df6 100644 --- a/plugins/inputs/elasticsearch/elasticsearch.go +++ b/plugins/inputs/elasticsearch/elasticsearch.go @@ -2,14 +2,13 @@ package elasticsearch import ( "encoding/json" - "errors" "fmt" "net/http" - "strings" "sync" "time" "github.com/influxdata/telegraf" + "github.com/influxdata/telegraf/internal/errchan" "github.com/influxdata/telegraf/plugins/inputs" jsonparser 
"github.com/influxdata/telegraf/plugins/parsers/json" ) @@ -102,7 +101,7 @@ func (e *Elasticsearch) Description() string { // Gather reads the stats from Elasticsearch and writes it to the // Accumulator. func (e *Elasticsearch) Gather(acc telegraf.Accumulator) error { - errChan := make(chan error, len(e.Servers)) + errChan := errchan.New(len(e.Servers)) var wg sync.WaitGroup wg.Add(len(e.Servers)) @@ -116,7 +115,7 @@ func (e *Elasticsearch) Gather(acc telegraf.Accumulator) error { url = s + statsPath } if err := e.gatherNodeStats(url, acc); err != nil { - errChan <- err + errChan.C <- err return } if e.ClusterHealth { @@ -126,17 +125,7 @@ func (e *Elasticsearch) Gather(acc telegraf.Accumulator) error { } wg.Wait() - close(errChan) - // Get all errors and return them as one giant error - errStrings := []string{} - for err := range errChan { - errStrings = append(errStrings, err.Error()) - } - - if len(errStrings) == 0 { - return nil - } - return errors.New(strings.Join(errStrings, "\n")) + return errChan.Error() } func (e *Elasticsearch) gatherNodeStats(url string, acc telegraf.Accumulator) error { diff --git a/plugins/inputs/exec/exec.go b/plugins/inputs/exec/exec.go index 1f5f12203..415831960 100644 --- a/plugins/inputs/exec/exec.go +++ b/plugins/inputs/exec/exec.go @@ -14,6 +14,7 @@ import ( "github.com/influxdata/telegraf" "github.com/influxdata/telegraf/internal" + "github.com/influxdata/telegraf/internal/errchan" "github.com/influxdata/telegraf/plugins/inputs" "github.com/influxdata/telegraf/plugins/parsers" "github.com/influxdata/telegraf/plugins/parsers/nagios" @@ -182,23 +183,15 @@ func (e *Exec) Gather(acc telegraf.Accumulator) error { } } - e.errChan = make(chan error, len(commands)) + errChan := errchan.New(len(commands)) + e.errChan = errChan.C e.wg.Add(len(commands)) for _, command := range commands { go e.ProcessCommand(command, acc) } e.wg.Wait() - - select { - default: - close(e.errChan) - return nil - case err := <-e.errChan: - close(e.errChan) - return err - } - + return errChan.Error() } func init() { diff --git a/plugins/inputs/haproxy/haproxy.go b/plugins/inputs/haproxy/haproxy.go index 396e3c934..0a0b3da82 100644 --- a/plugins/inputs/haproxy/haproxy.go +++ b/plugins/inputs/haproxy/haproxy.go @@ -3,10 +3,7 @@ package haproxy import ( "encoding/csv" "fmt" - "github.com/influxdata/telegraf" - "github.com/influxdata/telegraf/plugins/inputs" "io" - "log" "net" "net/http" "net/url" @@ -14,6 +11,10 @@ import ( "strings" "sync" "time" + + "github.com/influxdata/telegraf" + "github.com/influxdata/telegraf/internal/errchan" + "github.com/influxdata/telegraf/plugins/inputs" ) //CSV format: https://cbonte.github.io/haproxy-dconv/configuration-1.5.html#9.1 @@ -114,18 +115,17 @@ func (g *haproxy) Gather(acc telegraf.Accumulator) error { } var wg sync.WaitGroup + errChan := errchan.New(len(g.Servers)) + wg.Add(len(g.Servers)) for _, server := range g.Servers { - wg.Add(1) go func(serv string) { defer wg.Done() - if err := g.gatherServer(serv, acc); err != nil { - log.Printf("HAProxy error gathering server: %s, %s", serv, err) - } + errChan.C <- g.gatherServer(serv, acc) }(server) } wg.Wait() - return nil + return errChan.Error() } func (g *haproxy) gatherServerSocket(addr string, acc telegraf.Accumulator) error { diff --git a/plugins/inputs/rabbitmq/rabbitmq.go b/plugins/inputs/rabbitmq/rabbitmq.go index bf6859002..18d666a08 100644 --- a/plugins/inputs/rabbitmq/rabbitmq.go +++ b/plugins/inputs/rabbitmq/rabbitmq.go @@ -5,9 +5,11 @@ import ( "fmt" "net/http" "strconv" + "sync" "time" 
"github.com/influxdata/telegraf" + "github.com/influxdata/telegraf/internal/errchan" "github.com/influxdata/telegraf/plugins/inputs" ) @@ -129,20 +131,18 @@ func (r *RabbitMQ) Gather(acc telegraf.Accumulator) error { } } - var errChan = make(chan error, len(gatherFunctions)) - + var wg sync.WaitGroup + wg.Add(len(gatherFunctions)) + errChan := errchan.New(len(gatherFunctions)) for _, f := range gatherFunctions { - go f(r, acc, errChan) + go func(gf gatherFunc) { + defer wg.Done() + gf(r, acc, errChan.C) + }(f) } + wg.Wait() - for i := 1; i <= len(gatherFunctions); i++ { - err := <-errChan - if err != nil { - return err - } - } - - return nil + return errChan.Error() } func (r *RabbitMQ) requestJSON(u string, target interface{}) error { From 80368e3936b1670c9244c0c8856f03cb8f277800 Mon Sep 17 00:00:00 2001 From: Meng Ye Date: Thu, 2 Jun 2016 21:24:48 +0800 Subject: [PATCH 29/34] fix used_percent Calculation formula (#1313) --- plugins/inputs/system/MEM_README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plugins/inputs/system/MEM_README.md b/plugins/inputs/system/MEM_README.md index 9b766e700..16a0f7996 100644 --- a/plugins/inputs/system/MEM_README.md +++ b/plugins/inputs/system/MEM_README.md @@ -18,7 +18,7 @@ It is supposed to be used to monitor actual memory usage in a cross platform fas designed for informational purposes only. - **free**: memory not being used at all (zeroed) that is readily available; note that this doesn't reflect the actual memory available (use 'available' instead). -- **used_percent**: the percentage usage calculated as `(total - used) / total * 100` +- **used_percent**: the percentage usage calculated as `used / total * 100` ## Measurements: #### Raw Memory measurements: From 3ff184c0617ded069e57c535dba1dfdbf31753fb Mon Sep 17 00:00:00 2001 From: Pierre Fersing Date: Thu, 2 Jun 2016 15:25:23 +0200 Subject: [PATCH 30/34] Removed leaked "database" tag on redis metrics (#1316) --- CHANGELOG.md | 1 + plugins/inputs/redis/redis.go | 6 +++++- plugins/inputs/redis/redis_test.go | 4 +++- 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f9c1bb871..7148e0683 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -47,6 +47,7 @@ time before a new metric is included by the plugin. - [#1268](https://github.com/influxdata/telegraf/pull/1268): Fix potential influxdb input type assertion panic. - [#1283](https://github.com/influxdata/telegraf/pull/1283): Still send processes metrics if a process exited during metric collection. - [#1297](https://github.com/influxdata/telegraf/issues/1297): disk plugin panic when usage grab fails. +- [#1316](https://github.com/influxdata/telegraf/pull/1316): Removed leaked "database" tag on redis metrics. Thanks @PierreF! 
## v0.13.1 [2016-05-24] diff --git a/plugins/inputs/redis/redis.go b/plugins/inputs/redis/redis.go index dee564c73..94f562471 100644 --- a/plugins/inputs/redis/redis.go +++ b/plugins/inputs/redis/redis.go @@ -241,10 +241,14 @@ func gatherKeyspaceLine( name string, line string, acc telegraf.Accumulator, - tags map[string]string, + global_tags map[string]string, ) { if strings.Contains(line, "keys=") { fields := make(map[string]interface{}) + tags := make(map[string]string) + for k, v := range global_tags { + tags[k] = v + } tags["database"] = name dbparts := strings.Split(line, ",") for _, dbp := range dbparts { diff --git a/plugins/inputs/redis/redis_test.go b/plugins/inputs/redis/redis_test.go index 612595cdb..b12950ee4 100644 --- a/plugins/inputs/redis/redis_test.go +++ b/plugins/inputs/redis/redis_test.go @@ -35,6 +35,7 @@ func TestRedis_ParseMetrics(t *testing.T) { err := gatherInfoOutput(rdr, &acc, tags) require.NoError(t, err) + tags = map[string]string{"host": "redis.net", "role": "master"} fields := map[string]interface{}{ "uptime": uint64(238), "clients": uint64(1), @@ -70,13 +71,14 @@ func TestRedis_ParseMetrics(t *testing.T) { "used_cpu_user_children": float64(0.00), "keyspace_hitrate": float64(0.50), } + keyspaceTags := map[string]string{"host": "redis.net", "role": "master", "database": "db0"} keyspaceFields := map[string]interface{}{ "avg_ttl": uint64(0), "expires": uint64(0), "keys": uint64(2), } acc.AssertContainsTaggedFields(t, "redis", fields, tags) - acc.AssertContainsTaggedFields(t, "redis_keyspace", keyspaceFields, tags) + acc.AssertContainsTaggedFields(t, "redis_keyspace", keyspaceFields, keyspaceTags) } const testOutput = `# Server From 94f952787f25328073ebdfe3ba0e796bda2f492c Mon Sep 17 00:00:00 2001 From: Ross McDonald Date: Thu, 2 Jun 2016 10:14:18 -0500 Subject: [PATCH 31/34] Add statically-linked amd64 builds to default build targets. Remove version and iteration from root packaging folder. (#1318) closes #1201 --- scripts/build.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/build.py b/scripts/build.py index 8c54b68a2..426aa87bb 100755 --- a/scripts/build.py +++ b/scripts/build.py @@ -85,7 +85,7 @@ targets = { supported_builds = { "darwin": [ "amd64" ], "windows": [ "amd64" ], - "linux": [ "amd64", "i386", "armhf", "armel", "arm64" ], + "linux": [ "amd64", "i386", "armhf", "armel", "arm64", "static_amd64" ], "freebsd": [ "amd64" ] } @@ -553,7 +553,7 @@ def package(build_output, pkg_name, version, nightly=False, iteration=1, static= build_root = os.path.join(tmp_build_dir, platform, arch, - '{}-{}-{}'.format(PACKAGE_NAME, version, iteration)) + PACKAGE_NAME) os.makedirs(build_root) # Copy packaging scripts to build directory From a362352587d864995ee584247ee7be2d05ed2d7d Mon Sep 17 00:00:00 2001 From: Cameron Sparr Date: Fri, 3 Jun 2016 12:22:08 +0100 Subject: [PATCH 32/34] Use glob match for finding /proc//stat files closes #1323 --- CHANGELOG.md | 3 ++- plugins/inputs/system/processes.go | 35 +++++++++++------------------- 2 files changed, 15 insertions(+), 23 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7148e0683..39a356bb5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,7 +22,7 @@ in conjunction with wildcard dimension values as it will control the amount of time before a new metric is included by the plugin. ### Features -- [#1262](https://github.com/influxdata/telegraf/pull/1261): Add graylog input pluging. +- [#1262](https://github.com/influxdata/telegraf/pull/1261): Add graylog input pluging. 
- [#1294](https://github.com/influxdata/telegraf/pull/1294): consul input plugin. Thanks @harnash - [#1164](https://github.com/influxdata/telegraf/pull/1164): conntrack input plugin. Thanks @robinpercy! - [#1165](https://github.com/influxdata/telegraf/pull/1165): vmstat input plugin. Thanks @jshim-xm! @@ -48,6 +48,7 @@ time before a new metric is included by the plugin. - [#1283](https://github.com/influxdata/telegraf/pull/1283): Still send processes metrics if a process exited during metric collection. - [#1297](https://github.com/influxdata/telegraf/issues/1297): disk plugin panic when usage grab fails. - [#1316](https://github.com/influxdata/telegraf/pull/1316): Removed leaked "database" tag on redis metrics. Thanks @PierreF! +- [#1323](https://github.com/influxdata/telegraf/issues/1323): Processes plugin: fix potential error with /proc/net/stat directory. ## v0.13.1 [2016-05-24] diff --git a/plugins/inputs/system/processes.go b/plugins/inputs/system/processes.go index c2cd8525b..c16f7a480 100644 --- a/plugins/inputs/system/processes.go +++ b/plugins/inputs/system/processes.go @@ -9,7 +9,7 @@ import ( "log" "os" "os/exec" - "path" + "path/filepath" "runtime" "strconv" @@ -19,7 +19,7 @@ import ( type Processes struct { execPS func() ([]byte, error) - readProcFile func(statFile string) ([]byte, error) + readProcFile func(filename string) ([]byte, error) forcePS bool forceProc bool @@ -128,22 +128,16 @@ func (p *Processes) gatherFromPS(fields map[string]interface{}) error { // get process states from /proc/(pid)/stat files func (p *Processes) gatherFromProc(fields map[string]interface{}) error { - files, err := ioutil.ReadDir("/proc") + filenames, err := filepath.Glob("/proc/[0-9]*/stat") if err != nil { return err } - for _, file := range files { - if !file.IsDir() { - continue - } + for _, filename := range filenames { + _, err := os.Stat(filename) - statFile := path.Join("/proc", file.Name(), "stat") - data, err := p.readProcFile(statFile) + data, err := p.readProcFile(filename) if err != nil { - if !file.IsDir() { - continue - } return err } if data == nil { @@ -159,7 +153,7 @@ func (p *Processes) gatherFromProc(fields map[string]interface{}) error { stats := bytes.Fields(data) if len(stats) < 3 { - return fmt.Errorf("Something is terribly wrong with %s", statFile) + return fmt.Errorf("Something is terribly wrong with %s", filename) } switch stats[0][0] { case 'R': @@ -176,7 +170,7 @@ func (p *Processes) gatherFromProc(fields map[string]interface{}) error { fields["paging"] = fields["paging"].(int64) + int64(1) default: log.Printf("processes: Unknown state [ %s ] in file %s", - string(stats[0][0]), statFile) + string(stats[0][0]), filename) } fields["total"] = fields["total"].(int64) + int64(1) @@ -190,15 +184,12 @@ func (p *Processes) gatherFromProc(fields map[string]interface{}) error { return nil } -func readProcFile(statFile string) ([]byte, error) { - if _, err := os.Stat(statFile); os.IsNotExist(err) { - return nil, nil - } else if err != nil { - return nil, err - } - - data, err := ioutil.ReadFile(statFile) +func readProcFile(filename string) ([]byte, error) { + data, err := ioutil.ReadFile(filename) if err != nil { + if os.IsNotExist(err) { + return nil, nil + } return nil, err } From 821b30eb923b19b181c3120d80213b387b3b8c5a Mon Sep 17 00:00:00 2001 From: Cameron Sparr Date: Fri, 3 Jun 2016 13:32:16 +0100 Subject: [PATCH 33/34] Add timeout param to exec readme (#1325) --- plugins/inputs/exec/README.md | 36 +++++++++++++---------------------- 1 file changed, 13 insertions(+), 23 
deletions(-) diff --git a/plugins/inputs/exec/README.md b/plugins/inputs/exec/README.md index 14acf0957..0e2563900 100644 --- a/plugins/inputs/exec/README.md +++ b/plugins/inputs/exec/README.md @@ -11,39 +11,22 @@ all scripts matching glob pattern ```/tmp/collect_*.sh``` are configured for ``` in JSON format. Glob patterns are matched on every run, so adding new scripts that match the pattern will cause them to be picked up immediately. -``` +```toml # Read flattened metrics from one or more commands that output JSON to stdout [[inputs.exec]] # Shell/commands array # Full command line to executable with parameters, or a glob pattern to run all matching files. commands = ["/tmp/test.sh", "/tmp/test2.sh", "/tmp/collect_*.sh"] + ## Timeout for each command to complete. + timeout = "5s" + # Data format to consume. # NOTE json only reads numerical measurements, strings and booleans are ignored. data_format = "json" # measurement name suffix (for separating different commands) name_suffix = "_mycollector" - - ## Below configuration will be used for data_format = "graphite", can be ignored for other data_format - ## If matching multiple measurement files, this string will be used to join the matched values. - #separator = "." - - ## Each template line requires a template pattern. It can have an optional - ## filter before the template and separated by spaces. It can also have optional extra - ## tags following the template. Multiple tags should be separated by commas and no spaces - ## similar to the line protocol format. The can be only one default template. - ## Templates support below format: - ## 1. filter + template - ## 2. filter + template + extra tag - ## 3. filter + template with field key - ## 4. default template - #templates = [ - # "*.app env.service.resource.measurement", - # "stats.* .host.measurement* region=us-west,agent=sensu", - # "stats2.* .host.measurement.field", - # "measurement*" - #] ``` Other options for modifying the measurement names are: @@ -82,7 +65,7 @@ in influx line-protocol format. #### Configuration -``` +```toml [[inputs.exec]] # Shell/commands array # compatible with old version @@ -90,6 +73,9 @@ in influx line-protocol format. # command = "/usr/bin/line_protocol_collector" commands = ["/usr/bin/line_protocol_collector","/tmp/test2.sh"] + ## Timeout for each command to complete. + timeout = "5s" + # Data format to consume. # NOTE json only reads numerical measurements, strings and booleans are ignored. data_format = "influx" @@ -123,12 +109,16 @@ We can also change the data_format to "graphite" to use the metrics collecting s In this example a script called /tmp/test.sh and a script called /tmp/test2.sh are configured for [[inputs.exec]] in graphite format. #### Configuration -``` + +```toml # Read flattened metrics from one or more commands that output JSON to stdout [[inputs.exec]] # Shell/commands array commands = ["/tmp/test.sh","/tmp/test2.sh"] + ## Timeout for each command to complete. + timeout = "5s" + # Data format to consume. # NOTE json only reads numerical measurements, strings and booleans are ignored. 
data_format = "graphite" From 8c3d7cd1458fbe5c56de626ad228aee8824f7945 Mon Sep 17 00:00:00 2001 From: Cameron Sparr Date: Fri, 3 Jun 2016 14:28:47 +0100 Subject: [PATCH 34/34] Fix rare panic in RHEL 5.2 diskio plugin (#1327) closes #1322 --- CHANGELOG.md | 1 + Godeps | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 39a356bb5..9e6fc7ac8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -49,6 +49,7 @@ time before a new metric is included by the plugin. - [#1297](https://github.com/influxdata/telegraf/issues/1297): disk plugin panic when usage grab fails. - [#1316](https://github.com/influxdata/telegraf/pull/1316): Removed leaked "database" tag on redis metrics. Thanks @PierreF! - [#1323](https://github.com/influxdata/telegraf/issues/1323): Processes plugin: fix potential error with /proc/net/stat directory. +- [#1322](https://github.com/influxdata/telegraf/issues/1322): Fix rare RHEL 5.2 panic in gopsutil diskio gathering function. ## v0.13.1 [2016-05-24] diff --git a/Godeps b/Godeps index 05bd7ef16..2ac95a904 100644 --- a/Godeps +++ b/Godeps @@ -43,7 +43,7 @@ github.com/prometheus/client_model fa8ad6fec33561be4280a8f0514318c79d7f6cb6 github.com/prometheus/common e8eabff8812b05acf522b45fdcd725a785188e37 github.com/prometheus/procfs 406e5b7bfd8201a36e2bb5f7bdae0b03380c2ce8 github.com/samuel/go-zookeeper 218e9c81c0dd8b3b18172b2bbfad92cc7d6db55f -github.com/shirou/gopsutil 83c6e72cbdef6e8ada934549abf700ff0ba96776 +github.com/shirou/gopsutil 586bb697f3ec9f8ec08ffefe18f521a64534037c github.com/soniah/gosnmp b1b4f885b12c5dcbd021c5cee1c904110de6db7d github.com/streadway/amqp b4f3ceab0337f013208d31348b578d83c0064744 github.com/stretchr/testify 1f4a1643a57e798696635ea4c126e9127adb7d3c