From 3923c0297910e0904ebb3bcb8caa55623f69d6c1 Mon Sep 17 00:00:00 2001 From: Cameron Sparr Date: Thu, 10 Mar 2016 13:40:03 +0100 Subject: [PATCH] Add a "kernel" plugin for /proc/stat statistics see #235 --- CHANGELOG.md | 3 +- README.md | 1 + etc/telegraf.conf | 4 + plugins/inputs/system/KERNEL_README.md | 64 ++++++++++ plugins/inputs/system/kernel.go | 110 +++++++++++++++++ plugins/inputs/system/kernel_test.go | 164 +++++++++++++++++++++++++ 6 files changed, 345 insertions(+), 1 deletion(-) create mode 100644 plugins/inputs/system/KERNEL_README.md create mode 100644 plugins/inputs/system/kernel.go create mode 100644 plugins/inputs/system/kernel_test.go diff --git a/CHANGELOG.md b/CHANGELOG.md index 8392d62b3..4f7d245b2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,4 @@ -## v0.10.5 [unreleased] +## v0.11.0 [unreleased] ### Release Notes @@ -16,6 +16,7 @@ - [#776](https://github.com/influxdata/telegraf/pull/776): Add Zookeeper chroot option to kafka_consumer. Thanks @prune998! - [#811](https://github.com/influxdata/telegraf/pull/811): Add processes plugin for classifying total procs on system. Thanks @titilambert! - [#235](https://github.com/influxdata/telegraf/issues/235): Add number of users to the `system` input plugin. +- [#826](https://github.com/influxdata/telegraf/pull/826): "kernel" linux plugin for /proc/stat metrics (context switches, interrupts, etc.) ### Bugfixes - [#748](https://github.com/influxdata/telegraf/issues/748): Fix sensor plugin split on ":" diff --git a/README.md b/README.md index fb9363100..8f9b0bc33 100644 --- a/README.md +++ b/README.md @@ -215,6 +215,7 @@ Currently implemented sources: * diskio * swap * processes + * kernel (/proc/stat) Telegraf can also collect metrics via the following service plugins: diff --git a/etc/telegraf.conf b/etc/telegraf.conf index 3deb7f895..0e740f5c8 100644 --- a/etc/telegraf.conf +++ b/etc/telegraf.conf @@ -119,6 +119,10 @@ # Uncomment the following line if you do not need disk serial numbers. 
# skip_serial_number = true +# Get kernel statistics from /proc/stat +[[inputs.kernel]] + # no configuration + # Read metrics about memory usage [[inputs.mem]] # no configuration diff --git a/plugins/inputs/system/KERNEL_README.md b/plugins/inputs/system/KERNEL_README.md new file mode 100644 index 000000000..3285e59ef --- /dev/null +++ b/plugins/inputs/system/KERNEL_README.md @@ -0,0 +1,64 @@ +# Kernel Input Plugin + +This plugin is only available on Linux. + +The kernel plugin gathers info about the kernel that doesn't fit into other +plugins. In general, it is the statistics available in `/proc/stat` that are +not covered by other plugins. + +The metrics are documented in `man proc` under the `/proc/stat` section. + +``` +/proc/stat +kernel/system statistics. Varies with architecture. Common entries include: + +page 5741 1808 +The number of pages the system paged in and the number that were paged out (from disk). + +swap 1 0 +The number of swap pages that have been brought in and out. + +intr 1462898 +This line shows counts of interrupts serviced since boot time, for each of +the possible system interrupts. The first column is the total of all +interrupts serviced; each subsequent column is the total for a particular interrupt. + +ctxt 115315 +The number of context switches that the system underwent. + +btime 769041601 +boot time, in seconds since the Epoch, 1970-01-01 00:00:00 +0000 (UTC). + +processes 86031 +Number of forks since boot. 
+```
+
+### Configuration:
+
+```toml
+# Get kernel statistics from /proc/stat
+[[inputs.kernel]]
+  # no configuration
+```
+
+### Measurements & Fields:
+
+- kernel
+    - boot_time (integer, seconds since epoch, `btime`)
+    - context_switches (integer, `ctxt`)
+    - disk_pages_in (integer, `page (0)`)
+    - disk_pages_out (integer, `page (1)`)
+    - interrupts (integer, `intr`)
+    - processes_forked (integer, `processes`)
+
+### Tags:
+
+None
+
+### Example Output:
+
+```
+$ telegraf -config ~/ws/telegraf.conf -input-filter kernel -test
+* Plugin: kernel, Collection 1
+> kernel boot_time=1457505775i,context_switches=2626618i,disk_pages_in=5741i,disk_pages_out=1808i,interrupts=1472736i,processes_forked=10673i 1457613402960879816
+```
diff --git a/plugins/inputs/system/kernel.go b/plugins/inputs/system/kernel.go
new file mode 100644
index 000000000..900400146
--- /dev/null
+++ b/plugins/inputs/system/kernel.go
@@ -0,0 +1,110 @@
+// +build linux
+
+package system
+
+import (
+	"bytes"
+	"fmt"
+	"io/ioutil"
+	"os"
+	"strconv"
+
+	"github.com/influxdata/telegraf"
+	"github.com/influxdata/telegraf/plugins/inputs"
+)
+
+// /proc/stat file line prefixes to gather stats on:
+var (
+	interrupts       = []byte("intr")
+	context_switches = []byte("ctxt")
+	processes_forked = []byte("processes")
+	disk_pages       = []byte("page")
+	boot_time        = []byte("btime")
+)
+
+// Kernel collects kernel-wide counters from the file named by statFile.
+type Kernel struct {
+	statFile string
+}
+
+// Description returns a one-line summary of the plugin.
+func (k *Kernel) Description() string {
+	return "Get kernel statistics from /proc/stat"
+}
+
+// SampleConfig returns an empty string: the plugin has no configuration.
+func (k *Kernel) SampleConfig() string { return "" }
+
+// Gather reads statFile and adds the recognised counters as int64 fields of
+// the "kernel" measurement. A key is only matched as the first token of a
+// line, and a key that lacks its value(s) yields an error, not a panic.
+func (k *Kernel) Gather(acc telegraf.Accumulator) error {
+	data, err := k.getProcStat()
+	if err != nil {
+		return err
+	}
+
+	fields := make(map[string]interface{})
+	for _, line := range bytes.Split(data, []byte("\n")) {
+		cols := bytes.Fields(line)
+		if len(cols) == 0 {
+			continue
+		}
+
+		// names[i] is the field filled from column i+1 of this line.
+		var names []string
+		switch {
+		case bytes.Equal(cols[0], interrupts):
+			names = []string{"interrupts"}
+		case bytes.Equal(cols[0], context_switches):
+			names = []string{"context_switches"}
+		case bytes.Equal(cols[0], processes_forked):
+			names = []string{"processes_forked"}
+		case bytes.Equal(cols[0], boot_time):
+			names = []string{"boot_time"}
+		case bytes.Equal(cols[0], disk_pages):
+			names = []string{"disk_pages_in", "disk_pages_out"}
+		}
+		if len(cols) <= len(names) {
+			return fmt.Errorf("kernel: %q line in %s has too few values", cols[0], k.statFile)
+		}
+		for i, name := range names {
+			// ParseInt keeps the full 64-bit range even where int is 32 bits.
+			v, perr := strconv.ParseInt(string(cols[i+1]), 10, 64)
+			if perr != nil {
+				return perr
+			}
+			fields[name] = v
+		}
+	}
+
+	acc.AddFields("kernel", fields, map[string]string{})
+
+	return nil
+}
+
+// getProcStat returns the raw contents of statFile. A missing file is
+// reported with a "does not exist" error naming the path.
+func (k *Kernel) getProcStat() ([]byte, error) {
+	if _, err := os.Stat(k.statFile); os.IsNotExist(err) {
+		return nil, fmt.Errorf("kernel: %s does not exist!", k.statFile)
+	} else if err != nil {
+		return nil, err
+	}
+
+	data, err := ioutil.ReadFile(k.statFile)
+	if err != nil {
+		return nil, err
+	}
+
+	return data, nil
+}
+
+// init registers the plugin as "kernel", reading /proc/stat by default.
+func init() {
+	inputs.Add("kernel", func() telegraf.Input {
+		return &Kernel{
+			statFile: "/proc/stat",
+		}
+	})
+}
diff --git a/plugins/inputs/system/kernel_test.go b/plugins/inputs/system/kernel_test.go
new file mode 100644
index 000000000..398cba4cc
--- /dev/null
+++ b/plugins/inputs/system/kernel_test.go
@@ -0,0 +1,164 @@
+// +build linux
+
+package system
+
+import (
+	"io/ioutil"
+	"os"
+	"testing"
+
+	"github.com/influxdata/telegraf/testutil"
+
+	"github.com/stretchr/testify/assert"
+)
+
+func TestFullProcFile(t *testing.T) {
+	tmpfile := makeFakeStatFile([]byte(statFile_Full))
+	defer os.Remove(tmpfile)
+
+	k := Kernel{
+		statFile: tmpfile,
+	}
+
+	acc := testutil.Accumulator{}
+	err := k.Gather(&acc)
+	assert.NoError(t, err)
+
+	fields := map[string]interface{}{
+		"boot_time":        int64(1457505775),
"context_switches": int64(2626618), + "disk_pages_in": int64(5741), + "disk_pages_out": int64(1808), + "interrupts": int64(1472736), + "processes_forked": int64(10673), + } + acc.AssertContainsFields(t, "kernel", fields) +} + +func TestPartialProcFile(t *testing.T) { + tmpfile := makeFakeStatFile([]byte(statFile_Partial)) + defer os.Remove(tmpfile) + + k := Kernel{ + statFile: tmpfile, + } + + acc := testutil.Accumulator{} + err := k.Gather(&acc) + assert.NoError(t, err) + + fields := map[string]interface{}{ + "boot_time": int64(1457505775), + "context_switches": int64(2626618), + "disk_pages_in": int64(5741), + "disk_pages_out": int64(1808), + "interrupts": int64(1472736), + } + acc.AssertContainsFields(t, "kernel", fields) +} + +func TestInvalidProcFile1(t *testing.T) { + tmpfile := makeFakeStatFile([]byte(statFile_Invalid)) + defer os.Remove(tmpfile) + + k := Kernel{ + statFile: tmpfile, + } + + acc := testutil.Accumulator{} + err := k.Gather(&acc) + assert.Error(t, err) +} + +func TestInvalidProcFile2(t *testing.T) { + tmpfile := makeFakeStatFile([]byte(statFile_Invalid2)) + defer os.Remove(tmpfile) + + k := Kernel{ + statFile: tmpfile, + } + + acc := testutil.Accumulator{} + err := k.Gather(&acc) + assert.Error(t, err) +} + +func TestNoProcFile(t *testing.T) { + tmpfile := makeFakeStatFile([]byte(statFile_Invalid2)) + os.Remove(tmpfile) + + k := Kernel{ + statFile: tmpfile, + } + + acc := testutil.Accumulator{} + err := k.Gather(&acc) + assert.Error(t, err) + assert.Contains(t, err.Error(), "does not exist") +} + +const statFile_Full = `cpu 6796 252 5655 10444977 175 0 101 0 0 0 +cpu0 6796 252 5655 10444977 175 0 101 0 0 0 +intr 1472736 57 10 0 0 0 0 0 0 0 0 0 0 156 0 0 0 0 0 0 111551 42541 12356 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +ctxt 2626618 +btime 1457505775 +processes 10673 +procs_running 2 +procs_blocked 0 +softirq 1031662 0 649485 20946 111071 11620 0 1 0 994 237545 +page 5741 1808 +swap 1 0 +` + +const statFile_Partial = `cpu 6796 252 5655 10444977 175 0 101 0 0 0 +cpu0 6796 252 5655 10444977 175 0 101 0 0 0 +intr 1472736 57 10 0 0 0 0 0 0 0 0 0 0 156 0 0 0 0 0 0 111551 42541 12356 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +ctxt 2626618 +btime 1457505775 +procs_running 2 +procs_blocked 0 +softirq 1031662 0 649485 20946 111071 11620 0 1 0 994 237545 +page 5741 1808 +` + +// missing btime measurement +const statFile_Invalid = `cpu 6796 252 5655 10444977 175 0 101 0 0 0 +cpu0 6796 252 5655 10444977 175 0 101 0 0 0 +intr 1472736 57 10 0 0 0 0 0 0 0 0 0 0 156 0 0 0 0 0 0 111551 42541 12356 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +ctxt 2626618 +btime +processes 10673 +procs_running 2 +procs_blocked 0 +softirq 1031662 0 649485 20946 111071 11620 0 1 0 994 
237545 +page 5741 1808 +swap 1 0 +` + +// missing second page measurement +const statFile_Invalid2 = `cpu 6796 252 5655 10444977 175 0 101 0 0 0 +cpu0 6796 252 5655 10444977 175 0 101 0 0 0 +intr 1472736 57 10 0 0 0 0 0 0 0 0 0 0 156 0 0 0 0 0 0 111551 42541 12356 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +ctxt 2626618 +processes 10673 +procs_running 2 +page 5741 +procs_blocked 0 +softirq 1031662 0 649485 20946 111071 11620 0 1 0 994 237545 +` + +func makeFakeStatFile(content []byte) string { + tmpfile, err := ioutil.TempFile("", "kerneltest") + if err != nil { + panic(err) + } + + if _, err := tmpfile.Write(content); err != nil { + panic(err) + } + if err := tmpfile.Close(); err != nil { + panic(err) + } + + return tmpfile.Name() +}