Add a "kernel" plugin for /proc/stat statistics

see #235
This commit is contained in:
Cameron Sparr 2016-03-10 13:40:03 +01:00
parent ecbbb8426f
commit db8c24cc7b
6 changed files with 345 additions and 1 deletions

View File

@ -1,4 +1,4 @@
## v0.10.5 [unreleased]
## v0.11.0 [unreleased]
### Release Notes
@ -16,6 +16,7 @@
- [#776](https://github.com/influxdata/telegraf/pull/776): Add Zookeeper chroot option to kafka_consumer. Thanks @prune998!
- [#811](https://github.com/influxdata/telegraf/pull/811): Add processes plugin for classifying total procs on system. Thanks @titilambert!
- [#235](https://github.com/influxdata/telegraf/issues/235): Add number of users to the `system` input plugin.
- [#826](https://github.com/influxdata/telegraf/pull/826): "kernel" linux plugin for /proc/stat metrics (context switches, interrupts, etc.)
### Bugfixes
- [#748](https://github.com/influxdata/telegraf/issues/748): Fix sensor plugin split on ":"

View File

@ -215,6 +215,7 @@ Currently implemented sources:
* diskio
* swap
* processes
* kernel (/proc/stat)
Telegraf can also collect metrics via the following service plugins:

View File

@ -119,6 +119,10 @@
# Uncomment the following line if you do not need disk serial numbers.
# skip_serial_number = true
# Get kernel statistics from /proc/stat
[[inputs.kernel]]
# no configuration
# Read metrics about memory usage
[[inputs.mem]]
# no configuration

View File

@ -0,0 +1,64 @@
# Kernel Input Plugin
This plugin is only available on Linux.
The kernel plugin gathers info about the kernel that doesn't fit into other
plugins. In general, it is the statistics available in `/proc/stat` that are
not covered by other plugins.
The metrics are documented in `man proc` under the `/proc/stat` section.
```
/proc/stat
kernel/system statistics. Varies with architecture. Common entries include:
page 5741 1808
The number of pages the system paged in and the number that were paged out (from disk).
swap 1 0
The number of swap pages that have been brought in and out.
intr 1462898
This line shows counts of interrupts serviced since boot time, for each of
the possible system interrupts. The first column is the total of all
interrupts serviced; each subsequent column is the total for a particular interrupt.
ctxt 115315
The number of context switches that the system underwent.
btime 769041601
boot time, in seconds since the Epoch, 1970-01-01 00:00:00 +0000 (UTC).
processes 86031
Number of forks since boot.
```
### Configuration:
```toml
# Get kernel statistics from /proc/stat
[[inputs.kernel]]
# no configuration
```
### Measurements & Fields:
- kernel
- boot_time (integer, seconds since epoch, `btime`)
- context_switches (integer, `ctxt`)
- disk_pages_in (integer, `page (0)`)
- disk_pages_out (integer, `page (1)`)
- interrupts (integer, `intr`)
- processes_forked (integer, `processes`)
### Tags:
None
### Example Output:
```
$ telegraf -config ~/ws/telegraf.conf -input-filter kernel -test
* Plugin: kernel, Collection 1
> kernel boot_time=1457505775i,context_switches=2626618i,disk_pages_in=5741i,disk_pages_out=1808i,interrupts=1472736i,processes_forked=10673i 1457613402960879816
```

View File

@ -0,0 +1,110 @@
// +build linux
package system
import (
"bytes"
"fmt"
"io/ioutil"
"os"
"strconv"
"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/plugins/inputs"
)
// /proc/stat file line prefixes to gather stats on:
var (
interrupts = []byte("intr")
context_switches = []byte("ctxt")
processes_forked = []byte("processes")
disk_pages = []byte("page")
boot_time = []byte("btime")
)
type Kernel struct {
statFile string
}
func (k *Kernel) Description() string {
return "Get kernel statistics from /proc/stat"
}
func (k *Kernel) SampleConfig() string { return "" }
func (k *Kernel) Gather(acc telegraf.Accumulator) error {
data, err := k.getProcStat()
if err != nil {
return err
}
fields := make(map[string]interface{})
dataFields := bytes.Fields(data)
for i, field := range dataFields {
switch {
case bytes.Equal(field, interrupts):
m, err := strconv.Atoi(string(dataFields[i+1]))
if err != nil {
return err
}
fields["interrupts"] = int64(m)
case bytes.Equal(field, context_switches):
m, err := strconv.Atoi(string(dataFields[i+1]))
if err != nil {
return err
}
fields["context_switches"] = int64(m)
case bytes.Equal(field, processes_forked):
m, err := strconv.Atoi(string(dataFields[i+1]))
if err != nil {
return err
}
fields["processes_forked"] = int64(m)
case bytes.Equal(field, boot_time):
m, err := strconv.Atoi(string(dataFields[i+1]))
if err != nil {
return err
}
fields["boot_time"] = int64(m)
case bytes.Equal(field, disk_pages):
in, err := strconv.Atoi(string(dataFields[i+1]))
if err != nil {
return err
}
out, err := strconv.Atoi(string(dataFields[i+2]))
if err != nil {
return err
}
fields["disk_pages_in"] = int64(in)
fields["disk_pages_out"] = int64(out)
}
}
acc.AddFields("kernel", fields, map[string]string{})
return nil
}
func (k *Kernel) getProcStat() ([]byte, error) {
if _, err := os.Stat(k.statFile); os.IsNotExist(err) {
return nil, fmt.Errorf("kernel: %s does not exist!", k.statFile)
} else if err != nil {
return nil, err
}
data, err := ioutil.ReadFile(k.statFile)
if err != nil {
return nil, err
}
return data, nil
}
func init() {
inputs.Add("kernel", func() telegraf.Input {
return &Kernel{
statFile: "/proc/stat",
}
})
}

View File

@ -0,0 +1,164 @@
// +build linux
package system
import (
"io/ioutil"
"os"
"testing"
"github.com/influxdata/telegraf/testutil"
"github.com/stretchr/testify/assert"
)
func TestFullProcFile(t *testing.T) {
tmpfile := makeFakeStatFile([]byte(statFile_Full))
defer os.Remove(tmpfile)
k := Kernel{
statFile: tmpfile,
}
acc := testutil.Accumulator{}
err := k.Gather(&acc)
assert.NoError(t, err)
fields := map[string]interface{}{
"boot_time": int64(1457505775),
"context_switches": int64(2626618),
"disk_pages_in": int64(5741),
"disk_pages_out": int64(1808),
"interrupts": int64(1472736),
"processes_forked": int64(10673),
}
acc.AssertContainsFields(t, "kernel", fields)
}
func TestPartialProcFile(t *testing.T) {
tmpfile := makeFakeStatFile([]byte(statFile_Partial))
defer os.Remove(tmpfile)
k := Kernel{
statFile: tmpfile,
}
acc := testutil.Accumulator{}
err := k.Gather(&acc)
assert.NoError(t, err)
fields := map[string]interface{}{
"boot_time": int64(1457505775),
"context_switches": int64(2626618),
"disk_pages_in": int64(5741),
"disk_pages_out": int64(1808),
"interrupts": int64(1472736),
}
acc.AssertContainsFields(t, "kernel", fields)
}
func TestInvalidProcFile1(t *testing.T) {
tmpfile := makeFakeStatFile([]byte(statFile_Invalid))
defer os.Remove(tmpfile)
k := Kernel{
statFile: tmpfile,
}
acc := testutil.Accumulator{}
err := k.Gather(&acc)
assert.Error(t, err)
}
func TestInvalidProcFile2(t *testing.T) {
tmpfile := makeFakeStatFile([]byte(statFile_Invalid2))
defer os.Remove(tmpfile)
k := Kernel{
statFile: tmpfile,
}
acc := testutil.Accumulator{}
err := k.Gather(&acc)
assert.Error(t, err)
}
func TestNoProcFile(t *testing.T) {
tmpfile := makeFakeStatFile([]byte(statFile_Invalid2))
os.Remove(tmpfile)
k := Kernel{
statFile: tmpfile,
}
acc := testutil.Accumulator{}
err := k.Gather(&acc)
assert.Error(t, err)
assert.Contains(t, err.Error(), "does not exist")
}
const statFile_Full = `cpu 6796 252 5655 10444977 175 0 101 0 0 0
cpu0 6796 252 5655 10444977 175 0 101 0 0 0
intr 1472736 57 10 0 0 0 0 0 0 0 0 0 0 156 0 0 0 0 0 0 111551 42541 12356 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
ctxt 2626618
btime 1457505775
processes 10673
procs_running 2
procs_blocked 0
softirq 1031662 0 649485 20946 111071 11620 0 1 0 994 237545
page 5741 1808
swap 1 0
`
const statFile_Partial = `cpu 6796 252 5655 10444977 175 0 101 0 0 0
cpu0 6796 252 5655 10444977 175 0 101 0 0 0
intr 1472736 57 10 0 0 0 0 0 0 0 0 0 0 156 0 0 0 0 0 0 111551 42541 12356 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
ctxt 2626618
btime 1457505775
procs_running 2
procs_blocked 0
softirq 1031662 0 649485 20946 111071 11620 0 1 0 994 237545
page 5741 1808
`
// missing btime measurement
const statFile_Invalid = `cpu 6796 252 5655 10444977 175 0 101 0 0 0
cpu0 6796 252 5655 10444977 175 0 101 0 0 0
intr 1472736 57 10 0 0 0 0 0 0 0 0 0 0 156 0 0 0 0 0 0 111551 42541 12356 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
ctxt 2626618
btime
processes 10673
procs_running 2
procs_blocked 0
softirq 1031662 0 649485 20946 111071 11620 0 1 0 994 237545
page 5741 1808
swap 1 0
`
// missing second page measurement
const statFile_Invalid2 = `cpu 6796 252 5655 10444977 175 0 101 0 0 0
cpu0 6796 252 5655 10444977 175 0 101 0 0 0
intr 1472736 57 10 0 0 0 0 0 0 0 0 0 0 156 0 0 0 0 0 0 111551 42541 12356 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
ctxt 2626618
processes 10673
procs_running 2
page 5741
procs_blocked 0
softirq 1031662 0 649485 20946 111071 11620 0 1 0 994 237545
`
func makeFakeStatFile(content []byte) string {
tmpfile, err := ioutil.TempFile("", "kerneltest")
if err != nil {
panic(err)
}
if _, err := tmpfile.Write(content); err != nil {
panic(err)
}
if err := tmpfile.Close(); err != nil {
panic(err)
}
return tmpfile.Name()
}