Moved system package inputs out to top level (#4406)

This commit is contained in:
Steve Domino
2018-07-11 17:43:49 -06:00
committed by Daniel Nelson
parent 9a14d1f074
commit 7b73b0db3a
42 changed files with 126 additions and 89 deletions

View File

@@ -0,0 +1,129 @@
# DiskIO Input Plugin
The diskio input plugin gathers metrics about disk traffic and timing.
### Configuration:
```toml
# Read metrics about disk IO by device
[[inputs.diskio]]
## By default, telegraf will gather stats for all devices including
## disk partitions.
## Setting devices will restrict the stats to the specified devices.
# devices = ["sda", "sdb", "vd*"]
## Uncomment the following line if you need disk serial numbers.
# skip_serial_number = false
#
## On systems which support it, device metadata can be added in the form of
## tags.
## Currently only Linux is supported via udev properties. You can view
## available properties for a device by running:
## 'udevadm info -q property -n /dev/sda'
# device_tags = ["ID_FS_TYPE", "ID_FS_USAGE"]
#
## Using the same metadata source as device_tags, you can also customize the
## name of the device via templates.
## The 'name_templates' parameter is a list of templates to try and apply to
## the device. The template may contain variables in the form of '$PROPERTY' or
## '${PROPERTY}'. The first template which does not contain any variables not
## present for the device is used as the device name tag.
## The typical use case is for LVM volumes, to get the VG/LV name instead of
## the near-meaningless DM-0 name.
# name_templates = ["$ID_FS_LABEL","$DM_VG_NAME/$DM_LV_NAME"]
```
#### Docker container
To monitor the Docker engine host from within a container you will need to
mount the host's filesystem into the container and set the `HOST_PROC`
environment variable to the location of the `/proc` filesystem. Additionally,
it is required to use privileged mode to provide access to `/dev`.
If you are using the `device_tags` or `name_templates` options, you will need
to bind mount `/run/udev` into the container.
```
docker run --privileged -v /:/hostfs:ro -v /run/udev:/run/udev:ro -e HOST_PROC=/hostfs/proc telegraf
```
### Metrics:
- diskio
- tags:
- name (device name)
- serial (device serial number)
- fields:
- reads (integer, counter)
- writes (integer, counter)
- read_bytes (integer, counter, bytes)
- write_bytes (integer, counter, bytes)
- read_time (integer, counter, milliseconds)
- write_time (integer, counter, milliseconds)
- io_time (integer, counter, milliseconds)
- weighted_io_time (integer, counter, milliseconds)
- iops_in_progress (integer, gauge)
On Linux these values correspond to the values in
[`/proc/diskstats`](https://www.kernel.org/doc/Documentation/ABI/testing/procfs-diskstats)
and
[`/sys/block/<dev>/stat`](https://www.kernel.org/doc/Documentation/block/stat.txt).
#### `reads` & `writes`:
These values increment when an I/O request completes.
#### `read_bytes` & `write_bytes`:
These values count the number of bytes read from or written to this
block device.
#### `read_time` & `write_time`:
These values count the number of milliseconds that I/O requests have
waited on this block device. If there are multiple I/O requests waiting,
these values will increase at a rate greater than 1000/second; for
example, if 60 read requests wait for an average of 30 ms, the read_time
field will increase by 60*30 = 1800.
#### `io_time`:
This value counts the number of milliseconds during which the device has
had I/O requests queued.
#### `weighted_io_time`:
This value counts the number of milliseconds that I/O requests have waited
on this block device. If there are multiple I/O requests waiting, this
value will increase as the product of the number of milliseconds times the
number of requests waiting (see `read_time` above for an example).
#### `iops_in_progress`:
This value counts the number of I/O requests that have been issued to
the device driver but have not yet completed. It does not include I/O
requests that are in the queue but not yet issued to the device driver.
### Sample Queries:
#### Calculate percent IO utilization per disk and host:
```
SELECT non_negative_derivative(last("io_time"),1ms) FROM "diskio" WHERE time > now() - 30m GROUP BY "host","name",time(60s)
```
#### Calculate average queue depth:
`iops_in_progress` gives only an instantaneous value. The query below gives the average queue depth between polling intervals.
```
SELECT non_negative_derivative(last("weighted_io_time"),1ms) from "diskio" WHERE time > now() - 30m GROUP BY "host","name",time(60s)
```
### Example Output:
```
diskio,name=sda weighted_io_time=8411917i,read_time=7446444i,write_time=971489i,io_time=866197i,write_bytes=5397686272i,iops_in_progress=0i,reads=2970519i,writes=361139i,read_bytes=119528903168i 1502467254359000000
diskio,name=sda1 reads=2149i,read_bytes=10753536i,write_bytes=20697088i,write_time=346i,weighted_io_time=505i,writes=2110i,read_time=161i,io_time=208i,iops_in_progress=0i 1502467254359000000
diskio,name=sda2 reads=2968279i,writes=359029i,write_bytes=5376989184i,iops_in_progress=0i,weighted_io_time=8411250i,read_bytes=119517334528i,read_time=7446249i,write_time=971143i,io_time=866010i 1502467254359000000
diskio,name=sdb writes=99391856i,write_time=466700894i,io_time=630259874i,weighted_io_time=4245949844i,reads=2750773828i,read_bytes=80667939499008i,write_bytes=6329347096576i,read_time=3783042534i,iops_in_progress=2i 1502467254359000000
diskio,name=centos/root read_time=7472461i,write_time=950014i,iops_in_progress=0i,weighted_io_time=8424447i,writes=298543i,read_bytes=119510105088i,io_time=837421i,reads=2971769i,write_bytes=5192795648i 1502467254359000000
diskio,name=centos/var_log reads=1065i,writes=69711i,read_time=1083i,write_time=35376i,read_bytes=6828032i,write_bytes=184193536i,io_time=29699i,iops_in_progress=0i,weighted_io_time=36460i 1502467254359000000
diskio,name=postgresql/pgsql write_time=478267417i,io_time=631098730i,iops_in_progress=2i,weighted_io_time=4263637564i,reads=2750777151i,writes=110044361i,read_bytes=80667939288064i,write_bytes=6329347096576i,read_time=3784499336i 1502467254359000000
```

View File

@@ -0,0 +1,197 @@
package diskio
import (
"fmt"
"log"
"regexp"
"strings"
"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/filter"
"github.com/influxdata/telegraf/plugins/inputs"
"github.com/influxdata/telegraf/plugins/inputs/system"
)
var (
// varRegex matches a template variable written either as $NAME or as ${NAME},
// where NAME is one or more word characters.
varRegex = regexp.MustCompile(`\$(?:\w+|\{\w+\})`)
)
// DiskIO holds the configuration and runtime state of the diskio input plugin.
type DiskIO struct {
// ps is the source of the per-device IO counters read by Gather.
ps system.PS
// Devices restricts reporting to the listed device names; entries may
// contain glob characters (see hasMeta).
Devices []string
// DeviceTags lists disk-info property names to add as tags on each metric.
DeviceTags []string
// NameTemplates lists templates tried in order to build the "name" tag.
NameTemplates []string
// SkipSerialNumber suppresses the "serial" tag when true.
SkipSerialNumber bool
// infoCache stores disk info per device name.
infoCache map[string]diskInfoCache
// deviceFilter is compiled by init when an entry of Devices contains a
// glob character; it stays nil otherwise.
deviceFilter filter.Filter
// initialized records that init has completed successfully.
initialized bool
}
// Description returns a one-line summary of what the plugin collects.
func (*DiskIO) Description() string {
	const summary = "Read metrics about disk IO by device"
	return summary
}
// diskIOsampleConfig is the commented example configuration returned by
// SampleConfig.
var diskIOsampleConfig = `
## By default, telegraf will gather stats for all devices including
## disk partitions.
## Setting devices will restrict the stats to the specified devices.
# devices = ["sda", "sdb", "vd*"]
## Uncomment the following line if you need disk serial numbers.
# skip_serial_number = false
#
## On systems which support it, device metadata can be added in the form of
## tags.
## Currently only Linux is supported via udev properties. You can view
## available properties for a device by running:
## 'udevadm info -q property -n /dev/sda'
# device_tags = ["ID_FS_TYPE", "ID_FS_USAGE"]
#
## Using the same metadata source as device_tags, you can also customize the
## name of the device via templates.
## The 'name_templates' parameter is a list of templates to try and apply to
## the device. The template may contain variables in the form of '$PROPERTY' or
## '${PROPERTY}'. The first template which does not contain any variables not
## present for the device is used as the device name tag.
## The typical use case is for LVM volumes, to get the VG/LV name instead of
## the near-meaningless DM-0 name.
# name_templates = ["$ID_FS_LABEL","$DM_VG_NAME/$DM_LV_NAME"]
`
// SampleConfig returns the example configuration text for this plugin.
func (*DiskIO) SampleConfig() string {
	return diskIOsampleConfig
}
// hasMeta reports whether s contains at least one of the glob
// metacharacters '*', '?' or '['.
func hasMeta(s string) bool {
	const globChars = "*?["
	return strings.ContainsAny(s, globChars)
}
// init prepares the plugin for gathering. If any entry in Devices contains a
// glob character, a single filter is compiled from the whole Devices list and
// stored in deviceFilter; otherwise deviceFilter is left nil. It returns an
// error when the patterns cannot be compiled, in which case the plugin is not
// marked as initialized.
func (s *DiskIO) init() error {
	for _, device := range s.Devices {
		if !hasMeta(device) {
			continue
		}
		// The filter is built from all of Devices, so one compilation is
		// enough no matter how many entries contain glob characters.
		deviceFilter, err := filter.Compile(s.Devices)
		if err != nil {
			return fmt.Errorf("error compiling device pattern: %v", err)
		}
		s.deviceFilter = deviceFilter
		break
	}
	s.initialized = true
	return nil
}
// Gather reads the IO counters of the configured devices and adds one
// "diskio" counter metric per device to acc.
//
// When a glob filter is in use, all devices are requested from the counter
// source and filtered here by name; otherwise the Devices list is passed
// through unchanged. It returns an error if initialization or reading the
// counters fails.
func (s *DiskIO) Gather(acc telegraf.Accumulator) error {
	if !s.initialized {
		if err := s.init(); err != nil {
			return err
		}
	}

	requested := []string{}
	if s.deviceFilter == nil {
		requested = s.Devices
	}

	counters, err := s.ps.DiskIO(requested)
	if err != nil {
		return fmt.Errorf("error getting disk io info: %s", err)
	}

	for _, stat := range counters {
		if s.deviceFilter != nil && !s.deviceFilter.Match(stat.Name) {
			continue
		}

		// Device tags are applied after "name", so a device tag with the
		// same key takes precedence.
		tags := map[string]string{"name": s.diskName(stat.Name)}
		for key, value := range s.diskTags(stat.Name) {
			tags[key] = value
		}

		if !s.SkipSerialNumber {
			serial := "unknown"
			if len(stat.SerialNumber) != 0 {
				serial = stat.SerialNumber
			}
			tags["serial"] = serial
		}

		fields := map[string]interface{}{
			"reads":            stat.ReadCount,
			"writes":           stat.WriteCount,
			"read_bytes":       stat.ReadBytes,
			"write_bytes":      stat.WriteBytes,
			"read_time":        stat.ReadTime,
			"write_time":       stat.WriteTime,
			"io_time":          stat.IoTime,
			"weighted_io_time": stat.WeightedIO,
			"iops_in_progress": stat.IopsInProgress,
		}
		acc.AddCounter("diskio", fields, tags)
	}

	return nil
}
// diskName returns the value to use for the "name" tag of devName.
//
// Each entry of NameTemplates is expanded in order against the device's disk
// info; the first template whose variables are all present wins. devName is
// returned unchanged when no templates are configured, when the disk info
// lookup fails (a warning is logged), or when no template fully resolves.
func (s *DiskIO) diskName(devName string) string {
	if len(s.NameTemplates) == 0 {
		return devName
	}

	info, err := s.diskInfo(devName)
	if err != nil {
		log.Printf("W! Error gathering disk info: %s", err)
		return devName
	}

	for _, tmpl := range s.NameTemplates {
		resolved := true
		expanded := varRegex.ReplaceAllStringFunc(tmpl, func(match string) string {
			// match is "$NAME" or "${NAME}"; reduce it to NAME.
			key := strings.TrimPrefix(match, "$")
			if strings.HasPrefix(key, "{") {
				key = key[1 : len(key)-1]
			}
			value, found := info[key]
			if !found {
				resolved = false
				return ""
			}
			return value
		})
		if resolved {
			return expanded
		}
	}

	return devName
}
// diskTags returns the subset of devName's disk info selected by DeviceTags,
// keyed by property name. It returns nil when no device tags are configured
// or when the disk info lookup fails (a warning is logged in that case).
func (s *DiskIO) diskTags(devName string) map[string]string {
	if len(s.DeviceTags) == 0 {
		return nil
	}

	info, err := s.diskInfo(devName)
	if err != nil {
		log.Printf("W! Error gathering disk info: %s", err)
		return nil
	}

	selected := map[string]string{}
	for _, key := range s.DeviceTags {
		value, present := info[key]
		if !present {
			continue
		}
		selected[key] = value
	}
	return selected
}
// init registers the plugin under the name "diskio". Every instance created
// by the registered factory shares one system.PS and skips serial numbers by
// default.
func init() {
	sharedPS := system.NewSystemPS()
	newDiskIO := func() telegraf.Input {
		return &DiskIO{ps: sharedPS, SkipSerialNumber: true}
	}
	inputs.Add("diskio", newDiskIO)
}

View File

@@ -0,0 +1,68 @@
package diskio
import (
"bufio"
"fmt"
"os"
"strings"
"golang.org/x/sys/unix"
)
// diskInfoCache is one cached disk-info lookup for a device.
type diskInfoCache struct {
// udevDataPath is the udev data file the values were read from.
udevDataPath string
// values maps udev property names to their values.
values map[string]string
}
// udevPath is the directory containing udev's per-device data files. It is a
// variable rather than a constant so tests can point it at a temporary
// directory.
var udevPath = "/run/udev/data"
// diskInfo returns the udev properties of the block device /dev/<devName>,
// keyed by property name. Properties are the "E:KEY=VALUE" lines of the
// device's udev data file under udevPath.
//
// A successful lookup is cached per device name and reused by later calls.
// Failed lookups are not cached, so a device whose udev data is temporarily
// unavailable is retried on the next call instead of being remembered as
// having no properties.
//
// It returns an error if the device cannot be stat'ed or its udev data file
// cannot be opened or read.
func (s *DiskIO) diskInfo(devName string) (map[string]string, error) {
	var stat unix.Stat_t
	if err := unix.Stat("/dev/"+devName, &stat); err != nil {
		return nil, err
	}

	if s.infoCache == nil {
		s.infoCache = map[string]diskInfoCache{}
	}
	if ic, ok := s.infoCache[devName]; ok {
		return ic.values, nil
	}

	// Decode the device number with the kernel's dev_t layout. Major and
	// minor numbers are wider than 8 bits, so a plain ">> 8 & 0xff" / "& 0xff"
	// yields the wrong udev file name for numbers above 255.
	major := unix.Major(uint64(stat.Rdev))
	minor := unix.Minor(uint64(stat.Rdev))
	udevDataPath := fmt.Sprintf("%s/b%d:%d", udevPath, major, minor)

	f, err := os.Open(udevDataPath)
	if err != nil {
		return nil, err
	}
	defer f.Close()

	di := map[string]string{}
	scnr := bufio.NewScanner(f)
	for scnr.Scan() {
		l := scnr.Text()
		// Only "E:" lines carry properties; anything shorter than "E:k=" can
		// not hold a key/value pair.
		if len(l) < 4 || l[:2] != "E:" {
			continue
		}
		kv := strings.SplitN(l[2:], "=", 2)
		if len(kv) < 2 {
			continue
		}
		di[kv[0]] = kv[1]
	}
	if err := scnr.Err(); err != nil {
		return nil, err
	}

	// Cache only complete, successfully read results.
	s.infoCache[devName] = diskInfoCache{
		udevDataPath: udevDataPath,
		values:       di,
	}

	return di, nil
}

View File

@@ -0,0 +1,101 @@
// +build linux
package diskio
import (
"io/ioutil"
"os"
"testing"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
// nullDiskInfo is the fake udev data file content used by the tests; it
// defines the two properties MY_PARAM_1 and MY_PARAM_2.
var nullDiskInfo = []byte(`
E:MY_PARAM_1=myval1
E:MY_PARAM_2=myval2
`)
// setupNullDisk points udevPath at a temporary directory holding fake udev
// data for device 1:3, so that /dev/null looks like a disk with properties.
// The returned function restores udevPath and removes the directory.
func setupNullDisk(t *testing.T) func() error {
	tmpDir, err := ioutil.TempDir("", ".telegraf.TestDiskInfo")
	require.NoError(t, err)

	savedUdevPath := udevPath
	restore := func() error {
		udevPath = savedUdevPath
		return os.RemoveAll(tmpDir)
	}

	udevPath = tmpDir
	// 1:3 is the 'null' device
	if err := ioutil.WriteFile(tmpDir+"/b1:3", nullDiskInfo, 0644); err != nil {
		restore()
		t.Fatal(err)
	}

	return restore
}
// TestDiskInfo verifies that diskInfo parses the fake udev properties of
// /dev/null and that a second lookup still succeeds after the udev data has
// been removed, i.e. that the result is cached.
func TestDiskInfo(t *testing.T) {
	cleanup := setupNullDisk(t)
	defer cleanup()

	plugin := &DiskIO{}
	checkInfo := func() {
		info, err := plugin.diskInfo("null")
		require.NoError(t, err)
		assert.Equal(t, "myval1", info["MY_PARAM_1"])
		assert.Equal(t, "myval2", info["MY_PARAM_2"])
	}

	checkInfo()

	// Remove the fake udev data; the next lookup must be served from the cache.
	require.NoError(t, cleanup())
	checkInfo()

	// Cache invalidation is not covered here: unfortunately the mtime of
	// /dev/null can't be adjusted.
}
// TestDiskIOStats_diskName checks how DiskIO.diskName expands name templates
// against the fake udev properties of /dev/null. diskName itself isn't a
// linux specific function, but the functions it depends on are a no-op on
// non-Linux.
func TestDiskIOStats_diskName(t *testing.T) {
	defer setupNullDisk(t)()

	type testCase struct {
		templates []string
		expected  string
	}
	cases := []testCase{
		{templates: []string{"$MY_PARAM_1"}, expected: "myval1"},
		{templates: []string{"${MY_PARAM_1}"}, expected: "myval1"},
		{templates: []string{"x$MY_PARAM_1"}, expected: "xmyval1"},
		{templates: []string{"x${MY_PARAM_1}x"}, expected: "xmyval1x"},
		{templates: []string{"$MISSING", "$MY_PARAM_1"}, expected: "myval1"},
		{templates: []string{"$MY_PARAM_1", "$MY_PARAM_2"}, expected: "myval1"},
		{templates: []string{"$MISSING"}, expected: "null"},
		{templates: []string{"$MY_PARAM_1/$MY_PARAM_2"}, expected: "myval1/myval2"},
		{templates: []string{"$MY_PARAM_2/$MISSING"}, expected: "null"},
	}

	for _, tc := range cases {
		plugin := DiskIO{NameTemplates: tc.templates}
		assert.Equal(t, tc.expected, plugin.diskName("null"), "Templates: %#v", tc.templates)
	}
}
// TestDiskIOStats_diskTags checks that DiskIO.diskTags returns only the
// properties named in DeviceTags. diskTags itself isn't a linux specific
// function, but the functions it depends on are a no-op on non-Linux.
func TestDiskIOStats_diskTags(t *testing.T) {
	defer setupNullDisk(t)()

	plugin := &DiskIO{DeviceTags: []string{"MY_PARAM_2"}}
	want := map[string]string{"MY_PARAM_2": "myval2"}

	got := plugin.diskTags("null")
	assert.Equal(t, want, got)
}

View File

@@ -0,0 +1,9 @@
// +build !linux
package diskio
// diskInfoCache is an empty placeholder on non-Linux builds, where no disk
// info is collected.
type diskInfoCache struct{}
// diskInfo is a no-op on non-Linux builds: it always returns a nil map and a
// nil error.
func (s *DiskIO) diskInfo(devName string) (map[string]string, error) {
return nil, nil
}

View File

@@ -0,0 +1,122 @@
package diskio
import (
"testing"
"github.com/influxdata/telegraf/plugins/inputs/system"
"github.com/influxdata/telegraf/testutil"
"github.com/shirou/gopsutil/disk"
"github.com/stretchr/testify/require"
)
// TestDiskIO runs Gather against a mocked counter source and checks the
// emitted "diskio" points. It covers a plain device with a serial number and
// a glob device pattern that must keep "sda" and drop "vda".
func TestDiskIO(t *testing.T) {
	type Result struct {
		stats map[string]disk.IOCountersStat
		err   error
	}
	type Metric struct {
		tags   map[string]string
		fields map[string]interface{}
	}

	cases := []struct {
		name    string
		devices []string
		result  Result
		err     error
		metrics []Metric
	}{
		{
			name: "minimal",
			result: Result{
				stats: map[string]disk.IOCountersStat{
					"sda": {
						ReadCount:    888,
						WriteCount:   5341,
						ReadBytes:    100000,
						WriteBytes:   200000,
						ReadTime:     7123,
						WriteTime:    9087,
						Name:         "sda",
						IoTime:       123552,
						SerialNumber: "ab-123-ad",
					},
				},
				err: nil,
			},
			err: nil,
			metrics: []Metric{
				{
					tags: map[string]string{
						"name":   "sda",
						"serial": "ab-123-ad",
					},
					fields: map[string]interface{}{
						"reads":            uint64(888),
						"writes":           uint64(5341),
						"read_bytes":       uint64(100000),
						"write_bytes":      uint64(200000),
						"read_time":        uint64(7123),
						"write_time":       uint64(9087),
						"io_time":          uint64(123552),
						"weighted_io_time": uint64(0),
						"iops_in_progress": uint64(0),
					},
				},
			},
		},
		{
			name:    "glob device",
			devices: []string{"sd*"},
			result: Result{
				stats: map[string]disk.IOCountersStat{
					"sda": {
						Name:      "sda",
						ReadCount: 42,
					},
					"vda": {
						Name:      "vda",
						ReadCount: 42,
					},
				},
				err: nil,
			},
			err: nil,
			metrics: []Metric{
				{
					tags: map[string]string{
						"name":   "sda",
						"serial": "unknown",
					},
					fields: map[string]interface{}{
						"reads": uint64(42),
					},
				},
			},
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			var mockPS system.MockPS
			mockPS.On("DiskIO").Return(tc.result.stats, tc.result.err)

			var acc testutil.Accumulator

			plugin := &DiskIO{
				ps:      &mockPS,
				Devices: tc.devices,
			}
			err := plugin.Gather(&acc)
			require.Equal(t, tc.err, err)

			// Every expected field must be present on a point with the
			// expected tags.
			for _, want := range tc.metrics {
				for field, value := range want.fields {
					require.True(t, acc.HasPoint("diskio", want.tags, field, value),
						"missing point: diskio %v %q: %v", want.tags, field, value)
				}
			}
			require.Equal(t, len(tc.metrics), int(acc.NMetrics()), "unexpected number of metrics")
			require.True(t, mockPS.AssertExpectations(t))
		})
	}
}