diff --git a/plugins/inputs/system/DISKIO_README.md b/plugins/inputs/system/DISKIO_README.md new file mode 100644 index 000000000..c71b6d76b --- /dev/null +++ b/plugins/inputs/system/DISKIO_README.md @@ -0,0 +1,129 @@ +# DiskIO Input Plugin + +The diskio input plugin gathers metrics about disk traffic and timing. + +### Configuration: + +```toml +# Read metrics about disk IO by device +[[inputs.diskio]] + ## By default, telegraf will gather stats for all devices including + ## disk partitions. + ## Setting devices will restrict the stats to the specified devices. + # devices = ["sda", "sdb"] + ## Uncomment the following line if you need disk serial numbers. + # skip_serial_number = false + # + ## On systems which support it, device metadata can be added in the form of + ## tags. + ## Currently only Linux is supported via udev properties. You can view + ## available properties for a device by running: + ## 'udevadm info -q property -n /dev/sda' + # device_tags = ["ID_FS_TYPE", "ID_FS_USAGE"] + # + ## Using the same metadata source as device_tags, you can also customize the + ## name of the device via templates. + ## The 'name_templates' parameter is a list of templates to try and apply to + ## the device. The template may contain variables in the form of '$PROPERTY' or + ## '${PROPERTY}'. The first template which does not contain any variables not + ## present for the device is used as the device name tag. + ## The typical use case is for LVM volumes, to get the VG/LV name instead of + ## the near-meaningless DM-0 name. + # name_templates = ["$ID_FS_LABEL","$DM_VG_NAME/$DM_LV_NAME"] +``` + +#### Docker container + +To monitor the Docker engine host from within a container you will need to +mount the host's filesystem into the container and set the `HOST_PROC` +environment variable to the location of the `/proc` filesystem. Additionally, +it is required to use privileged mode to provide access to `/dev`. 
+ +If you are using the `device_tags` or `name_templates` options, you will need +to bind mount `/run/udev` into the container. + +``` +docker run --privileged -v /:/hostfs:ro -v /run/udev:/run/udev:ro -e HOST_PROC=/hostfs/proc telegraf +``` + +### Metrics: + +- diskio + - tags: + - name (device name) + - serial (device serial number) + - fields: + - reads (integer, counter) + - writes (integer, counter) + - read_bytes (integer, counter, bytes) + - write_bytes (integer, counter, bytes) + - read_time (integer, counter, milliseconds) + - write_time (integer, counter, milliseconds) + - io_time (integer, counter, milliseconds) + - weighted_io_time (integer, counter, milliseconds) + - iops_in_progress (integer, gauge) + +On Linux these values correspond to the values in +[`/proc/diskstats`](https://www.kernel.org/doc/Documentation/ABI/testing/procfs-diskstats) +and +[`/sys/block/<dev>/stat`](https://www.kernel.org/doc/Documentation/block/stat.txt). + +#### `reads` & `writes`: + +These values increment when an I/O request completes. + +#### `read_bytes` & `write_bytes`: + +These values count the number of bytes read from or written to this +block device. + +#### `read_time` & `write_time`: + +These values count the number of milliseconds that I/O requests have +waited on this block device. If there are multiple I/O requests waiting, +these values will increase at a rate greater than 1000/second; for +example, if 60 read requests wait for an average of 30 ms, the read_time +field will increase by 60*30 = 1800. + +#### `io_time`: + +This value counts the number of milliseconds during which the device has +had I/O requests queued. + +#### `weighted_io_time`: + +This value counts the number of milliseconds that I/O requests have waited +on this block device. If there are multiple I/O requests waiting, this +value will increase as the product of the number of milliseconds times the +number of requests waiting (see `read_time` above for an example). 
+ +#### `iops_in_progress`: + +This value counts the number of I/O requests that have been issued to +the device driver but have not yet completed. It does not include I/O +requests that are in the queue but not yet issued to the device driver. + +### Sample Queries: + +#### Calculate percent IO utilization per disk and host: +``` +SELECT derivative(last("io_time"),1ms) FROM "diskio" WHERE time > now() - 30m GROUP BY "host","name",time(60s) +``` + +#### Calculate average queue depth: +`iops_in_progress` will give you an instantaneous value. This will give you the average between polling intervals. +``` +SELECT derivative(last("weighted_io_time"),1ms) from "diskio" WHERE time > now() - 30m GROUP BY "host","name",time(60s) +``` + +### Example Output: + +``` +diskio,name=sda weighted_io_time=8411917i,read_time=7446444i,write_time=971489i,io_time=866197i,write_bytes=5397686272i,iops_in_progress=0i,reads=2970519i,writes=361139i,read_bytes=119528903168i 1502467254359000000 +diskio,name=sda1 reads=2149i,read_bytes=10753536i,write_bytes=20697088i,write_time=346i,weighted_io_time=505i,writes=2110i,read_time=161i,io_time=208i,iops_in_progress=0i 1502467254359000000 +diskio,name=sda2 reads=2968279i,writes=359029i,write_bytes=5376989184i,iops_in_progress=0i,weighted_io_time=8411250i,read_bytes=119517334528i,read_time=7446249i,write_time=971143i,io_time=866010i 1502467254359000000 +diskio,name=sdb writes=99391856i,write_time=466700894i,io_time=630259874i,weighted_io_time=4245949844i,reads=2750773828i,read_bytes=80667939499008i,write_bytes=6329347096576i,read_time=3783042534i,iops_in_progress=2i 1502467254359000000 +diskio,name=centos/root read_time=7472461i,write_time=950014i,iops_in_progress=0i,weighted_io_time=8424447i,writes=298543i,read_bytes=119510105088i,io_time=837421i,reads=2971769i,write_bytes=5192795648i 1502467254359000000 +diskio,name=centos/var_log 
reads=1065i,writes=69711i,read_time=1083i,write_time=35376i,read_bytes=6828032i,write_bytes=184193536i,io_time=29699i,iops_in_progress=0i,weighted_io_time=36460i 1502467254359000000 +diskio,name=postgresql/pgsql write_time=478267417i,io_time=631098730i,iops_in_progress=2i,weighted_io_time=4263637564i,reads=2750777151i,writes=110044361i,read_bytes=80667939288064i,write_bytes=6329347096576i,read_time=3784499336i 1502467254359000000 +``` diff --git a/plugins/inputs/system/DISK_README.md b/plugins/inputs/system/DISK_README.md index a09e818ed..b647f8128 100644 --- a/plugins/inputs/system/DISK_README.md +++ b/plugins/inputs/system/DISK_README.md @@ -8,27 +8,39 @@ https://en.wikipedia.org/wiki/Df_(Unix) for more details. ### Configuration: -``` +```toml # Read metrics about disk usage by mount point [[inputs.disk]] - # By default, telegraf gather stats for all mountpoints. - # Setting mountpoints will restrict the stats to the specified mountpoints. + ## By default stats will be gathered for all mount points. + ## Set mount_points will restrict the stats to only the specified mount points. # mount_points = ["/"] + + ## Ignore mount points by filesystem type. + ignore_fs = ["tmpfs", "devtmpfs", "devfs"] ``` -Additionally, the behavior of resolving the `mount_points` can be configured by using the `HOST_MOUNT_PREFIX` environment variable. -When present, this variable is prepended to the mountpoints discovered by the plugin before retrieving stats. -The prefix is stripped from the reported `path` in the measurement. -This settings is useful when running `telegraf` inside a docker container to report host machine metrics. -In this case, the host's root volume should be mounted into the container and the `HOST_MOUNT_PREFIX` and `HOST_PROC` environment variables set. 
+#### Docker container + +To monitor the Docker engine host from within a container you will need to +mount the host's filesystem into the container and set the `HOST_PROC` +environment variable to the location of the `/proc` filesystem. If desired, you can +also set the `HOST_MOUNT_PREFIX` environment variable to the prefix containing +the `/proc` directory; when present, this prefix is stripped from the +reported `path` tag. ``` docker run -v /:/hostfs:ro -e HOST_MOUNT_PREFIX=/hostfs -e HOST_PROC=/hostfs/proc telegraf ``` -### Measurements & Fields: +### Metrics: - disk + - tags: + - fstype (filesystem type) + - device (device file) + - path (mount point path) + - mode (whether the mount is rw or ro) + - fields: - free (integer, bytes) - total (integer, bytes) - used (integer, bytes) @@ -37,124 +49,13 @@ docker run -v /:/hostfs:ro -e HOST_MOUNT_PREFIX=/hostfs -e HOST_PROC=/hostfs/pro - inodes_total (integer, files) - inodes_used (integer, files) -### Tags: - -- All measurements have the following tags: - - fstype (filesystem type) - - path (mount point path) - - mode (whether the mount is rw or ro) - ### Example Output: ``` -% ./telegraf --config ~/ws/telegraf.conf --input-filter disk --test -* Plugin: disk, Collection 1 -> disk,fstype=hfs,mode=ro,path=/ free=398407520256i,inodes_free=97267461i,inodes_total=121847806i,inodes_used=24580345i,total=499088621568i,used=100418957312i,used_percent=20.131039916242397 1453832006274071563 -> disk,fstype=devfs,mode=rw,path=/dev free=0i,inodes_free=0i,inodes_total=628i,inodes_used=628i,total=185856i,used=185856i,used_percent=100 1453832006274137913 -> disk,fstype=autofs,mode=rw,path=/net free=0i,inodes_free=0i,inodes_total=0i,inodes_used=0i,total=0i,used=0i,used_percent=0 1453832006274157077 -> disk,fstype=autofs,mode=rw,path=/home free=0i,inodes_free=0i,inodes_total=0i,inodes_used=0i,total=0i,used=0i,used_percent=0 1453832006274169688 +disk,fstype=hfs,mode=ro,path=/ 
free=398407520256i,inodes_free=97267461i,inodes_total=121847806i,inodes_used=24580345i,total=499088621568i,used=100418957312i,used_percent=20.131039916242397 1453832006274071563 +disk,fstype=devfs,mode=rw,path=/dev free=0i,inodes_free=0i,inodes_total=628i,inodes_used=628i,total=185856i,used=185856i,used_percent=100 1453832006274137913 +disk,fstype=autofs,mode=rw,path=/net free=0i,inodes_free=0i,inodes_total=0i,inodes_used=0i,total=0i,used=0i,used_percent=0 1453832006274157077 +disk,fstype=autofs,mode=rw,path=/home free=0i,inodes_free=0i,inodes_total=0i,inodes_used=0i,total=0i,used=0i,used_percent=0 1453832006274169688 ``` -# DiskIO Input Plugin - -The diskio input plugin gathers metrics about disk traffic and timing. - -### Configuration: - -``` -# Read metrics about disk IO by device -[[inputs.diskio]] - ## By default, telegraf will gather stats for all devices including - ## disk partitions. - ## Setting devices will restrict the stats to the specified devices. - # devices = ["sda", "sdb"] - ## Uncomment the following line if you need disk serial numbers. - # skip_serial_number = false -``` - -Data collection is based on github.com/shirou/gopsutil. This package handles platform dependencies and converts all timing information to milliseconds. - -### Measurements & Fields: - -- diskio - - reads (integer, counter) - - writes (integer, counter) - - read_bytes (integer, counter, bytes) - - write_bytes (integer, counter, bytes) - - read_time (integer, counter, milliseconds) - - write_time (integer, counter, milliseconds) - - io_time (integer, counter, milliseconds) - - weighted_io_time (integer, counter, milliseconds) - - iops_in_progress (integer, gauge) - -On linux these values correspond to the values in [`/proc/diskstats`](https://www.kernel.org/doc/Documentation/ABI/testing/procfs-diskstats) and [`/sys/block//stat`](https://www.kernel.org/doc/Documentation/block/stat.txt). - -#### `reads` & `writes`: - -These values increment when an I/O request completes. 
- -#### `read_bytes` & `write_bytes`: - -These values count the number of bytes read from or written to this -block device. - -#### `read_time` & `write_time`: - -These values count the number of milliseconds that I/O requests have -waited on this block device. If there are multiple I/O requests waiting, -these values will increase at a rate greater than 1000/second; for -example, if 60 read requests wait for an average of 30 ms, the read_time -field will increase by 60*30 = 1800. - -#### `io_time`: - -This value counts the number of milliseconds during which the device has -had I/O requests queued. - -#### `weighted_io_time`: - -This value counts the number of milliseconds that I/O requests have waited -on this block device. If there are multiple I/O requests waiting, this -value will increase as the product of the number of milliseconds times the -number of requests waiting (see `read_time` above for an example). - -#### `iops_in_progress`: - -This value counts the number of I/O requests that have been issued to -the device driver but have not yet completed. It does not include I/O -requests that are in the queue but not yet issued to the device driver. - -### Tags: - -- All measurements have the following tags: - - name (device name) -- If configured to use serial numbers (default: disabled): - - serial (device serial number) - -### Sample Queries: - -#### Calculate percent IO utilization per disk and host: -``` -SELECT derivative(last("io_time"),1ms) FROM "diskio" WHERE time > now() - 30m GROUP BY "host","name",time(60s) -``` - -#### Calculate average queue depth: -`iops_in_progress` will give you an instantaneous value. This will give you the average between polling intervals. 
-``` -SELECT derivative(last("weighted_io_time",1ms)) from "diskio" WHERE time > now() - 30m GROUP BY "host","name",time(60s) -``` - -### Example Output: - -``` -% telegraf -config ~/.telegraf/telegraf.conf -input-filter diskio -test -* Plugin: inputs.diskio, Collection 1 -> diskio,name=sda weighted_io_time=8411917i,read_time=7446444i,write_time=971489i,io_time=866197i,write_bytes=5397686272i,iops_in_progress=0i,reads=2970519i,writes=361139i,read_bytes=119528903168i 1502467254359000000 -> diskio,name=sda1 reads=2149i,read_bytes=10753536i,write_bytes=20697088i,write_time=346i,weighted_io_time=505i,writes=2110i,read_time=161i,io_time=208i,iops_in_progress=0i 1502467254359000000 -> diskio,name=sda2 reads=2968279i,writes=359029i,write_bytes=5376989184i,iops_in_progress=0i,weighted_io_time=8411250i,read_bytes=119517334528i,read_time=7446249i,write_time=971143i,io_time=866010i 1502467254359000000 -> diskio,name=sdb writes=99391856i,write_time=466700894i,io_time=630259874i,weighted_io_time=4245949844i,reads=2750773828i,read_bytes=80667939499008i,write_bytes=6329347096576i,read_time=3783042534i,iops_in_progress=2i 1502467254359000000 -> diskio,name=centos/root read_time=7472461i,write_time=950014i,iops_in_progress=0i,weighted_io_time=8424447i,writes=298543i,read_bytes=119510105088i,io_time=837421i,reads=2971769i,write_bytes=5192795648i 1502467254359000000 -> diskio,name=centos/var_log reads=1065i,writes=69711i,read_time=1083i,write_time=35376i,read_bytes=6828032i,write_bytes=184193536i,io_time=29699i,iops_in_progress=0i,weighted_io_time=36460i 1502467254359000000 -> diskio,name=postgresql/pgsql write_time=478267417i,io_time=631098730i,iops_in_progress=2i,weighted_io_time=4263637564i,reads=2750777151i,writes=110044361i,read_bytes=80667939288064i,write_bytes=6329347096576i,read_time=3784499336i 1502467254359000000 -``` diff --git a/plugins/inputs/system/disk.go b/plugins/inputs/system/disk.go index 3cc99de05..864b28477 100644 --- a/plugins/inputs/system/disk.go +++ 
b/plugins/inputs/system/disk.go @@ -2,8 +2,6 @@ package system import ( "fmt" - "log" - "regexp" "strings" "github.com/influxdata/telegraf" @@ -25,12 +23,11 @@ func (_ *DiskStats) Description() string { } var diskSampleConfig = ` - ## By default, telegraf gather stats for all mountpoints. - ## Setting mountpoints will restrict the stats to the specified mountpoints. + ## By default stats will be gathered for all mount points. + ## Set mount_points will restrict the stats to only the specified mount points. # mount_points = ["/"] - ## Ignore some mountpoints by filesystem type. For example (dev)tmpfs (usually - ## present on /run, /var/run, /dev/shm or /dev). + ## Ignore mount points by filesystem type. ignore_fs = ["tmpfs", "devtmpfs", "devfs"] ` @@ -82,144 +79,6 @@ func (s *DiskStats) Gather(acc telegraf.Accumulator) error { return nil } -type DiskIOStats struct { - ps PS - - Devices []string - DeviceTags []string - NameTemplates []string - SkipSerialNumber bool - - infoCache map[string]diskInfoCache -} - -func (_ *DiskIOStats) Description() string { - return "Read metrics about disk IO by device" -} - -var diskIoSampleConfig = ` - ## By default, telegraf will gather stats for all devices including - ## disk partitions. - ## Setting devices will restrict the stats to the specified devices. - # devices = ["sda", "sdb"] - ## Uncomment the following line if you need disk serial numbers. - # skip_serial_number = false - # - ## On systems which support it, device metadata can be added in the form of - ## tags. - ## Currently only Linux is supported via udev properties. You can view - ## available properties for a device by running: - ## 'udevadm info -q property -n /dev/sda' - # device_tags = ["ID_FS_TYPE", "ID_FS_USAGE"] - # - ## Using the same metadata source as device_tags, you can also customize the - ## name of the device via templates. - ## The 'name_templates' parameter is a list of templates to try and apply to - ## the device. 
The template may contain variables in the form of '$PROPERTY' or - ## '${PROPERTY}'. The first template which does not contain any variables not - ## present for the device is used as the device name tag. - ## The typical use case is for LVM volumes, to get the VG/LV name instead of - ## the near-meaningless DM-0 name. - # name_templates = ["$ID_FS_LABEL","$DM_VG_NAME/$DM_LV_NAME"] -` - -func (_ *DiskIOStats) SampleConfig() string { - return diskIoSampleConfig -} - -func (s *DiskIOStats) Gather(acc telegraf.Accumulator) error { - diskio, err := s.ps.DiskIO(s.Devices) - if err != nil { - return fmt.Errorf("error getting disk io info: %s", err) - } - - for _, io := range diskio { - tags := map[string]string{} - tags["name"] = s.diskName(io.Name) - for t, v := range s.diskTags(io.Name) { - tags[t] = v - } - if !s.SkipSerialNumber { - if len(io.SerialNumber) != 0 { - tags["serial"] = io.SerialNumber - } else { - tags["serial"] = "unknown" - } - } - - fields := map[string]interface{}{ - "reads": io.ReadCount, - "writes": io.WriteCount, - "read_bytes": io.ReadBytes, - "write_bytes": io.WriteBytes, - "read_time": io.ReadTime, - "write_time": io.WriteTime, - "io_time": io.IoTime, - "weighted_io_time": io.WeightedIO, - "iops_in_progress": io.IopsInProgress, - } - acc.AddCounter("diskio", fields, tags) - } - - return nil -} - -var varRegex = regexp.MustCompile(`\$(?:\w+|\{\w+\})`) - -func (s *DiskIOStats) diskName(devName string) string { - if len(s.NameTemplates) == 0 { - return devName - } - - di, err := s.diskInfo(devName) - if err != nil { - log.Printf("W! 
Error gathering disk info: %s", err) - return devName - } - - for _, nt := range s.NameTemplates { - miss := false - name := varRegex.ReplaceAllStringFunc(nt, func(sub string) string { - sub = sub[1:] // strip leading '$' - if sub[0] == '{' { - sub = sub[1 : len(sub)-1] // strip leading & trailing '{' '}' - } - if v, ok := di[sub]; ok { - return v - } - miss = true - return "" - }) - - if !miss { - return name - } - } - - return devName -} - -func (s *DiskIOStats) diskTags(devName string) map[string]string { - if len(s.DeviceTags) == 0 { - return nil - } - - di, err := s.diskInfo(devName) - if err != nil { - log.Printf("W! Error gathering disk info: %s", err) - return nil - } - - tags := map[string]string{} - for _, dt := range s.DeviceTags { - if v, ok := di[dt]; ok { - tags[dt] = v - } - } - - return tags -} - type MountOptions []string func (opts MountOptions) Mode() string { @@ -250,8 +109,4 @@ func init() { inputs.Add("disk", func() telegraf.Input { return &DiskStats{ps: ps} }) - - inputs.Add("diskio", func() telegraf.Input { - return &DiskIOStats{ps: ps, SkipSerialNumber: true} - }) } diff --git a/plugins/inputs/system/disk_test.go b/plugins/inputs/system/disk_test.go index 67494d712..8aeca5523 100644 --- a/plugins/inputs/system/disk_test.go +++ b/plugins/inputs/system/disk_test.go @@ -237,85 +237,3 @@ func TestDiskStats(t *testing.T) { err = (&DiskStats{ps: &mps, MountPoints: []string{"/", "/home"}}).Gather(&acc) assert.Equal(t, 2*expectedAllDiskMetrics+7, acc.NFields()) } - -// func TestDiskIOStats(t *testing.T) { -// var mps MockPS -// defer mps.AssertExpectations(t) -// var acc testutil.Accumulator -// var err error - -// diskio1 := disk.IOCountersStat{ -// ReadCount: 888, -// WriteCount: 5341, -// ReadBytes: 100000, -// WriteBytes: 200000, -// ReadTime: 7123, -// WriteTime: 9087, -// Name: "sda1", -// IoTime: 123552, -// SerialNumber: "ab-123-ad", -// } -// diskio2 := disk.IOCountersStat{ -// ReadCount: 444, -// WriteCount: 2341, -// ReadBytes: 200000, 
-// WriteBytes: 400000, -// ReadTime: 3123, -// WriteTime: 6087, -// Name: "sdb1", -// IoTime: 246552, -// SerialNumber: "bb-123-ad", -// } - -// mps.On("DiskIO").Return( -// map[string]disk.IOCountersStat{"sda1": diskio1, "sdb1": diskio2}, -// nil) - -// err = (&DiskIOStats{ps: &mps}).Gather(&acc) -// require.NoError(t, err) - -// numDiskIOMetrics := acc.NFields() -// expectedAllDiskIOMetrics := 14 -// assert.Equal(t, expectedAllDiskIOMetrics, numDiskIOMetrics) - -// dtags1 := map[string]string{ -// "name": "sda1", -// "serial": "ab-123-ad", -// } -// dtags2 := map[string]string{ -// "name": "sdb1", -// "serial": "bb-123-ad", -// } - -// assert.True(t, acc.CheckTaggedValue("reads", uint64(888), dtags1)) -// assert.True(t, acc.CheckTaggedValue("writes", uint64(5341), dtags1)) -// assert.True(t, acc.CheckTaggedValue("read_bytes", uint64(100000), dtags1)) -// assert.True(t, acc.CheckTaggedValue("write_bytes", uint64(200000), dtags1)) -// assert.True(t, acc.CheckTaggedValue("read_time", uint64(7123), dtags1)) -// assert.True(t, acc.CheckTaggedValue("write_time", uint64(9087), dtags1)) -// assert.True(t, acc.CheckTaggedValue("io_time", uint64(123552), dtags1)) -// assert.True(t, acc.CheckTaggedValue("reads", uint64(444), dtags2)) -// assert.True(t, acc.CheckTaggedValue("writes", uint64(2341), dtags2)) -// assert.True(t, acc.CheckTaggedValue("read_bytes", uint64(200000), dtags2)) -// assert.True(t, acc.CheckTaggedValue("write_bytes", uint64(400000), dtags2)) -// assert.True(t, acc.CheckTaggedValue("read_time", uint64(3123), dtags2)) -// assert.True(t, acc.CheckTaggedValue("write_time", uint64(6087), dtags2)) -// assert.True(t, acc.CheckTaggedValue("io_time", uint64(246552), dtags2)) - -// // We expect 7 more DiskIOMetrics to show up with an explicit match on "sdb1" -// // and serial should be missing from the tags with SkipSerialNumber set -// err = (&DiskIOStats{ps: &mps, Devices: []string{"sdb1"}, SkipSerialNumber: true}).Gather(&acc) -// assert.Equal(t, 
expectedAllDiskIOMetrics+7, acc.NFields()) - -// dtags3 := map[string]string{ -// "name": "sdb1", -// } - -// assert.True(t, acc.CheckTaggedValue("reads", uint64(444), dtags3)) -// assert.True(t, acc.CheckTaggedValue("writes", uint64(2341), dtags3)) -// assert.True(t, acc.CheckTaggedValue("read_bytes", uint64(200000), dtags3)) -// assert.True(t, acc.CheckTaggedValue("write_bytes", uint64(400000), dtags3)) -// assert.True(t, acc.CheckTaggedValue("read_time", uint64(3123), dtags3)) -// assert.True(t, acc.CheckTaggedValue("write_time", uint64(6087), dtags3)) -// assert.True(t, acc.CheckTaggedValue("io_time", uint64(246552), dtags3)) -// } diff --git a/plugins/inputs/system/diskio.go b/plugins/inputs/system/diskio.go new file mode 100644 index 000000000..21e70d5eb --- /dev/null +++ b/plugins/inputs/system/diskio.go @@ -0,0 +1,196 @@ +package system + +import ( + "fmt" + "log" + "regexp" + "strings" + + "github.com/influxdata/telegraf" + "github.com/influxdata/telegraf/filter" + "github.com/influxdata/telegraf/plugins/inputs" +) + +var ( + varRegex = regexp.MustCompile(`\$(?:\w+|\{\w+\})`) +) + +type DiskIO struct { + ps PS + + Devices []string + DeviceTags []string + NameTemplates []string + SkipSerialNumber bool + + infoCache map[string]diskInfoCache + deviceFilter filter.Filter + initialized bool +} + +func (_ *DiskIO) Description() string { + return "Read metrics about disk IO by device" +} + +var diskIOsampleConfig = ` + ## By default, telegraf will gather stats for all devices including + ## disk partitions. + ## Setting devices will restrict the stats to the specified devices. + # devices = ["sda", "sdb", "vd*"] + ## Uncomment the following line if you need disk serial numbers. + # skip_serial_number = false + # + ## On systems which support it, device metadata can be added in the form of + ## tags. + ## Currently only Linux is supported via udev properties. 
You can view + ## available properties for a device by running: + ## 'udevadm info -q property -n /dev/sda' + # device_tags = ["ID_FS_TYPE", "ID_FS_USAGE"] + # + ## Using the same metadata source as device_tags, you can also customize the + ## name of the device via templates. + ## The 'name_templates' parameter is a list of templates to try and apply to + ## the device. The template may contain variables in the form of '$PROPERTY' or + ## '${PROPERTY}'. The first template which does not contain any variables not + ## present for the device is used as the device name tag. + ## The typical use case is for LVM volumes, to get the VG/LV name instead of + ## the near-meaningless DM-0 name. + # name_templates = ["$ID_FS_LABEL","$DM_VG_NAME/$DM_LV_NAME"] +` + +func (_ *DiskIO) SampleConfig() string { + return diskIOsampleConfig +} + +// hasMeta reports whether s contains any special glob characters. +func hasMeta(s string) bool { + return strings.IndexAny(s, "*?[") >= 0 +} + +func (s *DiskIO) init() error { + for _, device := range s.Devices { + if hasMeta(device) { + filter, err := filter.Compile(s.Devices) + if err != nil { + return fmt.Errorf("error compiling device pattern: %v", err) + } + s.deviceFilter = filter + } + } + s.initialized = true + return nil +} + +func (s *DiskIO) Gather(acc telegraf.Accumulator) error { + if !s.initialized { + err := s.init() + if err != nil { + return err + } + } + + devices := []string{} + if s.deviceFilter == nil { + devices = s.Devices + } + + diskio, err := s.ps.DiskIO(devices) + if err != nil { + return fmt.Errorf("error getting disk io info: %s", err) + } + + for _, io := range diskio { + if s.deviceFilter != nil && !s.deviceFilter.Match(io.Name) { + continue + } + + tags := map[string]string{} + tags["name"] = s.diskName(io.Name) + for t, v := range s.diskTags(io.Name) { + tags[t] = v + } + if !s.SkipSerialNumber { + if len(io.SerialNumber) != 0 { + tags["serial"] = io.SerialNumber + } else { + tags["serial"] = "unknown" + } + 
} + + fields := map[string]interface{}{ + "reads": io.ReadCount, + "writes": io.WriteCount, + "read_bytes": io.ReadBytes, + "write_bytes": io.WriteBytes, + "read_time": io.ReadTime, + "write_time": io.WriteTime, + "io_time": io.IoTime, + "weighted_io_time": io.WeightedIO, + "iops_in_progress": io.IopsInProgress, + } + acc.AddCounter("diskio", fields, tags) + } + + return nil +} + +func (s *DiskIO) diskName(devName string) string { + if len(s.NameTemplates) == 0 { + return devName + } + + di, err := s.diskInfo(devName) + if err != nil { + log.Printf("W! Error gathering disk info: %s", err) + return devName + } + + for _, nt := range s.NameTemplates { + miss := false + name := varRegex.ReplaceAllStringFunc(nt, func(sub string) string { + sub = sub[1:] // strip leading '$' + if sub[0] == '{' { + sub = sub[1 : len(sub)-1] // strip leading & trailing '{' '}' + } + if v, ok := di[sub]; ok { + return v + } + miss = true + return "" + }) + + if !miss { + return name + } + } + + return devName +} + +func (s *DiskIO) diskTags(devName string) map[string]string { + if len(s.DeviceTags) == 0 { + return nil + } + + di, err := s.diskInfo(devName) + if err != nil { + log.Printf("W! 
Error gathering disk info: %s", err) + return nil + } + + tags := map[string]string{} + for _, dt := range s.DeviceTags { + if v, ok := di[dt]; ok { + tags[dt] = v + } + } + + return tags +} + +func init() { + ps := newSystemPS() + inputs.Add("diskio", func() telegraf.Input { + return &DiskIO{ps: ps, SkipSerialNumber: true} + }) +} diff --git a/plugins/inputs/system/disk_linux.go b/plugins/inputs/system/diskio_linux.go similarity index 93% rename from plugins/inputs/system/disk_linux.go rename to plugins/inputs/system/diskio_linux.go index d3fd691c4..b15f74383 100644 --- a/plugins/inputs/system/disk_linux.go +++ b/plugins/inputs/system/diskio_linux.go @@ -16,7 +16,7 @@ type diskInfoCache struct { var udevPath = "/run/udev/data" -func (s *DiskIOStats) diskInfo(devName string) (map[string]string, error) { +func (s *DiskIO) diskInfo(devName string) (map[string]string, error) { var err error var stat unix.Stat_t diff --git a/plugins/inputs/system/disk_linux_test.go b/plugins/inputs/system/diskio_linux_test.go similarity index 97% rename from plugins/inputs/system/disk_linux_test.go rename to plugins/inputs/system/diskio_linux_test.go index 801ad328a..96aed211b 100644 --- a/plugins/inputs/system/disk_linux_test.go +++ b/plugins/inputs/system/diskio_linux_test.go @@ -42,7 +42,7 @@ func TestDiskInfo(t *testing.T) { clean := setupNullDisk(t) defer clean() - s := &DiskIOStats{} + s := &DiskIO{} di, err := s.diskInfo("null") require.NoError(t, err) assert.Equal(t, "myval1", di["MY_PARAM_1"]) @@ -81,7 +81,7 @@ func TestDiskIOStats_diskName(t *testing.T) { } for _, tc := range tests { - s := DiskIOStats{ + s := DiskIO{ NameTemplates: tc.templates, } assert.Equal(t, tc.expected, s.diskName("null"), "Templates: %#v", tc.templates) @@ -93,7 +93,7 @@ func TestDiskIOStats_diskName(t *testing.T) { func TestDiskIOStats_diskTags(t *testing.T) { defer setupNullDisk(t)() - s := &DiskIOStats{ + s := &DiskIO{ DeviceTags: []string{"MY_PARAM_2"}, } dt := s.diskTags("null") diff --git 
a/plugins/inputs/system/disk_other.go b/plugins/inputs/system/diskio_other.go similarity index 51% rename from plugins/inputs/system/disk_other.go rename to plugins/inputs/system/diskio_other.go index fa9121cdf..0a3abb686 100644 --- a/plugins/inputs/system/disk_other.go +++ b/plugins/inputs/system/diskio_other.go @@ -4,6 +4,6 @@ package system type diskInfoCache struct{} -func (s *DiskIOStats) diskInfo(devName string) (map[string]string, error) { +func (s *DiskIO) diskInfo(devName string) (map[string]string, error) { return nil, nil } diff --git a/plugins/inputs/system/diskio_test.go b/plugins/inputs/system/diskio_test.go new file mode 100644 index 000000000..d8b908c3e --- /dev/null +++ b/plugins/inputs/system/diskio_test.go @@ -0,0 +1,121 @@ +package system + +import ( + "testing" + + "github.com/influxdata/telegraf/testutil" + "github.com/shirou/gopsutil/disk" + "github.com/stretchr/testify/require" +) + +func TestDiskIO(t *testing.T) { + type Result struct { + stats map[string]disk.IOCountersStat + err error + } + type Metric struct { + tags map[string]string + fields map[string]interface{} + } + + tests := []struct { + name string + devices []string + result Result + err error + metrics []Metric + }{ + { + name: "minimal", + result: Result{ + stats: map[string]disk.IOCountersStat{ + "sda": disk.IOCountersStat{ + ReadCount: 888, + WriteCount: 5341, + ReadBytes: 100000, + WriteBytes: 200000, + ReadTime: 7123, + WriteTime: 9087, + Name: "sda", + IoTime: 123552, + SerialNumber: "ab-123-ad", + }, + }, + err: nil, + }, + err: nil, + metrics: []Metric{ + Metric{ + tags: map[string]string{ + "name": "sda", + "serial": "ab-123-ad", + }, + fields: map[string]interface{}{ + "reads": uint64(888), + "writes": uint64(5341), + "read_bytes": uint64(100000), + "write_bytes": uint64(200000), + "read_time": uint64(7123), + "write_time": uint64(9087), + "io_time": uint64(123552), + "weighted_io_time": uint64(0), + "iops_in_progress": uint64(0), + }, + }, + }, + }, + { + name: 
"glob device", + devices: []string{"sd*"}, + result: Result{ + stats: map[string]disk.IOCountersStat{ + "sda": disk.IOCountersStat{ + Name: "sda", + ReadCount: 42, + }, + "vda": disk.IOCountersStat{ + Name: "vda", + ReadCount: 42, + }, + }, + err: nil, + }, + err: nil, + metrics: []Metric{ + Metric{ + tags: map[string]string{ + "name": "sda", + "serial": "unknown", + }, + fields: map[string]interface{}{ + "reads": uint64(42), + }, + }, + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + var mps MockPS + mps.On("DiskIO").Return(tt.result.stats, tt.result.err) + + var acc testutil.Accumulator + + diskio := &DiskIO{ + ps: &mps, + Devices: tt.devices, + } + err := diskio.Gather(&acc) + require.Equal(t, tt.err, err) + + for _, metric := range tt.metrics { + for k, v := range metric.fields { + require.True(t, acc.HasPoint("diskio", metric.tags, k, v), + "missing point: diskio %v %q: %v", metric.tags, k, v) + } + } + require.Equal(t, len(tt.metrics), int(acc.NMetrics()), "unexpected number of metrics") + require.True(t, mps.AssertExpectations(t)) + }) + } +} diff --git a/testutil/accumulator.go b/testutil/accumulator.go index 29c362c87..cefe8b787 100644 --- a/testutil/accumulator.go +++ b/testutil/accumulator.go @@ -537,6 +537,24 @@ func (a *Accumulator) Int64Field(measurement string, field string) (int64, bool) return 0, false } +// Uint64Field returns the uint64 value of the given measurement and field or false. +func (a *Accumulator) Uint64Field(measurement string, field string) (uint64, bool) { + a.Lock() + defer a.Unlock() + for _, p := range a.Metrics { + if p.Measurement == measurement { + for fieldname, value := range p.Fields { + if fieldname == field { + v, ok := value.(uint64) + return v, ok + } + } + } + } + + return 0, false +} + // Int32Field returns the int32 value of the given measurement and field or false. func (a *Accumulator) Int32Field(measurement string, field string) (int32, bool) { a.Lock()