Moved system package inputs out to top level (#4406)

This commit is contained in:
Steve Domino
2018-07-11 17:43:49 -06:00
committed by Daniel Nelson
parent 9a14d1f074
commit 7b73b0db3a
42 changed files with 126 additions and 89 deletions

View File

@@ -0,0 +1,99 @@
# Telegraf plugin: CPU
#### Plugin arguments:
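- **totalcpu** boolean: If true, include `cpu-total` data
- **percpu** boolean: If true, include data on a per-cpu basis `cpu0, cpu1, etc.`
- **collect_cpu_time** boolean: If true, collect raw CPU time metrics
- **report_active** boolean: If true, compute and report the sum of all non-idle CPU states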
##### Configuration:
```
[[inputs.cpu]]
## Whether to report per-cpu stats or not
percpu = true
## Whether to report total system cpu stats or not
totalcpu = true
## If true, collect raw CPU time metrics.
collect_cpu_time = false
## If true, compute and report the sum of all non-idle CPU states.
report_active = false
```
#### Description
The CPU plugin collects standard CPU metrics as defined in `man proc`. Not all
architectures support all of these metrics.
```
cpu 3357 0 4313 1362393
The amount of time, measured in units of USER_HZ (1/100ths of a second on
most architectures, use sysconf(_SC_CLK_TCK) to obtain the right value),
that the system spent in various states:
user (1) Time spent in user mode.
nice (2) Time spent in user mode with low priority (nice).
system (3) Time spent in system mode.
idle (4) Time spent in the idle task. This value should be USER_HZ times
the second entry in the /proc/uptime pseudo-file.
iowait (since Linux 2.5.41)
(5) Time waiting for I/O to complete.
irq (since Linux 2.6.0-test4)
(6) Time servicing interrupts.
softirq (since Linux 2.6.0-test4)
(7) Time servicing softirqs.
steal (since Linux 2.6.11)
(8) Stolen time, which is the time spent in other operating systems
when running in a virtualized environment
guest (since Linux 2.6.24)
(9) Time spent running a virtual CPU for guest operating systems
under the control of the Linux kernel.
guest_nice (since Linux 2.6.33)
(10) Time spent running a niced guest (virtual CPU for guest operating systems under the control of the Linux kernel).
```
# Measurements:
### CPU Time measurements:
Meta:
- units: CPU Time
- tags: `cpu=<cpuN> or <cpu-total>`
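Measurement names (each is reported as a field of the `cpu` measurement, without the leading `cpu_`, e.g. `time_user`):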
- cpu_time_user
- cpu_time_system
- cpu_time_idle
- cpu_time_active (must be explicitly enabled by setting `report_active = true`)
- cpu_time_nice
- cpu_time_iowait
- cpu_time_irq
- cpu_time_softirq
- cpu_time_steal
- cpu_time_guest
- cpu_time_guest_nice
### CPU Usage Percent Measurements:
Meta:
- units: percent (out of 100)
- tags: `cpu=<cpuN> or <cpu-total>`
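Measurement names (each is reported as a field of the `cpu` measurement, without the leading `cpu_`, e.g. `usage_user`):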
- cpu_usage_user
- cpu_usage_system
- cpu_usage_idle
- cpu_usage_active (must be explicitly enabled by setting `report_active = true`)
- cpu_usage_nice
- cpu_usage_iowait
- cpu_usage_irq
- cpu_usage_softirq
- cpu_usage_steal
- cpu_usage_guest
- cpu_usage_guest_nice

153
plugins/inputs/cpu/cpu.go Normal file
View File

@@ -0,0 +1,153 @@
package cpu
import (
"fmt"
"time"
"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/plugins/inputs"
"github.com/influxdata/telegraf/plugins/inputs/system"
"github.com/shirou/gopsutil/cpu"
)
// CPUStats is the input plugin state for CPU metrics: its configuration, the
// source it samples from, and the previous sample used to compute usage.
type CPUStats struct {
	// Settings read from the plugin's TOML configuration.
	PerCPU         bool `toml:"percpu"`
	TotalCPU       bool `toml:"totalcpu"`
	CollectCPUTime bool `toml:"collect_cpu_time"`
	ReportActive   bool `toml:"report_active"`

	// ps is where the CPU time samples are read from.
	ps system.PS
	// lastStats remembers the samples of the previous Gather call, keyed by
	// CPU name, so usage can be computed as a delta between two gathers.
	lastStats map[string]cpu.TimesStat
}
// NewCPUStats builds a CPUStats that samples from ps. Raw CPU time collection
// and the "active" fields are both enabled; PerCPU and TotalCPU keep their
// zero value.
func NewCPUStats(ps system.PS) *CPUStats {
	stats := new(CPUStats)
	stats.ps = ps
	stats.CollectCPUTime = true
	stats.ReportActive = true
	return stats
}
// Description returns a one-line summary of the plugin.
func (*CPUStats) Description() string {
	const summary = "Read metrics about cpu usage"
	return summary
}
// sampleConfig is the example configuration text returned by SampleConfig.
const sampleConfig = `
## Whether to report per-cpu stats or not
percpu = true
## Whether to report total system cpu stats or not
totalcpu = true
## If true, collect raw CPU time metrics.
collect_cpu_time = false
## If true, compute and report the sum of all non-idle CPU states.
report_active = false
`
// SampleConfig returns the example configuration text for the plugin.
func (*CPUStats) SampleConfig() string {
	return sampleConfig
}
// Gather samples the CPU times from the configured source and reports them
// to acc under the "cpu" measurement, tagged with the CPU name.
//
// When CollectCPUTime is set, the raw time_* values are added as a counter.
// The usage_* percentages are added as a gauge and are derived from the
// difference between this sample and the one remembered from the previous
// call, so a CPU produces no usage fields the first time it is seen.
//
// An error is returned when the samples cannot be read, or when the total
// time of a CPU went backwards since the previous call. In the latter case
// the remaining CPUs of this call are skipped, but the new samples are still
// remembered so that the next call can compute usage again.
func (s *CPUStats) Gather(acc telegraf.Accumulator) error {
	times, err := s.ps.CPUTimes(s.PerCPU, s.TotalCPU)
	if err != nil {
		return fmt.Errorf("error getting CPU info: %s", err)
	}
	now := time.Now()

	for _, cts := range times {
		tags := map[string]string{
			"cpu": cts.CPU,
		}

		total := totalCpuTime(cts)
		active := activeCpuTime(cts)

		if s.CollectCPUTime {
			// Add cpu time metrics
			fieldsC := map[string]interface{}{
				"time_user":       cts.User,
				"time_system":     cts.System,
				"time_idle":       cts.Idle,
				"time_nice":       cts.Nice,
				"time_iowait":     cts.Iowait,
				"time_irq":        cts.Irq,
				"time_softirq":    cts.Softirq,
				"time_steal":      cts.Steal,
				"time_guest":      cts.Guest,
				"time_guest_nice": cts.GuestNice,
			}
			if s.ReportActive {
				fieldsC["time_active"] = active
			}
			acc.AddCounter("cpu", fieldsC, tags, now)
		}

		// Usage needs a previous sample for this CPU. The lookup misses both
		// on the very first gather (lastStats is still nil) and for a CPU
		// that was not present in the previous gather.
		lastCts, ok := s.lastStats[cts.CPU]
		if !ok {
			continue
		}
		lastTotal := totalCpuTime(lastCts)
		lastActive := activeCpuTime(lastCts)
		totalDelta := total - lastTotal

		if totalDelta < 0 {
			// Stop here but fall through to the bookkeeping below, so the
			// decreased sample becomes the baseline for the next gather.
			err = fmt.Errorf("current total CPU time is less than previous total CPU time")
			break
		}

		if totalDelta == 0 {
			// No time elapsed; avoid dividing by zero.
			continue
		}

		// Guest time is subtracted from user time, and guest_nice from nice,
		// before computing their share of the elapsed total.
		fieldsG := map[string]interface{}{
			"usage_user":       100 * (cts.User - lastCts.User - (cts.Guest - lastCts.Guest)) / totalDelta,
			"usage_system":     100 * (cts.System - lastCts.System) / totalDelta,
			"usage_idle":       100 * (cts.Idle - lastCts.Idle) / totalDelta,
			"usage_nice":       100 * (cts.Nice - lastCts.Nice - (cts.GuestNice - lastCts.GuestNice)) / totalDelta,
			"usage_iowait":     100 * (cts.Iowait - lastCts.Iowait) / totalDelta,
			"usage_irq":        100 * (cts.Irq - lastCts.Irq) / totalDelta,
			"usage_softirq":    100 * (cts.Softirq - lastCts.Softirq) / totalDelta,
			"usage_steal":      100 * (cts.Steal - lastCts.Steal) / totalDelta,
			"usage_guest":      100 * (cts.Guest - lastCts.Guest) / totalDelta,
			"usage_guest_nice": 100 * (cts.GuestNice - lastCts.GuestNice) / totalDelta,
		}
		if s.ReportActive {
			fieldsG["usage_active"] = 100 * (active - lastActive) / totalDelta
		}
		acc.AddGauge("cpu", fieldsG, tags, now)
	}

	// Replace the remembered samples with the ones just read.
	s.lastStats = make(map[string]cpu.TimesStat, len(times))
	for _, cts := range times {
		s.lastStats[cts.CPU] = cts
	}

	return err
}
// totalCpuTime sums the user, system, nice, iowait, irq, softirq, steal and
// idle times of t. The Guest and GuestNice values are not part of the sum.
func totalCpuTime(t cpu.TimesStat) float64 {
	return t.User + t.System + t.Nice + t.Iowait + t.Irq + t.Softirq + t.Steal + t.Idle
}
// activeCpuTime returns the total CPU time of t with its idle time removed.
func activeCpuTime(t cpu.TimesStat) float64 {
	return totalCpuTime(t) - t.Idle
}
func init() {
inputs.Add("cpu", func() telegraf.Input {
return &CPUStats{
PerCPU: true,
TotalCPU: true,
ps: system.NewSystemPS(),
}
})
}

View File

@@ -0,0 +1,256 @@
package cpu
import (
"fmt"
"testing"
"github.com/influxdata/telegraf/plugins/inputs/system"
"github.com/influxdata/telegraf/testutil"
"github.com/shirou/gopsutil/cpu"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
// TestCPUStats gathers twice from mocked CPU samples. It checks the raw time
// fields after each gather and, after the second one, the usage percentages
// derived from the difference between the two samples.
func TestCPUStats(t *testing.T) {
	type expectation struct {
		field string
		value float64
		delta float64
	}

	var firstPS system.MockPS
	defer firstPS.AssertExpectations(t)
	var acc testutil.Accumulator

	first := cpu.TimesStat{
		CPU:       "cpu0",
		User:      8.8,
		System:    8.2,
		Idle:      80.1,
		Nice:      1.3,
		Iowait:    0.8389,
		Irq:       0.6,
		Softirq:   0.11,
		Steal:     0.0511,
		Guest:     3.1,
		GuestNice: 0.324,
	}
	second := cpu.TimesStat{
		CPU:       "cpu0",
		User:      24.9,     // increased by 16.1
		System:    10.9,     // increased by 2.7
		Idle:      157.9798, // increased by 77.8798 (for total increase of 100)
		Nice:      3.5,      // increased by 2.2
		Iowait:    0.929,    // increased by 0.0901
		Irq:       1.2,      // increased by 0.6
		Softirq:   0.31,     // increased by 0.2
		Steal:     0.2812,   // increased by 0.2301
		Guest:     11.4,     // increased by 8.3
		GuestNice: 2.524,    // increased by 2.2
	}

	firstPS.On("CPUTimes").Return([]cpu.TimesStat{first}, nil)

	cs := NewCPUStats(&firstPS)

	cputags := map[string]string{
		"cpu": "cpu0",
	}
	expectAll := func(wanted []expectation) {
		for _, w := range wanted {
			assertContainsTaggedFloat(t, &acc, "cpu", w.field, w.value, w.delta, cputags)
		}
	}

	require.NoError(t, cs.Gather(&acc))

	// Computed values are checked with delta > 0 because of floating point
	// arithmetic imprecision.
	expectAll([]expectation{
		{"time_user", 8.8, 0},
		{"time_system", 8.2, 0},
		{"time_idle", 80.1, 0},
		{"time_active", 19.9, 0.0005},
		{"time_nice", 1.3, 0},
		{"time_iowait", 0.8389, 0},
		{"time_irq", 0.6, 0},
		{"time_softirq", 0.11, 0},
		{"time_steal", 0.0511, 0},
		{"time_guest", 3.1, 0},
		{"time_guest_nice", 0.324, 0},
	})

	secondPS := system.MockPS{}
	secondPS.On("CPUTimes").Return([]cpu.TimesStat{second}, nil)
	cs.ps = &secondPS

	// The second gather should have added cpu percentages too.
	require.NoError(t, cs.Gather(&acc))

	expectAll([]expectation{
		{"time_user", 24.9, 0},
		{"time_system", 10.9, 0},
		{"time_idle", 157.9798, 0},
		{"time_active", 42.0202, 0.0005},
		{"time_nice", 3.5, 0},
		{"time_iowait", 0.929, 0},
		{"time_irq", 1.2, 0},
		{"time_softirq", 0.31, 0},
		{"time_steal", 0.2812, 0},
		{"time_guest", 11.4, 0},
		{"time_guest_nice", 2.524, 0},

		{"usage_user", 7.8, 0.0005},
		{"usage_system", 2.7, 0.0005},
		{"usage_idle", 77.8798, 0.0005},
		{"usage_active", 22.1202, 0.0005},
		{"usage_nice", 0, 0.0005},
		{"usage_iowait", 0.0901, 0.0005},
		{"usage_irq", 0.6, 0.0005},
		{"usage_softirq", 0.2, 0.0005},
		{"usage_steal", 0.2301, 0.0005},
		{"usage_guest", 8.3, 0.0005},
		{"usage_guest_nice", 2.2, 0.0005},
	})
}
// assertContainsTaggedFloat asserts that a given accumulator contains a
// measurement with a float64 field, carrying specific tags, whose value lies
// within a certain distance of a given expected value. It records a failure
// if a matching field is of the wrong type, or if no matching value is found.
//
// Parameters:
//     t *testing.T            : Testing object to use
//     acc testutil.Accumulator: Accumulator to examine
//     measurement string      : Name of the measurement to examine
//     field string            : Name of the field to examine within the
//                               measurement
//     expectedValue float64   : Value to search for within the measurement
//     delta float64           : Maximum acceptable distance of an accumulated value
//                               from the expectedValue parameter. Useful when
//                               floating-point arithmetic imprecision makes looking
//                               for an exact match impractical
//     tags map[string]string  : Tag set the found measurement must have (exact
//                               match). Set to nil to ignore the tag set.
func assertContainsTaggedFloat(
	t *testing.T,
	acc *testutil.Accumulator,
	measurement string,
	field string,
	expectedValue float64,
	delta float64,
	tags map[string]string,
) {
	// tagsMatch reports whether got is exactly the requested tag set; a nil
	// request accepts every tag set.
	tagsMatch := func(got map[string]string) bool {
		if tags == nil {
			return true
		}
		if len(got) != len(tags) {
			return false
		}
		for key, want := range tags {
			if have, ok := got[key]; !ok || have != want {
				return false
			}
		}
		return true
	}

	// Last float value seen for the field, reported when nothing matched.
	var actualValue float64
	for _, pt := range acc.Metrics {
		if pt.Measurement != measurement || !tagsMatch(pt.Tags) {
			continue
		}
		raw, present := pt.Fields[field]
		if !present {
			continue
		}
		value, ok := raw.(float64)
		if !ok {
			assert.Fail(t, fmt.Sprintf("Measurement \"%s\" does not have type float64",
				measurement))
			continue
		}
		actualValue = value
		if (value >= expectedValue-delta) && (value <= expectedValue+delta) {
			// Found the point, return without failing
			return
		}
	}
	msg := fmt.Sprintf(
		"Could not find measurement \"%s\" with requested tags within %f of %f, Actual: %f",
		measurement, delta, expectedValue, actualValue)
	assert.Fail(t, msg)
}
// TestCPUCountIncrease tests that no errors are encountered if the number of
// CPUs increases between two gathers, as reported with LXC.
func TestCPUCountIncrease(t *testing.T) {
	var (
		before system.MockPS
		after  system.MockPS
		acc    testutil.Accumulator
	)

	cs := NewCPUStats(&before)

	// First gather sees a single CPU.
	before.On("CPUTimes").Return([]cpu.TimesStat{{CPU: "cpu0"}}, nil)
	require.NoError(t, cs.Gather(&acc))

	// Second gather sees an additional CPU.
	after.On("CPUTimes").Return([]cpu.TimesStat{{CPU: "cpu0"}, {CPU: "cpu1"}}, nil)
	cs.ps = &after
	require.NoError(t, cs.Gather(&acc))
}
// TestCPUTimesDecrease tests that telegraf continues to work after CPU times
// decrease, which seems to occur when a Linux system is suspended.
func TestCPUTimesDecrease(t *testing.T) {
	var initialPS system.MockPS
	defer initialPS.AssertExpectations(t)
	var acc testutil.Accumulator

	baseline := cpu.TimesStat{
		CPU:    "cpu0",
		User:   18,
		Idle:   80,
		Iowait: 2,
	}
	decreased := cpu.TimesStat{
		CPU:    "cpu0",
		User:   38, // increased by 20
		Idle:   40, // decreased by 40
		Iowait: 1,  // decreased by 1
	}
	recovered := cpu.TimesStat{
		CPU:    "cpu0",
		User:   56,  // increased by 18
		Idle:   120, // increased by 80
		Iowait: 3,   // increased by 2
	}

	initialPS.On("CPUTimes").Return([]cpu.TimesStat{baseline}, nil)

	cs := NewCPUStats(&initialPS)

	cputags := map[string]string{
		"cpu": "cpu0",
	}
	expect := func(field string, value, delta float64) {
		assertContainsTaggedFloat(t, &acc, "cpu", field, value, delta, cputags)
	}

	require.NoError(t, cs.Gather(&acc))

	// Computed values are checked with delta > 0 because of floating point
	// arithmetic imprecision.
	expect("time_user", 18, 0)
	expect("time_idle", 80, 0)
	expect("time_iowait", 2, 0)

	decreasedPS := system.MockPS{}
	decreasedPS.On("CPUTimes").Return([]cpu.TimesStat{decreased}, nil)
	cs.ps = &decreasedPS

	// CPU times decreased. An error should be raised.
	require.Error(t, cs.Gather(&acc))

	recoveredPS := system.MockPS{}
	recoveredPS.On("CPUTimes").Return([]cpu.TimesStat{recovered}, nil)
	cs.ps = &recoveredPS

	require.NoError(t, cs.Gather(&acc))

	expect("time_user", 56, 0)
	expect("time_idle", 120, 0)
	expect("time_iowait", 3, 0)

	expect("usage_user", 18, 0.0005)
	expect("usage_idle", 80, 0.0005)
	expect("usage_iowait", 2, 0.0005)
}