Moved system package inputs out to top level (#4406)
This commit is contained in:
committed by
Daniel Nelson
parent
9a14d1f074
commit
7b73b0db3a
99
plugins/inputs/cpu/README.md
Normal file
99
plugins/inputs/cpu/README.md
Normal file
@@ -0,0 +1,99 @@
|
||||
# Telegraf plugin: CPU
|
||||
|
||||
#### Plugin arguments:
|
||||
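- **totalcpu** boolean: If true, include `cpu-total` data
- **percpu** boolean: If true, include data on a per-cpu basis `cpu0, cpu1, etc.`
- **collect_cpu_time** boolean: If true, collect raw CPU time metrics
- **report_active** boolean: If true, compute and report the sum of all non-idle CPU states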
|
||||
|
||||
|
||||
##### Configuration:
|
||||
```
|
||||
[[inputs.cpu]]
|
||||
## Whether to report per-cpu stats or not
|
||||
percpu = true
|
||||
## Whether to report total system cpu stats or not
|
||||
totalcpu = true
|
||||
## If true, collect raw CPU time metrics.
|
||||
collect_cpu_time = false
|
||||
## If true, compute and report the sum of all non-idle CPU states.
|
||||
report_active = false
|
||||
```
|
||||
|
||||
#### Description
|
||||
|
||||
The CPU plugin collects standard CPU metrics as defined in `man proc`. Not all
architectures support all of these metrics.
|
||||
|
||||
```
|
||||
cpu 3357 0 4313 1362393
|
||||
The amount of time, measured in units of USER_HZ (1/100ths of a second on
|
||||
most architectures, use sysconf(_SC_CLK_TCK) to obtain the right value),
|
||||
that the system spent in various states:
|
||||
|
||||
user (1) Time spent in user mode.
|
||||
|
||||
nice (2) Time spent in user mode with low priority (nice).
|
||||
|
||||
system (3) Time spent in system mode.
|
||||
|
||||
idle (4) Time spent in the idle task. This value should be USER_HZ times
|
||||
the second entry in the /proc/uptime pseudo-file.
|
||||
|
||||
iowait (since Linux 2.5.41)
|
||||
(5) Time waiting for I/O to complete.
|
||||
|
||||
irq (since Linux 2.6.0-test4)
|
||||
(6) Time servicing interrupts.
|
||||
|
||||
softirq (since Linux 2.6.0-test4)
|
||||
(7) Time servicing softirqs.
|
||||
|
||||
steal (since Linux 2.6.11)
|
||||
(8) Stolen time, which is the time spent in other operating systems
|
||||
when running in a virtualized environment
|
||||
|
||||
guest (since Linux 2.6.24)
|
||||
(9) Time spent running a virtual CPU for guest operating systems
|
||||
under the control of the Linux kernel.
|
||||
|
||||
guest_nice (since Linux 2.6.33)
|
||||
(10) Time spent running a niced guest (virtual CPU for guest operating systems under the control of the Linux kernel).
|
||||
```
|
||||
|
||||
# Measurements:
|
||||
### CPU Time measurements:
|
||||
|
||||
Meta:
|
||||
- units: CPU Time
|
||||
- tags: `cpu=<cpuN> or <cpu-total>`
|
||||
|
||||
Measurement names:
|
||||
- cpu_time_user
|
||||
- cpu_time_system
|
||||
- cpu_time_idle
|
||||
- cpu_time_active (must be explicitly enabled by setting `report_active = true`)
|
||||
- cpu_time_nice
|
||||
- cpu_time_iowait
|
||||
- cpu_time_irq
|
||||
- cpu_time_softirq
|
||||
- cpu_time_steal
|
||||
- cpu_time_guest
|
||||
- cpu_time_guest_nice
|
||||
|
||||
### CPU Usage Percent Measurements:
|
||||
|
||||
Meta:
|
||||
- units: percent (out of 100)
|
||||
- tags: `cpu=<cpuN> or <cpu-total>`
|
||||
|
||||
Measurement names:
|
||||
- cpu_usage_user
|
||||
- cpu_usage_system
|
||||
- cpu_usage_idle
|
||||
- cpu_usage_active (must be explicitly enabled by setting `report_active = true`)
|
||||
- cpu_usage_nice
|
||||
- cpu_usage_iowait
|
||||
- cpu_usage_irq
|
||||
- cpu_usage_softirq
|
||||
- cpu_usage_steal
|
||||
- cpu_usage_guest
|
||||
- cpu_usage_guest_nice
|
||||
153
plugins/inputs/cpu/cpu.go
Normal file
153
plugins/inputs/cpu/cpu.go
Normal file
@@ -0,0 +1,153 @@
|
||||
package cpu
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"time"
|
||||
|
||||
"github.com/influxdata/telegraf"
|
||||
"github.com/influxdata/telegraf/plugins/inputs"
|
||||
"github.com/influxdata/telegraf/plugins/inputs/system"
|
||||
"github.com/shirou/gopsutil/cpu"
|
||||
)
|
||||
|
||||
type CPUStats struct {
|
||||
ps system.PS
|
||||
lastStats map[string]cpu.TimesStat
|
||||
|
||||
PerCPU bool `toml:"percpu"`
|
||||
TotalCPU bool `toml:"totalcpu"`
|
||||
CollectCPUTime bool `toml:"collect_cpu_time"`
|
||||
ReportActive bool `toml:"report_active"`
|
||||
}
|
||||
|
||||
func NewCPUStats(ps system.PS) *CPUStats {
|
||||
return &CPUStats{
|
||||
ps: ps,
|
||||
CollectCPUTime: true,
|
||||
ReportActive: true,
|
||||
}
|
||||
}
|
||||
|
||||
func (_ *CPUStats) Description() string {
|
||||
return "Read metrics about cpu usage"
|
||||
}
|
||||
|
||||
// sampleConfig is the example configuration returned by SampleConfig. It is a
// constant because the text is fixed and must not be reassigned at runtime.
const sampleConfig = `
## Whether to report per-cpu stats or not
percpu = true
## Whether to report total system cpu stats or not
totalcpu = true
## If true, collect raw CPU time metrics.
collect_cpu_time = false
## If true, compute and report the sum of all non-idle CPU states.
report_active = false
`
|
||||
|
||||
func (_ *CPUStats) SampleConfig() string {
|
||||
return sampleConfig
|
||||
}
|
||||
|
||||
func (s *CPUStats) Gather(acc telegraf.Accumulator) error {
|
||||
times, err := s.ps.CPUTimes(s.PerCPU, s.TotalCPU)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error getting CPU info: %s", err)
|
||||
}
|
||||
now := time.Now()
|
||||
|
||||
for _, cts := range times {
|
||||
tags := map[string]string{
|
||||
"cpu": cts.CPU,
|
||||
}
|
||||
|
||||
total := totalCpuTime(cts)
|
||||
active := activeCpuTime(cts)
|
||||
|
||||
if s.CollectCPUTime {
|
||||
// Add cpu time metrics
|
||||
fieldsC := map[string]interface{}{
|
||||
"time_user": cts.User,
|
||||
"time_system": cts.System,
|
||||
"time_idle": cts.Idle,
|
||||
"time_nice": cts.Nice,
|
||||
"time_iowait": cts.Iowait,
|
||||
"time_irq": cts.Irq,
|
||||
"time_softirq": cts.Softirq,
|
||||
"time_steal": cts.Steal,
|
||||
"time_guest": cts.Guest,
|
||||
"time_guest_nice": cts.GuestNice,
|
||||
}
|
||||
if s.ReportActive {
|
||||
fieldsC["time_active"] = activeCpuTime(cts)
|
||||
}
|
||||
acc.AddCounter("cpu", fieldsC, tags, now)
|
||||
}
|
||||
|
||||
// Add in percentage
|
||||
if len(s.lastStats) == 0 {
|
||||
// If it's the 1st gather, can't get CPU Usage stats yet
|
||||
continue
|
||||
}
|
||||
|
||||
lastCts, ok := s.lastStats[cts.CPU]
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
lastTotal := totalCpuTime(lastCts)
|
||||
lastActive := activeCpuTime(lastCts)
|
||||
totalDelta := total - lastTotal
|
||||
|
||||
if totalDelta < 0 {
|
||||
err = fmt.Errorf("Error: current total CPU time is less than previous total CPU time")
|
||||
break
|
||||
}
|
||||
|
||||
if totalDelta == 0 {
|
||||
continue
|
||||
}
|
||||
|
||||
fieldsG := map[string]interface{}{
|
||||
"usage_user": 100 * (cts.User - lastCts.User - (cts.Guest - lastCts.Guest)) / totalDelta,
|
||||
"usage_system": 100 * (cts.System - lastCts.System) / totalDelta,
|
||||
"usage_idle": 100 * (cts.Idle - lastCts.Idle) / totalDelta,
|
||||
"usage_nice": 100 * (cts.Nice - lastCts.Nice - (cts.GuestNice - lastCts.GuestNice)) / totalDelta,
|
||||
"usage_iowait": 100 * (cts.Iowait - lastCts.Iowait) / totalDelta,
|
||||
"usage_irq": 100 * (cts.Irq - lastCts.Irq) / totalDelta,
|
||||
"usage_softirq": 100 * (cts.Softirq - lastCts.Softirq) / totalDelta,
|
||||
"usage_steal": 100 * (cts.Steal - lastCts.Steal) / totalDelta,
|
||||
"usage_guest": 100 * (cts.Guest - lastCts.Guest) / totalDelta,
|
||||
"usage_guest_nice": 100 * (cts.GuestNice - lastCts.GuestNice) / totalDelta,
|
||||
}
|
||||
if s.ReportActive {
|
||||
fieldsG["usage_active"] = 100 * (active - lastActive) / totalDelta
|
||||
}
|
||||
acc.AddGauge("cpu", fieldsG, tags, now)
|
||||
}
|
||||
|
||||
s.lastStats = make(map[string]cpu.TimesStat)
|
||||
for _, cts := range times {
|
||||
s.lastStats[cts.CPU] = cts
|
||||
}
|
||||
|
||||
return err
|
||||
}
|
||||
|
||||
func totalCpuTime(t cpu.TimesStat) float64 {
|
||||
total := t.User + t.System + t.Nice + t.Iowait + t.Irq + t.Softirq + t.Steal +
|
||||
t.Idle
|
||||
return total
|
||||
}
|
||||
|
||||
func activeCpuTime(t cpu.TimesStat) float64 {
|
||||
active := totalCpuTime(t) - t.Idle
|
||||
return active
|
||||
}
|
||||
|
||||
func init() {
|
||||
inputs.Add("cpu", func() telegraf.Input {
|
||||
return &CPUStats{
|
||||
PerCPU: true,
|
||||
TotalCPU: true,
|
||||
ps: system.NewSystemPS(),
|
||||
}
|
||||
})
|
||||
}
|
||||
256
plugins/inputs/cpu/cpu_test.go
Normal file
256
plugins/inputs/cpu/cpu_test.go
Normal file
@@ -0,0 +1,256 @@
|
||||
package cpu
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"testing"
|
||||
|
||||
"github.com/influxdata/telegraf/plugins/inputs/system"
|
||||
"github.com/influxdata/telegraf/testutil"
|
||||
"github.com/shirou/gopsutil/cpu"
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestCPUStats(t *testing.T) {
|
||||
var mps system.MockPS
|
||||
defer mps.AssertExpectations(t)
|
||||
var acc testutil.Accumulator
|
||||
|
||||
cts := cpu.TimesStat{
|
||||
CPU: "cpu0",
|
||||
User: 8.8,
|
||||
System: 8.2,
|
||||
Idle: 80.1,
|
||||
Nice: 1.3,
|
||||
Iowait: 0.8389,
|
||||
Irq: 0.6,
|
||||
Softirq: 0.11,
|
||||
Steal: 0.0511,
|
||||
Guest: 3.1,
|
||||
GuestNice: 0.324,
|
||||
}
|
||||
|
||||
cts2 := cpu.TimesStat{
|
||||
CPU: "cpu0",
|
||||
User: 24.9, // increased by 16.1
|
||||
System: 10.9, // increased by 2.7
|
||||
Idle: 157.9798, // increased by 77.8798 (for total increase of 100)
|
||||
Nice: 3.5, // increased by 2.2
|
||||
Iowait: 0.929, // increased by 0.0901
|
||||
Irq: 1.2, // increased by 0.6
|
||||
Softirq: 0.31, // increased by 0.2
|
||||
Steal: 0.2812, // increased by 0.2301
|
||||
Guest: 11.4, // increased by 8.3
|
||||
GuestNice: 2.524, // increased by 2.2
|
||||
}
|
||||
|
||||
mps.On("CPUTimes").Return([]cpu.TimesStat{cts}, nil)
|
||||
|
||||
cs := NewCPUStats(&mps)
|
||||
|
||||
cputags := map[string]string{
|
||||
"cpu": "cpu0",
|
||||
}
|
||||
|
||||
err := cs.Gather(&acc)
|
||||
require.NoError(t, err)
|
||||
|
||||
// Computed values are checked with delta > 0 because of floating point arithmatic
|
||||
// imprecision
|
||||
assertContainsTaggedFloat(t, &acc, "cpu", "time_user", 8.8, 0, cputags)
|
||||
assertContainsTaggedFloat(t, &acc, "cpu", "time_system", 8.2, 0, cputags)
|
||||
assertContainsTaggedFloat(t, &acc, "cpu", "time_idle", 80.1, 0, cputags)
|
||||
assertContainsTaggedFloat(t, &acc, "cpu", "time_active", 19.9, 0.0005, cputags)
|
||||
assertContainsTaggedFloat(t, &acc, "cpu", "time_nice", 1.3, 0, cputags)
|
||||
assertContainsTaggedFloat(t, &acc, "cpu", "time_iowait", 0.8389, 0, cputags)
|
||||
assertContainsTaggedFloat(t, &acc, "cpu", "time_irq", 0.6, 0, cputags)
|
||||
assertContainsTaggedFloat(t, &acc, "cpu", "time_softirq", 0.11, 0, cputags)
|
||||
assertContainsTaggedFloat(t, &acc, "cpu", "time_steal", 0.0511, 0, cputags)
|
||||
assertContainsTaggedFloat(t, &acc, "cpu", "time_guest", 3.1, 0, cputags)
|
||||
assertContainsTaggedFloat(t, &acc, "cpu", "time_guest_nice", 0.324, 0, cputags)
|
||||
|
||||
mps2 := system.MockPS{}
|
||||
mps2.On("CPUTimes").Return([]cpu.TimesStat{cts2}, nil)
|
||||
cs.ps = &mps2
|
||||
|
||||
// Should have added cpu percentages too
|
||||
err = cs.Gather(&acc)
|
||||
require.NoError(t, err)
|
||||
|
||||
assertContainsTaggedFloat(t, &acc, "cpu", "time_user", 24.9, 0, cputags)
|
||||
assertContainsTaggedFloat(t, &acc, "cpu", "time_system", 10.9, 0, cputags)
|
||||
assertContainsTaggedFloat(t, &acc, "cpu", "time_idle", 157.9798, 0, cputags)
|
||||
assertContainsTaggedFloat(t, &acc, "cpu", "time_active", 42.0202, 0.0005, cputags)
|
||||
assertContainsTaggedFloat(t, &acc, "cpu", "time_nice", 3.5, 0, cputags)
|
||||
assertContainsTaggedFloat(t, &acc, "cpu", "time_iowait", 0.929, 0, cputags)
|
||||
assertContainsTaggedFloat(t, &acc, "cpu", "time_irq", 1.2, 0, cputags)
|
||||
assertContainsTaggedFloat(t, &acc, "cpu", "time_softirq", 0.31, 0, cputags)
|
||||
assertContainsTaggedFloat(t, &acc, "cpu", "time_steal", 0.2812, 0, cputags)
|
||||
assertContainsTaggedFloat(t, &acc, "cpu", "time_guest", 11.4, 0, cputags)
|
||||
assertContainsTaggedFloat(t, &acc, "cpu", "time_guest_nice", 2.524, 0, cputags)
|
||||
|
||||
assertContainsTaggedFloat(t, &acc, "cpu", "usage_user", 7.8, 0.0005, cputags)
|
||||
assertContainsTaggedFloat(t, &acc, "cpu", "usage_system", 2.7, 0.0005, cputags)
|
||||
assertContainsTaggedFloat(t, &acc, "cpu", "usage_idle", 77.8798, 0.0005, cputags)
|
||||
assertContainsTaggedFloat(t, &acc, "cpu", "usage_active", 22.1202, 0.0005, cputags)
|
||||
assertContainsTaggedFloat(t, &acc, "cpu", "usage_nice", 0, 0.0005, cputags)
|
||||
assertContainsTaggedFloat(t, &acc, "cpu", "usage_iowait", 0.0901, 0.0005, cputags)
|
||||
assertContainsTaggedFloat(t, &acc, "cpu", "usage_irq", 0.6, 0.0005, cputags)
|
||||
assertContainsTaggedFloat(t, &acc, "cpu", "usage_softirq", 0.2, 0.0005, cputags)
|
||||
assertContainsTaggedFloat(t, &acc, "cpu", "usage_steal", 0.2301, 0.0005, cputags)
|
||||
assertContainsTaggedFloat(t, &acc, "cpu", "usage_guest", 8.3, 0.0005, cputags)
|
||||
assertContainsTaggedFloat(t, &acc, "cpu", "usage_guest_nice", 2.2, 0.0005, cputags)
|
||||
}
|
||||
|
||||
// Asserts that a given accumulator contains a measurment of type float64 with
|
||||
// specific tags within a certain distance of a given expected value. Asserts a failure
|
||||
// if the measurement is of the wrong type, or if no matching measurements are found
|
||||
//
|
||||
// Parameters:
|
||||
// t *testing.T : Testing object to use
|
||||
// acc testutil.Accumulator: Accumulator to examine
|
||||
// measurement string : Name of the measurement to examine
|
||||
// expectedValue float64 : Value to search for within the measurement
|
||||
// delta float64 : Maximum acceptable distance of an accumulated value
|
||||
// from the expectedValue parameter. Useful when
|
||||
// floating-point arithmatic imprecision makes looking
|
||||
// for an exact match impractical
|
||||
// tags map[string]string : Tag set the found measurement must have. Set to nil to
|
||||
// ignore the tag set.
|
||||
func assertContainsTaggedFloat(
|
||||
t *testing.T,
|
||||
acc *testutil.Accumulator,
|
||||
measurement string,
|
||||
field string,
|
||||
expectedValue float64,
|
||||
delta float64,
|
||||
tags map[string]string,
|
||||
) {
|
||||
var actualValue float64
|
||||
for _, pt := range acc.Metrics {
|
||||
if pt.Measurement == measurement {
|
||||
for fieldname, value := range pt.Fields {
|
||||
if fieldname == field {
|
||||
if value, ok := value.(float64); ok {
|
||||
actualValue = value
|
||||
if (value >= expectedValue-delta) && (value <= expectedValue+delta) {
|
||||
// Found the point, return without failing
|
||||
return
|
||||
}
|
||||
} else {
|
||||
assert.Fail(t, fmt.Sprintf("Measurement \"%s\" does not have type float64",
|
||||
measurement))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
msg := fmt.Sprintf(
|
||||
"Could not find measurement \"%s\" with requested tags within %f of %f, Actual: %f",
|
||||
measurement, delta, expectedValue, actualValue)
|
||||
assert.Fail(t, msg)
|
||||
}
|
||||
|
||||
// TestCPUCountChange tests that no errors are encountered if the number of
|
||||
// CPUs increases as reported with LXC.
|
||||
func TestCPUCountIncrease(t *testing.T) {
|
||||
var mps system.MockPS
|
||||
var mps2 system.MockPS
|
||||
var acc testutil.Accumulator
|
||||
var err error
|
||||
|
||||
cs := NewCPUStats(&mps)
|
||||
|
||||
mps.On("CPUTimes").Return(
|
||||
[]cpu.TimesStat{
|
||||
cpu.TimesStat{
|
||||
CPU: "cpu0",
|
||||
},
|
||||
}, nil)
|
||||
|
||||
err = cs.Gather(&acc)
|
||||
require.NoError(t, err)
|
||||
|
||||
mps2.On("CPUTimes").Return(
|
||||
[]cpu.TimesStat{
|
||||
cpu.TimesStat{
|
||||
CPU: "cpu0",
|
||||
},
|
||||
cpu.TimesStat{
|
||||
CPU: "cpu1",
|
||||
},
|
||||
}, nil)
|
||||
cs.ps = &mps2
|
||||
|
||||
err = cs.Gather(&acc)
|
||||
require.NoError(t, err)
|
||||
}
|
||||
|
||||
// TestCPUTimesDecrease tests that telegraf continue to works after
|
||||
// CPU times decrease, which seems to occur when Linux system is suspended.
|
||||
func TestCPUTimesDecrease(t *testing.T) {
|
||||
var mps system.MockPS
|
||||
defer mps.AssertExpectations(t)
|
||||
var acc testutil.Accumulator
|
||||
|
||||
cts := cpu.TimesStat{
|
||||
CPU: "cpu0",
|
||||
User: 18,
|
||||
Idle: 80,
|
||||
Iowait: 2,
|
||||
}
|
||||
|
||||
cts2 := cpu.TimesStat{
|
||||
CPU: "cpu0",
|
||||
User: 38, // increased by 20
|
||||
Idle: 40, // decreased by 40
|
||||
Iowait: 1, // decreased by 1
|
||||
}
|
||||
|
||||
cts3 := cpu.TimesStat{
|
||||
CPU: "cpu0",
|
||||
User: 56, // increased by 18
|
||||
Idle: 120, // increased by 80
|
||||
Iowait: 3, // increased by 2
|
||||
}
|
||||
|
||||
mps.On("CPUTimes").Return([]cpu.TimesStat{cts}, nil)
|
||||
|
||||
cs := NewCPUStats(&mps)
|
||||
|
||||
cputags := map[string]string{
|
||||
"cpu": "cpu0",
|
||||
}
|
||||
|
||||
err := cs.Gather(&acc)
|
||||
require.NoError(t, err)
|
||||
|
||||
// Computed values are checked with delta > 0 because of floating point arithmatic
|
||||
// imprecision
|
||||
assertContainsTaggedFloat(t, &acc, "cpu", "time_user", 18, 0, cputags)
|
||||
assertContainsTaggedFloat(t, &acc, "cpu", "time_idle", 80, 0, cputags)
|
||||
assertContainsTaggedFloat(t, &acc, "cpu", "time_iowait", 2, 0, cputags)
|
||||
|
||||
mps2 := system.MockPS{}
|
||||
mps2.On("CPUTimes").Return([]cpu.TimesStat{cts2}, nil)
|
||||
cs.ps = &mps2
|
||||
|
||||
// CPU times decreased. An error should be raised
|
||||
err = cs.Gather(&acc)
|
||||
require.Error(t, err)
|
||||
|
||||
mps3 := system.MockPS{}
|
||||
mps3.On("CPUTimes").Return([]cpu.TimesStat{cts3}, nil)
|
||||
cs.ps = &mps3
|
||||
|
||||
err = cs.Gather(&acc)
|
||||
require.NoError(t, err)
|
||||
|
||||
assertContainsTaggedFloat(t, &acc, "cpu", "time_user", 56, 0, cputags)
|
||||
assertContainsTaggedFloat(t, &acc, "cpu", "time_idle", 120, 0, cputags)
|
||||
assertContainsTaggedFloat(t, &acc, "cpu", "time_iowait", 3, 0, cputags)
|
||||
|
||||
assertContainsTaggedFloat(t, &acc, "cpu", "usage_user", 18, 0.0005, cputags)
|
||||
assertContainsTaggedFloat(t, &acc, "cpu", "usage_idle", 80, 0.0005, cputags)
|
||||
assertContainsTaggedFloat(t, &acc, "cpu", "usage_iowait", 2, 0.0005, cputags)
|
||||
}
|
||||
Reference in New Issue
Block a user