Fix CPU system plugin gets stuck after system suspend (#3342)

This commit is contained in:
Pierre Fersing 2017-10-16 23:25:00 +02:00 committed by Daniel Nelson
parent d739aa448d
commit 7efce41d39
3 changed files with 73 additions and 2 deletions

View File

@ -50,6 +50,7 @@
- [#3136](https://github.com/influxdata/telegraf/issues/3136): Fix webhooks input address in use during reload. - [#3136](https://github.com/influxdata/telegraf/issues/3136): Fix webhooks input address in use during reload.
- [#3258](https://github.com/influxdata/telegraf/issues/3258): Unlock Statsd when stopping to prevent deadlock. - [#3258](https://github.com/influxdata/telegraf/issues/3258): Unlock Statsd when stopping to prevent deadlock.
- [#3319](https://github.com/influxdata/telegraf/issues/3319): Fix cloudwatch output requires unneeded permissions. - [#3319](https://github.com/influxdata/telegraf/issues/3319): Fix cloudwatch output requires unneeded permissions.
- [#3342](https://github.com/influxdata/telegraf/pull/3342): Fix CPU input plugin stuck after suspend on Linux.
## v1.4.3 [unreleased] ## v1.4.3 [unreleased]

View File

@ -96,7 +96,8 @@ func (s *CPUStats) Gather(acc telegraf.Accumulator) error {
totalDelta := total - lastTotal totalDelta := total - lastTotal
if totalDelta < 0 { if totalDelta < 0 {
return fmt.Errorf("Error: current total CPU time is less than previous total CPU time") err = fmt.Errorf("Error: current total CPU time is less than previous total CPU time")
break
} }
if totalDelta == 0 { if totalDelta == 0 {
@ -126,7 +127,7 @@ func (s *CPUStats) Gather(acc telegraf.Accumulator) error {
s.lastStats[cts.CPU] = cts s.lastStats[cts.CPU] = cts
} }
return nil return err
} }
func totalCpuTime(t cpu.TimesStat) float64 { func totalCpuTime(t cpu.TimesStat) float64 {

View File

@ -184,3 +184,72 @@ func TestCPUCountIncrease(t *testing.T) {
err = cs.Gather(&acc) err = cs.Gather(&acc)
require.NoError(t, err) require.NoError(t, err)
} }
// TestCPUTimesDecrease tests that telegraf continue to works after
// CPU times decrease, which seems to occur when Linux system is suspended.
func TestCPUTimesDecrease(t *testing.T) {
var mps MockPS
defer mps.AssertExpectations(t)
var acc testutil.Accumulator
cts := cpu.TimesStat{
CPU: "cpu0",
User: 18,
Idle: 80,
Iowait: 2,
}
cts2 := cpu.TimesStat{
CPU: "cpu0",
User: 38, // increased by 20
Idle: 40, // decreased by 40
Iowait: 1, // decreased by 1
}
cts3 := cpu.TimesStat{
CPU: "cpu0",
User: 56, // increased by 18
Idle: 120, // increased by 80
Iowait: 3, // increased by 2
}
mps.On("CPUTimes").Return([]cpu.TimesStat{cts}, nil)
cs := NewCPUStats(&mps)
cputags := map[string]string{
"cpu": "cpu0",
}
err := cs.Gather(&acc)
require.NoError(t, err)
// Computed values are checked with delta > 0 becasue of floating point arithmatic
// imprecision
assertContainsTaggedFloat(t, &acc, "cpu", "time_user", 18, 0, cputags)
assertContainsTaggedFloat(t, &acc, "cpu", "time_idle", 80, 0, cputags)
assertContainsTaggedFloat(t, &acc, "cpu", "time_iowait", 2, 0, cputags)
mps2 := MockPS{}
mps2.On("CPUTimes").Return([]cpu.TimesStat{cts2}, nil)
cs.ps = &mps2
// CPU times decreased. An error should be raised
err = cs.Gather(&acc)
require.Error(t, err)
mps3 := MockPS{}
mps3.On("CPUTimes").Return([]cpu.TimesStat{cts3}, nil)
cs.ps = &mps3
err = cs.Gather(&acc)
require.NoError(t, err)
assertContainsTaggedFloat(t, &acc, "cpu", "time_user", 56, 0, cputags)
assertContainsTaggedFloat(t, &acc, "cpu", "time_idle", 120, 0, cputags)
assertContainsTaggedFloat(t, &acc, "cpu", "time_iowait", 3, 0, cputags)
assertContainsTaggedFloat(t, &acc, "cpu", "usage_user", 18, 0.0005, cputags)
assertContainsTaggedFloat(t, &acc, "cpu", "usage_idle", 80, 0.0005, cputags)
assertContainsTaggedFloat(t, &acc, "cpu", "usage_iowait", 2, 0.0005, cputags)
}