Add configurable timeout setting to smart input (#6241)

This commit is contained in:
Marc Venturini 2019-08-14 01:24:44 +08:00 committed by Daniel Nelson
parent 23b86552fd
commit 5473872ac1
3 changed files with 54 additions and 37 deletions

View File

@ -60,6 +60,9 @@ smartctl -s on <device>
## done and all found will be included except for the ## done and all found will be included except for the
## excluded in excludes. ## excluded in excludes.
# devices = [ "/dev/ada0 -d atacam" ] # devices = [ "/dev/ada0 -d atacam" ]
## Timeout for the smartctl command to complete.
# timeout = "30s"
``` ```
### Permissions ### Permissions

View File

@ -119,6 +119,7 @@ type Smart struct {
Excludes []string Excludes []string
Devices []string Devices []string
UseSudo bool UseSudo bool
Timeout internal.Duration
} }
var sampleConfig = ` var sampleConfig = `
@ -151,8 +152,17 @@ var sampleConfig = `
## done and all found will be included except for the ## done and all found will be included except for the
## excluded in excludes. ## excluded in excludes.
# devices = [ "/dev/ada0 -d atacam" ] # devices = [ "/dev/ada0 -d atacam" ]
## Timeout for the smartctl command to complete.
# timeout = "30s"
` `
// NewSmart builds a Smart plugin value whose Timeout is preset to
// 30 seconds; every other field is left at its zero value.
func NewSmart() *Smart {
	defaultTimeout := internal.Duration{Duration: 30 * time.Second}

	s := new(Smart)
	s.Timeout = defaultTimeout
	return s
}
func (m *Smart) SampleConfig() string { func (m *Smart) SampleConfig() string {
return sampleConfig return sampleConfig
} }
@ -180,17 +190,17 @@ func (m *Smart) Gather(acc telegraf.Accumulator) error {
} }
// Wrap with sudo // Wrap with sudo
var runCmd = func(sudo bool, command string, args ...string) ([]byte, error) { var runCmd = func(timeout internal.Duration, sudo bool, command string, args ...string) ([]byte, error) {
cmd := exec.Command(command, args...) cmd := exec.Command(command, args...)
if sudo { if sudo {
cmd = exec.Command("sudo", append([]string{"-n", command}, args...)...) cmd = exec.Command("sudo", append([]string{"-n", command}, args...)...)
} }
return internal.CombinedOutputTimeout(cmd, time.Second*5) return internal.CombinedOutputTimeout(cmd, timeout.Duration)
} }
// Scan for S.M.A.R.T. devices // Scan for S.M.A.R.T. devices
func (m *Smart) scan() ([]string, error) { func (m *Smart) scan() ([]string, error) {
out, err := runCmd(m.UseSudo, m.Path, "--scan") out, err := runCmd(m.Timeout, m.UseSudo, m.Path, "--scan")
if err != nil { if err != nil {
return []string{}, fmt.Errorf("failed to run command '%s --scan': %s - %s", m.Path, err, string(out)) return []string{}, fmt.Errorf("failed to run command '%s --scan': %s - %s", m.Path, err, string(out))
} }
@ -226,7 +236,7 @@ func (m *Smart) getAttributes(acc telegraf.Accumulator, devices []string) {
wg.Add(len(devices)) wg.Add(len(devices))
for _, device := range devices { for _, device := range devices {
go gatherDisk(acc, m.UseSudo, m.Attributes, m.Path, m.Nocheck, device, &wg) go gatherDisk(acc, m.Timeout, m.UseSudo, m.Attributes, m.Path, m.Nocheck, device, &wg)
} }
wg.Wait() wg.Wait()
@ -243,12 +253,12 @@ func exitStatus(err error) (int, error) {
return 0, err return 0, err
} }
func gatherDisk(acc telegraf.Accumulator, usesudo, collectAttributes bool, smartctl, nocheck, device string, wg *sync.WaitGroup) { func gatherDisk(acc telegraf.Accumulator, timeout internal.Duration, usesudo, collectAttributes bool, smartctl, nocheck, device string, wg *sync.WaitGroup) {
defer wg.Done() defer wg.Done()
// smartctl 5.41 & 5.42 are broken regarding handling of --nocheck/-n // smartctl 5.41 & 5.42 are broken regarding handling of --nocheck/-n
args := []string{"--info", "--health", "--attributes", "--tolerance=verypermissive", "-n", nocheck, "--format=brief"} args := []string{"--info", "--health", "--attributes", "--tolerance=verypermissive", "-n", nocheck, "--format=brief"}
args = append(args, strings.Split(device, " ")...) args = append(args, strings.Split(device, " ")...)
out, e := runCmd(usesudo, smartctl, args...) out, e := runCmd(timeout, usesudo, smartctl, args...)
outStr := string(out) outStr := string(out)
// Ignore all exit statuses except if it is a command line parse error // Ignore all exit statuses except if it is a command line parse error
@ -436,14 +446,13 @@ func parseTemperature(fields, deviceFields map[string]interface{}, str string) e
} }
func init() { func init() {
m := Smart{} inputs.Add("smart", func() telegraf.Input {
m := NewSmart()
path, _ := exec.LookPath("smartctl") path, _ := exec.LookPath("smartctl")
if len(path) > 0 { if len(path) > 0 {
m.Path = path m.Path = path
} }
m.Nocheck = "standby" m.Nocheck = "standby"
return m
inputs.Add("smart", func() telegraf.Input {
return &m
}) })
} }

View File

@ -7,19 +7,22 @@ import (
"time" "time"
"github.com/influxdata/telegraf" "github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/internal"
"github.com/influxdata/telegraf/testutil" "github.com/influxdata/telegraf/testutil"
"github.com/stretchr/testify/assert" "github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require" "github.com/stretchr/testify/require"
) )
func TestGatherAttributes(t *testing.T) { func TestGatherAttributes(t *testing.T) {
s := &Smart{ s := NewSmart()
Path: "smartctl", s.Path = "smartctl"
Attributes: true, s.Attributes = true
}
assert.Equal(t, time.Second*30, s.Timeout.Duration)
var acc testutil.Accumulator var acc testutil.Accumulator
runCmd = func(sudo bool, command string, args ...string) ([]byte, error) { runCmd = func(timeout internal.Duration, sudo bool, command string, args ...string) ([]byte, error) {
if len(args) > 0 { if len(args) > 0 {
if args[0] == "--scan" { if args[0] == "--scan" {
return []byte(mockScanData), nil return []byte(mockScanData), nil
@ -326,10 +329,12 @@ func TestGatherAttributes(t *testing.T) {
} }
func TestGatherNoAttributes(t *testing.T) { func TestGatherNoAttributes(t *testing.T) {
s := &Smart{ s := NewSmart()
Path: "smartctl", s.Path = "smartctl"
Attributes: false, s.Attributes = false
}
assert.Equal(t, time.Second*30, s.Timeout.Duration)
// overwriting exec commands with mock commands // overwriting exec commands with mock commands
var acc testutil.Accumulator var acc testutil.Accumulator
@ -374,7 +379,7 @@ func TestExcludedDev(t *testing.T) {
} }
func TestGatherSATAInfo(t *testing.T) { func TestGatherSATAInfo(t *testing.T) {
runCmd = func(sudo bool, command string, args ...string) ([]byte, error) { runCmd = func(timeout internal.Duration, sudo bool, command string, args ...string) ([]byte, error) {
return []byte(hgstSATAInfoData), nil return []byte(hgstSATAInfoData), nil
} }
@ -384,13 +389,13 @@ func TestGatherSATAInfo(t *testing.T) {
) )
wg.Add(1) wg.Add(1)
gatherDisk(acc, true, true, "", "", "", wg) gatherDisk(acc, internal.Duration{Duration: time.Second * 30}, true, true, "", "", "", wg)
assert.Equal(t, 101, acc.NFields(), "Wrong number of fields gathered") assert.Equal(t, 101, acc.NFields(), "Wrong number of fields gathered")
assert.Equal(t, uint64(20), acc.NMetrics(), "Wrong number of metrics gathered") assert.Equal(t, uint64(20), acc.NMetrics(), "Wrong number of metrics gathered")
} }
func TestGatherSATAInfo65(t *testing.T) { func TestGatherSATAInfo65(t *testing.T) {
runCmd = func(sudo bool, command string, args ...string) ([]byte, error) { runCmd = func(timeout internal.Duration, sudo bool, command string, args ...string) ([]byte, error) {
return []byte(hgstSATAInfoData65), nil return []byte(hgstSATAInfoData65), nil
} }
@ -400,13 +405,13 @@ func TestGatherSATAInfo65(t *testing.T) {
) )
wg.Add(1) wg.Add(1)
gatherDisk(acc, true, true, "", "", "", wg) gatherDisk(acc, internal.Duration{Duration: time.Second * 30}, true, true, "", "", "", wg)
assert.Equal(t, 91, acc.NFields(), "Wrong number of fields gathered") assert.Equal(t, 91, acc.NFields(), "Wrong number of fields gathered")
assert.Equal(t, uint64(18), acc.NMetrics(), "Wrong number of metrics gathered") assert.Equal(t, uint64(18), acc.NMetrics(), "Wrong number of metrics gathered")
} }
func TestGatherHgstSAS(t *testing.T) { func TestGatherHgstSAS(t *testing.T) {
runCmd = func(sudo bool, command string, args ...string) ([]byte, error) { runCmd = func(timeout internal.Duration, sudo bool, command string, args ...string) ([]byte, error) {
return []byte(hgstSASInfoData), nil return []byte(hgstSASInfoData), nil
} }
@ -416,13 +421,13 @@ func TestGatherHgstSAS(t *testing.T) {
) )
wg.Add(1) wg.Add(1)
gatherDisk(acc, true, true, "", "", "", wg) gatherDisk(acc, internal.Duration{Duration: time.Second * 30}, true, true, "", "", "", wg)
assert.Equal(t, 6, acc.NFields(), "Wrong number of fields gathered") assert.Equal(t, 6, acc.NFields(), "Wrong number of fields gathered")
assert.Equal(t, uint64(4), acc.NMetrics(), "Wrong number of metrics gathered") assert.Equal(t, uint64(4), acc.NMetrics(), "Wrong number of metrics gathered")
} }
func TestGatherHtSAS(t *testing.T) { func TestGatherHtSAS(t *testing.T) {
runCmd = func(sudo bool, command string, args ...string) ([]byte, error) { runCmd = func(timeout internal.Duration, sudo bool, command string, args ...string) ([]byte, error) {
return []byte(htSASInfoData), nil return []byte(htSASInfoData), nil
} }
@ -432,13 +437,13 @@ func TestGatherHtSAS(t *testing.T) {
) )
wg.Add(1) wg.Add(1)
gatherDisk(acc, true, true, "", "", "", wg) gatherDisk(acc, internal.Duration{Duration: time.Second * 30}, true, true, "", "", "", wg)
assert.Equal(t, 5, acc.NFields(), "Wrong number of fields gathered") assert.Equal(t, 5, acc.NFields(), "Wrong number of fields gathered")
assert.Equal(t, uint64(3), acc.NMetrics(), "Wrong number of metrics gathered") assert.Equal(t, uint64(3), acc.NMetrics(), "Wrong number of metrics gathered")
} }
func TestGatherSSD(t *testing.T) { func TestGatherSSD(t *testing.T) {
runCmd = func(sudo bool, command string, args ...string) ([]byte, error) { runCmd = func(timeout internal.Duration, sudo bool, command string, args ...string) ([]byte, error) {
return []byte(ssdInfoData), nil return []byte(ssdInfoData), nil
} }
@ -448,13 +453,13 @@ func TestGatherSSD(t *testing.T) {
) )
wg.Add(1) wg.Add(1)
gatherDisk(acc, true, true, "", "", "", wg) gatherDisk(acc, internal.Duration{Duration: time.Second * 30}, true, true, "", "", "", wg)
assert.Equal(t, 105, acc.NFields(), "Wrong number of fields gathered") assert.Equal(t, 105, acc.NFields(), "Wrong number of fields gathered")
assert.Equal(t, uint64(26), acc.NMetrics(), "Wrong number of metrics gathered") assert.Equal(t, uint64(26), acc.NMetrics(), "Wrong number of metrics gathered")
} }
func TestGatherSSDRaid(t *testing.T) { func TestGatherSSDRaid(t *testing.T) {
runCmd = func(sudo bool, command string, args ...string) ([]byte, error) { runCmd = func(timeout internal.Duration, sudo bool, command string, args ...string) ([]byte, error) {
return []byte(ssdRaidInfoData), nil return []byte(ssdRaidInfoData), nil
} }
@ -464,13 +469,13 @@ func TestGatherSSDRaid(t *testing.T) {
) )
wg.Add(1) wg.Add(1)
gatherDisk(acc, true, true, "", "", "", wg) gatherDisk(acc, internal.Duration{Duration: time.Second * 30}, true, true, "", "", "", wg)
assert.Equal(t, 74, acc.NFields(), "Wrong number of fields gathered") assert.Equal(t, 74, acc.NFields(), "Wrong number of fields gathered")
assert.Equal(t, uint64(15), acc.NMetrics(), "Wrong number of metrics gathered") assert.Equal(t, uint64(15), acc.NMetrics(), "Wrong number of metrics gathered")
} }
func TestGatherNvme(t *testing.T) { func TestGatherNvme(t *testing.T) {
runCmd = func(sudo bool, command string, args ...string) ([]byte, error) { runCmd = func(timeout internal.Duration, sudo bool, command string, args ...string) ([]byte, error) {
return []byte(nvmeInfoData), nil return []byte(nvmeInfoData), nil
} }
@ -480,7 +485,7 @@ func TestGatherNvme(t *testing.T) {
) )
wg.Add(1) wg.Add(1)
gatherDisk(acc, true, true, "", "", "", wg) gatherDisk(acc, internal.Duration{Duration: time.Second * 30}, true, true, "", "", "", wg)
expected := []telegraf.Metric{ expected := []telegraf.Metric{
testutil.MustMetric("smart_device", testutil.MustMetric("smart_device",