Add smart input plugin for collecting S.M.A.R.T. data (#2449)
This commit is contained in:
parent
6b1d3edf6e
commit
3be58c6571
|
@ -76,6 +76,7 @@ import (
|
||||||
_ "github.com/influxdata/telegraf/plugins/inputs/riak"
|
_ "github.com/influxdata/telegraf/plugins/inputs/riak"
|
||||||
_ "github.com/influxdata/telegraf/plugins/inputs/salesforce"
|
_ "github.com/influxdata/telegraf/plugins/inputs/salesforce"
|
||||||
_ "github.com/influxdata/telegraf/plugins/inputs/sensors"
|
_ "github.com/influxdata/telegraf/plugins/inputs/sensors"
|
||||||
|
_ "github.com/influxdata/telegraf/plugins/inputs/smart"
|
||||||
_ "github.com/influxdata/telegraf/plugins/inputs/snmp"
|
_ "github.com/influxdata/telegraf/plugins/inputs/snmp"
|
||||||
_ "github.com/influxdata/telegraf/plugins/inputs/snmp_legacy"
|
_ "github.com/influxdata/telegraf/plugins/inputs/snmp_legacy"
|
||||||
_ "github.com/influxdata/telegraf/plugins/inputs/socket_listener"
|
_ "github.com/influxdata/telegraf/plugins/inputs/socket_listener"
|
||||||
|
|
|
@ -0,0 +1,135 @@
|
||||||
|
# Telegraf S.M.A.R.T. plugin
|
||||||
|
|
||||||
|
Get metrics using the command line utility `smartctl` for S.M.A.R.T. (Self-Monitoring, Analysis and Reporting Technology) storage devices. SMART is a monitoring system included in computer hard disk drives (HDDs) and solid-state drives (SSDs) that detects and reports on various indicators of drive reliability, with the intent of enabling the anticipation of hardware failures.
|
||||||
|
See smartmontools (https://www.smartmontools.org/).
|
||||||
|
|
||||||
|
If no devices are specified, the plugin will scan for SMART devices via the following command:
|
||||||
|
|
||||||
|
```
|
||||||
|
smartctl --scan
|
||||||
|
```
|
||||||
|
|
||||||
|
Metrics will be reported from the following `smartctl` command:
|
||||||
|
|
||||||
|
```
|
||||||
|
smartctl --info --health --attributes --tolerance=verypermissive -n <nocheck> --format=brief <device>
|
||||||
|
```
|
||||||
|
|
||||||
|
This plugin supports _smartmontools_ version 5.41 and above, but v. 5.41 and v. 5.42
|
||||||
|
might require setting `nocheck`, see the comment in the sample configuration.
|
||||||
|
|
||||||
|
To enable SMART on a storage device run:
|
||||||
|
|
||||||
|
```
|
||||||
|
smartctl -s on <device>
|
||||||
|
```
|
||||||
|
|
||||||
|
## Measurements
|
||||||
|
|
||||||
|
- smart_device:
|
||||||
|
|
||||||
|
* Tags:
|
||||||
|
- `capacity`
|
||||||
|
- `device`
|
||||||
|
- `model`
|
||||||
|
- `enabled`
|
||||||
|
- `health`
|
||||||
|
- `serial_no`
|
||||||
|
- `wwn`
|
||||||
|
* Fields:
|
||||||
|
- `exit_status`
|
||||||
|
- `health_ok`
|
||||||
|
- `read_error_rate`
|
||||||
|
- `seek_error_rate`
|
||||||
|
- `temp_c`
|
||||||
|
- `udma_crc_errors`
|
||||||
|
|
||||||
|
- smart_attribute:
|
||||||
|
|
||||||
|
* Tags:
|
||||||
|
- `device`
|
||||||
|
- `fail`
|
||||||
|
- `flags`
|
||||||
|
- `id`
|
||||||
|
- `name`
|
||||||
|
- `serial_no`
|
||||||
|
- `wwn`
|
||||||
|
* Fields:
|
||||||
|
- `exit_status`
|
||||||
|
- `raw_value`
|
||||||
|
- `threshold`
|
||||||
|
- `value`
|
||||||
|
- `worst`
|
||||||
|
|
||||||
|
### Flags
|
||||||
|
|
||||||
|
The interpretation of the tag `flags` is:
|
||||||
|
- *K* auto-keep
|
||||||
|
- *C* event count
|
||||||
|
- *R* error rate
|
||||||
|
- *S* speed/performance
|
||||||
|
- *O* updated online
|
||||||
|
- *P* prefailure warning
|
||||||
|
|
||||||
|
### Exit Status
|
||||||
|
|
||||||
|
The `exit_status` field captures the exit status of the smartctl command which
|
||||||
|
is defined by a bitmask. For the interpretation of the bitmask see the man page for
|
||||||
|
smartctl.
|
||||||
|
|
||||||
|
### Device Names
|
||||||
|
|
||||||
|
Device names, e.g., `/dev/sda`, are *not persistent*, and may be
|
||||||
|
subject to change across reboots or system changes. Instead, you can use the
|
||||||
|
*World Wide Name* (WWN) or serial number to identify devices. On Linux block
|
||||||
|
devices can be referenced by the WWN in the following location:
|
||||||
|
`/dev/disk/by-id/`.
|
||||||
|
|
||||||
|
## Configuration
|
||||||
|
|
||||||
|
```toml
|
||||||
|
# Read metrics from storage devices supporting S.M.A.R.T.
|
||||||
|
[[inputs.smart]]
|
||||||
|
## Optionally specify the path to the smartctl executable
|
||||||
|
# path = "/usr/bin/smartctl"
|
||||||
|
#
|
||||||
|
## On most platforms smartctl requires root access.
|
||||||
|
## Setting 'use_sudo' to true will make use of sudo to run smartctl.
|
||||||
|
## Sudo must be configured to allow the telegraf user to run smartctl
|
||||||
|
## without a password.
|
||||||
|
# use_sudo = false
|
||||||
|
#
|
||||||
|
## Skip checking disks in this power mode. Defaults to
|
||||||
|
## "standby" to not wake up disks that have stopped rotating.
|
||||||
|
## See --nocheck in the man pages for smartctl.
|
||||||
|
## smartctl version 5.41 and 5.42 have faulty detection of
|
||||||
|
## power mode and might require changing this value to
|
||||||
|
## "never" depending on your storage device.
|
||||||
|
# nocheck = "standby"
|
||||||
|
#
|
||||||
|
## Gather detailed metrics for each SMART Attribute.
|
||||||
|
## Defaults to "false"
|
||||||
|
##
|
||||||
|
# attributes = false
|
||||||
|
#
|
||||||
|
## Optionally specify devices to exclude from reporting.
|
||||||
|
# excludes = [ "/dev/pass6" ]
|
||||||
|
#
|
||||||
|
## Optionally specify devices and device type, if unset
|
||||||
|
## a scan (smartctl --scan) for S.M.A.R.T. devices will
|
||||||
|
## be done and all devices found will be included except for those
|
||||||
|
## excluded in excludes.
|
||||||
|
# devices = [ "/dev/ada0 -d atacam" ]
|
||||||
|
```
|
||||||
|
|
||||||
|
To run `smartctl` with `sudo`, set `use_sudo = true`, or create a wrapper script and use `path` in
|
||||||
|
the configuration to execute that.
|
||||||
|
|
||||||
|
## Output
|
||||||
|
|
||||||
|
Example output from an _Apple SSD_:
|
||||||
|
```
|
||||||
|
> smart_attribute,serial_no=S1K5NYCD964433,wwn=5002538655584d30,id=199,name=UDMA_CRC_Error_Count,flags=-O-RC-,fail=-,host=mbpro.local,device=/dev/rdisk0 threshold=0i,raw_value=0i,exit_status=0i,value=200i,worst=200i 1502536854000000000
|
||||||
|
> smart_attribute,device=/dev/rdisk0,serial_no=S1K5NYCD964433,wwn=5002538655584d30,id=240,name=Unknown_SSD_Attribute,flags=-O---K,fail=-,host=mbpro.local exit_status=0i,value=100i,worst=100i,threshold=0i,raw_value=0i 1502536854000000000
|
||||||
|
> smart_device,enabled=Enabled,host=mbpro.local,device=/dev/rdisk0,model=APPLE\ SSD\ SM0512F,serial_no=S1K5NYCD964433,wwn=5002538655584d30,capacity=500277790720 udma_crc_errors=0i,exit_status=0i,health_ok=true,read_error_rate=0i,temp_c=40i 1502536854000000000
|
||||||
|
```
|
|
@ -0,0 +1,339 @@
|
||||||
|
package smart
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"os/exec"
|
||||||
|
"regexp"
|
||||||
|
"strconv"
|
||||||
|
"strings"
|
||||||
|
"sync"
|
||||||
|
"syscall"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/influxdata/telegraf"
|
||||||
|
"github.com/influxdata/telegraf/internal"
|
||||||
|
"github.com/influxdata/telegraf/plugins/inputs"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Package-level helpers for running smartctl and picking values out of its
// text output.
var (
	// execCommand constructs every command the plugin runs. It is a variable
	// so that tests can substitute a stub.
	execCommand = exec.Command

	// modelInInfo captures the model from a line such as
	//   Device Model:     APPLE SSD SM256E
	modelInInfo = regexp.MustCompile(`^Device Model:\s+(.*)$`)

	// serialInInfo captures the serial number from a line such as
	//   Serial Number:    S0X5NZBC422720
	serialInInfo = regexp.MustCompile(`^Serial Number:\s+(.*)$`)

	// wwnInInfo captures the (space separated) WWN from a line such as
	//   LU WWN Device Id: 5 002538 655584d30
	wwnInInfo = regexp.MustCompile(`^LU WWN Device Id:\s+(.*)$`)

	// usercapacityInInfo captures the comma-grouped byte count from a line
	// such as
	//   User Capacity:    251,000,193,024 bytes [251 GB]
	usercapacityInInfo = regexp.MustCompile(`^User Capacity:\s+([0-9,]+)\s+bytes.*$`)

	// smartEnabledInInfo captures the single-word state from a line such as
	//   SMART support is: Enabled
	// Lines with trailing text after the first word do not match.
	smartEnabledInInfo = regexp.MustCompile(`^SMART support is:\s+(\w+)$`)

	// smartOverallHealth captures the verdict (PASSED, FAILED, UNKNOWN) from
	//   SMART overall-health self-assessment test result: PASSED
	smartOverallHealth = regexp.MustCompile(`^SMART overall-health self-assessment test result:\s+(\w+).*$`)

	// attribute matches one row of the brief-format attribute table and
	// captures, in order: ID, name, flags, value, worst, threshold, fail and
	// the leading token of the raw value.
	//
	//   ID# ATTRIBUTE_NAME          FLAGS    VALUE WORST THRESH FAIL RAW_VALUE
	//     1 Raw_Read_Error_Rate     -O-RC-   200   200   000    -    0
	//     5 Reallocated_Sector_Ct   PO--CK   100   100   000    -    0
	//   192 Power-Off_Retract_Count -O--C-   097   097   000    -    14716
	attribute = regexp.MustCompile(`^\s*([0-9]+)\s(\S+)\s+([-P][-O][-S][-R][-C][-K])\s+([0-9]+)\s+([0-9]+)\s+([0-9]+)\s+([-\w]+)\s+([\w\+\.]+).*$`)

	// deviceFieldIds maps the attribute IDs whose raw value is also reported
	// on the per-device measurement to the field name used there.
	deviceFieldIds = map[string]string{
		"1":   "read_error_rate",
		"7":   "seek_error_rate",
		"194": "temp_c",
		"199": "udma_crc_errors",
	}
)
|
||||||
|
|
||||||
|
// Smart is the input plugin configuration for collecting S.M.A.R.T. data
// through the smartctl command line tool.
type Smart struct {
	// Path is the smartctl executable to run. Gather fails when it is empty.
	Path string

	// Nocheck is passed to smartctl as the value of its -n option.
	Nocheck string

	// Attributes enables one smart_attribute metric per attribute row, in
	// addition to the per-device smart_device metric.
	Attributes bool

	// Excludes lists device names to drop from the result of a device scan.
	Excludes []string

	// Devices lists the devices to query; each entry may carry additional
	// smartctl arguments after the device name. When empty, devices are
	// discovered with a scan.
	Devices []string

	// UseSudo runs smartctl through "sudo -n".
	UseSudo bool
}
|
||||||
|
|
||||||
|
// sampleConfig is the commented example configuration returned by
// SampleConfig.
var sampleConfig = `
  ## Optionally specify the path to the smartctl executable
  # path = "/usr/bin/smartctl"
  #
  ## On most platforms smartctl requires root access.
  ## Setting 'use_sudo' to true will make use of sudo to run smartctl.
  ## Sudo must be configured to allow the telegraf user to run smartctl
  ## without a password.
  # use_sudo = false
  #
  ## Skip checking disks in this power mode. Defaults to
  ## "standby" to not wake up disks that have stopped rotating.
  ## See --nocheck in the man pages for smartctl.
  ## smartctl version 5.41 and 5.42 have faulty detection of
  ## power mode and might require changing this value to
  ## "never" depending on your disks.
  # nocheck = "standby"
  #
  ## Gather detailed metrics for each SMART Attribute.
  ## Defaults to "false"
  ##
  # attributes = false
  #
  ## Optionally specify devices to exclude from reporting.
  # excludes = [ "/dev/pass6" ]
  #
  ## Optionally specify devices and device type, if unset
  ## a scan (smartctl --scan) for S.M.A.R.T. devices will
  ## be done and all devices found will be included except for those
  ## excluded in excludes.
  # devices = [ "/dev/ada0 -d atacam" ]
`
|
||||||
|
|
||||||
|
func (m *Smart) SampleConfig() string {
|
||||||
|
return sampleConfig
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *Smart) Description() string {
|
||||||
|
return "Read metrics from storage devices supporting S.M.A.R.T."
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *Smart) Gather(acc telegraf.Accumulator) error {
|
||||||
|
if len(m.Path) == 0 {
|
||||||
|
return fmt.Errorf("smartctl not found: verify that smartctl is installed and that smartctl is in your PATH")
|
||||||
|
}
|
||||||
|
|
||||||
|
devices := m.Devices
|
||||||
|
if len(devices) == 0 {
|
||||||
|
var err error
|
||||||
|
devices, err = m.scan()
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
m.getAttributes(acc, devices)
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Wrap with sudo
|
||||||
|
func sudo(sudo bool, command string, args ...string) *exec.Cmd {
|
||||||
|
if sudo {
|
||||||
|
return execCommand("sudo", append([]string{"-n", command}, args...)...)
|
||||||
|
}
|
||||||
|
|
||||||
|
return execCommand(command, args...)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Scan for S.M.A.R.T. devices
|
||||||
|
func (m *Smart) scan() ([]string, error) {
|
||||||
|
|
||||||
|
cmd := sudo(m.UseSudo, m.Path, "--scan")
|
||||||
|
out, err := internal.CombinedOutputTimeout(cmd, time.Second*5)
|
||||||
|
if err != nil {
|
||||||
|
return []string{}, fmt.Errorf("failed to run command %s: %s - %s", strings.Join(cmd.Args, " "), err, string(out))
|
||||||
|
}
|
||||||
|
|
||||||
|
devices := []string{}
|
||||||
|
for _, line := range strings.Split(string(out), "\n") {
|
||||||
|
dev := strings.Split(line, "#")
|
||||||
|
if len(dev) > 1 && !excludedDev(m.Excludes, strings.TrimSpace(dev[0])) {
|
||||||
|
devices = append(devices, strings.TrimSpace(dev[0]))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return devices, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// excludedDev reports whether the device named at the start of deviceLine
// (everything before the first space) is listed in excludes.
func excludedDev(excludes []string, deviceLine string) bool {
	// strings.Split always yields at least one element, so indexing is safe.
	name := strings.Split(deviceLine, " ")[0]
	for _, candidate := range excludes {
		if candidate == name {
			return true
		}
	}
	return false
}
|
||||||
|
|
||||||
|
// Get info and attributes for each S.M.A.R.T. device
|
||||||
|
func (m *Smart) getAttributes(acc telegraf.Accumulator, devices []string) {
|
||||||
|
|
||||||
|
var wg sync.WaitGroup
|
||||||
|
wg.Add(len(devices))
|
||||||
|
|
||||||
|
for _, device := range devices {
|
||||||
|
go gatherDisk(acc, m.UseSudo, m.Attributes, m.Path, m.Nocheck, device, &wg)
|
||||||
|
}
|
||||||
|
|
||||||
|
wg.Wait()
|
||||||
|
}
|
||||||
|
|
||||||
|
// exitStatus extracts the process exit code from the error returned by running
// smartctl and decides whether that error should be reported.
//
// Command line parse errors are denoted by the exit code having the 0 bit set.
// All other exit codes are drive/communication errors and should be ignored:
// for those the code is returned together with a nil error.
//
// The original error is returned unchanged when
//   - err is not an *exec.ExitError (including err == nil, which yields 0, nil),
//   - the platform does not expose a syscall.WaitStatus,
//   - the exit code has bit 0 set, or
//   - no exit code is available (negative status).
func exitStatus(err error) (int, error) {
	exitErr, ok := err.(*exec.ExitError)
	if !ok {
		return 0, err
	}

	status, ok := exitErr.Sys().(syscall.WaitStatus)
	if !ok {
		return 0, err
	}

	code := status.ExitStatus()
	if code < 0 || code&1 != 0 {
		return code, err
	}
	return code, nil
}
|
||||||
|
|
||||||
|
func gatherDisk(acc telegraf.Accumulator, usesudo, attributes bool, path, nockeck, device string, wg *sync.WaitGroup) {
|
||||||
|
|
||||||
|
defer wg.Done()
|
||||||
|
// smartctl 5.41 & 5.42 have are broken regarding handling of --nocheck/-n
|
||||||
|
args := []string{"--info", "--health", "--attributes", "--tolerance=verypermissive", "-n", nockeck, "--format=brief"}
|
||||||
|
args = append(args, strings.Split(device, " ")...)
|
||||||
|
cmd := sudo(usesudo, path, args...)
|
||||||
|
out, e := internal.CombinedOutputTimeout(cmd, time.Second*5)
|
||||||
|
outStr := string(out)
|
||||||
|
|
||||||
|
// Ignore all exit statuses except if it is a command line parse error
|
||||||
|
exitStatus, er := exitStatus(e)
|
||||||
|
if er != nil {
|
||||||
|
acc.AddError(fmt.Errorf("failed to run command %s: %s - %s", strings.Join(cmd.Args, " "), e, outStr))
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
device_tags := map[string]string{}
|
||||||
|
device_tags["device"] = strings.Split(device, " ")[0]
|
||||||
|
device_fields := make(map[string]interface{})
|
||||||
|
device_fields["exit_status"] = exitStatus
|
||||||
|
|
||||||
|
for _, line := range strings.Split(outStr, "\n") {
|
||||||
|
|
||||||
|
model := modelInInfo.FindStringSubmatch(line)
|
||||||
|
if len(model) > 1 {
|
||||||
|
device_tags["model"] = model[1]
|
||||||
|
}
|
||||||
|
|
||||||
|
serial := serialInInfo.FindStringSubmatch(line)
|
||||||
|
if len(serial) > 1 {
|
||||||
|
device_tags["serial_no"] = serial[1]
|
||||||
|
}
|
||||||
|
|
||||||
|
wwn := wwnInInfo.FindStringSubmatch(line)
|
||||||
|
if len(wwn) > 1 {
|
||||||
|
device_tags["wwn"] = strings.Replace(wwn[1], " ", "", -1)
|
||||||
|
}
|
||||||
|
|
||||||
|
capacity := usercapacityInInfo.FindStringSubmatch(line)
|
||||||
|
if len(capacity) > 1 {
|
||||||
|
device_tags["capacity"] = strings.Replace(capacity[1], ",", "", -1)
|
||||||
|
}
|
||||||
|
|
||||||
|
enabled := smartEnabledInInfo.FindStringSubmatch(line)
|
||||||
|
if len(enabled) > 1 {
|
||||||
|
device_tags["enabled"] = enabled[1]
|
||||||
|
}
|
||||||
|
|
||||||
|
health := smartOverallHealth.FindStringSubmatch(line)
|
||||||
|
if len(health) > 1 {
|
||||||
|
device_fields["health_ok"] = (health[1] == "PASSED")
|
||||||
|
}
|
||||||
|
|
||||||
|
attr := attribute.FindStringSubmatch(line)
|
||||||
|
|
||||||
|
if len(attr) > 1 {
|
||||||
|
|
||||||
|
if attributes {
|
||||||
|
tags := map[string]string{}
|
||||||
|
fields := make(map[string]interface{})
|
||||||
|
|
||||||
|
tags["device"] = strings.Split(device, " ")[0]
|
||||||
|
|
||||||
|
if serial, ok := device_tags["serial_no"]; ok {
|
||||||
|
tags["serial_no"] = serial
|
||||||
|
}
|
||||||
|
if wwn, ok := device_tags["wwn"]; ok {
|
||||||
|
tags["wwn"] = wwn
|
||||||
|
}
|
||||||
|
tags["id"] = attr[1]
|
||||||
|
tags["name"] = attr[2]
|
||||||
|
tags["flags"] = attr[3]
|
||||||
|
|
||||||
|
fields["exit_status"] = exitStatus
|
||||||
|
if i, err := strconv.ParseInt(attr[4], 10, 64); err == nil {
|
||||||
|
fields["value"] = i
|
||||||
|
}
|
||||||
|
if i, err := strconv.ParseInt(attr[5], 10, 64); err == nil {
|
||||||
|
fields["worst"] = i
|
||||||
|
}
|
||||||
|
if i, err := strconv.ParseInt(attr[6], 10, 64); err == nil {
|
||||||
|
fields["threshold"] = i
|
||||||
|
}
|
||||||
|
|
||||||
|
tags["fail"] = attr[7]
|
||||||
|
if val, err := parseRawValue(attr[8]); err == nil {
|
||||||
|
fields["raw_value"] = val
|
||||||
|
}
|
||||||
|
|
||||||
|
acc.AddFields("smart_attribute", fields, tags)
|
||||||
|
}
|
||||||
|
|
||||||
|
// If the attribute matches on the one in deviceFieldIds
|
||||||
|
// save the raw value to a field.
|
||||||
|
if field, ok := deviceFieldIds[attr[1]]; ok {
|
||||||
|
if val, err := parseRawValue(attr[8]); err == nil {
|
||||||
|
device_fields[field] = val
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
acc.AddFields("smart_device", device_fields, device_tags)
|
||||||
|
}
|
||||||
|
|
||||||
|
// rawValueDurationUnit splits one "+"-separated component of a duration-style
// RAW_VALUE (for example "65h" or "09.259s") into its number and its unit
// letter. It is compiled once rather than on every parseRawValue call.
var rawValueDurationUnit = regexp.MustCompile("^(.*)([hms])$")

// parseRawValue converts the RAW_VALUE column of an attribute row to an
// integer.
//
// Two forms are understood:
//   - a plain base-10 integer, returned as is;
//   - a duration such as "65h+33m+09.259s", returned as a whole number of
//     seconds (fractions of a second are dropped).
//
// Within a duration, components without an h/m/s suffix are skipped and a
// component whose number cannot be parsed counts as zero. If the value is not
// an integer and contains no duration component at all, an error is returned
// instead of a misleading zero.
func parseRawValue(rawVal string) (int64, error) {
	// Integer
	if i, err := strconv.ParseInt(rawVal, 10, 64); err == nil {
		return i, nil
	}

	// Duration: 65h+33m+09.259s
	var (
		seconds int64
		matched bool
	)
	for _, part := range strings.Split(rawVal, "+") {
		timePart := rawValueDurationUnit.FindStringSubmatch(part)
		if len(timePart) == 0 {
			continue
		}
		matched = true

		switch timePart[2] {
		case "h":
			seconds += parseInt(timePart[1]) * 3600
		case "m":
			seconds += parseInt(timePart[1]) * 60
		case "s":
			// drop fractions of seconds
			seconds += parseInt(strings.Split(timePart[1], ".")[0])
		}
	}

	if !matched {
		return 0, fmt.Errorf("Couldn't parse RAW_VALUE '%s'", rawVal)
	}
	return seconds, nil
}

// parseInt parses str as a base-10 int64 and returns 0 when it is not a valid
// integer.
func parseInt(str string) int64 {
	i, err := strconv.ParseInt(str, 10, 64)
	if err != nil {
		return 0
	}
	return i
}
|
||||||
|
|
||||||
|
func init() {
|
||||||
|
m := Smart{}
|
||||||
|
path, _ := exec.LookPath("smartctl")
|
||||||
|
if len(path) > 0 {
|
||||||
|
m.Path = path
|
||||||
|
}
|
||||||
|
m.Nocheck = "standby"
|
||||||
|
|
||||||
|
inputs.Add("smart", func() telegraf.Input {
|
||||||
|
return &m
|
||||||
|
})
|
||||||
|
}
|
|
@ -0,0 +1,426 @@
|
||||||
|
package smart
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"os"
|
||||||
|
"os/exec"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/influxdata/telegraf/testutil"
|
||||||
|
"github.com/stretchr/testify/assert"
|
||||||
|
"github.com/stretchr/testify/require"
|
||||||
|
)
|
||||||
|
|
||||||
|
var (
|
||||||
|
mockScanData = `/dev/ada0 -d atacam # /dev/ada0, ATA device
|
||||||
|
`
|
||||||
|
mockInfoAttributeData = `smartctl 6.5 2016-05-07 r4318 [Darwin 16.4.0 x86_64] (local build)
|
||||||
|
Copyright (C) 2002-16, Bruce Allen, Christian Franke, www.smartmontools.org
|
||||||
|
|
||||||
|
CHECK POWER MODE not implemented, ignoring -n option
|
||||||
|
=== START OF INFORMATION SECTION ===
|
||||||
|
Model Family: Apple SD/SM/TS...E/F SSDs
|
||||||
|
Device Model: APPLE SSD SM256E
|
||||||
|
Serial Number: S0X5NZBC422720
|
||||||
|
LU WWN Device Id: 5 002538 043584d30
|
||||||
|
Firmware Version: CXM09A1Q
|
||||||
|
User Capacity: 251,000,193,024 bytes [251 GB]
|
||||||
|
Sector Sizes: 512 bytes logical, 4096 bytes physical
|
||||||
|
Rotation Rate: Solid State Device
|
||||||
|
Device is: In smartctl database [for details use: -P show]
|
||||||
|
ATA Version is: ATA8-ACS T13/1699-D revision 4c
|
||||||
|
SATA Version is: SATA 3.0, 6.0 Gb/s (current: 6.0 Gb/s)
|
||||||
|
Local Time is: Thu Feb 9 16:48:45 2017 CET
|
||||||
|
SMART support is: Available - device has SMART capability.
|
||||||
|
SMART support is: Enabled
|
||||||
|
|
||||||
|
=== START OF READ SMART DATA SECTION ===
|
||||||
|
SMART overall-health self-assessment test result: PASSED
|
||||||
|
|
||||||
|
=== START OF READ SMART DATA SECTION ===
|
||||||
|
SMART Attributes Data Structure revision number: 1
|
||||||
|
Vendor Specific SMART Attributes with Thresholds:
|
||||||
|
ID# ATTRIBUTE_NAME FLAGS VALUE WORST THRESH FAIL RAW_VALUE
|
||||||
|
1 Raw_Read_Error_Rate -O-RC- 200 200 000 - 0
|
||||||
|
5 Reallocated_Sector_Ct PO--CK 100 100 000 - 0
|
||||||
|
9 Power_On_Hours -O--CK 099 099 000 - 2988
|
||||||
|
12 Power_Cycle_Count -O--CK 085 085 000 - 14879
|
||||||
|
169 Unknown_Attribute PO--C- 253 253 010 - 2044932921600
|
||||||
|
173 Wear_Leveling_Count -O--CK 185 185 100 - 957808640337
|
||||||
|
190 Airflow_Temperature_Cel -O---K 055 040 045 Past 45 (Min/Max 43/57 #2689)
|
||||||
|
192 Power-Off_Retract_Count -O--C- 097 097 000 - 14716
|
||||||
|
194 Temperature_Celsius -O---K 066 021 000 - 34 (Min/Max 14/79)
|
||||||
|
197 Current_Pending_Sector -O---K 100 100 000 - 0
|
||||||
|
199 UDMA_CRC_Error_Count -O-RC- 200 200 000 - 0
|
||||||
|
240 Head_Flying_Hours ------ 100 253 000 - 6585h+55m+23.234s
|
||||||
|
||||||_ K auto-keep
|
||||||
|
|||||__ C event count
|
||||||
|
||||___ R error rate
|
||||||
|
|||____ S speed/performance
|
||||||
|
||_____ O updated online
|
||||||
|
|______ P prefailure warning
|
||||||
|
`
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestGatherAttributes(t *testing.T) {
|
||||||
|
s := &Smart{
|
||||||
|
Path: "smartctl",
|
||||||
|
Attributes: true,
|
||||||
|
}
|
||||||
|
// overwriting exec commands with mock commands
|
||||||
|
execCommand = fakeExecCommand
|
||||||
|
var acc testutil.Accumulator
|
||||||
|
|
||||||
|
err := s.Gather(&acc)
|
||||||
|
|
||||||
|
require.NoError(t, err)
|
||||||
|
assert.Equal(t, 65, acc.NFields(), "Wrong number of fields gathered")
|
||||||
|
|
||||||
|
var testsAda0Attributes = []struct {
|
||||||
|
fields map[string]interface{}
|
||||||
|
tags map[string]string
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
map[string]interface{}{
|
||||||
|
"value": int64(200),
|
||||||
|
"worst": int64(200),
|
||||||
|
"threshold": int64(0),
|
||||||
|
"raw_value": int64(0),
|
||||||
|
"exit_status": int(0),
|
||||||
|
},
|
||||||
|
map[string]string{
|
||||||
|
"device": "/dev/ada0",
|
||||||
|
"serial_no": "S0X5NZBC422720",
|
||||||
|
"wwn": "5002538043584d30",
|
||||||
|
"id": "1",
|
||||||
|
"name": "Raw_Read_Error_Rate",
|
||||||
|
"flags": "-O-RC-",
|
||||||
|
"fail": "-",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
map[string]interface{}{
|
||||||
|
"value": int64(100),
|
||||||
|
"worst": int64(100),
|
||||||
|
"threshold": int64(0),
|
||||||
|
"raw_value": int64(0),
|
||||||
|
"exit_status": int(0),
|
||||||
|
},
|
||||||
|
map[string]string{
|
||||||
|
"device": "/dev/ada0",
|
||||||
|
"serial_no": "S0X5NZBC422720",
|
||||||
|
"wwn": "5002538043584d30",
|
||||||
|
"id": "5",
|
||||||
|
"name": "Reallocated_Sector_Ct",
|
||||||
|
"flags": "PO--CK",
|
||||||
|
"fail": "-",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
map[string]interface{}{
|
||||||
|
"value": int64(99),
|
||||||
|
"worst": int64(99),
|
||||||
|
"threshold": int64(0),
|
||||||
|
"raw_value": int64(2988),
|
||||||
|
"exit_status": int(0),
|
||||||
|
},
|
||||||
|
map[string]string{
|
||||||
|
"device": "/dev/ada0",
|
||||||
|
"serial_no": "S0X5NZBC422720",
|
||||||
|
"wwn": "5002538043584d30",
|
||||||
|
"id": "9",
|
||||||
|
"name": "Power_On_Hours",
|
||||||
|
"flags": "-O--CK",
|
||||||
|
"fail": "-",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
map[string]interface{}{
|
||||||
|
"value": int64(85),
|
||||||
|
"worst": int64(85),
|
||||||
|
"threshold": int64(0),
|
||||||
|
"raw_value": int64(14879),
|
||||||
|
"exit_status": int(0),
|
||||||
|
},
|
||||||
|
map[string]string{
|
||||||
|
"device": "/dev/ada0",
|
||||||
|
"serial_no": "S0X5NZBC422720",
|
||||||
|
"wwn": "5002538043584d30",
|
||||||
|
"id": "12",
|
||||||
|
"name": "Power_Cycle_Count",
|
||||||
|
"flags": "-O--CK",
|
||||||
|
"fail": "-",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
map[string]interface{}{
|
||||||
|
"value": int64(253),
|
||||||
|
"worst": int64(253),
|
||||||
|
"threshold": int64(10),
|
||||||
|
"raw_value": int64(2044932921600),
|
||||||
|
"exit_status": int(0),
|
||||||
|
},
|
||||||
|
map[string]string{
|
||||||
|
"device": "/dev/ada0",
|
||||||
|
"serial_no": "S0X5NZBC422720",
|
||||||
|
"wwn": "5002538043584d30",
|
||||||
|
"id": "169",
|
||||||
|
"name": "Unknown_Attribute",
|
||||||
|
"flags": "PO--C-",
|
||||||
|
"fail": "-",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
map[string]interface{}{
|
||||||
|
"value": int64(185),
|
||||||
|
"worst": int64(185),
|
||||||
|
"threshold": int64(100),
|
||||||
|
"raw_value": int64(957808640337),
|
||||||
|
"exit_status": int(0),
|
||||||
|
},
|
||||||
|
map[string]string{
|
||||||
|
"device": "/dev/ada0",
|
||||||
|
"serial_no": "S0X5NZBC422720",
|
||||||
|
"wwn": "5002538043584d30",
|
||||||
|
"id": "173",
|
||||||
|
"name": "Wear_Leveling_Count",
|
||||||
|
"flags": "-O--CK",
|
||||||
|
"fail": "-",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
map[string]interface{}{
|
||||||
|
"value": int64(55),
|
||||||
|
"worst": int64(40),
|
||||||
|
"threshold": int64(45),
|
||||||
|
"raw_value": int64(45),
|
||||||
|
"exit_status": int(0),
|
||||||
|
},
|
||||||
|
map[string]string{
|
||||||
|
"device": "/dev/ada0",
|
||||||
|
"serial_no": "S0X5NZBC422720",
|
||||||
|
"wwn": "5002538043584d30",
|
||||||
|
"id": "190",
|
||||||
|
"name": "Airflow_Temperature_Cel",
|
||||||
|
"flags": "-O---K",
|
||||||
|
"fail": "Past",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
map[string]interface{}{
|
||||||
|
"value": int64(97),
|
||||||
|
"worst": int64(97),
|
||||||
|
"threshold": int64(0),
|
||||||
|
"raw_value": int64(14716),
|
||||||
|
"exit_status": int(0),
|
||||||
|
},
|
||||||
|
map[string]string{
|
||||||
|
"device": "/dev/ada0",
|
||||||
|
"serial_no": "S0X5NZBC422720",
|
||||||
|
"wwn": "5002538043584d30",
|
||||||
|
"id": "192",
|
||||||
|
"name": "Power-Off_Retract_Count",
|
||||||
|
"flags": "-O--C-",
|
||||||
|
"fail": "-",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
map[string]interface{}{
|
||||||
|
"value": int64(66),
|
||||||
|
"worst": int64(21),
|
||||||
|
"threshold": int64(0),
|
||||||
|
"raw_value": int64(34),
|
||||||
|
"exit_status": int(0),
|
||||||
|
},
|
||||||
|
map[string]string{
|
||||||
|
"device": "/dev/ada0",
|
||||||
|
"serial_no": "S0X5NZBC422720",
|
||||||
|
"wwn": "5002538043584d30",
|
||||||
|
"id": "194",
|
||||||
|
"name": "Temperature_Celsius",
|
||||||
|
"flags": "-O---K",
|
||||||
|
"fail": "-",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
map[string]interface{}{
|
||||||
|
"value": int64(100),
|
||||||
|
"worst": int64(100),
|
||||||
|
"threshold": int64(0),
|
||||||
|
"raw_value": int64(0),
|
||||||
|
"exit_status": int(0),
|
||||||
|
},
|
||||||
|
map[string]string{
|
||||||
|
"device": "/dev/ada0",
|
||||||
|
"serial_no": "S0X5NZBC422720",
|
||||||
|
"wwn": "5002538043584d30",
|
||||||
|
"id": "197",
|
||||||
|
"name": "Current_Pending_Sector",
|
||||||
|
"flags": "-O---K",
|
||||||
|
"fail": "-",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
map[string]interface{}{
|
||||||
|
"value": int64(200),
|
||||||
|
"worst": int64(200),
|
||||||
|
"threshold": int64(0),
|
||||||
|
"raw_value": int64(0),
|
||||||
|
"exit_status": int(0),
|
||||||
|
},
|
||||||
|
map[string]string{
|
||||||
|
"device": "/dev/ada0",
|
||||||
|
"serial_no": "S0X5NZBC422720",
|
||||||
|
"wwn": "5002538043584d30",
|
||||||
|
"id": "199",
|
||||||
|
"name": "UDMA_CRC_Error_Count",
|
||||||
|
"flags": "-O-RC-",
|
||||||
|
"fail": "-",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
map[string]interface{}{
|
||||||
|
"value": int64(100),
|
||||||
|
"worst": int64(253),
|
||||||
|
"threshold": int64(0),
|
||||||
|
"raw_value": int64(23709323),
|
||||||
|
"exit_status": int(0),
|
||||||
|
},
|
||||||
|
map[string]string{
|
||||||
|
"device": "/dev/ada0",
|
||||||
|
"serial_no": "S0X5NZBC422720",
|
||||||
|
"wwn": "5002538043584d30",
|
||||||
|
"id": "240",
|
||||||
|
"name": "Head_Flying_Hours",
|
||||||
|
"flags": "------",
|
||||||
|
"fail": "-",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, test := range testsAda0Attributes {
|
||||||
|
acc.AssertContainsTaggedFields(t, "smart_attribute", test.fields, test.tags)
|
||||||
|
}
|
||||||
|
|
||||||
|
// tags = map[string]string{}
|
||||||
|
|
||||||
|
var testsAda0Device = []struct {
|
||||||
|
fields map[string]interface{}
|
||||||
|
tags map[string]string
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
map[string]interface{}{
|
||||||
|
"exit_status": int(0),
|
||||||
|
"health_ok": bool(true),
|
||||||
|
"read_error_rate": int64(0),
|
||||||
|
"temp_c": int64(34),
|
||||||
|
"udma_crc_errors": int64(0),
|
||||||
|
},
|
||||||
|
map[string]string{
|
||||||
|
"device": "/dev/ada0",
|
||||||
|
"model": "APPLE SSD SM256E",
|
||||||
|
"serial_no": "S0X5NZBC422720",
|
||||||
|
"wwn": "5002538043584d30",
|
||||||
|
"enabled": "Enabled",
|
||||||
|
"capacity": "251000193024",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, test := range testsAda0Device {
|
||||||
|
acc.AssertContainsTaggedFields(t, "smart_device", test.fields, test.tags)
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestGatherNoAttributes(t *testing.T) {
|
||||||
|
s := &Smart{
|
||||||
|
Path: "smartctl",
|
||||||
|
Attributes: false,
|
||||||
|
}
|
||||||
|
// overwriting exec commands with mock commands
|
||||||
|
execCommand = fakeExecCommand
|
||||||
|
var acc testutil.Accumulator
|
||||||
|
|
||||||
|
err := s.Gather(&acc)
|
||||||
|
|
||||||
|
require.NoError(t, err)
|
||||||
|
assert.Equal(t, 5, acc.NFields(), "Wrong number of fields gathered")
|
||||||
|
acc.AssertDoesNotContainMeasurement(t, "smart_attribute")
|
||||||
|
|
||||||
|
// tags = map[string]string{}
|
||||||
|
|
||||||
|
var testsAda0Device = []struct {
|
||||||
|
fields map[string]interface{}
|
||||||
|
tags map[string]string
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
map[string]interface{}{
|
||||||
|
"exit_status": int(0),
|
||||||
|
"health_ok": bool(true),
|
||||||
|
"read_error_rate": int64(0),
|
||||||
|
"temp_c": int64(34),
|
||||||
|
"udma_crc_errors": int64(0),
|
||||||
|
},
|
||||||
|
map[string]string{
|
||||||
|
"device": "/dev/ada0",
|
||||||
|
"model": "APPLE SSD SM256E",
|
||||||
|
"serial_no": "S0X5NZBC422720",
|
||||||
|
"wwn": "5002538043584d30",
|
||||||
|
"enabled": "Enabled",
|
||||||
|
"capacity": "251000193024",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, test := range testsAda0Device {
|
||||||
|
acc.AssertContainsTaggedFields(t, "smart_device", test.fields, test.tags)
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestExcludedDev(t *testing.T) {
|
||||||
|
assert.Equal(t, true, excludedDev([]string{"/dev/pass6"}, "/dev/pass6 -d atacam"), "Should be excluded.")
|
||||||
|
assert.Equal(t, false, excludedDev([]string{}, "/dev/pass6 -d atacam"), "Shouldn't be excluded.")
|
||||||
|
assert.Equal(t, false, excludedDev([]string{"/dev/pass6"}, "/dev/pass1 -d atacam"), "Shouldn't be excluded.")
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
// fakeExecCommand is a stand-in for exec.Command used by the tests. Instead of
// running command, it re-invokes the current test binary restricted to
// TestHelperProcess, passing command and args after a "--" separator and
// setting GO_WANT_HELPER_PROCESS=1 as the only environment variable.
func fakeExecCommand(command string, args ...string) *exec.Cmd {
	helperArgs := append([]string{"-test.run=TestHelperProcess", "--", command}, args...)

	cmd := exec.Command(os.Args[0], helperArgs...)
	cmd.Env = []string{"GO_WANT_HELPER_PROCESS=1"}
	return cmd
}
|
||||||
|
|
||||||
|
// TestHelperProcess isn't a real test. It's used to mock exec.Command
|
||||||
|
// For example, if you run:
|
||||||
|
// GO_WANT_HELPER_PROCESS=1 go test -test.run=TestHelperProcess -- --scan
|
||||||
|
// it returns below mockScanData.
|
||||||
|
func TestHelperProcess(t *testing.T) {
|
||||||
|
if os.Getenv("GO_WANT_HELPER_PROCESS") != "1" {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
args := os.Args
|
||||||
|
|
||||||
|
// Previous arguments are tests stuff, that looks like :
|
||||||
|
// /tmp/go-build970079519/…/_test/integration.test -test.run=TestHelperProcess --
|
||||||
|
cmd, arg1, args := args[3], args[4], args[5:]
|
||||||
|
|
||||||
|
if cmd == "smartctl" {
|
||||||
|
if arg1 == "--scan" {
|
||||||
|
fmt.Fprint(os.Stdout, mockScanData)
|
||||||
|
}
|
||||||
|
if arg1 == "--info" {
|
||||||
|
fmt.Fprint(os.Stdout, mockInfoAttributeData)
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
fmt.Fprint(os.Stdout, "command not found")
|
||||||
|
os.Exit(1)
|
||||||
|
}
|
||||||
|
os.Exit(0)
|
||||||
|
}
|
Loading…
Reference in New Issue