Update smart input plugin to support more drive types (#5765)
This commit is contained in:
@@ -3,6 +3,7 @@ package smart
|
||||
import (
|
||||
"bufio"
|
||||
"fmt"
|
||||
"log"
|
||||
"os/exec"
|
||||
"path"
|
||||
"regexp"
|
||||
@@ -18,31 +19,46 @@ import (
|
||||
)
|
||||
|
||||
var (
|
||||
execCommand = exec.Command // execCommand is used to mock commands in tests.
|
||||
|
||||
// Device Model: APPLE SSD SM256E
|
||||
modelInInfo = regexp.MustCompile("^Device Model:\\s+(.*)$")
|
||||
// Product: HUH721212AL5204
|
||||
// Model Number: TS128GMTE850
|
||||
modelInfo = regexp.MustCompile("^(Device Model|Product|Model Number):\\s+(.*)$")
|
||||
// Serial Number: S0X5NZBC422720
|
||||
serialInInfo = regexp.MustCompile("^Serial Number:\\s+(.*)$")
|
||||
serialInfo = regexp.MustCompile("^Serial Number:\\s+(.*)$")
|
||||
// LU WWN Device Id: 5 002538 655584d30
|
||||
wwnInInfo = regexp.MustCompile("^LU WWN Device Id:\\s+(.*)$")
|
||||
wwnInfo = regexp.MustCompile("^LU WWN Device Id:\\s+(.*)$")
|
||||
// User Capacity: 251,000,193,024 bytes [251 GB]
|
||||
usercapacityInInfo = regexp.MustCompile("^User Capacity:\\s+([0-9,]+)\\s+bytes.*$")
|
||||
usercapacityInfo = regexp.MustCompile("^User Capacity:\\s+([0-9,]+)\\s+bytes.*$")
|
||||
// SMART support is: Enabled
|
||||
smartEnabledInInfo = regexp.MustCompile("^SMART support is:\\s+(\\w+)$")
|
||||
smartEnabledInfo = regexp.MustCompile("^SMART support is:\\s+(\\w+)$")
|
||||
// SMART overall-health self-assessment test result: PASSED
|
||||
// SMART Health Status: OK
|
||||
// PASSED, FAILED, UNKNOWN
|
||||
smartOverallHealth = regexp.MustCompile("^SMART overall-health self-assessment test result:\\s+(\\w+).*$")
|
||||
smartOverallHealth = regexp.MustCompile("^(SMART overall-health self-assessment test result|SMART Health Status):\\s+(\\w+).*$")
|
||||
|
||||
// Accumulated start-stop cycles: 7
|
||||
sasStartStopAttr = regexp.MustCompile("^Accumulated start-stop cycles:\\s+(.*)$")
|
||||
// Accumulated load-unload cycles: 39
|
||||
sasLoadCycleAttr = regexp.MustCompile("^Accumulated load-unload cycles:\\s+(.*)$")
|
||||
// Current Drive Temperature: 34 C
|
||||
sasTempAttr = regexp.MustCompile("^Current Drive Temperature:\\s+(.*)\\s+C(.*)$")
|
||||
// Temperature: 38 Celsius
|
||||
nvmeTempAttr = regexp.MustCompile("^Temperature:\\s+(.*)\\s+(.*)$")
|
||||
// Power Cycles: 472
|
||||
nvmePowerCycleAttr = regexp.MustCompile("^Power Cycles:\\s+(.*)$")
|
||||
// Power On Hours: 6,038
|
||||
nvmePowerOnAttr = regexp.MustCompile("^Power On Hours:\\s+(.*)$")
|
||||
|
||||
// ID# ATTRIBUTE_NAME FLAGS VALUE WORST THRESH FAIL RAW_VALUE
|
||||
// 1 Raw_Read_Error_Rate -O-RC- 200 200 000 - 0
|
||||
// 5 Reallocated_Sector_Ct PO--CK 100 100 000 - 0
|
||||
// 192 Power-Off_Retract_Count -O--C- 097 097 000 - 14716
|
||||
attribute = regexp.MustCompile("^\\s*([0-9]+)\\s(\\S+)\\s+([-P][-O][-S][-R][-C][-K])\\s+([0-9]+)\\s+([0-9]+)\\s+([0-9]+)\\s+([-\\w]+)\\s+([\\w\\+\\.]+).*$")
|
||||
attribute = regexp.MustCompile("^\\s*([0-9]+)\\s(\\S+)\\s+([-P][-O][-S][-R][-C][-K])\\s+([0-9]+)\\s+([0-9]+)\\s+([0-9-]+)\\s+([-\\w]+)\\s+([\\w\\+\\.]+).*$")
|
||||
|
||||
deviceFieldIds = map[string]string{
|
||||
"1": "read_error_rate",
|
||||
"7": "seek_error_rate",
|
||||
"190": "temp_c",
|
||||
"194": "temp_c",
|
||||
"199": "udma_crc_errors",
|
||||
}
|
||||
@@ -60,13 +76,13 @@ type Smart struct {
|
||||
var sampleConfig = `
|
||||
## Optionally specify the path to the smartctl executable
|
||||
# path = "/usr/bin/smartctl"
|
||||
#
|
||||
|
||||
## On most platforms smartctl requires root access.
|
||||
## Setting 'use_sudo' to true will make use of sudo to run smartctl.
|
||||
## Sudo must be configured to to allow the telegraf user to run smartctl
|
||||
## with out password.
|
||||
## without a password.
|
||||
# use_sudo = false
|
||||
#
|
||||
|
||||
## Skip checking disks in this power mode. Defaults to
|
||||
## "standby" to not wake up disks that have stoped rotating.
|
||||
## See --nocheck in the man pages for smartctl.
|
||||
@@ -74,15 +90,13 @@ var sampleConfig = `
|
||||
## power mode and might require changing this value to
|
||||
## "never" depending on your disks.
|
||||
# nocheck = "standby"
|
||||
#
|
||||
|
||||
## Gather detailed metrics for each SMART Attribute.
|
||||
## Defaults to "false"
|
||||
##
|
||||
# attributes = false
|
||||
#
|
||||
|
||||
## Optionally specify devices to exclude from reporting.
|
||||
# excludes = [ "/dev/pass6" ]
|
||||
#
|
||||
|
||||
## Optionally specify devices and device type, if unset
|
||||
## a scan (smartctl --scan) for S.M.A.R.T. devices will
|
||||
## done and all found will be included except for the
|
||||
@@ -111,34 +125,36 @@ func (m *Smart) Gather(acc telegraf.Accumulator) error {
|
||||
return err
|
||||
}
|
||||
}
|
||||
log.Printf("D! [inputs.smart] devices: %+#v", devices)
|
||||
|
||||
m.getAttributes(acc, devices)
|
||||
return nil
|
||||
}
|
||||
|
||||
// Wrap with sudo
|
||||
func sudo(sudo bool, command string, args ...string) *exec.Cmd {
|
||||
var runCmd = func(sudo bool, command string, args ...string) ([]byte, error) {
|
||||
cmd := exec.Command(command, args...)
|
||||
if sudo {
|
||||
return execCommand("sudo", append([]string{"-n", command}, args...)...)
|
||||
cmd = exec.Command("sudo", append([]string{"-n", command}, args...)...)
|
||||
}
|
||||
|
||||
return execCommand(command, args...)
|
||||
return internal.CombinedOutputTimeout(cmd, time.Second*5)
|
||||
}
|
||||
|
||||
// Scan for S.M.A.R.T. devices
|
||||
func (m *Smart) scan() ([]string, error) {
|
||||
|
||||
cmd := sudo(m.UseSudo, m.Path, "--scan")
|
||||
out, err := internal.CombinedOutputTimeout(cmd, time.Second*5)
|
||||
out, err := runCmd(m.UseSudo, m.Path, "--scan")
|
||||
if err != nil {
|
||||
return []string{}, fmt.Errorf("failed to run command %s: %s - %s", strings.Join(cmd.Args, " "), err, string(out))
|
||||
return []string{}, fmt.Errorf("failed to run command '%s --scan': %s - %s", m.Path, err, string(out))
|
||||
}
|
||||
|
||||
devices := []string{}
|
||||
for _, line := range strings.Split(string(out), "\n") {
|
||||
dev := strings.Split(line, " ")
|
||||
if len(dev) > 1 && !excludedDev(m.Excludes, strings.TrimSpace(dev[0])) {
|
||||
log.Printf("D! [inputs.smart] adding device: %+#v", dev)
|
||||
devices = append(devices, strings.TrimSpace(dev[0]))
|
||||
} else {
|
||||
log.Printf("D! [inputs.smart] skipping device: %+#v", dev)
|
||||
}
|
||||
}
|
||||
return devices, nil
|
||||
@@ -158,7 +174,6 @@ func excludedDev(excludes []string, deviceLine string) bool {
|
||||
|
||||
// Get info and attributes for each S.M.A.R.T. device
|
||||
func (m *Smart) getAttributes(acc telegraf.Accumulator, devices []string) {
|
||||
|
||||
var wg sync.WaitGroup
|
||||
wg.Add(len(devices))
|
||||
|
||||
@@ -180,79 +195,77 @@ func exitStatus(err error) (int, error) {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
func gatherDisk(acc telegraf.Accumulator, usesudo, attributes bool, smartctl, nockeck, device string, wg *sync.WaitGroup) {
|
||||
|
||||
func gatherDisk(acc telegraf.Accumulator, usesudo, collectAttributes bool, smartctl, nocheck, device string, wg *sync.WaitGroup) {
|
||||
defer wg.Done()
|
||||
// smartctl 5.41 & 5.42 are broken regarding handling of --nocheck/-n
|
||||
args := []string{"--info", "--health", "--attributes", "--tolerance=verypermissive", "-n", nockeck, "--format=brief"}
|
||||
args := []string{"--info", "--health", "--attributes", "--tolerance=verypermissive", "-n", nocheck, "--format=brief"}
|
||||
args = append(args, strings.Split(device, " ")...)
|
||||
cmd := sudo(usesudo, smartctl, args...)
|
||||
out, e := internal.CombinedOutputTimeout(cmd, time.Second*5)
|
||||
out, e := runCmd(usesudo, smartctl, args...)
|
||||
outStr := string(out)
|
||||
|
||||
// Ignore all exit statuses except if it is a command line parse error
|
||||
exitStatus, er := exitStatus(e)
|
||||
if er != nil {
|
||||
acc.AddError(fmt.Errorf("failed to run command %s: %s - %s", strings.Join(cmd.Args, " "), e, outStr))
|
||||
acc.AddError(fmt.Errorf("failed to run command '%s %s': %s - %s", smartctl, strings.Join(args, " "), e, outStr))
|
||||
return
|
||||
}
|
||||
|
||||
device_tags := map[string]string{}
|
||||
device_node := strings.Split(device, " ")[0]
|
||||
device_tags["device"] = path.Base(device_node)
|
||||
device_fields := make(map[string]interface{})
|
||||
device_fields["exit_status"] = exitStatus
|
||||
deviceTags := map[string]string{}
|
||||
deviceNode := strings.Split(device, " ")[0]
|
||||
deviceTags["device"] = path.Base(deviceNode)
|
||||
deviceFields := make(map[string]interface{})
|
||||
deviceFields["exit_status"] = exitStatus
|
||||
|
||||
log.Printf("D! [inputs.smart] gatherDisk '%s'", deviceNode)
|
||||
|
||||
scanner := bufio.NewScanner(strings.NewReader(outStr))
|
||||
|
||||
for scanner.Scan() {
|
||||
line := scanner.Text()
|
||||
|
||||
model := modelInInfo.FindStringSubmatch(line)
|
||||
if len(model) > 1 {
|
||||
device_tags["model"] = model[1]
|
||||
model := modelInfo.FindStringSubmatch(line)
|
||||
if len(model) > 2 {
|
||||
deviceTags["model"] = model[2]
|
||||
}
|
||||
|
||||
serial := serialInInfo.FindStringSubmatch(line)
|
||||
serial := serialInfo.FindStringSubmatch(line)
|
||||
if len(serial) > 1 {
|
||||
device_tags["serial_no"] = serial[1]
|
||||
deviceTags["serial_no"] = serial[1]
|
||||
}
|
||||
|
||||
wwn := wwnInInfo.FindStringSubmatch(line)
|
||||
wwn := wwnInfo.FindStringSubmatch(line)
|
||||
if len(wwn) > 1 {
|
||||
device_tags["wwn"] = strings.Replace(wwn[1], " ", "", -1)
|
||||
deviceTags["wwn"] = strings.Replace(wwn[1], " ", "", -1)
|
||||
}
|
||||
|
||||
capacity := usercapacityInInfo.FindStringSubmatch(line)
|
||||
capacity := usercapacityInfo.FindStringSubmatch(line)
|
||||
if len(capacity) > 1 {
|
||||
device_tags["capacity"] = strings.Replace(capacity[1], ",", "", -1)
|
||||
deviceTags["capacity"] = strings.Replace(capacity[1], ",", "", -1)
|
||||
}
|
||||
|
||||
enabled := smartEnabledInInfo.FindStringSubmatch(line)
|
||||
enabled := smartEnabledInfo.FindStringSubmatch(line)
|
||||
if len(enabled) > 1 {
|
||||
device_tags["enabled"] = enabled[1]
|
||||
deviceTags["enabled"] = enabled[1]
|
||||
}
|
||||
|
||||
health := smartOverallHealth.FindStringSubmatch(line)
|
||||
if len(health) > 1 {
|
||||
device_fields["health_ok"] = (health[1] == "PASSED")
|
||||
if len(health) > 2 {
|
||||
deviceFields["health_ok"] = (health[2] == "PASSED" || health[2] == "OK")
|
||||
}
|
||||
|
||||
tags := map[string]string{}
|
||||
fields := make(map[string]interface{})
|
||||
|
||||
attr := attribute.FindStringSubmatch(line)
|
||||
|
||||
if len(attr) > 1 {
|
||||
if collectAttributes {
|
||||
deviceNode := strings.Split(device, " ")[0]
|
||||
tags["device"] = path.Base(deviceNode)
|
||||
|
||||
if attributes {
|
||||
tags := map[string]string{}
|
||||
fields := make(map[string]interface{})
|
||||
|
||||
device_node := strings.Split(device, " ")[0]
|
||||
tags["device"] = path.Base(device_node)
|
||||
|
||||
if serial, ok := device_tags["serial_no"]; ok {
|
||||
if serial, ok := deviceTags["serial_no"]; ok {
|
||||
tags["serial_no"] = serial
|
||||
}
|
||||
if wwn, ok := device_tags["wwn"]; ok {
|
||||
if wwn, ok := deviceTags["wwn"]; ok {
|
||||
tags["wwn"] = wwn
|
||||
}
|
||||
tags["id"] = attr[1]
|
||||
@@ -282,16 +295,95 @@ func gatherDisk(acc telegraf.Accumulator, usesudo, attributes bool, smartctl, no
|
||||
// save the raw value to a field.
|
||||
if field, ok := deviceFieldIds[attr[1]]; ok {
|
||||
if val, err := parseRawValue(attr[8]); err == nil {
|
||||
device_fields[field] = val
|
||||
deviceFields[field] = val
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if collectAttributes {
|
||||
if startStop := sasStartStopAttr.FindStringSubmatch(line); len(startStop) > 1 {
|
||||
tags["id"] = "4"
|
||||
tags["name"] = "Start_Stop_Count"
|
||||
i, err := strconv.ParseInt(strings.Replace(startStop[1], ",", "", -1), 10, 64)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
fields["raw_value"] = i
|
||||
|
||||
acc.AddFields("smart_attribute", fields, tags)
|
||||
continue
|
||||
}
|
||||
|
||||
if powerCycle := nvmePowerCycleAttr.FindStringSubmatch(line); len(powerCycle) > 1 {
|
||||
tags["id"] = "12"
|
||||
tags["name"] = "Power_Cycle_Count"
|
||||
i, err := strconv.ParseInt(strings.Replace(powerCycle[1], ",", "", -1), 10, 64)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
fields["raw_value"] = i
|
||||
|
||||
acc.AddFields("smart_attribute", fields, tags)
|
||||
continue
|
||||
}
|
||||
|
||||
if powerOn := nvmePowerOnAttr.FindStringSubmatch(line); len(powerOn) > 1 {
|
||||
tags["id"] = "9"
|
||||
tags["name"] = "Power_On_Hours"
|
||||
i, err := strconv.ParseInt(strings.Replace(powerOn[1], ",", "", -1), 10, 64)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
fields["raw_value"] = i
|
||||
|
||||
acc.AddFields("smart_attribute", fields, tags)
|
||||
continue
|
||||
}
|
||||
|
||||
if loadCycle := sasLoadCycleAttr.FindStringSubmatch(line); len(loadCycle) > 1 {
|
||||
tags["id"] = "193"
|
||||
tags["name"] = "Load_Cycle_Count"
|
||||
i, err := strconv.ParseInt(strings.Replace(loadCycle[1], ",", "", -1), 10, 64)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
fields["raw_value"] = i
|
||||
|
||||
acc.AddFields("smart_attribute", fields, tags)
|
||||
continue
|
||||
}
|
||||
|
||||
if temp := sasTempAttr.FindStringSubmatch(line); len(temp) > 1 {
|
||||
tags["id"] = "194"
|
||||
tags["name"] = "Temperature_Celsius"
|
||||
tempC, err := strconv.ParseInt(temp[1], 10, 64)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
fields["raw_value"] = tempC
|
||||
deviceFields["temp_c"] = tempC
|
||||
|
||||
acc.AddFields("smart_attribute", fields, tags)
|
||||
}
|
||||
|
||||
if temp := nvmeTempAttr.FindStringSubmatch(line); len(temp) > 1 {
|
||||
tags["id"] = "194"
|
||||
tags["name"] = "Temperature_Celsius"
|
||||
tempC, err := strconv.ParseInt(temp[1], 10, 64)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
fields["raw_value"] = tempC
|
||||
deviceFields["temp_c"] = tempC
|
||||
|
||||
acc.AddFields("smart_attribute", fields, tags)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
acc.AddFields("smart_device", device_fields, device_tags)
|
||||
acc.AddFields("smart_device", deviceFields, deviceTags)
|
||||
}
|
||||
|
||||
func parseRawValue(rawVal string) (int64, error) {
|
||||
|
||||
// Integer
|
||||
if i, err := strconv.ParseInt(rawVal, 10, 64); err == nil {
|
||||
return i, nil
|
||||
|
||||
Reference in New Issue
Block a user