Add extra attributes for NVMe devices to smart input (#6079)
This commit is contained in:
parent
e5158107c1
commit
43c16aa898
|
@ -36,18 +36,8 @@ var (
|
||||||
// PASSED, FAILED, UNKNOWN
|
// PASSED, FAILED, UNKNOWN
|
||||||
smartOverallHealth = regexp.MustCompile("^(SMART overall-health self-assessment test result|SMART Health Status):\\s+(\\w+).*$")
|
smartOverallHealth = regexp.MustCompile("^(SMART overall-health self-assessment test result|SMART Health Status):\\s+(\\w+).*$")
|
||||||
|
|
||||||
// Accumulated start-stop cycles: 7
|
// sasNvmeAttr matches one "Name: value" line of SAS or NVMe SMART output, e.g. "Power Cycles: 472"
|
||||||
sasStartStopAttr = regexp.MustCompile("^Accumulated start-stop cycles:\\s+(.*)$")
|
sasNvmeAttr = regexp.MustCompile(`^([^:]+):\s+(.+)$`)
|
||||||
// Accumulated load-unload cycles: 39
|
|
||||||
sasLoadCycleAttr = regexp.MustCompile("^Accumulated load-unload cycles:\\s+(.*)$")
|
|
||||||
// Current Drive Temperature: 34 C
|
|
||||||
sasTempAttr = regexp.MustCompile("^Current Drive Temperature:\\s+(.*)\\s+C(.*)$")
|
|
||||||
// Temperature: 38 Celsius
|
|
||||||
nvmeTempAttr = regexp.MustCompile("^Temperature:\\s+(.*)\\s+(.*)$")
|
|
||||||
// Power Cycles: 472
|
|
||||||
nvmePowerCycleAttr = regexp.MustCompile("^Power Cycles:\\s+(.*)$")
|
|
||||||
// Power On Hours: 6,038
|
|
||||||
nvmePowerOnAttr = regexp.MustCompile("^Power On Hours:\\s+(.*)$")
|
|
||||||
|
|
||||||
// ID# ATTRIBUTE_NAME FLAGS VALUE WORST THRESH FAIL RAW_VALUE
|
// ID# ATTRIBUTE_NAME FLAGS VALUE WORST THRESH FAIL RAW_VALUE
|
||||||
// 1 Raw_Read_Error_Rate -O-RC- 200 200 000 - 0
|
// 1 Raw_Read_Error_Rate -O-RC- 200 200 000 - 0
|
||||||
|
@ -62,6 +52,64 @@ var (
|
||||||
"194": "temp_c",
|
"194": "temp_c",
|
||||||
"199": "udma_crc_errors",
|
"199": "udma_crc_errors",
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// sasNvmeAttributes is keyed by the attribute label captured from a
// "Label: value" line of SAS/NVMe output and describes how that line is
// reported as a smart_attribute metric.
sasNvmeAttributes = map[string]struct {
	// ID is copied to the "id" tag by gatherDisk; left empty when the
	// attribute has no ID assigned here, in which case no "id" tag is set.
	ID string
	// Name is copied to the "name" tag.
	Name string
	// Parse converts the text after the colon into field values. When nil,
	// gatherDisk falls back to parseCommaSeperatedInt.
	Parse func(fields, deviceFields map[string]interface{}, str string) error
}{
	// Plain counters: parsed by the default integer parser.
	"Power On Hours": {
		ID:   "9",
		Name: "Power_On_Hours",
	},
	"Power Cycles": {
		ID:   "12",
		Name: "Power_Cycle_Count",
	},
	"Accumulated start-stop cycles": {
		ID:   "4",
		Name: "Start_Stop_Count",
	},
	"Accumulated load-unload cycles": {
		ID:   "193",
		Name: "Load_Cycle_Count",
	},
	"Media and Data Integrity Errors": {
		Name: "Media_and_Data_Integrity_Errors",
	},
	"Error Information Log Entries": {
		Name: "Error_Information_Log_Entries",
	},

	// Temperatures: both labels are reported under the same ID and name.
	"Current Drive Temperature": {
		ID:    "194",
		Name:  "Temperature_Celsius",
		Parse: parseTemperature,
	},
	"Temperature": {
		ID:    "194",
		Name:  "Temperature_Celsius",
		Parse: parseTemperature,
	},

	// Values that need bespoke decoding.
	"Available Spare": {
		Name: "Available_Spare",
		Parse: func(fields, deviceFields map[string]interface{}, str string) error {
			// The value is printed with a trailing percent sign, e.g. "100%".
			percent := strings.TrimSuffix(str, "%")
			return parseCommaSeperatedInt(fields, deviceFields, percent)
		},
	},
	"Critical Warning": {
		Name: "Critical_Warning",
		Parse: func(fields, _ map[string]interface{}, str string) error {
			// The value is printed in hexadecimal with a 0x prefix, e.g. "0x09".
			var flags int64
			_, err := fmt.Sscanf(str, "0x%x", &flags)
			if err != nil {
				return err
			}
			fields["raw_value"] = flags
			return nil
		},
	},
}
|
||||||
)
|
)
|
||||||
|
|
||||||
type Smart struct {
|
type Smart struct {
|
||||||
|
@ -300,82 +348,24 @@ func gatherDisk(acc telegraf.Accumulator, usesudo, collectAttributes bool, smart
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
if collectAttributes {
|
if collectAttributes {
|
||||||
if startStop := sasStartStopAttr.FindStringSubmatch(line); len(startStop) > 1 {
|
if matches := sasNvmeAttr.FindStringSubmatch(line); len(matches) > 2 {
|
||||||
tags["id"] = "4"
|
if attr, ok := sasNvmeAttributes[matches[1]]; ok {
|
||||||
tags["name"] = "Start_Stop_Count"
|
tags["name"] = attr.Name
|
||||||
i, err := strconv.ParseInt(strings.Replace(startStop[1], ",", "", -1), 10, 64)
|
if attr.ID != "" {
|
||||||
if err != nil {
|
tags["id"] = attr.ID
|
||||||
continue
|
|
||||||
}
|
|
||||||
fields["raw_value"] = i
|
|
||||||
|
|
||||||
acc.AddFields("smart_attribute", fields, tags)
|
|
||||||
continue
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if powerCycle := nvmePowerCycleAttr.FindStringSubmatch(line); len(powerCycle) > 1 {
|
parse := parseCommaSeperatedInt
|
||||||
tags["id"] = "12"
|
if attr.Parse != nil {
|
||||||
tags["name"] = "Power_Cycle_Count"
|
parse = attr.Parse
|
||||||
i, err := strconv.ParseInt(strings.Replace(powerCycle[1], ",", "", -1), 10, 64)
|
|
||||||
if err != nil {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
fields["raw_value"] = i
|
|
||||||
|
|
||||||
acc.AddFields("smart_attribute", fields, tags)
|
|
||||||
continue
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if powerOn := nvmePowerOnAttr.FindStringSubmatch(line); len(powerOn) > 1 {
|
if err := parse(fields, deviceFields, matches[2]); err != nil {
|
||||||
tags["id"] = "9"
|
|
||||||
tags["name"] = "Power_On_Hours"
|
|
||||||
i, err := strconv.ParseInt(strings.Replace(powerOn[1], ",", "", -1), 10, 64)
|
|
||||||
if err != nil {
|
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
fields["raw_value"] = i
|
|
||||||
|
|
||||||
acc.AddFields("smart_attribute", fields, tags)
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
|
|
||||||
if loadCycle := sasLoadCycleAttr.FindStringSubmatch(line); len(loadCycle) > 1 {
|
|
||||||
tags["id"] = "193"
|
|
||||||
tags["name"] = "Load_Cycle_Count"
|
|
||||||
i, err := strconv.ParseInt(strings.Replace(loadCycle[1], ",", "", -1), 10, 64)
|
|
||||||
if err != nil {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
fields["raw_value"] = i
|
|
||||||
|
|
||||||
acc.AddFields("smart_attribute", fields, tags)
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
|
|
||||||
if temp := sasTempAttr.FindStringSubmatch(line); len(temp) > 1 {
|
|
||||||
tags["id"] = "194"
|
|
||||||
tags["name"] = "Temperature_Celsius"
|
|
||||||
tempC, err := strconv.ParseInt(temp[1], 10, 64)
|
|
||||||
if err != nil {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
fields["raw_value"] = tempC
|
|
||||||
deviceFields["temp_c"] = tempC
|
|
||||||
|
|
||||||
acc.AddFields("smart_attribute", fields, tags)
|
acc.AddFields("smart_attribute", fields, tags)
|
||||||
}
|
}
|
||||||
|
|
||||||
if temp := nvmeTempAttr.FindStringSubmatch(line); len(temp) > 1 {
|
|
||||||
tags["id"] = "194"
|
|
||||||
tags["name"] = "Temperature_Celsius"
|
|
||||||
tempC, err := strconv.ParseInt(temp[1], 10, 64)
|
|
||||||
if err != nil {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
fields["raw_value"] = tempC
|
|
||||||
deviceFields["temp_c"] = tempC
|
|
||||||
|
|
||||||
acc.AddFields("smart_attribute", fields, tags)
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -424,6 +414,29 @@ func parseInt(str string) int64 {
|
||||||
return 0
|
return 0
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// parseCommaSeperatedInt parses str as a base-10 integer that may contain
// digit-group separators (for example "6,038") and stores the result in
// fields["raw_value"] as an int64.
//
// Commas and every whitespace rune are removed before parsing, so values
// with surrounding blanks, a trailing carriage return, or a space used as
// the thousands separator are accepted as well.
//
// The second map parameter is unused; it exists so the function has the same
// signature as the other attribute parsers.
//
// On failure the strconv error is returned and fields is left untouched.
func parseCommaSeperatedInt(fields, _ map[string]interface{}, str string) error {
	// strings.Fields splits on any Unicode white space, so joining the pieces
	// back together drops leading, trailing and embedded white space alike.
	compact := strings.Join(strings.Fields(str), "")

	i, err := strconv.ParseInt(strings.Replace(compact, ",", "", -1), 10, 64)
	if err != nil {
		return err
	}

	fields["raw_value"] = i

	return nil
}
|
||||||
|
|
||||||
|
// parseTemperature reads a leading integer followed by white space and a
// unit beginning with "C" (for example "34 C" or "38 Celsius") from str.
//
// On success the value is stored as an int64 in both fields["raw_value"] and
// deviceFields["temp_c"]. If str does not have that shape the scan error is
// returned and neither map is modified.
func parseTemperature(fields, deviceFields map[string]interface{}, str string) error {
	var celsius int64

	_, err := fmt.Sscanf(str, "%d C", &celsius)
	if err != nil {
		return err
	}

	// The same reading is reported on the attribute and on the device.
	deviceFields["temp_c"] = celsius
	fields["raw_value"] = celsius

	return nil
}
|
||||||
|
|
||||||
func init() {
|
func init() {
|
||||||
m := Smart{}
|
m := Smart{}
|
||||||
path, _ := exec.LookPath("smartctl")
|
path, _ := exec.LookPath("smartctl")
|
||||||
|
|
|
@ -484,6 +484,39 @@ func TestGatherNvme(t *testing.T) {
|
||||||
},
|
},
|
||||||
time.Now(),
|
time.Now(),
|
||||||
),
|
),
|
||||||
|
testutil.MustMetric("smart_attribute",
|
||||||
|
map[string]string{
|
||||||
|
"device": ".",
|
||||||
|
"name": "Media_and_Data_Integrity_Errors",
|
||||||
|
"serial_no": "D704940282?",
|
||||||
|
},
|
||||||
|
map[string]interface{}{
|
||||||
|
"raw_value": 0,
|
||||||
|
},
|
||||||
|
time.Now(),
|
||||||
|
),
|
||||||
|
testutil.MustMetric("smart_attribute",
|
||||||
|
map[string]string{
|
||||||
|
"device": ".",
|
||||||
|
"name": "Error_Information_Log_Entries",
|
||||||
|
"serial_no": "D704940282?",
|
||||||
|
},
|
||||||
|
map[string]interface{}{
|
||||||
|
"raw_value": 119699,
|
||||||
|
},
|
||||||
|
time.Now(),
|
||||||
|
),
|
||||||
|
testutil.MustMetric("smart_attribute",
|
||||||
|
map[string]string{
|
||||||
|
"device": ".",
|
||||||
|
"name": "Available_Spare",
|
||||||
|
"serial_no": "D704940282?",
|
||||||
|
},
|
||||||
|
map[string]interface{}{
|
||||||
|
"raw_value": 100,
|
||||||
|
},
|
||||||
|
time.Now(),
|
||||||
|
),
|
||||||
testutil.MustMetric("smart_attribute",
|
testutil.MustMetric("smart_attribute",
|
||||||
map[string]string{
|
map[string]string{
|
||||||
"device": ".",
|
"device": ".",
|
||||||
|
@ -496,6 +529,17 @@ func TestGatherNvme(t *testing.T) {
|
||||||
},
|
},
|
||||||
time.Now(),
|
time.Now(),
|
||||||
),
|
),
|
||||||
|
testutil.MustMetric("smart_attribute",
|
||||||
|
map[string]string{
|
||||||
|
"device": ".",
|
||||||
|
"name": "Critical_Warning",
|
||||||
|
"serial_no": "D704940282?",
|
||||||
|
},
|
||||||
|
map[string]interface{}{
|
||||||
|
"raw_value": int64(9),
|
||||||
|
},
|
||||||
|
time.Now(),
|
||||||
|
),
|
||||||
}
|
}
|
||||||
|
|
||||||
testutil.RequireMetricsEqual(t, expected, acc.GetTelegrafMetrics(),
|
testutil.RequireMetricsEqual(t, expected, acc.GetTelegrafMetrics(),
|
||||||
|
@ -934,7 +978,7 @@ Local Time is: Fri Jun 15 11:41:35 2018 UTC
|
||||||
SMART overall-health self-assessment test result: PASSED
|
SMART overall-health self-assessment test result: PASSED
|
||||||
|
|
||||||
SMART/Health Information (NVMe Log 0x02, NSID 0xffffffff)
|
SMART/Health Information (NVMe Log 0x02, NSID 0xffffffff)
|
||||||
Critical Warning: 0x00
|
Critical Warning: 0x09
|
||||||
Temperature: 38 Celsius
|
Temperature: 38 Celsius
|
||||||
Available Spare: 100%
|
Available Spare: 100%
|
||||||
Available Spare Threshold: 10%
|
Available Spare Threshold: 10%
|
||||||
|
|
Loading…
Reference in New Issue