Split multiple sensor keys in ipmi input (#4450)

This commit is contained in:
Jonathan G 2018-07-31 17:56:03 -06:00 committed by Greg
parent efe61eeb73
commit b93460dd06
4 changed files with 361 additions and 38 deletions

View File

@ -1976,6 +1976,9 @@
# ## Timeout for the ipmitool command to complete # ## Timeout for the ipmitool command to complete
# timeout = "20s" # timeout = "20s"
# ## Schema Version: (Optional, defaults to version 1)
# metric_version = 2
# # Gather packets and bytes counters from Linux ipsets # # Gather packets and bytes counters from Linux ipsets
# [[inputs.ipset]] # [[inputs.ipset]]

View File

@ -8,6 +8,10 @@ If no servers are specified, the plugin will query the local machine sensor stat
``` ```
ipmitool sdr ipmitool sdr
``` ```
or with the version 2 schema:
```
ipmitool sdr elist
```
When one or more servers are specified, the plugin will use the following command to collect remote host sensor stats: When one or more servers are specified, the plugin will use the following command to collect remote host sensor stats:
@ -41,19 +45,36 @@ ipmitool -I lan -H SERVER -U USERID -P PASSW0RD sdr
## Timeout for the ipmitool command to complete. Default is 20 seconds. ## Timeout for the ipmitool command to complete. Default is 20 seconds.
timeout = "20s" timeout = "20s"
## Schema Version: (Optional, defaults to version 1)
metric_version = 2
``` ```
### Measurements ### Measurements
Version 1 schema:
- ipmi_sensor: - ipmi_sensor:
- tags: - tags:
- name - name
- unit - unit
- host
- server (only when retrieving stats from remote servers) - server (only when retrieving stats from remote servers)
- fields: - fields:
- status (int) - status (int, 1=ok status_code/0=anything else)
- value (float) - value (float)
Version 2 schema:
- ipmi_sensor:
- tags:
- name
- entity_id (can help uniquify duplicate names)
- status_code (two letter code from IPMI documentation)
- status_desc (extended status description field)
- unit (only on analog values)
- host
- server (only when retrieving stats from remote servers)
- fields:
- value (float)
#### Permissions #### Permissions
@ -68,24 +89,36 @@ KERNEL=="ipmi*", MODE="660", GROUP="telegraf"
### Example Output ### Example Output
#### Version 1 Schema
When retrieving stats from a remote server: When retrieving stats from a remote server:
``` ```
ipmi_sensor,server=10.20.2.203,unit=degrees_c,name=ambient_temp status=1i,value=20 1458488465012559455 ipmi_sensor,server=10.20.2.203,name=uid_light value=0,status=1i 1517125513000000000
ipmi_sensor,server=10.20.2.203,unit=feet,name=altitude status=1i,value=80 1458488465012688613 ipmi_sensor,server=10.20.2.203,name=sys._health_led status=1i,value=0 1517125513000000000
ipmi_sensor,server=10.20.2.203,unit=watts,name=avg_power status=1i,value=220 1458488465012776511 ipmi_sensor,server=10.20.2.203,name=power_supply_1,unit=watts status=1i,value=110 1517125513000000000
ipmi_sensor,server=10.20.2.203,unit=volts,name=planar_3.3v status=1i,value=3.28 1458488465012861875 ipmi_sensor,server=10.20.2.203,name=power_supply_2,unit=watts status=1i,value=120 1517125513000000000
ipmi_sensor,server=10.20.2.203,unit=volts,name=planar_vbat status=1i,value=3.04 1458488465013072508 ipmi_sensor,server=10.20.2.203,name=power_supplies value=0,status=1i 1517125513000000000
ipmi_sensor,server=10.20.2.203,unit=rpm,name=fan_1a_tach status=1i,value=2610 1458488465013137932 ipmi_sensor,server=10.20.2.203,name=fan_1,unit=percent status=1i,value=43.12 1517125513000000000
ipmi_sensor,server=10.20.2.203,unit=rpm,name=fan_1b_tach status=1i,value=1775 1458488465013279896
``` ```
When retrieving stats from the local machine (no server specified): When retrieving stats from the local machine (no server specified):
``` ```
ipmi_sensor,unit=degrees_c,name=ambient_temp status=1i,value=20 1458488465012559455 ipmi_sensor,name=uid_light value=0,status=1i 1517125513000000000
ipmi_sensor,unit=feet,name=altitude status=1i,value=80 1458488465012688613 ipmi_sensor,name=sys._health_led status=1i,value=0 1517125513000000000
ipmi_sensor,unit=watts,name=avg_power status=1i,value=220 1458488465012776511 ipmi_sensor,name=power_supply_1,unit=watts status=1i,value=110 1517125513000000000
ipmi_sensor,unit=volts,name=planar_3.3v status=1i,value=3.28 1458488465012861875 ipmi_sensor,name=power_supply_2,unit=watts status=1i,value=120 1517125513000000000
ipmi_sensor,unit=volts,name=planar_vbat status=1i,value=3.04 1458488465013072508 ipmi_sensor,name=power_supplies value=0,status=1i 1517125513000000000
ipmi_sensor,unit=rpm,name=fan_1a_tach status=1i,value=2610 1458488465013137932 ipmi_sensor,name=fan_1,unit=percent status=1i,value=43.12 1517125513000000000
ipmi_sensor,unit=rpm,name=fan_1b_tach status=1i,value=1775 1458488465013279896 ```
#### Version 2 Schema
When retrieving stats from the local machine (no server specified):
```
ipmi_sensor,name=uid_light,entity_id=23.1,status_code=ok,status_desc=ok value=0 1517125474000000000
ipmi_sensor,name=sys._health_led,entity_id=23.2,status_code=ok,status_desc=ok value=0 1517125474000000000
ipmi_sensor,entity_id=10.1,name=power_supply_1,status_code=ok,status_desc=presence_detected,unit=watts value=110 1517125474000000000
ipmi_sensor,name=power_supply_2,entity_id=10.2,status_code=ok,unit=watts,status_desc=presence_detected value=125 1517125474000000000
ipmi_sensor,name=power_supplies,entity_id=10.3,status_code=ok,status_desc=fully_redundant value=0 1517125474000000000
ipmi_sensor,entity_id=7.1,name=fan_1,status_code=ok,status_desc=transition_to_running,unit=percent value=43.12 1517125474000000000
``` ```

View File

@ -1,8 +1,11 @@
package ipmi_sensor package ipmi_sensor
import ( import (
"bufio"
"bytes"
"fmt" "fmt"
"os/exec" "os/exec"
"regexp"
"strconv" "strconv"
"strings" "strings"
"sync" "sync"
@ -14,14 +17,20 @@ import (
) )
var ( var (
execCommand = exec.Command // execCommand is used to mock commands in tests. execCommand = exec.Command // execCommand is used to mock commands in tests.
re_v1_parse_line = regexp.MustCompile(`^(?P<name>[^|]*)\|(?P<description>[^|]*)\|(?P<status_code>.*)`)
re_v2_parse_line = regexp.MustCompile(`^(?P<name>[^|]*)\|[^|]+\|(?P<status_code>[^|]*)\|(?P<entity_id>[^|]*)\|(?:(?P<description>[^|]+))?`)
re_v2_parse_description = regexp.MustCompile(`^(?P<analogValue>[0-9.]+)\s(?P<analogUnit>.*)|(?P<status>.+)|^$`)
re_v2_parse_unit = regexp.MustCompile(`^(?P<realAnalogUnit>[^,]+)(?:,\s*(?P<statusDesc>.*))?`)
) )
// Ipmi stores the configuration values for the ipmi_sensor input plugin
type Ipmi struct { type Ipmi struct {
Path string Path string
Privilege string Privilege string
Servers []string Servers []string
Timeout internal.Duration Timeout internal.Duration
MetricVersion int
} }
var sampleConfig = ` var sampleConfig = `
@ -46,16 +55,22 @@ var sampleConfig = `
## Timeout for the ipmitool command to complete ## Timeout for the ipmitool command to complete
timeout = "20s" timeout = "20s"
## Schema Version: (Optional, defaults to version 1)
metric_version = 2
` `
// SampleConfig returns the documentation about the sample configuration
func (m *Ipmi) SampleConfig() string { func (m *Ipmi) SampleConfig() string {
return sampleConfig return sampleConfig
} }
// Description returns a basic description for the plugin functions
func (m *Ipmi) Description() string { func (m *Ipmi) Description() string {
return "Read metrics from the bare metal servers via IPMI" return "Read metrics from the bare metal servers via IPMI"
} }
// Gather is the main execution function for the plugin
func (m *Ipmi) Gather(acc telegraf.Accumulator) error { func (m *Ipmi) Gather(acc telegraf.Accumulator) error {
if len(m.Path) == 0 { if len(m.Path) == 0 {
return fmt.Errorf("ipmitool not found: verify that ipmitool is installed and that ipmitool is in your PATH") return fmt.Errorf("ipmitool not found: verify that ipmitool is installed and that ipmitool is in your PATH")
@ -93,23 +108,33 @@ func (m *Ipmi) parse(acc telegraf.Accumulator, server string) error {
opts = conn.options() opts = conn.options()
} }
opts = append(opts, "sdr") opts = append(opts, "sdr")
if m.MetricVersion == 2 {
opts = append(opts, "elist")
}
cmd := execCommand(m.Path, opts...) cmd := execCommand(m.Path, opts...)
out, err := internal.CombinedOutputTimeout(cmd, m.Timeout.Duration) out, err := internal.CombinedOutputTimeout(cmd, m.Timeout.Duration)
timestamp := time.Now()
if err != nil { if err != nil {
return fmt.Errorf("failed to run command %s: %s - %s", strings.Join(cmd.Args, " "), err, string(out)) return fmt.Errorf("failed to run command %s: %s - %s", strings.Join(cmd.Args, " "), err, string(out))
} }
if m.MetricVersion == 2 {
return parseV2(acc, hostname, out, timestamp)
}
return parseV1(acc, hostname, out, timestamp)
}
func parseV1(acc telegraf.Accumulator, hostname string, cmdOut []byte, measured_at time.Time) error {
// each line will look something like // each line will look something like
// Planar VBAT | 3.05 Volts | ok // Planar VBAT | 3.05 Volts | ok
lines := strings.Split(string(out), "\n") scanner := bufio.NewScanner(bytes.NewReader(cmdOut))
for i := 0; i < len(lines); i++ { for scanner.Scan() {
vals := strings.Split(lines[i], "|") ipmiFields := extractFieldsFromRegex(re_v1_parse_line, scanner.Text())
if len(vals) != 3 { if len(ipmiFields) != 3 {
continue continue
} }
tags := map[string]string{ tags := map[string]string{
"name": transform(vals[0]), "name": transform(ipmiFields["name"]),
} }
// tag the server is we have one // tag the server is we have one
@ -118,18 +143,20 @@ func (m *Ipmi) parse(acc telegraf.Accumulator, server string) error {
} }
fields := make(map[string]interface{}) fields := make(map[string]interface{})
if strings.EqualFold("ok", trim(vals[2])) { if strings.EqualFold("ok", trim(ipmiFields["status_code"])) {
fields["status"] = 1 fields["status"] = 1
} else { } else {
fields["status"] = 0 fields["status"] = 0
} }
val1 := trim(vals[1]) if strings.Index(ipmiFields["description"], " ") > 0 {
if strings.Index(val1, " ") > 0 {
// split middle column into value and unit // split middle column into value and unit
valunit := strings.SplitN(val1, " ", 2) valunit := strings.SplitN(ipmiFields["description"], " ", 2)
fields["value"] = Atofloat(valunit[0]) var err error
fields["value"], err = aToFloat(valunit[0])
if err != nil {
continue
}
if len(valunit) > 1 { if len(valunit) > 1 {
tags["unit"] = transform(valunit[1]) tags["unit"] = transform(valunit[1])
} }
@ -137,19 +164,85 @@ func (m *Ipmi) parse(acc telegraf.Accumulator, server string) error {
fields["value"] = 0.0 fields["value"] = 0.0
} }
acc.AddFields("ipmi_sensor", fields, tags, time.Now()) acc.AddFields("ipmi_sensor", fields, tags, measured_at)
} }
return nil return scanner.Err()
} }
func Atofloat(val string) float64 { func parseV2(acc telegraf.Accumulator, hostname string, cmdOut []byte, measured_at time.Time) error {
// each line will look something like
// CMOS Battery | 65h | ok | 7.1 |
// Temp | 0Eh | ok | 3.1 | 55 degrees C
// Drive 0 | A0h | ok | 7.1 | Drive Present
scanner := bufio.NewScanner(bytes.NewReader(cmdOut))
for scanner.Scan() {
ipmiFields := extractFieldsFromRegex(re_v2_parse_line, scanner.Text())
if len(ipmiFields) < 3 || len(ipmiFields) > 4 {
continue
}
tags := map[string]string{
"name": transform(ipmiFields["name"]),
}
// tag the server if we have one
if hostname != "" {
tags["server"] = hostname
}
tags["entity_id"] = transform(ipmiFields["entity_id"])
tags["status_code"] = trim(ipmiFields["status_code"])
fields := make(map[string]interface{})
descriptionResults := extractFieldsFromRegex(re_v2_parse_description, trim(ipmiFields["description"]))
// This is an analog value with a unit
if descriptionResults["analogValue"] != "" && len(descriptionResults["analogUnit"]) >= 1 {
var err error
fields["value"], err = aToFloat(descriptionResults["analogValue"])
if err != nil {
continue
}
// Some implementations add an extra status to their analog units
unitResults := extractFieldsFromRegex(re_v2_parse_unit, descriptionResults["analogUnit"])
tags["unit"] = transform(unitResults["realAnalogUnit"])
if unitResults["statusDesc"] != "" {
tags["status_desc"] = transform(unitResults["statusDesc"])
}
} else {
// This is a status value
fields["value"] = 0.0
// Extended status descriptions aren't required, in which case for consistency re-use the status code
if descriptionResults["status"] != "" {
tags["status_desc"] = transform(descriptionResults["status"])
} else {
tags["status_desc"] = transform(ipmiFields["status_code"])
}
}
acc.AddFields("ipmi_sensor", fields, tags, measured_at)
}
return scanner.Err()
}
// extractFieldsFromRegex applies re to input and returns a map from each named
// capture group to its trimmed submatch.
//
// An empty map is returned when input is empty or when re does not match
// input, so callers can reject a line by checking the size of the result.
func extractFieldsFromRegex(re *regexp.Regexp, input string) map[string]string {
	results := make(map[string]string)
	if input == "" {
		return results
	}

	submatches := re.FindStringSubmatch(input)
	if submatches == nil {
		// No match: FindStringSubmatch returns nil, and indexing it in the
		// loop below would panic with an index out of range.
		return results
	}

	for i, name := range re.SubexpNames() {
		// Unnamed groups, including the whole-match entry at index 0, have an
		// empty name and are skipped.
		// NOTE(review): a group whose name is identical to the input text is
		// also skipped; this is kept from the previous implementation so the
		// results for matching input are unchanged — confirm it is intended.
		if name != "" && name != input {
			results[name] = trim(submatches[i])
		}
	}
	return results
}
// aToFloat converts string representations of numbers to float64 values
func aToFloat(val string) (float64, error) {
f, err := strconv.ParseFloat(val, 64) f, err := strconv.ParseFloat(val, 64)
if err != nil { if err != nil {
return 0.0 return 0.0, err
} else {
return f
} }
return f, nil
} }
func trim(s string) string { func trim(s string) string {

View File

@ -28,7 +28,7 @@ func TestGather(t *testing.T) {
require.NoError(t, err) require.NoError(t, err)
assert.Equal(t, acc.NFields(), 266, "non-numeric measurements should be ignored") assert.Equal(t, acc.NFields(), 262, "non-numeric measurements should be ignored")
conn := NewConnection(i.Servers[0], i.Privilege) conn := NewConnection(i.Servers[0], i.Privilege)
assert.Equal(t, "USERID", conn.Username) assert.Equal(t, "USERID", conn.Username)
@ -127,6 +127,7 @@ func TestGather(t *testing.T) {
} }
err = acc.GatherError(i.Gather) err = acc.GatherError(i.Gather)
require.NoError(t, err)
var testsWithoutServer = []struct { var testsWithoutServer = []struct {
fields map[string]interface{} fields map[string]interface{}
@ -378,3 +379,196 @@ OS RealTime Mod | 0x00 | ok
} }
os.Exit(0) os.Exit(0)
} }
// TestGatherV2 runs the plugin with MetricVersion 2 against the mocked
// ipmitool helper process, first with a remote server configured and then
// with no server, and checks that a curated set of metrics was accumulated.
func TestGatherV2(t *testing.T) {
	type expectedMetric struct {
		fields map[string]interface{}
		tags   map[string]string
	}

	remote := &Ipmi{
		Servers:       []string{"USERID:PASSW0RD@lan(192.168.1.1)"},
		Path:          "ipmitool",
		Privilege:     "USER",
		Timeout:       internal.Duration{Duration: time.Second * 5},
		MetricVersion: 2,
	}

	// Route every command execution to the TestHelperProcessV2 mock.
	execCommand = fakeExecCommandV2

	var acc testutil.Accumulator

	gatherErr := acc.GatherError(remote.Gather)
	require.NoError(t, gatherErr)

	conn := NewConnection(remote.Servers[0], remote.Privilege)
	assert.Equal(t, "USERID", conn.Username)
	assert.Equal(t, "lan", conn.Interface)

	remoteExpected := []expectedMetric{
		//SEL | 72h | ns | 7.1 | No Reading
		{
			fields: map[string]interface{}{
				"value": float64(0),
			},
			tags: map[string]string{
				"name":        "sel",
				"entity_id":   "7.1",
				"status_code": "ns",
				"status_desc": "no_reading",
				"server":      "192.168.1.1",
			},
		},
	}
	for _, want := range remoteExpected {
		acc.AssertContainsTaggedFields(t, "ipmi_sensor", want.fields, want.tags)
	}

	// Second pass: no servers configured. The same accumulator is reused, so
	// it still holds the metrics from the first pass.
	local := &Ipmi{
		Path:          "ipmitool",
		Timeout:       internal.Duration{Duration: time.Second * 5},
		MetricVersion: 2,
	}

	gatherErr = acc.GatherError(local.Gather)
	require.NoError(t, gatherErr)

	localExpected := []expectedMetric{
		//SEL | 72h | ns | 7.1 | No Reading
		{
			fields: map[string]interface{}{
				"value": float64(0),
			},
			tags: map[string]string{
				"name":        "sel",
				"entity_id":   "7.1",
				"status_code": "ns",
				"status_desc": "no_reading",
			},
		},
		//Intrusion | 73h | ok | 7.1 |
		{
			fields: map[string]interface{}{
				"value": float64(0),
			},
			tags: map[string]string{
				"name":        "intrusion",
				"entity_id":   "7.1",
				"status_code": "ok",
				"status_desc": "ok",
			},
		},
		//Fan1 | 30h | ok | 7.1 | 5040 RPM
		{
			fields: map[string]interface{}{
				"value": float64(5040),
			},
			tags: map[string]string{
				"name":        "fan1",
				"entity_id":   "7.1",
				"status_code": "ok",
				"unit":        "rpm",
			},
		},
		//Inlet Temp | 04h | ok | 7.1 | 25 degrees C
		{
			fields: map[string]interface{}{
				"value": float64(25),
			},
			tags: map[string]string{
				"name":        "inlet_temp",
				"entity_id":   "7.1",
				"status_code": "ok",
				"unit":        "degrees_c",
			},
		},
		//USB Cable Pres | 50h | ok | 7.1 | Connected
		{
			fields: map[string]interface{}{
				"value": float64(0),
			},
			tags: map[string]string{
				"name":        "usb_cable_pres",
				"entity_id":   "7.1",
				"status_code": "ok",
				"status_desc": "connected",
			},
		},
		//Current 1 | 6Ah | ok | 10.1 | 7.20 Amps
		{
			fields: map[string]interface{}{
				"value": float64(7.2),
			},
			tags: map[string]string{
				"name":        "current_1",
				"entity_id":   "10.1",
				"status_code": "ok",
				"unit":        "amps",
			},
		},
		//Power Supply 1 | 03h | ok | 10.1 | 110 Watts, Presence detected
		{
			fields: map[string]interface{}{
				"value": float64(110),
			},
			tags: map[string]string{
				"name":        "power_supply_1",
				"entity_id":   "10.1",
				"status_code": "ok",
				"unit":        "watts",
				"status_desc": "presence_detected",
			},
		},
	}
	for _, want := range localExpected {
		acc.AssertContainsTaggedFields(t, "ipmi_sensor", want.fields, want.tags)
	}
}
// fakeExecCommandV2 is a helper function that mocks the exec.Command call.
// Instead of running command, it re-invokes the current test binary so that
// only TestHelperProcessV2 runs, passing command and args after a "--"
// separator and setting GO_WANT_HELPER_PROCESS=1 as the sole environment entry.
func fakeExecCommandV2(command string, args ...string) *exec.Cmd {
	helperArgs := make([]string, 0, len(args)+3)
	helperArgs = append(helperArgs, "-test.run=TestHelperProcessV2", "--", command)
	helperArgs = append(helperArgs, args...)

	helper := exec.Command(os.Args[0], helperArgs...)
	helper.Env = []string{"GO_WANT_HELPER_PROCESS=1"}
	return helper
}
// TestHelperProcessV2 isn't a real test. It's used to mock exec.Command:
// fakeExecCommandV2 re-runs the test binary as
//   <test binary> -test.run=TestHelperProcessV2 -- <command> [args...]
// with GO_WANT_HELPER_PROCESS=1 set. In that mode this function prints the
// mockData below when <command> is "ipmitool" and exits the process; without
// the environment variable it returns immediately.
func TestHelperProcessV2(t *testing.T) {
	if os.Getenv("GO_WANT_HELPER_PROCESS") != "1" {
		return
	}

	// Curated list of use cases instead of full dumps
	const mockData = `SEL | 72h | ns | 7.1 | No Reading
Intrusion | 73h | ok | 7.1 |
Fan1 | 30h | ok | 7.1 | 5040 RPM
Inlet Temp | 04h | ok | 7.1 | 25 degrees C
USB Cable Pres | 50h | ok | 7.1 | Connected
Current 1 | 6Ah | ok | 10.1 | 7.20 Amps
Power Supply 1 | 03h | ok | 10.1 | 110 Watts, Presence detected
`

	// The first three arguments are test plumbing (binary path, -test.run
	// flag, "--"), so the mocked command name sits at index 3.
	if os.Args[3] != "ipmitool" {
		fmt.Fprint(os.Stdout, "command not found")
		os.Exit(1)
	}

	fmt.Fprint(os.Stdout, mockData)
	os.Exit(0)
}