From 6839e5573cabb355beedea74eab4864b4e1aeef1 Mon Sep 17 00:00:00 2001 From: Benjamin Schweizer <234864+benschweizer@users.noreply.github.com> Date: Tue, 3 Dec 2019 01:05:50 +0100 Subject: [PATCH] Add new "systemd_units" input plugin (#4532) --- CHANGELOG.md | 1 + plugins/inputs/all/all.go | 1 + plugins/inputs/systemd_units/README.md | 140 +++++++++++ .../systemd_units/systemd_units_linux.go | 221 ++++++++++++++++++ .../systemd_units/systemd_units_linux_test.go | 100 ++++++++ .../systemd_units/systemd_units_notlinux.go | 3 + 6 files changed, 466 insertions(+) create mode 100644 plugins/inputs/systemd_units/README.md create mode 100644 plugins/inputs/systemd_units/systemd_units_linux.go create mode 100644 plugins/inputs/systemd_units/systemd_units_linux_test.go create mode 100644 plugins/inputs/systemd_units/systemd_units_notlinux.go diff --git a/CHANGELOG.md b/CHANGELOG.md index c5b7750c8..00cb89e4f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,6 +16,7 @@ - [snmp_trap](/plugins/inputs/snmp_trap/README.md) - Contributed by @influxdata - [suricata](/plugins/inputs/suricata/README.md) - Contributed by @satta - [synproxy](/plugins/inputs/synproxy/README.md) - Contributed by @rfrenayworldstream +- [systemd_units](/plugins/inputs/systemd_units/README.md) - Contributed by @benschweizer #### New Processors diff --git a/plugins/inputs/all/all.go b/plugins/inputs/all/all.go index ca0aa4a32..3ce9823f6 100644 --- a/plugins/inputs/all/all.go +++ b/plugins/inputs/all/all.go @@ -148,6 +148,7 @@ import ( _ "github.com/influxdata/telegraf/plugins/inputs/syslog" _ "github.com/influxdata/telegraf/plugins/inputs/sysstat" _ "github.com/influxdata/telegraf/plugins/inputs/system" + _ "github.com/influxdata/telegraf/plugins/inputs/systemd_units" _ "github.com/influxdata/telegraf/plugins/inputs/tail" _ "github.com/influxdata/telegraf/plugins/inputs/tcp_listener" _ "github.com/influxdata/telegraf/plugins/inputs/teamspeak" diff --git a/plugins/inputs/systemd_units/README.md 
b/plugins/inputs/systemd_units/README.md new file mode 100644 index 000000000..c9d4a85da --- /dev/null +++ b/plugins/inputs/systemd_units/README.md @@ -0,0 +1,140 @@ +# Systemd Units Plugin + +The systemd_units plugin gathers systemd unit status on Linux. It relies on +`systemctl list-units --all --type=service` to collect data on service status. + +The results are tagged with the unit name and provide enumerated fields for +the load, active and sub states, indicating the unit health. + +This plugin is related to the [win_services module](../win_services/), which +fulfills the same purpose on Windows. + +In addition to services, this plugin can gather other unit types as well, +see `systemctl list-units --all --type help` for possible options. + +### Configuration +``` +[[inputs.systemd_units]] + ## Set timeout for systemctl execution + # timeout = "1s" + # + ## Filter for a specific unit type, default is "service", other possible + ## values are "socket", "target", "device", "mount", "automount", "swap", + ## "timer", "path", "slice" and "scope": + # unittype = "service" +``` + +### Metrics +- systemd_units: + - tags: + - name (string, unit name) + - load (string, load state) + - active (string, active state) + - sub (string, sub state) + - fields: + - load_code (int, see below) + - active_code (int, see below) + - sub_code (int, see below) + +#### Load + +enumeration of [unit_load_state_table](https://github.com/systemd/systemd/blob/c87700a1335f489be31cd3549927da68b5638819/src/basic/unit-def.c#L87) + +| Value | Meaning | Description | +| ----- | ------- | ----------- | +| 0 | loaded | unit is ~ | +| 1 | stub | unit is ~ | +| 2 | not-found | unit is ~ | +| 3 | bad-setting | unit is ~ | +| 4 | error | unit is ~ | +| 5 | merged | unit is ~ | +| 6 | masked | unit is ~ | + +#### Active + +enumeration of [unit_active_state_table](https://github.com/systemd/systemd/blob/c87700a1335f489be31cd3549927da68b5638819/src/basic/unit-def.c#L99) + +| Value | Meaning | 
Description | +| ----- | ------- | ----------- | +| 0 | active | unit is ~ | +| 1 | reloading | unit is ~ | +| 2 | inactive | unit is ~ | +| 3 | failed | unit is ~ | +| 4 | activating | unit is ~ | +| 5 | deactivating | unit is ~ | + +#### Sub + +enumeration of sub states, see various [unittype_state_tables](https://github.com/systemd/systemd/blob/c87700a1335f489be31cd3549927da68b5638819/src/basic/unit-def.c#L163); +duplicates were removed, tables are hex aligned to keep some space for future +values + +| Value | Meaning | Description | +| ----- | ------- | ----------- | +| | | service_state_table start at 0x0000 | +| 0x0000 | running | unit is ~ | +| 0x0001 | dead | unit is ~ | +| 0x0002 | start-pre | unit is ~ | +| 0x0003 | start | unit is ~ | +| 0x0004 | exited | unit is ~ | +| 0x0005 | reload | unit is ~ | +| 0x0006 | stop | unit is ~ | +| 0x0007 | stop-watchdog | unit is ~ | +| 0x0008 | stop-sigterm | unit is ~ | +| 0x0009 | stop-sigkill | unit is ~ | +| 0x000a | stop-post | unit is ~ | +| 0x000b | final-sigterm | unit is ~ | +| 0x000c | failed | unit is ~ | +| 0x000d | auto-restart | unit is ~ | +| | | automount_state_table start at 0x0010 | +| 0x0010 | waiting | unit is ~ | +| | | device_state_table start at 0x0020 | +| 0x0020 | tentative | unit is ~ | +| 0x0021 | plugged | unit is ~ | +| | | mount_state_table start at 0x0030 | +| 0x0030 | mounting | unit is ~ | +| 0x0031 | mounting-done | unit is ~ | +| 0x0032 | mounted | unit is ~ | +| 0x0033 | remounting | unit is ~ | +| 0x0034 | unmounting | unit is ~ | +| 0x0035 | remounting-sigterm | unit is ~ | +| 0x0036 | remounting-sigkill | unit is ~ | +| 0x0037 | unmounting-sigterm | unit is ~ | +| 0x0038 | unmounting-sigkill | unit is ~ | +| | | path_state_table start at 0x0040 | +| | | scope_state_table start at 0x0050 | +| 0x0050 | abandoned | unit is ~ | +| | | slice_state_table start at 0x0060 | +| 0x0060 | active | unit is ~ | +| | | socket_state_table start at 0x0070 | +| 0x0070 | start-chown | 
unit is ~ | +| 0x0071 | start-post | unit is ~ | +| 0x0072 | listening | unit is ~ | +| 0x0073 | stop-pre | unit is ~ | +| 0x0074 | stop-pre-sigterm | unit is ~ | +| 0x0075 | stop-pre-sigkill | unit is ~ | +| 0x0076 | final-sigkill | unit is ~ | +| | | swap_state_table start at 0x0080 | +| 0x0080 | activating | unit is ~ | +| 0x0081 | activating-done | unit is ~ | +| 0x0082 | deactivating | unit is ~ | +| 0x0083 | deactivating-sigterm | unit is ~ | +| 0x0084 | deactivating-sigkill | unit is ~ | +| | | target_state_table start at 0x0090 | +| | | timer_state_table start at 0x00a0 | +| 0x00a0 | elapsed | unit is ~ | +| | | | + +### Example Output + +Linux Systemd Units: +``` +$ telegraf --test --config /tmp/telegraf.conf +> systemd_units,host=host1.example.com,name=dbus.service,load=loaded,active=active,sub=running load_code=0i,active_code=0i,sub_code=0i 1533730725000000000 +> systemd_units,host=host1.example.com,name=networking.service,load=loaded,active=failed,sub=failed load_code=0i,active_code=3i,sub_code=12i 1533730725000000000 +> systemd_units,host=host1.example.com,name=ssh.service,load=loaded,active=active,sub=running load_code=0i,active_code=0i,sub_code=0i 1533730725000000000 +... 
+``` + +### Possible Improvements +- add blacklist to filter names diff --git a/plugins/inputs/systemd_units/systemd_units_linux.go b/plugins/inputs/systemd_units/systemd_units_linux.go new file mode 100644 index 000000000..64caf03d0 --- /dev/null +++ b/plugins/inputs/systemd_units/systemd_units_linux.go @@ -0,0 +1,221 @@ +package systemd_units + +import ( + "bufio" + "bytes" + "fmt" + "os/exec" + "strings" + "time" + + "github.com/influxdata/telegraf" + "github.com/influxdata/telegraf/internal" + "github.com/influxdata/telegraf/plugins/inputs" +) + +// SystemdUnits is a telegraf plugin to gather systemd unit status +type SystemdUnits struct { + Timeout internal.Duration + UnitType string `toml:"unittype"` + systemctl systemctl +} + +type systemctl func(Timeout internal.Duration, UnitType string) (*bytes.Buffer, error) + +const measurement = "systemd_units" + +// Below are mappings of systemd state tables as defined in +// https://github.com/systemd/systemd/blob/c87700a1335f489be31cd3549927da68b5638819/src/basic/unit-def.c +// Duplicate strings are removed from this list. 
+var load_map = map[string]int{ + "loaded": 0, + "stub": 1, + "not-found": 2, + "bad-setting": 3, + "error": 4, + "merged": 5, + "masked": 6, +} + +var active_map = map[string]int{ + "active": 0, + "reloading": 1, + "inactive": 2, + "failed": 3, + "activating": 4, + "deactivating": 5, +} + +var sub_map = map[string]int{ + // service_state_table, offset 0x0000 + "running": 0x0000, + "dead": 0x0001, + "start-pre": 0x0002, + "start": 0x0003, + "exited": 0x0004, + "reload": 0x0005, + "stop": 0x0006, + "stop-watchdog": 0x0007, + "stop-sigterm": 0x0008, + "stop-sigkill": 0x0009, + "stop-post": 0x000a, + "final-sigterm": 0x000b, + "failed": 0x000c, + "auto-restart": 0x000d, + + // automount_state_table, offset 0x0010 + "waiting": 0x0010, + + // device_state_table, offset 0x0020 + "tentative": 0x0020, + "plugged": 0x0021, + + // mount_state_table, offset 0x0030 + "mounting": 0x0030, + "mounting-done": 0x0031, + "mounted": 0x0032, + "remounting": 0x0033, + "unmounting": 0x0034, + "remounting-sigterm": 0x0035, + "remounting-sigkill": 0x0036, + "unmounting-sigterm": 0x0037, + "unmounting-sigkill": 0x0038, + + // path_state_table, offset 0x0040 + + // scope_state_table, offset 0x0050 + "abandoned": 0x0050, + + // slice_state_table, offset 0x0060 + "active": 0x0060, + + // socket_state_table, offset 0x0070 + "start-chown": 0x0070, + "start-post": 0x0071, + "listening": 0x0072, + "stop-pre": 0x0073, + "stop-pre-sigterm": 0x0074, + "stop-pre-sigkill": 0x0075, + "final-sigkill": 0x0076, + + // swap_state_table, offset 0x0080 + "activating": 0x0080, + "activating-done": 0x0081, + "deactivating": 0x0082, + "deactivating-sigterm": 0x0083, + "deactivating-sigkill": 0x0084, + + // target_state_table, offset 0x0090 + + // timer_state_table, offset 0x00a0 + "elapsed": 0x00a0, +} + +var ( + defaultTimeout = internal.Duration{Duration: time.Second} + defaultUnitType = "service" +) + +// Description returns a short description of the plugin +func (s *SystemdUnits) Description() string { + 
return "Gather systemd units state" +} + +// SampleConfig returns sample configuration options. +func (s *SystemdUnits) SampleConfig() string { + return ` + ## Set timeout for systemctl execution + # timeout = "1s" + # + ## Filter for a specific unit type, default is "service", other possible + ## values are "socket", "target", "device", "mount", "automount", "swap", + ## "timer", "path", "slice" and "scope": + # unittype = "service" +` +} + +// Gather parses systemctl output and adds one metric of state codes per unit to the Accumulator. +func (s *SystemdUnits) Gather(acc telegraf.Accumulator) error { + out, err := s.systemctl(s.Timeout, s.UnitType) + if err != nil { + return err + } + + scanner := bufio.NewScanner(out) + for scanner.Scan() { + line := scanner.Text() + + data := strings.Fields(line) + if len(data) < 4 { + acc.AddError(fmt.Errorf("Error parsing line (expected at least 4 fields): %s", line)) + continue + } + name := data[0] + load := data[1] + active := data[2] + sub := data[3] + tags := map[string]string{ + "name": name, + "load": load, + "active": active, + "sub": sub, + } + + var ( + load_code int + active_code int + sub_code int + ok bool + ) + if load_code, ok = load_map[load]; !ok { + acc.AddError(fmt.Errorf("Error parsing field 'load', value not in map: %s", load)) + continue + } + if active_code, ok = active_map[active]; !ok { + acc.AddError(fmt.Errorf("Error parsing field 'active', value not in map: %s", active)) + continue + } + if sub_code, ok = sub_map[sub]; !ok { + acc.AddError(fmt.Errorf("Error parsing field 'sub', value not in map: %s", sub)) + continue + } + fields := map[string]interface{}{ + "load_code": load_code, + "active_code": active_code, + "sub_code": sub_code, + } + + acc.AddFields(measurement, fields, tags) + } + + return nil +} + +func setSystemctl(Timeout internal.Duration, UnitType string) (*bytes.Buffer, error) { + // is systemctl available ? 
+ systemctlPath, err := exec.LookPath("systemctl") + if err != nil { + return nil, err + } + + cmd := exec.Command(systemctlPath, "list-units", "--all", "--plain", fmt.Sprintf("--type=%s", UnitType), "--no-legend") + + var out bytes.Buffer + cmd.Stdout = &out + err = internal.RunTimeout(cmd, Timeout.Duration) + if err != nil { + return &out, fmt.Errorf("error running systemctl list-units --all --plain --type=%s --no-legend: %s", UnitType, err) + } + + return &out, nil +} + +func init() { + inputs.Add("systemd_units", func() telegraf.Input { + return &SystemdUnits{ + systemctl: setSystemctl, + Timeout: defaultTimeout, + UnitType: defaultUnitType, + } + }) +} diff --git a/plugins/inputs/systemd_units/systemd_units_linux_test.go b/plugins/inputs/systemd_units/systemd_units_linux_test.go new file mode 100644 index 000000000..f45922bb9 --- /dev/null +++ b/plugins/inputs/systemd_units/systemd_units_linux_test.go @@ -0,0 +1,100 @@ +package systemd_units + +import ( + "bytes" + "fmt" + "reflect" + "testing" + + "github.com/influxdata/telegraf/internal" + "github.com/influxdata/telegraf/testutil" +) + +func TestSystemdUnits(t *testing.T) { + tests := []struct { + name string + line string + tags map[string]string + fields map[string]interface{} + status int + err error + }{ + { + name: "example loaded active running", + line: "example.service loaded active running example service description", + tags: map[string]string{"name": "example.service", "load": "loaded", "active": "active", "sub": "running"}, + fields: map[string]interface{}{ + "load_code": 0, + "active_code": 0, + "sub_code": 0, + }, + }, + { + name: "example loaded active exited", + line: "example.service loaded active exited example service description", + tags: map[string]string{"name": "example.service", "load": "loaded", "active": "active", "sub": "exited"}, + fields: map[string]interface{}{ + "load_code": 0, + "active_code": 0, + "sub_code": 4, + }, + }, + { + name: "example loaded failed failed", + line: "example.service 
loaded failed failed example service description", + tags: map[string]string{"name": "example.service", "load": "loaded", "active": "failed", "sub": "failed"}, + fields: map[string]interface{}{ + "load_code": 0, + "active_code": 3, + "sub_code": 12, + }, + }, + { + name: "example not-found inactive dead", + line: "example.service not-found inactive dead example service description", + tags: map[string]string{"name": "example.service", "load": "not-found", "active": "inactive", "sub": "dead"}, + fields: map[string]interface{}{ + "load_code": 2, + "active_code": 2, + "sub_code": 1, + }, + }, + { + name: "example unknown unknown unknown", + line: "example.service unknown unknown unknown example service description", + err: fmt.Errorf("Error parsing field 'load', value not in map: %s", "unknown"), + }, + { + name: "example too few fields", + line: "example.service loaded fai", + err: fmt.Errorf("Error parsing line (expected at least 4 fields): %s", "example.service loaded fai"), + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + systemd_units := &SystemdUnits{ + systemctl: func(Timeout internal.Duration, UnitType string) (*bytes.Buffer, error) { + return bytes.NewBufferString(tt.line), nil + }, + } + acc := new(testutil.Accumulator) + err := acc.GatherError(systemd_units.Gather) + if !reflect.DeepEqual(tt.err, err) { + t.Errorf("%s: expected error '%#v' got '%#v'", tt.name, tt.err, err) + } + if len(acc.Metrics) > 0 { + m := acc.Metrics[0] + if !reflect.DeepEqual(m.Measurement, measurement) { + t.Errorf("%s: expected measurement '%#v' got '%#v'\n", tt.name, measurement, m.Measurement) + } + if !reflect.DeepEqual(m.Tags, tt.tags) { + t.Errorf("%s: expected tags\n%#v got\n%#v\n", tt.name, tt.tags, m.Tags) + } + if !reflect.DeepEqual(m.Fields, tt.fields) { + t.Errorf("%s: expected fields\n%#v got\n%#v\n", tt.name, tt.fields, m.Fields) + } + } + }) + } +} diff --git a/plugins/inputs/systemd_units/systemd_units_notlinux.go 
b/plugins/inputs/systemd_units/systemd_units_notlinux.go new file mode 100644 index 000000000..f53cea3de --- /dev/null +++ b/plugins/inputs/systemd_units/systemd_units_notlinux.go @@ -0,0 +1,3 @@ +// +build !linux + +package systemd_units