Add new "systemd_units" input plugin (#4532)
This commit is contained in:
parent
fd2e9889ac
commit
6839e5573c
|
@ -16,6 +16,7 @@
|
|||
- [snmp_trap](/plugins/inputs/snmp_trap/README.md) - Contributed by @influxdata
|
||||
- [suricata](/plugins/inputs/suricata/README.md) - Contributed by @satta
|
||||
- [synproxy](/plugins/inputs/synproxy/README.md) - Contributed by @rfrenayworldstream
|
||||
- [systemd_units](/plugins/inputs/systemd_units/README.md) - Contributed by @benschweizer
|
||||
|
||||
#### New Processors
|
||||
|
||||
|
|
|
@ -148,6 +148,7 @@ import (
|
|||
_ "github.com/influxdata/telegraf/plugins/inputs/syslog"
|
||||
_ "github.com/influxdata/telegraf/plugins/inputs/sysstat"
|
||||
_ "github.com/influxdata/telegraf/plugins/inputs/system"
|
||||
_ "github.com/influxdata/telegraf/plugins/inputs/systemd_units"
|
||||
_ "github.com/influxdata/telegraf/plugins/inputs/tail"
|
||||
_ "github.com/influxdata/telegraf/plugins/inputs/tcp_listener"
|
||||
_ "github.com/influxdata/telegraf/plugins/inputs/teamspeak"
|
||||
|
|
|
@ -0,0 +1,140 @@
|
|||
# Systemd Units Plugin
|
||||
|
||||
The systemd_units plugin gathers systemd unit status on Linux. It relies on
|
||||
`systemctl list-units --all --type=service` to collect data on service status.
|
||||
|
||||
The results are tagged with the unit name and provide enumerated fields for
|
||||
the load, active and sub states, indicating the unit health.
|
||||
|
||||
This plugin is related to the [win_services module](../win_services/), which
|
||||
fulfills the same purpose on Windows.
|
||||
|
||||
In addition to services, this plugin can gather other unit types as well,
|
||||
see `systemctl list-units --all --type help` for possible options.
|
||||
|
||||
### Configuration
|
||||
```
|
||||
[[inputs.systemd_units]]
|
||||
## Set timeout for systemctl execution
|
||||
# timeout = "1s"
|
||||
#
|
||||
## Filter for a specific unit type, default is "service", other possible
|
||||
## values are "socket", "target", "device", "mount", "automount", "swap",
|
||||
## "timer", "path", "slice" and "scope":
|
||||
# unittype = "service"
|
||||
```
|
||||
|
||||
### Metrics
|
||||
- systemd_units:
|
||||
- tags:
|
||||
- name (string, unit name)
|
||||
- load (string, load state)
|
||||
- active (string, active state)
|
||||
- sub (string, sub state)
|
||||
- fields:
|
||||
- load_code (int, see below)
|
||||
- active_code (int, see below)
|
||||
- sub_code (int, see below)
|
||||
|
||||
#### Load
|
||||
|
||||
enumeration of [unit_load_state_table](https://github.com/systemd/systemd/blob/c87700a1335f489be31cd3549927da68b5638819/src/basic/unit-def.c#L87)
|
||||
|
||||
| Value | Meaning | Description |
|
||||
| ----- | ------- | ----------- |
|
||||
| 0 | loaded | unit is ~ |
|
||||
| 1 | stub | unit is ~ |
|
||||
| 2 | not-found | unit is ~ |
|
||||
| 3 | bad-setting | unit is ~ |
|
||||
| 4 | error | unit is ~ |
|
||||
| 5 | merged | unit is ~ |
|
||||
| 6 | masked | unit is ~ |
|
||||
|
||||
#### Active
|
||||
|
||||
enumeration of [unit_active_state_table](https://github.com/systemd/systemd/blob/c87700a1335f489be31cd3549927da68b5638819/src/basic/unit-def.c#L99)
|
||||
|
||||
| Value | Meaning | Description |
|
||||
| ----- | ------- | ----------- |
|
||||
| 0 | active | unit is ~ |
|
||||
| 1 | reloading | unit is ~ |
|
||||
| 2 | inactive | unit is ~ |
|
||||
| 3 | failed | unit is ~ |
|
||||
| 4 | activating | unit is ~ |
|
||||
| 5 | deactivating | unit is ~ |
|
||||
|
||||
#### Sub
|
||||
|
||||
enumeration of sub states, see various [unittype_state_tables](https://github.com/systemd/systemd/blob/c87700a1335f489be31cd3549927da68b5638819/src/basic/unit-def.c#L163);
|
||||
duplicates were removed, tables are hex aligned to keep some space for future
|
||||
values
|
||||
|
||||
| Value | Meaning | Description |
|
||||
| ----- | ------- | ----------- |
|
||||
| | | service_state_table start at 0x0000 |
|
||||
| 0x0000 | running | unit is ~ |
|
||||
| 0x0001 | dead | unit is ~ |
|
||||
| 0x0002 | start-pre | unit is ~ |
|
||||
| 0x0003 | start | unit is ~ |
|
||||
| 0x0004 | exited | unit is ~ |
|
||||
| 0x0005 | reload | unit is ~ |
|
||||
| 0x0006 | stop | unit is ~ |
|
||||
| 0x0007 | stop-watchdog | unit is ~ |
|
||||
| 0x0008 | stop-sigterm | unit is ~ |
|
||||
| 0x0009 | stop-sigkill | unit is ~ |
|
||||
| 0x000a | stop-post | unit is ~ |
|
||||
| 0x000b | final-sigterm | unit is ~ |
|
||||
| 0x000c | failed | unit is ~ |
|
||||
| 0x000d | auto-restart | unit is ~ |
|
||||
| | | automount_state_table start at 0x0010 |
|
||||
| 0x0010 | waiting | unit is ~ |
|
||||
| | | device_state_table start at 0x0020 |
|
||||
| 0x0020 | tentative | unit is ~ |
|
||||
| 0x0021 | plugged | unit is ~ |
|
||||
| | | mount_state_table start at 0x0030 |
|
||||
| 0x0030 | mounting | unit is ~ |
|
||||
| 0x0031 | mounting-done | unit is ~ |
|
||||
| 0x0032 | mounted | unit is ~ |
|
||||
| 0x0033 | remounting | unit is ~ |
|
||||
| 0x0034 | unmounting | unit is ~ |
|
||||
| 0x0035 | remounting-sigterm | unit is ~ |
|
||||
| 0x0036 | remounting-sigkill | unit is ~ |
|
||||
| 0x0037 | unmounting-sigterm | unit is ~ |
|
||||
| 0x0038 | unmounting-sigkill | unit is ~ |
|
||||
| | | path_state_table start at 0x0040 |
|
||||
| | | scope_state_table start at 0x0050 |
|
||||
| 0x0050 | abandoned | unit is ~ |
|
||||
| | | slice_state_table start at 0x0060 |
|
||||
| 0x0060 | active | unit is ~ |
|
||||
| | | socket_state_table start at 0x0070 |
|
||||
| 0x0070 | start-chown | unit is ~ |
|
||||
| 0x0071 | start-post | unit is ~ |
|
||||
| 0x0072 | listening | unit is ~ |
|
||||
| 0x0073 | stop-pre | unit is ~ |
|
||||
| 0x0074 | stop-pre-sigterm | unit is ~ |
|
||||
| 0x0075 | stop-pre-sigkill | unit is ~ |
|
||||
| 0x0076 | final-sigkill | unit is ~ |
|
||||
| | | swap_state_table start at 0x0080 |
|
||||
| 0x0080 | activating | unit is ~ |
|
||||
| 0x0081 | activating-done | unit is ~ |
|
||||
| 0x0082 | deactivating | unit is ~ |
|
||||
| 0x0083 | deactivating-sigterm | unit is ~ |
|
||||
| 0x0084 | deactivating-sigkill | unit is ~ |
|
||||
| | | target_state_table start at 0x0090 |
|
||||
| | | timer_state_table start at 0x00a0 |
|
||||
| 0x00a0 | elapsed | unit is ~ |
|
||||
| | | |
|
||||
|
||||
### Example Output
|
||||
|
||||
Linux Systemd Units:
|
||||
```
|
||||
$ telegraf --test --config /tmp/telegraf.conf
|
||||
> systemd_units,host=host1.example.com,name=dbus.service,load=loaded,active=active,sub=running load_code=0i,active_code=0i,sub_code=0i 1533730725000000000
|
||||
> systemd_units,host=host1.example.com,name=networking.service,load=loaded,active=failed,sub=failed load_code=0i,active_code=3i,sub_code=12i 1533730725000000000
|
||||
> systemd_units,host=host1.example.com,name=ssh.service,load=loaded,active=active,sub=running load_code=0i,active_code=0i,sub_code=0i 1533730725000000000
|
||||
...
|
||||
```
|
||||
|
||||
### Possible Improvements
|
||||
- add blacklist to filter names
|
|
@ -0,0 +1,221 @@
|
|||
package systemd_units
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"bytes"
|
||||
"fmt"
|
||||
"os/exec"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/influxdata/telegraf"
|
||||
"github.com/influxdata/telegraf/internal"
|
||||
"github.com/influxdata/telegraf/plugins/inputs"
|
||||
)
|
||||
|
||||
// SystemdUnits is a telegraf plugin to gather systemd unit status
|
||||
type SystemdUnits struct {
|
||||
Timeout internal.Duration
|
||||
UnitType string `toml:"unittype"`
|
||||
systemctl systemctl
|
||||
}
|
||||
|
||||
type systemctl func(Timeout internal.Duration, UnitType string) (*bytes.Buffer, error)
|
||||
|
||||
const measurement = "systemd_units"
|
||||
|
||||
// Below are mappings of systemd state tables as defined in
// https://github.com/systemd/systemd/blob/c87700a1335f489be31cd3549927da68b5638819/src/basic/unit-def.c
// Duplicate strings are removed from this list.

// load_map translates the textual load state of a unit into the integer
// reported in the "load_code" field.
var load_map = map[string]int{
	"loaded":      0,
	"stub":        1,
	"not-found":   2,
	"bad-setting": 3,
	"error":       4,
	"merged":      5,
	"masked":      6,
}
|
||||
|
||||
// active_map translates the textual active state of a unit into the integer
// reported in the "active_code" field.
var active_map = map[string]int{
	"active":       0,
	"reloading":    1,
	"inactive":     2,
	"failed":       3,
	"activating":   4,
	"deactivating": 5,
}
|
||||
|
||||
// sub_map translates the textual sub state of a unit into the integer
// reported in the "sub_code" field. Every unit-type state table gets its own
// 0x10-aligned range so that values can be added later without renumbering;
// a state name that already appears in an earlier table is not repeated,
// which is why some ranges are empty.
var sub_map = map[string]int{
	// service_state_table, offset 0x0000
	"running":       0x0000,
	"dead":          0x0001,
	"start-pre":     0x0002,
	"start":         0x0003,
	"exited":        0x0004,
	"reload":        0x0005,
	"stop":          0x0006,
	"stop-watchdog": 0x0007,
	"stop-sigterm":  0x0008,
	"stop-sigkill":  0x0009,
	"stop-post":     0x000a,
	"final-sigterm": 0x000b,
	"failed":        0x000c,
	"auto-restart":  0x000d,

	// automount_state_table, offset 0x0010
	"waiting": 0x0010,

	// device_state_table, offset 0x0020
	"tentative": 0x0020,
	"plugged":   0x0021,

	// mount_state_table, offset 0x0030
	"mounting":           0x0030,
	"mounting-done":      0x0031,
	"mounted":            0x0032,
	"remounting":         0x0033,
	"unmounting":         0x0034,
	"remounting-sigterm": 0x0035,
	"remounting-sigkill": 0x0036,
	"unmounting-sigterm": 0x0037,
	"unmounting-sigkill": 0x0038,

	// path_state_table, offset 0x0040

	// scope_state_table, offset 0x0050
	"abandoned": 0x0050,

	// slice_state_table, offset 0x0060
	"active": 0x0060,

	// socket_state_table, offset 0x0070
	"start-chown":      0x0070,
	"start-post":       0x0071,
	"listening":        0x0072,
	"stop-pre":         0x0073,
	"stop-pre-sigterm": 0x0074,
	"stop-pre-sigkill": 0x0075,
	"final-sigkill":    0x0076,

	// swap_state_table, offset 0x0080
	"activating":           0x0080,
	"activating-done":      0x0081,
	"deactivating":         0x0082,
	"deactivating-sigterm": 0x0083,
	"deactivating-sigkill": 0x0084,

	// target_state_table, offset 0x0090

	// timer_state_table, offset 0x00a0
	"elapsed": 0x00a0,
}
|
||||
|
||||
var (
|
||||
defaultTimeout = internal.Duration{Duration: time.Second}
|
||||
defaultUnitType = "service"
|
||||
)
|
||||
|
||||
// Description returns a short description of the plugin
|
||||
func (s *SystemdUnits) Description() string {
|
||||
return "Gather systemd units state"
|
||||
}
|
||||
|
||||
// SampleConfig returns sample configuration options.
|
||||
func (s *SystemdUnits) SampleConfig() string {
|
||||
return `
|
||||
## Set timeout for systemctl execution
|
||||
# timeout = "1s"
|
||||
#
|
||||
## Filter for a specific unit type, default is "service", other possible
|
||||
## values are "socket", "target", "device", "mount", "automount", "swap",
|
||||
## "timer", "path", "slice" and "scope ":
|
||||
# unittype = "service"
|
||||
`
|
||||
}
|
||||
|
||||
// Gather parses systemctl outputs and adds counters to the Accumulator
|
||||
func (s *SystemdUnits) Gather(acc telegraf.Accumulator) error {
|
||||
out, err := s.systemctl(s.Timeout, s.UnitType)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
scanner := bufio.NewScanner(out)
|
||||
for scanner.Scan() {
|
||||
line := scanner.Text()
|
||||
|
||||
data := strings.Fields(line)
|
||||
if len(data) < 4 {
|
||||
acc.AddError(fmt.Errorf("Error parsing line (expected at least 4 fields): %s", line))
|
||||
continue
|
||||
}
|
||||
name := data[0]
|
||||
load := data[1]
|
||||
active := data[2]
|
||||
sub := data[3]
|
||||
tags := map[string]string{
|
||||
"name": name,
|
||||
"load": load,
|
||||
"active": active,
|
||||
"sub": sub,
|
||||
}
|
||||
|
||||
var (
|
||||
load_code int
|
||||
active_code int
|
||||
sub_code int
|
||||
ok bool
|
||||
)
|
||||
if load_code, ok = load_map[load]; !ok {
|
||||
acc.AddError(fmt.Errorf("Error parsing field 'load', value not in map: %s", load))
|
||||
continue
|
||||
}
|
||||
if active_code, ok = active_map[active]; !ok {
|
||||
acc.AddError(fmt.Errorf("Error parsing field 'active', value not in map: %s", active))
|
||||
continue
|
||||
}
|
||||
if sub_code, ok = sub_map[sub]; !ok {
|
||||
acc.AddError(fmt.Errorf("Error parsing field 'sub', value not in map: %s", sub))
|
||||
continue
|
||||
}
|
||||
fields := map[string]interface{}{
|
||||
"load_code": load_code,
|
||||
"active_code": active_code,
|
||||
"sub_code": sub_code,
|
||||
}
|
||||
|
||||
acc.AddFields(measurement, fields, tags)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func setSystemctl(Timeout internal.Duration, UnitType string) (*bytes.Buffer, error) {
|
||||
// is systemctl available ?
|
||||
systemctlPath, err := exec.LookPath("systemctl")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
cmd := exec.Command(systemctlPath, "list-units", "--all", fmt.Sprintf("--type=%s", UnitType), "--no-legend")
|
||||
|
||||
var out bytes.Buffer
|
||||
cmd.Stdout = &out
|
||||
err = internal.RunTimeout(cmd, Timeout.Duration)
|
||||
if err != nil {
|
||||
return &out, fmt.Errorf("error running systemctl list-units --all --type=%s --no-legend: %s", UnitType, err)
|
||||
}
|
||||
|
||||
return &out, nil
|
||||
}
|
||||
|
||||
func init() {
|
||||
inputs.Add("systemd_units", func() telegraf.Input {
|
||||
return &SystemdUnits{
|
||||
systemctl: setSystemctl,
|
||||
Timeout: defaultTimeout,
|
||||
UnitType: defaultUnitType,
|
||||
}
|
||||
})
|
||||
}
|
|
@ -0,0 +1,100 @@
|
|||
package systemd_units
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"reflect"
|
||||
"testing"
|
||||
|
||||
"github.com/influxdata/telegraf/internal"
|
||||
"github.com/influxdata/telegraf/testutil"
|
||||
)
|
||||
|
||||
func TestSystemdUnits(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
line string
|
||||
tags map[string]string
|
||||
fields map[string]interface{}
|
||||
status int
|
||||
err error
|
||||
}{
|
||||
{
|
||||
name: "example loaded active running",
|
||||
line: "example.service loaded active running example service description",
|
||||
tags: map[string]string{"name": "example.service", "load": "loaded", "active": "active", "sub": "running"},
|
||||
fields: map[string]interface{}{
|
||||
"load_code": 0,
|
||||
"active_code": 0,
|
||||
"sub_code": 0,
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "example loaded active exited",
|
||||
line: "example.service loaded active exited example service description",
|
||||
tags: map[string]string{"name": "example.service", "load": "loaded", "active": "active", "sub": "exited"},
|
||||
fields: map[string]interface{}{
|
||||
"load_code": 0,
|
||||
"active_code": 0,
|
||||
"sub_code": 4,
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "example loaded failed failed",
|
||||
line: "example.service loaded failed failed example service description",
|
||||
tags: map[string]string{"name": "example.service", "load": "loaded", "active": "failed", "sub": "failed"},
|
||||
fields: map[string]interface{}{
|
||||
"load_code": 0,
|
||||
"active_code": 3,
|
||||
"sub_code": 12,
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "example not-found inactive dead",
|
||||
line: "example.service not-found inactive dead example service description",
|
||||
tags: map[string]string{"name": "example.service", "load": "not-found", "active": "inactive", "sub": "dead"},
|
||||
fields: map[string]interface{}{
|
||||
"load_code": 2,
|
||||
"active_code": 2,
|
||||
"sub_code": 1,
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "example unknown unknown unknown",
|
||||
line: "example.service unknown unknown unknown example service description",
|
||||
err: fmt.Errorf("Error parsing field 'load', value not in map: %s", "unknown"),
|
||||
},
|
||||
{
|
||||
name: "example too few fields",
|
||||
line: "example.service loaded fai",
|
||||
err: fmt.Errorf("Error parsing line (expected at least 4 fields): %s", "example.service loaded fai"),
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
systemd_units := &SystemdUnits{
|
||||
systemctl: func(Timeout internal.Duration, UnitType string) (*bytes.Buffer, error) {
|
||||
return bytes.NewBufferString(tt.line), nil
|
||||
},
|
||||
}
|
||||
acc := new(testutil.Accumulator)
|
||||
err := acc.GatherError(systemd_units.Gather)
|
||||
if !reflect.DeepEqual(tt.err, err) {
|
||||
t.Errorf("%s: expected error '%#v' got '%#v'", tt.name, tt.err, err)
|
||||
}
|
||||
if len(acc.Metrics) > 0 {
|
||||
m := acc.Metrics[0]
|
||||
if !reflect.DeepEqual(m.Measurement, measurement) {
|
||||
t.Errorf("%s: expected measurement '%#v' got '%#v'\n", tt.name, measurement, m.Measurement)
|
||||
}
|
||||
if !reflect.DeepEqual(m.Tags, tt.tags) {
|
||||
t.Errorf("%s: expected tags\n%#v got\n%#v\n", tt.name, tt.tags, m.Tags)
|
||||
}
|
||||
if !reflect.DeepEqual(m.Fields, tt.fields) {
|
||||
t.Errorf("%s: expected fields\n%#v got\n%#v\n", tt.name, tt.fields, m.Fields)
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
|
@ -0,0 +1,3 @@
|
|||
// +build !linux
|
||||
|
||||
package systemd_units
|
Loading…
Reference in New Issue