Moved system package inputs out to top level (#4406)

This commit is contained in:
Steve Domino
2018-07-11 17:43:49 -06:00
committed by Daniel Nelson
parent 9a14d1f074
commit 7b73b0db3a
42 changed files with 126 additions and 89 deletions

View File

@@ -0,0 +1,65 @@
# Processes Input Plugin
This plugin gathers info about the total number of processes and groups
them by status (zombie, sleeping, running, etc.)
On linux this plugin requires access to procfs (/proc), on other OSes
it requires access to execute `ps`.
### Configuration:
```toml
# Get the number of processes and group them by status
[[inputs.processes]]
# no configuration
```
Another possible configuration is to define an alternative path for resolving the /proc location.
Using the environment variable `HOST_PROC` the plugin will retrieve process information from the specified location.
`docker run -v /proc:/rootfs/proc:ro -e HOST_PROC=/rootfs/proc`
### Measurements & Fields:
- processes
- blocked (aka disk sleep or uninterruptible sleep)
- running
- sleeping
- stopped
- total
- zombie
- dead
- wait (freebsd only)
- idle (bsd and Linux 4+ only)
- paging (linux only)
- total_threads (linux only)
### Process State Mappings
Different OSes use slightly different State codes for their processes, these
state codes are documented in `man ps`, and I will give a mapping of what major
OS state codes correspond to in telegraf metrics:
```
Linux FreeBSD Darwin meaning
R R R running
S S S sleeping
Z Z Z zombie
X none none dead
T T T stopped
I I I idle (sleeping for longer than about 20 seconds)
D D,L U blocked (waiting in uninterruptible sleep, or locked)
W W none paging (linux kernel < 2.6 only), wait (freebsd)
```
### Tags:
None
### Example Output:
```
$ telegraf --config ~/ws/telegraf.conf --input-filter processes --test
* Plugin: processes, Collection 1
> processes blocked=8i,running=1i,sleeping=265i,stopped=0i,total=274i,zombie=0i,dead=0i,paging=0i,total_threads=687i 1457478636980905042
```

View File

@@ -0,0 +1,237 @@
// +build !windows
package processes
import (
"bytes"
"fmt"
"io/ioutil"
"log"
"os"
"os/exec"
"path/filepath"
"runtime"
"strconv"
"syscall"
"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/plugins/inputs"
"github.com/influxdata/telegraf/plugins/inputs/linux_sysctl_fs"
)
type Processes struct {
execPS func() ([]byte, error)
readProcFile func(filename string) ([]byte, error)
forcePS bool
forceProc bool
}
func (p *Processes) Description() string {
return "Get the number of processes and group them by status"
}
func (p *Processes) SampleConfig() string { return "" }
func (p *Processes) Gather(acc telegraf.Accumulator) error {
// Get an empty map of metric fields
fields := getEmptyFields()
// Decide if we will use 'ps' to get stats (use procfs otherwise)
usePS := true
if runtime.GOOS == "linux" {
usePS = false
}
if p.forcePS {
usePS = true
} else if p.forceProc {
usePS = false
}
// Gather stats from 'ps' or procfs
if usePS {
if err := p.gatherFromPS(fields); err != nil {
return err
}
} else {
if err := p.gatherFromProc(fields); err != nil {
return err
}
}
acc.AddGauge("processes", fields, nil)
return nil
}
// Gets empty fields of metrics based on the OS
func getEmptyFields() map[string]interface{} {
fields := map[string]interface{}{
"blocked": int64(0),
"zombies": int64(0),
"stopped": int64(0),
"running": int64(0),
"sleeping": int64(0),
"total": int64(0),
"unknown": int64(0),
}
switch runtime.GOOS {
case "freebsd":
fields["idle"] = int64(0)
fields["wait"] = int64(0)
case "darwin":
fields["idle"] = int64(0)
case "openbsd":
fields["idle"] = int64(0)
case "linux":
fields["dead"] = int64(0)
fields["paging"] = int64(0)
fields["total_threads"] = int64(0)
fields["idle"] = int64(0)
}
return fields
}
// exec `ps` to get all process states
func (p *Processes) gatherFromPS(fields map[string]interface{}) error {
out, err := p.execPS()
if err != nil {
return err
}
for i, status := range bytes.Fields(out) {
if i == 0 && string(status) == "STAT" {
// This is a header, skip it
continue
}
switch status[0] {
case 'W':
fields["wait"] = fields["wait"].(int64) + int64(1)
case 'U', 'D', 'L':
// Also known as uninterruptible sleep or disk sleep
fields["blocked"] = fields["blocked"].(int64) + int64(1)
case 'Z':
fields["zombies"] = fields["zombies"].(int64) + int64(1)
case 'X':
fields["dead"] = fields["dead"].(int64) + int64(1)
case 'T':
fields["stopped"] = fields["stopped"].(int64) + int64(1)
case 'R':
fields["running"] = fields["running"].(int64) + int64(1)
case 'S':
fields["sleeping"] = fields["sleeping"].(int64) + int64(1)
case 'I':
fields["idle"] = fields["idle"].(int64) + int64(1)
case '?':
fields["unknown"] = fields["unknown"].(int64) + int64(1)
default:
log.Printf("I! processes: Unknown state [ %s ] from ps",
string(status[0]))
}
fields["total"] = fields["total"].(int64) + int64(1)
}
return nil
}
// get process states from /proc/(pid)/stat files
func (p *Processes) gatherFromProc(fields map[string]interface{}) error {
filenames, err := filepath.Glob(linux_sysctl_fs.GetHostProc() + "/[0-9]*/stat")
if err != nil {
return err
}
for _, filename := range filenames {
_, err := os.Stat(filename)
data, err := p.readProcFile(filename)
if err != nil {
return err
}
if data == nil {
continue
}
// Parse out data after (<cmd name>)
i := bytes.LastIndex(data, []byte(")"))
if i == -1 {
continue
}
data = data[i+2:]
stats := bytes.Fields(data)
if len(stats) < 3 {
return fmt.Errorf("Something is terribly wrong with %s", filename)
}
switch stats[0][0] {
case 'R':
fields["running"] = fields["running"].(int64) + int64(1)
case 'S':
fields["sleeping"] = fields["sleeping"].(int64) + int64(1)
case 'D':
fields["blocked"] = fields["blocked"].(int64) + int64(1)
case 'Z':
fields["zombies"] = fields["zombies"].(int64) + int64(1)
case 'X':
fields["dead"] = fields["dead"].(int64) + int64(1)
case 'T', 't':
fields["stopped"] = fields["stopped"].(int64) + int64(1)
case 'W':
fields["paging"] = fields["paging"].(int64) + int64(1)
case 'I':
fields["idle"] = fields["idle"].(int64) + int64(1)
default:
log.Printf("I! processes: Unknown state [ %s ] in file %s",
string(stats[0][0]), filename)
}
fields["total"] = fields["total"].(int64) + int64(1)
threads, err := strconv.Atoi(string(stats[17]))
if err != nil {
log.Printf("I! processes: Error parsing thread count: %s", err)
continue
}
fields["total_threads"] = fields["total_threads"].(int64) + int64(threads)
}
return nil
}
func readProcFile(filename string) ([]byte, error) {
data, err := ioutil.ReadFile(filename)
if err != nil {
if os.IsNotExist(err) {
return nil, nil
}
// Reading from /proc/<PID> fails with ESRCH if the process has
// been terminated between open() and read().
if perr, ok := err.(*os.PathError); ok && perr.Err == syscall.ESRCH {
return nil, nil
}
return nil, err
}
return data, nil
}
func execPS() ([]byte, error) {
bin, err := exec.LookPath("ps")
if err != nil {
return nil, err
}
out, err := exec.Command(bin, "axo", "state").Output()
if err != nil {
return nil, err
}
return out, err
}
func init() {
inputs.Add("processes", func() telegraf.Input {
return &Processes{
execPS: execPS,
readProcFile: readProcFile,
}
})
}

View File

@@ -0,0 +1,186 @@
// +build !windows
package processes
import (
"fmt"
"runtime"
"testing"
"github.com/influxdata/telegraf/testutil"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
func TestProcesses(t *testing.T) {
processes := &Processes{
execPS: execPS,
readProcFile: readProcFile,
}
var acc testutil.Accumulator
err := processes.Gather(&acc)
require.NoError(t, err)
assert.True(t, acc.HasInt64Field("processes", "running"))
assert.True(t, acc.HasInt64Field("processes", "sleeping"))
assert.True(t, acc.HasInt64Field("processes", "stopped"))
assert.True(t, acc.HasInt64Field("processes", "total"))
total, ok := acc.Get("processes")
require.True(t, ok)
assert.True(t, total.Fields["total"].(int64) > 0)
}
func TestFromPS(t *testing.T) {
processes := &Processes{
execPS: testExecPS,
forcePS: true,
}
var acc testutil.Accumulator
err := processes.Gather(&acc)
require.NoError(t, err)
fields := getEmptyFields()
fields["blocked"] = int64(4)
fields["zombies"] = int64(1)
fields["running"] = int64(4)
fields["sleeping"] = int64(34)
fields["idle"] = int64(2)
fields["total"] = int64(45)
acc.AssertContainsTaggedFields(t, "processes", fields, map[string]string{})
}
func TestFromPSError(t *testing.T) {
processes := &Processes{
execPS: testExecPSError,
forcePS: true,
}
var acc testutil.Accumulator
err := processes.Gather(&acc)
require.Error(t, err)
}
func TestFromProcFiles(t *testing.T) {
if runtime.GOOS != "linux" {
t.Skip("This test only runs on linux")
}
tester := tester{}
processes := &Processes{
readProcFile: tester.testProcFile,
forceProc: true,
}
var acc testutil.Accumulator
err := processes.Gather(&acc)
require.NoError(t, err)
fields := getEmptyFields()
fields["sleeping"] = tester.calls
fields["total_threads"] = tester.calls * 2
fields["total"] = tester.calls
acc.AssertContainsTaggedFields(t, "processes", fields, map[string]string{})
}
func TestFromProcFilesWithSpaceInCmd(t *testing.T) {
if runtime.GOOS != "linux" {
t.Skip("This test only runs on linux")
}
tester := tester{}
processes := &Processes{
readProcFile: tester.testProcFile2,
forceProc: true,
}
var acc testutil.Accumulator
err := processes.Gather(&acc)
require.NoError(t, err)
fields := getEmptyFields()
fields["sleeping"] = tester.calls
fields["total_threads"] = tester.calls * 2
fields["total"] = tester.calls
acc.AssertContainsTaggedFields(t, "processes", fields, map[string]string{})
}
func testExecPS() ([]byte, error) {
return []byte(testPSOut), nil
}
// struct for counting calls to testProcFile
type tester struct {
calls int64
}
func (t *tester) testProcFile(_ string) ([]byte, error) {
t.calls++
return []byte(fmt.Sprintf(testProcStat, "S", "2")), nil
}
func (t *tester) testProcFile2(_ string) ([]byte, error) {
t.calls++
return []byte(fmt.Sprintf(testProcStat2, "S", "2")), nil
}
func testExecPSError() ([]byte, error) {
return []byte(testPSOut), fmt.Errorf("ERROR!")
}
const testPSOut = `
STAT
S
S
S
S
R
R
S
S
Ss
Ss
S
SNs
Ss
Ss
S
R+
S
U
S
S
S
S
Ss
S+
Ss
S
S+
S+
Ss
S+
Ss
S
R+
Ss
S
S+
S+
Ss
L
U
Z
D
S+
I
I
`
const testProcStat = `10 (rcuob/0) %s 2 0 0 0 -1 2129984 0 0 0 0 0 0 0 0 20 0 %s 0 11 0 0 18446744073709551615 0 0 0 0 0 0 0 2147483647 0 18446744073709551615 0 0 17 0 0 0 0 0 0 0 0 0 0 0 0 0 0
`
const testProcStat2 = `10 (rcuob 0) %s 2 0 0 0 -1 2129984 0 0 0 0 0 0 0 0 20 0 %s 0 11 0 0 18446744073709551615 0 0 0 0 0 0 0 2147483647 0 18446744073709551615 0 0 17 0 0 0 0 0 0 0 0 0 0 0 0 0 0
`

View File

@@ -0,0 +1,3 @@
// +build windows
package processes