telegraf/plugins/inputs/procstat/procstat.go

419 lines
10 KiB
Go

package procstat
import (
"bytes"
"fmt"
"io/ioutil"
"os/exec"
"path/filepath"
"strconv"
"time"
"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/plugins/inputs"
"github.com/shirou/gopsutil/process"
)
var (
defaultPIDFinder = NewPgrep
defaultProcess = NewProc
)
type PID int32
type Procstat struct {
PidFinder string `toml:"pid_finder"`
PidFile string `toml:"pid_file"`
Exe string
Pattern string
Prefix string
ProcessName string
User string
SystemdUnit string
CGroup string `toml:"cgroup"`
PidTag bool
WinService string `tom:"win_service"`
finder PIDFinder
createPIDFinder func() (PIDFinder, error)
procs map[PID]Process
createProcess func(PID) (Process, error)
}
var sampleConfig = `
## PID file to monitor process
pid_file = "/var/run/nginx.pid"
## executable name (ie, pgrep <exe>)
# exe = "nginx"
## pattern as argument for pgrep (ie, pgrep -f <pattern>)
# pattern = "nginx"
## user as argument for pgrep (ie, pgrep -u <user>)
# user = "nginx"
## Systemd unit name
# systemd_unit = "nginx.service"
## CGroup name or path
# cgroup = "systemd/system.slice/nginx.service"
## Windows service name
# win_service = ""
## override for process_name
## This is optional; default is sourced from /proc/<pid>/status
# process_name = "bar"
## Field name prefix
# prefix = ""
## Add PID as a tag instead of a field; useful to differentiate between
## processes whose tags are otherwise the same. Can create a large number
## of series, use judiciously.
# pid_tag = false
## Method to use when finding process IDs. Can be one of 'pgrep', or
## 'native'. The pgrep finder calls the pgrep executable in the PATH while
## the native finder performs the search directly in a manor dependent on the
## platform. Default is 'pgrep'
# pid_finder = "pgrep"
`
func (_ *Procstat) SampleConfig() string {
return sampleConfig
}
func (_ *Procstat) Description() string {
return "Monitor process cpu and memory usage"
}
func (p *Procstat) Gather(acc telegraf.Accumulator) error {
if p.createPIDFinder == nil {
switch p.PidFinder {
case "native":
p.createPIDFinder = NewNativeFinder
case "pgrep":
p.createPIDFinder = NewPgrep
default:
p.createPIDFinder = defaultPIDFinder
}
}
if p.createProcess == nil {
p.createProcess = defaultProcess
}
procs, err := p.updateProcesses(acc, p.procs)
if err != nil {
acc.AddError(fmt.Errorf("E! Error: procstat getting process, exe: [%s] pidfile: [%s] pattern: [%s] user: [%s] %s",
p.Exe, p.PidFile, p.Pattern, p.User, err.Error()))
}
p.procs = procs
for _, proc := range p.procs {
p.addMetrics(proc, acc)
}
return nil
}
// Add metrics a single Process
func (p *Procstat) addMetrics(proc Process, acc telegraf.Accumulator) {
var prefix string
if p.Prefix != "" {
prefix = p.Prefix + "_"
}
fields := map[string]interface{}{}
//If process_name tag is not already set, set to actual name
if _, nameInTags := proc.Tags()["process_name"]; !nameInTags {
name, err := proc.Name()
if err == nil {
proc.Tags()["process_name"] = name
}
}
//If user tag is not already set, set to actual name
if _, ok := proc.Tags()["user"]; !ok {
user, err := proc.Username()
if err == nil {
proc.Tags()["user"] = user
}
}
//If pid is not present as a tag, include it as a field.
if _, pidInTags := proc.Tags()["pid"]; !pidInTags {
fields["pid"] = int32(proc.PID())
}
numThreads, err := proc.NumThreads()
if err == nil {
fields[prefix+"num_threads"] = numThreads
}
fds, err := proc.NumFDs()
if err == nil {
fields[prefix+"num_fds"] = fds
}
ctx, err := proc.NumCtxSwitches()
if err == nil {
fields[prefix+"voluntary_context_switches"] = ctx.Voluntary
fields[prefix+"involuntary_context_switches"] = ctx.Involuntary
}
io, err := proc.IOCounters()
if err == nil {
fields[prefix+"read_count"] = io.ReadCount
fields[prefix+"write_count"] = io.WriteCount
fields[prefix+"read_bytes"] = io.ReadBytes
fields[prefix+"write_bytes"] = io.WriteBytes
}
cpu_time, err := proc.Times()
if err == nil {
fields[prefix+"cpu_time_user"] = cpu_time.User
fields[prefix+"cpu_time_system"] = cpu_time.System
fields[prefix+"cpu_time_idle"] = cpu_time.Idle
fields[prefix+"cpu_time_nice"] = cpu_time.Nice
fields[prefix+"cpu_time_iowait"] = cpu_time.Iowait
fields[prefix+"cpu_time_irq"] = cpu_time.Irq
fields[prefix+"cpu_time_soft_irq"] = cpu_time.Softirq
fields[prefix+"cpu_time_steal"] = cpu_time.Steal
fields[prefix+"cpu_time_stolen"] = cpu_time.Stolen
fields[prefix+"cpu_time_guest"] = cpu_time.Guest
fields[prefix+"cpu_time_guest_nice"] = cpu_time.GuestNice
}
cpu_perc, err := proc.Percent(time.Duration(0))
if err == nil {
fields[prefix+"cpu_usage"] = cpu_perc
}
mem, err := proc.MemoryInfo()
if err == nil {
fields[prefix+"memory_rss"] = mem.RSS
fields[prefix+"memory_vms"] = mem.VMS
fields[prefix+"memory_swap"] = mem.Swap
fields[prefix+"memory_data"] = mem.Data
fields[prefix+"memory_stack"] = mem.Stack
fields[prefix+"memory_locked"] = mem.Locked
}
rlims, err := proc.RlimitUsage(true)
if err == nil {
for _, rlim := range rlims {
var name string
switch rlim.Resource {
case process.RLIMIT_CPU:
name = "cpu_time"
case process.RLIMIT_DATA:
name = "memory_data"
case process.RLIMIT_STACK:
name = "memory_stack"
case process.RLIMIT_RSS:
name = "memory_rss"
case process.RLIMIT_NOFILE:
name = "num_fds"
case process.RLIMIT_MEMLOCK:
name = "memory_locked"
case process.RLIMIT_AS:
name = "memory_vms"
case process.RLIMIT_LOCKS:
name = "file_locks"
case process.RLIMIT_SIGPENDING:
name = "signals_pending"
case process.RLIMIT_NICE:
name = "nice_priority"
case process.RLIMIT_RTPRIO:
name = "realtime_priority"
default:
continue
}
fields[prefix+"rlimit_"+name+"_soft"] = rlim.Soft
fields[prefix+"rlimit_"+name+"_hard"] = rlim.Hard
if name != "file_locks" { // gopsutil doesn't currently track the used file locks count
fields[prefix+name] = rlim.Used
}
}
}
acc.AddFields("procstat", fields, proc.Tags())
}
// Update monitored Processes
func (p *Procstat) updateProcesses(acc telegraf.Accumulator, prevInfo map[PID]Process) (map[PID]Process, error) {
pids, tags, err := p.findPids(acc)
if err != nil {
return nil, err
}
procs := make(map[PID]Process, len(prevInfo))
for _, pid := range pids {
info, ok := prevInfo[pid]
if ok {
procs[pid] = info
} else {
proc, err := p.createProcess(pid)
if err != nil {
// No problem; process may have ended after we found it
continue
}
procs[pid] = proc
// Add initial tags
for k, v := range tags {
proc.Tags()[k] = v
}
// Add pid tag if needed
if p.PidTag {
proc.Tags()["pid"] = strconv.Itoa(int(pid))
}
if p.ProcessName != "" {
proc.Tags()["process_name"] = p.ProcessName
}
}
}
return procs, nil
}
// Create and return PIDGatherer lazily
func (p *Procstat) getPIDFinder() (PIDFinder, error) {
if p.finder == nil {
f, err := p.createPIDFinder()
if err != nil {
return nil, err
}
p.finder = f
}
return p.finder, nil
}
// Get matching PIDs and their initial tags
func (p *Procstat) findPids(acc telegraf.Accumulator) ([]PID, map[string]string, error) {
var pids []PID
tags := make(map[string]string)
var err error
f, err := p.getPIDFinder()
if err != nil {
return nil, nil, err
}
if p.PidFile != "" {
pids, err = f.PidFile(p.PidFile)
tags = map[string]string{"pidfile": p.PidFile}
} else if p.Exe != "" {
pids, err = f.Pattern(p.Exe)
tags = map[string]string{"exe": p.Exe}
} else if p.Pattern != "" {
pids, err = f.FullPattern(p.Pattern)
tags = map[string]string{"pattern": p.Pattern}
} else if p.User != "" {
pids, err = f.Uid(p.User)
tags = map[string]string{"user": p.User}
} else if p.SystemdUnit != "" {
pids, err = p.systemdUnitPIDs()
tags = map[string]string{"systemd_unit": p.SystemdUnit}
} else if p.CGroup != "" {
pids, err = p.cgroupPIDs()
tags = map[string]string{"cgroup": p.CGroup}
} else if p.WinService != "" {
pids, err = p.winServicePIDs()
tags = map[string]string{"win_service": p.WinService}
} else {
err = fmt.Errorf("Either exe, pid_file, user, pattern, systemd_unit, cgroup, or win_service must be specified")
}
rTags := make(map[string]string)
for k, v := range tags {
rTags[k] = v
}
//adds a metric with info on the pgrep query
fields := make(map[string]interface{})
tags["pid_finder"] = p.PidFinder
fields["pid_count"] = len(pids)
acc.AddFields("procstat_lookup", fields, tags)
return pids, rTags, err
}
// execCommand is so tests can mock out exec.Command usage.
var execCommand = exec.Command
func (p *Procstat) systemdUnitPIDs() ([]PID, error) {
var pids []PID
cmd := execCommand("systemctl", "show", p.SystemdUnit)
out, err := cmd.Output()
if err != nil {
return nil, err
}
for _, line := range bytes.Split(out, []byte{'\n'}) {
kv := bytes.SplitN(line, []byte{'='}, 2)
if len(kv) != 2 {
continue
}
if !bytes.Equal(kv[0], []byte("MainPID")) {
continue
}
if len(kv[1]) == 0 {
return nil, nil
}
pid, err := strconv.Atoi(string(kv[1]))
if err != nil {
return nil, fmt.Errorf("invalid pid '%s'", kv[1])
}
pids = append(pids, PID(pid))
}
return pids, nil
}
func (p *Procstat) cgroupPIDs() ([]PID, error) {
var pids []PID
procsPath := p.CGroup
if procsPath[0] != '/' {
procsPath = "/sys/fs/cgroup/" + procsPath
}
procsPath = filepath.Join(procsPath, "cgroup.procs")
out, err := ioutil.ReadFile(procsPath)
if err != nil {
return nil, err
}
for _, pidBS := range bytes.Split(out, []byte{'\n'}) {
if len(pidBS) == 0 {
continue
}
pid, err := strconv.Atoi(string(pidBS))
if err != nil {
return nil, fmt.Errorf("invalid pid '%s'", pidBS)
}
pids = append(pids, PID(pid))
}
return pids, nil
}
func (p *Procstat) winServicePIDs() ([]PID, error) {
var pids []PID
pid, err := queryPidWithWinServiceName(p.WinService)
if err != nil {
return pids, err
}
pids = append(pids, PID(pid))
return pids, nil
}
func init() {
inputs.Add("procstat", func() telegraf.Input {
return &Procstat{}
})
}