Add process resource limits to procstat input (#3231)

This commit is contained in:
Patrick Hemmer 2017-09-15 14:16:44 -04:00 committed by Daniel Nelson
parent 575a07c985
commit 0339dc7faf
6 changed files with 100 additions and 14 deletions

View File

@ -18,6 +18,7 @@
- [#3211](https://github.com/influxdata/telegraf/pull/3211): Add timeout option for kubernetes input.
- [#3224](https://github.com/influxdata/telegraf/pull/3224): Preserve url path prefix in influx output.
- [#3234](https://github.com/influxdata/telegraf/pull/3234): Add support for timing sums in statsd input.
- [#2617](https://github.com/influxdata/telegraf/issues/2617): Add resource limit monitoring to procstat.
### Bugfixes

2
Godeps
View File

@ -60,7 +60,7 @@ github.com/prometheus/procfs 1878d9fbb537119d24b21ca07effd591627cd160
github.com/rcrowley/go-metrics 1f30fe9094a513ce4c700b9a54458bbb0c96996c
github.com/samuel/go-zookeeper 1d7be4effb13d2d908342d349d71a284a7542693
github.com/satori/go.uuid 5bf94b69c6b68ee1b541973bb8e1144db23a194b
github.com/shirou/gopsutil 9a4a9167ad3b4355dbf1c2c7a0f5f0d3fb1e9ab9
github.com/shirou/gopsutil a452de7c734a0fa0f16d2e5725b0fa5934d9fbec
github.com/shirou/w32 3c9377fc6748f222729a8270fe2775d149a249ad
github.com/Shopify/sarama c01858abb625b73a3af51d0798e4ad42c8147093
github.com/Sirupsen/logrus 61e43dc76f7ee59a82bdf3d71033dc12bea4c77d

View File

@ -39,8 +39,8 @@ Example:
The above configuration would result in output like:
```
> procstat,pidfile=/var/run/lxc/dnsmasq.pid,process_name=dnsmasq,pid=44979 cpu_user=0.14,cpu_system=0.07
> procstat,exe=influxd,process_name=influxd,pid=34337 influxd_cpu_user=25.43,influxd_cpu_system=21.82
> procstat,pidfile=/var/run/lxc/dnsmasq.pid,process_name=dnsmasq rlimit_file_locks_soft=2147483647i,rlimit_signals_pending_hard=1758i,voluntary_context_switches=478i,read_bytes=307200i,cpu_time_user=0.01,cpu_time_guest=0,memory_swap=0i,memory_locked=0i,rlimit_num_fds_hard=4096i,rlimit_nice_priority_hard=0i,num_fds=11i,involuntary_context_switches=20i,read_count=23i,memory_rss=1388544i,rlimit_memory_rss_soft=2147483647i,rlimit_memory_rss_hard=2147483647i,nice_priority=20i,rlimit_cpu_time_hard=2147483647i,cpu_time=0i,write_bytes=0i,cpu_time_idle=0,cpu_time_nice=0,memory_data=229376i,memory_stack=135168i,rlimit_cpu_time_soft=2147483647i,rlimit_memory_data_hard=2147483647i,rlimit_memory_locked_hard=65536i,rlimit_signals_pending_soft=1758i,write_count=11i,cpu_time_iowait=0,cpu_time_steal=0,cpu_time_stolen=0,rlimit_memory_stack_soft=8388608i,cpu_time_system=0.02,cpu_time_guest_nice=0,rlimit_memory_locked_soft=65536i,rlimit_memory_vms_soft=2147483647i,rlimit_file_locks_hard=2147483647i,rlimit_realtime_priority_hard=0i,pid=828i,num_threads=1i,cpu_time_soft_irq=0,rlimit_memory_vms_hard=2147483647i,rlimit_realtime_priority_soft=0i,memory_vms=15884288i,rlimit_memory_stack_hard=2147483647i,cpu_time_irq=0,rlimit_memory_data_soft=2147483647i,rlimit_num_fds_soft=1024i,signals_pending=0i,rlimit_nice_priority_soft=0i,realtime_priority=0i
> procstat,exe=influxd,process_name=influxd rlimit_num_fds_hard=16384i,rlimit_signals_pending_hard=1758i,realtime_priority=0i,rlimit_memory_vms_hard=2147483647i,rlimit_signals_pending_soft=1758i,cpu_time_stolen=0,rlimit_memory_stack_hard=2147483647i,rlimit_realtime_priority_hard=0i,cpu_time=0i,pid=500i,voluntary_context_switches=975i,cpu_time_idle=0,memory_rss=3072000i,memory_locked=0i,rlimit_nice_priority_soft=0i,signals_pending=0i,nice_priority=20i,read_bytes=823296i,cpu_time_soft_irq=0,rlimit_memory_data_hard=2147483647i,rlimit_memory_locked_soft=65536i,write_count=8i,cpu_time_irq=0,memory_vms=33501184i,rlimit_memory_stack_soft=8388608i,cpu_time_iowait=0,rlimit_memory_vms_soft=2147483647i,rlimit_nice_priority_hard=0i,num_fds=29i,memory_data=229376i,rlimit_cpu_time_soft=2147483647i,rlimit_file_locks_soft=2147483647i,num_threads=1i,write_bytes=0i,cpu_time_steal=0,rlimit_memory_rss_hard=2147483647i,cpu_time_guest=0,cpu_time_guest_nice=0,cpu_usage=0,rlimit_memory_locked_hard=65536i,rlimit_file_locks_hard=2147483647i,involuntary_context_switches=38i,read_count=16851i,memory_swap=0i,rlimit_memory_data_soft=2147483647i,cpu_time_user=0.11,rlimit_cpu_time_hard=2147483647i,rlimit_num_fds_soft=16384i,rlimit_realtime_priority_soft=0i,cpu_time_system=0.27,cpu_time_nice=0,memory_stack=135168i,rlimit_memory_rss_soft=2147483647i
```
# Measurements
@ -53,6 +53,13 @@ Threads related measurement names:
File descriptor related measurement names (*telegraf* needs to run as **root**):
- procstat_[prefix_]num_fds value=4
Priority related measurement names:
- procstat_[prefix_]realtime_priority value=0
- procstat_[prefix_]nice_priority value=20
Signals related measurement names:
- procstat_[prefix_]signals_pending value=0
Context switch related measurement names:
- procstat_[prefix_]voluntary_context_switches value=250
- procstat_[prefix_]involuntary_context_switches value=0
@ -64,19 +71,49 @@ I/O related measurement names (*telegraf* needs to run as **root**):
- procstat_[prefix_]write_bytes value=1
CPU related measurement names:
- procstat_[prefix_]cpu_user value=0
- procstat_[prefix_]cpu_system value=0.01
- procstat_[prefix_]cpu_idle value=0
- procstat_[prefix_]cpu_nice value=0
- procstat_[prefix_]cpu_iowait value=0
- procstat_[prefix_]cpu_irq value=0
- procstat_[prefix_]cpu_soft_irq value=0
- procstat_[prefix_]cpu_soft_steal value=0
- procstat_[prefix_]cpu_soft_stolen value=0
- procstat_[prefix_]cpu_soft_guest value=0
- procstat_[prefix_]cpu_soft_guest_nice value=0
- procstat_[prefix_]cpu_time value=0.01
- procstat_[prefix_]cpu_time_user value=0
- procstat_[prefix_]cpu_time_system value=0.01
- procstat_[prefix_]cpu_time_idle value=0
- procstat_[prefix_]cpu_time_nice value=0
- procstat_[prefix_]cpu_time_iowait value=0
- procstat_[prefix_]cpu_time_irq value=0
- procstat_[prefix_]cpu_time_soft_irq value=0
- procstat_[prefix_]cpu_time_steal value=0
- procstat_[prefix_]cpu_time_stolen value=0
- procstat_[prefix_]cpu_time_guest value=0
- procstat_[prefix_]cpu_time_guest_nice value=0
Memory related measurement names:
- procstat_[prefix_]memory_rss value=1777664
- procstat_[prefix_]memory_vms value=24227840
- procstat_[prefix_]memory_swap value=282624
- procstat_[prefix_]memory_data value=229376
- procstat_[prefix_]memory_stack value=135168
- procstat_[prefix_]memory_locked value=0
Resource limits:
- procstat_[prefix_]rlimit_cpu_time_hard value=2147483647
- procstat_[prefix_]rlimit_cpu_time_soft value=2147483647
- procstat_[prefix_]rlimit_file_locks_hard value=2147483647
- procstat_[prefix_]rlimit_file_locks_soft value=2147483647
- procstat_[prefix_]rlimit_memory_data_hard value=2147483647
- procstat_[prefix_]rlimit_memory_data_soft value=2147483647
- procstat_[prefix_]rlimit_memory_locked_hard value=65536
- procstat_[prefix_]rlimit_memory_locked_soft value=65536
- procstat_[prefix_]rlimit_memory_rss_hard value=2147483647
- procstat_[prefix_]rlimit_memory_rss_soft value=2147483647
- procstat_[prefix_]rlimit_memory_stack_hard value=2147483647
- procstat_[prefix_]rlimit_memory_stack_soft value=8388608
- procstat_[prefix_]rlimit_memory_vms_hard value=2147483647
- procstat_[prefix_]rlimit_memory_vms_soft value=2147483647
- procstat_[prefix_]rlimit_nice_priority_hard value=0
- procstat_[prefix_]rlimit_nice_priority_soft value=0
- procstat_[prefix_]rlimit_num_fds_hard value=16384
- procstat_[prefix_]rlimit_num_fds_soft value=16384
- procstat_[prefix_]rlimit_realtime_priority_hard value=0
- procstat_[prefix_]rlimit_realtime_priority_soft value=0
- procstat_[prefix_]rlimit_signals_pending_hard value=1758
- procstat_[prefix_]rlimit_signals_pending_soft value=1758
*NOTE: Due to a limitation in an underlying library Telegraf uses, any resource limit > 2147483647 will be misreported as 2147483647.*

View File

@ -20,6 +20,7 @@ type Process interface {
NumThreads() (int32, error)
Percent(interval time.Duration) (float64, error)
Times() (*cpu.TimesStat, error)
RlimitUsage(bool) ([]process.RlimitStat, error)
}
type Proc struct {

View File

@ -7,6 +7,7 @@ import (
"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/plugins/inputs"
"github.com/shirou/gopsutil/process"
)
var (
@ -154,6 +155,48 @@ func (p *Procstat) addMetrics(proc Process, acc telegraf.Accumulator) {
fields[prefix+"memory_rss"] = mem.RSS
fields[prefix+"memory_vms"] = mem.VMS
fields[prefix+"memory_swap"] = mem.Swap
fields[prefix+"memory_data"] = mem.Data
fields[prefix+"memory_stack"] = mem.Stack
fields[prefix+"memory_locked"] = mem.Locked
}
rlims, err := proc.RlimitUsage(true)
if err == nil {
for _, rlim := range rlims {
var name string
switch rlim.Resource {
case process.RLIMIT_CPU:
name = "cpu_time"
case process.RLIMIT_DATA:
name = "memory_data"
case process.RLIMIT_STACK:
name = "memory_stack"
case process.RLIMIT_RSS:
name = "memory_rss"
case process.RLIMIT_NOFILE:
name = "num_fds"
case process.RLIMIT_MEMLOCK:
name = "memory_locked"
case process.RLIMIT_AS:
name = "memory_vms"
case process.RLIMIT_LOCKS:
name = "file_locks"
case process.RLIMIT_SIGPENDING:
name = "signals_pending"
case process.RLIMIT_NICE:
name = "nice_priority"
case process.RLIMIT_RTPRIO:
name = "realtime_priority"
default:
continue
}
fields[prefix+"rlimit_"+name+"_soft"] = rlim.Soft
fields[prefix+"rlimit_"+name+"_hard"] = rlim.Hard
if name != "file_locks" { // gopsutil doesn't currently track the used file locks count
fields[prefix+name] = rlim.Used
}
}
}
acc.AddFields("procstat", fields, proc.Tags())

View File

@ -95,6 +95,10 @@ func (p *testProc) Times() (*cpu.TimesStat, error) {
return &cpu.TimesStat{}, nil
}
func (p *testProc) RlimitUsage(gatherUsage bool) ([]process.RlimitStat, error) {
return []process.RlimitStat{}, nil
}
var pid PID = PID(42)
var exe string = "foo"