Add process resource limits to procstat input (#3231)
This commit is contained in:
parent
575a07c985
commit
0339dc7faf
|
@ -18,6 +18,7 @@
|
|||
- [#3211](https://github.com/influxdata/telegraf/pull/3211): Add timeout option for kubernetes input.
|
||||
- [#3224](https://github.com/influxdata/telegraf/pull/3224): Preserve url path prefix in influx output.
|
||||
- [#3234](https://github.com/influxdata/telegraf/pull/3234): Add support for timing sums in statsd input.
|
||||
- [#2617](https://github.com/influxdata/telegraf/issues/2617): Add resource limit monitoring to procstat.
|
||||
|
||||
### Bugfixes
|
||||
|
||||
|
|
2
Godeps
2
Godeps
|
@ -60,7 +60,7 @@ github.com/prometheus/procfs 1878d9fbb537119d24b21ca07effd591627cd160
|
|||
github.com/rcrowley/go-metrics 1f30fe9094a513ce4c700b9a54458bbb0c96996c
|
||||
github.com/samuel/go-zookeeper 1d7be4effb13d2d908342d349d71a284a7542693
|
||||
github.com/satori/go.uuid 5bf94b69c6b68ee1b541973bb8e1144db23a194b
|
||||
github.com/shirou/gopsutil 9a4a9167ad3b4355dbf1c2c7a0f5f0d3fb1e9ab9
|
||||
github.com/shirou/gopsutil a452de7c734a0fa0f16d2e5725b0fa5934d9fbec
|
||||
github.com/shirou/w32 3c9377fc6748f222729a8270fe2775d149a249ad
|
||||
github.com/Shopify/sarama c01858abb625b73a3af51d0798e4ad42c8147093
|
||||
github.com/Sirupsen/logrus 61e43dc76f7ee59a82bdf3d71033dc12bea4c77d
|
||||
|
|
|
@ -39,8 +39,8 @@ Example:
|
|||
The above configuration would result in output like:
|
||||
|
||||
```
|
||||
> procstat,pidfile=/var/run/lxc/dnsmasq.pid,process_name=dnsmasq,pid=44979 cpu_user=0.14,cpu_system=0.07
|
||||
> procstat,exe=influxd,process_name=influxd,pid=34337 influxd_cpu_user=25.43,influxd_cpu_system=21.82
|
||||
> procstat,pidfile=/var/run/lxc/dnsmasq.pid,process_name=dnsmasq rlimit_file_locks_soft=2147483647i,rlimit_signals_pending_hard=1758i,voluntary_context_switches=478i,read_bytes=307200i,cpu_time_user=0.01,cpu_time_guest=0,memory_swap=0i,memory_locked=0i,rlimit_num_fds_hard=4096i,rlimit_nice_priority_hard=0i,num_fds=11i,involuntary_context_switches=20i,read_count=23i,memory_rss=1388544i,rlimit_memory_rss_soft=2147483647i,rlimit_memory_rss_hard=2147483647i,nice_priority=20i,rlimit_cpu_time_hard=2147483647i,cpu_time=0i,write_bytes=0i,cpu_time_idle=0,cpu_time_nice=0,memory_data=229376i,memory_stack=135168i,rlimit_cpu_time_soft=2147483647i,rlimit_memory_data_hard=2147483647i,rlimit_memory_locked_hard=65536i,rlimit_signals_pending_soft=1758i,write_count=11i,cpu_time_iowait=0,cpu_time_steal=0,cpu_time_stolen=0,rlimit_memory_stack_soft=8388608i,cpu_time_system=0.02,cpu_time_guest_nice=0,rlimit_memory_locked_soft=65536i,rlimit_memory_vms_soft=2147483647i,rlimit_file_locks_hard=2147483647i,rlimit_realtime_priority_hard=0i,pid=828i,num_threads=1i,cpu_time_soft_irq=0,rlimit_memory_vms_hard=2147483647i,rlimit_realtime_priority_soft=0i,memory_vms=15884288i,rlimit_memory_stack_hard=2147483647i,cpu_time_irq=0,rlimit_memory_data_soft=2147483647i,rlimit_num_fds_soft=1024i,signals_pending=0i,rlimit_nice_priority_soft=0i,realtime_priority=0i
|
||||
> procstat,exe=influxd,process_name=influxd rlimit_num_fds_hard=16384i,rlimit_signals_pending_hard=1758i,realtime_priority=0i,rlimit_memory_vms_hard=2147483647i,rlimit_signals_pending_soft=1758i,cpu_time_stolen=0,rlimit_memory_stack_hard=2147483647i,rlimit_realtime_priority_hard=0i,cpu_time=0i,pid=500i,voluntary_context_switches=975i,cpu_time_idle=0,memory_rss=3072000i,memory_locked=0i,rlimit_nice_priority_soft=0i,signals_pending=0i,nice_priority=20i,read_bytes=823296i,cpu_time_soft_irq=0,rlimit_memory_data_hard=2147483647i,rlimit_memory_locked_soft=65536i,write_count=8i,cpu_time_irq=0,memory_vms=33501184i,rlimit_memory_stack_soft=8388608i,cpu_time_iowait=0,rlimit_memory_vms_soft=2147483647i,rlimit_nice_priority_hard=0i,num_fds=29i,memory_data=229376i,rlimit_cpu_time_soft=2147483647i,rlimit_file_locks_soft=2147483647i,num_threads=1i,write_bytes=0i,cpu_time_steal=0,rlimit_memory_rss_hard=2147483647i,cpu_time_guest=0,cpu_time_guest_nice=0,cpu_usage=0,rlimit_memory_locked_hard=65536i,rlimit_file_locks_hard=2147483647i,involuntary_context_switches=38i,read_count=16851i,memory_swap=0i,rlimit_memory_data_soft=2147483647i,cpu_time_user=0.11,rlimit_cpu_time_hard=2147483647i,rlimit_num_fds_soft=16384i,rlimit_realtime_priority_soft=0i,cpu_time_system=0.27,cpu_time_nice=0,memory_stack=135168i,rlimit_memory_rss_soft=2147483647i
|
||||
```
|
||||
|
||||
# Measurements
|
||||
|
@ -53,6 +53,13 @@ Threads related measurement names:
|
|||
File descriptor related measurement names (*telegraf* needs to run as **root**):
|
||||
- procstat_[prefix_]num_fds value=4
|
||||
|
||||
Priority related measurement names:
|
||||
- procstat_[prefix_]realtime_priority value=0
|
||||
- procstat_[prefix_]nice_priority value=20
|
||||
|
||||
Signals related measurement names:
|
||||
- procstat_[prefix_]signals_pending value=0
|
||||
|
||||
Context switch related measurement names:
|
||||
- procstat_[prefix_]voluntary_context_switches value=250
|
||||
- procstat_[prefix_]involuntary_context_switches value=0
|
||||
|
@ -64,19 +71,49 @@ I/O related measurement names (*telegraf* needs to run as **root**):
|
|||
- procstat_[prefix_]write_bytes value=1
|
||||
|
||||
CPU related measurement names:
|
||||
- procstat_[prefix_]cpu_user value=0
|
||||
- procstat_[prefix_]cpu_system value=0.01
|
||||
- procstat_[prefix_]cpu_idle value=0
|
||||
- procstat_[prefix_]cpu_nice value=0
|
||||
- procstat_[prefix_]cpu_iowait value=0
|
||||
- procstat_[prefix_]cpu_irq value=0
|
||||
- procstat_[prefix_]cpu_soft_irq value=0
|
||||
- procstat_[prefix_]cpu_soft_steal value=0
|
||||
- procstat_[prefix_]cpu_soft_stolen value=0
|
||||
- procstat_[prefix_]cpu_soft_guest value=0
|
||||
- procstat_[prefix_]cpu_soft_guest_nice value=0
|
||||
- procstat_[prefix_]cpu_time value=0.01
|
||||
- procstat_[prefix_]cpu_time_user value=0
|
||||
- procstat_[prefix_]cpu_time_system value=0.01
|
||||
- procstat_[prefix_]cpu_time_idle value=0
|
||||
- procstat_[prefix_]cpu_time_nice value=0
|
||||
- procstat_[prefix_]cpu_time_iowait value=0
|
||||
- procstat_[prefix_]cpu_time_irq value=0
|
||||
- procstat_[prefix_]cpu_time_soft_irq value=0
|
||||
- procstat_[prefix_]cpu_time_steal value=0
|
||||
- procstat_[prefix_]cpu_time_stolen value=0
|
||||
- procstat_[prefix_]cpu_time_guest value=0
|
||||
- procstat_[prefix_]cpu_time_guest_nice value=0
|
||||
|
||||
Memory related measurement names:
|
||||
- procstat_[prefix_]memory_rss value=1777664
|
||||
- procstat_[prefix_]memory_vms value=24227840
|
||||
- procstat_[prefix_]memory_swap value=282624
|
||||
- procstat_[prefix_]memory_data value=229376
|
||||
- procstat_[prefix_]memory_stack value=135168
|
||||
- procstat_[prefix_]memory_locked value=0
|
||||
|
||||
Resource limit related measurement names:
|
||||
- procstat_[prefix_]rlimit_cpu_time_hard value=2147483647
|
||||
- procstat_[prefix_]rlimit_cpu_time_soft value=2147483647
|
||||
- procstat_[prefix_]rlimit_file_locks_hard value=2147483647
|
||||
- procstat_[prefix_]rlimit_file_locks_soft value=2147483647
|
||||
- procstat_[prefix_]rlimit_memory_data_hard value=2147483647
|
||||
- procstat_[prefix_]rlimit_memory_data_soft value=2147483647
|
||||
- procstat_[prefix_]rlimit_memory_locked_hard value=65536
|
||||
- procstat_[prefix_]rlimit_memory_locked_soft value=65536
|
||||
- procstat_[prefix_]rlimit_memory_rss_hard value=2147483647
|
||||
- procstat_[prefix_]rlimit_memory_rss_soft value=2147483647
|
||||
- procstat_[prefix_]rlimit_memory_stack_hard value=2147483647
|
||||
- procstat_[prefix_]rlimit_memory_stack_soft value=8388608
|
||||
- procstat_[prefix_]rlimit_memory_vms_hard value=2147483647
|
||||
- procstat_[prefix_]rlimit_memory_vms_soft value=2147483647
|
||||
- procstat_[prefix_]rlimit_nice_priority_hard value=0
|
||||
- procstat_[prefix_]rlimit_nice_priority_soft value=0
|
||||
- procstat_[prefix_]rlimit_num_fds_hard value=16384
|
||||
- procstat_[prefix_]rlimit_num_fds_soft value=16384
|
||||
- procstat_[prefix_]rlimit_realtime_priority_hard value=0
|
||||
- procstat_[prefix_]rlimit_realtime_priority_soft value=0
|
||||
- procstat_[prefix_]rlimit_signals_pending_hard value=1758
|
||||
- procstat_[prefix_]rlimit_signals_pending_soft value=1758
|
||||
|
||||
*NOTE: Due to a limitation in an underlying library that Telegraf uses, any resource limit greater than 2147483647 (the maximum 32-bit signed integer) will be misreported as 2147483647.*
|
||||
|
|
|
@ -20,6 +20,7 @@ type Process interface {
|
|||
NumThreads() (int32, error)
|
||||
Percent(interval time.Duration) (float64, error)
|
||||
Times() (*cpu.TimesStat, error)
|
||||
RlimitUsage(bool) ([]process.RlimitStat, error)
|
||||
}
|
||||
|
||||
type Proc struct {
|
||||
|
|
|
@ -7,6 +7,7 @@ import (
|
|||
|
||||
"github.com/influxdata/telegraf"
|
||||
"github.com/influxdata/telegraf/plugins/inputs"
|
||||
"github.com/shirou/gopsutil/process"
|
||||
)
|
||||
|
||||
var (
|
||||
|
@ -154,6 +155,48 @@ func (p *Procstat) addMetrics(proc Process, acc telegraf.Accumulator) {
|
|||
fields[prefix+"memory_rss"] = mem.RSS
|
||||
fields[prefix+"memory_vms"] = mem.VMS
|
||||
fields[prefix+"memory_swap"] = mem.Swap
|
||||
fields[prefix+"memory_data"] = mem.Data
|
||||
fields[prefix+"memory_stack"] = mem.Stack
|
||||
fields[prefix+"memory_locked"] = mem.Locked
|
||||
}
|
||||
|
||||
rlims, err := proc.RlimitUsage(true)
|
||||
if err == nil {
|
||||
for _, rlim := range rlims {
|
||||
var name string
|
||||
switch rlim.Resource {
|
||||
case process.RLIMIT_CPU:
|
||||
name = "cpu_time"
|
||||
case process.RLIMIT_DATA:
|
||||
name = "memory_data"
|
||||
case process.RLIMIT_STACK:
|
||||
name = "memory_stack"
|
||||
case process.RLIMIT_RSS:
|
||||
name = "memory_rss"
|
||||
case process.RLIMIT_NOFILE:
|
||||
name = "num_fds"
|
||||
case process.RLIMIT_MEMLOCK:
|
||||
name = "memory_locked"
|
||||
case process.RLIMIT_AS:
|
||||
name = "memory_vms"
|
||||
case process.RLIMIT_LOCKS:
|
||||
name = "file_locks"
|
||||
case process.RLIMIT_SIGPENDING:
|
||||
name = "signals_pending"
|
||||
case process.RLIMIT_NICE:
|
||||
name = "nice_priority"
|
||||
case process.RLIMIT_RTPRIO:
|
||||
name = "realtime_priority"
|
||||
default:
|
||||
continue
|
||||
}
|
||||
|
||||
fields[prefix+"rlimit_"+name+"_soft"] = rlim.Soft
|
||||
fields[prefix+"rlimit_"+name+"_hard"] = rlim.Hard
|
||||
if name != "file_locks" { // gopsutil doesn't currently track the used file locks count
|
||||
fields[prefix+name] = rlim.Used
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
acc.AddFields("procstat", fields, proc.Tags())
|
||||
|
|
|
@ -95,6 +95,10 @@ func (p *testProc) Times() (*cpu.TimesStat, error) {
|
|||
return &cpu.TimesStat{}, nil
|
||||
}
|
||||
|
||||
// RlimitUsage is the testProc stub for the resource-limit lookup. It ignores
// gatherUsage and always returns an empty RlimitStat slice with a nil error,
// so callers iterating the result add no rlimit fields.
// NOTE(review): presumably exists so testProc satisfies the Process
// interface — confirm against the interface definition.
func (p *testProc) RlimitUsage(gatherUsage bool) ([]process.RlimitStat, error) {
|
||||
return []process.RlimitStat{}, nil
|
||||
}
|
||||
|
||||
var pid PID = PID(42)
|
||||
var exe string = "foo"
|
||||
|
||||
|
|
Loading…
Reference in New Issue