diff --git a/CHANGELOG.md b/CHANGELOG.md index dbdb9cc60..c24594ed4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -18,6 +18,7 @@ - [#3211](https://github.com/influxdata/telegraf/pull/3211): Add timeout option for kubernetes input. - [#3224](https://github.com/influxdata/telegraf/pull/3224): Preserve url path prefix in influx output. - [#3234](https://github.com/influxdata/telegraf/pull/3234): Add support for timing sums in statsd input. +- [#2617](https://github.com/influxdata/telegraf/issues/2617): Add resource limit monitoring to procstat. ### Bugfixes diff --git a/Godeps b/Godeps index 48f9138e8..654a345eb 100644 --- a/Godeps +++ b/Godeps @@ -60,7 +60,7 @@ github.com/prometheus/procfs 1878d9fbb537119d24b21ca07effd591627cd160 github.com/rcrowley/go-metrics 1f30fe9094a513ce4c700b9a54458bbb0c96996c github.com/samuel/go-zookeeper 1d7be4effb13d2d908342d349d71a284a7542693 github.com/satori/go.uuid 5bf94b69c6b68ee1b541973bb8e1144db23a194b -github.com/shirou/gopsutil 9a4a9167ad3b4355dbf1c2c7a0f5f0d3fb1e9ab9 +github.com/shirou/gopsutil a452de7c734a0fa0f16d2e5725b0fa5934d9fbec github.com/shirou/w32 3c9377fc6748f222729a8270fe2775d149a249ad github.com/Shopify/sarama c01858abb625b73a3af51d0798e4ad42c8147093 github.com/Sirupsen/logrus 61e43dc76f7ee59a82bdf3d71033dc12bea4c77d diff --git a/plugins/inputs/procstat/README.md b/plugins/inputs/procstat/README.md index 13f7e86ab..57ede8391 100644 --- a/plugins/inputs/procstat/README.md +++ b/plugins/inputs/procstat/README.md @@ -39,8 +39,8 @@ Example: The above configuration would result in output like: ``` -> procstat,pidfile=/var/run/lxc/dnsmasq.pid,process_name=dnsmasq,pid=44979 cpu_user=0.14,cpu_system=0.07 -> procstat,exe=influxd,process_name=influxd,pid=34337 influxd_cpu_user=25.43,influxd_cpu_system=21.82 +> procstat,pidfile=/var/run/lxc/dnsmasq.pid,process_name=dnsmasq 
rlimit_file_locks_soft=2147483647i,rlimit_signals_pending_hard=1758i,voluntary_context_switches=478i,read_bytes=307200i,cpu_time_user=0.01,cpu_time_guest=0,memory_swap=0i,memory_locked=0i,rlimit_num_fds_hard=4096i,rlimit_nice_priority_hard=0i,num_fds=11i,involuntary_context_switches=20i,read_count=23i,memory_rss=1388544i,rlimit_memory_rss_soft=2147483647i,rlimit_memory_rss_hard=2147483647i,nice_priority=20i,rlimit_cpu_time_hard=2147483647i,cpu_time=0i,write_bytes=0i,cpu_time_idle=0,cpu_time_nice=0,memory_data=229376i,memory_stack=135168i,rlimit_cpu_time_soft=2147483647i,rlimit_memory_data_hard=2147483647i,rlimit_memory_locked_hard=65536i,rlimit_signals_pending_soft=1758i,write_count=11i,cpu_time_iowait=0,cpu_time_steal=0,cpu_time_stolen=0,rlimit_memory_stack_soft=8388608i,cpu_time_system=0.02,cpu_time_guest_nice=0,rlimit_memory_locked_soft=65536i,rlimit_memory_vms_soft=2147483647i,rlimit_file_locks_hard=2147483647i,rlimit_realtime_priority_hard=0i,pid=828i,num_threads=1i,cpu_time_soft_irq=0,rlimit_memory_vms_hard=2147483647i,rlimit_realtime_priority_soft=0i,memory_vms=15884288i,rlimit_memory_stack_hard=2147483647i,cpu_time_irq=0,rlimit_memory_data_soft=2147483647i,rlimit_num_fds_soft=1024i,signals_pending=0i,rlimit_nice_priority_soft=0i,realtime_priority=0i +> procstat,exe=influxd,process_name=influxd 
rlimit_num_fds_hard=16384i,rlimit_signals_pending_hard=1758i,realtime_priority=0i,rlimit_memory_vms_hard=2147483647i,rlimit_signals_pending_soft=1758i,cpu_time_stolen=0,rlimit_memory_stack_hard=2147483647i,rlimit_realtime_priority_hard=0i,cpu_time=0i,pid=500i,voluntary_context_switches=975i,cpu_time_idle=0,memory_rss=3072000i,memory_locked=0i,rlimit_nice_priority_soft=0i,signals_pending=0i,nice_priority=20i,read_bytes=823296i,cpu_time_soft_irq=0,rlimit_memory_data_hard=2147483647i,rlimit_memory_locked_soft=65536i,write_count=8i,cpu_time_irq=0,memory_vms=33501184i,rlimit_memory_stack_soft=8388608i,cpu_time_iowait=0,rlimit_memory_vms_soft=2147483647i,rlimit_nice_priority_hard=0i,num_fds=29i,memory_data=229376i,rlimit_cpu_time_soft=2147483647i,rlimit_file_locks_soft=2147483647i,num_threads=1i,write_bytes=0i,cpu_time_steal=0,rlimit_memory_rss_hard=2147483647i,cpu_time_guest=0,cpu_time_guest_nice=0,cpu_usage=0,rlimit_memory_locked_hard=65536i,rlimit_file_locks_hard=2147483647i,involuntary_context_switches=38i,read_count=16851i,memory_swap=0i,rlimit_memory_data_soft=2147483647i,cpu_time_user=0.11,rlimit_cpu_time_hard=2147483647i,rlimit_num_fds_soft=16384i,rlimit_realtime_priority_soft=0i,cpu_time_system=0.27,cpu_time_nice=0,memory_stack=135168i,rlimit_memory_rss_soft=2147483647i ``` # Measurements @@ -53,6 +53,13 @@ Threads related measurement names: File descriptor related measurement names (*telegraf* needs to run as **root**): - procstat_[prefix_]num_fds value=4 +Priority related measurement names: +- procstat_[prefix_]realtime_priority value=0 +- procstat_[prefix_]nice_priority value=20 + +Signals related measurement names: +- procstat_[prefix_]signals_pending value=0 + Context switch related measurement names: - procstat_[prefix_]voluntary_context_switches value=250 - procstat_[prefix_]involuntary_context_switches value=0 @@ -64,19 +71,49 @@ I/O related measurement names (*telegraf* needs to run as **root**): - procstat_[prefix_]write_bytes value=1 CPU related 
measurement names: -- procstat_[prefix_]cpu_user value=0 -- procstat_[prefix_]cpu_system value=0.01 -- procstat_[prefix_]cpu_idle value=0 -- procstat_[prefix_]cpu_nice value=0 -- procstat_[prefix_]cpu_iowait value=0 -- procstat_[prefix_]cpu_irq value=0 -- procstat_[prefix_]cpu_soft_irq value=0 -- procstat_[prefix_]cpu_soft_steal value=0 -- procstat_[prefix_]cpu_soft_stolen value=0 -- procstat_[prefix_]cpu_soft_guest value=0 -- procstat_[prefix_]cpu_soft_guest_nice value=0 +- procstat_[prefix_]cpu_time value=0 +- procstat_[prefix_]cpu_time_user value=0 +- procstat_[prefix_]cpu_time_system value=0.01 +- procstat_[prefix_]cpu_time_idle value=0 +- procstat_[prefix_]cpu_time_nice value=0 +- procstat_[prefix_]cpu_time_iowait value=0 +- procstat_[prefix_]cpu_time_irq value=0 +- procstat_[prefix_]cpu_time_soft_irq value=0 +- procstat_[prefix_]cpu_time_steal value=0 +- procstat_[prefix_]cpu_time_stolen value=0 +- procstat_[prefix_]cpu_time_guest value=0 +- procstat_[prefix_]cpu_time_guest_nice value=0 Memory related measurement names: - procstat_[prefix_]memory_rss value=1777664 - procstat_[prefix_]memory_vms value=24227840 - procstat_[prefix_]memory_swap value=282624 +- procstat_[prefix_]memory_data value=229376 +- procstat_[prefix_]memory_stack value=135168 +- procstat_[prefix_]memory_locked value=0 + +Resource limit related measurement names: +- procstat_[prefix_]rlimit_cpu_time_hard value=2147483647 +- procstat_[prefix_]rlimit_cpu_time_soft value=2147483647 +- procstat_[prefix_]rlimit_file_locks_hard value=2147483647 +- procstat_[prefix_]rlimit_file_locks_soft value=2147483647 +- procstat_[prefix_]rlimit_memory_data_hard value=2147483647 +- procstat_[prefix_]rlimit_memory_data_soft value=2147483647 +- procstat_[prefix_]rlimit_memory_locked_hard value=65536 +- procstat_[prefix_]rlimit_memory_locked_soft value=65536 +- procstat_[prefix_]rlimit_memory_rss_hard value=2147483647 +- procstat_[prefix_]rlimit_memory_rss_soft value=2147483647 +- procstat_[prefix_]rlimit_memory_stack_hard 
value=2147483647 +- procstat_[prefix_]rlimit_memory_stack_soft value=8388608 +- procstat_[prefix_]rlimit_memory_vms_hard value=2147483647 +- procstat_[prefix_]rlimit_memory_vms_soft value=2147483647 +- procstat_[prefix_]rlimit_nice_priority_hard value=0 +- procstat_[prefix_]rlimit_nice_priority_soft value=0 +- procstat_[prefix_]rlimit_num_fds_hard value=16384 +- procstat_[prefix_]rlimit_num_fds_soft value=16384 +- procstat_[prefix_]rlimit_realtime_priority_hard value=0 +- procstat_[prefix_]rlimit_realtime_priority_soft value=0 +- procstat_[prefix_]rlimit_signals_pending_hard value=1758 +- procstat_[prefix_]rlimit_signals_pending_soft value=1758 + +*NOTE: Due to a limitation in an underlying library Telegraf uses, any resource limit > 2147483647 will be misreported as 2147483647.* diff --git a/plugins/inputs/procstat/process.go b/plugins/inputs/procstat/process.go index ec2363f6e..3470a8a94 100644 --- a/plugins/inputs/procstat/process.go +++ b/plugins/inputs/procstat/process.go @@ -20,6 +20,7 @@ type Process interface { NumThreads() (int32, error) Percent(interval time.Duration) (float64, error) Times() (*cpu.TimesStat, error) + RlimitUsage(bool) ([]process.RlimitStat, error) } type Proc struct { diff --git a/plugins/inputs/procstat/procstat.go b/plugins/inputs/procstat/procstat.go index 3715d390b..a216e0e38 100644 --- a/plugins/inputs/procstat/procstat.go +++ b/plugins/inputs/procstat/procstat.go @@ -7,6 +7,7 @@ import ( "github.com/influxdata/telegraf" "github.com/influxdata/telegraf/plugins/inputs" + "github.com/shirou/gopsutil/process" ) var ( @@ -154,6 +155,48 @@ func (p *Procstat) addMetrics(proc Process, acc telegraf.Accumulator) { fields[prefix+"memory_rss"] = mem.RSS fields[prefix+"memory_vms"] = mem.VMS fields[prefix+"memory_swap"] = mem.Swap + fields[prefix+"memory_data"] = mem.Data + fields[prefix+"memory_stack"] = mem.Stack + fields[prefix+"memory_locked"] = mem.Locked + } + + rlims, err := proc.RlimitUsage(true) + if err == nil { + for _, rlim := range 
rlims { + var name string + switch rlim.Resource { + case process.RLIMIT_CPU: + name = "cpu_time" + case process.RLIMIT_DATA: + name = "memory_data" + case process.RLIMIT_STACK: + name = "memory_stack" + case process.RLIMIT_RSS: + name = "memory_rss" + case process.RLIMIT_NOFILE: + name = "num_fds" + case process.RLIMIT_MEMLOCK: + name = "memory_locked" + case process.RLIMIT_AS: + name = "memory_vms" + case process.RLIMIT_LOCKS: + name = "file_locks" + case process.RLIMIT_SIGPENDING: + name = "signals_pending" + case process.RLIMIT_NICE: + name = "nice_priority" + case process.RLIMIT_RTPRIO: + name = "realtime_priority" + default: + continue + } + + fields[prefix+"rlimit_"+name+"_soft"] = rlim.Soft + fields[prefix+"rlimit_"+name+"_hard"] = rlim.Hard + if name != "file_locks" { // gopsutil doesn't currently track the used file locks count + fields[prefix+name] = rlim.Used + } + } } acc.AddFields("procstat", fields, proc.Tags()) diff --git a/plugins/inputs/procstat/procstat_test.go b/plugins/inputs/procstat/procstat_test.go index 97cf34582..bc052939f 100644 --- a/plugins/inputs/procstat/procstat_test.go +++ b/plugins/inputs/procstat/procstat_test.go @@ -95,6 +95,10 @@ func (p *testProc) Times() (*cpu.TimesStat, error) { return &cpu.TimesStat{}, nil } +func (p *testProc) RlimitUsage(gatherUsage bool) ([]process.RlimitStat, error) { + return []process.RlimitStat{}, nil +} + var pid PID = PID(42) var exe string = "foo"