Implement telegraf collecting stats on itself

closes #1348
This commit is contained in:
Cameron Sparr
2016-11-07 08:34:46 +00:00
parent d518d7d806
commit d71a42cd1b
26 changed files with 975 additions and 169 deletions

View File

@@ -27,6 +27,7 @@ import (
_ "github.com/influxdata/telegraf/plugins/inputs/http_response"
_ "github.com/influxdata/telegraf/plugins/inputs/httpjson"
_ "github.com/influxdata/telegraf/plugins/inputs/influxdb"
_ "github.com/influxdata/telegraf/plugins/inputs/internal"
_ "github.com/influxdata/telegraf/plugins/inputs/ipmi_sensor"
_ "github.com/influxdata/telegraf/plugins/inputs/iptables"
_ "github.com/influxdata/telegraf/plugins/inputs/jolokia"

View File

@@ -14,6 +14,7 @@ import (
"github.com/influxdata/telegraf/internal"
"github.com/influxdata/telegraf/plugins/inputs"
"github.com/influxdata/telegraf/plugins/parsers/influx"
"github.com/influxdata/telegraf/selfstat"
)
const (
@@ -43,6 +44,18 @@ type HTTPListener struct {
parser influx.InfluxParser
acc telegraf.Accumulator
pool *pool
BytesRecv selfstat.Stat
RequestsServed selfstat.Stat
WritesServed selfstat.Stat
QueriesServed selfstat.Stat
PingsServed selfstat.Stat
RequestsRecv selfstat.Stat
WritesRecv selfstat.Stat
QueriesRecv selfstat.Stat
PingsRecv selfstat.Stat
NotFoundsServed selfstat.Stat
BuffersCreated selfstat.Stat
}
const sampleConfig = `
@@ -72,7 +85,7 @@ func (h *HTTPListener) Description() string {
}
func (h *HTTPListener) Gather(_ telegraf.Accumulator) error {
log.Printf("D! The http_listener has created %d buffers", h.pool.ncreated())
h.BuffersCreated.Set(h.pool.ncreated())
return nil
}
@@ -81,6 +94,21 @@ func (h *HTTPListener) Start(acc telegraf.Accumulator) error {
h.mu.Lock()
defer h.mu.Unlock()
tags := map[string]string{
"address": h.ServiceAddress,
}
h.BytesRecv = selfstat.Register("http_listener", "bytes_received", tags)
h.RequestsServed = selfstat.Register("http_listener", "requests_served", tags)
h.WritesServed = selfstat.Register("http_listener", "writes_served", tags)
h.QueriesServed = selfstat.Register("http_listener", "queries_served", tags)
h.PingsServed = selfstat.Register("http_listener", "pings_served", tags)
h.RequestsRecv = selfstat.Register("http_listener", "requests_received", tags)
h.WritesRecv = selfstat.Register("http_listener", "writes_received", tags)
h.QueriesRecv = selfstat.Register("http_listener", "queries_received", tags)
h.PingsRecv = selfstat.Register("http_listener", "pings_received", tags)
h.NotFoundsServed = selfstat.Register("http_listener", "not_founds_served", tags)
h.BuffersCreated = selfstat.Register("http_listener", "buffers_created", tags)
if h.MaxBodySize == 0 {
h.MaxBodySize = DEFAULT_MAX_BODY_SIZE
}
@@ -141,10 +169,16 @@ func (h *HTTPListener) httpListen() error {
}
func (h *HTTPListener) ServeHTTP(res http.ResponseWriter, req *http.Request) {
h.RequestsRecv.Incr(1)
defer h.RequestsServed.Incr(1)
switch req.URL.Path {
case "/write":
h.WritesRecv.Incr(1)
defer h.WritesServed.Incr(1)
h.serveWrite(res, req)
case "/query":
h.QueriesRecv.Incr(1)
defer h.QueriesServed.Incr(1)
// Deliver a dummy response to the query endpoint, as some InfluxDB
// clients test endpoint availability with a query
res.Header().Set("Content-Type", "application/json")
@@ -152,9 +186,12 @@ func (h *HTTPListener) ServeHTTP(res http.ResponseWriter, req *http.Request) {
res.WriteHeader(http.StatusOK)
res.Write([]byte("{\"results\":[]}"))
case "/ping":
h.PingsRecv.Incr(1)
defer h.PingsServed.Incr(1)
// respond to ping requests
res.WriteHeader(http.StatusNoContent)
default:
defer h.NotFoundsServed.Incr(1)
// Don't know how to respond to calls to other endpoints
http.NotFound(res, req)
}
@@ -195,6 +232,7 @@ func (h *HTTPListener) serveWrite(res http.ResponseWriter, req *http.Request) {
badRequest(res)
return
}
h.BytesRecv.Incr(int64(n))
if err == io.EOF {
if return400 {

View File

@@ -0,0 +1,83 @@
# Internal Input Plugin
The `internal` plugin collects metrics about the telegraf agent itself.
Note that some metrics are aggregates across all instances of one type of
plugin.
### Configuration:
```toml
# Collect statistics about itself
[[inputs.internal]]
## If true, collect telegraf memory stats.
# collect_memstats = true
```
### Measurements & Fields:
memstats are taken from the Go runtime: https://golang.org/pkg/runtime/#MemStats
- internal\_memstats
- alloc\_bytes
- frees
- heap\_alloc\_bytes
- heap\_idle\_bytes
- heap\_in\_use\_bytes
- heap\_objects\_bytes
- heap\_released\_bytes
- heap\_sys\_bytes
- mallocs
- num\_gc
- pointer\_lookups
- sys\_bytes
- total\_alloc\_bytes
agent stats collect aggregate stats on all telegraf plugins.
- internal\_agent
- gather\_errors
- metrics\_dropped
- metrics\_gathered
- metrics\_written
internal\_gather stats collect aggregate stats on all input plugins
that are of the same input type. They are tagged with `input=<plugin_name>`.
- internal\_gather
- gather\_time\_ns
- metrics\_gathered
internal\_write stats collect aggregate stats on all output plugins
that are of the same input type. They are tagged with `output=<plugin_name>`.
- internal\_write
- buffer\_limit
- buffer\_size
- metrics\_written
- metrics\_filtered
- write\_time\_ns
internal\_\<plugin\_name\> are metrics which are defined on a per-plugin basis, and
usually contain tags which differentiate each instance of a particular type of
plugin.
- internal\_\<plugin\_name\>
- individual plugin-specific fields, such as requests counts.
### Tags:
All measurements for specific plugins are tagged with information relevant
to each particular plugin.
### Example Output:
```
internal_memstats,host=tyrion alloc_bytes=4457408i,sys_bytes=10590456i,pointer_lookups=7i,mallocs=17642i,frees=7473i,heap_sys_bytes=6848512i,heap_idle_bytes=1368064i,heap_in_use_bytes=5480448i,heap_released_bytes=0i,total_alloc_bytes=6875560i,heap_alloc_bytes=4457408i,heap_objects_bytes=10169i,num_gc=2i 1480682800000000000
internal_agent,host=tyrion metrics_written=18i,metrics_dropped=0i,metrics_gathered=19i,gather_errors=0i 1480682800000000000
internal_write,output=file,host=tyrion buffer_limit=10000i,write_time_ns=636609i,metrics_written=18i,buffer_size=0i 1480682800000000000
internal_gather,input=internal,host=tyrion metrics_gathered=19i,gather_time_ns=442114i 1480682800000000000
internal_gather,input=http_listener,host=tyrion metrics_gathered=0i,gather_time_ns=167285i 1480682800000000000
internal_http_listener,address=:8186,host=tyrion queries_received=0i,writes_received=0i,requests_received=0i,buffers_created=0i,requests_served=0i,pings_received=0i,bytes_received=0i,not_founds_served=0i,pings_served=0i,queries_served=0i,writes_served=0i 1480682800000000000
```

View File

@@ -0,0 +1,66 @@
package internal
import (
"runtime"
"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/plugins/inputs"
"github.com/influxdata/telegraf/selfstat"
)
type Self struct {
CollectMemstats bool
}
func NewSelf() telegraf.Input {
return &Self{
CollectMemstats: true,
}
}
var sampleConfig = `
## If true, collect telegraf memory stats.
# collect_memstats = true
`
func (s *Self) Description() string {
return "Collect statistics about itself"
}
func (s *Self) SampleConfig() string {
return sampleConfig
}
func (s *Self) Gather(acc telegraf.Accumulator) error {
if s.CollectMemstats {
m := &runtime.MemStats{}
runtime.ReadMemStats(m)
fields := map[string]interface{}{
"alloc_bytes": m.Alloc, // bytes allocated and not yet freed
"total_alloc_bytes": m.TotalAlloc, // bytes allocated (even if freed)
"sys_bytes": m.Sys, // bytes obtained from system (sum of XxxSys below)
"pointer_lookups": m.Lookups, // number of pointer lookups
"mallocs": m.Mallocs, // number of mallocs
"frees": m.Frees, // number of frees
// Main allocation heap statistics.
"heap_alloc_bytes": m.HeapAlloc, // bytes allocated and not yet freed (same as Alloc above)
"heap_sys_bytes": m.HeapSys, // bytes obtained from system
"heap_idle_bytes": m.HeapIdle, // bytes in idle spans
"heap_in_use_bytes": m.HeapInuse, // bytes in non-idle span
"heap_released_bytes": m.HeapReleased, // bytes released to the OS
"heap_objects_bytes": m.HeapObjects, // total number of allocated objects
"num_gc": m.NumGC,
}
acc.AddFields("internal_memstats", fields, map[string]string{})
}
for _, m := range selfstat.Metrics() {
acc.AddFields(m.Name(), m.Fields(), m.Tags(), m.Time())
}
return nil
}
func init() {
inputs.Add("internal", NewSelf)
}

View File

@@ -0,0 +1,62 @@
package internal
import (
"testing"
"github.com/influxdata/telegraf/selfstat"
"github.com/influxdata/telegraf/testutil"
"github.com/stretchr/testify/assert"
)
func TestSelfPlugin(t *testing.T) {
s := NewSelf()
acc := &testutil.Accumulator{}
s.Gather(acc)
assert.True(t, acc.HasMeasurement("internal_memstats"))
// test that a registered stat is incremented
stat := selfstat.Register("mytest", "test", map[string]string{"test": "foo"})
stat.Incr(1)
stat.Incr(2)
s.Gather(acc)
acc.AssertContainsTaggedFields(t, "internal_mytest",
map[string]interface{}{
"test": int64(3),
},
map[string]string{
"test": "foo",
},
)
acc.ClearMetrics()
// test that a registered stat is set properly
stat.Set(101)
s.Gather(acc)
acc.AssertContainsTaggedFields(t, "internal_mytest",
map[string]interface{}{
"test": int64(101),
},
map[string]string{
"test": "foo",
},
)
acc.ClearMetrics()
// test that regular and timing stats can share the same measurement, and
// that timings are set properly.
timing := selfstat.RegisterTiming("mytest", "test_ns", map[string]string{"test": "foo"})
timing.Incr(100)
timing.Incr(200)
s.Gather(acc)
acc.AssertContainsTaggedFields(t, "internal_mytest",
map[string]interface{}{
"test": int64(101),
"test_ns": int64(150),
},
map[string]string{
"test": "foo",
},
)
}

View File

@@ -11,6 +11,7 @@ import (
"github.com/influxdata/telegraf/internal"
"github.com/influxdata/telegraf/plugins/inputs"
"github.com/influxdata/telegraf/plugins/parsers"
"github.com/influxdata/telegraf/selfstat"
)
type TcpListener struct {
@@ -41,6 +42,12 @@ type TcpListener struct {
parser parsers.Parser
acc telegraf.Accumulator
MaxConnections selfstat.Stat
CurrentConnections selfstat.Stat
TotalConnections selfstat.Stat
PacketsRecv selfstat.Stat
BytesRecv selfstat.Stat
}
var dropwarn = "E! Error: tcp_listener message queue full. " +
@@ -91,6 +98,16 @@ func (t *TcpListener) Start(acc telegraf.Accumulator) error {
t.Lock()
defer t.Unlock()
tags := map[string]string{
"address": t.ServiceAddress,
}
t.MaxConnections = selfstat.Register("tcp_listener", "max_connections", tags)
t.MaxConnections.Set(int64(t.MaxTCPConnections))
t.CurrentConnections = selfstat.Register("tcp_listener", "current_connections", tags)
t.TotalConnections = selfstat.Register("tcp_listener", "total_connections", tags)
t.PacketsRecv = selfstat.Register("tcp_listener", "packets_received", tags)
t.BytesRecv = selfstat.Register("tcp_listener", "bytes_received", tags)
t.acc = acc
t.in = make(chan []byte, t.AllowedPendingMessages)
t.done = make(chan struct{})
@@ -189,6 +206,8 @@ func (t *TcpListener) refuser(conn *net.TCPConn) {
// handler handles a single TCP Connection
func (t *TcpListener) handler(conn *net.TCPConn, id string) {
t.CurrentConnections.Incr(1)
t.TotalConnections.Incr(1)
// connection cleanup function
defer func() {
t.wg.Done()
@@ -196,6 +215,7 @@ func (t *TcpListener) handler(conn *net.TCPConn, id string) {
// Add one connection potential back to channel when this one closes
t.accept <- true
t.forget(id)
t.CurrentConnections.Incr(-1)
}()
var n int
@@ -212,6 +232,8 @@ func (t *TcpListener) handler(conn *net.TCPConn, id string) {
if n == 0 {
continue
}
t.BytesRecv.Incr(int64(n))
t.PacketsRecv.Incr(1)
bufCopy := make([]byte, n+1)
copy(bufCopy, scanner.Bytes())
bufCopy[n] = '\n'

View File

@@ -9,6 +9,7 @@ import (
"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/plugins/inputs"
"github.com/influxdata/telegraf/plugins/parsers"
"github.com/influxdata/telegraf/selfstat"
)
// UdpListener main struct for the collector
@@ -48,6 +49,9 @@ type UdpListener struct {
acc telegraf.Accumulator
listener *net.UDPConn
PacketsRecv selfstat.Stat
BytesRecv selfstat.Stat
}
// UDP_MAX_PACKET_SIZE is packet limit, see
@@ -102,6 +106,12 @@ func (u *UdpListener) Start(acc telegraf.Accumulator) error {
u.Lock()
defer u.Unlock()
tags := map[string]string{
"address": u.ServiceAddress,
}
u.PacketsRecv = selfstat.Register("udp_listener", "packets_received", tags)
u.BytesRecv = selfstat.Register("udp_listener", "bytes_received", tags)
u.acc = acc
u.in = make(chan []byte, u.AllowedPendingMessages)
u.done = make(chan struct{})
@@ -162,6 +172,8 @@ func (u *UdpListener) udpListen() error {
}
continue
}
u.BytesRecv.Incr(int64(n))
u.PacketsRecv.Incr(1)
bufCopy := make([]byte, n)
copy(bufCopy, buf[:n])