Add TCP listener for statsd input (#2293)

This commit is contained in:
Slawomir Skowron 2017-08-08 20:41:26 +02:00 committed by Daniel Nelson
parent 6ebb93abcc
commit b9b5b74ede
5 changed files with 350 additions and 8 deletions

View File

@ -26,6 +26,7 @@
- [#2487](https://github.com/influxdata/telegraf/pull/2487): Add Kafka 0.9+ consumer support
- [#2773](https://github.com/influxdata/telegraf/pull/2773): Add support for self-signed certs to InfluxDB input plugin
- [#2293](https://github.com/influxdata/telegraf/pull/2293): Add TCP listener for statsd input
- [#2581](https://github.com/influxdata/telegraf/pull/2581): Add Docker container environment variables as tags. Only whitelisted
- [#2817](https://github.com/influxdata/telegraf/pull/2817): Add timeout option to IPMI sensor plugin
- [#2883](https://github.com/influxdata/telegraf/pull/2883): Add support for an optional SSL/TLS configuration to nginx input plugin

View File

@ -2452,6 +2452,10 @@
# # Statsd Server
# [[inputs.statsd]]
# ## Protocol, must be "tcp" or "udp"
# protocol = "udp"
# ## Maximum number of concurrent TCP connections to allow
# max_tcp_connections = 250
# ## Address and port to host UDP listener on
# service_address = ":8125"
#

View File

@ -5,6 +5,12 @@
```toml
# Statsd Server
[[inputs.statsd]]
## Protocol, must be "tcp" or "udp" (default=udp)
protocol = "udp"
## MaxTCPConnection - applicable when protocol is set to tcp (default=250)
max_tcp_connections = 250
## Address and port to host UDP listener on
service_address = ":8125"
@ -146,6 +152,9 @@ metric type:
### Plugin arguments
- **protocol** string: Protocol used in listener - tcp or udp options
- **max_tcp_connections** []int: Maximum number of concurrent TCP connections
to allow. Used when protocol is set to tcp.
- **service_address** string: Address to listen for statsd UDP packets on
- **delete_gauges** boolean: Delete gauges on every collection interval
- **delete_counters** boolean: Delete counters on every collection interval

View File

@ -1,6 +1,7 @@
package statsd
import (
"bufio"
"errors"
"fmt"
"log"
@ -14,7 +15,9 @@ import (
"github.com/influxdata/telegraf/plugins/parsers/graphite"
"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/internal"
"github.com/influxdata/telegraf/plugins/inputs"
"github.com/influxdata/telegraf/selfstat"
)
const (
@ -24,15 +27,24 @@ const (
defaultFieldName = "value"
defaultProtocol = "udp"
defaultSeparator = "_"
defaultAllowPendingMessage = 10000
MaxTCPConnections = 250
)
var dropwarn = "E! Error: statsd message queue full. " +
"We have dropped %d messages so far. " +
"You may want to increase allowed_pending_messages in the config\n"
var malformedwarn = "E! Statsd over TCP has received %d malformed packets" +
" thus far."
type Statsd struct {
// Protocol used on listener - udp or tcp
Protocol string `toml:"protocol"`
// Address & Port to serve from
ServiceAddress string
@ -64,9 +76,17 @@ type Statsd struct {
UDPPacketSize int `toml:"udp_packet_size"`
sync.Mutex
// Lock for preventing a data race during resource cleanup
cleanup sync.Mutex
wg sync.WaitGroup
// accept channel tracks how many active connections there are, if there
// is an available bool in accept, then we are below the maximum and can
// accept the connection
accept chan bool
// drops tracks the number of dropped metrics.
drops int
// malformed tracks the number of malformed packets
malformed int
// Channel for all incoming statsd packets
in chan []byte
@ -83,9 +103,24 @@ type Statsd struct {
// bucket -> influx templates
Templates []string
listener *net.UDPConn
// Protocol listeners
UDPlistener *net.UDPConn
TCPlistener *net.TCPListener
// track current connections so we can close them in Stop()
conns map[string]*net.TCPConn
MaxTCPConnections int `toml:"max_tcp_connections"`
graphiteParser *graphite.GraphiteParser
acc telegraf.Accumulator
MaxConnections selfstat.Stat
CurrentConnections selfstat.Stat
TotalConnections selfstat.Stat
PacketsRecv selfstat.Stat
BytesRecv selfstat.Stat
}
// One statsd metric, form is <bucket>:<value>|<mtype>|@<samplerate>
@ -128,10 +163,16 @@ type cachedtimings struct {
}
func (_ *Statsd) Description() string {
return "Statsd Server"
return "Statsd UDP/TCP Server"
}
const sampleConfig = `
## Protocol, must be "tcp" or "udp" (default=udp)
protocol = "udp"
## MaxTCPConnection - applicable when protocol is set to tcp (default=250)
max_tcp_connections = 250
## Address and port to host UDP listener on
service_address = ":8125"
@ -247,6 +288,27 @@ func (s *Statsd) Start(_ telegraf.Accumulator) error {
s.sets = make(map[string]cachedset)
s.timings = make(map[string]cachedtimings)
s.Lock()
defer s.Unlock()
//
tags := map[string]string{
"address": s.ServiceAddress,
}
s.MaxConnections = selfstat.Register("statsd", "tcp_max_connections", tags)
s.MaxConnections.Set(int64(s.MaxTCPConnections))
s.CurrentConnections = selfstat.Register("statsd", "tcp_current_connections", tags)
s.TotalConnections = selfstat.Register("statsd", "tcp_total_connections", tags)
s.PacketsRecv = selfstat.Register("statsd", "tcp_packets_received", tags)
s.BytesRecv = selfstat.Register("statsd", "tcp_bytes_received", tags)
s.in = make(chan []byte, s.AllowedPendingMessages)
s.done = make(chan struct{})
s.accept = make(chan bool, s.MaxTCPConnections)
s.conns = make(map[string]*net.TCPConn)
for i := 0; i < s.MaxTCPConnections; i++ {
s.accept <- true
}
if s.ConvertNames {
log.Printf("I! WARNING statsd: convert_names config option is deprecated," +
" please use metric_separator instead")
@ -258,23 +320,67 @@ func (s *Statsd) Start(_ telegraf.Accumulator) error {
s.wg.Add(2)
// Start the UDP listener
switch s.Protocol {
case "udp":
go s.udpListen()
case "tcp":
go s.tcpListen()
}
// Start the line parser
go s.parser()
log.Printf("I! Started the statsd service on %s\n", s.ServiceAddress)
return nil
}
// tcpListen() starts listening for udp packets on the configured port.
func (s *Statsd) tcpListen() error {
defer s.wg.Done()
// Start listener
var err error
address, _ := net.ResolveTCPAddr("tcp", s.ServiceAddress)
s.TCPlistener, err = net.ListenTCP("tcp", address)
if err != nil {
log.Fatalf("ERROR: ListenTCP - %s", err)
return err
}
log.Println("I! TCP Statsd listening on: ", s.TCPlistener.Addr().String())
for {
select {
case <-s.done:
return nil
default:
// Accept connection:
conn, err := s.TCPlistener.AcceptTCP()
if err != nil {
return err
}
select {
case <-s.accept:
// not over connection limit, handle the connection properly.
s.wg.Add(1)
// generate a random id for this TCPConn
id := internal.RandomString(6)
s.remember(id, conn)
go s.handler(conn, id)
default:
// We are over the connection limit, refuse & close.
s.refuser(conn)
}
}
}
}
// udpListen starts listening for udp packets on the configured port.
func (s *Statsd) udpListen() error {
defer s.wg.Done()
var err error
address, _ := net.ResolveUDPAddr("udp", s.ServiceAddress)
s.listener, err = net.ListenUDP("udp", address)
s.UDPlistener, err = net.ListenUDP("udp", address)
if err != nil {
log.Fatalf("ERROR: ListenUDP - %s", err)
}
log.Println("I! Statsd listener listening on: ", s.listener.LocalAddr().String())
log.Println("I! Statsd UDP listener listening on: ", s.UDPlistener.LocalAddr().String())
buf := make([]byte, UDP_MAX_PACKET_SIZE)
for {
@ -282,7 +388,7 @@ func (s *Statsd) udpListen() error {
case <-s.done:
return nil
default:
n, _, err := s.listener.ReadFromUDP(buf)
n, _, err := s.UDPlistener.ReadFromUDP(buf)
if err != nil && !strings.Contains(err.Error(), "closed network") {
log.Printf("E! Error READ: %s\n", err.Error())
continue
@ -637,20 +743,111 @@ func (s *Statsd) aggregate(m metric) {
}
}
// handler handles a single TCP Connection
func (s *Statsd) handler(conn *net.TCPConn, id string) {
s.CurrentConnections.Incr(1)
s.TotalConnections.Incr(1)
// connection cleanup function
defer func() {
s.wg.Done()
conn.Close()
// Add one connection potential back to channel when this one closes
s.accept <- true
s.forget(id)
s.CurrentConnections.Incr(-1)
}()
var n int
scanner := bufio.NewScanner(conn)
for {
select {
case <-s.done:
return
default:
if !scanner.Scan() {
return
}
n = len(scanner.Bytes())
if n == 0 {
continue
}
s.BytesRecv.Incr(int64(n))
s.PacketsRecv.Incr(1)
bufCopy := make([]byte, n+1)
copy(bufCopy, scanner.Bytes())
bufCopy[n] = '\n'
select {
case s.in <- bufCopy:
default:
s.drops++
if s.drops == 1 || s.drops%s.AllowedPendingMessages == 0 {
log.Printf(dropwarn, s.drops)
}
}
}
}
}
// refuser refuses a TCP connection
func (s *Statsd) refuser(conn *net.TCPConn) {
conn.Close()
log.Printf("I! Refused TCP Connection from %s", conn.RemoteAddr())
log.Printf("I! WARNING: Maximum TCP Connections reached, you may want to" +
" adjust max_tcp_connections")
}
// forget a TCP connection
func (s *Statsd) forget(id string) {
s.cleanup.Lock()
defer s.cleanup.Unlock()
delete(s.conns, id)
}
// remember a TCP connection
func (s *Statsd) remember(id string, conn *net.TCPConn) {
s.cleanup.Lock()
defer s.cleanup.Unlock()
s.conns[id] = conn
}
func (s *Statsd) Stop() {
s.Lock()
defer s.Unlock()
log.Println("I! Stopping the statsd service")
close(s.done)
s.listener.Close()
switch s.Protocol {
case "udp":
s.UDPlistener.Close()
case "tcp":
s.TCPlistener.Close()
// Close all open TCP connections
// - get all conns from the s.conns map and put into slice
// - this is so the forget() function doesnt conflict with looping
// over the s.conns map
var conns []*net.TCPConn
s.cleanup.Lock()
for _, conn := range s.conns {
conns = append(conns, conn)
}
s.cleanup.Unlock()
for _, conn := range conns {
conn.Close()
}
default:
s.UDPlistener.Close()
}
s.wg.Wait()
close(s.in)
log.Println("I! Stopped Statsd listener service on ", s.ServiceAddress)
}
func init() {
inputs.Add("statsd", func() telegraf.Input {
return &Statsd{
Protocol: defaultProtocol,
ServiceAddress: ":8125",
MaxTCPConnections: 250,
MetricSeparator: "_",
AllowedPendingMessages: defaultAllowPendingMessage,
DeleteCounters: true,

View File

@ -3,11 +3,32 @@ package statsd
import (
"errors"
"fmt"
"net"
"testing"
"time"
"github.com/influxdata/telegraf/testutil"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
const (
testMsg = "test.tcp.msg:100|c"
)
func newTestTcpListener() (*Statsd, chan []byte) {
in := make(chan []byte, 1500)
listener := &Statsd{
Protocol: "tcp",
ServiceAddress: ":8125",
AllowedPendingMessages: 10000,
MaxTCPConnections: 250,
in: in,
done: make(chan struct{}),
}
return listener, in
}
func NewTestStatsd() *Statsd {
s := Statsd{}
@ -24,6 +45,116 @@ func NewTestStatsd() *Statsd {
return &s
}
// Test that MaxTCPConections is respected
func TestConcurrentConns(t *testing.T) {
listener := Statsd{
Protocol: "tcp",
ServiceAddress: ":8125",
AllowedPendingMessages: 10000,
MaxTCPConnections: 2,
}
acc := &testutil.Accumulator{}
require.NoError(t, listener.Start(acc))
defer listener.Stop()
time.Sleep(time.Millisecond * 25)
_, err := net.Dial("tcp", "127.0.0.1:8125")
assert.NoError(t, err)
_, err = net.Dial("tcp", "127.0.0.1:8125")
assert.NoError(t, err)
// Connection over the limit:
conn, err := net.Dial("tcp", "127.0.0.1:8125")
assert.NoError(t, err)
net.Dial("tcp", "127.0.0.1:8125")
assert.NoError(t, err)
_, err = conn.Write([]byte(testMsg))
assert.NoError(t, err)
time.Sleep(time.Millisecond * 10)
assert.Zero(t, acc.NFields())
}
// Test that MaxTCPConections is respected when max==1
func TestConcurrentConns1(t *testing.T) {
listener := Statsd{
Protocol: "tcp",
ServiceAddress: ":8125",
AllowedPendingMessages: 10000,
MaxTCPConnections: 1,
}
acc := &testutil.Accumulator{}
require.NoError(t, listener.Start(acc))
defer listener.Stop()
time.Sleep(time.Millisecond * 25)
_, err := net.Dial("tcp", "127.0.0.1:8125")
assert.NoError(t, err)
// Connection over the limit:
conn, err := net.Dial("tcp", "127.0.0.1:8125")
assert.NoError(t, err)
net.Dial("tcp", "127.0.0.1:8125")
assert.NoError(t, err)
_, err = conn.Write([]byte(testMsg))
assert.NoError(t, err)
time.Sleep(time.Millisecond * 10)
assert.Zero(t, acc.NFields())
}
// Test that MaxTCPConections is respected
func TestCloseConcurrentConns(t *testing.T) {
listener := Statsd{
Protocol: "tcp",
ServiceAddress: ":8125",
AllowedPendingMessages: 10000,
MaxTCPConnections: 2,
}
acc := &testutil.Accumulator{}
require.NoError(t, listener.Start(acc))
time.Sleep(time.Millisecond * 25)
_, err := net.Dial("tcp", "127.0.0.1:8125")
assert.NoError(t, err)
_, err = net.Dial("tcp", "127.0.0.1:8125")
assert.NoError(t, err)
listener.Stop()
}
// benchmark how long it takes to accept & process 100,000 metrics:
func BenchmarkTCP(b *testing.B) {
listener := Statsd{
Protocol: "tcp",
ServiceAddress: ":8125",
AllowedPendingMessages: 250000,
MaxTCPConnections: 250,
}
acc := &testutil.Accumulator{Discard: true}
// send multiple messages to socket
for n := 0; n < b.N; n++ {
err := listener.Start(acc)
if err != nil {
panic(err)
}
time.Sleep(time.Millisecond * 25)
conn, err := net.Dial("tcp", "127.0.0.1:8125")
if err != nil {
panic(err)
}
for i := 0; i < 250000; i++ {
fmt.Fprintf(conn, testMsg)
}
// wait for 250,000 metrics to get added to accumulator
time.Sleep(time.Millisecond)
listener.Stop()
}
}
// Valid lines should be parsed and their values should be cached
func TestParse_ValidLines(t *testing.T) {
s := NewTestStatsd()