Fix kafka plugin and rename to kafka_consumer

fixes #371
Cameron Sparr 2015-11-16 13:12:45 -07:00
parent a3feddd8ed
commit 970bfce997
11 changed files with 462 additions and 430 deletions

View File

@ -1,12 +1,20 @@
 ## v0.2.3 [unreleased]
 
 ### Release Notes
+- **breaking change** The `kafka` plugin has been renamed to `kafka_consumer`,
+and most of the config option names have changed. This only affects the kafka
+consumer _plugin_ (not the output). There were a number of problems with the
+kafka plugin that led to it only collecting data once at startup, so the kafka
+plugin was basically non-functional.
 - Riemann output added
 
 ### Features
 - [#379](https://github.com/influxdb/telegraf/pull/379): Riemann output, thanks @allenj!
+- [#375](https://github.com/influxdb/telegraf/pull/375): kafka_consumer service plugin.
 
 ### Bugfixes
+- [#371](https://github.com/influxdb/telegraf/issues/371): Kafka consumer plugin not functioning.
 
 ## v0.2.2 [2015-11-18]
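Since every option name changed along with the plugin rename above, a before/after sketch of the consumer config may help upgraders (option names are taken from this diff; the values shown are the sample defaults):

```toml
# Old [kafka] plugin section, no longer recognized:
[kafka]
topic = "topic_with_metrics"
consumerGroupName = "telegraf_metrics_consumers"
zookeeperPeers = ["localhost:2181"]
batchSize = 1000

# New [kafka_consumer] equivalent. Note that point_buffer caps in-memory
# points between collection intervals rather than batching writes, and
# offset is a new option in this release:
[kafka_consumer]
topics = ["topic_with_metrics"]
consumer_group = "telegraf_metrics_consumers"
zookeeper_peers = ["localhost:2181"]
point_buffer = 100000
offset = "oldest"
```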

View File

@ -164,7 +164,6 @@ Telegraf currently has support for collecting metrics from:
 * haproxy
 * httpjson (generic JSON-emitting http service plugin)
 * jolokia (remote JMX with JSON over HTTP)
-* kafka_consumer
 * leofs
 * lustre2
 * memcached
@ -197,6 +196,7 @@ Telegraf currently has support for collecting metrics from:
 Telegraf can collect metrics via the following services:
 * statsd
+* kafka_consumer
 
 We'll be adding support for many more over the coming months. Read on if you
 want to add support for another service or third-party API.

View File

@ -8,7 +8,7 @@ import (
"github.com/influxdb/telegraf/plugins" "github.com/influxdb/telegraf/plugins"
"github.com/influxdb/telegraf/plugins/exec" "github.com/influxdb/telegraf/plugins/exec"
"github.com/influxdb/telegraf/plugins/kafka_consumer" "github.com/influxdb/telegraf/plugins/memcached"
"github.com/influxdb/telegraf/plugins/procstat" "github.com/influxdb/telegraf/plugins/procstat"
"github.com/naoina/toml" "github.com/naoina/toml"
"github.com/naoina/toml/ast" "github.com/naoina/toml/ast"
@ -205,17 +205,14 @@ func TestConfig_parsePlugin(t *testing.T) {
 		pluginConfigurationFieldsSet: make(map[string][]string),
 	}
 
-	subtbl := tbl.Fields["kafka"].(*ast.Table)
-	err = c.parsePlugin("kafka", subtbl)
+	subtbl := tbl.Fields["memcached"].(*ast.Table)
+	err = c.parsePlugin("memcached", subtbl)
 
-	kafka := plugins.Plugins["kafka"]().(*kafka_consumer.Kafka)
-	kafka.ConsumerGroupName = "telegraf_metrics_consumers"
-	kafka.Topic = "topic_with_metrics"
-	kafka.ZookeeperPeers = []string{"test.example.com:2181"}
-	kafka.BatchSize = 1000
+	memcached := plugins.Plugins["memcached"]().(*memcached.Memcached)
+	memcached.Servers = []string{"localhost"}
 
-	kConfig := &ConfiguredPlugin{
-		Name: "kafka",
+	mConfig := &ConfiguredPlugin{
+		Name: "memcached",
 		Drop: []string{"other", "stuff"},
 		Pass: []string{"some", "strings"},
 		TagDrop: []TagFilter{
@ -233,10 +230,10 @@ func TestConfig_parsePlugin(t *testing.T) {
 		Interval: 5 * time.Second,
 	}
 
-	assert.Equal(t, kafka, c.plugins["kafka"],
-		"Testdata did not produce a correct kafka struct.")
-	assert.Equal(t, kConfig, c.pluginConfigurations["kafka"],
-		"Testdata did not produce correct kafka metadata.")
+	assert.Equal(t, memcached, c.plugins["memcached"],
+		"Testdata did not produce a correct memcached struct.")
+	assert.Equal(t, mConfig, c.pluginConfigurations["memcached"],
+		"Testdata did not produce correct memcached metadata.")
 }
 
 func TestConfig_LoadDirectory(t *testing.T) {
@ -249,14 +246,11 @@ func TestConfig_LoadDirectory(t *testing.T) {
 		t.Error(err)
 	}
 
-	kafka := plugins.Plugins["kafka"]().(*kafka_consumer.Kafka)
-	kafka.ConsumerGroupName = "telegraf_metrics_consumers"
-	kafka.Topic = "topic_with_metrics"
-	kafka.ZookeeperPeers = []string{"test.example.com:2181"}
-	kafka.BatchSize = 10000
+	memcached := plugins.Plugins["memcached"]().(*memcached.Memcached)
+	memcached.Servers = []string{"192.168.1.1"}
 
-	kConfig := &ConfiguredPlugin{
-		Name: "kafka",
+	mConfig := &ConfiguredPlugin{
+		Name: "memcached",
 		Drop: []string{"other", "stuff"},
 		Pass: []string{"some", "strings"},
 		TagDrop: []TagFilter{
@ -296,10 +290,10 @@ func TestConfig_LoadDirectory(t *testing.T) {
 	pConfig := &ConfiguredPlugin{Name: "procstat"}
 
-	assert.Equal(t, kafka, c.plugins["kafka"],
-		"Merged Testdata did not produce a correct kafka struct.")
-	assert.Equal(t, kConfig, c.pluginConfigurations["kafka"],
-		"Merged Testdata did not produce correct kafka metadata.")
+	assert.Equal(t, memcached, c.plugins["memcached"],
+		"Merged Testdata did not produce a correct memcached struct.")
+	assert.Equal(t, mConfig, c.pluginConfigurations["memcached"],
+		"Merged Testdata did not produce correct memcached metadata.")
 	assert.Equal(t, ex, c.plugins["exec"],
 		"Merged Testdata did not produce a correct exec struct.")

View File

@ -29,8 +29,9 @@ type InfluxDB struct {
 }
 
 var sampleConfig = `
-	# The full HTTP or UDP endpoint URL for your InfluxDB instance
-	# Multiple urls can be specified for InfluxDB cluster support.
+	# The full HTTP or UDP endpoint URL for your InfluxDB instance.
+	# Multiple urls can be specified but it is assumed that they are part of the
+	# same cluster; this means that only ONE of the urls will be written to each
+	# interval.
 	# urls = ["udp://localhost:8089"] # UDP endpoint example
 	urls = ["http://localhost:8086"] # required
 	# The target database for metrics (telegraf will create it if not exists)
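A sketch of the multi-url form described above (the hostnames are illustrative); per the comment, only one of the listed urls receives each interval's write:

```toml
[[outputs.influxdb]]
# Two members of the same InfluxDB cluster; telegraf writes to only one
# of these urls each interval.
urls = ["http://influxdb-a:8086", "http://influxdb-b:8086"]
database = "telegraf"
```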

View File

@ -1,36 +1,51 @@
 package kafka_consumer
 
 import (
-	"os"
-	"os/signal"
-	"time"
+	"log"
+	"strings"
+	"sync"
+
+	"github.com/Shopify/sarama"
 	"github.com/influxdb/influxdb/models"
 	"github.com/influxdb/telegraf/plugins"
-	"github.com/Shopify/sarama"
 	"github.com/wvanbergen/kafka/consumergroup"
 )
 
 type Kafka struct {
-	ConsumerGroupName string
-	Topic             string
-	ZookeeperPeers    []string
-	Consumer          *consumergroup.ConsumerGroup
-	BatchSize         int
+	ConsumerGroup  string
+	Topics         []string
+	ZookeeperPeers []string
+	Consumer       *consumergroup.ConsumerGroup
+	PointBuffer    int
+	Offset         string
+
+	sync.Mutex
+
+	// channel for all incoming kafka messages
+	in <-chan *sarama.ConsumerMessage
+	// channel for all kafka consumer errors
+	errs <-chan *sarama.ConsumerError
+	// channel for all incoming parsed kafka points
+	pointChan chan models.Point
+	done      chan struct{}
+
+	// doNotCommitMsgs tells the parser not to call CommitUpto on the consumer;
+	// this is mostly for test purposes, but there may be a use-case for it later.
+	doNotCommitMsgs bool
 }
 
 var sampleConfig = `
-	# topic to consume
-	topic = "topic_with_metrics"
-	# the name of the consumer group
-	consumerGroupName = "telegraf_metrics_consumers"
-	# an array of Zookeeper connection strings
-	zookeeperPeers = ["localhost:2181"]
-	# Batch size of points sent to InfluxDB
-	batchSize = 1000
+	# topic(s) to consume
+	topics = ["telegraf"]
+	# an array of Zookeeper connection strings
+	zookeeper_peers = ["localhost:2181"]
+	# the name of the consumer group
+	consumer_group = "telegraf_metrics_consumers"
+	# Maximum number of points to buffer between collection intervals
+	point_buffer = 100000
+	# Offset (must be either "oldest" or "newest")
+	offset = "oldest"
 `
 
 func (k *Kafka) SampleConfig() string {
@ -38,127 +53,114 @@ func (k *Kafka) SampleConfig() string {
 }
 
 func (k *Kafka) Description() string {
-	return "read metrics from a Kafka topic"
+	return "Read line-protocol metrics from Kafka topic(s)"
 }
 
-type Metric struct {
-	Measurement string                 `json:"measurement"`
-	Values      map[string]interface{} `json:"values"`
-	Tags        map[string]string      `json:"tags"`
-	Time        time.Time              `json:"time"`
-}
-
-func (k *Kafka) Gather(acc plugins.Accumulator) error {
+func (k *Kafka) Start() error {
+	k.Lock()
+	defer k.Unlock()
 	var consumerErr error
-	metricQueue := make(chan []byte, 200)
 
-	if k.Consumer == nil {
+	config := consumergroup.NewConfig()
+	switch strings.ToLower(k.Offset) {
+	case "oldest", "":
+		config.Offsets.Initial = sarama.OffsetOldest
+	case "newest":
+		config.Offsets.Initial = sarama.OffsetNewest
+	default:
+		log.Printf("WARNING: Kafka consumer invalid offset '%s', using 'oldest'\n",
+			k.Offset)
+		config.Offsets.Initial = sarama.OffsetOldest
+	}
+
+	if k.Consumer == nil || k.Consumer.Closed() {
 		k.Consumer, consumerErr = consumergroup.JoinConsumerGroup(
-			k.ConsumerGroupName,
-			[]string{k.Topic},
+			k.ConsumerGroup,
+			k.Topics,
 			k.ZookeeperPeers,
-			nil,
+			config,
 		)
 		if consumerErr != nil {
 			return consumerErr
 		}
 
-		c := make(chan os.Signal, 1)
-		halt := make(chan bool, 1)
-		signal.Notify(c, os.Interrupt)
-		go func() {
-			<-c
-			halt <- true
-			emitMetrics(k, acc, metricQueue)
-			k.Consumer.Close()
-		}()
-
-		go readFromKafka(k.Consumer.Messages(),
-			metricQueue,
-			k.BatchSize,
-			k.Consumer.CommitUpto,
-			halt)
+		// Setup message and error channels
+		k.in = k.Consumer.Messages()
+		k.errs = k.Consumer.Errors()
 	}
 
-	return emitMetrics(k, acc, metricQueue)
+	k.done = make(chan struct{})
+	if k.PointBuffer == 0 {
+		k.PointBuffer = 100000
+	}
+	k.pointChan = make(chan models.Point, k.PointBuffer)
+
+	// Start the kafka message reader
+	go k.parser()
+	log.Printf("Started the kafka consumer service, peers: %v, topics: %v\n",
+		k.ZookeeperPeers, k.Topics)
+	return nil
 }
 
-func emitMetrics(k *Kafka, acc plugins.Accumulator, metricConsumer <-chan []byte) error {
-	timeout := time.After(1 * time.Second)
-
+// parser() reads all incoming messages from the consumer, and parses them into
+// influxdb metric points.
+func (k *Kafka) parser() {
 	for {
 		select {
-		case batch := <-metricConsumer:
-			var points []models.Point
-			var err error
-			if points, err = models.ParsePoints(batch); err != nil {
-				return err
-			}
+		case <-k.done:
+			return
+		case err := <-k.errs:
+			log.Printf("Kafka Consumer Error: %s\n", err.Error())
+		case msg := <-k.in:
+			points, err := models.ParsePoints(msg.Value)
+			if err != nil {
+				log.Printf("Could not parse kafka message: %s, error: %s",
+					string(msg.Value), err.Error())
+			}
 			for _, point := range points {
-				acc.AddFields(point.Name(), point.Fields(), point.Tags(), point.Time())
+				select {
+				case k.pointChan <- point:
+					continue
+				default:
+					log.Printf("Kafka Consumer buffer is full, dropping a point." +
+						" You may want to increase the point_buffer setting")
+				}
 			}
-		case <-timeout:
-			return nil
+
+			if !k.doNotCommitMsgs {
+				// TODO(cam) this locking can be removed if this PR gets merged:
+				// https://github.com/wvanbergen/kafka/pull/84
+				k.Lock()
+				k.Consumer.CommitUpto(msg)
+				k.Unlock()
+			}
 		}
 	}
 }
 
-const millisecond = 1000000 * time.Nanosecond
-
-type ack func(*sarama.ConsumerMessage) error
-
-func readFromKafka(
-	kafkaMsgs <-chan *sarama.ConsumerMessage,
-	metricProducer chan<- []byte,
-	maxBatchSize int,
-	ackMsg ack,
-	halt <-chan bool,
-) {
-	batch := make([]byte, 0)
-	currentBatchSize := 0
-	timeout := time.After(500 * millisecond)
-	var msg *sarama.ConsumerMessage
-
-	for {
-		select {
-		case msg = <-kafkaMsgs:
-			if currentBatchSize != 0 {
-				batch = append(batch, '\n')
-			}
-			batch = append(batch, msg.Value...)
-			currentBatchSize++
-			if currentBatchSize == maxBatchSize {
-				metricProducer <- batch
-				currentBatchSize = 0
-				batch = make([]byte, 0)
-				ackMsg(msg)
-			}
-		case <-timeout:
-			if currentBatchSize != 0 {
-				metricProducer <- batch
-				currentBatchSize = 0
-				batch = make([]byte, 0)
-				ackMsg(msg)
-			}
-			timeout = time.After(500 * millisecond)
-		case <-halt:
-			if currentBatchSize != 0 {
-				metricProducer <- batch
-				ackMsg(msg)
-			}
-			return
-		}
-	}
-}
+func (k *Kafka) Stop() {
+	k.Lock()
+	defer k.Unlock()
+	close(k.done)
+	if err := k.Consumer.Close(); err != nil {
+		log.Printf("Error closing kafka consumer: %s\n", err.Error())
+	}
+}
+
+func (k *Kafka) Gather(acc plugins.Accumulator) error {
+	k.Lock()
+	defer k.Unlock()
+	npoints := len(k.pointChan)
+	for i := 0; i < npoints; i++ {
+		point := <-k.pointChan
+		acc.AddFields(point.Name(), point.Fields(), point.Tags(), point.Time())
+	}
+	return nil
+}
 
 func init() {
-	plugins.Add("kafka", func() plugins.Plugin {
+	plugins.Add("kafka_consumer", func() plugins.Plugin {
 		return &Kafka{}
 	})
 }
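The rewrite turns the consumer into a service plugin, so the agent drives it through Start/Gather/Stop instead of reconnecting inside Gather. A minimal sketch of that lifecycle under the new API (the telegraf agent, not user code, normally owns this loop; testutil.Accumulator stands in for the agent's accumulator, and the group, topic, and peer values are illustrative):

```go
package main

import (
	"log"
	"time"

	"github.com/influxdb/telegraf/plugins/kafka_consumer"
	"github.com/influxdb/telegraf/testutil"
)

func main() {
	k := &kafka_consumer.Kafka{
		ConsumerGroup:  "telegraf_metrics_consumers",
		Topics:         []string{"telegraf"},
		ZookeeperPeers: []string{"localhost:2181"},
		PointBuffer:    100000,
		Offset:         "oldest",
	}

	// Start joins the consumer group once and launches the parser goroutine,
	// which buffers parsed points between collection intervals.
	if err := k.Start(); err != nil {
		log.Fatal(err)
	}
	defer k.Stop()

	// The agent calls Gather on every collection interval; each call drains
	// whatever points the parser buffered since the previous call.
	var acc testutil.Accumulator
	for i := 0; i < 3; i++ {
		time.Sleep(10 * time.Second)
		if err := k.Gather(&acc); err != nil {
			log.Fatal(err)
		}
		log.Printf("accumulated %d points so far", len(acc.Points))
	}
}
```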

View File

@ -15,43 +15,77 @@ func TestReadsMetricsFromKafka(t *testing.T) {
 	if testing.Short() {
 		t.Skip("Skipping integration test in short mode")
 	}
-	var zkPeers, brokerPeers []string
-	zkPeers = []string{testutil.GetLocalHost() + ":2181"}
-	brokerPeers = []string{testutil.GetLocalHost() + ":9092"}
-
-	k := &Kafka{
-		ConsumerGroupName: "telegraf_test_consumers",
-		Topic:             fmt.Sprintf("telegraf_test_topic_%d", time.Now().Unix()),
-		ZookeeperPeers:    zkPeers,
-	}
+	brokerPeers := []string{testutil.GetLocalHost() + ":9092"}
+	zkPeers := []string{testutil.GetLocalHost() + ":2181"}
+	testTopic := fmt.Sprintf("telegraf_test_topic_%d", time.Now().Unix())
 
+	// Send a Kafka message to the kafka host
 	msg := "cpu_load_short,direction=in,host=server01,region=us-west value=23422.0 1422568543702900257"
 	producer, err := sarama.NewSyncProducer(brokerPeers, nil)
 	require.NoError(t, err)
-	_, _, err = producer.SendMessage(&sarama.ProducerMessage{Topic: k.Topic, Value: sarama.StringEncoder(msg)})
+	_, _, err = producer.SendMessage(
+		&sarama.ProducerMessage{
+			Topic: testTopic,
+			Value: sarama.StringEncoder(msg),
+		})
 	require.NoError(t, err)
-	producer.Close()
+	defer producer.Close()
 
+	// Start the Kafka Consumer
+	k := &Kafka{
+		ConsumerGroup:  "telegraf_test_consumers",
+		Topics:         []string{testTopic},
+		ZookeeperPeers: zkPeers,
+		PointBuffer:    100000,
+		Offset:         "oldest",
+	}
+	if err := k.Start(); err != nil {
+		t.Fatal(err.Error())
+	} else {
+		defer k.Stop()
+	}
+
+	waitForPoint(k, t)
+
+	// Verify that we can now gather the sent message
 	var acc testutil.Accumulator
 
 	// Sanity check
-	assert.Equal(t, 0, len(acc.Points), "there should not be any points")
+	assert.Equal(t, 0, len(acc.Points), "There should not be any points")
 
+	// Gather points
 	err = k.Gather(&acc)
 	require.NoError(t, err)
-
-	assert.Equal(t, 1, len(acc.Points), "there should be a single point")
-
-	point := acc.Points[0]
-	assert.Equal(t, "cpu_load_short", point.Measurement)
-	assert.Equal(t, map[string]interface{}{"value": 23422.0}, point.Fields)
-	assert.Equal(t, map[string]string{
-		"host":      "server01",
-		"direction": "in",
-		"region":    "us-west",
-	}, point.Tags)
-	assert.Equal(t, time.Unix(0, 1422568543702900257).Unix(), point.Time.Unix())
+	if len(acc.Points) == 1 {
+		point := acc.Points[0]
+		assert.Equal(t, "cpu_load_short", point.Measurement)
+		assert.Equal(t, map[string]interface{}{"value": 23422.0}, point.Fields)
+		assert.Equal(t, map[string]string{
+			"host":      "server01",
+			"direction": "in",
+			"region":    "us-west",
+		}, point.Tags)
+		assert.Equal(t, time.Unix(0, 1422568543702900257).Unix(), point.Time.Unix())
+	} else {
+		t.Errorf("No points found in accumulator, expected 1")
+	}
 }
+
+// Waits for the metric that was sent to the kafka broker to arrive at the kafka
+// consumer
+func waitForPoint(k *Kafka, t *testing.T) {
+	// Give the kafka container up to 5 seconds to get the point to the consumer
+	ticker := time.NewTicker(5 * time.Millisecond)
+	counter := 0
+	for {
+		select {
+		case <-ticker.C:
+			counter++
+			if counter > 1000 {
+				t.Fatal("Waited for 5s, point never arrived to consumer")
+			} else if len(k.pointChan) == 1 {
+				return
+			}
+		}
+	}
+}
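The ticker-and-counter loop above works, but the same wait can be expressed with an explicit deadline channel, which avoids counting ticks; a sketch of that alternative (not part of this commit):

```go
func waitForPointDeadline(k *Kafka, t *testing.T) {
	// Poll every 5ms, giving up after an overall 5s deadline.
	deadline := time.After(5 * time.Second)
	for {
		select {
		case <-deadline:
			t.Fatal("Waited for 5s, point never arrived to consumer")
		case <-time.After(5 * time.Millisecond):
			if len(k.pointChan) == 1 {
				return
			}
		}
	}
}
```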

View File

@ -1,92 +1,91 @@
 package kafka_consumer
 
 import (
-	"strings"
 	"testing"
 	"time"
 
-	"github.com/Shopify/sarama"
+	"github.com/influxdb/influxdb/models"
 	"github.com/influxdb/telegraf/testutil"
+
+	"github.com/Shopify/sarama"
 	"github.com/stretchr/testify/assert"
-	"github.com/stretchr/testify/require"
 )
 
-const testMsg = "cpu_load_short,direction=in,host=server01,region=us-west value=23422.0 1422568543702900257"
+const (
+	testMsg     = "cpu_load_short,host=server01 value=23422.0 1422568543702900257"
+	invalidMsg  = "cpu_load_short,host=server01 1422568543702900257"
+	pointBuffer = 5
+)
 
-func TestReadFromKafkaBatchesMsgsOnBatchSize(t *testing.T) {
-	halt := make(chan bool, 1)
-	metricChan := make(chan []byte, 1)
-	kafkaChan := make(chan *sarama.ConsumerMessage, 10)
-	for i := 0; i < 10; i++ {
-		kafkaChan <- saramaMsg(testMsg)
-	}
-
-	expectedBatch := strings.Repeat(testMsg+"\n", 9) + testMsg
-	readFromKafka(kafkaChan, metricChan, 10, func(msg *sarama.ConsumerMessage) error {
-		batch := <-metricChan
-		assert.Equal(t, expectedBatch, string(batch))
-		halt <- true
-		return nil
-	}, halt)
-}
+func NewTestKafka() (*Kafka, chan *sarama.ConsumerMessage) {
+	in := make(chan *sarama.ConsumerMessage, pointBuffer)
+	k := Kafka{
+		ConsumerGroup:   "test",
+		Topics:          []string{"telegraf"},
+		ZookeeperPeers:  []string{"localhost:2181"},
+		PointBuffer:     pointBuffer,
+		Offset:          "oldest",
+		in:              in,
+		doNotCommitMsgs: true,
+		errs:            make(chan *sarama.ConsumerError, pointBuffer),
+		done:            make(chan struct{}),
+		pointChan:       make(chan models.Point, pointBuffer),
+	}
+	return &k, in
+}
 
-func TestReadFromKafkaBatchesMsgsOnTimeout(t *testing.T) {
-	halt := make(chan bool, 1)
-	metricChan := make(chan []byte, 1)
-	kafkaChan := make(chan *sarama.ConsumerMessage, 10)
-	for i := 0; i < 3; i++ {
-		kafkaChan <- saramaMsg(testMsg)
-	}
-
-	expectedBatch := strings.Repeat(testMsg+"\n", 2) + testMsg
-	readFromKafka(kafkaChan, metricChan, 10, func(msg *sarama.ConsumerMessage) error {
-		batch := <-metricChan
-		assert.Equal(t, expectedBatch, string(batch))
-		halt <- true
-		return nil
-	}, halt)
-}
+// Test that the parser parses kafka messages into points
+func TestRunParser(t *testing.T) {
+	k, in := NewTestKafka()
+	defer close(k.done)
+
+	go k.parser()
+	in <- saramaMsg(testMsg)
+	time.Sleep(time.Millisecond)
+
+	assert.Equal(t, len(k.pointChan), 1)
+}
 
-func TestEmitMetricsSendMetricsToAcc(t *testing.T) {
-	k := &Kafka{}
-	var acc testutil.Accumulator
-	testChan := make(chan []byte, 1)
-	testChan <- []byte(testMsg)
-
-	err := emitMetrics(k, &acc, testChan)
-	require.NoError(t, err)
-
-	assert.Equal(t, 1, len(acc.Points), "there should be a single point")
-
-	point := acc.Points[0]
-	assert.Equal(t, "cpu_load_short", point.Measurement)
-	assert.Equal(t, map[string]interface{}{"value": 23422.0}, point.Fields)
-	assert.Equal(t, map[string]string{
-		"host":      "server01",
-		"direction": "in",
-		"region":    "us-west",
-	}, point.Tags)
-	if time.Unix(0, 1422568543702900257).Unix() != point.Time.Unix() {
-		t.Errorf("Expected: %v, received %v\n",
-			time.Unix(0, 1422568543702900257).Unix(),
-			point.Time.Unix())
-	}
-}
+// Test that the parser ignores invalid messages
+func TestRunParserInvalidMsg(t *testing.T) {
+	k, in := NewTestKafka()
+	defer close(k.done)
+
+	go k.parser()
+	in <- saramaMsg(invalidMsg)
+	time.Sleep(time.Millisecond)
+
+	assert.Equal(t, len(k.pointChan), 0)
+}
 
-func TestEmitMetricsTimesOut(t *testing.T) {
-	k := &Kafka{}
-	var acc testutil.Accumulator
-	testChan := make(chan []byte)
-
-	err := emitMetrics(k, &acc, testChan)
-	require.NoError(t, err)
-
-	assert.Equal(t, 0, len(acc.Points), "there should not be a any points")
-}
+// Test that points are dropped when we hit the buffer limit
+func TestRunParserRespectsBuffer(t *testing.T) {
+	k, in := NewTestKafka()
+	defer close(k.done)
+
+	go k.parser()
+	for i := 0; i < pointBuffer+1; i++ {
+		in <- saramaMsg(testMsg)
+	}
+	time.Sleep(time.Millisecond)
+
+	assert.Equal(t, len(k.pointChan), 5)
+}
+
+// Test that the parser parses kafka messages into points
+func TestRunParserAndGather(t *testing.T) {
+	k, in := NewTestKafka()
+	defer close(k.done)
+
+	go k.parser()
+	in <- saramaMsg(testMsg)
+	time.Sleep(time.Millisecond)
+
+	acc := testutil.Accumulator{}
+	k.Gather(&acc)
+
+	assert.Equal(t, len(acc.Points), 1)
+	assert.True(t, acc.CheckValue("cpu_load_short", 23422.0))
+}
 
 func saramaMsg(val string) *sarama.ConsumerMessage {

View File

@ -183,8 +183,6 @@ func (s *Statsd) Gather(acc plugins.Accumulator) error {
 }
 
 func (s *Statsd) Start() error {
-	log.Println("Starting up the statsd service")
-
 	// Make data structures
 	s.done = make(chan struct{})
 	s.in = make(chan string, s.AllowedPendingMessages)
@ -197,6 +195,7 @@ func (s *Statsd) Start() error {
 	go s.udpListen()
 	// Start the line parser
 	go s.parser()
+	log.Printf("Started the statsd service on %s\n", s.ServiceAddress)
 	return nil
 }

View File

@ -1,12 +1,9 @@
-[kafka]
-topic = "topic_with_metrics"
-consumerGroupName = "telegraf_metrics_consumers"
-zookeeperPeers = ["test.example.com:2181"]
-batchSize = 1000
+[memcached]
+servers = ["localhost"]
 pass = ["some", "strings"]
 drop = ["other", "stuff"]
 interval = "5s"
-[kafka.tagpass]
+[memcached.tagpass]
 goodtag = ["mytag"]
-[kafka.tagdrop]
+[memcached.tagdrop]
 badtag = ["othertag"]

View File

@ -1,10 +1,9 @@
-[kafka]
-zookeeperPeers = ["test.example.com:2181"]
-batchSize = 10000
+[memcached]
+servers = ["192.168.1.1"]
 pass = ["some", "strings"]
 drop = ["other", "stuff"]
 interval = "5s"
-[kafka.tagpass]
+[memcached.tagpass]
 goodtag = ["mytag"]
-[kafka.tagdrop]
+[memcached.tagdrop]
 badtag = ["othertag"]

View File

@ -21,25 +21,25 @@
# Tags can also be specified via a normal map, but only one form at a time:
[tags]
# dc = "us-east-1"

# Configuration for telegraf agent
[agent]
# Default data collection interval for all plugins
interval = "10s"
# If utc = false, uses local time (utc is highly recommended)
utc = true
# Precision of writes, valid values are n, u, ms, s, m, and h
# note: using second precision greatly helps InfluxDB compression
precision = "s"
# run telegraf in debug mode
debug = false
# Override default hostname, if empty use os.Hostname()
hostname = ""

###############################################################################
@ -50,24 +50,24 @@
# Configuration for influxdb server to send metrics to
[[outputs.influxdb]]
# The full HTTP endpoint URL for your InfluxDB instance
# Multiple urls can be specified for InfluxDB cluster support. Server to
# write to will be randomly chosen each interval.
urls = ["http://localhost:8086"] # required.
# The target database for metrics. This database must already exist
database = "telegraf" # required.

# Connection timeout (for the connection with InfluxDB), formatted as a string.
# Valid time units are "ns", "us" (or "µs"), "ms", "s", "m", "h".
# If not provided, will default to 0 (no timeout)
# timeout = "5s"
# username = "telegraf"
# password = "metricsmetricsmetricsmetrics"
# Set the user agent for the POSTs (can be useful for log differentiation)
# user_agent = "telegraf"

[[outputs.influxdb]]
urls = ["udp://localhost:8089"]
@ -75,13 +75,13 @@
# Configuration for the Kafka server to send metrics to
[[outputs.kafka]]
# URLs of kafka brokers
brokers = ["localhost:9092"]
# Kafka topic for producer messages
topic = "telegraf"
# Telegraf tag to use as a routing key
# ie, if this tag exists, its value will be used as the routing key
routing_tag = "host"

###############################################################################
@ -95,239 +95,238 @@ urls = ["http://localhost/server-status?auto"]
# Read metrics about cpu usage
[cpu]
# Whether to report per-cpu stats or not
percpu = true
# Whether to report total system cpu stats or not
totalcpu = true
# Comment this line if you want the raw CPU time metrics
drop = ["cpu_time"]

# Read metrics about disk usage by mount point
[disk]
# no configuration

# Read metrics from one or many disque servers
[disque]
# An array of URI to gather stats about. Specify an ip or hostname
# with optional port and password. ie disque://localhost, disque://10.10.3.33:18832,
# 10.0.0.1:10000, etc.
#
# If no servers are specified, then localhost is used as the host.
servers = ["localhost"]

# Read stats from one or more Elasticsearch servers or clusters
[elasticsearch]
# specify a list of one or more Elasticsearch servers
servers = ["http://localhost:9200"]
# set local to false when you want to read the indices stats from all nodes
# within the cluster
local = true

# Read flattened metrics from one or more commands that output JSON to stdout
[exec]
# specify commands via an array of tables
[[exec.commands]]
# the command to run
command = "/usr/bin/mycollector --foo=bar"
# name of the command (used as a prefix for measurements)
name = "mycollector"

# Read metrics of haproxy, via socket or csv stats page
[haproxy]
# An array of addresses to gather stats about. Specify an ip or hostname
# with optional port. ie localhost, 10.10.3.33:1936, etc.
#
# If no servers are specified, then default to 127.0.0.1:1936
servers = ["http://myhaproxy.com:1936", "http://anotherhaproxy.com:1936"]
# Or you can also use a local socket (not working yet)
# servers = ["socket:/run/haproxy/admin.sock"]

# Read flattened metrics from one or more JSON HTTP endpoints
[httpjson]
# Specify services via an array of tables
[[httpjson.services]]
# a name for the service being polled
name = "webserver_stats"
# URL of each server in the service's cluster
servers = [
"http://localhost:9999/stats/",
"http://localhost:9998/stats/",
]
# HTTP method to use (case-sensitive)
method = "GET"
# HTTP parameters (all values must be strings)
[httpjson.services.parameters]
event_type = "cpu_spike"
threshold = "0.75"

# Read metrics about disk IO by device
[io]
# no configuration
 # read metrics from a Kafka topic
-[kafka]
-# topic to consume
-topic = "topic_with_metrics"
-# the name of the consumer group
-consumerGroupName = "telegraf_metrics_consumers"
-# an array of Zookeeper connection strings
-zookeeperPeers = ["localhost:2181"]
-# Batch size of points sent to InfluxDB
-batchSize = 1000
+[kafka_consumer]
+# topic(s) to consume
+topics = ["telegraf"]
+# an array of Zookeeper connection strings
+zookeeper_peers = ["localhost:2181"]
+# the name of the consumer group
+consumer_group = "telegraf_metrics_consumers"
+# Maximum number of points to buffer between collection intervals
+point_buffer = 100000
+# Offset (must be either "oldest" or "newest")
+offset = "oldest"
# Read metrics from a LeoFS Server via SNMP
[leofs]
# An array of URI to gather stats about LeoFS.
# Specify an ip or hostname with port. ie 127.0.0.1:4020
#
# If no servers are specified, then 127.0.0.1 is used as the host and 4020 as the port.
servers = ["127.0.0.1:4021"]

# Read metrics from local Lustre service on OST, MDS
[lustre2]
# An array of /proc globs to search for Lustre stats
# If not specified, the default will work on Lustre 2.5.x
#
# ost_procfiles = ["/proc/fs/lustre/obdfilter/*/stats", "/proc/fs/lustre/osd-ldiskfs/*/stats"]
# mds_procfiles = ["/proc/fs/lustre/mdt/*/md_stats"]

# Read metrics about memory usage
[mem]
# no configuration

# Read metrics from one or many memcached servers
[memcached]
# An array of addresses to gather stats about. Specify an ip or hostname
# with optional port. ie localhost, 10.0.0.1:11211, etc.
#
# If no servers are specified, then localhost is used as the host.
servers = ["localhost"]

# Read metrics from one or many MongoDB servers
[mongodb]
# An array of URI to gather stats about. Specify an ip or hostname
# with optional port and password. ie mongodb://user:auth_key@10.10.3.30:27017,
# mongodb://10.10.3.33:18832, 10.0.0.1:10000, etc.
#
# If no servers are specified, then 127.0.0.1 is used as the host and 27017 as the port.
servers = ["127.0.0.1:27017"]

# Read metrics from one or many mysql servers
[mysql]
# specify servers via a url matching:
# [username[:password]@][protocol[(address)]]/[?tls=[true|false|skip-verify]]
# e.g.
# servers = ["root:root@http://10.0.0.18/?tls=false"]
# servers = ["root:passwd@tcp(127.0.0.1:3306)/"]
#
# If no servers are specified, then localhost is used as the host.
servers = ["localhost"]

# Read metrics about network interface usage
[net]
# By default, telegraf gathers stats from any up interface (excluding loopback)
# Setting interfaces will tell it to gather these explicit interfaces,
# regardless of status.
#
# interfaces = ["eth0", ... ]

# Read Nginx's basic status information (ngx_http_stub_status_module)
[nginx]
# An array of Nginx stub_status URI to gather stats.
urls = ["http://localhost/status"]

# Ping given url(s) and return statistics
[ping]
# urls to ping
urls = ["www.google.com"] # required
# number of pings to send (ping -c <COUNT>)
count = 1 # required
# interval, in s, at which to ping. 0 == default (ping -i <PING_INTERVAL>)
ping_interval = 0.0
# ping timeout, in s. 0 == no timeout (ping -t <TIMEOUT>)
timeout = 0.0
# interface to send ping from (ping -I <INTERFACE>)
interface = ""

# Read metrics from one or many postgresql servers
[postgresql]
# specify servers via an array of tables
[[postgresql.servers]]
# specify address via a url matching:
# postgres://[pqgotest[:password]]@localhost[/dbname]?sslmode=[disable|verify-ca|verify-full]
# or a simple string:
# host=localhost user=pqotest password=... sslmode=... dbname=app_production
#
# All connection parameters are optional. By default, the host is localhost
# and the user is the currently running user. For localhost, we default
# to sslmode=disable as well.
#
# Without the dbname parameter, the driver will default to a database
# with the same name as the user. This dbname is just for instantiating a
# connection with the server and doesn't restrict the databases we are trying
# to grab metrics for.
#
address = "sslmode=disable"
# A list of databases to pull metrics about. If not specified, metrics for all
# databases are gathered.
# databases = ["app_production", "blah_testing"]
# [[postgresql.servers]]
# address = "influx@remoteserver"

# Read metrics from one or many prometheus clients
[prometheus]
# An array of urls to scrape metrics from.
urls = ["http://localhost:9100/metrics"]

# Read metrics from one or many RabbitMQ servers via the management API
[rabbitmq]
# Specify servers via an array of tables
[[rabbitmq.servers]]
# name = "rmq-server-1" # optional tag
# url = "http://localhost:15672"
# username = "guest"
# password = "guest"
# A list of nodes to pull metrics about. If not specified, metrics for
# all nodes are gathered.
# nodes = ["rabbit@node1", "rabbit@node2"]

# Read metrics from one or many redis servers
[redis]
# An array of URI to gather stats about. Specify an ip or hostname
# with optional port and password. ie redis://localhost, redis://10.10.3.33:18832,
# 10.0.0.1:10000, etc.
#
# If no servers are specified, then localhost is used as the host.
servers = ["localhost"]

# Read metrics from one or many RethinkDB servers
[rethinkdb]
# An array of URI to gather stats about. Specify an ip or hostname
# with optional port and password. ie rethinkdb://user:auth_key@10.10.3.30:28105,
# rethinkdb://10.10.3.33:18832, 10.0.0.1:10000, etc.
#
# If no servers are specified, then 127.0.0.1 is used as the host and 28015 as the port.
servers = ["127.0.0.1:28015"]

# Read metrics about swap memory usage
[swap]
# no configuration

# Read metrics about system load & uptime
[system]
# no configuration