Remove outputs blocking inputs when output is slow (#4938)

This commit is contained in:
Daniel Nelson
2018-11-05 13:34:28 -08:00
committed by GitHub
parent 74667cd681
commit 6e5c2f8bb6
59 changed files with 3615 additions and 2189 deletions

View File

@@ -1,9 +1,9 @@
# NSQ Consumer Input Plugin
The [NSQ](http://nsq.io/) consumer plugin polls a specified NSQD
topic and adds messages to InfluxDB. This plugin allows a message to be in any of the supported `data_format` types.
The [NSQ][nsq] consumer plugin reads from NSQD and creates metrics using one
of the supported [input data formats][].
## Configuration
### Configuration:
```toml
# Read metrics from NSQD topic(s)
@@ -18,6 +18,16 @@ topic and adds messages to InfluxDB. This plugin allows a message to be in any o
channel = "consumer"
max_in_flight = 100
## Maximum messages to read from the broker that have not been written by an
## output. For best throughput set based on the number of metrics within
## each message and the size of the output's metric_batch_size.
##
## For example, if each message from the queue contains 10 metrics and the
## output metric_batch_size is 1000, setting this to 100 will ensure that a
## full batch is collected and the write is triggered immediately without
## waiting until the next flush_interval.
# max_undelivered_messages = 1000
## Data format to consume.
## Each data format has its own unique set of configuration options, read
## more about them here:
@@ -25,5 +35,5 @@ topic and adds messages to InfluxDB. This plugin allows a message to be in any o
data_format = "influx"
```
## Testing
The `nsq_consumer_test` mocks out the interaction with `NSQD`. It requires no outside dependencies.
[nsq]: https://nsq.io
[input data formats]: /docs/DATA_FORMATS_INPUT.md

View File

@@ -1,7 +1,9 @@
package nsq_consumer
import (
"fmt"
"context"
"log"
"sync"
"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/plugins/inputs"
@@ -9,17 +11,38 @@ import (
nsq "github.com/nsqio/go-nsq"
)
const (
defaultMaxUndeliveredMessages = 1000
)
type empty struct{}
type semaphore chan empty
type logger struct{}
func (l *logger) Output(calldepth int, s string) error {
log.Println("D! [inputs.nsq_consumer] " + s)
return nil
}
//NSQConsumer represents the configuration of the plugin
type NSQConsumer struct {
Server string
Nsqd []string
Nsqlookupd []string
Topic string
Channel string
MaxInFlight int
parser parsers.Parser
consumer *nsq.Consumer
acc telegraf.Accumulator
Server string `toml:"server"`
Nsqd []string `toml:"nsqd"`
Nsqlookupd []string `toml:"nsqlookupd"`
Topic string `toml:"topic"`
Channel string `toml:"channel"`
MaxInFlight int `toml:"max_in_flight"`
MaxUndeliveredMessages int `toml:"max_undelivered_messages"`
parser parsers.Parser
consumer *nsq.Consumer
mu sync.Mutex
messages map[telegraf.TrackingID]*nsq.Message
wg sync.WaitGroup
cancel context.CancelFunc
}
var sampleConfig = `
@@ -33,6 +56,16 @@ var sampleConfig = `
channel = "consumer"
max_in_flight = 100
## Maximum messages to read from the broker that have not been written by an
## output. For best throughput set based on the number of metrics within
## each message and the size of the output's metric_batch_size.
##
## For example, if each message from the queue contains 10 metrics and the
## output metric_batch_size is 1000, setting this to 100 will ensure that a
## full batch is collected and the write is triggered immediately without
## waiting until the next flush_interval.
# max_undelivered_messages = 1000
## Data format to consume.
## Each data format has its own unique set of configuration options, read
## more about them here:
@@ -40,12 +73,6 @@ var sampleConfig = `
data_format = "influx"
`
func init() {
inputs.Add("nsq_consumer", func() telegraf.Input {
return &NSQConsumer{}
})
}
// SetParser takes the data_format from the config and finds the right parser for that format
func (n *NSQConsumer) SetParser(parser parsers.Parser) {
n.parser = parser
@@ -62,32 +89,88 @@ func (n *NSQConsumer) Description() string {
}
// Start pulls data from nsq
func (n *NSQConsumer) Start(acc telegraf.Accumulator) error {
n.acc = acc
func (n *NSQConsumer) Start(ac telegraf.Accumulator) error {
acc := ac.WithTracking(n.MaxUndeliveredMessages)
sem := make(semaphore, n.MaxUndeliveredMessages)
n.messages = make(map[telegraf.TrackingID]*nsq.Message, n.MaxUndeliveredMessages)
ctx, cancel := context.WithCancel(context.Background())
n.cancel = cancel
n.connect()
n.consumer.AddConcurrentHandlers(nsq.HandlerFunc(func(message *nsq.Message) error {
n.consumer.SetLogger(&logger{}, nsq.LogLevelInfo)
n.consumer.AddHandler(nsq.HandlerFunc(func(message *nsq.Message) error {
metrics, err := n.parser.Parse(message.Body)
if err != nil {
acc.AddError(fmt.Errorf("E! NSQConsumer Parse Error\nmessage:%s\nerror:%s", string(message.Body), err.Error()))
acc.AddError(err)
// Remove the message from the queue
message.Finish()
return nil
}
for _, metric := range metrics {
n.acc.AddFields(metric.Name(), metric.Fields(), metric.Tags(), metric.Time())
if len(metrics) == 0 {
message.Finish()
return nil
}
message.Finish()
select {
case <-ctx.Done():
return ctx.Err()
case sem <- empty{}:
break
}
n.mu.Lock()
id := acc.AddTrackingMetricGroup(metrics)
n.messages[id] = message
n.mu.Unlock()
message.DisableAutoResponse()
return nil
}), n.MaxInFlight)
}))
if len(n.Nsqlookupd) > 0 {
n.consumer.ConnectToNSQLookupds(n.Nsqlookupd)
}
n.consumer.ConnectToNSQDs(append(n.Nsqd, n.Server))
n.wg.Add(1)
go func() {
defer n.wg.Done()
n.onDelivery(ctx, acc, sem)
}()
return nil
}
func (n *NSQConsumer) onDelivery(ctx context.Context, acc telegraf.TrackingAccumulator, sem semaphore) {
for {
select {
case <-ctx.Done():
return
case info := <-acc.Delivered():
n.mu.Lock()
msg, ok := n.messages[info.ID()]
if !ok {
n.mu.Unlock()
continue
}
<-sem
delete(n.messages, info.ID())
n.mu.Unlock()
if info.Delivered() {
msg.Finish()
} else {
msg.Requeue(-1)
}
}
}
}
// Stop processing messages
func (n *NSQConsumer) Stop() {
n.cancel()
n.wg.Wait()
n.consumer.Stop()
<-n.consumer.StopChan
}
// Gather is a noop
@@ -107,3 +190,11 @@ func (n *NSQConsumer) connect() error {
}
return nil
}
func init() {
inputs.Add("nsq_consumer", func() telegraf.Input {
return &NSQConsumer{
MaxUndeliveredMessages: defaultMaxUndeliveredMessages,
}
})
}

View File

@@ -36,11 +36,12 @@ func TestReadsMetricsFromNSQ(t *testing.T) {
newMockNSQD(script, addr.String())
consumer := &NSQConsumer{
Server: "127.0.0.1:4155",
Topic: "telegraf",
Channel: "consume",
MaxInFlight: 1,
Nsqd: []string{"127.0.0.1:4155"},
Server: "127.0.0.1:4155",
Topic: "telegraf",
Channel: "consume",
MaxInFlight: 1,
MaxUndeliveredMessages: defaultMaxUndeliveredMessages,
Nsqd: []string{"127.0.0.1:4155"},
}
p, _ := parsers.NewInfluxParser()