Remove outputs blocking inputs when output is slow (#4938)
This commit is contained in:
@@ -1,9 +1,9 @@
|
||||
# NSQ Consumer Input Plugin
|
||||
|
||||
The [NSQ](http://nsq.io/) consumer plugin polls a specified NSQD
|
||||
topic and adds messages to InfluxDB. This plugin allows a message to be in any of the supported `data_format` types.
|
||||
The [NSQ][nsq] consumer plugin reads from NSQD and creates metrics using one
|
||||
of the supported [input data formats][].
|
||||
|
||||
## Configuration
|
||||
### Configuration:
|
||||
|
||||
```toml
|
||||
# Read metrics from NSQD topic(s)
|
||||
@@ -18,6 +18,16 @@ topic and adds messages to InfluxDB. This plugin allows a message to be in any o
|
||||
channel = "consumer"
|
||||
max_in_flight = 100
|
||||
|
||||
## Maximum messages to read from the broker that have not been written by an
|
||||
## output. For best throughput set based on the number of metrics within
|
||||
## each message and the size of the output's metric_batch_size.
|
||||
##
|
||||
## For example, if each message from the queue contains 10 metrics and the
|
||||
## output metric_batch_size is 1000, setting this to 100 will ensure that a
|
||||
## full batch is collected and the write is triggered immediately without
|
||||
## waiting until the next flush_interval.
|
||||
# max_undelivered_messages = 1000
|
||||
|
||||
## Data format to consume.
|
||||
## Each data format has its own unique set of configuration options, read
|
||||
## more about them here:
|
||||
@@ -25,5 +35,5 @@ topic and adds messages to InfluxDB. This plugin allows a message to be in any o
|
||||
data_format = "influx"
|
||||
```
|
||||
|
||||
## Testing
|
||||
The `nsq_consumer_test` mocks out the interaction with `NSQD`. It requires no outside dependencies.
|
||||
[nsq]: https://nsq.io
|
||||
[input data formats]: /docs/DATA_FORMATS_INPUT.md
|
||||
|
||||
@@ -1,7 +1,9 @@
|
||||
package nsq_consumer
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"context"
|
||||
"log"
|
||||
"sync"
|
||||
|
||||
"github.com/influxdata/telegraf"
|
||||
"github.com/influxdata/telegraf/plugins/inputs"
|
||||
@@ -9,17 +11,38 @@ import (
|
||||
nsq "github.com/nsqio/go-nsq"
|
||||
)
|
||||
|
||||
const (
|
||||
defaultMaxUndeliveredMessages = 1000
|
||||
)
|
||||
|
||||
type empty struct{}
|
||||
type semaphore chan empty
|
||||
|
||||
type logger struct{}
|
||||
|
||||
func (l *logger) Output(calldepth int, s string) error {
|
||||
log.Println("D! [inputs.nsq_consumer] " + s)
|
||||
return nil
|
||||
}
|
||||
|
||||
//NSQConsumer represents the configuration of the plugin
|
||||
type NSQConsumer struct {
|
||||
Server string
|
||||
Nsqd []string
|
||||
Nsqlookupd []string
|
||||
Topic string
|
||||
Channel string
|
||||
MaxInFlight int
|
||||
parser parsers.Parser
|
||||
consumer *nsq.Consumer
|
||||
acc telegraf.Accumulator
|
||||
Server string `toml:"server"`
|
||||
Nsqd []string `toml:"nsqd"`
|
||||
Nsqlookupd []string `toml:"nsqlookupd"`
|
||||
Topic string `toml:"topic"`
|
||||
Channel string `toml:"channel"`
|
||||
MaxInFlight int `toml:"max_in_flight"`
|
||||
|
||||
MaxUndeliveredMessages int `toml:"max_undelivered_messages"`
|
||||
|
||||
parser parsers.Parser
|
||||
consumer *nsq.Consumer
|
||||
|
||||
mu sync.Mutex
|
||||
messages map[telegraf.TrackingID]*nsq.Message
|
||||
wg sync.WaitGroup
|
||||
cancel context.CancelFunc
|
||||
}
|
||||
|
||||
var sampleConfig = `
|
||||
@@ -33,6 +56,16 @@ var sampleConfig = `
|
||||
channel = "consumer"
|
||||
max_in_flight = 100
|
||||
|
||||
## Maximum messages to read from the broker that have not been written by an
|
||||
## output. For best throughput set based on the number of metrics within
|
||||
## each message and the size of the output's metric_batch_size.
|
||||
##
|
||||
## For example, if each message from the queue contains 10 metrics and the
|
||||
## output metric_batch_size is 1000, setting this to 100 will ensure that a
|
||||
## full batch is collected and the write is triggered immediately without
|
||||
## waiting until the next flush_interval.
|
||||
# max_undelivered_messages = 1000
|
||||
|
||||
## Data format to consume.
|
||||
## Each data format has its own unique set of configuration options, read
|
||||
## more about them here:
|
||||
@@ -40,12 +73,6 @@ var sampleConfig = `
|
||||
data_format = "influx"
|
||||
`
|
||||
|
||||
func init() {
|
||||
inputs.Add("nsq_consumer", func() telegraf.Input {
|
||||
return &NSQConsumer{}
|
||||
})
|
||||
}
|
||||
|
||||
// SetParser takes the data_format from the config and finds the right parser for that format
|
||||
func (n *NSQConsumer) SetParser(parser parsers.Parser) {
|
||||
n.parser = parser
|
||||
@@ -62,32 +89,88 @@ func (n *NSQConsumer) Description() string {
|
||||
}
|
||||
|
||||
// Start pulls data from nsq
|
||||
func (n *NSQConsumer) Start(acc telegraf.Accumulator) error {
|
||||
n.acc = acc
|
||||
func (n *NSQConsumer) Start(ac telegraf.Accumulator) error {
|
||||
acc := ac.WithTracking(n.MaxUndeliveredMessages)
|
||||
sem := make(semaphore, n.MaxUndeliveredMessages)
|
||||
n.messages = make(map[telegraf.TrackingID]*nsq.Message, n.MaxUndeliveredMessages)
|
||||
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
n.cancel = cancel
|
||||
|
||||
n.connect()
|
||||
n.consumer.AddConcurrentHandlers(nsq.HandlerFunc(func(message *nsq.Message) error {
|
||||
n.consumer.SetLogger(&logger{}, nsq.LogLevelInfo)
|
||||
n.consumer.AddHandler(nsq.HandlerFunc(func(message *nsq.Message) error {
|
||||
metrics, err := n.parser.Parse(message.Body)
|
||||
if err != nil {
|
||||
acc.AddError(fmt.Errorf("E! NSQConsumer Parse Error\nmessage:%s\nerror:%s", string(message.Body), err.Error()))
|
||||
acc.AddError(err)
|
||||
// Remove the message from the queue
|
||||
message.Finish()
|
||||
return nil
|
||||
}
|
||||
for _, metric := range metrics {
|
||||
n.acc.AddFields(metric.Name(), metric.Fields(), metric.Tags(), metric.Time())
|
||||
if len(metrics) == 0 {
|
||||
message.Finish()
|
||||
return nil
|
||||
}
|
||||
message.Finish()
|
||||
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return ctx.Err()
|
||||
case sem <- empty{}:
|
||||
break
|
||||
}
|
||||
|
||||
n.mu.Lock()
|
||||
id := acc.AddTrackingMetricGroup(metrics)
|
||||
n.messages[id] = message
|
||||
n.mu.Unlock()
|
||||
message.DisableAutoResponse()
|
||||
return nil
|
||||
}), n.MaxInFlight)
|
||||
}))
|
||||
|
||||
if len(n.Nsqlookupd) > 0 {
|
||||
n.consumer.ConnectToNSQLookupds(n.Nsqlookupd)
|
||||
}
|
||||
n.consumer.ConnectToNSQDs(append(n.Nsqd, n.Server))
|
||||
|
||||
n.wg.Add(1)
|
||||
go func() {
|
||||
defer n.wg.Done()
|
||||
n.onDelivery(ctx, acc, sem)
|
||||
}()
|
||||
return nil
|
||||
}
|
||||
|
||||
func (n *NSQConsumer) onDelivery(ctx context.Context, acc telegraf.TrackingAccumulator, sem semaphore) {
|
||||
for {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return
|
||||
case info := <-acc.Delivered():
|
||||
n.mu.Lock()
|
||||
msg, ok := n.messages[info.ID()]
|
||||
if !ok {
|
||||
n.mu.Unlock()
|
||||
continue
|
||||
}
|
||||
<-sem
|
||||
delete(n.messages, info.ID())
|
||||
n.mu.Unlock()
|
||||
|
||||
if info.Delivered() {
|
||||
msg.Finish()
|
||||
} else {
|
||||
msg.Requeue(-1)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Stop processing messages
|
||||
func (n *NSQConsumer) Stop() {
|
||||
n.cancel()
|
||||
n.wg.Wait()
|
||||
n.consumer.Stop()
|
||||
<-n.consumer.StopChan
|
||||
}
|
||||
|
||||
// Gather is a noop
|
||||
@@ -107,3 +190,11 @@ func (n *NSQConsumer) connect() error {
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func init() {
|
||||
inputs.Add("nsq_consumer", func() telegraf.Input {
|
||||
return &NSQConsumer{
|
||||
MaxUndeliveredMessages: defaultMaxUndeliveredMessages,
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
@@ -36,11 +36,12 @@ func TestReadsMetricsFromNSQ(t *testing.T) {
|
||||
newMockNSQD(script, addr.String())
|
||||
|
||||
consumer := &NSQConsumer{
|
||||
Server: "127.0.0.1:4155",
|
||||
Topic: "telegraf",
|
||||
Channel: "consume",
|
||||
MaxInFlight: 1,
|
||||
Nsqd: []string{"127.0.0.1:4155"},
|
||||
Server: "127.0.0.1:4155",
|
||||
Topic: "telegraf",
|
||||
Channel: "consume",
|
||||
MaxInFlight: 1,
|
||||
MaxUndeliveredMessages: defaultMaxUndeliveredMessages,
|
||||
Nsqd: []string{"127.0.0.1:4155"},
|
||||
}
|
||||
|
||||
p, _ := parsers.NewInfluxParser()
|
||||
|
||||
Reference in New Issue
Block a user