Remove outputs blocking inputs when output is slow (#4938)

This commit is contained in:
Daniel Nelson
2018-11-05 13:34:28 -08:00
committed by GitHub
parent 74667cd681
commit 6e5c2f8bb6
59 changed files with 3615 additions and 2189 deletions

View File

@@ -13,7 +13,6 @@ For an introduction to AMQP see:
The following defaults are known to work with RabbitMQ:
```toml
# AMQP consumer plugin
[[inputs.amqp_consumer]]
## Broker to consume from.
## deprecated in 1.7; use the brokers option
@@ -46,16 +45,26 @@ The following defaults are known to work with RabbitMQ:
## AMQP queue name
queue = "telegraf"
## AMQP queue durability can be "transient" or "durable".
queue_durability = "durable"
## Binding Key
binding_key = "#"
## Maximum number of messages server should give to the worker.
# prefetch_count = 50
## Maximum messages to read from the broker that have not been written by an
## output. For best throughput set based on the number of metrics within
## each message and the size of the output's metric_batch_size.
##
## For example, if each message from the queue contains 10 metrics and the
## output metric_batch_size is 1000, setting this to 100 will ensure that a
## full batch is collected and the write is triggered immediately without
## waiting until the next flush_interval.
# max_undelivered_messages = 1000
## Auth method. PLAIN and EXTERNAL are supported
## Using EXTERNAL requires enabling the rabbitmq_auth_mechanism_ssl plugin as
## described here: https://www.rabbitmq.com/plugins.html

View File

@@ -1,6 +1,7 @@
package amqp_consumer
import (
"context"
"errors"
"fmt"
"log"
@@ -9,25 +10,32 @@ import (
"sync"
"time"
"github.com/streadway/amqp"
"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/internal/tls"
"github.com/influxdata/telegraf/plugins/inputs"
"github.com/influxdata/telegraf/plugins/parsers"
"github.com/streadway/amqp"
)
const (
defaultMaxUndeliveredMessages = 1000
)
type empty struct{}
type semaphore chan empty
// AMQPConsumer is the top level struct for this plugin
type AMQPConsumer struct {
URL string `toml:"url"` // deprecated in 1.7; use brokers
Brokers []string `toml:"brokers"`
Username string `toml:"username"`
Password string `toml:"password"`
Exchange string `toml:"exchange"`
ExchangeType string `toml:"exchange_type"`
ExchangeDurability string `toml:"exchange_durability"`
ExchangePassive bool `toml:"exchange_passive"`
ExchangeArguments map[string]string `toml:"exchange_arguments"`
URL string `toml:"url"` // deprecated in 1.7; use brokers
Brokers []string `toml:"brokers"`
Username string `toml:"username"`
Password string `toml:"password"`
Exchange string `toml:"exchange"`
ExchangeType string `toml:"exchange_type"`
ExchangeDurability string `toml:"exchange_durability"`
ExchangePassive bool `toml:"exchange_passive"`
ExchangeArguments map[string]string `toml:"exchange_arguments"`
MaxUndeliveredMessages int `toml:"max_undelivered_messages"`
// Queue Name
Queue string `toml:"queue"`
@@ -44,9 +52,12 @@ type AMQPConsumer struct {
AuthMethod string
tls.ClientConfig
deliveries map[telegraf.TrackingID]amqp.Delivery
parser parsers.Parser
conn *amqp.Connection
wg *sync.WaitGroup
cancel context.CancelFunc
}
type externalAuth struct{}
@@ -114,6 +125,16 @@ func (a *AMQPConsumer) SampleConfig() string {
## Maximum number of messages server should give to the worker.
# prefetch_count = 50
## Maximum messages to read from the broker that have not been written by an
## output. For best throughput set based on the number of metrics within
## each message and the size of the output's metric_batch_size.
##
## For example, if each message from the queue contains 10 metrics and the
## output metric_batch_size is 1000, setting this to 100 will ensure that a
## full batch is collected and the write is triggered immediately without
## waiting until the next flush_interval.
# max_undelivered_messages = 1000
## Auth method. PLAIN and EXTERNAL are supported
## Using EXTERNAL requires enabling the rabbitmq_auth_mechanism_ssl plugin as
## described here: https://www.rabbitmq.com/plugins.html
@@ -185,9 +206,15 @@ func (a *AMQPConsumer) Start(acc telegraf.Accumulator) error {
return err
}
ctx, cancel := context.WithCancel(context.Background())
a.cancel = cancel
a.wg = &sync.WaitGroup{}
a.wg.Add(1)
go a.process(msgs, acc)
go func() {
defer a.wg.Done()
a.process(ctx, msgs, acc)
}()
go func() {
for {
@@ -196,7 +223,7 @@ func (a *AMQPConsumer) Start(acc telegraf.Accumulator) error {
break
}
log.Printf("I! AMQP consumer connection closed: %s; trying to reconnect", err)
log.Printf("I! [inputs.amqp_consumer] connection closed: %s; trying to reconnect", err)
for {
msgs, err := a.connect(amqpConf)
if err != nil {
@@ -206,7 +233,10 @@ func (a *AMQPConsumer) Start(acc telegraf.Accumulator) error {
}
a.wg.Add(1)
go a.process(msgs, acc)
go func() {
defer a.wg.Done()
a.process(ctx, msgs, acc)
}()
break
}
}
@@ -224,14 +254,14 @@ func (a *AMQPConsumer) connect(amqpConf *amqp.Config) (<-chan amqp.Delivery, err
p := rand.Perm(len(brokers))
for _, n := range p {
broker := brokers[n]
log.Printf("D! [amqp_consumer] connecting to %q", broker)
log.Printf("D! [inputs.amqp_consumer] connecting to %q", broker)
conn, err := amqp.DialConfig(broker, *amqpConf)
if err == nil {
a.conn = conn
log.Printf("D! [amqp_consumer] connected to %q", broker)
log.Printf("D! [inputs.amqp_consumer] connected to %q", broker)
break
}
log.Printf("D! [amqp_consumer] error connecting to %q", broker)
log.Printf("D! [inputs.amqp_consumer] error connecting to %q", broker)
}
if a.conn == nil {
@@ -320,7 +350,6 @@ func (a *AMQPConsumer) connect(amqpConf *amqp.Config) (<-chan amqp.Delivery, err
return nil, fmt.Errorf("Failed establishing connection to queue: %s", err)
}
log.Println("I! Started AMQP consumer")
return msgs, err
}
@@ -361,42 +390,101 @@ func declareExchange(
}
// Read messages from queue and add them to the Accumulator
func (a *AMQPConsumer) process(msgs <-chan amqp.Delivery, acc telegraf.Accumulator) {
defer a.wg.Done()
for d := range msgs {
metrics, err := a.parser.Parse(d.Body)
if err != nil {
log.Printf("E! %v: error parsing metric - %v", err, string(d.Body))
} else {
for _, m := range metrics {
acc.AddFields(m.Name(), m.Fields(), m.Tags(), m.Time())
func (a *AMQPConsumer) process(ctx context.Context, msgs <-chan amqp.Delivery, ac telegraf.Accumulator) {
a.deliveries = make(map[telegraf.TrackingID]amqp.Delivery)
acc := ac.WithTracking(a.MaxUndeliveredMessages)
sem := make(semaphore, a.MaxUndeliveredMessages)
for {
select {
case <-ctx.Done():
return
case track := <-acc.Delivered():
if a.onDelivery(track) {
<-sem
}
case sem <- empty{}:
select {
case <-ctx.Done():
return
case track := <-acc.Delivered():
if a.onDelivery(track) {
<-sem
<-sem
}
case d, ok := <-msgs:
if !ok {
return
}
err := a.onMessage(acc, d)
if err != nil {
acc.AddError(err)
<-sem
}
}
}
d.Ack(false)
}
log.Printf("I! AMQP consumer queue closed")
}
func (a *AMQPConsumer) onMessage(acc telegraf.TrackingAccumulator, d amqp.Delivery) error {
metrics, err := a.parser.Parse(d.Body)
if err != nil {
return err
}
id := acc.AddTrackingMetricGroup(metrics)
a.deliveries[id] = d
return nil
}
func (a *AMQPConsumer) onDelivery(track telegraf.DeliveryInfo) bool {
delivery, ok := a.deliveries[track.ID()]
if !ok {
// Added by a previous connection
return false
}
if track.Delivered() {
err := delivery.Ack(false)
if err != nil {
log.Printf("E! [inputs.amqp_consumer] Unable to ack written delivery: %d: %v",
delivery.DeliveryTag, err)
a.conn.Close()
}
} else {
err := delivery.Reject(false)
if err != nil {
log.Printf("E! [inputs.amqp_consumer] Unable to reject failed delivery: %d: %v",
delivery.DeliveryTag, err)
a.conn.Close()
}
}
delete(a.deliveries, track.ID())
return true
}
func (a *AMQPConsumer) Stop() {
a.cancel()
a.wg.Wait()
err := a.conn.Close()
if err != nil && err != amqp.ErrClosed {
log.Printf("E! Error closing AMQP connection: %s", err)
log.Printf("E! [inputs.amqp_consumer] Error closing AMQP connection: %s", err)
return
}
a.wg.Wait()
log.Println("I! Stopped AMQP service")
}
func init() {
inputs.Add("amqp_consumer", func() telegraf.Input {
return &AMQPConsumer{
URL: DefaultBroker,
AuthMethod: DefaultAuthMethod,
ExchangeType: DefaultExchangeType,
ExchangeDurability: DefaultExchangeDurability,
QueueDurability: DefaultQueueDurability,
PrefetchCount: DefaultPrefetchCount,
URL: DefaultBroker,
AuthMethod: DefaultAuthMethod,
ExchangeType: DefaultExchangeType,
ExchangeDurability: DefaultExchangeDurability,
QueueDurability: DefaultQueueDurability,
PrefetchCount: DefaultPrefetchCount,
MaxUndeliveredMessages: defaultMaxUndeliveredMessages,
}
})
}