Remove outputs blocking inputs when output is slow (#4938)
This commit is contained in:
@@ -13,7 +13,6 @@ For an introduction to AMQP see:
|
||||
The following defaults are known to work with RabbitMQ:
|
||||
|
||||
```toml
|
||||
# AMQP consumer plugin
|
||||
[[inputs.amqp_consumer]]
|
||||
## Broker to consume from.
|
||||
## deprecated in 1.7; use the brokers option
|
||||
@@ -46,16 +45,26 @@ The following defaults are known to work with RabbitMQ:
|
||||
|
||||
## AMQP queue name
|
||||
queue = "telegraf"
|
||||
|
||||
|
||||
## AMQP queue durability can be "transient" or "durable".
|
||||
queue_durability = "durable"
|
||||
|
||||
|
||||
## Binding Key
|
||||
binding_key = "#"
|
||||
|
||||
## Maximum number of messages server should give to the worker.
|
||||
# prefetch_count = 50
|
||||
|
||||
## Maximum messages to read from the broker that have not been written by an
|
||||
## output. For best throughput set based on the number of metrics within
|
||||
## each message and the size of the output's metric_batch_size.
|
||||
##
|
||||
## For example, if each message from the queue contains 10 metrics and the
|
||||
## output metric_batch_size is 1000, setting this to 100 will ensure that a
|
||||
## full batch is collected and the write is triggered immediately without
|
||||
## waiting until the next flush_interval.
|
||||
# max_undelivered_messages = 1000
|
||||
|
||||
## Auth method. PLAIN and EXTERNAL are supported
|
||||
## Using EXTERNAL requires enabling the rabbitmq_auth_mechanism_ssl plugin as
|
||||
## described here: https://www.rabbitmq.com/plugins.html
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
package amqp_consumer
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"log"
|
||||
@@ -9,25 +10,32 @@ import (
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/streadway/amqp"
|
||||
|
||||
"github.com/influxdata/telegraf"
|
||||
"github.com/influxdata/telegraf/internal/tls"
|
||||
"github.com/influxdata/telegraf/plugins/inputs"
|
||||
"github.com/influxdata/telegraf/plugins/parsers"
|
||||
"github.com/streadway/amqp"
|
||||
)
|
||||
|
||||
const (
|
||||
defaultMaxUndeliveredMessages = 1000
|
||||
)
|
||||
|
||||
type empty struct{}
|
||||
type semaphore chan empty
|
||||
|
||||
// AMQPConsumer is the top level struct for this plugin
|
||||
type AMQPConsumer struct {
|
||||
URL string `toml:"url"` // deprecated in 1.7; use brokers
|
||||
Brokers []string `toml:"brokers"`
|
||||
Username string `toml:"username"`
|
||||
Password string `toml:"password"`
|
||||
Exchange string `toml:"exchange"`
|
||||
ExchangeType string `toml:"exchange_type"`
|
||||
ExchangeDurability string `toml:"exchange_durability"`
|
||||
ExchangePassive bool `toml:"exchange_passive"`
|
||||
ExchangeArguments map[string]string `toml:"exchange_arguments"`
|
||||
URL string `toml:"url"` // deprecated in 1.7; use brokers
|
||||
Brokers []string `toml:"brokers"`
|
||||
Username string `toml:"username"`
|
||||
Password string `toml:"password"`
|
||||
Exchange string `toml:"exchange"`
|
||||
ExchangeType string `toml:"exchange_type"`
|
||||
ExchangeDurability string `toml:"exchange_durability"`
|
||||
ExchangePassive bool `toml:"exchange_passive"`
|
||||
ExchangeArguments map[string]string `toml:"exchange_arguments"`
|
||||
MaxUndeliveredMessages int `toml:"max_undelivered_messages"`
|
||||
|
||||
// Queue Name
|
||||
Queue string `toml:"queue"`
|
||||
@@ -44,9 +52,12 @@ type AMQPConsumer struct {
|
||||
AuthMethod string
|
||||
tls.ClientConfig
|
||||
|
||||
deliveries map[telegraf.TrackingID]amqp.Delivery
|
||||
|
||||
parser parsers.Parser
|
||||
conn *amqp.Connection
|
||||
wg *sync.WaitGroup
|
||||
cancel context.CancelFunc
|
||||
}
|
||||
|
||||
type externalAuth struct{}
|
||||
@@ -114,6 +125,16 @@ func (a *AMQPConsumer) SampleConfig() string {
|
||||
## Maximum number of messages server should give to the worker.
|
||||
# prefetch_count = 50
|
||||
|
||||
## Maximum messages to read from the broker that have not been written by an
|
||||
## output. For best throughput set based on the number of metrics within
|
||||
## each message and the size of the output's metric_batch_size.
|
||||
##
|
||||
## For example, if each message from the queue contains 10 metrics and the
|
||||
## output metric_batch_size is 1000, setting this to 100 will ensure that a
|
||||
## full batch is collected and the write is triggered immediately without
|
||||
## waiting until the next flush_interval.
|
||||
# max_undelivered_messages = 1000
|
||||
|
||||
## Auth method. PLAIN and EXTERNAL are supported
|
||||
## Using EXTERNAL requires enabling the rabbitmq_auth_mechanism_ssl plugin as
|
||||
## described here: https://www.rabbitmq.com/plugins.html
|
||||
@@ -185,9 +206,15 @@ func (a *AMQPConsumer) Start(acc telegraf.Accumulator) error {
|
||||
return err
|
||||
}
|
||||
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
a.cancel = cancel
|
||||
|
||||
a.wg = &sync.WaitGroup{}
|
||||
a.wg.Add(1)
|
||||
go a.process(msgs, acc)
|
||||
go func() {
|
||||
defer a.wg.Done()
|
||||
a.process(ctx, msgs, acc)
|
||||
}()
|
||||
|
||||
go func() {
|
||||
for {
|
||||
@@ -196,7 +223,7 @@ func (a *AMQPConsumer) Start(acc telegraf.Accumulator) error {
|
||||
break
|
||||
}
|
||||
|
||||
log.Printf("I! AMQP consumer connection closed: %s; trying to reconnect", err)
|
||||
log.Printf("I! [inputs.amqp_consumer] connection closed: %s; trying to reconnect", err)
|
||||
for {
|
||||
msgs, err := a.connect(amqpConf)
|
||||
if err != nil {
|
||||
@@ -206,7 +233,10 @@ func (a *AMQPConsumer) Start(acc telegraf.Accumulator) error {
|
||||
}
|
||||
|
||||
a.wg.Add(1)
|
||||
go a.process(msgs, acc)
|
||||
go func() {
|
||||
defer a.wg.Done()
|
||||
a.process(ctx, msgs, acc)
|
||||
}()
|
||||
break
|
||||
}
|
||||
}
|
||||
@@ -224,14 +254,14 @@ func (a *AMQPConsumer) connect(amqpConf *amqp.Config) (<-chan amqp.Delivery, err
|
||||
p := rand.Perm(len(brokers))
|
||||
for _, n := range p {
|
||||
broker := brokers[n]
|
||||
log.Printf("D! [amqp_consumer] connecting to %q", broker)
|
||||
log.Printf("D! [inputs.amqp_consumer] connecting to %q", broker)
|
||||
conn, err := amqp.DialConfig(broker, *amqpConf)
|
||||
if err == nil {
|
||||
a.conn = conn
|
||||
log.Printf("D! [amqp_consumer] connected to %q", broker)
|
||||
log.Printf("D! [inputs.amqp_consumer] connected to %q", broker)
|
||||
break
|
||||
}
|
||||
log.Printf("D! [amqp_consumer] error connecting to %q", broker)
|
||||
log.Printf("D! [inputs.amqp_consumer] error connecting to %q", broker)
|
||||
}
|
||||
|
||||
if a.conn == nil {
|
||||
@@ -320,7 +350,6 @@ func (a *AMQPConsumer) connect(amqpConf *amqp.Config) (<-chan amqp.Delivery, err
|
||||
return nil, fmt.Errorf("Failed establishing connection to queue: %s", err)
|
||||
}
|
||||
|
||||
log.Println("I! Started AMQP consumer")
|
||||
return msgs, err
|
||||
}
|
||||
|
||||
@@ -361,42 +390,101 @@ func declareExchange(
|
||||
}
|
||||
|
||||
// Read messages from queue and add them to the Accumulator
|
||||
func (a *AMQPConsumer) process(msgs <-chan amqp.Delivery, acc telegraf.Accumulator) {
|
||||
defer a.wg.Done()
|
||||
for d := range msgs {
|
||||
metrics, err := a.parser.Parse(d.Body)
|
||||
if err != nil {
|
||||
log.Printf("E! %v: error parsing metric - %v", err, string(d.Body))
|
||||
} else {
|
||||
for _, m := range metrics {
|
||||
acc.AddFields(m.Name(), m.Fields(), m.Tags(), m.Time())
|
||||
func (a *AMQPConsumer) process(ctx context.Context, msgs <-chan amqp.Delivery, ac telegraf.Accumulator) {
|
||||
a.deliveries = make(map[telegraf.TrackingID]amqp.Delivery)
|
||||
|
||||
acc := ac.WithTracking(a.MaxUndeliveredMessages)
|
||||
sem := make(semaphore, a.MaxUndeliveredMessages)
|
||||
|
||||
for {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return
|
||||
case track := <-acc.Delivered():
|
||||
if a.onDelivery(track) {
|
||||
<-sem
|
||||
}
|
||||
case sem <- empty{}:
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return
|
||||
case track := <-acc.Delivered():
|
||||
if a.onDelivery(track) {
|
||||
<-sem
|
||||
<-sem
|
||||
}
|
||||
case d, ok := <-msgs:
|
||||
if !ok {
|
||||
return
|
||||
}
|
||||
err := a.onMessage(acc, d)
|
||||
if err != nil {
|
||||
acc.AddError(err)
|
||||
<-sem
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
d.Ack(false)
|
||||
}
|
||||
log.Printf("I! AMQP consumer queue closed")
|
||||
}
|
||||
|
||||
func (a *AMQPConsumer) onMessage(acc telegraf.TrackingAccumulator, d amqp.Delivery) error {
|
||||
metrics, err := a.parser.Parse(d.Body)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
id := acc.AddTrackingMetricGroup(metrics)
|
||||
a.deliveries[id] = d
|
||||
return nil
|
||||
}
|
||||
|
||||
func (a *AMQPConsumer) onDelivery(track telegraf.DeliveryInfo) bool {
|
||||
delivery, ok := a.deliveries[track.ID()]
|
||||
if !ok {
|
||||
// Added by a previous connection
|
||||
return false
|
||||
}
|
||||
|
||||
if track.Delivered() {
|
||||
err := delivery.Ack(false)
|
||||
if err != nil {
|
||||
log.Printf("E! [inputs.amqp_consumer] Unable to ack written delivery: %d: %v",
|
||||
delivery.DeliveryTag, err)
|
||||
a.conn.Close()
|
||||
}
|
||||
} else {
|
||||
err := delivery.Reject(false)
|
||||
if err != nil {
|
||||
log.Printf("E! [inputs.amqp_consumer] Unable to reject failed delivery: %d: %v",
|
||||
delivery.DeliveryTag, err)
|
||||
a.conn.Close()
|
||||
}
|
||||
}
|
||||
|
||||
delete(a.deliveries, track.ID())
|
||||
return true
|
||||
}
|
||||
|
||||
func (a *AMQPConsumer) Stop() {
|
||||
a.cancel()
|
||||
a.wg.Wait()
|
||||
err := a.conn.Close()
|
||||
if err != nil && err != amqp.ErrClosed {
|
||||
log.Printf("E! Error closing AMQP connection: %s", err)
|
||||
log.Printf("E! [inputs.amqp_consumer] Error closing AMQP connection: %s", err)
|
||||
return
|
||||
}
|
||||
a.wg.Wait()
|
||||
log.Println("I! Stopped AMQP service")
|
||||
}
|
||||
|
||||
func init() {
|
||||
inputs.Add("amqp_consumer", func() telegraf.Input {
|
||||
return &AMQPConsumer{
|
||||
URL: DefaultBroker,
|
||||
AuthMethod: DefaultAuthMethod,
|
||||
ExchangeType: DefaultExchangeType,
|
||||
ExchangeDurability: DefaultExchangeDurability,
|
||||
QueueDurability: DefaultQueueDurability,
|
||||
PrefetchCount: DefaultPrefetchCount,
|
||||
URL: DefaultBroker,
|
||||
AuthMethod: DefaultAuthMethod,
|
||||
ExchangeType: DefaultExchangeType,
|
||||
ExchangeDurability: DefaultExchangeDurability,
|
||||
QueueDurability: DefaultQueueDurability,
|
||||
PrefetchCount: DefaultPrefetchCount,
|
||||
MaxUndeliveredMessages: defaultMaxUndeliveredMessages,
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user