Remove outputs blocking inputs when output is slow (#4938)
This commit is contained in:
@@ -1,25 +1,31 @@
|
||||
package mqtt_consumer
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"log"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/eclipse/paho.mqtt.golang"
|
||||
"github.com/influxdata/telegraf"
|
||||
"github.com/influxdata/telegraf/internal"
|
||||
"github.com/influxdata/telegraf/internal/tls"
|
||||
"github.com/influxdata/telegraf/plugins/inputs"
|
||||
"github.com/influxdata/telegraf/plugins/parsers"
|
||||
|
||||
"github.com/eclipse/paho.mqtt.golang"
|
||||
)
|
||||
|
||||
// 30 Seconds is the default used by paho.mqtt.golang
|
||||
var defaultConnectionTimeout = internal.Duration{Duration: 30 * time.Second}
|
||||
var (
|
||||
// 30 Seconds is the default used by paho.mqtt.golang
|
||||
defaultConnectionTimeout = internal.Duration{Duration: 30 * time.Second}
|
||||
|
||||
defaultMaxUndeliveredMessages = 1000
|
||||
)
|
||||
|
||||
type ConnectionState int
|
||||
type empty struct{}
|
||||
type semaphore chan empty
|
||||
|
||||
const (
|
||||
Disconnected ConnectionState = iota
|
||||
@@ -28,12 +34,13 @@ const (
|
||||
)
|
||||
|
||||
type MQTTConsumer struct {
|
||||
Servers []string
|
||||
Topics []string
|
||||
Username string
|
||||
Password string
|
||||
QoS int `toml:"qos"`
|
||||
ConnectionTimeout internal.Duration `toml:"connection_timeout"`
|
||||
Servers []string
|
||||
Topics []string
|
||||
Username string
|
||||
Password string
|
||||
QoS int `toml:"qos"`
|
||||
ConnectionTimeout internal.Duration `toml:"connection_timeout"`
|
||||
MaxUndeliveredMessages int `toml:"max_undelivered_messages"`
|
||||
|
||||
parser parsers.Parser
|
||||
|
||||
@@ -45,9 +52,14 @@ type MQTTConsumer struct {
|
||||
tls.ClientConfig
|
||||
|
||||
client mqtt.Client
|
||||
acc telegraf.Accumulator
|
||||
acc telegraf.TrackingAccumulator
|
||||
state ConnectionState
|
||||
subscribed bool
|
||||
sem semaphore
|
||||
messages map[telegraf.TrackingID]bool
|
||||
|
||||
ctx context.Context
|
||||
cancel context.CancelFunc
|
||||
}
|
||||
|
||||
var sampleConfig = `
|
||||
@@ -67,6 +79,16 @@ var sampleConfig = `
|
||||
## Connection timeout for initial connection in seconds
|
||||
connection_timeout = "30s"
|
||||
|
||||
## Maximum messages to read from the broker that have not been written by an
|
||||
## output. For best throughput set based on the number of metrics within
|
||||
## each message and the size of the output's metric_batch_size.
|
||||
##
|
||||
## For example, if each message from the queue contains 10 metrics and the
|
||||
## output metric_batch_size is 1000, setting this to 100 will ensure that a
|
||||
## full batch is collected and the write is triggered immediately without
|
||||
## waiting until the next flush_interval.
|
||||
# max_undelivered_messages = 1000
|
||||
|
||||
## Topics to subscribe to
|
||||
topics = [
|
||||
"telegraf/host01/cpu",
|
||||
@@ -118,7 +140,6 @@ func (m *MQTTConsumer) Start(acc telegraf.Accumulator) error {
|
||||
return errors.New("persistent_session requires client_id")
|
||||
}
|
||||
|
||||
m.acc = acc
|
||||
if m.QoS > 2 || m.QoS < 0 {
|
||||
return fmt.Errorf("qos value must be 0, 1, or 2: %d", m.QoS)
|
||||
}
|
||||
@@ -127,6 +148,9 @@ func (m *MQTTConsumer) Start(acc telegraf.Accumulator) error {
|
||||
return fmt.Errorf("connection_timeout must be greater than 1s: %s", m.ConnectionTimeout.Duration)
|
||||
}
|
||||
|
||||
m.acc = acc.WithTracking(m.MaxUndeliveredMessages)
|
||||
m.ctx, m.cancel = context.WithCancel(context.Background())
|
||||
|
||||
opts, err := m.createOpts()
|
||||
if err != nil {
|
||||
return err
|
||||
@@ -146,8 +170,10 @@ func (m *MQTTConsumer) connect() error {
|
||||
return err
|
||||
}
|
||||
|
||||
log.Printf("I! [inputs.mqtt_consumer]: connected %v", m.Servers)
|
||||
log.Printf("I! [inputs.mqtt_consumer] Connected %v", m.Servers)
|
||||
m.state = Connected
|
||||
m.sem = make(semaphore, m.MaxUndeliveredMessages)
|
||||
m.messages = make(map[telegraf.TrackingID]bool)
|
||||
|
||||
// Only subscribe on first connection when using persistent sessions. On
|
||||
// subsequent connections the subscriptions should be stored in the
|
||||
@@ -172,38 +198,64 @@ func (m *MQTTConsumer) connect() error {
|
||||
|
||||
func (m *MQTTConsumer) onConnectionLost(c mqtt.Client, err error) {
|
||||
m.acc.AddError(fmt.Errorf("connection lost: %v", err))
|
||||
log.Printf("D! [inputs.mqtt_consumer]: disconnected %v", m.Servers)
|
||||
log.Printf("D! [inputs.mqtt_consumer] Disconnected %v", m.Servers)
|
||||
m.state = Disconnected
|
||||
return
|
||||
}
|
||||
|
||||
func (m *MQTTConsumer) recvMessage(c mqtt.Client, msg mqtt.Message) {
|
||||
topic := msg.Topic()
|
||||
for {
|
||||
select {
|
||||
case track := <-m.acc.Delivered():
|
||||
_, ok := m.messages[track.ID()]
|
||||
if !ok {
|
||||
// Added by a previous connection
|
||||
continue
|
||||
}
|
||||
<-m.sem
|
||||
// No ack, MQTT does not support durable handling
|
||||
delete(m.messages, track.ID())
|
||||
case m.sem <- empty{}:
|
||||
err := m.onMessage(m.acc, msg)
|
||||
if err != nil {
|
||||
m.acc.AddError(err)
|
||||
<-m.sem
|
||||
}
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (m *MQTTConsumer) onMessage(acc telegraf.TrackingAccumulator, msg mqtt.Message) error {
|
||||
metrics, err := m.parser.Parse(msg.Payload())
|
||||
if err != nil {
|
||||
m.acc.AddError(err)
|
||||
return err
|
||||
}
|
||||
|
||||
topic := msg.Topic()
|
||||
for _, metric := range metrics {
|
||||
tags := metric.Tags()
|
||||
tags["topic"] = topic
|
||||
m.acc.AddFields(metric.Name(), metric.Fields(), tags, metric.Time())
|
||||
metric.AddTag("topic", topic)
|
||||
}
|
||||
|
||||
id := acc.AddTrackingMetricGroup(metrics)
|
||||
m.messages[id] = true
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *MQTTConsumer) Stop() {
|
||||
if m.state == Connected {
|
||||
log.Printf("D! [inputs.mqtt_consumer]: disconnecting %v", m.Servers)
|
||||
log.Printf("D! [inputs.mqtt_consumer] Disconnecting %v", m.Servers)
|
||||
m.client.Disconnect(200)
|
||||
log.Printf("D! [inputs.mqtt_consumer]: disconnected %v", m.Servers)
|
||||
log.Printf("D! [inputs.mqtt_consumer] Disconnected %v", m.Servers)
|
||||
m.state = Disconnected
|
||||
}
|
||||
m.cancel()
|
||||
}
|
||||
|
||||
func (m *MQTTConsumer) Gather(acc telegraf.Accumulator) error {
|
||||
if m.state == Disconnected {
|
||||
m.state = Connecting
|
||||
log.Printf("D! [inputs.mqtt_consumer]: connecting %v", m.Servers)
|
||||
log.Printf("D! [inputs.mqtt_consumer] Connecting %v", m.Servers)
|
||||
m.connect()
|
||||
}
|
||||
|
||||
@@ -246,7 +298,7 @@ func (m *MQTTConsumer) createOpts() (*mqtt.ClientOptions, error) {
|
||||
for _, server := range m.Servers {
|
||||
// Preserve support for host:port style servers; deprecated in Telegraf 1.4.4
|
||||
if !strings.Contains(server, "://") {
|
||||
log.Printf("W! [inputs.mqtt_consumer] server %q should be updated to use `scheme://host:port` format", server)
|
||||
log.Printf("W! [inputs.mqtt_consumer] Server %q should be updated to use `scheme://host:port` format", server)
|
||||
if tlsCfg == nil {
|
||||
server = "tcp://" + server
|
||||
} else {
|
||||
@@ -267,8 +319,9 @@ func (m *MQTTConsumer) createOpts() (*mqtt.ClientOptions, error) {
|
||||
func init() {
|
||||
inputs.Add("mqtt_consumer", func() telegraf.Input {
|
||||
return &MQTTConsumer{
|
||||
ConnectionTimeout: defaultConnectionTimeout,
|
||||
state: Disconnected,
|
||||
ConnectionTimeout: defaultConnectionTimeout,
|
||||
MaxUndeliveredMessages: defaultMaxUndeliveredMessages,
|
||||
state: Disconnected,
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user