Fix amqp output block on write if disconnected (#2727)

fixes #2603
This commit is contained in:
Daniel Nelson 2017-04-27 11:10:30 -07:00 committed by GitHub
parent 153304d92b
commit 5fe5c46c6d
3 changed files with 62 additions and 15 deletions

View File

@ -114,6 +114,7 @@ be deprecated eventually.
- [#2450](https://github.com/influxdata/telegraf/issues/2450): Network statistics not collected when system has alias interfaces - [#2450](https://github.com/influxdata/telegraf/issues/2450): Network statistics not collected when system has alias interfaces
- [#1911](https://github.com/influxdata/telegraf/issues/1911): Sysstat plugin needs LANG=C or similar locale - [#1911](https://github.com/influxdata/telegraf/issues/1911): Sysstat plugin needs LANG=C or similar locale
- [#2528](https://github.com/influxdata/telegraf/issues/2528): File output closes standard streams on reload. - [#2528](https://github.com/influxdata/telegraf/issues/2528): File output closes standard streams on reload.
- [#2603](https://github.com/influxdata/telegraf/issues/2603): AMQP output disconnect blocks all outputs
## v1.2.1 [2017-02-01] ## v1.2.1 [2017-02-01]

View File

@ -35,6 +35,10 @@ For an introduction to AMQP see:
## InfluxDB database ## InfluxDB database
# database = "telegraf" # database = "telegraf"
## Connection timeout, formatted as a string. If not provided, will default
## to 5s. 0s means no timeout (not recommended).
# timeout = "5s"
## Optional SSL Config ## Optional SSL Config
# ssl_ca = "/etc/telegraf/ca.pem" # ssl_ca = "/etc/telegraf/ca.pem"
# ssl_cert = "/etc/telegraf/cert.pem" # ssl_cert = "/etc/telegraf/cert.pem"

View File

@ -3,6 +3,7 @@ package amqp
import ( import (
"fmt" "fmt"
"log" "log"
"net"
"strings" "strings"
"sync" "sync"
"time" "time"
@ -15,6 +16,12 @@ import (
"github.com/streadway/amqp" "github.com/streadway/amqp"
) )
// client bundles the state that belongs to one established AMQP connection,
// so that it can be installed on (or cleared from) the output as one value.
type client struct {
	// conn is the broker connection returned by amqp.DialConfig.
	conn *amqp.Connection
	// channel is the channel opened on conn; Write publishes through it.
	channel *amqp.Channel
	// headers is the table attached to every published message; Connect
	// fills it with the database and retention_policy values.
	headers amqp.Table
}
type AMQP struct { type AMQP struct {
// AMQP brokers to send metrics to // AMQP brokers to send metrics to
URL string URL string
@ -30,6 +37,8 @@ type AMQP struct {
RetentionPolicy string RetentionPolicy string
// InfluxDB precision (DEPRECATED) // InfluxDB precision (DEPRECATED)
Precision string Precision string
// Connection timeout
Timeout internal.Duration
// Path to CA file // Path to CA file
SSLCA string `toml:"ssl_ca"` SSLCA string `toml:"ssl_ca"`
@ -40,10 +49,8 @@ type AMQP struct {
// Use SSL but skip chain & host verification // Use SSL but skip chain & host verification
InsecureSkipVerify bool InsecureSkipVerify bool
conn *amqp.Connection
channel *amqp.Channel
sync.Mutex sync.Mutex
headers amqp.Table c *client
serializer serializers.Serializer serializer serializers.Serializer
} }
@ -81,6 +88,10 @@ var sampleConfig = `
## InfluxDB database ## InfluxDB database
# database = "telegraf" # database = "telegraf"
## Write timeout, formatted as a string. If not provided, will default
## to 5s. 0s means no timeout (not recommended).
# timeout = "5s"
## Optional SSL Config ## Optional SSL Config
# ssl_ca = "/etc/telegraf/ca.pem" # ssl_ca = "/etc/telegraf/ca.pem"
# ssl_cert = "/etc/telegraf/cert.pem" # ssl_cert = "/etc/telegraf/cert.pem"
@ -100,10 +111,7 @@ func (a *AMQP) SetSerializer(serializer serializers.Serializer) {
} }
func (q *AMQP) Connect() error { func (q *AMQP) Connect() error {
q.Lock() headers := amqp.Table{
defer q.Unlock()
q.headers = amqp.Table{
"database": q.Database, "database": q.Database,
"retention_policy": q.RetentionPolicy, "retention_policy": q.RetentionPolicy,
} }
@ -126,13 +134,15 @@ func (q *AMQP) Connect() error {
amqpConf := amqp.Config{ amqpConf := amqp.Config{
TLSClientConfig: tls, TLSClientConfig: tls,
SASL: sasl, // if nil, it will be PLAIN SASL: sasl, // if nil, it will be PLAIN
Dial: func(network, addr string) (net.Conn, error) {
return net.DialTimeout(network, addr, q.Timeout.Duration)
},
} }
connection, err = amqp.DialConfig(q.URL, amqpConf) connection, err = amqp.DialConfig(q.URL, amqpConf)
if err != nil { if err != nil {
return err return err
} }
q.conn = connection
channel, err := connection.Channel() channel, err := connection.Channel()
if err != nil { if err != nil {
@ -151,25 +161,38 @@ func (q *AMQP) Connect() error {
if err != nil { if err != nil {
return fmt.Errorf("Failed to declare an exchange: %s", err) return fmt.Errorf("Failed to declare an exchange: %s", err)
} }
q.channel = channel
q.setClient(&client{
conn: connection,
channel: channel,
headers: headers,
})
go func() { go func() {
err := <-connection.NotifyClose(make(chan *amqp.Error)) err := <-connection.NotifyClose(make(chan *amqp.Error))
if err == nil { if err == nil {
return return
} }
q.setClient(nil)
log.Printf("I! Closing: %s", err) log.Printf("I! Closing: %s", err)
log.Printf("I! Trying to reconnect") log.Printf("I! Trying to reconnect")
for err := q.Connect(); err != nil; err = q.Connect() { for err := q.Connect(); err != nil; err = q.Connect() {
log.Println("E! ", err.Error()) log.Println("E! ", err.Error())
time.Sleep(10 * time.Second) time.Sleep(10 * time.Second)
} }
}() }()
return nil return nil
} }
func (q *AMQP) Close() error { func (q *AMQP) Close() error {
err := q.conn.Close() c := q.getClient()
if c == nil {
return nil
}
err := c.conn.Close()
if err != nil && err != amqp.ErrClosed { if err != nil && err != amqp.ErrClosed {
log.Printf("E! Error closing AMQP connection: %s", err) log.Printf("E! Error closing AMQP connection: %s", err)
return err return err
@ -186,11 +209,15 @@ func (q *AMQP) Description() string {
} }
func (q *AMQP) Write(metrics []telegraf.Metric) error { func (q *AMQP) Write(metrics []telegraf.Metric) error {
q.Lock()
defer q.Unlock()
if len(metrics) == 0 { if len(metrics) == 0 {
return nil return nil
} }
c := q.getClient()
if c == nil {
return fmt.Errorf("connection is not open")
}
outbuf := make(map[string][]byte) outbuf := make(map[string][]byte)
for _, metric := range metrics { for _, metric := range metrics {
@ -210,13 +237,15 @@ func (q *AMQP) Write(metrics []telegraf.Metric) error {
} }
for key, buf := range outbuf { for key, buf := range outbuf {
err := q.channel.Publish( // Note that since the channel is not in confirm mode, the absence of
// an error does not indicate successful delivery.
err := c.channel.Publish(
q.Exchange, // exchange q.Exchange, // exchange
key, // routing key key, // routing key
false, // mandatory false, // mandatory
false, // immediate false, // immediate
amqp.Publishing{ amqp.Publishing{
Headers: q.headers, Headers: c.headers,
ContentType: "text/plain", ContentType: "text/plain",
Body: buf, Body: buf,
}) })
@ -227,12 +256,25 @@ func (q *AMQP) Write(metrics []telegraf.Metric) error {
return nil return nil
} }
// getClient returns the client currently stored on the output, or nil when
// none is set. The field is read while holding the AMQP mutex.
func (q *AMQP) getClient() *client {
	q.Lock()
	current := q.c
	q.Unlock()
	return current
}
// setClient stores c as the output's current client while holding the AMQP
// mutex. Passing nil clears the stored client.
func (q *AMQP) setClient(c *client) {
	q.Lock()
	defer q.Unlock()
	q.c = c
}
// init registers the "amqp" output with a factory that pre-populates the
// default auth method, database, retention policy and a 5 second timeout.
func init() { func init() {
outputs.Add("amqp", func() telegraf.Output { outputs.Add("amqp", func() telegraf.Output {
return &AMQP{ return &AMQP{
AuthMethod: DefaultAuthMethod, AuthMethod: DefaultAuthMethod,
Database: DefaultDatabase, Database: DefaultDatabase,
RetentionPolicy: DefaultRetentionPolicy, RetentionPolicy: DefaultRetentionPolicy,
Timeout: internal.Duration{Duration: time.Second * 5},
} }
}) })
} }