Fix problem with graphite talking to closed connections (#2171)

We were having problems with telegraf talking to
carbon-relay-ng using the graphite output. When
the carbon-relay-ng server restarted, the connection on
the telegraf side would go into CLOSE_WAIT, but telegraf
would continue to send statistics through the connection.

Reading around, it seems you need to do a read from the connection
and check for an EOF error. We've implemented this and added a test
that roughly replicates the error we were having.

Pair: @whpearson @joshmyers
This commit is contained in:
Will Pearson
2017-01-24 20:50:29 +00:00
committed by Cameron Sparr
parent 29933d0835
commit e1faf06974
2 changed files with 65 additions and 7 deletions

View File

@@ -43,7 +43,8 @@ func TestGraphiteOK(t *testing.T) {
var wg sync.WaitGroup
// Start TCP server
wg.Add(1)
go TCPServer(t, &wg)
t.Log("Starting server")
go TCPServer1(t, &wg)
// Give the fake graphite TCP server some time to start:
time.Sleep(time.Millisecond * 100)
@@ -51,6 +52,7 @@ func TestGraphiteOK(t *testing.T) {
g := Graphite{
Prefix: "my.prefix",
}
// Init metrics
m1, _ := metric.New(
"mymeasurement",
@@ -72,29 +74,58 @@ func TestGraphiteOK(t *testing.T) {
)
// Prepare point list
metrics := []telegraf.Metric{m1, m2, m3}
metrics := []telegraf.Metric{m1}
metrics2 := []telegraf.Metric{m2, m3}
err1 := g.Connect()
require.NoError(t, err1)
// Send Data
t.Log("Send first data")
err2 := g.Write(metrics)
require.NoError(t, err2)
// Waiting TCPserver
wg.Wait()
t.Log("Finished Waiting for first data")
var wg2 sync.WaitGroup
// Start TCP server
time.Sleep(time.Millisecond * 100)
wg2.Add(1)
go TCPServer2(t, &wg2)
time.Sleep(time.Millisecond * 100)
//Write but expect an error, but reconnect
g.Write(metrics2)
err3 := g.Write(metrics2)
t.Log("Finished writing second data, it should have failed")
//Actually write the new metrics
require.NoError(t, err3)
t.Log("Finished writing third data")
wg2.Wait()
g.Close()
}
func TCPServer(t *testing.T, wg *sync.WaitGroup) {
tcpServer, _ := net.Listen("tcp", "127.0.0.1:2003")
func TCPServer1(t *testing.T, wg *sync.WaitGroup) {
defer wg.Done()
conn, _ := tcpServer.Accept()
tcpServer, _ := net.Listen("tcp", "127.0.0.1:2003")
conn, _ := (tcpServer).Accept()
reader := bufio.NewReader(conn)
tp := textproto.NewReader(reader)
data1, _ := tp.ReadLine()
assert.Equal(t, "my.prefix.192_168_0_1.mymeasurement.myfield 3.14 1289430000", data1)
conn.Close()
tcpServer.Close()
}
func TCPServer2(t *testing.T, wg *sync.WaitGroup) {
defer wg.Done()
tcpServer, _ := net.Listen("tcp", "127.0.0.1:2003")
conn2, _ := (tcpServer).Accept()
reader := bufio.NewReader(conn2)
tp := textproto.NewReader(reader)
data2, _ := tp.ReadLine()
assert.Equal(t, "my.prefix.192_168_0_1.mymeasurement 3.14 1289430000", data2)
data3, _ := tp.ReadLine()
assert.Equal(t, "my.prefix.192_168_0_1.my_measurement 3.14 1289430000", data3)
conn.Close()
conn2.Close()
tcpServer.Close()
}