Fix input plugin panics causing Telegraf to exit

closes #585
closes #584
This commit is contained in:
Wu Taizeng 2016-01-26 16:19:34 +08:00 committed by Cameron Sparr
parent 47ea2d5fb4
commit cf568487c8
2 changed files with 17 additions and 0 deletions

View File

@ -40,6 +40,7 @@ specifying a docker endpoint to get metrics from.
- [#440](https://github.com/influxdata/telegraf/issues/440): Don't query filtered devices for disk stats.
- [#463](https://github.com/influxdata/telegraf/issues/463): Docker plugin not working on AWS Linux
- [#568](https://github.com/influxdata/telegraf/issues/568): Multiple output race condition.
- [#585](https://github.com/influxdata/telegraf/pull/585): Log stack trace and continue on Telegraf panic. Thanks @wutaizeng!
## v0.10.0 [2016-01-12]

View File

@ -7,6 +7,7 @@ import (
"math/big"
"math/rand"
"os"
"runtime"
"sync"
"time"
@ -87,6 +88,18 @@ func (a *Agent) Close() error {
return err
}
// panicRecover recovers from a panic raised while running the given input and
// logs the panic value together with a stack dump, instead of letting the
// panic propagate. It must be invoked directly by a defer statement
// (`defer panicRecover(input)`); recover has no effect when called any other
// way.
func panicRecover(input *models.RunningInput) {
	if err := recover(); err != nil {
		trace := make([]byte, 2048)
		// runtime.Stack returns the number of bytes it actually wrote; log
		// only that portion so the output is not padded with NUL bytes.
		n := runtime.Stack(trace, true)
		// %v rather than %s: a panic value can be of any type, not just a
		// string or an error.
		log.Printf("FATAL: Input [%s] panicked: %v, Stack:\n%s\n",
			input.Name, err, trace[:n])
		log.Println("PLEASE REPORT THIS PANIC ON GITHUB with " +
			"stack trace, configuration, and OS information: " +
			"https://github.com/influxdata/telegraf/issues/new")
	}
}
// gatherParallel runs the inputs that are using the same reporting interval
// as the telegraf agent.
func (a *Agent) gatherParallel(pointChan chan *client.Point) error {
@ -103,6 +116,7 @@ func (a *Agent) gatherParallel(pointChan chan *client.Point) error {
wg.Add(1)
counter++
go func(input *models.RunningInput) {
defer panicRecover(input)
defer wg.Done()
acc := NewAccumulator(input.Config, pointChan)
@ -148,6 +162,8 @@ func (a *Agent) gatherSeparate(
input *models.RunningInput,
pointChan chan *client.Point,
) error {
defer panicRecover(input)
ticker := time.NewTicker(input.Config.Interval)
for {