Fix panics in some inputs causing Telegraf to exit
closes #585 closes #584
This commit is contained in:
parent
47ea2d5fb4
commit
cf568487c8
|
@ -40,6 +40,7 @@ specifying a docker endpoint to get metrics from.
|
||||||
- [#440](https://github.com/influxdata/telegraf/issues/440): Don't query filtered devices for disk stats.
|
- [#440](https://github.com/influxdata/telegraf/issues/440): Don't query filtered devices for disk stats.
|
||||||
- [#463](https://github.com/influxdata/telegraf/issues/463): Docker plugin not working on AWS Linux
|
- [#463](https://github.com/influxdata/telegraf/issues/463): Docker plugin not working on AWS Linux
|
||||||
- [#568](https://github.com/influxdata/telegraf/issues/568): Multiple output race condition.
|
- [#568](https://github.com/influxdata/telegraf/issues/568): Multiple output race condition.
|
||||||
|
- [#585](https://github.com/influxdata/telegraf/pull/585): Log stack trace and continue on Telegraf panic. Thanks @wutaizeng!
|
||||||
|
|
||||||
## v0.10.0 [2016-01-12]
|
## v0.10.0 [2016-01-12]
|
||||||
|
|
||||||
|
|
16
agent.go
16
agent.go
|
@ -7,6 +7,7 @@ import (
|
||||||
"math/big"
|
"math/big"
|
||||||
"math/rand"
|
"math/rand"
|
||||||
"os"
|
"os"
|
||||||
|
"runtime"
|
||||||
"sync"
|
"sync"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
@ -87,6 +88,18 @@ func (a *Agent) Close() error {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func panicRecover(input *models.RunningInput) {
|
||||||
|
if err := recover(); err != nil {
|
||||||
|
trace := make([]byte, 2048)
|
||||||
|
runtime.Stack(trace, true)
|
||||||
|
log.Printf("FATAL: Input [%s] panicked: %s, Stack:\n%s\n",
|
||||||
|
input.Name, err, trace)
|
||||||
|
log.Println("PLEASE REPORT THIS PANIC ON GITHUB with " +
|
||||||
|
"stack trace, configuration, and OS information: " +
|
||||||
|
"https://github.com/influxdata/telegraf/issues/new")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// gatherParallel runs the inputs that are using the same reporting interval
|
// gatherParallel runs the inputs that are using the same reporting interval
|
||||||
// as the telegraf agent.
|
// as the telegraf agent.
|
||||||
func (a *Agent) gatherParallel(pointChan chan *client.Point) error {
|
func (a *Agent) gatherParallel(pointChan chan *client.Point) error {
|
||||||
|
@ -103,6 +116,7 @@ func (a *Agent) gatherParallel(pointChan chan *client.Point) error {
|
||||||
wg.Add(1)
|
wg.Add(1)
|
||||||
counter++
|
counter++
|
||||||
go func(input *models.RunningInput) {
|
go func(input *models.RunningInput) {
|
||||||
|
defer panicRecover(input)
|
||||||
defer wg.Done()
|
defer wg.Done()
|
||||||
|
|
||||||
acc := NewAccumulator(input.Config, pointChan)
|
acc := NewAccumulator(input.Config, pointChan)
|
||||||
|
@ -148,6 +162,8 @@ func (a *Agent) gatherSeparate(
|
||||||
input *models.RunningInput,
|
input *models.RunningInput,
|
||||||
pointChan chan *client.Point,
|
pointChan chan *client.Point,
|
||||||
) error {
|
) error {
|
||||||
|
defer panicRecover(input)
|
||||||
|
|
||||||
ticker := time.NewTicker(input.Config.Interval)
|
ticker := time.NewTicker(input.Config.Interval)
|
||||||
|
|
||||||
for {
|
for {
|
||||||
|
|
Loading…
Reference in New Issue