Recover from prometheus multiple handler panic

closes #1339
Cameron Sparr 2016-06-10 17:18:38 +01:00
parent a6365a6086
commit f62c493c77
2 changed files with 27 additions and 18 deletions

CHANGELOG.md

@@ -40,6 +40,7 @@ should now look like:
 - [#1399](https://github.com/influxdata/telegraf/issues/1399): Add `read_repairs` statistics to riak plugin.
 - [#1405](https://github.com/influxdata/telegraf/issues/1405): Fix memory/connection leak in prometheus input plugin.
 - [#1378](https://github.com/influxdata/telegraf/issues/1378): Trim BOM from config file for Windows support.
+- [#1339](https://github.com/influxdata/telegraf/issues/1339): Prometheus client output panic on service reload.
 
 ## v1.0 beta 2 [2016-06-21]

plugins/outputs/prometheus_client/prometheus_client.go

@@ -26,7 +26,6 @@ var (
 type PrometheusClient struct {
 	Listen string
-	metrics map[string]*prometheus.UntypedVec
 }
 
 var sampleConfig = `
@@ -35,6 +34,14 @@ var sampleConfig = `
 `
 
 func (p *PrometheusClient) Start() error {
+	defer func() {
+		if r := recover(); r != nil {
+			// recovering from panic here because there is no way to stop a
+			// running http go server except by a kill signal. Since the server
+			// does not stop on SIGHUP, Start() will panic when the process
+			// is reloaded.
+		}
+	}()
 	if p.Listen == "" {
 		p.Listen = "localhost:9126"
 	}
@@ -44,7 +51,6 @@ func (p *PrometheusClient) Start() error {
 		Addr: p.Listen,
 	}
-	p.metrics = make(map[string]*prometheus.UntypedVec)
 	go server.ListenAndServe()
 	return nil
 }
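The recover added above guards against a panic from `net/http` rather than from the Prometheus client itself: the hunk only sets `Addr` on the `http.Server`, which implies the handler is registered on `http.DefaultServeMux`, and when the process is reloaded (SIGHUP) `Start()` runs again while the first server goroutine is still alive, so registering the same pattern a second time panics with "http: multiple registrations for /metrics". A minimal sketch of that failure mode and the recover guard; the function name and the stand-in handler are illustrative, not the plugin's actual code:

```go
package main

import (
	"fmt"
	"net/http"
)

// startMetricsServer mimics the Start() pattern above: register a pattern on
// http.DefaultServeMux, start the server in a goroutine, and recover if the
// pattern was already registered by an earlier call.
func startMetricsServer(listen string) error {
	defer func() {
		if r := recover(); r != nil {
			// http.Handle panics with "http: multiple registrations for /metrics"
			// when the same pattern is registered twice on DefaultServeMux; the
			// recover turns the reload into a no-op instead of crashing.
			fmt.Println("recovered:", r)
		}
	}()

	// http.NotFoundHandler() is only a stand-in for the real metrics handler.
	http.Handle("/metrics", http.NotFoundHandler())

	server := &http.Server{Addr: listen} // nil Handler means DefaultServeMux
	go server.ListenAndServe()
	return nil
}

func main() {
	startMetricsServer("localhost:9126")
	// A reload calls Start() again while the first server goroutine is still
	// running; the second registration panics and is recovered.
	startMetricsServer("localhost:9126")
}
```

The recovered value is dropped, matching the plugin's behaviour: the already-running server keeps serving the existing /metrics handler.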
@@ -118,24 +124,26 @@ func (p *PrometheusClient) Write(metrics []telegraf.Metric) error {
 			continue
 		}
 
-		// Create a new metric if it hasn't been created yet.
-		if _, ok := p.metrics[mname]; !ok {
-			p.metrics[mname] = prometheus.NewUntypedVec(
-				prometheus.UntypedOpts{
-					Name: mname,
-					Help: "Telegraf collected metric",
-				},
-				labels,
-			)
-			if err := prometheus.Register(p.metrics[mname]); err != nil {
-				log.Printf("prometheus_client: Metric failed to register with prometheus, %s", err)
-				continue
-			}
-		}
+		mVec := prometheus.NewUntypedVec(
+			prometheus.UntypedOpts{
+				Name: mname,
+				Help: "Telegraf collected metric",
+			},
+			labels,
+		)
+		collector, err := prometheus.RegisterOrGet(mVec)
+		if err != nil {
+			log.Printf("prometheus_client: Metric failed to register with prometheus, %s", err)
+			continue
+		}
+		mVec, ok := collector.(*prometheus.UntypedVec)
+		if !ok {
+			continue
+		}
 
 		switch val := val.(type) {
 		case int64:
-			m, err := p.metrics[mname].GetMetricWith(l)
+			m, err := mVec.GetMetricWith(l)
 			if err != nil {
 				log.Printf("ERROR Getting metric in Prometheus output, "+
 					"key: %s, labels: %v,\nerr: %s\n",
@@ -144,7 +152,7 @@ func (p *PrometheusClient) Write(metrics []telegraf.Metric) error {
 			}
 			m.Set(float64(val))
 		case float64:
-			m, err := p.metrics[mname].GetMetricWith(l)
+			m, err := mVec.GetMetricWith(l)
 			if err != nil {
 				log.Printf("ERROR Getting metric in Prometheus output, "+
 					"key: %s, labels: %v,\nerr: %s\n",