diff --git a/CHANGELOG.md b/CHANGELOG.md index ce2a883e1..f614f4422 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -40,6 +40,7 @@ should now look like: - [#1399](https://github.com/influxdata/telegraf/issues/1399): Add `read_repairs` statistics to riak plugin. - [#1405](https://github.com/influxdata/telegraf/issues/1405): Fix memory/connection leak in prometheus input plugin. - [#1378](https://github.com/influxdata/telegraf/issues/1378): Trim BOM from config file for Windows support. +- [#1339](https://github.com/influxdata/telegraf/issues/1339): Prometheus client output panic on service reload. ## v1.0 beta 2 [2016-06-21] diff --git a/plugins/outputs/prometheus_client/prometheus_client.go b/plugins/outputs/prometheus_client/prometheus_client.go index d5e3f1ced..804ae1fad 100644 --- a/plugins/outputs/prometheus_client/prometheus_client.go +++ b/plugins/outputs/prometheus_client/prometheus_client.go @@ -25,8 +25,7 @@ var ( ) type PrometheusClient struct { - Listen string - metrics map[string]*prometheus.UntypedVec + Listen string } var sampleConfig = ` @@ -35,6 +34,14 @@ var sampleConfig = ` ` func (p *PrometheusClient) Start() error { + defer func() { + if r := recover(); r != nil { + // recovering from panic here because there is no way to stop a + // running http go server except by a kill signal. Since the server + // does not stop on SIGHUP, Start() will panic when the process + // is reloaded. + } + }() if p.Listen == "" { p.Listen = "localhost:9126" } @@ -44,7 +51,6 @@ func (p *PrometheusClient) Start() error { Addr: p.Listen, } - p.metrics = make(map[string]*prometheus.UntypedVec) go server.ListenAndServe() return nil } @@ -118,24 +124,26 @@ func (p *PrometheusClient) Write(metrics []telegraf.Metric) error { continue } - // Create a new metric if it hasn't been created yet. - if _, ok := p.metrics[mname]; !ok { - p.metrics[mname] = prometheus.NewUntypedVec( - prometheus.UntypedOpts{ - Name: mname, - Help: "Telegraf collected metric", - }, - labels, - ) - if err := prometheus.Register(p.metrics[mname]); err != nil { - log.Printf("prometheus_client: Metric failed to register with prometheus, %s", err) - continue - } + mVec := prometheus.NewUntypedVec( + prometheus.UntypedOpts{ + Name: mname, + Help: "Telegraf collected metric", + }, + labels, + ) + collector, err := prometheus.RegisterOrGet(mVec) + if err != nil { + log.Printf("prometheus_client: Metric failed to register with prometheus, %s", err) + continue + } + mVec, ok := collector.(*prometheus.UntypedVec) + if !ok { + continue } switch val := val.(type) { case int64: - m, err := p.metrics[mname].GetMetricWith(l) + m, err := mVec.GetMetricWith(l) if err != nil { log.Printf("ERROR Getting metric in Prometheus output, "+ "key: %s, labels: %v,\nerr: %s\n", @@ -144,7 +152,7 @@ func (p *PrometheusClient) Write(metrics []telegraf.Metric) error { } m.Set(float64(val)) case float64: - m, err := p.metrics[mname].GetMetricWith(l) + m, err := mVec.GetMetricWith(l) if err != nil { log.Printf("ERROR Getting metric in Prometheus output, "+ "key: %s, labels: %v,\nerr: %s\n",