From dfddcc514656592a5232bdeb3a98f01811a4a400 Mon Sep 17 00:00:00 2001
From: Cameron Sparr
Date: Sat, 21 Jan 2017 15:37:53 -0800
Subject: [PATCH] Fix prometheus_client reload behavior

fixes #2282
---
 CHANGELOG.md                               |  1 +
 agent/agent.go                             |  1 +
 circle.yml                                 |  6 ++--
 .../prometheus_client/prometheus_client.go | 30 +++++++++----------
 4 files changed, 19 insertions(+), 19 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 283c5ccba..2aaf94adb 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -60,6 +60,7 @@ be deprecated eventually.
 - [#2356](https://github.com/influxdata/telegraf/issues/2356): cpu input panic when /proc/stat is empty.
 - [#2341](https://github.com/influxdata/telegraf/issues/2341): telegraf swallowing panics in --test mode.
 - [#2358](https://github.com/influxdata/telegraf/pull/2358): Create pidfile with 644 permissions & defer file deletion.
+- [#2282](https://github.com/influxdata/telegraf/issues/2282): Reloading telegraf freezes prometheus output.
 
 ## v1.2.1 [2017-02-01]
 
diff --git a/agent/agent.go b/agent/agent.go
index 7909a4c8a..e82caf148 100644
--- a/agent/agent.go
+++ b/agent/agent.go
@@ -398,5 +398,6 @@ func (a *Agent) Run(shutdown chan struct{}) error {
 	}
 
 	wg.Wait()
+	a.Close()
 	return nil
 }
diff --git a/circle.yml b/circle.yml
index c237040a2..a7d15368d 100644
--- a/circle.yml
+++ b/circle.yml
@@ -4,9 +4,9 @@ machine:
   post:
     - sudo service zookeeper stop
     - go version
-    - go version | grep 1.7.5 || sudo rm -rf /usr/local/go
-    - wget https://storage.googleapis.com/golang/go1.7.5.linux-amd64.tar.gz
-    - sudo tar -C /usr/local -xzf go1.7.5.linux-amd64.tar.gz
+    - sudo rm -rf /usr/local/go
+    - wget https://storage.googleapis.com/golang/go1.8.linux-amd64.tar.gz
+    - sudo tar -C /usr/local -xzf go1.8.linux-amd64.tar.gz
     - go version
 
 dependencies:
diff --git a/plugins/outputs/prometheus_client/prometheus_client.go b/plugins/outputs/prometheus_client/prometheus_client.go
index e86a0a526..8c52b3358 100644
--- a/plugins/outputs/prometheus_client/prometheus_client.go
+++ b/plugins/outputs/prometheus_client/prometheus_client.go
@@ -1,6 +1,7 @@
 package prometheus_client
 
 import (
+	"context"
 	"fmt"
 	"log"
 	"net/http"
@@ -24,6 +25,7 @@ type MetricWithExpiration struct {
 type PrometheusClient struct {
 	Listen             string
 	ExpirationInterval internal.Duration `toml:"expiration_interval"`
+	server             *http.Server
 
 	metrics map[string]*MetricWithExpiration
 
@@ -41,30 +43,25 @@ var sampleConfig = `
 func (p *PrometheusClient) Start() error {
 	p.metrics = make(map[string]*MetricWithExpiration)
 	prometheus.Register(p)
-	defer func() {
-		if r := recover(); r != nil {
-			// recovering from panic here because there is no way to stop a
-			// running http go server except by a kill signal. Since the server
-			// does not stop on SIGHUP, Start() will panic when the process
-			// is reloaded.
-		}
-	}()
+
 	if p.Listen == "" {
 		p.Listen = "localhost:9126"
 	}
 
-	http.Handle("/metrics", prometheus.Handler())
-	server := &http.Server{
-		Addr: p.Listen,
+	mux := http.NewServeMux()
+	mux.Handle("/metrics", prometheus.Handler())
+
+	p.server = &http.Server{
+		Addr:    p.Listen,
+		Handler: mux,
 	}
 
-	go server.ListenAndServe()
+	go p.server.ListenAndServe()
 	return nil
 }
 
 func (p *PrometheusClient) Stop() {
-	// TODO: Use a listener for http.Server that counts active connections
-	// that can be stopped and closed gracefully
+	// plugin gets cleaned up in Close() already.
 }
 
 func (p *PrometheusClient) Connect() error {
@@ -73,8 +70,9 @@ func (p *PrometheusClient) Connect() error {
 }
 
 func (p *PrometheusClient) Close() error {
-	// This service output does not need to close any of its connections
-	return nil
+	ctx, cancel := context.WithTimeout(context.Background(), time.Second*5)
+	defer cancel()
+	return p.server.Shutdown(ctx)
 }
 
 func (p *PrometheusClient) SampleConfig() string {