Fix prometheus_client reload behavior

fixes #2282
This commit is contained in:
Cameron Sparr 2017-01-21 15:37:53 -08:00
parent e0a36c38df
commit dfddcc5146
4 changed files with 19 additions and 19 deletions

View File

@@ -60,6 +60,7 @@ be deprecated eventually.
- [#2356](https://github.com/influxdata/telegraf/issues/2356): cpu input panic when /proc/stat is empty. - [#2356](https://github.com/influxdata/telegraf/issues/2356): cpu input panic when /proc/stat is empty.
- [#2341](https://github.com/influxdata/telegraf/issues/2341): telegraf swallowing panics in --test mode. - [#2341](https://github.com/influxdata/telegraf/issues/2341): telegraf swallowing panics in --test mode.
- [#2358](https://github.com/influxdata/telegraf/pull/2358): Create pidfile with 644 permissions & defer file deletion. - [#2358](https://github.com/influxdata/telegraf/pull/2358): Create pidfile with 644 permissions & defer file deletion.
- [#2282](https://github.com/influxdata/telegraf/issues/2282): Reloading telegraf freezes prometheus output.
## v1.2.1 [2017-02-01] ## v1.2.1 [2017-02-01]

View File

@@ -398,5 +398,6 @@ func (a *Agent) Run(shutdown chan struct{}) error {
} }
wg.Wait() wg.Wait()
a.Close()
return nil return nil
} }

View File

@@ -4,9 +4,9 @@ machine:
post: post:
- sudo service zookeeper stop - sudo service zookeeper stop
- go version - go version
- go version | grep 1.7.5 || sudo rm -rf /usr/local/go - sudo rm -rf /usr/local/go
- wget https://storage.googleapis.com/golang/go1.7.5.linux-amd64.tar.gz - wget https://storage.googleapis.com/golang/go1.8.linux-amd64.tar.gz
- sudo tar -C /usr/local -xzf go1.7.5.linux-amd64.tar.gz - sudo tar -C /usr/local -xzf go1.8.linux-amd64.tar.gz
- go version - go version
dependencies: dependencies:

View File

@@ -1,6 +1,7 @@
package prometheus_client package prometheus_client
import ( import (
"context"
"fmt" "fmt"
"log" "log"
"net/http" "net/http"
@@ -24,6 +25,7 @@ type MetricWithExpiration struct {
type PrometheusClient struct { type PrometheusClient struct {
Listen string Listen string
ExpirationInterval internal.Duration `toml:"expiration_interval"` ExpirationInterval internal.Duration `toml:"expiration_interval"`
server *http.Server
metrics map[string]*MetricWithExpiration metrics map[string]*MetricWithExpiration
@@ -41,30 +43,25 @@ var sampleConfig = `
func (p *PrometheusClient) Start() error { func (p *PrometheusClient) Start() error {
p.metrics = make(map[string]*MetricWithExpiration) p.metrics = make(map[string]*MetricWithExpiration)
prometheus.Register(p) prometheus.Register(p)
defer func() {
if r := recover(); r != nil {
// recovering from panic here because there is no way to stop a
// running http go server except by a kill signal. Since the server
// does not stop on SIGHUP, Start() will panic when the process
// is reloaded.
}
}()
if p.Listen == "" { if p.Listen == "" {
p.Listen = "localhost:9126" p.Listen = "localhost:9126"
} }
http.Handle("/metrics", prometheus.Handler()) mux := http.NewServeMux()
server := &http.Server{ mux.Handle("/metrics", prometheus.Handler())
Addr: p.Listen,
p.server = &http.Server{
Addr: p.Listen,
Handler: mux,
} }
go server.ListenAndServe() go p.server.ListenAndServe()
return nil return nil
} }
func (p *PrometheusClient) Stop() { func (p *PrometheusClient) Stop() {
// TODO: Use a listener for http.Server that counts active connections // plugin gets cleaned up in Close() already.
// that can be stopped and closed gracefully
} }
func (p *PrometheusClient) Connect() error { func (p *PrometheusClient) Connect() error {
@@ -73,8 +70,9 @@ func (p *PrometheusClient) Connect() error {
} }
func (p *PrometheusClient) Close() error { func (p *PrometheusClient) Close() error {
// This service output does not need to close any of its connections ctx, cancel := context.WithTimeout(context.Background(), time.Second*5)
return nil defer cancel()
return p.server.Shutdown(ctx)
} }
func (p *PrometheusClient) SampleConfig() string { func (p *PrometheusClient) SampleConfig() string {