Add support for k8s service DNS discovery to prometheus input (#3236)

This commit is contained in:
Christian Meilke 2017-09-19 00:06:11 +02:00 committed by Daniel Nelson
parent c47cc28950
commit af7710dcb8
3 changed files with 142 additions and 15 deletions

View File

@ -5,9 +5,9 @@ exposing metrics with Prometheus format
### Configuration: ### Configuration:
Example for Kubernetes apiserver Example for Kubernetes API server
```toml ```toml
# Get all metrics from Kube-apiserver # Get all metrics from Kubernetes API server
[[inputs.prometheus]] [[inputs.prometheus]]
# An array of urls to scrape metrics from. # An array of urls to scrape metrics from.
urls = ["http://my-kube-apiserver:8080/metrics"] urls = ["http://my-kube-apiserver:8080/metrics"]
@ -15,7 +15,7 @@ Example for Kubernetes apiserver
Specify a 10 second timeout for slower/over-loaded clients Specify a 10 second timeout for slower/over-loaded clients
```toml ```toml
# Get all metrics from Kube-apiserver # Get all metrics from Kubernetes API server
[[inputs.prometheus]] [[inputs.prometheus]]
# An array of urls to scrape metrics from. # An array of urls to scrape metrics from.
urls = ["http://my-kube-apiserver:8080/metrics"] urls = ["http://my-kube-apiserver:8080/metrics"]
@ -28,7 +28,7 @@ You can use more complex configuration
to filter and some tags to filter and some tags
```toml ```toml
# Get all metrics from Kube-apiserver # Get all metrics from Kubernetes API server
[[inputs.prometheus]] [[inputs.prometheus]]
# An array of urls to scrape metrics from. # An array of urls to scrape metrics from.
urls = ["http://my-kube-apiserver:8080/metrics"] urls = ["http://my-kube-apiserver:8080/metrics"]
@ -61,6 +61,18 @@ to filter and some tags
ssl_key = '/path/to/keyfile' ssl_key = '/path/to/keyfile'
``` ```
```toml
# Use with [Kubernetes headless services](https://kubernetes.io/docs/concepts/services-networking/service/#headless-services)
[[inputs.prometheus]]
# An array of urls to scrape metrics from.
urls = ["https://my-kube-apiserver:8080/metrics"]
# An array of Kubernetes services to scrape metrics from.
# The IP addresses of all pods behind these services will be resolved and
# then scraped
kubernetes_services = ["https://my-headless-service.my-namespace:8080/metrics"]
```
### Usage for Caddy HTTP server ### Usage for Caddy HTTP server
If you want to monitor Caddy, you need to use Caddy with its Prometheus plugin: If you want to monitor Caddy, you need to use Caddy with its Prometheus plugin:

View File

@ -4,7 +4,10 @@ import (
"errors" "errors"
"fmt" "fmt"
"io/ioutil" "io/ioutil"
"log"
"net"
"net/http" "net/http"
"net/url"
"sync" "sync"
"time" "time"
@ -16,8 +19,12 @@ import (
const acceptHeader = `application/vnd.google.protobuf;proto=io.prometheus.client.MetricFamily;encoding=delimited;q=0.7,text/plain;version=0.0.4;q=0.3` const acceptHeader = `application/vnd.google.protobuf;proto=io.prometheus.client.MetricFamily;encoding=delimited;q=0.7,text/plain;version=0.0.4;q=0.3`
type Prometheus struct { type Prometheus struct {
// An array of urls to scrape metrics from.
Urls []string Urls []string
// An array of Kubernetes services to scrape metrics from.
KubernetesServices []string
// Bearer Token authorization file path // Bearer Token authorization file path
BearerToken string `toml:"bearer_token"` BearerToken string `toml:"bearer_token"`
@ -39,6 +46,9 @@ var sampleConfig = `
## An array of urls to scrape metrics from. ## An array of urls to scrape metrics from.
urls = ["http://localhost:9100/metrics"] urls = ["http://localhost:9100/metrics"]
## An array of Kubernetes services to scrape metrics from.
kubernetes_services = ["http://my-service-dns.my-namespace:9100/metrics"]
## Use bearer token for authorization ## Use bearer token for authorization
# bearer_token = /path/to/bearer/token # bearer_token = /path/to/bearer/token
@ -63,6 +73,53 @@ func (p *Prometheus) Description() string {
var ErrProtocolError = errors.New("prometheus protocol error") var ErrProtocolError = errors.New("prometheus protocol error")
func (p *Prometheus) AddressToURL(u *url.URL, address string) string {
host := address
if u.Port() != "" {
host = address + ":" + u.Port()
}
reconstructedUrl := url.URL{
Scheme: u.Scheme,
Opaque: u.Opaque,
User: u.User,
Path: u.Path,
RawPath: u.RawPath,
ForceQuery: u.ForceQuery,
RawQuery: u.RawQuery,
Fragment: u.Fragment,
Host: host,
}
return reconstructedUrl.String()
}
type UrlAndAddress struct {
Url string
Address string
}
func (p *Prometheus) GetAllURLs() ([]UrlAndAddress, error) {
allUrls := make([]UrlAndAddress, 0)
for _, url := range p.Urls {
allUrls = append(allUrls, UrlAndAddress{Url: url})
}
for _, service := range p.KubernetesServices {
u, err := url.Parse(service)
if err != nil {
return nil, err
}
resolvedAddresses, err := net.LookupHost(u.Hostname())
if err != nil {
log.Printf("prometheus: Could not resolve %s, skipping it. Error: %s", u.Host, err)
continue
}
for _, resolved := range resolvedAddresses {
serviceUrl := p.AddressToURL(u, resolved)
allUrls = append(allUrls, UrlAndAddress{Url: serviceUrl, Address: resolved})
}
}
return allUrls, nil
}
// Reads stats from all configured servers accumulates stats. // Reads stats from all configured servers accumulates stats.
// Returns one of the errors encountered while gather stats (if any). // Returns one of the errors encountered while gather stats (if any).
func (p *Prometheus) Gather(acc telegraf.Accumulator) error { func (p *Prometheus) Gather(acc telegraf.Accumulator) error {
@ -76,12 +133,16 @@ func (p *Prometheus) Gather(acc telegraf.Accumulator) error {
var wg sync.WaitGroup var wg sync.WaitGroup
for _, serv := range p.Urls { allUrls, err := p.GetAllURLs()
if err != nil {
return err
}
for _, url := range allUrls {
wg.Add(1) wg.Add(1)
go func(serv string) { go func(serviceUrl UrlAndAddress) {
defer wg.Done() defer wg.Done()
acc.AddError(p.gatherURL(serv, acc)) acc.AddError(p.gatherURL(serviceUrl, acc))
}(serv) }(url)
} }
wg.Wait() wg.Wait()
@ -116,8 +177,8 @@ func (p *Prometheus) createHttpClient() (*http.Client, error) {
return client, nil return client, nil
} }
func (p *Prometheus) gatherURL(url string, acc telegraf.Accumulator) error { func (p *Prometheus) gatherURL(url UrlAndAddress, acc telegraf.Accumulator) error {
var req, err = http.NewRequest("GET", url, nil) var req, err = http.NewRequest("GET", url.Url, nil)
req.Header.Add("Accept", acceptHeader) req.Header.Add("Accept", acceptHeader)
var token []byte var token []byte
var resp *http.Response var resp *http.Response
@ -132,11 +193,11 @@ func (p *Prometheus) gatherURL(url string, acc telegraf.Accumulator) error {
resp, err = p.client.Do(req) resp, err = p.client.Do(req)
if err != nil { if err != nil {
return fmt.Errorf("error making HTTP request to %s: %s", url, err) return fmt.Errorf("error making HTTP request to %s: %s", url.Url, err)
} }
defer resp.Body.Close() defer resp.Body.Close()
if resp.StatusCode != http.StatusOK { if resp.StatusCode != http.StatusOK {
return fmt.Errorf("%s returned HTTP status %s", url, resp.Status) return fmt.Errorf("%s returned HTTP status %s", url.Url, resp.Status)
} }
body, err := ioutil.ReadAll(resp.Body) body, err := ioutil.ReadAll(resp.Body)
@ -147,12 +208,15 @@ func (p *Prometheus) gatherURL(url string, acc telegraf.Accumulator) error {
metrics, err := Parse(body, resp.Header) metrics, err := Parse(body, resp.Header)
if err != nil { if err != nil {
return fmt.Errorf("error reading metrics for %s: %s", return fmt.Errorf("error reading metrics for %s: %s",
url, err) url.Url, err)
} }
// Add (or not) collected metrics // Add (or not) collected metrics
for _, metric := range metrics { for _, metric := range metrics {
tags := metric.Tags() tags := metric.Tags()
tags["url"] = url tags["url"] = url.Url
if url.Address != "" {
tags["address"] = url.Address
}
acc.AddFields(metric.Name(), metric.Fields(), tags, metric.Time()) acc.AddFields(metric.Name(), metric.Fields(), tags, metric.Time())
} }

View File

@ -4,6 +4,7 @@ import (
"fmt" "fmt"
"net/http" "net/http"
"net/http/httptest" "net/http/httptest"
"net/url"
"testing" "testing"
"time" "time"
@ -48,5 +49,55 @@ func TestPrometheusGeneratesMetrics(t *testing.T) {
assert.True(t, acc.HasFloatField("go_goroutines", "gauge")) assert.True(t, acc.HasFloatField("go_goroutines", "gauge"))
assert.True(t, acc.HasFloatField("test_metric", "value")) assert.True(t, acc.HasFloatField("test_metric", "value"))
assert.True(t, acc.HasTimestamp("test_metric", time.Unix(1490802350, 0))) assert.True(t, acc.HasTimestamp("test_metric", time.Unix(1490802350, 0)))
assert.False(t, acc.HasTag("test_metric", "address"))
}
func TestPrometheusGeneratesMetricsWithHostNameTag(t *testing.T) {
ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
fmt.Fprintln(w, sampleTextFormat)
}))
defer ts.Close()
p := &Prometheus{
KubernetesServices: []string{ts.URL},
}
u, _ := url.Parse(ts.URL)
tsAddress := u.Hostname()
var acc testutil.Accumulator
err := acc.GatherError(p.Gather)
require.NoError(t, err)
assert.True(t, acc.HasFloatField("go_gc_duration_seconds", "count"))
assert.True(t, acc.HasFloatField("go_goroutines", "gauge"))
assert.True(t, acc.HasFloatField("test_metric", "value"))
assert.True(t, acc.HasTimestamp("test_metric", time.Unix(1490802350, 0)))
assert.True(t, acc.TagValue("test_metric", "address") == tsAddress)
}
func TestPrometheusGeneratesMetricsAlthoughFirstDNSFails(t *testing.T) {
if testing.Short() {
t.Skip("Skipping integration test in short mode")
}
ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
fmt.Fprintln(w, sampleTextFormat)
}))
defer ts.Close()
p := &Prometheus{
Urls: []string{ts.URL},
KubernetesServices: []string{"http://random.telegraf.local:88/metrics"},
}
var acc testutil.Accumulator
err := acc.GatherError(p.Gather)
require.NoError(t, err)
assert.True(t, acc.HasFloatField("go_gc_duration_seconds", "count"))
assert.True(t, acc.HasFloatField("go_goroutines", "gauge"))
assert.True(t, acc.HasFloatField("test_metric", "value"))
assert.True(t, acc.HasTimestamp("test_metric", time.Unix(1490802350, 0)))
} }