Add support for k8s service DNS discovery to prometheus input (#3236)

This commit is contained in:
Christian Meilke 2017-09-19 00:06:11 +02:00 committed by Daniel Nelson
parent c47cc28950
commit af7710dcb8
3 changed files with 142 additions and 15 deletions

View File

@ -5,9 +5,9 @@ exposing metrics with Prometheus format
### Configuration:
Example for Kubernetes apiserver
Example for Kubernetes API server
```toml
# Get all metrics from Kube-apiserver
# Get all metrics from Kubernetes API server
[[inputs.prometheus]]
# An array of urls to scrape metrics from.
urls = ["http://my-kube-apiserver:8080/metrics"]
@ -15,7 +15,7 @@ Example for Kubernetes apiserver
Specify a 10 second timeout for slower/over-loaded clients
```toml
# Get all metrics from Kube-apiserver
# Get all metrics from Kubernetes API server
[[inputs.prometheus]]
# An array of urls to scrape metrics from.
urls = ["http://my-kube-apiserver:8080/metrics"]
@ -28,7 +28,7 @@ You can use more complex configuration
to filter and some tags
```toml
# Get all metrics from Kube-apiserver
# Get all metrics from Kubernetes API server
[[inputs.prometheus]]
# An array of urls to scrape metrics from.
urls = ["http://my-kube-apiserver:8080/metrics"]
@ -61,6 +61,18 @@ to filter and some tags
ssl_key = '/path/to/keyfile'
```
```toml
# Use with [Kubernetes headless services](https://kubernetes.io/docs/concepts/services-networking/service/#headless-services)
[[inputs.prometheus]]
# An array of urls to scrape metrics from.
urls = ["https://my-kube-apiserver:8080/metrics"]
# An array of Kubernetes services to scrape metrics from.
# The IP addresses of all pods behind these services will be resolved and
# then scraped
kubernetes_services = ["https://my-headless-service.my-namespace:8080/metrics"]
```
### Usage for Caddy HTTP server
If you want to monitor Caddy, you need to use Caddy with its Prometheus plugin:

View File

@ -4,7 +4,10 @@ import (
"errors"
"fmt"
"io/ioutil"
"log"
"net"
"net/http"
"net/url"
"sync"
"time"
@ -16,8 +19,12 @@ import (
const acceptHeader = `application/vnd.google.protobuf;proto=io.prometheus.client.MetricFamily;encoding=delimited;q=0.7,text/plain;version=0.0.4;q=0.3`
type Prometheus struct {
// An array of urls to scrape metrics from.
Urls []string
// An array of Kubernetes services to scrape metrics from.
KubernetesServices []string
// Bearer Token authorization file path
BearerToken string `toml:"bearer_token"`
@ -39,6 +46,9 @@ var sampleConfig = `
## An array of urls to scrape metrics from.
urls = ["http://localhost:9100/metrics"]
## An array of Kubernetes services to scrape metrics from.
kubernetes_services = ["http://my-service-dns.my-namespace:9100/metrics"]
## Use bearer token for authorization
# bearer_token = /path/to/bearer/token
@ -63,6 +73,53 @@ func (p *Prometheus) Description() string {
var ErrProtocolError = errors.New("prometheus protocol error")
func (p *Prometheus) AddressToURL(u *url.URL, address string) string {
host := address
if u.Port() != "" {
host = address + ":" + u.Port()
}
reconstructedUrl := url.URL{
Scheme: u.Scheme,
Opaque: u.Opaque,
User: u.User,
Path: u.Path,
RawPath: u.RawPath,
ForceQuery: u.ForceQuery,
RawQuery: u.RawQuery,
Fragment: u.Fragment,
Host: host,
}
return reconstructedUrl.String()
}
type UrlAndAddress struct {
Url string
Address string
}
func (p *Prometheus) GetAllURLs() ([]UrlAndAddress, error) {
allUrls := make([]UrlAndAddress, 0)
for _, url := range p.Urls {
allUrls = append(allUrls, UrlAndAddress{Url: url})
}
for _, service := range p.KubernetesServices {
u, err := url.Parse(service)
if err != nil {
return nil, err
}
resolvedAddresses, err := net.LookupHost(u.Hostname())
if err != nil {
log.Printf("prometheus: Could not resolve %s, skipping it. Error: %s", u.Host, err)
continue
}
for _, resolved := range resolvedAddresses {
serviceUrl := p.AddressToURL(u, resolved)
allUrls = append(allUrls, UrlAndAddress{Url: serviceUrl, Address: resolved})
}
}
return allUrls, nil
}
// Reads stats from all configured servers accumulates stats.
// Returns one of the errors encountered while gather stats (if any).
func (p *Prometheus) Gather(acc telegraf.Accumulator) error {
@ -76,12 +133,16 @@ func (p *Prometheus) Gather(acc telegraf.Accumulator) error {
var wg sync.WaitGroup
for _, serv := range p.Urls {
allUrls, err := p.GetAllURLs()
if err != nil {
return err
}
for _, url := range allUrls {
wg.Add(1)
go func(serv string) {
go func(serviceUrl UrlAndAddress) {
defer wg.Done()
acc.AddError(p.gatherURL(serv, acc))
}(serv)
acc.AddError(p.gatherURL(serviceUrl, acc))
}(url)
}
wg.Wait()
@ -116,8 +177,8 @@ func (p *Prometheus) createHttpClient() (*http.Client, error) {
return client, nil
}
func (p *Prometheus) gatherURL(url string, acc telegraf.Accumulator) error {
var req, err = http.NewRequest("GET", url, nil)
func (p *Prometheus) gatherURL(url UrlAndAddress, acc telegraf.Accumulator) error {
var req, err = http.NewRequest("GET", url.Url, nil)
req.Header.Add("Accept", acceptHeader)
var token []byte
var resp *http.Response
@ -132,11 +193,11 @@ func (p *Prometheus) gatherURL(url string, acc telegraf.Accumulator) error {
resp, err = p.client.Do(req)
if err != nil {
return fmt.Errorf("error making HTTP request to %s: %s", url, err)
return fmt.Errorf("error making HTTP request to %s: %s", url.Url, err)
}
defer resp.Body.Close()
if resp.StatusCode != http.StatusOK {
return fmt.Errorf("%s returned HTTP status %s", url, resp.Status)
return fmt.Errorf("%s returned HTTP status %s", url.Url, resp.Status)
}
body, err := ioutil.ReadAll(resp.Body)
@ -147,12 +208,15 @@ func (p *Prometheus) gatherURL(url string, acc telegraf.Accumulator) error {
metrics, err := Parse(body, resp.Header)
if err != nil {
return fmt.Errorf("error reading metrics for %s: %s",
url, err)
url.Url, err)
}
// Add (or not) collected metrics
for _, metric := range metrics {
tags := metric.Tags()
tags["url"] = url
tags["url"] = url.Url
if url.Address != "" {
tags["address"] = url.Address
}
acc.AddFields(metric.Name(), metric.Fields(), tags, metric.Time())
}

View File

@ -4,6 +4,7 @@ import (
"fmt"
"net/http"
"net/http/httptest"
"net/url"
"testing"
"time"
@ -48,5 +49,55 @@ func TestPrometheusGeneratesMetrics(t *testing.T) {
assert.True(t, acc.HasFloatField("go_goroutines", "gauge"))
assert.True(t, acc.HasFloatField("test_metric", "value"))
assert.True(t, acc.HasTimestamp("test_metric", time.Unix(1490802350, 0)))
assert.False(t, acc.HasTag("test_metric", "address"))
}
func TestPrometheusGeneratesMetricsWithHostNameTag(t *testing.T) {
ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
fmt.Fprintln(w, sampleTextFormat)
}))
defer ts.Close()
p := &Prometheus{
KubernetesServices: []string{ts.URL},
}
u, _ := url.Parse(ts.URL)
tsAddress := u.Hostname()
var acc testutil.Accumulator
err := acc.GatherError(p.Gather)
require.NoError(t, err)
assert.True(t, acc.HasFloatField("go_gc_duration_seconds", "count"))
assert.True(t, acc.HasFloatField("go_goroutines", "gauge"))
assert.True(t, acc.HasFloatField("test_metric", "value"))
assert.True(t, acc.HasTimestamp("test_metric", time.Unix(1490802350, 0)))
assert.True(t, acc.TagValue("test_metric", "address") == tsAddress)
}
func TestPrometheusGeneratesMetricsAlthoughFirstDNSFails(t *testing.T) {
if testing.Short() {
t.Skip("Skipping integration test in short mode")
}
ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
fmt.Fprintln(w, sampleTextFormat)
}))
defer ts.Close()
p := &Prometheus{
Urls: []string{ts.URL},
KubernetesServices: []string{"http://random.telegraf.local:88/metrics"},
}
var acc testutil.Accumulator
err := acc.GatherError(p.Gather)
require.NoError(t, err)
assert.True(t, acc.HasFloatField("go_gc_duration_seconds", "count"))
assert.True(t, acc.HasFloatField("go_goroutines", "gauge"))
assert.True(t, acc.HasFloatField("test_metric", "value"))
assert.True(t, acc.HasTimestamp("test_metric", time.Unix(1490802350, 0)))
}