2015-10-22 16:17:57 +00:00
|
|
|
package prometheus_client
|
|
|
|
|
|
|
|
import (
|
2017-01-21 23:37:53 +00:00
|
|
|
"context"
|
2015-10-22 16:17:57 +00:00
|
|
|
"fmt"
|
2015-12-01 17:08:38 +00:00
|
|
|
"log"
|
2015-10-28 22:19:13 +00:00
|
|
|
"net/http"
|
2017-10-18 21:51:08 +00:00
|
|
|
"os"
|
2016-03-22 16:34:33 +00:00
|
|
|
"regexp"
|
2017-06-14 01:04:26 +00:00
|
|
|
"sort"
|
2017-10-24 23:28:52 +00:00
|
|
|
"strconv"
|
2017-06-14 01:04:26 +00:00
|
|
|
"strings"
|
2016-07-10 13:47:47 +00:00
|
|
|
"sync"
|
2016-11-15 11:33:39 +00:00
|
|
|
"time"
|
2015-10-28 22:19:13 +00:00
|
|
|
|
2016-01-27 21:21:36 +00:00
|
|
|
"github.com/influxdata/telegraf"
|
2016-11-15 11:33:39 +00:00
|
|
|
"github.com/influxdata/telegraf/internal"
|
2016-01-27 23:15:14 +00:00
|
|
|
"github.com/influxdata/telegraf/plugins/outputs"
|
2015-10-22 16:17:57 +00:00
|
|
|
"github.com/prometheus/client_golang/prometheus"
|
2017-10-18 21:51:08 +00:00
|
|
|
"github.com/prometheus/client_golang/prometheus/promhttp"
|
2015-10-22 16:17:57 +00:00
|
|
|
)
|
|
|
|
|
2016-07-20 08:24:34 +00:00
|
|
|
// invalidNameCharRE matches every character that is not an ASCII letter, a
// digit or an underscore; sanitize replaces each match with "_".
var invalidNameCharRE = regexp.MustCompile(`[^a-zA-Z0-9_]`)
|
2016-03-22 16:34:33 +00:00
|
|
|
|
2017-06-14 01:04:26 +00:00
|
|
|
// SampleID is the key under which a Sample is stored inside a MetricFamily.
// CreateSampleID derives it from the tags of a telegraf.Metric.
type SampleID string
|
|
|
|
|
|
|
|
// Sample holds the current value of one series of a MetricFamily.
type Sample struct {
	// Labels are the Prometheus label names and values of the series.
	Labels map[string]string

	// Value, HistogramValue and SummaryValue carry the data for the
	// Prometheus output. Only one of these will be populated.
	Value          float64
	HistogramValue map[float64]uint64
	SummaryValue   map[float64]float64

	// Count and Sum are required by Histograms and Summaries.
	Count uint64
	Sum   float64

	// Expiration is the deadline until which this Sample is valid.
	Expiration time.Time
}
|
|
|
|
|
2017-06-14 01:04:26 +00:00
|
|
|
// MetricFamily contains the data required to build valid prometheus Metrics.
|
|
|
|
type MetricFamily struct {
|
|
|
|
// Samples are the Sample belonging to this MetricFamily.
|
|
|
|
Samples map[SampleID]*Sample
|
2017-10-24 23:28:52 +00:00
|
|
|
// Need the telegraf ValueType because there isn't a Prometheus ValueType
|
|
|
|
// representing Histogram or Summary
|
|
|
|
TelegrafValueType telegraf.ValueType
|
2017-06-14 01:04:26 +00:00
|
|
|
// LabelSet is the label counts for all Samples.
|
|
|
|
LabelSet map[string]int
|
|
|
|
}
|
|
|
|
|
2015-10-22 16:17:57 +00:00
|
|
|
type PrometheusClient struct {
|
2016-11-15 11:33:39 +00:00
|
|
|
Listen string
|
|
|
|
ExpirationInterval internal.Duration `toml:"expiration_interval"`
|
2017-09-19 18:27:11 +00:00
|
|
|
Path string `toml:"path"`
|
2017-10-18 21:51:08 +00:00
|
|
|
CollectorsExclude []string `toml:"collectors_exclude"`
|
2016-07-10 13:47:47 +00:00
|
|
|
|
2017-06-14 01:04:26 +00:00
|
|
|
server *http.Server
|
2016-07-10 13:47:47 +00:00
|
|
|
|
|
|
|
sync.Mutex
|
2017-06-14 01:04:26 +00:00
|
|
|
// fam is the non-expired MetricFamily by Prometheus metric name.
|
|
|
|
fam map[string]*MetricFamily
|
|
|
|
// now returns the current time.
|
|
|
|
now func() time.Time
|
2015-10-22 16:17:57 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// sampleConfig is the example TOML configuration returned by SampleConfig.
var sampleConfig = `
  ## Address to listen on; defaults to "localhost:9273" when unset
  # listen = ":9273"

  ## Path to publish the metrics on; defaults to "/metrics" when unset
  # path = "/metrics"

  ## Interval to expire metrics and not deliver to prometheus, 0 == no expiration
  # expiration_interval = "60s"

  ## Collectors to exclude, valid entries are "gocollector" and "process".
  ## If unset, both are enabled.
  collectors_exclude = ["gocollector", "process"]
`
|
|
|
|
|
|
|
|
func (p *PrometheusClient) Start() error {
|
2017-12-12 02:00:19 +00:00
|
|
|
defaultCollectors := map[string]bool{
|
|
|
|
"gocollector": true,
|
|
|
|
"process": true,
|
|
|
|
}
|
2017-10-18 21:51:08 +00:00
|
|
|
for _, collector := range p.CollectorsExclude {
|
2017-12-12 02:00:19 +00:00
|
|
|
delete(defaultCollectors, collector)
|
|
|
|
}
|
|
|
|
|
|
|
|
registry := prometheus.NewRegistry()
|
|
|
|
for collector, _ := range defaultCollectors {
|
2017-10-18 21:51:08 +00:00
|
|
|
switch collector {
|
|
|
|
case "gocollector":
|
2017-12-12 02:00:19 +00:00
|
|
|
registry.Register(prometheus.NewGoCollector())
|
2017-10-18 21:51:08 +00:00
|
|
|
case "process":
|
2017-12-12 02:00:19 +00:00
|
|
|
registry.Register(prometheus.NewProcessCollector(os.Getpid(), ""))
|
2017-10-18 21:51:08 +00:00
|
|
|
default:
|
|
|
|
return fmt.Errorf("unrecognized collector %s", collector)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2017-12-12 02:00:19 +00:00
|
|
|
registry.Register(p)
|
|
|
|
|
2015-10-22 16:17:57 +00:00
|
|
|
if p.Listen == "" {
|
2017-06-29 21:03:42 +00:00
|
|
|
p.Listen = "localhost:9273"
|
2015-10-22 16:17:57 +00:00
|
|
|
}
|
2015-10-28 22:19:13 +00:00
|
|
|
|
2017-09-19 18:27:11 +00:00
|
|
|
if p.Path == "" {
|
|
|
|
p.Path = "/metrics"
|
|
|
|
}
|
|
|
|
|
2017-01-21 23:37:53 +00:00
|
|
|
mux := http.NewServeMux()
|
2017-10-18 21:51:08 +00:00
|
|
|
mux.Handle(p.Path, promhttp.HandlerFor(
|
2017-12-12 02:00:19 +00:00
|
|
|
registry, promhttp.HandlerOpts{ErrorHandling: promhttp.ContinueOnError}))
|
2017-01-21 23:37:53 +00:00
|
|
|
|
|
|
|
p.server = &http.Server{
|
|
|
|
Addr: p.Listen,
|
|
|
|
Handler: mux,
|
2015-10-22 16:17:57 +00:00
|
|
|
}
|
2015-10-28 22:19:13 +00:00
|
|
|
|
2017-07-05 21:28:44 +00:00
|
|
|
go func() {
|
|
|
|
if err := p.server.ListenAndServe(); err != nil {
|
2017-07-25 22:41:18 +00:00
|
|
|
if err != http.ErrServerClosed {
|
|
|
|
log.Printf("E! Error creating prometheus metric endpoint, err: %s\n",
|
|
|
|
err.Error())
|
|
|
|
}
|
2017-07-05 21:28:44 +00:00
|
|
|
}
|
|
|
|
}()
|
2015-10-22 16:17:57 +00:00
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func (p *PrometheusClient) Stop() {
|
2017-01-21 23:37:53 +00:00
|
|
|
// plugin gets cleaned up in Close() already.
|
2015-10-22 16:17:57 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
func (p *PrometheusClient) Connect() error {
|
|
|
|
// This service output does not need to make any further connections
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func (p *PrometheusClient) Close() error {
|
2017-01-21 23:37:53 +00:00
|
|
|
ctx, cancel := context.WithTimeout(context.Background(), time.Second*5)
|
|
|
|
defer cancel()
|
2017-07-25 22:41:18 +00:00
|
|
|
err := p.server.Shutdown(ctx)
|
|
|
|
prometheus.Unregister(p)
|
|
|
|
return err
|
2015-10-22 16:17:57 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
func (p *PrometheusClient) SampleConfig() string {
|
|
|
|
return sampleConfig
|
|
|
|
}
|
|
|
|
|
|
|
|
func (p *PrometheusClient) Description() string {
|
|
|
|
return "Configuration for the Prometheus client to spawn"
|
|
|
|
}
|
|
|
|
|
2016-07-10 13:47:47 +00:00
|
|
|
// Implements prometheus.Collector
|
|
|
|
func (p *PrometheusClient) Describe(ch chan<- *prometheus.Desc) {
|
|
|
|
prometheus.NewGauge(prometheus.GaugeOpts{Name: "Dummy", Help: "Dummy"}).Describe(ch)
|
|
|
|
}
|
|
|
|
|
2017-06-14 01:04:26 +00:00
|
|
|
// Expire removes Samples that have expired.
|
|
|
|
func (p *PrometheusClient) Expire() {
|
|
|
|
now := p.now()
|
|
|
|
for name, family := range p.fam {
|
|
|
|
for key, sample := range family.Samples {
|
|
|
|
if p.ExpirationInterval.Duration != 0 && now.After(sample.Expiration) {
|
|
|
|
for k, _ := range sample.Labels {
|
|
|
|
family.LabelSet[k]--
|
|
|
|
}
|
|
|
|
delete(family.Samples, key)
|
|
|
|
|
|
|
|
if len(family.Samples) == 0 {
|
|
|
|
delete(p.fam, name)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Collect implements prometheus.Collector
|
2016-07-10 13:47:47 +00:00
|
|
|
func (p *PrometheusClient) Collect(ch chan<- prometheus.Metric) {
|
|
|
|
p.Lock()
|
|
|
|
defer p.Unlock()
|
|
|
|
|
2017-06-14 01:04:26 +00:00
|
|
|
p.Expire()
|
|
|
|
|
|
|
|
for name, family := range p.fam {
|
|
|
|
// Get list of all labels on MetricFamily
|
|
|
|
var labelNames []string
|
|
|
|
for k, v := range family.LabelSet {
|
|
|
|
if v > 0 {
|
|
|
|
labelNames = append(labelNames, k)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
desc := prometheus.NewDesc(name, "Telegraf collected metric", labelNames, nil)
|
|
|
|
|
|
|
|
for _, sample := range family.Samples {
|
|
|
|
// Get labels for this sample; unset labels will be set to the
|
|
|
|
// empty string
|
|
|
|
var labels []string
|
|
|
|
for _, label := range labelNames {
|
|
|
|
v := sample.Labels[label]
|
|
|
|
labels = append(labels, v)
|
|
|
|
}
|
|
|
|
|
2017-10-24 23:28:52 +00:00
|
|
|
var metric prometheus.Metric
|
|
|
|
var err error
|
|
|
|
switch family.TelegrafValueType {
|
|
|
|
case telegraf.Summary:
|
|
|
|
metric, err = prometheus.NewConstSummary(desc, sample.Count, sample.Sum, sample.SummaryValue, labels...)
|
|
|
|
case telegraf.Histogram:
|
|
|
|
metric, err = prometheus.NewConstHistogram(desc, sample.Count, sample.Sum, sample.HistogramValue, labels...)
|
|
|
|
default:
|
|
|
|
metric, err = prometheus.NewConstMetric(desc, getPromValueType(family.TelegrafValueType), sample.Value, labels...)
|
|
|
|
}
|
2017-06-14 01:04:26 +00:00
|
|
|
if err != nil {
|
|
|
|
log.Printf("E! Error creating prometheus metric, "+
|
|
|
|
"key: %s, labels: %v,\nerr: %s\n",
|
|
|
|
name, labels, err.Error())
|
|
|
|
}
|
|
|
|
|
|
|
|
ch <- metric
|
2016-09-16 14:43:53 +00:00
|
|
|
}
|
2016-07-10 13:47:47 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2017-06-14 01:04:26 +00:00
|
|
|
func sanitize(value string) string {
|
|
|
|
return invalidNameCharRE.ReplaceAllString(value, "_")
|
|
|
|
}
|
|
|
|
|
2017-10-24 23:28:52 +00:00
|
|
|
func getPromValueType(tt telegraf.ValueType) prometheus.ValueType {
|
2017-06-14 01:04:26 +00:00
|
|
|
switch tt {
|
|
|
|
case telegraf.Counter:
|
|
|
|
return prometheus.CounterValue
|
|
|
|
case telegraf.Gauge:
|
|
|
|
return prometheus.GaugeValue
|
|
|
|
default:
|
|
|
|
return prometheus.UntypedValue
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// CreateSampleID creates a SampleID based on the tags of a telegraf.Metric.
|
|
|
|
func CreateSampleID(tags map[string]string) SampleID {
|
|
|
|
pairs := make([]string, 0, len(tags))
|
|
|
|
for k, v := range tags {
|
|
|
|
pairs = append(pairs, fmt.Sprintf("%s=%s", k, v))
|
|
|
|
}
|
|
|
|
sort.Strings(pairs)
|
|
|
|
return SampleID(strings.Join(pairs, ","))
|
|
|
|
}
|
|
|
|
|
2017-10-24 23:28:52 +00:00
|
|
|
func addSample(fam *MetricFamily, sample *Sample, sampleID SampleID) {
|
|
|
|
|
|
|
|
for k, _ := range sample.Labels {
|
|
|
|
fam.LabelSet[k]++
|
|
|
|
}
|
|
|
|
|
|
|
|
fam.Samples[sampleID] = sample
|
|
|
|
}
|
|
|
|
|
|
|
|
func (p *PrometheusClient) addMetricFamily(point telegraf.Metric, sample *Sample, mname string, sampleID SampleID) {
|
|
|
|
var fam *MetricFamily
|
|
|
|
var ok bool
|
|
|
|
if fam, ok = p.fam[mname]; !ok {
|
|
|
|
fam = &MetricFamily{
|
|
|
|
Samples: make(map[SampleID]*Sample),
|
|
|
|
TelegrafValueType: point.Type(),
|
|
|
|
LabelSet: make(map[string]int),
|
|
|
|
}
|
|
|
|
p.fam[mname] = fam
|
|
|
|
}
|
|
|
|
|
|
|
|
addSample(fam, sample, sampleID)
|
|
|
|
}
|
|
|
|
|
2016-01-27 23:15:14 +00:00
|
|
|
func (p *PrometheusClient) Write(metrics []telegraf.Metric) error {
|
2016-07-10 13:47:47 +00:00
|
|
|
p.Lock()
|
|
|
|
defer p.Unlock()
|
|
|
|
|
2017-06-14 01:04:26 +00:00
|
|
|
now := p.now()
|
2015-10-22 16:17:57 +00:00
|
|
|
|
2016-01-27 23:15:14 +00:00
|
|
|
for _, point := range metrics {
|
2017-06-14 01:04:26 +00:00
|
|
|
tags := point.Tags()
|
|
|
|
sampleID := CreateSampleID(tags)
|
2015-10-22 16:17:57 +00:00
|
|
|
|
2017-06-14 01:04:26 +00:00
|
|
|
labels := make(map[string]string)
|
|
|
|
for k, v := range tags {
|
2017-06-21 19:36:29 +00:00
|
|
|
labels[sanitize(k)] = v
|
2016-08-30 17:09:48 +00:00
|
|
|
}
|
|
|
|
|
2017-10-19 00:42:30 +00:00
|
|
|
// Prometheus doesn't have a string value type, so convert string
|
|
|
|
// fields to labels.
|
|
|
|
for fn, fv := range point.Fields() {
|
|
|
|
switch fv := fv.(type) {
|
|
|
|
case string:
|
|
|
|
labels[sanitize(fn)] = fv
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2017-10-24 23:28:52 +00:00
|
|
|
switch point.Type() {
|
|
|
|
case telegraf.Summary:
|
|
|
|
var mname string
|
|
|
|
var sum float64
|
|
|
|
var count uint64
|
|
|
|
summaryvalue := make(map[float64]float64)
|
|
|
|
for fn, fv := range point.Fields() {
|
|
|
|
var value float64
|
|
|
|
switch fv := fv.(type) {
|
|
|
|
case int64:
|
|
|
|
value = float64(fv)
|
|
|
|
case float64:
|
|
|
|
value = fv
|
|
|
|
default:
|
|
|
|
continue
|
|
|
|
}
|
2016-03-23 14:57:05 +00:00
|
|
|
|
2017-10-24 23:28:52 +00:00
|
|
|
switch fn {
|
|
|
|
case "sum":
|
|
|
|
sum = value
|
|
|
|
case "count":
|
|
|
|
count = uint64(value)
|
|
|
|
default:
|
|
|
|
limit, err := strconv.ParseFloat(fn, 64)
|
|
|
|
if err == nil {
|
|
|
|
summaryvalue[limit] = value
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2017-06-14 01:04:26 +00:00
|
|
|
sample := &Sample{
|
2017-10-24 23:28:52 +00:00
|
|
|
Labels: labels,
|
|
|
|
SummaryValue: summaryvalue,
|
|
|
|
Count: count,
|
|
|
|
Sum: sum,
|
|
|
|
Expiration: now.Add(p.ExpirationInterval.Duration),
|
2017-06-14 01:04:26 +00:00
|
|
|
}
|
2017-10-24 23:28:52 +00:00
|
|
|
mname = sanitize(point.Name())
|
2017-06-14 01:04:26 +00:00
|
|
|
|
2017-10-24 23:28:52 +00:00
|
|
|
p.addMetricFamily(point, sample, mname, sampleID)
|
|
|
|
|
|
|
|
case telegraf.Histogram:
|
2016-03-08 18:33:57 +00:00
|
|
|
var mname string
|
2017-10-24 23:28:52 +00:00
|
|
|
var sum float64
|
|
|
|
var count uint64
|
|
|
|
histogramvalue := make(map[float64]uint64)
|
|
|
|
for fn, fv := range point.Fields() {
|
|
|
|
var value float64
|
|
|
|
switch fv := fv.(type) {
|
|
|
|
case int64:
|
|
|
|
value = float64(fv)
|
|
|
|
case float64:
|
|
|
|
value = fv
|
|
|
|
default:
|
|
|
|
continue
|
2017-10-18 21:51:08 +00:00
|
|
|
}
|
2017-10-24 23:28:52 +00:00
|
|
|
|
|
|
|
switch fn {
|
|
|
|
case "sum":
|
|
|
|
sum = value
|
|
|
|
case "count":
|
|
|
|
count = uint64(value)
|
|
|
|
default:
|
|
|
|
limit, err := strconv.ParseFloat(fn, 64)
|
|
|
|
if err == nil {
|
|
|
|
histogramvalue[limit] = uint64(value)
|
|
|
|
}
|
2017-10-18 21:51:08 +00:00
|
|
|
}
|
|
|
|
}
|
2017-10-24 23:28:52 +00:00
|
|
|
sample := &Sample{
|
|
|
|
Labels: labels,
|
|
|
|
HistogramValue: histogramvalue,
|
|
|
|
Count: count,
|
|
|
|
Sum: sum,
|
|
|
|
Expiration: now.Add(p.ExpirationInterval.Duration),
|
2016-03-08 18:33:57 +00:00
|
|
|
}
|
2017-10-24 23:28:52 +00:00
|
|
|
mname = sanitize(point.Name())
|
2016-03-22 16:34:33 +00:00
|
|
|
|
2017-10-24 23:28:52 +00:00
|
|
|
p.addMetricFamily(point, sample, mname, sampleID)
|
|
|
|
|
|
|
|
default:
|
|
|
|
for fn, fv := range point.Fields() {
|
|
|
|
// Ignore string and bool fields.
|
|
|
|
var value float64
|
|
|
|
switch fv := fv.(type) {
|
|
|
|
case int64:
|
|
|
|
value = float64(fv)
|
|
|
|
case float64:
|
|
|
|
value = fv
|
|
|
|
default:
|
|
|
|
continue
|
2017-06-14 01:04:26 +00:00
|
|
|
}
|
2017-10-24 23:28:52 +00:00
|
|
|
|
|
|
|
sample := &Sample{
|
|
|
|
Labels: labels,
|
|
|
|
Value: value,
|
|
|
|
Expiration: now.Add(p.ExpirationInterval.Duration),
|
2017-08-10 17:19:28 +00:00
|
|
|
}
|
|
|
|
|
2017-10-24 23:28:52 +00:00
|
|
|
// Special handling of value field; supports passthrough from
|
|
|
|
// the prometheus input.
|
|
|
|
var mname string
|
|
|
|
switch point.Type() {
|
|
|
|
case telegraf.Counter:
|
|
|
|
if fn == "counter" {
|
|
|
|
mname = sanitize(point.Name())
|
|
|
|
}
|
|
|
|
case telegraf.Gauge:
|
|
|
|
if fn == "gauge" {
|
|
|
|
mname = sanitize(point.Name())
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if mname == "" {
|
|
|
|
if fn == "value" {
|
|
|
|
mname = sanitize(point.Name())
|
|
|
|
} else {
|
|
|
|
mname = sanitize(fmt.Sprintf("%s_%s", point.Name(), fn))
|
|
|
|
}
|
2017-06-14 01:04:26 +00:00
|
|
|
}
|
2016-11-15 11:33:39 +00:00
|
|
|
|
2017-10-24 23:28:52 +00:00
|
|
|
p.addMetricFamily(point, sample, mname, sampleID)
|
2017-06-14 01:04:26 +00:00
|
|
|
|
2017-10-24 23:28:52 +00:00
|
|
|
}
|
2015-10-22 16:17:57 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func init() {
|
2016-01-27 21:21:36 +00:00
|
|
|
outputs.Add("prometheus_client", func() telegraf.Output {
|
2016-11-15 11:33:39 +00:00
|
|
|
return &PrometheusClient{
|
|
|
|
ExpirationInterval: internal.Duration{Duration: time.Second * 60},
|
2017-06-14 01:04:26 +00:00
|
|
|
fam: make(map[string]*MetricFamily),
|
|
|
|
now: time.Now,
|
2016-11-15 11:33:39 +00:00
|
|
|
}
|
2015-10-22 16:17:57 +00:00
|
|
|
})
|
|
|
|
}
|