telegraf/plugins/outputs/influxdb/influxdb.go

488 lines
11 KiB
Go
Raw Normal View History

package influxdb
import (
2015-09-09 21:56:10 +00:00
"errors"
"fmt"
"log"
2015-09-09 21:56:10 +00:00
"math/rand"
"net/url"
"strings"
2016-07-14 04:33:21 +00:00
"sync"
2015-12-15 17:08:13 +00:00
"time"
"github.com/influxdata/telegraf"
2016-01-20 18:57:35 +00:00
"github.com/influxdata/telegraf/internal"
"github.com/influxdata/telegraf/plugins/outputs"
"github.com/influxdata/influxdb/client/v2"
)
type InfluxDB struct {
2015-09-09 21:56:10 +00:00
// URL is only for backwards compatability
URL string
URLs []string `toml:"urls"`
Username string
Password string
Database string
UserAgent string
RetentionPolicy string
WriteConsistency string
Timeout internal.Duration
UDPPayload int `toml:"udp_payload"`
2016-07-15 07:17:20 +00:00
Downsampler *Downsampling
// Path to CA file
SSLCA string `toml:"ssl_ca"`
// Path to host cert file
SSLCert string `toml:"ssl_cert"`
// Path to cert key file
SSLKey string `toml:"ssl_key"`
// Use SSL but skip chain & host verification
InsecureSkipVerify bool
// Precision is only here for legacy support. It will be ignored.
Precision string
conns []client.Client
}
// sampleConfig is the example TOML configuration returned verbatim by
// SampleConfig.
var sampleConfig = `
## The full HTTP or UDP endpoint URL for your InfluxDB instance.
## Multiple urls can be specified as part of the same cluster,
## this means that only ONE of the urls will be written to each interval.
# urls = ["udp://localhost:8089"] # UDP endpoint example
urls = ["http://localhost:8086"] # required
## The target database for metrics (telegraf will create it if not exists).
database = "telegraf" # required
## Retention policy to write to. Empty string writes to the default rp.
retention_policy = ""
## Write consistency (clusters only), can be: "any", "one", "quorum", "all"
write_consistency = "any"
## Write timeout (for the InfluxDB client), formatted as a string.
## If not provided, will default to 5s. 0s means no timeout (not recommended).
timeout = "5s"
# username = "telegraf"
# password = "metricsmetricsmetricsmetrics"
## Set the user agent for HTTP POSTs (can be useful for log differentiation)
# user_agent = "telegraf"
## Set UDP payload size, defaults to InfluxDB UDP Client default (512 bytes)
# udp_payload = 512
## Optional SSL Config
# ssl_ca = "/etc/telegraf/ca.pem"
# ssl_cert = "/etc/telegraf/cert.pem"
# ssl_key = "/etc/telegraf/key.pem"
## Use SSL but skip chain & host verification
# insecure_skip_verify = false
`
func (i *InfluxDB) Connect() error {
var urls []string
for _, u := range i.URLs {
2015-09-09 21:56:10 +00:00
urls = append(urls, u)
}
2015-09-09 21:56:10 +00:00
// Backward-compatability with single Influx URL config files
// This could eventually be removed in favor of specifying the urls as a list
if i.URL != "" {
urls = append(urls, i.URL)
2015-09-09 21:56:10 +00:00
}
tlsCfg, err := internal.GetTLSConfig(
i.SSLCert, i.SSLKey, i.SSLCA, i.InsecureSkipVerify)
if err != nil {
return err
}
var conns []client.Client
for _, u := range urls {
switch {
case strings.HasPrefix(u, "udp"):
2015-11-12 21:54:43 +00:00
parsed_url, err := url.Parse(u)
if err != nil {
return err
}
if i.UDPPayload == 0 {
i.UDPPayload = client.UDPPayloadSize
}
c, err := client.NewUDPClient(client.UDPConfig{
Addr: parsed_url.Host,
PayloadSize: i.UDPPayload,
})
if err != nil {
return err
}
conns = append(conns, c)
default:
// If URL doesn't start with "udp", assume HTTP client
c, err := client.NewHTTPClient(client.HTTPConfig{
2015-11-12 21:54:43 +00:00
Addr: u,
Username: i.Username,
Password: i.Password,
UserAgent: i.UserAgent,
Timeout: i.Timeout.Duration,
TLSConfig: tlsCfg,
})
if err != nil {
return err
}
err = createDatabase(c, i.Database)
if err != nil {
log.Println("Database creation failed: " + err.Error())
continue
}
conns = append(conns, c)
2015-09-09 21:56:10 +00:00
}
}
2015-09-09 21:56:10 +00:00
i.conns = conns
2015-12-15 17:08:13 +00:00
rand.Seed(time.Now().UnixNano())
return nil
}
// createDatabase issues a CREATE DATABASE statement for database through c
// and returns the error of the query call, if any.
func createDatabase(c client.Client, database string) error {
	// The name is embedded in a double-quoted InfluxQL identifier, so the
	// characters that are special inside such an identifier must be escaped;
	// otherwise a name containing a quote or backslash yields a malformed
	// statement.
	quoted := strings.NewReplacer("\n", `\n`, `\`, `\\`, `"`, `\"`).Replace(database)

	// Create Database if it doesn't exist
	_, err := c.Query(client.Query{
		Command: fmt.Sprintf("CREATE DATABASE \"%s\"", quoted),
	})
	return err
}
// Close closes every stored client. It keeps going after a failure and
// returns a single error whose message is the concatenation of all the
// individual close errors, or nil when every client closed cleanly.
func (i *InfluxDB) Close() error {
	var combined string
	for _, conn := range i.conns {
		closeErr := conn.Close()
		if closeErr == nil {
			continue
		}
		combined += closeErr.Error()
	}

	if combined == "" {
		return nil
	}
	return fmt.Errorf("output influxdb close failed: %s", combined)
}
// SampleConfig returns the example configuration snippet for this output.
func (i *InfluxDB) SampleConfig() string {
	snippet := sampleConfig
	return snippet
}
// Description returns the one-line summary of this output.
func (i *InfluxDB) Description() string {
	const summary = "Configuration for influxdb server to send metrics to"
	return summary
}
2016-07-26 18:27:30 +00:00
// flush writes bp to the stored clients, trying them in random order and
// stopping at the first successful write. Every failed write is logged; when
// the failure message mentions a missing database, the database is recreated
// on that client (the write itself is not retried on it). An error is
// returned only when no client accepted the batch.
func (i *InfluxDB) flush(bp client.BatchPoints) error {
	for _, idx := range rand.Perm(len(i.conns)) {
		conn := i.conns[idx]

		writeErr := conn.Write(bp)
		if writeErr == nil {
			return nil
		}

		// Log write failure
		log.Printf("ERROR: %s", writeErr)

		// Only a missing database is worth a repair attempt.
		if !strings.Contains(writeErr.Error(), "database not found") {
			continue
		}
		if createErr := createDatabase(conn, i.Database); createErr != nil {
			log.Printf("ERROR: Database %s not found and failed to recreate\n",
				i.Database)
		}
	}

	return errors.New("Could not write to any InfluxDB server in cluster")
}
2015-09-09 21:56:10 +00:00
// Choose a random server in the cluster to write to until a successful write
// occurs, logging each unsuccessful. If all servers fail, return error.
func (i *InfluxDB) Write(metrics []telegraf.Metric) error {
if len(i.conns) == 0 {
err := i.Connect()
if err != nil {
return err
}
}
2016-07-26 18:27:30 +00:00
bp, err := i.batchPointsFromMetrics(metrics...)
if err != nil {
return err
}
i.Downsampler.Add(metrics...)
err = i.flush(bp)
return err
}
func (i *InfluxDB) batchPointsFromMetrics(metrics ...telegraf.Metric) (client.BatchPoints, error) {
bp, err := client.NewBatchPoints(client.BatchPointsConfig{
Database: i.Database,
RetentionPolicy: i.RetentionPolicy,
WriteConsistency: i.WriteConsistency,
})
if err != nil {
2016-07-26 18:27:30 +00:00
return bp, err
2016-07-22 09:17:16 +00:00
}
2016-07-14 04:33:21 +00:00
for _, metric := range metrics {
bp.AddPoint(metric.Point())
}
2015-09-09 21:56:10 +00:00
2016-07-26 18:27:30 +00:00
return bp, nil
}
2015-09-09 21:56:10 +00:00
2016-07-26 18:27:30 +00:00
func (i *InfluxDB) Run() {
tick := time.Tick(i.Downsampler.TimeRange)
for {
select {
case <-tick:
aggrData, err := i.Downsampler.Aggregate()
if err != nil {
continue
}
i.Downsampler.Lock()
i.Downsampler.Metrics = nil
i.Downsampler.Unlock()
if len(i.conns) == 0 {
err := i.Connect()
if err != nil {
return
}
}
2016-07-26 18:27:30 +00:00
bp, err := i.batchPointsFromMetrics(aggrData)
if err != nil {
return
}
err = i.flush(bp)
if err != nil {
return
}
2015-09-09 21:56:10 +00:00
}
}
}
2016-07-15 07:17:20 +00:00
func init() {
influxdb := &InfluxDB{
2016-07-26 18:27:30 +00:00
Timeout: internal.Duration{Duration: time.Second * 5},
Downsampler: &Downsampling{
TimeRange: time.Duration(time.Minute * 2),
},
2016-07-15 07:17:20 +00:00
}
2016-07-26 18:27:30 +00:00
go influxdb.Run()
2016-07-15 07:17:20 +00:00
outputs.Add("influxdb", func() telegraf.Output {
return influxdb
})
}
2016-07-14 04:33:21 +00:00
// Downsampling
type Downsampling struct {
sync.RWMutex
2016-07-23 12:41:58 +00:00
Name string
Metrics []telegraf.Metric
TimeRange time.Duration
Aggregations map[string][]Aggregation
2016-07-14 04:33:21 +00:00
}
2016-07-26 18:27:30 +00:00
// NewDownsampling returns a Downsampling with the given name and time range
// and an empty, ready-to-use aggregation table.
func NewDownsampling(name string, timeRange time.Duration) *Downsampling {
	d := new(Downsampling)
	d.Name = name
	d.TimeRange = timeRange
	d.Aggregations = map[string][]Aggregation{}
	return d
}
2016-07-22 11:15:41 +00:00
// Aggregation maps the field names to aggregation function for them
2016-07-23 12:41:58 +00:00
type Aggregation struct {
// FieldName is the key looked up in each metric's fields by Sum and Mean.
FieldName string
// FuncName selects the aggregation; AddAggregations accepts "mean" and
// "sum" and ignores every other value.
FuncName string
// Alias is the field name under which the aggregated value is stored in
// the resulting metric.
Alias string
}
func (d *Downsampling) AddAggregations(aggrs ...Aggregation) {
2016-07-26 18:27:30 +00:00
if d.Aggregations == nil {
d.Aggregations = make(map[string][]Aggregation)
}
2016-07-23 12:41:58 +00:00
for _, aggr := range aggrs {
switch aggr.FuncName {
case "mean":
d.Aggregations["mean"] = append(d.Aggregations["mean"], aggr)
case "sum":
d.Aggregations["sum"] = append(d.Aggregations["sum"], aggr)
default:
}
}
}
2016-07-14 04:33:21 +00:00
2016-07-22 11:15:41 +00:00
// Add appends metrics to the metrics that will be aggregated
2016-07-26 18:27:30 +00:00
func (d *Downsampling) Add(metrics ...telegraf.Metric) {
2016-07-14 04:33:21 +00:00
d.Lock()
d.Metrics = append(d.Metrics, metrics...)
d.Unlock()
2016-07-26 18:27:30 +00:00
return
2016-07-14 04:33:21 +00:00
}
// Aggregate calculates the mean value of fields by given time
2016-07-23 12:41:58 +00:00
func (d *Downsampling) Aggregate() (telegraf.Metric, error) {
metrics := map[string]interface{}{}
var (
aggrMetric, sum, mean telegraf.Metric
err error
)
for name, aggr := range d.Aggregations {
switch name {
case "sum":
sum, err = d.Sum(aggr...)
if err != nil {
return aggrMetric, err
}
case "mean":
mean, err = d.Mean(aggr...)
if err != nil {
return aggrMetric, err
}
default:
}
}
2016-07-22 11:15:41 +00:00
2016-07-26 18:27:30 +00:00
if sum != nil && sum.Fields() != nil {
for k, v := range sum.Fields() {
metrics[k] = v
}
2016-07-23 12:41:58 +00:00
}
2016-07-26 18:27:30 +00:00
if mean != nil && mean.Fields() != nil {
for k, v := range mean.Fields() {
metrics[k] = v
}
2016-07-23 12:41:58 +00:00
}
aggrMetric, err = telegraf.NewMetric(
d.Name,
map[string]string{},
metrics,
time.Now(),
)
return aggrMetric, err
2016-07-14 04:33:21 +00:00
}
2016-07-23 12:41:58 +00:00
// Sum calculate the sum values of given fields
func (d *Downsampling) Sum(fields ...Aggregation) (telegraf.Metric, error) {
2016-07-22 11:15:41 +00:00
var (
sumMetric telegraf.Metric
sums = make(map[string]interface{})
)
d.RLock()
for _, metric := range d.Metrics {
2016-07-23 12:41:58 +00:00
for _, field := range fields {
value, ok := metric.Fields()[field.FieldName]
2016-07-22 11:15:41 +00:00
if !ok {
continue
}
2016-07-24 09:58:41 +00:00
oldVal, ok := sums[field.Alias]
if !ok {
sums[field.Alias] = value
continue
}
2016-07-22 11:15:41 +00:00
switch value := value.(type) {
case int:
2016-07-24 09:58:41 +00:00
sums[field.Alias] = oldVal.(int) + int(value)
2016-07-22 11:15:41 +00:00
case int32:
2016-07-24 09:58:41 +00:00
sums[field.Alias] = oldVal.(int32) + int32(value)
2016-07-22 11:15:41 +00:00
case int64:
2016-07-24 09:58:41 +00:00
sums[field.Alias] = oldVal.(int64) + int64(value)
2016-07-22 11:15:41 +00:00
case float32:
2016-07-24 09:58:41 +00:00
sums[field.Alias] = oldVal.(float32) + float32(value)
2016-07-22 11:15:41 +00:00
case float64:
2016-07-24 09:58:41 +00:00
sums[field.Alias] = oldVal.(float64) + float64(value)
2016-07-22 11:15:41 +00:00
default:
continue
}
}
}
d.RUnlock()
sumMetric, err := telegraf.NewMetric(
d.Name,
map[string]string{},
sums,
time.Now(),
)
return sumMetric, err
}
// Mean calculates the mean values of given fields
2016-07-23 12:41:58 +00:00
func (d *Downsampling) Mean(fields ...Aggregation) (telegraf.Metric, error) {
2016-07-15 07:17:20 +00:00
var (
aggrMetric telegraf.Metric
sums = make(map[string]interface{})
)
d.RLock()
2016-07-26 20:46:55 +00:00
var size = len(d.Metrics)
2016-07-15 07:17:20 +00:00
for _, metric := range d.Metrics {
2016-07-23 12:41:58 +00:00
for _, field := range fields {
value, ok := metric.Fields()[field.FieldName]
2016-07-22 09:17:16 +00:00
if !ok {
continue
}
2016-07-24 09:58:41 +00:00
oldVal, ok := sums[field.Alias]
if !ok {
sums[field.Alias] = value
continue
}
2016-07-15 07:17:20 +00:00
switch value := value.(type) {
case int:
2016-07-24 09:58:41 +00:00
sums[field.Alias] = oldVal.(int) + int(value)
2016-07-15 07:17:20 +00:00
case int32:
2016-07-24 09:58:41 +00:00
sums[field.Alias] = oldVal.(int32) + int32(value)
2016-07-15 07:17:20 +00:00
case int64:
2016-07-24 09:58:41 +00:00
sums[field.Alias] = oldVal.(int64) + int64(value)
2016-07-15 07:17:20 +00:00
case float32:
2016-07-24 09:58:41 +00:00
sums[field.Alias] = oldVal.(float32) + float32(value)
2016-07-15 07:17:20 +00:00
case float64:
2016-07-24 09:58:41 +00:00
sums[field.Alias] = oldVal.(float64) + float64(value)
2016-07-15 07:17:20 +00:00
default:
continue
}
}
}
d.RUnlock()
for i := range sums {
2016-07-22 09:17:16 +00:00
switch value := sums[i].(type) {
case int:
sums[i] = value / int(size)
case int32:
sums[i] = value / int32(size)
case int64:
sums[i] = value / int64(size)
case float32:
sums[i] = value / float32(size)
case float64:
sums[i] = value / float64(size)
default:
continue
}
2016-07-15 07:17:20 +00:00
}
aggrMetric, err := telegraf.NewMetric(
d.Name,
map[string]string{},
2016-07-22 09:17:16 +00:00
sums,
2016-07-15 07:17:20 +00:00
time.Now(),
)
return aggrMetric, err
}