Remove outputs blocking inputs when output is slow (#4938)

Daniel Nelson 2018-11-05 13:34:28 -08:00 committed by GitHub
parent 74667cd681
commit 6e5c2f8bb6
59 changed files with 3615 additions and 2189 deletions

CONTRIBUTING.md

View File

@ -1,489 +1,52 @@
### Contributing

1. [Sign the CLA][cla].
1. Open a [new issue][] to discuss the changes you would like to make. This is
   not strictly required but it may help reduce the amount of rework you need
   to do later.
1. Make changes or write a plugin using the guidelines in the following
   documents:
   - [Input Plugins][inputs]
   - [Processor Plugins][processors]
   - [Aggregator Plugins][aggregators]
   - [Output Plugins][outputs]
1. Ensure you have added proper unit tests and documentation.
1. Open a new [pull request][].

### GoDoc

Public interfaces for inputs, outputs, processors, aggregators, metrics,
and the accumulator can be found in the GoDoc:

[![GoDoc](https://godoc.org/github.com/influxdata/telegraf?status.svg)](https://godoc.org/github.com/influxdata/telegraf)

### Common development tasks

**Adding a dependency:**

Assuming you can already build the project, run these in the telegraf directory:

1. `dep ensure -vendor-only`
2. `dep ensure -add github.com/[dependency]/[new-package]`

## Input Plugins
This section is for developers who want to create new collection inputs.
Telegraf is entirely plugin driven. This interface allows for operators to
pick and choose what is gathered and makes it easy for developers
to create new ways of generating metrics.

Plugin authorship is kept as simple as possible to encourage people to develop
and submit new inputs.
### Input Plugin Guidelines
* A plugin must conform to the [`telegraf.Input`](https://godoc.org/github.com/influxdata/telegraf#Input) interface.
* Input Plugins should call `inputs.Add` in their `init` function to register themselves.
See below for a quick example.
* Input Plugins must be added to the
`github.com/influxdata/telegraf/plugins/inputs/all/all.go` file.
* The `SampleConfig` function should return valid toml that describes how the
plugin can be configured. This is included in `telegraf config`. Please
consult the [SampleConfig](https://github.com/influxdata/telegraf/wiki/SampleConfig)
page for the latest style guidelines.
* The `Description` function should say in one line what this plugin does.
Let's say you've written a plugin that emits metrics about processes on the
current host.
### Input Plugin Example
```go
package simple

// simple.go

import (
	"github.com/influxdata/telegraf"
	"github.com/influxdata/telegraf/plugins/inputs"
)

type Simple struct {
	Ok bool
}

func (s *Simple) Description() string {
	return "a demo plugin"
}

func (s *Simple) SampleConfig() string {
	return `
  ## Indicate if everything is fine
  ok = true
`
}

func (s *Simple) Gather(acc telegraf.Accumulator) error {
	if s.Ok {
		acc.AddFields("state", map[string]interface{}{"value": "pretty good"}, nil)
	} else {
		acc.AddFields("state", map[string]interface{}{"value": "not great"}, nil)
	}
	return nil
}

func init() {
	inputs.Add("simple", func() telegraf.Input { return &Simple{} })
}
```
### Input Plugin Development
* Run `make static` followed by `make plugin-[pluginName]` to spin up a docker dev environment
using docker-compose.
* ***[Optional]*** When developing a plugin, add a `dev` directory with a `docker-compose.yml` and `telegraf.conf`
as well as any other supporting files, where sensible.
## Adding Typed Metrics
In addition to the `AddFields` function, the accumulator also supports an
`AddGauge` and `AddCounter` function. These functions are for adding _typed_
metrics. Metric types are ignored for the InfluxDB output, but can be used
for other outputs, such as [prometheus](https://prometheus.io/docs/concepts/metric_types/).
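For example, a minimal sketch reusing the `Simple` plugin from above (the
measurement names and values are illustrative):

```go
func (s *Simple) Gather(acc telegraf.Accumulator) error {
	tags := map[string]string{"host": "example"}

	// A counter only ever increases; a gauge can move up or down.
	acc.AddCounter("requests", map[string]interface{}{"total": 42}, tags)
	acc.AddGauge("queue", map[string]interface{}{"depth": 7}, tags)
	return nil
}
```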
## Input Plugins Accepting Arbitrary Data Formats
Some input plugins (such as
[exec](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/exec))
accept arbitrary input data formats. An overview of these data formats can
be found
[here](https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md).
In order to enable this, you must specify a `SetParser(parser parsers.Parser)`
function on the plugin object (see the exec plugin for an example), as well as
defining `parser` as a field of the object.
You can then utilize the parser internally in your plugin, parsing data as you
see fit. Telegraf's configuration layer will take care of instantiating and
creating the `Parser` object.
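As a sketch, the wiring usually looks something like this; the plugin type and
the raw input here are illustrative, while the `SetParser` hook and the
`parsers.Parser` interface are the real extension points:

```go
package myinput

import (
	"github.com/influxdata/telegraf"
	"github.com/influxdata/telegraf/plugins/parsers"
)

type MyInput struct {
	parser parsers.Parser
}

// SetParser is called by Telegraf's configuration layer with a Parser
// built from the plugin's data_format setting.
func (s *MyInput) SetParser(parser parsers.Parser) {
	s.parser = parser
}

func (s *MyInput) Gather(acc telegraf.Accumulator) error {
	// Collect raw bytes however the plugin sees fit (hardcoded here for
	// illustration), then hand them to the configured parser.
	raw := []byte("cpu usage_idle=90")
	metrics, err := s.parser.Parse(raw)
	if err != nil {
		return err
	}
	for _, metric := range metrics {
		acc.AddFields(metric.Name(), metric.Fields(), metric.Tags(), metric.Time())
	}
	return nil
}
```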
You should also add the following to your SampleConfig() return:
```toml
## Data format to consume.
## Each data format has its own unique set of configuration options, read
## more about them here:
## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md
data_format = "influx"
```
Below is the `Parser` interface.
```go
// Parser is an interface defining functions that a parser plugin must satisfy.
type Parser interface {
	// Parse takes a byte buffer separated by newlines
	// ie, `cpu.usage.idle 90\ncpu.usage.busy 10`
	// and parses it into telegraf metrics
	Parse(buf []byte) ([]telegraf.Metric, error)

	// ParseLine takes a single string metric
	// ie, "cpu.usage.idle 90"
	// and parses it into a telegraf metric.
	ParseLine(line string) (telegraf.Metric, error)
}
```
You can view the code
[here](https://github.com/influxdata/telegraf/blob/henrypfhu-master/plugins/parsers/registry.go).
## Service Input Plugins
This section is for developers who want to create new "service" collection
inputs. A service plugin differs from a regular plugin in that it operates
a background service while Telegraf is running. One example would be the `statsd`
plugin, which operates a statsd server.
Service Input Plugins are substantially more complicated than a regular plugin, as they
will require threads and locks to verify data integrity. Service Input Plugins should
be avoided unless there is no way to create their behavior with a regular plugin.
Their interface is quite similar to a regular plugin, with the addition of `Start()`
and `Stop()` methods.
### Service Plugin Guidelines
* Same as the `Plugin` guidelines, except that they must conform to the
[`telegraf.ServiceInput`](https://godoc.org/github.com/influxdata/telegraf#ServiceInput) interface.
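As a rough sketch, a minimal service input might look like the following; the
`Start`/`Stop` methods and the registration are the real requirements, while
the listener details are illustrative:

```go
package mysvc

import (
	"sync"

	"github.com/influxdata/telegraf"
	"github.com/influxdata/telegraf/plugins/inputs"
)

type MySvc struct {
	acc  telegraf.Accumulator
	stop chan struct{}
	wg   sync.WaitGroup
}

func (s *MySvc) Description() string  { return "a demo service input" }
func (s *MySvc) SampleConfig() string { return "" }

// Gather is still required, but a service input typically collects in the
// background, so it can be a no-op.
func (s *MySvc) Gather(acc telegraf.Accumulator) error { return nil }

// Start launches the background listener; metrics are pushed to acc as
// they arrive.
func (s *MySvc) Start(acc telegraf.Accumulator) error {
	s.acc = acc
	s.stop = make(chan struct{})
	s.wg.Add(1)
	// Illustrative: a real plugin would accept connections, parse payloads,
	// and call s.acc.AddFields from this goroutine.
	go s.listen()
	return nil
}

func (s *MySvc) listen() {
	defer s.wg.Done()
	<-s.stop
}

// Stop shuts the listener down and waits for it to exit.
func (s *MySvc) Stop() {
	close(s.stop)
	s.wg.Wait()
}

func init() {
	inputs.Add("mysvc", func() telegraf.Input { return &MySvc{} })
}
```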
## Output Plugins
This section is for developers who want to create a new output sink. Outputs
are created in a similar manner as collection plugins, and their interface has
similar constructs.
### Output Plugin Guidelines
* An output must conform to the [`telegraf.Output`](https://godoc.org/github.com/influxdata/telegraf#Output) interface.
* Outputs should call `outputs.Add` in their `init` function to register themselves.
See below for a quick example.
* To be available within Telegraf itself, plugins must add themselves to the
`github.com/influxdata/telegraf/plugins/outputs/all/all.go` file.
* The `SampleConfig` function should return valid toml that describes how the
plugin can be configured. This is included in `telegraf config`. Please
consult the [SampleConfig](https://github.com/influxdata/telegraf/wiki/SampleConfig)
page for the latest style guidelines.
* The `Description` function should say in one line what this output does.
### Output Example
```go
package simpleoutput

// simpleoutput.go

import (
	"github.com/influxdata/telegraf"
	"github.com/influxdata/telegraf/plugins/outputs"
)

type Simple struct {
	Ok bool
}

func (s *Simple) Description() string {
	return "a demo output"
}

func (s *Simple) SampleConfig() string {
	return `
  ok = true
`
}

func (s *Simple) Connect() error {
	// Make a connection to the URL here
	return nil
}

func (s *Simple) Close() error {
	// Close connection to the URL here
	return nil
}

func (s *Simple) Write(metrics []telegraf.Metric) error {
	for _, metric := range metrics {
		// write `metric` to the output sink here
	}
	return nil
}

func init() {
	outputs.Add("simpleoutput", func() telegraf.Output { return &Simple{} })
}
```
## Output Plugins Writing Arbitrary Data Formats
Some output plugins (such as
[file](https://github.com/influxdata/telegraf/tree/master/plugins/outputs/file))
can write arbitrary output data formats. An overview of these data formats can
be found
[here](https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_OUTPUT.md).
In order to enable this, you must specify a
`SetSerializer(serializer serializers.Serializer)`
function on the plugin object (see the file plugin for an example), as well as
defining `serializer` as a field of the object.
You can then utilize the serializer internally in your plugin, serializing data
before it's written. Telegraf's configuration layer will take care of
instantiating and creating the `Serializer` object.
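A sketch of the wiring; the plugin type and sink are illustrative, while the
`SetSerializer` hook and the `serializers.Serializer` interface are the real
extension points:

```go
package myoutput

import (
	"github.com/influxdata/telegraf"
	"github.com/influxdata/telegraf/plugins/serializers"
)

type MyOutput struct {
	serializer serializers.Serializer
}

// SetSerializer is called by Telegraf's configuration layer with a
// Serializer built from the plugin's data_format setting.
func (o *MyOutput) SetSerializer(serializer serializers.Serializer) {
	o.serializer = serializer
}

func (o *MyOutput) Write(metrics []telegraf.Metric) error {
	for _, metric := range metrics {
		octets, err := o.serializer.Serialize(metric)
		if err != nil {
			return err
		}
		// Send octets to the sink here; for the file plugin this would be
		// a write to the configured file.
		_ = octets
	}
	return nil
}
```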
You should also add the following to your SampleConfig() return:
```toml
## Data format to output.
## Each data format has its own unique set of configuration options, read
## more about them here:
## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_OUTPUT.md
data_format = "influx"
```
## Service Output Plugins
This section is for developers who want to create new "service" output. A
service output differs from a regular output in that it operates a background service
while Telegraf is running. One example would be the `prometheus_client` output,
which operates an HTTP server.
Their interface is quite similar to a regular output, with the addition of `Start()`
and `Stop()` methods.
### Service Output Guidelines
* Same as the `Output` guidelines, except that they must conform to the
  [`telegraf.ServiceOutput`](https://godoc.org/github.com/influxdata/telegraf#ServiceOutput) interface.
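A rough sketch of the shape, loosely modeled on the `prometheus_client`
output; the HTTP details are illustrative:

```go
package mysvcout

import (
	"net"
	"net/http"

	"github.com/influxdata/telegraf"
	"github.com/influxdata/telegraf/plugins/outputs"
)

type MySvcOutput struct {
	listener net.Listener
	server   *http.Server
}

func (o *MySvcOutput) Description() string  { return "a demo service output" }
func (o *MySvcOutput) SampleConfig() string { return "" }
func (o *MySvcOutput) Connect() error       { return nil }
func (o *MySvcOutput) Close() error         { return nil }

// Write would store the latest batch so the HTTP handler can expose it.
func (o *MySvcOutput) Write(metrics []telegraf.Metric) error {
	return nil
}

// Start runs the background HTTP server for the lifetime of the agent.
func (o *MySvcOutput) Start() error {
	l, err := net.Listen("tcp", ":8080")
	if err != nil {
		return err
	}
	o.listener = l
	o.server = &http.Server{Handler: http.NotFoundHandler()} // illustrative handler
	go o.server.Serve(l)
	return nil
}

func (o *MySvcOutput) Stop() {
	o.listener.Close()
}

func init() {
	outputs.Add("mysvcout", func() telegraf.Output { return &MySvcOutput{} })
}
```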
## Processor Plugins
This section is for developers who want to create a new processor plugin.
### Processor Plugin Guidelines
* A processor must conform to the [`telegraf.Processor`](https://godoc.org/github.com/influxdata/telegraf#Processor) interface.
* Processors should call `processors.Add` in their `init` function to register themselves.
See below for a quick example.
* To be available within Telegraf itself, plugins must add themselves to the
`github.com/influxdata/telegraf/plugins/processors/all/all.go` file.
* The `SampleConfig` function should return valid toml that describes how the
  processor can be configured. This is included in the output of `telegraf config`.
* The `Description` function should say in one line what this processor does.
### Processor Example
```go
package printer

// printer.go

import (
	"fmt"

	"github.com/influxdata/telegraf"
	"github.com/influxdata/telegraf/plugins/processors"
)

type Printer struct {
}

var sampleConfig = `
`

func (p *Printer) SampleConfig() string {
	return sampleConfig
}

func (p *Printer) Description() string {
	return "Print all metrics that pass through this filter."
}

func (p *Printer) Apply(in ...telegraf.Metric) []telegraf.Metric {
	for _, metric := range in {
		fmt.Println(metric.String())
	}
	return in
}

func init() {
	processors.Add("printer", func() telegraf.Processor {
		return &Printer{}
	})
}
```
## Aggregator Plugins
This section is for developers who want to create a new aggregator plugin.
### Aggregator Plugin Guidelines
* An aggregator must conform to the [`telegraf.Aggregator`](https://godoc.org/github.com/influxdata/telegraf#Aggregator) interface.
* Aggregators should call `aggregators.Add` in their `init` function to register themselves.
See below for a quick example.
* To be available within Telegraf itself, plugins must add themselves to the
`github.com/influxdata/telegraf/plugins/aggregators/all/all.go` file.
* The `SampleConfig` function should return valid toml that describes how the
  aggregator can be configured. This is included in `telegraf config`.
* The `Description` function should say in one line what this aggregator does.
* The Aggregator plugin will need to keep caches of metrics that have passed
through it. This should be done using the builtin `HashID()` function of each
metric.
* When the `Reset()` function is called, all caches should be cleared.
### Aggregator Example
```go
package min

// min.go

import (
	"github.com/influxdata/telegraf"
	"github.com/influxdata/telegraf/plugins/aggregators"
)

type Min struct {
	// caches for metric fields, names, and tags
	fieldCache map[uint64]map[string]float64
	nameCache  map[uint64]string
	tagCache   map[uint64]map[string]string
}

func NewMin() telegraf.Aggregator {
	m := &Min{}
	m.Reset()
	return m
}

var sampleConfig = `
  ## period is the flush & clear interval of the aggregator.
  period = "30s"
  ## If true drop_original will drop the original metrics and
  ## only send aggregates.
  drop_original = false
`

func (m *Min) SampleConfig() string {
	return sampleConfig
}

func (m *Min) Description() string {
	return "Keep the aggregate min of each metric passing through."
}

func (m *Min) Add(in telegraf.Metric) {
	id := in.HashID()
	if _, ok := m.nameCache[id]; !ok {
		// hit an uncached metric, create caches for first time:
		m.nameCache[id] = in.Name()
		m.tagCache[id] = in.Tags()
		m.fieldCache[id] = make(map[string]float64)
		for k, v := range in.Fields() {
			if fv, ok := convert(v); ok {
				m.fieldCache[id][k] = fv
			}
		}
	} else {
		for k, v := range in.Fields() {
			if fv, ok := convert(v); ok {
				if _, ok := m.fieldCache[id][k]; !ok {
					// hit an uncached field of a cached metric
					m.fieldCache[id][k] = fv
					continue
				}
				if fv < m.fieldCache[id][k] {
					// set new minimum
					m.fieldCache[id][k] = fv
				}
			}
		}
	}
}

func (m *Min) Push(acc telegraf.Accumulator) {
	for id := range m.nameCache {
		fields := map[string]interface{}{}
		for k, v := range m.fieldCache[id] {
			fields[k+"_min"] = v
		}
		acc.AddFields(m.nameCache[id], fields, m.tagCache[id])
	}
}

func (m *Min) Reset() {
	m.fieldCache = make(map[uint64]map[string]float64)
	m.nameCache = make(map[uint64]string)
	m.tagCache = make(map[uint64]map[string]string)
}

func convert(in interface{}) (float64, bool) {
	switch v := in.(type) {
	case float64:
		return v, true
	case int64:
		return float64(v), true
	default:
		return 0, false
	}
}

func init() {
	aggregators.Add("min", func() telegraf.Aggregator {
		return NewMin()
	})
}
```
**Unit Tests:**

Before opening a pull request you should run the linter checks and
the short tests.

**Run static analysis:**

```
make check
```

**Run short tests:**

```
make test
```

**Execute integration tests:**

Running the integration tests requires several docker containers to be
running. You can start the containers with:
@ -497,3 +60,12 @@ make test-all
```

Use `make docker-kill` to stop the containers.
[cla]: https://www.influxdata.com/legal/cla/
[new issue]: https://github.com/influxdata/telegraf/issues/new/choose
[pull request]: https://github.com/influxdata/telegraf/compare
[inputs]: /docs/INPUTS.md
[processors]: /docs/PROCESSORS.md
[aggregators]: /docs/AGGREGATORS.md
[outputs]: /docs/OUTPUTS.md

accumulator.go

View File

@ -1,16 +1,14 @@
package telegraf

import (
	"time"
)

// Accumulator allows adding metrics to the processing flow.
type Accumulator interface {
	// AddFields adds a metric to the accumulator with the given measurement
	// name, fields, and tags (and timestamp). If a timestamp is not provided,
	// then the accumulator sets it to "now".
	AddFields(measurement string,
		fields map[string]interface{},
		tags map[string]string,
@ -40,7 +38,49 @@ type Accumulator interface {
		tags map[string]string,
		t ...time.Time)

	// AddMetric adds a metric to the accumulator.
	AddMetric(Metric)

	// SetPrecision takes two time.Duration objects. If the first is non-zero,
	// it sets that as the precision. Otherwise, it takes the second argument
	// as the order of time that the metrics should be rounded to, with the
	// maximum being 1s.
	SetPrecision(precision, interval time.Duration)

	// Report an error.
	AddError(err error)

	// Upgrade to a TrackingAccumulator with space for maxTracked
	// metrics/batches.
	WithTracking(maxTracked int) TrackingAccumulator
}

// TrackingID uniquely identifies a tracked metric group.
type TrackingID uint64

// DeliveryInfo provides the results of a delivered metric group.
type DeliveryInfo interface {
	// ID is the TrackingID.
	ID() TrackingID

	// Delivered returns true if the metric was processed successfully.
	Delivered() bool
}

// TrackingAccumulator is an Accumulator that provides a signal when the
// metric has been fully processed. Sending more metrics than the accumulator
// has been allocated for without reading status from the Accepted or Rejected
// channels is an error.
type TrackingAccumulator interface {
	Accumulator

	// Add the Metric and arrange for tracking feedback after processing.
	AddTrackingMetric(m Metric) TrackingID

	// Add a group of Metrics and arrange for a signal when the group has been
	// processed.
	AddTrackingMetricGroup(group []Metric) TrackingID

	// Delivered returns a channel that will contain the tracking results.
	Delivered() <-chan DeliveryInfo
}
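As a usage sketch (not part of this commit), a queue-consuming service input
could use the tracking API to acknowledge upstream messages only once the
outputs have delivered the corresponding metrics; the `Message` type and the
`parse` function here are hypothetical:

```go
package example

import "github.com/influxdata/telegraf"

// Message is a stand-in for an upstream queue message (hypothetical).
type Message interface {
	Ack()
	Nack()
}

// consume acks each upstream message only after the outputs have actually
// delivered the metrics parsed from it.
func consume(acc telegraf.Accumulator, msgs <-chan Message, parse func(Message) []telegraf.Metric) {
	tacc := acc.WithTracking(100) // room for 100 in-flight groups

	pending := make(map[telegraf.TrackingID]Message)
	for {
		select {
		case msg := <-msgs:
			id := tacc.AddTrackingMetricGroup(parse(msg))
			pending[id] = msg
		case info := <-tacc.Delivered():
			msg := pending[info.ID()]
			delete(pending, info.ID())
			if info.Delivered() {
				msg.Ack()
			} else {
				msg.Nack()
			}
		}
	}
}
```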

agent/accumulator.go

View File

@ -20,13 +20,13 @@ type MetricMaker interface {
type accumulator struct {
	maker     MetricMaker
	metrics   chan<- telegraf.Metric
	precision time.Duration
}

func NewAccumulator(
	maker MetricMaker,
	metrics chan<- telegraf.Metric,
) telegraf.Accumulator {
	acc := accumulator{
		maker: maker,
@ -42,7 +42,7 @@ func (ac *accumulator) AddFields(
	tags map[string]string,
	t ...time.Time,
) {
	ac.addFields(measurement, tags, fields, telegraf.Untyped, t...)
}

func (ac *accumulator) AddGauge(
@ -51,7 +51,7 @@ func (ac *accumulator) AddGauge(
	tags map[string]string,
	t ...time.Time,
) {
	ac.addFields(measurement, tags, fields, telegraf.Gauge, t...)
}

func (ac *accumulator) AddCounter(
@ -60,7 +60,7 @@ func (ac *accumulator) AddCounter(
	tags map[string]string,
	t ...time.Time,
) {
	ac.addFields(measurement, tags, fields, telegraf.Counter, t...)
}

func (ac *accumulator) AddSummary(
@ -69,7 +69,7 @@ func (ac *accumulator) AddSummary(
	tags map[string]string,
	t ...time.Time,
) {
	ac.addFields(measurement, tags, fields, telegraf.Summary, t...)
}

func (ac *accumulator) AddHistogram(
@ -78,10 +78,16 @@ func (ac *accumulator) AddHistogram(
	tags map[string]string,
	t ...time.Time,
) {
	ac.addFields(measurement, tags, fields, telegraf.Histogram, t...)
}

func (ac *accumulator) AddMetric(m telegraf.Metric) {
	if m := ac.maker.MakeMetric(m); m != nil {
		ac.metrics <- m
	}
}

func (ac *accumulator) addFields(
	measurement string,
	tags map[string]string,
	fields map[string]interface{},
@ -104,13 +110,9 @@ func (ac *accumulator) AddError(err error) {
		return
	}
	NErrors.Incr(1)
	log.Printf("E! [%s]: Error in plugin: %v", ac.maker.Name(), err)
}

func (ac *accumulator) SetPrecision(precision, interval time.Duration) {
	if precision > 0 {
		ac.precision = precision
@ -128,7 +130,7 @@ func (ac *accumulator) SetPrecision(precision, interval time.Duration) {
	}
}

func (ac *accumulator) getTime(t []time.Time) time.Time {
	var timestamp time.Time
	if len(t) > 0 {
		timestamp = t[0]
@ -137,3 +139,43 @@ func (ac accumulator) getTime(t []time.Time) time.Time {
	}
	return timestamp.Round(ac.precision)
}

func (ac *accumulator) WithTracking(maxTracked int) telegraf.TrackingAccumulator {
	return &trackingAccumulator{
		Accumulator: ac,
		delivered:   make(chan telegraf.DeliveryInfo, maxTracked),
	}
}

type trackingAccumulator struct {
	telegraf.Accumulator
	delivered chan telegraf.DeliveryInfo
}

func (a *trackingAccumulator) AddTrackingMetric(m telegraf.Metric) telegraf.TrackingID {
	dm, id := metric.WithTracking(m, a.onDelivery)
	a.AddMetric(dm)
	return id
}

func (a *trackingAccumulator) AddTrackingMetricGroup(group []telegraf.Metric) telegraf.TrackingID {
	db, id := metric.WithGroupTracking(group, a.onDelivery)
	for _, m := range db {
		a.AddMetric(m)
	}
	return id
}

func (a *trackingAccumulator) Delivered() <-chan telegraf.DeliveryInfo {
	return a.delivered
}

func (a *trackingAccumulator) onDelivery(info telegraf.DeliveryInfo) {
	select {
	case a.delivered <- info:
	default:
		// This is a programming error in the input. More items were sent for
		// tracking than space requested.
		panic("channel is full")
	}
}

agent/agent.go

View File

@ -1,9 +1,9 @@
package agent

import (
	"context"
	"fmt"
	"log"
	"runtime"
	"sync"
	"time"
@ -12,187 +12,157 @@ import (
"github.com/influxdata/telegraf/internal" "github.com/influxdata/telegraf/internal"
"github.com/influxdata/telegraf/internal/config" "github.com/influxdata/telegraf/internal/config"
"github.com/influxdata/telegraf/internal/models" "github.com/influxdata/telegraf/internal/models"
"github.com/influxdata/telegraf/selfstat" "github.com/influxdata/telegraf/plugins/serializers/influx"
) )
// Agent runs a set of plugins.
type Agent struct {
	Config *config.Config
}

// NewAgent returns an Agent for the given Config.
func NewAgent(config *config.Config) (*Agent, error) {
	a := &Agent{
		Config: config,
	}
	return a, nil
}
// Run starts and runs the Agent until the context is done.
func (a *Agent) Run(ctx context.Context) error {
	log.Printf("I! [agent] Config: Interval:%s, Quiet:%#v, Hostname:%#v, "+
		"Flush Interval:%s",
		a.Config.Agent.Interval.Duration, a.Config.Agent.Quiet,
		a.Config.Agent.Hostname, a.Config.Agent.FlushInterval.Duration)

	if ctx.Err() != nil {
		return ctx.Err()
	}

	log.Printf("D! [agent] Connecting outputs")
	err := a.connectOutputs(ctx)
	if err != nil {
		return err
	}

	inputC := make(chan telegraf.Metric, 100)
	procC := make(chan telegraf.Metric, 100)
	outputC := make(chan telegraf.Metric, 100)

	startTime := time.Now()

	log.Printf("D! [agent] Starting service inputs")
	err = a.startServiceInputs(ctx, inputC)
	if err != nil {
		return err
	}

	var wg sync.WaitGroup

	src := inputC
	dst := inputC

	wg.Add(1)
	go func(dst chan telegraf.Metric) {
		defer wg.Done()

		err := a.runInputs(ctx, startTime, dst)
		if err != nil {
			log.Printf("E! [agent] Error running inputs: %v", err)
		}

		log.Printf("D! [agent] Stopping service inputs")
		a.stopServiceInputs()

		close(dst)
		log.Printf("D! [agent] Input channel closed")
	}(dst)

	src = dst

	if len(a.Config.Processors) > 0 {
		dst = procC

		wg.Add(1)
		go func(src, dst chan telegraf.Metric) {
			defer wg.Done()

			err := a.runProcessors(src, dst)
			if err != nil {
				log.Printf("E! [agent] Error running processors: %v", err)
			}
			close(dst)
			log.Printf("D! [agent] Processor channel closed")
		}(src, dst)

		src = dst
	}

	if len(a.Config.Aggregators) > 0 {
		dst = outputC

		wg.Add(1)
		go func(src, dst chan telegraf.Metric) {
			defer wg.Done()

			err := a.runAggregators(startTime, src, dst)
			if err != nil {
				log.Printf("E! [agent] Error running aggregators: %v", err)
			}
			close(dst)
			log.Printf("D! [agent] Output channel closed")
		}(src, dst)

		src = dst
	}

	wg.Add(1)
	go func(src chan telegraf.Metric) {
		defer wg.Done()

		err := a.runOutputs(startTime, src)
		if err != nil {
			log.Printf("E! [agent] Error running outputs: %v", err)
		}
	}(src)

	wg.Wait()

	log.Printf("D! [agent] Closing outputs")
	err = a.closeOutputs()
	if err != nil {
		return err
	}

	return nil
}
// Test runs the inputs once and prints the output to stdout in line protocol.
func (a *Agent) Test() error {
	var wg sync.WaitGroup
	metricC := make(chan telegraf.Metric)
	defer func() {
		close(metricC)
		wg.Wait()
	}()

	wg.Add(1)
	go func() {
		defer wg.Done()

		s := influx.NewSerializer()
		s.SetFieldSortOrder(influx.SortFields)
		for metric := range metricC {
			octets, err := s.Serialize(metric)
			if err == nil {
				fmt.Print("> ", string(octets))
			}
		}
	}()

	for _, input := range a.Config.Inputs {
		if _, ok := input.Input.(telegraf.ServiceInput); ok {
			log.Printf("W! [agent] skipping plugin [[%s]]: service inputs not supported in --test mode",
				input.Name())
			continue
		}
@ -200,7 +170,6 @@ func (a *Agent) Test() error {
		acc := NewAccumulator(input, metricC)
		acc.SetPrecision(a.Config.Agent.Precision.Duration,
			a.Config.Agent.Interval.Duration)
		input.SetDefaultTags(a.Config.Tags)

		if err := input.Input.Gather(acc); err != nil {
@ -218,216 +187,445 @@ func (a *Agent) Test() error {
		}
	}

	return nil
}
// runInputs starts and triggers the periodic gather for Inputs.
//
// When the context is done the timers are stopped and this function returns
// after all ongoing Gather calls complete.
func (a *Agent) runInputs(
	ctx context.Context,
	startTime time.Time,
	dst chan<- telegraf.Metric,
) error {
	var wg sync.WaitGroup
	for _, input := range a.Config.Inputs {
		interval := a.Config.Agent.Interval.Duration
		precision := a.Config.Agent.Precision.Duration
		jitter := a.Config.Agent.CollectionJitter.Duration

		// Overwrite agent interval if this plugin has its own.
		if input.Config.Interval != 0 {
			interval = input.Config.Interval
		}

		acc := NewAccumulator(input, dst)
		acc.SetPrecision(precision, interval)

		wg.Add(1)
		go func(input *models.RunningInput) {
			defer wg.Done()

			if a.Config.Agent.RoundInterval {
				err := internal.SleepContext(
					ctx, internal.AlignDuration(startTime, interval))
				if err != nil {
					return
				}
			}

			a.gatherOnInterval(ctx, acc, input, interval, jitter)
		}(input)
	}
	wg.Wait()

	return nil
}
// gatherOnInterval runs an input's gather function periodically until the
// context is done.
func (a *Agent) gatherOnInterval(
	ctx context.Context,
	acc telegraf.Accumulator,
	input *models.RunningInput,
	interval time.Duration,
	jitter time.Duration,
) {
	defer panicRecover(input)

	ticker := time.NewTicker(interval)
	defer ticker.Stop()

	for {
		err := internal.SleepContext(ctx, internal.RandomDuration(jitter))
		if err != nil {
			return
		}

		err = a.gatherOnce(acc, input, interval)
		if err != nil {
			acc.AddError(err)
		}

		select {
		case <-ticker.C:
			continue
		case <-ctx.Done():
			return
		}
	}
}

// gatherOnce runs the input's Gather function once, logging a warning for
// each interval that passes before it completes.
func (a *Agent) gatherOnce(
	acc telegraf.Accumulator,
	input *models.RunningInput,
	timeout time.Duration,
) error {
	ticker := time.NewTicker(timeout)
	defer ticker.Stop()

	done := make(chan error)
	go func() {
		done <- input.Gather(acc)
	}()

	for {
		select {
		case err := <-done:
			return err
		case <-ticker.C:
			log.Printf("W! [agent] input %q did not complete within its interval",
				input.Name())
		}
	}
}

// runProcessors applies processors to metrics.
func (a *Agent) runProcessors(
	src <-chan telegraf.Metric,
	agg chan<- telegraf.Metric,
) error {
	for metric := range src {
		metrics := a.applyProcessors(metric)
		for _, metric := range metrics {
			agg <- metric
		}
	}

	return nil
}

// applyProcessors applies all processors to a metric.
func (a *Agent) applyProcessors(m telegraf.Metric) []telegraf.Metric {
	metrics := []telegraf.Metric{m}
	for _, processor := range a.Config.Processors {
		metrics = processor.Apply(metrics...)
	}

	return metrics
}

// runAggregators triggers the periodic push for Aggregators.
//
// When the context is done a final push will occur and then this function
// will return.
func (a *Agent) runAggregators(
	startTime time.Time,
	src <-chan telegraf.Metric,
	dst chan<- telegraf.Metric,
) error {
	ctx, cancel := context.WithCancel(context.Background())

	var wg sync.WaitGroup
	wg.Add(1)
	go func() {
		defer wg.Done()
		for metric := range src {
			var dropOriginal bool
			for _, agg := range a.Config.Aggregators {
				if ok := agg.Add(metric); ok {
					dropOriginal = true
				}
			}

			if !dropOriginal {
				dst <- metric
			}
		}
		cancel()
	}()

	precision := a.Config.Agent.Precision.Duration
	interval := a.Config.Agent.Interval.Duration
	aggregations := make(chan telegraf.Metric, 100)
	for _, agg := range a.Config.Aggregators {
		wg.Add(1)
		go func(agg *models.RunningAggregator) {
			defer wg.Done()

			if a.Config.Agent.RoundInterval {
				// Aggregators are aligned to the agent interval regardless of
				// their period.
				err := internal.SleepContext(ctx, internal.AlignDuration(startTime, interval))
				if err != nil {
					return
				}
			}

			agg.SetPeriodStart(startTime)

			acc := NewAccumulator(agg, aggregations)
			acc.SetPrecision(precision, interval)
			a.push(ctx, agg, acc)
			close(aggregations)
		}(agg)
	}

	for metric := range aggregations {
		metrics := a.applyProcessors(metric)
		for _, metric := range metrics {
			dst <- metric
		}
	}
	wg.Wait()

	return nil
}
// push runs the push for a single aggregator every period. Simpler than the
// input/output versions since a timeout is less likely here, though the
// output channel can still block for now.
func (a *Agent) push(
	ctx context.Context,
	aggregator *models.RunningAggregator,
	acc telegraf.Accumulator,
) {
	ticker := time.NewTicker(aggregator.Period())
	defer ticker.Stop()

	for {
		select {
		case <-ticker.C:
			break
		case <-ctx.Done():
			aggregator.Push(acc)
			return
		}

		aggregator.Push(acc)
	}
}

// runOutputs triggers the periodic write for Outputs.
//
// When the context is done, outputs continue to run until their buffer is
// closed, after which they flush one final time.
func (a *Agent) runOutputs(
	startTime time.Time,
	src <-chan telegraf.Metric,
) error {
	interval := a.Config.Agent.FlushInterval.Duration
	jitter := a.Config.Agent.FlushJitter.Duration

	ctx, cancel := context.WithCancel(context.Background())

	var wg sync.WaitGroup
	for _, output := range a.Config.Outputs {
		interval := interval
		// Overwrite agent flush_interval if this plugin has its own.
		if output.Config.FlushInterval != 0 {
			interval = output.Config.FlushInterval
		}

		wg.Add(1)
		go func(output *models.RunningOutput) {
			defer wg.Done()

			if a.Config.Agent.RoundInterval {
				err := internal.SleepContext(
					ctx, internal.AlignDuration(startTime, interval))
				if err != nil {
					return
				}
			}

			a.flush(ctx, output, interval, jitter)
		}(output)
	}

	for metric := range src {
		for i, output := range a.Config.Outputs {
			if i == len(a.Config.Outputs)-1 {
				output.AddMetric(metric)
			} else {
				output.AddMetric(metric.Copy())
			}
		}
	}

	log.Println("I! [agent] Hang on, flushing any cached metrics before shutdown")
	cancel()
	wg.Wait()

	return nil
}

// flush runs an output's flush function periodically until the context is
// done.
func (a *Agent) flush(
	ctx context.Context,
	output *models.RunningOutput,
	interval time.Duration,
	jitter time.Duration,
) {
	// since we are watching two channels we need a ticker with the jitter
	// integrated.
	ticker := NewTicker(interval, jitter)
	defer ticker.Stop()

	logError := func(err error) {
		if err != nil {
			log.Printf("E! [agent] Error writing to output [%s]: %v", output.Name, err)
		}
	}

	for {
		// Favor shutdown over other methods.
		select {
		case <-ctx.Done():
			logError(a.flushOnce(output, interval, output.Write))
			return
		default:
		}

		select {
		case <-ticker.C:
			logError(a.flushOnce(output, interval, output.Write))
		case <-output.BatchReady:
			// Favor the ticker over batch ready
			select {
			case <-ticker.C:
				logError(a.flushOnce(output, interval, output.Write))
			default:
				logError(a.flushOnce(output, interval, output.WriteBatch))
			}
		case <-ctx.Done():
			logError(a.flushOnce(output, interval, output.Write))
			return
		}
	}
}

// flushOnce runs the output's Write function once, logging a warning for
// each interval that passes before it completes.
func (a *Agent) flushOnce(
	output *models.RunningOutput,
	timeout time.Duration,
	writeFunc func() error,
) error {
	ticker := time.NewTicker(timeout)
	defer ticker.Stop()

	done := make(chan error)
	go func() {
		done <- writeFunc()
	}()

	for {
		select {
		case err := <-done:
			output.LogBufferStatus()
			return err
		case <-ticker.C:
			log.Printf("W! [agent] output %q did not complete within its flush interval",
				output.Name)
			output.LogBufferStatus()
		}
	}
}

// connectOutputs connects to all outputs.
func (a *Agent) connectOutputs(ctx context.Context) error {
	for _, output := range a.Config.Outputs {
		log.Printf("D! [agent] Attempting connection to output: %s", output.Name)
		err := output.Output.Connect()
		if err != nil {
			log.Printf("E! [agent] Failed to connect to output %s, retrying in 15s, "+
				"error was '%s'", output.Name, err)

			err := internal.SleepContext(ctx, 15*time.Second)
			if err != nil {
				return err
			}

			err = output.Output.Connect()
			if err != nil {
				return err
			}
		}
		log.Printf("D! [agent] Successfully connected to output: %s", output.Name)
	}
	return nil
}

// closeOutputs closes all outputs.
func (a *Agent) closeOutputs() error {
	var err error
	for _, output := range a.Config.Outputs {
		err = output.Output.Close()
	}
	return err
}

// startServiceInputs starts all service inputs.
func (a *Agent) startServiceInputs(
	ctx context.Context,
	dst chan<- telegraf.Metric,
) error {
	started := []telegraf.ServiceInput{}

	for _, input := range a.Config.Inputs {
		if si, ok := input.Input.(telegraf.ServiceInput); ok {
			// Service input plugins are not subject to timestamp rounding.
			// This only applies to the accumulator passed to Start(); the
			// Gather() accumulator does apply rounding according to the
			// precision agent setting.
			acc := NewAccumulator(input, dst)
			acc.SetPrecision(time.Nanosecond, 0)

			err := si.Start(acc)
			if err != nil {
				log.Printf("E! [agent] Service for input %s failed to start: %v",
					input.Name(), err)

				for _, si := range started {
					si.Stop()
				}

				return err
			}

			started = append(started, si)
		}
	}

	return nil
}

// stopServiceInputs stops all service inputs.
func (a *Agent) stopServiceInputs() {
	for _, input := range a.Config.Inputs {
		if si, ok := input.Input.(telegraf.ServiceInput); ok {
			si.Stop()
		}
	}
}

// panicRecover displays an error if an input panics.
func panicRecover(input *models.RunningInput) {
	if err := recover(); err != nil {
		trace := make([]byte, 2048)
		runtime.Stack(trace, true)
		log.Printf("E! FATAL: Input [%s] panicked: %s, Stack:\n%s",
			input.Name(), err, trace)
		log.Println("E! PLEASE REPORT THIS PANIC ON GITHUB with " +
			"stack trace, configuration, and OS information: " +
			"https://github.com/influxdata/telegraf/issues/new/choose")
	}
}

agent/tick.go Normal file
View File

@ -0,0 +1,57 @@
package agent

import (
	"context"
	"sync"
	"time"

	"github.com/influxdata/telegraf/internal"
)

type Ticker struct {
	C          chan time.Time
	ticker     *time.Ticker
	jitter     time.Duration
	wg         sync.WaitGroup
	cancelFunc context.CancelFunc
}

func NewTicker(
	interval time.Duration,
	jitter time.Duration,
) *Ticker {
	ctx, cancel := context.WithCancel(context.Background())

	t := &Ticker{
		C:          make(chan time.Time, 1),
		ticker:     time.NewTicker(interval),
		jitter:     jitter,
		cancelFunc: cancel,
	}

	t.wg.Add(1)
	go t.relayTime(ctx)

	return t
}

func (t *Ticker) Stop() {
	t.cancelFunc()
	t.wg.Wait()
}

func (t *Ticker) relayTime(ctx context.Context) {
	defer t.wg.Done()
	for {
		select {
		case tm := <-t.ticker.C:
			internal.SleepContext(ctx, internal.RandomDuration(t.jitter))
			select {
			case t.C <- tm:
			default:
			}
		case <-ctx.Done():
			return
		}
	}
}
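A quick usage sketch: the jittered tick is delivered on `C`, and a tick that
fires while the consumer is still busy is dropped rather than queued, since
`C` has capacity 1 (the interval and jitter values here are illustrative):

```go
package agent

import (
	"fmt"
	"time"
)

// exampleTicker flushes roughly every 10s with up to 2s of added jitter.
func exampleTicker() {
	ticker := NewTicker(10*time.Second, 2*time.Second)
	defer ticker.Stop()

	for i := 0; i < 3; i++ {
		tm := <-ticker.C
		fmt.Println("flush at", tm)
	}
}
```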

cmd/telegraf/telegraf.go

View File

@ -1,6 +1,8 @@
package main

import (
	"context"
	"errors"
	"flag"
	"fmt"
	"log"
@ -78,112 +80,111 @@ func reloadLoop(
	for <-reload {
		reload <- false

		ctx, cancel := context.WithCancel(context.Background())

		signals := make(chan os.Signal)
		signal.Notify(signals, os.Interrupt, syscall.SIGHUP, syscall.SIGTERM)
		go func() {
			select {
			case sig := <-signals:
				if sig == syscall.SIGHUP {
					log.Printf("I! Reloading Telegraf config")
					<-reload
					reload <- true
				}
				cancel()
			case <-stop:
				cancel()
			}
		}()

		err := runAgent(ctx, inputFilters, outputFilters)
		if err != nil {
			log.Fatalf("E! [telegraf] Error running agent: %v", err)
		}
	}
}
func runAgent(ctx context.Context,
	inputFilters []string,
	outputFilters []string,
) error {
	// If no other options are specified, load the config file and run.
	c := config.NewConfig()
	c.OutputFilters = outputFilters
	c.InputFilters = inputFilters
	err := c.LoadConfig(*fConfig)
	if err != nil {
		return err
	}

	if *fConfigDirectory != "" {
		err = c.LoadDirectory(*fConfigDirectory)
		if err != nil {
			return err
		}
	}
	if !*fTest && len(c.Outputs) == 0 {
		return errors.New("Error: no outputs found, did you provide a valid config file?")
	}
	if len(c.Inputs) == 0 {
		return errors.New("Error: no inputs found, did you provide a valid config file?")
	}

	if int64(c.Agent.Interval.Duration) <= 0 {
		return fmt.Errorf("Agent interval must be positive, found %s",
			c.Agent.Interval.Duration)
	}

	if int64(c.Agent.FlushInterval.Duration) <= 0 {
		return fmt.Errorf("Agent flush_interval must be positive; found %s",
			c.Agent.Interval.Duration)
	}

	ag, err := agent.NewAgent(c)
	if err != nil {
		return err
	}

	// Setup logging
	logger.SetupLogging(
		ag.Config.Agent.Debug || *fDebug,
		ag.Config.Agent.Quiet || *fQuiet,
		ag.Config.Agent.Logfile,
	)

	if *fTest {
		return ag.Test()
	}

	log.Printf("I! Starting Telegraf %s", version)
	log.Printf("I! Loaded inputs: %s", strings.Join(c.InputNames(), " "))
	log.Printf("I! Loaded aggregators: %s", strings.Join(c.AggregatorNames(), " "))
	log.Printf("I! Loaded processors: %s", strings.Join(c.ProcessorNames(), " "))
	log.Printf("I! Loaded outputs: %s", strings.Join(c.OutputNames(), " "))
	log.Printf("I! Tags enabled: %s", c.ListTags())

	if *fPidfile != "" {
		f, err := os.OpenFile(*fPidfile, os.O_CREATE|os.O_WRONLY, 0644)
		if err != nil {
			log.Printf("E! Unable to create pidfile: %s", err)
		} else {
			fmt.Fprintf(f, "%d\n", os.Getpid())

			f.Close()

			defer func() {
				err := os.Remove(*fPidfile)
				if err != nil {
					log.Printf("E! Unable to remove pidfile: %s", err)
				}
			}()
		}
	}

	return ag.Run(ctx)
}
func usageExit(rc int) {
	fmt.Println(internal.Usage)
	os.Exit(rc)

docs/AGGREGATORS.md Normal file
View File

@ -0,0 +1,126 @@
### Aggregator Plugins
This section is for developers who want to create a new aggregator plugin.
### Aggregator Plugin Guidelines
* An aggregator must conform to the [telegraf.Aggregator][] interface.
* Aggregators should call `aggregators.Add` in their `init` function to
  register themselves. See below for a quick example.
* To be available within Telegraf itself, plugins must add themselves to the
  `github.com/influxdata/telegraf/plugins/aggregators/all/all.go` file.
* The `SampleConfig` function should return valid toml that describes how the
  plugin can be configured. This is included in `telegraf config`. Please
  consult the [SampleConfig][] page for the latest style guidelines.
* The `Description` function should say in one line what this aggregator does.
* The Aggregator plugin will need to keep caches of metrics that have passed
  through it. This should be done using the builtin `HashID()` function of
  each metric.
* When the `Reset()` function is called, all caches should be cleared.
### Aggregator Plugin Example
```go
package min

// min.go

import (
	"github.com/influxdata/telegraf"
	"github.com/influxdata/telegraf/plugins/aggregators"
)

type Min struct {
	// caches for metric fields, names, and tags
	fieldCache map[uint64]map[string]float64
	nameCache  map[uint64]string
	tagCache   map[uint64]map[string]string
}

func NewMin() telegraf.Aggregator {
	m := &Min{}
	m.Reset()
	return m
}

var sampleConfig = `
  ## period is the flush & clear interval of the aggregator.
  period = "30s"
  ## If true drop_original will drop the original metrics and
  ## only send aggregates.
  drop_original = false
`

func (m *Min) SampleConfig() string {
	return sampleConfig
}

func (m *Min) Description() string {
	return "Keep the aggregate min of each metric passing through."
}

func (m *Min) Add(in telegraf.Metric) {
	id := in.HashID()
	if _, ok := m.nameCache[id]; !ok {
		// hit an uncached metric, create caches for first time:
		m.nameCache[id] = in.Name()
		m.tagCache[id] = in.Tags()
		m.fieldCache[id] = make(map[string]float64)
		for k, v := range in.Fields() {
			if fv, ok := convert(v); ok {
				m.fieldCache[id][k] = fv
			}
		}
	} else {
		for k, v := range in.Fields() {
			if fv, ok := convert(v); ok {
				if _, ok := m.fieldCache[id][k]; !ok {
					// hit an uncached field of a cached metric
					m.fieldCache[id][k] = fv
					continue
				}
				if fv < m.fieldCache[id][k] {
					// set new minimum
					m.fieldCache[id][k] = fv
				}
			}
		}
	}
}

func (m *Min) Push(acc telegraf.Accumulator) {
	for id := range m.nameCache {
		fields := map[string]interface{}{}
		for k, v := range m.fieldCache[id] {
			fields[k+"_min"] = v
		}
		acc.AddFields(m.nameCache[id], fields, m.tagCache[id])
	}
}

func (m *Min) Reset() {
	m.fieldCache = make(map[uint64]map[string]float64)
	m.nameCache = make(map[uint64]string)
	m.tagCache = make(map[uint64]map[string]string)
}

func convert(in interface{}) (float64, bool) {
	switch v := in.(type) {
	case float64:
		return v, true
	case int64:
		return float64(v), true
	default:
		return 0, false
	}
}

func init() {
	aggregators.Add("min", func() telegraf.Aggregator {
		return NewMin()
	})
}
```
[telegraf.Aggregator]: https://godoc.org/github.com/influxdata/telegraf#Aggregator
[SampleConfig]: https://github.com/influxdata/telegraf/wiki/SampleConfig

docs/CONFIGURATION.md

View File

@ -106,6 +106,14 @@ emitted from the input plugin.
### Output Configuration
- **flush_interval**: The maximum time between flushes. Use this setting to
override the agent `flush_interval` on a per plugin basis.
- **metric_batch_size**: The maximum number of metrics to send at once. Use
this setting to override the agent `metric_batch_size` on a per plugin basis.
- **metric_buffer_limit**: The maximum number of unsent metrics to buffer.
Use this setting to override the agent `metric_buffer_limit` on a per plugin
basis.
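For example, a sketch of a per-plugin override (the plugin and the values are
illustrative):

```toml
[[outputs.influxdb]]
  urls = ["http://localhost:8086"]

  ## Flush this output every 5s, sending at most 500 metrics per write and
  ## buffering up to 50000 unsent metrics.
  flush_interval = "5s"
  metric_batch_size = 500
  metric_buffer_limit = 50000
```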
The [metric filtering](#metric-filtering) parameters can be used to limit what metrics are
emitted from the output plugin.

docs/INPUTS.md Normal file
View File

@ -0,0 +1,143 @@
### Input Plugins
This section is for developers who want to create new collection inputs.
Telegraf is entirely plugin driven. This interface allows for operators to
pick and choose what is gathered and makes it easy for developers
to create new ways of generating metrics.

Plugin authorship is kept as simple as possible to encourage people to develop
and submit new inputs.
### Input Plugin Guidelines
- A plugin must conform to the [telegraf.Input][] interface.
- Input Plugins should call `inputs.Add` in their `init` function to register
themselves. See below for a quick example.
- Input Plugins must be added to the
`github.com/influxdata/telegraf/plugins/inputs/all/all.go` file.
- The `SampleConfig` function should return valid toml that describes how the
plugin can be configured. This is included in `telegraf config`. Please
consult the [SampleConfig][] page for the latest style
guidelines.
- The `Description` function should say in one line what this plugin does.
Let's say you've written a plugin that emits metrics about processes on the
current host.
### Input Plugin Example
```go
package simple

// simple.go

import (
	"github.com/influxdata/telegraf"
	"github.com/influxdata/telegraf/plugins/inputs"
)

type Simple struct {
	Ok bool
}

func (s *Simple) Description() string {
	return "a demo plugin"
}

func (s *Simple) SampleConfig() string {
	return `
  ## Indicate if everything is fine
  ok = true
`
}

func (s *Simple) Gather(acc telegraf.Accumulator) error {
	if s.Ok {
		acc.AddFields("state", map[string]interface{}{"value": "pretty good"}, nil)
	} else {
		acc.AddFields("state", map[string]interface{}{"value": "not great"}, nil)
	}

	return nil
}

func init() {
	inputs.Add("simple", func() telegraf.Input { return &Simple{} })
}
```
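With the `init` registration above, the plugin can then be enabled in
`telegraf.conf`; a minimal sketch:

```toml
[[inputs.simple]]
  ## Indicate if everything is fine
  ok = true
```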
### Development
* Run `make static` followed by `make plugin-[pluginName]` to spin up a docker
dev environment using docker-compose.
* ***[Optional]*** When developing a plugin, add a `dev` directory with a
`docker-compose.yml` and `telegraf.conf` as well as any other supporting
files, where sensible.
### Typed Metrics
In addition to the `AddFields` function, the accumulator also supports
functions to add typed metrics: `AddGauge`, `AddCounter`, etc. Metric types
are ignored by the InfluxDB output, but can be used for other outputs, such as
[prometheus][prom metric types].
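For example, reusing the `Simple` plugin from above, a gather function might
emit a counter and a gauge explicitly (a sketch; the measurement and field
names are illustrative):

```go
func (s *Simple) Gather(acc telegraf.Accumulator) error {
	// A monotonically increasing value is best reported as a counter.
	acc.AddCounter("requests", map[string]interface{}{"total": 42}, nil)
	// A point-in-time value is best reported as a gauge.
	acc.AddGauge("queue", map[string]interface{}{"depth": 7}, nil)
	return nil
}
```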
### Data Formats
Some input plugins, such as the [exec][] plugin, can accept any of the
supported [input data formats][].
To enable this, you must implement a `SetParser(parser parsers.Parser)`
method on the plugin object (see the exec plugin for an example) and define
`parser` as a field of the object.
You can then utilize the parser internally in your plugin, parsing data as you
see fit. Telegraf's configuration layer will take care of instantiating and
configuring the `Parser` object.
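A minimal sketch of the wiring, assuming the plugin has some source of raw
bytes (the plugin and measurement names are illustrative):

```go
package myinput

import (
	"github.com/influxdata/telegraf"
	"github.com/influxdata/telegraf/plugins/parsers"
)

type MyInput struct {
	parser parsers.Parser
}

// SetParser is called by Telegraf's configuration layer with the parser
// built from the `data_format` option.
func (p *MyInput) SetParser(parser parsers.Parser) {
	p.parser = parser
}

func (p *MyInput) Gather(acc telegraf.Accumulator) error {
	raw := []byte("example value=42\n") // raw bytes gathered by the plugin
	metrics, err := p.parser.Parse(raw)
	if err != nil {
		return err
	}
	for _, m := range metrics {
		acc.AddFields(m.Name(), m.Fields(), m.Tags(), m.Time())
	}
	return nil
}
```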
Add the following to the `SampleConfig()`:
```toml
## Data format to consume.
## Each data format has its own unique set of configuration options, read
## more about them here:
## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md
data_format = "influx"
```
### Service Input Plugins
This section is for developers who want to create new "service" collection
inputs. A service plugin differs from a regular plugin in that it operates a
background service while Telegraf is running. One example would be the
`statsd` plugin, which operates a statsd server.
Service Input Plugins are substantially more complicated than regular
plugins, as they require goroutines and locks to maintain data integrity.
Service Input Plugins should be avoided unless there is no way to create their
behavior with a regular plugin.
To create a Service Input implement the [telegraf.ServiceInput][] interface.
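A minimal skeleton (a sketch only; the plugin name is illustrative, and real
service inputs need locking and careful shutdown):

```go
package myservice

import (
	"github.com/influxdata/telegraf"
	"github.com/influxdata/telegraf/plugins/inputs"
)

type MyService struct {
	acc  telegraf.Accumulator
	done chan struct{}
}

func (s *MyService) Description() string  { return "a demo service input" }
func (s *MyService) SampleConfig() string { return "" }

// Gather is called on the usual interval; for many service inputs it is a
// no-op because metrics are added as they arrive.
func (s *MyService) Gather(acc telegraf.Accumulator) error { return nil }

// Start retains the Accumulator and launches the background service.
func (s *MyService) Start(acc telegraf.Accumulator) error {
	s.acc = acc
	s.done = make(chan struct{})
	go s.listen()
	return nil
}

// Stop shuts the service down; the Accumulator must not be used after Stop
// returns.
func (s *MyService) Stop() {
	close(s.done)
}

func (s *MyService) listen() {
	// Receive data from a socket, queue, etc. and add metrics via s.acc.
	<-s.done
}

func init() {
	inputs.Add("myservice", func() telegraf.Input { return &MyService{} })
}
```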
### Metric Tracking
Metric Tracking provides a way to be notified when metrics have been
successfully written to their outputs or otherwise discarded. This allows
inputs to be created that function as reliable queue consumers.
To get started with metric tracking, call `WithTracking` on the
[telegraf.Accumulator][]. Add metrics using the `AddTrackingMetricGroup`
function on the returned [telegraf.TrackingAccumulator][] and store the
`TrackingID`. The `Delivered()` channel will return a type with information
about the final delivery status of the metric group.
Check the [amqp_consumer][] for an example implementation.
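In outline (a sketch only; `msg`, `undelivered`, and the queue's ack API are
hypothetical stand-ins for your input's own types):

```go
// At Start, wrap the accumulator; the argument limits undelivered groups.
tacc := acc.WithTracking(maxUndelivered)

// For each message consumed from the queue:
metrics := parse(msg.Body) // parse the payload into metrics (not shown)
id := tacc.AddTrackingMetricGroup(metrics)
undelivered[id] = msg

// In a separate goroutine, resolve each group's final delivery status:
for info := range tacc.Delivered() {
	msg := undelivered[info.ID()]
	delete(undelivered, info.ID())
	if info.Delivered() {
		msg.Ack() // hypothetical queue API
	} else {
		msg.Nack() // hypothetical queue API
	}
}
```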
[exec]: https://github.com/influxdata/telegraf/tree/master/plugins/inputs/exec
[amqp_consumer]: https://github.com/influxdata/telegraf/tree/master/plugins/inputs/amqp_consumer
[prom metric types]: https://prometheus.io/docs/concepts/metric_types/
[input data formats]: https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md
[SampleConfig]: https://github.com/influxdata/telegraf/wiki/SampleConfig
[telegraf.Input]: https://godoc.org/github.com/influxdata/telegraf#Input
[telegraf.ServiceInput]: https://godoc.org/github.com/influxdata/telegraf#ServiceInput
[telegraf.Accumulator]: https://godoc.org/github.com/influxdata/telegraf#Accumulator
[telegraf.TrackingAccumulator]: https://godoc.org/github.com/influxdata/telegraf#TrackingAccumulator

docs/OUTPUTS.md Normal file

@@ -0,0 +1,95 @@
### Output Plugins
This section is for developers who want to create a new output sink. Outputs
are created in a similar manner to collection plugins, and their interface has
similar constructs.
### Output Plugin Guidelines
- An output must conform to the [telegraf.Output][] interface.
- Outputs should call `outputs.Add` in their `init` function to register
themselves. See below for a quick example.
- To be available within Telegraf itself, plugins must add themselves to the
`github.com/influxdata/telegraf/plugins/outputs/all/all.go` file.
- The `SampleConfig` function should return valid toml that describes how the
plugin can be configured. This is included in `telegraf config`. Please
consult the [SampleConfig][] page for the latest style guidelines.
- The `Description` function should say in one line what this output does.
### Output Plugin Example
```go
package simpleoutput

// simpleoutput.go

import (
	"github.com/influxdata/telegraf"
	"github.com/influxdata/telegraf/plugins/outputs"
)

type Simple struct {
	Ok bool
}

func (s *Simple) Description() string {
	return "a demo output"
}

func (s *Simple) SampleConfig() string {
	return `
  ok = true
`
}

func (s *Simple) Connect() error {
	// Make a connection to the URL here
	return nil
}

func (s *Simple) Close() error {
	// Close connection to the URL here
	return nil
}

func (s *Simple) Write(metrics []telegraf.Metric) error {
	for _, metric := range metrics {
		// write `metric` to the output sink here
		_ = metric
	}
	return nil
}

func init() {
	outputs.Add("simpleoutput", func() telegraf.Output { return &Simple{} })
}
```
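After registration, the output can be enabled in `telegraf.conf`; a minimal
sketch:

```toml
[[outputs.simpleoutput]]
  ok = true
```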
### Data Formats
Some output plugins, such as the [file][] plugin, can write in any of the
supported [output data formats][].
To enable this, you must implement a
`SetSerializer(serializer serializers.Serializer)`
method on the plugin object (see the file plugin for an example) and define
`serializer` as a field of the object.
You can then utilize the serializer internally in your plugin, serializing data
before it's written. Telegraf's configuration layer will take care of
instantiating and configuring the `Serializer` object.
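The wiring mirrors the parser support on inputs; a minimal sketch (the plugin
name is illustrative):

```go
package myoutput

import (
	"github.com/influxdata/telegraf"
	"github.com/influxdata/telegraf/plugins/serializers"
)

type MyOutput struct {
	serializer serializers.Serializer
}

// SetSerializer is called by Telegraf's configuration layer with the
// serializer built from the `data_format` option.
func (o *MyOutput) SetSerializer(serializer serializers.Serializer) {
	o.serializer = serializer
}

func (o *MyOutput) Write(metrics []telegraf.Metric) error {
	for _, m := range metrics {
		octets, err := o.serializer.Serialize(m)
		if err != nil {
			return err
		}
		// send octets to the sink here
		_ = octets
	}
	return nil
}
```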
You should also add the following to your `SampleConfig()`:
```toml
## Data format to output.
## Each data format has its own unique set of configuration options, read
## more about them here:
## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_OUTPUT.md
data_format = "influx"
```
[file]: https://github.com/influxdata/telegraf/tree/master/plugins/outputs/file
[output data formats]: https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_OUTPUT.md
[SampleConfig]: https://github.com/influxdata/telegraf/wiki/SampleConfig
[telegraf.Output]: https://godoc.org/github.com/influxdata/telegraf#Output

docs/PROCESSORS.md Normal file

@@ -0,0 +1,63 @@
### Processor Plugins
This section is for developers who want to create a new processor plugin.
### Processor Plugin Guidelines
* A processor must conform to the [telegraf.Processor][] interface.
* Processors should call `processors.Add` in their `init` function to register
themselves. See below for a quick example.
* To be available within Telegraf itself, plugins must add themselves to the
`github.com/influxdata/telegraf/plugins/processors/all/all.go` file.
* The `SampleConfig` function should return valid toml that describes how the
  plugin can be configured. This is included in the output of `telegraf
  config`. Please consult the [SampleConfig][] page for the latest style
  guidelines.
* The `Description` function should say in one line what this processor does.
### Processor Plugin Example
```go
package printer

// printer.go

import (
	"fmt"

	"github.com/influxdata/telegraf"
	"github.com/influxdata/telegraf/plugins/processors"
)

type Printer struct {
}

var sampleConfig = `
`

func (p *Printer) SampleConfig() string {
	return sampleConfig
}

func (p *Printer) Description() string {
	return "Print all metrics that pass through this filter."
}

func (p *Printer) Apply(in ...telegraf.Metric) []telegraf.Metric {
	for _, metric := range in {
		fmt.Println(metric.String())
	}
	return in
}

func init() {
	processors.Add("printer", func() telegraf.Processor {
		return &Printer{}
	})
}
```
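Once registered, the processor can be enabled in `telegraf.conf`; a minimal
sketch:

```toml
[[processors.printer]]
```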
[SampleConfig]: https://github.com/influxdata/telegraf/wiki/SampleConfig
[telegraf.Processor]: https://godoc.org/github.com/influxdata/telegraf#Processor

input.go

@@ -13,17 +13,10 @@ type Input interface {
 }
 
 type ServiceInput interface {
-	// SampleConfig returns the default configuration of the Input
-	SampleConfig() string
-
-	// Description returns a one-sentence description on the Input
-	Description() string
-
-	// Gather takes in an accumulator and adds the metrics that the Input
-	// gathers. This is called every "interval"
-	Gather(Accumulator) error
-
-	// Start starts the ServiceInput's service, whatever that may be
+	Input
+
+	// Start the ServiceInput. The Accumulator may be retained and used until
+	// Stop returns.
 	Start(Accumulator) error
 
 	// Stop stops the services and closes any necessary channels and connections
internal/buffer/buffer.go

@@ -1,130 +0,0 @@
package buffer
import (
"sync"
"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/selfstat"
)
var (
MetricsWritten = selfstat.Register("agent", "metrics_written", map[string]string{})
MetricsDropped = selfstat.Register("agent", "metrics_dropped", map[string]string{})
)
// Buffer is an object for storing metrics in a circular buffer.
type Buffer struct {
sync.Mutex
buf []telegraf.Metric
first int
last int
size int
empty bool
}
// NewBuffer returns a Buffer
// size is the maximum number of metrics that Buffer will cache. If Add is
// called when the buffer is full, then the oldest metric(s) will be dropped.
func NewBuffer(size int) *Buffer {
return &Buffer{
buf: make([]telegraf.Metric, size),
first: 0,
last: 0,
size: size,
empty: true,
}
}
// IsEmpty returns true if Buffer is empty.
func (b *Buffer) IsEmpty() bool {
return b.empty
}
// Len returns the current length of the buffer.
func (b *Buffer) Len() int {
if b.empty {
return 0
} else if b.first <= b.last {
return b.last - b.first + 1
}
// Spans the end of array.
// size - gap in the middle
return b.size - (b.first - b.last - 1) // size - gap
}
func (b *Buffer) push(m telegraf.Metric) {
// Empty
if b.empty {
b.last = b.first // Reset
b.buf[b.last] = m
b.empty = false
return
}
b.last++
b.last %= b.size
// Full
if b.first == b.last {
MetricsDropped.Incr(1)
b.first = (b.first + 1) % b.size
}
b.buf[b.last] = m
}
// Add adds metrics to the buffer.
func (b *Buffer) Add(metrics ...telegraf.Metric) {
b.Lock()
defer b.Unlock()
for i := range metrics {
MetricsWritten.Incr(1)
b.push(metrics[i])
}
}
// Batch returns a batch of metrics of size batchSize.
// the batch will be of maximum length batchSize. It can be less than batchSize,
// if the length of Buffer is less than batchSize.
func (b *Buffer) Batch(batchSize int) []telegraf.Metric {
b.Lock()
defer b.Unlock()
outLen := min(b.Len(), batchSize)
out := make([]telegraf.Metric, outLen)
if outLen == 0 {
return out
}
// We copy everything right of first up to last, count or end
// b.last >= rightInd || b.last < b.first
// therefore wont copy past b.last
rightInd := min(b.size, b.first+outLen) - 1
copyCount := copy(out, b.buf[b.first:rightInd+1])
// We've emptied the ring
if rightInd == b.last {
b.empty = true
}
b.first = rightInd + 1
b.first %= b.size
// We circle back for the rest
if copyCount < outLen {
right := min(b.last, outLen-copyCount)
copy(out[copyCount:], b.buf[b.first:right+1])
// We've emptied the ring
if right == b.last {
b.empty = true
}
b.first = right + 1
b.first %= b.size
}
return out
}
func min(a, b int) int {
if b < a {
return b
}
return a
}

internal/buffer/buffer_test.go

@@ -1,203 +0,0 @@
package buffer
import (
"sync"
"sync/atomic"
"testing"
"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/testutil"
"github.com/stretchr/testify/assert"
)
var metricList = []telegraf.Metric{
testutil.TestMetric(2, "mymetric1"),
testutil.TestMetric(1, "mymetric2"),
testutil.TestMetric(11, "mymetric3"),
testutil.TestMetric(15, "mymetric4"),
testutil.TestMetric(8, "mymetric5"),
}
func makeBench5(b *testing.B, freq, batchSize int) {
const k = 1000
var wg sync.WaitGroup
buf := NewBuffer(10000)
m := testutil.TestMetric(1, "mymetric")
for i := 0; i < b.N; i++ {
buf.Add(m, m, m, m, m)
if i%(freq*k) == 0 {
wg.Add(1)
go func() {
buf.Batch(batchSize * k)
wg.Done()
}()
}
}
// Flush
buf.Batch(b.N)
wg.Wait()
}
func makeBenchStrict(b *testing.B, freq, batchSize int) {
const k = 1000
var count uint64
var wg sync.WaitGroup
buf := NewBuffer(10000)
m := testutil.TestMetric(1, "mymetric")
for i := 0; i < b.N; i++ {
buf.Add(m)
if i%(freq*k) == 0 {
wg.Add(1)
go func() {
defer wg.Done()
l := len(buf.Batch(batchSize * k))
atomic.AddUint64(&count, uint64(l))
}()
}
}
// Flush
wg.Add(1)
go func() {
l := len(buf.Batch(b.N))
atomic.AddUint64(&count, uint64(l))
wg.Done()
}()
wg.Wait()
if count != uint64(b.N) {
b.Errorf("not all metrics came out. %d of %d", count, b.N)
}
}
func makeBench(b *testing.B, freq, batchSize int) {
const k = 1000
var wg sync.WaitGroup
buf := NewBuffer(10000)
m := testutil.TestMetric(1, "mymetric")
for i := 0; i < b.N; i++ {
buf.Add(m)
if i%(freq*k) == 0 {
wg.Add(1)
go func() {
buf.Batch(batchSize * k)
wg.Done()
}()
}
}
wg.Wait()
// Flush
buf.Batch(b.N)
}
func BenchmarkBufferBatch5Add(b *testing.B) {
makeBench5(b, 100, 101)
}
func BenchmarkBufferBigInfrequentBatchCatchup(b *testing.B) {
makeBench(b, 100, 101)
}
func BenchmarkBufferOftenBatch(b *testing.B) {
makeBench(b, 1, 1)
}
func BenchmarkBufferAlmostBatch(b *testing.B) {
makeBench(b, 10, 9)
}
func BenchmarkBufferSlowBatch(b *testing.B) {
makeBench(b, 10, 1)
}
func BenchmarkBufferBatchNoDrop(b *testing.B) {
makeBenchStrict(b, 1, 4)
}
func BenchmarkBufferCatchup(b *testing.B) {
buf := NewBuffer(10000)
m := testutil.TestMetric(1, "mymetric")
for i := 0; i < b.N; i++ {
buf.Add(m)
}
buf.Batch(b.N)
}
func BenchmarkAddMetrics(b *testing.B) {
buf := NewBuffer(10000)
m := testutil.TestMetric(1, "mymetric")
for n := 0; n < b.N; n++ {
buf.Add(m)
}
}
func TestNewBufferBasicFuncs(t *testing.T) {
b := NewBuffer(10)
MetricsDropped.Set(0)
MetricsWritten.Set(0)
assert.True(t, b.IsEmpty())
assert.Zero(t, b.Len())
assert.Zero(t, MetricsDropped.Get())
assert.Zero(t, MetricsWritten.Get())
m := testutil.TestMetric(1, "mymetric")
b.Add(m)
assert.False(t, b.IsEmpty())
assert.Equal(t, b.Len(), 1)
assert.Equal(t, int64(0), MetricsDropped.Get())
assert.Equal(t, int64(1), MetricsWritten.Get())
b.Add(metricList...)
assert.False(t, b.IsEmpty())
assert.Equal(t, b.Len(), 6)
assert.Equal(t, int64(0), MetricsDropped.Get())
assert.Equal(t, int64(6), MetricsWritten.Get())
}
func TestDroppingMetrics(t *testing.T) {
b := NewBuffer(10)
MetricsDropped.Set(0)
MetricsWritten.Set(0)
// Add up to the size of the buffer
b.Add(metricList...)
b.Add(metricList...)
assert.False(t, b.IsEmpty())
assert.Equal(t, b.Len(), 10)
assert.Equal(t, int64(0), MetricsDropped.Get())
assert.Equal(t, int64(10), MetricsWritten.Get())
// Add 5 more and verify they were dropped
b.Add(metricList...)
assert.False(t, b.IsEmpty())
assert.Equal(t, b.Len(), 10)
assert.Equal(t, int64(5), MetricsDropped.Get())
assert.Equal(t, int64(15), MetricsWritten.Get())
}
func TestGettingBatches(t *testing.T) {
b := NewBuffer(20)
MetricsDropped.Set(0)
MetricsWritten.Set(0)
// Verify that the buffer returned is smaller than requested when there are
// not as many items as requested.
b.Add(metricList...)
batch := b.Batch(10)
assert.Len(t, batch, 5)
// Verify that the buffer is now empty
assert.True(t, b.IsEmpty())
assert.Zero(t, b.Len())
assert.Zero(t, MetricsDropped.Get())
assert.Equal(t, int64(5), MetricsWritten.Get())
// Verify that the buffer returned is not more than the size requested
b.Add(metricList...)
batch = b.Batch(3)
assert.Len(t, batch, 3)
// Verify that buffer is not empty
assert.False(t, b.IsEmpty())
assert.Equal(t, b.Len(), 2)
assert.Equal(t, int64(0), MetricsDropped.Get())
assert.Equal(t, int64(10), MetricsWritten.Get())
}

internal/config/config.go

@@ -9,7 +9,6 @@ import (
 	"math"
 	"os"
 	"path/filepath"
 	"regexp"
 	"runtime"
 	"sort"
@@ -26,7 +25,6 @@ import (
 	"github.com/influxdata/telegraf/plugins/parsers"
 	"github.com/influxdata/telegraf/plugins/processors"
 	"github.com/influxdata/telegraf/plugins/serializers"
 
 	"github.com/influxdata/toml"
 	"github.com/influxdata/toml/ast"
 )
@@ -622,6 +620,19 @@ func (c *Config) LoadConfig(path string) error {
 		}
 	}
 
+	if !c.Agent.OmitHostname {
+		if c.Agent.Hostname == "" {
+			hostname, err := os.Hostname()
+			if err != nil {
+				return err
+			}
+
+			c.Agent.Hostname = hostname
+		}
+
+		c.Tags["host"] = c.Agent.Hostname
+	}
+
 	// Parse all the rest of the plugins:
 	for name, val := range tbl.Fields {
 		subTable, ok := val.(*ast.Table)
@@ -709,6 +720,7 @@ func (c *Config) LoadConfig(path string) error {
 	if len(c.Processors) > 1 {
 		sort.Sort(c.Processors)
 	}
+
 	return nil
 }
@@ -876,6 +888,7 @@ func (c *Config) addInput(name string, table *ast.Table) error {
 	}
 
 	rp := models.NewRunningInput(input, pluginConfig)
+	rp.SetDefaultTags(c.Tags)
 	c.Inputs = append(c.Inputs, rp)
 	return nil
@@ -1751,6 +1764,8 @@ func buildOutput(name string, tbl *ast.Table) (*models.OutputConfig, error) {
 		Name:   name,
 		Filter: filter,
 	}
+
+	// TODO
 	// Outputs don't support FieldDrop/FieldPass, so set to NameDrop/NamePass
 	if len(oc.Filter.FieldDrop) > 0 {
 		oc.Filter.NameDrop = oc.Filter.FieldDrop
@@ -1758,5 +1773,47 @@ func buildOutput(name string, tbl *ast.Table) (*models.OutputConfig, error) {
 	if len(oc.Filter.FieldPass) > 0 {
 		oc.Filter.NamePass = oc.Filter.FieldPass
 	}
+
+	if node, ok := tbl.Fields["flush_interval"]; ok {
+		if kv, ok := node.(*ast.KeyValue); ok {
+			if str, ok := kv.Value.(*ast.String); ok {
+				dur, err := time.ParseDuration(str.Value)
+				if err != nil {
+					return nil, err
+				}
+
+				oc.FlushInterval = dur
+			}
+		}
+	}
+
+	if node, ok := tbl.Fields["metric_buffer_limit"]; ok {
+		if kv, ok := node.(*ast.KeyValue); ok {
+			if integer, ok := kv.Value.(*ast.Integer); ok {
+				v, err := integer.Int()
+				if err != nil {
+					return nil, err
+				}
+
+				oc.MetricBufferLimit = int(v)
+			}
+		}
+	}
+
+	if node, ok := tbl.Fields["metric_batch_size"]; ok {
+		if kv, ok := node.(*ast.KeyValue); ok {
+			if integer, ok := kv.Value.(*ast.Integer); ok {
+				v, err := integer.Int()
+				if err != nil {
+					return nil, err
+				}
+
+				oc.MetricBatchSize = int(v)
+			}
+		}
+	}
+
+	delete(tbl.Fields, "flush_interval")
+	delete(tbl.Fields, "metric_buffer_limit")
+	delete(tbl.Fields, "metric_batch_size")
+
 	return oc, nil
 }

internal/internal.go

@@ -4,6 +4,7 @@ import (
 	"bufio"
 	"bytes"
 	"compress/gzip"
+	"context"
 	"crypto/rand"
 	"errors"
 	"io"
@@ -246,6 +247,51 @@ func RandomSleep(max time.Duration, shutdown chan struct{}) {
 	}
 }
 
+// RandomDuration returns a random duration between 0 and max.
+func RandomDuration(max time.Duration) time.Duration {
+	if max == 0 {
+		return 0
+	}
+
+	var sleepns int64
+	maxSleep := big.NewInt(max.Nanoseconds())
+	if j, err := rand.Int(rand.Reader, maxSleep); err == nil {
+		sleepns = j.Int64()
+	}
+
+	return time.Duration(sleepns)
+}
+
+// SleepContext sleeps until the context is closed or the duration is reached.
+func SleepContext(ctx context.Context, duration time.Duration) error {
+	if duration == 0 {
+		return nil
+	}
+
+	t := time.NewTimer(duration)
+	select {
+	case <-t.C:
+		return nil
+	case <-ctx.Done():
+		t.Stop()
+		return ctx.Err()
+	}
+}
+
+// AlignDuration returns the duration until next aligned interval.
+func AlignDuration(tm time.Time, interval time.Duration) time.Duration {
+	return AlignTime(tm, interval).Sub(tm)
+}
+
+// AlignTime returns the time of the next aligned interval.
+func AlignTime(tm time.Time, interval time.Duration) time.Time {
+	truncated := tm.Truncate(interval)
+	if truncated == tm {
+		return tm
+	}
+	return truncated.Add(interval)
+}
+
 // Exit status takes the error from exec.Command
 // and returns the exit status and true
 // if error is not exit status, will return 0 and false

internal/internal_test.go

@@ -9,6 +9,7 @@ import (
 	"time"
 
 	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
 )
 
 type SnakeTest struct {
@@ -217,3 +218,55 @@ func TestVersionAlreadySet(t *testing.T) {
 	assert.Equal(t, "foo", Version())
 }
 
+func TestAlignDuration(t *testing.T) {
+	tests := []struct {
+		name     string
+		now      time.Time
+		interval time.Duration
+		expected time.Duration
+	}{
+		{
+			name:     "aligned",
+			now:      time.Date(2018, 1, 1, 1, 1, 0, 0, time.UTC),
+			interval: 10 * time.Second,
+			expected: 0 * time.Second,
+		},
+		{
+			name:     "standard interval",
+			now:      time.Date(2018, 1, 1, 1, 1, 1, 0, time.UTC),
+			interval: 10 * time.Second,
+			expected: 9 * time.Second,
+		},
+		{
+			name:     "odd interval",
+			now:      time.Date(2018, 1, 1, 1, 1, 1, 0, time.UTC),
+			interval: 3 * time.Second,
+			expected: 2 * time.Second,
+		},
+		{
+			name:     "sub second interval",
+			now:      time.Date(2018, 1, 1, 1, 1, 0, 5e8, time.UTC),
+			interval: 1 * time.Second,
+			expected: 500 * time.Millisecond,
+		},
+		{
+			name:     "non divisible not aligned on minutes",
+			now:      time.Date(2018, 1, 1, 1, 0, 0, 0, time.UTC),
+			interval: 1*time.Second + 100*time.Millisecond,
+			expected: 400 * time.Millisecond,
+		},
+		{
+			name:     "long interval",
+			now:      time.Date(2018, 1, 1, 1, 1, 0, 0, time.UTC),
+			interval: 1 * time.Hour,
+			expected: 59 * time.Minute,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			actual := AlignDuration(tt.now, tt.interval)
+			require.Equal(t, tt.expected, actual)
+		})
+	}
+}

internal/models/buffer.go Normal file

@@ -0,0 +1,214 @@
package models
import (
"sync"
"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/selfstat"
)
var (
AgentMetricsWritten = selfstat.Register("agent", "metrics_written", map[string]string{})
AgentMetricsDropped = selfstat.Register("agent", "metrics_dropped", map[string]string{})
)
// Buffer stores metrics in a circular buffer.
type Buffer struct {
sync.Mutex
buf []telegraf.Metric
first int // index of the first/oldest metric
last int // one after the index of the last/newest metric
size int // number of metrics currently in the buffer
cap int // the capacity of the buffer
batchFirst int // index of the first metric in the batch
batchLast int // one after the index of the last metric in the batch
batchSize int // number of metrics current in the batch
MetricsAdded selfstat.Stat
MetricsWritten selfstat.Stat
MetricsDropped selfstat.Stat
}
// NewBuffer returns a new empty Buffer with the given capacity.
func NewBuffer(name string, capacity int) *Buffer {
b := &Buffer{
buf: make([]telegraf.Metric, capacity),
first: 0,
last: 0,
size: 0,
cap: capacity,
MetricsAdded: selfstat.Register(
"write",
"metrics_added",
map[string]string{"output": name},
),
MetricsWritten: selfstat.Register(
"write",
"metrics_written",
map[string]string{"output": name},
),
MetricsDropped: selfstat.Register(
"write",
"metrics_dropped",
map[string]string{"output": name},
),
}
return b
}
// Len returns the number of metrics currently in the buffer.
func (b *Buffer) Len() int {
b.Lock()
defer b.Unlock()
return b.size
}
func (b *Buffer) metricAdded() {
b.MetricsAdded.Incr(1)
}
func (b *Buffer) metricWritten(metric telegraf.Metric) {
AgentMetricsWritten.Incr(1)
b.MetricsWritten.Incr(1)
metric.Accept()
}
func (b *Buffer) metricDropped(metric telegraf.Metric) {
AgentMetricsDropped.Incr(1)
b.MetricsDropped.Incr(1)
metric.Reject()
}
func (b *Buffer) inBatch() bool {
if b.batchSize == 0 {
return false
}
if b.batchFirst < b.batchLast {
return b.last >= b.batchFirst && b.last < b.batchLast
} else {
return b.last >= b.batchFirst || b.last < b.batchLast
}
}
func (b *Buffer) add(m telegraf.Metric) {
// Check if Buffer is full
if b.size == b.cap {
if b.batchSize == 0 {
// No batch taken by the output, we can drop the metric now.
b.metricDropped(b.buf[b.last])
} else if b.inBatch() {
// There is an outstanding batch and this will overwrite a metric
// in it, delay the dropping only in case the batch gets rejected.
b.batchSize--
b.batchFirst++
b.batchFirst %= b.cap
} else {
// There is an outstanding batch, but this overwrites a metric
// outside of it.
b.metricDropped(b.buf[b.last])
}
}
b.metricAdded()
b.buf[b.last] = m
b.last++
b.last %= b.cap
if b.size == b.cap {
b.first++
b.first %= b.cap
}
b.size = min(b.size+1, b.cap)
}
// Add adds metrics to the buffer
func (b *Buffer) Add(metrics ...telegraf.Metric) {
b.Lock()
defer b.Unlock()
for i := range metrics {
b.add(metrics[i])
}
}
// Batch returns a slice containing up to batchSize of the most recently added
// metrics.
//
// The metrics contained in the batch are not removed from the buffer, instead
// the last batch is recorded and removed only if Accept is called.
func (b *Buffer) Batch(batchSize int) []telegraf.Metric {
b.Lock()
defer b.Unlock()
outLen := min(b.size, batchSize)
out := make([]telegraf.Metric, outLen)
if outLen == 0 {
return out
}
b.batchFirst = b.first
b.batchLast = b.first + outLen
b.batchLast %= b.cap
b.batchSize = outLen
until := min(b.cap, b.first+outLen)
n := copy(out, b.buf[b.first:until])
if n < outLen {
copy(out[n:], b.buf[:outLen-n])
}
return out
}
// Accept removes the metrics contained in the last batch.
func (b *Buffer) Accept(batch []telegraf.Metric) {
b.Lock()
defer b.Unlock()
for _, m := range batch {
b.metricWritten(m)
}
if b.batchSize > 0 {
b.size -= b.batchSize
b.first += b.batchSize
b.first %= b.cap
}
b.resetBatch()
}
// Reject clears the current batch record so that calls to Accept will have no
// effect.
func (b *Buffer) Reject(batch []telegraf.Metric) {
b.Lock()
defer b.Unlock()
if len(batch) > b.batchSize {
// Part or all of the batch was dropped before reject was called.
for _, m := range batch[b.batchSize:] {
b.metricDropped(m)
}
}
b.resetBatch()
}
func (b *Buffer) resetBatch() {
b.batchFirst = 0
b.batchLast = 0
b.batchSize = 0
}
func min(a, b int) int {
if b < a {
return b
}
return a
}

internal/models/buffer_test.go Normal file

@@ -0,0 +1,385 @@
package models
import (
"testing"
"time"
"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/metric"
"github.com/stretchr/testify/require"
)
type MockMetric struct {
telegraf.Metric
AcceptF func()
RejectF func()
DropF func()
}
func (m *MockMetric) Accept() {
m.AcceptF()
}
func (m *MockMetric) Reject() {
m.RejectF()
}
func (m *MockMetric) Drop() {
m.DropF()
}
func Metric() telegraf.Metric {
m, err := metric.New(
"cpu",
map[string]string{},
map[string]interface{}{
"value": 42.0,
},
time.Unix(0, 0),
)
if err != nil {
panic(err)
}
return m
}
func BenchmarkAddMetrics(b *testing.B) {
buf := NewBuffer("test", 10000)
m := Metric()
for n := 0; n < b.N; n++ {
buf.Add(m)
}
}
func setup(b *Buffer) *Buffer {
b.MetricsAdded.Set(0)
b.MetricsWritten.Set(0)
b.MetricsDropped.Set(0)
return b
}
func TestBuffer_LenEmpty(t *testing.T) {
b := setup(NewBuffer("test", 5))
require.Equal(t, 0, b.Len())
}
func TestBuffer_LenOne(t *testing.T) {
m := Metric()
b := setup(NewBuffer("test", 5))
b.Add(m)
require.Equal(t, 1, b.Len())
}
func TestBuffer_LenFull(t *testing.T) {
m := Metric()
b := setup(NewBuffer("test", 5))
b.Add(m, m, m, m, m)
require.Equal(t, 5, b.Len())
}
func TestBuffer_LenOverfill(t *testing.T) {
m := Metric()
b := setup(NewBuffer("test", 5))
setup(b)
b.Add(m, m, m, m, m, m)
require.Equal(t, 5, b.Len())
}
func TestBuffer_BatchLenZero(t *testing.T) {
b := setup(NewBuffer("test", 5))
batch := b.Batch(0)
require.Len(t, batch, 0)
}
func TestBuffer_BatchLenBufferEmpty(t *testing.T) {
b := setup(NewBuffer("test", 5))
batch := b.Batch(2)
require.Len(t, batch, 0)
}
func TestBuffer_BatchLenUnderfill(t *testing.T) {
m := Metric()
b := setup(NewBuffer("test", 5))
b.Add(m)
batch := b.Batch(2)
require.Len(t, batch, 1)
}
func TestBuffer_BatchLenFill(t *testing.T) {
m := Metric()
b := setup(NewBuffer("test", 5))
b.Add(m, m, m)
batch := b.Batch(2)
require.Len(t, batch, 2)
}
func TestBuffer_BatchLenExact(t *testing.T) {
m := Metric()
b := setup(NewBuffer("test", 5))
b.Add(m, m)
batch := b.Batch(2)
require.Len(t, batch, 2)
}
func TestBuffer_BatchLenLargerThanBuffer(t *testing.T) {
m := Metric()
b := setup(NewBuffer("test", 5))
b.Add(m, m, m, m, m)
batch := b.Batch(6)
require.Len(t, batch, 5)
}
func TestBuffer_BatchWrap(t *testing.T) {
m := Metric()
b := setup(NewBuffer("test", 5))
b.Add(m, m, m, m, m)
batch := b.Batch(2)
b.Accept(batch)
b.Add(m, m)
batch = b.Batch(5)
require.Len(t, batch, 5)
}
func TestBuffer_AddDropsOverwrittenMetrics(t *testing.T) {
m := Metric()
b := setup(NewBuffer("test", 5))
b.Add(m, m, m, m, m)
b.Add(m, m, m, m, m)
require.Equal(t, int64(5), b.MetricsDropped.Get())
require.Equal(t, int64(0), b.MetricsWritten.Get())
}
func TestBuffer_AcceptRemovesBatch(t *testing.T) {
m := Metric()
b := setup(NewBuffer("test", 5))
b.Add(m, m, m)
batch := b.Batch(2)
b.Accept(batch)
require.Equal(t, 1, b.Len())
}
func TestBuffer_RejectLeavesBatch(t *testing.T) {
m := Metric()
b := setup(NewBuffer("test", 5))
b.Add(m, m, m)
batch := b.Batch(2)
b.Reject(batch)
require.Equal(t, 3, b.Len())
}
func TestBuffer_AcceptWritesOverwrittenBatch(t *testing.T) {
m := Metric()
b := setup(NewBuffer("test", 5))
b.Add(m, m, m, m, m)
batch := b.Batch(5)
b.Add(m, m, m, m, m)
b.Accept(batch)
require.Equal(t, int64(0), b.MetricsDropped.Get())
require.Equal(t, int64(5), b.MetricsWritten.Get())
}
func TestBuffer_BatchRejectDropsOverwrittenBatch(t *testing.T) {
m := Metric()
b := setup(NewBuffer("test", 5))
b.Add(m, m, m, m, m)
batch := b.Batch(5)
b.Add(m, m, m, m, m)
b.Reject(batch)
require.Equal(t, int64(5), b.MetricsDropped.Get())
require.Equal(t, int64(0), b.MetricsWritten.Get())
}
func TestBuffer_MetricsOverwriteBatchAccept(t *testing.T) {
m := Metric()
b := setup(NewBuffer("test", 5))
b.Add(m, m, m, m, m)
batch := b.Batch(3)
b.Add(m, m, m)
b.Accept(batch)
require.Equal(t, int64(0), b.MetricsDropped.Get())
require.Equal(t, int64(3), b.MetricsWritten.Get())
}
func TestBuffer_MetricsOverwriteBatchReject(t *testing.T) {
m := Metric()
b := setup(NewBuffer("test", 5))
b.Add(m, m, m, m, m)
batch := b.Batch(3)
b.Add(m, m, m)
b.Reject(batch)
require.Equal(t, int64(3), b.MetricsDropped.Get())
require.Equal(t, int64(0), b.MetricsWritten.Get())
}
func TestBuffer_MetricsBatchAcceptRemoved(t *testing.T) {
m := Metric()
b := setup(NewBuffer("test", 5))
b.Add(m, m, m, m, m)
batch := b.Batch(3)
b.Add(m, m, m, m, m)
b.Accept(batch)
require.Equal(t, int64(2), b.MetricsDropped.Get())
require.Equal(t, int64(3), b.MetricsWritten.Get())
}
func TestBuffer_WrapWithBatch(t *testing.T) {
m := Metric()
b := setup(NewBuffer("test", 5))
b.Add(m, m, m)
b.Batch(3)
b.Add(m, m, m, m, m, m)
require.Equal(t, int64(1), b.MetricsDropped.Get())
}
func TestBuffer_BatchNotRemoved(t *testing.T) {
m := Metric()
b := setup(NewBuffer("test", 5))
b.Add(m, m, m, m, m)
b.Batch(2)
require.Equal(t, 5, b.Len())
}
func TestBuffer_BatchRejectAcceptNoop(t *testing.T) {
m := Metric()
b := setup(NewBuffer("test", 5))
b.Add(m, m, m, m, m)
batch := b.Batch(2)
b.Reject(batch)
b.Accept(batch)
require.Equal(t, 5, b.Len())
}
func TestBuffer_AcceptCallsMetricAccept(t *testing.T) {
var accept int
mm := &MockMetric{
Metric: Metric(),
AcceptF: func() {
accept++
},
}
b := setup(NewBuffer("test", 5))
b.Add(mm, mm, mm)
batch := b.Batch(2)
b.Accept(batch)
require.Equal(t, 2, accept)
}
func TestBuffer_AddCallsMetricRejectWhenNoBatch(t *testing.T) {
var reject int
mm := &MockMetric{
Metric: Metric(),
RejectF: func() {
reject++
},
}
b := setup(NewBuffer("test", 5))
setup(b)
b.Add(mm, mm, mm, mm, mm)
b.Add(mm, mm)
require.Equal(t, 2, reject)
}
func TestBuffer_AddCallsMetricRejectWhenNotInBatch(t *testing.T) {
var reject int
mm := &MockMetric{
Metric: Metric(),
RejectF: func() {
reject++
},
}
b := setup(NewBuffer("test", 5))
setup(b)
b.Add(mm, mm, mm, mm, mm)
batch := b.Batch(2)
b.Add(mm, mm, mm, mm)
// metric[2] and metric[3] rejected
require.Equal(t, 2, reject)
b.Reject(batch)
// metric[1] and metric[2] now rejected
require.Equal(t, 4, reject)
}
func TestBuffer_RejectCallsMetricRejectWithOverwritten(t *testing.T) {
var reject int
mm := &MockMetric{
Metric: Metric(),
RejectF: func() {
reject++
},
}
b := setup(NewBuffer("test", 5))
b.Add(mm, mm, mm, mm, mm)
batch := b.Batch(5)
b.Add(mm, mm)
require.Equal(t, 0, reject)
b.Reject(batch)
require.Equal(t, 2, reject)
}
func TestBuffer_AddOverwriteAndReject(t *testing.T) {
var reject int
mm := &MockMetric{
Metric: Metric(),
RejectF: func() {
reject++
},
}
b := setup(NewBuffer("test", 5))
b.Add(mm, mm, mm, mm, mm)
batch := b.Batch(5)
b.Add(mm, mm, mm, mm, mm)
b.Add(mm, mm, mm, mm, mm)
b.Add(mm, mm, mm, mm, mm)
b.Add(mm, mm, mm, mm, mm)
require.Equal(t, 15, reject)
b.Reject(batch)
require.Equal(t, 20, reject)
}
func TestBuffer_AddOverwriteAndRejectOffset(t *testing.T) {
var reject int
var accept int
mm := &MockMetric{
Metric: Metric(),
RejectF: func() {
reject++
},
AcceptF: func() {
accept++
},
}
b := setup(NewBuffer("test", 5))
b.Add(mm, mm, mm)
b.Add(mm, mm, mm, mm)
require.Equal(t, 2, reject)
batch := b.Batch(5)
b.Add(mm, mm, mm, mm)
require.Equal(t, 2, reject)
b.Add(mm, mm, mm, mm)
require.Equal(t, 5, reject)
b.Add(mm, mm, mm, mm)
require.Equal(t, 9, reject)
b.Add(mm, mm, mm, mm)
require.Equal(t, 13, reject)
b.Accept(batch)
require.Equal(t, 13, reject)
require.Equal(t, 5, accept)
}

internal/models/filter_test.go

@@ -6,6 +6,7 @@ import (
 	"github.com/influxdata/telegraf"
 	"github.com/influxdata/telegraf/metric"
+	"github.com/influxdata/telegraf/testutil"
 	"github.com/stretchr/testify/require"
 )
@@ -480,3 +481,45 @@ func TestFilter_FilterTagsPassAndDrop(t *testing.T) {
 	}
 }
 
+func BenchmarkFilter(b *testing.B) {
+	tests := []struct {
+		name   string
+		filter Filter
+		metric telegraf.Metric
+	}{
+		{
+			name:   "empty filter",
+			filter: Filter{},
+			metric: testutil.MustMetric("cpu",
+				map[string]string{},
+				map[string]interface{}{
+					"value": 42,
+				},
+				time.Unix(0, 0),
+			),
+		},
+		{
+			name: "namepass",
+			filter: Filter{
+				NamePass: []string{"cpu"},
+			},
+			metric: testutil.MustMetric("cpu",
+				map[string]string{},
+				map[string]interface{}{
+					"value": 42,
+				},
+				time.Unix(0, 0),
+			),
+		},
+	}
+
+	for _, tt := range tests {
+		b.Run(tt.name, func(b *testing.B) {
+			require.NoError(b, tt.filter.Compile())
+			for n := 0; n < b.N; n++ {
+				tt.filter.Select(tt.metric)
+			}
+		})
+	}
+}

internal/models/running_aggregator.go

@@ -1,30 +1,53 @@
 package models
 
 import (
-	"log"
+	"sync"
 	"time"
 
 	"github.com/influxdata/telegraf"
+	"github.com/influxdata/telegraf/selfstat"
 )
 
 type RunningAggregator struct {
-	a      telegraf.Aggregator
-	Config *AggregatorConfig
-
-	metrics chan telegraf.Metric
+	sync.Mutex
+	Aggregator telegraf.Aggregator
+	Config     *AggregatorConfig
 
 	periodStart time.Time
 	periodEnd   time.Time
+
+	MetricsPushed   selfstat.Stat
+	MetricsFiltered selfstat.Stat
+	MetricsDropped  selfstat.Stat
+	PushTime        selfstat.Stat
 }
 
 func NewRunningAggregator(
-	a telegraf.Aggregator,
-	conf *AggregatorConfig,
+	aggregator telegraf.Aggregator,
+	config *AggregatorConfig,
 ) *RunningAggregator {
 	return &RunningAggregator{
-		a:       a,
-		Config:  conf,
-		metrics: make(chan telegraf.Metric, 100),
+		Aggregator: aggregator,
+		Config:     config,
+		MetricsPushed: selfstat.Register(
+			"aggregate",
+			"metrics_pushed",
+			map[string]string{"aggregator": config.Name},
+		),
+		MetricsFiltered: selfstat.Register(
+			"aggregate",
+			"metrics_filtered",
+			map[string]string{"aggregator": config.Name},
+		),
+		MetricsDropped: selfstat.Register(
+			"aggregate",
+			"metrics_dropped",
+			map[string]string{"aggregator": config.Name},
+		),
+		PushTime: selfstat.Register(
+			"aggregate",
+			"push_time_ns",
+			map[string]string{"aggregator": config.Name},
+		),
 	}
 }
@@ -46,6 +69,15 @@ func (r *RunningAggregator) Name() string {
 	return "aggregators." + r.Config.Name
 }
 
+func (r *RunningAggregator) Period() time.Duration {
+	return r.Config.Period
+}
+
+func (r *RunningAggregator) SetPeriodStart(start time.Time) {
+	r.periodStart = start
+	r.periodEnd = r.periodStart.Add(r.Config.Period).Add(r.Config.Delay)
+}
+
 func (r *RunningAggregator) MakeMetric(metric telegraf.Metric) telegraf.Metric {
 	m := makemetric(
 		metric,
@@ -59,9 +91,21 @@ func (r *RunningAggregator) MakeMetric(metric telegraf.Metric) telegraf.Metric {
 		m.SetAggregate(true)
 	}
 
+	r.MetricsPushed.Incr(1)
+
 	return m
 }
 
+func (r *RunningAggregator) metricFiltered(metric telegraf.Metric) {
+	r.MetricsFiltered.Incr(1)
+	metric.Accept()
+}
+
+func (r *RunningAggregator) metricDropped(metric telegraf.Metric) {
+	r.MetricsDropped.Incr(1)
+	metric.Accept()
+}
+
 // Add a metric to the aggregator and return true if the original metric
 // should be dropped.
 func (r *RunningAggregator) Add(metric telegraf.Metric) bool {
@@ -74,75 +118,31 @@ func (r *RunningAggregator) Add(metric telegraf.Metric) bool {
 		return r.Config.DropOriginal
 	}
 
-	r.metrics <- metric
+	r.Lock()
+	defer r.Unlock()
+
+	if r.periodStart.IsZero() || metric.Time().Before(r.periodStart) || metric.Time().After(r.periodEnd) {
+		r.metricDropped(metric)
+		return false
+	}
 
+	r.Aggregator.Add(metric)
 	return r.Config.DropOriginal
 }
 
-func (r *RunningAggregator) add(in telegraf.Metric) {
-	r.a.Add(in)
-}
-
-func (r *RunningAggregator) push(acc telegraf.Accumulator) {
-	r.a.Push(acc)
-}
-
-func (r *RunningAggregator) reset() {
-	r.a.Reset()
-}
-
-// Run runs the running aggregator, listens for incoming metrics, and waits
-// for period ticks to tell it when to push and reset the aggregator.
-func (r *RunningAggregator) Run(
-	acc telegraf.Accumulator,
-	shutdown chan struct{},
-) {
-	// The start of the period is truncated to the nearest second.
-	//
-	// Every metric then gets it's timestamp checked and is dropped if it
-	// is not within:
-	//
-	//   start < t < end + truncation + delay
-	//
-	// So if we start at now = 00:00.2 with a 10s period and 0.3s delay:
-	//   now = 00:00.2
-	//   start = 00:00
-	//   truncation = 00:00.2
-	//   end = 00:10
-	// 1st interval: 00:00 - 00:10.5
-	// 2nd interval: 00:10 - 00:20.5
-	// etc.
-	//
-	now := time.Now()
-	r.periodStart = now.Truncate(time.Second)
-	truncation := now.Sub(r.periodStart)
-	r.periodEnd = r.periodStart.Add(r.Config.Period)
-	time.Sleep(r.Config.Delay)
-	periodT := time.NewTicker(r.Config.Period)
-	defer periodT.Stop()
-
-	for {
-		select {
-		case <-shutdown:
-			if len(r.metrics) > 0 {
-				// wait until metrics are flushed before exiting
-				continue
-			}
-			return
-		case m := <-r.metrics:
-			if m.Time().Before(r.periodStart) ||
-				m.Time().After(r.periodEnd.Add(truncation).Add(r.Config.Delay)) {
-				// the metric is outside the current aggregation period, so
-				// skip it.
-				log.Printf("D! aggregator: metric \"%s\" is not in the current timewindow, skipping", m.Name())
-				continue
-			}
-			r.add(m)
-		case <-periodT.C:
-			r.periodStart = r.periodEnd
-			r.periodEnd = r.periodStart.Add(r.Config.Period)
-			r.push(acc)
-			r.reset()
-		}
-	}
+func (r *RunningAggregator) Push(acc telegraf.Accumulator) {
+	r.Lock()
+	defer r.Unlock()
+
+	r.periodStart = r.periodEnd
+	r.periodEnd = r.periodStart.Add(r.Config.Period).Add(r.Config.Delay)
+
+	r.push(acc)
+	r.Aggregator.Reset()
+}
+
+func (r *RunningAggregator) push(acc telegraf.Accumulator) {
+	start := time.Now()
+	r.Aggregator.Push(acc)
+	elapsed := time.Since(start)
+	r.PushTime.Incr(elapsed.Nanoseconds())
 }

internal/models/running_aggregator_test.go

@@ -1,16 +1,13 @@
 package models
 
 import (
-	"sync"
 	"sync/atomic"
 	"testing"
 	"time"
 
 	"github.com/influxdata/telegraf"
-	"github.com/influxdata/telegraf/metric"
 	"github.com/influxdata/telegraf/testutil"
-	"github.com/stretchr/testify/assert"
 	"github.com/stretchr/testify/require"
 )
@@ -23,28 +20,24 @@ func TestAdd(t *testing.T) {
 		},
 		Period: time.Millisecond * 500,
 	})
-	assert.NoError(t, ra.Config.Filter.Compile())
+	require.NoError(t, ra.Config.Filter.Compile())
 	acc := testutil.Accumulator{}
-	go ra.Run(&acc, make(chan struct{}))
 
-	m, err := metric.New("RITest",
+	now := time.Now()
+	ra.SetPeriodStart(now)
+
+	m := testutil.MustMetric("RITest",
 		map[string]string{},
 		map[string]interface{}{
 			"value": int64(101),
 		},
 		time.Now().Add(time.Millisecond*150),
 		telegraf.Untyped)
-	require.NoError(t, err)
-
-	assert.False(t, ra.Add(m))
-
-	for {
-		time.Sleep(time.Millisecond)
-		if atomic.LoadInt64(&a.sum) > 0 {
-			break
-		}
-	}
-	assert.Equal(t, int64(101), atomic.LoadInt64(&a.sum))
+	require.False(t, ra.Add(m))
+	ra.Push(&acc)
+
+	require.Equal(t, 1, len(acc.Metrics))
+	require.Equal(t, int64(101), acc.Metrics[0].Fields["sum"])
 }
@@ -56,50 +49,45 @@ func TestAddMetricsOutsideCurrentPeriod(t *testing.T) {
 		},
 		Period: time.Millisecond * 500,
 	})
-	assert.NoError(t, ra.Config.Filter.Compile())
+	require.NoError(t, ra.Config.Filter.Compile())
 	acc := testutil.Accumulator{}
-	go ra.Run(&acc, make(chan struct{}))
+	now := time.Now()
+	ra.SetPeriodStart(now)
 
-	m, err := metric.New("RITest",
+	m := testutil.MustMetric("RITest",
 		map[string]string{},
 		map[string]interface{}{
 			"value": int64(101),
 		},
-		time.Now().Add(-time.Hour),
-		telegraf.Untyped)
-	require.NoError(t, err)
-
-	assert.False(t, ra.Add(m))
+		now.Add(-time.Hour),
+		telegraf.Untyped,
+	)
+	require.False(t, ra.Add(m))
 
 	// metric after current period
-	m, err = metric.New("RITest",
+	m = testutil.MustMetric("RITest",
 		map[string]string{},
 		map[string]interface{}{
 			"value": int64(101),
 		},
-		time.Now().Add(time.Hour),
-		telegraf.Untyped)
-	require.NoError(t, err)
-	assert.False(t, ra.Add(m))
+		now.Add(time.Hour),
+		telegraf.Untyped,
+	)
+	require.False(t, ra.Add(m))
 
 	// "now" metric
-	m, err = metric.New("RITest",
+	m = testutil.MustMetric("RITest",
 		map[string]string{},
 		map[string]interface{}{
 			"value": int64(101),
 		},
 		time.Now().Add(time.Millisecond*50),
 		telegraf.Untyped)
-	require.NoError(t, err)
-	assert.False(t, ra.Add(m))
+	require.False(t, ra.Add(m))
 
-	for {
-		time.Sleep(time.Millisecond)
-		if atomic.LoadInt64(&a.sum) > 0 {
-			break
-		}
-	}
-	assert.Equal(t, int64(101), atomic.LoadInt64(&a.sum))
+	ra.Push(&acc)
+	require.Equal(t, 1, len(acc.Metrics))
+	require.Equal(t, int64(101), acc.Metrics[0].Fields["sum"])
 }
@@ -111,37 +99,24 @@ func TestAddAndPushOnePeriod(t *testing.T) {
 		},
 		Period: time.Millisecond * 500,
 	})
-	assert.NoError(t, ra.Config.Filter.Compile())
+	require.NoError(t, ra.Config.Filter.Compile())
 	acc := testutil.Accumulator{}
-	shutdown := make(chan struct{})
 
-	var wg sync.WaitGroup
-	wg.Add(1)
-	go func() {
-		defer wg.Done()
-		ra.Run(&acc, shutdown)
-	}()
+	now := time.Now()
+	ra.SetPeriodStart(now)
 
-	m, err := metric.New("RITest",
+	m := testutil.MustMetric("RITest",
 		map[string]string{},
 		map[string]interface{}{
 			"value": int64(101),
 		},
 		time.Now().Add(time.Millisecond*100),
 		telegraf.Untyped)
-	require.NoError(t, err)
-	assert.False(t, ra.Add(m))
+	require.False(t, ra.Add(m))
 
-	for {
-		time.Sleep(time.Millisecond)
-		if acc.NMetrics() > 0 {
-			break
-		}
-	}
+	ra.Push(&acc)
 	acc.AssertContainsFields(t, "TestMetric", map[string]interface{}{"sum": int64(101)})
-
-	close(shutdown)
-	wg.Wait()
 }
@@ -152,28 +127,29 @@ func TestAddDropOriginal(t *testing.T) {
 		},
 		DropOriginal: true,
 	})
-	assert.NoError(t, ra.Config.Filter.Compile())
+	require.NoError(t, ra.Config.Filter.Compile())
 
-	m, err := metric.New("RITest",
+	now := time.Now()
+	ra.SetPeriodStart(now)
+
+	m := testutil.MustMetric("RITest",
 		map[string]string{},
 		map[string]interface{}{
 			"value": int64(101),
 		},
-		time.Now(),
+		now,
 		telegraf.Untyped)
-	require.NoError(t, err)
-
-	assert.True(t, ra.Add(m))
+	require.True(t, ra.Add(m))
 
 	// this metric name doesn't match the filter, so Add will return false
-	m2, err := metric.New("foobar",
+	m2 := testutil.MustMetric("foobar",
 		map[string]string{},
 		map[string]interface{}{
 			"value": int64(101),
 		},
-		time.Now(),
+		now,
 		telegraf.Untyped)
-	require.NoError(t, err)
-	assert.False(t, ra.Add(m2))
+	require.False(t, ra.Add(m2))
 }
 
 type TestAggregator struct {

internal/models/running_input.go

@@ -1,11 +1,9 @@
 package models
 
 import (
-	"fmt"
 	"time"
 
 	"github.com/influxdata/telegraf"
-	"github.com/influxdata/telegraf/plugins/serializers/influx"
 	"github.com/influxdata/telegraf/selfstat"
 )
@@ -15,16 +13,13 @@ type RunningInput struct {
 	Input  telegraf.Input
 	Config *InputConfig
 
-	trace       bool
 	defaultTags map[string]string
 
 	MetricsGathered selfstat.Stat
+	GatherTime      selfstat.Stat
 }
 
-func NewRunningInput(
-	input telegraf.Input,
-	config *InputConfig,
-) *RunningInput {
+func NewRunningInput(input telegraf.Input, config *InputConfig) *RunningInput {
 	return &RunningInput{
 		Input:  input,
 		Config: config,
@@ -33,6 +28,11 @@ func NewRunningInput(
 			"metrics_gathered",
 			map[string]string{"input": config.Name},
 		),
+		GatherTime: selfstat.RegisterTiming(
+			"gather",
+			"gather_time_ns",
+			map[string]string{"input": config.Name},
+		),
 	}
 }
@@ -52,13 +52,19 @@ func (r *RunningInput) Name() string {
 	return "inputs." + r.Config.Name
 }
 
+func (r *RunningInput) metricFiltered(metric telegraf.Metric) {
+	metric.Drop()
+}
+
 func (r *RunningInput) MakeMetric(metric telegraf.Metric) telegraf.Metric {
 	if ok := r.Config.Filter.Select(metric); !ok {
+		r.metricFiltered(metric)
 		return nil
 	}
 
 	r.Config.Filter.Modify(metric)
 	if len(metric.FieldList()) == 0 {
+		r.metricFiltered(metric)
 		return nil
 	}
@@ -70,26 +76,17 @@ func (r *RunningInput) MakeMetric(metric telegraf.Metric) telegraf.Metric {
 		r.Config.Tags,
 		r.defaultTags)
 
-	if r.trace && m != nil {
-		s := influx.NewSerializer()
-		s.SetFieldSortOrder(influx.SortFields)
-		octets, err := s.Serialize(m)
-		if err == nil {
-			fmt.Print("> " + string(octets))
-		}
-	}
-
 	r.MetricsGathered.Incr(1)
 	GlobalMetricsGathered.Incr(1)
 	return m
 }
 
-func (r *RunningInput) Trace() bool {
-	return r.trace
-}
-
-func (r *RunningInput) SetTrace(trace bool) {
-	r.trace = trace
+func (r *RunningInput) Gather(acc telegraf.Accumulator) error {
+	start := time.Now()
+	err := r.Input.Gather(acc)
+	elapsed := time.Since(start)
+	r.GatherTime.Incr(elapsed.Nanoseconds())
+	return err
 }
 
 func (r *RunningInput) SetDefaultTags(tags map[string]string) {

internal/models/running_input_test.go

@@ -6,6 +6,7 @@ import (
 	"github.com/influxdata/telegraf"
 	"github.com/influxdata/telegraf/metric"
+	"github.com/influxdata/telegraf/testutil"
 
 	"github.com/stretchr/testify/assert"
 	"github.com/stretchr/testify/require"
@@ -66,17 +67,13 @@ func TestMakeMetricWithPluginTags(t *testing.T) {
 		},
 	})
 
-	ri.SetTrace(true)
-	assert.Equal(t, true, ri.Trace())
-
-	m, err := metric.New("RITest",
+	m := testutil.MustMetric("RITest",
 		map[string]string{},
 		map[string]interface{}{
 			"value": int64(101),
 		},
 		now,
 		telegraf.Untyped)
-	require.NoError(t, err)
 	m = ri.MakeMetric(m)
 
 	expected, err := metric.New("RITest",
@@ -102,8 +99,6 @@ func TestMakeMetricFilteredOut(t *testing.T) {
 		Filter: Filter{NamePass: []string{"foobar"}},
 	})
 
-	ri.SetTrace(true)
-	assert.Equal(t, true, ri.Trace())
 	assert.NoError(t, ri.Config.Filter.Compile())
 
 	m, err := metric.New("RITest",
@@ -127,17 +122,13 @@ func TestMakeMetricWithDaemonTags(t *testing.T) {
 		"foo": "bar",
 	})
 
-	ri.SetTrace(true)
-	assert.Equal(t, true, ri.Trace())
-
-	m, err := metric.New("RITest",
+	m := testutil.MustMetric("RITest",
 		map[string]string{},
 		map[string]interface{}{
 			"value": int64(101),
 		},
 		now,
 		telegraf.Untyped)
-	require.NoError(t, err)
 	m = ri.MakeMetric(m)
 
 	expected, err := metric.New("RITest",
 		map[string]string{

internal/models/running_output.go

@ -6,7 +6,6 @@ import (
"time" "time"
"github.com/influxdata/telegraf" "github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/internal/buffer"
"github.com/influxdata/telegraf/selfstat" "github.com/influxdata/telegraf/selfstat"
) )
@ -18,6 +17,16 @@ const (
DEFAULT_METRIC_BUFFER_LIMIT = 10000 DEFAULT_METRIC_BUFFER_LIMIT = 10000
) )
// OutputConfig containing name and filter
type OutputConfig struct {
Name string
Filter Filter
FlushInterval time.Duration
MetricBufferLimit int
MetricBatchSize int
}
// RunningOutput contains the output configuration // RunningOutput contains the output configuration
type RunningOutput struct { type RunningOutput struct {
Name string Name string
@ -27,24 +36,16 @@ type RunningOutput struct {
MetricBatchSize int MetricBatchSize int
MetricsFiltered selfstat.Stat MetricsFiltered selfstat.Stat
MetricsWritten selfstat.Stat
BufferSize selfstat.Stat BufferSize selfstat.Stat
BufferLimit selfstat.Stat BufferLimit selfstat.Stat
WriteTime selfstat.Stat WriteTime selfstat.Stat
metrics *buffer.Buffer batch []telegraf.Metric
failMetrics *buffer.Buffer buffer *Buffer
BatchReady chan time.Time
// Guards against concurrent calls to Add, Push, Reset aggMutex sync.Mutex
aggMutex sync.Mutex batchMutex sync.Mutex
// Guards against concurrent calls to the Output as described in #3009
writeMutex sync.Mutex
}
// OutputConfig containing name and filter
type OutputConfig struct {
Name string
Filter Filter
} }
func NewRunningOutput( func NewRunningOutput(
@ -54,25 +55,27 @@ func NewRunningOutput(
batchSize int, batchSize int,
bufferLimit int, bufferLimit int,
) *RunningOutput { ) *RunningOutput {
if conf.MetricBufferLimit > 0 {
bufferLimit = conf.MetricBufferLimit
}
if bufferLimit == 0 { if bufferLimit == 0 {
bufferLimit = DEFAULT_METRIC_BUFFER_LIMIT bufferLimit = DEFAULT_METRIC_BUFFER_LIMIT
} }
if conf.MetricBatchSize > 0 {
batchSize = conf.MetricBatchSize
}
if batchSize == 0 { if batchSize == 0 {
batchSize = DEFAULT_METRIC_BATCH_SIZE batchSize = DEFAULT_METRIC_BATCH_SIZE
} }
ro := &RunningOutput{ ro := &RunningOutput{
Name: name, Name: name,
metrics: buffer.NewBuffer(batchSize), batch: make([]telegraf.Metric, 0, batchSize),
failMetrics: buffer.NewBuffer(bufferLimit), buffer: NewBuffer(name, bufferLimit),
BatchReady: make(chan time.Time, 1),
Output: output, Output: output,
Config: conf, Config: conf,
MetricBufferLimit: bufferLimit, MetricBufferLimit: bufferLimit,
MetricBatchSize: batchSize, MetricBatchSize: batchSize,
MetricsWritten: selfstat.Register(
"write",
"metrics_written",
map[string]string{"output": name},
),
MetricsFiltered: selfstat.Register( MetricsFiltered: selfstat.Register(
"write", "write",
"metrics_filtered", "metrics_filtered",
@ -94,20 +97,28 @@ func NewRunningOutput(
map[string]string{"output": name}, map[string]string{"output": name},
), ),
} }
ro.BufferLimit.Set(int64(ro.MetricBufferLimit)) ro.BufferLimit.Set(int64(ro.MetricBufferLimit))
return ro return ro
} }
// AddMetric adds a metric to the output. This function can also write cached func (ro *RunningOutput) metricFiltered(metric telegraf.Metric) {
// points if FlushBufferWhenFull is true. ro.MetricsFiltered.Incr(1)
metric.Drop()
}
// AddMetric adds a metric to the output.
//
// Takes ownership of metric
func (ro *RunningOutput) AddMetric(metric telegraf.Metric) { func (ro *RunningOutput) AddMetric(metric telegraf.Metric) {
if ok := ro.Config.Filter.Select(metric); !ok { if ok := ro.Config.Filter.Select(metric); !ok {
ro.MetricsFiltered.Incr(1) ro.metricFiltered(metric)
return return
} }
ro.Config.Filter.Modify(metric) ro.Config.Filter.Modify(metric)
if len(metric.FieldList()) == 0 { if len(metric.FieldList()) == 0 {
ro.metricFiltered(metric)
return return
} }
@ -118,85 +129,98 @@ func (ro *RunningOutput) AddMetric(metric telegraf.Metric) {
return return
} }
ro.metrics.Add(metric) ro.batchMutex.Lock()
if ro.metrics.Len() == ro.MetricBatchSize {
batch := ro.metrics.Batch(ro.MetricBatchSize) ro.batch = append(ro.batch, metric)
err := ro.write(batch) if len(ro.batch) == ro.MetricBatchSize {
if err != nil { ro.addBatchToBuffer()
ro.failMetrics.Add(batch...)
log.Printf("E! Error writing to output [%s]: %v", ro.Name, err) nBuffer := ro.buffer.Len()
ro.BufferSize.Set(int64(nBuffer))
select {
case ro.BatchReady <- time.Now():
default:
} }
} }
ro.batchMutex.Unlock()
} }
// Write writes all cached points to this output. // AddBatchToBuffer moves the metrics from the batch into the metric buffer.
func (ro *RunningOutput) addBatchToBuffer() {
ro.buffer.Add(ro.batch...)
ro.batch = ro.batch[:0]
}
// Write writes all metrics to the output, stopping when all have been sent on
// or error.
func (ro *RunningOutput) Write() error { func (ro *RunningOutput) Write() error {
if output, ok := ro.Output.(telegraf.AggregatingOutput); ok { if output, ok := ro.Output.(telegraf.AggregatingOutput); ok {
ro.aggMutex.Lock() ro.aggMutex.Lock()
metrics := output.Push() metrics := output.Push()
ro.metrics.Add(metrics...) ro.buffer.Add(metrics...)
output.Reset() output.Reset()
ro.aggMutex.Unlock() ro.aggMutex.Unlock()
} }
// add and write can be called concurrently
ro.batchMutex.Lock()
ro.addBatchToBuffer()
ro.batchMutex.Unlock()
nFails, nMetrics := ro.failMetrics.Len(), ro.metrics.Len() nBuffer := ro.buffer.Len()
ro.BufferSize.Set(int64(nFails + nMetrics))
log.Printf("D! Output [%s] buffer fullness: %d / %d metrics. ",
ro.Name, nFails+nMetrics, ro.MetricBufferLimit)
var err error
if !ro.failMetrics.IsEmpty() {
// how many batches of failed writes we need to write.
nBatches := nFails/ro.MetricBatchSize + 1
batchSize := ro.MetricBatchSize
for i := 0; i < nBatches; i++ { // Only process the metrics in the buffer now. Metrics added while we are
// If it's the last batch, only grab the metrics that have not had // writing will be sent on the next call.
// a write attempt already (this is primarily to preserve order). nBatches := nBuffer/ro.MetricBatchSize + 1
if i == nBatches-1 { for i := 0; i < nBatches; i++ {
batchSize = nFails % ro.MetricBatchSize batch := ro.buffer.Batch(ro.MetricBatchSize)
} if len(batch) == 0 {
batch := ro.failMetrics.Batch(batchSize) break
// If we've already failed previous writes, don't bother trying to
// write to this output again. We are not exiting the loop just so
// that we can rotate the metrics to preserve order.
if err == nil {
err = ro.write(batch)
}
if err != nil {
ro.failMetrics.Add(batch...)
}
} }
}
batch := ro.metrics.Batch(ro.MetricBatchSize) err := ro.write(batch)
// see comment above about not trying to write to an already failed output. if err != nil {
// if ro.failMetrics is empty then err will always be nil at this point. ro.buffer.Reject(batch)
if err == nil { return err
err = ro.write(batch) }
} ro.buffer.Accept(batch)
if err != nil {
ro.failMetrics.Add(batch...)
return err
} }
return nil return nil
} }
func (ro *RunningOutput) write(metrics []telegraf.Metric) error { // WriteBatch writes only the batch metrics to the output.
nMetrics := len(metrics) func (ro *RunningOutput) WriteBatch() error {
if nMetrics == 0 { batch := ro.buffer.Batch(ro.MetricBatchSize)
if len(batch) == 0 {
return nil return nil
} }
ro.writeMutex.Lock()
defer ro.writeMutex.Unlock() err := ro.write(batch)
if err != nil {
ro.buffer.Reject(batch)
return err
}
ro.buffer.Accept(batch)
return nil
}
func (ro *RunningOutput) write(metrics []telegraf.Metric) error {
start := time.Now() start := time.Now()
err := ro.Output.Write(metrics) err := ro.Output.Write(metrics)
elapsed := time.Since(start) elapsed := time.Since(start)
ro.WriteTime.Incr(elapsed.Nanoseconds())
if err == nil { if err == nil {
log.Printf("D! Output [%s] wrote batch of %d metrics in %s\n", log.Printf("D! [outputs.%s] wrote batch of %d metrics in %s\n",
ro.Name, nMetrics, elapsed) ro.Name, len(metrics), elapsed)
ro.MetricsWritten.Incr(int64(nMetrics))
ro.WriteTime.Incr(elapsed.Nanoseconds())
} }
return err return err
} }
func (ro *RunningOutput) LogBufferStatus() {
nBuffer := ro.buffer.Len()
log.Printf("D! [outputs.%s] buffer fullness: %d / %d metrics. ",
ro.Name, nBuffer, ro.MetricBufferLimit)
}
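To summarize the new write path: `Write` drains the buffer in batches, and the buffer itself, rather than a separate `failMetrics` buffer, now remembers rejected batches for the next flush. Below is a minimal toy sketch of that Batch/Accept/Reject contract, using strings in place of metrics; names are simplified and this is not the `internal/models` implementation:

```go
package main

import (
	"errors"
	"fmt"
)

// buffer is a toy stand-in for the commit's metric buffer: Batch removes up
// to n items, Accept acknowledges a written batch, and Reject returns the
// batch to the front of the buffer so it is retried on the next flush.
type buffer struct{ items []string }

func (b *buffer) Add(items ...string) { b.items = append(b.items, items...) }
func (b *buffer) Len() int            { return len(b.items) }

func (b *buffer) Batch(n int) []string {
	if n > len(b.items) {
		n = len(b.items)
	}
	batch := b.items[:n]
	b.items = b.items[n:]
	return batch
}

func (b *buffer) Accept(batch []string) {} // delivered; nothing to retry
func (b *buffer) Reject(batch []string) { b.items = append(batch, b.items...) }

// write mirrors the shape of RunningOutput.Write: drain in batches and stop
// on the first error, leaving rejected metrics buffered for the next call.
func write(b *buffer, batchSize int, send func([]string) error) error {
	for {
		batch := b.Batch(batchSize)
		if len(batch) == 0 {
			return nil
		}
		if err := send(batch); err != nil {
			b.Reject(batch)
			return err
		}
		b.Accept(batch)
	}
}

func main() {
	b := &buffer{}
	b.Add("m1", "m2", "m3")
	err := write(b, 2, func(batch []string) error {
		return errors.New("output is down")
	})
	fmt.Println(err, b.Len()) // output is down 3
}
```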


@ -231,56 +231,6 @@ func TestRunningOutputDefault(t *testing.T) {
assert.Len(t, m.Metrics(), 10) assert.Len(t, m.Metrics(), 10)
} }
// Test that running output doesn't flush until it's full when
// FlushBufferWhenFull is set.
func TestRunningOutputFlushWhenFull(t *testing.T) {
conf := &OutputConfig{
Filter: Filter{},
}
m := &mockOutput{}
ro := NewRunningOutput("test", m, conf, 6, 10)
// Fill buffer to 1 under limit
for _, metric := range first5 {
ro.AddMetric(metric)
}
// no flush yet
assert.Len(t, m.Metrics(), 0)
// add one more metric
ro.AddMetric(next5[0])
// now it flushed
assert.Len(t, m.Metrics(), 6)
// add one more metric and write it manually
ro.AddMetric(next5[1])
err := ro.Write()
assert.NoError(t, err)
assert.Len(t, m.Metrics(), 7)
}
// Test that running output doesn't flush until it's full when
// FlushBufferWhenFull is set, twice.
func TestRunningOutputMultiFlushWhenFull(t *testing.T) {
conf := &OutputConfig{
Filter: Filter{},
}
m := &mockOutput{}
ro := NewRunningOutput("test", m, conf, 4, 12)
// Fill buffer past limit twice
for _, metric := range first5 {
ro.AddMetric(metric)
}
for _, metric := range next5 {
ro.AddMetric(metric)
}
// flushed twice
assert.Len(t, m.Metrics(), 8)
}
func TestRunningOutputWriteFail(t *testing.T) { func TestRunningOutputWriteFail(t *testing.T) {
conf := &OutputConfig{ conf := &OutputConfig{
Filter: Filter{}, Filter: Filter{},


@ -27,6 +27,19 @@ type ProcessorConfig struct {
Filter Filter Filter Filter
} }
func (rp *RunningProcessor) metricFiltered(metric telegraf.Metric) {
metric.Drop()
}
func containsMetric(item telegraf.Metric, metrics []telegraf.Metric) bool {
for _, m := range metrics {
if item == m {
return true
}
}
return false
}
func (rp *RunningProcessor) Apply(in ...telegraf.Metric) []telegraf.Metric { func (rp *RunningProcessor) Apply(in ...telegraf.Metric) []telegraf.Metric {
rp.Lock() rp.Lock()
defer rp.Unlock() defer rp.Unlock()
@ -43,6 +56,7 @@ func (rp *RunningProcessor) Apply(in ...telegraf.Metric) []telegraf.Metric {
rp.Config.Filter.Modify(metric) rp.Config.Filter.Modify(metric)
if len(metric.FieldList()) == 0 { if len(metric.FieldList()) == 0 {
rp.metricFiltered(metric)
continue continue
} }


@ -6,7 +6,7 @@ import (
"time" "time"
"github.com/influxdata/telegraf" "github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/metric" "github.com/influxdata/telegraf/testutil"
"github.com/stretchr/testify/require" "github.com/stretchr/testify/require"
) )
@ -41,20 +41,6 @@ func TagProcessor(key, value string) *MockProcessor {
} }
} }
func Metric(
name string,
tags map[string]string,
fields map[string]interface{},
tm time.Time,
tp ...telegraf.ValueType,
) telegraf.Metric {
m, err := metric.New(name, tags, fields, tm, tp...)
if err != nil {
panic(err)
}
return m
}
func TestRunningProcessor_Apply(t *testing.T) { func TestRunningProcessor_Apply(t *testing.T) {
type args struct { type args struct {
Processor telegraf.Processor Processor telegraf.Processor
@ -76,7 +62,7 @@ func TestRunningProcessor_Apply(t *testing.T) {
}, },
}, },
input: []telegraf.Metric{ input: []telegraf.Metric{
Metric( testutil.MustMetric(
"cpu", "cpu",
map[string]string{}, map[string]string{},
map[string]interface{}{ map[string]interface{}{
@ -86,7 +72,7 @@ func TestRunningProcessor_Apply(t *testing.T) {
), ),
}, },
expected: []telegraf.Metric{ expected: []telegraf.Metric{
Metric( testutil.MustMetric(
"cpu", "cpu",
map[string]string{ map[string]string{
"apply": "true", "apply": "true",
@ -109,7 +95,7 @@ func TestRunningProcessor_Apply(t *testing.T) {
}, },
}, },
input: []telegraf.Metric{ input: []telegraf.Metric{
Metric( testutil.MustMetric(
"cpu", "cpu",
map[string]string{}, map[string]string{},
map[string]interface{}{ map[string]interface{}{
@ -119,7 +105,7 @@ func TestRunningProcessor_Apply(t *testing.T) {
), ),
}, },
expected: []telegraf.Metric{ expected: []telegraf.Metric{
Metric( testutil.MustMetric(
"cpu", "cpu",
map[string]string{ map[string]string{
"apply": "true", "apply": "true",
@ -142,7 +128,7 @@ func TestRunningProcessor_Apply(t *testing.T) {
}, },
}, },
input: []telegraf.Metric{ input: []telegraf.Metric{
Metric( testutil.MustMetric(
"cpu", "cpu",
map[string]string{}, map[string]string{},
map[string]interface{}{ map[string]interface{}{
@ -152,7 +138,7 @@ func TestRunningProcessor_Apply(t *testing.T) {
), ),
}, },
expected: []telegraf.Metric{ expected: []telegraf.Metric{
Metric( testutil.MustMetric(
"cpu", "cpu",
map[string]string{}, map[string]string{},
map[string]interface{}{ map[string]interface{}{


@ -62,6 +62,17 @@ type Metric interface {
// Copy returns a deep copy of the Metric. // Copy returns a deep copy of the Metric.
Copy() Metric Copy() Metric
// Accept marks the metric as processed successfully and written to an
// output.
Accept()
// Reject marks the metric as processed unsuccessfully.
Reject()
// Drop marks the metric as processed successfully without being written
// to any output.
Drop()
// Mark Metric as an aggregate // Mark Metric as an aggregate
SetAggregate(bool) SetAggregate(bool)
IsAggregate() bool IsAggregate() bool


@ -248,6 +248,15 @@ func (m *metric) HashID() uint64 {
return h.Sum64() return h.Sum64()
} }
func (m *metric) Accept() {
}
func (m *metric) Reject() {
}
func (m *metric) Drop() {
}
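A note on these no-ops: they let the rest of the codebase call `Accept`, `Reject`, or `Drop` on any metric without first checking whether it is tracked; only the `trackingMetric` wrapper in the new `metric/tracking.go` below gives them real behavior. A small hypothetical illustration of why that is convenient:

```go
package main

import "github.com/influxdata/telegraf"

// markHandled (hypothetical helper) works for tracked and untracked metrics
// alike: on a plain metric Accept is a no-op, while on a tracking metric it
// decrements the shared reference count and may fire the notify callback.
func markHandled(metrics []telegraf.Metric) {
	for _, m := range metrics {
		m.Accept()
	}
}

func main() {}
```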
// Convert field to a supported type or nil if unconvertible // Convert field to a supported type or nil if unconvertible
func convertField(v interface{}) interface{} { func convertField(v interface{}) interface{} {
switch v := v.(type) { switch v := v.(type) {

metric/tracking.go (new file)

@ -0,0 +1,171 @@
package metric
import (
"log"
"runtime"
"sync/atomic"
"github.com/influxdata/telegraf"
)
// NotifyFunc is called when a tracking metric is done being processed with
// the tracking information.
type NotifyFunc = func(track telegraf.DeliveryInfo)
// WithTracking adds tracking to the metric and registers the notify function
// to be called when processing is complete.
func WithTracking(metric telegraf.Metric, fn NotifyFunc) (telegraf.Metric, telegraf.TrackingID) {
return newTrackingMetric(metric, fn)
}
// WithGroupTracking adds tracking to the metrics and registers the notify
// function to be called when processing is complete.
func WithGroupTracking(metric []telegraf.Metric, fn NotifyFunc) ([]telegraf.Metric, telegraf.TrackingID) {
return newTrackingMetricGroup(metric, fn)
}
func EnableDebugFinalizer() {
finalizer = debugFinalizer
}
var (
lastID uint64
finalizer func(*trackingData)
)
func newTrackingID() telegraf.TrackingID {
atomic.AddUint64(&lastID, 1)
return telegraf.TrackingID(lastID)
}
func debugFinalizer(d *trackingData) {
rc := atomic.LoadInt32(&d.rc)
if rc != 0 {
log.Fatalf("E! [agent] metric collected with non-zero reference count rc: %d", rc)
}
}
type trackingData struct {
id telegraf.TrackingID
rc int32
acceptCount int32
rejectCount int32
notify NotifyFunc
}
func (d *trackingData) incr() {
atomic.AddInt32(&d.rc, 1)
}
func (d *trackingData) decr() int32 {
return atomic.AddInt32(&d.rc, -1)
}
func (d *trackingData) accept() {
atomic.AddInt32(&d.acceptCount, 1)
}
func (d *trackingData) reject() {
atomic.AddInt32(&d.rejectCount, 1)
}
type trackingMetric struct {
telegraf.Metric
d *trackingData
}
func newTrackingMetric(metric telegraf.Metric, fn NotifyFunc) (telegraf.Metric, telegraf.TrackingID) {
m := &trackingMetric{
Metric: metric,
d: &trackingData{
id: newTrackingID(),
rc: 1,
acceptCount: 0,
rejectCount: 0,
notify: fn,
},
}
if finalizer != nil {
runtime.SetFinalizer(m.d, finalizer)
}
return m, m.d.id
}
func newTrackingMetricGroup(group []telegraf.Metric, fn NotifyFunc) ([]telegraf.Metric, telegraf.TrackingID) {
d := &trackingData{
id: newTrackingID(),
rc: 0,
acceptCount: 0,
rejectCount: 0,
notify: fn,
}
for i, m := range group {
d.incr()
dm := &trackingMetric{
Metric: m,
d: d,
}
group[i] = dm
}
if finalizer != nil {
runtime.SetFinalizer(d, finalizer)
}
return group, d.id
}
func (m *trackingMetric) Copy() telegraf.Metric {
m.d.incr()
return &trackingMetric{
Metric: m.Metric.Copy(),
d: m.d,
}
}
func (m *trackingMetric) Accept() {
m.d.accept()
m.decr()
}
func (m *trackingMetric) Reject() {
m.d.reject()
m.decr()
}
func (m *trackingMetric) Drop() {
m.decr()
}
func (m *trackingMetric) decr() {
v := m.d.decr()
if v < 0 {
panic("negative refcount")
}
if v == 0 {
m.d.notify(
&deliveryInfo{
id: m.d.id,
accepted: int(m.d.acceptCount),
rejected: int(m.d.rejectCount),
},
)
}
}
type deliveryInfo struct {
id telegraf.TrackingID
accepted int
rejected int
}
func (r *deliveryInfo) ID() telegraf.TrackingID {
return r.id
}
func (r *deliveryInfo) Delivered() bool {
return r.rejected == 0
}
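The test file below exercises this machinery in full; for a quick orientation, here is a minimal, hypothetical usage of `WithTracking`. The notify function runs once the metric and every copy of it has been Accepted, Rejected, or Dropped:

```go
package main

import (
	"fmt"
	"time"

	"github.com/influxdata/telegraf"
	"github.com/influxdata/telegraf/metric"
)

func main() {
	m, err := metric.New("cpu", map[string]string{},
		map[string]interface{}{"value": 42}, time.Unix(0, 0))
	if err != nil {
		panic(err)
	}

	tracked, id := metric.WithTracking(m, func(info telegraf.DeliveryInfo) {
		// Delivered() is true only if no reference was Rejected.
		fmt.Println("delivered:", info.Delivered())
	})
	fmt.Println("tracking id:", id)

	dup := tracked.Copy() // bumps the shared reference count
	tracked.Accept()      // not final yet: the copy is still outstanding
	dup.Accept()          // refcount hits zero; notify prints "delivered: true"
}
```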

metric/tracking_test.go (new file)

@ -0,0 +1,260 @@
package metric
import (
"testing"
"time"
"github.com/influxdata/telegraf"
"github.com/stretchr/testify/require"
)
func mustMetric(
name string,
tags map[string]string,
fields map[string]interface{},
tm time.Time,
tp ...telegraf.ValueType,
) telegraf.Metric {
m, err := New(name, tags, fields, tm, tp...)
if err != nil {
panic("mustMetric")
}
return m
}
type deliveries struct {
Info map[telegraf.TrackingID]telegraf.DeliveryInfo
}
func (d *deliveries) onDelivery(info telegraf.DeliveryInfo) {
d.Info[info.ID()] = info
}
func TestTracking(t *testing.T) {
tests := []struct {
name string
metric telegraf.Metric
actions func(metric telegraf.Metric)
delivered bool
}{
{
name: "accept",
metric: mustMetric(
"cpu",
map[string]string{},
map[string]interface{}{
"value": 42,
},
time.Unix(0, 0),
),
actions: func(m telegraf.Metric) {
m.Accept()
},
delivered: true,
},
{
name: "reject",
metric: mustMetric(
"cpu",
map[string]string{},
map[string]interface{}{
"value": 42,
},
time.Unix(0, 0),
),
actions: func(m telegraf.Metric) {
m.Reject()
},
delivered: false,
},
{
name: "accept copy",
metric: mustMetric(
"cpu",
map[string]string{},
map[string]interface{}{
"value": 42,
},
time.Unix(0, 0),
),
actions: func(m telegraf.Metric) {
m2 := m.Copy()
m.Accept()
m2.Accept()
},
delivered: true,
},
{
name: "copy with accept and done",
metric: mustMetric(
"cpu",
map[string]string{},
map[string]interface{}{
"value": 42,
},
time.Unix(0, 0),
),
actions: func(m telegraf.Metric) {
m2 := m.Copy()
m.Accept()
m2.Drop()
},
delivered: true,
},
{
name: "copy with mixed delivery",
metric: mustMetric(
"cpu",
map[string]string{},
map[string]interface{}{
"value": 42,
},
time.Unix(0, 0),
),
actions: func(m telegraf.Metric) {
m2 := m.Copy()
m.Accept()
m2.Reject()
},
delivered: false,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
d := &deliveries{
Info: make(map[telegraf.TrackingID]telegraf.DeliveryInfo),
}
metric, id := WithTracking(tt.metric, d.onDelivery)
tt.actions(metric)
info := d.Info[id]
require.Equal(t, tt.delivered, info.Delivered())
})
}
}
func TestGroupTracking(t *testing.T) {
tests := []struct {
name string
metrics []telegraf.Metric
actions func(metrics []telegraf.Metric)
delivered bool
}{
{
name: "accept",
metrics: []telegraf.Metric{
mustMetric(
"cpu",
map[string]string{},
map[string]interface{}{
"value": 42,
},
time.Unix(0, 0),
),
mustMetric(
"cpu",
map[string]string{},
map[string]interface{}{
"value": 42,
},
time.Unix(0, 0),
),
},
actions: func(metrics []telegraf.Metric) {
metrics[0].Accept()
metrics[1].Accept()
},
delivered: true,
},
{
name: "reject",
metrics: []telegraf.Metric{
mustMetric(
"cpu",
map[string]string{},
map[string]interface{}{
"value": 42,
},
time.Unix(0, 0),
),
mustMetric(
"cpu",
map[string]string{},
map[string]interface{}{
"value": 42,
},
time.Unix(0, 0),
),
},
actions: func(metrics []telegraf.Metric) {
metrics[0].Reject()
metrics[1].Reject()
},
delivered: false,
},
{
name: "remove",
metrics: []telegraf.Metric{
mustMetric(
"cpu",
map[string]string{},
map[string]interface{}{
"value": 42,
},
time.Unix(0, 0),
),
mustMetric(
"cpu",
map[string]string{},
map[string]interface{}{
"value": 42,
},
time.Unix(0, 0),
),
},
actions: func(metrics []telegraf.Metric) {
metrics[0].Drop()
metrics[1].Drop()
},
delivered: true,
},
{
name: "mixed",
metrics: []telegraf.Metric{
mustMetric(
"cpu",
map[string]string{},
map[string]interface{}{
"value": 42,
},
time.Unix(0, 0),
),
mustMetric(
"cpu",
map[string]string{},
map[string]interface{}{
"value": 42,
},
time.Unix(0, 0),
),
},
actions: func(metrics []telegraf.Metric) {
metrics[0].Accept()
metrics[1].Reject()
},
delivered: false,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
d := &deliveries{
Info: make(map[telegraf.TrackingID]telegraf.DeliveryInfo),
}
metrics, id := WithGroupTracking(tt.metrics, d.onDelivery)
tt.actions(metrics)
info := d.Info[id]
require.Equal(t, tt.delivered, info.Delivered())
})
}
}


@ -17,16 +17,7 @@ type Output interface {
// if the Output only accepts a fixed set of aggregations over a time period. // if the Output only accepts a fixed set of aggregations over a time period.
// These functions may be called concurrently to the Write function. // These functions may be called concurrently to the Write function.
type AggregatingOutput interface { type AggregatingOutput interface {
// Connect to the Output Output
Connect() error
// Close any connections to the Output
Close() error
// Description returns a one-sentence description on the Output
Description() string
// SampleConfig returns the default configuration of the Output
SampleConfig() string
// Write takes in group of points to be written to the Output
Write(metrics []Metric) error
// Add the metric to the aggregator // Add the metric to the aggregator
Add(in Metric) Add(in Metric)
@ -35,21 +26,3 @@ type AggregatingOutput interface {
// Reset signals that the aggregator period is completed. // Reset signals that the aggregator period is completed.
Reset() Reset()
} }
type ServiceOutput interface {
// Connect to the Output
Connect() error
// Close any connections to the Output
Close() error
// Description returns a one-sentence description on the Output
Description() string
// SampleConfig returns the default configuration of the Output
SampleConfig() string
// Write takes in group of points to be written to the Output
Write(metrics []Metric) error
// Start the "service" that will provide an Output
Start() error
// Stop the "service" that will provide an Output
Stop()
}


@ -133,7 +133,6 @@ func (m *BasicStats) Add(in telegraf.Metric) {
} }
func (m *BasicStats) Push(acc telegraf.Accumulator) { func (m *BasicStats) Push(acc telegraf.Accumulator) {
config := getConfiguredStats(m) config := getConfiguredStats(m)
for _, aggregate := range m.cache { for _, aggregate := range m.cache {


@ -13,7 +13,6 @@ For an introduction to AMQP see:
The following defaults are known to work with RabbitMQ:

```toml
[[inputs.amqp_consumer]]
## Broker to consume from. ## Broker to consume from.
## deprecated in 1.7; use the brokers option ## deprecated in 1.7; use the brokers option
@ -56,6 +55,16 @@ The following defaults are known to work with RabbitMQ:
## Maximum number of messages server should give to the worker. ## Maximum number of messages server should give to the worker.
# prefetch_count = 50 # prefetch_count = 50
## Maximum messages to read from the broker that have not been written by an
## output. For best throughput set based on the number of metrics within
## each message and the size of the output's metric_batch_size.
##
## For example, if each message from the queue contains 10 metrics and the
## output metric_batch_size is 1000, setting this to 100 will ensure that a
## full batch is collected and the write is triggered immediately without
## waiting until the next flush_interval.
# max_undelivered_messages = 1000
## Auth method. PLAIN and EXTERNAL are supported ## Auth method. PLAIN and EXTERNAL are supported
## Using EXTERNAL requires enabling the rabbitmq_auth_mechanism_ssl plugin as ## Using EXTERNAL requires enabling the rabbitmq_auth_mechanism_ssl plugin as
## described here: https://www.rabbitmq.com/plugins.html ## described here: https://www.rabbitmq.com/plugins.html


@ -1,6 +1,7 @@
package amqp_consumer package amqp_consumer
import ( import (
"context"
"errors" "errors"
"fmt" "fmt"
"log" "log"
@ -9,25 +10,32 @@ import (
"sync" "sync"
"time" "time"
"github.com/streadway/amqp"
"github.com/influxdata/telegraf" "github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/internal/tls" "github.com/influxdata/telegraf/internal/tls"
"github.com/influxdata/telegraf/plugins/inputs" "github.com/influxdata/telegraf/plugins/inputs"
"github.com/influxdata/telegraf/plugins/parsers" "github.com/influxdata/telegraf/plugins/parsers"
"github.com/streadway/amqp"
) )
const (
defaultMaxUndeliveredMessages = 1000
)
type empty struct{}
type semaphore chan empty
// AMQPConsumer is the top level struct for this plugin // AMQPConsumer is the top level struct for this plugin
type AMQPConsumer struct { type AMQPConsumer struct {
URL string `toml:"url"` // deprecated in 1.7; use brokers URL string `toml:"url"` // deprecated in 1.7; use brokers
Brokers []string `toml:"brokers"` Brokers []string `toml:"brokers"`
Username string `toml:"username"` Username string `toml:"username"`
Password string `toml:"password"` Password string `toml:"password"`
Exchange string `toml:"exchange"` Exchange string `toml:"exchange"`
ExchangeType string `toml:"exchange_type"` ExchangeType string `toml:"exchange_type"`
ExchangeDurability string `toml:"exchange_durability"` ExchangeDurability string `toml:"exchange_durability"`
ExchangePassive bool `toml:"exchange_passive"` ExchangePassive bool `toml:"exchange_passive"`
ExchangeArguments map[string]string `toml:"exchange_arguments"` ExchangeArguments map[string]string `toml:"exchange_arguments"`
MaxUndeliveredMessages int `toml:"max_undelivered_messages"`
// Queue Name // Queue Name
Queue string `toml:"queue"` Queue string `toml:"queue"`
@ -44,9 +52,12 @@ type AMQPConsumer struct {
AuthMethod string AuthMethod string
tls.ClientConfig tls.ClientConfig
deliveries map[telegraf.TrackingID]amqp.Delivery
parser parsers.Parser parser parsers.Parser
conn *amqp.Connection conn *amqp.Connection
wg *sync.WaitGroup wg *sync.WaitGroup
cancel context.CancelFunc
} }
type externalAuth struct{} type externalAuth struct{}
@ -114,6 +125,16 @@ func (a *AMQPConsumer) SampleConfig() string {
## Maximum number of messages server should give to the worker. ## Maximum number of messages server should give to the worker.
# prefetch_count = 50 # prefetch_count = 50
## Maximum messages to read from the broker that have not been written by an
## output. For best throughput set based on the number of metrics within
## each message and the size of the output's metric_batch_size.
##
## For example, if each message from the queue contains 10 metrics and the
## output metric_batch_size is 1000, setting this to 100 will ensure that a
## full batch is collected and the write is triggered immediately without
## waiting until the next flush_interval.
# max_undelivered_messages = 1000
## Auth method. PLAIN and EXTERNAL are supported ## Auth method. PLAIN and EXTERNAL are supported
## Using EXTERNAL requires enabling the rabbitmq_auth_mechanism_ssl plugin as ## Using EXTERNAL requires enabling the rabbitmq_auth_mechanism_ssl plugin as
## described here: https://www.rabbitmq.com/plugins.html ## described here: https://www.rabbitmq.com/plugins.html
@ -185,9 +206,15 @@ func (a *AMQPConsumer) Start(acc telegraf.Accumulator) error {
return err return err
} }
ctx, cancel := context.WithCancel(context.Background())
a.cancel = cancel
a.wg = &sync.WaitGroup{} a.wg = &sync.WaitGroup{}
a.wg.Add(1) a.wg.Add(1)
go a.process(msgs, acc) go func() {
defer a.wg.Done()
a.process(ctx, msgs, acc)
}()
go func() { go func() {
for { for {
@ -196,7 +223,7 @@ func (a *AMQPConsumer) Start(acc telegraf.Accumulator) error {
break break
} }
log.Printf("I! AMQP consumer connection closed: %s; trying to reconnect", err) log.Printf("I! [inputs.amqp_consumer] connection closed: %s; trying to reconnect", err)
for { for {
msgs, err := a.connect(amqpConf) msgs, err := a.connect(amqpConf)
if err != nil { if err != nil {
@ -206,7 +233,10 @@ func (a *AMQPConsumer) Start(acc telegraf.Accumulator) error {
} }
a.wg.Add(1) a.wg.Add(1)
go a.process(msgs, acc) go func() {
defer a.wg.Done()
a.process(ctx, msgs, acc)
}()
break break
} }
} }
@ -224,14 +254,14 @@ func (a *AMQPConsumer) connect(amqpConf *amqp.Config) (<-chan amqp.Delivery, err
p := rand.Perm(len(brokers)) p := rand.Perm(len(brokers))
for _, n := range p { for _, n := range p {
broker := brokers[n] broker := brokers[n]
log.Printf("D! [amqp_consumer] connecting to %q", broker) log.Printf("D! [inputs.amqp_consumer] connecting to %q", broker)
conn, err := amqp.DialConfig(broker, *amqpConf) conn, err := amqp.DialConfig(broker, *amqpConf)
if err == nil { if err == nil {
a.conn = conn a.conn = conn
log.Printf("D! [amqp_consumer] connected to %q", broker) log.Printf("D! [inputs.amqp_consumer] connected to %q", broker)
break break
} }
log.Printf("D! [amqp_consumer] error connecting to %q", broker) log.Printf("D! [inputs.amqp_consumer] error connecting to %q", broker)
} }
if a.conn == nil { if a.conn == nil {
@ -320,7 +350,6 @@ func (a *AMQPConsumer) connect(amqpConf *amqp.Config) (<-chan amqp.Delivery, err
return nil, fmt.Errorf("Failed establishing connection to queue: %s", err) return nil, fmt.Errorf("Failed establishing connection to queue: %s", err)
} }
log.Println("I! Started AMQP consumer")
return msgs, err return msgs, err
} }
@ -361,42 +390,101 @@ func declareExchange(
} }
// Read messages from queue and add them to the Accumulator // Read messages from queue and add them to the Accumulator
func (a *AMQPConsumer) process(msgs <-chan amqp.Delivery, acc telegraf.Accumulator) { func (a *AMQPConsumer) process(ctx context.Context, msgs <-chan amqp.Delivery, ac telegraf.Accumulator) {
defer a.wg.Done() a.deliveries = make(map[telegraf.TrackingID]amqp.Delivery)
for d := range msgs {
metrics, err := a.parser.Parse(d.Body) acc := ac.WithTracking(a.MaxUndeliveredMessages)
if err != nil { sem := make(semaphore, a.MaxUndeliveredMessages)
log.Printf("E! %v: error parsing metric - %v", err, string(d.Body))
} else { for {
for _, m := range metrics { select {
acc.AddFields(m.Name(), m.Fields(), m.Tags(), m.Time()) case <-ctx.Done():
return
case track := <-acc.Delivered():
if a.onDelivery(track) {
<-sem
}
case sem <- empty{}:
select {
case <-ctx.Done():
return
case track := <-acc.Delivered():
if a.onDelivery(track) {
<-sem
<-sem
}
case d, ok := <-msgs:
if !ok {
return
}
err := a.onMessage(acc, d)
if err != nil {
acc.AddError(err)
<-sem
}
} }
} }
d.Ack(false)
} }
log.Printf("I! AMQP consumer queue closed") }
func (a *AMQPConsumer) onMessage(acc telegraf.TrackingAccumulator, d amqp.Delivery) error {
metrics, err := a.parser.Parse(d.Body)
if err != nil {
return err
}
id := acc.AddTrackingMetricGroup(metrics)
a.deliveries[id] = d
return nil
}
func (a *AMQPConsumer) onDelivery(track telegraf.DeliveryInfo) bool {
delivery, ok := a.deliveries[track.ID()]
if !ok {
// Added by a previous connection
return false
}
if track.Delivered() {
err := delivery.Ack(false)
if err != nil {
log.Printf("E! [inputs.amqp_consumer] Unable to ack written delivery: %d: %v",
delivery.DeliveryTag, err)
a.conn.Close()
}
} else {
err := delivery.Reject(false)
if err != nil {
log.Printf("E! [inputs.amqp_consumer] Unable to reject failed delivery: %d: %v",
delivery.DeliveryTag, err)
a.conn.Close()
}
}
delete(a.deliveries, track.ID())
return true
} }
func (a *AMQPConsumer) Stop() { func (a *AMQPConsumer) Stop() {
a.cancel()
a.wg.Wait()
err := a.conn.Close() err := a.conn.Close()
if err != nil && err != amqp.ErrClosed { if err != nil && err != amqp.ErrClosed {
log.Printf("E! Error closing AMQP connection: %s", err) log.Printf("E! [inputs.amqp_consumer] Error closing AMQP connection: %s", err)
return return
} }
a.wg.Wait()
log.Println("I! Stopped AMQP service")
} }
func init() { func init() {
inputs.Add("amqp_consumer", func() telegraf.Input { inputs.Add("amqp_consumer", func() telegraf.Input {
return &AMQPConsumer{ return &AMQPConsumer{
URL: DefaultBroker, URL: DefaultBroker,
AuthMethod: DefaultAuthMethod, AuthMethod: DefaultAuthMethod,
ExchangeType: DefaultExchangeType, ExchangeType: DefaultExchangeType,
ExchangeDurability: DefaultExchangeDurability, ExchangeDurability: DefaultExchangeDurability,
QueueDurability: DefaultQueueDurability, QueueDurability: DefaultQueueDurability,
PrefetchCount: DefaultPrefetchCount, PrefetchCount: DefaultPrefetchCount,
MaxUndeliveredMessages: defaultMaxUndeliveredMessages,
} }
}) })
} }
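The `process` loop above bounds in-flight messages with the `semaphore chan empty` idiom: a slot is reserved by sending into the semaphore before a message is read, and released by receiving from it when the delivery is confirmed; the nested `select` keeps delivery notifications from deadlocking against slot reservation. A stripped-down, stand-alone sketch of the pattern (illustrative only, not the plugin's code):

```go
package main

import (
	"context"
	"fmt"
	"time"
)

type empty struct{}
type semaphore chan empty

// consume sketches the process() loop: at most cap(sem) messages may be in
// flight (read but not yet confirmed written by an output).
func consume(ctx context.Context, msgs <-chan string, delivered <-chan string) {
	sem := make(semaphore, 2)
	for {
		select {
		case <-ctx.Done():
			return
		case d := <-delivered:
			fmt.Println("confirmed:", d)
			<-sem // release the slot held by the delivered message
		case sem <- empty{}: // reserve a slot before reading a message
			select {
			case <-ctx.Done():
				return
			case d := <-delivered:
				fmt.Println("confirmed:", d)
				<-sem // the slot just reserved
				<-sem // plus the slot held by the delivered message
			case m := <-msgs:
				fmt.Println("in flight:", m)
			}
		}
	}
}

func main() {
	msgs := make(chan string, 2)
	delivered := make(chan string, 2)
	msgs <- "msg-1"
	msgs <- "msg-2"
	ctx, cancel := context.WithTimeout(context.Background(), 50*time.Millisecond)
	defer cancel()
	consume(ctx, msgs, delivered) // reads both messages, then times out
}
```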


@ -18,52 +18,54 @@ plugin.
memstats are taken from the Go runtime: https://golang.org/pkg/runtime/#MemStats

- internal_memstats
    - alloc_bytes
    - frees
    - heap_alloc_bytes
    - heap_idle_bytes
    - heap_in_use_bytes
    - heap_objects_bytes
    - heap_released_bytes
    - heap_sys_bytes
    - mallocs
    - num_gc
    - pointer_lookups
    - sys_bytes
    - total_alloc_bytes

agent stats collect aggregate stats on all telegraf plugins.

- internal_agent
    - gather_errors
    - metrics_dropped
    - metrics_gathered
    - metrics_written

internal_gather stats collect aggregate stats on all input plugins
that are of the same input type. They are tagged with `input=<plugin_name>`.

- internal_gather
    - gather_time_ns
    - metrics_gathered

internal_write stats collect aggregate stats on all output plugins
that are of the same output type. They are tagged with `output=<plugin_name>`.

- internal_write
    - buffer_limit
    - buffer_size
    - metrics_added
    - metrics_written
    - metrics_dropped
    - metrics_filtered
    - write_time_ns

internal_<plugin_name> are metrics which are defined on a per-plugin basis, and
usually contain tags which differentiate each instance of a particular type of
plugin.

- internal_<plugin_name>
    - individual plugin-specific fields, such as request counts.

### Tags:
@ -76,7 +78,7 @@ to each particular plugin.
``` ```
internal_memstats,host=tyrion alloc_bytes=4457408i,sys_bytes=10590456i,pointer_lookups=7i,mallocs=17642i,frees=7473i,heap_sys_bytes=6848512i,heap_idle_bytes=1368064i,heap_in_use_bytes=5480448i,heap_released_bytes=0i,total_alloc_bytes=6875560i,heap_alloc_bytes=4457408i,heap_objects_bytes=10169i,num_gc=2i 1480682800000000000 internal_memstats,host=tyrion alloc_bytes=4457408i,sys_bytes=10590456i,pointer_lookups=7i,mallocs=17642i,frees=7473i,heap_sys_bytes=6848512i,heap_idle_bytes=1368064i,heap_in_use_bytes=5480448i,heap_released_bytes=0i,total_alloc_bytes=6875560i,heap_alloc_bytes=4457408i,heap_objects_bytes=10169i,num_gc=2i 1480682800000000000
internal_agent,host=tyrion metrics_written=18i,metrics_dropped=0i,metrics_gathered=19i,gather_errors=0i 1480682800000000000 internal_agent,host=tyrion metrics_written=18i,metrics_dropped=0i,metrics_gathered=19i,gather_errors=0i 1480682800000000000
internal_write,output=file,host=tyrion buffer_limit=10000i,write_time_ns=636609i,metrics_written=18i,buffer_size=0i 1480682800000000000 internal_write,output=file,host=tyrion buffer_limit=10000i,write_time_ns=636609i,metrics_added=18i,metrics_written=18i,buffer_size=0i 1480682800000000000
internal_gather,input=internal,host=tyrion metrics_gathered=19i,gather_time_ns=442114i 1480682800000000000 internal_gather,input=internal,host=tyrion metrics_gathered=19i,gather_time_ns=442114i 1480682800000000000
internal_gather,input=http_listener,host=tyrion metrics_gathered=0i,gather_time_ns=167285i 1480682800000000000 internal_gather,input=http_listener,host=tyrion metrics_gathered=0i,gather_time_ns=167285i 1480682800000000000
internal_http_listener,address=:8186,host=tyrion queries_received=0i,writes_received=0i,requests_received=0i,buffers_created=0i,requests_served=0i,pings_received=0i,bytes_received=0i,not_founds_served=0i,pings_served=0i,queries_served=0i,writes_served=0i 1480682800000000000 internal_http_listener,address=:8186,host=tyrion queries_received=0i,writes_received=0i,requests_received=0i,buffers_created=0i,requests_served=0i,pings_received=0i,bytes_received=0i,not_founds_served=0i,pings_served=0i,queries_served=0i,writes_served=0i 1480682800000000000


@ -1,18 +1,14 @@
# Kafka Consumer Input Plugin

The [Kafka][kafka] consumer plugin reads from Kafka and creates metrics using
one of the supported [input data formats][].

For old Kafka versions (< 0.8), please use the [kafka_consumer_legacy][] input
plugin and use the old zookeeper connection method.

### Configuration

```toml
[[inputs.kafka_consumer]]
## kafka servers ## kafka servers
brokers = ["localhost:9092"] brokers = ["localhost:9092"]
@ -44,18 +40,27 @@ and use the old zookeeper connection method.
## Offset (must be either "oldest" or "newest") ## Offset (must be either "oldest" or "newest")
offset = "oldest" offset = "oldest"
## Maximum length of a message to consume, in bytes (default 0/unlimited);
## larger messages are dropped
max_message_len = 1000000
## Maximum messages to read from the broker that have not been written by an
## output. For best throughput set based on the number of metrics within
## each message and the size of the output's metric_batch_size.
##
## For example, if each message from the queue contains 10 metrics and the
## output metric_batch_size is 1000, setting this to 100 will ensure that a
## full batch is collected and the write is triggered immediately without
## waiting until the next flush_interval.
# max_undelivered_messages = 1000
## Data format to consume. ## Data format to consume.
## Each data format has its own unique set of configuration options, read ## Each data format has its own unique set of configuration options, read
## more about them here: ## more about them here:
## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md ## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md
data_format = "influx" data_format = "influx"
## Maximum length of a message to consume, in bytes (default 0/unlimited);
## larger messages are dropped
max_message_len = 1000000
``` ```
[kafka]: https://kafka.apache.org
[kafka_consumer_legacy]: /plugins/inputs/kafka_consumer_legacy/README.md
[input data formats]: /docs/DATA_FORMATS_INPUT.md


@ -1,55 +1,54 @@
package kafka_consumer package kafka_consumer
import ( import (
"context"
"fmt" "fmt"
"log" "log"
"strings" "strings"
"sync" "sync"
"github.com/Shopify/sarama"
cluster "github.com/bsm/sarama-cluster"
"github.com/influxdata/telegraf" "github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/internal/tls" "github.com/influxdata/telegraf/internal/tls"
"github.com/influxdata/telegraf/plugins/inputs" "github.com/influxdata/telegraf/plugins/inputs"
"github.com/influxdata/telegraf/plugins/parsers" "github.com/influxdata/telegraf/plugins/parsers"
"github.com/Shopify/sarama"
cluster "github.com/bsm/sarama-cluster"
) )
const (
defaultMaxUndeliveredMessages = 1000
)
type empty struct{}
type semaphore chan empty
type Consumer interface {
Errors() <-chan error
Messages() <-chan *sarama.ConsumerMessage
MarkOffset(msg *sarama.ConsumerMessage, metadata string)
Close() error
}
type Kafka struct { type Kafka struct {
ConsumerGroup string ConsumerGroup string `toml:"consumer_group"`
ClientID string `toml:"client_id"` ClientID string `toml:"client_id"`
Topics []string Topics []string `toml:"topics"`
Brokers []string Brokers []string `toml:"brokers"`
MaxMessageLen int MaxMessageLen int `toml:"max_message_len"`
Version string `toml:"version"` Version string `toml:"version"`
MaxUndeliveredMessages int `toml:"max_undelivered_messages"`
Cluster *cluster.Consumer Offset string `toml:"offset"`
SASLUsername string `toml:"sasl_username"`
SASLPassword string `toml:"sasl_password"`
tls.ClientConfig tls.ClientConfig
// SASL Username cluster Consumer
SASLUsername string `toml:"sasl_username"` parser parsers.Parser
// SASL Password wg *sync.WaitGroup
SASLPassword string `toml:"sasl_password"` cancel context.CancelFunc
// Legacy metric buffer support // Unconfirmed messages
MetricBuffer int messages map[telegraf.TrackingID]*sarama.ConsumerMessage
// TODO remove PointBuffer, legacy support
PointBuffer int
Offset string
parser parsers.Parser
sync.Mutex
// channel for all incoming kafka messages
in <-chan *sarama.ConsumerMessage
// channel for all kafka consumer errors
errs <-chan error
done chan struct{}
// keep the accumulator internally:
acc telegraf.Accumulator
// doNotCommitMsgs tells the parser not to call CommitUpTo on the consumer // doNotCommitMsgs tells the parser not to call CommitUpTo on the consumer
// this is mostly for test purposes, but there may be a use-case for it later. // this is mostly for test purposes, but there may be a use-case for it later.
@ -86,16 +85,25 @@ var sampleConfig = `
consumer_group = "telegraf_metrics_consumers" consumer_group = "telegraf_metrics_consumers"
## Offset (must be either "oldest" or "newest") ## Offset (must be either "oldest" or "newest")
offset = "oldest" offset = "oldest"
## Maximum length of a message to consume, in bytes (default 0/unlimited);
## larger messages are dropped
max_message_len = 1000000
## Maximum messages to read from the broker that have not been written by an
## output. For best throughput set based on the number of metrics within
## each message and the size of the output's metric_batch_size.
##
## For example, if each message from the queue contains 10 metrics and the
## output metric_batch_size is 1000, setting this to 100 will ensure that a
## full batch is collected and the write is triggered immediately without
## waiting until the next flush_interval.
# max_undelivered_messages = 1000
## Data format to consume. ## Data format to consume.
## Each data format has its own unique set of configuration options, read ## Each data format has its own unique set of configuration options, read
## more about them here: ## more about them here:
## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md ## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md
data_format = "influx" data_format = "influx"
## Maximum length of a message to consume, in bytes (default 0/unlimited);
## larger messages are dropped
max_message_len = 1000000
` `
func (k *Kafka) SampleConfig() string { func (k *Kafka) SampleConfig() string {
@ -111,12 +119,8 @@ func (k *Kafka) SetParser(parser parsers.Parser) {
} }
func (k *Kafka) Start(acc telegraf.Accumulator) error { func (k *Kafka) Start(acc telegraf.Accumulator) error {
k.Lock()
defer k.Unlock()
var clusterErr error var clusterErr error
k.acc = acc
config := cluster.NewConfig() config := cluster.NewConfig()
if k.Version != "" { if k.Version != "" {
@ -159,13 +163,13 @@ func (k *Kafka) Start(acc telegraf.Accumulator) error {
case "newest": case "newest":
config.Consumer.Offsets.Initial = sarama.OffsetNewest config.Consumer.Offsets.Initial = sarama.OffsetNewest
default: default:
log.Printf("I! WARNING: Kafka consumer invalid offset '%s', using 'oldest'\n", log.Printf("I! WARNING: Kafka consumer invalid offset '%s', using 'oldest'",
k.Offset) k.Offset)
config.Consumer.Offsets.Initial = sarama.OffsetOldest config.Consumer.Offsets.Initial = sarama.OffsetOldest
} }
if k.Cluster == nil { if k.cluster == nil {
k.Cluster, clusterErr = cluster.NewConsumer( k.cluster, clusterErr = cluster.NewConsumer(
k.Brokers, k.Brokers,
k.ConsumerGroup, k.ConsumerGroup,
k.Topics, k.Topics,
@ -173,67 +177,110 @@ func (k *Kafka) Start(acc telegraf.Accumulator) error {
) )
if clusterErr != nil { if clusterErr != nil {
log.Printf("E! Error when creating Kafka Consumer, brokers: %v, topics: %v\n", log.Printf("E! Error when creating Kafka Consumer, brokers: %v, topics: %v",
k.Brokers, k.Topics) k.Brokers, k.Topics)
return clusterErr return clusterErr
} }
// Setup message and error channels
k.in = k.Cluster.Messages()
k.errs = k.Cluster.Errors()
} }
k.done = make(chan struct{}) ctx, cancel := context.WithCancel(context.Background())
// Start the kafka message reader k.cancel = cancel
go k.receiver()
log.Printf("I! Started the kafka consumer service, brokers: %v, topics: %v\n", // Start consumer goroutine
k.wg = &sync.WaitGroup{}
k.wg.Add(1)
go func() {
defer k.wg.Done()
k.receiver(ctx, acc)
}()
log.Printf("I! Started the kafka consumer service, brokers: %v, topics: %v",
k.Brokers, k.Topics) k.Brokers, k.Topics)
return nil return nil
} }
// receiver() reads all incoming messages from the consumer, and parses them into // receiver() reads all incoming messages from the consumer, and parses them into
// influxdb metric points. // influxdb metric points.
func (k *Kafka) receiver() { func (k *Kafka) receiver(ctx context.Context, ac telegraf.Accumulator) {
k.messages = make(map[telegraf.TrackingID]*sarama.ConsumerMessage)
acc := ac.WithTracking(k.MaxUndeliveredMessages)
sem := make(semaphore, k.MaxUndeliveredMessages)
for { for {
select { select {
case <-k.done: case <-ctx.Done():
return return
case err := <-k.errs: case track := <-acc.Delivered():
if err != nil { <-sem
k.acc.AddError(fmt.Errorf("Consumer Error: %s\n", err)) k.onDelivery(track)
} case err := <-k.cluster.Errors():
case msg := <-k.in: acc.AddError(err)
if k.MaxMessageLen != 0 && len(msg.Value) > k.MaxMessageLen { case sem <- empty{}:
k.acc.AddError(fmt.Errorf("Message longer than max_message_len (%d > %d)", select {
len(msg.Value), k.MaxMessageLen)) case <-ctx.Done():
} else { return
metrics, err := k.parser.Parse(msg.Value) case track := <-acc.Delivered():
// Once for the delivered message, once to leave the case
<-sem
<-sem
k.onDelivery(track)
case err := <-k.cluster.Errors():
<-sem
acc.AddError(err)
case msg := <-k.cluster.Messages():
err := k.onMessage(acc, msg)
if err != nil { if err != nil {
k.acc.AddError(fmt.Errorf("Message Parse Error\nmessage: %s\nerror: %s", acc.AddError(err)
string(msg.Value), err.Error())) <-sem
} }
for _, metric := range metrics {
k.acc.AddFields(metric.Name(), metric.Fields(), metric.Tags(), metric.Time())
}
}
if !k.doNotCommitMsgs {
// TODO(cam) this locking can be removed if this PR gets merged:
// https://github.com/wvanbergen/kafka/pull/84
k.Lock()
k.Cluster.MarkOffset(msg, "")
k.Unlock()
} }
} }
} }
} }
func (k *Kafka) markOffset(msg *sarama.ConsumerMessage) {
if !k.doNotCommitMsgs {
k.cluster.MarkOffset(msg, "")
}
}
func (k *Kafka) onMessage(acc telegraf.TrackingAccumulator, msg *sarama.ConsumerMessage) error {
if k.MaxMessageLen != 0 && len(msg.Value) > k.MaxMessageLen {
k.markOffset(msg)
return fmt.Errorf("Message longer than max_message_len (%d > %d)",
len(msg.Value), k.MaxMessageLen)
}
metrics, err := k.parser.Parse(msg.Value)
if err != nil {
return err
}
id := acc.AddTrackingMetricGroup(metrics)
k.messages[id] = msg
return nil
}
func (k *Kafka) onDelivery(track telegraf.DeliveryInfo) {
msg, ok := k.messages[track.ID()]
if !ok {
log.Printf("E! [inputs.kafka_consumer] Could not mark message delivered: %d", track.ID())
}
if track.Delivered() {
k.markOffset(msg)
}
delete(k.messages, track.ID())
}
func (k *Kafka) Stop() { func (k *Kafka) Stop() {
k.Lock() k.cancel()
defer k.Unlock() k.wg.Wait()
close(k.done)
if err := k.Cluster.Close(); err != nil { if err := k.cluster.Close(); err != nil {
k.acc.AddError(fmt.Errorf("Error closing consumer: %s\n", err.Error())) log.Printf("E! [inputs.kafka_consumer] Error closing consumer: %v", err)
} }
} }
@ -243,6 +290,8 @@ func (k *Kafka) Gather(acc telegraf.Accumulator) error {
func init() { func init() {
inputs.Add("kafka_consumer", func() telegraf.Input { inputs.Add("kafka_consumer", func() telegraf.Input {
return &Kafka{} return &Kafka{
MaxUndeliveredMessages: defaultMaxUndeliveredMessages,
}
}) })
} }


@ -38,7 +38,6 @@ func TestReadsMetricsFromKafka(t *testing.T) {
ConsumerGroup: "telegraf_test_consumers", ConsumerGroup: "telegraf_test_consumers",
Topics: []string{testTopic}, Topics: []string{testTopic},
Brokers: brokerPeers, Brokers: brokerPeers,
PointBuffer: 100000,
Offset: "oldest", Offset: "oldest",
} }
p, _ := parsers.NewInfluxParser() p, _ := parsers.NewInfluxParser()


@ -1,13 +1,14 @@
package kafka_consumer package kafka_consumer
import ( import (
"context"
"strings" "strings"
"testing" "testing"
"github.com/Shopify/sarama"
"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/plugins/parsers" "github.com/influxdata/telegraf/plugins/parsers"
"github.com/influxdata/telegraf/testutil" "github.com/influxdata/telegraf/testutil"
"github.com/Shopify/sarama"
"github.com/stretchr/testify/assert" "github.com/stretchr/testify/assert"
) )
@ -18,31 +19,57 @@ const (
invalidMsg = "cpu_load_short,host=server01 1422568543702900257\n" invalidMsg = "cpu_load_short,host=server01 1422568543702900257\n"
) )
func newTestKafka() (*Kafka, chan *sarama.ConsumerMessage) { type TestConsumer struct {
in := make(chan *sarama.ConsumerMessage, 1000) errors chan error
k := Kafka{ messages chan *sarama.ConsumerMessage
ConsumerGroup: "test", }
Topics: []string{"telegraf"},
Brokers: []string{"localhost:9092"}, func (c *TestConsumer) Errors() <-chan error {
Offset: "oldest", return c.errors
in: in, }
doNotCommitMsgs: true,
errs: make(chan error, 1000), func (c *TestConsumer) Messages() <-chan *sarama.ConsumerMessage {
done: make(chan struct{}), return c.messages
}
func (c *TestConsumer) MarkOffset(msg *sarama.ConsumerMessage, metadata string) {
}
func (c *TestConsumer) Close() error {
return nil
}
func (c *TestConsumer) Inject(msg *sarama.ConsumerMessage) {
c.messages <- msg
}
func newTestKafka() (*Kafka, *TestConsumer) {
consumer := &TestConsumer{
errors: make(chan error),
messages: make(chan *sarama.ConsumerMessage, 1000),
} }
return &k, in k := Kafka{
cluster: consumer,
ConsumerGroup: "test",
Topics: []string{"telegraf"},
Brokers: []string{"localhost:9092"},
Offset: "oldest",
MaxUndeliveredMessages: defaultMaxUndeliveredMessages,
doNotCommitMsgs: true,
messages: make(map[telegraf.TrackingID]*sarama.ConsumerMessage),
}
return &k, consumer
} }
// Test that the parser parses kafka messages into points // Test that the parser parses kafka messages into points
func TestRunParser(t *testing.T) { func TestRunParser(t *testing.T) {
k, in := newTestKafka() k, consumer := newTestKafka()
acc := testutil.Accumulator{} acc := testutil.Accumulator{}
k.acc = &acc ctx := context.Background()
defer close(k.done)
k.parser, _ = parsers.NewInfluxParser() k.parser, _ = parsers.NewInfluxParser()
go k.receiver() go k.receiver(ctx, &acc)
in <- saramaMsg(testMsg) consumer.Inject(saramaMsg(testMsg))
acc.Wait(1) acc.Wait(1)
assert.Equal(t, acc.NFields(), 1) assert.Equal(t, acc.NFields(), 1)
@ -50,14 +77,13 @@ func TestRunParser(t *testing.T) {
// Test that the parser ignores invalid messages // Test that the parser ignores invalid messages
func TestRunParserInvalidMsg(t *testing.T) { func TestRunParserInvalidMsg(t *testing.T) {
k, in := newTestKafka() k, consumer := newTestKafka()
acc := testutil.Accumulator{} acc := testutil.Accumulator{}
k.acc = &acc ctx := context.Background()
defer close(k.done)
k.parser, _ = parsers.NewInfluxParser() k.parser, _ = parsers.NewInfluxParser()
go k.receiver() go k.receiver(ctx, &acc)
in <- saramaMsg(invalidMsg) consumer.Inject(saramaMsg(invalidMsg))
acc.WaitError(1) acc.WaitError(1)
assert.Equal(t, acc.NFields(), 0) assert.Equal(t, acc.NFields(), 0)
@ -66,15 +92,14 @@ func TestRunParserInvalidMsg(t *testing.T) {
// Test that overlong messages are dropped // Test that overlong messages are dropped
func TestDropOverlongMsg(t *testing.T) { func TestDropOverlongMsg(t *testing.T) {
const maxMessageLen = 64 * 1024 const maxMessageLen = 64 * 1024
k, in := newTestKafka() k, consumer := newTestKafka()
k.MaxMessageLen = maxMessageLen k.MaxMessageLen = maxMessageLen
acc := testutil.Accumulator{} acc := testutil.Accumulator{}
k.acc = &acc ctx := context.Background()
defer close(k.done)
overlongMsg := strings.Repeat("v", maxMessageLen+1) overlongMsg := strings.Repeat("v", maxMessageLen+1)
go k.receiver() go k.receiver(ctx, &acc)
in <- saramaMsg(overlongMsg) consumer.Inject(saramaMsg(overlongMsg))
acc.WaitError(1) acc.WaitError(1)
assert.Equal(t, acc.NFields(), 0) assert.Equal(t, acc.NFields(), 0)
@ -82,14 +107,13 @@ func TestDropOverlongMsg(t *testing.T) {
// Test that the parser parses kafka messages into points // Test that the parser parses kafka messages into points
func TestRunParserAndGather(t *testing.T) { func TestRunParserAndGather(t *testing.T) {
k, in := newTestKafka() k, consumer := newTestKafka()
acc := testutil.Accumulator{} acc := testutil.Accumulator{}
k.acc = &acc ctx := context.Background()
defer close(k.done)
k.parser, _ = parsers.NewInfluxParser() k.parser, _ = parsers.NewInfluxParser()
go k.receiver() go k.receiver(ctx, &acc)
in <- saramaMsg(testMsg) consumer.Inject(saramaMsg(testMsg))
acc.Wait(1) acc.Wait(1)
acc.GatherError(k.Gather) acc.GatherError(k.Gather)
@ -101,14 +125,13 @@ func TestRunParserAndGather(t *testing.T) {
// Test that the parser parses kafka messages into points // Test that the parser parses kafka messages into points
func TestRunParserAndGatherGraphite(t *testing.T) { func TestRunParserAndGatherGraphite(t *testing.T) {
k, in := newTestKafka() k, consumer := newTestKafka()
acc := testutil.Accumulator{} acc := testutil.Accumulator{}
k.acc = &acc ctx := context.Background()
defer close(k.done)
k.parser, _ = parsers.NewGraphiteParser("_", []string{}, nil) k.parser, _ = parsers.NewGraphiteParser("_", []string{}, nil)
go k.receiver() go k.receiver(ctx, &acc)
in <- saramaMsg(testMsgGraphite) consumer.Inject(saramaMsg(testMsgGraphite))
acc.Wait(1) acc.Wait(1)
acc.GatherError(k.Gather) acc.GatherError(k.Gather)
@ -120,17 +143,16 @@ func TestRunParserAndGatherGraphite(t *testing.T) {
// Test that the parser parses kafka messages into points // Test that the parser parses kafka messages into points
func TestRunParserAndGatherJSON(t *testing.T) { func TestRunParserAndGatherJSON(t *testing.T) {
k, in := newTestKafka() k, consumer := newTestKafka()
acc := testutil.Accumulator{} acc := testutil.Accumulator{}
k.acc = &acc ctx := context.Background()
defer close(k.done)
k.parser, _ = parsers.NewParser(&parsers.Config{ k.parser, _ = parsers.NewParser(&parsers.Config{
DataFormat: "json", DataFormat: "json",
MetricName: "kafka_json_test", MetricName: "kafka_json_test",
}) })
go k.receiver() go k.receiver(ctx, &acc)
in <- saramaMsg(testMsgJSON) consumer.Inject(saramaMsg(testMsgJSON))
acc.Wait(1) acc.Wait(1)
acc.GatherError(k.Gather) acc.GatherError(k.Gather)


@ -1,14 +1,11 @@
# MQTT Consumer Input Plugin

The [MQTT][mqtt] consumer plugin reads from the specified MQTT topics
and creates metrics using one of the supported [input data formats][].

### Configuration:

```toml
[[inputs.mqtt_consumer]]
## MQTT broker URLs to be used. The format should be scheme://host:port, ## MQTT broker URLs to be used. The format should be scheme://host:port,
## schema can be tcp, ssl, or ws. ## schema can be tcp, ssl, or ws.
@ -26,6 +23,16 @@ The plugin expects messages in the
## Connection timeout for initial connection in seconds ## Connection timeout for initial connection in seconds
connection_timeout = "30s" connection_timeout = "30s"
## Maximum messages to read from the broker that have not been written by an
## output. For best throughput set based on the number of metrics within
## each message and the size of the output's metric_batch_size.
##
## For example, if each message from the queue contains 10 metrics and the
## output metric_batch_size is 1000, setting this to 100 will ensure that a
## full batch is collected and the write is triggered immediately without
## waiting until the next flush_interval.
# max_undelivered_messages = 1000
## Topics to subscribe to ## Topics to subscribe to
topics = [ topics = [
"telegraf/host01/cpu", "telegraf/host01/cpu",
@ -62,3 +69,6 @@ The plugin expects messages in the
- All measurements are tagged with the incoming topic, ie - All measurements are tagged with the incoming topic, ie
`topic=telegraf/host01/cpu` `topic=telegraf/host01/cpu`
[mqtt]: https://mqtt.org
[input data formats]: /docs/DATA_FORMATS_INPUT.md
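As a concrete, hypothetical sizing example for the `max_undelivered_messages` option above: if each MQTT message carries 10 metrics and the output is configured with `metric_batch_size = 1000`, then `max_undelivered_messages = 100` lets exactly one full batch (100 * 10 = 1000 metrics) accumulate before further reads pause, so a write can trigger immediately without unbounded memory use. The broker address and output below are placeholders:

```toml
[[inputs.mqtt_consumer]]
  servers = ["tcp://localhost:1883"]   # hypothetical broker address
  topics = ["telegraf/host01/cpu"]
  # 100 messages * 10 metrics per message = 1000 metrics = one output batch
  max_undelivered_messages = 100

[[outputs.influxdb]]                   # hypothetical output for this example
  urls = ["http://localhost:8086"]
  metric_batch_size = 1000
```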


@ -1,25 +1,31 @@
 package mqtt_consumer
 
 import (
+	"context"
 	"errors"
 	"fmt"
 	"log"
 	"strings"
 	"time"
 
+	"github.com/eclipse/paho.mqtt.golang"
 	"github.com/influxdata/telegraf"
 	"github.com/influxdata/telegraf/internal"
 	"github.com/influxdata/telegraf/internal/tls"
 	"github.com/influxdata/telegraf/plugins/inputs"
 	"github.com/influxdata/telegraf/plugins/parsers"
-	"github.com/eclipse/paho.mqtt.golang"
 )
 
-// 30 Seconds is the default used by paho.mqtt.golang
-var defaultConnectionTimeout = internal.Duration{Duration: 30 * time.Second}
+var (
+	// 30 Seconds is the default used by paho.mqtt.golang
+	defaultConnectionTimeout = internal.Duration{Duration: 30 * time.Second}
+
+	defaultMaxUndeliveredMessages = 1000
+)
 
 type ConnectionState int
+type empty struct{}
+type semaphore chan empty
 
 const (
 	Disconnected ConnectionState = iota
@@ -28,12 +34,13 @@ const (
 )
 
 type MQTTConsumer struct {
 	Servers           []string
 	Topics            []string
 	Username          string
 	Password          string
 	QoS               int               `toml:"qos"`
 	ConnectionTimeout internal.Duration `toml:"connection_timeout"`
+	MaxUndeliveredMessages int `toml:"max_undelivered_messages"`
 
 	parser parsers.Parser
@@ -45,9 +52,14 @@ type MQTTConsumer struct {
 	tls.ClientConfig
 
 	client     mqtt.Client
-	acc        telegraf.Accumulator
+	acc        telegraf.TrackingAccumulator
 	state      ConnectionState
 	subscribed bool
+	sem        semaphore
+	messages   map[telegraf.TrackingID]bool
+
+	ctx    context.Context
+	cancel context.CancelFunc
 }
 
 var sampleConfig = `
@@ -67,6 +79,16 @@ var sampleConfig = `
   ## Connection timeout for initial connection in seconds
   connection_timeout = "30s"
 
+  ## Maximum messages to read from the broker that have not been written by an
+  ## output. For best throughput set based on the number of metrics within
+  ## each message and the size of the output's metric_batch_size.
+  ##
+  ## For example, if each message from the queue contains 10 metrics and the
+  ## output metric_batch_size is 1000, setting this to 100 will ensure that a
+  ## full batch is collected and the write is triggered immediately without
+  ## waiting until the next flush_interval.
+  # max_undelivered_messages = 1000
+
   ## Topics to subscribe to
   topics = [
     "telegraf/host01/cpu",
@@ -118,7 +140,6 @@ func (m *MQTTConsumer) Start(acc telegraf.Accumulator) error {
 		return errors.New("persistent_session requires client_id")
 	}
 
-	m.acc = acc
 	if m.QoS > 2 || m.QoS < 0 {
 		return fmt.Errorf("qos value must be 0, 1, or 2: %d", m.QoS)
 	}
@@ -127,6 +148,9 @@ func (m *MQTTConsumer) Start(acc telegraf.Accumulator) error {
 		return fmt.Errorf("connection_timeout must be greater than 1s: %s", m.ConnectionTimeout.Duration)
 	}
 
+	m.acc = acc.WithTracking(m.MaxUndeliveredMessages)
+	m.ctx, m.cancel = context.WithCancel(context.Background())
+
 	opts, err := m.createOpts()
 	if err != nil {
 		return err
@@ -146,8 +170,10 @@ func (m *MQTTConsumer) connect() error {
 		return err
 	}
 
-	log.Printf("I! [inputs.mqtt_consumer]: connected %v", m.Servers)
+	log.Printf("I! [inputs.mqtt_consumer] Connected %v", m.Servers)
 	m.state = Connected
+	m.sem = make(semaphore, m.MaxUndeliveredMessages)
+	m.messages = make(map[telegraf.TrackingID]bool)
 
 	// Only subscribe on first connection when using persistent sessions. On
 	// subsequent connections the subscriptions should be stored in the
@@ -172,38 +198,64 @@ func (m *MQTTConsumer) connect() error {
 
 func (m *MQTTConsumer) onConnectionLost(c mqtt.Client, err error) {
 	m.acc.AddError(fmt.Errorf("connection lost: %v", err))
-	log.Printf("D! [inputs.mqtt_consumer]: disconnected %v", m.Servers)
+	log.Printf("D! [inputs.mqtt_consumer] Disconnected %v", m.Servers)
 	m.state = Disconnected
 	return
 }
 
 func (m *MQTTConsumer) recvMessage(c mqtt.Client, msg mqtt.Message) {
-	topic := msg.Topic()
+	for {
+		select {
+		case track := <-m.acc.Delivered():
+			_, ok := m.messages[track.ID()]
+			if !ok {
+				// Added by a previous connection
+				continue
+			}
+			<-m.sem
+			// No ack, MQTT does not support durable handling
+			delete(m.messages, track.ID())
+		case m.sem <- empty{}:
+			err := m.onMessage(m.acc, msg)
+			if err != nil {
+				m.acc.AddError(err)
+				<-m.sem
+			}
+			return
+		}
+	}
+}
 
+func (m *MQTTConsumer) onMessage(acc telegraf.TrackingAccumulator, msg mqtt.Message) error {
 	metrics, err := m.parser.Parse(msg.Payload())
 	if err != nil {
-		m.acc.AddError(err)
+		return err
 	}
 
+	topic := msg.Topic()
 	for _, metric := range metrics {
-		tags := metric.Tags()
-		tags["topic"] = topic
-		m.acc.AddFields(metric.Name(), metric.Fields(), tags, metric.Time())
+		metric.AddTag("topic", topic)
 	}
+
+	id := acc.AddTrackingMetricGroup(metrics)
+	m.messages[id] = true
+	return nil
 }
 
 func (m *MQTTConsumer) Stop() {
 	if m.state == Connected {
-		log.Printf("D! [inputs.mqtt_consumer]: disconnecting %v", m.Servers)
+		log.Printf("D! [inputs.mqtt_consumer] Disconnecting %v", m.Servers)
 		m.client.Disconnect(200)
-		log.Printf("D! [inputs.mqtt_consumer]: disconnected %v", m.Servers)
+		log.Printf("D! [inputs.mqtt_consumer] Disconnected %v", m.Servers)
 		m.state = Disconnected
 	}
+	m.cancel()
 }
 
 func (m *MQTTConsumer) Gather(acc telegraf.Accumulator) error {
 	if m.state == Disconnected {
 		m.state = Connecting
-		log.Printf("D! [inputs.mqtt_consumer]: connecting %v", m.Servers)
+		log.Printf("D! [inputs.mqtt_consumer] Connecting %v", m.Servers)
 		m.connect()
 	}
 
@@ -246,7 +298,7 @@ func (m *MQTTConsumer) createOpts() (*mqtt.ClientOptions, error) {
 	for _, server := range m.Servers {
 		// Preserve support for host:port style servers; deprecated in Telegraf 1.4.4
 		if !strings.Contains(server, "://") {
-			log.Printf("W! [inputs.mqtt_consumer] server %q should be updated to use `scheme://host:port` format", server)
+			log.Printf("W! [inputs.mqtt_consumer] Server %q should be updated to use `scheme://host:port` format", server)
 			if tlsCfg == nil {
 				server = "tcp://" + server
 			} else {
@@ -267,8 +319,9 @@ func (m *MQTTConsumer) createOpts() (*mqtt.ClientOptions, error) {
 func init() {
 	inputs.Add("mqtt_consumer", func() telegraf.Input {
 		return &MQTTConsumer{
-			ConnectionTimeout: defaultConnectionTimeout,
-			state:             Disconnected,
+			ConnectionTimeout:      defaultConnectionTimeout,
+			MaxUndeliveredMessages: defaultMaxUndeliveredMessages,
+			state:                  Disconnected,
 		}
 	})
 }
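The `recvMessage` select loop above is the heart of the change: a semaphore channel sized by `max_undelivered_messages` gates message intake, and `Delivered()` notifications return slots once an output has written the corresponding metrics. The following standalone sketch strips the pattern down to plain channels; `consume`, the channel names, and the `int` delivery IDs are illustrative stand-ins, not the plugin's actual API:

```go
package main

import "fmt"

type empty struct{}
type semaphore chan empty

// consume accepts a message only while fewer than cap(sem) metric groups
// are outstanding; each delivery notification frees one slot.
func consume(messages <-chan string, delivered <-chan int, sem semaphore, out chan<- string, done <-chan struct{}) {
	for {
		select {
		case <-done:
			return
		case <-delivered:
			<-sem // an output wrote a group; free its slot
		case sem <- empty{}: // reserve a slot before taking a message
			select {
			case <-done:
				return
			case <-delivered:
				<-sem // free the delivered group's slot...
				<-sem // ...and the slot we just reserved
			case msg := <-messages:
				out <- msg // accepted; the slot stays held until delivery
			}
		}
	}
}

func main() {
	messages := make(chan string)
	delivered := make(chan int)
	out := make(chan string)
	done := make(chan struct{})

	go consume(messages, delivered, make(semaphore, 1), out, done)

	messages <- "m1" // fills the single in-flight slot
	fmt.Println("accepted:", <-out)
	delivered <- 1 // the output confirms the write; the slot frees
	messages <- "m2"
	fmt.Println("accepted:", <-out)
	close(done)
}
```

With `max_undelivered_messages` slots all held, a slow output stops intake instead of letting the input buffer grow without bound, which is exactly the backpressure this commit introduces.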

plugins/inputs/mqtt_consumer/mqtt_consumer_test.go

@@ -3,12 +3,9 @@ package mqtt_consumer
 import (
 	"testing"
 
-	"github.com/influxdata/telegraf/plugins/parsers"
-	"github.com/influxdata/telegraf/testutil"
-	"github.com/stretchr/testify/assert"
-
 	"github.com/eclipse/paho.mqtt.golang"
+	"github.com/influxdata/telegraf/testutil"
+	"github.com/stretchr/testify/assert"
 )
 
 const (
@@ -71,47 +68,6 @@ func TestPersistentClientIDFail(t *testing.T) {
 	assert.Error(t, err)
 }
 
-func TestRunParser(t *testing.T) {
-	n := newTestMQTTConsumer()
-	acc := testutil.Accumulator{}
-	n.acc = &acc
-
-	n.parser, _ = parsers.NewInfluxParser()
-	n.recvMessage(nil, mqttMsg(testMsg))
-
-	if a := acc.NFields(); a != 1 {
-		t.Errorf("got %v, expected %v", a, 1)
-	}
-}
-
-// Test that the parser ignores invalid messages
-func TestRunParserInvalidMsg(t *testing.T) {
-	n := newTestMQTTConsumer()
-	acc := testutil.Accumulator{}
-	n.acc = &acc
-
-	n.parser, _ = parsers.NewInfluxParser()
-	n.recvMessage(nil, mqttMsg(invalidMsg))
-
-	if a := acc.NFields(); a != 0 {
-		t.Errorf("got %v, expected %v", a, 0)
-	}
-	assert.Len(t, acc.Errors, 1)
-}
-
-// Test that the parser parses line format messages into metrics
-func TestRunParserAndGather(t *testing.T) {
-	n := newTestMQTTConsumer()
-	acc := testutil.Accumulator{}
-	n.acc = &acc
-
-	n.parser, _ = parsers.NewInfluxParser()
-	n.recvMessage(nil, mqttMsg(testMsg))
-
-	acc.AssertContainsFields(t, "cpu_load_short",
-		map[string]interface{}{"value": float64(23422)})
-}
-
 func mqttMsg(val string) mqtt.Message {
 	return &message{
 		topic: "telegraf/unit_test",

plugins/inputs/nats_consumer/README.md

@@ -1,16 +1,14 @@
 # NATS Consumer Input Plugin
 
-The [NATS](http://www.nats.io/about/) consumer plugin reads from
-specified NATS subjects and adds messages to InfluxDB. The plugin expects messages
-in the [Telegraf Input Data Formats](https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md).
-A [Queue Group](http://www.nats.io/documentation/concepts/nats-queueing/)
-is used when subscribing to subjects so multiple instances of telegraf can read
-from a NATS cluster in parallel.
+The [NATS][nats] consumer plugin reads from the specified NATS subjects and
+creates metrics using one of the supported [input data formats][].
 
-## Configuration
+A [Queue Group][queue group] is used when subscribing to subjects so multiple
+instances of telegraf can read from a NATS cluster in parallel.
+
+### Configuration:
 
 ```toml
-# Read metrics from NATS subject(s)
 [[inputs.nats_consumer]]
   ## urls of NATS servers
   servers = ["nats://localhost:4222"]
@@ -20,13 +18,29 @@ from a NATS cluster in parallel.
   subjects = ["telegraf"]
   ## name a queue group
   queue_group = "telegraf_consumers"
-  ## Maximum number of metrics to buffer between collection intervals
-  metric_buffer = 100000
+
+  ## Sets the limits for pending msgs and bytes for each subscription
+  ## These shouldn't need to be adjusted except in very high throughput scenarios
+  # pending_message_limit = 65536
+  # pending_bytes_limit = 67108864
+
+  ## Maximum messages to read from the broker that have not been written by an
+  ## output. For best throughput set based on the number of metrics within
+  ## each message and the size of the output's metric_batch_size.
+  ##
+  ## For example, if each message from the queue contains 10 metrics and the
+  ## output metric_batch_size is 1000, setting this to 100 will ensure that a
+  ## full batch is collected and the write is triggered immediately without
+  ## waiting until the next flush_interval.
+  # max_undelivered_messages = 1000
 
   ## Data format to consume.
   ## Each data format has its own unique set of configuration options, read
   ## more about them here:
   ## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md
   data_format = "influx"
 ```
+
+[nats]: https://www.nats.io/about/
+[input data formats]: /docs/DATA_FORMATS_INPUT.md
+[queue group]: https://www.nats.io/documentation/concepts/nats-queueing/

plugins/inputs/nats_consumer/nats_consumer.go

@@ -1,6 +1,7 @@
 package natsconsumer
 
 import (
+	"context"
 	"fmt"
 	"log"
 	"sync"
@@ -11,6 +12,13 @@ import (
 	nats "github.com/nats-io/go-nats"
 )
 
+var (
+	defaultMaxUndeliveredMessages = 1000
+)
+
+type empty struct{}
+type semaphore chan empty
+
 type natsError struct {
 	conn *nats.Conn
 	sub  *nats.Subscription
@@ -23,48 +31,58 @@
 }
 
 type natsConsumer struct {
-	QueueGroup string
-	Subjects   []string
-	Servers    []string
-	Secure     bool
+	QueueGroup string   `toml:"queue_group"`
+	Subjects   []string `toml:"subjects"`
+	Servers    []string `toml:"servers"`
+	Secure     bool     `toml:"secure"`
 
 	// Client pending limits:
-	PendingMessageLimit int
-	PendingBytesLimit   int
+	PendingMessageLimit int `toml:"pending_message_limit"`
+	PendingBytesLimit   int `toml:"pending_bytes_limit"`
+
+	MaxUndeliveredMessages int `toml:"max_undelivered_messages"`
 
 	// Legacy metric buffer support; deprecated in v0.10.3
 	MetricBuffer int
 
+	conn   *nats.Conn
+	subs   []*nats.Subscription
 	parser parsers.Parser
 
-	sync.Mutex
-	wg   sync.WaitGroup
-	Conn *nats.Conn
-	Subs []*nats.Subscription
-
 	// channel for all incoming NATS messages
 	in chan *nats.Msg
 	// channel for all NATS read errors
 	errs chan error
-	done chan struct{}
-	acc  telegraf.Accumulator
+
+	acc    telegraf.TrackingAccumulator
+	wg     sync.WaitGroup
+	cancel context.CancelFunc
 }
 
 var sampleConfig = `
   ## urls of NATS servers
-  # servers = ["nats://localhost:4222"]
+  servers = ["nats://localhost:4222"]
   ## Use Transport Layer Security
-  # secure = false
+  secure = false
  ## subject(s) to consume
-  # subjects = ["telegraf"]
+  subjects = ["telegraf"]
   ## name a queue group
-  # queue_group = "telegraf_consumers"
+  queue_group = "telegraf_consumers"
 
   ## Sets the limits for pending msgs and bytes for each subscription
   ## These shouldn't need to be adjusted except in very high throughput scenarios
   # pending_message_limit = 65536
   # pending_bytes_limit = 67108864
 
+  ## Maximum messages to read from the broker that have not been written by an
+  ## output. For best throughput set based on the number of metrics within
+  ## each message and the size of the output's metric_batch_size.
+  ##
+  ## For example, if each message from the queue contains 10 metrics and the
+  ## output metric_batch_size is 1000, setting this to 100 will ensure that a
+  ## full batch is collected and the write is triggered immediately without
+  ## waiting until the next flush_interval.
+  # max_undelivered_messages = 1000
+
   ## Data format to consume.
   ## Each data format has its own unique set of configuration options, read
   ## more about them here:
@@ -94,10 +112,7 @@ func (n *natsConsumer) natsErrHandler(c *nats.Conn, s *nats.Subscription, e erro
 // Start the nats consumer. Caller must call *natsConsumer.Stop() to clean up.
 func (n *natsConsumer) Start(acc telegraf.Accumulator) error {
-	n.Lock()
-	defer n.Unlock()
-
-	n.acc = acc
+	n.acc = acc.WithTracking(n.MaxUndeliveredMessages)
 
 	var connectErr error
@@ -112,89 +127,106 @@ func (n *natsConsumer) Start(acc telegraf.Accumulator) error {
 	opts.Secure = n.Secure
 
-	if n.Conn == nil || n.Conn.IsClosed() {
-		n.Conn, connectErr = opts.Connect()
+	if n.conn == nil || n.conn.IsClosed() {
+		n.conn, connectErr = opts.Connect()
 		if connectErr != nil {
 			return connectErr
 		}
 
 		// Setup message and error channels
 		n.errs = make(chan error)
-		n.Conn.SetErrorHandler(n.natsErrHandler)
+		n.conn.SetErrorHandler(n.natsErrHandler)
 
 		n.in = make(chan *nats.Msg, 1000)
 		for _, subj := range n.Subjects {
-			sub, err := n.Conn.QueueSubscribe(subj, n.QueueGroup, func(m *nats.Msg) {
+			sub, err := n.conn.QueueSubscribe(subj, n.QueueGroup, func(m *nats.Msg) {
 				n.in <- m
 			})
 			if err != nil {
 				return err
 			}
 
 			// ensure that the subscription has been processed by the server
-			if err = n.Conn.Flush(); err != nil {
+			if err = n.conn.Flush(); err != nil {
 				return err
 			}
 
 			// set the subscription pending limits
 			if err = sub.SetPendingLimits(n.PendingMessageLimit, n.PendingBytesLimit); err != nil {
 				return err
 			}
 
-			n.Subs = append(n.Subs, sub)
+			n.subs = append(n.subs, sub)
 		}
 	}
 
-	n.done = make(chan struct{})
+	ctx, cancel := context.WithCancel(context.Background())
+	n.cancel = cancel
 
 	// Start the message reader
 	n.wg.Add(1)
-	go n.receiver()
+	go func() {
+		defer n.wg.Done()
+		go n.receiver(ctx)
+	}()
+
 	log.Printf("I! Started the NATS consumer service, nats: %v, subjects: %v, queue: %v\n",
-		n.Conn.ConnectedUrl(), n.Subjects, n.QueueGroup)
+		n.conn.ConnectedUrl(), n.Subjects, n.QueueGroup)
 
 	return nil
 }
 
 // receiver() reads all incoming messages from NATS, and parses them into
 // telegraf metrics.
-func (n *natsConsumer) receiver() {
-	defer n.wg.Done()
+func (n *natsConsumer) receiver(ctx context.Context) {
+	sem := make(semaphore, n.MaxUndeliveredMessages)
+
 	for {
 		select {
-		case <-n.done:
+		case <-ctx.Done():
 			return
+		case <-n.acc.Delivered():
+			<-sem
 		case err := <-n.errs:
-			n.acc.AddError(fmt.Errorf("E! error reading from %s\n", err.Error()))
-		case msg := <-n.in:
-			metrics, err := n.parser.Parse(msg.Data)
-			if err != nil {
-				n.acc.AddError(fmt.Errorf("E! subject: %s, error: %s", msg.Subject, err.Error()))
-			}
-
-			for _, metric := range metrics {
-				n.acc.AddFields(metric.Name(), metric.Fields(), metric.Tags(), metric.Time())
+			n.acc.AddError(err)
+		case sem <- empty{}:
+			select {
+			case <-ctx.Done():
+				return
+			case err := <-n.errs:
+				<-sem
+				n.acc.AddError(err)
+			case <-n.acc.Delivered():
+				<-sem
+				<-sem
+			case msg := <-n.in:
+				metrics, err := n.parser.Parse(msg.Data)
+				if err != nil {
+					n.acc.AddError(fmt.Errorf("subject: %s, error: %s", msg.Subject, err.Error()))
+					<-sem
					continue
+				}
+
+				n.acc.AddTrackingMetricGroup(metrics)
 			}
 		}
 	}
 }
 
 func (n *natsConsumer) clean() {
-	for _, sub := range n.Subs {
+	for _, sub := range n.subs {
 		if err := sub.Unsubscribe(); err != nil {
-			n.acc.AddError(fmt.Errorf("E! Error unsubscribing from subject %s in queue %s: %s\n",
+			n.acc.AddError(fmt.Errorf("Error unsubscribing from subject %s in queue %s: %s\n",
 				sub.Subject, sub.Queue, err.Error()))
 		}
 	}
 
-	if n.Conn != nil && !n.Conn.IsClosed() {
-		n.Conn.Close()
+	if n.conn != nil && !n.conn.IsClosed() {
+		n.conn.Close()
 	}
 }
 
 func (n *natsConsumer) Stop() {
-	n.Lock()
-	close(n.done)
+	n.cancel()
 	n.wg.Wait()
 	n.clean()
-	n.Unlock()
 }
 
 func (n *natsConsumer) Gather(acc telegraf.Accumulator) error {
@@ -204,12 +236,13 @@ func (n *natsConsumer) Gather(acc telegraf.Accumulator) error {
 func init() {
 	inputs.Add("nats_consumer", func() telegraf.Input {
 		return &natsConsumer{
-			Servers:             []string{"nats://localhost:4222"},
-			Secure:              false,
-			Subjects:            []string{"telegraf"},
-			QueueGroup:          "telegraf_consumers",
-			PendingBytesLimit:   nats.DefaultSubPendingBytesLimit,
-			PendingMessageLimit: nats.DefaultSubPendingMsgsLimit,
+			Servers:                []string{"nats://localhost:4222"},
+			Secure:                 false,
+			Subjects:               []string{"telegraf"},
+			QueueGroup:             "telegraf_consumers",
+			PendingBytesLimit:      nats.DefaultSubPendingBytesLimit,
+			PendingMessageLimit:    nats.DefaultSubPendingMsgsLimit,
+			MaxUndeliveredMessages: defaultMaxUndeliveredMessages,
 		}
 	})
 }
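The `Stop()` rewrite above also swaps the `done` channel and mutex for context cancellation plus a WaitGroup, so shutdown is: cancel, wait for the receiver to exit, then tear down connections. A minimal sketch of that shape (the `worker` type is hypothetical, stdlib only):

```go
package main

import (
	"context"
	"fmt"
	"sync"
)

type worker struct {
	wg     sync.WaitGroup
	cancel context.CancelFunc
}

func (w *worker) Start() {
	ctx, cancel := context.WithCancel(context.Background())
	w.cancel = cancel
	w.wg.Add(1)
	go func() {
		defer w.wg.Done()
		<-ctx.Done() // stand-in for the receive loop
		fmt.Println("receiver exited")
	}()
}

func (w *worker) Stop() {
	w.cancel()  // signal the goroutine
	w.wg.Wait() // wait for it to exit before cleaning up connections
}

func main() {
	w := &worker{}
	w.Start()
	w.Stop()
}
```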

plugins/inputs/nats_consumer/nats_consumer_test.go

@@ -1,134 +0,0 @@
-package natsconsumer
-
-import (
-	"testing"
-
-	"github.com/influxdata/telegraf/plugins/parsers"
-	"github.com/influxdata/telegraf/testutil"
-	nats "github.com/nats-io/go-nats"
-	"github.com/stretchr/testify/assert"
-)
-
-const (
-	testMsg         = "cpu_load_short,host=server01 value=23422.0 1422568543702900257\n"
-	testMsgGraphite = "cpu.load.short.graphite 23422 1454780029"
-	testMsgJSON     = "{\"a\": 5, \"b\": {\"c\": 6}}\n"
-	invalidMsg      = "cpu_load_short,host=server01 1422568543702900257\n"
-	metricBuffer    = 5
-)
-
-func newTestNatsConsumer() (*natsConsumer, chan *nats.Msg) {
-	in := make(chan *nats.Msg, metricBuffer)
-	n := &natsConsumer{
-		QueueGroup: "test",
-		Subjects:   []string{"telegraf"},
-		Servers:    []string{"nats://localhost:4222"},
-		Secure:     false,
-		in:         in,
-		errs:       make(chan error, metricBuffer),
-		done:       make(chan struct{}),
-	}
-	return n, in
-}
-
-// Test that the parser parses NATS messages into metrics
-func TestRunParser(t *testing.T) {
-	n, in := newTestNatsConsumer()
-	acc := testutil.Accumulator{}
-	n.acc = &acc
-	defer close(n.done)
-
-	n.parser, _ = parsers.NewInfluxParser()
-	n.wg.Add(1)
-	go n.receiver()
-	in <- natsMsg(testMsg)
-	acc.Wait(1)
-}
-
-// Test that the parser ignores invalid messages
-func TestRunParserInvalidMsg(t *testing.T) {
-	n, in := newTestNatsConsumer()
-	acc := testutil.Accumulator{}
-	n.acc = &acc
-	defer close(n.done)
-
-	n.parser, _ = parsers.NewInfluxParser()
-	n.wg.Add(1)
-	go n.receiver()
-	in <- natsMsg(invalidMsg)
-	acc.WaitError(1)
-
-	assert.Contains(t, acc.Errors[0].Error(), "E! subject: telegraf, error: metric parse error")
-	assert.EqualValues(t, 0, acc.NMetrics())
-}
-
-// Test that the parser parses line format messages into metrics
-func TestRunParserAndGather(t *testing.T) {
-	n, in := newTestNatsConsumer()
-	acc := testutil.Accumulator{}
-	n.acc = &acc
-	defer close(n.done)
-
-	n.parser, _ = parsers.NewInfluxParser()
-	n.wg.Add(1)
-	go n.receiver()
-	in <- natsMsg(testMsg)
-
-	n.Gather(&acc)
-	acc.Wait(1)
-
-	acc.AssertContainsFields(t, "cpu_load_short",
-		map[string]interface{}{"value": float64(23422)})
-}
-
-// Test that the parser parses graphite format messages into metrics
-func TestRunParserAndGatherGraphite(t *testing.T) {
-	n, in := newTestNatsConsumer()
-	acc := testutil.Accumulator{}
-	n.acc = &acc
-	defer close(n.done)
-
-	n.parser, _ = parsers.NewGraphiteParser("_", []string{}, nil)
-	n.wg.Add(1)
-	go n.receiver()
-	in <- natsMsg(testMsgGraphite)
-
-	n.Gather(&acc)
-	acc.Wait(1)
-
-	acc.AssertContainsFields(t, "cpu_load_short_graphite",
-		map[string]interface{}{"value": float64(23422)})
-}
-
-// Test that the parser parses json format messages into metrics
-func TestRunParserAndGatherJSON(t *testing.T) {
-	n, in := newTestNatsConsumer()
-	acc := testutil.Accumulator{}
-	n.acc = &acc
-	defer close(n.done)
-
-	n.parser, _ = parsers.NewParser(&parsers.Config{
-		DataFormat: "json",
-		MetricName: "nats_json_test",
-	})
-	n.wg.Add(1)
-	go n.receiver()
-	in <- natsMsg(testMsgJSON)
-
-	n.Gather(&acc)
-	acc.Wait(1)
-
-	acc.AssertContainsFields(t, "nats_json_test",
-		map[string]interface{}{
-			"a":   float64(5),
-			"b_c": float64(6),
-		})
-}
-
-func natsMsg(val string) *nats.Msg {
-	return &nats.Msg{
-		Subject: "telegraf",
-		Data:    []byte(val),
-	}
-}

plugins/inputs/nsq_consumer/README.md

@@ -1,9 +1,9 @@
 # NSQ Consumer Input Plugin
 
-The [NSQ](http://nsq.io/) consumer plugin polls a specified NSQD
-topic and adds messages to InfluxDB. This plugin allows a message to be in any of the supported `data_format` types.
+The [NSQ][nsq] consumer plugin reads from NSQD and creates metrics using one
+of the supported [input data formats][].
 
-## Configuration
+### Configuration:
 
 ```toml
 # Read metrics from NSQD topic(s)
@@ -18,6 +18,16 @@ topic and adds messages to InfluxDB. This plugin allows a message to be in any o
   channel = "consumer"
   max_in_flight = 100
 
+  ## Maximum messages to read from the broker that have not been written by an
+  ## output. For best throughput set based on the number of metrics within
+  ## each message and the size of the output's metric_batch_size.
+  ##
+  ## For example, if each message from the queue contains 10 metrics and the
+  ## output metric_batch_size is 1000, setting this to 100 will ensure that a
+  ## full batch is collected and the write is triggered immediately without
+  ## waiting until the next flush_interval.
+  # max_undelivered_messages = 1000
+
   ## Data format to consume.
   ## Each data format has its own unique set of configuration options, read
   ## more about them here:
@@ -25,5 +35,5 @@ topic and adds messages to InfluxDB. This plugin allows a message to be in any o
   data_format = "influx"
 ```
 
-## Testing
-The `nsq_consumer_test` mocks out the interaction with `NSQD`. It requires no outside dependencies.
+[nsq]: https://nsq.io
+[input data formats]: /docs/DATA_FORMATS_INPUT.md

plugins/inputs/nsq_consumer/nsq_consumer.go

@@ -1,7 +1,9 @@
 package nsq_consumer
 
 import (
-	"fmt"
+	"context"
+	"log"
+	"sync"
 
 	"github.com/influxdata/telegraf"
 	"github.com/influxdata/telegraf/plugins/inputs"
@@ -9,17 +11,38 @@ import (
 	nsq "github.com/nsqio/go-nsq"
 )
 
+const (
+	defaultMaxUndeliveredMessages = 1000
+)
+
+type empty struct{}
+type semaphore chan empty
+
+type logger struct{}
+
+func (l *logger) Output(calldepth int, s string) error {
+	log.Println("D! [inputs.nsq_consumer] " + s)
+	return nil
+}
+
 //NSQConsumer represents the configuration of the plugin
 type NSQConsumer struct {
-	Server      string
-	Nsqd        []string
-	Nsqlookupd  []string
-	Topic       string
-	Channel     string
-	MaxInFlight int
-	parser      parsers.Parser
-	consumer    *nsq.Consumer
-	acc         telegraf.Accumulator
+	Server      string   `toml:"server"`
+	Nsqd        []string `toml:"nsqd"`
+	Nsqlookupd  []string `toml:"nsqlookupd"`
+	Topic       string   `toml:"topic"`
+	Channel     string   `toml:"channel"`
+	MaxInFlight int      `toml:"max_in_flight"`
+
+	MaxUndeliveredMessages int `toml:"max_undelivered_messages"`
+
+	parser   parsers.Parser
+	consumer *nsq.Consumer
+
+	mu       sync.Mutex
+	messages map[telegraf.TrackingID]*nsq.Message
+	wg       sync.WaitGroup
+	cancel   context.CancelFunc
 }
 
 var sampleConfig = `
@@ -33,6 +56,16 @@ var sampleConfig = `
   channel = "consumer"
   max_in_flight = 100
 
+  ## Maximum messages to read from the broker that have not been written by an
+  ## output. For best throughput set based on the number of metrics within
+  ## each message and the size of the output's metric_batch_size.
+  ##
+  ## For example, if each message from the queue contains 10 metrics and the
+  ## output metric_batch_size is 1000, setting this to 100 will ensure that a
+  ## full batch is collected and the write is triggered immediately without
+  ## waiting until the next flush_interval.
+  # max_undelivered_messages = 1000
+
   ## Data format to consume.
   ## Each data format has its own unique set of configuration options, read
   ## more about them here:
@@ -40,12 +73,6 @@ var sampleConfig = `
   data_format = "influx"
 `
 
-func init() {
-	inputs.Add("nsq_consumer", func() telegraf.Input {
-		return &NSQConsumer{}
-	})
-}
-
 // SetParser takes the data_format from the config and finds the right parser for that format
 func (n *NSQConsumer) SetParser(parser parsers.Parser) {
 	n.parser = parser
@@ -62,32 +89,88 @@ func (n *NSQConsumer) Description() string {
 }
 
 // Start pulls data from nsq
-func (n *NSQConsumer) Start(acc telegraf.Accumulator) error {
-	n.acc = acc
+func (n *NSQConsumer) Start(ac telegraf.Accumulator) error {
+	acc := ac.WithTracking(n.MaxUndeliveredMessages)
+	sem := make(semaphore, n.MaxUndeliveredMessages)
+	n.messages = make(map[telegraf.TrackingID]*nsq.Message, n.MaxUndeliveredMessages)
+
+	ctx, cancel := context.WithCancel(context.Background())
+	n.cancel = cancel
+
 	n.connect()
-	n.consumer.AddConcurrentHandlers(nsq.HandlerFunc(func(message *nsq.Message) error {
+	n.consumer.SetLogger(&logger{}, nsq.LogLevelInfo)
+	n.consumer.AddHandler(nsq.HandlerFunc(func(message *nsq.Message) error {
 		metrics, err := n.parser.Parse(message.Body)
 		if err != nil {
-			acc.AddError(fmt.Errorf("E! NSQConsumer Parse Error\nmessage:%s\nerror:%s", string(message.Body), err.Error()))
+			acc.AddError(err)
+			// Remove the message from the queue
+			message.Finish()
 			return nil
 		}
-		for _, metric := range metrics {
-			n.acc.AddFields(metric.Name(), metric.Fields(), metric.Tags(), metric.Time())
+		if len(metrics) == 0 {
+			message.Finish()
+			return nil
 		}
-		message.Finish()
+
+		select {
+		case <-ctx.Done():
+			return ctx.Err()
+		case sem <- empty{}:
+			break
+		}
+
+		n.mu.Lock()
+		id := acc.AddTrackingMetricGroup(metrics)
+		n.messages[id] = message
+		n.mu.Unlock()
+
+		message.DisableAutoResponse()
 		return nil
-	}), n.MaxInFlight)
+	}))
 
 	if len(n.Nsqlookupd) > 0 {
 		n.consumer.ConnectToNSQLookupds(n.Nsqlookupd)
 	}
 	n.consumer.ConnectToNSQDs(append(n.Nsqd, n.Server))
+
+	n.wg.Add(1)
+	go func() {
+		defer n.wg.Done()
+		n.onDelivery(ctx, acc, sem)
+	}()
+
 	return nil
 }
 
+func (n *NSQConsumer) onDelivery(ctx context.Context, acc telegraf.TrackingAccumulator, sem semaphore) {
+	for {
+		select {
+		case <-ctx.Done():
+			return
+		case info := <-acc.Delivered():
+			n.mu.Lock()
+			msg, ok := n.messages[info.ID()]
+			if !ok {
+				n.mu.Unlock()
+				continue
+			}
+			<-sem
+			delete(n.messages, info.ID())
+			n.mu.Unlock()
+
+			if info.Delivered() {
+				msg.Finish()
+			} else {
+				msg.Requeue(-1)
+			}
+		}
	}
+}
+
 // Stop processing messages
 func (n *NSQConsumer) Stop() {
+	n.cancel()
+	n.wg.Wait()
 	n.consumer.Stop()
+	<-n.consumer.StopChan
 }
 
 // Gather is a noop
@@ -107,3 +190,11 @@ func (n *NSQConsumer) connect() error {
 	}
 	return nil
 }
+
+func init() {
+	inputs.Add("nsq_consumer", func() telegraf.Input {
+		return &NSQConsumer{
+			MaxUndeliveredMessages: defaultMaxUndeliveredMessages,
+		}
+	})
+}
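Because the handler above calls `DisableAutoResponse()`, the broker is answered only after the output reports a result: `Finish()` acks, `Requeue(-1)` asks for redelivery. A distilled sketch of that decision, with stand-in types for the go-nsq message and telegraf delivery info (the real `Requeue` takes a delay argument; this stub drops it for brevity):

```go
package main

import "fmt"

// deliveryInfo stands in for telegraf.DeliveryInfo.
type deliveryInfo struct {
	id        int
	delivered bool
}

// message stands in for *nsq.Message.
type message struct{ body string }

func (m *message) Finish()  { fmt.Println("finish:", m.body) }  // ack to the broker
func (m *message) Requeue() { fmt.Println("requeue:", m.body) } // redeliver later

func main() {
	// Tracking IDs are mapped to their broker messages when accepted.
	messages := map[int]*message{1: {body: "cpu,host=a value=1"}}

	// A delivery notification arrives; here the write failed.
	info := deliveryInfo{id: 1, delivered: false}

	if msg, ok := messages[info.id]; ok {
		delete(messages, info.id)
		if info.delivered {
			msg.Finish()
		} else {
			msg.Requeue()
		}
	}
}
```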

plugins/inputs/nsq_consumer/nsq_consumer_test.go

@@ -36,11 +36,12 @@ func TestReadsMetricsFromNSQ(t *testing.T) {
 	newMockNSQD(script, addr.String())
 
 	consumer := &NSQConsumer{
-		Server:      "127.0.0.1:4155",
-		Topic:       "telegraf",
-		Channel:     "consume",
-		MaxInFlight: 1,
-		Nsqd:        []string{"127.0.0.1:4155"},
+		Server:                 "127.0.0.1:4155",
+		Topic:                  "telegraf",
+		Channel:                "consume",
+		MaxInFlight:            1,
+		MaxUndeliveredMessages: defaultMaxUndeliveredMessages,
+		Nsqd:                   []string{"127.0.0.1:4155"},
 	}
 
 	p, _ := parsers.NewInfluxParser()

plugins/inputs/socket_listener/socket_listener.go

@@ -2,6 +2,7 @@ package socket_listener
 
 import (
 	"bufio"
+	"crypto/tls"
 	"fmt"
 	"io"
 	"log"
@@ -9,11 +10,8 @@ import (
 	"os"
 	"strings"
 	"sync"
 	"time"
 
-	"crypto/tls"
-
 	"github.com/influxdata/telegraf"
 	"github.com/influxdata/telegraf/internal"
 	tlsint "github.com/influxdata/telegraf/internal/tls"
@@ -120,7 +118,7 @@ func (ssl *streamSocketListener) read(c net.Conn) {
 			continue
 		}
 
 		for _, m := range metrics {
-			ssl.AddFields(m.Name(), m.Fields(), m.Tags(), m.Time())
+			ssl.AddMetric(m)
 		}
 	}
 
@@ -156,7 +154,7 @@ func (psl *packetSocketListener) listen() {
 			continue
 		}
 
 		for _, m := range metrics {
-			psl.AddFields(m.Name(), m.Fields(), m.Tags(), m.Time())
+			psl.AddMetric(m)
 		}
 	}
 }

plugins/outputs/discard/discard.go

@@ -7,11 +7,13 @@ import (
 type Discard struct{}
 
 func (d *Discard) Connect() error       { return nil }
 func (d *Discard) Close() error         { return nil }
 func (d *Discard) SampleConfig() string { return "" }
 func (d *Discard) Description() string  { return "Send metrics to nowhere at all" }
-func (d *Discard) Write(metrics []telegraf.Metric) error { return nil }
+func (d *Discard) Write(metrics []telegraf.Metric) error {
+	return nil
+}
 
 func init() {
 	outputs.Add("discard", func() telegraf.Output { return &Discard{} })

plugins/outputs/prometheus_client/prometheus_client.go

@@ -144,7 +144,7 @@ func (p *PrometheusClient) auth(h http.Handler) http.Handler {
 	})
 }
 
-func (p *PrometheusClient) Start() error {
+func (p *PrometheusClient) Connect() error {
 	defaultCollectors := map[string]bool{
 		"gocollector": true,
 		"process":     true,
@@ -200,15 +200,6 @@ func (p *PrometheusClient) Start() error {
 	return nil
 }
 
-func (p *PrometheusClient) Stop() {
-	// plugin gets cleaned up in Close() already.
-}
-
-func (p *PrometheusClient) Connect() error {
-	// This service output does not need to make any further connections
-	return nil
-}
-
 func (p *PrometheusClient) Close() error {
 	ctx, cancel := context.WithTimeout(context.Background(), time.Second*5)
 	defer cancel()

plugins/outputs/prometheus_client/prometheus_client_test.go

@@ -600,7 +600,7 @@ func TestPrometheusWritePointEmptyTag(t *testing.T) {
 	pClient, p, err := setupPrometheus()
 	require.NoError(t, err)
-	defer pClient.Stop()
+	defer pClient.Close()
 
 	now := time.Now()
 	tags := make(map[string]string)
@@ -675,7 +675,7 @@ func setupPrometheus() (*PrometheusClient, *prometheus_input.Prometheus, error)
 	pTesting = NewClient()
 	pTesting.Listen = "localhost:9127"
 	pTesting.Path = "/metrics"
-	err := pTesting.Start()
+	err := pTesting.Connect()
 	if err != nil {
 		return nil, nil, err
 	}

plugins/processors/topk/topk.go

@@ -10,6 +10,7 @@ import (
 	"github.com/influxdata/telegraf"
 	"github.com/influxdata/telegraf/filter"
 	"github.com/influxdata/telegraf/internal"
+	"github.com/influxdata/telegraf/metric"
 	"github.com/influxdata/telegraf/plugins/processors"
 )
 
@@ -208,6 +209,11 @@ func (t *TopK) Apply(in ...telegraf.Metric) []telegraf.Metric {
 	// Add the metrics received to our internal cache
 	for _, m := range in {
+		// When tracking metrics this plugin could deadlock the input by
+		// holding undelivered metrics while the input waits for metrics to be
+		// delivered. Instead, treat all handled metrics as delivered and
+		// produced metrics as untracked in a similar way to aggregators.
+		m.Drop()
 
 		// Check if the metric has any of the fields over which we are aggregating
 		hasField := false
@@ -281,7 +287,6 @@ func (t *TopK) push() []telegraf.Metric {
 	// Create a one dimensional list with the top K metrics of each key
 	for i, ag := range aggregations[0:min(t.K, len(aggregations))] {
-		// Check whether of not we need to add fields of tags to the selected metrics
 		if len(t.aggFieldSet) != 0 || len(t.rankFieldSet) != 0 || groupTag != "" {
 			for _, m := range t.cache[ag.groupbykey] {
@@ -311,7 +316,16 @@ func (t *TopK) push() []telegraf.Metric {
 
 	t.Reset()
 
-	return ret
+	result := make([]telegraf.Metric, 0, len(ret))
+	for _, m := range ret {
+		copy, err := metric.New(m.Name(), m.Tags(), m.Fields(), m.Time(), m.Type())
+		if err != nil {
+			continue
+		}
+		result = append(result, copy)
+	}
+
+	return result
 }
 
 // Function that generates the aggregation functions
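The comment in the `Apply` hunk above states the key constraint for processors under tracking: holding a tracked metric keeps the input's undelivered count high, so a buffering processor must `Drop()` what it receives and later emit fresh, untracked copies. A toy illustration of that drop-and-copy pattern, with a stand-in metric type rather than the real `telegraf.Metric`:

```go
package main

import "fmt"

// trackedMetric stands in for a telegraf.Metric carrying delivery tracking.
type trackedMetric struct {
	name    string
	dropped bool
}

// Drop marks the metric as handled so the input's in-flight count decreases.
func (m *trackedMetric) Drop() { m.dropped = true }

// buffer caches incoming metrics and emits independent copies, so no tracked
// metric is held while the input waits for delivery notifications.
func buffer(in []*trackedMetric) []*trackedMetric {
	out := make([]*trackedMetric, 0, len(in))
	for _, m := range in {
		m.Drop()                                        // count it as delivered immediately
		out = append(out, &trackedMetric{name: m.name}) // emit an untracked copy
	}
	return out
}

func main() {
	in := []*trackedMetric{{name: "cpu"}, {name: "mem"}}
	for _, m := range buffer(in) {
		fmt.Println("emitted:", m.name)
	}
}
```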

plugins/processors/topk/topk_test.go

@@ -1,12 +1,12 @@
 package topk
 
 import (
-	"reflect"
 	"testing"
 	"time"
 
 	"github.com/influxdata/telegraf"
 	"github.com/influxdata/telegraf/internal"
+	"github.com/influxdata/telegraf/testutil"
 )
 
 // Key, value pair that represents a telegraf.Metric Field
@@ -95,7 +95,7 @@ func deepCopy(a []telegraf.Metric) []telegraf.Metric {
 func belongs(m telegraf.Metric, ms []telegraf.Metric) bool {
 	for _, i := range ms {
-		if reflect.DeepEqual(i, m) {
+		if testutil.MetricEqual(i, m) {
 			return true
 		}
 	}

processor.go

@@ -7,6 +7,6 @@ type Processor interface {
 	// Description returns a one-sentence description on the Input
 	Description() string
 
-	// Apply the filter to the given metric
+	// Apply the filter to the given metric.
 	Apply(in ...Metric) []Metric
 }
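For reference, a minimal type satisfying this interface might look as follows (hypothetical example; `Name` and `SetName` are existing `telegraf.Metric` methods):

```go
package example

import "github.com/influxdata/telegraf"

// renamer is a toy processor that prefixes every metric name.
type renamer struct{}

func (*renamer) SampleConfig() string { return "" }
func (*renamer) Description() string  { return "prefix every metric name" }

// Apply rewrites each metric in place and passes it along unchanged otherwise.
func (*renamer) Apply(in ...telegraf.Metric) []telegraf.Metric {
	for _, m := range in {
		m.SetName("renamed_" + m.Name())
	}
	return in
}
```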

testutil/accumulator.go

@@ -14,6 +14,15 @@ import (
 	"github.com/stretchr/testify/assert"
 )
 
+var (
+	lastID uint64
+)
+
+func newTrackingID() telegraf.TrackingID {
+	atomic.AddUint64(&lastID, 1)
+	return telegraf.TrackingID(lastID)
+}
+
 // Metric defines a single point measurement
 type Metric struct {
 	Measurement string
@@ -23,7 +32,7 @@ type Metric struct {
 }
 
 func (p *Metric) String() string {
-	return fmt.Sprintf("%s %v", p.Measurement, p.Fields)
+	return fmt.Sprintf("%s %v %v", p.Measurement, p.Tags, p.Fields)
 }
 
 // Accumulator defines a mocked out accumulator
@@ -31,11 +40,12 @@ type Accumulator struct {
 	sync.Mutex
 	*sync.Cond
 
 	Metrics   []*Metric
 	nMetrics  uint64
 	Discard   bool
 	Errors    []error
 	debug     bool
+	delivered chan telegraf.DeliveryInfo
 }
 
 func (a *Accumulator) NMetrics() uint64 {
@@ -154,6 +164,33 @@ func (a *Accumulator) AddHistogram(
 	a.AddFields(measurement, fields, tags, timestamp...)
 }
 
+func (a *Accumulator) AddMetric(m telegraf.Metric) {
+	a.AddFields(m.Name(), m.Fields(), m.Tags(), m.Time())
+}
+
+func (a *Accumulator) WithTracking(maxTracked int) telegraf.TrackingAccumulator {
+	return a
+}
+
+func (a *Accumulator) AddTrackingMetric(m telegraf.Metric) telegraf.TrackingID {
+	a.AddMetric(m)
+	return newTrackingID()
+}
+
+func (a *Accumulator) AddTrackingMetricGroup(group []telegraf.Metric) telegraf.TrackingID {
+	for _, m := range group {
+		a.AddMetric(m)
+	}
+	return newTrackingID()
+}
+
+func (a *Accumulator) Delivered() <-chan telegraf.DeliveryInfo {
+	if a.delivered == nil {
+		a.delivered = make(chan telegraf.DeliveryInfo)
+	}
+	return a.delivered
+}
+
 // AddError appends the given error to Accumulator.Errors.
 func (a *Accumulator) AddError(err error) {
 	if err == nil {
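With these additions the mock can stand in for a `TrackingAccumulator`: `WithTracking` returns the mock itself, and tracked metrics are recorded exactly as if they had been added with `AddFields`. A hypothetical test exercising that surface (the test name and package are illustrative, and it assumes the `metric.New` constructor of this era):

```go
package example

import (
	"testing"
	"time"

	"github.com/influxdata/telegraf"
	"github.com/influxdata/telegraf/metric"
	"github.com/influxdata/telegraf/testutil"
)

func TestTrackingMock(t *testing.T) {
	acc := &testutil.Accumulator{}
	tacc := acc.WithTracking(10)

	m, err := metric.New("cpu",
		map[string]string{},
		map[string]interface{}{"value": 42.0},
		time.Now())
	if err != nil {
		t.Fatal(err)
	}

	// The group is recorded in the mock and assigned a fresh tracking ID.
	tacc.AddTrackingMetricGroup([]telegraf.Metric{m})
	acc.AssertContainsFields(t, "cpu", map[string]interface{}{"value": 42.0})
}
```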

testutil/metric.go

@@ -41,6 +41,18 @@ func newMetricDiff(metric telegraf.Metric) *metricDiff {
 	return m
 }
 
+func MetricEqual(expected, actual telegraf.Metric) bool {
+	var lhs, rhs *metricDiff
+	if expected != nil {
+		lhs = newMetricDiff(expected)
+	}
+	if actual != nil {
+		rhs = newMetricDiff(actual)
+	}
+
+	return cmp.Equal(lhs, rhs)
+}
+
 func RequireMetricEqual(t *testing.T, expected, actual telegraf.Metric) {
 	t.Helper()
 
@@ -60,11 +72,11 @@ func RequireMetricEqual(t *testing.T, expected, actual telegraf.Metric) {
 func RequireMetricsEqual(t *testing.T, expected, actual []telegraf.Metric) {
 	t.Helper()
 
-	lhs := make([]*metricDiff, len(expected))
+	lhs := make([]*metricDiff, 0, len(expected))
 	for _, m := range expected {
 		lhs = append(lhs, newMetricDiff(m))
 	}
 
-	rhs := make([]*metricDiff, len(actual))
+	rhs := make([]*metricDiff, 0, len(actual))
 	for _, m := range actual {
 		rhs = append(rhs, newMetricDiff(m))
 	}
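Since `MetricEqual` returns a bool instead of failing the test, it suits membership checks like the `belongs()` helper rewritten in the topk tests above. An illustrative standalone helper (hypothetical, but using only the API added here):

```go
package example

import (
	"github.com/influxdata/telegraf"
	"github.com/influxdata/telegraf/testutil"
)

// contains reports whether m is present in ms, comparing by name, tags,
// fields, and type via testutil.MetricEqual.
func contains(ms []telegraf.Metric, m telegraf.Metric) bool {
	for _, x := range ms {
		if testutil.MetricEqual(x, m) {
			return true
		}
	}
	return false
}
```

Note also the `make([]*metricDiff, len(expected))` to `make([]*metricDiff, 0, len(expected))` change above: the old form pre-filled the slice with nils and then appended, so the comparison slices were twice as long as intended.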