Remove outputs blocking inputs when output is slow (#4938)

parent 74667cd681
commit 6e5c2f8bb6

CONTRIBUTING.md (500 lines changed)

@@ -1,489 +1,52 @@
## Steps for Contributing:
### Contributing

1. [Sign the CLA](http://influxdb.com/community/cla.html)
1. Make changes or write plugin (see below for details)
1. Add your plugin to one of: `plugins/{inputs,outputs,aggregators,processors}/all/all.go`
1. If your plugin requires a new Go package,
[add it](https://github.com/influxdata/telegraf/blob/master/CONTRIBUTING.md#adding-a-dependency)
1. Write a README for your plugin. If it's an input plugin, it should be structured
like the [input example here](https://github.com/influxdata/telegraf/blob/master/plugins/inputs/EXAMPLE_README.md).
Output plugin READMEs are less structured,
but any information you can provide on how the data will look is appreciated.
See the [OpenTSDB output](https://github.com/influxdata/telegraf/tree/master/plugins/outputs/opentsdb)
for a good example.
1. **Optional:** Help users of your plugin by including example queries for populating dashboards. Include these sample queries in the `README.md` for the plugin.
1. **Optional:** Write a [tickscript](https://docs.influxdata.com/kapacitor/v1.0/tick/syntax/) for your plugin and add it to [Kapacitor](https://github.com/influxdata/kapacitor/tree/master/examples/telegraf).
1. [Sign the CLA][cla].
1. Open a [new issue][] to discuss the changes you would like to make. This is
not strictly required but it may help reduce the amount of rework you need
to do later.
1. Make changes or write plugin using the guidelines in the following
documents:
    - [Input Plugins][inputs]
    - [Processor Plugins][processors]
    - [Aggregator Plugins][aggregators]
    - [Output Plugins][outputs]
1. Ensure you have added proper unit tests and documentation.
1. Open a new [pull request][].

## GoDoc
### GoDoc

Public interfaces for inputs, outputs, processors, aggregators, metrics,
and the accumulator can be found on the GoDoc
and the accumulator can be found in the GoDoc:

[![GoDoc](https://godoc.org/github.com/influxdata/telegraf?status.svg)](https://godoc.org/github.com/influxdata/telegraf)

## Sign the CLA
### Common development tasks

Before we can merge a pull request, you will need to sign the CLA,
which can be found [on our website](http://influxdb.com/community/cla.html)

## Adding a dependency
**Adding a dependency:**

Assuming you can already build the project, run these in the telegraf directory:

1. `dep ensure -vendor-only`
2. `dep ensure -add github.com/[dependency]/[new-package]`
## Input Plugins

This section is for developers who want to create new collection inputs.
Telegraf is entirely plugin driven. This interface allows operators to
pick and choose what is gathered and makes it easy for developers
to create new ways of generating metrics.

Plugin authorship is kept as simple as possible to encourage people to develop
and submit new inputs.

### Input Plugin Guidelines

* A plugin must conform to the [`telegraf.Input`](https://godoc.org/github.com/influxdata/telegraf#Input) interface.
* Input Plugins should call `inputs.Add` in their `init` function to register themselves.
See below for a quick example.
* Input Plugins must be added to the
`github.com/influxdata/telegraf/plugins/inputs/all/all.go` file.
* The `SampleConfig` function should return valid toml that describes how the
plugin can be configured. This is included in `telegraf config`. Please
consult the [SampleConfig](https://github.com/influxdata/telegraf/wiki/SampleConfig)
page for the latest style guidelines.
* The `Description` function should say in one line what this plugin does.

Let's say you've written a plugin that emits metrics about processes on the
current host.

### Input Plugin Example

```go
package simple

// simple.go

import (
    "github.com/influxdata/telegraf"
    "github.com/influxdata/telegraf/plugins/inputs"
)

type Simple struct {
    Ok bool
}

func (s *Simple) Description() string {
    return "a demo plugin"
}

func (s *Simple) SampleConfig() string {
    return `
  ## Indicate if everything is fine
  ok = true
`
}

func (s *Simple) Gather(acc telegraf.Accumulator) error {
    if s.Ok {
        acc.AddFields("state", map[string]interface{}{"value": "pretty good"}, nil)
    } else {
        acc.AddFields("state", map[string]interface{}{"value": "not great"}, nil)
    }

    return nil
}

func init() {
    inputs.Add("simple", func() telegraf.Input { return &Simple{} })
}
```

### Input Plugin Development

* Run `make static` followed by `make plugin-[pluginName]` to spin up a docker dev environment
using docker-compose.
* ***[Optional]*** When developing a plugin, add a `dev` directory with a `docker-compose.yml` and `telegraf.conf`
as well as any other supporting files, where sensible.

## Adding Typed Metrics

In addition to the `AddFields` function, the accumulator also supports
`AddGauge` and `AddCounter` functions. These functions are for adding _typed_
metrics. Metric types are ignored by the InfluxDB output, but can be used
for other outputs, such as [prometheus](https://prometheus.io/docs/concepts/metric_types/).
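For example, reusing the `Simple` plugin from above, a `Gather` implementation might emit both kinds (a minimal sketch; the measurement and field names are made up for illustration):

```go
func (s *Simple) Gather(acc telegraf.Accumulator) error {
    // A monotonically increasing total, e.g. requests served since start.
    acc.AddCounter("demo", map[string]interface{}{"requests_total": 42}, nil)
    // A point-in-time reading, e.g. the current queue length.
    acc.AddGauge("demo", map[string]interface{}{"queue_length": 7}, nil)
    return nil
}
```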
## Input Plugins Accepting Arbitrary Data Formats

Some input plugins (such as
[exec](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/exec))
accept arbitrary input data formats. An overview of these data formats can
be found
[here](https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md).

In order to enable this, you must specify a `SetParser(parser parsers.Parser)`
function on the plugin object (see the exec plugin for an example), as well as
defining `parser` as a field of the object.

You can then utilize the parser internally in your plugin, parsing data as you
see fit. Telegraf's configuration layer will take care of instantiating and
creating the `Parser` object.

You should also add the following to your `SampleConfig()` return:

```toml
  ## Data format to consume.
  ## Each data format has its own unique set of configuration options, read
  ## more about them here:
  ## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md
  data_format = "influx"
```

Below is the `Parser` interface.

```go
// Parser is an interface defining functions that a parser plugin must satisfy.
type Parser interface {
    // Parse takes a byte buffer separated by newlines
    // ie, `cpu.usage.idle 90\ncpu.usage.busy 10`
    // and parses it into telegraf metrics
    Parse(buf []byte) ([]telegraf.Metric, error)

    // ParseLine takes a single string metric
    // ie, "cpu.usage.idle 90"
    // and parses it into a telegraf metric.
    ParseLine(line string) (telegraf.Metric, error)
}
```

And you can view the code
[here.](https://github.com/influxdata/telegraf/blob/henrypfhu-master/plugins/parsers/registry.go)
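To make this concrete, here is a rough sketch of how the pieces fit together. The plugin name, fields, and the source of the raw bytes are hypothetical; only `SetParser` and the `Parser` interface above come from the docs:

```go
package myinput // hypothetical plugin package

import (
    "github.com/influxdata/telegraf"
    "github.com/influxdata/telegraf/plugins/parsers"
)

type MyInput struct {
    parser parsers.Parser // populated by Telegraf's config layer via SetParser
}

// SetParser lets the config layer hand the configured parser to the plugin.
func (m *MyInput) SetParser(parser parsers.Parser) {
    m.parser = parser
}

func (m *MyInput) Gather(acc telegraf.Accumulator) error {
    raw := []byte("cpu.usage.idle 90") // e.g. bytes read from a command or socket
    metrics, err := m.parser.Parse(raw)
    if err != nil {
        return err
    }
    for _, metric := range metrics {
        acc.AddFields(metric.Name(), metric.Fields(), metric.Tags(), metric.Time())
    }
    return nil
}
```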
## Service Input Plugins

This section is for developers who want to create new "service" collection
inputs. A service plugin differs from a regular plugin in that it operates
a background service while Telegraf is running. One example would be the `statsd`
plugin, which operates a statsd server.

Service Input Plugins are substantially more complicated than a regular plugin, as they
will require threads and locks to verify data integrity. Service Input Plugins should
be avoided unless there is no way to create their behavior with a regular plugin.

Their interface is quite similar to a regular plugin, with the addition of `Start()`
and `Stop()` methods.

### Service Plugin Guidelines

* Same as the `Plugin` guidelines, except that they must conform to the
[`telegraf.ServiceInput`](https://godoc.org/github.com/influxdata/telegraf#ServiceInput) interface.
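A minimal skeleton might look like this (a sketch only: the plugin name is made up, the listener details are omitted, and the required `Description`/`SampleConfig` methods are elided):

```go
package myservice // hypothetical service input

import "github.com/influxdata/telegraf"

type MyService struct {
    acc  telegraf.Accumulator
    done chan struct{}
}

// Start retains the accumulator and launches the background service.
func (s *MyService) Start(acc telegraf.Accumulator) error {
    s.acc = acc
    s.done = make(chan struct{})
    go s.listen()
    return nil
}

// listen runs until Stop is called, adding metrics as they arrive.
func (s *MyService) listen() {
    for {
        select {
        case <-s.done:
            return
            // case data := <-...: parse and call s.acc.AddFields(...)
        }
    }
}

// Stop signals the background goroutine to exit.
func (s *MyService) Stop() {
    close(s.done)
}

// Gather is still required; for many service inputs it is a no-op.
func (s *MyService) Gather(acc telegraf.Accumulator) error {
    return nil
}
```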
## Output Plugins

This section is for developers who want to create a new output sink. Outputs
are created in a similar manner as collection plugins, and their interface has
similar constructs.

### Output Plugin Guidelines

* An output must conform to the [`telegraf.Output`](https://godoc.org/github.com/influxdata/telegraf#Output) interface.
* Outputs should call `outputs.Add` in their `init` function to register themselves.
See below for a quick example.
* To be available within Telegraf itself, plugins must add themselves to the
`github.com/influxdata/telegraf/plugins/outputs/all/all.go` file.
* The `SampleConfig` function should return valid toml that describes how the
plugin can be configured. This is included in `telegraf config`. Please
consult the [SampleConfig](https://github.com/influxdata/telegraf/wiki/SampleConfig)
page for the latest style guidelines.
* The `Description` function should say in one line what this output does.

### Output Example

```go
package simpleoutput

// simpleoutput.go

import (
    "github.com/influxdata/telegraf"
    "github.com/influxdata/telegraf/plugins/outputs"
)

type Simple struct {
    Ok bool
}

func (s *Simple) Description() string {
    return "a demo output"
}

func (s *Simple) SampleConfig() string {
    return `
  ok = true
`
}

func (s *Simple) Connect() error {
    // Make a connection to the URL here
    return nil
}

func (s *Simple) Close() error {
    // Close connection to the URL here
    return nil
}

func (s *Simple) Write(metrics []telegraf.Metric) error {
    for _, metric := range metrics {
        // write `metric` to the output sink here
    }
    return nil
}

func init() {
    outputs.Add("simpleoutput", func() telegraf.Output { return &Simple{} })
}
```

## Output Plugins Writing Arbitrary Data Formats

Some output plugins (such as
[file](https://github.com/influxdata/telegraf/tree/master/plugins/outputs/file))
can write arbitrary output data formats. An overview of these data formats can
be found
[here](https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_OUTPUT.md).

In order to enable this, you must specify a
`SetSerializer(serializer serializers.Serializer)`
function on the plugin object (see the file plugin for an example), as well as
defining `serializer` as a field of the object.

You can then utilize the serializer internally in your plugin, serializing data
before it's written. Telegraf's configuration layer will take care of
instantiating and creating the `Serializer` object.

You should also add the following to your `SampleConfig()` return:

```toml
  ## Data format to output.
  ## Each data format has its own unique set of configuration options, read
  ## more about them here:
  ## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_OUTPUT.md
  data_format = "influx"
```
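The shape mirrors the parser case. A sketch, assuming a `Serialize(metric) ([]byte, error)` method on the serializer; the plugin type and the write destination are hypothetical:

```go
package myoutput // hypothetical output package

import (
    "github.com/influxdata/telegraf"
    "github.com/influxdata/telegraf/plugins/serializers"
)

type MyOutput struct {
    serializer serializers.Serializer // populated by the config layer via SetSerializer
}

// SetSerializer lets the config layer hand the configured serializer to the plugin.
func (m *MyOutput) SetSerializer(serializer serializers.Serializer) {
    m.serializer = serializer
}

func (m *MyOutput) Write(metrics []telegraf.Metric) error {
    for _, metric := range metrics {
        octets, err := m.serializer.Serialize(metric)
        if err != nil {
            return err
        }
        _ = octets // write the serialized bytes to the sink here
    }
    return nil
}
```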
## Service Output Plugins

This section is for developers who want to create new "service" output. A
service output differs from a regular output in that it operates a background service
while Telegraf is running. One example would be the `prometheus_client` output,
which operates an HTTP server.

Their interface is quite similar to a regular output, with the addition of `Start()`
and `Stop()` methods.

### Service Output Guidelines

* Same as the `Output` guidelines, except that they must conform to the
`output.ServiceOutput` interface.
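Only a sketch (the type is hypothetical and the usual `Connect`/`Close`/`Write` methods are elided): the background server is brought up in `Start` and torn down in `Stop`, much like the `prometheus_client` example above.

```go
package myserviceoutput // hypothetical service output

import "net/http"

type MyServiceOutput struct {
    server *http.Server
}

// Start brings up the background service, e.g. an HTTP listener.
func (m *MyServiceOutput) Start() error {
    m.server = &http.Server{Addr: ":9273"}
    go m.server.ListenAndServe() // error handling elided in this sketch
    return nil
}

// Stop tears the service down when Telegraf shuts down.
func (m *MyServiceOutput) Stop() {
    m.server.Close()
}
```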
## Processor Plugins

This section is for developers who want to create a new processor plugin.

### Processor Plugin Guidelines

* A processor must conform to the [`telegraf.Processor`](https://godoc.org/github.com/influxdata/telegraf#Processor) interface.
* Processors should call `processors.Add` in their `init` function to register themselves.
See below for a quick example.
* To be available within Telegraf itself, plugins must add themselves to the
`github.com/influxdata/telegraf/plugins/processors/all/all.go` file.
* The `SampleConfig` function should return valid toml that describes how the
processor can be configured. This is included in the output of `telegraf config`.
* The `Description` function should say in one line what this processor does.

### Processor Example

```go
package printer

// printer.go

import (
    "fmt"

    "github.com/influxdata/telegraf"
    "github.com/influxdata/telegraf/plugins/processors"
)

type Printer struct {
}

var sampleConfig = `
`

func (p *Printer) SampleConfig() string {
    return sampleConfig
}

func (p *Printer) Description() string {
    return "Print all metrics that pass through this filter."
}

func (p *Printer) Apply(in ...telegraf.Metric) []telegraf.Metric {
    for _, metric := range in {
        fmt.Println(metric.String())
    }
    return in
}

func init() {
    processors.Add("printer", func() telegraf.Processor {
        return &Printer{}
    })
}
```

## Aggregator Plugins

This section is for developers who want to create a new aggregator plugin.

### Aggregator Plugin Guidelines

* An aggregator must conform to the [`telegraf.Aggregator`](https://godoc.org/github.com/influxdata/telegraf#Aggregator) interface.
* Aggregators should call `aggregators.Add` in their `init` function to register themselves.
See below for a quick example.
* To be available within Telegraf itself, plugins must add themselves to the
`github.com/influxdata/telegraf/plugins/aggregators/all/all.go` file.
* The `SampleConfig` function should return valid toml that describes how the
aggregator can be configured. This is included in `telegraf config`.
* The `Description` function should say in one line what this aggregator does.
* The Aggregator plugin will need to keep caches of metrics that have passed
through it. This should be done using the built-in `HashID()` function of each
metric.
* When the `Reset()` function is called, all caches should be cleared.

### Aggregator Example

```go
package min

// min.go

import (
    "github.com/influxdata/telegraf"
    "github.com/influxdata/telegraf/plugins/aggregators"
)

type Min struct {
    // caches for metric fields, names, and tags
    fieldCache map[uint64]map[string]float64
    nameCache  map[uint64]string
    tagCache   map[uint64]map[string]string
}

func NewMin() telegraf.Aggregator {
    m := &Min{}
    m.Reset()
    return m
}

var sampleConfig = `
  ## period is the flush & clear interval of the aggregator.
  period = "30s"
  ## If true drop_original will drop the original metrics and
  ## only send aggregates.
  drop_original = false
`

func (m *Min) SampleConfig() string {
    return sampleConfig
}

func (m *Min) Description() string {
    return "Keep the aggregate min of each metric passing through."
}

func (m *Min) Add(in telegraf.Metric) {
    id := in.HashID()
    if _, ok := m.nameCache[id]; !ok {
        // hit an uncached metric, create caches for first time:
        m.nameCache[id] = in.Name()
        m.tagCache[id] = in.Tags()
        m.fieldCache[id] = make(map[string]float64)
        for k, v := range in.Fields() {
            if fv, ok := convert(v); ok {
                m.fieldCache[id][k] = fv
            }
        }
    } else {
        for k, v := range in.Fields() {
            if fv, ok := convert(v); ok {
                if _, ok := m.fieldCache[id][k]; !ok {
                    // hit an uncached field of a cached metric
                    m.fieldCache[id][k] = fv
                    continue
                }
                if fv < m.fieldCache[id][k] {
                    // set new minimum
                    m.fieldCache[id][k] = fv
                }
            }
        }
    }
}

func (m *Min) Push(acc telegraf.Accumulator) {
    for id := range m.nameCache {
        fields := map[string]interface{}{}
        for k, v := range m.fieldCache[id] {
            fields[k+"_min"] = v
        }
        acc.AddFields(m.nameCache[id], fields, m.tagCache[id])
    }
}

func (m *Min) Reset() {
    m.fieldCache = make(map[uint64]map[string]float64)
    m.nameCache = make(map[uint64]string)
    m.tagCache = make(map[uint64]map[string]string)
}

func convert(in interface{}) (float64, bool) {
    switch v := in.(type) {
    case float64:
        return v, true
    case int64:
        return float64(v), true
    default:
        return 0, false
    }
}

func init() {
    aggregators.Add("min", func() telegraf.Aggregator {
        return NewMin()
    })
}
```

## Unit Tests
**Unit Tests:**

Before opening a pull request you should run the linter checks and
the short tests.

### Execute linter
**Run static analysis:**

execute `make check`
```
make check
```

### Execute short tests
**Run short tests:**

execute `make test`
```
make test
```

### Execute integration tests
**Execute integration tests:**

Running the integration tests requires several docker containers to be
running. You can start the containers with:

@@ -497,3 +60,12 @@ make test-all
```

Use `make docker-kill` to stop the containers.

[cla]: https://www.influxdata.com/legal/cla/
[new issue]: https://github.com/influxdata/telegraf/issues/new/choose
[pull request]: https://github.com/influxdata/telegraf/compare
[inputs]: /docs/INPUTS.md
[processors]: /docs/PROCESSORS.md
[aggregators]: /docs/AGGREGATORS.md
[outputs]: /docs/OUTPUTS.md
@@ -1,16 +1,14 @@
package telegraf

import "time"
import (
    "time"
)

// Accumulator is an interface for "accumulating" metrics from plugin(s).
// The metrics are sent down a channel shared between all plugins.
// Accumulator allows adding metrics to the processing flow.
type Accumulator interface {
    // AddFields adds a metric to the accumulator with the given measurement
    // name, fields, and tags (and timestamp). If a timestamp is not provided,
    // then the accumulator sets it to "now".
    // Create a point with a value, decorating it with tags
    // NOTE: tags is expected to be owned by the caller, don't mutate
    // it after passing to Add.
    AddFields(measurement string,
        fields map[string]interface{},
        tags map[string]string,

@@ -40,7 +38,49 @@ type Accumulator interface {
        tags map[string]string,
        t ...time.Time)

    // AddMetric adds a metric to the accumulator.
    AddMetric(Metric)

    // SetPrecision takes two time.Duration objects. If the first is non-zero,
    // it sets that as the precision. Otherwise, it takes the second argument
    // as the order of time that the metrics should be rounded to, with the
    // maximum being 1s.
    SetPrecision(precision, interval time.Duration)

    // Report an error.
    AddError(err error)

    // Upgrade to a TrackingAccumulator with space for maxTracked
    // metrics/batches.
    WithTracking(maxTracked int) TrackingAccumulator
}

// TrackingID uniquely identifies a tracked metric group.
type TrackingID uint64

// DeliveryInfo provides the results of a delivered metric group.
type DeliveryInfo interface {
    // ID is the TrackingID
    ID() TrackingID

    // Delivered returns true if the metric was processed successfully.
    Delivered() bool
}

// TrackingAccumulator is an Accumulator that provides a signal when the
// metric has been fully processed. Sending more metrics than the accumulator
// has been allocated for without reading status from the Accepted or Rejected
// channels is an error.
type TrackingAccumulator interface {
    Accumulator

    // Add the Metric and arrange for tracking feedback after processing.
    AddTrackingMetric(m Metric) TrackingID

    // Add a group of Metrics and arrange for a signal when the group has been
    // processed.
    AddTrackingMetricGroup(group []Metric) TrackingID

    // Delivered returns a channel that will contain the tracking results.
    Delivered() <-chan DeliveryInfo
}
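For orientation (not part of the diff): this tracking API is what lets a service input learn whether its metrics actually reached the outputs instead of assuming delivery. A rough usage sketch, with a hypothetical plugin type and method names:

```go
package myservice // hypothetical service input using delivery tracking

import "github.com/influxdata/telegraf"

type MyService struct {
    acc telegraf.TrackingAccumulator
}

func (s *MyService) Start(acc telegraf.Accumulator) error {
    // Reserve room for 100 in-flight metric groups.
    s.acc = acc.WithTracking(100)

    go func() {
        for info := range s.acc.Delivered() {
            if !info.Delivered() {
                // e.g. leave the source message unacknowledged so it is
                // redelivered later, keyed by info.ID()
            }
        }
    }()
    return nil
}

// onMessage would be called by the plugin's listener for each batch.
func (s *MyService) onMessage(metrics []telegraf.Metric) telegraf.TrackingID {
    return s.acc.AddTrackingMetricGroup(metrics)
}
```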
@@ -20,13 +20,13 @@ type MetricMaker interface {

type accumulator struct {
    maker     MetricMaker
    metrics   chan telegraf.Metric
    metrics   chan<- telegraf.Metric
    precision time.Duration
}

func NewAccumulator(
    maker MetricMaker,
    metrics chan telegraf.Metric,
    metrics chan<- telegraf.Metric,
) telegraf.Accumulator {
    acc := accumulator{
        maker: maker,

@@ -42,7 +42,7 @@ func (ac *accumulator) AddFields(
    tags map[string]string,
    t ...time.Time,
) {
    ac.addMetric(measurement, tags, fields, telegraf.Untyped, t...)
    ac.addFields(measurement, tags, fields, telegraf.Untyped, t...)
}

func (ac *accumulator) AddGauge(

@@ -51,7 +51,7 @@ func (ac *accumulator) AddGauge(
    tags map[string]string,
    t ...time.Time,
) {
    ac.addMetric(measurement, tags, fields, telegraf.Gauge, t...)
    ac.addFields(measurement, tags, fields, telegraf.Gauge, t...)
}

func (ac *accumulator) AddCounter(

@@ -60,7 +60,7 @@ func (ac *accumulator) AddCounter(
    tags map[string]string,
    t ...time.Time,
) {
    ac.addMetric(measurement, tags, fields, telegraf.Counter, t...)
    ac.addFields(measurement, tags, fields, telegraf.Counter, t...)
}

func (ac *accumulator) AddSummary(

@@ -69,7 +69,7 @@ func (ac *accumulator) AddSummary(
    tags map[string]string,
    t ...time.Time,
) {
    ac.addMetric(measurement, tags, fields, telegraf.Summary, t...)
    ac.addFields(measurement, tags, fields, telegraf.Summary, t...)
}

func (ac *accumulator) AddHistogram(

@@ -78,10 +78,16 @@ func (ac *accumulator) AddHistogram(
    tags map[string]string,
    t ...time.Time,
) {
    ac.addMetric(measurement, tags, fields, telegraf.Histogram, t...)
    ac.addFields(measurement, tags, fields, telegraf.Histogram, t...)
}

func (ac *accumulator) addMetric(
func (ac *accumulator) AddMetric(m telegraf.Metric) {
    if m := ac.maker.MakeMetric(m); m != nil {
        ac.metrics <- m
    }
}

func (ac *accumulator) addFields(
    measurement string,
    tags map[string]string,
    fields map[string]interface{},

@@ -104,13 +110,9 @@ func (ac *accumulator) AddError(err error) {
        return
    }
    NErrors.Incr(1)
    log.Printf("E! Error in plugin [%s]: %s", ac.maker.Name(), err)
    log.Printf("E! [%s]: Error in plugin: %v", ac.maker.Name(), err)
}

// SetPrecision takes two time.Duration objects. If the first is non-zero,
// it sets that as the precision. Otherwise, it takes the second argument
// as the order of time that the metrics should be rounded to, with the
// maximum being 1s.
func (ac *accumulator) SetPrecision(precision, interval time.Duration) {
    if precision > 0 {
        ac.precision = precision

@@ -128,7 +130,7 @@ func (ac *accumulator) SetPrecision(precision, interval time.Duration) {
    }
}

func (ac accumulator) getTime(t []time.Time) time.Time {
func (ac *accumulator) getTime(t []time.Time) time.Time {
    var timestamp time.Time
    if len(t) > 0 {
        timestamp = t[0]

@@ -137,3 +139,43 @@ func (ac accumulator) getTime(t []time.Time) time.Time {
    }
    return timestamp.Round(ac.precision)
}

func (ac *accumulator) WithTracking(maxTracked int) telegraf.TrackingAccumulator {
    return &trackingAccumulator{
        Accumulator: ac,
        delivered:   make(chan telegraf.DeliveryInfo, maxTracked),
    }
}

type trackingAccumulator struct {
    telegraf.Accumulator
    delivered chan telegraf.DeliveryInfo
}

func (a *trackingAccumulator) AddTrackingMetric(m telegraf.Metric) telegraf.TrackingID {
    dm, id := metric.WithTracking(m, a.onDelivery)
    a.AddMetric(dm)
    return id
}

func (a *trackingAccumulator) AddTrackingMetricGroup(group []telegraf.Metric) telegraf.TrackingID {
    db, id := metric.WithGroupTracking(group, a.onDelivery)
    for _, m := range db {
        a.AddMetric(m)
    }
    return id
}

func (a *trackingAccumulator) Delivered() <-chan telegraf.DeliveryInfo {
    return a.delivered
}

func (a *trackingAccumulator) onDelivery(info telegraf.DeliveryInfo) {
    select {
    case a.delivered <- info:
    default:
        // This is a programming error in the input. More items were sent for
        // tracking than space requested.
        panic("channel is full")
    }
}
agent/agent.go (898 lines changed)

@@ -1,9 +1,9 @@
package agent

import (
    "context"
    "fmt"
    "log"
    "os"
    "runtime"
    "sync"
    "time"
@@ -12,187 +12,157 @@ import (
    "github.com/influxdata/telegraf/internal"
    "github.com/influxdata/telegraf/internal/config"
    "github.com/influxdata/telegraf/internal/models"
    "github.com/influxdata/telegraf/selfstat"
    "github.com/influxdata/telegraf/plugins/serializers/influx"
)

// Agent runs telegraf and collects data based on the given config
// Agent runs a set of plugins.
type Agent struct {
    Config *config.Config
}

// NewAgent returns an Agent struct based off the given Config
// NewAgent returns an Agent for the given Config.
func NewAgent(config *config.Config) (*Agent, error) {
    a := &Agent{
        Config: config,
    }

    if !a.Config.Agent.OmitHostname {
        if a.Config.Agent.Hostname == "" {
            hostname, err := os.Hostname()
            if err != nil {
                return nil, err
            }

            a.Config.Agent.Hostname = hostname
        }

        config.Tags["host"] = a.Config.Agent.Hostname
    }

    return a, nil
}

// Connect connects to all configured outputs
func (a *Agent) Connect() error {
    for _, o := range a.Config.Outputs {
        switch ot := o.Output.(type) {
        case telegraf.ServiceOutput:
            if err := ot.Start(); err != nil {
                log.Printf("E! Service for output %s failed to start, exiting\n%s\n",
                    o.Name, err.Error())
                return err
            }
// Run starts and runs the Agent until the context is done.
func (a *Agent) Run(ctx context.Context) error {
    log.Printf("I! [agent] Config: Interval:%s, Quiet:%#v, Hostname:%#v, "+
        "Flush Interval:%s",
        a.Config.Agent.Interval.Duration, a.Config.Agent.Quiet,
        a.Config.Agent.Hostname, a.Config.Agent.FlushInterval.Duration)

    if ctx.Err() != nil {
        return ctx.Err()
    }

    log.Printf("D! [agent] Connecting outputs")
    err := a.connectOutputs(ctx)
    if err != nil {
        return err
    }

    inputC := make(chan telegraf.Metric, 100)
    procC := make(chan telegraf.Metric, 100)
    outputC := make(chan telegraf.Metric, 100)

    startTime := time.Now()

    log.Printf("D! [agent] Starting service inputs")
    err = a.startServiceInputs(ctx, inputC)
    if err != nil {
        return err
    }

    var wg sync.WaitGroup

    src := inputC
    dst := inputC

    wg.Add(1)
    go func(dst chan telegraf.Metric) {
        defer wg.Done()

        err := a.runInputs(ctx, startTime, dst)
        if err != nil {
            log.Printf("E! [agent] Error running inputs: %v", err)
        }

        log.Printf("D! Attempting connection to output: %s\n", o.Name)
        err := o.Output.Connect()
        if err != nil {
            log.Printf("E! Failed to connect to output %s, retrying in 15s, "+
                "error was '%s' \n", o.Name, err)
            time.Sleep(15 * time.Second)
            err = o.Output.Connect()
        log.Printf("D! [agent] Stopping service inputs")
        a.stopServiceInputs()

        close(dst)
        log.Printf("D! [agent] Input channel closed")
    }(dst)

    src = dst

    if len(a.Config.Processors) > 0 {
        dst = procC

        wg.Add(1)
        go func(src, dst chan telegraf.Metric) {
            defer wg.Done()

            err := a.runProcessors(src, dst)
            if err != nil {
                return err
                log.Printf("E! [agent] Error running processors: %v", err)
            }
        }
        log.Printf("D! Successfully connected to output: %s\n", o.Name)
            close(dst)
            log.Printf("D! [agent] Processor channel closed")
        }(src, dst)

        src = dst
    }

    if len(a.Config.Aggregators) > 0 {
        dst = outputC

        wg.Add(1)
        go func(src, dst chan telegraf.Metric) {
            defer wg.Done()

            err := a.runAggregators(startTime, src, dst)
            if err != nil {
                log.Printf("E! [agent] Error running aggregators: %v", err)
            }
            close(dst)
            log.Printf("D! [agent] Output channel closed")
        }(src, dst)

        src = dst
    }

    wg.Add(1)
    go func(src chan telegraf.Metric) {
        defer wg.Done()

        err := a.runOutputs(startTime, src)
        if err != nil {
            log.Printf("E! [agent] Error running outputs: %v", err)
        }
    }(src)

    wg.Wait()

    log.Printf("D! [agent] Closing outputs")
    err = a.closeOutputs()
    if err != nil {
        return err
    }

    return nil
}

// Close closes the connection to all configured outputs
func (a *Agent) Close() error {
    var err error
    for _, o := range a.Config.Outputs {
        err = o.Output.Close()
        switch ot := o.Output.(type) {
        case telegraf.ServiceOutput:
            ot.Stop()
        }
    }
    return err
}

func panicRecover(input *models.RunningInput) {
    if err := recover(); err != nil {
        trace := make([]byte, 2048)
        runtime.Stack(trace, true)
        log.Printf("E! FATAL: Input [%s] panicked: %s, Stack:\n%s\n",
            input.Name(), err, trace)
        log.Println("E! PLEASE REPORT THIS PANIC ON GITHUB with " +
            "stack trace, configuration, and OS information: " +
            "https://github.com/influxdata/telegraf/issues/new")
    }
}

// gatherer runs the inputs that have been configured with their own
// reporting interval.
func (a *Agent) gatherer(
    shutdown chan struct{},
    input *models.RunningInput,
    interval time.Duration,
    metricC chan telegraf.Metric,
) {
    defer panicRecover(input)

    GatherTime := selfstat.RegisterTiming("gather",
        "gather_time_ns",
        map[string]string{"input": input.Config.Name},
    )

    acc := NewAccumulator(input, metricC)
    acc.SetPrecision(a.Config.Agent.Precision.Duration,
        a.Config.Agent.Interval.Duration)

    ticker := time.NewTicker(interval)
    defer ticker.Stop()

    for {
        internal.RandomSleep(a.Config.Agent.CollectionJitter.Duration, shutdown)

        start := time.Now()
        gatherWithTimeout(shutdown, input, acc, interval)
        elapsed := time.Since(start)

        GatherTime.Incr(elapsed.Nanoseconds())

        select {
        case <-shutdown:
            return
        case <-ticker.C:
            continue
        }
    }
}

// gatherWithTimeout gathers from the given input, with the given timeout.
// when the given timeout is reached, gatherWithTimeout logs an error message
// but continues waiting for it to return. This is to avoid leaving behind
// hung processes, and to prevent re-calling the same hung process over and
// over.
func gatherWithTimeout(
    shutdown chan struct{},
    input *models.RunningInput,
    acc telegraf.Accumulator,
    timeout time.Duration,
) {
    ticker := time.NewTicker(timeout)
    defer ticker.Stop()
    done := make(chan error)
    go func() {
        done <- input.Input.Gather(acc)
// Test runs the inputs once and prints the output to stdout in line protocol.
func (a *Agent) Test() error {
    var wg sync.WaitGroup
    metricC := make(chan telegraf.Metric)
    defer func() {
        close(metricC)
        wg.Wait()
    }()

    for {
        select {
        case err := <-done:
            if err != nil {
                acc.AddError(err)
            }
            return
        case <-ticker.C:
            err := fmt.Errorf("took longer to collect than collection interval (%s)",
                timeout)
            acc.AddError(err)
            continue
        case <-shutdown:
            return
        }
    }
}

// Test verifies that we can 'Gather' from all inputs with their configured
// Config struct
func (a *Agent) Test() error {
    shutdown := make(chan struct{})
    defer close(shutdown)
    metricC := make(chan telegraf.Metric)

    // dummy receiver for the point channel
    wg.Add(1)
    go func() {
        for {
            select {
            case <-metricC:
                // do nothing
            case <-shutdown:
                return
        defer wg.Done()

        s := influx.NewSerializer()
        s.SetFieldSortOrder(influx.SortFields)
        for metric := range metricC {
            octets, err := s.Serialize(metric)
            if err == nil {
                fmt.Print("> ", string(octets))
            }
        }
    }()

    for _, input := range a.Config.Inputs {
        if _, ok := input.Input.(telegraf.ServiceInput); ok {
            fmt.Printf("\nWARNING: skipping plugin [[%s]]: service inputs not supported in --test mode\n",
            log.Printf("W!: [agent] skipping plugin [[%s]]: service inputs not supported in --test mode",
                input.Name())
            continue
        }
@@ -200,7 +170,6 @@ func (a *Agent) Test() error {
        acc := NewAccumulator(input, metricC)
        acc.SetPrecision(a.Config.Agent.Precision.Duration,
            a.Config.Agent.Interval.Duration)
        input.SetTrace(true)
        input.SetDefaultTags(a.Config.Tags)

        if err := input.Input.Gather(acc); err != nil {
@@ -218,216 +187,445 @@ func (a *Agent) Test() error {
    }

    }

    return nil
}

// flush writes a list of metrics to all configured outputs
func (a *Agent) flush() {
    var wg sync.WaitGroup

    wg.Add(len(a.Config.Outputs))
    for _, o := range a.Config.Outputs {
        go func(output *models.RunningOutput) {
            defer wg.Done()
            err := output.Write()
            if err != nil {
                log.Printf("E! Error writing to output [%s]: %s\n",
                    output.Name, err.Error())
            }
        }(o)
    }

    wg.Wait()
}

// flusher monitors the metrics input channel and flushes on the minimum interval
func (a *Agent) flusher(
    shutdown chan struct{},
    metricC chan telegraf.Metric,
    aggMetricC chan telegraf.Metric,
    outMetricC chan telegraf.Metric,
// runInputs starts and triggers the periodic gather for Inputs.
//
// When the context is done the timers are stopped and this function returns
// after all ongoing Gather calls complete.
func (a *Agent) runInputs(
    ctx context.Context,
    startTime time.Time,
    dst chan<- telegraf.Metric,
) error {
    var wg sync.WaitGroup
    wg.Add(1)
    go func() {
        defer wg.Done()
        for {
            select {
            case <-shutdown:
                if len(outMetricC) > 0 {
                    // keep going until channel is empty
                    continue
                }
                return
            case metric := <-outMetricC:
                for i, o := range a.Config.Outputs {
                    if i == len(a.Config.Outputs)-1 {
                        o.AddMetric(metric)
                    } else {
                        o.AddMetric(metric.Copy())
                    }
                }
            }
        }
    }()

    wg.Add(1)
    go func() {
        defer wg.Done()
        for metric := range aggMetricC {
            // Apply Processors
            metrics := []telegraf.Metric{metric}
            for _, processor := range a.Config.Processors {
                metrics = processor.Apply(metrics...)
            }
            outMetricC <- metric
        }
    }()

    wg.Add(1)
    go func() {
        defer wg.Done()
        for {
            select {
            case <-shutdown:
                if len(metricC) > 0 {
                    // keep going until channel is empty
                    continue
                }
                close(aggMetricC)
                return
            case metric := <-metricC:
                // Apply Processors
                metrics := []telegraf.Metric{metric}
                for _, processor := range a.Config.Processors {
                    metrics = processor.Apply(metrics...)
                }

                for _, metric := range metrics {
                    // Apply Aggregators
                    var dropOriginal bool
                    for _, agg := range a.Config.Aggregators {
                        if ok := agg.Add(metric.Copy()); ok {
                            dropOriginal = true
                        }
                    }

                    // Forward metric to Outputs
                    if !dropOriginal {
                        outMetricC <- metric
                    }
                }
            }
        }
    }()

    ticker := time.NewTicker(a.Config.Agent.FlushInterval.Duration)
    semaphore := make(chan struct{}, 1)
    for {
        select {
        case <-shutdown:
            log.Println("I! Hang on, flushing any cached metrics before shutdown")
            // wait for outMetricC to get flushed before flushing outputs
            wg.Wait()
            a.flush()
            return nil
        case <-ticker.C:
            go func() {
                select {
                case semaphore <- struct{}{}:
                    internal.RandomSleep(a.Config.Agent.FlushJitter.Duration, shutdown)
                    a.flush()
                    <-semaphore
                default:
                    // skipping this flush because one is already happening
                    log.Println("W! Skipping a scheduled flush because there is" +
                        " already a flush ongoing.")
                }
            }()
        }
    }
}

// Run runs the agent daemon, gathering every Interval
func (a *Agent) Run(shutdown chan struct{}) error {
    var wg sync.WaitGroup

    log.Printf("I! Agent Config: Interval:%s, Quiet:%#v, Hostname:%#v, "+
        "Flush Interval:%s \n",
        a.Config.Agent.Interval.Duration, a.Config.Agent.Quiet,
        a.Config.Agent.Hostname, a.Config.Agent.FlushInterval.Duration)

    // Channel shared between all input threads for accumulating metrics
    metricC := make(chan telegraf.Metric, 100)

    // Channel for metrics ready to be output
    outMetricC := make(chan telegraf.Metric, 100)

    // Channel for aggregated metrics
    aggMetricC := make(chan telegraf.Metric, 100)

    // Round collection to nearest interval by sleeping
    if a.Config.Agent.RoundInterval {
        i := int64(a.Config.Agent.Interval.Duration)
        time.Sleep(time.Duration(i - (time.Now().UnixNano() % i)))
    }

    wg.Add(1)
    go func() {
        defer wg.Done()
        if err := a.flusher(shutdown, metricC, aggMetricC, outMetricC); err != nil {
            log.Printf("E! Flusher routine failed, exiting: %s\n", err.Error())
            close(shutdown)
        }
    }()

    wg.Add(len(a.Config.Aggregators))
    for _, aggregator := range a.Config.Aggregators {
        go func(agg *models.RunningAggregator) {
            defer wg.Done()
            acc := NewAccumulator(agg, aggMetricC)
            acc.SetPrecision(a.Config.Agent.Precision.Duration,
                a.Config.Agent.Interval.Duration)
            agg.Run(acc, shutdown)
        }(aggregator)
    }

    // Service inputs may immediately add metrics, if metrics are added before
    // the aggregator starts they will be dropped. Generally this occurs
    // only during testing but it is an outstanding issue.
    //
    // https://github.com/influxdata/telegraf/issues/4394
    for _, input := range a.Config.Inputs {
        input.SetDefaultTags(a.Config.Tags)
        switch p := input.Input.(type) {
        case telegraf.ServiceInput:
            acc := NewAccumulator(input, metricC)
            // Service input plugins should set their own precision of their
            // metrics.
            acc.SetPrecision(time.Nanosecond, 0)
            if err := p.Start(acc); err != nil {
                log.Printf("E! Service for input %s failed to start, exiting\n%s\n",
                    input.Name(), err.Error())
                return err
            }
            defer p.Stop()
        }
    }

    wg.Add(len(a.Config.Inputs))
    for _, input := range a.Config.Inputs {
        interval := a.Config.Agent.Interval.Duration
        // overwrite global interval if this plugin has its own.
        precision := a.Config.Agent.Precision.Duration
        jitter := a.Config.Agent.CollectionJitter.Duration

        // Overwrite agent interval if this plugin has its own.
        if input.Config.Interval != 0 {
            interval = input.Config.Interval
        }
        go func(in *models.RunningInput, interv time.Duration) {

        acc := NewAccumulator(input, dst)
        acc.SetPrecision(precision, interval)

        wg.Add(1)
        go func(input *models.RunningInput) {
            defer wg.Done()
            a.gatherer(shutdown, in, interv, metricC)
        }(input, interval)

            if a.Config.Agent.RoundInterval {
                err := internal.SleepContext(
                    ctx, internal.AlignDuration(startTime, interval))
                if err != nil {
                    return
                }
            }

            a.gatherOnInterval(ctx, acc, input, interval, jitter)
        }(input)
    }
    wg.Wait()

    return nil
}

// gather runs an input's gather function periodically until the context is
// done.
func (a *Agent) gatherOnInterval(
    ctx context.Context,
    acc telegraf.Accumulator,
    input *models.RunningInput,
    interval time.Duration,
    jitter time.Duration,
) {
    defer panicRecover(input)

    ticker := time.NewTicker(interval)
    defer ticker.Stop()

    for {
        err := internal.SleepContext(ctx, internal.RandomDuration(jitter))
        if err != nil {
            return
        }

        err = a.gatherOnce(acc, input, interval)
        if err != nil {
            acc.AddError(err)
        }

        select {
        case <-ticker.C:
            continue
        case <-ctx.Done():
            return
        }
    }
}

// gatherOnce runs the input's Gather function once, logging a warning each
// time it fails to complete within the interval.
func (a *Agent) gatherOnce(
    acc telegraf.Accumulator,
    input *models.RunningInput,
    timeout time.Duration,
) error {
    ticker := time.NewTicker(timeout)
    defer ticker.Stop()

    done := make(chan error)
    go func() {
        done <- input.Gather(acc)
    }()

    for {
        select {
        case err := <-done:
            return err
        case <-ticker.C:
            log.Printf("W! [agent] input %q did not complete within its interval",
                input.Name())
        }
    }
}

// runProcessors applies processors to metrics.
func (a *Agent) runProcessors(
    src <-chan telegraf.Metric,
    agg chan<- telegraf.Metric,
) error {
    for metric := range src {
        metrics := a.applyProcessors(metric)

        for _, metric := range metrics {
            agg <- metric
        }
    }

    return nil
}

// applyProcessors applies all processors to a metric.
func (a *Agent) applyProcessors(m telegraf.Metric) []telegraf.Metric {
    metrics := []telegraf.Metric{m}
    for _, processor := range a.Config.Processors {
        metrics = processor.Apply(metrics...)
    }

    return metrics
}

// runAggregators triggers the periodic push for Aggregators.
//
// When the context is done a final push will occur and then this function
// will return.
func (a *Agent) runAggregators(
    startTime time.Time,
    src <-chan telegraf.Metric,
    dst chan<- telegraf.Metric,
) error {
    ctx, cancel := context.WithCancel(context.Background())

    var wg sync.WaitGroup
    wg.Add(1)
    go func() {
        defer wg.Done()
        for metric := range src {
            var dropOriginal bool
            for _, agg := range a.Config.Aggregators {
                if ok := agg.Add(metric); ok {
                    dropOriginal = true
                }
            }

            if !dropOriginal {
                dst <- metric
            }
        }
        cancel()
    }()

    precision := a.Config.Agent.Precision.Duration
    interval := a.Config.Agent.Interval.Duration
    aggregations := make(chan telegraf.Metric, 100)
    for _, agg := range a.Config.Aggregators {
        wg.Add(1)
        go func(agg *models.RunningAggregator) {
            defer wg.Done()

            if a.Config.Agent.RoundInterval {
                // Aggregators are aligned to the agent interval regardless of
                // their period.
                err := internal.SleepContext(ctx, internal.AlignDuration(startTime, interval))
                if err != nil {
                    return
                }
            }

            agg.SetPeriodStart(startTime)

            acc := NewAccumulator(agg, aggregations)
            acc.SetPrecision(precision, interval)
            a.push(ctx, agg, acc)
            close(aggregations)
        }(agg)
    }

    for metric := range aggregations {
        metrics := a.applyProcessors(metric)
        for _, metric := range metrics {
            dst <- metric
        }
    }

    wg.Wait()
    a.Close()
    return nil
}

// push runs the push for a single aggregator every period. Simpler than
// the output/input version, as a timeout should be less likely... though not
// impossible, because the output channel can still block for now.
func (a *Agent) push(
    ctx context.Context,
    aggregator *models.RunningAggregator,
    acc telegraf.Accumulator,
) {
    ticker := time.NewTicker(aggregator.Period())
    defer ticker.Stop()

    for {
        select {
        case <-ticker.C:
            break
        case <-ctx.Done():
            aggregator.Push(acc)
            return
        }

        aggregator.Push(acc)
    }
}

// runOutputs triggers the periodic write for Outputs.
//
// When the context is done, outputs continue to run until their buffer is
// closed, after which they run flush once more.
func (a *Agent) runOutputs(
    startTime time.Time,
    src <-chan telegraf.Metric,
) error {
    interval := a.Config.Agent.FlushInterval.Duration
    jitter := a.Config.Agent.FlushJitter.Duration

    ctx, cancel := context.WithCancel(context.Background())

    var wg sync.WaitGroup
    for _, output := range a.Config.Outputs {
        interval := interval
        // Overwrite agent flush_interval if this plugin has its own.
        if output.Config.FlushInterval != 0 {
            interval = output.Config.FlushInterval
        }

        wg.Add(1)
        go func(output *models.RunningOutput) {
            defer wg.Done()

            if a.Config.Agent.RoundInterval {
                err := internal.SleepContext(
                    ctx, internal.AlignDuration(startTime, interval))
                if err != nil {
                    return
                }
            }

            a.flush(ctx, output, interval, jitter)
        }(output)
    }

    for metric := range src {
        for i, output := range a.Config.Outputs {
            if i == len(a.Config.Outputs)-1 {
                output.AddMetric(metric)
            } else {
                output.AddMetric(metric.Copy())
            }
        }
    }

    log.Println("I! [agent] Hang on, flushing any cached metrics before shutdown")
    cancel()
    wg.Wait()

    return nil
}

// flush runs an output's flush function periodically until the context is
// done.
func (a *Agent) flush(
    ctx context.Context,
    output *models.RunningOutput,
    interval time.Duration,
    jitter time.Duration,
) {
    // since we are watching two channels we need a ticker with the jitter
    // integrated.
    ticker := NewTicker(interval, jitter)
    defer ticker.Stop()

    logError := func(err error) {
        if err != nil {
            log.Printf("E! [agent] Error writing to output [%s]: %v", output.Name, err)
        }
    }

    for {
        // Favor shutdown over other methods.
        select {
        case <-ctx.Done():
            logError(a.flushOnce(output, interval, output.Write))
            return
        default:
        }

        select {
        case <-ticker.C:
            logError(a.flushOnce(output, interval, output.Write))
        case <-output.BatchReady:
            // Favor the ticker over batch ready
            select {
            case <-ticker.C:
                logError(a.flushOnce(output, interval, output.Write))
            default:
                logError(a.flushOnce(output, interval, output.WriteBatch))
            }
        case <-ctx.Done():
            logError(a.flushOnce(output, interval, output.Write))
            return
        }
    }
}

// flushOnce runs the output's Write function once, logging a warning each
// time it fails to complete within the interval.
func (a *Agent) flushOnce(
    output *models.RunningOutput,
    timeout time.Duration,
    writeFunc func() error,
) error {
    ticker := time.NewTicker(timeout)
    defer ticker.Stop()

    done := make(chan error)
    go func() {
        done <- writeFunc()
    }()

    for {
        select {
        case err := <-done:
            output.LogBufferStatus()
            return err
        case <-ticker.C:
            log.Printf("W! [agent] output %q did not complete within its flush interval",
                output.Name)
            output.LogBufferStatus()
        }
    }
}

// connectOutputs connects to all outputs.
func (a *Agent) connectOutputs(ctx context.Context) error {
    for _, output := range a.Config.Outputs {
        log.Printf("D! [agent] Attempting connection to output: %s\n", output.Name)
        err := output.Output.Connect()
        if err != nil {
            log.Printf("E! [agent] Failed to connect to output %s, retrying in 15s, "+
                "error was '%s' \n", output.Name, err)

            err := internal.SleepContext(ctx, 15*time.Second)
            if err != nil {
                return err
            }

            err = output.Output.Connect()
            if err != nil {
                return err
            }
        }
        log.Printf("D! [agent] Successfully connected to output: %s\n", output.Name)
    }
    return nil
}

// closeOutputs closes all outputs.
func (a *Agent) closeOutputs() error {
    var err error
    for _, output := range a.Config.Outputs {
        err = output.Output.Close()
    }
    return err
}

// startServiceInputs starts all service inputs.
func (a *Agent) startServiceInputs(
    ctx context.Context,
    dst chan<- telegraf.Metric,
) error {
    started := []telegraf.ServiceInput{}

    for _, input := range a.Config.Inputs {
        if si, ok := input.Input.(telegraf.ServiceInput); ok {
            // Service input plugins are not subject to timestamp rounding.
            // This only applies to the accumulator passed to Start(), the
            // Gather() accumulator does apply rounding according to the
            // precision agent setting.
            acc := NewAccumulator(input, dst)
            acc.SetPrecision(time.Nanosecond, 0)

            err := si.Start(acc)
            if err != nil {
                log.Printf("E! [agent] Service for input %s failed to start: %v",
                    input.Name(), err)

                for _, si := range started {
                    si.Stop()
                }

                return err
            }

            started = append(started, si)
        }
    }

    return nil
}

// stopServiceInputs stops all service inputs.
func (a *Agent) stopServiceInputs() {
    for _, input := range a.Config.Inputs {
        if si, ok := input.Input.(telegraf.ServiceInput); ok {
            si.Stop()
        }
    }
}

// panicRecover displays an error if an input panics.
func panicRecover(input *models.RunningInput) {
    if err := recover(); err != nil {
        trace := make([]byte, 2048)
        runtime.Stack(trace, true)
        log.Printf("E! FATAL: Input [%s] panicked: %s, Stack:\n%s\n",
            input.Name(), err, trace)
        log.Println("E! PLEASE REPORT THIS PANIC ON GITHUB with " +
            "stack trace, configuration, and OS information: " +
            "https://github.com/influxdata/telegraf/issues/new/choose")
    }
}
|
@ -0,0 +1,57 @@
|
|||
package agent
|
||||
|
||||
import (
|
||||
"context"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/influxdata/telegraf/internal"
|
||||
)
|
||||
|
||||
type Ticker struct {
|
||||
C chan time.Time
|
||||
ticker *time.Ticker
|
||||
jitter time.Duration
|
||||
wg sync.WaitGroup
|
||||
cancelFunc context.CancelFunc
|
||||
}
|
||||
|
||||
func NewTicker(
|
||||
interval time.Duration,
|
||||
jitter time.Duration,
|
||||
) *Ticker {
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
|
||||
t := &Ticker{
|
||||
C: make(chan time.Time, 1),
|
||||
ticker: time.NewTicker(interval),
|
||||
jitter: jitter,
|
||||
cancelFunc: cancel,
|
||||
}
|
||||
|
||||
t.wg.Add(1)
|
||||
go t.relayTime(ctx)
|
||||
|
||||
return t
|
||||
}
|
||||
|
||||
func (t *Ticker) Stop() {
|
||||
t.cancelFunc()
|
||||
t.wg.Wait()
|
||||
}
|
||||
|
||||
func (t *Ticker) relayTime(ctx context.Context) {
|
||||
defer t.wg.Done()
|
||||
for {
|
||||
select {
|
||||
case tm := <-t.ticker.C:
|
||||
internal.SleepContext(ctx, internal.RandomDuration(t.jitter))
|
||||
select {
|
||||
case t.C <- tm:
|
||||
default:
|
||||
}
|
||||
case <-ctx.Done():
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
|
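As an aside (not part of the diff): the capacity-1 channel plus the non-blocking send in `relayTime` is what lets a slow consumer skip ticks rather than queue them up. A standalone sketch of that pattern, using only the standard library:

```go
package main

import (
    "fmt"
    "time"
)

func main() {
    // A capacity-1 channel plus a non-blocking send: if the consumer has not
    // drained the previous tick, new ticks are dropped instead of piling up.
    c := make(chan time.Time, 1)

    go func() {
        for tm := range time.Tick(10 * time.Millisecond) {
            select {
            case c <- tm:
            default: // consumer busy; skip this tick
            }
        }
    }()

    time.Sleep(50 * time.Millisecond) // simulate a slow consumer
    fmt.Println("ticks pending:", len(c)) // typically prints 1, not 5
}
```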
@ -1,6 +1,8 @@
package main

import (
	"context"
	"errors"
	"flag"
	"fmt"
	"log"
@ -78,112 +80,111 @@ func reloadLoop(
	for <-reload {
		reload <- false

		// If no other options are specified, load the config file and run.
		c := config.NewConfig()
		c.OutputFilters = outputFilters
		c.InputFilters = inputFilters
		err := c.LoadConfig(*fConfig)
		if err != nil {
			log.Fatal("E! " + err.Error())
		}
		ctx, cancel := context.WithCancel(context.Background())

		if *fConfigDirectory != "" {
			err = c.LoadDirectory(*fConfigDirectory)
			if err != nil {
				log.Fatal("E! " + err.Error())
			}
		}
		if !*fTest && len(c.Outputs) == 0 {
			log.Fatalf("E! Error: no outputs found, did you provide a valid config file?")
		}
		if len(c.Inputs) == 0 {
			log.Fatalf("E! Error: no inputs found, did you provide a valid config file?")
		}

		if int64(c.Agent.Interval.Duration) <= 0 {
			log.Fatalf("E! Agent interval must be positive, found %s",
				c.Agent.Interval.Duration)
		}

		if int64(c.Agent.FlushInterval.Duration) <= 0 {
			log.Fatalf("E! Agent flush_interval must be positive; found %s",
				c.Agent.FlushInterval.Duration)
		}

		ag, err := agent.NewAgent(c)
		if err != nil {
			log.Fatal("E! " + err.Error())
		}

		// Setup logging
		logger.SetupLogging(
			ag.Config.Agent.Debug || *fDebug,
			ag.Config.Agent.Quiet || *fQuiet,
			ag.Config.Agent.Logfile,
		)

		if *fTest {
			err = ag.Test()
			if err != nil {
				log.Fatal("E! " + err.Error())
			}
			os.Exit(0)
		}

		err = ag.Connect()
		if err != nil {
			log.Fatal("E! " + err.Error())
		}

		shutdown := make(chan struct{})
		signals := make(chan os.Signal)
		signal.Notify(signals, os.Interrupt, syscall.SIGHUP, syscall.SIGTERM)
		go func() {
			select {
			case sig := <-signals:
				if sig == os.Interrupt || sig == syscall.SIGTERM {
					close(shutdown)
				}
				if sig == syscall.SIGHUP {
					log.Printf("I! Reloading Telegraf config\n")
					log.Printf("I! Reloading Telegraf config")
					<-reload
					reload <- true
					close(shutdown)
				}
				cancel()
			case <-stop:
				close(shutdown)
				cancel()
			}
		}()

		log.Printf("I! Starting Telegraf %s\n", version)
		log.Printf("I! Loaded inputs: %s", strings.Join(c.InputNames(), " "))
		log.Printf("I! Loaded aggregators: %s", strings.Join(c.AggregatorNames(), " "))
		log.Printf("I! Loaded processors: %s", strings.Join(c.ProcessorNames(), " "))
		log.Printf("I! Loaded outputs: %s", strings.Join(c.OutputNames(), " "))
		log.Printf("I! Tags enabled: %s", c.ListTags())

		if *fPidfile != "" {
			f, err := os.OpenFile(*fPidfile, os.O_CREATE|os.O_WRONLY, 0644)
			if err != nil {
				log.Printf("E! Unable to create pidfile: %s", err)
			} else {
				fmt.Fprintf(f, "%d\n", os.Getpid())

				f.Close()

				defer func() {
					err := os.Remove(*fPidfile)
					if err != nil {
						log.Printf("E! Unable to remove pidfile: %s", err)
					}
				}()
			}
		}
		err := runAgent(ctx, inputFilters, outputFilters)
		if err != nil {
			log.Fatalf("E! [telegraf] Error running agent: %v", err)
		}

		ag.Run(shutdown)
	}
}

func runAgent(ctx context.Context,
	inputFilters []string,
	outputFilters []string,
) error {
	// If no other options are specified, load the config file and run.
	c := config.NewConfig()
	c.OutputFilters = outputFilters
	c.InputFilters = inputFilters
	err := c.LoadConfig(*fConfig)
	if err != nil {
		return err
	}

	if *fConfigDirectory != "" {
		err = c.LoadDirectory(*fConfigDirectory)
		if err != nil {
			return err
		}
	}
	if !*fTest && len(c.Outputs) == 0 {
		return errors.New("Error: no outputs found, did you provide a valid config file?")
	}
	if len(c.Inputs) == 0 {
		return errors.New("Error: no inputs found, did you provide a valid config file?")
	}

	if int64(c.Agent.Interval.Duration) <= 0 {
		return fmt.Errorf("Agent interval must be positive, found %s",
			c.Agent.Interval.Duration)
	}

	if int64(c.Agent.FlushInterval.Duration) <= 0 {
		return fmt.Errorf("Agent flush_interval must be positive; found %s",
			c.Agent.FlushInterval.Duration)
	}

	ag, err := agent.NewAgent(c)
	if err != nil {
		return err
	}

	// Setup logging
	logger.SetupLogging(
		ag.Config.Agent.Debug || *fDebug,
		ag.Config.Agent.Quiet || *fQuiet,
		ag.Config.Agent.Logfile,
	)

	if *fTest {
		return ag.Test()
	}

	log.Printf("I! Starting Telegraf %s\n", version)
	log.Printf("I! Loaded inputs: %s", strings.Join(c.InputNames(), " "))
	log.Printf("I! Loaded aggregators: %s", strings.Join(c.AggregatorNames(), " "))
	log.Printf("I! Loaded processors: %s", strings.Join(c.ProcessorNames(), " "))
	log.Printf("I! Loaded outputs: %s", strings.Join(c.OutputNames(), " "))
	log.Printf("I! Tags enabled: %s", c.ListTags())

	if *fPidfile != "" {
		f, err := os.OpenFile(*fPidfile, os.O_CREATE|os.O_WRONLY, 0644)
		if err != nil {
			log.Printf("E! Unable to create pidfile: %s", err)
		} else {
			fmt.Fprintf(f, "%d\n", os.Getpid())

			f.Close()

			defer func() {
				err := os.Remove(*fPidfile)
				if err != nil {
					log.Printf("E! Unable to remove pidfile: %s", err)
				}
			}()
		}
	}

	return ag.Run(ctx)
}

func usageExit(rc int) {
	fmt.Println(internal.Usage)
	os.Exit(rc)
@ -0,0 +1,126 @@
### Aggregator Plugins

This section is for developers who want to create a new aggregator plugin.

### Aggregator Plugin Guidelines

* An aggregator must conform to the [telegraf.Aggregator][] interface.
* Aggregators should call `aggregators.Add` in their `init` function to
  register themselves.  See below for a quick example.
* To be available within Telegraf itself, plugins must add themselves to the
  `github.com/influxdata/telegraf/plugins/aggregators/all/all.go` file.
* The `SampleConfig` function should return valid toml that describes how the
  plugin can be configured.  This is included in `telegraf config`.  Please
  consult the [SampleConfig][] page for the latest style guidelines.
* The `Description` function should say in one line what this aggregator does.
* The aggregator plugin will need to keep caches of metrics that have passed
  through it.  This should be done using the builtin `HashID()` function of
  each metric.
* When the `Reset()` function is called, all caches should be cleared.

### Aggregator Plugin Example

```go
package min

// min.go

import (
    "github.com/influxdata/telegraf"
    "github.com/influxdata/telegraf/plugins/aggregators"
)

type Min struct {
    // caches for metric fields, names, and tags
    fieldCache map[uint64]map[string]float64
    nameCache  map[uint64]string
    tagCache   map[uint64]map[string]string
}

func NewMin() telegraf.Aggregator {
    m := &Min{}
    m.Reset()
    return m
}

var sampleConfig = `
  ## period is the flush & clear interval of the aggregator.
  period = "30s"
  ## If true drop_original will drop the original metrics and
  ## only send aggregates.
  drop_original = false
`

func (m *Min) SampleConfig() string {
    return sampleConfig
}

func (m *Min) Description() string {
    return "Keep the aggregate min of each metric passing through."
}

func (m *Min) Add(in telegraf.Metric) {
    id := in.HashID()
    if _, ok := m.nameCache[id]; !ok {
        // hit an uncached metric, create caches for first time:
        m.nameCache[id] = in.Name()
        m.tagCache[id] = in.Tags()
        m.fieldCache[id] = make(map[string]float64)
        for k, v := range in.Fields() {
            if fv, ok := convert(v); ok {
                m.fieldCache[id][k] = fv
            }
        }
    } else {
        for k, v := range in.Fields() {
            if fv, ok := convert(v); ok {
                if _, ok := m.fieldCache[id][k]; !ok {
                    // hit an uncached field of a cached metric
                    m.fieldCache[id][k] = fv
                    continue
                }
                if fv < m.fieldCache[id][k] {
                    // set new minimum
                    m.fieldCache[id][k] = fv
                }
            }
        }
    }
}

func (m *Min) Push(acc telegraf.Accumulator) {
    for id := range m.nameCache {
        fields := map[string]interface{}{}
        for k, v := range m.fieldCache[id] {
            fields[k+"_min"] = v
        }
        acc.AddFields(m.nameCache[id], fields, m.tagCache[id])
    }
}

func (m *Min) Reset() {
    m.fieldCache = make(map[uint64]map[string]float64)
    m.nameCache = make(map[uint64]string)
    m.tagCache = make(map[uint64]map[string]string)
}

func convert(in interface{}) (float64, bool) {
    switch v := in.(type) {
    case float64:
        return v, true
    case int64:
        return float64(v), true
    default:
        return 0, false
    }
}

func init() {
    aggregators.Add("min", func() telegraf.Aggregator {
        return NewMin()
    })
}
```

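Once registered and compiled in, the example aggregator above is enabled from
the config file like any other plugin; a minimal sketch matching its
`sampleConfig`:

```toml
[[aggregators.min]]
  ## period is the flush & clear interval of the aggregator.
  period = "30s"
  ## If true drop_original will drop the original metrics and
  ## only send aggregates.
  drop_original = false
```
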
[telegraf.Aggregator]: https://godoc.org/github.com/influxdata/telegraf#Aggregator
[SampleConfig]: https://github.com/influxdata/telegraf/wiki/SampleConfig
@ -106,6 +106,14 @@ emitted from the input plugin.
### Output Configuration

- **flush_interval**: The maximum time between flushes.  Use this setting to
  override the agent `flush_interval` on a per plugin basis.
- **metric_batch_size**: The maximum number of metrics to send at once.  Use
  this setting to override the agent `metric_batch_size` on a per plugin basis.
- **metric_buffer_limit**: The maximum number of unsent metrics to buffer.
  Use this setting to override the agent `metric_buffer_limit` on a per plugin
  basis.

The [metric filtering](#metric-filtering) parameters can be used to limit what
metrics are emitted from the output plugin.

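As an illustration, an output section that overrides all three settings might
look like the following sketch; the influxdb output and the values shown are
placeholders:

```toml
[[outputs.influxdb]]
  urls = ["http://localhost:8086"]

  ## Per plugin overrides of the agent-level settings.
  flush_interval = "30s"
  metric_batch_size = 2000
  metric_buffer_limit = 100000
```
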
@ -0,0 +1,143 @@
### Input Plugins

This section is for developers who want to create new collection inputs.
Telegraf is entirely plugin driven.  This interface allows for operators to
pick and choose what is gathered and makes it easy for developers
to create new ways of generating metrics.

Plugin authorship is kept as simple as possible to encourage developers to
create and submit new inputs.

### Input Plugin Guidelines

- A plugin must conform to the [telegraf.Input][] interface.
- Input Plugins should call `inputs.Add` in their `init` function to register
  themselves.  See below for a quick example.
- Input Plugins must be added to the
  `github.com/influxdata/telegraf/plugins/inputs/all/all.go` file.
- The `SampleConfig` function should return valid toml that describes how the
  plugin can be configured.  This is included in `telegraf config`.  Please
  consult the [SampleConfig][] page for the latest style guidelines.
- The `Description` function should say in one line what this plugin does.

Let's say you've written a plugin that emits metrics about processes on the
current host.

### Input Plugin Example

```go
package simple

// simple.go

import (
    "github.com/influxdata/telegraf"
    "github.com/influxdata/telegraf/plugins/inputs"
)

type Simple struct {
    Ok bool
}

func (s *Simple) Description() string {
    return "a demo plugin"
}

func (s *Simple) SampleConfig() string {
    return `
  ## Indicate if everything is fine
  ok = true
`
}

func (s *Simple) Gather(acc telegraf.Accumulator) error {
    if s.Ok {
        acc.AddFields("state", map[string]interface{}{"value": "pretty good"}, nil)
    } else {
        acc.AddFields("state", map[string]interface{}{"value": "not great"}, nil)
    }

    return nil
}

func init() {
    inputs.Add("simple", func() telegraf.Input { return &Simple{} })
}
```

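With the plugin registered as `simple`, enabling it from the config file is
just:

```toml
[[inputs.simple]]
  ## Indicate if everything is fine
  ok = true
```
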
### Development

* Run `make static` followed by `make plugin-[pluginName]` to spin up a docker
  dev environment using docker-compose.
* ***[Optional]*** When developing a plugin, add a `dev` directory with a
  `docker-compose.yml` and `telegraf.conf` as well as any other supporting
  files, where sensible.

### Typed Metrics

In addition to the `AddFields` function, the accumulator also supports
functions to add typed metrics: `AddGauge`, `AddCounter`, etc.  Metric types
are ignored by the InfluxDB output, but can be used for other outputs, such as
[prometheus][prom metric types].

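As a sketch, reporting a typed metric from the `Simple` plugin above only
changes the accumulator call; the measurement name and field values here are
placeholders:

```go
func (s *Simple) Gather(acc telegraf.Accumulator) error {
    // AddCounter marks the metric with the counter value type; outputs that
    // understand metric types (e.g. prometheus) can make use of it, while
    // the InfluxDB output simply ignores it.
    acc.AddCounter("requests",
        map[string]interface{}{"total": 42},
        map[string]string{},
    )
    return nil
}
```
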
### Data Formats

Some input plugins, such as the [exec][] plugin, can accept any supported
[input data formats][].

In order to enable this, you must specify a `SetParser(parser parsers.Parser)`
function on the plugin object (see the exec plugin for an example), as well as
defining `parser` as a field of the object.

You can then utilize the parser internally in your plugin, parsing data as you
see fit.  Telegraf's configuration layer will take care of instantiating and
creating the `Parser` object.

Add the following to the `SampleConfig()`:

```toml
  ## Data format to consume.
  ## Each data format has its own unique set of configuration options, read
  ## more about them here:
  ## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md
  data_format = "influx"
```

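A minimal sketch of the wiring, assuming a plugin that obtains a raw payload
from somewhere; the `Example` type and the hard-coded payload are illustrative
only:

```go
package example

import (
    "github.com/influxdata/telegraf"
    "github.com/influxdata/telegraf/plugins/parsers"
)

type Example struct {
    parser parsers.Parser
}

// SetParser is called by Telegraf's configuration layer with a parser built
// from the plugin's data_format setting.
func (e *Example) SetParser(parser parsers.Parser) {
    e.parser = parser
}

func (e *Example) Gather(acc telegraf.Accumulator) error {
    payload := []byte("cpu value=42") // stand-in for externally collected data

    metrics, err := e.parser.Parse(payload)
    if err != nil {
        return err
    }

    // Hand the parsed metrics to the accumulator.
    for _, m := range metrics {
        acc.AddFields(m.Name(), m.Fields(), m.Tags(), m.Time())
    }
    return nil
}
```
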
### Service Input Plugins

This section is for developers who want to create new "service" collection
inputs.  A service plugin differs from a regular plugin in that it operates a
background service while Telegraf is running.  One example would be the
`statsd` plugin, which operates a statsd server.

Service Input Plugins are substantially more complicated than a regular
plugin, as they will require goroutines and locks to ensure data integrity.
Service Input Plugins should be avoided unless there is no way to create their
behavior with a regular plugin.

To create a Service Input implement the [telegraf.ServiceInput][] interface.

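A rough sketch of the shape of a service input, assuming a timer-driven
background goroutine; the `Description` and `SampleConfig` methods required by
`telegraf.Input` are omitted for brevity:

```go
package example

import (
    "sync"
    "time"

    "github.com/influxdata/telegraf"
)

type ExampleService struct {
    done chan struct{}
    wg   sync.WaitGroup
}

// Start launches the background service; the Accumulator may be retained
// and used until Stop returns.
func (s *ExampleService) Start(acc telegraf.Accumulator) error {
    s.done = make(chan struct{})
    s.wg.Add(1)
    go func() {
        defer s.wg.Done()
        ticker := time.NewTicker(time.Second)
        defer ticker.Stop()
        for {
            select {
            case <-s.done:
                return
            case <-ticker.C:
                // In a real plugin this is where received data would be
                // converted into metrics.
                acc.AddFields("example", map[string]interface{}{"value": 1}, nil)
            }
        }
    }()
    return nil
}

// Stop signals the background goroutine to exit and waits for it.
func (s *ExampleService) Stop() {
    close(s.done)
    s.wg.Wait()
}

// Gather is still required by telegraf.Input; a push-style service can
// leave it as a no-op.
func (s *ExampleService) Gather(acc telegraf.Accumulator) error {
    return nil
}
```
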
### Metric Tracking

Metric Tracking provides a system to be notified when metrics have been
successfully written to their outputs or otherwise discarded.  This allows
inputs to be created that function as reliable queue consumers.

To get started with metric tracking begin by calling `WithTracking` on the
[telegraf.Accumulator][].  Add metrics using the `AddTrackingMetricGroup`
function on the returned [telegraf.TrackingAccumulator][] and store the
`TrackingID`.  The `Delivered()` channel will return a type with information
about the final delivery status of the metric group.

Check the [amqp_consumer][] for an example implementation.

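A rough sketch of the flow, not a complete plugin; the `maxUndelivered`
constant, the ack/nack hooks, and the exact shape of the delivery info are
assumptions for illustration:

```go
package example

import "github.com/influxdata/telegraf"

// consume tracks delivery of the metrics parsed from a single queue message.
func consume(acc telegraf.Accumulator, group []telegraf.Metric, ack, nack func()) {
    const maxUndelivered = 100
    tacc := acc.WithTracking(maxUndelivered)

    id := tacc.AddTrackingMetricGroup(group)

    // Block until the group's final delivery status is known.
    info := <-tacc.Delivered()
    if info.ID() == id && info.Delivered() {
        ack() // written to the outputs; acknowledge the message
    } else {
        nack() // rejected or dropped; requeue the message
    }
}
```
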
[exec]: https://github.com/influxdata/telegraf/tree/master/plugins/inputs/exec
[amqp_consumer]: https://github.com/influxdata/telegraf/tree/master/plugins/inputs/amqp_consumer
[prom metric types]: https://prometheus.io/docs/concepts/metric_types/
[input data formats]: https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md
[SampleConfig]: https://github.com/influxdata/telegraf/wiki/SampleConfig
[telegraf.Input]: https://godoc.org/github.com/influxdata/telegraf#Input
[telegraf.ServiceInput]: https://godoc.org/github.com/influxdata/telegraf#ServiceInput
[telegraf.Accumulator]: https://godoc.org/github.com/influxdata/telegraf#Accumulator
[telegraf.TrackingAccumulator]: https://godoc.org/github.com/influxdata/telegraf#TrackingAccumulator
@ -0,0 +1,95 @@
### Output Plugins

This section is for developers who want to create a new output sink.  Outputs
are created in a similar manner as collection plugins, and their interface has
similar constructs.

### Output Plugin Guidelines

- An output must conform to the [telegraf.Output][] interface.
- Outputs should call `outputs.Add` in their `init` function to register
  themselves.  See below for a quick example.
- To be available within Telegraf itself, plugins must add themselves to the
  `github.com/influxdata/telegraf/plugins/outputs/all/all.go` file.
- The `SampleConfig` function should return valid toml that describes how the
  plugin can be configured.  This is included in `telegraf config`.  Please
  consult the [SampleConfig][] page for the latest style guidelines.
- The `Description` function should say in one line what this output does.

### Output Plugin Example

```go
package simpleoutput

// simpleoutput.go

import (
    "github.com/influxdata/telegraf"
    "github.com/influxdata/telegraf/plugins/outputs"
)

type Simple struct {
    Ok bool
}

func (s *Simple) Description() string {
    return "a demo output"
}

func (s *Simple) SampleConfig() string {
    return `
  ok = true
`
}

func (s *Simple) Connect() error {
    // Make a connection to the URL here
    return nil
}

func (s *Simple) Close() error {
    // Close connection to the URL here
    return nil
}

func (s *Simple) Write(metrics []telegraf.Metric) error {
    for _, metric := range metrics {
        // write `metric` to the output sink here
    }
    return nil
}

func init() {
    outputs.Add("simpleoutput", func() telegraf.Output { return &Simple{} })
}
```

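With the output registered as `simpleoutput`, it can then be enabled in the
config file:

```toml
[[outputs.simpleoutput]]
  ok = true
```
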
### Data Formats

Some output plugins, such as the [file][] plugin, can write in any supported
[output data formats][].

In order to enable this, you must specify a
`SetSerializer(serializer serializers.Serializer)`
function on the plugin object (see the file plugin for an example), as well as
defining `serializer` as a field of the object.

You can then utilize the serializer internally in your plugin, serializing data
before it's written.  Telegraf's configuration layer will take care of
instantiating and creating the `Serializer` object.

You should also add the following to your `SampleConfig()`:

```toml
  ## Data format to output.
  ## Each data format has its own unique set of configuration options, read
  ## more about them here:
  ## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_OUTPUT.md
  data_format = "influx"
```

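A minimal sketch of the wiring, mirroring the parser example on the input
side; the `Example` type and the stdout sink are illustrative only:

```go
package example

import (
    "os"

    "github.com/influxdata/telegraf"
    "github.com/influxdata/telegraf/plugins/serializers"
)

type Example struct {
    serializer serializers.Serializer
}

// SetSerializer is called by Telegraf's configuration layer with a
// serializer built from the output's data_format setting.
func (e *Example) SetSerializer(serializer serializers.Serializer) {
    e.serializer = serializer
}

func (e *Example) Write(metrics []telegraf.Metric) error {
    for _, m := range metrics {
        b, err := e.serializer.Serialize(m)
        if err != nil {
            return err
        }
        // Stand-in for the real sink.
        if _, err := os.Stdout.Write(b); err != nil {
            return err
        }
    }
    return nil
}
```
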
[file]: https://github.com/influxdata/telegraf/tree/master/plugins/outputs/file
[output data formats]: https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_OUTPUT.md
[SampleConfig]: https://github.com/influxdata/telegraf/wiki/SampleConfig
[telegraf.Output]: https://godoc.org/github.com/influxdata/telegraf#Output
@ -0,0 +1,63 @@
### Processor Plugins

This section is for developers who want to create a new processor plugin.

### Processor Plugin Guidelines

* A processor must conform to the [telegraf.Processor][] interface.
* Processors should call `processors.Add` in their `init` function to register
  themselves.  See below for a quick example.
* To be available within Telegraf itself, plugins must add themselves to the
  `github.com/influxdata/telegraf/plugins/processors/all/all.go` file.
* The `SampleConfig` function should return valid toml that describes how the
  plugin can be configured.  This is included in `telegraf config`.  Please
  consult the [SampleConfig][] page for the latest style guidelines.
* The `Description` function should say in one line what this processor does.

### Processor Plugin Example

```go
package printer

// printer.go

import (
    "fmt"

    "github.com/influxdata/telegraf"
    "github.com/influxdata/telegraf/plugins/processors"
)

type Printer struct {
}

var sampleConfig = `
`

func (p *Printer) SampleConfig() string {
    return sampleConfig
}

func (p *Printer) Description() string {
    return "Print all metrics that pass through this filter."
}

func (p *Printer) Apply(in ...telegraf.Metric) []telegraf.Metric {
    for _, metric := range in {
        fmt.Println(metric.String())
    }
    return in
}

func init() {
    processors.Add("printer", func() telegraf.Processor {
        return &Printer{}
    })
}
```

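Since the printer example takes no options, enabling it from the config file
is a single table:

```toml
[[processors.printer]]
```
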
[SampleConfig]: https://github.com/influxdata/telegraf/wiki/SampleConfig
[telegraf.Processor]: https://godoc.org/github.com/influxdata/telegraf#Processor
13 input.go
@ -13,17 +13,10 @@ type Input interface {
}

type ServiceInput interface {
	// SampleConfig returns the default configuration of the Input
	SampleConfig() string
	Input

	// Description returns a one-sentence description on the Input
	Description() string

	// Gather takes in an accumulator and adds the metrics that the Input
	// gathers. This is called every "interval"
	Gather(Accumulator) error

	// Start starts the ServiceInput's service, whatever that may be
	// Start the ServiceInput. The Accumulator may be retained and used until
	// Stop returns.
	Start(Accumulator) error

	// Stop stops the services and closes any necessary channels and connections
@ -1,130 +0,0 @@
package buffer

import (
	"sync"

	"github.com/influxdata/telegraf"
	"github.com/influxdata/telegraf/selfstat"
)

var (
	MetricsWritten = selfstat.Register("agent", "metrics_written", map[string]string{})
	MetricsDropped = selfstat.Register("agent", "metrics_dropped", map[string]string{})
)

// Buffer is an object for storing metrics in a circular buffer.
type Buffer struct {
	sync.Mutex
	buf   []telegraf.Metric
	first int
	last  int
	size  int
	empty bool
}

// NewBuffer returns a Buffer
// size is the maximum number of metrics that Buffer will cache. If Add is
// called when the buffer is full, then the oldest metric(s) will be dropped.
func NewBuffer(size int) *Buffer {
	return &Buffer{
		buf:   make([]telegraf.Metric, size),
		first: 0,
		last:  0,
		size:  size,
		empty: true,
	}
}

// IsEmpty returns true if Buffer is empty.
func (b *Buffer) IsEmpty() bool {
	return b.empty
}

// Len returns the current length of the buffer.
func (b *Buffer) Len() int {
	if b.empty {
		return 0
	} else if b.first <= b.last {
		return b.last - b.first + 1
	}
	// Spans the end of array.
	// size - gap in the middle
	return b.size - (b.first - b.last - 1) // size - gap
}

func (b *Buffer) push(m telegraf.Metric) {
	// Empty
	if b.empty {
		b.last = b.first // Reset
		b.buf[b.last] = m
		b.empty = false
		return
	}

	b.last++
	b.last %= b.size

	// Full
	if b.first == b.last {
		MetricsDropped.Incr(1)
		b.first = (b.first + 1) % b.size
	}
	b.buf[b.last] = m
}

// Add adds metrics to the buffer.
func (b *Buffer) Add(metrics ...telegraf.Metric) {
	b.Lock()
	defer b.Unlock()
	for i := range metrics {
		MetricsWritten.Incr(1)
		b.push(metrics[i])
	}
}

// Batch returns a batch of metrics of size batchSize.
// the batch will be of maximum length batchSize. It can be less than batchSize,
// if the length of Buffer is less than batchSize.
func (b *Buffer) Batch(batchSize int) []telegraf.Metric {
	b.Lock()
	defer b.Unlock()
	outLen := min(b.Len(), batchSize)
	out := make([]telegraf.Metric, outLen)
	if outLen == 0 {
		return out
	}

	// We copy everything right of first up to last, count or end
	// b.last >= rightInd || b.last < b.first
	// therefore won't copy past b.last
	rightInd := min(b.size, b.first+outLen) - 1

	copyCount := copy(out, b.buf[b.first:rightInd+1])

	// We've emptied the ring
	if rightInd == b.last {
		b.empty = true
	}
	b.first = rightInd + 1
	b.first %= b.size

	// We circle back for the rest
	if copyCount < outLen {
		right := min(b.last, outLen-copyCount)
		copy(out[copyCount:], b.buf[b.first:right+1])
		// We've emptied the ring
		if right == b.last {
			b.empty = true
		}
		b.first = right + 1
		b.first %= b.size
	}
	return out
}

func min(a, b int) int {
	if b < a {
		return b
	}
	return a
}
@ -1,203 +0,0 @@
package buffer

import (
	"sync"
	"sync/atomic"
	"testing"

	"github.com/influxdata/telegraf"
	"github.com/influxdata/telegraf/testutil"

	"github.com/stretchr/testify/assert"
)

var metricList = []telegraf.Metric{
	testutil.TestMetric(2, "mymetric1"),
	testutil.TestMetric(1, "mymetric2"),
	testutil.TestMetric(11, "mymetric3"),
	testutil.TestMetric(15, "mymetric4"),
	testutil.TestMetric(8, "mymetric5"),
}

func makeBench5(b *testing.B, freq, batchSize int) {
	const k = 1000
	var wg sync.WaitGroup
	buf := NewBuffer(10000)
	m := testutil.TestMetric(1, "mymetric")

	for i := 0; i < b.N; i++ {
		buf.Add(m, m, m, m, m)
		if i%(freq*k) == 0 {
			wg.Add(1)
			go func() {
				buf.Batch(batchSize * k)
				wg.Done()
			}()
		}
	}
	// Flush
	buf.Batch(b.N)
	wg.Wait()
}

func makeBenchStrict(b *testing.B, freq, batchSize int) {
	const k = 1000
	var count uint64
	var wg sync.WaitGroup
	buf := NewBuffer(10000)
	m := testutil.TestMetric(1, "mymetric")

	for i := 0; i < b.N; i++ {
		buf.Add(m)
		if i%(freq*k) == 0 {
			wg.Add(1)
			go func() {
				defer wg.Done()
				l := len(buf.Batch(batchSize * k))
				atomic.AddUint64(&count, uint64(l))
			}()
		}
	}
	// Flush
	wg.Add(1)
	go func() {
		l := len(buf.Batch(b.N))
		atomic.AddUint64(&count, uint64(l))
		wg.Done()
	}()

	wg.Wait()
	if count != uint64(b.N) {
		b.Errorf("not all metrics came out. %d of %d", count, b.N)
	}
}

func makeBench(b *testing.B, freq, batchSize int) {
	const k = 1000
	var wg sync.WaitGroup
	buf := NewBuffer(10000)
	m := testutil.TestMetric(1, "mymetric")

	for i := 0; i < b.N; i++ {
		buf.Add(m)
		if i%(freq*k) == 0 {
			wg.Add(1)
			go func() {
				buf.Batch(batchSize * k)
				wg.Done()
			}()
		}
	}
	wg.Wait()
	// Flush
	buf.Batch(b.N)
}

func BenchmarkBufferBatch5Add(b *testing.B) {
	makeBench5(b, 100, 101)
}

func BenchmarkBufferBigInfrequentBatchCatchup(b *testing.B) {
	makeBench(b, 100, 101)
}

func BenchmarkBufferOftenBatch(b *testing.B) {
	makeBench(b, 1, 1)
}

func BenchmarkBufferAlmostBatch(b *testing.B) {
	makeBench(b, 10, 9)
}

func BenchmarkBufferSlowBatch(b *testing.B) {
	makeBench(b, 10, 1)
}

func BenchmarkBufferBatchNoDrop(b *testing.B) {
	makeBenchStrict(b, 1, 4)
}

func BenchmarkBufferCatchup(b *testing.B) {
	buf := NewBuffer(10000)
	m := testutil.TestMetric(1, "mymetric")

	for i := 0; i < b.N; i++ {
		buf.Add(m)
	}
	buf.Batch(b.N)
}

func BenchmarkAddMetrics(b *testing.B) {
	buf := NewBuffer(10000)
	m := testutil.TestMetric(1, "mymetric")
	for n := 0; n < b.N; n++ {
		buf.Add(m)
	}
}

func TestNewBufferBasicFuncs(t *testing.T) {
	b := NewBuffer(10)
	MetricsDropped.Set(0)
	MetricsWritten.Set(0)

	assert.True(t, b.IsEmpty())
	assert.Zero(t, b.Len())
	assert.Zero(t, MetricsDropped.Get())
	assert.Zero(t, MetricsWritten.Get())

	m := testutil.TestMetric(1, "mymetric")
	b.Add(m)
	assert.False(t, b.IsEmpty())
	assert.Equal(t, b.Len(), 1)
	assert.Equal(t, int64(0), MetricsDropped.Get())
	assert.Equal(t, int64(1), MetricsWritten.Get())

	b.Add(metricList...)
	assert.False(t, b.IsEmpty())
	assert.Equal(t, b.Len(), 6)
	assert.Equal(t, int64(0), MetricsDropped.Get())
	assert.Equal(t, int64(6), MetricsWritten.Get())
}

func TestDroppingMetrics(t *testing.T) {
	b := NewBuffer(10)
	MetricsDropped.Set(0)
	MetricsWritten.Set(0)

	// Add up to the size of the buffer
	b.Add(metricList...)
	b.Add(metricList...)
	assert.False(t, b.IsEmpty())
	assert.Equal(t, b.Len(), 10)
	assert.Equal(t, int64(0), MetricsDropped.Get())
	assert.Equal(t, int64(10), MetricsWritten.Get())

	// Add 5 more and verify they were dropped
	b.Add(metricList...)
	assert.False(t, b.IsEmpty())
	assert.Equal(t, b.Len(), 10)
	assert.Equal(t, int64(5), MetricsDropped.Get())
	assert.Equal(t, int64(15), MetricsWritten.Get())
}

func TestGettingBatches(t *testing.T) {
	b := NewBuffer(20)
	MetricsDropped.Set(0)
	MetricsWritten.Set(0)

	// Verify that the buffer returned is smaller than requested when there are
	// not as many items as requested.
	b.Add(metricList...)
	batch := b.Batch(10)
	assert.Len(t, batch, 5)

	// Verify that the buffer is now empty
	assert.True(t, b.IsEmpty())
	assert.Zero(t, b.Len())
	assert.Zero(t, MetricsDropped.Get())
	assert.Equal(t, int64(5), MetricsWritten.Get())

	// Verify that the buffer returned is not more than the size requested
	b.Add(metricList...)
	batch = b.Batch(3)
	assert.Len(t, batch, 3)

	// Verify that buffer is not empty
	assert.False(t, b.IsEmpty())
	assert.Equal(t, b.Len(), 2)
	assert.Equal(t, int64(0), MetricsDropped.Get())
	assert.Equal(t, int64(10), MetricsWritten.Get())
}
@ -9,7 +9,6 @@ import (
"math"
|
||||
"os"
|
||||
"path/filepath"
|
||||
|
||||
"regexp"
|
||||
"runtime"
|
||||
"sort"
|
||||
|
@ -26,7 +25,6 @@ import (
"github.com/influxdata/telegraf/plugins/parsers"
|
||||
"github.com/influxdata/telegraf/plugins/processors"
|
||||
"github.com/influxdata/telegraf/plugins/serializers"
|
||||
|
||||
"github.com/influxdata/toml"
|
||||
"github.com/influxdata/toml/ast"
|
||||
)
|
||||
|
@ -622,6 +620,19 @@ func (c *Config) LoadConfig(path string) error {
		}
	}

	if !c.Agent.OmitHostname {
		if c.Agent.Hostname == "" {
			hostname, err := os.Hostname()
			if err != nil {
				return err
			}

			c.Agent.Hostname = hostname
		}

		c.Tags["host"] = c.Agent.Hostname
	}

	// Parse all the rest of the plugins:
	for name, val := range tbl.Fields {
		subTable, ok := val.(*ast.Table)
@ -709,6 +720,7 @@ func (c *Config) LoadConfig(path string) error {
	if len(c.Processors) > 1 {
		sort.Sort(c.Processors)
	}

	return nil
}
@ -876,6 +888,7 @@ func (c *Config) addInput(name string, table *ast.Table) error {
	}

	rp := models.NewRunningInput(input, pluginConfig)
	rp.SetDefaultTags(c.Tags)
	c.Inputs = append(c.Inputs, rp)
	return nil
}
@ -1751,6 +1764,8 @@ func buildOutput(name string, tbl *ast.Table) (*models.OutputConfig, error) {
		Name:   name,
		Filter: filter,
	}

	// TODO
	// Outputs don't support FieldDrop/FieldPass, so set to NameDrop/NamePass
	if len(oc.Filter.FieldDrop) > 0 {
		oc.Filter.NameDrop = oc.Filter.FieldDrop
@ -1758,5 +1773,47 @@ func buildOutput(name string, tbl *ast.Table) (*models.OutputConfig, error) {
	if len(oc.Filter.FieldPass) > 0 {
		oc.Filter.NamePass = oc.Filter.FieldPass
	}

	if node, ok := tbl.Fields["flush_interval"]; ok {
		if kv, ok := node.(*ast.KeyValue); ok {
			if str, ok := kv.Value.(*ast.String); ok {
				dur, err := time.ParseDuration(str.Value)
				if err != nil {
					return nil, err
				}

				oc.FlushInterval = dur
			}
		}
	}

	if node, ok := tbl.Fields["metric_buffer_limit"]; ok {
		if kv, ok := node.(*ast.KeyValue); ok {
			if integer, ok := kv.Value.(*ast.Integer); ok {
				v, err := integer.Int()
				if err != nil {
					return nil, err
				}
				oc.MetricBufferLimit = int(v)
			}
		}
	}

	if node, ok := tbl.Fields["metric_batch_size"]; ok {
		if kv, ok := node.(*ast.KeyValue); ok {
			if integer, ok := kv.Value.(*ast.Integer); ok {
				v, err := integer.Int()
				if err != nil {
					return nil, err
				}
				oc.MetricBatchSize = int(v)
			}
		}
	}

	delete(tbl.Fields, "flush_interval")
	delete(tbl.Fields, "metric_buffer_limit")
	delete(tbl.Fields, "metric_batch_size")

	return oc, nil
}
@ -4,6 +4,7 @@ import (
"bufio"
|
||||
"bytes"
|
||||
"compress/gzip"
|
||||
"context"
|
||||
"crypto/rand"
|
||||
"errors"
|
||||
"io"
|
||||
|
@ -246,6 +247,51 @@ func RandomSleep(max time.Duration, shutdown chan struct{}) {
	}
}

// RandomDuration returns a random duration between 0 and max.
func RandomDuration(max time.Duration) time.Duration {
	if max == 0 {
		return 0
	}

	var sleepns int64
	maxSleep := big.NewInt(max.Nanoseconds())
	if j, err := rand.Int(rand.Reader, maxSleep); err == nil {
		sleepns = j.Int64()
	}

	return time.Duration(sleepns)
}

// SleepContext sleeps until the context is closed or the duration is reached.
func SleepContext(ctx context.Context, duration time.Duration) error {
	if duration == 0 {
		return nil
	}

	t := time.NewTimer(duration)
	select {
	case <-t.C:
		return nil
	case <-ctx.Done():
		t.Stop()
		return ctx.Err()
	}
}

// AlignDuration returns the duration until next aligned interval.
func AlignDuration(tm time.Time, interval time.Duration) time.Duration {
	return AlignTime(tm, interval).Sub(tm)
}

// AlignTime returns the time of the next aligned interval.
func AlignTime(tm time.Time, interval time.Duration) time.Time {
	truncated := tm.Truncate(interval)
	if truncated == tm {
		return tm
	}
	return truncated.Add(interval)
}

// Exit status takes the error from exec.Command
// and returns the exit status and true
// if error is not exit status, will return 0 and false
@ -9,6 +9,7 @@ import (
"time"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
type SnakeTest struct {
|
||||
|
@ -217,3 +218,55 @@ func TestVersionAlreadySet(t *testing.T) {
	assert.Equal(t, "foo", Version())
}

func TestAlignDuration(t *testing.T) {
	tests := []struct {
		name     string
		now      time.Time
		interval time.Duration
		expected time.Duration
	}{
		{
			name:     "aligned",
			now:      time.Date(2018, 1, 1, 1, 1, 0, 0, time.UTC),
			interval: 10 * time.Second,
			expected: 0 * time.Second,
		},
		{
			name:     "standard interval",
			now:      time.Date(2018, 1, 1, 1, 1, 1, 0, time.UTC),
			interval: 10 * time.Second,
			expected: 9 * time.Second,
		},
		{
			name:     "odd interval",
			now:      time.Date(2018, 1, 1, 1, 1, 1, 0, time.UTC),
			interval: 3 * time.Second,
			expected: 2 * time.Second,
		},
		{
			name:     "sub second interval",
			now:      time.Date(2018, 1, 1, 1, 1, 0, 5e8, time.UTC),
			interval: 1 * time.Second,
			expected: 500 * time.Millisecond,
		},
		{
			name:     "non divisible not aligned on minutes",
			now:      time.Date(2018, 1, 1, 1, 0, 0, 0, time.UTC),
			interval: 1*time.Second + 100*time.Millisecond,
			expected: 400 * time.Millisecond,
		},
		{
			name:     "long interval",
			now:      time.Date(2018, 1, 1, 1, 1, 0, 0, time.UTC),
			interval: 1 * time.Hour,
			expected: 59 * time.Minute,
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			actual := AlignDuration(tt.now, tt.interval)
			require.Equal(t, tt.expected, actual)
		})
	}
}
@ -0,0 +1,214 @@
package models

import (
	"sync"

	"github.com/influxdata/telegraf"
	"github.com/influxdata/telegraf/selfstat"
)

var (
	AgentMetricsWritten = selfstat.Register("agent", "metrics_written", map[string]string{})
	AgentMetricsDropped = selfstat.Register("agent", "metrics_dropped", map[string]string{})
)

// Buffer stores metrics in a circular buffer.
type Buffer struct {
	sync.Mutex
	buf   []telegraf.Metric
	first int // index of the first/oldest metric
	last  int // one after the index of the last/newest metric
	size  int // number of metrics currently in the buffer
	cap   int // the capacity of the buffer

	batchFirst int // index of the first metric in the batch
	batchLast  int // one after the index of the last metric in the batch
	batchSize  int // number of metrics currently in the batch

	MetricsAdded   selfstat.Stat
	MetricsWritten selfstat.Stat
	MetricsDropped selfstat.Stat
}

// NewBuffer returns a new empty Buffer with the given capacity.
func NewBuffer(name string, capacity int) *Buffer {
	b := &Buffer{
		buf:   make([]telegraf.Metric, capacity),
		first: 0,
		last:  0,
		size:  0,
		cap:   capacity,

		MetricsAdded: selfstat.Register(
			"write",
			"metrics_added",
			map[string]string{"output": name},
		),
		MetricsWritten: selfstat.Register(
			"write",
			"metrics_written",
			map[string]string{"output": name},
		),
		MetricsDropped: selfstat.Register(
			"write",
			"metrics_dropped",
			map[string]string{"output": name},
		),
	}
	return b
}

// Len returns the number of metrics currently in the buffer.
func (b *Buffer) Len() int {
	b.Lock()
	defer b.Unlock()

	return b.size
}

func (b *Buffer) metricAdded() {
	b.MetricsAdded.Incr(1)
}

func (b *Buffer) metricWritten(metric telegraf.Metric) {
	AgentMetricsWritten.Incr(1)
	b.MetricsWritten.Incr(1)
	metric.Accept()
}

func (b *Buffer) metricDropped(metric telegraf.Metric) {
	AgentMetricsDropped.Incr(1)
	b.MetricsDropped.Incr(1)
	metric.Reject()
}

func (b *Buffer) inBatch() bool {
	if b.batchSize == 0 {
		return false
	}

	if b.batchFirst < b.batchLast {
		return b.last >= b.batchFirst && b.last < b.batchLast
	} else {
		return b.last >= b.batchFirst || b.last < b.batchLast
	}
}

func (b *Buffer) add(m telegraf.Metric) {
	// Check if Buffer is full
	if b.size == b.cap {
		if b.batchSize == 0 {
			// No batch taken by the output, we can drop the metric now.
			b.metricDropped(b.buf[b.last])
		} else if b.inBatch() {
			// There is an outstanding batch and this will overwrite a metric
			// in it, delay the dropping only in case the batch gets rejected.
			b.batchSize--
			b.batchFirst++
			b.batchFirst %= b.cap
		} else {
			// There is an outstanding batch, but this overwrites a metric
			// outside of it.
			b.metricDropped(b.buf[b.last])
		}
	}

	b.metricAdded()

	b.buf[b.last] = m
	b.last++
	b.last %= b.cap

	if b.size == b.cap {
		b.first++
		b.first %= b.cap
	}

	b.size = min(b.size+1, b.cap)
}

// Add adds metrics to the buffer
func (b *Buffer) Add(metrics ...telegraf.Metric) {
	b.Lock()
	defer b.Unlock()

	for i := range metrics {
		b.add(metrics[i])
	}
}

// Batch returns a slice containing up to batchSize of the oldest metrics in
// the buffer.
//
// The metrics contained in the batch are not removed from the buffer, instead
// the last batch is recorded and removed only if Accept is called.
func (b *Buffer) Batch(batchSize int) []telegraf.Metric {
	b.Lock()
	defer b.Unlock()

	outLen := min(b.size, batchSize)
	out := make([]telegraf.Metric, outLen)
	if outLen == 0 {
		return out
	}

	b.batchFirst = b.first
	b.batchLast = b.first + outLen
	b.batchLast %= b.cap
	b.batchSize = outLen

	until := min(b.cap, b.first+outLen)

	n := copy(out, b.buf[b.first:until])
	if n < outLen {
		copy(out[n:], b.buf[:outLen-n])
	}
	return out
}

// Accept removes the metrics contained in the last batch.
func (b *Buffer) Accept(batch []telegraf.Metric) {
	b.Lock()
	defer b.Unlock()

	for _, m := range batch {
		b.metricWritten(m)
	}

	if b.batchSize > 0 {
		b.size -= b.batchSize
		b.first += b.batchSize
		b.first %= b.cap
	}

	b.resetBatch()
}

// Reject clears the current batch record so that calls to Accept will have no
// effect.
func (b *Buffer) Reject(batch []telegraf.Metric) {
	b.Lock()
	defer b.Unlock()

	if len(batch) > b.batchSize {
		// Part or all of the batch was dropped before reject was called.
		for _, m := range batch[b.batchSize:] {
			b.metricDropped(m)
		}
	}

	b.resetBatch()
}

func (b *Buffer) resetBatch() {
	b.batchFirst = 0
	b.batchLast = 0
	b.batchSize = 0
}

func min(a, b int) int {
	if b < a {
		return b
	}
	return a
}
@ -0,0 +1,385 @@
package models

import (
	"testing"
	"time"

	"github.com/influxdata/telegraf"
	"github.com/influxdata/telegraf/metric"
	"github.com/stretchr/testify/require"
)

type MockMetric struct {
	telegraf.Metric
	AcceptF func()
	RejectF func()
	DropF   func()
}

func (m *MockMetric) Accept() {
	m.AcceptF()
}

func (m *MockMetric) Reject() {
	m.RejectF()
}

func (m *MockMetric) Drop() {
	m.DropF()
}

func Metric() telegraf.Metric {
	m, err := metric.New(
		"cpu",
		map[string]string{},
		map[string]interface{}{
			"value": 42.0,
		},
		time.Unix(0, 0),
	)
	if err != nil {
		panic(err)
	}
	return m
}

func BenchmarkAddMetrics(b *testing.B) {
	buf := NewBuffer("test", 10000)
	m := Metric()
	for n := 0; n < b.N; n++ {
		buf.Add(m)
	}
}

func setup(b *Buffer) *Buffer {
	b.MetricsAdded.Set(0)
	b.MetricsWritten.Set(0)
	b.MetricsDropped.Set(0)
	return b
}

func TestBuffer_LenEmpty(t *testing.T) {
	b := setup(NewBuffer("test", 5))

	require.Equal(t, 0, b.Len())
}

func TestBuffer_LenOne(t *testing.T) {
	m := Metric()
	b := setup(NewBuffer("test", 5))
	b.Add(m)

	require.Equal(t, 1, b.Len())
}

func TestBuffer_LenFull(t *testing.T) {
	m := Metric()
	b := setup(NewBuffer("test", 5))
	b.Add(m, m, m, m, m)

	require.Equal(t, 5, b.Len())
}

func TestBuffer_LenOverfill(t *testing.T) {
	m := Metric()
	b := setup(NewBuffer("test", 5))
	setup(b)
	b.Add(m, m, m, m, m, m)

	require.Equal(t, 5, b.Len())
}

func TestBuffer_BatchLenZero(t *testing.T) {
	b := setup(NewBuffer("test", 5))
	batch := b.Batch(0)

	require.Len(t, batch, 0)
}

func TestBuffer_BatchLenBufferEmpty(t *testing.T) {
	b := setup(NewBuffer("test", 5))
	batch := b.Batch(2)

	require.Len(t, batch, 0)
}

func TestBuffer_BatchLenUnderfill(t *testing.T) {
	m := Metric()
	b := setup(NewBuffer("test", 5))
	b.Add(m)
	batch := b.Batch(2)

	require.Len(t, batch, 1)
}

func TestBuffer_BatchLenFill(t *testing.T) {
	m := Metric()
	b := setup(NewBuffer("test", 5))
	b.Add(m, m, m)
	batch := b.Batch(2)
	require.Len(t, batch, 2)
}

func TestBuffer_BatchLenExact(t *testing.T) {
	m := Metric()
	b := setup(NewBuffer("test", 5))
	b.Add(m, m)
	batch := b.Batch(2)
	require.Len(t, batch, 2)
}

func TestBuffer_BatchLenLargerThanBuffer(t *testing.T) {
	m := Metric()
	b := setup(NewBuffer("test", 5))
	b.Add(m, m, m, m, m)
	batch := b.Batch(6)
	require.Len(t, batch, 5)
}

func TestBuffer_BatchWrap(t *testing.T) {
	m := Metric()
	b := setup(NewBuffer("test", 5))
	b.Add(m, m, m, m, m)
	batch := b.Batch(2)
	b.Accept(batch)
	b.Add(m, m)
	batch = b.Batch(5)
	require.Len(t, batch, 5)
}

func TestBuffer_AddDropsOverwrittenMetrics(t *testing.T) {
	m := Metric()
	b := setup(NewBuffer("test", 5))

	b.Add(m, m, m, m, m)
	b.Add(m, m, m, m, m)

	require.Equal(t, int64(5), b.MetricsDropped.Get())
	require.Equal(t, int64(0), b.MetricsWritten.Get())
}

func TestBuffer_AcceptRemovesBatch(t *testing.T) {
	m := Metric()
	b := setup(NewBuffer("test", 5))
	b.Add(m, m, m)
	batch := b.Batch(2)
	b.Accept(batch)
	require.Equal(t, 1, b.Len())
}

func TestBuffer_RejectLeavesBatch(t *testing.T) {
	m := Metric()
	b := setup(NewBuffer("test", 5))
	b.Add(m, m, m)
	batch := b.Batch(2)
	b.Reject(batch)
	require.Equal(t, 3, b.Len())
}

func TestBuffer_AcceptWritesOverwrittenBatch(t *testing.T) {
	m := Metric()
	b := setup(NewBuffer("test", 5))

	b.Add(m, m, m, m, m)
	batch := b.Batch(5)
	b.Add(m, m, m, m, m)
	b.Accept(batch)

	require.Equal(t, int64(0), b.MetricsDropped.Get())
	require.Equal(t, int64(5), b.MetricsWritten.Get())
}

func TestBuffer_BatchRejectDropsOverwrittenBatch(t *testing.T) {
	m := Metric()
	b := setup(NewBuffer("test", 5))

	b.Add(m, m, m, m, m)
	batch := b.Batch(5)
	b.Add(m, m, m, m, m)
	b.Reject(batch)

	require.Equal(t, int64(5), b.MetricsDropped.Get())
	require.Equal(t, int64(0), b.MetricsWritten.Get())
}

func TestBuffer_MetricsOverwriteBatchAccept(t *testing.T) {
	m := Metric()
	b := setup(NewBuffer("test", 5))

	b.Add(m, m, m, m, m)
	batch := b.Batch(3)
	b.Add(m, m, m)
	b.Accept(batch)
	require.Equal(t, int64(0), b.MetricsDropped.Get())
	require.Equal(t, int64(3), b.MetricsWritten.Get())
}

func TestBuffer_MetricsOverwriteBatchReject(t *testing.T) {
	m := Metric()
	b := setup(NewBuffer("test", 5))

	b.Add(m, m, m, m, m)
	batch := b.Batch(3)
	b.Add(m, m, m)
	b.Reject(batch)
	require.Equal(t, int64(3), b.MetricsDropped.Get())
	require.Equal(t, int64(0), b.MetricsWritten.Get())
}

func TestBuffer_MetricsBatchAcceptRemoved(t *testing.T) {
	m := Metric()
	b := setup(NewBuffer("test", 5))

	b.Add(m, m, m, m, m)
	batch := b.Batch(3)
	b.Add(m, m, m, m, m)
	b.Accept(batch)
	require.Equal(t, int64(2), b.MetricsDropped.Get())
	require.Equal(t, int64(3), b.MetricsWritten.Get())
}

func TestBuffer_WrapWithBatch(t *testing.T) {
	m := Metric()
	b := setup(NewBuffer("test", 5))

	b.Add(m, m, m)
	b.Batch(3)
	b.Add(m, m, m, m, m, m)

	require.Equal(t, int64(1), b.MetricsDropped.Get())
}

func TestBuffer_BatchNotRemoved(t *testing.T) {
	m := Metric()
	b := setup(NewBuffer("test", 5))
	b.Add(m, m, m, m, m)
	b.Batch(2)
	require.Equal(t, 5, b.Len())
}

func TestBuffer_BatchRejectAcceptNoop(t *testing.T) {
	m := Metric()
	b := setup(NewBuffer("test", 5))
	b.Add(m, m, m, m, m)
	batch := b.Batch(2)
	b.Reject(batch)
	b.Accept(batch)
	require.Equal(t, 5, b.Len())
}

func TestBuffer_AcceptCallsMetricAccept(t *testing.T) {
	var accept int
	mm := &MockMetric{
		Metric: Metric(),
		AcceptF: func() {
			accept++
		},
	}
	b := setup(NewBuffer("test", 5))
	b.Add(mm, mm, mm)
	batch := b.Batch(2)
	b.Accept(batch)
	require.Equal(t, 2, accept)
}

func TestBuffer_AddCallsMetricRejectWhenNoBatch(t *testing.T) {
	var reject int
	mm := &MockMetric{
		Metric: Metric(),
		RejectF: func() {
			reject++
		},
	}
	b := setup(NewBuffer("test", 5))
	setup(b)
	b.Add(mm, mm, mm, mm, mm)
	b.Add(mm, mm)
	require.Equal(t, 2, reject)
}

func TestBuffer_AddCallsMetricRejectWhenNotInBatch(t *testing.T) {
	var reject int
	mm := &MockMetric{
		Metric: Metric(),
		RejectF: func() {
			reject++
		},
	}
	b := setup(NewBuffer("test", 5))
	setup(b)
	b.Add(mm, mm, mm, mm, mm)
	batch := b.Batch(2)
	b.Add(mm, mm, mm, mm)
	// metric[2] and metric[3] rejected
	require.Equal(t, 2, reject)
	b.Reject(batch)
	// metric[1] and metric[2] now rejected
	require.Equal(t, 4, reject)
}

func TestBuffer_RejectCallsMetricRejectWithOverwritten(t *testing.T) {
	var reject int
	mm := &MockMetric{
		Metric: Metric(),
		RejectF: func() {
			reject++
		},
	}
	b := setup(NewBuffer("test", 5))
	b.Add(mm, mm, mm, mm, mm)
	batch := b.Batch(5)
	b.Add(mm, mm)
	require.Equal(t, 0, reject)
	b.Reject(batch)
	require.Equal(t, 2, reject)
}

func TestBuffer_AddOverwriteAndReject(t *testing.T) {
	var reject int
	mm := &MockMetric{
		Metric: Metric(),
		RejectF: func() {
			reject++
		},
	}
	b := setup(NewBuffer("test", 5))
	b.Add(mm, mm, mm, mm, mm)
	batch := b.Batch(5)
	b.Add(mm, mm, mm, mm, mm)
	b.Add(mm, mm, mm, mm, mm)
	b.Add(mm, mm, mm, mm, mm)
	b.Add(mm, mm, mm, mm, mm)
	require.Equal(t, 15, reject)
	b.Reject(batch)
	require.Equal(t, 20, reject)
}

func TestBuffer_AddOverwriteAndRejectOffset(t *testing.T) {
	var reject int
	var accept int
	mm := &MockMetric{
		Metric: Metric(),
		RejectF: func() {
			reject++
		},
		AcceptF: func() {
			accept++
		},
	}
	b := setup(NewBuffer("test", 5))
	b.Add(mm, mm, mm)
	b.Add(mm, mm, mm, mm)
	require.Equal(t, 2, reject)
	batch := b.Batch(5)
	b.Add(mm, mm, mm, mm)
	require.Equal(t, 2, reject)
	b.Add(mm, mm, mm, mm)
	require.Equal(t, 5, reject)
	b.Add(mm, mm, mm, mm)
	require.Equal(t, 9, reject)
	b.Add(mm, mm, mm, mm)
	require.Equal(t, 13, reject)
	b.Accept(batch)
	require.Equal(t, 13, reject)
	require.Equal(t, 5, accept)
}
@ -6,6 +6,7 @@ import (
"github.com/influxdata/telegraf"
|
||||
"github.com/influxdata/telegraf/metric"
|
||||
"github.com/influxdata/telegraf/testutil"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
|
@ -480,3 +481,45 @@ func TestFilter_FilterTagsPassAndDrop(t *testing.T) {
	}

}

func BenchmarkFilter(b *testing.B) {
	tests := []struct {
		name   string
		filter Filter
		metric telegraf.Metric
	}{
		{
			name:   "empty filter",
			filter: Filter{},
			metric: testutil.MustMetric("cpu",
				map[string]string{},
				map[string]interface{}{
					"value": 42,
				},
				time.Unix(0, 0),
			),
		},
		{
			name: "namepass",
			filter: Filter{
				NamePass: []string{"cpu"},
			},
			metric: testutil.MustMetric("cpu",
				map[string]string{},
				map[string]interface{}{
					"value": 42,
				},
				time.Unix(0, 0),
			),
		},
	}

	for _, tt := range tests {
		b.Run(tt.name, func(b *testing.B) {
			require.NoError(b, tt.filter.Compile())
			for n := 0; n < b.N; n++ {
				tt.filter.Select(tt.metric)
			}
		})
	}
}
@ -1,30 +1,53 @@
package models

import (
	"log"
	"sync"
	"time"

	"github.com/influxdata/telegraf"
	"github.com/influxdata/telegraf/selfstat"
)

type RunningAggregator struct {
	a      telegraf.Aggregator
	Config *AggregatorConfig

	metrics chan telegraf.Metric

	sync.Mutex
	Aggregator  telegraf.Aggregator
	Config      *AggregatorConfig
	periodStart time.Time
	periodEnd   time.Time

	MetricsPushed   selfstat.Stat
	MetricsFiltered selfstat.Stat
	MetricsDropped  selfstat.Stat
	PushTime        selfstat.Stat
}

func NewRunningAggregator(
	a telegraf.Aggregator,
	conf *AggregatorConfig,
	aggregator telegraf.Aggregator,
	config *AggregatorConfig,
) *RunningAggregator {
	return &RunningAggregator{
		a:          a,
		Config:     conf,
		metrics:    make(chan telegraf.Metric, 100),
		Aggregator: aggregator,
		Config:     config,
		MetricsPushed: selfstat.Register(
			"aggregate",
			"metrics_pushed",
			map[string]string{"aggregator": config.Name},
		),
		MetricsFiltered: selfstat.Register(
			"aggregate",
			"metrics_filtered",
			map[string]string{"aggregator": config.Name},
		),
		MetricsDropped: selfstat.Register(
			"aggregate",
			"metrics_dropped",
			map[string]string{"aggregator": config.Name},
		),
		PushTime: selfstat.Register(
			"aggregate",
			"push_time_ns",
			map[string]string{"aggregator": config.Name},
		),
	}
}

@ -46,6 +69,15 @@ func (r *RunningAggregator) Name() string {
	return "aggregators." + r.Config.Name
}

func (r *RunningAggregator) Period() time.Duration {
	return r.Config.Period
}

func (r *RunningAggregator) SetPeriodStart(start time.Time) {
	r.periodStart = start
	r.periodEnd = r.periodStart.Add(r.Config.Period).Add(r.Config.Delay)
}

func (r *RunningAggregator) MakeMetric(metric telegraf.Metric) telegraf.Metric {
	m := makemetric(
		metric,

@ -59,9 +91,21 @@ func (r *RunningAggregator) MakeMetric(metric telegraf.Metric) telegraf.Metric {
		m.SetAggregate(true)
	}

	r.MetricsPushed.Incr(1)

	return m
}

func (r *RunningAggregator) metricFiltered(metric telegraf.Metric) {
	r.MetricsFiltered.Incr(1)
	metric.Accept()
}

func (r *RunningAggregator) metricDropped(metric telegraf.Metric) {
	r.MetricsDropped.Incr(1)
	metric.Accept()
}

// Add a metric to the aggregator and return true if the original metric
// should be dropped.
func (r *RunningAggregator) Add(metric telegraf.Metric) bool {

@ -74,75 +118,31 @@ func (r *RunningAggregator) Add(metric telegraf.Metric) bool {
		return r.Config.DropOriginal
	}

	r.metrics <- metric
	r.Lock()
	defer r.Unlock()

	if r.periodStart.IsZero() || metric.Time().Before(r.periodStart) || metric.Time().After(r.periodEnd) {
		r.metricDropped(metric)
		return false
	}

	r.Aggregator.Add(metric)
	return r.Config.DropOriginal
}

func (r *RunningAggregator) add(in telegraf.Metric) {
	r.a.Add(in)
func (r *RunningAggregator) Push(acc telegraf.Accumulator) {
	r.Lock()
	defer r.Unlock()

	r.periodStart = r.periodEnd
	r.periodEnd = r.periodStart.Add(r.Config.Period).Add(r.Config.Delay)
	r.push(acc)
	r.Aggregator.Reset()
}

func (r *RunningAggregator) push(acc telegraf.Accumulator) {
	r.a.Push(acc)
}

func (r *RunningAggregator) reset() {
|
||||
r.a.Reset()
|
||||
}
|
||||
|
||||
// Run runs the running aggregator, listens for incoming metrics, and waits
|
||||
// for period ticks to tell it when to push and reset the aggregator.
|
||||
func (r *RunningAggregator) Run(
|
||||
acc telegraf.Accumulator,
|
||||
shutdown chan struct{},
|
||||
) {
|
||||
// The start of the period is truncated to the nearest second.
|
||||
//
|
||||
// Every metric then gets it's timestamp checked and is dropped if it
|
||||
// is not within:
|
||||
//
|
||||
// start < t < end + truncation + delay
|
||||
//
|
||||
// So if we start at now = 00:00.2 with a 10s period and 0.3s delay:
|
||||
// now = 00:00.2
|
||||
// start = 00:00
|
||||
// truncation = 00:00.2
|
||||
// end = 00:10
|
||||
// 1st interval: 00:00 - 00:10.5
|
||||
// 2nd interval: 00:10 - 00:20.5
|
||||
// etc.
|
||||
//
|
||||
now := time.Now()
|
||||
r.periodStart = now.Truncate(time.Second)
|
||||
truncation := now.Sub(r.periodStart)
|
||||
r.periodEnd = r.periodStart.Add(r.Config.Period)
|
||||
time.Sleep(r.Config.Delay)
|
||||
periodT := time.NewTicker(r.Config.Period)
|
||||
defer periodT.Stop()
|
||||
|
||||
for {
|
||||
select {
|
||||
case <-shutdown:
|
||||
if len(r.metrics) > 0 {
|
||||
// wait until metrics are flushed before exiting
|
||||
continue
|
||||
}
|
||||
return
|
||||
case m := <-r.metrics:
|
||||
if m.Time().Before(r.periodStart) ||
|
||||
m.Time().After(r.periodEnd.Add(truncation).Add(r.Config.Delay)) {
|
||||
// the metric is outside the current aggregation period, so
|
||||
// skip it.
|
||||
log.Printf("D! aggregator: metric \"%s\" is not in the current timewindow, skipping", m.Name())
|
||||
continue
|
||||
}
|
||||
r.add(m)
|
||||
case <-periodT.C:
|
||||
r.periodStart = r.periodEnd
|
||||
r.periodEnd = r.periodStart.Add(r.Config.Period)
|
||||
r.push(acc)
|
||||
r.reset()
|
||||
}
|
||||
}
|
||||
start := time.Now()
|
||||
r.Aggregator.Push(acc)
|
||||
elapsed := time.Since(start)
|
||||
r.PushTime.Incr(elapsed.Nanoseconds())
|
||||
}
|
||||
|
|
|
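With the channel and Run loop gone, period handling moves to the caller: something must set the initial window and call Push on period boundaries. A hypothetical driver, assuming only the methods shown above (the real scheduling lives in the agent, not in this file):

```go
// Hypothetical driver for the synchronous aggregator API above; the
// actual agent wiring is elsewhere in this change.
func driveAggregator(ra *RunningAggregator, acc telegraf.Accumulator, shutdown chan struct{}) {
	ra.SetPeriodStart(time.Now())
	ticker := time.NewTicker(ra.Period())
	defer ticker.Stop()
	for {
		select {
		case <-ticker.C:
			ra.Push(acc) // advances the window and resets the aggregator
		case <-shutdown:
			ra.Push(acc) // flush the partial period before exiting
			return
		}
	}
}
```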
@@ -1,16 +1,13 @@
package models

import (
    "sync"
    "sync/atomic"
    "testing"
    "time"

    "github.com/influxdata/telegraf"
    "github.com/influxdata/telegraf/metric"
    "github.com/influxdata/telegraf/testutil"

    "github.com/stretchr/testify/assert"
    "github.com/stretchr/testify/require"
)

@@ -23,28 +20,24 @@ func TestAdd(t *testing.T) {
        },
        Period: time.Millisecond * 500,
    })
    assert.NoError(t, ra.Config.Filter.Compile())
    require.NoError(t, ra.Config.Filter.Compile())
    acc := testutil.Accumulator{}
    go ra.Run(&acc, make(chan struct{}))

    m, err := metric.New("RITest",
    now := time.Now()
    ra.SetPeriodStart(now)

    m := testutil.MustMetric("RITest",
        map[string]string{},
        map[string]interface{}{
            "value": int64(101),
        },
        time.Now().Add(time.Millisecond*150),
        telegraf.Untyped)
    require.NoError(t, err)
    require.False(t, ra.Add(m))
    ra.Push(&acc)

    assert.False(t, ra.Add(m))

    for {
        time.Sleep(time.Millisecond)
        if atomic.LoadInt64(&a.sum) > 0 {
            break
        }
    }
    assert.Equal(t, int64(101), atomic.LoadInt64(&a.sum))
    require.Equal(t, 1, len(acc.Metrics))
    require.Equal(t, int64(101), acc.Metrics[0].Fields["sum"])
}

func TestAddMetricsOutsideCurrentPeriod(t *testing.T) {

@@ -56,50 +49,45 @@ func TestAddMetricsOutsideCurrentPeriod(t *testing.T) {
        },
        Period: time.Millisecond * 500,
    })
    assert.NoError(t, ra.Config.Filter.Compile())
    require.NoError(t, ra.Config.Filter.Compile())
    acc := testutil.Accumulator{}
    go ra.Run(&acc, make(chan struct{}))
    now := time.Now()
    ra.SetPeriodStart(now)

    m, err := metric.New("RITest",
    m := testutil.MustMetric("RITest",
        map[string]string{},
        map[string]interface{}{
            "value": int64(101),
        },
        time.Now().Add(-time.Hour),
        telegraf.Untyped)
    require.NoError(t, err)

    assert.False(t, ra.Add(m))
        now.Add(-time.Hour),
        telegraf.Untyped,
    )
    require.False(t, ra.Add(m))

    // metric after current period
    m, err = metric.New("RITest",
    m = testutil.MustMetric("RITest",
        map[string]string{},
        map[string]interface{}{
            "value": int64(101),
        },
        time.Now().Add(time.Hour),
        telegraf.Untyped)
    require.NoError(t, err)
    assert.False(t, ra.Add(m))
        now.Add(time.Hour),
        telegraf.Untyped,
    )
    require.False(t, ra.Add(m))

    // "now" metric
    m, err = metric.New("RITest",
    m = testutil.MustMetric("RITest",
        map[string]string{},
        map[string]interface{}{
            "value": int64(101),
        },
        time.Now().Add(time.Millisecond*50),
        telegraf.Untyped)
    require.NoError(t, err)
    assert.False(t, ra.Add(m))
    require.False(t, ra.Add(m))

    for {
        time.Sleep(time.Millisecond)
        if atomic.LoadInt64(&a.sum) > 0 {
            break
        }
    }
    assert.Equal(t, int64(101), atomic.LoadInt64(&a.sum))
    ra.Push(&acc)
    require.Equal(t, 1, len(acc.Metrics))
    require.Equal(t, int64(101), acc.Metrics[0].Fields["sum"])
}

func TestAddAndPushOnePeriod(t *testing.T) {

@@ -111,37 +99,24 @@ func TestAddAndPushOnePeriod(t *testing.T) {
        },
        Period: time.Millisecond * 500,
    })
    assert.NoError(t, ra.Config.Filter.Compile())
    require.NoError(t, ra.Config.Filter.Compile())
    acc := testutil.Accumulator{}
    shutdown := make(chan struct{})

    var wg sync.WaitGroup
    wg.Add(1)
    go func() {
        defer wg.Done()
        ra.Run(&acc, shutdown)
    }()
    now := time.Now()
    ra.SetPeriodStart(now)

    m, err := metric.New("RITest",
    m := testutil.MustMetric("RITest",
        map[string]string{},
        map[string]interface{}{
            "value": int64(101),
        },
        time.Now().Add(time.Millisecond*100),
        telegraf.Untyped)
    require.NoError(t, err)
    assert.False(t, ra.Add(m))
    require.False(t, ra.Add(m))

    ra.Push(&acc)

    for {
        time.Sleep(time.Millisecond)
        if acc.NMetrics() > 0 {
            break
        }
    }
    acc.AssertContainsFields(t, "TestMetric", map[string]interface{}{"sum": int64(101)})

    close(shutdown)
    wg.Wait()
}

func TestAddDropOriginal(t *testing.T) {

@@ -152,28 +127,29 @@ func TestAddDropOriginal(t *testing.T) {
        },
        DropOriginal: true,
    })
    assert.NoError(t, ra.Config.Filter.Compile())
    require.NoError(t, ra.Config.Filter.Compile())

    m, err := metric.New("RITest",
    now := time.Now()
    ra.SetPeriodStart(now)

    m := testutil.MustMetric("RITest",
        map[string]string{},
        map[string]interface{}{
            "value": int64(101),
        },
        time.Now(),
        now,
        telegraf.Untyped)
    require.NoError(t, err)
    assert.True(t, ra.Add(m))
    require.True(t, ra.Add(m))

    // this metric name doesn't match the filter, so Add will return false
    m2, err := metric.New("foobar",
    m2 := testutil.MustMetric("foobar",
        map[string]string{},
        map[string]interface{}{
            "value": int64(101),
        },
        time.Now(),
        now,
        telegraf.Untyped)
    require.NoError(t, err)
    assert.False(t, ra.Add(m2))
    require.False(t, ra.Add(m2))
}

type TestAggregator struct {
@@ -1,11 +1,9 @@
package models

import (
    "fmt"
    "time"

    "github.com/influxdata/telegraf"
    "github.com/influxdata/telegraf/plugins/serializers/influx"
    "github.com/influxdata/telegraf/selfstat"
)

@@ -15,16 +13,13 @@ type RunningInput struct {
    Input  telegraf.Input
    Config *InputConfig

    trace       bool
    defaultTags map[string]string

    MetricsGathered selfstat.Stat
    GatherTime      selfstat.Stat
}

func NewRunningInput(
    input telegraf.Input,
    config *InputConfig,
) *RunningInput {
func NewRunningInput(input telegraf.Input, config *InputConfig) *RunningInput {
    return &RunningInput{
        Input:  input,
        Config: config,

@@ -33,6 +28,11 @@ func NewRunningInput(
            "metrics_gathered",
            map[string]string{"input": config.Name},
        ),
        GatherTime: selfstat.RegisterTiming(
            "gather",
            "gather_time_ns",
            map[string]string{"input": config.Name},
        ),
    }
}

@@ -52,13 +52,19 @@ func (r *RunningInput) Name() string {
    return "inputs." + r.Config.Name
}

func (r *RunningInput) metricFiltered(metric telegraf.Metric) {
    metric.Drop()
}

func (r *RunningInput) MakeMetric(metric telegraf.Metric) telegraf.Metric {
    if ok := r.Config.Filter.Select(metric); !ok {
        r.metricFiltered(metric)
        return nil
    }

    r.Config.Filter.Modify(metric)
    if len(metric.FieldList()) == 0 {
        r.metricFiltered(metric)
        return nil
    }

@@ -70,26 +76,17 @@ func (r *RunningInput) MakeMetric(metric telegraf.Metric) telegraf.Metric {
        r.Config.Tags,
        r.defaultTags)

    if r.trace && m != nil {
        s := influx.NewSerializer()
        s.SetFieldSortOrder(influx.SortFields)
        octets, err := s.Serialize(m)
        if err == nil {
            fmt.Print("> " + string(octets))
        }
    }

    r.MetricsGathered.Incr(1)
    GlobalMetricsGathered.Incr(1)
    return m
}

func (r *RunningInput) Trace() bool {
    return r.trace
}

func (r *RunningInput) SetTrace(trace bool) {
    r.trace = trace
func (r *RunningInput) Gather(acc telegraf.Accumulator) error {
    start := time.Now()
    err := r.Input.Gather(acc)
    elapsed := time.Since(start)
    r.GatherTime.Incr(elapsed.Nanoseconds())
    return err
}

func (r *RunningInput) SetDefaultTags(tags map[string]string) {
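Gather is now the single timed entry point for running an input. A sketch of the loop an agent might run per input, assuming the interval plumbing, which is not part of this file:

```go
// Hypothetical per-input gather loop; interval handling is illustrative.
func driveInput(ri *RunningInput, acc telegraf.Accumulator, interval time.Duration, shutdown chan struct{}) {
	ticker := time.NewTicker(interval)
	defer ticker.Stop()
	for {
		select {
		case <-shutdown:
			return
		case <-ticker.C:
			if err := ri.Gather(acc); err != nil {
				acc.AddError(err) // GatherTime was already recorded by Gather
			}
		}
	}
}
```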
@@ -6,6 +6,7 @@ import (

    "github.com/influxdata/telegraf"
    "github.com/influxdata/telegraf/metric"
    "github.com/influxdata/telegraf/testutil"

    "github.com/stretchr/testify/assert"
    "github.com/stretchr/testify/require"

@@ -66,17 +67,13 @@ func TestMakeMetricWithPluginTags(t *testing.T) {
        },
    })

    ri.SetTrace(true)
    assert.Equal(t, true, ri.Trace())

    m, err := metric.New("RITest",
    m := testutil.MustMetric("RITest",
        map[string]string{},
        map[string]interface{}{
            "value": int64(101),
        },
        now,
        telegraf.Untyped)
    require.NoError(t, err)
    m = ri.MakeMetric(m)

    expected, err := metric.New("RITest",

@@ -102,8 +99,6 @@ func TestMakeMetricFilteredOut(t *testing.T) {
        Filter: Filter{NamePass: []string{"foobar"}},
    })

    ri.SetTrace(true)
    assert.Equal(t, true, ri.Trace())
    assert.NoError(t, ri.Config.Filter.Compile())

    m, err := metric.New("RITest",

@@ -127,17 +122,13 @@ func TestMakeMetricWithDaemonTags(t *testing.T) {
        "foo": "bar",
    })

    ri.SetTrace(true)
    assert.Equal(t, true, ri.Trace())

    m, err := metric.New("RITest",
    m := testutil.MustMetric("RITest",
        map[string]string{},
        map[string]interface{}{
            "value": int64(101),
        },
        now,
        telegraf.Untyped)
    require.NoError(t, err)
    m = ri.MakeMetric(m)
    expected, err := metric.New("RITest",
        map[string]string{
@@ -6,7 +6,6 @@ import (
    "time"

    "github.com/influxdata/telegraf"
    "github.com/influxdata/telegraf/internal/buffer"
    "github.com/influxdata/telegraf/selfstat"
)

@@ -18,6 +17,16 @@ const (
    DEFAULT_METRIC_BUFFER_LIMIT = 10000
)

// OutputConfig containing name and filter
type OutputConfig struct {
    Name   string
    Filter Filter

    FlushInterval     time.Duration
    MetricBufferLimit int
    MetricBatchSize   int
}

// RunningOutput contains the output configuration
type RunningOutput struct {
    Name string

@@ -27,24 +36,16 @@ type RunningOutput struct {
    MetricBatchSize int

    MetricsFiltered selfstat.Stat
    MetricsWritten  selfstat.Stat
    BufferSize      selfstat.Stat
    BufferLimit     selfstat.Stat
    WriteTime       selfstat.Stat

    metrics     *buffer.Buffer
    failMetrics *buffer.Buffer
    batch      []telegraf.Metric
    buffer     *Buffer
    BatchReady chan time.Time

    // Guards against concurrent calls to Add, Push, Reset
    aggMutex sync.Mutex
    // Guards against concurrent calls to the Output as described in #3009
    writeMutex sync.Mutex
}

// OutputConfig containing name and filter
type OutputConfig struct {
    Name   string
    Filter Filter
    aggMutex   sync.Mutex
    batchMutex sync.Mutex
}

func NewRunningOutput(

@@ -54,25 +55,27 @@ func NewRunningOutput(
    batchSize int,
    bufferLimit int,
) *RunningOutput {
    if conf.MetricBufferLimit > 0 {
        bufferLimit = conf.MetricBufferLimit
    }
    if bufferLimit == 0 {
        bufferLimit = DEFAULT_METRIC_BUFFER_LIMIT
    }
    if conf.MetricBatchSize > 0 {
        batchSize = conf.MetricBatchSize
    }
    if batchSize == 0 {
        batchSize = DEFAULT_METRIC_BATCH_SIZE
    }
    ro := &RunningOutput{
        Name:        name,
        metrics:     buffer.NewBuffer(batchSize),
        failMetrics: buffer.NewBuffer(bufferLimit),
        batch:             make([]telegraf.Metric, 0, batchSize),
        buffer:            NewBuffer(name, bufferLimit),
        BatchReady:        make(chan time.Time, 1),
        Output:            output,
        Config:            conf,
        MetricBufferLimit: bufferLimit,
        MetricBatchSize:   batchSize,
        MetricsWritten: selfstat.Register(
            "write",
            "metrics_written",
            map[string]string{"output": name},
        ),
        MetricsFiltered: selfstat.Register(
            "write",
            "metrics_filtered",

@@ -94,20 +97,28 @@ func NewRunningOutput(
            map[string]string{"output": name},
        ),
    }

    ro.BufferLimit.Set(int64(ro.MetricBufferLimit))
    return ro
}

// AddMetric adds a metric to the output. This function can also write cached
// points if FlushBufferWhenFull is true.
func (ro *RunningOutput) metricFiltered(metric telegraf.Metric) {
    ro.MetricsFiltered.Incr(1)
    metric.Drop()
}

// AddMetric adds a metric to the output.
//
// Takes ownership of metric
func (ro *RunningOutput) AddMetric(metric telegraf.Metric) {
    if ok := ro.Config.Filter.Select(metric); !ok {
        ro.MetricsFiltered.Incr(1)
        ro.metricFiltered(metric)
        return
    }

    ro.Config.Filter.Modify(metric)
    if len(metric.FieldList()) == 0 {
        ro.metricFiltered(metric)
        return
    }

@@ -118,85 +129,98 @@ func (ro *RunningOutput) AddMetric(metric telegraf.Metric) {
        return
    }

    ro.metrics.Add(metric)
    if ro.metrics.Len() == ro.MetricBatchSize {
        batch := ro.metrics.Batch(ro.MetricBatchSize)
        err := ro.write(batch)
        if err != nil {
            ro.failMetrics.Add(batch...)
            log.Printf("E! Error writing to output [%s]: %v", ro.Name, err)
    ro.batchMutex.Lock()

    ro.batch = append(ro.batch, metric)
    if len(ro.batch) == ro.MetricBatchSize {
        ro.addBatchToBuffer()

        nBuffer := ro.buffer.Len()
        ro.BufferSize.Set(int64(nBuffer))

        select {
        case ro.BatchReady <- time.Now():
        default:
        }
    }

    ro.batchMutex.Unlock()
}

// Write writes all cached points to this output.
// AddBatchToBuffer moves the metrics from the batch into the metric buffer.
func (ro *RunningOutput) addBatchToBuffer() {
    ro.buffer.Add(ro.batch...)
    ro.batch = ro.batch[:0]
}

// Write writes all metrics to the output, stopping when all have been sent on
// or error.
func (ro *RunningOutput) Write() error {
    if output, ok := ro.Output.(telegraf.AggregatingOutput); ok {
        ro.aggMutex.Lock()
        metrics := output.Push()
        ro.metrics.Add(metrics...)
        ro.buffer.Add(metrics...)
        output.Reset()
        ro.aggMutex.Unlock()
    }
    // add and write can be called concurrently
    ro.batchMutex.Lock()
    ro.addBatchToBuffer()
    ro.batchMutex.Unlock()

    nFails, nMetrics := ro.failMetrics.Len(), ro.metrics.Len()
    ro.BufferSize.Set(int64(nFails + nMetrics))
    log.Printf("D! Output [%s] buffer fullness: %d / %d metrics. ",
        ro.Name, nFails+nMetrics, ro.MetricBufferLimit)
    var err error
    if !ro.failMetrics.IsEmpty() {
        // how many batches of failed writes we need to write.
        nBatches := nFails/ro.MetricBatchSize + 1
        batchSize := ro.MetricBatchSize
    nBuffer := ro.buffer.Len()

        for i := 0; i < nBatches; i++ {
            // If it's the last batch, only grab the metrics that have not had
            // a write attempt already (this is primarily to preserve order).
            if i == nBatches-1 {
                batchSize = nFails % ro.MetricBatchSize
            }
            batch := ro.failMetrics.Batch(batchSize)
            // If we've already failed previous writes, don't bother trying to
            // write to this output again. We are not exiting the loop just so
            // that we can rotate the metrics to preserve order.
            if err == nil {
                err = ro.write(batch)
            }
            if err != nil {
                ro.failMetrics.Add(batch...)
            }
    // Only process the metrics in the buffer now. Metrics added while we are
    // writing will be sent on the next call.
    nBatches := nBuffer/ro.MetricBatchSize + 1
    for i := 0; i < nBatches; i++ {
        batch := ro.buffer.Batch(ro.MetricBatchSize)
        if len(batch) == 0 {
            break
        }
    }

    batch := ro.metrics.Batch(ro.MetricBatchSize)
    // see comment above about not trying to write to an already failed output.
    // if ro.failMetrics is empty then err will always be nil at this point.
    if err == nil {
        err = ro.write(batch)
    }

    if err != nil {
        ro.failMetrics.Add(batch...)
        return err
        err := ro.write(batch)
        if err != nil {
            ro.buffer.Reject(batch)
            return err
        }
        ro.buffer.Accept(batch)
    }
    return nil
}

func (ro *RunningOutput) write(metrics []telegraf.Metric) error {
    nMetrics := len(metrics)
    if nMetrics == 0 {
// WriteBatch writes only the batch metrics to the output.
func (ro *RunningOutput) WriteBatch() error {
    batch := ro.buffer.Batch(ro.MetricBatchSize)
    if len(batch) == 0 {
        return nil
    }
    ro.writeMutex.Lock()
    defer ro.writeMutex.Unlock()

    err := ro.write(batch)
    if err != nil {
        ro.buffer.Reject(batch)
        return err
    }
    ro.buffer.Accept(batch)

    return nil
}

func (ro *RunningOutput) write(metrics []telegraf.Metric) error {
    start := time.Now()
    err := ro.Output.Write(metrics)
    elapsed := time.Since(start)
    ro.WriteTime.Incr(elapsed.Nanoseconds())

    if err == nil {
        log.Printf("D! Output [%s] wrote batch of %d metrics in %s\n",
            ro.Name, nMetrics, elapsed)
        ro.MetricsWritten.Incr(int64(nMetrics))
        ro.WriteTime.Incr(elapsed.Nanoseconds())
        log.Printf("D! [outputs.%s] wrote batch of %d metrics in %s\n",
            ro.Name, len(metrics), elapsed)
    }
    return err
}

func (ro *RunningOutput) LogBufferStatus() {
    nBuffer := ro.buffer.Len()
    log.Printf("D! [outputs.%s] buffer fullness: %d / %d metrics. ",
        ro.Name, nBuffer, ro.MetricBufferLimit)
}
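This file is the core of the change: AddMetric now only appends to an in-memory batch and signals BatchReady, so a slow output can no longer block the input goroutine that calls it. A sketch of the consuming side, assuming a flusher goroutine like the one the agent gains in this PR (the function and channel wiring here are illustrative, not the actual agent code):

```go
// Illustrative flusher: writes a full batch as soon as one is ready, and
// everything buffered on the regular flush interval.
func flush(ro *RunningOutput, interval time.Duration, shutdown chan struct{}) {
	ticker := time.NewTicker(interval)
	defer ticker.Stop()
	for {
		select {
		case <-shutdown:
			return
		case <-ro.BatchReady:
			// A full batch accumulated; write it without waiting.
			if err := ro.WriteBatch(); err != nil {
				log.Printf("E! [outputs.%s] %v", ro.Name, err)
			}
		case <-ticker.C:
			// Periodic flush of everything in the buffer.
			if err := ro.Write(); err != nil {
				log.Printf("E! [outputs.%s] %v", ro.Name, err)
			}
		}
	}
}
```

On failure the batch is Rejected back into the buffer rather than re-queued by hand, which is what lets the ring buffer alone enforce the metric_buffer_limit.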
@@ -231,56 +231,6 @@ func TestRunningOutputDefault(t *testing.T) {
    assert.Len(t, m.Metrics(), 10)
}

// Test that running output doesn't flush until it's full when
// FlushBufferWhenFull is set.
func TestRunningOutputFlushWhenFull(t *testing.T) {
    conf := &OutputConfig{
        Filter: Filter{},
    }

    m := &mockOutput{}
    ro := NewRunningOutput("test", m, conf, 6, 10)

    // Fill buffer to 1 under limit
    for _, metric := range first5 {
        ro.AddMetric(metric)
    }
    // no flush yet
    assert.Len(t, m.Metrics(), 0)

    // add one more metric
    ro.AddMetric(next5[0])
    // now it flushed
    assert.Len(t, m.Metrics(), 6)

    // add one more metric and write it manually
    ro.AddMetric(next5[1])
    err := ro.Write()
    assert.NoError(t, err)
    assert.Len(t, m.Metrics(), 7)
}

// Test that running output doesn't flush until it's full when
// FlushBufferWhenFull is set, twice.
func TestRunningOutputMultiFlushWhenFull(t *testing.T) {
    conf := &OutputConfig{
        Filter: Filter{},
    }

    m := &mockOutput{}
    ro := NewRunningOutput("test", m, conf, 4, 12)

    // Fill buffer past limit twice
    for _, metric := range first5 {
        ro.AddMetric(metric)
    }
    for _, metric := range next5 {
        ro.AddMetric(metric)
    }
    // flushed twice
    assert.Len(t, m.Metrics(), 8)
}

func TestRunningOutputWriteFail(t *testing.T) {
    conf := &OutputConfig{
        Filter: Filter{},
@@ -27,6 +27,19 @@ type ProcessorConfig struct {
    Filter Filter
}

func (rp *RunningProcessor) metricFiltered(metric telegraf.Metric) {
    metric.Drop()
}

func containsMetric(item telegraf.Metric, metrics []telegraf.Metric) bool {
    for _, m := range metrics {
        if item == m {
            return true
        }
    }
    return false
}

func (rp *RunningProcessor) Apply(in ...telegraf.Metric) []telegraf.Metric {
    rp.Lock()
    defer rp.Unlock()

@@ -43,6 +56,7 @@ func (rp *RunningProcessor) Apply(in ...telegraf.Metric) []telegraf.Metric {

    rp.Config.Filter.Modify(metric)
    if len(metric.FieldList()) == 0 {
        rp.metricFiltered(metric)
        continue
    }
@@ -6,7 +6,7 @@ import (
    "time"

    "github.com/influxdata/telegraf"
    "github.com/influxdata/telegraf/metric"
    "github.com/influxdata/telegraf/testutil"

    "github.com/stretchr/testify/require"
)

@@ -41,20 +41,6 @@ func TagProcessor(key, value string) *MockProcessor {
    }
}

func Metric(
    name string,
    tags map[string]string,
    fields map[string]interface{},
    tm time.Time,
    tp ...telegraf.ValueType,
) telegraf.Metric {
    m, err := metric.New(name, tags, fields, tm, tp...)
    if err != nil {
        panic(err)
    }
    return m
}

func TestRunningProcessor_Apply(t *testing.T) {
    type args struct {
        Processor telegraf.Processor

@@ -76,7 +62,7 @@ func TestRunningProcessor_Apply(t *testing.T) {
        },
    },
    input: []telegraf.Metric{
        Metric(
        testutil.MustMetric(
            "cpu",
            map[string]string{},
            map[string]interface{}{

@@ -86,7 +72,7 @@ func TestRunningProcessor_Apply(t *testing.T) {
        ),
    },
    expected: []telegraf.Metric{
        Metric(
        testutil.MustMetric(
            "cpu",
            map[string]string{
                "apply": "true",

@@ -109,7 +95,7 @@ func TestRunningProcessor_Apply(t *testing.T) {
        },
    },
    input: []telegraf.Metric{
        Metric(
        testutil.MustMetric(
            "cpu",
            map[string]string{},
            map[string]interface{}{

@@ -119,7 +105,7 @@ func TestRunningProcessor_Apply(t *testing.T) {
        ),
    },
    expected: []telegraf.Metric{
        Metric(
        testutil.MustMetric(
            "cpu",
            map[string]string{
                "apply": "true",

@@ -142,7 +128,7 @@ func TestRunningProcessor_Apply(t *testing.T) {
        },
    },
    input: []telegraf.Metric{
        Metric(
        testutil.MustMetric(
            "cpu",
            map[string]string{},
            map[string]interface{}{

@@ -152,7 +138,7 @@ func TestRunningProcessor_Apply(t *testing.T) {
        ),
    },
    expected: []telegraf.Metric{
        Metric(
        testutil.MustMetric(
            "cpu",
            map[string]string{},
            map[string]interface{}{
metric.go
@@ -62,6 +62,17 @@ type Metric interface {
    // Copy returns a deep copy of the Metric.
    Copy() Metric

    // Accept marks the metric as processed successfully and written to an
    // output.
    Accept()

    // Reject marks the metric as processed unsuccessfully.
    Reject()

    // Drop marks the metric as processed successfully without being written
    // to any output.
    Drop()

    // Mark Metric as an aggregate
    SetAggregate(bool)
    IsAggregate() bool

@@ -248,6 +248,15 @@ func (m *metric) HashID() uint64 {
    return h.Sum64()
}

func (m *metric) Accept() {
}

func (m *metric) Reject() {
}

func (m *metric) Drop() {
}

// Convert field to a supported type or nil if unconvertible
func convertField(v interface{}) interface{} {
    switch v := v.(type) {
@@ -0,0 +1,171 @@
package metric

import (
    "log"
    "runtime"
    "sync/atomic"

    "github.com/influxdata/telegraf"
)

// NotifyFunc is called when a tracking metric is done being processed with
// the tracking information.
type NotifyFunc = func(track telegraf.DeliveryInfo)

// WithTracking adds tracking to the metric and registers the notify function
// to be called when processing is complete.
func WithTracking(metric telegraf.Metric, fn NotifyFunc) (telegraf.Metric, telegraf.TrackingID) {
    return newTrackingMetric(metric, fn)
}

// WithGroupTracking adds tracking to the metrics and registers the notify
// function to be called when processing is complete.
func WithGroupTracking(metric []telegraf.Metric, fn NotifyFunc) ([]telegraf.Metric, telegraf.TrackingID) {
    return newTrackingMetricGroup(metric, fn)
}

func EnableDebugFinalizer() {
    finalizer = debugFinalizer
}

var (
    lastID    uint64
    finalizer func(*trackingData)
)

func newTrackingID() telegraf.TrackingID {
    atomic.AddUint64(&lastID, 1)
    return telegraf.TrackingID(lastID)
}

func debugFinalizer(d *trackingData) {
    rc := atomic.LoadInt32(&d.rc)
    if rc != 0 {
        log.Fatalf("E! [agent] metric collected with non-zero reference count rc: %d", rc)
    }
}

type trackingData struct {
    id          telegraf.TrackingID
    rc          int32
    acceptCount int32
    rejectCount int32
    notify      NotifyFunc
}

func (d *trackingData) incr() {
    atomic.AddInt32(&d.rc, 1)
}

func (d *trackingData) decr() int32 {
    return atomic.AddInt32(&d.rc, -1)
}

func (d *trackingData) accept() {
    atomic.AddInt32(&d.acceptCount, 1)
}

func (d *trackingData) reject() {
    atomic.AddInt32(&d.rejectCount, 1)
}

type trackingMetric struct {
    telegraf.Metric
    d *trackingData
}

func newTrackingMetric(metric telegraf.Metric, fn NotifyFunc) (telegraf.Metric, telegraf.TrackingID) {
    m := &trackingMetric{
        Metric: metric,
        d: &trackingData{
            id:          newTrackingID(),
            rc:          1,
            acceptCount: 0,
            rejectCount: 0,
            notify:      fn,
        },
    }

    if finalizer != nil {
        runtime.SetFinalizer(m.d, finalizer)
    }
    return m, m.d.id
}

func newTrackingMetricGroup(group []telegraf.Metric, fn NotifyFunc) ([]telegraf.Metric, telegraf.TrackingID) {
    d := &trackingData{
        id:          newTrackingID(),
        rc:          0,
        acceptCount: 0,
        rejectCount: 0,
        notify:      fn,
    }

    for i, m := range group {
        d.incr()
        dm := &trackingMetric{
            Metric: m,
            d:      d,
        }
        group[i] = dm
    }
    if finalizer != nil {
        runtime.SetFinalizer(d, finalizer)
    }

    return group, d.id
}

func (m *trackingMetric) Copy() telegraf.Metric {
    m.d.incr()
    return &trackingMetric{
        Metric: m.Metric.Copy(),
        d:      m.d,
    }
}

func (m *trackingMetric) Accept() {
    m.d.accept()
    m.decr()
}

func (m *trackingMetric) Reject() {
    m.d.reject()
    m.decr()
}

func (m *trackingMetric) Drop() {
    m.decr()
}

func (m *trackingMetric) decr() {
    v := m.d.decr()
    if v < 0 {
        panic("negative refcount")
    }

    if v == 0 {
        m.d.notify(
            &deliveryInfo{
                id:       m.d.id,
                accepted: int(m.d.acceptCount),
                rejected: int(m.d.rejectCount),
            },
        )
    }
}

type deliveryInfo struct {
    id       telegraf.TrackingID
    accepted int
    rejected int
}

func (r *deliveryInfo) ID() telegraf.TrackingID {
    return r.id
}

func (r *deliveryInfo) Delivered() bool {
    return r.rejected == 0
}
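Delivery notification is reference counted: the count starts at one per metric (or one per member of a group), Copy increments it, and Accept, Reject, and Drop each decrement it; the notify function fires exactly once, when the count reaches zero. A short usage sketch under those semantics (the callback body is invented; `log` is assumed imported):

```go
func ExampleWithTracking() {
	base, _ := New("cpu", nil, map[string]interface{}{"value": 42}, time.Unix(0, 0))
	m, _ := WithTracking(base, func(info telegraf.DeliveryInfo) {
		// Fires exactly once, when the last reference is resolved.
		log.Printf("delivered=%v", info.Delivered())
	})
	m2 := m.Copy() // refcount: 2
	m.Accept()     // refcount: 1
	m2.Reject()    // refcount: 0 -> notify fires; Delivered() is false
}
```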
@@ -0,0 +1,260 @@
package metric

import (
    "testing"
    "time"

    "github.com/influxdata/telegraf"
    "github.com/stretchr/testify/require"
)

func mustMetric(
    name string,
    tags map[string]string,
    fields map[string]interface{},
    tm time.Time,
    tp ...telegraf.ValueType,
) telegraf.Metric {
    m, err := New(name, tags, fields, tm, tp...)
    if err != nil {
        panic("mustMetric")
    }
    return m
}

type deliveries struct {
    Info map[telegraf.TrackingID]telegraf.DeliveryInfo
}

func (d *deliveries) onDelivery(info telegraf.DeliveryInfo) {
    d.Info[info.ID()] = info
}

func TestTracking(t *testing.T) {
    tests := []struct {
        name      string
        metric    telegraf.Metric
        actions   func(metric telegraf.Metric)
        delivered bool
    }{
        {
            name: "accept",
            metric: mustMetric(
                "cpu",
                map[string]string{},
                map[string]interface{}{
                    "value": 42,
                },
                time.Unix(0, 0),
            ),
            actions: func(m telegraf.Metric) {
                m.Accept()
            },
            delivered: true,
        },
        {
            name: "reject",
            metric: mustMetric(
                "cpu",
                map[string]string{},
                map[string]interface{}{
                    "value": 42,
                },
                time.Unix(0, 0),
            ),
            actions: func(m telegraf.Metric) {
                m.Reject()
            },
            delivered: false,
        },
        {
            name: "accept copy",
            metric: mustMetric(
                "cpu",
                map[string]string{},
                map[string]interface{}{
                    "value": 42,
                },
                time.Unix(0, 0),
            ),
            actions: func(m telegraf.Metric) {
                m2 := m.Copy()
                m.Accept()
                m2.Accept()
            },
            delivered: true,
        },
        {
            name: "copy with accept and done",
            metric: mustMetric(
                "cpu",
                map[string]string{},
                map[string]interface{}{
                    "value": 42,
                },
                time.Unix(0, 0),
            ),
            actions: func(m telegraf.Metric) {
                m2 := m.Copy()
                m.Accept()
                m2.Drop()
            },
            delivered: true,
        },
        {
            name: "copy with mixed delivery",
            metric: mustMetric(
                "cpu",
                map[string]string{},
                map[string]interface{}{
                    "value": 42,
                },
                time.Unix(0, 0),
            ),
            actions: func(m telegraf.Metric) {
                m2 := m.Copy()
                m.Accept()
                m2.Reject()
            },
            delivered: false,
        },
    }
    for _, tt := range tests {
        t.Run(tt.name, func(t *testing.T) {
            d := &deliveries{
                Info: make(map[telegraf.TrackingID]telegraf.DeliveryInfo),
            }
            metric, id := WithTracking(tt.metric, d.onDelivery)
            tt.actions(metric)

            info := d.Info[id]
            require.Equal(t, tt.delivered, info.Delivered())
        })
    }
}

func TestGroupTracking(t *testing.T) {
    tests := []struct {
        name      string
        metrics   []telegraf.Metric
        actions   func(metrics []telegraf.Metric)
        delivered bool
    }{
        {
            name: "accept",
            metrics: []telegraf.Metric{
                mustMetric(
                    "cpu",
                    map[string]string{},
                    map[string]interface{}{
                        "value": 42,
                    },
                    time.Unix(0, 0),
                ),
                mustMetric(
                    "cpu",
                    map[string]string{},
                    map[string]interface{}{
                        "value": 42,
                    },
                    time.Unix(0, 0),
                ),
            },
            actions: func(metrics []telegraf.Metric) {
                metrics[0].Accept()
                metrics[1].Accept()
            },
            delivered: true,
        },
        {
            name: "reject",
            metrics: []telegraf.Metric{
                mustMetric(
                    "cpu",
                    map[string]string{},
                    map[string]interface{}{
                        "value": 42,
                    },
                    time.Unix(0, 0),
                ),
                mustMetric(
                    "cpu",
                    map[string]string{},
                    map[string]interface{}{
                        "value": 42,
                    },
                    time.Unix(0, 0),
                ),
            },
            actions: func(metrics []telegraf.Metric) {
                metrics[0].Reject()
                metrics[1].Reject()
            },
            delivered: false,
        },
        {
            name: "remove",
            metrics: []telegraf.Metric{
                mustMetric(
                    "cpu",
                    map[string]string{},
                    map[string]interface{}{
                        "value": 42,
                    },
                    time.Unix(0, 0),
                ),
                mustMetric(
                    "cpu",
                    map[string]string{},
                    map[string]interface{}{
                        "value": 42,
                    },
                    time.Unix(0, 0),
                ),
            },
            actions: func(metrics []telegraf.Metric) {
                metrics[0].Drop()
                metrics[1].Drop()
            },
            delivered: true,
        },
        {
            name: "mixed",
            metrics: []telegraf.Metric{
                mustMetric(
                    "cpu",
                    map[string]string{},
                    map[string]interface{}{
                        "value": 42,
                    },
                    time.Unix(0, 0),
                ),
                mustMetric(
                    "cpu",
                    map[string]string{},
                    map[string]interface{}{
                        "value": 42,
                    },
                    time.Unix(0, 0),
                ),
            },
            actions: func(metrics []telegraf.Metric) {
                metrics[0].Accept()
                metrics[1].Reject()
            },
            delivered: false,
        },
    }
    for _, tt := range tests {
        t.Run(tt.name, func(t *testing.T) {
            d := &deliveries{
                Info: make(map[telegraf.TrackingID]telegraf.DeliveryInfo),
            }
            metrics, id := WithGroupTracking(tt.metrics, d.onDelivery)
            tt.actions(metrics)

            info := d.Info[id]
            require.Equal(t, tt.delivered, info.Delivered())
        })
    }
}
output.go
@@ -17,16 +17,7 @@ type Output interface {
// if the Output only accepts a fixed set of aggregations over a time period.
// These functions may be called concurrently to the Write function.
type AggregatingOutput interface {
    // Connect to the Output
    Connect() error
    // Close any connections to the Output
    Close() error
    // Description returns a one-sentence description on the Output
    Description() string
    // SampleConfig returns the default configuration of the Output
    SampleConfig() string
    // Write takes in group of points to be written to the Output
    Write(metrics []Metric) error
    Output

    // Add the metric to the aggregator
    Add(in Metric)

@@ -35,21 +26,3 @@ type AggregatingOutput interface {
    // Reset signals that the aggregator period is completed.
    Reset()
}

type ServiceOutput interface {
    // Connect to the Output
    Connect() error
    // Close any connections to the Output
    Close() error
    // Description returns a one-sentence description on the Output
    Description() string
    // SampleConfig returns the default configuration of the Output
    SampleConfig() string
    // Write takes in group of points to be written to the Output
    Write(metrics []Metric) error

    // Start the "service" that will provide an Output
    Start() error
    // Stop the "service" that will provide an Output
    Stop()
}
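AggregatingOutput now embeds Output instead of restating its methods, so an implementation only adds the three aggregation hooks (the Push signature is inferred from its use in RunningOutput.Write above). A minimal sketch; the count field and the metric construction are invented for illustration, and an embedded ordinary Output implementation is assumed:

```go
// Hypothetical aggregating output: counts metrics per period.
type countOutput struct {
	// an embedded plain Output implementation is assumed here,
	// providing Connect/Close/Description/SampleConfig/Write
	count int64
}

// Add accumulates a metric into the current period.
func (c *countOutput) Add(in Metric) {
	c.count++
}

// Push returns the period's aggregate metrics for writing.
func (c *countOutput) Push() []Metric {
	// build a metric from c.count here in a real implementation
	return nil
}

// Reset clears the per-period state when the period completes.
func (c *countOutput) Reset() {
	c.count = 0
}
```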
@@ -133,7 +133,6 @@ func (m *BasicStats) Add(in telegraf.Metric) {
}

func (m *BasicStats) Push(acc telegraf.Accumulator) {

    config := getConfiguredStats(m)

    for _, aggregate := range m.cache {
@@ -13,7 +13,6 @@ For an introduction to AMQP see:
The following defaults are known to work with RabbitMQ:

```toml
# AMQP consumer plugin
[[inputs.amqp_consumer]]
  ## Broker to consume from.
  ## deprecated in 1.7; use the brokers option

@@ -56,6 +55,16 @@ The following defaults are known to work with RabbitMQ:
  ## Maximum number of messages server should give to the worker.
  # prefetch_count = 50

  ## Maximum messages to read from the broker that have not been written by an
  ## output. For best throughput set based on the number of metrics within
  ## each message and the size of the output's metric_batch_size.
  ##
  ## For example, if each message from the queue contains 10 metrics and the
  ## output metric_batch_size is 1000, setting this to 100 will ensure that a
  ## full batch is collected and the write is triggered immediately without
  ## waiting until the next flush_interval.
  # max_undelivered_messages = 1000

  ## Auth method. PLAIN and EXTERNAL are supported
  ## Using EXTERNAL requires enabling the rabbitmq_auth_mechanism_ssl plugin as
  ## described here: https://www.rabbitmq.com/plugins.html
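The new option pairs the consumer with the output's batch size. As a worked example of the arithmetic in the comment above, assuming messages carry about 10 metrics each (the broker address and counts below are illustrative only):

```toml
[[inputs.amqp_consumer]]
  brokers = ["amqp://localhost:5672/influxdb"]
  # 100 in-flight messages x ~10 metrics/message = one full 1000-metric batch
  max_undelivered_messages = 100

[[outputs.influxdb]]
  metric_batch_size = 1000
```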
@@ -1,6 +1,7 @@
package amqp_consumer

import (
    "context"
    "errors"
    "fmt"
    "log"

@@ -9,25 +10,32 @@ import (
    "sync"
    "time"

    "github.com/streadway/amqp"

    "github.com/influxdata/telegraf"
    "github.com/influxdata/telegraf/internal/tls"
    "github.com/influxdata/telegraf/plugins/inputs"
    "github.com/influxdata/telegraf/plugins/parsers"
    "github.com/streadway/amqp"
)

const (
    defaultMaxUndeliveredMessages = 1000
)

type empty struct{}
type semaphore chan empty

// AMQPConsumer is the top level struct for this plugin
type AMQPConsumer struct {
    URL                string            `toml:"url"` // deprecated in 1.7; use brokers
    Brokers            []string          `toml:"brokers"`
    Username           string            `toml:"username"`
    Password           string            `toml:"password"`
    Exchange           string            `toml:"exchange"`
    ExchangeType       string            `toml:"exchange_type"`
    ExchangeDurability string            `toml:"exchange_durability"`
    ExchangePassive    bool              `toml:"exchange_passive"`
    ExchangeArguments  map[string]string `toml:"exchange_arguments"`
    URL                    string            `toml:"url"` // deprecated in 1.7; use brokers
    Brokers                []string          `toml:"brokers"`
    Username               string            `toml:"username"`
    Password               string            `toml:"password"`
    Exchange               string            `toml:"exchange"`
    ExchangeType           string            `toml:"exchange_type"`
    ExchangeDurability     string            `toml:"exchange_durability"`
    ExchangePassive        bool              `toml:"exchange_passive"`
    ExchangeArguments      map[string]string `toml:"exchange_arguments"`
    MaxUndeliveredMessages int               `toml:"max_undelivered_messages"`

    // Queue Name
    Queue string `toml:"queue"`

@@ -44,9 +52,12 @@ type AMQPConsumer struct {
    AuthMethod string
    tls.ClientConfig

    deliveries map[telegraf.TrackingID]amqp.Delivery

    parser parsers.Parser
    conn   *amqp.Connection
    wg     *sync.WaitGroup
    cancel context.CancelFunc
}

type externalAuth struct{}

@@ -114,6 +125,16 @@ func (a *AMQPConsumer) SampleConfig() string {
  ## Maximum number of messages server should give to the worker.
  # prefetch_count = 50

  ## Maximum messages to read from the broker that have not been written by an
  ## output. For best throughput set based on the number of metrics within
  ## each message and the size of the output's metric_batch_size.
  ##
  ## For example, if each message from the queue contains 10 metrics and the
  ## output metric_batch_size is 1000, setting this to 100 will ensure that a
  ## full batch is collected and the write is triggered immediately without
  ## waiting until the next flush_interval.
  # max_undelivered_messages = 1000

  ## Auth method. PLAIN and EXTERNAL are supported
  ## Using EXTERNAL requires enabling the rabbitmq_auth_mechanism_ssl plugin as
  ## described here: https://www.rabbitmq.com/plugins.html

@@ -185,9 +206,15 @@ func (a *AMQPConsumer) Start(acc telegraf.Accumulator) error {
        return err
    }

    ctx, cancel := context.WithCancel(context.Background())
    a.cancel = cancel

    a.wg = &sync.WaitGroup{}
    a.wg.Add(1)
    go a.process(msgs, acc)
    go func() {
        defer a.wg.Done()
        a.process(ctx, msgs, acc)
    }()

    go func() {
        for {

@@ -196,7 +223,7 @@ func (a *AMQPConsumer) Start(acc telegraf.Accumulator) error {
                break
            }

            log.Printf("I! AMQP consumer connection closed: %s; trying to reconnect", err)
            log.Printf("I! [inputs.amqp_consumer] connection closed: %s; trying to reconnect", err)
            for {
                msgs, err := a.connect(amqpConf)
                if err != nil {

@@ -206,7 +233,10 @@ func (a *AMQPConsumer) Start(acc telegraf.Accumulator) error {
                }

                a.wg.Add(1)
                go a.process(msgs, acc)
                go func() {
                    defer a.wg.Done()
                    a.process(ctx, msgs, acc)
                }()
                break
            }
        }

@@ -224,14 +254,14 @@ func (a *AMQPConsumer) connect(amqpConf *amqp.Config) (<-chan amqp.Delivery, err
    p := rand.Perm(len(brokers))
    for _, n := range p {
        broker := brokers[n]
        log.Printf("D! [amqp_consumer] connecting to %q", broker)
        log.Printf("D! [inputs.amqp_consumer] connecting to %q", broker)
        conn, err := amqp.DialConfig(broker, *amqpConf)
        if err == nil {
            a.conn = conn
            log.Printf("D! [amqp_consumer] connected to %q", broker)
            log.Printf("D! [inputs.amqp_consumer] connected to %q", broker)
            break
        }
        log.Printf("D! [amqp_consumer] error connecting to %q", broker)
        log.Printf("D! [inputs.amqp_consumer] error connecting to %q", broker)
    }

    if a.conn == nil {

@@ -320,7 +350,6 @@ func (a *AMQPConsumer) connect(amqpConf *amqp.Config) (<-chan amqp.Delivery, err
        return nil, fmt.Errorf("Failed establishing connection to queue: %s", err)
    }

    log.Println("I! Started AMQP consumer")
    return msgs, err
}

@@ -361,42 +390,101 @@ func declareExchange(
}

// Read messages from queue and add them to the Accumulator
func (a *AMQPConsumer) process(msgs <-chan amqp.Delivery, acc telegraf.Accumulator) {
    defer a.wg.Done()
    for d := range msgs {
        metrics, err := a.parser.Parse(d.Body)
        if err != nil {
            log.Printf("E! %v: error parsing metric - %v", err, string(d.Body))
        } else {
            for _, m := range metrics {
                acc.AddFields(m.Name(), m.Fields(), m.Tags(), m.Time())
func (a *AMQPConsumer) process(ctx context.Context, msgs <-chan amqp.Delivery, ac telegraf.Accumulator) {
    a.deliveries = make(map[telegraf.TrackingID]amqp.Delivery)

    acc := ac.WithTracking(a.MaxUndeliveredMessages)
    sem := make(semaphore, a.MaxUndeliveredMessages)

    for {
        select {
        case <-ctx.Done():
            return
        case track := <-acc.Delivered():
            if a.onDelivery(track) {
                <-sem
            }
        case sem <- empty{}:
            select {
            case <-ctx.Done():
                return
            case track := <-acc.Delivered():
                if a.onDelivery(track) {
                    <-sem
                    <-sem
                }
            case d, ok := <-msgs:
                if !ok {
                    return
                }
                err := a.onMessage(acc, d)
                if err != nil {
                    acc.AddError(err)
                    <-sem
                }
            }
        }

        d.Ack(false)
    }
    log.Printf("I! AMQP consumer queue closed")
}

func (a *AMQPConsumer) onMessage(acc telegraf.TrackingAccumulator, d amqp.Delivery) error {
    metrics, err := a.parser.Parse(d.Body)
    if err != nil {
        return err
    }

    id := acc.AddTrackingMetricGroup(metrics)
    a.deliveries[id] = d
    return nil
}

func (a *AMQPConsumer) onDelivery(track telegraf.DeliveryInfo) bool {
    delivery, ok := a.deliveries[track.ID()]
    if !ok {
        // Added by a previous connection
        return false
    }

    if track.Delivered() {
        err := delivery.Ack(false)
        if err != nil {
            log.Printf("E! [inputs.amqp_consumer] Unable to ack written delivery: %d: %v",
                delivery.DeliveryTag, err)
            a.conn.Close()
        }
    } else {
        err := delivery.Reject(false)
        if err != nil {
            log.Printf("E! [inputs.amqp_consumer] Unable to reject failed delivery: %d: %v",
                delivery.DeliveryTag, err)
            a.conn.Close()
        }
    }

    delete(a.deliveries, track.ID())
    return true
}

func (a *AMQPConsumer) Stop() {
    a.cancel()
    a.wg.Wait()
    err := a.conn.Close()
    if err != nil && err != amqp.ErrClosed {
        log.Printf("E! Error closing AMQP connection: %s", err)
        log.Printf("E! [inputs.amqp_consumer] Error closing AMQP connection: %s", err)
        return
    }
    a.wg.Wait()
    log.Println("I! Stopped AMQP service")
}

func init() {
    inputs.Add("amqp_consumer", func() telegraf.Input {
        return &AMQPConsumer{
            URL:                DefaultBroker,
            AuthMethod:         DefaultAuthMethod,
            ExchangeType:       DefaultExchangeType,
            ExchangeDurability: DefaultExchangeDurability,
            QueueDurability:    DefaultQueueDurability,
            PrefetchCount:      DefaultPrefetchCount,
            URL:                    DefaultBroker,
            AuthMethod:             DefaultAuthMethod,
            ExchangeType:           DefaultExchangeType,
            ExchangeDurability:     DefaultExchangeDurability,
            QueueDurability:        DefaultQueueDurability,
            PrefetchCount:          DefaultPrefetchCount,
            MaxUndeliveredMessages: defaultMaxUndeliveredMessages,
        }
    })
}
|
|||
|
||||
memstats are taken from the Go runtime: https://golang.org/pkg/runtime/#MemStats
|
||||
|
||||
- internal\_memstats
|
||||
- alloc\_bytes
|
||||
- internal_memstats
|
||||
- alloc_bytes
|
||||
- frees
|
||||
- heap\_alloc\_bytes
|
||||
- heap\_idle\_bytes
|
||||
- heap\_in\_use\_bytes
|
||||
- heap\_objects\_bytes
|
||||
- heap\_released\_bytes
|
||||
- heap\_sys\_bytes
|
||||
- heap_alloc_bytes
|
||||
- heap_idle_bytes
|
||||
- heap_in_use_bytes
|
||||
- heap_objects_bytes
|
||||
- heap_released_bytes
|
||||
- heap_sys_bytes
|
||||
- mallocs
|
||||
- num\_gc
|
||||
- pointer\_lookups
|
||||
- sys\_bytes
|
||||
- total\_alloc\_bytes
|
||||
- num_gc
|
||||
- pointer_lookups
|
||||
- sys_bytes
|
||||
- total_alloc_bytes
|
||||
|
||||
agent stats collect aggregate stats on all telegraf plugins.
|
||||
|
||||
- internal\_agent
|
||||
- gather\_errors
|
||||
- metrics\_dropped
|
||||
- metrics\_gathered
|
||||
- metrics\_written
|
||||
- internal_agent
|
||||
- gather_errors
|
||||
- metrics_dropped
|
||||
- metrics_gathered
|
||||
- metrics_written
|
||||
|
||||
internal\_gather stats collect aggregate stats on all input plugins
|
||||
internal_gather stats collect aggregate stats on all input plugins
|
||||
that are of the same input type. They are tagged with `input=<plugin_name>`.
|
||||
|
||||
- internal\_gather
|
||||
- gather\_time\_ns
|
||||
- metrics\_gathered
|
||||
- internal_gather
|
||||
- gather_time_ns
|
||||
- metrics_gathered
|
||||
|
||||
internal\_write stats collect aggregate stats on all output plugins
|
||||
internal_write stats collect aggregate stats on all output plugins
|
||||
that are of the same input type. They are tagged with `output=<plugin_name>`.
|
||||
|
||||
|
||||
- internal\_write
|
||||
- buffer\_limit
|
||||
- buffer\_size
|
||||
- metrics\_written
|
||||
- metrics\_filtered
|
||||
- write\_time\_ns
|
||||
- internal_write
|
||||
- buffer_limit
|
||||
- buffer_size
|
||||
- metrics_added
|
||||
- metrics_written
|
||||
- metrics_dropped
|
||||
- metrics_filtered
|
||||
- write_time_ns
|
||||
|
||||
internal\_\<plugin\_name\> are metrics which are defined on a per-plugin basis, and
|
||||
internal_<plugin_name> are metrics which are defined on a per-plugin basis, and
|
||||
usually contain tags which differentiate each instance of a particular type of
|
||||
plugin.
|
||||
|
||||
- internal\_\<plugin\_name\>
|
||||
- internal_<plugin_name>
|
||||
- individual plugin-specific fields, such as requests counts.
|
||||
|
||||
### Tags:
|
||||
|
@ -76,7 +78,7 @@ to each particular plugin.
|
|||
```
|
||||
internal_memstats,host=tyrion alloc_bytes=4457408i,sys_bytes=10590456i,pointer_lookups=7i,mallocs=17642i,frees=7473i,heap_sys_bytes=6848512i,heap_idle_bytes=1368064i,heap_in_use_bytes=5480448i,heap_released_bytes=0i,total_alloc_bytes=6875560i,heap_alloc_bytes=4457408i,heap_objects_bytes=10169i,num_gc=2i 1480682800000000000
|
||||
internal_agent,host=tyrion metrics_written=18i,metrics_dropped=0i,metrics_gathered=19i,gather_errors=0i 1480682800000000000
|
||||
internal_write,output=file,host=tyrion buffer_limit=10000i,write_time_ns=636609i,metrics_written=18i,buffer_size=0i 1480682800000000000
|
||||
internal_write,output=file,host=tyrion buffer_limit=10000i,write_time_ns=636609i,metrics_added=18i,metrics_written=18i,buffer_size=0i 1480682800000000000
|
||||
internal_gather,input=internal,host=tyrion metrics_gathered=19i,gather_time_ns=442114i 1480682800000000000
|
||||
internal_gather,input=http_listener,host=tyrion metrics_gathered=0i,gather_time_ns=167285i 1480682800000000000
|
||||
internal_http_listener,address=:8186,host=tyrion queries_received=0i,writes_received=0i,requests_received=0i,buffers_created=0i,requests_served=0i,pings_received=0i,bytes_received=0i,not_founds_served=0i,pings_served=0i,queries_served=0i,writes_served=0i 1480682800000000000
|
||||
|
|
|
@ -1,18 +1,14 @@

# Kafka Consumer Input Plugin

The [Kafka](http://kafka.apache.org/) consumer plugin polls a specified Kafka
topic and adds messages to InfluxDB. The plugin assumes messages follow the
line protocol. [Consumer Group](http://godoc.org/github.com/wvanbergen/kafka/consumergroup)
is used to talk to the Kafka cluster so multiple instances of telegraf can read
from the same topic in parallel.
The [Kafka][kafka] consumer plugin reads from Kafka
and creates metrics using one of the supported [input data formats][].

For old Kafka versions (< 0.8), please use the kafka_consumer_legacy input plugin
For old Kafka versions (< 0.8), please use the [kafka_consumer_legacy][] input plugin
and use the old zookeeper connection method.

## Configuration
### Configuration

```toml
# Read metrics from Kafka topic(s)
[[inputs.kafka_consumer]]
  ## kafka servers
  brokers = ["localhost:9092"]

@ -44,18 +40,27 @@ and use the old zookeeper connection method.
  ## Offset (must be either "oldest" or "newest")
  offset = "oldest"

  ## Maximum length of a message to consume, in bytes (default 0/unlimited);
  ## larger messages are dropped
  max_message_len = 1000000

  ## Maximum messages to read from the broker that have not been written by an
  ## output. For best throughput set based on the number of metrics within
  ## each message and the size of the output's metric_batch_size.
  ##
  ## For example, if each message from the queue contains 10 metrics and the
  ## output metric_batch_size is 1000, setting this to 100 will ensure that a
  ## full batch is collected and the write is triggered immediately without
  ## waiting until the next flush_interval.
  # max_undelivered_messages = 1000

  ## Data format to consume.
  ## Each data format has its own unique set of configuration options, read
  ## more about them here:
  ## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md
  data_format = "influx"

  ## Maximum length of a message to consume, in bytes (default 0/unlimited);
  ## larger messages are dropped
  max_message_len = 1000000
```

A back-of-the-envelope sizing sketch for `max_undelivered_messages` follows
the reference links below.

## Testing

Running the integration tests requires running Zookeeper and Kafka. See the
Makefile for the Kafka container command.

[kafka]: https://kafka.apache.org
[kafka_consumer_legacy]: /plugins/inputs/kafka_consumer_legacy/README.md
[input data formats]: /docs/DATA_FORMATS_INPUT.md
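To make the sizing advice in the sample configuration concrete, here is a minimal sketch (the helper is illustrative, not part of Telegraf) that reproduces the README's arithmetic: 10 metrics per message and a `metric_batch_size` of 1000 give 100 in-flight messages, exactly one full output batch.

```go
package main

import "fmt"

// maxUndelivered suggests a max_undelivered_messages value that keeps one
// full output batch in flight. Illustrative only; tune for your workload.
func maxUndelivered(metricBatchSize, metricsPerMessage int) int {
	if metricsPerMessage <= 0 {
		metricsPerMessage = 1
	}
	return metricBatchSize / metricsPerMessage
}

func main() {
	fmt.Println(maxUndelivered(1000, 10)) // 100
}
```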
@ -1,55 +1,54 @@

package kafka_consumer

import (
	"context"
	"fmt"
	"log"
	"strings"
	"sync"

	"github.com/Shopify/sarama"
	cluster "github.com/bsm/sarama-cluster"
	"github.com/influxdata/telegraf"
	"github.com/influxdata/telegraf/internal/tls"
	"github.com/influxdata/telegraf/plugins/inputs"
	"github.com/influxdata/telegraf/plugins/parsers"

	"github.com/Shopify/sarama"
	cluster "github.com/bsm/sarama-cluster"
)

const (
	defaultMaxUndeliveredMessages = 1000
)

type empty struct{}
type semaphore chan empty

type Consumer interface {
	Errors() <-chan error
	Messages() <-chan *sarama.ConsumerMessage
	MarkOffset(msg *sarama.ConsumerMessage, metadata string)
	Close() error
}

type Kafka struct {
	ConsumerGroup string
	ClientID      string `toml:"client_id"`
	Topics        []string
	Brokers       []string
	MaxMessageLen int
	Version       string `toml:"version"`

	Cluster *cluster.Consumer

	ConsumerGroup          string   `toml:"consumer_group"`
	ClientID               string   `toml:"client_id"`
	Topics                 []string `toml:"topics"`
	Brokers                []string `toml:"brokers"`
	MaxMessageLen          int      `toml:"max_message_len"`
	Version                string   `toml:"version"`
	MaxUndeliveredMessages int      `toml:"max_undelivered_messages"`
	Offset                 string   `toml:"offset"`
	SASLUsername           string   `toml:"sasl_username"`
	SASLPassword           string   `toml:"sasl_password"`
	tls.ClientConfig

	// SASL Username
	SASLUsername string `toml:"sasl_username"`
	// SASL Password
	SASLPassword string `toml:"sasl_password"`

	cluster Consumer
	parser  parsers.Parser
	wg      *sync.WaitGroup
	cancel  context.CancelFunc

	// Legacy metric buffer support
	MetricBuffer int
	// TODO remove PointBuffer, legacy support
	PointBuffer int

	Offset string
	parser parsers.Parser

	sync.Mutex

	// channel for all incoming kafka messages
	in <-chan *sarama.ConsumerMessage
	// channel for all kafka consumer errors
	errs <-chan error
	done chan struct{}

	// keep the accumulator internally:
	acc telegraf.Accumulator
	// Unconfirmed messages
	messages map[telegraf.TrackingID]*sarama.ConsumerMessage

	// doNotCommitMsgs tells the parser not to call CommitUpTo on the consumer
	// this is mostly for test purposes, but there may be a use-case for it later.

@ -86,16 +85,25 @@ var sampleConfig = `
  consumer_group = "telegraf_metrics_consumers"
  ## Offset (must be either "oldest" or "newest")
  offset = "oldest"
  ## Maximum length of a message to consume, in bytes (default 0/unlimited);
  ## larger messages are dropped
  max_message_len = 1000000

  ## Maximum messages to read from the broker that have not been written by an
  ## output. For best throughput set based on the number of metrics within
  ## each message and the size of the output's metric_batch_size.
  ##
  ## For example, if each message from the queue contains 10 metrics and the
  ## output metric_batch_size is 1000, setting this to 100 will ensure that a
  ## full batch is collected and the write is triggered immediately without
  ## waiting until the next flush_interval.
  # max_undelivered_messages = 1000

  ## Data format to consume.
  ## Each data format has its own unique set of configuration options, read
  ## more about them here:
  ## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md
  data_format = "influx"

  ## Maximum length of a message to consume, in bytes (default 0/unlimited);
  ## larger messages are dropped
  max_message_len = 1000000
`

func (k *Kafka) SampleConfig() string {

@ -111,12 +119,8 @@ func (k *Kafka) SetParser(parser parsers.Parser) {
}

func (k *Kafka) Start(acc telegraf.Accumulator) error {
	k.Lock()
	defer k.Unlock()
	var clusterErr error

	k.acc = acc

	config := cluster.NewConfig()

	if k.Version != "" {

@ -159,13 +163,13 @@ func (k *Kafka) Start(acc telegraf.Accumulator) error {
	case "newest":
		config.Consumer.Offsets.Initial = sarama.OffsetNewest
	default:
		log.Printf("I! WARNING: Kafka consumer invalid offset '%s', using 'oldest'\n",
		log.Printf("I! WARNING: Kafka consumer invalid offset '%s', using 'oldest'",
			k.Offset)
		config.Consumer.Offsets.Initial = sarama.OffsetOldest
	}

	if k.Cluster == nil {
		k.Cluster, clusterErr = cluster.NewConsumer(
	if k.cluster == nil {
		k.cluster, clusterErr = cluster.NewConsumer(
			k.Brokers,
			k.ConsumerGroup,
			k.Topics,

@ -173,67 +177,110 @@ func (k *Kafka) Start(acc telegraf.Accumulator) error {
		)

		if clusterErr != nil {
			log.Printf("E! Error when creating Kafka Consumer, brokers: %v, topics: %v\n",
			log.Printf("E! Error when creating Kafka Consumer, brokers: %v, topics: %v",
				k.Brokers, k.Topics)
			return clusterErr
		}

		// Setup message and error channels
		k.in = k.Cluster.Messages()
		k.errs = k.Cluster.Errors()
	}

	k.done = make(chan struct{})
	// Start the kafka message reader
	go k.receiver()
	log.Printf("I! Started the kafka consumer service, brokers: %v, topics: %v\n",
	ctx, cancel := context.WithCancel(context.Background())
	k.cancel = cancel

	// Start consumer goroutine
	k.wg = &sync.WaitGroup{}
	k.wg.Add(1)
	go func() {
		defer k.wg.Done()
		k.receiver(ctx, acc)
	}()

	log.Printf("I! Started the kafka consumer service, brokers: %v, topics: %v",
		k.Brokers, k.Topics)
	return nil
}

// receiver() reads all incoming messages from the consumer, and parses them into
// influxdb metric points.
func (k *Kafka) receiver() {
func (k *Kafka) receiver(ctx context.Context, ac telegraf.Accumulator) {
	k.messages = make(map[telegraf.TrackingID]*sarama.ConsumerMessage)

	acc := ac.WithTracking(k.MaxUndeliveredMessages)
	sem := make(semaphore, k.MaxUndeliveredMessages)

	for {
		select {
		case <-k.done:
		case <-ctx.Done():
			return
		case err := <-k.errs:
			if err != nil {
				k.acc.AddError(fmt.Errorf("Consumer Error: %s\n", err))
			}
		case msg := <-k.in:
			if k.MaxMessageLen != 0 && len(msg.Value) > k.MaxMessageLen {
				k.acc.AddError(fmt.Errorf("Message longer than max_message_len (%d > %d)",
					len(msg.Value), k.MaxMessageLen))
			} else {
				metrics, err := k.parser.Parse(msg.Value)
		case track := <-acc.Delivered():
			<-sem
			k.onDelivery(track)
		case err := <-k.cluster.Errors():
			acc.AddError(err)
		case sem <- empty{}:
			select {
			case <-ctx.Done():
				return
			case track := <-acc.Delivered():
				// Once for the delivered message, once to leave the case
				<-sem
				<-sem
				k.onDelivery(track)
			case err := <-k.cluster.Errors():
				<-sem
				acc.AddError(err)
			case msg := <-k.cluster.Messages():
				err := k.onMessage(acc, msg)
				if err != nil {
					k.acc.AddError(fmt.Errorf("Message Parse Error\nmessage: %s\nerror: %s",
						string(msg.Value), err.Error()))
					acc.AddError(err)
					<-sem
				}
				for _, metric := range metrics {
					k.acc.AddFields(metric.Name(), metric.Fields(), metric.Tags(), metric.Time())
				}
			}

			if !k.doNotCommitMsgs {
				// TODO(cam) this locking can be removed if this PR gets merged:
				// https://github.com/wvanbergen/kafka/pull/84
				k.Lock()
				k.Cluster.MarkOffset(msg, "")
				k.Unlock()
			}
		}
	}
}

func (k *Kafka) markOffset(msg *sarama.ConsumerMessage) {
	if !k.doNotCommitMsgs {
		k.cluster.MarkOffset(msg, "")
	}
}

func (k *Kafka) onMessage(acc telegraf.TrackingAccumulator, msg *sarama.ConsumerMessage) error {
	if k.MaxMessageLen != 0 && len(msg.Value) > k.MaxMessageLen {
		k.markOffset(msg)
		return fmt.Errorf("Message longer than max_message_len (%d > %d)",
			len(msg.Value), k.MaxMessageLen)
	}

	metrics, err := k.parser.Parse(msg.Value)
	if err != nil {
		return err
	}

	id := acc.AddTrackingMetricGroup(metrics)
	k.messages[id] = msg

	return nil
}

func (k *Kafka) onDelivery(track telegraf.DeliveryInfo) {
	msg, ok := k.messages[track.ID()]
	if !ok {
		log.Printf("E! [inputs.kafka_consumer] Could not mark message delivered: %d", track.ID())
	}

	if track.Delivered() {
		k.markOffset(msg)
	}
	delete(k.messages, track.ID())
}

func (k *Kafka) Stop() {
	k.Lock()
	defer k.Unlock()
	close(k.done)
	if err := k.Cluster.Close(); err != nil {
		k.acc.AddError(fmt.Errorf("Error closing consumer: %s\n", err.Error()))
	k.cancel()
	k.wg.Wait()

	if err := k.cluster.Close(); err != nil {
		log.Printf("E! [inputs.kafka_consumer] Error closing consumer: %v", err)
	}
}

@ -243,6 +290,8 @@ func (k *Kafka) Gather(acc telegraf.Accumulator) error {

func init() {
	inputs.Add("kafka_consumer", func() telegraf.Input {
		return &Kafka{}
		return &Kafka{
			MaxUndeliveredMessages: defaultMaxUndeliveredMessages,
		}
	})
}
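The nested select in `receiver` above is the core of this change, and the double `<-sem` is easy to misread. A self-contained sketch of the same flow-control pattern (channel names are illustrative, not the plugin's API): a slot is reserved before reading a message from the broker, held until that message's metrics are written by an output, and a delivery observed from inside the reserving case must release two slots, one for the delivered message and one for the slot just reserved.

```go
package consumer

type empty struct{}
type semaphore chan empty

// consume admits at most maxUndelivered broker messages whose metrics have
// not yet been acknowledged on the delivered channel.
func consume(done <-chan struct{}, msgs <-chan string, delivered <-chan int, maxUndelivered int) {
	sem := make(semaphore, maxUndelivered)
	for {
		select {
		case <-done:
			return
		case <-delivered:
			<-sem // a previously admitted message was written by an output
		case sem <- empty{}: // reserve a slot before reading the next message
			select {
			case <-done:
				return
			case <-delivered:
				<-sem // once for the delivered message...
				<-sem // ...once for the slot reserved to enter this case
			case msg := <-msgs:
				_ = msg // parse and track; the slot stays held until delivery
			}
		}
	}
}
```

With this shape, a slow or stopped output fills the semaphore and the loop simply stops pulling from the broker instead of buffering without bound, which is the behavioral fix this commit makes.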
@ -38,7 +38,6 @@ func TestReadsMetricsFromKafka(t *testing.T) {
		ConsumerGroup: "telegraf_test_consumers",
		Topics:        []string{testTopic},
		Brokers:       brokerPeers,
		PointBuffer:   100000,
		Offset:        "oldest",
	}
	p, _ := parsers.NewInfluxParser()
@ -1,13 +1,14 @@

package kafka_consumer

import (
	"context"
	"strings"
	"testing"

	"github.com/Shopify/sarama"
	"github.com/influxdata/telegraf"
	"github.com/influxdata/telegraf/plugins/parsers"
	"github.com/influxdata/telegraf/testutil"

	"github.com/Shopify/sarama"
	"github.com/stretchr/testify/assert"
)

@ -18,31 +19,57 @@ const (
	invalidMsg = "cpu_load_short,host=server01 1422568543702900257\n"
)

func newTestKafka() (*Kafka, chan *sarama.ConsumerMessage) {
	in := make(chan *sarama.ConsumerMessage, 1000)
	k := Kafka{
		ConsumerGroup:   "test",
		Topics:          []string{"telegraf"},
		Brokers:         []string{"localhost:9092"},
		Offset:          "oldest",
		in:              in,
		doNotCommitMsgs: true,
		errs:            make(chan error, 1000),
		done:            make(chan struct{}),
type TestConsumer struct {
	errors   chan error
	messages chan *sarama.ConsumerMessage
}

func (c *TestConsumer) Errors() <-chan error {
	return c.errors
}

func (c *TestConsumer) Messages() <-chan *sarama.ConsumerMessage {
	return c.messages
}

func (c *TestConsumer) MarkOffset(msg *sarama.ConsumerMessage, metadata string) {
}

func (c *TestConsumer) Close() error {
	return nil
}

func (c *TestConsumer) Inject(msg *sarama.ConsumerMessage) {
	c.messages <- msg
}

func newTestKafka() (*Kafka, *TestConsumer) {
	consumer := &TestConsumer{
		errors:   make(chan error),
		messages: make(chan *sarama.ConsumerMessage, 1000),
	}
	return &k, in
	k := Kafka{
		cluster:                consumer,
		ConsumerGroup:          "test",
		Topics:                 []string{"telegraf"},
		Brokers:                []string{"localhost:9092"},
		Offset:                 "oldest",
		MaxUndeliveredMessages: defaultMaxUndeliveredMessages,
		doNotCommitMsgs:        true,
		messages:               make(map[telegraf.TrackingID]*sarama.ConsumerMessage),
	}
	return &k, consumer
}

// Test that the parser parses kafka messages into points
func TestRunParser(t *testing.T) {
	k, in := newTestKafka()
	k, consumer := newTestKafka()
	acc := testutil.Accumulator{}
	k.acc = &acc
	defer close(k.done)
	ctx := context.Background()

	k.parser, _ = parsers.NewInfluxParser()
	go k.receiver()
	in <- saramaMsg(testMsg)
	go k.receiver(ctx, &acc)
	consumer.Inject(saramaMsg(testMsg))
	acc.Wait(1)

	assert.Equal(t, acc.NFields(), 1)

@ -50,14 +77,13 @@ func TestRunParser(t *testing.T) {

// Test that the parser ignores invalid messages
func TestRunParserInvalidMsg(t *testing.T) {
	k, in := newTestKafka()
	k, consumer := newTestKafka()
	acc := testutil.Accumulator{}
	k.acc = &acc
	defer close(k.done)
	ctx := context.Background()

	k.parser, _ = parsers.NewInfluxParser()
	go k.receiver()
	in <- saramaMsg(invalidMsg)
	go k.receiver(ctx, &acc)
	consumer.Inject(saramaMsg(invalidMsg))
	acc.WaitError(1)

	assert.Equal(t, acc.NFields(), 0)

@ -66,15 +92,14 @@ func TestRunParserInvalidMsg(t *testing.T) {
// Test that overlong messages are dropped
func TestDropOverlongMsg(t *testing.T) {
	const maxMessageLen = 64 * 1024
	k, in := newTestKafka()
	k, consumer := newTestKafka()
	k.MaxMessageLen = maxMessageLen
	acc := testutil.Accumulator{}
	k.acc = &acc
	defer close(k.done)
	ctx := context.Background()
	overlongMsg := strings.Repeat("v", maxMessageLen+1)

	go k.receiver()
	in <- saramaMsg(overlongMsg)
	go k.receiver(ctx, &acc)
	consumer.Inject(saramaMsg(overlongMsg))
	acc.WaitError(1)

	assert.Equal(t, acc.NFields(), 0)

@ -82,14 +107,13 @@ func TestDropOverlongMsg(t *testing.T) {

// Test that the parser parses kafka messages into points
func TestRunParserAndGather(t *testing.T) {
	k, in := newTestKafka()
	k, consumer := newTestKafka()
	acc := testutil.Accumulator{}
	k.acc = &acc
	defer close(k.done)
	ctx := context.Background()

	k.parser, _ = parsers.NewInfluxParser()
	go k.receiver()
	in <- saramaMsg(testMsg)
	go k.receiver(ctx, &acc)
	consumer.Inject(saramaMsg(testMsg))
	acc.Wait(1)

	acc.GatherError(k.Gather)

@ -101,14 +125,13 @@ func TestRunParserAndGather(t *testing.T) {

// Test that the parser parses kafka messages into points
func TestRunParserAndGatherGraphite(t *testing.T) {
	k, in := newTestKafka()
	k, consumer := newTestKafka()
	acc := testutil.Accumulator{}
	k.acc = &acc
	defer close(k.done)
	ctx := context.Background()

	k.parser, _ = parsers.NewGraphiteParser("_", []string{}, nil)
	go k.receiver()
	in <- saramaMsg(testMsgGraphite)
	go k.receiver(ctx, &acc)
	consumer.Inject(saramaMsg(testMsgGraphite))
	acc.Wait(1)

	acc.GatherError(k.Gather)

@ -120,17 +143,16 @@ func TestRunParserAndGatherGraphite(t *testing.T) {

// Test that the parser parses kafka messages into points
func TestRunParserAndGatherJSON(t *testing.T) {
	k, in := newTestKafka()
	k, consumer := newTestKafka()
	acc := testutil.Accumulator{}
	k.acc = &acc
	defer close(k.done)
	ctx := context.Background()

	k.parser, _ = parsers.NewParser(&parsers.Config{
		DataFormat: "json",
		MetricName: "kafka_json_test",
	})
	go k.receiver()
	in <- saramaMsg(testMsgJSON)
	go k.receiver(ctx, &acc)
	consumer.Inject(saramaMsg(testMsgJSON))
	acc.Wait(1)

	acc.GatherError(k.Gather)
@ -1,14 +1,11 @@

# MQTT Consumer Input Plugin

The [MQTT](http://mqtt.org/) consumer plugin reads from
specified MQTT topics and adds messages to InfluxDB.
The plugin expects messages in the
[Telegraf Input Data Formats](https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md).
The [MQTT][mqtt] consumer plugin reads from the specified MQTT topics
and creates metrics using one of the supported [input data formats][].

### Configuration:

```toml
# Read metrics from MQTT topic(s)
[[inputs.mqtt_consumer]]
  ## MQTT broker URLs to be used. The format should be scheme://host:port,
  ## scheme can be tcp, ssl, or ws.

@ -26,6 +23,16 @@ The plugin expects messages in the
  ## Connection timeout for initial connection in seconds
  connection_timeout = "30s"

  ## Maximum messages to read from the broker that have not been written by an
  ## output. For best throughput set based on the number of metrics within
  ## each message and the size of the output's metric_batch_size.
  ##
  ## For example, if each message from the queue contains 10 metrics and the
  ## output metric_batch_size is 1000, setting this to 100 will ensure that a
  ## full batch is collected and the write is triggered immediately without
  ## waiting until the next flush_interval.
  # max_undelivered_messages = 1000

  ## Topics to subscribe to
  topics = [
    "telegraf/host01/cpu",

@ -62,3 +69,6 @@ The plugin expects messages in the

- All measurements are tagged with the incoming topic, i.e.
  `topic=telegraf/host01/cpu`

[mqtt]: https://mqtt.org
[input data formats]: /docs/DATA_FORMATS_INPUT.md
@ -1,25 +1,31 @@

package mqtt_consumer

import (
	"context"
	"errors"
	"fmt"
	"log"
	"strings"
	"time"

	"github.com/eclipse/paho.mqtt.golang"
	"github.com/influxdata/telegraf"
	"github.com/influxdata/telegraf/internal"
	"github.com/influxdata/telegraf/internal/tls"
	"github.com/influxdata/telegraf/plugins/inputs"
	"github.com/influxdata/telegraf/plugins/parsers"

	"github.com/eclipse/paho.mqtt.golang"
)

// 30 Seconds is the default used by paho.mqtt.golang
var defaultConnectionTimeout = internal.Duration{Duration: 30 * time.Second}
var (
	// 30 Seconds is the default used by paho.mqtt.golang
	defaultConnectionTimeout = internal.Duration{Duration: 30 * time.Second}

	defaultMaxUndeliveredMessages = 1000
)

type ConnectionState int
type empty struct{}
type semaphore chan empty

const (
	Disconnected ConnectionState = iota

@ -28,12 +34,13 @@ const (
)

type MQTTConsumer struct {
	Servers           []string
	Topics            []string
	Username          string
	Password          string
	QoS               int               `toml:"qos"`
	ConnectionTimeout internal.Duration `toml:"connection_timeout"`
	Servers                []string
	Topics                 []string
	Username               string
	Password               string
	QoS                    int               `toml:"qos"`
	ConnectionTimeout      internal.Duration `toml:"connection_timeout"`
	MaxUndeliveredMessages int               `toml:"max_undelivered_messages"`

	parser parsers.Parser

@ -45,9 +52,14 @@ type MQTTConsumer struct {
	tls.ClientConfig

	client mqtt.Client
	acc    telegraf.Accumulator
	acc        telegraf.TrackingAccumulator
	state      ConnectionState
	subscribed bool
	sem        semaphore
	messages   map[telegraf.TrackingID]bool

	ctx    context.Context
	cancel context.CancelFunc
}

var sampleConfig = `

@ -67,6 +79,16 @@ var sampleConfig = `
  ## Connection timeout for initial connection in seconds
  connection_timeout = "30s"

  ## Maximum messages to read from the broker that have not been written by an
  ## output. For best throughput set based on the number of metrics within
  ## each message and the size of the output's metric_batch_size.
  ##
  ## For example, if each message from the queue contains 10 metrics and the
  ## output metric_batch_size is 1000, setting this to 100 will ensure that a
  ## full batch is collected and the write is triggered immediately without
  ## waiting until the next flush_interval.
  # max_undelivered_messages = 1000

  ## Topics to subscribe to
  topics = [
    "telegraf/host01/cpu",

@ -118,7 +140,6 @@ func (m *MQTTConsumer) Start(acc telegraf.Accumulator) error {
		return errors.New("persistent_session requires client_id")
	}

	m.acc = acc
	if m.QoS > 2 || m.QoS < 0 {
		return fmt.Errorf("qos value must be 0, 1, or 2: %d", m.QoS)
	}

@ -127,6 +148,9 @@ func (m *MQTTConsumer) Start(acc telegraf.Accumulator) error {
		return fmt.Errorf("connection_timeout must be greater than 1s: %s", m.ConnectionTimeout.Duration)
	}

	m.acc = acc.WithTracking(m.MaxUndeliveredMessages)
	m.ctx, m.cancel = context.WithCancel(context.Background())

	opts, err := m.createOpts()
	if err != nil {
		return err

@ -146,8 +170,10 @@ func (m *MQTTConsumer) connect() error {
		return err
	}

	log.Printf("I! [inputs.mqtt_consumer]: connected %v", m.Servers)
	log.Printf("I! [inputs.mqtt_consumer] Connected %v", m.Servers)
	m.state = Connected
	m.sem = make(semaphore, m.MaxUndeliveredMessages)
	m.messages = make(map[telegraf.TrackingID]bool)

	// Only subscribe on first connection when using persistent sessions. On
	// subsequent connections the subscriptions should be stored in the

@ -172,38 +198,64 @@ func (m *MQTTConsumer) connect() error {

func (m *MQTTConsumer) onConnectionLost(c mqtt.Client, err error) {
	m.acc.AddError(fmt.Errorf("connection lost: %v", err))
	log.Printf("D! [inputs.mqtt_consumer]: disconnected %v", m.Servers)
	log.Printf("D! [inputs.mqtt_consumer] Disconnected %v", m.Servers)
	m.state = Disconnected
	return
}

func (m *MQTTConsumer) recvMessage(c mqtt.Client, msg mqtt.Message) {
	topic := msg.Topic()
	for {
		select {
		case track := <-m.acc.Delivered():
			_, ok := m.messages[track.ID()]
			if !ok {
				// Added by a previous connection
				continue
			}
			<-m.sem
			// No ack, MQTT does not support durable handling
			delete(m.messages, track.ID())
		case m.sem <- empty{}:
			err := m.onMessage(m.acc, msg)
			if err != nil {
				m.acc.AddError(err)
				<-m.sem
			}
			return
		}
	}
}

func (m *MQTTConsumer) onMessage(acc telegraf.TrackingAccumulator, msg mqtt.Message) error {
	metrics, err := m.parser.Parse(msg.Payload())
	if err != nil {
		m.acc.AddError(err)
		return err
	}

	topic := msg.Topic()
	for _, metric := range metrics {
		tags := metric.Tags()
		tags["topic"] = topic
		m.acc.AddFields(metric.Name(), metric.Fields(), tags, metric.Time())
		metric.AddTag("topic", topic)
	}

	id := acc.AddTrackingMetricGroup(metrics)
	m.messages[id] = true
	return nil
}

func (m *MQTTConsumer) Stop() {
	if m.state == Connected {
		log.Printf("D! [inputs.mqtt_consumer]: disconnecting %v", m.Servers)
		log.Printf("D! [inputs.mqtt_consumer] Disconnecting %v", m.Servers)
		m.client.Disconnect(200)
		log.Printf("D! [inputs.mqtt_consumer]: disconnected %v", m.Servers)
		log.Printf("D! [inputs.mqtt_consumer] Disconnected %v", m.Servers)
		m.state = Disconnected
	}
	m.cancel()
}

func (m *MQTTConsumer) Gather(acc telegraf.Accumulator) error {
	if m.state == Disconnected {
		m.state = Connecting
		log.Printf("D! [inputs.mqtt_consumer]: connecting %v", m.Servers)
		log.Printf("D! [inputs.mqtt_consumer] Connecting %v", m.Servers)
		m.connect()
	}

@ -246,7 +298,7 @@ func (m *MQTTConsumer) createOpts() (*mqtt.ClientOptions, error) {
	for _, server := range m.Servers {
		// Preserve support for host:port style servers; deprecated in Telegraf 1.4.4
		if !strings.Contains(server, "://") {
			log.Printf("W! [inputs.mqtt_consumer] server %q should be updated to use `scheme://host:port` format", server)
			log.Printf("W! [inputs.mqtt_consumer] Server %q should be updated to use `scheme://host:port` format", server)
			if tlsCfg == nil {
				server = "tcp://" + server
			} else {

@ -267,8 +319,9 @@ func (m *MQTTConsumer) createOpts() (*mqtt.ClientOptions, error) {
func init() {
	inputs.Add("mqtt_consumer", func() telegraf.Input {
		return &MQTTConsumer{
			ConnectionTimeout: defaultConnectionTimeout,
			state:             Disconnected,
			ConnectionTimeout:      defaultConnectionTimeout,
			MaxUndeliveredMessages: defaultMaxUndeliveredMessages,
			state:                  Disconnected,
		}
	})
}
|
||||
|
|
|
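One subtlety in the MQTT variant above: `connect()` recreates both `m.sem` and `m.messages` on every (re)connection, so a delivery notification whose tracking ID is not in the current map was added by a previous connection and must be skipped rather than releasing a slot it no longer owns. A compressed sketch of that guard (names simplified, not the plugin's API):

```go
package consumer

type trackingID uint64

type session struct {
	sem      chan struct{}
	messages map[trackingID]bool // recreated on every reconnect
}

func (s *session) onDelivered(id trackingID) {
	if !s.messages[id] {
		// Added by a previous connection; its semaphore was replaced on
		// reconnect, so there is no slot to release here.
		return
	}
	<-s.sem // release the slot held since the message was admitted
	delete(s.messages, id)
}
```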
@ -3,12 +3,9 @@ package mqtt_consumer

import (
	"testing"

	"github.com/influxdata/telegraf/plugins/parsers"
	"github.com/influxdata/telegraf/testutil"

	"github.com/stretchr/testify/assert"

	"github.com/eclipse/paho.mqtt.golang"
	"github.com/influxdata/telegraf/testutil"
	"github.com/stretchr/testify/assert"
)

const (

@ -71,47 +68,6 @@ func TestPersistentClientIDFail(t *testing.T) {
	assert.Error(t, err)
}

func TestRunParser(t *testing.T) {
	n := newTestMQTTConsumer()
	acc := testutil.Accumulator{}
	n.acc = &acc
	n.parser, _ = parsers.NewInfluxParser()

	n.recvMessage(nil, mqttMsg(testMsg))

	if a := acc.NFields(); a != 1 {
		t.Errorf("got %v, expected %v", a, 1)
	}
}

// Test that the parser ignores invalid messages
func TestRunParserInvalidMsg(t *testing.T) {
	n := newTestMQTTConsumer()
	acc := testutil.Accumulator{}
	n.acc = &acc
	n.parser, _ = parsers.NewInfluxParser()

	n.recvMessage(nil, mqttMsg(invalidMsg))

	if a := acc.NFields(); a != 0 {
		t.Errorf("got %v, expected %v", a, 0)
	}
	assert.Len(t, acc.Errors, 1)
}

// Test that the parser parses line format messages into metrics
func TestRunParserAndGather(t *testing.T) {
	n := newTestMQTTConsumer()
	acc := testutil.Accumulator{}
	n.acc = &acc
	n.parser, _ = parsers.NewInfluxParser()

	n.recvMessage(nil, mqttMsg(testMsg))

	acc.AssertContainsFields(t, "cpu_load_short",
		map[string]interface{}{"value": float64(23422)})
}

func mqttMsg(val string) mqtt.Message {
	return &message{
		topic: "telegraf/unit_test",
@ -1,16 +1,14 @@

# NATS Consumer Input Plugin

The [NATS](http://www.nats.io/about/) consumer plugin reads from
specified NATS subjects and adds messages to InfluxDB. The plugin expects messages
in the [Telegraf Input Data Formats](https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md).
A [Queue Group](http://www.nats.io/documentation/concepts/nats-queueing/)
is used when subscribing to subjects so multiple instances of telegraf can read
from a NATS cluster in parallel.
The [NATS][nats] consumer plugin reads from the specified NATS subjects and
creates metrics using one of the supported [input data formats][].

## Configuration
A [Queue Group][queue group] is used when subscribing to subjects so multiple
instances of telegraf can read from a NATS cluster in parallel.

### Configuration:

```toml
# Read metrics from NATS subject(s)
[[inputs.nats_consumer]]
  ## urls of NATS servers
  servers = ["nats://localhost:4222"]

@ -20,13 +18,29 @@ from a NATS cluster in parallel.
  subjects = ["telegraf"]
  ## name a queue group
  queue_group = "telegraf_consumers"
  ## Maximum number of metrics to buffer between collection intervals
  metric_buffer = 100000

  ## Sets the limits for pending msgs and bytes for each subscription
  ## These shouldn't need to be adjusted except in very high throughput scenarios
  # pending_message_limit = 65536
  # pending_bytes_limit = 67108864

  ## Maximum messages to read from the broker that have not been written by an
  ## output. For best throughput set based on the number of metrics within
  ## each message and the size of the output's metric_batch_size.
  ##
  ## For example, if each message from the queue contains 10 metrics and the
  ## output metric_batch_size is 1000, setting this to 100 will ensure that a
  ## full batch is collected and the write is triggered immediately without
  ## waiting until the next flush_interval.
  # max_undelivered_messages = 1000

  ## Data format to consume.
  ## Each data format has its own unique set of configuration options, read
  ## more about them here:
  ## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md
  data_format = "influx"
```

[nats]: https://www.nats.io/about/
[input data formats]: /docs/DATA_FORMATS_INPUT.md
[queue group]: https://www.nats.io/documentation/concepts/nats-queueing/
@ -1,6 +1,7 @@

package natsconsumer

import (
	"context"
	"fmt"
	"log"
	"sync"

@ -11,6 +12,13 @@ import (
	nats "github.com/nats-io/go-nats"
)

var (
	defaultMaxUndeliveredMessages = 1000
)

type empty struct{}
type semaphore chan empty

type natsError struct {
	conn *nats.Conn
	sub  *nats.Subscription

@ -23,48 +31,58 @@ func (e natsError) Error() string {
}

type natsConsumer struct {
	QueueGroup string
	Subjects   []string
	Servers    []string
	Secure     bool
	QueueGroup string   `toml:"queue_group"`
	Subjects   []string `toml:"subjects"`
	Servers    []string `toml:"servers"`
	Secure     bool     `toml:"secure"`

	// Client pending limits:
	PendingMessageLimit int
	PendingBytesLimit   int
	PendingMessageLimit int `toml:"pending_message_limit"`
	PendingBytesLimit   int `toml:"pending_bytes_limit"`

	MaxUndeliveredMessages int `toml:"max_undelivered_messages"`

	// Legacy metric buffer support; deprecated in v0.10.3
	MetricBuffer int

	conn *nats.Conn
	subs []*nats.Subscription

	parser parsers.Parser

	sync.Mutex
	wg sync.WaitGroup
	Conn *nats.Conn
	Subs []*nats.Subscription

	// channel for all incoming NATS messages
	in chan *nats.Msg
	// channel for all NATS read errors
	errs chan error
	done chan struct{}
	acc  telegraf.Accumulator
	errs chan error
	acc    telegraf.TrackingAccumulator
	wg     sync.WaitGroup
	cancel context.CancelFunc
}

var sampleConfig = `
  ## urls of NATS servers
  # servers = ["nats://localhost:4222"]
  servers = ["nats://localhost:4222"]
  ## Use Transport Layer Security
  # secure = false
  secure = false
  ## subject(s) to consume
  # subjects = ["telegraf"]
  subjects = ["telegraf"]
  ## name a queue group
  # queue_group = "telegraf_consumers"
  queue_group = "telegraf_consumers"

  ## Sets the limits for pending msgs and bytes for each subscription
  ## These shouldn't need to be adjusted except in very high throughput scenarios
  # pending_message_limit = 65536
  # pending_bytes_limit = 67108864

  ## Maximum messages to read from the broker that have not been written by an
  ## output. For best throughput set based on the number of metrics within
  ## each message and the size of the output's metric_batch_size.
  ##
  ## For example, if each message from the queue contains 10 metrics and the
  ## output metric_batch_size is 1000, setting this to 100 will ensure that a
  ## full batch is collected and the write is triggered immediately without
  ## waiting until the next flush_interval.
  # max_undelivered_messages = 1000

  ## Data format to consume.
  ## Each data format has its own unique set of configuration options, read
  ## more about them here:

@ -94,10 +112,7 @@ func (n *natsConsumer) natsErrHandler(c *nats.Conn, s *nats.Subscription, e erro

// Start the nats consumer. Caller must call *natsConsumer.Stop() to clean up.
func (n *natsConsumer) Start(acc telegraf.Accumulator) error {
	n.Lock()
	defer n.Unlock()

	n.acc = acc
	n.acc = acc.WithTracking(n.MaxUndeliveredMessages)

	var connectErr error

@ -112,89 +127,106 @@ func (n *natsConsumer) Start(acc telegraf.Accumulator) error {

	opts.Secure = n.Secure

	if n.Conn == nil || n.Conn.IsClosed() {
		n.Conn, connectErr = opts.Connect()
	if n.conn == nil || n.conn.IsClosed() {
		n.conn, connectErr = opts.Connect()
		if connectErr != nil {
			return connectErr
		}

		// Setup message and error channels
		n.errs = make(chan error)
		n.Conn.SetErrorHandler(n.natsErrHandler)
		n.conn.SetErrorHandler(n.natsErrHandler)

		n.in = make(chan *nats.Msg, 1000)
		for _, subj := range n.Subjects {
			sub, err := n.Conn.QueueSubscribe(subj, n.QueueGroup, func(m *nats.Msg) {
			sub, err := n.conn.QueueSubscribe(subj, n.QueueGroup, func(m *nats.Msg) {
				n.in <- m
			})
			if err != nil {
				return err
			}
			// ensure that the subscription has been processed by the server
			if err = n.Conn.Flush(); err != nil {
			if err = n.conn.Flush(); err != nil {
				return err
			}
			// set the subscription pending limits
			if err = sub.SetPendingLimits(n.PendingMessageLimit, n.PendingBytesLimit); err != nil {
				return err
			}
			n.Subs = append(n.Subs, sub)
			n.subs = append(n.subs, sub)
		}
	}

	n.done = make(chan struct{})
	ctx, cancel := context.WithCancel(context.Background())
	n.cancel = cancel

	// Start the message reader
	n.wg.Add(1)
	go n.receiver()
	go func() {
		defer n.wg.Done()
		go n.receiver(ctx)
	}()

	log.Printf("I! Started the NATS consumer service, nats: %v, subjects: %v, queue: %v\n",
		n.Conn.ConnectedUrl(), n.Subjects, n.QueueGroup)
		n.conn.ConnectedUrl(), n.Subjects, n.QueueGroup)

	return nil
}

// receiver() reads all incoming messages from NATS, and parses them into
// telegraf metrics.
func (n *natsConsumer) receiver() {
	defer n.wg.Done()
func (n *natsConsumer) receiver(ctx context.Context) {
	sem := make(semaphore, n.MaxUndeliveredMessages)

	for {
		select {
		case <-n.done:
		case <-ctx.Done():
			return
		case <-n.acc.Delivered():
			<-sem
		case err := <-n.errs:
			n.acc.AddError(fmt.Errorf("E! error reading from %s\n", err.Error()))
		case msg := <-n.in:
			metrics, err := n.parser.Parse(msg.Data)
			if err != nil {
				n.acc.AddError(fmt.Errorf("E! subject: %s, error: %s", msg.Subject, err.Error()))
			}
			n.acc.AddError(err)
		case sem <- empty{}:
			select {
			case <-ctx.Done():
				return
			case err := <-n.errs:
				<-sem
				n.acc.AddError(err)
			case <-n.acc.Delivered():
				<-sem
				<-sem
			case msg := <-n.in:
				metrics, err := n.parser.Parse(msg.Data)
				if err != nil {
					n.acc.AddError(fmt.Errorf("subject: %s, error: %s", msg.Subject, err.Error()))
					<-sem
					continue
				}

			for _, metric := range metrics {
				n.acc.AddFields(metric.Name(), metric.Fields(), metric.Tags(), metric.Time())
				n.acc.AddTrackingMetricGroup(metrics)
			}
		}
	}
}

func (n *natsConsumer) clean() {
	for _, sub := range n.Subs {
	for _, sub := range n.subs {
		if err := sub.Unsubscribe(); err != nil {
			n.acc.AddError(fmt.Errorf("E! Error unsubscribing from subject %s in queue %s: %s\n",
			n.acc.AddError(fmt.Errorf("Error unsubscribing from subject %s in queue %s: %s\n",
				sub.Subject, sub.Queue, err.Error()))
		}
	}

	if n.Conn != nil && !n.Conn.IsClosed() {
		n.Conn.Close()
	if n.conn != nil && !n.conn.IsClosed() {
		n.conn.Close()
	}
}

func (n *natsConsumer) Stop() {
	n.Lock()
	close(n.done)
	n.cancel()
	n.wg.Wait()
	n.clean()
	n.Unlock()
}

func (n *natsConsumer) Gather(acc telegraf.Accumulator) error {

@ -204,12 +236,13 @@ func (n *natsConsumer) Gather(acc telegraf.Accumulator) error {
func init() {
	inputs.Add("nats_consumer", func() telegraf.Input {
		return &natsConsumer{
			Servers:             []string{"nats://localhost:4222"},
			Secure:              false,
			Subjects:            []string{"telegraf"},
			QueueGroup:          "telegraf_consumers",
			PendingBytesLimit:   nats.DefaultSubPendingBytesLimit,
			PendingMessageLimit: nats.DefaultSubPendingMsgsLimit,
			Servers:                []string{"nats://localhost:4222"},
			Secure:                 false,
			Subjects:               []string{"telegraf"},
			QueueGroup:             "telegraf_consumers",
			PendingBytesLimit:      nats.DefaultSubPendingBytesLimit,
			PendingMessageLimit:    nats.DefaultSubPendingMsgsLimit,
			MaxUndeliveredMessages: defaultMaxUndeliveredMessages,
		}
	})
}
@ -1,134 +0,0 @@

package natsconsumer

import (
	"testing"

	"github.com/influxdata/telegraf/plugins/parsers"
	"github.com/influxdata/telegraf/testutil"
	nats "github.com/nats-io/go-nats"
	"github.com/stretchr/testify/assert"
)

const (
	testMsg         = "cpu_load_short,host=server01 value=23422.0 1422568543702900257\n"
	testMsgGraphite = "cpu.load.short.graphite 23422 1454780029"
	testMsgJSON     = "{\"a\": 5, \"b\": {\"c\": 6}}\n"
	invalidMsg      = "cpu_load_short,host=server01 1422568543702900257\n"
	metricBuffer    = 5
)

func newTestNatsConsumer() (*natsConsumer, chan *nats.Msg) {
	in := make(chan *nats.Msg, metricBuffer)
	n := &natsConsumer{
		QueueGroup: "test",
		Subjects:   []string{"telegraf"},
		Servers:    []string{"nats://localhost:4222"},
		Secure:     false,
		in:         in,
		errs:       make(chan error, metricBuffer),
		done:       make(chan struct{}),
	}
	return n, in
}

// Test that the parser parses NATS messages into metrics
func TestRunParser(t *testing.T) {
	n, in := newTestNatsConsumer()
	acc := testutil.Accumulator{}
	n.acc = &acc
	defer close(n.done)

	n.parser, _ = parsers.NewInfluxParser()
	n.wg.Add(1)
	go n.receiver()
	in <- natsMsg(testMsg)

	acc.Wait(1)
}

// Test that the parser ignores invalid messages
func TestRunParserInvalidMsg(t *testing.T) {
	n, in := newTestNatsConsumer()
	acc := testutil.Accumulator{}
	n.acc = &acc
	defer close(n.done)

	n.parser, _ = parsers.NewInfluxParser()
	n.wg.Add(1)
	go n.receiver()
	in <- natsMsg(invalidMsg)

	acc.WaitError(1)
	assert.Contains(t, acc.Errors[0].Error(), "E! subject: telegraf, error: metric parse error")
	assert.EqualValues(t, 0, acc.NMetrics())
}

// Test that the parser parses line format messages into metrics
func TestRunParserAndGather(t *testing.T) {
	n, in := newTestNatsConsumer()
	acc := testutil.Accumulator{}
	n.acc = &acc
	defer close(n.done)

	n.parser, _ = parsers.NewInfluxParser()
	n.wg.Add(1)
	go n.receiver()
	in <- natsMsg(testMsg)

	n.Gather(&acc)

	acc.Wait(1)
	acc.AssertContainsFields(t, "cpu_load_short",
		map[string]interface{}{"value": float64(23422)})
}

// Test that the parser parses graphite format messages into metrics
func TestRunParserAndGatherGraphite(t *testing.T) {
	n, in := newTestNatsConsumer()
	acc := testutil.Accumulator{}
	n.acc = &acc
	defer close(n.done)

	n.parser, _ = parsers.NewGraphiteParser("_", []string{}, nil)
	n.wg.Add(1)
	go n.receiver()
	in <- natsMsg(testMsgGraphite)

	n.Gather(&acc)

	acc.Wait(1)
	acc.AssertContainsFields(t, "cpu_load_short_graphite",
		map[string]interface{}{"value": float64(23422)})
}

// Test that the parser parses json format messages into metrics
func TestRunParserAndGatherJSON(t *testing.T) {
	n, in := newTestNatsConsumer()
	acc := testutil.Accumulator{}
	n.acc = &acc
	defer close(n.done)

	n.parser, _ = parsers.NewParser(&parsers.Config{
		DataFormat: "json",
		MetricName: "nats_json_test",
	})
	n.wg.Add(1)
	go n.receiver()
	in <- natsMsg(testMsgJSON)

	n.Gather(&acc)

	acc.Wait(1)
	acc.AssertContainsFields(t, "nats_json_test",
		map[string]interface{}{
			"a":   float64(5),
			"b_c": float64(6),
		})
}

func natsMsg(val string) *nats.Msg {
	return &nats.Msg{
		Subject: "telegraf",
		Data:    []byte(val),
	}
}
@ -1,9 +1,9 @@

# NSQ Consumer Input Plugin

The [NSQ](http://nsq.io/) consumer plugin polls a specified NSQD
topic and adds messages to InfluxDB. This plugin allows a message to be in any of the supported `data_format` types.
The [NSQ][nsq] consumer plugin reads from NSQD and creates metrics using one
of the supported [input data formats][].

## Configuration
### Configuration:

```toml
# Read metrics from NSQD topic(s)

@ -18,6 +18,16 @@ topic and adds messages to InfluxDB. This plugin allows a message to be in any o
  channel = "consumer"
  max_in_flight = 100

  ## Maximum messages to read from the broker that have not been written by an
  ## output. For best throughput set based on the number of metrics within
  ## each message and the size of the output's metric_batch_size.
  ##
  ## For example, if each message from the queue contains 10 metrics and the
  ## output metric_batch_size is 1000, setting this to 100 will ensure that a
  ## full batch is collected and the write is triggered immediately without
  ## waiting until the next flush_interval.
  # max_undelivered_messages = 1000

  ## Data format to consume.
  ## Each data format has its own unique set of configuration options, read
  ## more about them here:

@ -25,5 +35,5 @@ topic and adds messages to InfluxDB. This plugin allows a message to be in any o
  data_format = "influx"
```

## Testing
The `nsq_consumer_test` mocks out the interaction with `NSQD`. It requires no outside dependencies.
[nsq]: https://nsq.io
[input data formats]: /docs/DATA_FORMATS_INPUT.md
@ -1,7 +1,9 @@

package nsq_consumer

import (
	"fmt"
	"context"
	"log"
	"sync"

	"github.com/influxdata/telegraf"
	"github.com/influxdata/telegraf/plugins/inputs"

@ -9,17 +11,38 @@ import (
	nsq "github.com/nsqio/go-nsq"
)

const (
	defaultMaxUndeliveredMessages = 1000
)

type empty struct{}
type semaphore chan empty

type logger struct{}

func (l *logger) Output(calldepth int, s string) error {
	log.Println("D! [inputs.nsq_consumer] " + s)
	return nil
}

// NSQConsumer represents the configuration of the plugin
type NSQConsumer struct {
	Server      string
	Nsqd        []string
	Nsqlookupd  []string
	Topic       string
	Channel     string
	MaxInFlight int
	parser      parsers.Parser
	consumer    *nsq.Consumer
	acc         telegraf.Accumulator
	Server      string   `toml:"server"`
	Nsqd        []string `toml:"nsqd"`
	Nsqlookupd  []string `toml:"nsqlookupd"`
	Topic       string   `toml:"topic"`
	Channel     string   `toml:"channel"`
	MaxInFlight int      `toml:"max_in_flight"`

	MaxUndeliveredMessages int `toml:"max_undelivered_messages"`

	parser   parsers.Parser
	consumer *nsq.Consumer

	mu       sync.Mutex
	messages map[telegraf.TrackingID]*nsq.Message
	wg       sync.WaitGroup
	cancel   context.CancelFunc
}

var sampleConfig = `

@ -33,6 +56,16 @@ var sampleConfig = `
  channel = "consumer"
  max_in_flight = 100

  ## Maximum messages to read from the broker that have not been written by an
  ## output. For best throughput set based on the number of metrics within
  ## each message and the size of the output's metric_batch_size.
  ##
  ## For example, if each message from the queue contains 10 metrics and the
  ## output metric_batch_size is 1000, setting this to 100 will ensure that a
  ## full batch is collected and the write is triggered immediately without
  ## waiting until the next flush_interval.
  # max_undelivered_messages = 1000

  ## Data format to consume.
  ## Each data format has its own unique set of configuration options, read
  ## more about them here:

@ -40,12 +73,6 @@ var sampleConfig = `
  data_format = "influx"
`

func init() {
	inputs.Add("nsq_consumer", func() telegraf.Input {
		return &NSQConsumer{}
	})
}

// SetParser takes the data_format from the config and finds the right parser for that format
func (n *NSQConsumer) SetParser(parser parsers.Parser) {
	n.parser = parser

@ -62,32 +89,88 @@ func (n *NSQConsumer) Description() string {
}

// Start pulls data from nsq
func (n *NSQConsumer) Start(acc telegraf.Accumulator) error {
	n.acc = acc
func (n *NSQConsumer) Start(ac telegraf.Accumulator) error {
	acc := ac.WithTracking(n.MaxUndeliveredMessages)
	sem := make(semaphore, n.MaxUndeliveredMessages)
	n.messages = make(map[telegraf.TrackingID]*nsq.Message, n.MaxUndeliveredMessages)

	ctx, cancel := context.WithCancel(context.Background())
	n.cancel = cancel

	n.connect()
	n.consumer.AddConcurrentHandlers(nsq.HandlerFunc(func(message *nsq.Message) error {
	n.consumer.SetLogger(&logger{}, nsq.LogLevelInfo)
	n.consumer.AddHandler(nsq.HandlerFunc(func(message *nsq.Message) error {
		metrics, err := n.parser.Parse(message.Body)
		if err != nil {
			acc.AddError(fmt.Errorf("E! NSQConsumer Parse Error\nmessage:%s\nerror:%s", string(message.Body), err.Error()))
			acc.AddError(err)
			// Remove the message from the queue
			message.Finish()
			return nil
		}
		for _, metric := range metrics {
			n.acc.AddFields(metric.Name(), metric.Fields(), metric.Tags(), metric.Time())
		if len(metrics) == 0 {
			message.Finish()
			return nil
		}
		message.Finish()

		select {
		case <-ctx.Done():
			return ctx.Err()
		case sem <- empty{}:
			break
		}

		n.mu.Lock()
		id := acc.AddTrackingMetricGroup(metrics)
		n.messages[id] = message
		n.mu.Unlock()
		message.DisableAutoResponse()
		return nil
	}), n.MaxInFlight)
	}))

	if len(n.Nsqlookupd) > 0 {
		n.consumer.ConnectToNSQLookupds(n.Nsqlookupd)
	}
	n.consumer.ConnectToNSQDs(append(n.Nsqd, n.Server))

	n.wg.Add(1)
	go func() {
		defer n.wg.Done()
		n.onDelivery(ctx, acc, sem)
	}()
	return nil
}

func (n *NSQConsumer) onDelivery(ctx context.Context, acc telegraf.TrackingAccumulator, sem semaphore) {
	for {
		select {
		case <-ctx.Done():
			return
		case info := <-acc.Delivered():
			n.mu.Lock()
			msg, ok := n.messages[info.ID()]
			if !ok {
				n.mu.Unlock()
				continue
			}
			<-sem
			delete(n.messages, info.ID())
			n.mu.Unlock()

			if info.Delivered() {
				msg.Finish()
			} else {
				msg.Requeue(-1)
			}
		}
	}
}

// Stop processing messages
func (n *NSQConsumer) Stop() {
	n.cancel()
	n.wg.Wait()
	n.consumer.Stop()
	<-n.consumer.StopChan
}

// Gather is a noop

@ -107,3 +190,11 @@ func (n *NSQConsumer) connect() error {
	}
	return nil
}

func init() {
	inputs.Add("nsq_consumer", func() telegraf.Input {
		return &NSQConsumer{
			MaxUndeliveredMessages: defaultMaxUndeliveredMessages,
		}
	})
}
|
||||
|
|
|
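The `onDelivery` loop above maps Telegraf's delivery result onto NSQ's acknowledgement model: `Finish` commits the message and `Requeue(-1)` asks NSQD to redeliver it after the default delay. A small sketch of just that branch (the interface is mine, for illustration; it only names the two go-nsq methods used above):

```go
package consumer

import "time"

// nsqMessage names the two go-nsq message methods used above.
type nsqMessage interface {
	Finish()
	Requeue(delay time.Duration)
}

// settle finishes a message whose metrics reached an output and requeues it
// otherwise, so NSQ redelivers the data rather than losing it.
func settle(msg nsqMessage, delivered bool) {
	if delivered {
		msg.Finish()
	} else {
		msg.Requeue(-1) // -1 requests the server-side default requeue delay
	}
}
```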
@ -36,11 +36,12 @@ func TestReadsMetricsFromNSQ(t *testing.T) {
	newMockNSQD(script, addr.String())

	consumer := &NSQConsumer{
		Server:      "127.0.0.1:4155",
		Topic:       "telegraf",
		Channel:     "consume",
		MaxInFlight: 1,
		Nsqd:        []string{"127.0.0.1:4155"},
		Server:                 "127.0.0.1:4155",
		Topic:                  "telegraf",
		Channel:                "consume",
		MaxInFlight:            1,
		MaxUndeliveredMessages: defaultMaxUndeliveredMessages,
		Nsqd:                   []string{"127.0.0.1:4155"},
	}

	p, _ := parsers.NewInfluxParser()
@ -2,6 +2,7 @@ package socket_listener

import (
	"bufio"
	"crypto/tls"
	"fmt"
	"io"
	"log"

@ -9,11 +10,8 @@ import (
	"os"
	"strings"
	"sync"

	"time"

	"crypto/tls"

	"github.com/influxdata/telegraf"
	"github.com/influxdata/telegraf/internal"
	tlsint "github.com/influxdata/telegraf/internal/tls"

@ -120,7 +118,7 @@ func (ssl *streamSocketListener) read(c net.Conn) {
			continue
		}
		for _, m := range metrics {
			ssl.AddFields(m.Name(), m.Fields(), m.Tags(), m.Time())
			ssl.AddMetric(m)
		}
	}

@ -156,7 +154,7 @@ func (psl *packetSocketListener) listen() {
			continue
		}
		for _, m := range metrics {
			psl.AddFields(m.Name(), m.Fields(), m.Tags(), m.Time())
			psl.AddMetric(m)
		}
	}
}
@ -7,11 +7,13 @@ import (

type Discard struct{}

func (d *Discard) Connect() error       { return nil }
func (d *Discard) Close() error         { return nil }
func (d *Discard) SampleConfig() string { return "" }
func (d *Discard) Description() string  { return "Send metrics to nowhere at all" }
func (d *Discard) Write(metrics []telegraf.Metric) error { return nil }
func (d *Discard) Connect() error       { return nil }
func (d *Discard) Close() error         { return nil }
func (d *Discard) SampleConfig() string { return "" }
func (d *Discard) Description() string  { return "Send metrics to nowhere at all" }
func (d *Discard) Write(metrics []telegraf.Metric) error {
	return nil
}

func init() {
	outputs.Add("discard", func() telegraf.Output { return &Discard{} })
@ -144,7 +144,7 @@ func (p *PrometheusClient) auth(h http.Handler) http.Handler {
	})
}

func (p *PrometheusClient) Start() error {
func (p *PrometheusClient) Connect() error {
	defaultCollectors := map[string]bool{
		"gocollector": true,
		"process":     true,

@ -200,15 +200,6 @@ func (p *PrometheusClient) Start() error {
	return nil
}

func (p *PrometheusClient) Stop() {
	// plugin gets cleaned up in Close() already.
}

func (p *PrometheusClient) Connect() error {
	// This service output does not need to make any further connections
	return nil
}

func (p *PrometheusClient) Close() error {
	ctx, cancel := context.WithTimeout(context.Background(), time.Second*5)
	defer cancel()

@ -600,7 +600,7 @@ func TestPrometheusWritePointEmptyTag(t *testing.T) {

	pClient, p, err := setupPrometheus()
	require.NoError(t, err)
	defer pClient.Stop()
	defer pClient.Close()

	now := time.Now()
	tags := make(map[string]string)

@ -675,7 +675,7 @@ func setupPrometheus() (*PrometheusClient, *prometheus_input.Prometheus, error)
	pTesting = NewClient()
	pTesting.Listen = "localhost:9127"
	pTesting.Path = "/metrics"
	err := pTesting.Start()
	err := pTesting.Connect()
	if err != nil {
		return nil, nil, err
	}
@ -10,6 +10,7 @@ import (
	"github.com/influxdata/telegraf"
	"github.com/influxdata/telegraf/filter"
	"github.com/influxdata/telegraf/internal"
	"github.com/influxdata/telegraf/metric"
	"github.com/influxdata/telegraf/plugins/processors"
)

@ -208,6 +209,11 @@ func (t *TopK) Apply(in ...telegraf.Metric) []telegraf.Metric {

	// Add the metrics received to our internal cache
	for _, m := range in {
		// When tracking metrics this plugin could deadlock the input by
		// holding undelivered metrics while the input waits for metrics to be
		// delivered. Instead, treat all handled metrics as delivered and
		// produced metrics as untracked in a similar way to aggregators.
		m.Drop()

		// Check if the metric has any of the fields over which we are aggregating
		hasField := false

@ -281,7 +287,6 @@ func (t *TopK) push() []telegraf.Metric {

	// Create a one dimensional list with the top K metrics of each key
	for i, ag := range aggregations[0:min(t.K, len(aggregations))] {

		// Check whether or not we need to add fields or tags to the selected metrics
		if len(t.aggFieldSet) != 0 || len(t.rankFieldSet) != 0 || groupTag != "" {
			for _, m := range t.cache[ag.groupbykey] {

@ -311,7 +316,16 @@ func (t *TopK) push() []telegraf.Metric {

	t.Reset()

	return ret
	result := make([]telegraf.Metric, 0, len(ret))
	for _, m := range ret {
		copy, err := metric.New(m.Name(), m.Tags(), m.Fields(), m.Time(), m.Type())
		if err != nil {
			continue
		}
		result = append(result, copy)
	}

	return result
}
// Function that generates the aggregation functions
|
||||
|
|
|
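This is the substantive fix as it applies to the topk processor. With delivery tracking enabled, an input can block until its outstanding metrics are marked delivered, and topk holds metrics in its cache for a whole aggregation period, so the input could stall waiting on metrics the processor is sitting on. `m.Drop()` marks each handled metric delivered immediately, and `push()` now returns fresh, untracked copies built with `metric.New` so nothing downstream ever references the dropped originals. The same pattern, condensed into a sketch of a generic buffering processor (all names here are invented):

```go
package example

import (
	"github.com/influxdata/telegraf"
	"github.com/influxdata/telegraf/metric"
)

// buffering stands in for any processor that holds metrics across calls.
type buffering struct {
	cache []telegraf.Metric
}

func (b *buffering) Apply(in ...telegraf.Metric) []telegraf.Metric {
	for _, m := range in {
		// Mark the tracked metric delivered right away; otherwise the
		// input that produced it could block waiting on delivery while
		// we sit on the metric.
		m.Drop()
		b.cache = append(b.cache, m)
	}

	// Emit untracked copies so downstream plugins never see the
	// dropped originals.
	out := make([]telegraf.Metric, 0, len(b.cache))
	for _, m := range b.cache {
		c, err := metric.New(m.Name(), m.Tags(), m.Fields(), m.Time(), m.Type())
		if err != nil {
			continue
		}
		out = append(out, c)
	}
	b.cache = b.cache[:0]
	return out
}
```
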
@@ -1,12 +1,12 @@
 package topk

 import (
-	"reflect"
 	"testing"
 	"time"

 	"github.com/influxdata/telegraf"
 	"github.com/influxdata/telegraf/internal"
+	"github.com/influxdata/telegraf/testutil"
 )

 // Key, value pair that represents a telegraf.Metric Field

@@ -95,7 +95,7 @@ func deepCopy(a []telegraf.Metric) []telegraf.Metric {

 func belongs(m telegraf.Metric, ms []telegraf.Metric) bool {
 	for _, i := range ms {
-		if reflect.DeepEqual(i, m) {
+		if testutil.MetricEqual(i, m) {
 			return true
 		}
 	}

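The topk tests now compare metrics through `testutil.MetricEqual` rather than `reflect.DeepEqual`. Since `push()` returns copies, and tracked metrics can carry extra internal state, strict deep equality may fail for metrics that are identical in every observable way; `MetricEqual` compares name, tags, fields, type, and timestamp instead. A hypothetical usage:

```go
package example

import (
	"testing"
	"time"

	"github.com/influxdata/telegraf/metric"
	"github.com/influxdata/telegraf/testutil"
)

func TestCopiesCompareEqual(t *testing.T) {
	now := time.Now()
	a, _ := metric.New("cpu", map[string]string{"host": "h"},
		map[string]interface{}{"v": 1.0}, now)
	b, _ := metric.New("cpu", map[string]string{"host": "h"},
		map[string]interface{}{"v": 1.0}, now)

	// Two independently built metrics with the same observable content
	// compare equal, whatever their internal representation.
	if !testutil.MetricEqual(a, b) {
		t.Fatal("expected metrics to be equal")
	}
}
```
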
@@ -7,6 +7,6 @@ type Processor interface {
 	// Description returns a one-sentence description on the Input
 	Description() string

-	// Apply the filter to the given metric
+	// Apply the filter to the given metric.
 	Apply(in ...Metric) []Metric
 }

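For reference, a minimal type satisfying this interface looks like the sketch below; the `renamer` example is invented, and real plugins also register themselves with the processor registry in an `init` function:

```go
package example

import "github.com/influxdata/telegraf"

type renamer struct{}

func (r *renamer) SampleConfig() string { return "" }
func (r *renamer) Description() string  { return "Prefix every measurement name" }

// Apply the filter to the given metric.
func (r *renamer) Apply(in ...telegraf.Metric) []telegraf.Metric {
	for _, m := range in {
		m.SetName("renamed_" + m.Name())
	}
	return in
}
```
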
@@ -14,6 +14,15 @@ import (
 	"github.com/stretchr/testify/assert"
 )

+var (
+	lastID uint64
+)
+
+func newTrackingID() telegraf.TrackingID {
+	atomic.AddUint64(&lastID, 1)
+	return telegraf.TrackingID(lastID)
+}
+
 // Metric defines a single point measurement
 type Metric struct {
 	Measurement string

@@ -23,7 +32,7 @@ type Metric struct {
 }

 func (p *Metric) String() string {
-	return fmt.Sprintf("%s %v", p.Measurement, p.Fields)
+	return fmt.Sprintf("%s %v %v", p.Measurement, p.Tags, p.Fields)
 }

 // Accumulator defines a mocked out accumulator

@@ -31,11 +40,12 @@ type Accumulator struct {
 	sync.Mutex
 	*sync.Cond

-	Metrics  []*Metric
-	nMetrics uint64
-	Discard  bool
-	Errors   []error
-	debug    bool
+	Metrics   []*Metric
+	nMetrics  uint64
+	Discard   bool
+	Errors    []error
+	debug     bool
+	delivered chan telegraf.DeliveryInfo
 }

 func (a *Accumulator) NMetrics() uint64 {

@@ -154,6 +164,33 @@ func (a *Accumulator) AddHistogram(
 	a.AddFields(measurement, fields, tags, timestamp...)
 }

+func (a *Accumulator) AddMetric(m telegraf.Metric) {
+	a.AddFields(m.Name(), m.Fields(), m.Tags(), m.Time())
+}
+
+func (a *Accumulator) WithTracking(maxTracked int) telegraf.TrackingAccumulator {
+	return a
+}
+
+func (a *Accumulator) AddTrackingMetric(m telegraf.Metric) telegraf.TrackingID {
+	a.AddMetric(m)
+	return newTrackingID()
+}
+
+func (a *Accumulator) AddTrackingMetricGroup(group []telegraf.Metric) telegraf.TrackingID {
+	for _, m := range group {
+		a.AddMetric(m)
+	}
+	return newTrackingID()
+}
+
+func (a *Accumulator) Delivered() <-chan telegraf.DeliveryInfo {
+	if a.delivered == nil {
+		a.delivered = make(chan telegraf.DeliveryInfo)
+	}
+	return a.delivered
+}
+
 // AddError appends the given error to Accumulator.Errors.
 func (a *Accumulator) AddError(err error) {
 	if err == nil {

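These testutil additions make the mock `Accumulator` usable wherever a `telegraf.TrackingAccumulator` is expected: `WithTracking` returns the accumulator itself, and each tracking call records the metric and mints a process-unique `TrackingID` via the atomic counter. A hypothetical test built on it (the metric under test is made up):

```go
package example

import (
	"testing"
	"time"

	"github.com/influxdata/telegraf/metric"
	"github.com/influxdata/telegraf/testutil"
)

func TestTrackingAccumulator(t *testing.T) {
	acc := &testutil.Accumulator{}
	tracker := acc.WithTracking(10)

	m, err := metric.New("cpu",
		map[string]string{"host": "example"},
		map[string]interface{}{"usage": 42.0},
		time.Now(),
	)
	if err != nil {
		t.Fatal(err)
	}

	// The mock records the metric and hands back a fresh TrackingID.
	id := tracker.AddTrackingMetric(m)
	if acc.NMetrics() != 1 {
		t.Fatalf("tracking id %d: expected 1 metric, got %d", id, acc.NMetrics())
	}
}
```
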
@@ -41,6 +41,18 @@ func newMetricDiff(metric telegraf.Metric) *metricDiff {
 	return m
 }

+func MetricEqual(expected, actual telegraf.Metric) bool {
+	var lhs, rhs *metricDiff
+	if expected != nil {
+		lhs = newMetricDiff(expected)
+	}
+	if actual != nil {
+		rhs = newMetricDiff(actual)
+	}
+
+	return cmp.Equal(lhs, rhs)
+}
+
 func RequireMetricEqual(t *testing.T, expected, actual telegraf.Metric) {
 	t.Helper()

@@ -60,11 +72,11 @@ func RequireMetricEqual(t *testing.T, expected, actual telegraf.Metric) {
 func RequireMetricsEqual(t *testing.T, expected, actual []telegraf.Metric) {
 	t.Helper()

-	lhs := make([]*metricDiff, len(expected))
+	lhs := make([]*metricDiff, 0, len(expected))
 	for _, m := range expected {
 		lhs = append(lhs, newMetricDiff(m))
 	}
-	rhs := make([]*metricDiff, len(actual))
+	rhs := make([]*metricDiff, 0, len(actual))
 	for _, m := range actual {
 		rhs = append(rhs, newMetricDiff(m))
 	}

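Besides adding `MetricEqual` as the non-fatal counterpart to `RequireMetricEqual`, the second hunk fixes a classic slice bug: `make([]*metricDiff, len(expected))` creates a slice that already holds `len(expected)` nil entries, so the subsequent appends doubled its length and every comparison saw spurious leading nils. Allocating with zero length and an explicit capacity is the usual idiom; the pitfall in miniature:

```go
package main

import "fmt"

func main() {
	wrong := make([]int, 3) // length 3: three zero values already present
	wrong = append(wrong, 1, 2, 3)
	fmt.Println(wrong) // [0 0 0 1 2 3]

	right := make([]int, 0, 3) // length 0, capacity 3
	right = append(right, 1, 2, 3)
	fmt.Println(right) // [1 2 3]
}
```
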