Remove outputs blocking inputs when output is slow (#4938)

parent 74667cd681
commit 6e5c2f8bb6

CONTRIBUTING.md (500 changes)

@@ -1,489 +1,52 @@
-## Steps for Contributing:
+### Contributing
 
-1. [Sign the CLA](http://influxdb.com/community/cla.html)
-1. Make changes or write plugin (see below for details)
-1. Add your plugin to one of: `plugins/{inputs,outputs,aggregators,processors}/all/all.go`
-1. If your plugin requires a new Go package,
-   [add it](https://github.com/influxdata/telegraf/blob/master/CONTRIBUTING.md#adding-a-dependency)
-1. Write a README for your plugin, if it's an input plugin, it should be structured
-   like the [input example here](https://github.com/influxdata/telegraf/blob/master/plugins/inputs/EXAMPLE_README.md).
-   Output plugins READMEs are less structured,
-   but any information you can provide on how the data will look is appreciated.
-   See the [OpenTSDB output](https://github.com/influxdata/telegraf/tree/master/plugins/outputs/opentsdb)
-   for a good example.
-1. **Optional:** Help users of your plugin by including example queries for populating dashboards. Include these sample queries in the `README.md` for the plugin.
-1. **Optional:** Write a [tickscript](https://docs.influxdata.com/kapacitor/v1.0/tick/syntax/) for your plugin and add it to [Kapacitor](https://github.com/influxdata/kapacitor/tree/master/examples/telegraf).
+1. [Sign the CLA][cla].
+1. Open a [new issue][] to discuss the changes you would like to make. This is
+   not strictly required but it may help reduce the amount of rework you need
+   to do later.
+1. Make changes or write plugin using the guidelines in the following
+   documents:
+   - [Input Plugins][inputs]
+   - [Processor Plugins][processors]
+   - [Aggregator Plugins][aggregators]
+   - [Output Plugins][outputs]
+1. Ensure you have added proper unit tests and documentation.
+1. Open a new [pull request][].
 
-## GoDoc
+### GoDoc
 
 Public interfaces for inputs, outputs, processors, aggregators, metrics,
-and the accumulator can be found on the GoDoc
+and the accumulator can be found in the GoDoc:
 
 [![GoDoc](https://godoc.org/github.com/influxdata/telegraf?status.svg)](https://godoc.org/github.com/influxdata/telegraf)
 
-## Sign the CLA
-
-Before we can merge a pull request, you will need to sign the CLA,
-which can be found [on our website](http://influxdb.com/community/cla.html)
-
-## Adding a dependency
+### Common development tasks
+
+**Adding a dependency:**
 
 Assuming you can already build the project, run these in the telegraf directory:
 
 1. `dep ensure -vendor-only`
 2. `dep ensure -add github.com/[dependency]/[new-package]`
 
-## Input Plugins
-
-This section is for developers who want to create new collection inputs.
-Telegraf is entirely plugin driven. This interface allows for operators to
-pick and choose what is gathered and makes it easy for developers
-to create new ways of generating metrics.
-
-Plugin authorship is kept as simple as possible to promote people to develop
-and submit new inputs.
-
-### Input Plugin Guidelines
-
-* A plugin must conform to the [`telegraf.Input`](https://godoc.org/github.com/influxdata/telegraf#Input) interface.
-* Input Plugins should call `inputs.Add` in their `init` function to register themselves.
-  See below for a quick example.
-* Input Plugins must be added to the
-  `github.com/influxdata/telegraf/plugins/inputs/all/all.go` file.
-* The `SampleConfig` function should return valid toml that describes how the
-  plugin can be configured. This is included in `telegraf config`. Please
-  consult the [SampleConfig](https://github.com/influxdata/telegraf/wiki/SampleConfig)
-  page for the latest style guidelines.
-* The `Description` function should say in one line what this plugin does.
-
-Let's say you've written a plugin that emits metrics about processes on the
-current host.
-
-### Input Plugin Example
-
-```go
-package simple
-
-// simple.go
-
-import (
-    "github.com/influxdata/telegraf"
-    "github.com/influxdata/telegraf/plugins/inputs"
-)
-
-type Simple struct {
-    Ok bool
-}
-
-func (s *Simple) Description() string {
-    return "a demo plugin"
-}
-
-func (s *Simple) SampleConfig() string {
-    return `
-  ## Indicate if everything is fine
-  ok = true
-`
-}
-
-func (s *Simple) Gather(acc telegraf.Accumulator) error {
-    if s.Ok {
-        acc.AddFields("state", map[string]interface{}{"value": "pretty good"}, nil)
-    } else {
-        acc.AddFields("state", map[string]interface{}{"value": "not great"}, nil)
-    }
-
-    return nil
-}
-
-func init() {
-    inputs.Add("simple", func() telegraf.Input { return &Simple{} })
-}
-```
-
-### Input Plugin Development
-
-* Run `make static` followed by `make plugin-[pluginName]` to spin up a docker dev environment
-  using docker-compose.
-* ***[Optional]*** When developing a plugin, add a `dev` directory with a `docker-compose.yml` and `telegraf.conf`
-  as well as any other supporting files, where sensible.
-
-## Adding Typed Metrics
-
-In addition to the `AddFields` function, the accumulator also supports an
-`AddGauge` and `AddCounter` function. These functions are for adding _typed_
-metrics. Metric types are ignored for the InfluxDB output, but can be used
-for other outputs, such as [prometheus](https://prometheus.io/docs/concepts/metric_types/).
-
-## Input Plugins Accepting Arbitrary Data Formats
-
-Some input plugins (such as
-[exec](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/exec))
-accept arbitrary input data formats. An overview of these data formats can
-be found
-[here](https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md).
-
-In order to enable this, you must specify a `SetParser(parser parsers.Parser)`
-function on the plugin object (see the exec plugin for an example), as well as
-defining `parser` as a field of the object.
-
-You can then utilize the parser internally in your plugin, parsing data as you
-see fit. Telegraf's configuration layer will take care of instantiating and
-creating the `Parser` object.
-
-You should also add the following to your SampleConfig() return:
-
-```toml
-  ## Data format to consume.
-  ## Each data format has its own unique set of configuration options, read
-  ## more about them here:
-  ## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md
-  data_format = "influx"
-```
-
-Below is the `Parser` interface.
-
-```go
-// Parser is an interface defining functions that a parser plugin must satisfy.
-type Parser interface {
-    // Parse takes a byte buffer separated by newlines
-    // ie, `cpu.usage.idle 90\ncpu.usage.busy 10`
-    // and parses it into telegraf metrics
-    Parse(buf []byte) ([]telegraf.Metric, error)
-
-    // ParseLine takes a single string metric
-    // ie, "cpu.usage.idle 90"
-    // and parses it into a telegraf metric.
-    ParseLine(line string) (telegraf.Metric, error)
-}
-```
-
-And you can view the code
-[here.](https://github.com/influxdata/telegraf/blob/henrypfhu-master/plugins/parsers/registry.go)
-
-## Service Input Plugins
-
-This section is for developers who want to create new "service" collection
-inputs. A service plugin differs from a regular plugin in that it operates
-a background service while Telegraf is running. One example would be the `statsd`
-plugin, which operates a statsd server.
-
-Service Input Plugins are substantially more complicated than a regular plugin, as they
-will require threads and locks to verify data integrity. Service Input Plugins should
-be avoided unless there is no way to create their behavior with a regular plugin.
-
-Their interface is quite similar to a regular plugin, with the addition of `Start()`
-and `Stop()` methods.
-
-### Service Plugin Guidelines
-
-* Same as the `Plugin` guidelines, except that they must conform to the
-  [`telegraf.ServiceInput`](https://godoc.org/github.com/influxdata/telegraf#ServiceInput) interface.
-
-## Output Plugins
-
-This section is for developers who want to create a new output sink. Outputs
-are created in a similar manner as collection plugins, and their interface has
-similar constructs.
-
-### Output Plugin Guidelines
-
-* An output must conform to the [`telegraf.Output`](https://godoc.org/github.com/influxdata/telegraf#Output) interface.
-* Outputs should call `outputs.Add` in their `init` function to register themselves.
-  See below for a quick example.
-* To be available within Telegraf itself, plugins must add themselves to the
-  `github.com/influxdata/telegraf/plugins/outputs/all/all.go` file.
-* The `SampleConfig` function should return valid toml that describes how the
-  plugin can be configured. This is included in `telegraf config`. Please
-  consult the [SampleConfig](https://github.com/influxdata/telegraf/wiki/SampleConfig)
-  page for the latest style guidelines.
-* The `Description` function should say in one line what this output does.
-
-### Output Example
-
-```go
-package simpleoutput
-
-// simpleoutput.go
-
-import (
-    "github.com/influxdata/telegraf"
-    "github.com/influxdata/telegraf/plugins/outputs"
-)
-
-type Simple struct {
-    Ok bool
-}
-
-func (s *Simple) Description() string {
-    return "a demo output"
-}
-
-func (s *Simple) SampleConfig() string {
-    return `
-  ok = true
-`
-}
-
-func (s *Simple) Connect() error {
-    // Make a connection to the URL here
-    return nil
-}
-
-func (s *Simple) Close() error {
-    // Close connection to the URL here
-    return nil
-}
-
-func (s *Simple) Write(metrics []telegraf.Metric) error {
-    for _, metric := range metrics {
-        // write `metric` to the output sink here
-    }
-    return nil
-}
-
-func init() {
-    outputs.Add("simpleoutput", func() telegraf.Output { return &Simple{} })
-}
-```
-
-## Output Plugins Writing Arbitrary Data Formats
-
-Some output plugins (such as
-[file](https://github.com/influxdata/telegraf/tree/master/plugins/outputs/file))
-can write arbitrary output data formats. An overview of these data formats can
-be found
-[here](https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_OUTPUT.md).
-
-In order to enable this, you must specify a
-`SetSerializer(serializer serializers.Serializer)`
-function on the plugin object (see the file plugin for an example), as well as
-defining `serializer` as a field of the object.
-
-You can then utilize the serializer internally in your plugin, serializing data
-before it's written. Telegraf's configuration layer will take care of
-instantiating and creating the `Serializer` object.
-
-You should also add the following to your SampleConfig() return:
-
-```toml
-  ## Data format to output.
-  ## Each data format has its own unique set of configuration options, read
-  ## more about them here:
-  ## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_OUTPUT.md
-  data_format = "influx"
-```
-
-## Service Output Plugins
-
-This section is for developers who want to create new "service" output. A
-service output differs from a regular output in that it operates a background service
-while Telegraf is running. One example would be the `prometheus_client` output,
-which operates an HTTP server.
-
-Their interface is quite similar to a regular output, with the addition of `Start()`
-and `Stop()` methods.
-
-### Service Output Guidelines
-
-* Same as the `Output` guidelines, except that they must conform to the
-  `output.ServiceOutput` interface.
-
-## Processor Plugins
-
-This section is for developers who want to create a new processor plugin.
-
-### Processor Plugin Guidelines
-
-* A processor must conform to the [`telegraf.Processor`](https://godoc.org/github.com/influxdata/telegraf#Processor) interface.
-* Processors should call `processors.Add` in their `init` function to register themselves.
-  See below for a quick example.
-* To be available within Telegraf itself, plugins must add themselves to the
-  `github.com/influxdata/telegraf/plugins/processors/all/all.go` file.
-* The `SampleConfig` function should return valid toml that describes how the
-  processor can be configured. This is included in the output of `telegraf config`.
-* The `Description` function should say in one line what this processor does.
-
-### Processor Example
-
-```go
-package printer
-
-// printer.go
-
-import (
-    "fmt"
-
-    "github.com/influxdata/telegraf"
-    "github.com/influxdata/telegraf/plugins/processors"
-)
-
-type Printer struct {
-}
-
-var sampleConfig = `
-`
-
-func (p *Printer) SampleConfig() string {
-    return sampleConfig
-}
-
-func (p *Printer) Description() string {
-    return "Print all metrics that pass through this filter."
-}
-
-func (p *Printer) Apply(in ...telegraf.Metric) []telegraf.Metric {
-    for _, metric := range in {
-        fmt.Println(metric.String())
-    }
-    return in
-}
-
-func init() {
-    processors.Add("printer", func() telegraf.Processor {
-        return &Printer{}
-    })
-}
-```
-
-## Aggregator Plugins
-
-This section is for developers who want to create a new aggregator plugin.
-
-### Aggregator Plugin Guidelines
-
-* An aggregator must conform to the [`telegraf.Aggregator`](https://godoc.org/github.com/influxdata/telegraf#Aggregator) interface.
-* Aggregators should call `aggregators.Add` in their `init` function to register themselves.
-  See below for a quick example.
-* To be available within Telegraf itself, plugins must add themselves to the
-  `github.com/influxdata/telegraf/plugins/aggregators/all/all.go` file.
-* The `SampleConfig` function should return valid toml that describes how the
-  aggregator can be configured. This is included in `telegraf config`.
-* The `Description` function should say in one line what this aggregator does.
-* The Aggregator plugin will need to keep caches of metrics that have passed
-  through it. This should be done using the builtin `HashID()` function of each
-  metric.
-* When the `Reset()` function is called, all caches should be cleared.
-
-### Aggregator Example
-
-```go
-package min
-
-// min.go
-
-import (
-    "github.com/influxdata/telegraf"
-    "github.com/influxdata/telegraf/plugins/aggregators"
-)
-
-type Min struct {
-    // caches for metric fields, names, and tags
-    fieldCache map[uint64]map[string]float64
-    nameCache  map[uint64]string
-    tagCache   map[uint64]map[string]string
-}
-
-func NewMin() telegraf.Aggregator {
-    m := &Min{}
-    m.Reset()
-    return m
-}
-
-var sampleConfig = `
-  ## period is the flush & clear interval of the aggregator.
-  period = "30s"
-  ## If true drop_original will drop the original metrics and
-  ## only send aggregates.
-  drop_original = false
-`
-
-func (m *Min) SampleConfig() string {
-    return sampleConfig
-}
-
-func (m *Min) Description() string {
-    return "Keep the aggregate min of each metric passing through."
-}
-
-func (m *Min) Add(in telegraf.Metric) {
-    id := in.HashID()
-    if _, ok := m.nameCache[id]; !ok {
-        // hit an uncached metric, create caches for first time:
-        m.nameCache[id] = in.Name()
-        m.tagCache[id] = in.Tags()
-        m.fieldCache[id] = make(map[string]float64)
-        for k, v := range in.Fields() {
-            if fv, ok := convert(v); ok {
-                m.fieldCache[id][k] = fv
-            }
-        }
-    } else {
-        for k, v := range in.Fields() {
-            if fv, ok := convert(v); ok {
-                if _, ok := m.fieldCache[id][k]; !ok {
-                    // hit an uncached field of a cached metric
-                    m.fieldCache[id][k] = fv
-                    continue
-                }
-                if fv < m.fieldCache[id][k] {
-                    // set new minimum
-                    m.fieldCache[id][k] = fv
-                }
-            }
-        }
-    }
-}
-
-func (m *Min) Push(acc telegraf.Accumulator) {
-    for id, _ := range m.nameCache {
-        fields := map[string]interface{}{}
-        for k, v := range m.fieldCache[id] {
-            fields[k+"_min"] = v
-        }
-        acc.AddFields(m.nameCache[id], fields, m.tagCache[id])
-    }
-}
-
-func (m *Min) Reset() {
-    m.fieldCache = make(map[uint64]map[string]float64)
-    m.nameCache = make(map[uint64]string)
-    m.tagCache = make(map[uint64]map[string]string)
-}
-
-func convert(in interface{}) (float64, bool) {
-    switch v := in.(type) {
-    case float64:
-        return v, true
-    case int64:
-        return float64(v), true
-    default:
-        return 0, false
-    }
-}
-
-func init() {
-    aggregators.Add("min", func() telegraf.Aggregator {
-        return NewMin()
-    })
-}
-```
-
-## Unit Tests
+**Unit Tests:**
 
 Before opening a pull request you should run the linter checks and
 the short tests.
 
-### Execute linter
-
-execute `make check`
-
-### Execute short tests
-
-execute `make test`
+**Run static analysis:**
+
+```
+make check
+```
+
+**Run short tests:**
+
+```
+make test
+```
 
-### Execute integration tests
+**Execute integration tests:**
 
 Running the integration tests requires several docker containers to be
 running. You can start the containers with:

@@ -497,3 +60,12 @@ make test-all
 ```
 
 Use `make docker-kill` to stop the containers.
+
+[cla]: https://www.influxdata.com/legal/cla/
+[new issue]: https://github.com/influxdata/telegraf/issues/new/choose
+[pull request]: https://github.com/influxdata/telegraf/compare
+[inputs]: /docs/INPUTS.md
+[processors]: /docs/PROCESSORS.md
+[aggregators]: /docs/AGGREGATORS.md
+[outputs]: /docs/OUTPUTS.md
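The new "Adding a dependency" steps above use a placeholder import path. As a concrete, hypothetical example, adding a dependency on `github.com/pkg/errors` would look like:

```
dep ensure -vendor-only
dep ensure -add github.com/pkg/errors
```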
accumulator.go

@@ -1,16 +1,14 @@
 package telegraf
 
-import "time"
+import (
+    "time"
+)
 
-// Accumulator is an interface for "accumulating" metrics from plugin(s).
-// The metrics are sent down a channel shared between all plugins.
+// Accumulator allows adding metrics to the processing flow.
 type Accumulator interface {
     // AddFields adds a metric to the accumulator with the given measurement
     // name, fields, and tags (and timestamp). If a timestamp is not provided,
     // then the accumulator sets it to "now".
-    // Create a point with a value, decorating it with tags
-    // NOTE: tags is expected to be owned by the caller, don't mutate
-    // it after passing to Add.
     AddFields(measurement string,
         fields map[string]interface{},
         tags map[string]string,

@@ -40,7 +38,49 @@ type Accumulator interface {
         tags map[string]string,
         t ...time.Time)
 
+    // AddMetric adds a metric to the accumulator.
+    AddMetric(Metric)
+
+    // SetPrecision takes two time.Duration objects. If the first is non-zero,
+    // it sets that as the precision. Otherwise, it takes the second argument
+    // as the order of time that the metrics should be rounded to, with the
+    // maximum being 1s.
     SetPrecision(precision, interval time.Duration)
 
+    // Report an error.
     AddError(err error)
+
+    // Upgrade to a TrackingAccumulator with space for maxTracked
+    // metrics/batches.
+    WithTracking(maxTracked int) TrackingAccumulator
+}
+
+// TrackingID uniquely identifies a tracked metric group.
+type TrackingID uint64
+
+// DeliveryInfo provides the results of a delivered metric group.
+type DeliveryInfo interface {
+    // ID is the TrackingID
+    ID() TrackingID
+
+    // Delivered returns true if the metric was processed successfully.
+    Delivered() bool
+}
+
+// TrackingAccumulator is an Accumulator that provides a signal when the
+// metric has been fully processed. Sending more metrics than the accumulator
+// has been allocated for without reading status from the Accepted or Rejected
+// channels is an error.
+type TrackingAccumulator interface {
+    Accumulator
+
+    // Add the Metric and arrange for tracking feedback after processing.
+    AddTrackingMetric(m Metric) TrackingID
+
+    // Add a group of Metrics and arrange for a signal when the group has been
+    // processed.
+    AddTrackingMetricGroup(group []Metric) TrackingID
+
+    // Delivered returns a channel that will contain the tracking results.
+    Delivered() <-chan DeliveryInfo
 }
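The tracking API added above is the heart of this change: a plugin can hand the accumulator a group of metrics and receive a signal once the group has been fully processed. A minimal sketch of how a queue-style service input might use it (the `consume` helper and `ack` callback are illustrative, not part of this commit):

```go
package example

import "github.com/influxdata/telegraf"

// consume adds one batch with tracking and acknowledges the upstream
// source (for example, committing a queue offset) only after Telegraf
// reports the batch as delivered.
func consume(acc telegraf.Accumulator, batch []telegraf.Metric, ack func()) {
    tracking := acc.WithTracking(1) // room for one in-flight group
    id := tracking.AddTrackingMetricGroup(batch)

    for info := range tracking.Delivered() {
        if info.ID() == id {
            if info.Delivered() {
                ack()
            }
            return
        }
    }
}
```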
agent/accumulator.go

@@ -20,13 +20,13 @@ type MetricMaker interface {
 
 type accumulator struct {
     maker     MetricMaker
-    metrics   chan telegraf.Metric
+    metrics   chan<- telegraf.Metric
     precision time.Duration
 }
 
 func NewAccumulator(
     maker MetricMaker,
-    metrics chan telegraf.Metric,
+    metrics chan<- telegraf.Metric,
 ) telegraf.Accumulator {
     acc := accumulator{
         maker: maker,

@@ -42,7 +42,7 @@ func (ac *accumulator) AddFields(
     tags map[string]string,
     t ...time.Time,
 ) {
-    ac.addMetric(measurement, tags, fields, telegraf.Untyped, t...)
+    ac.addFields(measurement, tags, fields, telegraf.Untyped, t...)
 }
 
 func (ac *accumulator) AddGauge(

@@ -51,7 +51,7 @@ func (ac *accumulator) AddGauge(
     tags map[string]string,
     t ...time.Time,
 ) {
-    ac.addMetric(measurement, tags, fields, telegraf.Gauge, t...)
+    ac.addFields(measurement, tags, fields, telegraf.Gauge, t...)
 }
 
 func (ac *accumulator) AddCounter(

@@ -60,7 +60,7 @@ func (ac *accumulator) AddCounter(
     tags map[string]string,
     t ...time.Time,
 ) {
-    ac.addMetric(measurement, tags, fields, telegraf.Counter, t...)
+    ac.addFields(measurement, tags, fields, telegraf.Counter, t...)
 }
 
 func (ac *accumulator) AddSummary(

@@ -69,7 +69,7 @@ func (ac *accumulator) AddSummary(
     tags map[string]string,
     t ...time.Time,
 ) {
-    ac.addMetric(measurement, tags, fields, telegraf.Summary, t...)
+    ac.addFields(measurement, tags, fields, telegraf.Summary, t...)
 }
 
 func (ac *accumulator) AddHistogram(

@@ -78,10 +78,16 @@ func (ac *accumulator) AddHistogram(
     tags map[string]string,
     t ...time.Time,
 ) {
-    ac.addMetric(measurement, tags, fields, telegraf.Histogram, t...)
+    ac.addFields(measurement, tags, fields, telegraf.Histogram, t...)
 }
 
-func (ac *accumulator) addMetric(
+func (ac *accumulator) AddMetric(m telegraf.Metric) {
+    if m := ac.maker.MakeMetric(m); m != nil {
+        ac.metrics <- m
+    }
+}
+
+func (ac *accumulator) addFields(
     measurement string,
     tags map[string]string,
     fields map[string]interface{},

@@ -104,13 +110,9 @@ func (ac *accumulator) AddError(err error) {
         return
     }
     NErrors.Incr(1)
-    log.Printf("E! Error in plugin [%s]: %s", ac.maker.Name(), err)
+    log.Printf("E! [%s]: Error in plugin: %v", ac.maker.Name(), err)
 }
 
-// SetPrecision takes two time.Duration objects. If the first is non-zero,
-// it sets that as the precision. Otherwise, it takes the second argument
-// as the order of time that the metrics should be rounded to, with the
-// maximum being 1s.
 func (ac *accumulator) SetPrecision(precision, interval time.Duration) {
     if precision > 0 {
         ac.precision = precision

@@ -128,7 +130,7 @@ func (ac *accumulator) SetPrecision(precision, interval time.Duration) {
     }
 }
 
-func (ac accumulator) getTime(t []time.Time) time.Time {
+func (ac *accumulator) getTime(t []time.Time) time.Time {
     var timestamp time.Time
     if len(t) > 0 {
         timestamp = t[0]

@@ -137,3 +139,43 @@ func (ac accumulator) getTime(t []time.Time) time.Time {
     }
     return timestamp.Round(ac.precision)
 }
+
+func (ac *accumulator) WithTracking(maxTracked int) telegraf.TrackingAccumulator {
+    return &trackingAccumulator{
+        Accumulator: ac,
+        delivered:   make(chan telegraf.DeliveryInfo, maxTracked),
+    }
+}
+
+type trackingAccumulator struct {
+    telegraf.Accumulator
+    delivered chan telegraf.DeliveryInfo
+}
+
+func (a *trackingAccumulator) AddTrackingMetric(m telegraf.Metric) telegraf.TrackingID {
+    dm, id := metric.WithTracking(m, a.onDelivery)
+    a.AddMetric(dm)
+    return id
+}
+
+func (a *trackingAccumulator) AddTrackingMetricGroup(group []telegraf.Metric) telegraf.TrackingID {
+    db, id := metric.WithGroupTracking(group, a.onDelivery)
+    for _, m := range db {
+        a.AddMetric(m)
+    }
+    return id
+}
+
+func (a *trackingAccumulator) Delivered() <-chan telegraf.DeliveryInfo {
+    return a.delivered
+}
+
+func (a *trackingAccumulator) onDelivery(info telegraf.DeliveryInfo) {
+    select {
+    case a.delivered <- info:
+    default:
+        // This is a programming error in the input. More items were sent for
+        // tracking than space requested.
+        panic("channel is full")
+    }
+}
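Note the non-blocking send in `onDelivery` above: delivery results go into a channel whose capacity was fixed by `WithTracking(maxTracked)`, and overflow panics instead of stalling the metric pipeline. The same pattern in isolation (names are illustrative, not Telegraf APIs):

```go
package example

// notifier reports results without ever blocking the sender; capacity is
// reserved up front, mirroring trackingAccumulator.onDelivery in the diff.
type notifier struct {
    results chan int
}

func newNotifier(maxTracked int) *notifier {
    return &notifier{results: make(chan int, maxTracked)}
}

func (n *notifier) notify(r int) {
    select {
    case n.results <- r:
    default:
        // The caller tracked more items than it reserved space for.
        panic("channel is full")
    }
}
```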
agent/agent.go (824 changes)
@@ -1,9 +1,9 @@
 package agent
 
 import (
+    "context"
     "fmt"
     "log"
-    "os"
     "runtime"
     "sync"
     "time"
|
||||||
"github.com/influxdata/telegraf/internal"
|
"github.com/influxdata/telegraf/internal"
|
||||||
"github.com/influxdata/telegraf/internal/config"
|
"github.com/influxdata/telegraf/internal/config"
|
||||||
"github.com/influxdata/telegraf/internal/models"
|
"github.com/influxdata/telegraf/internal/models"
|
||||||
"github.com/influxdata/telegraf/selfstat"
|
"github.com/influxdata/telegraf/plugins/serializers/influx"
|
||||||
)
|
)
|
||||||
|
|
||||||
// Agent runs telegraf and collects data based on the given config
|
// Agent runs a set of plugins.
|
||||||
type Agent struct {
|
type Agent struct {
|
||||||
Config *config.Config
|
Config *config.Config
|
||||||
}
|
}
|
||||||
|
|
||||||
// NewAgent returns an Agent struct based off the given Config
|
// NewAgent returns an Agent for the given Config.
|
||||||
func NewAgent(config *config.Config) (*Agent, error) {
|
func NewAgent(config *config.Config) (*Agent, error) {
|
||||||
a := &Agent{
|
a := &Agent{
|
||||||
Config: config,
|
Config: config,
|
||||||
}
|
}
|
||||||
|
|
||||||
if !a.Config.Agent.OmitHostname {
|
|
||||||
if a.Config.Agent.Hostname == "" {
|
|
||||||
hostname, err := os.Hostname()
|
|
||||||
if err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
|
|
||||||
a.Config.Agent.Hostname = hostname
|
|
||||||
}
|
|
||||||
|
|
||||||
config.Tags["host"] = a.Config.Agent.Hostname
|
|
||||||
}
|
|
||||||
|
|
||||||
return a, nil
|
return a, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// Connect connects to all configured outputs
|
// Run starts and runs the Agent until the context is done.
|
||||||
func (a *Agent) Connect() error {
|
func (a *Agent) Run(ctx context.Context) error {
|
||||||
for _, o := range a.Config.Outputs {
|
log.Printf("I! [agent] Config: Interval:%s, Quiet:%#v, Hostname:%#v, "+
|
||||||
switch ot := o.Output.(type) {
|
"Flush Interval:%s",
|
||||||
case telegraf.ServiceOutput:
|
a.Config.Agent.Interval.Duration, a.Config.Agent.Quiet,
|
||||||
if err := ot.Start(); err != nil {
|
a.Config.Agent.Hostname, a.Config.Agent.FlushInterval.Duration)
|
||||||
log.Printf("E! Service for output %s failed to start, exiting\n%s\n",
|
|
||||||
o.Name, err.Error())
|
if ctx.Err() != nil {
|
||||||
return err
|
return ctx.Err()
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
log.Printf("D! Attempting connection to output: %s\n", o.Name)
|
log.Printf("D! [agent] Connecting outputs")
|
||||||
err := o.Output.Connect()
|
err := a.connectOutputs(ctx)
|
||||||
if err != nil {
|
|
||||||
log.Printf("E! Failed to connect to output %s, retrying in 15s, "+
|
|
||||||
"error was '%s' \n", o.Name, err)
|
|
||||||
time.Sleep(15 * time.Second)
|
|
||||||
err = o.Output.Connect()
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
inputC := make(chan telegraf.Metric, 100)
|
||||||
|
procC := make(chan telegraf.Metric, 100)
|
||||||
|
outputC := make(chan telegraf.Metric, 100)
|
||||||
|
|
||||||
|
startTime := time.Now()
|
||||||
|
|
||||||
|
log.Printf("D! [agent] Starting service inputs")
|
||||||
|
err = a.startServiceInputs(ctx, inputC)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
}
|
}
|
||||||
log.Printf("D! Successfully connected to output: %s\n", o.Name)
|
|
||||||
|
var wg sync.WaitGroup
|
||||||
|
|
||||||
|
src := inputC
|
||||||
|
dst := inputC
|
||||||
|
|
||||||
|
wg.Add(1)
|
||||||
|
go func(dst chan telegraf.Metric) {
|
||||||
|
defer wg.Done()
|
||||||
|
|
||||||
|
err := a.runInputs(ctx, startTime, dst)
|
||||||
|
if err != nil {
|
||||||
|
log.Printf("E! [agent] Error running inputs: %v", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
log.Printf("D! [agent] Stopping service inputs")
|
||||||
|
a.stopServiceInputs()
|
||||||
|
|
||||||
|
close(dst)
|
||||||
|
log.Printf("D! [agent] Input channel closed")
|
||||||
|
}(dst)
|
||||||
|
|
||||||
|
src = dst
|
||||||
|
|
||||||
|
if len(a.Config.Processors) > 0 {
|
||||||
|
dst = procC
|
||||||
|
|
||||||
|
wg.Add(1)
|
||||||
|
go func(src, dst chan telegraf.Metric) {
|
||||||
|
defer wg.Done()
|
||||||
|
|
||||||
|
err := a.runProcessors(src, dst)
|
||||||
|
if err != nil {
|
||||||
|
log.Printf("E! [agent] Error running processors: %v", err)
|
||||||
|
}
|
||||||
|
close(dst)
|
||||||
|
log.Printf("D! [agent] Processor channel closed")
|
||||||
|
}(src, dst)
|
||||||
|
|
||||||
|
src = dst
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(a.Config.Aggregators) > 0 {
|
||||||
|
dst = outputC
|
||||||
|
|
||||||
|
wg.Add(1)
|
||||||
|
go func(src, dst chan telegraf.Metric) {
|
||||||
|
defer wg.Done()
|
||||||
|
|
||||||
|
err := a.runAggregators(startTime, src, dst)
|
||||||
|
if err != nil {
|
||||||
|
log.Printf("E! [agent] Error running aggregators: %v", err)
|
||||||
|
}
|
||||||
|
close(dst)
|
||||||
|
log.Printf("D! [agent] Output channel closed")
|
||||||
|
}(src, dst)
|
||||||
|
|
||||||
|
src = dst
|
||||||
|
}
|
||||||
|
|
||||||
|
wg.Add(1)
|
||||||
|
go func(src chan telegraf.Metric) {
|
||||||
|
defer wg.Done()
|
||||||
|
|
||||||
|
err := a.runOutputs(startTime, src)
|
||||||
|
if err != nil {
|
||||||
|
log.Printf("E! [agent] Error running outputs: %v", err)
|
||||||
|
}
|
||||||
|
}(src)
|
||||||
|
|
||||||
|
wg.Wait()
|
||||||
|
|
||||||
|
log.Printf("D! [agent] Closing outputs")
|
||||||
|
err = a.closeOutputs()
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// Close closes the connection to all configured outputs
|
// Test runs the inputs once and prints the output to stdout in line protocol.
|
||||||
func (a *Agent) Close() error {
|
func (a *Agent) Test() error {
|
||||||
var err error
|
var wg sync.WaitGroup
|
||||||
for _, o := range a.Config.Outputs {
|
metricC := make(chan telegraf.Metric)
|
||||||
err = o.Output.Close()
|
defer func() {
|
||||||
switch ot := o.Output.(type) {
|
close(metricC)
|
||||||
case telegraf.ServiceOutput:
|
wg.Wait()
|
||||||
ot.Stop()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
func panicRecover(input *models.RunningInput) {
|
|
||||||
if err := recover(); err != nil {
|
|
||||||
trace := make([]byte, 2048)
|
|
||||||
runtime.Stack(trace, true)
|
|
||||||
log.Printf("E! FATAL: Input [%s] panicked: %s, Stack:\n%s\n",
|
|
||||||
input.Name(), err, trace)
|
|
||||||
log.Println("E! PLEASE REPORT THIS PANIC ON GITHUB with " +
|
|
||||||
"stack trace, configuration, and OS information: " +
|
|
||||||
"https://github.com/influxdata/telegraf/issues/new")
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// gatherer runs the inputs that have been configured with their own
|
|
||||||
// reporting interval.
|
|
||||||
func (a *Agent) gatherer(
|
|
||||||
shutdown chan struct{},
|
|
||||||
input *models.RunningInput,
|
|
||||||
interval time.Duration,
|
|
||||||
metricC chan telegraf.Metric,
|
|
||||||
) {
|
|
||||||
defer panicRecover(input)
|
|
||||||
|
|
||||||
GatherTime := selfstat.RegisterTiming("gather",
|
|
||||||
"gather_time_ns",
|
|
||||||
map[string]string{"input": input.Config.Name},
|
|
||||||
)
|
|
||||||
|
|
||||||
acc := NewAccumulator(input, metricC)
|
|
||||||
acc.SetPrecision(a.Config.Agent.Precision.Duration,
|
|
||||||
a.Config.Agent.Interval.Duration)
|
|
||||||
|
|
||||||
ticker := time.NewTicker(interval)
|
|
||||||
defer ticker.Stop()
|
|
||||||
|
|
||||||
for {
|
|
||||||
internal.RandomSleep(a.Config.Agent.CollectionJitter.Duration, shutdown)
|
|
||||||
|
|
||||||
start := time.Now()
|
|
||||||
gatherWithTimeout(shutdown, input, acc, interval)
|
|
||||||
elapsed := time.Since(start)
|
|
||||||
|
|
||||||
GatherTime.Incr(elapsed.Nanoseconds())
|
|
||||||
|
|
||||||
select {
|
|
||||||
case <-shutdown:
|
|
||||||
return
|
|
||||||
case <-ticker.C:
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// gatherWithTimeout gathers from the given input, with the given timeout.
|
|
||||||
// when the given timeout is reached, gatherWithTimeout logs an error message
|
|
||||||
// but continues waiting for it to return. This is to avoid leaving behind
|
|
||||||
// hung processes, and to prevent re-calling the same hung process over and
|
|
||||||
// over.
|
|
||||||
func gatherWithTimeout(
|
|
||||||
shutdown chan struct{},
|
|
||||||
input *models.RunningInput,
|
|
||||||
acc telegraf.Accumulator,
|
|
||||||
timeout time.Duration,
|
|
||||||
) {
|
|
||||||
ticker := time.NewTicker(timeout)
|
|
||||||
defer ticker.Stop()
|
|
||||||
done := make(chan error)
|
|
||||||
go func() {
|
|
||||||
done <- input.Input.Gather(acc)
|
|
||||||
}()
|
}()
|
||||||
|
|
||||||
for {
|
wg.Add(1)
|
||||||
select {
|
|
||||||
case err := <-done:
|
|
||||||
if err != nil {
|
|
||||||
acc.AddError(err)
|
|
||||||
}
|
|
||||||
return
|
|
||||||
case <-ticker.C:
|
|
||||||
err := fmt.Errorf("took longer to collect than collection interval (%s)",
|
|
||||||
timeout)
|
|
||||||
acc.AddError(err)
|
|
||||||
continue
|
|
||||||
case <-shutdown:
|
|
||||||
return
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Test verifies that we can 'Gather' from all inputs with their configured
|
|
||||||
// Config struct
|
|
||||||
func (a *Agent) Test() error {
|
|
||||||
shutdown := make(chan struct{})
|
|
||||||
defer close(shutdown)
|
|
||||||
metricC := make(chan telegraf.Metric)
|
|
||||||
|
|
||||||
// dummy receiver for the point channel
|
|
||||||
go func() {
|
go func() {
|
||||||
for {
|
defer wg.Done()
|
||||||
select {
|
|
||||||
case <-metricC:
|
s := influx.NewSerializer()
|
||||||
// do nothing
|
s.SetFieldSortOrder(influx.SortFields)
|
||||||
case <-shutdown:
|
for metric := range metricC {
|
||||||
return
|
octets, err := s.Serialize(metric)
|
||||||
|
if err == nil {
|
||||||
|
fmt.Print("> ", string(octets))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}()
|
}()
|
||||||
|
|
||||||
for _, input := range a.Config.Inputs {
|
for _, input := range a.Config.Inputs {
|
||||||
if _, ok := input.Input.(telegraf.ServiceInput); ok {
|
if _, ok := input.Input.(telegraf.ServiceInput); ok {
|
||||||
fmt.Printf("\nWARNING: skipping plugin [[%s]]: service inputs not supported in --test mode\n",
|
log.Printf("W!: [agent] skipping plugin [[%s]]: service inputs not supported in --test mode",
|
||||||
input.Name())
|
input.Name())
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
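The new `Run` above replaces the old shared-channel design with a linear pipeline: inputs feed `inputC`, optional processor and aggregator stages sit in the middle, and outputs consume the final channel. Each stage goroutine closes its downstream channel when it finishes, so shutdown drains the pipeline in order. A stripped-down sketch of the same shape (the stage functions are placeholders, not Telegraf APIs):

```go
package example

import "sync"

// runPipeline mirrors the shutdown ordering used by Agent.Run: each stage
// closes its output channel when done, so the next stage drains and exits.
func runPipeline(
    produce func(chan<- int),
    process func(<-chan int, chan<- int),
    sink func(<-chan int),
) {
    inputC := make(chan int, 100)
    procC := make(chan int, 100)

    var wg sync.WaitGroup

    wg.Add(1)
    go func() {
        defer wg.Done()
        produce(inputC)
        close(inputC) // lets the processor stage drain and exit
    }()

    wg.Add(1)
    go func() {
        defer wg.Done()
        process(inputC, procC)
        close(procC) // lets the sink drain and exit
    }()

    wg.Add(1)
    go func() {
        defer wg.Done()
        sink(procC)
    }()

    wg.Wait()
}
```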
@@ -200,7 +170,6 @@ func (a *Agent) Test() error {
         acc := NewAccumulator(input, metricC)
         acc.SetPrecision(a.Config.Agent.Precision.Duration,
             a.Config.Agent.Interval.Duration)
-        input.SetTrace(true)
         input.SetDefaultTags(a.Config.Tags)
 
         if err := input.Input.Gather(acc); err != nil {
@@ -218,216 +187,445 @@
         }
     }
 
     return nil
 }
 
-// flush writes a list of metrics to all configured outputs
-func (a *Agent) flush() {
-    var wg sync.WaitGroup
-
-    wg.Add(len(a.Config.Outputs))
-    for _, o := range a.Config.Outputs {
-        go func(output *models.RunningOutput) {
-            defer wg.Done()
-            err := output.Write()
-            if err != nil {
-                log.Printf("E! Error writing to output [%s]: %s\n",
-                    output.Name, err.Error())
-            }
-        }(o)
-    }
-
-    wg.Wait()
-}
-
-// flusher monitors the metrics input channel and flushes on the minimum interval
-func (a *Agent) flusher(
-    shutdown chan struct{},
-    metricC chan telegraf.Metric,
-    aggMetricC chan telegraf.Metric,
-    outMetricC chan telegraf.Metric,
-) error {
-    var wg sync.WaitGroup
-    wg.Add(1)
-    go func() {
-        defer wg.Done()
-        for {
-            select {
-            case <-shutdown:
-                if len(outMetricC) > 0 {
-                    // keep going until channel is empty
-                    continue
-                }
-                return
-            case metric := <-outMetricC:
-                for i, o := range a.Config.Outputs {
-                    if i == len(a.Config.Outputs)-1 {
-                        o.AddMetric(metric)
-                    } else {
-                        o.AddMetric(metric.Copy())
-                    }
-                }
-            }
-        }
-    }()
-
-    wg.Add(1)
-    go func() {
-        defer wg.Done()
-        for metric := range aggMetricC {
-            // Apply Processors
-            metrics := []telegraf.Metric{metric}
-            for _, processor := range a.Config.Processors {
-                metrics = processor.Apply(metrics...)
-            }
-            outMetricC <- metric
-        }
-    }()
-
-    wg.Add(1)
-    go func() {
-        defer wg.Done()
-        for {
-            select {
-            case <-shutdown:
-                if len(metricC) > 0 {
-                    // keep going until channel is empty
-                    continue
-                }
-                close(aggMetricC)
-                return
-            case metric := <-metricC:
-                // Apply Processors
-                metrics := []telegraf.Metric{metric}
-                for _, processor := range a.Config.Processors {
-                    metrics = processor.Apply(metrics...)
-                }
-                for _, metric := range metrics {
-                    // Apply Aggregators
-                    var dropOriginal bool
-                    for _, agg := range a.Config.Aggregators {
-                        if ok := agg.Add(metric.Copy()); ok {
-                            dropOriginal = true
-                        }
-                    }
-
-                    // Forward metric to Outputs
-                    if !dropOriginal {
-                        outMetricC <- metric
-                    }
-                }
-            }
-        }
-    }()
-
-    ticker := time.NewTicker(a.Config.Agent.FlushInterval.Duration)
-    semaphore := make(chan struct{}, 1)
-    for {
-        select {
-        case <-shutdown:
-            log.Println("I! Hang on, flushing any cached metrics before shutdown")
-            // wait for outMetricC to get flushed before flushing outputs
-            wg.Wait()
-            a.flush()
-            return nil
-        case <-ticker.C:
-            go func() {
-                select {
-                case semaphore <- struct{}{}:
-                    internal.RandomSleep(a.Config.Agent.FlushJitter.Duration, shutdown)
-                    a.flush()
-                    <-semaphore
-                default:
-                    // skipping this flush because one is already happening
-                    log.Println("W! Skipping a scheduled flush because there is" +
-                        " already a flush ongoing.")
-                }
-            }()
-        }
-    }
-}
-
-// Run runs the agent daemon, gathering every Interval
-func (a *Agent) Run(shutdown chan struct{}) error {
-    var wg sync.WaitGroup
-
-    log.Printf("I! Agent Config: Interval:%s, Quiet:%#v, Hostname:%#v, "+
-        "Flush Interval:%s \n",
-        a.Config.Agent.Interval.Duration, a.Config.Agent.Quiet,
-        a.Config.Agent.Hostname, a.Config.Agent.FlushInterval.Duration)
-
-    // Channel shared between all input threads for accumulating metrics
-    metricC := make(chan telegraf.Metric, 100)
-
-    // Channel for metrics ready to be output
-    outMetricC := make(chan telegraf.Metric, 100)
-
-    // Channel for aggregated metrics
-    aggMetricC := make(chan telegraf.Metric, 100)
-
-    // Round collection to nearest interval by sleeping
-    if a.Config.Agent.RoundInterval {
-        i := int64(a.Config.Agent.Interval.Duration)
-        time.Sleep(time.Duration(i - (time.Now().UnixNano() % i)))
-    }
-
-    wg.Add(1)
-    go func() {
-        defer wg.Done()
-        if err := a.flusher(shutdown, metricC, aggMetricC, outMetricC); err != nil {
-            log.Printf("E! Flusher routine failed, exiting: %s\n", err.Error())
-            close(shutdown)
-        }
-    }()
-
-    wg.Add(len(a.Config.Aggregators))
-    for _, aggregator := range a.Config.Aggregators {
-        go func(agg *models.RunningAggregator) {
-            defer wg.Done()
-            acc := NewAccumulator(agg, aggMetricC)
-            acc.SetPrecision(a.Config.Agent.Precision.Duration,
-                a.Config.Agent.Interval.Duration)
-            agg.Run(acc, shutdown)
-        }(aggregator)
-    }
-
-    // Service inputs may immediately add metrics, if metrics are added before
-    // the aggregator starts they will be dropped. Generally this occurs
-    // only during testing but it is an outstanding issue.
-    //
-    // https://github.com/influxdata/telegraf/issues/4394
-    for _, input := range a.Config.Inputs {
-        input.SetDefaultTags(a.Config.Tags)
-        switch p := input.Input.(type) {
-        case telegraf.ServiceInput:
-            acc := NewAccumulator(input, metricC)
-            // Service input plugins should set their own precision of their
-            // metrics.
-            acc.SetPrecision(time.Nanosecond, 0)
-            if err := p.Start(acc); err != nil {
-                log.Printf("E! Service for input %s failed to start, exiting\n%s\n",
-                    input.Name(), err.Error())
-                return err
-            }
-            defer p.Stop()
-        }
-    }
-
-    wg.Add(len(a.Config.Inputs))
-    for _, input := range a.Config.Inputs {
-        interval := a.Config.Agent.Interval.Duration
-        // overwrite global interval if this plugin has its own.
-        if input.Config.Interval != 0 {
-            interval = input.Config.Interval
-        }
-        go func(in *models.RunningInput, interv time.Duration) {
-            defer wg.Done()
-            a.gatherer(shutdown, in, interv, metricC)
-        }(input, interval)
-    }
-
-    wg.Wait()
-    a.Close()
-    return nil
-}
+// runInputs starts and triggers the periodic gather for Inputs.
+//
+// When the context is done the timers are stopped and this function returns
+// after all ongoing Gather calls complete.
+func (a *Agent) runInputs(
+    ctx context.Context,
+    startTime time.Time,
+    dst chan<- telegraf.Metric,
+) error {
+    var wg sync.WaitGroup
+    for _, input := range a.Config.Inputs {
+        interval := a.Config.Agent.Interval.Duration
+        precision := a.Config.Agent.Precision.Duration
+        jitter := a.Config.Agent.CollectionJitter.Duration
+
+        // Overwrite agent interval if this plugin has its own.
+        if input.Config.Interval != 0 {
+            interval = input.Config.Interval
+        }
+
+        acc := NewAccumulator(input, dst)
+        acc.SetPrecision(precision, interval)
+
+        wg.Add(1)
+        go func(input *models.RunningInput) {
+            defer wg.Done()
+
+            if a.Config.Agent.RoundInterval {
+                err := internal.SleepContext(
+                    ctx, internal.AlignDuration(startTime, interval))
+                if err != nil {
+                    return
+                }
+            }
+
+            a.gatherOnInterval(ctx, acc, input, interval, jitter)
+        }(input)
+    }
+    wg.Wait()
+
+    return nil
+}
+
+// gather runs an input's gather function periodically until the context is
+// done.
+func (a *Agent) gatherOnInterval(
+    ctx context.Context,
+    acc telegraf.Accumulator,
+    input *models.RunningInput,
+    interval time.Duration,
+    jitter time.Duration,
+) {
+    defer panicRecover(input)
+
+    ticker := time.NewTicker(interval)
+    defer ticker.Stop()
+
+    for {
+        err := internal.SleepContext(ctx, internal.RandomDuration(jitter))
+        if err != nil {
+            return
+        }
+
+        err = a.gatherOnce(acc, input, interval)
+        if err != nil {
+            acc.AddError(err)
+        }
+
+        select {
+        case <-ticker.C:
+            continue
+        case <-ctx.Done():
+            return
+        }
+    }
+}
+
+// gatherOnce runs the input's Gather function once, logging a warning each
+// interval it fails to complete before.
+func (a *Agent) gatherOnce(
+    acc telegraf.Accumulator,
+    input *models.RunningInput,
+    timeout time.Duration,
+) error {
+    ticker := time.NewTicker(timeout)
+    defer ticker.Stop()
+
+    done := make(chan error)
+    go func() {
+        done <- input.Gather(acc)
+    }()
+
+    for {
+        select {
+        case err := <-done:
+            return err
+        case <-ticker.C:
+            log.Printf("W! [agent] input %q did not complete within its interval",
+                input.Name())
+        }
+    }
+}
+
+// runProcessors applies processors to metrics.
+func (a *Agent) runProcessors(
+    src <-chan telegraf.Metric,
+    agg chan<- telegraf.Metric,
+) error {
+    for metric := range src {
+        metrics := a.applyProcessors(metric)
+
+        for _, metric := range metrics {
+            agg <- metric
+        }
+    }
+
+    return nil
+}
+
+// applyProcessors applies all processors to a metric.
+func (a *Agent) applyProcessors(m telegraf.Metric) []telegraf.Metric {
+    metrics := []telegraf.Metric{m}
+    for _, processor := range a.Config.Processors {
+        metrics = processor.Apply(metrics...)
+    }
+
+    return metrics
+}
+
+// runAggregators triggers the periodic push for Aggregators.
+//
+// When the context is done a final push will occur and then this function
+// will return.
+func (a *Agent) runAggregators(
+    startTime time.Time,
+    src <-chan telegraf.Metric,
+    dst chan<- telegraf.Metric,
+) error {
+    ctx, cancel := context.WithCancel(context.Background())
+
+    var wg sync.WaitGroup
+    wg.Add(1)
+    go func() {
+        defer wg.Done()
+        for metric := range src {
+            var dropOriginal bool
+            for _, agg := range a.Config.Aggregators {
+                if ok := agg.Add(metric); ok {
+                    dropOriginal = true
+                }
+            }
+
+            if !dropOriginal {
+                dst <- metric
+            }
+        }
+        cancel()
+    }()
+
+    precision := a.Config.Agent.Precision.Duration
+    interval := a.Config.Agent.Interval.Duration
+    aggregations := make(chan telegraf.Metric, 100)
+    for _, agg := range a.Config.Aggregators {
+        wg.Add(1)
+        go func(agg *models.RunningAggregator) {
+            defer wg.Done()
+
+            if a.Config.Agent.RoundInterval {
+                // Aggregators are aligned to the agent interval regardless of
+                // their period.
+                err := internal.SleepContext(ctx, internal.AlignDuration(startTime, interval))
+                if err != nil {
+                    return
+                }
+            }
+
+            agg.SetPeriodStart(startTime)
+
+            acc := NewAccumulator(agg, aggregations)
+            acc.SetPrecision(precision, interval)
+            a.push(ctx, agg, acc)
+            close(aggregations)
+        }(agg)
+    }
+
+    for metric := range aggregations {
+        metrics := a.applyProcessors(metric)
+        for _, metric := range metrics {
+            dst <- metric
+        }
+    }
+
+    wg.Wait()
+    return nil
+}
+
+// push runs the push for a single aggregator every period. Simpler than
+// the output/input version as a timeout should be less likely... not really,
+// because the output channel can block for now.
+func (a *Agent) push(
+    ctx context.Context,
+    aggregator *models.RunningAggregator,
+    acc telegraf.Accumulator,
+) {
+    ticker := time.NewTicker(aggregator.Period())
+    defer ticker.Stop()
+
+    for {
+        select {
+        case <-ticker.C:
+            break
+        case <-ctx.Done():
+            aggregator.Push(acc)
+            return
+        }
+
+        aggregator.Push(acc)
+    }
+}
+
+// runOutputs triggers the periodic write for Outputs.
+//
+// When the context is done, outputs continue to run until their buffer is
+// closed, after which they run flush once more.
+func (a *Agent) runOutputs(
+    startTime time.Time,
+    src <-chan telegraf.Metric,
+) error {
+    interval := a.Config.Agent.FlushInterval.Duration
+    jitter := a.Config.Agent.FlushJitter.Duration
+
+    ctx, cancel := context.WithCancel(context.Background())
+
+    var wg sync.WaitGroup
+    for _, output := range a.Config.Outputs {
+        interval := interval
+        // Overwrite agent flush_interval if this plugin has its own.
+        if output.Config.FlushInterval != 0 {
+            interval = output.Config.FlushInterval
+        }
+
+        wg.Add(1)
+        go func(output *models.RunningOutput) {
+            defer wg.Done()
+
+            if a.Config.Agent.RoundInterval {
+                err := internal.SleepContext(
+                    ctx, internal.AlignDuration(startTime, interval))
+                if err != nil {
+                    return
+                }
+            }
+
+            a.flush(ctx, output, interval, jitter)
+        }(output)
+    }
+
+    for metric := range src {
+        for i, output := range a.Config.Outputs {
+            if i == len(a.Config.Outputs)-1 {
+                output.AddMetric(metric)
+            } else {
+                output.AddMetric(metric.Copy())
+            }
+        }
+    }
+
+    log.Println("I! [agent] Hang on, flushing any cached metrics before shutdown")
+    cancel()
+    wg.Wait()
+
+    return nil
+}
+
+// flush runs an output's flush function periodically until the context is
+// done.
+func (a *Agent) flush(
+    ctx context.Context,
+    output *models.RunningOutput,
+    interval time.Duration,
+    jitter time.Duration,
+) {
+    // since we are watching two channels we need a ticker with the jitter
+    // integrated.
+    ticker := NewTicker(interval, jitter)
+    defer ticker.Stop()
+
+    logError := func(err error) {
+        if err != nil {
+            log.Printf("E! [agent] Error writing to output [%s]: %v", output.Name, err)
+        }
+    }
+
+    for {
+        // Favor shutdown over other methods.
+        select {
+        case <-ctx.Done():
+            logError(a.flushOnce(output, interval, output.Write))
+            return
+        default:
+        }
+
+        select {
+        case <-ticker.C:
+            logError(a.flushOnce(output, interval, output.Write))
+        case <-output.BatchReady:
+            // Favor the ticker over batch ready
+            select {
+            case <-ticker.C:
+                logError(a.flushOnce(output, interval, output.Write))
+            default:
+                logError(a.flushOnce(output, interval, output.WriteBatch))
+            }
+        case <-ctx.Done():
+            logError(a.flushOnce(output, interval, output.Write))
+            return
+        }
+    }
+}
+
+// flushOnce runs the output's Write function once, logging a warning each
+// interval it fails to complete before.
+func (a *Agent) flushOnce(
+    output *models.RunningOutput,
+    timeout time.Duration,
+    writeFunc func() error,
+) error {
+    ticker := time.NewTicker(timeout)
+    defer ticker.Stop()
+
+    done := make(chan error)
+    go func() {
+        done <- writeFunc()
+    }()
+
+    for {
+        select {
+        case err := <-done:
+            output.LogBufferStatus()
+            return err
+        case <-ticker.C:
+            log.Printf("W! [agent] output %q did not complete within its flush interval",
+                output.Name)
+            output.LogBufferStatus()
+        }
+    }
+}
+
+// connectOutputs connects to all outputs.
|
||||||
|
func (a *Agent) connectOutputs(ctx context.Context) error {
|
||||||
|
for _, output := range a.Config.Outputs {
|
||||||
|
log.Printf("D! [agent] Attempting connection to output: %s\n", output.Name)
|
||||||
|
err := output.Output.Connect()
|
||||||
|
if err != nil {
|
||||||
|
log.Printf("E! [agent] Failed to connect to output %s, retrying in 15s, "+
|
||||||
|
"error was '%s' \n", output.Name, err)
|
||||||
|
|
||||||
|
err := internal.SleepContext(ctx, 15*time.Second)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
err = output.Output.Connect()
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
log.Printf("D! [agent] Successfully connected to output: %s\n", output.Name)
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// closeOutputs closes all outputs.
|
||||||
|
func (a *Agent) closeOutputs() error {
|
||||||
|
var err error
|
||||||
|
for _, output := range a.Config.Outputs {
|
||||||
|
err = output.Output.Close()
|
||||||
|
}
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
// startServiceInputs starts all service inputs.
|
||||||
|
func (a *Agent) startServiceInputs(
|
||||||
|
ctx context.Context,
|
||||||
|
dst chan<- telegraf.Metric,
|
||||||
|
) error {
|
||||||
|
started := []telegraf.ServiceInput{}
|
||||||
|
|
||||||
|
for _, input := range a.Config.Inputs {
|
||||||
|
if si, ok := input.Input.(telegraf.ServiceInput); ok {
|
||||||
|
// Service input plugins are not subject to timestamp rounding.
|
||||||
|
// This only applies to the accumulator passed to Start(), the
|
||||||
|
// Gather() accumulator does apply rounding according to the
|
||||||
|
// precision agent setting.
|
||||||
|
acc := NewAccumulator(input, dst)
|
||||||
|
acc.SetPrecision(time.Nanosecond, 0)
|
||||||
|
|
||||||
|
err := si.Start(acc)
|
||||||
|
if err != nil {
|
||||||
|
log.Printf("E! [agent] Service for input %s failed to start: %v",
|
||||||
|
input.Name(), err)
|
||||||
|
|
||||||
|
for _, si := range started {
|
||||||
|
si.Stop()
|
||||||
|
}
|
||||||
|
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
started = append(started, si)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// stopServiceInputs stops all service inputs.
|
||||||
|
func (a *Agent) stopServiceInputs() {
|
||||||
|
for _, input := range a.Config.Inputs {
|
||||||
|
if si, ok := input.Input.(telegraf.ServiceInput); ok {
|
||||||
|
si.Stop()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// panicRecover displays an error if an input panics.
|
||||||
|
func panicRecover(input *models.RunningInput) {
|
||||||
|
if err := recover(); err != nil {
|
||||||
|
trace := make([]byte, 2048)
|
||||||
|
runtime.Stack(trace, true)
|
||||||
|
log.Printf("E! FATAL: Input [%s] panicked: %s, Stack:\n%s\n",
|
||||||
|
input.Name(), err, trace)
|
||||||
|
log.Println("E! PLEASE REPORT THIS PANIC ON GITHUB with " +
|
||||||
|
"stack trace, configuration, and OS information: " +
|
||||||
|
"https://github.com/influxdata/telegraf/issues/new/choose")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
|
@@ -0,0 +1,57 @@
```go
package agent

import (
	"context"
	"sync"
	"time"

	"github.com/influxdata/telegraf/internal"
)

type Ticker struct {
	C          chan time.Time
	ticker     *time.Ticker
	jitter     time.Duration
	wg         sync.WaitGroup
	cancelFunc context.CancelFunc
}

func NewTicker(
	interval time.Duration,
	jitter time.Duration,
) *Ticker {
	ctx, cancel := context.WithCancel(context.Background())

	t := &Ticker{
		C:          make(chan time.Time, 1),
		ticker:     time.NewTicker(interval),
		jitter:     jitter,
		cancelFunc: cancel,
	}

	t.wg.Add(1)
	go t.relayTime(ctx)

	return t
}

func (t *Ticker) Stop() {
	t.cancelFunc()
	t.wg.Wait()
}

func (t *Ticker) relayTime(ctx context.Context) {
	defer t.wg.Done()
	for {
		select {
		case tm := <-t.ticker.C:
			internal.SleepContext(ctx, internal.RandomDuration(t.jitter))
			select {
			case t.C <- tm:
			default:
			}
		case <-ctx.Done():
			return
		}
	}
}
```
```diff
@@ -1,6 +1,8 @@
 package main
 
 import (
+	"context"
+	"errors"
 	"flag"
 	"fmt"
 	"log"
@@ -78,41 +80,70 @@ func reloadLoop(
 	for <-reload {
 		reload <- false
+
+		ctx, cancel := context.WithCancel(context.Background())
+
+		signals := make(chan os.Signal)
+		signal.Notify(signals, os.Interrupt, syscall.SIGHUP, syscall.SIGTERM)
+		go func() {
+			select {
+			case sig := <-signals:
+				if sig == syscall.SIGHUP {
+					log.Printf("I! Reloading Telegraf config")
+					<-reload
+					reload <- true
+				}
+				cancel()
+			case <-stop:
+				cancel()
+			}
+		}()
+
+		err := runAgent(ctx, inputFilters, outputFilters)
+		if err != nil {
+			log.Fatalf("E! [telegraf] Error running agent: %v", err)
+		}
+	}
+}
+
+func runAgent(ctx context.Context,
+	inputFilters []string,
+	outputFilters []string,
+) error {
 	// If no other options are specified, load the config file and run.
 	c := config.NewConfig()
 	c.OutputFilters = outputFilters
 	c.InputFilters = inputFilters
 	err := c.LoadConfig(*fConfig)
 	if err != nil {
-		log.Fatal("E! " + err.Error())
+		return err
 	}
 
 	if *fConfigDirectory != "" {
 		err = c.LoadDirectory(*fConfigDirectory)
 		if err != nil {
-			log.Fatal("E! " + err.Error())
+			return err
 		}
 	}
 	if !*fTest && len(c.Outputs) == 0 {
-		log.Fatalf("E! Error: no outputs found, did you provide a valid config file?")
+		return errors.New("Error: no outputs found, did you provide a valid config file?")
 	}
 	if len(c.Inputs) == 0 {
-		log.Fatalf("E! Error: no inputs found, did you provide a valid config file?")
+		return errors.New("Error: no inputs found, did you provide a valid config file?")
 	}
 
 	if int64(c.Agent.Interval.Duration) <= 0 {
-		log.Fatalf("E! Agent interval must be positive, found %s",
+		return fmt.Errorf("Agent interval must be positive, found %s",
 			c.Agent.Interval.Duration)
 	}
 
 	if int64(c.Agent.FlushInterval.Duration) <= 0 {
-		log.Fatalf("E! Agent flush_interval must be positive; found %s",
+		return fmt.Errorf("Agent flush_interval must be positive; found %s",
 			c.Agent.Interval.Duration)
 	}
 
 	ag, err := agent.NewAgent(c)
 	if err != nil {
-		log.Fatal("E! " + err.Error())
+		return err
 	}
 
 	// Setup logging
@@ -123,37 +154,8 @@ func reloadLoop(
 	)
 
 	if *fTest {
-		err = ag.Test()
-		if err != nil {
-			log.Fatal("E! " + err.Error())
-		}
-		os.Exit(0)
-	}
-
-	err = ag.Connect()
-	if err != nil {
-		log.Fatal("E! " + err.Error())
-	}
-
-	shutdown := make(chan struct{})
-	signals := make(chan os.Signal)
-	signal.Notify(signals, os.Interrupt, syscall.SIGHUP, syscall.SIGTERM)
-	go func() {
-		select {
-		case sig := <-signals:
-			if sig == os.Interrupt || sig == syscall.SIGTERM {
-				close(shutdown)
-			}
-			if sig == syscall.SIGHUP {
-				log.Printf("I! Reloading Telegraf config\n")
-				<-reload
-				reload <- true
-				close(shutdown)
-			}
-		case <-stop:
-			close(shutdown)
-		}
-	}()
+		return ag.Test()
+	}
 
 	log.Printf("I! Starting Telegraf %s\n", version)
 	log.Printf("I! Loaded inputs: %s", strings.Join(c.InputNames(), " "))
@@ -180,8 +182,7 @@ func reloadLoop(
 		}
 	}
 
-	ag.Run(shutdown)
-	}
+	return ag.Run(ctx)
 }
 
 func usageExit(rc int) {
```
@@ -0,0 +1,126 @@
### Aggregator Plugins

This section is for developers who want to create a new aggregator plugin.

### Aggregator Plugin Guidelines

* An aggregator must conform to the [telegraf.Aggregator][] interface.
* Aggregators should call `aggregators.Add` in their `init` function to
  register themselves.  See below for a quick example.
* To be available within Telegraf itself, plugins must add themselves to the
  `github.com/influxdata/telegraf/plugins/aggregators/all/all.go` file.
* The `SampleConfig` function should return valid toml that describes how the
  plugin can be configured.  This is included in `telegraf config`.  Please
  consult the [SampleConfig][] page for the latest style guidelines.
* The `Description` function should say in one line what this aggregator does.
* The aggregator plugin will need to keep caches of metrics that have passed
  through it.  This should be done using the builtin `HashID()` function of
  each metric.
* When the `Reset()` function is called, all caches should be cleared.

### Aggregator Plugin Example

```go
package min

// min.go

import (
    "github.com/influxdata/telegraf"
    "github.com/influxdata/telegraf/plugins/aggregators"
)

type Min struct {
    // caches for metric fields, names, and tags
    fieldCache map[uint64]map[string]float64
    nameCache  map[uint64]string
    tagCache   map[uint64]map[string]string
}

func NewMin() telegraf.Aggregator {
    m := &Min{}
    m.Reset()
    return m
}

var sampleConfig = `
  ## period is the flush & clear interval of the aggregator.
  period = "30s"
  ## If true drop_original will drop the original metrics and
  ## only send aggregates.
  drop_original = false
`

func (m *Min) SampleConfig() string {
    return sampleConfig
}

func (m *Min) Description() string {
    return "Keep the aggregate min of each metric passing through."
}

func (m *Min) Add(in telegraf.Metric) {
    id := in.HashID()
    if _, ok := m.nameCache[id]; !ok {
        // hit an uncached metric, create caches for first time:
        m.nameCache[id] = in.Name()
        m.tagCache[id] = in.Tags()
        m.fieldCache[id] = make(map[string]float64)
        for k, v := range in.Fields() {
            if fv, ok := convert(v); ok {
                m.fieldCache[id][k] = fv
            }
        }
    } else {
        for k, v := range in.Fields() {
            if fv, ok := convert(v); ok {
                if _, ok := m.fieldCache[id][k]; !ok {
                    // hit an uncached field of a cached metric
                    m.fieldCache[id][k] = fv
                    continue
                }
                if fv < m.fieldCache[id][k] {
                    // set new minimum
                    m.fieldCache[id][k] = fv
                }
            }
        }
    }
}

func (m *Min) Push(acc telegraf.Accumulator) {
    for id := range m.nameCache {
        fields := map[string]interface{}{}
        for k, v := range m.fieldCache[id] {
            fields[k+"_min"] = v
        }
        acc.AddFields(m.nameCache[id], fields, m.tagCache[id])
    }
}

func (m *Min) Reset() {
    m.fieldCache = make(map[uint64]map[string]float64)
    m.nameCache = make(map[uint64]string)
    m.tagCache = make(map[uint64]map[string]string)
}

func convert(in interface{}) (float64, bool) {
    switch v := in.(type) {
    case float64:
        return v, true
    case int64:
        return float64(v), true
    default:
        return 0, false
    }
}

func init() {
    aggregators.Add("min", func() telegraf.Aggregator {
        return NewMin()
    })
}
```
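For reference, enabling the example above in a Telegraf config might look like
the following stanza; the `period` and `drop_original` options are handled by
Telegraf itself rather than by the plugin:

```toml
[[aggregators.min]]
  ## period is the flush & clear interval of the aggregator.
  period = "30s"
  ## If true drop_original will drop the original metrics and
  ## only send aggregates.
  drop_original = false
```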
[telegraf.Aggregator]: https://godoc.org/github.com/influxdata/telegraf#Aggregator
[SampleConfig]: https://github.com/influxdata/telegraf/wiki/SampleConfig
```diff
@@ -106,6 +106,14 @@ emitted from the input plugin.
 
 ### Output Configuration
 
+- **flush_interval**: The maximum time between flushes.  Use this setting to
+  override the agent `flush_interval` on a per plugin basis.
+- **metric_batch_size**: The maximum number of metrics to send at once.  Use
+  this setting to override the agent `metric_batch_size` on a per plugin basis.
+- **metric_buffer_limit**: The maximum number of unsent metrics to buffer.
+  Use this setting to override the agent `metric_buffer_limit` on a per plugin
+  basis.  A sample configuration using these overrides follows this section.
+
 The [metric filtering](#metric-filtering) parameters can be used to limit what metrics are
 emitted from the output plugin.
```
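To illustrate the per-plugin overrides, a hypothetical `file` output could be
configured as below; the plugin choice and path are only examples:

```toml
[[outputs.file]]
  files = ["/tmp/metrics.out"]

  ## Override the agent-level defaults for this output only.
  flush_interval = "5s"
  metric_batch_size = 500
  metric_buffer_limit = 5000
```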
@@ -0,0 +1,143 @@
### Input Plugins

This section is for developers who want to create new collection inputs.
Telegraf is entirely plugin driven.  This interface allows for operators to
pick and choose what is gathered and makes it easy for developers
to create new ways of generating metrics.

Plugin authorship is kept as simple as possible to encourage people to develop
and submit new inputs.

### Input Plugin Guidelines

- A plugin must conform to the [telegraf.Input][] interface.
- Input Plugins should call `inputs.Add` in their `init` function to register
  themselves.  See below for a quick example.
- Input Plugins must be added to the
  `github.com/influxdata/telegraf/plugins/inputs/all/all.go` file.
- The `SampleConfig` function should return valid toml that describes how the
  plugin can be configured.  This is included in `telegraf config`.  Please
  consult the [SampleConfig][] page for the latest style guidelines.
- The `Description` function should say in one line what this plugin does.

Let's say you've written a plugin that emits metrics about processes on the
current host.

### Input Plugin Example

```go
package simple

// simple.go

import (
    "github.com/influxdata/telegraf"
    "github.com/influxdata/telegraf/plugins/inputs"
)

type Simple struct {
    Ok bool
}

func (s *Simple) Description() string {
    return "a demo plugin"
}

func (s *Simple) SampleConfig() string {
    return `
  ## Indicate if everything is fine
  ok = true
`
}

func (s *Simple) Gather(acc telegraf.Accumulator) error {
    if s.Ok {
        acc.AddFields("state", map[string]interface{}{"value": "pretty good"}, nil)
    } else {
        acc.AddFields("state", map[string]interface{}{"value": "not great"}, nil)
    }

    return nil
}

func init() {
    inputs.Add("simple", func() telegraf.Input { return &Simple{} })
}
```

### Development

* Run `make static` followed by `make plugin-[pluginName]` to spin up a docker
  dev environment using docker-compose.
* ***[Optional]*** When developing a plugin, add a `dev` directory with a
  `docker-compose.yml` and `telegraf.conf` as well as any other supporting
  files, where sensible.
### Typed Metrics

In addition to the `AddFields` function, the accumulator also supports
functions to add typed metrics: `AddGauge`, `AddCounter`, etc.  Metric types
are ignored by the InfluxDB output, but can be used for other outputs, such as
[prometheus][prom metric types].
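As a brief sketch, the `Simple` plugin's `Gather` from above could emit a
gauge instead of plain fields; the measurement, field, and tag names below are
invented for illustration:

```go
func (s *Simple) Gather(acc telegraf.Accumulator) error {
    // AddGauge takes the same arguments as AddFields, but marks the
    // resulting metric with the gauge value type.
    acc.AddGauge("queue_depth",
        map[string]interface{}{"value": 42},
        map[string]string{"queue": "default"},
    )
    return nil
}
```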
### Data Formats

Some input plugins, such as the [exec][] plugin, can accept any supported
[input data formats][].

In order to enable this, you must specify a `SetParser(parser parsers.Parser)`
function on the plugin object (see the exec plugin for an example), as well as
defining `parser` as a field of the object.

You can then utilize the parser internally in your plugin, parsing data as you
see fit.  Telegraf's configuration layer will take care of instantiating and
creating the `Parser` object.

Add the following to the `SampleConfig()`:

```toml
  ## Data format to consume.
  ## Each data format has its own unique set of configuration options, read
  ## more about them here:
  ## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md
  data_format = "influx"
```
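The following is a minimal sketch of this convention, not taken from the exec
plugin itself; the plugin name and the source of the raw bytes are
hypothetical:

```go
package mysource

import (
    "github.com/influxdata/telegraf"
    "github.com/influxdata/telegraf/plugins/inputs"
    "github.com/influxdata/telegraf/plugins/parsers"
)

type MySource struct {
    parser parsers.Parser
}

func (s *MySource) Description() string  { return "a demo parser-aware input" }
func (s *MySource) SampleConfig() string { return "" }

// SetParser is called by Telegraf's configuration layer with a parser
// built from the plugin's data_format option.
func (s *MySource) SetParser(parser parsers.Parser) {
    s.parser = parser
}

func (s *MySource) Gather(acc telegraf.Accumulator) error {
    // raw stands in for bytes collected from an external source.
    raw := []byte("example value=42i")
    metrics, err := s.parser.Parse(raw)
    if err != nil {
        return err
    }
    for _, m := range metrics {
        acc.AddFields(m.Name(), m.Fields(), m.Tags(), m.Time())
    }
    return nil
}

func init() {
    inputs.Add("mysource", func() telegraf.Input { return &MySource{} })
}
```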
### Service Input Plugins

This section is for developers who want to create new "service" collection
inputs.  A service plugin differs from a regular plugin in that it operates a
background service while Telegraf is running.  One example would be the
`statsd` plugin, which operates a statsd server.

Service Input Plugins are substantially more complicated than a regular
plugin, as they will require threads and locks to verify data integrity.
Service Input Plugins should be avoided unless there is no way to create their
behavior with a regular plugin.

To create a Service Input implement the [telegraf.ServiceInput][] interface.
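A bare-bones sketch of that interface is shown below; it is not a real
Telegraf plugin, and the one-second ticker merely stands in for a service
receiving data:

```go
package mylistener

import (
    "time"

    "github.com/influxdata/telegraf"
    "github.com/influxdata/telegraf/plugins/inputs"
)

type MyListener struct {
    acc  telegraf.Accumulator
    done chan struct{}
}

func (l *MyListener) Description() string  { return "a demo service input" }
func (l *MyListener) SampleConfig() string { return "" }

// Gather is required by telegraf.Input, but service inputs typically push
// metrics from their background goroutine instead.
func (l *MyListener) Gather(acc telegraf.Accumulator) error { return nil }

// Start retains the Accumulator and begins collecting in the background.
// The Accumulator may be used until Stop returns.
func (l *MyListener) Start(acc telegraf.Accumulator) error {
    l.acc = acc
    l.done = make(chan struct{})
    go l.listen()
    return nil
}

func (l *MyListener) listen() {
    ticker := time.NewTicker(time.Second)
    defer ticker.Stop()
    for {
        select {
        case <-l.done:
            return
        case <-ticker.C:
            // A stand-in for data arriving from the background service.
            l.acc.AddFields("demo", map[string]interface{}{"value": 1}, nil)
        }
    }
}

// Stop shuts down the background goroutine; the accumulator must not be
// used after Stop returns.
func (l *MyListener) Stop() {
    close(l.done)
}

func init() {
    inputs.Add("mylistener", func() telegraf.Input { return &MyListener{} })
}
```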
### Metric Tracking

Metric Tracking provides a system to be notified when metrics have been
successfully written to their outputs or otherwise discarded.  This allows
inputs to be created that function as reliable queue consumers.

To get started with metric tracking begin by calling `WithTracking` on the
[telegraf.Accumulator][].  Add metrics using the `AddTrackingMetricGroup`
function on the returned [telegraf.TrackingAccumulator][] and store the
`TrackingID`.  The `Delivered()` channel will return a type with information
about the final delivery status of the metric group.

Check the [amqp_consumer][] for an example implementation.
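A condensed sketch of that flow, loosely modeled on a queue consumer; the
`pending` map, `msg`, and the `ack`/`requeue` callbacks are placeholders for
whatever your transport provides:

```go
// Within a service input's Start, wrap the accumulator with tracking.
tacc := acc.WithTracking(maxUndelivered) // maxUndelivered: assumed plugin option

// When a message arrives, add its metrics as one tracked group and
// remember which message the TrackingID belongs to.
id := tacc.AddTrackingMetricGroup(metrics)
pending[id] = msg

// In a separate goroutine, resolve deliveries as they are reported.
for info := range tacc.Delivered() {
    msg := pending[info.ID()]
    delete(pending, info.ID())
    if info.Delivered() {
        ack(msg) // the metrics reached their outputs
    } else {
        requeue(msg) // the metrics were discarded
    }
}
```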
[exec]: https://github.com/influxdata/telegraf/tree/master/plugins/inputs/exec
[amqp_consumer]: https://github.com/influxdata/telegraf/tree/master/plugins/inputs/amqp_consumer
[prom metric types]: https://prometheus.io/docs/concepts/metric_types/
[input data formats]: https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md
[SampleConfig]: https://github.com/influxdata/telegraf/wiki/SampleConfig
[telegraf.Input]: https://godoc.org/github.com/influxdata/telegraf#Input
[telegraf.ServiceInput]: https://godoc.org/github.com/influxdata/telegraf#ServiceInput
[telegraf.Accumulator]: https://godoc.org/github.com/influxdata/telegraf#Accumulator
[telegraf.TrackingAccumulator]: https://godoc.org/github.com/influxdata/telegraf#TrackingAccumulator
@@ -0,0 +1,95 @@
### Output Plugins

This section is for developers who want to create a new output sink.  Outputs
are created in a similar manner as collection plugins, and their interface has
similar constructs.

### Output Plugin Guidelines

- An output must conform to the [telegraf.Output][] interface.
- Outputs should call `outputs.Add` in their `init` function to register
  themselves.  See below for a quick example.
- To be available within Telegraf itself, plugins must add themselves to the
  `github.com/influxdata/telegraf/plugins/outputs/all/all.go` file.
- The `SampleConfig` function should return valid toml that describes how the
  plugin can be configured.  This is included in `telegraf config`.  Please
  consult the [SampleConfig][] page for the latest style guidelines.
- The `Description` function should say in one line what this output does.

### Output Plugin Example

```go
package simpleoutput

// simpleoutput.go

import (
    "github.com/influxdata/telegraf"
    "github.com/influxdata/telegraf/plugins/outputs"
)

type Simple struct {
    Ok bool
}

func (s *Simple) Description() string {
    return "a demo output"
}

func (s *Simple) SampleConfig() string {
    return `
  ok = true
`
}

func (s *Simple) Connect() error {
    // Make a connection to the URL here
    return nil
}

func (s *Simple) Close() error {
    // Close connection to the URL here
    return nil
}

func (s *Simple) Write(metrics []telegraf.Metric) error {
    for _, metric := range metrics {
        // write `metric` to the output sink here
        _ = metric
    }
    return nil
}

func init() {
    outputs.Add("simpleoutput", func() telegraf.Output { return &Simple{} })
}
```

### Data Formats

Some output plugins, such as the [file][] plugin, can write in any supported
[output data formats][].

In order to enable this, you must specify a
`SetSerializer(serializer serializers.Serializer)`
function on the plugin object (see the file plugin for an example), as well as
defining `serializer` as a field of the object.

You can then utilize the serializer internally in your plugin, serializing data
before it's written.  Telegraf's configuration layer will take care of
instantiating and creating the `Serializer` object.

You should also add the following to your `SampleConfig()`:

```toml
  ## Data format to output.
  ## Each data format has its own unique set of configuration options, read
  ## more about them here:
  ## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_OUTPUT.md
  data_format = "influx"
```
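A minimal sketch of the serializer convention, reworking the `Simple` output
from above; the destination of the serialized bytes is left as a comment:

```go
package simpleoutput

import (
    "github.com/influxdata/telegraf"
    "github.com/influxdata/telegraf/plugins/outputs"
    "github.com/influxdata/telegraf/plugins/serializers"
)

type Simple struct {
    serializer serializers.Serializer
}

func (s *Simple) Description() string  { return "a demo output" }
func (s *Simple) SampleConfig() string { return "" }
func (s *Simple) Connect() error       { return nil }
func (s *Simple) Close() error         { return nil }

// SetSerializer is called by Telegraf's configuration layer with a
// serializer built from the plugin's data_format option.
func (s *Simple) SetSerializer(serializer serializers.Serializer) {
    s.serializer = serializer
}

func (s *Simple) Write(metrics []telegraf.Metric) error {
    for _, m := range metrics {
        octets, err := s.serializer.Serialize(m)
        if err != nil {
            return err
        }
        _ = octets // write the serialized bytes to the sink here
    }
    return nil
}

func init() {
    outputs.Add("simpleoutput", func() telegraf.Output { return &Simple{} })
}
```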
[file]: https://github.com/influxdata/telegraf/tree/master/plugins/outputs/file
[output data formats]: https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_OUTPUT.md
[SampleConfig]: https://github.com/influxdata/telegraf/wiki/SampleConfig
[telegraf.Output]: https://godoc.org/github.com/influxdata/telegraf#Output
@@ -0,0 +1,63 @@
### Processor Plugins

This section is for developers who want to create a new processor plugin.

### Processor Plugin Guidelines

* A processor must conform to the [telegraf.Processor][] interface.
* Processors should call `processors.Add` in their `init` function to register
  themselves.  See below for a quick example.
* To be available within Telegraf itself, plugins must add themselves to the
  `github.com/influxdata/telegraf/plugins/processors/all/all.go` file.
* The `SampleConfig` function should return valid toml that describes how the
  processor can be configured.  This is included in the output of `telegraf
  config`.  Please consult the [SampleConfig][] page for the latest style
  guidelines.
* The `Description` function should say in one line what this processor does.

### Processor Plugin Example

```go
package printer

// printer.go

import (
    "fmt"

    "github.com/influxdata/telegraf"
    "github.com/influxdata/telegraf/plugins/processors"
)

type Printer struct {
}

var sampleConfig = `
`

func (p *Printer) SampleConfig() string {
    return sampleConfig
}

func (p *Printer) Description() string {
    return "Print all metrics that pass through this filter."
}

func (p *Printer) Apply(in ...telegraf.Metric) []telegraf.Metric {
    for _, metric := range in {
        fmt.Println(metric.String())
    }
    return in
}

func init() {
    processors.Add("printer", func() telegraf.Processor {
        return &Printer{}
    })
}
```

[SampleConfig]: https://github.com/influxdata/telegraf/wiki/SampleConfig
[telegraf.Processor]: https://godoc.org/github.com/influxdata/telegraf#Processor
input.go

```diff
@@ -13,17 +13,10 @@ type Input interface {
 }
 
 type ServiceInput interface {
-	// SampleConfig returns the default configuration of the Input
-	SampleConfig() string
-
-	// Description returns a one-sentence description on the Input
-	Description() string
-
-	// Gather takes in an accumulator and adds the metrics that the Input
-	// gathers. This is called every "interval"
-	Gather(Accumulator) error
+	Input
 
-	// Start starts the ServiceInput's service, whatever that may be
+	// Start the ServiceInput.  The Accumulator may be retained and used until
+	// Stop returns.
 	Start(Accumulator) error
 
 	// Stop stops the services and closes any necessary channels and connections
```
@@ -1,130 +0,0 @@
```go
package buffer

import (
	"sync"

	"github.com/influxdata/telegraf"
	"github.com/influxdata/telegraf/selfstat"
)

var (
	MetricsWritten = selfstat.Register("agent", "metrics_written", map[string]string{})
	MetricsDropped = selfstat.Register("agent", "metrics_dropped", map[string]string{})
)

// Buffer is an object for storing metrics in a circular buffer.
type Buffer struct {
	sync.Mutex
	buf   []telegraf.Metric
	first int
	last  int
	size  int
	empty bool
}

// NewBuffer returns a Buffer
// size is the maximum number of metrics that Buffer will cache. If Add is
// called when the buffer is full, then the oldest metric(s) will be dropped.
func NewBuffer(size int) *Buffer {
	return &Buffer{
		buf:   make([]telegraf.Metric, size),
		first: 0,
		last:  0,
		size:  size,
		empty: true,
	}
}

// IsEmpty returns true if Buffer is empty.
func (b *Buffer) IsEmpty() bool {
	return b.empty
}

// Len returns the current length of the buffer.
func (b *Buffer) Len() int {
	if b.empty {
		return 0
	} else if b.first <= b.last {
		return b.last - b.first + 1
	}
	// Spans the end of array.
	// size - gap in the middle
	return b.size - (b.first - b.last - 1) // size - gap
}

func (b *Buffer) push(m telegraf.Metric) {
	// Empty
	if b.empty {
		b.last = b.first // Reset
		b.buf[b.last] = m
		b.empty = false
		return
	}

	b.last++
	b.last %= b.size

	// Full
	if b.first == b.last {
		MetricsDropped.Incr(1)
		b.first = (b.first + 1) % b.size
	}

	b.buf[b.last] = m
}

// Add adds metrics to the buffer.
func (b *Buffer) Add(metrics ...telegraf.Metric) {
	b.Lock()
	defer b.Unlock()
	for i := range metrics {
		MetricsWritten.Incr(1)
		b.push(metrics[i])
	}
}

// Batch returns a batch of metrics of size batchSize.
// the batch will be of maximum length batchSize. It can be less than batchSize,
// if the length of Buffer is less than batchSize.
func (b *Buffer) Batch(batchSize int) []telegraf.Metric {
	b.Lock()
	defer b.Unlock()
	outLen := min(b.Len(), batchSize)
	out := make([]telegraf.Metric, outLen)
	if outLen == 0 {
		return out
	}

	// We copy everything right of first up to last, count or end
	// b.last >= rightInd || b.last < b.first
	// therefore wont copy past b.last
	rightInd := min(b.size, b.first+outLen) - 1

	copyCount := copy(out, b.buf[b.first:rightInd+1])

	// We've emptied the ring
	if rightInd == b.last {
		b.empty = true
	}
	b.first = rightInd + 1
	b.first %= b.size

	// We circle back for the rest
	if copyCount < outLen {
		right := min(b.last, outLen-copyCount)
		copy(out[copyCount:], b.buf[b.first:right+1])
		// We've emptied the ring
		if right == b.last {
			b.empty = true
		}
		b.first = right + 1
		b.first %= b.size
	}
	return out
}

func min(a, b int) int {
	if b < a {
		return b
	}
	return a
}
```
@@ -1,203 +0,0 @@
```go
package buffer

import (
	"sync"
	"sync/atomic"
	"testing"

	"github.com/influxdata/telegraf"
	"github.com/influxdata/telegraf/testutil"

	"github.com/stretchr/testify/assert"
)

var metricList = []telegraf.Metric{
	testutil.TestMetric(2, "mymetric1"),
	testutil.TestMetric(1, "mymetric2"),
	testutil.TestMetric(11, "mymetric3"),
	testutil.TestMetric(15, "mymetric4"),
	testutil.TestMetric(8, "mymetric5"),
}

func makeBench5(b *testing.B, freq, batchSize int) {
	const k = 1000
	var wg sync.WaitGroup
	buf := NewBuffer(10000)
	m := testutil.TestMetric(1, "mymetric")

	for i := 0; i < b.N; i++ {
		buf.Add(m, m, m, m, m)
		if i%(freq*k) == 0 {
			wg.Add(1)
			go func() {
				buf.Batch(batchSize * k)
				wg.Done()
			}()
		}
	}
	// Flush
	buf.Batch(b.N)
	wg.Wait()
}

func makeBenchStrict(b *testing.B, freq, batchSize int) {
	const k = 1000
	var count uint64
	var wg sync.WaitGroup
	buf := NewBuffer(10000)
	m := testutil.TestMetric(1, "mymetric")

	for i := 0; i < b.N; i++ {
		buf.Add(m)
		if i%(freq*k) == 0 {
			wg.Add(1)
			go func() {
				defer wg.Done()
				l := len(buf.Batch(batchSize * k))
				atomic.AddUint64(&count, uint64(l))
			}()
		}
	}
	// Flush
	wg.Add(1)
	go func() {
		l := len(buf.Batch(b.N))
		atomic.AddUint64(&count, uint64(l))
		wg.Done()
	}()

	wg.Wait()
	if count != uint64(b.N) {
		b.Errorf("not all metrics came out. %d of %d", count, b.N)
	}
}

func makeBench(b *testing.B, freq, batchSize int) {
	const k = 1000
	var wg sync.WaitGroup
	buf := NewBuffer(10000)
	m := testutil.TestMetric(1, "mymetric")

	for i := 0; i < b.N; i++ {
		buf.Add(m)
		if i%(freq*k) == 0 {
			wg.Add(1)
			go func() {
				buf.Batch(batchSize * k)
				wg.Done()
			}()
		}
	}
	wg.Wait()
	// Flush
	buf.Batch(b.N)
}

func BenchmarkBufferBatch5Add(b *testing.B) {
	makeBench5(b, 100, 101)
}
func BenchmarkBufferBigInfrequentBatchCatchup(b *testing.B) {
	makeBench(b, 100, 101)
}
func BenchmarkBufferOftenBatch(b *testing.B) {
	makeBench(b, 1, 1)
}
func BenchmarkBufferAlmostBatch(b *testing.B) {
	makeBench(b, 10, 9)
}
func BenchmarkBufferSlowBatch(b *testing.B) {
	makeBench(b, 10, 1)
}
func BenchmarkBufferBatchNoDrop(b *testing.B) {
	makeBenchStrict(b, 1, 4)
}
func BenchmarkBufferCatchup(b *testing.B) {
	buf := NewBuffer(10000)
	m := testutil.TestMetric(1, "mymetric")

	for i := 0; i < b.N; i++ {
		buf.Add(m)
	}
	buf.Batch(b.N)
}

func BenchmarkAddMetrics(b *testing.B) {
	buf := NewBuffer(10000)
	m := testutil.TestMetric(1, "mymetric")
	for n := 0; n < b.N; n++ {
		buf.Add(m)
	}
}

func TestNewBufferBasicFuncs(t *testing.T) {
	b := NewBuffer(10)
	MetricsDropped.Set(0)
	MetricsWritten.Set(0)

	assert.True(t, b.IsEmpty())
	assert.Zero(t, b.Len())
	assert.Zero(t, MetricsDropped.Get())
	assert.Zero(t, MetricsWritten.Get())

	m := testutil.TestMetric(1, "mymetric")
	b.Add(m)
	assert.False(t, b.IsEmpty())
	assert.Equal(t, b.Len(), 1)
	assert.Equal(t, int64(0), MetricsDropped.Get())
	assert.Equal(t, int64(1), MetricsWritten.Get())

	b.Add(metricList...)
	assert.False(t, b.IsEmpty())
	assert.Equal(t, b.Len(), 6)
	assert.Equal(t, int64(0), MetricsDropped.Get())
	assert.Equal(t, int64(6), MetricsWritten.Get())
}

func TestDroppingMetrics(t *testing.T) {
	b := NewBuffer(10)
	MetricsDropped.Set(0)
	MetricsWritten.Set(0)

	// Add up to the size of the buffer
	b.Add(metricList...)
	b.Add(metricList...)
	assert.False(t, b.IsEmpty())
	assert.Equal(t, b.Len(), 10)
	assert.Equal(t, int64(0), MetricsDropped.Get())
	assert.Equal(t, int64(10), MetricsWritten.Get())

	// Add 5 more and verify they were dropped
	b.Add(metricList...)
	assert.False(t, b.IsEmpty())
	assert.Equal(t, b.Len(), 10)
	assert.Equal(t, int64(5), MetricsDropped.Get())
	assert.Equal(t, int64(15), MetricsWritten.Get())
}

func TestGettingBatches(t *testing.T) {
	b := NewBuffer(20)
	MetricsDropped.Set(0)
	MetricsWritten.Set(0)

	// Verify that the buffer returned is smaller than requested when there are
	// not as many items as requested.
	b.Add(metricList...)
	batch := b.Batch(10)
	assert.Len(t, batch, 5)

	// Verify that the buffer is now empty
	assert.True(t, b.IsEmpty())
	assert.Zero(t, b.Len())
	assert.Zero(t, MetricsDropped.Get())
	assert.Equal(t, int64(5), MetricsWritten.Get())

	// Verify that the buffer returned is not more than the size requested
	b.Add(metricList...)
	batch = b.Batch(3)
	assert.Len(t, batch, 3)

	// Verify that buffer is not empty
	assert.False(t, b.IsEmpty())
	assert.Equal(t, b.Len(), 2)
	assert.Equal(t, int64(0), MetricsDropped.Get())
	assert.Equal(t, int64(10), MetricsWritten.Get())
}
```
```diff
@@ -9,7 +9,6 @@ import (
 	"math"
 	"os"
 	"path/filepath"
-
 	"regexp"
 	"runtime"
 	"sort"
@@ -26,7 +25,6 @@ import (
 	"github.com/influxdata/telegraf/plugins/parsers"
 	"github.com/influxdata/telegraf/plugins/processors"
 	"github.com/influxdata/telegraf/plugins/serializers"
-
 	"github.com/influxdata/toml"
 	"github.com/influxdata/toml/ast"
 )
@@ -622,6 +620,19 @@ func (c *Config) LoadConfig(path string) error {
 		}
 	}
 
+	if !c.Agent.OmitHostname {
+		if c.Agent.Hostname == "" {
+			hostname, err := os.Hostname()
+			if err != nil {
+				return err
+			}
+
+			c.Agent.Hostname = hostname
+		}
+
+		c.Tags["host"] = c.Agent.Hostname
+	}
+
 	// Parse all the rest of the plugins:
 	for name, val := range tbl.Fields {
 		subTable, ok := val.(*ast.Table)
@@ -709,6 +720,7 @@ func (c *Config) LoadConfig(path string) error {
 	if len(c.Processors) > 1 {
 		sort.Sort(c.Processors)
 	}
+
 	return nil
 }
 
@@ -876,6 +888,7 @@ func (c *Config) addInput(name string, table *ast.Table) error {
 	}
 
 	rp := models.NewRunningInput(input, pluginConfig)
+	rp.SetDefaultTags(c.Tags)
 	c.Inputs = append(c.Inputs, rp)
 	return nil
 }
@@ -1751,6 +1764,8 @@ func buildOutput(name string, tbl *ast.Table) (*models.OutputConfig, error) {
 		Name:   name,
 		Filter: filter,
 	}
 
+	// TODO
 	// Outputs don't support FieldDrop/FieldPass, so set to NameDrop/NamePass
 	if len(oc.Filter.FieldDrop) > 0 {
 		oc.Filter.NameDrop = oc.Filter.FieldDrop
@@ -1758,5 +1773,47 @@ func buildOutput(name string, tbl *ast.Table) (*models.OutputConfig, error) {
 	if len(oc.Filter.FieldPass) > 0 {
 		oc.Filter.NamePass = oc.Filter.FieldPass
 	}
 
+	if node, ok := tbl.Fields["flush_interval"]; ok {
+		if kv, ok := node.(*ast.KeyValue); ok {
+			if str, ok := kv.Value.(*ast.String); ok {
+				dur, err := time.ParseDuration(str.Value)
+				if err != nil {
+					return nil, err
+				}
+
+				oc.FlushInterval = dur
+			}
+		}
+	}
+
+	if node, ok := tbl.Fields["metric_buffer_limit"]; ok {
+		if kv, ok := node.(*ast.KeyValue); ok {
+			if integer, ok := kv.Value.(*ast.Integer); ok {
+				v, err := integer.Int()
+				if err != nil {
+					return nil, err
+				}
+				oc.MetricBufferLimit = int(v)
+			}
+		}
+	}
+
+	if node, ok := tbl.Fields["metric_batch_size"]; ok {
+		if kv, ok := node.(*ast.KeyValue); ok {
+			if integer, ok := kv.Value.(*ast.Integer); ok {
+				v, err := integer.Int()
+				if err != nil {
+					return nil, err
+				}
+				oc.MetricBatchSize = int(v)
+			}
+		}
+	}
+
+	delete(tbl.Fields, "flush_interval")
+	delete(tbl.Fields, "metric_buffer_limit")
+	delete(tbl.Fields, "metric_batch_size")
+
 	return oc, nil
 }
```
```diff
@@ -4,6 +4,7 @@ import (
 	"bufio"
 	"bytes"
 	"compress/gzip"
+	"context"
 	"crypto/rand"
 	"errors"
 	"io"
@@ -246,6 +247,51 @@ func RandomSleep(max time.Duration, shutdown chan struct{}) {
 	}
 }
 
+// RandomDuration returns a random duration between 0 and max.
+func RandomDuration(max time.Duration) time.Duration {
+	if max == 0 {
+		return 0
+	}
+
+	var sleepns int64
+	maxSleep := big.NewInt(max.Nanoseconds())
+	if j, err := rand.Int(rand.Reader, maxSleep); err == nil {
+		sleepns = j.Int64()
+	}
+
+	return time.Duration(sleepns)
+}
+
+// SleepContext sleeps until the context is closed or the duration is reached.
+func SleepContext(ctx context.Context, duration time.Duration) error {
+	if duration == 0 {
+		return nil
+	}
+
+	t := time.NewTimer(duration)
+	select {
+	case <-t.C:
+		return nil
+	case <-ctx.Done():
+		t.Stop()
+		return ctx.Err()
+	}
+}
+
+// AlignDuration returns the duration until next aligned interval.
+func AlignDuration(tm time.Time, interval time.Duration) time.Duration {
+	return AlignTime(tm, interval).Sub(tm)
+}
+
+// AlignTime returns the time of the next aligned interval.
+func AlignTime(tm time.Time, interval time.Duration) time.Time {
+	truncated := tm.Truncate(interval)
+	if truncated == tm {
+		return tm
+	}
+	return truncated.Add(interval)
+}
+
 // Exit status takes the error from exec.Command
 // and returns the exit status and true
 // if error is not exit status, will return 0 and false
```
```diff
@@ -9,6 +9,7 @@ import (
 	"time"
 
 	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
 )
 
 type SnakeTest struct {
@@ -217,3 +218,55 @@ func TestVersionAlreadySet(t *testing.T) {
 
 	assert.Equal(t, "foo", Version())
 }
+
+func TestAlignDuration(t *testing.T) {
+	tests := []struct {
+		name     string
+		now      time.Time
+		interval time.Duration
+		expected time.Duration
+	}{
+		{
+			name:     "aligned",
+			now:      time.Date(2018, 1, 1, 1, 1, 0, 0, time.UTC),
+			interval: 10 * time.Second,
+			expected: 0 * time.Second,
+		},
+		{
+			name:     "standard interval",
+			now:      time.Date(2018, 1, 1, 1, 1, 1, 0, time.UTC),
+			interval: 10 * time.Second,
+			expected: 9 * time.Second,
+		},
+		{
+			name:     "odd interval",
+			now:      time.Date(2018, 1, 1, 1, 1, 1, 0, time.UTC),
+			interval: 3 * time.Second,
+			expected: 2 * time.Second,
+		},
+		{
+			name:     "sub second interval",
+			now:      time.Date(2018, 1, 1, 1, 1, 0, 5e8, time.UTC),
+			interval: 1 * time.Second,
+			expected: 500 * time.Millisecond,
+		},
+		{
+			name:     "non divisible not aligned on minutes",
+			now:      time.Date(2018, 1, 1, 1, 0, 0, 0, time.UTC),
+			interval: 1*time.Second + 100*time.Millisecond,
+			expected: 400 * time.Millisecond,
+		},
+		{
+			name:     "long interval",
+			now:      time.Date(2018, 1, 1, 1, 1, 0, 0, time.UTC),
+			interval: 1 * time.Hour,
+			expected: 59 * time.Minute,
+		},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			actual := AlignDuration(tt.now, tt.interval)
+			require.Equal(t, tt.expected, actual)
+		})
+	}
+}
```
@ -0,0 +1,214 @@
|
||||||
|
package models
|
||||||
|
|
||||||
|
import (
|
||||||
|
"sync"
|
||||||
|
|
||||||
|
"github.com/influxdata/telegraf"
|
||||||
|
"github.com/influxdata/telegraf/selfstat"
|
||||||
|
)
|
||||||
|
|
||||||
|
var (
|
||||||
|
AgentMetricsWritten = selfstat.Register("agent", "metrics_written", map[string]string{})
|
||||||
|
AgentMetricsDropped = selfstat.Register("agent", "metrics_dropped", map[string]string{})
|
||||||
|
)
|
||||||
|
|
||||||
|
// Buffer stores metrics in a circular buffer.
|
||||||
|
type Buffer struct {
|
||||||
|
sync.Mutex
|
||||||
|
buf []telegraf.Metric
|
||||||
|
first int // index of the first/oldest metric
|
||||||
|
last int // one after the index of the last/newest metric
|
||||||
|
size int // number of metrics currently in the buffer
|
||||||
|
cap int // the capacity of the buffer
|
||||||
|
|
||||||
|
batchFirst int // index of the first metric in the batch
|
||||||
|
batchLast int // one after the index of the last metric in the batch
|
||||||
|
batchSize int // number of metrics current in the batch
|
||||||
|
|
||||||
|
MetricsAdded selfstat.Stat
|
||||||
|
MetricsWritten selfstat.Stat
|
||||||
|
MetricsDropped selfstat.Stat
|
||||||
|
}
|
||||||
|
|
||||||
|
// NewBuffer returns a new empty Buffer with the given capacity.
func NewBuffer(name string, capacity int) *Buffer {
    b := &Buffer{
        buf:   make([]telegraf.Metric, capacity),
        first: 0,
        last:  0,
        size:  0,
        cap:   capacity,

        MetricsAdded: selfstat.Register(
            "write",
            "metrics_added",
            map[string]string{"output": name},
        ),
        MetricsWritten: selfstat.Register(
            "write",
            "metrics_written",
            map[string]string{"output": name},
        ),
        MetricsDropped: selfstat.Register(
            "write",
            "metrics_dropped",
            map[string]string{"output": name},
        ),
    }
    return b
}

// Len returns the number of metrics currently in the buffer.
func (b *Buffer) Len() int {
    b.Lock()
    defer b.Unlock()

    return b.size
}

func (b *Buffer) metricAdded() {
    b.MetricsAdded.Incr(1)
}

func (b *Buffer) metricWritten(metric telegraf.Metric) {
    AgentMetricsWritten.Incr(1)
    b.MetricsWritten.Incr(1)
    metric.Accept()
}

func (b *Buffer) metricDropped(metric telegraf.Metric) {
    AgentMetricsDropped.Incr(1)
    b.MetricsDropped.Incr(1)
    metric.Reject()
}

// inBatch reports whether the next write position falls inside the
// outstanding batch, i.e. whether an add would overwrite a batched metric.
func (b *Buffer) inBatch() bool {
    if b.batchSize == 0 {
        return false
    }

    if b.batchFirst < b.batchLast {
        return b.last >= b.batchFirst && b.last < b.batchLast
    }
    return b.last >= b.batchFirst || b.last < b.batchLast
}

func (b *Buffer) add(m telegraf.Metric) {
    // Check if Buffer is full
    if b.size == b.cap {
        if b.batchSize == 0 {
            // No batch taken by the output, we can drop the metric now.
            b.metricDropped(b.buf[b.last])
        } else if b.inBatch() {
            // There is an outstanding batch and this will overwrite a metric
            // in it, delay the dropping only in case the batch gets rejected.
            b.batchSize--
            b.batchFirst++
            b.batchFirst %= b.cap
        } else {
            // There is an outstanding batch, but this overwrites a metric
            // outside of it.
            b.metricDropped(b.buf[b.last])
        }
    }

    b.metricAdded()

    b.buf[b.last] = m
    b.last++
    b.last %= b.cap

    if b.size == b.cap {
        b.first++
        b.first %= b.cap
    }

    b.size = min(b.size+1, b.cap)
}

// Add adds metrics to the buffer, overwriting the oldest entries once the
// buffer is full.
func (b *Buffer) Add(metrics ...telegraf.Metric) {
    b.Lock()
    defer b.Unlock()

    for i := range metrics {
        b.add(metrics[i])
    }
}

// Batch returns a slice containing up to batchSize of the oldest metrics in
// the buffer.
//
// The metrics contained in the batch are not removed from the buffer, instead
// the last batch is recorded and removed only if Accept is called.
func (b *Buffer) Batch(batchSize int) []telegraf.Metric {
    b.Lock()
    defer b.Unlock()

    outLen := min(b.size, batchSize)
    out := make([]telegraf.Metric, outLen)
    if outLen == 0 {
        return out
    }

    b.batchFirst = b.first
    b.batchLast = b.first + outLen
    b.batchLast %= b.cap
    b.batchSize = outLen

    until := min(b.cap, b.first+outLen)

    n := copy(out, b.buf[b.first:until])
    if n < outLen {
        copy(out[n:], b.buf[:outLen-n])
    }
    return out
}

// Accept removes the metrics contained in the last batch.
func (b *Buffer) Accept(batch []telegraf.Metric) {
    b.Lock()
    defer b.Unlock()

    for _, m := range batch {
        b.metricWritten(m)
    }

    if b.batchSize > 0 {
        b.size -= b.batchSize
        b.first += b.batchSize
        b.first %= b.cap
    }

    b.resetBatch()
}

// Reject clears the current batch record so that calls to Accept will have no
// effect.
func (b *Buffer) Reject(batch []telegraf.Metric) {
    b.Lock()
    defer b.Unlock()

    if len(batch) > b.batchSize {
        // Part or all of the batch was dropped before reject was called.
        for _, m := range batch[b.batchSize:] {
            b.metricDropped(m)
        }
    }

    b.resetBatch()
}

func (b *Buffer) resetBatch() {
    b.batchFirst = 0
    b.batchLast = 0
    b.batchSize = 0
}

func min(a, b int) int {
    if b < a {
        return b
    }
    return a
}
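The exported surface implements a snapshot-then-settle protocol: Batch marks a window of the oldest metrics without removing them, and the caller settles the window with Accept (remove and count written) or Reject (keep for retry, counting any overwritten entries as dropped). A minimal sketch of one flush in the same package context, where `send` is a hypothetical stand-in for a real output's Write method, not code from this commit:

```go
// flushOnce drains at most one batch from the buffer. send is a hypothetical
// stand-in for an output's Write method.
func flushOnce(buf *Buffer, batchSize int, send func([]telegraf.Metric) error) error {
	batch := buf.Batch(batchSize) // snapshot of up to batchSize oldest metrics
	if len(batch) == 0 {
		return nil
	}
	if err := send(batch); err != nil {
		buf.Reject(batch) // keep the metrics for the next attempt
		return err
	}
	buf.Accept(batch) // remove from the buffer, mark each metric written
	return nil
}
```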
@@ -0,0 +1,385 @@
package models

import (
    "testing"
    "time"

    "github.com/influxdata/telegraf"
    "github.com/influxdata/telegraf/metric"
    "github.com/stretchr/testify/require"
)

type MockMetric struct {
    telegraf.Metric
    AcceptF func()
    RejectF func()
    DropF   func()
}

func (m *MockMetric) Accept() {
    m.AcceptF()
}

func (m *MockMetric) Reject() {
    m.RejectF()
}

func (m *MockMetric) Drop() {
    m.DropF()
}

func Metric() telegraf.Metric {
    m, err := metric.New(
        "cpu",
        map[string]string{},
        map[string]interface{}{
            "value": 42.0,
        },
        time.Unix(0, 0),
    )
    if err != nil {
        panic(err)
    }
    return m
}

func BenchmarkAddMetrics(b *testing.B) {
    buf := NewBuffer("test", 10000)
    m := Metric()
    for n := 0; n < b.N; n++ {
        buf.Add(m)
    }
}

func setup(b *Buffer) *Buffer {
    b.MetricsAdded.Set(0)
    b.MetricsWritten.Set(0)
    b.MetricsDropped.Set(0)
    return b
}

func TestBuffer_LenEmpty(t *testing.T) {
    b := setup(NewBuffer("test", 5))

    require.Equal(t, 0, b.Len())
}

func TestBuffer_LenOne(t *testing.T) {
    m := Metric()
    b := setup(NewBuffer("test", 5))
    b.Add(m)

    require.Equal(t, 1, b.Len())
}

func TestBuffer_LenFull(t *testing.T) {
    m := Metric()
    b := setup(NewBuffer("test", 5))
    b.Add(m, m, m, m, m)

    require.Equal(t, 5, b.Len())
}

func TestBuffer_LenOverfill(t *testing.T) {
    m := Metric()
    b := setup(NewBuffer("test", 5))
    b.Add(m, m, m, m, m, m)

    require.Equal(t, 5, b.Len())
}

func TestBuffer_BatchLenZero(t *testing.T) {
    b := setup(NewBuffer("test", 5))
    batch := b.Batch(0)

    require.Len(t, batch, 0)
}

func TestBuffer_BatchLenBufferEmpty(t *testing.T) {
    b := setup(NewBuffer("test", 5))
    batch := b.Batch(2)

    require.Len(t, batch, 0)
}

func TestBuffer_BatchLenUnderfill(t *testing.T) {
    m := Metric()
    b := setup(NewBuffer("test", 5))
    b.Add(m)
    batch := b.Batch(2)

    require.Len(t, batch, 1)
}

func TestBuffer_BatchLenFill(t *testing.T) {
    m := Metric()
    b := setup(NewBuffer("test", 5))
    b.Add(m, m, m)
    batch := b.Batch(2)
    require.Len(t, batch, 2)
}

func TestBuffer_BatchLenExact(t *testing.T) {
    m := Metric()
    b := setup(NewBuffer("test", 5))
    b.Add(m, m)
    batch := b.Batch(2)
    require.Len(t, batch, 2)
}

func TestBuffer_BatchLenLargerThanBuffer(t *testing.T) {
    m := Metric()
    b := setup(NewBuffer("test", 5))
    b.Add(m, m, m, m, m)
    batch := b.Batch(6)
    require.Len(t, batch, 5)
}

func TestBuffer_BatchWrap(t *testing.T) {
    m := Metric()
    b := setup(NewBuffer("test", 5))
    b.Add(m, m, m, m, m)
    batch := b.Batch(2)
    b.Accept(batch)
    b.Add(m, m)
    batch = b.Batch(5)
    require.Len(t, batch, 5)
}

func TestBuffer_AddDropsOverwrittenMetrics(t *testing.T) {
    m := Metric()
    b := setup(NewBuffer("test", 5))

    b.Add(m, m, m, m, m)
    b.Add(m, m, m, m, m)

    require.Equal(t, int64(5), b.MetricsDropped.Get())
    require.Equal(t, int64(0), b.MetricsWritten.Get())
}

func TestBuffer_AcceptRemovesBatch(t *testing.T) {
    m := Metric()
    b := setup(NewBuffer("test", 5))
    b.Add(m, m, m)
    batch := b.Batch(2)
    b.Accept(batch)
    require.Equal(t, 1, b.Len())
}

func TestBuffer_RejectLeavesBatch(t *testing.T) {
    m := Metric()
    b := setup(NewBuffer("test", 5))
    b.Add(m, m, m)
    batch := b.Batch(2)
    b.Reject(batch)
    require.Equal(t, 3, b.Len())
}

func TestBuffer_AcceptWritesOverwrittenBatch(t *testing.T) {
    m := Metric()
    b := setup(NewBuffer("test", 5))

    b.Add(m, m, m, m, m)
    batch := b.Batch(5)
    b.Add(m, m, m, m, m)
    b.Accept(batch)

    require.Equal(t, int64(0), b.MetricsDropped.Get())
    require.Equal(t, int64(5), b.MetricsWritten.Get())
}

func TestBuffer_BatchRejectDropsOverwrittenBatch(t *testing.T) {
    m := Metric()
    b := setup(NewBuffer("test", 5))

    b.Add(m, m, m, m, m)
    batch := b.Batch(5)
    b.Add(m, m, m, m, m)
    b.Reject(batch)

    require.Equal(t, int64(5), b.MetricsDropped.Get())
    require.Equal(t, int64(0), b.MetricsWritten.Get())
}

func TestBuffer_MetricsOverwriteBatchAccept(t *testing.T) {
    m := Metric()
    b := setup(NewBuffer("test", 5))

    b.Add(m, m, m, m, m)
    batch := b.Batch(3)
    b.Add(m, m, m)
    b.Accept(batch)
    require.Equal(t, int64(0), b.MetricsDropped.Get())
    require.Equal(t, int64(3), b.MetricsWritten.Get())
}

func TestBuffer_MetricsOverwriteBatchReject(t *testing.T) {
    m := Metric()
    b := setup(NewBuffer("test", 5))

    b.Add(m, m, m, m, m)
    batch := b.Batch(3)
    b.Add(m, m, m)
    b.Reject(batch)
    require.Equal(t, int64(3), b.MetricsDropped.Get())
    require.Equal(t, int64(0), b.MetricsWritten.Get())
}

func TestBuffer_MetricsBatchAcceptRemoved(t *testing.T) {
    m := Metric()
    b := setup(NewBuffer("test", 5))

    b.Add(m, m, m, m, m)
    batch := b.Batch(3)
    b.Add(m, m, m, m, m)
    b.Accept(batch)
    require.Equal(t, int64(2), b.MetricsDropped.Get())
    require.Equal(t, int64(3), b.MetricsWritten.Get())
}

func TestBuffer_WrapWithBatch(t *testing.T) {
    m := Metric()
    b := setup(NewBuffer("test", 5))

    b.Add(m, m, m)
    b.Batch(3)
    b.Add(m, m, m, m, m, m)

    require.Equal(t, int64(1), b.MetricsDropped.Get())
}

func TestBuffer_BatchNotRemoved(t *testing.T) {
    m := Metric()
    b := setup(NewBuffer("test", 5))
    b.Add(m, m, m, m, m)
    b.Batch(2)
    require.Equal(t, 5, b.Len())
}

func TestBuffer_BatchRejectAcceptNoop(t *testing.T) {
    m := Metric()
    b := setup(NewBuffer("test", 5))
    b.Add(m, m, m, m, m)
    batch := b.Batch(2)
    b.Reject(batch)
    b.Accept(batch)
    require.Equal(t, 5, b.Len())
}

func TestBuffer_AcceptCallsMetricAccept(t *testing.T) {
    var accept int
    mm := &MockMetric{
        Metric: Metric(),
        AcceptF: func() {
            accept++
        },
    }
    b := setup(NewBuffer("test", 5))
    b.Add(mm, mm, mm)
    batch := b.Batch(2)
    b.Accept(batch)
    require.Equal(t, 2, accept)
}

func TestBuffer_AddCallsMetricRejectWhenNoBatch(t *testing.T) {
    var reject int
    mm := &MockMetric{
        Metric: Metric(),
        RejectF: func() {
            reject++
        },
    }
    b := setup(NewBuffer("test", 5))
    b.Add(mm, mm, mm, mm, mm)
    b.Add(mm, mm)
    require.Equal(t, 2, reject)
}

func TestBuffer_AddCallsMetricRejectWhenNotInBatch(t *testing.T) {
    var reject int
    mm := &MockMetric{
        Metric: Metric(),
        RejectF: func() {
            reject++
        },
    }
    b := setup(NewBuffer("test", 5))
    b.Add(mm, mm, mm, mm, mm)
    batch := b.Batch(2)
    b.Add(mm, mm, mm, mm)
    // metric[2] and metric[3] rejected
    require.Equal(t, 2, reject)
    b.Reject(batch)
    // metric[1] and metric[2] now rejected
    require.Equal(t, 4, reject)
}

func TestBuffer_RejectCallsMetricRejectWithOverwritten(t *testing.T) {
    var reject int
    mm := &MockMetric{
        Metric: Metric(),
        RejectF: func() {
            reject++
        },
    }
    b := setup(NewBuffer("test", 5))
    b.Add(mm, mm, mm, mm, mm)
    batch := b.Batch(5)
    b.Add(mm, mm)
    require.Equal(t, 0, reject)
    b.Reject(batch)
    require.Equal(t, 2, reject)
}

func TestBuffer_AddOverwriteAndReject(t *testing.T) {
    var reject int
    mm := &MockMetric{
        Metric: Metric(),
        RejectF: func() {
            reject++
        },
    }
    b := setup(NewBuffer("test", 5))
    b.Add(mm, mm, mm, mm, mm)
    batch := b.Batch(5)
    b.Add(mm, mm, mm, mm, mm)
    b.Add(mm, mm, mm, mm, mm)
    b.Add(mm, mm, mm, mm, mm)
    b.Add(mm, mm, mm, mm, mm)
    require.Equal(t, 15, reject)
    b.Reject(batch)
    require.Equal(t, 20, reject)
}

func TestBuffer_AddOverwriteAndRejectOffset(t *testing.T) {
    var reject int
    var accept int
    mm := &MockMetric{
        Metric: Metric(),
        RejectF: func() {
            reject++
        },
        AcceptF: func() {
            accept++
        },
    }
    b := setup(NewBuffer("test", 5))
    b.Add(mm, mm, mm)
    b.Add(mm, mm, mm, mm)
    require.Equal(t, 2, reject)
    batch := b.Batch(5)
    b.Add(mm, mm, mm, mm)
    require.Equal(t, 2, reject)
    b.Add(mm, mm, mm, mm)
    require.Equal(t, 5, reject)
    b.Add(mm, mm, mm, mm)
    require.Equal(t, 9, reject)
    b.Add(mm, mm, mm, mm)
    require.Equal(t, 13, reject)
    b.Accept(batch)
    require.Equal(t, 13, reject)
    require.Equal(t, 5, accept)
}
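The overwrite tests above pin down the accounting invariant: every added metric is eventually counted exactly once as written, dropped, or still buffered, and overwriting a batched slot defers the decision until Accept or Reject. A small sketch of the arithmetic for one scenario, mirroring TestBuffer_MetricsBatchAcceptRemoved (the helper name is hypothetical):

```go
// demoOverwriteAccounting: with capacity 5 and an outstanding batch of 3,
// adding 5 more metrics overwrites all 5 slots. The 2 overwritten metrics
// outside the batch are dropped immediately; the 3 batch slots defer their
// fate until Accept (forgiven) or Reject (counted dropped).
func demoOverwriteAccounting() (written, dropped int64) {
	m := Metric()
	b := NewBuffer("demo", 5)
	b.Add(m, m, m, m, m)
	batch := b.Batch(3)
	b.Add(m, m, m, m, m)
	b.Accept(batch)
	return b.MetricsWritten.Get(), b.MetricsDropped.Get() // 3, 2
}
```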
@@ -6,6 +6,7 @@ import (
     "github.com/influxdata/telegraf"
     "github.com/influxdata/telegraf/metric"
+    "github.com/influxdata/telegraf/testutil"
     "github.com/stretchr/testify/require"
 )
@@ -480,3 +481,45 @@ func TestFilter_FilterTagsPassAndDrop(t *testing.T) {
     }
 }
+
+func BenchmarkFilter(b *testing.B) {
+    tests := []struct {
+        name   string
+        filter Filter
+        metric telegraf.Metric
+    }{
+        {
+            name:   "empty filter",
+            filter: Filter{},
+            metric: testutil.MustMetric("cpu",
+                map[string]string{},
+                map[string]interface{}{
+                    "value": 42,
+                },
+                time.Unix(0, 0),
+            ),
+        },
+        {
+            name: "namepass",
+            filter: Filter{
+                NamePass: []string{"cpu"},
+            },
+            metric: testutil.MustMetric("cpu",
+                map[string]string{},
+                map[string]interface{}{
+                    "value": 42,
+                },
+                time.Unix(0, 0),
+            ),
+        },
+    }
+
+    for _, tt := range tests {
+        b.Run(tt.name, func(b *testing.B) {
+            require.NoError(b, tt.filter.Compile())
+            for n := 0; n < b.N; n++ {
+                tt.filter.Select(tt.metric)
+            }
+        })
+    }
+}
@@ -1,30 +1,53 @@
 package models

 import (
-    "log"
+    "sync"
     "time"

     "github.com/influxdata/telegraf"
+    "github.com/influxdata/telegraf/selfstat"
 )

 type RunningAggregator struct {
-    a       telegraf.Aggregator
-    Config  *AggregatorConfig
-
-    metrics chan telegraf.Metric
+    sync.Mutex
+    Aggregator telegraf.Aggregator
+    Config     *AggregatorConfig

     periodStart time.Time
     periodEnd   time.Time
+
+    MetricsPushed   selfstat.Stat
+    MetricsFiltered selfstat.Stat
+    MetricsDropped  selfstat.Stat
+    PushTime        selfstat.Stat
 }

 func NewRunningAggregator(
-    a telegraf.Aggregator,
-    conf *AggregatorConfig,
+    aggregator telegraf.Aggregator,
+    config *AggregatorConfig,
 ) *RunningAggregator {
     return &RunningAggregator{
-        a:       a,
-        Config:  conf,
-        metrics: make(chan telegraf.Metric, 100),
+        Aggregator: aggregator,
+        Config:     config,
+        MetricsPushed: selfstat.Register(
+            "aggregate",
+            "metrics_pushed",
+            map[string]string{"aggregator": config.Name},
+        ),
+        MetricsFiltered: selfstat.Register(
+            "aggregate",
+            "metrics_filtered",
+            map[string]string{"aggregator": config.Name},
+        ),
+        MetricsDropped: selfstat.Register(
+            "aggregate",
+            "metrics_dropped",
+            map[string]string{"aggregator": config.Name},
+        ),
+        PushTime: selfstat.Register(
+            "aggregate",
+            "push_time_ns",
+            map[string]string{"aggregator": config.Name},
+        ),
     }
 }

@@ -46,6 +69,15 @@ func (r *RunningAggregator) Name() string {
     return "aggregators." + r.Config.Name
 }

+func (r *RunningAggregator) Period() time.Duration {
+    return r.Config.Period
+}
+
+func (r *RunningAggregator) SetPeriodStart(start time.Time) {
+    r.periodStart = start
+    r.periodEnd = r.periodStart.Add(r.Config.Period).Add(r.Config.Delay)
+}
+
 func (r *RunningAggregator) MakeMetric(metric telegraf.Metric) telegraf.Metric {
     m := makemetric(
         metric,

@@ -59,9 +91,21 @@ func (r *RunningAggregator) MakeMetric(metric telegraf.Metric) telegraf.Metric {
         m.SetAggregate(true)
     }

+    r.MetricsPushed.Incr(1)
+
     return m
 }

+func (r *RunningAggregator) metricFiltered(metric telegraf.Metric) {
+    r.MetricsFiltered.Incr(1)
+    metric.Accept()
+}
+
+func (r *RunningAggregator) metricDropped(metric telegraf.Metric) {
+    r.MetricsDropped.Incr(1)
+    metric.Accept()
+}
+
 // Add a metric to the aggregator and return true if the original metric
 // should be dropped.
 func (r *RunningAggregator) Add(metric telegraf.Metric) bool {

@@ -74,75 +118,31 @@ func (r *RunningAggregator) Add(metric telegraf.Metric) bool {
         return r.Config.DropOriginal
     }

-    r.metrics <- metric
+    r.Lock()
+    defer r.Unlock()
+
+    if r.periodStart.IsZero() || metric.Time().Before(r.periodStart) || metric.Time().After(r.periodEnd) {
+        r.metricDropped(metric)
+        return false
+    }
+
+    r.Aggregator.Add(metric)
     return r.Config.DropOriginal
 }

-func (r *RunningAggregator) add(in telegraf.Metric) {
-    r.a.Add(in)
+func (r *RunningAggregator) Push(acc telegraf.Accumulator) {
+    r.Lock()
+    defer r.Unlock()
+
+    r.periodStart = r.periodEnd
+    r.periodEnd = r.periodStart.Add(r.Config.Period).Add(r.Config.Delay)
+    r.push(acc)
+    r.Aggregator.Reset()
 }

 func (r *RunningAggregator) push(acc telegraf.Accumulator) {
-    r.a.Push(acc)
-}
-
-func (r *RunningAggregator) reset() {
-    r.a.Reset()
-}
-
-// Run runs the running aggregator, listens for incoming metrics, and waits
-// for period ticks to tell it when to push and reset the aggregator.
-func (r *RunningAggregator) Run(
-    acc telegraf.Accumulator,
-    shutdown chan struct{},
-) {
-    // The start of the period is truncated to the nearest second.
-    //
-    // Every metric then gets it's timestamp checked and is dropped if it
-    // is not within:
-    //
-    //   start < t < end + truncation + delay
-    //
-    // So if we start at now = 00:00.2 with a 10s period and 0.3s delay:
-    //   now = 00:00.2
-    //   start = 00:00
-    //   truncation = 00:00.2
-    //   end = 00:10
-    //   1st interval: 00:00 - 00:10.5
-    //   2nd interval: 00:10 - 00:20.5
-    //   etc.
-    //
-    now := time.Now()
-    r.periodStart = now.Truncate(time.Second)
-    truncation := now.Sub(r.periodStart)
-    r.periodEnd = r.periodStart.Add(r.Config.Period)
-    time.Sleep(r.Config.Delay)
-    periodT := time.NewTicker(r.Config.Period)
-    defer periodT.Stop()
-
-    for {
-        select {
-        case <-shutdown:
-            if len(r.metrics) > 0 {
-                // wait until metrics are flushed before exiting
-                continue
-            }
-            return
-        case m := <-r.metrics:
-            if m.Time().Before(r.periodStart) ||
-                m.Time().After(r.periodEnd.Add(truncation).Add(r.Config.Delay)) {
-                // the metric is outside the current aggregation period, so
-                // skip it.
-                log.Printf("D! aggregator: metric \"%s\" is not in the current timewindow, skipping", m.Name())
-                continue
-            }
-            r.add(m)
-        case <-periodT.C:
-            r.periodStart = r.periodEnd
-            r.periodEnd = r.periodStart.Add(r.Config.Period)
-            r.push(acc)
-            r.reset()
-        }
-    }
+    start := time.Now()
+    r.Aggregator.Push(acc)
+    elapsed := time.Since(start)
+    r.PushTime.Incr(elapsed.Nanoseconds())
 }
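With Run gone, period scheduling moves to the caller: the agent seeds the window with SetPeriodStart, feeds metrics through Add (which now locks and range-checks the timestamp itself), and calls Push on each tick. A minimal sketch of that driving loop; the incoming channel, accumulator, and ticker wiring are assumptions standing in for the real agent code, which is not part of this diff:

```go
// runAggregator drives a RunningAggregator without a dedicated Run goroutine.
func runAggregator(ra *RunningAggregator, incoming <-chan telegraf.Metric, acc telegraf.Accumulator) {
	ra.SetPeriodStart(time.Now())
	ticker := time.NewTicker(ra.Period())
	defer ticker.Stop()

	for {
		select {
		case m := <-incoming:
			// Add drops metrics outside [periodStart, periodEnd]; its bool
			// result (drop the original) is ignored in this sketch.
			ra.Add(m)
		case <-ticker.C:
			// Push aggregates, advances the period, and resets state.
			ra.Push(acc)
		}
	}
}
```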
@@ -1,16 +1,13 @@
 package models

 import (
-    "sync"
     "sync/atomic"
     "testing"
     "time"

     "github.com/influxdata/telegraf"
-    "github.com/influxdata/telegraf/metric"
     "github.com/influxdata/telegraf/testutil"

-    "github.com/stretchr/testify/assert"
     "github.com/stretchr/testify/require"
 )

@@ -23,28 +20,24 @@ func TestAdd(t *testing.T) {
         },
         Period: time.Millisecond * 500,
     })
-    assert.NoError(t, ra.Config.Filter.Compile())
+    require.NoError(t, ra.Config.Filter.Compile())
     acc := testutil.Accumulator{}
-    go ra.Run(&acc, make(chan struct{}))

-    m, err := metric.New("RITest",
+    now := time.Now()
+    ra.SetPeriodStart(now)
+
+    m := testutil.MustMetric("RITest",
         map[string]string{},
         map[string]interface{}{
             "value": int64(101),
         },
         time.Now().Add(time.Millisecond*150),
         telegraf.Untyped)
-    require.NoError(t, err)
-
-    assert.False(t, ra.Add(m))
-
-    for {
-        time.Sleep(time.Millisecond)
-        if atomic.LoadInt64(&a.sum) > 0 {
-            break
-        }
-    }
-    assert.Equal(t, int64(101), atomic.LoadInt64(&a.sum))
+    require.False(t, ra.Add(m))
+    ra.Push(&acc)
+
+    require.Equal(t, 1, len(acc.Metrics))
+    require.Equal(t, int64(101), acc.Metrics[0].Fields["sum"])
 }

 func TestAddMetricsOutsideCurrentPeriod(t *testing.T) {

@@ -56,50 +49,45 @@ func TestAddMetricsOutsideCurrentPeriod(t *testing.T) {
         },
         Period: time.Millisecond * 500,
     })
-    assert.NoError(t, ra.Config.Filter.Compile())
+    require.NoError(t, ra.Config.Filter.Compile())
     acc := testutil.Accumulator{}
-    go ra.Run(&acc, make(chan struct{}))
+    now := time.Now()
+    ra.SetPeriodStart(now)

-    m, err := metric.New("RITest",
+    m := testutil.MustMetric("RITest",
         map[string]string{},
         map[string]interface{}{
             "value": int64(101),
         },
-        time.Now().Add(-time.Hour),
-        telegraf.Untyped)
-    require.NoError(t, err)
-
-    assert.False(t, ra.Add(m))
+        now.Add(-time.Hour),
+        telegraf.Untyped,
+    )
+    require.False(t, ra.Add(m))

     // metric after current period
-    m, err = metric.New("RITest",
+    m = testutil.MustMetric("RITest",
         map[string]string{},
         map[string]interface{}{
             "value": int64(101),
         },
-        time.Now().Add(time.Hour),
-        telegraf.Untyped)
-    require.NoError(t, err)
-    assert.False(t, ra.Add(m))
+        now.Add(time.Hour),
+        telegraf.Untyped,
+    )
+    require.False(t, ra.Add(m))

     // "now" metric
-    m, err = metric.New("RITest",
+    m = testutil.MustMetric("RITest",
         map[string]string{},
         map[string]interface{}{
             "value": int64(101),
         },
         time.Now().Add(time.Millisecond*50),
         telegraf.Untyped)
-    require.NoError(t, err)
-    assert.False(t, ra.Add(m))
-
-    for {
-        time.Sleep(time.Millisecond)
-        if atomic.LoadInt64(&a.sum) > 0 {
-            break
-        }
-    }
-    assert.Equal(t, int64(101), atomic.LoadInt64(&a.sum))
+    require.False(t, ra.Add(m))
+
+    ra.Push(&acc)
+    require.Equal(t, 1, len(acc.Metrics))
+    require.Equal(t, int64(101), acc.Metrics[0].Fields["sum"])
 }

 func TestAddAndPushOnePeriod(t *testing.T) {

@@ -111,37 +99,24 @@ func TestAddAndPushOnePeriod(t *testing.T) {
         },
         Period: time.Millisecond * 500,
     })
-    assert.NoError(t, ra.Config.Filter.Compile())
+    require.NoError(t, ra.Config.Filter.Compile())
     acc := testutil.Accumulator{}
-    shutdown := make(chan struct{})

-    var wg sync.WaitGroup
-    wg.Add(1)
-    go func() {
-        defer wg.Done()
-        ra.Run(&acc, shutdown)
-    }()
+    now := time.Now()
+    ra.SetPeriodStart(now)

-    m, err := metric.New("RITest",
+    m := testutil.MustMetric("RITest",
         map[string]string{},
         map[string]interface{}{
             "value": int64(101),
         },
         time.Now().Add(time.Millisecond*100),
         telegraf.Untyped)
-    require.NoError(t, err)
-    assert.False(t, ra.Add(m))
+    require.False(t, ra.Add(m))
+    ra.Push(&acc)

-    for {
-        time.Sleep(time.Millisecond)
-        if acc.NMetrics() > 0 {
-            break
-        }
-    }
     acc.AssertContainsFields(t, "TestMetric", map[string]interface{}{"sum": int64(101)})
-
-    close(shutdown)
-    wg.Wait()
 }

 func TestAddDropOriginal(t *testing.T) {

@@ -152,28 +127,29 @@ func TestAddDropOriginal(t *testing.T) {
         },
         DropOriginal: true,
     })
-    assert.NoError(t, ra.Config.Filter.Compile())
+    require.NoError(t, ra.Config.Filter.Compile())

-    m, err := metric.New("RITest",
+    now := time.Now()
+    ra.SetPeriodStart(now)
+
+    m := testutil.MustMetric("RITest",
         map[string]string{},
         map[string]interface{}{
             "value": int64(101),
         },
-        time.Now(),
+        now,
         telegraf.Untyped)
-    require.NoError(t, err)
-    assert.True(t, ra.Add(m))
+    require.True(t, ra.Add(m))

     // this metric name doesn't match the filter, so Add will return false
-    m2, err := metric.New("foobar",
+    m2 := testutil.MustMetric("foobar",
         map[string]string{},
         map[string]interface{}{
             "value": int64(101),
         },
-        time.Now(),
+        now,
         telegraf.Untyped)
-    require.NoError(t, err)
-    assert.False(t, ra.Add(m2))
+    require.False(t, ra.Add(m2))
 }

 type TestAggregator struct {
@@ -1,11 +1,9 @@
 package models

 import (
-    "fmt"
     "time"

     "github.com/influxdata/telegraf"
-    "github.com/influxdata/telegraf/plugins/serializers/influx"
     "github.com/influxdata/telegraf/selfstat"
 )

@@ -15,16 +13,13 @@ type RunningInput struct {
     Input  telegraf.Input
     Config *InputConfig

-    trace       bool
     defaultTags map[string]string

     MetricsGathered selfstat.Stat
+    GatherTime      selfstat.Stat
 }

-func NewRunningInput(
-    input telegraf.Input,
-    config *InputConfig,
-) *RunningInput {
+func NewRunningInput(input telegraf.Input, config *InputConfig) *RunningInput {
     return &RunningInput{
         Input:  input,
         Config: config,

@@ -33,6 +28,11 @@ func NewRunningInput(
             "metrics_gathered",
             map[string]string{"input": config.Name},
         ),
+        GatherTime: selfstat.RegisterTiming(
+            "gather",
+            "gather_time_ns",
+            map[string]string{"input": config.Name},
+        ),
     }
 }

@@ -52,13 +52,19 @@ func (r *RunningInput) Name() string {
     return "inputs." + r.Config.Name
 }

+func (r *RunningInput) metricFiltered(metric telegraf.Metric) {
+    metric.Drop()
+}
+
 func (r *RunningInput) MakeMetric(metric telegraf.Metric) telegraf.Metric {
     if ok := r.Config.Filter.Select(metric); !ok {
+        r.metricFiltered(metric)
         return nil
     }

     r.Config.Filter.Modify(metric)
     if len(metric.FieldList()) == 0 {
+        r.metricFiltered(metric)
         return nil
     }

@@ -70,26 +76,17 @@ func (r *RunningInput) MakeMetric(metric telegraf.Metric) telegraf.Metric {
         r.Config.Tags,
         r.defaultTags)

-    if r.trace && m != nil {
-        s := influx.NewSerializer()
-        s.SetFieldSortOrder(influx.SortFields)
-        octets, err := s.Serialize(m)
-        if err == nil {
-            fmt.Print("> " + string(octets))
-        }
-    }
-
     r.MetricsGathered.Incr(1)
     GlobalMetricsGathered.Incr(1)
     return m
 }

-func (r *RunningInput) Trace() bool {
-    return r.trace
-}
-
-func (r *RunningInput) SetTrace(trace bool) {
-    r.trace = trace
+func (r *RunningInput) Gather(acc telegraf.Accumulator) error {
+    start := time.Now()
+    err := r.Input.Gather(acc)
+    elapsed := time.Since(start)
+    r.GatherTime.Incr(elapsed.Nanoseconds())
+    return err
 }

 func (r *RunningInput) SetDefaultTags(tags map[string]string) {
@@ -6,6 +6,7 @@ import (
     "github.com/influxdata/telegraf"
     "github.com/influxdata/telegraf/metric"
+    "github.com/influxdata/telegraf/testutil"

     "github.com/stretchr/testify/assert"
     "github.com/stretchr/testify/require"

@@ -66,17 +67,13 @@ func TestMakeMetricWithPluginTags(t *testing.T) {
         },
     })

-    ri.SetTrace(true)
-    assert.Equal(t, true, ri.Trace())
-
-    m, err := metric.New("RITest",
+    m := testutil.MustMetric("RITest",
         map[string]string{},
         map[string]interface{}{
             "value": int64(101),
         },
         now,
         telegraf.Untyped)
-    require.NoError(t, err)
     m = ri.MakeMetric(m)

     expected, err := metric.New("RITest",

@@ -102,8 +99,6 @@ func TestMakeMetricFilteredOut(t *testing.T) {
         Filter: Filter{NamePass: []string{"foobar"}},
     })

-    ri.SetTrace(true)
-    assert.Equal(t, true, ri.Trace())
     assert.NoError(t, ri.Config.Filter.Compile())

     m, err := metric.New("RITest",

@@ -127,17 +122,13 @@ func TestMakeMetricWithDaemonTags(t *testing.T) {
         "foo": "bar",
     })

-    ri.SetTrace(true)
-    assert.Equal(t, true, ri.Trace())
-
-    m, err := metric.New("RITest",
+    m := testutil.MustMetric("RITest",
         map[string]string{},
         map[string]interface{}{
             "value": int64(101),
         },
         now,
         telegraf.Untyped)
-    require.NoError(t, err)
     m = ri.MakeMetric(m)
     expected, err := metric.New("RITest",
         map[string]string{
@@ -6,7 +6,6 @@ import (
     "time"

     "github.com/influxdata/telegraf"
-    "github.com/influxdata/telegraf/internal/buffer"
     "github.com/influxdata/telegraf/selfstat"
 )

@@ -18,6 +17,16 @@ const (
     DEFAULT_METRIC_BUFFER_LIMIT = 10000
 )

+// OutputConfig contains the name and filter for an output plugin.
+type OutputConfig struct {
+    Name   string
+    Filter Filter
+
+    FlushInterval     time.Duration
+    MetricBufferLimit int
+    MetricBatchSize   int
+}
+
 // RunningOutput contains the output configuration
 type RunningOutput struct {
     Name string

@@ -27,24 +36,16 @@ type RunningOutput struct {
     MetricBatchSize int

     MetricsFiltered selfstat.Stat
-    MetricsWritten  selfstat.Stat
     BufferSize      selfstat.Stat
     BufferLimit     selfstat.Stat
     WriteTime       selfstat.Stat

-    metrics     *buffer.Buffer
-    failMetrics *buffer.Buffer
+    batch      []telegraf.Metric
+    buffer     *Buffer
+    BatchReady chan time.Time

-    // Guards against concurrent calls to Add, Push, Reset
     aggMutex   sync.Mutex
-    // Guards against concurrent calls to the Output as described in #3009
-    writeMutex sync.Mutex
-}
-
-// OutputConfig containing name and filter
-type OutputConfig struct {
-    Name   string
-    Filter Filter
+    batchMutex sync.Mutex
 }

@@ -54,25 +55,27 @@ func NewRunningOutput(
     batchSize int,
     bufferLimit int,
 ) *RunningOutput {
+    if conf.MetricBufferLimit > 0 {
+        bufferLimit = conf.MetricBufferLimit
+    }
     if bufferLimit == 0 {
         bufferLimit = DEFAULT_METRIC_BUFFER_LIMIT
     }
+    if conf.MetricBatchSize > 0 {
+        batchSize = conf.MetricBatchSize
+    }
     if batchSize == 0 {
         batchSize = DEFAULT_METRIC_BATCH_SIZE
     }
     ro := &RunningOutput{
         Name: name,
-        metrics:     buffer.NewBuffer(batchSize),
-        failMetrics: buffer.NewBuffer(bufferLimit),
+        batch:      make([]telegraf.Metric, 0, batchSize),
+        buffer:     NewBuffer(name, bufferLimit),
+        BatchReady: make(chan time.Time, 1),
         Output: output,
         Config: conf,
         MetricBufferLimit: bufferLimit,
         MetricBatchSize:   batchSize,
-        MetricsWritten: selfstat.Register(
-            "write",
-            "metrics_written",
-            map[string]string{"output": name},
-        ),
         MetricsFiltered: selfstat.Register(
             "write",
             "metrics_filtered",

@@ -94,20 +97,28 @@ func NewRunningOutput(
             map[string]string{"output": name},
         ),
     }

     ro.BufferLimit.Set(int64(ro.MetricBufferLimit))
     return ro
 }

-// AddMetric adds a metric to the output. This function can also write cached
-// points if FlushBufferWhenFull is true.
+func (ro *RunningOutput) metricFiltered(metric telegraf.Metric) {
+    ro.MetricsFiltered.Incr(1)
+    metric.Drop()
+}
+
+// AddMetric adds a metric to the output.
+//
+// Takes ownership of metric
 func (ro *RunningOutput) AddMetric(metric telegraf.Metric) {
     if ok := ro.Config.Filter.Select(metric); !ok {
-        ro.MetricsFiltered.Incr(1)
+        ro.metricFiltered(metric)
         return
     }

     ro.Config.Filter.Modify(metric)
     if len(metric.FieldList()) == 0 {
+        ro.metricFiltered(metric)
         return
     }

@@ -118,85 +129,98 @@ func (ro *RunningOutput) AddMetric(metric telegraf.Metric) {
         return
     }

-    ro.metrics.Add(metric)
-    if ro.metrics.Len() == ro.MetricBatchSize {
-        batch := ro.metrics.Batch(ro.MetricBatchSize)
-        err := ro.write(batch)
-        if err != nil {
-            ro.failMetrics.Add(batch...)
-            log.Printf("E! Error writing to output [%s]: %v", ro.Name, err)
-        }
-    }
+    ro.batchMutex.Lock()
+
+    ro.batch = append(ro.batch, metric)
+    if len(ro.batch) == ro.MetricBatchSize {
+        ro.addBatchToBuffer()
+
+        nBuffer := ro.buffer.Len()
+        ro.BufferSize.Set(int64(nBuffer))
+
+        select {
+        case ro.BatchReady <- time.Now():
+        default:
+        }
+    }
+
+    ro.batchMutex.Unlock()
 }

-// Write writes all cached points to this output.
+// addBatchToBuffer moves the metrics from the batch into the metric buffer.
+func (ro *RunningOutput) addBatchToBuffer() {
+    ro.buffer.Add(ro.batch...)
+    ro.batch = ro.batch[:0]
+}
+
+// Write writes all metrics to the output, stopping when all have been sent on
+// or error.
 func (ro *RunningOutput) Write() error {
     if output, ok := ro.Output.(telegraf.AggregatingOutput); ok {
         ro.aggMutex.Lock()
         metrics := output.Push()
-        ro.metrics.Add(metrics...)
+        ro.buffer.Add(metrics...)
         output.Reset()
         ro.aggMutex.Unlock()
     }
+
+    // add and write can be called concurrently
+    ro.batchMutex.Lock()
+    ro.addBatchToBuffer()
+    ro.batchMutex.Unlock()

-    nFails, nMetrics := ro.failMetrics.Len(), ro.metrics.Len()
-    ro.BufferSize.Set(int64(nFails + nMetrics))
-    log.Printf("D! Output [%s] buffer fullness: %d / %d metrics. ",
-        ro.Name, nFails+nMetrics, ro.MetricBufferLimit)
-    var err error
-    if !ro.failMetrics.IsEmpty() {
-        // how many batches of failed writes we need to write.
-        nBatches := nFails/ro.MetricBatchSize + 1
-        batchSize := ro.MetricBatchSize
-        for i := 0; i < nBatches; i++ {
-            // If it's the last batch, only grab the metrics that have not had
-            // a write attempt already (this is primarily to preserve order).
-            if i == nBatches-1 {
-                batchSize = nFails % ro.MetricBatchSize
-            }
-            batch := ro.failMetrics.Batch(batchSize)
-            // If we've already failed previous writes, don't bother trying to
-            // write to this output again. We are not exiting the loop just so
-            // that we can rotate the metrics to preserve order.
-            if err == nil {
-                err = ro.write(batch)
-            }
-            if err != nil {
-                ro.failMetrics.Add(batch...)
-            }
-        }
-    }
-
-    batch := ro.metrics.Batch(ro.MetricBatchSize)
-    // see comment above about not trying to write to an already failed output.
-    // if ro.failMetrics is empty then err will always be nil at this point.
-    if err == nil {
-        err = ro.write(batch)
-    }
-    if err != nil {
-        ro.failMetrics.Add(batch...)
-        return err
-    }
-    return nil
+    nBuffer := ro.buffer.Len()
+
+    // Only process the metrics in the buffer now. Metrics added while we are
+    // writing will be sent on the next call.
+    nBatches := nBuffer/ro.MetricBatchSize + 1
+    for i := 0; i < nBatches; i++ {
+        batch := ro.buffer.Batch(ro.MetricBatchSize)
+        if len(batch) == 0 {
+            break
+        }
+
+        err := ro.write(batch)
+        if err != nil {
+            ro.buffer.Reject(batch)
+            return err
+        }
+        ro.buffer.Accept(batch)
+    }
+    return nil
 }

+// WriteBatch writes only the batch metrics to the output.
+func (ro *RunningOutput) WriteBatch() error {
+    batch := ro.buffer.Batch(ro.MetricBatchSize)
+    if len(batch) == 0 {
+        return nil
+    }
+
+    err := ro.write(batch)
+    if err != nil {
+        ro.buffer.Reject(batch)
+        return err
+    }
+    ro.buffer.Accept(batch)
+
+    return nil
+}

 func (ro *RunningOutput) write(metrics []telegraf.Metric) error {
-    nMetrics := len(metrics)
-    if nMetrics == 0 {
-        return nil
-    }
-    ro.writeMutex.Lock()
-    defer ro.writeMutex.Unlock()
     start := time.Now()
     err := ro.Output.Write(metrics)
     elapsed := time.Since(start)
-    if err == nil {
-        log.Printf("D! Output [%s] wrote batch of %d metrics in %s\n",
-            ro.Name, nMetrics, elapsed)
-        ro.MetricsWritten.Incr(int64(nMetrics))
-        ro.WriteTime.Incr(elapsed.Nanoseconds())
+    ro.WriteTime.Incr(elapsed.Nanoseconds())
+
+    if err == nil {
+        log.Printf("D! [outputs.%s] wrote batch of %d metrics in %s\n",
+            ro.Name, len(metrics), elapsed)
     }
     return err
 }

+func (ro *RunningOutput) LogBufferStatus() {
+    nBuffer := ro.buffer.Len()
+    log.Printf("D! [outputs.%s] buffer fullness: %d / %d metrics. ",
+        ro.Name, nBuffer, ro.MetricBufferLimit)
+}
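AddMetric now only appends to the in-memory batch and signals BatchReady when a full batch is staged, so a slow or blocked output can no longer stall the metric producers. The consumer side is a select loop that flushes early on BatchReady in addition to the regular interval. A minimal sketch of such a loop; the ticker, shutdown channel, and function name are assumptions standing in for the agent's actual flush scheduling, which is not part of this hunk:

```go
// flushLoop drains a RunningOutput, flushing early whenever a full batch
// is signalled on BatchReady.
func flushLoop(ro *RunningOutput, interval time.Duration, shutdown <-chan struct{}) {
	ticker := time.NewTicker(interval)
	defer ticker.Stop()

	for {
		select {
		case <-shutdown:
			return
		case <-ticker.C:
			// Regular flush: move the partial batch into the buffer and
			// write everything currently buffered.
			if err := ro.Write(); err != nil {
				log.Printf("E! [outputs.%s] %v", ro.Name, err)
			}
		case <-ro.BatchReady:
			// A full batch is already staged; write just that batch.
			if err := ro.WriteBatch(); err != nil {
				log.Printf("E! [outputs.%s] %v", ro.Name, err)
			}
		}
	}
}
```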
@@ -231,56 +231,6 @@ func TestRunningOutputDefault(t *testing.T) {
     assert.Len(t, m.Metrics(), 10)
 }

-// Test that running output doesn't flush until it's full when
-// FlushBufferWhenFull is set.
-func TestRunningOutputFlushWhenFull(t *testing.T) {
-    conf := &OutputConfig{
-        Filter: Filter{},
-    }
-
-    m := &mockOutput{}
-    ro := NewRunningOutput("test", m, conf, 6, 10)
-
-    // Fill buffer to 1 under limit
-    for _, metric := range first5 {
-        ro.AddMetric(metric)
-    }
-    // no flush yet
-    assert.Len(t, m.Metrics(), 0)
-
-    // add one more metric
-    ro.AddMetric(next5[0])
-    // now it flushed
-    assert.Len(t, m.Metrics(), 6)
-
-    // add one more metric and write it manually
-    ro.AddMetric(next5[1])
-    err := ro.Write()
-    assert.NoError(t, err)
-    assert.Len(t, m.Metrics(), 7)
-}
-
-// Test that running output doesn't flush until it's full when
-// FlushBufferWhenFull is set, twice.
-func TestRunningOutputMultiFlushWhenFull(t *testing.T) {
-    conf := &OutputConfig{
-        Filter: Filter{},
-    }
-
-    m := &mockOutput{}
-    ro := NewRunningOutput("test", m, conf, 4, 12)
-
-    // Fill buffer past limit twive
-    for _, metric := range first5 {
-        ro.AddMetric(metric)
-    }
-    for _, metric := range next5 {
-        ro.AddMetric(metric)
-    }
-    // flushed twice
-    assert.Len(t, m.Metrics(), 8)
-}
-
 func TestRunningOutputWriteFail(t *testing.T) {
     conf := &OutputConfig{
         Filter: Filter{},
@@ -27,6 +27,19 @@ type ProcessorConfig struct {
     Filter Filter
 }

+func (rp *RunningProcessor) metricFiltered(metric telegraf.Metric) {
+    metric.Drop()
+}
+
+func containsMetric(item telegraf.Metric, metrics []telegraf.Metric) bool {
+    for _, m := range metrics {
+        if item == m {
+            return true
+        }
+    }
+    return false
+}
+
 func (rp *RunningProcessor) Apply(in ...telegraf.Metric) []telegraf.Metric {
     rp.Lock()
     defer rp.Unlock()

@@ -43,6 +56,7 @@ func (rp *RunningProcessor) Apply(in ...telegraf.Metric) []telegraf.Metric {

     rp.Config.Filter.Modify(metric)
     if len(metric.FieldList()) == 0 {
+        rp.metricFiltered(metric)
         continue
     }
@@ -6,7 +6,7 @@ import (
     "time"

     "github.com/influxdata/telegraf"
-    "github.com/influxdata/telegraf/metric"
+    "github.com/influxdata/telegraf/testutil"

     "github.com/stretchr/testify/require"
 )

@@ -41,20 +41,6 @@ func TagProcessor(key, value string) *MockProcessor {
     }
 }

-func Metric(
-    name string,
-    tags map[string]string,
-    fields map[string]interface{},
-    tm time.Time,
-    tp ...telegraf.ValueType,
-) telegraf.Metric {
-    m, err := metric.New(name, tags, fields, tm, tp...)
-    if err != nil {
-        panic(err)
-    }
-    return m
-}
-
 func TestRunningProcessor_Apply(t *testing.T) {
     type args struct {
         Processor telegraf.Processor

@@ -76,7 +62,7 @@ func TestRunningProcessor_Apply(t *testing.T) {
                 },
             },
             input: []telegraf.Metric{
-                Metric(
+                testutil.MustMetric(
                     "cpu",
                     map[string]string{},
                     map[string]interface{}{

@@ -86,7 +72,7 @@ func TestRunningProcessor_Apply(t *testing.T) {
                 ),
             },
             expected: []telegraf.Metric{
-                Metric(
+                testutil.MustMetric(
                     "cpu",
                     map[string]string{
                         "apply": "true",

@@ -109,7 +95,7 @@ func TestRunningProcessor_Apply(t *testing.T) {
                 },
             },
             input: []telegraf.Metric{
-                Metric(
+                testutil.MustMetric(
                     "cpu",
                     map[string]string{},
                     map[string]interface{}{

@@ -119,7 +105,7 @@ func TestRunningProcessor_Apply(t *testing.T) {
                 ),
             },
             expected: []telegraf.Metric{
-                Metric(
+                testutil.MustMetric(
                     "cpu",
                     map[string]string{
                         "apply": "true",

@@ -142,7 +128,7 @@ func TestRunningProcessor_Apply(t *testing.T) {
                 },
             },
             input: []telegraf.Metric{
-                Metric(
+                testutil.MustMetric(
                     "cpu",
                     map[string]string{},
                     map[string]interface{}{

@@ -152,7 +138,7 @@ func TestRunningProcessor_Apply(t *testing.T) {
                 ),
             },
             expected: []telegraf.Metric{
-                Metric(
+                testutil.MustMetric(
                     "cpu",
                     map[string]string{},
                     map[string]interface{}{
11 metric.go

@@ -62,6 +62,17 @@ type Metric interface {
     // Copy returns a deep copy of the Metric.
     Copy() Metric

+    // Accept marks the metric as processed successfully and written to an
+    // output.
+    Accept()
+
+    // Reject marks the metric as processed unsuccessfully.
+    Reject()
+
+    // Drop marks the metric as processed successfully without being written
+    // to any output.
+    Drop()
+
     // Mark Metric as an aggregate
     SetAggregate(bool)
     IsAggregate() bool

@@ -248,6 +248,15 @@ func (m *metric) HashID() uint64 {
     return h.Sum64()
 }

+func (m *metric) Accept() {
+}
+
+func (m *metric) Reject() {
+}
+
+func (m *metric) Drop() {
+}
+
 // Convert field to a supported type or nil if unconvertible
 func convertField(v interface{}) interface{} {
     switch v := v.(type) {
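The interface now requires every handler to settle each metric with exactly one of Accept, Reject, or Drop. On the plain metric these are no-ops, so callers can signal outcomes unconditionally; the tracking wrapper in the next file overrides them to do real bookkeeping. A sketch of the calling convention (the handler function is hypothetical):

```go
// handle settles a metric after an output attempt. For a plain metric the
// calls are no-ops; for a tracking metric they feed the delivery report.
func handle(m telegraf.Metric, written bool) {
	if written {
		m.Accept() // processed and written to an output
	} else {
		m.Reject() // processing failed; counts against delivery
	}
	// A filtered metric would instead call m.Drop(): handled, not written.
}
```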
metric/tracking.go
@@ -0,0 +1,171 @@
package metric

import (
	"log"
	"runtime"
	"sync/atomic"

	"github.com/influxdata/telegraf"
)

// NotifyFunc is called when a tracking metric is done being processed with
// the tracking information.
type NotifyFunc = func(track telegraf.DeliveryInfo)

// WithTracking adds tracking to the metric and registers the notify function
// to be called when processing is complete.
func WithTracking(metric telegraf.Metric, fn NotifyFunc) (telegraf.Metric, telegraf.TrackingID) {
	return newTrackingMetric(metric, fn)
}

// WithGroupTracking adds tracking to the metrics and registers the notify
// function to be called when processing is complete.
func WithGroupTracking(metric []telegraf.Metric, fn NotifyFunc) ([]telegraf.Metric, telegraf.TrackingID) {
	return newTrackingMetricGroup(metric, fn)
}

func EnableDebugFinalizer() {
	finalizer = debugFinalizer
}

var (
	lastID    uint64
	finalizer func(*trackingData)
)

func newTrackingID() telegraf.TrackingID {
	atomic.AddUint64(&lastID, 1)
	return telegraf.TrackingID(lastID)
}

func debugFinalizer(d *trackingData) {
	rc := atomic.LoadInt32(&d.rc)
	if rc != 0 {
		log.Fatalf("E! [agent] metric collected with non-zero reference count rc: %d", rc)
	}
}

type trackingData struct {
	id          telegraf.TrackingID
	rc          int32
	acceptCount int32
	rejectCount int32
	notify      NotifyFunc
}

func (d *trackingData) incr() {
	atomic.AddInt32(&d.rc, 1)
}

func (d *trackingData) decr() int32 {
	return atomic.AddInt32(&d.rc, -1)
}

func (d *trackingData) accept() {
	atomic.AddInt32(&d.acceptCount, 1)
}

func (d *trackingData) reject() {
	atomic.AddInt32(&d.rejectCount, 1)
}

type trackingMetric struct {
	telegraf.Metric
	d *trackingData
}

func newTrackingMetric(metric telegraf.Metric, fn NotifyFunc) (telegraf.Metric, telegraf.TrackingID) {
	m := &trackingMetric{
		Metric: metric,
		d: &trackingData{
			id:          newTrackingID(),
			rc:          1,
			acceptCount: 0,
			rejectCount: 0,
			notify:      fn,
		},
	}

	if finalizer != nil {
		runtime.SetFinalizer(m.d, finalizer)
	}
	return m, m.d.id
}

func newTrackingMetricGroup(group []telegraf.Metric, fn NotifyFunc) ([]telegraf.Metric, telegraf.TrackingID) {
	d := &trackingData{
		id:          newTrackingID(),
		rc:          0,
		acceptCount: 0,
		rejectCount: 0,
		notify:      fn,
	}

	for i, m := range group {
		d.incr()
		dm := &trackingMetric{
			Metric: m,
			d:      d,
		}
		group[i] = dm
	}
	if finalizer != nil {
		runtime.SetFinalizer(d, finalizer)
	}

	return group, d.id
}

func (m *trackingMetric) Copy() telegraf.Metric {
	m.d.incr()
	return &trackingMetric{
		Metric: m.Metric.Copy(),
		d:      m.d,
	}
}

func (m *trackingMetric) Accept() {
	m.d.accept()
	m.decr()
}

func (m *trackingMetric) Reject() {
	m.d.reject()
	m.decr()
}

func (m *trackingMetric) Drop() {
	m.decr()
}

func (m *trackingMetric) decr() {
	v := m.d.decr()
	if v < 0 {
		panic("negative refcount")
	}

	if v == 0 {
		m.d.notify(
			&deliveryInfo{
				id:       m.d.id,
				accepted: int(m.d.acceptCount),
				rejected: int(m.d.rejectCount),
			},
		)
	}
}

type deliveryInfo struct {
	id       telegraf.TrackingID
	accepted int
	rejected int
}

func (r *deliveryInfo) ID() telegraf.TrackingID {
	return r.id
}

func (r *deliveryInfo) Delivered() bool {
	return r.rejected == 0
}
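For orientation, a small self-contained sketch of how these helpers behave, based on the unit tests that follow (assuming this commit's package layout, `github.com/influxdata/telegraf/metric`):

```go
package main

import (
	"fmt"
	"time"

	"github.com/influxdata/telegraf"
	"github.com/influxdata/telegraf/metric"
)

func main() {
	m, err := metric.New("cpu", map[string]string{},
		map[string]interface{}{"value": 42}, time.Unix(0, 0))
	if err != nil {
		panic(err)
	}

	// The notify function runs as soon as the last outstanding handle is
	// resolved via Accept, Reject, or Drop.
	tracked, id := metric.WithTracking(m, func(info telegraf.DeliveryInfo) {
		fmt.Printf("id=%d delivered=%v\n", info.ID(), info.Delivered())
	})

	m2 := tracked.Copy() // Copy increments the reference count.
	tracked.Accept()
	m2.Accept() // refcount hits zero here; prints "delivered=true"
	_ = id
}
```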
metric/tracking_test.go
@@ -0,0 +1,260 @@
package metric

import (
	"testing"
	"time"

	"github.com/influxdata/telegraf"
	"github.com/stretchr/testify/require"
)

func mustMetric(
	name string,
	tags map[string]string,
	fields map[string]interface{},
	tm time.Time,
	tp ...telegraf.ValueType,
) telegraf.Metric {
	m, err := New(name, tags, fields, tm, tp...)
	if err != nil {
		panic("mustMetric")
	}
	return m
}

type deliveries struct {
	Info map[telegraf.TrackingID]telegraf.DeliveryInfo
}

func (d *deliveries) onDelivery(info telegraf.DeliveryInfo) {
	d.Info[info.ID()] = info
}

func TestTracking(t *testing.T) {
	tests := []struct {
		name      string
		metric    telegraf.Metric
		actions   func(metric telegraf.Metric)
		delivered bool
	}{
		{
			name: "accept",
			metric: mustMetric(
				"cpu",
				map[string]string{},
				map[string]interface{}{
					"value": 42,
				},
				time.Unix(0, 0),
			),
			actions: func(m telegraf.Metric) {
				m.Accept()
			},
			delivered: true,
		},
		{
			name: "reject",
			metric: mustMetric(
				"cpu",
				map[string]string{},
				map[string]interface{}{
					"value": 42,
				},
				time.Unix(0, 0),
			),
			actions: func(m telegraf.Metric) {
				m.Reject()
			},
			delivered: false,
		},
		{
			name: "accept copy",
			metric: mustMetric(
				"cpu",
				map[string]string{},
				map[string]interface{}{
					"value": 42,
				},
				time.Unix(0, 0),
			),
			actions: func(m telegraf.Metric) {
				m2 := m.Copy()
				m.Accept()
				m2.Accept()
			},
			delivered: true,
		},
		{
			name: "copy with accept and done",
			metric: mustMetric(
				"cpu",
				map[string]string{},
				map[string]interface{}{
					"value": 42,
				},
				time.Unix(0, 0),
			),
			actions: func(m telegraf.Metric) {
				m2 := m.Copy()
				m.Accept()
				m2.Drop()
			},
			delivered: true,
		},
		{
			name: "copy with mixed delivery",
			metric: mustMetric(
				"cpu",
				map[string]string{},
				map[string]interface{}{
					"value": 42,
				},
				time.Unix(0, 0),
			),
			actions: func(m telegraf.Metric) {
				m2 := m.Copy()
				m.Accept()
				m2.Reject()
			},
			delivered: false,
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			d := &deliveries{
				Info: make(map[telegraf.TrackingID]telegraf.DeliveryInfo),
			}
			metric, id := WithTracking(tt.metric, d.onDelivery)
			tt.actions(metric)

			info := d.Info[id]
			require.Equal(t, tt.delivered, info.Delivered())
		})
	}
}

func TestGroupTracking(t *testing.T) {
	tests := []struct {
		name      string
		metrics   []telegraf.Metric
		actions   func(metrics []telegraf.Metric)
		delivered bool
	}{
		{
			name: "accept",
			metrics: []telegraf.Metric{
				mustMetric(
					"cpu",
					map[string]string{},
					map[string]interface{}{
						"value": 42,
					},
					time.Unix(0, 0),
				),
				mustMetric(
					"cpu",
					map[string]string{},
					map[string]interface{}{
						"value": 42,
					},
					time.Unix(0, 0),
				),
			},
			actions: func(metrics []telegraf.Metric) {
				metrics[0].Accept()
				metrics[1].Accept()
			},
			delivered: true,
		},
		{
			name: "reject",
			metrics: []telegraf.Metric{
				mustMetric(
					"cpu",
					map[string]string{},
					map[string]interface{}{
						"value": 42,
					},
					time.Unix(0, 0),
				),
				mustMetric(
					"cpu",
					map[string]string{},
					map[string]interface{}{
						"value": 42,
					},
					time.Unix(0, 0),
				),
			},
			actions: func(metrics []telegraf.Metric) {
				metrics[0].Reject()
				metrics[1].Reject()
			},
			delivered: false,
		},
		{
			name: "remove",
			metrics: []telegraf.Metric{
				mustMetric(
					"cpu",
					map[string]string{},
					map[string]interface{}{
						"value": 42,
					},
					time.Unix(0, 0),
				),
				mustMetric(
					"cpu",
					map[string]string{},
					map[string]interface{}{
						"value": 42,
					},
					time.Unix(0, 0),
				),
			},
			actions: func(metrics []telegraf.Metric) {
				metrics[0].Drop()
				metrics[1].Drop()
			},
			delivered: true,
		},
		{
			name: "mixed",
			metrics: []telegraf.Metric{
				mustMetric(
					"cpu",
					map[string]string{},
					map[string]interface{}{
						"value": 42,
					},
					time.Unix(0, 0),
				),
				mustMetric(
					"cpu",
					map[string]string{},
					map[string]interface{}{
						"value": 42,
					},
					time.Unix(0, 0),
				),
			},
			actions: func(metrics []telegraf.Metric) {
				metrics[0].Accept()
				metrics[1].Reject()
			},
			delivered: false,
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			d := &deliveries{
				Info: make(map[telegraf.TrackingID]telegraf.DeliveryInfo),
			}
			metrics, id := WithGroupTracking(tt.metrics, d.onDelivery)
			tt.actions(metrics)

			info := d.Info[id]
			require.Equal(t, tt.delivered, info.Delivered())
		})
	}
}
29 output.go
@@ -17,16 +17,7 @@ type Output interface {
 // if the Output only accepts a fixed set of aggregations over a time period.
 // These functions may be called concurrently to the Write function.
 type AggregatingOutput interface {
-	// Connect to the Output
-	Connect() error
-	// Close any connections to the Output
-	Close() error
-	// Description returns a one-sentence description on the Output
-	Description() string
-	// SampleConfig returns the default configuration of the Output
-	SampleConfig() string
-	// Write takes in group of points to be written to the Output
-	Write(metrics []Metric) error
+	Output
 
 	// Add the metric to the aggregator
 	Add(in Metric)
@@ -35,21 +26,3 @@ type AggregatingOutput interface {
 	// Reset signals that the aggregator period is completed.
 	Reset()
 }
-
-type ServiceOutput interface {
-	// Connect to the Output
-	Connect() error
-	// Close any connections to the Output
-	Close() error
-	// Description returns a one-sentence description on the Output
-	Description() string
-	// SampleConfig returns the default configuration of the Output
-	SampleConfig() string
-	// Write takes in group of points to be written to the Output
-	Write(metrics []Metric) error
-
-	// Start the "service" that will provide an Output
-	Start() error
-	// Stop the "service" that will provide an Output
-	Stop()
-}
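Both changes lean on Go interface embedding: AggregatingOutput now inherits the full Output method set instead of restating it, and the separate ServiceOutput lifecycle interface goes away. A generic sketch of the mechanism (names illustrative, not from this commit):

```go
package example

// Closer is a base contract; Fooer embeds it, so any Fooer must also
// implement Close. This is the same composition AggregatingOutput now
// uses with Output.
type Closer interface {
	Close() error
}

type Fooer interface {
	Closer

	Foo()
}
```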
plugins/aggregators/basicstats/basic_stats.go
@@ -133,7 +133,6 @@ func (m *BasicStats) Add(in telegraf.Metric) {
 }
 
 func (m *BasicStats) Push(acc telegraf.Accumulator) {
-
 	config := getConfiguredStats(m)
 
 	for _, aggregate := range m.cache {
plugins/inputs/amqp_consumer/README.md
@@ -13,7 +13,6 @@ For an introduction to AMQP see:
 The following defaults are known to work with RabbitMQ:
 
 ```toml
-# AMQP consumer plugin
 [[inputs.amqp_consumer]]
   ## Broker to consume from.
   ##   deprecated in 1.7; use the brokers option
@@ -56,6 +55,16 @@ The following defaults are known to work with RabbitMQ:
   ## Maximum number of messages server should give to the worker.
   # prefetch_count = 50
 
+  ## Maximum messages to read from the broker that have not been written by an
+  ## output. For best throughput set based on the number of metrics within
+  ## each message and the size of the output's metric_batch_size.
+  ##
+  ## For example, if each message from the queue contains 10 metrics and the
+  ## output metric_batch_size is 1000, setting this to 100 will ensure that a
+  ## full batch is collected and the write is triggered immediately without
+  ## waiting until the next flush_interval.
+  # max_undelivered_messages = 1000
 
   ## Auth method. PLAIN and EXTERNAL are supported
   ## Using EXTERNAL requires enabling the rabbitmq_auth_mechanism_ssl plugin as
   ## described here: https://www.rabbitmq.com/plugins.html
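Taken together, the sizing rule is roughly max_undelivered_messages ≈ metric_batch_size / metrics per message. A sketch of how the two settings pair up in a config (the broker URL and the 10-metrics-per-message figure are illustrative assumptions, not defaults):

```toml
[agent]
  ## Applies to each output; one write is triggered per 1000 metrics.
  metric_batch_size = 1000

[[inputs.amqp_consumer]]
  brokers = ["amqp://localhost:5672/influxdb"]
  ## 100 in-flight messages x 10 metrics/message = one full output batch.
  max_undelivered_messages = 100
```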
plugins/inputs/amqp_consumer/amqp_consumer.go
@@ -1,6 +1,7 @@
 package amqp_consumer
 
 import (
+	"context"
 	"errors"
 	"fmt"
 	"log"
@@ -9,14 +10,20 @@ import (
 	"sync"
 	"time"
 
-	"github.com/streadway/amqp"
-
 	"github.com/influxdata/telegraf"
 	"github.com/influxdata/telegraf/internal/tls"
 	"github.com/influxdata/telegraf/plugins/inputs"
 	"github.com/influxdata/telegraf/plugins/parsers"
+	"github.com/streadway/amqp"
 )
 
+const (
+	defaultMaxUndeliveredMessages = 1000
+)
+
+type empty struct{}
+type semaphore chan empty
+
 // AMQPConsumer is the top level struct for this plugin
 type AMQPConsumer struct {
 	URL string `toml:"url"` // deprecated in 1.7; use brokers
@@ -28,6 +35,7 @@ type AMQPConsumer struct {
 	ExchangeDurability string            `toml:"exchange_durability"`
 	ExchangePassive    bool              `toml:"exchange_passive"`
 	ExchangeArguments  map[string]string `toml:"exchange_arguments"`
+	MaxUndeliveredMessages int           `toml:"max_undelivered_messages"`
 
 	// Queue Name
 	Queue string `toml:"queue"`
@@ -44,9 +52,12 @@ type AMQPConsumer struct {
 	AuthMethod string
 	tls.ClientConfig
 
+	deliveries map[telegraf.TrackingID]amqp.Delivery
+
 	parser parsers.Parser
 	conn   *amqp.Connection
 	wg     *sync.WaitGroup
+	cancel context.CancelFunc
 }
 
 type externalAuth struct{}
@@ -114,6 +125,16 @@ func (a *AMQPConsumer) SampleConfig() string {
   ## Maximum number of messages server should give to the worker.
   # prefetch_count = 50
 
+  ## Maximum messages to read from the broker that have not been written by an
+  ## output. For best throughput set based on the number of metrics within
+  ## each message and the size of the output's metric_batch_size.
+  ##
+  ## For example, if each message from the queue contains 10 metrics and the
+  ## output metric_batch_size is 1000, setting this to 100 will ensure that a
+  ## full batch is collected and the write is triggered immediately without
+  ## waiting until the next flush_interval.
+  # max_undelivered_messages = 1000
 
   ## Auth method. PLAIN and EXTERNAL are supported
   ## Using EXTERNAL requires enabling the rabbitmq_auth_mechanism_ssl plugin as
   ## described here: https://www.rabbitmq.com/plugins.html
@@ -185,9 +206,15 @@ func (a *AMQPConsumer) Start(acc telegraf.Accumulator) error {
 		return err
 	}
 
+	ctx, cancel := context.WithCancel(context.Background())
+	a.cancel = cancel
+
 	a.wg = &sync.WaitGroup{}
 	a.wg.Add(1)
-	go a.process(msgs, acc)
+	go func() {
+		defer a.wg.Done()
+		a.process(ctx, msgs, acc)
+	}()
 
 	go func() {
 		for {
@@ -196,7 +223,7 @@ func (a *AMQPConsumer) Start(acc telegraf.Accumulator) error {
 				break
 			}
 
-			log.Printf("I! AMQP consumer connection closed: %s; trying to reconnect", err)
+			log.Printf("I! [inputs.amqp_consumer] connection closed: %s; trying to reconnect", err)
 			for {
 				msgs, err := a.connect(amqpConf)
 				if err != nil {
@@ -206,7 +233,10 @@ func (a *AMQPConsumer) Start(acc telegraf.Accumulator) error {
 				}
 
 				a.wg.Add(1)
-				go a.process(msgs, acc)
+				go func() {
+					defer a.wg.Done()
+					a.process(ctx, msgs, acc)
+				}()
 				break
 			}
 		}
@@ -224,14 +254,14 @@ func (a *AMQPConsumer) connect(amqpConf *amqp.Config) (<-chan amqp.Delivery, err
 	p := rand.Perm(len(brokers))
 	for _, n := range p {
 		broker := brokers[n]
-		log.Printf("D! [amqp_consumer] connecting to %q", broker)
+		log.Printf("D! [inputs.amqp_consumer] connecting to %q", broker)
 		conn, err := amqp.DialConfig(broker, *amqpConf)
 		if err == nil {
 			a.conn = conn
-			log.Printf("D! [amqp_consumer] connected to %q", broker)
+			log.Printf("D! [inputs.amqp_consumer] connected to %q", broker)
 			break
 		}
-		log.Printf("D! [amqp_consumer] error connecting to %q", broker)
+		log.Printf("D! [inputs.amqp_consumer] error connecting to %q", broker)
 	}
 
 	if a.conn == nil {
@@ -320,7 +350,6 @@ func (a *AMQPConsumer) connect(amqpConf *amqp.Config) (<-chan amqp.Delivery, err
 		return nil, fmt.Errorf("Failed establishing connection to queue: %s", err)
 	}
 
-	log.Println("I! Started AMQP consumer")
 	return msgs, err
 }
 
@@ -361,31 +390,89 @@ func declareExchange(
 }
 
 // Read messages from queue and add them to the Accumulator
-func (a *AMQPConsumer) process(msgs <-chan amqp.Delivery, acc telegraf.Accumulator) {
-	defer a.wg.Done()
-	for d := range msgs {
-		metrics, err := a.parser.Parse(d.Body)
-		if err != nil {
-			log.Printf("E! %v: error parsing metric - %v", err, string(d.Body))
-		} else {
-			for _, m := range metrics {
-				acc.AddFields(m.Name(), m.Fields(), m.Tags(), m.Time())
-			}
-		}
-
-		d.Ack(false)
-	}
-	log.Printf("I! AMQP consumer queue closed")
-}
+func (a *AMQPConsumer) process(ctx context.Context, msgs <-chan amqp.Delivery, ac telegraf.Accumulator) {
+	a.deliveries = make(map[telegraf.TrackingID]amqp.Delivery)
+
+	acc := ac.WithTracking(a.MaxUndeliveredMessages)
+	sem := make(semaphore, a.MaxUndeliveredMessages)
+
+	for {
+		select {
+		case <-ctx.Done():
+			return
+		case track := <-acc.Delivered():
+			if a.onDelivery(track) {
+				<-sem
+			}
+		case sem <- empty{}:
+			select {
+			case <-ctx.Done():
+				return
+			case track := <-acc.Delivered():
+				if a.onDelivery(track) {
+					<-sem
+					<-sem
+				}
+			case d, ok := <-msgs:
+				if !ok {
+					return
+				}
+				err := a.onMessage(acc, d)
+				if err != nil {
+					acc.AddError(err)
+					<-sem
+				}
+			}
+		}
+	}
+}
+
+func (a *AMQPConsumer) onMessage(acc telegraf.TrackingAccumulator, d amqp.Delivery) error {
+	metrics, err := a.parser.Parse(d.Body)
+	if err != nil {
+		return err
+	}
+
+	id := acc.AddTrackingMetricGroup(metrics)
+	a.deliveries[id] = d
+	return nil
+}
+
+func (a *AMQPConsumer) onDelivery(track telegraf.DeliveryInfo) bool {
+	delivery, ok := a.deliveries[track.ID()]
+	if !ok {
+		// Added by a previous connection
+		return false
+	}
+
+	if track.Delivered() {
+		err := delivery.Ack(false)
+		if err != nil {
+			log.Printf("E! [inputs.amqp_consumer] Unable to ack written delivery: %d: %v",
+				delivery.DeliveryTag, err)
+			a.conn.Close()
+		}
+	} else {
+		err := delivery.Reject(false)
+		if err != nil {
+			log.Printf("E! [inputs.amqp_consumer] Unable to reject failed delivery: %d: %v",
+				delivery.DeliveryTag, err)
+			a.conn.Close()
+		}
+	}
+
+	delete(a.deliveries, track.ID())
+	return true
+}
 
 func (a *AMQPConsumer) Stop() {
+	a.cancel()
+	a.wg.Wait()
 	err := a.conn.Close()
 	if err != nil && err != amqp.ErrClosed {
-		log.Printf("E! Error closing AMQP connection: %s", err)
+		log.Printf("E! [inputs.amqp_consumer] Error closing AMQP connection: %s", err)
 		return
 	}
-	a.wg.Wait()
-	log.Println("I! Stopped AMQP service")
 }
 
 func init() {
@@ -397,6 +484,7 @@ func init() {
 			ExchangeDurability: DefaultExchangeDurability,
 			QueueDurability:    DefaultQueueDurability,
 			PrefetchCount:      DefaultPrefetchCount,
+			MaxUndeliveredMessages: defaultMaxUndeliveredMessages,
 		}
 	})
 }
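The nested select is the heart of the change: the inner case can only pull a new message after reserving a semaphore slot, so at most max_undelivered_messages are awaiting delivery, and delivery notifications keep draining even while the reader is blocked. Note the double `<-sem` when a delivery arrives inside the inner select: once for the finished message and once for the reservation that went unused. A self-contained sketch of the pattern (channel types and names simplified; this is not the plugin code):

```go
package main

import (
	"context"
	"fmt"
	"time"
)

type empty struct{}
type semaphore chan empty

func run(ctx context.Context, msgs <-chan string, delivered <-chan string) {
	sem := make(semaphore, 2) // at most 2 undelivered messages in flight

	for {
		select {
		case <-ctx.Done():
			return
		case d := <-delivered: // outputs finished a message; free its slot
			fmt.Println("delivered:", d)
			<-sem
		case sem <- empty{}: // reserve a slot, then wait for a message
			select {
			case <-ctx.Done():
				return
			case d := <-delivered:
				fmt.Println("delivered:", d)
				<-sem // for the finished message
				<-sem // for the reservation we did not use
			case m := <-msgs:
				fmt.Println("read:", m)
			}
		}
	}
}

func main() {
	ctx, cancel := context.WithTimeout(context.Background(), 100*time.Millisecond)
	defer cancel()

	msgs := make(chan string)
	delivered := make(chan string)
	go func() {
		msgs <- "m1"
		msgs <- "m2"      // both slots now taken; reads stop here
		delivered <- "m1" // a slot frees up...
		msgs <- "m3"      // ...so the next read can proceed
	}()

	run(ctx, msgs, delivered)
}
```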
plugins/inputs/internal/README.md
@@ -18,52 +18,54 @@ plugin.
 
 memstats are taken from the Go runtime: https://golang.org/pkg/runtime/#MemStats
 
-- internal\_memstats
-    - alloc\_bytes
-    - frees
-    - heap\_alloc\_bytes
-    - heap\_idle\_bytes
-    - heap\_in\_use\_bytes
-    - heap\_objects\_bytes
-    - heap\_released\_bytes
-    - heap\_sys\_bytes
-    - mallocs
-    - num\_gc
-    - pointer\_lookups
-    - sys\_bytes
-    - total\_alloc\_bytes
+- internal_memstats
+    - alloc_bytes
+    - frees
+    - heap_alloc_bytes
+    - heap_idle_bytes
+    - heap_in_use_bytes
+    - heap_objects_bytes
+    - heap_released_bytes
+    - heap_sys_bytes
+    - mallocs
+    - num_gc
+    - pointer_lookups
+    - sys_bytes
+    - total_alloc_bytes
 
 agent stats collect aggregate stats on all telegraf plugins.
 
-- internal\_agent
-    - gather\_errors
-    - metrics\_dropped
-    - metrics\_gathered
-    - metrics\_written
+- internal_agent
+    - gather_errors
+    - metrics_dropped
+    - metrics_gathered
+    - metrics_written
 
-internal\_gather stats collect aggregate stats on all input plugins
+internal_gather stats collect aggregate stats on all input plugins
 that are of the same input type. They are tagged with `input=<plugin_name>`.
 
-- internal\_gather
-    - gather\_time\_ns
-    - metrics\_gathered
+- internal_gather
+    - gather_time_ns
+    - metrics_gathered
 
-internal\_write stats collect aggregate stats on all output plugins
+internal_write stats collect aggregate stats on all output plugins
 that are of the same input type. They are tagged with `output=<plugin_name>`.
 
-- internal\_write
-    - buffer\_limit
-    - buffer\_size
-    - metrics\_written
-    - metrics\_filtered
-    - write\_time\_ns
+- internal_write
+    - buffer_limit
+    - buffer_size
+    - metrics_added
+    - metrics_written
+    - metrics_dropped
+    - metrics_filtered
+    - write_time_ns
 
-internal\_\<plugin\_name\> are metrics which are defined on a per-plugin basis, and
+internal_<plugin_name> are metrics which are defined on a per-plugin basis, and
 usually contain tags which differentiate each instance of a particular type of
 plugin.
 
-- internal\_\<plugin\_name\>
+- internal_<plugin_name>
     - individual plugin-specific fields, such as requests counts.
 
 ### Tags:
@@ -76,7 +78,7 @@ to each particular plugin.
 ```
 internal_memstats,host=tyrion alloc_bytes=4457408i,sys_bytes=10590456i,pointer_lookups=7i,mallocs=17642i,frees=7473i,heap_sys_bytes=6848512i,heap_idle_bytes=1368064i,heap_in_use_bytes=5480448i,heap_released_bytes=0i,total_alloc_bytes=6875560i,heap_alloc_bytes=4457408i,heap_objects_bytes=10169i,num_gc=2i 1480682800000000000
 internal_agent,host=tyrion metrics_written=18i,metrics_dropped=0i,metrics_gathered=19i,gather_errors=0i 1480682800000000000
-internal_write,output=file,host=tyrion buffer_limit=10000i,write_time_ns=636609i,metrics_written=18i,buffer_size=0i 1480682800000000000
+internal_write,output=file,host=tyrion buffer_limit=10000i,write_time_ns=636609i,metrics_added=18i,metrics_written=18i,buffer_size=0i 1480682800000000000
 internal_gather,input=internal,host=tyrion metrics_gathered=19i,gather_time_ns=442114i 1480682800000000000
 internal_gather,input=http_listener,host=tyrion metrics_gathered=0i,gather_time_ns=167285i 1480682800000000000
 internal_http_listener,address=:8186,host=tyrion queries_received=0i,writes_received=0i,requests_received=0i,buffers_created=0i,requests_served=0i,pings_received=0i,bytes_received=0i,not_founds_served=0i,pings_served=0i,queries_served=0i,writes_served=0i 1480682800000000000
plugins/inputs/kafka_consumer/README.md
@@ -1,18 +1,14 @@
 # Kafka Consumer Input Plugin
 
-The [Kafka](http://kafka.apache.org/) consumer plugin polls a specified Kafka
-topic and adds messages to InfluxDB. The plugin assumes messages follow the
-line protocol. [Consumer Group](http://godoc.org/github.com/wvanbergen/kafka/consumergroup)
-is used to talk to the Kafka cluster so multiple instances of telegraf can read
-from the same topic in parallel.
+The [Kafka][kafka] consumer plugin reads from Kafka
+and creates metrics using one of the supported [input data formats][].
 
-For old kafka version (< 0.8), please use the kafka_consumer_legacy input plugin
+For old kafka version (< 0.8), please use the [kafka_consumer_legacy][] input plugin
 and use the old zookeeper connection method.
 
-## Configuration
+### Configuration
 
 ```toml
-# Read metrics from Kafka topic(s)
 [[inputs.kafka_consumer]]
   ## kafka servers
   brokers = ["localhost:9092"]
@@ -44,18 +40,27 @@ and use the old zookeeper connection method.
   ## Offset (must be either "oldest" or "newest")
   offset = "oldest"
 
+  ## Maximum length of a message to consume, in bytes (default 0/unlimited);
+  ## larger messages are dropped
+  max_message_len = 1000000
+
+  ## Maximum messages to read from the broker that have not been written by an
+  ## output. For best throughput set based on the number of metrics within
+  ## each message and the size of the output's metric_batch_size.
+  ##
+  ## For example, if each message from the queue contains 10 metrics and the
+  ## output metric_batch_size is 1000, setting this to 100 will ensure that a
+  ## full batch is collected and the write is triggered immediately without
+  ## waiting until the next flush_interval.
+  # max_undelivered_messages = 1000
+
   ## Data format to consume.
   ## Each data format has its own unique set of configuration options, read
   ## more about them here:
   ## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md
   data_format = "influx"
-
-  ## Maximum length of a message to consume, in bytes (default 0/unlimited);
-  ## larger messages are dropped
-  max_message_len = 1000000
 ```
 
-## Testing
-
-Running integration tests requires running Zookeeper & Kafka. See Makefile
-for kafka container command.
+[kafka]: https://kafka.apache.org
+[kafka_consumer_legacy]: /plugins/inputs/kafka_consumer_legacy/README.md
+[input data formats]: /docs/DATA_FORMATS_INPUT.md
plugins/inputs/kafka_consumer/kafka_consumer.go
@@ -1,55 +1,54 @@
 package kafka_consumer
 
 import (
+	"context"
 	"fmt"
 	"log"
 	"strings"
 	"sync"
 
+	"github.com/Shopify/sarama"
+	cluster "github.com/bsm/sarama-cluster"
 	"github.com/influxdata/telegraf"
 	"github.com/influxdata/telegraf/internal/tls"
 	"github.com/influxdata/telegraf/plugins/inputs"
 	"github.com/influxdata/telegraf/plugins/parsers"
-
-	"github.com/Shopify/sarama"
-	cluster "github.com/bsm/sarama-cluster"
 )
 
+const (
+	defaultMaxUndeliveredMessages = 1000
+)
+
+type empty struct{}
+type semaphore chan empty
+
+type Consumer interface {
+	Errors() <-chan error
+	Messages() <-chan *sarama.ConsumerMessage
+	MarkOffset(msg *sarama.ConsumerMessage, metadata string)
+	Close() error
+}
+
 type Kafka struct {
-	ConsumerGroup string
-	ClientID      string `toml:"client_id"`
-	Topics        []string
-	Brokers       []string
-	MaxMessageLen int
-	Version       string `toml:"version"`
+	ConsumerGroup          string   `toml:"consumer_group"`
+	ClientID               string   `toml:"client_id"`
+	Topics                 []string `toml:"topics"`
+	Brokers                []string `toml:"brokers"`
+	MaxMessageLen          int      `toml:"max_message_len"`
+	Version                string   `toml:"version"`
+	MaxUndeliveredMessages int      `toml:"max_undelivered_messages"`
+	Offset                 string   `toml:"offset"`
+	SASLUsername           string   `toml:"sasl_username"`
+	SASLPassword           string   `toml:"sasl_password"`
 	tls.ClientConfig
 
-	Cluster *cluster.Consumer
-
-	// SASL Username
-	SASLUsername string `toml:"sasl_username"`
-	// SASL Password
-	SASLPassword string `toml:"sasl_password"`
-
-	// Legacy metric buffer support
-	MetricBuffer int
-	// TODO remove PointBuffer, legacy support
-	PointBuffer int
-
-	Offset string
+	cluster Consumer
 
 	parser parsers.Parser
+	wg     *sync.WaitGroup
+	cancel context.CancelFunc
 
-	sync.Mutex
-
-	// channel for all incoming kafka messages
-	in <-chan *sarama.ConsumerMessage
-	// channel for all kafka consumer errors
-	errs <-chan error
-	done chan struct{}
-
-	// keep the accumulator internally:
-	acc telegraf.Accumulator
+	// Unconfirmed messages
+	messages map[telegraf.TrackingID]*sarama.ConsumerMessage
 
 	// doNotCommitMsgs tells the parser not to call CommitUpTo on the consumer
 	// this is mostly for test purposes, but there may be a use-case for it later.
@@ -86,16 +85,25 @@ var sampleConfig = `
   consumer_group = "telegraf_metrics_consumers"
   ## Offset (must be either "oldest" or "newest")
   offset = "oldest"
+
+  ## Maximum length of a message to consume, in bytes (default 0/unlimited);
+  ## larger messages are dropped
+  max_message_len = 1000000
+
+  ## Maximum messages to read from the broker that have not been written by an
+  ## output. For best throughput set based on the number of metrics within
+  ## each message and the size of the output's metric_batch_size.
+  ##
+  ## For example, if each message from the queue contains 10 metrics and the
+  ## output metric_batch_size is 1000, setting this to 100 will ensure that a
+  ## full batch is collected and the write is triggered immediately without
+  ## waiting until the next flush_interval.
+  # max_undelivered_messages = 1000
 
   ## Data format to consume.
   ## Each data format has its own unique set of configuration options, read
   ## more about them here:
   ## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md
   data_format = "influx"
-
-  ## Maximum length of a message to consume, in bytes (default 0/unlimited);
-  ## larger messages are dropped
-  max_message_len = 1000000
 `
 
 func (k *Kafka) SampleConfig() string {
@@ -111,12 +119,8 @@ func (k *Kafka) SetParser(parser parsers.Parser) {
 }
 
 func (k *Kafka) Start(acc telegraf.Accumulator) error {
-	k.Lock()
-	defer k.Unlock()
 	var clusterErr error
 
-	k.acc = acc
-
 	config := cluster.NewConfig()
 
 	if k.Version != "" {
@@ -159,13 +163,13 @@ func (k *Kafka) Start(acc telegraf.Accumulator) error {
 	case "newest":
 		config.Consumer.Offsets.Initial = sarama.OffsetNewest
 	default:
-		log.Printf("I! WARNING: Kafka consumer invalid offset '%s', using 'oldest'\n",
+		log.Printf("I! WARNING: Kafka consumer invalid offset '%s', using 'oldest'",
 			k.Offset)
 		config.Consumer.Offsets.Initial = sarama.OffsetOldest
 	}
 
-	if k.Cluster == nil {
-		k.Cluster, clusterErr = cluster.NewConsumer(
+	if k.cluster == nil {
+		k.cluster, clusterErr = cluster.NewConsumer(
 			k.Brokers,
 			k.ConsumerGroup,
 			k.Topics,
@@ -173,67 +177,110 @@ func (k *Kafka) Start(acc telegraf.Accumulator) error {
 		)
 
 		if clusterErr != nil {
-			log.Printf("E! Error when creating Kafka Consumer, brokers: %v, topics: %v\n",
+			log.Printf("E! Error when creating Kafka Consumer, brokers: %v, topics: %v",
 				k.Brokers, k.Topics)
 			return clusterErr
 		}
-
-		// Setup message and error channels
-		k.in = k.Cluster.Messages()
-		k.errs = k.Cluster.Errors()
 	}
 
-	k.done = make(chan struct{})
-	// Start the kafka message reader
-	go k.receiver()
-	log.Printf("I! Started the kafka consumer service, brokers: %v, topics: %v\n",
+	ctx, cancel := context.WithCancel(context.Background())
+	k.cancel = cancel
+
+	// Start consumer goroutine
+	k.wg = &sync.WaitGroup{}
+	k.wg.Add(1)
+	go func() {
+		defer k.wg.Done()
+		k.receiver(ctx, acc)
+	}()
+
+	log.Printf("I! Started the kafka consumer service, brokers: %v, topics: %v",
 		k.Brokers, k.Topics)
 	return nil
 }
 
 // receiver() reads all incoming messages from the consumer, and parses them into
 // influxdb metric points.
-func (k *Kafka) receiver() {
+func (k *Kafka) receiver(ctx context.Context, ac telegraf.Accumulator) {
+	k.messages = make(map[telegraf.TrackingID]*sarama.ConsumerMessage)
+
+	acc := ac.WithTracking(k.MaxUndeliveredMessages)
+	sem := make(semaphore, k.MaxUndeliveredMessages)
+
 	for {
 		select {
-		case <-k.done:
+		case <-ctx.Done():
 			return
-		case err := <-k.errs:
-			if err != nil {
-				k.acc.AddError(fmt.Errorf("Consumer Error: %s\n", err))
-			}
-		case msg := <-k.in:
-			if k.MaxMessageLen != 0 && len(msg.Value) > k.MaxMessageLen {
-				k.acc.AddError(fmt.Errorf("Message longer than max_message_len (%d > %d)",
-					len(msg.Value), k.MaxMessageLen))
-			} else {
-				metrics, err := k.parser.Parse(msg.Value)
-				if err != nil {
-					k.acc.AddError(fmt.Errorf("Message Parse Error\nmessage: %s\nerror: %s",
-						string(msg.Value), err.Error()))
-				}
-				for _, metric := range metrics {
-					k.acc.AddFields(metric.Name(), metric.Fields(), metric.Tags(), metric.Time())
-				}
-			}
-
-			if !k.doNotCommitMsgs {
-				// TODO(cam) this locking can be removed if this PR gets merged:
-				// https://github.com/wvanbergen/kafka/pull/84
-				k.Lock()
-				k.Cluster.MarkOffset(msg, "")
-				k.Unlock()
-			}
+		case track := <-acc.Delivered():
+			<-sem
+			k.onDelivery(track)
+		case err := <-k.cluster.Errors():
+			acc.AddError(err)
+		case sem <- empty{}:
+			select {
+			case <-ctx.Done():
+				return
+			case track := <-acc.Delivered():
+				// Once for the delivered message, once to leave the case
+				<-sem
+				<-sem
+				k.onDelivery(track)
+			case err := <-k.cluster.Errors():
+				<-sem
+				acc.AddError(err)
+			case msg := <-k.cluster.Messages():
+				err := k.onMessage(acc, msg)
+				if err != nil {
+					acc.AddError(err)
+					<-sem
+				}
+			}
 		}
 	}
 }
+
+func (k *Kafka) markOffset(msg *sarama.ConsumerMessage) {
+	if !k.doNotCommitMsgs {
+		k.cluster.MarkOffset(msg, "")
+	}
+}
+
+func (k *Kafka) onMessage(acc telegraf.TrackingAccumulator, msg *sarama.ConsumerMessage) error {
+	if k.MaxMessageLen != 0 && len(msg.Value) > k.MaxMessageLen {
+		k.markOffset(msg)
+		return fmt.Errorf("Message longer than max_message_len (%d > %d)",
+			len(msg.Value), k.MaxMessageLen)
+	}
+
+	metrics, err := k.parser.Parse(msg.Value)
+	if err != nil {
+		return err
+	}
+
+	id := acc.AddTrackingMetricGroup(metrics)
+	k.messages[id] = msg
+
+	return nil
+}
+
+func (k *Kafka) onDelivery(track telegraf.DeliveryInfo) {
+	msg, ok := k.messages[track.ID()]
+	if !ok {
+		log.Printf("E! [inputs.kafka_consumer] Could not mark message delivered: %d", track.ID())
+	}
+
+	if track.Delivered() {
+		k.markOffset(msg)
+	}
+
+	delete(k.messages, track.ID())
+}
 
 func (k *Kafka) Stop() {
-	k.Lock()
-	defer k.Unlock()
-	close(k.done)
-	if err := k.Cluster.Close(); err != nil {
-		k.acc.AddError(fmt.Errorf("Error closing consumer: %s\n", err.Error()))
+	k.cancel()
+	k.wg.Wait()
+
+	if err := k.cluster.Close(); err != nil {
+		log.Printf("E! [inputs.kafka_consumer] Error closing consumer: %v", err)
 	}
 }
 
@@ -243,6 +290,8 @@ func (k *Kafka) Gather(acc telegraf.Accumulator) error {
 
 func init() {
 	inputs.Add("kafka_consumer", func() telegraf.Input {
-		return &Kafka{}
+		return &Kafka{
+			MaxUndeliveredMessages: defaultMaxUndeliveredMessages,
+		}
 	})
 }
plugins/inputs/kafka_consumer/kafka_consumer_integration_test.go
@@ -38,7 +38,6 @@ func TestReadsMetricsFromKafka(t *testing.T) {
 		ConsumerGroup: "telegraf_test_consumers",
 		Topics:        []string{testTopic},
 		Brokers:       brokerPeers,
-		PointBuffer:   100000,
 		Offset:        "oldest",
 	}
 	p, _ := parsers.NewInfluxParser()
plugins/inputs/kafka_consumer/kafka_consumer_test.go
@@ -1,13 +1,14 @@
 package kafka_consumer
 
 import (
+	"context"
 	"strings"
 	"testing"
 
+	"github.com/Shopify/sarama"
+	"github.com/influxdata/telegraf"
 	"github.com/influxdata/telegraf/plugins/parsers"
 	"github.com/influxdata/telegraf/testutil"
-
-	"github.com/Shopify/sarama"
 	"github.com/stretchr/testify/assert"
 )
 
@@ -18,31 +19,57 @@ const (
 	invalidMsg = "cpu_load_short,host=server01 1422568543702900257\n"
 )
 
-func newTestKafka() (*Kafka, chan *sarama.ConsumerMessage) {
-	in := make(chan *sarama.ConsumerMessage, 1000)
+type TestConsumer struct {
+	errors   chan error
+	messages chan *sarama.ConsumerMessage
+}
+
+func (c *TestConsumer) Errors() <-chan error {
+	return c.errors
+}
+
+func (c *TestConsumer) Messages() <-chan *sarama.ConsumerMessage {
+	return c.messages
+}
+
+func (c *TestConsumer) MarkOffset(msg *sarama.ConsumerMessage, metadata string) {
+}
+
+func (c *TestConsumer) Close() error {
+	return nil
+}
+
+func (c *TestConsumer) Inject(msg *sarama.ConsumerMessage) {
+	c.messages <- msg
+}
+
+func newTestKafka() (*Kafka, *TestConsumer) {
+	consumer := &TestConsumer{
+		errors:   make(chan error),
+		messages: make(chan *sarama.ConsumerMessage, 1000),
+	}
 	k := Kafka{
+		cluster:         consumer,
 		ConsumerGroup:   "test",
 		Topics:          []string{"telegraf"},
 		Brokers:         []string{"localhost:9092"},
 		Offset:          "oldest",
-		in:              in,
+		MaxUndeliveredMessages: defaultMaxUndeliveredMessages,
 		doNotCommitMsgs: true,
-		errs:            make(chan error, 1000),
-		done:            make(chan struct{}),
+		messages:        make(map[telegraf.TrackingID]*sarama.ConsumerMessage),
 	}
-	return &k, in
+	return &k, consumer
 }
 
 // Test that the parser parses kafka messages into points
 func TestRunParser(t *testing.T) {
-	k, in := newTestKafka()
+	k, consumer := newTestKafka()
 	acc := testutil.Accumulator{}
-	k.acc = &acc
-	defer close(k.done)
+	ctx := context.Background()
 
 	k.parser, _ = parsers.NewInfluxParser()
-	go k.receiver()
-	in <- saramaMsg(testMsg)
+	go k.receiver(ctx, &acc)
+	consumer.Inject(saramaMsg(testMsg))
 	acc.Wait(1)
 
 	assert.Equal(t, acc.NFields(), 1)
@@ -50,14 +77,13 @@ func TestRunParser(t *testing.T) {
 
 // Test that the parser ignores invalid messages
 func TestRunParserInvalidMsg(t *testing.T) {
-	k, in := newTestKafka()
+	k, consumer := newTestKafka()
 	acc := testutil.Accumulator{}
-	k.acc = &acc
-	defer close(k.done)
+	ctx := context.Background()
 
 	k.parser, _ = parsers.NewInfluxParser()
-	go k.receiver()
-	in <- saramaMsg(invalidMsg)
+	go k.receiver(ctx, &acc)
+	consumer.Inject(saramaMsg(invalidMsg))
 	acc.WaitError(1)
 
 	assert.Equal(t, acc.NFields(), 0)
@@ -66,15 +92,14 @@ func TestRunParserInvalidMsg(t *testing.T) {
 // Test that overlong messages are dropped
 func TestDropOverlongMsg(t *testing.T) {
 	const maxMessageLen = 64 * 1024
-	k, in := newTestKafka()
+	k, consumer := newTestKafka()
 	k.MaxMessageLen = maxMessageLen
 	acc := testutil.Accumulator{}
-	k.acc = &acc
-	defer close(k.done)
+	ctx := context.Background()
 	overlongMsg := strings.Repeat("v", maxMessageLen+1)
 
-	go k.receiver()
-	in <- saramaMsg(overlongMsg)
+	go k.receiver(ctx, &acc)
+	consumer.Inject(saramaMsg(overlongMsg))
 	acc.WaitError(1)
 
 	assert.Equal(t, acc.NFields(), 0)
@@ -82,14 +107,13 @@ func TestDropOverlongMsg(t *testing.T) {
 
 // Test that the parser parses kafka messages into points
 func TestRunParserAndGather(t *testing.T) {
-	k, in := newTestKafka()
+	k, consumer := newTestKafka()
 	acc := testutil.Accumulator{}
-	k.acc = &acc
-	defer close(k.done)
+	ctx := context.Background()
 
 	k.parser, _ = parsers.NewInfluxParser()
-	go k.receiver()
-	in <- saramaMsg(testMsg)
+	go k.receiver(ctx, &acc)
+	consumer.Inject(saramaMsg(testMsg))
 	acc.Wait(1)
 
 	acc.GatherError(k.Gather)
@@ -101,14 +125,13 @@ func TestRunParserAndGather(t *testing.T) {
 
 // Test that the parser parses kafka messages into points
 func TestRunParserAndGatherGraphite(t *testing.T) {
-	k, in := newTestKafka()
+	k, consumer := newTestKafka()
 	acc := testutil.Accumulator{}
-	k.acc = &acc
-	defer close(k.done)
+	ctx := context.Background()
 
 	k.parser, _ = parsers.NewGraphiteParser("_", []string{}, nil)
-	go k.receiver()
-	in <- saramaMsg(testMsgGraphite)
+	go k.receiver(ctx, &acc)
+	consumer.Inject(saramaMsg(testMsgGraphite))
 	acc.Wait(1)
 
 	acc.GatherError(k.Gather)
@@ -120,17 +143,16 @@ func TestRunParserAndGatherGraphite(t *testing.T) {
 
 // Test that the parser parses kafka messages into points
 func TestRunParserAndGatherJSON(t *testing.T) {
-	k, in := newTestKafka()
+	k, consumer := newTestKafka()
 	acc := testutil.Accumulator{}
-	k.acc = &acc
-	defer close(k.done)
+	ctx := context.Background()
 
 	k.parser, _ = parsers.NewParser(&parsers.Config{
 		DataFormat: "json",
 		MetricName: "kafka_json_test",
 	})
-	go k.receiver()
-	in <- saramaMsg(testMsgJSON)
+	go k.receiver(ctx, &acc)
+	consumer.Inject(saramaMsg(testMsgJSON))
 	acc.Wait(1)
 
 	acc.GatherError(k.Gather)
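These tests run without a broker because the plugin now depends on the small Consumer interface rather than *cluster.Consumer directly, so a channel-backed fake can stand in. The general shape of the pattern, reduced to its essentials (names illustrative):

```go
package example

// MessageSource is a minimal stand-in for the Consumer interface above:
// production code reads from it, tests swap in a channel-backed fake.
type MessageSource interface {
	Messages() <-chan string
}

type fakeSource struct {
	ch chan string
}

func (f *fakeSource) Messages() <-chan string { return f.ch }

// Inject feeds a message as if it had arrived from the real broker.
func (f *fakeSource) Inject(msg string) { f.ch <- msg }
```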
plugins/inputs/mqtt_consumer/README.md
@@ -1,14 +1,11 @@
 # MQTT Consumer Input Plugin
 
-The [MQTT](http://mqtt.org/) consumer plugin reads from
-specified MQTT topics and adds messages to InfluxDB.
-The plugin expects messages in the
-[Telegraf Input Data Formats](https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md).
+The [MQTT][mqtt] consumer plugin reads from the specified MQTT topics
+and creates metrics using one of the supported [input data formats][].
 
 ### Configuration:
 
 ```toml
-# Read metrics from MQTT topic(s)
 [[inputs.mqtt_consumer]]
   ## MQTT broker URLs to be used. The format should be scheme://host:port,
   ## schema can be tcp, ssl, or ws.
@@ -26,6 +23,16 @@ The plugin expects messages in the
   ## Connection timeout for initial connection in seconds
   connection_timeout = "30s"
 
+  ## Maximum messages to read from the broker that have not been written by an
+  ## output. For best throughput set based on the number of metrics within
+  ## each message and the size of the output's metric_batch_size.
+  ##
+  ## For example, if each message from the queue contains 10 metrics and the
+  ## output metric_batch_size is 1000, setting this to 100 will ensure that a
+  ## full batch is collected and the write is triggered immediately without
+  ## waiting until the next flush_interval.
+  # max_undelivered_messages = 1000
+
   ## Topics to subscribe to
   topics = [
     "telegraf/host01/cpu",
@@ -62,3 +69,6 @@ The plugin expects messages in the
 
 - All measurements are tagged with the incoming topic, ie
 `topic=telegraf/host01/cpu`
+
+[mqtt]: https://mqtt.org
+[input data formats]: /docs/DATA_FORMATS_INPUT.md
plugins/inputs/mqtt_consumer/mqtt_consumer.go
@@ -1,25 +1,31 @@
 package mqtt_consumer
 
 import (
+	"context"
 	"errors"
 	"fmt"
 	"log"
 	"strings"
 	"time"
 
+	"github.com/eclipse/paho.mqtt.golang"
 	"github.com/influxdata/telegraf"
 	"github.com/influxdata/telegraf/internal"
 	"github.com/influxdata/telegraf/internal/tls"
 	"github.com/influxdata/telegraf/plugins/inputs"
 	"github.com/influxdata/telegraf/plugins/parsers"
-
-	"github.com/eclipse/paho.mqtt.golang"
 )
 
-// 30 Seconds is the default used by paho.mqtt.golang
-var defaultConnectionTimeout = internal.Duration{Duration: 30 * time.Second}
+var (
+	// 30 Seconds is the default used by paho.mqtt.golang
+	defaultConnectionTimeout = internal.Duration{Duration: 30 * time.Second}
+
+	defaultMaxUndeliveredMessages = 1000
+)
 
 type ConnectionState int
+type empty struct{}
+type semaphore chan empty
 
 const (
 	Disconnected ConnectionState = iota
@@ -34,6 +40,7 @@ type MQTTConsumer struct {
 	Password          string
 	QoS               int               `toml:"qos"`
 	ConnectionTimeout internal.Duration `toml:"connection_timeout"`
+	MaxUndeliveredMessages int          `toml:"max_undelivered_messages"`
 
 	parser parsers.Parser
 
@@ -45,9 +52,14 @@ type MQTTConsumer struct {
 	tls.ClientConfig
 
 	client     mqtt.Client
-	acc        telegraf.Accumulator
+	acc        telegraf.TrackingAccumulator
 	state      ConnectionState
 	subscribed bool
+	sem        semaphore
+	messages   map[telegraf.TrackingID]bool
+
+	ctx    context.Context
+	cancel context.CancelFunc
 }
 
 var sampleConfig = `
@@ -67,6 +79,16 @@ var sampleConfig = `
   ## Connection timeout for initial connection in seconds
   connection_timeout = "30s"
 
+  ## Maximum messages to read from the broker that have not been written by an
+  ## output. For best throughput set based on the number of metrics within
+  ## each message and the size of the output's metric_batch_size.
+  ##
+  ## For example, if each message from the queue contains 10 metrics and the
+  ## output metric_batch_size is 1000, setting this to 100 will ensure that a
+  ## full batch is collected and the write is triggered immediately without
+  ## waiting until the next flush_interval.
+  # max_undelivered_messages = 1000
+
   ## Topics to subscribe to
   topics = [
     "telegraf/host01/cpu",
@@ -118,7 +140,6 @@ func (m *MQTTConsumer) Start(acc telegraf.Accumulator) error {
 		return errors.New("persistent_session requires client_id")
 	}
 
-	m.acc = acc
 	if m.QoS > 2 || m.QoS < 0 {
 		return fmt.Errorf("qos value must be 0, 1, or 2: %d", m.QoS)
 	}
@@ -127,6 +148,9 @@ func (m *MQTTConsumer) Start(acc telegraf.Accumulator) error {
@ -127,6 +148,9 @@ func (m *MQTTConsumer) Start(acc telegraf.Accumulator) error {
|
||||||
return fmt.Errorf("connection_timeout must be greater than 1s: %s", m.ConnectionTimeout.Duration)
|
return fmt.Errorf("connection_timeout must be greater than 1s: %s", m.ConnectionTimeout.Duration)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
m.acc = acc.WithTracking(m.MaxUndeliveredMessages)
|
||||||
|
m.ctx, m.cancel = context.WithCancel(context.Background())
|
||||||
|
|
||||||
opts, err := m.createOpts()
|
opts, err := m.createOpts()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
|
@ -146,8 +170,10 @@ func (m *MQTTConsumer) connect() error {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
log.Printf("I! [inputs.mqtt_consumer]: connected %v", m.Servers)
|
log.Printf("I! [inputs.mqtt_consumer] Connected %v", m.Servers)
|
||||||
m.state = Connected
|
m.state = Connected
|
||||||
|
m.sem = make(semaphore, m.MaxUndeliveredMessages)
|
||||||
|
m.messages = make(map[telegraf.TrackingID]bool)
|
||||||
|
|
||||||
// Only subscribe on first connection when using persistent sessions. On
|
// Only subscribe on first connection when using persistent sessions. On
|
||||||
// subsequent connections the subscriptions should be stored in the
|
// subsequent connections the subscriptions should be stored in the
|
||||||
|
@ -172,38 +198,64 @@ func (m *MQTTConsumer) connect() error {
|
||||||
|
|
||||||
func (m *MQTTConsumer) onConnectionLost(c mqtt.Client, err error) {
|
func (m *MQTTConsumer) onConnectionLost(c mqtt.Client, err error) {
|
||||||
m.acc.AddError(fmt.Errorf("connection lost: %v", err))
|
m.acc.AddError(fmt.Errorf("connection lost: %v", err))
|
||||||
log.Printf("D! [inputs.mqtt_consumer]: disconnected %v", m.Servers)
|
log.Printf("D! [inputs.mqtt_consumer] Disconnected %v", m.Servers)
|
||||||
m.state = Disconnected
|
m.state = Disconnected
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
func (m *MQTTConsumer) recvMessage(c mqtt.Client, msg mqtt.Message) {
|
func (m *MQTTConsumer) recvMessage(c mqtt.Client, msg mqtt.Message) {
|
||||||
topic := msg.Topic()
|
for {
|
||||||
metrics, err := m.parser.Parse(msg.Payload())
|
select {
|
||||||
|
case track := <-m.acc.Delivered():
|
||||||
|
_, ok := m.messages[track.ID()]
|
||||||
|
if !ok {
|
||||||
|
// Added by a previous connection
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
<-m.sem
|
||||||
|
// No ack, MQTT does not support durable handling
|
||||||
|
delete(m.messages, track.ID())
|
||||||
|
case m.sem <- empty{}:
|
||||||
|
err := m.onMessage(m.acc, msg)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
m.acc.AddError(err)
|
m.acc.AddError(err)
|
||||||
|
<-m.sem
|
||||||
|
}
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *MQTTConsumer) onMessage(acc telegraf.TrackingAccumulator, msg mqtt.Message) error {
|
||||||
|
metrics, err := m.parser.Parse(msg.Payload())
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
topic := msg.Topic()
|
||||||
for _, metric := range metrics {
|
for _, metric := range metrics {
|
||||||
tags := metric.Tags()
|
metric.AddTag("topic", topic)
|
||||||
tags["topic"] = topic
|
|
||||||
m.acc.AddFields(metric.Name(), metric.Fields(), tags, metric.Time())
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
id := acc.AddTrackingMetricGroup(metrics)
|
||||||
|
m.messages[id] = true
|
||||||
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (m *MQTTConsumer) Stop() {
|
func (m *MQTTConsumer) Stop() {
|
||||||
if m.state == Connected {
|
if m.state == Connected {
|
||||||
log.Printf("D! [inputs.mqtt_consumer]: disconnecting %v", m.Servers)
|
log.Printf("D! [inputs.mqtt_consumer] Disconnecting %v", m.Servers)
|
||||||
m.client.Disconnect(200)
|
m.client.Disconnect(200)
|
||||||
log.Printf("D! [inputs.mqtt_consumer]: disconnected %v", m.Servers)
|
log.Printf("D! [inputs.mqtt_consumer] Disconnected %v", m.Servers)
|
||||||
m.state = Disconnected
|
m.state = Disconnected
|
||||||
}
|
}
|
||||||
|
m.cancel()
|
||||||
}
|
}
|
||||||
|
|
||||||
func (m *MQTTConsumer) Gather(acc telegraf.Accumulator) error {
|
func (m *MQTTConsumer) Gather(acc telegraf.Accumulator) error {
|
||||||
if m.state == Disconnected {
|
if m.state == Disconnected {
|
||||||
m.state = Connecting
|
m.state = Connecting
|
||||||
log.Printf("D! [inputs.mqtt_consumer]: connecting %v", m.Servers)
|
log.Printf("D! [inputs.mqtt_consumer] Connecting %v", m.Servers)
|
||||||
m.connect()
|
m.connect()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -246,7 +298,7 @@ func (m *MQTTConsumer) createOpts() (*mqtt.ClientOptions, error) {
|
||||||
for _, server := range m.Servers {
|
for _, server := range m.Servers {
|
||||||
// Preserve support for host:port style servers; deprecated in Telegraf 1.4.4
|
// Preserve support for host:port style servers; deprecated in Telegraf 1.4.4
|
||||||
if !strings.Contains(server, "://") {
|
if !strings.Contains(server, "://") {
|
||||||
log.Printf("W! [inputs.mqtt_consumer] server %q should be updated to use `scheme://host:port` format", server)
|
log.Printf("W! [inputs.mqtt_consumer] Server %q should be updated to use `scheme://host:port` format", server)
|
||||||
if tlsCfg == nil {
|
if tlsCfg == nil {
|
||||||
server = "tcp://" + server
|
server = "tcp://" + server
|
||||||
} else {
|
} else {
|
||||||
|
@ -268,6 +320,7 @@ func init() {
|
||||||
inputs.Add("mqtt_consumer", func() telegraf.Input {
|
inputs.Add("mqtt_consumer", func() telegraf.Input {
|
||||||
return &MQTTConsumer{
|
return &MQTTConsumer{
|
||||||
ConnectionTimeout: defaultConnectionTimeout,
|
ConnectionTimeout: defaultConnectionTimeout,
|
||||||
|
MaxUndeliveredMessages: defaultMaxUndeliveredMessages,
|
||||||
state: Disconnected,
|
state: Disconnected,
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
|
|
|
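The core of the change above is the pairing of a buffered-channel semaphore with a map of outstanding tracking IDs: a slot is taken before a message group is handed to the accumulator, and released only when the matching delivery notification comes back. A self-contained sketch of that flow control, with plain ints standing in for telegraf's TrackingID and Delivered() channel:

```go
package main

import "fmt"

type empty struct{}
type semaphore chan empty

func main() {
	const maxUndelivered = 2
	sem := make(semaphore, maxUndelivered)
	messages := map[int]bool{}                  // outstanding groups, keyed by tracking ID
	delivered := make(chan int, maxUndelivered) // stand-in for acc.Delivered()

	accept := func(id int) {
		sem <- empty{}      // blocks once maxUndelivered groups are outstanding
		messages[id] = true // remember the group until the output confirms it
		delivered <- id     // pretend the output wrote the group immediately
	}

	for id := 1; id <= 3; id++ {
		accept(id)
		ack := <-delivered
		if messages[ack] { // ignore IDs left over from a previous connection
			<-sem
			delete(messages, ack)
		}
	}
	fmt.Println("outstanding groups:", len(messages)) // 0
}
```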
@@ -3,12 +3,9 @@ package mqtt_consumer
 import (
 	"testing"
 
-	"github.com/influxdata/telegraf/plugins/parsers"
-	"github.com/influxdata/telegraf/testutil"
-
-	"github.com/stretchr/testify/assert"
-
 	"github.com/eclipse/paho.mqtt.golang"
+	"github.com/influxdata/telegraf/testutil"
+	"github.com/stretchr/testify/assert"
 )
 
 const (
@@ -71,47 +68,6 @@ func TestPersistentClientIDFail(t *testing.T) {
 	assert.Error(t, err)
 }
 
-func TestRunParser(t *testing.T) {
-	n := newTestMQTTConsumer()
-	acc := testutil.Accumulator{}
-	n.acc = &acc
-	n.parser, _ = parsers.NewInfluxParser()
-
-	n.recvMessage(nil, mqttMsg(testMsg))
-
-	if a := acc.NFields(); a != 1 {
-		t.Errorf("got %v, expected %v", a, 1)
-	}
-}
-
-// Test that the parser ignores invalid messages
-func TestRunParserInvalidMsg(t *testing.T) {
-	n := newTestMQTTConsumer()
-	acc := testutil.Accumulator{}
-	n.acc = &acc
-	n.parser, _ = parsers.NewInfluxParser()
-
-	n.recvMessage(nil, mqttMsg(invalidMsg))
-
-	if a := acc.NFields(); a != 0 {
-		t.Errorf("got %v, expected %v", a, 0)
-	}
-	assert.Len(t, acc.Errors, 1)
-}
-
-// Test that the parser parses line format messages into metrics
-func TestRunParserAndGather(t *testing.T) {
-	n := newTestMQTTConsumer()
-	acc := testutil.Accumulator{}
-	n.acc = &acc
-	n.parser, _ = parsers.NewInfluxParser()
-
-	n.recvMessage(nil, mqttMsg(testMsg))
-
-	acc.AssertContainsFields(t, "cpu_load_short",
-		map[string]interface{}{"value": float64(23422)})
-}
-
 func mqttMsg(val string) mqtt.Message {
 	return &message{
 		topic: "telegraf/unit_test",
@@ -1,16 +1,14 @@
 # NATS Consumer Input Plugin
 
-The [NATS](http://www.nats.io/about/) consumer plugin reads from
-specified NATS subjects and adds messages to InfluxDB. The plugin expects messages
-in the [Telegraf Input Data Formats](https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md).
-A [Queue Group](http://www.nats.io/documentation/concepts/nats-queueing/)
-is used when subscribing to subjects so multiple instances of telegraf can read
-from a NATS cluster in parallel.
+The [NATS][nats] consumer plugin reads from the specified NATS subjects and
+creates metrics using one of the supported [input data formats][].
 
-## Configuration
+A [Queue Group][queue group] is used when subscribing to subjects so multiple
+instances of telegraf can read from a NATS cluster in parallel.
+
+### Configuration:
 
 ```toml
-# Read metrics from NATS subject(s)
 [[inputs.nats_consumer]]
   ## urls of NATS servers
   servers = ["nats://localhost:4222"]
@@ -20,13 +18,29 @@ from a NATS cluster in parallel.
   subjects = ["telegraf"]
   ## name a queue group
   queue_group = "telegraf_consumers"
-  ## Maximum number of metrics to buffer between collection intervals
-  metric_buffer = 100000
+
+  ## Sets the limits for pending msgs and bytes for each subscription
+  ## These shouldn't need to be adjusted except in very high throughput scenarios
+  # pending_message_limit = 65536
+  # pending_bytes_limit = 67108864
+
+  ## Maximum messages to read from the broker that have not been written by an
+  ## output. For best throughput set based on the number of metrics within
+  ## each message and the size of the output's metric_batch_size.
+  ##
+  ## For example, if each message from the queue contains 10 metrics and the
+  ## output metric_batch_size is 1000, setting this to 100 will ensure that a
+  ## full batch is collected and the write is triggered immediately without
+  ## waiting until the next flush_interval.
+  # max_undelivered_messages = 1000
 
   ## Data format to consume.
   ## Each data format has its own unique set of configuration options, read
   ## more about them here:
   ## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md
   data_format = "influx"
 ```
+
+[nats]: https://www.nats.io/about/
+[input data formats]: /docs/DATA_FORMATS_INPUT.md
+[queue group]: https://www.nats.io/documentation/concepts/nats-queueing/
@@ -1,6 +1,7 @@
 package natsconsumer
 
 import (
+	"context"
 	"fmt"
 	"log"
 	"sync"
@@ -11,6 +12,13 @@ import (
 	nats "github.com/nats-io/go-nats"
 )
 
+var (
+	defaultMaxUndeliveredMessages = 1000
+)
+
+type empty struct{}
+type semaphore chan empty
+
 type natsError struct {
 	conn *nats.Conn
 	sub  *nats.Subscription
@@ -23,48 +31,58 @@ func (e natsError) Error() string {
 }
 
 type natsConsumer struct {
-	QueueGroup string
-	Subjects   []string
-	Servers    []string
-	Secure     bool
+	QueueGroup string   `toml:"queue_group"`
+	Subjects   []string `toml:"subjects"`
+	Servers    []string `toml:"servers"`
+	Secure     bool     `toml:"secure"`
 
 	// Client pending limits:
-	PendingMessageLimit int
-	PendingBytesLimit   int
+	PendingMessageLimit int `toml:"pending_message_limit"`
+	PendingBytesLimit   int `toml:"pending_bytes_limit"`
+
+	MaxUndeliveredMessages int `toml:"max_undelivered_messages"`
 
 	// Legacy metric buffer support; deprecated in v0.10.3
 	MetricBuffer int
 
+	conn *nats.Conn
+	subs []*nats.Subscription
+
 	parser parsers.Parser
 
-	sync.Mutex
-	wg   sync.WaitGroup
-	Conn *nats.Conn
-	Subs []*nats.Subscription
-
 	// channel for all incoming NATS messages
 	in chan *nats.Msg
 	// channel for all NATS read errors
 	errs chan error
-	done chan struct{}
-	acc  telegraf.Accumulator
+	acc    telegraf.TrackingAccumulator
+	wg     sync.WaitGroup
+	cancel context.CancelFunc
 }
 
 var sampleConfig = `
   ## urls of NATS servers
-  # servers = ["nats://localhost:4222"]
+  servers = ["nats://localhost:4222"]
   ## Use Transport Layer Security
-  # secure = false
+  secure = false
   ## subject(s) to consume
-  # subjects = ["telegraf"]
+  subjects = ["telegraf"]
   ## name a queue group
-  # queue_group = "telegraf_consumers"
+  queue_group = "telegraf_consumers"
 
   ## Sets the limits for pending msgs and bytes for each subscription
   ## These shouldn't need to be adjusted except in very high throughput scenarios
   # pending_message_limit = 65536
   # pending_bytes_limit = 67108864
 
+  ## Maximum messages to read from the broker that have not been written by an
+  ## output. For best throughput set based on the number of metrics within
+  ## each message and the size of the output's metric_batch_size.
+  ##
+  ## For example, if each message from the queue contains 10 metrics and the
+  ## output metric_batch_size is 1000, setting this to 100 will ensure that a
+  ## full batch is collected and the write is triggered immediately without
+  ## waiting until the next flush_interval.
+  # max_undelivered_messages = 1000
+
   ## Data format to consume.
   ## Each data format has its own unique set of configuration options, read
   ## more about them here:
@@ -94,10 +112,7 @@ func (n *natsConsumer) natsErrHandler(c *nats.Conn, s *nats.Subscription, e error) {
 
 // Start the nats consumer. Caller must call *natsConsumer.Stop() to clean up.
 func (n *natsConsumer) Start(acc telegraf.Accumulator) error {
-	n.Lock()
-	defer n.Unlock()
-
-	n.acc = acc
+	n.acc = acc.WithTracking(n.MaxUndeliveredMessages)
 
 	var connectErr error
 
@@ -112,89 +127,106 @@ func (n *natsConsumer) Start(acc telegraf.Accumulator) error {
 
 	opts.Secure = n.Secure
 
-	if n.Conn == nil || n.Conn.IsClosed() {
-		n.Conn, connectErr = opts.Connect()
+	if n.conn == nil || n.conn.IsClosed() {
+		n.conn, connectErr = opts.Connect()
 		if connectErr != nil {
 			return connectErr
 		}
 
 		// Setup message and error channels
 		n.errs = make(chan error)
-		n.Conn.SetErrorHandler(n.natsErrHandler)
+		n.conn.SetErrorHandler(n.natsErrHandler)
 
 		n.in = make(chan *nats.Msg, 1000)
 		for _, subj := range n.Subjects {
-			sub, err := n.Conn.QueueSubscribe(subj, n.QueueGroup, func(m *nats.Msg) {
+			sub, err := n.conn.QueueSubscribe(subj, n.QueueGroup, func(m *nats.Msg) {
 				n.in <- m
 			})
 			if err != nil {
 				return err
 			}
 			// ensure that the subscription has been processed by the server
-			if err = n.Conn.Flush(); err != nil {
+			if err = n.conn.Flush(); err != nil {
 				return err
 			}
 			// set the subscription pending limits
 			if err = sub.SetPendingLimits(n.PendingMessageLimit, n.PendingBytesLimit); err != nil {
 				return err
 			}
-			n.Subs = append(n.Subs, sub)
+			n.subs = append(n.subs, sub)
 		}
 	}
 
-	n.done = make(chan struct{})
+	ctx, cancel := context.WithCancel(context.Background())
+	n.cancel = cancel
 
 	// Start the message reader
 	n.wg.Add(1)
-	go n.receiver()
+	go func() {
+		defer n.wg.Done()
+		go n.receiver(ctx)
+	}()
 
 	log.Printf("I! Started the NATS consumer service, nats: %v, subjects: %v, queue: %v\n",
-		n.Conn.ConnectedUrl(), n.Subjects, n.QueueGroup)
+		n.conn.ConnectedUrl(), n.Subjects, n.QueueGroup)
 
 	return nil
 }
 
 // receiver() reads all incoming messages from NATS, and parses them into
 // telegraf metrics.
-func (n *natsConsumer) receiver() {
-	defer n.wg.Done()
+func (n *natsConsumer) receiver(ctx context.Context) {
+	sem := make(semaphore, n.MaxUndeliveredMessages)
 
 	for {
 		select {
-		case <-n.done:
+		case <-ctx.Done():
 			return
-		case err := <-n.errs:
-			n.acc.AddError(fmt.Errorf("E! error reading from %s\n", err.Error()))
-		case msg := <-n.in:
-			metrics, err := n.parser.Parse(msg.Data)
-			if err != nil {
-				n.acc.AddError(fmt.Errorf("E! subject: %s, error: %s", msg.Subject, err.Error()))
-			}
-
-			for _, metric := range metrics {
-				n.acc.AddFields(metric.Name(), metric.Fields(), metric.Tags(), metric.Time())
+		case <-n.acc.Delivered():
+			<-sem
+		case err := <-n.errs:
+			n.acc.AddError(err)
+		case sem <- empty{}:
+			select {
+			case <-ctx.Done():
+				return
+			case err := <-n.errs:
+				<-sem
+				n.acc.AddError(err)
+			case <-n.acc.Delivered():
+				<-sem
+				<-sem
+			case msg := <-n.in:
+				metrics, err := n.parser.Parse(msg.Data)
+				if err != nil {
+					n.acc.AddError(fmt.Errorf("subject: %s, error: %s", msg.Subject, err.Error()))
+					<-sem
+					continue
+				}
+
+				n.acc.AddTrackingMetricGroup(metrics)
 			}
 		}
 	}
 }
 
 func (n *natsConsumer) clean() {
-	for _, sub := range n.Subs {
+	for _, sub := range n.subs {
 		if err := sub.Unsubscribe(); err != nil {
-			n.acc.AddError(fmt.Errorf("E! Error unsubscribing from subject %s in queue %s: %s\n",
+			n.acc.AddError(fmt.Errorf("Error unsubscribing from subject %s in queue %s: %s\n",
 				sub.Subject, sub.Queue, err.Error()))
 		}
 	}
 
-	if n.Conn != nil && !n.Conn.IsClosed() {
-		n.Conn.Close()
+	if n.conn != nil && !n.conn.IsClosed() {
+		n.conn.Close()
 	}
 }
 
 func (n *natsConsumer) Stop() {
-	n.Lock()
-	close(n.done)
+	n.cancel()
 	n.wg.Wait()
 	n.clean()
-	n.Unlock()
 }
 
 func (n *natsConsumer) Gather(acc telegraf.Accumulator) error {
@@ -210,6 +242,7 @@ func init() {
 			QueueGroup:          "telegraf_consumers",
 			PendingBytesLimit:   nats.DefaultSubPendingBytesLimit,
 			PendingMessageLimit: nats.DefaultSubPendingMsgsLimit,
+			MaxUndeliveredMessages: defaultMaxUndeliveredMessages,
 		}
 	})
 }
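One subtlety in the receiver above deserves a note: the inner `case <-n.acc.Delivered():` releases the semaphore twice. Taking a slot in the outer select reserves room for a message that has not arrived yet, so when a delivery notification wins the inner select instead, both the speculative reservation and the delivered group's slot must be returned. A minimal sketch of that accounting:

```go
package main

import "fmt"

type empty struct{}
type semaphore chan empty

func main() {
	sem := make(semaphore, 2)

	sem <- empty{} // slot held by a previously accepted message group
	sem <- empty{} // speculative reservation for the next incoming message

	// A delivery notification arrives before the next message does:
	<-sem // release the speculative reservation
	<-sem // release the slot of the group that was just delivered

	fmt.Println("free slots:", cap(sem)-len(sem)) // 2
}
```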
@@ -1,134 +0,0 @@
-package natsconsumer
-
-import (
-	"testing"
-
-	"github.com/influxdata/telegraf/plugins/parsers"
-	"github.com/influxdata/telegraf/testutil"
-	nats "github.com/nats-io/go-nats"
-	"github.com/stretchr/testify/assert"
-)
-
-const (
-	testMsg         = "cpu_load_short,host=server01 value=23422.0 1422568543702900257\n"
-	testMsgGraphite = "cpu.load.short.graphite 23422 1454780029"
-	testMsgJSON     = "{\"a\": 5, \"b\": {\"c\": 6}}\n"
-	invalidMsg      = "cpu_load_short,host=server01 1422568543702900257\n"
-	metricBuffer    = 5
-)
-
-func newTestNatsConsumer() (*natsConsumer, chan *nats.Msg) {
-	in := make(chan *nats.Msg, metricBuffer)
-	n := &natsConsumer{
-		QueueGroup: "test",
-		Subjects:   []string{"telegraf"},
-		Servers:    []string{"nats://localhost:4222"},
-		Secure:     false,
-		in:         in,
-		errs:       make(chan error, metricBuffer),
-		done:       make(chan struct{}),
-	}
-	return n, in
-}
-
-// Test that the parser parses NATS messages into metrics
-func TestRunParser(t *testing.T) {
-	n, in := newTestNatsConsumer()
-	acc := testutil.Accumulator{}
-	n.acc = &acc
-	defer close(n.done)
-
-	n.parser, _ = parsers.NewInfluxParser()
-	n.wg.Add(1)
-	go n.receiver()
-	in <- natsMsg(testMsg)
-
-	acc.Wait(1)
-}
-
-// Test that the parser ignores invalid messages
-func TestRunParserInvalidMsg(t *testing.T) {
-	n, in := newTestNatsConsumer()
-	acc := testutil.Accumulator{}
-	n.acc = &acc
-	defer close(n.done)
-
-	n.parser, _ = parsers.NewInfluxParser()
-	n.wg.Add(1)
-	go n.receiver()
-	in <- natsMsg(invalidMsg)
-
-	acc.WaitError(1)
-	assert.Contains(t, acc.Errors[0].Error(), "E! subject: telegraf, error: metric parse error")
-	assert.EqualValues(t, 0, acc.NMetrics())
-}
-
-// Test that the parser parses line format messages into metrics
-func TestRunParserAndGather(t *testing.T) {
-	n, in := newTestNatsConsumer()
-	acc := testutil.Accumulator{}
-	n.acc = &acc
-	defer close(n.done)
-
-	n.parser, _ = parsers.NewInfluxParser()
-	n.wg.Add(1)
-	go n.receiver()
-	in <- natsMsg(testMsg)
-
-	n.Gather(&acc)
-
-	acc.Wait(1)
-	acc.AssertContainsFields(t, "cpu_load_short",
-		map[string]interface{}{"value": float64(23422)})
-}
-
-// Test that the parser parses graphite format messages into metrics
-func TestRunParserAndGatherGraphite(t *testing.T) {
-	n, in := newTestNatsConsumer()
-	acc := testutil.Accumulator{}
-	n.acc = &acc
-	defer close(n.done)
-
-	n.parser, _ = parsers.NewGraphiteParser("_", []string{}, nil)
-	n.wg.Add(1)
-	go n.receiver()
-	in <- natsMsg(testMsgGraphite)
-
-	n.Gather(&acc)
-
-	acc.Wait(1)
-	acc.AssertContainsFields(t, "cpu_load_short_graphite",
-		map[string]interface{}{"value": float64(23422)})
-}
-
-// Test that the parser parses json format messages into metrics
-func TestRunParserAndGatherJSON(t *testing.T) {
-	n, in := newTestNatsConsumer()
-	acc := testutil.Accumulator{}
-	n.acc = &acc
-	defer close(n.done)
-
-	n.parser, _ = parsers.NewParser(&parsers.Config{
-		DataFormat: "json",
-		MetricName: "nats_json_test",
-	})
-	n.wg.Add(1)
-	go n.receiver()
-	in <- natsMsg(testMsgJSON)
-
-	n.Gather(&acc)
-
-	acc.Wait(1)
-	acc.AssertContainsFields(t, "nats_json_test",
-		map[string]interface{}{
-			"a":   float64(5),
-			"b_c": float64(6),
-		})
-}
-
-func natsMsg(val string) *nats.Msg {
-	return &nats.Msg{
-		Subject: "telegraf",
-		Data:    []byte(val),
-	}
-}
@@ -1,9 +1,9 @@
 # NSQ Consumer Input Plugin
 
-The [NSQ](http://nsq.io/) consumer plugin polls a specified NSQD
-topic and adds messages to InfluxDB. This plugin allows a message to be in any of the supported `data_format` types.
+The [NSQ][nsq] consumer plugin reads from NSQD and creates metrics using one
+of the supported [input data formats][].
 
-## Configuration
+### Configuration:
 
 ```toml
 # Read metrics from NSQD topic(s)
@@ -18,6 +18,16 @@ topic and adds messages to InfluxDB. This plugin allows a message to be in any o
   channel = "consumer"
   max_in_flight = 100
 
+  ## Maximum messages to read from the broker that have not been written by an
+  ## output. For best throughput set based on the number of metrics within
+  ## each message and the size of the output's metric_batch_size.
+  ##
+  ## For example, if each message from the queue contains 10 metrics and the
+  ## output metric_batch_size is 1000, setting this to 100 will ensure that a
+  ## full batch is collected and the write is triggered immediately without
+  ## waiting until the next flush_interval.
+  # max_undelivered_messages = 1000
+
   ## Data format to consume.
   ## Each data format has its own unique set of configuration options, read
   ## more about them here:
@@ -25,5 +35,5 @@ topic and adds messages to InfluxDB. This plugin allows a message to be in any o
   data_format = "influx"
 ```
 
-## Testing
-The `nsq_consumer_test` mocks out the interaction with `NSQD`. It requires no outside dependencies.
+[nsq]: https://nsq.io
+[input data formats]: /docs/DATA_FORMATS_INPUT.md
@@ -1,7 +1,9 @@
 package nsq_consumer
 
 import (
-	"fmt"
+	"context"
+	"log"
+	"sync"
 
 	"github.com/influxdata/telegraf"
 	"github.com/influxdata/telegraf/plugins/inputs"
@@ -9,17 +11,38 @@ import (
 	nsq "github.com/nsqio/go-nsq"
 )
 
+const (
+	defaultMaxUndeliveredMessages = 1000
+)
+
+type empty struct{}
+type semaphore chan empty
+
+type logger struct{}
+
+func (l *logger) Output(calldepth int, s string) error {
+	log.Println("D! [inputs.nsq_consumer] " + s)
+	return nil
+}
+
 //NSQConsumer represents the configuration of the plugin
 type NSQConsumer struct {
-	Server      string
-	Nsqd        []string
-	Nsqlookupd  []string
-	Topic       string
-	Channel     string
-	MaxInFlight int
+	Server      string   `toml:"server"`
+	Nsqd        []string `toml:"nsqd"`
+	Nsqlookupd  []string `toml:"nsqlookupd"`
+	Topic       string   `toml:"topic"`
+	Channel     string   `toml:"channel"`
+	MaxInFlight int      `toml:"max_in_flight"`
+
+	MaxUndeliveredMessages int `toml:"max_undelivered_messages"`
 
 	parser   parsers.Parser
 	consumer *nsq.Consumer
-	acc      telegraf.Accumulator
+
+	mu       sync.Mutex
+	messages map[telegraf.TrackingID]*nsq.Message
+	wg       sync.WaitGroup
+	cancel   context.CancelFunc
 }
 
 var sampleConfig = `
@@ -33,6 +56,16 @@ var sampleConfig = `
   channel = "consumer"
   max_in_flight = 100
 
+  ## Maximum messages to read from the broker that have not been written by an
+  ## output. For best throughput set based on the number of metrics within
+  ## each message and the size of the output's metric_batch_size.
+  ##
+  ## For example, if each message from the queue contains 10 metrics and the
+  ## output metric_batch_size is 1000, setting this to 100 will ensure that a
+  ## full batch is collected and the write is triggered immediately without
+  ## waiting until the next flush_interval.
+  # max_undelivered_messages = 1000
+
   ## Data format to consume.
   ## Each data format has its own unique set of configuration options, read
   ## more about them here:
@@ -40,12 +73,6 @@ var sampleConfig = `
   data_format = "influx"
 `
 
-func init() {
-	inputs.Add("nsq_consumer", func() telegraf.Input {
-		return &NSQConsumer{}
-	})
-}
-
 // SetParser takes the data_format from the config and finds the right parser for that format
 func (n *NSQConsumer) SetParser(parser parsers.Parser) {
 	n.parser = parser
@@ -62,32 +89,88 @@ func (n *NSQConsumer) Description() string {
 }
 
 // Start pulls data from nsq
-func (n *NSQConsumer) Start(acc telegraf.Accumulator) error {
-	n.acc = acc
+func (n *NSQConsumer) Start(ac telegraf.Accumulator) error {
+	acc := ac.WithTracking(n.MaxUndeliveredMessages)
+	sem := make(semaphore, n.MaxUndeliveredMessages)
+	n.messages = make(map[telegraf.TrackingID]*nsq.Message, n.MaxUndeliveredMessages)
+
+	ctx, cancel := context.WithCancel(context.Background())
+	n.cancel = cancel
+
 	n.connect()
-	n.consumer.AddConcurrentHandlers(nsq.HandlerFunc(func(message *nsq.Message) error {
+	n.consumer.SetLogger(&logger{}, nsq.LogLevelInfo)
+	n.consumer.AddHandler(nsq.HandlerFunc(func(message *nsq.Message) error {
 		metrics, err := n.parser.Parse(message.Body)
 		if err != nil {
-			acc.AddError(fmt.Errorf("E! NSQConsumer Parse Error\nmessage:%s\nerror:%s", string(message.Body), err.Error()))
-			return nil
-		}
-		for _, metric := range metrics {
-			n.acc.AddFields(metric.Name(), metric.Fields(), metric.Tags(), metric.Time())
+			acc.AddError(err)
+			// Remove the message from the queue
+			message.Finish()
+			return nil
 		}
-		message.Finish()
-		return nil
-	}), n.MaxInFlight)
+		if len(metrics) == 0 {
+			message.Finish()
+			return nil
+		}
+
+		select {
+		case <-ctx.Done():
+			return ctx.Err()
+		case sem <- empty{}:
+			break
+		}
+
+		n.mu.Lock()
+		id := acc.AddTrackingMetricGroup(metrics)
+		n.messages[id] = message
+		n.mu.Unlock()
+		message.DisableAutoResponse()
+		return nil
+	}))
 
 	if len(n.Nsqlookupd) > 0 {
 		n.consumer.ConnectToNSQLookupds(n.Nsqlookupd)
 	}
 	n.consumer.ConnectToNSQDs(append(n.Nsqd, n.Server))
+
+	n.wg.Add(1)
+	go func() {
+		defer n.wg.Done()
+		n.onDelivery(ctx, acc, sem)
+	}()
 	return nil
 }
 
+func (n *NSQConsumer) onDelivery(ctx context.Context, acc telegraf.TrackingAccumulator, sem semaphore) {
+	for {
+		select {
+		case <-ctx.Done():
+			return
+		case info := <-acc.Delivered():
+			n.mu.Lock()
+			msg, ok := n.messages[info.ID()]
+			if !ok {
+				n.mu.Unlock()
+				continue
+			}
+			<-sem
+			delete(n.messages, info.ID())
+			n.mu.Unlock()
+
+			if info.Delivered() {
+				msg.Finish()
+			} else {
+				msg.Requeue(-1)
+			}
+		}
+	}
+}
+
 // Stop processing messages
 func (n *NSQConsumer) Stop() {
+	n.cancel()
+	n.wg.Wait()
 	n.consumer.Stop()
+	<-n.consumer.StopChan
 }
 
 // Gather is a noop
@@ -107,3 +190,11 @@ func (n *NSQConsumer) connect() error {
 	}
 	return nil
 }
+
+func init() {
+	inputs.Add("nsq_consumer", func() telegraf.Input {
+		return &NSQConsumer{
+			MaxUndeliveredMessages: defaultMaxUndeliveredMessages,
+		}
+	})
+}
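The new onDelivery loop translates telegraf's delivery outcome into NSQ's ack protocol: a metric group every output accepted is finished, anything dropped is requeued for redelivery. A sketch of that policy with a stand-in message type (the real nsq.Message.Requeue takes a time.Duration, where -1 means use nsqd's default requeue delay):

```go
package main

import (
	"fmt"
	"time"
)

type fakeMsg struct{ id string }

func (m *fakeMsg) Finish()                 { fmt.Println("finished:", m.id) }
func (m *fakeMsg) Requeue(d time.Duration) { fmt.Println("requeued:", m.id) }

// ack mirrors the branch at the bottom of onDelivery above.
func ack(msg *fakeMsg, delivered bool) {
	if delivered {
		msg.Finish() // every output wrote the group: acknowledge it
	} else {
		msg.Requeue(-1) // something dropped it: let nsqd redeliver
	}
}

func main() {
	ack(&fakeMsg{"group-1"}, true)
	ack(&fakeMsg{"group-2"}, false)
}
```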
@@ -40,6 +40,7 @@ func TestReadsMetricsFromNSQ(t *testing.T) {
 		Topic:       "telegraf",
 		Channel:     "consume",
 		MaxInFlight: 1,
+		MaxUndeliveredMessages: defaultMaxUndeliveredMessages,
 		Nsqd:        []string{"127.0.0.1:4155"},
 	}
@@ -2,6 +2,7 @@ package socket_listener
 
 import (
 	"bufio"
+	"crypto/tls"
 	"fmt"
 	"io"
 	"log"
@@ -9,11 +10,8 @@ import (
 	"os"
 	"strings"
 	"sync"
 	"time"
 
-	"crypto/tls"
-
 	"github.com/influxdata/telegraf"
 	"github.com/influxdata/telegraf/internal"
 	tlsint "github.com/influxdata/telegraf/internal/tls"
@@ -120,7 +118,7 @@ func (ssl *streamSocketListener) read(c net.Conn) {
 			continue
 		}
 		for _, m := range metrics {
-			ssl.AddFields(m.Name(), m.Fields(), m.Tags(), m.Time())
+			ssl.AddMetric(m)
 		}
 	}
 
@@ -156,7 +154,7 @@ func (psl *packetSocketListener) listen() {
 			continue
 		}
 		for _, m := range metrics {
-			psl.AddFields(m.Name(), m.Fields(), m.Tags(), m.Time())
+			psl.AddMetric(m)
 		}
 	}
 }
@@ -11,7 +11,9 @@ func (d *Discard) Connect() error { return nil }
 func (d *Discard) Close() error         { return nil }
 func (d *Discard) SampleConfig() string { return "" }
 func (d *Discard) Description() string  { return "Send metrics to nowhere at all" }
-func (d *Discard) Write(metrics []telegraf.Metric) error { return nil }
+func (d *Discard) Write(metrics []telegraf.Metric) error {
+	return nil
+}
 
 func init() {
 	outputs.Add("discard", func() telegraf.Output { return &Discard{} })
@@ -144,7 +144,7 @@ func (p *PrometheusClient) auth(h http.Handler) http.Handler {
 	})
 }
 
-func (p *PrometheusClient) Start() error {
+func (p *PrometheusClient) Connect() error {
 	defaultCollectors := map[string]bool{
 		"gocollector": true,
 		"process":     true,
@@ -200,15 +200,6 @@ func (p *PrometheusClient) Start() error {
 	return nil
 }
 
-func (p *PrometheusClient) Stop() {
-	// plugin gets cleaned up in Close() already.
-}
-
-func (p *PrometheusClient) Connect() error {
-	// This service output does not need to make any further connections
-	return nil
-}
-
 func (p *PrometheusClient) Close() error {
 	ctx, cancel := context.WithTimeout(context.Background(), time.Second*5)
 	defer cancel()
@@ -600,7 +600,7 @@ func TestPrometheusWritePointEmptyTag(t *testing.T) {
 
 	pClient, p, err := setupPrometheus()
 	require.NoError(t, err)
-	defer pClient.Stop()
+	defer pClient.Close()
 
 	now := time.Now()
 	tags := make(map[string]string)
@@ -675,7 +675,7 @@ func setupPrometheus() (*PrometheusClient, *prometheus_input.Prometheus, error) {
 	pTesting = NewClient()
 	pTesting.Listen = "localhost:9127"
 	pTesting.Path = "/metrics"
-	err := pTesting.Start()
+	err := pTesting.Connect()
 	if err != nil {
 		return nil, nil, err
 	}
@@ -10,6 +10,7 @@ import (
 	"github.com/influxdata/telegraf"
 	"github.com/influxdata/telegraf/filter"
 	"github.com/influxdata/telegraf/internal"
+	"github.com/influxdata/telegraf/metric"
 	"github.com/influxdata/telegraf/plugins/processors"
 )
 
@@ -208,6 +209,11 @@ func (t *TopK) Apply(in ...telegraf.Metric) []telegraf.Metric {
 
 	// Add the metrics received to our internal cache
 	for _, m := range in {
+		// When tracking metrics this plugin could deadlock the input by
+		// holding undelivered metrics while the input waits for metrics to be
+		// delivered. Instead, treat all handled metrics as delivered and
+		// produced metrics as untracked in a similar way to aggregators.
+		m.Drop()
 
 		// Check if the metric has any of the fields over which we are aggregating
 		hasField := false
@@ -281,7 +287,6 @@ func (t *TopK) push() []telegraf.Metric {
 
 	// Create a one dimensional list with the top K metrics of each key
 	for i, ag := range aggregations[0:min(t.K, len(aggregations))] {
-
 		// Check whether of not we need to add fields of tags to the selected metrics
 		if len(t.aggFieldSet) != 0 || len(t.rankFieldSet) != 0 || groupTag != "" {
 			for _, m := range t.cache[ag.groupbykey] {
@@ -311,7 +316,16 @@ func (t *TopK) push() []telegraf.Metric {
 
 	t.Reset()
 
-	return ret
+	result := make([]telegraf.Metric, 0, len(ret))
+	for _, m := range ret {
+		copy, err := metric.New(m.Name(), m.Tags(), m.Fields(), m.Time(), m.Type())
+		if err != nil {
+			continue
+		}
+		result = append(result, copy)
+	}
+
+	return result
 }
 
 // Function that generates the aggregation functions
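Two halves of the topk fix above belong together: metrics are dropped (delivery-released) as they enter the cache, and push() later emits fresh copies built with metric.New, so a tracked metric is never handed to the outputs twice. A sketch of the copy step, using the same telegraf metric package the diff imports:

```go
package main

import (
	"fmt"
	"time"

	"github.com/influxdata/telegraf/metric"
)

func main() {
	m, err := metric.New(
		"cpu",
		map[string]string{"host": "server01"},
		map[string]interface{}{"value": 42.0},
		time.Now(),
	)
	if err != nil {
		panic(err)
	}

	// A fresh, untracked metric carrying the same name, tags, fields, and time.
	copy, err := metric.New(m.Name(), m.Tags(), m.Fields(), m.Time(), m.Type())
	if err != nil {
		panic(err)
	}
	fmt.Println(copy.Name(), copy.Fields())
}
```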
@@ -1,12 +1,12 @@
 package topk
 
 import (
-	"reflect"
 	"testing"
 	"time"
 
 	"github.com/influxdata/telegraf"
 	"github.com/influxdata/telegraf/internal"
+	"github.com/influxdata/telegraf/testutil"
 )
 
 // Key, value pair that represents a telegraf.Metric Field
@@ -95,7 +95,7 @@ func deepCopy(a []telegraf.Metric) []telegraf.Metric {
 
 func belongs(m telegraf.Metric, ms []telegraf.Metric) bool {
 	for _, i := range ms {
-		if reflect.DeepEqual(i, m) {
+		if testutil.MetricEqual(i, m) {
 			return true
 		}
 	}
@@ -7,6 +7,6 @@ type Processor interface {
 	// Description returns a one-sentence description on the Input
 	Description() string
 
-	// Apply the filter to the given metric
+	// Apply the filter to the given metric.
 	Apply(in ...Metric) []Metric
 }
@@ -14,6 +14,15 @@ import (
 	"github.com/stretchr/testify/assert"
 )
 
+var (
+	lastID uint64
+)
+
+func newTrackingID() telegraf.TrackingID {
+	atomic.AddUint64(&lastID, 1)
+	return telegraf.TrackingID(lastID)
+}
+
 // Metric defines a single point measurement
 type Metric struct {
 	Measurement string
@@ -23,7 +32,7 @@ type Metric struct {
 }
 
 func (p *Metric) String() string {
-	return fmt.Sprintf("%s %v", p.Measurement, p.Fields)
+	return fmt.Sprintf("%s %v %v", p.Measurement, p.Tags, p.Fields)
 }
 
 // Accumulator defines a mocked out accumulator
@@ -36,6 +45,7 @@ type Accumulator struct {
 	Discard   bool
 	Errors    []error
 	debug     bool
+	delivered chan telegraf.DeliveryInfo
 }
 
 func (a *Accumulator) NMetrics() uint64 {
@@ -154,6 +164,33 @@ func (a *Accumulator) AddHistogram(
 	a.AddFields(measurement, fields, tags, timestamp...)
 }
 
+func (a *Accumulator) AddMetric(m telegraf.Metric) {
+	a.AddFields(m.Name(), m.Fields(), m.Tags(), m.Time())
+}
+
+func (a *Accumulator) WithTracking(maxTracked int) telegraf.TrackingAccumulator {
+	return a
+}
+
+func (a *Accumulator) AddTrackingMetric(m telegraf.Metric) telegraf.TrackingID {
+	a.AddMetric(m)
+	return newTrackingID()
+}
+
+func (a *Accumulator) AddTrackingMetricGroup(group []telegraf.Metric) telegraf.TrackingID {
+	for _, m := range group {
+		a.AddMetric(m)
+	}
+	return newTrackingID()
+}
+
+func (a *Accumulator) Delivered() <-chan telegraf.DeliveryInfo {
+	if a.delivered == nil {
+		a.delivered = make(chan telegraf.DeliveryInfo)
+	}
+	return a.delivered
+}
+
 // AddError appends the given error to Accumulator.Errors.
 func (a *Accumulator) AddError(err error) {
 	if err == nil {
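Plugin tests can now exercise the tracking code path directly against the mock: WithTracking returns the same Accumulator, and AddTrackingMetricGroup records the group while minting a fresh ID from the atomic counter. A hedged usage sketch (the metric construction via metric.New is an assumption about the test's setup, not part of this diff):

```go
package example

import (
	"testing"
	"time"

	"github.com/influxdata/telegraf"
	"github.com/influxdata/telegraf/metric"
	"github.com/influxdata/telegraf/testutil"
)

func TestTrackingMock(t *testing.T) {
	acc := &testutil.Accumulator{}
	tacc := acc.WithTracking(10)

	m, err := metric.New(
		"cpu",
		map[string]string{},
		map[string]interface{}{"value": 1.0},
		time.Now(),
	)
	if err != nil {
		t.Fatal(err)
	}

	// The mock records the metrics immediately and returns a new ID.
	id := tacc.AddTrackingMetricGroup([]telegraf.Metric{m})
	_ = id // a real test would correlate this ID with tacc.Delivered()

	acc.AssertContainsFields(t, "cpu", map[string]interface{}{"value": 1.0})
}
```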
@@ -41,6 +41,18 @@ func newMetricDiff(metric telegraf.Metric) *metricDiff {
 	return m
 }
 
+func MetricEqual(expected, actual telegraf.Metric) bool {
+	var lhs, rhs *metricDiff
+	if expected != nil {
+		lhs = newMetricDiff(expected)
+	}
+	if actual != nil {
+		rhs = newMetricDiff(actual)
+	}
+
+	return cmp.Equal(lhs, rhs)
+}
+
 func RequireMetricEqual(t *testing.T, expected, actual telegraf.Metric) {
 	t.Helper()
 
@@ -60,11 +72,11 @@ func RequireMetricEqual(t *testing.T, expected, actual telegraf.Metric) {
 func RequireMetricsEqual(t *testing.T, expected, actual []telegraf.Metric) {
 	t.Helper()
 
-	lhs := make([]*metricDiff, len(expected))
+	lhs := make([]*metricDiff, 0, len(expected))
 	for _, m := range expected {
 		lhs = append(lhs, newMetricDiff(m))
 	}
-	rhs := make([]*metricDiff, len(actual))
+	rhs := make([]*metricDiff, 0, len(actual))
 	for _, m := range actual {
 		rhs = append(rhs, newMetricDiff(m))
 	}
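Unlike RequireMetricEqual, the new MetricEqual returns a bool, which is what lets the topk tests' belongs() helper above replace reflect.DeepEqual. A short usage sketch (the fields compared are whatever newMetricDiff captures; using the same timestamp for both metrics keeps the comparison unambiguous):

```go
package example

import (
	"testing"
	"time"

	"github.com/influxdata/telegraf/metric"
	"github.com/influxdata/telegraf/testutil"
)

func TestMetricEqualSketch(t *testing.T) {
	now := time.Now()
	a, _ := metric.New("cpu", map[string]string{}, map[string]interface{}{"v": 1.0}, now)
	b, _ := metric.New("cpu", map[string]string{}, map[string]interface{}{"v": 1.0}, now)

	// Boolean comparison, suitable for membership checks inside loops.
	if !testutil.MetricEqual(a, b) {
		t.Fatal("expected metrics to compare equal")
	}
}
```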