2016-01-27 21:21:36 +00:00
|
|
|
package agent
|
2015-04-01 16:34:32 +00:00
|
|
|
|
|
|
|
import (
|
2018-11-05 21:34:28 +00:00
|
|
|
"context"
|
2015-05-18 21:10:12 +00:00
|
|
|
"fmt"
|
2015-04-01 16:34:32 +00:00
|
|
|
"log"
|
2020-04-20 17:49:10 +00:00
|
|
|
"os"
|
2016-01-26 08:19:34 +00:00
|
|
|
"runtime"
|
2015-05-20 05:19:32 +00:00
|
|
|
"sync"
|
2015-04-07 16:23:35 +00:00
|
|
|
"time"
|
2015-04-01 16:34:32 +00:00
|
|
|
|
2016-01-27 21:21:36 +00:00
|
|
|
"github.com/influxdata/telegraf"
|
2020-05-04 18:09:10 +00:00
|
|
|
"github.com/influxdata/telegraf/config"
|
2016-05-30 22:24:42 +00:00
|
|
|
"github.com/influxdata/telegraf/internal"
|
2020-05-04 18:09:10 +00:00
|
|
|
"github.com/influxdata/telegraf/models"
|
2018-11-05 21:34:28 +00:00
|
|
|
"github.com/influxdata/telegraf/plugins/serializers/influx"
|
2015-04-01 16:34:32 +00:00
|
|
|
)
|
|
|
|
|
2018-11-05 21:34:28 +00:00
|
|
|
// Agent runs a set of plugins.
|
2015-04-01 16:34:32 +00:00
|
|
|
type Agent struct {
|
2015-11-24 21:22:11 +00:00
|
|
|
Config *config.Config
|
2015-04-01 16:34:32 +00:00
|
|
|
}
|
|
|
|
|
2018-11-05 21:34:28 +00:00
|
|
|
// NewAgent returns an Agent for the given Config.
|
2015-11-24 21:22:11 +00:00
|
|
|
func NewAgent(config *config.Config) (*Agent, error) {
|
2015-11-26 01:42:07 +00:00
|
|
|
a := &Agent{
|
|
|
|
Config: config,
|
2015-09-02 16:30:44 +00:00
|
|
|
}
|
2018-11-05 21:34:28 +00:00
|
|
|
return a, nil
|
|
|
|
}
|
2015-04-01 16:34:32 +00:00
|
|
|
|
2018-11-05 21:34:28 +00:00
|
|
|
// Run starts and runs the Agent until the context is done.
|
|
|
|
func (a *Agent) Run(ctx context.Context) error {
|
|
|
|
log.Printf("I! [agent] Config: Interval:%s, Quiet:%#v, Hostname:%#v, "+
|
|
|
|
"Flush Interval:%s",
|
|
|
|
a.Config.Agent.Interval.Duration, a.Config.Agent.Quiet,
|
|
|
|
a.Config.Agent.Hostname, a.Config.Agent.FlushInterval.Duration)
|
2016-03-21 21:33:19 +00:00
|
|
|
|
2018-11-05 21:34:28 +00:00
|
|
|
if ctx.Err() != nil {
|
|
|
|
return ctx.Err()
|
|
|
|
}
|
2015-04-07 16:56:40 +00:00
|
|
|
|
2019-06-14 22:12:27 +00:00
|
|
|
log.Printf("D! [agent] Initializing plugins")
|
|
|
|
err := a.initPlugins()
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
2018-11-05 21:34:28 +00:00
|
|
|
log.Printf("D! [agent] Connecting outputs")
|
2019-06-14 22:12:27 +00:00
|
|
|
err = a.connectOutputs(ctx)
|
2018-11-05 21:34:28 +00:00
|
|
|
if err != nil {
|
|
|
|
return err
|
2015-11-24 01:00:54 +00:00
|
|
|
}
|
|
|
|
|
2018-11-05 21:34:28 +00:00
|
|
|
inputC := make(chan telegraf.Metric, 100)
|
|
|
|
procC := make(chan telegraf.Metric, 100)
|
|
|
|
outputC := make(chan telegraf.Metric, 100)
|
2015-04-07 00:24:24 +00:00
|
|
|
|
2018-11-05 21:34:28 +00:00
|
|
|
startTime := time.Now()
|
2015-10-22 16:17:57 +00:00
|
|
|
|
2018-11-05 21:34:28 +00:00
|
|
|
log.Printf("D! [agent] Starting service inputs")
|
|
|
|
err = a.startServiceInputs(ctx, inputC)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
2015-04-01 16:34:32 +00:00
|
|
|
}
|
|
|
|
|
2018-11-05 21:34:28 +00:00
|
|
|
var wg sync.WaitGroup
|
2015-08-12 17:04:25 +00:00
|
|
|
|
2018-11-05 21:34:28 +00:00
|
|
|
src := inputC
|
|
|
|
dst := inputC
|
2016-01-26 08:19:34 +00:00
|
|
|
|
2018-11-05 21:34:28 +00:00
|
|
|
wg.Add(1)
|
|
|
|
go func(dst chan telegraf.Metric) {
|
|
|
|
defer wg.Done()
|
2016-01-26 08:19:34 +00:00
|
|
|
|
2018-11-05 21:34:28 +00:00
|
|
|
err := a.runInputs(ctx, startTime, dst)
|
|
|
|
if err != nil {
|
|
|
|
log.Printf("E! [agent] Error running inputs: %v", err)
|
|
|
|
}
|
2016-11-07 08:34:46 +00:00
|
|
|
|
2018-11-05 21:34:28 +00:00
|
|
|
log.Printf("D! [agent] Stopping service inputs")
|
|
|
|
a.stopServiceInputs()
|
2016-11-07 08:34:46 +00:00
|
|
|
|
2018-11-05 21:34:28 +00:00
|
|
|
close(dst)
|
|
|
|
log.Printf("D! [agent] Input channel closed")
|
|
|
|
}(dst)
|
2016-05-19 15:36:58 +00:00
|
|
|
|
2018-11-05 21:34:28 +00:00
|
|
|
src = dst
|
2015-05-20 05:19:32 +00:00
|
|
|
|
2018-11-05 21:34:28 +00:00
|
|
|
if len(a.Config.Processors) > 0 {
|
|
|
|
dst = procC
|
2015-08-26 23:43:09 +00:00
|
|
|
|
2018-11-05 21:34:28 +00:00
|
|
|
wg.Add(1)
|
|
|
|
go func(src, dst chan telegraf.Metric) {
|
|
|
|
defer wg.Done()
|
2015-05-20 05:19:32 +00:00
|
|
|
|
2018-11-05 21:34:28 +00:00
|
|
|
err := a.runProcessors(src, dst)
|
|
|
|
if err != nil {
|
|
|
|
log.Printf("E! [agent] Error running processors: %v", err)
|
|
|
|
}
|
|
|
|
close(dst)
|
|
|
|
log.Printf("D! [agent] Processor channel closed")
|
|
|
|
}(src, dst)
|
|
|
|
|
|
|
|
src = dst
|
2015-05-20 05:19:32 +00:00
|
|
|
}
|
|
|
|
|
2018-11-05 21:34:28 +00:00
|
|
|
if len(a.Config.Aggregators) > 0 {
|
|
|
|
dst = outputC
|
2016-05-19 15:36:58 +00:00
|
|
|
|
2018-11-05 21:34:28 +00:00
|
|
|
wg.Add(1)
|
|
|
|
go func(src, dst chan telegraf.Metric) {
|
|
|
|
defer wg.Done()
|
|
|
|
|
|
|
|
err := a.runAggregators(startTime, src, dst)
|
2016-05-19 15:36:58 +00:00
|
|
|
if err != nil {
|
2018-11-05 21:34:28 +00:00
|
|
|
log.Printf("E! [agent] Error running aggregators: %v", err)
|
2016-05-19 15:36:58 +00:00
|
|
|
}
|
2018-11-05 21:34:28 +00:00
|
|
|
close(dst)
|
|
|
|
log.Printf("D! [agent] Output channel closed")
|
|
|
|
}(src, dst)
|
|
|
|
|
|
|
|
src = dst
|
|
|
|
}
|
|
|
|
|
|
|
|
wg.Add(1)
|
|
|
|
go func(src chan telegraf.Metric) {
|
|
|
|
defer wg.Done()
|
|
|
|
|
|
|
|
err := a.runOutputs(startTime, src)
|
|
|
|
if err != nil {
|
|
|
|
log.Printf("E! [agent] Error running outputs: %v", err)
|
2016-05-19 15:36:58 +00:00
|
|
|
}
|
2018-11-05 21:34:28 +00:00
|
|
|
}(src)
|
|
|
|
|
|
|
|
wg.Wait()
|
|
|
|
|
|
|
|
log.Printf("D! [agent] Closing outputs")
|
2019-03-22 20:59:30 +00:00
|
|
|
a.closeOutputs()
|
2018-11-05 21:34:28 +00:00
|
|
|
|
2018-12-27 02:54:50 +00:00
|
|
|
log.Printf("D! [agent] Stopped Successfully")
|
2018-11-05 21:34:28 +00:00
|
|
|
return nil
|
2016-05-19 15:36:58 +00:00
|
|
|
}
|
|
|
|
|
2020-06-01 22:26:20 +00:00
|
|
|
// Test runs the inputs, processors and aggregators for a single gather and
|
|
|
|
// writes the metrics to stdout.
|
|
|
|
func (a *Agent) Test(ctx context.Context, wait time.Duration) error {
|
|
|
|
outputF := func(src <-chan telegraf.Metric) {
|
2018-11-05 21:34:28 +00:00
|
|
|
s := influx.NewSerializer()
|
|
|
|
s.SetFieldSortOrder(influx.SortFields)
|
2020-06-01 22:26:20 +00:00
|
|
|
|
|
|
|
for metric := range src {
|
2018-11-05 21:34:28 +00:00
|
|
|
octets, err := s.Serialize(metric)
|
|
|
|
if err == nil {
|
|
|
|
fmt.Print("> ", string(octets))
|
2015-10-22 00:32:43 +00:00
|
|
|
}
|
2019-06-14 19:06:25 +00:00
|
|
|
metric.Reject()
|
2015-10-22 00:32:43 +00:00
|
|
|
}
|
2020-06-01 22:26:20 +00:00
|
|
|
}
|
2015-04-07 00:24:24 +00:00
|
|
|
|
2020-06-01 22:26:20 +00:00
|
|
|
err := a.test(ctx, wait, outputF)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
if models.GlobalGatherErrors.Get() != 0 {
|
|
|
|
return fmt.Errorf("input plugins recorded %d errors", models.GlobalGatherErrors.Get())
|
|
|
|
}
|
|
|
|
return nil
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
// Once runs the full agent for a single gather.
|
|
|
|
func (a *Agent) Once(ctx context.Context, wait time.Duration) error {
|
|
|
|
outputF := func(src <-chan telegraf.Metric) {
|
|
|
|
interval := a.Config.Agent.FlushInterval.Duration
|
|
|
|
|
|
|
|
ctx, cancel := context.WithCancel(context.Background())
|
|
|
|
|
|
|
|
var wg sync.WaitGroup
|
|
|
|
for _, output := range a.Config.Outputs {
|
|
|
|
interval := interval
|
|
|
|
// Overwrite agent flush_interval if this plugin has its own.
|
|
|
|
if output.Config.FlushInterval != 0 {
|
|
|
|
interval = output.Config.FlushInterval
|
|
|
|
}
|
|
|
|
|
|
|
|
jitter := 0 * time.Second
|
|
|
|
|
|
|
|
ticker := NewRollingTicker(interval, jitter)
|
|
|
|
defer ticker.Stop()
|
|
|
|
|
|
|
|
wg.Add(1)
|
|
|
|
go func(output *models.RunningOutput) {
|
|
|
|
defer wg.Done()
|
|
|
|
a.flushLoop(ctx, output, ticker)
|
|
|
|
}(output)
|
2017-02-13 10:40:38 +00:00
|
|
|
}
|
|
|
|
|
2020-06-01 22:26:20 +00:00
|
|
|
for metric := range src {
|
|
|
|
for i, output := range a.Config.Outputs {
|
|
|
|
if i == len(a.Config.Outputs)-1 {
|
|
|
|
output.AddMetric(metric)
|
|
|
|
} else {
|
|
|
|
output.AddMetric(metric.Copy())
|
|
|
|
}
|
|
|
|
}
|
2019-06-14 19:06:25 +00:00
|
|
|
}
|
2020-06-01 22:26:20 +00:00
|
|
|
|
|
|
|
cancel()
|
|
|
|
wg.Wait()
|
|
|
|
}
|
|
|
|
|
|
|
|
err := a.test(ctx, wait, outputF)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
2019-06-14 19:06:25 +00:00
|
|
|
}
|
|
|
|
|
2020-06-01 22:26:20 +00:00
|
|
|
if models.GlobalGatherErrors.Get() != 0 {
|
|
|
|
return fmt.Errorf("input plugins recorded %d errors", models.GlobalGatherErrors.Get())
|
|
|
|
}
|
|
|
|
|
|
|
|
unsent := 0
|
|
|
|
for _, output := range a.Config.Outputs {
|
|
|
|
unsent += output.BufferLength()
|
|
|
|
}
|
|
|
|
if unsent != 0 {
|
|
|
|
return fmt.Errorf("output plugins unable to send %d metrics", unsent)
|
|
|
|
}
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// Test runs the agent and performs a single gather sending output to the
|
|
|
|
// outputF. After gathering pauses for the wait duration to allow service
|
|
|
|
// inputs to run.
|
|
|
|
func (a *Agent) test(ctx context.Context, wait time.Duration, outputF func(<-chan telegraf.Metric)) error {
|
2019-07-26 00:36:46 +00:00
|
|
|
log.Printf("D! [agent] Initializing plugins")
|
|
|
|
err := a.initPlugins()
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
2020-06-01 22:26:20 +00:00
|
|
|
log.Printf("D! [agent] Connecting outputs")
|
|
|
|
err = a.connectOutputs(ctx)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
2019-06-14 19:06:25 +00:00
|
|
|
}
|
|
|
|
|
2020-06-01 22:26:20 +00:00
|
|
|
inputC := make(chan telegraf.Metric, 100)
|
|
|
|
procC := make(chan telegraf.Metric, 100)
|
|
|
|
outputC := make(chan telegraf.Metric, 100)
|
|
|
|
|
|
|
|
startTime := time.Now()
|
2019-06-14 19:06:25 +00:00
|
|
|
|
2020-06-01 22:26:20 +00:00
|
|
|
var wg sync.WaitGroup
|
2019-06-14 19:06:25 +00:00
|
|
|
|
2020-06-01 22:26:20 +00:00
|
|
|
src := inputC
|
|
|
|
dst := inputC
|
|
|
|
|
|
|
|
wg.Add(1)
|
|
|
|
go func(dst chan telegraf.Metric) {
|
|
|
|
defer wg.Done()
|
|
|
|
|
|
|
|
a.testRunInputs(ctx, wait, dst)
|
|
|
|
|
|
|
|
close(dst)
|
|
|
|
log.Printf("D! [agent] Input channel closed")
|
|
|
|
}(dst)
|
|
|
|
|
|
|
|
src = dst
|
|
|
|
|
|
|
|
if len(a.Config.Processors) > 0 {
|
|
|
|
dst = procC
|
|
|
|
|
|
|
|
wg.Add(1)
|
|
|
|
go func(src, dst chan telegraf.Metric) {
|
|
|
|
defer wg.Done()
|
|
|
|
|
|
|
|
err := a.runProcessors(src, dst)
|
|
|
|
if err != nil {
|
|
|
|
log.Printf("E! [agent] Error running processors: %v", err)
|
2018-11-15 23:44:36 +00:00
|
|
|
}
|
2020-06-01 22:26:20 +00:00
|
|
|
close(dst)
|
|
|
|
log.Printf("D! [agent] Processor channel closed")
|
|
|
|
}(src, dst)
|
2015-09-21 17:05:58 +00:00
|
|
|
|
2020-06-01 22:26:20 +00:00
|
|
|
src = dst
|
|
|
|
}
|
|
|
|
|
|
|
|
if len(a.Config.Aggregators) > 0 {
|
|
|
|
dst = outputC
|
|
|
|
|
|
|
|
wg.Add(1)
|
|
|
|
go func(src, dst chan telegraf.Metric) {
|
|
|
|
defer wg.Done()
|
|
|
|
|
|
|
|
err := a.runAggregators(startTime, src, dst)
|
|
|
|
if err != nil {
|
|
|
|
log.Printf("E! [agent] Error running aggregators: %v", err)
|
2019-06-14 19:06:25 +00:00
|
|
|
}
|
2020-06-01 22:26:20 +00:00
|
|
|
close(dst)
|
|
|
|
log.Printf("D! [agent] Output channel closed")
|
|
|
|
}(src, dst)
|
|
|
|
|
|
|
|
src = dst
|
|
|
|
}
|
|
|
|
|
|
|
|
wg.Add(1)
|
|
|
|
go func(src <-chan telegraf.Metric) {
|
|
|
|
defer wg.Done()
|
|
|
|
outputF(src)
|
|
|
|
}(src)
|
|
|
|
|
|
|
|
wg.Wait()
|
|
|
|
|
|
|
|
log.Printf("D! [agent] Closing outputs")
|
|
|
|
a.closeOutputs()
|
|
|
|
|
|
|
|
log.Printf("D! [agent] Stopped Successfully")
|
|
|
|
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func (a *Agent) testRunInputs(
|
|
|
|
ctx context.Context,
|
|
|
|
wait time.Duration,
|
|
|
|
dst chan<- telegraf.Metric,
|
|
|
|
) {
|
|
|
|
log.Printf("D! [agent] Starting service inputs")
|
|
|
|
for _, input := range a.Config.Inputs {
|
|
|
|
if si, ok := input.Input.(telegraf.ServiceInput); ok {
|
|
|
|
// Service input plugins are not subject to timestamp rounding.
|
|
|
|
// This only applies to the accumulator passed to Start(), the
|
|
|
|
// Gather() accumulator does apply rounding according to the
|
|
|
|
// precision agent setting.
|
|
|
|
acc := NewAccumulator(input, dst)
|
|
|
|
acc.SetPrecision(time.Nanosecond)
|
|
|
|
|
|
|
|
err := si.Start(acc)
|
|
|
|
if err != nil {
|
2019-06-14 19:06:25 +00:00
|
|
|
acc.AddError(err)
|
2020-06-01 22:26:20 +00:00
|
|
|
si.Stop()
|
|
|
|
continue
|
2015-09-23 20:54:22 +00:00
|
|
|
}
|
2015-09-21 17:05:58 +00:00
|
|
|
}
|
2015-09-23 20:54:22 +00:00
|
|
|
}
|
2018-11-05 21:34:28 +00:00
|
|
|
|
2020-06-01 22:26:20 +00:00
|
|
|
nul := make(chan telegraf.Metric)
|
|
|
|
go func() {
|
|
|
|
for range nul {
|
|
|
|
}
|
|
|
|
}()
|
|
|
|
|
|
|
|
var wg sync.WaitGroup
|
|
|
|
for _, input := range a.Config.Inputs {
|
|
|
|
wg.Add(1)
|
|
|
|
go func(input *models.RunningInput) {
|
|
|
|
defer wg.Done()
|
|
|
|
|
|
|
|
// Run plugins that require multiple gathers to calculate rate
|
|
|
|
// and delta metrics twice.
|
|
|
|
switch input.Config.Name {
|
|
|
|
case "cpu", "mongodb", "procstat":
|
|
|
|
nulAcc := NewAccumulator(input, nul)
|
|
|
|
nulAcc.SetPrecision(a.Precision())
|
|
|
|
if err := input.Input.Gather(nulAcc); err != nil {
|
|
|
|
nulAcc.AddError(err)
|
|
|
|
}
|
|
|
|
|
|
|
|
time.Sleep(500 * time.Millisecond)
|
|
|
|
}
|
2019-06-14 19:06:25 +00:00
|
|
|
|
2020-06-01 22:26:20 +00:00
|
|
|
acc := NewAccumulator(input, dst)
|
|
|
|
acc.SetPrecision(a.Precision())
|
|
|
|
|
|
|
|
if err := input.Input.Gather(acc); err != nil {
|
|
|
|
acc.AddError(err)
|
|
|
|
}
|
|
|
|
}(input)
|
2019-08-16 21:44:38 +00:00
|
|
|
}
|
2020-06-01 22:26:20 +00:00
|
|
|
wg.Wait()
|
|
|
|
close(nul)
|
|
|
|
|
|
|
|
internal.SleepContext(ctx, wait)
|
|
|
|
|
|
|
|
log.Printf("D! [agent] Stopping service inputs")
|
|
|
|
a.stopServiceInputs()
|
|
|
|
|
2015-04-07 00:24:24 +00:00
|
|
|
}
|
|
|
|
|
2018-11-05 21:34:28 +00:00
|
|
|
// runInputs starts and triggers the periodic gather for Inputs.
|
|
|
|
//
|
|
|
|
// When the context is done the timers are stopped and this function returns
|
|
|
|
// after all ongoing Gather calls complete.
|
|
|
|
func (a *Agent) runInputs(
|
|
|
|
ctx context.Context,
|
|
|
|
startTime time.Time,
|
|
|
|
dst chan<- telegraf.Metric,
|
|
|
|
) error {
|
2015-10-21 20:05:27 +00:00
|
|
|
var wg sync.WaitGroup
|
2018-11-05 21:34:28 +00:00
|
|
|
for _, input := range a.Config.Inputs {
|
|
|
|
interval := a.Config.Agent.Interval.Duration
|
|
|
|
jitter := a.Config.Agent.CollectionJitter.Duration
|
2016-01-22 18:54:12 +00:00
|
|
|
|
2018-11-05 21:34:28 +00:00
|
|
|
// Overwrite agent interval if this plugin has its own.
|
|
|
|
if input.Config.Interval != 0 {
|
|
|
|
interval = input.Config.Interval
|
|
|
|
}
|
|
|
|
|
2020-05-06 18:59:16 +00:00
|
|
|
var ticker Ticker
|
|
|
|
if a.Config.Agent.RoundInterval {
|
|
|
|
ticker = NewAlignedTicker(startTime, interval, jitter)
|
|
|
|
} else {
|
|
|
|
ticker = NewUnalignedTicker(interval, jitter)
|
|
|
|
}
|
|
|
|
defer ticker.Stop()
|
|
|
|
|
2018-11-05 21:34:28 +00:00
|
|
|
acc := NewAccumulator(input, dst)
|
2019-03-29 22:40:33 +00:00
|
|
|
acc.SetPrecision(a.Precision())
|
2018-11-05 21:34:28 +00:00
|
|
|
|
|
|
|
wg.Add(1)
|
|
|
|
go func(input *models.RunningInput) {
|
2016-01-22 18:54:12 +00:00
|
|
|
defer wg.Done()
|
2020-05-06 18:59:16 +00:00
|
|
|
a.gatherLoop(ctx, acc, input, ticker)
|
2018-11-05 21:34:28 +00:00
|
|
|
}(input)
|
|
|
|
}
|
|
|
|
|
2020-05-06 18:59:16 +00:00
|
|
|
wg.Wait()
|
2018-11-05 21:34:28 +00:00
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// gather runs an input's gather function periodically until the context is
|
|
|
|
// done.
|
2020-05-06 18:59:16 +00:00
|
|
|
func (a *Agent) gatherLoop(
|
2018-11-05 21:34:28 +00:00
|
|
|
ctx context.Context,
|
|
|
|
acc telegraf.Accumulator,
|
|
|
|
input *models.RunningInput,
|
2020-05-06 18:59:16 +00:00
|
|
|
ticker Ticker,
|
2018-11-05 21:34:28 +00:00
|
|
|
) {
|
|
|
|
defer panicRecover(input)
|
|
|
|
|
|
|
|
for {
|
|
|
|
select {
|
2020-05-06 18:59:16 +00:00
|
|
|
case <-ticker.Elapsed():
|
|
|
|
err := a.gatherOnce(acc, input, ticker)
|
|
|
|
if err != nil {
|
|
|
|
acc.AddError(err)
|
|
|
|
}
|
2018-11-05 21:34:28 +00:00
|
|
|
case <-ctx.Done():
|
|
|
|
return
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// gatherOnce runs the input's Gather function once, logging a warning each
|
|
|
|
// interval it fails to complete before.
|
|
|
|
func (a *Agent) gatherOnce(
|
|
|
|
acc telegraf.Accumulator,
|
|
|
|
input *models.RunningInput,
|
2020-05-06 18:59:16 +00:00
|
|
|
ticker Ticker,
|
2018-11-05 21:34:28 +00:00
|
|
|
) error {
|
|
|
|
done := make(chan error)
|
|
|
|
go func() {
|
|
|
|
done <- input.Gather(acc)
|
|
|
|
}()
|
|
|
|
|
|
|
|
for {
|
|
|
|
select {
|
|
|
|
case err := <-done:
|
|
|
|
return err
|
2020-05-06 18:59:16 +00:00
|
|
|
case <-ticker.Elapsed():
|
2019-08-21 23:49:07 +00:00
|
|
|
log.Printf("W! [agent] [%s] did not complete within its interval",
|
|
|
|
input.LogName())
|
2018-11-05 21:34:28 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// runProcessors applies processors to metrics.
|
|
|
|
func (a *Agent) runProcessors(
|
|
|
|
src <-chan telegraf.Metric,
|
|
|
|
agg chan<- telegraf.Metric,
|
|
|
|
) error {
|
|
|
|
for metric := range src {
|
|
|
|
metrics := a.applyProcessors(metric)
|
|
|
|
|
|
|
|
for _, metric := range metrics {
|
|
|
|
agg <- metric
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// applyProcessors applies all processors to a metric.
|
|
|
|
func (a *Agent) applyProcessors(m telegraf.Metric) []telegraf.Metric {
|
|
|
|
metrics := []telegraf.Metric{m}
|
|
|
|
for _, processor := range a.Config.Processors {
|
|
|
|
metrics = processor.Apply(metrics...)
|
|
|
|
}
|
|
|
|
|
|
|
|
return metrics
|
2015-10-16 22:13:32 +00:00
|
|
|
}
|
|
|
|
|
2019-03-29 22:40:33 +00:00
|
|
|
func updateWindow(start time.Time, roundInterval bool, period time.Duration) (time.Time, time.Time) {
|
|
|
|
var until time.Time
|
|
|
|
if roundInterval {
|
|
|
|
until = internal.AlignTime(start, period)
|
|
|
|
if until == start {
|
|
|
|
until = internal.AlignTime(start.Add(time.Nanosecond), period)
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
until = start.Add(period)
|
|
|
|
}
|
|
|
|
|
|
|
|
since := until.Add(-period)
|
|
|
|
|
|
|
|
return since, until
|
|
|
|
}
|
|
|
|
|
|
|
|
// runAggregators adds metrics to the aggregators and triggers their periodic
|
|
|
|
// push call.
|
2018-11-05 21:34:28 +00:00
|
|
|
//
|
2019-03-29 22:40:33 +00:00
|
|
|
// Runs until src is closed and all metrics have been processed. Will call
|
|
|
|
// push one final time before returning.
|
2018-11-05 21:34:28 +00:00
|
|
|
func (a *Agent) runAggregators(
|
|
|
|
startTime time.Time,
|
|
|
|
src <-chan telegraf.Metric,
|
|
|
|
dst chan<- telegraf.Metric,
|
2018-07-12 00:33:27 +00:00
|
|
|
) error {
|
2018-11-05 21:34:28 +00:00
|
|
|
ctx, cancel := context.WithCancel(context.Background())
|
|
|
|
|
2019-03-29 22:40:33 +00:00
|
|
|
// Before calling Add, initialize the aggregation window. This ensures
|
|
|
|
// that any metric created after start time will be aggregated.
|
|
|
|
for _, agg := range a.Config.Aggregators {
|
|
|
|
since, until := updateWindow(startTime, a.Config.Agent.RoundInterval, agg.Period())
|
|
|
|
agg.UpdateWindow(since, until)
|
|
|
|
}
|
|
|
|
|
2016-09-08 14:22:10 +00:00
|
|
|
var wg sync.WaitGroup
|
|
|
|
wg.Add(1)
|
|
|
|
go func() {
|
|
|
|
defer wg.Done()
|
2018-11-05 21:34:28 +00:00
|
|
|
for metric := range src {
|
|
|
|
var dropOriginal bool
|
|
|
|
for _, agg := range a.Config.Aggregators {
|
|
|
|
if ok := agg.Add(metric); ok {
|
|
|
|
dropOriginal = true
|
2016-09-08 14:22:10 +00:00
|
|
|
}
|
|
|
|
}
|
2015-10-23 17:23:08 +00:00
|
|
|
|
2018-11-05 21:34:28 +00:00
|
|
|
if !dropOriginal {
|
|
|
|
dst <- metric
|
2019-03-27 01:01:50 +00:00
|
|
|
} else {
|
|
|
|
metric.Drop()
|
2018-07-12 00:33:27 +00:00
|
|
|
}
|
|
|
|
}
|
2018-11-05 21:34:28 +00:00
|
|
|
cancel()
|
2018-07-12 00:33:27 +00:00
|
|
|
}()
|
|
|
|
|
2018-11-05 21:34:28 +00:00
|
|
|
aggregations := make(chan telegraf.Metric, 100)
|
2019-03-29 22:40:33 +00:00
|
|
|
wg.Add(1)
|
|
|
|
go func() {
|
|
|
|
defer wg.Done()
|
2018-11-05 21:34:28 +00:00
|
|
|
|
2019-03-29 22:40:33 +00:00
|
|
|
var aggWg sync.WaitGroup
|
|
|
|
for _, agg := range a.Config.Aggregators {
|
|
|
|
aggWg.Add(1)
|
|
|
|
go func(agg *models.RunningAggregator) {
|
|
|
|
defer aggWg.Done()
|
|
|
|
|
|
|
|
acc := NewAccumulator(agg, aggregations)
|
|
|
|
acc.SetPrecision(a.Precision())
|
|
|
|
a.push(ctx, agg, acc)
|
|
|
|
}(agg)
|
|
|
|
}
|
2018-11-05 21:34:28 +00:00
|
|
|
|
2019-03-29 22:40:33 +00:00
|
|
|
aggWg.Wait()
|
|
|
|
close(aggregations)
|
|
|
|
}()
|
2018-11-05 21:34:28 +00:00
|
|
|
|
|
|
|
for metric := range aggregations {
|
|
|
|
metrics := a.applyProcessors(metric)
|
|
|
|
for _, metric := range metrics {
|
|
|
|
dst <- metric
|
2017-07-13 22:34:21 +00:00
|
|
|
}
|
2018-11-05 21:34:28 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
wg.Wait()
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2019-03-29 22:40:33 +00:00
|
|
|
// push runs the push for a single aggregator every period.
|
2018-11-05 21:34:28 +00:00
|
|
|
func (a *Agent) push(
|
|
|
|
ctx context.Context,
|
|
|
|
aggregator *models.RunningAggregator,
|
|
|
|
acc telegraf.Accumulator,
|
|
|
|
) {
|
2015-10-16 22:13:32 +00:00
|
|
|
for {
|
2019-03-29 22:40:33 +00:00
|
|
|
// Ensures that Push will be called for each period, even if it has
|
|
|
|
// already elapsed before this function is called. This is guaranteed
|
|
|
|
// because so long as only Push updates the EndPeriod. This method
|
|
|
|
// also avoids drift by not using a ticker.
|
|
|
|
until := time.Until(aggregator.EndPeriod())
|
|
|
|
|
2015-10-16 22:13:32 +00:00
|
|
|
select {
|
2019-03-29 22:40:33 +00:00
|
|
|
case <-time.After(until):
|
|
|
|
aggregator.Push(acc)
|
2018-11-05 21:34:28 +00:00
|
|
|
break
|
|
|
|
case <-ctx.Done():
|
|
|
|
aggregator.Push(acc)
|
|
|
|
return
|
2015-10-16 22:13:32 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-11-05 21:34:28 +00:00
|
|
|
// runOutputs triggers the periodic write for Outputs.
|
|
|
|
//
|
2019-03-29 22:40:33 +00:00
|
|
|
|
|
|
|
// Runs until src is closed and all metrics have been processed. Will call
|
|
|
|
// Write one final time before returning.
|
2018-11-05 21:34:28 +00:00
|
|
|
func (a *Agent) runOutputs(
|
|
|
|
startTime time.Time,
|
|
|
|
src <-chan telegraf.Metric,
|
|
|
|
) error {
|
|
|
|
interval := a.Config.Agent.FlushInterval.Duration
|
|
|
|
jitter := a.Config.Agent.FlushJitter.Duration
|
|
|
|
|
|
|
|
ctx, cancel := context.WithCancel(context.Background())
|
|
|
|
|
2015-05-20 05:19:32 +00:00
|
|
|
var wg sync.WaitGroup
|
2018-11-05 21:34:28 +00:00
|
|
|
for _, output := range a.Config.Outputs {
|
|
|
|
interval := interval
|
|
|
|
// Overwrite agent flush_interval if this plugin has its own.
|
|
|
|
if output.Config.FlushInterval != 0 {
|
|
|
|
interval = output.Config.FlushInterval
|
|
|
|
}
|
2015-05-20 05:19:32 +00:00
|
|
|
|
2019-11-13 00:43:39 +00:00
|
|
|
jitter := jitter
|
|
|
|
// Overwrite agent flush_jitter if this plugin has its own.
|
|
|
|
if output.Config.FlushJitter != nil {
|
|
|
|
jitter = *output.Config.FlushJitter
|
|
|
|
}
|
|
|
|
|
2020-05-06 18:59:16 +00:00
|
|
|
ticker := NewRollingTicker(interval, jitter)
|
|
|
|
defer ticker.Stop()
|
|
|
|
|
2018-11-05 21:34:28 +00:00
|
|
|
wg.Add(1)
|
|
|
|
go func(output *models.RunningOutput) {
|
|
|
|
defer wg.Done()
|
2020-05-06 18:59:16 +00:00
|
|
|
a.flushLoop(ctx, output, ticker)
|
2018-11-05 21:34:28 +00:00
|
|
|
}(output)
|
|
|
|
}
|
|
|
|
|
|
|
|
for metric := range src {
|
|
|
|
for i, output := range a.Config.Outputs {
|
|
|
|
if i == len(a.Config.Outputs)-1 {
|
|
|
|
output.AddMetric(metric)
|
|
|
|
} else {
|
|
|
|
output.AddMetric(metric.Copy())
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2018-07-12 00:33:27 +00:00
|
|
|
|
2018-11-05 21:34:28 +00:00
|
|
|
log.Println("I! [agent] Hang on, flushing any cached metrics before shutdown")
|
|
|
|
cancel()
|
|
|
|
wg.Wait()
|
2018-07-12 00:33:27 +00:00
|
|
|
|
2018-11-05 21:34:28 +00:00
|
|
|
return nil
|
|
|
|
}
|
2015-10-16 22:13:32 +00:00
|
|
|
|
2020-04-20 17:49:10 +00:00
|
|
|
// flushLoop runs an output's flush function periodically until the context is
|
2018-11-05 21:34:28 +00:00
|
|
|
// done.
|
2020-04-20 17:49:10 +00:00
|
|
|
func (a *Agent) flushLoop(
|
2018-11-05 21:34:28 +00:00
|
|
|
ctx context.Context,
|
|
|
|
output *models.RunningOutput,
|
2020-05-06 18:59:16 +00:00
|
|
|
ticker Ticker,
|
2018-11-05 21:34:28 +00:00
|
|
|
) {
|
|
|
|
logError := func(err error) {
|
|
|
|
if err != nil {
|
2019-08-21 23:49:07 +00:00
|
|
|
log.Printf("E! [agent] Error writing to %s: %v", output.LogName(), err)
|
2018-11-05 21:34:28 +00:00
|
|
|
}
|
2015-10-21 20:05:27 +00:00
|
|
|
}
|
|
|
|
|
2020-04-20 17:49:10 +00:00
|
|
|
// watch for flush requests
|
|
|
|
flushRequested := make(chan os.Signal, 1)
|
|
|
|
watchForFlushSignal(flushRequested)
|
2020-04-21 17:33:47 +00:00
|
|
|
defer stopListeningForFlushSignal(flushRequested)
|
2020-04-20 17:49:10 +00:00
|
|
|
|
2018-11-05 21:34:28 +00:00
|
|
|
for {
|
|
|
|
// Favor shutdown over other methods.
|
|
|
|
select {
|
|
|
|
case <-ctx.Done():
|
2020-05-06 18:59:16 +00:00
|
|
|
logError(a.flushOnce(output, ticker, output.Write))
|
2018-11-05 21:34:28 +00:00
|
|
|
return
|
|
|
|
default:
|
|
|
|
}
|
|
|
|
|
|
|
|
select {
|
2020-04-20 17:49:10 +00:00
|
|
|
case <-ctx.Done():
|
2020-05-06 18:59:16 +00:00
|
|
|
logError(a.flushOnce(output, ticker, output.Write))
|
2020-04-20 17:49:10 +00:00
|
|
|
return
|
2020-05-06 18:59:16 +00:00
|
|
|
case <-ticker.Elapsed():
|
|
|
|
logError(a.flushOnce(output, ticker, output.Write))
|
2020-04-20 17:49:10 +00:00
|
|
|
case <-flushRequested:
|
2020-05-06 18:59:16 +00:00
|
|
|
logError(a.flushOnce(output, ticker, output.Write))
|
2018-11-05 21:34:28 +00:00
|
|
|
case <-output.BatchReady:
|
|
|
|
// Favor the ticker over batch ready
|
|
|
|
select {
|
2020-05-06 18:59:16 +00:00
|
|
|
case <-ticker.Elapsed():
|
|
|
|
logError(a.flushOnce(output, ticker, output.Write))
|
2018-11-05 21:34:28 +00:00
|
|
|
default:
|
2020-05-06 18:59:16 +00:00
|
|
|
logError(a.flushOnce(output, ticker, output.WriteBatch))
|
2018-11-05 21:34:28 +00:00
|
|
|
}
|
2015-10-16 22:13:32 +00:00
|
|
|
}
|
2018-11-05 21:34:28 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// flushOnce runs the output's Write function once, logging a warning each
|
|
|
|
// interval it fails to complete before.
|
|
|
|
func (a *Agent) flushOnce(
|
|
|
|
output *models.RunningOutput,
|
2020-05-06 18:59:16 +00:00
|
|
|
ticker Ticker,
|
2018-11-05 21:34:28 +00:00
|
|
|
writeFunc func() error,
|
|
|
|
) error {
|
|
|
|
done := make(chan error)
|
|
|
|
go func() {
|
|
|
|
done <- writeFunc()
|
2015-10-16 22:13:32 +00:00
|
|
|
}()
|
|
|
|
|
2018-11-05 21:34:28 +00:00
|
|
|
for {
|
|
|
|
select {
|
|
|
|
case err := <-done:
|
|
|
|
output.LogBufferStatus()
|
|
|
|
return err
|
2020-05-06 18:59:16 +00:00
|
|
|
case <-ticker.Elapsed():
|
2019-08-21 23:49:07 +00:00
|
|
|
log.Printf("W! [agent] [%q] did not complete within its flush interval",
|
|
|
|
output.LogName())
|
2018-11-05 21:34:28 +00:00
|
|
|
output.LogBufferStatus()
|
|
|
|
}
|
2016-10-10 12:43:47 +00:00
|
|
|
}
|
|
|
|
|
2018-11-05 21:34:28 +00:00
|
|
|
}
|
|
|
|
|
2019-06-14 22:12:27 +00:00
|
|
|
// initPlugins runs the Init function on plugins.
|
|
|
|
func (a *Agent) initPlugins() error {
|
|
|
|
for _, input := range a.Config.Inputs {
|
|
|
|
err := input.Init()
|
|
|
|
if err != nil {
|
|
|
|
return fmt.Errorf("could not initialize input %s: %v",
|
2019-08-21 23:49:07 +00:00
|
|
|
input.LogName(), err)
|
2019-06-14 22:12:27 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
for _, processor := range a.Config.Processors {
|
|
|
|
err := processor.Init()
|
|
|
|
if err != nil {
|
|
|
|
return fmt.Errorf("could not initialize processor %s: %v",
|
|
|
|
processor.Config.Name, err)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
for _, aggregator := range a.Config.Aggregators {
|
|
|
|
err := aggregator.Init()
|
|
|
|
if err != nil {
|
|
|
|
return fmt.Errorf("could not initialize aggregator %s: %v",
|
|
|
|
aggregator.Config.Name, err)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
for _, output := range a.Config.Outputs {
|
|
|
|
err := output.Init()
|
|
|
|
if err != nil {
|
|
|
|
return fmt.Errorf("could not initialize output %s: %v",
|
|
|
|
output.Config.Name, err)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2018-11-05 21:34:28 +00:00
|
|
|
// connectOutputs connects to all outputs.
|
|
|
|
func (a *Agent) connectOutputs(ctx context.Context) error {
|
|
|
|
for _, output := range a.Config.Outputs {
|
2019-08-21 23:49:07 +00:00
|
|
|
log.Printf("D! [agent] Attempting connection to [%s]", output.LogName())
|
2018-11-05 21:34:28 +00:00
|
|
|
err := output.Output.Connect()
|
|
|
|
if err != nil {
|
2019-08-21 23:49:07 +00:00
|
|
|
log.Printf("E! [agent] Failed to connect to [%s], retrying in 15s, "+
|
|
|
|
"error was '%s'", output.LogName(), err)
|
2018-11-05 21:34:28 +00:00
|
|
|
|
|
|
|
err := internal.SleepContext(ctx, 15*time.Second)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
err = output.Output.Connect()
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
}
|
2019-08-21 23:49:07 +00:00
|
|
|
log.Printf("D! [agent] Successfully connected to %s", output.LogName())
|
2018-11-05 21:34:28 +00:00
|
|
|
}
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// closeOutputs closes all outputs.
|
2019-03-22 20:59:30 +00:00
|
|
|
func (a *Agent) closeOutputs() {
|
2018-11-05 21:34:28 +00:00
|
|
|
for _, output := range a.Config.Outputs {
|
2019-03-22 20:59:30 +00:00
|
|
|
output.Close()
|
2018-11-05 21:34:28 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// startServiceInputs starts all service inputs.
|
|
|
|
func (a *Agent) startServiceInputs(
|
|
|
|
ctx context.Context,
|
|
|
|
dst chan<- telegraf.Metric,
|
|
|
|
) error {
|
|
|
|
started := []telegraf.ServiceInput{}
|
|
|
|
|
2018-06-05 23:30:53 +00:00
|
|
|
for _, input := range a.Config.Inputs {
|
2018-11-05 21:34:28 +00:00
|
|
|
if si, ok := input.Input.(telegraf.ServiceInput); ok {
|
|
|
|
// Service input plugins are not subject to timestamp rounding.
|
|
|
|
// This only applies to the accumulator passed to Start(), the
|
|
|
|
// Gather() accumulator does apply rounding according to the
|
|
|
|
// precision agent setting.
|
|
|
|
acc := NewAccumulator(input, dst)
|
2019-03-29 22:40:33 +00:00
|
|
|
acc.SetPrecision(time.Nanosecond)
|
2018-11-05 21:34:28 +00:00
|
|
|
|
|
|
|
err := si.Start(acc)
|
|
|
|
if err != nil {
|
2019-08-21 23:49:07 +00:00
|
|
|
log.Printf("E! [agent] Service for [%s] failed to start: %v",
|
|
|
|
input.LogName(), err)
|
2018-11-05 21:34:28 +00:00
|
|
|
|
|
|
|
for _, si := range started {
|
|
|
|
si.Stop()
|
|
|
|
}
|
|
|
|
|
2018-06-05 23:30:53 +00:00
|
|
|
return err
|
|
|
|
}
|
2018-11-05 21:34:28 +00:00
|
|
|
|
|
|
|
started = append(started, si)
|
2018-06-05 23:30:53 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-11-05 21:34:28 +00:00
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// stopServiceInputs stops all service inputs.
|
|
|
|
func (a *Agent) stopServiceInputs() {
|
2018-07-07 07:54:21 +00:00
|
|
|
for _, input := range a.Config.Inputs {
|
2018-11-05 21:34:28 +00:00
|
|
|
if si, ok := input.Input.(telegraf.ServiceInput); ok {
|
|
|
|
si.Stop()
|
2018-07-07 07:54:21 +00:00
|
|
|
}
|
|
|
|
}
|
2018-11-05 21:34:28 +00:00
|
|
|
}
|
2018-07-07 07:54:21 +00:00
|
|
|
|
2019-03-29 22:40:33 +00:00
|
|
|
// Returns the rounding precision for metrics.
|
|
|
|
func (a *Agent) Precision() time.Duration {
|
|
|
|
precision := a.Config.Agent.Precision.Duration
|
|
|
|
interval := a.Config.Agent.Interval.Duration
|
|
|
|
|
|
|
|
if precision > 0 {
|
|
|
|
return precision
|
|
|
|
}
|
|
|
|
|
|
|
|
switch {
|
|
|
|
case interval >= time.Second:
|
|
|
|
return time.Second
|
|
|
|
case interval >= time.Millisecond:
|
|
|
|
return time.Millisecond
|
|
|
|
case interval >= time.Microsecond:
|
|
|
|
return time.Microsecond
|
|
|
|
default:
|
|
|
|
return time.Nanosecond
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-11-05 21:34:28 +00:00
|
|
|
// panicRecover displays an error if an input panics.
|
|
|
|
func panicRecover(input *models.RunningInput) {
|
|
|
|
if err := recover(); err != nil {
|
|
|
|
trace := make([]byte, 2048)
|
|
|
|
runtime.Stack(trace, true)
|
2019-08-21 23:49:07 +00:00
|
|
|
log.Printf("E! FATAL: [%s] panicked: %s, Stack:\n%s",
|
|
|
|
input.LogName(), err, trace)
|
2018-11-05 21:34:28 +00:00
|
|
|
log.Println("E! PLEASE REPORT THIS PANIC ON GITHUB with " +
|
|
|
|
"stack trace, configuration, and OS information: " +
|
|
|
|
"https://github.com/influxdata/telegraf/issues/new/choose")
|
|
|
|
}
|
2015-04-01 16:34:32 +00:00
|
|
|
}
|