Fix aggregator window and shutdown of multiple aggregators (#5644)
This commit is contained in:
@@ -114,21 +114,8 @@ func (ac *accumulator) AddError(err error) {
|
||||
log.Printf("E! [%s]: Error in plugin: %v", ac.maker.Name(), err)
|
||||
}
|
||||
|
||||
func (ac *accumulator) SetPrecision(precision, interval time.Duration) {
|
||||
if precision > 0 {
|
||||
ac.precision = precision
|
||||
return
|
||||
}
|
||||
switch {
|
||||
case interval >= time.Second:
|
||||
ac.precision = time.Second
|
||||
case interval >= time.Millisecond:
|
||||
ac.precision = time.Millisecond
|
||||
case interval >= time.Microsecond:
|
||||
ac.precision = time.Microsecond
|
||||
default:
|
||||
ac.precision = time.Nanosecond
|
||||
}
|
||||
func (ac *accumulator) SetPrecision(precision time.Duration) {
|
||||
ac.precision = precision
|
||||
}
|
||||
|
||||
func (ac *accumulator) getTime(t []time.Time) time.Time {
|
||||
|
||||
@@ -74,7 +74,6 @@ func TestSetPrecision(t *testing.T) {
|
||||
name string
|
||||
unset bool
|
||||
precision time.Duration
|
||||
interval time.Duration
|
||||
timestamp time.Time
|
||||
expected time.Time
|
||||
}{
|
||||
@@ -86,13 +85,13 @@ func TestSetPrecision(t *testing.T) {
|
||||
},
|
||||
{
|
||||
name: "second interval",
|
||||
interval: time.Second,
|
||||
precision: time.Second,
|
||||
timestamp: time.Date(2006, time.February, 10, 12, 0, 0, 82912748, time.UTC),
|
||||
expected: time.Date(2006, time.February, 10, 12, 0, 0, 0, time.UTC),
|
||||
},
|
||||
{
|
||||
name: "microsecond interval",
|
||||
interval: time.Microsecond,
|
||||
precision: time.Microsecond,
|
||||
timestamp: time.Date(2006, time.February, 10, 12, 0, 0, 82912748, time.UTC),
|
||||
expected: time.Date(2006, time.February, 10, 12, 0, 0, 82913000, time.UTC),
|
||||
},
|
||||
@@ -109,7 +108,7 @@ func TestSetPrecision(t *testing.T) {
|
||||
|
||||
a := NewAccumulator(&TestMetricMaker{}, metrics)
|
||||
if !tt.unset {
|
||||
a.SetPrecision(tt.precision, tt.interval)
|
||||
a.SetPrecision(tt.precision)
|
||||
}
|
||||
|
||||
a.AddFields("acctest",
|
||||
|
||||
129
agent/agent.go
129
agent/agent.go
@@ -180,8 +180,7 @@ func (a *Agent) Test(ctx context.Context) error {
|
||||
}
|
||||
|
||||
acc := NewAccumulator(input, metricC)
|
||||
acc.SetPrecision(a.Config.Agent.Precision.Duration,
|
||||
a.Config.Agent.Interval.Duration)
|
||||
acc.SetPrecision(a.Precision())
|
||||
input.SetDefaultTags(a.Config.Tags)
|
||||
|
||||
// Special instructions for some inputs. cpu, for example, needs to be
|
||||
@@ -189,8 +188,7 @@ func (a *Agent) Test(ctx context.Context) error {
|
||||
switch input.Name() {
|
||||
case "inputs.cpu", "inputs.mongodb", "inputs.procstat":
|
||||
nulAcc := NewAccumulator(input, nulC)
|
||||
nulAcc.SetPrecision(a.Config.Agent.Precision.Duration,
|
||||
a.Config.Agent.Interval.Duration)
|
||||
nulAcc.SetPrecision(a.Precision())
|
||||
if err := input.Input.Gather(nulAcc); err != nil {
|
||||
return err
|
||||
}
|
||||
@@ -222,7 +220,6 @@ func (a *Agent) runInputs(
|
||||
var wg sync.WaitGroup
|
||||
for _, input := range a.Config.Inputs {
|
||||
interval := a.Config.Agent.Interval.Duration
|
||||
precision := a.Config.Agent.Precision.Duration
|
||||
jitter := a.Config.Agent.CollectionJitter.Duration
|
||||
|
||||
// Overwrite agent interval if this plugin has its own.
|
||||
@@ -231,7 +228,7 @@ func (a *Agent) runInputs(
|
||||
}
|
||||
|
||||
acc := NewAccumulator(input, dst)
|
||||
acc.SetPrecision(precision, interval)
|
||||
acc.SetPrecision(a.Precision())
|
||||
|
||||
wg.Add(1)
|
||||
go func(input *models.RunningInput) {
|
||||
@@ -339,10 +336,27 @@ func (a *Agent) applyProcessors(m telegraf.Metric) []telegraf.Metric {
|
||||
return metrics
|
||||
}
|
||||
|
||||
// runAggregators triggers the periodic push for Aggregators.
|
||||
func updateWindow(start time.Time, roundInterval bool, period time.Duration) (time.Time, time.Time) {
|
||||
var until time.Time
|
||||
if roundInterval {
|
||||
until = internal.AlignTime(start, period)
|
||||
if until == start {
|
||||
until = internal.AlignTime(start.Add(time.Nanosecond), period)
|
||||
}
|
||||
} else {
|
||||
until = start.Add(period)
|
||||
}
|
||||
|
||||
since := until.Add(-period)
|
||||
|
||||
return since, until
|
||||
}
|
||||
|
||||
// runAggregators adds metrics to the aggregators and triggers their periodic
|
||||
// push call.
|
||||
//
|
||||
// When the context is done a final push will occur and then this function
|
||||
// will return.
|
||||
// Runs until src is closed and all metrics have been processed. Will call
|
||||
// push one final time before returning.
|
||||
func (a *Agent) runAggregators(
|
||||
startTime time.Time,
|
||||
src <-chan telegraf.Metric,
|
||||
@@ -350,6 +364,13 @@ func (a *Agent) runAggregators(
|
||||
) error {
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
|
||||
// Before calling Add, initialize the aggregation window. This ensures
|
||||
// that any metric created after start time will be aggregated.
|
||||
for _, agg := range a.Config.Aggregators {
|
||||
since, until := updateWindow(startTime, a.Config.Agent.RoundInterval, agg.Period())
|
||||
agg.UpdateWindow(since, until)
|
||||
}
|
||||
|
||||
var wg sync.WaitGroup
|
||||
wg.Add(1)
|
||||
go func() {
|
||||
@@ -371,33 +392,29 @@ func (a *Agent) runAggregators(
|
||||
cancel()
|
||||
}()
|
||||
|
||||
precision := a.Config.Agent.Precision.Duration
|
||||
interval := a.Config.Agent.Interval.Duration
|
||||
aggregations := make(chan telegraf.Metric, 100)
|
||||
for _, agg := range a.Config.Aggregators {
|
||||
wg.Add(1)
|
||||
go func(agg *models.RunningAggregator) {
|
||||
defer func() {
|
||||
wg.Done()
|
||||
close(aggregations)
|
||||
}()
|
||||
wg.Add(1)
|
||||
go func() {
|
||||
defer wg.Done()
|
||||
|
||||
if a.Config.Agent.RoundInterval {
|
||||
// Aggregators are aligned to the agent interval regardless of
|
||||
// their period.
|
||||
err := internal.SleepContext(ctx, internal.AlignDuration(startTime, interval))
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
}
|
||||
var aggWg sync.WaitGroup
|
||||
for _, agg := range a.Config.Aggregators {
|
||||
aggWg.Add(1)
|
||||
go func(agg *models.RunningAggregator) {
|
||||
defer aggWg.Done()
|
||||
|
||||
agg.SetPeriodStart(startTime)
|
||||
acc := NewAccumulator(agg, aggregations)
|
||||
acc.SetPrecision(a.Precision())
|
||||
fmt.Println(1)
|
||||
a.push(ctx, agg, acc)
|
||||
fmt.Println(2)
|
||||
}(agg)
|
||||
}
|
||||
|
||||
acc := NewAccumulator(agg, aggregations)
|
||||
acc.SetPrecision(precision, interval)
|
||||
a.push(ctx, agg, acc)
|
||||
}(agg)
|
||||
}
|
||||
aggWg.Wait()
|
||||
fmt.Println(3)
|
||||
close(aggregations)
|
||||
}()
|
||||
|
||||
for metric := range aggregations {
|
||||
metrics := a.applyProcessors(metric)
|
||||
@@ -405,39 +422,42 @@ func (a *Agent) runAggregators(
|
||||
dst <- metric
|
||||
}
|
||||
}
|
||||
fmt.Println(4)
|
||||
|
||||
wg.Wait()
|
||||
fmt.Println(5)
|
||||
return nil
|
||||
}
|
||||
|
||||
// push runs the push for a single aggregator every period. More simple than
|
||||
// the output/input version as timeout should be less likely.... not really
|
||||
// because the output channel can block for now.
|
||||
// push runs the push for a single aggregator every period.
|
||||
func (a *Agent) push(
|
||||
ctx context.Context,
|
||||
aggregator *models.RunningAggregator,
|
||||
acc telegraf.Accumulator,
|
||||
) {
|
||||
ticker := time.NewTicker(aggregator.Period())
|
||||
defer ticker.Stop()
|
||||
|
||||
for {
|
||||
// Ensures that Push will be called for each period, even if it has
|
||||
// already elapsed before this function is called. This is guaranteed
|
||||
// because so long as only Push updates the EndPeriod. This method
|
||||
// also avoids drift by not using a ticker.
|
||||
until := time.Until(aggregator.EndPeriod())
|
||||
|
||||
select {
|
||||
case <-ticker.C:
|
||||
case <-time.After(until):
|
||||
aggregator.Push(acc)
|
||||
break
|
||||
case <-ctx.Done():
|
||||
aggregator.Push(acc)
|
||||
return
|
||||
}
|
||||
|
||||
aggregator.Push(acc)
|
||||
}
|
||||
}
|
||||
|
||||
// runOutputs triggers the periodic write for Outputs.
|
||||
//
|
||||
// When the context is done, outputs continue to run until their buffer is
|
||||
// closed, afterwich they run flush once more.
|
||||
|
||||
// Runs until src is closed and all metrics have been processed. Will call
|
||||
// Write one final time before returning.
|
||||
func (a *Agent) runOutputs(
|
||||
startTime time.Time,
|
||||
src <-chan telegraf.Metric,
|
||||
@@ -608,7 +628,7 @@ func (a *Agent) startServiceInputs(
|
||||
// Gather() accumulator does apply rounding according to the
|
||||
// precision agent setting.
|
||||
acc := NewAccumulator(input, dst)
|
||||
acc.SetPrecision(time.Nanosecond, 0)
|
||||
acc.SetPrecision(time.Nanosecond)
|
||||
|
||||
err := si.Start(acc)
|
||||
if err != nil {
|
||||
@@ -638,6 +658,27 @@ func (a *Agent) stopServiceInputs() {
|
||||
}
|
||||
}
|
||||
|
||||
// Returns the rounding precision for metrics.
|
||||
func (a *Agent) Precision() time.Duration {
|
||||
precision := a.Config.Agent.Precision.Duration
|
||||
interval := a.Config.Agent.Interval.Duration
|
||||
|
||||
if precision > 0 {
|
||||
return precision
|
||||
}
|
||||
|
||||
switch {
|
||||
case interval >= time.Second:
|
||||
return time.Second
|
||||
case interval >= time.Millisecond:
|
||||
return time.Millisecond
|
||||
case interval >= time.Microsecond:
|
||||
return time.Microsecond
|
||||
default:
|
||||
return time.Nanosecond
|
||||
}
|
||||
}
|
||||
|
||||
// panicRecover displays an error if an input panics.
|
||||
func panicRecover(input *models.RunningInput) {
|
||||
if err := recover(); err != nil {
|
||||
|
||||
@@ -2,15 +2,13 @@ package agent
|
||||
|
||||
import (
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/influxdata/telegraf/internal/config"
|
||||
|
||||
// needing to load the plugins
|
||||
_ "github.com/influxdata/telegraf/plugins/inputs/all"
|
||||
// needing to load the outputs
|
||||
_ "github.com/influxdata/telegraf/plugins/outputs/all"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestAgent_OmitHostname(t *testing.T) {
|
||||
@@ -109,3 +107,62 @@ func TestAgent_LoadOutput(t *testing.T) {
|
||||
a, _ = NewAgent(c)
|
||||
assert.Equal(t, 3, len(a.Config.Outputs))
|
||||
}
|
||||
|
||||
func TestWindow(t *testing.T) {
|
||||
parse := func(s string) time.Time {
|
||||
tm, err := time.Parse(time.RFC3339, s)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
return tm
|
||||
}
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
start time.Time
|
||||
roundInterval bool
|
||||
period time.Duration
|
||||
since time.Time
|
||||
until time.Time
|
||||
}{
|
||||
{
|
||||
name: "round with exact alignment",
|
||||
start: parse("2018-03-27T00:00:00Z"),
|
||||
roundInterval: true,
|
||||
period: 30 * time.Second,
|
||||
since: parse("2018-03-27T00:00:00Z"),
|
||||
until: parse("2018-03-27T00:00:30Z"),
|
||||
},
|
||||
{
|
||||
name: "round with alignment needed",
|
||||
start: parse("2018-03-27T00:00:05Z"),
|
||||
roundInterval: true,
|
||||
period: 30 * time.Second,
|
||||
since: parse("2018-03-27T00:00:00Z"),
|
||||
until: parse("2018-03-27T00:00:30Z"),
|
||||
},
|
||||
{
|
||||
name: "no round with exact alignment",
|
||||
start: parse("2018-03-27T00:00:00Z"),
|
||||
roundInterval: false,
|
||||
period: 30 * time.Second,
|
||||
since: parse("2018-03-27T00:00:00Z"),
|
||||
until: parse("2018-03-27T00:00:30Z"),
|
||||
},
|
||||
{
|
||||
name: "no found with alignment needed",
|
||||
start: parse("2018-03-27T00:00:05Z"),
|
||||
roundInterval: false,
|
||||
period: 30 * time.Second,
|
||||
since: parse("2018-03-27T00:00:05Z"),
|
||||
until: parse("2018-03-27T00:00:35Z"),
|
||||
},
|
||||
}
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
since, until := updateWindow(tt.start, tt.roundInterval, tt.period)
|
||||
require.Equal(t, tt.since, since, "since")
|
||||
require.Equal(t, tt.until, until, "until")
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user