Add grace period for metrics late for aggregation (#6049)
This commit is contained in:
parent
282c8ce096
commit
dc292b73a9
|
@ -325,6 +325,10 @@ Parameters that can be used with any aggregator plugin:
|
||||||
how long for aggregators to wait before receiving metrics from input
|
how long for aggregators to wait before receiving metrics from input
|
||||||
plugins, in the case that aggregators are flushing and inputs are gathering
|
plugins, in the case that aggregators are flushing and inputs are gathering
|
||||||
on the same interval.
|
on the same interval.
|
||||||
|
- **grace**: The duration when the metrics will still be aggregated
|
||||||
|
by the plugin, even though they're outside of the aggregation period. This
|
||||||
|
is needed in a situation when the agent is expected to receive late metrics
|
||||||
|
and it's acceptable to roll them up into next aggregation period.
|
||||||
- **drop_original**: If true, the original metric will be dropped by the
|
- **drop_original**: If true, the original metric will be dropped by the
|
||||||
aggregator and will not get sent to the output plugins.
|
aggregator and will not get sent to the output plugins.
|
||||||
- **name_override**: Override the base name of the measurement. (Default is
|
- **name_override**: Override the base name of the measurement. (Default is
|
||||||
|
|
|
@ -1025,6 +1025,7 @@ func buildAggregator(name string, tbl *ast.Table) (*models.AggregatorConfig, err
|
||||||
Name: name,
|
Name: name,
|
||||||
Delay: time.Millisecond * 100,
|
Delay: time.Millisecond * 100,
|
||||||
Period: time.Second * 30,
|
Period: time.Second * 30,
|
||||||
|
Grace: time.Second * 0,
|
||||||
}
|
}
|
||||||
|
|
||||||
if node, ok := tbl.Fields["period"]; ok {
|
if node, ok := tbl.Fields["period"]; ok {
|
||||||
|
@ -1053,6 +1054,18 @@ func buildAggregator(name string, tbl *ast.Table) (*models.AggregatorConfig, err
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if node, ok := tbl.Fields["grace"]; ok {
|
||||||
|
if kv, ok := node.(*ast.KeyValue); ok {
|
||||||
|
if str, ok := kv.Value.(*ast.String); ok {
|
||||||
|
dur, err := time.ParseDuration(str.Value)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
conf.Grace = dur
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
if node, ok := tbl.Fields["drop_original"]; ok {
|
if node, ok := tbl.Fields["drop_original"]; ok {
|
||||||
if kv, ok := node.(*ast.KeyValue); ok {
|
if kv, ok := node.(*ast.KeyValue); ok {
|
||||||
if b, ok := kv.Value.(*ast.Boolean); ok {
|
if b, ok := kv.Value.(*ast.Boolean); ok {
|
||||||
|
@ -1100,6 +1113,7 @@ func buildAggregator(name string, tbl *ast.Table) (*models.AggregatorConfig, err
|
||||||
|
|
||||||
delete(tbl.Fields, "period")
|
delete(tbl.Fields, "period")
|
||||||
delete(tbl.Fields, "delay")
|
delete(tbl.Fields, "delay")
|
||||||
|
delete(tbl.Fields, "grace")
|
||||||
delete(tbl.Fields, "drop_original")
|
delete(tbl.Fields, "drop_original")
|
||||||
delete(tbl.Fields, "name_prefix")
|
delete(tbl.Fields, "name_prefix")
|
||||||
delete(tbl.Fields, "name_suffix")
|
delete(tbl.Fields, "name_suffix")
|
||||||
|
|
|
@ -59,6 +59,7 @@ type AggregatorConfig struct {
|
||||||
DropOriginal bool
|
DropOriginal bool
|
||||||
Period time.Duration
|
Period time.Duration
|
||||||
Delay time.Duration
|
Delay time.Duration
|
||||||
|
Grace time.Duration
|
||||||
|
|
||||||
NameOverride string
|
NameOverride string
|
||||||
MeasurementPrefix string
|
MeasurementPrefix string
|
||||||
|
@ -135,9 +136,9 @@ func (r *RunningAggregator) Add(m telegraf.Metric) bool {
|
||||||
r.Lock()
|
r.Lock()
|
||||||
defer r.Unlock()
|
defer r.Unlock()
|
||||||
|
|
||||||
if m.Time().Before(r.periodStart) || m.Time().After(r.periodEnd.Add(r.Config.Delay)) {
|
if m.Time().Before(r.periodStart.Add(-r.Config.Grace)) || m.Time().After(r.periodEnd.Add(r.Config.Delay)) {
|
||||||
log.Printf("D! [%s] metric is outside aggregation window; discarding. %s: m: %s e: %s",
|
log.Printf("D! [%s] metric is outside aggregation window; discarding. %s: m: %s e: %s g: %s",
|
||||||
r.Name(), m.Time(), r.periodStart, r.periodEnd)
|
r.Name(), m.Time(), r.periodStart, r.periodEnd, r.Config.Grace)
|
||||||
r.MetricsDropped.Incr(1)
|
r.MetricsDropped.Incr(1)
|
||||||
return r.Config.DropOriginal
|
return r.Config.DropOriginal
|
||||||
}
|
}
|
||||||
|
|
|
@ -89,6 +89,68 @@ func TestAddMetricsOutsideCurrentPeriod(t *testing.T) {
|
||||||
require.Equal(t, int64(101), acc.Metrics[0].Fields["sum"])
|
require.Equal(t, int64(101), acc.Metrics[0].Fields["sum"])
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestAddMetricsOutsideCurrentPeriodWithGrace(t *testing.T) {
|
||||||
|
a := &TestAggregator{}
|
||||||
|
ra := NewRunningAggregator(a, &AggregatorConfig{
|
||||||
|
Name: "TestRunningAggregator",
|
||||||
|
Filter: Filter{
|
||||||
|
NamePass: []string{"*"},
|
||||||
|
},
|
||||||
|
Period: time.Millisecond * 1500,
|
||||||
|
Grace: time.Millisecond * 500,
|
||||||
|
})
|
||||||
|
require.NoError(t, ra.Config.Filter.Compile())
|
||||||
|
acc := testutil.Accumulator{}
|
||||||
|
now := time.Now()
|
||||||
|
ra.UpdateWindow(now, now.Add(ra.Config.Period))
|
||||||
|
|
||||||
|
m := testutil.MustMetric("RITest",
|
||||||
|
map[string]string{},
|
||||||
|
map[string]interface{}{
|
||||||
|
"value": int64(101),
|
||||||
|
},
|
||||||
|
now.Add(-time.Hour),
|
||||||
|
telegraf.Untyped,
|
||||||
|
)
|
||||||
|
require.False(t, ra.Add(m))
|
||||||
|
|
||||||
|
// metric before current period (late)
|
||||||
|
m = testutil.MustMetric("RITest",
|
||||||
|
map[string]string{},
|
||||||
|
map[string]interface{}{
|
||||||
|
"value": int64(100),
|
||||||
|
},
|
||||||
|
now.Add(-time.Millisecond*1000),
|
||||||
|
telegraf.Untyped,
|
||||||
|
)
|
||||||
|
require.False(t, ra.Add(m))
|
||||||
|
|
||||||
|
// metric before current period, but within grace period (late)
|
||||||
|
m = testutil.MustMetric("RITest",
|
||||||
|
map[string]string{},
|
||||||
|
map[string]interface{}{
|
||||||
|
"value": int64(102),
|
||||||
|
},
|
||||||
|
now.Add(-time.Millisecond*200),
|
||||||
|
telegraf.Untyped,
|
||||||
|
)
|
||||||
|
require.False(t, ra.Add(m))
|
||||||
|
|
||||||
|
// "now" metric
|
||||||
|
m = testutil.MustMetric("RITest",
|
||||||
|
map[string]string{},
|
||||||
|
map[string]interface{}{
|
||||||
|
"value": int64(101),
|
||||||
|
},
|
||||||
|
time.Now().Add(time.Millisecond*50),
|
||||||
|
telegraf.Untyped)
|
||||||
|
require.False(t, ra.Add(m))
|
||||||
|
|
||||||
|
ra.Push(&acc)
|
||||||
|
require.Equal(t, 1, len(acc.Metrics))
|
||||||
|
require.Equal(t, int64(203), acc.Metrics[0].Fields["sum"])
|
||||||
|
}
|
||||||
|
|
||||||
func TestAddAndPushOnePeriod(t *testing.T) {
|
func TestAddAndPushOnePeriod(t *testing.T) {
|
||||||
a := &TestAggregator{}
|
a := &TestAggregator{}
|
||||||
ra := NewRunningAggregator(a, &AggregatorConfig{
|
ra := NewRunningAggregator(a, &AggregatorConfig{
|
||||||
|
|
Loading…
Reference in New Issue