Remove outputs blocking inputs when output is slow (#4938)

This commit is contained in:
Daniel Nelson
2018-11-05 13:34:28 -08:00
committed by GitHub
parent 74667cd681
commit 6e5c2f8bb6
59 changed files with 3615 additions and 2189 deletions

214
internal/models/buffer.go Normal file
View File

@@ -0,0 +1,214 @@
package models
import (
"sync"
"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/selfstat"
)
// Agent-wide counters, registered under the "agent" measurement. Every Buffer
// increments these in addition to its own per-output counters.
var (
AgentMetricsWritten = selfstat.Register("agent", "metrics_written", map[string]string{})
AgentMetricsDropped = selfstat.Register("agent", "metrics_dropped", map[string]string{})
)
// Buffer stores metrics in a fixed-capacity circular buffer and keeps a
// record of the batch most recently handed out by Batch.
type Buffer struct {
sync.Mutex // locked by the exported methods while they use the fields below
buf []telegraf.Metric
first int // index of the first/oldest metric
last int // one after the index of the last/newest metric
size int // number of metrics currently in the buffer
cap int // the capacity of the buffer
batchFirst int // index of the first metric in the batch
batchLast int // one after the index of the last metric in the batch
batchSize int // number of metrics currently in the batch
MetricsAdded selfstat.Stat // incremented once per metric added
MetricsWritten selfstat.Stat // incremented once per metric passed to Accept
MetricsDropped selfstat.Stat // incremented once per metric dropped
}
// NewBuffer creates an empty Buffer able to hold capacity metrics. The name
// is used as the "output" tag of the buffer's self-statistics.
func NewBuffer(name string, capacity int) *Buffer {
	// Index and size fields are left at their zero values: the buffer starts
	// empty with no outstanding batch.
	return &Buffer{
		buf: make([]telegraf.Metric, capacity),
		cap: capacity,

		MetricsAdded:   selfstat.Register("write", "metrics_added", map[string]string{"output": name}),
		MetricsWritten: selfstat.Register("write", "metrics_written", map[string]string{"output": name}),
		MetricsDropped: selfstat.Register("write", "metrics_dropped", map[string]string{"output": name}),
	}
}
// Len reports how many metrics the buffer currently holds.
func (b *Buffer) Len() int {
	b.Lock()
	n := b.size
	b.Unlock()
	return n
}
// metricAdded increments the buffer's MetricsAdded counter by one.
func (b *Buffer) metricAdded() {
b.MetricsAdded.Incr(1)
}
// metricWritten increments the agent-wide and per-buffer written counters and
// calls Accept on the metric.
func (b *Buffer) metricWritten(metric telegraf.Metric) {
AgentMetricsWritten.Incr(1)
b.MetricsWritten.Incr(1)
metric.Accept()
}
// metricDropped increments the agent-wide and per-buffer dropped counters and
// calls Reject on the metric.
func (b *Buffer) metricDropped(metric telegraf.Metric) {
AgentMetricsDropped.Incr(1)
b.MetricsDropped.Incr(1)
metric.Reject()
}
// inBatch reports whether the write position (b.last) lies inside the
// currently recorded batch. It is always false when no batch is recorded.
func (b *Buffer) inBatch() bool {
	switch {
	case b.batchSize == 0:
		return false
	case b.batchFirst < b.batchLast:
		// The batch occupies one contiguous index range.
		return b.batchFirst <= b.last && b.last < b.batchLast
	default:
		// The batch wraps past the end of the backing slice (or covers all
		// of it), so it is the union of two index ranges.
		return b.batchFirst <= b.last || b.last < b.batchLast
	}
}
// add stores m at the write position, overwriting the oldest metric when the
// buffer is already full. It does no locking itself; Add calls it with the
// lock held.
func (b *Buffer) add(m telegraf.Metric) {
// Check if Buffer is full
if b.size == b.cap {
if b.batchSize == 0 {
// No batch taken by the output, we can drop the metric now.
b.metricDropped(b.buf[b.last])
} else if b.inBatch() {
// There is an outstanding batch and this will overwrite a metric
// in it, delay the dropping only in case the batch gets rejected.
// The batch record shrinks from its front (oldest) end.
b.batchSize--
b.batchFirst++
b.batchFirst %= b.cap
} else {
// There is an outstanding batch, but this overwrites a metric
// outside of it.
b.metricDropped(b.buf[b.last])
}
}
b.metricAdded()
b.buf[b.last] = m
b.last++
b.last %= b.cap
// When the buffer was full the oldest metric has just been overwritten, so
// the read position advances with the write position.
if b.size == b.cap {
b.first++
b.first %= b.cap
}
b.size = min(b.size+1, b.cap)
}
// Add appends the given metrics to the buffer in order, holding the lock for
// the whole call.
func (b *Buffer) Add(metrics ...telegraf.Metric) {
	b.Lock()
	defer b.Unlock()

	for _, m := range metrics {
		b.add(m)
	}
}
// Batch returns a slice containing up to batchSize of the oldest metrics in
// the buffer, ordered oldest first.
//
// The metrics contained in the batch are not removed from the buffer, instead
// the batch is recorded and removed only if Accept is called. When the buffer
// is empty or batchSize is zero or negative, an empty slice is returned and
// the recorded batch is left unchanged.
func (b *Buffer) Batch(batchSize int) []telegraf.Metric {
	b.Lock()
	defer b.Unlock()

	outLen := min(b.size, batchSize)
	if outLen <= 0 {
		// Nothing to hand out. This also covers a negative batchSize, which
		// would otherwise make the allocation below panic.
		return make([]telegraf.Metric, 0)
	}
	out := make([]telegraf.Metric, outLen)

	// Record the batch so that add, Accept and Reject can refer to it.
	b.batchFirst = b.first
	b.batchLast = (b.first + outLen) % b.cap
	b.batchSize = outLen

	// Copy up to the end of the backing slice, then wrap around to its start
	// for whatever is left.
	until := min(b.cap, b.first+outLen)
	n := copy(out, b.buf[b.first:until])
	if n < outLen {
		copy(out[n:], b.buf[:outLen-n])
	}
	return out
}
// Accept marks every metric in batch as written and removes the metrics still
// recorded in the last batch from the front of the buffer.
//
// The written counters and each metric's Accept are applied to all of batch
// unconditionally; only the removal depends on a batch still being recorded.
func (b *Buffer) Accept(batch []telegraf.Metric) {
b.Lock()
defer b.Unlock()
for _, m := range batch {
b.metricWritten(m)
}
// batchSize may be smaller than len(batch) if add overwrote part of the
// batch in the meantime; only the part still in the buffer is removed.
if b.batchSize > 0 {
b.size -= b.batchSize
b.first += b.batchSize
b.first %= b.cap
}
b.resetBatch()
}
// Reject clears the current batch record, leaving the batch's metrics that
// are still in the buffer in place for a later Batch call.
//
// Metrics of the batch that were overwritten by Add while the batch was
// outstanding had their drop accounting delayed; they are counted as dropped
// here.
func (b *Buffer) Reject(batch []telegraf.Metric) {
	b.Lock()
	defer b.Unlock()

	// add shrinks the batch record from its front each time it overwrites a
	// batched metric, so the overwritten metrics are the oldest entries of
	// batch, i.e. its leading elements. The trailing b.batchSize metrics are
	// still in the buffer and must not be reported as dropped.
	if overwritten := len(batch) - b.batchSize; overwritten > 0 {
		for _, m := range batch[:overwritten] {
			b.metricDropped(m)
		}
	}
	b.resetBatch()
}
// resetBatch clears the batch record so that no batch is outstanding.
func (b *Buffer) resetBatch() {
b.batchFirst = 0
b.batchLast = 0
b.batchSize = 0
}
// min returns the smaller of a and b.
func min(a, b int) int {
	if a <= b {
		return a
	}
	return b
}

View File

@@ -0,0 +1,385 @@
package models
import (
"testing"
"time"
"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/metric"
"github.com/stretchr/testify/require"
)
// MockMetric wraps a telegraf.Metric and redirects Accept, Reject and Drop to
// caller-supplied callbacks so tests can observe them.
type MockMetric struct {
telegraf.Metric
AcceptF func() // called by Accept
RejectF func() // called by Reject
DropF func() // called by Drop
}
// Accept calls AcceptF, which must be set.
func (m *MockMetric) Accept() {
m.AcceptF()
}
// Reject calls RejectF, which must be set.
func (m *MockMetric) Reject() {
m.RejectF()
}
// Drop calls DropF, which must be set.
func (m *MockMetric) Drop() {
m.DropF()
}
// Metric returns a fixed test metric named "cpu" with no tags, a single field
// value=42.0 and a timestamp at the Unix epoch. It panics if metric.New
// returns an error.
func Metric() telegraf.Metric {
m, err := metric.New(
"cpu",
map[string]string{},
map[string]interface{}{
"value": 42.0,
},
time.Unix(0, 0),
)
if err != nil {
panic(err)
}
return m
}
// BenchmarkAddMetrics measures adding one metric at a time to a buffer with a
// capacity of 10000.
func BenchmarkAddMetrics(b *testing.B) {
buf := NewBuffer("test", 10000)
m := Metric()
for n := 0; n < b.N; n++ {
buf.Add(m)
}
}
// setup resets the buffer's three counters to zero and returns the same
// buffer, so each test starts counting from a known state.
func setup(b *Buffer) *Buffer {
b.MetricsAdded.Set(0)
b.MetricsWritten.Set(0)
b.MetricsDropped.Set(0)
return b
}
// TestBuffer_LenEmpty checks that a new buffer has length zero.
func TestBuffer_LenEmpty(t *testing.T) {
b := setup(NewBuffer("test", 5))
require.Equal(t, 0, b.Len())
}
// TestBuffer_LenOne checks that the length is one after adding one metric.
func TestBuffer_LenOne(t *testing.T) {
m := Metric()
b := setup(NewBuffer("test", 5))
b.Add(m)
require.Equal(t, 1, b.Len())
}
// TestBuffer_LenFull checks that the length equals the capacity after filling
// the buffer exactly.
func TestBuffer_LenFull(t *testing.T) {
m := Metric()
b := setup(NewBuffer("test", 5))
b.Add(m, m, m, m, m)
require.Equal(t, 5, b.Len())
}
// TestBuffer_LenOverfill checks that the length stays at the capacity when
// more metrics are added than the buffer can hold.
func TestBuffer_LenOverfill(t *testing.T) {
m := Metric()
b := setup(NewBuffer("test", 5))
// NOTE(review): this second setup call looks redundant; b was already
// reset on the line above.
setup(b)
b.Add(m, m, m, m, m, m)
require.Equal(t, 5, b.Len())
}
// TestBuffer_BatchLenZero checks that requesting a batch of size zero yields
// an empty batch.
func TestBuffer_BatchLenZero(t *testing.T) {
b := setup(NewBuffer("test", 5))
batch := b.Batch(0)
require.Len(t, batch, 0)
}
// TestBuffer_BatchLenBufferEmpty checks that a batch taken from an empty
// buffer is empty.
func TestBuffer_BatchLenBufferEmpty(t *testing.T) {
b := setup(NewBuffer("test", 5))
batch := b.Batch(2)
require.Len(t, batch, 0)
}
// TestBuffer_BatchLenUnderfill checks that the batch is limited to the number
// of buffered metrics when fewer than batchSize are available.
func TestBuffer_BatchLenUnderfill(t *testing.T) {
m := Metric()
b := setup(NewBuffer("test", 5))
b.Add(m)
batch := b.Batch(2)
require.Len(t, batch, 1)
}
// TestBuffer_BatchLenFill checks that the batch is limited to batchSize when
// more metrics are buffered.
func TestBuffer_BatchLenFill(t *testing.T) {
m := Metric()
b := setup(NewBuffer("test", 5))
b.Add(m, m, m)
batch := b.Batch(2)
require.Len(t, batch, 2)
}
// TestBuffer_BatchLenExact checks the batch length when the buffer holds
// exactly batchSize metrics.
func TestBuffer_BatchLenExact(t *testing.T) {
m := Metric()
b := setup(NewBuffer("test", 5))
b.Add(m, m)
batch := b.Batch(2)
require.Len(t, batch, 2)
}
// TestBuffer_BatchLenLargerThanBuffer checks that a batchSize above the
// capacity yields at most the buffered metrics.
func TestBuffer_BatchLenLargerThanBuffer(t *testing.T) {
m := Metric()
b := setup(NewBuffer("test", 5))
b.Add(m, m, m, m, m)
batch := b.Batch(6)
require.Len(t, batch, 5)
}
// TestBuffer_BatchWrap checks that a batch has the full length when the
// buffered metrics wrap around the end of the backing slice.
func TestBuffer_BatchWrap(t *testing.T) {
m := Metric()
b := setup(NewBuffer("test", 5))
b.Add(m, m, m, m, m)
batch := b.Batch(2)
b.Accept(batch)
// These two metrics land at the start of the backing slice.
b.Add(m, m)
batch = b.Batch(5)
require.Len(t, batch, 5)
}
// TestBuffer_AddDropsOverwrittenMetrics checks that metrics overwritten while
// no batch is outstanding are counted as dropped.
func TestBuffer_AddDropsOverwrittenMetrics(t *testing.T) {
m := Metric()
b := setup(NewBuffer("test", 5))
b.Add(m, m, m, m, m)
b.Add(m, m, m, m, m)
require.Equal(t, int64(5), b.MetricsDropped.Get())
require.Equal(t, int64(0), b.MetricsWritten.Get())
}
// TestBuffer_AcceptRemovesBatch checks that accepting a batch removes its
// metrics from the buffer.
func TestBuffer_AcceptRemovesBatch(t *testing.T) {
m := Metric()
b := setup(NewBuffer("test", 5))
b.Add(m, m, m)
batch := b.Batch(2)
b.Accept(batch)
require.Equal(t, 1, b.Len())
}
// TestBuffer_RejectLeavesBatch checks that rejecting a batch leaves its
// metrics in the buffer.
func TestBuffer_RejectLeavesBatch(t *testing.T) {
m := Metric()
b := setup(NewBuffer("test", 5))
b.Add(m, m, m)
batch := b.Batch(2)
b.Reject(batch)
require.Equal(t, 3, b.Len())
}
// TestBuffer_AcceptWritesOverwrittenBatch checks that a batch fully
// overwritten while outstanding is counted as written, not dropped, once it
// is accepted.
func TestBuffer_AcceptWritesOverwrittenBatch(t *testing.T) {
m := Metric()
b := setup(NewBuffer("test", 5))
b.Add(m, m, m, m, m)
batch := b.Batch(5)
b.Add(m, m, m, m, m)
b.Accept(batch)
require.Equal(t, int64(0), b.MetricsDropped.Get())
require.Equal(t, int64(5), b.MetricsWritten.Get())
}
// TestBuffer_BatchRejectDropsOverwrittenBatch checks that a batch fully
// overwritten while outstanding is counted as dropped once it is rejected.
func TestBuffer_BatchRejectDropsOverwrittenBatch(t *testing.T) {
m := Metric()
b := setup(NewBuffer("test", 5))
b.Add(m, m, m, m, m)
batch := b.Batch(5)
b.Add(m, m, m, m, m)
b.Reject(batch)
require.Equal(t, int64(5), b.MetricsDropped.Get())
require.Equal(t, int64(0), b.MetricsWritten.Get())
}
// TestBuffer_MetricsOverwriteBatchAccept checks that overwriting a partial
// batch and then accepting it counts the batch as written with no drops.
func TestBuffer_MetricsOverwriteBatchAccept(t *testing.T) {
m := Metric()
b := setup(NewBuffer("test", 5))
b.Add(m, m, m, m, m)
batch := b.Batch(3)
b.Add(m, m, m)
b.Accept(batch)
require.Equal(t, int64(0), b.MetricsDropped.Get())
require.Equal(t, int64(3), b.MetricsWritten.Get())
}
// TestBuffer_MetricsOverwriteBatchReject checks that overwriting a partial
// batch and then rejecting it counts the overwritten metrics as dropped.
func TestBuffer_MetricsOverwriteBatchReject(t *testing.T) {
m := Metric()
b := setup(NewBuffer("test", 5))
b.Add(m, m, m, m, m)
batch := b.Batch(3)
b.Add(m, m, m)
b.Reject(batch)
require.Equal(t, int64(3), b.MetricsDropped.Get())
require.Equal(t, int64(0), b.MetricsWritten.Get())
}
// TestBuffer_MetricsBatchAcceptRemoved checks the counters when more metrics
// are added than the outstanding batch holds: the first three adds overwrite
// the batch, the remaining two drop metrics outside of it.
func TestBuffer_MetricsBatchAcceptRemoved(t *testing.T) {
m := Metric()
b := setup(NewBuffer("test", 5))
b.Add(m, m, m, m, m)
batch := b.Batch(3)
b.Add(m, m, m, m, m)
b.Accept(batch)
require.Equal(t, int64(2), b.MetricsDropped.Get())
require.Equal(t, int64(3), b.MetricsWritten.Get())
}
// TestBuffer_WrapWithBatch checks that only the metric overwritten after the
// outstanding batch has been fully consumed is counted as dropped.
func TestBuffer_WrapWithBatch(t *testing.T) {
m := Metric()
b := setup(NewBuffer("test", 5))
b.Add(m, m, m)
b.Batch(3)
// Two adds fill the buffer, three overwrite the batch, the last one drops.
b.Add(m, m, m, m, m, m)
require.Equal(t, int64(1), b.MetricsDropped.Get())
}
// TestBuffer_BatchNotRemoved checks that taking a batch does not remove its
// metrics from the buffer.
func TestBuffer_BatchNotRemoved(t *testing.T) {
m := Metric()
b := setup(NewBuffer("test", 5))
b.Add(m, m, m, m, m)
b.Batch(2)
require.Equal(t, 5, b.Len())
}
// TestBuffer_BatchRejectAcceptNoop checks that accepting a batch after it was
// rejected does not change the buffer length.
func TestBuffer_BatchRejectAcceptNoop(t *testing.T) {
m := Metric()
b := setup(NewBuffer("test", 5))
b.Add(m, m, m, m, m)
batch := b.Batch(2)
b.Reject(batch)
b.Accept(batch)
require.Equal(t, 5, b.Len())
}
// TestBuffer_AcceptCallsMetricAccept checks that Accept calls Accept once on
// each metric of the batch.
func TestBuffer_AcceptCallsMetricAccept(t *testing.T) {
var accept int
mm := &MockMetric{
Metric: Metric(),
AcceptF: func() {
accept++
},
}
b := setup(NewBuffer("test", 5))
b.Add(mm, mm, mm)
batch := b.Batch(2)
b.Accept(batch)
require.Equal(t, 2, accept)
}
// TestBuffer_AddCallsMetricRejectWhenNoBatch checks that overwriting metrics
// with no batch outstanding calls Reject on each overwritten metric.
func TestBuffer_AddCallsMetricRejectWhenNoBatch(t *testing.T) {
var reject int
mm := &MockMetric{
Metric: Metric(),
RejectF: func() {
reject++
},
}
b := setup(NewBuffer("test", 5))
// NOTE(review): this second setup call looks redundant; b was already
// reset on the line above.
setup(b)
b.Add(mm, mm, mm, mm, mm)
b.Add(mm, mm)
require.Equal(t, 2, reject)
}
// TestBuffer_AddCallsMetricRejectWhenNotInBatch checks that metrics
// overwritten outside an outstanding batch are rejected immediately, while
// the overwritten batch metrics are rejected only when the batch is rejected.
func TestBuffer_AddCallsMetricRejectWhenNotInBatch(t *testing.T) {
var reject int
mm := &MockMetric{
Metric: Metric(),
RejectF: func() {
reject++
},
}
b := setup(NewBuffer("test", 5))
// NOTE(review): this second setup call looks redundant; b was already
// reset on the line above.
setup(b)
b.Add(mm, mm, mm, mm, mm)
batch := b.Batch(2)
b.Add(mm, mm, mm, mm)
// metric[2] and metric[3] rejected
require.Equal(t, 2, reject)
b.Reject(batch)
// metric[0] and metric[1] (the overwritten batch) now rejected
require.Equal(t, 4, reject)
}
// TestBuffer_RejectCallsMetricRejectWithOverwritten checks that batch metrics
// overwritten while the batch is outstanding are not rejected until Reject is
// called.
func TestBuffer_RejectCallsMetricRejectWithOverwritten(t *testing.T) {
var reject int
mm := &MockMetric{
Metric: Metric(),
RejectF: func() {
reject++
},
}
b := setup(NewBuffer("test", 5))
b.Add(mm, mm, mm, mm, mm)
batch := b.Batch(5)
b.Add(mm, mm)
require.Equal(t, 0, reject)
b.Reject(batch)
require.Equal(t, 2, reject)
}
// TestBuffer_AddOverwriteAndReject checks the reject count when the buffer is
// overwritten four times over while a full batch is outstanding: the first
// pass overwrites the batch (deferred), the next three drop immediately, and
// rejecting the batch accounts for the deferred five.
func TestBuffer_AddOverwriteAndReject(t *testing.T) {
var reject int
mm := &MockMetric{
Metric: Metric(),
RejectF: func() {
reject++
},
}
b := setup(NewBuffer("test", 5))
b.Add(mm, mm, mm, mm, mm)
batch := b.Batch(5)
b.Add(mm, mm, mm, mm, mm)
b.Add(mm, mm, mm, mm, mm)
b.Add(mm, mm, mm, mm, mm)
b.Add(mm, mm, mm, mm, mm)
require.Equal(t, 15, reject)
b.Reject(batch)
require.Equal(t, 20, reject)
}
// TestBuffer_AddOverwriteAndRejectOffset repeats the overwrite scenario with
// a batch that does not start at index zero of the backing slice, then
// accepts the batch and checks that no further rejects happen and all five
// batch metrics are accepted.
func TestBuffer_AddOverwriteAndRejectOffset(t *testing.T) {
var reject int
var accept int
mm := &MockMetric{
Metric: Metric(),
RejectF: func() {
reject++
},
AcceptF: func() {
accept++
},
}
b := setup(NewBuffer("test", 5))
b.Add(mm, mm, mm)
b.Add(mm, mm, mm, mm)
require.Equal(t, 2, reject)
batch := b.Batch(5)
// These four adds overwrite batch metrics, so no reject happens yet.
b.Add(mm, mm, mm, mm)
require.Equal(t, 2, reject)
b.Add(mm, mm, mm, mm)
require.Equal(t, 5, reject)
b.Add(mm, mm, mm, mm)
require.Equal(t, 9, reject)
b.Add(mm, mm, mm, mm)
require.Equal(t, 13, reject)
b.Accept(batch)
require.Equal(t, 13, reject)
require.Equal(t, 5, accept)
}

View File

@@ -6,6 +6,7 @@ import (
"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/metric"
"github.com/influxdata/telegraf/testutil"
"github.com/stretchr/testify/require"
)
@@ -480,3 +481,45 @@ func TestFilter_FilterTagsPassAndDrop(t *testing.T) {
}
}
// BenchmarkFilter measures Filter.Select on a "cpu" metric for an empty
// filter and for a filter with a matching NamePass entry. Each filter is
// compiled once before its timing loop.
func BenchmarkFilter(b *testing.B) {
tests := []struct {
name string
filter Filter
metric telegraf.Metric
}{
{
name: "empty filter",
filter: Filter{},
metric: testutil.MustMetric("cpu",
map[string]string{},
map[string]interface{}{
"value": 42,
},
time.Unix(0, 0),
),
},
{
name: "namepass",
filter: Filter{
NamePass: []string{"cpu"},
},
metric: testutil.MustMetric("cpu",
map[string]string{},
map[string]interface{}{
"value": 42,
},
time.Unix(0, 0),
),
},
}
for _, tt := range tests {
b.Run(tt.name, func(b *testing.B) {
require.NoError(b, tt.filter.Compile())
for n := 0; n < b.N; n++ {
tt.filter.Select(tt.metric)
}
})
}
}

View File

@@ -1,30 +1,53 @@
package models
import (
"log"
"sync"
"time"
"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/selfstat"
)
type RunningAggregator struct {
a telegraf.Aggregator
Config *AggregatorConfig
metrics chan telegraf.Metric
sync.Mutex
Aggregator telegraf.Aggregator
Config *AggregatorConfig
periodStart time.Time
periodEnd time.Time
MetricsPushed selfstat.Stat
MetricsFiltered selfstat.Stat
MetricsDropped selfstat.Stat
PushTime selfstat.Stat
}
func NewRunningAggregator(
a telegraf.Aggregator,
conf *AggregatorConfig,
aggregator telegraf.Aggregator,
config *AggregatorConfig,
) *RunningAggregator {
return &RunningAggregator{
a: a,
Config: conf,
metrics: make(chan telegraf.Metric, 100),
Aggregator: aggregator,
Config: config,
MetricsPushed: selfstat.Register(
"aggregate",
"metrics_pushed",
map[string]string{"aggregator": config.Name},
),
MetricsFiltered: selfstat.Register(
"aggregate",
"metrics_filtered",
map[string]string{"aggregator": config.Name},
),
MetricsDropped: selfstat.Register(
"aggregate",
"metrics_dropped",
map[string]string{"aggregator": config.Name},
),
PushTime: selfstat.Register(
"aggregate",
"push_time_ns",
map[string]string{"aggregator": config.Name},
),
}
}
@@ -46,6 +69,15 @@ func (r *RunningAggregator) Name() string {
return "aggregators." + r.Config.Name
}
// Period returns the aggregation period from the aggregator's configuration.
func (r *RunningAggregator) Period() time.Duration {
return r.Config.Period
}
// SetPeriodStart sets the start of the current period to start and the end
// of the period to start plus the configured Period and Delay.
func (r *RunningAggregator) SetPeriodStart(start time.Time) {
r.periodStart = start
r.periodEnd = r.periodStart.Add(r.Config.Period).Add(r.Config.Delay)
}
func (r *RunningAggregator) MakeMetric(metric telegraf.Metric) telegraf.Metric {
m := makemetric(
metric,
@@ -59,9 +91,21 @@ func (r *RunningAggregator) MakeMetric(metric telegraf.Metric) telegraf.Metric {
m.SetAggregate(true)
}
r.MetricsPushed.Incr(1)
return m
}
// metricFiltered increments the MetricsFiltered counter and calls Accept on
// the metric.
func (r *RunningAggregator) metricFiltered(metric telegraf.Metric) {
r.MetricsFiltered.Incr(1)
metric.Accept()
}
// metricDropped increments the MetricsDropped counter and calls Accept on the
// metric.
// NOTE(review): Accept (rather than Reject or Drop) is called here — confirm
// this is intended for metrics the aggregator does not consume.
func (r *RunningAggregator) metricDropped(metric telegraf.Metric) {
r.MetricsDropped.Incr(1)
metric.Accept()
}
// Add a metric to the aggregator and return true if the original metric
// should be dropped.
func (r *RunningAggregator) Add(metric telegraf.Metric) bool {
@@ -74,75 +118,31 @@ func (r *RunningAggregator) Add(metric telegraf.Metric) bool {
return r.Config.DropOriginal
}
r.metrics <- metric
r.Lock()
defer r.Unlock()
if r.periodStart.IsZero() || metric.Time().Before(r.periodStart) || metric.Time().After(r.periodEnd) {
r.metricDropped(metric)
return false
}
r.Aggregator.Add(metric)
return r.Config.DropOriginal
}
func (r *RunningAggregator) add(in telegraf.Metric) {
r.a.Add(in)
func (r *RunningAggregator) Push(acc telegraf.Accumulator) {
r.Lock()
defer r.Unlock()
r.periodStart = r.periodEnd
r.periodEnd = r.periodStart.Add(r.Config.Period).Add(r.Config.Delay)
r.push(acc)
r.Aggregator.Reset()
}
func (r *RunningAggregator) push(acc telegraf.Accumulator) {
r.a.Push(acc)
}
func (r *RunningAggregator) reset() {
r.a.Reset()
}
// Run runs the running aggregator, listens for incoming metrics, and waits
// for period ticks to tell it when to push and reset the aggregator.
func (r *RunningAggregator) Run(
acc telegraf.Accumulator,
shutdown chan struct{},
) {
// The start of the period is truncated to the nearest second.
//
// Every metric then gets it's timestamp checked and is dropped if it
// is not within:
//
// start < t < end + truncation + delay
//
// So if we start at now = 00:00.2 with a 10s period and 0.3s delay:
// now = 00:00.2
// start = 00:00
// truncation = 00:00.2
// end = 00:10
// 1st interval: 00:00 - 00:10.5
// 2nd interval: 00:10 - 00:20.5
// etc.
//
now := time.Now()
r.periodStart = now.Truncate(time.Second)
truncation := now.Sub(r.periodStart)
r.periodEnd = r.periodStart.Add(r.Config.Period)
time.Sleep(r.Config.Delay)
periodT := time.NewTicker(r.Config.Period)
defer periodT.Stop()
for {
select {
case <-shutdown:
if len(r.metrics) > 0 {
// wait until metrics are flushed before exiting
continue
}
return
case m := <-r.metrics:
if m.Time().Before(r.periodStart) ||
m.Time().After(r.periodEnd.Add(truncation).Add(r.Config.Delay)) {
// the metric is outside the current aggregation period, so
// skip it.
log.Printf("D! aggregator: metric \"%s\" is not in the current timewindow, skipping", m.Name())
continue
}
r.add(m)
case <-periodT.C:
r.periodStart = r.periodEnd
r.periodEnd = r.periodStart.Add(r.Config.Period)
r.push(acc)
r.reset()
}
}
start := time.Now()
r.Aggregator.Push(acc)
elapsed := time.Since(start)
r.PushTime.Incr(elapsed.Nanoseconds())
}

View File

@@ -1,16 +1,13 @@
package models
import (
"sync"
"sync/atomic"
"testing"
"time"
"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/metric"
"github.com/influxdata/telegraf/testutil"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
@@ -23,28 +20,24 @@ func TestAdd(t *testing.T) {
},
Period: time.Millisecond * 500,
})
assert.NoError(t, ra.Config.Filter.Compile())
require.NoError(t, ra.Config.Filter.Compile())
acc := testutil.Accumulator{}
go ra.Run(&acc, make(chan struct{}))
m, err := metric.New("RITest",
now := time.Now()
ra.SetPeriodStart(now)
m := testutil.MustMetric("RITest",
map[string]string{},
map[string]interface{}{
"value": int64(101),
},
time.Now().Add(time.Millisecond*150),
telegraf.Untyped)
require.NoError(t, err)
require.False(t, ra.Add(m))
ra.Push(&acc)
assert.False(t, ra.Add(m))
for {
time.Sleep(time.Millisecond)
if atomic.LoadInt64(&a.sum) > 0 {
break
}
}
assert.Equal(t, int64(101), atomic.LoadInt64(&a.sum))
require.Equal(t, 1, len(acc.Metrics))
require.Equal(t, int64(101), acc.Metrics[0].Fields["sum"])
}
func TestAddMetricsOutsideCurrentPeriod(t *testing.T) {
@@ -56,50 +49,45 @@ func TestAddMetricsOutsideCurrentPeriod(t *testing.T) {
},
Period: time.Millisecond * 500,
})
assert.NoError(t, ra.Config.Filter.Compile())
require.NoError(t, ra.Config.Filter.Compile())
acc := testutil.Accumulator{}
go ra.Run(&acc, make(chan struct{}))
now := time.Now()
ra.SetPeriodStart(now)
m, err := metric.New("RITest",
m := testutil.MustMetric("RITest",
map[string]string{},
map[string]interface{}{
"value": int64(101),
},
time.Now().Add(-time.Hour),
telegraf.Untyped)
require.NoError(t, err)
assert.False(t, ra.Add(m))
now.Add(-time.Hour),
telegraf.Untyped,
)
require.False(t, ra.Add(m))
// metric after current period
m, err = metric.New("RITest",
m = testutil.MustMetric("RITest",
map[string]string{},
map[string]interface{}{
"value": int64(101),
},
time.Now().Add(time.Hour),
telegraf.Untyped)
require.NoError(t, err)
assert.False(t, ra.Add(m))
now.Add(time.Hour),
telegraf.Untyped,
)
require.False(t, ra.Add(m))
// "now" metric
m, err = metric.New("RITest",
m = testutil.MustMetric("RITest",
map[string]string{},
map[string]interface{}{
"value": int64(101),
},
time.Now().Add(time.Millisecond*50),
telegraf.Untyped)
require.NoError(t, err)
assert.False(t, ra.Add(m))
require.False(t, ra.Add(m))
for {
time.Sleep(time.Millisecond)
if atomic.LoadInt64(&a.sum) > 0 {
break
}
}
assert.Equal(t, int64(101), atomic.LoadInt64(&a.sum))
ra.Push(&acc)
require.Equal(t, 1, len(acc.Metrics))
require.Equal(t, int64(101), acc.Metrics[0].Fields["sum"])
}
func TestAddAndPushOnePeriod(t *testing.T) {
@@ -111,37 +99,24 @@ func TestAddAndPushOnePeriod(t *testing.T) {
},
Period: time.Millisecond * 500,
})
assert.NoError(t, ra.Config.Filter.Compile())
require.NoError(t, ra.Config.Filter.Compile())
acc := testutil.Accumulator{}
shutdown := make(chan struct{})
var wg sync.WaitGroup
wg.Add(1)
go func() {
defer wg.Done()
ra.Run(&acc, shutdown)
}()
now := time.Now()
ra.SetPeriodStart(now)
m, err := metric.New("RITest",
m := testutil.MustMetric("RITest",
map[string]string{},
map[string]interface{}{
"value": int64(101),
},
time.Now().Add(time.Millisecond*100),
telegraf.Untyped)
require.NoError(t, err)
assert.False(t, ra.Add(m))
require.False(t, ra.Add(m))
ra.Push(&acc)
for {
time.Sleep(time.Millisecond)
if acc.NMetrics() > 0 {
break
}
}
acc.AssertContainsFields(t, "TestMetric", map[string]interface{}{"sum": int64(101)})
close(shutdown)
wg.Wait()
}
func TestAddDropOriginal(t *testing.T) {
@@ -152,28 +127,29 @@ func TestAddDropOriginal(t *testing.T) {
},
DropOriginal: true,
})
assert.NoError(t, ra.Config.Filter.Compile())
require.NoError(t, ra.Config.Filter.Compile())
m, err := metric.New("RITest",
now := time.Now()
ra.SetPeriodStart(now)
m := testutil.MustMetric("RITest",
map[string]string{},
map[string]interface{}{
"value": int64(101),
},
time.Now(),
now,
telegraf.Untyped)
require.NoError(t, err)
assert.True(t, ra.Add(m))
require.True(t, ra.Add(m))
// this metric name doesn't match the filter, so Add will return false
m2, err := metric.New("foobar",
m2 := testutil.MustMetric("foobar",
map[string]string{},
map[string]interface{}{
"value": int64(101),
},
time.Now(),
now,
telegraf.Untyped)
require.NoError(t, err)
assert.False(t, ra.Add(m2))
require.False(t, ra.Add(m2))
}
type TestAggregator struct {

View File

@@ -1,11 +1,9 @@
package models
import (
"fmt"
"time"
"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/plugins/serializers/influx"
"github.com/influxdata/telegraf/selfstat"
)
@@ -15,16 +13,13 @@ type RunningInput struct {
Input telegraf.Input
Config *InputConfig
trace bool
defaultTags map[string]string
MetricsGathered selfstat.Stat
GatherTime selfstat.Stat
}
func NewRunningInput(
input telegraf.Input,
config *InputConfig,
) *RunningInput {
func NewRunningInput(input telegraf.Input, config *InputConfig) *RunningInput {
return &RunningInput{
Input: input,
Config: config,
@@ -33,6 +28,11 @@ func NewRunningInput(
"metrics_gathered",
map[string]string{"input": config.Name},
),
GatherTime: selfstat.RegisterTiming(
"gather",
"gather_time_ns",
map[string]string{"input": config.Name},
),
}
}
@@ -52,13 +52,19 @@ func (r *RunningInput) Name() string {
return "inputs." + r.Config.Name
}
// metricFiltered calls Drop on a metric that will not be passed on.
func (r *RunningInput) metricFiltered(metric telegraf.Metric) {
metric.Drop()
}
func (r *RunningInput) MakeMetric(metric telegraf.Metric) telegraf.Metric {
if ok := r.Config.Filter.Select(metric); !ok {
r.metricFiltered(metric)
return nil
}
r.Config.Filter.Modify(metric)
if len(metric.FieldList()) == 0 {
r.metricFiltered(metric)
return nil
}
@@ -70,26 +76,17 @@ func (r *RunningInput) MakeMetric(metric telegraf.Metric) telegraf.Metric {
r.Config.Tags,
r.defaultTags)
if r.trace && m != nil {
s := influx.NewSerializer()
s.SetFieldSortOrder(influx.SortFields)
octets, err := s.Serialize(m)
if err == nil {
fmt.Print("> " + string(octets))
}
}
r.MetricsGathered.Incr(1)
GlobalMetricsGathered.Incr(1)
return m
}
func (r *RunningInput) Trace() bool {
return r.trace
}
func (r *RunningInput) SetTrace(trace bool) {
r.trace = trace
func (r *RunningInput) Gather(acc telegraf.Accumulator) error {
start := time.Now()
err := r.Input.Gather(acc)
elapsed := time.Since(start)
r.GatherTime.Incr(elapsed.Nanoseconds())
return err
}
func (r *RunningInput) SetDefaultTags(tags map[string]string) {

View File

@@ -6,6 +6,7 @@ import (
"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/metric"
"github.com/influxdata/telegraf/testutil"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
@@ -66,17 +67,13 @@ func TestMakeMetricWithPluginTags(t *testing.T) {
},
})
ri.SetTrace(true)
assert.Equal(t, true, ri.Trace())
m, err := metric.New("RITest",
m := testutil.MustMetric("RITest",
map[string]string{},
map[string]interface{}{
"value": int64(101),
},
now,
telegraf.Untyped)
require.NoError(t, err)
m = ri.MakeMetric(m)
expected, err := metric.New("RITest",
@@ -102,8 +99,6 @@ func TestMakeMetricFilteredOut(t *testing.T) {
Filter: Filter{NamePass: []string{"foobar"}},
})
ri.SetTrace(true)
assert.Equal(t, true, ri.Trace())
assert.NoError(t, ri.Config.Filter.Compile())
m, err := metric.New("RITest",
@@ -127,17 +122,13 @@ func TestMakeMetricWithDaemonTags(t *testing.T) {
"foo": "bar",
})
ri.SetTrace(true)
assert.Equal(t, true, ri.Trace())
m, err := metric.New("RITest",
m := testutil.MustMetric("RITest",
map[string]string{},
map[string]interface{}{
"value": int64(101),
},
now,
telegraf.Untyped)
require.NoError(t, err)
m = ri.MakeMetric(m)
expected, err := metric.New("RITest",
map[string]string{

View File

@@ -6,7 +6,6 @@ import (
"time"
"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/internal/buffer"
"github.com/influxdata/telegraf/selfstat"
)
@@ -18,6 +17,16 @@ const (
DEFAULT_METRIC_BUFFER_LIMIT = 10000
)
// OutputConfig holds the per-output settings: name, metric filter, flush
// interval, and optional buffer limit and batch size overrides.
type OutputConfig struct {
Name string
Filter Filter
FlushInterval time.Duration
MetricBufferLimit int // when > 0, used instead of the bufferLimit passed to NewRunningOutput
MetricBatchSize int // when > 0, used instead of the batchSize passed to NewRunningOutput
}
// RunningOutput contains the output configuration
type RunningOutput struct {
Name string
@@ -27,24 +36,16 @@ type RunningOutput struct {
MetricBatchSize int
MetricsFiltered selfstat.Stat
MetricsWritten selfstat.Stat
BufferSize selfstat.Stat
BufferLimit selfstat.Stat
WriteTime selfstat.Stat
metrics *buffer.Buffer
failMetrics *buffer.Buffer
batch []telegraf.Metric
buffer *Buffer
BatchReady chan time.Time
// Guards against concurrent calls to Add, Push, Reset
aggMutex sync.Mutex
// Guards against concurrent calls to the Output as described in #3009
writeMutex sync.Mutex
}
// OutputConfig containing name and filter
type OutputConfig struct {
Name string
Filter Filter
aggMutex sync.Mutex
batchMutex sync.Mutex
}
func NewRunningOutput(
@@ -54,25 +55,27 @@ func NewRunningOutput(
batchSize int,
bufferLimit int,
) *RunningOutput {
if conf.MetricBufferLimit > 0 {
bufferLimit = conf.MetricBufferLimit
}
if bufferLimit == 0 {
bufferLimit = DEFAULT_METRIC_BUFFER_LIMIT
}
if conf.MetricBatchSize > 0 {
batchSize = conf.MetricBatchSize
}
if batchSize == 0 {
batchSize = DEFAULT_METRIC_BATCH_SIZE
}
ro := &RunningOutput{
Name: name,
metrics: buffer.NewBuffer(batchSize),
failMetrics: buffer.NewBuffer(bufferLimit),
batch: make([]telegraf.Metric, 0, batchSize),
buffer: NewBuffer(name, bufferLimit),
BatchReady: make(chan time.Time, 1),
Output: output,
Config: conf,
MetricBufferLimit: bufferLimit,
MetricBatchSize: batchSize,
MetricsWritten: selfstat.Register(
"write",
"metrics_written",
map[string]string{"output": name},
),
MetricsFiltered: selfstat.Register(
"write",
"metrics_filtered",
@@ -94,20 +97,28 @@ func NewRunningOutput(
map[string]string{"output": name},
),
}
ro.BufferLimit.Set(int64(ro.MetricBufferLimit))
return ro
}
// AddMetric adds a metric to the output. This function can also write cached
// points if FlushBufferWhenFull is true.
// metricFiltered increments the MetricsFiltered counter and calls Drop on the
// metric.
func (ro *RunningOutput) metricFiltered(metric telegraf.Metric) {
ro.MetricsFiltered.Incr(1)
metric.Drop()
}
// AddMetric adds a metric to the output.
//
// Takes ownership of metric
func (ro *RunningOutput) AddMetric(metric telegraf.Metric) {
if ok := ro.Config.Filter.Select(metric); !ok {
ro.MetricsFiltered.Incr(1)
ro.metricFiltered(metric)
return
}
ro.Config.Filter.Modify(metric)
if len(metric.FieldList()) == 0 {
ro.metricFiltered(metric)
return
}
@@ -118,85 +129,98 @@ func (ro *RunningOutput) AddMetric(metric telegraf.Metric) {
return
}
ro.metrics.Add(metric)
if ro.metrics.Len() == ro.MetricBatchSize {
batch := ro.metrics.Batch(ro.MetricBatchSize)
err := ro.write(batch)
if err != nil {
ro.failMetrics.Add(batch...)
log.Printf("E! Error writing to output [%s]: %v", ro.Name, err)
ro.batchMutex.Lock()
ro.batch = append(ro.batch, metric)
if len(ro.batch) == ro.MetricBatchSize {
ro.addBatchToBuffer()
nBuffer := ro.buffer.Len()
ro.BufferSize.Set(int64(nBuffer))
select {
case ro.BatchReady <- time.Now():
default:
}
}
ro.batchMutex.Unlock()
}
// Write writes all cached points to this output.
// addBatchToBuffer moves the metrics from the batch into the metric buffer
// and truncates the batch to length zero, keeping its backing array for
// reuse.
func (ro *RunningOutput) addBatchToBuffer() {
ro.buffer.Add(ro.batch...)
ro.batch = ro.batch[:0]
}
// Write writes all metrics to the output, stopping when all have been sent on
// or error.
func (ro *RunningOutput) Write() error {
if output, ok := ro.Output.(telegraf.AggregatingOutput); ok {
ro.aggMutex.Lock()
metrics := output.Push()
ro.metrics.Add(metrics...)
ro.buffer.Add(metrics...)
output.Reset()
ro.aggMutex.Unlock()
}
// add and write can be called concurrently
ro.batchMutex.Lock()
ro.addBatchToBuffer()
ro.batchMutex.Unlock()
nFails, nMetrics := ro.failMetrics.Len(), ro.metrics.Len()
ro.BufferSize.Set(int64(nFails + nMetrics))
log.Printf("D! Output [%s] buffer fullness: %d / %d metrics. ",
ro.Name, nFails+nMetrics, ro.MetricBufferLimit)
var err error
if !ro.failMetrics.IsEmpty() {
// how many batches of failed writes we need to write.
nBatches := nFails/ro.MetricBatchSize + 1
batchSize := ro.MetricBatchSize
nBuffer := ro.buffer.Len()
for i := 0; i < nBatches; i++ {
// If it's the last batch, only grab the metrics that have not had
// a write attempt already (this is primarily to preserve order).
if i == nBatches-1 {
batchSize = nFails % ro.MetricBatchSize
}
batch := ro.failMetrics.Batch(batchSize)
// If we've already failed previous writes, don't bother trying to
// write to this output again. We are not exiting the loop just so
// that we can rotate the metrics to preserve order.
if err == nil {
err = ro.write(batch)
}
if err != nil {
ro.failMetrics.Add(batch...)
}
// Only process the metrics in the buffer now. Metrics added while we are
// writing will be sent on the next call.
nBatches := nBuffer/ro.MetricBatchSize + 1
for i := 0; i < nBatches; i++ {
batch := ro.buffer.Batch(ro.MetricBatchSize)
if len(batch) == 0 {
break
}
}
batch := ro.metrics.Batch(ro.MetricBatchSize)
// see comment above about not trying to write to an already failed output.
// if ro.failMetrics is empty then err will always be nil at this point.
if err == nil {
err = ro.write(batch)
}
if err != nil {
ro.failMetrics.Add(batch...)
return err
err := ro.write(batch)
if err != nil {
ro.buffer.Reject(batch)
return err
}
ro.buffer.Accept(batch)
}
return nil
}
func (ro *RunningOutput) write(metrics []telegraf.Metric) error {
nMetrics := len(metrics)
if nMetrics == 0 {
// WriteBatch writes only the batch metrics to the output.
func (ro *RunningOutput) WriteBatch() error {
batch := ro.buffer.Batch(ro.MetricBatchSize)
if len(batch) == 0 {
return nil
}
ro.writeMutex.Lock()
defer ro.writeMutex.Unlock()
err := ro.write(batch)
if err != nil {
ro.buffer.Reject(batch)
return err
}
ro.buffer.Accept(batch)
return nil
}
func (ro *RunningOutput) write(metrics []telegraf.Metric) error {
start := time.Now()
err := ro.Output.Write(metrics)
elapsed := time.Since(start)
ro.WriteTime.Incr(elapsed.Nanoseconds())
if err == nil {
log.Printf("D! Output [%s] wrote batch of %d metrics in %s\n",
ro.Name, nMetrics, elapsed)
ro.MetricsWritten.Incr(int64(nMetrics))
ro.WriteTime.Incr(elapsed.Nanoseconds())
log.Printf("D! [outputs.%s] wrote batch of %d metrics in %s\n",
ro.Name, len(metrics), elapsed)
}
return err
}
// LogBufferStatus writes a debug log line with the number of metrics
// currently in the buffer and the configured buffer limit.
func (ro *RunningOutput) LogBufferStatus() {
nBuffer := ro.buffer.Len()
log.Printf("D! [outputs.%s] buffer fullness: %d / %d metrics. ",
ro.Name, nBuffer, ro.MetricBufferLimit)
}

View File

@@ -231,56 +231,6 @@ func TestRunningOutputDefault(t *testing.T) {
assert.Len(t, m.Metrics(), 10)
}
// Test that running output doesn't flush until it's full when
// FlushBufferWhenFull is set.
func TestRunningOutputFlushWhenFull(t *testing.T) {
conf := &OutputConfig{
Filter: Filter{},
}
m := &mockOutput{}
ro := NewRunningOutput("test", m, conf, 6, 10)
// Fill buffer to 1 under limit
for _, metric := range first5 {
ro.AddMetric(metric)
}
// no flush yet
assert.Len(t, m.Metrics(), 0)
// add one more metric
ro.AddMetric(next5[0])
// now it flushed
assert.Len(t, m.Metrics(), 6)
// add one more metric and write it manually
ro.AddMetric(next5[1])
err := ro.Write()
assert.NoError(t, err)
assert.Len(t, m.Metrics(), 7)
}
// Test that running output doesn't flush until it's full when
// FlushBufferWhenFull is set, twice.
func TestRunningOutputMultiFlushWhenFull(t *testing.T) {
conf := &OutputConfig{
Filter: Filter{},
}
m := &mockOutput{}
ro := NewRunningOutput("test", m, conf, 4, 12)
// Fill buffer past limit twive
for _, metric := range first5 {
ro.AddMetric(metric)
}
for _, metric := range next5 {
ro.AddMetric(metric)
}
// flushed twice
assert.Len(t, m.Metrics(), 8)
}
func TestRunningOutputWriteFail(t *testing.T) {
conf := &OutputConfig{
Filter: Filter{},

View File

@@ -27,6 +27,19 @@ type ProcessorConfig struct {
Filter Filter
}
// metricFiltered calls Drop on a metric that will not be passed on.
func (rp *RunningProcessor) metricFiltered(metric telegraf.Metric) {
metric.Drop()
}
// containsMetric reports whether item is one of the elements of metrics,
// compared with ==.
func containsMetric(item telegraf.Metric, metrics []telegraf.Metric) bool {
	for i := range metrics {
		if metrics[i] == item {
			return true
		}
	}
	return false
}
func (rp *RunningProcessor) Apply(in ...telegraf.Metric) []telegraf.Metric {
rp.Lock()
defer rp.Unlock()
@@ -43,6 +56,7 @@ func (rp *RunningProcessor) Apply(in ...telegraf.Metric) []telegraf.Metric {
rp.Config.Filter.Modify(metric)
if len(metric.FieldList()) == 0 {
rp.metricFiltered(metric)
continue
}

View File

@@ -6,7 +6,7 @@ import (
"time"
"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/metric"
"github.com/influxdata/telegraf/testutil"
"github.com/stretchr/testify/require"
)
@@ -41,20 +41,6 @@ func TagProcessor(key, value string) *MockProcessor {
}
}
func Metric(
name string,
tags map[string]string,
fields map[string]interface{},
tm time.Time,
tp ...telegraf.ValueType,
) telegraf.Metric {
m, err := metric.New(name, tags, fields, tm, tp...)
if err != nil {
panic(err)
}
return m
}
func TestRunningProcessor_Apply(t *testing.T) {
type args struct {
Processor telegraf.Processor
@@ -76,7 +62,7 @@ func TestRunningProcessor_Apply(t *testing.T) {
},
},
input: []telegraf.Metric{
Metric(
testutil.MustMetric(
"cpu",
map[string]string{},
map[string]interface{}{
@@ -86,7 +72,7 @@ func TestRunningProcessor_Apply(t *testing.T) {
),
},
expected: []telegraf.Metric{
Metric(
testutil.MustMetric(
"cpu",
map[string]string{
"apply": "true",
@@ -109,7 +95,7 @@ func TestRunningProcessor_Apply(t *testing.T) {
},
},
input: []telegraf.Metric{
Metric(
testutil.MustMetric(
"cpu",
map[string]string{},
map[string]interface{}{
@@ -119,7 +105,7 @@ func TestRunningProcessor_Apply(t *testing.T) {
),
},
expected: []telegraf.Metric{
Metric(
testutil.MustMetric(
"cpu",
map[string]string{
"apply": "true",
@@ -142,7 +128,7 @@ func TestRunningProcessor_Apply(t *testing.T) {
},
},
input: []telegraf.Metric{
Metric(
testutil.MustMetric(
"cpu",
map[string]string{},
map[string]interface{}{
@@ -152,7 +138,7 @@ func TestRunningProcessor_Apply(t *testing.T) {
),
},
expected: []telegraf.Metric{
Metric(
testutil.MustMetric(
"cpu",
map[string]string{},
map[string]interface{}{