Remove outputs blocking inputs when output is slow (#4938)

This commit is contained in:
Daniel Nelson
2018-11-05 13:34:28 -08:00
committed by GitHub
parent 74667cd681
commit 6e5c2f8bb6
59 changed files with 3615 additions and 2189 deletions

View File

@@ -1,130 +0,0 @@
package buffer
import (
"sync"
"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/selfstat"
)
// Package-level counters tracking the total number of metrics accepted
// into and dropped from any Buffer, exposed via the selfstat registry.
var (
	MetricsWritten = selfstat.Register("agent", "metrics_written", map[string]string{})
	MetricsDropped = selfstat.Register("agent", "metrics_dropped", map[string]string{})
)
// Buffer is an object for storing metrics in a circular buffer.
//
// first and last are indices into buf of the oldest and newest stored
// metric; because both are valid indices, the empty flag is needed to
// distinguish an empty ring from one holding a single metric.
type Buffer struct {
	sync.Mutex
	buf []telegraf.Metric
	first int // index of the oldest metric in buf
	last int // index of the newest metric in buf
	size int // capacity of buf (maximum number of stored metrics)
	empty bool // true when the buffer holds no metrics
}
// NewBuffer returns a Buffer.
// size is the maximum number of metrics that Buffer will cache. If Add is
// called when the buffer is full, then the oldest metric(s) will be dropped.
func NewBuffer(size int) *Buffer {
	b := &Buffer{
		empty: true,
		size:  size,
		buf:   make([]telegraf.Metric, size),
	}
	return b
}
// IsEmpty returns true if Buffer is empty.
//
// The empty flag is now read under the buffer's mutex; the original read
// it without locking, which is a data race when Add/Batch run concurrently
// (as the benchmarks in this package do). No locked method calls IsEmpty,
// so taking the lock here cannot deadlock.
func (b *Buffer) IsEmpty() bool {
	b.Lock()
	defer b.Unlock()
	return b.empty
}
// Len returns the current length of the buffer.
//
// NOTE(review): Len deliberately does not take the mutex — Batch calls it
// while already holding the lock and sync.Mutex is not reentrant, so
// locking here would deadlock. External callers racing with Add/Batch
// therefore get a best-effort value.
func (b *Buffer) Len() int {
	if b.empty {
		return 0
	} else if b.first <= b.last {
		// Contiguous region: inclusive distance from first to last.
		return b.last - b.first + 1
	}
	// Spans the end of array.
	// size - gap in the middle
	return b.size - (b.first - b.last - 1) // size - gap
}
// push appends m to the ring, overwriting (and counting as dropped) the
// oldest metric when the buffer is full. Callers must hold b's mutex.
func (b *Buffer) push(m telegraf.Metric) {
	// Empty
	if b.empty {
		b.last = b.first // Reset
		b.buf[b.last] = m
		b.empty = false
		return
	}

	// Advance the write position, wrapping around the ring.
	b.last++
	b.last %= b.size

	// Full
	if b.first == b.last {
		// The slot we are about to write holds the oldest metric;
		// advancing first evicts it, so record the drop.
		MetricsDropped.Incr(1)
		b.first = (b.first + 1) % b.size
	}
	b.buf[b.last] = m
}
// Add adds metrics to the buffer.
func (b *Buffer) Add(metrics ...telegraf.Metric) {
	b.Lock()
	defer b.Unlock()
	for _, m := range metrics {
		MetricsWritten.Incr(1)
		b.push(m)
	}
}
// Batch returns a batch of metrics of size batchSize.
// the batch will be of maximum length batchSize. It can be less than batchSize,
// if the length of Buffer is less than batchSize.
//
// The returned metrics are removed from the buffer. The copy happens in at
// most two pieces: the contiguous run starting at first, then (if the span
// wraps) a second run from the start of the array.
func (b *Buffer) Batch(batchSize int) []telegraf.Metric {
	b.Lock()
	defer b.Unlock()
	outLen := min(b.Len(), batchSize)
	out := make([]telegraf.Metric, outLen)
	if outLen == 0 {
		return out
	}

	// We copy everything right of first up to last, count or end
	// b.last >= rightInd || b.last < b.first
	// therefore wont copy past b.last
	rightInd := min(b.size, b.first+outLen) - 1

	copyCount := copy(out, b.buf[b.first:rightInd+1])

	// We've emptied the ring
	if rightInd == b.last {
		b.empty = true
	}
	b.first = rightInd + 1
	b.first %= b.size

	// We circle back for the rest
	if copyCount < outLen {
		right := min(b.last, outLen-copyCount)
		copy(out[copyCount:], b.buf[b.first:right+1])

		// We've emptied the ring
		if right == b.last {
			b.empty = true
		}
		b.first = right + 1
		b.first %= b.size
	}
	return out
}
// min returns the smaller of a and b.
func min(a, b int) int {
	if a <= b {
		return a
	}
	return b
}

View File

@@ -1,203 +0,0 @@
package buffer
import (
"sync"
"sync/atomic"
"testing"
"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/testutil"
"github.com/stretchr/testify/assert"
)
// metricList is a fixed set of five distinct metrics shared by the tests
// below; adding it fills five buffer slots at a time.
var metricList = []telegraf.Metric{
	testutil.TestMetric(2, "mymetric1"),
	testutil.TestMetric(1, "mymetric2"),
	testutil.TestMetric(11, "mymetric3"),
	testutil.TestMetric(15, "mymetric4"),
	testutil.TestMetric(8, "mymetric5"),
}
// makeBench5 benchmarks Add in groups of five metrics, launching a
// concurrent Batch drain every freq*1000 iterations, then drains the
// remainder before waiting for the drain goroutines.
func makeBench5(b *testing.B, freq, batchSize int) {
	const k = 1000
	var wg sync.WaitGroup
	buf := NewBuffer(10000)
	m := testutil.TestMetric(1, "mymetric")

	for i := 0; i < b.N; i++ {
		buf.Add(m, m, m, m, m)
		if i%(freq*k) == 0 {
			wg.Add(1)
			go func() {
				buf.Batch(batchSize * k)
				wg.Done()
			}()
		}
	}
	// Flush
	buf.Batch(b.N)
	wg.Wait()
}
// makeBenchStrict is like makeBench but tallies every metric drained and
// fails the benchmark if the total differs from b.N — i.e. if any metric
// was dropped by the ring or double-counted.
func makeBenchStrict(b *testing.B, freq, batchSize int) {
	const k = 1000
	var count uint64
	var wg sync.WaitGroup
	buf := NewBuffer(10000)
	m := testutil.TestMetric(1, "mymetric")

	for i := 0; i < b.N; i++ {
		buf.Add(m)
		if i%(freq*k) == 0 {
			wg.Add(1)
			go func() {
				defer wg.Done()
				l := len(buf.Batch(batchSize * k))
				atomic.AddUint64(&count, uint64(l))
			}()
		}
	}
	// Flush
	wg.Add(1)
	go func() {
		l := len(buf.Batch(b.N))
		atomic.AddUint64(&count, uint64(l))
		wg.Done()
	}()
	wg.Wait()
	if count != uint64(b.N) {
		b.Errorf("not all metrics came out. %d of %d", count, b.N)
	}
}
// makeBench benchmarks single-metric Adds with a concurrent Batch drain
// every freq*1000 iterations. Note it waits for the drain goroutines
// before the final flush (makeBench5 does the opposite).
func makeBench(b *testing.B, freq, batchSize int) {
	const k = 1000
	var wg sync.WaitGroup
	buf := NewBuffer(10000)
	m := testutil.TestMetric(1, "mymetric")

	for i := 0; i < b.N; i++ {
		buf.Add(m)
		if i%(freq*k) == 0 {
			wg.Add(1)
			go func() {
				buf.Batch(batchSize * k)
				wg.Done()
			}()
		}
	}
	wg.Wait()
	// Flush
	buf.Batch(b.N)
}
// Benchmark wrappers exercising the helpers above with different drain
// frequencies and batch sizes.

func BenchmarkBufferBatch5Add(b *testing.B) {
	makeBench5(b, 100, 101)
}

func BenchmarkBufferBigInfrequentBatchCatchup(b *testing.B) {
	makeBench(b, 100, 101)
}

func BenchmarkBufferOftenBatch(b *testing.B) {
	makeBench(b, 1, 1)
}

func BenchmarkBufferAlmostBatch(b *testing.B) {
	makeBench(b, 10, 9)
}

func BenchmarkBufferSlowBatch(b *testing.B) {
	makeBench(b, 10, 1)
}

func BenchmarkBufferBatchNoDrop(b *testing.B) {
	makeBenchStrict(b, 1, 4)
}

// BenchmarkBufferCatchup adds b.N metrics with no concurrent drain, then
// drains once at the end.
func BenchmarkBufferCatchup(b *testing.B) {
	buf := NewBuffer(10000)
	m := testutil.TestMetric(1, "mymetric")
	for i := 0; i < b.N; i++ {
		buf.Add(m)
	}
	buf.Batch(b.N)
}

// BenchmarkAddMetrics measures the cost of Add alone.
func BenchmarkAddMetrics(b *testing.B) {
	buf := NewBuffer(10000)
	m := testutil.TestMetric(1, "mymetric")
	for n := 0; n < b.N; n++ {
		buf.Add(m)
	}
}
// TestNewBufferBasicFuncs covers IsEmpty/Len and the package-level
// counters on a fresh buffer after a single add and a bulk add.
func TestNewBufferBasicFuncs(t *testing.T) {
	b := NewBuffer(10)
	// Counters are package globals; reset so earlier tests don't leak in.
	MetricsDropped.Set(0)
	MetricsWritten.Set(0)

	assert.True(t, b.IsEmpty())
	assert.Zero(t, b.Len())
	assert.Zero(t, MetricsDropped.Get())
	assert.Zero(t, MetricsWritten.Get())

	m := testutil.TestMetric(1, "mymetric")
	b.Add(m)
	assert.False(t, b.IsEmpty())
	assert.Equal(t, b.Len(), 1)
	assert.Equal(t, int64(0), MetricsDropped.Get())
	assert.Equal(t, int64(1), MetricsWritten.Get())

	b.Add(metricList...)
	assert.False(t, b.IsEmpty())
	assert.Equal(t, b.Len(), 6)
	assert.Equal(t, int64(0), MetricsDropped.Get())
	assert.Equal(t, int64(6), MetricsWritten.Get())
}
// TestDroppingMetrics verifies that adds beyond capacity evict the oldest
// metrics and are counted in MetricsDropped.
func TestDroppingMetrics(t *testing.T) {
	b := NewBuffer(10)
	MetricsDropped.Set(0)
	MetricsWritten.Set(0)

	// Add up to the size of the buffer
	b.Add(metricList...)
	b.Add(metricList...)
	assert.False(t, b.IsEmpty())
	assert.Equal(t, b.Len(), 10)
	assert.Equal(t, int64(0), MetricsDropped.Get())
	assert.Equal(t, int64(10), MetricsWritten.Get())

	// Add 5 more and verify they were dropped
	b.Add(metricList...)
	assert.False(t, b.IsEmpty())
	assert.Equal(t, b.Len(), 10)
	assert.Equal(t, int64(5), MetricsDropped.Get())
	assert.Equal(t, int64(15), MetricsWritten.Get())
}
// TestGettingBatches verifies Batch's size clamping and that batched
// metrics are removed from this (old-style) buffer.
func TestGettingBatches(t *testing.T) {
	b := NewBuffer(20)
	MetricsDropped.Set(0)
	MetricsWritten.Set(0)

	// Verify that the buffer returned is smaller than requested when there are
	// not as many items as requested.
	b.Add(metricList...)
	batch := b.Batch(10)
	assert.Len(t, batch, 5)

	// Verify that the buffer is now empty
	assert.True(t, b.IsEmpty())
	assert.Zero(t, b.Len())
	assert.Zero(t, MetricsDropped.Get())
	assert.Equal(t, int64(5), MetricsWritten.Get())

	// Verify that the buffer returned is not more than the size requested
	b.Add(metricList...)
	batch = b.Batch(3)
	assert.Len(t, batch, 3)

	// Verify that buffer is not empty
	assert.False(t, b.IsEmpty())
	assert.Equal(t, b.Len(), 2)
	assert.Equal(t, int64(0), MetricsDropped.Get())
	assert.Equal(t, int64(10), MetricsWritten.Get())
}

View File

@@ -9,7 +9,6 @@ import (
"math"
"os"
"path/filepath"
"regexp"
"runtime"
"sort"
@@ -26,7 +25,6 @@ import (
"github.com/influxdata/telegraf/plugins/parsers"
"github.com/influxdata/telegraf/plugins/processors"
"github.com/influxdata/telegraf/plugins/serializers"
"github.com/influxdata/toml"
"github.com/influxdata/toml/ast"
)
@@ -622,6 +620,19 @@ func (c *Config) LoadConfig(path string) error {
}
}
if !c.Agent.OmitHostname {
if c.Agent.Hostname == "" {
hostname, err := os.Hostname()
if err != nil {
return err
}
c.Agent.Hostname = hostname
}
c.Tags["host"] = c.Agent.Hostname
}
// Parse all the rest of the plugins:
for name, val := range tbl.Fields {
subTable, ok := val.(*ast.Table)
@@ -709,6 +720,7 @@ func (c *Config) LoadConfig(path string) error {
if len(c.Processors) > 1 {
sort.Sort(c.Processors)
}
return nil
}
@@ -876,6 +888,7 @@ func (c *Config) addInput(name string, table *ast.Table) error {
}
rp := models.NewRunningInput(input, pluginConfig)
rp.SetDefaultTags(c.Tags)
c.Inputs = append(c.Inputs, rp)
return nil
}
@@ -1751,6 +1764,8 @@ func buildOutput(name string, tbl *ast.Table) (*models.OutputConfig, error) {
Name: name,
Filter: filter,
}
// TODO
// Outputs don't support FieldDrop/FieldPass, so set to NameDrop/NamePass
if len(oc.Filter.FieldDrop) > 0 {
oc.Filter.NameDrop = oc.Filter.FieldDrop
@@ -1758,5 +1773,47 @@ func buildOutput(name string, tbl *ast.Table) (*models.OutputConfig, error) {
if len(oc.Filter.FieldPass) > 0 {
oc.Filter.NamePass = oc.Filter.FieldPass
}
if node, ok := tbl.Fields["flush_interval"]; ok {
if kv, ok := node.(*ast.KeyValue); ok {
if str, ok := kv.Value.(*ast.String); ok {
dur, err := time.ParseDuration(str.Value)
if err != nil {
return nil, err
}
oc.FlushInterval = dur
}
}
}
if node, ok := tbl.Fields["metric_buffer_limit"]; ok {
if kv, ok := node.(*ast.KeyValue); ok {
if integer, ok := kv.Value.(*ast.Integer); ok {
v, err := integer.Int()
if err != nil {
return nil, err
}
oc.MetricBufferLimit = int(v)
}
}
}
if node, ok := tbl.Fields["metric_batch_size"]; ok {
if kv, ok := node.(*ast.KeyValue); ok {
if integer, ok := kv.Value.(*ast.Integer); ok {
v, err := integer.Int()
if err != nil {
return nil, err
}
oc.MetricBatchSize = int(v)
}
}
}
delete(tbl.Fields, "flush_interval")
delete(tbl.Fields, "metric_buffer_limit")
delete(tbl.Fields, "metric_batch_size")
return oc, nil
}

View File

@@ -4,6 +4,7 @@ import (
"bufio"
"bytes"
"compress/gzip"
"context"
"crypto/rand"
"errors"
"io"
@@ -246,6 +247,51 @@ func RandomSleep(max time.Duration, shutdown chan struct{}) {
}
}
// RandomDuration returns a random duration between 0 and max.
func RandomDuration(max time.Duration) time.Duration {
if max == 0 {
return 0
}
var sleepns int64
maxSleep := big.NewInt(max.Nanoseconds())
if j, err := rand.Int(rand.Reader, maxSleep); err == nil {
sleepns = j.Int64()
}
return time.Duration(sleepns)
}
// SleepContext sleeps until the context is closed or the duration is reached.
func SleepContext(ctx context.Context, duration time.Duration) error {
if duration == 0 {
return nil
}
t := time.NewTimer(duration)
select {
case <-t.C:
return nil
case <-ctx.Done():
t.Stop()
return ctx.Err()
}
}
// AlignDuration returns the duration until next aligned interval.
func AlignDuration(tm time.Time, interval time.Duration) time.Duration {
return AlignTime(tm, interval).Sub(tm)
}
// AlignTime returns the time of the next aligned interval.
func AlignTime(tm time.Time, interval time.Duration) time.Time {
truncated := tm.Truncate(interval)
if truncated == tm {
return tm
}
return truncated.Add(interval)
}
// Exit status takes the error from exec.Command
// and returns the exit status and true
// if error is not exit status, will return 0 and false

View File

@@ -9,6 +9,7 @@ import (
"time"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
type SnakeTest struct {
@@ -217,3 +218,55 @@ func TestVersionAlreadySet(t *testing.T) {
assert.Equal(t, "foo", Version())
}
// TestAlignDuration table-tests AlignDuration over aligned, unaligned,
// sub-second, non-divisible, and long-interval cases.
func TestAlignDuration(t *testing.T) {
	tests := []struct {
		name     string
		now      time.Time
		interval time.Duration
		expected time.Duration
	}{
		{
			name:     "aligned",
			now:      time.Date(2018, 1, 1, 1, 1, 0, 0, time.UTC),
			interval: 10 * time.Second,
			expected: 0 * time.Second,
		},
		{
			name:     "standard interval",
			now:      time.Date(2018, 1, 1, 1, 1, 1, 0, time.UTC),
			interval: 10 * time.Second,
			expected: 9 * time.Second,
		},
		{
			name:     "odd interval",
			now:      time.Date(2018, 1, 1, 1, 1, 1, 0, time.UTC),
			interval: 3 * time.Second,
			expected: 2 * time.Second,
		},
		{
			name:     "sub second interval",
			now:      time.Date(2018, 1, 1, 1, 1, 0, 5e8, time.UTC),
			interval: 1 * time.Second,
			expected: 500 * time.Millisecond,
		},
		{
			name:     "non divisible not aligned on minutes",
			now:      time.Date(2018, 1, 1, 1, 0, 0, 0, time.UTC),
			interval: 1*time.Second + 100*time.Millisecond,
			expected: 400 * time.Millisecond,
		},
		{
			name:     "long interval",
			now:      time.Date(2018, 1, 1, 1, 1, 0, 0, time.UTC),
			interval: 1 * time.Hour,
			expected: 59 * time.Minute,
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			actual := AlignDuration(tt.now, tt.interval)
			require.Equal(t, tt.expected, actual)
		})
	}
}

214
internal/models/buffer.go Normal file
View File

@@ -0,0 +1,214 @@
package models
import (
"sync"
"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/selfstat"
)
// Agent-wide totals across all output buffers; per-output counts are kept
// as fields on each Buffer instance.
var (
	AgentMetricsWritten = selfstat.Register("agent", "metrics_written", map[string]string{})
	AgentMetricsDropped = selfstat.Register("agent", "metrics_dropped", map[string]string{})
)
// Buffer stores metrics in a circular buffer.
//
// Besides the ring itself, the Buffer remembers the span handed out by
// the most recent Batch call so that Accept/Reject can settle those
// metrics later; Add may shrink the outstanding batch when it overwrites
// slots belonging to it. The embedded Mutex guards all fields.
type Buffer struct {
	sync.Mutex
	buf []telegraf.Metric
	first int // index of the first/oldest metric
	last int // one after the index of the last/newest metric
	size int // number of metrics currently in the buffer
	cap int // the capacity of the buffer

	batchFirst int // index of the first metric in the batch
	batchLast int // one after the index of the last metric in the batch
	batchSize int // number of metrics current in the batch

	MetricsAdded selfstat.Stat
	MetricsWritten selfstat.Stat
	MetricsDropped selfstat.Stat
}
// NewBuffer returns a new empty Buffer with the given capacity, with
// per-output statistics registered under the output's name.
func NewBuffer(name string, capacity int) *Buffer {
	buf := &Buffer{
		buf: make([]telegraf.Metric, capacity),
		cap: capacity,
		MetricsAdded: selfstat.Register(
			"write", "metrics_added", map[string]string{"output": name}),
		MetricsWritten: selfstat.Register(
			"write", "metrics_written", map[string]string{"output": name}),
		MetricsDropped: selfstat.Register(
			"write", "metrics_dropped", map[string]string{"output": name}),
	}
	return buf
}
// Len returns the number of metrics currently in the buffer.
func (b *Buffer) Len() int {
	b.Lock()
	n := b.size
	b.Unlock()
	return n
}
// metricAdded accounts for a metric entering the buffer.
func (b *Buffer) metricAdded() {
	b.MetricsAdded.Incr(1)
}
// metricWritten counts the metric as delivered (per-output and
// agent-wide) and notifies the metric that it has been accepted.
func (b *Buffer) metricWritten(metric telegraf.Metric) {
	AgentMetricsWritten.Incr(1)
	b.MetricsWritten.Incr(1)
	metric.Accept()
}
// metricDropped counts the metric as dropped (per-output and agent-wide)
// and notifies the metric that it will never be delivered.
func (b *Buffer) metricDropped(metric telegraf.Metric) {
	AgentMetricsDropped.Incr(1)
	b.MetricsDropped.Incr(1)
	metric.Reject()
}
// inBatch reports whether the slot at b.last (the next write position)
// falls inside the span reserved by the outstanding batch.
func (b *Buffer) inBatch() bool {
	switch {
	case b.batchSize == 0:
		// No outstanding batch.
		return false
	case b.batchFirst < b.batchLast:
		// Batch span does not wrap around the ring.
		return b.last >= b.batchFirst && b.last < b.batchLast
	default:
		// Batch span wraps past the end of the ring.
		return b.last >= b.batchFirst || b.last < b.batchLast
	}
}
// add inserts m into the ring. When the ring is full the oldest metric is
// overwritten: it is dropped immediately unless it belongs to the
// outstanding batch, in which case the drop is deferred until Reject.
// Callers must hold b's mutex.
func (b *Buffer) add(m telegraf.Metric) {
	// Check if Buffer is full
	if b.size == b.cap {
		// When full, b.last == b.first, so b.buf[b.last] is the oldest metric
		// and is about to be overwritten.
		if b.batchSize == 0 {
			// No batch taken by the output, we can drop the metric now.
			b.metricDropped(b.buf[b.last])
		} else if b.inBatch() {
			// There is an outstanding batch and this will overwrite a metric
			// in it, delay the dropping only in case the batch gets rejected.
			b.batchSize--
			b.batchFirst++
			b.batchFirst %= b.cap
		} else {
			// There is an outstanding batch, but this overwrites a metric
			// outside of it.
			b.metricDropped(b.buf[b.last])
		}
	}

	b.metricAdded()

	b.buf[b.last] = m
	b.last++
	b.last %= b.cap

	// When full, the oldest slot was just reused, so first advances too.
	if b.size == b.cap {
		b.first++
		b.first %= b.cap
	}

	b.size = min(b.size+1, b.cap)
}
// Add adds metrics to the buffer
func (b *Buffer) Add(metrics ...telegraf.Metric) {
	b.Lock()
	defer b.Unlock()

	for _, m := range metrics {
		b.add(m)
	}
}
// Batch returns a slice containing up to batchSize of the most recently added
// metrics.
//
// The metrics contained in the batch are not removed from the buffer, instead
// the last batch is recorded and removed only if Accept is called.
//
// NOTE(review): despite "most recently added" above, the batch is taken
// starting at b.first, i.e. the oldest metrics — confirm intended wording
// against callers.
func (b *Buffer) Batch(batchSize int) []telegraf.Metric {
	b.Lock()
	defer b.Unlock()

	outLen := min(b.size, batchSize)
	out := make([]telegraf.Metric, outLen)
	if outLen == 0 {
		return out
	}

	// Record the batch span so Accept/Reject can settle it later.
	b.batchFirst = b.first
	b.batchLast = b.first + outLen
	b.batchLast %= b.cap
	b.batchSize = outLen

	// Copy the contiguous part, then wrap to the start of the ring if needed.
	until := min(b.cap, b.first+outLen)

	n := copy(out, b.buf[b.first:until])
	if n < outLen {
		copy(out[n:], b.buf[:outLen-n])
	}
	return out
}
// Accept removes the metrics contained in the last batch.
//
// Every metric in the caller's slice is counted as written; the amount
// removed from the ring is b.batchSize, which Add may have shrunk below
// len(batch) if it overwrote batched slots in the meantime.
func (b *Buffer) Accept(batch []telegraf.Metric) {
	b.Lock()
	defer b.Unlock()

	for _, m := range batch {
		b.metricWritten(m)
	}

	if b.batchSize > 0 {
		b.size -= b.batchSize
		b.first += b.batchSize
		b.first %= b.cap
	}

	b.resetBatch()
}
// Reject clears the current batch record so that calls to Accept will have no
// effect.
//
// Metrics overwritten by Add while the batch was outstanding had their
// drop deferred; they are dropped here now that the batch has failed.
//
// NOTE(review): Add overwrites the *oldest* batched slots (it advances
// batchFirst), i.e. the front of the batch slice, yet the tail slice
// batch[b.batchSize:] is dropped here. The count is right, but the
// identities look swapped — the survivors still in the ring get Reject
// while the overwritten front elements are never settled. Suspect this
// should be batch[:len(batch)-b.batchSize]; confirm before changing.
func (b *Buffer) Reject(batch []telegraf.Metric) {
	b.Lock()
	defer b.Unlock()

	if len(batch) > b.batchSize {
		// Part or all of the batch was dropped before reject was called.
		for _, m := range batch[b.batchSize:] {
			b.metricDropped(m)
		}
	}

	b.resetBatch()
}
// resetBatch forgets the outstanding batch span.
func (b *Buffer) resetBatch() {
	b.batchFirst, b.batchLast, b.batchSize = 0, 0, 0
}
// min returns the smaller of a and b.
func min(a, b int) int {
	if a <= b {
		return a
	}
	return b
}

View File

@@ -0,0 +1,385 @@
package models
import (
"testing"
"time"
"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/metric"
"github.com/stretchr/testify/require"
)
// MockMetric wraps a telegraf.Metric and routes the Accept/Reject/Drop
// lifecycle calls to test-provided callbacks so tests can count them.
type MockMetric struct {
	telegraf.Metric
	AcceptF func()
	RejectF func()
	DropF   func()
}

// Accept forwards to the test-provided callback.
func (m *MockMetric) Accept() {
	m.AcceptF()
}

// Reject forwards to the test-provided callback.
func (m *MockMetric) Reject() {
	m.RejectF()
}

// Drop forwards to the test-provided callback.
func (m *MockMetric) Drop() {
	m.DropF()
}
// Metric returns a fresh minimal "cpu" metric for tests, panicking if
// construction fails (acceptable in test-only code).
func Metric() telegraf.Metric {
	m, err := metric.New(
		"cpu",
		map[string]string{},
		map[string]interface{}{
			"value": 42.0,
		},
		time.Unix(0, 0),
	)
	if err != nil {
		panic(err)
	}
	return m
}
// BenchmarkAddMetrics measures the cost of Add alone on a large buffer.
func BenchmarkAddMetrics(b *testing.B) {
	buf := NewBuffer("test", 10000)
	m := Metric()
	for n := 0; n < b.N; n++ {
		buf.Add(m)
	}
}
// setup zeroes the buffer's per-output counters (which are registered
// globally by name) so each test starts from a clean slate.
func setup(b *Buffer) *Buffer {
	b.MetricsAdded.Set(0)
	b.MetricsWritten.Set(0)
	b.MetricsDropped.Set(0)
	return b
}
// Length semantics: Len and Batch sizing under empty, partial, full,
// overfilled, and wrapped buffers.

func TestBuffer_LenEmpty(t *testing.T) {
	b := setup(NewBuffer("test", 5))

	require.Equal(t, 0, b.Len())
}

func TestBuffer_LenOne(t *testing.T) {
	m := Metric()
	b := setup(NewBuffer("test", 5))
	b.Add(m)

	require.Equal(t, 1, b.Len())
}

func TestBuffer_LenFull(t *testing.T) {
	m := Metric()
	b := setup(NewBuffer("test", 5))
	b.Add(m, m, m, m, m)

	require.Equal(t, 5, b.Len())
}

func TestBuffer_LenOverfill(t *testing.T) {
	m := Metric()
	b := setup(NewBuffer("test", 5))
	setup(b)
	b.Add(m, m, m, m, m, m)

	require.Equal(t, 5, b.Len())
}

func TestBuffer_BatchLenZero(t *testing.T) {
	b := setup(NewBuffer("test", 5))
	batch := b.Batch(0)

	require.Len(t, batch, 0)
}

func TestBuffer_BatchLenBufferEmpty(t *testing.T) {
	b := setup(NewBuffer("test", 5))
	batch := b.Batch(2)

	require.Len(t, batch, 0)
}

func TestBuffer_BatchLenUnderfill(t *testing.T) {
	m := Metric()
	b := setup(NewBuffer("test", 5))
	b.Add(m)
	batch := b.Batch(2)

	require.Len(t, batch, 1)
}

func TestBuffer_BatchLenFill(t *testing.T) {
	m := Metric()
	b := setup(NewBuffer("test", 5))
	b.Add(m, m, m)
	batch := b.Batch(2)

	require.Len(t, batch, 2)
}

func TestBuffer_BatchLenExact(t *testing.T) {
	m := Metric()
	b := setup(NewBuffer("test", 5))
	b.Add(m, m)
	batch := b.Batch(2)

	require.Len(t, batch, 2)
}

func TestBuffer_BatchLenLargerThanBuffer(t *testing.T) {
	m := Metric()
	b := setup(NewBuffer("test", 5))
	b.Add(m, m, m, m, m)
	batch := b.Batch(6)

	require.Len(t, batch, 5)
}

// Batch spanning the wrap point of the ring must still return a full batch.
func TestBuffer_BatchWrap(t *testing.T) {
	m := Metric()
	b := setup(NewBuffer("test", 5))
	b.Add(m, m, m, m, m)
	batch := b.Batch(2)
	b.Accept(batch)
	b.Add(m, m)
	batch = b.Batch(5)

	require.Len(t, batch, 5)
}
// Accept/Reject semantics, including batches partially or fully
// overwritten by subsequent Adds.

func TestBuffer_AddDropsOverwrittenMetrics(t *testing.T) {
	m := Metric()
	b := setup(NewBuffer("test", 5))

	b.Add(m, m, m, m, m)
	b.Add(m, m, m, m, m)

	require.Equal(t, int64(5), b.MetricsDropped.Get())
	require.Equal(t, int64(0), b.MetricsWritten.Get())
}

func TestBuffer_AcceptRemovesBatch(t *testing.T) {
	m := Metric()
	b := setup(NewBuffer("test", 5))
	b.Add(m, m, m)
	batch := b.Batch(2)
	b.Accept(batch)

	require.Equal(t, 1, b.Len())
}

func TestBuffer_RejectLeavesBatch(t *testing.T) {
	m := Metric()
	b := setup(NewBuffer("test", 5))
	b.Add(m, m, m)
	batch := b.Batch(2)
	b.Reject(batch)

	require.Equal(t, 3, b.Len())
}

// Overwritten batch metrics still count as written when accepted.
func TestBuffer_AcceptWritesOverwrittenBatch(t *testing.T) {
	m := Metric()
	b := setup(NewBuffer("test", 5))

	b.Add(m, m, m, m, m)
	batch := b.Batch(5)
	b.Add(m, m, m, m, m)
	b.Accept(batch)

	require.Equal(t, int64(0), b.MetricsDropped.Get())
	require.Equal(t, int64(5), b.MetricsWritten.Get())
}

// Overwritten batch metrics count as dropped when the batch is rejected.
func TestBuffer_BatchRejectDropsOverwrittenBatch(t *testing.T) {
	m := Metric()
	b := setup(NewBuffer("test", 5))

	b.Add(m, m, m, m, m)
	batch := b.Batch(5)
	b.Add(m, m, m, m, m)
	b.Reject(batch)

	require.Equal(t, int64(5), b.MetricsDropped.Get())
	require.Equal(t, int64(0), b.MetricsWritten.Get())
}

func TestBuffer_MetricsOverwriteBatchAccept(t *testing.T) {
	m := Metric()
	b := setup(NewBuffer("test", 5))

	b.Add(m, m, m, m, m)
	batch := b.Batch(3)
	b.Add(m, m, m)
	b.Accept(batch)
	require.Equal(t, int64(0), b.MetricsDropped.Get())
	require.Equal(t, int64(3), b.MetricsWritten.Get())
}

func TestBuffer_MetricsOverwriteBatchReject(t *testing.T) {
	m := Metric()
	b := setup(NewBuffer("test", 5))

	b.Add(m, m, m, m, m)
	batch := b.Batch(3)
	b.Add(m, m, m)
	b.Reject(batch)
	require.Equal(t, int64(3), b.MetricsDropped.Get())
	require.Equal(t, int64(0), b.MetricsWritten.Get())
}

func TestBuffer_MetricsBatchAcceptRemoved(t *testing.T) {
	m := Metric()
	b := setup(NewBuffer("test", 5))

	b.Add(m, m, m, m, m)
	batch := b.Batch(3)
	b.Add(m, m, m, m, m)
	b.Accept(batch)
	require.Equal(t, int64(2), b.MetricsDropped.Get())
	require.Equal(t, int64(3), b.MetricsWritten.Get())
}

func TestBuffer_WrapWithBatch(t *testing.T) {
	m := Metric()
	b := setup(NewBuffer("test", 5))

	b.Add(m, m, m)
	b.Batch(3)
	b.Add(m, m, m, m, m, m)

	require.Equal(t, int64(1), b.MetricsDropped.Get())
}

func TestBuffer_BatchNotRemoved(t *testing.T) {
	m := Metric()
	b := setup(NewBuffer("test", 5))
	b.Add(m, m, m, m, m)
	b.Batch(2)
	require.Equal(t, 5, b.Len())
}

// Accept after Reject must be a no-op (the batch record was cleared).
func TestBuffer_BatchRejectAcceptNoop(t *testing.T) {
	m := Metric()
	b := setup(NewBuffer("test", 5))
	b.Add(m, m, m, m, m)
	batch := b.Batch(2)
	b.Reject(batch)
	b.Accept(batch)
	require.Equal(t, 5, b.Len())
}
// Lifecycle-callback tests using MockMetric to count how often the buffer
// notifies metrics of Accept/Reject.

func TestBuffer_AcceptCallsMetricAccept(t *testing.T) {
	var accept int
	mm := &MockMetric{
		Metric: Metric(),
		AcceptF: func() {
			accept++
		},
	}
	b := setup(NewBuffer("test", 5))
	b.Add(mm, mm, mm)
	batch := b.Batch(2)
	b.Accept(batch)
	require.Equal(t, 2, accept)
}

func TestBuffer_AddCallsMetricRejectWhenNoBatch(t *testing.T) {
	var reject int
	mm := &MockMetric{
		Metric: Metric(),
		RejectF: func() {
			reject++
		},
	}
	b := setup(NewBuffer("test", 5))
	setup(b)
	b.Add(mm, mm, mm, mm, mm)
	b.Add(mm, mm)
	require.Equal(t, 2, reject)
}

func TestBuffer_AddCallsMetricRejectWhenNotInBatch(t *testing.T) {
	var reject int
	mm := &MockMetric{
		Metric: Metric(),
		RejectF: func() {
			reject++
		},
	}
	b := setup(NewBuffer("test", 5))
	setup(b)
	b.Add(mm, mm, mm, mm, mm)
	batch := b.Batch(2)
	b.Add(mm, mm, mm, mm)
	// metric[2] and metric[3] rejected
	require.Equal(t, 2, reject)
	b.Reject(batch)
	// metric[1] and metric[2] now rejected
	require.Equal(t, 4, reject)
}

func TestBuffer_RejectCallsMetricRejectWithOverwritten(t *testing.T) {
	var reject int
	mm := &MockMetric{
		Metric: Metric(),
		RejectF: func() {
			reject++
		},
	}
	b := setup(NewBuffer("test", 5))
	b.Add(mm, mm, mm, mm, mm)
	batch := b.Batch(5)
	b.Add(mm, mm)
	require.Equal(t, 0, reject)
	b.Reject(batch)
	require.Equal(t, 2, reject)
}

// Overwrites beyond the batch reject immediately; the batched ones only
// on Reject.
func TestBuffer_AddOverwriteAndReject(t *testing.T) {
	var reject int
	mm := &MockMetric{
		Metric: Metric(),
		RejectF: func() {
			reject++
		},
	}
	b := setup(NewBuffer("test", 5))
	b.Add(mm, mm, mm, mm, mm)
	batch := b.Batch(5)
	b.Add(mm, mm, mm, mm, mm)
	b.Add(mm, mm, mm, mm, mm)
	b.Add(mm, mm, mm, mm, mm)
	b.Add(mm, mm, mm, mm, mm)
	require.Equal(t, 15, reject)
	b.Reject(batch)
	require.Equal(t, 20, reject)
}

func TestBuffer_AddOverwriteAndRejectOffset(t *testing.T) {
	var reject int
	var accept int
	mm := &MockMetric{
		Metric: Metric(),
		RejectF: func() {
			reject++
		},
		AcceptF: func() {
			accept++
		},
	}
	b := setup(NewBuffer("test", 5))
	b.Add(mm, mm, mm)
	b.Add(mm, mm, mm, mm)
	require.Equal(t, 2, reject)
	batch := b.Batch(5)
	b.Add(mm, mm, mm, mm)
	require.Equal(t, 2, reject)
	b.Add(mm, mm, mm, mm)
	require.Equal(t, 5, reject)
	b.Add(mm, mm, mm, mm)
	require.Equal(t, 9, reject)
	b.Add(mm, mm, mm, mm)
	require.Equal(t, 13, reject)
	b.Accept(batch)
	require.Equal(t, 13, reject)
	require.Equal(t, 5, accept)
}

View File

@@ -6,6 +6,7 @@ import (
"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/metric"
"github.com/influxdata/telegraf/testutil"
"github.com/stretchr/testify/require"
)
@@ -480,3 +481,45 @@ func TestFilter_FilterTagsPassAndDrop(t *testing.T) {
}
}
// BenchmarkFilter measures Filter.Select on an empty filter and on a
// single-name namepass filter.
func BenchmarkFilter(b *testing.B) {
	tests := []struct {
		name   string
		filter Filter
		metric telegraf.Metric
	}{
		{
			name:   "empty filter",
			filter: Filter{},
			metric: testutil.MustMetric("cpu",
				map[string]string{},
				map[string]interface{}{
					"value": 42,
				},
				time.Unix(0, 0),
			),
		},
		{
			name: "namepass",
			filter: Filter{
				NamePass: []string{"cpu"},
			},
			metric: testutil.MustMetric("cpu",
				map[string]string{},
				map[string]interface{}{
					"value": 42,
				},
				time.Unix(0, 0),
			),
		},
	}
	for _, tt := range tests {
		b.Run(tt.name, func(b *testing.B) {
			// Compile once per sub-benchmark, outside the timed loop body.
			require.NoError(b, tt.filter.Compile())
			for n := 0; n < b.N; n++ {
				tt.filter.Select(tt.metric)
			}
		})
	}
}

View File

@@ -1,30 +1,53 @@
package models
import (
"log"
"sync"
"time"
"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/selfstat"
)
type RunningAggregator struct {
a telegraf.Aggregator
Config *AggregatorConfig
metrics chan telegraf.Metric
sync.Mutex
Aggregator telegraf.Aggregator
Config *AggregatorConfig
periodStart time.Time
periodEnd time.Time
MetricsPushed selfstat.Stat
MetricsFiltered selfstat.Stat
MetricsDropped selfstat.Stat
PushTime selfstat.Stat
}
func NewRunningAggregator(
a telegraf.Aggregator,
conf *AggregatorConfig,
aggregator telegraf.Aggregator,
config *AggregatorConfig,
) *RunningAggregator {
return &RunningAggregator{
a: a,
Config: conf,
metrics: make(chan telegraf.Metric, 100),
Aggregator: aggregator,
Config: config,
MetricsPushed: selfstat.Register(
"aggregate",
"metrics_pushed",
map[string]string{"aggregator": config.Name},
),
MetricsFiltered: selfstat.Register(
"aggregate",
"metrics_filtered",
map[string]string{"aggregator": config.Name},
),
MetricsDropped: selfstat.Register(
"aggregate",
"metrics_dropped",
map[string]string{"aggregator": config.Name},
),
PushTime: selfstat.Register(
"aggregate",
"push_time_ns",
map[string]string{"aggregator": config.Name},
),
}
}
@@ -46,6 +69,15 @@ func (r *RunningAggregator) Name() string {
return "aggregators." + r.Config.Name
}
func (r *RunningAggregator) Period() time.Duration {
return r.Config.Period
}
func (r *RunningAggregator) SetPeriodStart(start time.Time) {
r.periodStart = start
r.periodEnd = r.periodStart.Add(r.Config.Period).Add(r.Config.Delay)
}
func (r *RunningAggregator) MakeMetric(metric telegraf.Metric) telegraf.Metric {
m := makemetric(
metric,
@@ -59,9 +91,21 @@ func (r *RunningAggregator) MakeMetric(metric telegraf.Metric) telegraf.Metric {
m.SetAggregate(true)
}
r.MetricsPushed.Incr(1)
return m
}
func (r *RunningAggregator) metricFiltered(metric telegraf.Metric) {
r.MetricsFiltered.Incr(1)
metric.Accept()
}
func (r *RunningAggregator) metricDropped(metric telegraf.Metric) {
r.MetricsDropped.Incr(1)
metric.Accept()
}
// Add a metric to the aggregator and return true if the original metric
// should be dropped.
func (r *RunningAggregator) Add(metric telegraf.Metric) bool {
@@ -74,75 +118,31 @@ func (r *RunningAggregator) Add(metric telegraf.Metric) bool {
return r.Config.DropOriginal
}
r.metrics <- metric
r.Lock()
defer r.Unlock()
if r.periodStart.IsZero() || metric.Time().Before(r.periodStart) || metric.Time().After(r.periodEnd) {
r.metricDropped(metric)
return false
}
r.Aggregator.Add(metric)
return r.Config.DropOriginal
}
func (r *RunningAggregator) add(in telegraf.Metric) {
r.a.Add(in)
func (r *RunningAggregator) Push(acc telegraf.Accumulator) {
r.Lock()
defer r.Unlock()
r.periodStart = r.periodEnd
r.periodEnd = r.periodStart.Add(r.Config.Period).Add(r.Config.Delay)
r.push(acc)
r.Aggregator.Reset()
}
func (r *RunningAggregator) push(acc telegraf.Accumulator) {
r.a.Push(acc)
}
func (r *RunningAggregator) reset() {
r.a.Reset()
}
// Run runs the running aggregator, listens for incoming metrics, and waits
// for period ticks to tell it when to push and reset the aggregator.
func (r *RunningAggregator) Run(
acc telegraf.Accumulator,
shutdown chan struct{},
) {
// The start of the period is truncated to the nearest second.
//
// Every metric then gets it's timestamp checked and is dropped if it
// is not within:
//
// start < t < end + truncation + delay
//
// So if we start at now = 00:00.2 with a 10s period and 0.3s delay:
// now = 00:00.2
// start = 00:00
// truncation = 00:00.2
// end = 00:10
// 1st interval: 00:00 - 00:10.5
// 2nd interval: 00:10 - 00:20.5
// etc.
//
now := time.Now()
r.periodStart = now.Truncate(time.Second)
truncation := now.Sub(r.periodStart)
r.periodEnd = r.periodStart.Add(r.Config.Period)
time.Sleep(r.Config.Delay)
periodT := time.NewTicker(r.Config.Period)
defer periodT.Stop()
for {
select {
case <-shutdown:
if len(r.metrics) > 0 {
// wait until metrics are flushed before exiting
continue
}
return
case m := <-r.metrics:
if m.Time().Before(r.periodStart) ||
m.Time().After(r.periodEnd.Add(truncation).Add(r.Config.Delay)) {
// the metric is outside the current aggregation period, so
// skip it.
log.Printf("D! aggregator: metric \"%s\" is not in the current timewindow, skipping", m.Name())
continue
}
r.add(m)
case <-periodT.C:
r.periodStart = r.periodEnd
r.periodEnd = r.periodStart.Add(r.Config.Period)
r.push(acc)
r.reset()
}
}
start := time.Now()
r.Aggregator.Push(acc)
elapsed := time.Since(start)
r.PushTime.Incr(elapsed.Nanoseconds())
}

View File

@@ -1,16 +1,13 @@
package models
import (
"sync"
"sync/atomic"
"testing"
"time"
"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/metric"
"github.com/influxdata/telegraf/testutil"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
@@ -23,28 +20,24 @@ func TestAdd(t *testing.T) {
},
Period: time.Millisecond * 500,
})
assert.NoError(t, ra.Config.Filter.Compile())
require.NoError(t, ra.Config.Filter.Compile())
acc := testutil.Accumulator{}
go ra.Run(&acc, make(chan struct{}))
m, err := metric.New("RITest",
now := time.Now()
ra.SetPeriodStart(now)
m := testutil.MustMetric("RITest",
map[string]string{},
map[string]interface{}{
"value": int64(101),
},
time.Now().Add(time.Millisecond*150),
telegraf.Untyped)
require.NoError(t, err)
require.False(t, ra.Add(m))
ra.Push(&acc)
assert.False(t, ra.Add(m))
for {
time.Sleep(time.Millisecond)
if atomic.LoadInt64(&a.sum) > 0 {
break
}
}
assert.Equal(t, int64(101), atomic.LoadInt64(&a.sum))
require.Equal(t, 1, len(acc.Metrics))
require.Equal(t, int64(101), acc.Metrics[0].Fields["sum"])
}
func TestAddMetricsOutsideCurrentPeriod(t *testing.T) {
@@ -56,50 +49,45 @@ func TestAddMetricsOutsideCurrentPeriod(t *testing.T) {
},
Period: time.Millisecond * 500,
})
assert.NoError(t, ra.Config.Filter.Compile())
require.NoError(t, ra.Config.Filter.Compile())
acc := testutil.Accumulator{}
go ra.Run(&acc, make(chan struct{}))
now := time.Now()
ra.SetPeriodStart(now)
m, err := metric.New("RITest",
m := testutil.MustMetric("RITest",
map[string]string{},
map[string]interface{}{
"value": int64(101),
},
time.Now().Add(-time.Hour),
telegraf.Untyped)
require.NoError(t, err)
assert.False(t, ra.Add(m))
now.Add(-time.Hour),
telegraf.Untyped,
)
require.False(t, ra.Add(m))
// metric after current period
m, err = metric.New("RITest",
m = testutil.MustMetric("RITest",
map[string]string{},
map[string]interface{}{
"value": int64(101),
},
time.Now().Add(time.Hour),
telegraf.Untyped)
require.NoError(t, err)
assert.False(t, ra.Add(m))
now.Add(time.Hour),
telegraf.Untyped,
)
require.False(t, ra.Add(m))
// "now" metric
m, err = metric.New("RITest",
m = testutil.MustMetric("RITest",
map[string]string{},
map[string]interface{}{
"value": int64(101),
},
time.Now().Add(time.Millisecond*50),
telegraf.Untyped)
require.NoError(t, err)
assert.False(t, ra.Add(m))
require.False(t, ra.Add(m))
for {
time.Sleep(time.Millisecond)
if atomic.LoadInt64(&a.sum) > 0 {
break
}
}
assert.Equal(t, int64(101), atomic.LoadInt64(&a.sum))
ra.Push(&acc)
require.Equal(t, 1, len(acc.Metrics))
require.Equal(t, int64(101), acc.Metrics[0].Fields["sum"])
}
func TestAddAndPushOnePeriod(t *testing.T) {
@@ -111,37 +99,24 @@ func TestAddAndPushOnePeriod(t *testing.T) {
},
Period: time.Millisecond * 500,
})
assert.NoError(t, ra.Config.Filter.Compile())
require.NoError(t, ra.Config.Filter.Compile())
acc := testutil.Accumulator{}
shutdown := make(chan struct{})
var wg sync.WaitGroup
wg.Add(1)
go func() {
defer wg.Done()
ra.Run(&acc, shutdown)
}()
now := time.Now()
ra.SetPeriodStart(now)
m, err := metric.New("RITest",
m := testutil.MustMetric("RITest",
map[string]string{},
map[string]interface{}{
"value": int64(101),
},
time.Now().Add(time.Millisecond*100),
telegraf.Untyped)
require.NoError(t, err)
assert.False(t, ra.Add(m))
require.False(t, ra.Add(m))
ra.Push(&acc)
for {
time.Sleep(time.Millisecond)
if acc.NMetrics() > 0 {
break
}
}
acc.AssertContainsFields(t, "TestMetric", map[string]interface{}{"sum": int64(101)})
close(shutdown)
wg.Wait()
}
func TestAddDropOriginal(t *testing.T) {
@@ -152,28 +127,29 @@ func TestAddDropOriginal(t *testing.T) {
},
DropOriginal: true,
})
assert.NoError(t, ra.Config.Filter.Compile())
require.NoError(t, ra.Config.Filter.Compile())
m, err := metric.New("RITest",
now := time.Now()
ra.SetPeriodStart(now)
m := testutil.MustMetric("RITest",
map[string]string{},
map[string]interface{}{
"value": int64(101),
},
time.Now(),
now,
telegraf.Untyped)
require.NoError(t, err)
assert.True(t, ra.Add(m))
require.True(t, ra.Add(m))
// this metric name doesn't match the filter, so Add will return false
m2, err := metric.New("foobar",
m2 := testutil.MustMetric("foobar",
map[string]string{},
map[string]interface{}{
"value": int64(101),
},
time.Now(),
now,
telegraf.Untyped)
require.NoError(t, err)
assert.False(t, ra.Add(m2))
require.False(t, ra.Add(m2))
}
type TestAggregator struct {

View File

@@ -1,11 +1,9 @@
package models
import (
"fmt"
"time"
"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/plugins/serializers/influx"
"github.com/influxdata/telegraf/selfstat"
)
@@ -15,16 +13,13 @@ type RunningInput struct {
Input telegraf.Input
Config *InputConfig
trace bool
defaultTags map[string]string
MetricsGathered selfstat.Stat
GatherTime selfstat.Stat
}
func NewRunningInput(
input telegraf.Input,
config *InputConfig,
) *RunningInput {
func NewRunningInput(input telegraf.Input, config *InputConfig) *RunningInput {
return &RunningInput{
Input: input,
Config: config,
@@ -33,6 +28,11 @@ func NewRunningInput(
"metrics_gathered",
map[string]string{"input": config.Name},
),
GatherTime: selfstat.RegisterTiming(
"gather",
"gather_time_ns",
map[string]string{"input": config.Name},
),
}
}
@@ -52,13 +52,19 @@ func (r *RunningInput) Name() string {
return "inputs." + r.Config.Name
}
func (r *RunningInput) metricFiltered(metric telegraf.Metric) {
metric.Drop()
}
func (r *RunningInput) MakeMetric(metric telegraf.Metric) telegraf.Metric {
if ok := r.Config.Filter.Select(metric); !ok {
r.metricFiltered(metric)
return nil
}
r.Config.Filter.Modify(metric)
if len(metric.FieldList()) == 0 {
r.metricFiltered(metric)
return nil
}
@@ -70,26 +76,17 @@ func (r *RunningInput) MakeMetric(metric telegraf.Metric) telegraf.Metric {
r.Config.Tags,
r.defaultTags)
if r.trace && m != nil {
s := influx.NewSerializer()
s.SetFieldSortOrder(influx.SortFields)
octets, err := s.Serialize(m)
if err == nil {
fmt.Print("> " + string(octets))
}
}
r.MetricsGathered.Incr(1)
GlobalMetricsGathered.Incr(1)
return m
}
func (r *RunningInput) Trace() bool {
return r.trace
}
func (r *RunningInput) SetTrace(trace bool) {
r.trace = trace
func (r *RunningInput) Gather(acc telegraf.Accumulator) error {
start := time.Now()
err := r.Input.Gather(acc)
elapsed := time.Since(start)
r.GatherTime.Incr(elapsed.Nanoseconds())
return err
}
func (r *RunningInput) SetDefaultTags(tags map[string]string) {

View File

@@ -6,6 +6,7 @@ import (
"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/metric"
"github.com/influxdata/telegraf/testutil"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
@@ -66,17 +67,13 @@ func TestMakeMetricWithPluginTags(t *testing.T) {
},
})
ri.SetTrace(true)
assert.Equal(t, true, ri.Trace())
m, err := metric.New("RITest",
m := testutil.MustMetric("RITest",
map[string]string{},
map[string]interface{}{
"value": int64(101),
},
now,
telegraf.Untyped)
require.NoError(t, err)
m = ri.MakeMetric(m)
expected, err := metric.New("RITest",
@@ -102,8 +99,6 @@ func TestMakeMetricFilteredOut(t *testing.T) {
Filter: Filter{NamePass: []string{"foobar"}},
})
ri.SetTrace(true)
assert.Equal(t, true, ri.Trace())
assert.NoError(t, ri.Config.Filter.Compile())
m, err := metric.New("RITest",
@@ -127,17 +122,13 @@ func TestMakeMetricWithDaemonTags(t *testing.T) {
"foo": "bar",
})
ri.SetTrace(true)
assert.Equal(t, true, ri.Trace())
m, err := metric.New("RITest",
m := testutil.MustMetric("RITest",
map[string]string{},
map[string]interface{}{
"value": int64(101),
},
now,
telegraf.Untyped)
require.NoError(t, err)
m = ri.MakeMetric(m)
expected, err := metric.New("RITest",
map[string]string{

View File

@@ -6,7 +6,6 @@ import (
"time"
"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/internal/buffer"
"github.com/influxdata/telegraf/selfstat"
)
@@ -18,6 +17,16 @@ const (
DEFAULT_METRIC_BUFFER_LIMIT = 10000
)
// OutputConfig containing name and filter
type OutputConfig struct {
Name string
Filter Filter
FlushInterval time.Duration
MetricBufferLimit int
MetricBatchSize int
}
// RunningOutput contains the output configuration
type RunningOutput struct {
Name string
@@ -27,24 +36,16 @@ type RunningOutput struct {
MetricBatchSize int
MetricsFiltered selfstat.Stat
MetricsWritten selfstat.Stat
BufferSize selfstat.Stat
BufferLimit selfstat.Stat
WriteTime selfstat.Stat
metrics *buffer.Buffer
failMetrics *buffer.Buffer
batch []telegraf.Metric
buffer *Buffer
BatchReady chan time.Time
// Guards against concurrent calls to Add, Push, Reset
aggMutex sync.Mutex
// Guards against concurrent calls to the Output as described in #3009
writeMutex sync.Mutex
}
// OutputConfig containing name and filter
type OutputConfig struct {
Name string
Filter Filter
aggMutex sync.Mutex
batchMutex sync.Mutex
}
func NewRunningOutput(
@@ -54,25 +55,27 @@ func NewRunningOutput(
batchSize int,
bufferLimit int,
) *RunningOutput {
if conf.MetricBufferLimit > 0 {
bufferLimit = conf.MetricBufferLimit
}
if bufferLimit == 0 {
bufferLimit = DEFAULT_METRIC_BUFFER_LIMIT
}
if conf.MetricBatchSize > 0 {
batchSize = conf.MetricBatchSize
}
if batchSize == 0 {
batchSize = DEFAULT_METRIC_BATCH_SIZE
}
ro := &RunningOutput{
Name: name,
metrics: buffer.NewBuffer(batchSize),
failMetrics: buffer.NewBuffer(bufferLimit),
batch: make([]telegraf.Metric, 0, batchSize),
buffer: NewBuffer(name, bufferLimit),
BatchReady: make(chan time.Time, 1),
Output: output,
Config: conf,
MetricBufferLimit: bufferLimit,
MetricBatchSize: batchSize,
MetricsWritten: selfstat.Register(
"write",
"metrics_written",
map[string]string{"output": name},
),
MetricsFiltered: selfstat.Register(
"write",
"metrics_filtered",
@@ -94,20 +97,28 @@ func NewRunningOutput(
map[string]string{"output": name},
),
}
ro.BufferLimit.Set(int64(ro.MetricBufferLimit))
return ro
}
// AddMetric adds a metric to the output. This function can also write cached
// points if FlushBufferWhenFull is true.
func (ro *RunningOutput) metricFiltered(metric telegraf.Metric) {
ro.MetricsFiltered.Incr(1)
metric.Drop()
}
// AddMetric adds a metric to the output.
//
// Takes ownership of metric
func (ro *RunningOutput) AddMetric(metric telegraf.Metric) {
if ok := ro.Config.Filter.Select(metric); !ok {
ro.MetricsFiltered.Incr(1)
ro.metricFiltered(metric)
return
}
ro.Config.Filter.Modify(metric)
if len(metric.FieldList()) == 0 {
ro.metricFiltered(metric)
return
}
@@ -118,85 +129,98 @@ func (ro *RunningOutput) AddMetric(metric telegraf.Metric) {
return
}
ro.metrics.Add(metric)
if ro.metrics.Len() == ro.MetricBatchSize {
batch := ro.metrics.Batch(ro.MetricBatchSize)
err := ro.write(batch)
if err != nil {
ro.failMetrics.Add(batch...)
log.Printf("E! Error writing to output [%s]: %v", ro.Name, err)
ro.batchMutex.Lock()
ro.batch = append(ro.batch, metric)
if len(ro.batch) == ro.MetricBatchSize {
ro.addBatchToBuffer()
nBuffer := ro.buffer.Len()
ro.BufferSize.Set(int64(nBuffer))
select {
case ro.BatchReady <- time.Now():
default:
}
}
ro.batchMutex.Unlock()
}
// Write writes all cached points to this output.
// AddBatchToBuffer moves the metrics from the batch into the metric buffer.
func (ro *RunningOutput) addBatchToBuffer() {
ro.buffer.Add(ro.batch...)
ro.batch = ro.batch[:0]
}
// Write writes all metrics to the output, stopping when all have been sent on
// or error.
func (ro *RunningOutput) Write() error {
if output, ok := ro.Output.(telegraf.AggregatingOutput); ok {
ro.aggMutex.Lock()
metrics := output.Push()
ro.metrics.Add(metrics...)
ro.buffer.Add(metrics...)
output.Reset()
ro.aggMutex.Unlock()
}
// add and write can be called concurrently
ro.batchMutex.Lock()
ro.addBatchToBuffer()
ro.batchMutex.Unlock()
nFails, nMetrics := ro.failMetrics.Len(), ro.metrics.Len()
ro.BufferSize.Set(int64(nFails + nMetrics))
log.Printf("D! Output [%s] buffer fullness: %d / %d metrics. ",
ro.Name, nFails+nMetrics, ro.MetricBufferLimit)
var err error
if !ro.failMetrics.IsEmpty() {
// how many batches of failed writes we need to write.
nBatches := nFails/ro.MetricBatchSize + 1
batchSize := ro.MetricBatchSize
nBuffer := ro.buffer.Len()
for i := 0; i < nBatches; i++ {
// If it's the last batch, only grab the metrics that have not had
// a write attempt already (this is primarily to preserve order).
if i == nBatches-1 {
batchSize = nFails % ro.MetricBatchSize
}
batch := ro.failMetrics.Batch(batchSize)
// If we've already failed previous writes, don't bother trying to
// write to this output again. We are not exiting the loop just so
// that we can rotate the metrics to preserve order.
if err == nil {
err = ro.write(batch)
}
if err != nil {
ro.failMetrics.Add(batch...)
}
// Only process the metrics in the buffer now. Metrics added while we are
// writing will be sent on the next call.
nBatches := nBuffer/ro.MetricBatchSize + 1
for i := 0; i < nBatches; i++ {
batch := ro.buffer.Batch(ro.MetricBatchSize)
if len(batch) == 0 {
break
}
}
batch := ro.metrics.Batch(ro.MetricBatchSize)
// see comment above about not trying to write to an already failed output.
// if ro.failMetrics is empty then err will always be nil at this point.
if err == nil {
err = ro.write(batch)
}
if err != nil {
ro.failMetrics.Add(batch...)
return err
err := ro.write(batch)
if err != nil {
ro.buffer.Reject(batch)
return err
}
ro.buffer.Accept(batch)
}
return nil
}
func (ro *RunningOutput) write(metrics []telegraf.Metric) error {
nMetrics := len(metrics)
if nMetrics == 0 {
// WriteBatch writes only the batch metrics to the output.
func (ro *RunningOutput) WriteBatch() error {
batch := ro.buffer.Batch(ro.MetricBatchSize)
if len(batch) == 0 {
return nil
}
ro.writeMutex.Lock()
defer ro.writeMutex.Unlock()
err := ro.write(batch)
if err != nil {
ro.buffer.Reject(batch)
return err
}
ro.buffer.Accept(batch)
return nil
}
func (ro *RunningOutput) write(metrics []telegraf.Metric) error {
start := time.Now()
err := ro.Output.Write(metrics)
elapsed := time.Since(start)
ro.WriteTime.Incr(elapsed.Nanoseconds())
if err == nil {
log.Printf("D! Output [%s] wrote batch of %d metrics in %s\n",
ro.Name, nMetrics, elapsed)
ro.MetricsWritten.Incr(int64(nMetrics))
ro.WriteTime.Incr(elapsed.Nanoseconds())
log.Printf("D! [outputs.%s] wrote batch of %d metrics in %s\n",
ro.Name, len(metrics), elapsed)
}
return err
}
func (ro *RunningOutput) LogBufferStatus() {
nBuffer := ro.buffer.Len()
log.Printf("D! [outputs.%s] buffer fullness: %d / %d metrics. ",
ro.Name, nBuffer, ro.MetricBufferLimit)
}

View File

@@ -231,56 +231,6 @@ func TestRunningOutputDefault(t *testing.T) {
assert.Len(t, m.Metrics(), 10)
}
// Test that running output doesn't flush until it's full when
// FlushBufferWhenFull is set.
func TestRunningOutputFlushWhenFull(t *testing.T) {
conf := &OutputConfig{
Filter: Filter{},
}
m := &mockOutput{}
ro := NewRunningOutput("test", m, conf, 6, 10)
// Fill buffer to 1 under limit
for _, metric := range first5 {
ro.AddMetric(metric)
}
// no flush yet
assert.Len(t, m.Metrics(), 0)
// add one more metric
ro.AddMetric(next5[0])
// now it flushed
assert.Len(t, m.Metrics(), 6)
// add one more metric and write it manually
ro.AddMetric(next5[1])
err := ro.Write()
assert.NoError(t, err)
assert.Len(t, m.Metrics(), 7)
}
// Test that running output doesn't flush until it's full when
// FlushBufferWhenFull is set, twice.
func TestRunningOutputMultiFlushWhenFull(t *testing.T) {
conf := &OutputConfig{
Filter: Filter{},
}
m := &mockOutput{}
ro := NewRunningOutput("test", m, conf, 4, 12)
// Fill buffer past limit twive
for _, metric := range first5 {
ro.AddMetric(metric)
}
for _, metric := range next5 {
ro.AddMetric(metric)
}
// flushed twice
assert.Len(t, m.Metrics(), 8)
}
func TestRunningOutputWriteFail(t *testing.T) {
conf := &OutputConfig{
Filter: Filter{},

View File

@@ -27,6 +27,19 @@ type ProcessorConfig struct {
Filter Filter
}
func (rp *RunningProcessor) metricFiltered(metric telegraf.Metric) {
metric.Drop()
}
func containsMetric(item telegraf.Metric, metrics []telegraf.Metric) bool {
for _, m := range metrics {
if item == m {
return true
}
}
return false
}
func (rp *RunningProcessor) Apply(in ...telegraf.Metric) []telegraf.Metric {
rp.Lock()
defer rp.Unlock()
@@ -43,6 +56,7 @@ func (rp *RunningProcessor) Apply(in ...telegraf.Metric) []telegraf.Metric {
rp.Config.Filter.Modify(metric)
if len(metric.FieldList()) == 0 {
rp.metricFiltered(metric)
continue
}

View File

@@ -6,7 +6,7 @@ import (
"time"
"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/metric"
"github.com/influxdata/telegraf/testutil"
"github.com/stretchr/testify/require"
)
@@ -41,20 +41,6 @@ func TagProcessor(key, value string) *MockProcessor {
}
}
func Metric(
name string,
tags map[string]string,
fields map[string]interface{},
tm time.Time,
tp ...telegraf.ValueType,
) telegraf.Metric {
m, err := metric.New(name, tags, fields, tm, tp...)
if err != nil {
panic(err)
}
return m
}
func TestRunningProcessor_Apply(t *testing.T) {
type args struct {
Processor telegraf.Processor
@@ -76,7 +62,7 @@ func TestRunningProcessor_Apply(t *testing.T) {
},
},
input: []telegraf.Metric{
Metric(
testutil.MustMetric(
"cpu",
map[string]string{},
map[string]interface{}{
@@ -86,7 +72,7 @@ func TestRunningProcessor_Apply(t *testing.T) {
),
},
expected: []telegraf.Metric{
Metric(
testutil.MustMetric(
"cpu",
map[string]string{
"apply": "true",
@@ -109,7 +95,7 @@ func TestRunningProcessor_Apply(t *testing.T) {
},
},
input: []telegraf.Metric{
Metric(
testutil.MustMetric(
"cpu",
map[string]string{},
map[string]interface{}{
@@ -119,7 +105,7 @@ func TestRunningProcessor_Apply(t *testing.T) {
),
},
expected: []telegraf.Metric{
Metric(
testutil.MustMetric(
"cpu",
map[string]string{
"apply": "true",
@@ -142,7 +128,7 @@ func TestRunningProcessor_Apply(t *testing.T) {
},
},
input: []telegraf.Metric{
Metric(
testutil.MustMetric(
"cpu",
map[string]string{},
map[string]interface{}{
@@ -152,7 +138,7 @@ func TestRunningProcessor_Apply(t *testing.T) {
),
},
expected: []telegraf.Metric{
Metric(
testutil.MustMetric(
"cpu",
map[string]string{},
map[string]interface{}{