Performance refactor of running_output buffers

closes #914 closes #967
2016-04-25 17:49:06 -06:00 · 2016-04-25 17:49:06 -06:00 · 4de75ce621
parent 1c4043ab39
commit 4de75ce621
7 changed files with 587 additions and 206 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@ -2,9 +2,18 @@
 ### Release Notes
 - New [agent] configuration option: `metric_batch_size`. This option tells
 telegraf the maximum batch size to allow to accumulate before sending a flush
 to the configured outputs. `metric_buffer_limit` now refers to the absolute
 maximum number of metrics that will accumulate before metrics are dropped.
 - There is no longer an option to
 `flush_buffer_when_full`, this is now the default and only behavior of telegraf.
 - **Breaking Change**: docker plugin tags. The cont_id tag no longer exists, it
 will now be a field, and be called container_id. Additionally, cont_image and
 cont_name are being renamed to container_image and container_name.
 - **Breaking Change**: docker plugin measurements. The `docker_cpu`, `docker_mem`,
 `docker_blkio` and `docker_net` measurements are being renamed to
 `docker_container_cpu`, `docker_container_mem`, `docker_container_blkio` and
@ -16,15 +25,19 @@ So adding "container" to each metric will:
 (1) make it more clear that these metrics are per-container, and
 (2) allow users to easily drop per-container metrics if cardinality is an
 issue (`namedrop = ["docker_container_*"]`)
 - `tagexclude` and `taginclude` are now available, which can be used to remove
 tags from measurements on inputs and outputs. See
 [the configuration doc](https://github.com/influxdata/telegraf/blob/master/docs/CONFIGURATION.md)
 for more details.
 - **Measurement filtering:** All measurement filters now match based on glob
 only. Previously there was an undocumented behavior where filters would match
 based on _prefix_ in addition to globs. This means that a filter like
 `fielddrop = ["time_"]` will need to be changed to `fielddrop = ["time_*"]`
 - **datadog**: measurement and field names will no longer have `_` replaced by `.`
 - The following plugins have changed their tags to _not_ overwrite the host tag:
  - cassandra: `host -> cassandra_host`
  - disque: `host -> disque_host`
@ -42,6 +55,8 @@ based on _prefix_ in addition to globs. This means that a filter like
 - [#1072](https://github.com/influxdata/telegraf/pull/1072): New Input Plugin: filestat.
 - [#1066](https://github.com/influxdata/telegraf/pull/1066): Replication lag metrics for MongoDB input plugin
 - [#1086](https://github.com/influxdata/telegraf/pull/1086): Ability to specify AWS keys in config file. Thanks @johnrengleman!
 - [#1096](https://github.com/influxdata/telegraf/pull/1096): Performance refactor of running output buffers.
 - [#967](https://github.com/influxdata/telegraf/issues/967): Buffer logging improvements.
 ### Bugfixes
@ -55,6 +70,7 @@ based on _prefix_ in addition to globs. This means that a filter like
 - [#1078](https://github.com/influxdata/telegraf/issues/1078): Use default AWS credential chain.
 - [#1070](https://github.com/influxdata/telegraf/issues/1070): SQL Server input. Fix datatype conversion.
 - [#1089](https://github.com/influxdata/telegraf/issues/1089): Fix leaky TCP connections in phpfpm plugin.
 - [#914](https://github.com/influxdata/telegraf/issues/914): Telegraf can drop metrics on full buffers.
 ## v0.12.1 [2016-04-14]
--- a/etc/telegraf.conf
+++ b/etc/telegraf.conf
@ -30,15 +30,13 @@
  ## ie, if interval="10s" then always collect on :00, :10, :20, etc.
  round_interval = true
-  ## Telegraf will send metrics to output in batch of at
+  ## Telegraf will send metrics to outputs in batches of at
  ## most metric_batch_size metrics.
  metric_batch_size = 1000
-  ## Telegraf will cache metric_buffer_limit metrics for each output, and will
+  ## For failed writes, telegraf will cache metric_buffer_limit metrics for each
-  ## flush this buffer on a successful write. This should be a multiple of
+  ## output, and will flush this buffer on a successful write. Oldest metrics
-  ## metric_batch_size and could not be less than 2 times metric_batch_size
+  ## are dropped first when this buffer fills.
  metric_buffer_limit = 10000
  ## Flush the buffer whenever full, regardless of flush_interval.
  flush_buffer_when_full = true
  ## Collection jitter is used to jitter the collection by a random amount.
  ## Each plugin will sleep for a random time within jitter before collecting.
@ -151,6 +149,15 @@
 #   ## Amazon REGION
 #   region = 'us-east-1'
 # 
 #   ## Amazon Credentials
 #   ## Credentials are loaded in the following order
 #   ## 1) explicit credentials from 'access_key' and 'secret_key'
 #   ## 2) environment variables
 #   ## 3) shared credentials file
 #   ## 4) EC2 Instance Profile
 #   #access_key = ""
 #   #secret_key = ""
 # 
 #   ## Namespace for the CloudWatch MetricDatums
 #   namespace = 'InfluxData/Telegraf'
@ -243,6 +250,16 @@
 # [[outputs.kinesis]]
 #   ## Amazon REGION of kinesis endpoint.
 #   region = "ap-southeast-2"
 # 
 #   ## Amazon Credentials
 #   ## Credentials are loaded in the following order
 #   ## 1) explicit credentials from 'access_key' and 'secret_key'
 #   ## 2) environment variables
 #   ## 3) shared credentials file
 #   ## 4) EC2 Instance Profile
 #   #access_key = ""
 #   #secret_key = ""
 # 
 #   ## Kinesis StreamName must exist prior to starting telegraf.
 #   streamname = "StreamName"
 #   ## PartitionKey as used for sharding data.
@ -457,6 +474,15 @@
 #   ## Amazon Region
 #   region = 'us-east-1'
 # 
 #   ## Amazon Credentials
 #   ## Credentials are loaded in the following order
 #   ## 1) explicit credentials from 'access_key' and 'secret_key'
 #   ## 2) environment variables
 #   ## 3) shared credentials file
 #   ## 4) EC2 Instance Profile
 #   #access_key = ""
 #   #secret_key = ""
 # 
 #   ## Requested CloudWatch aggregation Period (required - must be a multiple of 60s)
 #   period = '1m'
 # 
@ -588,8 +614,14 @@
 # [[inputs.filestat]]
 #   ## Files to gather stats about.
 #   ## These accept standard unix glob matching rules, but with the addition of
-#   ## ** as a "super asterisk". See https://github.com/gobwas/glob.
+#   ## ** as a "super asterisk". ie:
-#   files = ["/etc/telegraf/telegraf.conf", "/var/log/**.log"]
+#   ##   "/var/log/**.log"  -> recursively find all .log files in /var/log
 #   ##   "/var/log/*/*.log" -> find all .log files with a parent dir in /var/log
 #   ##   "/var/log/apache.log" -> just tail the apache log file
 #   ##
 #   ## See https://github.com/gobwas/glob for more examples
 #   ##
 #   files = ["/var/log/**.log"]
 #   ## If true, read the entire file and calculate an md5 checksum.
 #   md5 = false
@ -980,6 +1012,11 @@
 #   ## databases are gathered.
 #   ## databases = ["app_production", "testing"]
 #   #
 #   # outputaddress = "db01"
 #   ## A custom name for the database that will be used as the "server" tag in the
 #   ## measurement output. If not specified, a default one generated from
 #   ## the connection address is used.
 #   #
 #   ## Define the toml config where the sql queries are stored
 #   ## New queries can be added, if the withdbname is set to true and there is no
 #   ## databases defined in the 'databases field', the sql query is ended by a
@ -990,24 +1027,28 @@
 #   ## because the databases variable was set to ['postgres', 'pgbench' ] and the
 #   ## withdbname was true. Be careful that if the withdbname is set to false you
 #   ## don't have to define the where clause (aka with the dbname) the tagvalue
-#   ## field is used to define custom tags (separated by comas)
+#   ## field is used to define custom tags (separated by commas)
 #   ## The optional "measurement" value can be used to override the default
 #   ## output measurement name ("postgresql").
 #   #
 #   ## Structure :
 #   ## [[inputs.postgresql_extensible.query]]
 #   ##   sqlquery string
 #   ##   version string
 #   ##   withdbname boolean
-#   ##   tagvalue string (coma separated)
+#   ##   tagvalue string (comma separated)
 #   ##   measurement string
 #   [[inputs.postgresql_extensible.query]]
 #     sqlquery="SELECT * FROM pg_stat_database"
 #     version=901
 #     withdbname=false
 #     tagvalue=""
 #     measurement=""
 #   [[inputs.postgresql_extensible.query]]
 #     sqlquery="SELECT * FROM pg_stat_bgwriter"
 #     version=901
 #     withdbname=false
-#     tagvalue=""
+#     tagvalue="postgresql.stats"
 # # Read metrics from one or many PowerDNS servers
@ -1379,6 +1420,28 @@
 #   percentile_limit = 1000
 # # Stream a log file, like the tail -f command
 # [[inputs.tail]]
 #   ## files to tail.
 #   ## These accept standard unix glob matching rules, but with the addition of
 #   ## ** as a "super asterisk". ie:
 #   ##   "/var/log/**.log"  -> recursively find all .log files in /var/log
 #   ##   "/var/log/*/*.log" -> find all .log files with a parent dir in /var/log
 #   ##   "/var/log/apache.log" -> just tail the apache log file
 #   ##
 #   ## See https://github.com/gobwas/glob for more examples
 #   ##
 #   files = ["/var/mymetrics.out"]
 #   ## Read file from beginning.
 #   from_beginning = false
 # 
 #   ## Data format to consume.
 #   ## Each data format has it's own unique set of configuration options, read
 #   ## more about them here:
 #   ## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md
 #   data_format = "influx"
 # # Generic TCP listener
 # [[inputs.tcp_listener]]
 #   ## Address and port to host TCP listener on
--- a/internal/buffer/buffer.go
+++ b/internal/buffer/buffer.go
@ -0,0 +1,77 @@
 package buffer
 import (
 	"github.com/influxdata/telegraf"
 )
 // Buffer is an object for storing metrics in a circular buffer.
 type Buffer struct {
 	buf chan telegraf.Metric
 	// total dropped metrics
 	drops int
 	// total metrics added
 	total int
 }
 // NewBuffer returns a Buffer
 //   size is the maximum number of metrics that Buffer will cache. If Add is
 //   called when the buffer is full, then the oldest metric(s) will be dropped.
 func NewBuffer(size int) *Buffer {
 	return &Buffer{
 		buf: make(chan telegraf.Metric, size),
 	}
 }
 // IsEmpty returns true if Buffer is empty.
 func (b *Buffer) IsEmpty() bool {
 	return len(b.buf) == 0
 }
 // Len returns the current length of the buffer.
 func (b *Buffer) Len() int {
 	return len(b.buf)
 }
 // Drops returns the total number of dropped metrics that have occured in this
 // buffer since instantiation.
 func (b *Buffer) Drops() int {
 	return b.drops
 }
 // Total returns the total number of metrics that have been added to this buffer.
 func (b *Buffer) Total() int {
 	return b.total
 }
 // Add adds metrics to the buffer.
 func (b *Buffer) Add(metrics ...telegraf.Metric) {
 	for i, _ := range metrics {
 		b.total++
 		select {
 		case b.buf <- metrics[i]:
 		default:
 			b.drops++
 			<-b.buf
 			b.buf <- metrics[i]
 		}
 	}
 }
 // Batch returns a batch of metrics of size batchSize.
 // the batch will be of maximum length batchSize. It can be less than batchSize,
 // if the length of Buffer is less than batchSize.
 func (b *Buffer) Batch(batchSize int) []telegraf.Metric {
 	n := min(len(b.buf), batchSize)
 	out := make([]telegraf.Metric, n)
 	for i := 0; i < n; i++ {
 		out[i] = <-b.buf
 	}
 	return out
 }
 func min(a, b int) int {
 	if b < a {
 		return b
 	}
 	return a
 }
--- a/internal/buffer/buffer_test.go
+++ b/internal/buffer/buffer_test.go
@ -0,0 +1,94 @@
 package buffer
 import (
 	"testing"
 	"github.com/influxdata/telegraf"
 	"github.com/influxdata/telegraf/testutil"
 	"github.com/stretchr/testify/assert"
 )
 var metricList = []telegraf.Metric{
 	testutil.TestMetric(2, "mymetric1"),
 	testutil.TestMetric(1, "mymetric2"),
 	testutil.TestMetric(11, "mymetric3"),
 	testutil.TestMetric(15, "mymetric4"),
 	testutil.TestMetric(8, "mymetric5"),
 }
 func BenchmarkAddMetrics(b *testing.B) {
 	buf := NewBuffer(10000)
 	m := testutil.TestMetric(1, "mymetric")
 	for n := 0; n < b.N; n++ {
 		buf.Add(m)
 	}
 }
 func TestNewBufferBasicFuncs(t *testing.T) {
 	b := NewBuffer(10)
 	assert.True(t, b.IsEmpty())
 	assert.Zero(t, b.Len())
 	assert.Zero(t, b.Drops())
 	assert.Zero(t, b.Total())
 	m := testutil.TestMetric(1, "mymetric")
 	b.Add(m)
 	assert.False(t, b.IsEmpty())
 	assert.Equal(t, b.Len(), 1)
 	assert.Equal(t, b.Drops(), 0)
 	assert.Equal(t, b.Total(), 1)
 	b.Add(metricList...)
 	assert.False(t, b.IsEmpty())
 	assert.Equal(t, b.Len(), 6)
 	assert.Equal(t, b.Drops(), 0)
 	assert.Equal(t, b.Total(), 6)
 }
 func TestDroppingMetrics(t *testing.T) {
 	b := NewBuffer(10)
 	// Add up to the size of the buffer
 	b.Add(metricList...)
 	b.Add(metricList...)
 	assert.False(t, b.IsEmpty())
 	assert.Equal(t, b.Len(), 10)
 	assert.Equal(t, b.Drops(), 0)
 	assert.Equal(t, b.Total(), 10)
 	// Add 5 more and verify they were dropped
 	b.Add(metricList...)
 	assert.False(t, b.IsEmpty())
 	assert.Equal(t, b.Len(), 10)
 	assert.Equal(t, b.Drops(), 5)
 	assert.Equal(t, b.Total(), 15)
 }
 func TestGettingBatches(t *testing.T) {
 	b := NewBuffer(20)
 	// Verify that the buffer returned is smaller than requested when there are
 	// not as many items as requested.
 	b.Add(metricList...)
 	batch := b.Batch(10)
 	assert.Len(t, batch, 5)
 	// Verify that the buffer is now empty
 	assert.True(t, b.IsEmpty())
 	assert.Zero(t, b.Len())
 	assert.Zero(t, b.Drops())
 	assert.Equal(t, b.Total(), 5)
 	// Verify that the buffer returned is not more than the size requested
 	b.Add(metricList...)
 	batch = b.Batch(3)
 	assert.Len(t, batch, 3)
 	// Verify that buffer is not empty
 	assert.False(t, b.IsEmpty())
 	assert.Equal(t, b.Len(), 2)
 	assert.Equal(t, b.Drops(), 0)
 	assert.Equal(t, b.Total(), 10)
 }
--- a/internal/config/config.go
+++ b/internal/config/config.go
@ -188,15 +188,13 @@ var header = `# Telegraf Configuration
  ## ie, if interval="10s" then always collect on :00, :10, :20, etc.
  round_interval = true
-  ## Telegraf will send metrics to output in batch of at
+  ## Telegraf will send metrics to outputs in batches of at
  ## most metric_batch_size metrics.
  metric_batch_size = 1000
-  ## Telegraf will cache metric_buffer_limit metrics for each output, and will
+  ## For failed writes, telegraf will cache metric_buffer_limit metrics for each
-  ## flush this buffer on a successful write. This should be a multiple of
+  ## output, and will flush this buffer on a successful write. Oldest metrics
-  ## metric_batch_size and could not be less than 2 times metric_batch_size
+  ## are dropped first when this buffer fills.
  metric_buffer_limit = 10000
  ## Flush the buffer whenever full, regardless of flush_interval.
  flush_buffer_when_full = true
  ## Collection jitter is used to jitter the collection by a random amount.
  ## Each plugin will sleep for a random time within jitter before collecting.
@ -535,14 +533,8 @@ func (c *Config) addOutput(name string, table *ast.Table) error {
 		return err
 	}
-	ro := internal_models.NewRunningOutput(name, output, outputConfig)
+	ro := internal_models.NewRunningOutput(name, output, outputConfig,
-	if c.Agent.MetricBatchSize > 0 {
+		c.Agent.MetricBatchSize, c.Agent.MetricBufferLimit)
 		ro.MetricBatchSize = c.Agent.MetricBatchSize
 	}
 	if c.Agent.MetricBufferLimit > 0 {
 		ro.MetricBufferLimit = c.Agent.MetricBufferLimit
 	}
 	ro.FlushBufferWhenFull = c.Agent.FlushBufferWhenFull
 	c.Outputs = append(c.Outputs, ro)
 	return nil
 }
--- a/internal/models/running_output.go
+++ b/internal/models/running_output.go
@ -2,14 +2,13 @@ package internal_models
 import (
 	"log"
 	"sync"
 	"time"
 	"github.com/influxdata/telegraf"
 	"github.com/influxdata/telegraf/internal/buffer"
 )
 const (
 	// Default size of metrics batch size.
 	DEFAULT_METRIC_BATCH_SIZE = 1000
@ -17,40 +16,40 @@ const (
 	DEFAULT_METRIC_BUFFER_LIMIT = 10000
 )
-// tmpmetrics point to batch of metrics ready to be wrote to output.
+// RunningOutput contains the output configuration
 // readI point to the oldest batch of metrics (the first to sent to output). It
 // may point to nil value if tmpmetrics is empty.
 // writeI point to the next slot to buffer a batch of metrics is output fail to
 // write.
 type RunningOutput struct {
-	Name                string
+	Name              string
-	Output              telegraf.Output
+	Output            telegraf.Output
-	Config              *OutputConfig
+	Config            *OutputConfig
-	Quiet               bool
+	Quiet             bool
-	MetricBufferLimit   int
+	MetricBufferLimit int
-	MetricBatchSize     int
+	MetricBatchSize   int
 	FlushBufferWhenFull bool
-	metrics    []telegraf.Metric
+	metrics     *buffer.Buffer
-	tmpmetrics []([]telegraf.Metric)
+	failMetrics *buffer.Buffer
 	writeI     int
 	readI      int
 	sync.Mutex
 }
 func NewRunningOutput(
 	name string,
 	output telegraf.Output,
 	conf *OutputConfig,
 	batchSize int,
 	bufferLimit int,
 ) *RunningOutput {
 	if bufferLimit == 0 {
 		bufferLimit = DEFAULT_METRIC_BUFFER_LIMIT
 	}
 	if batchSize == 0 {
 		batchSize = DEFAULT_METRIC_BATCH_SIZE
 	}
 	ro := &RunningOutput{
 		Name:              name,
-		metrics:           make([]telegraf.Metric, 0),
+		metrics:           buffer.NewBuffer(batchSize),
 		failMetrics:       buffer.NewBuffer(bufferLimit),
 		Output:            output,
 		Config:            conf,
-		MetricBufferLimit: DEFAULT_METRIC_BUFFER_LIMIT,
+		MetricBufferLimit: bufferLimit,
-		MetricBatchSize:   DEFAULT_METRIC_BATCH_SIZE,
+		MetricBatchSize:   batchSize,
 	}
 	return ro
 }
@ -63,19 +62,6 @@ func (ro *RunningOutput) AddMetric(metric telegraf.Metric) {
 			return
 		}
 	}
 	ro.Lock()
 	defer ro.Unlock()
 	if ro.tmpmetrics == nil {
 		size := ro.MetricBufferLimit / ro.MetricBatchSize
 		// ro.metrics already contains one batch
 		size = size - 1
 		if size < 1 {
 			size = 1
 		}
 		ro.tmpmetrics = make([]([]telegraf.Metric), size)
 	}
 	// Filter any tagexclude/taginclude parameters before adding metric
 	if len(ro.Config.Filter.TagExclude) != 0 || len(ro.Config.Filter.TagInclude) != 0 {
@ -90,69 +76,64 @@ func (ro *RunningOutput) AddMetric(metric telegraf.Metric) {
 		metric, _ = telegraf.NewMetric(name, tags, fields, t)
 	}
-	if len(ro.metrics) < ro.MetricBatchSize {
+	ro.metrics.Add(metric)
-		ro.metrics = append(ro.metrics, metric)
+	if ro.metrics.Len() == ro.MetricBatchSize {
-	} else {
+		batch := ro.metrics.Batch(ro.MetricBatchSize)
-		flushSuccess := true
+		err := ro.write(batch)
-		if ro.FlushBufferWhenFull {
+		if err != nil {
-			err := ro.write(ro.metrics)
+			ro.failMetrics.Add(batch...)
 			if err != nil {
 				log.Printf("ERROR writing full metric buffer to output %s, %s",
 					ro.Name, err)
 				flushSuccess = false
 			}
 		} else {
 			flushSuccess = false
 		}
 		if !flushSuccess {
 			if ro.tmpmetrics[ro.writeI] != nil && ro.writeI == ro.readI {
 				log.Printf("WARNING: overwriting cached metrics, you may want to " +
 					"increase the metric_buffer_limit setting in your [agent] " +
 					"config if you do not wish to overwrite metrics.\n")
 				ro.readI = (ro.readI + 1) % cap(ro.tmpmetrics)
 			}
 			ro.tmpmetrics[ro.writeI] = ro.metrics
 			ro.writeI = (ro.writeI + 1) % cap(ro.tmpmetrics)
 		}
 		ro.metrics = make([]telegraf.Metric, 0)
 		ro.metrics = append(ro.metrics, metric)
 	}
 }
 // Write writes all cached points to this output.
 func (ro *RunningOutput) Write() error {
-	ro.Lock()
+	if !ro.Quiet {
-	defer ro.Unlock()
+		log.Printf("Output [%s] buffer fullness: %d / %d metrics. "+
-
+			"Total gathered metrics: %d. Total dropped metrics: %d.",
-	if ro.tmpmetrics == nil {
+			ro.Name,
-		size := ro.MetricBufferLimit / ro.MetricBatchSize
+			ro.failMetrics.Len()+ro.metrics.Len(),
-		// ro.metrics already contains one batch
+			ro.MetricBufferLimit,
-		size = size - 1
+			ro.metrics.Total(),
-
+			ro.metrics.Drops()+ro.failMetrics.Drops())
 		if size < 1 {
 			size = 1
 		}
 		ro.tmpmetrics = make([]([]telegraf.Metric), size)
 	}
-	// Write any cached metric buffers before, as those metrics are the
+	var err error
-	// oldest
+	if !ro.failMetrics.IsEmpty() {
-	for ro.tmpmetrics[ro.readI] != nil {
+		bufLen := ro.failMetrics.Len()
-		if err := ro.write(ro.tmpmetrics[ro.readI]); err != nil {
+		// how many batches of failed writes we need to write.
-			return err
+		nBatches := bufLen/ro.MetricBatchSize + 1
-		} else {
+		batchSize := ro.MetricBatchSize
-			ro.tmpmetrics[ro.readI] = nil
+
-			ro.readI = (ro.readI + 1) % cap(ro.tmpmetrics)
+		for i := 0; i < nBatches; i++ {
 			// If it's the last batch, only grab the metrics that have not had
 			// a write attempt already (this is primarily to preserve order).
 			if i == nBatches-1 {
 				batchSize = bufLen % ro.MetricBatchSize
 			}
 			batch := ro.failMetrics.Batch(batchSize)
 			// If we've already failed previous writes, don't bother trying to
 			// write to this output again. We are not exiting the loop just so
 			// that we can rotate the metrics to preserve order.
 			if err == nil {
 				err = ro.write(batch)
 			}
 			if err != nil {
 				ro.failMetrics.Add(batch...)
 			}
 		}
 	}
-	err := ro.write(ro.metrics)
+	batch := ro.metrics.Batch(ro.MetricBatchSize)
 	// see comment above about not trying to write to an already failed output.
 	// if ro.failMetrics is empty then err will always be nil at this point.
 	if err == nil {
 		err = ro.write(batch)
 	}
 	if err != nil {
 		ro.failMetrics.Add(batch...)
 		return err
 	} else {
 		ro.metrics = make([]telegraf.Metric, 0)
 	}
 	return nil
 }
@ -165,8 +146,8 @@ func (ro *RunningOutput) write(metrics []telegraf.Metric) error {
 	elapsed := time.Since(start)
 	if err == nil {
 		if !ro.Quiet {
-			log.Printf("Wrote %d metrics to output %s in %s\n",
+			log.Printf("Output [%s] wrote batch of %d metrics in %s\n",
-				len(metrics), ro.Name, elapsed)
+				ro.Name, len(metrics), elapsed)
 		}
 	}
 	return err
--- a/internal/models/running_output_test.go
+++ b/internal/models/running_output_test.go
@ -2,7 +2,6 @@ package internal_models
 import (
 	"fmt"
 	"sort"
 	"sync"
 	"testing"
@ -29,6 +28,62 @@ var next5 = []telegraf.Metric{
 	testutil.TestMetric(101, "metric10"),
 }
 // Benchmark adding metrics.
 func BenchmarkRunningOutputAddWrite(b *testing.B) {
 	conf := &OutputConfig{
 		Filter: Filter{
 			IsActive: false,
 		},
 	}
 	m := &perfOutput{}
 	ro := NewRunningOutput("test", m, conf, 1000, 10000)
 	ro.Quiet = true
 	for n := 0; n < b.N; n++ {
 		ro.AddMetric(first5[0])
 		ro.Write()
 	}
 }
 // Benchmark adding metrics.
 func BenchmarkRunningOutputAddWriteEvery100(b *testing.B) {
 	conf := &OutputConfig{
 		Filter: Filter{
 			IsActive: false,
 		},
 	}
 	m := &perfOutput{}
 	ro := NewRunningOutput("test", m, conf, 1000, 10000)
 	ro.Quiet = true
 	for n := 0; n < b.N; n++ {
 		ro.AddMetric(first5[0])
 		if n%100 == 0 {
 			ro.Write()
 		}
 	}
 }
 // Benchmark adding metrics.
 func BenchmarkRunningOutputAddFailWrites(b *testing.B) {
 	conf := &OutputConfig{
 		Filter: Filter{
 			IsActive: false,
 		},
 	}
 	m := &perfOutput{}
 	m.failWrite = true
 	ro := NewRunningOutput("test", m, conf, 1000, 10000)
 	ro.Quiet = true
 	for n := 0; n < b.N; n++ {
 		ro.AddMetric(first5[0])
 	}
 }
 // Test that NameDrop filters ger properly applied.
 func TestRunningOutput_DropFilter(t *testing.T) {
 	conf := &OutputConfig{
@ -40,7 +95,7 @@ func TestRunningOutput_DropFilter(t *testing.T) {
 	assert.NoError(t, conf.Filter.CompileFilter())
 	m := &mockOutput{}
-	ro := NewRunningOutput("test", m, conf)
+	ro := NewRunningOutput("test", m, conf, 1000, 10000)
 	for _, metric := range first5 {
 		ro.AddMetric(metric)
@ -66,7 +121,7 @@ func TestRunningOutput_PassFilter(t *testing.T) {
 	assert.NoError(t, conf.Filter.CompileFilter())
 	m := &mockOutput{}
-	ro := NewRunningOutput("test", m, conf)
+	ro := NewRunningOutput("test", m, conf, 1000, 10000)
 	for _, metric := range first5 {
 		ro.AddMetric(metric)
@ -92,7 +147,7 @@ func TestRunningOutput_TagIncludeNoMatch(t *testing.T) {
 	assert.NoError(t, conf.Filter.CompileFilter())
 	m := &mockOutput{}
-	ro := NewRunningOutput("test", m, conf)
+	ro := NewRunningOutput("test", m, conf, 1000, 10000)
 	ro.AddMetric(first5[0])
 	assert.Len(t, m.Metrics(), 0)
@ -114,7 +169,7 @@ func TestRunningOutput_TagExcludeMatch(t *testing.T) {
 	assert.NoError(t, conf.Filter.CompileFilter())
 	m := &mockOutput{}
-	ro := NewRunningOutput("test", m, conf)
+	ro := NewRunningOutput("test", m, conf, 1000, 10000)
 	ro.AddMetric(first5[0])
 	assert.Len(t, m.Metrics(), 0)
@ -136,7 +191,7 @@ func TestRunningOutput_TagExcludeNoMatch(t *testing.T) {
 	assert.NoError(t, conf.Filter.CompileFilter())
 	m := &mockOutput{}
-	ro := NewRunningOutput("test", m, conf)
+	ro := NewRunningOutput("test", m, conf, 1000, 10000)
 	ro.AddMetric(first5[0])
 	assert.Len(t, m.Metrics(), 0)
@ -158,7 +213,7 @@ func TestRunningOutput_TagIncludeMatch(t *testing.T) {
 	assert.NoError(t, conf.Filter.CompileFilter())
 	m := &mockOutput{}
-	ro := NewRunningOutput("test", m, conf)
+	ro := NewRunningOutput("test", m, conf, 1000, 10000)
 	ro.AddMetric(first5[0])
 	assert.Len(t, m.Metrics(), 0)
@ -178,7 +233,7 @@ func TestRunningOutputDefault(t *testing.T) {
 	}
 	m := &mockOutput{}
-	ro := NewRunningOutput("test", m, conf)
+	ro := NewRunningOutput("test", m, conf, 1000, 10000)
 	for _, metric := range first5 {
 		ro.AddMetric(metric)
@ -193,77 +248,6 @@ func TestRunningOutputDefault(t *testing.T) {
 	assert.Len(t, m.Metrics(), 10)
 }
 // Test that the first metrics batch gets overwritten if there is a buffer overflow.
 func TestRunningOutputOverwrite(t *testing.T) {
 	conf := &OutputConfig{
 		Filter: Filter{
 			IsActive: false,
 		},
 	}
 	m := &mockOutput{}
 	ro := NewRunningOutput("test", m, conf)
 	ro.MetricBatchSize = 1
 	ro.MetricBufferLimit = 4
 	for _, metric := range first5 {
 		ro.AddMetric(metric)
 	}
 	require.Len(t, m.Metrics(), 0)
 	err := ro.Write()
 	require.NoError(t, err)
 	require.Len(t, m.Metrics(), 4)
 	var expected, actual []string
 	for i, exp := range first5[1:] {
 		expected = append(expected, exp.String())
 		actual = append(actual, m.Metrics()[i].String())
 	}
 	sort.Strings(expected)
 	sort.Strings(actual)
 	assert.Equal(t, expected, actual)
 }
 // Test that multiple buffer overflows are handled properly.
 func TestRunningOutputMultiOverwrite(t *testing.T) {
 	conf := &OutputConfig{
 		Filter: Filter{
 			IsActive: false,
 		},
 	}
 	m := &mockOutput{}
 	ro := NewRunningOutput("test", m, conf)
 	ro.MetricBatchSize = 1
 	ro.MetricBufferLimit = 3
 	for _, metric := range first5 {
 		ro.AddMetric(metric)
 	}
 	for _, metric := range next5 {
 		ro.AddMetric(metric)
 	}
 	require.Len(t, m.Metrics(), 0)
 	err := ro.Write()
 	require.NoError(t, err)
 	require.Len(t, m.Metrics(), 3)
 	var expected, actual []string
 	for i, exp := range next5[2:] {
 		expected = append(expected, exp.String())
 		actual = append(actual, m.Metrics()[i].String())
 	}
 	sort.Strings(expected)
 	sort.Strings(actual)
 	assert.Equal(t, expected, actual)
 }
 // Test that running output doesn't flush until it's full when
 // FlushBufferWhenFull is set.
 func TestRunningOutputFlushWhenFull(t *testing.T) {
@ -274,12 +258,9 @@ func TestRunningOutputFlushWhenFull(t *testing.T) {
 	}
 	m := &mockOutput{}
-	ro := NewRunningOutput("test", m, conf)
+	ro := NewRunningOutput("test", m, conf, 6, 10)
 	ro.FlushBufferWhenFull = true
 	ro.MetricBatchSize = 5
 	ro.MetricBufferLimit = 10
-	// Fill buffer to limit
+	// Fill buffer to 1 under limit
 	for _, metric := range first5 {
 		ro.AddMetric(metric)
 	}
@ -289,7 +270,7 @@ func TestRunningOutputFlushWhenFull(t *testing.T) {
 	// add one more metric
 	ro.AddMetric(next5[0])
 	// now it flushed
-	assert.Len(t, m.Metrics(), 5)
+	assert.Len(t, m.Metrics(), 6)
 	// add one more metric and write it manually
 	ro.AddMetric(next5[1])
@ -308,10 +289,7 @@ func TestRunningOutputMultiFlushWhenFull(t *testing.T) {
 	}
 	m := &mockOutput{}
-	ro := NewRunningOutput("test", m, conf)
+	ro := NewRunningOutput("test", m, conf, 4, 12)
 	ro.FlushBufferWhenFull = true
 	ro.MetricBatchSize = 4
 	ro.MetricBufferLimit = 12
 	// Fill buffer past limit twive
 	for _, metric := range first5 {
@ -333,12 +311,9 @@ func TestRunningOutputWriteFail(t *testing.T) {
 	m := &mockOutput{}
 	m.failWrite = true
-	ro := NewRunningOutput("test", m, conf)
+	ro := NewRunningOutput("test", m, conf, 4, 12)
 	ro.FlushBufferWhenFull = true
 	ro.MetricBatchSize = 4
 	ro.MetricBufferLimit = 12
-	// Fill buffer past limit twice
+	// Fill buffer to limit twice
 	for _, metric := range first5 {
 		ro.AddMetric(metric)
 	}
@ -361,6 +336,161 @@ func TestRunningOutputWriteFail(t *testing.T) {
 	assert.Len(t, m.Metrics(), 10)
 }
 // Verify that the order of points is preserved during a write failure.
 func TestRunningOutputWriteFailOrder(t *testing.T) {
 	conf := &OutputConfig{
 		Filter: Filter{
 			IsActive: false,
 		},
 	}
 	m := &mockOutput{}
 	m.failWrite = true
 	ro := NewRunningOutput("test", m, conf, 100, 1000)
 	// add 5 metrics
 	for _, metric := range first5 {
 		ro.AddMetric(metric)
 	}
 	// no successful flush yet
 	assert.Len(t, m.Metrics(), 0)
 	// Write fails
 	err := ro.Write()
 	require.Error(t, err)
 	// no successful flush yet
 	assert.Len(t, m.Metrics(), 0)
 	m.failWrite = false
 	// add 5 more metrics
 	for _, metric := range next5 {
 		ro.AddMetric(metric)
 	}
 	err = ro.Write()
 	require.NoError(t, err)
 	// Verify that 10 metrics were written
 	assert.Len(t, m.Metrics(), 10)
 	// Verify that they are in order
 	expected := append(first5, next5...)
 	assert.Equal(t, expected, m.Metrics())
 }
 // Verify that the order of points is preserved during many write failures.
 func TestRunningOutputWriteFailOrder2(t *testing.T) {
 	conf := &OutputConfig{
 		Filter: Filter{
 			IsActive: false,
 		},
 	}
 	m := &mockOutput{}
 	m.failWrite = true
 	ro := NewRunningOutput("test", m, conf, 5, 100)
 	// add 5 metrics
 	for _, metric := range first5 {
 		ro.AddMetric(metric)
 	}
 	// Write fails
 	err := ro.Write()
 	require.Error(t, err)
 	// no successful flush yet
 	assert.Len(t, m.Metrics(), 0)
 	// add 5 metrics
 	for _, metric := range next5 {
 		ro.AddMetric(metric)
 	}
 	// Write fails
 	err = ro.Write()
 	require.Error(t, err)
 	// no successful flush yet
 	assert.Len(t, m.Metrics(), 0)
 	// add 5 metrics
 	for _, metric := range first5 {
 		ro.AddMetric(metric)
 	}
 	// Write fails
 	err = ro.Write()
 	require.Error(t, err)
 	// no successful flush yet
 	assert.Len(t, m.Metrics(), 0)
 	// add 5 metrics
 	for _, metric := range next5 {
 		ro.AddMetric(metric)
 	}
 	// Write fails
 	err = ro.Write()
 	require.Error(t, err)
 	// no successful flush yet
 	assert.Len(t, m.Metrics(), 0)
 	m.failWrite = false
 	err = ro.Write()
 	require.NoError(t, err)
 	// Verify that 10 metrics were written
 	assert.Len(t, m.Metrics(), 20)
 	// Verify that they are in order
 	expected := append(first5, next5...)
 	expected = append(expected, first5...)
 	expected = append(expected, next5...)
 	assert.Equal(t, expected, m.Metrics())
 }
 // Verify that the order of points is preserved when there is a remainder
 // of points for the batch.
 //
 // ie, with a batch size of 5:
 //
 //     1 2 3 4 5 6 <-- order, failed points
 //     6 1 2 3 4 5 <-- order, after 1st write failure (1 2 3 4 5 was batch)
 //     1 2 3 4 5 6 <-- order, after 2nd write failure, (6 was batch)
 //
 func TestRunningOutputWriteFailOrder3(t *testing.T) {
 	conf := &OutputConfig{
 		Filter: Filter{
 			IsActive: false,
 		},
 	}
 	m := &mockOutput{}
 	m.failWrite = true
 	ro := NewRunningOutput("test", m, conf, 5, 1000)
 	// add 5 metrics
 	for _, metric := range first5 {
 		ro.AddMetric(metric)
 	}
 	// no successful flush yet
 	assert.Len(t, m.Metrics(), 0)
 	// Write fails
 	err := ro.Write()
 	require.Error(t, err)
 	// no successful flush yet
 	assert.Len(t, m.Metrics(), 0)
 	// add and attempt to write a single metric:
 	ro.AddMetric(next5[0])
 	err = ro.Write()
 	require.Error(t, err)
 	// unset fail and write metrics
 	m.failWrite = false
 	err = ro.Write()
 	require.NoError(t, err)
 	// Verify that 6 metrics were written
 	assert.Len(t, m.Metrics(), 6)
 	// Verify that they are in order
 	expected := append(first5, next5[0])
 	assert.Equal(t, expected, m.Metrics())
 }
 type mockOutput struct {
 	sync.Mutex
@ -408,3 +538,31 @@ func (m *mockOutput) Metrics() []telegraf.Metric {
 	defer m.Unlock()
 	return m.metrics
 }
 type perfOutput struct {
 	// if true, mock a write failure
 	failWrite bool
 }
 func (m *perfOutput) Connect() error {
 	return nil
 }
 func (m *perfOutput) Close() error {
 	return nil
 }
 func (m *perfOutput) Description() string {
 	return ""
 }
 func (m *perfOutput) SampleConfig() string {
 	return ""
 }
 func (m *perfOutput) Write(metrics []telegraf.Metric) error {
 	if m.failWrite {
 		return fmt.Errorf("Failed Write!")
 	}
 	return nil
 }