Remove outputs blocking inputs when output is slow (#4938)

Daniel Nelson 2018-11-05 13:34:28 -08:00 committed by GitHub
parent 74667cd681
commit 6e5c2f8bb6
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
59 changed files with 3615 additions and 2189 deletions

View File

@ -1,489 +1,52 @@
## Steps for Contributing:
### Contributing
1. [Sign the CLA](http://influxdb.com/community/cla.html)
1. Make changes or write plugin (see below for details)
1. Add your plugin to one of: `plugins/{inputs,outputs,aggregators,processors}/all/all.go`
1. If your plugin requires a new Go package,
[add it](https://github.com/influxdata/telegraf/blob/master/CONTRIBUTING.md#adding-a-dependency)
1. Write a README for your plugin. If it's an input plugin, it should be structured
like the [input example here](https://github.com/influxdata/telegraf/blob/master/plugins/inputs/EXAMPLE_README.md).
Output plugins READMEs are less structured,
but any information you can provide on how the data will look is appreciated.
See the [OpenTSDB output](https://github.com/influxdata/telegraf/tree/master/plugins/outputs/opentsdb)
for a good example.
1. **Optional:** Help users of your plugin by including example queries for populating dashboards. Include these sample queries in the `README.md` for the plugin.
1. **Optional:** Write a [tickscript](https://docs.influxdata.com/kapacitor/v1.0/tick/syntax/) for your plugin and add it to [Kapacitor](https://github.com/influxdata/kapacitor/tree/master/examples/telegraf).
1. [Sign the CLA][cla].
1. Open a [new issue][] to discuss the changes you would like to make. This is
not strictly required but it may help reduce the amount of rework you need
to do later.
1. Make changes or write plugin using the guidelines in the following
documents:
- [Input Plugins][inputs]
- [Processor Plugins][processors]
- [Aggregator Plugins][aggregators]
- [Output Plugins][outputs]
1. Ensure you have added proper unit tests and documentation.
1. Open a new [pull request][].
## GoDoc
### GoDoc
Public interfaces for inputs, outputs, processors, aggregators, metrics,
and the accumulator can be found on the GoDoc
and the accumulator can be found in the GoDoc:
[![GoDoc](https://godoc.org/github.com/influxdata/telegraf?status.svg)](https://godoc.org/github.com/influxdata/telegraf)
## Sign the CLA
### Common development tasks
Before we can merge a pull request, you will need to sign the CLA,
which can be found [on our website](http://influxdb.com/community/cla.html)
## Adding a dependency
**Adding a dependency:**
Assuming you can already build the project, run these in the telegraf directory:
1. `dep ensure -vendor-only`
2. `dep ensure -add github.com/[dependency]/[new-package]`
## Input Plugins
This section is for developers who want to create new collection inputs.
Telegraf is entirely plugin driven. This interface allows operators to
pick and choose what is gathered and makes it easy for developers
to create new ways of generating metrics.
Plugin authorship is kept as simple as possible to encourage people to develop
and submit new inputs.
### Input Plugin Guidelines
* A plugin must conform to the [`telegraf.Input`](https://godoc.org/github.com/influxdata/telegraf#Input) interface.
* Input Plugins should call `inputs.Add` in their `init` function to register themselves.
See below for a quick example.
* Input Plugins must be added to the
`github.com/influxdata/telegraf/plugins/inputs/all/all.go` file.
* The `SampleConfig` function should return valid toml that describes how the
plugin can be configured. This is included in `telegraf config`. Please
consult the [SampleConfig](https://github.com/influxdata/telegraf/wiki/SampleConfig)
page for the latest style guidelines.
* The `Description` function should say in one line what this plugin does.
Let's say you've written a plugin that emits metrics about processes on the
current host.
### Input Plugin Example
```go
package simple
// simple.go
import (
"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/plugins/inputs"
)
type Simple struct {
Ok bool
}
func (s *Simple) Description() string {
return "a demo plugin"
}
func (s *Simple) SampleConfig() string {
return `
## Indicate if everything is fine
ok = true
`
}
func (s *Simple) Gather(acc telegraf.Accumulator) error {
if s.Ok {
acc.AddFields("state", map[string]interface{}{"value": "pretty good"}, nil)
} else {
acc.AddFields("state", map[string]interface{}{"value": "not great"}, nil)
}
return nil
}
func init() {
inputs.Add("simple", func() telegraf.Input { return &Simple{} })
}
```
### Input Plugin Development
* Run `make static` followed by `make plugin-[pluginName]` to spin up a docker dev environment
using docker-compose.
* ***[Optional]*** When developing a plugin, add a `dev` directory with a `docker-compose.yml` and `telegraf.conf`
as well as any other supporting files, where sensible.
## Adding Typed Metrics
In addition to the `AddFields` function, the accumulator also supports the
`AddGauge` and `AddCounter` functions. These functions are for adding _typed_
metrics. Metric types are ignored for the InfluxDB output, but can be used
for other outputs, such as [prometheus](https://prometheus.io/docs/concepts/metric_types/).
## Input Plugins Accepting Arbitrary Data Formats
Some input plugins (such as
[exec](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/exec))
accept arbitrary input data formats. An overview of these data formats can
be found
[here](https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md).
In order to enable this, you must specify a `SetParser(parser parsers.Parser)`
function on the plugin object (see the exec plugin for an example), as well as
defining `parser` as a field of the object.
You can then utilize the parser internally in your plugin, parsing data as you
see fit. Telegraf's configuration layer will take care of instantiating and
creating the `Parser` object.
You should also add the following to your SampleConfig() return:
```toml
## Data format to consume.
## Each data format has its own unique set of configuration options, read
## more about them here:
## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md
data_format = "influx"
```
Below is the `Parser` interface.
```go
// Parser is an interface defining functions that a parser plugin must satisfy.
type Parser interface {
// Parse takes a byte buffer separated by newlines
// ie, `cpu.usage.idle 90\ncpu.usage.busy 10`
// and parses it into telegraf metrics
Parse(buf []byte) ([]telegraf.Metric, error)
// ParseLine takes a single string metric
// ie, "cpu.usage.idle 90"
// and parses it into a telegraf metric.
ParseLine(line string) (telegraf.Metric, error)
}
```
And you can view the code
[here](https://github.com/influxdata/telegraf/blob/master/plugins/parsers/registry.go).
## Service Input Plugins
This section is for developers who want to create new "service" collection
inputs. A service plugin differs from a regular plugin in that it operates
a background service while Telegraf is running. One example would be the `statsd`
plugin, which operates a statsd server.
Service Input Plugins are substantially more complicated than a regular plugin, as they
will require threads and locks to verify data integrity. Service Input Plugins should
be avoided unless there is no way to create their behavior with a regular plugin.
Their interface is quite similar to a regular plugin, with the addition of `Start()`
and `Stop()` methods.
### Service Plugin Guidelines
* Same as the `Plugin` guidelines, except that they must conform to the
[`telegraf.ServiceInput`](https://godoc.org/github.com/influxdata/telegraf#ServiceInput) interface.
## Output Plugins
This section is for developers who want to create a new output sink. Outputs
are created in a similar manner as collection plugins, and their interface has
similar constructs.
### Output Plugin Guidelines
* An output must conform to the [`telegraf.Output`](https://godoc.org/github.com/influxdata/telegraf#Output) interface.
* Outputs should call `outputs.Add` in their `init` function to register themselves.
See below for a quick example.
* To be available within Telegraf itself, plugins must add themselves to the
`github.com/influxdata/telegraf/plugins/outputs/all/all.go` file.
* The `SampleConfig` function should return valid toml that describes how the
plugin can be configured. This is included in `telegraf config`. Please
consult the [SampleConfig](https://github.com/influxdata/telegraf/wiki/SampleConfig)
page for the latest style guidelines.
* The `Description` function should say in one line what this output does.
### Output Example
```go
package simpleoutput
// simpleoutput.go
import (
"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/plugins/outputs"
)
type Simple struct {
Ok bool
}
func (s *Simple) Description() string {
return "a demo output"
}
func (s *Simple) SampleConfig() string {
return `
ok = true
`
}
func (s *Simple) Connect() error {
// Make a connection to the URL here
return nil
}
func (s *Simple) Close() error {
// Close connection to the URL here
return nil
}
func (s *Simple) Write(metrics []telegraf.Metric) error {
for _, metric := range metrics {
// write `metric` to the output sink here
}
return nil
}
func init() {
outputs.Add("simpleoutput", func() telegraf.Output { return &Simple{} })
}
```
## Output Plugins Writing Arbitrary Data Formats
Some output plugins (such as
[file](https://github.com/influxdata/telegraf/tree/master/plugins/outputs/file))
can write arbitrary output data formats. An overview of these data formats can
be found
[here](https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_OUTPUT.md).
In order to enable this, you must specify a
`SetSerializer(serializer serializers.Serializer)`
function on the plugin object (see the file plugin for an example), as well as
defining `serializer` as a field of the object.
You can then utilize the serializer internally in your plugin, serializing data
before it's written. Telegraf's configuration layer will take care of
instantiating and creating the `Serializer` object.
You should also add the following to your SampleConfig() return:
```toml
## Data format to output.
## Each data format has its own unique set of configuration options, read
## more about them here:
## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_OUTPUT.md
data_format = "influx"
```
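Putting this together, here is a rough sketch of how an output might hold and use
the serializer. The type and field names below are illustrative only and are not
taken from an existing plugin; the remaining output methods are the same as in the
example above and are omitted:

```go
package simpleoutput

import (
	"github.com/influxdata/telegraf"
	"github.com/influxdata/telegraf/plugins/serializers"
)

type SimpleSerializing struct {
	serializer serializers.Serializer
}

// SetSerializer is called by Telegraf's configuration layer with the
// serializer built from the user's `data_format` setting.
func (s *SimpleSerializing) SetSerializer(serializer serializers.Serializer) {
	s.serializer = serializer
}

func (s *SimpleSerializing) Write(metrics []telegraf.Metric) error {
	for _, metric := range metrics {
		octets, err := s.serializer.Serialize(metric)
		if err != nil {
			return err
		}
		// write `octets` to the output sink here
		_ = octets
	}
	return nil
}
```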
## Service Output Plugins
This section is for developers who want to create a new "service" output. A
service output differs from a regular output in that it operates a background service
while Telegraf is running. One example would be the `prometheus_client` output,
which operates an HTTP server.
Their interface is quite similar to a regular output, with the addition of `Start()`
and `Stop()` methods.
### Service Output Guidelines
* Same as the `Output` guidelines, except that they must conform to the
[`telegraf.ServiceOutput`](https://godoc.org/github.com/influxdata/telegraf#ServiceOutput) interface. A minimal sketch is shown below.
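The following compile-level sketch is not an actual plugin in the repository; the
plugin name is made up and the background server itself is left out. It only shows
where `Start` and `Stop` fit alongside the regular output methods:

```go
package simpleserviceoutput

import (
	"github.com/influxdata/telegraf"
	"github.com/influxdata/telegraf/plugins/outputs"
)

type SimpleService struct {
	// e.g. an HTTP server or listener would live here
}

func (s *SimpleService) Description() string  { return "a demo service output" }
func (s *SimpleService) SampleConfig() string { return "" }
func (s *SimpleService) Connect() error       { return nil }
func (s *SimpleService) Close() error         { return nil }

// Start launches the background service (for example, an HTTP server).
func (s *SimpleService) Start() error {
	return nil
}

// Stop shuts the background service down.
func (s *SimpleService) Stop() {
}

func (s *SimpleService) Write(metrics []telegraf.Metric) error {
	for _, metric := range metrics {
		// expose or forward `metric` from the running service here
		_ = metric
	}
	return nil
}

func init() {
	outputs.Add("simpleserviceoutput", func() telegraf.Output { return &SimpleService{} })
}
```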
## Processor Plugins
This section is for developers who want to create a new processor plugin.
### Processor Plugin Guidelines
* A processor must conform to the [`telegraf.Processor`](https://godoc.org/github.com/influxdata/telegraf#Processor) interface.
* Processors should call `processors.Add` in their `init` function to register themselves.
See below for a quick example.
* To be available within Telegraf itself, plugins must add themselves to the
`github.com/influxdata/telegraf/plugins/processors/all/all.go` file.
* The `SampleConfig` function should return valid toml that describes how the
processor can be configured. This is included in the output of `telegraf config`.
* The `Description` function should say in one line what this processor does.
### Processor Example
```go
package printer
// printer.go
import (
"fmt"
"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/plugins/processors"
)
type Printer struct {
}
var sampleConfig = `
`
func (p *Printer) SampleConfig() string {
return sampleConfig
}
func (p *Printer) Description() string {
return "Print all metrics that pass through this filter."
}
func (p *Printer) Apply(in ...telegraf.Metric) []telegraf.Metric {
for _, metric := range in {
fmt.Println(metric.String())
}
return in
}
func init() {
processors.Add("printer", func() telegraf.Processor {
return &Printer{}
})
}
```
## Aggregator Plugins
This section is for developers who want to create a new aggregator plugin.
### Aggregator Plugin Guidelines
* An aggregator must conform to the [`telegraf.Aggregator`](https://godoc.org/github.com/influxdata/telegraf#Aggregator) interface.
* Aggregators should call `aggregators.Add` in their `init` function to register themselves.
See below for a quick example.
* To be available within Telegraf itself, plugins must add themselves to the
`github.com/influxdata/telegraf/plugins/aggregators/all/all.go` file.
* The `SampleConfig` function should return valid toml that describes how the
aggregator can be configured. This is included in `telegraf config`.
* The `Description` function should say in one line what this aggregator does.
* The Aggregator plugin will need to keep caches of metrics that have passed
through it. This should be done using the builtin `HashID()` function of each
metric.
* When the `Reset()` function is called, all caches should be cleared.
### Aggregator Example
```go
package min
// min.go
import (
"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/plugins/aggregators"
)
type Min struct {
// caches for metric fields, names, and tags
fieldCache map[uint64]map[string]float64
nameCache map[uint64]string
tagCache map[uint64]map[string]string
}
func NewMin() telegraf.Aggregator {
m := &Min{}
m.Reset()
return m
}
var sampleConfig = `
## period is the flush & clear interval of the aggregator.
period = "30s"
## If true drop_original will drop the original metrics and
## only send aggregates.
drop_original = false
`
func (m *Min) SampleConfig() string {
return sampleConfig
}
func (m *Min) Description() string {
return "Keep the aggregate min of each metric passing through."
}
func (m *Min) Add(in telegraf.Metric) {
id := in.HashID()
if _, ok := m.nameCache[id]; !ok {
// hit an uncached metric, create caches for first time:
m.nameCache[id] = in.Name()
m.tagCache[id] = in.Tags()
m.fieldCache[id] = make(map[string]float64)
for k, v := range in.Fields() {
if fv, ok := convert(v); ok {
m.fieldCache[id][k] = fv
}
}
} else {
for k, v := range in.Fields() {
if fv, ok := convert(v); ok {
if _, ok := m.fieldCache[id][k]; !ok {
// hit an uncached field of a cached metric
m.fieldCache[id][k] = fv
continue
}
if fv < m.fieldCache[id][k] {
// set new minimum
m.fieldCache[id][k] = fv
}
}
}
}
}
func (m *Min) Push(acc telegraf.Accumulator) {
for id := range m.nameCache {
fields := map[string]interface{}{}
for k, v := range m.fieldCache[id] {
fields[k+"_min"] = v
}
acc.AddFields(m.nameCache[id], fields, m.tagCache[id])
}
}
func (m *Min) Reset() {
m.fieldCache = make(map[uint64]map[string]float64)
m.nameCache = make(map[uint64]string)
m.tagCache = make(map[uint64]map[string]string)
}
func convert(in interface{}) (float64, bool) {
switch v := in.(type) {
case float64:
return v, true
case int64:
return float64(v), true
default:
return 0, false
}
}
func init() {
aggregators.Add("min", func() telegraf.Aggregator {
return NewMin()
})
}
```
## Unit Tests
**Unit Tests:**
Before opening a pull request you should run the linter checks and
the short tests.
### Execute linter
**Run static analysis:**
execute `make check`
```
make check
```
### Execute short tests
**Run short tests:**
execute `make test`
```
make test
```
### Execute integration tests
**Execute integration tests:**
Running the integration tests requires several docker containers to be
running. You can start the containers with:
@ -497,3 +60,12 @@ make test-all
```
Use `make docker-kill` to stop the containers.
[cla]: https://www.influxdata.com/legal/cla/
[new issue]: https://github.com/influxdata/telegraf/issues/new/choose
[pull request]: https://github.com/influxdata/telegraf/compare
[inputs]: /docs/INPUTS.md
[processors]: /docs/PROCESSORS.md
[aggregators]: /docs/AGGREGATORS.md
[outputs]: /docs/OUTPUTS.md

View File

@ -1,16 +1,14 @@
package telegraf
import "time"
import (
"time"
)
// Accumulator is an interface for "accumulating" metrics from plugin(s).
// The metrics are sent down a channel shared between all plugins.
// Accumulator allows adding metrics to the processing flow.
type Accumulator interface {
// AddFields adds a metric to the accumulator with the given measurement
// name, fields, and tags (and timestamp). If a timestamp is not provided,
// then the accumulator sets it to "now".
// Create a point with a value, decorating it with tags
// NOTE: tags is expected to be owned by the caller, don't mutate
// it after passing to Add.
AddFields(measurement string,
fields map[string]interface{},
tags map[string]string,
@ -40,7 +38,49 @@ type Accumulator interface {
tags map[string]string,
t ...time.Time)
// AddMetric adds a metric to the accumulator.
AddMetric(Metric)
// SetPrecision takes two time.Duration objects. If the first is non-zero,
// it sets that as the precision. Otherwise, it takes the second argument
// as the order of time that the metrics should be rounded to, with the
// maximum being 1s.
SetPrecision(precision, interval time.Duration)
// Report an error.
AddError(err error)
// Upgrade to a TrackingAccumulator with space for maxTracked
// metrics/batches.
WithTracking(maxTracked int) TrackingAccumulator
}
// TrackingID uniquely identifies a tracked metric group
type TrackingID uint64
// DeliveryInfo provides the results of a delivered metric group.
type DeliveryInfo interface {
// ID is the TrackingID
ID() TrackingID
// Delivered returns true if the metric was processed successfully.
Delivered() bool
}
// TrackingAccumulator is an Accumulator that provides a signal when the
// metric has been fully processed. Sending more metrics than the accumulator
// has been allocated for without reading status from the Delivered
// channel is an error.
type TrackingAccumulator interface {
Accumulator
// Add the Metric and arrange for tracking feedback after processing.
AddTrackingMetric(m Metric) TrackingID
// Add a group of Metrics and arrange for a signal when the group has been
// processed.
AddTrackingMetricGroup(group []Metric) TrackingID
// Delivered returns a channel that will contain the tracking results.
Delivered() <-chan DeliveryInfo
}

View File

@ -20,13 +20,13 @@ type MetricMaker interface {
type accumulator struct {
maker MetricMaker
metrics chan telegraf.Metric
metrics chan<- telegraf.Metric
precision time.Duration
}
func NewAccumulator(
maker MetricMaker,
metrics chan telegraf.Metric,
metrics chan<- telegraf.Metric,
) telegraf.Accumulator {
acc := accumulator{
maker: maker,
@ -42,7 +42,7 @@ func (ac *accumulator) AddFields(
tags map[string]string,
t ...time.Time,
) {
ac.addMetric(measurement, tags, fields, telegraf.Untyped, t...)
ac.addFields(measurement, tags, fields, telegraf.Untyped, t...)
}
func (ac *accumulator) AddGauge(
@ -51,7 +51,7 @@ func (ac *accumulator) AddGauge(
tags map[string]string,
t ...time.Time,
) {
ac.addMetric(measurement, tags, fields, telegraf.Gauge, t...)
ac.addFields(measurement, tags, fields, telegraf.Gauge, t...)
}
func (ac *accumulator) AddCounter(
@ -60,7 +60,7 @@ func (ac *accumulator) AddCounter(
tags map[string]string,
t ...time.Time,
) {
ac.addMetric(measurement, tags, fields, telegraf.Counter, t...)
ac.addFields(measurement, tags, fields, telegraf.Counter, t...)
}
func (ac *accumulator) AddSummary(
@ -69,7 +69,7 @@ func (ac *accumulator) AddSummary(
tags map[string]string,
t ...time.Time,
) {
ac.addMetric(measurement, tags, fields, telegraf.Summary, t...)
ac.addFields(measurement, tags, fields, telegraf.Summary, t...)
}
func (ac *accumulator) AddHistogram(
@ -78,10 +78,16 @@ func (ac *accumulator) AddHistogram(
tags map[string]string,
t ...time.Time,
) {
ac.addMetric(measurement, tags, fields, telegraf.Histogram, t...)
ac.addFields(measurement, tags, fields, telegraf.Histogram, t...)
}
func (ac *accumulator) addMetric(
func (ac *accumulator) AddMetric(m telegraf.Metric) {
if m := ac.maker.MakeMetric(m); m != nil {
ac.metrics <- m
}
}
func (ac *accumulator) addFields(
measurement string,
tags map[string]string,
fields map[string]interface{},
@ -104,13 +110,9 @@ func (ac *accumulator) AddError(err error) {
return
}
NErrors.Incr(1)
log.Printf("E! Error in plugin [%s]: %s", ac.maker.Name(), err)
log.Printf("E! [%s]: Error in plugin: %v", ac.maker.Name(), err)
}
// SetPrecision takes two time.Duration objects. If the first is non-zero,
// it sets that as the precision. Otherwise, it takes the second argument
// as the order of time that the metrics should be rounded to, with the
// maximum being 1s.
func (ac *accumulator) SetPrecision(precision, interval time.Duration) {
if precision > 0 {
ac.precision = precision
@ -128,7 +130,7 @@ func (ac *accumulator) SetPrecision(precision, interval time.Duration) {
}
}
func (ac accumulator) getTime(t []time.Time) time.Time {
func (ac *accumulator) getTime(t []time.Time) time.Time {
var timestamp time.Time
if len(t) > 0 {
timestamp = t[0]
@ -137,3 +139,43 @@ func (ac accumulator) getTime(t []time.Time) time.Time {
}
return timestamp.Round(ac.precision)
}
func (ac *accumulator) WithTracking(maxTracked int) telegraf.TrackingAccumulator {
return &trackingAccumulator{
Accumulator: ac,
delivered: make(chan telegraf.DeliveryInfo, maxTracked),
}
}
type trackingAccumulator struct {
telegraf.Accumulator
delivered chan telegraf.DeliveryInfo
}
func (a *trackingAccumulator) AddTrackingMetric(m telegraf.Metric) telegraf.TrackingID {
dm, id := metric.WithTracking(m, a.onDelivery)
a.AddMetric(dm)
return id
}
func (a *trackingAccumulator) AddTrackingMetricGroup(group []telegraf.Metric) telegraf.TrackingID {
db, id := metric.WithGroupTracking(group, a.onDelivery)
for _, m := range db {
a.AddMetric(m)
}
return id
}
func (a *trackingAccumulator) Delivered() <-chan telegraf.DeliveryInfo {
return a.delivered
}
func (a *trackingAccumulator) onDelivery(info telegraf.DeliveryInfo) {
select {
case a.delivered <- info:
default:
// This is a programming error in the input. More items were sent for
// tracking than space requested.
panic("channel is full")
}
}

View File

@ -1,9 +1,9 @@
package agent
import (
"context"
"fmt"
"log"
"os"
"runtime"
"sync"
"time"
@ -12,187 +12,157 @@ import (
"github.com/influxdata/telegraf/internal"
"github.com/influxdata/telegraf/internal/config"
"github.com/influxdata/telegraf/internal/models"
"github.com/influxdata/telegraf/selfstat"
"github.com/influxdata/telegraf/plugins/serializers/influx"
)
// Agent runs telegraf and collects data based on the given config
// Agent runs a set of plugins.
type Agent struct {
Config *config.Config
}
// NewAgent returns an Agent struct based off the given Config
// NewAgent returns an Agent for the given Config.
func NewAgent(config *config.Config) (*Agent, error) {
a := &Agent{
Config: config,
}
if !a.Config.Agent.OmitHostname {
if a.Config.Agent.Hostname == "" {
hostname, err := os.Hostname()
if err != nil {
return nil, err
}
a.Config.Agent.Hostname = hostname
}
config.Tags["host"] = a.Config.Agent.Hostname
}
return a, nil
}
// Connect connects to all configured outputs
func (a *Agent) Connect() error {
for _, o := range a.Config.Outputs {
switch ot := o.Output.(type) {
case telegraf.ServiceOutput:
if err := ot.Start(); err != nil {
log.Printf("E! Service for output %s failed to start, exiting\n%s\n",
o.Name, err.Error())
return err
}
// Run starts and runs the Agent until the context is done.
func (a *Agent) Run(ctx context.Context) error {
log.Printf("I! [agent] Config: Interval:%s, Quiet:%#v, Hostname:%#v, "+
"Flush Interval:%s",
a.Config.Agent.Interval.Duration, a.Config.Agent.Quiet,
a.Config.Agent.Hostname, a.Config.Agent.FlushInterval.Duration)
if ctx.Err() != nil {
return ctx.Err()
}
log.Printf("D! [agent] Connecting outputs")
err := a.connectOutputs(ctx)
if err != nil {
return err
}
inputC := make(chan telegraf.Metric, 100)
procC := make(chan telegraf.Metric, 100)
outputC := make(chan telegraf.Metric, 100)
startTime := time.Now()
log.Printf("D! [agent] Starting service inputs")
err = a.startServiceInputs(ctx, inputC)
if err != nil {
return err
}
var wg sync.WaitGroup
src := inputC
dst := inputC
wg.Add(1)
go func(dst chan telegraf.Metric) {
defer wg.Done()
err := a.runInputs(ctx, startTime, dst)
if err != nil {
log.Printf("E! [agent] Error running inputs: %v", err)
}
log.Printf("D! Attempting connection to output: %s\n", o.Name)
err := o.Output.Connect()
if err != nil {
log.Printf("E! Failed to connect to output %s, retrying in 15s, "+
"error was '%s' \n", o.Name, err)
time.Sleep(15 * time.Second)
err = o.Output.Connect()
log.Printf("D! [agent] Stopping service inputs")
a.stopServiceInputs()
close(dst)
log.Printf("D! [agent] Input channel closed")
}(dst)
src = dst
if len(a.Config.Processors) > 0 {
dst = procC
wg.Add(1)
go func(src, dst chan telegraf.Metric) {
defer wg.Done()
err := a.runProcessors(src, dst)
if err != nil {
return err
log.Printf("E! [agent] Error running processors: %v", err)
}
}
log.Printf("D! Successfully connected to output: %s\n", o.Name)
close(dst)
log.Printf("D! [agent] Processor channel closed")
}(src, dst)
src = dst
}
if len(a.Config.Aggregators) > 0 {
dst = outputC
wg.Add(1)
go func(src, dst chan telegraf.Metric) {
defer wg.Done()
err := a.runAggregators(startTime, src, dst)
if err != nil {
log.Printf("E! [agent] Error running aggregators: %v", err)
}
close(dst)
log.Printf("D! [agent] Output channel closed")
}(src, dst)
src = dst
}
wg.Add(1)
go func(src chan telegraf.Metric) {
defer wg.Done()
err := a.runOutputs(startTime, src)
if err != nil {
log.Printf("E! [agent] Error running outputs: %v", err)
}
}(src)
wg.Wait()
log.Printf("D! [agent] Closing outputs")
err = a.closeOutputs()
if err != nil {
return err
}
return nil
}
// Close closes the connection to all configured outputs
func (a *Agent) Close() error {
var err error
for _, o := range a.Config.Outputs {
err = o.Output.Close()
switch ot := o.Output.(type) {
case telegraf.ServiceOutput:
ot.Stop()
}
}
return err
}
func panicRecover(input *models.RunningInput) {
if err := recover(); err != nil {
trace := make([]byte, 2048)
runtime.Stack(trace, true)
log.Printf("E! FATAL: Input [%s] panicked: %s, Stack:\n%s\n",
input.Name(), err, trace)
log.Println("E! PLEASE REPORT THIS PANIC ON GITHUB with " +
"stack trace, configuration, and OS information: " +
"https://github.com/influxdata/telegraf/issues/new")
}
}
// gatherer runs the inputs that have been configured with their own
// reporting interval.
func (a *Agent) gatherer(
shutdown chan struct{},
input *models.RunningInput,
interval time.Duration,
metricC chan telegraf.Metric,
) {
defer panicRecover(input)
GatherTime := selfstat.RegisterTiming("gather",
"gather_time_ns",
map[string]string{"input": input.Config.Name},
)
acc := NewAccumulator(input, metricC)
acc.SetPrecision(a.Config.Agent.Precision.Duration,
a.Config.Agent.Interval.Duration)
ticker := time.NewTicker(interval)
defer ticker.Stop()
for {
internal.RandomSleep(a.Config.Agent.CollectionJitter.Duration, shutdown)
start := time.Now()
gatherWithTimeout(shutdown, input, acc, interval)
elapsed := time.Since(start)
GatherTime.Incr(elapsed.Nanoseconds())
select {
case <-shutdown:
return
case <-ticker.C:
continue
}
}
}
// gatherWithTimeout gathers from the given input, with the given timeout.
// when the given timeout is reached, gatherWithTimeout logs an error message
// but continues waiting for it to return. This is to avoid leaving behind
// hung processes, and to prevent re-calling the same hung process over and
// over.
func gatherWithTimeout(
shutdown chan struct{},
input *models.RunningInput,
acc telegraf.Accumulator,
timeout time.Duration,
) {
ticker := time.NewTicker(timeout)
defer ticker.Stop()
done := make(chan error)
go func() {
done <- input.Input.Gather(acc)
// Test runs the inputs once and prints the output to stdout in line protocol.
func (a *Agent) Test() error {
var wg sync.WaitGroup
metricC := make(chan telegraf.Metric)
defer func() {
close(metricC)
wg.Wait()
}()
for {
select {
case err := <-done:
if err != nil {
acc.AddError(err)
}
return
case <-ticker.C:
err := fmt.Errorf("took longer to collect than collection interval (%s)",
timeout)
acc.AddError(err)
continue
case <-shutdown:
return
}
}
}
// Test verifies that we can 'Gather' from all inputs with their configured
// Config struct
func (a *Agent) Test() error {
shutdown := make(chan struct{})
defer close(shutdown)
metricC := make(chan telegraf.Metric)
// dummy receiver for the point channel
wg.Add(1)
go func() {
for {
select {
case <-metricC:
// do nothing
case <-shutdown:
return
defer wg.Done()
s := influx.NewSerializer()
s.SetFieldSortOrder(influx.SortFields)
for metric := range metricC {
octets, err := s.Serialize(metric)
if err == nil {
fmt.Print("> ", string(octets))
}
}
}()
for _, input := range a.Config.Inputs {
if _, ok := input.Input.(telegraf.ServiceInput); ok {
fmt.Printf("\nWARNING: skipping plugin [[%s]]: service inputs not supported in --test mode\n",
log.Printf("W!: [agent] skipping plugin [[%s]]: service inputs not supported in --test mode",
input.Name())
continue
}
@ -200,7 +170,6 @@ func (a *Agent) Test() error {
acc := NewAccumulator(input, metricC)
acc.SetPrecision(a.Config.Agent.Precision.Duration,
a.Config.Agent.Interval.Duration)
input.SetTrace(true)
input.SetDefaultTags(a.Config.Tags)
if err := input.Input.Gather(acc); err != nil {
@ -218,216 +187,445 @@ func (a *Agent) Test() error {
}
}
return nil
}
// flush writes a list of metrics to all configured outputs
func (a *Agent) flush() {
var wg sync.WaitGroup
wg.Add(len(a.Config.Outputs))
for _, o := range a.Config.Outputs {
go func(output *models.RunningOutput) {
defer wg.Done()
err := output.Write()
if err != nil {
log.Printf("E! Error writing to output [%s]: %s\n",
output.Name, err.Error())
}
}(o)
}
wg.Wait()
}
// flusher monitors the metrics input channel and flushes on the minimum interval
func (a *Agent) flusher(
shutdown chan struct{},
metricC chan telegraf.Metric,
aggMetricC chan telegraf.Metric,
outMetricC chan telegraf.Metric,
// runInputs starts and triggers the periodic gather for Inputs.
//
// When the context is done the timers are stopped and this function returns
// after all ongoing Gather calls complete.
func (a *Agent) runInputs(
ctx context.Context,
startTime time.Time,
dst chan<- telegraf.Metric,
) error {
var wg sync.WaitGroup
wg.Add(1)
go func() {
defer wg.Done()
for {
select {
case <-shutdown:
if len(outMetricC) > 0 {
// keep going until channel is empty
continue
}
return
case metric := <-outMetricC:
for i, o := range a.Config.Outputs {
if i == len(a.Config.Outputs)-1 {
o.AddMetric(metric)
} else {
o.AddMetric(metric.Copy())
}
}
}
}
}()
wg.Add(1)
go func() {
defer wg.Done()
for metric := range aggMetricC {
// Apply Processors
metrics := []telegraf.Metric{metric}
for _, processor := range a.Config.Processors {
metrics = processor.Apply(metrics...)
}
outMetricC <- metric
}
}()
wg.Add(1)
go func() {
defer wg.Done()
for {
select {
case <-shutdown:
if len(metricC) > 0 {
// keep going until channel is empty
continue
}
close(aggMetricC)
return
case metric := <-metricC:
// Apply Processors
metrics := []telegraf.Metric{metric}
for _, processor := range a.Config.Processors {
metrics = processor.Apply(metrics...)
}
for _, metric := range metrics {
// Apply Aggregators
var dropOriginal bool
for _, agg := range a.Config.Aggregators {
if ok := agg.Add(metric.Copy()); ok {
dropOriginal = true
}
}
// Forward metric to Outputs
if !dropOriginal {
outMetricC <- metric
}
}
}
}
}()
ticker := time.NewTicker(a.Config.Agent.FlushInterval.Duration)
semaphore := make(chan struct{}, 1)
for {
select {
case <-shutdown:
log.Println("I! Hang on, flushing any cached metrics before shutdown")
// wait for outMetricC to get flushed before flushing outputs
wg.Wait()
a.flush()
return nil
case <-ticker.C:
go func() {
select {
case semaphore <- struct{}{}:
internal.RandomSleep(a.Config.Agent.FlushJitter.Duration, shutdown)
a.flush()
<-semaphore
default:
// skipping this flush because one is already happening
log.Println("W! Skipping a scheduled flush because there is" +
" already a flush ongoing.")
}
}()
}
}
}
// Run runs the agent daemon, gathering every Interval
func (a *Agent) Run(shutdown chan struct{}) error {
var wg sync.WaitGroup
log.Printf("I! Agent Config: Interval:%s, Quiet:%#v, Hostname:%#v, "+
"Flush Interval:%s \n",
a.Config.Agent.Interval.Duration, a.Config.Agent.Quiet,
a.Config.Agent.Hostname, a.Config.Agent.FlushInterval.Duration)
// Channel shared between all input threads for accumulating metrics
metricC := make(chan telegraf.Metric, 100)
// Channel for metrics ready to be output
outMetricC := make(chan telegraf.Metric, 100)
// Channel for aggregated metrics
aggMetricC := make(chan telegraf.Metric, 100)
// Round collection to nearest interval by sleeping
if a.Config.Agent.RoundInterval {
i := int64(a.Config.Agent.Interval.Duration)
time.Sleep(time.Duration(i - (time.Now().UnixNano() % i)))
}
wg.Add(1)
go func() {
defer wg.Done()
if err := a.flusher(shutdown, metricC, aggMetricC, outMetricC); err != nil {
log.Printf("E! Flusher routine failed, exiting: %s\n", err.Error())
close(shutdown)
}
}()
wg.Add(len(a.Config.Aggregators))
for _, aggregator := range a.Config.Aggregators {
go func(agg *models.RunningAggregator) {
defer wg.Done()
acc := NewAccumulator(agg, aggMetricC)
acc.SetPrecision(a.Config.Agent.Precision.Duration,
a.Config.Agent.Interval.Duration)
agg.Run(acc, shutdown)
}(aggregator)
}
// Service inputs may immediately add metrics, if metrics are added before
// the aggregator starts they will be dropped. Generally this occurs
// only during testing but it is an outstanding issue.
//
// https://github.com/influxdata/telegraf/issues/4394
for _, input := range a.Config.Inputs {
input.SetDefaultTags(a.Config.Tags)
switch p := input.Input.(type) {
case telegraf.ServiceInput:
acc := NewAccumulator(input, metricC)
// Service input plugins should set their own precision of their
// metrics.
acc.SetPrecision(time.Nanosecond, 0)
if err := p.Start(acc); err != nil {
log.Printf("E! Service for input %s failed to start, exiting\n%s\n",
input.Name(), err.Error())
return err
}
defer p.Stop()
}
}
wg.Add(len(a.Config.Inputs))
for _, input := range a.Config.Inputs {
interval := a.Config.Agent.Interval.Duration
// overwrite global interval if this plugin has it's own.
precision := a.Config.Agent.Precision.Duration
jitter := a.Config.Agent.CollectionJitter.Duration
// Overwrite agent interval if this plugin has its own.
if input.Config.Interval != 0 {
interval = input.Config.Interval
}
go func(in *models.RunningInput, interv time.Duration) {
acc := NewAccumulator(input, dst)
acc.SetPrecision(precision, interval)
wg.Add(1)
go func(input *models.RunningInput) {
defer wg.Done()
a.gatherer(shutdown, in, interv, metricC)
}(input, interval)
if a.Config.Agent.RoundInterval {
err := internal.SleepContext(
ctx, internal.AlignDuration(startTime, interval))
if err != nil {
return
}
}
a.gatherOnInterval(ctx, acc, input, interval, jitter)
}(input)
}
wg.Wait()
return nil
}
// gather runs an input's gather function periodically until the context is
// done.
func (a *Agent) gatherOnInterval(
ctx context.Context,
acc telegraf.Accumulator,
input *models.RunningInput,
interval time.Duration,
jitter time.Duration,
) {
defer panicRecover(input)
ticker := time.NewTicker(interval)
defer ticker.Stop()
for {
err := internal.SleepContext(ctx, internal.RandomDuration(jitter))
if err != nil {
return
}
err = a.gatherOnce(acc, input, interval)
if err != nil {
acc.AddError(err)
}
select {
case <-ticker.C:
continue
case <-ctx.Done():
return
}
}
}
// gatherOnce runs the input's Gather function once, logging a warning each
// interval it fails to complete before.
func (a *Agent) gatherOnce(
acc telegraf.Accumulator,
input *models.RunningInput,
timeout time.Duration,
) error {
ticker := time.NewTicker(timeout)
defer ticker.Stop()
done := make(chan error)
go func() {
done <- input.Gather(acc)
}()
for {
select {
case err := <-done:
return err
case <-ticker.C:
log.Printf("W! [agent] input %q did not complete within its interval",
input.Name())
}
}
}
// runProcessors applies processors to metrics.
func (a *Agent) runProcessors(
src <-chan telegraf.Metric,
agg chan<- telegraf.Metric,
) error {
for metric := range src {
metrics := a.applyProcessors(metric)
for _, metric := range metrics {
agg <- metric
}
}
return nil
}
// applyProcessors applies all processors to a metric.
func (a *Agent) applyProcessors(m telegraf.Metric) []telegraf.Metric {
metrics := []telegraf.Metric{m}
for _, processor := range a.Config.Processors {
metrics = processor.Apply(metrics...)
}
return metrics
}
// runAggregators triggers the periodic push for Aggregators.
//
// When the context is done a final push will occur and then this function
// will return.
func (a *Agent) runAggregators(
startTime time.Time,
src <-chan telegraf.Metric,
dst chan<- telegraf.Metric,
) error {
ctx, cancel := context.WithCancel(context.Background())
var wg sync.WaitGroup
wg.Add(1)
go func() {
defer wg.Done()
for metric := range src {
var dropOriginal bool
for _, agg := range a.Config.Aggregators {
if ok := agg.Add(metric); ok {
dropOriginal = true
}
}
if !dropOriginal {
dst <- metric
}
}
cancel()
}()
precision := a.Config.Agent.Precision.Duration
interval := a.Config.Agent.Interval.Duration
aggregations := make(chan telegraf.Metric, 100)
for _, agg := range a.Config.Aggregators {
wg.Add(1)
go func(agg *models.RunningAggregator) {
defer wg.Done()
if a.Config.Agent.RoundInterval {
// Aggregators are aligned to the agent interval regardless of
// their period.
err := internal.SleepContext(ctx, internal.AlignDuration(startTime, interval))
if err != nil {
return
}
}
agg.SetPeriodStart(startTime)
acc := NewAccumulator(agg, aggregations)
acc.SetPrecision(precision, interval)
a.push(ctx, agg, acc)
close(aggregations)
}(agg)
}
for metric := range aggregations {
metrics := a.applyProcessors(metric)
for _, metric := range metrics {
dst <- metric
}
}
wg.Wait()
a.Close()
return nil
}
// push runs the push for a single aggregator every period. More simple than
// the output/input version as timeout should be less likely.... not really
// because the output channel can block for now.
func (a *Agent) push(
ctx context.Context,
aggregator *models.RunningAggregator,
acc telegraf.Accumulator,
) {
ticker := time.NewTicker(aggregator.Period())
defer ticker.Stop()
for {
select {
case <-ticker.C:
break
case <-ctx.Done():
aggregator.Push(acc)
return
}
aggregator.Push(acc)
}
}
// runOutputs triggers the periodic write for Outputs.
//
// When the context is done, outputs continue to run until their buffer is
// closed, after which they run flush once more.
func (a *Agent) runOutputs(
startTime time.Time,
src <-chan telegraf.Metric,
) error {
interval := a.Config.Agent.FlushInterval.Duration
jitter := a.Config.Agent.FlushJitter.Duration
ctx, cancel := context.WithCancel(context.Background())
var wg sync.WaitGroup
for _, output := range a.Config.Outputs {
interval := interval
// Overwrite agent flush_interval if this plugin has its own.
if output.Config.FlushInterval != 0 {
interval = output.Config.FlushInterval
}
wg.Add(1)
go func(output *models.RunningOutput) {
defer wg.Done()
if a.Config.Agent.RoundInterval {
err := internal.SleepContext(
ctx, internal.AlignDuration(startTime, interval))
if err != nil {
return
}
}
a.flush(ctx, output, interval, jitter)
}(output)
}
for metric := range src {
for i, output := range a.Config.Outputs {
if i == len(a.Config.Outputs)-1 {
output.AddMetric(metric)
} else {
output.AddMetric(metric.Copy())
}
}
}
log.Println("I! [agent] Hang on, flushing any cached metrics before shutdown")
cancel()
wg.Wait()
return nil
}
// flush runs an output's flush function periodically until the context is
// done.
func (a *Agent) flush(
ctx context.Context,
output *models.RunningOutput,
interval time.Duration,
jitter time.Duration,
) {
// since we are watching two channels we need a ticker with the jitter
// integrated.
ticker := NewTicker(interval, jitter)
defer ticker.Stop()
logError := func(err error) {
if err != nil {
log.Printf("E! [agent] Error writing to output [%s]: %v", output.Name, err)
}
}
for {
// Favor shutdown over other methods.
select {
case <-ctx.Done():
logError(a.flushOnce(output, interval, output.Write))
return
default:
}
select {
case <-ticker.C:
logError(a.flushOnce(output, interval, output.Write))
case <-output.BatchReady:
// Favor the ticker over batch ready
select {
case <-ticker.C:
logError(a.flushOnce(output, interval, output.Write))
default:
logError(a.flushOnce(output, interval, output.WriteBatch))
}
case <-ctx.Done():
logError(a.flushOnce(output, interval, output.Write))
return
}
}
}
// flushOnce runs the output's Write function once, logging a warning each
// interval it fails to complete before.
func (a *Agent) flushOnce(
output *models.RunningOutput,
timeout time.Duration,
writeFunc func() error,
) error {
ticker := time.NewTicker(timeout)
defer ticker.Stop()
done := make(chan error)
go func() {
done <- writeFunc()
}()
for {
select {
case err := <-done:
output.LogBufferStatus()
return err
case <-ticker.C:
log.Printf("W! [agent] output %q did not complete within its flush interval",
output.Name)
output.LogBufferStatus()
}
}
}
// connectOutputs connects to all outputs.
func (a *Agent) connectOutputs(ctx context.Context) error {
for _, output := range a.Config.Outputs {
log.Printf("D! [agent] Attempting connection to output: %s\n", output.Name)
err := output.Output.Connect()
if err != nil {
log.Printf("E! [agent] Failed to connect to output %s, retrying in 15s, "+
"error was '%s' \n", output.Name, err)
err := internal.SleepContext(ctx, 15*time.Second)
if err != nil {
return err
}
err = output.Output.Connect()
if err != nil {
return err
}
}
log.Printf("D! [agent] Successfully connected to output: %s\n", output.Name)
}
return nil
}
// closeOutputs closes all outputs.
func (a *Agent) closeOutputs() error {
var err error
for _, output := range a.Config.Outputs {
err = output.Output.Close()
}
return err
}
// startServiceInputs starts all service inputs.
func (a *Agent) startServiceInputs(
ctx context.Context,
dst chan<- telegraf.Metric,
) error {
started := []telegraf.ServiceInput{}
for _, input := range a.Config.Inputs {
if si, ok := input.Input.(telegraf.ServiceInput); ok {
// Service input plugins are not subject to timestamp rounding.
// This only applies to the accumulator passed to Start(), the
// Gather() accumulator does apply rounding according to the
// precision agent setting.
acc := NewAccumulator(input, dst)
acc.SetPrecision(time.Nanosecond, 0)
err := si.Start(acc)
if err != nil {
log.Printf("E! [agent] Service for input %s failed to start: %v",
input.Name(), err)
for _, si := range started {
si.Stop()
}
return err
}
started = append(started, si)
}
}
return nil
}
// stopServiceInputs stops all service inputs.
func (a *Agent) stopServiceInputs() {
for _, input := range a.Config.Inputs {
if si, ok := input.Input.(telegraf.ServiceInput); ok {
si.Stop()
}
}
}
// panicRecover displays an error if an input panics.
func panicRecover(input *models.RunningInput) {
if err := recover(); err != nil {
trace := make([]byte, 2048)
runtime.Stack(trace, true)
log.Printf("E! FATAL: Input [%s] panicked: %s, Stack:\n%s\n",
input.Name(), err, trace)
log.Println("E! PLEASE REPORT THIS PANIC ON GITHUB with " +
"stack trace, configuration, and OS information: " +
"https://github.com/influxdata/telegraf/issues/new/choose")
}
}

agent/tick.go (new file, 57 lines)
View File

@ -0,0 +1,57 @@
package agent
import (
"context"
"sync"
"time"
"github.com/influxdata/telegraf/internal"
)
type Ticker struct {
C chan time.Time
ticker *time.Ticker
jitter time.Duration
wg sync.WaitGroup
cancelFunc context.CancelFunc
}
func NewTicker(
interval time.Duration,
jitter time.Duration,
) *Ticker {
ctx, cancel := context.WithCancel(context.Background())
t := &Ticker{
C: make(chan time.Time, 1),
ticker: time.NewTicker(interval),
jitter: jitter,
cancelFunc: cancel,
}
t.wg.Add(1)
go t.relayTime(ctx)
return t
}
func (t *Ticker) Stop() {
t.cancelFunc()
t.wg.Wait()
}
func (t *Ticker) relayTime(ctx context.Context) {
defer t.wg.Done()
for {
select {
case tm := <-t.ticker.C:
internal.SleepContext(ctx, internal.RandomDuration(t.jitter))
select {
case t.C <- tm:
default:
}
case <-ctx.Done():
return
}
}
}

View File

@ -1,6 +1,8 @@
package main
import (
"context"
"errors"
"flag"
"fmt"
"log"
@ -78,112 +80,111 @@ func reloadLoop(
for <-reload {
reload <- false
// If no other options are specified, load the config file and run.
c := config.NewConfig()
c.OutputFilters = outputFilters
c.InputFilters = inputFilters
err := c.LoadConfig(*fConfig)
if err != nil {
log.Fatal("E! " + err.Error())
}
ctx, cancel := context.WithCancel(context.Background())
if *fConfigDirectory != "" {
err = c.LoadDirectory(*fConfigDirectory)
if err != nil {
log.Fatal("E! " + err.Error())
}
}
if !*fTest && len(c.Outputs) == 0 {
log.Fatalf("E! Error: no outputs found, did you provide a valid config file?")
}
if len(c.Inputs) == 0 {
log.Fatalf("E! Error: no inputs found, did you provide a valid config file?")
}
if int64(c.Agent.Interval.Duration) <= 0 {
log.Fatalf("E! Agent interval must be positive, found %s",
c.Agent.Interval.Duration)
}
if int64(c.Agent.FlushInterval.Duration) <= 0 {
log.Fatalf("E! Agent flush_interval must be positive; found %s",
c.Agent.Interval.Duration)
}
ag, err := agent.NewAgent(c)
if err != nil {
log.Fatal("E! " + err.Error())
}
// Setup logging
logger.SetupLogging(
ag.Config.Agent.Debug || *fDebug,
ag.Config.Agent.Quiet || *fQuiet,
ag.Config.Agent.Logfile,
)
if *fTest {
err = ag.Test()
if err != nil {
log.Fatal("E! " + err.Error())
}
os.Exit(0)
}
err = ag.Connect()
if err != nil {
log.Fatal("E! " + err.Error())
}
shutdown := make(chan struct{})
signals := make(chan os.Signal)
signal.Notify(signals, os.Interrupt, syscall.SIGHUP, syscall.SIGTERM)
go func() {
select {
case sig := <-signals:
if sig == os.Interrupt || sig == syscall.SIGTERM {
close(shutdown)
}
if sig == syscall.SIGHUP {
log.Printf("I! Reloading Telegraf config\n")
log.Printf("I! Reloading Telegraf config")
<-reload
reload <- true
close(shutdown)
}
cancel()
case <-stop:
close(shutdown)
cancel()
}
}()
log.Printf("I! Starting Telegraf %s\n", version)
log.Printf("I! Loaded inputs: %s", strings.Join(c.InputNames(), " "))
log.Printf("I! Loaded aggregators: %s", strings.Join(c.AggregatorNames(), " "))
log.Printf("I! Loaded processors: %s", strings.Join(c.ProcessorNames(), " "))
log.Printf("I! Loaded outputs: %s", strings.Join(c.OutputNames(), " "))
log.Printf("I! Tags enabled: %s", c.ListTags())
if *fPidfile != "" {
f, err := os.OpenFile(*fPidfile, os.O_CREATE|os.O_WRONLY, 0644)
if err != nil {
log.Printf("E! Unable to create pidfile: %s", err)
} else {
fmt.Fprintf(f, "%d\n", os.Getpid())
f.Close()
defer func() {
err := os.Remove(*fPidfile)
if err != nil {
log.Printf("E! Unable to remove pidfile: %s", err)
}
}()
}
err := runAgent(ctx, inputFilters, outputFilters)
if err != nil {
log.Fatalf("E! [telegraf] Error running agent: %v", err)
}
ag.Run(shutdown)
}
}
func runAgent(ctx context.Context,
inputFilters []string,
outputFilters []string,
) error {
// If no other options are specified, load the config file and run.
c := config.NewConfig()
c.OutputFilters = outputFilters
c.InputFilters = inputFilters
err := c.LoadConfig(*fConfig)
if err != nil {
return err
}
if *fConfigDirectory != "" {
err = c.LoadDirectory(*fConfigDirectory)
if err != nil {
return err
}
}
if !*fTest && len(c.Outputs) == 0 {
return errors.New("Error: no outputs found, did you provide a valid config file?")
}
if len(c.Inputs) == 0 {
return errors.New("Error: no inputs found, did you provide a valid config file?")
}
if int64(c.Agent.Interval.Duration) <= 0 {
return fmt.Errorf("Agent interval must be positive, found %s",
c.Agent.Interval.Duration)
}
if int64(c.Agent.FlushInterval.Duration) <= 0 {
return fmt.Errorf("Agent flush_interval must be positive; found %s",
c.Agent.Interval.Duration)
}
ag, err := agent.NewAgent(c)
if err != nil {
return err
}
// Setup logging
logger.SetupLogging(
ag.Config.Agent.Debug || *fDebug,
ag.Config.Agent.Quiet || *fQuiet,
ag.Config.Agent.Logfile,
)
if *fTest {
return ag.Test()
}
log.Printf("I! Starting Telegraf %s\n", version)
log.Printf("I! Loaded inputs: %s", strings.Join(c.InputNames(), " "))
log.Printf("I! Loaded aggregators: %s", strings.Join(c.AggregatorNames(), " "))
log.Printf("I! Loaded processors: %s", strings.Join(c.ProcessorNames(), " "))
log.Printf("I! Loaded outputs: %s", strings.Join(c.OutputNames(), " "))
log.Printf("I! Tags enabled: %s", c.ListTags())
if *fPidfile != "" {
f, err := os.OpenFile(*fPidfile, os.O_CREATE|os.O_WRONLY, 0644)
if err != nil {
log.Printf("E! Unable to create pidfile: %s", err)
} else {
fmt.Fprintf(f, "%d\n", os.Getpid())
f.Close()
defer func() {
err := os.Remove(*fPidfile)
if err != nil {
log.Printf("E! Unable to remove pidfile: %s", err)
}
}()
}
}
return ag.Run(ctx)
}
func usageExit(rc int) {
fmt.Println(internal.Usage)
os.Exit(rc)

docs/AGGREGATORS.md (new file, 126 lines)
View File

@ -0,0 +1,126 @@
### Aggregator Plugins
This section is for developers who want to create a new aggregator plugin.
### Aggregator Plugin Guidelines
* An aggregator must conform to the [telegraf.Aggregator][] interface.
* Aggregators should call `aggregators.Add` in their `init` function to
register themselves. See below for a quick example.
* To be available within Telegraf itself, plugins must add themselves to the
`github.com/influxdata/telegraf/plugins/aggregators/all/all.go` file.
* The `SampleConfig` function should return valid toml that describes how the
plugin can be configured. This is included in `telegraf config`. Please
consult the [SampleConfig][] page for the latest style guidelines.
* The `Description` function should say in one line what this aggregator does.
* The Aggregator plugin will need to keep caches of metrics that have passed
through it. This should be done using the builtin `HashID()` function of
each metric.
* When the `Reset()` function is called, all caches should be cleared.
### Aggregator Plugin Example
```go
package min
// min.go
import (
"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/plugins/aggregators"
)
type Min struct {
// caches for metric fields, names, and tags
fieldCache map[uint64]map[string]float64
nameCache map[uint64]string
tagCache map[uint64]map[string]string
}
func NewMin() telegraf.Aggregator {
m := &Min{}
m.Reset()
return m
}
var sampleConfig = `
## period is the flush & clear interval of the aggregator.
period = "30s"
## If true drop_original will drop the original metrics and
## only send aggregates.
drop_original = false
`
func (m *Min) SampleConfig() string {
return sampleConfig
}
func (m *Min) Description() string {
return "Keep the aggregate min of each metric passing through."
}
func (m *Min) Add(in telegraf.Metric) {
id := in.HashID()
if _, ok := m.nameCache[id]; !ok {
// hit an uncached metric, create caches for first time:
m.nameCache[id] = in.Name()
m.tagCache[id] = in.Tags()
m.fieldCache[id] = make(map[string]float64)
for k, v := range in.Fields() {
if fv, ok := convert(v); ok {
m.fieldCache[id][k] = fv
}
}
} else {
for k, v := range in.Fields() {
if fv, ok := convert(v); ok {
if _, ok := m.fieldCache[id][k]; !ok {
// hit an uncached field of a cached metric
m.fieldCache[id][k] = fv
continue
}
if fv < m.fieldCache[id][k] {
// set new minimum
m.fieldCache[id][k] = fv
}
}
}
}
}
func (m *Min) Push(acc telegraf.Accumulator) {
for id := range m.nameCache {
fields := map[string]interface{}{}
for k, v := range m.fieldCache[id] {
fields[k+"_min"] = v
}
acc.AddFields(m.nameCache[id], fields, m.tagCache[id])
}
}
func (m *Min) Reset() {
m.fieldCache = make(map[uint64]map[string]float64)
m.nameCache = make(map[uint64]string)
m.tagCache = make(map[uint64]map[string]string)
}
func convert(in interface{}) (float64, bool) {
switch v := in.(type) {
case float64:
return v, true
case int64:
return float64(v), true
default:
return 0, false
}
}
func init() {
aggregators.Add("min", func() telegraf.Aggregator {
return NewMin()
})
}
```
[telegraf.Aggregator]: https://godoc.org/github.com/influxdata/telegraf#Aggregator
[SampleConfig]: https://github.com/influxdata/telegraf/wiki/SampleConfig

View File

@ -106,6 +106,14 @@ emitted from the input plugin.
### Output Configuration
- **flush_interval**: The maximum time between flushes. Use this setting to
override the agent `flush_interval` on a per plugin basis.
- **metric_batch_size**: The maximum number of metrics to send at once. Use
this setting to override the agent `metric_batch_size` on a per plugin basis.
- **metric_buffer_limit**: The maximum number of unsent metrics to buffer.
Use this setting to override the agent `metric_buffer_limit` on a per plugin
basis.
The [metric filtering](#metric-filtering) parameters can be used to limit what metrics are
emitted from the output plugin.
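For example, a configuration that overrides these values for a single output might
look like the following; the plugin choice and the values shown are only
illustrative:

```toml
[[outputs.file]]
  files = ["stdout"]

  ## Per-plugin overrides of the agent-level settings.
  flush_interval = "30s"
  metric_batch_size = 500
  metric_buffer_limit = 50000
```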

docs/INPUTS.md (new file, 143 lines)
View File

@ -0,0 +1,143 @@
### Input Plugins
This section is for developers who want to create new collection inputs.
Telegraf is entirely plugin driven. This interface allows operators to
pick and choose what is gathered and makes it easy for developers
to create new ways of generating metrics.
Plugin authorship is kept as simple as possible to encourage people to develop
and submit new inputs.
### Input Plugin Guidelines
- A plugin must conform to the [telegraf.Input][] interface.
- Input Plugins should call `inputs.Add` in their `init` function to register
themselves. See below for a quick example.
- Input Plugins must be added to the
`github.com/influxdata/telegraf/plugins/inputs/all/all.go` file.
- The `SampleConfig` function should return valid toml that describes how the
plugin can be configured. This is included in `telegraf config`. Please
consult the [SampleConfig][] page for the latest style
guidelines.
- The `Description` function should say in one line what this plugin does.
Let's say you've written a plugin that emits metrics about processes on the
current host.
### Input Plugin Example
```go
package simple
// simple.go
import (
"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/plugins/inputs"
)
type Simple struct {
Ok bool
}
func (s *Simple) Description() string {
return "a demo plugin"
}
func (s *Simple) SampleConfig() string {
return `
## Indicate if everything is fine
ok = true
`
}
func (s *Simple) Gather(acc telegraf.Accumulator) error {
if s.Ok {
acc.AddFields("state", map[string]interface{}{"value": "pretty good"}, nil)
} else {
acc.AddFields("state", map[string]interface{}{"value": "not great"}, nil)
}
return nil
}
func init() {
inputs.Add("simple", func() telegraf.Input { return &Simple{} })
}
```
### Development
* Run `make static` followed by `make plugin-[pluginName]` to spin up a docker
dev environment using docker-compose.
* ***[Optional]*** When developing a plugin, add a `dev` directory with a
`docker-compose.yml` and `telegraf.conf` as well as any other supporting
files, where sensible.
### Typed Metrics
In addition to the `AddFields` function, the accumulator also supports
functions to add typed metrics: `AddGauge`, `AddCounter`, etc. Metric types
are ignored by the InfluxDB output, but can be used for other outputs, such as
[prometheus][prom metric types].
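For example, a variant of the `Simple` plugin's `Gather` method above could emit a
gauge and a counter; the measurement, field, and tag names here are made up purely
for illustration:

```go
func (s *Simple) Gather(acc telegraf.Accumulator) error {
	tags := map[string]string{"host": "example"}

	// A gauge is a value sampled at a point in time.
	acc.AddGauge("queue", map[string]interface{}{"depth": 42}, tags)

	// A counter is a monotonically increasing value.
	acc.AddCounter("queue", map[string]interface{}{"messages_total": 1024}, tags)

	return nil
}
```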
### Data Formats
Some input plugins, such as the [exec][] plugin, can accept any supported
[input data formats][].
In order to enable this, you must specify a `SetParser(parser parsers.Parser)`
function on the plugin object (see the exec plugin for an example), as well as
defining `parser` as a field of the object.
You can then utilize the parser internally in your plugin, parsing data as you
see fit. Telegraf's configuration layer will take care of instantiating and
creating the `Parser` object.
Add the following to the `SampleConfig()`:
```toml
## Data format to consume.
## Each data format has its own unique set of configuration options, read
## more about them here:
## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md
data_format = "influx"
```
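A rough sketch of the wiring described above follows; the plugin name and its
fields are hypothetical, not an existing plugin:

```go
package simpleparser

import (
	"github.com/influxdata/telegraf"
	"github.com/influxdata/telegraf/plugins/inputs"
	"github.com/influxdata/telegraf/plugins/parsers"
)

type SimpleParser struct {
	parser parsers.Parser
}

func (s *SimpleParser) Description() string  { return "a demo parsing plugin" }
func (s *SimpleParser) SampleConfig() string { return "" }

// SetParser is called by Telegraf's configuration layer with the parser
// built from the user's `data_format` setting.
func (s *SimpleParser) SetParser(parser parsers.Parser) {
	s.parser = parser
}

func (s *SimpleParser) Gather(acc telegraf.Accumulator) error {
	// Imagine `raw` was read from a command, file, or socket.
	raw := []byte("example value=42")

	metrics, err := s.parser.Parse(raw)
	if err != nil {
		return err
	}
	for _, m := range metrics {
		acc.AddFields(m.Name(), m.Fields(), m.Tags(), m.Time())
	}
	return nil
}

func init() {
	inputs.Add("simpleparser", func() telegraf.Input { return &SimpleParser{} })
}
```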
### Service Input Plugins
This section is for developers who want to create new "service" collection
inputs. A service plugin differs from a regular plugin in that it operates a
background service while Telegraf is running. One example would be the
`statsd` plugin, which operates a statsd server.
Service Input Plugins are substantially more complicated than regular
plugins, as they typically require goroutines and synchronization to ensure
data integrity. Service Input Plugins should be avoided unless there is no way
to implement their behavior with a regular plugin.
To create a Service Input, implement the [telegraf.ServiceInput][] interface.
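As a rough sketch (the plugin name, measurement, and single emitted metric
are invented for illustration), a service input might look like:
```go
package example

import (
    "github.com/influxdata/telegraf"
    "github.com/influxdata/telegraf/plugins/inputs"
)

type Example struct {
    acc  telegraf.Accumulator
    done chan struct{}
}

func (e *Example) Description() string { return "a demo service input" }

func (e *Example) SampleConfig() string { return "" }

// Gather is still called every interval; many service inputs add their
// metrics from a background goroutine instead and leave this empty.
func (e *Example) Gather(acc telegraf.Accumulator) error { return nil }

// Start may retain the Accumulator and use it until Stop returns.
func (e *Example) Start(acc telegraf.Accumulator) error {
    e.acc = acc
    e.done = make(chan struct{})
    go e.run()
    return nil
}

func (e *Example) run() {
    // A real plugin would accept connections or poll a queue here; this
    // sketch emits a single metric and then waits for Stop.
    e.acc.AddFields("example", map[string]interface{}{"started": int64(1)}, nil)
    <-e.done
}

// Stop shuts down the background goroutine; the Accumulator must not be used
// after Stop returns.
func (e *Example) Stop() {
    close(e.done)
}

func init() {
    inputs.Add("example", func() telegraf.Input { return &Example{} })
}
```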
### Metric Tracking
Metric Tracking provides a system to be notified when metrics have been
successfully written to their outputs or otherwise discarded. This allows
inputs to be created that function as reliable queue consumers.
To get started with metric tracking begin by calling `WithTracking` on the
[telegraf.Accumulator][]. Add metrics using the `AddTrackingMetricGroup`
function on the returned [telegraf.TrackingAccumulator][] and store the
`TrackingID`. The `Delivered()` channel will return a type with information
about the final delivery status of the metric group.
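The overall shape of a tracking-based queue consumer might look roughly like
the sketch below. The `message` type, its channel, and the ack/nack hooks are
placeholders for broker-specific code, and the remaining input methods are
omitted:
```go
package example

import "github.com/influxdata/telegraf"

// message stands in for whatever a real broker delivers.
type message struct {
    metrics []telegraf.Metric
    ack     func()
    nack    func()
}

type Consumer struct {
    MaxUndelivered int

    acc      telegraf.TrackingAccumulator
    pending  map[telegraf.TrackingID]message
    messages chan message
    done     chan struct{}
}

func (c *Consumer) Start(acc telegraf.Accumulator) error {
    // Limit how many metric groups may be outstanding at once.
    c.acc = acc.WithTracking(c.MaxUndelivered)
    c.pending = make(map[telegraf.TrackingID]message)
    c.messages = make(chan message)
    c.done = make(chan struct{})
    go c.run()
    return nil
}

func (c *Consumer) run() {
    for {
        select {
        case msg := <-c.messages:
            // Remember which message each tracking ID belongs to.
            id := c.acc.AddTrackingMetricGroup(msg.metrics)
            c.pending[id] = msg
        case info := <-c.acc.Delivered():
            msg := c.pending[info.ID()]
            delete(c.pending, info.ID())
            if info.Delivered() {
                msg.ack()
            } else {
                msg.nack()
            }
        case <-c.done:
            return
        }
    }
}

func (c *Consumer) Stop() {
    close(c.done)
}
```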
Check the [amqp_consumer][] for a complete, real-world implementation.
[exec]: https://github.com/influxdata/telegraf/tree/master/plugins/inputs/exec
[amqp_consumer]: https://github.com/influxdata/telegraf/tree/master/plugins/inputs/amqp_consumer
[prom metric types]: https://prometheus.io/docs/concepts/metric_types/
[input data formats]: https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md
[SampleConfig]: https://github.com/influxdata/telegraf/wiki/SampleConfig
[telegraf.Input]: https://godoc.org/github.com/influxdata/telegraf#Input
[telegraf.ServiceInput]: https://godoc.org/github.com/influxdata/telegraf#ServiceInput
[telegraf.Accumulator]: https://godoc.org/github.com/influxdata/telegraf#Accumulator
[telegraf.TrackingAccumulator]: https://godoc.org/github.com/influxdata/telegraf#TrackingAccumulator

docs/OUTPUTS.md Normal file
@ -0,0 +1,95 @@
### Output Plugins
This section is for developers who want to create a new output sink. Outputs
are created in a similar manner as collection plugins, and their interface has
similar constructs.
### Output Plugin Guidelines
- An output must conform to the [telegraf.Output][] interface.
- Outputs should call `outputs.Add` in their `init` function to register
themselves. See below for a quick example.
- To be available within Telegraf itself, plugins must add themselves to the
`github.com/influxdata/telegraf/plugins/outputs/all/all.go` file.
- The `SampleConfig` function should return valid toml that describes how the
plugin can be configured. This is included in `telegraf config`. Please
consult the [SampleConfig][] page for the latest style guidelines.
- The `Description` function should say in one line what this output does.
### Output Plugin Example
```go
package simpleoutput

// simpleoutput.go

import (
    "github.com/influxdata/telegraf"
    "github.com/influxdata/telegraf/plugins/outputs"
)

type Simple struct {
    Ok bool
}

func (s *Simple) Description() string {
    return "a demo output"
}

func (s *Simple) SampleConfig() string {
    return `
  ok = true
`
}

func (s *Simple) Connect() error {
    // Make a connection to the URL here
    return nil
}

func (s *Simple) Close() error {
    // Close connection to the URL here
    return nil
}

func (s *Simple) Write(metrics []telegraf.Metric) error {
    for _, metric := range metrics {
        // write `metric` to the output sink here
        _ = metric
    }
    return nil
}

func init() {
    outputs.Add("simpleoutput", func() telegraf.Output { return &Simple{} })
}
```
### Data Formats
Some output plugins, such as the [file][] plugin, can write in any of the
supported [output data formats][].
In order to enable this, you must define a
`SetSerializer(serializer serializers.Serializer)`
method on the plugin object (see the file plugin for an example) and add a
`serializer` field to the object to hold it.
You can then use the serializer internally in your plugin, serializing data
before it is written. Telegraf's configuration layer takes care of
instantiating and configuring the `Serializer` object.
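A minimal sketch of the serializer plumbing might look like this; the
`Example` type and the placeholder sink are illustrative, and the other
`telegraf.Output` methods are omitted:
```go
package example

import (
    "github.com/influxdata/telegraf"
    "github.com/influxdata/telegraf/plugins/serializers"
)

// Example shows only the serializer plumbing.
type Example struct {
    serializer serializers.Serializer
}

// SetSerializer is called by Telegraf's configuration layer with a serializer
// built from the `data_format` option.
func (e *Example) SetSerializer(serializer serializers.Serializer) {
    e.serializer = serializer
}

func (e *Example) Write(metrics []telegraf.Metric) error {
    for _, m := range metrics {
        octets, err := e.serializer.Serialize(m)
        if err != nil {
            return err
        }
        // Send the serialized bytes to the sink here.
        _ = octets
    }
    return nil
}
```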
You should also add the following to your `SampleConfig()`:
```toml
## Data format to output.
## Each data format has its own unique set of configuration options, read
## more about them here:
## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_OUTPUT.md
data_format = "influx"
```
[file]: https://github.com/influxdata/telegraf/tree/master/plugins/outputs/file
[output data formats]: https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_OUTPUT.md
[SampleConfig]: https://github.com/influxdata/telegraf/wiki/SampleConfig
[telegraf.Output]: https://godoc.org/github.com/influxdata/telegraf#Output

docs/PROCESSORS.md Normal file
@ -0,0 +1,63 @@
### Processor Plugins
This section is for developers who want to create a new processor plugin.
### Processor Plugin Guidelines
* A processor must conform to the [telegraf.Processor][] interface.
* Processors should call `processors.Add` in their `init` function to register
themselves. See below for a quick example.
* To be available within Telegraf itself, plugins must add themselves to the
`github.com/influxdata/telegraf/plugins/processors/all/all.go` file.
* The `SampleConfig` function should return valid toml that describes how the
processor can be configured. This is included in `telegraf config`. Please
consult the [SampleConfig][] page for the latest style guidelines.
* The `Description` function should say in one line what this processor does.
### Processor Plugin Example
```go
package printer

// printer.go

import (
    "fmt"

    "github.com/influxdata/telegraf"
    "github.com/influxdata/telegraf/plugins/processors"
)

type Printer struct {
}

var sampleConfig = `
`

func (p *Printer) SampleConfig() string {
    return sampleConfig
}

func (p *Printer) Description() string {
    return "Print all metrics that pass through this filter."
}

func (p *Printer) Apply(in ...telegraf.Metric) []telegraf.Metric {
    for _, metric := range in {
        fmt.Println(metric.String())
    }
    return in
}

func init() {
    processors.Add("printer", func() telegraf.Processor {
        return &Printer{}
    })
}
```
[SampleConfig]: https://github.com/influxdata/telegraf/wiki/SampleConfig
[telegraf.Processor]: https://godoc.org/github.com/influxdata/telegraf#Processor

@ -13,17 +13,10 @@ type Input interface {
}
type ServiceInput interface {
// SampleConfig returns the default configuration of the Input
SampleConfig() string
Input
// Description returns a one-sentence description on the Input
Description() string
// Gather takes in an accumulator and adds the metrics that the Input
// gathers. This is called every "interval"
Gather(Accumulator) error
// Start starts the ServiceInput's service, whatever that may be
// Start the ServiceInput. The Accumulator may be retained and used until
// Stop returns.
Start(Accumulator) error
// Stop stops the services and closes any necessary channels and connections

@ -1,130 +0,0 @@
package buffer
import (
"sync"
"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/selfstat"
)
var (
MetricsWritten = selfstat.Register("agent", "metrics_written", map[string]string{})
MetricsDropped = selfstat.Register("agent", "metrics_dropped", map[string]string{})
)
// Buffer is an object for storing metrics in a circular buffer.
type Buffer struct {
sync.Mutex
buf []telegraf.Metric
first int
last int
size int
empty bool
}
// NewBuffer returns a Buffer
// size is the maximum number of metrics that Buffer will cache. If Add is
// called when the buffer is full, then the oldest metric(s) will be dropped.
func NewBuffer(size int) *Buffer {
return &Buffer{
buf: make([]telegraf.Metric, size),
first: 0,
last: 0,
size: size,
empty: true,
}
}
// IsEmpty returns true if Buffer is empty.
func (b *Buffer) IsEmpty() bool {
return b.empty
}
// Len returns the current length of the buffer.
func (b *Buffer) Len() int {
if b.empty {
return 0
} else if b.first <= b.last {
return b.last - b.first + 1
}
// Spans the end of array.
// size - gap in the middle
return b.size - (b.first - b.last - 1) // size - gap
}
func (b *Buffer) push(m telegraf.Metric) {
// Empty
if b.empty {
b.last = b.first // Reset
b.buf[b.last] = m
b.empty = false
return
}
b.last++
b.last %= b.size
// Full
if b.first == b.last {
MetricsDropped.Incr(1)
b.first = (b.first + 1) % b.size
}
b.buf[b.last] = m
}
// Add adds metrics to the buffer.
func (b *Buffer) Add(metrics ...telegraf.Metric) {
b.Lock()
defer b.Unlock()
for i := range metrics {
MetricsWritten.Incr(1)
b.push(metrics[i])
}
}
// Batch returns a batch of metrics of size batchSize.
// the batch will be of maximum length batchSize. It can be less than batchSize,
// if the length of Buffer is less than batchSize.
func (b *Buffer) Batch(batchSize int) []telegraf.Metric {
b.Lock()
defer b.Unlock()
outLen := min(b.Len(), batchSize)
out := make([]telegraf.Metric, outLen)
if outLen == 0 {
return out
}
// We copy everything right of first up to last, count or end
// b.last >= rightInd || b.last < b.first
// therefore won't copy past b.last
rightInd := min(b.size, b.first+outLen) - 1
copyCount := copy(out, b.buf[b.first:rightInd+1])
// We've emptied the ring
if rightInd == b.last {
b.empty = true
}
b.first = rightInd + 1
b.first %= b.size
// We circle back for the rest
if copyCount < outLen {
right := min(b.last, outLen-copyCount)
copy(out[copyCount:], b.buf[b.first:right+1])
// We've emptied the ring
if right == b.last {
b.empty = true
}
b.first = right + 1
b.first %= b.size
}
return out
}
func min(a, b int) int {
if b < a {
return b
}
return a
}

@ -1,203 +0,0 @@
package buffer
import (
"sync"
"sync/atomic"
"testing"
"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/testutil"
"github.com/stretchr/testify/assert"
)
var metricList = []telegraf.Metric{
testutil.TestMetric(2, "mymetric1"),
testutil.TestMetric(1, "mymetric2"),
testutil.TestMetric(11, "mymetric3"),
testutil.TestMetric(15, "mymetric4"),
testutil.TestMetric(8, "mymetric5"),
}
func makeBench5(b *testing.B, freq, batchSize int) {
const k = 1000
var wg sync.WaitGroup
buf := NewBuffer(10000)
m := testutil.TestMetric(1, "mymetric")
for i := 0; i < b.N; i++ {
buf.Add(m, m, m, m, m)
if i%(freq*k) == 0 {
wg.Add(1)
go func() {
buf.Batch(batchSize * k)
wg.Done()
}()
}
}
// Flush
buf.Batch(b.N)
wg.Wait()
}
func makeBenchStrict(b *testing.B, freq, batchSize int) {
const k = 1000
var count uint64
var wg sync.WaitGroup
buf := NewBuffer(10000)
m := testutil.TestMetric(1, "mymetric")
for i := 0; i < b.N; i++ {
buf.Add(m)
if i%(freq*k) == 0 {
wg.Add(1)
go func() {
defer wg.Done()
l := len(buf.Batch(batchSize * k))
atomic.AddUint64(&count, uint64(l))
}()
}
}
// Flush
wg.Add(1)
go func() {
l := len(buf.Batch(b.N))
atomic.AddUint64(&count, uint64(l))
wg.Done()
}()
wg.Wait()
if count != uint64(b.N) {
b.Errorf("not all metrics came out. %d of %d", count, b.N)
}
}
func makeBench(b *testing.B, freq, batchSize int) {
const k = 1000
var wg sync.WaitGroup
buf := NewBuffer(10000)
m := testutil.TestMetric(1, "mymetric")
for i := 0; i < b.N; i++ {
buf.Add(m)
if i%(freq*k) == 0 {
wg.Add(1)
go func() {
buf.Batch(batchSize * k)
wg.Done()
}()
}
}
wg.Wait()
// Flush
buf.Batch(b.N)
}
func BenchmarkBufferBatch5Add(b *testing.B) {
makeBench5(b, 100, 101)
}
func BenchmarkBufferBigInfrequentBatchCatchup(b *testing.B) {
makeBench(b, 100, 101)
}
func BenchmarkBufferOftenBatch(b *testing.B) {
makeBench(b, 1, 1)
}
func BenchmarkBufferAlmostBatch(b *testing.B) {
makeBench(b, 10, 9)
}
func BenchmarkBufferSlowBatch(b *testing.B) {
makeBench(b, 10, 1)
}
func BenchmarkBufferBatchNoDrop(b *testing.B) {
makeBenchStrict(b, 1, 4)
}
func BenchmarkBufferCatchup(b *testing.B) {
buf := NewBuffer(10000)
m := testutil.TestMetric(1, "mymetric")
for i := 0; i < b.N; i++ {
buf.Add(m)
}
buf.Batch(b.N)
}
func BenchmarkAddMetrics(b *testing.B) {
buf := NewBuffer(10000)
m := testutil.TestMetric(1, "mymetric")
for n := 0; n < b.N; n++ {
buf.Add(m)
}
}
func TestNewBufferBasicFuncs(t *testing.T) {
b := NewBuffer(10)
MetricsDropped.Set(0)
MetricsWritten.Set(0)
assert.True(t, b.IsEmpty())
assert.Zero(t, b.Len())
assert.Zero(t, MetricsDropped.Get())
assert.Zero(t, MetricsWritten.Get())
m := testutil.TestMetric(1, "mymetric")
b.Add(m)
assert.False(t, b.IsEmpty())
assert.Equal(t, b.Len(), 1)
assert.Equal(t, int64(0), MetricsDropped.Get())
assert.Equal(t, int64(1), MetricsWritten.Get())
b.Add(metricList...)
assert.False(t, b.IsEmpty())
assert.Equal(t, b.Len(), 6)
assert.Equal(t, int64(0), MetricsDropped.Get())
assert.Equal(t, int64(6), MetricsWritten.Get())
}
func TestDroppingMetrics(t *testing.T) {
b := NewBuffer(10)
MetricsDropped.Set(0)
MetricsWritten.Set(0)
// Add up to the size of the buffer
b.Add(metricList...)
b.Add(metricList...)
assert.False(t, b.IsEmpty())
assert.Equal(t, b.Len(), 10)
assert.Equal(t, int64(0), MetricsDropped.Get())
assert.Equal(t, int64(10), MetricsWritten.Get())
// Add 5 more and verify they were dropped
b.Add(metricList...)
assert.False(t, b.IsEmpty())
assert.Equal(t, b.Len(), 10)
assert.Equal(t, int64(5), MetricsDropped.Get())
assert.Equal(t, int64(15), MetricsWritten.Get())
}
func TestGettingBatches(t *testing.T) {
b := NewBuffer(20)
MetricsDropped.Set(0)
MetricsWritten.Set(0)
// Verify that the buffer returned is smaller than requested when there are
// not as many items as requested.
b.Add(metricList...)
batch := b.Batch(10)
assert.Len(t, batch, 5)
// Verify that the buffer is now empty
assert.True(t, b.IsEmpty())
assert.Zero(t, b.Len())
assert.Zero(t, MetricsDropped.Get())
assert.Equal(t, int64(5), MetricsWritten.Get())
// Verify that the buffer returned is not more than the size requested
b.Add(metricList...)
batch = b.Batch(3)
assert.Len(t, batch, 3)
// Verify that buffer is not empty
assert.False(t, b.IsEmpty())
assert.Equal(t, b.Len(), 2)
assert.Equal(t, int64(0), MetricsDropped.Get())
assert.Equal(t, int64(10), MetricsWritten.Get())
}

@ -9,7 +9,6 @@ import (
"math"
"os"
"path/filepath"
"regexp"
"runtime"
"sort"
@ -26,7 +25,6 @@ import (
"github.com/influxdata/telegraf/plugins/parsers"
"github.com/influxdata/telegraf/plugins/processors"
"github.com/influxdata/telegraf/plugins/serializers"
"github.com/influxdata/toml"
"github.com/influxdata/toml/ast"
)
@ -622,6 +620,19 @@ func (c *Config) LoadConfig(path string) error {
}
}
if !c.Agent.OmitHostname {
if c.Agent.Hostname == "" {
hostname, err := os.Hostname()
if err != nil {
return err
}
c.Agent.Hostname = hostname
}
c.Tags["host"] = c.Agent.Hostname
}
// Parse all the rest of the plugins:
for name, val := range tbl.Fields {
subTable, ok := val.(*ast.Table)
@ -709,6 +720,7 @@ func (c *Config) LoadConfig(path string) error {
if len(c.Processors) > 1 {
sort.Sort(c.Processors)
}
return nil
}
@ -876,6 +888,7 @@ func (c *Config) addInput(name string, table *ast.Table) error {
}
rp := models.NewRunningInput(input, pluginConfig)
rp.SetDefaultTags(c.Tags)
c.Inputs = append(c.Inputs, rp)
return nil
}
@ -1751,6 +1764,8 @@ func buildOutput(name string, tbl *ast.Table) (*models.OutputConfig, error) {
Name: name,
Filter: filter,
}
// TODO
// Outputs don't support FieldDrop/FieldPass, so set to NameDrop/NamePass
if len(oc.Filter.FieldDrop) > 0 {
oc.Filter.NameDrop = oc.Filter.FieldDrop
@ -1758,5 +1773,47 @@ func buildOutput(name string, tbl *ast.Table) (*models.OutputConfig, error) {
if len(oc.Filter.FieldPass) > 0 {
oc.Filter.NamePass = oc.Filter.FieldPass
}
if node, ok := tbl.Fields["flush_interval"]; ok {
if kv, ok := node.(*ast.KeyValue); ok {
if str, ok := kv.Value.(*ast.String); ok {
dur, err := time.ParseDuration(str.Value)
if err != nil {
return nil, err
}
oc.FlushInterval = dur
}
}
}
if node, ok := tbl.Fields["metric_buffer_limit"]; ok {
if kv, ok := node.(*ast.KeyValue); ok {
if integer, ok := kv.Value.(*ast.Integer); ok {
v, err := integer.Int()
if err != nil {
return nil, err
}
oc.MetricBufferLimit = int(v)
}
}
}
if node, ok := tbl.Fields["metric_batch_size"]; ok {
if kv, ok := node.(*ast.KeyValue); ok {
if integer, ok := kv.Value.(*ast.Integer); ok {
v, err := integer.Int()
if err != nil {
return nil, err
}
oc.MetricBatchSize = int(v)
}
}
}
delete(tbl.Fields, "flush_interval")
delete(tbl.Fields, "metric_buffer_limit")
delete(tbl.Fields, "metric_batch_size")
return oc, nil
}

@ -4,6 +4,7 @@ import (
"bufio"
"bytes"
"compress/gzip"
"context"
"crypto/rand"
"errors"
"io"
@ -246,6 +247,51 @@ func RandomSleep(max time.Duration, shutdown chan struct{}) {
}
}
// RandomDuration returns a random duration between 0 and max.
func RandomDuration(max time.Duration) time.Duration {
if max == 0 {
return 0
}
var sleepns int64
maxSleep := big.NewInt(max.Nanoseconds())
if j, err := rand.Int(rand.Reader, maxSleep); err == nil {
sleepns = j.Int64()
}
return time.Duration(sleepns)
}
// SleepContext sleeps until the context is closed or the duration is reached.
func SleepContext(ctx context.Context, duration time.Duration) error {
if duration == 0 {
return nil
}
t := time.NewTimer(duration)
select {
case <-t.C:
return nil
case <-ctx.Done():
t.Stop()
return ctx.Err()
}
}
// AlignDuration returns the duration until next aligned interval.
func AlignDuration(tm time.Time, interval time.Duration) time.Duration {
return AlignTime(tm, interval).Sub(tm)
}
// AlignTime returns the time of the next aligned interval.
func AlignTime(tm time.Time, interval time.Duration) time.Time {
truncated := tm.Truncate(interval)
if truncated == tm {
return tm
}
return truncated.Add(interval)
}
// Exit status takes the error from exec.Command
// and returns the exit status and true
// if error is not exit status, will return 0 and false

@ -9,6 +9,7 @@ import (
"time"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
type SnakeTest struct {
@ -217,3 +218,55 @@ func TestVersionAlreadySet(t *testing.T) {
assert.Equal(t, "foo", Version())
}
func TestAlignDuration(t *testing.T) {
tests := []struct {
name string
now time.Time
interval time.Duration
expected time.Duration
}{
{
name: "aligned",
now: time.Date(2018, 1, 1, 1, 1, 0, 0, time.UTC),
interval: 10 * time.Second,
expected: 0 * time.Second,
},
{
name: "standard interval",
now: time.Date(2018, 1, 1, 1, 1, 1, 0, time.UTC),
interval: 10 * time.Second,
expected: 9 * time.Second,
},
{
name: "odd interval",
now: time.Date(2018, 1, 1, 1, 1, 1, 0, time.UTC),
interval: 3 * time.Second,
expected: 2 * time.Second,
},
{
name: "sub second interval",
now: time.Date(2018, 1, 1, 1, 1, 0, 5e8, time.UTC),
interval: 1 * time.Second,
expected: 500 * time.Millisecond,
},
{
name: "non divisible not aligned on minutes",
now: time.Date(2018, 1, 1, 1, 0, 0, 0, time.UTC),
interval: 1*time.Second + 100*time.Millisecond,
expected: 400 * time.Millisecond,
},
{
name: "long interval",
now: time.Date(2018, 1, 1, 1, 1, 0, 0, time.UTC),
interval: 1 * time.Hour,
expected: 59 * time.Minute,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
actual := AlignDuration(tt.now, tt.interval)
require.Equal(t, tt.expected, actual)
})
}
}

internal/models/buffer.go Normal file
@ -0,0 +1,214 @@
package models
import (
"sync"
"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/selfstat"
)
var (
AgentMetricsWritten = selfstat.Register("agent", "metrics_written", map[string]string{})
AgentMetricsDropped = selfstat.Register("agent", "metrics_dropped", map[string]string{})
)
// Buffer stores metrics in a circular buffer.
type Buffer struct {
sync.Mutex
buf []telegraf.Metric
first int // index of the first/oldest metric
last int // one after the index of the last/newest metric
size int // number of metrics currently in the buffer
cap int // the capacity of the buffer
batchFirst int // index of the first metric in the batch
batchLast int // one after the index of the last metric in the batch
batchSize int // number of metrics current in the batch
MetricsAdded selfstat.Stat
MetricsWritten selfstat.Stat
MetricsDropped selfstat.Stat
}
// NewBuffer returns a new empty Buffer with the given capacity.
func NewBuffer(name string, capacity int) *Buffer {
b := &Buffer{
buf: make([]telegraf.Metric, capacity),
first: 0,
last: 0,
size: 0,
cap: capacity,
MetricsAdded: selfstat.Register(
"write",
"metrics_added",
map[string]string{"output": name},
),
MetricsWritten: selfstat.Register(
"write",
"metrics_written",
map[string]string{"output": name},
),
MetricsDropped: selfstat.Register(
"write",
"metrics_dropped",
map[string]string{"output": name},
),
}
return b
}
// Len returns the number of metrics currently in the buffer.
func (b *Buffer) Len() int {
b.Lock()
defer b.Unlock()
return b.size
}
func (b *Buffer) metricAdded() {
b.MetricsAdded.Incr(1)
}
func (b *Buffer) metricWritten(metric telegraf.Metric) {
AgentMetricsWritten.Incr(1)
b.MetricsWritten.Incr(1)
metric.Accept()
}
func (b *Buffer) metricDropped(metric telegraf.Metric) {
AgentMetricsDropped.Incr(1)
b.MetricsDropped.Incr(1)
metric.Reject()
}
func (b *Buffer) inBatch() bool {
if b.batchSize == 0 {
return false
}
if b.batchFirst < b.batchLast {
return b.last >= b.batchFirst && b.last < b.batchLast
} else {
return b.last >= b.batchFirst || b.last < b.batchLast
}
}
func (b *Buffer) add(m telegraf.Metric) {
// Check if Buffer is full
if b.size == b.cap {
if b.batchSize == 0 {
// No batch taken by the output, we can drop the metric now.
b.metricDropped(b.buf[b.last])
} else if b.inBatch() {
// There is an outstanding batch and this will overwrite a metric
// in it, delay the dropping only in case the batch gets rejected.
b.batchSize--
b.batchFirst++
b.batchFirst %= b.cap
} else {
// There is an outstanding batch, but this overwrites a metric
// outside of it.
b.metricDropped(b.buf[b.last])
}
}
b.metricAdded()
b.buf[b.last] = m
b.last++
b.last %= b.cap
if b.size == b.cap {
b.first++
b.first %= b.cap
}
b.size = min(b.size+1, b.cap)
}
// Add adds metrics to the buffer
func (b *Buffer) Add(metrics ...telegraf.Metric) {
b.Lock()
defer b.Unlock()
for i := range metrics {
b.add(metrics[i])
}
}
// Batch returns a slice containing up to batchSize of the most recently added
// metrics.
//
// The metrics contained in the batch are not removed from the buffer, instead
// the last batch is recorded and removed only if Accept is called.
func (b *Buffer) Batch(batchSize int) []telegraf.Metric {
b.Lock()
defer b.Unlock()
outLen := min(b.size, batchSize)
out := make([]telegraf.Metric, outLen)
if outLen == 0 {
return out
}
b.batchFirst = b.first
b.batchLast = b.first + outLen
b.batchLast %= b.cap
b.batchSize = outLen
until := min(b.cap, b.first+outLen)
n := copy(out, b.buf[b.first:until])
if n < outLen {
copy(out[n:], b.buf[:outLen-n])
}
return out
}
// Accept removes the metrics contained in the last batch.
func (b *Buffer) Accept(batch []telegraf.Metric) {
b.Lock()
defer b.Unlock()
for _, m := range batch {
b.metricWritten(m)
}
if b.batchSize > 0 {
b.size -= b.batchSize
b.first += b.batchSize
b.first %= b.cap
}
b.resetBatch()
}
// Reject clears the current batch record so that calls to Accept will have no
// effect.
func (b *Buffer) Reject(batch []telegraf.Metric) {
b.Lock()
defer b.Unlock()
if len(batch) > b.batchSize {
// Part or all of the batch was dropped before reject was called.
for _, m := range batch[b.batchSize:] {
b.metricDropped(m)
}
}
b.resetBatch()
}
func (b *Buffer) resetBatch() {
b.batchFirst = 0
b.batchLast = 0
b.batchSize = 0
}
func min(a, b int) int {
if b < a {
return b
}
return a
}

@ -0,0 +1,385 @@
package models
import (
"testing"
"time"
"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/metric"
"github.com/stretchr/testify/require"
)
type MockMetric struct {
telegraf.Metric
AcceptF func()
RejectF func()
DropF func()
}
func (m *MockMetric) Accept() {
m.AcceptF()
}
func (m *MockMetric) Reject() {
m.RejectF()
}
func (m *MockMetric) Drop() {
m.DropF()
}
func Metric() telegraf.Metric {
m, err := metric.New(
"cpu",
map[string]string{},
map[string]interface{}{
"value": 42.0,
},
time.Unix(0, 0),
)
if err != nil {
panic(err)
}
return m
}
func BenchmarkAddMetrics(b *testing.B) {
buf := NewBuffer("test", 10000)
m := Metric()
for n := 0; n < b.N; n++ {
buf.Add(m)
}
}
func setup(b *Buffer) *Buffer {
b.MetricsAdded.Set(0)
b.MetricsWritten.Set(0)
b.MetricsDropped.Set(0)
return b
}
func TestBuffer_LenEmpty(t *testing.T) {
b := setup(NewBuffer("test", 5))
require.Equal(t, 0, b.Len())
}
func TestBuffer_LenOne(t *testing.T) {
m := Metric()
b := setup(NewBuffer("test", 5))
b.Add(m)
require.Equal(t, 1, b.Len())
}
func TestBuffer_LenFull(t *testing.T) {
m := Metric()
b := setup(NewBuffer("test", 5))
b.Add(m, m, m, m, m)
require.Equal(t, 5, b.Len())
}
func TestBuffer_LenOverfill(t *testing.T) {
m := Metric()
b := setup(NewBuffer("test", 5))
setup(b)
b.Add(m, m, m, m, m, m)
require.Equal(t, 5, b.Len())
}
func TestBuffer_BatchLenZero(t *testing.T) {
b := setup(NewBuffer("test", 5))
batch := b.Batch(0)
require.Len(t, batch, 0)
}
func TestBuffer_BatchLenBufferEmpty(t *testing.T) {
b := setup(NewBuffer("test", 5))
batch := b.Batch(2)
require.Len(t, batch, 0)
}
func TestBuffer_BatchLenUnderfill(t *testing.T) {
m := Metric()
b := setup(NewBuffer("test", 5))
b.Add(m)
batch := b.Batch(2)
require.Len(t, batch, 1)
}
func TestBuffer_BatchLenFill(t *testing.T) {
m := Metric()
b := setup(NewBuffer("test", 5))
b.Add(m, m, m)
batch := b.Batch(2)
require.Len(t, batch, 2)
}
func TestBuffer_BatchLenExact(t *testing.T) {
m := Metric()
b := setup(NewBuffer("test", 5))
b.Add(m, m)
batch := b.Batch(2)
require.Len(t, batch, 2)
}
func TestBuffer_BatchLenLargerThanBuffer(t *testing.T) {
m := Metric()
b := setup(NewBuffer("test", 5))
b.Add(m, m, m, m, m)
batch := b.Batch(6)
require.Len(t, batch, 5)
}
func TestBuffer_BatchWrap(t *testing.T) {
m := Metric()
b := setup(NewBuffer("test", 5))
b.Add(m, m, m, m, m)
batch := b.Batch(2)
b.Accept(batch)
b.Add(m, m)
batch = b.Batch(5)
require.Len(t, batch, 5)
}
func TestBuffer_AddDropsOverwrittenMetrics(t *testing.T) {
m := Metric()
b := setup(NewBuffer("test", 5))
b.Add(m, m, m, m, m)
b.Add(m, m, m, m, m)
require.Equal(t, int64(5), b.MetricsDropped.Get())
require.Equal(t, int64(0), b.MetricsWritten.Get())
}
func TestBuffer_AcceptRemovesBatch(t *testing.T) {
m := Metric()
b := setup(NewBuffer("test", 5))
b.Add(m, m, m)
batch := b.Batch(2)
b.Accept(batch)
require.Equal(t, 1, b.Len())
}
func TestBuffer_RejectLeavesBatch(t *testing.T) {
m := Metric()
b := setup(NewBuffer("test", 5))
b.Add(m, m, m)
batch := b.Batch(2)
b.Reject(batch)
require.Equal(t, 3, b.Len())
}
func TestBuffer_AcceptWritesOverwrittenBatch(t *testing.T) {
m := Metric()
b := setup(NewBuffer("test", 5))
b.Add(m, m, m, m, m)
batch := b.Batch(5)
b.Add(m, m, m, m, m)
b.Accept(batch)
require.Equal(t, int64(0), b.MetricsDropped.Get())
require.Equal(t, int64(5), b.MetricsWritten.Get())
}
func TestBuffer_BatchRejectDropsOverwrittenBatch(t *testing.T) {
m := Metric()
b := setup(NewBuffer("test", 5))
b.Add(m, m, m, m, m)
batch := b.Batch(5)
b.Add(m, m, m, m, m)
b.Reject(batch)
require.Equal(t, int64(5), b.MetricsDropped.Get())
require.Equal(t, int64(0), b.MetricsWritten.Get())
}
func TestBuffer_MetricsOverwriteBatchAccept(t *testing.T) {
m := Metric()
b := setup(NewBuffer("test", 5))
b.Add(m, m, m, m, m)
batch := b.Batch(3)
b.Add(m, m, m)
b.Accept(batch)
require.Equal(t, int64(0), b.MetricsDropped.Get())
require.Equal(t, int64(3), b.MetricsWritten.Get())
}
func TestBuffer_MetricsOverwriteBatchReject(t *testing.T) {
m := Metric()
b := setup(NewBuffer("test", 5))
b.Add(m, m, m, m, m)
batch := b.Batch(3)
b.Add(m, m, m)
b.Reject(batch)
require.Equal(t, int64(3), b.MetricsDropped.Get())
require.Equal(t, int64(0), b.MetricsWritten.Get())
}
func TestBuffer_MetricsBatchAcceptRemoved(t *testing.T) {
m := Metric()
b := setup(NewBuffer("test", 5))
b.Add(m, m, m, m, m)
batch := b.Batch(3)
b.Add(m, m, m, m, m)
b.Accept(batch)
require.Equal(t, int64(2), b.MetricsDropped.Get())
require.Equal(t, int64(3), b.MetricsWritten.Get())
}
func TestBuffer_WrapWithBatch(t *testing.T) {
m := Metric()
b := setup(NewBuffer("test", 5))
b.Add(m, m, m)
b.Batch(3)
b.Add(m, m, m, m, m, m)
require.Equal(t, int64(1), b.MetricsDropped.Get())
}
func TestBuffer_BatchNotRemoved(t *testing.T) {
m := Metric()
b := setup(NewBuffer("test", 5))
b.Add(m, m, m, m, m)
b.Batch(2)
require.Equal(t, 5, b.Len())
}
func TestBuffer_BatchRejectAcceptNoop(t *testing.T) {
m := Metric()
b := setup(NewBuffer("test", 5))
b.Add(m, m, m, m, m)
batch := b.Batch(2)
b.Reject(batch)
b.Accept(batch)
require.Equal(t, 5, b.Len())
}
func TestBuffer_AcceptCallsMetricAccept(t *testing.T) {
var accept int
mm := &MockMetric{
Metric: Metric(),
AcceptF: func() {
accept++
},
}
b := setup(NewBuffer("test", 5))
b.Add(mm, mm, mm)
batch := b.Batch(2)
b.Accept(batch)
require.Equal(t, 2, accept)
}
func TestBuffer_AddCallsMetricRejectWhenNoBatch(t *testing.T) {
var reject int
mm := &MockMetric{
Metric: Metric(),
RejectF: func() {
reject++
},
}
b := setup(NewBuffer("test", 5))
setup(b)
b.Add(mm, mm, mm, mm, mm)
b.Add(mm, mm)
require.Equal(t, 2, reject)
}
func TestBuffer_AddCallsMetricRejectWhenNotInBatch(t *testing.T) {
var reject int
mm := &MockMetric{
Metric: Metric(),
RejectF: func() {
reject++
},
}
b := setup(NewBuffer("test", 5))
setup(b)
b.Add(mm, mm, mm, mm, mm)
batch := b.Batch(2)
b.Add(mm, mm, mm, mm)
// metric[2] and metric[3] rejected
require.Equal(t, 2, reject)
b.Reject(batch)
// metric[1] and metric[2] now rejected
require.Equal(t, 4, reject)
}
func TestBuffer_RejectCallsMetricRejectWithOverwritten(t *testing.T) {
var reject int
mm := &MockMetric{
Metric: Metric(),
RejectF: func() {
reject++
},
}
b := setup(NewBuffer("test", 5))
b.Add(mm, mm, mm, mm, mm)
batch := b.Batch(5)
b.Add(mm, mm)
require.Equal(t, 0, reject)
b.Reject(batch)
require.Equal(t, 2, reject)
}
func TestBuffer_AddOverwriteAndReject(t *testing.T) {
var reject int
mm := &MockMetric{
Metric: Metric(),
RejectF: func() {
reject++
},
}
b := setup(NewBuffer("test", 5))
b.Add(mm, mm, mm, mm, mm)
batch := b.Batch(5)
b.Add(mm, mm, mm, mm, mm)
b.Add(mm, mm, mm, mm, mm)
b.Add(mm, mm, mm, mm, mm)
b.Add(mm, mm, mm, mm, mm)
require.Equal(t, 15, reject)
b.Reject(batch)
require.Equal(t, 20, reject)
}
func TestBuffer_AddOverwriteAndRejectOffset(t *testing.T) {
var reject int
var accept int
mm := &MockMetric{
Metric: Metric(),
RejectF: func() {
reject++
},
AcceptF: func() {
accept++
},
}
b := setup(NewBuffer("test", 5))
b.Add(mm, mm, mm)
b.Add(mm, mm, mm, mm)
require.Equal(t, 2, reject)
batch := b.Batch(5)
b.Add(mm, mm, mm, mm)
require.Equal(t, 2, reject)
b.Add(mm, mm, mm, mm)
require.Equal(t, 5, reject)
b.Add(mm, mm, mm, mm)
require.Equal(t, 9, reject)
b.Add(mm, mm, mm, mm)
require.Equal(t, 13, reject)
b.Accept(batch)
require.Equal(t, 13, reject)
require.Equal(t, 5, accept)
}

@ -6,6 +6,7 @@ import (
"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/metric"
"github.com/influxdata/telegraf/testutil"
"github.com/stretchr/testify/require"
)
@ -480,3 +481,45 @@ func TestFilter_FilterTagsPassAndDrop(t *testing.T) {
}
}
func BenchmarkFilter(b *testing.B) {
tests := []struct {
name string
filter Filter
metric telegraf.Metric
}{
{
name: "empty filter",
filter: Filter{},
metric: testutil.MustMetric("cpu",
map[string]string{},
map[string]interface{}{
"value": 42,
},
time.Unix(0, 0),
),
},
{
name: "namepass",
filter: Filter{
NamePass: []string{"cpu"},
},
metric: testutil.MustMetric("cpu",
map[string]string{},
map[string]interface{}{
"value": 42,
},
time.Unix(0, 0),
),
},
}
for _, tt := range tests {
b.Run(tt.name, func(b *testing.B) {
require.NoError(b, tt.filter.Compile())
for n := 0; n < b.N; n++ {
tt.filter.Select(tt.metric)
}
})
}
}

@ -1,30 +1,53 @@
package models
import (
"log"
"sync"
"time"
"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/selfstat"
)
type RunningAggregator struct {
a telegraf.Aggregator
Config *AggregatorConfig
metrics chan telegraf.Metric
sync.Mutex
Aggregator telegraf.Aggregator
Config *AggregatorConfig
periodStart time.Time
periodEnd time.Time
MetricsPushed selfstat.Stat
MetricsFiltered selfstat.Stat
MetricsDropped selfstat.Stat
PushTime selfstat.Stat
}
func NewRunningAggregator(
a telegraf.Aggregator,
conf *AggregatorConfig,
aggregator telegraf.Aggregator,
config *AggregatorConfig,
) *RunningAggregator {
return &RunningAggregator{
a: a,
Config: conf,
metrics: make(chan telegraf.Metric, 100),
Aggregator: aggregator,
Config: config,
MetricsPushed: selfstat.Register(
"aggregate",
"metrics_pushed",
map[string]string{"aggregator": config.Name},
),
MetricsFiltered: selfstat.Register(
"aggregate",
"metrics_filtered",
map[string]string{"aggregator": config.Name},
),
MetricsDropped: selfstat.Register(
"aggregate",
"metrics_dropped",
map[string]string{"aggregator": config.Name},
),
PushTime: selfstat.Register(
"aggregate",
"push_time_ns",
map[string]string{"aggregator": config.Name},
),
}
}
@ -46,6 +69,15 @@ func (r *RunningAggregator) Name() string {
return "aggregators." + r.Config.Name
}
func (r *RunningAggregator) Period() time.Duration {
return r.Config.Period
}
func (r *RunningAggregator) SetPeriodStart(start time.Time) {
r.periodStart = start
r.periodEnd = r.periodStart.Add(r.Config.Period).Add(r.Config.Delay)
}
func (r *RunningAggregator) MakeMetric(metric telegraf.Metric) telegraf.Metric {
m := makemetric(
metric,
@ -59,9 +91,21 @@ func (r *RunningAggregator) MakeMetric(metric telegraf.Metric) telegraf.Metric {
m.SetAggregate(true)
}
r.MetricsPushed.Incr(1)
return m
}
func (r *RunningAggregator) metricFiltered(metric telegraf.Metric) {
r.MetricsFiltered.Incr(1)
metric.Accept()
}
func (r *RunningAggregator) metricDropped(metric telegraf.Metric) {
r.MetricsDropped.Incr(1)
metric.Accept()
}
// Add a metric to the aggregator and return true if the original metric
// should be dropped.
func (r *RunningAggregator) Add(metric telegraf.Metric) bool {
@ -74,75 +118,31 @@ func (r *RunningAggregator) Add(metric telegraf.Metric) bool {
return r.Config.DropOriginal
}
r.metrics <- metric
r.Lock()
defer r.Unlock()
if r.periodStart.IsZero() || metric.Time().Before(r.periodStart) || metric.Time().After(r.periodEnd) {
r.metricDropped(metric)
return false
}
r.Aggregator.Add(metric)
return r.Config.DropOriginal
}
func (r *RunningAggregator) add(in telegraf.Metric) {
r.a.Add(in)
func (r *RunningAggregator) Push(acc telegraf.Accumulator) {
r.Lock()
defer r.Unlock()
r.periodStart = r.periodEnd
r.periodEnd = r.periodStart.Add(r.Config.Period).Add(r.Config.Delay)
r.push(acc)
r.Aggregator.Reset()
}
func (r *RunningAggregator) push(acc telegraf.Accumulator) {
r.a.Push(acc)
}
func (r *RunningAggregator) reset() {
r.a.Reset()
}
// Run runs the running aggregator, listens for incoming metrics, and waits
// for period ticks to tell it when to push and reset the aggregator.
func (r *RunningAggregator) Run(
acc telegraf.Accumulator,
shutdown chan struct{},
) {
// The start of the period is truncated to the nearest second.
//
// Every metric then gets it's timestamp checked and is dropped if it
// is not within:
//
// start < t < end + truncation + delay
//
// So if we start at now = 00:00.2 with a 10s period and 0.3s delay:
// now = 00:00.2
// start = 00:00
// truncation = 00:00.2
// end = 00:10
// 1st interval: 00:00 - 00:10.5
// 2nd interval: 00:10 - 00:20.5
// etc.
//
now := time.Now()
r.periodStart = now.Truncate(time.Second)
truncation := now.Sub(r.periodStart)
r.periodEnd = r.periodStart.Add(r.Config.Period)
time.Sleep(r.Config.Delay)
periodT := time.NewTicker(r.Config.Period)
defer periodT.Stop()
for {
select {
case <-shutdown:
if len(r.metrics) > 0 {
// wait until metrics are flushed before exiting
continue
}
return
case m := <-r.metrics:
if m.Time().Before(r.periodStart) ||
m.Time().After(r.periodEnd.Add(truncation).Add(r.Config.Delay)) {
// the metric is outside the current aggregation period, so
// skip it.
log.Printf("D! aggregator: metric \"%s\" is not in the current timewindow, skipping", m.Name())
continue
}
r.add(m)
case <-periodT.C:
r.periodStart = r.periodEnd
r.periodEnd = r.periodStart.Add(r.Config.Period)
r.push(acc)
r.reset()
}
}
start := time.Now()
r.Aggregator.Push(acc)
elapsed := time.Since(start)
r.PushTime.Incr(elapsed.Nanoseconds())
}

@ -1,16 +1,13 @@
package models
import (
"sync"
"sync/atomic"
"testing"
"time"
"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/metric"
"github.com/influxdata/telegraf/testutil"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
@ -23,28 +20,24 @@ func TestAdd(t *testing.T) {
},
Period: time.Millisecond * 500,
})
assert.NoError(t, ra.Config.Filter.Compile())
require.NoError(t, ra.Config.Filter.Compile())
acc := testutil.Accumulator{}
go ra.Run(&acc, make(chan struct{}))
m, err := metric.New("RITest",
now := time.Now()
ra.SetPeriodStart(now)
m := testutil.MustMetric("RITest",
map[string]string{},
map[string]interface{}{
"value": int64(101),
},
time.Now().Add(time.Millisecond*150),
telegraf.Untyped)
require.NoError(t, err)
require.False(t, ra.Add(m))
ra.Push(&acc)
assert.False(t, ra.Add(m))
for {
time.Sleep(time.Millisecond)
if atomic.LoadInt64(&a.sum) > 0 {
break
}
}
assert.Equal(t, int64(101), atomic.LoadInt64(&a.sum))
require.Equal(t, 1, len(acc.Metrics))
require.Equal(t, int64(101), acc.Metrics[0].Fields["sum"])
}
func TestAddMetricsOutsideCurrentPeriod(t *testing.T) {
@ -56,50 +49,45 @@ func TestAddMetricsOutsideCurrentPeriod(t *testing.T) {
},
Period: time.Millisecond * 500,
})
assert.NoError(t, ra.Config.Filter.Compile())
require.NoError(t, ra.Config.Filter.Compile())
acc := testutil.Accumulator{}
go ra.Run(&acc, make(chan struct{}))
now := time.Now()
ra.SetPeriodStart(now)
m, err := metric.New("RITest",
m := testutil.MustMetric("RITest",
map[string]string{},
map[string]interface{}{
"value": int64(101),
},
time.Now().Add(-time.Hour),
telegraf.Untyped)
require.NoError(t, err)
assert.False(t, ra.Add(m))
now.Add(-time.Hour),
telegraf.Untyped,
)
require.False(t, ra.Add(m))
// metric after current period
m, err = metric.New("RITest",
m = testutil.MustMetric("RITest",
map[string]string{},
map[string]interface{}{
"value": int64(101),
},
time.Now().Add(time.Hour),
telegraf.Untyped)
require.NoError(t, err)
assert.False(t, ra.Add(m))
now.Add(time.Hour),
telegraf.Untyped,
)
require.False(t, ra.Add(m))
// "now" metric
m, err = metric.New("RITest",
m = testutil.MustMetric("RITest",
map[string]string{},
map[string]interface{}{
"value": int64(101),
},
time.Now().Add(time.Millisecond*50),
telegraf.Untyped)
require.NoError(t, err)
assert.False(t, ra.Add(m))
require.False(t, ra.Add(m))
for {
time.Sleep(time.Millisecond)
if atomic.LoadInt64(&a.sum) > 0 {
break
}
}
assert.Equal(t, int64(101), atomic.LoadInt64(&a.sum))
ra.Push(&acc)
require.Equal(t, 1, len(acc.Metrics))
require.Equal(t, int64(101), acc.Metrics[0].Fields["sum"])
}
func TestAddAndPushOnePeriod(t *testing.T) {
@ -111,37 +99,24 @@ func TestAddAndPushOnePeriod(t *testing.T) {
},
Period: time.Millisecond * 500,
})
assert.NoError(t, ra.Config.Filter.Compile())
require.NoError(t, ra.Config.Filter.Compile())
acc := testutil.Accumulator{}
shutdown := make(chan struct{})
var wg sync.WaitGroup
wg.Add(1)
go func() {
defer wg.Done()
ra.Run(&acc, shutdown)
}()
now := time.Now()
ra.SetPeriodStart(now)
m, err := metric.New("RITest",
m := testutil.MustMetric("RITest",
map[string]string{},
map[string]interface{}{
"value": int64(101),
},
time.Now().Add(time.Millisecond*100),
telegraf.Untyped)
require.NoError(t, err)
assert.False(t, ra.Add(m))
require.False(t, ra.Add(m))
ra.Push(&acc)
for {
time.Sleep(time.Millisecond)
if acc.NMetrics() > 0 {
break
}
}
acc.AssertContainsFields(t, "TestMetric", map[string]interface{}{"sum": int64(101)})
close(shutdown)
wg.Wait()
}
func TestAddDropOriginal(t *testing.T) {
@ -152,28 +127,29 @@ func TestAddDropOriginal(t *testing.T) {
},
DropOriginal: true,
})
assert.NoError(t, ra.Config.Filter.Compile())
require.NoError(t, ra.Config.Filter.Compile())
m, err := metric.New("RITest",
now := time.Now()
ra.SetPeriodStart(now)
m := testutil.MustMetric("RITest",
map[string]string{},
map[string]interface{}{
"value": int64(101),
},
time.Now(),
now,
telegraf.Untyped)
require.NoError(t, err)
assert.True(t, ra.Add(m))
require.True(t, ra.Add(m))
// this metric name doesn't match the filter, so Add will return false
m2, err := metric.New("foobar",
m2 := testutil.MustMetric("foobar",
map[string]string{},
map[string]interface{}{
"value": int64(101),
},
time.Now(),
now,
telegraf.Untyped)
require.NoError(t, err)
assert.False(t, ra.Add(m2))
require.False(t, ra.Add(m2))
}
type TestAggregator struct {

@ -1,11 +1,9 @@
package models
import (
"fmt"
"time"
"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/plugins/serializers/influx"
"github.com/influxdata/telegraf/selfstat"
)
@ -15,16 +13,13 @@ type RunningInput struct {
Input telegraf.Input
Config *InputConfig
trace bool
defaultTags map[string]string
MetricsGathered selfstat.Stat
GatherTime selfstat.Stat
}
func NewRunningInput(
input telegraf.Input,
config *InputConfig,
) *RunningInput {
func NewRunningInput(input telegraf.Input, config *InputConfig) *RunningInput {
return &RunningInput{
Input: input,
Config: config,
@ -33,6 +28,11 @@ func NewRunningInput(
"metrics_gathered",
map[string]string{"input": config.Name},
),
GatherTime: selfstat.RegisterTiming(
"gather",
"gather_time_ns",
map[string]string{"input": config.Name},
),
}
}
@ -52,13 +52,19 @@ func (r *RunningInput) Name() string {
return "inputs." + r.Config.Name
}
func (r *RunningInput) metricFiltered(metric telegraf.Metric) {
metric.Drop()
}
func (r *RunningInput) MakeMetric(metric telegraf.Metric) telegraf.Metric {
if ok := r.Config.Filter.Select(metric); !ok {
r.metricFiltered(metric)
return nil
}
r.Config.Filter.Modify(metric)
if len(metric.FieldList()) == 0 {
r.metricFiltered(metric)
return nil
}
@ -70,26 +76,17 @@ func (r *RunningInput) MakeMetric(metric telegraf.Metric) telegraf.Metric {
r.Config.Tags,
r.defaultTags)
if r.trace && m != nil {
s := influx.NewSerializer()
s.SetFieldSortOrder(influx.SortFields)
octets, err := s.Serialize(m)
if err == nil {
fmt.Print("> " + string(octets))
}
}
r.MetricsGathered.Incr(1)
GlobalMetricsGathered.Incr(1)
return m
}
func (r *RunningInput) Trace() bool {
return r.trace
}
func (r *RunningInput) SetTrace(trace bool) {
r.trace = trace
func (r *RunningInput) Gather(acc telegraf.Accumulator) error {
start := time.Now()
err := r.Input.Gather(acc)
elapsed := time.Since(start)
r.GatherTime.Incr(elapsed.Nanoseconds())
return err
}
func (r *RunningInput) SetDefaultTags(tags map[string]string) {

@ -6,6 +6,7 @@ import (
"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/metric"
"github.com/influxdata/telegraf/testutil"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
@ -66,17 +67,13 @@ func TestMakeMetricWithPluginTags(t *testing.T) {
},
})
ri.SetTrace(true)
assert.Equal(t, true, ri.Trace())
m, err := metric.New("RITest",
m := testutil.MustMetric("RITest",
map[string]string{},
map[string]interface{}{
"value": int64(101),
},
now,
telegraf.Untyped)
require.NoError(t, err)
m = ri.MakeMetric(m)
expected, err := metric.New("RITest",
@ -102,8 +99,6 @@ func TestMakeMetricFilteredOut(t *testing.T) {
Filter: Filter{NamePass: []string{"foobar"}},
})
ri.SetTrace(true)
assert.Equal(t, true, ri.Trace())
assert.NoError(t, ri.Config.Filter.Compile())
m, err := metric.New("RITest",
@ -127,17 +122,13 @@ func TestMakeMetricWithDaemonTags(t *testing.T) {
"foo": "bar",
})
ri.SetTrace(true)
assert.Equal(t, true, ri.Trace())
m, err := metric.New("RITest",
m := testutil.MustMetric("RITest",
map[string]string{},
map[string]interface{}{
"value": int64(101),
},
now,
telegraf.Untyped)
require.NoError(t, err)
m = ri.MakeMetric(m)
expected, err := metric.New("RITest",
map[string]string{

@ -6,7 +6,6 @@ import (
"time"
"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/internal/buffer"
"github.com/influxdata/telegraf/selfstat"
)
@ -18,6 +17,16 @@ const (
DEFAULT_METRIC_BUFFER_LIMIT = 10000
)
// OutputConfig containing name and filter
type OutputConfig struct {
Name string
Filter Filter
FlushInterval time.Duration
MetricBufferLimit int
MetricBatchSize int
}
// RunningOutput contains the output configuration
type RunningOutput struct {
Name string
@ -27,24 +36,16 @@ type RunningOutput struct {
MetricBatchSize int
MetricsFiltered selfstat.Stat
MetricsWritten selfstat.Stat
BufferSize selfstat.Stat
BufferLimit selfstat.Stat
WriteTime selfstat.Stat
metrics *buffer.Buffer
failMetrics *buffer.Buffer
batch []telegraf.Metric
buffer *Buffer
BatchReady chan time.Time
// Guards against concurrent calls to Add, Push, Reset
aggMutex sync.Mutex
// Guards against concurrent calls to the Output as described in #3009
writeMutex sync.Mutex
}
// OutputConfig containing name and filter
type OutputConfig struct {
Name string
Filter Filter
aggMutex sync.Mutex
batchMutex sync.Mutex
}
func NewRunningOutput(
@ -54,25 +55,27 @@ func NewRunningOutput(
batchSize int,
bufferLimit int,
) *RunningOutput {
if conf.MetricBufferLimit > 0 {
bufferLimit = conf.MetricBufferLimit
}
if bufferLimit == 0 {
bufferLimit = DEFAULT_METRIC_BUFFER_LIMIT
}
if conf.MetricBatchSize > 0 {
batchSize = conf.MetricBatchSize
}
if batchSize == 0 {
batchSize = DEFAULT_METRIC_BATCH_SIZE
}
ro := &RunningOutput{
Name: name,
metrics: buffer.NewBuffer(batchSize),
failMetrics: buffer.NewBuffer(bufferLimit),
batch: make([]telegraf.Metric, 0, batchSize),
buffer: NewBuffer(name, bufferLimit),
BatchReady: make(chan time.Time, 1),
Output: output,
Config: conf,
MetricBufferLimit: bufferLimit,
MetricBatchSize: batchSize,
MetricsWritten: selfstat.Register(
"write",
"metrics_written",
map[string]string{"output": name},
),
MetricsFiltered: selfstat.Register(
"write",
"metrics_filtered",
@ -94,20 +97,28 @@ func NewRunningOutput(
map[string]string{"output": name},
),
}
ro.BufferLimit.Set(int64(ro.MetricBufferLimit))
return ro
}
// AddMetric adds a metric to the output. This function can also write cached
// points if FlushBufferWhenFull is true.
func (ro *RunningOutput) metricFiltered(metric telegraf.Metric) {
ro.MetricsFiltered.Incr(1)
metric.Drop()
}
// AddMetric adds a metric to the output.
//
// Takes ownership of metric
func (ro *RunningOutput) AddMetric(metric telegraf.Metric) {
if ok := ro.Config.Filter.Select(metric); !ok {
ro.MetricsFiltered.Incr(1)
ro.metricFiltered(metric)
return
}
ro.Config.Filter.Modify(metric)
if len(metric.FieldList()) == 0 {
ro.metricFiltered(metric)
return
}
@ -118,85 +129,98 @@ func (ro *RunningOutput) AddMetric(metric telegraf.Metric) {
return
}
ro.metrics.Add(metric)
if ro.metrics.Len() == ro.MetricBatchSize {
batch := ro.metrics.Batch(ro.MetricBatchSize)
err := ro.write(batch)
if err != nil {
ro.failMetrics.Add(batch...)
log.Printf("E! Error writing to output [%s]: %v", ro.Name, err)
ro.batchMutex.Lock()
ro.batch = append(ro.batch, metric)
if len(ro.batch) == ro.MetricBatchSize {
ro.addBatchToBuffer()
nBuffer := ro.buffer.Len()
ro.BufferSize.Set(int64(nBuffer))
select {
case ro.BatchReady <- time.Now():
default:
}
}
ro.batchMutex.Unlock()
}
// Write writes all cached points to this output.
// AddBatchToBuffer moves the metrics from the batch into the metric buffer.
func (ro *RunningOutput) addBatchToBuffer() {
ro.buffer.Add(ro.batch...)
ro.batch = ro.batch[:0]
}
// Write writes all metrics to the output, stopping when all have been sent on
// or error.
func (ro *RunningOutput) Write() error {
if output, ok := ro.Output.(telegraf.AggregatingOutput); ok {
ro.aggMutex.Lock()
metrics := output.Push()
ro.metrics.Add(metrics...)
ro.buffer.Add(metrics...)
output.Reset()
ro.aggMutex.Unlock()
}
// add and write can be called concurrently
ro.batchMutex.Lock()
ro.addBatchToBuffer()
ro.batchMutex.Unlock()
nFails, nMetrics := ro.failMetrics.Len(), ro.metrics.Len()
ro.BufferSize.Set(int64(nFails + nMetrics))
log.Printf("D! Output [%s] buffer fullness: %d / %d metrics. ",
ro.Name, nFails+nMetrics, ro.MetricBufferLimit)
var err error
if !ro.failMetrics.IsEmpty() {
// how many batches of failed writes we need to write.
nBatches := nFails/ro.MetricBatchSize + 1
batchSize := ro.MetricBatchSize
nBuffer := ro.buffer.Len()
for i := 0; i < nBatches; i++ {
// If it's the last batch, only grab the metrics that have not had
// a write attempt already (this is primarily to preserve order).
if i == nBatches-1 {
batchSize = nFails % ro.MetricBatchSize
}
batch := ro.failMetrics.Batch(batchSize)
// If we've already failed previous writes, don't bother trying to
// write to this output again. We are not exiting the loop just so
// that we can rotate the metrics to preserve order.
if err == nil {
err = ro.write(batch)
}
if err != nil {
ro.failMetrics.Add(batch...)
}
// Only process the metrics in the buffer now. Metrics added while we are
// writing will be sent on the next call.
nBatches := nBuffer/ro.MetricBatchSize + 1
for i := 0; i < nBatches; i++ {
batch := ro.buffer.Batch(ro.MetricBatchSize)
if len(batch) == 0 {
break
}
}
batch := ro.metrics.Batch(ro.MetricBatchSize)
// see comment above about not trying to write to an already failed output.
// if ro.failMetrics is empty then err will always be nil at this point.
if err == nil {
err = ro.write(batch)
}
if err != nil {
ro.failMetrics.Add(batch...)
return err
err := ro.write(batch)
if err != nil {
ro.buffer.Reject(batch)
return err
}
ro.buffer.Accept(batch)
}
return nil
}
func (ro *RunningOutput) write(metrics []telegraf.Metric) error {
nMetrics := len(metrics)
if nMetrics == 0 {
// WriteBatch writes only the batch metrics to the output.
func (ro *RunningOutput) WriteBatch() error {
batch := ro.buffer.Batch(ro.MetricBatchSize)
if len(batch) == 0 {
return nil
}
ro.writeMutex.Lock()
defer ro.writeMutex.Unlock()
err := ro.write(batch)
if err != nil {
ro.buffer.Reject(batch)
return err
}
ro.buffer.Accept(batch)
return nil
}
func (ro *RunningOutput) write(metrics []telegraf.Metric) error {
start := time.Now()
err := ro.Output.Write(metrics)
elapsed := time.Since(start)
ro.WriteTime.Incr(elapsed.Nanoseconds())
if err == nil {
log.Printf("D! Output [%s] wrote batch of %d metrics in %s\n",
ro.Name, nMetrics, elapsed)
ro.MetricsWritten.Incr(int64(nMetrics))
ro.WriteTime.Incr(elapsed.Nanoseconds())
log.Printf("D! [outputs.%s] wrote batch of %d metrics in %s\n",
ro.Name, len(metrics), elapsed)
}
return err
}
func (ro *RunningOutput) LogBufferStatus() {
nBuffer := ro.buffer.Len()
log.Printf("D! [outputs.%s] buffer fullness: %d / %d metrics. ",
ro.Name, nBuffer, ro.MetricBufferLimit)
}

@ -231,56 +231,6 @@ func TestRunningOutputDefault(t *testing.T) {
assert.Len(t, m.Metrics(), 10)
}
// Test that running output doesn't flush until it's full when
// FlushBufferWhenFull is set.
func TestRunningOutputFlushWhenFull(t *testing.T) {
conf := &OutputConfig{
Filter: Filter{},
}
m := &mockOutput{}
ro := NewRunningOutput("test", m, conf, 6, 10)
// Fill buffer to 1 under limit
for _, metric := range first5 {
ro.AddMetric(metric)
}
// no flush yet
assert.Len(t, m.Metrics(), 0)
// add one more metric
ro.AddMetric(next5[0])
// now it flushed
assert.Len(t, m.Metrics(), 6)
// add one more metric and write it manually
ro.AddMetric(next5[1])
err := ro.Write()
assert.NoError(t, err)
assert.Len(t, m.Metrics(), 7)
}
// Test that running output doesn't flush until it's full when
// FlushBufferWhenFull is set, twice.
func TestRunningOutputMultiFlushWhenFull(t *testing.T) {
conf := &OutputConfig{
Filter: Filter{},
}
m := &mockOutput{}
ro := NewRunningOutput("test", m, conf, 4, 12)
// Fill buffer past limit twice
for _, metric := range first5 {
ro.AddMetric(metric)
}
for _, metric := range next5 {
ro.AddMetric(metric)
}
// flushed twice
assert.Len(t, m.Metrics(), 8)
}
func TestRunningOutputWriteFail(t *testing.T) {
conf := &OutputConfig{
Filter: Filter{},

@ -27,6 +27,19 @@ type ProcessorConfig struct {
Filter Filter
}
func (rp *RunningProcessor) metricFiltered(metric telegraf.Metric) {
metric.Drop()
}
func containsMetric(item telegraf.Metric, metrics []telegraf.Metric) bool {
for _, m := range metrics {
if item == m {
return true
}
}
return false
}
func (rp *RunningProcessor) Apply(in ...telegraf.Metric) []telegraf.Metric {
rp.Lock()
defer rp.Unlock()
@ -43,6 +56,7 @@ func (rp *RunningProcessor) Apply(in ...telegraf.Metric) []telegraf.Metric {
rp.Config.Filter.Modify(metric)
if len(metric.FieldList()) == 0 {
rp.metricFiltered(metric)
continue
}

@ -6,7 +6,7 @@ import (
"time"
"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/metric"
"github.com/influxdata/telegraf/testutil"
"github.com/stretchr/testify/require"
)
@ -41,20 +41,6 @@ func TagProcessor(key, value string) *MockProcessor {
}
}
func Metric(
name string,
tags map[string]string,
fields map[string]interface{},
tm time.Time,
tp ...telegraf.ValueType,
) telegraf.Metric {
m, err := metric.New(name, tags, fields, tm, tp...)
if err != nil {
panic(err)
}
return m
}
func TestRunningProcessor_Apply(t *testing.T) {
type args struct {
Processor telegraf.Processor
@ -76,7 +62,7 @@ func TestRunningProcessor_Apply(t *testing.T) {
},
},
input: []telegraf.Metric{
Metric(
testutil.MustMetric(
"cpu",
map[string]string{},
map[string]interface{}{
@ -86,7 +72,7 @@ func TestRunningProcessor_Apply(t *testing.T) {
),
},
expected: []telegraf.Metric{
Metric(
testutil.MustMetric(
"cpu",
map[string]string{
"apply": "true",
@ -109,7 +95,7 @@ func TestRunningProcessor_Apply(t *testing.T) {
},
},
input: []telegraf.Metric{
Metric(
testutil.MustMetric(
"cpu",
map[string]string{},
map[string]interface{}{
@ -119,7 +105,7 @@ func TestRunningProcessor_Apply(t *testing.T) {
),
},
expected: []telegraf.Metric{
Metric(
testutil.MustMetric(
"cpu",
map[string]string{
"apply": "true",
@ -142,7 +128,7 @@ func TestRunningProcessor_Apply(t *testing.T) {
},
},
input: []telegraf.Metric{
Metric(
testutil.MustMetric(
"cpu",
map[string]string{},
map[string]interface{}{
@ -152,7 +138,7 @@ func TestRunningProcessor_Apply(t *testing.T) {
),
},
expected: []telegraf.Metric{
Metric(
testutil.MustMetric(
"cpu",
map[string]string{},
map[string]interface{}{

@ -62,6 +62,17 @@ type Metric interface {
// Copy returns a deep copy of the Metric.
Copy() Metric
// Accept marks the metric as processed successfully and written to an
// output.
Accept()
// Reject marks the metric as processed unsuccessfully.
Reject()
// Drop marks the metric as processed successfully without being written
// to any output.
Drop()
// Mark Metric as an aggregate
SetAggregate(bool)
IsAggregate() bool

@ -248,6 +248,15 @@ func (m *metric) HashID() uint64 {
return h.Sum64()
}
func (m *metric) Accept() {
}
func (m *metric) Reject() {
}
func (m *metric) Drop() {
}
// Convert field to a supported type or nil if unconvertible
func convertField(v interface{}) interface{} {
switch v := v.(type) {

metric/tracking.go Normal file
@ -0,0 +1,171 @@
package metric
import (
"log"
"runtime"
"sync/atomic"
"github.com/influxdata/telegraf"
)
// NotifyFunc is called when a tracking metric is done being processed with
// the tracking information.
type NotifyFunc = func(track telegraf.DeliveryInfo)
// WithTracking adds tracking to the metric and registers the notify function
// to be called when processing is complete.
func WithTracking(metric telegraf.Metric, fn NotifyFunc) (telegraf.Metric, telegraf.TrackingID) {
return newTrackingMetric(metric, fn)
}
// WithBatchTracking adds tracking to the metrics and registers the notify
// function to be called when processing is complete.
func WithGroupTracking(metric []telegraf.Metric, fn NotifyFunc) ([]telegraf.Metric, telegraf.TrackingID) {
return newTrackingMetricGroup(metric, fn)
}
func EnableDebugFinalizer() {
finalizer = debugFinalizer
}
var (
lastID uint64
finalizer func(*trackingData)
)
func newTrackingID() telegraf.TrackingID {
atomic.AddUint64(&lastID, 1)
return telegraf.TrackingID(lastID)
}
func debugFinalizer(d *trackingData) {
rc := atomic.LoadInt32(&d.rc)
if rc != 0 {
log.Fatalf("E! [agent] metric collected with non-zero reference count rc: %d", rc)
}
}
type trackingData struct {
id telegraf.TrackingID
rc int32
acceptCount int32
rejectCount int32
notify NotifyFunc
}
func (d *trackingData) incr() {
atomic.AddInt32(&d.rc, 1)
}
func (d *trackingData) decr() int32 {
return atomic.AddInt32(&d.rc, -1)
}
func (d *trackingData) accept() {
atomic.AddInt32(&d.acceptCount, 1)
}
func (d *trackingData) reject() {
atomic.AddInt32(&d.rejectCount, 1)
}
type trackingMetric struct {
telegraf.Metric
d *trackingData
}
func newTrackingMetric(metric telegraf.Metric, fn NotifyFunc) (telegraf.Metric, telegraf.TrackingID) {
m := &trackingMetric{
Metric: metric,
d: &trackingData{
id: newTrackingID(),
rc: 1,
acceptCount: 0,
rejectCount: 0,
notify: fn,
},
}
if finalizer != nil {
runtime.SetFinalizer(m.d, finalizer)
}
return m, m.d.id
}
func newTrackingMetricGroup(group []telegraf.Metric, fn NotifyFunc) ([]telegraf.Metric, telegraf.TrackingID) {
d := &trackingData{
id: newTrackingID(),
rc: 0,
acceptCount: 0,
rejectCount: 0,
notify: fn,
}
for i, m := range group {
d.incr()
dm := &trackingMetric{
Metric: m,
d: d,
}
group[i] = dm
}
if finalizer != nil {
runtime.SetFinalizer(d, finalizer)
}
return group, d.id
}
func (m *trackingMetric) Copy() telegraf.Metric {
m.d.incr()
return &trackingMetric{
Metric: m.Metric.Copy(),
d: m.d,
}
}
func (m *trackingMetric) Accept() {
m.d.accept()
m.decr()
}
func (m *trackingMetric) Reject() {
m.d.reject()
m.decr()
}
func (m *trackingMetric) Drop() {
m.decr()
}
func (m *trackingMetric) decr() {
v := m.d.decr()
if v < 0 {
panic("negative refcount")
}
if v == 0 {
m.d.notify(
&deliveryInfo{
id: m.d.id,
accepted: int(m.d.acceptCount),
rejected: int(m.d.rejectCount),
},
)
}
}
type deliveryInfo struct {
id telegraf.TrackingID
accepted int
rejected int
}
func (r *deliveryInfo) ID() telegraf.TrackingID {
return r.id
}
func (r *deliveryInfo) Delivered() bool {
return r.rejected == 0
}
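As a minimal sketch of how these helpers fit together (assuming the `metric.New` constructor used by the tests below and the repository's `metric` import path), a caller wraps a group of metrics, hands the wrapped metrics onward, and gets a single notification once every copy has been accepted, rejected, or dropped:

```go
package main

import (
	"fmt"
	"time"

	"github.com/influxdata/telegraf"
	"github.com/influxdata/telegraf/metric"
)

func main() {
	// Build a plain metric; same signature as mustMetric in the tests below.
	m, err := metric.New(
		"cpu",
		map[string]string{},
		map[string]interface{}{"value": 42},
		time.Unix(0, 0),
	)
	if err != nil {
		panic(err)
	}

	// Wrap the group; the notify function runs once the reference count
	// reaches zero, i.e. every wrapped metric (and every Copy of it) has
	// been accepted, rejected, or dropped.
	tracked, id := metric.WithGroupTracking([]telegraf.Metric{m}, func(d telegraf.DeliveryInfo) {
		fmt.Printf("group %d delivered=%v\n", d.ID(), d.Delivered())
	})

	// An output would normally call Accept after a successful write.
	for _, tm := range tracked {
		tm.Accept()
	}
	_ = id
}
```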

metric/tracking_test.go (new file, 260 lines)

@ -0,0 +1,260 @@
package metric
import (
"testing"
"time"
"github.com/influxdata/telegraf"
"github.com/stretchr/testify/require"
)
func mustMetric(
name string,
tags map[string]string,
fields map[string]interface{},
tm time.Time,
tp ...telegraf.ValueType,
) telegraf.Metric {
m, err := New(name, tags, fields, tm, tp...)
if err != nil {
panic("mustMetric")
}
return m
}
type deliveries struct {
Info map[telegraf.TrackingID]telegraf.DeliveryInfo
}
func (d *deliveries) onDelivery(info telegraf.DeliveryInfo) {
d.Info[info.ID()] = info
}
func TestTracking(t *testing.T) {
tests := []struct {
name string
metric telegraf.Metric
actions func(metric telegraf.Metric)
delivered bool
}{
{
name: "accept",
metric: mustMetric(
"cpu",
map[string]string{},
map[string]interface{}{
"value": 42,
},
time.Unix(0, 0),
),
actions: func(m telegraf.Metric) {
m.Accept()
},
delivered: true,
},
{
name: "reject",
metric: mustMetric(
"cpu",
map[string]string{},
map[string]interface{}{
"value": 42,
},
time.Unix(0, 0),
),
actions: func(m telegraf.Metric) {
m.Reject()
},
delivered: false,
},
{
name: "accept copy",
metric: mustMetric(
"cpu",
map[string]string{},
map[string]interface{}{
"value": 42,
},
time.Unix(0, 0),
),
actions: func(m telegraf.Metric) {
m2 := m.Copy()
m.Accept()
m2.Accept()
},
delivered: true,
},
{
name: "copy with accept and done",
metric: mustMetric(
"cpu",
map[string]string{},
map[string]interface{}{
"value": 42,
},
time.Unix(0, 0),
),
actions: func(m telegraf.Metric) {
m2 := m.Copy()
m.Accept()
m2.Drop()
},
delivered: true,
},
{
name: "copy with mixed delivery",
metric: mustMetric(
"cpu",
map[string]string{},
map[string]interface{}{
"value": 42,
},
time.Unix(0, 0),
),
actions: func(m telegraf.Metric) {
m2 := m.Copy()
m.Accept()
m2.Reject()
},
delivered: false,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
d := &deliveries{
Info: make(map[telegraf.TrackingID]telegraf.DeliveryInfo),
}
metric, id := WithTracking(tt.metric, d.onDelivery)
tt.actions(metric)
info := d.Info[id]
require.Equal(t, tt.delivered, info.Delivered())
})
}
}
func TestGroupTracking(t *testing.T) {
tests := []struct {
name string
metrics []telegraf.Metric
actions func(metrics []telegraf.Metric)
delivered bool
}{
{
name: "accept",
metrics: []telegraf.Metric{
mustMetric(
"cpu",
map[string]string{},
map[string]interface{}{
"value": 42,
},
time.Unix(0, 0),
),
mustMetric(
"cpu",
map[string]string{},
map[string]interface{}{
"value": 42,
},
time.Unix(0, 0),
),
},
actions: func(metrics []telegraf.Metric) {
metrics[0].Accept()
metrics[1].Accept()
},
delivered: true,
},
{
name: "reject",
metrics: []telegraf.Metric{
mustMetric(
"cpu",
map[string]string{},
map[string]interface{}{
"value": 42,
},
time.Unix(0, 0),
),
mustMetric(
"cpu",
map[string]string{},
map[string]interface{}{
"value": 42,
},
time.Unix(0, 0),
),
},
actions: func(metrics []telegraf.Metric) {
metrics[0].Reject()
metrics[1].Reject()
},
delivered: false,
},
{
name: "remove",
metrics: []telegraf.Metric{
mustMetric(
"cpu",
map[string]string{},
map[string]interface{}{
"value": 42,
},
time.Unix(0, 0),
),
mustMetric(
"cpu",
map[string]string{},
map[string]interface{}{
"value": 42,
},
time.Unix(0, 0),
),
},
actions: func(metrics []telegraf.Metric) {
metrics[0].Drop()
metrics[1].Drop()
},
delivered: true,
},
{
name: "mixed",
metrics: []telegraf.Metric{
mustMetric(
"cpu",
map[string]string{},
map[string]interface{}{
"value": 42,
},
time.Unix(0, 0),
),
mustMetric(
"cpu",
map[string]string{},
map[string]interface{}{
"value": 42,
},
time.Unix(0, 0),
),
},
actions: func(metrics []telegraf.Metric) {
metrics[0].Accept()
metrics[1].Reject()
},
delivered: false,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
d := &deliveries{
Info: make(map[telegraf.TrackingID]telegraf.DeliveryInfo),
}
metrics, id := WithGroupTracking(tt.metrics, d.onDelivery)
tt.actions(metrics)
info := d.Info[id]
require.Equal(t, tt.delivered, info.Delivered())
})
}
}


@ -17,16 +17,7 @@ type Output interface {
// if the Output only accepts a fixed set of aggregations over a time period.
// These functions may be called concurrently to the Write function.
type AggregatingOutput interface {
// Connect to the Output
Connect() error
// Close any connections to the Output
Close() error
// Description returns a one-sentence description on the Output
Description() string
// SampleConfig returns the default configuration of the Output
SampleConfig() string
// Write takes in group of points to be written to the Output
Write(metrics []Metric) error
Output
// Add the metric to the aggregator
Add(in Metric)
@ -35,21 +26,3 @@ type AggregatingOutput interface {
// Reset signals that the aggregator period is completed.
Reset()
}
type ServiceOutput interface {
// Connect to the Output
Connect() error
// Close any connections to the Output
Close() error
// Description returns a one-sentence description on the Output
Description() string
// SampleConfig returns the default configuration of the Output
SampleConfig() string
// Write takes in group of points to be written to the Output
Write(metrics []Metric) error
// Start the "service" that will provide an Output
Start() error
// Stop the "service" that will provide an Output
Stop()
}


@ -133,7 +133,6 @@ func (m *BasicStats) Add(in telegraf.Metric) {
}
func (m *BasicStats) Push(acc telegraf.Accumulator) {
config := getConfiguredStats(m)
for _, aggregate := range m.cache {


@ -13,7 +13,6 @@ For an introduction to AMQP see:
The following defaults are known to work with RabbitMQ:
```toml
# AMQP consumer plugin
[[inputs.amqp_consumer]]
## Broker to consume from.
## deprecated in 1.7; use the brokers option
@ -46,16 +45,26 @@ The following defaults are known to work with RabbitMQ:
## AMQP queue name
queue = "telegraf"
## AMQP queue durability can be "transient" or "durable".
queue_durability = "durable"
## Binding Key
binding_key = "#"
## Maximum number of messages server should give to the worker.
# prefetch_count = 50
## Maximum messages to read from the broker that have not been written by an
## output. For best throughput set based on the number of metrics within
## each message and the size of the output's metric_batch_size.
##
## For example, if each message from the queue contains 10 metrics and the
## output metric_batch_size is 1000, setting this to 100 will ensure that a
## full batch is collected and the write is triggered immediately without
## waiting until the next flush_interval.
# max_undelivered_messages = 1000
## Auth method. PLAIN and EXTERNAL are supported
## Using EXTERNAL requires enabling the rabbitmq_auth_mechanism_ssl plugin as
## described here: https://www.rabbitmq.com/plugins.html


@ -1,6 +1,7 @@
package amqp_consumer
import (
"context"
"errors"
"fmt"
"log"
@ -9,25 +10,32 @@ import (
"sync"
"time"
"github.com/streadway/amqp"
"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/internal/tls"
"github.com/influxdata/telegraf/plugins/inputs"
"github.com/influxdata/telegraf/plugins/parsers"
"github.com/streadway/amqp"
)
const (
defaultMaxUndeliveredMessages = 1000
)
type empty struct{}
type semaphore chan empty
// AMQPConsumer is the top level struct for this plugin
type AMQPConsumer struct {
URL string `toml:"url"` // deprecated in 1.7; use brokers
Brokers []string `toml:"brokers"`
Username string `toml:"username"`
Password string `toml:"password"`
Exchange string `toml:"exchange"`
ExchangeType string `toml:"exchange_type"`
ExchangeDurability string `toml:"exchange_durability"`
ExchangePassive bool `toml:"exchange_passive"`
ExchangeArguments map[string]string `toml:"exchange_arguments"`
URL string `toml:"url"` // deprecated in 1.7; use brokers
Brokers []string `toml:"brokers"`
Username string `toml:"username"`
Password string `toml:"password"`
Exchange string `toml:"exchange"`
ExchangeType string `toml:"exchange_type"`
ExchangeDurability string `toml:"exchange_durability"`
ExchangePassive bool `toml:"exchange_passive"`
ExchangeArguments map[string]string `toml:"exchange_arguments"`
MaxUndeliveredMessages int `toml:"max_undelivered_messages"`
// Queue Name
Queue string `toml:"queue"`
@ -44,9 +52,12 @@ type AMQPConsumer struct {
AuthMethod string
tls.ClientConfig
deliveries map[telegraf.TrackingID]amqp.Delivery
parser parsers.Parser
conn *amqp.Connection
wg *sync.WaitGroup
cancel context.CancelFunc
}
type externalAuth struct{}
@ -114,6 +125,16 @@ func (a *AMQPConsumer) SampleConfig() string {
## Maximum number of messages server should give to the worker.
# prefetch_count = 50
## Maximum messages to read from the broker that have not been written by an
## output. For best throughput set based on the number of metrics within
## each message and the size of the output's metric_batch_size.
##
## For example, if each message from the queue contains 10 metrics and the
## output metric_batch_size is 1000, setting this to 100 will ensure that a
## full batch is collected and the write is triggered immediately without
## waiting until the next flush_interval.
# max_undelivered_messages = 1000
## Auth method. PLAIN and EXTERNAL are supported
## Using EXTERNAL requires enabling the rabbitmq_auth_mechanism_ssl plugin as
## described here: https://www.rabbitmq.com/plugins.html
@ -185,9 +206,15 @@ func (a *AMQPConsumer) Start(acc telegraf.Accumulator) error {
return err
}
ctx, cancel := context.WithCancel(context.Background())
a.cancel = cancel
a.wg = &sync.WaitGroup{}
a.wg.Add(1)
go a.process(msgs, acc)
go func() {
defer a.wg.Done()
a.process(ctx, msgs, acc)
}()
go func() {
for {
@ -196,7 +223,7 @@ func (a *AMQPConsumer) Start(acc telegraf.Accumulator) error {
break
}
log.Printf("I! AMQP consumer connection closed: %s; trying to reconnect", err)
log.Printf("I! [inputs.amqp_consumer] connection closed: %s; trying to reconnect", err)
for {
msgs, err := a.connect(amqpConf)
if err != nil {
@ -206,7 +233,10 @@ func (a *AMQPConsumer) Start(acc telegraf.Accumulator) error {
}
a.wg.Add(1)
go a.process(msgs, acc)
go func() {
defer a.wg.Done()
a.process(ctx, msgs, acc)
}()
break
}
}
@ -224,14 +254,14 @@ func (a *AMQPConsumer) connect(amqpConf *amqp.Config) (<-chan amqp.Delivery, err
p := rand.Perm(len(brokers))
for _, n := range p {
broker := brokers[n]
log.Printf("D! [amqp_consumer] connecting to %q", broker)
log.Printf("D! [inputs.amqp_consumer] connecting to %q", broker)
conn, err := amqp.DialConfig(broker, *amqpConf)
if err == nil {
a.conn = conn
log.Printf("D! [amqp_consumer] connected to %q", broker)
log.Printf("D! [inputs.amqp_consumer] connected to %q", broker)
break
}
log.Printf("D! [amqp_consumer] error connecting to %q", broker)
log.Printf("D! [inputs.amqp_consumer] error connecting to %q", broker)
}
if a.conn == nil {
@ -320,7 +350,6 @@ func (a *AMQPConsumer) connect(amqpConf *amqp.Config) (<-chan amqp.Delivery, err
return nil, fmt.Errorf("Failed establishing connection to queue: %s", err)
}
log.Println("I! Started AMQP consumer")
return msgs, err
}
@ -361,42 +390,101 @@ func declareExchange(
}
// Read messages from queue and add them to the Accumulator
func (a *AMQPConsumer) process(msgs <-chan amqp.Delivery, acc telegraf.Accumulator) {
defer a.wg.Done()
for d := range msgs {
metrics, err := a.parser.Parse(d.Body)
if err != nil {
log.Printf("E! %v: error parsing metric - %v", err, string(d.Body))
} else {
for _, m := range metrics {
acc.AddFields(m.Name(), m.Fields(), m.Tags(), m.Time())
func (a *AMQPConsumer) process(ctx context.Context, msgs <-chan amqp.Delivery, ac telegraf.Accumulator) {
a.deliveries = make(map[telegraf.TrackingID]amqp.Delivery)
acc := ac.WithTracking(a.MaxUndeliveredMessages)
sem := make(semaphore, a.MaxUndeliveredMessages)
for {
select {
case <-ctx.Done():
return
case track := <-acc.Delivered():
if a.onDelivery(track) {
<-sem
}
case sem <- empty{}:
select {
case <-ctx.Done():
return
case track := <-acc.Delivered():
if a.onDelivery(track) {
<-sem
<-sem
}
case d, ok := <-msgs:
if !ok {
return
}
err := a.onMessage(acc, d)
if err != nil {
acc.AddError(err)
<-sem
}
}
}
d.Ack(false)
}
log.Printf("I! AMQP consumer queue closed")
}
func (a *AMQPConsumer) onMessage(acc telegraf.TrackingAccumulator, d amqp.Delivery) error {
metrics, err := a.parser.Parse(d.Body)
if err != nil {
return err
}
id := acc.AddTrackingMetricGroup(metrics)
a.deliveries[id] = d
return nil
}
func (a *AMQPConsumer) onDelivery(track telegraf.DeliveryInfo) bool {
delivery, ok := a.deliveries[track.ID()]
if !ok {
// Added by a previous connection
return false
}
if track.Delivered() {
err := delivery.Ack(false)
if err != nil {
log.Printf("E! [inputs.amqp_consumer] Unable to ack written delivery: %d: %v",
delivery.DeliveryTag, err)
a.conn.Close()
}
} else {
err := delivery.Reject(false)
if err != nil {
log.Printf("E! [inputs.amqp_consumer] Unable to reject failed delivery: %d: %v",
delivery.DeliveryTag, err)
a.conn.Close()
}
}
delete(a.deliveries, track.ID())
return true
}
func (a *AMQPConsumer) Stop() {
a.cancel()
a.wg.Wait()
err := a.conn.Close()
if err != nil && err != amqp.ErrClosed {
log.Printf("E! Error closing AMQP connection: %s", err)
log.Printf("E! [inputs.amqp_consumer] Error closing AMQP connection: %s", err)
return
}
a.wg.Wait()
log.Println("I! Stopped AMQP service")
}
func init() {
inputs.Add("amqp_consumer", func() telegraf.Input {
return &AMQPConsumer{
URL: DefaultBroker,
AuthMethod: DefaultAuthMethod,
ExchangeType: DefaultExchangeType,
ExchangeDurability: DefaultExchangeDurability,
QueueDurability: DefaultQueueDurability,
PrefetchCount: DefaultPrefetchCount,
URL: DefaultBroker,
AuthMethod: DefaultAuthMethod,
ExchangeType: DefaultExchangeType,
ExchangeDurability: DefaultExchangeDurability,
QueueDurability: DefaultQueueDurability,
PrefetchCount: DefaultPrefetchCount,
MaxUndeliveredMessages: defaultMaxUndeliveredMessages,
}
})
}
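The nested select in `process` is the core back-pressure mechanism, and it reappears with small variations in the kafka_consumer, mqtt_consumer, nats_consumer, and nsq_consumer changes below. The stand-alone sketch that follows strips away AMQP entirely; the channel types and names are illustrative only, but the select structure mirrors the plugin code: a semaphore slot is reserved before a message is read, and a slot is released for every delivery notification.

```go
package main

import (
	"context"
	"fmt"
)

type empty struct{}
type semaphore chan empty

// process is an illustrative stand-in for the consumer loops above: at most
// cap(sem) messages may be read but not yet delivered to an output.
func process(ctx context.Context, msgs <-chan string, delivered <-chan int) {
	sem := make(semaphore, 4)
	for {
		select {
		case <-ctx.Done():
			return
		case id := <-delivered:
			fmt.Println("delivered", id)
			<-sem // release the slot held by the delivered message
		case sem <- empty{}: // reserve a slot before reading a new message
			select {
			case <-ctx.Done():
				return
			case id := <-delivered:
				fmt.Println("delivered", id)
				<-sem // once for the delivered message
				<-sem // once for the slot reserved to enter this case
			case m, ok := <-msgs:
				if !ok {
					return
				}
				// In the real plugins the message is parsed and handed to
				// acc.AddTrackingMetricGroup here; the slot stays reserved
				// until the matching delivery notification arrives.
				fmt.Println("read", m)
			}
		}
	}
}

func main() {}
```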


@ -18,52 +18,54 @@ plugin.
memstats are taken from the Go runtime: https://golang.org/pkg/runtime/#MemStats
- internal\_memstats
- alloc\_bytes
- internal_memstats
- alloc_bytes
- frees
- heap\_alloc\_bytes
- heap\_idle\_bytes
- heap\_in\_use\_bytes
- heap\_objects\_bytes
- heap\_released\_bytes
- heap\_sys\_bytes
- heap_alloc_bytes
- heap_idle_bytes
- heap_in_use_bytes
- heap_objects_bytes
- heap_released_bytes
- heap_sys_bytes
- mallocs
- num\_gc
- pointer\_lookups
- sys\_bytes
- total\_alloc\_bytes
- num_gc
- pointer_lookups
- sys_bytes
- total_alloc_bytes
agent stats collect aggregate stats on all telegraf plugins.
- internal\_agent
- gather\_errors
- metrics\_dropped
- metrics\_gathered
- metrics\_written
- internal_agent
- gather_errors
- metrics_dropped
- metrics_gathered
- metrics_written
internal\_gather stats collect aggregate stats on all input plugins
internal_gather stats collect aggregate stats on all input plugins
that are of the same input type. They are tagged with `input=<plugin_name>`.
- internal\_gather
- gather\_time\_ns
- metrics\_gathered
- internal_gather
- gather_time_ns
- metrics_gathered
internal\_write stats collect aggregate stats on all output plugins
internal_write stats collect aggregate stats on all output plugins
that are of the same output type. They are tagged with `output=<plugin_name>`.
- internal\_write
- buffer\_limit
- buffer\_size
- metrics\_written
- metrics\_filtered
- write\_time\_ns
- internal_write
- buffer_limit
- buffer_size
- metrics_added
- metrics_written
- metrics_dropped
- metrics_filtered
- write_time_ns
internal\_\<plugin\_name\> are metrics which are defined on a per-plugin basis, and
internal_<plugin_name> are metrics which are defined on a per-plugin basis, and
usually contain tags which differentiate each instance of a particular type of
plugin.
- internal\_\<plugin\_name\>
- internal_<plugin_name>
- individual plugin-specific fields, such as requests counts.
### Tags:
@ -76,7 +78,7 @@ to each particular plugin.
```
internal_memstats,host=tyrion alloc_bytes=4457408i,sys_bytes=10590456i,pointer_lookups=7i,mallocs=17642i,frees=7473i,heap_sys_bytes=6848512i,heap_idle_bytes=1368064i,heap_in_use_bytes=5480448i,heap_released_bytes=0i,total_alloc_bytes=6875560i,heap_alloc_bytes=4457408i,heap_objects_bytes=10169i,num_gc=2i 1480682800000000000
internal_agent,host=tyrion metrics_written=18i,metrics_dropped=0i,metrics_gathered=19i,gather_errors=0i 1480682800000000000
internal_write,output=file,host=tyrion buffer_limit=10000i,write_time_ns=636609i,metrics_written=18i,buffer_size=0i 1480682800000000000
internal_write,output=file,host=tyrion buffer_limit=10000i,write_time_ns=636609i,metrics_added=18i,metrics_written=18i,buffer_size=0i 1480682800000000000
internal_gather,input=internal,host=tyrion metrics_gathered=19i,gather_time_ns=442114i 1480682800000000000
internal_gather,input=http_listener,host=tyrion metrics_gathered=0i,gather_time_ns=167285i 1480682800000000000
internal_http_listener,address=:8186,host=tyrion queries_received=0i,writes_received=0i,requests_received=0i,buffers_created=0i,requests_served=0i,pings_received=0i,bytes_received=0i,not_founds_served=0i,pings_served=0i,queries_served=0i,writes_served=0i 1480682800000000000


@ -1,18 +1,14 @@
# Kafka Consumer Input Plugin
The [Kafka](http://kafka.apache.org/) consumer plugin polls a specified Kafka
topic and adds messages to InfluxDB. The plugin assumes messages follow the
line protocol. [Consumer Group](http://godoc.org/github.com/wvanbergen/kafka/consumergroup)
is used to talk to the Kafka cluster so multiple instances of telegraf can read
from the same topic in parallel.
The [Kafka][kafka] consumer plugin reads from Kafka
and creates metrics using one of the supported [input data formats][].
For old kafka version (< 0.8), please use the kafka_consumer_legacy input plugin
For old kafka version (< 0.8), please use the [kafka_consumer_legacy][] input plugin
and use the old zookeeper connection method.
## Configuration
### Configuration
```toml
# Read metrics from Kafka topic(s)
[[inputs.kafka_consumer]]
## kafka servers
brokers = ["localhost:9092"]
@ -44,18 +40,27 @@ and use the old zookeeper connection method.
## Offset (must be either "oldest" or "newest")
offset = "oldest"
## Maximum length of a message to consume, in bytes (default 0/unlimited);
## larger messages are dropped
max_message_len = 1000000
## Maximum messages to read from the broker that have not been written by an
## output. For best throughput set based on the number of metrics within
## each message and the size of the output's metric_batch_size.
##
## For example, if each message from the queue contains 10 metrics and the
## output metric_batch_size is 1000, setting this to 100 will ensure that a
## full batch is collected and the write is triggered immediately without
## waiting until the next flush_interval.
# max_undelivered_messages = 1000
## Data format to consume.
## Each data format has its own unique set of configuration options, read
## more about them here:
## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md
data_format = "influx"
## Maximum length of a message to consume, in bytes (default 0/unlimited);
## larger messages are dropped
max_message_len = 1000000
```
## Testing
Running integration tests requires running Zookeeper & Kafka. See Makefile
for kafka container command.
[kafka]: https://kafka.apache.org
[kafka_consumer_legacy]: /plugins/inputs/kafka_consumer_legacy/README.md
[input data formats]: /docs/DATA_FORMATS_INPUT.md


@ -1,55 +1,54 @@
package kafka_consumer
import (
"context"
"fmt"
"log"
"strings"
"sync"
"github.com/Shopify/sarama"
cluster "github.com/bsm/sarama-cluster"
"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/internal/tls"
"github.com/influxdata/telegraf/plugins/inputs"
"github.com/influxdata/telegraf/plugins/parsers"
"github.com/Shopify/sarama"
cluster "github.com/bsm/sarama-cluster"
)
const (
defaultMaxUndeliveredMessages = 1000
)
type empty struct{}
type semaphore chan empty
type Consumer interface {
Errors() <-chan error
Messages() <-chan *sarama.ConsumerMessage
MarkOffset(msg *sarama.ConsumerMessage, metadata string)
Close() error
}
type Kafka struct {
ConsumerGroup string
ClientID string `toml:"client_id"`
Topics []string
Brokers []string
MaxMessageLen int
Version string `toml:"version"`
Cluster *cluster.Consumer
ConsumerGroup string `toml:"consumer_group"`
ClientID string `toml:"client_id"`
Topics []string `toml:"topics"`
Brokers []string `toml:"brokers"`
MaxMessageLen int `toml:"max_message_len"`
Version string `toml:"version"`
MaxUndeliveredMessages int `toml:"max_undelivered_messages"`
Offset string `toml:"offset"`
SASLUsername string `toml:"sasl_username"`
SASLPassword string `toml:"sasl_password"`
tls.ClientConfig
// SASL Username
SASLUsername string `toml:"sasl_username"`
// SASL Password
SASLPassword string `toml:"sasl_password"`
cluster Consumer
parser parsers.Parser
wg *sync.WaitGroup
cancel context.CancelFunc
// Legacy metric buffer support
MetricBuffer int
// TODO remove PointBuffer, legacy support
PointBuffer int
Offset string
parser parsers.Parser
sync.Mutex
// channel for all incoming kafka messages
in <-chan *sarama.ConsumerMessage
// channel for all kafka consumer errors
errs <-chan error
done chan struct{}
// keep the accumulator internally:
acc telegraf.Accumulator
// Unconfirmed messages
messages map[telegraf.TrackingID]*sarama.ConsumerMessage
// doNotCommitMsgs tells the parser not to call CommitUpTo on the consumer
// this is mostly for test purposes, but there may be a use-case for it later.
@ -86,16 +85,25 @@ var sampleConfig = `
consumer_group = "telegraf_metrics_consumers"
## Offset (must be either "oldest" or "newest")
offset = "oldest"
## Maximum length of a message to consume, in bytes (default 0/unlimited);
## larger messages are dropped
max_message_len = 1000000
## Maximum messages to read from the broker that have not been written by an
## output. For best throughput set based on the number of metrics within
## each message and the size of the output's metric_batch_size.
##
## For example, if each message from the queue contains 10 metrics and the
## output metric_batch_size is 1000, setting this to 100 will ensure that a
## full batch is collected and the write is triggered immediately without
## waiting until the next flush_interval.
# max_undelivered_messages = 1000
## Data format to consume.
## Each data format has its own unique set of configuration options, read
## more about them here:
## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md
data_format = "influx"
## Maximum length of a message to consume, in bytes (default 0/unlimited);
## larger messages are dropped
max_message_len = 1000000
`
func (k *Kafka) SampleConfig() string {
@ -111,12 +119,8 @@ func (k *Kafka) SetParser(parser parsers.Parser) {
}
func (k *Kafka) Start(acc telegraf.Accumulator) error {
k.Lock()
defer k.Unlock()
var clusterErr error
k.acc = acc
config := cluster.NewConfig()
if k.Version != "" {
@ -159,13 +163,13 @@ func (k *Kafka) Start(acc telegraf.Accumulator) error {
case "newest":
config.Consumer.Offsets.Initial = sarama.OffsetNewest
default:
log.Printf("I! WARNING: Kafka consumer invalid offset '%s', using 'oldest'\n",
log.Printf("I! WARNING: Kafka consumer invalid offset '%s', using 'oldest'",
k.Offset)
config.Consumer.Offsets.Initial = sarama.OffsetOldest
}
if k.Cluster == nil {
k.Cluster, clusterErr = cluster.NewConsumer(
if k.cluster == nil {
k.cluster, clusterErr = cluster.NewConsumer(
k.Brokers,
k.ConsumerGroup,
k.Topics,
@ -173,67 +177,110 @@ func (k *Kafka) Start(acc telegraf.Accumulator) error {
)
if clusterErr != nil {
log.Printf("E! Error when creating Kafka Consumer, brokers: %v, topics: %v\n",
log.Printf("E! Error when creating Kafka Consumer, brokers: %v, topics: %v",
k.Brokers, k.Topics)
return clusterErr
}
// Setup message and error channels
k.in = k.Cluster.Messages()
k.errs = k.Cluster.Errors()
}
k.done = make(chan struct{})
// Start the kafka message reader
go k.receiver()
log.Printf("I! Started the kafka consumer service, brokers: %v, topics: %v\n",
ctx, cancel := context.WithCancel(context.Background())
k.cancel = cancel
// Start consumer goroutine
k.wg = &sync.WaitGroup{}
k.wg.Add(1)
go func() {
defer k.wg.Done()
k.receiver(ctx, acc)
}()
log.Printf("I! Started the kafka consumer service, brokers: %v, topics: %v",
k.Brokers, k.Topics)
return nil
}
// receiver() reads all incoming messages from the consumer, and parses them into
// influxdb metric points.
func (k *Kafka) receiver() {
func (k *Kafka) receiver(ctx context.Context, ac telegraf.Accumulator) {
k.messages = make(map[telegraf.TrackingID]*sarama.ConsumerMessage)
acc := ac.WithTracking(k.MaxUndeliveredMessages)
sem := make(semaphore, k.MaxUndeliveredMessages)
for {
select {
case <-k.done:
case <-ctx.Done():
return
case err := <-k.errs:
if err != nil {
k.acc.AddError(fmt.Errorf("Consumer Error: %s\n", err))
}
case msg := <-k.in:
if k.MaxMessageLen != 0 && len(msg.Value) > k.MaxMessageLen {
k.acc.AddError(fmt.Errorf("Message longer than max_message_len (%d > %d)",
len(msg.Value), k.MaxMessageLen))
} else {
metrics, err := k.parser.Parse(msg.Value)
case track := <-acc.Delivered():
<-sem
k.onDelivery(track)
case err := <-k.cluster.Errors():
acc.AddError(err)
case sem <- empty{}:
select {
case <-ctx.Done():
return
case track := <-acc.Delivered():
// Once for the delivered message, once to leave the case
<-sem
<-sem
k.onDelivery(track)
case err := <-k.cluster.Errors():
<-sem
acc.AddError(err)
case msg := <-k.cluster.Messages():
err := k.onMessage(acc, msg)
if err != nil {
k.acc.AddError(fmt.Errorf("Message Parse Error\nmessage: %s\nerror: %s",
string(msg.Value), err.Error()))
acc.AddError(err)
<-sem
}
for _, metric := range metrics {
k.acc.AddFields(metric.Name(), metric.Fields(), metric.Tags(), metric.Time())
}
}
if !k.doNotCommitMsgs {
// TODO(cam) this locking can be removed if this PR gets merged:
// https://github.com/wvanbergen/kafka/pull/84
k.Lock()
k.Cluster.MarkOffset(msg, "")
k.Unlock()
}
}
}
}
func (k *Kafka) markOffset(msg *sarama.ConsumerMessage) {
if !k.doNotCommitMsgs {
k.cluster.MarkOffset(msg, "")
}
}
func (k *Kafka) onMessage(acc telegraf.TrackingAccumulator, msg *sarama.ConsumerMessage) error {
if k.MaxMessageLen != 0 && len(msg.Value) > k.MaxMessageLen {
k.markOffset(msg)
return fmt.Errorf("Message longer than max_message_len (%d > %d)",
len(msg.Value), k.MaxMessageLen)
}
metrics, err := k.parser.Parse(msg.Value)
if err != nil {
return err
}
id := acc.AddTrackingMetricGroup(metrics)
k.messages[id] = msg
return nil
}
func (k *Kafka) onDelivery(track telegraf.DeliveryInfo) {
msg, ok := k.messages[track.ID()]
if !ok {
log.Printf("E! [inputs.kafka_consumer] Could not mark message delivered: %d", track.ID())
}
if track.Delivered() {
k.markOffset(msg)
}
delete(k.messages, track.ID())
}
func (k *Kafka) Stop() {
k.Lock()
defer k.Unlock()
close(k.done)
if err := k.Cluster.Close(); err != nil {
k.acc.AddError(fmt.Errorf("Error closing consumer: %s\n", err.Error()))
k.cancel()
k.wg.Wait()
if err := k.cluster.Close(); err != nil {
log.Printf("E! [inputs.kafka_consumer] Error closing consumer: %v", err)
}
}
@ -243,6 +290,8 @@ func (k *Kafka) Gather(acc telegraf.Accumulator) error {
func init() {
inputs.Add("kafka_consumer", func() telegraf.Input {
return &Kafka{}
return &Kafka{
MaxUndeliveredMessages: defaultMaxUndeliveredMessages,
}
})
}


@ -38,7 +38,6 @@ func TestReadsMetricsFromKafka(t *testing.T) {
ConsumerGroup: "telegraf_test_consumers",
Topics: []string{testTopic},
Brokers: brokerPeers,
PointBuffer: 100000,
Offset: "oldest",
}
p, _ := parsers.NewInfluxParser()


@ -1,13 +1,14 @@
package kafka_consumer
import (
"context"
"strings"
"testing"
"github.com/Shopify/sarama"
"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/plugins/parsers"
"github.com/influxdata/telegraf/testutil"
"github.com/Shopify/sarama"
"github.com/stretchr/testify/assert"
)
@ -18,31 +19,57 @@ const (
invalidMsg = "cpu_load_short,host=server01 1422568543702900257\n"
)
func newTestKafka() (*Kafka, chan *sarama.ConsumerMessage) {
in := make(chan *sarama.ConsumerMessage, 1000)
k := Kafka{
ConsumerGroup: "test",
Topics: []string{"telegraf"},
Brokers: []string{"localhost:9092"},
Offset: "oldest",
in: in,
doNotCommitMsgs: true,
errs: make(chan error, 1000),
done: make(chan struct{}),
type TestConsumer struct {
errors chan error
messages chan *sarama.ConsumerMessage
}
func (c *TestConsumer) Errors() <-chan error {
return c.errors
}
func (c *TestConsumer) Messages() <-chan *sarama.ConsumerMessage {
return c.messages
}
func (c *TestConsumer) MarkOffset(msg *sarama.ConsumerMessage, metadata string) {
}
func (c *TestConsumer) Close() error {
return nil
}
func (c *TestConsumer) Inject(msg *sarama.ConsumerMessage) {
c.messages <- msg
}
func newTestKafka() (*Kafka, *TestConsumer) {
consumer := &TestConsumer{
errors: make(chan error),
messages: make(chan *sarama.ConsumerMessage, 1000),
}
return &k, in
k := Kafka{
cluster: consumer,
ConsumerGroup: "test",
Topics: []string{"telegraf"},
Brokers: []string{"localhost:9092"},
Offset: "oldest",
MaxUndeliveredMessages: defaultMaxUndeliveredMessages,
doNotCommitMsgs: true,
messages: make(map[telegraf.TrackingID]*sarama.ConsumerMessage),
}
return &k, consumer
}
// Test that the parser parses kafka messages into points
func TestRunParser(t *testing.T) {
k, in := newTestKafka()
k, consumer := newTestKafka()
acc := testutil.Accumulator{}
k.acc = &acc
defer close(k.done)
ctx := context.Background()
k.parser, _ = parsers.NewInfluxParser()
go k.receiver()
in <- saramaMsg(testMsg)
go k.receiver(ctx, &acc)
consumer.Inject(saramaMsg(testMsg))
acc.Wait(1)
assert.Equal(t, acc.NFields(), 1)
@ -50,14 +77,13 @@ func TestRunParser(t *testing.T) {
// Test that the parser ignores invalid messages
func TestRunParserInvalidMsg(t *testing.T) {
k, in := newTestKafka()
k, consumer := newTestKafka()
acc := testutil.Accumulator{}
k.acc = &acc
defer close(k.done)
ctx := context.Background()
k.parser, _ = parsers.NewInfluxParser()
go k.receiver()
in <- saramaMsg(invalidMsg)
go k.receiver(ctx, &acc)
consumer.Inject(saramaMsg(invalidMsg))
acc.WaitError(1)
assert.Equal(t, acc.NFields(), 0)
@ -66,15 +92,14 @@ func TestRunParserInvalidMsg(t *testing.T) {
// Test that overlong messages are dropped
func TestDropOverlongMsg(t *testing.T) {
const maxMessageLen = 64 * 1024
k, in := newTestKafka()
k, consumer := newTestKafka()
k.MaxMessageLen = maxMessageLen
acc := testutil.Accumulator{}
k.acc = &acc
defer close(k.done)
ctx := context.Background()
overlongMsg := strings.Repeat("v", maxMessageLen+1)
go k.receiver()
in <- saramaMsg(overlongMsg)
go k.receiver(ctx, &acc)
consumer.Inject(saramaMsg(overlongMsg))
acc.WaitError(1)
assert.Equal(t, acc.NFields(), 0)
@ -82,14 +107,13 @@ func TestDropOverlongMsg(t *testing.T) {
// Test that the parser parses kafka messages into points
func TestRunParserAndGather(t *testing.T) {
k, in := newTestKafka()
k, consumer := newTestKafka()
acc := testutil.Accumulator{}
k.acc = &acc
defer close(k.done)
ctx := context.Background()
k.parser, _ = parsers.NewInfluxParser()
go k.receiver()
in <- saramaMsg(testMsg)
go k.receiver(ctx, &acc)
consumer.Inject(saramaMsg(testMsg))
acc.Wait(1)
acc.GatherError(k.Gather)
@ -101,14 +125,13 @@ func TestRunParserAndGather(t *testing.T) {
// Test that the parser parses kafka messages into points
func TestRunParserAndGatherGraphite(t *testing.T) {
k, in := newTestKafka()
k, consumer := newTestKafka()
acc := testutil.Accumulator{}
k.acc = &acc
defer close(k.done)
ctx := context.Background()
k.parser, _ = parsers.NewGraphiteParser("_", []string{}, nil)
go k.receiver()
in <- saramaMsg(testMsgGraphite)
go k.receiver(ctx, &acc)
consumer.Inject(saramaMsg(testMsgGraphite))
acc.Wait(1)
acc.GatherError(k.Gather)
@ -120,17 +143,16 @@ func TestRunParserAndGatherGraphite(t *testing.T) {
// Test that the parser parses kafka messages into points
func TestRunParserAndGatherJSON(t *testing.T) {
k, in := newTestKafka()
k, consumer := newTestKafka()
acc := testutil.Accumulator{}
k.acc = &acc
defer close(k.done)
ctx := context.Background()
k.parser, _ = parsers.NewParser(&parsers.Config{
DataFormat: "json",
MetricName: "kafka_json_test",
})
go k.receiver()
in <- saramaMsg(testMsgJSON)
go k.receiver(ctx, &acc)
consumer.Inject(saramaMsg(testMsgJSON))
acc.Wait(1)
acc.GatherError(k.Gather)


@ -1,14 +1,11 @@
# MQTT Consumer Input Plugin
The [MQTT](http://mqtt.org/) consumer plugin reads from
specified MQTT topics and adds messages to InfluxDB.
The plugin expects messages in the
[Telegraf Input Data Formats](https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md).
The [MQTT][mqtt] consumer plugin reads from the specified MQTT topics
and creates metrics using one of the supported [input data formats][].
### Configuration:
```toml
# Read metrics from MQTT topic(s)
[[inputs.mqtt_consumer]]
## MQTT broker URLs to be used. The format should be scheme://host:port,
## schema can be tcp, ssl, or ws.
@ -26,6 +23,16 @@ The plugin expects messages in the
## Connection timeout for initial connection in seconds
connection_timeout = "30s"
## Maximum messages to read from the broker that have not been written by an
## output. For best throughput set based on the number of metrics within
## each message and the size of the output's metric_batch_size.
##
## For example, if each message from the queue contains 10 metrics and the
## output metric_batch_size is 1000, setting this to 100 will ensure that a
## full batch is collected and the write is triggered immediately without
## waiting until the next flush_interval.
# max_undelivered_messages = 1000
## Topics to subscribe to
topics = [
"telegraf/host01/cpu",
@ -62,3 +69,6 @@ The plugin expects messages in the
- All measurements are tagged with the incoming topic, ie
`topic=telegraf/host01/cpu`
[mqtt]: https://mqtt.org
[input data formats]: /docs/DATA_FORMATS_INPUT.md


@ -1,25 +1,31 @@
package mqtt_consumer
import (
"context"
"errors"
"fmt"
"log"
"strings"
"time"
"github.com/eclipse/paho.mqtt.golang"
"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/internal"
"github.com/influxdata/telegraf/internal/tls"
"github.com/influxdata/telegraf/plugins/inputs"
"github.com/influxdata/telegraf/plugins/parsers"
"github.com/eclipse/paho.mqtt.golang"
)
// 30 Seconds is the default used by paho.mqtt.golang
var defaultConnectionTimeout = internal.Duration{Duration: 30 * time.Second}
var (
// 30 Seconds is the default used by paho.mqtt.golang
defaultConnectionTimeout = internal.Duration{Duration: 30 * time.Second}
defaultMaxUndeliveredMessages = 1000
)
type ConnectionState int
type empty struct{}
type semaphore chan empty
const (
Disconnected ConnectionState = iota
@ -28,12 +34,13 @@ const (
)
type MQTTConsumer struct {
Servers []string
Topics []string
Username string
Password string
QoS int `toml:"qos"`
ConnectionTimeout internal.Duration `toml:"connection_timeout"`
Servers []string
Topics []string
Username string
Password string
QoS int `toml:"qos"`
ConnectionTimeout internal.Duration `toml:"connection_timeout"`
MaxUndeliveredMessages int `toml:"max_undelivered_messages"`
parser parsers.Parser
@ -45,9 +52,14 @@ type MQTTConsumer struct {
tls.ClientConfig
client mqtt.Client
acc telegraf.Accumulator
acc telegraf.TrackingAccumulator
state ConnectionState
subscribed bool
sem semaphore
messages map[telegraf.TrackingID]bool
ctx context.Context
cancel context.CancelFunc
}
var sampleConfig = `
@ -67,6 +79,16 @@ var sampleConfig = `
## Connection timeout for initial connection in seconds
connection_timeout = "30s"
## Maximum messages to read from the broker that have not been written by an
## output. For best throughput set based on the number of metrics within
## each message and the size of the output's metric_batch_size.
##
## For example, if each message from the queue contains 10 metrics and the
## output metric_batch_size is 1000, setting this to 100 will ensure that a
## full batch is collected and the write is triggered immediately without
## waiting until the next flush_interval.
# max_undelivered_messages = 1000
## Topics to subscribe to
topics = [
"telegraf/host01/cpu",
@ -118,7 +140,6 @@ func (m *MQTTConsumer) Start(acc telegraf.Accumulator) error {
return errors.New("persistent_session requires client_id")
}
m.acc = acc
if m.QoS > 2 || m.QoS < 0 {
return fmt.Errorf("qos value must be 0, 1, or 2: %d", m.QoS)
}
@ -127,6 +148,9 @@ func (m *MQTTConsumer) Start(acc telegraf.Accumulator) error {
return fmt.Errorf("connection_timeout must be greater than 1s: %s", m.ConnectionTimeout.Duration)
}
m.acc = acc.WithTracking(m.MaxUndeliveredMessages)
m.ctx, m.cancel = context.WithCancel(context.Background())
opts, err := m.createOpts()
if err != nil {
return err
@ -146,8 +170,10 @@ func (m *MQTTConsumer) connect() error {
return err
}
log.Printf("I! [inputs.mqtt_consumer]: connected %v", m.Servers)
log.Printf("I! [inputs.mqtt_consumer] Connected %v", m.Servers)
m.state = Connected
m.sem = make(semaphore, m.MaxUndeliveredMessages)
m.messages = make(map[telegraf.TrackingID]bool)
// Only subscribe on first connection when using persistent sessions. On
// subsequent connections the subscriptions should be stored in the
@ -172,38 +198,64 @@ func (m *MQTTConsumer) connect() error {
func (m *MQTTConsumer) onConnectionLost(c mqtt.Client, err error) {
m.acc.AddError(fmt.Errorf("connection lost: %v", err))
log.Printf("D! [inputs.mqtt_consumer]: disconnected %v", m.Servers)
log.Printf("D! [inputs.mqtt_consumer] Disconnected %v", m.Servers)
m.state = Disconnected
return
}
func (m *MQTTConsumer) recvMessage(c mqtt.Client, msg mqtt.Message) {
topic := msg.Topic()
for {
select {
case track := <-m.acc.Delivered():
_, ok := m.messages[track.ID()]
if !ok {
// Added by a previous connection
continue
}
<-m.sem
// No ack, MQTT does not support durable handling
delete(m.messages, track.ID())
case m.sem <- empty{}:
err := m.onMessage(m.acc, msg)
if err != nil {
m.acc.AddError(err)
<-m.sem
}
return
}
}
}
func (m *MQTTConsumer) onMessage(acc telegraf.TrackingAccumulator, msg mqtt.Message) error {
metrics, err := m.parser.Parse(msg.Payload())
if err != nil {
m.acc.AddError(err)
return err
}
topic := msg.Topic()
for _, metric := range metrics {
tags := metric.Tags()
tags["topic"] = topic
m.acc.AddFields(metric.Name(), metric.Fields(), tags, metric.Time())
metric.AddTag("topic", topic)
}
id := acc.AddTrackingMetricGroup(metrics)
m.messages[id] = true
return nil
}
func (m *MQTTConsumer) Stop() {
if m.state == Connected {
log.Printf("D! [inputs.mqtt_consumer]: disconnecting %v", m.Servers)
log.Printf("D! [inputs.mqtt_consumer] Disconnecting %v", m.Servers)
m.client.Disconnect(200)
log.Printf("D! [inputs.mqtt_consumer]: disconnected %v", m.Servers)
log.Printf("D! [inputs.mqtt_consumer] Disconnected %v", m.Servers)
m.state = Disconnected
}
m.cancel()
}
func (m *MQTTConsumer) Gather(acc telegraf.Accumulator) error {
if m.state == Disconnected {
m.state = Connecting
log.Printf("D! [inputs.mqtt_consumer]: connecting %v", m.Servers)
log.Printf("D! [inputs.mqtt_consumer] Connecting %v", m.Servers)
m.connect()
}
@ -246,7 +298,7 @@ func (m *MQTTConsumer) createOpts() (*mqtt.ClientOptions, error) {
for _, server := range m.Servers {
// Preserve support for host:port style servers; deprecated in Telegraf 1.4.4
if !strings.Contains(server, "://") {
log.Printf("W! [inputs.mqtt_consumer] server %q should be updated to use `scheme://host:port` format", server)
log.Printf("W! [inputs.mqtt_consumer] Server %q should be updated to use `scheme://host:port` format", server)
if tlsCfg == nil {
server = "tcp://" + server
} else {
@ -267,8 +319,9 @@ func (m *MQTTConsumer) createOpts() (*mqtt.ClientOptions, error) {
func init() {
inputs.Add("mqtt_consumer", func() telegraf.Input {
return &MQTTConsumer{
ConnectionTimeout: defaultConnectionTimeout,
state: Disconnected,
ConnectionTimeout: defaultConnectionTimeout,
MaxUndeliveredMessages: defaultMaxUndeliveredMessages,
state: Disconnected,
}
})
}


@ -3,12 +3,9 @@ package mqtt_consumer
import (
"testing"
"github.com/influxdata/telegraf/plugins/parsers"
"github.com/influxdata/telegraf/testutil"
"github.com/stretchr/testify/assert"
"github.com/eclipse/paho.mqtt.golang"
"github.com/influxdata/telegraf/testutil"
"github.com/stretchr/testify/assert"
)
const (
@ -71,47 +68,6 @@ func TestPersistentClientIDFail(t *testing.T) {
assert.Error(t, err)
}
func TestRunParser(t *testing.T) {
n := newTestMQTTConsumer()
acc := testutil.Accumulator{}
n.acc = &acc
n.parser, _ = parsers.NewInfluxParser()
n.recvMessage(nil, mqttMsg(testMsg))
if a := acc.NFields(); a != 1 {
t.Errorf("got %v, expected %v", a, 1)
}
}
// Test that the parser ignores invalid messages
func TestRunParserInvalidMsg(t *testing.T) {
n := newTestMQTTConsumer()
acc := testutil.Accumulator{}
n.acc = &acc
n.parser, _ = parsers.NewInfluxParser()
n.recvMessage(nil, mqttMsg(invalidMsg))
if a := acc.NFields(); a != 0 {
t.Errorf("got %v, expected %v", a, 0)
}
assert.Len(t, acc.Errors, 1)
}
// Test that the parser parses line format messages into metrics
func TestRunParserAndGather(t *testing.T) {
n := newTestMQTTConsumer()
acc := testutil.Accumulator{}
n.acc = &acc
n.parser, _ = parsers.NewInfluxParser()
n.recvMessage(nil, mqttMsg(testMsg))
acc.AssertContainsFields(t, "cpu_load_short",
map[string]interface{}{"value": float64(23422)})
}
func mqttMsg(val string) mqtt.Message {
return &message{
topic: "telegraf/unit_test",


@ -1,16 +1,14 @@
# NATS Consumer Input Plugin
The [NATS](http://www.nats.io/about/) consumer plugin reads from
specified NATS subjects and adds messages to InfluxDB. The plugin expects messages
in the [Telegraf Input Data Formats](https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md).
A [Queue Group](http://www.nats.io/documentation/concepts/nats-queueing/)
is used when subscribing to subjects so multiple instances of telegraf can read
from a NATS cluster in parallel.
The [NATS][nats] consumer plugin reads from the specified NATS subjects and
creates metrics using one of the supported [input data formats][].
## Configuration
A [Queue Group][queue group] is used when subscribing to subjects so multiple
instances of telegraf can read from a NATS cluster in parallel.
### Configuration:
```toml
# Read metrics from NATS subject(s)
[[inputs.nats_consumer]]
## urls of NATS servers
servers = ["nats://localhost:4222"]
@ -20,13 +18,29 @@ from a NATS cluster in parallel.
subjects = ["telegraf"]
## name a queue group
queue_group = "telegraf_consumers"
## Maximum number of metrics to buffer between collection intervals
metric_buffer = 100000
## Data format to consume.
## Sets the limits for pending msgs and bytes for each subscription
## These shouldn't need to be adjusted except in very high throughput scenarios
# pending_message_limit = 65536
# pending_bytes_limit = 67108864
## Maximum messages to read from the broker that have not been written by an
## output. For best throughput set based on the number of metrics within
## each message and the size of the output's metric_batch_size.
##
## For example, if each message from the queue contains 10 metrics and the
## output metric_batch_size is 1000, setting this to 100 will ensure that a
## full batch is collected and the write is triggered immediately without
## waiting until the next flush_interval.
# max_undelivered_messages = 1000
## Data format to consume.
## Each data format has its own unique set of configuration options, read
## more about them here:
## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md
data_format = "influx"
```
[nats]: https://www.nats.io/about/
[input data formats]: /docs/DATA_FORMATS_INPUT.md
[queue group]: https://www.nats.io/documentation/concepts/nats-queueing/


@ -1,6 +1,7 @@
package natsconsumer
import (
"context"
"fmt"
"log"
"sync"
@ -11,6 +12,13 @@ import (
nats "github.com/nats-io/go-nats"
)
var (
defaultMaxUndeliveredMessages = 1000
)
type empty struct{}
type semaphore chan empty
type natsError struct {
conn *nats.Conn
sub *nats.Subscription
@ -23,48 +31,58 @@ func (e natsError) Error() string {
}
type natsConsumer struct {
QueueGroup string
Subjects []string
Servers []string
Secure bool
QueueGroup string `toml:"queue_group"`
Subjects []string `toml:"subjects"`
Servers []string `toml:"servers"`
Secure bool `toml:"secure"`
// Client pending limits:
PendingMessageLimit int
PendingBytesLimit int
PendingMessageLimit int `toml:"pending_message_limit"`
PendingBytesLimit int `toml:"pending_bytes_limit"`
MaxUndeliveredMessages int `toml:"max_undelivered_messages"`
// Legacy metric buffer support; deprecated in v0.10.3
MetricBuffer int
conn *nats.Conn
subs []*nats.Subscription
parser parsers.Parser
sync.Mutex
wg sync.WaitGroup
Conn *nats.Conn
Subs []*nats.Subscription
// channel for all incoming NATS messages
in chan *nats.Msg
// channel for all NATS read errors
errs chan error
done chan struct{}
acc telegraf.Accumulator
errs chan error
acc telegraf.TrackingAccumulator
wg sync.WaitGroup
cancel context.CancelFunc
}
var sampleConfig = `
## urls of NATS servers
# servers = ["nats://localhost:4222"]
servers = ["nats://localhost:4222"]
## Use Transport Layer Security
# secure = false
secure = false
## subject(s) to consume
# subjects = ["telegraf"]
subjects = ["telegraf"]
## name a queue group
# queue_group = "telegraf_consumers"
queue_group = "telegraf_consumers"
## Sets the limits for pending msgs and bytes for each subscription
## These shouldn't need to be adjusted except in very high throughput scenarios
# pending_message_limit = 65536
# pending_bytes_limit = 67108864
## Maximum messages to read from the broker that have not been written by an
## output. For best throughput set based on the number of metrics within
## each message and the size of the output's metric_batch_size.
##
## For example, if each message from the queue contains 10 metrics and the
## output metric_batch_size is 1000, setting this to 100 will ensure that a
## full batch is collected and the write is triggered immediately without
## waiting until the next flush_interval.
# max_undelivered_messages = 1000
## Data format to consume.
## Each data format has its own unique set of configuration options, read
## more about them here:
@ -94,10 +112,7 @@ func (n *natsConsumer) natsErrHandler(c *nats.Conn, s *nats.Subscription, e erro
// Start the nats consumer. Caller must call *natsConsumer.Stop() to clean up.
func (n *natsConsumer) Start(acc telegraf.Accumulator) error {
n.Lock()
defer n.Unlock()
n.acc = acc
n.acc = acc.WithTracking(n.MaxUndeliveredMessages)
var connectErr error
@ -112,89 +127,106 @@ func (n *natsConsumer) Start(acc telegraf.Accumulator) error {
opts.Secure = n.Secure
if n.Conn == nil || n.Conn.IsClosed() {
n.Conn, connectErr = opts.Connect()
if n.conn == nil || n.conn.IsClosed() {
n.conn, connectErr = opts.Connect()
if connectErr != nil {
return connectErr
}
// Setup message and error channels
n.errs = make(chan error)
n.Conn.SetErrorHandler(n.natsErrHandler)
n.conn.SetErrorHandler(n.natsErrHandler)
n.in = make(chan *nats.Msg, 1000)
for _, subj := range n.Subjects {
sub, err := n.Conn.QueueSubscribe(subj, n.QueueGroup, func(m *nats.Msg) {
sub, err := n.conn.QueueSubscribe(subj, n.QueueGroup, func(m *nats.Msg) {
n.in <- m
})
if err != nil {
return err
}
// ensure that the subscription has been processed by the server
if err = n.Conn.Flush(); err != nil {
if err = n.conn.Flush(); err != nil {
return err
}
// set the subscription pending limits
if err = sub.SetPendingLimits(n.PendingMessageLimit, n.PendingBytesLimit); err != nil {
return err
}
n.Subs = append(n.Subs, sub)
n.subs = append(n.subs, sub)
}
}
n.done = make(chan struct{})
ctx, cancel := context.WithCancel(context.Background())
n.cancel = cancel
// Start the message reader
n.wg.Add(1)
go n.receiver()
go func() {
defer n.wg.Done()
go n.receiver(ctx)
}()
log.Printf("I! Started the NATS consumer service, nats: %v, subjects: %v, queue: %v\n",
n.Conn.ConnectedUrl(), n.Subjects, n.QueueGroup)
n.conn.ConnectedUrl(), n.Subjects, n.QueueGroup)
return nil
}
// receiver() reads all incoming messages from NATS, and parses them into
// telegraf metrics.
func (n *natsConsumer) receiver() {
defer n.wg.Done()
func (n *natsConsumer) receiver(ctx context.Context) {
sem := make(semaphore, n.MaxUndeliveredMessages)
for {
select {
case <-n.done:
case <-ctx.Done():
return
case <-n.acc.Delivered():
<-sem
case err := <-n.errs:
n.acc.AddError(fmt.Errorf("E! error reading from %s\n", err.Error()))
case msg := <-n.in:
metrics, err := n.parser.Parse(msg.Data)
if err != nil {
n.acc.AddError(fmt.Errorf("E! subject: %s, error: %s", msg.Subject, err.Error()))
}
n.acc.AddError(err)
case sem <- empty{}:
select {
case <-ctx.Done():
return
case err := <-n.errs:
<-sem
n.acc.AddError(err)
case <-n.acc.Delivered():
<-sem
<-sem
case msg := <-n.in:
metrics, err := n.parser.Parse(msg.Data)
if err != nil {
n.acc.AddError(fmt.Errorf("subject: %s, error: %s", msg.Subject, err.Error()))
<-sem
continue
}
for _, metric := range metrics {
n.acc.AddFields(metric.Name(), metric.Fields(), metric.Tags(), metric.Time())
n.acc.AddTrackingMetricGroup(metrics)
}
}
}
}
func (n *natsConsumer) clean() {
for _, sub := range n.Subs {
for _, sub := range n.subs {
if err := sub.Unsubscribe(); err != nil {
n.acc.AddError(fmt.Errorf("E! Error unsubscribing from subject %s in queue %s: %s\n",
n.acc.AddError(fmt.Errorf("Error unsubscribing from subject %s in queue %s: %s\n",
sub.Subject, sub.Queue, err.Error()))
}
}
if n.Conn != nil && !n.Conn.IsClosed() {
n.Conn.Close()
if n.conn != nil && !n.conn.IsClosed() {
n.conn.Close()
}
}
func (n *natsConsumer) Stop() {
n.Lock()
close(n.done)
n.cancel()
n.wg.Wait()
n.clean()
n.Unlock()
}
func (n *natsConsumer) Gather(acc telegraf.Accumulator) error {
@ -204,12 +236,13 @@ func (n *natsConsumer) Gather(acc telegraf.Accumulator) error {
func init() {
inputs.Add("nats_consumer", func() telegraf.Input {
return &natsConsumer{
Servers: []string{"nats://localhost:4222"},
Secure: false,
Subjects: []string{"telegraf"},
QueueGroup: "telegraf_consumers",
PendingBytesLimit: nats.DefaultSubPendingBytesLimit,
PendingMessageLimit: nats.DefaultSubPendingMsgsLimit,
Servers: []string{"nats://localhost:4222"},
Secure: false,
Subjects: []string{"telegraf"},
QueueGroup: "telegraf_consumers",
PendingBytesLimit: nats.DefaultSubPendingBytesLimit,
PendingMessageLimit: nats.DefaultSubPendingMsgsLimit,
MaxUndeliveredMessages: defaultMaxUndeliveredMessages,
}
})
}


@ -1,134 +0,0 @@
package natsconsumer
import (
"testing"
"github.com/influxdata/telegraf/plugins/parsers"
"github.com/influxdata/telegraf/testutil"
nats "github.com/nats-io/go-nats"
"github.com/stretchr/testify/assert"
)
const (
testMsg = "cpu_load_short,host=server01 value=23422.0 1422568543702900257\n"
testMsgGraphite = "cpu.load.short.graphite 23422 1454780029"
testMsgJSON = "{\"a\": 5, \"b\": {\"c\": 6}}\n"
invalidMsg = "cpu_load_short,host=server01 1422568543702900257\n"
metricBuffer = 5
)
func newTestNatsConsumer() (*natsConsumer, chan *nats.Msg) {
in := make(chan *nats.Msg, metricBuffer)
n := &natsConsumer{
QueueGroup: "test",
Subjects: []string{"telegraf"},
Servers: []string{"nats://localhost:4222"},
Secure: false,
in: in,
errs: make(chan error, metricBuffer),
done: make(chan struct{}),
}
return n, in
}
// Test that the parser parses NATS messages into metrics
func TestRunParser(t *testing.T) {
n, in := newTestNatsConsumer()
acc := testutil.Accumulator{}
n.acc = &acc
defer close(n.done)
n.parser, _ = parsers.NewInfluxParser()
n.wg.Add(1)
go n.receiver()
in <- natsMsg(testMsg)
acc.Wait(1)
}
// Test that the parser ignores invalid messages
func TestRunParserInvalidMsg(t *testing.T) {
n, in := newTestNatsConsumer()
acc := testutil.Accumulator{}
n.acc = &acc
defer close(n.done)
n.parser, _ = parsers.NewInfluxParser()
n.wg.Add(1)
go n.receiver()
in <- natsMsg(invalidMsg)
acc.WaitError(1)
assert.Contains(t, acc.Errors[0].Error(), "E! subject: telegraf, error: metric parse error")
assert.EqualValues(t, 0, acc.NMetrics())
}
// Test that the parser parses line format messages into metrics
func TestRunParserAndGather(t *testing.T) {
n, in := newTestNatsConsumer()
acc := testutil.Accumulator{}
n.acc = &acc
defer close(n.done)
n.parser, _ = parsers.NewInfluxParser()
n.wg.Add(1)
go n.receiver()
in <- natsMsg(testMsg)
n.Gather(&acc)
acc.Wait(1)
acc.AssertContainsFields(t, "cpu_load_short",
map[string]interface{}{"value": float64(23422)})
}
// Test that the parser parses graphite format messages into metrics
func TestRunParserAndGatherGraphite(t *testing.T) {
n, in := newTestNatsConsumer()
acc := testutil.Accumulator{}
n.acc = &acc
defer close(n.done)
n.parser, _ = parsers.NewGraphiteParser("_", []string{}, nil)
n.wg.Add(1)
go n.receiver()
in <- natsMsg(testMsgGraphite)
n.Gather(&acc)
acc.Wait(1)
acc.AssertContainsFields(t, "cpu_load_short_graphite",
map[string]interface{}{"value": float64(23422)})
}
// Test that the parser parses json format messages into metrics
func TestRunParserAndGatherJSON(t *testing.T) {
n, in := newTestNatsConsumer()
acc := testutil.Accumulator{}
n.acc = &acc
defer close(n.done)
n.parser, _ = parsers.NewParser(&parsers.Config{
DataFormat: "json",
MetricName: "nats_json_test",
})
n.wg.Add(1)
go n.receiver()
in <- natsMsg(testMsgJSON)
n.Gather(&acc)
acc.Wait(1)
acc.AssertContainsFields(t, "nats_json_test",
map[string]interface{}{
"a": float64(5),
"b_c": float64(6),
})
}
func natsMsg(val string) *nats.Msg {
return &nats.Msg{
Subject: "telegraf",
Data: []byte(val),
}
}


@ -1,9 +1,9 @@
# NSQ Consumer Input Plugin
The [NSQ](http://nsq.io/) consumer plugin polls a specified NSQD
topic and adds messages to InfluxDB. This plugin allows a message to be in any of the supported `data_format` types.
The [NSQ][nsq] consumer plugin reads from NSQD and creates metrics using one
of the supported [input data formats][].
## Configuration
### Configuration:
```toml
# Read metrics from NSQD topic(s)
@ -18,6 +18,16 @@ topic and adds messages to InfluxDB. This plugin allows a message to be in any o
channel = "consumer"
max_in_flight = 100
## Maximum messages to read from the broker that have not been written by an
## output. For best throughput set based on the number of metrics within
## each message and the size of the output's metric_batch_size.
##
## For example, if each message from the queue contains 10 metrics and the
## output metric_batch_size is 1000, setting this to 100 will ensure that a
## full batch is collected and the write is triggered immediately without
## waiting until the next flush_interval.
# max_undelivered_messages = 1000
## Data format to consume.
## Each data format has its own unique set of configuration options, read
## more about them here:
@ -25,5 +35,5 @@ topic and adds messages to InfluxDB. This plugin allows a message to be in any o
data_format = "influx"
```
## Testing
The `nsq_consumer_test` mocks out the interaction with `NSQD`. It requires no outside dependencies.
[nsq]: https://nsq.io
[input data formats]: /docs/DATA_FORMATS_INPUT.md


@ -1,7 +1,9 @@
package nsq_consumer
import (
"fmt"
"context"
"log"
"sync"
"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/plugins/inputs"
@ -9,17 +11,38 @@ import (
nsq "github.com/nsqio/go-nsq"
)
const (
defaultMaxUndeliveredMessages = 1000
)
type empty struct{}
type semaphore chan empty
type logger struct{}
func (l *logger) Output(calldepth int, s string) error {
log.Println("D! [inputs.nsq_consumer] " + s)
return nil
}
//NSQConsumer represents the configuration of the plugin
type NSQConsumer struct {
Server string
Nsqd []string
Nsqlookupd []string
Topic string
Channel string
MaxInFlight int
parser parsers.Parser
consumer *nsq.Consumer
acc telegraf.Accumulator
Server string `toml:"server"`
Nsqd []string `toml:"nsqd"`
Nsqlookupd []string `toml:"nsqlookupd"`
Topic string `toml:"topic"`
Channel string `toml:"channel"`
MaxInFlight int `toml:"max_in_flight"`
MaxUndeliveredMessages int `toml:"max_undelivered_messages"`
parser parsers.Parser
consumer *nsq.Consumer
mu sync.Mutex
messages map[telegraf.TrackingID]*nsq.Message
wg sync.WaitGroup
cancel context.CancelFunc
}
var sampleConfig = `
@ -33,6 +56,16 @@ var sampleConfig = `
channel = "consumer"
max_in_flight = 100
## Maximum messages to read from the broker that have not been written by an
## output. For best throughput set based on the number of metrics within
## each message and the size of the output's metric_batch_size.
##
## For example, if each message from the queue contains 10 metrics and the
## output metric_batch_size is 1000, setting this to 100 will ensure that a
## full batch is collected and the write is triggered immediately without
## waiting until the next flush_interval.
# max_undelivered_messages = 1000
## Data format to consume.
## Each data format has its own unique set of configuration options, read
## more about them here:
@ -40,12 +73,6 @@ var sampleConfig = `
data_format = "influx"
`
func init() {
inputs.Add("nsq_consumer", func() telegraf.Input {
return &NSQConsumer{}
})
}
// SetParser takes the data_format from the config and finds the right parser for that format
func (n *NSQConsumer) SetParser(parser parsers.Parser) {
n.parser = parser
@ -62,32 +89,88 @@ func (n *NSQConsumer) Description() string {
}
// Start pulls data from nsq
func (n *NSQConsumer) Start(acc telegraf.Accumulator) error {
n.acc = acc
func (n *NSQConsumer) Start(ac telegraf.Accumulator) error {
acc := ac.WithTracking(n.MaxUndeliveredMessages)
sem := make(semaphore, n.MaxUndeliveredMessages)
n.messages = make(map[telegraf.TrackingID]*nsq.Message, n.MaxUndeliveredMessages)
ctx, cancel := context.WithCancel(context.Background())
n.cancel = cancel
n.connect()
n.consumer.AddConcurrentHandlers(nsq.HandlerFunc(func(message *nsq.Message) error {
n.consumer.SetLogger(&logger{}, nsq.LogLevelInfo)
n.consumer.AddHandler(nsq.HandlerFunc(func(message *nsq.Message) error {
metrics, err := n.parser.Parse(message.Body)
if err != nil {
acc.AddError(fmt.Errorf("E! NSQConsumer Parse Error\nmessage:%s\nerror:%s", string(message.Body), err.Error()))
acc.AddError(err)
// Remove the message from the queue
message.Finish()
return nil
}
for _, metric := range metrics {
n.acc.AddFields(metric.Name(), metric.Fields(), metric.Tags(), metric.Time())
if len(metrics) == 0 {
message.Finish()
return nil
}
message.Finish()
select {
case <-ctx.Done():
return ctx.Err()
case sem <- empty{}:
break
}
n.mu.Lock()
id := acc.AddTrackingMetricGroup(metrics)
n.messages[id] = message
n.mu.Unlock()
message.DisableAutoResponse()
return nil
}), n.MaxInFlight)
}))
if len(n.Nsqlookupd) > 0 {
n.consumer.ConnectToNSQLookupds(n.Nsqlookupd)
}
n.consumer.ConnectToNSQDs(append(n.Nsqd, n.Server))
n.wg.Add(1)
go func() {
defer n.wg.Done()
n.onDelivery(ctx, acc, sem)
}()
return nil
}
func (n *NSQConsumer) onDelivery(ctx context.Context, acc telegraf.TrackingAccumulator, sem semaphore) {
for {
select {
case <-ctx.Done():
return
case info := <-acc.Delivered():
n.mu.Lock()
msg, ok := n.messages[info.ID()]
if !ok {
n.mu.Unlock()
continue
}
<-sem
delete(n.messages, info.ID())
n.mu.Unlock()
if info.Delivered() {
msg.Finish()
} else {
msg.Requeue(-1)
}
}
}
}
// Stop processing messages
func (n *NSQConsumer) Stop() {
n.cancel()
n.wg.Wait()
n.consumer.Stop()
<-n.consumer.StopChan
}
// Gather is a noop
@ -107,3 +190,11 @@ func (n *NSQConsumer) connect() error {
}
return nil
}
func init() {
inputs.Add("nsq_consumer", func() telegraf.Input {
return &NSQConsumer{
MaxUndeliveredMessages: defaultMaxUndeliveredMessages,
}
})
}
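
Several consumer inputs in this change follow the same delivery-tracking pattern shown in `Start` and `onDelivery` above, so a condensed sketch may help. The `WithTracking`, `AddTrackingMetricGroup`, `Delivered`, `ID`, and `Delivered()` calls are the ones used by the plugin; the `ackable` interface and the `handle`/`watch` helpers are hypothetical stand-ins for the broker-specific message handling. The semaphore is what keeps a slow output from being outrun: once `max` messages are awaiting delivery, the handler blocks and the broker stops being drained, instead of buffering without bound.

```go
package example

import (
	"context"
	"sync"

	"github.com/influxdata/telegraf"
)

// ackable stands in for a broker message that must be acked or requeued.
type ackable interface {
	Ack()
	Requeue()
}

type tracker struct {
	mu      sync.Mutex
	sem     chan struct{}
	pending map[telegraf.TrackingID]ackable
}

func newTracker(ac telegraf.Accumulator, max int) (telegraf.TrackingAccumulator, *tracker) {
	return ac.WithTracking(max), &tracker{
		sem:     make(chan struct{}, max), // bounds undelivered messages
		pending: make(map[telegraf.TrackingID]ackable, max),
	}
}

// handle is called once per broker message after parsing it into metrics.
func (t *tracker) handle(ctx context.Context, acc telegraf.TrackingAccumulator, msg ackable, metrics []telegraf.Metric) error {
	select {
	case <-ctx.Done(): // shutting down
		return ctx.Err()
	case t.sem <- struct{}{}: // block the broker when too much is in flight
	}
	t.mu.Lock()
	t.pending[acc.AddTrackingMetricGroup(metrics)] = msg
	t.mu.Unlock()
	return nil
}

// watch acks or requeues each message once the outputs report on its metrics.
func (t *tracker) watch(ctx context.Context, acc telegraf.TrackingAccumulator) {
	for {
		select {
		case <-ctx.Done():
			return
		case info := <-acc.Delivered():
			t.mu.Lock()
			msg, ok := t.pending[info.ID()]
			delete(t.pending, info.ID())
			t.mu.Unlock()
			if !ok {
				continue
			}
			<-t.sem // free a slot for the next message
			if info.Delivered() {
				msg.Ack()
			} else {
				msg.Requeue()
			}
		}
	}
}
```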

View File

@ -36,11 +36,12 @@ func TestReadsMetricsFromNSQ(t *testing.T) {
newMockNSQD(script, addr.String())
consumer := &NSQConsumer{
Server: "127.0.0.1:4155",
Topic: "telegraf",
Channel: "consume",
MaxInFlight: 1,
Nsqd: []string{"127.0.0.1:4155"},
Server: "127.0.0.1:4155",
Topic: "telegraf",
Channel: "consume",
MaxInFlight: 1,
MaxUndeliveredMessages: defaultMaxUndeliveredMessages,
Nsqd: []string{"127.0.0.1:4155"},
}
p, _ := parsers.NewInfluxParser()

View File

@ -2,6 +2,7 @@ package socket_listener
import (
"bufio"
"crypto/tls"
"fmt"
"io"
"log"
@ -9,11 +10,8 @@ import (
"os"
"strings"
"sync"
"time"
"crypto/tls"
"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/internal"
tlsint "github.com/influxdata/telegraf/internal/tls"
@ -120,7 +118,7 @@ func (ssl *streamSocketListener) read(c net.Conn) {
continue
}
for _, m := range metrics {
ssl.AddFields(m.Name(), m.Fields(), m.Tags(), m.Time())
ssl.AddMetric(m)
}
}
@ -156,7 +154,7 @@ func (psl *packetSocketListener) listen() {
continue
}
for _, m := range metrics {
psl.AddFields(m.Name(), m.Fields(), m.Tags(), m.Time())
psl.AddMetric(m)
}
}
}

View File

@ -7,11 +7,13 @@ import (
type Discard struct{}
func (d *Discard) Connect() error { return nil }
func (d *Discard) Close() error { return nil }
func (d *Discard) SampleConfig() string { return "" }
func (d *Discard) Description() string { return "Send metrics to nowhere at all" }
func (d *Discard) Write(metrics []telegraf.Metric) error { return nil }
func (d *Discard) Connect() error { return nil }
func (d *Discard) Close() error { return nil }
func (d *Discard) SampleConfig() string { return "" }
func (d *Discard) Description() string { return "Send metrics to nowhere at all" }
func (d *Discard) Write(metrics []telegraf.Metric) error {
return nil
}
func init() {
outputs.Add("discard", func() telegraf.Output { return &Discard{} })

View File

@ -144,7 +144,7 @@ func (p *PrometheusClient) auth(h http.Handler) http.Handler {
})
}
func (p *PrometheusClient) Start() error {
func (p *PrometheusClient) Connect() error {
defaultCollectors := map[string]bool{
"gocollector": true,
"process": true,
@ -200,15 +200,6 @@ func (p *PrometheusClient) Start() error {
return nil
}
func (p *PrometheusClient) Stop() {
// plugin gets cleaned up in Close() already.
}
func (p *PrometheusClient) Connect() error {
// This service output does not need to make any further connections
return nil
}
func (p *PrometheusClient) Close() error {
ctx, cancel := context.WithTimeout(context.Background(), time.Second*5)
defer cancel()

View File

@ -600,7 +600,7 @@ func TestPrometheusWritePointEmptyTag(t *testing.T) {
pClient, p, err := setupPrometheus()
require.NoError(t, err)
defer pClient.Stop()
defer pClient.Close()
now := time.Now()
tags := make(map[string]string)
@ -675,7 +675,7 @@ func setupPrometheus() (*PrometheusClient, *prometheus_input.Prometheus, error)
pTesting = NewClient()
pTesting.Listen = "localhost:9127"
pTesting.Path = "/metrics"
err := pTesting.Start()
err := pTesting.Connect()
if err != nil {
return nil, nil, err
}

View File

@ -10,6 +10,7 @@ import (
"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/filter"
"github.com/influxdata/telegraf/internal"
"github.com/influxdata/telegraf/metric"
"github.com/influxdata/telegraf/plugins/processors"
)
@ -76,12 +77,12 @@ var sampleConfig = `
## tags. If this setting is set to anything other than "" the plugin will add
## a tag (whose name will be the value of this setting) to each metric with
## the value of the calculated GroupBy tag. Useful for debugging.
# add_groupby_tag = ""
# add_groupby_tag = ""
## These settings provide a way to know the position of each metric in
## the top k. The 'add_rank_field' setting allows you to specify for which
## fields the position is required. If the list is non-empty, then a field
## will be added to each and every metric for each string present in this
## will be added to each and every metric for each string present in this
## setting. This field will contain the ranking of the group that
## the metric belonged to when aggregated over that field.
## The name of the field will be set to the name of the aggregation field,
@ -208,6 +209,11 @@ func (t *TopK) Apply(in ...telegraf.Metric) []telegraf.Metric {
// Add the metrics received to our internal cache
for _, m := range in {
// When tracking metrics this plugin could deadlock the input by
// holding undelivered metrics while the input waits for metrics to be
// delivered. Instead, treat all handled metrics as delivered and
// produced metrics as untracked in a similar way to aggregators.
m.Drop()
// Check if the metric has any of the fields over which we are aggregating
hasField := false
@ -281,7 +287,6 @@ func (t *TopK) push() []telegraf.Metric {
// Create a one dimensional list with the top K metrics of each key
for i, ag := range aggregations[0:min(t.K, len(aggregations))] {
// Check whether or not we need to add fields or tags to the selected metrics
if len(t.aggFieldSet) != 0 || len(t.rankFieldSet) != 0 || groupTag != "" {
for _, m := range t.cache[ag.groupbykey] {
@ -311,7 +316,16 @@ func (t *TopK) push() []telegraf.Metric {
t.Reset()
return ret
result := make([]telegraf.Metric, 0, len(ret))
for _, m := range ret {
copy, err := metric.New(m.Name(), m.Tags(), m.Fields(), m.Time(), m.Type())
if err != nil {
continue
}
result = append(result, copy)
}
return result
}
// Function that generates the aggregation functions
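
The comment added to `Apply` above describes a general rule for processors that buffer metrics while tracking is in use. A minimal sketch of that drop-and-copy idea follows; the `buffer` helper and the cache slice are hypothetical, while `Drop` and `metric.New` are the calls used by the plugin:

```go
package example

import (
	"github.com/influxdata/telegraf"
	"github.com/influxdata/telegraf/metric"
)

// buffer stores incoming metrics for later emission without holding up the
// inputs that produced them.
func buffer(cache []telegraf.Metric, in ...telegraf.Metric) []telegraf.Metric {
	for _, m := range in {
		// Report the tracked original as handled so the input can ack it...
		m.Drop()
		// ...and keep an untracked copy for the processor's own bookkeeping.
		c, err := metric.New(m.Name(), m.Tags(), m.Fields(), m.Time(), m.Type())
		if err != nil {
			continue
		}
		cache = append(cache, c)
	}
	return cache
}
```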

View File

@ -1,12 +1,12 @@
package topk
import (
"reflect"
"testing"
"time"
"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/internal"
"github.com/influxdata/telegraf/testutil"
)
// Key, value pair that represents a telegraf.Metric Field
@ -95,7 +95,7 @@ func deepCopy(a []telegraf.Metric) []telegraf.Metric {
func belongs(m telegraf.Metric, ms []telegraf.Metric) bool {
for _, i := range ms {
if reflect.DeepEqual(i, m) {
if testutil.MetricEqual(i, m) {
return true
}
}

View File

@ -7,6 +7,6 @@ type Processor interface {
// Description returns a one-sentence description of the Processor
Description() string
// Apply the filter to the given metric
// Apply the filter to the given metric.
Apply(in ...Metric) []Metric
}
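
For reference, a toy processor that satisfies this interface; the plugin name, the tag it adds, and its behavior are made up purely to illustrate the Apply contract:

```go
package example

import (
	"github.com/influxdata/telegraf"
	"github.com/influxdata/telegraf/plugins/processors"
)

type Example struct{}

func (*Example) SampleConfig() string { return "" }

func (*Example) Description() string { return "Tag every metric that passes through" }

// Apply receives a batch of metrics and returns the (possibly modified) batch.
func (e *Example) Apply(in ...telegraf.Metric) []telegraf.Metric {
	for _, m := range in {
		m.AddTag("processed_by", "example")
	}
	return in
}

func init() {
	processors.Add("example", func() telegraf.Processor { return &Example{} })
}
```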

View File

@ -14,6 +14,15 @@ import (
"github.com/stretchr/testify/assert"
)
var (
lastID uint64
)
func newTrackingID() telegraf.TrackingID {
atomic.AddUint64(&lastID, 1)
return telegraf.TrackingID(lastID)
}
// Metric defines a single point measurement
type Metric struct {
Measurement string
@ -23,7 +32,7 @@ type Metric struct {
}
func (p *Metric) String() string {
return fmt.Sprintf("%s %v", p.Measurement, p.Fields)
return fmt.Sprintf("%s %v %v", p.Measurement, p.Tags, p.Fields)
}
// Accumulator defines a mocked out accumulator
@ -31,11 +40,12 @@ type Accumulator struct {
sync.Mutex
*sync.Cond
Metrics []*Metric
nMetrics uint64
Discard bool
Errors []error
debug bool
Metrics []*Metric
nMetrics uint64
Discard bool
Errors []error
debug bool
delivered chan telegraf.DeliveryInfo
}
func (a *Accumulator) NMetrics() uint64 {
@ -154,6 +164,33 @@ func (a *Accumulator) AddHistogram(
a.AddFields(measurement, fields, tags, timestamp...)
}
func (a *Accumulator) AddMetric(m telegraf.Metric) {
a.AddFields(m.Name(), m.Fields(), m.Tags(), m.Time())
}
func (a *Accumulator) WithTracking(maxTracked int) telegraf.TrackingAccumulator {
return a
}
func (a *Accumulator) AddTrackingMetric(m telegraf.Metric) telegraf.TrackingID {
a.AddMetric(m)
return newTrackingID()
}
func (a *Accumulator) AddTrackingMetricGroup(group []telegraf.Metric) telegraf.TrackingID {
for _, m := range group {
a.AddMetric(m)
}
return newTrackingID()
}
func (a *Accumulator) Delivered() <-chan telegraf.DeliveryInfo {
if a.delivered == nil {
a.delivered = make(chan telegraf.DeliveryInfo)
}
return a.delivered
}
// AddError appends the given error to Accumulator.Errors.
func (a *Accumulator) AddError(err error) {
if err == nil {

View File

@ -41,6 +41,18 @@ func newMetricDiff(metric telegraf.Metric) *metricDiff {
return m
}
func MetricEqual(expected, actual telegraf.Metric) bool {
var lhs, rhs *metricDiff
if expected != nil {
lhs = newMetricDiff(expected)
}
if actual != nil {
rhs = newMetricDiff(actual)
}
return cmp.Equal(lhs, rhs)
}
func RequireMetricEqual(t *testing.T, expected, actual telegraf.Metric) {
t.Helper()
@ -60,11 +72,11 @@ func RequireMetricEqual(t *testing.T, expected, actual telegraf.Metric) {
func RequireMetricsEqual(t *testing.T, expected, actual []telegraf.Metric) {
t.Helper()
lhs := make([]*metricDiff, len(expected))
lhs := make([]*metricDiff, 0, len(expected))
for _, m := range expected {
lhs = append(lhs, newMetricDiff(m))
}
rhs := make([]*metricDiff, len(actual))
rhs := make([]*metricDiff, 0, len(actual))
for _, m := range actual {
rhs = append(rhs, newMetricDiff(m))
}
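
A hypothetical test showing how the new `MetricEqual` helper might be used; the metric name, tags, and field values are invented for illustration:

```go
package example

import (
	"testing"
	"time"

	"github.com/influxdata/telegraf/metric"
	"github.com/influxdata/telegraf/testutil"
)

func TestMetricEqualExample(t *testing.T) {
	ts := time.Unix(0, 0)
	expected, _ := metric.New("cpu",
		map[string]string{"host": "localhost"},
		map[string]interface{}{"usage_idle": 99.0}, ts)
	actual, _ := metric.New("cpu",
		map[string]string{"host": "localhost"},
		map[string]interface{}{"usage_idle": 99.0}, ts)

	if !testutil.MetricEqual(expected, actual) {
		t.Fatal("metrics should compare equal by name, tags, fields, and type")
	}
}
```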