Use lifo order in metric buffer (#5287)

2019-01-15 11:48:52 -08:00
parent 193aba8673
commit da80276802
4 changed files with 365 additions and 77 deletions
--- a/internal/models/buffer.go
+++ b/internal/models/buffer.go
@@ -22,8 +22,7 @@ type Buffer struct {
 	cap   int // the capacity of the buffer
 	batchFirst int // index of the first metric in the batch
-	batchLast  int // one after the index of the last metric in the batch
+	batchSize  int // number of metrics currently in the batch
 	batchSize  int // number of metrics current in the batch
 	MetricsAdded   selfstat.Stat
 	MetricsWritten selfstat.Stat
@@ -82,46 +81,24 @@ func (b *Buffer) metricDropped(metric telegraf.Metric) {
 	metric.Reject()
 }
 func (b *Buffer) inBatch() bool {
 	if b.batchSize == 0 {
 		return false
 	}
 	if b.batchFirst < b.batchLast {
 		return b.last >= b.batchFirst && b.last < b.batchLast
 	} else {
 		return b.last >= b.batchFirst || b.last < b.batchLast
 	}
 }
 func (b *Buffer) add(m telegraf.Metric) {
 	// Check if Buffer is full
 	if b.size == b.cap {
 		if b.batchSize == 0 {
 			// No batch taken by the output, we can drop the metric now.
 		b.metricDropped(b.buf[b.last])
-		} else if b.inBatch() {
+
-			// There is an outstanding batch and this will overwrite a metric
+		if b.last == b.batchFirst && b.batchSize > 0 {
 			// in it, delay the dropping only in case the batch gets rejected.
 			b.batchSize--
-			b.batchFirst++
+			b.batchFirst = b.next(b.batchFirst)
 			b.batchFirst %= b.cap
 		} else {
 			// There is an outstanding batch, but this overwrites a metric
 			// outside of it.
 			b.metricDropped(b.buf[b.last])
 		}
 	}
 	b.metricAdded()
 	b.buf[b.last] = m
-	b.last++
+	b.last = b.next(b.last)
 	b.last %= b.cap
 	if b.size == b.cap {
-		b.first++
+		b.first = b.next(b.first)
 		b.first %= b.cap
 	}
 	b.size = min(b.size+1, b.cap)
@@ -138,10 +115,8 @@ func (b *Buffer) Add(metrics ...telegraf.Metric) {
 }
 // Batch returns a slice containing up to batchSize of the most recently added
-// metrics.
+// metrics.  Metrics are ordered from newest to oldest in the batch.  The
-//
+// batch must not be modified by the client.
 // The metrics contained in the batch are not removed from the buffer, instead
 // the last batch is recorded and removed only if Accept is called.
 func (b *Buffer) Batch(batchSize int) []telegraf.Metric {
 	b.Lock()
 	defer b.Unlock()
@@ -152,21 +127,23 @@ func (b *Buffer) Batch(batchSize int) []telegraf.Metric {
 		return out
 	}
-	b.batchFirst = b.first
+	b.batchFirst = b.cap + b.last - outLen
-	b.batchLast = b.first + outLen
+	b.batchFirst %= b.cap
 	b.batchLast %= b.cap
 	b.batchSize = outLen
-	until := min(b.cap, b.first+outLen)
+	batchIndex := b.batchFirst
-
+	for i := range out {
-	n := copy(out, b.buf[b.first:until])
+		out[len(out)-1-i] = b.buf[batchIndex]
-	if n < outLen {
+		b.buf[batchIndex] = nil
-		copy(out[n:], b.buf[:outLen-n])
+		batchIndex = b.next(batchIndex)
 	}
 	b.last = b.batchFirst
 	b.size -= outLen
 	return out
 }
-// Accept removes the metrics contained in the last batch.
+// Accept marks the batch, acquired from Batch(), as successfully written.
 func (b *Buffer) Accept(batch []telegraf.Metric) {
 	b.Lock()
 	defer b.Unlock()
@@ -175,35 +152,89 @@ func (b *Buffer) Accept(batch []telegraf.Metric) {
 		b.metricWritten(m)
 	}
 	b.size -= b.batchSize
 	for i := 0; i < b.batchSize; i++ {
 		b.buf[b.first] = nil
 		b.first++
 		b.first %= b.cap
 	}
 	b.resetBatch()
 }
-// Reject clears the current batch record so that calls to Accept will have no
+// Reject returns the batch, acquired from Batch(), to the buffer and marks it
-// effect.
+// as unsent.
 func (b *Buffer) Reject(batch []telegraf.Metric) {
 	b.Lock()
 	defer b.Unlock()
-	if len(batch) > b.batchSize {
+	older := b.dist(b.first, b.batchFirst)
-		// Part or all of the batch was dropped before reject was called.
+	free := b.cap - b.size
-		for _, m := range batch[b.batchSize:] {
+	restore := min(len(batch), free+older)
-			b.metricDropped(m)
+
 	// Rotate newer metrics forward the number of metrics that we can restore.
 	rb := b.batchFirst
 	rp := b.last
 	re := b.nextby(rp, restore)
 	b.last = re
 	for rb != rp {
 		rp = b.prev(rp)
 		re = b.prev(re)
 		if b.buf[re] != nil {
 			b.metricDropped(b.buf[re])
 		}
 		b.buf[re] = b.buf[rp]
 		b.buf[rp] = nil
 	}
 	// Copy metrics from the batch back into the buffer; recall that the
 	// batch is in reverse order compared to b.buf
 	for i := range batch {
 		if i < restore {
 			re = b.prev(re)
 			b.buf[re] = batch[i]
 			b.size++
 		} else {
 			b.metricDropped(batch[i])
 		}
 	}
 	b.resetBatch()
 }
 // dist returns the distance between two indexes.  Because this data structure
 // uses a half open range the arguments must both either left side or right
 // side pairs.
 func (b *Buffer) dist(begin, end int) int {
 	if begin <= end {
 		return end - begin
 	} else {
 		return b.cap - begin - 1 + end
 	}
 }
 // next returns the next index with wrapping.
 func (b *Buffer) next(index int) int {
 	index++
 	if index == b.cap {
 		return 0
 	}
 	return index
 }
 // next returns the index that is count newer with wrapping.
 func (b *Buffer) nextby(index, count int) int {
 	index += count
 	index %= b.cap
 	return index
 }
 // next returns the prev index with wrapping.
 func (b *Buffer) prev(index int) int {
 	index--
 	if index < 0 {
 		return b.cap - 1
 	}
 	return index
 }
 func (b *Buffer) resetBatch() {
 	b.batchFirst = 0
 	b.batchLast = 0
 	b.batchSize = 0
 }
--- a/internal/models/buffer_test.go
+++ b/internal/models/buffer_test.go
@@ -6,6 +6,7 @@ import (
 	"github.com/influxdata/telegraf"
 	"github.com/influxdata/telegraf/metric"
 	"github.com/influxdata/telegraf/testutil"
 	"github.com/stretchr/testify/require"
 )
@@ -29,13 +30,17 @@ func (m *MockMetric) Drop() {
 }
 func Metric() telegraf.Metric {
 	return MetricTime(0)
 }
 func MetricTime(sec int64) telegraf.Metric {
 	m, err := metric.New(
 		"cpu",
 		map[string]string{},
 		map[string]interface{}{
 			"value": 42.0,
 		},
-		time.Unix(0, 0),
+		time.Unix(sec, 0),
 	)
 	if err != nil {
 		panic(err)
@@ -147,6 +152,256 @@ func TestBuffer_BatchWrap(t *testing.T) {
 	require.Len(t, batch, 5)
 }
 func TestBuffer_BatchLatest(t *testing.T) {
 	b := setup(NewBuffer("test", 4))
 	b.Add(MetricTime(1))
 	b.Add(MetricTime(2))
 	b.Add(MetricTime(3))
 	batch := b.Batch(2)
 	testutil.RequireMetricsEqual(t,
 		[]telegraf.Metric{
 			MetricTime(3),
 			MetricTime(2),
 		}, batch)
 }
 func TestBuffer_BatchLatestWrap(t *testing.T) {
 	b := setup(NewBuffer("test", 4))
 	b.Add(MetricTime(1))
 	b.Add(MetricTime(2))
 	b.Add(MetricTime(3))
 	b.Add(MetricTime(4))
 	b.Add(MetricTime(5))
 	batch := b.Batch(2)
 	testutil.RequireMetricsEqual(t,
 		[]telegraf.Metric{
 			MetricTime(5),
 			MetricTime(4),
 		}, batch)
 }
 func TestBuffer_MultipleBatch(t *testing.T) {
 	b := setup(NewBuffer("test", 10))
 	b.Add(MetricTime(1))
 	b.Add(MetricTime(2))
 	b.Add(MetricTime(3))
 	b.Add(MetricTime(4))
 	b.Add(MetricTime(5))
 	b.Add(MetricTime(6))
 	batch := b.Batch(5)
 	testutil.RequireMetricsEqual(t,
 		[]telegraf.Metric{
 			MetricTime(6),
 			MetricTime(5),
 			MetricTime(4),
 			MetricTime(3),
 			MetricTime(2),
 		}, batch)
 	b.Accept(batch)
 	batch = b.Batch(5)
 	testutil.RequireMetricsEqual(t,
 		[]telegraf.Metric{
 			MetricTime(1),
 		}, batch)
 	b.Accept(batch)
 }
 func TestBuffer_RejectWithRoom(t *testing.T) {
 	b := setup(NewBuffer("test", 5))
 	b.Add(MetricTime(1))
 	b.Add(MetricTime(2))
 	b.Add(MetricTime(3))
 	batch := b.Batch(2)
 	b.Add(MetricTime(4))
 	b.Add(MetricTime(5))
 	b.Reject(batch)
 	require.Equal(t, int64(0), b.MetricsDropped.Get())
 	batch = b.Batch(5)
 	testutil.RequireMetricsEqual(t,
 		[]telegraf.Metric{
 			MetricTime(5),
 			MetricTime(4),
 			MetricTime(3),
 			MetricTime(2),
 			MetricTime(1),
 		}, batch)
 }
 func TestBuffer_RejectNothingNewFull(t *testing.T) {
 	b := setup(NewBuffer("test", 5))
 	b.Add(MetricTime(1))
 	b.Add(MetricTime(2))
 	b.Add(MetricTime(3))
 	b.Add(MetricTime(4))
 	b.Add(MetricTime(5))
 	batch := b.Batch(2)
 	b.Reject(batch)
 	require.Equal(t, int64(0), b.MetricsDropped.Get())
 	batch = b.Batch(5)
 	testutil.RequireMetricsEqual(t,
 		[]telegraf.Metric{
 			MetricTime(5),
 			MetricTime(4),
 			MetricTime(3),
 			MetricTime(2),
 			MetricTime(1),
 		}, batch)
 }
 func TestBuffer_RejectNoRoom(t *testing.T) {
 	b := setup(NewBuffer("test", 5))
 	b.Add(MetricTime(1))
 	b.Add(MetricTime(2))
 	b.Add(MetricTime(3))
 	batch := b.Batch(2)
 	b.Add(MetricTime(4))
 	b.Add(MetricTime(5))
 	b.Add(MetricTime(6))
 	b.Add(MetricTime(7))
 	b.Add(MetricTime(8))
 	b.Reject(batch)
 	require.Equal(t, int64(3), b.MetricsDropped.Get())
 	batch = b.Batch(5)
 	testutil.RequireMetricsEqual(t,
 		[]telegraf.Metric{
 			MetricTime(8),
 			MetricTime(7),
 			MetricTime(6),
 			MetricTime(5),
 			MetricTime(4),
 		}, batch)
 }
 func TestBuffer_RejectRoomExact(t *testing.T) {
 	b := setup(NewBuffer("test", 5))
 	b.Add(MetricTime(1))
 	b.Add(MetricTime(2))
 	batch := b.Batch(2)
 	b.Add(MetricTime(3))
 	b.Add(MetricTime(4))
 	b.Add(MetricTime(5))
 	b.Reject(batch)
 	require.Equal(t, int64(0), b.MetricsDropped.Get())
 	batch = b.Batch(5)
 	testutil.RequireMetricsEqual(t,
 		[]telegraf.Metric{
 			MetricTime(5),
 			MetricTime(4),
 			MetricTime(3),
 			MetricTime(2),
 			MetricTime(1),
 		}, batch)
 }
 func TestBuffer_RejectRoomOverwriteOld(t *testing.T) {
 	b := setup(NewBuffer("test", 5))
 	b.Add(MetricTime(1))
 	b.Add(MetricTime(2))
 	b.Add(MetricTime(3))
 	batch := b.Batch(1)
 	b.Add(MetricTime(4))
 	b.Add(MetricTime(5))
 	b.Add(MetricTime(6))
 	b.Reject(batch)
 	require.Equal(t, int64(1), b.MetricsDropped.Get())
 	batch = b.Batch(5)
 	testutil.RequireMetricsEqual(t,
 		[]telegraf.Metric{
 			MetricTime(6),
 			MetricTime(5),
 			MetricTime(4),
 			MetricTime(3),
 			MetricTime(2),
 		}, batch)
 }
 func TestBuffer_RejectPartialRoom(t *testing.T) {
 	b := setup(NewBuffer("test", 5))
 	b.Add(MetricTime(1))
 	b.Add(MetricTime(2))
 	b.Add(MetricTime(3))
 	batch := b.Batch(2)
 	b.Add(MetricTime(4))
 	b.Add(MetricTime(5))
 	b.Add(MetricTime(6))
 	b.Add(MetricTime(7))
 	b.Reject(batch)
 	require.Equal(t, int64(2), b.MetricsDropped.Get())
 	batch = b.Batch(5)
 	testutil.RequireMetricsEqual(t,
 		[]telegraf.Metric{
 			MetricTime(7),
 			MetricTime(6),
 			MetricTime(5),
 			MetricTime(4),
 			MetricTime(3),
 		}, batch)
 }
 func TestBuffer_RejectWrapped(t *testing.T) {
 	b := setup(NewBuffer("test", 5))
 	b.Add(MetricTime(1))
 	b.Add(MetricTime(2))
 	b.Add(MetricTime(3))
 	batch := b.Batch(2)
 	b.Add(MetricTime(4))
 	b.Add(MetricTime(5))
 	// buffer: 1, 4, 5; batch: 2, 3
 	require.Equal(t, int64(0), b.MetricsDropped.Get())
 	b.Add(MetricTime(6))
 	b.Add(MetricTime(7))
 	b.Add(MetricTime(8))
 	b.Add(MetricTime(9))
 	b.Add(MetricTime(10))
 	// buffer: 8, 9, 10, 6, 7; batch: 2, 3
 	require.Equal(t, int64(3), b.MetricsDropped.Get())
 	b.Add(MetricTime(11))
 	b.Add(MetricTime(12))
 	b.Add(MetricTime(13))
 	b.Add(MetricTime(14))
 	b.Add(MetricTime(15))
 	// buffer: 13, 14, 15, 11, 12; batch: 2, 3
 	require.Equal(t, int64(8), b.MetricsDropped.Get())
 	b.Reject(batch)
 	require.Equal(t, int64(10), b.MetricsDropped.Get())
 	batch = b.Batch(5)
 	testutil.RequireMetricsEqual(t,
 		[]telegraf.Metric{
 			MetricTime(15),
 			MetricTime(14),
 			MetricTime(13),
 			MetricTime(12),
 			MetricTime(11),
 		}, batch)
 }
 func TestBuffer_AddDropsOverwrittenMetrics(t *testing.T) {
 	m := Metric()
 	b := setup(NewBuffer("test", 5))
@@ -210,8 +465,8 @@ func TestBuffer_MetricsOverwriteBatchAccept(t *testing.T) {
 	batch := b.Batch(3)
 	b.Add(m, m, m)
 	b.Accept(batch)
-	require.Equal(t, int64(0), b.MetricsDropped.Get())
+	require.Equal(t, int64(0), b.MetricsDropped.Get(), "dropped")
-	require.Equal(t, int64(3), b.MetricsWritten.Get())
+	require.Equal(t, int64(3), b.MetricsWritten.Get(), "written")
 }
 func TestBuffer_MetricsOverwriteBatchReject(t *testing.T) {
@@ -254,7 +509,7 @@ func TestBuffer_BatchNotRemoved(t *testing.T) {
 	b := setup(NewBuffer("test", 5))
 	b.Add(m, m, m, m, m)
 	b.Batch(2)
-	require.Equal(t, 5, b.Len())
+	require.Equal(t, 3, b.Len())
 }
 func TestBuffer_BatchRejectAcceptNoop(t *testing.T) {
@@ -310,10 +565,8 @@ func TestBuffer_AddCallsMetricRejectWhenNotInBatch(t *testing.T) {
 	b.Add(mm, mm, mm, mm, mm)
 	batch := b.Batch(2)
 	b.Add(mm, mm, mm, mm)
 	// metric[2] and metric[3] rejected
 	require.Equal(t, 2, reject)
 	b.Reject(batch)
 	// metric[1] and metric[2] now rejected
 	require.Equal(t, 4, reject)
 }
--- a/internal/models/running_output_test.go
+++ b/internal/models/running_output_test.go
@@ -7,7 +7,6 @@ import (
 	"github.com/influxdata/telegraf"
 	"github.com/influxdata/telegraf/testutil"
 	"github.com/stretchr/testify/assert"
 	"github.com/stretchr/testify/require"
 )
@@ -28,6 +27,14 @@ var next5 = []telegraf.Metric{
 	testutil.TestMetric(101, "metric10"),
 }
 func reverse(metrics []telegraf.Metric) []telegraf.Metric {
 	result := make([]telegraf.Metric, 0, len(metrics))
 	for i := len(metrics) - 1; i >= 0; i-- {
 		result = append(result, metrics[i])
 	}
 	return result
 }
 // Benchmark adding metrics.
 func BenchmarkRunningOutputAddWrite(b *testing.B) {
 	conf := &OutputConfig{
@@ -297,7 +304,7 @@ func TestRunningOutputWriteFailOrder(t *testing.T) {
 	// Verify that 10 metrics were written
 	assert.Len(t, m.Metrics(), 10)
 	// Verify that they are in order
-	expected := append(first5, next5...)
+	expected := append(reverse(next5), reverse(first5)...)
 	assert.Equal(t, expected, m.Metrics())
 }
@@ -355,24 +362,17 @@ func TestRunningOutputWriteFailOrder2(t *testing.T) {
 	err = ro.Write()
 	require.NoError(t, err)
-	// Verify that 10 metrics were written
+	// Verify that 20 metrics were written
 	assert.Len(t, m.Metrics(), 20)
 	// Verify that they are in order
-	expected := append(first5, next5...)
+	expected := append(reverse(next5), reverse(first5)...)
-	expected = append(expected, first5...)
+	expected = append(expected, reverse(next5)...)
-	expected = append(expected, next5...)
+	expected = append(expected, reverse(first5)...)
 	assert.Equal(t, expected, m.Metrics())
 }
 // Verify that the order of points is preserved when there is a remainder
 // of points for the batch.
 //
 // ie, with a batch size of 5:
 //
 //     1 2 3 4 5 6 <-- order, failed points
 //     6 1 2 3 4 5 <-- order, after 1st write failure (1 2 3 4 5 was batch)
 //     1 2 3 4 5 6 <-- order, after 2nd write failure, (6 was batch)
 //
 func TestRunningOutputWriteFailOrder3(t *testing.T) {
 	conf := &OutputConfig{
 		Filter: Filter{},
@@ -408,7 +408,7 @@ func TestRunningOutputWriteFailOrder3(t *testing.T) {
 	// Verify that 6 metrics were written
 	assert.Len(t, m.Metrics(), 6)
 	// Verify that they are in order
-	expected := append(first5, next5[0])
+	expected := []telegraf.Metric{next5[0], first5[4], first5[3], first5[2], first5[1], first5[0]}
 	assert.Equal(t, expected, m.Metrics())
 }
--- a/testutil/metric.go
+++ b/testutil/metric.go
@@ -19,6 +19,10 @@ type metricDiff struct {
 }
 func newMetricDiff(metric telegraf.Metric) *metricDiff {
 	if metric == nil {
 		return nil
 	}
 	m := &metricDiff{}
 	m.Measurement = metric.Name()