add option to randomize Kinesis partition key (#2705)

2017-04-26 13:54:24 -04:00
parent 9b874dff8d
commit c66e2896c6
4 changed files with 25 additions and 5 deletions
--- a/plugins/outputs/kinesis/README.md
+++ b/plugins/outputs/kinesis/README.md
@@ -50,6 +50,11 @@ This is used to group data within a stream. Currently this plugin only supports
 Manually configuring different hosts, or groups of hosts with manually selected partitionkeys might be a workable
 solution to scale out.

+### use_random_partitionkey
+
+When true a random UUID will be generated and used as the partitionkey when sending data to Kinesis. This allows data to evenly spread across multiple shards in the stream. Due to using a random paritionKey there can be no guarantee of ordering when consuming the data off the shards.
+If true then the partitionkey option will be ignored.
+
 ### format

 The format configuration value has been designated to allow people to change the format of the Point as written to
--- a/plugins/outputs/kinesis/kinesis.go
+++ b/plugins/outputs/kinesis/kinesis.go
@@ -7,6 +7,7 @@ import (

 	"github.com/aws/aws-sdk-go/aws"
 	"github.com/aws/aws-sdk-go/service/kinesis"
+	"github.com/satori/go.uuid"

 	"github.com/influxdata/telegraf"
 	internalaws "github.com/influxdata/telegraf/internal/config/aws"
@@ -23,10 +24,11 @@ type KinesisOutput struct {
 	Filename  string `toml:"shared_credential_file"`
 	Token     string `toml:"token"`

-	StreamName   string `toml:"streamname"`
-	PartitionKey string `toml:"partitionkey"`
-	Debug        bool   `toml:"debug"`
-	svc          *kinesis.Kinesis
+	StreamName         string `toml:"streamname"`
+	PartitionKey       string `toml:"partitionkey"`
+	RandomPartitionKey bool   `toml:"use_random_partitionkey"`
+	Debug              bool   `toml:"debug"`
+	svc                *kinesis.Kinesis

 	serializer serializers.Serializer
 }
@@ -54,6 +56,11 @@ var sampleConfig = `
  streamname = "StreamName"
  ## PartitionKey as used for sharding data.
  partitionkey = "PartitionKey"
+  ## If set the paritionKey will be a random UUID on every put.
+  ## This allows for scaling across multiple shards in a stream.
+  ## This will cause issues with ordering.
+  use_random_partitionkey = false
+

  ## Data format to output.
  ## Each data format has it's own unique set of configuration options, read
@@ -173,9 +180,15 @@ func (k *KinesisOutput) Write(metrics []telegraf.Metric) error {
 			return err
 		}

+		partitionKey := k.PartitionKey
+		if k.RandomPartitionKey {
+			u := uuid.NewV4()
+			partitionKey = u.String()
+		}
+
 		d := kinesis.PutRecordsRequestEntry{
 			Data:         values,
-			PartitionKey: aws.String(k.PartitionKey),
+			PartitionKey: aws.String(partitionKey),
 		}

 		r = append(r, &d)