From 05d691aa81ef82307f9c765c0c39286ae35d87c6 Mon Sep 17 00:00:00 2001 From: atzoum Date: Tue, 9 Jan 2018 01:11:36 +0200 Subject: [PATCH] Add support for dropwizard input format (#2846) --- Godeps | 2 + README.md | 1 + docs/DATA_FORMATS_INPUT.md | 174 ++++++++ internal/config/config.go | 46 ++ internal/templating/engine.go | 86 ++++ internal/templating/matcher.go | 58 +++ internal/templating/node.go | 122 ++++++ internal/templating/template.go | 148 +++++++ plugins/parsers/dropwizard/parser.go | 253 +++++++++++ plugins/parsers/dropwizard/parser_test.go | 485 ++++++++++++++++++++++ plugins/parsers/graphite/parser.go | 342 +-------------- plugins/parsers/graphite/parser_test.go | 5 +- plugins/parsers/registry.go | 47 +++ 13 files changed, 1436 insertions(+), 333 deletions(-) create mode 100644 internal/templating/engine.go create mode 100644 internal/templating/matcher.go create mode 100644 internal/templating/node.go create mode 100644 internal/templating/template.go create mode 100644 plugins/parsers/dropwizard/parser.go create mode 100644 plugins/parsers/dropwizard/parser_test.go diff --git a/Godeps b/Godeps index f69949c29..784c6044c 100644 --- a/Godeps +++ b/Godeps @@ -72,6 +72,8 @@ github.com/StackExchange/wmi f3e2bae1e0cb5aef83e319133eabfee30013a4a5 github.com/streadway/amqp 63795daa9a446c920826655f26ba31c81c860fd6 github.com/stretchr/objx 1a9d0bb9f541897e62256577b352fdbc1fb4fd94 github.com/stretchr/testify 4d4bfba8f1d1027c4fdbe371823030df51419987 +github.com/tidwall/gjson 0623bd8fbdbf97cc62b98d15108832851a658e59 +github.com/tidwall/match 173748da739a410c5b0b813b956f89ff94730b4c github.com/vjeantet/grok d73e972b60935c7fec0b4ffbc904ed39ecaf7efe github.com/wvanbergen/kafka bc265fedb9ff5b5c5d3c0fdcef4a819b3523d3ee github.com/wvanbergen/kazoo-go 968957352185472eacb69215fa3dbfcfdbac1096 diff --git a/README.md b/README.md index c2a5fb064..917975c78 100644 --- a/README.md +++ b/README.md @@ -256,6 +256,7 @@ formats may be used with input plugins supporting the 
`data_format` option: * [Value](./docs/DATA_FORMATS_INPUT.md#value) * [Nagios](./docs/DATA_FORMATS_INPUT.md#nagios) * [Collectd](./docs/DATA_FORMATS_INPUT.md#collectd) +* [Dropwizard](./docs/DATA_FORMATS_INPUT.md#dropwizard) ## Processor Plugins diff --git a/docs/DATA_FORMATS_INPUT.md b/docs/DATA_FORMATS_INPUT.md index 1b6c0b683..64097e7d6 100644 --- a/docs/DATA_FORMATS_INPUT.md +++ b/docs/DATA_FORMATS_INPUT.md @@ -8,6 +8,7 @@ Telegraf is able to parse the following input data formats into metrics: 1. [Value](https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md#value), ie: 45 or "booyah" 1. [Nagios](https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md#nagios) (exec input only) 1. [Collectd](https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md#collectd) +1. [Dropwizard](https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md#dropwizard) Telegraf metrics, like InfluxDB [points](https://docs.influxdata.com/influxdb/v0.10/write_protocols/line/), @@ -479,3 +480,176 @@ You can also change the path to the typesdb or add additional typesdb using ## Path of to TypesDB specifications collectd_typesdb = ["/usr/share/collectd/types.db"] ``` + +# Dropwizard: + +The dropwizard format can parse the JSON representation of a single dropwizard metric registry. By default, tags are parsed from metric names as if they were actual influxdb line protocol keys (`measurement<,tag_set>`) which can be overridden by defining custom [measurement & tag templates](./DATA_FORMATS_INPUT.md#measurement--tag-templates). All field values are collected as float64 fields. 
+ +A typical JSON of a dropwizard metric registry: + +```json +{ + "version": "3.0.0", + "counters" : { + "measurement,tag1=green" : { + "count" : 1 + } + }, + "meters" : { + "measurement" : { + "count" : 1, + "m15_rate" : 1.0, + "m1_rate" : 1.0, + "m5_rate" : 1.0, + "mean_rate" : 1.0, + "units" : "events/second" + } + }, + "gauges" : { + "measurement" : { + "value" : 1 + } + }, + "histograms" : { + "measurement" : { + "count" : 1, + "max" : 1.0, + "mean" : 1.0, + "min" : 1.0, + "p50" : 1.0, + "p75" : 1.0, + "p95" : 1.0, + "p98" : 1.0, + "p99" : 1.0, + "p999" : 1.0, + "stddev" : 1.0 + } + }, + "timers" : { + "measurement" : { + "count" : 1, + "max" : 1.0, + "mean" : 1.0, + "min" : 1.0, + "p50" : 1.0, + "p75" : 1.0, + "p95" : 1.0, + "p98" : 1.0, + "p99" : 1.0, + "p999" : 1.0, + "stddev" : 1.0, + "m15_rate" : 1.0, + "m1_rate" : 1.0, + "m5_rate" : 1.0, + "mean_rate" : 1.0, + "duration_units" : "seconds", + "rate_units" : "calls/second" + } + } +} +``` + +Would get translated into 5 different metrics: + +``` +measurement,metric_type=counter,tag1=green count=1 +measurement,metric_type=meter count=1,m15_rate=1.0,m1_rate=1.0,m5_rate=1.0,mean_rate=1.0 +measurement,metric_type=gauge value=1 +measurement,metric_type=histogram count=1,max=1.0,mean=1.0,min=1.0,p50=1.0,p75=1.0,p95=1.0,p98=1.0,p99=1.0,p999=1.0,stddev=1.0 +measurement,metric_type=timer count=1,max=1.0,mean=1.0,min=1.0,p50=1.0,p75=1.0,p95=1.0,p98=1.0,p99=1.0,p999=1.0,stddev=1.0,m15_rate=1.0,m1_rate=1.0,m5_rate=1.0,mean_rate=1.0 +``` + +You may also parse a dropwizard registry from any JSON document which contains a dropwizard registry in some inner field. +Eg. 
to parse the following JSON document: + +```json +{ + "time" : "2017-02-22T14:33:03.662+02:00", + "tags" : { + "tag1" : "green", + "tag2" : "yellow" + }, + "metrics" : { + "counters" : { + "measurement" : { + "count" : 1 + } + }, + "meters" : {}, + "gauges" : {}, + "histograms" : {}, + "timers" : {} + } +} +``` +and translate it into: + +``` +measurement,metric_type=counter,tag1=green,tag2=yellow count=1 1487766783662000000 +``` + +you simply need to use the following additional configuration properties: + +```toml +dropwizard_metric_registry_path = "metrics" +dropwizard_time_path = "time" +dropwizard_time_format = "2006-01-02T15:04:05Z07:00" +dropwizard_tags_path = "tags" +## tag paths per tag are supported too, eg. +#[inputs.yourinput.dropwizard_tag_paths] +# tag1 = "tags.tag1" +# tag2 = "tags.tag2" +``` + + +For more information about the dropwizard json format see +[here](http://metrics.dropwizard.io/3.1.0/manual/json/). + +#### Dropwizard Configuration: + +```toml +[[inputs.exec]] + ## Commands array + commands = ["curl http://localhost:8080/sys/metrics"] + timeout = "5s" + + ## Data format to consume. + ## Each data format has its own unique set of configuration options, read + ## more about them here: + ## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md + data_format = "dropwizard" + + ## Used by the templating engine to join matched values when cardinality is > 1 + separator = "_" + + ## Each template line requires a template pattern. It can have an optional + ## filter before the template and separated by spaces. It can also have optional extra + ## tags following the template. Multiple tags should be separated by commas and no spaces + ## similar to the line protocol format. There can be only one default template. + ## Templates support below format: + ## 1. filter + template + ## 2. filter + template + extra tag(s) + ## 3. filter + template with field key + ## 4. 
default template + ## By providing an empty template array, templating is disabled and measurements are parsed as influxdb line protocol keys (measurement<,tag_set>) + templates = [] + + ## You may use an appropriate [gjson path](https://github.com/tidwall/gjson#path-syntax) + ## to locate the metric registry within the JSON document + # dropwizard_metric_registry_path = "metrics" + + ## You may use an appropriate [gjson path](https://github.com/tidwall/gjson#path-syntax) + ## to locate the default time of the measurements within the JSON document + # dropwizard_time_path = "time" + # dropwizard_time_format = "2006-01-02T15:04:05Z07:00" + + ## You may use an appropriate [gjson path](https://github.com/tidwall/gjson#path-syntax) + ## to locate the tags map within the JSON document + # dropwizard_tags_path = "tags" + + ## You may even use tag paths per tag + # [inputs.exec.dropwizard_tag_paths] + # tag1 = "tags.tag1" + # tag2 = "tags.tag2" + +``` \ No newline at end of file diff --git a/internal/config/config.go b/internal/config/config.go index 2aaa2da19..8488df28a 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -1272,6 +1272,47 @@ func buildParser(name string, tbl *ast.Table) (parsers.Parser, error) { } } + if node, ok := tbl.Fields["dropwizard_metric_registry_path"]; ok { + if kv, ok := node.(*ast.KeyValue); ok { + if str, ok := kv.Value.(*ast.String); ok { + c.DropwizardMetricRegistryPath = str.Value + } + } + } + if node, ok := tbl.Fields["dropwizard_time_path"]; ok { + if kv, ok := node.(*ast.KeyValue); ok { + if str, ok := kv.Value.(*ast.String); ok { + c.DropwizardTimePath = str.Value + } + } + } + if node, ok := tbl.Fields["dropwizard_time_format"]; ok { + if kv, ok := node.(*ast.KeyValue); ok { + if str, ok := kv.Value.(*ast.String); ok { + c.DropwizardTimeFormat = str.Value + } + } + } + if node, ok := tbl.Fields["dropwizard_tags_path"]; ok { + if kv, ok := node.(*ast.KeyValue); ok { + if str, ok := kv.Value.(*ast.String); ok { + 
c.DropwizardTagsPath = str.Value + } + } + } + c.DropwizardTagPathsMap = make(map[string]string) + if node, ok := tbl.Fields["dropwizard_tag_paths"]; ok { + if subtbl, ok := node.(*ast.Table); ok { + for name, val := range subtbl.Fields { + if kv, ok := val.(*ast.KeyValue); ok { + if str, ok := kv.Value.(*ast.String); ok { + c.DropwizardTagPathsMap[name] = str.Value + } + } + } + } + } + c.MetricName = name delete(tbl.Fields, "data_format") @@ -1282,6 +1323,11 @@ func buildParser(name string, tbl *ast.Table) (parsers.Parser, error) { delete(tbl.Fields, "collectd_auth_file") delete(tbl.Fields, "collectd_security_level") delete(tbl.Fields, "collectd_typesdb") + delete(tbl.Fields, "dropwizard_metric_registry_path") + delete(tbl.Fields, "dropwizard_time_path") + delete(tbl.Fields, "dropwizard_time_format") + delete(tbl.Fields, "dropwizard_tags_path") + delete(tbl.Fields, "dropwizard_tag_paths") return parsers.NewParser(c) } diff --git a/internal/templating/engine.go b/internal/templating/engine.go new file mode 100644 index 000000000..65d15a426 --- /dev/null +++ b/internal/templating/engine.go @@ -0,0 +1,86 @@ +package templating + +import ( + "sort" + "strings" +) + +const ( + // DefaultSeparator is the default separation character to use when separating template parts. + DefaultSeparator = "." 
+) + +// Engine uses a Matcher to retrieve the appropriate template and applies the template +// to the input string +type Engine struct { + joiner string + matcher *matcher +} + +// Apply extracts the template fields from the given line and returns the measurement +// name, tags and field name +func (e *Engine) Apply(line string) (string, map[string]string, string, error) { + return e.matcher.match(line).Apply(line, e.joiner) +} + +// NewEngine creates a new templating engine +func NewEngine(joiner string, defaultTemplate *Template, templates []string) (*Engine, error) { + engine := Engine{ + joiner: joiner, + matcher: newMatcher(defaultTemplate), + } + templateSpecs := parseTemplateSpecs(templates) + + for _, templateSpec := range templateSpecs { + if err := engine.matcher.addSpec(templateSpec); err != nil { + return nil, err + } + } + + return &engine, nil +} + +func parseTemplateSpecs(templates []string) templateSpecs { + tmplts := templateSpecs{} + for _, pattern := range templates { + tmplt := templateSpec{ + separator: DefaultSeparator, + } + + // Format is [separator] [filter]