diff --git a/docs/DATA_FORMATS_INPUT.md b/docs/DATA_FORMATS_INPUT.md
index 753523843..ded0170ec 100644
--- a/docs/DATA_FORMATS_INPUT.md
+++ b/docs/DATA_FORMATS_INPUT.md
@@ -670,6 +670,66 @@ The best way to get acquainted with grok patterns is to read the logstash docs,
 which are available here:
   https://www.elastic.co/guide/en/logstash/current/plugins-filters-grok.html
 
+The grok parser uses a slightly modified version of logstash "grok"
+patterns, with the format:
+
+```
+%{<capture_syntax>[:<semantic_name>][:<modifier>]}
+```
+
+The `capture_syntax` defines the grok pattern that's used to parse the input
+line and the `semantic_name` is used to name the field or tag. The extension
+`modifier` controls the data type that the parsed item is converted to or
+other special handling.
+
+By default, all named captures are converted into string fields.
+Timestamp modifiers can be used to convert captures to the timestamp of the
+parsed metric. If no timestamp is parsed, the metric will be created using the
+current time.
+
+You must capture at least one field per line.
+
+- Available modifiers:
+  - string (default if nothing is specified)
+  - int
+  - float
+  - duration (i.e., 5.23ms gets converted to int nanoseconds)
+  - tag (converts the field into a tag)
+  - drop (drops the field completely)
+  - measurement (use the matched text as the measurement name)
+- Timestamp modifiers:
+  - ts (This will auto-learn the timestamp format)
+  - ts-ansic ("Mon Jan _2 15:04:05 2006")
+  - ts-unix ("Mon Jan _2 15:04:05 MST 2006")
+  - ts-ruby ("Mon Jan 02 15:04:05 -0700 2006")
+  - ts-rfc822 ("02 Jan 06 15:04 MST")
+  - ts-rfc822z ("02 Jan 06 15:04 -0700")
+  - ts-rfc850 ("Monday, 02-Jan-06 15:04:05 MST")
+  - ts-rfc1123 ("Mon, 02 Jan 2006 15:04:05 MST")
+  - ts-rfc1123z ("Mon, 02 Jan 2006 15:04:05 -0700")
+  - ts-rfc3339 ("2006-01-02T15:04:05Z07:00")
+  - ts-rfc3339nano ("2006-01-02T15:04:05.999999999Z07:00")
+  - ts-httpd ("02/Jan/2006:15:04:05 -0700")
+  - ts-epoch (seconds since unix epoch, may contain decimal)
+  - ts-epochnano (nanoseconds since unix epoch)
+  - ts-syslog ("Jan 02 15:04:05", parsed time is set to the current year)
+  - ts-"CUSTOM"
+
+CUSTOM time layouts must be within quotes and be the representation of the
+"reference time", which is `Mon Jan 2 15:04:05 -0700 MST 2006`.
+To match a comma decimal point, use a period in the pattern string. For example,
+`%{TIMESTAMP:timestamp:ts-"2006-01-02 15:04:05.000"}` can be used to match `"2018-01-02 15:04:05,000"`.
+See https://golang.org/pkg/time/#Parse for more details.
+
+Telegraf has many of its own [built-in patterns](./grok/patterns/influx-patterns),
+as well as support for most of
+[logstash's built-in patterns](https://github.com/logstash-plugins/logstash-patterns-core/blob/master/patterns/grok-patterns).
+_Golang regular expressions do not support lookahead or lookbehind.
+Logstash patterns that depend on these are not supported._
+
+If you need help building patterns to match your logs,
+you will find the https://grokdebug.herokuapp.com application quite useful!
+
 #### Grok Configuration:
 ```toml
 [[inputs.file]]
@@ -714,65 +774,6 @@ which are available here:
   grok_timezone = "Canada/Eastern"
 ```
 
-The grok parser uses a slightly modified version of logstash "grok"
-patterns, with the format:
-
-```
-%{<capture_syntax>[:<semantic_name>][:<modifier>]}
-```
-
-The `capture_syntax` defines the grok pattern that's used to parse the input
-line and the `semantic_name` is used to name the field or tag. The extension
-`modifier` controls the data type that the parsed item is converted to or
-other special handling.
-
-By default all named captures are converted into string fields.
-Timestamp modifiers can be used to convert captures to the timestamp of the
-parsed metric. If no timestamp is parsed the metric will be created using the
-current time.
-
-You must capture at least one field per line.
-
-- Available modifiers:
-  - string (default if nothing is specified)
-  - int
-  - float
-  - duration (ie, 5.23ms gets converted to int nanoseconds)
-  - tag (converts the field into a tag)
-  - drop (drops the field completely)
-- Timestamp modifiers:
-  - ts (This will auto-learn the timestamp format)
-  - ts-ansic ("Mon Jan _2 15:04:05 2006")
-  - ts-unix ("Mon Jan _2 15:04:05 MST 2006")
-  - ts-ruby ("Mon Jan 02 15:04:05 -0700 2006")
-  - ts-rfc822 ("02 Jan 06 15:04 MST")
-  - ts-rfc822z ("02 Jan 06 15:04 -0700")
-  - ts-rfc850 ("Monday, 02-Jan-06 15:04:05 MST")
-  - ts-rfc1123 ("Mon, 02 Jan 2006 15:04:05 MST")
-  - ts-rfc1123z ("Mon, 02 Jan 2006 15:04:05 -0700")
-  - ts-rfc3339 ("2006-01-02T15:04:05Z07:00")
-  - ts-rfc3339nano ("2006-01-02T15:04:05.999999999Z07:00")
-  - ts-httpd ("02/Jan/2006:15:04:05 -0700")
-  - ts-epoch (seconds since unix epoch, may contain decimal)
-  - ts-epochnano (nanoseconds since unix epoch)
-  - ts-syslog ("Jan 02 15:04:05", parsed time is set to the current year)
-  - ts-"CUSTOM"
-
-CUSTOM time layouts must be within quotes and be the representation of the
-"reference time", which is `Mon Jan 2 15:04:05 -0700 MST 2006`.
-To match a comma decimal point you can use a period. For example `%{TIMESTAMP:timestamp:ts-"2006-01-02 15:04:05.000"}` can be used to match `"2018-01-02 15:04:05,000"`
-To match a comma decimal point you can use a period in the pattern string.
-See https://golang.org/pkg/time/#Parse for more details.
-
-Telegraf has many of its own [built-in patterns](./grok/patterns/influx-patterns),
-as well as support for most of
-[logstash's builtin patterns](https://github.com/logstash-plugins/logstash-patterns-core/blob/master/patterns/grok-patterns).
-_Golang regular expressions do not support lookahead or lookbehind.
-logstash patterns that depend on these are not supported._
-
-If you need help building patterns to match your logs,
-you will find the https://grokdebug.herokuapp.com application quite useful!
-
 #### Timestamp Examples
 
 This example input and config parses a file using a custom timestamp conversion:
diff --git a/plugins/inputs/file/README.md b/plugins/inputs/file/README.md
index 73a3a2362..4358b67ad 100644
--- a/plugins/inputs/file/README.md
+++ b/plugins/inputs/file/README.md
@@ -14,7 +14,7 @@ use the [tail input plugin](/plugins/inputs/tail) instead.
   ## ** as a "super asterisk". ie:
   ##   /var/log/**.log     -> recursively find all .log files in /var/log
   ##   /var/log/*/*.log    -> find all .log files with a parent dir in /var/log
-  ##   /var/log/apache.log -> only tail the apache log file
+  ##   /var/log/apache.log -> only read the apache log file
   files = ["/var/log/apache/access.log"]
 
   ## Data format to consume.
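The DATA_FORMATS_INPUT.md hunks above document the `%{<capture_syntax>[:<semantic_name>][:<modifier>]}` syntax, the type/tag/drop/measurement modifiers, and the custom reference-time layouts. As a rough standalone sketch of how those pieces fit together, the grok parser touched by this patch can be driven directly; the pattern, the field names (`level`, `status`, `elapsed`, `timestamp`), the fallback measurement name, and the sample line below are invented for illustration and are not part of the patch:

```go
package main

import (
	"fmt"
	"log"

	"github.com/influxdata/telegraf/plugins/parsers/grok"
)

func main() {
	// Hypothetical pattern: "level" becomes a tag, "status" an int field,
	// "elapsed" a float field, and the ISO8601 capture becomes the metric
	// timestamp via a custom reference-time layout.
	p := &grok.Parser{
		Measurement: "example_log", // fallback name; a measurement modifier would override it
		Patterns: []string{
			`%{WORD:level:tag} %{NUMBER:status:int} %{NUMBER:elapsed:float} %{TIMESTAMP_ISO8601:timestamp:ts-"2006-01-02 15:04:05"}`,
		},
	}
	if err := p.Compile(); err != nil {
		log.Fatal(err)
	}

	m, err := p.ParseLine("warn 404 0.123 2018-07-05 16:04:05")
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println(m.Name(), m.Tags(), m.Fields(), m.Time())
}
```

With that sample line, `level` ends up as a tag, `status` and `elapsed` as typed fields, and the metric time is taken from the line rather than from the current time.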
diff --git a/plugins/inputs/file/dev/docker-compose.yml b/plugins/inputs/file/dev/docker-compose.yml
index 3c16fca90..efce389f7 100644
--- a/plugins/inputs/file/dev/docker-compose.yml
+++ b/plugins/inputs/file/dev/docker-compose.yml
@@ -6,7 +6,7 @@ services:
     volumes:
       - ./telegraf.conf:/telegraf.conf
       - ../../../../telegraf:/telegraf
-      - ./json_a.log:/var/log/test.log
+      - ./dev/json_a.log:/var/log/test.log
     entrypoint:
       - /telegraf
       - --config
diff --git a/plugins/inputs/file/dev/json_a.log b/plugins/inputs/file/dev/json_a.log
deleted file mode 100644
index 0f52e9d1e..000000000
--- a/plugins/inputs/file/dev/json_a.log
+++ /dev/null
@@ -1,14 +0,0 @@
-{
-"parent": {
-    "child": 3.0,
-    "ignored_child": "hi"
-},
-"ignored_null": null,
-"integer": 4,
-"list": [3, 4],
-"ignored_parent": {
-    "another_ignored_null": null,
-    "ignored_string": "hello, world!"
-},
-"another_list": [4]
-}
diff --git a/plugins/inputs/file/testfiles/grok_a.log b/plugins/inputs/file/dev/testfiles/grok_a.log
similarity index 100%
rename from plugins/inputs/file/testfiles/grok_a.log
rename to plugins/inputs/file/dev/testfiles/grok_a.log
diff --git a/plugins/inputs/file/testfiles/json_a.log b/plugins/inputs/file/dev/testfiles/json_a.log
similarity index 100%
rename from plugins/inputs/file/testfiles/json_a.log
rename to plugins/inputs/file/dev/testfiles/json_a.log
diff --git a/plugins/inputs/file/file.go b/plugins/inputs/file/file.go
index 2779561fc..d6714301e 100644
--- a/plugins/inputs/file/file.go
+++ b/plugins/inputs/file/file.go
@@ -11,9 +11,8 @@ import (
 )
 
 type File struct {
-	Files         []string `toml:"files"`
-	FromBeginning bool
-	parser        parsers.Parser
+	Files  []string `toml:"files"`
+	parser parsers.Parser
 
 	filenames []string
 }
@@ -24,7 +23,7 @@
   ## ** as a "super asterisk". ie:
   ##   /var/log/**.log     -> recursively find all .log files in /var/log
   ##   /var/log/*/*.log    -> find all .log files with a parent dir in /var/log
-  ##   /var/log/apache.log -> only tail the apache log file
+  ##   /var/log/apache.log -> only read the apache log file
   files = ["/var/log/apache/access.log"]
 
   ## The dataformat to be read from files
@@ -40,7 +39,7 @@ func (f *File) SampleConfig() string {
 }
 
 func (f *File) Description() string {
-	return "reload and gather from file[s] on telegraf's interval"
+	return "Reload and gather from file[s] on telegraf's interval."
 }
 
 func (f *File) Gather(acc telegraf.Accumulator) error {
diff --git a/plugins/inputs/file/file_test.go b/plugins/inputs/file/file_test.go
index 281056646..43322c2e8 100644
--- a/plugins/inputs/file/file_test.go
+++ b/plugins/inputs/file/file_test.go
@@ -14,26 +14,26 @@ import (
 func TestRefreshFilePaths(t *testing.T) {
 	wd, err := os.Getwd()
 	r := File{
-		Files: []string{filepath.Join(wd, "testfiles/**.log")},
+		Files: []string{filepath.Join(wd, "dev/testfiles/**.log")},
 	}
 
 	err = r.refreshFilePaths()
 	require.NoError(t, err)
-	assert.Equal(t, len(r.filenames), 2)
+	assert.Equal(t, 2, len(r.filenames))
 }
 
 func TestJSONParserCompile(t *testing.T) {
 	var acc testutil.Accumulator
 	wd, _ := os.Getwd()
 	r := File{
-		Files: []string{filepath.Join(wd, "testfiles/json_a.log")},
+		Files: []string{filepath.Join(wd, "dev/testfiles/json_a.log")},
 	}
 	parserConfig := parsers.Config{
 		DataFormat: "json",
 		TagKeys:    []string{"parent_ignored_child"},
 	}
 	nParser, err := parsers.NewParser(&parserConfig)
-	r.parser = nParser
 	assert.NoError(t, err)
+	r.parser = nParser
 
 	r.Gather(&acc)
 	assert.Equal(t, map[string]string{"parent_ignored_child": "hi"}, acc.Metrics[0].Tags)
@@ -44,7 +44,7 @@ func TestGrokParser(t *testing.T) {
 	wd, _ := os.Getwd()
 	var acc testutil.Accumulator
 	r := File{
-		Files: []string{filepath.Join(wd, "testfiles/grok_a.log")},
+		Files: []string{filepath.Join(wd, "dev/testfiles/grok_a.log")},
 	}
 
 	parserConfig := parsers.Config{
@@ -57,5 +57,5 @@
 	assert.NoError(t, err)
 	err = r.Gather(&acc)
 
-	assert.Equal(t, 2, len(acc.Metrics))
+	assert.Equal(t, len(acc.Metrics), 2)
 }
diff --git a/plugins/parsers/grok/parser.go b/plugins/parsers/grok/parser.go
index 096cb8ed8..bc65588eb 100644
--- a/plugins/parsers/grok/parser.go
+++ b/plugins/parsers/grok/parser.go
@@ -38,6 +38,7 @@ var timeLayouts = map[string]string{
 }
 
 const (
+	MEASUREMENT       = "measurement"
 	INT               = "int"
 	TAG               = "tag"
 	FLOAT             = "float"
@@ -217,7 +218,6 @@ func (p *Parser) ParseLine(line string) (telegraf.Metric, error) {
 		if k == "" || v == "" {
 			continue
 		}
 
-		// t is the modifier of the field
 		var t string
 		// check if pattern has some modifiers
@@ -239,6 +239,8 @@ func (p *Parser) ParseLine(line string) (telegraf.Metric, error) {
 		}
 
 		switch t {
+		case MEASUREMENT:
+			p.Measurement = v
 		case INT:
 			iv, err := strconv.ParseInt(v, 10, 64)
 			if err != nil {
@@ -350,7 +352,7 @@ func (p *Parser) ParseLine(line string) (telegraf.Metric, error) {
 	}
 
 	if len(fields) == 0 {
-		return nil, fmt.Errorf("logparser_grok: must have one or more fields")
+		return nil, fmt.Errorf("grok: must have one or more fields")
 	}
 
 	return metric.New(p.Measurement, tags, fields, p.tsModder.tsMod(timestamp))
diff --git a/plugins/parsers/grok/parser_test.go b/plugins/parsers/grok/parser_test.go
index 09f8fa16d..8133d3021 100644
--- a/plugins/parsers/grok/parser_test.go
+++ b/plugins/parsers/grok/parser_test.go
@@ -1,6 +1,7 @@
 package grok
 
 import (
+	"log"
 	"testing"
 	"time"
 
@@ -959,3 +960,52 @@ func TestReplaceTimestampComma(t *testing.T) {
 	//Convert Nanosecond to milisecond for compare
 	require.Equal(t, 555, m.Time().Nanosecond()/1000000)
 }
+
+func TestDynamicMeasurementModifier(t *testing.T) {
+	p := &Parser{
+		Patterns:       []string{"%{TEST}"},
+		CustomPatterns: "TEST %{NUMBER:var1:tag} %{NUMBER:var2:float} %{WORD:test:measurement}",
+	}
+
+	require.NoError(t, p.Compile())
+	m, err := p.ParseLine("4 5 hello")
+	require.NoError(t, err)
+	require.Equal(t, "hello", m.Name())
+}
+
+func TestStaticMeasurementModifier(t *testing.T) {
+	p := &Parser{
+		Patterns: []string{"%{WORD:hi:measurement} %{NUMBER:num:string}"},
+	}
+
+	require.NoError(t, p.Compile())
+	m, err := p.ParseLine("test_name 42")
+	log.Printf("%v", m)
+	require.NoError(t, err)
+	require.Equal(t, "test_name", m.Name())
+}
+
+// tests that the top level measurement name is used
+func TestTwoMeasurementModifier(t *testing.T) {
+	p := &Parser{
+		Patterns:       []string{"%{TEST:test_name:measurement}"},
+		CustomPatterns: "TEST %{NUMBER:var1:tag} %{NUMBER:var2:measurement} %{WORD:var3:measurement}",
+	}
+
+	require.NoError(t, p.Compile())
+	m, err := p.ParseLine("4 5 hello")
+	require.NoError(t, err)
+	require.Equal(t, "4 5 hello", m.Name())
+}
+
+func TestMeasurementModifierNoName(t *testing.T) {
+	p := &Parser{
+		Patterns:       []string{"%{TEST}"},
+		CustomPatterns: "TEST %{NUMBER:var1:tag} %{NUMBER:var2:float} %{WORD:hi:measurement}",
+	}
+
+	require.NoError(t, p.Compile())
+	m, err := p.ParseLine("4 5 hello")
+	require.NoError(t, err)
+	require.Equal(t, "hello", m.Name())
+}
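The new tests above pin down the behaviour of the `measurement` modifier added in parser.go: the matched text replaces the parser's static `Measurement` name, and the capture does not produce a field. As a minimal hedged sketch of using it outside the test suite (the pattern, the fallback name `file_log`, and the input line are hypothetical; the `Parser` fields and methods are the ones exercised by the tests in this diff):

```go
package main

import (
	"fmt"
	"log"

	"github.com/influxdata/telegraf/plugins/parsers/grok"
)

func main() {
	p := &grok.Parser{
		// Static fallback, used only when no capture carries the measurement modifier.
		Measurement: "file_log",
		// Hypothetical pattern: the first word of each line names the metric.
		Patterns: []string{`%{WORD:name:measurement} %{NUMBER:value:float}`},
	}
	if err := p.Compile(); err != nil {
		log.Fatal(err)
	}

	m, err := p.ParseLine("cpu_temp 42.5")
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println(m.Name())   // "cpu_temp", taken from the matched text rather than the fallback
	fmt.Println(m.Fields()) // map[value:42.5]
}
```

Because the measurement capture never becomes a field, at least one other non-drop capture is still needed per line, matching the "must have one or more fields" check in ParseLine.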