Add ability to set measurement from matched text in grok parser (#4433)

This commit is contained in:
maxunt 2018-08-17 13:45:22 -07:00 committed by Daniel Nelson
parent 34614582a7
commit 9e0eb0c0e0
10 changed files with 126 additions and 88 deletions

View File

@ -670,6 +670,66 @@ The best way to get acquainted with grok patterns is to read the logstash docs,
which are available here:
https://www.elastic.co/guide/en/logstash/current/plugins-filters-grok.html
The grok parser uses a slightly modified version of logstash "grok"
patterns, with the format:
```
%{<capture_syntax>[:<semantic_name>][:<modifier>]}
```
The `capture_syntax` defines the grok pattern that's used to parse the input
line and the `semantic_name` is used to name the field or tag. The optional
extension `modifier` controls the data type that the parsed item is converted
to, or applies other special handling.
By default all named captures are converted into string fields.
Timestamp modifiers can be used to convert captures to the timestamp of the
parsed metric. If no timestamp is parsed the metric will be created using the
current time.
You must capture at least one field per line.
- Available modifiers:
- string (default if nothing is specified)
- int
- float
- duration (i.e. 5.23ms gets converted to int nanoseconds)
- tag (converts the field into a tag)
- drop (drops the field completely)
- measurement (use the matched text as the measurement name; see the example after the modifier lists below)
- Timestamp modifiers:
- ts (This will auto-learn the timestamp format)
- ts-ansic ("Mon Jan _2 15:04:05 2006")
- ts-unix ("Mon Jan _2 15:04:05 MST 2006")
- ts-ruby ("Mon Jan 02 15:04:05 -0700 2006")
- ts-rfc822 ("02 Jan 06 15:04 MST")
- ts-rfc822z ("02 Jan 06 15:04 -0700")
- ts-rfc850 ("Monday, 02-Jan-06 15:04:05 MST")
- ts-rfc1123 ("Mon, 02 Jan 2006 15:04:05 MST")
- ts-rfc1123z ("Mon, 02 Jan 2006 15:04:05 -0700")
- ts-rfc3339 ("2006-01-02T15:04:05Z07:00")
- ts-rfc3339nano ("2006-01-02T15:04:05.999999999Z07:00")
- ts-httpd ("02/Jan/2006:15:04:05 -0700")
- ts-epoch (seconds since unix epoch, may contain decimal)
- ts-epochnano (nanoseconds since unix epoch)
- ts-syslog ("Jan 02 15:04:05", parsed time is set to the current year)
- ts-"CUSTOM"
CUSTOM time layouts must be within quotes and be the representation of the
"reference time", which is `Mon Jan 2 15:04:05 -0700 MST 2006`.
To match a comma decimal point you can use a period in the pattern string. For example, `%{TIMESTAMP:timestamp:ts-"2006-01-02 15:04:05.000"}` can be used to match `"2018-01-02 15:04:05,000"`.
See https://golang.org/pkg/time/#Parse for more details.
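For example (the field names and log line here are invented for illustration, not taken from the built-in patterns), the pattern `%{WORD:metric_name:measurement} %{NUMBER:value:float}` applied to the line `cpu_usage 42.5` produces a metric named `cpu_usage` with a single float field `value=42.5`.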
Telegraf has many of its own [built-in patterns](./grok/patterns/influx-patterns),
as well as support for most of
[logstash's builtin patterns](https://github.com/logstash-plugins/logstash-patterns-core/blob/master/patterns/grok-patterns).
_Golang regular expressions do not support lookahead or lookbehind.
Logstash patterns that depend on these are not supported._
If you need help building patterns to match your logs,
you will find the https://grokdebug.herokuapp.com application quite useful!
#### Grok Configuration:
```toml
[[inputs.file]]
@ -714,65 +774,6 @@ which are available here:
grok_timezone = "Canada/Eastern"
```
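No new configuration option is needed for this change; the `measurement` modifier is used inside the patterns themselves, for instance an entry along the lines of `grok_patterns = ["%{WORD:name:measurement} %{NUMBER:value:float}"]` (pattern invented for illustration) names each metric after the first word of the matched line.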
The grok parser uses a slightly modified version of logstash "grok"
patterns, with the format:
```
%{<capture_syntax>[:<semantic_name>][:<modifier>]}
```
The `capture_syntax` defines the grok pattern that's used to parse the input
line and the `semantic_name` is used to name the field or tag. The extension
`modifier` controls the data type that the parsed item is converted to or
other special handling.
By default all named captures are converted into string fields.
Timestamp modifiers can be used to convert captures to the timestamp of the
parsed metric. If no timestamp is parsed the metric will be created using the
current time.
You must capture at least one field per line.
- Available modifiers:
- string (default if nothing is specified)
- int
- float
- duration (ie, 5.23ms gets converted to int nanoseconds)
- tag (converts the field into a tag)
- drop (drops the field completely)
- Timestamp modifiers:
- ts (This will auto-learn the timestamp format)
- ts-ansic ("Mon Jan _2 15:04:05 2006")
- ts-unix ("Mon Jan _2 15:04:05 MST 2006")
- ts-ruby ("Mon Jan 02 15:04:05 -0700 2006")
- ts-rfc822 ("02 Jan 06 15:04 MST")
- ts-rfc822z ("02 Jan 06 15:04 -0700")
- ts-rfc850 ("Monday, 02-Jan-06 15:04:05 MST")
- ts-rfc1123 ("Mon, 02 Jan 2006 15:04:05 MST")
- ts-rfc1123z ("Mon, 02 Jan 2006 15:04:05 -0700")
- ts-rfc3339 ("2006-01-02T15:04:05Z07:00")
- ts-rfc3339nano ("2006-01-02T15:04:05.999999999Z07:00")
- ts-httpd ("02/Jan/2006:15:04:05 -0700")
- ts-epoch (seconds since unix epoch, may contain decimal)
- ts-epochnano (nanoseconds since unix epoch)
- ts-syslog ("Jan 02 15:04:05", parsed time is set to the current year)
- ts-"CUSTOM"
CUSTOM time layouts must be within quotes and be the representation of the
"reference time", which is `Mon Jan 2 15:04:05 -0700 MST 2006`.
To match a comma decimal point you can use a period. For example `%{TIMESTAMP:timestamp:ts-"2006-01-02 15:04:05.000"}` can be used to match `"2018-01-02 15:04:05,000"`
To match a comma decimal point you can use a period in the pattern string.
See https://golang.org/pkg/time/#Parse for more details.
Telegraf has many of its own [built-in patterns](./grok/patterns/influx-patterns),
as well as support for most of
[logstash's builtin patterns](https://github.com/logstash-plugins/logstash-patterns-core/blob/master/patterns/grok-patterns).
_Golang regular expressions do not support lookahead or lookbehind.
logstash patterns that depend on these are not supported._
If you need help building patterns to match your logs,
you will find the https://grokdebug.herokuapp.com application quite useful!
#### Timestamp Examples
This example input and config parses a file using a custom timestamp conversion:

View File

@ -14,7 +14,7 @@ use the [tail input plugin](/plugins/inputs/tail) instead.
## ** as a "super asterisk". ie:
## /var/log/**.log -> recursively find all .log files in /var/log
## /var/log/*/*.log -> find all .log files with a parent dir in /var/log
## /var/log/apache.log -> only tail the apache log file
## /var/log/apache.log -> only read the apache log file
files = ["/var/log/apache/access.log"]
## Data format to consume.

View File

@ -6,7 +6,7 @@ services:
volumes:
- ./telegraf.conf:/telegraf.conf
- ../../../../telegraf:/telegraf
- ./json_a.log:/var/log/test.log
- ./dev/json_a.log:/var/log/test.log
entrypoint:
- /telegraf
- --config

View File

@ -1,14 +0,0 @@
{
"parent": {
"child": 3.0,
"ignored_child": "hi"
},
"ignored_null": null,
"integer": 4,
"list": [3, 4],
"ignored_parent": {
"another_ignored_null": null,
"ignored_string": "hello, world!"
},
"another_list": [4]
}

View File

@ -11,9 +11,8 @@ import (
)
type File struct {
Files []string `toml:"files"`
FromBeginning bool
parser parsers.Parser
Files []string `toml:"files"`
parser parsers.Parser
filenames []string
}
@ -24,7 +23,7 @@ const sampleConfig = `
## ** as a "super asterisk". ie:
## /var/log/**.log -> recursively find all .log files in /var/log
## /var/log/*/*.log -> find all .log files with a parent dir in /var/log
## /var/log/apache.log -> only tail the apache log file
## /var/log/apache.log -> only read the apache log file
files = ["/var/log/apache/access.log"]
## The dataformat to be read from files
@ -40,7 +39,7 @@ func (f *File) SampleConfig() string {
}
func (f *File) Description() string {
return "reload and gather from file[s] on telegraf's interval"
return "Reload and gather from file[s] on telegraf's interval."
}
func (f *File) Gather(acc telegraf.Accumulator) error {

View File

@ -14,26 +14,26 @@ import (
func TestRefreshFilePaths(t *testing.T) {
wd, err := os.Getwd()
r := File{
Files: []string{filepath.Join(wd, "testfiles/**.log")},
Files: []string{filepath.Join(wd, "dev/testfiles/**.log")},
}
err = r.refreshFilePaths()
require.NoError(t, err)
assert.Equal(t, len(r.filenames), 2)
assert.Equal(t, 2, len(r.filenames))
}
func TestJSONParserCompile(t *testing.T) {
var acc testutil.Accumulator
wd, _ := os.Getwd()
r := File{
Files: []string{filepath.Join(wd, "testfiles/json_a.log")},
Files: []string{filepath.Join(wd, "dev/testfiles/json_a.log")},
}
parserConfig := parsers.Config{
DataFormat: "json",
TagKeys: []string{"parent_ignored_child"},
}
nParser, err := parsers.NewParser(&parserConfig)
r.parser = nParser
assert.NoError(t, err)
r.parser = nParser
r.Gather(&acc)
assert.Equal(t, map[string]string{"parent_ignored_child": "hi"}, acc.Metrics[0].Tags)
@ -44,7 +44,7 @@ func TestGrokParser(t *testing.T) {
wd, _ := os.Getwd()
var acc testutil.Accumulator
r := File{
Files: []string{filepath.Join(wd, "testfiles/grok_a.log")},
Files: []string{filepath.Join(wd, "dev/testfiles/grok_a.log")},
}
parserConfig := parsers.Config{
@ -57,5 +57,5 @@ func TestGrokParser(t *testing.T) {
assert.NoError(t, err)
err = r.Gather(&acc)
assert.Equal(t, 2, len(acc.Metrics))
assert.Equal(t, len(acc.Metrics), 2)
}

View File

@ -38,6 +38,7 @@ var timeLayouts = map[string]string{
}
const (
MEASUREMENT = "measurement"
INT = "int"
TAG = "tag"
FLOAT = "float"
@ -217,7 +218,6 @@ func (p *Parser) ParseLine(line string) (telegraf.Metric, error) {
if k == "" || v == "" {
continue
}
// t is the modifier of the field
var t string
// check if pattern has some modifiers
@ -239,6 +239,8 @@ func (p *Parser) ParseLine(line string) (telegraf.Metric, error) {
}
switch t {
case MEASUREMENT:
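// the matched text becomes the metric's measurement name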
p.Measurement = v
case INT:
iv, err := strconv.ParseInt(v, 10, 64)
if err != nil {
@ -350,7 +352,7 @@ func (p *Parser) ParseLine(line string) (telegraf.Metric, error) {
}
if len(fields) == 0 {
return nil, fmt.Errorf("logparser_grok: must have one or more fields")
return nil, fmt.Errorf("grok: must have one or more fields")
}
return metric.New(p.Measurement, tags, fields, p.tsModder.tsMod(timestamp))
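The tests added below exercise exactly this path. As a further minimal sketch (the import path, pattern, and log line are assumptions for illustration, not part of this commit), the new modifier could be driven directly through the parser's public API:

```go
package main

import (
	"fmt"
	"log"

	// assumed import path for the grok parser package changed in this diff
	"github.com/influxdata/telegraf/plugins/parsers/grok"
)

func main() {
	p := &grok.Parser{
		// first captured word names the metric; the second becomes a float field
		Patterns: []string{"%{WORD:name:measurement} %{NUMBER:value:float}"},
	}
	if err := p.Compile(); err != nil {
		log.Fatal(err)
	}

	m, err := p.ParseLine("cpu_usage 42.5")
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println(m.Name())   // cpu_usage
	fmt.Println(m.Fields()) // map[value:42.5]
}
```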

View File

@ -1,6 +1,7 @@
package grok
import (
"log"
"testing"
"time"
@ -959,3 +960,52 @@ func TestReplaceTimestampComma(t *testing.T) {
// Convert nanoseconds to milliseconds for the comparison
require.Equal(t, 555, m.Time().Nanosecond()/1000000)
}
func TestDynamicMeasurementModifier(t *testing.T) {
p := &Parser{
Patterns: []string{"%{TEST}"},
CustomPatterns: "TEST %{NUMBER:var1:tag} %{NUMBER:var2:float} %{WORD:test:measurement}",
}
require.NoError(t, p.Compile())
m, err := p.ParseLine("4 5 hello")
require.NoError(t, err)
require.Equal(t, "hello", m.Name())
}
func TestStaticMeasurementModifier(t *testing.T) {
p := &Parser{
Patterns: []string{"%{WORD:hi:measurement} %{NUMBER:num:string}"},
}
require.NoError(t, p.Compile())
m, err := p.ParseLine("test_name 42")
log.Printf("%v", m)
require.NoError(t, err)
require.Equal(t, "test_name", m.Name())
}
// tests that when more than one measurement modifier is present, the top-level match is used as the measurement name
func TestTwoMeasurementModifier(t *testing.T) {
p := &Parser{
Patterns: []string{"%{TEST:test_name:measurement}"},
CustomPatterns: "TEST %{NUMBER:var1:tag} %{NUMBER:var2:measurement} %{WORD:var3:measurement}",
}
require.NoError(t, p.Compile())
m, err := p.ParseLine("4 5 hello")
require.NoError(t, err)
require.Equal(t, "4 5 hello", m.Name())
}
func TestMeasurementModifierNoName(t *testing.T) {
p := &Parser{
Patterns: []string{"%{TEST}"},
CustomPatterns: "TEST %{NUMBER:var1:tag} %{NUMBER:var2:float} %{WORD:hi:measurement}",
}
require.NoError(t, p.Compile())
m, err := p.ParseLine("4 5 hello")
require.NoError(t, err)
require.Equal(t, "hello", m.Name())
}