Add ability to set measurement from matched text in grok parser (#4433)
This commit is contained in:
parent 34614582a7
commit 9e0eb0c0e0

@@ -670,6 +670,66 @@ The best way to get acquainted with grok patterns is to read the logstash docs,
 which are available here:
 https://www.elastic.co/guide/en/logstash/current/plugins-filters-grok.html

+The grok parser uses a slightly modified version of logstash "grok"
+patterns, with the format:
+
+```
+%{<capture_syntax>[:<semantic_name>][:<modifier>]}
+```
+
+The `capture_syntax` defines the grok pattern that's used to parse the input
+line and the `semantic_name` is used to name the field or tag. The extension
+`modifier` controls the data type that the parsed item is converted to or
+other special handling.
+
+By default all named captures are converted into string fields.
+Timestamp modifiers can be used to convert captures to the timestamp of the
+parsed metric. If no timestamp is parsed the metric will be created using the
+current time.
+
+You must capture at least one field per line.
+
+- Available modifiers:
+  - string (default if nothing is specified)
+  - int
+  - float
+  - duration (ie, 5.23ms gets converted to int nanoseconds)
+  - tag (converts the field into a tag)
+  - drop (drops the field completely)
+  - measurement (use the matched text as the measurement name)
+- Timestamp modifiers:
+  - ts (This will auto-learn the timestamp format)
+  - ts-ansic ("Mon Jan _2 15:04:05 2006")
+  - ts-unix ("Mon Jan _2 15:04:05 MST 2006")
+  - ts-ruby ("Mon Jan 02 15:04:05 -0700 2006")
+  - ts-rfc822 ("02 Jan 06 15:04 MST")
+  - ts-rfc822z ("02 Jan 06 15:04 -0700")
+  - ts-rfc850 ("Monday, 02-Jan-06 15:04:05 MST")
+  - ts-rfc1123 ("Mon, 02 Jan 2006 15:04:05 MST")
+  - ts-rfc1123z ("Mon, 02 Jan 2006 15:04:05 -0700")
+  - ts-rfc3339 ("2006-01-02T15:04:05Z07:00")
+  - ts-rfc3339nano ("2006-01-02T15:04:05.999999999Z07:00")
+  - ts-httpd ("02/Jan/2006:15:04:05 -0700")
+  - ts-epoch (seconds since unix epoch, may contain decimal)
+  - ts-epochnano (nanoseconds since unix epoch)
+  - ts-syslog ("Jan 02 15:04:05", parsed time is set to the current year)
+  - ts-"CUSTOM"
+
+CUSTOM time layouts must be within quotes and be the representation of the
+"reference time", which is `Mon Jan 2 15:04:05 -0700 MST 2006`.
+To match a comma decimal point you can use a period. For example `%{TIMESTAMP:timestamp:ts-"2006-01-02 15:04:05.000"}` can be used to match `"2018-01-02 15:04:05,000"`.
+To match a comma decimal point you can use a period in the pattern string.
+See https://golang.org/pkg/time/#Parse for more details.
+
+Telegraf has many of its own [built-in patterns](./grok/patterns/influx-patterns),
+as well as support for most of
+[logstash's builtin patterns](https://github.com/logstash-plugins/logstash-patterns-core/blob/master/patterns/grok-patterns).
+_Golang regular expressions do not support lookahead or lookbehind.
+logstash patterns that depend on these are not supported._
+
+If you need help building patterns to match your logs,
+you will find the https://grokdebug.herokuapp.com application quite useful!
+
 #### Grok Configuration:
 ```toml
 [[inputs.file]]
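
As a quick illustration of the new `measurement` modifier documented above, the sketch below drives the grok parser the same way the tests added at the bottom of this commit do (`Parser`, `Patterns`, `Compile`, and `ParseLine` are taken from those tests; the import path and pattern text are assumptions for the example, not shown in this diff):

```go
package main

import (
	"fmt"
	"log"

	// Assumed import path for the grok parser exercised by this commit's tests.
	"github.com/influxdata/telegraf/plugins/parsers/grok"
)

func main() {
	// %{WORD:name:measurement} uses the matched word as the metric name
	// instead of emitting a field called "name". A second capture is still
	// needed because every parsed line must produce at least one field.
	p := &grok.Parser{
		Patterns: []string{"%{WORD:name:measurement} %{NUMBER:value:int}"},
	}
	if err := p.Compile(); err != nil {
		log.Fatal(err)
	}

	m, err := p.ParseLine("apache_access 42")
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println(m.Name()) // expected: "apache_access"
}
```

TestStaticMeasurementModifier and TestDynamicMeasurementModifier further down in this diff exercise exactly this behavior.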

@@ -714,65 +774,6 @@ which are available here:
 grok_timezone = "Canada/Eastern"
 ```

-The grok parser uses a slightly modified version of logstash "grok"
-patterns, with the format:
-
-```
-%{<capture_syntax>[:<semantic_name>][:<modifier>]}
-```
-
-The `capture_syntax` defines the grok pattern that's used to parse the input
-line and the `semantic_name` is used to name the field or tag. The extension
-`modifier` controls the data type that the parsed item is converted to or
-other special handling.
-
-By default all named captures are converted into string fields.
-Timestamp modifiers can be used to convert captures to the timestamp of the
-parsed metric. If no timestamp is parsed the metric will be created using the
-current time.
-
-You must capture at least one field per line.
-
-- Available modifiers:
-  - string (default if nothing is specified)
-  - int
-  - float
-  - duration (ie, 5.23ms gets converted to int nanoseconds)
-  - tag (converts the field into a tag)
-  - drop (drops the field completely)
-- Timestamp modifiers:
-  - ts (This will auto-learn the timestamp format)
-  - ts-ansic ("Mon Jan _2 15:04:05 2006")
-  - ts-unix ("Mon Jan _2 15:04:05 MST 2006")
-  - ts-ruby ("Mon Jan 02 15:04:05 -0700 2006")
-  - ts-rfc822 ("02 Jan 06 15:04 MST")
-  - ts-rfc822z ("02 Jan 06 15:04 -0700")
-  - ts-rfc850 ("Monday, 02-Jan-06 15:04:05 MST")
-  - ts-rfc1123 ("Mon, 02 Jan 2006 15:04:05 MST")
-  - ts-rfc1123z ("Mon, 02 Jan 2006 15:04:05 -0700")
-  - ts-rfc3339 ("2006-01-02T15:04:05Z07:00")
-  - ts-rfc3339nano ("2006-01-02T15:04:05.999999999Z07:00")
-  - ts-httpd ("02/Jan/2006:15:04:05 -0700")
-  - ts-epoch (seconds since unix epoch, may contain decimal)
-  - ts-epochnano (nanoseconds since unix epoch)
-  - ts-syslog ("Jan 02 15:04:05", parsed time is set to the current year)
-  - ts-"CUSTOM"
-
-CUSTOM time layouts must be within quotes and be the representation of the
-"reference time", which is `Mon Jan 2 15:04:05 -0700 MST 2006`.
-To match a comma decimal point you can use a period. For example `%{TIMESTAMP:timestamp:ts-"2006-01-02 15:04:05.000"}` can be used to match `"2018-01-02 15:04:05,000"`.
-To match a comma decimal point you can use a period in the pattern string.
-See https://golang.org/pkg/time/#Parse for more details.
-
-Telegraf has many of its own [built-in patterns](./grok/patterns/influx-patterns),
-as well as support for most of
-[logstash's builtin patterns](https://github.com/logstash-plugins/logstash-patterns-core/blob/master/patterns/grok-patterns).
-_Golang regular expressions do not support lookahead or lookbehind.
-logstash patterns that depend on these are not supported._
-
-If you need help building patterns to match your logs,
-you will find the https://grokdebug.herokuapp.com application quite useful!
-
 #### Timestamp Examples

 This example input and config parses a file using a custom timestamp conversion:
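
A side note on the `ts-"CUSTOM"` modifier described in the grok documentation above: custom layouts are written against Go's reference time, the same convention used by the standard library's `time.Parse`. A minimal, self-contained illustration (an editor's aside, not part of this commit and not the README's own example, which is truncated by this hunk):

```go
package main

import (
	"fmt"
	"log"
	"time"
)

func main() {
	// Go layouts are written as the fixed "reference time"
	// Mon Jan 2 15:04:05 -0700 MST 2006, so the README's
	// ts-"2006-01-02 15:04:05.000" layout corresponds to:
	layout := "2006-01-02 15:04:05.000"

	t, err := time.Parse(layout, "2018-01-02 15:04:05.555")
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println(t.UTC()) // 2018-01-02 15:04:05.555 +0000 UTC
}
```

The grok parser additionally accepts a comma as the decimal separator in the input line, as noted in the README text and exercised by TestReplaceTimestampComma further down.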

@@ -14,7 +14,7 @@ use the [tail input plugin](/plugins/inputs/tail) instead.
 ## ** as a "super asterisk". ie:
 ## /var/log/**.log -> recursively find all .log files in /var/log
 ## /var/log/*/*.log -> find all .log files with a parent dir in /var/log
-## /var/log/apache.log -> only tail the apache log file
+## /var/log/apache.log -> only read the apache log file
 files = ["/var/log/apache/access.log"]

 ## Data format to consume.

@@ -6,7 +6,7 @@ services:
 volumes:
 - ./telegraf.conf:/telegraf.conf
 - ../../../../telegraf:/telegraf
-- ./json_a.log:/var/log/test.log
+- ./dev/json_a.log:/var/log/test.log
 entrypoint:
 - /telegraf
 - --config

@@ -1,14 +0,0 @@
-{
-  "parent": {
-    "child": 3.0,
-    "ignored_child": "hi"
-  },
-  "ignored_null": null,
-  "integer": 4,
-  "list": [3, 4],
-  "ignored_parent": {
-    "another_ignored_null": null,
-    "ignored_string": "hello, world!"
-  },
-  "another_list": [4]
-}

@@ -12,7 +12,6 @@ import (

 type File struct {
 	Files []string `toml:"files"`
-	FromBeginning bool
 	parser parsers.Parser

 	filenames []string

@@ -24,7 +23,7 @@ const sampleConfig = `
 ## ** as a "super asterisk". ie:
 ## /var/log/**.log -> recursively find all .log files in /var/log
 ## /var/log/*/*.log -> find all .log files with a parent dir in /var/log
-## /var/log/apache.log -> only tail the apache log file
+## /var/log/apache.log -> only read the apache log file
 files = ["/var/log/apache/access.log"]

 ## The dataformat to be read from files

@@ -40,7 +39,7 @@ func (f *File) SampleConfig() string {
 }

 func (f *File) Description() string {
-	return "reload and gather from file[s] on telegraf's interval"
+	return "Reload and gather from file[s] on telegraf's interval."
 }

 func (f *File) Gather(acc telegraf.Accumulator) error {

@@ -14,26 +14,26 @@ import (
 func TestRefreshFilePaths(t *testing.T) {
 	wd, err := os.Getwd()
 	r := File{
-		Files: []string{filepath.Join(wd, "testfiles/**.log")},
+		Files: []string{filepath.Join(wd, "dev/testfiles/**.log")},
 	}

 	err = r.refreshFilePaths()
 	require.NoError(t, err)
-	assert.Equal(t, len(r.filenames), 2)
+	assert.Equal(t, 2, len(r.filenames))
 }
 func TestJSONParserCompile(t *testing.T) {
 	var acc testutil.Accumulator
 	wd, _ := os.Getwd()
 	r := File{
-		Files: []string{filepath.Join(wd, "testfiles/json_a.log")},
+		Files: []string{filepath.Join(wd, "dev/testfiles/json_a.log")},
 	}
 	parserConfig := parsers.Config{
 		DataFormat: "json",
 		TagKeys: []string{"parent_ignored_child"},
 	}
 	nParser, err := parsers.NewParser(&parserConfig)
-	r.parser = nParser
 	assert.NoError(t, err)
+	r.parser = nParser

 	r.Gather(&acc)
 	assert.Equal(t, map[string]string{"parent_ignored_child": "hi"}, acc.Metrics[0].Tags)

@@ -44,7 +44,7 @@ func TestGrokParser(t *testing.T) {
 	wd, _ := os.Getwd()
 	var acc testutil.Accumulator
 	r := File{
-		Files: []string{filepath.Join(wd, "testfiles/grok_a.log")},
+		Files: []string{filepath.Join(wd, "dev/testfiles/grok_a.log")},
 	}

 	parserConfig := parsers.Config{

@@ -57,5 +57,5 @@ func TestGrokParser(t *testing.T) {
 	assert.NoError(t, err)

 	err = r.Gather(&acc)
-	assert.Equal(t, 2, len(acc.Metrics))
+	assert.Equal(t, len(acc.Metrics), 2)
 }

@@ -38,6 +38,7 @@ var timeLayouts = map[string]string{
 }

 const (
+	MEASUREMENT = "measurement"
 	INT = "int"
 	TAG = "tag"
 	FLOAT = "float"

@@ -217,7 +218,6 @@ func (p *Parser) ParseLine(line string) (telegraf.Metric, error) {
 	if k == "" || v == "" {
 		continue
 	}
-
 	// t is the modifier of the field
 	var t string
 	// check if pattern has some modifiers

@@ -239,6 +239,8 @@ func (p *Parser) ParseLine(line string) (telegraf.Metric, error) {
 	}

 	switch t {
+	case MEASUREMENT:
+		p.Measurement = v
 	case INT:
 		iv, err := strconv.ParseInt(v, 10, 64)
 		if err != nil {

@@ -350,7 +352,7 @@ func (p *Parser) ParseLine(line string) (telegraf.Metric, error) {
 	}

 	if len(fields) == 0 {
-		return nil, fmt.Errorf("logparser_grok: must have one or more fields")
+		return nil, fmt.Errorf("grok: must have one or more fields")
 	}

 	return metric.New(p.Measurement, tags, fields, p.tsModder.tsMod(timestamp))

@@ -1,6 +1,7 @@
 package grok

 import (
+	"log"
 	"testing"
 	"time"


@@ -959,3 +960,52 @@ func TestReplaceTimestampComma(t *testing.T) {
 	//Convert Nanosecond to milisecond for compare
 	require.Equal(t, 555, m.Time().Nanosecond()/1000000)
 }
+
+func TestDynamicMeasurementModifier(t *testing.T) {
+	p := &Parser{
+		Patterns:       []string{"%{TEST}"},
+		CustomPatterns: "TEST %{NUMBER:var1:tag} %{NUMBER:var2:float} %{WORD:test:measurement}",
+	}
+
+	require.NoError(t, p.Compile())
+	m, err := p.ParseLine("4 5 hello")
+	require.NoError(t, err)
+	require.Equal(t, m.Name(), "hello")
+}
+
+func TestStaticMeasurementModifier(t *testing.T) {
+	p := &Parser{
+		Patterns: []string{"%{WORD:hi:measurement} %{NUMBER:num:string}"},
+	}
+
+	require.NoError(t, p.Compile())
+	m, err := p.ParseLine("test_name 42")
+	log.Printf("%v", m)
+	require.NoError(t, err)
+	require.Equal(t, "test_name", m.Name())
+}
+
+// tests that the top level measurement name is used
+func TestTwoMeasurementModifier(t *testing.T) {
+	p := &Parser{
+		Patterns:       []string{"%{TEST:test_name:measurement}"},
+		CustomPatterns: "TEST %{NUMBER:var1:tag} %{NUMBER:var2:measurement} %{WORD:var3:measurement}",
+	}
+
+	require.NoError(t, p.Compile())
+	m, err := p.ParseLine("4 5 hello")
+	require.NoError(t, err)
+	require.Equal(t, m.Name(), "4 5 hello")
+}
+
+func TestMeasurementModifierNoName(t *testing.T) {
+	p := &Parser{
+		Patterns:       []string{"%{TEST}"},
+		CustomPatterns: "TEST %{NUMBER:var1:tag} %{NUMBER:var2:float} %{WORD:hi:measurement}",
+	}
+
+	require.NoError(t, p.Compile())
+	m, err := p.ParseLine("4 5 hello")
+	require.NoError(t, err)
+	require.Equal(t, m.Name(), "hello")
+}