Add ability to set measurement from matched text in grok parser (#4433)
This commit is contained in:
parent
34614582a7
commit
9e0eb0c0e0
|
@ -670,6 +670,66 @@ The best way to get acquainted with grok patterns is to read the logstash docs,
|
|||
which are available here:
|
||||
https://www.elastic.co/guide/en/logstash/current/plugins-filters-grok.html
|
||||
|
||||
The grok parser uses a slightly modified version of logstash "grok"
|
||||
patterns, with the format:
|
||||
|
||||
```
|
||||
%{<capture_syntax>[:<semantic_name>][:<modifier>]}
|
||||
```
|
||||
|
||||
The `capture_syntax` defines the grok pattern that's used to parse the input
|
||||
line and the `semantic_name` is used to name the field or tag. The extension
|
||||
`modifier` controls the data type that the parsed item is converted to or
|
||||
other special handling.
|
||||
|
||||
By default all named captures are converted into string fields.
|
||||
Timestamp modifiers can be used to convert captures to the timestamp of the
|
||||
parsed metric. If no timestamp is parsed the metric will be created using the
|
||||
current time.
|
||||
|
||||
You must capture at least one field per line.
|
||||
|
||||
- Available modifiers:
|
||||
- string (default if nothing is specified)
|
||||
- int
|
||||
- float
|
||||
- duration (e.g., 5.23ms gets converted to int nanoseconds)
|
||||
- tag (converts the field into a tag)
|
||||
- drop (drops the field completely)
|
||||
- measurement (use the matched text as the measurement name)
|
||||
- Timestamp modifiers:
|
||||
- ts (This will auto-learn the timestamp format)
|
||||
- ts-ansic ("Mon Jan _2 15:04:05 2006")
|
||||
- ts-unix ("Mon Jan _2 15:04:05 MST 2006")
|
||||
- ts-ruby ("Mon Jan 02 15:04:05 -0700 2006")
|
||||
- ts-rfc822 ("02 Jan 06 15:04 MST")
|
||||
- ts-rfc822z ("02 Jan 06 15:04 -0700")
|
||||
- ts-rfc850 ("Monday, 02-Jan-06 15:04:05 MST")
|
||||
- ts-rfc1123 ("Mon, 02 Jan 2006 15:04:05 MST")
|
||||
- ts-rfc1123z ("Mon, 02 Jan 2006 15:04:05 -0700")
|
||||
- ts-rfc3339 ("2006-01-02T15:04:05Z07:00")
|
||||
- ts-rfc3339nano ("2006-01-02T15:04:05.999999999Z07:00")
|
||||
- ts-httpd ("02/Jan/2006:15:04:05 -0700")
|
||||
- ts-epoch (seconds since unix epoch, may contain decimal)
|
||||
- ts-epochnano (nanoseconds since unix epoch)
|
||||
- ts-syslog ("Jan 02 15:04:05", parsed time is set to the current year)
|
||||
- ts-"CUSTOM"
|
||||
|
||||
CUSTOM time layouts must be within quotes and be the representation of the
|
||||
"reference time", which is `Mon Jan 2 15:04:05 -0700 MST 2006`.
|
||||
To match a comma used as the decimal separator, use a period in the layout. For example, `%{TIMESTAMP:timestamp:ts-"2006-01-02 15:04:05.000"}` can be used to match `"2018-01-02 15:04:05,000"`.
|
||||
To match a comma decimal point you can use a period in the pattern string.
|
||||
See https://golang.org/pkg/time/#Parse for more details.
|
||||
|
||||
Telegraf has many of its own [built-in patterns](./grok/patterns/influx-patterns),
|
||||
as well as support for most of
|
||||
[logstash's builtin patterns](https://github.com/logstash-plugins/logstash-patterns-core/blob/master/patterns/grok-patterns).
|
||||
_Golang regular expressions do not support lookahead or lookbehind.
|
||||
Logstash patterns that depend on these are not supported._
|
||||
|
||||
If you need help building patterns to match your logs,
|
||||
you will find the https://grokdebug.herokuapp.com application quite useful!
|
||||
|
||||
#### Grok Configuration:
|
||||
```toml
|
||||
[[inputs.file]]
|
||||
|
@ -714,65 +774,6 @@ which are available here:
|
|||
grok_timezone = "Canada/Eastern"
|
||||
```
|
||||
|
||||
The grok parser uses a slightly modified version of logstash "grok"
|
||||
patterns, with the format:
|
||||
|
||||
```
|
||||
%{<capture_syntax>[:<semantic_name>][:<modifier>]}
|
||||
```
|
||||
|
||||
The `capture_syntax` defines the grok pattern that's used to parse the input
|
||||
line and the `semantic_name` is used to name the field or tag. The extension
|
||||
`modifier` controls the data type that the parsed item is converted to or
|
||||
other special handling.
|
||||
|
||||
By default all named captures are converted into string fields.
|
||||
Timestamp modifiers can be used to convert captures to the timestamp of the
|
||||
parsed metric. If no timestamp is parsed the metric will be created using the
|
||||
current time.
|
||||
|
||||
You must capture at least one field per line.
|
||||
|
||||
- Available modifiers:
|
||||
- string (default if nothing is specified)
|
||||
- int
|
||||
- float
|
||||
- duration (e.g., 5.23ms gets converted to int nanoseconds)
|
||||
- tag (converts the field into a tag)
|
||||
- drop (drops the field completely)
|
||||
- Timestamp modifiers:
|
||||
- ts (This will auto-learn the timestamp format)
|
||||
- ts-ansic ("Mon Jan _2 15:04:05 2006")
|
||||
- ts-unix ("Mon Jan _2 15:04:05 MST 2006")
|
||||
- ts-ruby ("Mon Jan 02 15:04:05 -0700 2006")
|
||||
- ts-rfc822 ("02 Jan 06 15:04 MST")
|
||||
- ts-rfc822z ("02 Jan 06 15:04 -0700")
|
||||
- ts-rfc850 ("Monday, 02-Jan-06 15:04:05 MST")
|
||||
- ts-rfc1123 ("Mon, 02 Jan 2006 15:04:05 MST")
|
||||
- ts-rfc1123z ("Mon, 02 Jan 2006 15:04:05 -0700")
|
||||
- ts-rfc3339 ("2006-01-02T15:04:05Z07:00")
|
||||
- ts-rfc3339nano ("2006-01-02T15:04:05.999999999Z07:00")
|
||||
- ts-httpd ("02/Jan/2006:15:04:05 -0700")
|
||||
- ts-epoch (seconds since unix epoch, may contain decimal)
|
||||
- ts-epochnano (nanoseconds since unix epoch)
|
||||
- ts-syslog ("Jan 02 15:04:05", parsed time is set to the current year)
|
||||
- ts-"CUSTOM"
|
||||
|
||||
CUSTOM time layouts must be within quotes and be the representation of the
|
||||
"reference time", which is `Mon Jan 2 15:04:05 -0700 MST 2006`.
|
||||
To match a comma used as the decimal separator, use a period in the layout. For example, `%{TIMESTAMP:timestamp:ts-"2006-01-02 15:04:05.000"}` can be used to match `"2018-01-02 15:04:05,000"`.
|
||||
To match a comma decimal point you can use a period in the pattern string.
|
||||
See https://golang.org/pkg/time/#Parse for more details.
|
||||
|
||||
Telegraf has many of its own [built-in patterns](./grok/patterns/influx-patterns),
|
||||
as well as support for most of
|
||||
[logstash's builtin patterns](https://github.com/logstash-plugins/logstash-patterns-core/blob/master/patterns/grok-patterns).
|
||||
_Golang regular expressions do not support lookahead or lookbehind.
|
||||
Logstash patterns that depend on these are not supported._
|
||||
|
||||
If you need help building patterns to match your logs,
|
||||
you will find the https://grokdebug.herokuapp.com application quite useful!
|
||||
|
||||
#### Timestamp Examples
|
||||
|
||||
This example input and config parses a file using a custom timestamp conversion:
|
||||
|
|
|
@ -14,7 +14,7 @@ use the [tail input plugin](/plugins/inputs/tail) instead.
|
|||
## ** as a "super asterisk". ie:
|
||||
## /var/log/**.log -> recursively find all .log files in /var/log
|
||||
## /var/log/*/*.log -> find all .log files with a parent dir in /var/log
|
||||
## /var/log/apache.log -> only tail the apache log file
|
||||
## /var/log/apache.log -> only read the apache log file
|
||||
files = ["/var/log/apache/access.log"]
|
||||
|
||||
## Data format to consume.
|
||||
|
|
|
@ -6,7 +6,7 @@ services:
|
|||
volumes:
|
||||
- ./telegraf.conf:/telegraf.conf
|
||||
- ../../../../telegraf:/telegraf
|
||||
- ./json_a.log:/var/log/test.log
|
||||
- ./dev/json_a.log:/var/log/test.log
|
||||
entrypoint:
|
||||
- /telegraf
|
||||
- --config
|
||||
|
|
|
@ -1,14 +0,0 @@
|
|||
{
|
||||
"parent": {
|
||||
"child": 3.0,
|
||||
"ignored_child": "hi"
|
||||
},
|
||||
"ignored_null": null,
|
||||
"integer": 4,
|
||||
"list": [3, 4],
|
||||
"ignored_parent": {
|
||||
"another_ignored_null": null,
|
||||
"ignored_string": "hello, world!"
|
||||
},
|
||||
"another_list": [4]
|
||||
}
|
|
@ -12,7 +12,6 @@ import (
|
|||
|
||||
type File struct {
|
||||
Files []string `toml:"files"`
|
||||
FromBeginning bool
|
||||
parser parsers.Parser
|
||||
|
||||
filenames []string
|
||||
|
@ -24,7 +23,7 @@ const sampleConfig = `
|
|||
## ** as a "super asterisk". ie:
|
||||
## /var/log/**.log -> recursively find all .log files in /var/log
|
||||
## /var/log/*/*.log -> find all .log files with a parent dir in /var/log
|
||||
## /var/log/apache.log -> only tail the apache log file
|
||||
## /var/log/apache.log -> only read the apache log file
|
||||
files = ["/var/log/apache/access.log"]
|
||||
|
||||
## The dataformat to be read from files
|
||||
|
@ -40,7 +39,7 @@ func (f *File) SampleConfig() string {
|
|||
}
|
||||
|
||||
func (f *File) Description() string {
|
||||
return "reload and gather from file[s] on telegraf's interval"
|
||||
return "Reload and gather from file[s] on telegraf's interval."
|
||||
}
|
||||
|
||||
func (f *File) Gather(acc telegraf.Accumulator) error {
|
||||
|
|
|
@ -14,26 +14,26 @@ import (
|
|||
func TestRefreshFilePaths(t *testing.T) {
|
||||
wd, err := os.Getwd()
|
||||
r := File{
|
||||
Files: []string{filepath.Join(wd, "testfiles/**.log")},
|
||||
Files: []string{filepath.Join(wd, "dev/testfiles/**.log")},
|
||||
}
|
||||
|
||||
err = r.refreshFilePaths()
|
||||
require.NoError(t, err)
|
||||
assert.Equal(t, len(r.filenames), 2)
|
||||
assert.Equal(t, 2, len(r.filenames))
|
||||
}
|
||||
func TestJSONParserCompile(t *testing.T) {
|
||||
var acc testutil.Accumulator
|
||||
wd, _ := os.Getwd()
|
||||
r := File{
|
||||
Files: []string{filepath.Join(wd, "testfiles/json_a.log")},
|
||||
Files: []string{filepath.Join(wd, "dev/testfiles/json_a.log")},
|
||||
}
|
||||
parserConfig := parsers.Config{
|
||||
DataFormat: "json",
|
||||
TagKeys: []string{"parent_ignored_child"},
|
||||
}
|
||||
nParser, err := parsers.NewParser(&parserConfig)
|
||||
r.parser = nParser
|
||||
assert.NoError(t, err)
|
||||
r.parser = nParser
|
||||
|
||||
r.Gather(&acc)
|
||||
assert.Equal(t, map[string]string{"parent_ignored_child": "hi"}, acc.Metrics[0].Tags)
|
||||
|
@ -44,7 +44,7 @@ func TestGrokParser(t *testing.T) {
|
|||
wd, _ := os.Getwd()
|
||||
var acc testutil.Accumulator
|
||||
r := File{
|
||||
Files: []string{filepath.Join(wd, "testfiles/grok_a.log")},
|
||||
Files: []string{filepath.Join(wd, "dev/testfiles/grok_a.log")},
|
||||
}
|
||||
|
||||
parserConfig := parsers.Config{
|
||||
|
@ -57,5 +57,5 @@ func TestGrokParser(t *testing.T) {
|
|||
assert.NoError(t, err)
|
||||
|
||||
err = r.Gather(&acc)
|
||||
assert.Equal(t, 2, len(acc.Metrics))
|
||||
assert.Equal(t, len(acc.Metrics), 2)
|
||||
}
|
||||
|
|
|
@ -38,6 +38,7 @@ var timeLayouts = map[string]string{
|
|||
}
|
||||
|
||||
const (
|
||||
MEASUREMENT = "measurement"
|
||||
INT = "int"
|
||||
TAG = "tag"
|
||||
FLOAT = "float"
|
||||
|
@ -217,7 +218,6 @@ func (p *Parser) ParseLine(line string) (telegraf.Metric, error) {
|
|||
if k == "" || v == "" {
|
||||
continue
|
||||
}
|
||||
|
||||
// t is the modifier of the field
|
||||
var t string
|
||||
// check if pattern has some modifiers
|
||||
|
@ -239,6 +239,8 @@ func (p *Parser) ParseLine(line string) (telegraf.Metric, error) {
|
|||
}
|
||||
|
||||
switch t {
|
||||
case MEASUREMENT:
|
||||
p.Measurement = v
|
||||
case INT:
|
||||
iv, err := strconv.ParseInt(v, 10, 64)
|
||||
if err != nil {
|
||||
|
@ -350,7 +352,7 @@ func (p *Parser) ParseLine(line string) (telegraf.Metric, error) {
|
|||
}
|
||||
|
||||
if len(fields) == 0 {
|
||||
return nil, fmt.Errorf("logparser_grok: must have one or more fields")
|
||||
return nil, fmt.Errorf("grok: must have one or more fields")
|
||||
}
|
||||
|
||||
return metric.New(p.Measurement, tags, fields, p.tsModder.tsMod(timestamp))
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
package grok
|
||||
|
||||
import (
|
||||
"log"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
|
@ -959,3 +960,52 @@ func TestReplaceTimestampComma(t *testing.T) {
|
|||
// Convert nanoseconds to milliseconds for comparison
|
||||
require.Equal(t, 555, m.Time().Nanosecond()/1000000)
|
||||
}
|
||||
|
||||
func TestDynamicMeasurementModifier(t *testing.T) {
|
||||
p := &Parser{
|
||||
Patterns: []string{"%{TEST}"},
|
||||
CustomPatterns: "TEST %{NUMBER:var1:tag} %{NUMBER:var2:float} %{WORD:test:measurement}",
|
||||
}
|
||||
|
||||
require.NoError(t, p.Compile())
|
||||
m, err := p.ParseLine("4 5 hello")
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, m.Name(), "hello")
|
||||
}
|
||||
|
||||
func TestStaticMeasurementModifier(t *testing.T) {
|
||||
p := &Parser{
|
||||
Patterns: []string{"%{WORD:hi:measurement} %{NUMBER:num:string}"},
|
||||
}
|
||||
|
||||
require.NoError(t, p.Compile())
|
||||
m, err := p.ParseLine("test_name 42")
|
||||
log.Printf("%v", m)
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, "test_name", m.Name())
|
||||
}
|
||||
|
||||
// tests that the top level measurement name is used
|
||||
func TestTwoMeasurementModifier(t *testing.T) {
|
||||
p := &Parser{
|
||||
Patterns: []string{"%{TEST:test_name:measurement}"},
|
||||
CustomPatterns: "TEST %{NUMBER:var1:tag} %{NUMBER:var2:measurement} %{WORD:var3:measurement}",
|
||||
}
|
||||
|
||||
require.NoError(t, p.Compile())
|
||||
m, err := p.ParseLine("4 5 hello")
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, m.Name(), "4 5 hello")
|
||||
}
|
||||
|
||||
func TestMeasurementModifierNoName(t *testing.T) {
|
||||
p := &Parser{
|
||||
Patterns: []string{"%{TEST}"},
|
||||
CustomPatterns: "TEST %{NUMBER:var1:tag} %{NUMBER:var2:float} %{WORD:hi:measurement}",
|
||||
}
|
||||
|
||||
require.NoError(t, p.Compile())
|
||||
m, err := p.ParseLine("4 5 hello")
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, m.Name(), "hello")
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue