Compare commits

...

30 Commits

Author SHA1 Message Date
Max U e4b6f236b6 fix unit tests for grok parser 2018-07-06 15:47:03 -07:00
Max U c6087abf09 add more unit tests to grok parser 2018-07-06 14:52:37 -07:00
Max U cafa95e536 logparser no longer uses seperate grok 2018-07-06 13:16:51 -07:00
Max U 8a9da28734 logparser is linked to grok parser 2018-07-06 11:22:14 -07:00
Max U 67db14332f still unfinished logparser changes 2018-07-03 16:27:11 -07:00
Max U bfc13a744b incomplete changes to logparser plugin 2018-07-03 15:45:15 -07:00
Max U 8063b38b2d address some of Daniel's comments 2018-07-03 11:29:11 -07:00
Max U 04f09d65bf grok parser func unexported 2018-06-28 14:45:14 -07:00
Max U 892c95aa6e update readmes 2018-06-27 13:30:59 -07:00
Max U aa750ec2b8 add reader README.md 2018-06-27 10:13:52 -07:00
Greg Linton 1be2a8eeaf Formatting and revert Makefile 2018-06-26 17:12:11 -06:00
Max U 7fa27f400d more condensing 2018-06-26 13:19:53 -07:00
Max U 001658af30 condense telegraf.conf 2018-06-26 13:18:43 -07:00
Max U e450b266ec remove comments 2018-06-26 13:10:46 -07:00
Max U a931eb1c90 update DATA_FORMATS_INPUT.MD to include grok 2018-06-26 12:06:35 -07:00
Max U bf7220d2ce add test file to docker spin up 2018-06-26 11:53:40 -07:00
Max U bbd68b3820 docker will spin up 2018-06-26 11:26:43 -07:00
Max U 79d9ea4761 add docker-image spin up for reader 2018-06-26 10:26:48 -07:00
Max U cc406299ba allow for import from plugins/all 2018-06-25 15:52:43 -07:00
Max U 9c845950a7 add grok as a top level parser, still need README 2018-06-25 15:32:27 -07:00
Max U f40371e361 add init function to reader 2018-06-25 10:15:32 -07:00
Max U 36a23ea1ba Merge branch 'master' into plugin/reader 2018-06-25 09:54:49 -07:00
Max U 554b960339 add setparser to reader 2018-06-25 09:53:35 -07:00
Max U 542c030dc8 knock more errors from test files 2018-06-21 16:23:06 -07:00
Max U 504d978446 clean up some test cases 2018-06-21 16:12:26 -07:00
Max U ec7f13111f add more test files 2018-06-21 16:06:36 -07:00
Max U 4e24a1bbe3 add grok as a top level parser 2018-06-21 15:56:20 -07:00
Max U 9c4b52256d tweak metric output 2018-06-21 13:13:46 -07:00
Max U 08a11d7bfd change config file 2018-06-21 11:44:02 -07:00
Max U e12eced211 input plugin that reads files each interval 2018-06-21 11:26:14 -07:00
22 changed files with 593 additions and 174 deletions

View File

@ -9,6 +9,7 @@ Telegraf is able to parse the following input data formats into metrics:
1. [Nagios](https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md#nagios) (exec input only)
1. [Collectd](https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md#collectd)
1. [Dropwizard](https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md#dropwizard)
1. [Grok](https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md#grok)
Telegraf metrics, like InfluxDB
[points](https://docs.influxdata.com/influxdb/v0.10/write_protocols/line/),
@ -651,5 +652,106 @@ For more information about the dropwizard json format see
# [inputs.exec.dropwizard_tag_paths]
# tag1 = "tags.tag1"
# tag2 = "tags.tag2"
```
#### Grok
Parse logstash-style "grok" patterns. Patterns can be added to patterns, or custom patterns read from custom_pattern_files.
# View logstash grok pattern docs here:
# https://www.elastic.co/guide/en/logstash/current/plugins-filters-grok.html
# All default logstash patterns are supported, these can be viewed here:
# https://github.com/logstash-plugins/logstash-patterns-core/blob/master/patterns/grok-patterns
# Available modifiers:
# string (default if nothing is specified)
# int
# float
# duration (ie, 5.23ms gets converted to int nanoseconds)
# tag (converts the field into a tag)
# drop (drops the field completely)
# Timestamp modifiers:
# ts-ansic ("Mon Jan _2 15:04:05 2006")
# ts-unix ("Mon Jan _2 15:04:05 MST 2006")
# ts-ruby ("Mon Jan 02 15:04:05 -0700 2006")
# ts-rfc822 ("02 Jan 06 15:04 MST")
# ts-rfc822z ("02 Jan 06 15:04 -0700")
# ts-rfc850 ("Monday, 02-Jan-06 15:04:05 MST")
# ts-rfc1123 ("Mon, 02 Jan 2006 15:04:05 MST")
# ts-rfc1123z ("Mon, 02 Jan 2006 15:04:05 -0700")
# ts-rfc3339 ("2006-01-02T15:04:05Z07:00")
# ts-rfc3339nano ("2006-01-02T15:04:05.999999999Z07:00")
# ts-httpd ("02/Jan/2006:15:04:05 -0700")
# ts-epoch (seconds since unix epoch)
# ts-epochnano (nanoseconds since unix epoch)
# ts-"CUSTOM"
# CUSTOM time layouts must be within quotes and be the representation of the
# "reference time", which is Mon Jan 2 15:04:05 -0700 MST 2006
# See https://golang.org/pkg/time/#Parse for more details.
# Example log file pattern, example log looks like this:
# [04/Jun/2016:12:41:45 +0100] 1.25 200 192.168.1.1 5.432µs
# Breakdown of the DURATION pattern below:
# NUMBER is a builtin logstash grok pattern matching float & int numbers.
# [nuµm]? is a regex specifying 0 or 1 of the characters within brackets.
# s is also regex, this pattern must end in "s".
# so DURATION will match something like '5.324ms' or '6.1µs' or '10s'
DURATION %{NUMBER}[nuµm]?s
RESPONSE_CODE %{NUMBER:response_code:tag}
RESPONSE_TIME %{DURATION:response_time_ns:duration}
EXAMPLE_LOG \[%{HTTPDATE:ts:ts-httpd}\] %{NUMBER:myfloat:float} %{RESPONSE_CODE} %{IPORHOST:clientip} %{RESPONSE_TIME}
# Wider-ranging username matching vs. logstash built-in %{USER}
NGUSERNAME [a-zA-Z0-9\.\@\-\+_%]+
NGUSER %{NGUSERNAME}
# Wider-ranging client IP matching
CLIENT (?:%{IPORHOST}|%{HOSTPORT}|::1)
##
## COMMON LOG PATTERNS
##
# apache & nginx logs, this is also known as the "common log format"
# see https://en.wikipedia.org/wiki/Common_Log_Format
COMMON_LOG_FORMAT %{CLIENT:client_ip} %{NOTSPACE:ident} %{NOTSPACE:auth} \[%{HTTPDATE:ts:ts-httpd}\] "(?:%{WORD:verb:tag} %{NOTSPACE:request}(?: HTTP/%{NUMBER:http_version:float})?|%{DATA})" %{NUMBER:resp_code:tag} (?:%{NUMBER:resp_bytes:int}|-)
# Combined log format is the same as the common log format but with the addition
# of two quoted strings at the end for "referrer" and "agent"
# See Examples at http://httpd.apache.org/docs/current/mod/mod_log_config.html
COMBINED_LOG_FORMAT %{COMMON_LOG_FORMAT} %{QS:referrer} %{QS:agent}
# HTTPD log formats
HTTPD20_ERRORLOG \[%{HTTPDERROR_DATE:timestamp}\] \[%{LOGLEVEL:loglevel:tag}\] (?:\[client %{IPORHOST:clientip}\] ){0,1}%{GREEDYDATA:errormsg}
HTTPD24_ERRORLOG \[%{HTTPDERROR_DATE:timestamp}\] \[%{WORD:module}:%{LOGLEVEL:loglevel:tag}\] \[pid %{POSINT:pid:int}:tid %{NUMBER:tid:int}\]( \(%{POSINT:proxy_errorcode:int}\)%{DATA:proxy_errormessage}:)?( \[client %{IPORHOST:client}:%{POSINT:clientport}\])? %{DATA:errorcode}: %{GREEDYDATA:message}
HTTPD_ERRORLOG %{HTTPD20_ERRORLOG}|%{HTTPD24_ERRORLOG}
#### Grok Configuration:
```toml
[[inputs.reader]]
## This is a list of patterns to check the given log file(s) for.
## Note that adding patterns here increases processing time. The most
## efficient configuration is to have one pattern per logparser.
## Other common built-in patterns are:
## %{COMMON_LOG_FORMAT} (plain apache & nginx access logs)
## %{COMBINED_LOG_FORMAT} (access logs + referrer & agent)
grok_patterns = ["%{COMBINED_LOG_FORMAT}"]
## Name of the outputted measurement name.
grok_name_override = "apache_access_log"
## Full path(s) to custom pattern files.
grok_custom_pattern_files = []
## Custom patterns can also be defined here. Put one pattern per line.
grok_custom_patterns = '''
## Timezone allows you to provide an override for timestamps that
## don't already include an offset
## e.g. 04/06/2016 12:41:45 data one two 5.43µs
##
## Default: "" which renders UTC
## Options are as follows:
## 1. Local -- interpret based on machine localtime
## 2. "Canada/Eastern" -- Unix TZ values like those found in https://en.wikipedia.org/wiki/List_of_tz_database_time_zones
## 3. UTC -- or blank/unspecified, will return timestamp in UTC
grok_timezone = "Canada/Eastern"
```

View File

@ -1338,6 +1338,59 @@ func buildParser(name string, tbl *ast.Table) (parsers.Parser, error) {
}
}
//for grok data_format
if node, ok := tbl.Fields["grok_named_patterns"]; ok {
if kv, ok := node.(*ast.KeyValue); ok {
if ary, ok := kv.Value.(*ast.Array); ok {
for _, elem := range ary.Value {
if str, ok := elem.(*ast.String); ok {
c.NamedPatterns = append(c.NamedPatterns, str.Value)
}
}
}
}
}
if node, ok := tbl.Fields["grok_patterns"]; ok {
if kv, ok := node.(*ast.KeyValue); ok {
if ary, ok := kv.Value.(*ast.Array); ok {
for _, elem := range ary.Value {
if str, ok := elem.(*ast.String); ok {
c.Patterns = append(c.Patterns, str.Value)
}
}
}
}
}
if node, ok := tbl.Fields["grok_custom_patterns"]; ok {
if kv, ok := node.(*ast.KeyValue); ok {
if str, ok := kv.Value.(*ast.String); ok {
c.CustomPatterns = str.Value
}
}
}
if node, ok := tbl.Fields["grok_custom_pattern_files"]; ok {
if kv, ok := node.(*ast.KeyValue); ok {
if ary, ok := kv.Value.(*ast.Array); ok {
for _, elem := range ary.Value {
if str, ok := elem.(*ast.String); ok {
c.CustomPatternFiles = append(c.CustomPatternFiles, str.Value)
}
}
}
}
}
if node, ok := tbl.Fields["grok_timezone"]; ok {
if kv, ok := node.(*ast.KeyValue); ok {
if str, ok := kv.Value.(*ast.String); ok {
c.TimeZone = str.Value
}
}
}
c.MetricName = name
delete(tbl.Fields, "data_format")
@ -1353,6 +1406,11 @@ func buildParser(name string, tbl *ast.Table) (parsers.Parser, error) {
delete(tbl.Fields, "dropwizard_time_format")
delete(tbl.Fields, "dropwizard_tags_path")
delete(tbl.Fields, "dropwizard_tag_paths")
delete(tbl.Fields, "grok_named_patterns")
delete(tbl.Fields, "grok_patterns")
delete(tbl.Fields, "grok_custom_patterns")
delete(tbl.Fields, "grok_custom_pattern_files")
delete(tbl.Fields, "grok_timezone")
return parsers.NewParser(c)
}

View File

@ -85,6 +85,7 @@ import (
_ "github.com/influxdata/telegraf/plugins/inputs/puppetagent"
_ "github.com/influxdata/telegraf/plugins/inputs/rabbitmq"
_ "github.com/influxdata/telegraf/plugins/inputs/raindrops"
_ "github.com/influxdata/telegraf/plugins/inputs/reader"
_ "github.com/influxdata/telegraf/plugins/inputs/redis"
_ "github.com/influxdata/telegraf/plugins/inputs/rethinkdb"
_ "github.com/influxdata/telegraf/plugins/inputs/riak"

View File

@ -14,9 +14,8 @@ import (
"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/internal/globpath"
"github.com/influxdata/telegraf/plugins/inputs"
"github.com/influxdata/telegraf/plugins/parsers"
// Parsers
"github.com/influxdata/telegraf/plugins/inputs/logparser/grok"
)
const (
@ -36,9 +35,10 @@ type logEntry struct {
// LogParserPlugin is the primary struct to implement the interface for logparser plugin
type LogParserPlugin struct {
Files []string
FromBeginning bool
WatchMethod string
Files []string
FromBeginning bool
WatchMethod string
MeasurementName string `toml:"measurement"`
tailers map[string]*tail.Tail
lines chan logEntry
@ -49,7 +49,13 @@ type LogParserPlugin struct {
sync.Mutex
GrokParser *grok.Parser `toml:"grok"`
GrokParser parsers.Parser `toml:"grok"`
Patterns []string
NamedPatterns []string
CustomPatterns string
CustomPatternFiles []string
TimeZone string
}
const sampleConfig = `
@ -132,6 +138,21 @@ func (l *LogParserPlugin) Start(acc telegraf.Accumulator) error {
// Looks for fields which implement LogParser interface
l.parsers = []LogParser{}
config := &parsers.Config{
Patterns: l.Patterns,
NamedPatterns: l.NamedPatterns,
CustomPatterns: l.CustomPatterns,
CustomPatternFiles: l.CustomPatternFiles,
TimeZone: l.TimeZone,
DataFormat: "grok",
}
var err error
l.GrokParser, err = parsers.NewParser(config)
if err != nil {
return err
}
s := reflect.ValueOf(l).Elem()
for i := 0; i < s.NumField(); i++ {
f := s.Field(i)
@ -152,13 +173,6 @@ func (l *LogParserPlugin) Start(acc telegraf.Accumulator) error {
return fmt.Errorf("logparser input plugin: no parser defined")
}
// compile log parser patterns:
for _, parser := range l.parsers {
if err := parser.Compile(); err != nil {
return err
}
}
l.wg.Add(1)
go l.parser()
@ -247,8 +261,8 @@ func (l *LogParserPlugin) receiver(tailer *tail.Tail) {
}
}
// parser is launched as a goroutine to watch the l.lines channel.
// when a line is available, parser parses it and adds the metric(s) to the
// parse is launched as a goroutine to watch the l.lines channel.
// when a line is available, parse parses it and adds the metric(s) to the
// accumulator.
func (l *LogParserPlugin) parser() {
defer l.wg.Done()
@ -265,18 +279,17 @@ func (l *LogParserPlugin) parser() {
continue
}
}
for _, parser := range l.parsers {
m, err = parser.ParseLine(entry.line)
if err == nil {
if m != nil {
tags := m.Tags()
tags["path"] = entry.path
l.acc.AddFields(m.Name(), m.Fields(), tags, m.Time())
}
} else {
log.Println("E! Error parsing log line: " + err.Error())
m, err = l.GrokParser.ParseLine(entry.line)
if err == nil {
if m != nil {
tags := m.Tags()
tags["path"] = entry.path
l.acc.AddFields(l.MeasurementName, m.Fields(), tags, m.Time())
}
} else {
log.Println("E! Error parsing log line: " + err.Error())
}
}
}

View File

@ -2,6 +2,7 @@ package logparser
import (
"io/ioutil"
"log"
"os"
"runtime"
"strings"
@ -9,8 +10,6 @@ import (
"github.com/influxdata/telegraf/testutil"
"github.com/influxdata/telegraf/plugins/inputs/logparser/grok"
"github.com/stretchr/testify/assert"
)
@ -26,15 +25,12 @@ func TestStartNoParsers(t *testing.T) {
func TestGrokParseLogFilesNonExistPattern(t *testing.T) {
thisdir := getCurrentDir()
p := &grok.Parser{
Patterns: []string{"%{FOOBAR}"},
CustomPatternFiles: []string{thisdir + "grok/testdata/test-patterns"},
}
logparser := &LogParserPlugin{
FromBeginning: true,
Files: []string{thisdir + "grok/testdata/*.log"},
GrokParser: p,
FromBeginning: true,
Files: []string{thisdir + "grok/testdata/*.log"},
Patterns: []string{"%{FOOBAR}"},
CustomPatternFiles: []string{thisdir + "grok/testdata/test-patterns"},
}
acc := testutil.Accumulator{}
@ -44,15 +40,13 @@ func TestGrokParseLogFilesNonExistPattern(t *testing.T) {
func TestGrokParseLogFiles(t *testing.T) {
thisdir := getCurrentDir()
p := &grok.Parser{
Patterns: []string{"%{TEST_LOG_A}", "%{TEST_LOG_B}"},
CustomPatternFiles: []string{thisdir + "grok/testdata/test-patterns"},
}
logparser := &LogParserPlugin{
FromBeginning: true,
Files: []string{thisdir + "grok/testdata/*.log"},
GrokParser: p,
Patterns: []string{"%{TEST_LOG_A}", "%{TEST_LOG_B}"},
CustomPatternFiles: []string{thisdir + "grok/testdata/test-patterns"},
FromBeginning: true,
Files: []string{thisdir + "grok/testdata/*.log"},
MeasurementName: "logparser_grok",
}
acc := testutil.Accumulator{}
@ -62,6 +56,7 @@ func TestGrokParseLogFiles(t *testing.T) {
logparser.Stop()
log.Printf("metric[0] %v, tags: %v, fields: %v", acc.Metrics[0].Measurement, acc.Metrics[0].Tags, acc.Metrics[0].Fields)
acc.AssertContainsTaggedFields(t, "logparser_grok",
map[string]interface{}{
"clientip": "192.168.1.1",
@ -91,15 +86,13 @@ func TestGrokParseLogFilesAppearLater(t *testing.T) {
assert.NoError(t, err)
thisdir := getCurrentDir()
p := &grok.Parser{
Patterns: []string{"%{TEST_LOG_A}", "%{TEST_LOG_B}"},
CustomPatternFiles: []string{thisdir + "grok/testdata/test-patterns"},
}
logparser := &LogParserPlugin{
FromBeginning: true,
Files: []string{emptydir + "/*.log"},
GrokParser: p,
FromBeginning: true,
Files: []string{emptydir + "/*.log"},
Patterns: []string{"%{TEST_LOG_A}", "%{TEST_LOG_B}"},
CustomPatternFiles: []string{thisdir + "grok/testdata/test-patterns"},
MeasurementName: "logparser_grok",
}
acc := testutil.Accumulator{}
@ -130,16 +123,13 @@ func TestGrokParseLogFilesAppearLater(t *testing.T) {
// pattern available for test_b.log
func TestGrokParseLogFilesOneBad(t *testing.T) {
thisdir := getCurrentDir()
p := &grok.Parser{
Patterns: []string{"%{TEST_LOG_A}", "%{TEST_LOG_BAD}"},
CustomPatternFiles: []string{thisdir + "grok/testdata/test-patterns"},
}
assert.NoError(t, p.Compile())
logparser := &LogParserPlugin{
FromBeginning: true,
Files: []string{thisdir + "grok/testdata/test_a.log"},
GrokParser: p,
FromBeginning: true,
Files: []string{thisdir + "grok/testdata/test_a.log"},
Patterns: []string{"%{TEST_LOG_A}", "%{TEST_LOG_BAD}"},
CustomPatternFiles: []string{thisdir + "grok/testdata/test-patterns"},
MeasurementName: "logparser_grok",
}
acc := testutil.Accumulator{}

View File

@ -0,0 +1,23 @@
# Reader Input Plugin
The Reader Plugin updates a list of files every interval and parses the data inside.
Files will always be read from the beginning.
This plugin can parse any "data_format" formats.
### Configuration:
```toml
[[inputs.reader]]
## Files to parse each interval.
## These accept standard unix glob matching rules, but with the addition of
## ** as a "super asterisk". ie:
## /var/log/**.log -> recursively find all .log files in /var/log
## /var/log/*/*.log -> find all .log files with a parent dir in /var/log
## /var/log/apache.log -> only tail the apache log file
files = ["/var/log/apache/access.log"]
## The dataformat to be read from files
## Each data format has its own unique set of configuration options, read
## more about them here:
## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md
data_format = ""
```

View File

@ -0,0 +1,13 @@
version: '3'
services:
telegraf:
image: glinton/scratch
volumes:
- ./telegraf.conf:/telegraf.conf
- ../../../../telegraf:/telegraf
- ./json_a.log:/var/log/test.log
entrypoint:
- /telegraf
- --config
- /telegraf.conf

View File

@ -0,0 +1,14 @@
{
"parent": {
"child": 3.0,
"ignored_child": "hi"
},
"ignored_null": null,
"integer": 4,
"list": [3, 4],
"ignored_parent": {
"another_ignored_null": null,
"ignored_string": "hello, world!"
},
"another_list": [4]
}

View File

@ -0,0 +1,7 @@
[[inputs.reader]]
files = ["/var/log/test.log"]
data_format = "json"
name_override = "json_reader"
[[outputs.file]]
files = ["stdout"]

View File

@ -0,0 +1,95 @@
package reader
import (
"fmt"
"io/ioutil"
"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/internal/globpath"
"github.com/influxdata/telegraf/plugins/inputs"
"github.com/influxdata/telegraf/plugins/parsers"
)
type Reader struct {
Filepaths []string `toml:"files"`
FromBeginning bool
parser parsers.Parser
Filenames []string
}
const sampleConfig = `## Files to parse each interval.
## These accept standard unix glob matching rules, but with the addition of
## ** as a "super asterisk". ie:
## /var/log/**.log -> recursively find all .log files in /var/log
## /var/log/*/*.log -> find all .log files with a parent dir in /var/log
## /var/log/apache.log -> only tail the apache log file
files = ["/var/log/apache/access.log"]
## The dataformat to be read from files
## Each data format has its own unique set of configuration options, read
## more about them here:
## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md
data_format = ""
`
// SampleConfig returns the default configuration of the Input
func (r *Reader) SampleConfig() string {
return sampleConfig
}
func (r *Reader) Description() string {
return "reload and gather from file[s] on telegraf's interval"
}
func (r *Reader) Gather(acc telegraf.Accumulator) error {
r.refreshFilePaths()
for _, k := range r.Filenames {
metrics, err := r.readMetric(k)
if err != nil {
return err
}
for _, m := range metrics {
acc.AddFields(m.Name(), m.Fields(), m.Tags(), m.Time())
}
}
return nil
}
func (r *Reader) SetParser(p parsers.Parser) {
r.parser = p
}
func (r *Reader) refreshFilePaths() error {
var allFiles []string
for _, filepath := range r.Filepaths {
g, err := globpath.Compile(filepath)
if err != nil {
return fmt.Errorf("E! Error Glob: %v could not be compiled, %s", filepath, err)
}
files := g.Match()
for k := range files {
allFiles = append(allFiles, k)
}
}
r.Filenames = allFiles
return nil
}
func (r *Reader) readMetric(filename string) ([]telegraf.Metric, error) {
fileContents, err := ioutil.ReadFile(filename)
if err != nil {
return nil, fmt.Errorf("E! Error file: %v could not be read, %s", filename, err)
}
return r.parser.Parse(fileContents)
}
func init() {
inputs.Add("reader", func() telegraf.Input {
return &Reader{}
})
}

View File

@ -0,0 +1,64 @@
package reader
import (
"runtime"
"strings"
"testing"
"github.com/influxdata/telegraf/plugins/parsers"
"github.com/influxdata/telegraf/testutil"
"github.com/stretchr/testify/assert"
)
func TestRefreshFilePaths(t *testing.T) {
testDir := getPluginDir()
r := Reader{
Filepaths: []string{testDir + "/logparser/grok/testdata/**.log"},
}
r.refreshFilePaths()
assert.Equal(t, len(r.Filenames), 2)
}
func TestJSONParserCompile(t *testing.T) {
testDir := getPluginDir()
var acc testutil.Accumulator
r := Reader{
Filepaths: []string{testDir + "/reader/testfiles/json_a.log"},
}
parserConfig := parsers.Config{
DataFormat: "json",
TagKeys: []string{"parent_ignored_child"},
}
nParser, err := parsers.NewParser(&parserConfig)
r.parser = nParser
assert.NoError(t, err)
r.Gather(&acc)
assert.Equal(t, map[string]string{"parent_ignored_child": "hi"}, acc.Metrics[0].Tags)
assert.Equal(t, 5, len(acc.Metrics[0].Fields))
}
func TestGrokParser(t *testing.T) {
testDir := getPluginDir()
var acc testutil.Accumulator
r := Reader{
Filepaths: []string{testDir + "/reader/testfiles/grok_a.log"},
}
parserConfig := parsers.Config{
DataFormat: "grok",
Patterns: []string{"%{COMMON_LOG_FORMAT}"},
}
nParser, err := parsers.NewParser(&parserConfig)
r.parser = nParser
assert.NoError(t, err)
err = r.Gather(&acc)
assert.Equal(t, 2, len(acc.Metrics))
}
func getPluginDir() string {
_, filename, _, _ := runtime.Caller(1)
return strings.Replace(filename, "/reader/reader_test.go", "", 1)
}

View File

@ -0,0 +1,2 @@
127.0.0.1 user-identifier frank [10/Oct/2000:13:55:36 -0700] "GET /apache_pb.gif HTTP/1.0" 200 2326
128.0.0.1 user-identifier tony [10/Oct/2000:13:55:36 -0800] "GET /apache_pb.gif HTTP/1.0" 300 45

View File

@ -0,0 +1,14 @@
{
"parent": {
"child": 3.0,
"ignored_child": "hi"
},
"ignored_null": null,
"integer": 4,
"list": [3, 4],
"ignored_parent": {
"another_ignored_null": null,
"ignored_string": "hello, world!"
},
"another_list": [4]
}

View File

@ -0,0 +1,73 @@
# Captures are a slightly modified version of logstash "grok" patterns, with
# the format %{<capture syntax>[:<semantic name>][:<modifier>]}
# By default all named captures are converted into string fields.
# Modifiers can be used to convert captures to other types or tags.
# Timestamp modifiers can be used to convert captures to the timestamp of the
# parsed metric.
# View logstash grok pattern docs here:
# https://www.elastic.co/guide/en/logstash/current/plugins-filters-grok.html
# All default logstash patterns are supported, these can be viewed here:
# https://github.com/logstash-plugins/logstash-patterns-core/blob/master/patterns/grok-patterns
# Available modifiers:
# string (default if nothing is specified)
# int
# float
# duration (ie, 5.23ms gets converted to int nanoseconds)
# tag (converts the field into a tag)
# drop (drops the field completely)
# Timestamp modifiers:
# ts-ansic ("Mon Jan _2 15:04:05 2006")
# ts-unix ("Mon Jan _2 15:04:05 MST 2006")
# ts-ruby ("Mon Jan 02 15:04:05 -0700 2006")
# ts-rfc822 ("02 Jan 06 15:04 MST")
# ts-rfc822z ("02 Jan 06 15:04 -0700")
# ts-rfc850 ("Monday, 02-Jan-06 15:04:05 MST")
# ts-rfc1123 ("Mon, 02 Jan 2006 15:04:05 MST")
# ts-rfc1123z ("Mon, 02 Jan 2006 15:04:05 -0700")
# ts-rfc3339 ("2006-01-02T15:04:05Z07:00")
# ts-rfc3339nano ("2006-01-02T15:04:05.999999999Z07:00")
# ts-httpd ("02/Jan/2006:15:04:05 -0700")
# ts-epoch (seconds since unix epoch)
# ts-epochnano (nanoseconds since unix epoch)
# ts-"CUSTOM"
# CUSTOM time layouts must be within quotes and be the representation of the
# "reference time", which is Mon Jan 2 15:04:05 -0700 MST 2006
# See https://golang.org/pkg/time/#Parse for more details.
# Example log file pattern, example log looks like this:
# [04/Jun/2016:12:41:45 +0100] 1.25 200 192.168.1.1 5.432µs
# Breakdown of the DURATION pattern below:
# NUMBER is a builtin logstash grok pattern matching float & int numbers.
# [nuµm]? is a regex specifying 0 or 1 of the characters within brackets.
# s is also regex, this pattern must end in "s".
# so DURATION will match something like '5.324ms' or '6.1µs' or '10s'
DURATION %{NUMBER}[nuµm]?s
RESPONSE_CODE %{NUMBER:response_code:tag}
RESPONSE_TIME %{DURATION:response_time_ns:duration}
EXAMPLE_LOG \[%{HTTPDATE:ts:ts-httpd}\] %{NUMBER:myfloat:float} %{RESPONSE_CODE} %{IPORHOST:clientip} %{RESPONSE_TIME}
# Wider-ranging username matching vs. logstash built-in %{USER}
NGUSERNAME [a-zA-Z0-9\.\@\-\+_%]+
NGUSER %{NGUSERNAME}
# Wider-ranging client IP matching
CLIENT (?:%{IPORHOST}|%{HOSTPORT}|::1)
##
## COMMON LOG PATTERNS
##
# apache & nginx logs, this is also known as the "common log format"
# see https://en.wikipedia.org/wiki/Common_Log_Format
COMMON_LOG_FORMAT %{CLIENT:client_ip} %{NOTSPACE:ident} %{NOTSPACE:auth} \[%{HTTPDATE:ts:ts-httpd}\] "(?:%{WORD:verb:tag} %{NOTSPACE:request}(?: HTTP/%{NUMBER:http_version:float})?|%{DATA})" %{NUMBER:resp_code:tag} (?:%{NUMBER:resp_bytes:int}|-)
# Combined log format is the same as the common log format but with the addition
# of two quoted strings at the end for "referrer" and "agent"
# See Examples at http://httpd.apache.org/docs/current/mod/mod_log_config.html
COMBINED_LOG_FORMAT %{COMMON_LOG_FORMAT} %{QS:referrer} %{QS:agent}
# HTTPD log formats
HTTPD20_ERRORLOG \[%{HTTPDERROR_DATE:timestamp}\] \[%{LOGLEVEL:loglevel:tag}\] (?:\[client %{IPORHOST:clientip}\] ){0,1}%{GREEDYDATA:errormsg}
HTTPD24_ERRORLOG \[%{HTTPDERROR_DATE:timestamp}\] \[%{WORD:module}:%{LOGLEVEL:loglevel:tag}\] \[pid %{POSINT:pid:int}:tid %{NUMBER:tid:int}\]( \(%{POSINT:proxy_errorcode:int}\)%{DATA:proxy_errormessage}:)?( \[client %{IPORHOST:client}:%{POSINT:clientport}\])? %{DATA:errorcode}: %{GREEDYDATA:message}
HTTPD_ERRORLOG %{HTTPD20_ERRORLOG}|%{HTTPD24_ERRORLOG}

View File

@ -68,10 +68,10 @@ type Parser struct {
// specified by the user in Patterns.
// They will look like:
// GROK_INTERNAL_PATTERN_0, GROK_INTERNAL_PATTERN_1, etc.
namedPatterns []string
NamedPatterns []string
CustomPatterns string
CustomPatternFiles []string
Measurement string
MetricName string
// Timezone is an optional component to help render log dates to
// your chosen zone.
@ -133,7 +133,7 @@ func (p *Parser) Compile() error {
// Give Patterns fake names so that they can be treated as named
// "custom patterns"
p.namedPatterns = make([]string, 0, len(p.Patterns))
p.NamedPatterns = make([]string, 0, len(p.Patterns))
for i, pattern := range p.Patterns {
pattern = strings.TrimSpace(pattern)
if pattern == "" {
@ -141,10 +141,10 @@ func (p *Parser) Compile() error {
}
name := fmt.Sprintf("GROK_INTERNAL_PATTERN_%d", i)
p.CustomPatterns += "\n" + name + " " + pattern + "\n"
p.namedPatterns = append(p.namedPatterns, "%{"+name+"}")
p.NamedPatterns = append(p.NamedPatterns, "%{"+name+"}")
}
if len(p.namedPatterns) == 0 {
if len(p.NamedPatterns) == 0 {
return fmt.Errorf("pattern required")
}
@ -167,10 +167,6 @@ func (p *Parser) Compile() error {
p.addCustomPatterns(scanner)
}
if p.Measurement == "" {
p.Measurement = "logparser_grok"
}
p.loc, err = time.LoadLocation(p.Timezone)
if err != nil {
log.Printf("W! improper timezone supplied (%s), setting loc to UTC", p.Timezone)
@ -191,7 +187,7 @@ func (p *Parser) ParseLine(line string) (telegraf.Metric, error) {
var values map[string]string
// the matching pattern string
var patternName string
for _, pattern := range p.namedPatterns {
for _, pattern := range p.NamedPatterns {
if values, err = p.g.Parse(pattern, line); err != nil {
return nil, err
}
@ -335,9 +331,6 @@ func (p *Parser) ParseLine(line string) (telegraf.Metric, error) {
case DROP:
// goodbye!
default:
// Replace commas with dot character
v = strings.Replace(v, ",", ".", -1)
ts, err := time.ParseInLocation(t, v, p.loc)
if err == nil {
timestamp = ts
@ -351,7 +344,26 @@ func (p *Parser) ParseLine(line string) (telegraf.Metric, error) {
return nil, fmt.Errorf("logparser_grok: must have one or more fields")
}
return metric.New(p.Measurement, tags, fields, p.tsModder.tsMod(timestamp))
return metric.New(p.MetricName, tags, fields, p.tsModder.tsMod(timestamp))
}
func (p *Parser) Parse(buf []byte) ([]telegraf.Metric, error) {
lines := strings.Split(string(buf), "\n")
var metrics []telegraf.Metric
for _, line := range lines {
m, err := p.ParseLine(line)
if err != nil {
return nil, err
}
metrics = append(metrics, m)
}
return metrics, nil
}
func (p *Parser) SetDefaultTags(tags map[string]string) {
//needs implementation
}
func (p *Parser) addCustomPatterns(scanner *bufio.Scanner) {

View File

@ -4,79 +4,18 @@ import (
"testing"
"time"
"github.com/influxdata/telegraf"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
var benchM telegraf.Metric
func Benchmark_ParseLine_CommonLogFormat(b *testing.B) {
p := &Parser{
Patterns: []string{"%{COMMON_LOG_FORMAT}"},
func TestGrokParse(t *testing.T) {
parser := Parser{
MetricName: "t_met",
Patterns: []string{"%{COMMON_LOG_FORMAT}"},
}
_ = p.Compile()
var m telegraf.Metric
for n := 0; n < b.N; n++ {
m, _ = p.ParseLine(`127.0.0.1 user-identifier frank [10/Oct/2000:13:55:36 -0700] "GET /apache_pb.gif HTTP/1.0" 200 2326`)
}
benchM = m
}
func Benchmark_ParseLine_CombinedLogFormat(b *testing.B) {
p := &Parser{
Patterns: []string{"%{COMBINED_LOG_FORMAT}"},
}
_ = p.Compile()
var m telegraf.Metric
for n := 0; n < b.N; n++ {
m, _ = p.ParseLine(`127.0.0.1 user-identifier frank [10/Oct/2000:13:55:36 -0700] "GET /apache_pb.gif HTTP/1.0" 200 2326 "-" "Mozilla"`)
}
benchM = m
}
func Benchmark_ParseLine_CustomPattern(b *testing.B) {
p := &Parser{
Patterns: []string{"%{TEST_LOG_A}", "%{TEST_LOG_B}"},
CustomPatterns: `
DURATION %{NUMBER}[nuµm]?s
RESPONSE_CODE %{NUMBER:response_code:tag}
RESPONSE_TIME %{DURATION:response_time:duration}
TEST_LOG_A %{NUMBER:myfloat:float} %{RESPONSE_CODE} %{IPORHOST:clientip} %{RESPONSE_TIME}
`,
}
_ = p.Compile()
var m telegraf.Metric
for n := 0; n < b.N; n++ {
m, _ = p.ParseLine(`[04/Jun/2016:12:41:45 +0100] 1.25 200 192.168.1.1 5.432µs 101`)
}
benchM = m
}
// Test a very simple parse pattern.
func TestSimpleParse(t *testing.T) {
p := &Parser{
Patterns: []string{"%{TESTLOG}"},
CustomPatterns: `
TESTLOG %{NUMBER:num:int} %{WORD:client}
`,
}
assert.NoError(t, p.Compile())
m, err := p.ParseLine(`142 bot`)
parser.Compile()
_, err := parser.Parse([]byte(`127.0.0.1 user-identifier frank [10/Oct/2000:13:55:36 -0700] "GET /apache_pb.gif HTTP/1.0" 200 2326`))
assert.NoError(t, err)
require.NotNil(t, m)
assert.Equal(t,
map[string]interface{}{
"num": int64(142),
"client": "bot",
},
m.Fields())
}
// Verify that patterns with a regex lookahead fail at compile time.
@ -96,8 +35,7 @@ func TestParsePatternsWithLookahead(t *testing.T) {
func TestMeasurementName(t *testing.T) {
p := &Parser{
Measurement: "my_web_log",
Patterns: []string{"%{COMMON_LOG_FORMAT}"},
Patterns: []string{"%{COMMON_LOG_FORMAT}"},
}
assert.NoError(t, p.Compile())
@ -116,13 +54,11 @@ func TestMeasurementName(t *testing.T) {
},
m.Fields())
assert.Equal(t, map[string]string{"verb": "GET", "resp_code": "200"}, m.Tags())
assert.Equal(t, "my_web_log", m.Name())
}
func TestCLF_IPv6(t *testing.T) {
p := &Parser{
Measurement: "my_web_log",
Patterns: []string{"%{COMMON_LOG_FORMAT}"},
Patterns: []string{"%{COMMON_LOG_FORMAT}"},
}
assert.NoError(t, p.Compile())
@ -140,7 +76,6 @@ func TestCLF_IPv6(t *testing.T) {
},
m.Fields())
assert.Equal(t, map[string]string{"verb": "GET", "resp_code": "200"}, m.Tags())
assert.Equal(t, "my_web_log", m.Name())
m, err = p.ParseLine(`::1 user-identifier frank [10/Oct/2000:13:55:36 -0700] "GET /apache_pb.gif HTTP/1.0" 200 2326`)
require.NotNil(t, m)
@ -156,7 +91,6 @@ func TestCLF_IPv6(t *testing.T) {
},
m.Fields())
assert.Equal(t, map[string]string{"verb": "GET", "resp_code": "200"}, m.Tags())
assert.Equal(t, "my_web_log", m.Name())
}
func TestCustomInfluxdbHttpd(t *testing.T) {
@ -970,33 +904,3 @@ func TestNewlineInPatterns(t *testing.T) {
require.NoError(t, err)
require.NotNil(t, m)
}
func TestSyslogTimestampParser(t *testing.T) {
p := &Parser{
Patterns: []string{`%{SYSLOGTIMESTAMP:timestamp:ts-syslog} value=%{NUMBER:value:int}`},
timeFunc: func() time.Time { return time.Date(2018, time.April, 1, 0, 0, 0, 0, nil) },
}
require.NoError(t, p.Compile())
m, err := p.ParseLine("Sep 25 09:01:55 value=42")
require.NoError(t, err)
require.NotNil(t, m)
require.Equal(t, 2018, m.Time().Year())
}
func TestReplaceTimestampComma(t *testing.T) {
p := &Parser{
Patterns: []string{`%{TIMESTAMP_ISO8601:timestamp:ts-"2006-01-02 15:04:05.000"} successfulMatches=%{NUMBER:value:int}`},
}
require.NoError(t, p.Compile())
m, err := p.ParseLine("2018-02-21 13:10:34,555 successfulMatches=1")
require.NoError(t, err)
require.NotNil(t, m)
require.Equal(t, 2018, m.Time().Year())
require.Equal(t, 13, m.Time().Hour())
require.Equal(t, 34, m.Time().Second())
//Convert Nanosecond to milisecond for compare
require.Equal(t, 555, m.Time().Nanosecond()/1000000)
}

BIN
plugins/parsers/grok/testdata/.DS_Store vendored Normal file

Binary file not shown.

View File

@ -8,6 +8,7 @@ import (
"github.com/influxdata/telegraf/plugins/parsers/collectd"
"github.com/influxdata/telegraf/plugins/parsers/dropwizard"
"github.com/influxdata/telegraf/plugins/parsers/graphite"
"github.com/influxdata/telegraf/plugins/parsers/grok"
"github.com/influxdata/telegraf/plugins/parsers/influx"
"github.com/influxdata/telegraf/plugins/parsers/json"
"github.com/influxdata/telegraf/plugins/parsers/nagios"
@ -87,6 +88,13 @@ type Config struct {
// an optional map containing tag names as keys and json paths to retrieve the tag values from as values
// used if TagsPath is empty or doesn't return any tags
DropwizardTagPathsMap map[string]string
//grok patterns
Patterns []string
NamedPatterns []string
CustomPatterns string
CustomPatternFiles []string
TimeZone string
}
// NewParser returns a Parser interface based on the given config.
@ -120,12 +128,38 @@ func NewParser(config *Config) (Parser, error) {
config.DefaultTags,
config.Separator,
config.Templates)
case "grok":
parser, err = newGrokParser(
config.MetricName,
config.Patterns,
config.NamedPatterns,
config.CustomPatterns,
config.CustomPatternFiles,
config.TimeZone)
default:
err = fmt.Errorf("Invalid data format: %s", config.DataFormat)
}
return parser, err
}
func newGrokParser(metricName string,
patterns []string,
nPatterns []string,
cPatterns string,
cPatternFiles []string, tZone string) (Parser, error) {
parser := grok.Parser{
MetricName: metricName,
Patterns: patterns,
NamedPatterns: nPatterns,
CustomPatterns: cPatterns,
CustomPatternFiles: cPatternFiles,
Timezone: tZone,
}
err := parser.Compile()
return &parser, err
}
func NewJSONParser(
metricName string,
tagKeys []string,