logparser input plugin

closes #102
closes #328
This commit is contained in:
Cameron Sparr
2016-06-02 18:47:15 +01:00
parent d50a1e83ac
commit cb3c54a1ae
21 changed files with 1713 additions and 123 deletions

View File

@@ -0,0 +1,373 @@
package grok
import (
"bufio"
"fmt"
"log"
"os"
"regexp"
"strconv"
"strings"
"time"
"github.com/vjeantet/grok"
"github.com/influxdata/telegraf"
)
var timeFormats = map[string]string{
"ts-ansic": "Mon Jan _2 15:04:05 2006",
"ts-unix": "Mon Jan _2 15:04:05 MST 2006",
"ts-ruby": "Mon Jan 02 15:04:05 -0700 2006",
"ts-rfc822": "02 Jan 06 15:04 MST",
"ts-rfc822z": "02 Jan 06 15:04 -0700", // RFC822 with numeric zone
"ts-rfc850": "Monday, 02-Jan-06 15:04:05 MST",
"ts-rfc1123": "Mon, 02 Jan 2006 15:04:05 MST",
"ts-rfc1123z": "Mon, 02 Jan 2006 15:04:05 -0700", // RFC1123 with numeric zone
"ts-rfc3339": "2006-01-02T15:04:05Z07:00",
"ts-rfc3339nano": "2006-01-02T15:04:05.999999999Z07:00",
"ts-httpd": "02/Jan/2006:15:04:05 -0700",
"ts-epoch": "EPOCH",
"ts-epochnano": "EPOCH_NANO",
}
const (
INT = "int"
TAG = "tag"
FLOAT = "float"
STRING = "string"
DURATION = "duration"
DROP = "drop"
)
var (
// matches named captures that contain a type.
// ie,
// %{NUMBER:bytes:int}
// %{IPORHOST:clientip:tag}
// %{HTTPDATE:ts1:ts-http}
// %{HTTPDATE:ts2:ts-"02 Jan 06 15:04"}
typedRe = regexp.MustCompile(`%{\w+:(\w+):(ts-".+"|t?s?-?\w+)}`)
// matches a plain pattern name. ie, %{NUMBER}
patternOnlyRe = regexp.MustCompile(`%{(\w+)}`)
)
type Parser struct {
Patterns []string
CustomPatterns string
CustomPatternFiles []string
// typeMap is a map of patterns -> capture name -> modifier,
// ie, {
// "%{TESTLOG}":
// {
// "bytes": "int",
// "clientip": "tag"
// }
// }
typeMap map[string]map[string]string
// tsMap is a map of patterns -> capture name -> timestamp layout.
// ie, {
// "%{TESTLOG}":
// {
// "httptime": "02/Jan/2006:15:04:05 -0700"
// }
// }
tsMap map[string]map[string]string
// patterns is a map of all of the parsed patterns from CustomPatterns
// and CustomPatternFiles.
// ie, {
// "DURATION": "%{NUMBER}[nuµm]?s"
// "RESPONSE_CODE": "%{NUMBER:rc:tag}"
// }
patterns map[string]string
g *grok.Grok
tsModder *tsModder
}
func (p *Parser) Compile() error {
p.typeMap = make(map[string]map[string]string)
p.tsMap = make(map[string]map[string]string)
p.patterns = make(map[string]string)
p.tsModder = &tsModder{}
var err error
p.g, err = grok.NewWithConfig(&grok.Config{NamedCapturesOnly: true})
if err != nil {
return err
}
p.CustomPatterns = DEFAULT_PATTERNS + p.CustomPatterns
if len(p.CustomPatterns) != 0 {
scanner := bufio.NewScanner(strings.NewReader(p.CustomPatterns))
p.addCustomPatterns(scanner)
}
for _, filename := range p.CustomPatternFiles {
file, err := os.Open(filename)
if err != nil {
return err
}
scanner := bufio.NewScanner(bufio.NewReader(file))
p.addCustomPatterns(scanner)
}
return p.compileCustomPatterns()
}
func (p *Parser) ParseLine(line string) (telegraf.Metric, error) {
var err error
var values map[string]string
// the matching pattern string
var patternName string
for _, pattern := range p.Patterns {
if values, err = p.g.Parse(pattern, line); err != nil {
return nil, err
}
if len(values) != 0 {
patternName = pattern
break
}
}
if len(values) == 0 {
return nil, nil
}
fields := make(map[string]interface{})
tags := make(map[string]string)
timestamp := time.Now()
for k, v := range values {
if k == "" || v == "" {
continue
}
var t string
// check if pattern has some modifiers
if types, ok := p.typeMap[patternName]; ok {
t = types[k]
}
// if we didn't find a modifier, check if we have a timestamp layout
if t == "" {
if ts, ok := p.tsMap[patternName]; ok {
// check if the modifier is a timestamp layout
if layout, ok := ts[k]; ok {
t = layout
}
}
}
// if we didn't find a type OR timestamp modifier, assume string
if t == "" {
t = STRING
}
switch t {
case INT:
iv, err := strconv.ParseInt(v, 10, 64)
if err != nil {
log.Printf("ERROR parsing %s to int: %s", v, err)
} else {
fields[k] = iv
}
case FLOAT:
fv, err := strconv.ParseFloat(v, 64)
if err != nil {
log.Printf("ERROR parsing %s to float: %s", v, err)
} else {
fields[k] = fv
}
case DURATION:
d, err := time.ParseDuration(v)
if err != nil {
log.Printf("ERROR parsing %s to duration: %s", v, err)
} else {
fields[k] = int64(d)
}
case TAG:
tags[k] = v
case STRING:
fields[k] = strings.Trim(v, `"`)
case "EPOCH":
iv, err := strconv.ParseInt(v, 10, 64)
if err != nil {
log.Printf("ERROR parsing %s to int: %s", v, err)
} else {
timestamp = time.Unix(iv, 0)
}
case "EPOCH_NANO":
iv, err := strconv.ParseInt(v, 10, 64)
if err != nil {
log.Printf("ERROR parsing %s to int: %s", v, err)
} else {
timestamp = time.Unix(0, iv)
}
case DROP:
// goodbye!
default:
ts, err := time.Parse(t, v)
if err == nil {
timestamp = ts
} else {
log.Printf("ERROR parsing %s to time layout [%s]: %s", v, t, err)
}
}
}
return telegraf.NewMetric("logparser_grok", tags, fields, p.tsModder.tsMod(timestamp))
}
func (p *Parser) addCustomPatterns(scanner *bufio.Scanner) {
for scanner.Scan() {
line := strings.TrimSpace(scanner.Text())
if len(line) > 0 && line[0] != '#' {
names := strings.SplitN(line, " ", 2)
p.patterns[names[0]] = names[1]
}
}
}
func (p *Parser) compileCustomPatterns() error {
var err error
// check if the pattern contains a subpattern that is already defined
// replace it with the subpattern for modifier inheritance.
for i := 0; i < 2; i++ {
for name, pattern := range p.patterns {
subNames := patternOnlyRe.FindAllStringSubmatch(pattern, -1)
for _, subName := range subNames {
if subPattern, ok := p.patterns[subName[1]]; ok {
pattern = strings.Replace(pattern, subName[0], subPattern, 1)
}
}
p.patterns[name] = pattern
}
}
// check if pattern contains modifiers. Parse them out if it does.
for name, pattern := range p.patterns {
if typedRe.MatchString(pattern) {
// this pattern has modifiers, so parse out the modifiers
pattern, err = p.parseTypedCaptures(name, pattern)
if err != nil {
return err
}
p.patterns[name] = pattern
}
}
return p.g.AddPatternsFromMap(p.patterns)
}
// parseTypedCaptures parses the capture types, and then deletes the type from
// the line so that it is a valid "grok" pattern again.
// ie,
// %{NUMBER:bytes:int} => %{NUMBER:bytes} (stores %{NUMBER}->bytes->int)
// %{IPORHOST:clientip:tag} => %{IPORHOST:clientip} (stores %{IPORHOST}->clientip->tag)
func (p *Parser) parseTypedCaptures(name, pattern string) (string, error) {
matches := typedRe.FindAllStringSubmatch(pattern, -1)
// grab the name of the capture pattern
patternName := "%{" + name + "}"
// create type map for this pattern
p.typeMap[patternName] = make(map[string]string)
p.tsMap[patternName] = make(map[string]string)
// boolean to verify that each pattern only has a single ts- data type.
hasTimestamp := false
for _, match := range matches {
// regex capture 1 is the name of the capture
// regex capture 2 is the type of the capture
if strings.HasPrefix(match[2], "ts-") {
if hasTimestamp {
return pattern, fmt.Errorf("logparser pattern compile error: "+
"Each pattern is allowed only one named "+
"timestamp data type. pattern: %s", pattern)
}
if f, ok := timeFormats[match[2]]; ok {
p.tsMap[patternName][match[1]] = f
} else {
p.tsMap[patternName][match[1]] = strings.TrimSuffix(strings.TrimPrefix(match[2], `ts-"`), `"`)
}
hasTimestamp = true
} else {
p.typeMap[patternName][match[1]] = match[2]
}
// the modifier is not a valid part of a "grok" pattern, so remove it
// from the pattern.
pattern = strings.Replace(pattern, ":"+match[2]+"}", "}", 1)
}
return pattern, nil
}
// tsModder is a struct for incrementing identical timestamps of log lines
// so that we don't push identical metrics that will get overwritten.
type tsModder struct {
dupe time.Time
last time.Time
incr time.Duration
incrn time.Duration
rollover time.Duration
}
// tsMod increments the given timestamp one unit more from the previous
// duplicate timestamp.
// the increment unit is determined as the next smallest time unit below the
// most significant time unit of ts.
// ie, if the input is at ms precision, it will increment it 1µs.
func (t *tsModder) tsMod(ts time.Time) time.Time {
defer func() { t.last = ts }()
// don't mod the time if we don't need to
if t.last.IsZero() || ts.IsZero() {
t.incrn = 0
t.rollover = 0
return ts
}
if !ts.Equal(t.last) && !ts.Equal(t.dupe) {
t.incr = 0
t.incrn = 0
t.rollover = 0
return ts
}
if ts.Equal(t.last) {
t.dupe = ts
}
if ts.Equal(t.dupe) && t.incr == time.Duration(0) {
tsNano := ts.UnixNano()
d := int64(10)
counter := 1
for {
a := tsNano % d
if a > 0 {
break
}
d = d * 10
counter++
}
switch {
case counter <= 6:
t.incr = time.Nanosecond
case counter <= 9:
t.incr = time.Microsecond
case counter > 9:
t.incr = time.Millisecond
}
}
t.incrn++
if t.incrn == 999 && t.incr > time.Nanosecond {
t.rollover = t.incr * t.incrn
t.incrn = 1
t.incr = t.incr / 1000
if t.incr < time.Nanosecond {
t.incr = time.Nanosecond
}
}
return ts.Add(t.incr*t.incrn + t.rollover)
}

View File

@@ -0,0 +1,508 @@
package grok
import (
"testing"
"time"
"github.com/influxdata/telegraf"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
var benchM telegraf.Metric
func Benchmark_ParseLine_CommonLogFormat(b *testing.B) {
p := &Parser{
Patterns: []string{"%{COMMON_LOG_FORMAT}"},
}
p.Compile()
var m telegraf.Metric
for n := 0; n < b.N; n++ {
m, _ = p.ParseLine(`127.0.0.1 user-identifier frank [10/Oct/2000:13:55:36 -0700] "GET /apache_pb.gif HTTP/1.0" 200 2326`)
}
benchM = m
}
func Benchmark_ParseLine_CombinedLogFormat(b *testing.B) {
p := &Parser{
Patterns: []string{"%{COMBINED_LOG_FORMAT}"},
}
p.Compile()
var m telegraf.Metric
for n := 0; n < b.N; n++ {
m, _ = p.ParseLine(`127.0.0.1 user-identifier frank [10/Oct/2000:13:55:36 -0700] "GET /apache_pb.gif HTTP/1.0" 200 2326 "-" "Mozilla"`)
}
benchM = m
}
func Benchmark_ParseLine_InfluxLog(b *testing.B) {
p := &Parser{
Patterns: []string{"%{INFLUXDB_HTTPD_LOG}"},
}
p.Compile()
var m telegraf.Metric
for n := 0; n < b.N; n++ {
m, _ = p.ParseLine(`[httpd] 192.168.1.1 - - [14/Jun/2016:11:33:29 +0100] "POST /write?consistency=any&db=telegraf&precision=ns&rp= HTTP/1.1" 204 0 "-" "InfluxDBClient" 6f61bc44-321b-11e6-8050-000000000000 2513`)
}
benchM = m
}
func Benchmark_ParseLine_InfluxLog_NoMatch(b *testing.B) {
p := &Parser{
Patterns: []string{"%{INFLUXDB_HTTPD_LOG}"},
}
p.Compile()
var m telegraf.Metric
for n := 0; n < b.N; n++ {
m, _ = p.ParseLine(`[retention] 2016/06/14 14:38:24 retention policy shard deletion check commencing`)
}
benchM = m
}
func Benchmark_ParseLine_CustomPattern(b *testing.B) {
p := &Parser{
Patterns: []string{"%{TEST_LOG_A}", "%{TEST_LOG_B}"},
CustomPatterns: `
DURATION %{NUMBER}[nuµm]?s
RESPONSE_CODE %{NUMBER:response_code:tag}
RESPONSE_TIME %{DURATION:response_time:duration}
TEST_LOG_A %{NUMBER:myfloat:float} %{RESPONSE_CODE} %{IPORHOST:clientip} %{RESPONSE_TIME}
`,
}
p.Compile()
var m telegraf.Metric
for n := 0; n < b.N; n++ {
m, _ = p.ParseLine(`[04/Jun/2016:12:41:45 +0100] 1.25 200 192.168.1.1 5.432µs 101`)
}
benchM = m
}
func TestBuiltinInfluxdbHttpd(t *testing.T) {
p := &Parser{
Patterns: []string{"%{INFLUXDB_HTTPD_LOG}"},
}
assert.NoError(t, p.Compile())
// Parse an influxdb POST request
m, err := p.ParseLine(`[httpd] ::1 - - [14/Jun/2016:11:33:29 +0100] "POST /write?consistency=any&db=telegraf&precision=ns&rp= HTTP/1.1" 204 0 "-" "InfluxDBClient" 6f61bc44-321b-11e6-8050-000000000000 2513`)
require.NotNil(t, m)
assert.NoError(t, err)
assert.Equal(t,
map[string]interface{}{
"resp_bytes": int64(0),
"auth": "-",
"client_ip": "::1",
"resp_code": int64(204),
"http_version": float64(1.1),
"ident": "-",
"referrer": "-",
"request": "/write?consistency=any&db=telegraf&precision=ns&rp=",
"response_time_us": int64(2513),
"agent": "InfluxDBClient",
},
m.Fields())
assert.Equal(t, map[string]string{"verb": "POST"}, m.Tags())
// Parse an influxdb GET request
m, err = p.ParseLine(`[httpd] ::1 - - [14/Jun/2016:12:10:02 +0100] "GET /query?db=telegraf&q=SELECT+bytes%2Cresponse_time_us+FROM+logparser_grok+WHERE+http_method+%3D+%27GET%27+AND+response_time_us+%3E+0+AND+time+%3E+now%28%29+-+1h HTTP/1.1" 200 578 "http://localhost:8083/" "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.84 Safari/537.36" 8a3806f1-3220-11e6-8006-000000000000 988`)
require.NotNil(t, m)
assert.NoError(t, err)
assert.Equal(t,
map[string]interface{}{
"resp_bytes": int64(578),
"auth": "-",
"client_ip": "::1",
"resp_code": int64(200),
"http_version": float64(1.1),
"ident": "-",
"referrer": "http://localhost:8083/",
"request": "/query?db=telegraf&q=SELECT+bytes%2Cresponse_time_us+FROM+logparser_grok+WHERE+http_method+%3D+%27GET%27+AND+response_time_us+%3E+0+AND+time+%3E+now%28%29+-+1h",
"response_time_us": int64(988),
"agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.84 Safari/537.36",
},
m.Fields())
assert.Equal(t, map[string]string{"verb": "GET"}, m.Tags())
}
// common log format
// 127.0.0.1 user-identifier frank [10/Oct/2000:13:55:36 -0700] "GET /apache_pb.gif HTTP/1.0" 200 2326
func TestBuiltinCommonLogFormat(t *testing.T) {
p := &Parser{
Patterns: []string{"%{COMMON_LOG_FORMAT}"},
}
assert.NoError(t, p.Compile())
// Parse an influxdb POST request
m, err := p.ParseLine(`127.0.0.1 user-identifier frank [10/Oct/2000:13:55:36 -0700] "GET /apache_pb.gif HTTP/1.0" 200 2326`)
require.NotNil(t, m)
assert.NoError(t, err)
assert.Equal(t,
map[string]interface{}{
"resp_bytes": int64(2326),
"auth": "frank",
"client_ip": "127.0.0.1",
"resp_code": int64(200),
"http_version": float64(1.0),
"ident": "user-identifier",
"request": "/apache_pb.gif",
},
m.Fields())
assert.Equal(t, map[string]string{"verb": "GET"}, m.Tags())
}
// combined log format
// 127.0.0.1 user-identifier frank [10/Oct/2000:13:55:36 -0700] "GET /apache_pb.gif HTTP/1.0" 200 2326 "-" "Mozilla"
func TestBuiltinCombinedLogFormat(t *testing.T) {
p := &Parser{
Patterns: []string{"%{COMBINED_LOG_FORMAT}"},
}
assert.NoError(t, p.Compile())
// Parse an influxdb POST request
m, err := p.ParseLine(`127.0.0.1 user-identifier frank [10/Oct/2000:13:55:36 -0700] "GET /apache_pb.gif HTTP/1.0" 200 2326 "-" "Mozilla"`)
require.NotNil(t, m)
assert.NoError(t, err)
assert.Equal(t,
map[string]interface{}{
"resp_bytes": int64(2326),
"auth": "frank",
"client_ip": "127.0.0.1",
"resp_code": int64(200),
"http_version": float64(1.0),
"ident": "user-identifier",
"request": "/apache_pb.gif",
"referrer": "-",
"agent": "Mozilla",
},
m.Fields())
assert.Equal(t, map[string]string{"verb": "GET"}, m.Tags())
}
func TestCompileStringAndParse(t *testing.T) {
p := &Parser{
Patterns: []string{"%{TEST_LOG_A}", "%{TEST_LOG_B}"},
CustomPatterns: `
DURATION %{NUMBER}[nuµm]?s
RESPONSE_CODE %{NUMBER:response_code:tag}
RESPONSE_TIME %{DURATION:response_time:duration}
TEST_LOG_A %{NUMBER:myfloat:float} %{RESPONSE_CODE} %{IPORHOST:clientip} %{RESPONSE_TIME}
`,
}
assert.NoError(t, p.Compile())
metricA, err := p.ParseLine(`1.25 200 192.168.1.1 5.432µs`)
require.NotNil(t, metricA)
assert.NoError(t, err)
assert.Equal(t,
map[string]interface{}{
"clientip": "192.168.1.1",
"myfloat": float64(1.25),
"response_time": int64(5432),
},
metricA.Fields())
assert.Equal(t, map[string]string{"response_code": "200"}, metricA.Tags())
}
func TestParseEpochNano(t *testing.T) {
p := &Parser{
Patterns: []string{"%{MYAPP}"},
CustomPatterns: `
MYAPP %{POSINT:ts:ts-epochnano} response_time=%{POSINT:response_time:int} mymetric=%{NUMBER:metric:float}
`,
}
assert.NoError(t, p.Compile())
metricA, err := p.ParseLine(`1466004605359052000 response_time=20821 mymetric=10890.645`)
require.NotNil(t, metricA)
assert.NoError(t, err)
assert.Equal(t,
map[string]interface{}{
"response_time": int64(20821),
"metric": float64(10890.645),
},
metricA.Fields())
assert.Equal(t, map[string]string{}, metricA.Tags())
assert.Equal(t, time.Unix(0, 1466004605359052000), metricA.Time())
}
func TestParseEpoch(t *testing.T) {
p := &Parser{
Patterns: []string{"%{MYAPP}"},
CustomPatterns: `
MYAPP %{POSINT:ts:ts-epoch} response_time=%{POSINT:response_time:int} mymetric=%{NUMBER:metric:float}
`,
}
assert.NoError(t, p.Compile())
metricA, err := p.ParseLine(`1466004605 response_time=20821 mymetric=10890.645`)
require.NotNil(t, metricA)
assert.NoError(t, err)
assert.Equal(t,
map[string]interface{}{
"response_time": int64(20821),
"metric": float64(10890.645),
},
metricA.Fields())
assert.Equal(t, map[string]string{}, metricA.Tags())
assert.Equal(t, time.Unix(1466004605, 0), metricA.Time())
}
func TestParseEpochErrors(t *testing.T) {
p := &Parser{
Patterns: []string{"%{MYAPP}"},
CustomPatterns: `
MYAPP %{WORD:ts:ts-epoch} response_time=%{POSINT:response_time:int} mymetric=%{NUMBER:metric:float}
`,
}
assert.NoError(t, p.Compile())
_, err := p.ParseLine(`foobar response_time=20821 mymetric=10890.645`)
assert.NoError(t, err)
p = &Parser{
Patterns: []string{"%{MYAPP}"},
CustomPatterns: `
MYAPP %{WORD:ts:ts-epochnano} response_time=%{POSINT:response_time:int} mymetric=%{NUMBER:metric:float}
`,
}
assert.NoError(t, p.Compile())
_, err = p.ParseLine(`foobar response_time=20821 mymetric=10890.645`)
assert.NoError(t, err)
}
func TestCompileFileAndParse(t *testing.T) {
p := &Parser{
Patterns: []string{"%{TEST_LOG_A}", "%{TEST_LOG_B}"},
CustomPatternFiles: []string{"./testdata/test-patterns"},
}
assert.NoError(t, p.Compile())
metricA, err := p.ParseLine(`[04/Jun/2016:12:41:45 +0100] 1.25 200 192.168.1.1 5.432µs 101`)
require.NotNil(t, metricA)
assert.NoError(t, err)
assert.Equal(t,
map[string]interface{}{
"clientip": "192.168.1.1",
"myfloat": float64(1.25),
"response_time": int64(5432),
"myint": int64(101),
},
metricA.Fields())
assert.Equal(t, map[string]string{"response_code": "200"}, metricA.Tags())
assert.Equal(t,
time.Date(2016, time.June, 4, 12, 41, 45, 0, time.FixedZone("foo", 60*60)).Nanosecond(),
metricA.Time().Nanosecond())
metricB, err := p.ParseLine(`[04/06/2016--12:41:45] 1.25 mystring dropme nomodifier`)
require.NotNil(t, metricB)
assert.NoError(t, err)
assert.Equal(t,
map[string]interface{}{
"myfloat": 1.25,
"mystring": "mystring",
"nomodifier": "nomodifier",
},
metricB.Fields())
assert.Equal(t, map[string]string{}, metricB.Tags())
assert.Equal(t,
time.Date(2016, time.June, 4, 12, 41, 45, 0, time.FixedZone("foo", 60*60)).Nanosecond(),
metricB.Time().Nanosecond())
}
func TestCompileNoModifiersAndParse(t *testing.T) {
p := &Parser{
Patterns: []string{"%{TEST_LOG_C}"},
CustomPatterns: `
DURATION %{NUMBER}[nuµm]?s
TEST_LOG_C %{NUMBER:myfloat} %{NUMBER} %{IPORHOST:clientip} %{DURATION:rt}
`,
}
assert.NoError(t, p.Compile())
metricA, err := p.ParseLine(`1.25 200 192.168.1.1 5.432µs`)
require.NotNil(t, metricA)
assert.NoError(t, err)
assert.Equal(t,
map[string]interface{}{
"clientip": "192.168.1.1",
"myfloat": "1.25",
"rt": "5.432µs",
},
metricA.Fields())
assert.Equal(t, map[string]string{}, metricA.Tags())
}
func TestCompileNoNamesAndParse(t *testing.T) {
p := &Parser{
Patterns: []string{"%{TEST_LOG_C}"},
CustomPatterns: `
DURATION %{NUMBER}[nuµm]?s
TEST_LOG_C %{NUMBER} %{NUMBER} %{IPORHOST} %{DURATION}
`,
}
assert.NoError(t, p.Compile())
metricA, err := p.ParseLine(`1.25 200 192.168.1.1 5.432µs`)
require.Nil(t, metricA)
assert.NoError(t, err)
}
func TestParseNoMatch(t *testing.T) {
p := &Parser{
Patterns: []string{"%{TEST_LOG_A}", "%{TEST_LOG_B}"},
CustomPatternFiles: []string{"./testdata/test-patterns"},
}
assert.NoError(t, p.Compile())
metricA, err := p.ParseLine(`[04/Jun/2016:12:41:45 +0100] notnumber 200 192.168.1.1 5.432µs 101`)
assert.NoError(t, err)
assert.Nil(t, metricA)
}
func TestCompileErrors(t *testing.T) {
// Compile fails because there are multiple timestamps:
p := &Parser{
Patterns: []string{"%{TEST_LOG_A}", "%{TEST_LOG_B}"},
CustomPatterns: `
TEST_LOG_A %{HTTPDATE:ts1:ts-httpd} %{HTTPDATE:ts2:ts-httpd} %{NUMBER:mynum:int}
`,
}
assert.Error(t, p.Compile())
// Compile fails because file doesn't exist:
p = &Parser{
Patterns: []string{"%{TEST_LOG_A}", "%{TEST_LOG_B}"},
CustomPatternFiles: []string{"/tmp/foo/bar/baz"},
}
assert.Error(t, p.Compile())
}
func TestParseErrors(t *testing.T) {
// Parse fails because the pattern doesn't exist
p := &Parser{
Patterns: []string{"%{TEST_LOG_B}"},
CustomPatterns: `
TEST_LOG_A %{HTTPDATE:ts:ts-httpd} %{WORD:myword:int} %{}
`,
}
assert.NoError(t, p.Compile())
_, err := p.ParseLine(`[04/Jun/2016:12:41:45 +0100] notnumber 200 192.168.1.1 5.432µs 101`)
assert.Error(t, err)
// Parse fails because myword is not an int
p = &Parser{
Patterns: []string{"%{TEST_LOG_A}"},
CustomPatterns: `
TEST_LOG_A %{HTTPDATE:ts:ts-httpd} %{WORD:myword:int}
`,
}
assert.NoError(t, p.Compile())
_, err = p.ParseLine(`04/Jun/2016:12:41:45 +0100 notnumber`)
assert.Error(t, err)
// Parse fails because myword is not a float
p = &Parser{
Patterns: []string{"%{TEST_LOG_A}"},
CustomPatterns: `
TEST_LOG_A %{HTTPDATE:ts:ts-httpd} %{WORD:myword:float}
`,
}
assert.NoError(t, p.Compile())
_, err = p.ParseLine(`04/Jun/2016:12:41:45 +0100 notnumber`)
assert.Error(t, err)
// Parse fails because myword is not a duration
p = &Parser{
Patterns: []string{"%{TEST_LOG_A}"},
CustomPatterns: `
TEST_LOG_A %{HTTPDATE:ts:ts-httpd} %{WORD:myword:duration}
`,
}
assert.NoError(t, p.Compile())
_, err = p.ParseLine(`04/Jun/2016:12:41:45 +0100 notnumber`)
assert.Error(t, err)
// Parse fails because the time layout is wrong.
p = &Parser{
Patterns: []string{"%{TEST_LOG_A}"},
CustomPatterns: `
TEST_LOG_A %{HTTPDATE:ts:ts-unix} %{WORD:myword:duration}
`,
}
assert.NoError(t, p.Compile())
_, err = p.ParseLine(`04/Jun/2016:12:41:45 +0100 notnumber`)
assert.Error(t, err)
}
func TestTsModder(t *testing.T) {
tsm := &tsModder{}
reftime := time.Date(2006, time.December, 1, 1, 1, 1, int(time.Millisecond), time.UTC)
modt := tsm.tsMod(reftime)
assert.Equal(t, reftime, modt)
modt = tsm.tsMod(reftime)
assert.Equal(t, reftime.Add(time.Microsecond*1), modt)
modt = tsm.tsMod(reftime)
assert.Equal(t, reftime.Add(time.Microsecond*2), modt)
modt = tsm.tsMod(reftime)
assert.Equal(t, reftime.Add(time.Microsecond*3), modt)
reftime = time.Date(2006, time.December, 1, 1, 1, 1, int(time.Microsecond), time.UTC)
modt = tsm.tsMod(reftime)
assert.Equal(t, reftime, modt)
modt = tsm.tsMod(reftime)
assert.Equal(t, reftime.Add(time.Nanosecond*1), modt)
modt = tsm.tsMod(reftime)
assert.Equal(t, reftime.Add(time.Nanosecond*2), modt)
modt = tsm.tsMod(reftime)
assert.Equal(t, reftime.Add(time.Nanosecond*3), modt)
reftime = time.Date(2006, time.December, 1, 1, 1, 1, int(time.Microsecond)*999, time.UTC)
modt = tsm.tsMod(reftime)
assert.Equal(t, reftime, modt)
modt = tsm.tsMod(reftime)
assert.Equal(t, reftime.Add(time.Nanosecond*1), modt)
modt = tsm.tsMod(reftime)
assert.Equal(t, reftime.Add(time.Nanosecond*2), modt)
modt = tsm.tsMod(reftime)
assert.Equal(t, reftime.Add(time.Nanosecond*3), modt)
reftime = time.Date(2006, time.December, 1, 1, 1, 1, 0, time.UTC)
modt = tsm.tsMod(reftime)
assert.Equal(t, reftime, modt)
modt = tsm.tsMod(reftime)
assert.Equal(t, reftime.Add(time.Millisecond*1), modt)
modt = tsm.tsMod(reftime)
assert.Equal(t, reftime.Add(time.Millisecond*2), modt)
modt = tsm.tsMod(reftime)
assert.Equal(t, reftime.Add(time.Millisecond*3), modt)
reftime = time.Time{}
modt = tsm.tsMod(reftime)
assert.Equal(t, reftime, modt)
}
func TestTsModder_Rollover(t *testing.T) {
tsm := &tsModder{}
reftime := time.Date(2006, time.December, 1, 1, 1, 1, int(time.Millisecond), time.UTC)
modt := tsm.tsMod(reftime)
for i := 1; i < 1000; i++ {
modt = tsm.tsMod(reftime)
}
assert.Equal(t, reftime.Add(time.Microsecond*999+time.Nanosecond), modt)
reftime = time.Date(2006, time.December, 1, 1, 1, 1, int(time.Microsecond), time.UTC)
modt = tsm.tsMod(reftime)
for i := 1; i < 1001; i++ {
modt = tsm.tsMod(reftime)
}
assert.Equal(t, reftime.Add(time.Nanosecond*1000), modt)
}

View File

@@ -0,0 +1,80 @@
package grok
// THIS SHOULD BE KEPT IN-SYNC WITH patterns/influx-patterns
const DEFAULT_PATTERNS = `
# Captures are a slightly modified version of logstash "grok" patterns, with
# the format %{<capture syntax>[:<semantic name>][:<modifier>]}
# By default all named captures are converted into string fields.
# Modifiers can be used to convert captures to other types or tags.
# Timestamp modifiers can be used to convert captures to the timestamp of the
# parsed metric.
# View logstash grok pattern docs here:
# https://www.elastic.co/guide/en/logstash/current/plugins-filters-grok.html
# All default logstash patterns are supported, these can be viewed here:
# https://github.com/logstash-plugins/logstash-patterns-core/blob/master/patterns/grok-patterns
# Available modifiers:
# string (default if nothing is specified)
# int
# float
# duration (ie, 5.23ms gets converted to int nanoseconds)
# tag (converts the field into a tag)
# drop (drops the field completely)
# Timestamp modifiers:
# ts-ansic ("Mon Jan _2 15:04:05 2006")
# ts-unix ("Mon Jan _2 15:04:05 MST 2006")
# ts-ruby ("Mon Jan 02 15:04:05 -0700 2006")
# ts-rfc822 ("02 Jan 06 15:04 MST")
# ts-rfc822z ("02 Jan 06 15:04 -0700")
# ts-rfc850 ("Monday, 02-Jan-06 15:04:05 MST")
# ts-rfc1123 ("Mon, 02 Jan 2006 15:04:05 MST")
# ts-rfc1123z ("Mon, 02 Jan 2006 15:04:05 -0700")
# ts-rfc3339 ("2006-01-02T15:04:05Z07:00")
# ts-rfc3339nano ("2006-01-02T15:04:05.999999999Z07:00")
# ts-httpd ("02/Jan/2006:15:04:05 -0700")
# ts-epoch (seconds since unix epoch)
# ts-epochnano (nanoseconds since unix epoch)
# ts-"CUSTOM"
# CUSTOM time layouts must be within quotes and be the representation of the
# "reference time", which is Mon Jan 2 15:04:05 -0700 MST 2006
# See https://golang.org/pkg/time/#Parse for more details.
# Example log file pattern, example log looks like this:
# [04/Jun/2016:12:41:45 +0100] 1.25 200 192.168.1.1 5.432µs
# Breakdown of the DURATION pattern below:
# NUMBER is a builtin logstash grok pattern matching float & int numbers.
# [nuµm]? is a regex specifying 0 or 1 of the characters within brackets.
# s is also regex, this pattern must end in "s".
# so DURATION will match something like '5.324ms' or '6.1µs' or '10s'
DURATION %{NUMBER}[nuµm]?s
RESPONSE_CODE %{NUMBER:response_code:tag}
RESPONSE_TIME %{DURATION:response_time_ns:duration}
EXAMPLE_LOG \[%{HTTPDATE:ts:ts-httpd}\] %{NUMBER:myfloat:float} %{RESPONSE_CODE} %{IPORHOST:clientip} %{RESPONSE_TIME}
# Wider-ranging username matching vs. logstash built-in %{USER}
NGUSERNAME [a-zA-Z\.\@\-\+_%]+
NGUSER %{NGUSERNAME}
##
## COMMON LOG PATTERNS
##
# InfluxDB log patterns
CLIENT (?:%{IPORHOST}|%{HOSTPORT}|::1)
INFLUXDB_HTTPD_LOG \[httpd\] %{COMBINED_LOG_FORMAT} %{UUID:uuid:drop} %{NUMBER:response_time_us:int}
# apache & nginx logs, this is also known as the "common log format"
# see https://en.wikipedia.org/wiki/Common_Log_Format
COMMON_LOG_FORMAT %{CLIENT:client_ip} %{NGUSER:ident} %{NGUSER:auth} \[%{HTTPDATE:ts:ts-httpd}\] "(?:%{WORD:verb:tag} %{NOTSPACE:request}(?: HTTP/%{NUMBER:http_version:float})?|%{DATA})" %{NUMBER:resp_code:int} (?:%{NUMBER:resp_bytes:int}|-)
# Combined log format is the same as the common log format but with the addition
# of two quoted strings at the end for "referrer" and "agent"
# See Examples at http://httpd.apache.org/docs/current/mod/mod_log_config.html
COMBINED_LOG_FORMAT %{COMMON_LOG_FORMAT} %{QS:referrer} %{QS:agent}
# HTTPD log formats
HTTPD20_ERRORLOG \[%{HTTPDERROR_DATE:timestamp}\] \[%{LOGLEVEL:loglevel:tag}\] (?:\[client %{IPORHOST:clientip}\] ){0,1}%{GREEDYDATA:errormsg}
HTTPD24_ERRORLOG \[%{HTTPDERROR_DATE:timestamp}\] \[%{WORD:module}:%{LOGLEVEL:loglevel:tag}\] \[pid %{POSINT:pid:int}:tid %{NUMBER:tid:int}\]( \(%{POSINT:proxy_errorcode:int}\)%{DATA:proxy_errormessage}:)?( \[client %{IPORHOST:client}:%{POSINT:clientport}\])? %{DATA:errorcode}: %{GREEDYDATA:message}
HTTPD_ERRORLOG %{HTTPD20_ERRORLOG}|%{HTTPD24_ERRORLOG}
`

View File

@@ -0,0 +1,75 @@
# Captures are a slightly modified version of logstash "grok" patterns, with
# the format %{<capture syntax>[:<semantic name>][:<modifier>]}
# By default all named captures are converted into string fields.
# Modifiers can be used to convert captures to other types or tags.
# Timestamp modifiers can be used to convert captures to the timestamp of the
# parsed metric.
# View logstash grok pattern docs here:
# https://www.elastic.co/guide/en/logstash/current/plugins-filters-grok.html
# All default logstash patterns are supported, these can be viewed here:
# https://github.com/logstash-plugins/logstash-patterns-core/blob/master/patterns/grok-patterns
# Available modifiers:
# string (default if nothing is specified)
# int
# float
# duration (ie, 5.23ms gets converted to int nanoseconds)
# tag (converts the field into a tag)
# drop (drops the field completely)
# Timestamp modifiers:
# ts-ansic ("Mon Jan _2 15:04:05 2006")
# ts-unix ("Mon Jan _2 15:04:05 MST 2006")
# ts-ruby ("Mon Jan 02 15:04:05 -0700 2006")
# ts-rfc822 ("02 Jan 06 15:04 MST")
# ts-rfc822z ("02 Jan 06 15:04 -0700")
# ts-rfc850 ("Monday, 02-Jan-06 15:04:05 MST")
# ts-rfc1123 ("Mon, 02 Jan 2006 15:04:05 MST")
# ts-rfc1123z ("Mon, 02 Jan 2006 15:04:05 -0700")
# ts-rfc3339 ("2006-01-02T15:04:05Z07:00")
# ts-rfc3339nano ("2006-01-02T15:04:05.999999999Z07:00")
# ts-httpd ("02/Jan/2006:15:04:05 -0700")
# ts-epoch (seconds since unix epoch)
# ts-epochnano (nanoseconds since unix epoch)
# ts-"CUSTOM"
# CUSTOM time layouts must be within quotes and be the representation of the
# "reference time", which is Mon Jan 2 15:04:05 -0700 MST 2006
# See https://golang.org/pkg/time/#Parse for more details.
# Example log file pattern, example log looks like this:
# [04/Jun/2016:12:41:45 +0100] 1.25 200 192.168.1.1 5.432µs
# Breakdown of the DURATION pattern below:
# NUMBER is a builtin logstash grok pattern matching float & int numbers.
# [nuµm]? is a regex specifying 0 or 1 of the characters within brackets.
# s is also regex, this pattern must end in "s".
# so DURATION will match something like '5.324ms' or '6.1µs' or '10s'
DURATION %{NUMBER}[nuµm]?s
RESPONSE_CODE %{NUMBER:response_code:tag}
RESPONSE_TIME %{DURATION:response_time_ns:duration}
EXAMPLE_LOG \[%{HTTPDATE:ts:ts-httpd}\] %{NUMBER:myfloat:float} %{RESPONSE_CODE} %{IPORHOST:clientip} %{RESPONSE_TIME}
# Wider-ranging username matching vs. logstash built-in %{USER}
NGUSERNAME [a-zA-Z\.\@\-\+_%]+
NGUSER %{NGUSERNAME}
##
## COMMON LOG PATTERNS
##
# InfluxDB log patterns
CLIENT (?:%{IPORHOST}|%{HOSTPORT}|::1)
INFLUXDB_HTTPD_LOG \[httpd\] %{COMBINED_LOG_FORMAT} %{UUID:uuid:drop} %{NUMBER:response_time_us:int}
# apache & nginx logs, this is also known as the "common log format"
# see https://en.wikipedia.org/wiki/Common_Log_Format
COMMON_LOG_FORMAT %{CLIENT:client_ip} %{NGUSER:ident} %{NGUSER:auth} \[%{HTTPDATE:ts:ts-httpd}\] "(?:%{WORD:verb:tag} %{NOTSPACE:request}(?: HTTP/%{NUMBER:http_version:float})?|%{DATA})" %{NUMBER:resp_code:int} (?:%{NUMBER:resp_bytes:int}|-)
# Combined log format is the same as the common log format but with the addition
# of two quoted strings at the end for "referrer" and "agent"
# See Examples at http://httpd.apache.org/docs/current/mod/mod_log_config.html
COMBINED_LOG_FORMAT %{COMMON_LOG_FORMAT} %{QS:referrer} %{QS:agent}
# HTTPD log formats
HTTPD20_ERRORLOG \[%{HTTPDERROR_DATE:timestamp}\] \[%{LOGLEVEL:loglevel:tag}\] (?:\[client %{IPORHOST:clientip}\] ){0,1}%{GREEDYDATA:errormsg}
HTTPD24_ERRORLOG \[%{HTTPDERROR_DATE:timestamp}\] \[%{WORD:module}:%{LOGLEVEL:loglevel:tag}\] \[pid %{POSINT:pid:int}:tid %{NUMBER:tid:int}\]( \(%{POSINT:proxy_errorcode:int}\)%{DATA:proxy_errormessage}:)?( \[client %{IPORHOST:client}:%{POSINT:clientport}\])? %{DATA:errorcode}: %{GREEDYDATA:message}
HTTPD_ERRORLOG %{HTTPD20_ERRORLOG}|%{HTTPD24_ERRORLOG}

View File

@@ -0,0 +1,14 @@
# Test A log line:
# [04/Jun/2016:12:41:45 +0100] 1.25 200 192.168.1.1 5.432µs 101
DURATION %{NUMBER}[nuµm]?s
RESPONSE_CODE %{NUMBER:response_code:tag}
RESPONSE_TIME %{DURATION:response_time:duration}
TEST_LOG_A \[%{HTTPDATE:timestamp:ts-httpd}\] %{NUMBER:myfloat:float} %{RESPONSE_CODE} %{IPORHOST:clientip} %{RESPONSE_TIME} %{NUMBER:myint:int}
# Test B log line:
# [04/06/2016--12:41:45] 1.25 mystring dropme nomodifier
TEST_TIMESTAMP %{MONTHDAY}/%{MONTHNUM}/%{YEAR}--%{TIME}
TEST_LOG_B \[%{TEST_TIMESTAMP:timestamp:ts-"02/01/2006--15:04:05"}\] %{NUMBER:myfloat:float} %{WORD:mystring:string} %{WORD:dropme:drop} %{WORD:nomodifier}
TEST_TIMESTAMP %{MONTHDAY}/%{MONTHNUM}/%{YEAR}--%{TIME}
TEST_LOG_BAD \[%{TEST_TIMESTAMP:timestamp:ts-"02/01/2006--15:04:05"}\] %{NUMBER:myfloat:float} %{WORD:mystring:int} %{WORD:dropme:drop} %{WORD:nomodifier}

View File

@@ -0,0 +1 @@
[04/Jun/2016:12:41:45 +0100] 1.25 200 192.168.1.1 5.432µs 101

View File

@@ -0,0 +1 @@
[04/06/2016--12:41:45] 1.25 mystring dropme nomodifier