Begin implementing generic timestamp logparser capability
This commit is contained in:
parent
8b23230a55
commit
60aeb8f88c
|
@ -69,6 +69,7 @@ Timestamp modifiers can be used to convert captures to the timestamp of the
|
||||||
- tag (converts the field into a tag)
|
- tag (converts the field into a tag)
|
||||||
- drop (drops the field completely)
|
- drop (drops the field completely)
|
||||||
- Timestamp modifiers:
|
- Timestamp modifiers:
|
||||||
|
- ts (This will auto-learn the timestamp format by trying all known timestamp layouts)
|
||||||
- ts-ansic ("Mon Jan _2 15:04:05 2006")
|
- ts-ansic ("Mon Jan _2 15:04:05 2006")
|
||||||
- ts-unix ("Mon Jan _2 15:04:05 MST 2006")
|
- ts-unix ("Mon Jan _2 15:04:05 MST 2006")
|
||||||
- ts-ruby ("Mon Jan 02 15:04:05 -0700 2006")
|
- ts-ruby ("Mon Jan 02 15:04:05 -0700 2006")
|
||||||
|
|
|
@ -15,7 +15,7 @@ import (
|
||||||
"github.com/influxdata/telegraf"
|
"github.com/influxdata/telegraf"
|
||||||
)
|
)
|
||||||
|
|
||||||
var timeFormats = map[string]string{
|
var timeLayouts = map[string]string{
|
||||||
"ts-ansic": "Mon Jan _2 15:04:05 2006",
|
"ts-ansic": "Mon Jan _2 15:04:05 2006",
|
||||||
"ts-unix": "Mon Jan _2 15:04:05 MST 2006",
|
"ts-unix": "Mon Jan _2 15:04:05 MST 2006",
|
||||||
"ts-ruby": "Mon Jan 02 15:04:05 -0700 2006",
|
"ts-ruby": "Mon Jan 02 15:04:05 -0700 2006",
|
||||||
|
@ -27,27 +27,33 @@ var timeFormats = map[string]string{
|
||||||
"ts-rfc3339": "2006-01-02T15:04:05Z07:00",
|
"ts-rfc3339": "2006-01-02T15:04:05Z07:00",
|
||||||
"ts-rfc3339nano": "2006-01-02T15:04:05.999999999Z07:00",
|
"ts-rfc3339nano": "2006-01-02T15:04:05.999999999Z07:00",
|
||||||
"ts-httpd": "02/Jan/2006:15:04:05 -0700",
|
"ts-httpd": "02/Jan/2006:15:04:05 -0700",
|
||||||
"ts-epoch": "EPOCH",
|
// These three are not exactly "layouts", but they are special cases that
|
||||||
"ts-epochnano": "EPOCH_NANO",
|
// will get handled in the ParseLine function.
|
||||||
|
"ts-epoch": "EPOCH",
|
||||||
|
"ts-epochnano": "EPOCH_NANO",
|
||||||
|
"ts": "GENERIC_TIMESTAMP", // try parsing all known timestamp layouts.
|
||||||
}
|
}
|
||||||
|
|
||||||
const (
|
const (
|
||||||
INT = "int"
|
INT = "int"
|
||||||
TAG = "tag"
|
TAG = "tag"
|
||||||
FLOAT = "float"
|
FLOAT = "float"
|
||||||
STRING = "string"
|
STRING = "string"
|
||||||
DURATION = "duration"
|
DURATION = "duration"
|
||||||
DROP = "drop"
|
DROP = "drop"
|
||||||
|
EPOCH = "EPOCH"
|
||||||
|
EPOCH_NANO = "EPOCH_NANO"
|
||||||
|
GENERIC_TIMESTAMP = "GENERIC_TIMESTAMP"
|
||||||
)
|
)
|
||||||
|
|
||||||
var (
|
var (
|
||||||
// matches named captures that contain a type.
|
// matches named captures that contain a modifier.
|
||||||
// ie,
|
// ie,
|
||||||
// %{NUMBER:bytes:int}
|
// %{NUMBER:bytes:int}
|
||||||
// %{IPORHOST:clientip:tag}
|
// %{IPORHOST:clientip:tag}
|
||||||
// %{HTTPDATE:ts1:ts-httpd}
|
// %{HTTPDATE:ts1:ts-httpd}
|
||||||
// %{HTTPDATE:ts2:ts-"02 Jan 06 15:04"}
|
// %{HTTPDATE:ts2:ts-"02 Jan 06 15:04"}
|
||||||
typedRe = regexp.MustCompile(`%{\w+:(\w+):(ts-".+"|t?s?-?\w+)}`)
|
modifierRe = regexp.MustCompile(`%{\w+:(\w+):(ts-".+"|t?s?-?\w+)}`)
|
||||||
// matches a plain pattern name. ie, %{NUMBER}
|
// matches a plain pattern name. ie, %{NUMBER}
|
||||||
patternOnlyRe = regexp.MustCompile(`%{(\w+)}`)
|
patternOnlyRe = regexp.MustCompile(`%{(\w+)}`)
|
||||||
)
|
)
|
||||||
|
@ -87,6 +93,12 @@ type Parser struct {
|
||||||
// "RESPONSE_CODE": "%{NUMBER:rc:tag}"
|
// "RESPONSE_CODE": "%{NUMBER:rc:tag}"
|
||||||
// }
|
// }
|
||||||
patterns map[string]string
|
patterns map[string]string
|
||||||
|
// foundTsLayouts is a slice of timestamp patterns that have been found
|
||||||
|
// in the log lines. This slice gets updated if the user uses the generic
|
||||||
|
// 'ts' modifier for timestamps. This slice is checked first for matches,
|
||||||
|
// so that previously-matched layouts get priority over all other timestamp
|
||||||
|
// layouts.
|
||||||
|
foundTsLayouts []string
|
||||||
|
|
||||||
g *grok.Grok
|
g *grok.Grok
|
||||||
tsModder *tsModder
|
tsModder *tsModder
|
||||||
|
@ -140,6 +152,7 @@ func (p *Parser) Compile() error {
|
||||||
|
|
||||||
func (p *Parser) ParseLine(line string) (telegraf.Metric, error) {
|
func (p *Parser) ParseLine(line string) (telegraf.Metric, error) {
|
||||||
var err error
|
var err error
|
||||||
|
// values are the parsed fields from the log line
|
||||||
var values map[string]string
|
var values map[string]string
|
||||||
// the matching pattern string
|
// the matching pattern string
|
||||||
var patternName string
|
var patternName string
|
||||||
|
@ -165,6 +178,7 @@ func (p *Parser) ParseLine(line string) (telegraf.Metric, error) {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// t is the modifier of the field
|
||||||
var t string
|
var t string
|
||||||
// check if pattern has some modifiers
|
// check if pattern has some modifiers
|
||||||
if types, ok := p.typeMap[patternName]; ok {
|
if types, ok := p.typeMap[patternName]; ok {
|
||||||
|
@ -210,20 +224,50 @@ func (p *Parser) ParseLine(line string) (telegraf.Metric, error) {
|
||||||
tags[k] = v
|
tags[k] = v
|
||||||
case STRING:
|
case STRING:
|
||||||
fields[k] = strings.Trim(v, `"`)
|
fields[k] = strings.Trim(v, `"`)
|
||||||
case "EPOCH":
|
case EPOCH:
|
||||||
iv, err := strconv.ParseInt(v, 10, 64)
|
iv, err := strconv.ParseInt(v, 10, 64)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Printf("ERROR parsing %s to int: %s", v, err)
|
log.Printf("ERROR parsing %s to int: %s", v, err)
|
||||||
} else {
|
} else {
|
||||||
timestamp = time.Unix(iv, 0)
|
timestamp = time.Unix(iv, 0)
|
||||||
}
|
}
|
||||||
case "EPOCH_NANO":
|
case EPOCH_NANO:
|
||||||
iv, err := strconv.ParseInt(v, 10, 64)
|
iv, err := strconv.ParseInt(v, 10, 64)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Printf("ERROR parsing %s to int: %s", v, err)
|
log.Printf("ERROR parsing %s to int: %s", v, err)
|
||||||
} else {
|
} else {
|
||||||
timestamp = time.Unix(0, iv)
|
timestamp = time.Unix(0, iv)
|
||||||
}
|
}
|
||||||
|
case GENERIC_TIMESTAMP:
|
||||||
|
var foundTs bool
|
||||||
|
// first try timestamp layouts that we've already found
|
||||||
|
for _, layout := range p.foundTsLayouts {
|
||||||
|
ts, err := time.Parse(layout, v)
|
||||||
|
if err == nil {
|
||||||
|
timestamp = ts
|
||||||
|
foundTs = true
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// if we haven't found a timestamp layout yet, try all timestamp
|
||||||
|
// layouts.
|
||||||
|
if !foundTs {
|
||||||
|
for _, layout := range timeLayouts {
|
||||||
|
ts, err := time.Parse(layout, v)
|
||||||
|
if err == nil {
|
||||||
|
timestamp = ts
|
||||||
|
foundTs = true
|
||||||
|
p.foundTsLayouts = append(p.foundTsLayouts, layout)
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// if we still haven't found a timestamp layout, log it and we will
|
||||||
|
// just use time.Now()
|
||||||
|
if !foundTs {
|
||||||
|
log.Printf("ERROR parsing timestamp [%s], could not find any "+
|
||||||
|
"suitable time layouts.", v)
|
||||||
|
}
|
||||||
case DROP:
|
case DROP:
|
||||||
// goodbye!
|
// goodbye!
|
||||||
default:
|
default:
|
||||||
|
@ -267,7 +311,7 @@ func (p *Parser) compileCustomPatterns() error {
|
||||||
|
|
||||||
// check if pattern contains modifiers. Parse them out if it does.
|
// check if pattern contains modifiers. Parse them out if it does.
|
||||||
for name, pattern := range p.patterns {
|
for name, pattern := range p.patterns {
|
||||||
if typedRe.MatchString(pattern) {
|
if modifierRe.MatchString(pattern) {
|
||||||
// this pattern has modifiers, so parse out the modifiers
|
// this pattern has modifiers, so parse out the modifiers
|
||||||
pattern, err = p.parseTypedCaptures(name, pattern)
|
pattern, err = p.parseTypedCaptures(name, pattern)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
@ -280,13 +324,13 @@ func (p *Parser) compileCustomPatterns() error {
|
||||||
return p.g.AddPatternsFromMap(p.patterns)
|
return p.g.AddPatternsFromMap(p.patterns)
|
||||||
}
|
}
|
||||||
|
|
||||||
// parseTypedCaptures parses the capture types, and then deletes the type from
|
// parseTypedCaptures parses the capture modifiers, and then deletes the
|
||||||
// the line so that it is a valid "grok" pattern again.
|
// modifier from the line so that it is a valid "grok" pattern again.
|
||||||
// ie,
|
// ie,
|
||||||
// %{NUMBER:bytes:int} => %{NUMBER:bytes} (stores %{NUMBER}->bytes->int)
|
// %{NUMBER:bytes:int} => %{NUMBER:bytes} (stores %{NUMBER}->bytes->int)
|
||||||
// %{IPORHOST:clientip:tag} => %{IPORHOST:clientip} (stores %{IPORHOST}->clientip->tag)
|
// %{IPORHOST:clientip:tag} => %{IPORHOST:clientip} (stores %{IPORHOST}->clientip->tag)
|
||||||
func (p *Parser) parseTypedCaptures(name, pattern string) (string, error) {
|
func (p *Parser) parseTypedCaptures(name, pattern string) (string, error) {
|
||||||
matches := typedRe.FindAllStringSubmatch(pattern, -1)
|
matches := modifierRe.FindAllStringSubmatch(pattern, -1)
|
||||||
|
|
||||||
// grab the name of the capture pattern
|
// grab the name of the capture pattern
|
||||||
patternName := "%{" + name + "}"
|
patternName := "%{" + name + "}"
|
||||||
|
@ -298,16 +342,18 @@ func (p *Parser) parseTypedCaptures(name, pattern string) (string, error) {
|
||||||
hasTimestamp := false
|
hasTimestamp := false
|
||||||
for _, match := range matches {
|
for _, match := range matches {
|
||||||
// regex capture 1 is the name of the capture
|
// regex capture 1 is the name of the capture
|
||||||
// regex capture 2 is the type of the capture
|
// regex capture 2 is the modifier of the capture
|
||||||
if strings.HasPrefix(match[2], "ts-") {
|
if strings.HasPrefix(match[2], "ts") {
|
||||||
if hasTimestamp {
|
if hasTimestamp {
|
||||||
return pattern, fmt.Errorf("logparser pattern compile error: "+
|
return pattern, fmt.Errorf("logparser pattern compile error: "+
|
||||||
"Each pattern is allowed only one named "+
|
"Each pattern is allowed only one named "+
|
||||||
"timestamp data type. pattern: %s", pattern)
|
"timestamp data type. pattern: %s", pattern)
|
||||||
}
|
}
|
||||||
if f, ok := timeFormats[match[2]]; ok {
|
if layout, ok := timeLayouts[match[2]]; ok {
|
||||||
p.tsMap[patternName][match[1]] = f
|
// built-in time format
|
||||||
|
p.tsMap[patternName][match[1]] = layout
|
||||||
} else {
|
} else {
|
||||||
|
// custom time format
|
||||||
p.tsMap[patternName][match[1]] = strings.TrimSuffix(strings.TrimPrefix(match[2], `ts-"`), `"`)
|
p.tsMap[patternName][match[1]] = strings.TrimSuffix(strings.TrimPrefix(match[2], `ts-"`), `"`)
|
||||||
}
|
}
|
||||||
hasTimestamp = true
|
hasTimestamp = true
|
||||||
|
|
|
@ -333,6 +333,55 @@ func TestParseEpochErrors(t *testing.T) {
|
||||||
assert.NoError(t, err)
|
assert.NoError(t, err)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestParseGenericTimestamp(t *testing.T) {
|
||||||
|
p := &Parser{
|
||||||
|
Patterns: []string{`\[%{HTTPDATE:ts:ts}\] response_time=%{POSINT:response_time:int} mymetric=%{NUMBER:metric:float}`},
|
||||||
|
}
|
||||||
|
assert.NoError(t, p.Compile())
|
||||||
|
|
||||||
|
metricA, err := p.ParseLine(`[09/Jun/2016:03:37:03 +0000] response_time=20821 mymetric=10890.645`)
|
||||||
|
require.NotNil(t, metricA)
|
||||||
|
assert.NoError(t, err)
|
||||||
|
assert.Equal(t,
|
||||||
|
map[string]interface{}{
|
||||||
|
"response_time": int64(20821),
|
||||||
|
"metric": float64(10890.645),
|
||||||
|
},
|
||||||
|
metricA.Fields())
|
||||||
|
assert.Equal(t, map[string]string{}, metricA.Tags())
|
||||||
|
assert.Equal(t, time.Unix(1465443423, 0).UTC(), metricA.Time().UTC())
|
||||||
|
|
||||||
|
metricB, err := p.ParseLine(`[09/Jun/2016:03:37:04 +0000] response_time=20821 mymetric=10890.645`)
|
||||||
|
require.NotNil(t, metricB)
|
||||||
|
assert.NoError(t, err)
|
||||||
|
assert.Equal(t,
|
||||||
|
map[string]interface{}{
|
||||||
|
"response_time": int64(20821),
|
||||||
|
"metric": float64(10890.645),
|
||||||
|
},
|
||||||
|
metricB.Fields())
|
||||||
|
assert.Equal(t, map[string]string{}, metricB.Tags())
|
||||||
|
assert.Equal(t, time.Unix(1465443424, 0).UTC(), metricB.Time().UTC())
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestParseGenericTimestampNotFound(t *testing.T) {
|
||||||
|
p := &Parser{
|
||||||
|
Patterns: []string{`\[%{NOTSPACE:ts:ts}\] response_time=%{POSINT:response_time:int} mymetric=%{NUMBER:metric:float}`},
|
||||||
|
}
|
||||||
|
assert.NoError(t, p.Compile())
|
||||||
|
|
||||||
|
metricA, err := p.ParseLine(`[foobar] response_time=20821 mymetric=10890.645`)
|
||||||
|
require.NotNil(t, metricA)
|
||||||
|
assert.NoError(t, err)
|
||||||
|
assert.Equal(t,
|
||||||
|
map[string]interface{}{
|
||||||
|
"response_time": int64(20821),
|
||||||
|
"metric": float64(10890.645),
|
||||||
|
},
|
||||||
|
metricA.Fields())
|
||||||
|
assert.Equal(t, map[string]string{}, metricA.Tags())
|
||||||
|
}
|
||||||
|
|
||||||
func TestCompileFileAndParse(t *testing.T) {
|
func TestCompileFileAndParse(t *testing.T) {
|
||||||
p := &Parser{
|
p := &Parser{
|
||||||
Patterns: []string{"%{TEST_LOG_A}", "%{TEST_LOG_B}"},
|
Patterns: []string{"%{TEST_LOG_A}", "%{TEST_LOG_B}"},
|
||||||
|
|
Loading…
Reference in New Issue