From cafa95e536eedd62c9182ea34c271d250e59a51e Mon Sep 17 00:00:00 2001 From: Max U Date: Fri, 6 Jul 2018 13:16:51 -0700 Subject: [PATCH] logparser no longer uses seperate grok --- plugins/inputs/logparser/grok/grok.go | 511 --------- plugins/inputs/logparser/grok/grok_test.go | 1002 ----------------- .../inputs/logparser/grok/influx_patterns.go | 78 -- plugins/parsers/registry.go | 4 +- 4 files changed, 2 insertions(+), 1593 deletions(-) delete mode 100644 plugins/inputs/logparser/grok/grok.go delete mode 100644 plugins/inputs/logparser/grok/grok_test.go delete mode 100644 plugins/inputs/logparser/grok/influx_patterns.go diff --git a/plugins/inputs/logparser/grok/grok.go b/plugins/inputs/logparser/grok/grok.go deleted file mode 100644 index 766d149fe..000000000 --- a/plugins/inputs/logparser/grok/grok.go +++ /dev/null @@ -1,511 +0,0 @@ -package grok - -import ( - "bufio" - "fmt" - "log" - "os" - "regexp" - "strconv" - "strings" - "time" - - "github.com/vjeantet/grok" - - "github.com/influxdata/telegraf" - "github.com/influxdata/telegraf/metric" -) - -var timeLayouts = map[string]string{ - "ts-ansic": "Mon Jan _2 15:04:05 2006", - "ts-unix": "Mon Jan _2 15:04:05 MST 2006", - "ts-ruby": "Mon Jan 02 15:04:05 -0700 2006", - "ts-rfc822": "02 Jan 06 15:04 MST", - "ts-rfc822z": "02 Jan 06 15:04 -0700", // RFC822 with numeric zone - "ts-rfc850": "Monday, 02-Jan-06 15:04:05 MST", - "ts-rfc1123": "Mon, 02 Jan 2006 15:04:05 MST", - "ts-rfc1123z": "Mon, 02 Jan 2006 15:04:05 -0700", // RFC1123 with numeric zone - "ts-rfc3339": "2006-01-02T15:04:05Z07:00", - "ts-rfc3339nano": "2006-01-02T15:04:05.999999999Z07:00", - "ts-httpd": "02/Jan/2006:15:04:05 -0700", - // These three are not exactly "layouts", but they are special cases that - // will get handled in the ParseLine function. - "ts-epoch": "EPOCH", - "ts-epochnano": "EPOCH_NANO", - "ts-syslog": "SYSLOG_TIMESTAMP", - "ts": "GENERIC_TIMESTAMP", // try parsing all known timestamp layouts. 
-} - -const ( - INT = "int" - TAG = "tag" - FLOAT = "float" - STRING = "string" - DURATION = "duration" - DROP = "drop" - EPOCH = "EPOCH" - EPOCH_NANO = "EPOCH_NANO" - SYSLOG_TIMESTAMP = "SYSLOG_TIMESTAMP" - GENERIC_TIMESTAMP = "GENERIC_TIMESTAMP" -) - -var ( - // matches named captures that contain a modifier. - // ie, - // %{NUMBER:bytes:int} - // %{IPORHOST:clientip:tag} - // %{HTTPDATE:ts1:ts-http} - // %{HTTPDATE:ts2:ts-"02 Jan 06 15:04"} - modifierRe = regexp.MustCompile(`%{\w+:(\w+):(ts-".+"|t?s?-?\w+)}`) - // matches a plain pattern name. ie, %{NUMBER} - patternOnlyRe = regexp.MustCompile(`%{(\w+)}`) -) - -// Parser is the primary struct to handle and grok-patterns defined in the config toml -type Parser struct { - Patterns []string - // namedPatterns is a list of internally-assigned names to the patterns - // specified by the user in Patterns. - // They will look like: - // GROK_INTERNAL_PATTERN_0, GROK_INTERNAL_PATTERN_1, etc. - namedPatterns []string - CustomPatterns string - CustomPatternFiles []string - Measurement string - - // Timezone is an optional component to help render log dates to - // your chosen zone. - // Default: "" which renders UTC - // Options are as follows: - // 1. Local -- interpret based on machine localtime - // 2. "America/Chicago" -- Unix TZ values like those found in https://en.wikipedia.org/wiki/List_of_tz_database_time_zones - // 3. UTC -- or blank/unspecified, will return timestamp in UTC - Timezone string - loc *time.Location - - // typeMap is a map of patterns -> capture name -> modifier, - // ie, { - // "%{TESTLOG}": - // { - // "bytes": "int", - // "clientip": "tag" - // } - // } - typeMap map[string]map[string]string - // tsMap is a map of patterns -> capture name -> timestamp layout. - // ie, { - // "%{TESTLOG}": - // { - // "httptime": "02/Jan/2006:15:04:05 -0700" - // } - // } - tsMap map[string]map[string]string - // patterns is a map of all of the parsed patterns from CustomPatterns - // and CustomPatternFiles. 
- // ie, { - // "DURATION": "%{NUMBER}[nuµm]?s" - // "RESPONSE_CODE": "%{NUMBER:rc:tag}" - // } - patterns map[string]string - // foundTsLayouts is a slice of timestamp patterns that have been found - // in the log lines. This slice gets updated if the user uses the generic - // 'ts' modifier for timestamps. This slice is checked first for matches, - // so that previously-matched layouts get priority over all other timestamp - // layouts. - foundTsLayouts []string - - timeFunc func() time.Time - g *grok.Grok - tsModder *tsModder -} - -// Compile is a bound method to Parser which will process the options for our parser -func (p *Parser) Compile() error { - p.typeMap = make(map[string]map[string]string) - p.tsMap = make(map[string]map[string]string) - p.patterns = make(map[string]string) - p.tsModder = &tsModder{} - var err error - p.g, err = grok.NewWithConfig(&grok.Config{NamedCapturesOnly: true}) - if err != nil { - return err - } - - // Give Patterns fake names so that they can be treated as named - // "custom patterns" - p.namedPatterns = make([]string, 0, len(p.Patterns)) - for i, pattern := range p.Patterns { - pattern = strings.TrimSpace(pattern) - if pattern == "" { - continue - } - name := fmt.Sprintf("GROK_INTERNAL_PATTERN_%d", i) - p.CustomPatterns += "\n" + name + " " + pattern + "\n" - p.namedPatterns = append(p.namedPatterns, "%{"+name+"}") - } - - if len(p.namedPatterns) == 0 { - return fmt.Errorf("pattern required") - } - - // Combine user-supplied CustomPatterns with DEFAULT_PATTERNS and parse - // them together as the same type of pattern. - p.CustomPatterns = DEFAULT_PATTERNS + p.CustomPatterns - if len(p.CustomPatterns) != 0 { - scanner := bufio.NewScanner(strings.NewReader(p.CustomPatterns)) - p.addCustomPatterns(scanner) - } - - // Parse any custom pattern files supplied. 
- for _, filename := range p.CustomPatternFiles { - file, fileErr := os.Open(filename) - if fileErr != nil { - return fileErr - } - - scanner := bufio.NewScanner(bufio.NewReader(file)) - p.addCustomPatterns(scanner) - } - - if p.Measurement == "" { - p.Measurement = "logparser_grok" - } - - p.loc, err = time.LoadLocation(p.Timezone) - if err != nil { - log.Printf("W! improper timezone supplied (%s), setting loc to UTC", p.Timezone) - p.loc, _ = time.LoadLocation("UTC") - } - - if p.timeFunc == nil { - p.timeFunc = time.Now - } - - return p.compileCustomPatterns() -} - -// ParseLine is the primary function to process individual lines, returning the metrics -func (p *Parser) ParseLine(line string) (telegraf.Metric, error) { - var err error - // values are the parsed fields from the log line - var values map[string]string - // the matching pattern string - var patternName string - for _, pattern := range p.namedPatterns { - if values, err = p.g.Parse(pattern, line); err != nil { - return nil, err - } - if len(values) != 0 { - patternName = pattern - break - } - } - - if len(values) == 0 { - log.Printf("D! Grok no match found for: %q", line) - return nil, nil - } - - fields := make(map[string]interface{}) - tags := make(map[string]string) - timestamp := time.Now() - for k, v := range values { - if k == "" || v == "" { - continue - } - - // t is the modifier of the field - var t string - // check if pattern has some modifiers - if types, ok := p.typeMap[patternName]; ok { - t = types[k] - } - // if we didn't find a modifier, check if we have a timestamp layout - if t == "" { - if ts, ok := p.tsMap[patternName]; ok { - // check if the modifier is a timestamp layout - if layout, ok := ts[k]; ok { - t = layout - } - } - } - // if we didn't find a type OR timestamp modifier, assume string - if t == "" { - t = STRING - } - - switch t { - case INT: - iv, err := strconv.ParseInt(v, 10, 64) - if err != nil { - log.Printf("E! 
Error parsing %s to int: %s", v, err) - } else { - fields[k] = iv - } - case FLOAT: - fv, err := strconv.ParseFloat(v, 64) - if err != nil { - log.Printf("E! Error parsing %s to float: %s", v, err) - } else { - fields[k] = fv - } - case DURATION: - d, err := time.ParseDuration(v) - if err != nil { - log.Printf("E! Error parsing %s to duration: %s", v, err) - } else { - fields[k] = int64(d) - } - case TAG: - tags[k] = v - case STRING: - fields[k] = strings.Trim(v, `"`) - case EPOCH: - parts := strings.SplitN(v, ".", 2) - if len(parts) == 0 { - log.Printf("E! Error parsing %s to timestamp: %s", v, err) - break - } - - sec, err := strconv.ParseInt(parts[0], 10, 64) - if err != nil { - log.Printf("E! Error parsing %s to timestamp: %s", v, err) - break - } - ts := time.Unix(sec, 0) - - if len(parts) == 2 { - padded := fmt.Sprintf("%-9s", parts[1]) - nsString := strings.Replace(padded[:9], " ", "0", -1) - nanosec, err := strconv.ParseInt(nsString, 10, 64) - if err != nil { - log.Printf("E! Error parsing %s to timestamp: %s", v, err) - break - } - ts = ts.Add(time.Duration(nanosec) * time.Nanosecond) - } - timestamp = ts - case EPOCH_NANO: - iv, err := strconv.ParseInt(v, 10, 64) - if err != nil { - log.Printf("E! Error parsing %s to int: %s", v, err) - } else { - timestamp = time.Unix(0, iv) - } - case SYSLOG_TIMESTAMP: - ts, err := time.ParseInLocation("Jan 02 15:04:05", v, p.loc) - if err == nil { - if ts.Year() == 0 { - ts = ts.AddDate(timestamp.Year(), 0, 0) - } - timestamp = ts - } else { - log.Printf("E! Error parsing %s to time layout [%s]: %s", v, t, err) - } - case GENERIC_TIMESTAMP: - var foundTs bool - // first try timestamp layouts that we've already found - for _, layout := range p.foundTsLayouts { - ts, err := time.ParseInLocation(layout, v, p.loc) - if err == nil { - timestamp = ts - foundTs = true - break - } - } - // if we haven't found a timestamp layout yet, try all timestamp - // layouts. 
- if !foundTs { - for _, layout := range timeLayouts { - ts, err := time.ParseInLocation(layout, v, p.loc) - if err == nil { - timestamp = ts - foundTs = true - p.foundTsLayouts = append(p.foundTsLayouts, layout) - break - } - } - } - // if we still haven't found a timestamp layout, log it and we will - // just use time.Now() - if !foundTs { - log.Printf("E! Error parsing timestamp [%s], could not find any "+ - "suitable time layouts.", v) - } - case DROP: - // goodbye! - default: - // Replace commas with dot character - v = strings.Replace(v, ",", ".", -1) - - ts, err := time.ParseInLocation(t, v, p.loc) - if err == nil { - timestamp = ts - } else { - log.Printf("E! Error parsing %s to time layout [%s]: %s", v, t, err) - } - } - } - - if len(fields) == 0 { - return nil, fmt.Errorf("logparser_grok: must have one or more fields") - } - - return metric.New(p.Measurement, tags, fields, p.tsModder.tsMod(timestamp)) -} - -func (p *Parser) addCustomPatterns(scanner *bufio.Scanner) { - for scanner.Scan() { - line := strings.TrimSpace(scanner.Text()) - if len(line) > 0 && line[0] != '#' { - names := strings.SplitN(line, " ", 2) - p.patterns[names[0]] = names[1] - } - } -} - -func (p *Parser) compileCustomPatterns() error { - var err error - // check if the pattern contains a subpattern that is already defined - // replace it with the subpattern for modifier inheritance. - for i := 0; i < 2; i++ { - for name, pattern := range p.patterns { - subNames := patternOnlyRe.FindAllStringSubmatch(pattern, -1) - for _, subName := range subNames { - if subPattern, ok := p.patterns[subName[1]]; ok { - pattern = strings.Replace(pattern, subName[0], subPattern, 1) - } - } - p.patterns[name] = pattern - } - } - - // check if pattern contains modifiers. Parse them out if it does. 
- for name, pattern := range p.patterns { - if modifierRe.MatchString(pattern) { - // this pattern has modifiers, so parse out the modifiers - pattern, err = p.parseTypedCaptures(name, pattern) - if err != nil { - return err - } - p.patterns[name] = pattern - } - } - - return p.g.AddPatternsFromMap(p.patterns) -} - -// parseTypedCaptures parses the capture modifiers, and then deletes the -// modifier from the line so that it is a valid "grok" pattern again. -// ie, -// %{NUMBER:bytes:int} => %{NUMBER:bytes} (stores %{NUMBER}->bytes->int) -// %{IPORHOST:clientip:tag} => %{IPORHOST:clientip} (stores %{IPORHOST}->clientip->tag) -func (p *Parser) parseTypedCaptures(name, pattern string) (string, error) { - matches := modifierRe.FindAllStringSubmatch(pattern, -1) - - // grab the name of the capture pattern - patternName := "%{" + name + "}" - // create type map for this pattern - p.typeMap[patternName] = make(map[string]string) - p.tsMap[patternName] = make(map[string]string) - - // boolean to verify that each pattern only has a single ts- data type. - hasTimestamp := false - for _, match := range matches { - // regex capture 1 is the name of the capture - // regex capture 2 is the modifier of the capture - if strings.HasPrefix(match[2], "ts") { - if hasTimestamp { - return pattern, fmt.Errorf("logparser pattern compile error: "+ - "Each pattern is allowed only one named "+ - "timestamp data type. pattern: %s", pattern) - } - if layout, ok := timeLayouts[match[2]]; ok { - // built-in time format - p.tsMap[patternName][match[1]] = layout - } else { - // custom time format - p.tsMap[patternName][match[1]] = strings.TrimSuffix(strings.TrimPrefix(match[2], `ts-"`), `"`) - } - hasTimestamp = true - } else { - p.typeMap[patternName][match[1]] = match[2] - } - - // the modifier is not a valid part of a "grok" pattern, so remove it - // from the pattern. 
- pattern = strings.Replace(pattern, ":"+match[2]+"}", "}", 1) - } - - return pattern, nil -} - -// tsModder is a struct for incrementing identical timestamps of log lines -// so that we don't push identical metrics that will get overwritten. -type tsModder struct { - dupe time.Time - last time.Time - incr time.Duration - incrn time.Duration - rollover time.Duration -} - -// tsMod increments the given timestamp one unit more from the previous -// duplicate timestamp. -// the increment unit is determined as the next smallest time unit below the -// most significant time unit of ts. -// ie, if the input is at ms precision, it will increment it 1µs. -func (t *tsModder) tsMod(ts time.Time) time.Time { - defer func() { t.last = ts }() - // don't mod the time if we don't need to - if t.last.IsZero() || ts.IsZero() { - t.incrn = 0 - t.rollover = 0 - return ts - } - if !ts.Equal(t.last) && !ts.Equal(t.dupe) { - t.incr = 0 - t.incrn = 0 - t.rollover = 0 - return ts - } - - if ts.Equal(t.last) { - t.dupe = ts - } - - if ts.Equal(t.dupe) && t.incr == time.Duration(0) { - tsNano := ts.UnixNano() - - d := int64(10) - counter := 1 - for { - a := tsNano % d - if a > 0 { - break - } - d = d * 10 - counter++ - } - - switch { - case counter <= 6: - t.incr = time.Nanosecond - case counter <= 9: - t.incr = time.Microsecond - case counter > 9: - t.incr = time.Millisecond - } - } - - t.incrn++ - if t.incrn == 999 && t.incr > time.Nanosecond { - t.rollover = t.incr * t.incrn - t.incrn = 1 - t.incr = t.incr / 1000 - if t.incr < time.Nanosecond { - t.incr = time.Nanosecond - } - } - return ts.Add(t.incr*t.incrn + t.rollover) -} diff --git a/plugins/inputs/logparser/grok/grok_test.go b/plugins/inputs/logparser/grok/grok_test.go deleted file mode 100644 index 6a143bb7d..000000000 --- a/plugins/inputs/logparser/grok/grok_test.go +++ /dev/null @@ -1,1002 +0,0 @@ -package grok - -import ( - "testing" - "time" - - "github.com/influxdata/telegraf" - - "github.com/stretchr/testify/assert" - 
"github.com/stretchr/testify/require" -) - -var benchM telegraf.Metric - -func Benchmark_ParseLine_CommonLogFormat(b *testing.B) { - p := &Parser{ - Patterns: []string{"%{COMMON_LOG_FORMAT}"}, - } - _ = p.Compile() - - var m telegraf.Metric - for n := 0; n < b.N; n++ { - m, _ = p.ParseLine(`127.0.0.1 user-identifier frank [10/Oct/2000:13:55:36 -0700] "GET /apache_pb.gif HTTP/1.0" 200 2326`) - } - benchM = m -} - -func Benchmark_ParseLine_CombinedLogFormat(b *testing.B) { - p := &Parser{ - Patterns: []string{"%{COMBINED_LOG_FORMAT}"}, - } - _ = p.Compile() - - var m telegraf.Metric - for n := 0; n < b.N; n++ { - m, _ = p.ParseLine(`127.0.0.1 user-identifier frank [10/Oct/2000:13:55:36 -0700] "GET /apache_pb.gif HTTP/1.0" 200 2326 "-" "Mozilla"`) - } - benchM = m -} - -func Benchmark_ParseLine_CustomPattern(b *testing.B) { - p := &Parser{ - Patterns: []string{"%{TEST_LOG_A}", "%{TEST_LOG_B}"}, - CustomPatterns: ` - DURATION %{NUMBER}[nuµm]?s - RESPONSE_CODE %{NUMBER:response_code:tag} - RESPONSE_TIME %{DURATION:response_time:duration} - TEST_LOG_A %{NUMBER:myfloat:float} %{RESPONSE_CODE} %{IPORHOST:clientip} %{RESPONSE_TIME} - `, - } - _ = p.Compile() - - var m telegraf.Metric - for n := 0; n < b.N; n++ { - m, _ = p.ParseLine(`[04/Jun/2016:12:41:45 +0100] 1.25 200 192.168.1.1 5.432µs 101`) - } - benchM = m -} - -// Test a very simple parse pattern. -func TestSimpleParse(t *testing.T) { - p := &Parser{ - Patterns: []string{"%{TESTLOG}"}, - CustomPatterns: ` - TESTLOG %{NUMBER:num:int} %{WORD:client} - `, - } - assert.NoError(t, p.Compile()) - - m, err := p.ParseLine(`142 bot`) - assert.NoError(t, err) - require.NotNil(t, m) - - assert.Equal(t, - map[string]interface{}{ - "num": int64(142), - "client": "bot", - }, - m.Fields()) -} - -// Verify that patterns with a regex lookahead fail at compile time. 
-func TestParsePatternsWithLookahead(t *testing.T) { - p := &Parser{ - Patterns: []string{"%{MYLOG}"}, - CustomPatterns: ` - NOBOT ((?!bot|crawl).)* - MYLOG %{NUMBER:num:int} %{NOBOT:client} - `, - } - assert.NoError(t, p.Compile()) - - _, err := p.ParseLine(`1466004605359052000 bot`) - assert.Error(t, err) -} - -func TestMeasurementName(t *testing.T) { - p := &Parser{ - Measurement: "my_web_log", - Patterns: []string{"%{COMMON_LOG_FORMAT}"}, - } - assert.NoError(t, p.Compile()) - - // Parse an influxdb POST request - m, err := p.ParseLine(`127.0.0.1 user-identifier frank [10/Oct/2000:13:55:36 -0700] "GET /apache_pb.gif HTTP/1.0" 200 2326`) - require.NotNil(t, m) - assert.NoError(t, err) - assert.Equal(t, - map[string]interface{}{ - "resp_bytes": int64(2326), - "auth": "frank", - "client_ip": "127.0.0.1", - "http_version": float64(1.0), - "ident": "user-identifier", - "request": "/apache_pb.gif", - }, - m.Fields()) - assert.Equal(t, map[string]string{"verb": "GET", "resp_code": "200"}, m.Tags()) - assert.Equal(t, "my_web_log", m.Name()) -} - -func TestCLF_IPv6(t *testing.T) { - p := &Parser{ - Measurement: "my_web_log", - Patterns: []string{"%{COMMON_LOG_FORMAT}"}, - } - assert.NoError(t, p.Compile()) - - m, err := p.ParseLine(`2001:0db8:85a3:0000:0000:8a2e:0370:7334 user-identifier frank [10/Oct/2000:13:55:36 -0700] "GET /apache_pb.gif HTTP/1.0" 200 2326`) - require.NotNil(t, m) - assert.NoError(t, err) - assert.Equal(t, - map[string]interface{}{ - "resp_bytes": int64(2326), - "auth": "frank", - "client_ip": "2001:0db8:85a3:0000:0000:8a2e:0370:7334", - "http_version": float64(1.0), - "ident": "user-identifier", - "request": "/apache_pb.gif", - }, - m.Fields()) - assert.Equal(t, map[string]string{"verb": "GET", "resp_code": "200"}, m.Tags()) - assert.Equal(t, "my_web_log", m.Name()) - - m, err = p.ParseLine(`::1 user-identifier frank [10/Oct/2000:13:55:36 -0700] "GET /apache_pb.gif HTTP/1.0" 200 2326`) - require.NotNil(t, m) - assert.NoError(t, err) - 
assert.Equal(t, - map[string]interface{}{ - "resp_bytes": int64(2326), - "auth": "frank", - "client_ip": "::1", - "http_version": float64(1.0), - "ident": "user-identifier", - "request": "/apache_pb.gif", - }, - m.Fields()) - assert.Equal(t, map[string]string{"verb": "GET", "resp_code": "200"}, m.Tags()) - assert.Equal(t, "my_web_log", m.Name()) -} - -func TestCustomInfluxdbHttpd(t *testing.T) { - p := &Parser{ - Patterns: []string{`\[httpd\] %{COMBINED_LOG_FORMAT} %{UUID:uuid:drop} %{NUMBER:response_time_us:int}`}, - } - assert.NoError(t, p.Compile()) - - // Parse an influxdb POST request - m, err := p.ParseLine(`[httpd] ::1 - - [14/Jun/2016:11:33:29 +0100] "POST /write?consistency=any&db=telegraf&precision=ns&rp= HTTP/1.1" 204 0 "-" "InfluxDBClient" 6f61bc44-321b-11e6-8050-000000000000 2513`) - require.NotNil(t, m) - assert.NoError(t, err) - assert.Equal(t, - map[string]interface{}{ - "resp_bytes": int64(0), - "auth": "-", - "client_ip": "::1", - "http_version": float64(1.1), - "ident": "-", - "referrer": "-", - "request": "/write?consistency=any&db=telegraf&precision=ns&rp=", - "response_time_us": int64(2513), - "agent": "InfluxDBClient", - }, - m.Fields()) - assert.Equal(t, map[string]string{"verb": "POST", "resp_code": "204"}, m.Tags()) - - // Parse an influxdb GET request - m, err = p.ParseLine(`[httpd] ::1 - - [14/Jun/2016:12:10:02 +0100] "GET /query?db=telegraf&q=SELECT+bytes%2Cresponse_time_us+FROM+logparser_grok+WHERE+http_method+%3D+%27GET%27+AND+response_time_us+%3E+0+AND+time+%3E+now%28%29+-+1h HTTP/1.1" 200 578 "http://localhost:8083/" "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.84 Safari/537.36" 8a3806f1-3220-11e6-8006-000000000000 988`) - require.NotNil(t, m) - assert.NoError(t, err) - assert.Equal(t, - map[string]interface{}{ - "resp_bytes": int64(578), - "auth": "-", - "client_ip": "::1", - "http_version": float64(1.1), - "ident": "-", - "referrer": "http://localhost:8083/", - "request": 
"/query?db=telegraf&q=SELECT+bytes%2Cresponse_time_us+FROM+logparser_grok+WHERE+http_method+%3D+%27GET%27+AND+response_time_us+%3E+0+AND+time+%3E+now%28%29+-+1h", - "response_time_us": int64(988), - "agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.84 Safari/537.36", - }, - m.Fields()) - assert.Equal(t, map[string]string{"verb": "GET", "resp_code": "200"}, m.Tags()) -} - -// common log format -// 127.0.0.1 user-identifier frank [10/Oct/2000:13:55:36 -0700] "GET /apache_pb.gif HTTP/1.0" 200 2326 -func TestBuiltinCommonLogFormat(t *testing.T) { - p := &Parser{ - Patterns: []string{"%{COMMON_LOG_FORMAT}"}, - } - assert.NoError(t, p.Compile()) - - // Parse an influxdb POST request - m, err := p.ParseLine(`127.0.0.1 user-identifier frank [10/Oct/2000:13:55:36 -0700] "GET /apache_pb.gif HTTP/1.0" 200 2326`) - require.NotNil(t, m) - assert.NoError(t, err) - assert.Equal(t, - map[string]interface{}{ - "resp_bytes": int64(2326), - "auth": "frank", - "client_ip": "127.0.0.1", - "http_version": float64(1.0), - "ident": "user-identifier", - "request": "/apache_pb.gif", - }, - m.Fields()) - assert.Equal(t, map[string]string{"verb": "GET", "resp_code": "200"}, m.Tags()) -} - -// common log format -// 127.0.0.1 user1234 frank1234 [10/Oct/2000:13:55:36 -0700] "GET /apache_pb.gif HTTP/1.0" 200 2326 -func TestBuiltinCommonLogFormatWithNumbers(t *testing.T) { - p := &Parser{ - Patterns: []string{"%{COMMON_LOG_FORMAT}"}, - } - assert.NoError(t, p.Compile()) - - // Parse an influxdb POST request - m, err := p.ParseLine(`127.0.0.1 user1234 frank1234 [10/Oct/2000:13:55:36 -0700] "GET /apache_pb.gif HTTP/1.0" 200 2326`) - require.NotNil(t, m) - assert.NoError(t, err) - assert.Equal(t, - map[string]interface{}{ - "resp_bytes": int64(2326), - "auth": "frank1234", - "client_ip": "127.0.0.1", - "http_version": float64(1.0), - "ident": "user1234", - "request": "/apache_pb.gif", - }, - m.Fields()) - assert.Equal(t, 
map[string]string{"verb": "GET", "resp_code": "200"}, m.Tags()) -} - -// combined log format -// 127.0.0.1 user-identifier frank [10/Oct/2000:13:55:36 -0700] "GET /apache_pb.gif HTTP/1.0" 200 2326 "-" "Mozilla" -func TestBuiltinCombinedLogFormat(t *testing.T) { - p := &Parser{ - Patterns: []string{"%{COMBINED_LOG_FORMAT}"}, - } - assert.NoError(t, p.Compile()) - - // Parse an influxdb POST request - m, err := p.ParseLine(`127.0.0.1 user-identifier frank [10/Oct/2000:13:55:36 -0700] "GET /apache_pb.gif HTTP/1.0" 200 2326 "-" "Mozilla"`) - require.NotNil(t, m) - assert.NoError(t, err) - assert.Equal(t, - map[string]interface{}{ - "resp_bytes": int64(2326), - "auth": "frank", - "client_ip": "127.0.0.1", - "http_version": float64(1.0), - "ident": "user-identifier", - "request": "/apache_pb.gif", - "referrer": "-", - "agent": "Mozilla", - }, - m.Fields()) - assert.Equal(t, map[string]string{"verb": "GET", "resp_code": "200"}, m.Tags()) -} - -func TestCompileStringAndParse(t *testing.T) { - p := &Parser{ - Patterns: []string{"%{TEST_LOG_A}"}, - CustomPatterns: ` - DURATION %{NUMBER}[nuµm]?s - RESPONSE_CODE %{NUMBER:response_code:tag} - RESPONSE_TIME %{DURATION:response_time:duration} - TEST_LOG_A %{NUMBER:myfloat:float} %{RESPONSE_CODE} %{IPORHOST:clientip} %{RESPONSE_TIME} - `, - } - assert.NoError(t, p.Compile()) - - metricA, err := p.ParseLine(`1.25 200 192.168.1.1 5.432µs`) - require.NotNil(t, metricA) - assert.NoError(t, err) - assert.Equal(t, - map[string]interface{}{ - "clientip": "192.168.1.1", - "myfloat": float64(1.25), - "response_time": int64(5432), - }, - metricA.Fields()) - assert.Equal(t, map[string]string{"response_code": "200"}, metricA.Tags()) -} - -func TestCompileErrorsOnInvalidPattern(t *testing.T) { - p := &Parser{ - Patterns: []string{"%{TEST_LOG_A}", "%{TEST_LOG_B}"}, - CustomPatterns: ` - DURATION %{NUMBER}[nuµm]?s - RESPONSE_CODE %{NUMBER:response_code:tag} - RESPONSE_TIME %{DURATION:response_time:duration} - TEST_LOG_A %{NUMBER:myfloat:float} 
%{RESPONSE_CODE} %{IPORHOST:clientip} %{RESPONSE_TIME} - `, - } - assert.Error(t, p.Compile()) - - metricA, _ := p.ParseLine(`1.25 200 192.168.1.1 5.432µs`) - require.Nil(t, metricA) -} - -func TestParsePatternsWithoutCustom(t *testing.T) { - p := &Parser{ - Patterns: []string{"%{POSINT:ts:ts-epochnano} response_time=%{POSINT:response_time:int} mymetric=%{NUMBER:metric:float}"}, - } - assert.NoError(t, p.Compile()) - - metricA, err := p.ParseLine(`1466004605359052000 response_time=20821 mymetric=10890.645`) - require.NotNil(t, metricA) - assert.NoError(t, err) - assert.Equal(t, - map[string]interface{}{ - "response_time": int64(20821), - "metric": float64(10890.645), - }, - metricA.Fields()) - assert.Equal(t, map[string]string{}, metricA.Tags()) - assert.Equal(t, time.Unix(0, 1466004605359052000), metricA.Time()) -} - -func TestParseEpochNano(t *testing.T) { - p := &Parser{ - Patterns: []string{"%{MYAPP}"}, - CustomPatterns: ` - MYAPP %{POSINT:ts:ts-epochnano} response_time=%{POSINT:response_time:int} mymetric=%{NUMBER:metric:float} - `, - } - assert.NoError(t, p.Compile()) - - metricA, err := p.ParseLine(`1466004605359052000 response_time=20821 mymetric=10890.645`) - require.NotNil(t, metricA) - assert.NoError(t, err) - assert.Equal(t, - map[string]interface{}{ - "response_time": int64(20821), - "metric": float64(10890.645), - }, - metricA.Fields()) - assert.Equal(t, map[string]string{}, metricA.Tags()) - assert.Equal(t, time.Unix(0, 1466004605359052000), metricA.Time()) -} - -func TestParseEpoch(t *testing.T) { - p := &Parser{ - Patterns: []string{"%{MYAPP}"}, - CustomPatterns: ` - MYAPP %{POSINT:ts:ts-epoch} response_time=%{POSINT:response_time:int} mymetric=%{NUMBER:metric:float} - `, - } - assert.NoError(t, p.Compile()) - - metricA, err := p.ParseLine(`1466004605 response_time=20821 mymetric=10890.645`) - require.NotNil(t, metricA) - assert.NoError(t, err) - assert.Equal(t, - map[string]interface{}{ - "response_time": int64(20821), - "metric": 
float64(10890.645), - }, - metricA.Fields()) - assert.Equal(t, map[string]string{}, metricA.Tags()) - assert.Equal(t, time.Unix(1466004605, 0), metricA.Time()) -} - -func TestParseEpochDecimal(t *testing.T) { - var tests = []struct { - name string - line string - noMatch bool - err error - tags map[string]string - fields map[string]interface{} - time time.Time - }{ - { - name: "ns precision", - line: "1466004605.359052000 value=42", - tags: map[string]string{}, - fields: map[string]interface{}{ - "value": int64(42), - }, - time: time.Unix(0, 1466004605359052000), - }, - { - name: "ms precision", - line: "1466004605.359 value=42", - tags: map[string]string{}, - fields: map[string]interface{}{ - "value": int64(42), - }, - time: time.Unix(0, 1466004605359000000), - }, - { - name: "second precision", - line: "1466004605 value=42", - tags: map[string]string{}, - fields: map[string]interface{}{ - "value": int64(42), - }, - time: time.Unix(0, 1466004605000000000), - }, - { - name: "sub ns precision", - line: "1466004605.123456789123 value=42", - tags: map[string]string{}, - fields: map[string]interface{}{ - "value": int64(42), - }, - time: time.Unix(0, 1466004605123456789), - }, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - parser := &Parser{ - Patterns: []string{"%{NUMBER:ts:ts-epoch} value=%{NUMBER:value:int}"}, - } - assert.NoError(t, parser.Compile()) - m, err := parser.ParseLine(tt.line) - - if tt.noMatch { - require.Nil(t, m) - require.Nil(t, err) - return - } - - require.Equal(t, tt.err, err) - - require.NotNil(t, m) - require.Equal(t, tt.tags, m.Tags()) - require.Equal(t, tt.fields, m.Fields()) - require.Equal(t, tt.time, m.Time()) - }) - } -} - -func TestParseEpochErrors(t *testing.T) { - p := &Parser{ - Patterns: []string{"%{MYAPP}"}, - CustomPatterns: ` - MYAPP %{WORD:ts:ts-epoch} response_time=%{POSINT:response_time:int} mymetric=%{NUMBER:metric:float} - `, - } - assert.NoError(t, p.Compile()) - - _, err := p.ParseLine(`foobar 
response_time=20821 mymetric=10890.645`) - assert.NoError(t, err) - - p = &Parser{ - Patterns: []string{"%{MYAPP}"}, - CustomPatterns: ` - MYAPP %{WORD:ts:ts-epochnano} response_time=%{POSINT:response_time:int} mymetric=%{NUMBER:metric:float} - `, - } - assert.NoError(t, p.Compile()) - - _, err = p.ParseLine(`foobar response_time=20821 mymetric=10890.645`) - assert.NoError(t, err) -} - -func TestParseGenericTimestamp(t *testing.T) { - p := &Parser{ - Patterns: []string{`\[%{HTTPDATE:ts:ts}\] response_time=%{POSINT:response_time:int} mymetric=%{NUMBER:metric:float}`}, - } - assert.NoError(t, p.Compile()) - - metricA, err := p.ParseLine(`[09/Jun/2016:03:37:03 +0000] response_time=20821 mymetric=10890.645`) - require.NotNil(t, metricA) - assert.NoError(t, err) - assert.Equal(t, - map[string]interface{}{ - "response_time": int64(20821), - "metric": float64(10890.645), - }, - metricA.Fields()) - assert.Equal(t, map[string]string{}, metricA.Tags()) - assert.Equal(t, time.Unix(1465443423, 0).UTC(), metricA.Time().UTC()) - - metricB, err := p.ParseLine(`[09/Jun/2016:03:37:04 +0000] response_time=20821 mymetric=10890.645`) - require.NotNil(t, metricB) - assert.NoError(t, err) - assert.Equal(t, - map[string]interface{}{ - "response_time": int64(20821), - "metric": float64(10890.645), - }, - metricB.Fields()) - assert.Equal(t, map[string]string{}, metricB.Tags()) - assert.Equal(t, time.Unix(1465443424, 0).UTC(), metricB.Time().UTC()) -} - -func TestParseGenericTimestampNotFound(t *testing.T) { - p := &Parser{ - Patterns: []string{`\[%{NOTSPACE:ts:ts}\] response_time=%{POSINT:response_time:int} mymetric=%{NUMBER:metric:float}`}, - } - assert.NoError(t, p.Compile()) - - metricA, err := p.ParseLine(`[foobar] response_time=20821 mymetric=10890.645`) - require.NotNil(t, metricA) - assert.NoError(t, err) - assert.Equal(t, - map[string]interface{}{ - "response_time": int64(20821), - "metric": float64(10890.645), - }, - metricA.Fields()) - assert.Equal(t, map[string]string{}, 
metricA.Tags()) -} - -func TestCompileFileAndParse(t *testing.T) { - p := &Parser{ - Patterns: []string{"%{TEST_LOG_A}", "%{TEST_LOG_B}"}, - CustomPatternFiles: []string{"./testdata/test-patterns"}, - } - assert.NoError(t, p.Compile()) - - metricA, err := p.ParseLine(`[04/Jun/2016:12:41:45 +0100] 1.25 200 192.168.1.1 5.432µs 101`) - require.NotNil(t, metricA) - assert.NoError(t, err) - assert.Equal(t, - map[string]interface{}{ - "clientip": "192.168.1.1", - "myfloat": float64(1.25), - "response_time": int64(5432), - "myint": int64(101), - }, - metricA.Fields()) - assert.Equal(t, map[string]string{"response_code": "200"}, metricA.Tags()) - assert.Equal(t, - time.Date(2016, time.June, 4, 12, 41, 45, 0, time.FixedZone("foo", 60*60)).Nanosecond(), - metricA.Time().Nanosecond()) - - metricB, err := p.ParseLine(`[04/06/2016--12:41:45] 1.25 mystring dropme nomodifier`) - require.NotNil(t, metricB) - assert.NoError(t, err) - assert.Equal(t, - map[string]interface{}{ - "myfloat": 1.25, - "mystring": "mystring", - "nomodifier": "nomodifier", - }, - metricB.Fields()) - assert.Equal(t, map[string]string{}, metricB.Tags()) - assert.Equal(t, - time.Date(2016, time.June, 4, 12, 41, 45, 0, time.FixedZone("foo", 60*60)).Nanosecond(), - metricB.Time().Nanosecond()) -} - -func TestCompileNoModifiersAndParse(t *testing.T) { - p := &Parser{ - Patterns: []string{"%{TEST_LOG_C}"}, - CustomPatterns: ` - DURATION %{NUMBER}[nuµm]?s - TEST_LOG_C %{NUMBER:myfloat} %{NUMBER} %{IPORHOST:clientip} %{DURATION:rt} - `, - } - assert.NoError(t, p.Compile()) - - metricA, err := p.ParseLine(`1.25 200 192.168.1.1 5.432µs`) - require.NotNil(t, metricA) - assert.NoError(t, err) - assert.Equal(t, - map[string]interface{}{ - "clientip": "192.168.1.1", - "myfloat": "1.25", - "rt": "5.432µs", - }, - metricA.Fields()) - assert.Equal(t, map[string]string{}, metricA.Tags()) -} - -func TestCompileNoNamesAndParse(t *testing.T) { - p := &Parser{ - Patterns: []string{"%{TEST_LOG_C}"}, - CustomPatterns: ` - DURATION 
%{NUMBER}[nuµm]?s - TEST_LOG_C %{NUMBER} %{NUMBER} %{IPORHOST} %{DURATION} - `, - } - assert.NoError(t, p.Compile()) - - metricA, err := p.ParseLine(`1.25 200 192.168.1.1 5.432µs`) - require.Nil(t, metricA) - assert.NoError(t, err) -} - -func TestParseNoMatch(t *testing.T) { - p := &Parser{ - Patterns: []string{"%{TEST_LOG_A}", "%{TEST_LOG_B}"}, - CustomPatternFiles: []string{"./testdata/test-patterns"}, - } - assert.NoError(t, p.Compile()) - - metricA, err := p.ParseLine(`[04/Jun/2016:12:41:45 +0100] notnumber 200 192.168.1.1 5.432µs 101`) - assert.NoError(t, err) - assert.Nil(t, metricA) -} - -func TestCompileErrors(t *testing.T) { - // Compile fails because there are multiple timestamps: - p := &Parser{ - Patterns: []string{"%{TEST_LOG_A}", "%{TEST_LOG_B}"}, - CustomPatterns: ` - TEST_LOG_A %{HTTPDATE:ts1:ts-httpd} %{HTTPDATE:ts2:ts-httpd} %{NUMBER:mynum:int} - `, - } - assert.Error(t, p.Compile()) - - // Compile fails because file doesn't exist: - p = &Parser{ - Patterns: []string{"%{TEST_LOG_A}", "%{TEST_LOG_B}"}, - CustomPatternFiles: []string{"/tmp/foo/bar/baz"}, - } - assert.Error(t, p.Compile()) -} - -func TestParseErrors(t *testing.T) { - // Parse fails because the pattern doesn't exist - p := &Parser{ - Patterns: []string{"%{TEST_LOG_B}"}, - CustomPatterns: ` - TEST_LOG_A %{HTTPDATE:ts:ts-httpd} %{WORD:myword:int} %{} - `, - } - assert.Error(t, p.Compile()) - _, err := p.ParseLine(`[04/Jun/2016:12:41:45 +0100] notnumber 200 192.168.1.1 5.432µs 101`) - assert.Error(t, err) - - // Parse fails because myword is not an int - p = &Parser{ - Patterns: []string{"%{TEST_LOG_A}"}, - CustomPatterns: ` - TEST_LOG_A %{HTTPDATE:ts:ts-httpd} %{WORD:myword:int} - `, - } - assert.NoError(t, p.Compile()) - _, err = p.ParseLine(`04/Jun/2016:12:41:45 +0100 notnumber`) - assert.Error(t, err) - - // Parse fails because myword is not a float - p = &Parser{ - Patterns: []string{"%{TEST_LOG_A}"}, - CustomPatterns: ` - TEST_LOG_A %{HTTPDATE:ts:ts-httpd} %{WORD:myword:float} - `, 
- } - assert.NoError(t, p.Compile()) - _, err = p.ParseLine(`04/Jun/2016:12:41:45 +0100 notnumber`) - assert.Error(t, err) - - // Parse fails because myword is not a duration - p = &Parser{ - Patterns: []string{"%{TEST_LOG_A}"}, - CustomPatterns: ` - TEST_LOG_A %{HTTPDATE:ts:ts-httpd} %{WORD:myword:duration} - `, - } - assert.NoError(t, p.Compile()) - _, err = p.ParseLine(`04/Jun/2016:12:41:45 +0100 notnumber`) - assert.Error(t, err) - - // Parse fails because the time layout is wrong. - p = &Parser{ - Patterns: []string{"%{TEST_LOG_A}"}, - CustomPatterns: ` - TEST_LOG_A %{HTTPDATE:ts:ts-unix} %{WORD:myword:duration} - `, - } - assert.NoError(t, p.Compile()) - _, err = p.ParseLine(`04/Jun/2016:12:41:45 +0100 notnumber`) - assert.Error(t, err) -} - -func TestTsModder(t *testing.T) { - tsm := &tsModder{} - - reftime := time.Date(2006, time.December, 1, 1, 1, 1, int(time.Millisecond), time.UTC) - modt := tsm.tsMod(reftime) - assert.Equal(t, reftime, modt) - modt = tsm.tsMod(reftime) - assert.Equal(t, reftime.Add(time.Microsecond*1), modt) - modt = tsm.tsMod(reftime) - assert.Equal(t, reftime.Add(time.Microsecond*2), modt) - modt = tsm.tsMod(reftime) - assert.Equal(t, reftime.Add(time.Microsecond*3), modt) - - reftime = time.Date(2006, time.December, 1, 1, 1, 1, int(time.Microsecond), time.UTC) - modt = tsm.tsMod(reftime) - assert.Equal(t, reftime, modt) - modt = tsm.tsMod(reftime) - assert.Equal(t, reftime.Add(time.Nanosecond*1), modt) - modt = tsm.tsMod(reftime) - assert.Equal(t, reftime.Add(time.Nanosecond*2), modt) - modt = tsm.tsMod(reftime) - assert.Equal(t, reftime.Add(time.Nanosecond*3), modt) - - reftime = time.Date(2006, time.December, 1, 1, 1, 1, int(time.Microsecond)*999, time.UTC) - modt = tsm.tsMod(reftime) - assert.Equal(t, reftime, modt) - modt = tsm.tsMod(reftime) - assert.Equal(t, reftime.Add(time.Nanosecond*1), modt) - modt = tsm.tsMod(reftime) - assert.Equal(t, reftime.Add(time.Nanosecond*2), modt) - modt = tsm.tsMod(reftime) - assert.Equal(t, 
reftime.Add(time.Nanosecond*3), modt) - - reftime = time.Date(2006, time.December, 1, 1, 1, 1, 0, time.UTC) - modt = tsm.tsMod(reftime) - assert.Equal(t, reftime, modt) - modt = tsm.tsMod(reftime) - assert.Equal(t, reftime.Add(time.Millisecond*1), modt) - modt = tsm.tsMod(reftime) - assert.Equal(t, reftime.Add(time.Millisecond*2), modt) - modt = tsm.tsMod(reftime) - assert.Equal(t, reftime.Add(time.Millisecond*3), modt) - - reftime = time.Time{} - modt = tsm.tsMod(reftime) - assert.Equal(t, reftime, modt) -} - -func TestTsModder_Rollover(t *testing.T) { - tsm := &tsModder{} - - reftime := time.Date(2006, time.December, 1, 1, 1, 1, int(time.Millisecond), time.UTC) - modt := tsm.tsMod(reftime) - for i := 1; i < 1000; i++ { - modt = tsm.tsMod(reftime) - } - assert.Equal(t, reftime.Add(time.Microsecond*999+time.Nanosecond), modt) - - reftime = time.Date(2006, time.December, 1, 1, 1, 1, int(time.Microsecond), time.UTC) - modt = tsm.tsMod(reftime) - for i := 1; i < 1001; i++ { - modt = tsm.tsMod(reftime) - } - assert.Equal(t, reftime.Add(time.Nanosecond*1000), modt) -} - -func TestShortPatternRegression(t *testing.T) { - p := &Parser{ - Patterns: []string{"%{TS_UNIX:timestamp:ts-unix} %{NUMBER:value:int}"}, - CustomPatterns: ` - TS_UNIX %{DAY} %{MONTH} %{MONTHDAY} %{HOUR}:%{MINUTE}:%{SECOND} %{TZ} %{YEAR} - `, - } - require.NoError(t, p.Compile()) - - metric, err := p.ParseLine(`Wed Apr 12 13:10:34 PST 2017 42`) - require.NoError(t, err) - require.NotNil(t, metric) - - require.Equal(t, - map[string]interface{}{ - "value": int64(42), - }, - metric.Fields()) -} - -func TestTimezoneEmptyCompileFileAndParse(t *testing.T) { - p := &Parser{ - Patterns: []string{"%{TEST_LOG_A}", "%{TEST_LOG_B}"}, - CustomPatternFiles: []string{"./testdata/test-patterns"}, - Timezone: "", - } - assert.NoError(t, p.Compile()) - - metricA, err := p.ParseLine(`[04/Jun/2016:12:41:45 +0100] 1.25 200 192.168.1.1 5.432µs 101`) - require.NotNil(t, metricA) - assert.NoError(t, err) - assert.Equal(t, - 
map[string]interface{}{ - "clientip": "192.168.1.1", - "myfloat": float64(1.25), - "response_time": int64(5432), - "myint": int64(101), - }, - metricA.Fields()) - assert.Equal(t, map[string]string{"response_code": "200"}, metricA.Tags()) - assert.Equal(t, int64(1465040505000000000), metricA.Time().UnixNano()) - - metricB, err := p.ParseLine(`[04/06/2016--12:41:45] 1.25 mystring dropme nomodifier`) - require.NotNil(t, metricB) - assert.NoError(t, err) - assert.Equal(t, - map[string]interface{}{ - "myfloat": 1.25, - "mystring": "mystring", - "nomodifier": "nomodifier", - }, - metricB.Fields()) - assert.Equal(t, map[string]string{}, metricB.Tags()) - assert.Equal(t, int64(1465044105000000000), metricB.Time().UnixNano()) -} - -func TestTimezoneMalformedCompileFileAndParse(t *testing.T) { - p := &Parser{ - Patterns: []string{"%{TEST_LOG_A}", "%{TEST_LOG_B}"}, - CustomPatternFiles: []string{"./testdata/test-patterns"}, - Timezone: "Something/Weird", - } - assert.NoError(t, p.Compile()) - - metricA, err := p.ParseLine(`[04/Jun/2016:12:41:45 +0100] 1.25 200 192.168.1.1 5.432µs 101`) - require.NotNil(t, metricA) - assert.NoError(t, err) - assert.Equal(t, - map[string]interface{}{ - "clientip": "192.168.1.1", - "myfloat": float64(1.25), - "response_time": int64(5432), - "myint": int64(101), - }, - metricA.Fields()) - assert.Equal(t, map[string]string{"response_code": "200"}, metricA.Tags()) - assert.Equal(t, int64(1465040505000000000), metricA.Time().UnixNano()) - - metricB, err := p.ParseLine(`[04/06/2016--12:41:45] 1.25 mystring dropme nomodifier`) - require.NotNil(t, metricB) - assert.NoError(t, err) - assert.Equal(t, - map[string]interface{}{ - "myfloat": 1.25, - "mystring": "mystring", - "nomodifier": "nomodifier", - }, - metricB.Fields()) - assert.Equal(t, map[string]string{}, metricB.Tags()) - assert.Equal(t, int64(1465044105000000000), metricB.Time().UnixNano()) -} - -func TestTimezoneEuropeCompileFileAndParse(t *testing.T) { - p := &Parser{ - Patterns: 
[]string{"%{TEST_LOG_A}", "%{TEST_LOG_B}"}, - CustomPatternFiles: []string{"./testdata/test-patterns"}, - Timezone: "Europe/Berlin", - } - assert.NoError(t, p.Compile()) - - metricA, err := p.ParseLine(`[04/Jun/2016:12:41:45 +0100] 1.25 200 192.168.1.1 5.432µs 101`) - require.NotNil(t, metricA) - assert.NoError(t, err) - assert.Equal(t, - map[string]interface{}{ - "clientip": "192.168.1.1", - "myfloat": float64(1.25), - "response_time": int64(5432), - "myint": int64(101), - }, - metricA.Fields()) - assert.Equal(t, map[string]string{"response_code": "200"}, metricA.Tags()) - assert.Equal(t, int64(1465040505000000000), metricA.Time().UnixNano()) - - metricB, err := p.ParseLine(`[04/06/2016--12:41:45] 1.25 mystring dropme nomodifier`) - require.NotNil(t, metricB) - assert.NoError(t, err) - assert.Equal(t, - map[string]interface{}{ - "myfloat": 1.25, - "mystring": "mystring", - "nomodifier": "nomodifier", - }, - metricB.Fields()) - assert.Equal(t, map[string]string{}, metricB.Tags()) - assert.Equal(t, int64(1465036905000000000), metricB.Time().UnixNano()) -} - -func TestTimezoneAmericasCompileFileAndParse(t *testing.T) { - p := &Parser{ - Patterns: []string{"%{TEST_LOG_A}", "%{TEST_LOG_B}"}, - CustomPatternFiles: []string{"./testdata/test-patterns"}, - Timezone: "Canada/Eastern", - } - assert.NoError(t, p.Compile()) - - metricA, err := p.ParseLine(`[04/Jun/2016:12:41:45 +0100] 1.25 200 192.168.1.1 5.432µs 101`) - require.NotNil(t, metricA) - assert.NoError(t, err) - assert.Equal(t, - map[string]interface{}{ - "clientip": "192.168.1.1", - "myfloat": float64(1.25), - "response_time": int64(5432), - "myint": int64(101), - }, - metricA.Fields()) - assert.Equal(t, map[string]string{"response_code": "200"}, metricA.Tags()) - assert.Equal(t, int64(1465040505000000000), metricA.Time().UnixNano()) - - metricB, err := p.ParseLine(`[04/06/2016--12:41:45] 1.25 mystring dropme nomodifier`) - require.NotNil(t, metricB) - assert.NoError(t, err) - assert.Equal(t, - 
map[string]interface{}{ - "myfloat": 1.25, - "mystring": "mystring", - "nomodifier": "nomodifier", - }, - metricB.Fields()) - assert.Equal(t, map[string]string{}, metricB.Tags()) - assert.Equal(t, int64(1465058505000000000), metricB.Time().UnixNano()) -} - -func TestTimezoneLocalCompileFileAndParse(t *testing.T) { - p := &Parser{ - Patterns: []string{"%{TEST_LOG_A}", "%{TEST_LOG_B}"}, - CustomPatternFiles: []string{"./testdata/test-patterns"}, - Timezone: "Local", - } - assert.NoError(t, p.Compile()) - - metricA, err := p.ParseLine(`[04/Jun/2016:12:41:45 +0100] 1.25 200 192.168.1.1 5.432µs 101`) - require.NotNil(t, metricA) - assert.NoError(t, err) - assert.Equal(t, - map[string]interface{}{ - "clientip": "192.168.1.1", - "myfloat": float64(1.25), - "response_time": int64(5432), - "myint": int64(101), - }, - metricA.Fields()) - assert.Equal(t, map[string]string{"response_code": "200"}, metricA.Tags()) - assert.Equal(t, int64(1465040505000000000), metricA.Time().UnixNano()) - - metricB, err := p.ParseLine(`[04/06/2016--12:41:45] 1.25 mystring dropme nomodifier`) - require.NotNil(t, metricB) - assert.NoError(t, err) - assert.Equal(t, - map[string]interface{}{ - "myfloat": 1.25, - "mystring": "mystring", - "nomodifier": "nomodifier", - }, - metricB.Fields()) - assert.Equal(t, map[string]string{}, metricB.Tags()) - assert.Equal(t, time.Date(2016, time.June, 4, 12, 41, 45, 0, time.Local).UnixNano(), metricB.Time().UnixNano()) -} - -func TestNewlineInPatterns(t *testing.T) { - p := &Parser{ - Patterns: []string{` - %{SYSLOGTIMESTAMP:timestamp} - `}, - } - require.NoError(t, p.Compile()) - m, err := p.ParseLine("Apr 10 05:11:57") - require.NoError(t, err) - require.NotNil(t, m) -} - -func TestSyslogTimestampParser(t *testing.T) { - p := &Parser{ - Patterns: []string{`%{SYSLOGTIMESTAMP:timestamp:ts-syslog} value=%{NUMBER:value:int}`}, - timeFunc: func() time.Time { return time.Date(2018, time.April, 1, 0, 0, 0, 0, nil) }, - } - require.NoError(t, p.Compile()) - m, err := 
p.ParseLine("Sep 25 09:01:55 value=42") - require.NoError(t, err) - require.NotNil(t, m) - require.Equal(t, 2018, m.Time().Year()) -} - -func TestReplaceTimestampComma(t *testing.T) { - - p := &Parser{ - Patterns: []string{`%{TIMESTAMP_ISO8601:timestamp:ts-"2006-01-02 15:04:05.000"} successfulMatches=%{NUMBER:value:int}`}, - } - - require.NoError(t, p.Compile()) - m, err := p.ParseLine("2018-02-21 13:10:34,555 successfulMatches=1") - require.NoError(t, err) - require.NotNil(t, m) - - require.Equal(t, 2018, m.Time().Year()) - require.Equal(t, 13, m.Time().Hour()) - require.Equal(t, 34, m.Time().Second()) - //Convert Nanosecond to milisecond for compare - require.Equal(t, 555, m.Time().Nanosecond()/1000000) -} diff --git a/plugins/inputs/logparser/grok/influx_patterns.go b/plugins/inputs/logparser/grok/influx_patterns.go deleted file mode 100644 index 6dc990622..000000000 --- a/plugins/inputs/logparser/grok/influx_patterns.go +++ /dev/null @@ -1,78 +0,0 @@ -package grok - -// DEFAULT_PATTERNS SHOULD BE KEPT IN-SYNC WITH patterns/influx-patterns -const DEFAULT_PATTERNS = ` -# Captures are a slightly modified version of logstash "grok" patterns, with -# the format %{<capture syntax>[:<semantic name>][:<modifier>]} -# By default all named captures are converted into string fields. -# Modifiers can be used to convert captures to other types or tags. -# Timestamp modifiers can be used to convert captures to the timestamp of the -# parsed metric. 
- -# View logstash grok pattern docs here: -# https://www.elastic.co/guide/en/logstash/current/plugins-filters-grok.html -# All default logstash patterns are supported, these can be viewed here: -# https://github.com/logstash-plugins/logstash-patterns-core/blob/master/patterns/grok-patterns - -# Available modifiers: -# string (default if nothing is specified) -# int -# float -# duration (ie, 5.23ms gets converted to int nanoseconds) -# tag (converts the field into a tag) -# drop (drops the field completely) -# Timestamp modifiers: -# ts-ansic ("Mon Jan _2 15:04:05 2006") -# ts-unix ("Mon Jan _2 15:04:05 MST 2006") -# ts-ruby ("Mon Jan 02 15:04:05 -0700 2006") -# ts-rfc822 ("02 Jan 06 15:04 MST") -# ts-rfc822z ("02 Jan 06 15:04 -0700") -# ts-rfc850 ("Monday, 02-Jan-06 15:04:05 MST") -# ts-rfc1123 ("Mon, 02 Jan 2006 15:04:05 MST") -# ts-rfc1123z ("Mon, 02 Jan 2006 15:04:05 -0700") -# ts-rfc3339 ("2006-01-02T15:04:05Z07:00") -# ts-rfc3339nano ("2006-01-02T15:04:05.999999999Z07:00") -# ts-httpd ("02/Jan/2006:15:04:05 -0700") -# ts-epoch (seconds since unix epoch) -# ts-epochnano (nanoseconds since unix epoch) -# ts-"CUSTOM" -# CUSTOM time layouts must be within quotes and be the representation of the -# "reference time", which is Mon Jan 2 15:04:05 -0700 MST 2006 -# See https://golang.org/pkg/time/#Parse for more details. - -# Example log file pattern, example log looks like this: -# [04/Jun/2016:12:41:45 +0100] 1.25 200 192.168.1.1 5.432µs -# Breakdown of the DURATION pattern below: -# NUMBER is a builtin logstash grok pattern matching float & int numbers. -# [nuµm]? is a regex specifying 0 or 1 of the characters within brackets. -# s is also regex, this pattern must end in "s". 
-# so DURATION will match something like '5.324ms' or '6.1µs' or '10s' -DURATION %{NUMBER}[nuµm]?s -RESPONSE_CODE %{NUMBER:response_code:tag} -RESPONSE_TIME %{DURATION:response_time_ns:duration} -EXAMPLE_LOG \[%{HTTPDATE:ts:ts-httpd}\] %{NUMBER:myfloat:float} %{RESPONSE_CODE} %{IPORHOST:clientip} %{RESPONSE_TIME} - -# Wider-ranging username matching vs. logstash built-in %{USER} -NGUSERNAME [a-zA-Z0-9\.\@\-\+_%]+ -NGUSER %{NGUSERNAME} -# Wider-ranging client IP matching -CLIENT (?:%{IPV6}|%{IPV4}|%{HOSTNAME}|%{HOSTPORT}) - -## -## COMMON LOG PATTERNS -## - -# apache & nginx logs, this is also known as the "common log format" -# see https://en.wikipedia.org/wiki/Common_Log_Format -COMMON_LOG_FORMAT %{CLIENT:client_ip} %{NOTSPACE:ident} %{NOTSPACE:auth} \[%{HTTPDATE:ts:ts-httpd}\] "(?:%{WORD:verb:tag} %{NOTSPACE:request}(?: HTTP/%{NUMBER:http_version:float})?|%{DATA})" %{NUMBER:resp_code:tag} (?:%{NUMBER:resp_bytes:int}|-) - -# Combined log format is the same as the common log format but with the addition -# of two quoted strings at the end for "referrer" and "agent" -# See Examples at http://httpd.apache.org/docs/current/mod/mod_log_config.html -COMBINED_LOG_FORMAT %{COMMON_LOG_FORMAT} %{QS:referrer} %{QS:agent} - -# HTTPD log formats -HTTPD20_ERRORLOG \[%{HTTPDERROR_DATE:timestamp}\] \[%{LOGLEVEL:loglevel:tag}\] (?:\[client %{IPORHOST:clientip}\] ){0,1}%{GREEDYDATA:errormsg} -HTTPD24_ERRORLOG \[%{HTTPDERROR_DATE:timestamp}\] \[%{WORD:module}:%{LOGLEVEL:loglevel:tag}\] \[pid %{POSINT:pid:int}:tid %{NUMBER:tid:int}\]( \(%{POSINT:proxy_errorcode:int}\)%{DATA:proxy_errormessage}:)?( \[client %{IPORHOST:client}:%{POSINT:clientport}\])? 
%{DATA:errorcode}: %{GREEDYDATA:message} -HTTPD_ERRORLOG %{HTTPD20_ERRORLOG}|%{HTTPD24_ERRORLOG} -` diff --git a/plugins/parsers/registry.go b/plugins/parsers/registry.go index f45067ea7..5748fc7d3 100644 --- a/plugins/parsers/registry.go +++ b/plugins/parsers/registry.go @@ -156,8 +156,8 @@ func newGrokParser(metricName string, Timezone: tZone, } - parser.Compile() - return &parser, nil + err := parser.Compile() + return &parser, err } func NewJSONParser(