From d627bdbbdbddb80b1dbd58fc978d749b9eec770b Mon Sep 17 00:00:00 2001 From: Cameron Sparr Date: Fri, 7 Oct 2016 12:08:35 +0100 Subject: [PATCH] logparser: allow numbers in ident & auth parameters fixes #1810 --- CHANGELOG.md | 1 + plugins/inputs/logparser/grok/grok_test.go | 25 +++++++++++++++++++ .../inputs/logparser/grok/influx_patterns.go | 4 +-- .../logparser/grok/patterns/influx-patterns | 4 +-- 4 files changed, 30 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 93162e67e..5af8596b7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -55,6 +55,7 @@ continue sending logs to /var/log/telegraf/telegraf.log. - [#1833](https://github.com/influxdata/telegraf/issues/1833): Fix translating SNMP fields not in MIB. - [#1835](https://github.com/influxdata/telegraf/issues/1835): Fix SNMP emitting empty fields. - [#1854](https://github.com/influxdata/telegraf/pull/1853): SQL Server waitstats truncation bug. +- [#1810](https://github.com/influxdata/telegraf/issues/1810): Fix logparser common log format: numbers in ident. 
## v1.0.1 [2016-09-26] diff --git a/plugins/inputs/logparser/grok/grok_test.go b/plugins/inputs/logparser/grok/grok_test.go index bc8d980f2..105cc048c 100644 --- a/plugins/inputs/logparser/grok/grok_test.go +++ b/plugins/inputs/logparser/grok/grok_test.go @@ -152,6 +152,31 @@ func TestBuiltinCommonLogFormat(t *testing.T) { assert.Equal(t, map[string]string{"verb": "GET", "resp_code": "200"}, m.Tags()) } +// common log format +// 127.0.0.1 user1234 frank1234 [10/Oct/2000:13:55:36 -0700] "GET /apache_pb.gif HTTP/1.0" 200 2326 +func TestBuiltinCommonLogFormatWithNumbers(t *testing.T) { + p := &Parser{ + Patterns: []string{"%{COMMON_LOG_FORMAT}"}, + } + assert.NoError(t, p.Compile()) + + // Parse a common log format line whose ident and auth fields contain digits + m, err := p.ParseLine(`127.0.0.1 user1234 frank1234 [10/Oct/2000:13:55:36 -0700] "GET /apache_pb.gif HTTP/1.0" 200 2326`) + require.NotNil(t, m) + assert.NoError(t, err) + assert.Equal(t, + map[string]interface{}{ + "resp_bytes": int64(2326), + "auth": "frank1234", + "client_ip": "127.0.0.1", + "http_version": float64(1.0), + "ident": "user1234", + "request": "/apache_pb.gif", + }, + m.Fields()) + assert.Equal(t, map[string]string{"verb": "GET", "resp_code": "200"}, m.Tags()) +} + // combined log format // 127.0.0.1 user-identifier frank [10/Oct/2000:13:55:36 -0700] "GET /apache_pb.gif HTTP/1.0" 200 2326 "-" "Mozilla" func TestBuiltinCombinedLogFormat(t *testing.T) { diff --git a/plugins/inputs/logparser/grok/influx_patterns.go b/plugins/inputs/logparser/grok/influx_patterns.go index ff9d60ebf..052791140 100644 --- a/plugins/inputs/logparser/grok/influx_patterns.go +++ b/plugins/inputs/logparser/grok/influx_patterns.go @@ -53,7 +53,7 @@ RESPONSE_TIME %{DURATION:response_time_ns:duration} EXAMPLE_LOG \[%{HTTPDATE:ts:ts-httpd}\] %{NUMBER:myfloat:float} %{RESPONSE_CODE} %{IPORHOST:clientip} %{RESPONSE_TIME} # Wider-ranging username matching vs. 
logstash built-in %{USER} -NGUSERNAME [a-zA-Z\.\@\-\+_%]+ +NGUSERNAME [a-zA-Z0-9\.\@\-\+_%]+ NGUSER %{NGUSERNAME} # Wider-ranging client IP matching CLIENT (?:%{IPORHOST}|%{HOSTPORT}|::1) @@ -64,7 +64,7 @@ CLIENT (?:%{IPORHOST}|%{HOSTPORT}|::1) # apache & nginx logs, this is also known as the "common log format" # see https://en.wikipedia.org/wiki/Common_Log_Format -COMMON_LOG_FORMAT %{CLIENT:client_ip} %{NGUSER:ident} %{NGUSER:auth} \[%{HTTPDATE:ts:ts-httpd}\] "(?:%{WORD:verb:tag} %{NOTSPACE:request}(?: HTTP/%{NUMBER:http_version:float})?|%{DATA})" %{NUMBER:resp_code:tag} (?:%{NUMBER:resp_bytes:int}|-) +COMMON_LOG_FORMAT %{CLIENT:client_ip} %{NOTSPACE:ident} %{NOTSPACE:auth} \[%{HTTPDATE:ts:ts-httpd}\] "(?:%{WORD:verb:tag} %{NOTSPACE:request}(?: HTTP/%{NUMBER:http_version:float})?|%{DATA})" %{NUMBER:resp_code:tag} (?:%{NUMBER:resp_bytes:int}|-) # Combined log format is the same as the common log format but with the addition # of two quoted strings at the end for "referrer" and "agent" diff --git a/plugins/inputs/logparser/grok/patterns/influx-patterns b/plugins/inputs/logparser/grok/patterns/influx-patterns index 6f4d81f89..931b61bc8 100644 --- a/plugins/inputs/logparser/grok/patterns/influx-patterns +++ b/plugins/inputs/logparser/grok/patterns/influx-patterns @@ -49,7 +49,7 @@ RESPONSE_TIME %{DURATION:response_time_ns:duration} EXAMPLE_LOG \[%{HTTPDATE:ts:ts-httpd}\] %{NUMBER:myfloat:float} %{RESPONSE_CODE} %{IPORHOST:clientip} %{RESPONSE_TIME} # Wider-ranging username matching vs. 
logstash built-in %{USER} -NGUSERNAME [a-zA-Z\.\@\-\+_%]+ +NGUSERNAME [a-zA-Z0-9\.\@\-\+_%]+ NGUSER %{NGUSERNAME} # Wider-ranging client IP matching CLIENT (?:%{IPORHOST}|%{HOSTPORT}|::1) @@ -60,7 +60,7 @@ CLIENT (?:%{IPORHOST}|%{HOSTPORT}|::1) # apache & nginx logs, this is also known as the "common log format" # see https://en.wikipedia.org/wiki/Common_Log_Format -COMMON_LOG_FORMAT %{CLIENT:client_ip} %{NGUSER:ident} %{NGUSER:auth} \[%{HTTPDATE:ts:ts-httpd}\] "(?:%{WORD:verb:tag} %{NOTSPACE:request}(?: HTTP/%{NUMBER:http_version:float})?|%{DATA})" %{NUMBER:resp_code:tag} (?:%{NUMBER:resp_bytes:int}|-) +COMMON_LOG_FORMAT %{CLIENT:client_ip} %{NOTSPACE:ident} %{NOTSPACE:auth} \[%{HTTPDATE:ts:ts-httpd}\] "(?:%{WORD:verb:tag} %{NOTSPACE:request}(?: HTTP/%{NUMBER:http_version:float})?|%{DATA})" %{NUMBER:resp_code:tag} (?:%{NUMBER:resp_bytes:int}|-) # Combined log format is the same as the common log format but with the addition # of two quoted strings at the end for "referrer" and "agent"