Add formdata parser (#5749)

This commit is contained in:
Boris Yonchev 2019-06-17 23:34:54 +03:00 committed by Daniel Nelson
parent 1da81799cb
commit fd9abd2166
8 changed files with 551 additions and 26 deletions

View File

@ -1726,6 +1726,18 @@ func getParserConfig(name string, tbl *ast.Table) (*parsers.Config, error) {
}
}
if node, ok := tbl.Fields["form_data_tag_keys"]; ok {
if kv, ok := node.(*ast.KeyValue); ok {
if ary, ok := kv.Value.(*ast.Array); ok {
for _, elem := range ary.Value {
if str, ok := elem.(*ast.String); ok {
c.FormDataTagKeys = append(c.FormDataTagKeys, str.Value)
}
}
}
}
}
c.MetricName = name
delete(tbl.Fields, "data_format")
@ -1767,6 +1779,7 @@ func getParserConfig(name string, tbl *ast.Table) (*parsers.Config, error) {
delete(tbl.Fields, "csv_timestamp_column")
delete(tbl.Fields, "csv_timestamp_format")
delete(tbl.Fields, "csv_trim_space")
delete(tbl.Fields, "form_data_tag_keys")
return c, nil
}

View File

@ -49,11 +49,17 @@ This is a sample configuration for the plugin.
## more about them here:
## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md
data_format = "influx"
## Part of the request to consume.
## Available options are "body" and "query".
## Note that the data source and data format are independent properties.
## To consume standard query params and POST forms - use "formdata" as a data_format.
# data_source = "body"
```
### Metrics:
Metrics are created from the request body and are dependant on the value of `data_format`.
Metrics are collected from the part of the request specified by the `data_source` param and are parsed depending on the value of `data_format`.
### Troubleshooting:
@ -67,5 +73,10 @@ curl -i -XPOST 'http://localhost:8080/telegraf' --data-binary 'cpu_load_short,ho
curl -i -XPOST 'http://localhost:8080/telegraf' --data-binary '{"value1": 42, "value2": 42}'
```
**Send query params**
```
curl -i -XGET 'http://localhost:8080/telegraf?host=server01&value=0.42'
```
[data_format]: /docs/DATA_FORMATS_INPUT.md
[influxdb_listener]: /plugins/inputs/influxdb_listener/README.md

View File

@ -8,6 +8,8 @@ import (
"log"
"net"
"net/http"
"net/url"
"strings"
"sync"
"time"
@ -23,12 +25,21 @@ import (
// 500 MB
const defaultMaxBodySize = 500 * 1024 * 1024
const (
body = "body"
query = "query"
)
// TimeFunc provides a timestamp for the metrics
type TimeFunc func() time.Time
// HTTPListenerV2 is an input plugin that collects external metrics sent via HTTP
type HTTPListenerV2 struct {
ServiceAddress string
Path string
Methods []string
DataSource string
ReadTimeout internal.Duration
WriteTimeout internal.Duration
MaxBodySize internal.Size
@ -86,6 +97,12 @@ const sampleConfig = `
## more about them here:
## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md
data_format = "influx"
## Part of the request to consume.
## Available options are "body" and "query".
## Note that the data source and data format are independent properties.
## To consume standard query params and POST forms - use "formdata" as a data_format.
# data_source = "body"
`
func (h *HTTPListenerV2) SampleConfig() string {
@ -164,11 +181,13 @@ func (h *HTTPListenerV2) Stop() {
}
func (h *HTTPListenerV2) ServeHTTP(res http.ResponseWriter, req *http.Request) {
if req.URL.Path == h.Path {
h.AuthenticateIfSet(h.serveWrite, res, req)
} else {
h.AuthenticateIfSet(http.NotFound, res, req)
handler := h.serveWrite
if req.URL.Path != h.Path {
handler = http.NotFound
}
h.authenticateIfSet(handler, res, req)
}
func (h *HTTPListenerV2) serveWrite(res http.ResponseWriter, req *http.Request) {
@ -191,23 +210,17 @@ func (h *HTTPListenerV2) serveWrite(res http.ResponseWriter, req *http.Request)
return
}
// Handle gzip request bodies
body := req.Body
if req.Header.Get("Content-Encoding") == "gzip" {
var err error
body, err = gzip.NewReader(req.Body)
if err != nil {
log.Println("D! " + err.Error())
badRequest(res)
return
}
defer body.Close()
var bytes []byte
var ok bool
switch strings.ToLower(h.DataSource) {
case query:
bytes, ok = h.collectQuery(res, req)
default:
bytes, ok = h.collectBody(res, req)
}
body = http.MaxBytesReader(res, body, h.MaxBodySize.Size)
bytes, err := ioutil.ReadAll(body)
if err != nil {
tooLarge(res)
if !ok {
return
}
@ -217,12 +230,52 @@ func (h *HTTPListenerV2) serveWrite(res http.ResponseWriter, req *http.Request)
badRequest(res)
return
}
for _, m := range metrics {
h.acc.AddFields(m.Name(), m.Fields(), m.Tags(), m.Time())
}
res.WriteHeader(http.StatusNoContent)
}
func (h *HTTPListenerV2) collectBody(res http.ResponseWriter, req *http.Request) ([]byte, bool) {
body := req.Body
// Handle gzip request bodies
if req.Header.Get("Content-Encoding") == "gzip" {
var err error
body, err = gzip.NewReader(req.Body)
if err != nil {
log.Println("D! " + err.Error())
badRequest(res)
return nil, false
}
defer body.Close()
}
body = http.MaxBytesReader(res, body, h.MaxBodySize.Size)
bytes, err := ioutil.ReadAll(body)
if err != nil {
tooLarge(res)
return nil, false
}
return bytes, true
}
func (h *HTTPListenerV2) collectQuery(res http.ResponseWriter, req *http.Request) ([]byte, bool) {
rawQuery := req.URL.RawQuery
query, err := url.QueryUnescape(rawQuery)
if err != nil {
log.Println("D! " + err.Error())
badRequest(res)
return nil, false
}
return []byte(query), true
}
func tooLarge(res http.ResponseWriter) {
res.Header().Set("Content-Type", "application/json")
res.WriteHeader(http.StatusRequestEntityTooLarge)
@ -246,7 +299,7 @@ func badRequest(res http.ResponseWriter) {
res.Write([]byte(`{"error":"http: bad request"}`))
}
func (h *HTTPListenerV2) AuthenticateIfSet(handler http.HandlerFunc, res http.ResponseWriter, req *http.Request) {
func (h *HTTPListenerV2) authenticateIfSet(handler http.HandlerFunc, res http.ResponseWriter, req *http.Request) {
if h.BasicUsername != "" && h.BasicPassword != "" {
reqUsername, reqPassword, ok := req.BasicAuth()
if !ok ||
@ -269,6 +322,7 @@ func init() {
TimeFunc: time.Now,
Path: "/telegraf",
Methods: []string{"POST", "PUT"},
DataSource: body,
}
})
}

File diff suppressed because one or more lines are too long

View File

@ -0,0 +1,76 @@
# FormData
The FormData data format parses a [query string/x-www-form-urlencoded][query_string] data into metric fields.
Common use case is to pair it with http listener input plugin to parse request body or query params.
### Configuration
```toml
[[inputs.http_listener_v2]]
## Address and port to host HTTP listener on
service_address = ":8080"
## Part of the request to consume.
## Available options are "body" and "query".
## To consume standard query params or application/x-www-form-urlencoded body,
## set the data_format option to "formdata".
data_source = "body"
## Data format to consume.
## Each data format has its own unique set of configuration options, read
## more about them here:
## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md
data_format = "formdata"
## Array of key names which should be collected as tags.
## By default, keys with string value are ignored if not marked as tags.
form_data_tag_keys = ["tag1"]
```
### Examples
#### Basic parsing
Config:
```toml
[[inputs.http_listener_v2]]
service_address = ":8080"
data_source = "query"
data_format = "formdata"
name_override = "mymetric"
```
Request:
```bash
curl -i -XGET 'http://localhost:8080/telegraf?field=0.42'
```
Output:
```
mymetric field=0.42
```
#### Tags and key filter
Config:
```toml
[[inputs.http_listener_v2]]
service_address = ":8080"
data_source = "query"
data_format = "formdata"
name_override = "mymetric"
fielddrop = ["tag2", "field2"]
form_data_tag_keys = ["tag1"]
```
Request:
```bash
curl -i -XGET 'http://localhost:8080/telegraf?tag1=foo&tag2=bar&field1=42&field2=69'
```
Output:
```
mymetric,tag1=foo field1=42
```
[query_string]: https://en.wikipedia.org/wiki/Query_string

View File

@ -0,0 +1,130 @@
package formdata
import (
"bytes"
"fmt"
"net/url"
"strconv"
"time"
"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/metric"
)
var (
// ErrNoMetric is returned when no metric is found in input line
ErrNoMetric = fmt.Errorf("no metric in line")
)
// Parser decodes "application/x-www-form-urlencoded" data into metrics
type Parser struct {
MetricName string
DefaultTags map[string]string
TagKeys []string
AllowedKeys []string
}
// Parse converts a slice of bytes in "application/x-www-form-urlencoded" format into metrics
func (p Parser) Parse(buf []byte) ([]telegraf.Metric, error) {
buf = bytes.TrimSpace(buf)
if len(buf) == 0 {
return make([]telegraf.Metric, 0), nil
}
values, err := url.ParseQuery(string(buf))
if err != nil {
return nil, err
}
if len(p.AllowedKeys) > 0 {
values = p.filterAllowedKeys(values)
}
tags := p.extractTags(values)
fields := p.parseFields(values)
for key, value := range p.DefaultTags {
tags[key] = value
}
metric, err := metric.New(p.MetricName, tags, fields, time.Now().UTC())
if err != nil {
return nil, err
}
return []telegraf.Metric{metric}, nil
}
// ParseLine delegates a single line of text to the Parse function
func (p Parser) ParseLine(line string) (telegraf.Metric, error) {
metrics, err := p.Parse([]byte(line))
if err != nil {
return nil, err
}
if len(metrics) < 1 {
return nil, ErrNoMetric
}
return metrics[0], nil
}
// SetDefaultTags sets the default tags for every metric
func (p *Parser) SetDefaultTags(tags map[string]string) {
p.DefaultTags = tags
}
func (p Parser) filterAllowedKeys(original url.Values) url.Values {
result := make(url.Values)
for _, key := range p.AllowedKeys {
value, exists := original[key]
if !exists {
continue
}
result[key] = value
}
return result
}
func (p Parser) extractTags(values url.Values) map[string]string {
tags := make(map[string]string)
for _, key := range p.TagKeys {
value, exists := values[key]
if !exists || len(key) == 0 {
continue
}
tags[key] = value[0]
delete(values, key)
}
return tags
}
func (p Parser) parseFields(values url.Values) map[string]interface{} {
fields := make(map[string]interface{})
for key, value := range values {
if len(key) == 0 || len(value) == 0 {
continue
}
field, err := strconv.ParseFloat(value[0], 64)
if err != nil {
continue
}
fields[key] = field
}
return fields
}

View File

@ -0,0 +1,172 @@
package formdata
import (
"testing"
"github.com/stretchr/testify/require"
)
const (
validFormData = "tag1=foo&tag2=bar&tag3=baz&field1=42&field2=69"
encodedFormData = "tag1=%24%24%24&field1=1e%2B3"
notEscapedProperlyFormData = "invalid=%Y5"
blankKeyFormData = "=42&field2=69"
emptyFormData = ""
)
func TestParseValidFormData(t *testing.T) {
parser := Parser{
MetricName: "formdata_test",
}
metrics, err := parser.Parse([]byte(validFormData))
require.NoError(t, err)
require.Len(t, metrics, 1)
require.Equal(t, "formdata_test", metrics[0].Name())
require.Equal(t, map[string]string{}, metrics[0].Tags())
require.Equal(t, map[string]interface{}{
"field1": float64(42),
"field2": float64(69),
}, metrics[0].Fields())
}
func TestParseLineValidFormData(t *testing.T) {
parser := Parser{
MetricName: "formdata_test",
}
metric, err := parser.ParseLine(validFormData)
require.NoError(t, err)
require.Equal(t, "formdata_test", metric.Name())
require.Equal(t, map[string]string{}, metric.Tags())
require.Equal(t, map[string]interface{}{
"field1": float64(42),
"field2": float64(69),
}, metric.Fields())
}
func TestParseValidFormDataWithTags(t *testing.T) {
parser := Parser{
MetricName: "formdata_test",
TagKeys: []string{"tag1", "tag2"},
}
metrics, err := parser.Parse([]byte(validFormData))
require.NoError(t, err)
require.Len(t, metrics, 1)
require.Equal(t, "formdata_test", metrics[0].Name())
require.Equal(t, map[string]string{
"tag1": "foo",
"tag2": "bar",
}, metrics[0].Tags())
require.Equal(t, map[string]interface{}{
"field1": float64(42),
"field2": float64(69),
}, metrics[0].Fields())
}
func TestParseValidFormDataDefaultTags(t *testing.T) {
parser := Parser{
MetricName: "formdata_test",
TagKeys: []string{"tag1", "tag2"},
DefaultTags: map[string]string{"tag4": "default"},
}
metrics, err := parser.Parse([]byte(validFormData))
require.NoError(t, err)
require.Len(t, metrics, 1)
require.Equal(t, "formdata_test", metrics[0].Name())
require.Equal(t, map[string]string{
"tag1": "foo",
"tag2": "bar",
"tag4": "default",
}, metrics[0].Tags())
require.Equal(t, map[string]interface{}{
"field1": float64(42),
"field2": float64(69),
}, metrics[0].Fields())
}
func TestParseValidFormDataDefaultTagsOverride(t *testing.T) {
parser := Parser{
MetricName: "formdata_test",
TagKeys: []string{"tag1", "tag2"},
DefaultTags: map[string]string{"tag1": "default"},
}
metrics, err := parser.Parse([]byte(validFormData))
require.NoError(t, err)
require.Len(t, metrics, 1)
require.Equal(t, "formdata_test", metrics[0].Name())
require.Equal(t, map[string]string{
"tag1": "default",
"tag2": "bar",
}, metrics[0].Tags())
require.Equal(t, map[string]interface{}{
"field1": float64(42),
"field2": float64(69),
}, metrics[0].Fields())
}
func TestParseEncodedFormData(t *testing.T) {
parser := Parser{
MetricName: "formdata_test",
TagKeys: []string{"tag1"},
}
metrics, err := parser.Parse([]byte(encodedFormData))
require.NoError(t, err)
require.Len(t, metrics, 1)
require.Equal(t, "formdata_test", metrics[0].Name())
require.Equal(t, map[string]string{
"tag1": "$$$",
}, metrics[0].Tags())
require.Equal(t, map[string]interface{}{
"field1": float64(1000),
}, metrics[0].Fields())
}
func TestParseInvalidFormDataError(t *testing.T) {
parser := Parser{
MetricName: "formdata_test",
}
metrics, err := parser.Parse([]byte(notEscapedProperlyFormData))
require.Error(t, err)
require.Len(t, metrics, 0)
}
func TestParseInvalidFormDataEmptyKey(t *testing.T) {
parser := Parser{
MetricName: "formdata_test",
}
// Empty key for field
metrics, err := parser.Parse([]byte(blankKeyFormData))
require.NoError(t, err)
require.Len(t, metrics, 1)
require.Equal(t, map[string]string{}, metrics[0].Tags())
require.Equal(t, map[string]interface{}{
"field2": float64(69),
}, metrics[0].Fields())
// Empty key for tag
parser.TagKeys = []string{""}
metrics, err = parser.Parse([]byte(blankKeyFormData))
require.NoError(t, err)
require.Len(t, metrics, 1)
require.Equal(t, map[string]string{}, metrics[0].Tags())
require.Equal(t, map[string]interface{}{
"field2": float64(69),
}, metrics[0].Fields())
}
func TestParseInvalidFormDataEmptyString(t *testing.T) {
parser := Parser{
MetricName: "formdata_test",
}
metrics, err := parser.Parse([]byte(emptyFormData))
require.NoError(t, err)
require.Len(t, metrics, 0)
}

View File

@ -8,6 +8,7 @@ import (
"github.com/influxdata/telegraf/plugins/parsers/collectd"
"github.com/influxdata/telegraf/plugins/parsers/csv"
"github.com/influxdata/telegraf/plugins/parsers/dropwizard"
"github.com/influxdata/telegraf/plugins/parsers/formdata"
"github.com/influxdata/telegraf/plugins/parsers/graphite"
"github.com/influxdata/telegraf/plugins/parsers/grok"
"github.com/influxdata/telegraf/plugins/parsers/influx"
@ -141,6 +142,9 @@ type Config struct {
CSVTimestampColumn string `toml:"csv_timestamp_column"`
CSVTimestampFormat string `toml:"csv_timestamp_format"`
CSVTrimSpace bool `toml:"csv_trim_space"`
// FormData configuration
FormDataTagKeys []string `toml:"form_data_tag_keys"`
}
// NewParser returns a Parser interface based on the given config.
@ -209,6 +213,12 @@ func NewParser(config *Config) (Parser, error) {
config.DefaultTags)
case "logfmt":
parser, err = NewLogFmtParser(config.MetricName, config.DefaultTags)
case "formdata":
parser, err = NewFormDataParser(
config.MetricName,
config.DefaultTags,
config.FormDataTagKeys,
)
default:
err = fmt.Errorf("Invalid data format: %s", config.DataFormat)
}
@ -400,3 +410,15 @@ func NewLogFmtParser(metricName string, defaultTags map[string]string) (Parser,
func NewWavefrontParser(defaultTags map[string]string) (Parser, error) {
return wavefront.NewWavefrontParser(defaultTags), nil
}
func NewFormDataParser(
metricName string,
defaultTags map[string]string,
tagKeys []string,
) (Parser, error) {
return &formdata.Parser{
MetricName: metricName,
DefaultTags: defaultTags,
TagKeys: tagKeys,
}, nil
}