Add csv parser (#4439)
This commit is contained in: parent 80346b2e93 · commit 889745a112
@ -12,6 +12,7 @@ Telegraf is able to parse the following input data formats into metrics:
1. [Grok](#grok)
1. [Logfmt](#logfmt)
1. [Wavefront](#wavefront)
1. [CSV](#csv)

Telegraf metrics, like InfluxDB
[points](https://docs.influxdata.com/influxdb/v0.10/write_protocols/line/),
@ -107,28 +108,28 @@ but can be overridden using the `name_override` config option.
#### JSON Configuration:

The JSON data format supports specifying "tag_keys", "string_keys", and "json_query".
If specified, keys in "tag_keys" and "string_keys" will be searched for in the root-level
and any nested lists of the JSON blob. All int and float values are added to fields by default.
If the key(s) exist, they will be applied as tags or fields to the Telegraf metrics.
If "string_keys" is specified, the string will be added as a field.

The "json_query" configuration is a gjson path to a JSON object or
list of JSON objects. If this path leads to an array of values or
a single data point, an error will be thrown. If this configuration
is specified, only the result of the query will be parsed and returned as metrics.

The "json_name_key" configuration specifies the key of the field whose value will be
added as the metric name.

Object paths are specified using gjson path format, which is denoted by object keys
concatenated with "." to go deeper in nested JSON objects.
Additional information on gjson paths can be found here: https://github.com/tidwall/gjson#path-syntax

The JSON data format also supports extracting time values through the
configs "json_time_key" and "json_time_format". If "json_time_key" is set,
"json_time_format" must be specified. The "json_time_key" describes the
name of the field containing time information. The "json_time_format"
must be a recognized Go time format.
If no year is provided, the metrics will have the current year.
More info on time formats can be found here: https://golang.org/pkg/time/#Parse
@ -161,8 +162,8 @@ For example, if you had this configuration:
## List of field names to extract from JSON and add as string fields
# json_string_fields = []

## gjson query path to specify a specific chunk of JSON to be parsed with
## the above configuration. If not specified, the whole file will be parsed.
## gjson query paths are described here: https://github.com/tidwall/gjson#path-syntax
# json_query = ""
@ -191,8 +192,8 @@ Your Telegraf metrics would get tagged with "my_tag_1"
exec_mycollector,my_tag_1=foo a=5,b_c=6
```

If the JSON data is an array, then each element of the array is
parsed with the configured settings. Each resulting metric will
be output with the same timestamp.

For example, with the following configuration:
@ -220,7 +221,7 @@ For example, if the following configuration:
## List of field names to extract from JSON and add as string fields
# string_fields = []

## gjson query path to specify a specific chunk of JSON to be parsed with
## the above configuration. If not specified, the whole file will be parsed
# json_query = ""
@ -264,7 +265,7 @@ exec_mycollector,my_tag_1=foo,my_tag_2=baz b_c=6 1136387040000000000
exec_mycollector,my_tag_1=bar,my_tag_2=baz b_c=8 1168527840000000000
```

If you want to use only a specific portion of your JSON, use the "json_query"
configuration to specify a path to a JSON object.

For example, with the following config:
@ -288,7 +289,7 @@ For example, with the following config:
## List of field names to extract from JSON and add as string fields
string_fields = ["last"]

## gjson query path to specify a specific chunk of JSON to be parsed with
## the above configuration. If not specified, the whole file will be parsed
json_query = "obj.friends"
@ -1038,3 +1039,84 @@ There are no additional configuration options for Wavefront Data Format line-pro
## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md
data_format = "wavefront"
```

# CSV

Parse out metrics from a CSV-formatted table. By default, the parser assumes there is no header and
will read data from the first line. If `csv_header_row_count` is set to anything besides 0, the parser
will extract column names from that many rows. Headers of more than one row will have their
names concatenated together, as illustrated below. Any unnamed columns will be ignored by the parser.
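
A quick illustration of header concatenation, taken from this commit's `TestHeaderConcatenationCSV` test: with `csv_header_row_count = 2`, the two header rows in

```
first,second
1,2,3
3.4,70,test_name
```

are joined column by column, producing the column names `first1`, `second2`, and `3` for the data row that follows.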

The `csv_skip_rows` config indicates the number of rows to skip before looking for header information or data
to parse. By default, no rows are skipped.

The `csv_skip_columns` config indicates the number of columns to skip before parsing data. These
columns will not be read out of the header. Naming with `csv_column_names` will begin at the first
parsed column after skipping the indicated columns. By default, no columns are skipped (see the
example below).
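
Skipping columns interacts with headers as in this commit's `TestSkipColumnsWithHeader` test: with `csv_skip_columns = 1` and `csv_header_row_count = 2`, the input

```
col,col,col
1,2,3
trash,80,test_name
```

drops the first column entirely, leaving the fields `col2 = 80` and `col3 = "test_name"`.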

To assign custom column names, the `csv_column_names` config is available. If `csv_column_names`
is used, all columns must be named, as additional columns will be ignored. If `csv_header_row_count`
is set to 0, `csv_column_names` must be specified. Names listed in `csv_column_names` will override names extracted
from the header.

The `csv_tag_columns` config lists the columns that will be added to the metric as tags; any other
named column is added as a field. The name used to specify a column is the name in the header or,
if specified, the corresponding name assigned in `csv_column_names`.

Additional configs are available to dynamically name metrics and set custom timestamps. If the
`csv_measurement_column` config is specified, the parser will assign the metric name to the value found
in that column. If `csv_timestamp_column` is specified, the parser will extract the timestamp from
that column. If `csv_timestamp_column` is specified, `csv_timestamp_format` must also be specified
or an error will be thrown.

#### CSV Configuration

```toml
data_format = "csv"

## Indicates how many rows to treat as a header. By default, the parser assumes
## there is no header and will parse the first row as data. If set to anything more
## than 1, column names will be concatenated with the name listed in the next header row.
## If `csv_column_names` is specified, the column names in the header will be overridden.
# csv_header_row_count = 0

## Indicates the number of rows to skip before looking for header information.
# csv_skip_rows = 0

## Indicates the number of columns to skip before looking for data to parse.
## These columns will be skipped in the header as well.
# csv_skip_columns = 0

## The separator between csv fields
## By default, the parser assumes a comma (",")
# csv_delimiter = ","

## The character reserved for marking a row as a comment row
## Commented rows are skipped and not parsed
# csv_comment = ""

## If set to true, the parser will remove leading whitespace from fields
## By default, this is false
# csv_trim_space = false

## For assigning custom names to columns
## If this is specified, all columns should have a name
## Unnamed columns will be ignored by the parser.
## If `csv_header_row_count` is set to 0, this config must be used
csv_column_names = []

## Columns listed here will be added as tags. Any other columns
## will be added as fields.
csv_tag_columns = []

## The column to extract the name of the metric from
## By default, this is the name of the plugin
## the `name_override` config overrides this
# csv_measurement_column = ""

## The column to extract time information for the metric
## `csv_timestamp_format` must be specified if this is used
# csv_timestamp_column = ""

## The format of time data extracted from `csv_timestamp_column`
## this must be specified if `csv_timestamp_column` is specified
# csv_timestamp_format = ""
```
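
For example, a configuration along these lines (an illustrative sketch; the `host` and `usage_*` column names are made up, and the measurement name is assumed to come from a plugin named `exec`):

```toml
data_format = "csv"
csv_header_row_count = 1
csv_tag_columns = ["host"]
```

with this input:

```
host,usage_idle,usage_user
server01,93.2,4.5
```

would produce a metric like:

```
exec,host=server01 usage_idle=93.2,usage_user=4.5
```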
@ -1443,6 +1443,120 @@ func buildParser(name string, tbl *ast.Table) (parsers.Parser, error) {
		}
	}

	// for csv parser
	if node, ok := tbl.Fields["csv_column_names"]; ok {
		if kv, ok := node.(*ast.KeyValue); ok {
			if ary, ok := kv.Value.(*ast.Array); ok {
				for _, elem := range ary.Value {
					if str, ok := elem.(*ast.String); ok {
						c.CSVColumnNames = append(c.CSVColumnNames, str.Value)
					}
				}
			}
		}
	}

	if node, ok := tbl.Fields["csv_tag_columns"]; ok {
		if kv, ok := node.(*ast.KeyValue); ok {
			if ary, ok := kv.Value.(*ast.Array); ok {
				for _, elem := range ary.Value {
					if str, ok := elem.(*ast.String); ok {
						c.CSVTagColumns = append(c.CSVTagColumns, str.Value)
					}
				}
			}
		}
	}

	if node, ok := tbl.Fields["csv_delimiter"]; ok {
		if kv, ok := node.(*ast.KeyValue); ok {
			if str, ok := kv.Value.(*ast.String); ok {
				c.CSVDelimiter = str.Value
			}
		}
	}

	if node, ok := tbl.Fields["csv_comment"]; ok {
		if kv, ok := node.(*ast.KeyValue); ok {
			if str, ok := kv.Value.(*ast.String); ok {
				c.CSVComment = str.Value
			}
		}
	}

	if node, ok := tbl.Fields["csv_measurement_column"]; ok {
		if kv, ok := node.(*ast.KeyValue); ok {
			if str, ok := kv.Value.(*ast.String); ok {
				c.CSVMeasurementColumn = str.Value
			}
		}
	}

	if node, ok := tbl.Fields["csv_timestamp_column"]; ok {
		if kv, ok := node.(*ast.KeyValue); ok {
			if str, ok := kv.Value.(*ast.String); ok {
				c.CSVTimestampColumn = str.Value
			}
		}
	}

	if node, ok := tbl.Fields["csv_timestamp_format"]; ok {
		if kv, ok := node.(*ast.KeyValue); ok {
			if str, ok := kv.Value.(*ast.String); ok {
				c.CSVTimestampFormat = str.Value
			}
		}
	}

	if node, ok := tbl.Fields["csv_header_row_count"]; ok {
		if kv, ok := node.(*ast.KeyValue); ok {
			// unquoted TOML integers arrive as *ast.Integer, whose Value holds the raw string
			if integer, ok := kv.Value.(*ast.Integer); ok {
				iVal, err := strconv.Atoi(integer.Value)
				if err != nil {
					return nil, fmt.Errorf("E! parsing to int: %v", err)
				}
				c.CSVHeaderRowCount = iVal
			}
		}
	}

	if node, ok := tbl.Fields["csv_skip_rows"]; ok {
		if kv, ok := node.(*ast.KeyValue); ok {
			if integer, ok := kv.Value.(*ast.Integer); ok {
				iVal, err := strconv.Atoi(integer.Value)
				if err != nil {
					return nil, fmt.Errorf("E! parsing to int: %v", err)
				}
				c.CSVSkipRows = iVal
			}
		}
	}

	if node, ok := tbl.Fields["csv_skip_columns"]; ok {
		if kv, ok := node.(*ast.KeyValue); ok {
			if integer, ok := kv.Value.(*ast.Integer); ok {
				iVal, err := strconv.Atoi(integer.Value)
				if err != nil {
					return nil, fmt.Errorf("E! parsing to int: %v", err)
				}
				c.CSVSkipColumns = iVal
			}
		}
	}

	if node, ok := tbl.Fields["csv_trim_space"]; ok {
		if kv, ok := node.(*ast.KeyValue); ok {
			// unquoted TOML booleans arrive as *ast.Boolean with a string Value
			if b, ok := kv.Value.(*ast.Boolean); ok {
				val, err := strconv.ParseBool(b.Value)
				if err != nil {
					return nil, fmt.Errorf("E! parsing to bool: %v", err)
				}
				c.CSVTrimSpace = val
			}
		}
	}

	c.MetricName = name

	delete(tbl.Fields, "data_format")
@ -1469,6 +1583,14 @@ func buildParser(name string, tbl *ast.Table) (parsers.Parser, error) {
	delete(tbl.Fields, "grok_custom_patterns")
	delete(tbl.Fields, "grok_custom_pattern_files")
	delete(tbl.Fields, "grok_timezone")
	delete(tbl.Fields, "csv_column_names")
	delete(tbl.Fields, "csv_tag_columns")
	delete(tbl.Fields, "csv_delimiter")
	delete(tbl.Fields, "csv_comment")
	delete(tbl.Fields, "csv_trim_space")
	delete(tbl.Fields, "csv_measurement_column")
	delete(tbl.Fields, "csv_timestamp_column")
	delete(tbl.Fields, "csv_timestamp_format")
	delete(tbl.Fields, "csv_header_row_count")
	delete(tbl.Fields, "csv_skip_rows")
	delete(tbl.Fields, "csv_skip_columns")

	return parsers.NewParser(c)
}
@ -0,0 +1,196 @@
package csv

import (
	"bytes"
	"encoding/csv"
	"fmt"
	"strconv"
	"strings"
	"time"

	"github.com/influxdata/telegraf"
	"github.com/influxdata/telegraf/metric"
)

// Parser converts CSV data into Telegraf metrics.
type Parser struct {
	MetricName        string
	HeaderRowCount    int
	SkipRows          int
	SkipColumns       int
	Delimiter         string
	Comment           string
	TrimSpace         bool
	ColumnNames       []string
	TagColumns        []string
	MeasurementColumn string
	TimestampColumn   string
	TimestampFormat   string
	DefaultTags       map[string]string
}

func (p *Parser) compile(r *bytes.Reader) (*csv.Reader, error) {
	csvReader := csv.NewReader(r)
	// ensures that the reader reads records of different lengths without an error
	csvReader.FieldsPerRecord = -1
	if p.Delimiter != "" {
		csvReader.Comma = []rune(p.Delimiter)[0]
	}
	if p.Comment != "" {
		csvReader.Comment = []rune(p.Comment)[0]
	}
	return csvReader, nil
}

func (p *Parser) Parse(buf []byte) ([]telegraf.Metric, error) {
	r := bytes.NewReader(buf)
	csvReader, err := p.compile(r)
	if err != nil {
		return nil, err
	}
	// skip first rows
	for i := 0; i < p.SkipRows; i++ {
		csvReader.Read()
	}
	// if there is a header and nothing in ColumnNames,
	// set ColumnNames to names extracted from the header
	headerNames := make([]string, 0)
	if len(p.ColumnNames) == 0 {
		for i := 0; i < p.HeaderRowCount; i++ {
			header, err := csvReader.Read()
			if err != nil {
				return nil, err
			}
			// concatenate header names across header rows, column by column
			for i := range header {
				name := header[i]
				if p.TrimSpace {
					name = strings.Trim(name, " ")
				}
				if len(headerNames) <= i {
					headerNames = append(headerNames, name)
				} else {
					headerNames[i] = headerNames[i] + name
				}
			}
		}
		p.ColumnNames = headerNames[p.SkipColumns:]
	} else {
		// if columns are named, just skip header rows
		for i := 0; i < p.HeaderRowCount; i++ {
			csvReader.Read()
		}
	}

	table, err := csvReader.ReadAll()
	if err != nil {
		return nil, err
	}

	metrics := make([]telegraf.Metric, 0)
	for _, record := range table {
		m, err := p.parseRecord(record)
		if err != nil {
			return metrics, err
		}
		metrics = append(metrics, m)
	}
	return metrics, nil
}

// ParseLine does not use any information in the header and assumes
// ColumnNames is set; it will also not skip any rows.
func (p *Parser) ParseLine(line string) (telegraf.Metric, error) {
	r := bytes.NewReader([]byte(line))
	csvReader, err := p.compile(r)
	if err != nil {
		return nil, err
	}

	// if there is nothing in ColumnNames, ParseLine will fail
	if len(p.ColumnNames) == 0 {
		return nil, fmt.Errorf("[parsers.csv] data columns must be specified")
	}

	record, err := csvReader.Read()
	if err != nil {
		return nil, err
	}
	m, err := p.parseRecord(record)
	if err != nil {
		return nil, err
	}
	return m, nil
}

func (p *Parser) parseRecord(record []string) (telegraf.Metric, error) {
	recordFields := make(map[string]interface{})
	tags := make(map[string]string)

	// skip columns in record
	record = record[p.SkipColumns:]
outer:
	for i, fieldName := range p.ColumnNames {
		if i < len(record) {
			value := record[i]
			if p.TrimSpace {
				value = strings.Trim(value, " ")
			}

			for _, tagName := range p.TagColumns {
				if tagName == fieldName {
					tags[tagName] = value
					continue outer
				}
			}

			// attempt type conversions: int, then float, then bool, else string
			if iValue, err := strconv.ParseInt(value, 10, 64); err == nil {
				recordFields[fieldName] = iValue
			} else if fValue, err := strconv.ParseFloat(value, 64); err == nil {
				recordFields[fieldName] = fValue
			} else if bValue, err := strconv.ParseBool(value); err == nil {
				recordFields[fieldName] = bValue
			} else {
				recordFields[fieldName] = value
			}
		}
	}

	// add default tags
	for k, v := range p.DefaultTags {
		tags[k] = v
	}

	// the measurement name defaults to the plugin name
	measurementName := p.MetricName
	if recordFields[p.MeasurementColumn] != nil {
		measurementName = fmt.Sprintf("%v", recordFields[p.MeasurementColumn])
	}

	metricTime := time.Now()
	if p.TimestampColumn != "" {
		if recordFields[p.TimestampColumn] == nil {
			return nil, fmt.Errorf("timestamp column: %v could not be found", p.TimestampColumn)
		}
		tStr := fmt.Sprintf("%v", recordFields[p.TimestampColumn])
		if p.TimestampFormat == "" {
			return nil, fmt.Errorf("timestamp format must be specified")
		}

		var err error
		metricTime, err = time.Parse(p.TimestampFormat, tStr)
		if err != nil {
			return nil, err
		}
	}

	m, err := metric.New(measurementName, tags, recordFields, metricTime)
	if err != nil {
		return nil, err
	}
	return m, nil
}

func (p *Parser) SetDefaultTags(tags map[string]string) {
	p.DefaultTags = tags
}
@ -0,0 +1,231 @@
package csv

import (
	"fmt"
	"testing"
	"time"

	"github.com/influxdata/telegraf/metric"
	"github.com/stretchr/testify/require"
)

func TestBasicCSV(t *testing.T) {
	p := Parser{
		ColumnNames: []string{"first", "second", "third"},
		TagColumns:  []string{"third"},
	}

	_, err := p.ParseLine("1.4,true,hi")
	require.NoError(t, err)
}

func TestHeaderConcatenationCSV(t *testing.T) {
	p := Parser{
		HeaderRowCount:    2,
		MeasurementColumn: "3",
	}
	testCSV := `first,second
1,2,3
3.4,70,test_name`

	metrics, err := p.Parse([]byte(testCSV))
	require.NoError(t, err)
	require.Equal(t, "test_name", metrics[0].Name())
}

func TestHeaderOverride(t *testing.T) {
	p := Parser{
		HeaderRowCount:    1,
		ColumnNames:       []string{"first", "second", "third"},
		MeasurementColumn: "third",
	}
	testCSV := `line1,line2,line3
3.4,70,test_name`
	metrics, err := p.Parse([]byte(testCSV))
	require.NoError(t, err)
	require.Equal(t, "test_name", metrics[0].Name())
}

func TestTimestamp(t *testing.T) {
	p := Parser{
		HeaderRowCount:    1,
		ColumnNames:       []string{"first", "second", "third"},
		MeasurementColumn: "third",
		TimestampColumn:   "first",
		TimestampFormat:   "02/01/06 03:04:05 PM",
	}
	testCSV := `line1,line2,line3
23/05/09 04:05:06 PM,70,test_name
07/11/09 04:05:06 PM,80,test_name2`
	metrics, err := p.Parse([]byte(testCSV))

	require.NoError(t, err)
	require.Equal(t, metrics[0].Time().UnixNano(), int64(1243094706000000000))
	require.Equal(t, metrics[1].Time().UnixNano(), int64(1257609906000000000))
}

func TestTimestampError(t *testing.T) {
	p := Parser{
		HeaderRowCount:    1,
		ColumnNames:       []string{"first", "second", "third"},
		MeasurementColumn: "third",
		TimestampColumn:   "first",
	}
	testCSV := `line1,line2,line3
23/05/09 04:05:06 PM,70,test_name
07/11/09 04:05:06 PM,80,test_name2`
	_, err := p.Parse([]byte(testCSV))
	require.Equal(t, fmt.Errorf("timestamp format must be specified"), err)
}

func TestQuotedCharacter(t *testing.T) {
	p := Parser{
		HeaderRowCount:    1,
		ColumnNames:       []string{"first", "second", "third"},
		MeasurementColumn: "third",
	}

	testCSV := `line1,line2,line3
"3,4",70,test_name`
	metrics, err := p.Parse([]byte(testCSV))
	require.NoError(t, err)
	require.Equal(t, "3,4", metrics[0].Fields()["first"])
}

func TestDelimiter(t *testing.T) {
	p := Parser{
		HeaderRowCount:    1,
		Delimiter:         "%",
		ColumnNames:       []string{"first", "second", "third"},
		MeasurementColumn: "third",
	}

	testCSV := `line1%line2%line3
3,4%70%test_name`
	metrics, err := p.Parse([]byte(testCSV))
	require.NoError(t, err)
	require.Equal(t, "3,4", metrics[0].Fields()["first"])
}

func TestValueConversion(t *testing.T) {
	p := Parser{
		HeaderRowCount: 0,
		Delimiter:      ",",
		ColumnNames:    []string{"first", "second", "third", "fourth"},
		MetricName:     "test_value",
	}
	testCSV := `3.3,4,true,hello`

	expectedTags := make(map[string]string)
	expectedFields := map[string]interface{}{
		"first":  3.3,
		"second": 4,
		"third":  true,
		"fourth": "hello",
	}

	metrics, err := p.Parse([]byte(testCSV))
	require.NoError(t, err)

	expectedMetric, err1 := metric.New("test_value", expectedTags, expectedFields, time.Unix(0, 0))
	returnedMetric, err2 := metric.New(metrics[0].Name(), metrics[0].Tags(), metrics[0].Fields(), time.Unix(0, 0))
	require.NoError(t, err1)
	require.NoError(t, err2)

	// deep equal fields
	require.Equal(t, expectedMetric.Fields(), returnedMetric.Fields())
}

func TestSkipComment(t *testing.T) {
	p := Parser{
		HeaderRowCount: 0,
		Comment:        "#",
		ColumnNames:    []string{"first", "second", "third", "fourth"},
		MetricName:     "test_value",
	}
	testCSV := `#3.3,4,true,hello
4,9.9,true,name_this`

	expectedFields := map[string]interface{}{
		"first":  int64(4),
		"second": 9.9,
		"third":  true,
		"fourth": "name_this",
	}

	metrics, err := p.Parse([]byte(testCSV))
	require.NoError(t, err)
	require.Equal(t, expectedFields, metrics[0].Fields())
}

func TestTrimSpace(t *testing.T) {
	p := Parser{
		HeaderRowCount: 0,
		TrimSpace:      true,
		ColumnNames:    []string{"first", "second", "third", "fourth"},
		MetricName:     "test_value",
	}
	testCSV := ` 3.3, 4, true,hello`

	expectedFields := map[string]interface{}{
		"first":  3.3,
		"second": int64(4),
		"third":  true,
		"fourth": "hello",
	}

	metrics, err := p.Parse([]byte(testCSV))
	require.NoError(t, err)
	require.Equal(t, expectedFields, metrics[0].Fields())
}

func TestSkipRows(t *testing.T) {
	p := Parser{
		HeaderRowCount:    1,
		SkipRows:          1,
		TagColumns:        []string{"line1"},
		MeasurementColumn: "line3",
	}
	testCSV := `garbage nonsense
line1,line2,line3
hello,80,test_name2`

	expectedFields := map[string]interface{}{
		"line2": int64(80),
		"line3": "test_name2",
	}
	metrics, err := p.Parse([]byte(testCSV))
	require.NoError(t, err)
	require.Equal(t, expectedFields, metrics[0].Fields())
}

func TestSkipColumns(t *testing.T) {
	p := Parser{
		SkipColumns: 1,
		ColumnNames: []string{"line1", "line2"},
	}
	testCSV := `hello,80,test_name`

	expectedFields := map[string]interface{}{
		"line1": int64(80),
		"line2": "test_name",
	}
	metrics, err := p.Parse([]byte(testCSV))
	require.NoError(t, err)
	require.Equal(t, expectedFields, metrics[0].Fields())
}

func TestSkipColumnsWithHeader(t *testing.T) {
	p := Parser{
		SkipColumns:    1,
		HeaderRowCount: 2,
	}
	testCSV := `col,col,col
1,2,3
trash,80,test_name`

	// the first column (col1) is skipped entirely; only col2 and col3 are parsed
	metrics, err := p.Parse([]byte(testCSV))
	require.NoError(t, err)
	require.Equal(t, map[string]interface{}{"col2": int64(80), "col3": "test_name"}, metrics[0].Fields())
}
@ -6,6 +6,7 @@ import (
	"github.com/influxdata/telegraf"

	"github.com/influxdata/telegraf/plugins/parsers/collectd"
	"github.com/influxdata/telegraf/plugins/parsers/csv"
	"github.com/influxdata/telegraf/plugins/parsers/dropwizard"
	"github.com/influxdata/telegraf/plugins/parsers/graphite"
	"github.com/influxdata/telegraf/plugins/parsers/grok"
@ -113,6 +114,19 @@ type Config struct {
	GrokCustomPatterns     string
	GrokCustomPatternFiles []string
	GrokTimeZone           string

	// csv configuration
	CSVDelimiter         string
	CSVComment           string
	CSVTrimSpace         bool
	CSVColumnNames       []string
	CSVTagColumns        []string
	CSVMeasurementColumn string
	CSVTimestampColumn   string
	CSVTimestampFormat   string
	CSVHeaderRowCount    int
	CSVSkipRows          int
	CSVSkipColumns       int
}

// NewParser returns a Parser interface based on the given config.
@ -162,6 +176,20 @@ func NewParser(config *Config) (Parser, error) {
		config.GrokCustomPatterns,
		config.GrokCustomPatternFiles,
		config.GrokTimeZone)
	case "csv":
		parser, err = newCSVParser(config.MetricName,
			config.CSVHeaderRowCount,
			config.CSVSkipRows,
			config.CSVSkipColumns,
			config.CSVDelimiter,
			config.CSVComment,
			config.CSVTrimSpace,
			config.CSVColumnNames,
			config.CSVTagColumns,
			config.CSVMeasurementColumn,
			config.CSVTimestampColumn,
			config.CSVTimestampFormat,
			config.DefaultTags)
	case "logfmt":
		parser, err = NewLogFmtParser(config.MetricName, config.DefaultTags)
	default:
@ -170,6 +198,60 @@ func NewParser(config *Config) (Parser, error) {
	return parser, err
}

func newCSVParser(metricName string,
	header int,
	skipRows int,
	skipColumns int,
	delimiter string,
	comment string,
	trimSpace bool,
	dataColumns []string,
	tagColumns []string,
	nameColumn string,
	timestampColumn string,
	timestampFormat string,
	defaultTags map[string]string) (Parser, error) {

	if header == 0 && len(dataColumns) == 0 {
		// if there is no header and no column names, that's an error
		return nil, fmt.Errorf("there must be a header if `csv_column_names` is not specified")
	}

	if delimiter != "" {
		runeStr := []rune(delimiter)
		if len(runeStr) > 1 {
			return nil, fmt.Errorf("delimiter must be a single character, got: %s", delimiter)
		}
		// keep only the first rune, as a string (not its numeric code point)
		delimiter = string(runeStr[0])
	}

	if comment != "" {
		runeStr := []rune(comment)
		if len(runeStr) > 1 {
			return nil, fmt.Errorf("comment must be a single character, got: %s", comment)
		}
		comment = string(runeStr[0])
	}

	parser := &csv.Parser{
		MetricName:        metricName,
		HeaderRowCount:    header,
		SkipRows:          skipRows,
		SkipColumns:       skipColumns,
		Delimiter:         delimiter,
		Comment:           comment,
		TrimSpace:         trimSpace,
		ColumnNames:       dataColumns,
		TagColumns:        tagColumns,
		MeasurementColumn: nameColumn,
		TimestampColumn:   timestampColumn,
		TimestampFormat:   timestampFormat,
		DefaultTags:       defaultTags,
	}

	return parser, nil
}

func newJSONParser(
	metricName string,
	tagKeys []string,