Add csv parser (#4439)
plugins/parsers/csv/parser.go (new file, 196 lines added)
@@ -0,0 +1,196 @@
package csv

import (
    "bytes"
    "encoding/csv"
    "fmt"
    "strconv"
    "strings"
    "time"

    "github.com/influxdata/telegraf"
    "github.com/influxdata/telegraf/metric"
)

type Parser struct {
    MetricName        string
    HeaderRowCount    int
    SkipRows          int
    SkipColumns       int
    Delimiter         string
    Comment           string
    TrimSpace         bool
    ColumnNames       []string
    TagColumns        []string
    MeasurementColumn string
    TimestampColumn   string
    TimestampFormat   string
    DefaultTags       map[string]string
}

func (p *Parser) compile(r *bytes.Reader) (*csv.Reader, error) {
    csvReader := csv.NewReader(r)
    // ensures that the reader reads records of different lengths without an error
    csvReader.FieldsPerRecord = -1
    if p.Delimiter != "" {
        csvReader.Comma = []rune(p.Delimiter)[0]
    }
    if p.Comment != "" {
        csvReader.Comment = []rune(p.Comment)[0]
    }
    return csvReader, nil
}

func (p *Parser) Parse(buf []byte) ([]telegraf.Metric, error) {
    r := bytes.NewReader(buf)
    csvReader, err := p.compile(r)
    if err != nil {
        return nil, err
    }
    // skip first rows
    for i := 0; i < p.SkipRows; i++ {
        csvReader.Read()
    }
    // if there is a header and nothing in ColumnNames,
    // set ColumnNames to names extracted from the header
    headerNames := make([]string, 0)
    if len(p.ColumnNames) == 0 {
        for i := 0; i < p.HeaderRowCount; i++ {
            header, err := csvReader.Read()
            if err != nil {
                return nil, err
            }
            // concatenate names across multiple header rows
            for i := range header {
                name := header[i]
                if p.TrimSpace {
                    name = strings.Trim(name, " ")
                }
                if len(headerNames) <= i {
                    headerNames = append(headerNames, name)
                } else {
                    headerNames[i] = headerNames[i] + name
                }
            }
        }
        p.ColumnNames = headerNames[p.SkipColumns:]
    } else {
        // if columns are named, just skip header rows
        for i := 0; i < p.HeaderRowCount; i++ {
            csvReader.Read()
        }
    }

    table, err := csvReader.ReadAll()
    if err != nil {
        return nil, err
    }

    metrics := make([]telegraf.Metric, 0)
    for _, record := range table {
        m, err := p.parseRecord(record)
        if err != nil {
            return metrics, err
        }
        metrics = append(metrics, m)
    }
    return metrics, nil
}

// ParseLine does not use any information in the header and assumes ColumnNames is set;
// it will also not skip any rows
func (p *Parser) ParseLine(line string) (telegraf.Metric, error) {
    r := bytes.NewReader([]byte(line))
    csvReader, err := p.compile(r)
    if err != nil {
        return nil, err
    }

    // if there is nothing in ColumnNames, ParseLine will fail
    if len(p.ColumnNames) == 0 {
        return nil, fmt.Errorf("[parsers.csv] data columns must be specified")
    }

    record, err := csvReader.Read()
    if err != nil {
        return nil, err
    }
    m, err := p.parseRecord(record)
    if err != nil {
        return nil, err
    }
    return m, nil
}

func (p *Parser) parseRecord(record []string) (telegraf.Metric, error) {
    recordFields := make(map[string]interface{})
    tags := make(map[string]string)

    // skip columns in record
    record = record[p.SkipColumns:]
outer:
    for i, fieldName := range p.ColumnNames {
        if i < len(record) {
            value := record[i]
            if p.TrimSpace {
                value = strings.Trim(value, " ")
            }

            for _, tagName := range p.TagColumns {
                if tagName == fieldName {
                    tags[tagName] = value
                    continue outer
                }
            }

            // attempt type conversions
            if iValue, err := strconv.ParseInt(value, 10, 64); err == nil {
                recordFields[fieldName] = iValue
            } else if fValue, err := strconv.ParseFloat(value, 64); err == nil {
                recordFields[fieldName] = fValue
            } else if bValue, err := strconv.ParseBool(value); err == nil {
                recordFields[fieldName] = bValue
            } else {
                recordFields[fieldName] = value
            }
        }
    }

    // add default tags
    for k, v := range p.DefaultTags {
        tags[k] = v
    }

    // will default to plugin name
    measurementName := p.MetricName
    if recordFields[p.MeasurementColumn] != nil {
        measurementName = fmt.Sprintf("%v", recordFields[p.MeasurementColumn])
    }

    metricTime := time.Now()
    if p.TimestampColumn != "" {
        if recordFields[p.TimestampColumn] == nil {
            return nil, fmt.Errorf("timestamp column: %v could not be found", p.TimestampColumn)
        }
        tStr := fmt.Sprintf("%v", recordFields[p.TimestampColumn])
        if p.TimestampFormat == "" {
            return nil, fmt.Errorf("timestamp format must be specified")
        }

        var err error
        metricTime, err = time.Parse(p.TimestampFormat, tStr)
        if err != nil {
            return nil, err
        }
    }

    m, err := metric.New(measurementName, tags, recordFields, metricTime)
    if err != nil {
        return nil, err
    }
    return m, nil
}

func (p *Parser) SetDefaultTags(tags map[string]string) {
    p.DefaultTags = tags
}
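For reference, a minimal sketch of how the new parser can be driven directly from Go, in the same spirit as the tests below. The import path mirrors the file location in this commit; the sample CSV payload, column names, and timestamp layout are illustrative only and not part of the change.

package main

import (
    "fmt"
    "log"

    "github.com/influxdata/telegraf/plugins/parsers/csv"
)

func main() {
    // One header row supplies the column names, "host" is promoted to a tag,
    // and the "time" column is parsed with the given reference layout.
    p := csv.Parser{
        MetricName:      "example",
        HeaderRowCount:  1,
        TagColumns:      []string{"host"},
        TimestampColumn: "time",
        TimestampFormat: "2006-01-02 15:04:05",
    }

    data := []byte(`time,host,usage
2018-07-13 10:00:00,server01,42.5
2018-07-13 10:00:10,server01,43.1`)

    metrics, err := p.Parse(data)
    if err != nil {
        log.Fatal(err)
    }
    for _, m := range metrics {
        // "usage" is converted to a float field; "time" also remains a string field.
        fmt.Println(m.Name(), m.Tags(), m.Fields(), m.Time())
    }
}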
plugins/parsers/csv/parser_test.go (new file, 231 lines added)
@@ -0,0 +1,231 @@
package csv

import (
    "fmt"
    "testing"
    "time"

    "github.com/influxdata/telegraf/metric"
    "github.com/stretchr/testify/require"
)

func TestBasicCSV(t *testing.T) {
    p := Parser{
        ColumnNames: []string{"first", "second", "third"},
        TagColumns:  []string{"third"},
    }

    _, err := p.ParseLine("1.4,true,hi")
    require.NoError(t, err)
}

func TestHeaderConcatenationCSV(t *testing.T) {
    p := Parser{
        HeaderRowCount:    2,
        MeasurementColumn: "3",
    }
    testCSV := `first,second
1,2,3
3.4,70,test_name`

    metrics, err := p.Parse([]byte(testCSV))
    require.NoError(t, err)
    require.Equal(t, "test_name", metrics[0].Name())
}

func TestHeaderOverride(t *testing.T) {
    p := Parser{
        HeaderRowCount:    1,
        ColumnNames:       []string{"first", "second", "third"},
        MeasurementColumn: "third",
    }
    testCSV := `line1,line2,line3
3.4,70,test_name`
    metrics, err := p.Parse([]byte(testCSV))
    require.NoError(t, err)
    require.Equal(t, "test_name", metrics[0].Name())
}

func TestTimestamp(t *testing.T) {
    p := Parser{
        HeaderRowCount:    1,
        ColumnNames:       []string{"first", "second", "third"},
        MeasurementColumn: "third",
        TimestampColumn:   "first",
        TimestampFormat:   "02/01/06 03:04:05 PM",
    }
    testCSV := `line1,line2,line3
23/05/09 04:05:06 PM,70,test_name
07/11/09 04:05:06 PM,80,test_name2`
    metrics, err := p.Parse([]byte(testCSV))

    require.NoError(t, err)
    require.Equal(t, metrics[0].Time().UnixNano(), int64(1243094706000000000))
    require.Equal(t, metrics[1].Time().UnixNano(), int64(1257609906000000000))
}

func TestTimestampError(t *testing.T) {
    p := Parser{
        HeaderRowCount:    1,
        ColumnNames:       []string{"first", "second", "third"},
        MeasurementColumn: "third",
        TimestampColumn:   "first",
    }
    testCSV := `line1,line2,line3
23/05/09 04:05:06 PM,70,test_name
07/11/09 04:05:06 PM,80,test_name2`
    _, err := p.Parse([]byte(testCSV))
    require.Equal(t, fmt.Errorf("timestamp format must be specified"), err)
}

func TestQuotedCharacter(t *testing.T) {
    p := Parser{
        HeaderRowCount:    1,
        ColumnNames:       []string{"first", "second", "third"},
        MeasurementColumn: "third",
    }

    testCSV := `line1,line2,line3
"3,4",70,test_name`
    metrics, err := p.Parse([]byte(testCSV))
    require.NoError(t, err)
    require.Equal(t, "3,4", metrics[0].Fields()["first"])
}

func TestDelimiter(t *testing.T) {
    p := Parser{
        HeaderRowCount:    1,
        Delimiter:         "%",
        ColumnNames:       []string{"first", "second", "third"},
        MeasurementColumn: "third",
    }

    testCSV := `line1%line2%line3
3,4%70%test_name`
    metrics, err := p.Parse([]byte(testCSV))
    require.NoError(t, err)
    require.Equal(t, "3,4", metrics[0].Fields()["first"])
}

func TestValueConversion(t *testing.T) {
    p := Parser{
        HeaderRowCount: 0,
        Delimiter:      ",",
        ColumnNames:    []string{"first", "second", "third", "fourth"},
        MetricName:     "test_value",
    }
    testCSV := `3.3,4,true,hello`

    expectedTags := make(map[string]string)
    expectedFields := map[string]interface{}{
        "first":  3.3,
        "second": 4,
        "third":  true,
        "fourth": "hello",
    }

    metrics, err := p.Parse([]byte(testCSV))
    require.NoError(t, err)

    expectedMetric, err1 := metric.New("test_value", expectedTags, expectedFields, time.Unix(0, 0))
    returnedMetric, err2 := metric.New(metrics[0].Name(), metrics[0].Tags(), metrics[0].Fields(), time.Unix(0, 0))
    require.NoError(t, err1)
    require.NoError(t, err2)

    // deep equal fields
    require.Equal(t, expectedMetric.Fields(), returnedMetric.Fields())
}

func TestSkipComment(t *testing.T) {
    p := Parser{
        HeaderRowCount: 0,
        Comment:        "#",
        ColumnNames:    []string{"first", "second", "third", "fourth"},
        MetricName:     "test_value",
    }
    testCSV := `#3.3,4,true,hello
4,9.9,true,name_this`

    expectedFields := map[string]interface{}{
        "first":  int64(4),
        "second": 9.9,
        "third":  true,
        "fourth": "name_this",
    }

    metrics, err := p.Parse([]byte(testCSV))
    require.NoError(t, err)
    require.Equal(t, expectedFields, metrics[0].Fields())
}

func TestTrimSpace(t *testing.T) {
    p := Parser{
        HeaderRowCount: 0,
        TrimSpace:      true,
        ColumnNames:    []string{"first", "second", "third", "fourth"},
        MetricName:     "test_value",
    }
    testCSV := ` 3.3, 4, true,hello`

    expectedFields := map[string]interface{}{
        "first":  3.3,
        "second": int64(4),
        "third":  true,
        "fourth": "hello",
    }

    metrics, err := p.Parse([]byte(testCSV))
    require.NoError(t, err)
    require.Equal(t, expectedFields, metrics[0].Fields())
}

func TestSkipRows(t *testing.T) {
    p := Parser{
        HeaderRowCount:    1,
        SkipRows:          1,
        TagColumns:        []string{"line1"},
        MeasurementColumn: "line3",
    }
    testCSV := `garbage nonsense
line1,line2,line3
hello,80,test_name2`

    expectedFields := map[string]interface{}{
        "line2": int64(80),
        "line3": "test_name2",
    }
    metrics, err := p.Parse([]byte(testCSV))
    require.NoError(t, err)
    require.Equal(t, expectedFields, metrics[0].Fields())
}

func TestSkipColumns(t *testing.T) {
    p := Parser{
        SkipColumns: 1,
        ColumnNames: []string{"line1", "line2"},
    }
    testCSV := `hello,80,test_name`

    expectedFields := map[string]interface{}{
        "line1": int64(80),
        "line2": "test_name",
    }
    metrics, err := p.Parse([]byte(testCSV))
    require.NoError(t, err)
    require.Equal(t, expectedFields, metrics[0].Fields())
}

func TestSkipColumnsWithHeader(t *testing.T) {
    p := Parser{
        SkipColumns:    1,
        HeaderRowCount: 2,
    }
    testCSV := `col,col,col
1,2,3
trash,80,test_name`

    // col1 is skipped, so only col2 and col3 should appear in the fields
    metrics, err := p.Parse([]byte(testCSV))
    require.NoError(t, err)
    require.Equal(t, map[string]interface{}{"col2": int64(80), "col3": "test_name"}, metrics[0].Fields())
}
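As a complement to the tests, a short sketch of the single-record path: ParseLine never reads a header row, so ColumnNames has to be populated up front. The column names, tag name, and sample line below are illustrative assumptions, not part of the commit.

package main

import (
    "fmt"
    "log"

    "github.com/influxdata/telegraf/plugins/parsers/csv"
)

func main() {
    // ColumnNames stands in for a header; "level" becomes a tag and the
    // remaining columns go through the usual int/float/bool/string conversion.
    p := csv.Parser{
        MetricName:  "lines",
        ColumnNames: []string{"level", "code", "message"},
        TagColumns:  []string{"level"},
        TrimSpace:   true,
    }

    m, err := p.ParseLine(`error, 500 ,"upstream timed out"`)
    if err != nil {
        log.Fatal(err)
    }
    // "level" as a tag, "code" as an int64 field, "message" as a string field.
    fmt.Println(m.Tags(), m.Fields())
}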