Fix cleanup of csv parser options, use per file parser (#4712)

This commit is contained in:
Daniel Nelson
2018-09-18 09:23:45 -07:00
committed by GitHub
parent 1d76343422
commit b5299f4cc4
7 changed files with 160 additions and 53 deletions

View File

@@ -26,6 +26,11 @@ type Parser struct {
TimestampColumn string
TimestampFormat string
DefaultTags map[string]string
TimeFunc func() time.Time
}
// SetTimeFunc replaces the parser's time source. When no timestamp column
// is configured the parser stamps metrics with TimeFunc() (see the
// metricTime assignment in the parse loop); tests inject a fixed clock
// through this hook to make output deterministic.
func (p *Parser) SetTimeFunc(fn metric.TimeFunc) {
p.TimeFunc = fn
}
func (p *Parser) compile(r *bytes.Reader) (*csv.Reader, error) {
@@ -167,7 +172,7 @@ outer:
measurementName = fmt.Sprintf("%v", recordFields[p.MeasurementColumn])
}
metricTime := time.Now()
metricTime := p.TimeFunc()
if p.TimestampColumn != "" {
if recordFields[p.TimestampColumn] == nil {
return nil, fmt.Errorf("timestamp column: %v could not be found", p.TimestampColumn)

View File

@@ -6,13 +6,19 @@ import (
"time"
"github.com/influxdata/telegraf/metric"
"github.com/influxdata/telegraf/testutil"
"github.com/stretchr/testify/require"
)
var DefaultTime = func() time.Time {
return time.Unix(3600, 0)
}
func TestBasicCSV(t *testing.T) {
p := Parser{
ColumnNames: []string{"first", "second", "third"},
TagColumns: []string{"third"},
TimeFunc: DefaultTime,
}
_, err := p.ParseLine("1.4,true,hi")
@@ -23,6 +29,7 @@ func TestHeaderConcatenationCSV(t *testing.T) {
p := Parser{
HeaderRowCount: 2,
MeasurementColumn: "3",
TimeFunc: DefaultTime,
}
testCSV := `first,second
1,2,3
@@ -38,6 +45,7 @@ func TestHeaderOverride(t *testing.T) {
HeaderRowCount: 1,
ColumnNames: []string{"first", "second", "third"},
MeasurementColumn: "third",
TimeFunc: DefaultTime,
}
testCSV := `line1,line2,line3
3.4,70,test_name`
@@ -53,6 +61,7 @@ func TestTimestamp(t *testing.T) {
MeasurementColumn: "third",
TimestampColumn: "first",
TimestampFormat: "02/01/06 03:04:05 PM",
TimeFunc: DefaultTime,
}
testCSV := `line1,line2,line3
23/05/09 04:05:06 PM,70,test_name
@@ -70,6 +79,7 @@ func TestTimestampError(t *testing.T) {
ColumnNames: []string{"first", "second", "third"},
MeasurementColumn: "third",
TimestampColumn: "first",
TimeFunc: DefaultTime,
}
testCSV := `line1,line2,line3
23/05/09 04:05:06 PM,70,test_name
@@ -83,6 +93,7 @@ func TestQuotedCharacter(t *testing.T) {
HeaderRowCount: 1,
ColumnNames: []string{"first", "second", "third"},
MeasurementColumn: "third",
TimeFunc: DefaultTime,
}
testCSV := `line1,line2,line3
@@ -98,6 +109,7 @@ func TestDelimiter(t *testing.T) {
Delimiter: "%",
ColumnNames: []string{"first", "second", "third"},
MeasurementColumn: "third",
TimeFunc: DefaultTime,
}
testCSV := `line1%line2%line3
@@ -113,6 +125,7 @@ func TestValueConversion(t *testing.T) {
Delimiter: ",",
ColumnNames: []string{"first", "second", "third", "fourth"},
MetricName: "test_value",
TimeFunc: DefaultTime,
}
testCSV := `3.3,4,true,hello`
@@ -142,6 +155,7 @@ func TestSkipComment(t *testing.T) {
Comment: "#",
ColumnNames: []string{"first", "second", "third", "fourth"},
MetricName: "test_value",
TimeFunc: DefaultTime,
}
testCSV := `#3.3,4,true,hello
4,9.9,true,name_this`
@@ -164,6 +178,7 @@ func TestTrimSpace(t *testing.T) {
TrimSpace: true,
ColumnNames: []string{"first", "second", "third", "fourth"},
MetricName: "test_value",
TimeFunc: DefaultTime,
}
testCSV := ` 3.3, 4, true,hello`
@@ -185,6 +200,7 @@ func TestSkipRows(t *testing.T) {
SkipRows: 1,
TagColumns: []string{"line1"},
MeasurementColumn: "line3",
TimeFunc: DefaultTime,
}
testCSV := `garbage nonsense
line1,line2,line3
@@ -203,6 +219,7 @@ func TestSkipColumns(t *testing.T) {
p := Parser{
SkipColumns: 1,
ColumnNames: []string{"line1", "line2"},
TimeFunc: DefaultTime,
}
testCSV := `hello,80,test_name`
@@ -219,6 +236,7 @@ func TestSkipColumnsWithHeader(t *testing.T) {
p := Parser{
SkipColumns: 1,
HeaderRowCount: 2,
TimeFunc: DefaultTime,
}
testCSV := `col,col,col
1,2,3
@@ -229,3 +247,30 @@ func TestSkipColumnsWithHeader(t *testing.T) {
require.NoError(t, err)
require.Equal(t, map[string]interface{}{"col2": int64(80), "col3": "test_name"}, metrics[0].Fields())
}
// TestParseStream exercises the streaming usage pattern: Parse is fed the
// header row first (yielding no metrics, only column names), then ParseLine
// is fed a data row, which must produce a single metric timestamped by the
// injected DefaultTime clock.
func TestParseStream(t *testing.T) {
	p := Parser{
		MetricName:     "csv",
		HeaderRowCount: 1,
		TimeFunc:       DefaultTime,
	}
	csvHeader := "a,b,c"
	csvBody := "1,2,3"
	// Header-only input primes the parser but emits nothing.
	metrics, err := p.Parse([]byte(csvHeader))
	require.NoError(t, err)
	require.Len(t, metrics, 0)
	metric, err := p.ParseLine(csvBody)
	// Bug fix: the original discarded this error, so a ParseLine failure
	// would surface only as a confusing nil-metric comparison below.
	require.NoError(t, err)
	testutil.RequireMetricEqual(t,
		testutil.MustMetric(
			"csv",
			map[string]string{},
			map[string]interface{}{
				"a": int64(1),
				"b": int64(2),
				"c": int64(3),
			},
			DefaultTime(),
		), metric)
}

View File

@@ -2,6 +2,7 @@ package parsers
import (
"fmt"
"time"
"github.com/influxdata/telegraf"
@@ -18,6 +19,8 @@ import (
"github.com/influxdata/telegraf/plugins/parsers/wavefront"
)
type ParserFunc func() (Parser, error)
// ParserInput is an interface for input plugins that are able to parse
// arbitrary data formats.
type ParserInput interface {
@@ -25,6 +28,13 @@ type ParserInput interface {
SetParser(parser Parser)
}
// ParserFuncInput is an interface for input plugins that are able to parse
// arbitrary data formats.
type ParserFuncInput interface {
// GetParser returns a new parser.
SetParserFunc(fn ParserFunc)
}
// Parser is an interface defining functions that a parser plugin must satisfy.
type Parser interface {
// Parse takes a byte buffer separated by newlines
@@ -116,17 +126,17 @@ type Config struct {
GrokTimeZone string
//csv configuration
CSVDelimiter string
CSVComment string
CSVTrimSpace bool
CSVColumnNames []string
CSVTagColumns []string
CSVMeasurementColumn string
CSVTimestampColumn string
CSVTimestampFormat string
CSVHeaderRowCount int
CSVSkipRows int
CSVSkipColumns int
CSVColumnNames []string `toml:"csv_column_names"`
CSVComment string `toml:"csv_comment"`
CSVDelimiter string `toml:"csv_delimiter"`
CSVHeaderRowCount int `toml:"csv_header_row_count"`
CSVMeasurementColumn string `toml:"csv_measurement_column"`
CSVSkipColumns int `toml:"csv_skip_columns"`
CSVSkipRows int `toml:"csv_skip_rows"`
CSVTagColumns []string `toml:"csv_tag_columns"`
CSVTimestampColumn string `toml:"csv_timestamp_column"`
CSVTimestampFormat string `toml:"csv_timestamp_format"`
CSVTrimSpace bool `toml:"csv_trim_space"`
}
// NewParser returns a Parser interface based on the given config.
@@ -199,28 +209,27 @@ func NewParser(config *Config) (Parser, error) {
}
func newCSVParser(metricName string,
header int,
headerRowCount int,
skipRows int,
skipColumns int,
delimiter string,
comment string,
trimSpace bool,
dataColumns []string,
columnNames []string,
tagColumns []string,
nameColumn string,
timestampColumn string,
timestampFormat string,
defaultTags map[string]string) (Parser, error) {
if header == 0 && len(dataColumns) == 0 {
// if there is no header and no DataColumns, that's an error
return nil, fmt.Errorf("there must be a header if `csv_data_columns` is not specified")
if headerRowCount == 0 && len(columnNames) == 0 {
return nil, fmt.Errorf("there must be a header if `csv_column_names` is not specified")
}
if delimiter != "" {
runeStr := []rune(delimiter)
if len(runeStr) > 1 {
return nil, fmt.Errorf("delimiter must be a single character, got: %s", delimiter)
return nil, fmt.Errorf("csv_delimiter must be a single character, got: %s", delimiter)
}
delimiter = fmt.Sprintf("%v", runeStr[0])
}
@@ -228,25 +237,26 @@ func newCSVParser(metricName string,
if comment != "" {
runeStr := []rune(comment)
if len(runeStr) > 1 {
return nil, fmt.Errorf("delimiter must be a single character, got: %s", comment)
return nil, fmt.Errorf("csv_delimiter must be a single character, got: %s", comment)
}
comment = fmt.Sprintf("%v", runeStr[0])
}
parser := &csv.Parser{
MetricName: metricName,
HeaderRowCount: header,
HeaderRowCount: headerRowCount,
SkipRows: skipRows,
SkipColumns: skipColumns,
Delimiter: delimiter,
Comment: comment,
TrimSpace: trimSpace,
ColumnNames: dataColumns,
ColumnNames: columnNames,
TagColumns: tagColumns,
MeasurementColumn: nameColumn,
TimestampColumn: timestampColumn,
TimestampFormat: timestampFormat,
DefaultTags: defaultTags,
TimeFunc: time.Now,
}
return parser, nil