Fix cleanup of csv parser options, use per file parser (#4712)
This commit is contained in:
@@ -26,6 +26,11 @@ type Parser struct {
|
||||
TimestampColumn string
|
||||
TimestampFormat string
|
||||
DefaultTags map[string]string
|
||||
TimeFunc func() time.Time
|
||||
}
|
||||
|
||||
func (p *Parser) SetTimeFunc(fn metric.TimeFunc) {
|
||||
p.TimeFunc = fn
|
||||
}
|
||||
|
||||
func (p *Parser) compile(r *bytes.Reader) (*csv.Reader, error) {
|
||||
@@ -167,7 +172,7 @@ outer:
|
||||
measurementName = fmt.Sprintf("%v", recordFields[p.MeasurementColumn])
|
||||
}
|
||||
|
||||
metricTime := time.Now()
|
||||
metricTime := p.TimeFunc()
|
||||
if p.TimestampColumn != "" {
|
||||
if recordFields[p.TimestampColumn] == nil {
|
||||
return nil, fmt.Errorf("timestamp column: %v could not be found", p.TimestampColumn)
|
||||
|
||||
@@ -6,13 +6,19 @@ import (
|
||||
"time"
|
||||
|
||||
"github.com/influxdata/telegraf/metric"
|
||||
"github.com/influxdata/telegraf/testutil"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
// DefaultTime is a fixed clock for tests: every call returns the instant
// 3600 seconds after the Unix epoch.
var DefaultTime = func() time.Time {
	const secondsAfterEpoch = 3600
	return time.Unix(secondsAfterEpoch, 0)
}
|
||||
|
||||
func TestBasicCSV(t *testing.T) {
|
||||
p := Parser{
|
||||
ColumnNames: []string{"first", "second", "third"},
|
||||
TagColumns: []string{"third"},
|
||||
TimeFunc: DefaultTime,
|
||||
}
|
||||
|
||||
_, err := p.ParseLine("1.4,true,hi")
|
||||
@@ -23,6 +29,7 @@ func TestHeaderConcatenationCSV(t *testing.T) {
|
||||
p := Parser{
|
||||
HeaderRowCount: 2,
|
||||
MeasurementColumn: "3",
|
||||
TimeFunc: DefaultTime,
|
||||
}
|
||||
testCSV := `first,second
|
||||
1,2,3
|
||||
@@ -38,6 +45,7 @@ func TestHeaderOverride(t *testing.T) {
|
||||
HeaderRowCount: 1,
|
||||
ColumnNames: []string{"first", "second", "third"},
|
||||
MeasurementColumn: "third",
|
||||
TimeFunc: DefaultTime,
|
||||
}
|
||||
testCSV := `line1,line2,line3
|
||||
3.4,70,test_name`
|
||||
@@ -53,6 +61,7 @@ func TestTimestamp(t *testing.T) {
|
||||
MeasurementColumn: "third",
|
||||
TimestampColumn: "first",
|
||||
TimestampFormat: "02/01/06 03:04:05 PM",
|
||||
TimeFunc: DefaultTime,
|
||||
}
|
||||
testCSV := `line1,line2,line3
|
||||
23/05/09 04:05:06 PM,70,test_name
|
||||
@@ -70,6 +79,7 @@ func TestTimestampError(t *testing.T) {
|
||||
ColumnNames: []string{"first", "second", "third"},
|
||||
MeasurementColumn: "third",
|
||||
TimestampColumn: "first",
|
||||
TimeFunc: DefaultTime,
|
||||
}
|
||||
testCSV := `line1,line2,line3
|
||||
23/05/09 04:05:06 PM,70,test_name
|
||||
@@ -83,6 +93,7 @@ func TestQuotedCharacter(t *testing.T) {
|
||||
HeaderRowCount: 1,
|
||||
ColumnNames: []string{"first", "second", "third"},
|
||||
MeasurementColumn: "third",
|
||||
TimeFunc: DefaultTime,
|
||||
}
|
||||
|
||||
testCSV := `line1,line2,line3
|
||||
@@ -98,6 +109,7 @@ func TestDelimiter(t *testing.T) {
|
||||
Delimiter: "%",
|
||||
ColumnNames: []string{"first", "second", "third"},
|
||||
MeasurementColumn: "third",
|
||||
TimeFunc: DefaultTime,
|
||||
}
|
||||
|
||||
testCSV := `line1%line2%line3
|
||||
@@ -113,6 +125,7 @@ func TestValueConversion(t *testing.T) {
|
||||
Delimiter: ",",
|
||||
ColumnNames: []string{"first", "second", "third", "fourth"},
|
||||
MetricName: "test_value",
|
||||
TimeFunc: DefaultTime,
|
||||
}
|
||||
testCSV := `3.3,4,true,hello`
|
||||
|
||||
@@ -142,6 +155,7 @@ func TestSkipComment(t *testing.T) {
|
||||
Comment: "#",
|
||||
ColumnNames: []string{"first", "second", "third", "fourth"},
|
||||
MetricName: "test_value",
|
||||
TimeFunc: DefaultTime,
|
||||
}
|
||||
testCSV := `#3.3,4,true,hello
|
||||
4,9.9,true,name_this`
|
||||
@@ -164,6 +178,7 @@ func TestTrimSpace(t *testing.T) {
|
||||
TrimSpace: true,
|
||||
ColumnNames: []string{"first", "second", "third", "fourth"},
|
||||
MetricName: "test_value",
|
||||
TimeFunc: DefaultTime,
|
||||
}
|
||||
testCSV := ` 3.3, 4, true,hello`
|
||||
|
||||
@@ -185,6 +200,7 @@ func TestSkipRows(t *testing.T) {
|
||||
SkipRows: 1,
|
||||
TagColumns: []string{"line1"},
|
||||
MeasurementColumn: "line3",
|
||||
TimeFunc: DefaultTime,
|
||||
}
|
||||
testCSV := `garbage nonsense
|
||||
line1,line2,line3
|
||||
@@ -203,6 +219,7 @@ func TestSkipColumns(t *testing.T) {
|
||||
p := Parser{
|
||||
SkipColumns: 1,
|
||||
ColumnNames: []string{"line1", "line2"},
|
||||
TimeFunc: DefaultTime,
|
||||
}
|
||||
testCSV := `hello,80,test_name`
|
||||
|
||||
@@ -219,6 +236,7 @@ func TestSkipColumnsWithHeader(t *testing.T) {
|
||||
p := Parser{
|
||||
SkipColumns: 1,
|
||||
HeaderRowCount: 2,
|
||||
TimeFunc: DefaultTime,
|
||||
}
|
||||
testCSV := `col,col,col
|
||||
1,2,3
|
||||
@@ -229,3 +247,30 @@ func TestSkipColumnsWithHeader(t *testing.T) {
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, map[string]interface{}{"col2": int64(80), "col3": "test_name"}, metrics[0].Fields())
|
||||
}
|
||||
|
||||
func TestParseStream(t *testing.T) {
|
||||
p := Parser{
|
||||
MetricName: "csv",
|
||||
HeaderRowCount: 1,
|
||||
TimeFunc: DefaultTime,
|
||||
}
|
||||
|
||||
csvHeader := "a,b,c"
|
||||
csvBody := "1,2,3"
|
||||
|
||||
metrics, err := p.Parse([]byte(csvHeader))
|
||||
require.NoError(t, err)
|
||||
require.Len(t, metrics, 0)
|
||||
metric, err := p.ParseLine(csvBody)
|
||||
testutil.RequireMetricEqual(t,
|
||||
testutil.MustMetric(
|
||||
"csv",
|
||||
map[string]string{},
|
||||
map[string]interface{}{
|
||||
"a": int64(1),
|
||||
"b": int64(2),
|
||||
"c": int64(3),
|
||||
},
|
||||
DefaultTime(),
|
||||
), metric)
|
||||
}
|
||||
|
||||
@@ -2,6 +2,7 @@ package parsers
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"time"
|
||||
|
||||
"github.com/influxdata/telegraf"
|
||||
|
||||
@@ -18,6 +19,8 @@ import (
|
||||
"github.com/influxdata/telegraf/plugins/parsers/wavefront"
|
||||
)
|
||||
|
||||
// ParserFunc is a function that returns a Parser, or an error.
type ParserFunc func() (Parser, error)
|
||||
|
||||
// ParserInput is an interface for input plugins that are able to parse
|
||||
// arbitrary data formats.
|
||||
type ParserInput interface {
|
||||
@@ -25,6 +28,13 @@ type ParserInput interface {
|
||||
SetParser(parser Parser)
|
||||
}
|
||||
|
||||
// ParserFuncInput is an interface for input plugins that are able to parse
|
||||
// arbitrary data formats using parsers obtained from a ParserFunc.
|
||||
type ParserFuncInput interface {
|
||||
// SetParserFunc sets the function used to create a new parser.
|
||||
SetParserFunc(fn ParserFunc)
|
||||
}
|
||||
|
||||
// Parser is an interface defining functions that a parser plugin must satisfy.
|
||||
type Parser interface {
|
||||
// Parse takes a byte buffer separated by newlines
|
||||
@@ -116,17 +126,17 @@ type Config struct {
|
||||
GrokTimeZone string
|
||||
|
||||
//csv configuration
|
||||
CSVDelimiter string
|
||||
CSVComment string
|
||||
CSVTrimSpace bool
|
||||
CSVColumnNames []string
|
||||
CSVTagColumns []string
|
||||
CSVMeasurementColumn string
|
||||
CSVTimestampColumn string
|
||||
CSVTimestampFormat string
|
||||
CSVHeaderRowCount int
|
||||
CSVSkipRows int
|
||||
CSVSkipColumns int
|
||||
CSVColumnNames []string `toml:"csv_column_names"`
|
||||
CSVComment string `toml:"csv_comment"`
|
||||
CSVDelimiter string `toml:"csv_delimiter"`
|
||||
CSVHeaderRowCount int `toml:"csv_header_row_count"`
|
||||
CSVMeasurementColumn string `toml:"csv_measurement_column"`
|
||||
CSVSkipColumns int `toml:"csv_skip_columns"`
|
||||
CSVSkipRows int `toml:"csv_skip_rows"`
|
||||
CSVTagColumns []string `toml:"csv_tag_columns"`
|
||||
CSVTimestampColumn string `toml:"csv_timestamp_column"`
|
||||
CSVTimestampFormat string `toml:"csv_timestamp_format"`
|
||||
CSVTrimSpace bool `toml:"csv_trim_space"`
|
||||
}
|
||||
|
||||
// NewParser returns a Parser interface based on the given config.
|
||||
@@ -199,28 +209,27 @@ func NewParser(config *Config) (Parser, error) {
|
||||
}
|
||||
|
||||
func newCSVParser(metricName string,
|
||||
header int,
|
||||
headerRowCount int,
|
||||
skipRows int,
|
||||
skipColumns int,
|
||||
delimiter string,
|
||||
comment string,
|
||||
trimSpace bool,
|
||||
dataColumns []string,
|
||||
columnNames []string,
|
||||
tagColumns []string,
|
||||
nameColumn string,
|
||||
timestampColumn string,
|
||||
timestampFormat string,
|
||||
defaultTags map[string]string) (Parser, error) {
|
||||
|
||||
if header == 0 && len(dataColumns) == 0 {
|
||||
// if there is no header and no DataColumns, that's an error
|
||||
return nil, fmt.Errorf("there must be a header if `csv_data_columns` is not specified")
|
||||
if headerRowCount == 0 && len(columnNames) == 0 {
|
||||
return nil, fmt.Errorf("there must be a header if `csv_column_names` is not specified")
|
||||
}
|
||||
|
||||
if delimiter != "" {
|
||||
runeStr := []rune(delimiter)
|
||||
if len(runeStr) > 1 {
|
||||
return nil, fmt.Errorf("delimiter must be a single character, got: %s", delimiter)
|
||||
return nil, fmt.Errorf("csv_delimiter must be a single character, got: %s", delimiter)
|
||||
}
|
||||
delimiter = fmt.Sprintf("%v", runeStr[0])
|
||||
}
|
||||
@@ -228,25 +237,26 @@ func newCSVParser(metricName string,
|
||||
if comment != "" {
|
||||
runeStr := []rune(comment)
|
||||
if len(runeStr) > 1 {
|
||||
return nil, fmt.Errorf("delimiter must be a single character, got: %s", comment)
|
||||
return nil, fmt.Errorf("csv_delimiter must be a single character, got: %s", comment)
|
||||
}
|
||||
comment = fmt.Sprintf("%v", runeStr[0])
|
||||
}
|
||||
|
||||
parser := &csv.Parser{
|
||||
MetricName: metricName,
|
||||
HeaderRowCount: header,
|
||||
HeaderRowCount: headerRowCount,
|
||||
SkipRows: skipRows,
|
||||
SkipColumns: skipColumns,
|
||||
Delimiter: delimiter,
|
||||
Comment: comment,
|
||||
TrimSpace: trimSpace,
|
||||
ColumnNames: dataColumns,
|
||||
ColumnNames: columnNames,
|
||||
TagColumns: tagColumns,
|
||||
MeasurementColumn: nameColumn,
|
||||
TimestampColumn: timestampColumn,
|
||||
TimestampFormat: timestampFormat,
|
||||
DefaultTags: defaultTags,
|
||||
TimeFunc: time.Now,
|
||||
}
|
||||
|
||||
return parser, nil
|
||||
|
||||
Reference in New Issue
Block a user