Add new config for csv column explicit type conversion (#4781)

This commit is contained in:
Rudy 2018-10-04 08:19:44 +07:00 committed by Daniel Nelson
parent 9efe7c12f0
commit a1f9f63463
5 changed files with 73 additions and 0 deletions

View File

@ -1460,6 +1460,18 @@ func getParserConfig(name string, tbl *ast.Table) (*parsers.Config, error) {
}
}
if node, ok := tbl.Fields["csv_column_types"]; ok {
if kv, ok := node.(*ast.KeyValue); ok {
if ary, ok := kv.Value.(*ast.Array); ok {
for _, elem := range ary.Value {
if str, ok := elem.(*ast.String); ok {
c.CSVColumnTypes = append(c.CSVColumnTypes, str.Value)
}
}
}
}
}
if node, ok := tbl.Fields["csv_tag_columns"]; ok {
if kv, ok := node.(*ast.KeyValue); ok {
if ary, ok := kv.Value.(*ast.Array); ok {
@ -1588,6 +1600,7 @@ func getParserConfig(name string, tbl *ast.Table) (*parsers.Config, error) {
delete(tbl.Fields, "grok_custom_pattern_files")
delete(tbl.Fields, "grok_timezone")
delete(tbl.Fields, "csv_column_names")
delete(tbl.Fields, "csv_column_types")
delete(tbl.Fields, "csv_comment")
delete(tbl.Fields, "csv_delimiter")
delete(tbl.Fields, "csv_field_columns")

View File

@ -27,6 +27,11 @@ values.
## If `csv_header_row_count` is set to 0, this config must be used
csv_column_names = []
## For assigning explicit data types to columns.
## Supported types: "int", "float", "bool", "string".
## If this is not specified, type conversion will be attempted automatically using the types above.
csv_column_types = []
## Indicates the number of rows to skip before looking for header information.
csv_skip_rows = 0

View File

@ -21,6 +21,7 @@ type Parser struct {
Comment string
TrimSpace bool
ColumnNames []string
ColumnTypes []string
TagColumns []string
MeasurementColumn string
TimestampColumn string
@ -148,6 +149,40 @@ outer:
}
}
// Try explicit conversion only when column types is defined.
if len(p.ColumnTypes) > 0 {
// Throw error if current column count exceeds defined types.
if i >= len(p.ColumnTypes) {
return nil, fmt.Errorf("column type: column count exceeded")
}
var val interface{}
var err error
switch p.ColumnTypes[i] {
case "int":
val, err = strconv.ParseInt(value, 10, 64)
if err != nil {
return nil, fmt.Errorf("column type: parse int error %s", err)
}
case "float":
val, err = strconv.ParseFloat(value, 64)
if err != nil {
return nil, fmt.Errorf("column type: parse float error %s", err)
}
case "bool":
val, err = strconv.ParseBool(value)
if err != nil {
return nil, fmt.Errorf("column type: parse bool error %s", err)
}
default:
val = value
}
recordFields[fieldName] = val
continue
}
// attempt type conversions
if iValue, err := strconv.ParseInt(value, 10, 64); err == nil {
recordFields[fieldName] = iValue

View File

@ -147,6 +147,18 @@ func TestValueConversion(t *testing.T) {
//deep equal fields
require.Equal(t, expectedMetric.Fields(), returnedMetric.Fields())
// Test explicit type conversion.
p.ColumnTypes = []string{"float", "int", "bool", "string"}
metrics, err = p.Parse([]byte(testCSV))
require.NoError(t, err)
returnedMetric, err2 = metric.New(metrics[0].Name(), metrics[0].Tags(), metrics[0].Fields(), time.Unix(0, 0))
require.NoError(t, err2)
//deep equal fields
require.Equal(t, expectedMetric.Fields(), returnedMetric.Fields())
}
func TestSkipComment(t *testing.T) {

View File

@ -127,6 +127,7 @@ type Config struct {
//csv configuration
CSVColumnNames []string `toml:"csv_column_names"`
CSVColumnTypes []string `toml:"csv_column_types"`
CSVComment string `toml:"csv_comment"`
CSVDelimiter string `toml:"csv_delimiter"`
CSVHeaderRowCount int `toml:"csv_header_row_count"`
@ -195,6 +196,7 @@ func NewParser(config *Config) (Parser, error) {
config.CSVComment,
config.CSVTrimSpace,
config.CSVColumnNames,
config.CSVColumnTypes,
config.CSVTagColumns,
config.CSVMeasurementColumn,
config.CSVTimestampColumn,
@ -216,6 +218,7 @@ func newCSVParser(metricName string,
comment string,
trimSpace bool,
columnNames []string,
columnTypes []string,
tagColumns []string,
nameColumn string,
timestampColumn string,
@ -240,6 +243,10 @@ func newCSVParser(metricName string,
}
}
if len(columnNames) > 0 && len(columnTypes) > 0 && len(columnNames) != len(columnTypes) {
return nil, fmt.Errorf("csv_column_names field count doesn't match with csv_column_types")
}
parser := &csv.Parser{
MetricName: metricName,
HeaderRowCount: headerRowCount,
@ -249,6 +256,7 @@ func newCSVParser(metricName string,
Comment: comment,
TrimSpace: trimSpace,
ColumnNames: columnNames,
ColumnTypes: columnTypes,
TagColumns: tagColumns,
MeasurementColumn: nameColumn,
TimestampColumn: timestampColumn,