Implementing generic parser plugins and documentation

This constitutes a large change in how we will parse different data
formats going forward (for the plugins that support it)

This is working off @henrypfhu's changes.
This commit is contained in:
Cameron Sparr
2016-02-05 17:36:35 -07:00
parent 1449c8b887
commit e619493ece
32 changed files with 1971 additions and 522 deletions

View File

@@ -15,6 +15,7 @@ import (
"github.com/influxdata/telegraf/internal/models"
"github.com/influxdata/telegraf/plugins/inputs"
"github.com/influxdata/telegraf/plugins/outputs"
"github.com/influxdata/telegraf/plugins/parsers"
"github.com/influxdata/config"
"github.com/naoina/toml/ast"
@@ -428,6 +429,17 @@ func (c *Config) addInput(name string, table *ast.Table) error {
}
input := creator()
// If the input has a SetParser function, then this means it can accept
// arbitrary types of input, so build the parser and set it.
switch t := input.(type) {
case parsers.ParserInput:
parser, err := buildParser(name, table)
if err != nil {
return err
}
t.SetParser(parser)
}
pluginConfig, err := buildInput(name, table)
if err != nil {
return err
@@ -583,6 +595,66 @@ func buildInput(name string, tbl *ast.Table) (*internal_models.InputConfig, erro
return cp, nil
}
// buildParser grabs the necessary entries from the ast.Table for creating
// a parsers.Parser object, and creates it, which can then be added onto
// an Input object.
func buildParser(name string, tbl *ast.Table) (parsers.Parser, error) {
c := &parsers.Config{}
if node, ok := tbl.Fields["data_format"]; ok {
if kv, ok := node.(*ast.KeyValue); ok {
if str, ok := kv.Value.(*ast.String); ok {
c.DataFormat = str.Value
}
}
}
if c.DataFormat == "" {
c.DataFormat = "influx"
}
if node, ok := tbl.Fields["separator"]; ok {
if kv, ok := node.(*ast.KeyValue); ok {
if str, ok := kv.Value.(*ast.String); ok {
c.Separator = str.Value
}
}
}
if node, ok := tbl.Fields["templates"]; ok {
if kv, ok := node.(*ast.KeyValue); ok {
if ary, ok := kv.Value.(*ast.Array); ok {
for _, elem := range ary.Value {
if str, ok := elem.(*ast.String); ok {
c.Templates = append(c.Templates, str.Value)
}
}
}
}
}
if node, ok := tbl.Fields["tag_keys"]; ok {
if kv, ok := node.(*ast.KeyValue); ok {
if ary, ok := kv.Value.(*ast.Array); ok {
for _, elem := range ary.Value {
if str, ok := elem.(*ast.String); ok {
c.TagKeys = append(c.TagKeys, str.Value)
}
}
}
}
}
c.MetricName = name
delete(tbl.Fields, "data_format")
delete(tbl.Fields, "separator")
delete(tbl.Fields, "templates")
delete(tbl.Fields, "tag_keys")
return parsers.NewParser(c)
}
// buildOutput parses output specific items from the ast.Table, builds the filter and returns an
// internal_models.OutputConfig to be inserted into internal_models.RunningInput
// Note: error exists in the return for future calls that might require error

View File

@@ -9,6 +9,7 @@ import (
"github.com/influxdata/telegraf/plugins/inputs/exec"
"github.com/influxdata/telegraf/plugins/inputs/memcached"
"github.com/influxdata/telegraf/plugins/inputs/procstat"
"github.com/influxdata/telegraf/plugins/parsers"
"github.com/stretchr/testify/assert"
)
@@ -91,6 +92,9 @@ func TestConfig_LoadDirectory(t *testing.T) {
"Testdata did not produce correct memcached metadata.")
ex := inputs.Inputs["exec"]().(*exec.Exec)
p, err := parsers.NewInfluxParser()
assert.NoError(t, err)
ex.SetParser(p)
ex.Command = "/usr/bin/myothercollector --foo=bar"
eConfig := &internal_models.InputConfig{
Name: "exec",

View File

@@ -1,31 +0,0 @@
package encoding
import (
"fmt"
"github.com/influxdata/telegraf"
)
type Parser interface {
InitConfig(configs map[string]interface{}) error
Parse(buf []byte) ([]telegraf.Metric, error)
ParseLine(line string) (telegraf.Metric, error)
}
type Creator func() Parser
var Parsers = map[string]Creator{}
func Add(name string, creator Creator) {
Parsers[name] = creator
}
func NewParser(dataFormat string, configs map[string]interface{}) (parser Parser, err error) {
creator := Parsers[dataFormat]
if creator == nil {
return nil, fmt.Errorf("Unsupported data format: %s. ", dataFormat)
}
parser = creator()
err = parser.InitConfig(configs)
return parser, err
}

View File

@@ -1,135 +0,0 @@
package graphite
import (
"fmt"
"strings"
)
const (
// DefaultSeparator is the default join character to use when joining multiple
// measurment parts in a template.
DefaultSeparator = "."
)
// Config represents the configuration for Graphite endpoints.
type Config struct {
Separator string
Templates []string
}
// Validate validates the config's templates and tags.
func (c *Config) Validate() error {
if err := c.validateTemplates(); err != nil {
return err
}
return nil
}
func (c *Config) validateTemplates() error {
// map to keep track of filters we see
filters := map[string]struct{}{}
for i, t := range c.Templates {
parts := strings.Fields(t)
// Ensure template string is non-empty
if len(parts) == 0 {
return fmt.Errorf("missing template at position: %d", i)
}
if len(parts) == 1 && parts[0] == "" {
return fmt.Errorf("missing template at position: %d", i)
}
if len(parts) > 3 {
return fmt.Errorf("invalid template format: '%s'", t)
}
template := t
filter := ""
tags := ""
if len(parts) >= 2 {
// We could have <filter> <template> or <template> <tags>. Equals is only allowed in
// tags section.
if strings.Contains(parts[1], "=") {
template = parts[0]
tags = parts[1]
} else {
filter = parts[0]
template = parts[1]
}
}
if len(parts) == 3 {
tags = parts[2]
}
// Validate the template has one and only one measurement
if err := c.validateTemplate(template); err != nil {
return err
}
// Prevent duplicate filters in the config
if _, ok := filters[filter]; ok {
return fmt.Errorf("duplicate filter '%s' found at position: %d", filter, i)
}
filters[filter] = struct{}{}
if filter != "" {
// Validate filter expression is valid
if err := c.validateFilter(filter); err != nil {
return err
}
}
if tags != "" {
// Validate tags
for _, tagStr := range strings.Split(tags, ",") {
if err := c.validateTag(tagStr); err != nil {
return err
}
}
}
}
return nil
}
func (c *Config) validateTemplate(template string) error {
hasMeasurement := false
for _, p := range strings.Split(template, ".") {
if p == "measurement" || p == "measurement*" {
hasMeasurement = true
}
}
if !hasMeasurement {
return fmt.Errorf("no measurement in template `%s`", template)
}
return nil
}
func (c *Config) validateFilter(filter string) error {
for _, p := range strings.Split(filter, ".") {
if p == "" {
return fmt.Errorf("filter contains blank section: %s", filter)
}
if strings.Contains(p, "*") && p != "*" {
return fmt.Errorf("invalid filter wildcard section: %s", filter)
}
}
return nil
}
func (c *Config) validateTag(keyValue string) error {
parts := strings.Split(keyValue, "=")
if len(parts) != 2 {
return fmt.Errorf("invalid template tags: '%s'", keyValue)
}
if parts[0] == "" || parts[1] == "" {
return fmt.Errorf("invalid template tags: %s'", keyValue)
}
return nil
}

View File

@@ -1,14 +0,0 @@
package graphite
import "fmt"
// An UnsupposedValueError is returned when a parsed value is not
// supposed.
type UnsupposedValueError struct {
Field string
Value float64
}
func (err *UnsupposedValueError) Error() string {
return fmt.Sprintf(`field "%s" value: "%v" is unsupported`, err.Field, err.Value)
}

View File

@@ -1,414 +0,0 @@
package graphite
import (
"bytes"
"fmt"
"io"
"math"
"sort"
"strconv"
"strings"
"time"
"bufio"
"github.com/influxdata/influxdb/models"
"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/internal/encoding"
)
// Minimum and maximum supported dates for timestamps.
var (
MinDate = time.Date(1901, 12, 13, 0, 0, 0, 0, time.UTC)
MaxDate = time.Date(2038, 1, 19, 0, 0, 0, 0, time.UTC)
)
// Options are configurable values that can be provided to a Parser
type Options struct {
Separator string
Templates []string
}
// Parser encapsulates a Graphite Parser.
type GraphiteParser struct {
matcher *matcher
}
func NewParser() *GraphiteParser {
return &GraphiteParser{}
}
func (p *GraphiteParser) InitConfig(configs map[string]interface{}) error {
var err error
options := Options{
Templates: configs["Templates"].([]string),
Separator: configs["Separator"].(string)}
matcher := newMatcher()
p.matcher = matcher
defaultTemplate, _ := NewTemplate("measurement*", nil, DefaultSeparator)
matcher.AddDefaultTemplate(defaultTemplate)
for _, pattern := range options.Templates {
template := pattern
filter := ""
// Format is [filter] <template> [tag1=value1,tag2=value2]
parts := strings.Fields(pattern)
if len(parts) < 1 {
continue
} else if len(parts) >= 2 {
if strings.Contains(parts[1], "=") {
template = parts[0]
} else {
filter = parts[0]
template = parts[1]
}
}
// Parse out the default tags specific to this template
tags := models.Tags{}
if strings.Contains(parts[len(parts)-1], "=") {
tagStrs := strings.Split(parts[len(parts)-1], ",")
for _, kv := range tagStrs {
parts := strings.Split(kv, "=")
tags[parts[0]] = parts[1]
}
}
tmpl, err1 := NewTemplate(template, tags, options.Separator)
if err1 != nil {
err = err1
break
}
matcher.Add(filter, tmpl)
}
if err != nil {
return fmt.Errorf("exec input parser config is error: %s ", err.Error())
} else {
return nil
}
}
func init() {
encoding.Add("graphite", func() encoding.Parser {
return NewParser()
})
}
func (p *GraphiteParser) Parse(buf []byte) ([]telegraf.Metric, error) {
// parse even if the buffer begins with a newline
buf = bytes.TrimPrefix(buf, []byte("\n"))
metrics := make([]telegraf.Metric, 0)
buffer := bytes.NewBuffer(buf)
reader := bufio.NewReader(buffer)
for {
// Read up to the next newline.
buf, err := reader.ReadBytes('\n')
if err == io.EOF {
return metrics, nil
}
if err != nil && err != io.EOF {
return metrics, err
}
// Trim the buffer, even though there should be no padding
line := strings.TrimSpace(string(buf))
if metric, err := p.ParseLine(line); err == nil {
metrics = append(metrics, metric)
}
}
}
// Parse performs Graphite parsing of a single line.
func (p *GraphiteParser) ParseLine(line string) (telegraf.Metric, error) {
// Break into 3 fields (name, value, timestamp).
fields := strings.Fields(line)
if len(fields) != 2 && len(fields) != 3 {
return nil, fmt.Errorf("received %q which doesn't have required fields", line)
}
// decode the name and tags
template := p.matcher.Match(fields[0])
measurement, tags, field, err := template.Apply(fields[0])
if err != nil {
return nil, err
}
// Could not extract measurement, use the raw value
if measurement == "" {
measurement = fields[0]
}
// Parse value.
v, err := strconv.ParseFloat(fields[1], 64)
if err != nil {
return nil, fmt.Errorf(`field "%s" value: %s`, fields[0], err)
}
if math.IsNaN(v) || math.IsInf(v, 0) {
return nil, &UnsupposedValueError{Field: fields[0], Value: v}
}
fieldValues := map[string]interface{}{}
if field != "" {
fieldValues[field] = v
} else {
fieldValues["value"] = v
}
// If no 3rd field, use now as timestamp
timestamp := time.Now().UTC()
if len(fields) == 3 {
// Parse timestamp.
unixTime, err := strconv.ParseFloat(fields[2], 64)
if err != nil {
return nil, fmt.Errorf(`field "%s" time: %s`, fields[0], err)
}
// -1 is a special value that gets converted to current UTC time
// See https://github.com/graphite-project/carbon/issues/54
if unixTime != float64(-1) {
// Check if we have fractional seconds
timestamp = time.Unix(int64(unixTime), int64((unixTime-math.Floor(unixTime))*float64(time.Second)))
if timestamp.Before(MinDate) || timestamp.After(MaxDate) {
return nil, fmt.Errorf("timestamp out of range")
}
}
}
return telegraf.NewMetric(measurement, tags, fieldValues, timestamp)
}
// ApplyTemplate extracts the template fields from the given line and
// returns the measurement name and tags.
func (p *GraphiteParser) ApplyTemplate(line string) (string, map[string]string, string, error) {
// Break line into fields (name, value, timestamp), only name is used
fields := strings.Fields(line)
if len(fields) == 0 {
return "", make(map[string]string), "", nil
}
// decode the name and tags
template := p.matcher.Match(fields[0])
name, tags, field, err := template.Apply(fields[0])
return name, tags, field, err
}
// template represents a pattern and tags to map a graphite metric string to a influxdb Point
type template struct {
tags []string
defaultTags models.Tags
greedyMeasurement bool
separator string
}
// NewTemplate returns a new template ensuring it has a measurement
// specified.
func NewTemplate(pattern string, defaultTags models.Tags, separator string) (*template, error) {
tags := strings.Split(pattern, ".")
hasMeasurement := false
template := &template{tags: tags, defaultTags: defaultTags, separator: separator}
for _, tag := range tags {
if strings.HasPrefix(tag, "measurement") {
hasMeasurement = true
}
if tag == "measurement*" {
template.greedyMeasurement = true
}
}
if !hasMeasurement {
return nil, fmt.Errorf("no measurement specified for template. %q", pattern)
}
return template, nil
}
// Apply extracts the template fields from the given line and returns the measurement
// name and tags
func (t *template) Apply(line string) (string, map[string]string, string, error) {
fields := strings.Split(line, ".")
var (
measurement []string
tags = make(map[string]string)
field string
)
// Set any default tags
for k, v := range t.defaultTags {
tags[k] = v
}
for i, tag := range t.tags {
if i >= len(fields) {
continue
}
if tag == "measurement" {
measurement = append(measurement, fields[i])
} else if tag == "field" {
if len(field) != 0 {
return "", nil, "", fmt.Errorf("'field' can only be used once in each template: %q", line)
}
field = fields[i]
} else if tag == "measurement*" {
measurement = append(measurement, fields[i:]...)
break
} else if tag != "" {
tags[tag] = fields[i]
}
}
return strings.Join(measurement, t.separator), tags, field, nil
}
// matcher determines which template should be applied to a given metric
// based on a filter tree.
type matcher struct {
root *node
defaultTemplate *template
}
func newMatcher() *matcher {
return &matcher{
root: &node{},
}
}
// Add inserts the template in the filter tree based the given filter
func (m *matcher) Add(filter string, template *template) {
if filter == "" {
m.AddDefaultTemplate(template)
return
}
m.root.Insert(filter, template)
}
func (m *matcher) AddDefaultTemplate(template *template) {
m.defaultTemplate = template
}
// Match returns the template that matches the given graphite line
func (m *matcher) Match(line string) *template {
tmpl := m.root.Search(line)
if tmpl != nil {
return tmpl
}
return m.defaultTemplate
}
// node is an item in a sorted k-ary tree. Each child is sorted by its value.
// The special value of "*", is always last.
type node struct {
value string
children nodes
template *template
}
func (n *node) insert(values []string, template *template) {
// Add the end, set the template
if len(values) == 0 {
n.template = template
return
}
// See if the the current element already exists in the tree. If so, insert the
// into that sub-tree
for _, v := range n.children {
if v.value == values[0] {
v.insert(values[1:], template)
return
}
}
// New element, add it to the tree and sort the children
newNode := &node{value: values[0]}
n.children = append(n.children, newNode)
sort.Sort(&n.children)
// Now insert the rest of the tree into the new element
newNode.insert(values[1:], template)
}
// Insert inserts the given string template into the tree. The filter string is separated
// on "." and each part is used as the path in the tree.
func (n *node) Insert(filter string, template *template) {
n.insert(strings.Split(filter, "."), template)
}
func (n *node) search(lineParts []string) *template {
// Nothing to search
if len(lineParts) == 0 || len(n.children) == 0 {
return n.template
}
// If last element is a wildcard, don't include in this search since it's sorted
// to the end but lexicographically it would not always be and sort.Search assumes
// the slice is sorted.
length := len(n.children)
if n.children[length-1].value == "*" {
length--
}
// Find the index of child with an exact match
i := sort.Search(length, func(i int) bool {
return n.children[i].value >= lineParts[0]
})
// Found an exact match, so search that child sub-tree
if i < len(n.children) && n.children[i].value == lineParts[0] {
return n.children[i].search(lineParts[1:])
}
// Not an exact match, see if we have a wildcard child to search
if n.children[len(n.children)-1].value == "*" {
return n.children[len(n.children)-1].search(lineParts[1:])
}
return n.template
}
func (n *node) Search(line string) *template {
return n.search(strings.Split(line, "."))
}
type nodes []*node
// Less returns a boolean indicating whether the filter at position j
// is less than the filter at position k. Filters are order by string
// comparison of each component parts. A wildcard value "*" is never
// less than a non-wildcard value.
//
// For example, the filters:
// "*.*"
// "servers.*"
// "servers.localhost"
// "*.localhost"
//
// Would be sorted as:
// "servers.localhost"
// "servers.*"
// "*.localhost"
// "*.*"
func (n *nodes) Less(j, k int) bool {
if (*n)[j].value == "*" && (*n)[k].value != "*" {
return false
}
if (*n)[j].value != "*" && (*n)[k].value == "*" {
return true
}
return (*n)[j].value < (*n)[k].value
}
func (n *nodes) Swap(i, j int) { (*n)[i], (*n)[j] = (*n)[j], (*n)[i] }
func (n *nodes) Len() int { return len(*n) }

View File

@@ -1,48 +0,0 @@
package influx
import (
"fmt"
"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/internal/encoding"
)
type InfluxParser struct {
}
func (p *InfluxParser) Parse(buf []byte) ([]telegraf.Metric, error) {
metrics, err := telegraf.ParseMetrics(buf)
if err != nil {
return nil, err
}
return metrics, nil
}
func (p *InfluxParser) ParseLine(line string) (telegraf.Metric, error) {
metrics, err := p.Parse([]byte(line + "\n"))
if err != nil {
return nil, err
}
if len(metrics) < 1 {
return nil, fmt.Errorf("Can not parse the line: %s, for data format: influx ", line)
}
return metrics[0], nil
}
func NewParser() *InfluxParser {
return &InfluxParser{}
}
func (p *InfluxParser) InitConfig(configs map[string]interface{}) error {
return nil
}
func init() {
encoding.Add("influx", func() encoding.Parser {
return NewParser()
})
}

View File

@@ -1,68 +0,0 @@
package json
import (
"encoding/json"
"fmt"
"time"
"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/internal"
"github.com/influxdata/telegraf/internal/encoding"
)
type JsonParser struct {
}
func (p *JsonParser) Parse(buf []byte) ([]telegraf.Metric, error) {
metrics := make([]telegraf.Metric, 0)
var jsonOut interface{}
err := json.Unmarshal(buf, &jsonOut)
if err != nil {
err = fmt.Errorf("unable to parse out as JSON, %s", err)
return nil, err
}
f := internal.JSONFlattener{}
err = f.FlattenJSON("", jsonOut)
if err != nil {
return nil, err
}
metric, err := telegraf.NewMetric("exec", nil, f.Fields, time.Now().UTC())
if err != nil {
return nil, err
}
return append(metrics, metric), nil
}
func (p *JsonParser) ParseLine(line string) (telegraf.Metric, error) {
metrics, err := p.Parse([]byte(line + "\n"))
if err != nil {
return nil, err
}
if len(metrics) < 1 {
return nil, fmt.Errorf("Can not parse the line: %s, for data format: influx ", line)
}
return metrics[0], nil
}
func NewParser() *JsonParser {
return &JsonParser{}
}
func (p *JsonParser) InitConfig(configs map[string]interface{}) error {
return nil
}
func init() {
encoding.Add("json", func() encoding.Parser {
return NewParser()
})
}

View File

@@ -9,7 +9,6 @@ import (
"fmt"
"io/ioutil"
"os"
"strconv"
"strings"
"time"
)
@@ -35,47 +34,6 @@ func (d *Duration) UnmarshalTOML(b []byte) error {
var NotImplementedError = errors.New("not implemented yet")
type JSONFlattener struct {
Fields map[string]interface{}
}
// FlattenJSON flattens nested maps/interfaces into a fields map
func (f *JSONFlattener) FlattenJSON(
fieldname string,
v interface{},
) error {
if f.Fields == nil {
f.Fields = make(map[string]interface{})
}
fieldname = strings.Trim(fieldname, "_")
switch t := v.(type) {
case map[string]interface{}:
for k, v := range t {
err := f.FlattenJSON(fieldname+"_"+k+"_", v)
if err != nil {
return err
}
}
case []interface{}:
for i, v := range t {
k := strconv.Itoa(i)
err := f.FlattenJSON(fieldname+"_"+k+"_", v)
if err != nil {
return nil
}
}
case float64:
f.Fields[fieldname] = t
case bool, string, nil:
// ignored types
return nil
default:
return fmt.Errorf("JSON Flattener: got unexpected type %T with value %v (%s)",
t, t, fieldname)
}
return nil
}
// ReadLines reads contents from a file and splits them by new lines.
// A convenience wrapper to ReadLinesOffsetN(filename, 0, -1).
func ReadLines(filename string) ([]string, error) {