Implement telegraf's own full metric type

main reasons behind this:
- make adding/removing tags cheap
- make adding/removing fields cheap
- make parsing cheaper
- make parse -> decorate -> write out bytes metric flow much faster

Refactor serializer to use byte buffer
This commit is contained in:
Cameron Sparr
2016-11-22 12:51:57 +00:00
parent 332f678afb
commit db7a4b24b6
40 changed files with 1376 additions and 398 deletions

View File

@@ -0,0 +1,38 @@
package metric
import (
"reflect"
"strconv"
"unsafe"
)
// parseIntBytes is a zero-alloc wrapper around strconv.ParseInt.
func parseIntBytes(b []byte, base int, bitSize int) (i int64, err error) {
s := unsafeBytesToString(b)
return strconv.ParseInt(s, base, bitSize)
}
// parseFloatBytes is a zero-alloc wrapper around strconv.ParseFloat.
func parseFloatBytes(b []byte, bitSize int) (float64, error) {
s := unsafeBytesToString(b)
return strconv.ParseFloat(s, bitSize)
}
// parseBoolBytes is a zero-alloc wrapper around strconv.ParseBool.
func parseBoolBytes(b []byte) (bool, error) {
return strconv.ParseBool(unsafeBytesToString(b))
}
// unsafeBytesToString converts a []byte to a string without a heap allocation.
//
// It is unsafe, and is intended to prepare input to short-lived functions
// that require strings.
func unsafeBytesToString(in []byte) string {
src := *(*reflect.SliceHeader)(unsafe.Pointer(&in))
dst := reflect.StringHeader{
Data: src.Data,
Len: src.Len,
}
s := *(*string)(unsafe.Pointer(&dst))
return s
}

455
metric/metric.go Normal file
View File

@@ -0,0 +1,455 @@
package metric
import (
"bytes"
"fmt"
"hash/fnv"
"sort"
"strconv"
"strings"
"time"
"github.com/influxdata/telegraf"
// TODO remove
"github.com/influxdata/influxdb/client/v2"
)
var (
// escaper is for escaping:
// - tag keys
// - tag values
// - field keys
// see https://docs.influxdata.com/influxdb/v1.0/write_protocols/line_protocol_tutorial/#special-characters-and-keywords
escaper = strings.NewReplacer(`,`, `\,`, `"`, `\"`, ` `, `\ `, `=`, `\=`)
// nameEscaper is for escaping measurement names only.
// see https://docs.influxdata.com/influxdb/v1.0/write_protocols/line_protocol_tutorial/#special-characters-and-keywords
nameEscaper = strings.NewReplacer(`,`, `\,`, ` `, `\ `)
// stringFieldEscaper is for escaping string field values only.
// see https://docs.influxdata.com/influxdb/v1.0/write_protocols/line_protocol_tutorial/#special-characters-and-keywords
stringFieldEscaper = strings.NewReplacer(`"`, `\"`)
)
func New(
name string,
tags map[string]string,
fields map[string]interface{},
t time.Time,
mType ...telegraf.ValueType,
) (telegraf.Metric, error) {
if len(fields) == 0 {
return nil, fmt.Errorf("Metric cannot be made without any fields")
}
var thisType telegraf.ValueType
if len(mType) > 0 {
thisType = mType[0]
} else {
thisType = telegraf.Untyped
}
m := &metric{
name: []byte(nameEscaper.Replace(name)),
t: []byte(fmt.Sprint(t.UnixNano())),
nsec: t.UnixNano(),
mType: thisType,
}
m.tags = []byte{}
for k, v := range tags {
m.tags = append(m.tags, []byte(","+escaper.Replace(k))...)
m.tags = append(m.tags, []byte("="+escaper.Replace(v))...)
}
m.fields = []byte{' '}
i := 0
for k, v := range fields {
if i != 0 {
m.fields = append(m.fields, ',')
}
m.fields = appendField(m.fields, k, v)
i++
}
m.fields = append(m.fields, ' ')
return m, nil
}
// indexUnescapedByte finds the index of the first byte equal to b in buf that
// is not escaped. Returns -1 if not found.
func indexUnescapedByte(buf []byte, b byte) int {
var keyi int
for {
i := bytes.IndexByte(buf[keyi:], b)
if i == -1 {
return -1
} else if i == 0 {
break
}
keyi += i
if countBackslashes(buf, keyi-1)%2 == 0 {
break
} else {
keyi++
}
}
return keyi
}
// countBackslashes counts the number of preceding backslashes starting at
// the 'start' index.
func countBackslashes(buf []byte, index int) int {
var count int
for {
if buf[index] == '\\' {
count++
index--
} else {
break
}
}
return count
}
type metric struct {
name []byte
tags []byte
fields []byte
t []byte
mType telegraf.ValueType
aggregate bool
// cached values for reuse in "get" functions
hashID uint64
nsec int64
fieldMap map[string]interface{}
tagMap map[string]string
}
func (m *metric) Point() *client.Point {
return &client.Point{}
}
func (m *metric) String() string {
return string(m.Serialize())
}
func (m *metric) SetAggregate(b bool) {
m.aggregate = b
}
func (m *metric) IsAggregate() bool {
return m.aggregate
}
func (m *metric) Type() telegraf.ValueType {
return m.mType
}
func (m *metric) Len() int {
return len(m.name) + len(m.tags) + len(m.fields) + len(m.t) + 1
}
func (m *metric) Serialize() []byte {
tmp := make([]byte, m.Len())
copy(tmp, m.name)
copy(tmp[len(m.name):], m.tags)
copy(tmp[len(m.name)+len(m.tags):], m.fields)
copy(tmp[len(m.name)+len(m.tags)+len(m.fields):], m.t)
tmp[len(tmp)-1] = '\n'
return tmp
}
func (m *metric) Fields() map[string]interface{} {
if m.fieldMap != nil {
// TODO should we return a copy?
return m.fieldMap
}
m.fieldMap = map[string]interface{}{}
i := 1
for {
if i >= len(m.fields) {
break
}
// end index of field key
i1 := indexUnescapedByte(m.fields[i:], '=')
if i1 == -1 {
break
}
// start index of field value
i2 := i1 + 1
// end index of field value
i3 := indexUnescapedByte(m.fields[i:], ',')
if i3 == -1 {
i3 = len(m.fields[i:]) - 1
}
switch m.fields[i:][i2] {
case '"':
// string field
m.fieldMap[string(m.fields[i:][0:i1])] = string(m.fields[i:][i2+1 : i3-1])
case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
// number field
switch m.fields[i:][i3-1] {
case 'i':
// integer field
n, err := strconv.ParseInt(string(m.fields[i:][i2:i3-1]), 10, 64)
if err == nil {
m.fieldMap[string(m.fields[i:][0:i1])] = n
} else {
// TODO handle error or just ignore field silently?
}
default:
// float field
n, err := strconv.ParseFloat(string(m.fields[i:][i2:i3]), 64)
if err == nil {
m.fieldMap[string(m.fields[i:][0:i1])] = n
} else {
// TODO handle error or just ignore field silently?
}
}
case 'T', 't':
// TODO handle "true" booleans
case 'F', 'f':
// TODO handle "false" booleans
default:
// TODO handle unsupported field type
}
i += i3 + 1
}
return m.fieldMap
}
func (m *metric) Tags() map[string]string {
if m.tagMap != nil {
// TODO should we return a copy?
return m.tagMap
}
m.tagMap = map[string]string{}
if len(m.tags) == 0 {
return m.tagMap
}
i := 0
for {
// start index of tag key
i0 := indexUnescapedByte(m.tags[i:], ',') + 1
if i0 == 0 {
// didn't find a tag start
break
}
// end index of tag key
i1 := indexUnescapedByte(m.tags[i:], '=')
// start index of tag value
i2 := i1 + 1
// end index of tag value (starting from i2)
i3 := indexUnescapedByte(m.tags[i+i2:], ',')
if i3 == -1 {
m.tagMap[string(m.tags[i:][i0:i1])] = string(m.tags[i:][i2:])
break
}
m.tagMap[string(m.tags[i:][i0:i1])] = string(m.tags[i:][i2 : i2+i3])
// increment start index for the next tag
i += i2 + i3
}
return m.tagMap
}
func (m *metric) Name() string {
return string(m.name)
}
func (m *metric) Time() time.Time {
// assume metric has been verified already and ignore error:
if m.nsec == 0 {
m.nsec, _ = strconv.ParseInt(string(m.t), 10, 64)
}
return time.Unix(0, m.nsec)
}
func (m *metric) UnixNano() int64 {
// assume metric has been verified already and ignore error:
if m.nsec == 0 {
m.nsec, _ = strconv.ParseInt(string(m.t), 10, 64)
}
return m.nsec
}
func (m *metric) SetName(name string) {
m.name = []byte(nameEscaper.Replace(name))
}
func (m *metric) SetPrefix(prefix string) {
m.name = append([]byte(nameEscaper.Replace(prefix)), m.name...)
}
func (m *metric) SetSuffix(suffix string) {
m.name = append(m.name, []byte(nameEscaper.Replace(suffix))...)
}
func (m *metric) AddTag(key, value string) {
m.RemoveTag(key)
m.tags = append(m.tags, []byte(","+escaper.Replace(key)+"="+escaper.Replace(value))...)
}
func (m *metric) HasTag(key string) bool {
i := bytes.Index(m.tags, []byte(escaper.Replace(key)+"="))
if i == -1 {
return false
}
return true
}
func (m *metric) RemoveTag(key string) bool {
m.tagMap = nil
m.hashID = 0
i := bytes.Index(m.tags, []byte(escaper.Replace(key)+"="))
if i == -1 {
return false
}
tmp := m.tags[0 : i-1]
j := indexUnescapedByte(m.tags[i:], ',')
if j != -1 {
tmp = append(tmp, m.tags[i+j:]...)
}
m.tags = tmp
return true
}
func (m *metric) AddField(key string, value interface{}) {
m.fieldMap = nil
m.fields = append(m.fields, ',')
appendField(m.fields, key, value)
}
func (m *metric) HasField(key string) bool {
i := bytes.Index(m.fields, []byte(escaper.Replace(key)+"="))
if i == -1 {
return false
}
return true
}
func (m *metric) RemoveField(key string) bool {
m.fieldMap = nil
m.hashID = 0
i := bytes.Index(m.fields, []byte(escaper.Replace(key)+"="))
if i == -1 {
return false
}
tmp := m.fields[0 : i-1]
j := indexUnescapedByte(m.fields[i:], ',')
if j != -1 {
tmp = append(tmp, m.fields[i+j:]...)
}
m.fields = tmp
return true
}
func (m *metric) Copy() telegraf.Metric {
name := make([]byte, len(m.name))
tags := make([]byte, len(m.tags))
fields := make([]byte, len(m.fields))
t := make([]byte, len(m.t))
copy(name, m.name)
copy(tags, m.tags)
copy(fields, m.fields)
copy(t, m.t)
return &metric{
name: name,
tags: tags,
fields: fields,
t: t,
hashID: m.hashID,
}
}
func (m *metric) HashID() uint64 {
if m.hashID == 0 {
h := fnv.New64a()
h.Write(m.name)
tags := m.Tags()
tmp := make([]string, len(tags))
i := 0
for k, v := range tags {
tmp[i] = k + v
i++
}
sort.Strings(tmp)
for _, s := range tmp {
h.Write([]byte(s))
}
m.hashID = h.Sum64()
}
return m.hashID
}
func appendField(b []byte, k string, v interface{}) []byte {
b = append(b, []byte(escaper.Replace(k)+"=")...)
// check popular types first
switch v := v.(type) {
case float64:
b = strconv.AppendFloat(b, v, 'f', -1, 64)
case int64:
b = strconv.AppendInt(b, v, 10)
b = append(b, 'i')
case string:
b = append(b, '"')
b = append(b, []byte(stringFieldEscaper.Replace(v))...)
b = append(b, '"')
case bool:
b = strconv.AppendBool(b, v)
case int32:
b = strconv.AppendInt(b, int64(v), 10)
b = append(b, 'i')
case int16:
b = strconv.AppendInt(b, int64(v), 10)
b = append(b, 'i')
case int8:
b = strconv.AppendInt(b, int64(v), 10)
b = append(b, 'i')
case int:
b = strconv.AppendInt(b, int64(v), 10)
b = append(b, 'i')
case uint32:
b = strconv.AppendInt(b, int64(v), 10)
b = append(b, 'i')
case uint16:
b = strconv.AppendInt(b, int64(v), 10)
b = append(b, 'i')
case uint8:
b = strconv.AppendInt(b, int64(v), 10)
b = append(b, 'i')
// TODO: 'uint' should be considered just as "dangerous" as a uint64,
// perhaps the value should be checked and capped at MaxInt64? We could
// then include uint64 as an accepted value
case uint:
b = strconv.AppendInt(b, int64(v), 10)
b = append(b, 'i')
case float32:
b = strconv.AppendFloat(b, float64(v), 'f', -1, 32)
case []byte:
b = append(b, v...)
case nil:
// skip
default:
// Can't determine the type, so convert to string
b = append(b, '"')
b = append(b, []byte(stringFieldEscaper.Replace(fmt.Sprintf("%v", v)))...)
b = append(b, '"')
}
return b
}

View File

@@ -0,0 +1,141 @@
package metric
import (
"fmt"
"testing"
"time"
)
// vars for making sure that the compiler doesnt optimize out the benchmarks:
var (
s string
I interface{}
tags map[string]string
fields map[string]interface{}
)
func BenchmarkNewMetric(b *testing.B) {
var mt Metric
for n := 0; n < b.N; n++ {
mt, _ = NewMetric("test_metric",
map[string]string{
"test_tag_1": "tag_value_1",
"test_tag_2": "tag_value_2",
"test_tag_3": "tag_value_3",
},
map[string]interface{}{
"string_field": "string",
"int_field": int64(1000),
"float_field": float64(2.1),
},
time.Now(),
)
}
s = string(mt.String())
}
func BenchmarkNewMetricAndInspect(b *testing.B) {
var mt Metric
for n := 0; n < b.N; n++ {
mt, _ = NewMetric("test_metric",
map[string]string{
"test_tag_1": "tag_value_1",
"test_tag_2": "tag_value_2",
"test_tag_3": "tag_value_3",
},
map[string]interface{}{
"string_field": "string",
"int_field": int64(1000),
"float_field": float64(2.1),
},
time.Now(),
)
for k, v := range mt.Fields() {
s = k
I = v
}
}
s = mt.String()
}
func BenchmarkTags(b *testing.B) {
var mt, _ = NewMetric("test_metric",
map[string]string{
"test_tag_1": "tag_value_1",
"test_tag_2": "tag_value_2",
"test_tag_3": "tag_value_3",
},
map[string]interface{}{
"string_field": "string",
"int_field": int64(1000),
"float_field": float64(2.1),
},
time.Now(),
)
for n := 0; n < b.N; n++ {
tags = mt.Tags()
}
s = fmt.Sprint(tags)
}
func BenchmarkFields(b *testing.B) {
var mt, _ = NewMetric("test_metric",
map[string]string{
"test_tag_1": "tag_value_1",
"test_tag_2": "tag_value_2",
"test_tag_3": "tag_value_3",
},
map[string]interface{}{
"string_field": "string",
"int_field": int64(1000),
"float_field": float64(2.1),
},
time.Now(),
)
for n := 0; n < b.N; n++ {
fields = mt.Fields()
}
s = fmt.Sprint(fields)
}
func BenchmarkSerializeMetric(b *testing.B) {
mt, _ := NewMetric("test_metric",
map[string]string{
"test_tag_1": "tag_value_1",
"test_tag_2": "tag_value_2",
"test_tag_3": "tag_value_3",
},
map[string]interface{}{
"string_field": "string",
"int_field": int64(1000),
"float_field": float64(2.1),
},
time.Now(),
)
var S string
for n := 0; n < b.N; n++ {
S = mt.String()
}
s = S
}
func BenchmarkSerializeMetricBytes(b *testing.B) {
mt, _ := NewMetric("test_metric",
map[string]string{
"test_tag_1": "tag_value_1",
"test_tag_2": "tag_value_2",
"test_tag_3": "tag_value_3",
},
map[string]interface{}{
"string_field": "string",
"int_field": int64(1000),
"float_field": float64(2.1),
},
time.Now(),
)
var B []byte
for n := 0; n < b.N; n++ {
B = mt.Serialize()
}
s = string(B)
}

107
metric/metric_test.go Normal file
View File

@@ -0,0 +1,107 @@
package metric
import (
"fmt"
"math"
"testing"
"time"
"github.com/stretchr/testify/assert"
)
func TestNewMetric(t *testing.T) {
now := time.Now()
tags := map[string]string{
"host": "localhost",
"datacenter": "us-east-1",
}
fields := map[string]interface{}{
"usage_idle": float64(99),
"usage_busy": float64(1),
}
m, err := New("cpu", tags, fields, now)
assert.NoError(t, err)
assert.Equal(t, Untyped, m.Type())
assert.Equal(t, tags, m.Tags())
assert.Equal(t, fields, m.Fields())
assert.Equal(t, "cpu", m.Name())
assert.Equal(t, now, m.Time())
assert.Equal(t, now.UnixNano(), m.UnixNano())
}
func TestNewGaugeMetric(t *testing.T) {
now := time.Now()
tags := map[string]string{
"host": "localhost",
"datacenter": "us-east-1",
}
fields := map[string]interface{}{
"usage_idle": float64(99),
"usage_busy": float64(1),
}
m, err := New("cpu", tags, fields, now, Gauge)
assert.NoError(t, err)
assert.Equal(t, Gauge, m.Type())
assert.Equal(t, tags, m.Tags())
assert.Equal(t, fields, m.Fields())
assert.Equal(t, "cpu", m.Name())
assert.Equal(t, now, m.Time())
assert.Equal(t, now.UnixNano(), m.UnixNano())
}
func TestNewCounterMetric(t *testing.T) {
now := time.Now()
tags := map[string]string{
"host": "localhost",
"datacenter": "us-east-1",
}
fields := map[string]interface{}{
"usage_idle": float64(99),
"usage_busy": float64(1),
}
m, err := New("cpu", tags, fields, now, Counter)
assert.NoError(t, err)
assert.Equal(t, Counter, m.Type())
assert.Equal(t, tags, m.Tags())
assert.Equal(t, fields, m.Fields())
assert.Equal(t, "cpu", m.Name())
assert.Equal(t, now, m.Time())
assert.Equal(t, now.UnixNano(), m.UnixNano())
}
func TestNewMetricString(t *testing.T) {
now := time.Now()
tags := map[string]string{
"host": "localhost",
}
fields := map[string]interface{}{
"usage_idle": float64(99),
}
m, err := New("cpu", tags, fields, now)
assert.NoError(t, err)
lineProto := fmt.Sprintf("cpu,host=localhost usage_idle=99 %d\n",
now.UnixNano())
assert.Equal(t, lineProto, m.String())
}
func TestNewMetricFailNaN(t *testing.T) {
now := time.Now()
tags := map[string]string{
"host": "localhost",
}
fields := map[string]interface{}{
"usage_idle": math.NaN(),
}
_, err := New("cpu", tags, fields, now)
assert.NoError(t, err)
}

624
metric/parse.go Normal file
View File

@@ -0,0 +1,624 @@
package metric
import (
"bytes"
"errors"
"fmt"
"time"
"github.com/influxdata/telegraf"
)
var (
ErrInvalidNumber = errors.New("invalid number")
)
const (
// the number of characters for the largest possible int64 (9223372036854775807)
maxInt64Digits = 19
// the number of characters for the smallest possible int64 (-9223372036854775808)
minInt64Digits = 20
// the number of characters required for the largest float64 before a range check
// would occur during parsing
maxFloat64Digits = 25
// the number of characters required for smallest float64 before a range check occur
// would occur during parsing
minFloat64Digits = 27
MaxKeyLength = 65535
)
// The following constants allow us to specify which state to move to
// next, when scanning sections of a Point.
const (
tagKeyState = iota
tagValueState
fieldsState
)
func Parse(buf []byte) ([]telegraf.Metric, error) {
return ParseWithDefaultTime(buf, time.Now())
}
func ParseWithDefaultTime(buf []byte, t time.Time) ([]telegraf.Metric, error) {
metrics := make([]telegraf.Metric, 0, bytes.Count(buf, []byte("\n"))+1)
var (
errStr string
line []byte
err error
)
b := bytes.NewBuffer(buf)
for {
line, err = b.ReadBytes('\n')
if err != nil {
break
}
if len(line) < 2 {
continue
}
// trim the newline:
line = line[0 : len(line)-1]
m, err := parseMetric(line, t)
if err != nil {
errStr += " " + err.Error()
continue
}
metrics = append(metrics, m)
}
if len(errStr) > 0 {
return metrics, fmt.Errorf(errStr)
}
return metrics, nil
}
func parseMetric(buf []byte, defaultTime time.Time) (telegraf.Metric, error) {
var dTime string
// scan the first block which is measurement[,tag1=value1,tag2=value=2...]
pos, key, err := scanKey(buf, 0)
if err != nil {
return nil, err
}
// measurement name is required
if len(key) == 0 {
return nil, fmt.Errorf("missing measurement")
}
if len(key) > MaxKeyLength {
return nil, fmt.Errorf("max key length exceeded: %v > %v", len(key), MaxKeyLength)
}
// scan the second block is which is field1=value1[,field2=value2,...]
pos, fields, err := scanFields(buf, pos)
if err != nil {
return nil, err
}
// at least one field is required
if len(fields) == 0 {
return nil, fmt.Errorf("missing fields")
}
// scan the last block which is an optional integer timestamp
pos, ts, err := scanTime(buf, pos)
if err != nil {
return nil, err
}
m := &metric{
fields: fields,
t: ts,
}
// parse out the measurement name
// namei is the index at which the "name" ends
namei := indexUnescapedByte(key, ',')
if namei < 1 {
// no tags
m.name = key
} else {
m.name = key[0:namei]
m.tags = key[namei:]
}
if len(m.t) == 0 {
if len(dTime) == 0 {
dTime = fmt.Sprint(defaultTime.UnixNano())
}
// use default time
m.t = []byte(dTime)
}
return m, nil
}
// scanKey scans buf starting at i for the measurement and tag portion of the point.
// It returns the ending position and the byte slice of key within buf. If there
// are tags, they will be sorted if they are not already.
func scanKey(buf []byte, i int) (int, []byte, error) {
start := skipWhitespace(buf, i)
i = start
// First scan the Point's measurement.
state, i, err := scanMeasurement(buf, i)
if err != nil {
return i, buf[start:i], err
}
// Optionally scan tags if needed.
if state == tagKeyState {
i, err = scanTags(buf, i)
if err != nil {
return i, buf[start:i], err
}
}
return i, buf[start:i], nil
}
// scanMeasurement examines the measurement part of a Point, returning
// the next state to move to, and the current location in the buffer.
func scanMeasurement(buf []byte, i int) (int, int, error) {
// Check first byte of measurement, anything except a comma is fine.
// It can't be a space, since whitespace is stripped prior to this
// function call.
if i >= len(buf) || buf[i] == ',' {
return -1, i, fmt.Errorf("missing measurement")
}
for {
i++
if i >= len(buf) {
// cpu
return -1, i, fmt.Errorf("missing fields")
}
if buf[i-1] == '\\' {
// Skip character (it's escaped).
continue
}
// Unescaped comma; move onto scanning the tags.
if buf[i] == ',' {
return tagKeyState, i + 1, nil
}
// Unescaped space; move onto scanning the fields.
if buf[i] == ' ' {
// cpu value=1.0
return fieldsState, i, nil
}
}
}
// scanTags examines all the tags in a Point, keeping track of and
// returning the updated indices slice, number of commas and location
// in buf where to start examining the Point fields.
func scanTags(buf []byte, i int) (int, error) {
var (
err error
state = tagKeyState
)
for {
switch state {
case tagKeyState:
i, err = scanTagsKey(buf, i)
state = tagValueState // tag value always follows a tag key
case tagValueState:
state, i, err = scanTagsValue(buf, i)
case fieldsState:
return i, nil
}
if err != nil {
return i, err
}
}
}
// scanTagsKey scans each character in a tag key.
func scanTagsKey(buf []byte, i int) (int, error) {
// First character of the key.
if i >= len(buf) || buf[i] == ' ' || buf[i] == ',' || buf[i] == '=' {
// cpu,{'', ' ', ',', '='}
return i, fmt.Errorf("missing tag key")
}
// Examine each character in the tag key until we hit an unescaped
// equals (the tag value), or we hit an error (i.e., unescaped
// space or comma).
for {
i++
// Either we reached the end of the buffer or we hit an
// unescaped comma or space.
if i >= len(buf) ||
((buf[i] == ' ' || buf[i] == ',') && buf[i-1] != '\\') {
// cpu,tag{'', ' ', ','}
return i, fmt.Errorf("missing tag value")
}
if buf[i] == '=' && buf[i-1] != '\\' {
// cpu,tag=
return i + 1, nil
}
}
}
// scanTagsValue scans each character in a tag value.
func scanTagsValue(buf []byte, i int) (int, int, error) {
// Tag value cannot be empty.
if i >= len(buf) || buf[i] == ',' || buf[i] == ' ' {
// cpu,tag={',', ' '}
return -1, i, fmt.Errorf("missing tag value")
}
// Examine each character in the tag value until we hit an unescaped
// comma (move onto next tag key), an unescaped space (move onto
// fields), or we error out.
for {
i++
if i >= len(buf) {
// cpu,tag=value
return -1, i, fmt.Errorf("missing fields")
}
// An unescaped equals sign is an invalid tag value.
if buf[i] == '=' && buf[i-1] != '\\' {
// cpu,tag={'=', 'fo=o'}
return -1, i, fmt.Errorf("invalid tag format")
}
if buf[i] == ',' && buf[i-1] != '\\' {
// cpu,tag=foo,
return tagKeyState, i + 1, nil
}
// cpu,tag=foo value=1.0
// cpu, tag=foo\= value=1.0
if buf[i] == ' ' && buf[i-1] != '\\' {
return fieldsState, i, nil
}
}
}
// scanFields scans buf, starting at i for the fields section of a point. It returns
// the ending position and the byte slice of the fields within buf
func scanFields(buf []byte, i int) (int, []byte, error) {
start := skipWhitespace(buf, i)
i = start
quoted := false
// tracks how many '=' we've seen
equals := 0
// tracks how many commas we've seen
commas := 0
for {
// reached the end of buf?
if i >= len(buf) {
break
}
// escaped characters?
if buf[i] == '\\' && i+1 < len(buf) {
i += 2
continue
}
// If the value is quoted, scan until we get to the end quote
// Only quote values in the field value since quotes are not significant
// in the field key
if buf[i] == '"' && equals > commas {
quoted = !quoted
i++
continue
}
// If we see an =, ensure that there is at least on char before and after it
if buf[i] == '=' && !quoted {
equals++
// check for "... =123" but allow "a\ =123"
if buf[i-1] == ' ' && buf[i-2] != '\\' {
return i, buf[start:i], fmt.Errorf("missing field key")
}
// check for "...a=123,=456" but allow "a=123,a\,=456"
if buf[i-1] == ',' && buf[i-2] != '\\' {
return i, buf[start:i], fmt.Errorf("missing field key")
}
// check for "... value="
if i+1 >= len(buf) {
return i, buf[start:i], fmt.Errorf("missing field value")
}
// check for "... value=,value2=..."
if buf[i+1] == ',' || buf[i+1] == ' ' {
return i, buf[start:i], fmt.Errorf("missing field value")
}
if isNumeric(buf[i+1]) || buf[i+1] == '-' || buf[i+1] == 'N' || buf[i+1] == 'n' {
var err error
i, err = scanNumber(buf, i+1)
if err != nil {
return i, buf[start:i], err
}
continue
}
// If next byte is not a double-quote, the value must be a boolean
if buf[i+1] != '"' {
var err error
i, _, err = scanBoolean(buf, i+1)
if err != nil {
return i, buf[start:i], err
}
continue
}
}
if buf[i] == ',' && !quoted {
commas++
}
// reached end of block?
if buf[i] == ' ' && !quoted {
break
}
i++
}
if quoted {
return i, buf[start:i], fmt.Errorf("unbalanced quotes")
}
// check that all field sections had key and values (e.g. prevent "a=1,b"
if equals == 0 || commas != equals-1 {
return i, buf[start:i], fmt.Errorf("invalid field format")
}
return i, buf[start:i], nil
}
// scanTime scans buf, starting at i for the time section of a point. It
// returns the ending position and the byte slice of the timestamp within buf
// and and error if the timestamp is not in the correct numeric format.
func scanTime(buf []byte, i int) (int, []byte, error) {
start := skipWhitespace(buf, i)
i = start
for {
// reached the end of buf?
if i >= len(buf) {
break
}
// Reached end of block or trailing whitespace?
if buf[i] == '\n' || buf[i] == ' ' {
break
}
// Handle negative timestamps
if i == start && buf[i] == '-' {
i++
continue
}
// Timestamps should be integers, make sure they are so we don't need
// to actually parse the timestamp until needed.
if buf[i] < '0' || buf[i] > '9' {
return i, buf[start:i], fmt.Errorf("bad timestamp")
}
i++
}
return i, buf[start:i], nil
}
func isNumeric(b byte) bool {
return (b >= '0' && b <= '9') || b == '.'
}
// scanNumber returns the end position within buf, start at i after
// scanning over buf for an integer, or float. It returns an
// error if a invalid number is scanned.
func scanNumber(buf []byte, i int) (int, error) {
start := i
var isInt bool
// Is negative number?
if i < len(buf) && buf[i] == '-' {
i++
// There must be more characters now, as just '-' is illegal.
if i == len(buf) {
return i, ErrInvalidNumber
}
}
// how many decimal points we've see
decimal := false
// indicates the number is float in scientific notation
scientific := false
for {
if i >= len(buf) {
break
}
if buf[i] == ',' || buf[i] == ' ' {
break
}
if buf[i] == 'i' && i > start && !isInt {
isInt = true
i++
continue
}
if buf[i] == '.' {
// Can't have more than 1 decimal (e.g. 1.1.1 should fail)
if decimal {
return i, ErrInvalidNumber
}
decimal = true
}
// `e` is valid for floats but not as the first char
if i > start && (buf[i] == 'e' || buf[i] == 'E') {
scientific = true
i++
continue
}
// + and - are only valid at this point if they follow an e (scientific notation)
if (buf[i] == '+' || buf[i] == '-') && (buf[i-1] == 'e' || buf[i-1] == 'E') {
i++
continue
}
// NaN is an unsupported value
if i+2 < len(buf) && (buf[i] == 'N' || buf[i] == 'n') {
return i, ErrInvalidNumber
}
if !isNumeric(buf[i]) {
return i, ErrInvalidNumber
}
i++
}
if isInt && (decimal || scientific) {
return i, ErrInvalidNumber
}
numericDigits := i - start
if isInt {
numericDigits--
}
if decimal {
numericDigits--
}
if buf[start] == '-' {
numericDigits--
}
if numericDigits == 0 {
return i, ErrInvalidNumber
}
// It's more common that numbers will be within min/max range for their type but we need to prevent
// out or range numbers from being parsed successfully. This uses some simple heuristics to decide
// if we should parse the number to the actual type. It does not do it all the time because it incurs
// extra allocations and we end up converting the type again when writing points to disk.
if isInt {
// Make sure the last char is an 'i' for integers (e.g. 9i10 is not valid)
if buf[i-1] != 'i' {
return i, ErrInvalidNumber
}
// Parse the int to check bounds the number of digits could be larger than the max range
// We subtract 1 from the index to remove the `i` from our tests
if len(buf[start:i-1]) >= maxInt64Digits || len(buf[start:i-1]) >= minInt64Digits {
if _, err := parseIntBytes(buf[start:i-1], 10, 64); err != nil {
return i, fmt.Errorf("unable to parse integer %s: %s", buf[start:i-1], err)
}
}
} else {
// Parse the float to check bounds if it's scientific or the number of digits could be larger than the max range
if scientific || len(buf[start:i]) >= maxFloat64Digits || len(buf[start:i]) >= minFloat64Digits {
if _, err := parseFloatBytes(buf[start:i], 10); err != nil {
return i, fmt.Errorf("invalid float")
}
}
}
return i, nil
}
// scanBoolean returns the end position within buf, start at i after
// scanning over buf for boolean. Valid values for a boolean are
// t, T, true, TRUE, f, F, false, FALSE. It returns an error if a invalid boolean
// is scanned.
func scanBoolean(buf []byte, i int) (int, []byte, error) {
start := i
if i < len(buf) && (buf[i] != 't' && buf[i] != 'f' && buf[i] != 'T' && buf[i] != 'F') {
return i, buf[start:i], fmt.Errorf("invalid boolean")
}
i++
for {
if i >= len(buf) {
break
}
if buf[i] == ',' || buf[i] == ' ' {
break
}
i++
}
// Single char bool (t, T, f, F) is ok
if i-start == 1 {
return i, buf[start:i], nil
}
// length must be 4 for true or TRUE
if (buf[start] == 't' || buf[start] == 'T') && i-start != 4 {
return i, buf[start:i], fmt.Errorf("invalid boolean")
}
// length must be 5 for false or FALSE
if (buf[start] == 'f' || buf[start] == 'F') && i-start != 5 {
return i, buf[start:i], fmt.Errorf("invalid boolean")
}
// Otherwise
valid := false
switch buf[start] {
case 't':
valid = bytes.Equal(buf[start:i], []byte("true"))
case 'f':
valid = bytes.Equal(buf[start:i], []byte("false"))
case 'T':
valid = bytes.Equal(buf[start:i], []byte("TRUE")) || bytes.Equal(buf[start:i], []byte("True"))
case 'F':
valid = bytes.Equal(buf[start:i], []byte("FALSE")) || bytes.Equal(buf[start:i], []byte("False"))
}
if !valid {
return i, buf[start:i], fmt.Errorf("invalid boolean")
}
return i, buf[start:i], nil
}
// skipWhitespace returns the end position within buf, starting at i after
// scanning over spaces in tags
func skipWhitespace(buf []byte, i int) int {
for i < len(buf) {
if buf[i] != ' ' && buf[i] != '\t' && buf[i] != 0 {
break
}
i++
}
return i
}
// makeError is a helper function for making a metric parsing error.
// reason is the reason that the error occured.
// buf should be the current buffer we are parsing.
// i is the current index, to give some context on where in the buffer we are.
func makeError(reason string, buf []byte, i int) error {
return fmt.Errorf("metric parsing error, reason: [%s], buffer: [%s], index: [%d]",
reason, buf, i)
}