Statsd plugin, tags and timings

Closes #237
Closes #39
This commit is contained in:
Cameron Sparr
2015-10-07 16:11:52 -06:00
parent 52be516fa3
commit 6977119f1e
24 changed files with 1096 additions and 242 deletions

View File

@@ -38,6 +38,11 @@
- [#4296](https://github.com/influxdb/influxdb/pull/4296): Reject line protocol ending with '-'. Fixes [#4272](https://github.com/influxdb/influxdb/issues/4272)
- [#4333](https://github.com/influxdb/influxdb/pull/4333): Retry monitor storage creation and only on Leader.
- [#4276](https://github.com/influxdb/influxdb/issues/4276): Walk DropSeriesStatement & check for empty sources
- [#4342](https://github.com/influxdb/influxdb/pull/4342): Fix mixing aggregates and math with non-aggregates. Thanks @kostya-sh.
- [#4349](https://github.com/influxdb/influxdb/issues/4349): If HH can't unmarshal a block, skip that block.
- [#4353](https://github.com/influxdb/influxdb/pull/4353): Fully lock node queues during hinted handoff. Fixes one cause of missing data on clusters.
- [#4357](https://github.com/influxdb/influxdb/issues/4357): Fix similar float values encoding overflow. Thanks @dgryski!
- [#4344](https://github.com/influxdb/influxdb/issues/4344): Make client.Write default to client.precision if none is given.
## v0.9.4 [2015-09-14]

View File

@@ -72,7 +72,7 @@ func (b *nodeBalancer) Next() *meta.NodeInfo {
}
d := &up[b.p]
b.p += 1
b.p++
return d
}

View File

@@ -220,10 +220,16 @@ func (c *Client) Write(bp BatchPoints) (*Response, error) {
if c.username != "" {
req.SetBasicAuth(c.username, c.password)
}
precision := bp.Precision
if precision == "" {
precision = c.precision
}
params := req.URL.Query()
params.Set("db", bp.Database)
params.Set("rp", bp.RetentionPolicy)
params.Set("precision", bp.Precision)
params.Set("precision", precision)
params.Set("consistency", bp.WriteConsistency)
req.URL.RawQuery = params.Encode()

View File

@@ -16,15 +16,19 @@ var (
ErrFieldTypeConflict = errors.New("field type conflict")
)
// ErrDatabaseNotFound indicates that a database operation failed on the
// specified database because the specified database does not exist.
func ErrDatabaseNotFound(name string) error { return fmt.Errorf("database not found: %s", name) }
// ErrRetentionPolicyNotFound indicates that the named retention policy could
// not be found in the database.
func ErrRetentionPolicyNotFound(name string) error {
return fmt.Errorf("retention policy not found: %s", name)
}
func ErrMeasurementNotFound(name string) error { return fmt.Errorf("measurement not found: %s", name) }
func errMeasurementNotFound(name string) error { return fmt.Errorf("measurement not found: %s", name) }
func Errorf(format string, a ...interface{}) (err error) {
func errorf(format string, a ...interface{}) (err error) {
if _, file, line, ok := runtime.Caller(2); ok {
a = append(a, file, line)
err = fmt.Errorf(format+" (%s:%d)", a...)

View File

@@ -1131,8 +1131,11 @@ func (s *SelectStatement) validSelectWithAggregate() error {
calls := map[string]struct{}{}
numAggregates := 0
for _, f := range s.Fields {
if c, ok := f.Expr.(*Call); ok {
fieldCalls := walkFunctionCalls(f.Expr)
for _, c := range fieldCalls {
calls[c.Name] = struct{}{}
}
if len(fieldCalls) != 0 {
numAggregates++
}
}
@@ -1166,8 +1169,7 @@ func (s *SelectStatement) validSelectWithAggregate() error {
func (s *SelectStatement) validateAggregates(tr targetRequirement) error {
for _, f := range s.Fields {
switch expr := f.Expr.(type) {
case *Call:
for _, expr := range walkFunctionCalls(f.Expr) {
switch expr.Name {
case "derivative", "non_negative_derivative":
if err := s.validSelectWithAggregate(); err != nil {

View File

@@ -1486,6 +1486,7 @@ func TestParser_ParseStatement(t *testing.T) {
{s: `SELECT field1 AS`, err: `found EOF, expected identifier at line 1, char 18`},
{s: `SELECT field1 FROM foo group by time(1s)`, err: `GROUP BY requires at least one aggregate function`},
{s: `SELECT count(value), value FROM foo`, err: `mixing aggregate and non-aggregate queries is not supported`},
{s: `SELECT count(value)/10, value FROM foo`, err: `mixing aggregate and non-aggregate queries is not supported`},
{s: `SELECT count(value) FROM foo group by time(1s)`, err: `aggregate functions with GROUP BY time require a WHERE time clause`},
{s: `SELECT count(value) FROM foo group by time(1s) where host = 'hosta.influxdb.org'`, err: `aggregate functions with GROUP BY time require a WHERE time clause`},
{s: `SELECT count(value) FROM foo group by time`, err: `time() is a function and expects at least one argument`},

View File

@@ -21,14 +21,20 @@ function send_failure_notification {
--body "The nightly build has failed, version: $version"
}
if [ $# -ne 4 ]; then
echo "$0 <smtp server> <user> <password> <to>"
if [ $# -lt 4 ]; then
echo "$0 <smtp server> <user> <password> <to> [RACE_ENABLED]"
exit 1
fi
SMTP=$1
USER=$2
PASSWORD=$3
TO=$4
RACE_ENABLED=$5
if [ -n "$RACE_ENABLED" ]; then
race="-x"
echo "Race-detection build enabled."
fi
REPO_DIR=`mktemp -d`
echo "Using $REPO_DIR for all work..."
@@ -41,7 +47,7 @@ git clone https://github.com/influxdb/influxdb.git
cd $GOPATH/src/github.com/influxdb/influxdb
VERSION="$MASTER_VERSION-nightly-`git log --pretty=format:'%h' -n 1`"
NIGHTLY_BUILD=true ./package.sh $VERSION
NIGHTLY_BUILD=true ./package.sh $race $VERSION
if [ $? -ne 0 ]; then
# Send notification e-mail.

View File

@@ -83,6 +83,7 @@ $0 [-h] [-p|-w] [-t <dist>] [-r <number>] <version>
-r release candidate number, if any.
Example: -r 7
-p just build packages
-x build with race-detection enabled
-w build packages for current working directory
imply -p
-t <dist>
@@ -264,7 +265,7 @@ do_build() {
fi
date=`date -u --iso-8601=seconds`
go install -a -ldflags="-X main.version=$version -X main.branch=$branch -X main.commit=$commit -X main.buildTime='$date'" ./...
go install $RACE -a -ldflags="-X main.version=$version -X main.branch=$branch -X main.commit=$commit -X main.buildTime='$date'" ./...
if [ $? -ne 0 ]; then
echo "Build failed, unable to create package -- aborting"
cleanup_exit 1
@@ -357,6 +358,11 @@ do
shift 2
;;
-x)
RACE="-race"
shift
;;
-w | --working-directory)
PACKAGES_ONLY="PACKAGES_ONLY"
WORKING_DIR="WORKING_DIR"
@@ -482,19 +488,6 @@ if [ -z "$NIGHTLY_BUILD" -a -z "$PACKAGES_ONLY" ]; then
fi
fi
if [ $ARCH == "i386" ]; then
rpm_package=influxdb-${VERSION}-1.i686.rpm # RPM packages use 1 for default package release.
debian_package=influxdb_`full_version $VERSION $RC`_i686.deb
deb_args="-a i686"
rpm_args="setarch i686"
elif [ $ARCH == "arm" ]; then
rpm_package=influxdb-${VERSION}-1.armel.rpm
debian_package=influxdb_`full_version $VERSION $RC`_armel.deb
else
rpm_package=influxdb-${VERSION}-1.x86_64.rpm
debian_package=influxdb_`full_version $VERSION $RC`_amd64.deb
fi
COMMON_FPM_ARGS="\
--log error \
-C $TMP_WORK_DIR \
@@ -504,7 +497,7 @@ COMMON_FPM_ARGS="\
--maintainer $MAINTAINER \
--after-install $POST_INSTALL_PATH \
--after-remove $POST_UNINSTALL_PATH \
--name influxdb \
--name influxdb${RACE} \
--config-files $CONFIG_ROOT_DIR \
--config-files $LOGROTATE_DIR"
@@ -518,7 +511,11 @@ if [ -n "$DEB_WANTED" ]; then
fi
if [ -n "$TAR_WANTED" ]; then
$FPM -s dir -t tar --prefix influxdb_`full_version $VERSION $RC`_${ARCH} -p influxdb_`full_version $VERSION $RC`_${ARCH}.tar.gz --description "$DESCRIPTION" $COMMON_FPM_ARGS --version `full_version $VERSION $RC ` .
if [ -n "$RACE" ]; then
# Tweak race prefix for tarball.
race="race_"
fi
$FPM -s dir -t tar --prefix influxdb_$race`full_version $VERSION $RC`_${ARCH} -p influxdb_$race`full_version $VERSION $RC`_${ARCH}.tar.gz --description "$DESCRIPTION" $COMMON_FPM_ARGS --version `full_version $VERSION $RC ` .
if [ $? -ne 0 ]; then
echo "Failed to create Tar package -- aborting."
cleanup_exit 1

View File

@@ -157,7 +157,14 @@ func (p *Parser) ApplyTemplate(line string) (string, map[string]string) {
}
// decode the name and tags
template := p.matcher.Match(fields[0])
return template.Apply(fields[0])
name, tags := template.Apply(fields[0])
// Set the default tags on the point if they are not already set
for k, v := range p.tags {
if _, ok := tags[k]; !ok {
tags[k] = v
}
}
return name, tags
}
// template represents a pattern and tags to map a graphite metric string to a influxdb Point

View File

@@ -4,6 +4,7 @@ import (
"encoding/binary"
"expvar"
"fmt"
"io"
"io/ioutil"
"log"
"os"
@@ -18,9 +19,13 @@ import (
)
const (
pointsHint = "points_hint"
pointsWrite = "points_write"
bytesWrite = "bytes_write"
pointsHint = "points_hint"
pointsWrite = "points_write"
bytesWrite = "bytes_write"
writeErr = "write_err"
unmarshalErr = "unmarshal_err"
advanceErr = "advance_err"
currentErr = "current_err"
)
type Processor struct {
@@ -98,10 +103,9 @@ func (p *Processor) loadQueues() error {
return nil
}
// addQueue adds a hinted-handoff queue for the given node. This function is not thread-safe
// and the caller must ensure this function is not called concurrently.
func (p *Processor) addQueue(nodeID uint64) (*queue, error) {
p.mu.Lock()
defer p.mu.Unlock()
path := filepath.Join(p.dir, strconv.FormatUint(nodeID, 10))
if err := os.MkdirAll(path, 0700); err != nil {
return nil, err
@@ -123,11 +127,27 @@ func (p *Processor) addQueue(nodeID uint64) (*queue, error) {
return queue, nil
}
// WriteShard writes hinted-handoff data for the given shard and node. Since it may manipulate
// hinted-handoff queues, and be called concurrently, it takes a lock during queue access.
func (p *Processor) WriteShard(shardID, ownerID uint64, points []models.Point) error {
p.mu.RLock()
queue, ok := p.queues[ownerID]
p.mu.RUnlock()
if !ok {
var err error
if queue, err = p.addQueue(ownerID); err != nil {
if err := func() error {
// Check again under write-lock.
p.mu.Lock()
defer p.mu.Unlock()
queue, ok = p.queues[ownerID]
if !ok {
var err error
if queue, err = p.addQueue(ownerID); err != nil {
return err
}
}
return nil
}(); err != nil {
return err
}
}
@@ -162,6 +182,9 @@ func (p *Processor) Process() error {
// Get the current block from the queue
buf, err := q.Current()
if err != nil {
if err != io.EOF {
p.nodeStatMaps[nodeID].Add(currentErr, 1)
}
res <- nil
break
}
@@ -169,15 +192,20 @@ func (p *Processor) Process() error {
// unmarshal the byte slice back to shard ID and points
shardID, points, err := p.unmarshalWrite(buf)
if err != nil {
p.nodeStatMaps[nodeID].Add(unmarshalErr, 1)
p.Logger.Printf("unmarshal write failed: %v", err)
if err := q.Advance(); err != nil {
p.nodeStatMaps[nodeID].Add(advanceErr, 1)
res <- err
}
return
// Skip and try the next block.
continue
}
// Try to send the write to the node
if err := p.writer.WriteShard(shardID, nodeID, points); err != nil && tsdb.IsRetryable(err) {
p.nodeStatMaps[nodeID].Add(writeErr, 1)
p.Logger.Printf("remote write failed: %v", err)
res <- nil
break
@@ -187,6 +215,7 @@ func (p *Processor) Process() error {
// If we get here, the write succeeded so advance the queue to the next item
if err := q.Advance(); err != nil {
p.nodeStatMaps[nodeID].Add(advanceErr, 1)
res <- err
return
}

View File

@@ -49,6 +49,26 @@ func TestEncoding_FloatBlock_ZeroTime(t *testing.T) {
}
}
// TestEncoding_FloatBlock_SimilarFloats verifies that a block of float
// values differing only slightly from one another survives an
// encode/decode round trip unchanged (regression for similar-float
// encoding overflow).
func TestEncoding_FloatBlock_SimilarFloats(t *testing.T) {
	points := []struct {
		ts int64
		v  float64
	}{
		{1444238178437870000, 6.00065e+06},
		{1444238185286830000, 6.000656e+06},
		{1444238188441501000, 6.000657e+06},
		{1444238195286811000, 6.000659e+06},
		{1444238198439917000, 6.000661e+06},
	}

	values := make(tsm1.Values, len(points))
	for i, p := range points {
		values[i] = tsm1.NewValue(time.Unix(0, p.ts), p.v)
	}

	b, err := values.Encode(nil)
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}

	decodedValues := values.DecodeSameTypeBlock(b)

	if !reflect.DeepEqual(decodedValues, values) {
		t.Fatalf("unexpected results:\n\tgot: %v\n\texp: %v\n", decodedValues, values)
	}
}
func TestEncoding_IntBlock_Basic(t *testing.T) {
valueCount := 1000
times := getTimes(valueCount, 60, time.Second)

View File

@@ -56,7 +56,7 @@ func (s *FloatEncoder) Bytes() []byte {
func (s *FloatEncoder) Finish() {
if !s.finished {
// // write an end-of-stream record
// write an end-of-stream record
s.Push(math.NaN())
s.bw.Flush(bitstream.Zero)
s.finished = true
@@ -82,6 +82,12 @@ func (s *FloatEncoder) Push(v float64) {
leading := bits.Clz(vDelta)
trailing := bits.Ctz(vDelta)
// Clamp number of leading zeros to avoid overflow when encoding
leading &= 0x1F
if leading >= 32 {
leading = 31
}
// TODO(dgryski): check if it's 'cheaper' to reset the leading/trailing bits instead
if s.leading != ^uint64(0) && leading >= s.leading && trailing >= s.trailing {
s.bw.WriteBit(bitstream.Zero)
@@ -92,6 +98,11 @@ func (s *FloatEncoder) Push(v float64) {
s.bw.WriteBit(bitstream.One)
s.bw.WriteBits(leading, 5)
// Note that if leading == trailing == 0, then sigbits == 64. But that
// value doesn't actually fit into the 6 bits we have.
// Luckily, we never need to encode 0 significant bits, since that would
// put us in the other case (vdelta == 0). So instead we write out a 0 and
// adjust it back to 64 on unpacking.
sigbits := 64 - leading - trailing
s.bw.WriteBits(sigbits, 6)
s.bw.WriteBits(vDelta>>trailing, int(sigbits))
@@ -178,6 +189,10 @@ func (it *FloatDecoder) Next() bool {
return false
}
mbits := bits
// 0 significant bits here means we overflowed and we actually need 64; see comment in encoder
if mbits == 0 {
mbits = 64
}
it.trailing = 64 - it.leading - mbits
}

View File

@@ -7,7 +7,6 @@ import (
)
func TestFloatEncoder_Simple(t *testing.T) {
// Example from the paper
s := tsm1.NewFloatEncoder()
@@ -67,6 +66,49 @@ func TestFloatEncoder_Simple(t *testing.T) {
}
}
// TestFloatEncoder_SimilarFloats round-trips a short series of nearly
// identical floats through the encoder and decoder, checking that every
// value comes back exactly, that no extra values are produced, and that
// the decoder reports no error.
func TestFloatEncoder_SimilarFloats(t *testing.T) {
	want := []float64{
		6.00065e+06,
		6.000656e+06,
		6.000657e+06,
		6.000659e+06,
		6.000661e+06,
	}

	enc := tsm1.NewFloatEncoder()
	for _, v := range want {
		enc.Push(v)
	}
	enc.Finish()

	dec, err := tsm1.NewFloatDecoder(enc.Bytes())
	if err != nil {
		t.Fatalf("unexpected error creating float decoder: %v", err)
	}

	for _, exp := range want {
		if !dec.Next() {
			t.Fatalf("Next()=false, want true")
		}
		if got := dec.Values(); exp != got {
			t.Errorf("Values()=(%v), want (%v)\n", got, exp)
		}
	}

	if dec.Next() {
		t.Fatalf("Next()=true, want false")
	}
	if err := dec.Error(); err != nil {
		t.Errorf("it.Error()=%v, want nil", err)
	}
}
var TwoHoursData = []struct {
v float64
}{