From d98bedd6e122a6b2099c47597f7b489dd62af9e1 Mon Sep 17 00:00:00 2001 From: Cameron Sparr Date: Tue, 18 Aug 2015 12:11:41 -0600 Subject: [PATCH] Telegraf 0.1.5, update InfluxDB client to HEAD --- Godeps/Godeps.json | 24 +- .../influxdb/influxdb/client/README.md | 1 + .../influxdb/influxdb/client/influxdb.go | 145 ++- .../influxdb/influxdb/influxql/INFLUXQL.md | 4 +- .../influxdb/influxdb/influxql/ast.go | 97 +- .../influxdb/influxdb/influxql/ast_test.go | 12 +- .../influxdb/influxdb/influxql/functions.go | 5 +- .../influxdb/influxdb/influxql/parser.go | 80 +- .../influxdb/influxdb/influxql/parser_test.go | 11 +- .../influxdb/influxdb/influxql/result.go | 56 +- .../influxdb/influxdb/influxql/scanner.go | 2 + .../influxdb/influxql/scanner_test.go | 1 + .../influxdb/influxdb/meta/config.go | 7 +- .../github.com/influxdb/influxdb/meta/data.go | 36 +- .../influxdb/influxdb/meta/data_test.go | 27 +- .../influxdb/influxdb/meta/errors.go | 7 +- .../influxdb/meta/internal/meta.pb.go | 254 ++++ .../influxdb/meta/internal/meta.proto | 63 + .../influxdb/influxdb/meta/proxy.go | 62 + .../github.com/influxdb/influxdb/meta/rpc.go | 460 ++++++++ .../influxdb/influxdb/meta/rpc_test.go | 242 ++++ .../influxdb/influxdb/meta/state.go | 489 ++++++++ .../influxdb/meta/statement_executor.go | 10 +- .../influxdb/meta/statement_executor_test.go | 14 +- .../influxdb/influxdb/meta/store.go | 553 ++++++--- .../influxdb/influxdb/meta/store_test.go | 261 ++++- .../influxdb/influxdb/tsdb/batcher.go | 4 +- .../influxdb/influxdb/tsdb/batcher_test.go | 24 +- .../influxdb/influxdb/tsdb/cursor.go | 119 ++ .../influxdb/influxdb/tsdb/cursor_test.go | 221 ++++ .../influxdb/influxdb/tsdb/engine.go | 1028 ++--------------- .../influxdb/influxdb/tsdb/engine/b1/b1.go | 695 +++++++++++ .../influxdb/tsdb/engine/b1/b1_test.go | 134 +++ .../influxdb/influxdb/tsdb/engine/bz1/bz1.go | 627 ++++++++++ .../influxdb/tsdb/engine/bz1/bz1_test.go | 439 +++++++ .../influxdb/influxdb/tsdb/engine/engine.go | 6 + .../influxdb/influxdb/tsdb/engine_test.go | 816 +------------ .../influxdb/influxdb/tsdb/executor.go | 981 ++++++++++++++++ .../influxdb/influxdb/tsdb/executor_test.go | 991 ++++++++++++++++ .../influxdb/tsdb/internal/meta.pb.go | 8 +- .../influxdb/influxdb/tsdb/mapper.go | 926 ++++++++------- .../influxdb/influxdb/tsdb/mapper_test.go | 281 +++-- .../github.com/influxdb/influxdb/tsdb/meta.go | 142 ++- .../influxdb/influxdb/tsdb/meta_test.go | 105 +- .../influxdb/influxdb/tsdb/points.go | 155 ++- .../influxdb/influxdb/tsdb/points_test.go | 493 ++++---- .../influxdb/influxdb/tsdb/query_executor.go | 139 +-- .../influxdb/tsdb/query_executor_test.go | 165 ++- .../influxdb/influxdb/tsdb/shard.go | 715 ++---------- .../influxdb/influxdb/tsdb/shard_test.go | 95 +- .../influxdb/influxdb/tsdb/store.go | 84 +- .../influxdb/influxdb/tsdb/store_test.go | 85 +- 52 files changed, 8464 insertions(+), 3937 deletions(-) create mode 100644 Godeps/_workspace/src/github.com/influxdb/influxdb/meta/proxy.go create mode 100644 Godeps/_workspace/src/github.com/influxdb/influxdb/meta/rpc.go create mode 100644 Godeps/_workspace/src/github.com/influxdb/influxdb/meta/rpc_test.go create mode 100644 Godeps/_workspace/src/github.com/influxdb/influxdb/meta/state.go create mode 100644 Godeps/_workspace/src/github.com/influxdb/influxdb/tsdb/cursor.go create mode 100644 Godeps/_workspace/src/github.com/influxdb/influxdb/tsdb/cursor_test.go create mode 100644 Godeps/_workspace/src/github.com/influxdb/influxdb/tsdb/engine/b1/b1.go create mode 100644 
Godeps/_workspace/src/github.com/influxdb/influxdb/tsdb/engine/b1/b1_test.go create mode 100644 Godeps/_workspace/src/github.com/influxdb/influxdb/tsdb/engine/bz1/bz1.go create mode 100644 Godeps/_workspace/src/github.com/influxdb/influxdb/tsdb/engine/bz1/bz1_test.go create mode 100644 Godeps/_workspace/src/github.com/influxdb/influxdb/tsdb/engine/engine.go create mode 100644 Godeps/_workspace/src/github.com/influxdb/influxdb/tsdb/executor.go create mode 100644 Godeps/_workspace/src/github.com/influxdb/influxdb/tsdb/executor_test.go diff --git a/Godeps/Godeps.json b/Godeps/Godeps.json index c53cca2f9..0bbb9582d 100644 --- a/Godeps/Godeps.json +++ b/Godeps/Godeps.json @@ -89,33 +89,33 @@ }, { "ImportPath": "github.com/influxdb/influxdb/client", - "Comment": "v0.9.2", - "Rev": "6c0a91f775f9fc0e625d17ffa04a3fe86945ba09" + "Comment": "v0.9.1-rc1-545-g8de66eb", + "Rev": "8de66eb37024cd6bd953662e5588253f0888874b" }, { "ImportPath": "github.com/influxdb/influxdb/influxql", - "Comment": "v0.9.2", - "Rev": "6c0a91f775f9fc0e625d17ffa04a3fe86945ba09" + "Comment": "v0.9.1-rc1-545-g8de66eb", + "Rev": "8de66eb37024cd6bd953662e5588253f0888874b" }, { "ImportPath": "github.com/influxdb/influxdb/meta", - "Comment": "v0.9.2", - "Rev": "6c0a91f775f9fc0e625d17ffa04a3fe86945ba09" + "Comment": "v0.9.1-rc1-545-g8de66eb", + "Rev": "8de66eb37024cd6bd953662e5588253f0888874b" }, { "ImportPath": "github.com/influxdb/influxdb/snapshot", - "Comment": "v0.9.2", - "Rev": "6c0a91f775f9fc0e625d17ffa04a3fe86945ba09" + "Comment": "v0.9.1-rc1-545-g8de66eb", + "Rev": "8de66eb37024cd6bd953662e5588253f0888874b" }, { "ImportPath": "github.com/influxdb/influxdb/toml", - "Comment": "v0.9.2", - "Rev": "6c0a91f775f9fc0e625d17ffa04a3fe86945ba09" + "Comment": "v0.9.1-rc1-545-g8de66eb", + "Rev": "8de66eb37024cd6bd953662e5588253f0888874b" }, { "ImportPath": "github.com/influxdb/influxdb/tsdb", - "Comment": "v0.9.2", - "Rev": "6c0a91f775f9fc0e625d17ffa04a3fe86945ba09" + "Comment": "v0.9.1-rc1-545-g8de66eb", + "Rev": "8de66eb37024cd6bd953662e5588253f0888874b" }, { "ImportPath": "github.com/lib/pq", diff --git a/Godeps/_workspace/src/github.com/influxdb/influxdb/client/README.md b/Godeps/_workspace/src/github.com/influxdb/influxdb/client/README.md index 012109bc0..883941e90 100644 --- a/Godeps/_workspace/src/github.com/influxdb/influxdb/client/README.md +++ b/Godeps/_workspace/src/github.com/influxdb/influxdb/client/README.md @@ -45,6 +45,7 @@ the configuration below. package main import "github.com/influxdb/influxdb/client" +import "net/url" const ( MyHost = "localhost" diff --git a/Godeps/_workspace/src/github.com/influxdb/influxdb/client/influxdb.go b/Godeps/_workspace/src/github.com/influxdb/influxdb/client/influxdb.go index 2ec08a96e..c4f34d84c 100644 --- a/Godeps/_workspace/src/github.com/influxdb/influxdb/client/influxdb.go +++ b/Godeps/_workspace/src/github.com/influxdb/influxdb/client/influxdb.go @@ -5,22 +5,69 @@ import ( "encoding/json" "errors" "fmt" - "io" "io/ioutil" + "net" "net/http" "net/url" + "strconv" + "strings" "time" "github.com/influxdb/influxdb/influxql" "github.com/influxdb/influxdb/tsdb" ) +const ( + // DefaultHost is the default host used to connect to an InfluxDB instance + DefaultHost = "localhost" + + // DefaultPort is the default port used to connect to an InfluxDB instance + DefaultPort = 8086 + + // DefaultTimeout is the default connection timeout used to connect to an InfluxDB instance + DefaultTimeout = 0 +) + // Query is used to send a command to the server. Both Command and Database are required. 
type Query struct { Command string Database string } +// ParseConnectionString will parse a string to create a valid connection URL +func ParseConnectionString(path string, ssl bool) (url.URL, error) { + var host string + var port int + + if strings.Contains(path, ":") { + h := strings.Split(path, ":") + i, e := strconv.Atoi(h[1]) + if e != nil { + return url.URL{}, fmt.Errorf("invalid port number %q: %s\n", path, e) + } + port = i + if h[0] == "" { + host = DefaultHost + } else { + host = h[0] + } + } else { + host = path + // If they didn't specify a port, always use the default port + port = DefaultPort + } + + u := url.URL{ + Scheme: "http", + } + if ssl { + u.Scheme = "https" + } + u.Host = net.JoinHostPort(host, strconv.Itoa(port)) + + return u, nil +} + // Config is used to specify what server to connect to. // URL: The URL of the server connecting to. // Username/Password are optional. They will be passed via basic auth if provided. @@ -34,6 +81,13 @@ type Config struct { Timeout time.Duration } +// NewConfig will create a config to be used in connecting to the client +func NewConfig() Config { + return Config{ + Timeout: DefaultTimeout, + } +} + // Client is used to make calls to the server. type Client struct { url url.URL @@ -120,7 +174,8 @@ func (c *Client) Query(q Query) (*Response, error) { // If successful, error is nil and Response is nil // If an error occurs, Response may contain additional information if populated. func (c *Client) Write(bp BatchPoints) (*Response, error) { - c.url.Path = "write" + u := c.url + u.Path = "write" var b bytes.Buffer for _, p := range bp.Points { @@ -146,7 +201,7 @@ func (c *Client) Write(bp BatchPoints) (*Response, error) { } } - req, err := http.NewRequest("POST", c.url.String(), &b) + req, err := http.NewRequest("POST", u.String(), &b) if err != nil { return nil, err } @@ -156,10 +211,10 @@ func (c *Client) Write(bp BatchPoints) (*Response, error) { req.SetBasicAuth(c.username, c.password) } params := req.URL.Query() - params.Add("db", bp.Database) - params.Add("rp", bp.RetentionPolicy) - params.Add("precision", bp.Precision) - params.Add("consistency", bp.WriteConsistency) + params.Set("db", bp.Database) + params.Set("rp", bp.RetentionPolicy) + params.Set("precision", bp.Precision) + params.Set("consistency", bp.WriteConsistency) req.URL.RawQuery = params.Encode() resp, err := c.httpClient.Do(req) @@ -170,7 +225,7 @@ func (c *Client) Write(bp BatchPoints) (*Response, error) { var response Response body, err := ioutil.ReadAll(resp.Body) - if err != nil && err.Error() != "EOF" { + if err != nil { return nil, err } @@ -183,6 +238,52 @@ func (c *Client) Write(bp BatchPoints) (*Response, error) { return nil, nil } +// WriteLineProtocol takes a string with line returns to delimit each write +// If successful, error is nil and Response is nil +// If an error occurs, Response may contain additional information if populated. 
+func (c *Client) WriteLineProtocol(data, database, retentionPolicy, precision, writeConsistency string) (*Response, error) { + u := c.url + u.Path = "write" + + r := strings.NewReader(data) + + req, err := http.NewRequest("POST", u.String(), r) + if err != nil { + return nil, err + } + req.Header.Set("Content-Type", "") + req.Header.Set("User-Agent", c.userAgent) + if c.username != "" { + req.SetBasicAuth(c.username, c.password) + } + params := req.URL.Query() + params.Set("db", database) + params.Set("rp", retentionPolicy) + params.Set("precision", precision) + params.Set("consistency", writeConsistency) + req.URL.RawQuery = params.Encode() + + resp, err := c.httpClient.Do(req) + if err != nil { + return nil, err + } + defer resp.Body.Close() + + var response Response + body, err := ioutil.ReadAll(resp.Body) + if err != nil { + return nil, err + } + + if resp.StatusCode != http.StatusNoContent && resp.StatusCode != http.StatusOK { + err := fmt.Errorf(string(body)) + response.Err = err + return &response, err + } + + return nil, nil +} + // Ping will check to see if the server is up // Ping returns how long the request took, the version of the server it connected to, and an error if one occurred. func (c *Client) Ping() (time.Duration, string, error) { @@ -209,34 +310,6 @@ func (c *Client) Ping() (time.Duration, string, error) { return time.Since(now), version, nil } -// Dump connects to server and retrieves all data stored for specified database. -// If successful, Dump returns the entire response body, which is an io.ReadCloser -func (c *Client) Dump(db string) (io.ReadCloser, error) { - u := c.url - u.Path = "dump" - values := u.Query() - values.Set("db", db) - u.RawQuery = values.Encode() - - req, err := http.NewRequest("GET", u.String(), nil) - if err != nil { - return nil, err - } - req.Header.Set("User-Agent", c.userAgent) - if c.username != "" { - req.SetBasicAuth(c.username, c.password) - } - - resp, err := c.httpClient.Do(req) - if err != nil { - return nil, err - } - if resp.StatusCode != http.StatusOK { - return resp.Body, fmt.Errorf("HTTP Protocol error %d", resp.StatusCode) - } - return resp.Body, nil -} - // Structs // Result represents a resultset returned from a single statement. diff --git a/Godeps/_workspace/src/github.com/influxdb/influxdb/influxql/INFLUXQL.md b/Godeps/_workspace/src/github.com/influxdb/influxdb/influxql/INFLUXQL.md index 087fc3b9f..3ef272f41 100644 --- a/Godeps/_workspace/src/github.com/influxdb/influxdb/influxql/INFLUXQL.md +++ b/Godeps/_workspace/src/github.com/influxdb/influxdb/influxql/INFLUXQL.md @@ -469,14 +469,14 @@ SHOW MEASUREMENTS WHERE region = 'uswest' AND host = 'serverA'; ### SHOW RETENTION POLICIES ``` -show_retention_policies = "SHOW RETENTION POLICIES" db_name . +show_retention_policies = "SHOW RETENTION POLICIES ON" db_name . 
 ```
 
 #### Example:
 
 ```sql
 -- show all retention policies on a database
-SHOW RETENTION POLICIES mydb;
+SHOW RETENTION POLICIES ON mydb;
 ```
 
 ### SHOW SERIES
diff --git a/Godeps/_workspace/src/github.com/influxdb/influxdb/influxql/ast.go b/Godeps/_workspace/src/github.com/influxdb/influxdb/influxql/ast.go
index 0942b7792..04bb66bbb 100644
--- a/Godeps/_workspace/src/github.com/influxdb/influxdb/influxql/ast.go
+++ b/Godeps/_workspace/src/github.com/influxdb/influxdb/influxql/ast.go
@@ -936,6 +936,11 @@ func (s *SelectStatement) walkForTime(node Node) bool {
 
 // HasWildcard returns whether or not the select statement has at least 1 wildcard
 func (s *SelectStatement) HasWildcard() bool {
+	return s.HasFieldWildcard() || s.HasDimensionWildcard()
+}
+
+// HasFieldWildcard returns whether or not the select statement has at least 1 wildcard in the fields
+func (s *SelectStatement) HasFieldWildcard() bool {
 	for _, f := range s.Fields {
 		_, ok := f.Expr.(*Wildcard)
 		if ok {
@@ -943,6 +948,12 @@ func (s *SelectStatement) HasWildcard() bool {
 		}
 	}
 
+	return false
+}
+
+// HasDimensionWildcard returns whether or not the select statement has
+// at least 1 wildcard in the dimensions aka `GROUP BY`
+func (s *SelectStatement) HasDimensionWildcard() bool {
 	for _, d := range s.Dimensions {
 		_, ok := d.Expr.(*Wildcard)
 		if ok {
@@ -990,11 +1001,27 @@ func (s *SelectStatement) validate(tr targetRequirement) error {
 		return err
 	}
 
+	if err := s.validateWildcard(); err != nil {
+		return err
+	}
+
 	return nil
 }
 
 func (s *SelectStatement) validateAggregates(tr targetRequirement) error {
-	// First, determine if specific calls have at least one and only one argument
+	// First, if 1 field is an aggregate, then all fields must be an aggregate. This is
+	// an explicit limitation of the current system.
+	numAggregates := 0
+	for _, f := range s.Fields {
+		if _, ok := f.Expr.(*Call); ok {
+			numAggregates++
+		}
+	}
+	if numAggregates != 0 && numAggregates != len(s.Fields) {
+		return fmt.Errorf("mixing aggregate and non-aggregate queries is not supported")
+	}
+
+	// Secondly, determine if specific calls have at least one and only one argument
 	for _, f := range s.Fields {
 		if c, ok := f.Expr.(*Call); ok {
 			switch c.Name {
@@ -1033,6 +1060,13 @@ func (s *SelectStatement) validateAggregates(tr targetRequirement) error {
 	return nil
 }
 
+func (s *SelectStatement) validateWildcard() error {
+	if s.HasWildcard() && len(s.Fields) > 1 {
+		return fmt.Errorf("wildcards can not be combined with other fields")
+	}
+	return nil
+}
+
 func (s *SelectStatement) HasDistinct() bool {
 	// determine if we have a call named distinct
 	for _, f := range s.Fields {
@@ -1321,6 +1355,17 @@ func (s *SelectStatement) NamesInSelect() []string {
 	return a
 }
 
+// NamesInDimension returns the field and tag names (idents) in the group by
+func (s *SelectStatement) NamesInDimension() []string {
+	var a []string
+
+	for _, d := range s.Dimensions {
+		a = append(a, walkNames(d.Expr)...)
+	}
+
+	return a
+}
+
 // walkNames will walk the Expr and return the database fields
 func walkNames(exp Expr) []string {
 	switch expr := exp.(type) {
@@ -1953,6 +1998,32 @@ func (s *ShowFieldKeysStatement) RequiredPrivileges() ExecutionPrivileges {
 
 // Fields represents a list of fields.
 type Fields []*Field
 
+// AliasNames returns a list of calculated field names in
+// order of alias, function name, then field.
+func (a Fields) AliasNames() []string { + names := []string{} + for _, f := range a { + names = append(names, f.Name()) + } + return names +} + +// Names returns a list of raw field names. +func (a Fields) Names() []string { + names := []string{} + for _, f := range a { + var name string + switch expr := f.Expr.(type) { + case *Call: + name = expr.Name + case *VarRef: + name = expr.Val + } + names = append(names, name) + } + return names +} + // String returns a string representation of the fields. func (a Fields) String() string { var str []string @@ -1992,26 +2063,6 @@ func (f *Field) Name() string { func (f *Field) String() string { str := f.Expr.String() - switch f.Expr.(type) { - case *VarRef: - quoted := false - // Escape any double-quotes in the field - if strings.Contains(str, `"`) { - str = strings.Replace(str, `"`, `\"`, -1) - quoted = true - } - - // Escape any single-quotes in the field - if strings.Contains(str, `'`) { - quoted = true - } - - // Double-quote field names with spaces or that were previously escaped - if strings.Contains(str, " ") || quoted { - str = fmt.Sprintf("\"%s\"", str) - } - } - if f.Alias == "" { return str } @@ -2132,7 +2183,9 @@ type VarRef struct { } // String returns a string representation of the variable reference. -func (r *VarRef) String() string { return r.Val } +func (r *VarRef) String() string { + return QuoteIdent(r.Val) +} // Call represents a function call. type Call struct { diff --git a/Godeps/_workspace/src/github.com/influxdb/influxdb/influxql/ast_test.go b/Godeps/_workspace/src/github.com/influxdb/influxdb/influxql/ast_test.go index 3d074d5f1..468b7b1f3 100644 --- a/Godeps/_workspace/src/github.com/influxdb/influxdb/influxql/ast_test.go +++ b/Godeps/_workspace/src/github.com/influxdb/influxdb/influxql/ast_test.go @@ -44,35 +44,35 @@ func TestSelectStatement_Substatement(t *testing.T) { { stmt: `SELECT sum(aa.value) + sum(bb.value) FROM aa, bb`, expr: &influxql.VarRef{Val: "aa.value"}, - sub: `SELECT aa.value FROM aa`, + sub: `SELECT "aa.value" FROM aa`, }, // 2. Simple merge { stmt: `SELECT sum(aa.value) + sum(bb.value) FROM aa, bb`, expr: &influxql.VarRef{Val: "bb.value"}, - sub: `SELECT bb.value FROM bb`, + sub: `SELECT "bb.value" FROM bb`, }, // 3. Join with condition { stmt: `SELECT sum(aa.value) + sum(bb.value) FROM aa, bb WHERE aa.host = 'servera' AND bb.host = 'serverb'`, expr: &influxql.VarRef{Val: "bb.value"}, - sub: `SELECT bb.value FROM bb WHERE bb.host = 'serverb'`, + sub: `SELECT "bb.value" FROM bb WHERE "bb.host" = 'serverb'`, }, // 4. Join with complex condition { stmt: `SELECT sum(aa.value) + sum(bb.value) FROM aa, bb WHERE aa.host = 'servera' AND (bb.host = 'serverb' OR bb.host = 'serverc') AND 1 = 2`, expr: &influxql.VarRef{Val: "bb.value"}, - sub: `SELECT bb.value FROM bb WHERE (bb.host = 'serverb' OR bb.host = 'serverc') AND 1.000 = 2.000`, + sub: `SELECT "bb.value" FROM bb WHERE ("bb.host" = 'serverb' OR "bb.host" = 'serverc') AND 1.000 = 2.000`, }, // 5. 
4 with different condition order
 		{
 			stmt: `SELECT sum(aa.value) + sum(bb.value) FROM aa, bb WHERE ((bb.host = 'serverb' OR bb.host = 'serverc') AND aa.host = 'servera') AND 1 = 2`,
 			expr: &influxql.VarRef{Val: "bb.value"},
-			sub:  `SELECT bb.value FROM bb WHERE ((bb.host = 'serverb' OR bb.host = 'serverc')) AND 1.000 = 2.000`,
+			sub:  `SELECT "bb.value" FROM bb WHERE (("bb.host" = 'serverb' OR "bb.host" = 'serverc')) AND 1.000 = 2.000`,
 		},
 	}
@@ -217,7 +217,7 @@ func TestSelectStatement_SetTimeRange(t *testing.T) {
 
 // Ensure the idents from the select clause can come out
 func TestSelect_NamesInSelect(t *testing.T) {
-	s := MustParseSelectStatement("select count(asdf), bar from cpu")
+	s := MustParseSelectStatement("select count(asdf), count(bar) from cpu")
 	a := s.NamesInSelect()
 	if !reflect.DeepEqual(a, []string{"asdf", "bar"}) {
 		t.Fatal("expected names asdf and bar")
diff --git a/Godeps/_workspace/src/github.com/influxdb/influxdb/influxql/functions.go b/Godeps/_workspace/src/github.com/influxdb/influxdb/influxql/functions.go
index f09c321e7..3d75c5a27 100644
--- a/Godeps/_workspace/src/github.com/influxdb/influxdb/influxql/functions.go
+++ b/Godeps/_workspace/src/github.com/influxdb/influxdb/influxql/functions.go
@@ -550,10 +550,9 @@ func ReduceMedian(values []interface{}) interface{} {
 		sortedRange = getSortedRange(data, middle-1, 2)
 		var low, high = sortedRange[0], sortedRange[1]
 		return low + (high-low)/2
-	} else {
-		sortedRange = getSortedRange(data, middle, 1)
-		return sortedRange[0]
 	}
+	sortedRange = getSortedRange(data, middle, 1)
+	return sortedRange[0]
 }
 
 // getSortedRange returns a sorted subset of data. By using discardLowerRange and discardUpperRange to get the target
diff --git a/Godeps/_workspace/src/github.com/influxdb/influxdb/influxql/parser.go b/Godeps/_workspace/src/github.com/influxdb/influxdb/influxql/parser.go
index 366cbbec0..b2c51e595 100644
--- a/Godeps/_workspace/src/github.com/influxdb/influxdb/influxql/parser.go
+++ b/Godeps/_workspace/src/github.com/influxdb/influxdb/influxql/parser.go
@@ -1471,11 +1471,18 @@ func (p *Parser) parseFields() (Fields, error) {
 func (p *Parser) parseField() (*Field, error) {
 	f := &Field{}
 
+	_, pos, _ := p.scanIgnoreWhitespace()
+	p.unscan()
 	// Parse the expression first.
 	expr, err := p.ParseExpr()
 	if err != nil {
 		return nil, err
 	}
+	var c validateField
+	Walk(&c, expr)
+	if c.foundInvalid {
+		return nil, fmt.Errorf("invalid operator %s in SELECT clause at line %d, char %d; operator is intended for WHERE clause", c.badToken, pos.Line+1, pos.Char+1)
+	}
 	f.Expr = expr
 
 	// Parse the alias if the current and next tokens are "WS AS".
@@ -1491,6 +1498,30 @@
 	return f, nil
 }
 
+// validateField checks if the Expr is a valid field. We disallow all binary expressions
+// that return a boolean
+type validateField struct {
+	foundInvalid bool
+	badToken     Token
+}
+
+func (c *validateField) Visit(n Node) Visitor {
+	e, ok := n.(*BinaryExpr)
+	if !ok {
+		return c
+	}
+
+	switch e.Op {
+	case EQ, NEQ, EQREGEX,
+		NEQREGEX, LT, LTE, GT, GTE,
+		AND, OR:
+		c.foundInvalid = true
+		c.badToken = e.Op
+		return nil
+	}
+	return c
+}
+
 // parseAlias parses the "AS (IDENT|STRING)" alias for fields and dimensions.
 func (p *Parser) parseAlias() (string, error) {
 	// Check if the next token is "AS". If not, then unscan and exit.
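
As a quick illustration of the new SELECT-clause validation (this example is not part of the upstream diff; the query string and error text are taken from the parser tests updated below):

```go
package main

import (
	"fmt"

	"github.com/influxdb/influxdb/influxql"
)

func main() {
	// Boolean-valued operators such as > now belong only in the WHERE
	// clause; using one in SELECT is rejected at parse time.
	_, err := influxql.ParseStatement(`SELECT value > 2 FROM cpu`)
	fmt.Println(err)
	// invalid operator > in SELECT clause at line 1, char 8;
	// operator is intended for WHERE clause
}
```
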
@@ -1660,31 +1691,31 @@ func (p *Parser) parseFill() (FillOption, interface{}, error) { p.unscan() return NullFill, nil, nil } - if lit, ok := expr.(*Call); !ok { + lit, ok := expr.(*Call) + if !ok { p.unscan() return NullFill, nil, nil - } else { - if strings.ToLower(lit.Name) != "fill" { - p.unscan() - return NullFill, nil, nil - } - if len(lit.Args) != 1 { - return NullFill, nil, errors.New("fill requires an argument, e.g.: 0, null, none, previous") - } - switch lit.Args[0].String() { - case "null": - return NullFill, nil, nil - case "none": - return NoFill, nil, nil - case "previous": - return PreviousFill, nil, nil - default: - num, ok := lit.Args[0].(*NumberLiteral) - if !ok { - return NullFill, nil, fmt.Errorf("expected number argument in fill()") - } - return NumberFill, num.Val, nil + } + if strings.ToLower(lit.Name) != "fill" { + p.unscan() + return NullFill, nil, nil + } + if len(lit.Args) != 1 { + return NullFill, nil, errors.New("fill requires an argument, e.g.: 0, null, none, previous") + } + switch lit.Args[0].String() { + case "null": + return NullFill, nil, nil + case "none": + return NoFill, nil, nil + case "previous": + return PreviousFill, nil, nil + default: + num, ok := lit.Args[0].(*NumberLiteral) + if !ok { + return NullFill, nil, fmt.Errorf("expected number argument in fill()") } + return NumberFill, num.Val, nil } } @@ -2186,6 +2217,11 @@ func QuoteIdent(segments ...string) string { // IdentNeedsQuotes returns true if the ident string given would require quotes. func IdentNeedsQuotes(ident string) bool { + // check if this identifier is a keyword + tok := Lookup(ident) + if tok != IDENT { + return true + } for i, r := range ident { if i == 0 && !isIdentFirstChar(r) { return true diff --git a/Godeps/_workspace/src/github.com/influxdb/influxdb/influxql/parser_test.go b/Godeps/_workspace/src/github.com/influxdb/influxdb/influxql/parser_test.go index 2e1e9ffd7..71ccbffc3 100644 --- a/Godeps/_workspace/src/github.com/influxdb/influxdb/influxql/parser_test.go +++ b/Godeps/_workspace/src/github.com/influxdb/influxdb/influxql/parser_test.go @@ -1225,12 +1225,13 @@ func TestParser_ParseStatement(t *testing.T) { {s: `SELECT field1 FROM myseries ORDER BY time, field1`, err: `only ORDER BY time ASC supported at this time`}, {s: `SELECT field1 AS`, err: `found EOF, expected identifier at line 1, char 18`}, {s: `SELECT field1 FROM foo group by time(1s)`, err: `GROUP BY requires at least one aggregate function`}, + {s: `SELECT count(value), value FROM foo`, err: `mixing aggregate and non-aggregate queries is not supported`}, {s: `SELECT count(value) FROM foo group by time(1s)`, err: `aggregate functions with GROUP BY time require a WHERE time clause`}, {s: `SELECT count(value) FROM foo group by time(1s) where host = 'hosta.influxdb.org'`, err: `aggregate functions with GROUP BY time require a WHERE time clause`}, {s: `SELECT field1 FROM 12`, err: `found 12, expected identifier at line 1, char 20`}, {s: `SELECT 1000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 FROM myseries`, err: `unable to parse number at line 1, char 8`}, 
{s: `SELECT 10.5h FROM myseries`, err: `found h, expected FROM at line 1, char 12`}, - {s: `SELECT derivative(field1), field1 FROM myseries`, err: `derivative cannot be used with other fields`}, + {s: `SELECT derivative(field1), field1 FROM myseries`, err: `mixing aggregate and non-aggregate queries is not supported`}, {s: `SELECT distinct(field1), sum(field1) FROM myseries`, err: `aggregate function distinct() can not be combined with other functions or fields`}, {s: `SELECT distinct(field1), field2 FROM myseries`, err: `aggregate function distinct() can not be combined with other functions or fields`}, {s: `SELECT distinct(field1, field2) FROM myseries`, err: `distinct function can only have one argument`}, @@ -1244,6 +1245,12 @@ func TestParser_ParseStatement(t *testing.T) { {s: `select derivative() from myseries`, err: `invalid number of arguments for derivative, expected at least 1 but no more than 2, got 0`}, {s: `select derivative(mean(value), 1h, 3) from myseries`, err: `invalid number of arguments for derivative, expected at least 1 but no more than 2, got 3`}, {s: `SELECT field1 from myseries WHERE host =~ 'asd' LIMIT 1`, err: `found asd, expected regex at line 1, char 42`}, + {s: `SELECT value > 2 FROM cpu`, err: `invalid operator > in SELECT clause at line 1, char 8; operator is intended for WHERE clause`}, + {s: `SELECT value = 2 FROM cpu`, err: `invalid operator = in SELECT clause at line 1, char 8; operator is intended for WHERE clause`}, + {s: `SELECT s =~ /foo/ FROM cpu`, err: `invalid operator =~ in SELECT clause at line 1, char 8; operator is intended for WHERE clause`}, + {s: `SELECT foo, * from cpu`, err: `wildcards can not be combined with other fields`}, + {s: `SELECT *, * from cpu`, err: `found ,, expected FROM at line 1, char 9`}, + {s: `SELECT *, foo from cpu`, err: `found ,, expected FROM at line 1, char 9`}, {s: `DELETE`, err: `found EOF, expected FROM at line 1, char 8`}, {s: `DELETE FROM`, err: `found EOF, expected identifier at line 1, char 13`}, {s: `DELETE FROM myseries WHERE`, err: `found EOF, expected identifier, string, number, bool at line 1, char 28`}, @@ -1661,6 +1668,8 @@ func TestQuoteIdent(t *testing.T) { s string }{ {[]string{``}, ``}, + {[]string{`select`}, `"select"`}, + {[]string{`in-bytes`}, `"in-bytes"`}, {[]string{`foo`, `bar`}, `"foo".bar`}, {[]string{`foo`, ``, `bar`}, `"foo"..bar`}, {[]string{`foo bar`, `baz`}, `"foo bar".baz`}, diff --git a/Godeps/_workspace/src/github.com/influxdb/influxdb/influxql/result.go b/Godeps/_workspace/src/github.com/influxdb/influxdb/influxql/result.go index 94d102054..a74ed714a 100644 --- a/Godeps/_workspace/src/github.com/influxdb/influxdb/influxql/result.go +++ b/Godeps/_workspace/src/github.com/influxdb/influxdb/influxql/result.go @@ -166,12 +166,8 @@ func newBinaryExprEvaluator(op Token, lhs, rhs Processor) Processor { return func(values []interface{}) interface{} { l := lhs(values) r := rhs(values) - if lv, ok := l.(float64); ok { - if rv, ok := r.(float64); ok { - if rv != 0 { - return lv + rv - } - } + if lf, rf, ok := processorValuesAsFloat64(l, r); ok { + return lf + rf } return nil } @@ -179,12 +175,8 @@ func newBinaryExprEvaluator(op Token, lhs, rhs Processor) Processor { return func(values []interface{}) interface{} { l := lhs(values) r := rhs(values) - if lv, ok := l.(float64); ok { - if rv, ok := r.(float64); ok { - if rv != 0 { - return lv - rv - } - } + if lf, rf, ok := processorValuesAsFloat64(l, r); ok { + return lf - rf } return nil } @@ -192,12 +184,8 @@ func newBinaryExprEvaluator(op 
Token, lhs, rhs Processor) Processor { return func(values []interface{}) interface{} { l := lhs(values) r := rhs(values) - if lv, ok := l.(float64); ok { - if rv, ok := r.(float64); ok { - if rv != 0 { - return lv * rv - } - } + if lf, rf, ok := processorValuesAsFloat64(l, r); ok { + return lf * rf } return nil } @@ -205,12 +193,8 @@ func newBinaryExprEvaluator(op Token, lhs, rhs Processor) Processor { return func(values []interface{}) interface{} { l := lhs(values) r := rhs(values) - if lv, ok := l.(float64); ok { - if rv, ok := r.(float64); ok { - if rv != 0 { - return lv / rv - } - } + if lf, rf, ok := processorValuesAsFloat64(l, r); ok { + return lf / rf } return nil } @@ -221,3 +205,27 @@ func newBinaryExprEvaluator(op Token, lhs, rhs Processor) Processor { } } } + +func processorValuesAsFloat64(lhs interface{}, rhs interface{}) (float64, float64, bool) { + var lf float64 + var rf float64 + var ok bool + + lf, ok = lhs.(float64) + if !ok { + var li int64 + if li, ok = lhs.(int64); !ok { + return 0, 0, false + } + lf = float64(li) + } + rf, ok = rhs.(float64) + if !ok { + var ri int64 + if ri, ok = rhs.(int64); !ok { + return 0, 0, false + } + rf = float64(ri) + } + return lf, rf, true +} diff --git a/Godeps/_workspace/src/github.com/influxdb/influxdb/influxql/scanner.go b/Godeps/_workspace/src/github.com/influxdb/influxdb/influxql/scanner.go index 4e15edddc..c6dab019c 100644 --- a/Godeps/_workspace/src/github.com/influxdb/influxdb/influxql/scanner.go +++ b/Godeps/_workspace/src/github.com/influxdb/influxdb/influxql/scanner.go @@ -514,6 +514,8 @@ func ScanString(r io.RuneScanner) (string, error) { _, _ = buf.WriteRune('\\') } else if ch1 == '"' { _, _ = buf.WriteRune('"') + } else if ch1 == '\'' { + _, _ = buf.WriteRune('\'') } else { return string(ch0) + string(ch1), errBadEscape } diff --git a/Godeps/_workspace/src/github.com/influxdb/influxdb/influxql/scanner_test.go b/Godeps/_workspace/src/github.com/influxdb/influxdb/influxql/scanner_test.go index ab0aab2e4..4a0124d53 100644 --- a/Godeps/_workspace/src/github.com/influxdb/influxdb/influxql/scanner_test.go +++ b/Godeps/_workspace/src/github.com/influxdb/influxdb/influxql/scanner_test.go @@ -243,6 +243,7 @@ func TestScanString(t *testing.T) { {in: `"foo\nbar"`, out: "foo\nbar"}, {in: `"foo\\bar"`, out: `foo\bar`}, {in: `"foo\"bar"`, out: `foo"bar`}, + {in: `'foo\'bar'`, out: `foo'bar`}, {in: `"foo` + "\n", out: `foo`, err: "bad string"}, // newline in string {in: `"foo`, out: `foo`, err: "bad string"}, // unclosed quotes diff --git a/Godeps/_workspace/src/github.com/influxdb/influxdb/meta/config.go b/Godeps/_workspace/src/github.com/influxdb/influxdb/meta/config.go index 5512f059b..161bd70c0 100644 --- a/Godeps/_workspace/src/github.com/influxdb/influxdb/meta/config.go +++ b/Godeps/_workspace/src/github.com/influxdb/influxdb/meta/config.go @@ -31,16 +31,17 @@ type Config struct { Dir string `toml:"dir"` Hostname string `toml:"hostname"` BindAddress string `toml:"bind-address"` - Peers []string `toml:"peers"` + Peers []string `toml:"-"` RetentionAutoCreate bool `toml:"retention-autocreate"` ElectionTimeout toml.Duration `toml:"election-timeout"` HeartbeatTimeout toml.Duration `toml:"heartbeat-timeout"` LeaderLeaseTimeout toml.Duration `toml:"leader-lease-timeout"` CommitTimeout toml.Duration `toml:"commit-timeout"` + ClusterTracing bool `toml:"cluster-tracing"` } -func NewConfig() Config { - return Config{ +func NewConfig() *Config { + return &Config{ Hostname: DefaultHostname, BindAddress: DefaultBindAddress, RetentionAutoCreate: 
true, diff --git a/Godeps/_workspace/src/github.com/influxdb/influxdb/meta/data.go b/Godeps/_workspace/src/github.com/influxdb/influxdb/meta/data.go index 35a3f6e06..33c829bb8 100644 --- a/Godeps/_workspace/src/github.com/influxdb/influxdb/meta/data.go +++ b/Godeps/_workspace/src/github.com/influxdb/influxdb/meta/data.go @@ -141,8 +141,8 @@ func (data *Data) CreateRetentionPolicy(database string, rpi *RetentionPolicyInf // Validate retention policy. if rpi.Name == "" { return ErrRetentionPolicyNameRequired - } else if rpi.ReplicaN != len(data.Nodes) { - return ErrReplicationFactorMismatch + } else if rpi.ReplicaN < 1 { + return ErrReplicationFactorTooLow } // Find database. @@ -706,14 +706,18 @@ func (di *DatabaseInfo) unmarshal(pb *internal.DatabaseInfo) { di.Name = pb.GetName() di.DefaultRetentionPolicy = pb.GetDefaultRetentionPolicy() - di.RetentionPolicies = make([]RetentionPolicyInfo, len(pb.GetRetentionPolicies())) - for i, x := range pb.GetRetentionPolicies() { - di.RetentionPolicies[i].unmarshal(x) + if len(pb.GetRetentionPolicies()) > 0 { + di.RetentionPolicies = make([]RetentionPolicyInfo, len(pb.GetRetentionPolicies())) + for i, x := range pb.GetRetentionPolicies() { + di.RetentionPolicies[i].unmarshal(x) + } } - di.ContinuousQueries = make([]ContinuousQueryInfo, len(pb.GetContinuousQueries())) - for i, x := range pb.GetContinuousQueries() { - di.ContinuousQueries[i].unmarshal(x) + if len(pb.GetContinuousQueries()) > 0 { + di.ContinuousQueries = make([]ContinuousQueryInfo, len(pb.GetContinuousQueries())) + for i, x := range pb.GetContinuousQueries() { + di.ContinuousQueries[i].unmarshal(x) + } } } @@ -794,9 +798,11 @@ func (rpi *RetentionPolicyInfo) unmarshal(pb *internal.RetentionPolicyInfo) { rpi.Duration = time.Duration(pb.GetDuration()) rpi.ShardGroupDuration = time.Duration(pb.GetShardGroupDuration()) - rpi.ShardGroups = make([]ShardGroupInfo, len(pb.GetShardGroups())) - for i, x := range pb.GetShardGroups() { - rpi.ShardGroups[i].unmarshal(x) + if len(pb.GetShardGroups()) > 0 { + rpi.ShardGroups = make([]ShardGroupInfo, len(pb.GetShardGroups())) + for i, x := range pb.GetShardGroups() { + rpi.ShardGroups[i].unmarshal(x) + } } } @@ -900,9 +906,11 @@ func (sgi *ShardGroupInfo) unmarshal(pb *internal.ShardGroupInfo) { sgi.EndTime = UnmarshalTime(pb.GetEndTime()) sgi.DeletedAt = UnmarshalTime(pb.GetDeletedAt()) - sgi.Shards = make([]ShardInfo, len(pb.GetShards())) - for i, x := range pb.GetShards() { - sgi.Shards[i].unmarshal(x) + if len(pb.GetShards()) > 0 { + sgi.Shards = make([]ShardInfo, len(pb.GetShards())) + for i, x := range pb.GetShards() { + sgi.Shards[i].unmarshal(x) + } } } diff --git a/Godeps/_workspace/src/github.com/influxdb/influxdb/meta/data_test.go b/Godeps/_workspace/src/github.com/influxdb/influxdb/meta/data_test.go index d1985591e..945d78242 100644 --- a/Godeps/_workspace/src/github.com/influxdb/influxdb/meta/data_test.go +++ b/Godeps/_workspace/src/github.com/influxdb/influxdb/meta/data_test.go @@ -127,14 +127,10 @@ func TestData_CreateRetentionPolicy_ErrNameRequired(t *testing.T) { } } -// Ensure that creating a policy with a replication factor that doesn't match -// the number of nodes in the cluster will return an error. This is a temporary -// restriction until v0.9.1 is released. 
-func TestData_CreateRetentionPolicy_ErrReplicationFactorMismatch(t *testing.T) { - data := meta.Data{ - Nodes: []meta.NodeInfo{{ID: 1}, {ID: 2}, {ID: 3}}, - } - if err := data.CreateRetentionPolicy("db0", &meta.RetentionPolicyInfo{Name: "rp0", ReplicaN: 2}); err != meta.ErrReplicationFactorMismatch { +// Ensure that creating a policy with a replication factor less than 1 returns an error. +func TestData_CreateRetentionPolicy_ErrReplicationFactorTooLow(t *testing.T) { + data := meta.Data{Nodes: []meta.NodeInfo{{ID: 1}}} + if err := data.CreateRetentionPolicy("db0", &meta.RetentionPolicyInfo{Name: "rp0", ReplicaN: 0}); err != meta.ErrReplicationFactorTooLow { t.Fatalf("unexpected error: %s", err) } } @@ -152,10 +148,10 @@ func TestData_CreateRetentionPolicy_ErrRetentionPolicyExists(t *testing.T) { var data meta.Data if err := data.CreateDatabase("db0"); err != nil { t.Fatal(err) - } else if err = data.CreateRetentionPolicy("db0", &meta.RetentionPolicyInfo{Name: "rp0"}); err != nil { + } else if err = data.CreateRetentionPolicy("db0", &meta.RetentionPolicyInfo{Name: "rp0", ReplicaN: 1}); err != nil { t.Fatal(err) } - if err := data.CreateRetentionPolicy("db0", &meta.RetentionPolicyInfo{Name: "rp0"}); err != meta.ErrRetentionPolicyExists { + if err := data.CreateRetentionPolicy("db0", &meta.RetentionPolicyInfo{Name: "rp0", ReplicaN: 1}); err != meta.ErrRetentionPolicyExists { t.Fatalf("unexpected error: %s", err) } } @@ -165,7 +161,7 @@ func TestData_UpdateRetentionPolicy(t *testing.T) { var data meta.Data if err := data.CreateDatabase("db0"); err != nil { t.Fatal(err) - } else if err = data.CreateRetentionPolicy("db0", &meta.RetentionPolicyInfo{Name: "rp0"}); err != nil { + } else if err = data.CreateRetentionPolicy("db0", &meta.RetentionPolicyInfo{Name: "rp0", ReplicaN: 1}); err != nil { t.Fatal(err) } @@ -194,7 +190,7 @@ func TestData_DropRetentionPolicy(t *testing.T) { var data meta.Data if err := data.CreateDatabase("db0"); err != nil { t.Fatal(err) - } else if err = data.CreateRetentionPolicy("db0", &meta.RetentionPolicyInfo{Name: "rp0"}); err != nil { + } else if err = data.CreateRetentionPolicy("db0", &meta.RetentionPolicyInfo{Name: "rp0", ReplicaN: 1}); err != nil { t.Fatal(err) } @@ -229,9 +225,9 @@ func TestData_RetentionPolicy(t *testing.T) { var data meta.Data if err := data.CreateDatabase("db0"); err != nil { t.Fatal(err) - } else if err = data.CreateRetentionPolicy("db0", &meta.RetentionPolicyInfo{Name: "rp0"}); err != nil { + } else if err = data.CreateRetentionPolicy("db0", &meta.RetentionPolicyInfo{Name: "rp0", ReplicaN: 1}); err != nil { t.Fatal(err) - } else if err = data.CreateRetentionPolicy("db0", &meta.RetentionPolicyInfo{Name: "rp1"}); err != nil { + } else if err = data.CreateRetentionPolicy("db0", &meta.RetentionPolicyInfo{Name: "rp1", ReplicaN: 1}); err != nil { t.Fatal(err) } @@ -240,6 +236,7 @@ func TestData_RetentionPolicy(t *testing.T) { } else if !reflect.DeepEqual(rpi, &meta.RetentionPolicyInfo{ Name: "rp0", ShardGroupDuration: 604800000000000, + ReplicaN: 1, }) { t.Fatalf("unexpected value: %#v", rpi) } @@ -258,7 +255,7 @@ func TestData_SetDefaultRetentionPolicy(t *testing.T) { var data meta.Data if err := data.CreateDatabase("db0"); err != nil { t.Fatal(err) - } else if err = data.CreateRetentionPolicy("db0", &meta.RetentionPolicyInfo{Name: "rp0"}); err != nil { + } else if err = data.CreateRetentionPolicy("db0", &meta.RetentionPolicyInfo{Name: "rp0", ReplicaN: 1}); err != nil { t.Fatal(err) } diff --git 
a/Godeps/_workspace/src/github.com/influxdb/influxdb/meta/errors.go b/Godeps/_workspace/src/github.com/influxdb/influxdb/meta/errors.go index dfc69aa36..44c54f964 100644 --- a/Godeps/_workspace/src/github.com/influxdb/influxdb/meta/errors.go +++ b/Godeps/_workspace/src/github.com/influxdb/influxdb/meta/errors.go @@ -58,10 +58,9 @@ var ( ErrRetentionPolicyDurationTooLow = errors.New(fmt.Sprintf("retention policy duration must be at least %s", RetentionPolicyMinDuration)) - // ErrReplicationFactorMismatch is returned when the replication factor - // does not match the number of nodes in the cluster. This is a temporary - // restriction until v0.9.1 is released. - ErrReplicationFactorMismatch = errors.New("replication factor must match cluster size; this limitation will be lifted in v0.9.1") + // ErrReplicationFactorTooLow is returned when the replication factor is not in an + // acceptable range. + ErrReplicationFactorTooLow = errors.New("replication factor must be greater than 0") ) var ( diff --git a/Godeps/_workspace/src/github.com/influxdb/influxdb/meta/internal/meta.pb.go b/Godeps/_workspace/src/github.com/influxdb/influxdb/meta/internal/meta.pb.go index 979c842b9..fbd8cc504 100644 --- a/Godeps/_workspace/src/github.com/influxdb/influxdb/meta/internal/meta.pb.go +++ b/Godeps/_workspace/src/github.com/influxdb/influxdb/meta/internal/meta.pb.go @@ -37,7 +37,14 @@ It has these top-level messages: SetPrivilegeCommand SetDataCommand SetAdminPrivilegeCommand + UpdateNodeCommand Response + ResponseHeader + ErrorResponse + FetchDataRequest + FetchDataResponse + JoinRequest + JoinResponse */ package internal @@ -48,6 +55,42 @@ import math "math" var _ = proto.Marshal var _ = math.Inf +type RPCType int32 + +const ( + RPCType_Error RPCType = 1 + RPCType_FetchData RPCType = 2 + RPCType_Join RPCType = 3 +) + +var RPCType_name = map[int32]string{ + 1: "Error", + 2: "FetchData", + 3: "Join", +} +var RPCType_value = map[string]int32{ + "Error": 1, + "FetchData": 2, + "Join": 3, +} + +func (x RPCType) Enum() *RPCType { + p := new(RPCType) + *p = x + return p +} +func (x RPCType) String() string { + return proto.EnumName(RPCType_name, int32(x)) +} +func (x *RPCType) UnmarshalJSON(data []byte) error { + value, err := proto.UnmarshalJSONEnum(RPCType_value, data, "RPCType") + if err != nil { + return err + } + *x = RPCType(value) + return nil +} + type Command_Type int32 const ( @@ -69,6 +112,7 @@ const ( Command_SetPrivilegeCommand Command_Type = 16 Command_SetDataCommand Command_Type = 17 Command_SetAdminPrivilegeCommand Command_Type = 18 + Command_UpdateNodeCommand Command_Type = 19 ) var Command_Type_name = map[int32]string{ @@ -90,6 +134,7 @@ var Command_Type_name = map[int32]string{ 16: "SetPrivilegeCommand", 17: "SetDataCommand", 18: "SetAdminPrivilegeCommand", + 19: "UpdateNodeCommand", } var Command_Type_value = map[string]int32{ "CreateNodeCommand": 1, @@ -110,6 +155,7 @@ var Command_Type_value = map[string]int32{ "SetPrivilegeCommand": 16, "SetDataCommand": 17, "SetAdminPrivilegeCommand": 18, + "UpdateNodeCommand": 19, } func (x Command_Type) Enum() *Command_Type { @@ -1112,6 +1158,38 @@ var E_SetAdminPrivilegeCommand_Command = &proto.ExtensionDesc{ Tag: "bytes,118,opt,name=command", } +type UpdateNodeCommand struct { + ID *uint64 `protobuf:"varint,1,req" json:"ID,omitempty"` + Host *string `protobuf:"bytes,2,req" json:"Host,omitempty"` + XXX_unrecognized []byte `json:"-"` +} + +func (m *UpdateNodeCommand) Reset() { *m = UpdateNodeCommand{} } +func (m *UpdateNodeCommand) String() string { return 
proto.CompactTextString(m) } +func (*UpdateNodeCommand) ProtoMessage() {} + +func (m *UpdateNodeCommand) GetID() uint64 { + if m != nil && m.ID != nil { + return *m.ID + } + return 0 +} + +func (m *UpdateNodeCommand) GetHost() string { + if m != nil && m.Host != nil { + return *m.Host + } + return "" +} + +var E_UpdateNodeCommand_Command = &proto.ExtensionDesc{ + ExtendedType: (*Command)(nil), + ExtensionType: (*UpdateNodeCommand)(nil), + Field: 119, + Name: "internal.UpdateNodeCommand.command", + Tag: "bytes,119,opt,name=command", +} + type Response struct { OK *bool `protobuf:"varint,1,req" json:"OK,omitempty"` Error *string `protobuf:"bytes,2,opt" json:"Error,omitempty"` @@ -1144,7 +1222,182 @@ func (m *Response) GetIndex() uint64 { return 0 } +type ResponseHeader struct { + OK *bool `protobuf:"varint,1,req" json:"OK,omitempty"` + Error *string `protobuf:"bytes,2,opt" json:"Error,omitempty"` + XXX_unrecognized []byte `json:"-"` +} + +func (m *ResponseHeader) Reset() { *m = ResponseHeader{} } +func (m *ResponseHeader) String() string { return proto.CompactTextString(m) } +func (*ResponseHeader) ProtoMessage() {} + +func (m *ResponseHeader) GetOK() bool { + if m != nil && m.OK != nil { + return *m.OK + } + return false +} + +func (m *ResponseHeader) GetError() string { + if m != nil && m.Error != nil { + return *m.Error + } + return "" +} + +type ErrorResponse struct { + Header *ResponseHeader `protobuf:"bytes,1,req" json:"Header,omitempty"` + XXX_unrecognized []byte `json:"-"` +} + +func (m *ErrorResponse) Reset() { *m = ErrorResponse{} } +func (m *ErrorResponse) String() string { return proto.CompactTextString(m) } +func (*ErrorResponse) ProtoMessage() {} + +func (m *ErrorResponse) GetHeader() *ResponseHeader { + if m != nil { + return m.Header + } + return nil +} + +type FetchDataRequest struct { + Index *uint64 `protobuf:"varint,1,req" json:"Index,omitempty"` + Term *uint64 `protobuf:"varint,2,req" json:"Term,omitempty"` + Blocking *bool `protobuf:"varint,3,opt,def=0" json:"Blocking,omitempty"` + XXX_unrecognized []byte `json:"-"` +} + +func (m *FetchDataRequest) Reset() { *m = FetchDataRequest{} } +func (m *FetchDataRequest) String() string { return proto.CompactTextString(m) } +func (*FetchDataRequest) ProtoMessage() {} + +const Default_FetchDataRequest_Blocking bool = false + +func (m *FetchDataRequest) GetIndex() uint64 { + if m != nil && m.Index != nil { + return *m.Index + } + return 0 +} + +func (m *FetchDataRequest) GetTerm() uint64 { + if m != nil && m.Term != nil { + return *m.Term + } + return 0 +} + +func (m *FetchDataRequest) GetBlocking() bool { + if m != nil && m.Blocking != nil { + return *m.Blocking + } + return Default_FetchDataRequest_Blocking +} + +type FetchDataResponse struct { + Header *ResponseHeader `protobuf:"bytes,1,req" json:"Header,omitempty"` + Index *uint64 `protobuf:"varint,2,req" json:"Index,omitempty"` + Term *uint64 `protobuf:"varint,3,req" json:"Term,omitempty"` + Data []byte `protobuf:"bytes,4,opt" json:"Data,omitempty"` + XXX_unrecognized []byte `json:"-"` +} + +func (m *FetchDataResponse) Reset() { *m = FetchDataResponse{} } +func (m *FetchDataResponse) String() string { return proto.CompactTextString(m) } +func (*FetchDataResponse) ProtoMessage() {} + +func (m *FetchDataResponse) GetHeader() *ResponseHeader { + if m != nil { + return m.Header + } + return nil +} + +func (m *FetchDataResponse) GetIndex() uint64 { + if m != nil && m.Index != nil { + return *m.Index + } + return 0 +} + +func (m *FetchDataResponse) GetTerm() uint64 { + if m != nil 
&& m.Term != nil { + return *m.Term + } + return 0 +} + +func (m *FetchDataResponse) GetData() []byte { + if m != nil { + return m.Data + } + return nil +} + +type JoinRequest struct { + Addr *string `protobuf:"bytes,1,req" json:"Addr,omitempty"` + XXX_unrecognized []byte `json:"-"` +} + +func (m *JoinRequest) Reset() { *m = JoinRequest{} } +func (m *JoinRequest) String() string { return proto.CompactTextString(m) } +func (*JoinRequest) ProtoMessage() {} + +func (m *JoinRequest) GetAddr() string { + if m != nil && m.Addr != nil { + return *m.Addr + } + return "" +} + +type JoinResponse struct { + Header *ResponseHeader `protobuf:"bytes,1,req" json:"Header,omitempty"` + // Indicates that this node should take part in the raft cluster. + EnableRaft *bool `protobuf:"varint,2,opt" json:"EnableRaft,omitempty"` + // The addresses of raft peers to use if joining as a raft member. If not joining + // as a raft member, these are the nodes running raft. + RaftNodes []string `protobuf:"bytes,3,rep" json:"RaftNodes,omitempty"` + // The node ID assigned to the requesting node. + NodeID *uint64 `protobuf:"varint,4,opt" json:"NodeID,omitempty"` + XXX_unrecognized []byte `json:"-"` +} + +func (m *JoinResponse) Reset() { *m = JoinResponse{} } +func (m *JoinResponse) String() string { return proto.CompactTextString(m) } +func (*JoinResponse) ProtoMessage() {} + +func (m *JoinResponse) GetHeader() *ResponseHeader { + if m != nil { + return m.Header + } + return nil +} + +func (m *JoinResponse) GetEnableRaft() bool { + if m != nil && m.EnableRaft != nil { + return *m.EnableRaft + } + return false +} + +func (m *JoinResponse) GetRaftNodes() []string { + if m != nil { + return m.RaftNodes + } + return nil +} + +func (m *JoinResponse) GetNodeID() uint64 { + if m != nil && m.NodeID != nil { + return *m.NodeID + } + return 0 +} + func init() { + proto.RegisterEnum("internal.RPCType", RPCType_name, RPCType_value) proto.RegisterEnum("internal.Command_Type", Command_Type_name, Command_Type_value) proto.RegisterExtension(E_CreateNodeCommand_Command) proto.RegisterExtension(E_DeleteNodeCommand_Command) @@ -1164,4 +1417,5 @@ func init() { proto.RegisterExtension(E_SetPrivilegeCommand_Command) proto.RegisterExtension(E_SetDataCommand_Command) proto.RegisterExtension(E_SetAdminPrivilegeCommand_Command) + proto.RegisterExtension(E_UpdateNodeCommand_Command) } diff --git a/Godeps/_workspace/src/github.com/influxdb/influxdb/meta/internal/meta.proto b/Godeps/_workspace/src/github.com/influxdb/influxdb/meta/internal/meta.proto index d5f5bf1fe..2aa50244b 100644 --- a/Godeps/_workspace/src/github.com/influxdb/influxdb/meta/internal/meta.proto +++ b/Godeps/_workspace/src/github.com/influxdb/influxdb/meta/internal/meta.proto @@ -98,6 +98,8 @@ message Command { UpdateUserCommand = 15; SetPrivilegeCommand = 16; SetDataCommand = 17; + SetAdminPrivilegeCommand = 18; + UpdateNodeCommand = 19; } required Type type = 1; @@ -250,8 +252,69 @@ message SetAdminPrivilegeCommand { required bool Admin = 2; } +message UpdateNodeCommand { + extend Command { + optional UpdateNodeCommand command = 119; + } + required uint64 ID = 1; + required string Host = 2; +} + message Response { required bool OK = 1; optional string Error = 2; optional uint64 Index = 3; } + + +//======================================================================== +// +// RPC - higher-level cluster communication operations +// +//======================================================================== + +enum RPCType { + Error = 1; + FetchData = 2; + Join = 3; +} + +message 
ResponseHeader { + required bool OK = 1; + optional string Error = 2; +} + +message ErrorResponse { + required ResponseHeader Header = 1; +} + +message FetchDataRequest { + required uint64 Index = 1; + required uint64 Term = 2; + optional bool Blocking = 3 [default = false]; +} + +message FetchDataResponse { + required ResponseHeader Header = 1; + required uint64 Index = 2; + required uint64 Term = 3; + optional bytes Data = 4; +} + +message JoinRequest { + required string Addr = 1; +} + +message JoinResponse { + required ResponseHeader Header = 1; + + // Indicates that this node should take part in the raft cluster. + optional bool EnableRaft = 2; + + // The addresses of raft peers to use if joining as a raft member. If not joining + // as a raft member, these are the nodes running raft. + repeated string RaftNodes = 3; + + // The node ID assigned to the requesting node. + optional uint64 NodeID = 4; +} diff --git a/Godeps/_workspace/src/github.com/influxdb/influxdb/meta/proxy.go b/Godeps/_workspace/src/github.com/influxdb/influxdb/meta/proxy.go new file mode 100644 index 000000000..bb0fa75ab --- /dev/null +++ b/Godeps/_workspace/src/github.com/influxdb/influxdb/meta/proxy.go @@ -0,0 +1,62 @@ +package meta + +import ( + "io" + "net" +) + +// proxy brokers a connection from src to dst +func proxy(dst, src *net.TCPConn) error { + // channels to wait on the close event for each connection + serverClosed := make(chan struct{}, 1) + clientClosed := make(chan struct{}, 1) + errors := make(chan error, 1) + + go broker(dst, src, clientClosed, errors) + go broker(src, dst, serverClosed, errors) + + // wait for one half of the proxy to exit, then trigger a shutdown of the + // other half by calling CloseRead(). This will break the read loop in the + // broker and allow us to fully close the connection cleanly without a + // "use of closed network connection" error. + var waitFor chan struct{} + select { + case <-clientClosed: + // the client closed first and any more packets from the server aren't + // useful, so we can optionally SetLinger(0) here to recycle the port + // faster. + dst.SetLinger(0) + dst.CloseRead() + waitFor = serverClosed + case <-serverClosed: + src.CloseRead() + waitFor = clientClosed + case err := <-errors: + src.CloseRead() + dst.SetLinger(0) + dst.CloseRead() + return err + } + + // Wait for the other connection to close. + <-waitFor + return nil +} + +// This does the actual data transfer. +// The broker only closes the Read side. +func broker(dst, src net.Conn, srcClosed chan struct{}, errors chan error) { + // We can handle errors in a finer-grained manner by inlining io.Copy (it's + // simple, and we drop the ReaderFrom or WriterTo checks for + // net.Conn->net.Conn transfers, which aren't needed). This would also let + // us adjust buffersize. 
+	_, err := io.Copy(dst, src)
+
+	if err != nil {
+		errors <- err
+	}
+	if err := src.Close(); err != nil {
+		errors <- err
+	}
+	srcClosed <- struct{}{}
+}
diff --git a/Godeps/_workspace/src/github.com/influxdb/influxdb/meta/rpc.go b/Godeps/_workspace/src/github.com/influxdb/influxdb/meta/rpc.go
new file mode 100644
index 000000000..d52c0b0a7
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/influxdb/influxdb/meta/rpc.go
@@ -0,0 +1,460 @@
+package meta
+
+import (
+	"encoding/binary"
+	"errors"
+	"fmt"
+	"io"
+	"io/ioutil"
+	"log"
+	"net"
+	"time"
+
+	"github.com/gogo/protobuf/proto"
+	"github.com/hashicorp/raft"
+	"github.com/influxdb/influxdb/meta/internal"
+)
+
+// Max size of a message before we treat the size as invalid
+const (
+	MaxMessageSize    = 1024 * 1024 * 1024
+	leaderDialTimeout = 10 * time.Second
+)
+
+// rpc handles request/response style messaging between cluster nodes
+type rpc struct {
+	logger         *log.Logger
+	tracingEnabled bool
+
+	store interface {
+		cachedData() *Data
+		IsLeader() bool
+		Leader() string
+		Peers() ([]string, error)
+		AddPeer(host string) error
+		CreateNode(host string) (*NodeInfo, error)
+		NodeByHost(host string) (*NodeInfo, error)
+		WaitForDataChanged() error
+	}
+}
+
+type JoinResult struct {
+	RaftEnabled bool
+	RaftNodes   []string
+	NodeID      uint64
+}
+
+type Reply interface {
+	GetHeader() *internal.ResponseHeader
+}
+
+// proxyLeader proxies the connection to the current raft leader
+func (r *rpc) proxyLeader(conn *net.TCPConn) {
+	if r.store.Leader() == "" {
+		r.sendError(conn, "no leader")
+		return
+	}
+
+	leaderConn, err := net.DialTimeout("tcp", r.store.Leader(), leaderDialTimeout)
+	if err != nil {
+		r.sendError(conn, fmt.Sprintf("dial leader: %v", err))
+		return
+	}
+	defer leaderConn.Close()
+
+	leaderConn.Write([]byte{MuxRPCHeader})
+	if err := proxy(leaderConn.(*net.TCPConn), conn); err != nil {
+		r.sendError(conn, fmt.Sprintf("leader proxy error: %v", err))
+	}
+}
+
+// handleRPCConn reads a command from the connection and executes it.
+func (r *rpc) handleRPCConn(conn net.Conn) {
+	defer conn.Close()
+	// RPC connections should execute on the leader. If we are not the leader,
+	// proxy the connection to the leader so that clients can connect to any node
+	// in the cluster.
+	r.traceCluster("rpc connection from: %v", conn.RemoteAddr())
+
+	if !r.store.IsLeader() {
+		r.proxyLeader(conn.(*net.TCPConn))
+		return
+	}
+
+	// Read and execute request.
+	typ, resp, err := func() (internal.RPCType, proto.Message, error) {
+		// Read request size.
+		var sz uint64
+		if err := binary.Read(conn, binary.BigEndian, &sz); err != nil {
+			return internal.RPCType_Error, nil, fmt.Errorf("read size: %s", err)
+		}
+
+		if sz == 0 {
+			return 0, nil, fmt.Errorf("invalid message size: %d", sz)
+		}
+
+		if sz >= MaxMessageSize {
+			return 0, nil, fmt.Errorf("max message size of %d exceeded: %d", MaxMessageSize, sz)
+		}
+
+		// Read request.
+ buf := make([]byte, sz) + if _, err := io.ReadFull(conn, buf); err != nil { + return internal.RPCType_Error, nil, fmt.Errorf("read request: %s", err) + } + + // Determine the RPC type + rpcType := internal.RPCType(btou64(buf[0:8])) + buf = buf[8:] + + r.traceCluster("recv %v request on: %v", rpcType, conn.RemoteAddr()) + switch rpcType { + case internal.RPCType_FetchData: + var req internal.FetchDataRequest + if err := proto.Unmarshal(buf, &req); err != nil { + return internal.RPCType_Error, nil, fmt.Errorf("fetch request unmarshal: %v", err) + } + resp, err := r.handleFetchData(&req) + return rpcType, resp, err + case internal.RPCType_Join: + var req internal.JoinRequest + if err := proto.Unmarshal(buf, &req); err != nil { + return internal.RPCType_Error, nil, fmt.Errorf("join request unmarshal: %v", err) + } + resp, err := r.handleJoinRequest(&req) + return rpcType, resp, err + default: + return internal.RPCType_Error, nil, fmt.Errorf("unknown rpc type:%v", rpcType) + } + }() + + // Handle unexpected RPC errors + if err != nil { + resp = &internal.ErrorResponse{ + Header: &internal.ResponseHeader{ + OK: proto.Bool(false), + }, + } + typ = internal.RPCType_Error + } + + // Set the status header and error message + if reply, ok := resp.(Reply); ok { + reply.GetHeader().OK = proto.Bool(err == nil) + if err != nil { + reply.GetHeader().Error = proto.String(err.Error()) + } + } + + r.sendResponse(conn, typ, resp) +} + +func (r *rpc) sendResponse(conn net.Conn, typ internal.RPCType, resp proto.Message) { + // Marshal the response back to a protobuf + buf, err := proto.Marshal(resp) + if err != nil { + r.logger.Printf("unable to marshal response: %v", err) + return + } + + // Encode response back to connection. + if _, err := conn.Write(r.pack(typ, buf)); err != nil { + r.logger.Printf("unable to write rpc response: %s", err) + } +} + +func (r *rpc) sendError(conn net.Conn, msg string) { + r.traceCluster(msg) + resp := &internal.ErrorResponse{ + Header: &internal.ResponseHeader{ + OK: proto.Bool(false), + Error: proto.String(msg), + }, + } + + r.sendResponse(conn, internal.RPCType_Error, resp) +} + +// handleFetchData handles a request for the current nodes meta data +func (r *rpc) handleFetchData(req *internal.FetchDataRequest) (*internal.FetchDataResponse, error) { + var ( + b []byte + data *Data + err error + ) + + for { + data = r.store.cachedData() + if data.Index != req.GetIndex() { + b, err = data.MarshalBinary() + if err != nil { + return nil, err + } + break + } + + if !req.GetBlocking() { + break + } + + if err := r.store.WaitForDataChanged(); err != nil { + return nil, err + } + } + + return &internal.FetchDataResponse{ + Header: &internal.ResponseHeader{ + OK: proto.Bool(true), + }, + Index: proto.Uint64(data.Index), + Term: proto.Uint64(data.Term), + Data: b}, nil +} + +// handleJoinRequest handles a request to join the cluster +func (r *rpc) handleJoinRequest(req *internal.JoinRequest) (*internal.JoinResponse, error) { + r.traceCluster("join request from: %v", *req.Addr) + + node, err := func() (*NodeInfo, error) { + + // attempt to create the node + node, err := r.store.CreateNode(*req.Addr) + // if it exists, return the existing node + if err == ErrNodeExists { + node, err = r.store.NodeByHost(*req.Addr) + if err != nil { + return node, err + } + r.logger.Printf("existing node re-joined: id=%v addr=%v", node.ID, node.Host) + } else if err != nil { + return nil, fmt.Errorf("create node: %v", err) + } + + peers, err := r.store.Peers() + if err != nil { + return nil, 
fmt.Errorf("list peers: %v", err)
+		}
+
+		// If we have fewer than 3 nodes, add them as raft peers if they are not
+		// already a peer
+		if len(peers) < MaxRaftNodes && !raft.PeerContained(peers, *req.Addr) {
+			r.logger.Printf("adding new raft peer: nodeId=%v addr=%v", node.ID, *req.Addr)
+			if err = r.store.AddPeer(*req.Addr); err != nil {
+				return node, fmt.Errorf("add peer: %v", err)
+			}
+		}
+		return node, err
+	}()
+
+	nodeID := uint64(0)
+	if node != nil {
+		nodeID = node.ID
+	}
+
+	if err != nil {
+		return nil, err
+	}
+
+	// get the current raft peers
+	peers, err := r.store.Peers()
+	if err != nil {
+		return nil, fmt.Errorf("list peers: %v", err)
+	}
+
+	return &internal.JoinResponse{
+		Header: &internal.ResponseHeader{
+			OK: proto.Bool(true),
+		},
+		EnableRaft: proto.Bool(raft.PeerContained(peers, *req.Addr)),
+		RaftNodes:  peers,
+		NodeID:     proto.Uint64(nodeID),
+	}, err
+
+}
+
+// pack returns a TLV style byte slice encoding the size of the payload, the RPC type
+// and the RPC data
+func (r *rpc) pack(typ internal.RPCType, b []byte) []byte {
+	buf := u64tob(uint64(len(b)) + 8)
+	buf = append(buf, u64tob(uint64(typ))...)
+	buf = append(buf, b...)
+	return buf
+}
+
+// fetchMetaData returns the latest copy of the meta store data from the current
+// leader.
+func (r *rpc) fetchMetaData(blocking bool) (*Data, error) {
+	assert(r.store != nil, "store is nil")
+
+	// Retrieve the current known leader.
+	leader := r.store.Leader()
+	if leader == "" {
+		return nil, errors.New("no leader")
+	}
+
+	var index, term uint64
+	data := r.store.cachedData()
+	if data != nil {
+		index = data.Index
+		term = data.Term
+	}
+	resp, err := r.call(leader, &internal.FetchDataRequest{
+		Index:    proto.Uint64(index),
+		Term:     proto.Uint64(term),
+		Blocking: proto.Bool(blocking),
+	})
+	if err != nil {
+		return nil, err
+	}
+
+	switch t := resp.(type) {
+	case *internal.FetchDataResponse:
+		// If data is nil, then the term and index we sent matches the leader
+		if t.GetData() == nil {
+			return nil, nil
+		}
+		ms := &Data{}
+		if err := ms.UnmarshalBinary(t.GetData()); err != nil {
+			return nil, fmt.Errorf("rpc unmarshal metadata: %v", err)
+		}
+		return ms, nil
+	case *internal.ErrorResponse:
+		return nil, fmt.Errorf("rpc failed: %s", t.GetHeader().GetError())
+	default:
+		return nil, fmt.Errorf("rpc failed: unknown response type: %v", t.String())
+	}
+}
+
+// join attempts to join a cluster at remoteAddr using localAddr as the current
+// node's cluster address
+func (r *rpc) join(localAddr, remoteAddr string) (*JoinResult, error) {
+	req := &internal.JoinRequest{
+		Addr: proto.String(localAddr),
+	}
+
+	resp, err := r.call(remoteAddr, req)
+	if err != nil {
+		return nil, err
+	}
+
+	switch t := resp.(type) {
+	case *internal.JoinResponse:
+		return &JoinResult{
+			RaftEnabled: t.GetEnableRaft(),
+			RaftNodes:   t.GetRaftNodes(),
+			NodeID:      t.GetNodeID(),
+		}, nil
+	case *internal.ErrorResponse:
+		return nil, fmt.Errorf("rpc failed: %s", t.GetHeader().GetError())
+	default:
+		return nil, fmt.Errorf("rpc failed: unknown response type: %v", t.String())
+	}
+}
+
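For illustration only, not part of this patch: the TLV frame written by pack above is an 8-byte big-endian size (payload length plus 8), an 8-byte type, then the protobuf payload. A minimal, self-contained sketch of that framing; the pack/unpack helpers here are hypothetical stand-ins, not this package's API:

package main

import (
	"encoding/binary"
	"fmt"
)

// pack mirrors the TLV layout used by rpc.pack: an 8-byte big-endian size
// (counting type + payload), an 8-byte type, then the payload itself.
func pack(typ uint64, payload []byte) []byte {
	buf := make([]byte, 16+len(payload))
	binary.BigEndian.PutUint64(buf[0:8], uint64(len(payload))+8)
	binary.BigEndian.PutUint64(buf[8:16], typ)
	copy(buf[16:], payload)
	return buf
}

// unpack reverses pack, validating the embedded size the same way
// rpc.call does after ioutil.ReadAll.
func unpack(frame []byte) (typ uint64, payload []byte, err error) {
	if len(frame) < 16 {
		return 0, nil, fmt.Errorf("short frame: got %d bytes, exp at least 16", len(frame))
	}
	sz := binary.BigEndian.Uint64(frame[0:8])
	if uint64(len(frame[8:])) != sz {
		return 0, nil, fmt.Errorf("size mismatch: got %d, exp %d", len(frame[8:]), sz)
	}
	return binary.BigEndian.Uint64(frame[8:16]), frame[16:], nil
}

func main() {
	frame := pack(1, []byte("hello"))
	typ, payload, err := unpack(frame)
	fmt.Println(typ, string(payload), err) // 1 hello <nil>
}

+// call sends an encoded request to the remote leader and returns
+// an encoded response value.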
+func (r *rpc) call(dest string, req proto.Message) (proto.Message, error) { + // Determine type of request + var rpcType internal.RPCType + switch t := req.(type) { + case *internal.JoinRequest: + rpcType = internal.RPCType_Join + case *internal.FetchDataRequest: + rpcType = internal.RPCType_FetchData + default: + return nil, fmt.Errorf("unknown rpc request type: %v", t) + } + + // Create a connection to the leader. + conn, err := net.DialTimeout("tcp", dest, leaderDialTimeout) + if err != nil { + return nil, fmt.Errorf("rpc dial: %v", err) + } + defer conn.Close() + + // Write a marker byte for rpc messages. + _, err = conn.Write([]byte{MuxRPCHeader}) + if err != nil { + return nil, err + } + + b, err := proto.Marshal(req) + if err != nil { + return nil, fmt.Errorf("rpc marshal: %v", err) + } + + // Write request size & bytes. + if _, err := conn.Write(r.pack(rpcType, b)); err != nil { + return nil, fmt.Errorf("write %v rpc: %s", rpcType, err) + } + + data, err := ioutil.ReadAll(conn) + if err != nil { + return nil, fmt.Errorf("read %v rpc: %v", rpcType, err) + } + + // Should always have a size and type + if exp := 16; len(data) < exp { + r.traceCluster("recv: %v", string(data)) + return nil, fmt.Errorf("rpc %v failed: short read: got %v, exp %v", rpcType, len(data), exp) + } + + sz := btou64(data[0:8]) + if len(data[8:]) != int(sz) { + r.traceCluster("recv: %v", string(data)) + return nil, fmt.Errorf("rpc %v failed: short read: got %v, exp %v", rpcType, len(data[8:]), sz) + } + + // See what response type we got back, could get a general error response + rpcType = internal.RPCType(btou64(data[8:16])) + data = data[16:] + + var resp proto.Message + switch rpcType { + case internal.RPCType_Join: + resp = &internal.JoinResponse{} + case internal.RPCType_FetchData: + resp = &internal.FetchDataResponse{} + case internal.RPCType_Error: + resp = &internal.ErrorResponse{} + default: + return nil, fmt.Errorf("unknown rpc response type: %v", rpcType) + } + + if err := proto.Unmarshal(data, resp); err != nil { + return nil, fmt.Errorf("rpc unmarshal: %v", err) + } + + if reply, ok := resp.(Reply); ok { + if !reply.GetHeader().GetOK() { + return nil, fmt.Errorf("rpc %v failed: %s", rpcType, reply.GetHeader().GetError()) + } + } + + return resp, nil +} + +func (r *rpc) traceCluster(msg string, args ...interface{}) { + if r.tracingEnabled { + r.logger.Printf("rpc: "+msg, args...) 
+ } +} + +func u64tob(v uint64) []byte { + b := make([]byte, 8) + binary.BigEndian.PutUint64(b, v) + return b +} + +func btou64(b []byte) uint64 { + return binary.BigEndian.Uint64(b) +} + +func contains(s []string, e string) bool { + for _, a := range s { + if a == e { + return true + } + } + return false +} diff --git a/Godeps/_workspace/src/github.com/influxdb/influxdb/meta/rpc_test.go b/Godeps/_workspace/src/github.com/influxdb/influxdb/meta/rpc_test.go new file mode 100644 index 000000000..3f60c6bd0 --- /dev/null +++ b/Godeps/_workspace/src/github.com/influxdb/influxdb/meta/rpc_test.go @@ -0,0 +1,242 @@ +package meta + +import ( + "net" + "sync" + "testing" +) + +func TestRPCFetchData(t *testing.T) { + + serverRPC := &rpc{ + store: &fakeStore{ + md: &Data{Index: 99}, + }, + } + + srv := newTestServer(t, serverRPC) + defer srv.Close() + go srv.Serve() + + // Wait for the RPC server to be ready + <-srv.Ready + + // create a new RPC with no existing meta.Data cache + clientRPC := &rpc{ + store: &fakeStore{ + leader: srv.Listener.Addr().String(), + }, + } + + // fetch the servers meta-data + md, err := clientRPC.fetchMetaData(false) + if err != nil { + t.Fatalf("failed to fetchMetaData: %v", err) + } + + if md == nil { + t.Fatalf("meta-data is nil") + } + + if exp := uint64(99); md.Index != exp { + t.Fatalf("meta-data mismatch. got %v, exp %v", md.Index, exp) + } +} + +func TestRPCFetchDataMatchesLeader(t *testing.T) { + serverRPC := &rpc{ + store: &fakeStore{ + md: &Data{Index: 99}, + }, + } + + srv := newTestServer(t, serverRPC) + defer srv.Close() + go srv.Serve() + + // Wait for the RPC server to be ready + <-srv.Ready + + // create a new RPC with a matching index as the server + clientRPC := &rpc{ + store: &fakeStore{ + leader: srv.Listener.Addr().String(), + md: &Data{Index: 99}, + }, + } + + // fetch the servers meta-data + md, err := clientRPC.fetchMetaData(false) + if err != nil { + t.Fatalf("failed to fetchMetaData: %v", err) + } + + if md != nil { + t.Fatalf("meta-data is not nil") + } +} + +func TestRPCFetchDataMatchesBlocking(t *testing.T) { + fs := &fakeStore{ + md: &Data{Index: 99}, + blockChan: make(chan struct{}), + } + serverRPC := &rpc{ + store: fs, + } + + srv := newTestServer(t, serverRPC) + defer srv.Close() + go srv.Serve() + + // Wait for the RPC server to be ready + <-srv.Ready + + // create a new RPC with a matching index as the server + clientRPC := &rpc{ + store: &fakeStore{ + leader: srv.Listener.Addr().String(), + md: &Data{Index: 99}, + }, + } + + // Kick off the fetching block + var wg sync.WaitGroup + wg.Add(1) + go func() { + defer wg.Done() + // fetch the servers meta-data + md, err := clientRPC.fetchMetaData(true) + if err != nil { + t.Fatalf("failed to fetchMetaData: %v", err) + } + + if md == nil { + t.Fatalf("meta-data is nil") + } + + if exp := uint64(100); md.Index != exp { + t.Fatalf("meta-data mismatch. 
got %v, exp %v", md.Index, exp)
+		}
+	}()
+
+	// Simulate the remote index changing and unblocking
+	fs.mu.Lock()
+	fs.md.Index = 100
+	fs.mu.Unlock()
+	close(fs.blockChan)
+	wg.Wait()
+}
+
+func TestRPCJoin(t *testing.T) {
+	fs := &fakeStore{
+		leader:    "1.2.3.4:1234",
+		md:        &Data{Index: 99},
+		newNodeID: uint64(100),
+		blockChan: make(chan struct{}),
+	}
+	serverRPC := &rpc{
+		store: fs,
+	}
+
+	srv := newTestServer(t, serverRPC)
+	defer srv.Close()
+	go srv.Serve()
+
+	// Wait for the RPC server to be ready
+	<-srv.Ready
+
+	// create a new RPC with a matching index as the server
+	clientRPC := &rpc{
+		store: &fakeStore{
+			leader: srv.Listener.Addr().String(),
+			md:     &Data{Index: 99},
+		},
+	}
+
+	res, err := clientRPC.join("1.2.3.4:1234", srv.Listener.Addr().String())
+	if err != nil {
+		t.Fatalf("failed to join: %v", err)
+	}
+
+	if exp := true; res.RaftEnabled != exp {
+		t.Fatalf("raft enabled mismatch: got %v, exp %v", res.RaftEnabled, exp)
+	}
+
+	if exp := 1; len(res.RaftNodes) != exp {
+		t.Fatalf("raft peer mismatch: got %v, exp %v", len(res.RaftNodes), exp)
+	}
+
+	if exp := "1.2.3.4:1234"; res.RaftNodes[0] != exp {
+		t.Fatalf("raft peer mismatch: got %v, exp %v", res.RaftNodes[0], exp)
+	}
+
+	if exp := uint64(100); res.NodeID != exp {
+		t.Fatalf("node id mismatch. got %v, exp %v", res.NodeID, exp)
+	}
+}
+
+type fakeStore struct {
+	mu        sync.RWMutex
+	leader    string
+	newNodeID uint64
+	md        *Data
+	blockChan chan struct{}
+}
+
+type testServer struct {
+	Listener net.Listener
+	Ready    chan struct{}
+	rpc      *rpc
+	t        *testing.T
+}
+
+func newTestServer(t *testing.T, rpc *rpc) *testServer {
+	ln, err := net.Listen("tcp", "127.0.0.1:0")
+	if err != nil {
+		t.Fatalf("failed to listen: %v", err)
+	}
+	return &testServer{
+		Listener: ln,
+		Ready:    make(chan struct{}),
+		rpc:      rpc,
+		t:        t,
+	}
+}
+
+func (s *testServer) Close() {
+	s.Listener.Close()
+}
+
+func (s *testServer) Serve() {
+	close(s.Ready)
+	conn, err := s.Listener.Accept()
+	if err != nil {
+		s.t.Fatalf("failed to accept: %v", err)
+	}
+
+	// Demux...
+	b := make([]byte, 1)
+	if _, err := conn.Read(b); err != nil {
+		s.t.Fatalf("failed to demux: %v", err)
+	}
+	s.rpc.handleRPCConn(conn)
+}
+
+func (f *fakeStore) cachedData() *Data {
+	f.mu.RLock()
+	defer f.mu.RUnlock()
+	return f.md
+}
+
+func (f *fakeStore) IsLeader() bool             { return true }
+func (f *fakeStore) Leader() string             { return f.leader }
+func (f *fakeStore) Peers() ([]string, error)   { return []string{f.leader}, nil }
+func (f *fakeStore) AddPeer(host string) error  { return nil }
+func (f *fakeStore) CreateNode(host string) (*NodeInfo, error) {
+	return &NodeInfo{ID: f.newNodeID, Host: host}, nil
+}
+func (f *fakeStore) NodeByHost(host string) (*NodeInfo, error) { return nil, nil }
+func (f *fakeStore) WaitForDataChanged() error {
+	<-f.blockChan
+	return nil
+}
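The blocking fetch exercised above is a long poll: handleFetchData parks on WaitForDataChanged, which the store implements by closing and replacing a channel whenever the metastore index advances. A self-contained sketch of that signaling pattern; the notifier type here is illustrative, not this package's API:

package main

import (
	"fmt"
	"sync"
)

// notifier hands out a channel that is closed on the next change,
// mirroring the Store.WaitForDataChanged / close(s.changed) pattern.
type notifier struct {
	mu      sync.Mutex
	index   uint64
	changed chan struct{}
}

func newNotifier() *notifier { return &notifier{changed: make(chan struct{})} }

// wait blocks until the index differs from since, then returns it.
func (n *notifier) wait(since uint64) uint64 {
	for {
		n.mu.Lock()
		idx, ch := n.index, n.changed
		n.mu.Unlock()
		if idx != since {
			return idx
		}
		<-ch // parked until the next advance
	}
}

// advance bumps the index and wakes all waiters by closing the old channel.
func (n *notifier) advance() {
	n.mu.Lock()
	n.index++
	close(n.changed)
	n.changed = make(chan struct{})
	n.mu.Unlock()
}

func main() {
	n := newNotifier()
	done := make(chan uint64)
	go func() { done <- n.wait(0) }()
	n.advance()
	fmt.Println(<-done) // 1
}

diff --git a/Godeps/_workspace/src/github.com/influxdb/influxdb/meta/state.go b/Godeps/_workspace/src/github.com/influxdb/influxdb/meta/state.go
new file mode 100644
index 000000000..a442a8007
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/influxdb/influxdb/meta/state.go
@@ -0,0 +1,489 @@
+package meta
+
+import (
+	"bytes"
+	"encoding/json"
+	"errors"
+	"fmt"
+	"io/ioutil"
+	"math/rand"
+	"os"
+	"path/filepath"
+	"sync"
+	"time"
+
+	"github.com/hashicorp/raft"
+	"github.com/hashicorp/raft-boltdb"
+)
+
+// raftState abstracts the interaction of the raft consensus layer
+// across local or remote nodes.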
It is a form of the state design pattern and allows +// the meta.Store to change its behavior with the raft layer at runtime. +type raftState interface { + open() error + remove() error + initialize() error + leader() string + isLeader() bool + sync(index uint64, timeout time.Duration) error + setPeers(addrs []string) error + addPeer(addr string) error + peers() ([]string, error) + invalidate() error + close() error + lastIndex() uint64 + apply(b []byte) error + snapshot() error +} + +// localRaft is a consensus strategy that uses a local raft implementation for +// consensus operations. +type localRaft struct { + wg sync.WaitGroup + closing chan struct{} + store *Store + raft *raft.Raft + transport *raft.NetworkTransport + peerStore raft.PeerStore + raftStore *raftboltdb.BoltStore + raftLayer *raftLayer +} + +func (r *localRaft) remove() error { + if err := os.RemoveAll(filepath.Join(r.store.path, "raft.db")); err != nil { + return err + } + if err := os.RemoveAll(filepath.Join(r.store.path, "peers.json")); err != nil { + return err + } + if err := os.RemoveAll(filepath.Join(r.store.path, "snapshots")); err != nil { + return err + } + return nil +} + +func (r *localRaft) updateMetaData(ms *Data) { + if ms == nil { + return + } + + updated := false + r.store.mu.RLock() + if ms.Index > r.store.data.Index { + updated = true + } + r.store.mu.RUnlock() + + if updated { + r.store.Logger.Printf("Updating metastore to term=%v index=%v", ms.Term, ms.Index) + r.store.mu.Lock() + r.store.data = ms + r.store.mu.Unlock() + } +} + +func (r *localRaft) invalidate() error { + if r.store.IsLeader() { + return nil + } + + ms, err := r.store.rpc.fetchMetaData(false) + if err != nil { + return err + } + + r.updateMetaData(ms) + return nil +} + +func (r *localRaft) open() error { + r.closing = make(chan struct{}) + + s := r.store + // Setup raft configuration. + config := raft.DefaultConfig() + config.LogOutput = ioutil.Discard + + if s.clusterTracingEnabled { + config.Logger = s.Logger + } + config.HeartbeatTimeout = s.HeartbeatTimeout + config.ElectionTimeout = s.ElectionTimeout + config.LeaderLeaseTimeout = s.LeaderLeaseTimeout + config.CommitTimeout = s.CommitTimeout + + // If no peers are set in the config or there is one and we are it, then start as a single server. + if len(s.peers) <= 1 { + config.EnableSingleNode = true + // Ensure we can always become the leader + config.DisableBootstrapAfterElect = false + // Don't shutdown raft automatically if we renamed our hostname back to a previous name + config.ShutdownOnRemove = false + } + + // Build raft layer to multiplex listener. + r.raftLayer = newRaftLayer(s.RaftListener, s.RemoteAddr) + + // Create a transport layer + r.transport = raft.NewNetworkTransport(r.raftLayer, 3, 10*time.Second, config.LogOutput) + + // Create peer storage. + r.peerStore = raft.NewJSONPeers(s.path, r.transport) + + peers, err := r.peerStore.Peers() + if err != nil { + return err + } + + // For single-node clusters, we can update the raft peers before we start the cluster if the hostname + // has changed. + if config.EnableSingleNode { + if err := r.peerStore.SetPeers([]string{s.RemoteAddr.String()}); err != nil { + return err + } + peers = []string{s.RemoteAddr.String()} + } + + // If we have multiple nodes in the cluster, make sure our address is in the raft peers or + // we won't be able to boot into the cluster because the other peers will reject our new hostname. 
This + // is difficult to resolve automatically because we need to have all the raft peers agree on the current members + // of the cluster before we can change them. + if len(peers) > 0 && !raft.PeerContained(peers, s.RemoteAddr.String()) { + s.Logger.Printf("%v is not in the list of raft peers. Please update %v/peers.json on all raft nodes to have the same contents.", s.RemoteAddr.String(), s.Path()) + return fmt.Errorf("peers out of sync: %v not in %v", s.RemoteAddr.String(), peers) + } + + // Create the log store and stable store. + store, err := raftboltdb.NewBoltStore(filepath.Join(s.path, "raft.db")) + if err != nil { + return fmt.Errorf("new bolt store: %s", err) + } + r.raftStore = store + + // Create the snapshot store. + snapshots, err := raft.NewFileSnapshotStore(s.path, raftSnapshotsRetained, os.Stderr) + if err != nil { + return fmt.Errorf("file snapshot store: %s", err) + } + + // Create raft log. + ra, err := raft.NewRaft(config, (*storeFSM)(s), store, store, snapshots, r.peerStore, r.transport) + if err != nil { + return fmt.Errorf("new raft: %s", err) + } + r.raft = ra + + r.wg.Add(1) + go r.logLeaderChanges() + + return nil +} + +func (r *localRaft) logLeaderChanges() { + defer r.wg.Done() + // Logs our current state (Node at 1.2.3.4:8088 [Follower]) + r.store.Logger.Printf(r.raft.String()) + for { + select { + case <-r.closing: + return + case <-r.raft.LeaderCh(): + peers, err := r.peers() + if err != nil { + r.store.Logger.Printf("failed to lookup peers: %v", err) + } + r.store.Logger.Printf("%v. peers=%v", r.raft.String(), peers) + } + } +} + +func (r *localRaft) close() error { + close(r.closing) + r.wg.Wait() + + if r.transport != nil { + r.transport.Close() + r.transport = nil + } + + if r.raftLayer != nil { + r.raftLayer.Close() + r.raftLayer = nil + } + + // Shutdown raft. + if r.raft != nil { + if err := r.raft.Shutdown().Error(); err != nil { + return err + } + r.raft = nil + } + + if r.raftStore != nil { + r.raftStore.Close() + r.raftStore = nil + } + + return nil +} + +func (r *localRaft) initialize() error { + s := r.store + // If we have committed entries then the store is already in the cluster. + if index, err := r.raftStore.LastIndex(); err != nil { + return fmt.Errorf("last index: %s", err) + } else if index > 0 { + return nil + } + + // Force set peers. + if err := r.setPeers(s.peers); err != nil { + return fmt.Errorf("set raft peers: %s", err) + } + + return nil +} + +// apply applies a serialized command to the raft log. +func (r *localRaft) apply(b []byte) error { + // Apply to raft log. + f := r.raft.Apply(b, 0) + if err := f.Error(); err != nil { + return err + } + + // Return response if it's an error. + // No other non-nil objects should be returned. + resp := f.Response() + if err, ok := resp.(error); ok { + return lookupError(err) + } + assert(resp == nil, "unexpected response: %#v", resp) + + return nil +} + +func (r *localRaft) lastIndex() uint64 { + return r.raft.LastIndex() +} + +func (r *localRaft) sync(index uint64, timeout time.Duration) error { + ticker := time.NewTicker(100 * time.Millisecond) + defer ticker.Stop() + + timer := time.NewTimer(timeout) + defer timer.Stop() + + for { + // Wait for next tick or timeout. + select { + case <-ticker.C: + case <-timer.C: + return errors.New("timeout") + } + + // Compare index against current metadata. + r.store.mu.Lock() + ok := (r.store.data.Index >= index) + r.store.mu.Unlock() + + // Exit if we are at least at the given index. 
+ if ok { + return nil + } + } +} + +func (r *localRaft) snapshot() error { + future := r.raft.Snapshot() + return future.Error() +} + +// addPeer adds addr to the list of peers in the cluster. +func (r *localRaft) addPeer(addr string) error { + peers, err := r.peerStore.Peers() + if err != nil { + return err + } + + if len(peers) >= 3 { + return nil + } + + if fut := r.raft.AddPeer(addr); fut.Error() != nil { + return fut.Error() + } + return nil +} + +// setPeers sets a list of peers in the cluster. +func (r *localRaft) setPeers(addrs []string) error { + return r.raft.SetPeers(addrs).Error() +} + +func (r *localRaft) peers() ([]string, error) { + return r.peerStore.Peers() +} + +func (r *localRaft) leader() string { + if r.raft == nil { + return "" + } + + return r.raft.Leader() +} + +func (r *localRaft) isLeader() bool { + r.store.mu.RLock() + defer r.store.mu.RUnlock() + if r.raft == nil { + return false + } + return r.raft.State() == raft.Leader +} + +// remoteRaft is a consensus strategy that uses a remote raft cluster for +// consensus operations. +type remoteRaft struct { + store *Store +} + +func (r *remoteRaft) remove() error { + return nil +} + +func (r *remoteRaft) updateMetaData(ms *Data) { + if ms == nil { + return + } + + updated := false + r.store.mu.RLock() + if ms.Index > r.store.data.Index { + updated = true + } + r.store.mu.RUnlock() + + if updated { + r.store.Logger.Printf("Updating metastore to term=%v index=%v", ms.Term, ms.Index) + r.store.mu.Lock() + r.store.data = ms + r.store.mu.Unlock() + } +} + +func (r *remoteRaft) invalidate() error { + ms, err := r.store.rpc.fetchMetaData(false) + if err != nil { + return err + } + + r.updateMetaData(ms) + return nil +} + +func (r *remoteRaft) setPeers(addrs []string) error { + // Convert to JSON + var buf bytes.Buffer + enc := json.NewEncoder(&buf) + if err := enc.Encode(addrs); err != nil { + return err + } + + // Write out as JSON + return ioutil.WriteFile(filepath.Join(r.store.path, "peers.json"), buf.Bytes(), 0755) +} + +// addPeer adds addr to the list of peers in the cluster. +func (r *remoteRaft) addPeer(addr string) error { + return fmt.Errorf("cannot add peer using remote raft") +} + +func (r *remoteRaft) peers() ([]string, error) { + return readPeersJSON(filepath.Join(r.store.path, "peers.json")) +} + +func (r *remoteRaft) open() error { + if err := r.setPeers(r.store.peers); err != nil { + return err + } + + go func() { + for { + select { + case <-r.store.closing: + return + default: + } + + ms, err := r.store.rpc.fetchMetaData(true) + if err != nil { + r.store.Logger.Printf("fetch metastore: %v", err) + time.Sleep(time.Second) + continue + } + r.updateMetaData(ms) + } + }() + return nil +} + +func (r *remoteRaft) close() error { + return nil +} + +// apply applies a serialized command to the raft log. 
+func (r *remoteRaft) apply(b []byte) error { + return fmt.Errorf("cannot apply log while in remote raft state") +} + +func (r *remoteRaft) initialize() error { + return nil +} + +func (r *remoteRaft) leader() string { + if len(r.store.peers) == 0 { + return "" + } + + return r.store.peers[rand.Intn(len(r.store.peers))] +} + +func (r *remoteRaft) isLeader() bool { + return false +} + +func (r *remoteRaft) lastIndex() uint64 { + return r.store.cachedData().Index +} + +func (r *remoteRaft) sync(index uint64, timeout time.Duration) error { + //FIXME: jwilder: check index and timeout + return r.store.invalidate() +} + +func (r *remoteRaft) snapshot() error { + return fmt.Errorf("cannot snapshot while in remote raft state") +} + +func readPeersJSON(path string) ([]string, error) { + // Read the file + buf, err := ioutil.ReadFile(path) + if err != nil && !os.IsNotExist(err) { + return nil, err + } + + // Check for no peers + if len(buf) == 0 { + return nil, nil + } + + // Decode the peers + var peers []string + dec := json.NewDecoder(bytes.NewReader(buf)) + if err := dec.Decode(&peers); err != nil { + return nil, err + } + + return peers, nil +} diff --git a/Godeps/_workspace/src/github.com/influxdb/influxdb/meta/statement_executor.go b/Godeps/_workspace/src/github.com/influxdb/influxdb/meta/statement_executor.go index 58f86393c..08207a1b2 100644 --- a/Godeps/_workspace/src/github.com/influxdb/influxdb/meta/statement_executor.go +++ b/Godeps/_workspace/src/github.com/influxdb/influxdb/meta/statement_executor.go @@ -10,6 +10,7 @@ import ( type StatementExecutor struct { Store interface { Nodes() ([]NodeInfo, error) + Peers() ([]string, error) Database(name string) (*DatabaseInfo, error) Databases() ([]DatabaseInfo, error) @@ -127,9 +128,14 @@ func (e *StatementExecutor) executeShowServersStatement(q *influxql.ShowServersS return &influxql.Result{Err: err} } - row := &influxql.Row{Columns: []string{"id", "url"}} + peers, err := e.Store.Peers() + if err != nil { + return &influxql.Result{Err: err} + } + + row := &influxql.Row{Columns: []string{"id", "cluster_addr", "raft"}} for _, ni := range nis { - row.Values = append(row.Values, []interface{}{ni.ID, "http://" + ni.Host}) + row.Values = append(row.Values, []interface{}{ni.ID, ni.Host, contains(peers, ni.Host)}) } return &influxql.Result{Series: []*influxql.Row{row}} } diff --git a/Godeps/_workspace/src/github.com/influxdb/influxdb/meta/statement_executor_test.go b/Godeps/_workspace/src/github.com/influxdb/influxdb/meta/statement_executor_test.go index b382a09f6..64894aaea 100644 --- a/Godeps/_workspace/src/github.com/influxdb/influxdb/meta/statement_executor_test.go +++ b/Godeps/_workspace/src/github.com/influxdb/influxdb/meta/statement_executor_test.go @@ -121,15 +121,18 @@ func TestStatementExecutor_ExecuteStatement_ShowServers(t *testing.T) { {ID: 2, Host: "node1"}, }, nil } + e.Store.PeersFn = func() ([]string, error) { + return []string{"node0"}, nil + } if res := e.ExecuteStatement(influxql.MustParseStatement(`SHOW SERVERS`)); res.Err != nil { t.Fatal(res.Err) } else if !reflect.DeepEqual(res.Series, influxql.Rows{ { - Columns: []string{"id", "url"}, + Columns: []string{"id", "cluster_addr", "raft"}, Values: [][]interface{}{ - {uint64(1), "http://node0"}, - {uint64(2), "http://node1"}, + {uint64(1), "node0", true}, + {uint64(2), "node1", false}, }, }, }) { @@ -778,6 +781,7 @@ func NewStatementExecutor() *StatementExecutor { // StatementExecutorStore represents a mock implementation of StatementExecutor.Store. 
type StatementExecutorStore struct {
 	NodesFn                      func() ([]meta.NodeInfo, error)
+	PeersFn                      func() ([]string, error)
 	DatabaseFn                   func(name string) (*meta.DatabaseInfo, error)
 	DatabasesFn                  func() ([]meta.DatabaseInfo, error)
 	CreateDatabaseFn             func(name string) (*meta.DatabaseInfo, error)
@@ -804,6 +808,10 @@ func (s *StatementExecutorStore) Nodes() ([]meta.NodeInfo, error) {
 	return s.NodesFn()
 }
 
+func (s *StatementExecutorStore) Peers() ([]string, error) {
+	return s.PeersFn()
+}
+
 func (s *StatementExecutorStore) Database(name string) (*meta.DatabaseInfo, error) {
 	return s.DatabaseFn(name)
 }
diff --git a/Godeps/_workspace/src/github.com/influxdb/influxdb/meta/store.go b/Godeps/_workspace/src/github.com/influxdb/influxdb/meta/store.go
index 2a0c6fc73..23bac17f2 100644
--- a/Godeps/_workspace/src/github.com/influxdb/influxdb/meta/store.go
+++ b/Godeps/_workspace/src/github.com/influxdb/influxdb/meta/store.go
@@ -21,7 +21,6 @@ import (
 
 	"github.com/gogo/protobuf/proto"
 	"github.com/hashicorp/raft"
-	"github.com/hashicorp/raft-boltdb"
 	"github.com/influxdb/influxdb/influxql"
 	"github.com/influxdb/influxdb/meta/internal"
 	"golang.org/x/crypto/bcrypt"
@@ -31,9 +30,12 @@ import (
 const (
 	MuxRaftHeader = 0
 	MuxExecHeader = 1
+	MuxRPCHeader  = 5
 
 	// SaltBytes is the number of bytes used for salts
 	SaltBytes = 32
+
+	DefaultSyncNodeDelay = time.Second
 )
 
 // ExecMagic is the first 4 bytes sent to a remote exec connection to verify
@@ -45,6 +47,10 @@ const (
 	AutoCreateRetentionPolicyName   = "default"
 	AutoCreateRetentionPolicyPeriod = 0
 	RetentionPolicyMinDuration      = time.Hour
+
+	// MaxAutoCreatedRetentionPolicyReplicaN is the maximum replication factor that will
+	// be set for auto-created retention policies.
+	MaxAutoCreatedRetentionPolicyReplicaN = 3
 )
 
 // Raft configuration.
@@ -53,6 +59,7 @@ const (
 	raftSnapshotsRetained = 2
 	raftTransportMaxPool  = 3
 	raftTransportTimeout  = 10 * time.Second
+	MaxRaftNodes          = 3
 )
 
 // Store represents a raft-backed metastore.
@@ -68,17 +75,22 @@ type Store struct {
 
 	data *Data
 
-	remoteAddr net.Addr
-	raft       *raft.Raft
-	raftLayer  *raftLayer
-	peerStore  raft.PeerStore
-	transport  *raft.NetworkTransport
-	store      *raftboltdb.BoltStore
+	rpc *rpc
+
+	// The address used by other nodes to reach this node.
+	RemoteAddr net.Addr
+
+	raftState raftState
 
 	ready   chan struct{}
 	err     chan error
 	closing chan struct{}
 	wg      sync.WaitGroup
+	changed chan struct{}
+
+	// clusterTracingEnabled controls whether low-level cluster communication is logged.
+	// Useful for troubleshooting.
+	clusterTracingEnabled bool
 
 	retentionAutoCreate bool
 
@@ -86,6 +98,9 @@ type Store struct {
 	RaftListener net.Listener
 	ExecListener net.Listener
 
+	// The listener for higher-level, cluster operations
+	RPCListener net.Listener
+
 	// The advertised hostname of the store.
 	Addr net.Addr
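The Mux*Header constants above support single-port multiplexing: a dialer writes one header byte after connecting (as proxyLeader and rpc.call do with MuxRPCHeader), and the accepting side routes the connection by that byte (the tests later in this patch use the tcp.NewMux helper for this). A rough, self-contained sketch of the idea; serveMux is hypothetical, not the actual tcp package API:

package main

import (
	"fmt"
	"net"
)

// serveMux accepts connections on ln and routes each one by its first
// byte to the handler registered for that header.
func serveMux(ln net.Listener, handlers map[byte]func(net.Conn)) error {
	for {
		conn, err := ln.Accept()
		if err != nil {
			return err
		}
		go func(c net.Conn) {
			hdr := make([]byte, 1)
			if _, err := c.Read(hdr); err != nil {
				c.Close()
				return
			}
			h, ok := handlers[hdr[0]]
			if !ok {
				c.Close() // unknown header byte
				return
			}
			h(c)
		}(conn)
	}
}

func main() {
	ln, _ := net.Listen("tcp", "127.0.0.1:0")
	go serveMux(ln, map[byte]func(net.Conn){
		5: func(c net.Conn) { // e.g. MuxRPCHeader
			fmt.Fprintln(c, "rpc handler")
			c.Close()
		},
	})

	conn, _ := net.Dial("tcp", ln.Addr().String())
	conn.Write([]byte{5}) // announce the channel, like conn.Write([]byte{MuxRPCHeader})
	buf := make([]byte, 64)
	n, _ := conn.Read(buf)
	fmt.Print(string(buf[:n]))
}

@@ -118,8 +133,8 @@ type authUser struct {
 }
 
 // NewStore returns a new instance of Store.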
-func NewStore(c Config) *Store {
-	return &Store{
+func NewStore(c *Config) *Store {
+	s := &Store{
 		path:  c.Dir,
 		peers: c.Peers,
 		data:  &Data{},
@@ -127,8 +142,10 @@ func NewStore(c *Config) *Store {
 		ready:   make(chan struct{}),
 		err:     make(chan error),
 		closing: make(chan struct{}),
+		changed: make(chan struct{}),
 
-		retentionAutoCreate: c.RetentionAutoCreate,
+		clusterTracingEnabled: c.ClusterTracing,
+		retentionAutoCreate:   c.RetentionAutoCreate,
 
 		HeartbeatTimeout:   time.Duration(c.HeartbeatTimeout),
 		ElectionTimeout:    time.Duration(c.ElectionTimeout),
@@ -140,6 +157,14 @@ func NewStore(c *Config) *Store {
 		},
 		Logger: log.New(os.Stderr, "[metastore] ", log.LstdFlags),
 	}
+
+	s.raftState = &localRaft{store: s}
+	s.rpc = &rpc{
+		store:          s,
+		tracingEnabled: c.ClusterTracing,
+		logger:         s.Logger,
+	}
+	return s
 }
 
 // Path returns the root path when open.
@@ -153,7 +178,7 @@ func (s *Store) IDPath() string { return filepath.Join(s.path, "id") }
 func (s *Store) Open() error {
 	// Verify that no more than 3 peers.
 	// https://github.com/influxdb/influxdb/issues/2750
-	if len(s.peers) > 3 {
+	if len(s.peers) > MaxRaftNodes {
 		return ErrTooManyPeers
 	}
 
@@ -162,8 +187,12 @@ func (s *Store) Open() error {
 		panic("Store.RaftListener not set")
 	} else if s.ExecListener == nil {
 		panic("Store.ExecListener not set")
+	} else if s.RPCListener == nil {
+		panic("Store.RPCListener not set")
 	}
 
+	s.Logger.Printf("Using data dir: %v", s.Path())
+
 	if err := func() error {
 		s.mu.Lock()
 		defer s.mu.Unlock()
@@ -174,8 +203,13 @@ func (s *Store) Open() error {
 		}
 		s.opened = true
 
+		// load our raft state
+		if err := s.loadState(); err != nil {
+			return err
+		}
+
 		// Create the root directory if it doesn't already exist.
-		if err := os.MkdirAll(s.path, 0777); err != nil {
+		if err := s.createRootDir(); err != nil {
 			return fmt.Errorf("mkdir all: %s", err)
 		}
 
@@ -204,78 +238,186 @@ func (s *Store) Open() error {
 	s.wg.Add(1)
 	go s.serveExecListener()
 
+	s.wg.Add(1)
+	go s.serveRPCListener()
+
+	// Join an existing cluster if needed
+	if err := s.joinCluster(); err != nil {
+		return fmt.Errorf("join: %v", err)
+	}
+
 	// If the ID doesn't exist then create a new node.
 	if s.id == 0 {
 		go s.init()
 	} else {
+		go s.syncNodeInfo()
 		close(s.ready)
 	}
 
 	return nil
 }
 
+// syncNodeInfo continuously tries to update the current node's hostname
+// in the meta store.  It will retry until successful.
+func (s *Store) syncNodeInfo() error {
+	<-s.ready
+
+	for {
+		if err := func() error {
+			if err := s.WaitForLeader(0); err != nil {
+				return err
+			}
+
+			ni, err := s.Node(s.id)
+			if err != nil {
+				return err
+			}
+
+			if ni == nil {
+				return ErrNodeNotFound
+			}
+
+			if ni.Host == s.RemoteAddr.String() {
+				s.Logger.Printf("Updated node id=%d hostname=%v", s.id, s.RemoteAddr.String())
+				return nil
+			}
+
+			_, err = s.UpdateNode(s.id, s.RemoteAddr.String())
+			if err != nil {
+				return err
+			}
+			return nil
+		}(); err != nil {
+			// If we get an error, the cluster has not stabilized so just try again
+			time.Sleep(DefaultSyncNodeDelay)
+			continue
+		}
+		return nil
+	}
+}
+
+// loadState sets the appropriate raftState from our persistent storage
+func (s *Store) loadState() error {
+	peers, err := readPeersJSON(filepath.Join(s.path, "peers.json"))
+	if err != nil {
+		return err
+	}
+
+	// If we have existing peers, use those.  This will override what's in the
+	// config.
+	if len(peers) > 0 {
+		s.peers = peers
+	}
+
+	// if no peers on disk, we need to start raft in order to initialize a new
+	// cluster or join an existing one.
+	if len(peers) == 0 {
+		s.raftState = &localRaft{store: s}
+		// if we have a raft database (maybe restored), we should start raft locally
+	} else if _, err := os.Stat(filepath.Join(s.path, "raft.db")); err == nil {
+		s.raftState = &localRaft{store: s}
+		// otherwise, we should use remote raft
+	} else {
+		s.raftState = &remoteRaft{store: s}
+	}
+	return nil
+}
+
+func (s *Store) joinCluster() error {
+
+	// No join options, so nothing to do
+	if len(s.peers) == 0 {
+		return nil
+	}
+
+	// We already have a node ID so we're already part of a cluster,
+	// don't join again so we can use our existing state.
+	if s.id != 0 {
+		s.Logger.Printf("Skipping cluster join: already member of cluster: nodeId=%v raftEnabled=%v peers=%v",
+			s.id, raft.PeerContained(s.peers, s.RemoteAddr.String()), s.peers)
+		return nil
+	}
+
+	s.Logger.Printf("Joining cluster at: %v", s.peers)
+	for {
+		for _, join := range s.peers {
+			res, err := s.rpc.join(s.RemoteAddr.String(), join)
+			if err != nil {
+				s.Logger.Printf("Join node %v failed: %v: retrying...", join, err)
+				continue
+			}
+
+			s.Logger.Printf("Joined remote node %v", join)
+			s.Logger.Printf("nodeId=%v raftEnabled=%v peers=%v", res.NodeID, res.RaftEnabled, res.RaftNodes)
+
+			s.peers = res.RaftNodes
+			s.id = res.NodeID
+
+			if err := s.writeNodeID(res.NodeID); err != nil {
+				s.Logger.Printf("Write node id failed: %v", err)
+				break
+			}
+
+			if !res.RaftEnabled {
+				// Shutdown our local raft and transition to a remote raft state
+				if err := s.enableRemoteRaft(); err != nil {
+					s.Logger.Printf("Enable remote raft failed: %v", err)
+					break
+				}
+			}
+			return nil
+		}
+		time.Sleep(time.Second)
+	}
+}
+
+func (s *Store) enableLocalRaft() error {
+	if _, ok := s.raftState.(*localRaft); ok {
+		return nil
+	}
+	s.Logger.Printf("Switching to local raft")
+
+	lr := &localRaft{store: s}
+	return s.changeState(lr)
+}
+
+func (s *Store) enableRemoteRaft() error {
+	if _, ok := s.raftState.(*remoteRaft); ok {
+		return nil
+	}
+
+	s.Logger.Printf("Switching to remote raft")
+	rr := &remoteRaft{store: s}
+	return s.changeState(rr)
+}
+
+func (s *Store) changeState(state raftState) error {
+	if err := s.raftState.close(); err != nil {
+		return err
+	}
+
+	// Clear out any persistent state
+	if err := s.raftState.remove(); err != nil {
+		return err
+	}
+
+	s.raftState = state
+
+	if err := s.raftState.open(); err != nil {
+		return err
+	}
+
+	return nil
+}
+
 // openRaft initializes the raft store.
 func (s *Store) openRaft() error {
-	// Setup raft configuration.
-	config := raft.DefaultConfig()
-	config.Logger = s.Logger
-	config.HeartbeatTimeout = s.HeartbeatTimeout
-	config.ElectionTimeout = s.ElectionTimeout
-	config.LeaderLeaseTimeout = s.LeaderLeaseTimeout
-	config.CommitTimeout = s.CommitTimeout
-
-	// If no peers are set in the config then start as a single server.
-	config.EnableSingleNode = (len(s.peers) == 0)
-
-	// Build raft layer to multiplex listener.
-	s.raftLayer = newRaftLayer(s.RaftListener, s.Addr)
-
-	// Create a transport layer
-	s.transport = raft.NewNetworkTransport(s.raftLayer, 3, 10*time.Second, os.Stderr)
-
-	// Create peer storage.
-	s.peerStore = raft.NewJSONPeers(s.path, s.transport)
-
-	// Create the log store and stable store.
-	store, err := raftboltdb.NewBoltStore(filepath.Join(s.path, "raft.db"))
-	if err != nil {
-		return fmt.Errorf("new bolt store: %s", err)
-	}
-	s.store = store
-
-	// Create the snapshot store.
- snapshots, err := raft.NewFileSnapshotStore(s.path, raftSnapshotsRetained, os.Stderr) - if err != nil { - return fmt.Errorf("file snapshot store: %s", err) - } - - // Create raft log. - r, err := raft.NewRaft(config, (*storeFSM)(s), store, store, snapshots, s.peerStore, s.transport) - if err != nil { - return fmt.Errorf("new raft: %s", err) - } - s.raft = r - - return nil + return s.raftState.open() } // initialize attempts to bootstrap the raft store if there are no committed entries. func (s *Store) initialize() error { - // If we have committed entries then the store is already in the cluster. - /* - if index, err := s.store.LastIndex(); err != nil { - return fmt.Errorf("last index: %s", err) - } else if index > 0 { - return nil - } - */ - - // Force set peers. - if err := s.SetPeers(s.peers); err != nil { - return fmt.Errorf("set raft peers: %s", err) - } - - return nil + return s.raftState.initialize() } // Close closes the store and shuts down the node in the cluster. @@ -285,6 +427,23 @@ func (s *Store) Close() error { return s.close() } +// WaitForDataChanged will block the current goroutine until the metastore index has +// be updated. +func (s *Store) WaitForDataChanged() error { + s.mu.RLock() + changed := s.changed + s.mu.RUnlock() + + for { + select { + case <-s.closing: + return errors.New("closing") + case <-changed: + return nil + } + } +} + func (s *Store) close() error { // Check if store has already been closed. if !s.opened { @@ -296,18 +455,9 @@ func (s *Store) close() error { close(s.closing) // FIXME(benbjohnson): s.wg.Wait() - // Shutdown raft. - if s.raft != nil { - s.raft.Shutdown() - s.raft = nil - } - if s.transport != nil { - s.transport.Close() - s.transport = nil - } - if s.store != nil { - s.store.Close() - s.store = nil + if s.raftState != nil { + s.raftState.close() + s.raftState = nil } return nil @@ -329,8 +479,6 @@ func (s *Store) readID() error { } s.id = id - s.Logger.Printf("read local node id: %d", s.id) - return nil } @@ -357,37 +505,43 @@ func (s *Store) createLocalNode() error { } // Create new node. - ni, err := s.CreateNode(s.Addr.String()) + ni, err := s.CreateNode(s.RemoteAddr.String()) if err != nil { return fmt.Errorf("create node: %s", err) } // Write node id to file. - if err := ioutil.WriteFile(s.IDPath(), []byte(strconv.FormatUint(ni.ID, 10)), 0666); err != nil { + if err := s.writeNodeID(ni.ID); err != nil { return fmt.Errorf("write file: %s", err) } // Set ID locally. s.id = ni.ID - s.Logger.Printf("created local node: id=%d, host=%s", s.id, s.Addr.String()) + s.Logger.Printf("Created local node: id=%d, host=%s", s.id, s.RemoteAddr) return nil } +func (s *Store) createRootDir() error { + return os.MkdirAll(s.path, 0777) +} + +func (s *Store) writeNodeID(id uint64) error { + if err := s.createRootDir(); err != nil { + return err + } + return ioutil.WriteFile(s.IDPath(), []byte(strconv.FormatUint(id, 10)), 0666) +} + // Snapshot saves a snapshot of the current state. func (s *Store) Snapshot() error { - future := s.raft.Snapshot() - return future.Error() + return s.raftState.snapshot() } // WaitForLeader sleeps until a leader is found or a timeout occurs. // timeout == 0 means to wait forever. func (s *Store) WaitForLeader(timeout time.Duration) error { - if s.raft.Leader() != "" { - return nil - } - // Begin timeout timer. 
timer := time.NewTimer(timeout) defer timer.Stop() @@ -404,7 +558,7 @@ func (s *Store) WaitForLeader(timeout time.Duration) error { return errors.New("timeout") } case <-ticker.C: - if s.raft.Leader() != "" { + if s.Leader() != "" { return nil } } @@ -421,10 +575,10 @@ func (s *Store) Err() <-chan error { return s.err } func (s *Store) IsLeader() bool { s.mu.RLock() defer s.mu.RUnlock() - if s.raft == nil { + if s.raftState == nil { return false } - return s.raft.State() == raft.Leader + return s.raftState.isLeader() } // Leader returns what the store thinks is the current leader. An empty @@ -432,32 +586,27 @@ func (s *Store) IsLeader() bool { func (s *Store) Leader() string { s.mu.RLock() defer s.mu.RUnlock() - if s.raft == nil { + if s.raftState == nil { return "" } - return s.raft.Leader() -} - -// LeaderCh returns a channel that notifies on leadership change. -// Panics when the store has not been opened yet. -func (s *Store) LeaderCh() <-chan bool { - s.mu.RLock() - defer s.mu.RUnlock() - assert(s.raft != nil, "cannot retrieve leadership channel when closed") - return s.raft.LeaderCh() + return s.raftState.leader() } // SetPeers sets a list of peers in the cluster. func (s *Store) SetPeers(addrs []string) error { - a := make([]string, len(addrs)) - for i, s := range addrs { - addr, err := net.ResolveTCPAddr("tcp", s) - if err != nil { - return fmt.Errorf("cannot resolve addr: %s, err=%s", s, err) - } - a[i] = addr.String() - } - return s.raft.SetPeers(a).Error() + return s.raftState.setPeers(addrs) +} + +// AddPeer adds addr to the list of peers in the cluster. +func (s *Store) AddPeer(addr string) error { + return s.raftState.addPeer(addr) +} + +// Peers returns the list of peers in the cluster. +func (s *Store) Peers() ([]string, error) { + s.mu.RLock() + defer s.mu.RUnlock() + return s.raftState.peers() } // serveExecListener processes remote exec connections. @@ -471,10 +620,9 @@ func (s *Store) serveExecListener() { if err != nil { if strings.Contains(err.Error(), "connection closed") { return - } else { - s.Logger.Printf("temporary accept error: %s", err) - continue } + s.Logger.Printf("temporary accept error: %s", err) + continue } // Handle connection in a separate goroutine. @@ -487,6 +635,31 @@ func (s *Store) serveExecListener() { func (s *Store) handleExecConn(conn net.Conn) { defer s.wg.Done() + // Nodes not part of the raft cluster may initiate remote exec commands + // but may not know who the current leader of the cluster. If we are not + // the leader, proxy the request to the current leader. + if !s.IsLeader() { + + if s.Leader() == s.RemoteAddr.String() { + s.Logger.Printf("No leader") + return + } + + leaderConn, err := net.DialTimeout("tcp", s.Leader(), 10*time.Second) + if err != nil { + s.Logger.Printf("Dial leader: %v", err) + return + } + defer leaderConn.Close() + leaderConn.Write([]byte{MuxExecHeader}) + + if err := proxy(leaderConn.(*net.TCPConn), conn.(*net.TCPConn)); err != nil { + s.Logger.Printf("Leader proxy error: %v", err) + } + conn.Close() + return + } + // Read and execute command. err := func() error { // Read marker message. @@ -524,7 +697,7 @@ func (s *Store) handleExecConn(conn net.Conn) { // Build response message. 
 	var resp internal.Response
 	resp.OK = proto.Bool(err == nil)
-	resp.Index = proto.Uint64(s.raft.LastIndex())
+	resp.Index = proto.Uint64(s.raftState.lastIndex())
 	if err != nil {
 		resp.Error = proto.String(err.Error())
 	}
@@ -533,13 +706,39 @@ func (s *Store) handleExecConn(conn net.Conn) {
 	if b, err := proto.Marshal(&resp); err != nil {
 		panic(err)
 	} else if err = binary.Write(conn, binary.BigEndian, uint64(len(b))); err != nil {
-		s.Logger.Printf("unable to write exec response size: %s", err)
+		s.Logger.Printf("Unable to write exec response size: %s", err)
 	} else if _, err = conn.Write(b); err != nil {
-		s.Logger.Printf("unable to write exec response: %s", err)
+		s.Logger.Printf("Unable to write exec response: %s", err)
 	}
 	conn.Close()
 }
 
+// serveRPCListener processes cluster RPC connections.
+// This function runs in a separate goroutine.
+func (s *Store) serveRPCListener() {
+	defer s.wg.Done()
+
+	for {
+		// Accept next TCP connection.
+		conn, err := s.RPCListener.Accept()
+		if err != nil {
+			if strings.Contains(err.Error(), "connection closed") {
+				return
+			} else {
+				s.Logger.Printf("temporary accept error: %s", err)
+				continue
+			}
+		}
+
+		// Handle connection in a separate goroutine.
+		s.wg.Add(1)
+		go func() {
+			defer s.wg.Done()
+			s.rpc.handleRPCConn(conn)
+		}()
+	}
+}
+
 // MarshalBinary encodes the store's data to a binary protobuf format.
 func (s *Store) MarshalBinary() ([]byte, error) {
 	s.mu.RLock()
@@ -607,6 +806,19 @@ func (s *Store) CreateNode(host string) (*NodeInfo, error) {
 	return s.NodeByHost(host)
 }
 
+// UpdateNode updates an existing node in the store.
+func (s *Store) UpdateNode(id uint64, host string) (*NodeInfo, error) {
+	if err := s.exec(internal.Command_UpdateNodeCommand, internal.E_UpdateNodeCommand_Command,
+		&internal.UpdateNodeCommand{
+			ID:   proto.Uint64(id),
+			Host: proto.String(host),
+		},
+	); err != nil {
+		return nil, err
+	}
+	return s.NodeByHost(host)
+}
+
 // DeleteNode removes a node from the metastore by id.
 func (s *Store) DeleteNode(id uint64) error {
 	return s.exec(internal.Command_DeleteNodeCommand, internal.E_DeleteNodeCommand_Command,
@@ -658,6 +870,10 @@ func (s *Store) CreateDatabase(name string) (*DatabaseInfo, error) {
 		return nil, fmt.Errorf("read: %s", err)
 	}
 
+	if nodeN > MaxAutoCreatedRetentionPolicyReplicaN {
+		nodeN = MaxAutoCreatedRetentionPolicyReplicaN
+	}
+
 	// Create a retention policy.
 	rpi := NewRetentionPolicyInfo(AutoCreateRetentionPolicyName)
 	rpi.ReplicaN = nodeN
@@ -685,11 +901,11 @@ func (s *Store) CreateDatabaseIfNotExists(name string) (*DatabaseInfo, error) {
 	}
 
 	// Attempt to create database.
-	if di, err := s.CreateDatabase(name); err == ErrDatabaseExists {
+	di, err := s.CreateDatabase(name)
+	if err == ErrDatabaseExists {
 		return s.Database(name)
-	} else {
-		return di, err
 	}
+	return di, err
 }
 
 // DropDatabase removes a database from the metastore by name.
@@ -774,11 +990,11 @@ func (s *Store) CreateRetentionPolicyIfNotExists(database string, rpi *Retention
 	}
 
 	// Attempt to create policy.
-	if other, err := s.CreateRetentionPolicy(database, rpi); err == ErrRetentionPolicyExists {
+	other, err := s.CreateRetentionPolicy(database, rpi)
+	if err == ErrRetentionPolicyExists {
 		return s.RetentionPolicy(database, rpi.Name)
-	} else {
-		return other, err
 	}
+	return other, err
 }
 
 // SetDefaultRetentionPolicy sets the default retention policy for a database.
@@ -858,11 +1074,11 @@ func (s *Store) CreateShardGroupIfNotExists(database, policy string, timestamp t
 	}
 
 	// Attempt to create database.
- if sgi, err := s.CreateShardGroup(database, policy, timestamp); err == ErrShardGroupExists { + sgi, err := s.CreateShardGroup(database, policy, timestamp) + if err == ErrShardGroupExists { return s.ShardGroupByTimestamp(database, policy, timestamp) - } else { - return sgi, err } + return sgi, err } // DeleteShardGroup removes an existing shard group from a policy by ID. @@ -1037,9 +1253,8 @@ func (s *Store) Authenticate(username, password string) (ui *UserInfo, err error if bytes.Equal(hashed, au.hash) { ui = u return nil - } else { - return ErrAuthenticate } + return ErrAuthenticate } // Compare password with user hash. @@ -1264,8 +1479,7 @@ func (s *Store) read(fn func(*Data) error) error { var errInvalidate = errors.New("invalidate cache") func (s *Store) invalidate() error { - time.Sleep(1 * time.Second) - return nil // FIXME(benbjohnson): Reload cache from the leader. + return s.raftState.invalidate() } func (s *Store) exec(typ internal.Command_Type, desc *proto.ExtensionDesc, value interface{}) error { @@ -1280,36 +1494,21 @@ func (s *Store) exec(typ internal.Command_Type, desc *proto.ExtensionDesc, value // Apply the command if this is the leader. // Otherwise remotely execute the command against the current leader. - if s.raft.State() == raft.Leader { + if s.raftState.isLeader() { return s.apply(b) - } else { - return s.remoteExec(b) } + return s.remoteExec(b) } // apply applies a serialized command to the raft log. func (s *Store) apply(b []byte) error { - // Apply to raft log. - f := s.raft.Apply(b, 0) - if err := f.Error(); err != nil { - return err - } - - // Return response if it's an error. - // No other non-nil objects should be returned. - resp := f.Response() - if err, ok := resp.(error); ok { - return lookupError(err) - } - assert(resp == nil, "unexpected response: %#v", resp) - - return nil + return s.raftState.apply(b) } // remoteExec sends an encoded command to the remote leader. func (s *Store) remoteExec(b []byte) error { // Retrieve the current known leader. - leader := s.raft.Leader() + leader := s.raftState.leader() if leader == "" { return errors.New("no leader") } @@ -1368,30 +1567,13 @@ func (s *Store) remoteExec(b []byte) error { // sync polls the state machine until it reaches a given index. func (s *Store) sync(index uint64, timeout time.Duration) error { - ticker := time.NewTicker(100 * time.Millisecond) - defer ticker.Stop() + return s.raftState.sync(index, timeout) +} - timer := time.NewTimer(timeout) - defer timer.Stop() - - for { - // Wait for next tick or timeout. - select { - case <-ticker.C: - case <-timer.C: - return errors.New("timeout") - } - - // Compare index against current metadata. - s.mu.Lock() - ok := (s.data.Index >= index) - s.mu.Unlock() - - // Exit if we are at least at the given index. - if ok { - return nil - } - } +func (s *Store) cachedData() *Data { + s.mu.RLock() + defer s.mu.RUnlock() + return s.data.Clone() } // BcryptCost is the cost associated with generating password with Bcrypt. @@ -1467,6 +1649,8 @@ func (fsm *storeFSM) Apply(l *raft.Log) interface{} { return fsm.applySetAdminPrivilegeCommand(&cmd) case internal.Command_SetDataCommand: return fsm.applySetDataCommand(&cmd) + case internal.Command_UpdateNodeCommand: + return fsm.applyUpdateNodeCommand(&cmd) default: panic(fmt.Errorf("cannot apply command: %x", l.Data)) } @@ -1475,6 +1659,8 @@ func (fsm *storeFSM) Apply(l *raft.Log) interface{} { // Copy term and index to new metadata. 
fsm.data.Term = l.Term fsm.data.Index = l.Index + close(s.changed) + s.changed = make(chan struct{}) return err } @@ -1498,6 +1684,23 @@ func (fsm *storeFSM) applyCreateNodeCommand(cmd *internal.Command) interface{} { return nil } +func (fsm *storeFSM) applyUpdateNodeCommand(cmd *internal.Command) interface{} { + ext, _ := proto.GetExtension(cmd, internal.E_UpdateNodeCommand_Command) + v := ext.(*internal.UpdateNodeCommand) + + // Copy data and update. + other := fsm.data.Clone() + ni := other.Node(v.GetID()) + if ni == nil { + return ErrNodeNotFound + } + + ni.Host = v.GetHost() + + fsm.data = other + return nil +} + func (fsm *storeFSM) applyDeleteNodeCommand(cmd *internal.Command) interface{} { ext, _ := proto.GetExtension(cmd, internal.E_DeleteNodeCommand_Command) v := ext.(*internal.DeleteNodeCommand) diff --git a/Godeps/_workspace/src/github.com/influxdb/influxdb/meta/store_test.go b/Godeps/_workspace/src/github.com/influxdb/influxdb/meta/store_test.go index f498767ec..6dca9b57c 100644 --- a/Godeps/_workspace/src/github.com/influxdb/influxdb/meta/store_test.go +++ b/Godeps/_workspace/src/github.com/influxdb/influxdb/meta/store_test.go @@ -218,14 +218,18 @@ func TestStore_DropDatabase(t *testing.T) { } // Ensure remaining nodes are correct. - if di, _ := s.Database("db0"); !reflect.DeepEqual(di, &meta.DatabaseInfo{Name: "db0"}) { - t.Fatalf("unexpected database(0): %#v", di) + exp := &meta.DatabaseInfo{Name: "db0"} + if di, _ := s.Database("db0"); !reflect.DeepEqual(di, exp) { + t.Fatalf("unexpected database(0): \ngot: %#v\nexp: %#v", di, exp) + } if di, _ := s.Database("db1"); di != nil { t.Fatalf("unexpected database(1): %#v", di) } - if di, _ := s.Database("db2"); !reflect.DeepEqual(di, &meta.DatabaseInfo{Name: "db2"}) { - t.Fatalf("unexpected database(2): %#v", di) + + exp = &meta.DatabaseInfo{Name: "db2"} + if di, _ := s.Database("db2"); !reflect.DeepEqual(di, exp) { + t.Fatalf("unexpected database(2): \ngot: %#v\nexp: %#v", di, exp) } } @@ -300,8 +304,9 @@ func TestStore_DropRetentionPolicy(t *testing.T) { if rpi, _ := s.RetentionPolicy("db0", "rp1"); rpi != nil { t.Fatalf("unexpected policy(1): %#v", rpi) } - if rpi, _ := s.RetentionPolicy("db0", "rp2"); !reflect.DeepEqual(rpi, &meta.RetentionPolicyInfo{Name: "rp2", ReplicaN: 1, ShardGroupDuration: 7 * 24 * time.Hour}) { - t.Fatalf("unexpected policy(2): %#v", rpi) + exp := &meta.RetentionPolicyInfo{Name: "rp2", ReplicaN: 1, ShardGroupDuration: 7 * 24 * time.Hour} + if rpi, _ := s.RetentionPolicy("db0", "rp2"); !reflect.DeepEqual(rpi, exp) { + t.Fatalf("unexpected policy(2): \ngot: %#v\nexp: %#v", rpi, exp) } } @@ -730,6 +735,7 @@ func TestStore_Snapshot_And_Restore(t *testing.T) { s := MustOpenStore() s.LeaveFiles = true + addr := s.RemoteAddr.String() // Create a bunch of databases in the Store nDatabases := 5 @@ -744,12 +750,12 @@ func TestStore_Snapshot_And_Restore(t *testing.T) { s.Close() + // Allow the kernel to free up the port so we can re-use it again + time.Sleep(100 * time.Millisecond) + // Test restoring the snapshot taken above. existingDataPath := s.Path() - s = NewStore(NewConfig(existingDataPath)) - if err := s.Open(); err != nil { - panic(err) - } + s = MustOpenStoreWithPath(addr, existingDataPath) defer s.Close() // Wait until the server is ready. @@ -782,37 +788,105 @@ func TestCluster_Open(t *testing.T) { t.Fatal("no leader found") } - // Add a database to each node. 
-	for i, s := range c.Stores {
-		if di, err := s.CreateDatabase(fmt.Sprintf("db%d", i)); err != nil {
-			t.Fatal(err)
-		} else if di == nil {
-			t.Fatal("expected database")
+	// ensure all the nodes see the same metastore data
+	assertDatabaseReplicated(t, c)
+}
+
+// Ensure a multi-node cluster can start, join the cluster, and the first three members are raft nodes.
+func TestCluster_OpenRaft(t *testing.T) {
+	// Start a single node.
+	c := MustOpenCluster(1)
+	defer c.Close()
+
+	// Check that the node becomes leader.
+	if s := c.Leader(); s == nil {
+		t.Fatal("no leader found")
+	}
+
+	// Add 5 more nodes.
+	for i := 0; i < 5; i++ {
+		if err := c.Join(); err != nil {
+			t.Fatalf("failed to join cluster: %v", err)
 		}
 	}
 
-	// Verify that each store has all databases.
-	for i := 0; i < len(c.Stores); i++ {
-		for _, s := range c.Stores {
-			if di, err := s.Database(fmt.Sprintf("db%d", i)); err != nil {
-				t.Fatal(err)
-			} else if di == nil {
-				t.Fatal("expected database")
-			}
+	// ensure we have 3 raft nodes
+	assertRaftPeerNodes(t, c, 3)
+
+	// ensure all the nodes see the same metastore data
+	assertDatabaseReplicated(t, c)
+}
+
+// Ensure a multi-node cluster can restart
+func TestCluster_Restart(t *testing.T) {
+	// Start a single node.
+	c := MustOpenCluster(1)
+	defer c.Close()
+
+	// Check that one node is leader.
+	if s := c.Leader(); s == nil {
+		t.Fatal("no leader found")
+	}
+
+	// Add 5 more nodes, 2 should become raft peers, 3 remote raft clients
+	for i := 0; i < 5; i++ {
+		if err := c.Join(); err != nil {
+			t.Fatalf("failed to join cluster: %v", err)
 		}
 	}
+
+	// The tests use a host-assigned listener port.  We need to re-use
+	// the original ports when the new cluster is restarted so that the existing
+	// peer store addresses can be reached.
+	addrs := []string{}
+
+	// Make sure we keep files on disk when we shutdown as well as record the
+	// current cluster IP addresses
+	for _, s := range c.Stores {
+		s.LeaveFiles = true
+		addrs = append(addrs, s.Addr.String())
+	}
+
+	// Stop the cluster
+	if err := c.Close(); err != nil {
+		t.Fatalf("failed to close cluster: %v", err)
+	}
+
+	// Wait a bit to avoid spurious port in use conflict errors from trying to
+	// start the new cluster too fast
+	time.Sleep(100 * time.Millisecond)
+
+	// Re-create the cluster nodes from existing disk paths and addresses
+	stores := []*Store{}
+	for i, s := range c.Stores {
+		store := MustOpenStoreWithPath(addrs[i], s.Path())
+		stores = append(stores, store)
+	}
+	c.Stores = stores
+
+	// Wait for the cluster to stabilize
+	if err := c.WaitForLeader(); err != nil {
+		t.Fatal("no leader found")
+	}
+
+	// ensure we have 3 raft nodes
+	assertRaftPeerNodes(t, c, 3)
+
+	// ensure all the nodes see the same metastore data
+	assertDatabaseReplicated(t, c)
 }
 
 // Store is a test wrapper for meta.Store.
 type Store struct {
 	*meta.Store
-	Listener   net.Listener
-	Stderr     bytes.Buffer
-	LeaveFiles bool // set to true to leave temporary files on close
+	BindAddress string
+	Listener    net.Listener
+	Stderr      bytes.Buffer
+	LeaveFiles  bool // set to true to leave temporary files on close
 }
 
 // NewStore returns a new test wrapper for Store.
-func NewStore(c meta.Config) *Store {
+func NewStore(c *meta.Config) *Store {
 	s := &Store{
 		Store: meta.NewStore(c),
 	}
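A hypothetical companion test, sketched from the helpers in this file (Cluster.Join and assertRaftPeerNodes are defined below), showing the intended topology: only the first MaxRaftNodes members hold a local raft database, and later joiners run as remote-raft clients:

// Hypothetical sketch, not part of this patch.
func TestCluster_JoinTopology(t *testing.T) {
	c := MustOpenCluster(1)
	defer c.Close()

	// Join four more stores to the first node's address.
	for i := 0; i < 4; i++ {
		if err := c.Join(); err != nil {
			t.Fatalf("join: %v", err)
		}
	}

	// Only the first three members should hold a local raft.db.
	assertRaftPeerNodes(t, c, 3)
}

@@ -823,7 +897,16 @@
 // MustOpenStore opens a store in a temporary path. Panic on error.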
 func MustOpenStore() *Store {
-	s := NewStore(NewConfig(MustTempFile()))
+	return MustOpenStoreWithPath("", MustTempFile())
+}
+
+// MustOpenStoreWithPath opens a store at a given path, binding to addr if set.  Panic on error.
+func MustOpenStoreWithPath(addr, path string) *Store {
+	c := NewConfig(path)
+	s := NewStore(c)
+	if addr != "" {
+		s.BindAddress = addr
+	}
 	if err := s.Open(); err != nil {
 		panic(err)
 	}
@@ -840,18 +923,26 @@ func MustOpenStore() *Store {
 
 // Open opens the store on a random TCP port.
 func (s *Store) Open() error {
+
+	addr := "127.0.0.1:0"
+	if s.BindAddress != "" {
+		addr = s.BindAddress
+	}
 	// Open a TCP port.
-	ln, err := net.Listen("tcp", "127.0.0.1:0")
+	ln, err := net.Listen("tcp", addr)
 	if err != nil {
 		return fmt.Errorf("listen: %s", err)
 	}
 	s.Addr = ln.Addr()
 	s.Listener = ln
+	s.RemoteAddr = s.Addr
 
 	// Wrap listener in a muxer.
 	mux := tcp.NewMux()
 	s.RaftListener = mux.Listen(meta.MuxRaftHeader)
 	s.ExecListener = mux.Listen(meta.MuxExecHeader)
+	s.RPCListener = mux.Listen(meta.MuxRPCHeader)
+
 	go mux.Serve(ln)
 
 	// Open store.
@@ -874,8 +965,8 @@ func (s *Store) Close() error {
 }
 
 // NewConfig returns the default test configuration.
-func NewConfig(path string) meta.Config {
-	return meta.Config{
+func NewConfig(path string) *meta.Config {
+	return &meta.Config{
 		Dir:         path,
 		Hostname:    "localhost",
 		BindAddress: "127.0.0.1:0",
@@ -888,27 +979,17 @@ func NewConfig(path string) meta.Config {
 
 // Cluster represents a group of stores joined as a raft cluster.
 type Cluster struct {
+	path   string
 	Stores []*Store
+	n      int
 }
 
 // NewCluster returns a cluster of n stores within path.
 func NewCluster(path string, n int) *Cluster {
-	c := &Cluster{}
-
-	// Construct a list of temporary peers.
-	peers := make([]string, n)
-	for i := range peers {
-		peers[i] = "127.0.0.1:0"
-	}
-
-	// Create new stores with temporary peers.
-	for i := 0; i < n; i++ {
-		config := NewConfig(filepath.Join(path, strconv.Itoa(i)))
-		config.Peers = peers
-		s := NewStore(config)
-		c.Stores = append(c.Stores, s)
-	}
-
+	c := &Cluster{path: path, n: n}
+	config := NewConfig(filepath.Join(path, strconv.Itoa(0)))
+	s := NewStore(config)
+	c.Stores = append(c.Stores, s)
 	return c
 }
@@ -930,22 +1011,34 @@ func MustOpenCluster(n int) *Cluster {
 	return c
 }
 
+func (c *Cluster) Join() error {
+	config := NewConfig(filepath.Join(c.path, strconv.Itoa(len(c.Stores))))
+	config.Peers = []string{c.Stores[0].Addr.String()}
+	s := NewStore(config)
+	if err := s.Open(); err != nil {
+		return err
+	}
+	select {
+	case err := <-s.Err():
+		panic(fmt.Sprintf("store: i=%d, addr=%s, err=%s", len(c.Stores), s.Addr.String(), err))
+	case <-s.Ready():
+	}
+
+	c.Stores = append(c.Stores, s)
+	return nil
+}
+
 // Open opens and initializes all stores in the cluster.
 func (c *Cluster) Open() error {
 	if err := func() error {
-		// Open each store and add to peer list.
-		peers := make([]string, len(c.Stores))
-		for i, s := range c.Stores {
-			if err := s.Open(); err != nil {
-				return fmt.Errorf("open test store #%d: %s", i, err)
-			}
-			peers[i] = s.Addr.String()
+
+		if err := c.Stores[0].Open(); err != nil {
+			return err
 		}
 
-		// Reset peers on all stores.
- for _, s := range c.Stores { - if err := s.SetPeers(peers); err != nil { - return fmt.Errorf("set peers: %s", err) + for i := 1; i < c.n; i++ { + if err := c.Join(); err != nil { + panic(fmt.Sprintf("failed to add new cluster node: %v", err)) } } @@ -965,6 +1058,15 @@ func (c *Cluster) Close() error { return nil } +func (c *Cluster) WaitForLeader() error { + for _, s := range c.Stores { + if err := s.WaitForLeader(5 * time.Second); err != nil { + return err + } + } + return nil +} + // Leader returns the store that is currently leader. func (c *Cluster) Leader() *Store { for _, s := range c.Stores { @@ -987,3 +1089,44 @@ func MustTempFile() string { func mockHashPassword(password string) ([]byte, error) { return []byte(password), nil } + +// assertRaftPeerNodes counts the number of nodes running with a local raft +// database and asserts that the count is equal to n +func assertRaftPeerNodes(t *testing.T, c *Cluster, n int) { + // Ensure we have the required number of raft nodes + raftCount := 0 + for _, s := range c.Stores { + if _, err := os.Stat(filepath.Join(s.Path(), "raft.db")); err == nil { + raftCount += 1 + } + } + + if raftCount != n { + t.Errorf("raft nodes mismatch: got %v, exp %v", raftCount, n) + } +} + +// assertDatabaseReplicated creates a new database named after each node and +// then verifies that each node can see all the created databases from their +// local meta data +func assertDatabaseReplicated(t *testing.T, c *Cluster) { + // Add a database to each node. + for i, s := range c.Stores { + if di, err := s.CreateDatabase(fmt.Sprintf("db%d", i)); err != nil { + t.Fatal(err) + } else if di == nil { + t.Fatal("expected database") + } + } + + // Verify that each store has all databases. + for i := 0; i < len(c.Stores); i++ { + for _, s := range c.Stores { + if di, err := s.Database(fmt.Sprintf("db%d", i)); err != nil { + t.Fatal(err) + } else if di == nil { + t.Fatal("expected database") + } + } + } +} diff --git a/Godeps/_workspace/src/github.com/influxdb/influxdb/tsdb/batcher.go b/Godeps/_workspace/src/github.com/influxdb/influxdb/tsdb/batcher.go index d1a4cf2b7..aefbea723 100644 --- a/Godeps/_workspace/src/github.com/influxdb/influxdb/tsdb/batcher.go +++ b/Godeps/_workspace/src/github.com/influxdb/influxdb/tsdb/batcher.go @@ -9,6 +9,8 @@ import ( // PointBatcher accepts Points and will emit a batch of those points when either // a) the batch reaches a certain size, or b) a certain time passes. type PointBatcher struct { + stats PointBatcherStats + size int duration time.Duration @@ -17,8 +19,6 @@ type PointBatcher struct { out chan []Point flush chan struct{} - stats PointBatcherStats - wg *sync.WaitGroup } diff --git a/Godeps/_workspace/src/github.com/influxdb/influxdb/tsdb/batcher_test.go b/Godeps/_workspace/src/github.com/influxdb/influxdb/tsdb/batcher_test.go index 15105bfcd..f3652e6c8 100644 --- a/Godeps/_workspace/src/github.com/influxdb/influxdb/tsdb/batcher_test.go +++ b/Godeps/_workspace/src/github.com/influxdb/influxdb/tsdb/batcher_test.go @@ -1,21 +1,23 @@ -package tsdb +package tsdb_test import ( "testing" "time" + + "github.com/influxdb/influxdb/tsdb" ) // TestBatch_Size ensures that a batcher generates a batch when the size threshold is reached. 
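//
// A rough sketch of the producer/consumer contract these tests exercise (the
// point value and thresholds below are illustrative only): callers feed points
// into In() and read completed batches from Out(); a batch is emitted when the
// size threshold is hit, the timeout fires, or Flush() is called.
//
//	batcher := tsdb.NewPointBatcher(2, time.Minute)
//	batcher.Start()
//
//	var p tsdb.Point
//	go func() {
//		batcher.In() <- p
//		batcher.In() <- p // second point reaches the size threshold
//	}()
//
//	batch := <-batcher.Out() // len(batch) == 2
//	_ = batch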
func TestBatch_Size(t *testing.T) { batchSize := 5 - batcher := NewPointBatcher(batchSize, time.Hour) + batcher := tsdb.NewPointBatcher(batchSize, time.Hour) if batcher == nil { t.Fatal("failed to create batcher for size test") } batcher.Start() - var p Point + var p tsdb.Point go func() { for i := 0; i < batchSize; i++ { batcher.In() <- p @@ -31,14 +33,14 @@ func TestBatch_Size(t *testing.T) { // TestBatch_Size ensures that a batcher generates a batch when the timeout triggers. func TestBatch_Timeout(t *testing.T) { batchSize := 5 - batcher := NewPointBatcher(batchSize+1, 100*time.Millisecond) + batcher := tsdb.NewPointBatcher(batchSize+1, 100*time.Millisecond) if batcher == nil { t.Fatal("failed to create batcher for timeout test") } batcher.Start() - var p Point + var p tsdb.Point go func() { for i := 0; i < batchSize; i++ { batcher.In() <- p @@ -54,14 +56,14 @@ func TestBatch_Timeout(t *testing.T) { // TestBatch_Flush ensures that a batcher generates a batch when flushed func TestBatch_Flush(t *testing.T) { batchSize := 2 - batcher := NewPointBatcher(batchSize, time.Hour) + batcher := tsdb.NewPointBatcher(batchSize, time.Hour) if batcher == nil { t.Fatal("failed to create batcher for flush test") } batcher.Start() - var p Point + var p tsdb.Point go func() { batcher.In() <- p batcher.Flush() @@ -76,15 +78,15 @@ func TestBatch_Flush(t *testing.T) { // TestBatch_MultipleBatches ensures that a batcher correctly processes multiple batches. func TestBatch_MultipleBatches(t *testing.T) { batchSize := 2 - batcher := NewPointBatcher(batchSize, 100*time.Millisecond) + batcher := tsdb.NewPointBatcher(batchSize, 100*time.Millisecond) if batcher == nil { t.Fatal("failed to create batcher for size test") } batcher.Start() - var p Point - var b []Point + var p tsdb.Point + var b []tsdb.Point batcher.In() <- p batcher.In() <- p @@ -102,7 +104,7 @@ func TestBatch_MultipleBatches(t *testing.T) { checkPointBatcherStats(t, batcher, -1, 3, 1, 1) } -func checkPointBatcherStats(t *testing.T, b *PointBatcher, batchTotal, pointTotal, sizeTotal, timeoutTotal int) { +func checkPointBatcherStats(t *testing.T, b *tsdb.PointBatcher, batchTotal, pointTotal, sizeTotal, timeoutTotal int) { stats := b.Stats() if batchTotal != -1 && stats.BatchTotal != uint64(batchTotal) { diff --git a/Godeps/_workspace/src/github.com/influxdb/influxdb/tsdb/cursor.go b/Godeps/_workspace/src/github.com/influxdb/influxdb/tsdb/cursor.go new file mode 100644 index 000000000..e5c42ff1d --- /dev/null +++ b/Godeps/_workspace/src/github.com/influxdb/influxdb/tsdb/cursor.go @@ -0,0 +1,119 @@ +package tsdb + +import ( + "bytes" + "container/heap" +) + +// MultiCursor returns a single cursor that combines the results of all cursors in order. +// +// If the same key is returned from multiple cursors then the first cursor +// specified will take precendence. A key will only be returned once from the +// returned cursor. +func MultiCursor(cursors ...Cursor) Cursor { + return &multiCursor{cursors: cursors} +} + +// multiCursor represents a cursor that combines multiple cursors into one. +type multiCursor struct { + cursors []Cursor + heap cursorHeap + prev []byte +} + +// Seek moves the cursor to a given key. +func (mc *multiCursor) Seek(seek []byte) (key, value []byte) { + // Initialize heap. + h := make(cursorHeap, 0, len(mc.cursors)) + for i, c := range mc.cursors { + // Move cursor to position. Skip if it's empty. + k, v := c.Seek(seek) + if k == nil { + continue + } + + // Append cursor to heap. 
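		// The priority below falls out of argument order: for len(mc.cursors)
		// cursors, cursors[0] gets the largest value, so when two heap items
		// carry equal keys, Less() ranks the earlier cursor first and pop()
		// returns its value while the duplicate key is skipped. For example,
		// with three cursors:
		//
		//	cursors[0] -> priority 3 (wins ties)
		//	cursors[1] -> priority 2
		//	cursors[2] -> priority 1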
+ h = append(h, &cursorHeapItem{ + key: k, + value: v, + cursor: c, + priority: len(mc.cursors) - i, + }) + } + + heap.Init(&h) + mc.heap = h + mc.prev = nil + + return mc.pop() +} + +// Next returns the next key/value from the cursor. +func (mc *multiCursor) Next() (key, value []byte) { return mc.pop() } + +// pop returns the next item from the heap. +// Reads the next key/value from item's cursor and puts it back on the heap. +func (mc *multiCursor) pop() (key, value []byte) { + // Read items until we have a key that doesn't match the previously read one. + // This is to perform deduplication when there's multiple items with the same key. + // The highest priority cursor will be read first and then remaining keys will be dropped. + for { + // Return nil if there are no more items left. + if len(mc.heap) == 0 { + return nil, nil + } + + // Read the next item from the heap. + item := heap.Pop(&mc.heap).(*cursorHeapItem) + + // Save the key/value for return. + key, value = item.key, item.value + + // Read the next item from the cursor. Push back to heap if one exists. + if item.key, item.value = item.cursor.Next(); item.key != nil { + heap.Push(&mc.heap, item) + } + + // Skip if this key matches the previously returned one. + if bytes.Equal(mc.prev, key) { + continue + } + + mc.prev = key + return + } +} + +// cursorHeap represents a heap of cursorHeapItems. +type cursorHeap []*cursorHeapItem + +func (h cursorHeap) Len() int { return len(h) } +func (h cursorHeap) Swap(i, j int) { h[i], h[j] = h[j], h[i] } +func (h cursorHeap) Less(i, j int) bool { + if cmp := bytes.Compare(h[i].key, h[j].key); cmp == -1 { + return true + } else if cmp == 0 { + return h[i].priority > h[j].priority + } + return false +} + +func (h *cursorHeap) Push(x interface{}) { + *h = append(*h, x.(*cursorHeapItem)) +} + +func (h *cursorHeap) Pop() interface{} { + old := *h + n := len(old) + item := old[n-1] + *h = old[0 : n-1] + return item +} + +// cursorHeapItem is something we manage in a priority queue. +type cursorHeapItem struct { + key []byte + value []byte + cursor Cursor + priority int +} diff --git a/Godeps/_workspace/src/github.com/influxdb/influxdb/tsdb/cursor_test.go b/Godeps/_workspace/src/github.com/influxdb/influxdb/tsdb/cursor_test.go new file mode 100644 index 000000000..1857a34a4 --- /dev/null +++ b/Godeps/_workspace/src/github.com/influxdb/influxdb/tsdb/cursor_test.go @@ -0,0 +1,221 @@ +package tsdb_test + +import ( + "bytes" + "encoding/binary" + "math/rand" + "reflect" + "sort" + "testing" + "testing/quick" + + "github.com/influxdb/influxdb/tsdb" +) + +// Ensure the multi-cursor can correctly iterate across a single subcursor. +func TestMultiCursor_Single(t *testing.T) { + mc := tsdb.MultiCursor( + NewCursor([]CursorItem{ + {Key: []byte{0x00}, Value: []byte{0x00}}, + {Key: []byte{0x01}, Value: []byte{0x10}}, + {Key: []byte{0x02}, Value: []byte{0x20}}, + }), + ) + + if k, v := mc.Seek([]byte{0x00}); !bytes.Equal(k, []byte{0x00}) || !bytes.Equal(v, []byte{0x00}) { + t.Fatalf("unexpected key/value: %x / %x", k, v) + } else if k, v = mc.Next(); !bytes.Equal(k, []byte{0x01}) || !bytes.Equal(v, []byte{0x10}) { + t.Fatalf("unexpected key/value: %x / %x", k, v) + } else if k, v = mc.Next(); !bytes.Equal(k, []byte{0x02}) || !bytes.Equal(v, []byte{0x20}) { + t.Fatalf("unexpected key/value: %x / %x", k, v) + } else if k, v = mc.Next(); k != nil { + t.Fatalf("expected eof, got: %x / %x", k, v) + } +} + +// Ensure the multi-cursor can correctly iterate across multiple non-overlapping subcursors. 
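//
// A compact illustration of the precedence rule documented on MultiCursor,
// using this file's NewCursor helper (keys and values here are illustrative
// only): when two subcursors share key 0x00, the earlier cursor's value wins
// and the key is returned once.
//
//	mc := tsdb.MultiCursor(
//		NewCursor([]CursorItem{{Key: []byte{0x00}, Value: []byte{0x01}}}),
//		NewCursor([]CursorItem{{Key: []byte{0x00}, Value: []byte{0x02}}}),
//	)
//	k, v := mc.Seek([]byte{0x00}) // k == 0x00, v == 0x01 (first cursor wins)
//	k, v = mc.Next()              // k == nil: the duplicate 0x00 was dropped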
+func TestMultiCursor_Multiple_NonOverlapping(t *testing.T) { + mc := tsdb.MultiCursor( + NewCursor([]CursorItem{ + {Key: []byte{0x00}, Value: []byte{0x00}}, + {Key: []byte{0x03}, Value: []byte{0x30}}, + {Key: []byte{0x04}, Value: []byte{0x40}}, + }), + NewCursor([]CursorItem{ + {Key: []byte{0x01}, Value: []byte{0x10}}, + {Key: []byte{0x02}, Value: []byte{0x20}}, + }), + ) + + if k, v := mc.Seek([]byte{0x00}); !bytes.Equal(k, []byte{0x00}) || !bytes.Equal(v, []byte{0x00}) { + t.Fatalf("unexpected key/value: %x / %x", k, v) + } else if k, v = mc.Next(); !bytes.Equal(k, []byte{0x01}) || !bytes.Equal(v, []byte{0x10}) { + t.Fatalf("unexpected key/value: %x / %x", k, v) + } else if k, v = mc.Next(); !bytes.Equal(k, []byte{0x02}) || !bytes.Equal(v, []byte{0x20}) { + t.Fatalf("unexpected key/value: %x / %x", k, v) + } else if k, v = mc.Next(); !bytes.Equal(k, []byte{0x03}) || !bytes.Equal(v, []byte{0x30}) { + t.Fatalf("unexpected key/value: %x / %x", k, v) + } else if k, v = mc.Next(); !bytes.Equal(k, []byte{0x04}) || !bytes.Equal(v, []byte{0x40}) { + t.Fatalf("unexpected key/value: %x / %x", k, v) + } else if k, v = mc.Next(); k != nil { + t.Fatalf("expected eof, got: %x / %x", k, v) + } +} + +// Ensure the multi-cursor can correctly iterate across multiple overlapping subcursors. +func TestMultiCursor_Multiple_Overlapping(t *testing.T) { + mc := tsdb.MultiCursor( + NewCursor([]CursorItem{ + {Key: []byte{0x00}, Value: []byte{0x00}}, + {Key: []byte{0x03}, Value: []byte{0x03}}, + {Key: []byte{0x04}, Value: []byte{0x04}}, + }), + NewCursor([]CursorItem{ + {Key: []byte{0x00}, Value: []byte{0xF0}}, + {Key: []byte{0x02}, Value: []byte{0xF2}}, + {Key: []byte{0x04}, Value: []byte{0xF4}}, + }), + ) + + if k, v := mc.Seek([]byte{0x00}); !bytes.Equal(k, []byte{0x00}) || !bytes.Equal(v, []byte{0x00}) { + t.Fatalf("unexpected key/value: %x / %x", k, v) + } else if k, v = mc.Next(); !bytes.Equal(k, []byte{0x02}) || !bytes.Equal(v, []byte{0xF2}) { + t.Fatalf("unexpected key/value: %x / %x", k, v) + } else if k, v = mc.Next(); !bytes.Equal(k, []byte{0x03}) || !bytes.Equal(v, []byte{0x03}) { + t.Fatalf("unexpected key/value: %x / %x", k, v) + } else if k, v = mc.Next(); !bytes.Equal(k, []byte{0x04}) || !bytes.Equal(v, []byte{0x04}) { + t.Fatalf("unexpected key/value: %x / %x", k, v) + } else if k, v = mc.Next(); k != nil { + t.Fatalf("expected eof, got: %x / %x", k, v) + } +} + +// Ensure the multi-cursor can handle randomly generated data. +func TestMultiCursor_Quick(t *testing.T) { + quick.Check(func(seek uint64, cursors []Cursor) bool { + var got, exp [][]byte + seek %= 100 + + // Merge all cursor data to determine expected output. + // First seen key overrides all other items with the same key. + m := make(map[string][]byte) + for _, c := range cursors { + for _, item := range c.items { + if bytes.Compare(item.Key, u64tob(seek)) == -1 { + continue + } + if _, ok := m[string(item.Key)]; ok { + continue + } + m[string(item.Key)] = item.Value + } + } + + // Convert map back to single item list. + for k, v := range m { + exp = append(exp, append([]byte(k), v...)) + } + sort.Sort(byteSlices(exp)) + + // Create multi-cursor and iterate over all items. + mc := tsdb.MultiCursor(tsdbCursorSlice(cursors)...) + for k, v := mc.Seek(u64tob(seek)); k != nil; k, v = mc.Next() { + got = append(got, append(k, v...)) + } + + // Verify results. 
+ if !reflect.DeepEqual(got, exp) { + t.Fatalf("mismatch: seek=%d\n\ngot=%+v\n\nexp=%+v", seek, got, exp) + } + + return true + }, nil) +} + +// Cursor represents an in-memory test cursor. +type Cursor struct { + items []CursorItem + index int +} + +// NewCursor returns a new instance of Cursor. +func NewCursor(items []CursorItem) *Cursor { + sort.Sort(CursorItems(items)) + return &Cursor{items: items} +} + +// Seek seeks to an item by key. +func (c *Cursor) Seek(seek []byte) (key, value []byte) { + for c.index = 0; c.index < len(c.items); c.index++ { + if bytes.Compare(c.items[c.index].Key, seek) == -1 { // skip keys less than seek + continue + } + return c.items[c.index].Key, c.items[c.index].Value + } + return nil, nil +} + +// Next returns the next key/value pair. +func (c *Cursor) Next() (key, value []byte) { + if c.index >= len(c.items)-1 { + return nil, nil + } + + c.index++ + return c.items[c.index].Key, c.items[c.index].Value +} + +// Generate returns a randomly generated cursor. Implements quick.Generator. +func (c Cursor) Generate(rand *rand.Rand, size int) reflect.Value { + c.index = 0 + + c.items = make([]CursorItem, rand.Intn(size)) + for i := range c.items { + value, _ := quick.Value(reflect.TypeOf([]byte(nil)), rand) + + c.items[i] = CursorItem{ + Key: u64tob(uint64(rand.Intn(size))), + Value: value.Interface().([]byte), + } + } + + // Sort items by key. + sort.Sort(CursorItems(c.items)) + + return reflect.ValueOf(c) +} + +// tsdbCursorSlice converts a Cursor slice to a tsdb.Cursor slice. +func tsdbCursorSlice(a []Cursor) []tsdb.Cursor { + var other []tsdb.Cursor + for i := range a { + other = append(other, &a[i]) + } + return other +} + +// CursorItem represents a key/value pair in a cursor. +type CursorItem struct { + Key []byte + Value []byte +} + +type CursorItems []CursorItem + +func (a CursorItems) Len() int { return len(a) } +func (a CursorItems) Swap(i, j int) { a[i], a[j] = a[j], a[i] } +func (a CursorItems) Less(i, j int) bool { return bytes.Compare(a[i].Key, a[j].Key) == -1 } + +// byteSlices represents a sortable slice of byte slices. +type byteSlices [][]byte + +func (a byteSlices) Len() int { return len(a) } +func (a byteSlices) Less(i, j int) bool { return bytes.Compare(a[i], a[j]) == -1 } +func (a byteSlices) Swap(i, j int) { a[i], a[j] = a[j], a[i] } + +// u64tob converts a uint64 into an 8-byte slice. +func u64tob(v uint64) []byte { + b := make([]byte, 8) + binary.BigEndian.PutUint64(b, v) + return b +} diff --git a/Godeps/_workspace/src/github.com/influxdb/influxdb/tsdb/engine.go b/Godeps/_workspace/src/github.com/influxdb/influxdb/tsdb/engine.go index 74fed3298..65e8bb0da 100644 --- a/Godeps/_workspace/src/github.com/influxdb/influxdb/tsdb/engine.go +++ b/Godeps/_workspace/src/github.com/influxdb/influxdb/tsdb/engine.go @@ -1,961 +1,127 @@ package tsdb import ( + "errors" "fmt" - "math" - "sort" + "io" + "os" "time" - "github.com/influxdb/influxdb/influxql" + "github.com/boltdb/bolt" ) -const ( - // Return an error if the user is trying to select more than this number of points in a group by statement. - // Most likely they specified a group by interval without time boundaries. 
- MaxGroupByPoints = 100000 - - // Since time is always selected, the column count when selecting only a single other value will be 2 - SelectColumnCountWithOneValue = 2 - - // IgnoredChunkSize is what gets passed into Mapper.Begin for aggregate queries as they don't chunk points out - IgnoredChunkSize = 0 +var ( + // ErrFormatNotFound is returned when no format can be determined from a path. + ErrFormatNotFound = errors.New("format not found") ) -// MapperResponse is the structure responses from mappers take over the network. Tagsets -// is only set with the first response. Data will be nil when the Mapper has no more data. -type MapperResponse struct { - TagSets []string `json:"tagSets,omitempty"` - Data []byte `json:"data"` -} +// DefaultEngine is the default engine used by the shard when initializing. +const DefaultEngine = "b1" -// Mapper is the interface all Mapper types must implement. -type Mapper interface { +// Engine represents a swappable storage engine for the shard. +type Engine interface { Open() error - TagSets() []string - NextChunk() (interface{}, error) - Close() + Close() error + + SetLogOutput(io.Writer) + LoadMetadataIndex(index *DatabaseIndex, measurementFields map[string]*MeasurementFields) error + + Begin(writable bool) (Tx, error) + WritePoints(points []Point, measurementFieldsToSave map[string]*MeasurementFields, seriesToCreate []*SeriesCreate) error + DeleteSeries(keys []string) error + DeleteMeasurement(name string, seriesKeys []string) error + SeriesCount() (n int, err error) } -// StatefulMapper encapsulates a Mapper and some state that the executor needs to -// track for that mapper. -type StatefulMapper struct { - Mapper - bufferedChunk *mapperOutput // Last read chunk. - drained bool +// NewEngineFunc creates a new engine. +type NewEngineFunc func(path string, options EngineOptions) Engine + +// newEngineFuncs is a lookup of engine constructors by name. +var newEngineFuncs = make(map[string]NewEngineFunc) + +// RegisterEngine registers a storage engine initializer by name. +func RegisterEngine(name string, fn NewEngineFunc) { + if _, ok := newEngineFuncs[name]; ok { + panic("engine already registered: " + name) + } + newEngineFuncs[name] = fn } -// Executor is the interface all Executor types must implement. -type Executor interface { - Execute() <-chan *influxql.Row -} +// NewEngine returns an instance of an engine based on its format. +// If the path does not exist then the DefaultFormat is used. +func NewEngine(path string, options EngineOptions) (Engine, error) { + // Create a new engine + if _, err := os.Stat(path); os.IsNotExist(err) { + return newEngineFuncs[DefaultEngine](path, options), nil + } -// NextChunk wraps a RawMapper and some state. -func (srm *StatefulMapper) NextChunk() (*mapperOutput, error) { - c, err := srm.Mapper.NextChunk() - if err != nil { + // Only bolt-based backends are currently supported so open it and check the format. + var format string + if err := func() error { + db, err := bolt.Open(path, 0666, &bolt.Options{Timeout: 1 * time.Second}) + if err != nil { + return err + } + defer db.Close() + + return db.View(func(tx *bolt.Tx) error { + // Retrieve the meta bucket. + b := tx.Bucket([]byte("meta")) + + // If no format is specified then it must be an original b1 database. + if b == nil { + format = "b1" + return nil + } + + // Save the format. 
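			// How that key gets there, as a sketch (this mirrors what the
			// b1 engine's Open does when it first creates a file): each
			// bolt-backed engine stamps its format name into the "meta"
			// bucket, roughly
			//
			//	b, _ := tx.CreateBucketIfNotExists([]byte("meta"))
			//	if v := b.Get([]byte("format")); v == nil {
			//		_ = b.Put([]byte("format"), []byte("b1"))
			//	}
			//
			// which is what lets NewEngine map an existing file back to a
			// registered engine constructor.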
+ format = string(b.Get([]byte("format"))) + if format == "v1" { + format = "b1" + } + return nil + }) + }(); err != nil { return nil, err } - chunk, ok := c.(*mapperOutput) - if !ok { - if chunk == interface{}(nil) { - return nil, nil - } + + // Lookup engine by format. + fn := newEngineFuncs[format] + if fn == nil { + return nil, fmt.Errorf("invalid engine format: %q", format) } - return chunk, nil + + return fn(path, options), nil } -// RawExecutor is an executor for RawMappers. -type RawExecutor struct { - stmt *influxql.SelectStatement - mappers []*StatefulMapper - chunkSize int - limitedTagSets map[string]struct{} // Set tagsets for which data has reached the LIMIT. +// EngineOptions represents the options used to initialize the engine. +type EngineOptions struct { + MaxWALSize int + WALFlushInterval time.Duration + WALPartitionFlushDelay time.Duration } -// NewRawExecutor returns a new RawExecutor. -func NewRawExecutor(stmt *influxql.SelectStatement, mappers []Mapper, chunkSize int) *RawExecutor { - a := []*StatefulMapper{} - for _, m := range mappers { - a = append(a, &StatefulMapper{m, nil, false}) - } - return &RawExecutor{ - stmt: stmt, - mappers: a, - chunkSize: chunkSize, - limitedTagSets: make(map[string]struct{}), +// NewEngineOptions returns the default options. +func NewEngineOptions() EngineOptions { + return EngineOptions{ + MaxWALSize: DefaultMaxWALSize, + WALFlushInterval: DefaultWALFlushInterval, + WALPartitionFlushDelay: DefaultWALPartitionFlushDelay, } } -// Execute begins execution of the query and returns a channel to receive rows. -func (re *RawExecutor) Execute() <-chan *influxql.Row { - // Create output channel and stream data in a separate goroutine. - out := make(chan *influxql.Row, 0) - go re.execute(out) - return out +// Tx represents a transaction. +type Tx interface { + io.WriterTo + + Cursor(series string) Cursor + Size() int64 + Commit() error + Rollback() error } -func (re *RawExecutor) execute(out chan *influxql.Row) { - // It's important that all resources are released when execution completes. - defer re.close() - - // Open the mappers. - for _, m := range re.mappers { - if err := m.Open(); err != nil { - out <- &influxql.Row{Err: err} - return - } - } - - // Used to read ahead chunks from mappers. - var rowWriter *limitedRowWriter - var currTagset string - - // Keep looping until all mappers drained. - var err error - for { - // Get the next chunk from each Mapper. - for _, m := range re.mappers { - if m.drained { - continue - } - - // Set the next buffered chunk on the mapper, or mark it drained. - for { - if m.bufferedChunk == nil { - m.bufferedChunk, err = m.NextChunk() - if err != nil { - out <- &influxql.Row{Err: err} - return - } - if m.bufferedChunk == nil { - // Mapper can do no more for us. - m.drained = true - break - } - } - - if re.tagSetIsLimited(m.bufferedChunk.Name) { - // chunk's tagset is limited, so no good. Try again. - m.bufferedChunk = nil - continue - } - // This mapper has a chunk available, and it is not limited. - break - } - } - - // All Mappers done? - if re.mappersDrained() { - rowWriter.Flush() - break - } - - // Send out data for the next alphabetically-lowest tagset. All Mappers emit data in this order, - // so by always continuing with the lowest tagset until it is finished, we process all data in - // the required order, and don't "miss" any. - tagset := re.nextMapperTagSet() - if tagset != currTagset { - currTagset = tagset - // Tagset has changed, time for a new rowWriter. Be sure to kick out any residual values. 
- rowWriter.Flush() - rowWriter = nil - } - - // Process the mapper outputs. We can send out everything up to the min of the last time - // of the chunks for the next tagset. - minTime := re.nextMapperLowestTime(tagset) - - // Now empty out all the chunks up to the min time. Create new output struct for this data. - var chunkedOutput *mapperOutput - for _, m := range re.mappers { - if m.drained { - continue - } - - // This mapper's next chunk is not for the next tagset, or the very first value of - // the chunk is at a higher acceptable timestamp. Skip it. - if m.bufferedChunk.key() != tagset || m.bufferedChunk.Values[0].Time > minTime { - continue - } - - // Find the index of the point up to the min. - ind := len(m.bufferedChunk.Values) - for i, mo := range m.bufferedChunk.Values { - if mo.Time > minTime { - ind = i - break - } - } - - // Add up to the index to the values - if chunkedOutput == nil { - chunkedOutput = &mapperOutput{ - Name: m.bufferedChunk.Name, - Tags: m.bufferedChunk.Tags, - } - chunkedOutput.Values = m.bufferedChunk.Values[:ind] - } else { - chunkedOutput.Values = append(chunkedOutput.Values, m.bufferedChunk.Values[:ind]...) - } - - // Clear out the values being sent out, keep the remainder. - m.bufferedChunk.Values = m.bufferedChunk.Values[ind:] - - // If we emptied out all the values, clear the mapper's buffered chunk. - if len(m.bufferedChunk.Values) == 0 { - m.bufferedChunk = nil - } - } - - // Sort the values by time first so we can then handle offset and limit - sort.Sort(mapperValues(chunkedOutput.Values)) - - // Now that we have full name and tag details, initialize the rowWriter. - // The Name and Tags will be the same for all mappers. - if rowWriter == nil { - rowWriter = &limitedRowWriter{ - limit: re.stmt.Limit, - offset: re.stmt.Offset, - chunkSize: re.chunkSize, - name: chunkedOutput.Name, - tags: chunkedOutput.Tags, - selectNames: re.stmt.NamesInSelect(), - fields: re.stmt.Fields, - c: out, - } - } - if re.stmt.HasDerivative() { - interval, err := derivativeInterval(re.stmt) - if err != nil { - out <- &influxql.Row{Err: err} - return - } - rowWriter.transformer = &rawQueryDerivativeProcessor{ - isNonNegative: re.stmt.FunctionCalls()[0].Name == "non_negative_derivative", - derivativeInterval: interval, - } - } - - // Emit the data via the limiter. - if limited := rowWriter.Add(chunkedOutput.Values); limited { - // Limit for this tagset was reached, mark it and start draining a new tagset. - re.limitTagSet(chunkedOutput.key()) - continue - } - } - - close(out) +// Cursor represents an iterator over a series. +type Cursor interface { + Seek(seek []byte) (key, value []byte) + Next() (key, value []byte) } - -// mappersDrained returns whether all the executors Mappers have been drained of data. -func (re *RawExecutor) mappersDrained() bool { - for _, m := range re.mappers { - if !m.drained { - return false - } - } - return true -} - -// nextMapperTagset returns the alphabetically lowest tagset across all Mappers. -func (re *RawExecutor) nextMapperTagSet() string { - tagset := "" - for _, m := range re.mappers { - if m.bufferedChunk != nil { - if tagset == "" { - tagset = m.bufferedChunk.key() - } else if m.bufferedChunk.key() < tagset { - tagset = m.bufferedChunk.key() - } - } - } - return tagset -} - -// nextMapperLowestTime returns the lowest minimum time across all Mappers, for the given tagset. 
-func (re *RawExecutor) nextMapperLowestTime(tagset string) int64 { - minTime := int64(math.MaxInt64) - for _, m := range re.mappers { - if !m.drained && m.bufferedChunk != nil { - if m.bufferedChunk.key() != tagset { - continue - } - t := m.bufferedChunk.Values[len(m.bufferedChunk.Values)-1].Time - if t < minTime { - minTime = t - } - } - } - return minTime -} - -// tagSetIsLimited returns whether data for the given tagset has been LIMITed. -func (re *RawExecutor) tagSetIsLimited(tagset string) bool { - _, ok := re.limitedTagSets[tagset] - return ok -} - -// limitTagSet marks the given taset as LIMITed. -func (re *RawExecutor) limitTagSet(tagset string) { - re.limitedTagSets[tagset] = struct{}{} -} - -// Close closes the executor such that all resources are released. Once closed, -// an executor may not be re-used. -func (re *RawExecutor) close() { - if re != nil { - for _, m := range re.mappers { - m.Close() - } - } -} - -// AggregateExecutor is an executor for AggregateMappers. -type AggregateExecutor struct { - stmt *influxql.SelectStatement - queryTMin int64 // Needed? - queryTMax int64 // Needed? - mappers []*StatefulMapper -} - -// NewAggregateExecutor returns a new AggregateExecutor. -func NewAggregateExecutor(stmt *influxql.SelectStatement, mappers []Mapper) *AggregateExecutor { - a := []*StatefulMapper{} - for _, m := range mappers { - a = append(a, &StatefulMapper{m, nil, false}) - } - return &AggregateExecutor{ - stmt: stmt, - mappers: a, - } -} - -// Execute begins execution of the query and returns a channel to receive rows. -func (ae *AggregateExecutor) Execute() <-chan *influxql.Row { - // Create output channel and stream data in a separate goroutine. - out := make(chan *influxql.Row, 0) - go ae.execute(out) - return out -} - -func (ae *AggregateExecutor) execute(out chan *influxql.Row) { - // It's important to close all resources when execution completes. - defer ae.close() - - // Create the functions which will reduce values from mappers for - // a given interval. The function offsets within this slice match - // the offsets within the value slices that are returned by the - // mapper. - aggregates := ae.stmt.FunctionCalls() - reduceFuncs := make([]influxql.ReduceFunc, len(aggregates)) - for i, c := range aggregates { - reduceFunc, err := influxql.InitializeReduceFunc(c) - if err != nil { - out <- &influxql.Row{Err: err} - return - } - reduceFuncs[i] = reduceFunc - } - - // Put together the rows to return, starting with columns. - columnNames := make([]string, len(ae.stmt.Fields)+1) - columnNames[0] = "time" - for i, f := range ae.stmt.Fields { - columnNames[i+1] = f.Name() - } - - // Open the mappers. - for _, m := range ae.mappers { - if err := m.Open(); err != nil { - out <- &influxql.Row{Err: err} - return - } - } - - // Build the set of available tagsets across all mappers. This is used for - // later checks. - availTagSets := newStringSet() - for _, m := range ae.mappers { - for _, t := range m.TagSets() { - availTagSets.add(t) - } - } - - // Prime each mapper's chunk buffer. - var err error - for _, m := range ae.mappers { - m.bufferedChunk, err = m.NextChunk() - if err != nil { - out <- &influxql.Row{Err: err} - return - } - if m.bufferedChunk == nil { - m.drained = true - } - } - - // Keep looping until all mappers drained. - for !ae.mappersDrained() { - // Send out data for the next alphabetically-lowest tagset. All Mappers send out in this order - // so collect data for this tagset, ignoring all others. 
- tagset := ae.nextMapperTagSet() - chunks := []*mapperOutput{} - - // Pull as much as possible from each mapper. Stop when a mapper offers - // data for a new tagset, or empties completely. - for _, m := range ae.mappers { - if m.drained { - continue - } - - for { - if m.bufferedChunk == nil { - m.bufferedChunk, err = m.NextChunk() - if err != nil { - out <- &influxql.Row{Err: err} - return - } - if m.bufferedChunk == nil { - m.drained = true - break - } - } - - // Got a chunk. Can we use it? - if m.bufferedChunk.key() != tagset { - // No, so just leave it in the buffer. - break - } - // We can, take it. - chunks = append(chunks, m.bufferedChunk) - m.bufferedChunk = nil - } - } - - // Prep a row, ready for kicking out. - var row *influxql.Row - - // Prep for bucketing data by start time of the interval. - buckets := map[int64][][]interface{}{} - - for _, chunk := range chunks { - if row == nil { - row = &influxql.Row{ - Name: chunk.Name, - Tags: chunk.Tags, - Columns: columnNames, - } - } - - startTime := chunk.Values[0].Time - _, ok := buckets[startTime] - values := chunk.Values[0].Value.([]interface{}) - if !ok { - buckets[startTime] = make([][]interface{}, len(values)) - } - for i, v := range values { - buckets[startTime][i] = append(buckets[startTime][i], v) - } - } - - // Now, after the loop above, within each time bucket is a slice. Within the element of each - // slice is another slice of interface{}, ready for passing to the reducer functions. - - // Work each bucket of time, in time ascending order. - tMins := make(int64arr, 0, len(buckets)) - for k, _ := range buckets { - tMins = append(tMins, k) - } - sort.Sort(tMins) - - values := make([][]interface{}, len(tMins)) - for i, t := range tMins { - values[i] = make([]interface{}, 0, len(columnNames)) - values[i] = append(values[i], time.Unix(0, t).UTC()) // Time value is always first. - - for j, f := range reduceFuncs { - reducedVal := f(buckets[t][j]) - values[i] = append(values[i], reducedVal) - } - } - - // Perform any mathematics. - values = processForMath(ae.stmt.Fields, values) - - // Handle any fill options - values = ae.processFill(values) - - // process derivatives - values = ae.processDerivative(values) - - // If we have multiple tag sets we'll want to filter out the empty ones - if len(availTagSets.list()) > 1 && resultsEmpty(values) { - continue - } - - row.Values = values - out <- row - } - - close(out) -} - -// processFill will take the results and return new results (or the same if no fill modifications are needed) -// with whatever fill options the query has. -func (ae *AggregateExecutor) processFill(results [][]interface{}) [][]interface{} { - // don't do anything if we're supposed to leave the nulls - if ae.stmt.Fill == influxql.NullFill { - return results - } - - if ae.stmt.Fill == influxql.NoFill { - // remove any rows that have even one nil value. This one is tricky because they could have multiple - // aggregates, but this option means that any row that has even one nil gets purged. 
- newResults := make([][]interface{}, 0, len(results)) - for _, vals := range results { - hasNil := false - // start at 1 because the first value is always time - for j := 1; j < len(vals); j++ { - if vals[j] == nil { - hasNil = true - break - } - } - if !hasNil { - newResults = append(newResults, vals) - } - } - return newResults - } - - // They're either filling with previous values or a specific number - for i, vals := range results { - // start at 1 because the first value is always time - for j := 1; j < len(vals); j++ { - if vals[j] == nil { - switch ae.stmt.Fill { - case influxql.PreviousFill: - if i != 0 { - vals[j] = results[i-1][j] - } - case influxql.NumberFill: - vals[j] = ae.stmt.FillValue - } - } - } - } - return results -} - -// processDerivative returns the derivatives of the results -func (ae *AggregateExecutor) processDerivative(results [][]interface{}) [][]interface{} { - // Return early if we're not supposed to process the derivatives - if ae.stmt.HasDerivative() { - interval, err := derivativeInterval(ae.stmt) - if err != nil { - return results // XXX need to handle this better. - } - - // Determines whether to drop negative differences - isNonNegative := ae.stmt.FunctionCalls()[0].Name == "non_negative_derivative" - return processAggregateDerivative(results, isNonNegative, interval) - } - return results -} - -// mappersDrained returns whether all the executors Mappers have been drained of data. -func (ae *AggregateExecutor) mappersDrained() bool { - for _, m := range ae.mappers { - if !m.drained { - return false - } - } - return true -} - -// nextMapperTagset returns the alphabetically lowest tagset across all Mappers. -func (ae *AggregateExecutor) nextMapperTagSet() string { - tagset := "" - for _, m := range ae.mappers { - if m.bufferedChunk != nil { - if tagset == "" { - tagset = m.bufferedChunk.key() - } else if m.bufferedChunk.key() < tagset { - tagset = m.bufferedChunk.key() - } - } - } - return tagset -} - -// Close closes the executor such that all resources are released. Once closed, -// an executor may not be re-used. -func (ae *AggregateExecutor) close() { - for _, m := range ae.mappers { - m.Close() - } -} - -// limitedRowWriter accepts raw mapper values, and will emit those values as rows in chunks -// of the given size. If the chunk size is 0, no chunking will be performed. In addiiton if -// limit is reached, outstanding values will be emitted. If limit is zero, no limit is enforced. -type limitedRowWriter struct { - chunkSize int - limit int - offset int - name string - tags map[string]string - selectNames []string - fields influxql.Fields - c chan *influxql.Row - - currValues []*mapperValue - totalOffSet int - totalSent int - - transformer interface { - process(input []*mapperValue) []*mapperValue - } -} - -// Add accepts a slice of values, and will emit those values as per chunking requirements. -// If limited is returned as true, the limit was also reached and no more values should be -// added. In that case only up the limit of values are emitted. -func (r *limitedRowWriter) Add(values []*mapperValue) (limited bool) { - if r.currValues == nil { - r.currValues = make([]*mapperValue, 0, r.chunkSize) - } - - // Enforce offset. - if r.totalOffSet < r.offset { - // Still some offsetting to do. - offsetRequired := r.offset - r.totalOffSet - if offsetRequired >= len(values) { - r.totalOffSet += len(values) - return false - } else { - // Drop leading values and keep going. 
- values = values[offsetRequired:] - r.totalOffSet += offsetRequired - } - } - r.currValues = append(r.currValues, values...) - - // Check limit. - limitReached := r.limit > 0 && r.totalSent+len(r.currValues) >= r.limit - if limitReached { - // Limit will be satified with current values. Truncate 'em. - r.currValues = r.currValues[:r.limit-r.totalSent] - } - - // Is chunking in effect? - if r.chunkSize != IgnoredChunkSize { - // Chunking level reached? - for len(r.currValues) >= r.chunkSize { - index := len(r.currValues) - (len(r.currValues) - r.chunkSize) - r.c <- r.processValues(r.currValues[:index]) - r.currValues = r.currValues[index:] - } - - // After values have been sent out by chunking, there may still be some - // values left, if the remainder is less than the chunk size. But if the - // limit has been reached, kick them out. - if len(r.currValues) > 0 && limitReached { - r.c <- r.processValues(r.currValues) - r.currValues = nil - } - } else if limitReached { - // No chunking in effect, but the limit has been reached. - r.c <- r.processValues(r.currValues) - r.currValues = nil - } - - return limitReached -} - -// Flush instructs the limitedRowWriter to emit any pending values as a single row, -// adhering to any limits. Chunking is not enforced. -func (r *limitedRowWriter) Flush() { - if r == nil { - return - } - - // If at least some rows were sent, and no values are pending, then don't - // emit anything, since at least 1 row was previously emitted. This ensures - // that if no rows were ever sent, at least 1 will be emitted, even an empty row. - if r.totalSent != 0 && len(r.currValues) == 0 { - return - } - - if r.limit > 0 && len(r.currValues) > r.limit { - r.currValues = r.currValues[:r.limit] - } - r.c <- r.processValues(r.currValues) - r.currValues = nil -} - -// processValues emits the given values in a single row. -func (r *limitedRowWriter) processValues(values []*mapperValue) *influxql.Row { - defer func() { - r.totalSent += len(values) - }() - - selectNames := r.selectNames - - if r.transformer != nil { - values = r.transformer.process(values) - } - - // ensure that time is in the select names and in the first position - hasTime := false - for i, n := range selectNames { - if n == "time" { - // Swap time to the first argument for names - if i != 0 { - selectNames[0], selectNames[i] = selectNames[i], selectNames[0] - } - hasTime = true - break - } - } - - // time should always be in the list of names they get back - if !hasTime { - selectNames = append([]string{"time"}, selectNames...) - } - - // since selectNames can contain tags, we need to strip them out - selectFields := make([]string, 0, len(selectNames)) - - for _, n := range selectNames { - if _, found := r.tags[n]; !found { - selectFields = append(selectFields, n) - } - } - - row := &influxql.Row{ - Name: r.name, - Tags: r.tags, - Columns: selectFields, - } - - // Kick out an empty row it no results available. 
- if len(values) == 0 { - return row - } - - // if they've selected only a single value we have to handle things a little differently - singleValue := len(selectFields) == SelectColumnCountWithOneValue - - // the results will have all of the raw mapper results, convert into the row - for _, v := range values { - vals := make([]interface{}, len(selectFields)) - - if singleValue { - vals[0] = time.Unix(0, v.Time).UTC() - vals[1] = v.Value.(interface{}) - } else { - fields := v.Value.(map[string]interface{}) - - // time is always the first value - vals[0] = time.Unix(0, v.Time).UTC() - - // populate the other values - for i := 1; i < len(selectFields); i++ { - vals[i] = fields[selectFields[i]] - } - } - - row.Values = append(row.Values, vals) - } - - // Perform any mathematical post-processing. - row.Values = processForMath(r.fields, row.Values) - - return row -} - -type rawQueryDerivativeProcessor struct { - lastValueFromPreviousChunk *mapperValue - isNonNegative bool // Whether to drop negative differences - derivativeInterval time.Duration -} - -func (rqdp *rawQueryDerivativeProcessor) process(input []*mapperValue) []*mapperValue { - if len(input) == 0 { - return input - } - - // If we only have 1 value, then the value did not change, so return - // a single row with 0.0 - if len(input) == 1 { - return []*mapperValue{ - &mapperValue{ - Time: input[0].Time, - Value: 0.0, - }, - } - } - - if rqdp.lastValueFromPreviousChunk == nil { - rqdp.lastValueFromPreviousChunk = input[0] - } - - derivativeValues := []*mapperValue{} - for i := 1; i < len(input); i++ { - v := input[i] - - // Calculate the derivative of successive points by dividing the difference - // of each value by the elapsed time normalized to the interval - diff := int64toFloat64(v.Value) - int64toFloat64(rqdp.lastValueFromPreviousChunk.Value) - - elapsed := v.Time - rqdp.lastValueFromPreviousChunk.Time - - value := 0.0 - if elapsed > 0 { - value = diff / (float64(elapsed) / float64(rqdp.derivativeInterval)) - } - - rqdp.lastValueFromPreviousChunk = v - - // Drop negative values for non-negative derivatives - if rqdp.isNonNegative && diff < 0 { - continue - } - - derivativeValues = append(derivativeValues, &mapperValue{ - Time: v.Time, - Value: value, - }) - } - - return derivativeValues -} - -// processForMath will apply any math that was specified in the select statement -// against the passed in results -func processForMath(fields influxql.Fields, results [][]interface{}) [][]interface{} { - hasMath := false - for _, f := range fields { - if _, ok := f.Expr.(*influxql.BinaryExpr); ok { - hasMath = true - } else if _, ok := f.Expr.(*influxql.ParenExpr); ok { - hasMath = true - } - } - - if !hasMath { - return results - } - - processors := make([]influxql.Processor, len(fields)) - startIndex := 1 - for i, f := range fields { - processors[i], startIndex = influxql.GetProcessor(f.Expr, startIndex) - } - - mathResults := make([][]interface{}, len(results)) - for i, _ := range mathResults { - mathResults[i] = make([]interface{}, len(fields)+1) - // put the time in - mathResults[i][0] = results[i][0] - for j, p := range processors { - mathResults[i][j+1] = p(results[i]) - } - } - - return mathResults -} - -// processAggregateDerivative returns the derivatives of an aggregate result set -func processAggregateDerivative(results [][]interface{}, isNonNegative bool, interval time.Duration) [][]interface{} { - // Return early if we can't calculate derivatives - if len(results) == 0 { - return results - } - - // If we only have 1 value, then 
the value did not change, so return - // a single row w/ 0.0 - if len(results) == 1 { - return [][]interface{}{ - []interface{}{results[0][0], 0.0}, - } - } - - // Otherwise calculate the derivatives as the difference between consecutive - // points divided by the elapsed time. Then normalize to the requested - // interval. - derivatives := [][]interface{}{} - for i := 1; i < len(results); i++ { - prev := results[i-1] - cur := results[i] - - if cur[1] == nil || prev[1] == nil { - continue - } - - elapsed := cur[0].(time.Time).Sub(prev[0].(time.Time)) - diff := int64toFloat64(cur[1]) - int64toFloat64(prev[1]) - value := 0.0 - if elapsed > 0 { - value = float64(diff) / (float64(elapsed) / float64(interval)) - } - - // Drop negative values for non-negative derivatives - if isNonNegative && diff < 0 { - continue - } - - val := []interface{}{ - cur[0], - value, - } - derivatives = append(derivatives, val) - } - - return derivatives -} - -// derivativeInterval returns the time interval for the one (and only) derivative func -func derivativeInterval(stmt *influxql.SelectStatement) (time.Duration, error) { - if len(stmt.FunctionCalls()[0].Args) == 2 { - return stmt.FunctionCalls()[0].Args[1].(*influxql.DurationLiteral).Val, nil - } - interval, err := stmt.GroupByInterval() - if err != nil { - return 0, err - } - if interval > 0 { - return interval, nil - } - return time.Second, nil -} - -// resultsEmpty will return true if the all the result values are empty or contain only nulls -func resultsEmpty(resultValues [][]interface{}) bool { - for _, vals := range resultValues { - // start the loop at 1 because we want to skip over the time value - for i := 1; i < len(vals); i++ { - if vals[i] != nil { - return false - } - } - } - return true -} - -func int64toFloat64(v interface{}) float64 { - switch v.(type) { - case int64: - return float64(v.(int64)) - case float64: - return v.(float64) - } - panic(fmt.Sprintf("expected either int64 or float64, got %v", v)) -} - -type int64arr []int64 - -func (a int64arr) Len() int { return len(a) } -func (a int64arr) Swap(i, j int) { a[i], a[j] = a[j], a[i] } -func (a int64arr) Less(i, j int) bool { return a[i] < a[j] } diff --git a/Godeps/_workspace/src/github.com/influxdb/influxdb/tsdb/engine/b1/b1.go b/Godeps/_workspace/src/github.com/influxdb/influxdb/tsdb/engine/b1/b1.go new file mode 100644 index 000000000..f0f7fbb18 --- /dev/null +++ b/Godeps/_workspace/src/github.com/influxdb/influxdb/tsdb/engine/b1/b1.go @@ -0,0 +1,695 @@ +package b1 + +import ( + "bytes" + "encoding/binary" + "errors" + "fmt" + "hash/fnv" + "io" + "log" + "os" + "sort" + "sync" + "time" + + "github.com/boltdb/bolt" + "github.com/influxdb/influxdb/tsdb" +) + +// Format is the file format name of this engine. +const Format = "b1" + +func init() { + tsdb.RegisterEngine(Format, NewEngine) +} + +// topLevelBucketN is the number of non-series buckets in the bolt db. +const topLevelBucketN = 3 + +var ( + // ErrWALPartitionNotFound returns when flushing a partition that does not exist. + ErrWALPartitionNotFound = errors.New("wal partition not found") +) + +// Ensure Engine implements the interface. +var _ tsdb.Engine = &Engine{} + +// Engine represents a version 1 storage engine. 
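//
// A minimal usage sketch (the path and options are illustrative; construction
// can also go through tsdb.NewEngine, which resolves the format name this
// package registers in init above):
//
//	e := NewEngine("/tmp/shard.db", tsdb.NewEngineOptions())
//	if err := e.Open(); err != nil {
//		// handle error; Open also replays any on-disk WAL before returning
//	}
//	defer e.Close()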
+type Engine struct { + mu sync.RWMutex + + path string // path to data file + db *bolt.DB // underlying database + + cache map[uint8]map[string][][]byte // values by + + walSize int // approximate size of the WAL, in bytes + flush chan struct{} // signals background flush + flushTimer *time.Timer // signals time-based flush + + // These coordinate closing and waiting for running goroutines. + wg sync.WaitGroup + closing chan struct{} + + // Used for out-of-band error messages. + logger *log.Logger + + // The maximum size and time thresholds for flushing the WAL. + MaxWALSize int + WALFlushInterval time.Duration + WALPartitionFlushDelay time.Duration + + // The writer used by the logger. + LogOutput io.Writer +} + +// NewEngine returns a new instance of Engine. +func NewEngine(path string, opt tsdb.EngineOptions) tsdb.Engine { + e := &Engine{ + path: path, + flush: make(chan struct{}, 1), + + MaxWALSize: opt.MaxWALSize, + WALFlushInterval: opt.WALFlushInterval, + WALPartitionFlushDelay: opt.WALPartitionFlushDelay, + + LogOutput: os.Stderr, + } + + // Initialize all partitions of the cache. + e.cache = make(map[uint8]map[string][][]byte) + for i := uint8(0); i < WALPartitionN; i++ { + e.cache[i] = make(map[string][][]byte) + } + + return e +} + +// Path returns the path the engine was initialized with. +func (e *Engine) Path() string { return e.path } + +// Open opens and initializes the engine. +func (e *Engine) Open() error { + if err := func() error { + e.mu.Lock() + defer e.mu.Unlock() + + // Open underlying storage. + db, err := bolt.Open(e.path, 0666, &bolt.Options{Timeout: 1 * time.Second}) + if err != nil { + return err + } + e.db = db + + // Initialize data file. + if err := e.db.Update(func(tx *bolt.Tx) error { + _, _ = tx.CreateBucketIfNotExists([]byte("series")) + _, _ = tx.CreateBucketIfNotExists([]byte("fields")) + _, _ = tx.CreateBucketIfNotExists([]byte("wal")) + + // Set file format, if not set yet. + b, _ := tx.CreateBucketIfNotExists([]byte("meta")) + if v := b.Get([]byte("format")); v == nil { + if err := b.Put([]byte("format"), []byte(Format)); err != nil { + return fmt.Errorf("set format: %s", err) + } + } + + return nil + }); err != nil { + return fmt.Errorf("init: %s", err) + } + + // Start flush interval timer. + e.flushTimer = time.NewTimer(e.WALFlushInterval) + + // Initialize logger. + e.logger = log.New(e.LogOutput, "[b1] ", log.LstdFlags) + + // Start background goroutines. + e.wg.Add(1) + e.closing = make(chan struct{}) + go e.autoflusher(e.closing) + + return nil + }(); err != nil { + e.close() + return err + } + + // Flush on-disk WAL before we return to the caller. + if err := e.Flush(0); err != nil { + return fmt.Errorf("flush: %s", err) + } + + return nil +} + +func (e *Engine) Close() error { + e.mu.Lock() + err := e.close() + e.mu.Unlock() + + // Wait for open goroutines to finish. + e.wg.Wait() + return err +} + +func (e *Engine) close() error { + if e.db != nil { + e.db.Close() + } + if e.closing != nil { + close(e.closing) + e.closing = nil + } + return nil +} + +// SetLogOutput sets the writer used for log output. +// This must be set before opening the engine. +func (e *Engine) SetLogOutput(w io.Writer) { e.LogOutput = w } + +// LoadMetadataIndex loads the shard metadata into memory. 
+func (e *Engine) LoadMetadataIndex(index *tsdb.DatabaseIndex, measurementFields map[string]*tsdb.MeasurementFields) error { + return e.db.View(func(tx *bolt.Tx) error { + // load measurement metadata + meta := tx.Bucket([]byte("fields")) + c := meta.Cursor() + for k, v := c.First(); k != nil; k, v = c.Next() { + m := index.CreateMeasurementIndexIfNotExists(string(k)) + mf := &tsdb.MeasurementFields{} + if err := mf.UnmarshalBinary(v); err != nil { + return err + } + for name, _ := range mf.Fields { + m.SetFieldName(name) + } + mf.Codec = tsdb.NewFieldCodec(mf.Fields) + measurementFields[m.Name] = mf + } + + // load series metadata + meta = tx.Bucket([]byte("series")) + c = meta.Cursor() + for k, v := c.First(); k != nil; k, v = c.Next() { + series := &tsdb.Series{} + if err := series.UnmarshalBinary(v); err != nil { + return err + } + index.CreateSeriesIndexIfNotExists(tsdb.MeasurementFromSeriesKey(string(k)), series) + } + return nil + }) +} + +// WritePoints will write the raw data points and any new metadata to the index in the shard +func (e *Engine) WritePoints(points []tsdb.Point, measurementFieldsToSave map[string]*tsdb.MeasurementFields, seriesToCreate []*tsdb.SeriesCreate) error { + // save to the underlying bolt instance + if err := e.db.Update(func(tx *bolt.Tx) error { + // save any new metadata + if len(seriesToCreate) > 0 { + b := tx.Bucket([]byte("series")) + for _, sc := range seriesToCreate { + data, err := sc.Series.MarshalBinary() + if err != nil { + return err + } + if err := b.Put([]byte(sc.Series.Key), data); err != nil { + return err + } + } + } + if len(measurementFieldsToSave) > 0 { + b := tx.Bucket([]byte("fields")) + for name, m := range measurementFieldsToSave { + data, err := m.MarshalBinary() + if err != nil { + return err + } + if err := b.Put([]byte(name), data); err != nil { + return err + } + } + } + + // Write points to WAL bucket. + wal := tx.Bucket([]byte("wal")) + for _, p := range points { + // Retrieve partition bucket. + key := p.Key() + b, err := wal.CreateBucketIfNotExists([]byte{WALPartition(key)}) + if err != nil { + return fmt.Errorf("create WAL partition bucket: %s", err) + } + + // Generate an autoincrementing index for the WAL partition. + id, _ := b.NextSequence() + + // Append points sequentially to the WAL bucket. + v := marshalWALEntry(key, p.UnixNano(), p.Data()) + if err := b.Put(u64tob(id), v); err != nil { + return fmt.Errorf("put wal: %s", err) + } + } + + return nil + }); err != nil { + return err + } + + // If successful then save points to in-memory cache. + if err := func() error { + e.mu.Lock() + defer e.mu.Unlock() + + // tracks which in-memory caches need to be resorted + resorts := map[uint8]map[string]struct{}{} + + for _, p := range points { + // Generate in-memory cache entry of . + key, data := p.Key(), p.Data() + v := make([]byte, 8+len(data)) + binary.BigEndian.PutUint64(v[0:8], uint64(p.UnixNano())) + copy(v[8:], data) + + // Determine if we are appending. + partitionID := WALPartition(key) + a := e.cache[partitionID][string(key)] + appending := (len(a) == 0 || bytes.Compare(a[len(a)-1], v) == -1) + + // Append to cache list. + a = append(a, v) + + // If not appending, keep track of cache lists that need to be resorted. + if !appending { + series := resorts[partitionID] + if series == nil { + series = map[string]struct{}{} + resorts[partitionID] = series + } + series[string(key)] = struct{}{} + } + + e.cache[partitionID][string(key)] = a + + // Calculate estimated WAL size. 
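			// Why the bytes.Compare above is enough to detect appends: each
			// cache entry is prefixed with its 8-byte big-endian timestamp,
			// so byte order and time order coincide. With illustrative
			// values (marshalCacheEntry produces the same layout as v here):
			//
			//	older := marshalCacheEntry(1, []byte("a"))
			//	newer := marshalCacheEntry(2, []byte("a"))
			//	bytes.Compare(older, newer) == -1 // true: newer appends cleanly
			//
			// Out-of-order arrivals flip that comparison and mark the series
			// for a resort below.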
+ e.walSize += len(key) + len(v) + } + + // Sort by timestamp if not appending. + for partitionID, cache := range resorts { + for key, _ := range cache { + sort.Sort(byteSlices(e.cache[partitionID][key])) + } + } + + // Check for flush threshold. + e.triggerAutoFlush() + + return nil + }(); err != nil { + return err + } + + return nil +} + +// DeleteSeries deletes the series from the engine. +func (e *Engine) DeleteSeries(keys []string) error { + e.mu.Lock() + defer e.mu.Unlock() + + if err := e.db.Update(func(tx *bolt.Tx) error { + b := tx.Bucket([]byte("series")) + for _, k := range keys { + if err := b.Delete([]byte(k)); err != nil { + return err + } + if err := tx.DeleteBucket([]byte(k)); err != nil && err != bolt.ErrBucketNotFound { + return err + } + delete(e.cache[WALPartition([]byte(k))], k) + } + return nil + }); err != nil { + return err + } + + return nil +} + +// DeleteMeasurement deletes a measurement and all related series. +func (e *Engine) DeleteMeasurement(name string, seriesKeys []string) error { + e.mu.Lock() + defer e.mu.Unlock() + + if err := e.db.Update(func(tx *bolt.Tx) error { + bm := tx.Bucket([]byte("fields")) + if err := bm.Delete([]byte(name)); err != nil { + return err + } + b := tx.Bucket([]byte("series")) + for _, k := range seriesKeys { + if err := b.Delete([]byte(k)); err != nil { + return err + } + if err := tx.DeleteBucket([]byte(k)); err != nil && err != bolt.ErrBucketNotFound { + return err + } + delete(e.cache[WALPartition([]byte(k))], k) + } + + return nil + }); err != nil { + return err + } + + return nil +} + +// Flush writes all points from the write ahead log to the index. +func (e *Engine) Flush(partitionFlushDelay time.Duration) error { + // Retrieve a list of WAL buckets. + var partitionIDs []uint8 + if err := e.db.View(func(tx *bolt.Tx) error { + return tx.Bucket([]byte("wal")).ForEach(func(key, _ []byte) error { + partitionIDs = append(partitionIDs, uint8(key[0])) + return nil + }) + }); err != nil { + return err + } + + // Continue flushing until there are no more partition buckets. + for _, partitionID := range partitionIDs { + if err := e.FlushPartition(partitionID); err != nil { + return fmt.Errorf("flush partition: id=%d, err=%s", partitionID, err) + } + + // Wait momentarily so other threads can process. + time.Sleep(partitionFlushDelay) + } + + e.mu.Lock() + defer e.mu.Unlock() + + // Reset WAL size. + e.walSize = 0 + + // Reset the timer. + e.flushTimer.Reset(e.WALFlushInterval) + + return nil +} + +// FlushPartition flushes a single WAL partition. +func (e *Engine) FlushPartition(partitionID uint8) error { + e.mu.Lock() + defer e.mu.Unlock() + + startTime := time.Now() + + var pointN int + if err := e.db.Update(func(tx *bolt.Tx) error { + // Retrieve partition bucket. Exit if it doesn't exist. + pb := tx.Bucket([]byte("wal")).Bucket([]byte{byte(partitionID)}) + if pb == nil { + return ErrWALPartitionNotFound + } + + // Iterate over keys in the WAL partition bucket. + c := pb.Cursor() + for k, v := c.First(); k != nil; k, v = c.Next() { + key, timestamp, data := unmarshalWALEntry(v) + + // Create bucket for entry. + b, err := tx.CreateBucketIfNotExists(key) + if err != nil { + return fmt.Errorf("create bucket: %s", err) + } + + // Write point to bucket. + if err := b.Put(u64tob(uint64(timestamp)), data); err != nil { + return fmt.Errorf("put: %s", err) + } + + // Remove entry in the WAL. 
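			// The entry just unmarshalled round-trips through the helpers at
			// the bottom of this file; the key and data below are made up:
			//
			//	v := marshalWALEntry([]byte("cpu,host=a"), 1000, []byte{0x1})
			//	k, ts, data := unmarshalWALEntry(v)
			//	// k == "cpu,host=a", ts == 1000, data == []byte{0x1}
			//
			// WALPartition(k) then maps the key onto one of WALPartitionN
			// cache/WAL partitions via an FNV-1a hash.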
+ if err := c.Delete(); err != nil { + return fmt.Errorf("delete: %s", err) + } + + pointN++ + } + + return nil + }); err != nil { + return err + } + + // Reset cache. + e.cache[partitionID] = make(map[string][][]byte) + + if pointN > 0 { + e.logger.Printf("flush %d points in %.3fs", pointN, time.Since(startTime).Seconds()) + } + + return nil +} + +// autoflusher waits for notification of a flush and kicks it off in the background. +// This method runs in a separate goroutine. +func (e *Engine) autoflusher(closing chan struct{}) { + defer e.wg.Done() + + for { + // Wait for close or flush signal. + select { + case <-closing: + return + case <-e.flushTimer.C: + if err := e.Flush(e.WALPartitionFlushDelay); err != nil { + e.logger.Printf("flush error: %s", err) + } + case <-e.flush: + if err := e.Flush(e.WALPartitionFlushDelay); err != nil { + e.logger.Printf("flush error: %s", err) + } + } + } +} + +// triggerAutoFlush signals that a flush should occur if the size is above the threshold. +// This function must be called within the context of a lock. +func (e *Engine) triggerAutoFlush() { + // Ignore if we haven't reached the threshold. + if e.walSize < e.MaxWALSize { + return + } + + // Otherwise send a non-blocking signal. + select { + case e.flush <- struct{}{}: + default: + } +} + +// SeriesCount returns the number of series buckets on the shard. +// This does not include a count from the WAL. +func (e *Engine) SeriesCount() (n int, err error) { + err = e.db.View(func(tx *bolt.Tx) error { + return tx.ForEach(func(_ []byte, _ *bolt.Bucket) error { + n++ + return nil + }) + }) + + // Remove top-level buckets. + n -= topLevelBucketN + + return +} + +// Begin starts a new transaction on the engine. +func (e *Engine) Begin(writable bool) (tsdb.Tx, error) { + tx, err := e.db.Begin(writable) + if err != nil { + return nil, err + } + return &Tx{Tx: tx, engine: e}, nil +} + +// DB returns the underlying Bolt database. +func (e *Engine) DB() *bolt.DB { return e.db } + +// Tx represents a transaction. +type Tx struct { + *bolt.Tx + engine *Engine +} + +// Cursor returns an iterator for a key. +func (tx *Tx) Cursor(key string) tsdb.Cursor { + // Retrieve key bucket. + b := tx.Bucket([]byte(key)) + + tx.engine.mu.RLock() + defer tx.engine.mu.RUnlock() + + // Ignore if there is no bucket or points in the cache. + partitionID := WALPartition([]byte(key)) + if b == nil && len(tx.engine.cache[partitionID][key]) == 0 { + return nil + } + + // Retrieve a copy of the in-cache points for the key. + cache := make([][]byte, len(tx.engine.cache[partitionID][key])) + copy(cache, tx.engine.cache[partitionID][key]) + + // Build a cursor that merges the bucket and cache together. + cur := &Cursor{cache: cache} + if b != nil { + cur.cursor = b.Cursor() + } + return cur +} + +// Cursor provides ordered iteration across a series. +type Cursor struct { + // Bolt cursor and readahead buffer. + cursor *bolt.Cursor + buf struct { + key, value []byte + } + + // Cache and current cache index. + cache [][]byte + index int + + // Previously read key. + prev []byte +} + +// Seek moves the cursor to a position and returns the closest key/value pair. +func (c *Cursor) Seek(seek []byte) (key, value []byte) { + // Seek bolt cursor. + if c.cursor != nil { + c.buf.key, c.buf.value = c.cursor.Seek(seek) + } + + // Seek cache index. 
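
The `select` with a `default` arm in `triggerAutoFlush` above is the standard Go idiom for a non-blocking signal: the hot write path never waits on the flusher, and repeated triggers coalesce into one pending flush. A runnable sketch (a 1-slot buffered channel is used here so the example is self-contained; the engine itself pairs the send with its autoflusher goroutine):

```go
package main

import "fmt"

func main() {
	flush := make(chan struct{}, 1)

	trigger := func() {
		select {
		case flush <- struct{}{}: // wake the flusher
		default: // a flush is already pending; drop the signal
		}
	}

	trigger()
	trigger() // coalesced: the second signal is dropped, not queued

	<-flush
	fmt.Println("flush requested once")
}
```
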
+	c.index = sort.Search(len(c.cache), func(i int) bool {
+		return bytes.Compare(c.cache[i][0:8], seek) != -1
+	})
+
+	c.prev = nil
+	return c.read()
+}
+
+// Next returns the next key/value pair from the cursor.
+func (c *Cursor) Next() (key, value []byte) {
+	return c.read()
+}
+
+// read returns the next key/value in the cursor buffer or cache.
+func (c *Cursor) read() (key, value []byte) {
+	// Continue skipping ahead through duplicate keys in the cache list.
+	for {
+		// Read next value from the cursor.
+		if c.buf.key == nil && c.cursor != nil {
+			c.buf.key, c.buf.value = c.cursor.Next()
+		}
+
+		// Read from the buffer or cache, whichever is lower.
+		if c.buf.key != nil && (c.index >= len(c.cache) || bytes.Compare(c.buf.key, c.cache[c.index][0:8]) == -1) {
+			key, value = c.buf.key, c.buf.value
+			c.buf.key, c.buf.value = nil, nil
+		} else if c.index < len(c.cache) {
+			key, value = c.cache[c.index][0:8], c.cache[c.index][8:]
+			c.index++
+		} else {
+			key, value = nil, nil
+		}
+
+		// Exit loop if we're at the end of the cache or the next key is different.
+		if key == nil || !bytes.Equal(key, c.prev) {
+			break
+		}
+	}
+
+	c.prev = key
+	return
+}
+
+// WALPartitionN is the number of partitions in the write ahead log.
+const WALPartitionN = 8
+
+// WALPartition returns the partition number that key belongs to.
+func WALPartition(key []byte) uint8 {
+	h := fnv.New64a()
+	h.Write(key)
+	return uint8(h.Sum64() % WALPartitionN)
+}
+
+// marshalWALEntry encodes point data into a single byte slice.
+//
+// The format of the byte slice is:
+//
+//	uint64 timestamp
+//	uint32 key length
+//	[]byte key
+//	[]byte data
+//
+func marshalWALEntry(key []byte, timestamp int64, data []byte) []byte {
+	v := make([]byte, 8+4, 8+4+len(key)+len(data))
+	binary.BigEndian.PutUint64(v[0:8], uint64(timestamp))
+	binary.BigEndian.PutUint32(v[8:12], uint32(len(key)))
+	v = append(v, key...)
+	v = append(v, data...)
+	return v
+}
+
+// unmarshalWALEntry decodes a WAL entry into its separate parts.
+// Returned byte slices point to the original slice.
+func unmarshalWALEntry(v []byte) (key []byte, timestamp int64, data []byte) {
+	keyLen := binary.BigEndian.Uint32(v[8:12])
+	key = v[12 : 12+keyLen]
+	timestamp = int64(binary.BigEndian.Uint64(v[0:8]))
+	data = v[12+keyLen:]
+	return
+}
+
+// marshalCacheEntry encodes the timestamp and data to a single byte slice.
+//
+// The format of the byte slice is:
+//
+//	uint64 timestamp
+//	[]byte data
+//
+func marshalCacheEntry(timestamp int64, data []byte) []byte {
+	buf := make([]byte, 8, 8+len(data))
+	binary.BigEndian.PutUint64(buf[0:8], uint64(timestamp))
+	return append(buf, data...)
+}
+
+// unmarshalCacheEntry returns the timestamp and data from an encoded byte slice.
+func unmarshalCacheEntry(buf []byte) (timestamp int64, data []byte) {
+	timestamp = int64(binary.BigEndian.Uint64(buf[0:8]))
+	data = buf[8:]
+	return
+}
+
+// u64tob converts a uint64 into an 8-byte slice.
+func u64tob(v uint64) []byte {
+	b := make([]byte, 8)
+	binary.BigEndian.PutUint64(b, v)
+	return b
+}
+
+// byteSlices represents a sortable slice of byte slices.
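
A round-trip of the WAL entry format defined above, together with the FNV-1a partitioning that fans series keys out across `WALPartitionN` buckets. This sketch simply restates the same functions in lowercase so it runs standalone; nothing here is new behavior.

```go
package main

import (
	"encoding/binary"
	"fmt"
	"hash/fnv"
)

const walPartitionN = 8

// walPartition mirrors WALPartition above.
func walPartition(key []byte) uint8 {
	h := fnv.New64a()
	h.Write(key)
	return uint8(h.Sum64() % walPartitionN)
}

// marshalWALEntry mirrors the [timestamp][key length][key][data] layout.
func marshalWALEntry(key []byte, timestamp int64, data []byte) []byte {
	v := make([]byte, 8+4, 8+4+len(key)+len(data))
	binary.BigEndian.PutUint64(v[0:8], uint64(timestamp))
	binary.BigEndian.PutUint32(v[8:12], uint32(len(key)))
	v = append(v, key...)
	return append(v, data...)
}

func unmarshalWALEntry(v []byte) (key []byte, timestamp int64, data []byte) {
	keyLen := binary.BigEndian.Uint32(v[8:12])
	return v[12 : 12+keyLen], int64(binary.BigEndian.Uint64(v[0:8])), v[12+keyLen:]
}

func main() {
	key := []byte("cpu,host=serverA")
	v := marshalWALEntry(key, 1434059627000000000, []byte{0x1, 0x2})
	k, ts, data := unmarshalWALEntry(v)
	fmt.Printf("partition=%d key=%s ts=%d data=%x\n", walPartition(key), k, ts, data)
}
```
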
+type byteSlices [][]byte + +func (a byteSlices) Len() int { return len(a) } +func (a byteSlices) Less(i, j int) bool { return bytes.Compare(a[i], a[j]) == -1 } +func (a byteSlices) Swap(i, j int) { a[i], a[j] = a[j], a[i] } diff --git a/Godeps/_workspace/src/github.com/influxdb/influxdb/tsdb/engine/b1/b1_test.go b/Godeps/_workspace/src/github.com/influxdb/influxdb/tsdb/engine/b1/b1_test.go new file mode 100644 index 000000000..ee1009dbd --- /dev/null +++ b/Godeps/_workspace/src/github.com/influxdb/influxdb/tsdb/engine/b1/b1_test.go @@ -0,0 +1,134 @@ +package b1_test + +import ( + "bytes" + "encoding/binary" + "io/ioutil" + "os" + "testing" + "time" + + "github.com/influxdb/influxdb/influxql" + "github.com/influxdb/influxdb/tsdb" + "github.com/influxdb/influxdb/tsdb/engine/b1" +) + +// Ensure points can be written to the engine and queried. +func TestEngine_WritePoints(t *testing.T) { + e := OpenDefaultEngine() + defer e.Close() + + // Create metadata. + mf := &tsdb.MeasurementFields{Fields: make(map[string]*tsdb.Field)} + mf.CreateFieldIfNotExists("value", influxql.Float) + seriesToCreate := []*tsdb.SeriesCreate{ + {Series: &tsdb.Series{Key: string(tsdb.MakeKey([]byte("temperature"), nil))}}, + } + + // Parse point. + points, err := tsdb.ParsePointsWithPrecision([]byte("temperature value=100 1434059627"), time.Now().UTC(), "s") + if err != nil { + t.Fatal(err) + } else if data, err := mf.Codec.EncodeFields(points[0].Fields()); err != nil { + t.Fatal(err) + } else { + points[0].SetData(data) + } + + // Write original value. + if err := e.WritePoints(points, map[string]*tsdb.MeasurementFields{"temperature": mf}, seriesToCreate); err != nil { + t.Fatal(err) + } + + // Flush to disk. + if err := e.Flush(0); err != nil { + t.Fatal(err) + } + + // Parse new point. + points, err = tsdb.ParsePointsWithPrecision([]byte("temperature value=200 1434059627"), time.Now().UTC(), "s") + if err != nil { + t.Fatal(err) + } else if data, err := mf.Codec.EncodeFields(points[0].Fields()); err != nil { + t.Fatal(err) + } else { + points[0].SetData(data) + } + + // Update existing value. + if err := e.WritePoints(points, nil, nil); err != nil { + t.Fatal(err) + } + + // Ensure only the updated value is read. + tx := e.MustBegin(false) + defer tx.Rollback() + + c := tx.Cursor("temperature") + if k, v := c.Seek([]byte{0}); !bytes.Equal(k, u64tob(uint64(time.Unix(1434059627, 0).UnixNano()))) { + t.Fatalf("unexpected key: %#v", k) + } else if m, err := mf.Codec.DecodeFieldsWithNames(v); err != nil { + t.Fatal(err) + } else if m["value"] != float64(200) { + t.Errorf("unexpected value: %#v", m) + } + + if k, v := c.Next(); k != nil { + t.Fatalf("unexpected key/value: %#v / %#v", k, v) + } +} + +// Engine represents a test wrapper for b1.Engine. +type Engine struct { + *b1.Engine +} + +// NewEngine returns a new instance of Engine. +func NewEngine(opt tsdb.EngineOptions) *Engine { + // Generate temporary file. + f, _ := ioutil.TempFile("", "b1-") + f.Close() + os.Remove(f.Name()) + + return &Engine{ + Engine: b1.NewEngine(f.Name(), opt).(*b1.Engine), + } +} + +// OpenEngine returns an opened instance of Engine. Panic on error. +func OpenEngine(opt tsdb.EngineOptions) *Engine { + e := NewEngine(opt) + if err := e.Open(); err != nil { + panic(err) + } + return e +} + +// OpenDefaultEngine returns an open Engine with default options. +func OpenDefaultEngine() *Engine { return OpenEngine(tsdb.NewEngineOptions()) } + +// Close closes the engine and removes all data. 
+func (e *Engine) Close() error {
+	e.Engine.Close()
+	os.RemoveAll(e.Path())
+	return nil
+}
+
+// MustBegin returns a new transaction. Panic on error.
+func (e *Engine) MustBegin(writable bool) tsdb.Tx {
+	tx, err := e.Begin(writable)
+	if err != nil {
+		panic(err)
+	}
+	return tx
+}
+
+func u64tob(v uint64) []byte {
+	b := make([]byte, 8)
+	binary.BigEndian.PutUint64(b, v)
+	return b
+}
+
+func btou64(b []byte) uint64 {
+	return binary.BigEndian.Uint64(b)
+}
diff --git a/Godeps/_workspace/src/github.com/influxdb/influxdb/tsdb/engine/bz1/bz1.go b/Godeps/_workspace/src/github.com/influxdb/influxdb/tsdb/engine/bz1/bz1.go
new file mode 100644
index 000000000..6ec3f8c8c
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/influxdb/influxdb/tsdb/engine/bz1/bz1.go
@@ -0,0 +1,627 @@
+package bz1
+
+import (
+	"bytes"
+	"encoding/binary"
+	"errors"
+	"fmt"
+	"io"
+	"log"
+	"math"
+	"sort"
+	"sync"
+	"time"
+
+	"github.com/boltdb/bolt"
+	"github.com/golang/snappy"
+	"github.com/influxdb/influxdb/tsdb"
+)
+
+var (
+	// ErrSeriesExists is returned when writing points to an existing series.
+	ErrSeriesExists = errors.New("series exists")
+)
+
+// Format is the file format name of this engine.
+const Format = "bz1"
+
+func init() {
+	tsdb.RegisterEngine(Format, NewEngine)
+}
+
+const (
+	// DefaultBlockSize is the default size of uncompressed points blocks.
+	DefaultBlockSize = 32 * 1024 // 32KB
+)
+
+// Ensure Engine implements the interface.
+var _ tsdb.Engine = &Engine{}
+
+// Engine represents a storage engine with compressed blocks.
+type Engine struct {
+	mu   sync.Mutex
+	path string
+	db   *bolt.DB
+
+	// Write-ahead log storage.
+	PointsWriter interface {
+		WritePoints(points []tsdb.Point) error
+	}
+
+	// Size of uncompressed points to write to a block.
+	BlockSize int
+}
+
+// NewEngine returns a new instance of Engine.
+func NewEngine(path string, opt tsdb.EngineOptions) tsdb.Engine {
+	return &Engine{
+		path: path,
+
+		BlockSize: DefaultBlockSize,
+	}
+}
+
+// Path returns the path the engine was opened with.
+func (e *Engine) Path() string { return e.path }
+
+// Open opens and initializes the engine.
+func (e *Engine) Open() error {
+	if err := func() error {
+		e.mu.Lock()
+		defer e.mu.Unlock()
+
+		// Open underlying storage.
+		db, err := bolt.Open(e.path, 0666, &bolt.Options{Timeout: 1 * time.Second})
+		if err != nil {
+			return err
+		}
+		e.db = db
+
+		// Initialize data file.
+		if err := e.db.Update(func(tx *bolt.Tx) error {
+			_, _ = tx.CreateBucketIfNotExists([]byte("series"))
+			_, _ = tx.CreateBucketIfNotExists([]byte("fields"))
+			_, _ = tx.CreateBucketIfNotExists([]byte("points"))
+
+			// Set file format, if not set yet.
+			b, _ := tx.CreateBucketIfNotExists([]byte("meta"))
+			if v := b.Get([]byte("format")); v == nil {
+				if err := b.Put([]byte("format"), []byte(Format)); err != nil {
+					return fmt.Errorf("set format: %s", err)
+				}
+			}
+
+			return nil
+		}); err != nil {
+			return fmt.Errorf("init: %s", err)
+		}
+
+		return nil
+	}(); err != nil {
+		e.close()
+		return err
+	}
+	return nil
+}
+
+// Close closes the engine.
+func (e *Engine) Close() error {
+	e.mu.Lock()
+	defer e.mu.Unlock()
+	return e.close()
+}
+
+func (e *Engine) close() error {
+	if e.db != nil {
+		return e.db.Close()
+	}
+	return nil
+}
+
+// SetLogOutput is a no-op.
+func (e *Engine) SetLogOutput(w io.Writer) {}
+
+// LoadMetadataIndex loads the shard metadata into memory.
+func (e *Engine) LoadMetadataIndex(index *tsdb.DatabaseIndex, measurementFields map[string]*tsdb.MeasurementFields) error { + return e.db.View(func(tx *bolt.Tx) error { + // Load measurement metadata + meta := tx.Bucket([]byte("fields")) + c := meta.Cursor() + for k, v := c.First(); k != nil; k, v = c.Next() { + m := index.CreateMeasurementIndexIfNotExists(string(k)) + mf := &tsdb.MeasurementFields{} + if err := mf.UnmarshalBinary(v); err != nil { + return err + } + for name, _ := range mf.Fields { + m.SetFieldName(name) + } + mf.Codec = tsdb.NewFieldCodec(mf.Fields) + measurementFields[m.Name] = mf + } + + // Load series metadata + meta = tx.Bucket([]byte("series")) + c = meta.Cursor() + for k, v := c.First(); k != nil; k, v = c.Next() { + series := &tsdb.Series{} + if err := series.UnmarshalBinary(v); err != nil { + return err + } + index.CreateSeriesIndexIfNotExists(tsdb.MeasurementFromSeriesKey(string(k)), series) + } + return nil + }) +} + +// WritePoints writes metadata and point data into the engine. +// Returns an error if new points are added to an existing key. +func (e *Engine) WritePoints(points []tsdb.Point, measurementFieldsToSave map[string]*tsdb.MeasurementFields, seriesToCreate []*tsdb.SeriesCreate) error { + // Write series & field metadata. + if err := e.db.Update(func(tx *bolt.Tx) error { + if err := e.writeSeries(tx, seriesToCreate); err != nil { + return fmt.Errorf("write series: %s", err) + } + if err := e.writeFields(tx, measurementFieldsToSave); err != nil { + return fmt.Errorf("write fields: %s", err) + } + + return nil + }); err != nil { + return err + } + + // Write points to the WAL. + if err := e.PointsWriter.WritePoints(points); err != nil { + return fmt.Errorf("write points: %s", err) + } + + return nil +} + +// writeSeries writes a list of series to the metadata. +func (e *Engine) writeSeries(tx *bolt.Tx, a []*tsdb.SeriesCreate) error { + // Ignore if there are no series. + if len(a) == 0 { + return nil + } + + // Marshal and insert each series into the metadata. + b := tx.Bucket([]byte("series")) + for _, sc := range a { + // Marshal series into bytes. + data, err := sc.Series.MarshalBinary() + if err != nil { + return fmt.Errorf("marshal series: %s", err) + } + + // Insert marshaled data into appropriate key. + if err := b.Put([]byte(sc.Series.Key), data); err != nil { + return fmt.Errorf("put: %s", err) + } + } + + return nil +} + +// writeFields writes a list of measurement fields to the metadata. +func (e *Engine) writeFields(tx *bolt.Tx, m map[string]*tsdb.MeasurementFields) error { + // Ignore if there are no fields to save. + if len(m) == 0 { + return nil + } + + // Persist each measurement field in the map. + b := tx.Bucket([]byte("fields")) + for k, f := range m { + // Marshal field into bytes. + data, err := f.MarshalBinary() + if err != nil { + return fmt.Errorf("marshal measurement field: %s", err) + } + + // Insert marshaled data into key. + if err := b.Put([]byte(k), data); err != nil { + return fmt.Errorf("put: %s", err) + } + } + + return nil +} + +// WriteIndex writes marshaled points to the engine's underlying index. +func (e *Engine) WriteIndex(pointsByKey map[string][][]byte) error { + return e.db.Update(func(tx *bolt.Tx) error { + for key, values := range pointsByKey { + if err := e.writeIndex(tx, key, values); err != nil { + return fmt.Errorf("write: key=%x, err=%s", key, err) + } + } + return nil + }) +} + +// writeIndex writes a set of points for a single key. 
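
Unlike b1, bz1 splits a write in two: series and field metadata are committed to Bolt, while the raw points are handed to the pluggable `PointsWriter` (the write-ahead log). A minimal sketch of that seam with a mock writer; the types below are simplified stand-ins, not the real `tsdb` types.

```go
package main

import "fmt"

// Point is a simplified stand-in for tsdb.Point.
type Point struct {
	Key  string
	Data []byte
}

// PointsWriter mirrors the engine's WAL seam: bz1 hands raw points to a
// pluggable writer instead of indexing them synchronously.
type PointsWriter interface {
	WritePoints(points []Point) error
}

type walWriter struct{}

func (walWriter) WritePoints(points []Point) error {
	fmt.Printf("buffered %d points in the WAL\n", len(points))
	return nil
}

// writePoints persists metadata first, then defers point data to the WAL.
func writePoints(w PointsWriter, points []Point) error {
	// (series/field metadata would be written to Bolt here)
	if err := w.WritePoints(points); err != nil {
		return fmt.Errorf("write points: %s", err)
	}
	return nil
}

func main() {
	_ = writePoints(walWriter{}, []Point{{Key: "cpu", Data: []byte{0x1}}})
}
```
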
+func (e *Engine) writeIndex(tx *bolt.Tx, key string, a [][]byte) error { + // Ignore if there are no points. + if len(a) == 0 { + return nil + } + + // Create or retrieve series bucket. + bkt, err := tx.Bucket([]byte("points")).CreateBucketIfNotExists([]byte(key)) + if err != nil { + return fmt.Errorf("create series bucket: %s", err) + } + c := bkt.Cursor() + + // Ensure the slice is sorted before retrieving the time range. + a = DedupeEntries(a) + sort.Sort(byteSlices(a)) + + // Determine time range of new data. + tmin, tmax := int64(btou64(a[0][0:8])), int64(btou64(a[len(a)-1][0:8])) + + // If tmin is after the last block then append new blocks. + // + // This is the optimized fast path. Otherwise we need to merge the points + // with existing blocks on disk and rewrite all the blocks for that range. + if k, v := c.Last(); k == nil || int64(btou64(v[0:8])) < tmin { + if err := e.writeBlocks(bkt, a); err != nil { + return fmt.Errorf("append blocks: %s", err) + } + } + + // Generate map of inserted keys. + m := make(map[int64]struct{}) + for _, b := range a { + m[int64(btou64(b[0:8]))] = struct{}{} + } + + // If time range overlaps existing blocks then unpack full range and reinsert. + var existing [][]byte + for k, v := c.First(); k != nil; k, v = c.Next() { + // Determine block range. + bmin, bmax := int64(btou64(k)), int64(btou64(v[0:8])) + + // Skip over all blocks before the time range. + // Exit once we reach a block that is beyond our time range. + if bmax < tmin { + continue + } else if bmin > tmax { + break + } + + // Decode block. + buf, err := snappy.Decode(nil, v[8:]) + if err != nil { + return fmt.Errorf("decode block: %s", err) + } + + // Copy out any entries that aren't being overwritten. + for _, entry := range SplitEntries(buf) { + if _, ok := m[int64(btou64(entry[0:8]))]; !ok { + existing = append(existing, entry) + } + } + + // Delete block in database. + c.Delete() + } + + // Merge entries before rewriting. + a = append(existing, a...) + sort.Sort(byteSlices(a)) + + // Rewrite points to new blocks. + if err := e.writeBlocks(bkt, a); err != nil { + return fmt.Errorf("rewrite blocks: %s", err) + } + + return nil +} + +// writeBlocks writes point data to the bucket in blocks. +func (e *Engine) writeBlocks(bkt *bolt.Bucket, a [][]byte) error { + var block []byte + + // Dedupe points by key. + a = DedupeEntries(a) + + // Group points into blocks by size. + tmin, tmax := int64(math.MaxInt64), int64(math.MinInt64) + for i, p := range a { + // Update block time range. + timestamp := int64(btou64(p[0:8])) + if timestamp < tmin { + tmin = timestamp + } + if timestamp > tmax { + tmax = timestamp + } + + // Append point to the end of the block. + block = append(block, p...) + + // If the block is larger than the target block size or this is the + // last point then flush the block to the bucket. + if len(block) >= e.BlockSize || i == len(a)-1 { + // Encode block in the following format: + // tmax int64 + // data []byte (snappy compressed) + value := append(u64tob(uint64(tmax)), snappy.Encode(nil, block)...) + + // Write block to the bucket. + if err := bkt.Put(u64tob(uint64(tmin)), value); err != nil { + return fmt.Errorf("put: ts=%d-%d, err=%s", tmin, tmax, err) + } + + // Reset the block & time range. + block = nil + tmin, tmax = int64(math.MaxInt64), int64(math.MinInt64) + } + } + + return nil +} + +// DeleteSeries deletes the series from the engine. 
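
`writeBlocks` above stores each block under its minimum timestamp, and the stored value is the block's maximum timestamp followed by the snappy-compressed entries; the cursor's `setBuf` reverses this. A small sketch of that encode/decode pair (block contents elided):

```go
package main

import (
	"encoding/binary"
	"fmt"

	"github.com/golang/snappy"
)

func u64tob(v uint64) []byte {
	b := make([]byte, 8)
	binary.BigEndian.PutUint64(b, v)
	return b
}

func main() {
	block := []byte("concatenated-entries...") // stand-in for real entries
	tmin, tmax := uint64(10), uint64(30)

	// Encode as writeBlocks does: an 8-byte max-timestamp header, then the
	// compressed entries; the whole value is keyed by tmin in Bolt.
	value := append(u64tob(tmax), snappy.Encode(nil, block)...)

	// Decode as setBuf does: skip the header, decompress the rest.
	buf, err := snappy.Decode(nil, value[8:])
	if err != nil {
		panic(err)
	}
	fmt.Printf("key=%d tmax=%d payload=%q\n", tmin, binary.BigEndian.Uint64(value[0:8]), buf)
}
```
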
+func (e *Engine) DeleteSeries(keys []string) error { + return e.db.Update(func(tx *bolt.Tx) error { + for _, k := range keys { + if err := tx.Bucket([]byte("series")).Delete([]byte(k)); err != nil { + return fmt.Errorf("delete series metadata: %s", err) + } + if err := tx.Bucket([]byte("points")).DeleteBucket([]byte(k)); err != nil && err != bolt.ErrBucketNotFound { + return fmt.Errorf("delete series data: %s", err) + } + } + return nil + }) +} + +// DeleteMeasurement deletes a measurement and all related series. +func (e *Engine) DeleteMeasurement(name string, seriesKeys []string) error { + return e.db.Update(func(tx *bolt.Tx) error { + if err := tx.Bucket([]byte("fields")).Delete([]byte(name)); err != nil { + return err + } + + for _, k := range seriesKeys { + if err := tx.Bucket([]byte("series")).Delete([]byte(k)); err != nil { + return fmt.Errorf("delete series metadata: %s", err) + } + if err := tx.Bucket([]byte("points")).DeleteBucket([]byte(k)); err != nil && err != bolt.ErrBucketNotFound { + return fmt.Errorf("delete series data: %s", err) + } + } + + return nil + }) +} + +// SeriesCount returns the number of series buckets on the shard. +func (e *Engine) SeriesCount() (n int, err error) { + err = e.db.View(func(tx *bolt.Tx) error { + c := tx.Bucket([]byte("points")).Cursor() + for k, _ := c.First(); k != nil; k, _ = c.Next() { + n++ + } + return nil + }) + return +} + +// Begin starts a new transaction on the engine. +func (e *Engine) Begin(writable bool) (tsdb.Tx, error) { + tx, err := e.db.Begin(writable) + if err != nil { + return nil, err + } + return &Tx{Tx: tx, engine: e}, nil +} + +// Stats returns internal statistics for the engine. +func (e *Engine) Stats() (stats Stats, err error) { + err = e.db.View(func(tx *bolt.Tx) error { + stats.Size = tx.Size() + return nil + }) + return stats, err +} + +// Stats represents internal engine statistics. +type Stats struct { + Size int64 // BoltDB data size +} + +// Tx represents a transaction. +type Tx struct { + *bolt.Tx + engine *Engine +} + +// Cursor returns an iterator for a key. +func (tx *Tx) Cursor(key string) tsdb.Cursor { + // Retrieve points bucket. Ignore if there is no bucket. + b := tx.Bucket([]byte("points")).Bucket([]byte(key)) + if b == nil { + return nil + } + return &Cursor{ + cursor: b.Cursor(), + buf: make([]byte, DefaultBlockSize), + } +} + +// Cursor provides ordered iteration across a series. +type Cursor struct { + cursor *bolt.Cursor + buf []byte // uncompressed buffer + off int // buffer offset +} + +// Seek moves the cursor to a position and returns the closest key/value pair. +func (c *Cursor) Seek(seek []byte) (key, value []byte) { + // Move cursor to appropriate block and set to buffer. + _, v := c.cursor.Seek(seek) + c.setBuf(v) + + // Read current block up to seek position. + c.seekBuf(seek) + + // Return current entry. + return c.read() +} + +// seekBuf moves the cursor to a position within the current buffer. +func (c *Cursor) seekBuf(seek []byte) (key, value []byte) { + for { + // Slice off the current entry. + buf := c.buf[c.off:] + + // Exit if current entry's timestamp is on or after the seek. + if len(buf) == 0 || bytes.Compare(buf[0:8], seek) != -1 { + return + } + + // Otherwise skip ahead to the next entry. + c.off += entryHeaderSize + entryDataSize(buf) + } +} + +// Next returns the next key/value pair from the cursor. +func (c *Cursor) Next() (key, value []byte) { + // Ignore if there is no buffer. + if len(c.buf) == 0 { + return nil, nil + } + + // Move forward to next entry. 
+	c.off += entryHeaderSize + entryDataSize(c.buf[c.off:])
+
+	// If no items left then read first item from next block.
+	if c.off >= len(c.buf) {
+		_, v := c.cursor.Next()
+		c.setBuf(v)
+	}
+
+	return c.read()
+}
+
+// setBuf saves a compressed block to the buffer.
+func (c *Cursor) setBuf(block []byte) {
+	// Clear if the block is empty.
+	if len(block) == 0 {
+		c.buf, c.off = c.buf[0:0], 0
+		return
+	}
+
+	// Otherwise decode block into buffer.
+	// Skip over the first 8 bytes since they are the max timestamp.
+	buf, err := snappy.Decode(nil, block[8:])
+	if err != nil {
+		c.buf = c.buf[0:0]
+		log.Printf("block decode error: %s", err)
+	}
+	c.buf, c.off = buf, 0
+}
+
+// read reads the current key and value from the current block.
+func (c *Cursor) read() (key, value []byte) {
+	// Return nil if the offset is at the end of the buffer.
+	if c.off >= len(c.buf) {
+		return nil, nil
+	}
+
+	// Otherwise read the current entry.
+	buf := c.buf[c.off:]
+	dataSize := entryDataSize(buf)
+	return buf[0:8], buf[entryHeaderSize : entryHeaderSize+dataSize]
+}
+
+// MarshalEntry encodes point data into a single byte slice.
+//
+// The format of the byte slice is:
+//
+//	uint64 timestamp
+//	uint32 data length
+//	[]byte data
+//
+func MarshalEntry(timestamp int64, data []byte) []byte {
+	v := make([]byte, 8+4, 8+4+len(data))
+	binary.BigEndian.PutUint64(v[0:8], uint64(timestamp))
+	binary.BigEndian.PutUint32(v[8:12], uint32(len(data)))
+	v = append(v, data...)
+	return v
+}
+
+// UnmarshalEntry decodes an entry into its separate parts.
+// Returns the timestamp, data and the number of bytes read.
+// Returned byte slices point to the original slice.
+func UnmarshalEntry(v []byte) (timestamp int64, data []byte, n int) {
+	timestamp = int64(binary.BigEndian.Uint64(v[0:8]))
+	dataLen := binary.BigEndian.Uint32(v[8:12])
+	data = v[12 : 12+dataLen]
+	return timestamp, data, 12 + int(dataLen)
+}
+
+// SplitEntries returns a slice of individual entries from one continuous set.
+func SplitEntries(b []byte) [][]byte {
+	var a [][]byte
+	for {
+		// Exit if there's no more data left.
+		if len(b) == 0 {
+			return a
+		}
+
+		// Create slice that points to underlying entry.
+		dataSize := entryDataSize(b)
+		a = append(a, b[0:entryHeaderSize+dataSize])
+
+		// Move buffer forward.
+		b = b[entryHeaderSize+dataSize:]
+	}
+}
+
+// DedupeEntries returns slices with unique keys (the first 8 bytes).
+func DedupeEntries(a [][]byte) [][]byte {
+	// Convert to a map where the last slice is used.
+	m := make(map[string][]byte)
+	for _, b := range a {
+		m[string(b[0:8])] = b
+	}
+
+	// Convert map back to a slice of byte slices.
+	other := make([][]byte, 0, len(m))
+	for _, v := range m {
+		other = append(other, v)
+	}
+
+	// Sort entries.
+	sort.Sort(byteSlices(other))
+
+	return other
+}
+
+// entryHeaderSize is the number of bytes required for the header.
+const entryHeaderSize = 8 + 4
+
+// entryDataSize returns the size of an entry's data field, in bytes.
+func entryDataSize(v []byte) int { return int(binary.BigEndian.Uint32(v[8:12])) }
+
+// u64tob converts a uint64 into an 8-byte slice.
+func u64tob(v uint64) []byte {
+	b := make([]byte, 8)
+	binary.BigEndian.PutUint64(b, v)
+	return b
+}
+
+// btou64 converts an 8-byte slice into an uint64.
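
Because every entry carries a fixed 12-byte header with an explicit data length, a concatenated block can be walked without delimiter scanning: each step slices off `entryHeaderSize + entryDataSize` bytes and advances by the same amount. A runnable restatement of that arithmetic:

```go
package main

import (
	"encoding/binary"
	"fmt"
)

const entryHeaderSize = 8 + 4 // timestamp + data length

func marshalEntry(timestamp int64, data []byte) []byte {
	v := make([]byte, 8+4, 8+4+len(data))
	binary.BigEndian.PutUint64(v[0:8], uint64(timestamp))
	binary.BigEndian.PutUint32(v[8:12], uint32(len(data)))
	return append(v, data...)
}

func entryDataSize(v []byte) int { return int(binary.BigEndian.Uint32(v[8:12])) }

// splitEntries walks a block exactly as SplitEntries above does.
func splitEntries(b [][]byte) [][]byte { return b } // placeholder removed below

func main() {
	block := append(marshalEntry(1, []byte("aa")), marshalEntry(2, []byte("b"))...)
	for len(block) > 0 {
		n := entryHeaderSize + entryDataSize(block)
		e := block[:n]
		fmt.Printf("ts=%d data=%q\n", binary.BigEndian.Uint64(e[0:8]), e[entryHeaderSize:])
		block = block[n:]
	}
}
```
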
+func btou64(b []byte) uint64 { return binary.BigEndian.Uint64(b) } + +type byteSlices [][]byte + +func (a byteSlices) Len() int { return len(a) } +func (a byteSlices) Swap(i, j int) { a[i], a[j] = a[j], a[i] } +func (a byteSlices) Less(i, j int) bool { return bytes.Compare(a[i], a[j]) == -1 } diff --git a/Godeps/_workspace/src/github.com/influxdb/influxdb/tsdb/engine/bz1/bz1_test.go b/Godeps/_workspace/src/github.com/influxdb/influxdb/tsdb/engine/bz1/bz1_test.go new file mode 100644 index 000000000..4354a3ee8 --- /dev/null +++ b/Godeps/_workspace/src/github.com/influxdb/influxdb/tsdb/engine/bz1/bz1_test.go @@ -0,0 +1,439 @@ +package bz1_test + +import ( + "bytes" + "encoding/binary" + "errors" + "io/ioutil" + "math/rand" + "os" + "reflect" + "sort" + "strconv" + "testing" + "testing/quick" + "time" + + "github.com/influxdb/influxdb/tsdb" + "github.com/influxdb/influxdb/tsdb/engine/bz1" +) + +// Ensure the engine can write series metadata and reload it. +func TestEngine_LoadMetadataIndex_Series(t *testing.T) { + e := OpenDefaultEngine() + defer e.Close() + + // Setup nop mock. + e.PointsWriter.WritePointsFn = func(a []tsdb.Point) error { return nil } + + // Write series metadata. + if err := e.WritePoints(nil, nil, []*tsdb.SeriesCreate{ + {Series: &tsdb.Series{Key: string(tsdb.MakeKey([]byte("cpu"), map[string]string{"host": "server0"})), Tags: map[string]string{"host": "server0"}}}, + {Series: &tsdb.Series{Key: string(tsdb.MakeKey([]byte("cpu"), map[string]string{"host": "server1"})), Tags: map[string]string{"host": "server1"}}}, + {Series: &tsdb.Series{Key: "series with spaces"}}, + }); err != nil { + t.Fatal(err) + } + + // Load metadata index. + index := tsdb.NewDatabaseIndex() + if err := e.LoadMetadataIndex(index, make(map[string]*tsdb.MeasurementFields)); err != nil { + t.Fatal(err) + } + + // Verify index is correct. + if m := index.Measurement("cpu"); m == nil { + t.Fatal("measurement not found") + } else if s := m.SeriesByID(1); s.Key != "cpu,host=server0" || !reflect.DeepEqual(s.Tags, map[string]string{"host": "server0"}) { + t.Fatalf("unexpected series: %q / %#v", s.Key, s.Tags) + } else if s = m.SeriesByID(2); s.Key != "cpu,host=server1" || !reflect.DeepEqual(s.Tags, map[string]string{"host": "server1"}) { + t.Fatalf("unexpected series: %q / %#v", s.Key, s.Tags) + } + + if m := index.Measurement("series with spaces"); m == nil { + t.Fatal("measurement not found") + } else if s := m.SeriesByID(3); s.Key != "series with spaces" { + t.Fatalf("unexpected series: %q", s.Key) + } +} + +// Ensure the engine can write field metadata and reload it. +func TestEngine_LoadMetadataIndex_Fields(t *testing.T) { + e := OpenDefaultEngine() + defer e.Close() + + // Setup nop mock. + e.PointsWriter.WritePointsFn = func(a []tsdb.Point) error { return nil } + + // Write series metadata. + if err := e.WritePoints(nil, map[string]*tsdb.MeasurementFields{ + "cpu": &tsdb.MeasurementFields{ + Fields: map[string]*tsdb.Field{ + "value": &tsdb.Field{ID: 0, Name: "value"}, + }, + }, + }, nil); err != nil { + t.Fatal(err) + } + + // Load metadata index. + mfs := make(map[string]*tsdb.MeasurementFields) + if err := e.LoadMetadataIndex(tsdb.NewDatabaseIndex(), mfs); err != nil { + t.Fatal(err) + } + + // Verify measurement field is correct. 
+ if mf := mfs["cpu"]; mf == nil { + t.Fatal("measurement fields not found") + } else if !reflect.DeepEqual(mf.Fields, map[string]*tsdb.Field{"value": &tsdb.Field{ID: 0, Name: "value"}}) { + t.Fatalf("unexpected fields: %#v", mf.Fields) + } +} + +// Ensure the engine can write points to storage. +func TestEngine_WritePoints_PointsWriter(t *testing.T) { + e := OpenDefaultEngine() + defer e.Close() + + // Points to be inserted. + points := []tsdb.Point{ + tsdb.NewPoint("cpu", tsdb.Tags{}, tsdb.Fields{}, time.Unix(0, 1)), + tsdb.NewPoint("cpu", tsdb.Tags{}, tsdb.Fields{}, time.Unix(0, 0)), + tsdb.NewPoint("cpu", tsdb.Tags{}, tsdb.Fields{}, time.Unix(1, 0)), + + tsdb.NewPoint("cpu", tsdb.Tags{"host": "serverA"}, tsdb.Fields{}, time.Unix(0, 0)), + } + + // Mock points writer to ensure points are passed through. + var invoked bool + e.PointsWriter.WritePointsFn = func(a []tsdb.Point) error { + invoked = true + if !reflect.DeepEqual(points, a) { + t.Fatalf("unexpected points: %#v", a) + } + return nil + } + + // Write points against two separate series. + if err := e.WritePoints(points, nil, nil); err != nil { + t.Fatal(err) + } else if !invoked { + t.Fatal("PointsWriter.WritePoints() not called") + } +} + +// Ensure the engine can return errors from the points writer. +func TestEngine_WritePoints_ErrPointsWriter(t *testing.T) { + e := OpenDefaultEngine() + defer e.Close() + + // Ensure points writer returns an error. + e.PointsWriter.WritePointsFn = func(a []tsdb.Point) error { return errors.New("marker") } + + // Write to engine. + if err := e.WritePoints(nil, nil, nil); err == nil || err.Error() != `write points: marker` { + t.Fatal(err) + } +} + +// Ensure the engine can write points to the index. +func TestEngine_WriteIndex_Append(t *testing.T) { + e := OpenDefaultEngine() + defer e.Close() + + // Append points to index. + if err := e.WriteIndex(map[string][][]byte{ + "cpu": [][]byte{ + bz1.MarshalEntry(1, []byte{0x10}), + bz1.MarshalEntry(2, []byte{0x20}), + }, + "mem": [][]byte{ + bz1.MarshalEntry(0, []byte{0x30}), + }, + }); err != nil { + t.Fatal(err) + } + + // Start transaction. + tx := e.MustBegin(false) + defer tx.Rollback() + + // Iterate over "cpu" series. + c := tx.Cursor("cpu") + if k, v := c.Seek(u64tob(0)); !reflect.DeepEqual(k, []byte{0, 0, 0, 0, 0, 0, 0, 1}) || !reflect.DeepEqual(v, []byte{0x10}) { + t.Fatalf("unexpected key/value: %x / %x", k, v) + } else if k, v = c.Next(); !reflect.DeepEqual(k, []byte{0, 0, 0, 0, 0, 0, 0, 2}) || !reflect.DeepEqual(v, []byte{0x20}) { + t.Fatalf("unexpected key/value: %x / %x", k, v) + } else if k, _ = c.Next(); k != nil { + t.Fatalf("unexpected key/value: %x / %x", k, v) + } + + // Iterate over "mem" series. + c = tx.Cursor("mem") + if k, v := c.Seek(u64tob(0)); !reflect.DeepEqual(k, []byte{0, 0, 0, 0, 0, 0, 0, 0}) || !reflect.DeepEqual(v, []byte{0x30}) { + t.Fatalf("unexpected key/value: %x / %x", k, v) + } else if k, _ = c.Next(); k != nil { + t.Fatalf("unexpected key/value: %x / %x", k, v) + } +} + +// Ensure the engine can rewrite blocks that contain the new point range. +func TestEngine_WriteIndex_Insert(t *testing.T) { + e := OpenDefaultEngine() + defer e.Close() + + // Write initial points to index. + if err := e.WriteIndex(map[string][][]byte{ + "cpu": [][]byte{ + bz1.MarshalEntry(10, []byte{0x10}), + bz1.MarshalEntry(20, []byte{0x20}), + bz1.MarshalEntry(30, []byte{0x30}), + }, + }); err != nil { + t.Fatal(err) + } + + // Write overlapping points to index. 
+	if err := e.WriteIndex(map[string][][]byte{
+		"cpu": [][]byte{
+			bz1.MarshalEntry(9, []byte{0x09}),
+			bz1.MarshalEntry(10, []byte{0xFF}),
+			bz1.MarshalEntry(25, []byte{0x25}),
+			bz1.MarshalEntry(31, []byte{0x31}),
+		},
+	}); err != nil {
+		t.Fatal(err)
+	}
+
+	// Write overlapping points to index again.
+	if err := e.WriteIndex(map[string][][]byte{
+		"cpu": [][]byte{
+			bz1.MarshalEntry(31, []byte{0xFF}),
+		},
+	}); err != nil {
+		t.Fatal(err)
+	}
+
+	// Start transaction.
+	tx := e.MustBegin(false)
+	defer tx.Rollback()
+
+	// Iterate over "cpu" series.
+	c := tx.Cursor("cpu")
+	if k, v := c.Seek(u64tob(0)); btou64(k) != 9 || !bytes.Equal(v, []byte{0x09}) {
+		t.Fatalf("unexpected key/value: %x / %x", k, v)
+	} else if k, v = c.Next(); btou64(k) != 10 || !bytes.Equal(v, []byte{0xFF}) {
+		t.Fatalf("unexpected key/value: %x / %x", k, v)
+	} else if k, v = c.Next(); btou64(k) != 20 || !bytes.Equal(v, []byte{0x20}) {
+		t.Fatalf("unexpected key/value: %x / %x", k, v)
+	} else if k, v = c.Next(); btou64(k) != 25 || !bytes.Equal(v, []byte{0x25}) {
+		t.Fatalf("unexpected key/value: %x / %x", k, v)
+	} else if k, v = c.Next(); btou64(k) != 30 || !bytes.Equal(v, []byte{0x30}) {
+		t.Fatalf("unexpected key/value: %x / %x", k, v)
+	} else if k, v = c.Next(); btou64(k) != 31 || !bytes.Equal(v, []byte{0xFF}) {
+		t.Fatalf("unexpected key/value: %x / %x", k, v)
+	}
+}
+
+// Ensure the engine ignores writes without keys.
+func TestEngine_WriteIndex_NoKeys(t *testing.T) {
+	e := OpenDefaultEngine()
+	defer e.Close()
+	if err := e.WriteIndex(nil); err != nil {
+		t.Fatal(err)
+	}
+}
+
+// Ensure the engine ignores writes without points in a key.
+func TestEngine_WriteIndex_NoPoints(t *testing.T) {
+	e := OpenDefaultEngine()
+	defer e.Close()
+	if err := e.WriteIndex(map[string][][]byte{"cpu": nil}); err != nil {
+		t.Fatal(err)
+	}
+}
+
+// Ensure the engine can write random sets of points and read them back in order.
+func TestEngine_WriteIndex_Quick(t *testing.T) {
+	if testing.Short() {
+		t.Skip("short mode")
+	}
+
+	quick.Check(func(sets []Points, blockSize int) bool {
+		e := OpenDefaultEngine()
+		e.BlockSize = blockSize % 1024 // 1KB max block size
+		defer e.Close()
+
+		// Write points to index in multiple sets.
+		for _, set := range sets {
+			if err := e.WriteIndex(map[string][][]byte(set)); err != nil {
+				t.Fatal(err)
+			}
+		}
+
+		// Merge all points together.
+		points := MergePoints(sets)
+
+		// Retrieve a sorted list of keys so results are deterministic.
+		keys := points.Keys()
+
+		// Start transaction to read index.
+		tx := e.MustBegin(false)
+		defer tx.Rollback()
+
+		// Iterate over results to ensure they are correct.
+		for _, key := range keys {
+			c := tx.Cursor(key)
+
+			// Read list of key/values.
+			var got [][]byte
+			for k, v := c.Seek(u64tob(0)); k != nil; k, v = c.Next() {
+				got = append(got, append(copyBytes(k), v...))
+			}
+
+			// Generate expected values.
+			// We need to remove the data length from the slice.
+			var exp [][]byte
+			for _, b := range points[key] {
+				exp = append(exp, append(copyBytes(b[0:8]), b[12:]...)) // remove data len
+			}
+
+			if !reflect.DeepEqual(got, exp) {
+				t.Fatalf("points: block size=%d, key=%s:\n\ngot=%x\n\nexp=%x\n\n", e.BlockSize, key, got, exp)
+			}
+		}
+
+		return true
+	}, nil)
+}
+
+// Engine represents a test wrapper for bz1.Engine.
+type Engine struct {
+	*bz1.Engine
+	PointsWriter EnginePointsWriter
+}
+
+// NewEngine returns a new instance of Engine.
+func NewEngine(opt tsdb.EngineOptions) *Engine {
+	// Generate temporary file.
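
`TestEngine_WriteIndex_Quick` above drives the engine with `testing/quick`, and the `Points` type below implements `quick.Generator` so the package itself controls the shape of the random inputs. A minimal standalone example of that pattern (the `Entries` type here is hypothetical):

```go
package main

import (
	"fmt"
	"math/rand"
	"reflect"
	"testing/quick"
)

// Entries implements quick.Generator, as Points does below, so quick.Check
// asks the type itself how to produce random values.
type Entries [][]byte

func (Entries) Generate(rand *rand.Rand, size int) reflect.Value {
	a := make(Entries, rand.Intn(size+1))
	for i := range a {
		a[i] = []byte{byte(rand.Intn(256))} // each entry is one random byte
	}
	return reflect.ValueOf(a)
}

func main() {
	// Property: by construction, every generated entry has length 1.
	err := quick.Check(func(a Entries) bool {
		for _, e := range a {
			if len(e) != 1 {
				return false
			}
		}
		return true
	}, nil)
	fmt.Println("property holds:", err == nil)
}
```
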
+	f, _ := ioutil.TempFile("", "bz1-")
+	f.Close()
+	os.Remove(f.Name())
+
+	// Create test wrapper and attach mocks.
+	e := &Engine{
+		Engine: bz1.NewEngine(f.Name(), opt).(*bz1.Engine),
+	}
+	e.Engine.PointsWriter = &e.PointsWriter
+	return e
+}
+
+// OpenEngine returns an opened instance of Engine. Panic on error.
+func OpenEngine(opt tsdb.EngineOptions) *Engine {
+	e := NewEngine(opt)
+	if err := e.Open(); err != nil {
+		panic(err)
+	}
+	return e
+}
+
+// OpenDefaultEngine returns an open Engine with default options.
+func OpenDefaultEngine() *Engine { return OpenEngine(tsdb.NewEngineOptions()) }
+
+// Close closes the engine and removes all data.
+func (e *Engine) Close() error {
+	e.Engine.Close()
+	os.RemoveAll(e.Path())
+	return nil
+}
+
+// MustBegin returns a new transaction. Panic on error.
+func (e *Engine) MustBegin(writable bool) tsdb.Tx {
+	tx, err := e.Begin(writable)
+	if err != nil {
+		panic(err)
+	}
+	return tx
+}
+
+// EnginePointsWriter represents a mock that implements Engine.PointsWriter.
+type EnginePointsWriter struct {
+	WritePointsFn func(points []tsdb.Point) error
+}
+
+func (w *EnginePointsWriter) WritePoints(points []tsdb.Point) error {
+	return w.WritePointsFn(points)
+}
+
+// Points represents a set of encoded points by key. Implements quick.Generator.
+type Points map[string][][]byte
+
+// Keys returns a sorted list of keys.
+func (m Points) Keys() []string {
+	var keys []string
+	for k := range m {
+		keys = append(keys, k)
+	}
+	sort.Strings(keys)
+	return keys
+}
+
+func (Points) Generate(rand *rand.Rand, size int) reflect.Value {
+	// Generate series with a random number of points in each.
+	m := make(map[string][][]byte)
+	for i, seriesN := 0, rand.Intn(size); i < seriesN; i++ {
+		key := strconv.Itoa(rand.Intn(20))
+
+		// Generate points for the series.
+		for j, pointN := 0, rand.Intn(size); j < pointN; j++ {
+			timestamp := time.Unix(0, 0).Add(time.Duration(rand.Intn(100)))
+			data, ok := quick.Value(reflect.TypeOf([]byte(nil)), rand)
+			if !ok {
+				panic("cannot generate data")
+			}
+			m[key] = append(m[key], bz1.MarshalEntry(timestamp.UnixNano(), data.Interface().([]byte)))
+		}
+	}
+
+	return reflect.ValueOf(Points(m))
+}
+
+// MergePoints returns a map of all points merged together by key.
+// Later points will overwrite earlier ones.
+func MergePoints(a []Points) Points {
+	// Combine all points into one set.
+	m := make(Points)
+	for _, set := range a {
+		for key, values := range set {
+			m[key] = append(m[key], values...)
+		}
+	}
+
+	// Dedupe points.
+	for key, values := range m {
+		m[key] = bz1.DedupeEntries(values)
+	}
+
+	return m
+}
+
+// copyBytes returns a copy of a byte slice.
+func copyBytes(b []byte) []byte {
+	if b == nil {
+		return nil
+	}
+
+	other := make([]byte, len(b))
+	copy(other, b)
+	return other
+}
+
+// u64tob converts a uint64 into an 8-byte slice.
+func u64tob(v uint64) []byte {
+	b := make([]byte, 8)
+	binary.BigEndian.PutUint64(b, v)
+	return b
+}
+
+// btou64 converts an 8-byte slice into an uint64.
+func btou64(b []byte) uint64 { return binary.BigEndian.Uint64(b) } diff --git a/Godeps/_workspace/src/github.com/influxdb/influxdb/tsdb/engine/engine.go b/Godeps/_workspace/src/github.com/influxdb/influxdb/tsdb/engine/engine.go new file mode 100644 index 000000000..c5565ff06 --- /dev/null +++ b/Godeps/_workspace/src/github.com/influxdb/influxdb/tsdb/engine/engine.go @@ -0,0 +1,6 @@ +package engine + +import ( + _ "github.com/influxdb/influxdb/tsdb/engine/b1" + _ "github.com/influxdb/influxdb/tsdb/engine/bz1" +) diff --git a/Godeps/_workspace/src/github.com/influxdb/influxdb/tsdb/engine_test.go b/Godeps/_workspace/src/github.com/influxdb/influxdb/tsdb/engine_test.go index f86129542..c1c5c090f 100644 --- a/Godeps/_workspace/src/github.com/influxdb/influxdb/tsdb/engine_test.go +++ b/Godeps/_workspace/src/github.com/influxdb/influxdb/tsdb/engine_test.go @@ -1,815 +1,3 @@ -package tsdb +package tsdb_test -import ( - "fmt" - "io/ioutil" - "math" - "os" - "testing" - "time" - - "github.com/influxdb/influxdb/influxql" - "github.com/influxdb/influxdb/meta" -) - -var sID0 = uint64(1) -var sID1 = uint64(2) -var sgID1 = uint64(3) -var sgID2 = uint64(4) -var nID = uint64(42) - -// Simple test to ensure data can be read from two shards. -func TestWritePointsAndExecuteTwoShards(t *testing.T) { - // Create the mock planner and its metastore - store, query_executor := testStoreAndQueryExecutor() - defer os.RemoveAll(store.path) - query_executor.MetaStore = &testQEMetastore{ - sgFunc: func(database, policy string, min, max time.Time) (a []meta.ShardGroupInfo, err error) { - return []meta.ShardGroupInfo{ - { - ID: sgID, - StartTime: time.Now().Add(-time.Hour), - EndTime: time.Now().Add(time.Hour), - Shards: []meta.ShardInfo{ - { - ID: uint64(sID0), - OwnerIDs: []uint64{nID}, - }, - }, - }, - { - ID: sgID, - StartTime: time.Now().Add(-2 * time.Hour), - EndTime: time.Now().Add(-time.Hour), - Shards: []meta.ShardInfo{ - { - ID: uint64(sID1), - OwnerIDs: []uint64{nID}, - }, - }, - }, - }, nil - }, - } - - // Write two points across shards. 
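
`engine.go` above exists purely for its blank imports: each engine package registers its constructor from `init()`, so importing the package is what makes the format available at runtime. A sketch of the registry pattern behind `tsdb.RegisterEngine` (the signature here is simplified and hypothetical):

```go
package main

import "fmt"

// NewEngineFunc is a simplified constructor signature.
type NewEngineFunc func(path string) interface{}

var registry = map[string]NewEngineFunc{}

// RegisterEngine panics on duplicates so a bad blank-import set fails fast.
func RegisterEngine(name string, fn NewEngineFunc) {
	if _, ok := registry[name]; ok {
		panic("engine already registered: " + name)
	}
	registry[name] = fn
}

func main() {
	// In the real packages these calls live in init() and run on import.
	RegisterEngine("b1", func(path string) interface{} { return path })
	RegisterEngine("bz1", func(path string) interface{} { return path })
	fmt.Println(len(registry), "engines registered")
}
```
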
- pt1time := time.Unix(1, 0).UTC() - if err := store.WriteToShard(sID0, []Point{NewPoint( - "cpu", - map[string]string{"host": "serverA", "region": "us-east"}, - map[string]interface{}{"value": 100}, - pt1time, - )}); err != nil { - t.Fatalf(err.Error()) - } - pt2time := time.Unix(2, 0).UTC() - if err := store.WriteToShard(sID1, []Point{NewPoint( - "cpu", - map[string]string{"host": "serverB", "region": "us-east"}, - map[string]interface{}{"value": 200}, - pt2time, - )}); err != nil { - t.Fatalf(err.Error()) - } - - var tests = []struct { - skip bool // Skip test - stmt string // Query statement - chunkSize int // Chunk size for driving the executor - expected string // Expected results, rendered as a string - }{ - { - stmt: `SELECT value FROM cpu`, - expected: `[{"name":"cpu","columns":["time","value"],"values":[["1970-01-01T00:00:01Z",100],["1970-01-01T00:00:02Z",200]]}]`, - }, - { - stmt: `SELECT value FROM cpu`, - chunkSize: 1, - expected: `[{"name":"cpu","columns":["time","value"],"values":[["1970-01-01T00:00:01Z",100]]},{"name":"cpu","columns":["time","value"],"values":[["1970-01-01T00:00:02Z",200]]}]`, - }, - { - stmt: `SELECT value FROM cpu LIMIT 1`, - expected: `[{"name":"cpu","columns":["time","value"],"values":[["1970-01-01T00:00:01Z",100]]}]`, - }, - { - stmt: `SELECT value FROM cpu LIMIT 1`, - chunkSize: 2, - expected: `[{"name":"cpu","columns":["time","value"],"values":[["1970-01-01T00:00:01Z",100]]}]`, - }, - { - stmt: `SELECT value FROM cpu WHERE host='serverA'`, - expected: `[{"name":"cpu","columns":["time","value"],"values":[["1970-01-01T00:00:01Z",100]]}]`, - }, - { - stmt: `SELECT value FROM cpu WHERE host='serverB'`, - expected: `[{"name":"cpu","columns":["time","value"],"values":[["1970-01-01T00:00:02Z",200]]}]`, - }, - { - stmt: `SELECT value FROM cpu WHERE host='serverC'`, - expected: `null`, - }, - { - stmt: `SELECT value FROM cpu GROUP BY host`, - expected: `[{"name":"cpu","tags":{"host":"serverA"},"columns":["time","value"],"values":[["1970-01-01T00:00:01Z",100]]},{"name":"cpu","tags":{"host":"serverB"},"columns":["time","value"],"values":[["1970-01-01T00:00:02Z",200]]}]`, - }, - { - stmt: `SELECT value FROM cpu GROUP BY region`, - expected: `[{"name":"cpu","tags":{"region":"us-east"},"columns":["time","value"],"values":[["1970-01-01T00:00:01Z",100],["1970-01-01T00:00:02Z",200]]}]`, - }, - { - stmt: `SELECT value FROM cpu GROUP BY host,region`, - expected: `[{"name":"cpu","tags":{"host":"serverA","region":"us-east"},"columns":["time","value"],"values":[["1970-01-01T00:00:01Z",100]]},{"name":"cpu","tags":{"host":"serverB","region":"us-east"},"columns":["time","value"],"values":[["1970-01-01T00:00:02Z",200]]}]`, - }, - { - stmt: `SELECT value FROM cpu WHERE host='serverA' GROUP BY host`, - expected: `[{"name":"cpu","tags":{"host":"serverA"},"columns":["time","value"],"values":[["1970-01-01T00:00:01Z",100]]}]`, - }, - - // Aggregate queries. - { - stmt: `SELECT sum(value) FROM cpu`, - expected: `[{"name":"cpu","columns":["time","sum"],"values":[["1970-01-01T00:00:00Z",300]]}]`, - }, - } - - for _, tt := range tests { - if tt.skip { - t.Logf("Skipping test %s", tt.stmt) - continue - } - executor, err := query_executor.plan(mustParseSelectStatement(tt.stmt), tt.chunkSize) - if err != nil { - t.Fatalf("failed to plan query: %s", err.Error()) - } - got := executeAndGetResults(executor) - if got != tt.expected { - t.Fatalf("Test %s\nexp: %s\ngot: %s\n", tt.stmt, tt.expected, got) - } - } -} - -// Test that executor correctly orders data across shards. 
-func TestWritePointsAndExecuteTwoShardsAlign(t *testing.T) { - // Create the mock planner and its metastore - store, query_executor := testStoreAndQueryExecutor() - defer os.RemoveAll(store.path) - query_executor.MetaStore = &testQEMetastore{ - sgFunc: func(database, policy string, min, max time.Time) (a []meta.ShardGroupInfo, err error) { - return []meta.ShardGroupInfo{ - { - ID: sgID, - StartTime: time.Now().Add(-2 * time.Hour), - EndTime: time.Now().Add(-time.Hour), - Shards: []meta.ShardInfo{ - { - ID: uint64(sID1), - OwnerIDs: []uint64{nID}, - }, - }, - }, - { - ID: sgID, - StartTime: time.Now().Add(-2 * time.Hour), - EndTime: time.Now().Add(time.Hour), - Shards: []meta.ShardInfo{ - { - ID: uint64(sID0), - OwnerIDs: []uint64{nID}, - }, - }, - }, - }, nil - }, - } - - // Write interleaving, by time, chunks to the shards. - if err := store.WriteToShard(sID0, []Point{NewPoint( - "cpu", - map[string]string{"host": "serverA"}, - map[string]interface{}{"value": 100}, - time.Unix(1, 0).UTC(), - )}); err != nil { - t.Fatalf(err.Error()) - } - if err := store.WriteToShard(sID1, []Point{NewPoint( - "cpu", - map[string]string{"host": "serverB"}, - map[string]interface{}{"value": 200}, - time.Unix(2, 0).UTC(), - )}); err != nil { - t.Fatalf(err.Error()) - } - if err := store.WriteToShard(sID1, []Point{NewPoint( - "cpu", - map[string]string{"host": "serverA"}, - map[string]interface{}{"value": 300}, - time.Unix(3, 0).UTC(), - )}); err != nil { - t.Fatalf(err.Error()) - } - - var tests = []struct { - skip bool // Skip test - stmt string // Query statement - chunkSize int // Chunk size for driving the executor - expected string // Expected results, rendered as a string - }{ - { - stmt: `SELECT value FROM cpu`, - chunkSize: 1, - expected: `[{"name":"cpu","columns":["time","value"],"values":[["1970-01-01T00:00:01Z",100]]},{"name":"cpu","columns":["time","value"],"values":[["1970-01-01T00:00:02Z",200]]},{"name":"cpu","columns":["time","value"],"values":[["1970-01-01T00:00:03Z",300]]}]`, - }, - { - stmt: `SELECT value FROM cpu`, - chunkSize: 2, - expected: `[{"name":"cpu","columns":["time","value"],"values":[["1970-01-01T00:00:01Z",100],["1970-01-01T00:00:02Z",200]]},{"name":"cpu","columns":["time","value"],"values":[["1970-01-01T00:00:03Z",300]]}]`, - }, - { - stmt: `SELECT mean(value),sum(value) FROM cpu`, - chunkSize: 2, - expected: `[{"name":"cpu","columns":["time","mean","sum"],"values":[["1970-01-01T00:00:00Z",200,600]]}]`, - }, - } - - for _, tt := range tests { - if tt.skip { - t.Logf("Skipping test %s", tt.stmt) - continue - } - executor, err := query_executor.plan(mustParseSelectStatement(tt.stmt), tt.chunkSize) - if err != nil { - t.Fatalf("failed to plan query: %s", err.Error()) - } - got := executeAndGetResults(executor) - if got != tt.expected { - t.Fatalf("Test %s\nexp: %s\ngot: %s\n", tt.stmt, tt.expected, got) - } - } -} - -// Test that executor correctly orders data across shards when the tagsets -// are not presented in alphabetically order across shards. 
-func TestWritePointsAndExecuteTwoShardsTagSetOrdering(t *testing.T) { - // Create the mock planner and its metastore - store, query_executor := testStoreAndQueryExecutor() - defer os.RemoveAll(store.path) - query_executor.MetaStore = &testQEMetastore{ - sgFunc: func(database, policy string, min, max time.Time) (a []meta.ShardGroupInfo, err error) { - return []meta.ShardGroupInfo{ - { - ID: sgID, - Shards: []meta.ShardInfo{ - { - ID: uint64(sID0), - OwnerIDs: []uint64{nID}, - }, - }, - }, - { - ID: sgID, - Shards: []meta.ShardInfo{ - { - ID: uint64(sID1), - OwnerIDs: []uint64{nID}, - }, - }, - }, - }, nil - }, - } - - // Write tagsets "y" and "z" to first shard. - if err := store.WriteToShard(sID0, []Point{NewPoint( - "cpu", - map[string]string{"host": "y"}, - map[string]interface{}{"value": 100}, - time.Unix(1, 0).UTC(), - )}); err != nil { - t.Fatalf(err.Error()) - } - if err := store.WriteToShard(sID0, []Point{NewPoint( - "cpu", - map[string]string{"host": "z"}, - map[string]interface{}{"value": 200}, - time.Unix(1, 0).UTC(), - )}); err != nil { - t.Fatalf(err.Error()) - } - - // Write tagsets "x", y" and "z" to second shard. - if err := store.WriteToShard(sID1, []Point{NewPoint( - "cpu", - map[string]string{"host": "x"}, - map[string]interface{}{"value": 300}, - time.Unix(2, 0).UTC(), - )}); err != nil { - t.Fatalf(err.Error()) - } - if err := store.WriteToShard(sID1, []Point{NewPoint( - "cpu", - map[string]string{"host": "y"}, - map[string]interface{}{"value": 400}, - time.Unix(3, 0).UTC(), - )}); err != nil { - t.Fatalf(err.Error()) - } - if err := store.WriteToShard(sID1, []Point{NewPoint( - "cpu", - map[string]string{"host": "z"}, - map[string]interface{}{"value": 500}, - time.Unix(3, 0).UTC(), - )}); err != nil { - t.Fatalf(err.Error()) - } - - var tests = []struct { - skip bool // Skip test - stmt string // Query statement - chunkSize int // Chunk size for driving the executor - expected string // Expected results, rendered as a string - }{ - { - stmt: `SELECT sum(value) FROM cpu GROUP BY host`, - expected: `[{"name":"cpu","tags":{"host":"x"},"columns":["time","sum"],"values":[["1970-01-01T00:00:00Z",300]]},{"name":"cpu","tags":{"host":"y"},"columns":["time","sum"],"values":[["1970-01-01T00:00:00Z",500]]},{"name":"cpu","tags":{"host":"z"},"columns":["time","sum"],"values":[["1970-01-01T00:00:00Z",700]]}]`, - }, - { - stmt: `SELECT value FROM cpu GROUP BY host`, - expected: `[{"name":"cpu","tags":{"host":"x"},"columns":["time","value"],"values":[["1970-01-01T00:00:02Z",300]]},{"name":"cpu","tags":{"host":"y"},"columns":["time","value"],"values":[["1970-01-01T00:00:01Z",100],["1970-01-01T00:00:03Z",400]]},{"name":"cpu","tags":{"host":"z"},"columns":["time","value"],"values":[["1970-01-01T00:00:01Z",200],["1970-01-01T00:00:03Z",500]]}]`, - }, - } - - for _, tt := range tests { - if tt.skip { - t.Logf("Skipping test %s", tt.stmt) - continue - } - executor, err := query_executor.plan(mustParseSelectStatement(tt.stmt), tt.chunkSize) - if err != nil { - t.Fatalf("failed to plan query: %s", err.Error()) - } - got := executeAndGetResults(executor) - if got != tt.expected { - t.Fatalf("Test %s\nexp: %s\ngot: %s\n", tt.stmt, tt.expected, got) - } - } -} - -// TestProccessAggregateDerivative tests the rawQueryDerivativeProcessor transformation function on the engine. -// The is called for a query with a GROUP BY. 
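
The derivative tests being removed here pin down the semantics this engine implemented: the difference between neighboring values, normalized to the requested interval, with `non_negative_derivative` dropping negative rates. A compact sketch of that computation (hypothetical types, not the removed `mapperValue`):

```go
package main

import (
	"fmt"
	"time"
)

type point struct {
	t time.Time
	v float64
}

// derivative returns per-interval rates of change between neighboring
// points, optionally filtering out negative rates.
func derivative(in []point, interval time.Duration, nonNegative bool) []point {
	var out []point
	for i := 1; i < len(in); i++ {
		elapsed := in[i].t.Sub(in[i-1].t)
		rate := (in[i].v - in[i-1].v) / (float64(elapsed) / float64(interval))
		if nonNegative && rate < 0 {
			continue // non_negative_derivative drops the sample entirely
		}
		out = append(out, point{in[i].t, rate})
	}
	return out
}

func main() {
	t0 := time.Unix(0, 0)
	in := []point{{t0, 1}, {t0.Add(24 * time.Hour), 3}, {t0.Add(48 * time.Hour), 5}}
	fmt.Println(derivative(in, 24*time.Hour, false)) // rate of 2.0 per 24h interval
}
```
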
-func TestProcessAggregateDerivative(t *testing.T) { - tests := []struct { - name string - fn string - interval time.Duration - in [][]interface{} - exp [][]interface{} - }{ - { - name: "empty input", - fn: "derivative", - interval: 24 * time.Hour, - in: [][]interface{}{}, - exp: [][]interface{}{}, - }, - - { - name: "single row returns 0.0", - fn: "derivative", - interval: 24 * time.Hour, - in: [][]interface{}{ - []interface{}{ - time.Unix(0, 0), 1.0, - }, - }, - exp: [][]interface{}{ - []interface{}{ - time.Unix(0, 0), 0.0, - }, - }, - }, - { - name: "basic derivative", - fn: "derivative", - interval: 24 * time.Hour, - in: [][]interface{}{ - []interface{}{ - time.Unix(0, 0), 1.0, - }, - []interface{}{ - time.Unix(0, 0).Add(24 * time.Hour), 3.0, - }, - []interface{}{ - time.Unix(0, 0).Add(48 * time.Hour), 5.0, - }, - []interface{}{ - time.Unix(0, 0).Add(72 * time.Hour), 9.0, - }, - }, - exp: [][]interface{}{ - []interface{}{ - time.Unix(0, 0).Add(24 * time.Hour), 2.0, - }, - []interface{}{ - time.Unix(0, 0).Add(48 * time.Hour), 2.0, - }, - []interface{}{ - time.Unix(0, 0).Add(72 * time.Hour), 4.0, - }, - }, - }, - { - name: "12h interval", - fn: "derivative", - interval: 12 * time.Hour, - in: [][]interface{}{ - []interface{}{ - time.Unix(0, 0), 1.0, - }, - []interface{}{ - time.Unix(0, 0).Add(24 * time.Hour), 2.0, - }, - []interface{}{ - time.Unix(0, 0).Add(48 * time.Hour), 3.0, - }, - []interface{}{ - time.Unix(0, 0).Add(72 * time.Hour), 4.0, - }, - }, - exp: [][]interface{}{ - []interface{}{ - time.Unix(0, 0).Add(24 * time.Hour), 0.5, - }, - []interface{}{ - time.Unix(0, 0).Add(48 * time.Hour), 0.5, - }, - []interface{}{ - time.Unix(0, 0).Add(72 * time.Hour), 0.5, - }, - }, - }, - { - name: "negative derivatives", - fn: "derivative", - interval: 24 * time.Hour, - in: [][]interface{}{ - []interface{}{ - time.Unix(0, 0), 1.0, - }, - []interface{}{ - time.Unix(0, 0).Add(24 * time.Hour), 2.0, - }, - []interface{}{ - time.Unix(0, 0).Add(48 * time.Hour), 0.0, - }, - []interface{}{ - time.Unix(0, 0).Add(72 * time.Hour), 4.0, - }, - }, - exp: [][]interface{}{ - []interface{}{ - time.Unix(0, 0).Add(24 * time.Hour), 1.0, - }, - []interface{}{ - time.Unix(0, 0).Add(48 * time.Hour), -2.0, - }, - []interface{}{ - time.Unix(0, 0).Add(72 * time.Hour), 4.0, - }, - }, - }, - { - name: "negative derivatives", - fn: "non_negative_derivative", - interval: 24 * time.Hour, - in: [][]interface{}{ - []interface{}{ - time.Unix(0, 0), 1.0, - }, - []interface{}{ - time.Unix(0, 0).Add(24 * time.Hour), 2.0, - }, - // Show resultes in negative derivative - []interface{}{ - time.Unix(0, 0).Add(48 * time.Hour), 0.0, - }, - []interface{}{ - time.Unix(0, 0).Add(72 * time.Hour), 4.0, - }, - }, - exp: [][]interface{}{ - []interface{}{ - time.Unix(0, 0).Add(24 * time.Hour), 1.0, - }, - []interface{}{ - time.Unix(0, 0).Add(72 * time.Hour), 4.0, - }, - }, - }, - { - name: "float derivatives", - fn: "derivative", - interval: 24 * time.Hour, - in: [][]interface{}{ - []interface{}{ - time.Unix(0, 0), 1.0, - }, - []interface{}{ - time.Unix(0, 0).Add(24 * time.Hour), int64(3), - }, - []interface{}{ - time.Unix(0, 0).Add(48 * time.Hour), int64(5), - }, - []interface{}{ - time.Unix(0, 0).Add(72 * time.Hour), int64(9), - }, - }, - exp: [][]interface{}{ - []interface{}{ - time.Unix(0, 0).Add(24 * time.Hour), 2.0, - }, - []interface{}{ - time.Unix(0, 0).Add(48 * time.Hour), 2.0, - }, - []interface{}{ - time.Unix(0, 0).Add(72 * time.Hour), 4.0, - }, - }, - }, - } - - for _, test := range tests { - got := 
processAggregateDerivative(test.in, test.fn == "non_negative_derivative", test.interval) - - if len(got) != len(test.exp) { - t.Fatalf("processAggregateDerivative(%s) - %s\nlen mismatch: got %d, exp %d", test.fn, test.name, len(got), len(test.exp)) - } - - for i := 0; i < len(test.exp); i++ { - if test.exp[i][0] != got[i][0] || test.exp[i][1] != got[i][1] { - t.Fatalf("processAggregateDerivative - %s results mismatch:\ngot %v\nexp %v", test.name, got, test.exp) - } - } - } -} - -// TestProcessRawQueryDerivative tests the rawQueryDerivativeProcessor transformation function on the engine. -// The is called for a queries that do not have a group by. -func TestProcessRawQueryDerivative(t *testing.T) { - tests := []struct { - name string - fn string - interval time.Duration - in []*mapperValue - exp []*mapperValue - }{ - { - name: "empty input", - fn: "derivative", - interval: 24 * time.Hour, - in: []*mapperValue{}, - exp: []*mapperValue{}, - }, - - { - name: "single row returns 0.0", - fn: "derivative", - interval: 24 * time.Hour, - in: []*mapperValue{ - { - Time: time.Unix(0, 0).Unix(), - Value: 1.0, - }, - }, - exp: []*mapperValue{ - { - Time: time.Unix(0, 0).Unix(), - Value: 0.0, - }, - }, - }, - { - name: "basic derivative", - fn: "derivative", - interval: 24 * time.Hour, - in: []*mapperValue{ - { - Time: time.Unix(0, 0).Unix(), - Value: 0.0, - }, - { - Time: time.Unix(0, 0).Add(24 * time.Hour).UnixNano(), - Value: 3.0, - }, - { - Time: time.Unix(0, 0).Add(48 * time.Hour).UnixNano(), - Value: 5.0, - }, - { - Time: time.Unix(0, 0).Add(72 * time.Hour).UnixNano(), - Value: 9.0, - }, - }, - exp: []*mapperValue{ - { - Time: time.Unix(0, 0).Add(24 * time.Hour).UnixNano(), - Value: 3.0, - }, - { - Time: time.Unix(0, 0).Add(48 * time.Hour).UnixNano(), - Value: 2.0, - }, - { - Time: time.Unix(0, 0).Add(72 * time.Hour).UnixNano(), - Value: 4.0, - }, - }, - }, - { - name: "12h interval", - fn: "derivative", - interval: 12 * time.Hour, - in: []*mapperValue{ - { - Time: time.Unix(0, 0).UnixNano(), - Value: 1.0, - }, - { - Time: time.Unix(0, 0).Add(24 * time.Hour).UnixNano(), - Value: 2.0, - }, - { - Time: time.Unix(0, 0).Add(48 * time.Hour).UnixNano(), - Value: 3.0, - }, - { - Time: time.Unix(0, 0).Add(72 * time.Hour).UnixNano(), - Value: 4.0, - }, - }, - exp: []*mapperValue{ - { - Time: time.Unix(0, 0).Add(24 * time.Hour).UnixNano(), - Value: 0.5, - }, - { - Time: time.Unix(0, 0).Add(48 * time.Hour).UnixNano(), - Value: 0.5, - }, - { - Time: time.Unix(0, 0).Add(72 * time.Hour).UnixNano(), - Value: 0.5, - }, - }, - }, - { - name: "negative derivatives", - fn: "derivative", - interval: 24 * time.Hour, - in: []*mapperValue{ - { - Time: time.Unix(0, 0).Unix(), - Value: 1.0, - }, - { - Time: time.Unix(0, 0).Add(24 * time.Hour).UnixNano(), - Value: 2.0, - }, - // should go negative - { - Time: time.Unix(0, 0).Add(48 * time.Hour).UnixNano(), - Value: 0.0, - }, - { - Time: time.Unix(0, 0).Add(72 * time.Hour).UnixNano(), - Value: 4.0, - }, - }, - exp: []*mapperValue{ - { - Time: time.Unix(0, 0).Add(24 * time.Hour).UnixNano(), - Value: 1.0, - }, - { - Time: time.Unix(0, 0).Add(48 * time.Hour).UnixNano(), - Value: -2.0, - }, - { - Time: time.Unix(0, 0).Add(72 * time.Hour).UnixNano(), - Value: 4.0, - }, - }, - }, - { - name: "negative derivatives", - fn: "non_negative_derivative", - interval: 24 * time.Hour, - in: []*mapperValue{ - { - Time: time.Unix(0, 0).Unix(), - Value: 1.0, - }, - { - Time: time.Unix(0, 0).Add(24 * time.Hour).UnixNano(), - Value: 2.0, - }, - // should go negative - { - Time: time.Unix(0, 
0).Add(48 * time.Hour).UnixNano(), - Value: 0.0, - }, - { - Time: time.Unix(0, 0).Add(72 * time.Hour).UnixNano(), - Value: 4.0, - }, - }, - exp: []*mapperValue{ - { - Time: time.Unix(0, 0).Add(24 * time.Hour).UnixNano(), - Value: 1.0, - }, - { - Time: time.Unix(0, 0).Add(72 * time.Hour).UnixNano(), - Value: 4.0, - }, - }, - }, - } - - for _, test := range tests { - p := rawQueryDerivativeProcessor{ - isNonNegative: test.fn == "non_negative_derivative", - derivativeInterval: test.interval, - } - got := p.process(test.in) - - if len(got) != len(test.exp) { - t.Fatalf("rawQueryDerivativeProcessor(%s) - %s\nlen mismatch: got %d, exp %d", test.fn, test.name, len(got), len(test.exp)) - } - - for i := 0; i < len(test.exp); i++ { - fmt.Println("Times:", test.exp[i].Time, got[i].Time) - if test.exp[i].Time != got[i].Time || math.Abs((test.exp[i].Value.(float64)-got[i].Value.(float64))) > 0.0000001 { - t.Fatalf("rawQueryDerivativeProcessor - %s results mismatch:\ngot %v\nexp %v", test.name, got, test.exp) - } - } - } -} - -type testQEMetastore struct { - sgFunc func(database, policy string, min, max time.Time) (a []meta.ShardGroupInfo, err error) -} - -func (t *testQEMetastore) ShardGroupsByTimeRange(database, policy string, min, max time.Time) (a []meta.ShardGroupInfo, err error) { - return t.sgFunc(database, policy, min, max) -} - -func (t *testQEMetastore) Database(name string) (*meta.DatabaseInfo, error) { return nil, nil } -func (t *testQEMetastore) Databases() ([]meta.DatabaseInfo, error) { return nil, nil } -func (t *testQEMetastore) User(name string) (*meta.UserInfo, error) { return nil, nil } -func (t *testQEMetastore) AdminUserExists() (bool, error) { return false, nil } -func (t *testQEMetastore) Authenticate(username, password string) (*meta.UserInfo, error) { - return nil, nil -} -func (t *testQEMetastore) RetentionPolicy(database, name string) (rpi *meta.RetentionPolicyInfo, err error) { - return nil, nil -} -func (t *testQEMetastore) UserCount() (int, error) { return 0, nil } - -func (t *testQEMetastore) NodeID() uint64 { return nID } - -func testStoreAndQueryExecutor() (*Store, *QueryExecutor) { - path, _ := ioutil.TempDir("", "") - - store := NewStore(path) - err := store.Open() - if err != nil { - panic(err) - } - database := "foo" - retentionPolicy := "bar" - store.CreateShard(database, retentionPolicy, sID0) - store.CreateShard(database, retentionPolicy, sID1) - - query_executor := NewQueryExecutor(store) - query_executor.ShardMapper = &testQEShardMapper{store} - - return store, query_executor -} - -type testQEShardMapper struct { - store *Store -} - -func (t *testQEShardMapper) CreateMapper(shard meta.ShardInfo, stmt string, chunkSize int) (Mapper, error) { - return t.store.CreateMapper(shard.ID, stmt, chunkSize) -} - -func executeAndGetResults(executor Executor) string { - ch := executor.Execute() - - var rows []*influxql.Row - for r := range ch { - rows = append(rows, r) - } - return string(mustMarshalJSON(rows)) -} +import _ "github.com/influxdb/influxdb/tsdb/engine" diff --git a/Godeps/_workspace/src/github.com/influxdb/influxdb/tsdb/executor.go b/Godeps/_workspace/src/github.com/influxdb/influxdb/tsdb/executor.go new file mode 100644 index 000000000..d6770a892 --- /dev/null +++ b/Godeps/_workspace/src/github.com/influxdb/influxdb/tsdb/executor.go @@ -0,0 +1,981 @@ +package tsdb + +import ( + "fmt" + "math" + "sort" + "time" + + "github.com/influxdb/influxdb/influxql" +) + +const ( + // Return an error if the user is trying to select more than this number of points in a 
group by statement.
+	// Most likely they specified a group by interval without time boundaries.
+	MaxGroupByPoints = 100000
+
+	// Since time is always selected, the column count when selecting only a single other value will be 2
+	SelectColumnCountWithOneValue = 2
+
+	// IgnoredChunkSize is what gets passed into Mapper.Begin for aggregate queries as they don't chunk points out
+	IgnoredChunkSize = 0
+)
+
+// Mapper is the interface all Mapper types must implement.
+type Mapper interface {
+	Open() error
+	TagSets() []string
+	Fields() []string
+	NextChunk() (interface{}, error)
+	Close()
+}
+
+// StatefulMapper encapsulates a Mapper and some state that the executor needs to
+// track for that mapper.
+type StatefulMapper struct {
+	Mapper
+	bufferedChunk *MapperOutput // Last read chunk.
+	drained       bool
+}
+
+// NextChunk returns the next chunk read from the wrapped Mapper, converted to a *MapperOutput.
+func (sm *StatefulMapper) NextChunk() (*MapperOutput, error) {
+	c, err := sm.Mapper.NextChunk()
+	if err != nil {
+		return nil, err
+	}
+	chunk, ok := c.(*MapperOutput)
+	if !ok {
+		if chunk == interface{}(nil) {
+			return nil, nil
+		}
+	}
+	return chunk, nil
+}
+
+type Executor struct {
+	stmt           *influxql.SelectStatement
+	mappers        []*StatefulMapper
+	chunkSize      int
+	limitedTagSets map[string]struct{} // Set of tagsets for which data has reached the LIMIT.
+}
+
+// NewExecutor returns a new Executor.
+func NewExecutor(stmt *influxql.SelectStatement, mappers []Mapper, chunkSize int) *Executor {
+	a := []*StatefulMapper{}
+	for _, m := range mappers {
+		a = append(a, &StatefulMapper{m, nil, false})
+	}
+	return &Executor{
+		stmt:           stmt,
+		mappers:        a,
+		chunkSize:      chunkSize,
+		limitedTagSets: make(map[string]struct{}),
+	}
+}
+
+// Execute begins execution of the query and returns a channel to receive rows.
+func (e *Executor) Execute() <-chan *influxql.Row {
+	// Create output channel and stream data in a separate goroutine.
+	out := make(chan *influxql.Row, 0)
+
+	// Certain operations on the SELECT statement can be performed by the Executor without
+	// assistance from the Mappers. This allows the Executor to prepare aggregation functions
+	// and mathematical functions.
+	e.stmt.RewriteDistinct()
+
+	if (e.stmt.IsRawQuery && !e.stmt.HasDistinct()) || e.stmt.IsSimpleDerivative() {
+		go e.executeRaw(out)
+	} else {
+		go e.executeAggregate(out)
+	}
+	return out
+}
+
+// mappersDrained returns whether all the executor's Mappers have been drained of data.
+func (e *Executor) mappersDrained() bool {
+	for _, m := range e.mappers {
+		if !m.drained {
+			return false
+		}
+	}
+	return true
+}
+
+// nextMapperTagSet returns the alphabetically lowest tagset across all Mappers.
+func (e *Executor) nextMapperTagSet() string {
+	tagset := ""
+	for _, m := range e.mappers {
+		if m.bufferedChunk != nil {
+			if tagset == "" {
+				tagset = m.bufferedChunk.key()
+			} else if m.bufferedChunk.key() < tagset {
+				tagset = m.bufferedChunk.key()
+			}
+		}
+	}
+	return tagset
+}
+
+// nextMapperLowestTime returns the lowest minimum time across all Mappers, for the given tagset.
+func (e *Executor) nextMapperLowestTime(tagset string) int64 {
+	minTime := int64(math.MaxInt64)
+	for _, m := range e.mappers {
+		if !m.drained && m.bufferedChunk != nil {
+			if m.bufferedChunk.key() != tagset {
+				continue
+			}
+			t := m.bufferedChunk.Values[len(m.bufferedChunk.Values)-1].Time
+			if t < minTime {
+				minTime = t
+			}
+		}
+	}
+	return minTime
+}
+
+// tagSetIsLimited returns whether data for the given tagset has been LIMITed.
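+// Once a tagset is limited, subsequent chunks for it are discarded as they are read.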
+func (e *Executor) tagSetIsLimited(tagset string) bool {
+	_, ok := e.limitedTagSets[tagset]
+	return ok
+}
+
+// limitTagSet marks the given tagset as LIMITed.
+func (e *Executor) limitTagSet(tagset string) {
+	e.limitedTagSets[tagset] = struct{}{}
+}
+
+func (e *Executor) executeRaw(out chan *influxql.Row) {
+	// It's important that all resources are released when execution completes.
+	defer e.close()
+
+	// Open the mappers.
+	for _, m := range e.mappers {
+		if err := m.Open(); err != nil {
+			out <- &influxql.Row{Err: err}
+			return
+		}
+	}
+
+	// Get the distinct fields across all mappers.
+	var selectFields, aliasFields []string
+	if e.stmt.HasWildcard() {
+		sf := newStringSet()
+		for _, m := range e.mappers {
+			sf.add(m.Fields()...)
+		}
+		selectFields = sf.list()
+		aliasFields = selectFields
+	} else {
+		selectFields = e.stmt.Fields.Names()
+		aliasFields = e.stmt.Fields.AliasNames()
+	}
+
+	// Used to read ahead chunks from mappers.
+	var rowWriter *limitedRowWriter
+	var currTagset string
+
+	// Keep looping until all mappers drained.
+	var err error
+	for {
+		// Get the next chunk from each Mapper.
+		for _, m := range e.mappers {
+			if m.drained {
+				continue
+			}
+
+			// Set the next buffered chunk on the mapper, or mark it drained.
+			for {
+				if m.bufferedChunk == nil {
+					m.bufferedChunk, err = m.NextChunk()
+					if err != nil {
+						out <- &influxql.Row{Err: err}
+						return
+					}
+					if m.bufferedChunk == nil {
+						// Mapper can do no more for us.
+						m.drained = true
+						break
+					}
+
+					// If the SELECT query is on more than 1 field, but the chunks' values from the Mappers
+					// only contain a single value, create k-v pairs using the field name of the chunk
+					// and the value of the chunk. If there is only 1 SELECT field across all mappers then
+					// there is no need to create k-v pairs, and there is no need to distinguish field data,
+					// as it is all for the *same* field.
+					if len(selectFields) > 1 && len(m.bufferedChunk.Fields) == 1 {
+						fieldKey := m.bufferedChunk.Fields[0]
+
+						for i := range m.bufferedChunk.Values {
+							field := map[string]interface{}{fieldKey: m.bufferedChunk.Values[i].Value}
+							m.bufferedChunk.Values[i].Value = field
+						}
+					}
+				}
+
+				if e.tagSetIsLimited(m.bufferedChunk.Name) {
+					// chunk's tagset is limited, so no good. Try again.
+					m.bufferedChunk = nil
+					continue
+				}
+				// This mapper has a chunk available, and it is not limited.
+				break
+			}
+		}
+
+		// All Mappers done?
+		if e.mappersDrained() {
+			rowWriter.Flush()
+			break
+		}
+
+		// Send out data for the next alphabetically-lowest tagset. All Mappers emit data in this order,
+		// so by always continuing with the lowest tagset until it is finished, we process all data in
+		// the required order, and don't "miss" any.
+		tagset := e.nextMapperTagSet()
+		if tagset != currTagset {
+			currTagset = tagset
+			// Tagset has changed, time for a new rowWriter. Be sure to kick out any residual values.
+			rowWriter.Flush()
+			rowWriter = nil
+		}
+
+		// Process the mapper outputs. We can send out everything up to the min of the last time
+		// of the chunks for the next tagset.
+		minTime := e.nextMapperLowestTime(tagset)
+
+		// Now empty out all the chunks up to the min time. Create new output struct for this data.
+		var chunkedOutput *MapperOutput
+		for _, m := range e.mappers {
+			if m.drained {
+				continue
+			}
+
+			// This mapper's next chunk is not for the next tagset, or the very first value of
+			// the chunk is beyond the highest acceptable timestamp. Skip it.
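+			// (For example, if this tagset's buffered chunks end at t=10 and t=15, minTime is 10;
+			// a chunk whose first value is after t=10 contributes nothing on this pass.)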
+ if m.bufferedChunk.key() != tagset || m.bufferedChunk.Values[0].Time > minTime { + continue + } + + // Find the index of the point up to the min. + ind := len(m.bufferedChunk.Values) + for i, mo := range m.bufferedChunk.Values { + if mo.Time > minTime { + ind = i + break + } + } + + // Add up to the index to the values + if chunkedOutput == nil { + chunkedOutput = &MapperOutput{ + Name: m.bufferedChunk.Name, + Tags: m.bufferedChunk.Tags, + cursorKey: m.bufferedChunk.key(), + } + chunkedOutput.Values = m.bufferedChunk.Values[:ind] + } else { + chunkedOutput.Values = append(chunkedOutput.Values, m.bufferedChunk.Values[:ind]...) + } + + // Clear out the values being sent out, keep the remainder. + m.bufferedChunk.Values = m.bufferedChunk.Values[ind:] + + // If we emptied out all the values, clear the mapper's buffered chunk. + if len(m.bufferedChunk.Values) == 0 { + m.bufferedChunk = nil + } + } + + // Sort the values by time first so we can then handle offset and limit + sort.Sort(MapperValues(chunkedOutput.Values)) + + // Now that we have full name and tag details, initialize the rowWriter. + // The Name and Tags will be the same for all mappers. + if rowWriter == nil { + rowWriter = &limitedRowWriter{ + limit: e.stmt.Limit, + offset: e.stmt.Offset, + chunkSize: e.chunkSize, + name: chunkedOutput.Name, + tags: chunkedOutput.Tags, + selectNames: selectFields, + aliasNames: aliasFields, + fields: e.stmt.Fields, + c: out, + } + } + if e.stmt.HasDerivative() { + interval, err := derivativeInterval(e.stmt) + if err != nil { + out <- &influxql.Row{Err: err} + return + } + rowWriter.transformer = &RawQueryDerivativeProcessor{ + IsNonNegative: e.stmt.FunctionCalls()[0].Name == "non_negative_derivative", + DerivativeInterval: interval, + } + } + + // Emit the data via the limiter. + if limited := rowWriter.Add(chunkedOutput.Values); limited { + // Limit for this tagset was reached, mark it and start draining a new tagset. + e.limitTagSet(chunkedOutput.key()) + continue + } + } + + close(out) +} + +func (e *Executor) executeAggregate(out chan *influxql.Row) { + // It's important to close all resources when execution completes. + defer e.close() + + // Create the functions which will reduce values from mappers for + // a given interval. The function offsets within this slice match + // the offsets within the value slices that are returned by the + // mapper. + aggregates := e.stmt.FunctionCalls() + reduceFuncs := make([]influxql.ReduceFunc, len(aggregates)) + for i, c := range aggregates { + reduceFunc, err := influxql.InitializeReduceFunc(c) + if err != nil { + out <- &influxql.Row{Err: err} + return + } + reduceFuncs[i] = reduceFunc + } + + // Put together the rows to return, starting with columns. + columnNames := make([]string, len(e.stmt.Fields)+1) + columnNames[0] = "time" + for i, f := range e.stmt.Fields { + columnNames[i+1] = f.Name() + } + + // Open the mappers. + for _, m := range e.mappers { + if err := m.Open(); err != nil { + out <- &influxql.Row{Err: err} + return + } + } + + // Build the set of available tagsets across all mappers. This is used for + // later checks. + availTagSets := newStringSet() + for _, m := range e.mappers { + for _, t := range m.TagSets() { + availTagSets.add(t) + } + } + + // Prime each mapper's chunk buffer. + var err error + for _, m := range e.mappers { + m.bufferedChunk, err = m.NextChunk() + if err != nil { + out <- &influxql.Row{Err: err} + return + } + if m.bufferedChunk == nil { + m.drained = true + } + } + + // Keep looping until all mappers drained. 
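+	// Each pass drains the buffered chunks for the alphabetically-lowest tagset, buckets
+	// their values by interval start time, applies the reduce functions to each bucket,
+	// and emits a single row for that tagset.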
+ for !e.mappersDrained() { + // Send out data for the next alphabetically-lowest tagset. All Mappers send out in this order + // so collect data for this tagset, ignoring all others. + tagset := e.nextMapperTagSet() + chunks := []*MapperOutput{} + + // Pull as much as possible from each mapper. Stop when a mapper offers + // data for a new tagset, or empties completely. + for _, m := range e.mappers { + if m.drained { + continue + } + + for { + if m.bufferedChunk == nil { + m.bufferedChunk, err = m.NextChunk() + if err != nil { + out <- &influxql.Row{Err: err} + return + } + if m.bufferedChunk == nil { + m.drained = true + break + } + } + + // Got a chunk. Can we use it? + if m.bufferedChunk.key() != tagset { + // No, so just leave it in the buffer. + break + } + // We can, take it. + chunks = append(chunks, m.bufferedChunk) + m.bufferedChunk = nil + } + } + + // Prep a row, ready for kicking out. + var row *influxql.Row + + // Prep for bucketing data by start time of the interval. + buckets := map[int64][][]interface{}{} + + for _, chunk := range chunks { + if row == nil { + row = &influxql.Row{ + Name: chunk.Name, + Tags: chunk.Tags, + Columns: columnNames, + } + } + + startTime := chunk.Values[0].Time + _, ok := buckets[startTime] + values := chunk.Values[0].Value.([]interface{}) + if !ok { + buckets[startTime] = make([][]interface{}, len(values)) + } + for i, v := range values { + buckets[startTime][i] = append(buckets[startTime][i], v) + } + } + + // Now, after the loop above, within each time bucket is a slice. Within the element of each + // slice is another slice of interface{}, ready for passing to the reducer functions. + + // Work each bucket of time, in time ascending order. + tMins := make(int64arr, 0, len(buckets)) + for k, _ := range buckets { + tMins = append(tMins, k) + } + sort.Sort(tMins) + + values := make([][]interface{}, len(tMins)) + for i, t := range tMins { + values[i] = make([]interface{}, 0, len(columnNames)) + values[i] = append(values[i], time.Unix(0, t).UTC()) // Time value is always first. + + for j, f := range reduceFuncs { + reducedVal := f(buckets[t][j]) + values[i] = append(values[i], reducedVal) + } + } + + // Perform any mathematics. + values = processForMath(e.stmt.Fields, values) + + // Handle any fill options + values = e.processFill(values) + + // process derivatives + values = e.processDerivative(values) + + // If we have multiple tag sets we'll want to filter out the empty ones + if len(availTagSets) > 1 && resultsEmpty(values) { + continue + } + + row.Values = values + out <- row + } + + close(out) +} + +// processFill will take the results and return new results (or the same if no fill modifications are needed) +// with whatever fill options the query has. +func (e *Executor) processFill(results [][]interface{}) [][]interface{} { + // don't do anything if we're supposed to leave the nulls + if e.stmt.Fill == influxql.NullFill { + return results + } + + if e.stmt.Fill == influxql.NoFill { + // remove any rows that have even one nil value. This one is tricky because they could have multiple + // aggregates, but this option means that any row that has even one nil gets purged. 
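+		// For example, with two aggregates a row of [t, 5, nil] is dropped entirely,
+		// while [t, 5, 7] is kept.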
+		newResults := make([][]interface{}, 0, len(results))
+		for _, vals := range results {
+			hasNil := false
+			// start at 1 because the first value is always time
+			for j := 1; j < len(vals); j++ {
+				if vals[j] == nil {
+					hasNil = true
+					break
+				}
+			}
+			if !hasNil {
+				newResults = append(newResults, vals)
+			}
+		}
+		return newResults
+	}
+
+	// They're either filling with previous values or a specific number
+	for i, vals := range results {
+		// start at 1 because the first value is always time
+		for j := 1; j < len(vals); j++ {
+			if vals[j] == nil {
+				switch e.stmt.Fill {
+				case influxql.PreviousFill:
+					if i != 0 {
+						vals[j] = results[i-1][j]
+					}
+				case influxql.NumberFill:
+					vals[j] = e.stmt.FillValue
+				}
+			}
+		}
+	}
+	return results
+}
+
+// processDerivative returns the derivatives of the results
+func (e *Executor) processDerivative(results [][]interface{}) [][]interface{} {
+	// Return early if we're not supposed to process the derivatives
+	if e.stmt.HasDerivative() {
+		interval, err := derivativeInterval(e.stmt)
+		if err != nil {
+			return results // XXX need to handle this better.
+		}
+
+		// Determine whether to drop negative differences
+		isNonNegative := e.stmt.FunctionCalls()[0].Name == "non_negative_derivative"
+		return ProcessAggregateDerivative(results, isNonNegative, interval)
+	}
+	return results
+}
+
+// close closes the executor such that all resources are released. Once closed,
+// an executor may not be re-used.
+func (e *Executor) close() {
+	if e != nil {
+		for _, m := range e.mappers {
+			m.Close()
+		}
+	}
+}
+
+// limitedRowWriter accepts raw mapper values, and will emit those values as rows in chunks
+// of the given size. If the chunk size is 0, no chunking will be performed. In addition, if
+// the limit is reached, outstanding values will be emitted. If limit is zero, no limit is enforced.
+type limitedRowWriter struct {
+	chunkSize   int
+	limit       int
+	offset      int
+	name        string
+	tags        map[string]string
+	fields      influxql.Fields
+	selectNames []string
+	aliasNames  []string
+	c           chan *influxql.Row
+
+	currValues  []*MapperValue
+	totalOffSet int
+	totalSent   int
+
+	transformer interface {
+		Process(input []*MapperValue) []*MapperValue
+	}
+}
+
+// Add accepts a slice of values, and will emit those values as per chunking requirements.
+// If limited is returned as true, the limit was also reached and no more values should be
+// added. In that case only up to the limit of values are emitted.
+func (r *limitedRowWriter) Add(values []*MapperValue) (limited bool) {
+	if r.currValues == nil {
+		r.currValues = make([]*MapperValue, 0, r.chunkSize)
+	}
+
+	// Enforce offset.
+	if r.totalOffSet < r.offset {
+		// Still some offsetting to do.
+		offsetRequired := r.offset - r.totalOffSet
+		if offsetRequired >= len(values) {
+			r.totalOffSet += len(values)
+			return false
+		} else {
+			// Drop leading values and keep going.
+			values = values[offsetRequired:]
+			r.totalOffSet += offsetRequired
+		}
+	}
+	r.currValues = append(r.currValues, values...)
+
+	// Check limit.
+	limitReached := r.limit > 0 && r.totalSent+len(r.currValues) >= r.limit
+	if limitReached {
+		// Limit will be satisfied with current values. Truncate 'em.
+		r.currValues = r.currValues[:r.limit-r.totalSent]
+	}
+
+	// Is chunking in effect?
+	if r.chunkSize != IgnoredChunkSize {
+		// Chunking level reached?
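+		// Note: the index below simplifies to r.chunkSize, so each iteration emits one full chunk.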
+		for len(r.currValues) >= r.chunkSize {
+			index := len(r.currValues) - (len(r.currValues) - r.chunkSize)
+			r.c <- r.processValues(r.currValues[:index])
+			r.currValues = r.currValues[index:]
+		}
+
+		// After values have been sent out by chunking, there may still be some
+		// values left, if the remainder is less than the chunk size. But if the
+		// limit has been reached, kick them out.
+		if len(r.currValues) > 0 && limitReached {
+			r.c <- r.processValues(r.currValues)
+			r.currValues = nil
+		}
+	} else if limitReached {
+		// No chunking in effect, but the limit has been reached.
+		r.c <- r.processValues(r.currValues)
+		r.currValues = nil
+	}
+
+	return limitReached
+}
+
+// Flush instructs the limitedRowWriter to emit any pending values as a single row,
+// adhering to any limits. Chunking is not enforced.
+func (r *limitedRowWriter) Flush() {
+	if r == nil {
+		return
+	}
+
+	// If at least some rows were sent, and no values are pending, then don't
+	// emit anything, since at least 1 row was previously emitted. This ensures
+	// that if no rows were ever sent, at least 1 will be emitted, even if it is an empty row.
+	if r.totalSent != 0 && len(r.currValues) == 0 {
+		return
+	}
+
+	if r.limit > 0 && len(r.currValues) > r.limit {
+		r.currValues = r.currValues[:r.limit]
+	}
+	r.c <- r.processValues(r.currValues)
+	r.currValues = nil
+}
+
+// processValues emits the given values in a single row.
+func (r *limitedRowWriter) processValues(values []*MapperValue) *influxql.Row {
+	defer func() {
+		r.totalSent += len(values)
+	}()
+
+	selectNames := r.selectNames
+	aliasNames := r.aliasNames
+
+	if r.transformer != nil {
+		values = r.transformer.Process(values)
+	}
+
+	// ensure that time is in the select names and in the first position
+	hasTime := false
+	for i, n := range selectNames {
+		if n == "time" {
+			// Swap time to the first argument for names
+			if i != 0 {
+				selectNames[0], selectNames[i] = selectNames[i], selectNames[0]
+			}
+			hasTime = true
+			break
+		}
+	}
+
+	// time should always be in the list of names they get back
+	if !hasTime {
+		selectNames = append([]string{"time"}, selectNames...)
+		aliasNames = append([]string{"time"}, aliasNames...)
+	}
+
+	// since selectNames can contain tags, we need to strip them out
+	selectFields := make([]string, 0, len(selectNames))
+	aliasFields := make([]string, 0, len(selectNames))
+
+	for i, n := range selectNames {
+		if _, found := r.tags[n]; !found {
+			selectFields = append(selectFields, n)
+			aliasFields = append(aliasFields, aliasNames[i])
+		}
+	}
+
+	row := &influxql.Row{
+		Name:    r.name,
+		Tags:    r.tags,
+		Columns: aliasFields,
+	}
+
+	// Kick out an empty row if no results are available.
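+	// (The empty row still carries the series name, tags and column names for the caller.)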
+ if len(values) == 0 { + return row + } + + // if they've selected only a single value we have to handle things a little differently + singleValue := len(selectFields) == SelectColumnCountWithOneValue + + // the results will have all of the raw mapper results, convert into the row + for _, v := range values { + vals := make([]interface{}, len(selectFields)) + + if singleValue { + vals[0] = time.Unix(0, v.Time).UTC() + switch val := v.Value.(type) { + case map[string]interface{}: + vals[1] = val[selectFields[1]] + default: + vals[1] = val + } + } else { + fields := v.Value.(map[string]interface{}) + + // time is always the first value + vals[0] = time.Unix(0, v.Time).UTC() + + // populate the other values + for i := 1; i < len(selectFields); i++ { + f, ok := fields[selectFields[i]] + if ok { + vals[i] = f + continue + } + if v.Tags != nil { + f, ok = v.Tags[selectFields[i]] + if ok { + vals[i] = f + } + } + } + } + + row.Values = append(row.Values, vals) + } + + // Perform any mathematical post-processing. + row.Values = processForMath(r.fields, row.Values) + + return row +} + +type RawQueryDerivativeProcessor struct { + LastValueFromPreviousChunk *MapperValue + IsNonNegative bool // Whether to drop negative differences + DerivativeInterval time.Duration +} + +func (rqdp *RawQueryDerivativeProcessor) canProcess(input []*MapperValue) bool { + // If we only have 1 value, then the value did not change, so return + // a single row with 0.0 + if len(input) == 1 { + return false + } + + // See if the field value is numeric, if it's not, we can't process the derivative + validType := false + switch input[0].Value.(type) { + case int64: + validType = true + case float64: + validType = true + } + + return validType +} + +func (rqdp *RawQueryDerivativeProcessor) Process(input []*MapperValue) []*MapperValue { + if len(input) == 0 { + return input + } + + if !rqdp.canProcess(input) { + return []*MapperValue{ + &MapperValue{ + Time: input[0].Time, + Value: 0.0, + }, + } + } + + if rqdp.LastValueFromPreviousChunk == nil { + rqdp.LastValueFromPreviousChunk = input[0] + } + + derivativeValues := []*MapperValue{} + for i := 1; i < len(input); i++ { + v := input[i] + + // Calculate the derivative of successive points by dividing the difference + // of each value by the elapsed time normalized to the interval + diff := int64toFloat64(v.Value) - int64toFloat64(rqdp.LastValueFromPreviousChunk.Value) + + elapsed := v.Time - rqdp.LastValueFromPreviousChunk.Time + + value := 0.0 + if elapsed > 0 { + value = diff / (float64(elapsed) / float64(rqdp.DerivativeInterval)) + } + + rqdp.LastValueFromPreviousChunk = v + + // Drop negative values for non-negative derivatives + if rqdp.IsNonNegative && diff < 0 { + continue + } + + derivativeValues = append(derivativeValues, &MapperValue{ + Time: v.Time, + Value: value, + }) + } + + return derivativeValues +} + +// processForMath will apply any math that was specified in the select statement +// against the passed in results +func processForMath(fields influxql.Fields, results [][]interface{}) [][]interface{} { + hasMath := false + for _, f := range fields { + if _, ok := f.Expr.(*influxql.BinaryExpr); ok { + hasMath = true + } else if _, ok := f.Expr.(*influxql.ParenExpr); ok { + hasMath = true + } + } + + if !hasMath { + return results + } + + processors := make([]influxql.Processor, len(fields)) + startIndex := 1 + for i, f := range fields { + processors[i], startIndex = influxql.GetProcessor(f.Expr, startIndex) + } + + mathResults := make([][]interface{}, len(results)) 
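+	// Re-evaluate each field's expression against the source row; column 0 remains the timestamp.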
+ for i, _ := range mathResults { + mathResults[i] = make([]interface{}, len(fields)+1) + // put the time in + mathResults[i][0] = results[i][0] + for j, p := range processors { + mathResults[i][j+1] = p(results[i]) + } + } + + return mathResults +} + +// ProcessAggregateDerivative returns the derivatives of an aggregate result set +func ProcessAggregateDerivative(results [][]interface{}, isNonNegative bool, interval time.Duration) [][]interface{} { + // Return early if we can't calculate derivatives + if len(results) == 0 { + return results + } + + // If we only have 1 value, then the value did not change, so return + // a single row w/ 0.0 + if len(results) == 1 { + return [][]interface{}{ + []interface{}{results[0][0], 0.0}, + } + } + + // Check the value's type to ensure it's an numeric, if not, return a 0 result. We only check the first value + // because derivatives cannot be combined with other aggregates currently. + validType := false + switch results[0][1].(type) { + case int64: + validType = true + case float64: + validType = true + } + + if !validType { + return [][]interface{}{ + []interface{}{results[0][0], 0.0}, + } + } + + // Otherwise calculate the derivatives as the difference between consecutive + // points divided by the elapsed time. Then normalize to the requested + // interval. + derivatives := [][]interface{}{} + for i := 1; i < len(results); i++ { + prev := results[i-1] + cur := results[i] + + if cur[1] == nil || prev[1] == nil { + continue + } + + elapsed := cur[0].(time.Time).Sub(prev[0].(time.Time)) + diff := int64toFloat64(cur[1]) - int64toFloat64(prev[1]) + value := 0.0 + if elapsed > 0 { + value = float64(diff) / (float64(elapsed) / float64(interval)) + } + + // Drop negative values for non-negative derivatives + if isNonNegative && diff < 0 { + continue + } + + val := []interface{}{ + cur[0], + value, + } + derivatives = append(derivatives, val) + } + + return derivatives +} + +// derivativeInterval returns the time interval for the one (and only) derivative func +func derivativeInterval(stmt *influxql.SelectStatement) (time.Duration, error) { + if len(stmt.FunctionCalls()[0].Args) == 2 { + return stmt.FunctionCalls()[0].Args[1].(*influxql.DurationLiteral).Val, nil + } + interval, err := stmt.GroupByInterval() + if err != nil { + return 0, err + } + if interval > 0 { + return interval, nil + } + return time.Second, nil +} + +// resultsEmpty will return true if the all the result values are empty or contain only nulls +func resultsEmpty(resultValues [][]interface{}) bool { + for _, vals := range resultValues { + // start the loop at 1 because we want to skip over the time value + for i := 1; i < len(vals); i++ { + if vals[i] != nil { + return false + } + } + } + return true +} + +func int64toFloat64(v interface{}) float64 { + switch v.(type) { + case int64: + return float64(v.(int64)) + case float64: + return v.(float64) + } + panic(fmt.Sprintf("expected either int64 or float64, got %v", v)) +} + +type int64arr []int64 + +func (a int64arr) Len() int { return len(a) } +func (a int64arr) Swap(i, j int) { a[i], a[j] = a[j], a[i] } +func (a int64arr) Less(i, j int) bool { return a[i] < a[j] } diff --git a/Godeps/_workspace/src/github.com/influxdb/influxdb/tsdb/executor_test.go b/Godeps/_workspace/src/github.com/influxdb/influxdb/tsdb/executor_test.go new file mode 100644 index 000000000..1f0ee15ce --- /dev/null +++ b/Godeps/_workspace/src/github.com/influxdb/influxdb/tsdb/executor_test.go @@ -0,0 +1,991 @@ +package tsdb_test + +import ( + "encoding/json" + 
"io/ioutil" + "math" + "os" + "testing" + "time" + + "github.com/influxdb/influxdb/influxql" + "github.com/influxdb/influxdb/meta" + "github.com/influxdb/influxdb/tsdb" +) + +var sID0 = uint64(1) +var sID1 = uint64(2) +var sgID1 = uint64(3) +var sgID2 = uint64(4) +var nID = uint64(42) + +// Simple test to ensure data can be read from two shards. +func TestWritePointsAndExecuteTwoShards(t *testing.T) { + // Create the mock planner and its metastore + store, query_executor := testStoreAndQueryExecutor() + defer os.RemoveAll(store.Path()) + query_executor.MetaStore = &testQEMetastore{ + sgFunc: func(database, policy string, min, max time.Time) (a []meta.ShardGroupInfo, err error) { + return []meta.ShardGroupInfo{ + { + ID: sgID, + StartTime: time.Now().Add(-time.Hour), + EndTime: time.Now().Add(time.Hour), + Shards: []meta.ShardInfo{ + { + ID: uint64(sID0), + OwnerIDs: []uint64{nID}, + }, + }, + }, + { + ID: sgID, + StartTime: time.Now().Add(-2 * time.Hour), + EndTime: time.Now().Add(-time.Hour), + Shards: []meta.ShardInfo{ + { + ID: uint64(sID1), + OwnerIDs: []uint64{nID}, + }, + }, + }, + }, nil + }, + } + + // Write two points across shards. + pt1time := time.Unix(1, 0).UTC() + if err := store.WriteToShard(sID0, []tsdb.Point{tsdb.NewPoint( + "cpu", + map[string]string{"host": "serverA", "region": "us-east"}, + map[string]interface{}{"value": 100}, + pt1time, + )}); err != nil { + t.Fatalf(err.Error()) + } + pt2time := time.Unix(2, 0).UTC() + if err := store.WriteToShard(sID1, []tsdb.Point{tsdb.NewPoint( + "cpu", + map[string]string{"host": "serverB", "region": "us-east"}, + map[string]interface{}{"value": 200}, + pt2time, + )}); err != nil { + t.Fatalf(err.Error()) + } + + var tests = []struct { + skip bool // Skip test + stmt string // Query statement + chunkSize int // Chunk size for driving the executor + expected string // Expected results, rendered as a string + }{ + { + stmt: `SELECT value FROM cpu`, + expected: `[{"name":"cpu","columns":["time","value"],"values":[["1970-01-01T00:00:01Z",100],["1970-01-01T00:00:02Z",200]]}]`, + }, + { + stmt: `SELECT value FROM cpu`, + chunkSize: 1, + expected: `[{"name":"cpu","columns":["time","value"],"values":[["1970-01-01T00:00:01Z",100]]},{"name":"cpu","columns":["time","value"],"values":[["1970-01-01T00:00:02Z",200]]}]`, + }, + { + stmt: `SELECT value FROM cpu LIMIT 1`, + expected: `[{"name":"cpu","columns":["time","value"],"values":[["1970-01-01T00:00:01Z",100]]}]`, + }, + { + stmt: `SELECT value FROM cpu LIMIT 1`, + chunkSize: 2, + expected: `[{"name":"cpu","columns":["time","value"],"values":[["1970-01-01T00:00:01Z",100]]}]`, + }, + { + stmt: `SELECT value FROM cpu WHERE host='serverA'`, + expected: `[{"name":"cpu","columns":["time","value"],"values":[["1970-01-01T00:00:01Z",100]]}]`, + }, + { + stmt: `SELECT value FROM cpu WHERE host='serverB'`, + expected: `[{"name":"cpu","columns":["time","value"],"values":[["1970-01-01T00:00:02Z",200]]}]`, + }, + { + stmt: `SELECT value FROM cpu WHERE host='serverC'`, + expected: `null`, + }, + { + stmt: `SELECT value FROM cpu GROUP BY host`, + expected: `[{"name":"cpu","tags":{"host":"serverA"},"columns":["time","value"],"values":[["1970-01-01T00:00:01Z",100]]},{"name":"cpu","tags":{"host":"serverB"},"columns":["time","value"],"values":[["1970-01-01T00:00:02Z",200]]}]`, + }, + { + stmt: `SELECT value FROM cpu GROUP BY region`, + expected: `[{"name":"cpu","tags":{"region":"us-east"},"columns":["time","value"],"values":[["1970-01-01T00:00:01Z",100],["1970-01-01T00:00:02Z",200]]}]`, + }, + { + stmt: 
`SELECT value FROM cpu GROUP BY host,region`, + expected: `[{"name":"cpu","tags":{"host":"serverA","region":"us-east"},"columns":["time","value"],"values":[["1970-01-01T00:00:01Z",100]]},{"name":"cpu","tags":{"host":"serverB","region":"us-east"},"columns":["time","value"],"values":[["1970-01-01T00:00:02Z",200]]}]`, + }, + { + stmt: `SELECT value FROM cpu WHERE host='serverA' GROUP BY host`, + expected: `[{"name":"cpu","tags":{"host":"serverA"},"columns":["time","value"],"values":[["1970-01-01T00:00:01Z",100]]}]`, + }, + + // Aggregate queries. + { + stmt: `SELECT sum(value) FROM cpu`, + expected: `[{"name":"cpu","columns":["time","sum"],"values":[["1970-01-01T00:00:00Z",300]]}]`, + }, + } + + for _, tt := range tests { + if tt.skip { + t.Logf("Skipping test %s", tt.stmt) + continue + } + executor, err := query_executor.Plan(mustParseSelectStatement(tt.stmt), tt.chunkSize) + if err != nil { + t.Fatalf("failed to plan query: %s", err.Error()) + } + got := executeAndGetResults(executor) + if got != tt.expected { + t.Fatalf("Test %s\nexp: %s\ngot: %s\n", tt.stmt, tt.expected, got) + } + } +} + +// Test that executor correctly orders data across shards. +func TestWritePointsAndExecuteTwoShardsAlign(t *testing.T) { + // Create the mock planner and its metastore + store, query_executor := testStoreAndQueryExecutor() + defer os.RemoveAll(store.Path()) + query_executor.MetaStore = &testQEMetastore{ + sgFunc: func(database, policy string, min, max time.Time) (a []meta.ShardGroupInfo, err error) { + return []meta.ShardGroupInfo{ + { + ID: sgID, + StartTime: time.Now().Add(-2 * time.Hour), + EndTime: time.Now().Add(-time.Hour), + Shards: []meta.ShardInfo{ + { + ID: uint64(sID1), + OwnerIDs: []uint64{nID}, + }, + }, + }, + { + ID: sgID, + StartTime: time.Now().Add(-2 * time.Hour), + EndTime: time.Now().Add(time.Hour), + Shards: []meta.ShardInfo{ + { + ID: uint64(sID0), + OwnerIDs: []uint64{nID}, + }, + }, + }, + }, nil + }, + } + + // Write interleaving, by time, chunks to the shards. 
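+	// Shard sID0 receives t=1s while shard sID1 receives t=2s and t=3s, so correctly
+	// ordered output requires merging across the two shards.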
+ if err := store.WriteToShard(sID0, []tsdb.Point{tsdb.NewPoint( + "cpu", + map[string]string{"host": "serverA"}, + map[string]interface{}{"value": 100}, + time.Unix(1, 0).UTC(), + )}); err != nil { + t.Fatalf(err.Error()) + } + if err := store.WriteToShard(sID1, []tsdb.Point{tsdb.NewPoint( + "cpu", + map[string]string{"host": "serverB"}, + map[string]interface{}{"value": 200}, + time.Unix(2, 0).UTC(), + )}); err != nil { + t.Fatalf(err.Error()) + } + if err := store.WriteToShard(sID1, []tsdb.Point{tsdb.NewPoint( + "cpu", + map[string]string{"host": "serverA"}, + map[string]interface{}{"value": 300}, + time.Unix(3, 0).UTC(), + )}); err != nil { + t.Fatalf(err.Error()) + } + + var tests = []struct { + skip bool // Skip test + stmt string // Query statement + chunkSize int // Chunk size for driving the executor + expected string // Expected results, rendered as a string + }{ + { + stmt: `SELECT value FROM cpu`, + chunkSize: 1, + expected: `[{"name":"cpu","columns":["time","value"],"values":[["1970-01-01T00:00:01Z",100]]},{"name":"cpu","columns":["time","value"],"values":[["1970-01-01T00:00:02Z",200]]},{"name":"cpu","columns":["time","value"],"values":[["1970-01-01T00:00:03Z",300]]}]`, + }, + { + stmt: `SELECT value FROM cpu`, + chunkSize: 2, + expected: `[{"name":"cpu","columns":["time","value"],"values":[["1970-01-01T00:00:01Z",100],["1970-01-01T00:00:02Z",200]]},{"name":"cpu","columns":["time","value"],"values":[["1970-01-01T00:00:03Z",300]]}]`, + }, + { + stmt: `SELECT mean(value),sum(value) FROM cpu`, + chunkSize: 2, + expected: `[{"name":"cpu","columns":["time","mean","sum"],"values":[["1970-01-01T00:00:00Z",200,600]]}]`, + }, + } + + for _, tt := range tests { + if tt.skip { + t.Logf("Skipping test %s", tt.stmt) + continue + } + executor, err := query_executor.Plan(mustParseSelectStatement(tt.stmt), tt.chunkSize) + if err != nil { + t.Fatalf("failed to plan query: %s", err.Error()) + } + got := executeAndGetResults(executor) + if got != tt.expected { + t.Fatalf("Test %s\nexp: %s\ngot: %s\n", tt.stmt, tt.expected, got) + } + } +} + +// Test to ensure the engine handles query re-writing across stores. +func TestWritePointsAndExecuteTwoShardsQueryRewrite(t *testing.T) { + // Create two distinct stores, ensuring shard mappers will shard nothing. + store0 := testStore() + defer os.RemoveAll(store0.Path()) + store1 := testStore() + defer os.RemoveAll(store1.Path()) + + // Create a shard in each store. + database := "foo" + retentionPolicy := "bar" + store0.CreateShard(database, retentionPolicy, sID0) + store1.CreateShard(database, retentionPolicy, sID1) + + // Write two points across shards. 
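+	// Each shard receives a different field (value1 vs value2), so the statement must be
+	// rewritten per shard and the results merged.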
+ pt1time := time.Unix(1, 0).UTC() + if err := store0.WriteToShard(sID0, []tsdb.Point{tsdb.NewPoint( + "cpu", + map[string]string{"host": "serverA"}, + map[string]interface{}{"value1": 100}, + pt1time, + )}); err != nil { + t.Fatalf(err.Error()) + } + pt2time := time.Unix(2, 0).UTC() + if err := store1.WriteToShard(sID1, []tsdb.Point{tsdb.NewPoint( + "cpu", + map[string]string{"host": "serverB"}, + map[string]interface{}{"value2": 200}, + pt2time, + )}); err != nil { + t.Fatalf(err.Error()) + } + var tests = []struct { + skip bool // Skip test + stmt string // Query statement + chunkSize int // Chunk size for driving the executor + expected string // Expected results, rendered as a string + }{ + { + stmt: `SELECT * FROM cpu`, + expected: `[{"name":"cpu","columns":["time","host","value1","value2"],"values":[["1970-01-01T00:00:01Z","serverA",100,null],["1970-01-01T00:00:02Z","serverB",null,200]]}]`, + }, + { + stmt: `SELECT * FROM cpu GROUP BY *`, + expected: `[{"name":"cpu","tags":{"host":"serverA"},"columns":["time","value1","value2"],"values":[["1970-01-01T00:00:01Z",100,null]]},{"name":"cpu","tags":{"host":"serverB"},"columns":["time","value1","value2"],"values":[["1970-01-01T00:00:02Z",null,200]]}]`, + }, + } + for _, tt := range tests { + if tt.skip { + t.Logf("Skipping test %s", tt.stmt) + continue + } + + parsedSelectStmt := mustParseSelectStatement(tt.stmt) + + // Create Mappers and Executor. + mapper0, err := store0.CreateMapper(sID0, tt.stmt, tt.chunkSize) + if err != nil { + t.Fatalf("failed to create mapper0: %s", err.Error()) + } + mapper1, err := store1.CreateMapper(sID1, tt.stmt, tt.chunkSize) + if err != nil { + t.Fatalf("failed to create mapper1: %s", err.Error()) + } + executor := tsdb.NewExecutor(parsedSelectStmt, []tsdb.Mapper{mapper0, mapper1}, tt.chunkSize) + + // Check the results. + got := executeAndGetResults(executor) + if got != tt.expected { + t.Fatalf("Test %s\nexp: %s\ngot: %s\n", tt.stmt, tt.expected, got) + } + + } +} + +// Test that executor correctly orders data across shards when the tagsets +// are not presented in alphabetically order across shards. +func TestWritePointsAndExecuteTwoShardsTagSetOrdering(t *testing.T) { + // Create the mock planner and its metastore + store, query_executor := testStoreAndQueryExecutor() + defer os.RemoveAll(store.Path()) + query_executor.MetaStore = &testQEMetastore{ + sgFunc: func(database, policy string, min, max time.Time) (a []meta.ShardGroupInfo, err error) { + return []meta.ShardGroupInfo{ + { + ID: sgID, + Shards: []meta.ShardInfo{ + { + ID: uint64(sID0), + OwnerIDs: []uint64{nID}, + }, + }, + }, + { + ID: sgID, + Shards: []meta.ShardInfo{ + { + ID: uint64(sID1), + OwnerIDs: []uint64{nID}, + }, + }, + }, + }, nil + }, + } + + // Write tagsets "y" and "z" to first shard. + if err := store.WriteToShard(sID0, []tsdb.Point{tsdb.NewPoint( + "cpu", + map[string]string{"host": "y"}, + map[string]interface{}{"value": 100}, + time.Unix(1, 0).UTC(), + )}); err != nil { + t.Fatalf(err.Error()) + } + if err := store.WriteToShard(sID0, []tsdb.Point{tsdb.NewPoint( + "cpu", + map[string]string{"host": "z"}, + map[string]interface{}{"value": 200}, + time.Unix(1, 0).UTC(), + )}); err != nil { + t.Fatalf(err.Error()) + } + + // Write tagsets "x", y" and "z" to second shard. 
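+	// Tagset "x" exists only in the second shard, yet must sort ahead of "y" and "z"
+	// in the merged output.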
+ if err := store.WriteToShard(sID1, []tsdb.Point{tsdb.NewPoint( + "cpu", + map[string]string{"host": "x"}, + map[string]interface{}{"value": 300}, + time.Unix(2, 0).UTC(), + )}); err != nil { + t.Fatalf(err.Error()) + } + if err := store.WriteToShard(sID1, []tsdb.Point{tsdb.NewPoint( + "cpu", + map[string]string{"host": "y"}, + map[string]interface{}{"value": 400}, + time.Unix(3, 0).UTC(), + )}); err != nil { + t.Fatalf(err.Error()) + } + if err := store.WriteToShard(sID1, []tsdb.Point{tsdb.NewPoint( + "cpu", + map[string]string{"host": "z"}, + map[string]interface{}{"value": 500}, + time.Unix(3, 0).UTC(), + )}); err != nil { + t.Fatalf(err.Error()) + } + + var tests = []struct { + skip bool // Skip test + stmt string // Query statement + chunkSize int // Chunk size for driving the executor + expected string // Expected results, rendered as a string + }{ + { + stmt: `SELECT sum(value) FROM cpu GROUP BY host`, + expected: `[{"name":"cpu","tags":{"host":"x"},"columns":["time","sum"],"values":[["1970-01-01T00:00:00Z",300]]},{"name":"cpu","tags":{"host":"y"},"columns":["time","sum"],"values":[["1970-01-01T00:00:00Z",500]]},{"name":"cpu","tags":{"host":"z"},"columns":["time","sum"],"values":[["1970-01-01T00:00:00Z",700]]}]`, + }, + { + stmt: `SELECT value FROM cpu GROUP BY host`, + expected: `[{"name":"cpu","tags":{"host":"x"},"columns":["time","value"],"values":[["1970-01-01T00:00:02Z",300]]},{"name":"cpu","tags":{"host":"y"},"columns":["time","value"],"values":[["1970-01-01T00:00:01Z",100],["1970-01-01T00:00:03Z",400]]},{"name":"cpu","tags":{"host":"z"},"columns":["time","value"],"values":[["1970-01-01T00:00:01Z",200],["1970-01-01T00:00:03Z",500]]}]`, + }, + } + + for _, tt := range tests { + if tt.skip { + t.Logf("Skipping test %s", tt.stmt) + continue + } + executor, err := query_executor.Plan(mustParseSelectStatement(tt.stmt), tt.chunkSize) + if err != nil { + t.Fatalf("failed to plan query: %s", err.Error()) + } + got := executeAndGetResults(executor) + if got != tt.expected { + t.Fatalf("Test %s\nexp: %s\ngot: %s\n", tt.stmt, tt.expected, got) + } + } +} + +// TestProccessAggregateDerivative tests the RawQueryDerivativeProcessor transformation function on the engine. +// The is called for a query with a GROUP BY. 
+func TestProcessAggregateDerivative(t *testing.T) { + tests := []struct { + name string + fn string + interval time.Duration + in [][]interface{} + exp [][]interface{} + }{ + { + name: "empty input", + fn: "derivative", + interval: 24 * time.Hour, + in: [][]interface{}{}, + exp: [][]interface{}{}, + }, + + { + name: "single row returns 0.0", + fn: "derivative", + interval: 24 * time.Hour, + in: [][]interface{}{ + []interface{}{ + time.Unix(0, 0), 1.0, + }, + }, + exp: [][]interface{}{ + []interface{}{ + time.Unix(0, 0), 0.0, + }, + }, + }, + { + name: "basic derivative", + fn: "derivative", + interval: 24 * time.Hour, + in: [][]interface{}{ + []interface{}{ + time.Unix(0, 0), 1.0, + }, + []interface{}{ + time.Unix(0, 0).Add(24 * time.Hour), 3.0, + }, + []interface{}{ + time.Unix(0, 0).Add(48 * time.Hour), 5.0, + }, + []interface{}{ + time.Unix(0, 0).Add(72 * time.Hour), 9.0, + }, + }, + exp: [][]interface{}{ + []interface{}{ + time.Unix(0, 0).Add(24 * time.Hour), 2.0, + }, + []interface{}{ + time.Unix(0, 0).Add(48 * time.Hour), 2.0, + }, + []interface{}{ + time.Unix(0, 0).Add(72 * time.Hour), 4.0, + }, + }, + }, + { + name: "12h interval", + fn: "derivative", + interval: 12 * time.Hour, + in: [][]interface{}{ + []interface{}{ + time.Unix(0, 0), 1.0, + }, + []interface{}{ + time.Unix(0, 0).Add(24 * time.Hour), 2.0, + }, + []interface{}{ + time.Unix(0, 0).Add(48 * time.Hour), 3.0, + }, + []interface{}{ + time.Unix(0, 0).Add(72 * time.Hour), 4.0, + }, + }, + exp: [][]interface{}{ + []interface{}{ + time.Unix(0, 0).Add(24 * time.Hour), 0.5, + }, + []interface{}{ + time.Unix(0, 0).Add(48 * time.Hour), 0.5, + }, + []interface{}{ + time.Unix(0, 0).Add(72 * time.Hour), 0.5, + }, + }, + }, + { + name: "negative derivatives", + fn: "derivative", + interval: 24 * time.Hour, + in: [][]interface{}{ + []interface{}{ + time.Unix(0, 0), 1.0, + }, + []interface{}{ + time.Unix(0, 0).Add(24 * time.Hour), 2.0, + }, + []interface{}{ + time.Unix(0, 0).Add(48 * time.Hour), 0.0, + }, + []interface{}{ + time.Unix(0, 0).Add(72 * time.Hour), 4.0, + }, + }, + exp: [][]interface{}{ + []interface{}{ + time.Unix(0, 0).Add(24 * time.Hour), 1.0, + }, + []interface{}{ + time.Unix(0, 0).Add(48 * time.Hour), -2.0, + }, + []interface{}{ + time.Unix(0, 0).Add(72 * time.Hour), 4.0, + }, + }, + }, + { + name: "negative derivatives", + fn: "non_negative_derivative", + interval: 24 * time.Hour, + in: [][]interface{}{ + []interface{}{ + time.Unix(0, 0), 1.0, + }, + []interface{}{ + time.Unix(0, 0).Add(24 * time.Hour), 2.0, + }, + // Show resultes in negative derivative + []interface{}{ + time.Unix(0, 0).Add(48 * time.Hour), 0.0, + }, + []interface{}{ + time.Unix(0, 0).Add(72 * time.Hour), 4.0, + }, + }, + exp: [][]interface{}{ + []interface{}{ + time.Unix(0, 0).Add(24 * time.Hour), 1.0, + }, + []interface{}{ + time.Unix(0, 0).Add(72 * time.Hour), 4.0, + }, + }, + }, + { + name: "integer derivatives", + fn: "derivative", + interval: 24 * time.Hour, + in: [][]interface{}{ + []interface{}{ + time.Unix(0, 0), 1.0, + }, + []interface{}{ + time.Unix(0, 0).Add(24 * time.Hour), int64(3), + }, + []interface{}{ + time.Unix(0, 0).Add(48 * time.Hour), int64(5), + }, + []interface{}{ + time.Unix(0, 0).Add(72 * time.Hour), int64(9), + }, + }, + exp: [][]interface{}{ + []interface{}{ + time.Unix(0, 0).Add(24 * time.Hour), 2.0, + }, + []interface{}{ + time.Unix(0, 0).Add(48 * time.Hour), 2.0, + }, + []interface{}{ + time.Unix(0, 0).Add(72 * time.Hour), 4.0, + }, + }, + }, + { + name: "string derivatives", + fn: "derivative", + interval: 24 * 
time.Hour, + in: [][]interface{}{ + []interface{}{ + time.Unix(0, 0), "1.0", + }, + []interface{}{ + time.Unix(0, 0).Add(24 * time.Hour), "2.0", + }, + []interface{}{ + time.Unix(0, 0).Add(48 * time.Hour), "3.0", + }, + []interface{}{ + time.Unix(0, 0).Add(72 * time.Hour), "4.0", + }, + }, + exp: [][]interface{}{ + []interface{}{ + time.Unix(0, 0), 0.0, + }, + }, + }, + } + + for _, test := range tests { + got := tsdb.ProcessAggregateDerivative(test.in, test.fn == "non_negative_derivative", test.interval) + + if len(got) != len(test.exp) { + t.Fatalf("ProcessAggregateDerivative(%s) - %s\nlen mismatch: got %d, exp %d", test.fn, test.name, len(got), len(test.exp)) + } + + for i := 0; i < len(test.exp); i++ { + if test.exp[i][0] != got[i][0] || test.exp[i][1] != got[i][1] { + t.Fatalf("ProcessAggregateDerivative - %s results mismatch:\ngot %v\nexp %v", test.name, got, test.exp) + } + } + } +} + +// TestProcessRawQueryDerivative tests the RawQueryDerivativeProcessor transformation function on the engine. +// The is called for a queries that do not have a group by. +func TestProcessRawQueryDerivative(t *testing.T) { + tests := []struct { + name string + fn string + interval time.Duration + in []*tsdb.MapperValue + exp []*tsdb.MapperValue + }{ + { + name: "empty input", + fn: "derivative", + interval: 24 * time.Hour, + in: []*tsdb.MapperValue{}, + exp: []*tsdb.MapperValue{}, + }, + + { + name: "single row returns 0.0", + fn: "derivative", + interval: 24 * time.Hour, + in: []*tsdb.MapperValue{ + { + Time: time.Unix(0, 0).Unix(), + Value: 1.0, + }, + }, + exp: []*tsdb.MapperValue{ + { + Time: time.Unix(0, 0).Unix(), + Value: 0.0, + }, + }, + }, + { + name: "basic derivative", + fn: "derivative", + interval: 24 * time.Hour, + in: []*tsdb.MapperValue{ + { + Time: time.Unix(0, 0).Unix(), + Value: 0.0, + }, + { + Time: time.Unix(0, 0).Add(24 * time.Hour).UnixNano(), + Value: 3.0, + }, + { + Time: time.Unix(0, 0).Add(48 * time.Hour).UnixNano(), + Value: 5.0, + }, + { + Time: time.Unix(0, 0).Add(72 * time.Hour).UnixNano(), + Value: 9.0, + }, + }, + exp: []*tsdb.MapperValue{ + { + Time: time.Unix(0, 0).Add(24 * time.Hour).UnixNano(), + Value: 3.0, + }, + { + Time: time.Unix(0, 0).Add(48 * time.Hour).UnixNano(), + Value: 2.0, + }, + { + Time: time.Unix(0, 0).Add(72 * time.Hour).UnixNano(), + Value: 4.0, + }, + }, + }, + { + name: "integer derivative", + fn: "derivative", + interval: 24 * time.Hour, + in: []*tsdb.MapperValue{ + { + Time: time.Unix(0, 0).Unix(), + Value: int64(0), + }, + { + Time: time.Unix(0, 0).Add(24 * time.Hour).UnixNano(), + Value: int64(3), + }, + { + Time: time.Unix(0, 0).Add(48 * time.Hour).UnixNano(), + Value: int64(5), + }, + { + Time: time.Unix(0, 0).Add(72 * time.Hour).UnixNano(), + Value: int64(9), + }, + }, + exp: []*tsdb.MapperValue{ + { + Time: time.Unix(0, 0).Add(24 * time.Hour).UnixNano(), + Value: 3.0, + }, + { + Time: time.Unix(0, 0).Add(48 * time.Hour).UnixNano(), + Value: 2.0, + }, + { + Time: time.Unix(0, 0).Add(72 * time.Hour).UnixNano(), + Value: 4.0, + }, + }, + }, + { + name: "12h interval", + fn: "derivative", + interval: 12 * time.Hour, + in: []*tsdb.MapperValue{ + { + Time: time.Unix(0, 0).UnixNano(), + Value: 1.0, + }, + { + Time: time.Unix(0, 0).Add(24 * time.Hour).UnixNano(), + Value: 2.0, + }, + { + Time: time.Unix(0, 0).Add(48 * time.Hour).UnixNano(), + Value: 3.0, + }, + { + Time: time.Unix(0, 0).Add(72 * time.Hour).UnixNano(), + Value: 4.0, + }, + }, + exp: []*tsdb.MapperValue{ + { + Time: time.Unix(0, 0).Add(24 * time.Hour).UnixNano(), + Value: 0.5, + 
}, + { + Time: time.Unix(0, 0).Add(48 * time.Hour).UnixNano(), + Value: 0.5, + }, + { + Time: time.Unix(0, 0).Add(72 * time.Hour).UnixNano(), + Value: 0.5, + }, + }, + }, + { + name: "negative derivatives", + fn: "derivative", + interval: 24 * time.Hour, + in: []*tsdb.MapperValue{ + { + Time: time.Unix(0, 0).Unix(), + Value: 1.0, + }, + { + Time: time.Unix(0, 0).Add(24 * time.Hour).UnixNano(), + Value: 2.0, + }, + // should go negative + { + Time: time.Unix(0, 0).Add(48 * time.Hour).UnixNano(), + Value: 0.0, + }, + { + Time: time.Unix(0, 0).Add(72 * time.Hour).UnixNano(), + Value: 4.0, + }, + }, + exp: []*tsdb.MapperValue{ + { + Time: time.Unix(0, 0).Add(24 * time.Hour).UnixNano(), + Value: 1.0, + }, + { + Time: time.Unix(0, 0).Add(48 * time.Hour).UnixNano(), + Value: -2.0, + }, + { + Time: time.Unix(0, 0).Add(72 * time.Hour).UnixNano(), + Value: 4.0, + }, + }, + }, + { + name: "negative derivatives", + fn: "non_negative_derivative", + interval: 24 * time.Hour, + in: []*tsdb.MapperValue{ + { + Time: time.Unix(0, 0).Unix(), + Value: 1.0, + }, + { + Time: time.Unix(0, 0).Add(24 * time.Hour).UnixNano(), + Value: 2.0, + }, + // should go negative + { + Time: time.Unix(0, 0).Add(48 * time.Hour).UnixNano(), + Value: 0.0, + }, + { + Time: time.Unix(0, 0).Add(72 * time.Hour).UnixNano(), + Value: 4.0, + }, + }, + exp: []*tsdb.MapperValue{ + { + Time: time.Unix(0, 0).Add(24 * time.Hour).UnixNano(), + Value: 1.0, + }, + { + Time: time.Unix(0, 0).Add(72 * time.Hour).UnixNano(), + Value: 4.0, + }, + }, + }, + { + name: "string derivatives", + fn: "derivative", + interval: 24 * time.Hour, + in: []*tsdb.MapperValue{ + { + Time: time.Unix(0, 0).Unix(), + Value: "1.0", + }, + { + Time: time.Unix(0, 0).Add(24 * time.Hour).UnixNano(), + Value: "2.0", + }, + { + Time: time.Unix(0, 0).Add(48 * time.Hour).UnixNano(), + Value: "3.0", + }, + { + Time: time.Unix(0, 0).Add(72 * time.Hour).UnixNano(), + Value: "4.0", + }, + }, + exp: []*tsdb.MapperValue{ + { + Time: time.Unix(0, 0).Unix(), + Value: 0.0, + }, + }, + }, + } + + for _, test := range tests { + p := tsdb.RawQueryDerivativeProcessor{ + IsNonNegative: test.fn == "non_negative_derivative", + DerivativeInterval: test.interval, + } + got := p.Process(test.in) + + if len(got) != len(test.exp) { + t.Fatalf("RawQueryDerivativeProcessor(%s) - %s\nlen mismatch: got %d, exp %d", test.fn, test.name, len(got), len(test.exp)) + } + + for i := 0; i < len(test.exp); i++ { + if test.exp[i].Time != got[i].Time || math.Abs((test.exp[i].Value.(float64)-got[i].Value.(float64))) > 0.0000001 { + t.Fatalf("RawQueryDerivativeProcessor - %s results mismatch:\ngot %v\nexp %v", test.name, got, test.exp) + } + } + } +} + +type testQEMetastore struct { + sgFunc func(database, policy string, min, max time.Time) (a []meta.ShardGroupInfo, err error) +} + +func (t *testQEMetastore) ShardGroupsByTimeRange(database, policy string, min, max time.Time) (a []meta.ShardGroupInfo, err error) { + return t.sgFunc(database, policy, min, max) +} + +func (t *testQEMetastore) Database(name string) (*meta.DatabaseInfo, error) { return nil, nil } +func (t *testQEMetastore) Databases() ([]meta.DatabaseInfo, error) { return nil, nil } +func (t *testQEMetastore) User(name string) (*meta.UserInfo, error) { return nil, nil } +func (t *testQEMetastore) AdminUserExists() (bool, error) { return false, nil } +func (t *testQEMetastore) Authenticate(username, password string) (*meta.UserInfo, error) { + return nil, nil +} +func (t *testQEMetastore) RetentionPolicy(database, name string) (rpi 
*meta.RetentionPolicyInfo, err error) { + return nil, nil +} +func (t *testQEMetastore) UserCount() (int, error) { return 0, nil } + +func (t *testQEMetastore) NodeID() uint64 { return nID } + +func testStore() *tsdb.Store { + path, _ := ioutil.TempDir("", "") + + store := tsdb.NewStore(path) + err := store.Open() + if err != nil { + panic(err) + } + return store +} + +func testStoreAndQueryExecutor() (*tsdb.Store, *tsdb.QueryExecutor) { + store := testStore() + database := "foo" + retentionPolicy := "bar" + store.CreateShard(database, retentionPolicy, sID0) + store.CreateShard(database, retentionPolicy, sID1) + + query_executor := tsdb.NewQueryExecutor(store) + query_executor.ShardMapper = &testQEShardMapper{store} + + return store, query_executor +} + +type testQEShardMapper struct { + store *tsdb.Store +} + +func (t *testQEShardMapper) CreateMapper(shard meta.ShardInfo, stmt string, chunkSize int) (tsdb.Mapper, error) { + return t.store.CreateMapper(shard.ID, stmt, chunkSize) +} + +func executeAndGetResults(executor *tsdb.Executor) string { + ch := executor.Execute() + + var rows []*influxql.Row + for r := range ch { + rows = append(rows, r) + } + + b, err := json.Marshal(rows) + if err != nil { + panic(err) + } + return string(b) +} diff --git a/Godeps/_workspace/src/github.com/influxdb/influxdb/tsdb/internal/meta.pb.go b/Godeps/_workspace/src/github.com/influxdb/influxdb/tsdb/internal/meta.pb.go index 43d3eb3bf..cbe051393 100644 --- a/Godeps/_workspace/src/github.com/influxdb/influxdb/tsdb/internal/meta.pb.go +++ b/Godeps/_workspace/src/github.com/influxdb/influxdb/tsdb/internal/meta.pb.go @@ -1,12 +1,12 @@ -// Code generated by protoc-gen-go. -// source: meta.proto +// Code generated by protoc-gen-gogo. +// source: internal/meta.proto // DO NOT EDIT! /* Package internal is a generated protocol buffer package. It is generated from these files: - meta.proto + internal/meta.proto It has these top-level messages: Series @@ -16,7 +16,7 @@ It has these top-level messages: */ package internal -import proto "github.com/golang/protobuf/proto" +import proto "github.com/gogo/protobuf/proto" import math "math" // Reference imports to suppress errors if they are not otherwise used. diff --git a/Godeps/_workspace/src/github.com/influxdb/influxdb/tsdb/mapper.go b/Godeps/_workspace/src/github.com/influxdb/influxdb/tsdb/mapper.go index b4ae8e8d0..a6d9e1e7b 100644 --- a/Godeps/_workspace/src/github.com/influxdb/influxdb/tsdb/mapper.go +++ b/Godeps/_workspace/src/github.com/influxdb/influxdb/tsdb/mapper.go @@ -1,65 +1,73 @@ package tsdb import ( + "container/heap" "encoding/binary" "errors" "fmt" - "math" "sort" "strings" - "github.com/boltdb/bolt" "github.com/influxdb/influxdb/influxql" ) -// mapperValue is a complex type, which can encapsulate data from both raw and aggregate +// MapperValue is a complex type, which can encapsulate data from both raw and aggregate // mappers. This currently allows marshalling and network system to remain simpler. For // aggregate output Time is ignored, and actual Time-Value pairs are contained soley // within the Value field. -type mapperValue struct { - Time int64 `json:"time,omitempty"` // Ignored for aggregate output. - Value interface{} `json:"value,omitempty"` // For aggregate, contains interval time multiple values. +type MapperValue struct { + Time int64 `json:"time,omitempty"` // Ignored for aggregate output. + Value interface{} `json:"value,omitempty"` // For aggregate, contains interval time multiple values. 
+	Tags  map[string]string `json:"tags,omitempty"`  // Meta tags for results
 }
 
-type mapperValues []*mapperValue
+type MapperValues []*MapperValue
 
-func (a mapperValues) Len() int           { return len(a) }
-func (a mapperValues) Less(i, j int) bool { return a[i].Time < a[j].Time }
-func (a mapperValues) Swap(i, j int)      { a[i], a[j] = a[j], a[i] }
+func (a MapperValues) Len() int           { return len(a) }
+func (a MapperValues) Less(i, j int) bool { return a[i].Time < a[j].Time }
+func (a MapperValues) Swap(i, j int)      { a[i], a[j] = a[j], a[i] }
 
-type mapperOutput struct {
-	Name   string            `json:"name,omitempty"`
-	Tags   map[string]string `json:"tags,omitempty"`
-	Values []*mapperValue    `json:"values,omitempty"` // For aggregates contains a single value at [0]
+type MapperOutput struct {
+	Name      string            `json:"name,omitempty"`
+	Tags      map[string]string `json:"tags,omitempty"`
+	Fields    []string          `json:"fields,omitempty"` // Field names of returned data.
+	Values    []*MapperValue    `json:"values,omitempty"` // For aggregates contains a single value at [0]
+	cursorKey string            // Tagset-based key for the source cursor. Cached for performance reasons.
 }
 
-func (mo *mapperOutput) key() string {
-	return formMeasurementTagSetKey(mo.Name, mo.Tags)
+func (mo *MapperOutput) key() string {
+	return mo.cursorKey
 }
 
-// RawMapper is for retrieving data, for a raw query, for a single shard.
-type RawMapper struct {
-	shard     *Shard
-	stmt      *influxql.SelectStatement
-	chunkSize int
-
-	tx        *bolt.Tx // Read transaction for this shard.
-	queryTMin int64
-	queryTMax int64
-
-	whereFields  []string               // field names that occur in the where clause
-	selectFields []string               // field names that occur in the select clause
-	selectTags   []string               // tag keys that occur in the select clause
-	fieldName    string                 // the field name being read.
-	decoders     map[string]*FieldCodec // byte decoder per measurement
-
+// LocalMapper is for retrieving data for a query from a given shard.
+type LocalMapper struct {
+	shard           *Shard
+	stmt            influxql.Statement
+	selectStmt      *influxql.SelectStatement
+	rawMode         bool
+	chunkSize       int
+	tx              Tx       // Read transaction for this shard.
+	queryTMin       int64    // Minimum time of the query.
+	queryTMax       int64    // Maximum time of the query.
+	whereFields     []string // field names that occur in the where clause
+	selectFields    []string // field names that occur in the select clause
+	selectTags      []string // tag keys that occur in the select clause
 	cursors         []*tagSetCursor // Cursors per tag sets.
 	currCursorIndex int             // Current tagset cursor being drained.
+
+	// The following attributes are only used when mappers are for aggregate queries.
+
+	queryTMinWindow int64              // Minimum time of the query floored to start of interval.
+	intervalSize    int64              // Size of each interval.
+	numIntervals    int                // Maximum number of intervals to return.
+	currInterval    int                // Current interval for which data is being fetched.
+	mapFuncs        []influxql.MapFunc // The mapping functions.
+	fieldNames      []string           // the field names being read for mapping.
 }
 
-// NewRawMapper returns a mapper for the given shard, which will return data for the SELECT statement.
-func NewRawMapper(shard *Shard, stmt *influxql.SelectStatement, chunkSize int) *RawMapper {
-	return &RawMapper{
+// NewLocalMapper returns a mapper for the given shard, which will return data for the SELECT statement.
+func NewLocalMapper(shard *Shard, stmt influxql.Statement, chunkSize int) *LocalMapper {
+	return &LocalMapper{
 		shard:     shard,
 		stmt:      stmt,
 		chunkSize: chunkSize,
@@ -67,56 +75,134 @@ func NewRawMapper(shard *Shard, stmt *influxql.SelectStatement, chunkSize int) *
 	}
 }
 
-// Open opens the raw mapper.
-func (rm *RawMapper) Open() error {
+// openMeta opens the mapper for a meta query.
+func (lm *LocalMapper) openMeta() error {
+	return errors.New("not implemented")
+}
+
+// Open opens the local mapper.
+func (lm *LocalMapper) Open() error {
+	var err error
+
 	// Get a read-only transaction.
-	tx, err := rm.shard.DB().Begin(false)
+	tx, err := lm.shard.engine.Begin(false)
 	if err != nil {
 		return err
 	}
-	rm.tx = tx
+	lm.tx = tx
+
+	if s, ok := lm.stmt.(*influxql.SelectStatement); ok {
+		stmt, err := lm.rewriteSelectStatement(s)
+		if err != nil {
+			return err
+		}
+		lm.selectStmt = stmt
+		lm.rawMode = (s.IsRawQuery && !s.HasDistinct()) || s.IsSimpleDerivative()
+	} else {
+		return lm.openMeta()
+	}
 
 	// Set all time-related parameters on the mapper.
-	rm.queryTMin, rm.queryTMax = influxql.TimeRangeAsEpochNano(rm.stmt.Condition)
+	lm.queryTMin, lm.queryTMax = influxql.TimeRangeAsEpochNano(lm.selectStmt.Condition)
+
+	if !lm.rawMode {
+		if err := lm.initializeMapFunctions(); err != nil {
+			return err
+		}
+
+		// For GROUP BY time queries, limit the number of data points returned by the limit and offset
+		d, err := lm.selectStmt.GroupByInterval()
+		if err != nil {
+			return err
+		}
+		lm.intervalSize = d.Nanoseconds()
+		if lm.queryTMin == 0 || lm.intervalSize == 0 {
+			lm.numIntervals = 1
+			lm.intervalSize = lm.queryTMax - lm.queryTMin
+		} else {
+			intervalTop := lm.queryTMax/lm.intervalSize*lm.intervalSize + lm.intervalSize
+			intervalBottom := lm.queryTMin / lm.intervalSize * lm.intervalSize
+			lm.numIntervals = int((intervalTop - intervalBottom) / lm.intervalSize)
+		}
+
+		if lm.selectStmt.Limit > 0 || lm.selectStmt.Offset > 0 {
+			// ensure that the offset isn't higher than the number of points we'd get
+			if lm.selectStmt.Offset > lm.numIntervals {
+				return nil
+			}
+
+			// Take the lesser of either the precomputed number of GROUP BY buckets that
+			// will be in the result or the limit passed in by the user
+			if lm.selectStmt.Limit < lm.numIntervals {
+				lm.numIntervals = lm.selectStmt.Limit
+			}
+		}
+
+		// If we are exceeding our MaxGroupByPoints, error out
+		if lm.numIntervals > MaxGroupByPoints {
+			return errors.New("too many points in the group by interval. maybe you forgot to specify a where time clause?")
+		}
+
+		// Ensure that the start time for the results is on the start of the window.
+		lm.queryTMinWindow = lm.queryTMin
+		if lm.intervalSize > 0 && lm.numIntervals > 1 {
+			lm.queryTMinWindow = lm.queryTMinWindow / lm.intervalSize * lm.intervalSize
+		}
+	}
+
+	selectFields := newStringSet()
+	selectTags := newStringSet()
+	whereFields := newStringSet()
 
 	// Create the TagSet cursors for the Mapper.
-	for _, src := range rm.stmt.Sources {
+	for _, src := range lm.selectStmt.Sources {
 		mm, ok := src.(*influxql.Measurement)
 		if !ok {
 			return fmt.Errorf("invalid source type: %#v", src)
 		}
 
-		m := rm.shard.index.Measurement(mm.Name)
+		m := lm.shard.index.Measurement(mm.Name)
 		if m == nil {
 			// This shard has never received data for the measurement. No Mapper
 			// required.
 			return nil
 		}
 
+		// Validate that any GROUP BY is not a field for this measurement.
+		if err := m.ValidateGroupBy(lm.selectStmt); err != nil {
+			return err
+		}
+
 		// Create tagset cursors and determine various field types within SELECT statement.
-		tsf, err := createTagSetsAndFields(m, rm.stmt)
+		tsf, err := createTagSetsAndFields(m, lm.selectStmt)
 		if err != nil {
 			return err
 		}
 		tagSets := tsf.tagSets
-		rm.selectFields = tsf.selectFields
-		rm.selectTags = tsf.selectTags
-		rm.whereFields = tsf.whereFields
+		selectFields.add(tsf.selectFields...)
+		selectTags.add(tsf.selectTags...)
+		whereFields.add(tsf.whereFields...)
 
-		if len(rm.selectFields) == 0 {
-			return fmt.Errorf("select statement must include at least one field")
+		// If we only have tags in our select clause, return an error.
+		if len(selectFields) == 0 && len(selectTags) > 0 {
+			return fmt.Errorf("statement must have at least one field in select clause")
+		}
+
+		// Validate that any GROUP BY is not on a field
+		if err := m.ValidateGroupBy(lm.selectStmt); err != nil {
+			return err
 		}
 
 		// SLIMIT and SOFFSET the unique series
-		if rm.stmt.SLimit > 0 || rm.stmt.SOffset > 0 {
-			if rm.stmt.SOffset > len(tagSets) {
+		if lm.selectStmt.SLimit > 0 || lm.selectStmt.SOffset > 0 {
+			if lm.selectStmt.SOffset > len(tagSets) {
 				tagSets = nil
 			} else {
-				if rm.stmt.SOffset+rm.stmt.SLimit > len(tagSets) {
-					rm.stmt.SLimit = len(tagSets) - rm.stmt.SOffset
+				if lm.selectStmt.SOffset+lm.selectStmt.SLimit > len(tagSets) {
+					lm.selectStmt.SLimit = len(tagSets) - lm.selectStmt.SOffset
 				}
 
-				tagSets = tagSets[rm.stmt.SOffset : rm.stmt.SOffset+rm.stmt.SLimit]
+				tagSets = tagSets[lm.selectStmt.SOffset : lm.selectStmt.SOffset+lm.selectStmt.SLimit]
 			}
 		}
 
@@ -125,51 +211,72 @@ func (rm *RawMapper) Open() error {
 			cursors := []*seriesCursor{}
 
 			for i, key := range t.SeriesKeys {
-				c := createCursorForSeries(rm.tx, rm.shard, key)
+				c := lm.tx.Cursor(key)
 				if c == nil {
 					// No data exists for this key.
 					continue
 				}
-				cm := newSeriesCursor(c, t.Filters[i])
+				seriesTags := lm.shard.index.series[key].Tags
+				cm := newSeriesCursor(c, t.Filters[i], seriesTags)
 				cursors = append(cursors, cm)
 			}
 
-			tsc := newTagSetCursor(m.Name, t.Tags, cursors, rm.shard.FieldCodec(m.Name))
-			// Prime the buffers.
+			tsc := newTagSetCursor(m.Name, t.Tags, cursors, lm.shard.FieldCodec(m.Name))
+			tsc.pointHeap = newPointHeap()
+			// Prime the buffers.
 			for i := 0; i < len(tsc.cursors); i++ {
-				k, v := tsc.cursors[i].SeekTo(rm.queryTMin)
-				tsc.keyBuffer[i] = k
-				tsc.valueBuffer[i] = v
+				k, v := tsc.cursors[i].SeekTo(lm.queryTMin)
+				if k == -1 {
+					continue
+				}
+				p := &pointHeapItem{
+					timestamp: k,
+					value:     v,
+					cursor:    tsc.cursors[i],
+				}
+				heap.Push(tsc.pointHeap, p)
 			}
-			rm.cursors = append(rm.cursors, tsc)
+			lm.cursors = append(lm.cursors, tsc)
 		}
-		sort.Sort(tagSetCursors(rm.cursors))
+		sort.Sort(tagSetCursors(lm.cursors))
+	}
+
+	lm.selectFields = selectFields.list()
+	lm.selectTags = selectTags.list()
+	lm.whereFields = whereFields.list()
+
+	// If the query does not aggregate, then at least 1 SELECT field should be present.
+	if lm.rawMode && len(lm.selectFields) == 0 {
+		// None of the SELECT fields exist in this data. Wipe out all tagset cursors.
+		lm.cursors = nil
 	}
 
 	return nil
 }
 
-// TagSets returns the list of TagSets for which this mapper has data.
-func (rm *RawMapper) TagSets() []string {
-	return tagSetCursors(rm.cursors).Keys()
+func (lm *LocalMapper) NextChunk() (interface{}, error) {
+	if lm.rawMode {
+		return lm.nextChunkRaw()
+	}
+	return lm.nextChunkAgg()
 }
 
-// NextChunk returns the next chunk of data. Data comes in the same order as the
+// nextChunkRaw returns the next chunk of data. Data comes in the same order as the
 // tagsets returned by TagSets. A chunk never contains data for more than 1 tagset.
// If there is no more data for any tagset, nil will be returned. -func (rm *RawMapper) NextChunk() (interface{}, error) { - var output *mapperOutput +func (lm *LocalMapper) nextChunkRaw() (*MapperOutput, error) { + var output *MapperOutput for { - if rm.currCursorIndex == len(rm.cursors) { + if lm.currCursorIndex == len(lm.cursors) { // All tagset cursors processed. NextChunk'ing complete. return nil, nil } - cursor := rm.cursors[rm.currCursorIndex] + cursor := lm.cursors[lm.currCursorIndex] - k, v := cursor.Next(rm.queryTMin, rm.queryTMax, rm.selectFields, rm.whereFields) + k, v, t := cursor.Next(lm.queryTMin, lm.queryTMax, lm.selectFields, lm.whereFields) if v == nil { // Tagset cursor is empty, move to next one. - rm.currCursorIndex++ + lm.currCursorIndex++ if output != nil { // There is data, so return it and continue when next called. return output, nil @@ -180,237 +287,55 @@ func (rm *RawMapper) NextChunk() (interface{}, error) { } if output == nil { - output = &mapperOutput{ - Name: cursor.measurement, - Tags: cursor.tags, + output = &MapperOutput{ + Name: cursor.measurement, + Tags: cursor.tags, + Fields: lm.selectFields, + cursorKey: cursor.key(), } } - value := &mapperValue{Time: k, Value: v} + value := &MapperValue{Time: k, Value: v, Tags: t} output.Values = append(output.Values, value) - if len(output.Values) == rm.chunkSize { + if len(output.Values) == lm.chunkSize { return output, nil } } } -// Close closes the mapper. -func (rm *RawMapper) Close() { - if rm != nil && rm.tx != nil { - _ = rm.tx.Rollback() - } -} - -// AggMapper is for retrieving data, for an aggregate query, from a given shard. -type AggMapper struct { - shard *Shard - stmt *influxql.SelectStatement - - tx *bolt.Tx // Read transaction for this shard. - queryTMin int64 // Minimum time of the query. - queryTMinWindow int64 // Minimum time of the query floored to start of interval. - queryTMax int64 // Maximum time of the query. - intervalSize int64 // Size of each interval. - - mapFuncs []influxql.MapFunc // The mapping functions. - fieldNames []string // the field name being read for mapping. - - whereFields []string // field names that occur in the where clause - selectFields []string // field names that occur in the select clause - selectTags []string // tag keys that occur in the select clause - - numIntervals int // Maximum number of intervals to return. - currInterval int // Current interval for which data is being fetched. - - cursors []*tagSetCursor // Cursors per tag sets. - currCursorIndex int // Current tagset cursor being drained. -} - -// NewAggMapper returns a mapper for the given shard, which will return data for the SELECT statement. -func NewAggMapper(shard *Shard, stmt *influxql.SelectStatement) *AggMapper { - return &AggMapper{ - shard: shard, - stmt: stmt, - cursors: make([]*tagSetCursor, 0), - } -} - -// Open opens the aggregate mapper. -func (am *AggMapper) Open() error { - var err error - - // Get a read-only transaction. - tx, err := am.shard.DB().Begin(false) - if err != nil { - return err - } - am.tx = tx - - // Set up each mapping function for this statement. 
- aggregates := am.stmt.FunctionCalls() - am.mapFuncs = make([]influxql.MapFunc, len(aggregates)) - am.fieldNames = make([]string, len(am.mapFuncs)) - for i, c := range aggregates { - am.mapFuncs[i], err = influxql.InitializeMapFunc(c) - if err != nil { - return err - } - - // Check for calls like `derivative(mean(value), 1d)` - var nested *influxql.Call = c - if fn, ok := c.Args[0].(*influxql.Call); ok { - nested = fn - } - switch lit := nested.Args[0].(type) { - case *influxql.VarRef: - am.fieldNames[i] = lit.Val - case *influxql.Distinct: - if c.Name != "count" { - return fmt.Errorf("aggregate call didn't contain a field %s", c.String()) - } - am.fieldNames[i] = lit.Val - default: - return fmt.Errorf("aggregate call didn't contain a field %s", c.String()) - } - } - - // Set all time-related parameters on the mapper. - am.queryTMin, am.queryTMax = influxql.TimeRangeAsEpochNano(am.stmt.Condition) - - // For GROUP BY time queries, limit the number of data points returned by the limit and offset - d, err := am.stmt.GroupByInterval() - if err != nil { - return err - } - am.intervalSize = d.Nanoseconds() - if am.queryTMin == 0 || am.intervalSize == 0 { - am.numIntervals = 1 - am.intervalSize = am.queryTMax - am.queryTMin - } else { - intervalTop := am.queryTMax/am.intervalSize*am.intervalSize + am.intervalSize - intervalBottom := am.queryTMin / am.intervalSize * am.intervalSize - am.numIntervals = int((intervalTop - intervalBottom) / am.intervalSize) - } - - if am.stmt.Limit > 0 || am.stmt.Offset > 0 { - // ensure that the offset isn't higher than the number of points we'd get - if am.stmt.Offset > am.numIntervals { - return nil - } - - // Take the lesser of either the pre computed number of GROUP BY buckets that - // will be in the result or the limit passed in by the user - if am.stmt.Limit < am.numIntervals { - am.numIntervals = am.stmt.Limit - } - } - - // If we are exceeding our MaxGroupByPoints error out - if am.numIntervals > MaxGroupByPoints { - return errors.New("too many points in the group by interval. maybe you forgot to specify a where time clause?") - } - - // Ensure that the start time for the results is on the start of the window. - am.queryTMinWindow = am.queryTMin - if am.intervalSize > 0 && am.numIntervals > 1 { - am.queryTMinWindow = am.queryTMinWindow / am.intervalSize * am.intervalSize - } - - // Create the TagSet cursors for the Mapper. - for _, src := range am.stmt.Sources { - mm, ok := src.(*influxql.Measurement) - if !ok { - return fmt.Errorf("invalid source type: %#v", src) - } - - m := am.shard.index.Measurement(mm.Name) - if m == nil { - // This shard have never received data for the measurement. No Mapper - // required. - return nil - } - - // Create tagset cursors and determine various field types within SELECT statement. - tsf, err := createTagSetsAndFields(m, am.stmt) - if err != nil { - return err - } - tagSets := tsf.tagSets - am.selectFields = tsf.selectFields - am.selectTags = tsf.selectTags - am.whereFields = tsf.whereFields - - // Validate that group by is not a field - if err := m.ValidateGroupBy(am.stmt); err != nil { - return err - } - - // SLIMIT and SOFFSET the unique series - if am.stmt.SLimit > 0 || am.stmt.SOffset > 0 { - if am.stmt.SOffset > len(tagSets) { - tagSets = nil - } else { - if am.stmt.SOffset+am.stmt.SLimit > len(tagSets) { - am.stmt.SLimit = len(tagSets) - am.stmt.SOffset - } - - tagSets = tagSets[am.stmt.SOffset : am.stmt.SOffset+am.stmt.SLimit] - } - } - - // Create all cursors for reading the data from this shard. 
- for _, t := range tagSets { - cursors := []*seriesCursor{} - - for i, key := range t.SeriesKeys { - c := createCursorForSeries(am.tx, am.shard, key) - if c == nil { - // No data exists for this key. - continue - } - cm := newSeriesCursor(c, t.Filters[i]) - cursors = append(cursors, cm) - } - tsc := newTagSetCursor(m.Name, t.Tags, cursors, am.shard.FieldCodec(m.Name)) - am.cursors = append(am.cursors, tsc) - } - sort.Sort(tagSetCursors(am.cursors)) - } - - return nil -} - -// NextChunk returns the next chunk of data, which is the next interval of data +// nextChunkAgg returns the next chunk of data, which is the next interval of data // for the current tagset. Tagsets are always processed in the same order as that // returned by AvailTagsSets(). When there is no more data for any tagset nil // is returned. -func (am *AggMapper) NextChunk() (interface{}, error) { - var output *mapperOutput +func (lm *LocalMapper) nextChunkAgg() (*MapperOutput, error) { + var output *MapperOutput for { - if am.currCursorIndex == len(am.cursors) { + if lm.currCursorIndex == len(lm.cursors) { // All tagset cursors processed. NextChunk'ing complete. return nil, nil } - tsc := am.cursors[am.currCursorIndex] - tmin, tmax := am.nextInterval() + tsc := lm.cursors[lm.currCursorIndex] + tmin, tmax := lm.nextInterval() if tmin < 0 { // All intervals complete for this tagset. Move to the next tagset. - am.resetIntervals() - am.currCursorIndex++ + lm.currInterval = 0 + lm.currCursorIndex++ continue } // Prep the return data for this tagset. This will hold data for a single interval // for a single tagset. if output == nil { - output = &mapperOutput{ - Name: tsc.measurement, - Tags: tsc.tags, - Values: make([]*mapperValue, 1), + output = &MapperOutput{ + Name: tsc.measurement, + Tags: tsc.tags, + Fields: lm.selectFields, + Values: make([]*MapperValue, 1), + cursorKey: tsc.key(), } // Aggregate values only use the first entry in the Values field. Set the time // to the start of the interval. - output.Values[0] = &mapperValue{ + output.Values[0] = &MapperValue{ Time: tmin, Value: make([]interface{}, 0)} } @@ -418,24 +343,32 @@ func (am *AggMapper) NextChunk() (interface{}, error) { // Always clamp tmin. This can happen as bucket-times are bucketed to the nearest // interval, and this can be less than the times in the query. qmin := tmin - if qmin < am.queryTMin { - qmin = am.queryTMin + if qmin < lm.queryTMin { + qmin = lm.queryTMin } - for i := range am.mapFuncs { + tsc.pointHeap = newPointHeap() + for i := range lm.mapFuncs { // Prime the tagset cursor for the start of the interval. This is not ideal, as // it should really calculate the values all in 1 pass, but that would require // changes to the mapper functions, which can come later. // Prime the buffers. for i := 0; i < len(tsc.cursors); i++ { k, v := tsc.cursors[i].SeekTo(tmin) - tsc.keyBuffer[i] = k - tsc.valueBuffer[i] = v + if k == -1 { + continue + } + p := &pointHeapItem{ + timestamp: k, + value: v, + cursor: tsc.cursors[i], + } + heap.Push(tsc.pointHeap, p) } - // Wrap the tagset cursor so it implements the mapping functions interface. f := func() (time int64, value interface{}) { - return tsc.Next(qmin, tmax, []string{am.fieldNames[i]}, am.whereFields) + k, v, _ := tsc.Next(qmin, tmax, []string{lm.fieldNames[i]}, lm.whereFields) + return k, v } tagSetCursor := &aggTagSetCursor{ @@ -445,7 +378,7 @@ func (am *AggMapper) NextChunk() (interface{}, error) { // Execute the map function which walks the entire interval, and aggregates // the result. 
 			values := output.Values[0].Value.([]interface{})
-			output.Values[0].Value = append(values, am.mapFuncs[i](tagSetCursor))
+			output.Values[0].Value = append(values, lm.mapFuncs[i](tagSetCursor))
 		}
 		return output, nil
 	}
@@ -453,34 +386,199 @@
 
 // nextInterval returns the next interval for which to return data. If start is less than 0
 // there are no more intervals.
-func (am *AggMapper) nextInterval() (start, end int64) {
-	t := am.queryTMinWindow + int64(am.currInterval+am.stmt.Offset)*am.intervalSize
+func (lm *LocalMapper) nextInterval() (start, end int64) {
+	t := lm.queryTMinWindow + int64(lm.currInterval+lm.selectStmt.Offset)*lm.intervalSize
 
 	// Onto next interval.
-	am.currInterval++
-	if t > am.queryTMax || am.currInterval > am.numIntervals {
+	lm.currInterval++
+	if t > lm.queryTMax || lm.currInterval > lm.numIntervals {
 		start, end = -1, 1
 	} else {
-		start, end = t, t+am.intervalSize
+		start, end = t, t+lm.intervalSize
 	}
 	return
 }
 
-// resetIntervals starts the Mapper at the first interval. Subsequent intervals
-// should be retrieved via nextInterval().
-func (am *AggMapper) resetIntervals() {
-	am.currInterval = 0
+// initializeMapFunctions initializes the mapping functions for the mapper. This only applies
+// to aggregate queries.
+func (lm *LocalMapper) initializeMapFunctions() error {
+	var err error
+	// Set up each mapping function for this statement.
+	aggregates := lm.selectStmt.FunctionCalls()
+	lm.mapFuncs = make([]influxql.MapFunc, len(aggregates))
+	lm.fieldNames = make([]string, len(lm.mapFuncs))
+	for i, c := range aggregates {
+		lm.mapFuncs[i], err = influxql.InitializeMapFunc(c)
+		if err != nil {
+			return err
+		}
+
+		// Check for calls like `derivative(mean(value), 1d)`
+		var nested *influxql.Call = c
+		if fn, ok := c.Args[0].(*influxql.Call); ok {
+			nested = fn
+		}
+		switch lit := nested.Args[0].(type) {
+		case *influxql.VarRef:
+			lm.fieldNames[i] = lit.Val
+		case *influxql.Distinct:
+			if c.Name != "count" {
+				return fmt.Errorf("aggregate call didn't contain a field %s", c.String())
+			}
+			lm.fieldNames[i] = lit.Val
+		default:
+			return fmt.Errorf("aggregate call didn't contain a field %s", c.String())
+		}
+	}
+
+	return nil
+}
+
+// rewriteSelectStatement performs any necessary query re-writing.
+func (lm *LocalMapper) rewriteSelectStatement(stmt *influxql.SelectStatement) (*influxql.SelectStatement, error) {
+	var err error
+	// Expand regex expressions in the FROM clause.
+	sources, err := lm.expandSources(stmt.Sources)
+	if err != nil {
+		return nil, err
+	}
+	stmt.Sources = sources
+	// Expand wildcards in the fields or GROUP BY.
+	stmt, err = lm.expandWildcards(stmt)
+	if err != nil {
+		return nil, err
+	}
+	stmt.RewriteDistinct()
+	return stmt, nil
+}
+
+// expandWildcards returns a new SelectStatement with wildcards expanded.
+// If only a `SELECT *` is present, without a `GROUP BY *`, both tags and fields expand in the SELECT.
+// If a `SELECT *` and a `GROUP BY *` are both present, then only fields are expanded in the `SELECT` and only
+// tags are expanded in the `GROUP BY`.
+func (lm *LocalMapper) expandWildcards(stmt *influxql.SelectStatement) (*influxql.SelectStatement, error) {
+	// If there are no wildcards in the statement, return it as-is.
+	if !stmt.HasWildcard() {
+		return stmt, nil
+	}
+	// Use sets to avoid duplicate field names.
+	fieldSet := map[string]struct{}{}
+	dimensionSet := map[string]struct{}{}
+	var fields influxql.Fields
+	var dimensions influxql.Dimensions
+
+	// keep track of where the wildcards are in the select statement
+	hasFieldWildcard := stmt.HasFieldWildcard()
+	hasDimensionWildcard := stmt.HasDimensionWildcard()
+
+	// Iterate measurements in the FROM clause getting the fields & dimensions for each.
+	for _, src := range stmt.Sources {
+		if m, ok := src.(*influxql.Measurement); ok {
+			// Lookup the measurement in the database.
+			mm := lm.shard.index.Measurement(m.Name)
+			if mm == nil {
+				// This shard has never received data for the measurement. No Mapper
+				// required.
+				return stmt, nil
+			}
+			// Get the fields for this measurement.
+			for _, name := range mm.FieldNames() {
+				if _, ok := fieldSet[name]; ok {
+					continue
+				}
+				fieldSet[name] = struct{}{}
+				fields = append(fields, &influxql.Field{Expr: &influxql.VarRef{Val: name}})
+			}
+
+			// Add tags to fields if a field wildcard was provided and a dimension wildcard was not.
+			if hasFieldWildcard && !hasDimensionWildcard {
+				for _, t := range mm.TagKeys() {
+					if _, ok := fieldSet[t]; ok {
+						continue
+					}
+					fieldSet[t] = struct{}{}
+					fields = append(fields, &influxql.Field{Expr: &influxql.VarRef{Val: t}})
+				}
+			}
+
+			// Get the dimensions for this measurement.
+			if hasDimensionWildcard {
+				for _, t := range mm.TagKeys() {
+					if _, ok := dimensionSet[t]; ok {
+						continue
+					}
+					dimensionSet[t] = struct{}{}
+					dimensions = append(dimensions, &influxql.Dimension{Expr: &influxql.VarRef{Val: t}})
+				}
+			}
+		}
+	}
+
+	// Return a new SelectStatement with the wildcards rewritten.
+	return stmt.RewriteWildcards(fields, dimensions), nil
+}
+
+// expandSources expands regex sources and removes duplicates.
+// NOTE: sources must be normalized (db and rp set) before calling this function.
+func (lm *LocalMapper) expandSources(sources influxql.Sources) (influxql.Sources, error) {
+	// Use a map as a set to prevent duplicates. Two regexes might produce
+	// duplicates when expanded.
+	set := map[string]influxql.Source{}
+	names := []string{}
+	// Iterate all sources, expanding regexes when they're found.
+	for _, source := range sources {
+		switch src := source.(type) {
+		case *influxql.Measurement:
+			if src.Regex == nil {
+				name := src.String()
+				set[name] = src
+				names = append(names, name)
+				continue
+			}
+			// Get measurements from the database that match the regex.
+			measurements := lm.shard.index.measurementsByRegex(src.Regex.Val)
+			// Add those measurements to the set.
+			for _, m := range measurements {
+				m2 := &influxql.Measurement{
+					Database:        src.Database,
+					RetentionPolicy: src.RetentionPolicy,
+					Name:            m.Name,
+				}
+				name := m2.String()
+				if _, ok := set[name]; !ok {
+					set[name] = m2
+					names = append(names, name)
+				}
+			}
+		default:
+			return nil, fmt.Errorf("expandSources: unsupported source type: %T", source)
+		}
+	}
+	// Sort the list of source names.
+	sort.Strings(names)
+	// Convert set to a list of Sources.
+	expanded := make(influxql.Sources, 0, len(set))
+	for _, name := range names {
+		expanded = append(expanded, set[name])
+	}
+	return expanded, nil
 }
 
 // TagSets returns the list of TagSets for which this mapper has data.
-func (am *AggMapper) TagSets() []string {
-	return tagSetCursors(am.cursors).Keys()
+func (lm *LocalMapper) TagSets() []string {
+	return tagSetCursors(lm.cursors).Keys()
+}
+
+// Fields returns any SELECT fields. If this Mapper is not processing a SELECT query
+// then an empty slice is returned.
+func (lm *LocalMapper) Fields() []string {
+	return append(lm.selectFields, lm.selectTags...)
 }
 
 // Close closes the mapper.
-func (am *AggMapper) Close() {
-	if am != nil && am.tx != nil {
-		_ = am.tx.Rollback()
+func (lm *LocalMapper) Close() {
+	if lm != nil && lm.tx != nil {
+		_ = lm.tx.Rollback()
 	}
 }
 
@@ -496,6 +594,42 @@
 func (a *aggTagSetCursor) Next() (time int64, value interface{}) {
 	return a.nextFunc()
 }
 
+type pointHeapItem struct {
+	timestamp int64
+	value     []byte
+	cursor    *seriesCursor // cursor whence pointHeapItem came
+}
+
+type pointHeap []*pointHeapItem
+
+func newPointHeap() *pointHeap {
+	q := make(pointHeap, 0)
+	heap.Init(&q)
+	return &q
+}
+
+func (pq pointHeap) Len() int { return len(pq) }
+
+func (pq pointHeap) Less(i, j int) bool {
+	// We want a min-heap (points in chronological order), so use less than.
+	return pq[i].timestamp < pq[j].timestamp
+}
+
+func (pq pointHeap) Swap(i, j int) { pq[i], pq[j] = pq[j], pq[i] }
+
+func (pq *pointHeap) Push(x interface{}) {
+	item := x.(*pointHeapItem)
+	*pq = append(*pq, item)
+}
+
+func (pq *pointHeap) Pop() interface{} {
+	old := *pq
+	n := len(old)
+	item := old[n-1]
+	*pq = old[0 : n-1]
+	return item
+}
+
// tagSetCursor is a virtual cursor that iterates over multiple series cursors, as though it were
 // a single series.
 type tagSetCursor struct {
@@ -504,11 +638,18 @@
 	cursors     []*seriesCursor   // Underlying series cursors.
 	decoder     *FieldCodec       // decoder for the raw data bytes
 
-	// Lookahead buffers for the cursors. Performance analysis shows that it is critical
-	// that these buffers are part of the tagSetCursor type and not part of the the
-	// cursors type.
-	keyBuffer   []int64  // The current timestamp key for each cursor
-	valueBuffer [][]byte // The current value for each cursor
+	// pointHeap is a min-heap, ordered by timestamp, that contains the next
+	// point from each seriesCursor. Queries sometimes pull points from
+	// thousands of series. This makes it reasonably efficient to find the
+	// point with the next lowest timestamp among the thousands of series that
+	// the query is pulling points from.
+	// Performance profiling shows that this lookahead needs to be part
+	// of the tagSetCursor type and not part of the cursors type.
+	pointHeap *pointHeap
+
+	// Memoize the cursor's tagset-based key. Profiling shows that calculating this
+	// incurs significant CPU cost, and it only needs to be done once.
+	memokey string
 }
 
 // tagSetCursors represents a sortable slice of tagSetCursors.
@@ -529,102 +670,115 @@
 
 // newTagSetCursor returns a tagSetCursor
 func newTagSetCursor(m string, t map[string]string, c []*seriesCursor, d *FieldCodec) *tagSetCursor {
-	return &tagSetCursor{
+	tsc := &tagSetCursor{
 		measurement: m,
 		tags:        t,
 		cursors:     c,
 		decoder:     d,
-		keyBuffer:   make([]int64, len(c)),
-		valueBuffer: make([][]byte, len(c)),
+		pointHeap:   newPointHeap(),
 	}
+
+	return tsc
 }
 
 func (tsc *tagSetCursor) key() string {
-	return formMeasurementTagSetKey(tsc.measurement, tsc.tags)
+	if tsc.memokey == "" {
+		tsc.memokey = formMeasurementTagSetKey(tsc.measurement, tsc.tags)
+	}
+	return tsc.memokey
 }
 
-// Next returns the next matching series-key, timestamp and byte slice for the tagset. Filtering
+// Next returns the next matching timestamp, value, and meta tags for the tagset. Filtering
 // is enforced on the values. If there is no matching value, then a nil result is returned.
-func (tsc *tagSetCursor) Next(tmin, tmax int64, selectFields, whereFields []string) (int64, interface{}) { +func (tsc *tagSetCursor) Next(tmin, tmax int64, selectFields, whereFields []string) (int64, interface{}, map[string]string) { for { - // Find the next lowest timestamp - min := -1 - minKey := int64(math.MaxInt64) - for i, k := range tsc.keyBuffer { - if k != -1 && (k == tmin) || k < minKey && k >= tmin && k < tmax { - min = i - minKey = k - } + // If we're out of points, we're done. + if tsc.pointHeap.Len() == 0 { + return -1, nil, nil } - // Return if there is no more data for this tagset. - if min == -1 { - return -1, nil - } + // Grab the next point with the lowest timestamp. + p := heap.Pop(tsc.pointHeap).(*pointHeapItem) - // set the current timestamp and seriesID - timestamp := tsc.keyBuffer[min] - - var value interface{} - if len(selectFields) > 1 { - if fieldsWithNames, err := tsc.decoder.DecodeFieldsWithNames(tsc.valueBuffer[min]); err == nil { - value = fieldsWithNames - - // if there's a where clause, make sure we don't need to filter this value - if tsc.cursors[min].filter != nil && !matchesWhere(tsc.cursors[min].filter, fieldsWithNames) { - value = nil - } - } - } else { - // With only 1 field SELECTed, decoding all fields may be avoidable, which is faster. - var err error - value, err = tsc.decoder.DecodeByName(selectFields[0], tsc.valueBuffer[min]) - if err != nil { - value = nil - } else { - // If there's a WHERE clase, see if we need to filter - if tsc.cursors[min].filter != nil { - // See if the WHERE is only on this field or on one or more other fields. - // If the latter, we'll have to decode everything - if len(whereFields) == 1 && whereFields[0] == selectFields[0] { - if !matchesWhere(tsc.cursors[min].filter, map[string]interface{}{selectFields[0]: value}) { - value = nil - } - } else { // Decode everything - fieldsWithNames, err := tsc.decoder.DecodeFieldsWithNames(tsc.valueBuffer[min]) - if err != nil || !matchesWhere(tsc.cursors[min].filter, fieldsWithNames) { - value = nil - } - } - } - } + // We're done if the point is outside the query's time range [tmin:tmax). + if p.timestamp != tmin && (tmin > p.timestamp || p.timestamp >= tmax) { + return -1, nil, nil } // Advance the cursor - nextKey, nextVal := tsc.cursors[min].Next() - tsc.keyBuffer[min] = nextKey - tsc.valueBuffer[min] = nextVal + nextKey, nextVal := p.cursor.Next() + if nextKey != -1 { + nextPoint := &pointHeapItem{ + timestamp: nextKey, + value: nextVal, + cursor: p.cursor, + } + heap.Push(tsc.pointHeap, nextPoint) + } + + // Decode the raw point. + value := tsc.decodeRawPoint(p, selectFields, whereFields) // Value didn't match, look for the next one. if value == nil { continue } - return timestamp, value + return p.timestamp, value, p.cursor.tags } } +// decodeRawPoint decodes raw point data into field names & values and does WHERE filtering. +func (tsc *tagSetCursor) decodeRawPoint(p *pointHeapItem, selectFields, whereFields []string) interface{} { + if len(selectFields) > 1 { + if fieldsWithNames, err := tsc.decoder.DecodeFieldsWithNames(p.value); err == nil { + // if there's a where clause, make sure we don't need to filter this value + if p.cursor.filter != nil && !matchesWhere(p.cursor.filter, fieldsWithNames) { + return nil + } + + return fieldsWithNames + } + } + + // With only 1 field SELECTed, decoding all fields may be avoidable, which is faster. 
+	value, err := tsc.decoder.DecodeByName(selectFields[0], p.value)
+	if err != nil {
+		return nil
+	}
+
+	// If there's a WHERE clause, see if we need to filter
+	if p.cursor.filter != nil {
+		// See if the WHERE is only on this field or on one or more other fields.
+		// If the latter, we'll have to decode everything
+		if len(whereFields) == 1 && whereFields[0] == selectFields[0] {
+			if !matchesWhere(p.cursor.filter, map[string]interface{}{selectFields[0]: value}) {
+				value = nil
+			}
+		} else { // Decode everything
+			fieldsWithNames, err := tsc.decoder.DecodeFieldsWithNames(p.value)
+			if err != nil || !matchesWhere(p.cursor.filter, fieldsWithNames) {
+				value = nil
+			}
+		}
+	}
+
+	return value
+}
+
 // seriesCursor is a cursor that walks a single series. It provides lookahead functionality.
 type seriesCursor struct {
-	cursor *shardCursor // BoltDB cursor for a series
+	cursor Cursor // cursor for a single series
 	filter influxql.Expr
+	tags   map[string]string
 }
 
 // newSeriesCursor returns a new instance of a series cursor.
-func newSeriesCursor(b *shardCursor, filter influxql.Expr) *seriesCursor {
+func newSeriesCursor(cur Cursor, filter influxql.Expr, tags map[string]string) *seriesCursor {
 	return &seriesCursor{
-		cursor: b,
+		cursor: cur,
 		filter: filter,
+		tags:   tags,
 	}
 }
 
@@ -650,30 +804,6 @@ func (sc *seriesCursor) Next() (key int64, value []byte) {
 	return
 }
 
-// createCursorForSeries creates a cursor for walking the given series key. The cursor
-// consolidates both the Bolt store and any WAL cache.
-func createCursorForSeries(tx *bolt.Tx, shard *Shard, key string) *shardCursor {
-	// Retrieve key bucket.
-	b := tx.Bucket([]byte(key))
-
-	// Ignore if there is no bucket or points in the cache.
-	partitionID := WALPartition([]byte(key))
-	if b == nil && len(shard.cache[partitionID][key]) == 0 {
-		return nil
-	}
-
-	// Retrieve a copy of the in-cache points for the key.
-	cache := make([][]byte, len(shard.cache[partitionID][key]))
-	copy(cache, shard.cache[partitionID][key])
-
-	// Build a cursor that merges the bucket and cache together.
-	cur := &shardCursor{cache: cache}
-	if b != nil {
-		cur.cursor = b.Cursor()
-	}
-	return cur
-}
-
 type tagSetsAndFields struct {
 	tagSets      []*influxql.TagSet
 	selectFields []string
@@ -682,7 +812,7 @@
 }
 
 // createTagSetsAndFields returns the tagsets and various fields given a measurement and
-// SELECT statement. It also ensures that the fields and tags exist.
+// SELECT statement.
 func createTagSetsAndFields(m *Measurement, stmt *influxql.SelectStatement) (*tagSetsAndFields, error) {
 	_, tagKeys, err := stmt.Dimensions.Normalize()
 	if err != nil {
@@ -699,12 +829,17 @@
 			sfs.add(n)
 			continue
 		}
-		if !m.HasTagKey(n) {
-			return nil, fmt.Errorf("unknown field or tag name in select clause: %s", n)
+		if m.HasTagKey(n) {
+			sts.add(n)
 		}
-		sts.add(n)
-		tagKeys = append(tagKeys, n)
 	}
+
+	for _, n := range stmt.NamesInDimension() {
+		if m.HasTagKey(n) {
+			tagKeys = append(tagKeys, n)
+		}
+	}
+
 	for _, n := range stmt.NamesInWhere() {
 		if n == "time" {
 			continue
@@ -713,9 +848,6 @@
 			wfs.add(n)
 			continue
 		}
-		if !m.HasTagKey(n) {
-			return nil, fmt.Errorf("unknown field or tag name in where clause: %s", n)
-		}
 	}
 
 	// Get the sorted unique tag sets for this statement.
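An aside between hunks: the tagSetCursor rework above replaces the per-cursor keyBuffer/valueBuffer lookahead with a timestamp-ordered min-heap, so the next point across many series is found with heap pops rather than a linear scan. The following is a minimal standalone sketch of that merge pattern (not part of the patch; the names point, minHeap, and series are hypothetical), using only container/heap:

package main

import (
	"container/heap"
	"fmt"
)

// point carries a timestamp plus the index of the series it came from.
type point struct {
	timestamp int64
	series    int
}

// minHeap orders points by timestamp, smallest first.
type minHeap []point

func (h minHeap) Len() int            { return len(h) }
func (h minHeap) Less(i, j int) bool  { return h[i].timestamp < h[j].timestamp }
func (h minHeap) Swap(i, j int)       { h[i], h[j] = h[j], h[i] }
func (h *minHeap) Push(x interface{}) { *h = append(*h, x.(point)) }
func (h *minHeap) Pop() interface{} {
	old := *h
	n := len(old)
	p := old[n-1]
	*h = old[:n-1]
	return p
}

func main() {
	// Three "series", each already sorted by time, standing in for seriesCursors.
	series := [][]int64{{1, 4, 7}, {2, 5}, {3, 6}}
	next := make([]int, len(series)) // next unread index per series

	h := &minHeap{}
	heap.Init(h)
	// Prime the heap with the first point of each series, as Open() does.
	for i, s := range series {
		if len(s) > 0 {
			heap.Push(h, point{timestamp: s[0], series: i})
			next[i] = 1
		}
	}
	// Drain in global timestamp order, as Next() does: pop the lowest
	// timestamp, then push that series' following point.
	for h.Len() > 0 {
		p := heap.Pop(h).(point)
		fmt.Println(p.timestamp) // prints 1 through 7 in order
		if n := next[p.series]; n < len(series[p.series]) {
			heap.Push(h, point{timestamp: series[p.series][n], series: p.series})
			next[p.series]++
		}
	}
}

With k series each contributing its next point, each pop/push is O(log k) instead of the O(k) scan the old buffers required, which is the motivation the pointHeap comment gives for queries that pull from thousands of series.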
@@ -744,7 +876,7 @@ func formMeasurementTagSetKey(name string, tags map[string]string) string { if len(tags) == 0 { return name } - return strings.Join([]string{name, string(marshalTags(tags))}, "|") + return strings.Join([]string{name, string(MarshalTags(tags))}, "|") } // btou64 converts an 8-byte slice into an uint64. diff --git a/Godeps/_workspace/src/github.com/influxdb/influxdb/tsdb/mapper_test.go b/Godeps/_workspace/src/github.com/influxdb/influxdb/tsdb/mapper_test.go index b6796fd95..5fff0e8c5 100644 --- a/Godeps/_workspace/src/github.com/influxdb/influxdb/tsdb/mapper_test.go +++ b/Godeps/_workspace/src/github.com/influxdb/influxdb/tsdb/mapper_test.go @@ -1,4 +1,4 @@ -package tsdb +package tsdb_test import ( "encoding/json" @@ -12,92 +12,119 @@ import ( "time" "github.com/influxdb/influxdb/influxql" + "github.com/influxdb/influxdb/tsdb" ) -func TestShardMapper_RawMapperTagSets(t *testing.T) { +func TestShardMapper_RawMapperTagSetsFields(t *testing.T) { tmpDir, _ := ioutil.TempDir("", "shard_test") defer os.RemoveAll(tmpDir) shard := mustCreateShard(tmpDir) pt1time := time.Unix(1, 0).UTC() - pt1 := NewPoint( + pt1 := tsdb.NewPoint( "cpu", map[string]string{"host": "serverA", "region": "us-east"}, - map[string]interface{}{"value": 42}, + map[string]interface{}{"idle": 60}, pt1time, ) pt2time := time.Unix(2, 0).UTC() - pt2 := NewPoint( + pt2 := tsdb.NewPoint( "cpu", map[string]string{"host": "serverB", "region": "us-east"}, - map[string]interface{}{"value": 60}, + map[string]interface{}{"load": 60}, pt2time, ) - err := shard.WritePoints([]Point{pt1, pt2}) + err := shard.WritePoints([]tsdb.Point{pt1, pt2}) if err != nil { t.Fatalf(err.Error()) } var tests = []struct { - stmt string - expected []string + stmt string + expectedTags []string + expectedFields []string }{ { - stmt: `SELECT value FROM cpu`, - expected: []string{"cpu"}, + stmt: `SELECT load FROM cpu`, + expectedTags: []string{"cpu"}, + expectedFields: []string{"load"}, }, { - stmt: `SELECT value FROM cpu GROUP BY host`, - expected: []string{"cpu|host|serverA", "cpu|host|serverB"}, + stmt: `SELECT derivative(load) FROM cpu`, + expectedTags: []string{"cpu"}, + expectedFields: []string{"load"}, }, { - stmt: `SELECT value FROM cpu GROUP BY region`, - expected: []string{"cpu|region|us-east"}, + stmt: `SELECT idle,load FROM cpu`, + expectedTags: []string{"cpu"}, + expectedFields: []string{"idle", "load"}, }, { - stmt: `SELECT value FROM cpu WHERE host='serverA'`, - expected: []string{"cpu"}, + stmt: `SELECT load,idle FROM cpu`, + expectedTags: []string{"cpu"}, + expectedFields: []string{"idle", "load"}, }, { - stmt: `SELECT value FROM cpu WHERE host='serverB'`, - expected: []string{"cpu"}, + stmt: `SELECT load FROM cpu GROUP BY host`, + expectedTags: []string{"cpu|host|serverA", "cpu|host|serverB"}, + expectedFields: []string{"load"}, }, { - stmt: `SELECT value FROM cpu WHERE host='serverC'`, - expected: []string{}, + stmt: `SELECT load FROM cpu GROUP BY region`, + expectedTags: []string{"cpu|region|us-east"}, + expectedFields: []string{"load"}, + }, + { + stmt: `SELECT load FROM cpu WHERE host='serverA'`, + expectedTags: []string{"cpu"}, + expectedFields: []string{"load"}, + }, + { + stmt: `SELECT load FROM cpu WHERE host='serverB'`, + expectedTags: []string{"cpu"}, + expectedFields: []string{"load"}, + }, + { + stmt: `SELECT load FROM cpu WHERE host='serverC'`, + expectedTags: []string{}, + expectedFields: []string{"load"}, }, } for _, tt := range tests { stmt := mustParseSelectStatement(tt.stmt) mapper := openRawMapperOrFail(t, 
shard, stmt, 0) - got := mapper.TagSets() - if !reflect.DeepEqual(got, tt.expected) { - t.Errorf("test '%s'\n\tgot %s\n\texpected %s", tt.stmt, got, tt.expected) + tags := mapper.TagSets() + if !reflect.DeepEqual(tags, tt.expectedTags) { + t.Errorf("test '%s'\n\tgot %s\n\texpected %s", tt.stmt, tags, tt.expectedTags) + } + fields := mapper.Fields() + if !reflect.DeepEqual(fields, tt.expectedFields) { + t.Errorf("test '%s'\n\tgot %s\n\texpected %s", tt.stmt, fields, tt.expectedFields) } } } -func TestShardMapper_WriteAndSingleMapperRawQuery(t *testing.T) { +func TestShardMapper_WriteAndSingleMapperRawQuerySingleValue(t *testing.T) { tmpDir, _ := ioutil.TempDir("", "shard_test") defer os.RemoveAll(tmpDir) shard := mustCreateShard(tmpDir) pt1time := time.Unix(1, 0).UTC() - pt1 := NewPoint( + pt1 := tsdb.NewPoint( "cpu", map[string]string{"host": "serverA", "region": "us-east"}, - map[string]interface{}{"value": 42}, + map[string]interface{}{"load": 42}, pt1time, ) pt2time := time.Unix(2, 0).UTC() - pt2 := NewPoint( + pt2 := tsdb.NewPoint( "cpu", map[string]string{"host": "serverB", "region": "us-east"}, - map[string]interface{}{"value": 60}, + map[string]interface{}{"load": 60}, pt2time, ) - err := shard.WritePoints([]Point{pt1, pt2}) + err := shard.WritePoints([]tsdb.Point{pt1, pt2}) if err != nil { t.Fatalf(err.Error()) } @@ -108,62 +135,65 @@ func TestShardMapper_WriteAndSingleMapperRawQuery(t *testing.T) { expected []string }{ { - stmt: `SELECT value FROM cpu`, - expected: []string{`{"name":"cpu","values":[{"time":1000000000,"value":42},{"time":2000000000,"value":60}]}`, `null`}, + stmt: `SELECT load FROM cpu`, + expected: []string{`{"name":"cpu","fields":["load"],"values":[{"time":1000000000,"value":42,"tags":{"host":"serverA","region":"us-east"}},{"time":2000000000,"value":60,"tags":{"host":"serverB","region":"us-east"}}]}`, `null`}, }, { - stmt: `SELECT value FROM cpu`, + stmt: `SELECT load FROM cpu # chunkSize 1`, chunkSize: 1, - expected: []string{`{"name":"cpu","values":[{"time":1000000000,"value":42}]}`, `{"name":"cpu","values":[{"time":2000000000,"value":60}]}`, `null`}, + expected: []string{`{"name":"cpu","fields":["load"],"values":[{"time":1000000000,"value":42,"tags":{"host":"serverA","region":"us-east"}}]}`}, }, { - stmt: `SELECT value FROM cpu`, + stmt: `SELECT load FROM cpu # chunkSize 2`, chunkSize: 2, - expected: []string{`{"name":"cpu","values":[{"time":1000000000,"value":42},{"time":2000000000,"value":60}]}`}, + expected: []string{`{"name":"cpu","fields":["load"],"values":[{"time":1000000000,"value":42,"tags":{"host":"serverA","region":"us-east"}},{"time":2000000000,"value":60,"tags":{"host":"serverB","region":"us-east"}}]}`}, }, { - stmt: `SELECT value FROM cpu`, + stmt: `SELECT load FROM cpu # chunkSize 3`, chunkSize: 3, - expected: []string{`{"name":"cpu","values":[{"time":1000000000,"value":42},{"time":2000000000,"value":60}]}`}, + expected: []string{`{"name":"cpu","fields":["load"],"values":[{"time":1000000000,"value":42,"tags":{"host":"serverA","region":"us-east"}},{"time":2000000000,"value":60,"tags":{"host":"serverB","region":"us-east"}}]}`}, }, { - stmt: `SELECT value FROM cpu GROUP BY host`, - expected: []string{`{"name":"cpu","tags":{"host":"serverA"},"values":[{"time":1000000000,"value":42}]}`, `{"name":"cpu","tags":{"host":"serverB"},"values":[{"time":2000000000,"value":60}]}`, `null`}, + stmt: `SELECT load FROM cpu GROUP BY host`, + expected: []string{ + 
`{"name":"cpu","tags":{"host":"serverA"},"fields":["load"],"values":[{"time":1000000000,"value":42,"tags":{"host":"serverA","region":"us-east"}}]}`, + `{"name":"cpu","tags":{"host":"serverB"},"fields":["load"],"values":[{"time":2000000000,"value":60,"tags":{"host":"serverB","region":"us-east"}}]}`, + }, }, { - stmt: `SELECT value FROM cpu GROUP BY region`, - expected: []string{`{"name":"cpu","tags":{"region":"us-east"},"values":[{"time":1000000000,"value":42},{"time":2000000000,"value":60}]}`, `null`}, + stmt: `SELECT load FROM cpu GROUP BY region`, + expected: []string{`{"name":"cpu","tags":{"region":"us-east"},"fields":["load"],"values":[{"time":1000000000,"value":42,"tags":{"host":"serverA","region":"us-east"}},{"time":2000000000,"value":60,"tags":{"host":"serverB","region":"us-east"}}]}`}, }, { - stmt: `SELECT value FROM cpu WHERE host='serverA'`, - expected: []string{`{"name":"cpu","values":[{"time":1000000000,"value":42}]}`, `null`}, + stmt: `SELECT load FROM cpu WHERE host='serverA'`, + expected: []string{`{"name":"cpu","fields":["load"],"values":[{"time":1000000000,"value":42,"tags":{"host":"serverA","region":"us-east"}}]}`}, }, { - stmt: `SELECT value FROM cpu WHERE host='serverB'`, - expected: []string{`{"name":"cpu","values":[{"time":2000000000,"value":60}]}`, `null`}, + stmt: `SELECT load FROM cpu WHERE host='serverB'`, + expected: []string{`{"name":"cpu","fields":["load"],"values":[{"time":2000000000,"value":60,"tags":{"host":"serverB","region":"us-east"}}]}`}, }, { - stmt: `SELECT value FROM cpu WHERE host='serverC'`, + stmt: `SELECT load FROM cpu WHERE host='serverC'`, expected: []string{`null`}, }, { - stmt: `SELECT value FROM cpu WHERE value = 60`, - expected: []string{`{"name":"cpu","values":[{"time":2000000000,"value":60}]}`, `null`}, + stmt: `SELECT load FROM cpu WHERE load = 60`, + expected: []string{`{"name":"cpu","fields":["load"],"values":[{"time":2000000000,"value":60,"tags":{"host":"serverB","region":"us-east"}}]}`}, }, { - stmt: `SELECT value FROM cpu WHERE value != 60`, - expected: []string{`{"name":"cpu","values":[{"time":1000000000,"value":42}]}`, `null`}, + stmt: `SELECT load FROM cpu WHERE load != 60`, + expected: []string{`{"name":"cpu","fields":["load"],"values":[{"time":1000000000,"value":42,"tags":{"host":"serverA","region":"us-east"}}]}`}, }, { - stmt: fmt.Sprintf(`SELECT value FROM cpu WHERE time = '%s'`, pt1time.Format(influxql.DateTimeFormat)), - expected: []string{`{"name":"cpu","values":[{"time":1000000000,"value":42}]}`, `null`}, + stmt: fmt.Sprintf(`SELECT load FROM cpu WHERE time = '%s'`, pt1time.Format(influxql.DateTimeFormat)), + expected: []string{`{"name":"cpu","fields":["load"],"values":[{"time":1000000000,"value":42,"tags":{"host":"serverA","region":"us-east"}}]}`}, }, { - stmt: fmt.Sprintf(`SELECT value FROM cpu WHERE time > '%s'`, pt1time.Format(influxql.DateTimeFormat)), - expected: []string{`{"name":"cpu","values":[{"time":2000000000,"value":60}]}`, `null`}, + stmt: fmt.Sprintf(`SELECT load FROM cpu WHERE time > '%s'`, pt1time.Format(influxql.DateTimeFormat)), + expected: []string{`{"name":"cpu","fields":["load"],"values":[{"time":2000000000,"value":60,"tags":{"host":"serverB","region":"us-east"}}]}`}, }, { - stmt: fmt.Sprintf(`SELECT value FROM cpu WHERE time > '%s'`, pt2time.Format(influxql.DateTimeFormat)), + stmt: fmt.Sprintf(`SELECT load FROM cpu WHERE time > '%s'`, pt2time.Format(influxql.DateTimeFormat)), expected: []string{`null`}, }, } @@ -188,20 +218,20 @@ func TestShardMapper_WriteAndSingleMapperRawQueryMultiValue(t 
*testing.T) { shard := mustCreateShard(tmpDir) pt1time := time.Unix(1, 0).UTC() - pt1 := NewPoint( + pt1 := tsdb.NewPoint( "cpu", map[string]string{"host": "serverA", "region": "us-east"}, map[string]interface{}{"foo": 42, "bar": 43}, pt1time, ) pt2time := time.Unix(2, 0).UTC() - pt2 := NewPoint( + pt2 := tsdb.NewPoint( "cpu", map[string]string{"host": "serverB", "region": "us-east"}, map[string]interface{}{"foo": 60, "bar": 61}, pt2time, ) - err := shard.WritePoints([]Point{pt1, pt2}) + err := shard.WritePoints([]tsdb.Point{pt1, pt2}) if err != nil { t.Fatalf(err.Error()) } @@ -213,11 +243,11 @@ func TestShardMapper_WriteAndSingleMapperRawQueryMultiValue(t *testing.T) { }{ { stmt: `SELECT foo FROM cpu`, - expected: []string{`{"name":"cpu","values":[{"time":1000000000,"value":42},{"time":2000000000,"value":60}]}`, `null`}, + expected: []string{`{"name":"cpu","fields":["foo"],"values":[{"time":1000000000,"value":42,"tags":{"host":"serverA","region":"us-east"}},{"time":2000000000,"value":60,"tags":{"host":"serverB","region":"us-east"}}]}`}, }, { stmt: `SELECT foo,bar FROM cpu`, - expected: []string{`{"name":"cpu","values":[{"time":1000000000,"value":{"bar":43,"foo":42}},{"time":2000000000,"value":{"bar":61,"foo":60}}]}`, `null`}, + expected: []string{`{"name":"cpu","fields":["bar","foo"],"values":[{"time":1000000000,"value":{"bar":43,"foo":42},"tags":{"host":"serverA","region":"us-east"}},{"time":2000000000,"value":{"bar":61,"foo":60},"tags":{"host":"serverB","region":"us-east"}}]}`}, }, } @@ -225,10 +255,75 @@ func TestShardMapper_WriteAndSingleMapperRawQueryMultiValue(t *testing.T) { stmt := mustParseSelectStatement(tt.stmt) mapper := openRawMapperOrFail(t, shard, stmt, tt.chunkSize) - for _, s := range tt.expected { + for i, s := range tt.expected { got := nextRawChunkAsJson(t, mapper) if got != s { - t.Errorf("test '%s'\n\tgot %s\n\texpected %s", tt.stmt, got, tt.expected) + t.Errorf("test '%s'\n\tgot %s\n\texpected %s", tt.stmt, got, tt.expected[i]) + break + } + } + } +} + +func TestShardMapper_WriteAndSingleMapperRawQueryMultiSource(t *testing.T) { + tmpDir, _ := ioutil.TempDir("", "shard_test") + defer os.RemoveAll(tmpDir) + shard := mustCreateShard(tmpDir) + + pt1time := time.Unix(1, 0).UTC() + pt1 := tsdb.NewPoint( + "cpu0", + map[string]string{"host": "serverA", "region": "us-east"}, + map[string]interface{}{"foo": 42}, + pt1time, + ) + pt2time := time.Unix(2, 0).UTC() + pt2 := tsdb.NewPoint( + "cpu1", + map[string]string{"host": "serverB", "region": "us-east"}, + map[string]interface{}{"bar": 60}, + pt2time, + ) + err := shard.WritePoints([]tsdb.Point{pt1, pt2}) + if err != nil { + t.Fatalf(err.Error()) + } + + var tests = []struct { + stmt string + chunkSize int + expected []string + }{ + { + stmt: `SELECT foo FROM cpu0,cpu1`, + expected: []string{`{"name":"cpu0","fields":["foo"],"values":[{"time":1000000000,"value":42,"tags":{"host":"serverA","region":"us-east"}}]}`}, + }, + { + stmt: `SELECT foo FROM cpu0,cpu1 WHERE foo=42`, + expected: []string{`{"name":"cpu0","fields":["foo"],"values":[{"time":1000000000,"value":42,"tags":{"host":"serverA","region":"us-east"}}]}`}, + }, + { + stmt: `SELECT bar FROM cpu0,cpu1`, + expected: []string{`{"name":"cpu1","fields":["bar"],"values":[{"time":2000000000,"value":60,"tags":{"host":"serverB","region":"us-east"}}]}`}, + }, + { + stmt: `SELECT bar FROM cpu0,cpu1 WHERE foo=42`, + expected: []string{`null`}, + }, + { + stmt: `SELECT bar FROM cpu0,cpu1 WHERE bar!=60`, + expected: []string{`null`}, + }, + } + + for _, tt := range tests { + stmt 
:= mustParseSelectStatement(tt.stmt) + mapper := openRawMapperOrFail(t, shard, stmt, tt.chunkSize) + + for i, s := range tt.expected { + got := nextRawChunkAsJson(t, mapper) + if got != s { + t.Errorf("test '%s'\n\tgot %s\n\texpected %s", tt.stmt, got, tt.expected[i]) break } } @@ -241,20 +336,20 @@ func TestShardMapper_WriteAndSingleMapperAggregateQuery(t *testing.T) { shard := mustCreateShard(tmpDir) pt1time := time.Unix(10, 0).UTC() - pt1 := NewPoint( + pt1 := tsdb.NewPoint( "cpu", map[string]string{"host": "serverA", "region": "us-east"}, map[string]interface{}{"value": 1}, pt1time, ) pt2time := time.Unix(20, 0).UTC() - pt2 := NewPoint( + pt2 := tsdb.NewPoint( "cpu", map[string]string{"host": "serverB", "region": "us-east"}, map[string]interface{}{"value": 60}, pt2time, ) - err := shard.WritePoints([]Point{pt1, pt2}) + err := shard.WritePoints([]tsdb.Point{pt1, pt2}) if err != nil { t.Fatalf(err.Error()) } @@ -265,92 +360,92 @@ func TestShardMapper_WriteAndSingleMapperAggregateQuery(t *testing.T) { }{ { stmt: `SELECT sum(value) FROM cpu`, - expected: []string{`{"name":"cpu","values":[{"value":[61]}]}`, `null`}, + expected: []string{`{"name":"cpu","fields":["value"],"values":[{"value":[61]}]}`, `null`}, }, { stmt: `SELECT sum(value),mean(value) FROM cpu`, - expected: []string{`{"name":"cpu","values":[{"value":[61,{"Count":2,"Mean":30.5,"ResultType":1}]}]}`, `null`}, + expected: []string{`{"name":"cpu","fields":["value"],"values":[{"value":[61,{"Count":2,"Mean":30.5,"ResultType":1}]}]}`, `null`}, }, { stmt: `SELECT sum(value) FROM cpu GROUP BY host`, expected: []string{ - `{"name":"cpu","tags":{"host":"serverA"},"values":[{"value":[1]}]}`, - `{"name":"cpu","tags":{"host":"serverB"},"values":[{"value":[60]}]}`, + `{"name":"cpu","tags":{"host":"serverA"},"fields":["value"],"values":[{"value":[1]}]}`, + `{"name":"cpu","tags":{"host":"serverB"},"fields":["value"],"values":[{"value":[60]}]}`, `null`}, }, { stmt: `SELECT sum(value) FROM cpu GROUP BY region`, expected: []string{ - `{"name":"cpu","tags":{"region":"us-east"},"values":[{"value":[61]}]}`, + `{"name":"cpu","tags":{"region":"us-east"},"fields":["value"],"values":[{"value":[61]}]}`, `null`}, }, { stmt: `SELECT sum(value) FROM cpu GROUP BY region,host`, expected: []string{ - `{"name":"cpu","tags":{"host":"serverA","region":"us-east"},"values":[{"value":[1]}]}`, - `{"name":"cpu","tags":{"host":"serverB","region":"us-east"},"values":[{"value":[60]}]}`, + `{"name":"cpu","tags":{"host":"serverA","region":"us-east"},"fields":["value"],"values":[{"value":[1]}]}`, + `{"name":"cpu","tags":{"host":"serverB","region":"us-east"},"fields":["value"],"values":[{"value":[60]}]}`, `null`}, }, { stmt: `SELECT sum(value) FROM cpu WHERE host='serverB'`, expected: []string{ - `{"name":"cpu","values":[{"value":[60]}]}`, + `{"name":"cpu","fields":["value"],"values":[{"value":[60]}]}`, `null`}, }, { stmt: fmt.Sprintf(`SELECT sum(value) FROM cpu WHERE time = '%s'`, pt1time.Format(influxql.DateTimeFormat)), expected: []string{ - `{"name":"cpu","values":[{"time":10000000000,"value":[1]}]}`, + `{"name":"cpu","fields":["value"],"values":[{"time":10000000000,"value":[1]}]}`, `null`}, }, { stmt: fmt.Sprintf(`SELECT sum(value) FROM cpu WHERE time > '%s'`, pt1time.Format(influxql.DateTimeFormat)), expected: []string{ - `{"name":"cpu","values":[{"time":10000000001,"value":[60]}]}`, + `{"name":"cpu","fields":["value"],"values":[{"time":10000000001,"value":[60]}]}`, `null`}, }, { stmt: fmt.Sprintf(`SELECT sum(value) FROM cpu WHERE time > '%s'`, 
pt2time.Format(influxql.DateTimeFormat)), expected: []string{ - `{"name":"cpu","values":[{"time":20000000001,"value":[null]}]}`, + `{"name":"cpu","fields":["value"],"values":[{"time":20000000001,"value":[null]}]}`, `null`}, }, } for _, tt := range tests { stmt := mustParseSelectStatement(tt.stmt) - mapper := openAggMapperOrFail(t, shard, stmt) + mapper := openLocalMapperOrFail(t, shard, stmt) for i := range tt.expected { got := aggIntervalAsJson(t, mapper) if got != tt.expected[i] { - t.Errorf("test '%s'\n\tgot %s\n\texpected %s", tt.stmt, got, tt.expected[i]) + t.Fatalf("test '%s'\n\tgot %s\n\texpected %s", tt.stmt, got, tt.expected[i]) break } } } } -func TestShardMapper_AggMapperTagSets(t *testing.T) { +func TestShardMapper_LocalMapperTagSets(t *testing.T) { tmpDir, _ := ioutil.TempDir("", "shard_test") defer os.RemoveAll(tmpDir) shard := mustCreateShard(tmpDir) pt1time := time.Unix(1, 0).UTC() - pt1 := NewPoint( + pt1 := tsdb.NewPoint( "cpu", map[string]string{"host": "serverA", "region": "us-east"}, map[string]interface{}{"value": 42}, pt1time, ) pt2time := time.Unix(2, 0).UTC() - pt2 := NewPoint( + pt2 := tsdb.NewPoint( "cpu", map[string]string{"host": "serverB", "region": "us-east"}, map[string]interface{}{"value": 60}, pt2time, ) - err := shard.WritePoints([]Point{pt1, pt2}) + err := shard.WritePoints([]tsdb.Point{pt1, pt2}) if err != nil { t.Fatalf(err.Error()) } @@ -387,7 +482,7 @@ func TestShardMapper_AggMapperTagSets(t *testing.T) { for _, tt := range tests { stmt := mustParseSelectStatement(tt.stmt) - mapper := openAggMapperOrFail(t, shard, stmt) + mapper := openLocalMapperOrFail(t, shard, stmt) got := mapper.TagSets() if !reflect.DeepEqual(got, tt.expected) { t.Errorf("test '%s'\n\tgot %s\n\texpected %s", tt.stmt, got, tt.expected) @@ -396,10 +491,10 @@ func TestShardMapper_AggMapperTagSets(t *testing.T) { } -func mustCreateShard(dir string) *Shard { +func mustCreateShard(dir string) *tsdb.Shard { tmpShard := path.Join(dir, "shard") - index := NewDatabaseIndex() - sh := NewShard(index, tmpShard) + index := tsdb.NewDatabaseIndex() + sh := tsdb.NewShard(index, tmpShard, tsdb.NewEngineOptions()) if err := sh.Open(); err != nil { panic(fmt.Sprintf("error opening shard: %s", err.Error())) } @@ -415,8 +510,8 @@ func mustParseSelectStatement(s string) *influxql.SelectStatement { return stmt.(*influxql.SelectStatement) } -func openRawMapperOrFail(t *testing.T, shard *Shard, stmt *influxql.SelectStatement, chunkSize int) *RawMapper { - mapper := NewRawMapper(shard, stmt, chunkSize) +func openRawMapperOrFail(t *testing.T, shard *tsdb.Shard, stmt *influxql.SelectStatement, chunkSize int) tsdb.Mapper { + mapper := tsdb.NewLocalMapper(shard, stmt, chunkSize) if err := mapper.Open(); err != nil { t.Fatalf("failed to open raw mapper: %s", err.Error()) @@ -424,7 +519,7 @@ func openRawMapperOrFail(t *testing.T, shard *Shard, stmt *influxql.SelectStatem return mapper } -func nextRawChunkAsJson(t *testing.T, mapper *RawMapper) string { +func nextRawChunkAsJson(t *testing.T, mapper tsdb.Mapper) string { r, err := mapper.NextChunk() if err != nil { t.Fatalf("failed to get next chunk from mapper: %s", err.Error()) @@ -436,8 +531,8 @@ func nextRawChunkAsJson(t *testing.T, mapper *RawMapper) string { return string(b) } -func openAggMapperOrFail(t *testing.T, shard *Shard, stmt *influxql.SelectStatement) *AggMapper { - mapper := NewAggMapper(shard, stmt) +func openLocalMapperOrFail(t *testing.T, shard *tsdb.Shard, stmt *influxql.SelectStatement) *tsdb.LocalMapper { + mapper := 
tsdb.NewLocalMapper(shard, stmt, 0) if err := mapper.Open(); err != nil { t.Fatalf("failed to open aggregate mapper: %s", err.Error()) @@ -445,7 +540,7 @@ func openAggMapperOrFail(t *testing.T, shard *Shard, stmt *influxql.SelectStatem return mapper } -func aggIntervalAsJson(t *testing.T, mapper *AggMapper) string { +func aggIntervalAsJson(t *testing.T, mapper *tsdb.LocalMapper) string { r, err := mapper.NextChunk() if err != nil { t.Fatalf("failed to get chunk from aggregate mapper: %s", err.Error()) diff --git a/Godeps/_workspace/src/github.com/influxdb/influxdb/tsdb/meta.go b/Godeps/_workspace/src/github.com/influxdb/influxdb/tsdb/meta.go index 07c17f9b7..3d27dbb99 100644 --- a/Godeps/_workspace/src/github.com/influxdb/influxdb/tsdb/meta.go +++ b/Godeps/_workspace/src/github.com/influxdb/influxdb/tsdb/meta.go @@ -39,6 +39,27 @@ func NewDatabaseIndex() *DatabaseIndex { } } +// Names returns a sorted list of measurement names. +func (d *DatabaseIndex) Names() []string { + d.mu.RLock() + defer d.mu.RUnlock() + return d.names +} + +// Series returns a series by key. +func (d *DatabaseIndex) Series(key string) *Series { + d.mu.RLock() + defer d.mu.RUnlock() + return d.series[key] +} + +// SeriesN returns the number of series. +func (d *DatabaseIndex) SeriesN() int { + d.mu.RLock() + defer d.mu.RUnlock() + return len(d.series) +} + // Measurement returns the measurement object from the index by the name func (d *DatabaseIndex) Measurement(name string) *Measurement { d.mu.RLock() @@ -55,8 +76,8 @@ func (d *DatabaseIndex) MeasurementSeriesCounts() (nMeasurements int, nSeries in return } -// createSeriesIndexIfNotExists adds the series for the given measurement to the index and sets its ID or returns the existing series object -func (s *DatabaseIndex) createSeriesIndexIfNotExists(measurementName string, series *Series) *Series { +// CreateSeriesIndexIfNotExists adds the series for the given measurement to the index and sets its ID or returns the existing series object +func (s *DatabaseIndex) CreateSeriesIndexIfNotExists(measurementName string, series *Series) *Series { // if there is a measurement for this id, it's already been added ss := s.series[series.Key] if ss != nil { @@ -64,7 +85,7 @@ func (s *DatabaseIndex) createSeriesIndexIfNotExists(measurementName string, ser } // get or create the measurement index - m := s.createMeasurementIndexIfNotExists(measurementName) + m := s.CreateMeasurementIndexIfNotExists(measurementName) // set the in memory ID for query processing on this shard series.id = s.lastID + 1 @@ -78,8 +99,8 @@ func (s *DatabaseIndex) createSeriesIndexIfNotExists(measurementName string, ser return series } -// createMeasurementIndexIfNotExists creates or retrieves an in memory index object for the measurement -func (s *DatabaseIndex) createMeasurementIndexIfNotExists(name string) *Measurement { +// CreateMeasurementIndexIfNotExists creates or retrieves an in memory index object for the measurement +func (s *DatabaseIndex) CreateMeasurementIndexIfNotExists(name string) *Measurement { name = unescapeString(name) m := s.measurements[name] if m == nil { @@ -263,6 +284,7 @@ func (db *DatabaseIndex) DropSeries(keys []string) { continue } series.measurement.DropSeries(series.id) + delete(db.series, k) } } @@ -276,11 +298,10 @@ type Measurement struct { index *DatabaseIndex // in-memory index fields - series map[string]*Series // sorted tagset string to the series object seriesByID map[uint64]*Series // lookup table for series by their id measurement *Measurement - 
seriesByTagKeyValue map[string]map[string]seriesIDs // map from tag key to value to sorted set of series ids - seriesIDs seriesIDs // sorted list of series IDs in this measurement + seriesByTagKeyValue map[string]map[string]SeriesIDs // map from tag key to value to sorted set of series ids + seriesIDs SeriesIDs // sorted list of series IDs in this measurement } // NewMeasurement allocates and initializes a new Measurement. @@ -290,10 +311,9 @@ func NewMeasurement(name string, idx *DatabaseIndex) *Measurement { fieldNames: make(map[string]struct{}), index: idx, - series: make(map[string]*Series), seriesByID: make(map[uint64]*Series), - seriesByTagKeyValue: make(map[string]map[string]seriesIDs), - seriesIDs: make(seriesIDs, 0), + seriesByTagKeyValue: make(map[string]map[string]SeriesIDs), + seriesIDs: make(SeriesIDs, 0), } } @@ -305,6 +325,13 @@ func (m *Measurement) HasField(name string) bool { return hasField } +// SeriesByID returns a series by identifier. +func (m *Measurement) SeriesByID(id uint64) *Series { + m.mu.RLock() + defer m.mu.RUnlock() + return m.seriesByID[id] +} + // SeriesKeys returns the keys of every series in this measurement func (m *Measurement) SeriesKeys() []string { m.mu.RLock() @@ -321,7 +348,7 @@ func (m *Measurement) ValidateGroupBy(stmt *influxql.SelectStatement) error { for _, d := range stmt.Dimensions { switch e := d.Expr.(type) { case *influxql.VarRef: - if !m.HasTagKey(e.Val) { + if m.HasField(e.Val) { return fmt.Errorf("can not use field in GROUP BY clause: %s", e.Val) } } @@ -353,8 +380,6 @@ func (m *Measurement) AddSeries(s *Series) bool { return false } m.seriesByID[s.id] = s - tagset := string(marshalTags(s.Tags)) - m.series[tagset] = s m.seriesIDs = append(m.seriesIDs, s.id) // the series ID should always be higher than all others because it's a new @@ -367,7 +392,7 @@ func (m *Measurement) AddSeries(s *Series) bool { for k, v := range s.Tags { valueMap := m.seriesByTagKeyValue[k] if valueMap == nil { - valueMap = make(map[string]seriesIDs) + valueMap = make(map[string]SeriesIDs) m.seriesByTagKeyValue[k] = valueMap } ids := valueMap[v] @@ -392,10 +417,6 @@ func (m *Measurement) DropSeries(seriesID uint64) { if _, ok := m.seriesByID[seriesID]; !ok { return } - s := m.seriesByID[seriesID] - tagset := string(marshalTags(s.Tags)) - - delete(m.series, tagset) delete(m.seriesByID, seriesID) var ids []uint64 @@ -407,7 +428,7 @@ func (m *Measurement) DropSeries(seriesID uint64) { m.seriesIDs = ids // remove this series id to the tag index on the measurement - // s.seriesByTagKeyValue is defined as map[string]map[string]seriesIDs + // s.seriesByTagKeyValue is defined as map[string]map[string]SeriesIDs for k, v := range m.seriesByTagKeyValue { values := v for kk, vv := range values { @@ -497,7 +518,7 @@ func (m *Measurement) TagSets(stmt *influxql.SelectStatement, dimensions []strin // Convert the TagSet to a string, so it can be added to a map allowing TagSets to be handled // as a set. - tagsAsKey := string(marshalTags(tags)) + tagsAsKey := string(MarshalTags(tags)) tagSet, ok := tagSets[tagsAsKey] if !ok { // This TagSet is new, create a new entry for it. @@ -507,7 +528,7 @@ func (m *Measurement) TagSets(stmt *influxql.SelectStatement, dimensions []strin tagsForSet[k] = v } tagSet.Tags = tagsForSet - tagSet.Key = marshalTags(tagsForSet) + tagSet.Key = MarshalTags(tagsForSet) } // Associate the series and filter with the Tagset. 
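The TagSets hunks above lean on MarshalTags (exported from marshalTags in this patch) to reduce a tag map to one deterministic byte key, so series that share the same tag values collapse into a single TagSet entry. TestMarshalTags later in this patch pins the layout down: sorted tag keys, then the values in the same order, pipe-separated (e.g. baz|foo|battttt|bar). A minimal, self-contained sketch of that idea follows; canonicalTagKey is a hypothetical stand-in for illustration, not the vendored implementation:

    package main

    import (
        "fmt"
        "sort"
        "strings"
    )

    // canonicalTagKey mimics the property the patch's MarshalTags provides:
    // equal tag maps always serialize to byte-identical keys, because keys
    // are emitted in sorted order followed by their values in that order.
    func canonicalTagKey(tags map[string]string) []byte {
        if len(tags) == 0 {
            return nil // empty maps marshal to empty bytes, as in the patch
        }
        keys := make([]string, 0, len(tags))
        for k := range tags {
            keys = append(keys, k)
        }
        sort.Strings(keys)

        vals := make([]string, 0, len(keys))
        for _, k := range keys {
            vals = append(vals, tags[k])
        }
        return []byte(strings.Join(keys, "|") + "|" + strings.Join(vals, "|"))
    }

    func main() {
        // Matches the TestMarshalTags expectation in this patch:
        // baz|foo|battttt|bar
        fmt.Printf("%s\n", canonicalTagKey(map[string]string{"foo": "bar", "baz": "battttt"}))
    }

Using the marshaled bytes as a map key (tagsAsKey above) is what lets TagSets treat tag combinations as a set without defining a comparable struct type.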
@@ -534,11 +555,11 @@ func (m *Measurement) TagSets(stmt *influxql.SelectStatement, dimensions []strin } // mergeSeriesFilters merges two sets of filter expressions and culls series IDs. -func mergeSeriesFilters(op influxql.Token, ids seriesIDs, lfilters, rfilters map[uint64]influxql.Expr) (seriesIDs, map[uint64]influxql.Expr) { +func mergeSeriesFilters(op influxql.Token, ids SeriesIDs, lfilters, rfilters map[uint64]influxql.Expr) (SeriesIDs, map[uint64]influxql.Expr) { // Create a map to hold the final set of series filter expressions. filters := make(map[uint64]influxql.Expr, 0) // Resulting list of series IDs - var series seriesIDs + var series SeriesIDs // Combining logic: // +==========+==========+==========+=======================+=======================+ @@ -603,7 +624,7 @@ func mergeSeriesFilters(op influxql.Token, ids seriesIDs, lfilters, rfilters map // idsForExpr will return a collection of series ids and a filter expression that should // be used to filter points from those series. -func (m *Measurement) idsForExpr(n *influxql.BinaryExpr) (seriesIDs, influxql.Expr, error) { +func (m *Measurement) idsForExpr(n *influxql.BinaryExpr) (SeriesIDs, influxql.Expr, error) { name, ok := n.LHS.(*influxql.VarRef) value := n.RHS if !ok { @@ -632,20 +653,20 @@ func (m *Measurement) idsForExpr(n *influxql.BinaryExpr) (seriesIDs, influxql.Ex // if we're looking for series with a specific tag value if str, ok := value.(*influxql.StringLiteral); ok { - var ids seriesIDs + var ids SeriesIDs if n.Op == influxql.EQ { // return series that have a tag of specific value. ids = tagVals[str.Val] } else if n.Op == influxql.NEQ { - ids = m.seriesIDs.reject(tagVals[str.Val]) + ids = m.seriesIDs.Reject(tagVals[str.Val]) } return ids, &influxql.BooleanLiteral{Val: true}, nil } // if we're looking for series with a tag value that matches a regex if re, ok := value.(*influxql.RegexLiteral); ok { - var ids seriesIDs + var ids SeriesIDs // The operation is a NEQREGEX, code must start by assuming all match, even // series without any tags. @@ -657,9 +678,9 @@ func (m *Measurement) idsForExpr(n *influxql.BinaryExpr) (seriesIDs, influxql.Ex match := re.Val.MatchString(k) if match && n.Op == influxql.EQREGEX { - ids = ids.union(tagVals[k]) + ids = ids.Union(tagVals[k]) } else if match && n.Op == influxql.NEQREGEX { - ids = ids.reject(tagVals[k]) + ids = ids.Reject(tagVals[k]) } } return ids, &influxql.BooleanLiteral{Val: true}, nil @@ -671,7 +692,7 @@ func (m *Measurement) idsForExpr(n *influxql.BinaryExpr) (seriesIDs, influxql.Ex // walkWhereForSeriesIds recursively walks the WHERE clause and returns an ordered set of series IDs and // a map from those series IDs to filter expressions that should be used to limit points returned in // the final query result. -func (m *Measurement) walkWhereForSeriesIds(expr influxql.Expr) (seriesIDs, map[uint64]influxql.Expr, error) { +func (m *Measurement) walkWhereForSeriesIds(expr influxql.Expr) (SeriesIDs, map[uint64]influxql.Expr, error) { switch n := expr.(type) { case *influxql.BinaryExpr: switch n.Op { @@ -702,12 +723,12 @@ func (m *Measurement) walkWhereForSeriesIds(expr influxql.Expr) (seriesIDs, map[ } // Combine the series IDs from the LHS and RHS. - var ids seriesIDs + var ids SeriesIDs switch n.Op { case influxql.AND: - ids = lids.intersect(rids) + ids = lids.Intersect(rids) case influxql.OR: - ids = lids.union(rids) + ids = lids.Union(rids) } // Merge the filter expressions for the LHS and RHS. 
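walkWhereForSeriesIds above resolves the WHERE tree bottom-up: each leaf yields a SeriesIDs slice, and AND/OR nodes combine them with Intersect and Union (with Reject handling the negated cases). All three assume their inputs stay sorted, which is why they can run as linear two-pointer merges instead of building hash sets. A minimal sketch of the intersect step under that sorted-input assumption (intersectSorted is a hypothetical name; the exported tsdb.SeriesIDs methods are the real versions):

    package main

    import "fmt"

    // intersectSorted walks two ascending uint64 slices in lockstep and
    // keeps only the IDs present in both, the same shape of merge that
    // SeriesIDs.Intersect performs for AND clauses.
    func intersectSorted(l, r []uint64) []uint64 {
        var ids []uint64
        i, j := 0, 0
        for i < len(l) && j < len(r) {
            switch {
            case l[i] == r[j]:
                ids = append(ids, l[i]) // in both sets: keep it
                i++
                j++
            case l[i] < r[j]:
                i++ // advance whichever side holds the smaller ID
            default:
                j++
            }
        }
        return ids
    }

    func main() {
        // Mirrors Test_SeriesIDs_Intersect from this patch: expect [1 3].
        fmt.Println(intersectSorted([]uint64{1, 3, 4, 5, 6}, []uint64{1, 2, 3, 7}))
    }

The output stays sorted by construction, so the result can feed directly into the next Intersect/Union/Reject without a re-sort.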
@@ -785,7 +806,7 @@ func expandExprWithValues(expr influxql.Expr, keys []string, tagExprs []tagExpr, // seriesIDsAllOrByExpr walks an expressions for matching series IDs // or, if no expressions is given, returns all series IDs for the measurement. -func (m *Measurement) seriesIDsAllOrByExpr(expr influxql.Expr) (seriesIDs, error) { +func (m *Measurement) seriesIDsAllOrByExpr(expr influxql.Expr) (SeriesIDs, error) { // If no expression given or the measurement has no series, // we can take just return the ids or nil accordingly. if expr == nil { @@ -997,16 +1018,16 @@ func (s *Series) match(tags map[string]string) bool { return true } -// seriesIDs is a convenience type for sorting, checking equality, and doing +// SeriesIDs is a convenience type for sorting, checking equality, and doing // union and intersection of collections of series ids. -type seriesIDs []uint64 +type SeriesIDs []uint64 -func (a seriesIDs) Len() int { return len(a) } -func (a seriesIDs) Less(i, j int) bool { return a[i] < a[j] } -func (a seriesIDs) Swap(i, j int) { a[i], a[j] = a[j], a[i] } +func (a SeriesIDs) Len() int { return len(a) } +func (a SeriesIDs) Less(i, j int) bool { return a[i] < a[j] } +func (a SeriesIDs) Swap(i, j int) { a[i], a[j] = a[j], a[i] } -// equals assumes that both are sorted. -func (a seriesIDs) equals(other seriesIDs) bool { +// Equals assumes that both are sorted. +func (a SeriesIDs) Equals(other SeriesIDs) bool { if len(a) != len(other) { return false } @@ -1018,9 +1039,9 @@ func (a seriesIDs) equals(other seriesIDs) bool { return true } -// intersect returns a new collection of series ids in sorted order that is the intersection of the two. +// Intersect returns a new collection of series ids in sorted order that is the intersection of the two. // The two collections must already be sorted. -func (a seriesIDs) intersect(other seriesIDs) seriesIDs { +func (a SeriesIDs) Intersect(other SeriesIDs) SeriesIDs { l := a r := other @@ -1047,12 +1068,12 @@ func (a seriesIDs) intersect(other seriesIDs) seriesIDs { } } - return seriesIDs(ids) + return SeriesIDs(ids) } -// union returns a new collection of series ids in sorted order that is the union of the two. +// Union returns a new collection of series ids in sorted order that is the union of the two. // The two collections must already be sorted. -func (a seriesIDs) union(other seriesIDs) seriesIDs { +func (a SeriesIDs) Union(other SeriesIDs) SeriesIDs { l := a r := other ids := make([]uint64, 0, len(l)+len(r)) @@ -1081,9 +1102,9 @@ func (a seriesIDs) union(other seriesIDs) seriesIDs { return ids } -// reject returns a new collection of series ids in sorted order with the passed in set removed from the original. +// Reject returns a new collection of series ids in sorted order with the passed in set removed from the original. // This is useful for the NOT operator. The two collections must already be sorted. -func (a seriesIDs) reject(other seriesIDs) seriesIDs { +func (a SeriesIDs) Reject(other SeriesIDs) SeriesIDs { l := a r := other var i, j int @@ -1106,7 +1127,7 @@ func (a seriesIDs) reject(other seriesIDs) seriesIDs { ids = append(ids, l[i:]...) } - return seriesIDs(ids) + return SeriesIDs(ids) } // TagFilter represents a tag filter when looking up other tags or measurements. @@ -1118,7 +1139,7 @@ type TagFilter struct { } // used to convert the tag set to bytes for use as a lookup key -func marshalTags(tags map[string]string) []byte { +func MarshalTags(tags map[string]string) []byte { // Empty maps marshal to empty bytes. 
if len(tags) == 0 { return nil @@ -1169,6 +1190,13 @@ func (m *Measurement) TagKeys() []string { return keys } +// SetFieldName adds the field name to the measurement. +func (m *Measurement) SetFieldName(name string) { + m.mu.Lock() + m.fieldNames[name] = struct{}{} + m.mu.Unlock() +} + // FieldNames returns a list of the measurement's field names func (m *Measurement) FieldNames() (a []string) { m.mu.RLock() @@ -1180,7 +1208,7 @@ func (m *Measurement) FieldNames() (a []string) { return } -func (m *Measurement) tagValuesByKeyAndSeriesID(tagKeys []string, ids seriesIDs) map[string]stringSet { +func (m *Measurement) tagValuesByKeyAndSeriesID(tagKeys []string, ids SeriesIDs) map[string]stringSet { // If no tag keys were passed, get all tag keys for the measurement. if len(tagKeys) == 0 { for k := range m.seriesByTagKeyValue { @@ -1221,9 +1249,11 @@ func newStringSet() stringSet { return make(map[string]struct{}) } -// add adds a string to the set. -func (s stringSet) add(ss string) { - s[ss] = struct{}{} +// add adds strings to the set. +func (s stringSet) add(ss ...string) { + for _, n := range ss { + s[n] = struct{}{} + } } // contains returns whether the set contains the given string. @@ -1270,7 +1300,7 @@ func (s stringSet) intersect(o stringSet) stringSet { return ns } -func measurementFromSeriesKey(key string) string { +func MeasurementFromSeriesKey(key string) string { idx := strings.Index(key, ",") if idx == -1 { return key diff --git a/Godeps/_workspace/src/github.com/influxdb/influxdb/tsdb/meta_test.go b/Godeps/_workspace/src/github.com/influxdb/influxdb/tsdb/meta_test.go index 167f14090..dac931bcb 100644 --- a/Godeps/_workspace/src/github.com/influxdb/influxdb/tsdb/meta_test.go +++ b/Godeps/_workspace/src/github.com/influxdb/influxdb/tsdb/meta_test.go @@ -1,4 +1,4 @@ -package tsdb +package tsdb_test import ( "bytes" @@ -6,86 +6,87 @@ import ( "testing" "github.com/influxdb/influxdb/influxql" + "github.com/influxdb/influxdb/tsdb" ) -// Test comparing seriesIDs for equality. -func Test_seriesIDs_equals(t *testing.T) { - ids1 := seriesIDs{1, 2, 3} - ids2 := seriesIDs{1, 2, 3} - ids3 := seriesIDs{4, 5, 6} +// Test comparing SeriesIDs for equality. +func Test_SeriesIDs_Equals(t *testing.T) { + ids1 := tsdb.SeriesIDs{1, 2, 3} + ids2 := tsdb.SeriesIDs{1, 2, 3} + ids3 := tsdb.SeriesIDs{4, 5, 6} - if !ids1.equals(ids2) { + if !ids1.Equals(ids2) { t.Fatal("expected ids1 == ids2") - } else if ids1.equals(ids3) { + } else if ids1.Equals(ids3) { t.Fatal("expected ids1 != ids3") } } -// Test intersecting sets of seriesIDs. -func Test_seriesIDs_intersect(t *testing.T) { +// Test intersecting sets of SeriesIDs. +func Test_SeriesIDs_Intersect(t *testing.T) { // Test swaping l & r, all branches of if-else, and exit loop when 'j < len(r)' - ids1 := seriesIDs{1, 3, 4, 5, 6} - ids2 := seriesIDs{1, 2, 3, 7} - exp := seriesIDs{1, 3} - got := ids1.intersect(ids2) + ids1 := tsdb.SeriesIDs{1, 3, 4, 5, 6} + ids2 := tsdb.SeriesIDs{1, 2, 3, 7} + exp := tsdb.SeriesIDs{1, 3} + got := ids1.Intersect(ids2) - if !exp.equals(got) { + if !exp.Equals(got) { t.Fatalf("exp=%v, got=%v", exp, got) } // Test exit for loop when 'i < len(l)' - ids1 = seriesIDs{1} - ids2 = seriesIDs{1, 2} - exp = seriesIDs{1} - got = ids1.intersect(ids2) + ids1 = tsdb.SeriesIDs{1} + ids2 = tsdb.SeriesIDs{1, 2} + exp = tsdb.SeriesIDs{1} + got = ids1.Intersect(ids2) - if !exp.equals(got) { + if !exp.Equals(got) { t.Fatalf("exp=%v, got=%v", exp, got) } } -// Test union sets of seriesIDs. 
-func Test_seriesIDs_union(t *testing.T) { +// Test union sets of SeriesIDs. +func Test_SeriesIDs_Union(t *testing.T) { // Test all branches of if-else, exit loop because of 'j < len(r)', and append remainder from left. - ids1 := seriesIDs{1, 2, 3, 7} - ids2 := seriesIDs{1, 3, 4, 5, 6} - exp := seriesIDs{1, 2, 3, 4, 5, 6, 7} - got := ids1.union(ids2) + ids1 := tsdb.SeriesIDs{1, 2, 3, 7} + ids2 := tsdb.SeriesIDs{1, 3, 4, 5, 6} + exp := tsdb.SeriesIDs{1, 2, 3, 4, 5, 6, 7} + got := ids1.Union(ids2) - if !exp.equals(got) { + if !exp.Equals(got) { t.Fatalf("exp=%v, got=%v", exp, got) } // Test exit because of 'i < len(l)' and append remainder from right. - ids1 = seriesIDs{1} - ids2 = seriesIDs{1, 2} - exp = seriesIDs{1, 2} - got = ids1.union(ids2) + ids1 = tsdb.SeriesIDs{1} + ids2 = tsdb.SeriesIDs{1, 2} + exp = tsdb.SeriesIDs{1, 2} + got = ids1.Union(ids2) - if !exp.equals(got) { + if !exp.Equals(got) { t.Fatalf("exp=%v, got=%v", exp, got) } } -// Test removing one set of seriesIDs from another. -func Test_seriesIDs_reject(t *testing.T) { +// Test removing one set of SeriesIDs from another. +func Test_SeriesIDs_Reject(t *testing.T) { // Test all branches of if-else, exit loop because of 'j < len(r)', and append remainder from left. - ids1 := seriesIDs{1, 2, 3, 7} - ids2 := seriesIDs{1, 3, 4, 5, 6} - exp := seriesIDs{2, 7} - got := ids1.reject(ids2) + ids1 := tsdb.SeriesIDs{1, 2, 3, 7} + ids2 := tsdb.SeriesIDs{1, 3, 4, 5, 6} + exp := tsdb.SeriesIDs{2, 7} + got := ids1.Reject(ids2) - if !exp.equals(got) { + if !exp.Equals(got) { t.Fatalf("exp=%v, got=%v", exp, got) } // Test exit because of 'i < len(l)'. - ids1 = seriesIDs{1} - ids2 = seriesIDs{1, 2} - exp = seriesIDs{} - got = ids1.reject(ids2) + ids1 = tsdb.SeriesIDs{1} + ids2 = tsdb.SeriesIDs{1, 2} + exp = tsdb.SeriesIDs{} + got = ids1.Reject(ids2) - if !exp.equals(got) { + if !exp.Equals(got) { t.Fatalf("exp=%v, got=%v", exp, got) } } @@ -113,7 +114,7 @@ func TestMarshalTags(t *testing.T) { result: []byte(`baz|foo|battttt|bar`), }, } { - result := marshalTags(tt.tags) + result := tsdb.MarshalTags(tt.tags) if !bytes.Equal(result, tt.result) { t.Fatalf("%d. unexpected result: exp=%s, got=%s", i, tt.result, result) } @@ -137,7 +138,7 @@ func benchmarkMarshalTags(b *testing.B, keyN int) { // Unmarshal map into byte slice. 
b.ReportAllocs() for i := 0; i < b.N; i++ { - marshalTags(tags) + tsdb.MarshalTags(tags) } } @@ -154,23 +155,23 @@ func BenchmarkCreateSeriesIndex_1M(b *testing.B) { } func benchmarkCreateSeriesIndex(b *testing.B, series []*TestSeries) { - idxs := make([]*DatabaseIndex, 0, b.N) + idxs := make([]*tsdb.DatabaseIndex, 0, b.N) for i := 0; i < b.N; i++ { - idxs = append(idxs, NewDatabaseIndex()) + idxs = append(idxs, tsdb.NewDatabaseIndex()) } b.ResetTimer() for n := 0; n < b.N; n++ { idx := idxs[n] for _, s := range series { - idx.createSeriesIndexIfNotExists(s.Measurement, s.Series) + idx.CreateSeriesIndexIfNotExists(s.Measurement, s.Series) } } } type TestSeries struct { Measurement string - Series *Series + Series *tsdb.Series } func genTestSeries(mCnt, tCnt, vCnt int) []*TestSeries { @@ -181,8 +182,8 @@ func genTestSeries(mCnt, tCnt, vCnt int) []*TestSeries { for _, ts := range tagSets { series = append(series, &TestSeries{ Measurement: m, - Series: &Series{ - Key: fmt.Sprintf("%s:%s", m, string(marshalTags(ts))), + Series: &tsdb.Series{ + Key: fmt.Sprintf("%s:%s", m, string(tsdb.MarshalTags(ts))), Tags: ts, }, }) diff --git a/Godeps/_workspace/src/github.com/influxdb/influxdb/tsdb/points.go b/Godeps/_workspace/src/github.com/influxdb/influxdb/tsdb/points.go index 26cec3ed5..cc7d6d2e8 100644 --- a/Godeps/_workspace/src/github.com/influxdb/influxdb/tsdb/points.go +++ b/Godeps/_workspace/src/github.com/influxdb/influxdb/tsdb/points.go @@ -37,6 +37,13 @@ type Point interface { String() string } +// Points represents a sortable list of points by timestamp. +type Points []Point + +func (a Points) Len() int { return len(a) } +func (a Points) Less(i, j int) bool { return a[i].Time().Before(a[j].Time()) } +func (a Points) Swap(i, j int) { a[i], a[j] = a[j], a[i] } + // point is the default implementation of Point. 
type point struct { time time.Time @@ -109,7 +116,7 @@ func ParsePointsWithPrecision(buf []byte, defaultTime time.Time, precision strin block []byte ) for { - pos, block = scanTo(buf, pos, '\n') + pos, block = scanLine(buf, pos) pos += 1 if len(block) == 0 { @@ -117,7 +124,14 @@ func ParsePointsWithPrecision(buf []byte, defaultTime time.Time, precision strin } // lines which start with '#' are comments - if start := skipWhitespace(block, 0); block[start] == '#' { + start := skipWhitespace(block, 0) + + // If line is all whitespace, just skip it + if start >= len(block) { + continue + } + + if block[start] == '#' { continue } @@ -222,6 +236,10 @@ func scanKey(buf []byte, i int) (int, []byte, error) { } if buf[i] == '=' { + if i-1 < 0 || i-2 < 0 { + return i, buf[start:i], fmt.Errorf("missing tag name") + } + // Check for "cpu,=value" but allow "cpu,a\,=value" if buf[i-1] == ',' && buf[i-2] != '\\' { return i, buf[start:i], fmt.Errorf("missing tag name") @@ -254,6 +272,13 @@ func scanKey(buf []byte, i int) (int, []byte, error) { return i, buf[start:i], fmt.Errorf("missing tag value") } i += 1 + + // grow our indices slice if we have too many tags + if commas >= len(indices) { + newIndics := make([]int, cap(indices)*2) + copy(newIndics, indices) + indices = newIndics + } indices[commas] = i commas += 1 @@ -273,6 +298,14 @@ func scanKey(buf []byte, i int) (int, []byte, error) { if equals > 0 && commas-1 != equals-1 { return i, buf[start:i], fmt.Errorf("missing tag value") } + + // grow our indices slice if we have too many tags + if commas >= len(indices) { + newIndics := make([]int, cap(indices)*2) + copy(newIndics, indices) + indices = newIndics + } + indices[commas] = i + 1 break } @@ -286,6 +319,12 @@ func scanKey(buf []byte, i int) (int, []byte, error) { return i, buf[start:i], fmt.Errorf("invalid tag format") } + // This check makes sure we actually received fields from the user. #3379 + // This will catch invalid syntax such as: `cpu,host=serverA,region=us-west` + if i >= len(buf) { + return i, buf[start:i], fmt.Errorf("missing fields") + } + // Now we know where the key region is within buf, and the locations of tags, we // need to deterimine if duplicate tags exist and if the tags are sorted. This iterates // 1/2 of the list comparing each end with each other, walking towards the center from @@ -408,21 +447,20 @@ func scanFields(buf []byte, i int) (int, []byte, error) { if isNumeric(buf[i+1]) || buf[i+1] == '-' || buf[i+1] == 'N' || buf[i+1] == 'n' { var err error - i, _, err = scanNumber(buf, i+1) + i, err = scanNumber(buf, i+1) if err != nil { return i, buf[start:i], err - } else { - continue } - // If next byte is not a double-quote, the value must be a boolean - } else if buf[i+1] != '"' { + continue + } + // If next byte is not a double-quote, the value must be a boolean + if buf[i+1] != '"' { var err error i, _, err = scanBoolean(buf, i+1) if err != nil { return i, buf[start:i], err - } else { - continue } + continue } } @@ -483,8 +521,9 @@ func isNumeric(b byte) bool { // scanNumber returns the end position within buf, start at i after // scanning over buf for an integer, or float. It returns an // error if a invalid number is scanned. -func scanNumber(buf []byte, i int) (int, []byte, error) { +func scanNumber(buf []byte, i int) (int, error) { start := i + var isInt bool // Is negative number? 
if i < len(buf) && buf[i] == '-' { @@ -506,13 +545,19 @@ func scanNumber(buf []byte, i int) (int, []byte, error) { break } + if buf[i] == 'i' && i > start && !isInt { + isInt = true + i += 1 + continue + } + if buf[i] == '.' { decimals += 1 } // Can't have more than 1 decimal (e.g. 1.1.1 should fail) if decimals > 1 { - return i, buf[start:i], fmt.Errorf("invalid number") + return i, fmt.Errorf("invalid number") } // `e` is valid for floats but not as the first char @@ -534,36 +579,44 @@ func scanNumber(buf []byte, i int) (int, []byte, error) { i += 3 continue } - return i, buf[start:i], fmt.Errorf("invalid number") + return i, fmt.Errorf("invalid number") } if !isNumeric(buf[i]) { - return i, buf[start:i], fmt.Errorf("invalid number") + return i, fmt.Errorf("invalid number") } i += 1 } + if isInt && (decimals > 0 || scientific) { + return i, fmt.Errorf("invalid number") + } // It's more common that numbers will be within min/max range for their type but we need to prevent // out or range numbers from being parsed successfully. This uses some simple heuristics to decide // if we should parse the number to the actual type. It does not do it all the time because it incurs // extra allocations and we end up converting the type again when writing points to disk. - if decimals == 0 { + if isInt { + // Make sure the last char is an 'i' for integers (e.g. 9i10 is not valid) + if buf[i-1] != 'i' { + return i, fmt.Errorf("invalid number") + } // Parse the int to check bounds the number of digits could be larger than the max range - if len(buf[start:i]) >= maxInt64Digits || len(buf[start:i]) >= minInt64Digits { - if _, err := strconv.ParseInt(string(buf[start:i]), 10, 64); err != nil { - return i, buf[start:i], fmt.Errorf("invalid integer") + // We subtract 1 from the index to remove the `i` from our tests + if len(buf[start:i-1]) >= maxInt64Digits || len(buf[start:i-1]) >= minInt64Digits { + if _, err := strconv.ParseInt(string(buf[start:i-1]), 10, 64); err != nil { + return i, fmt.Errorf("unable to parse integer %s: %s", buf[start:i-1], err) } } } else { // Parse the float to check bounds if it's scientific or the number of digits could be larger than the max range if scientific || len(buf[start:i]) >= maxFloat64Digits || len(buf[start:i]) >= minFloat64Digits { if _, err := strconv.ParseFloat(string(buf[start:i]), 10); err != nil { - return i, buf[start:i], fmt.Errorf("invalid float") + return i, fmt.Errorf("invalid float") } } } - return i, buf[start:i], nil + return i, nil } // scanBoolean returns the end position within buf, start at i after @@ -633,10 +686,6 @@ func skipWhitespace(buf []byte, i int) int { return i } - if buf[i] == '\\' { - i += 2 - continue - } if buf[i] == ' ' || buf[i] == '\t' { i += 1 continue @@ -646,6 +695,39 @@ func skipWhitespace(buf []byte, i int) int { return i } +// scanLine returns the end position in buf and the next line found within +// buf. +func scanLine(buf []byte, i int) (int, []byte) { + start := i + quoted := false + for { + // reached the end of buf? + if i >= len(buf) { + break + } + + // If we see a double quote, makes sure it is not escaped + if buf[i] == '"' && buf[i-1] != '\\' { + i += 1 + quoted = !quoted + continue + } + + if buf[i] == '\\' { + i += 2 + continue + } + + if buf[i] == '\n' && !quoted { + break + } + + i += 1 + } + + return i, buf[start:i] +} + // scanTo returns the end position in buf and the next consecutive block // of bytes, starting from i and ending with stop byte. 
If there are leading // spaces or escaped chars, they are skipped. @@ -791,7 +873,7 @@ func unescapeQuoteString(in string) string { // NewPoint returns a new point with the given measurement name, tags, fields and timestamp func NewPoint(name string, tags Tags, fields Fields, time time.Time) Point { return &point{ - key: makeKey([]byte(name), tags), + key: MakeKey([]byte(name), tags), time: time, fields: fields.MarshalBinary(), } @@ -821,7 +903,7 @@ func (p *point) Name() string { // SetName updates the measurement name for the point func (p *point) SetName(name string) { - p.key = makeKey([]byte(name), p.Tags()) + p.key = MakeKey([]byte(name), p.Tags()) } // Time return the timestamp for the point @@ -863,20 +945,20 @@ func (p *point) Tags() Tags { return tags } -func makeKey(name []byte, tags Tags) []byte { - return append(escape(name), tags.hashKey()...) +func MakeKey(name []byte, tags Tags) []byte { + return append(escape(name), tags.HashKey()...) } // SetTags replaces the tags for the point func (p *point) SetTags(tags Tags) { - p.key = makeKey(p.name(), tags) + p.key = MakeKey(p.name(), tags) } // AddTag adds or replaces a tag value for a point func (p *point) AddTag(key, value string) { tags := p.Tags() tags[key] = value - p.key = makeKey(p.name(), tags) + p.key = MakeKey(p.name(), tags) } // Fields returns the fields for the point @@ -950,7 +1032,7 @@ func (p *point) UnixNano() int64 { type Tags map[string]string -func (t Tags) hashKey() []byte { +func (t Tags) HashKey() []byte { // Empty maps marshal to empty bytes. if len(t) == 0 { return nil @@ -995,6 +1077,10 @@ func (t Tags) hashKey() []byte { type Fields map[string]interface{} func parseNumber(val []byte) (interface{}, error) { + if val[len(val)-1] == 'i' { + val = val[:len(val)-1] + return strconv.ParseInt(string(val), 10, 64) + } for i := 0; i < len(val); i++ { // If there is a decimal or an N (NaN), I (Inf), parse as float if val[i] == '.' || val[i] == 'N' || val[i] == 'n' || val[i] == 'I' || val[i] == 'i' || val[i] == 'e' { @@ -1004,7 +1090,7 @@ func parseNumber(val []byte) (interface{}, error) { return string(val), nil } } - return strconv.ParseInt(string(val), 10, 64) + return strconv.ParseFloat(string(val), 64) } func newFieldsFromBinary(buf []byte) Fields { @@ -1024,6 +1110,7 @@ func newFieldsFromBinary(buf []byte) Fields { if len(name) == 0 { continue } + name = unescape(name) i, valueBuf = scanFieldValue(buf, i+1) if len(valueBuf) == 0 { @@ -1051,7 +1138,7 @@ func newFieldsFromBinary(buf []byte) Fields { panic(fmt.Sprintf("unable to parse bool value '%v': %v\n", string(valueBuf), err)) } } - fields[string(unescape(name))] = value + fields[string(name)] = value i += 1 } return fields @@ -1074,12 +1161,16 @@ func (p Fields) MarshalBinary() []byte { switch t := v.(type) { case int: b = append(b, []byte(strconv.FormatInt(int64(t), 10))...) + b = append(b, 'i') case int32: b = append(b, []byte(strconv.FormatInt(int64(t), 10))...) + b = append(b, 'i') case uint64: b = append(b, []byte(strconv.FormatUint(t, 10))...) + b = append(b, 'i') case int64: b = append(b, []byte(strconv.FormatInt(t, 10))...) 
+ b = append(b, 'i') case float64: // ensure there is a decimal in the encoded for diff --git a/Godeps/_workspace/src/github.com/influxdb/influxdb/tsdb/points_test.go b/Godeps/_workspace/src/github.com/influxdb/influxdb/tsdb/points_test.go index a28e02e04..4efd97ce2 100644 --- a/Godeps/_workspace/src/github.com/influxdb/influxdb/tsdb/points_test.go +++ b/Godeps/_workspace/src/github.com/influxdb/influxdb/tsdb/points_test.go @@ -1,4 +1,4 @@ -package tsdb +package tsdb_test import ( "bytes" @@ -9,16 +9,18 @@ import ( "strings" "testing" "time" + + "github.com/influxdb/influxdb/tsdb" ) var ( - tags = Tags{"foo": "bar", "apple": "orange", "host": "serverA", "region": "uswest"} + tags = tsdb.Tags{"foo": "bar", "apple": "orange", "host": "serverA", "region": "uswest"} maxFloat64 = strconv.FormatFloat(math.MaxFloat64, 'f', 1, 64) minFloat64 = strconv.FormatFloat(-math.MaxFloat64, 'f', 1, 64) ) func TestMarshal(t *testing.T) { - got := tags.hashKey() + got := tags.HashKey() if exp := ",apple=orange,foo=bar,host=serverA,region=uswest"; string(got) != exp { t.Log("got: ", string(got)) t.Log("exp: ", exp) @@ -28,71 +30,71 @@ func TestMarshal(t *testing.T) { func BenchmarkMarshal(b *testing.B) { for i := 0; i < b.N; i++ { - tags.hashKey() + tags.HashKey() } } func BenchmarkParsePointNoTags(b *testing.B) { - line := `cpu value=1 1000000000` + line := `cpu value=1i 1000000000` for i := 0; i < b.N; i++ { - ParsePoints([]byte(line)) + tsdb.ParsePoints([]byte(line)) b.SetBytes(int64(len(line))) } } func BenchmarkParsePointsTagsSorted2(b *testing.B) { - line := `cpu,host=serverA,region=us-west value=1 1000000000` + line := `cpu,host=serverA,region=us-west value=1i 1000000000` for i := 0; i < b.N; i++ { - ParsePoints([]byte(line)) + tsdb.ParsePoints([]byte(line)) b.SetBytes(int64(len(line))) } } func BenchmarkParsePointsTagsSorted5(b *testing.B) { - line := `cpu,env=prod,host=serverA,region=us-west,target=servers,zone=1c value=1 1000000000` + line := `cpu,env=prod,host=serverA,region=us-west,target=servers,zone=1c value=1i 1000000000` for i := 0; i < b.N; i++ { - ParsePoints([]byte(line)) + tsdb.ParsePoints([]byte(line)) b.SetBytes(int64(len(line))) } } func BenchmarkParsePointsTagsSorted10(b *testing.B) { - line := `cpu,env=prod,host=serverA,region=us-west,tag1=value1,tag2=value2,tag3=value3,tag4=value4,tag5=value5,target=servers,zone=1c value=1 1000000000` + line := `cpu,env=prod,host=serverA,region=us-west,tag1=value1,tag2=value2,tag3=value3,tag4=value4,tag5=value5,target=servers,zone=1c value=1i 1000000000` for i := 0; i < b.N; i++ { - ParsePoints([]byte(line)) + tsdb.ParsePoints([]byte(line)) b.SetBytes(int64(len(line))) } } func BenchmarkParsePointsTagsUnSorted2(b *testing.B) { - line := `cpu,region=us-west,host=serverA value=1 1000000000` + line := `cpu,region=us-west,host=serverA value=1i 1000000000` for i := 0; i < b.N; i++ { - pt, _ := ParsePoints([]byte(line)) + pt, _ := tsdb.ParsePoints([]byte(line)) b.SetBytes(int64(len(line))) pt[0].Key() } } func BenchmarkParsePointsTagsUnSorted5(b *testing.B) { - line := `cpu,region=us-west,host=serverA,env=prod,target=servers,zone=1c value=1 1000000000` + line := `cpu,region=us-west,host=serverA,env=prod,target=servers,zone=1c value=1i 1000000000` for i := 0; i < b.N; i++ { - pt, _ := ParsePoints([]byte(line)) + pt, _ := tsdb.ParsePoints([]byte(line)) b.SetBytes(int64(len(line))) pt[0].Key() } } func BenchmarkParsePointsTagsUnSorted10(b *testing.B) { - line := 
`cpu,region=us-west,host=serverA,env=prod,target=servers,zone=1c,tag1=value1,tag2=value2,tag3=value3,tag4=value4,tag5=value5 value=1 1000000000` + line := `cpu,region=us-west,host=serverA,env=prod,target=servers,zone=1c,tag1=value1,tag2=value2,tag3=value3,tag4=value4,tag5=value5 value=1i 1000000000` for i := 0; i < b.N; i++ { - pt, _ := ParsePoints([]byte(line)) + pt, _ := tsdb.ParsePoints([]byte(line)) b.SetBytes(int64(len(line))) pt[0].Key() } } -func test(t *testing.T, line string, point Point) { - pts, err := ParsePointsWithPrecision([]byte(line), time.Unix(0, 0), "n") +func test(t *testing.T, line string, point tsdb.Point) { + pts, err := tsdb.ParsePointsWithPrecision([]byte(line), time.Unix(0, 0), "n") if err != nil { t.Fatalf(`ParsePoints("%s") mismatch. got %v, exp nil`, line, err) } @@ -139,33 +141,60 @@ func test(t *testing.T, line string, point Point) { } func TestParsePointNoValue(t *testing.T) { - pts, err := ParsePointsString("") + pts, err := tsdb.ParsePointsString("") if err != nil { t.Errorf(`ParsePoints("%s") mismatch. got %v, exp nil`, "", err) } if exp := 0; len(pts) != exp { - t.Errorf(`ParsePoints("%s") len mismatch. got %v, exp %vr`, "", len(pts), exp) + t.Errorf(`ParsePoints("%s") len mismatch. got %v, exp %v`, "", len(pts), exp) + } +} + +func TestParsePointWhitespaceValue(t *testing.T) { + pts, err := tsdb.ParsePointsString(" ") + if err != nil { + t.Errorf(`ParsePoints("%s") mismatch. got %v, exp nil`, "", err) + } + + if exp := 0; len(pts) != exp { + t.Errorf(`ParsePoints("%s") len mismatch. got %v, exp %v`, "", len(pts), exp) + } +} + +func TestParsePointSingleEquals(t *testing.T) { + pts, err := tsdb.ParsePointsString("=") + if err == nil { + t.Errorf(`ParsePoints("%s") mismatch. expected error`, "=") + } + + if exp := 0; len(pts) != exp { + t.Errorf(`ParsePoints("%s") len mismatch. got %v, exp %v`, "", len(pts), exp) } } func TestParsePointNoFields(t *testing.T) { - _, err := ParsePointsString("cpu") + _, err := tsdb.ParsePointsString("cpu_load_short,host=server01,region=us-west") + if err == nil { + t.Errorf(`ParsePoints("%s") mismatch. got nil, exp error`, "cpu_load_short,host=server01,region=us-west") + } + + _, err = tsdb.ParsePointsString("cpu") if err == nil { t.Errorf(`ParsePoints("%s") mismatch. got nil, exp error`, "cpu") } - _, err = ParsePointsString("cpu,") + _, err = tsdb.ParsePointsString("cpu,") if err == nil { t.Errorf(`ParsePoints("%s") mismatch. got nil, exp error`, "cpu,") } - _, err = ParsePointsString("cpu, value=1") + _, err = tsdb.ParsePointsString("cpu, value=1") if err == nil { t.Errorf(`ParsePoints("%s") mismatch. got nil, exp error`, "cpu, value=1") } - _, err = ParsePointsString("cpu,,, value=1") + _, err = tsdb.ParsePointsString("cpu,,, value=1") if err == nil { t.Errorf(`ParsePoints("%s") mismatch. got nil, exp error`, "cpu,,, value=1") } @@ -173,165 +202,177 @@ func TestParsePointNoFields(t *testing.T) { } func TestParsePointNoTimestamp(t *testing.T) { - test(t, "cpu value=1", NewPoint("cpu", nil, nil, time.Unix(0, 0))) + test(t, "cpu value=1", tsdb.NewPoint("cpu", nil, nil, time.Unix(0, 0))) } func TestParsePointMissingQuote(t *testing.T) { - _, err := ParsePointsString(`cpu,host=serverA value="test`) + _, err := tsdb.ParsePointsString(`cpu,host=serverA value="test`) if err == nil { t.Errorf(`ParsePoints("%s") mismatch. 
got nil, exp error`, "cpu") } } func TestParsePointMissingTagName(t *testing.T) { - _, err := ParsePointsString(`cpu,host=serverA,=us-east value=1`) + _, err := tsdb.ParsePointsString(`cpu,host=serverA,=us-east value=1i`) if err == nil { - t.Errorf(`ParsePoints("%s") mismatch. got nil, exp error`, `cpu,host=serverA,=us-east value=1`) + t.Errorf(`ParsePoints("%s") mismatch. got nil, exp error`, `cpu,host=serverA,=us-east value=1i`) } - _, err = ParsePointsString(`cpu,host=serverAa\,,=us-east value=1`) + _, err = tsdb.ParsePointsString(`cpu,host=serverAa\,,=us-east value=1i`) if err == nil { - t.Errorf(`ParsePoints("%s") mismatch. got nil, exp error`, `cpu,host=serverAa\,,=us-east value=1`) + t.Errorf(`ParsePoints("%s") mismatch. got nil, exp error`, `cpu,host=serverAa\,,=us-east value=1i`) } - _, err = ParsePointsString(`cpu,host=serverA\,,=us-east value=1`) + _, err = tsdb.ParsePointsString(`cpu,host=serverA\,,=us-east value=1i`) if err == nil { - t.Errorf(`ParsePoints("%s") mismatch. got nil, exp error`, `cpu,host=serverA\,,=us-east value=1`) + t.Errorf(`ParsePoints("%s") mismatch. got nil, exp error`, `cpu,host=serverA\,,=us-east value=1i`) } - _, err = ParsePointsString(`cpu,host=serverA,\ =us-east value=1`) + _, err = tsdb.ParsePointsString(`cpu,host=serverA,\ =us-east value=1i`) if err != nil { - t.Errorf(`ParsePoints("%s") mismatch. got %v, exp nil`, `cpu,host=serverA,\ =us-east value=1`, err) + t.Errorf(`ParsePoints("%s") mismatch. got %v, exp nil`, `cpu,host=serverA,\ =us-east value=1i`, err) } } func TestParsePointMissingTagValue(t *testing.T) { - _, err := ParsePointsString(`cpu,host value=1`) + _, err := tsdb.ParsePointsString(`cpu,host value=1i`) if err == nil { - t.Errorf(`ParsePoints("%s") mismatch. got nil, exp error`, `cpu,host value=1`) + t.Errorf(`ParsePoints("%s") mismatch. got nil, exp error`, `cpu,host value=1i`) } - _, err = ParsePointsString(`cpu,host=serverA,region value=1`) + _, err = tsdb.ParsePointsString(`cpu,host=serverA,region value=1i`) if err == nil { - t.Errorf(`ParsePoints("%s") mismatch. got nil, exp error`, `cpu,host=serverA,region value=1`) + t.Errorf(`ParsePoints("%s") mismatch. got nil, exp error`, `cpu,host=serverA,region value=1i`) } - _, err = ParsePointsString(`cpu,host=serverA,region= value=1`) + _, err = tsdb.ParsePointsString(`cpu,host=serverA,region= value=1i`) if err == nil { - t.Errorf(`ParsePoints("%s") mismatch. got nil, exp error`, `cpu,host=serverA,region= value=1`) + t.Errorf(`ParsePoints("%s") mismatch. got nil, exp error`, `cpu,host=serverA,region= value=1i`) } } func TestParsePointMissingFieldName(t *testing.T) { - _, err := ParsePointsString(`cpu,host=serverA,region=us-west =`) + _, err := tsdb.ParsePointsString(`cpu,host=serverA,region=us-west =`) if err == nil { t.Errorf(`ParsePoints("%s") mismatch. got nil, exp error`, `cpu,host=serverA,region=us-west =`) } - _, err = ParsePointsString(`cpu,host=serverA,region=us-west =123`) + _, err = tsdb.ParsePointsString(`cpu,host=serverA,region=us-west =123i`) if err == nil { - t.Errorf(`ParsePoints("%s") mismatch. got nil, exp error`, `cpu,host=serverA,region=us-west =123`) + t.Errorf(`ParsePoints("%s") mismatch. got nil, exp error`, `cpu,host=serverA,region=us-west =123i`) } - _, err = ParsePointsString(`cpu,host=serverA,region=us-west a\ =123`) + _, err = tsdb.ParsePointsString(`cpu,host=serverA,region=us-west a\ =123i`) if err != nil { - t.Errorf(`ParsePoints("%s") mismatch. got nil, exp error`, `cpu,host=serverA,region=us-west a\ =123`) + t.Errorf(`ParsePoints("%s") mismatch. 
got nil, exp error`, `cpu,host=serverA,region=us-west a\ =123i`) } - _, err = ParsePointsString(`cpu,host=serverA,region=us-west value=123,=456`) + _, err = tsdb.ParsePointsString(`cpu,host=serverA,region=us-west value=123i,=456i`) if err == nil { - t.Errorf(`ParsePoints("%s") mismatch. got nil, exp error`, `cpu,host=serverA,region=us-west value=123,=456`) + t.Errorf(`ParsePoints("%s") mismatch. got nil, exp error`, `cpu,host=serverA,region=us-west value=123i,=456i`) } } func TestParsePointMissingFieldValue(t *testing.T) { - _, err := ParsePointsString(`cpu,host=serverA,region=us-west value=`) + _, err := tsdb.ParsePointsString(`cpu,host=serverA,region=us-west value=`) if err == nil { t.Errorf(`ParsePoints("%s") mismatch. got nil, exp error`, `cpu,host=serverA,region=us-west value=`) } - _, err = ParsePointsString(`cpu,host=serverA,region=us-west value= 1000000000`) + _, err = tsdb.ParsePointsString(`cpu,host=serverA,region=us-west value= 1000000000i`) if err == nil { - t.Errorf(`ParsePoints("%s") mismatch. got nil, exp error`, `cpu,host=serverA,region=us-west value= 1000000000`) + t.Errorf(`ParsePoints("%s") mismatch. got nil, exp error`, `cpu,host=serverA,region=us-west value= 1000000000i`) } - _, err = ParsePointsString(`cpu,host=serverA,region=us-west value=,value2=1`) + _, err = tsdb.ParsePointsString(`cpu,host=serverA,region=us-west value=,value2=1i`) if err == nil { - t.Errorf(`ParsePoints("%s") mismatch. got nil, exp error`, `cpu,host=serverA,region=us-west value=,value2=1`) + t.Errorf(`ParsePoints("%s") mismatch. got nil, exp error`, `cpu,host=serverA,region=us-west value=,value2=1i`) } - _, err = ParsePointsString(`cpu,host=server01,region=us-west 1434055562000000000`) + _, err = tsdb.ParsePointsString(`cpu,host=server01,region=us-west 1434055562000000000i`) if err == nil { - t.Errorf(`ParsePoints("%s") mismatch. got nil, exp error`, `cpu,host=server01,region=us-west 1434055562000000000`) + t.Errorf(`ParsePoints("%s") mismatch. got nil, exp error`, `cpu,host=server01,region=us-west 1434055562000000000i`) } - _, err = ParsePointsString(`cpu,host=server01,region=us-west value=1,b`) + _, err = tsdb.ParsePointsString(`cpu,host=server01,region=us-west value=1i,b`) if err == nil { - t.Errorf(`ParsePoints("%s") mismatch. got nil, exp error`, `cpu,host=server01,region=us-west value=1,b`) + t.Errorf(`ParsePoints("%s") mismatch. got nil, exp error`, `cpu,host=server01,region=us-west value=1i,b`) } } func TestParsePointBadNumber(t *testing.T) { - _, err := ParsePointsString(`cpu,host=serverA,region=us-west value=1a`) + _, err := tsdb.ParsePointsString(`cpu,host=serverA,region=us-west value=1a`) if err == nil { t.Errorf(`ParsePoints("%s") mismatch. got nil, exp error`, `cpu,host=serverA,region=us-west value=1a`) } + _, err = tsdb.ParsePointsString(`cpu,host=serverA,region=us-west value=1ii`) + if err == nil { + t.Errorf(`ParsePoints("%s") mismatch. got nil, exp error`, `cpu,host=serverA,region=us-west value=1ii`) + } + _, err = tsdb.ParsePointsString(`cpu,host=serverA,region=us-west value=1.0i`) + if err == nil { + t.Errorf(`ParsePoints("%s") mismatch. got nil, exp error`, `cpu,host=serverA,region=us-west value=1.0i`) + } } func TestParsePointMaxInt64(t *testing.T) { // out of range - _, err := ParsePointsString(`cpu,host=serverA,region=us-west value=9223372036854775808`) - if err == nil { - t.Errorf(`ParsePoints("%s") mismatch. 
got nil, exp error`, `cpu,host=serverA,region=us-west value=9223372036854775808`) + _, err := tsdb.ParsePointsString(`cpu,host=serverA,region=us-west value=9223372036854775808i`) + exp := `unable to parse 'cpu,host=serverA,region=us-west value=9223372036854775808i': unable to parse integer 9223372036854775808: strconv.ParseInt: parsing "9223372036854775808": value out of range` + if err == nil || (err != nil && err.Error() != exp) { + t.Fatalf("Error mismatch:\nexp: %s\ngot: %v", exp, err) } // max int - _, err = ParsePointsString(`cpu,host=serverA,region=us-west value=9223372036854775807`) + p, err := tsdb.ParsePointsString(`cpu,host=serverA,region=us-west value=9223372036854775807i`) if err != nil { - t.Errorf(`ParsePoints("%s") mismatch. got %v, exp nil`, `cpu,host=serverA,region=us-west value=9223372036854775807`, err) + t.Fatalf(`ParsePoints("%s") mismatch. got %v, exp nil`, `cpu,host=serverA,region=us-west value=9223372036854775807i`, err) + } + if exp, got := int64(9223372036854775807), p[0].Fields()["value"].(int64); exp != got { + t.Fatalf("ParsePoints Value mistmatch. \nexp: %v\ngot: %v", exp, got) } // leading zeros - _, err = ParsePointsString(`cpu,host=serverA,region=us-west value=0009223372036854775807`) + _, err = tsdb.ParsePointsString(`cpu,host=serverA,region=us-west value=0009223372036854775807i`) if err != nil { - t.Errorf(`ParsePoints("%s") mismatch. got %v, exp nil`, `cpu,host=serverA,region=us-west value=0009223372036854775807`, err) + t.Fatalf(`ParsePoints("%s") mismatch. got %v, exp nil`, `cpu,host=serverA,region=us-west value=0009223372036854775807i`, err) } } func TestParsePointMinInt64(t *testing.T) { // out of range - _, err := ParsePointsString(`cpu,host=serverA,region=us-west value=-9223372036854775809`) + _, err := tsdb.ParsePointsString(`cpu,host=serverA,region=us-west value=-9223372036854775809i`) if err == nil { - t.Errorf(`ParsePoints("%s") mismatch. got nil, exp error`, `cpu,host=serverA,region=us-west value=-9223372036854775809`) + t.Errorf(`ParsePoints("%s") mismatch. got nil, exp error`, `cpu,host=serverA,region=us-west value=-9223372036854775809i`) } // min int - _, err = ParsePointsString(`cpu,host=serverA,region=us-west value=-9223372036854775808`) + _, err = tsdb.ParsePointsString(`cpu,host=serverA,region=us-west value=-9223372036854775808i`) if err != nil { - t.Errorf(`ParsePoints("%s") mismatch. got %v, exp nil`, `cpu,host=serverA,region=us-west value=-9223372036854775808`, err) + t.Errorf(`ParsePoints("%s") mismatch. got %v, exp nil`, `cpu,host=serverA,region=us-west value=-9223372036854775808i`, err) } // leading zeros - _, err = ParsePointsString(`cpu,host=serverA,region=us-west value=-0009223372036854775808`) + _, err = tsdb.ParsePointsString(`cpu,host=serverA,region=us-west value=-0009223372036854775808i`) if err != nil { - t.Errorf(`ParsePoints("%s") mismatch. got %v, exp nil`, `cpu,host=serverA,region=us-west value=-0009223372036854775808`, err) + t.Errorf(`ParsePoints("%s") mismatch. got %v, exp nil`, `cpu,host=serverA,region=us-west value=-0009223372036854775808i`, err) } } func TestParsePointMaxFloat64(t *testing.T) { // out of range - _, err := ParsePointsString(fmt.Sprintf(`cpu,host=serverA,region=us-west value=%s`, "1"+string(maxFloat64))) + _, err := tsdb.ParsePointsString(fmt.Sprintf(`cpu,host=serverA,region=us-west value=%s`, "1"+string(maxFloat64))) if err == nil { t.Errorf(`ParsePoints("%s") mismatch. 
got nil, exp error`, `cpu,host=serverA,region=us-west value=...`) } // max float - _, err = ParsePointsString(fmt.Sprintf(`cpu,host=serverA,region=us-west value=%s`, string(maxFloat64))) + _, err = tsdb.ParsePointsString(fmt.Sprintf(`cpu,host=serverA,region=us-west value=%s`, string(maxFloat64))) if err != nil { t.Errorf(`ParsePoints("%s") mismatch. got %v, exp nil`, `cpu,host=serverA,region=us-west value=9223372036854775807`, err) } // leading zeros - _, err = ParsePointsString(fmt.Sprintf(`cpu,host=serverA,region=us-west value=%s`, "0000"+string(maxFloat64))) + _, err = tsdb.ParsePointsString(fmt.Sprintf(`cpu,host=serverA,region=us-west value=%s`, "0000"+string(maxFloat64))) if err != nil { t.Errorf(`ParsePoints("%s") mismatch. got %v, exp nil`, `cpu,host=serverA,region=us-west value=0009223372036854775807`, err) } @@ -339,40 +380,40 @@ func TestParsePointMaxFloat64(t *testing.T) { func TestParsePointMinFloat64(t *testing.T) { // out of range - _, err := ParsePointsString(fmt.Sprintf(`cpu,host=serverA,region=us-west value=%s`, "-1"+string(minFloat64)[1:])) + _, err := tsdb.ParsePointsString(fmt.Sprintf(`cpu,host=serverA,region=us-west value=%s`, "-1"+string(minFloat64)[1:])) if err == nil { t.Errorf(`ParsePoints("%s") mismatch. got nil, exp error`, `cpu,host=serverA,region=us-west value=...`) } // min float - _, err = ParsePointsString(fmt.Sprintf(`cpu,host=serverA,region=us-west value=%s`, string(minFloat64))) + _, err = tsdb.ParsePointsString(fmt.Sprintf(`cpu,host=serverA,region=us-west value=%s`, string(minFloat64))) if err != nil { t.Errorf(`ParsePoints("%s") mismatch. got %v, exp nil`, `cpu,host=serverA,region=us-west value=...`, err) } // leading zeros - _, err = ParsePointsString(fmt.Sprintf(`cpu,host=serverA,region=us-west value=%s`, "-0000000"+string(minFloat64)[1:])) + _, err = tsdb.ParsePointsString(fmt.Sprintf(`cpu,host=serverA,region=us-west value=%s`, "-0000000"+string(minFloat64)[1:])) if err != nil { t.Errorf(`ParsePoints("%s") mismatch. got %v, exp nil`, `cpu,host=serverA,region=us-west value=...`, err) } } func TestParsePointNumberNonNumeric(t *testing.T) { - _, err := ParsePointsString(`cpu,host=serverA,region=us-west value=.1a`) + _, err := tsdb.ParsePointsString(`cpu,host=serverA,region=us-west value=.1a`) if err == nil { t.Errorf(`ParsePoints("%s") mismatch. got nil, exp error`, `cpu,host=serverA,region=us-west value=.1a`) } } func TestParsePointNegativeWrongPlace(t *testing.T) { - _, err := ParsePointsString(`cpu,host=serverA,region=us-west value=0.-1`) + _, err := tsdb.ParsePointsString(`cpu,host=serverA,region=us-west value=0.-1`) if err == nil { t.Errorf(`ParsePoints("%s") mismatch. got nil, exp error`, `cpu,host=serverA,region=us-west value=0.-1`) } } func TestParsePointFloatMultipleDecimals(t *testing.T) { - _, err := ParsePointsString(`cpu,host=serverA,region=us-west value=1.1.1`) + _, err := tsdb.ParsePointsString(`cpu,host=serverA,region=us-west value=1.1.1`) if err == nil { t.Errorf(`ParsePoints("%s") mismatch. got nil, exp error`, `cpu,host=serverA,region=us-west value=1.1.1`) } @@ -380,40 +421,40 @@ func TestParsePointFloatMultipleDecimals(t *testing.T) { } func TestParsePointInteger(t *testing.T) { - _, err := ParsePointsString(`cpu,host=serverA,region=us-west value=1`) + _, err := tsdb.ParsePointsString(`cpu,host=serverA,region=us-west value=1i`) if err != nil { - t.Errorf(`ParsePoints("%s") mismatch. got %v, exp nil`, `cpu,host=serverA,region=us-west value=1`, err) + t.Errorf(`ParsePoints("%s") mismatch. 
got %v, exp nil`, `cpu,host=serverA,region=us-west value=1i`, err) } } func TestParsePointNegativeInteger(t *testing.T) { - _, err := ParsePointsString(`cpu,host=serverA,region=us-west value=-1`) + _, err := tsdb.ParsePointsString(`cpu,host=serverA,region=us-west value=-1i`) if err != nil { - t.Errorf(`ParsePoints("%s") mismatch. got %v, exp nil`, `cpu,host=serverA,region=us-west value=-1`, err) + t.Errorf(`ParsePoints("%s") mismatch. got %v, exp nil`, `cpu,host=serverA,region=us-west value=-1i`, err) } } func TestParsePointNegativeFloat(t *testing.T) { - _, err := ParsePointsString(`cpu,host=serverA,region=us-west value=-1.0`) + _, err := tsdb.ParsePointsString(`cpu,host=serverA,region=us-west value=-1.0`) if err != nil { t.Errorf(`ParsePoints("%s") mismatch. got %v, exp nil`, `cpu,host=serverA,region=us-west value=-1.0`, err) } } func TestParsePointFloatNoLeadingDigit(t *testing.T) { - _, err := ParsePointsString(`cpu,host=serverA,region=us-west value=.1`) + _, err := tsdb.ParsePointsString(`cpu,host=serverA,region=us-west value=.1`) if err != nil { t.Errorf(`ParsePoints("%s") mismatch. got %v, exp nil`, `cpu,host=serverA,region=us-west value=-1.0`, err) } } func TestParsePointFloatScientific(t *testing.T) { - _, err := ParsePointsString(`cpu,host=serverA,region=us-west value=1.0e4`) + _, err := tsdb.ParsePointsString(`cpu,host=serverA,region=us-west value=1.0e4`) if err != nil { t.Errorf(`ParsePoints("%s") mismatch. got %v, exp nil`, `cpu,host=serverA,region=us-west value=1.0e4`, err) } - pts, err := ParsePointsString(`cpu,host=serverA,region=us-west value=1e4`) + pts, err := tsdb.ParsePointsString(`cpu,host=serverA,region=us-west value=1e4`) if err != nil { t.Errorf(`ParsePoints("%s") mismatch. got %v, exp nil`, `cpu,host=serverA,region=us-west value=1.0e4`, err) } @@ -425,160 +466,184 @@ func TestParsePointFloatScientific(t *testing.T) { } func TestParsePointFloatScientificDecimal(t *testing.T) { - _, err := ParsePointsString(`cpu,host=serverA,region=us-west value=1.0e-4`) + _, err := tsdb.ParsePointsString(`cpu,host=serverA,region=us-west value=1.0e-4`) if err != nil { t.Errorf(`ParsePoints("%s") mismatch. got %v, exp nil`, `cpu,host=serverA,region=us-west value=1.0e-4`, err) } } func TestParsePointFloatNegativeScientific(t *testing.T) { - _, err := ParsePointsString(`cpu,host=serverA,region=us-west value=-1.0e-4`) + _, err := tsdb.ParsePointsString(`cpu,host=serverA,region=us-west value=-1.0e-4`) if err != nil { t.Errorf(`ParsePoints("%s") mismatch. got %v, exp nil`, `cpu,host=serverA,region=us-west value=-1.0e-4`, err) } } func TestParsePointBooleanInvalid(t *testing.T) { - _, err := ParsePointsString(`cpu,host=serverA,region=us-west value=a`) + _, err := tsdb.ParsePointsString(`cpu,host=serverA,region=us-west value=a`) if err == nil { t.Errorf(`ParsePoints("%s") mismatch. got nil, exp error`, `cpu,host=serverA,region=us-west value=a`) } } +func TestParsePointScientificIntInvalid(t *testing.T) { + _, err := tsdb.ParsePointsString(`cpu,host=serverA,region=us-west value=9ie10`) + if err == nil { + t.Errorf(`ParsePoints("%s") mismatch. got nil, exp error`, `cpu,host=serverA,region=us-west value=9ie10`) + } + + _, err = tsdb.ParsePointsString(`cpu,host=serverA,region=us-west value=9e10i`) + if err == nil { + t.Errorf(`ParsePoints("%s") mismatch. 
got nil, exp error`, `cpu,host=serverA,region=us-west value=9e10i`) + } + +} + func TestParsePointUnescape(t *testing.T) { - test(t, `foo\,bar value=1`, - NewPoint( + test(t, `foo\,bar value=1i`, + tsdb.NewPoint( "foo,bar", // comma in the name - Tags{}, - Fields{ + tsdb.Tags{}, + tsdb.Fields{ "value": 1, }, time.Unix(0, 0))) // commas in measuremnt name test(t, `cpu\,main,regions=east\,west value=1.0`, - NewPoint( + tsdb.NewPoint( "cpu,main", // comma in the name - Tags{ + tsdb.Tags{ "regions": "east,west", }, - Fields{ + tsdb.Fields{ "value": 1.0, }, time.Unix(0, 0))) // spaces in measurement name test(t, `cpu\ load,region=east value=1.0`, - NewPoint( + tsdb.NewPoint( "cpu load", // space in the name - Tags{ + tsdb.Tags{ "region": "east", }, - Fields{ + tsdb.Fields{ "value": 1.0, }, time.Unix(0, 0))) // commas in tag names test(t, `cpu,region\,zone=east value=1.0`, - NewPoint("cpu", - Tags{ + tsdb.NewPoint("cpu", + tsdb.Tags{ "region,zone": "east", // comma in the tag name }, - Fields{ + tsdb.Fields{ "value": 1.0, }, time.Unix(0, 0))) // spaces in tag names test(t, `cpu,region\ zone=east value=1.0`, - NewPoint("cpu", - Tags{ + tsdb.NewPoint("cpu", + tsdb.Tags{ "region zone": "east", // comma in the tag name }, - Fields{ + tsdb.Fields{ "value": 1.0, }, time.Unix(0, 0))) // commas in tag values test(t, `cpu,regions=east\,west value=1.0`, - NewPoint("cpu", - Tags{ + tsdb.NewPoint("cpu", + tsdb.Tags{ "regions": "east,west", // comma in the tag value }, - Fields{ + tsdb.Fields{ "value": 1.0, }, time.Unix(0, 0))) // spaces in tag values test(t, `cpu,regions=east\ west value=1.0`, - NewPoint("cpu", - Tags{ + tsdb.NewPoint("cpu", + tsdb.Tags{ "regions": "east west", // comma in the tag value }, - Fields{ + tsdb.Fields{ "value": 1.0, }, time.Unix(0, 0))) // commas in field names test(t, `cpu,regions=east value\,ms=1.0`, - NewPoint("cpu", - Tags{ + tsdb.NewPoint("cpu", + tsdb.Tags{ "regions": "east", }, - Fields{ + tsdb.Fields{ "value,ms": 1.0, // comma in the field name }, time.Unix(0, 0))) // spaces in field names test(t, `cpu,regions=east value\ ms=1.0`, - NewPoint("cpu", - Tags{ + tsdb.NewPoint("cpu", + tsdb.Tags{ "regions": "east", }, - Fields{ + tsdb.Fields{ "value ms": 1.0, // comma in the field name }, time.Unix(0, 0))) // commas in field values test(t, `cpu,regions=east value="1,0"`, - NewPoint("cpu", - Tags{ + tsdb.NewPoint("cpu", + tsdb.Tags{ "regions": "east", }, - Fields{ + tsdb.Fields{ "value": "1,0", // comma in the field value }, time.Unix(0, 0))) // random character escaped test(t, `cpu,regions=eas\t value=1.0`, - NewPoint( + tsdb.NewPoint( "cpu", - Tags{ + tsdb.Tags{ "regions": "eas\\t", }, - Fields{ + tsdb.Fields{ "value": 1.0, }, time.Unix(0, 0))) + + // field name using escape char. + test(t, `cpu \a=1i`, + tsdb.NewPoint( + "cpu", + tsdb.Tags{}, + tsdb.Fields{ + "\\a": 1, // Left as parsed since it's not a known escape sequence. + }, + time.Unix(0, 0))) + } func TestParsePointWithTags(t *testing.T) { test(t, "cpu,host=serverA,region=us-east value=1.0 1000000000", - NewPoint("cpu", - Tags{"host": "serverA", "region": "us-east"}, - Fields{"value": 1.0}, time.Unix(1, 0))) + tsdb.NewPoint("cpu", + tsdb.Tags{"host": "serverA", "region": "us-east"}, + tsdb.Fields{"value": 1.0}, time.Unix(1, 0))) } func TestParsPointWithDuplicateTags(t *testing.T) { - _, err := ParsePoints([]byte(`cpu,host=serverA,host=serverB value=1 1000000000`)) + _, err := tsdb.ParsePoints([]byte(`cpu,host=serverA,host=serverB value=1i 1000000000`)) if err == nil { t.Fatalf(`ParsePoint() expected error. 
@@ -586,12 +651,12 @@ func TestParsPointWithDuplicateTags(t *testing.T) {
 
 func TestParsePointWithStringField(t *testing.T) {
 	test(t, `cpu,host=serverA,region=us-east value=1.0,str="foo",str2="bar" 1000000000`,
-		NewPoint("cpu",
-			Tags{
+		tsdb.NewPoint("cpu",
+			tsdb.Tags{
 				"host": "serverA",
 				"region": "us-east",
 			},
-			Fields{
+			tsdb.Fields{
 				"value": 1.0,
 				"str": "foo",
 				"str2": "bar",
@@ -600,12 +665,12 @@ func TestParsePointWithStringField(t *testing.T) {
 	)
 
 	test(t, `cpu,host=serverA,region=us-east str="foo \" bar" 1000000000`,
-		NewPoint("cpu",
-			Tags{
+		tsdb.NewPoint("cpu",
+			tsdb.Tags{
 				"host": "serverA",
 				"region": "us-east",
 			},
-			Fields{
+			tsdb.Fields{
 				"str": `foo " bar`,
 			},
 			time.Unix(1, 0)),
@@ -615,13 +680,13 @@ func TestParsePointWithStringField(t *testing.T) {
 }
 
 func TestParsePointWithStringWithSpaces(t *testing.T) {
 	test(t, `cpu,host=serverA,region=us-east value=1.0,str="foo bar" 1000000000`,
-		NewPoint(
+		tsdb.NewPoint(
 			"cpu",
-			Tags{
+			tsdb.Tags{
 				"host": "serverA",
 				"region": "us-east",
 			},
-			Fields{
+			tsdb.Fields{
 				"value": 1.0,
 				"str": "foo bar", // spaces in string value
 			},
@@ -629,16 +694,32 @@ func TestParsePointWithStringWithSpaces(t *testing.T) {
 	)
 }
 
+func TestParsePointWithStringWithNewline(t *testing.T) {
+	test(t, "cpu,host=serverA,region=us-east value=1.0,str=\"foo\nbar\" 1000000000",
+		tsdb.NewPoint(
+			"cpu",
+			tsdb.Tags{
+				"host": "serverA",
+				"region": "us-east",
+			},
+			tsdb.Fields{
+				"value": 1.0,
+				"str": "foo\nbar", // newline in string value
+			},
+			time.Unix(1, 0)),
+	)
+}
+
 func TestParsePointWithStringWithCommas(t *testing.T) {
 	// escaped comma
 	test(t, `cpu,host=serverA,region=us-east value=1.0,str="foo\,bar" 1000000000`,
-		NewPoint(
+		tsdb.NewPoint(
 			"cpu",
-			Tags{
+			tsdb.Tags{
 				"host": "serverA",
 				"region": "us-east",
 			},
-			Fields{
+			tsdb.Fields{
 				"value": 1.0,
 				"str": `foo\,bar`, // commas in string value
 			},
@@ -647,13 +728,13 @@ func TestParsePointWithStringWithCommas(t *testing.T) {
 
 	// non-escaped comma
 	test(t, `cpu,host=serverA,region=us-east value=1.0,str="foo,bar" 1000000000`,
-		NewPoint(
+		tsdb.NewPoint(
 			"cpu",
-			Tags{
+			tsdb.Tags{
 				"host": "serverA",
 				"region": "us-east",
 			},
-			Fields{
+			tsdb.Fields{
 				"value": 1.0,
 				"str": "foo,bar", // commas in string value
 			},
@@ -665,13 +746,13 @@ func TestParsePointWithStringWithCommas(t *testing.T) {
 
 func TestParsePointEscapedStringsAndCommas(t *testing.T) {
 	// non-escaped comma and quotes
 	test(t, `cpu,host=serverA,region=us-east value="{Hello\"{,}\" World}" 1000000000`,
-		NewPoint(
+		tsdb.NewPoint(
 			"cpu",
-			Tags{
+			tsdb.Tags{
 				"host": "serverA",
 				"region": "us-east",
 			},
-			Fields{
+			tsdb.Fields{
 				"value": `{Hello"{,}" World}`,
 			},
 			time.Unix(1, 0)),
@@ -679,13 +760,13 @@ func TestParsePointEscapedStringsAndCommas(t *testing.T) {
 
 	// escaped comma and quotes
 	test(t, `cpu,host=serverA,region=us-east value="{Hello\"{\,}\" World}" 1000000000`,
-		NewPoint(
+		tsdb.NewPoint(
 			"cpu",
-			Tags{
+			tsdb.Tags{
 				"host": "serverA",
 				"region": "us-east",
 			},
-			Fields{
+			tsdb.Fields{
 				"value": `{Hello"{\,}" World}`,
 			},
 			time.Unix(1, 0)),
@@ -695,13 +776,13 @@ func TestParsePointEscapedStringsAndCommas(t *testing.T) {
 
 func TestParsePointWithStringWithEquals(t *testing.T) {
 	test(t, `cpu,host=serverA,region=us-east str="foo=bar",value=1.0 1000000000`,
-		NewPoint(
+		tsdb.NewPoint(
 			"cpu",
-			Tags{
+			tsdb.Tags{
 				"host": "serverA",
 				"region": "us-east",
 			},
-			Fields{
+			tsdb.Fields{
 				"value": 1.0,
 				"str": "foo=bar", // equals sign in string value
 			},
@@ -711,13 +792,13 @@ func TestParsePointWithBoolField(t 
*testing.T) { test(t, `cpu,host=serverA,region=us-east true=true,t=t,T=T,TRUE=TRUE,True=True,false=false,f=f,F=F,FALSE=FALSE,False=False 1000000000`, - NewPoint( + tsdb.NewPoint( "cpu", - Tags{ + tsdb.Tags{ "host": "serverA", "region": "us-east", }, - Fields{ + tsdb.Fields{ "t": true, "T": true, "true": true, @@ -735,25 +816,36 @@ func TestParsePointWithBoolField(t *testing.T) { func TestParsePointUnicodeString(t *testing.T) { test(t, `cpu,host=serverA,region=us-east value="wè" 1000000000`, - NewPoint( + tsdb.NewPoint( "cpu", - Tags{ + tsdb.Tags{ "host": "serverA", "region": "us-east", }, - Fields{ + tsdb.Fields{ "value": "wè", }, time.Unix(1, 0)), ) } +func TestNewPointFloatWithoutDecimal(t *testing.T) { + test(t, `cpu value=1 1000000000`, + tsdb.NewPoint( + "cpu", + tsdb.Tags{}, + tsdb.Fields{ + "value": 1.0, + }, + time.Unix(1, 0)), + ) +} func TestNewPointNegativeFloat(t *testing.T) { test(t, `cpu value=-0.64 1000000000`, - NewPoint( + tsdb.NewPoint( "cpu", - Tags{}, - Fields{ + tsdb.Tags{}, + tsdb.Fields{ "value": -0.64, }, time.Unix(1, 0)), @@ -762,10 +854,10 @@ func TestNewPointNegativeFloat(t *testing.T) { func TestNewPointFloatNoDecimal(t *testing.T) { test(t, `cpu value=1. 1000000000`, - NewPoint( + tsdb.NewPoint( "cpu", - Tags{}, - Fields{ + tsdb.Tags{}, + tsdb.Fields{ "value": 1.0, }, time.Unix(1, 0)), @@ -774,10 +866,10 @@ func TestNewPointFloatNoDecimal(t *testing.T) { func TestNewPointFloatScientific(t *testing.T) { test(t, `cpu value=6.632243e+06 1000000000`, - NewPoint( + tsdb.NewPoint( "cpu", - Tags{}, - Fields{ + tsdb.Tags{}, + tsdb.Fields{ "value": float64(6632243), }, time.Unix(1, 0)), @@ -785,11 +877,11 @@ func TestNewPointFloatScientific(t *testing.T) { } func TestNewPointLargeInteger(t *testing.T) { - test(t, `cpu value=6632243 1000000000`, - NewPoint( + test(t, `cpu value=6632243i 1000000000`, + tsdb.NewPoint( "cpu", - Tags{}, - Fields{ + tsdb.Tags{}, + tsdb.Fields{ "value": 6632243, // if incorrectly encoded as a float, it would show up as 6.632243e+06 }, time.Unix(1, 0)), @@ -798,29 +890,44 @@ func TestNewPointLargeInteger(t *testing.T) { func TestNewPointNaN(t *testing.T) { test(t, `cpu value=NaN 1000000000`, - NewPoint( + tsdb.NewPoint( "cpu", - Tags{}, - Fields{ + tsdb.Tags{}, + tsdb.Fields{ "value": math.NaN(), }, time.Unix(1, 0)), ) test(t, `cpu value=nAn 1000000000`, - NewPoint( + tsdb.NewPoint( "cpu", - Tags{}, - Fields{ + tsdb.Tags{}, + tsdb.Fields{ "value": math.NaN(), }, time.Unix(1, 0)), ) +} +func TestNewPointLargeNumberOfTags(t *testing.T) { + tags := "" + for i := 0; i < 255; i++ { + tags += fmt.Sprintf(",tag%d=value%d", i, i) + } + + pt, err := tsdb.ParsePointsString(fmt.Sprintf("cpu%s value=1", tags)) + if err != nil { + t.Fatalf("ParsePoints() with max tags failed: %v", err) + } + + if len(pt[0].Tags()) != 255 { + t.Fatalf("ParsePoints() with max tags failed: %v", err) + } } func TestParsePointIntsFloats(t *testing.T) { - pts, err := ParsePoints([]byte(`cpu,host=serverA,region=us-east int=10,float=11.0,float2=12.1 1000000000`)) + pts, err := tsdb.ParsePoints([]byte(`cpu,host=serverA,region=us-east int=10i,float=11.0,float2=12.1 1000000000`)) if err != nil { t.Fatalf(`ParsePoints() failed. got %s`, err) } @@ -845,7 +952,7 @@ func TestParsePointIntsFloats(t *testing.T) { } func TestParsePointKeyUnsorted(t *testing.T) { - pts, err := ParsePoints([]byte("cpu,last=1,first=2 value=1")) + pts, err := tsdb.ParsePoints([]byte("cpu,last=1,first=2 value=1i")) if err != nil { t.Fatalf(`ParsePoints() failed. 
got %s`, err) } @@ -861,8 +968,8 @@ func TestParsePointKeyUnsorted(t *testing.T) { } func TestParsePointToString(t *testing.T) { - line := `cpu,host=serverA,region=us-east bool=false,float=11.0,float2=12.123,int=10,str="string val" 1000000000` - pts, err := ParsePoints([]byte(line)) + line := `cpu,host=serverA,region=us-east bool=false,float=11.0,float2=12.123,int=10i,str="string val" 1000000000` + pts, err := tsdb.ParsePoints([]byte(line)) if err != nil { t.Fatalf(`ParsePoints() failed. got %s`, err) } @@ -876,8 +983,8 @@ func TestParsePointToString(t *testing.T) { t.Errorf("ParsePoint() to string mismatch:\n got %v\n exp %v", got, line) } - pt = NewPoint("cpu", Tags{"host": "serverA", "region": "us-east"}, - Fields{"int": 10, "float": float64(11.0), "float2": float64(12.123), "bool": false, "str": "string val"}, + pt = tsdb.NewPoint("cpu", tsdb.Tags{"host": "serverA", "region": "us-east"}, + tsdb.Fields{"int": 10, "float": float64(11.0), "float2": float64(12.123), "bool": false, "str": "string val"}, time.Unix(1, 0)) got = pt.String() @@ -937,7 +1044,7 @@ func TestParsePointsWithPrecision(t *testing.T) { }, } for _, test := range tests { - pts, err := ParsePointsWithPrecision([]byte(test.line), time.Now().UTC(), test.precision) + pts, err := tsdb.ParsePointsWithPrecision([]byte(test.line), time.Now().UTC(), test.precision) if err != nil { t.Fatalf(`%s: ParsePoints() failed. got %s`, test.name, err) } @@ -999,7 +1106,7 @@ func TestParsePointsWithPrecisionNoTime(t *testing.T) { } for _, test := range tests { - pts, err := ParsePointsWithPrecision([]byte(line), tm, test.precision) + pts, err := tsdb.ParsePointsWithPrecision([]byte(line), tm, test.precision) if err != nil { t.Fatalf(`%s: ParsePoints() failed. got %s`, test.name, err) } @@ -1051,7 +1158,7 @@ cpu,host=serverA,region=us-east value=1.0 946730096789012345`, }, } for _, test := range tests { - pts, err := ParsePointsWithPrecision([]byte(test.batch), time.Now().UTC(), "") + pts, err := tsdb.ParsePointsWithPrecision([]byte(test.batch), time.Now().UTC(), "") if err != nil { t.Fatalf(`%s: ParsePoints() failed. 
got %s`, test.name, err) } @@ -1073,19 +1180,19 @@ cpu,host=serverA,region=us-east value=1.0 946730096789012345`, func TestNewPointEscaped(t *testing.T) { // commas - pt := NewPoint("cpu,main", Tags{"tag,bar": "value"}, Fields{"name,bar": 1.0}, time.Unix(0, 0)) + pt := tsdb.NewPoint("cpu,main", tsdb.Tags{"tag,bar": "value"}, tsdb.Fields{"name,bar": 1.0}, time.Unix(0, 0)) if exp := `cpu\,main,tag\,bar=value name\,bar=1.0 0`; pt.String() != exp { t.Errorf("NewPoint().String() mismatch.\ngot %v\nexp %v", pt.String(), exp) } // spaces - pt = NewPoint("cpu main", Tags{"tag bar": "value"}, Fields{"name bar": 1.0}, time.Unix(0, 0)) + pt = tsdb.NewPoint("cpu main", tsdb.Tags{"tag bar": "value"}, tsdb.Fields{"name bar": 1.0}, time.Unix(0, 0)) if exp := `cpu\ main,tag\ bar=value name\ bar=1.0 0`; pt.String() != exp { t.Errorf("NewPoint().String() mismatch.\ngot %v\nexp %v", pt.String(), exp) } // equals - pt = NewPoint("cpu=main", Tags{"tag=bar": "value=foo"}, Fields{"name=bar": 1.0}, time.Unix(0, 0)) + pt = tsdb.NewPoint("cpu=main", tsdb.Tags{"tag=bar": "value=foo"}, tsdb.Fields{"name=bar": 1.0}, time.Unix(0, 0)) if exp := `cpu\=main,tag\=bar=value\=foo name\=bar=1.0 0`; pt.String() != exp { t.Errorf("NewPoint().String() mismatch.\ngot %v\nexp %v", pt.String(), exp) } @@ -1093,14 +1200,14 @@ func TestNewPointEscaped(t *testing.T) { func TestNewPointUnhandledType(t *testing.T) { // nil value - pt := NewPoint("cpu", nil, Fields{"value": nil}, time.Unix(0, 0)) + pt := tsdb.NewPoint("cpu", nil, tsdb.Fields{"value": nil}, time.Unix(0, 0)) if exp := `cpu value= 0`; pt.String() != exp { t.Errorf("NewPoint().String() mismatch.\ngot %v\nexp %v", pt.String(), exp) } // unsupported type gets stored as string now := time.Unix(0, 0).UTC() - pt = NewPoint("cpu", nil, Fields{"value": now}, time.Unix(0, 0)) + pt = tsdb.NewPoint("cpu", nil, tsdb.Fields{"value": now}, time.Unix(0, 0)) if exp := `cpu value="1970-01-01 00:00:00 +0000 UTC" 0`; pt.String() != exp { t.Errorf("NewPoint().String() mismatch.\ngot %v\nexp %v", pt.String(), exp) } diff --git a/Godeps/_workspace/src/github.com/influxdb/influxdb/tsdb/query_executor.go b/Godeps/_workspace/src/github.com/influxdb/influxdb/tsdb/query_executor.go index 4aca953b1..14acc11b2 100644 --- a/Godeps/_workspace/src/github.com/influxdb/influxdb/tsdb/query_executor.go +++ b/Godeps/_workspace/src/github.com/influxdb/influxdb/tsdb/query_executor.go @@ -44,13 +44,13 @@ type QueryExecutor struct { Logger *log.Logger // the local data store - store *Store + Store *Store } // NewQueryExecutor returns an initialized QueryExecutor func NewQueryExecutor(store *Store) *QueryExecutor { return &QueryExecutor{ - store: store, + Store: store, Logger: log.New(os.Stderr, "[query] ", log.LstdFlags), } } @@ -199,7 +199,7 @@ func (q *QueryExecutor) ExecuteQuery(query *influxql.Query, database string, chu } // Plan creates an execution plan for the given SelectStatement and returns an Executor. -func (q *QueryExecutor) plan(stmt *influxql.SelectStatement, chunkSize int) (Executor, error) { +func (q *QueryExecutor) Plan(stmt *influxql.SelectStatement, chunkSize int) (*Executor, error) { shards := map[uint64]meta.ShardInfo{} // Shards requiring mappers. // Replace instances of "now()" with the current time, and check the resultant times. 
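The hunk below is the interesting part of this file: the unexported `plan()` that picked between a `RawExecutor` and an `AggregateExecutor` is gone, and the exported `Plan()` now returns a single unified `*Executor` for both raw and aggregate statements (with `Store` exported alongside it). A rough sketch of what a caller can now do; this assumes the unified Executor keeps the `Execute()` row channel the old executors exposed, and it omits the `MetaStore`/`ShardMapper` wiring a real deployment needs, so treat it as a shape sketch rather than a paste-and-run program:

```go
package main

import (
	"fmt"
	"log"

	"github.com/influxdb/influxdb/influxql"
	"github.com/influxdb/influxdb/tsdb"
)

func main() {
	store := tsdb.NewStore("/tmp/influxdb-data") // hypothetical data directory
	if err := store.Open(); err != nil {
		log.Fatal(err)
	}
	q := tsdb.NewQueryExecutor(store)
	// q.MetaStore and q.ShardMapper must be set before planning; omitted here.

	s, err := influxql.ParseStatement(`SELECT value FROM cpu WHERE time > now() - 1h`)
	if err != nil {
		log.Fatal(err)
	}
	stmt := s.(*influxql.SelectStatement)

	// Plan is exported now and returns one *Executor regardless of whether
	// the statement is raw or aggregate.
	e, err := q.Plan(stmt, 100)
	if err != nil {
		log.Fatal(err)
	}
	for row := range e.Execute() { // assumed: Execute() yields *influxql.Row
		fmt.Println(row)
	}
}
```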
@@ -245,31 +245,14 @@ func (q *QueryExecutor) plan(stmt *influxql.SelectStatement, chunkSize int) (Exe mappers = append(mappers, m) } - var executor Executor - if len(mappers) > 0 { - // All Mapper are of same type, so check first to determine correct Executor type. - if _, ok := mappers[0].(*RawMapper); ok { - executor = NewRawExecutor(stmt, mappers, chunkSize) - } else { - executor = NewAggregateExecutor(stmt, mappers) - } - } else { - // With no mappers, the Executor type doesn't matter. - executor = NewRawExecutor(stmt, nil, chunkSize) - } + executor := NewExecutor(stmt, mappers, chunkSize) return executor, nil } // executeSelectStatement plans and executes a select statement against a database. func (q *QueryExecutor) executeSelectStatement(statementID int, stmt *influxql.SelectStatement, results chan *influxql.Result, chunkSize int) error { - // Perform any necessary query re-writing. - stmt, err := q.rewriteSelectStatement(stmt) - if err != nil { - return err - } - // Plan statement execution. - e, err := q.plan(stmt, chunkSize) + e, err := q.Plan(stmt, chunkSize) if err != nil { return err } @@ -282,10 +265,9 @@ func (q *QueryExecutor) executeSelectStatement(statementID int, stmt *influxql.S for row := range ch { if row.Err != nil { return row.Err - } else { - resultSent = true - results <- &influxql.Result{StatementID: statementID, Series: []*influxql.Row{row}} } + resultSent = true + results <- &influxql.Result{StatementID: statementID, Series: []*influxql.Row{row}} } if !resultSent { @@ -295,85 +277,6 @@ func (q *QueryExecutor) executeSelectStatement(statementID int, stmt *influxql.S return nil } -// rewriteSelectStatement performs any necessary query re-writing. -func (q *QueryExecutor) rewriteSelectStatement(stmt *influxql.SelectStatement) (*influxql.SelectStatement, error) { - var err error - - // Expand regex expressions in the FROM clause. - sources, err := q.expandSources(stmt.Sources) - if err != nil { - return nil, err - } - stmt.Sources = sources - - // Expand wildcards in the fields or GROUP BY. - if stmt.HasWildcard() { - stmt, err = q.expandWildcards(stmt) - if err != nil { - return nil, err - } - } - - stmt.RewriteDistinct() - - return stmt, nil -} - -// expandWildcards returns a new SelectStatement with wildcards in the fields -// and/or GROUP BY expanded with actual field names. -func (q *QueryExecutor) expandWildcards(stmt *influxql.SelectStatement) (*influxql.SelectStatement, error) { - // If there are no wildcards in the statement, return it as-is. - if !stmt.HasWildcard() { - return stmt, nil - } - - // Use sets to avoid duplicate field names. - fieldSet := map[string]struct{}{} - dimensionSet := map[string]struct{}{} - - var fields influxql.Fields - var dimensions influxql.Dimensions - - // Iterate measurements in the FROM clause getting the fields & dimensions for each. - for _, src := range stmt.Sources { - if m, ok := src.(*influxql.Measurement); ok { - // Lookup the database. The database may not exist if no data for this database - // was ever written to the shard. - db := q.store.DatabaseIndex(m.Database) - if db == nil { - return stmt, nil - } - - // Lookup the measurement in the database. - mm := db.measurements[m.Name] - if mm == nil { - return nil, ErrMeasurementNotFound(m.String()) - } - - // Get the fields for this measurement. 
- for _, name := range mm.FieldNames() { - if _, ok := fieldSet[name]; ok { - continue - } - fieldSet[name] = struct{}{} - fields = append(fields, &influxql.Field{Expr: &influxql.VarRef{Val: name}}) - } - - // Get the dimensions for this measurement. - for _, t := range mm.TagKeys() { - if _, ok := dimensionSet[t]; ok { - continue - } - dimensionSet[t] = struct{}{} - dimensions = append(dimensions, &influxql.Dimension{Expr: &influxql.VarRef{Val: t}}) - } - } - } - - // Return a new SelectStatement with the wild cards rewritten. - return stmt.RewriteWildcards(fields, dimensions), nil -} - // expandSources expands regex sources and removes duplicates. // NOTE: sources must be normalized (db and rp set) before calling this function. func (q *QueryExecutor) expandSources(sources influxql.Sources) (influxql.Sources, error) { @@ -394,7 +297,7 @@ func (q *QueryExecutor) expandSources(sources influxql.Sources) (influxql.Source } // Lookup the database. - db := q.store.DatabaseIndex(src.Database) + db := q.Store.DatabaseIndex(src.Database) if db == nil { return nil, nil } @@ -453,7 +356,7 @@ func (q *QueryExecutor) executeDropDatabaseStatement(stmt *influxql.DropDatabase } } - err = q.store.DeleteDatabase(stmt.Name, shardIDs) + err = q.Store.DeleteDatabase(stmt.Name, shardIDs) if err != nil { return &influxql.Result{Err: err} } @@ -464,7 +367,7 @@ func (q *QueryExecutor) executeDropDatabaseStatement(stmt *influxql.DropDatabase // executeDropMeasurementStatement removes the measurement and all series data from the local store for the given measurement func (q *QueryExecutor) executeDropMeasurementStatement(stmt *influxql.DropMeasurementStatement, database string) *influxql.Result { // Find the database. - db := q.store.DatabaseIndex(database) + db := q.Store.DatabaseIndex(database) if db == nil { return &influxql.Result{} } @@ -478,7 +381,7 @@ func (q *QueryExecutor) executeDropMeasurementStatement(stmt *influxql.DropMeasu db.DropMeasurement(m.Name) // now drop the raw data - if err := q.store.deleteMeasurement(m.Name, m.SeriesKeys()); err != nil { + if err := q.Store.deleteMeasurement(m.Name, m.SeriesKeys()); err != nil { return &influxql.Result{Err: err} } @@ -488,7 +391,7 @@ func (q *QueryExecutor) executeDropMeasurementStatement(stmt *influxql.DropMeasu // executeDropSeriesStatement removes all series from the local store that match the drop query func (q *QueryExecutor) executeDropSeriesStatement(stmt *influxql.DropSeriesStatement, database string) *influxql.Result { // Find the database. - db := q.store.DatabaseIndex(database) + db := q.Store.DatabaseIndex(database) if db == nil { return &influxql.Result{} } @@ -506,7 +409,7 @@ func (q *QueryExecutor) executeDropSeriesStatement(stmt *influxql.DropSeriesStat var seriesKeys []string for _, m := range measurements { - var ids seriesIDs + var ids SeriesIDs if stmt.Condition != nil { // Get series IDs that match the WHERE clause. ids, _, err = m.walkWhereForSeriesIds(stmt.Condition) @@ -524,7 +427,7 @@ func (q *QueryExecutor) executeDropSeriesStatement(stmt *influxql.DropSeriesStat } // delete the raw series data - if err := q.store.deleteSeries(seriesKeys); err != nil { + if err := q.Store.deleteSeries(seriesKeys); err != nil { return &influxql.Result{Err: err} } // remove them from the index @@ -535,7 +438,7 @@ func (q *QueryExecutor) executeDropSeriesStatement(stmt *influxql.DropSeriesStat func (q *QueryExecutor) executeShowSeriesStatement(stmt *influxql.ShowSeriesStatement, database string) *influxql.Result { // Find the database. 
- db := q.store.DatabaseIndex(database) + db := q.Store.DatabaseIndex(database) if db == nil { return &influxql.Result{} } @@ -559,7 +462,7 @@ func (q *QueryExecutor) executeShowSeriesStatement(stmt *influxql.ShowSeriesStat // Loop through measurements to build result. One result row / measurement. for _, m := range measurements { - var ids seriesIDs + var ids SeriesIDs if stmt.Condition != nil { // Get series IDs that match the WHERE clause. @@ -646,7 +549,7 @@ func (q *QueryExecutor) filterShowSeriesResult(limit, offset int, rows influxql. func (q *QueryExecutor) executeShowMeasurementsStatement(stmt *influxql.ShowMeasurementsStatement, database string) *influxql.Result { // Find the database. - db := q.store.DatabaseIndex(database) + db := q.Store.DatabaseIndex(database) if db == nil { return &influxql.Result{} } @@ -705,7 +608,7 @@ func (q *QueryExecutor) executeShowMeasurementsStatement(stmt *influxql.ShowMeas func (q *QueryExecutor) executeShowTagKeysStatement(stmt *influxql.ShowTagKeysStatement, database string) *influxql.Result { // Find the database. - db := q.store.DatabaseIndex(database) + db := q.Store.DatabaseIndex(database) if db == nil { return &influxql.Result{} } @@ -758,7 +661,7 @@ func (q *QueryExecutor) executeShowTagKeysStatement(stmt *influxql.ShowTagKeysSt func (q *QueryExecutor) executeShowTagValuesStatement(stmt *influxql.ShowTagValuesStatement, database string) *influxql.Result { // Find the database. - db := q.store.DatabaseIndex(database) + db := q.Store.DatabaseIndex(database) if db == nil { return &influxql.Result{} } @@ -782,7 +685,7 @@ func (q *QueryExecutor) executeShowTagValuesStatement(stmt *influxql.ShowTagValu tagValues := make(map[string]stringSet) for _, m := range measurements { - var ids seriesIDs + var ids SeriesIDs if stmt.Condition != nil { // Get series IDs that match the WHERE clause. @@ -836,7 +739,7 @@ func (q *QueryExecutor) executeShowFieldKeysStatement(stmt *influxql.ShowFieldKe var err error // Find the database. - db := q.store.DatabaseIndex(database) + db := q.Store.DatabaseIndex(database) if db == nil { return &influxql.Result{} } diff --git a/Godeps/_workspace/src/github.com/influxdb/influxdb/tsdb/query_executor_test.go b/Godeps/_workspace/src/github.com/influxdb/influxdb/tsdb/query_executor_test.go index 936956e3a..3090c86e6 100644 --- a/Godeps/_workspace/src/github.com/influxdb/influxdb/tsdb/query_executor_test.go +++ b/Godeps/_workspace/src/github.com/influxdb/influxdb/tsdb/query_executor_test.go @@ -1,6 +1,7 @@ -package tsdb +package tsdb_test import ( + "encoding/json" "io/ioutil" "os" "path/filepath" @@ -10,6 +11,7 @@ import ( "github.com/influxdb/influxdb/influxql" "github.com/influxdb/influxdb/meta" + "github.com/influxdb/influxdb/tsdb" ) var sgID = uint64(2) @@ -17,10 +19,10 @@ var shardID = uint64(1) func TestWritePointsAndExecuteQuery(t *testing.T) { store, executor := testStoreAndExecutor() - defer os.RemoveAll(store.path) + defer os.RemoveAll(store.Path()) // Write first point. - if err := store.WriteToShard(shardID, []Point{NewPoint( + if err := store.WriteToShard(shardID, []tsdb.Point{tsdb.NewPoint( "cpu", map[string]string{"host": "server"}, map[string]interface{}{"value": 1.0}, @@ -30,7 +32,7 @@ func TestWritePointsAndExecuteQuery(t *testing.T) { } // Write second point. 
- if err := store.WriteToShard(shardID, []Point{NewPoint( + if err := store.WriteToShard(shardID, []tsdb.Point{tsdb.NewPoint( "cpu", map[string]string{"host": "server"}, map[string]interface{}{"value": 1.0}, @@ -39,100 +41,90 @@ func TestWritePointsAndExecuteQuery(t *testing.T) { t.Fatalf(err.Error()) } - got := executeAndGetJSON("select * from cpu", executor) - exepected := `[{"series":[{"name":"cpu","tags":{"host":"server"},"columns":["time","value"],"values":[["1970-01-01T00:00:01.000000002Z",1],["1970-01-01T00:00:02.000000003Z",1]]}]}]` + got := executeAndGetJSON("SELECT * FROM cpu", executor) + exepected := `[{"series":[{"name":"cpu","columns":["time","host","value"],"values":[["1970-01-01T00:00:01.000000002Z","server",1],["1970-01-01T00:00:02.000000003Z","server",1]]}]}]` if exepected != got { - t.Fatalf("exp: %s\ngot: %s", exepected, got) + t.Fatalf("\nexp: %s\ngot: %s", exepected, got) + } + + got = executeAndGetJSON("SELECT * FROM cpu GROUP BY *", executor) + exepected = `[{"series":[{"name":"cpu","tags":{"host":"server"},"columns":["time","value"],"values":[["1970-01-01T00:00:01.000000002Z",1],["1970-01-01T00:00:02.000000003Z",1]]}]}]` + if exepected != got { + t.Fatalf("\nexp: %s\ngot: %s", exepected, got) } store.Close() - store = NewStore(store.path) + store = tsdb.NewStore(store.Path()) if err := store.Open(); err != nil { t.Fatalf(err.Error()) } - executor.store = store + executor.Store = store executor.ShardMapper = &testShardMapper{store: store} - got = executeAndGetJSON("select * from cpu", executor) + got = executeAndGetJSON("SELECT * FROM cpu GROUP BY *", executor) if exepected != got { - t.Fatalf("exp: %s\ngot: %s", exepected, got) + t.Fatalf("\nexp: %s\ngot: %s", exepected, got) } } -// Ensure that points can be written and flushed even after a restart. -func TestWritePointsAndExecuteQuery_FlushRestart(t *testing.T) { +// Ensure writing a point and updating it results in only a single point. +func TestWritePointsAndExecuteQuery_Update(t *testing.T) { store, executor := testStoreAndExecutor() - defer os.RemoveAll(store.path) + defer os.RemoveAll(store.Path()) - // Write first point. - if err := store.WriteToShard(shardID, []Point{NewPoint( - "cpu", - map[string]string{"host": "server"}, - map[string]interface{}{"value": 1.0}, - time.Unix(1, 2), + // Write original point. + if err := store.WriteToShard(1, []tsdb.Point{tsdb.NewPoint( + "temperature", + map[string]string{}, + map[string]interface{}{"value": 100.0}, + time.Unix(0, 0), )}); err != nil { t.Fatalf(err.Error()) } - // Write second point. - if err := store.WriteToShard(shardID, []Point{NewPoint( - "cpu", - map[string]string{"host": "server"}, - map[string]interface{}{"value": 1.0}, - time.Unix(2, 3), - )}); err != nil { - t.Fatalf(err.Error()) - } - - // Restart the store. - if err := store.Close(); err != nil { - t.Fatal(err) - } else if err = store.Open(); err != nil { - t.Fatal(err) - } - - // Flush WAL data to the index. - if err := store.Flush(); err != nil { - t.Fatal(err) - } - - got := executeAndGetJSON("select * from cpu", executor) - exepected := `[{"series":[{"name":"cpu","tags":{"host":"server"},"columns":["time","value"],"values":[["1970-01-01T00:00:01.000000002Z",1],["1970-01-01T00:00:02.000000003Z",1]]}]}]` - if exepected != got { - t.Fatalf("exp: %s\ngot: %s", exepected, got) - } - + // Restart store. 
store.Close() - store = NewStore(store.path) + store = tsdb.NewStore(store.Path()) if err := store.Open(); err != nil { t.Fatalf(err.Error()) } - executor.store = store + executor.Store = store executor.ShardMapper = &testShardMapper{store: store} - got = executeAndGetJSON("select * from cpu", executor) - if exepected != got { - t.Fatalf("exp: %s\ngot: %s", exepected, got) + // Rewrite point with new value. + if err := store.WriteToShard(1, []tsdb.Point{tsdb.NewPoint( + "temperature", + map[string]string{}, + map[string]interface{}{"value": 200.0}, + time.Unix(0, 0), + )}); err != nil { + t.Fatalf(err.Error()) + } + + got := executeAndGetJSON("select * from temperature", executor) + exp := `[{"series":[{"name":"temperature","columns":["time","value"],"values":[["1970-01-01T00:00:00Z",200]]}]}]` + if exp != got { + t.Fatalf("\n\nexp: %s\ngot: %s", exp, got) } } func TestDropSeriesStatement(t *testing.T) { store, executor := testStoreAndExecutor() - defer os.RemoveAll(store.path) + defer os.RemoveAll(store.Path()) - pt := NewPoint( + pt := tsdb.NewPoint( "cpu", map[string]string{"host": "server"}, map[string]interface{}{"value": 1.0}, time.Unix(1, 2), ) - err := store.WriteToShard(shardID, []Point{pt}) + err := store.WriteToShard(shardID, []tsdb.Point{pt}) if err != nil { t.Fatalf(err.Error()) } - got := executeAndGetJSON("select * from cpu", executor) + got := executeAndGetJSON("SELECT * FROM cpu GROUP BY *", executor) exepected := `[{"series":[{"name":"cpu","tags":{"host":"server"},"columns":["time","value"],"values":[["1970-01-01T00:00:01.000000002Z",1]]}]}]` if exepected != got { t.Fatalf("exp: %s\ngot: %s", exepected, got) @@ -140,7 +132,7 @@ func TestDropSeriesStatement(t *testing.T) { got = executeAndGetJSON("drop series from cpu", executor) - got = executeAndGetJSON("select * from cpu", executor) + got = executeAndGetJSON("SELECT * FROM cpu GROUP BY *", executor) exepected = `[{}]` if exepected != got { t.Fatalf("exp: %s\ngot: %s", exepected, got) @@ -153,9 +145,9 @@ func TestDropSeriesStatement(t *testing.T) { } store.Close() - store = NewStore(store.path) + store = tsdb.NewStore(store.Path()) store.Open() - executor.store = store + executor.Store = store got = executeAndGetJSON("select * from cpu", executor) exepected = `[{}]` @@ -172,22 +164,22 @@ func TestDropSeriesStatement(t *testing.T) { func TestDropMeasurementStatement(t *testing.T) { store, executor := testStoreAndExecutor() - defer os.RemoveAll(store.path) + defer os.RemoveAll(store.Path()) - pt := NewPoint( + pt := tsdb.NewPoint( "cpu", map[string]string{"host": "server"}, map[string]interface{}{"value": 1.0}, time.Unix(1, 2), ) - pt2 := NewPoint( + pt2 := tsdb.NewPoint( "memory", map[string]string{"host": "server"}, map[string]interface{}{"value": 1.0}, time.Unix(1, 2), ) - if err := store.WriteToShard(shardID, []Point{pt, pt2}); err != nil { + if err := store.WriteToShard(shardID, []tsdb.Point{pt, pt2}); err != nil { t.Fatal(err) } @@ -215,7 +207,7 @@ func TestDropMeasurementStatement(t *testing.T) { t.Fatalf("exp: %s\ngot: %s", exepected, got) } got = executeAndGetJSON("select * from memory", executor) - exepected = `[{"error":"measurement not found: \"foo\".\"foo\".memory"}]` + exepected = `[{}]` if exepected != got { t.Fatalf("exp: %s\ngot: %s", exepected, got) } @@ -223,9 +215,9 @@ func TestDropMeasurementStatement(t *testing.T) { validateDrop() store.Close() - store = NewStore(store.path) + store = tsdb.NewStore(store.Path()) store.Open() - executor.store = store + executor.Store = store validateDrop() } @@ -240,20 
+232,20 @@ func (m *metaExec) ExecuteStatement(stmt influxql.Statement) *influxql.Result {
 
 func TestDropDatabase(t *testing.T) {
 	store, executor := testStoreAndExecutor()
-	defer os.RemoveAll(store.path)
+	defer os.RemoveAll(store.Path())
 
-	pt := NewPoint(
+	pt := tsdb.NewPoint(
 		"cpu",
 		map[string]string{"host": "server"},
 		map[string]interface{}{"value": 1.0},
 		time.Unix(1, 2),
 	)
 
-	if err := store.WriteToShard(shardID, []Point{pt}); err != nil {
+	if err := store.WriteToShard(shardID, []tsdb.Point{pt}); err != nil {
 		t.Fatal(err)
 	}
 
-	got := executeAndGetJSON("select * from cpu", executor)
+	got := executeAndGetJSON("SELECT * FROM cpu GROUP BY *", executor)
 	expected := `[{"series":[{"name":"cpu","tags":{"host":"server"},"columns":["time","value"],"values":[["1970-01-01T00:00:01.000000002Z",1]]}]}]`
 	if expected != got {
 		t.Fatalf("exp: %s\ngot: %s", expected, got)
@@ -267,7 +259,7 @@ func TestDropDatabase(t *testing.T) {
 	executor.MetaStatementExecutor = me
 
 	// verify the database is there on disk
-	dbPath := filepath.Join(store.path, "foo")
+	dbPath := filepath.Join(store.Path(), "foo")
 	if _, err := os.Stat(dbPath); err != nil {
 		t.Fatalf("expected database dir %s to exist", dbPath)
 	}
@@ -287,12 +279,12 @@ func TestDropDatabase(t *testing.T) {
 	}
 
 	store.Close()
-	store = NewStore(store.path)
+	store = tsdb.NewStore(store.Path())
 	store.Open()
-	executor.store = store
+	executor.Store = store
 	executor.ShardMapper = &testShardMapper{store: store}
 
-	if err := store.WriteToShard(shardID, []Point{pt}); err == nil || err.Error() != "shard not found" {
+	if err := store.WriteToShard(shardID, []tsdb.Point{pt}); err == nil || err.Error() != "shard not found" {
 		t.Fatalf("expected shard to not be found")
 	}
 }
@@ -300,7 +292,7 @@ func TestDropDatabase(t *testing.T) {
 
 // Ensure that queries for which there is no data result in an empty set.
 func TestQueryNoData(t *testing.T) {
 	store, executor := testStoreAndExecutor()
-	defer os.RemoveAll(store.path)
+	defer os.RemoveAll(store.Path())
 
 	got := executeAndGetJSON("select * from /.*/", executor)
 	expected := `[{}]`
@@ -321,7 +313,7 @@ func TestQueryNoData(t *testing.T) {
 // to create a user.
func TestAuthenticateIfUserCountZeroAndCreateUser(t *testing.T) { store, executor := testStoreAndExecutor() - defer os.RemoveAll(store.path) + defer os.RemoveAll(store.Path()) ms := &testMetastore{userCount: 0} executor.MetaStore = ms @@ -348,10 +340,10 @@ func TestAuthenticateIfUserCountZeroAndCreateUser(t *testing.T) { } } -func testStoreAndExecutor() (*Store, *QueryExecutor) { +func testStoreAndExecutor() (*tsdb.Store, *tsdb.QueryExecutor) { path, _ := ioutil.TempDir("", "") - store := NewStore(path) + store := tsdb.NewStore(path) err := store.Open() if err != nil { panic(err) @@ -361,14 +353,14 @@ func testStoreAndExecutor() (*Store, *QueryExecutor) { shardID := uint64(1) store.CreateShard(database, retentionPolicy, shardID) - executor := NewQueryExecutor(store) + executor := tsdb.NewQueryExecutor(store) executor.MetaStore = &testMetastore{} executor.ShardMapper = &testShardMapper{store: store} return store, executor } -func executeAndGetJSON(query string, executor *QueryExecutor) string { +func executeAndGetJSON(query string, executor *tsdb.QueryExecutor) string { ch, err := executor.ExecuteQuery(mustParseQuery(query), "foo", 20) if err != nil { panic(err.Error()) @@ -378,7 +370,12 @@ func executeAndGetJSON(query string, executor *QueryExecutor) string { for r := range ch { results = append(results, r) } - return string(mustMarshalJSON(results)) + + b, err := json.Marshal(results) + if err != nil { + panic(err) + } + return string(b) } type testMetastore struct { @@ -467,10 +464,10 @@ func (t *testMetastore) NodeID() uint64 { } type testShardMapper struct { - store *Store + store *tsdb.Store } -func (t *testShardMapper) CreateMapper(shard meta.ShardInfo, stmt string, chunkSize int) (Mapper, error) { +func (t *testShardMapper) CreateMapper(shard meta.ShardInfo, stmt string, chunkSize int) (tsdb.Mapper, error) { m, err := t.store.CreateMapper(shard.ID, stmt, chunkSize) return m, err } diff --git a/Godeps/_workspace/src/github.com/influxdb/influxdb/tsdb/shard.go b/Godeps/_workspace/src/github.com/influxdb/influxdb/tsdb/shard.go index 2f7570ae2..b37500f14 100644 --- a/Godeps/_workspace/src/github.com/influxdb/influxdb/tsdb/shard.go +++ b/Godeps/_workspace/src/github.com/influxdb/influxdb/tsdb/shard.go @@ -1,19 +1,14 @@ package tsdb import ( - "bytes" "encoding/binary" "encoding/json" "errors" "fmt" - "hash/fnv" "io" - "log" "math" "os" - "sort" "sync" - "time" "github.com/influxdb/influxdb/influxql" "github.com/influxdb/influxdb/tsdb/internal" @@ -35,15 +30,8 @@ var ( // ErrFieldUnmappedID is returned when the system is presented, during decode, with a field ID // there is no mapping for. ErrFieldUnmappedID = errors.New("field ID not mapped") - - // ErrWALPartitionNotFound is returns when flushing a WAL partition that - // does not exist. - ErrWALPartitionNotFound = errors.New("wal partition not found") ) -// topLevelBucketN is the number of non-series buckets in the bolt db. -const topLevelBucketN = 3 - // Shard represents a self-contained time series database. An inverted index of // the measurement and tag data is kept along with the raw time series data. // Data can be split across many shards. 
The query engine in TSDB is responsible @@ -52,53 +40,27 @@ type Shard struct { db *bolt.DB // underlying data store index *DatabaseIndex path string - cache map[uint8]map[string][][]byte // values by - walSize int // approximate size of the WAL, in bytes - flush chan struct{} // signals background flush - flushTimer *time.Timer // signals time-based flush + engine Engine + options EngineOptions mu sync.RWMutex - measurementFields map[string]*measurementFields // measurement name to their fields - - // These coordinate closing and waiting for running goroutines. - wg sync.WaitGroup - closing chan struct{} - - // Used for out-of-band error messages. - logger *log.Logger - - // The maximum size and time thresholds for flushing the WAL. - MaxWALSize int - WALFlushInterval time.Duration - WALPartitionFlushDelay time.Duration + measurementFields map[string]*MeasurementFields // measurement name to their fields // The writer used by the logger. LogOutput io.Writer } // NewShard returns a new initialized Shard -func NewShard(index *DatabaseIndex, path string) *Shard { - s := &Shard{ +func NewShard(index *DatabaseIndex, path string, options EngineOptions) *Shard { + return &Shard{ index: index, path: path, - flush: make(chan struct{}, 1), - measurementFields: make(map[string]*measurementFields), - - MaxWALSize: DefaultMaxWALSize, - WALFlushInterval: DefaultWALFlushInterval, - WALPartitionFlushDelay: DefaultWALPartitionFlushDelay, + options: options, + measurementFields: make(map[string]*MeasurementFields), LogOutput: os.Stderr, } - - // Initialize all partitions of the cache. - s.cache = make(map[uint8]map[string][][]byte) - for i := uint8(0); i < WALPartitionN; i++ { - s.cache[i] = make(map[string][][]byte) - } - - return s } // Path returns the path set on the shard when it was created. @@ -110,87 +72,57 @@ func (s *Shard) Open() error { s.mu.Lock() defer s.mu.Unlock() + s.index.mu.Lock() + defer s.index.mu.Unlock() + // Return if the shard is already open - if s.db != nil { + if s.engine != nil { return nil } - // Open store on shard. - store, err := bolt.Open(s.path, 0666, &bolt.Options{Timeout: 1 * time.Second}) + // Initialize underlying engine. + e, err := NewEngine(s.path, s.options) if err != nil { - return err + return fmt.Errorf("new engine: %s", err) } - s.db = store + s.engine = e - // Initialize store. - if err := s.db.Update(func(tx *bolt.Tx) error { - _, _ = tx.CreateBucketIfNotExists([]byte("series")) - _, _ = tx.CreateBucketIfNotExists([]byte("fields")) - _, _ = tx.CreateBucketIfNotExists([]byte("wal")) + // Set log output on the engine. + s.engine.SetLogOutput(s.LogOutput) - return nil - }); err != nil { - return fmt.Errorf("init: %s", err) + // Open engine. + if err := s.engine.Open(); err != nil { + return fmt.Errorf("open engine: %s", err) } - if err := s.loadMetadataIndex(); err != nil { + // Load metadata index. + if err := s.engine.LoadMetadataIndex(s.index, s.measurementFields); err != nil { return fmt.Errorf("load metadata index: %s", err) } - // Initialize logger. - s.logger = log.New(s.LogOutput, "[shard] ", log.LstdFlags) - - // Start flush interval timer. - s.flushTimer = time.NewTimer(s.WALFlushInterval) - - // Start background goroutines. - s.wg.Add(1) - s.closing = make(chan struct{}) - go s.autoflusher(s.closing) - return nil }(); err != nil { s.close() return err } - // Flush on-disk WAL before we return to the caller. - if err := s.Flush(0); err != nil { - return fmt.Errorf("flush: %s", err) - } - return nil } // Close shuts down the shard's store. 
func (s *Shard) Close() error { s.mu.Lock() - err := s.close() - s.mu.Unlock() - - // Wait for open goroutines to finish. - s.wg.Wait() - - return err + defer s.mu.Unlock() + return s.close() } func (s *Shard) close() error { - if s.db != nil { - s.db.Close() - } - if s.closing != nil { - close(s.closing) - s.closing = nil + if s.engine != nil { + return s.engine.Close() } return nil } -// TODO: this is temporarily exported to make tx.go work. When the query engine gets refactored -// into the tsdb package this should be removed. No one outside tsdb should know the underlying store. -func (s *Shard) DB() *bolt.DB { - return s.db -} - // TODO: this is temporarily exported to make tx.go work. When the query engine gets refactored // into the tsdb package this should be removed. No one outside tsdb should know the underlying field encoding scheme. func (s *Shard) FieldCodec(measurementName string) *FieldCodec { @@ -198,21 +130,21 @@ func (s *Shard) FieldCodec(measurementName string) *FieldCodec { defer s.mu.RUnlock() m := s.measurementFields[measurementName] if m == nil { - return nil + return NewFieldCodec(nil) } - return m.codec + return m.Codec } // struct to hold information for a field to create on a measurement -type fieldCreate struct { - measurement string - field *field +type FieldCreate struct { + Measurement string + Field *Field } // struct to hold information for a series to create -type seriesCreate struct { - measurement string - series *Series +type SeriesCreate struct { + Measurement string + Series *Series } // WritePoints will write the raw data points and any new metadata to the index in the shard @@ -226,7 +158,7 @@ func (s *Shard) WritePoints(points []Point) error { if len(seriesToCreate) > 0 { s.index.mu.Lock() for _, ss := range seriesToCreate { - s.index.createSeriesIndexIfNotExists(ss.measurement, ss.series) + s.index.CreateSeriesIndexIfNotExists(ss.Measurement, ss.Series) } s.index.mu.Unlock() } @@ -239,262 +171,36 @@ func (s *Shard) WritePoints(points []Point) error { // make sure all data is encoded before attempting to save to bolt for _, p := range points { - // marshal the raw data if it hasn't been marshaled already - if p.Data() == nil { - // this was populated earlier, don't need to validate that it's there. - s.mu.RLock() - mf := s.measurementFields[p.Name()] - s.mu.RUnlock() - - // If a measurement is dropped while writes for it are in progress, this could be nil - if mf == nil { - return ErrFieldNotFound - } - - data, err := mf.codec.EncodeFields(p.Fields()) - if err != nil { - return err - } - p.SetData(data) + // Ignore if raw data has already been marshaled. + if p.Data() != nil { + continue } + + // This was populated earlier, don't need to validate that it's there. 
+ s.mu.RLock() + mf := s.measurementFields[p.Name()] + s.mu.RUnlock() + + // If a measurement is dropped while writes for it are in progress, this could be nil + if mf == nil { + return ErrFieldNotFound + } + + data, err := mf.Codec.EncodeFields(p.Fields()) + if err != nil { + return err + } + p.SetData(data) } - // save to the underlying bolt instance - if err := s.db.Update(func(tx *bolt.Tx) error { - // save any new metadata - if len(seriesToCreate) > 0 { - b := tx.Bucket([]byte("series")) - for _, sc := range seriesToCreate { - data, err := sc.series.MarshalBinary() - if err != nil { - return err - } - if err := b.Put([]byte(sc.series.Key), data); err != nil { - return err - } - } - } - if len(measurementFieldsToSave) > 0 { - b := tx.Bucket([]byte("fields")) - for name, m := range measurementFieldsToSave { - data, err := m.MarshalBinary() - if err != nil { - return err - } - if err := b.Put([]byte(name), data); err != nil { - return err - } - } - } - - // Write points to WAL bucket. - wal := tx.Bucket([]byte("wal")) - for _, p := range points { - // Retrieve partition bucket. - key := p.Key() - b, err := wal.CreateBucketIfNotExists([]byte{WALPartition(key)}) - if err != nil { - return fmt.Errorf("create WAL partition bucket: %s", err) - } - - // Generate an autoincrementing index for the WAL partition. - id, _ := b.NextSequence() - - // Append points sequentially to the WAL bucket. - v := marshalWALEntry(key, p.UnixNano(), p.Data()) - if err := b.Put(u64tob(id), v); err != nil { - return fmt.Errorf("put wal: %s", err) - } - } - - return nil - }); err != nil { - return err - } - - // If successful then save points to in-memory cache. - if err := func() error { - s.mu.Lock() - defer s.mu.Unlock() - - // tracks which in-memory caches need to be resorted - resorts := map[uint8]map[string]struct{}{} - - for _, p := range points { - // Generate in-memory cache entry of . - key, data := p.Key(), p.Data() - v := make([]byte, 8+len(data)) - binary.BigEndian.PutUint64(v[0:8], uint64(p.UnixNano())) - copy(v[8:], data) - - // Determine if we are appending. - partitionID := WALPartition(key) - a := s.cache[partitionID][string(key)] - appending := (len(a) == 0 || bytes.Compare(a[len(a)-1], v) == -1) - - // Append to cache list. - a = append(a, v) - - // If not appending, keep track of cache lists that need to be resorted. - if !appending { - series := resorts[partitionID] - if series == nil { - series = map[string]struct{}{} - resorts[partitionID] = series - } - series[string(key)] = struct{}{} - } - - s.cache[partitionID][string(key)] = a - - // Calculate estimated WAL size. - s.walSize += len(key) + len(v) - } - - // Sort by timestamp if not appending. - for partitionID, cache := range resorts { - for key, _ := range cache { - sort.Sort(byteSlices(s.cache[partitionID][key])) - } - } - - // Check for flush threshold. - s.triggerAutoFlush() - - return nil - }(); err != nil { - return err + // Write to the engine. + if err := s.engine.WritePoints(points, measurementFieldsToSave, seriesToCreate); err != nil { + return fmt.Errorf("engine: %s", err) } return nil } -// Flush writes all points from the write ahead log to the index. -func (s *Shard) Flush(partitionFlushDelay time.Duration) error { - // Retrieve a list of WAL buckets. 
- var partitionIDs []uint8 - if err := s.db.View(func(tx *bolt.Tx) error { - return tx.Bucket([]byte("wal")).ForEach(func(key, _ []byte) error { - partitionIDs = append(partitionIDs, uint8(key[0])) - return nil - }) - }); err != nil { - return err - } - - // Continue flushing until there are no more partition buckets. - for _, partitionID := range partitionIDs { - if err := s.FlushPartition(partitionID); err != nil { - return fmt.Errorf("flush partition: id=%d, err=%s", partitionID, err) - } - - // Wait momentarily so other threads can process. - time.Sleep(partitionFlushDelay) - } - - s.mu.Lock() - defer s.mu.Unlock() - - // Reset WAL size. - s.walSize = 0 - - // Reset the timer. - s.flushTimer.Reset(s.WALFlushInterval) - - return nil -} - -// FlushPartition flushes a single WAL partition. -func (s *Shard) FlushPartition(partitionID uint8) error { - s.mu.Lock() - defer s.mu.Unlock() - - startTime := time.Now() - - var pointN int - if err := s.db.Update(func(tx *bolt.Tx) error { - // Retrieve partition bucket. Exit if it doesn't exist. - pb := tx.Bucket([]byte("wal")).Bucket([]byte{byte(partitionID)}) - if pb == nil { - return ErrWALPartitionNotFound - } - - // Iterate over keys in the WAL partition bucket. - c := pb.Cursor() - for k, v := c.First(); k != nil; k, v = c.Next() { - key, timestamp, data := unmarshalWALEntry(v) - - // Create bucket for entry. - b, err := tx.CreateBucketIfNotExists(key) - if err != nil { - return fmt.Errorf("create bucket: %s", err) - } - - // Write point to bucket. - if err := b.Put(u64tob(uint64(timestamp)), data); err != nil { - return fmt.Errorf("put: %s", err) - } - - // Remove entry in the WAL. - if err := c.Delete(); err != nil { - return fmt.Errorf("delete: %s", err) - } - - pointN++ - } - - return nil - }); err != nil { - return err - } - - // Reset cache. - s.cache[partitionID] = make(map[string][][]byte) - - if pointN > 0 { - s.logger.Printf("flush %d points in %.3fs", pointN, time.Since(startTime).Seconds()) - } - - return nil -} - -// autoflusher waits for notification of a flush and kicks it off in the background. -// This method runs in a separate goroutine. -func (s *Shard) autoflusher(closing chan struct{}) { - defer s.wg.Done() - - for { - // Wait for close or flush signal. - select { - case <-closing: - return - case <-s.flushTimer.C: - if err := s.Flush(s.WALPartitionFlushDelay); err != nil { - s.logger.Printf("flush error: %s", err) - } - case <-s.flush: - if err := s.Flush(s.WALPartitionFlushDelay); err != nil { - s.logger.Printf("flush error: %s", err) - } - } - } -} - -// triggerAutoFlush signals that a flush should occur if the size is above the threshold. -// This function must be called within the context of a lock. -func (s *Shard) triggerAutoFlush() { - // Ignore if we haven't reached the threshold. - if s.walSize < s.MaxWALSize { - return - } - - // Otherwise send a non-blocking signal. 
- select { - case s.flush <- struct{}{}: - default: - } -} - func (s *Shard) ValidateAggregateFieldsInStatement(measurementName string, stmt *influxql.SelectStatement) error { s.mu.RLock() defer s.mu.RUnlock() @@ -547,62 +253,27 @@ func (s *Shard) ValidateAggregateFieldsInStatement(measurementName string, stmt return nil } -// deleteSeries deletes the buckets and the metadata for the given series keys -func (s *Shard) deleteSeries(keys []string) error { - s.mu.Lock() - defer s.mu.Unlock() - - if err := s.db.Update(func(tx *bolt.Tx) error { - b := tx.Bucket([]byte("series")) - for _, k := range keys { - if err := b.Delete([]byte(k)); err != nil { - return err - } - if err := tx.DeleteBucket([]byte(k)); err != nil && err != bolt.ErrBucketNotFound { - return err - } - delete(s.cache[WALPartition([]byte(k))], k) - } - return nil - }); err != nil { - return err - } - - return nil +// DeleteSeries deletes a list of series. +func (s *Shard) DeleteSeries(keys []string) error { + return s.engine.DeleteSeries(keys) } -// deleteMeasurement deletes the measurement field encoding information and all underlying series from the shard -func (s *Shard) deleteMeasurement(name string, seriesKeys []string) error { +// DeleteMeasurement deletes a measurement and all underlying series. +func (s *Shard) DeleteMeasurement(name string, seriesKeys []string) error { s.mu.Lock() defer s.mu.Unlock() - if err := s.db.Update(func(tx *bolt.Tx) error { - bm := tx.Bucket([]byte("fields")) - if err := bm.Delete([]byte(name)); err != nil { - return err - } - b := tx.Bucket([]byte("series")) - for _, k := range seriesKeys { - if err := b.Delete([]byte(k)); err != nil { - return err - } - if err := tx.DeleteBucket([]byte(k)); err != nil && err != bolt.ErrBucketNotFound { - return err - } - delete(s.cache[WALPartition([]byte(k))], k) - } - - return nil - }); err != nil { + if err := s.engine.DeleteMeasurement(name, seriesKeys); err != nil { return err } // Remove entry from shard index. 
delete(s.measurementFields, name) + return nil } -func (s *Shard) createFieldsAndMeasurements(fieldsToCreate []*fieldCreate) (map[string]*measurementFields, error) { +func (s *Shard) createFieldsAndMeasurements(fieldsToCreate []*FieldCreate) (map[string]*MeasurementFields, error) { if len(fieldsToCreate) == 0 { return nil, nil } @@ -613,37 +284,37 @@ func (s *Shard) createFieldsAndMeasurements(fieldsToCreate []*fieldCreate) (map[ defer s.mu.Unlock() // add fields - measurementsToSave := make(map[string]*measurementFields) + measurementsToSave := make(map[string]*MeasurementFields) for _, f := range fieldsToCreate { - m := s.measurementFields[f.measurement] + m := s.measurementFields[f.Measurement] if m == nil { - m = measurementsToSave[f.measurement] + m = measurementsToSave[f.Measurement] if m == nil { - m = &measurementFields{Fields: make(map[string]*field)} + m = &MeasurementFields{Fields: make(map[string]*Field)} } - s.measurementFields[f.measurement] = m + s.measurementFields[f.Measurement] = m } - measurementsToSave[f.measurement] = m + measurementsToSave[f.Measurement] = m // add the field to the in memory index - if err := m.createFieldIfNotExists(f.field.Name, f.field.Type); err != nil { + if err := m.CreateFieldIfNotExists(f.Field.Name, f.Field.Type); err != nil { return nil, err } // ensure the measurement is in the index and the field is there - measurement := s.index.createMeasurementIndexIfNotExists(f.measurement) - measurement.fieldNames[f.field.Name] = struct{}{} + measurement := s.index.CreateMeasurementIndexIfNotExists(f.Measurement) + measurement.fieldNames[f.Field.Name] = struct{}{} } return measurementsToSave, nil } // validateSeriesAndFields checks which series and fields are new and whose metadata should be saved and indexed -func (s *Shard) validateSeriesAndFields(points []Point) ([]*seriesCreate, []*fieldCreate, error) { - var seriesToCreate []*seriesCreate - var fieldsToCreate []*fieldCreate +func (s *Shard) validateSeriesAndFields(points []Point) ([]*SeriesCreate, []*FieldCreate, error) { + var seriesToCreate []*SeriesCreate + var fieldsToCreate []*FieldCreate // get the mutex for the in memory index, which is shared across shards s.index.mu.RLock() @@ -657,14 +328,14 @@ func (s *Shard) validateSeriesAndFields(points []Point) ([]*seriesCreate, []*fie // see if the series should be added to the index if ss := s.index.series[string(p.Key())]; ss == nil { series := &Series{Key: string(p.Key()), Tags: p.Tags()} - seriesToCreate = append(seriesToCreate, &seriesCreate{p.Name(), series}) + seriesToCreate = append(seriesToCreate, &SeriesCreate{p.Name(), series}) } // see if the field definitions need to be saved to the shard mf := s.measurementFields[p.Name()] if mf == nil { for name, value := range p.Fields() { - fieldsToCreate = append(fieldsToCreate, &fieldCreate{p.Name(), &field{Name: name, Type: influxql.InspectDataType(value)}}) + fieldsToCreate = append(fieldsToCreate, &FieldCreate{p.Name(), &Field{Name: name, Type: influxql.InspectDataType(value)}}) } continue // skip validation since all fields are new } @@ -680,72 +351,23 @@ func (s *Shard) validateSeriesAndFields(points []Point) ([]*seriesCreate, []*fie continue // Field is present, and it's of the same type. Nothing more to do. 
} - fieldsToCreate = append(fieldsToCreate, &fieldCreate{p.Name(), &field{Name: name, Type: influxql.InspectDataType(value)}}) + fieldsToCreate = append(fieldsToCreate, &FieldCreate{p.Name(), &Field{Name: name, Type: influxql.InspectDataType(value)}}) } } return seriesToCreate, fieldsToCreate, nil } -// loadsMetadataIndex loads the shard metadata into memory. This should only be called by Open -func (s *Shard) loadMetadataIndex() error { - return s.db.View(func(tx *bolt.Tx) error { - s.index.mu.Lock() - defer s.index.mu.Unlock() - - // load measurement metadata - meta := tx.Bucket([]byte("fields")) - c := meta.Cursor() - for k, v := c.First(); k != nil; k, v = c.Next() { - m := s.index.createMeasurementIndexIfNotExists(string(k)) - mf := &measurementFields{} - if err := mf.UnmarshalBinary(v); err != nil { - return err - } - for name, _ := range mf.Fields { - m.fieldNames[name] = struct{}{} - } - mf.codec = newFieldCodec(mf.Fields) - s.measurementFields[m.Name] = mf - } - - // load series metadata - meta = tx.Bucket([]byte("series")) - c = meta.Cursor() - for k, v := c.First(); k != nil; k, v = c.Next() { - series := &Series{} - if err := series.UnmarshalBinary(v); err != nil { - return err - } - s.index.createSeriesIndexIfNotExists(measurementFromSeriesKey(string(k)), series) - } - return nil - }) -} - // SeriesCount returns the number of series buckets on the shard. -// This does not include a count from the WAL. -func (s *Shard) SeriesCount() (n int, err error) { - err = s.db.View(func(tx *bolt.Tx) error { - return tx.ForEach(func(_ []byte, _ *bolt.Bucket) error { - n++ - return nil - }) - }) +func (s *Shard) SeriesCount() (int, error) { return s.engine.SeriesCount() } - // Remove top-level buckets. - n -= topLevelBucketN - - return -} - -type measurementFields struct { - Fields map[string]*field `json:"fields"` - codec *FieldCodec +type MeasurementFields struct { + Fields map[string]*Field `json:"fields"` + Codec *FieldCodec } // MarshalBinary encodes the object to a binary format. -func (m *measurementFields) MarshalBinary() ([]byte, error) { +func (m *MeasurementFields) MarshalBinary() ([]byte, error) { var pb internal.MeasurementFields for _, f := range m.Fields { id := int32(f.ID) @@ -757,22 +379,22 @@ func (m *measurementFields) MarshalBinary() ([]byte, error) { } // UnmarshalBinary decodes the object from a binary format. -func (m *measurementFields) UnmarshalBinary(buf []byte) error { +func (m *MeasurementFields) UnmarshalBinary(buf []byte) error { var pb internal.MeasurementFields if err := proto.Unmarshal(buf, &pb); err != nil { return err } - m.Fields = make(map[string]*field) + m.Fields = make(map[string]*Field) for _, f := range pb.Fields { - m.Fields[f.GetName()] = &field{ID: uint8(f.GetID()), Name: f.GetName(), Type: influxql.DataType(f.GetType())} + m.Fields[f.GetName()] = &Field{ID: uint8(f.GetID()), Name: f.GetName(), Type: influxql.DataType(f.GetType())} } return nil } -// createFieldIfNotExists creates a new field with an autoincrementing ID. +// CreateFieldIfNotExists creates a new field with an autoincrementing ID. // Returns an error if 255 fields have already been created on the measurement or // the fields already exists with a different type. -func (m *measurementFields) createFieldIfNotExists(name string, typ influxql.DataType) error { +func (m *MeasurementFields) CreateFieldIfNotExists(name string, typ influxql.DataType) error { // Ignore if the field already exists. 
if f := m.Fields[name]; f != nil { if f.Type != typ { @@ -787,19 +409,19 @@ func (m *measurementFields) createFieldIfNotExists(name string, typ influxql.Dat } // Create and append a new field. - f := &field{ + f := &Field{ ID: uint8(len(m.Fields) + 1), Name: name, Type: typ, } m.Fields[name] = f - m.codec = newFieldCodec(m.Fields) + m.Codec = NewFieldCodec(m.Fields) return nil } // Field represents a series field. -type field struct { +type Field struct { ID uint8 `json:"id,omitempty"` Name string `json:"name,omitempty"` Type influxql.DataType `json:"type,omitempty"` @@ -813,15 +435,15 @@ type field struct { // TODO: this shouldn't be exported. nothing outside the shard should know about field encodings. // However, this is here until tx.go and the engine get refactored into tsdb. type FieldCodec struct { - fieldsByID map[uint8]*field - fieldsByName map[string]*field + fieldsByID map[uint8]*Field + fieldsByName map[string]*Field } // NewFieldCodec returns a FieldCodec for the given Measurement. Must be called with // a RLock that protects the Measurement. -func newFieldCodec(fields map[string]*field) *FieldCodec { - fieldsByID := make(map[uint8]*field, len(fields)) - fieldsByName := make(map[string]*field, len(fields)) +func NewFieldCodec(fields map[string]*Field) *FieldCodec { + fieldsByID := make(map[uint8]*Field, len(fields)) + fieldsByName := make(map[string]*Field, len(fields)) for _, f := range fields { fieldsByID[f.ID] = f fieldsByName[f.Name] = f @@ -1045,15 +667,15 @@ func (f *FieldCodec) DecodeByID(targetID uint8, b []byte) (interface{}, error) { // DecodeByName scans a byte slice for a field with the given name, converts it to its // expected type, and return that value. func (f *FieldCodec) DecodeByName(name string, b []byte) (interface{}, error) { - if fi := f.fieldByName(name); fi == nil { + fi := f.fieldByName(name) + if fi == nil { return 0, ErrFieldNotFound - } else { - return f.DecodeByID(fi.ID, b) } + return f.DecodeByID(fi.ID, b) } // FieldByName returns the field by its name. It will return a nil if not found -func (f *FieldCodec) fieldByName(name string) *field { +func (f *FieldCodec) fieldByName(name string) *Field { return f.fieldsByName[name] } @@ -1083,136 +705,3 @@ func u64tob(v uint64) []byte { binary.BigEndian.PutUint64(b, v) return b } - -// marshalWALEntry encodes point data into a single byte slice. -// -// The format of the byte slice is: -// -// uint64 timestamp -// uint32 key length -// []byte key -// []byte data -// -func marshalWALEntry(key []byte, timestamp int64, data []byte) []byte { - v := make([]byte, 8+4, 8+4+len(key)+len(data)) - binary.BigEndian.PutUint64(v[0:8], uint64(timestamp)) - binary.BigEndian.PutUint32(v[8:12], uint32(len(key))) - v = append(v, key...) - v = append(v, data...) - return v -} - -// unmarshalWALEntry decodes a WAL entry into it's separate parts. -// Returned byte slices point to the original slice. -func unmarshalWALEntry(v []byte) (key []byte, timestamp int64, data []byte) { - keyLen := binary.BigEndian.Uint32(v[8:12]) - key = v[12 : 12+keyLen] - timestamp = int64(binary.BigEndian.Uint64(v[0:8])) - data = v[12+keyLen:] - return -} - -// marshalCacheEntry encodes the timestamp and data to a single byte slice. -// -// The format of the byte slice is: -// -// uint64 timestamp -// []byte data -// -func marshalCacheEntry(timestamp int64, data []byte) []byte { - buf := make([]byte, 8, 8+len(data)) - binary.BigEndian.PutUint64(buf[0:8], uint64(timestamp)) - return append(buf, data...) 
-} - -// unmarshalCacheEntry returns the timestamp and data from an encoded byte slice. -func unmarshalCacheEntry(buf []byte) (timestamp int64, data []byte) { - timestamp = int64(binary.BigEndian.Uint64(buf[0:8])) - data = buf[8:] - return -} - -// byteSlices represents a sortable slice of byte slices. -type byteSlices [][]byte - -func (a byteSlices) Len() int { return len(a) } -func (a byteSlices) Less(i, j int) bool { return bytes.Compare(a[i], a[j]) == -1 } -func (a byteSlices) Swap(i, j int) { a[i], a[j] = a[j], a[i] } - -// shardCursor provides ordered iteration across a Bolt bucket and shard cache. -type shardCursor struct { - // Bolt cursor and readahead buffer. - cursor *bolt.Cursor - buf struct { - key, value []byte - } - - // Cache and current cache index. - cache [][]byte - index int -} - -// Seek moves the cursor to a position and returns the closest key/value pair. -func (sc *shardCursor) Seek(seek []byte) (key, value []byte) { - // Seek bolt cursor. - if sc.cursor != nil { - sc.buf.key, sc.buf.value = sc.cursor.Seek(seek) - } - - // Seek cache index. - sc.index = sort.Search(len(sc.cache), func(i int) bool { - return bytes.Compare(sc.cache[i][0:8], seek) != -1 - }) - - return sc.read() -} - -// Next returns the next key/value pair from the cursor. -func (sc *shardCursor) Next() (key, value []byte) { - // Read next bolt key/value if not bufferred. - if sc.buf.key == nil && sc.cursor != nil { - sc.buf.key, sc.buf.value = sc.cursor.Next() - } - - return sc.read() -} - -// read returns the next key/value in the cursor buffer or cache. -func (sc *shardCursor) read() (key, value []byte) { - // If neither a buffer or cache exists then return nil. - if sc.buf.key == nil && sc.index >= len(sc.cache) { - return nil, nil - } - - // Use the buffer if it exists and there's no cache or if it is lower than the cache. - if sc.buf.key != nil && (sc.index >= len(sc.cache) || bytes.Compare(sc.buf.key, sc.cache[sc.index][0:8]) == -1) { - key, value = sc.buf.key, sc.buf.value - sc.buf.key, sc.buf.value = nil, nil - return - } - - // Otherwise read from the cache. - // Continue skipping ahead through duplicate keys in the cache list. - for { - // Read the current cache key/value pair. - key, value = sc.cache[sc.index][0:8], sc.cache[sc.index][8:] - sc.index++ - - // Exit loop if we're at the end of the cache or the next key is different. - if sc.index >= len(sc.cache) || !bytes.Equal(key, sc.cache[sc.index][0:8]) { - break - } - } - - return -} - -// WALPartitionN is the number of partitions in the write ahead log. -const WALPartitionN = 8 - -// WALPartition returns the partition number that key belongs to. 
-func WALPartition(key []byte) uint8 { - h := fnv.New64a() - h.Write(key) - return uint8(h.Sum64() % WALPartitionN) -} diff --git a/Godeps/_workspace/src/github.com/influxdb/influxdb/tsdb/shard_test.go b/Godeps/_workspace/src/github.com/influxdb/influxdb/tsdb/shard_test.go index 0d8e6b77b..db21bef34 100644 --- a/Godeps/_workspace/src/github.com/influxdb/influxdb/tsdb/shard_test.go +++ b/Godeps/_workspace/src/github.com/influxdb/influxdb/tsdb/shard_test.go @@ -1,4 +1,4 @@ -package tsdb +package tsdb_test import ( "fmt" @@ -9,6 +9,8 @@ import ( "reflect" "testing" "time" + + "github.com/influxdb/influxdb/tsdb" ) func TestShardWriteAndIndex(t *testing.T) { @@ -16,42 +18,43 @@ func TestShardWriteAndIndex(t *testing.T) { defer os.RemoveAll(tmpDir) tmpShard := path.Join(tmpDir, "shard") - index := NewDatabaseIndex() - sh := NewShard(index, tmpShard) + index := tsdb.NewDatabaseIndex() + sh := tsdb.NewShard(index, tmpShard, tsdb.NewEngineOptions()) if err := sh.Open(); err != nil { t.Fatalf("error openeing shard: %s", err.Error()) } - pt := NewPoint( + pt := tsdb.NewPoint( "cpu", map[string]string{"host": "server"}, map[string]interface{}{"value": 1.0}, time.Unix(1, 2), ) - err := sh.WritePoints([]Point{pt}) + err := sh.WritePoints([]tsdb.Point{pt}) if err != nil { t.Fatalf(err.Error()) } pt.SetTime(time.Unix(2, 3)) - err = sh.WritePoints([]Point{pt}) + err = sh.WritePoints([]tsdb.Point{pt}) if err != nil { t.Fatalf(err.Error()) } validateIndex := func() { - if !reflect.DeepEqual(index.names, []string{"cpu"}) { + if !reflect.DeepEqual(index.Names(), []string{"cpu"}) { t.Fatalf("measurement names in shard didn't match") } - if len(index.series) != 1 { + if index.SeriesN() != 1 { t.Fatalf("series wasn't in index") } - seriesTags := index.series[string(pt.Key())].Tags + + seriesTags := index.Series(string(pt.Key())).Tags if len(seriesTags) != len(pt.Tags()) || pt.Tags()["host"] != seriesTags["host"] { - t.Fatalf("tags weren't properly saved to series index: %v, %v", pt.Tags(), index.series[string(pt.Key())].Tags) + t.Fatalf("tags weren't properly saved to series index: %v, %v", pt.Tags(), seriesTags) } - if !reflect.DeepEqual(index.measurements["cpu"].TagKeys(), []string{"host"}) { + if !reflect.DeepEqual(index.Measurement("cpu").TagKeys(), []string{"host"}) { t.Fatalf("tag key wasn't saved to measurement index") } } @@ -61,8 +64,8 @@ func TestShardWriteAndIndex(t *testing.T) { // ensure the index gets loaded after closing and opening the shard sh.Close() - index = NewDatabaseIndex() - sh = NewShard(index, tmpShard) + index = tsdb.NewDatabaseIndex() + sh = tsdb.NewShard(index, tmpShard, tsdb.NewEngineOptions()) if err := sh.Open(); err != nil { t.Fatalf("error openeing shard: %s", err.Error()) } @@ -71,7 +74,7 @@ func TestShardWriteAndIndex(t *testing.T) { // and ensure that we can still write data pt.SetTime(time.Unix(2, 6)) - err = sh.WritePoints([]Point{pt}) + err = sh.WritePoints([]tsdb.Point{pt}) if err != nil { t.Fatalf(err.Error()) } @@ -82,52 +85,52 @@ func TestShardWriteAddNewField(t *testing.T) { defer os.RemoveAll(tmpDir) tmpShard := path.Join(tmpDir, "shard") - index := NewDatabaseIndex() - sh := NewShard(index, tmpShard) + index := tsdb.NewDatabaseIndex() + sh := tsdb.NewShard(index, tmpShard, tsdb.NewEngineOptions()) if err := sh.Open(); err != nil { t.Fatalf("error openeing shard: %s", err.Error()) } defer sh.Close() - pt := NewPoint( + pt := tsdb.NewPoint( "cpu", map[string]string{"host": "server"}, map[string]interface{}{"value": 1.0}, time.Unix(1, 2), ) - err := 
sh.WritePoints([]Point{pt}) + err := sh.WritePoints([]tsdb.Point{pt}) if err != nil { t.Fatalf(err.Error()) } - pt = NewPoint( + pt = tsdb.NewPoint( "cpu", map[string]string{"host": "server"}, map[string]interface{}{"value": 1.0, "value2": 2.0}, time.Unix(1, 2), ) - err = sh.WritePoints([]Point{pt}) + err = sh.WritePoints([]tsdb.Point{pt}) if err != nil { t.Fatalf(err.Error()) } - if !reflect.DeepEqual(index.names, []string{"cpu"}) { + if !reflect.DeepEqual(index.Names(), []string{"cpu"}) { t.Fatalf("measurement names in shard didn't match") } - if len(index.series) != 1 { + if index.SeriesN() != 1 { t.Fatalf("series wasn't in index") } - seriesTags := index.series[string(pt.Key())].Tags + seriesTags := index.Series(string(pt.Key())).Tags if len(seriesTags) != len(pt.Tags()) || pt.Tags()["host"] != seriesTags["host"] { - t.Fatalf("tags weren't properly saved to series index: %v, %v", pt.Tags(), index.series[string(pt.Key())].Tags) + t.Fatalf("tags weren't properly saved to series index: %v, %v", pt.Tags(), seriesTags) } - if !reflect.DeepEqual(index.measurements["cpu"].TagKeys(), []string{"host"}) { + if !reflect.DeepEqual(index.Measurement("cpu").TagKeys(), []string{"host"}) { t.Fatalf("tag key wasn't saved to measurement index") } - if len(index.measurements["cpu"].FieldNames()) != 2 { + if len(index.Measurement("cpu").FieldNames()) != 2 { t.Fatalf("field names wasn't saved to measurement index") } @@ -139,10 +142,11 @@ func TestShard_Autoflush(t *testing.T) { defer os.RemoveAll(path) // Open shard with a really low size threshold, high flush interval. - sh := NewShard(NewDatabaseIndex(), filepath.Join(path, "shard")) - sh.MaxWALSize = 1024 // 1KB - sh.WALFlushInterval = 1 * time.Hour - sh.WALPartitionFlushDelay = 1 * time.Millisecond + sh := tsdb.NewShard(tsdb.NewDatabaseIndex(), filepath.Join(path, "shard"), tsdb.EngineOptions{ + MaxWALSize: 1024, // 1KB + WALFlushInterval: 1 * time.Hour, + WALPartitionFlushDelay: 1 * time.Millisecond, + }) if err := sh.Open(); err != nil { t.Fatal(err) } @@ -150,7 +154,7 @@ func TestShard_Autoflush(t *testing.T) { // Write a bunch of points. for i := 0; i < 100; i++ { - if err := sh.WritePoints([]Point{NewPoint( + if err := sh.WritePoints([]tsdb.Point{tsdb.NewPoint( fmt.Sprintf("cpu%d", i), map[string]string{"host": "server"}, map[string]interface{}{"value": 1.0}, @@ -177,10 +181,11 @@ func TestShard_Autoflush_FlushInterval(t *testing.T) { defer os.RemoveAll(path) // Open shard with a high size threshold, small time threshold. - sh := NewShard(NewDatabaseIndex(), filepath.Join(path, "shard")) - sh.MaxWALSize = 10 * 1024 * 1024 // 10MB - sh.WALFlushInterval = 100 * time.Millisecond - sh.WALPartitionFlushDelay = 1 * time.Millisecond + sh := tsdb.NewShard(tsdb.NewDatabaseIndex(), filepath.Join(path, "shard"), tsdb.EngineOptions{ + MaxWALSize: 10 * 1024 * 1024, // 10MB + WALFlushInterval: 100 * time.Millisecond, + WALPartitionFlushDelay: 1 * time.Millisecond, + }) if err := sh.Open(); err != nil { t.Fatal(err) } @@ -188,7 +193,7 @@ func TestShard_Autoflush_FlushInterval(t *testing.T) { // Write some points. for i := 0; i < 100; i++ { - if err := sh.WritePoints([]Point{NewPoint( + if err := sh.WritePoints([]tsdb.Point{tsdb.NewPoint( fmt.Sprintf("cpu%d", i), map[string]string{"host": "server"}, map[string]interface{}{"value": 1.0}, @@ -240,12 +245,12 @@ func benchmarkWritePoints(b *testing.B, mCnt, tkCnt, tvCnt, pntCnt int) { // Generate test series (measurements + unique tag sets). 
series := genTestSeries(mCnt, tkCnt, tvCnt) // Create index for the shard to use. - index := NewDatabaseIndex() + index := tsdb.NewDatabaseIndex() // Generate point data to write to the shard. - points := []Point{} + points := []tsdb.Point{} for _, s := range series { for val := 0.0; val < float64(pntCnt); val++ { - p := NewPoint(s.Measurement, s.Series.Tags, map[string]interface{}{"value": val}, time.Now()) + p := tsdb.NewPoint(s.Measurement, s.Series.Tags, map[string]interface{}{"value": val}, time.Now()) points = append(points, p) } } @@ -258,7 +263,7 @@ func benchmarkWritePoints(b *testing.B, mCnt, tkCnt, tvCnt, pntCnt int) { for n := 0; n < b.N; n++ { tmpDir, _ := ioutil.TempDir("", "shard_test") tmpShard := path.Join(tmpDir, "shard") - shard := NewShard(index, tmpShard) + shard := tsdb.NewShard(index, tmpShard, tsdb.NewEngineOptions()) shard.Open() b.StartTimer() @@ -280,12 +285,12 @@ func benchmarkWritePointsExistingSeries(b *testing.B, mCnt, tkCnt, tvCnt, pntCnt // Generate test series (measurements + unique tag sets). series := genTestSeries(mCnt, tkCnt, tvCnt) // Create index for the shard to use. - index := NewDatabaseIndex() + index := tsdb.NewDatabaseIndex() // Generate point data to write to the shard. - points := []Point{} + points := []tsdb.Point{} for _, s := range series { for val := 0.0; val < float64(pntCnt); val++ { - p := NewPoint(s.Measurement, s.Series.Tags, map[string]interface{}{"value": val}, time.Now()) + p := tsdb.NewPoint(s.Measurement, s.Series.Tags, map[string]interface{}{"value": val}, time.Now()) points = append(points, p) } } @@ -293,7 +298,7 @@ func benchmarkWritePointsExistingSeries(b *testing.B, mCnt, tkCnt, tvCnt, pntCnt tmpDir, _ := ioutil.TempDir("", "") defer os.RemoveAll(tmpDir) tmpShard := path.Join(tmpDir, "shard") - shard := NewShard(index, tmpShard) + shard := tsdb.NewShard(index, tmpShard, tsdb.NewEngineOptions()) shard.Open() defer shard.Close() chunkedWrite(shard, points) @@ -314,7 +319,7 @@ func benchmarkWritePointsExistingSeries(b *testing.B, mCnt, tkCnt, tvCnt, pntCnt } } -func chunkedWrite(shard *Shard, points []Point) { +func chunkedWrite(shard *tsdb.Shard, points []tsdb.Point) { nPts := len(points) chunkSz := 10000 start := 0 diff --git a/Godeps/_workspace/src/github.com/influxdb/influxdb/tsdb/store.go b/Godeps/_workspace/src/github.com/influxdb/influxdb/tsdb/store.go index 787b50733..e028437bc 100644 --- a/Godeps/_workspace/src/github.com/influxdb/influxdb/tsdb/store.go +++ b/Godeps/_workspace/src/github.com/influxdb/influxdb/tsdb/store.go @@ -9,18 +9,15 @@ import ( "strconv" "strings" "sync" - "time" "github.com/influxdb/influxdb/influxql" ) func NewStore(path string) *Store { return &Store{ - path: path, - MaxWALSize: DefaultMaxWALSize, - WALFlushInterval: DefaultWALFlushInterval, - WALPartitionFlushDelay: DefaultWALPartitionFlushDelay, - Logger: log.New(os.Stderr, "[store] ", log.LstdFlags), + path: path, + EngineOptions: NewEngineOptions(), + Logger: log.New(os.Stderr, "[store] ", log.LstdFlags), } } @@ -35,16 +32,34 @@ type Store struct { databaseIndexes map[string]*DatabaseIndex shards map[uint64]*Shard - MaxWALSize int - WALFlushInterval time.Duration - WALPartitionFlushDelay time.Duration - - Logger *log.Logger + EngineOptions EngineOptions + Logger *log.Logger } // Path returns the store's root path. func (s *Store) Path() string { return s.path }
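The store.go hunks here replace the per-field WAL tuning knobs (MaxWALSize, WALFlushInterval, WALPartitionFlushDelay) with a single EngineOptions value, and swap direct map access for the lock-guarded accessors added just below (DatabaseIndexN, Shard, ShardN) so that the now-external tsdb_test package can still observe store state. A minimal sketch of driving the new surface from outside the package follows; it is an approximation rather than code from the patch, and the scratch directory, database name "mydb", retention policy "default", and shard ID 1 are illustrative:

package main

import (
	"fmt"
	"io/ioutil"
	"log"
	"time"

	"github.com/influxdb/influxdb/tsdb"
)

func main() {
	// Illustrative scratch directory; any writable path works.
	dir, err := ioutil.TempDir("", "store_example")
	if err != nil {
		log.Fatal(err)
	}

	// WAL tuning that used to be set field-by-field on Store/Shard now
	// travels in a single EngineOptions value.
	opts := tsdb.NewEngineOptions()
	opts.MaxWALSize = 1024 * 1024 // illustrative override
	opts.WALFlushInterval = 10 * time.Minute

	s := tsdb.NewStore(dir)
	s.EngineOptions = opts
	if err := s.Open(); err != nil {
		log.Fatal(err)
	}
	defer s.Close()

	if err := s.CreateShard("mydb", "default", 1); err != nil {
		log.Fatal(err)
	}

	// External packages (like the tsdb_test files in this patch) reach
	// store state through lock-guarded accessors, not private maps.
	fmt.Println(s.DatabaseIndexN(), s.ShardN(), s.Shard(1) != nil)
}

Bundling the options also means CreateShard and loadShards can hand s.EngineOptions straight to NewShard, which is why the old Store.newShard copy-the-settings helper is deleted below.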
+// DatabaseIndexN returns the number of database indices in the store. +func (s *Store) DatabaseIndexN() int { + s.mu.RLock() + defer s.mu.RUnlock() + return len(s.databaseIndexes) +} + +// Shard returns a shard by id. +func (s *Store) Shard(id uint64) *Shard { + s.mu.RLock() + defer s.mu.RUnlock() + return s.shards[id] +} + +// ShardN returns the number of shards in the store. +func (s *Store) ShardN() int { + s.mu.RLock() + defer s.mu.RUnlock() + return len(s.shards) +} + func (s *Store) CreateShard(database, retentionPolicy string, shardID uint64) error { s.mu.Lock() defer s.mu.Unlock() @@ -67,7 +82,7 @@ func (s *Store) CreateShard(database, retentionPolicy string, shardID uint64) er } shardPath := filepath.Join(s.path, database, retentionPolicy, strconv.FormatUint(shardID, 10)) - shard := s.newShard(db, shardPath) + shard := NewShard(db, shardPath, s.EngineOptions) if err := shard.Open(); err != nil { return err } @@ -101,15 +116,6 @@ func (s *Store) DeleteShard(shardID uint64) error { return nil } -// newShard returns a shard and copies configuration settings from the store. -func (s *Store) newShard(index *DatabaseIndex, path string) *Shard { - sh := NewShard(index, path) - sh.MaxWALSize = s.MaxWALSize - sh.WALFlushInterval = s.WALFlushInterval - sh.WALPartitionFlushDelay = s.WALPartitionFlushDelay - return sh -} - // DeleteDatabase will close all shards associated with a database and remove the directory and files from disk. func (s *Store) DeleteDatabase(name string, shardIDs []uint64) error { s.mu.Lock() defer s.mu.Unlock() @@ -127,12 +133,6 @@ func (s *Store) DeleteDatabase(name string, shardIDs []uint64) error { return nil } -func (s *Store) Shard(shardID uint64) *Shard { - s.mu.RLock() - defer s.mu.RUnlock() - return s.shards[shardID] -} - // ShardIDs returns a slice of all ShardIDs under management. func (s *Store) ShardIDs() []uint64 { ids := make([]uint64, 0, len(s.shards)) @@ -173,7 +173,7 @@ func (s *Store) deleteSeries(keys []string) error { s.mu.RLock() defer s.mu.RUnlock() for _, sh := range s.shards { - if err := sh.deleteSeries(keys); err != nil { + if err := sh.DeleteSeries(keys); err != nil { return err } } @@ -185,7 +185,7 @@ func (s *Store) deleteMeasurement(name string, seriesKeys []string) error { s.mu.RLock() defer s.mu.RUnlock() for _, sh := range s.shards { - if err := sh.deleteMeasurement(name, seriesKeys); err != nil { + if err := sh.DeleteMeasurement(name, seriesKeys); err != nil { return err } } @@ -236,8 +236,11 @@ func (s *Store) loadShards() error { continue } - shard := s.newShard(s.databaseIndexes[db], path) - shard.Open() + shard := NewShard(s.databaseIndexes[db], path, s.EngineOptions) + err = shard.Open() + if err != nil { + return fmt.Errorf("failed to open shard %d: %s", shardID, err) + } s.shards[shardID] = shard } } @@ -253,6 +256,8 @@ func (s *Store) Open() error { s.shards = map[uint64]*Shard{} s.databaseIndexes = map[string]*DatabaseIndex{} + s.Logger.Printf("Using data dir: %v", s.Path()) + // Create directory. if err := os.MkdirAll(s.path, 0777); err != nil { return err @@ -281,18 +286,6 @@ func (s *Store) WriteToShard(shardID uint64, points []Point) error { return sh.WritePoints(points) } -// Flush forces all shards to write their WAL data to the index.
-func (s *Store) Flush() error { - s.mu.RLock() - defer s.mu.RUnlock() - for shardID, sh := range s.shards { - if err := sh.Flush(s.WALPartitionFlushDelay); err != nil { - return fmt.Errorf("flush: shard=%d, err=%s", shardID, err) - } - } - return nil -} - func (s *Store) CreateMapper(shardID uint64, query string, chunkSize int) (Mapper, error) { q, err := influxql.NewParser(strings.NewReader(query)).ParseStatement() if err != nil { @@ -309,10 +302,7 @@ func (s *Store) CreateMapper(shardID uint64, query string, chunkSize int) (Mappe return nil, nil } - if (stmt.IsRawQuery && !stmt.HasDistinct()) || stmt.IsSimpleDerivative() { - return NewRawMapper(shard, stmt, chunkSize), nil - } - return NewAggMapper(shard, stmt), nil + return NewLocalMapper(shard, stmt, chunkSize), nil } func (s *Store) Close() error { diff --git a/Godeps/_workspace/src/github.com/influxdb/influxdb/tsdb/store_test.go b/Godeps/_workspace/src/github.com/influxdb/influxdb/tsdb/store_test.go index ceff2f2f8..e619a3fa4 100644 --- a/Godeps/_workspace/src/github.com/influxdb/influxdb/tsdb/store_test.go +++ b/Godeps/_workspace/src/github.com/influxdb/influxdb/tsdb/store_test.go @@ -1,4 +1,4 @@ -package tsdb +package tsdb_test import ( "io/ioutil" @@ -6,6 +6,8 @@ import ( "path/filepath" "testing" "time" + + "github.com/influxdb/influxdb/tsdb" ) func TestStoreOpen(t *testing.T) { @@ -19,13 +21,13 @@ func TestStoreOpen(t *testing.T) { t.Fatalf("failed to create test db dir: %v", err) } - s := NewStore(dir) + s := tsdb.NewStore(dir) if err := s.Open(); err != nil { t.Fatalf("Store.Open() failed: %v", err) } - if exp := 1; len(s.databaseIndexes) != exp { - t.Fatalf("database index count mismatch: got %v, exp %v", len(s.databaseIndexes), exp) + if got, exp := s.DatabaseIndexN(), 1; got != exp { + t.Fatalf("database index count mismatch: got %v, exp %v", got, exp) } } @@ -46,26 +48,25 @@ func TestStoreOpenShard(t *testing.T) { t.Fatalf("Store.Open() failed to create test shard 1: %v", err) } - s := NewStore(dir) + s := tsdb.NewStore(dir) if err := s.Open(); err != nil { t.Fatalf("Store.Open() failed: %v", err) } - if exp := 1; len(s.databaseIndexes) != exp { - t.Fatalf("Store.Open() database index count mismatch: got %v, exp %v", len(s.databaseIndexes), exp) + if got, exp := s.DatabaseIndexN(), 1; got != exp { + t.Fatalf("Store.Open() database index count mismatch: got %v, exp %v", got, exp) } - if _, ok := s.databaseIndexes["mydb"]; !ok { + if di := s.DatabaseIndex("mydb"); di == nil { t.Errorf("Store.Open() database myb does not exist") } - if exp := 1; len(s.shards) != exp { - t.Fatalf("Store.Open() shard count mismatch: got %v, exp %v", len(s.shards), exp) + if got, exp := s.ShardN(), 1; got != exp { + t.Fatalf("Store.Open() shard count mismatch: got %v, exp %v", got, exp) } - sh := s.shards[uint64(1)] - if sh.path != shardPath { - t.Errorf("Store.Open() shard path mismatch: got %v, exp %v", sh.path, shardPath) + if sh := s.Shard(1); sh.Path() != shardPath { + t.Errorf("Store.Open() shard path mismatch: got %v, exp %v", sh.Path(), shardPath) } } @@ -80,16 +81,16 @@ func TestStoreOpenShardCreateDelete(t *testing.T) { t.Fatalf("Store.Open() failed to create test db dir: %v", err) } - s := NewStore(dir) + s := tsdb.NewStore(dir) if err := s.Open(); err != nil { t.Fatalf("Store.Open() failed: %v", err) } - if exp := 1; len(s.databaseIndexes) != exp { - t.Fatalf("Store.Open() database index count mismatch: got %v, exp %v", len(s.databaseIndexes), exp) + if got, exp := s.DatabaseIndexN(), 1; got != exp { + t.Fatalf("Store.Open() 
database index count mismatch: got %v, exp %v", got, exp) } - if _, ok := s.databaseIndexes["mydb"]; !ok { + if di := s.DatabaseIndex("mydb"); di == nil { t.Errorf("Store.Open() database mydb does not exist") } @@ -97,8 +98,8 @@ func TestStoreOpenShardCreateDelete(t *testing.T) { t.Fatalf("Store.Open() failed to create shard") } - if exp := 1; len(s.shards) != exp { - t.Fatalf("Store.Open() shard count mismatch: got %v, exp %v", len(s.shards), exp) + if got, exp := s.ShardN(), 1; got != exp { + t.Fatalf("Store.Open() shard count mismatch: got %v, exp %v", got, exp) } shardIDs := s.ShardIDs() @@ -110,7 +111,7 @@ func TestStoreOpenShardCreateDelete(t *testing.T) { t.Fatalf("Store.Open() failed to delete shard: %v", err) } - if _, ok := s.shards[uint64(1)]; ok { + if sh := s.Shard(1); sh != nil { t.Fatal("Store.Open() shard ID 1 still exists") } } @@ -127,17 +128,17 @@ func TestStoreOpenNotDatabaseDir(t *testing.T) { t.Fatalf("Store.Open() failed to create test db dir: %v", err) } - s := NewStore(dir) + s := tsdb.NewStore(dir) if err := s.Open(); err != nil { t.Fatalf("Store.Open() failed: %v", err) } - if exp := 0; len(s.databaseIndexes) != exp { - t.Fatalf("Store.Open() database index count mismatch: got %v, exp %v", len(s.databaseIndexes), exp) + if got, exp := s.DatabaseIndexN(), 0; got != exp { + t.Fatalf("Store.Open() database index count mismatch: got %v, exp %v", got, exp) } - if exp := 0; len(s.shards) != exp { - t.Fatalf("Store.Open() shard count mismatch: got %v, exp %v", len(s.shards), exp) + if got, exp := s.ShardN(), 0; got != exp { + t.Fatalf("Store.Open() shard count mismatch: got %v, exp %v", got, exp) } } @@ -157,21 +158,21 @@ func TestStoreOpenNotRPDir(t *testing.T) { t.Fatalf("Store.Open() failed to create test retention policy directory: %v", err) } - s := NewStore(dir) + s := tsdb.NewStore(dir) if err := s.Open(); err != nil { t.Fatalf("Store.Open() failed: %v", err) } - if exp := 1; len(s.databaseIndexes) != exp { - t.Fatalf("Store.Open() database index count mismatch: got %v, exp %v", len(s.databaseIndexes), exp) + if got, exp := s.DatabaseIndexN(), 1; got != exp { + t.Fatalf("Store.Open() database index count mismatch: got %v, exp %v", got, exp) } - if _, ok := s.databaseIndexes["mydb"]; !ok { + if di := s.DatabaseIndex("mydb"); di == nil { t.Errorf("Store.Open() database myb does not exist") } - if exp := 0; len(s.shards) != exp { - t.Fatalf("Store.Open() shard count mismatch: got %v, exp %v", len(s.shards), exp) + if got, exp := s.ShardN(), 0; got != exp { + t.Fatalf("Store.Open() shard count mismatch: got %v, exp %v", got, exp) } } @@ -193,21 +194,21 @@ func TestStoreOpenShardBadShardPath(t *testing.T) { t.Fatalf("Store.Open() failed to create test shard 1: %v", err) } - s := NewStore(dir) + s := tsdb.NewStore(dir) if err := s.Open(); err != nil { t.Fatalf("Store.Open() failed: %v", err) } - if exp := 1; len(s.databaseIndexes) != exp { - t.Fatalf("Store.Open() database index count mismatch: got %v, exp %v", len(s.databaseIndexes), exp) + if got, exp := s.DatabaseIndexN(), 1; got != exp { + t.Fatalf("Store.Open() database index count mismatch: got %v, exp %v", got, exp) } - if _, ok := s.databaseIndexes["mydb"]; !ok { + if di := s.DatabaseIndex("mydb"); di == nil { t.Errorf("Store.Open() database myb does not exist") } - if exp := 0; len(s.shards) != exp { - t.Fatalf("Store.Open() shard count mismatch: got %v, exp %v", len(s.shards), exp) + if got, exp := s.ShardN(), 0; got != exp { + t.Fatalf("Store.Open() shard count mismatch: got %v, exp %v", got, exp) } } @@ 
-218,17 +219,17 @@ func benchmarkStoreOpen(b *testing.B, mCnt, tkCnt, tvCnt, pntCnt, shardCnt int) // Generate test series (measurements + unique tag sets). series := genTestSeries(mCnt, tkCnt, tvCnt) // Generate point data to write to the shards. - points := []Point{} + points := []tsdb.Point{} for _, s := range series { for val := 0.0; val < float64(pntCnt); val++ { - p := NewPoint(s.Measurement, s.Series.Tags, map[string]interface{}{"value": val}, time.Now()) + p := tsdb.NewPoint(s.Measurement, s.Series.Tags, map[string]interface{}{"value": val}, time.Now()) points = append(points, p) } } // Create a temporary directory for the test data. dir, _ := ioutil.TempDir("", "store_test") // Create the store. - store := NewStore(dir) + store := tsdb.NewStore(dir) // Open the store. if err := store.Open(); err != nil { b.Fatalf("benchmarkStoreOpen: %s", err) @@ -249,7 +250,7 @@ func benchmarkStoreOpen(b *testing.B, mCnt, tkCnt, tvCnt, pntCnt, shardCnt int) // Run the benchmark loop. b.ResetTimer() for n := 0; n < b.N; n++ { - store := NewStore(dir) + store := tsdb.NewStore(dir) if err := store.Open(); err != nil { b.Fatalf("benchmarkStoreOpen: %s", err) } @@ -260,7 +261,7 @@ func benchmarkStoreOpen(b *testing.B, mCnt, tkCnt, tvCnt, pntCnt, shardCnt int) } } -func chunkedWriteStoreShard(store *Store, shardID int, points []Point) { +func chunkedWriteStoreShard(store *tsdb.Store, shardID int, points []tsdb.Point) { nPts := len(points) chunkSz := 10000 start := 0
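One behavioral consolidation in the store.go diff above deserves a note: CreateMapper no longer chooses between NewRawMapper and NewAggMapper based on statement shape; every SELECT statement is served by NewLocalMapper. A rough sketch of the resulting call-site contract follows, assuming an already-opened store; the data path, shard ID, query text, and chunk size are illustrative, and the exact condition behind the bare "return nil, nil" path is elided in the hunk, so it is only noted here as a possibility callers must handle:

package main

import (
	"log"

	"github.com/influxdb/influxdb/tsdb"
)

// mapShard sketches the CreateMapper contract after this patch: parse errors
// surface as a non-nil error, while a nil Mapper with a nil error is also
// possible (the hunk above shows such a path), so callers check both.
func mapShard(store *tsdb.Store) tsdb.Mapper {
	m, err := store.CreateMapper(1, "SELECT value FROM cpu", 1000)
	if err != nil {
		log.Fatal(err)
	}
	if m == nil {
		log.Println("no mapper produced for this statement/shard")
	}
	return m
}

func main() {
	s := tsdb.NewStore("/tmp/influxdb-data") // illustrative path
	if err := s.Open(); err != nil {
		log.Fatal(err)
	}
	defer s.Close()
	_ = mapShard(s)
}

Folding both query shapes into one local mapper keeps per-shard query logic in a single place, which presumably is what lets the executor and mapper rewrites elsewhere in this patch (tsdb/executor.go, tsdb/mapper.go) treat shards uniformly.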