Telegraf 0.1.5, update InfluxDB client to HEAD

This commit is contained in:
Cameron Sparr 2015-08-18 12:11:41 -06:00
parent 461245c83d
commit d98bedd6e1
52 changed files with 8464 additions and 3937 deletions

24
Godeps/Godeps.json generated
View File

@ -89,33 +89,33 @@
},
{
"ImportPath": "github.com/influxdb/influxdb/client",
"Comment": "v0.9.2",
"Rev": "6c0a91f775f9fc0e625d17ffa04a3fe86945ba09"
"Comment": "v0.9.1-rc1-545-g8de66eb",
"Rev": "8de66eb37024cd6bd953662e5588253f0888874b"
},
{
"ImportPath": "github.com/influxdb/influxdb/influxql",
"Comment": "v0.9.2",
"Rev": "6c0a91f775f9fc0e625d17ffa04a3fe86945ba09"
"Comment": "v0.9.1-rc1-545-g8de66eb",
"Rev": "8de66eb37024cd6bd953662e5588253f0888874b"
},
{
"ImportPath": "github.com/influxdb/influxdb/meta",
"Comment": "v0.9.2",
"Rev": "6c0a91f775f9fc0e625d17ffa04a3fe86945ba09"
"Comment": "v0.9.1-rc1-545-g8de66eb",
"Rev": "8de66eb37024cd6bd953662e5588253f0888874b"
},
{
"ImportPath": "github.com/influxdb/influxdb/snapshot",
"Comment": "v0.9.2",
"Rev": "6c0a91f775f9fc0e625d17ffa04a3fe86945ba09"
"Comment": "v0.9.1-rc1-545-g8de66eb",
"Rev": "8de66eb37024cd6bd953662e5588253f0888874b"
},
{
"ImportPath": "github.com/influxdb/influxdb/toml",
"Comment": "v0.9.2",
"Rev": "6c0a91f775f9fc0e625d17ffa04a3fe86945ba09"
"Comment": "v0.9.1-rc1-545-g8de66eb",
"Rev": "8de66eb37024cd6bd953662e5588253f0888874b"
},
{
"ImportPath": "github.com/influxdb/influxdb/tsdb",
"Comment": "v0.9.2",
"Rev": "6c0a91f775f9fc0e625d17ffa04a3fe86945ba09"
"Comment": "v0.9.1-rc1-545-g8de66eb",
"Rev": "8de66eb37024cd6bd953662e5588253f0888874b"
},
{
"ImportPath": "github.com/lib/pq",

View File

@ -45,6 +45,7 @@ the configuration below.
package main
import "github.com/influxdb/influxdb/client"
import "net/url"
const (
MyHost = "localhost"

View File

@ -5,22 +5,69 @@ import (
"encoding/json"
"errors"
"fmt"
"io"
"io/ioutil"
"net"
"net/http"
"net/url"
"strconv"
"strings"
"time"
"github.com/influxdb/influxdb/influxql"
"github.com/influxdb/influxdb/tsdb"
)
const (
// DefaultHost is the default host used to connect to an InfluxDB instance
DefaultHost = "localhost"
// DefaultPort is the default port used to connect to an InfluxDB instance
DefaultPort = 8086
// DefaultTimeout is the default connection timeout used to connect to an InfluxDB instance
DefaultTimeout = 0
)
// Query is used to send a command to the server. Both Command and Database are required.
type Query struct {
Command string
Database string
}
// ParseConnectionString will parse a string to create a valid connection URL
func ParseConnectionString(path string, ssl bool) (url.URL, error) {
var host string
var port int
if strings.Contains(path, ":") {
h := strings.Split(path, ":")
i, e := strconv.Atoi(h[1])
if e != nil {
return url.URL{}, fmt.Errorf("invalid port number %q: %s\n", path, e)
}
port = i
if h[0] == "" {
host = DefaultHost
} else {
host = h[0]
}
} else {
host = path
// If they didn't specify a port, always use the default port
port = DefaultPort
}
u := url.URL{
Scheme: "http",
}
if ssl {
u.Scheme = "https"
}
u.Host = net.JoinHostPort(host, strconv.Itoa(port))
return u, nil
}
// Config is used to specify what server to connect to.
// URL: The URL of the server connecting to.
// Username/Password are optional. They will be passed via basic auth if provided.
@ -34,6 +81,13 @@ type Config struct {
Timeout time.Duration
}
// NewConfig will create a config to be used in connecting to the client
func NewConfig() Config {
return Config{
Timeout: DefaultTimeout,
}
}
// Client is used to make calls to the server.
type Client struct {
url url.URL
@ -120,7 +174,8 @@ func (c *Client) Query(q Query) (*Response, error) {
// If successful, error is nil and Response is nil
// If an error occurs, Response may contain additional information if populated.
func (c *Client) Write(bp BatchPoints) (*Response, error) {
c.url.Path = "write"
u := c.url
u.Path = "write"
var b bytes.Buffer
for _, p := range bp.Points {
@ -146,7 +201,7 @@ func (c *Client) Write(bp BatchPoints) (*Response, error) {
}
}
req, err := http.NewRequest("POST", c.url.String(), &b)
req, err := http.NewRequest("POST", u.String(), &b)
if err != nil {
return nil, err
}
@ -156,10 +211,10 @@ func (c *Client) Write(bp BatchPoints) (*Response, error) {
req.SetBasicAuth(c.username, c.password)
}
params := req.URL.Query()
params.Add("db", bp.Database)
params.Add("rp", bp.RetentionPolicy)
params.Add("precision", bp.Precision)
params.Add("consistency", bp.WriteConsistency)
params.Set("db", bp.Database)
params.Set("rp", bp.RetentionPolicy)
params.Set("precision", bp.Precision)
params.Set("consistency", bp.WriteConsistency)
req.URL.RawQuery = params.Encode()
resp, err := c.httpClient.Do(req)
@ -170,7 +225,7 @@ func (c *Client) Write(bp BatchPoints) (*Response, error) {
var response Response
body, err := ioutil.ReadAll(resp.Body)
if err != nil && err.Error() != "EOF" {
if err != nil {
return nil, err
}
@ -183,6 +238,52 @@ func (c *Client) Write(bp BatchPoints) (*Response, error) {
return nil, nil
}
// WriteLineProtocol takes a string with line returns to delimit each write
// If successful, error is nil and Response is nil
// If an error occurs, Response may contain additional information if populated.
func (c *Client) WriteLineProtocol(data, database, retentionPolicy, precision, writeConsistency string) (*Response, error) {
u := c.url
u.Path = "write"
r := strings.NewReader(data)
req, err := http.NewRequest("POST", u.String(), r)
if err != nil {
return nil, err
}
req.Header.Set("Content-Type", "")
req.Header.Set("User-Agent", c.userAgent)
if c.username != "" {
req.SetBasicAuth(c.username, c.password)
}
params := req.URL.Query()
params.Set("db", database)
params.Set("rp", retentionPolicy)
params.Set("precision", precision)
params.Set("consistency", writeConsistency)
req.URL.RawQuery = params.Encode()
resp, err := c.httpClient.Do(req)
if err != nil {
return nil, err
}
defer resp.Body.Close()
var response Response
body, err := ioutil.ReadAll(resp.Body)
if err != nil {
return nil, err
}
if resp.StatusCode != http.StatusNoContent && resp.StatusCode != http.StatusOK {
err := fmt.Errorf(string(body))
response.Err = err
return &response, err
}
return nil, nil
}
// Ping will check to see if the server is up
// Ping returns how long the request took, the version of the server it connected to, and an error if one occurred.
func (c *Client) Ping() (time.Duration, string, error) {
@ -209,34 +310,6 @@ func (c *Client) Ping() (time.Duration, string, error) {
return time.Since(now), version, nil
}
// Dump connects to server and retrieves all data stored for specified database.
// If successful, Dump returns the entire response body, which is an io.ReadCloser
func (c *Client) Dump(db string) (io.ReadCloser, error) {
u := c.url
u.Path = "dump"
values := u.Query()
values.Set("db", db)
u.RawQuery = values.Encode()
req, err := http.NewRequest("GET", u.String(), nil)
if err != nil {
return nil, err
}
req.Header.Set("User-Agent", c.userAgent)
if c.username != "" {
req.SetBasicAuth(c.username, c.password)
}
resp, err := c.httpClient.Do(req)
if err != nil {
return nil, err
}
if resp.StatusCode != http.StatusOK {
return resp.Body, fmt.Errorf("HTTP Protocol error %d", resp.StatusCode)
}
return resp.Body, nil
}
// Structs
// Result represents a resultset returned from a single statement.

View File

@ -469,14 +469,14 @@ SHOW MEASUREMENTS WHERE region = 'uswest' AND host = 'serverA';
### SHOW RETENTION POLICIES
```
show_retention_policies = "SHOW RETENTION POLICIES" db_name .
show_retention_policies = "SHOW RETENTION POLICIES ON" db_name .
```
#### Example:
```sql
-- show all retention policies on a database
SHOW RETENTION POLICIES mydb;
SHOW RETENTION POLICIES ON mydb;
```
### SHOW SERIES

View File

@ -936,6 +936,11 @@ func (s *SelectStatement) walkForTime(node Node) bool {
// HasWildcard returns whether or not the select statement has at least 1 wildcard
func (s *SelectStatement) HasWildcard() bool {
return s.HasFieldWildcard() || s.HasDimensionWildcard()
}
// HasFieldWildcard returns whether or not the select statement has at least 1 wildcard in the fields
func (s *SelectStatement) HasFieldWildcard() bool {
for _, f := range s.Fields {
_, ok := f.Expr.(*Wildcard)
if ok {
@ -943,6 +948,12 @@ func (s *SelectStatement) HasWildcard() bool {
}
}
return false
}
// HasDimensionWildcard returns whether or not the select statement has
// at least 1 wildcard in the dimensions aka `GROUP BY`
func (s *SelectStatement) HasDimensionWildcard() bool {
for _, d := range s.Dimensions {
_, ok := d.Expr.(*Wildcard)
if ok {
@ -990,11 +1001,27 @@ func (s *SelectStatement) validate(tr targetRequirement) error {
return err
}
if err := s.validateWildcard(); err != nil {
return err
}
return nil
}
func (s *SelectStatement) validateAggregates(tr targetRequirement) error {
// First, determine if specific calls have at least one and only one argument
// First, if 1 field is an aggregate, then all fields must be an aggregate. This is
// a explicit limitation of the current system.
numAggregates := 0
for _, f := range s.Fields {
if _, ok := f.Expr.(*Call); ok {
numAggregates++
}
}
if numAggregates != 0 && numAggregates != len(s.Fields) {
return fmt.Errorf("mixing aggregate and non-aggregate queries is not supported")
}
// Secondly, determine if specific calls have at least one and only one argument
for _, f := range s.Fields {
if c, ok := f.Expr.(*Call); ok {
switch c.Name {
@ -1033,6 +1060,13 @@ func (s *SelectStatement) validateAggregates(tr targetRequirement) error {
return nil
}
func (s *SelectStatement) validateWildcard() error {
if s.HasWildcard() && len(s.Fields) > 1 {
return fmt.Errorf("wildcards can not be combined with other fields")
}
return nil
}
func (s *SelectStatement) HasDistinct() bool {
// determine if we have a call named distinct
for _, f := range s.Fields {
@ -1321,6 +1355,17 @@ func (s *SelectStatement) NamesInSelect() []string {
return a
}
// NamesInDimension returns the field and tag names (idents) in the group by
func (s *SelectStatement) NamesInDimension() []string {
var a []string
for _, d := range s.Dimensions {
a = append(a, walkNames(d.Expr)...)
}
return a
}
// walkNames will walk the Expr and return the database fields
func walkNames(exp Expr) []string {
switch expr := exp.(type) {
@ -1953,6 +1998,32 @@ func (s *ShowFieldKeysStatement) RequiredPrivileges() ExecutionPrivileges {
// Fields represents a list of fields.
type Fields []*Field
// AliasNames returns a list of calculated field names in
// order of alias, function name, then field.
func (a Fields) AliasNames() []string {
names := []string{}
for _, f := range a {
names = append(names, f.Name())
}
return names
}
// Names returns a list of raw field names.
func (a Fields) Names() []string {
names := []string{}
for _, f := range a {
var name string
switch expr := f.Expr.(type) {
case *Call:
name = expr.Name
case *VarRef:
name = expr.Val
}
names = append(names, name)
}
return names
}
// String returns a string representation of the fields.
func (a Fields) String() string {
var str []string
@ -1992,26 +2063,6 @@ func (f *Field) Name() string {
func (f *Field) String() string {
str := f.Expr.String()
switch f.Expr.(type) {
case *VarRef:
quoted := false
// Escape any double-quotes in the field
if strings.Contains(str, `"`) {
str = strings.Replace(str, `"`, `\"`, -1)
quoted = true
}
// Escape any single-quotes in the field
if strings.Contains(str, `'`) {
quoted = true
}
// Double-quote field names with spaces or that were previously escaped
if strings.Contains(str, " ") || quoted {
str = fmt.Sprintf("\"%s\"", str)
}
}
if f.Alias == "" {
return str
}
@ -2132,7 +2183,9 @@ type VarRef struct {
}
// String returns a string representation of the variable reference.
func (r *VarRef) String() string { return r.Val }
func (r *VarRef) String() string {
return QuoteIdent(r.Val)
}
// Call represents a function call.
type Call struct {

View File

@ -44,35 +44,35 @@ func TestSelectStatement_Substatement(t *testing.T) {
{
stmt: `SELECT sum(aa.value) + sum(bb.value) FROM aa, bb`,
expr: &influxql.VarRef{Val: "aa.value"},
sub: `SELECT aa.value FROM aa`,
sub: `SELECT "aa.value" FROM aa`,
},
// 2. Simple merge
{
stmt: `SELECT sum(aa.value) + sum(bb.value) FROM aa, bb`,
expr: &influxql.VarRef{Val: "bb.value"},
sub: `SELECT bb.value FROM bb`,
sub: `SELECT "bb.value" FROM bb`,
},
// 3. Join with condition
{
stmt: `SELECT sum(aa.value) + sum(bb.value) FROM aa, bb WHERE aa.host = 'servera' AND bb.host = 'serverb'`,
expr: &influxql.VarRef{Val: "bb.value"},
sub: `SELECT bb.value FROM bb WHERE bb.host = 'serverb'`,
sub: `SELECT "bb.value" FROM bb WHERE "bb.host" = 'serverb'`,
},
// 4. Join with complex condition
{
stmt: `SELECT sum(aa.value) + sum(bb.value) FROM aa, bb WHERE aa.host = 'servera' AND (bb.host = 'serverb' OR bb.host = 'serverc') AND 1 = 2`,
expr: &influxql.VarRef{Val: "bb.value"},
sub: `SELECT bb.value FROM bb WHERE (bb.host = 'serverb' OR bb.host = 'serverc') AND 1.000 = 2.000`,
sub: `SELECT "bb.value" FROM bb WHERE ("bb.host" = 'serverb' OR "bb.host" = 'serverc') AND 1.000 = 2.000`,
},
// 5. 4 with different condition order
{
stmt: `SELECT sum(aa.value) + sum(bb.value) FROM aa, bb WHERE ((bb.host = 'serverb' OR bb.host = 'serverc') AND aa.host = 'servera') AND 1 = 2`,
expr: &influxql.VarRef{Val: "bb.value"},
sub: `SELECT bb.value FROM bb WHERE ((bb.host = 'serverb' OR bb.host = 'serverc')) AND 1.000 = 2.000`,
sub: `SELECT "bb.value" FROM bb WHERE (("bb.host" = 'serverb' OR "bb.host" = 'serverc')) AND 1.000 = 2.000`,
},
}
@ -217,7 +217,7 @@ func TestSelectStatement_SetTimeRange(t *testing.T) {
// Ensure the idents from the select clause can come out
func TestSelect_NamesInSelect(t *testing.T) {
s := MustParseSelectStatement("select count(asdf), bar from cpu")
s := MustParseSelectStatement("select count(asdf), count(bar) from cpu")
a := s.NamesInSelect()
if !reflect.DeepEqual(a, []string{"asdf", "bar"}) {
t.Fatal("expected names asdf and bar")

View File

@ -550,10 +550,9 @@ func ReduceMedian(values []interface{}) interface{} {
sortedRange = getSortedRange(data, middle-1, 2)
var low, high = sortedRange[0], sortedRange[1]
return low + (high-low)/2
} else {
sortedRange = getSortedRange(data, middle, 1)
return sortedRange[0]
}
sortedRange = getSortedRange(data, middle, 1)
return sortedRange[0]
}
// getSortedRange returns a sorted subset of data. By using discardLowerRange and discardUpperRange to get the target

View File

@ -1471,11 +1471,18 @@ func (p *Parser) parseFields() (Fields, error) {
func (p *Parser) parseField() (*Field, error) {
f := &Field{}
_, pos, _ := p.scanIgnoreWhitespace()
p.unscan()
// Parse the expression first.
expr, err := p.ParseExpr()
if err != nil {
return nil, err
}
var c validateField
Walk(&c, expr)
if c.foundInvalid {
return nil, fmt.Errorf("invalid operator %s in SELECT clause at line %d, char %d; operator is intended for WHERE clause", c.badToken, pos.Line+1, pos.Char+1)
}
f.Expr = expr
// Parse the alias if the current and next tokens are "WS AS".
@ -1491,6 +1498,30 @@ func (p *Parser) parseField() (*Field, error) {
return f, nil
}
// validateField checks if the Expr is a valid field. We disallow all binary expression
// that return a boolean
type validateField struct {
foundInvalid bool
badToken Token
}
func (c *validateField) Visit(n Node) Visitor {
e, ok := n.(*BinaryExpr)
if !ok {
return c
}
switch e.Op {
case EQ, NEQ, EQREGEX,
NEQREGEX, LT, LTE, GT, GTE,
AND, OR:
c.foundInvalid = true
c.badToken = e.Op
return nil
}
return c
}
// parseAlias parses the "AS (IDENT|STRING)" alias for fields and dimensions.
func (p *Parser) parseAlias() (string, error) {
// Check if the next token is "AS". If not, then unscan and exit.
@ -1660,31 +1691,31 @@ func (p *Parser) parseFill() (FillOption, interface{}, error) {
p.unscan()
return NullFill, nil, nil
}
if lit, ok := expr.(*Call); !ok {
lit, ok := expr.(*Call)
if !ok {
p.unscan()
return NullFill, nil, nil
} else {
if strings.ToLower(lit.Name) != "fill" {
p.unscan()
return NullFill, nil, nil
}
if len(lit.Args) != 1 {
return NullFill, nil, errors.New("fill requires an argument, e.g.: 0, null, none, previous")
}
switch lit.Args[0].String() {
case "null":
return NullFill, nil, nil
case "none":
return NoFill, nil, nil
case "previous":
return PreviousFill, nil, nil
default:
num, ok := lit.Args[0].(*NumberLiteral)
if !ok {
return NullFill, nil, fmt.Errorf("expected number argument in fill()")
}
return NumberFill, num.Val, nil
}
if strings.ToLower(lit.Name) != "fill" {
p.unscan()
return NullFill, nil, nil
}
if len(lit.Args) != 1 {
return NullFill, nil, errors.New("fill requires an argument, e.g.: 0, null, none, previous")
}
switch lit.Args[0].String() {
case "null":
return NullFill, nil, nil
case "none":
return NoFill, nil, nil
case "previous":
return PreviousFill, nil, nil
default:
num, ok := lit.Args[0].(*NumberLiteral)
if !ok {
return NullFill, nil, fmt.Errorf("expected number argument in fill()")
}
return NumberFill, num.Val, nil
}
}
@ -2186,6 +2217,11 @@ func QuoteIdent(segments ...string) string {
// IdentNeedsQuotes returns true if the ident string given would require quotes.
func IdentNeedsQuotes(ident string) bool {
// check if this identifier is a keyword
tok := Lookup(ident)
if tok != IDENT {
return true
}
for i, r := range ident {
if i == 0 && !isIdentFirstChar(r) {
return true

View File

@ -1225,12 +1225,13 @@ func TestParser_ParseStatement(t *testing.T) {
{s: `SELECT field1 FROM myseries ORDER BY time, field1`, err: `only ORDER BY time ASC supported at this time`},
{s: `SELECT field1 AS`, err: `found EOF, expected identifier at line 1, char 18`},
{s: `SELECT field1 FROM foo group by time(1s)`, err: `GROUP BY requires at least one aggregate function`},
{s: `SELECT count(value), value FROM foo`, err: `mixing aggregate and non-aggregate queries is not supported`},
{s: `SELECT count(value) FROM foo group by time(1s)`, err: `aggregate functions with GROUP BY time require a WHERE time clause`},
{s: `SELECT count(value) FROM foo group by time(1s) where host = 'hosta.influxdb.org'`, err: `aggregate functions with GROUP BY time require a WHERE time clause`},
{s: `SELECT field1 FROM 12`, err: `found 12, expected identifier at line 1, char 20`},
{s: `SELECT 1000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 FROM myseries`, err: `unable to parse number at line 1, char 8`},
{s: `SELECT 10.5h FROM myseries`, err: `found h, expected FROM at line 1, char 12`},
{s: `SELECT derivative(field1), field1 FROM myseries`, err: `derivative cannot be used with other fields`},
{s: `SELECT derivative(field1), field1 FROM myseries`, err: `mixing aggregate and non-aggregate queries is not supported`},
{s: `SELECT distinct(field1), sum(field1) FROM myseries`, err: `aggregate function distinct() can not be combined with other functions or fields`},
{s: `SELECT distinct(field1), field2 FROM myseries`, err: `aggregate function distinct() can not be combined with other functions or fields`},
{s: `SELECT distinct(field1, field2) FROM myseries`, err: `distinct function can only have one argument`},
@ -1244,6 +1245,12 @@ func TestParser_ParseStatement(t *testing.T) {
{s: `select derivative() from myseries`, err: `invalid number of arguments for derivative, expected at least 1 but no more than 2, got 0`},
{s: `select derivative(mean(value), 1h, 3) from myseries`, err: `invalid number of arguments for derivative, expected at least 1 but no more than 2, got 3`},
{s: `SELECT field1 from myseries WHERE host =~ 'asd' LIMIT 1`, err: `found asd, expected regex at line 1, char 42`},
{s: `SELECT value > 2 FROM cpu`, err: `invalid operator > in SELECT clause at line 1, char 8; operator is intended for WHERE clause`},
{s: `SELECT value = 2 FROM cpu`, err: `invalid operator = in SELECT clause at line 1, char 8; operator is intended for WHERE clause`},
{s: `SELECT s =~ /foo/ FROM cpu`, err: `invalid operator =~ in SELECT clause at line 1, char 8; operator is intended for WHERE clause`},
{s: `SELECT foo, * from cpu`, err: `wildcards can not be combined with other fields`},
{s: `SELECT *, * from cpu`, err: `found ,, expected FROM at line 1, char 9`},
{s: `SELECT *, foo from cpu`, err: `found ,, expected FROM at line 1, char 9`},
{s: `DELETE`, err: `found EOF, expected FROM at line 1, char 8`},
{s: `DELETE FROM`, err: `found EOF, expected identifier at line 1, char 13`},
{s: `DELETE FROM myseries WHERE`, err: `found EOF, expected identifier, string, number, bool at line 1, char 28`},
@ -1661,6 +1668,8 @@ func TestQuoteIdent(t *testing.T) {
s string
}{
{[]string{``}, ``},
{[]string{`select`}, `"select"`},
{[]string{`in-bytes`}, `"in-bytes"`},
{[]string{`foo`, `bar`}, `"foo".bar`},
{[]string{`foo`, ``, `bar`}, `"foo"..bar`},
{[]string{`foo bar`, `baz`}, `"foo bar".baz`},

View File

@ -166,12 +166,8 @@ func newBinaryExprEvaluator(op Token, lhs, rhs Processor) Processor {
return func(values []interface{}) interface{} {
l := lhs(values)
r := rhs(values)
if lv, ok := l.(float64); ok {
if rv, ok := r.(float64); ok {
if rv != 0 {
return lv + rv
}
}
if lf, rf, ok := processorValuesAsFloat64(l, r); ok {
return lf + rf
}
return nil
}
@ -179,12 +175,8 @@ func newBinaryExprEvaluator(op Token, lhs, rhs Processor) Processor {
return func(values []interface{}) interface{} {
l := lhs(values)
r := rhs(values)
if lv, ok := l.(float64); ok {
if rv, ok := r.(float64); ok {
if rv != 0 {
return lv - rv
}
}
if lf, rf, ok := processorValuesAsFloat64(l, r); ok {
return lf - rf
}
return nil
}
@ -192,12 +184,8 @@ func newBinaryExprEvaluator(op Token, lhs, rhs Processor) Processor {
return func(values []interface{}) interface{} {
l := lhs(values)
r := rhs(values)
if lv, ok := l.(float64); ok {
if rv, ok := r.(float64); ok {
if rv != 0 {
return lv * rv
}
}
if lf, rf, ok := processorValuesAsFloat64(l, r); ok {
return lf * rf
}
return nil
}
@ -205,12 +193,8 @@ func newBinaryExprEvaluator(op Token, lhs, rhs Processor) Processor {
return func(values []interface{}) interface{} {
l := lhs(values)
r := rhs(values)
if lv, ok := l.(float64); ok {
if rv, ok := r.(float64); ok {
if rv != 0 {
return lv / rv
}
}
if lf, rf, ok := processorValuesAsFloat64(l, r); ok {
return lf / rf
}
return nil
}
@ -221,3 +205,27 @@ func newBinaryExprEvaluator(op Token, lhs, rhs Processor) Processor {
}
}
}
func processorValuesAsFloat64(lhs interface{}, rhs interface{}) (float64, float64, bool) {
var lf float64
var rf float64
var ok bool
lf, ok = lhs.(float64)
if !ok {
var li int64
if li, ok = lhs.(int64); !ok {
return 0, 0, false
}
lf = float64(li)
}
rf, ok = rhs.(float64)
if !ok {
var ri int64
if ri, ok = rhs.(int64); !ok {
return 0, 0, false
}
rf = float64(ri)
}
return lf, rf, true
}

View File

@ -514,6 +514,8 @@ func ScanString(r io.RuneScanner) (string, error) {
_, _ = buf.WriteRune('\\')
} else if ch1 == '"' {
_, _ = buf.WriteRune('"')
} else if ch1 == '\'' {
_, _ = buf.WriteRune('\'')
} else {
return string(ch0) + string(ch1), errBadEscape
}

View File

@ -243,6 +243,7 @@ func TestScanString(t *testing.T) {
{in: `"foo\nbar"`, out: "foo\nbar"},
{in: `"foo\\bar"`, out: `foo\bar`},
{in: `"foo\"bar"`, out: `foo"bar`},
{in: `'foo\'bar'`, out: `foo'bar`},
{in: `"foo` + "\n", out: `foo`, err: "bad string"}, // newline in string
{in: `"foo`, out: `foo`, err: "bad string"}, // unclosed quotes

View File

@ -31,16 +31,17 @@ type Config struct {
Dir string `toml:"dir"`
Hostname string `toml:"hostname"`
BindAddress string `toml:"bind-address"`
Peers []string `toml:"peers"`
Peers []string `toml:"-"`
RetentionAutoCreate bool `toml:"retention-autocreate"`
ElectionTimeout toml.Duration `toml:"election-timeout"`
HeartbeatTimeout toml.Duration `toml:"heartbeat-timeout"`
LeaderLeaseTimeout toml.Duration `toml:"leader-lease-timeout"`
CommitTimeout toml.Duration `toml:"commit-timeout"`
ClusterTracing bool `toml:"cluster-tracing"`
}
func NewConfig() Config {
return Config{
func NewConfig() *Config {
return &Config{
Hostname: DefaultHostname,
BindAddress: DefaultBindAddress,
RetentionAutoCreate: true,

View File

@ -141,8 +141,8 @@ func (data *Data) CreateRetentionPolicy(database string, rpi *RetentionPolicyInf
// Validate retention policy.
if rpi.Name == "" {
return ErrRetentionPolicyNameRequired
} else if rpi.ReplicaN != len(data.Nodes) {
return ErrReplicationFactorMismatch
} else if rpi.ReplicaN < 1 {
return ErrReplicationFactorTooLow
}
// Find database.
@ -706,14 +706,18 @@ func (di *DatabaseInfo) unmarshal(pb *internal.DatabaseInfo) {
di.Name = pb.GetName()
di.DefaultRetentionPolicy = pb.GetDefaultRetentionPolicy()
di.RetentionPolicies = make([]RetentionPolicyInfo, len(pb.GetRetentionPolicies()))
for i, x := range pb.GetRetentionPolicies() {
di.RetentionPolicies[i].unmarshal(x)
if len(pb.GetRetentionPolicies()) > 0 {
di.RetentionPolicies = make([]RetentionPolicyInfo, len(pb.GetRetentionPolicies()))
for i, x := range pb.GetRetentionPolicies() {
di.RetentionPolicies[i].unmarshal(x)
}
}
di.ContinuousQueries = make([]ContinuousQueryInfo, len(pb.GetContinuousQueries()))
for i, x := range pb.GetContinuousQueries() {
di.ContinuousQueries[i].unmarshal(x)
if len(pb.GetContinuousQueries()) > 0 {
di.ContinuousQueries = make([]ContinuousQueryInfo, len(pb.GetContinuousQueries()))
for i, x := range pb.GetContinuousQueries() {
di.ContinuousQueries[i].unmarshal(x)
}
}
}
@ -794,9 +798,11 @@ func (rpi *RetentionPolicyInfo) unmarshal(pb *internal.RetentionPolicyInfo) {
rpi.Duration = time.Duration(pb.GetDuration())
rpi.ShardGroupDuration = time.Duration(pb.GetShardGroupDuration())
rpi.ShardGroups = make([]ShardGroupInfo, len(pb.GetShardGroups()))
for i, x := range pb.GetShardGroups() {
rpi.ShardGroups[i].unmarshal(x)
if len(pb.GetShardGroups()) > 0 {
rpi.ShardGroups = make([]ShardGroupInfo, len(pb.GetShardGroups()))
for i, x := range pb.GetShardGroups() {
rpi.ShardGroups[i].unmarshal(x)
}
}
}
@ -900,9 +906,11 @@ func (sgi *ShardGroupInfo) unmarshal(pb *internal.ShardGroupInfo) {
sgi.EndTime = UnmarshalTime(pb.GetEndTime())
sgi.DeletedAt = UnmarshalTime(pb.GetDeletedAt())
sgi.Shards = make([]ShardInfo, len(pb.GetShards()))
for i, x := range pb.GetShards() {
sgi.Shards[i].unmarshal(x)
if len(pb.GetShards()) > 0 {
sgi.Shards = make([]ShardInfo, len(pb.GetShards()))
for i, x := range pb.GetShards() {
sgi.Shards[i].unmarshal(x)
}
}
}

View File

@ -127,14 +127,10 @@ func TestData_CreateRetentionPolicy_ErrNameRequired(t *testing.T) {
}
}
// Ensure that creating a policy with a replication factor that doesn't match
// the number of nodes in the cluster will return an error. This is a temporary
// restriction until v0.9.1 is released.
func TestData_CreateRetentionPolicy_ErrReplicationFactorMismatch(t *testing.T) {
data := meta.Data{
Nodes: []meta.NodeInfo{{ID: 1}, {ID: 2}, {ID: 3}},
}
if err := data.CreateRetentionPolicy("db0", &meta.RetentionPolicyInfo{Name: "rp0", ReplicaN: 2}); err != meta.ErrReplicationFactorMismatch {
// Ensure that creating a policy with a replication factor less than 1 returns an error.
func TestData_CreateRetentionPolicy_ErrReplicationFactorTooLow(t *testing.T) {
data := meta.Data{Nodes: []meta.NodeInfo{{ID: 1}}}
if err := data.CreateRetentionPolicy("db0", &meta.RetentionPolicyInfo{Name: "rp0", ReplicaN: 0}); err != meta.ErrReplicationFactorTooLow {
t.Fatalf("unexpected error: %s", err)
}
}
@ -152,10 +148,10 @@ func TestData_CreateRetentionPolicy_ErrRetentionPolicyExists(t *testing.T) {
var data meta.Data
if err := data.CreateDatabase("db0"); err != nil {
t.Fatal(err)
} else if err = data.CreateRetentionPolicy("db0", &meta.RetentionPolicyInfo{Name: "rp0"}); err != nil {
} else if err = data.CreateRetentionPolicy("db0", &meta.RetentionPolicyInfo{Name: "rp0", ReplicaN: 1}); err != nil {
t.Fatal(err)
}
if err := data.CreateRetentionPolicy("db0", &meta.RetentionPolicyInfo{Name: "rp0"}); err != meta.ErrRetentionPolicyExists {
if err := data.CreateRetentionPolicy("db0", &meta.RetentionPolicyInfo{Name: "rp0", ReplicaN: 1}); err != meta.ErrRetentionPolicyExists {
t.Fatalf("unexpected error: %s", err)
}
}
@ -165,7 +161,7 @@ func TestData_UpdateRetentionPolicy(t *testing.T) {
var data meta.Data
if err := data.CreateDatabase("db0"); err != nil {
t.Fatal(err)
} else if err = data.CreateRetentionPolicy("db0", &meta.RetentionPolicyInfo{Name: "rp0"}); err != nil {
} else if err = data.CreateRetentionPolicy("db0", &meta.RetentionPolicyInfo{Name: "rp0", ReplicaN: 1}); err != nil {
t.Fatal(err)
}
@ -194,7 +190,7 @@ func TestData_DropRetentionPolicy(t *testing.T) {
var data meta.Data
if err := data.CreateDatabase("db0"); err != nil {
t.Fatal(err)
} else if err = data.CreateRetentionPolicy("db0", &meta.RetentionPolicyInfo{Name: "rp0"}); err != nil {
} else if err = data.CreateRetentionPolicy("db0", &meta.RetentionPolicyInfo{Name: "rp0", ReplicaN: 1}); err != nil {
t.Fatal(err)
}
@ -229,9 +225,9 @@ func TestData_RetentionPolicy(t *testing.T) {
var data meta.Data
if err := data.CreateDatabase("db0"); err != nil {
t.Fatal(err)
} else if err = data.CreateRetentionPolicy("db0", &meta.RetentionPolicyInfo{Name: "rp0"}); err != nil {
} else if err = data.CreateRetentionPolicy("db0", &meta.RetentionPolicyInfo{Name: "rp0", ReplicaN: 1}); err != nil {
t.Fatal(err)
} else if err = data.CreateRetentionPolicy("db0", &meta.RetentionPolicyInfo{Name: "rp1"}); err != nil {
} else if err = data.CreateRetentionPolicy("db0", &meta.RetentionPolicyInfo{Name: "rp1", ReplicaN: 1}); err != nil {
t.Fatal(err)
}
@ -240,6 +236,7 @@ func TestData_RetentionPolicy(t *testing.T) {
} else if !reflect.DeepEqual(rpi, &meta.RetentionPolicyInfo{
Name: "rp0",
ShardGroupDuration: 604800000000000,
ReplicaN: 1,
}) {
t.Fatalf("unexpected value: %#v", rpi)
}
@ -258,7 +255,7 @@ func TestData_SetDefaultRetentionPolicy(t *testing.T) {
var data meta.Data
if err := data.CreateDatabase("db0"); err != nil {
t.Fatal(err)
} else if err = data.CreateRetentionPolicy("db0", &meta.RetentionPolicyInfo{Name: "rp0"}); err != nil {
} else if err = data.CreateRetentionPolicy("db0", &meta.RetentionPolicyInfo{Name: "rp0", ReplicaN: 1}); err != nil {
t.Fatal(err)
}

View File

@ -58,10 +58,9 @@ var (
ErrRetentionPolicyDurationTooLow = errors.New(fmt.Sprintf("retention policy duration must be at least %s",
RetentionPolicyMinDuration))
// ErrReplicationFactorMismatch is returned when the replication factor
// does not match the number of nodes in the cluster. This is a temporary
// restriction until v0.9.1 is released.
ErrReplicationFactorMismatch = errors.New("replication factor must match cluster size; this limitation will be lifted in v0.9.1")
// ErrReplicationFactorTooLow is returned when the replication factor is not in an
// acceptable range.
ErrReplicationFactorTooLow = errors.New("replication factor must be greater than 0")
)
var (

View File

@ -37,7 +37,14 @@ It has these top-level messages:
SetPrivilegeCommand
SetDataCommand
SetAdminPrivilegeCommand
UpdateNodeCommand
Response
ResponseHeader
ErrorResponse
FetchDataRequest
FetchDataResponse
JoinRequest
JoinResponse
*/
package internal
@ -48,6 +55,42 @@ import math "math"
var _ = proto.Marshal
var _ = math.Inf
type RPCType int32
const (
RPCType_Error RPCType = 1
RPCType_FetchData RPCType = 2
RPCType_Join RPCType = 3
)
var RPCType_name = map[int32]string{
1: "Error",
2: "FetchData",
3: "Join",
}
var RPCType_value = map[string]int32{
"Error": 1,
"FetchData": 2,
"Join": 3,
}
func (x RPCType) Enum() *RPCType {
p := new(RPCType)
*p = x
return p
}
func (x RPCType) String() string {
return proto.EnumName(RPCType_name, int32(x))
}
func (x *RPCType) UnmarshalJSON(data []byte) error {
value, err := proto.UnmarshalJSONEnum(RPCType_value, data, "RPCType")
if err != nil {
return err
}
*x = RPCType(value)
return nil
}
type Command_Type int32
const (
@ -69,6 +112,7 @@ const (
Command_SetPrivilegeCommand Command_Type = 16
Command_SetDataCommand Command_Type = 17
Command_SetAdminPrivilegeCommand Command_Type = 18
Command_UpdateNodeCommand Command_Type = 19
)
var Command_Type_name = map[int32]string{
@ -90,6 +134,7 @@ var Command_Type_name = map[int32]string{
16: "SetPrivilegeCommand",
17: "SetDataCommand",
18: "SetAdminPrivilegeCommand",
19: "UpdateNodeCommand",
}
var Command_Type_value = map[string]int32{
"CreateNodeCommand": 1,
@ -110,6 +155,7 @@ var Command_Type_value = map[string]int32{
"SetPrivilegeCommand": 16,
"SetDataCommand": 17,
"SetAdminPrivilegeCommand": 18,
"UpdateNodeCommand": 19,
}
func (x Command_Type) Enum() *Command_Type {
@ -1112,6 +1158,38 @@ var E_SetAdminPrivilegeCommand_Command = &proto.ExtensionDesc{
Tag: "bytes,118,opt,name=command",
}
type UpdateNodeCommand struct {
ID *uint64 `protobuf:"varint,1,req" json:"ID,omitempty"`
Host *string `protobuf:"bytes,2,req" json:"Host,omitempty"`
XXX_unrecognized []byte `json:"-"`
}
func (m *UpdateNodeCommand) Reset() { *m = UpdateNodeCommand{} }
func (m *UpdateNodeCommand) String() string { return proto.CompactTextString(m) }
func (*UpdateNodeCommand) ProtoMessage() {}
func (m *UpdateNodeCommand) GetID() uint64 {
if m != nil && m.ID != nil {
return *m.ID
}
return 0
}
func (m *UpdateNodeCommand) GetHost() string {
if m != nil && m.Host != nil {
return *m.Host
}
return ""
}
var E_UpdateNodeCommand_Command = &proto.ExtensionDesc{
ExtendedType: (*Command)(nil),
ExtensionType: (*UpdateNodeCommand)(nil),
Field: 119,
Name: "internal.UpdateNodeCommand.command",
Tag: "bytes,119,opt,name=command",
}
type Response struct {
OK *bool `protobuf:"varint,1,req" json:"OK,omitempty"`
Error *string `protobuf:"bytes,2,opt" json:"Error,omitempty"`
@ -1144,7 +1222,182 @@ func (m *Response) GetIndex() uint64 {
return 0
}
type ResponseHeader struct {
OK *bool `protobuf:"varint,1,req" json:"OK,omitempty"`
Error *string `protobuf:"bytes,2,opt" json:"Error,omitempty"`
XXX_unrecognized []byte `json:"-"`
}
func (m *ResponseHeader) Reset() { *m = ResponseHeader{} }
func (m *ResponseHeader) String() string { return proto.CompactTextString(m) }
func (*ResponseHeader) ProtoMessage() {}
func (m *ResponseHeader) GetOK() bool {
if m != nil && m.OK != nil {
return *m.OK
}
return false
}
func (m *ResponseHeader) GetError() string {
if m != nil && m.Error != nil {
return *m.Error
}
return ""
}
type ErrorResponse struct {
Header *ResponseHeader `protobuf:"bytes,1,req" json:"Header,omitempty"`
XXX_unrecognized []byte `json:"-"`
}
func (m *ErrorResponse) Reset() { *m = ErrorResponse{} }
func (m *ErrorResponse) String() string { return proto.CompactTextString(m) }
func (*ErrorResponse) ProtoMessage() {}
func (m *ErrorResponse) GetHeader() *ResponseHeader {
if m != nil {
return m.Header
}
return nil
}
type FetchDataRequest struct {
Index *uint64 `protobuf:"varint,1,req" json:"Index,omitempty"`
Term *uint64 `protobuf:"varint,2,req" json:"Term,omitempty"`
Blocking *bool `protobuf:"varint,3,opt,def=0" json:"Blocking,omitempty"`
XXX_unrecognized []byte `json:"-"`
}
func (m *FetchDataRequest) Reset() { *m = FetchDataRequest{} }
func (m *FetchDataRequest) String() string { return proto.CompactTextString(m) }
func (*FetchDataRequest) ProtoMessage() {}
const Default_FetchDataRequest_Blocking bool = false
func (m *FetchDataRequest) GetIndex() uint64 {
if m != nil && m.Index != nil {
return *m.Index
}
return 0
}
func (m *FetchDataRequest) GetTerm() uint64 {
if m != nil && m.Term != nil {
return *m.Term
}
return 0
}
func (m *FetchDataRequest) GetBlocking() bool {
if m != nil && m.Blocking != nil {
return *m.Blocking
}
return Default_FetchDataRequest_Blocking
}
type FetchDataResponse struct {
Header *ResponseHeader `protobuf:"bytes,1,req" json:"Header,omitempty"`
Index *uint64 `protobuf:"varint,2,req" json:"Index,omitempty"`
Term *uint64 `protobuf:"varint,3,req" json:"Term,omitempty"`
Data []byte `protobuf:"bytes,4,opt" json:"Data,omitempty"`
XXX_unrecognized []byte `json:"-"`
}
func (m *FetchDataResponse) Reset() { *m = FetchDataResponse{} }
func (m *FetchDataResponse) String() string { return proto.CompactTextString(m) }
func (*FetchDataResponse) ProtoMessage() {}
func (m *FetchDataResponse) GetHeader() *ResponseHeader {
if m != nil {
return m.Header
}
return nil
}
func (m *FetchDataResponse) GetIndex() uint64 {
if m != nil && m.Index != nil {
return *m.Index
}
return 0
}
func (m *FetchDataResponse) GetTerm() uint64 {
if m != nil && m.Term != nil {
return *m.Term
}
return 0
}
func (m *FetchDataResponse) GetData() []byte {
if m != nil {
return m.Data
}
return nil
}
type JoinRequest struct {
Addr *string `protobuf:"bytes,1,req" json:"Addr,omitempty"`
XXX_unrecognized []byte `json:"-"`
}
func (m *JoinRequest) Reset() { *m = JoinRequest{} }
func (m *JoinRequest) String() string { return proto.CompactTextString(m) }
func (*JoinRequest) ProtoMessage() {}
func (m *JoinRequest) GetAddr() string {
if m != nil && m.Addr != nil {
return *m.Addr
}
return ""
}
type JoinResponse struct {
Header *ResponseHeader `protobuf:"bytes,1,req" json:"Header,omitempty"`
// Indicates that this node should take part in the raft cluster.
EnableRaft *bool `protobuf:"varint,2,opt" json:"EnableRaft,omitempty"`
// The addresses of raft peers to use if joining as a raft member. If not joining
// as a raft member, these are the nodes running raft.
RaftNodes []string `protobuf:"bytes,3,rep" json:"RaftNodes,omitempty"`
// The node ID assigned to the requesting node.
NodeID *uint64 `protobuf:"varint,4,opt" json:"NodeID,omitempty"`
XXX_unrecognized []byte `json:"-"`
}
func (m *JoinResponse) Reset() { *m = JoinResponse{} }
func (m *JoinResponse) String() string { return proto.CompactTextString(m) }
func (*JoinResponse) ProtoMessage() {}
func (m *JoinResponse) GetHeader() *ResponseHeader {
if m != nil {
return m.Header
}
return nil
}
func (m *JoinResponse) GetEnableRaft() bool {
if m != nil && m.EnableRaft != nil {
return *m.EnableRaft
}
return false
}
func (m *JoinResponse) GetRaftNodes() []string {
if m != nil {
return m.RaftNodes
}
return nil
}
func (m *JoinResponse) GetNodeID() uint64 {
if m != nil && m.NodeID != nil {
return *m.NodeID
}
return 0
}
func init() {
proto.RegisterEnum("internal.RPCType", RPCType_name, RPCType_value)
proto.RegisterEnum("internal.Command_Type", Command_Type_name, Command_Type_value)
proto.RegisterExtension(E_CreateNodeCommand_Command)
proto.RegisterExtension(E_DeleteNodeCommand_Command)
@ -1164,4 +1417,5 @@ func init() {
proto.RegisterExtension(E_SetPrivilegeCommand_Command)
proto.RegisterExtension(E_SetDataCommand_Command)
proto.RegisterExtension(E_SetAdminPrivilegeCommand_Command)
proto.RegisterExtension(E_UpdateNodeCommand_Command)
}

View File

@ -98,6 +98,8 @@ message Command {
UpdateUserCommand = 15;
SetPrivilegeCommand = 16;
SetDataCommand = 17;
SetAdminPrivilegeCommand = 18;
UpdateNodeCommand = 19;
}
required Type type = 1;
@ -250,8 +252,69 @@ message SetAdminPrivilegeCommand {
required bool Admin = 2;
}
message UpdateNodeCommand {
extend Command {
optional UpdateNodeCommand command = 119;
}
required uint64 ID = 1;
required string Host = 2;
}
message Response {
required bool OK = 1;
optional string Error = 2;
optional uint64 Index = 3;
}
//========================================================================
//
// RPC - higher-level cluster communication operations
//
//========================================================================
enum RPCType {
Error = 1;
FetchData = 2;
Join = 3;
}
message ResponseHeader {
required bool OK = 1;
optional string Error = 2;
}
message ErrorResponse {
required ResponseHeader Header = 1;
}
message FetchDataRequest {
required uint64 Index = 1;
required uint64 Term = 2;
optional bool Blocking = 3 [default = false];
}
message FetchDataResponse {
required ResponseHeader Header = 1;
required uint64 Index = 2;
required uint64 Term = 3;
optional bytes Data = 4;
}
message JoinRequest {
required string Addr = 1;
}
message JoinResponse {
required ResponseHeader Header = 1;
// Indicates that this node should take part in the raft cluster.
optional bool EnableRaft = 2;
// The addresses of raft peers to use if joining as a raft member. If not joining
// as a raft member, these are the nodes running raft.
repeated string RaftNodes = 3;
// The node ID assigned to the requesting node.
optional uint64 NodeID = 4;
}

View File

@ -0,0 +1,62 @@
package meta
import (
"io"
"net"
)
// proxy brokers a bidirectional byte stream between src and dst, returning
// once both halves have shut down. It spawns one broker goroutine per
// direction and coordinates teardown so neither side sees a
// "use of closed network connection" error.
func proxy(dst, src *net.TCPConn) error {
	// channels to wait on the close event for each connection
	serverClosed := make(chan struct{}, 1)
	clientClosed := make(chan struct{}, 1)
	// NOTE(review): capacity 1 but two brokers may each send up to two errors;
	// extra sends would block their goroutine until proxy returns — confirm
	// this is acceptable for the callers.
	errors := make(chan error, 1)

	go broker(dst, src, clientClosed, errors)
	go broker(src, dst, serverClosed, errors)

	// wait for one half of the proxy to exit, then trigger a shutdown of the
	// other half by calling CloseRead(). This will break the read loop in the
	// broker and allow us to fully close the connection cleanly without a
	// "use of closed network connection" error.
	var waitFor chan struct{}
	select {
	case <-clientClosed:
		// the client closed first and any more packets from the server aren't
		// useful, so we can optionally SetLinger(0) here to recycle the port
		// faster.
		dst.SetLinger(0)
		dst.CloseRead()
		waitFor = serverClosed
	case <-serverClosed:
		src.CloseRead()
		waitFor = clientClosed
	case err := <-errors:
		// A copy error surfaced before either side closed cleanly: shut down
		// both halves and report it.
		src.CloseRead()
		dst.SetLinger(0)
		dst.CloseRead()
		return err
	}

	// Wait for the other connection to close.
	<-waitFor

	return nil
}
// This does the actual data transfer.
// The broker only closes the Read side.
func broker(dst, src net.Conn, srcClosed chan struct{}, errors chan error) {
// We can handle errors in a finer-grained manner by inlining io.Copy (it's
// simple, and we drop the ReaderFrom or WriterTo checks for
// net.Conn->net.Conn transfers, which aren't needed). This would also let
// us adjust buffersize.
_, err := io.Copy(dst, src)
if err != nil {
errors <- err
}
if err := src.Close(); err != nil {
errors <- err
}
srcClosed <- struct{}{}
}

View File

@ -0,0 +1,460 @@
package meta
import (
"encoding/binary"
"errors"
"fmt"
"io"
"io/ioutil"
"log"
"net"
"time"
"github.com/gogo/protobuf/proto"
"github.com/hashicorp/raft"
"github.com/influxdb/influxdb/meta/internal"
)
// Max size of a message before we treat the size as invalid
const (
	MaxMessageSize    = 1024 * 1024 * 1024
	leaderDialTimeout = 10 * time.Second
)

// rpc handles request/response style messaging between cluster nodes.
// All requests are executed on the raft leader; non-leaders transparently
// proxy incoming connections to the leader.
type rpc struct {
	logger         *log.Logger
	tracingEnabled bool

	// store is the narrow view of the meta store this rpc layer needs,
	// expressed as an interface so tests can supply a fake.
	store interface {
		cachedData() *Data
		IsLeader() bool
		Leader() string
		Peers() ([]string, error)
		AddPeer(host string) error
		CreateNode(host string) (*NodeInfo, error)
		NodeByHost(host string) (*NodeInfo, error)
		WaitForDataChanged() error
	}
}

// JoinResult is what a joining node learns from the cluster: whether it
// should run raft locally, the raft peer set, and its assigned node ID.
type JoinResult struct {
	RaftEnabled bool
	RaftNodes   []string
	NodeID      uint64
}

// Reply is implemented by every response message that carries a
// ResponseHeader, letting handleRPCConn set OK/Error uniformly.
type Reply interface {
	GetHeader() *internal.ResponseHeader
}
// proxyLeader proxies the connection to the current raft leader. If no leader
// is known or the dial fails, an error response is written back to conn
// instead. The MuxRPCHeader byte is re-sent so the leader's mux routes the
// proxied stream to its own rpc handler.
func (r *rpc) proxyLeader(conn *net.TCPConn) {
	if r.store.Leader() == "" {
		r.sendError(conn, "no leader")
		return
	}

	leaderConn, err := net.DialTimeout("tcp", r.store.Leader(), leaderDialTimeout)
	if err != nil {
		r.sendError(conn, fmt.Sprintf("dial leader: %v", err))
		return
	}
	defer leaderConn.Close()

	leaderConn.Write([]byte{MuxRPCHeader})
	if err := proxy(leaderConn.(*net.TCPConn), conn); err != nil {
		r.sendError(conn, fmt.Sprintf("leader proxy error: %v", err))
	}
}
// handleRPCConn reads a single framed command from the connection, executes
// it, and writes the response. Frames are size-prefixed: 8-byte big-endian
// length, then an 8-byte RPC type, then the protobuf payload.
func (r *rpc) handleRPCConn(conn net.Conn) {
	defer conn.Close()

	// RPC connections should execute on the leader. If we are not the leader,
	// proxy the connection to the leader so that clients can connect to any
	// node in the cluster.
	r.traceCluster("rpc connection from: %v", conn.RemoteAddr())

	if !r.store.IsLeader() {
		r.proxyLeader(conn.(*net.TCPConn))
		return
	}

	// Read and execute request.
	typ, resp, err := func() (internal.RPCType, proto.Message, error) {
		// Read request size.
		var sz uint64
		if err := binary.Read(conn, binary.BigEndian, &sz); err != nil {
			return internal.RPCType_Error, nil, fmt.Errorf("read size: %s", err)
		}

		// Reject empty or oversized frames before allocating the buffer.
		if sz == 0 {
			return 0, nil, fmt.Errorf("invalid message size: %d", sz)
		}

		if sz >= MaxMessageSize {
			return 0, nil, fmt.Errorf("max message size of %d exceeded: %d", MaxMessageSize, sz)
		}

		// Read request.
		buf := make([]byte, sz)
		if _, err := io.ReadFull(conn, buf); err != nil {
			return internal.RPCType_Error, nil, fmt.Errorf("read request: %s", err)
		}

		// Determine the RPC type: first 8 bytes of the frame; the remainder
		// is the protobuf-encoded request.
		rpcType := internal.RPCType(btou64(buf[0:8]))
		buf = buf[8:]

		r.traceCluster("recv %v request on: %v", rpcType, conn.RemoteAddr())
		switch rpcType {
		case internal.RPCType_FetchData:
			var req internal.FetchDataRequest
			if err := proto.Unmarshal(buf, &req); err != nil {
				return internal.RPCType_Error, nil, fmt.Errorf("fetch request unmarshal: %v", err)
			}
			resp, err := r.handleFetchData(&req)
			return rpcType, resp, err
		case internal.RPCType_Join:
			var req internal.JoinRequest
			if err := proto.Unmarshal(buf, &req); err != nil {
				return internal.RPCType_Error, nil, fmt.Errorf("join request unmarshal: %v", err)
			}
			resp, err := r.handleJoinRequest(&req)
			return rpcType, resp, err
		default:
			return internal.RPCType_Error, nil, fmt.Errorf("unknown rpc type:%v", rpcType)
		}
	}()

	// Handle unexpected RPC errors by substituting a generic error response.
	if err != nil {
		resp = &internal.ErrorResponse{
			Header: &internal.ResponseHeader{
				OK: proto.Bool(false),
			},
		}
		typ = internal.RPCType_Error
	}

	// Set the status header and error message on any reply that carries one.
	if reply, ok := resp.(Reply); ok {
		reply.GetHeader().OK = proto.Bool(err == nil)
		if err != nil {
			reply.GetHeader().Error = proto.String(err.Error())
		}
	}

	r.sendResponse(conn, typ, resp)
}
// sendResponse marshals resp and writes it to conn framed as
// size + type + payload (see pack). Failures are logged, not returned;
// the caller owns the connection's lifetime.
func (r *rpc) sendResponse(conn net.Conn, typ internal.RPCType, resp proto.Message) {
	payload, err := proto.Marshal(resp)
	if err != nil {
		r.logger.Printf("unable to marshal response: %v", err)
		return
	}

	// Encode response back to connection.
	if _, werr := conn.Write(r.pack(typ, payload)); werr != nil {
		r.logger.Printf("unable to write rpc response: %s", werr)
	}
}
// sendError replies to conn with an ErrorResponse carrying msg, logging the
// message to the cluster trace as well.
func (r *rpc) sendError(conn net.Conn, msg string) {
	r.traceCluster(msg)
	header := &internal.ResponseHeader{
		OK:    proto.Bool(false),
		Error: proto.String(msg),
	}
	r.sendResponse(conn, internal.RPCType_Error, &internal.ErrorResponse{Header: header})
}
// handleFetchData handles a request for the current node's meta data.
// If the client's index already matches ours and the request is blocking,
// it waits for the data to change before responding; the response carries
// nil Data when nothing newer is available (non-blocking, matching index).
func (r *rpc) handleFetchData(req *internal.FetchDataRequest) (*internal.FetchDataResponse, error) {
	var (
		b    []byte
		data *Data
		err  error
	)

	for {
		data = r.store.cachedData()
		if data.Index != req.GetIndex() {
			// Client is behind (or ahead): ship the full serialized snapshot.
			b, err = data.MarshalBinary()
			if err != nil {
				return nil, err
			}
			break
		}

		if !req.GetBlocking() {
			// Indexes match and the client doesn't want to wait; b stays nil.
			break
		}

		// Block until the store's data changes, then re-check the index.
		if err := r.store.WaitForDataChanged(); err != nil {
			return nil, err
		}
	}

	return &internal.FetchDataResponse{
		Header: &internal.ResponseHeader{
			OK: proto.Bool(true),
		},
		Index: proto.Uint64(data.Index),
		Term:  proto.Uint64(data.Term),
		Data:  b}, nil
}
// handleJoinRequest handles a request to join the cluster. It creates the
// node (or re-uses an existing registration for the same host), promotes the
// joiner to a raft peer while the cluster is below MaxRaftNodes, and replies
// with the raft peer set plus the node's assigned ID.
func (r *rpc) handleJoinRequest(req *internal.JoinRequest) (*internal.JoinResponse, error) {
	r.traceCluster("join request from: %v", *req.Addr)

	node, err := func() (*NodeInfo, error) {
		// attempt to create the node
		node, err := r.store.CreateNode(*req.Addr)
		// if it exists, return the existing node
		if err == ErrNodeExists {
			node, err = r.store.NodeByHost(*req.Addr)
			if err != nil {
				return node, err
			}
			r.logger.Printf("existing node re-joined: id=%v addr=%v", node.ID, node.Host)
		} else if err != nil {
			return nil, fmt.Errorf("create node: %v", err)
		}

		peers, err := r.store.Peers()
		if err != nil {
			return nil, fmt.Errorf("list peers: %v", err)
		}

		// If we have less than MaxRaftNodes, add them as raft peers if they
		// are not already a peer.
		if len(peers) < MaxRaftNodes && !raft.PeerContained(peers, *req.Addr) {
			r.logger.Printf("adding new raft peer: nodeId=%v addr=%v", node.ID, *req.Addr)
			if err = r.store.AddPeer(*req.Addr); err != nil {
				return node, fmt.Errorf("add peer: %v", err)
			}
		}
		return node, err
	}()

	// Capture the node ID (if any) before surfacing the error so a re-joined
	// node's ID is preserved in scope for the response below.
	nodeID := uint64(0)
	if node != nil {
		nodeID = node.ID
	}

	if err != nil {
		return nil, err
	}

	// get the current raft peers
	peers, err := r.store.Peers()
	if err != nil {
		return nil, fmt.Errorf("list peers: %v", err)
	}

	return &internal.JoinResponse{
		Header: &internal.ResponseHeader{
			OK: proto.Bool(true),
		},
		// The joiner runs raft locally only if it ended up in the peer set.
		EnableRaft: proto.Bool(raft.PeerContained(peers, *req.Addr)),
		RaftNodes:  peers,
		NodeID:     proto.Uint64(nodeID),
	}, err
}
// pack frames an RPC message as length + type + payload: an 8-byte
// big-endian size (covering the type word and payload), an 8-byte RPC type,
// then the raw protobuf bytes.
func (r *rpc) pack(typ internal.RPCType, b []byte) []byte {
	frame := make([]byte, 0, 16+len(b))
	frame = append(frame, u64tob(uint64(len(b))+8)...)
	frame = append(frame, u64tob(uint64(typ))...)
	frame = append(frame, b...)
	return frame
}
// fetchMetaData returns the latest copy of the meta store data from the
// current leader. If blocking is true the leader holds the request open until
// its data advances past the index we report. A (nil, nil) return means the
// leader's data already matches our cached index.
func (r *rpc) fetchMetaData(blocking bool) (*Data, error) {
	assert(r.store != nil, "store is nil")

	// Retrieve the current known leader.
	leader := r.store.Leader()
	if leader == "" {
		return nil, errors.New("no leader")
	}

	var index, term uint64
	data := r.store.cachedData()
	if data != nil {
		index = data.Index
		// BUG FIX: previously `term = data.Index`, so the request always sent
		// the index in the Term field. The leader echoes Term from data.Term
		// (see handleFetchData), so the term must come from data.Term here.
		term = data.Term
	}
	resp, err := r.call(leader, &internal.FetchDataRequest{
		Index:    proto.Uint64(index),
		Term:     proto.Uint64(term),
		Blocking: proto.Bool(blocking),
	})
	if err != nil {
		return nil, err
	}

	switch t := resp.(type) {
	case *internal.FetchDataResponse:
		// If data is nil, then the term and index we sent matches the leader.
		if t.GetData() == nil {
			return nil, nil
		}
		ms := &Data{}
		if err := ms.UnmarshalBinary(t.GetData()); err != nil {
			return nil, fmt.Errorf("rpc unmarshal metadata: %v", err)
		}
		return ms, nil
	case *internal.ErrorResponse:
		return nil, fmt.Errorf("rpc failed: %s", t.GetHeader().GetError())
	default:
		return nil, fmt.Errorf("rpc failed: unknown response type: %v", t.String())
	}
}
// join attempts to join a cluster at remoteAddr, advertising localAddr as
// this node's cluster address. On success it reports whether this node should
// run raft, the raft peer set, and the assigned node ID.
func (r *rpc) join(localAddr, remoteAddr string) (*JoinResult, error) {
	resp, err := r.call(remoteAddr, &internal.JoinRequest{
		Addr: proto.String(localAddr),
	})
	if err != nil {
		return nil, err
	}

	switch t := resp.(type) {
	case *internal.JoinResponse:
		result := &JoinResult{
			RaftEnabled: t.GetEnableRaft(),
			RaftNodes:   t.GetRaftNodes(),
			NodeID:      t.GetNodeID(),
		}
		return result, nil
	case *internal.ErrorResponse:
		return nil, fmt.Errorf("rpc failed: %s", t.GetHeader().GetError())
	default:
		return nil, fmt.Errorf("rpc failed: unknown response type: %v", t.String())
	}
}
// call sends an encoded request to the remote leader and returns the decoded
// response message. The wire format in both directions is the pack framing:
// 8-byte size, 8-byte RPC type, protobuf payload. The response stream is read
// to EOF, so each call uses a fresh connection.
func (r *rpc) call(dest string, req proto.Message) (proto.Message, error) {
	// Determine type of request
	var rpcType internal.RPCType
	switch t := req.(type) {
	case *internal.JoinRequest:
		rpcType = internal.RPCType_Join
	case *internal.FetchDataRequest:
		rpcType = internal.RPCType_FetchData
	default:
		return nil, fmt.Errorf("unknown rpc request type: %v", t)
	}

	// Create a connection to the leader.
	conn, err := net.DialTimeout("tcp", dest, leaderDialTimeout)
	if err != nil {
		return nil, fmt.Errorf("rpc dial: %v", err)
	}
	defer conn.Close()

	// Write a marker byte for rpc messages so the remote mux routes us to the
	// rpc handler.
	_, err = conn.Write([]byte{MuxRPCHeader})
	if err != nil {
		return nil, err
	}

	b, err := proto.Marshal(req)
	if err != nil {
		return nil, fmt.Errorf("rpc marshal: %v", err)
	}

	// Write request size & bytes.
	if _, err := conn.Write(r.pack(rpcType, b)); err != nil {
		return nil, fmt.Errorf("write %v rpc: %s", rpcType, err)
	}

	// Read the entire response; the server closes the connection after one
	// reply, so EOF delimits the frame.
	data, err := ioutil.ReadAll(conn)
	if err != nil {
		return nil, fmt.Errorf("read %v rpc: %v", rpcType, err)
	}

	// Should always have a size and type (8 bytes each).
	if exp := 16; len(data) < exp {
		r.traceCluster("recv: %v", string(data))
		return nil, fmt.Errorf("rpc %v failed: short read: got %v, exp %v", rpcType, len(data), exp)
	}

	// Validate the declared payload length against what was actually read.
	sz := btou64(data[0:8])
	if len(data[8:]) != int(sz) {
		r.traceCluster("recv: %v", string(data))
		return nil, fmt.Errorf("rpc %v failed: short read: got %v, exp %v", rpcType, len(data[8:]), sz)
	}

	// See what response type we got back, could get a general error response
	rpcType = internal.RPCType(btou64(data[8:16]))
	data = data[16:]

	var resp proto.Message
	switch rpcType {
	case internal.RPCType_Join:
		resp = &internal.JoinResponse{}
	case internal.RPCType_FetchData:
		resp = &internal.FetchDataResponse{}
	case internal.RPCType_Error:
		resp = &internal.ErrorResponse{}
	default:
		return nil, fmt.Errorf("unknown rpc response type: %v", rpcType)
	}

	if err := proto.Unmarshal(data, resp); err != nil {
		return nil, fmt.Errorf("rpc unmarshal: %v", err)
	}

	// Surface an application-level failure carried in the response header.
	if reply, ok := resp.(Reply); ok {
		if !reply.GetHeader().GetOK() {
			return nil, fmt.Errorf("rpc %v failed: %s", rpcType, reply.GetHeader().GetError())
		}
	}
	return resp, nil
}
// traceCluster logs msg (Printf-style) with an "rpc: " prefix, but only when
// cluster tracing is enabled.
func (r *rpc) traceCluster(msg string, args ...interface{}) {
	if !r.tracingEnabled {
		return
	}
	r.logger.Printf("rpc: "+msg, args...)
}
// u64tob encodes v as an 8-byte big-endian slice.
func u64tob(v uint64) []byte {
	var buf [8]byte
	binary.BigEndian.PutUint64(buf[:], v)
	return buf[:]
}
// btou64 decodes the first 8 bytes of b as a big-endian uint64.
// Panics (like the original) when len(b) < 8.
func btou64(b []byte) uint64 {
	v := binary.BigEndian.Uint64(b)
	return v
}
// contains reports whether e appears in s.
func contains(s []string, e string) bool {
	for i := range s {
		if s[i] == e {
			return true
		}
	}
	return false
}

View File

@ -0,0 +1,242 @@
package meta
import (
"net"
"sync"
"testing"
)
// TestRPCFetchData verifies that a client with no cached meta data receives
// the server's full snapshot (index 99) via fetchMetaData.
func TestRPCFetchData(t *testing.T) {
	serverRPC := &rpc{
		store: &fakeStore{
			md: &Data{Index: 99},
		},
	}
	srv := newTestServer(t, serverRPC)
	defer srv.Close()
	go srv.Serve()

	// Wait for the RPC server to be ready
	<-srv.Ready

	// create a new RPC with no existing meta.Data cache
	clientRPC := &rpc{
		store: &fakeStore{
			leader: srv.Listener.Addr().String(),
		},
	}

	// fetch the servers meta-data
	md, err := clientRPC.fetchMetaData(false)
	if err != nil {
		t.Fatalf("failed to fetchMetaData: %v", err)
	}

	if md == nil {
		t.Fatalf("meta-data is nil")
	}

	if exp := uint64(99); md.Index != exp {
		t.Fatalf("meta-data mismatch. got %v, exp %v", md.Index, exp)
	}
}
// TestRPCFetchDataMatchesLeader verifies that a non-blocking fetch returns
// nil data when the client's cached index already matches the leader's.
func TestRPCFetchDataMatchesLeader(t *testing.T) {
	serverRPC := &rpc{
		store: &fakeStore{
			md: &Data{Index: 99},
		},
	}
	srv := newTestServer(t, serverRPC)
	defer srv.Close()
	go srv.Serve()

	// Wait for the RPC server to be ready
	<-srv.Ready

	// create a new RPC with a matching index as the server
	clientRPC := &rpc{
		store: &fakeStore{
			leader: srv.Listener.Addr().String(),
			md:     &Data{Index: 99},
		},
	}

	// fetch the servers meta-data
	md, err := clientRPC.fetchMetaData(false)
	if err != nil {
		t.Fatalf("failed to fetchMetaData: %v", err)
	}

	if md != nil {
		t.Fatalf("meta-data is not nil")
	}
}
// TestRPCFetchDataMatchesBlocking verifies that a blocking fetch waits until
// the leader's index advances (here 99 -> 100) and then delivers the new data.
func TestRPCFetchDataMatchesBlocking(t *testing.T) {
	fs := &fakeStore{
		md:        &Data{Index: 99},
		blockChan: make(chan struct{}),
	}
	serverRPC := &rpc{
		store: fs,
	}
	srv := newTestServer(t, serverRPC)
	defer srv.Close()
	go srv.Serve()

	// Wait for the RPC server to be ready
	<-srv.Ready

	// create a new RPC with a matching index as the server
	clientRPC := &rpc{
		store: &fakeStore{
			leader: srv.Listener.Addr().String(),
			md:     &Data{Index: 99},
		},
	}

	// Kick off the blocking fetch. Record the result instead of asserting in
	// the goroutine: t.Fatalf must only be called from the test goroutine
	// (go vet flags Fatalf in a spawned goroutine), so assertions run after
	// wg.Wait below.
	var (
		wg       sync.WaitGroup
		md       *Data
		fetchErr error
	)
	wg.Add(1)
	go func() {
		defer wg.Done()
		md, fetchErr = clientRPC.fetchMetaData(true)
	}()

	// Simulate the remote index changing and unblocking
	fs.mu.Lock()
	fs.md.Index = 100
	fs.mu.Unlock()
	close(fs.blockChan)
	wg.Wait()

	if fetchErr != nil {
		t.Fatalf("failed to fetchMetaData: %v", fetchErr)
	}
	if md == nil {
		t.Fatalf("meta-data is nil")
	}
	if exp := uint64(100); md.Index != exp {
		t.Fatalf("meta-data mismatch. got %v, exp %v", md.Index, exp)
	}
}
// TestRPCJoin verifies the join handshake: the joiner is added as a raft
// peer, learns the peer list, and receives the node ID the store assigned.
func TestRPCJoin(t *testing.T) {
	fs := &fakeStore{
		leader:    "1.2.3.4:1234",
		md:        &Data{Index: 99},
		newNodeID: uint64(100),
		blockChan: make(chan struct{}),
	}
	serverRPC := &rpc{
		store: fs,
	}
	srv := newTestServer(t, serverRPC)
	defer srv.Close()
	go srv.Serve()

	// Wait for the RPC server to be ready
	<-srv.Ready

	// create a new RPC with a matching index as the server
	clientRPC := &rpc{
		store: &fakeStore{
			leader: srv.Listener.Addr().String(),
			md:     &Data{Index: 99},
		},
	}

	res, err := clientRPC.join("1.2.3.4:1234", srv.Listener.Addr().String())
	if err != nil {
		t.Fatalf("failed to join: %v", err)
	}

	// The joiner's address equals the fake leader, so it should be raft-enabled.
	if exp := true; res.RaftEnabled != true {
		t.Fatalf("raft enabled mismatch: got %v, exp %v", res.RaftEnabled, exp)
	}

	if exp := 1; len(res.RaftNodes) != exp {
		t.Fatalf("raft peer mismatch: got %v, exp %v", len(res.RaftNodes), exp)
	}

	if exp := "1.2.3.4:1234"; res.RaftNodes[0] != exp {
		t.Fatalf("raft peer mismatch: got %v, exp %v", res.RaftNodes[0], exp)
	}

	if exp := uint64(100); res.NodeID != exp {
		t.Fatalf("node id mismatch. got %v, exp %v", res.NodeID, exp)
	}
}
// fakeStore is a test double for the rpc store interface.
type fakeStore struct {
	mu        sync.RWMutex // guards md
	leader    string       // address reported by Leader() and used as the sole peer
	newNodeID uint64       // ID handed out by CreateNode
	md        *Data        // cached meta data returned by cachedData()
	blockChan chan struct{} // closed/signaled to release WaitForDataChanged
}
// testServer runs a single-connection RPC server over a loopback listener.
type testServer struct {
	Listener net.Listener
	Ready    chan struct{} // closed when Serve has started accepting
	rpc      *rpc
	t        *testing.T
}
// newTestServer binds a loopback listener and wraps it with the given rpc
// handler. Call Serve (usually on a goroutine) to accept one connection.
func newTestServer(t *testing.T, rpc *rpc) *testServer {
	ln, err := net.Listen("tcp", "127.0.0.1:0")
	if err != nil {
		t.Fatalf("failed to listen: %v", err)
	}
	return &testServer{
		Listener: ln,
		Ready:    make(chan struct{}),
		rpc:      rpc,
		// BUG FIX: t was never assigned, so Serve's error paths dereferenced
		// a nil *testing.T and panicked instead of reporting the failure.
		t: t,
	}
}
// Close shuts down the listener, unblocking any pending Accept in Serve.
func (s *testServer) Close() {
	s.Listener.Close()
}
// Serve accepts exactly one connection, strips the mux-header byte, and hands
// the stream to the rpc handler. It is intended to run on its own goroutine.
func (s *testServer) Serve() {
	close(s.Ready)
	conn, err := s.Listener.Accept()
	if err != nil {
		// Serve runs on a spawned goroutine; t.Fatalf is only valid on the
		// test goroutine (go vet flags it), so record the failure and return.
		s.t.Errorf("failed to accept: %v", err)
		return
	}

	// Demux: consume the single mux-header byte the client writes first.
	b := make([]byte, 1)
	if _, err := conn.Read(b); err != nil {
		s.t.Errorf("failed to demux: %v", err)
		return
	}
	s.rpc.handleRPCConn(conn)
}
// cachedData returns the fake's current meta data under the read lock.
func (f *fakeStore) cachedData() *Data {
	f.mu.RLock()
	md := f.md
	f.mu.RUnlock()
	return md
}
// IsLeader always reports leadership so the server-side rpc executes requests
// itself instead of proxying.
func (f *fakeStore) IsLeader() bool { return true }

// Leader returns the configured leader address.
func (f *fakeStore) Leader() string { return f.leader }

// Peers returns a one-element peer list containing the configured leader.
func (f *fakeStore) Peers() ([]string, error) { return []string{f.leader}, nil }

// AddPeer is a no-op in tests.
func (f *fakeStore) AddPeer(host string) error { return nil }

// CreateNode fabricates a node using the preconfigured newNodeID.
func (f *fakeStore) CreateNode(host string) (*NodeInfo, error) {
	return &NodeInfo{ID: f.newNodeID, Host: host}, nil
}

// NodeByHost reports that no node exists for host.
func (f *fakeStore) NodeByHost(host string) (*NodeInfo, error) { return nil, nil }

// WaitForDataChanged blocks until blockChan is closed by the test.
func (f *fakeStore) WaitForDataChanged() error {
	<-f.blockChan
	return nil
}

View File

@ -0,0 +1,489 @@
package meta
import (
"bytes"
"encoding/json"
"errors"
"fmt"
"io/ioutil"
"math/rand"
"os"
"path/filepath"
"sync"
"time"
"github.com/hashicorp/raft"
"github.com/hashicorp/raft-boltdb"
)
// raftState abstracts the interaction of the raft consensus layer
// across local or remote nodes. It is a form of the state design pattern and allows
// the meta.Store to change its behavior with the raft layer at runtime.
type raftState interface {
	open() error                                  // start the consensus layer
	remove() error                                // delete on-disk raft state
	initialize() error                            // bootstrap peers on first start
	leader() string                               // current leader address ("" if unknown)
	isLeader() bool                               // whether this node is the leader
	sync(index uint64, timeout time.Duration) error // wait until local data reaches index
	setPeers(addrs []string) error
	addPeer(addr string) error
	peers() ([]string, error)
	invalidate() error // refresh cached data from the leader
	close() error
	lastIndex() uint64
	apply(b []byte) error // apply a serialized command via consensus
	snapshot() error
}
// localRaft is a consensus strategy that uses a local raft implementation for
// consensus operations.
type localRaft struct {
	wg        sync.WaitGroup // tracks the logLeaderChanges goroutine
	closing   chan struct{}  // closed by close() to stop background work
	store     *Store
	raft      *raft.Raft
	transport *raft.NetworkTransport
	peerStore raft.PeerStore
	raftStore *raftboltdb.BoltStore
	raftLayer *raftLayer
}
// remove deletes all on-disk raft state: the bolt-backed log, the peers file,
// and any snapshots. Missing paths are not errors (os.RemoveAll semantics).
func (r *localRaft) remove() error {
	for _, name := range []string{"raft.db", "peers.json", "snapshots"} {
		if err := os.RemoveAll(filepath.Join(r.store.path, name)); err != nil {
			return err
		}
	}
	return nil
}
// updateMetaData installs ms as the store's cached Data when ms is strictly
// newer (higher raft index). Nil or stale snapshots are ignored.
func (r *localRaft) updateMetaData(ms *Data) {
	if ms == nil {
		return
	}

	// Check and swap under a single write lock. The previous pattern
	// (check under RLock, then re-acquire the write lock to set) left a
	// window where a newer snapshot installed by a concurrent goroutine
	// could be overwritten by this older one.
	r.store.mu.Lock()
	defer r.store.mu.Unlock()
	if ms.Index > r.store.data.Index {
		r.store.Logger.Printf("Updating metastore to term=%v index=%v", ms.Term, ms.Index)
		r.store.data = ms
	}
}
// invalidate refreshes the locally cached meta data from the current leader.
// Leaders are authoritative, so this is a no-op when we hold leadership.
func (r *localRaft) invalidate() error {
	if r.store.IsLeader() {
		return nil
	}

	// Non-blocking fetch: returns nil data when we're already up to date.
	ms, err := r.store.rpc.fetchMetaData(false)
	if err != nil {
		return err
	}

	r.updateMetaData(ms)
	return nil
}
// open configures and starts the local raft node: transport, peer store,
// bolt-backed log/stable store, snapshot store, and the raft instance itself,
// plus a background goroutine that logs leadership changes.
func (r *localRaft) open() error {
	r.closing = make(chan struct{})

	s := r.store
	// Setup raft configuration.
	config := raft.DefaultConfig()
	config.LogOutput = ioutil.Discard

	if s.clusterTracingEnabled {
		config.Logger = s.Logger
	}
	config.HeartbeatTimeout = s.HeartbeatTimeout
	config.ElectionTimeout = s.ElectionTimeout
	config.LeaderLeaseTimeout = s.LeaderLeaseTimeout
	config.CommitTimeout = s.CommitTimeout

	// If no peers are set in the config or there is one and we are it, then start as a single server.
	if len(s.peers) <= 1 {
		config.EnableSingleNode = true
		// Ensure we can always become the leader
		config.DisableBootstrapAfterElect = false
		// Don't shutdown raft automatically if we renamed our hostname back to a previous name
		config.ShutdownOnRemove = false
	}

	// Build raft layer to multiplex listener.
	r.raftLayer = newRaftLayer(s.RaftListener, s.RemoteAddr)

	// Create a transport layer
	r.transport = raft.NewNetworkTransport(r.raftLayer, 3, 10*time.Second, config.LogOutput)

	// Create peer storage.
	r.peerStore = raft.NewJSONPeers(s.path, r.transport)

	peers, err := r.peerStore.Peers()
	if err != nil {
		return err
	}

	// For single-node clusters, we can update the raft peers before we start the cluster if the hostname
	// has changed.
	if config.EnableSingleNode {
		if err := r.peerStore.SetPeers([]string{s.RemoteAddr.String()}); err != nil {
			return err
		}
		peers = []string{s.RemoteAddr.String()}
	}

	// If we have multiple nodes in the cluster, make sure our address is in the raft peers or
	// we won't be able to boot into the cluster because the other peers will reject our new hostname. This
	// is difficult to resolve automatically because we need to have all the raft peers agree on the current members
	// of the cluster before we can change them.
	if len(peers) > 0 && !raft.PeerContained(peers, s.RemoteAddr.String()) {
		s.Logger.Printf("%v is not in the list of raft peers. Please update %v/peers.json on all raft nodes to have the same contents.", s.RemoteAddr.String(), s.Path())
		return fmt.Errorf("peers out of sync: %v not in %v", s.RemoteAddr.String(), peers)
	}

	// Create the log store and stable store.
	store, err := raftboltdb.NewBoltStore(filepath.Join(s.path, "raft.db"))
	if err != nil {
		return fmt.Errorf("new bolt store: %s", err)
	}
	r.raftStore = store

	// Create the snapshot store.
	snapshots, err := raft.NewFileSnapshotStore(s.path, raftSnapshotsRetained, os.Stderr)
	if err != nil {
		return fmt.Errorf("file snapshot store: %s", err)
	}

	// Create raft log. The store itself (as storeFSM) is the finite state
	// machine that raft applies committed commands to.
	ra, err := raft.NewRaft(config, (*storeFSM)(s), store, store, snapshots, r.peerStore, r.transport)
	if err != nil {
		return fmt.Errorf("new raft: %s", err)
	}
	r.raft = ra

	r.wg.Add(1)
	go r.logLeaderChanges()

	return nil
}
// logLeaderChanges logs the node's raft state on startup and again on every
// leadership transition, until close() signals shutdown via r.closing.
func (r *localRaft) logLeaderChanges() {
	defer r.wg.Done()
	// Logs our current state (Node at 1.2.3.4:8088 [Follower])
	r.store.Logger.Printf(r.raft.String())
	for {
		select {
		case <-r.closing:
			return
		case <-r.raft.LeaderCh():
			// Leadership gained or lost; log the new state and peer set.
			peers, err := r.peers()
			if err != nil {
				r.store.Logger.Printf("failed to lookup peers: %v", err)
			}
			r.store.Logger.Printf("%v. peers=%v", r.raft.String(), peers)
		}
	}
}
// close shuts the consensus layer down in dependency order: stop background
// goroutines, then the transport and mux layer, then raft itself, and finally
// the bolt store. Fields are nilled so close is effectively idempotent per
// sub-resource.
func (r *localRaft) close() error {
	close(r.closing)
	r.wg.Wait()

	if r.transport != nil {
		r.transport.Close()
		r.transport = nil
	}
	if r.raftLayer != nil {
		r.raftLayer.Close()
		r.raftLayer = nil
	}

	// Shutdown raft.
	if r.raft != nil {
		if err := r.raft.Shutdown().Error(); err != nil {
			return err
		}
		r.raft = nil
	}
	if r.raftStore != nil {
		r.raftStore.Close()
		r.raftStore = nil
	}

	return nil
}
// initialize bootstraps the raft peer set on first start. It is a no-op once
// the raft log contains committed entries (the node already joined a cluster).
func (r *localRaft) initialize() error {
	s := r.store

	// If we have committed entries then the store is already in the cluster.
	if index, err := r.raftStore.LastIndex(); err != nil {
		return fmt.Errorf("last index: %s", err)
	} else if index > 0 {
		return nil
	}

	// Force set peers.
	if err := r.setPeers(s.peers); err != nil {
		return fmt.Errorf("set raft peers: %s", err)
	}

	return nil
}
// apply applies a serialized command to the raft log and waits for it to be
// committed and executed by the FSM. A non-nil FSM response is treated as an
// error (mapped through lookupError); any other non-nil response is a bug.
func (r *localRaft) apply(b []byte) error {
	// Apply to raft log.
	f := r.raft.Apply(b, 0)
	if err := f.Error(); err != nil {
		return err
	}

	// Return response if it's an error.
	// No other non-nil objects should be returned.
	resp := f.Response()
	if err, ok := resp.(error); ok {
		return lookupError(err)
	}
	assert(resp == nil, "unexpected response: %#v", resp)

	return nil
}
// lastIndex returns the index of the most recent entry in the local raft log.
func (r *localRaft) lastIndex() uint64 {
	return r.raft.LastIndex()
}
// sync polls the cached metadata every 100ms until its index reaches the
// given index, returning an error if timeout elapses first.
func (r *localRaft) sync(index uint64, timeout time.Duration) error {
	ticker := time.NewTicker(100 * time.Millisecond)
	defer ticker.Stop()

	timer := time.NewTimer(timeout)
	defer timer.Stop()

	for {
		// Wait for next tick or timeout.
		select {
		case <-ticker.C:
		case <-timer.C:
			return errors.New("timeout")
		}

		// Compare index against current metadata. This only reads
		// store state, so a read lock suffices (matching the other
		// read paths on the store's RWMutex).
		r.store.mu.RLock()
		ok := (r.store.data.Index >= index)
		r.store.mu.RUnlock()

		// Exit if we are at least at the given index.
		if ok {
			return nil
		}
	}
}
// snapshot triggers a raft snapshot and blocks until it completes,
// returning any error reported by the snapshot future.
func (r *localRaft) snapshot() error {
	future := r.raft.Snapshot()
	return future.Error()
}
// addPeer adds addr to the list of peers in the cluster. The raft
// membership is capped at MaxRaftNodes; once that many peers exist the
// call is a silent no-op (not an error), so additional nodes join the
// cluster as non-raft members instead.
func (r *localRaft) addPeer(addr string) error {
	peers, err := r.peerStore.Peers()
	if err != nil {
		return err
	}

	// Use the package constant rather than a magic number so this cap
	// stays in sync with the limit enforced in Store.Open.
	if len(peers) >= MaxRaftNodes {
		return nil
	}

	return r.raft.AddPeer(addr).Error()
}
// setPeers sets a list of peers in the cluster. It blocks until the
// membership change has been applied by raft.
func (r *localRaft) setPeers(addrs []string) error {
	return r.raft.SetPeers(addrs).Error()
}
// peers returns the current set of raft peer addresses from the peer store.
func (r *localRaft) peers() ([]string, error) {
	return r.peerStore.Peers()
}
// leader returns the address of the current raft leader, or an empty
// string when raft has not been opened (or was closed) or no leader is known.
func (r *localRaft) leader() string {
	if r.raft == nil {
		return ""
	}

	return r.raft.Leader()
}
// isLeader reports whether this node is currently the raft leader.
// The store lock guards r.raft, which close sets to nil.
func (r *localRaft) isLeader() bool {
	r.store.mu.RLock()
	defer r.store.mu.RUnlock()
	if r.raft == nil {
		return false
	}
	return r.raft.State() == raft.Leader
}
// remoteRaft is a consensus strategy that uses a remote raft cluster for
// consensus operations. A node in this state does not run raft locally; it
// reads metadata from the raft members listed in the store's peer set.
type remoteRaft struct {
	store *Store // local metastore whose cached data this strategy maintains
}
// remove is a no-op: a remote-raft node keeps no local raft state to delete.
func (r *remoteRaft) remove() error {
	return nil
}
// updateMetaData replaces the store's cached metadata with ms when ms is
// newer (has a higher raft index) than the cached copy. A nil ms is ignored.
func (r *remoteRaft) updateMetaData(ms *Data) {
	if ms == nil {
		return
	}

	// Check and swap under a single write lock. Checking staleness under a
	// read lock and re-acquiring a write lock for the swap would allow a
	// concurrent update to land in between, letting older data overwrite
	// newer data.
	r.store.mu.Lock()
	defer r.store.mu.Unlock()

	if ms.Index > r.store.data.Index {
		r.store.Logger.Printf("Updating metastore to term=%v index=%v", ms.Term, ms.Index)
		r.store.data = ms
	}
}
// invalidate refreshes the locally cached metadata by performing a
// non-blocking fetch from the remote raft cluster and merging the result.
func (r *remoteRaft) invalidate() error {
	ms, err := r.store.rpc.fetchMetaData(false)
	if err == nil {
		r.updateMetaData(ms)
	}
	return err
}
// setPeers persists addrs as JSON to peers.json inside the store's data
// directory so a restarted node can rediscover the raft cluster.
func (r *remoteRaft) setPeers(addrs []string) error {
	// Serialize the peer list before touching the filesystem.
	var b bytes.Buffer
	if err := json.NewEncoder(&b).Encode(addrs); err != nil {
		return err
	}

	return ioutil.WriteFile(filepath.Join(r.store.path, "peers.json"), b.Bytes(), 0755)
}
// addPeer always fails: raft membership changes must be made on a raft
// member, not on a remote-raft client.
func (r *remoteRaft) addPeer(addr string) error {
	// The message has no format verbs, so errors.New is the idiomatic
	// constructor (and avoids the fmt formatting machinery).
	return errors.New("cannot add peer using remote raft")
}
// peers returns the raft peer addresses recorded in this node's
// peers.json file (written by setPeers).
func (r *remoteRaft) peers() ([]string, error) {
	return readPeersJSON(filepath.Join(r.store.path, "peers.json"))
}
// open records the configured peers on disk and starts a background
// goroutine that continuously pulls metadata from the remote raft cluster
// until the store begins closing.
func (r *remoteRaft) open() error {
	// Persist the peer list so a restart can find the raft cluster.
	if err := r.setPeers(r.store.peers); err != nil {
		return err
	}

	go func() {
		for {
			// Stop polling once the store starts shutting down.
			select {
			case <-r.store.closing:
				return
			default:
			}

			// On failure, back off for a second before retrying.
			// NOTE(review): there is no sleep on the success path, so this
			// presumably relies on fetchMetaData(true) blocking until new
			// data is available — confirm against the rpc implementation.
			ms, err := r.store.rpc.fetchMetaData(true)
			if err != nil {
				r.store.Logger.Printf("fetch metastore: %v", err)
				time.Sleep(time.Second)
				continue
			}
			r.updateMetaData(ms)
		}
	}()
	return nil
}
// close is a no-op: the polling goroutine started by open exits on its
// own when the store's closing channel is closed.
func (r *remoteRaft) close() error {
	return nil
}
// apply always fails: only raft members may append to the raft log, so a
// remote-raft client cannot apply commands locally.
func (r *remoteRaft) apply(b []byte) error {
	// No format verbs in the message, so errors.New is the idiomatic choice.
	return errors.New("cannot apply log while in remote raft state")
}
// initialize is a no-op: a remote-raft node has no local raft log to bootstrap.
func (r *remoteRaft) initialize() error {
	return nil
}
// leader returns the address of one of the configured peers, chosen at
// random, or an empty string when no peers are configured.
// NOTE(review): any peer is treated as "the leader" here — presumably the
// receiving node proxies requests to the real leader (as Store.handleExecConn
// does); confirm before relying on this for leader-only operations.
func (r *remoteRaft) leader() string {
	if len(r.store.peers) == 0 {
		return ""
	}

	return r.store.peers[rand.Intn(len(r.store.peers))]
}
// isLeader always reports false: a remote-raft node never leads the cluster.
func (r *remoteRaft) isLeader() bool {
	return false
}
// lastIndex returns the index of the locally cached metadata, which may
// trail the remote cluster until the next successful fetch.
func (r *remoteRaft) lastIndex() uint64 {
	return r.store.cachedData().Index
}
// sync forces a metadata refresh from the remote cluster.
// NOTE(review): index and timeout are currently ignored (see FIXME), so
// callers may observe data older than the requested index.
func (r *remoteRaft) sync(index uint64, timeout time.Duration) error {
	//FIXME: jwilder: check index and timeout
	return r.store.invalidate()
}
// snapshot always fails: only a node running raft locally can snapshot the log.
func (r *remoteRaft) snapshot() error {
	// No format verbs in the message, so errors.New is the idiomatic choice.
	return errors.New("cannot snapshot while in remote raft state")
}
func readPeersJSON(path string) ([]string, error) {
// Read the file
buf, err := ioutil.ReadFile(path)
if err != nil && !os.IsNotExist(err) {
return nil, err
}
// Check for no peers
if len(buf) == 0 {
return nil, nil
}
// Decode the peers
var peers []string
dec := json.NewDecoder(bytes.NewReader(buf))
if err := dec.Decode(&peers); err != nil {
return nil, err
}
return peers, nil
}

View File

@ -10,6 +10,7 @@ import (
type StatementExecutor struct {
Store interface {
Nodes() ([]NodeInfo, error)
Peers() ([]string, error)
Database(name string) (*DatabaseInfo, error)
Databases() ([]DatabaseInfo, error)
@ -127,9 +128,14 @@ func (e *StatementExecutor) executeShowServersStatement(q *influxql.ShowServersS
return &influxql.Result{Err: err}
}
row := &influxql.Row{Columns: []string{"id", "url"}}
peers, err := e.Store.Peers()
if err != nil {
return &influxql.Result{Err: err}
}
row := &influxql.Row{Columns: []string{"id", "cluster_addr", "raft"}}
for _, ni := range nis {
row.Values = append(row.Values, []interface{}{ni.ID, "http://" + ni.Host})
row.Values = append(row.Values, []interface{}{ni.ID, ni.Host, contains(peers, ni.Host)})
}
return &influxql.Result{Series: []*influxql.Row{row}}
}

View File

@ -121,15 +121,18 @@ func TestStatementExecutor_ExecuteStatement_ShowServers(t *testing.T) {
{ID: 2, Host: "node1"},
}, nil
}
e.Store.PeersFn = func() ([]string, error) {
return []string{"node0"}, nil
}
if res := e.ExecuteStatement(influxql.MustParseStatement(`SHOW SERVERS`)); res.Err != nil {
t.Fatal(res.Err)
} else if !reflect.DeepEqual(res.Series, influxql.Rows{
{
Columns: []string{"id", "url"},
Columns: []string{"id", "cluster_addr", "raft"},
Values: [][]interface{}{
{uint64(1), "http://node0"},
{uint64(2), "http://node1"},
{uint64(1), "node0", true},
{uint64(2), "node1", false},
},
},
}) {
@ -778,6 +781,7 @@ func NewStatementExecutor() *StatementExecutor {
// StatementExecutorStore represents a mock implementation of StatementExecutor.Store.
type StatementExecutorStore struct {
NodesFn func() ([]meta.NodeInfo, error)
PeersFn func() ([]string, error)
DatabaseFn func(name string) (*meta.DatabaseInfo, error)
DatabasesFn func() ([]meta.DatabaseInfo, error)
CreateDatabaseFn func(name string) (*meta.DatabaseInfo, error)
@ -804,6 +808,10 @@ func (s *StatementExecutorStore) Nodes() ([]meta.NodeInfo, error) {
return s.NodesFn()
}
func (s *StatementExecutorStore) Peers() ([]string, error) {
return s.PeersFn()
}
func (s *StatementExecutorStore) Database(name string) (*meta.DatabaseInfo, error) {
return s.DatabaseFn(name)
}

View File

@ -21,7 +21,6 @@ import (
"github.com/gogo/protobuf/proto"
"github.com/hashicorp/raft"
"github.com/hashicorp/raft-boltdb"
"github.com/influxdb/influxdb/influxql"
"github.com/influxdb/influxdb/meta/internal"
"golang.org/x/crypto/bcrypt"
@ -31,9 +30,12 @@ import (
const (
MuxRaftHeader = 0
MuxExecHeader = 1
MuxRPCHeader = 5
// SaltBytes is the number of bytes used for salts
SaltBytes = 32
DefaultSyncNodeDelay = time.Second
)
// ExecMagic is the first 4 bytes sent to a remote exec connection to verify
@ -45,6 +47,10 @@ const (
AutoCreateRetentionPolicyName = "default"
AutoCreateRetentionPolicyPeriod = 0
RetentionPolicyMinDuration = time.Hour
// MaxAutoCreatedRetentionPolicyReplicaN is the maximum replication factor that will
// be set for auto-created retention policies.
MaxAutoCreatedRetentionPolicyReplicaN = 3
)
// Raft configuration.
@ -53,6 +59,7 @@ const (
raftSnapshotsRetained = 2
raftTransportMaxPool = 3
raftTransportTimeout = 10 * time.Second
MaxRaftNodes = 3
)
// Store represents a raft-backed metastore.
@ -68,17 +75,22 @@ type Store struct {
data *Data
remoteAddr net.Addr
raft *raft.Raft
raftLayer *raftLayer
peerStore raft.PeerStore
transport *raft.NetworkTransport
store *raftboltdb.BoltStore
rpc *rpc
// The address used by other nodes to reach this node.
RemoteAddr net.Addr
raftState raftState
ready chan struct{}
err chan error
closing chan struct{}
wg sync.WaitGroup
changed chan struct{}
// clusterTracingEnabled controls whether low-level cluster communication is logged.
// Useful for troubleshooting
clusterTracingEnabled bool
retentionAutoCreate bool
@ -86,6 +98,9 @@ type Store struct {
RaftListener net.Listener
ExecListener net.Listener
// The listener for higher-level, cluster operations
RPCListener net.Listener
// The advertised hostname of the store.
Addr net.Addr
@ -118,8 +133,8 @@ type authUser struct {
}
// NewStore returns a new instance of Store.
func NewStore(c Config) *Store {
return &Store{
func NewStore(c *Config) *Store {
s := &Store{
path: c.Dir,
peers: c.Peers,
data: &Data{},
@ -127,8 +142,10 @@ func NewStore(c Config) *Store {
ready: make(chan struct{}),
err: make(chan error),
closing: make(chan struct{}),
changed: make(chan struct{}),
retentionAutoCreate: c.RetentionAutoCreate,
clusterTracingEnabled: c.ClusterTracing,
retentionAutoCreate: c.RetentionAutoCreate,
HeartbeatTimeout: time.Duration(c.HeartbeatTimeout),
ElectionTimeout: time.Duration(c.ElectionTimeout),
@ -140,6 +157,14 @@ func NewStore(c Config) *Store {
},
Logger: log.New(os.Stderr, "[metastore] ", log.LstdFlags),
}
s.raftState = &localRaft{store: s}
s.rpc = &rpc{
store: s,
tracingEnabled: c.ClusterTracing,
logger: s.Logger,
}
return s
}
// Path returns the root path when open.
@ -153,7 +178,7 @@ func (s *Store) IDPath() string { return filepath.Join(s.path, "id") }
func (s *Store) Open() error {
// Verify that no more than 3 peers.
// https://github.com/influxdb/influxdb/issues/2750
if len(s.peers) > 3 {
if len(s.peers) > MaxRaftNodes {
return ErrTooManyPeers
}
@ -162,8 +187,12 @@ func (s *Store) Open() error {
panic("Store.RaftListener not set")
} else if s.ExecListener == nil {
panic("Store.ExecListener not set")
} else if s.RPCListener == nil {
panic("Store.RPCListener not set")
}
s.Logger.Printf("Using data dir: %v", s.Path())
if err := func() error {
s.mu.Lock()
defer s.mu.Unlock()
@ -174,8 +203,13 @@ func (s *Store) Open() error {
}
s.opened = true
// load our raft state
if err := s.loadState(); err != nil {
return err
}
// Create the root directory if it doesn't already exist.
if err := os.MkdirAll(s.path, 0777); err != nil {
if err := s.createRootDir(); err != nil {
return fmt.Errorf("mkdir all: %s", err)
}
@ -204,78 +238,186 @@ func (s *Store) Open() error {
s.wg.Add(1)
go s.serveExecListener()
s.wg.Add(1)
go s.serveRPCListener()
// Join an existing cluster if we needed
if err := s.joinCluster(); err != nil {
return fmt.Errorf("join: %v", err)
}
// If the ID doesn't exist then create a new node.
if s.id == 0 {
go s.init()
} else {
go s.syncNodeInfo()
close(s.ready)
}
return nil
}
// syncNodeInfo continuously tries to update the current nodes hostname
// in the meta store. It will retry until successful.
func (s *Store) syncNodeInfo() error {
<-s.ready
for {
if err := func() error {
if err := s.WaitForLeader(0); err != nil {
return err
}
ni, err := s.Node(s.id)
if err != nil {
return err
}
if ni == nil {
return ErrNodeNotFound
}
if ni.Host == s.RemoteAddr.String() {
s.Logger.Printf("Updated node id=%d hostname=%v", s.id, s.RemoteAddr.String())
return nil
}
_, err = s.UpdateNode(s.id, s.RemoteAddr.String())
if err != nil {
return err
}
return nil
}(); err != nil {
// If we get an error, the cluster has not stabilized so just try again
time.Sleep(DefaultSyncNodeDelay)
continue
}
return nil
}
}
// loadState sets the appropriate raftState from our persistent storage
func (s *Store) loadState() error {
peers, err := readPeersJSON(filepath.Join(s.path, "peers.json"))
if err != nil {
return err
}
// If we have existing peers, use those. This will override what's in the
// config.
if len(peers) > 0 {
s.peers = peers
}
// if no peers on disk, we need to start raft in order to initialize a new
// cluster or join an existing one.
if len(peers) == 0 {
s.raftState = &localRaft{store: s}
// if we have a raft database, (maybe restored), we should start raft locally
} else if _, err := os.Stat(filepath.Join(s.path, "raft.db")); err == nil {
s.raftState = &localRaft{store: s}
// otherwise, we should use remote raft
} else {
s.raftState = &remoteRaft{store: s}
}
return nil
}
func (s *Store) joinCluster() error {
// No join options, so nothing to do
if len(s.peers) == 0 {
return nil
}
// We already have a node ID so were already part of a cluster,
// don't join again so we can use our existing state.
if s.id != 0 {
s.Logger.Printf("Skipping cluster join: already member of cluster: nodeId=%v raftEnabled=%v peers=%v",
s.id, raft.PeerContained(s.peers, s.RemoteAddr.String()), s.peers)
return nil
}
s.Logger.Printf("Joining cluster at: %v", s.peers)
for {
for _, join := range s.peers {
res, err := s.rpc.join(s.RemoteAddr.String(), join)
if err != nil {
s.Logger.Printf("Join node %v failed: %v: retrying...", join, err)
continue
}
s.Logger.Printf("Joined remote node %v", join)
s.Logger.Printf("nodeId=%v raftEnabled=%v peers=%v", res.NodeID, res.RaftEnabled, res.RaftNodes)
s.peers = res.RaftNodes
s.id = res.NodeID
if err := s.writeNodeID(res.NodeID); err != nil {
s.Logger.Printf("Write node id failed: %v", err)
break
}
if !res.RaftEnabled {
// Shutdown our local raft and transition to a remote raft state
if err := s.enableRemoteRaft(); err != nil {
s.Logger.Printf("Enable remote raft failed: %v", err)
break
}
}
return nil
}
time.Sleep(time.Second)
}
}
func (s *Store) enableLocalRaft() error {
if _, ok := s.raftState.(*localRaft); ok {
return nil
}
s.Logger.Printf("Switching to local raft")
lr := &localRaft{store: s}
return s.changeState(lr)
}
func (s *Store) enableRemoteRaft() error {
if _, ok := s.raftState.(*remoteRaft); ok {
return nil
}
s.Logger.Printf("Switching to remote raft")
rr := &remoteRaft{store: s}
return s.changeState(rr)
}
func (s *Store) changeState(state raftState) error {
if err := s.raftState.close(); err != nil {
return err
}
// Clear out any persistent state
if err := s.raftState.remove(); err != nil {
return err
}
s.raftState = state
if err := s.raftState.open(); err != nil {
return err
}
return nil
}
// openRaft initializes the raft store.
func (s *Store) openRaft() error {
// Setup raft configuration.
config := raft.DefaultConfig()
config.Logger = s.Logger
config.HeartbeatTimeout = s.HeartbeatTimeout
config.ElectionTimeout = s.ElectionTimeout
config.LeaderLeaseTimeout = s.LeaderLeaseTimeout
config.CommitTimeout = s.CommitTimeout
// If no peers are set in the config then start as a single server.
config.EnableSingleNode = (len(s.peers) == 0)
// Build raft layer to multiplex listener.
s.raftLayer = newRaftLayer(s.RaftListener, s.Addr)
// Create a transport layer
s.transport = raft.NewNetworkTransport(s.raftLayer, 3, 10*time.Second, os.Stderr)
// Create peer storage.
s.peerStore = raft.NewJSONPeers(s.path, s.transport)
// Create the log store and stable store.
store, err := raftboltdb.NewBoltStore(filepath.Join(s.path, "raft.db"))
if err != nil {
return fmt.Errorf("new bolt store: %s", err)
}
s.store = store
// Create the snapshot store.
snapshots, err := raft.NewFileSnapshotStore(s.path, raftSnapshotsRetained, os.Stderr)
if err != nil {
return fmt.Errorf("file snapshot store: %s", err)
}
// Create raft log.
r, err := raft.NewRaft(config, (*storeFSM)(s), store, store, snapshots, s.peerStore, s.transport)
if err != nil {
return fmt.Errorf("new raft: %s", err)
}
s.raft = r
return nil
return s.raftState.open()
}
// initialize attempts to bootstrap the raft store if there are no committed entries.
func (s *Store) initialize() error {
// If we have committed entries then the store is already in the cluster.
/*
if index, err := s.store.LastIndex(); err != nil {
return fmt.Errorf("last index: %s", err)
} else if index > 0 {
return nil
}
*/
// Force set peers.
if err := s.SetPeers(s.peers); err != nil {
return fmt.Errorf("set raft peers: %s", err)
}
return nil
return s.raftState.initialize()
}
// Close closes the store and shuts down the node in the cluster.
@ -285,6 +427,23 @@ func (s *Store) Close() error {
return s.close()
}
// WaitForDataChanged will block the current goroutine until the metastore index has
// be updated.
func (s *Store) WaitForDataChanged() error {
s.mu.RLock()
changed := s.changed
s.mu.RUnlock()
for {
select {
case <-s.closing:
return errors.New("closing")
case <-changed:
return nil
}
}
}
func (s *Store) close() error {
// Check if store has already been closed.
if !s.opened {
@ -296,18 +455,9 @@ func (s *Store) close() error {
close(s.closing)
// FIXME(benbjohnson): s.wg.Wait()
// Shutdown raft.
if s.raft != nil {
s.raft.Shutdown()
s.raft = nil
}
if s.transport != nil {
s.transport.Close()
s.transport = nil
}
if s.store != nil {
s.store.Close()
s.store = nil
if s.raftState != nil {
s.raftState.close()
s.raftState = nil
}
return nil
@ -329,8 +479,6 @@ func (s *Store) readID() error {
}
s.id = id
s.Logger.Printf("read local node id: %d", s.id)
return nil
}
@ -357,37 +505,43 @@ func (s *Store) createLocalNode() error {
}
// Create new node.
ni, err := s.CreateNode(s.Addr.String())
ni, err := s.CreateNode(s.RemoteAddr.String())
if err != nil {
return fmt.Errorf("create node: %s", err)
}
// Write node id to file.
if err := ioutil.WriteFile(s.IDPath(), []byte(strconv.FormatUint(ni.ID, 10)), 0666); err != nil {
if err := s.writeNodeID(ni.ID); err != nil {
return fmt.Errorf("write file: %s", err)
}
// Set ID locally.
s.id = ni.ID
s.Logger.Printf("created local node: id=%d, host=%s", s.id, s.Addr.String())
s.Logger.Printf("Created local node: id=%d, host=%s", s.id, s.RemoteAddr)
return nil
}
func (s *Store) createRootDir() error {
return os.MkdirAll(s.path, 0777)
}
func (s *Store) writeNodeID(id uint64) error {
if err := s.createRootDir(); err != nil {
return err
}
return ioutil.WriteFile(s.IDPath(), []byte(strconv.FormatUint(id, 10)), 0666)
}
// Snapshot saves a snapshot of the current state.
func (s *Store) Snapshot() error {
future := s.raft.Snapshot()
return future.Error()
return s.raftState.snapshot()
}
// WaitForLeader sleeps until a leader is found or a timeout occurs.
// timeout == 0 means to wait forever.
func (s *Store) WaitForLeader(timeout time.Duration) error {
if s.raft.Leader() != "" {
return nil
}
// Begin timeout timer.
timer := time.NewTimer(timeout)
defer timer.Stop()
@ -404,7 +558,7 @@ func (s *Store) WaitForLeader(timeout time.Duration) error {
return errors.New("timeout")
}
case <-ticker.C:
if s.raft.Leader() != "" {
if s.Leader() != "" {
return nil
}
}
@ -421,10 +575,10 @@ func (s *Store) Err() <-chan error { return s.err }
func (s *Store) IsLeader() bool {
s.mu.RLock()
defer s.mu.RUnlock()
if s.raft == nil {
if s.raftState == nil {
return false
}
return s.raft.State() == raft.Leader
return s.raftState.isLeader()
}
// Leader returns what the store thinks is the current leader. An empty
@ -432,32 +586,27 @@ func (s *Store) IsLeader() bool {
func (s *Store) Leader() string {
s.mu.RLock()
defer s.mu.RUnlock()
if s.raft == nil {
if s.raftState == nil {
return ""
}
return s.raft.Leader()
}
// LeaderCh returns a channel that notifies on leadership change.
// Panics when the store has not been opened yet.
func (s *Store) LeaderCh() <-chan bool {
s.mu.RLock()
defer s.mu.RUnlock()
assert(s.raft != nil, "cannot retrieve leadership channel when closed")
return s.raft.LeaderCh()
return s.raftState.leader()
}
// SetPeers sets a list of peers in the cluster.
func (s *Store) SetPeers(addrs []string) error {
a := make([]string, len(addrs))
for i, s := range addrs {
addr, err := net.ResolveTCPAddr("tcp", s)
if err != nil {
return fmt.Errorf("cannot resolve addr: %s, err=%s", s, err)
}
a[i] = addr.String()
}
return s.raft.SetPeers(a).Error()
return s.raftState.setPeers(addrs)
}
// AddPeer adds addr to the list of peers in the cluster.
func (s *Store) AddPeer(addr string) error {
return s.raftState.addPeer(addr)
}
// Peers returns the list of peers in the cluster.
func (s *Store) Peers() ([]string, error) {
s.mu.RLock()
defer s.mu.RUnlock()
return s.raftState.peers()
}
// serveExecListener processes remote exec connections.
@ -471,10 +620,9 @@ func (s *Store) serveExecListener() {
if err != nil {
if strings.Contains(err.Error(), "connection closed") {
return
} else {
s.Logger.Printf("temporary accept error: %s", err)
continue
}
s.Logger.Printf("temporary accept error: %s", err)
continue
}
// Handle connection in a separate goroutine.
@ -487,6 +635,31 @@ func (s *Store) serveExecListener() {
func (s *Store) handleExecConn(conn net.Conn) {
defer s.wg.Done()
// Nodes not part of the raft cluster may initiate remote exec commands
// but may not know who the current leader of the cluster. If we are not
// the leader, proxy the request to the current leader.
if !s.IsLeader() {
if s.Leader() == s.RemoteAddr.String() {
s.Logger.Printf("No leader")
return
}
leaderConn, err := net.DialTimeout("tcp", s.Leader(), 10*time.Second)
if err != nil {
s.Logger.Printf("Dial leader: %v", err)
return
}
defer leaderConn.Close()
leaderConn.Write([]byte{MuxExecHeader})
if err := proxy(leaderConn.(*net.TCPConn), conn.(*net.TCPConn)); err != nil {
s.Logger.Printf("Leader proxy error: %v", err)
}
conn.Close()
return
}
// Read and execute command.
err := func() error {
// Read marker message.
@ -524,7 +697,7 @@ func (s *Store) handleExecConn(conn net.Conn) {
// Build response message.
var resp internal.Response
resp.OK = proto.Bool(err == nil)
resp.Index = proto.Uint64(s.raft.LastIndex())
resp.Index = proto.Uint64(s.raftState.lastIndex())
if err != nil {
resp.Error = proto.String(err.Error())
}
@ -533,13 +706,39 @@ func (s *Store) handleExecConn(conn net.Conn) {
if b, err := proto.Marshal(&resp); err != nil {
panic(err)
} else if err = binary.Write(conn, binary.BigEndian, uint64(len(b))); err != nil {
s.Logger.Printf("unable to write exec response size: %s", err)
s.Logger.Printf("Unable to write exec response size: %s", err)
} else if _, err = conn.Write(b); err != nil {
s.Logger.Printf("unable to write exec response: %s", err)
s.Logger.Printf("Unable to write exec response: %s", err)
}
conn.Close()
}
// serveRPCListener processes remote exec connections.
// This function runs in a separate goroutine.
func (s *Store) serveRPCListener() {
defer s.wg.Done()
for {
// Accept next TCP connection.
conn, err := s.RPCListener.Accept()
if err != nil {
if strings.Contains(err.Error(), "connection closed") {
return
} else {
s.Logger.Printf("temporary accept error: %s", err)
continue
}
}
// Handle connection in a separate goroutine.
s.wg.Add(1)
go func() {
defer s.wg.Done()
s.rpc.handleRPCConn(conn)
}()
}
}
// MarshalBinary encodes the store's data to a binary protobuf format.
func (s *Store) MarshalBinary() ([]byte, error) {
s.mu.RLock()
@ -607,6 +806,19 @@ func (s *Store) CreateNode(host string) (*NodeInfo, error) {
return s.NodeByHost(host)
}
// UpdateNode updates an existing node in the store.
func (s *Store) UpdateNode(id uint64, host string) (*NodeInfo, error) {
if err := s.exec(internal.Command_UpdateNodeCommand, internal.E_UpdateNodeCommand_Command,
&internal.UpdateNodeCommand{
ID: proto.Uint64(id),
Host: proto.String(host),
},
); err != nil {
return nil, err
}
return s.NodeByHost(host)
}
// DeleteNode removes a node from the metastore by id.
func (s *Store) DeleteNode(id uint64) error {
return s.exec(internal.Command_DeleteNodeCommand, internal.E_DeleteNodeCommand_Command,
@ -658,6 +870,10 @@ func (s *Store) CreateDatabase(name string) (*DatabaseInfo, error) {
return nil, fmt.Errorf("read: %s", err)
}
if nodeN > MaxAutoCreatedRetentionPolicyReplicaN {
nodeN = MaxAutoCreatedRetentionPolicyReplicaN
}
// Create a retention policy.
rpi := NewRetentionPolicyInfo(AutoCreateRetentionPolicyName)
rpi.ReplicaN = nodeN
@ -685,11 +901,11 @@ func (s *Store) CreateDatabaseIfNotExists(name string) (*DatabaseInfo, error) {
}
// Attempt to create database.
if di, err := s.CreateDatabase(name); err == ErrDatabaseExists {
di, err := s.CreateDatabase(name)
if err == ErrDatabaseExists {
return s.Database(name)
} else {
return di, err
}
return di, err
}
// DropDatabase removes a database from the metastore by name.
@ -774,11 +990,11 @@ func (s *Store) CreateRetentionPolicyIfNotExists(database string, rpi *Retention
}
// Attempt to create policy.
if other, err := s.CreateRetentionPolicy(database, rpi); err == ErrRetentionPolicyExists {
other, err := s.CreateRetentionPolicy(database, rpi)
if err == ErrRetentionPolicyExists {
return s.RetentionPolicy(database, rpi.Name)
} else {
return other, err
}
return other, err
}
// SetDefaultRetentionPolicy sets the default retention policy for a database.
@ -858,11 +1074,11 @@ func (s *Store) CreateShardGroupIfNotExists(database, policy string, timestamp t
}
// Attempt to create database.
if sgi, err := s.CreateShardGroup(database, policy, timestamp); err == ErrShardGroupExists {
sgi, err := s.CreateShardGroup(database, policy, timestamp)
if err == ErrShardGroupExists {
return s.ShardGroupByTimestamp(database, policy, timestamp)
} else {
return sgi, err
}
return sgi, err
}
// DeleteShardGroup removes an existing shard group from a policy by ID.
@ -1037,9 +1253,8 @@ func (s *Store) Authenticate(username, password string) (ui *UserInfo, err error
if bytes.Equal(hashed, au.hash) {
ui = u
return nil
} else {
return ErrAuthenticate
}
return ErrAuthenticate
}
// Compare password with user hash.
@ -1264,8 +1479,7 @@ func (s *Store) read(fn func(*Data) error) error {
var errInvalidate = errors.New("invalidate cache")
func (s *Store) invalidate() error {
time.Sleep(1 * time.Second)
return nil // FIXME(benbjohnson): Reload cache from the leader.
return s.raftState.invalidate()
}
func (s *Store) exec(typ internal.Command_Type, desc *proto.ExtensionDesc, value interface{}) error {
@ -1280,36 +1494,21 @@ func (s *Store) exec(typ internal.Command_Type, desc *proto.ExtensionDesc, value
// Apply the command if this is the leader.
// Otherwise remotely execute the command against the current leader.
if s.raft.State() == raft.Leader {
if s.raftState.isLeader() {
return s.apply(b)
} else {
return s.remoteExec(b)
}
return s.remoteExec(b)
}
// apply applies a serialized command to the raft log.
func (s *Store) apply(b []byte) error {
// Apply to raft log.
f := s.raft.Apply(b, 0)
if err := f.Error(); err != nil {
return err
}
// Return response if it's an error.
// No other non-nil objects should be returned.
resp := f.Response()
if err, ok := resp.(error); ok {
return lookupError(err)
}
assert(resp == nil, "unexpected response: %#v", resp)
return nil
return s.raftState.apply(b)
}
// remoteExec sends an encoded command to the remote leader.
func (s *Store) remoteExec(b []byte) error {
// Retrieve the current known leader.
leader := s.raft.Leader()
leader := s.raftState.leader()
if leader == "" {
return errors.New("no leader")
}
@ -1368,30 +1567,13 @@ func (s *Store) remoteExec(b []byte) error {
// sync polls the state machine until it reaches a given index.
func (s *Store) sync(index uint64, timeout time.Duration) error {
ticker := time.NewTicker(100 * time.Millisecond)
defer ticker.Stop()
return s.raftState.sync(index, timeout)
}
timer := time.NewTimer(timeout)
defer timer.Stop()
for {
// Wait for next tick or timeout.
select {
case <-ticker.C:
case <-timer.C:
return errors.New("timeout")
}
// Compare index against current metadata.
s.mu.Lock()
ok := (s.data.Index >= index)
s.mu.Unlock()
// Exit if we are at least at the given index.
if ok {
return nil
}
}
func (s *Store) cachedData() *Data {
s.mu.RLock()
defer s.mu.RUnlock()
return s.data.Clone()
}
// BcryptCost is the cost associated with generating password with Bcrypt.
@ -1467,6 +1649,8 @@ func (fsm *storeFSM) Apply(l *raft.Log) interface{} {
return fsm.applySetAdminPrivilegeCommand(&cmd)
case internal.Command_SetDataCommand:
return fsm.applySetDataCommand(&cmd)
case internal.Command_UpdateNodeCommand:
return fsm.applyUpdateNodeCommand(&cmd)
default:
panic(fmt.Errorf("cannot apply command: %x", l.Data))
}
@ -1475,6 +1659,8 @@ func (fsm *storeFSM) Apply(l *raft.Log) interface{} {
// Copy term and index to new metadata.
fsm.data.Term = l.Term
fsm.data.Index = l.Index
close(s.changed)
s.changed = make(chan struct{})
return err
}
@ -1498,6 +1684,23 @@ func (fsm *storeFSM) applyCreateNodeCommand(cmd *internal.Command) interface{} {
return nil
}
func (fsm *storeFSM) applyUpdateNodeCommand(cmd *internal.Command) interface{} {
ext, _ := proto.GetExtension(cmd, internal.E_UpdateNodeCommand_Command)
v := ext.(*internal.UpdateNodeCommand)
// Copy data and update.
other := fsm.data.Clone()
ni := other.Node(v.GetID())
if ni == nil {
return ErrNodeNotFound
}
ni.Host = v.GetHost()
fsm.data = other
return nil
}
func (fsm *storeFSM) applyDeleteNodeCommand(cmd *internal.Command) interface{} {
ext, _ := proto.GetExtension(cmd, internal.E_DeleteNodeCommand_Command)
v := ext.(*internal.DeleteNodeCommand)

View File

@ -218,14 +218,18 @@ func TestStore_DropDatabase(t *testing.T) {
}
// Ensure remaining nodes are correct.
if di, _ := s.Database("db0"); !reflect.DeepEqual(di, &meta.DatabaseInfo{Name: "db0"}) {
t.Fatalf("unexpected database(0): %#v", di)
exp := &meta.DatabaseInfo{Name: "db0"}
if di, _ := s.Database("db0"); !reflect.DeepEqual(di, exp) {
t.Fatalf("unexpected database(0): \ngot: %#v\nexp: %#v", di, exp)
}
if di, _ := s.Database("db1"); di != nil {
t.Fatalf("unexpected database(1): %#v", di)
}
if di, _ := s.Database("db2"); !reflect.DeepEqual(di, &meta.DatabaseInfo{Name: "db2"}) {
t.Fatalf("unexpected database(2): %#v", di)
exp = &meta.DatabaseInfo{Name: "db2"}
if di, _ := s.Database("db2"); !reflect.DeepEqual(di, exp) {
t.Fatalf("unexpected database(2): \ngot: %#v\nexp: %#v", di, exp)
}
}
@ -300,8 +304,9 @@ func TestStore_DropRetentionPolicy(t *testing.T) {
if rpi, _ := s.RetentionPolicy("db0", "rp1"); rpi != nil {
t.Fatalf("unexpected policy(1): %#v", rpi)
}
if rpi, _ := s.RetentionPolicy("db0", "rp2"); !reflect.DeepEqual(rpi, &meta.RetentionPolicyInfo{Name: "rp2", ReplicaN: 1, ShardGroupDuration: 7 * 24 * time.Hour}) {
t.Fatalf("unexpected policy(2): %#v", rpi)
exp := &meta.RetentionPolicyInfo{Name: "rp2", ReplicaN: 1, ShardGroupDuration: 7 * 24 * time.Hour}
if rpi, _ := s.RetentionPolicy("db0", "rp2"); !reflect.DeepEqual(rpi, exp) {
t.Fatalf("unexpected policy(2): \ngot: %#v\nexp: %#v", rpi, exp)
}
}
@ -730,6 +735,7 @@ func TestStore_Snapshot_And_Restore(t *testing.T) {
s := MustOpenStore()
s.LeaveFiles = true
addr := s.RemoteAddr.String()
// Create a bunch of databases in the Store
nDatabases := 5
@ -744,12 +750,12 @@ func TestStore_Snapshot_And_Restore(t *testing.T) {
s.Close()
// Allow the kernel to free up the port so we can re-use it again
time.Sleep(100 * time.Millisecond)
// Test restoring the snapshot taken above.
existingDataPath := s.Path()
s = NewStore(NewConfig(existingDataPath))
if err := s.Open(); err != nil {
panic(err)
}
s = MustOpenStoreWithPath(addr, existingDataPath)
defer s.Close()
// Wait until the server is ready.
@ -782,37 +788,105 @@ func TestCluster_Open(t *testing.T) {
t.Fatal("no leader found")
}
// Add a database to each node.
for i, s := range c.Stores {
if di, err := s.CreateDatabase(fmt.Sprintf("db%d", i)); err != nil {
t.Fatal(err)
} else if di == nil {
t.Fatal("expected database")
// ensure all the nodes see the same metastore data
assertDatabaseReplicated(t, c)
}
// Ensure a multi-node cluster can start, join the cluster, and the first three members are raft nodes.
func TestCluster_OpenRaft(t *testing.T) {
// Start a single node.
c := MustOpenCluster(1)
defer c.Close()
// Check that the node becomes leader.
if s := c.Leader(); s == nil {
t.Fatal("no leader found")
}
// Add 5 more nodes.
for i := 0; i < 5; i++ {
if err := c.Join(); err != nil {
t.Fatalf("failed to join cluster: %v", err)
}
}
// Verify that each store has all databases.
for i := 0; i < len(c.Stores); i++ {
for _, s := range c.Stores {
if di, err := s.Database(fmt.Sprintf("db%d", i)); err != nil {
t.Fatal(err)
} else if di == nil {
t.Fatal("expected database")
}
// ensure we have 3 raft nodes
assertRaftPeerNodes(t, c, 3)
// ensure all the nodes see the same metastore data
assertDatabaseReplicated(t, c)
}
// Ensure a multi-node cluster can restart
func TestCluster_Restart(t *testing.T) {
// Start a single node.
c := MustOpenCluster(1)
defer c.Close()
// Check that one node is leader.
if s := c.Leader(); s == nil {
t.Fatal("no leader found")
}
// Add 5 more ndes, 2 should become raft peers, 3 remote raft clients
for i := 0; i < 5; i++ {
if err := c.Join(); err != nil {
t.Fatalf("failed to join cluster: %v", err)
}
}
// The tests use a host host assigned listener port. We need to re-use
// the original ports when the new cluster is restarted so that the existing
// peer store addresses can be reached.
addrs := []string{}
// Make sure we keep files on disk when we shutdown as well as record the
// current cluster IP addresses
for _, s := range c.Stores {
s.LeaveFiles = true
addrs = append(addrs, s.Addr.String())
}
// Stop the cluster
if err := c.Close(); err != nil {
t.Fatalf("failed to close cluster: %v", err)
}
// Wait a bit to avoid spurious port in use conflict errors from trying to
// start the new cluster to fast
time.Sleep(100 * time.Millisecond)
// Re-create the cluster nodes from existing disk paths and addresses
stores := []*Store{}
for i, s := range c.Stores {
store := MustOpenStoreWithPath(addrs[i], s.Path())
stores = append(stores, store)
}
c.Stores = stores
// Wait for the cluster to stabilize
if err := c.WaitForLeader(); err != nil {
t.Fatal("no leader found")
}
// ensure we have 3 raft nodes
assertRaftPeerNodes(t, c, 3)
// ensure all the nodes see the same metastore data
assertDatabaseReplicated(t, c)
}
// Store is a test wrapper for meta.Store.
type Store struct {
*meta.Store
Listener net.Listener
Stderr bytes.Buffer
LeaveFiles bool // set to true to leave temporary files on close
BindAddress string
Listener net.Listener
Stderr bytes.Buffer
LeaveFiles bool // set to true to leave temporary files on close
}
// NewStore returns a new test wrapper for Store.
func NewStore(c meta.Config) *Store {
func NewStore(c *meta.Config) *Store {
s := &Store{
Store: meta.NewStore(c),
}
@ -823,7 +897,16 @@ func NewStore(c meta.Config) *Store {
// MustOpenStore opens a store in a temporary path. Panic on error.
func MustOpenStore() *Store {
s := NewStore(NewConfig(MustTempFile()))
return MustOpenStoreWithPath("", MustTempFile())
}
// MustOpenStoreWith opens a store from a given path. Panic on error.
func MustOpenStoreWithPath(addr, path string) *Store {
c := NewConfig(path)
s := NewStore(c)
if addr != "" {
s.BindAddress = addr
}
if err := s.Open(); err != nil {
panic(err)
}
@ -840,18 +923,26 @@ func MustOpenStore() *Store {
// Open opens the store on a random TCP port.
func (s *Store) Open() error {
addr := "127.0.0.1:0"
if s.BindAddress != "" {
addr = s.BindAddress
}
// Open a TCP port.
ln, err := net.Listen("tcp", "127.0.0.1:0")
ln, err := net.Listen("tcp", addr)
if err != nil {
return fmt.Errorf("listen: %s", err)
}
s.Addr = ln.Addr()
s.Listener = ln
s.RemoteAddr = s.Addr
// Wrap listener in a muxer.
mux := tcp.NewMux()
s.RaftListener = mux.Listen(meta.MuxRaftHeader)
s.ExecListener = mux.Listen(meta.MuxExecHeader)
s.RPCListener = mux.Listen(meta.MuxRPCHeader)
go mux.Serve(ln)
// Open store.
@ -874,8 +965,8 @@ func (s *Store) Close() error {
}
// NewConfig returns the default test configuration.
func NewConfig(path string) meta.Config {
return meta.Config{
func NewConfig(path string) *meta.Config {
return &meta.Config{
Dir: path,
Hostname: "localhost",
BindAddress: "127.0.0.1:0",
@ -888,27 +979,17 @@ func NewConfig(path string) meta.Config {
// Cluster represents a group of stores joined as a raft cluster.
type Cluster struct {
path string
Stores []*Store
n int
}
// NewCluster returns a cluster of n stores within path.
func NewCluster(path string, n int) *Cluster {
c := &Cluster{}
// Construct a list of temporary peers.
peers := make([]string, n)
for i := range peers {
peers[i] = "127.0.0.1:0"
}
// Create new stores with temporary peers.
for i := 0; i < n; i++ {
config := NewConfig(filepath.Join(path, strconv.Itoa(i)))
config.Peers = peers
s := NewStore(config)
c.Stores = append(c.Stores, s)
}
c := &Cluster{path: path, n: n}
config := NewConfig(filepath.Join(path, strconv.Itoa(0)))
s := NewStore(config)
c.Stores = append(c.Stores, s)
return c
}
@ -930,22 +1011,34 @@ func MustOpenCluster(n int) *Cluster {
return c
}
func (c *Cluster) Join() error {
config := NewConfig(filepath.Join(c.path, strconv.Itoa(len(c.Stores))))
config.Peers = []string{c.Stores[0].Addr.String()}
s := NewStore(config)
if err := s.Open(); err != nil {
return err
}
select {
case err := <-s.Err():
panic(fmt.Sprintf("store: i=%d, addr=%s, err=%s", len(c.Stores), s.Addr.String(), err))
case <-s.Ready():
}
c.Stores = append(c.Stores, s)
return nil
}
// Open opens and initializes all stores in the cluster.
func (c *Cluster) Open() error {
if err := func() error {
// Open each store and add to peer list.
peers := make([]string, len(c.Stores))
for i, s := range c.Stores {
if err := s.Open(); err != nil {
return fmt.Errorf("open test store #%d: %s", i, err)
}
peers[i] = s.Addr.String()
if err := c.Stores[0].Open(); err != nil {
return err
}
// Reset peers on all stores.
for _, s := range c.Stores {
if err := s.SetPeers(peers); err != nil {
return fmt.Errorf("set peers: %s", err)
for i := 1; i < c.n; i++ {
if err := c.Join(); err != nil {
panic(fmt.Sprintf("failed to add new cluster node: %v", err))
}
}
@ -965,6 +1058,15 @@ func (c *Cluster) Close() error {
return nil
}
func (c *Cluster) WaitForLeader() error {
for _, s := range c.Stores {
if err := s.WaitForLeader(5 * time.Second); err != nil {
return err
}
}
return nil
}
// Leader returns the store that is currently leader.
func (c *Cluster) Leader() *Store {
for _, s := range c.Stores {
@ -987,3 +1089,44 @@ func MustTempFile() string {
func mockHashPassword(password string) ([]byte, error) {
return []byte(password), nil
}
// assertRaftPeerNodes counts the number of nodes running with a local raft
// database and asserts that the count is equal to n
func assertRaftPeerNodes(t *testing.T, c *Cluster, n int) {
// Ensure we have the required number of raft nodes
raftCount := 0
for _, s := range c.Stores {
if _, err := os.Stat(filepath.Join(s.Path(), "raft.db")); err == nil {
raftCount += 1
}
}
if raftCount != n {
t.Errorf("raft nodes mismatch: got %v, exp %v", raftCount, n)
}
}
// assertDatabaseReplicated creates a new database named after each node and
// then verifies that each node can see all the created databases from their
// local meta data
func assertDatabaseReplicated(t *testing.T, c *Cluster) {
// Add a database to each node.
for i, s := range c.Stores {
if di, err := s.CreateDatabase(fmt.Sprintf("db%d", i)); err != nil {
t.Fatal(err)
} else if di == nil {
t.Fatal("expected database")
}
}
// Verify that each store has all databases.
for i := 0; i < len(c.Stores); i++ {
for _, s := range c.Stores {
if di, err := s.Database(fmt.Sprintf("db%d", i)); err != nil {
t.Fatal(err)
} else if di == nil {
t.Fatal("expected database")
}
}
}
}

View File

@ -9,6 +9,8 @@ import (
// PointBatcher accepts Points and will emit a batch of those points when either
// a) the batch reaches a certain size, or b) a certain time passes.
type PointBatcher struct {
stats PointBatcherStats
size int
duration time.Duration
@ -17,8 +19,6 @@ type PointBatcher struct {
out chan []Point
flush chan struct{}
stats PointBatcherStats
wg *sync.WaitGroup
}

View File

@ -1,21 +1,23 @@
package tsdb
package tsdb_test
import (
"testing"
"time"
"github.com/influxdb/influxdb/tsdb"
)
// TestBatch_Size ensures that a batcher generates a batch when the size threshold is reached.
func TestBatch_Size(t *testing.T) {
batchSize := 5
batcher := NewPointBatcher(batchSize, time.Hour)
batcher := tsdb.NewPointBatcher(batchSize, time.Hour)
if batcher == nil {
t.Fatal("failed to create batcher for size test")
}
batcher.Start()
var p Point
var p tsdb.Point
go func() {
for i := 0; i < batchSize; i++ {
batcher.In() <- p
@ -31,14 +33,14 @@ func TestBatch_Size(t *testing.T) {
// TestBatch_Size ensures that a batcher generates a batch when the timeout triggers.
func TestBatch_Timeout(t *testing.T) {
batchSize := 5
batcher := NewPointBatcher(batchSize+1, 100*time.Millisecond)
batcher := tsdb.NewPointBatcher(batchSize+1, 100*time.Millisecond)
if batcher == nil {
t.Fatal("failed to create batcher for timeout test")
}
batcher.Start()
var p Point
var p tsdb.Point
go func() {
for i := 0; i < batchSize; i++ {
batcher.In() <- p
@ -54,14 +56,14 @@ func TestBatch_Timeout(t *testing.T) {
// TestBatch_Flush ensures that a batcher generates a batch when flushed
func TestBatch_Flush(t *testing.T) {
batchSize := 2
batcher := NewPointBatcher(batchSize, time.Hour)
batcher := tsdb.NewPointBatcher(batchSize, time.Hour)
if batcher == nil {
t.Fatal("failed to create batcher for flush test")
}
batcher.Start()
var p Point
var p tsdb.Point
go func() {
batcher.In() <- p
batcher.Flush()
@ -76,15 +78,15 @@ func TestBatch_Flush(t *testing.T) {
// TestBatch_MultipleBatches ensures that a batcher correctly processes multiple batches.
func TestBatch_MultipleBatches(t *testing.T) {
batchSize := 2
batcher := NewPointBatcher(batchSize, 100*time.Millisecond)
batcher := tsdb.NewPointBatcher(batchSize, 100*time.Millisecond)
if batcher == nil {
t.Fatal("failed to create batcher for size test")
}
batcher.Start()
var p Point
var b []Point
var p tsdb.Point
var b []tsdb.Point
batcher.In() <- p
batcher.In() <- p
@ -102,7 +104,7 @@ func TestBatch_MultipleBatches(t *testing.T) {
checkPointBatcherStats(t, batcher, -1, 3, 1, 1)
}
func checkPointBatcherStats(t *testing.T, b *PointBatcher, batchTotal, pointTotal, sizeTotal, timeoutTotal int) {
func checkPointBatcherStats(t *testing.T, b *tsdb.PointBatcher, batchTotal, pointTotal, sizeTotal, timeoutTotal int) {
stats := b.Stats()
if batchTotal != -1 && stats.BatchTotal != uint64(batchTotal) {

View File

@ -0,0 +1,119 @@
package tsdb
import (
"bytes"
"container/heap"
)
// MultiCursor merges the output of multiple cursors into one ordered stream.
//
// When several cursors yield the same key, the cursor passed earliest in the
// argument list takes precedence and the key is returned exactly once.
func MultiCursor(cursors ...Cursor) Cursor {
	mc := multiCursor{cursors: cursors}
	return &mc
}
// multiCursor represents a cursor that combines multiple cursors into one.
type multiCursor struct {
	cursors []Cursor   // source cursors in priority order (index 0 wins key ties)
	heap    cursorHeap // min-heap holding the next pending item from each live cursor
	prev    []byte     // last key returned; used to drop duplicate keys in pop()
}
// Seek moves the cursor to a given key.
// It rebuilds the merge heap from scratch, so it may be called repeatedly
// to restart iteration from a new position.
func (mc *multiCursor) Seek(seek []byte) (key, value []byte) {
	// Initialize heap.
	h := make(cursorHeap, 0, len(mc.cursors))
	for i, c := range mc.cursors {
		// Move cursor to position. Skip if it's empty.
		k, v := c.Seek(seek)
		if k == nil {
			continue
		}
		// Append cursor to heap.
		// Priority decreases with argument position so that, on equal keys,
		// the earliest-listed cursor is popped first.
		h = append(h, &cursorHeapItem{
			key:      k,
			value:    v,
			cursor:   c,
			priority: len(mc.cursors) - i,
		})
	}
	heap.Init(&h)
	mc.heap = h
	mc.prev = nil // reset dedup state so the first key after a seek is always emitted
	return mc.pop()
}
// Next returns the next key/value pair in the merged stream, or nils at EOF.
func (mc *multiCursor) Next() (key, value []byte) {
	return mc.pop()
}
// pop returns the next item from the heap.
// Reads the next key/value from item's cursor and puts it back on the heap.
// Duplicate keys are collapsed: only the first (highest priority) occurrence
// of a key is returned; later occurrences from other cursors are discarded.
func (mc *multiCursor) pop() (key, value []byte) {
	// Read items until we have a key that doesn't match the previously read one.
	// This is to perform deduplication when there's multiple items with the same key.
	// The highest priority cursor will be read first and then remaining keys will be dropped.
	for {
		// Return nil if there are no more items left.
		if len(mc.heap) == 0 {
			return nil, nil
		}
		// Read the next item from the heap.
		item := heap.Pop(&mc.heap).(*cursorHeapItem)
		// Save the key/value for return.
		key, value = item.key, item.value
		// Read the next item from the cursor. Push back to heap if one exists.
		// The heap item struct is reused for the cursor's next entry.
		if item.key, item.value = item.cursor.Next(); item.key != nil {
			heap.Push(&mc.heap, item)
		}
		// Skip if this key matches the previously returned one.
		if bytes.Equal(mc.prev, key) {
			continue
		}
		mc.prev = key
		return
	}
}
// cursorHeap is a min-heap of pending cursor items, ordered by key and,
// for equal keys, by descending priority (higher priority pops first).
type cursorHeap []*cursorHeapItem

func (h cursorHeap) Len() int      { return len(h) }
func (h cursorHeap) Swap(i, j int) { h[i], h[j] = h[j], h[i] }

func (h cursorHeap) Less(i, j int) bool {
	switch bytes.Compare(h[i].key, h[j].key) {
	case -1:
		return true
	case 0:
		return h[i].priority > h[j].priority
	default:
		return false
	}
}

// Push appends x to the backing slice. Called by container/heap.
func (h *cursorHeap) Push(x interface{}) {
	*h = append(*h, x.(*cursorHeapItem))
}

// Pop removes and returns the final element of the backing slice.
// container/heap swaps the minimum there before calling this.
func (h *cursorHeap) Pop() interface{} {
	old := *h
	item := old[len(old)-1]
	*h = old[:len(old)-1]
	return item
}

// cursorHeapItem is a single pending key/value drawn from one source cursor.
type cursorHeapItem struct {
	key      []byte
	value    []byte
	cursor   Cursor // source cursor to pull the following item from
	priority int    // higher wins on equal keys; earlier cursors get higher values
}

View File

@ -0,0 +1,221 @@
package tsdb_test
import (
"bytes"
"encoding/binary"
"math/rand"
"reflect"
"sort"
"testing"
"testing/quick"
"github.com/influxdb/influxdb/tsdb"
)
// Ensure the multi-cursor can correctly iterate across a single subcursor.
func TestMultiCursor_Single(t *testing.T) {
	mc := tsdb.MultiCursor(
		NewCursor([]CursorItem{
			{Key: []byte{0x00}, Value: []byte{0x00}},
			{Key: []byte{0x01}, Value: []byte{0x10}},
			{Key: []byte{0x02}, Value: []byte{0x20}},
		}),
	)

	// Walk the cursor, comparing each pair against the expected sequence.
	exp := []CursorItem{
		{Key: []byte{0x00}, Value: []byte{0x00}},
		{Key: []byte{0x01}, Value: []byte{0x10}},
		{Key: []byte{0x02}, Value: []byte{0x20}},
	}
	k, v := mc.Seek([]byte{0x00})
	for _, item := range exp {
		if !bytes.Equal(k, item.Key) || !bytes.Equal(v, item.Value) {
			t.Fatalf("unexpected key/value: %x / %x", k, v)
		}
		k, v = mc.Next()
	}
	if k != nil {
		t.Fatalf("expected eof, got: %x / %x", k, v)
	}
}
// Ensure the multi-cursor can correctly iterate across multiple non-overlapping subcursors.
func TestMultiCursor_Multiple_NonOverlapping(t *testing.T) {
	mc := tsdb.MultiCursor(
		NewCursor([]CursorItem{
			{Key: []byte{0x00}, Value: []byte{0x00}},
			{Key: []byte{0x03}, Value: []byte{0x30}},
			{Key: []byte{0x04}, Value: []byte{0x40}},
		}),
		NewCursor([]CursorItem{
			{Key: []byte{0x01}, Value: []byte{0x10}},
			{Key: []byte{0x02}, Value: []byte{0x20}},
		}),
	)

	// The merged stream should interleave both cursors in key order.
	exp := []CursorItem{
		{Key: []byte{0x00}, Value: []byte{0x00}},
		{Key: []byte{0x01}, Value: []byte{0x10}},
		{Key: []byte{0x02}, Value: []byte{0x20}},
		{Key: []byte{0x03}, Value: []byte{0x30}},
		{Key: []byte{0x04}, Value: []byte{0x40}},
	}
	k, v := mc.Seek([]byte{0x00})
	for _, item := range exp {
		if !bytes.Equal(k, item.Key) || !bytes.Equal(v, item.Value) {
			t.Fatalf("unexpected key/value: %x / %x", k, v)
		}
		k, v = mc.Next()
	}
	if k != nil {
		t.Fatalf("expected eof, got: %x / %x", k, v)
	}
}
// Ensure the multi-cursor can correctly iterate across multiple overlapping subcursors.
func TestMultiCursor_Multiple_Overlapping(t *testing.T) {
	mc := tsdb.MultiCursor(
		NewCursor([]CursorItem{
			{Key: []byte{0x00}, Value: []byte{0x00}},
			{Key: []byte{0x03}, Value: []byte{0x03}},
			{Key: []byte{0x04}, Value: []byte{0x04}},
		}),
		NewCursor([]CursorItem{
			{Key: []byte{0x00}, Value: []byte{0xF0}},
			{Key: []byte{0x02}, Value: []byte{0xF2}},
			{Key: []byte{0x04}, Value: []byte{0xF4}},
		}),
	)

	// On duplicate keys the first cursor's value must win.
	exp := []CursorItem{
		{Key: []byte{0x00}, Value: []byte{0x00}},
		{Key: []byte{0x02}, Value: []byte{0xF2}},
		{Key: []byte{0x03}, Value: []byte{0x03}},
		{Key: []byte{0x04}, Value: []byte{0x04}},
	}
	k, v := mc.Seek([]byte{0x00})
	for _, item := range exp {
		if !bytes.Equal(k, item.Key) || !bytes.Equal(v, item.Value) {
			t.Fatalf("unexpected key/value: %x / %x", k, v)
		}
		k, v = mc.Next()
	}
	if k != nil {
		t.Fatalf("expected eof, got: %x / %x", k, v)
	}
}
// Ensure the multi-cursor can handle randomly generated data.
//
// The expected merged output is built by hand (first-seen key wins, keys
// below the seek point excluded) and compared against iterating the
// MultiCursor. Fix: the error returned by quick.Check was previously
// discarded, so a generator failure would silently pass the test.
func TestMultiCursor_Quick(t *testing.T) {
	if err := quick.Check(func(seek uint64, cursors []Cursor) bool {
		var got, exp [][]byte
		seek %= 100

		// Merge all cursor data to determine expected output.
		// First seen key overrides all other items with the same key.
		m := make(map[string][]byte)
		for _, c := range cursors {
			for _, item := range c.items {
				// Skip keys that fall below the seek position.
				if bytes.Compare(item.Key, u64tob(seek)) == -1 {
					continue
				}
				if _, ok := m[string(item.Key)]; ok {
					continue
				}
				m[string(item.Key)] = item.Value
			}
		}

		// Convert map back to single item list.
		for k, v := range m {
			exp = append(exp, append([]byte(k), v...))
		}
		sort.Sort(byteSlices(exp))

		// Create multi-cursor and iterate over all items.
		mc := tsdb.MultiCursor(tsdbCursorSlice(cursors)...)
		for k, v := mc.Seek(u64tob(seek)); k != nil; k, v = mc.Next() {
			got = append(got, append(k, v...))
		}

		// Verify results.
		if !reflect.DeepEqual(got, exp) {
			t.Fatalf("mismatch: seek=%d\n\ngot=%+v\n\nexp=%+v", seek, got, exp)
		}

		return true
	}, nil); err != nil {
		t.Fatal(err)
	}
}
// Cursor represents an in-memory test cursor.
type Cursor struct {
	items []CursorItem // key-sorted items backing the cursor
	index int          // current position within items
}

// NewCursor returns a new instance of Cursor.
// Note: the caller's items slice is sorted in place before being adopted.
func NewCursor(items []CursorItem) *Cursor {
	sort.Sort(CursorItems(items))
	return &Cursor{items: items}
}
// Seek positions the cursor at the first item whose key is >= seek and
// returns that key/value pair, or nils when no such item exists.
func (c *Cursor) Seek(seek []byte) (key, value []byte) {
	for i, item := range c.items {
		// Skip keys strictly less than seek.
		if bytes.Compare(item.Key, seek) == -1 {
			continue
		}
		c.index = i
		return item.Key, item.Value
	}
	c.index = len(c.items)
	return nil, nil
}
// Next advances the cursor and returns the following key/value pair,
// or nils once all items have been consumed.
func (c *Cursor) Next() (key, value []byte) {
	if next := c.index + 1; next < len(c.items) {
		c.index = next
		return c.items[next].Key, c.items[next].Value
	}
	return nil, nil
}
// Generate returns a randomly generated cursor. Implements quick.Generator.
// Value receiver: mutations apply to the local copy that is returned.
// Keys are drawn from a small range, so duplicates across cursors are
// likely, which exercises the multi-cursor's deduplication path.
func (c Cursor) Generate(rand *rand.Rand, size int) reflect.Value {
	c.index = 0

	c.items = make([]CursorItem, rand.Intn(size))
	for i := range c.items {
		// quick.Value generates a random []byte payload.
		value, _ := quick.Value(reflect.TypeOf([]byte(nil)), rand)

		c.items[i] = CursorItem{
			Key:   u64tob(uint64(rand.Intn(size))),
			Value: value.Interface().([]byte),
		}
	}

	// Sort items by key.
	sort.Sort(CursorItems(c.items))

	return reflect.ValueOf(c)
}
// tsdbCursorSlice converts a Cursor slice to a tsdb.Cursor slice.
//
// Pointers into the original backing array are taken so each tsdb.Cursor
// shares state with its test Cursor. The result slice is pre-sized to
// avoid repeated growth during append.
func tsdbCursorSlice(a []Cursor) []tsdb.Cursor {
	other := make([]tsdb.Cursor, 0, len(a))
	for i := range a {
		other = append(other, &a[i])
	}
	return other
}
// CursorItem represents a key/value pair in a cursor.
type CursorItem struct {
Key []byte
Value []byte
}
type CursorItems []CursorItem
func (a CursorItems) Len() int { return len(a) }
func (a CursorItems) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
func (a CursorItems) Less(i, j int) bool { return bytes.Compare(a[i].Key, a[j].Key) == -1 }
// byteSlices represents a sortable slice of byte slices.
type byteSlices [][]byte
func (a byteSlices) Len() int { return len(a) }
func (a byteSlices) Less(i, j int) bool { return bytes.Compare(a[i], a[j]) == -1 }
func (a byteSlices) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
// u64tob converts a uint64 into an 8-byte slice.
func u64tob(v uint64) []byte {
b := make([]byte, 8)
binary.BigEndian.PutUint64(b, v)
return b
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,695 @@
package b1
import (
"bytes"
"encoding/binary"
"errors"
"fmt"
"hash/fnv"
"io"
"log"
"os"
"sort"
"sync"
"time"
"github.com/boltdb/bolt"
"github.com/influxdb/influxdb/tsdb"
)
// Format is the file format name of this engine.
const Format = "b1"

// Register the engine constructor under its format name so tsdb can
// instantiate it by name.
func init() {
	tsdb.RegisterEngine(Format, NewEngine)
}

// topLevelBucketN is the number of non-series buckets in the bolt db.
// Used by SeriesCount to exclude fixed buckets from the bucket count.
// NOTE(review): Open creates "series", "fields", "wal", and "meta"
// top-level buckets — confirm whether this should be 4 rather than 3.
const topLevelBucketN = 3

var (
	// ErrWALPartitionNotFound returns when flushing a partition that does not exist.
	ErrWALPartitionNotFound = errors.New("wal partition not found")
)

// Ensure Engine implements the interface.
var _ tsdb.Engine = &Engine{}
// Engine represents a version 1 storage engine.
type Engine struct {
	mu sync.RWMutex // guards cache, walSize, and the lifecycle fields below

	path string   // path to data file
	db   *bolt.DB // underlying database

	cache   map[uint8]map[string][][]byte // values by <wal partition,series>
	walSize int                           // approximate size of the WAL, in bytes

	flush      chan struct{} // signals background flush
	flushTimer *time.Timer   // signals time-based flush

	// These coordinate closing and waiting for running goroutines.
	wg      sync.WaitGroup
	closing chan struct{}

	// Used for out-of-band error messages.
	logger *log.Logger

	// The maximum size and time thresholds for flushing the WAL.
	MaxWALSize             int
	WALFlushInterval       time.Duration
	WALPartitionFlushDelay time.Duration

	// The writer used by the logger.
	// Must be set before Open (see SetLogOutput).
	LogOutput io.Writer
}
// NewEngine returns a new instance of Engine configured from opt,
// logging to stderr by default.
func NewEngine(path string, opt tsdb.EngineOptions) tsdb.Engine {
	e := Engine{
		path:                   path,
		flush:                  make(chan struct{}, 1),
		cache:                  make(map[uint8]map[string][][]byte),
		MaxWALSize:             opt.MaxWALSize,
		WALFlushInterval:       opt.WALFlushInterval,
		WALPartitionFlushDelay: opt.WALPartitionFlushDelay,
		LogOutput:              os.Stderr,
	}

	// Pre-create the cache map for every WAL partition.
	for p := uint8(0); p < WALPartitionN; p++ {
		e.cache[p] = make(map[string][][]byte)
	}

	return &e
}
// Path returns the path the engine was initialized with.
func (e *Engine) Path() string {
	return e.path
}
// Open opens and initializes the engine.
// The setup work runs inside a closure so e.mu is released before the
// final Flush call, which re-acquires the lock itself.
func (e *Engine) Open() error {
	if err := func() error {
		e.mu.Lock()
		defer e.mu.Unlock()

		// Open underlying storage.
		db, err := bolt.Open(e.path, 0666, &bolt.Options{Timeout: 1 * time.Second})
		if err != nil {
			return err
		}
		e.db = db

		// Initialize data file: create the fixed top-level buckets.
		if err := e.db.Update(func(tx *bolt.Tx) error {
			_, _ = tx.CreateBucketIfNotExists([]byte("series"))
			_, _ = tx.CreateBucketIfNotExists([]byte("fields"))
			_, _ = tx.CreateBucketIfNotExists([]byte("wal"))

			// Set file format, if not set yet.
			b, _ := tx.CreateBucketIfNotExists([]byte("meta"))
			if v := b.Get([]byte("format")); v == nil {
				if err := b.Put([]byte("format"), []byte(Format)); err != nil {
					return fmt.Errorf("set format: %s", err)
				}
			}

			return nil
		}); err != nil {
			return fmt.Errorf("init: %s", err)
		}

		// Start flush interval timer.
		e.flushTimer = time.NewTimer(e.WALFlushInterval)

		// Initialize logger.
		e.logger = log.New(e.LogOutput, "[b1] ", log.LstdFlags)

		// Start background goroutines.
		e.wg.Add(1)
		e.closing = make(chan struct{})
		go e.autoflusher(e.closing)

		return nil
	}(); err != nil {
		// Best-effort cleanup of anything the failed setup created.
		e.close()
		return err
	}

	// Flush on-disk WAL before we return to the caller.
	if err := e.Flush(0); err != nil {
		return fmt.Errorf("flush: %s", err)
	}

	return nil
}
// Close shuts the engine down and waits for background goroutines to exit.
func (e *Engine) Close() error {
	e.mu.Lock()
	err := e.close()
	e.mu.Unlock()

	// Wait for open goroutines to finish.
	// The mutex must be released before waiting: autoflusher may be inside
	// Flush/FlushPartition, which acquire e.mu, so holding the lock here
	// would deadlock shutdown.
	e.wg.Wait()
	return err
}
// close releases the engine's resources: the bolt database handle is
// closed and the closing channel is shut to stop background goroutines.
// Callers must hold e.mu.
//
// Fix: the error from e.db.Close() was previously discarded and the
// function always returned nil; it is now propagated to the caller.
func (e *Engine) close() error {
	var err error
	if e.db != nil {
		err = e.db.Close()
	}
	if e.closing != nil {
		close(e.closing)
		e.closing = nil
	}
	return err
}
// SetLogOutput sets the writer used for log output.
// This must be set before opening the engine.
func (e *Engine) SetLogOutput(w io.Writer) {
	e.LogOutput = w
}
// LoadMetadataIndex loads the shard metadata into memory.
//
// Measurement field sets are read from the "fields" bucket and recorded in
// both the database index and the measurementFields map (with a codec
// rebuilt from the stored fields). Series are then read from the "series"
// bucket and registered against their measurement in the index.
//
// Fix: `for name, _ := range` replaced with the idiomatic
// `for name := range` (gofmt/staticcheck clean); behavior is unchanged.
func (e *Engine) LoadMetadataIndex(index *tsdb.DatabaseIndex, measurementFields map[string]*tsdb.MeasurementFields) error {
	return e.db.View(func(tx *bolt.Tx) error {
		// Load measurement metadata from the "fields" bucket.
		meta := tx.Bucket([]byte("fields"))
		c := meta.Cursor()
		for k, v := c.First(); k != nil; k, v = c.Next() {
			m := index.CreateMeasurementIndexIfNotExists(string(k))

			mf := &tsdb.MeasurementFields{}
			if err := mf.UnmarshalBinary(v); err != nil {
				return err
			}
			for name := range mf.Fields {
				m.SetFieldName(name)
			}
			mf.Codec = tsdb.NewFieldCodec(mf.Fields)
			measurementFields[m.Name] = mf
		}

		// Load series metadata from the "series" bucket.
		meta = tx.Bucket([]byte("series"))
		c = meta.Cursor()
		for k, v := c.First(); k != nil; k, v = c.Next() {
			series := &tsdb.Series{}
			if err := series.UnmarshalBinary(v); err != nil {
				return err
			}
			index.CreateSeriesIndexIfNotExists(tsdb.MeasurementFromSeriesKey(string(k)), series)
		}

		return nil
	})
}
// WritePoints will write the raw data points and any new metadata to the index in the shard.
// The write happens in two phases: first everything is committed to bolt
// (metadata buckets plus the on-disk WAL), and only if that succeeds are
// the points mirrored into the in-memory WAL cache used by cursors.
func (e *Engine) WritePoints(points []tsdb.Point, measurementFieldsToSave map[string]*tsdb.MeasurementFields, seriesToCreate []*tsdb.SeriesCreate) error {
	// save to the underlying bolt instance
	if err := e.db.Update(func(tx *bolt.Tx) error {
		// save any new metadata
		if len(seriesToCreate) > 0 {
			b := tx.Bucket([]byte("series"))
			for _, sc := range seriesToCreate {
				data, err := sc.Series.MarshalBinary()
				if err != nil {
					return err
				}
				if err := b.Put([]byte(sc.Series.Key), data); err != nil {
					return err
				}
			}
		}
		if len(measurementFieldsToSave) > 0 {
			b := tx.Bucket([]byte("fields"))
			for name, m := range measurementFieldsToSave {
				data, err := m.MarshalBinary()
				if err != nil {
					return err
				}
				if err := b.Put([]byte(name), data); err != nil {
					return err
				}
			}
		}

		// Write points to WAL bucket.
		wal := tx.Bucket([]byte("wal"))
		for _, p := range points {
			// Retrieve partition bucket keyed by the point's WAL partition.
			key := p.Key()
			b, err := wal.CreateBucketIfNotExists([]byte{WALPartition(key)})
			if err != nil {
				return fmt.Errorf("create WAL partition bucket: %s", err)
			}

			// Generate an autoincrementing index for the WAL partition.
			id, _ := b.NextSequence()

			// Append points sequentially to the WAL bucket.
			v := marshalWALEntry(key, p.UnixNano(), p.Data())
			if err := b.Put(u64tob(id), v); err != nil {
				return fmt.Errorf("put wal: %s", err)
			}
		}

		return nil
	}); err != nil {
		return err
	}

	// If successful then save points to in-memory cache.
	if err := func() error {
		e.mu.Lock()
		defer e.mu.Unlock()

		// tracks which in-memory caches need to be resorted
		resorts := map[uint8]map[string]struct{}{}

		for _, p := range points {
			// Generate in-memory cache entry of <timestamp,data>.
			// The 8-byte big-endian timestamp prefix keeps entries
			// byte-comparable in time order.
			key, data := p.Key(), p.Data()
			v := make([]byte, 8+len(data))
			binary.BigEndian.PutUint64(v[0:8], uint64(p.UnixNano()))
			copy(v[8:], data)

			// Determine if we are appending.
			partitionID := WALPartition(key)
			a := e.cache[partitionID][string(key)]
			appending := (len(a) == 0 || bytes.Compare(a[len(a)-1], v) == -1)

			// Append to cache list.
			a = append(a, v)

			// If not appending, keep track of cache lists that need to be resorted.
			if !appending {
				series := resorts[partitionID]
				if series == nil {
					series = map[string]struct{}{}
					resorts[partitionID] = series
				}
				series[string(key)] = struct{}{}
			}

			e.cache[partitionID][string(key)] = a

			// Calculate estimated WAL size.
			e.walSize += len(key) + len(v)
		}

		// Sort by timestamp if not appending.
		for partitionID, cache := range resorts {
			for key, _ := range cache {
				sort.Sort(byteSlices(e.cache[partitionID][key]))
			}
		}

		// Check for flush threshold.
		e.triggerAutoFlush()

		return nil
	}(); err != nil {
		return err
	}

	return nil
}
// DeleteSeries deletes the series from the engine.
//
// Each key's metadata entry and per-series data bucket are removed from
// the bolt database, and its cached WAL entries are dropped.
//
// Fix: the redundant `if err := Update(...); err != nil { return err };
// return nil` wrapper is collapsed to a direct return of the Update error.
func (e *Engine) DeleteSeries(keys []string) error {
	e.mu.Lock()
	defer e.mu.Unlock()

	return e.db.Update(func(tx *bolt.Tx) error {
		b := tx.Bucket([]byte("series"))
		for _, k := range keys {
			if err := b.Delete([]byte(k)); err != nil {
				return err
			}
			// A missing per-series bucket is not an error.
			if err := tx.DeleteBucket([]byte(k)); err != nil && err != bolt.ErrBucketNotFound {
				return err
			}
			// NOTE(review): the cache is mutated before the transaction
			// commits; a later error leaves the cache out of sync.
			delete(e.cache[WALPartition([]byte(k))], k)
		}
		return nil
	})
}
// DeleteMeasurement deletes a measurement and all related series.
//
// The measurement's field metadata is removed, then every series key's
// metadata entry, data bucket, and cached WAL entries are deleted —
// mirroring DeleteSeries.
//
// Fix: the redundant `if err := Update(...); err != nil { return err };
// return nil` wrapper is collapsed to a direct return of the Update error.
func (e *Engine) DeleteMeasurement(name string, seriesKeys []string) error {
	e.mu.Lock()
	defer e.mu.Unlock()

	return e.db.Update(func(tx *bolt.Tx) error {
		bm := tx.Bucket([]byte("fields"))
		if err := bm.Delete([]byte(name)); err != nil {
			return err
		}
		b := tx.Bucket([]byte("series"))
		for _, k := range seriesKeys {
			if err := b.Delete([]byte(k)); err != nil {
				return err
			}
			// A missing per-series bucket is not an error.
			if err := tx.DeleteBucket([]byte(k)); err != nil && err != bolt.ErrBucketNotFound {
				return err
			}
			delete(e.cache[WALPartition([]byte(k))], k)
		}
		return nil
	})
}
// Flush writes all points from the write ahead log to the index.
// partitionFlushDelay is slept between partitions to let other goroutines
// make progress; each partition is flushed under its own lock acquisition
// (see FlushPartition), so e.mu is only taken here for the final reset.
func (e *Engine) Flush(partitionFlushDelay time.Duration) error {
	// Retrieve a list of WAL buckets.
	var partitionIDs []uint8
	if err := e.db.View(func(tx *bolt.Tx) error {
		return tx.Bucket([]byte("wal")).ForEach(func(key, _ []byte) error {
			partitionIDs = append(partitionIDs, uint8(key[0]))
			return nil
		})
	}); err != nil {
		return err
	}

	// Continue flushing until there are no more partition buckets.
	for _, partitionID := range partitionIDs {
		if err := e.FlushPartition(partitionID); err != nil {
			return fmt.Errorf("flush partition: id=%d, err=%s", partitionID, err)
		}

		// Wait momentarily so other threads can process.
		time.Sleep(partitionFlushDelay)
	}

	e.mu.Lock()
	defer e.mu.Unlock()

	// Reset WAL size.
	e.walSize = 0

	// Reset the timer.
	e.flushTimer.Reset(e.WALFlushInterval)

	return nil
}
// FlushPartition flushes a single WAL partition.
// Every WAL entry in the partition is moved into its per-series bucket,
// deleted from the WAL, and the partition's in-memory cache is dropped
// once the transaction has committed.
func (e *Engine) FlushPartition(partitionID uint8) error {
	e.mu.Lock()
	defer e.mu.Unlock()

	startTime := time.Now()

	var pointN int
	if err := e.db.Update(func(tx *bolt.Tx) error {
		// Retrieve partition bucket. Exit if it doesn't exist.
		pb := tx.Bucket([]byte("wal")).Bucket([]byte{byte(partitionID)})
		if pb == nil {
			return ErrWALPartitionNotFound
		}

		// Iterate over keys in the WAL partition bucket.
		c := pb.Cursor()
		for k, v := c.First(); k != nil; k, v = c.Next() {
			key, timestamp, data := unmarshalWALEntry(v)

			// Create bucket for entry (one top-level bucket per series key).
			b, err := tx.CreateBucketIfNotExists(key)
			if err != nil {
				return fmt.Errorf("create bucket: %s", err)
			}

			// Write point to bucket, keyed by its timestamp.
			if err := b.Put(u64tob(uint64(timestamp)), data); err != nil {
				return fmt.Errorf("put: %s", err)
			}

			// Remove entry in the WAL.
			if err := c.Delete(); err != nil {
				return fmt.Errorf("delete: %s", err)
			}

			pointN++
		}

		return nil
	}); err != nil {
		return err
	}

	// Reset cache for this partition; its contents are now on disk.
	e.cache[partitionID] = make(map[string][][]byte)

	if pointN > 0 {
		e.logger.Printf("flush %d points in %.3fs", pointN, time.Since(startTime).Seconds())
	}

	return nil
}
// autoflusher waits for notification of a flush and kicks it off in the background.
// This method runs in a separate goroutine.
func (e *Engine) autoflusher(closing chan struct{}) {
	defer e.wg.Done()

	for {
		// Block until shutdown or until either flush trigger fires.
		select {
		case <-closing:
			return
		case <-e.flushTimer.C:
		case <-e.flush:
		}

		if err := e.Flush(e.WALPartitionFlushDelay); err != nil {
			e.logger.Printf("flush error: %s", err)
		}
	}
}
// triggerAutoFlush signals that a flush should occur if the size is above the threshold.
// This function must be called within the context of a lock.
func (e *Engine) triggerAutoFlush() {
	if e.walSize >= e.MaxWALSize {
		// Non-blocking send: if a flush signal is already pending, drop this one.
		select {
		case e.flush <- struct{}{}:
		default:
		}
	}
}
// SeriesCount returns the number of series buckets on the shard.
// This does not include a count from the WAL.
//
// Fix: on a View error the function previously still applied the
// top-level-bucket offset and returned the partial count alongside the
// error; it now returns (0, err) so the count is never misleading.
func (e *Engine) SeriesCount() (n int, err error) {
	err = e.db.View(func(tx *bolt.Tx) error {
		return tx.ForEach(func(_ []byte, _ *bolt.Bucket) error {
			n++
			return nil
		})
	})
	if err != nil {
		return 0, err
	}

	// Exclude the fixed top-level buckets so only per-series buckets count.
	return n - topLevelBucketN, nil
}
// Begin starts a new transaction on the engine.
func (e *Engine) Begin(writable bool) (tsdb.Tx, error) {
	btx, err := e.db.Begin(writable)
	if err != nil {
		return nil, err
	}
	return &Tx{Tx: btx, engine: e}, nil
}
// DB returns the underlying Bolt database.
func (e *Engine) DB() *bolt.DB { return e.db }

// Tx represents a transaction.
// It embeds the bolt transaction and keeps a reference back to the
// engine so cursors can consult the in-memory WAL cache.
type Tx struct {
	*bolt.Tx
	engine *Engine
}
// Cursor returns an iterator for a key, or nil if the series has no
// on-disk bucket and no cached WAL points.
func (tx *Tx) Cursor(key string) tsdb.Cursor {
	// Retrieve key bucket.
	b := tx.Bucket([]byte(key))

	// Hold the read lock while inspecting and copying the cache.
	tx.engine.mu.RLock()
	defer tx.engine.mu.RUnlock()

	// Ignore if there is no bucket or points in the cache.
	partitionID := WALPartition([]byte(key))
	if b == nil && len(tx.engine.cache[partitionID][key]) == 0 {
		return nil
	}

	// Retrieve a copy of the in-cache points for the key.
	// The outer slice is copied so a concurrent flush (which replaces the
	// partition map) cannot affect this cursor; the individual entry byte
	// slices are shared, not copied.
	cache := make([][]byte, len(tx.engine.cache[partitionID][key]))
	copy(cache, tx.engine.cache[partitionID][key])

	// Build a cursor that merges the bucket and cache together.
	cur := &Cursor{cache: cache}
	if b != nil {
		cur.cursor = b.Cursor()
	}
	return cur
}
// Cursor provides ordered iteration across a series.
// It merges on-disk values (via the bolt cursor) with a snapshot of the
// engine's in-memory WAL cache; when both contain the same key, the
// cached entry is returned and the on-disk duplicate is skipped (see read).
type Cursor struct {
	// Bolt cursor and readahead buffer.
	cursor *bolt.Cursor
	buf    struct {
		key, value []byte
	}

	// Cache and current cache index.
	// Each cache entry is an 8-byte big-endian timestamp followed by data.
	cache [][]byte
	index int

	// Previously read key, used to suppress duplicate keys.
	prev []byte
}
// Seek moves the cursor to a position and returns the closest key/value pair.
func (c *Cursor) Seek(seek []byte) (key, value []byte) {
	// Position the underlying Bolt cursor, if one exists.
	if c.cursor != nil {
		c.buf.key, c.buf.value = c.cursor.Seek(seek)
	}

	// Binary-search the cache for the first entry at or after the seek key.
	c.index = sort.Search(len(c.cache), func(i int) bool {
		return bytes.Compare(c.cache[i][0:8], seek) >= 0
	})

	// Forget the previously returned key so duplicate skipping starts fresh.
	c.prev = nil

	return c.read()
}
// Next returns the next key/value pair from the cursor.
// All advancing/merging logic lives in read().
func (c *Cursor) Next() (key, value []byte) {
	return c.read()
}
// read returns the next key/value in the cursor buffer or cache.
//
// Whichever source (Bolt readahead buffer or cache) holds the lower key is
// consumed; on equal keys the cache entry is returned and the stale bucket
// entry is skipped on the following pass via the prev-key check.
func (c *Cursor) read() (key, value []byte) {
	// Continue skipping ahead through duplicate keys in the cache list.
	for {
		// Read next value from the cursor.
		if c.buf.key == nil && c.cursor != nil {
			c.buf.key, c.buf.value = c.cursor.Next()
		}

		// Read from the buffer or cache, whichever is lower.
		if c.buf.key != nil && (c.index >= len(c.cache) || bytes.Compare(c.buf.key, c.cache[c.index][0:8]) == -1) {
			key, value = c.buf.key, c.buf.value
			c.buf.key, c.buf.value = nil, nil
		} else if c.index < len(c.cache) {
			// Cache entries are timestamp (8 bytes) + data.
			key, value = c.cache[c.index][0:8], c.cache[c.index][8:]
			c.index++
		} else {
			key, value = nil, nil
		}

		// Exit loop if we're at the end of the cache or the next key is different.
		if key == nil || !bytes.Equal(key, c.prev) {
			break
		}
	}

	c.prev = key
	return
}
// WALPartitionN is the number of partitions in the write ahead log.
const WALPartitionN = 8

// WALPartition returns the partition number that key belongs to.
// Partitions are assigned by FNV-1a hashing the key modulo WALPartitionN.
func WALPartition(key []byte) uint8 {
	hasher := fnv.New64a()
	_, _ = hasher.Write(key) // fnv hashers never return an error
	return uint8(hasher.Sum64() % WALPartitionN)
}
// marshalWALEntry encodes point data into a single byte slice.
//
// The format of the byte slice is:
//
//	uint64 timestamp
//	uint32 key length
//	[]byte key
//	[]byte data
//
func marshalWALEntry(key []byte, timestamp int64, data []byte) []byte {
	// Allocate the 12-byte header up front with room for the payload.
	out := make([]byte, 12, 12+len(key)+len(data))
	binary.BigEndian.PutUint64(out[:8], uint64(timestamp))
	binary.BigEndian.PutUint32(out[8:12], uint32(len(key)))
	out = append(out, key...)
	return append(out, data...)
}
// unmarshalWALEntry decodes a WAL entry into its separate parts.
// Returned byte slices point to the original slice.
func unmarshalWALEntry(v []byte) (key []byte, timestamp int64, data []byte) {
	// Header: 8-byte timestamp followed by a 4-byte key length.
	timestamp = int64(binary.BigEndian.Uint64(v[:8]))
	keyLen := binary.BigEndian.Uint32(v[8:12])

	// Key and data follow the 12-byte header.
	key = v[12 : 12+keyLen]
	data = v[12+keyLen:]
	return key, timestamp, data
}
// marshalCacheEntry encodes the timestamp and data to a single byte slice.
//
// The format of the byte slice is:
//
//	uint64 timestamp
//	[]byte data
//
func marshalCacheEntry(timestamp int64, data []byte) []byte {
	out := make([]byte, 8, 8+len(data))
	binary.BigEndian.PutUint64(out, uint64(timestamp))
	out = append(out, data...)
	return out
}
// unmarshalCacheEntry returns the timestamp and data from an encoded byte slice.
// The data slice aliases buf; it is not copied.
func unmarshalCacheEntry(buf []byte) (timestamp int64, data []byte) {
	return int64(binary.BigEndian.Uint64(buf[:8])), buf[8:]
}
// u64tob converts a uint64 into an 8-byte big-endian slice.
func u64tob(v uint64) []byte {
	buf := make([]byte, 8)
	binary.BigEndian.PutUint64(buf, v)
	return buf
}
// byteSlices represents a sortable slice of byte slices (sort.Interface).
type byteSlices [][]byte

func (a byteSlices) Len() int { return len(a) }

// Less orders slices lexicographically by their raw bytes.
func (a byteSlices) Less(i, j int) bool { return bytes.Compare(a[i], a[j]) < 0 }

func (a byteSlices) Swap(i, j int) { a[i], a[j] = a[j], a[i] }

View File

@ -0,0 +1,134 @@
package b1_test
import (
"bytes"
"encoding/binary"
"io/ioutil"
"os"
"testing"
"time"
"github.com/influxdb/influxdb/influxql"
"github.com/influxdb/influxdb/tsdb"
"github.com/influxdb/influxdb/tsdb/engine/b1"
)
// Ensure points can be written to the engine and queried.
//
// Writes a point, flushes it to disk, then rewrites the same timestamp with
// a new value to exercise the cache-over-disk update path, and verifies only
// the updated value is returned.
func TestEngine_WritePoints(t *testing.T) {
	e := OpenDefaultEngine()
	defer e.Close()

	// Create metadata.
	mf := &tsdb.MeasurementFields{Fields: make(map[string]*tsdb.Field)}
	mf.CreateFieldIfNotExists("value", influxql.Float)
	seriesToCreate := []*tsdb.SeriesCreate{
		{Series: &tsdb.Series{Key: string(tsdb.MakeKey([]byte("temperature"), nil))}},
	}

	// Parse point.
	points, err := tsdb.ParsePointsWithPrecision([]byte("temperature value=100 1434059627"), time.Now().UTC(), "s")
	if err != nil {
		t.Fatal(err)
	} else if data, err := mf.Codec.EncodeFields(points[0].Fields()); err != nil {
		t.Fatal(err)
	} else {
		points[0].SetData(data)
	}

	// Write original value.
	if err := e.WritePoints(points, map[string]*tsdb.MeasurementFields{"temperature": mf}, seriesToCreate); err != nil {
		t.Fatal(err)
	}

	// Flush to disk so the second write lands in the cache over flushed data.
	if err := e.Flush(0); err != nil {
		t.Fatal(err)
	}

	// Parse new point: same series and timestamp, different value.
	points, err = tsdb.ParsePointsWithPrecision([]byte("temperature value=200 1434059627"), time.Now().UTC(), "s")
	if err != nil {
		t.Fatal(err)
	} else if data, err := mf.Codec.EncodeFields(points[0].Fields()); err != nil {
		t.Fatal(err)
	} else {
		points[0].SetData(data)
	}

	// Update existing value.
	if err := e.WritePoints(points, nil, nil); err != nil {
		t.Fatal(err)
	}

	// Ensure only the updated value is read.
	tx := e.MustBegin(false)
	defer tx.Rollback()
	c := tx.Cursor("temperature")
	if k, v := c.Seek([]byte{0}); !bytes.Equal(k, u64tob(uint64(time.Unix(1434059627, 0).UnixNano()))) {
		t.Fatalf("unexpected key: %#v", k)
	} else if m, err := mf.Codec.DecodeFieldsWithNames(v); err != nil {
		t.Fatal(err)
	} else if m["value"] != float64(200) {
		t.Errorf("unexpected value: %#v", m)
	}

	// No further entries should exist.
	if k, v := c.Next(); k != nil {
		t.Fatalf("unexpected key/value: %#v / %#v", k, v)
	}
}
// Engine represents a test wrapper for b1.Engine.
type Engine struct {
	*b1.Engine // engine under test; all methods are promoted
}
// NewEngine returns a new instance of Engine backed by a fresh temporary
// file path (the file itself is removed so the engine creates it on open).
func NewEngine(opt tsdb.EngineOptions) *Engine {
	// Generate temporary file. The error was previously discarded; fail
	// fast instead so a broken TMPDIR doesn't surface as a nil-path panic.
	f, err := ioutil.TempFile("", "b1-")
	if err != nil {
		panic(err)
	}
	f.Close()
	os.Remove(f.Name())

	return &Engine{
		Engine: b1.NewEngine(f.Name(), opt).(*b1.Engine),
	}
}
// OpenEngine returns an opened instance of Engine. Panic on error.
func OpenEngine(opt tsdb.EngineOptions) *Engine {
	eng := NewEngine(opt)
	if err := eng.Open(); err != nil {
		panic(err)
	}
	return eng
}
// OpenDefaultEngine returns an open Engine with default options.
// Panics on error (via OpenEngine).
func OpenDefaultEngine() *Engine { return OpenEngine(tsdb.NewEngineOptions()) }
// Close closes the engine and removes all underlying data.
func (e *Engine) Close() error {
	// Close first, then remove files. Report the close error (previously
	// swallowed) so tests can notice shutdown failures.
	err := e.Engine.Close()
	os.RemoveAll(e.Path())
	return err
}
// MustBegin returns a new transaction. Panic on error.
func (e *Engine) MustBegin(writable bool) tsdb.Tx {
	txn, err := e.Begin(writable)
	if err != nil {
		panic(err)
	}
	return txn
}
// u64tob converts a uint64 into an 8-byte big-endian slice.
func u64tob(v uint64) []byte {
	buf := make([]byte, 8)
	binary.BigEndian.PutUint64(buf, v)
	return buf
}
// btou64 converts an 8-byte big-endian slice into a uint64.
func btou64(b []byte) uint64 { return binary.BigEndian.Uint64(b) }

View File

@ -0,0 +1,627 @@
package bz1
import (
"bytes"
"encoding/binary"
"errors"
"fmt"
"io"
"log"
"math"
"sort"
"sync"
"time"
"github.com/boltdb/bolt"
"github.com/golang/snappy"
"github.com/influxdb/influxdb/tsdb"
)
var (
	// ErrSeriesExists is returned when writing points to an existing series.
	// NOTE(review): not referenced within this file — presumably used by
	// callers or reserved; confirm before removing.
	ErrSeriesExists = errors.New("series exists")
)

// Format is the file format name of this engine.
const Format = "bz1"

// init registers the engine constructor under its format name so the tsdb
// package can instantiate it by name.
func init() {
	tsdb.RegisterEngine(Format, NewEngine)
}

const (
	// DefaultBlockSize is the default size of uncompressed points blocks.
	DefaultBlockSize = 32 * 1024 // 32KB
)

// Ensure Engine implements the interface.
var _ tsdb.Engine = &Engine{}
// Engine represents a storage engine with compressed blocks.
type Engine struct {
	mu   sync.Mutex // guards open/close of db
	path string     // file path the engine was created with
	db   *bolt.DB   // underlying Bolt database

	// Write-ahead log storage. Points handed to WritePoints are forwarded
	// here rather than written to the index directly.
	PointsWriter interface {
		WritePoints(points []tsdb.Point) error
	}

	// Size of uncompressed points to write to a block.
	BlockSize int
}
// NewEngine returns a new instance of Engine with the default block size.
// The opt parameter is accepted to satisfy the registered constructor
// signature; it is not used by this engine.
func NewEngine(path string, opt tsdb.EngineOptions) tsdb.Engine {
	e := &Engine{path: path}
	e.BlockSize = DefaultBlockSize
	return e
}
// Path returns the path the engine was opened with.
func (e *Engine) Path() string { return e.path }
// Open opens and initializes the engine.
//
// The Bolt file is opened (created if missing), the fixed top-level buckets
// are created idempotently, and the file-format marker is stamped on first
// use. On any failure the partially opened database is closed again.
func (e *Engine) Open() error {
	if err := func() error {
		e.mu.Lock()
		defer e.mu.Unlock()

		// Open underlying storage. The timeout bounds the wait on Bolt's
		// exclusive file lock if another process holds the file.
		db, err := bolt.Open(e.path, 0666, &bolt.Options{Timeout: 1 * time.Second})
		if err != nil {
			return err
		}
		e.db = db

		// Initialize data file.
		if err := e.db.Update(func(tx *bolt.Tx) error {
			// CreateBucketIfNotExists cannot fail for valid names inside an
			// Update transaction, hence the discarded returns.
			_, _ = tx.CreateBucketIfNotExists([]byte("series"))
			_, _ = tx.CreateBucketIfNotExists([]byte("fields"))
			_, _ = tx.CreateBucketIfNotExists([]byte("points"))

			// Set file format, if not set yet.
			b, _ := tx.CreateBucketIfNotExists([]byte("meta"))
			if v := b.Get([]byte("format")); v == nil {
				if err := b.Put([]byte("format"), []byte(Format)); err != nil {
					return fmt.Errorf("set format: %s", err)
				}
			}

			return nil
		}); err != nil {
			return fmt.Errorf("init: %s", err)
		}

		return nil
	}(); err != nil {
		e.close()
		return err
	}
	return nil
}
// Close closes the engine. Safe to call on an engine that never opened.
func (e *Engine) Close() error {
	e.mu.Lock()
	defer e.mu.Unlock()
	return e.close()
}
// close shuts down the Bolt database if one is open; otherwise it is a no-op.
func (e *Engine) close() error {
	if e.db == nil {
		return nil
	}
	return e.db.Close()
}
// SetLogOutput is a no-op. It exists to satisfy the tsdb.Engine interface;
// note that cursor decode errors in this engine go through the global log
// package regardless of this setting.
func (e *Engine) SetLogOutput(w io.Writer) {}
// LoadMetadataIndex loads the shard metadata into memory.
//
// Measurement fields are read from the "fields" bucket and registered both
// on the index and in measurementFields (with a codec attached); series are
// read from the "series" bucket and registered on the index.
func (e *Engine) LoadMetadataIndex(index *tsdb.DatabaseIndex, measurementFields map[string]*tsdb.MeasurementFields) error {
	return e.db.View(func(tx *bolt.Tx) error {
		// Load measurement metadata
		meta := tx.Bucket([]byte("fields"))
		c := meta.Cursor()
		for k, v := c.First(); k != nil; k, v = c.Next() {
			m := index.CreateMeasurementIndexIfNotExists(string(k))

			mf := &tsdb.MeasurementFields{}
			if err := mf.UnmarshalBinary(v); err != nil {
				return err
			}
			// Idiomatic key-only range (was `for name, _ := range`).
			for name := range mf.Fields {
				m.SetFieldName(name)
			}
			mf.Codec = tsdb.NewFieldCodec(mf.Fields)
			measurementFields[m.Name] = mf
		}

		// Load series metadata
		meta = tx.Bucket([]byte("series"))
		c = meta.Cursor()
		for k, v := c.First(); k != nil; k, v = c.Next() {
			series := &tsdb.Series{}
			if err := series.UnmarshalBinary(v); err != nil {
				return err
			}
			index.CreateSeriesIndexIfNotExists(tsdb.MeasurementFromSeriesKey(string(k)), series)
		}
		return nil
	})
}
// WritePoints writes metadata and point data into the engine.
// Returns an error if new points are added to an existing key.
//
// Series and field metadata are persisted directly in a single Bolt
// transaction; the points themselves are handed to the configured
// PointsWriter (the WAL) rather than written to the index here.
func (e *Engine) WritePoints(points []tsdb.Point, measurementFieldsToSave map[string]*tsdb.MeasurementFields, seriesToCreate []*tsdb.SeriesCreate) error {
	// Write series & field metadata.
	if err := e.db.Update(func(tx *bolt.Tx) error {
		if err := e.writeSeries(tx, seriesToCreate); err != nil {
			return fmt.Errorf("write series: %s", err)
		}
		if err := e.writeFields(tx, measurementFieldsToSave); err != nil {
			return fmt.Errorf("write fields: %s", err)
		}

		return nil
	}); err != nil {
		return err
	}

	// Write points to the WAL.
	if err := e.PointsWriter.WritePoints(points); err != nil {
		return fmt.Errorf("write points: %s", err)
	}

	return nil
}
// writeSeries writes a list of series to the metadata.
// Must be called within a writable Bolt transaction.
func (e *Engine) writeSeries(tx *bolt.Tx, a []*tsdb.SeriesCreate) error {
	// Nothing to persist.
	if len(a) == 0 {
		return nil
	}

	bkt := tx.Bucket([]byte("series"))
	for _, sc := range a {
		// Serialize the series and store it under its series key.
		buf, err := sc.Series.MarshalBinary()
		if err != nil {
			return fmt.Errorf("marshal series: %s", err)
		}
		if err := bkt.Put([]byte(sc.Series.Key), buf); err != nil {
			return fmt.Errorf("put: %s", err)
		}
	}

	return nil
}
// writeFields writes a list of measurement fields to the metadata.
// Must be called within a writable Bolt transaction.
func (e *Engine) writeFields(tx *bolt.Tx, m map[string]*tsdb.MeasurementFields) error {
	// Nothing to persist.
	if len(m) == 0 {
		return nil
	}

	bkt := tx.Bucket([]byte("fields"))
	for name, fields := range m {
		// Serialize the field set and store it under the measurement name.
		buf, err := fields.MarshalBinary()
		if err != nil {
			return fmt.Errorf("marshal measurement field: %s", err)
		}
		if err := bkt.Put([]byte(name), buf); err != nil {
			return fmt.Errorf("put: %s", err)
		}
	}

	return nil
}
// WriteIndex writes marshaled points to the engine's underlying index.
// All keys are written within a single Bolt update transaction.
func (e *Engine) WriteIndex(pointsByKey map[string][][]byte) error {
	return e.db.Update(func(tx *bolt.Tx) error {
		for key, values := range pointsByKey {
			err := e.writeIndex(tx, key, values)
			if err != nil {
				return fmt.Errorf("write: key=%x, err=%s", key, err)
			}
		}
		return nil
	})
}
// writeIndex writes a set of points for a single key.
//
// New points are deduped and sorted, then either appended as fresh blocks
// (fast path, when they all come after the last stored block) or merged
// with the overlapping on-disk blocks, which are deleted and rewritten.
func (e *Engine) writeIndex(tx *bolt.Tx, key string, a [][]byte) error {
	// Ignore if there are no points.
	if len(a) == 0 {
		return nil
	}

	// Create or retrieve series bucket.
	bkt, err := tx.Bucket([]byte("points")).CreateBucketIfNotExists([]byte(key))
	if err != nil {
		return fmt.Errorf("create series bucket: %s", err)
	}
	c := bkt.Cursor()

	// Ensure the slice is deduped and sorted before retrieving the time range.
	a = DedupeEntries(a)
	sort.Sort(byteSlices(a))

	// Determine time range of new data (entries start with an 8-byte timestamp).
	tmin, tmax := int64(btou64(a[0][0:8])), int64(btou64(a[len(a)-1][0:8]))

	// If tmin is after the last block then append new blocks.
	//
	// This is the optimized fast path. Otherwise we need to merge the points
	// with existing blocks on disk and rewrite all the blocks for that range.
	if k, v := c.Last(); k == nil || int64(btou64(v[0:8])) < tmin {
		if err := e.writeBlocks(bkt, a); err != nil {
			return fmt.Errorf("append blocks: %s", err)
		}
		// Return here: without this the freshly appended blocks would be
		// picked up by the overlap scan below, deleted and rewritten,
		// defeating the purpose of the fast path.
		return nil
	}

	// Generate map of inserted keys so overwritten entries can be dropped.
	m := make(map[int64]struct{})
	for _, b := range a {
		m[int64(btou64(b[0:8]))] = struct{}{}
	}

	// If time range overlaps existing blocks then unpack full range and reinsert.
	var existing [][]byte
	for k, v := c.First(); k != nil; k, v = c.Next() {
		// Determine block range: key is the block's min timestamp, the first
		// 8 bytes of the value hold its max timestamp.
		bmin, bmax := int64(btou64(k)), int64(btou64(v[0:8]))

		// Skip over all blocks before the time range.
		// Exit once we reach a block that is beyond our time range.
		if bmax < tmin {
			continue
		} else if bmin > tmax {
			break
		}

		// Decode block.
		buf, err := snappy.Decode(nil, v[8:])
		if err != nil {
			return fmt.Errorf("decode block: %s", err)
		}

		// Copy out any entries that aren't being overwritten.
		for _, entry := range SplitEntries(buf) {
			if _, ok := m[int64(btou64(entry[0:8]))]; !ok {
				existing = append(existing, entry)
			}
		}

		// Delete block in database.
		c.Delete()
	}

	// Merge entries before rewriting.
	a = append(existing, a...)
	sort.Sort(byteSlices(a))

	// Rewrite points to new blocks.
	if err := e.writeBlocks(bkt, a); err != nil {
		return fmt.Errorf("rewrite blocks: %s", err)
	}

	return nil
}
// writeBlocks writes point data to the bucket in blocks.
//
// Entries are deduped then accumulated into a block until it reaches
// e.BlockSize (uncompressed); each block is stored under its minimum
// timestamp with the value `u64(max timestamp) + snappy(entries)`.
// Assumes a is sorted by timestamp (callers sort before invoking).
func (e *Engine) writeBlocks(bkt *bolt.Bucket, a [][]byte) error {
	var block []byte

	// Dedupe points by key.
	a = DedupeEntries(a)

	// Group points into blocks by size.
	tmin, tmax := int64(math.MaxInt64), int64(math.MinInt64)
	for i, p := range a {
		// Update block time range.
		timestamp := int64(btou64(p[0:8]))
		if timestamp < tmin {
			tmin = timestamp
		}
		if timestamp > tmax {
			tmax = timestamp
		}

		// Append point to the end of the block.
		block = append(block, p...)

		// If the block is larger than the target block size or this is the
		// last point then flush the block to the bucket.
		if len(block) >= e.BlockSize || i == len(a)-1 {
			// Encode block in the following format:
			//   tmax int64
			//   data []byte (snappy compressed)
			value := append(u64tob(uint64(tmax)), snappy.Encode(nil, block)...)

			// Write block to the bucket, keyed by the block's min timestamp.
			if err := bkt.Put(u64tob(uint64(tmin)), value); err != nil {
				return fmt.Errorf("put: ts=%d-%d, err=%s", tmin, tmax, err)
			}

			// Reset the block & time range.
			block = nil
			tmin, tmax = int64(math.MaxInt64), int64(math.MinInt64)
		}
	}

	return nil
}
// DeleteSeries deletes the series from the engine.
func (e *Engine) DeleteSeries(keys []string) error {
	return e.db.Update(func(tx *bolt.Tx) error {
		for _, key := range keys {
			// Remove the series metadata entry.
			if err := tx.Bucket([]byte("series")).Delete([]byte(key)); err != nil {
				return fmt.Errorf("delete series metadata: %s", err)
			}
			// Drop the per-series points bucket; a missing bucket is fine.
			err := tx.Bucket([]byte("points")).DeleteBucket([]byte(key))
			if err != nil && err != bolt.ErrBucketNotFound {
				return fmt.Errorf("delete series data: %s", err)
			}
		}
		return nil
	})
}
// DeleteMeasurement deletes a measurement and all related series.
func (e *Engine) DeleteMeasurement(name string, seriesKeys []string) error {
	return e.db.Update(func(tx *bolt.Tx) error {
		// Remove the measurement's field metadata.
		if err := tx.Bucket([]byte("fields")).Delete([]byte(name)); err != nil {
			return err
		}

		// Remove each series' metadata and its points bucket.
		for _, key := range seriesKeys {
			if err := tx.Bucket([]byte("series")).Delete([]byte(key)); err != nil {
				return fmt.Errorf("delete series metadata: %s", err)
			}
			err := tx.Bucket([]byte("points")).DeleteBucket([]byte(key))
			if err != nil && err != bolt.ErrBucketNotFound {
				return fmt.Errorf("delete series data: %s", err)
			}
		}
		return nil
	})
}
// SeriesCount returns the number of series buckets on the shard.
func (e *Engine) SeriesCount() (n int, err error) {
	err = e.db.View(func(tx *bolt.Tx) error {
		// Each sub-bucket of "points" holds one series; count the keys.
		cur := tx.Bucket([]byte("points")).Cursor()
		for k, _ := cur.First(); k != nil; k, _ = cur.Next() {
			n++
		}
		return nil
	})
	return n, err
}
// Begin starts a new transaction on the engine.
func (e *Engine) Begin(writable bool) (tsdb.Tx, error) {
	btx, err := e.db.Begin(writable)
	if err != nil {
		return nil, err
	}
	// Wrap the Bolt transaction so cursors can reach engine settings.
	return &Tx{Tx: btx, engine: e}, nil
}
// Stats returns internal statistics for the engine.
// Currently only the database file size as reported by the read transaction.
func (e *Engine) Stats() (stats Stats, err error) {
	err = e.db.View(func(tx *bolt.Tx) error {
		stats.Size = tx.Size()
		return nil
	})
	return stats, err
}
// Stats represents internal engine statistics.
type Stats struct {
	Size int64 // BoltDB data size, in bytes
}
// Tx represents a transaction.
type Tx struct {
	*bolt.Tx         // embedded Bolt transaction; provides Bucket, Rollback, etc.
	engine *Engine   // engine the transaction was created from
}
// Cursor returns an iterator for a key, or nil when the series has no
// stored points.
func (tx *Tx) Cursor(key string) tsdb.Cursor {
	// Look up the per-series sub-bucket inside "points".
	bkt := tx.Bucket([]byte("points")).Bucket([]byte(key))
	if bkt == nil {
		return nil
	}
	return &Cursor{
		cursor: bkt.Cursor(),
		buf:    make([]byte, DefaultBlockSize),
	}
}
// Cursor provides ordered iteration across a series.
// It walks the series' compressed blocks via the Bolt cursor, keeping the
// current block decompressed in buf and tracking the current entry at off.
type Cursor struct {
	cursor *bolt.Cursor
	buf    []byte // uncompressed buffer (current block's entries)
	off    int    // buffer offset of the current entry
}
// Seek moves the cursor to a position and returns the closest key/value pair.
func (c *Cursor) Seek(seek []byte) (key, value []byte) {
	// Move cursor to appropriate block and set to buffer.
	// (Blocks are keyed by min timestamp; Bolt's Seek lands on the first
	// block whose key is >= seek.)
	_, v := c.cursor.Seek(seek)
	c.setBuf(v)

	// Read current block up to seek position.
	c.seekBuf(seek)

	// Return current entry.
	return c.read()
}
// seekBuf moves the cursor offset to the first entry within the current
// buffer whose timestamp is on or after seek.
// The named results are never assigned; callers obtain the entry via read().
func (c *Cursor) seekBuf(seek []byte) (key, value []byte) {
	for {
		// Slice off the current entry.
		buf := c.buf[c.off:]

		// Exit if current entry's timestamp is on or after the seek.
		if len(buf) == 0 || bytes.Compare(buf[0:8], seek) != -1 {
			return
		}

		// Otherwise skip ahead to the next entry.
		c.off += entryHeaderSize + entryDataSize(buf)
	}
}
// Next returns the next key/value pair from the cursor.
// Advances past the current entry, loading and decompressing the next
// block when the current buffer is exhausted.
func (c *Cursor) Next() (key, value []byte) {
	// Ignore if there is no buffer.
	if len(c.buf) == 0 {
		return nil, nil
	}

	// Move forward to next entry.
	c.off += entryHeaderSize + entryDataSize(c.buf[c.off:])

	// If no items left then read first item from next block.
	if c.off >= len(c.buf) {
		_, v := c.cursor.Next()
		c.setBuf(v)
	}

	return c.read()
}
// setBuf decompresses a block into the cursor's buffer and resets the
// offset. An empty block clears the buffer, which ends iteration.
func (c *Cursor) setBuf(block []byte) {
	// Clear if the block is empty.
	if len(block) == 0 {
		c.buf, c.off = c.buf[0:0], 0
		return
	}

	// Otherwise decode block into buffer.
	// Skip over the first 8 bytes since they are the max timestamp.
	buf, err := snappy.Decode(nil, block[8:])
	if err != nil {
		// Leave the cursor with an empty buffer so iteration stops cleanly.
		// (Previously the cleared buffer was immediately overwritten by the
		// unconditional assignment below — dead code that only worked
		// because snappy returns a nil slice on error.)
		c.buf, c.off = c.buf[0:0], 0
		log.Printf("block decode error: %s", err)
		return
	}
	c.buf, c.off = buf, 0
}
// read reads the current key and value from the current block.
// The key is the entry's 8-byte big-endian timestamp; the value is the
// entry's data payload (length taken from the entry header).
func (c *Cursor) read() (key, value []byte) {
	// Return nil if the offset is at the end of the buffer.
	if c.off >= len(c.buf) {
		return nil, nil
	}

	// Otherwise read the current entry.
	buf := c.buf[c.off:]
	dataSize := entryDataSize(buf)
	return buf[0:8], buf[entryHeaderSize : entryHeaderSize+dataSize]
}
// MarshalEntry encodes point data into a single byte slice.
//
// The format of the byte slice is:
//
//	uint64 timestamp
//	uint32 data length
//	[]byte data
//
func MarshalEntry(timestamp int64, data []byte) []byte {
	// Allocate the 12-byte header up front with room for the payload.
	out := make([]byte, 12, 12+len(data))
	binary.BigEndian.PutUint64(out[0:8], uint64(timestamp))
	binary.BigEndian.PutUint32(out[8:12], uint32(len(data)))
	return append(out, data...)
}
// UnmarshalEntry decodes an entry into its separate parts.
// Returns the timestamp, data and the number of bytes read.
// Returned byte slices point to the original slice.
func UnmarshalEntry(v []byte) (timestamp int64, data []byte, n int) {
	timestamp = int64(binary.BigEndian.Uint64(v[0:8]))
	dataLen := binary.BigEndian.Uint32(v[8:12])
	// The data is the dataLen bytes immediately following the 12-byte
	// header. (The previous `v[12+dataLen:]` returned everything AFTER the
	// data — i.e. any following entries — instead of this entry's payload,
	// inconsistent with MarshalEntry and with how read()/SplitEntries slice
	// entries.)
	data = v[12 : 12+dataLen]
	return timestamp, data, 12 + int(dataLen)
}
// SplitEntries returns a slice of individual entries from one continuous set.
func SplitEntries(b []byte) [][]byte {
var a [][]byte
for {
// Exit if there's no more data left.
if len(b) == 0 {
return a
}
// Create slice that points to underlying entry.
dataSize := entryDataSize(b)
a = append(a, b[0:entryHeaderSize+dataSize])
// Move buffer forward.
b = b[entryHeaderSize+dataSize:]
}
}
// DedupeEntries returns slices with unique keys (the first 8 bytes).
// When a key occurs multiple times the last entry wins; the result is
// sorted by raw bytes (i.e. by timestamp, then payload).
func DedupeEntries(a [][]byte) [][]byte {
	// Last write for each 8-byte key wins.
	byKey := make(map[string][]byte)
	for _, entry := range a {
		byKey[string(entry[0:8])] = entry
	}

	// Flatten the map back out and restore ordering.
	deduped := make([][]byte, 0, len(byKey))
	for _, entry := range byKey {
		deduped = append(deduped, entry)
	}
	sort.Sort(byteSlices(deduped))
	return deduped
}
// entryHeaderSize is the number of bytes required for the header
// (8-byte timestamp + 4-byte data length).
const entryHeaderSize = 8 + 4

// entryDataSize returns the size of an entry's data field, in bytes,
// read from the length field of the entry header.
func entryDataSize(v []byte) int {
	return int(binary.BigEndian.Uint32(v[8:12]))
}
// u64tob converts a uint64 into an 8-byte big-endian slice.
func u64tob(v uint64) []byte {
	buf := make([]byte, 8)
	binary.BigEndian.PutUint64(buf, v)
	return buf
}
// btou64 converts an 8-byte big-endian slice into a uint64.
func btou64(b []byte) uint64 {
	return binary.BigEndian.Uint64(b)
}
// byteSlices is a sortable slice of byte slices (sort.Interface),
// ordered lexicographically by raw bytes.
type byteSlices [][]byte

func (a byteSlices) Len() int { return len(a) }

func (a byteSlices) Swap(i, j int) { a[i], a[j] = a[j], a[i] }

func (a byteSlices) Less(i, j int) bool { return bytes.Compare(a[i], a[j]) < 0 }

View File

@ -0,0 +1,439 @@
package bz1_test
import (
"bytes"
"encoding/binary"
"errors"
"io/ioutil"
"math/rand"
"os"
"reflect"
"sort"
"strconv"
"testing"
"testing/quick"
"time"
"github.com/influxdb/influxdb/tsdb"
"github.com/influxdb/influxdb/tsdb/engine/bz1"
)
// Ensure the engine can write series metadata and reload it.
func TestEngine_LoadMetadataIndex_Series(t *testing.T) {
	e := OpenDefaultEngine()
	defer e.Close()

	// Setup nop mock.
	e.PointsWriter.WritePointsFn = func(a []tsdb.Point) error { return nil }

	// Write series metadata.
	if err := e.WritePoints(nil, nil, []*tsdb.SeriesCreate{
		{Series: &tsdb.Series{Key: string(tsdb.MakeKey([]byte("cpu"), map[string]string{"host": "server0"})), Tags: map[string]string{"host": "server0"}}},
		{Series: &tsdb.Series{Key: string(tsdb.MakeKey([]byte("cpu"), map[string]string{"host": "server1"})), Tags: map[string]string{"host": "server1"}}},
		{Series: &tsdb.Series{Key: "series with spaces"}},
	}); err != nil {
		t.Fatal(err)
	}

	// Load metadata index.
	index := tsdb.NewDatabaseIndex()
	if err := e.LoadMetadataIndex(index, make(map[string]*tsdb.MeasurementFields)); err != nil {
		t.Fatal(err)
	}

	// Verify index is correct.
	// NOTE(review): the assertions assume series IDs 1-3 are assigned in
	// write order by the index — confirm against tsdb.DatabaseIndex.
	if m := index.Measurement("cpu"); m == nil {
		t.Fatal("measurement not found")
	} else if s := m.SeriesByID(1); s.Key != "cpu,host=server0" || !reflect.DeepEqual(s.Tags, map[string]string{"host": "server0"}) {
		t.Fatalf("unexpected series: %q / %#v", s.Key, s.Tags)
	} else if s = m.SeriesByID(2); s.Key != "cpu,host=server1" || !reflect.DeepEqual(s.Tags, map[string]string{"host": "server1"}) {
		t.Fatalf("unexpected series: %q / %#v", s.Key, s.Tags)
	}

	if m := index.Measurement("series with spaces"); m == nil {
		t.Fatal("measurement not found")
	} else if s := m.SeriesByID(3); s.Key != "series with spaces" {
		t.Fatalf("unexpected series: %q", s.Key)
	}
}
// Ensure the engine can write field metadata and reload it.
func TestEngine_LoadMetadataIndex_Fields(t *testing.T) {
	e := OpenDefaultEngine()
	defer e.Close()

	// Setup nop mock.
	e.PointsWriter.WritePointsFn = func(a []tsdb.Point) error { return nil }

	// Write measurement field metadata (no points, no series).
	if err := e.WritePoints(nil, map[string]*tsdb.MeasurementFields{
		"cpu": &tsdb.MeasurementFields{
			Fields: map[string]*tsdb.Field{
				"value": &tsdb.Field{ID: 0, Name: "value"},
			},
		},
	}, nil); err != nil {
		t.Fatal(err)
	}

	// Load metadata index.
	mfs := make(map[string]*tsdb.MeasurementFields)
	if err := e.LoadMetadataIndex(tsdb.NewDatabaseIndex(), mfs); err != nil {
		t.Fatal(err)
	}

	// Verify measurement field is correct.
	if mf := mfs["cpu"]; mf == nil {
		t.Fatal("measurement fields not found")
	} else if !reflect.DeepEqual(mf.Fields, map[string]*tsdb.Field{"value": &tsdb.Field{ID: 0, Name: "value"}}) {
		t.Fatalf("unexpected fields: %#v", mf.Fields)
	}
}
// Ensure the engine can write points to storage.
// Verifies the points are forwarded unmodified to the configured PointsWriter.
func TestEngine_WritePoints_PointsWriter(t *testing.T) {
	e := OpenDefaultEngine()
	defer e.Close()

	// Points to be inserted.
	points := []tsdb.Point{
		tsdb.NewPoint("cpu", tsdb.Tags{}, tsdb.Fields{}, time.Unix(0, 1)),
		tsdb.NewPoint("cpu", tsdb.Tags{}, tsdb.Fields{}, time.Unix(0, 0)),
		tsdb.NewPoint("cpu", tsdb.Tags{}, tsdb.Fields{}, time.Unix(1, 0)),
		tsdb.NewPoint("cpu", tsdb.Tags{"host": "serverA"}, tsdb.Fields{}, time.Unix(0, 0)),
	}

	// Mock points writer to ensure points are passed through.
	var invoked bool
	e.PointsWriter.WritePointsFn = func(a []tsdb.Point) error {
		invoked = true
		if !reflect.DeepEqual(points, a) {
			t.Fatalf("unexpected points: %#v", a)
		}
		return nil
	}

	// Write points against two separate series.
	if err := e.WritePoints(points, nil, nil); err != nil {
		t.Fatal(err)
	} else if !invoked {
		t.Fatal("PointsWriter.WritePoints() not called")
	}
}
// Ensure the engine can return errors from the points writer.
// The engine should wrap the PointsWriter error as "write points: ...".
func TestEngine_WritePoints_ErrPointsWriter(t *testing.T) {
	e := OpenDefaultEngine()
	defer e.Close()

	// Ensure points writer returns an error.
	e.PointsWriter.WritePointsFn = func(a []tsdb.Point) error { return errors.New("marker") }

	// Write to engine.
	if err := e.WritePoints(nil, nil, nil); err == nil || err.Error() != `write points: marker` {
		t.Fatal(err)
	}
}
// Ensure the engine can write points to the index.
// Exercises the append-only fast path (all timestamps after existing data)
// across two independent series.
func TestEngine_WriteIndex_Append(t *testing.T) {
	e := OpenDefaultEngine()
	defer e.Close()

	// Append points to index.
	if err := e.WriteIndex(map[string][][]byte{
		"cpu": [][]byte{
			bz1.MarshalEntry(1, []byte{0x10}),
			bz1.MarshalEntry(2, []byte{0x20}),
		},
		"mem": [][]byte{
			bz1.MarshalEntry(0, []byte{0x30}),
		},
	}); err != nil {
		t.Fatal(err)
	}

	// Start transaction.
	tx := e.MustBegin(false)
	defer tx.Rollback()

	// Iterate over "cpu" series.
	c := tx.Cursor("cpu")
	if k, v := c.Seek(u64tob(0)); !reflect.DeepEqual(k, []byte{0, 0, 0, 0, 0, 0, 0, 1}) || !reflect.DeepEqual(v, []byte{0x10}) {
		t.Fatalf("unexpected key/value: %x / %x", k, v)
	} else if k, v = c.Next(); !reflect.DeepEqual(k, []byte{0, 0, 0, 0, 0, 0, 0, 2}) || !reflect.DeepEqual(v, []byte{0x20}) {
		t.Fatalf("unexpected key/value: %x / %x", k, v)
	} else if k, _ = c.Next(); k != nil {
		t.Fatalf("unexpected key/value: %x / %x", k, v)
	}

	// Iterate over "mem" series.
	c = tx.Cursor("mem")
	if k, v := c.Seek(u64tob(0)); !reflect.DeepEqual(k, []byte{0, 0, 0, 0, 0, 0, 0, 0}) || !reflect.DeepEqual(v, []byte{0x30}) {
		t.Fatalf("unexpected key/value: %x / %x", k, v)
	} else if k, _ = c.Next(); k != nil {
		t.Fatalf("unexpected key/value: %x / %x", k, v)
	}
}
// Ensure the engine can rewrite blocks that contain the new point range.
// Exercises the merge path: overlapping writes must interleave with and
// overwrite existing entries at matching timestamps.
func TestEngine_WriteIndex_Insert(t *testing.T) {
	e := OpenDefaultEngine()
	defer e.Close()

	// Write initial points to index.
	if err := e.WriteIndex(map[string][][]byte{
		"cpu": [][]byte{
			bz1.MarshalEntry(10, []byte{0x10}),
			bz1.MarshalEntry(20, []byte{0x20}),
			bz1.MarshalEntry(30, []byte{0x30}),
		},
	}); err != nil {
		t.Fatal(err)
	}

	// Write overlapping points to index.
	if err := e.WriteIndex(map[string][][]byte{
		"cpu": [][]byte{
			bz1.MarshalEntry(9, []byte{0x09}),
			bz1.MarshalEntry(10, []byte{0xFF}), // overwrites t=10
			bz1.MarshalEntry(25, []byte{0x25}),
			bz1.MarshalEntry(31, []byte{0x31}),
		},
	}); err != nil {
		t.Fatal(err)
	}

	// Write overlapping points to index again.
	if err := e.WriteIndex(map[string][][]byte{
		"cpu": [][]byte{
			bz1.MarshalEntry(31, []byte{0xFF}), // overwrites t=31
		},
	}); err != nil {
		t.Fatal(err)
	}

	// Start transaction.
	tx := e.MustBegin(false)
	defer tx.Rollback()

	// Iterate over "cpu" series and verify the merged, overwritten sequence.
	c := tx.Cursor("cpu")
	if k, v := c.Seek(u64tob(0)); btou64(k) != 9 || !bytes.Equal(v, []byte{0x09}) {
		t.Fatalf("unexpected key/value: %x / %x", k, v)
	} else if k, v = c.Next(); btou64(k) != 10 || !bytes.Equal(v, []byte{0xFF}) {
		t.Fatalf("unexpected key/value: %x / %x", k, v)
	} else if k, v = c.Next(); btou64(k) != 20 || !bytes.Equal(v, []byte{0x20}) {
		t.Fatalf("unexpected key/value: %x / %x", k, v)
	} else if k, v = c.Next(); btou64(k) != 25 || !bytes.Equal(v, []byte{0x25}) {
		t.Fatalf("unexpected key/value: %x / %x", k, v)
	} else if k, v = c.Next(); btou64(k) != 30 || !bytes.Equal(v, []byte{0x30}) {
		t.Fatalf("unexpected key/value: %x / %x", k, v)
	} else if k, v = c.Next(); btou64(k) != 31 || !bytes.Equal(v, []byte{0xFF}) {
		t.Fatalf("unexpected key/value: %x / %x", k, v)
	}
}
// Ensure the engine ignores writes without keys.
func TestEngine_WriteIndex_NoKeys(t *testing.T) {
	e := OpenDefaultEngine()
	defer e.Close()

	if err := e.WriteIndex(nil); err != nil {
		t.Fatal(err)
	}
}
// Ensure the engine ignores writes without points in a key.
func TestEngine_WriteIndex_NoPoints(t *testing.T) {
	e := OpenDefaultEngine()
	defer e.Close()

	if err := e.WriteIndex(map[string][][]byte{"cpu": nil}); err != nil {
		t.Fatal(err)
	}
}
// Ensure the engine produces a correct index across randomized write sets
// (property-based test over overlapping, deduplicated writes).
func TestEngine_WriteIndex_Quick(t *testing.T) {
	if testing.Short() {
		t.Skip("short mode")
	}

	if err := quick.Check(func(sets []Points, blockSize int) bool {
		e := OpenDefaultEngine()
		e.BlockSize = blockSize % 1024 // 1KB max block size
		defer e.Close()

		// Write points to index in multiple sets.
		for _, set := range sets {
			if err := e.WriteIndex(map[string][][]byte(set)); err != nil {
				t.Fatal(err)
			}
		}

		// Merge all points together.
		points := MergePoints(sets)

		// Retrieve a sorted list of keys so results are deterministic.
		keys := points.Keys()

		// Start transaction to read index.
		tx := e.MustBegin(false)
		defer tx.Rollback()

		// Iterate over results to ensure they are correct.
		for _, key := range keys {
			c := tx.Cursor(key)

			// Read list of key/values.
			var got [][]byte
			for k, v := c.Seek(u64tob(0)); k != nil; k, v = c.Next() {
				got = append(got, append(copyBytes(k), v...))
			}

			// Generate expected values.
			// We need to remove the data length from the slice.
			var exp [][]byte
			for _, b := range points[key] {
				exp = append(exp, append(copyBytes(b[0:8]), b[12:]...)) // remove data len
			}

			if !reflect.DeepEqual(got, exp) {
				t.Fatalf("points: block size=%d, key=%s:\n\ngot=%x\n\nexp=%x\n\n", e.BlockSize, key, got, exp)
			}
		}

		return true
	}, nil); err != nil {
		// The error from quick.Check was previously discarded; surface
		// generation/config failures instead of silently passing.
		t.Fatal(err)
	}
}
// Engine represents a test wrapper for bz1.Engine.
type Engine struct {
	*bz1.Engine                    // engine under test; all methods promoted
	PointsWriter EnginePointsWriter // mock attached as the engine's PointsWriter
}
// NewEngine returns a new instance of Engine backed by a fresh temporary
// file path, with the mock points writer attached.
func NewEngine(opt tsdb.EngineOptions) *Engine {
	// Generate temporary file. The error was previously discarded; fail
	// fast instead so a broken TMPDIR doesn't surface as a nil-path panic.
	f, err := ioutil.TempFile("", "bz1-")
	if err != nil {
		panic(err)
	}
	f.Close()
	os.Remove(f.Name())

	// Create test wrapper and attach mocks.
	e := &Engine{
		Engine: bz1.NewEngine(f.Name(), opt).(*bz1.Engine),
	}
	e.Engine.PointsWriter = &e.PointsWriter
	return e
}
// OpenEngine returns an opened instance of Engine. Panic on error.
func OpenEngine(opt tsdb.EngineOptions) *Engine {
	eng := NewEngine(opt)
	if err := eng.Open(); err != nil {
		panic(err)
	}
	return eng
}
// OpenDefaultEngine returns an open Engine with default options.
// Panics on error (via OpenEngine).
func OpenDefaultEngine() *Engine { return OpenEngine(tsdb.NewEngineOptions()) }
// Close closes the engine and removes all underlying data.
func (e *Engine) Close() error {
	// Close first, then remove files. Report the close error (previously
	// swallowed) so tests can notice shutdown failures.
	err := e.Engine.Close()
	os.RemoveAll(e.Path())
	return err
}
// MustBegin returns a new transaction. Panic on error.
func (e *Engine) MustBegin(writable bool) tsdb.Tx {
	txn, err := e.Begin(writable)
	if err != nil {
		panic(err)
	}
	return txn
}
// EnginePointsWriter represents a mock that implements Engine.PointsWriter.
type EnginePointsWriter struct {
	WritePointsFn func(points []tsdb.Point) error // test-provided hook invoked by WritePoints
}

// WritePoints delegates to the test-provided WritePointsFn.
// NOTE(review): panics if WritePointsFn is unset — tests must assign it first.
func (w *EnginePointsWriter) WritePoints(points []tsdb.Point) error {
	return w.WritePointsFn(points)
}
// Points represents a set of encoded points by key. Implements quick.Generator.
type Points map[string][][]byte

// Keys returns the map's keys in ascending sorted order.
// An empty map yields a nil slice.
func (m Points) Keys() []string {
	var keys []string
	for key := range m {
		keys = append(keys, key)
	}
	sort.Sort(sort.StringSlice(keys))
	return keys
}
// Generate implements quick.Generator, producing a random Points set for
// property-based tests: up to size series, each with up to size encoded
// entries. Keys are drawn from a pool of 20 so series repeat across calls,
// and timestamps are confined to the first 100ns so duplicates can occur
// (exercising dedupe behavior).
func (Points) Generate(rand *rand.Rand, size int) reflect.Value {
	// Generate series with a random number of points in each.
	m := make(map[string][][]byte)
	for i, seriesN := 0, rand.Intn(size); i < seriesN; i++ {
		key := strconv.Itoa(rand.Intn(20))

		// Generate points for the series.
		for j, pointN := 0, rand.Intn(size); j < pointN; j++ {
			timestamp := time.Unix(0, 0).Add(time.Duration(rand.Intn(100)))
			data, ok := quick.Value(reflect.TypeOf([]byte(nil)), rand)
			if !ok {
				panic("cannot generate data")
			}
			// Entries are stored pre-encoded in the bz1 on-disk format.
			m[key] = append(m[key], bz1.MarshalEntry(timestamp.UnixNano(), data.Interface().([]byte)))
		}
	}

	return reflect.ValueOf(Points(m))
}
// MergePoints returns a map of all points merged together by key.
// Later points will overwrite earlier ones.
func MergePoints(a []Points) Points {
	// Combine all points into one set, concatenating per-key entry slices.
	m := make(Points)
	for _, set := range a {
		for key, values := range set {
			m[key] = append(m[key], values...)
		}
	}

	// Dedupe points per key; bz1.DedupeEntries keeps the later duplicate.
	for key, values := range m {
		m[key] = bz1.DedupeEntries(values)
	}

	return m
}
// copyBytes returns an independent copy of b. A nil input stays nil;
// a non-nil empty input yields a non-nil empty slice.
func copyBytes(b []byte) []byte {
	if b == nil {
		return nil
	}
	dup := make([]byte, len(b))
	copy(dup, b)
	return dup
}
// u64tob converts a uint64 into an 8-byte big-endian slice.
func u64tob(v uint64) []byte {
	var buf [8]byte
	binary.BigEndian.PutUint64(buf[:], v)
	return buf[:]
}
// btou64 converts an 8-byte big-endian slice into a uint64.
func btou64(b []byte) uint64 {
	return binary.BigEndian.Uint64(b)
}

View File

@ -0,0 +1,6 @@
package engine
import (
_ "github.com/influxdb/influxdb/tsdb/engine/b1"
_ "github.com/influxdb/influxdb/tsdb/engine/bz1"
)

View File

@ -1,815 +1,3 @@
package tsdb
package tsdb_test
import (
"fmt"
"io/ioutil"
"math"
"os"
"testing"
"time"
"github.com/influxdb/influxdb/influxql"
"github.com/influxdb/influxdb/meta"
)
// Fixed identifiers shared by the query-executor tests below.
var sID0 = uint64(1) // first shard ID
var sID1 = uint64(2) // second shard ID
var sgID1 = uint64(3)
var sgID2 = uint64(4)
var nID = uint64(42) // node ID reported by the mock metastore

// NOTE(review): the tests below reference sgID, which is not declared in this
// visible chunk — presumably declared elsewhere in the package; confirm.
// TestWritePointsAndExecuteTwoShards is a simple test to ensure data can be
// read back correctly when it spans two shards: one point per shard, then a
// table of SELECT statements whose JSON-rendered results are compared exactly.
func TestWritePointsAndExecuteTwoShards(t *testing.T) {
	// Create the mock planner and its metastore.
	store, query_executor := testStoreAndQueryExecutor()
	defer os.RemoveAll(store.path)
	// Two shard groups, each owning one shard hosted on the test node.
	query_executor.MetaStore = &testQEMetastore{
		sgFunc: func(database, policy string, min, max time.Time) (a []meta.ShardGroupInfo, err error) {
			return []meta.ShardGroupInfo{
				{
					ID:        sgID,
					StartTime: time.Now().Add(-time.Hour),
					EndTime:   time.Now().Add(time.Hour),
					Shards: []meta.ShardInfo{
						{
							ID:       uint64(sID0),
							OwnerIDs: []uint64{nID},
						},
					},
				},
				{
					ID:        sgID,
					StartTime: time.Now().Add(-2 * time.Hour),
					EndTime:   time.Now().Add(-time.Hour),
					Shards: []meta.ShardInfo{
						{
							ID:       uint64(sID1),
							OwnerIDs: []uint64{nID},
						},
					},
				},
			}, nil
		},
	}

	// Write two points across shards.
	pt1time := time.Unix(1, 0).UTC()
	if err := store.WriteToShard(sID0, []Point{NewPoint(
		"cpu",
		map[string]string{"host": "serverA", "region": "us-east"},
		map[string]interface{}{"value": 100},
		pt1time,
	)}); err != nil {
		t.Fatalf(err.Error())
	}
	pt2time := time.Unix(2, 0).UTC()
	if err := store.WriteToShard(sID1, []Point{NewPoint(
		"cpu",
		map[string]string{"host": "serverB", "region": "us-east"},
		map[string]interface{}{"value": 200},
		pt2time,
	)}); err != nil {
		t.Fatalf(err.Error())
	}

	var tests = []struct {
		skip      bool   // Skip test
		stmt      string // Query statement
		chunkSize int    // Chunk size for driving the executor
		expected  string // Expected results, rendered as a string
	}{
		{
			stmt:     `SELECT value FROM cpu`,
			expected: `[{"name":"cpu","columns":["time","value"],"values":[["1970-01-01T00:00:01Z",100],["1970-01-01T00:00:02Z",200]]}]`,
		},
		{
			stmt:      `SELECT value FROM cpu`,
			chunkSize: 1,
			expected:  `[{"name":"cpu","columns":["time","value"],"values":[["1970-01-01T00:00:01Z",100]]},{"name":"cpu","columns":["time","value"],"values":[["1970-01-01T00:00:02Z",200]]}]`,
		},
		{
			stmt:     `SELECT value FROM cpu LIMIT 1`,
			expected: `[{"name":"cpu","columns":["time","value"],"values":[["1970-01-01T00:00:01Z",100]]}]`,
		},
		{
			stmt:      `SELECT value FROM cpu LIMIT 1`,
			chunkSize: 2,
			expected:  `[{"name":"cpu","columns":["time","value"],"values":[["1970-01-01T00:00:01Z",100]]}]`,
		},
		{
			stmt:     `SELECT value FROM cpu WHERE host='serverA'`,
			expected: `[{"name":"cpu","columns":["time","value"],"values":[["1970-01-01T00:00:01Z",100]]}]`,
		},
		{
			stmt:     `SELECT value FROM cpu WHERE host='serverB'`,
			expected: `[{"name":"cpu","columns":["time","value"],"values":[["1970-01-01T00:00:02Z",200]]}]`,
		},
		{
			stmt:     `SELECT value FROM cpu WHERE host='serverC'`,
			expected: `null`,
		},
		{
			stmt:     `SELECT value FROM cpu GROUP BY host`,
			expected: `[{"name":"cpu","tags":{"host":"serverA"},"columns":["time","value"],"values":[["1970-01-01T00:00:01Z",100]]},{"name":"cpu","tags":{"host":"serverB"},"columns":["time","value"],"values":[["1970-01-01T00:00:02Z",200]]}]`,
		},
		{
			stmt:     `SELECT value FROM cpu GROUP BY region`,
			expected: `[{"name":"cpu","tags":{"region":"us-east"},"columns":["time","value"],"values":[["1970-01-01T00:00:01Z",100],["1970-01-01T00:00:02Z",200]]}]`,
		},
		{
			stmt:     `SELECT value FROM cpu GROUP BY host,region`,
			expected: `[{"name":"cpu","tags":{"host":"serverA","region":"us-east"},"columns":["time","value"],"values":[["1970-01-01T00:00:01Z",100]]},{"name":"cpu","tags":{"host":"serverB","region":"us-east"},"columns":["time","value"],"values":[["1970-01-01T00:00:02Z",200]]}]`,
		},
		{
			stmt:     `SELECT value FROM cpu WHERE host='serverA' GROUP BY host`,
			expected: `[{"name":"cpu","tags":{"host":"serverA"},"columns":["time","value"],"values":[["1970-01-01T00:00:01Z",100]]}]`,
		},

		// Aggregate queries.
		{
			stmt:     `SELECT sum(value) FROM cpu`,
			expected: `[{"name":"cpu","columns":["time","sum"],"values":[["1970-01-01T00:00:00Z",300]]}]`,
		},
	}

	for _, tt := range tests {
		if tt.skip {
			t.Logf("Skipping test %s", tt.stmt)
			continue
		}
		executor, err := query_executor.plan(mustParseSelectStatement(tt.stmt), tt.chunkSize)
		if err != nil {
			t.Fatalf("failed to plan query: %s", err.Error())
		}
		got := executeAndGetResults(executor)
		if got != tt.expected {
			t.Fatalf("Test %s\nexp: %s\ngot: %s\n", tt.stmt, tt.expected, got)
		}
	}
}
// TestWritePointsAndExecuteTwoShardsAlign verifies the executor correctly
// orders data across shards whose time ranges overlap: shard 0 holds points
// at t=1s and the second shard interleaves at t=2s and t=3s, so chunked
// output must be merged in strict time order.
func TestWritePointsAndExecuteTwoShardsAlign(t *testing.T) {
	// Create the mock planner and its metastore.
	store, query_executor := testStoreAndQueryExecutor()
	defer os.RemoveAll(store.path)
	query_executor.MetaStore = &testQEMetastore{
		sgFunc: func(database, policy string, min, max time.Time) (a []meta.ShardGroupInfo, err error) {
			return []meta.ShardGroupInfo{
				{
					ID:        sgID,
					StartTime: time.Now().Add(-2 * time.Hour),
					EndTime:   time.Now().Add(-time.Hour),
					Shards: []meta.ShardInfo{
						{
							ID:       uint64(sID1),
							OwnerIDs: []uint64{nID},
						},
					},
				},
				{
					ID:        sgID,
					StartTime: time.Now().Add(-2 * time.Hour),
					EndTime:   time.Now().Add(time.Hour),
					Shards: []meta.ShardInfo{
						{
							ID:       uint64(sID0),
							OwnerIDs: []uint64{nID},
						},
					},
				},
			}, nil
		},
	}

	// Write interleaving, by time, chunks to the shards.
	if err := store.WriteToShard(sID0, []Point{NewPoint(
		"cpu",
		map[string]string{"host": "serverA"},
		map[string]interface{}{"value": 100},
		time.Unix(1, 0).UTC(),
	)}); err != nil {
		t.Fatalf(err.Error())
	}
	if err := store.WriteToShard(sID1, []Point{NewPoint(
		"cpu",
		map[string]string{"host": "serverB"},
		map[string]interface{}{"value": 200},
		time.Unix(2, 0).UTC(),
	)}); err != nil {
		t.Fatalf(err.Error())
	}
	if err := store.WriteToShard(sID1, []Point{NewPoint(
		"cpu",
		map[string]string{"host": "serverA"},
		map[string]interface{}{"value": 300},
		time.Unix(3, 0).UTC(),
	)}); err != nil {
		t.Fatalf(err.Error())
	}

	var tests = []struct {
		skip      bool   // Skip test
		stmt      string // Query statement
		chunkSize int    // Chunk size for driving the executor
		expected  string // Expected results, rendered as a string
	}{
		{
			stmt:      `SELECT value FROM cpu`,
			chunkSize: 1,
			expected:  `[{"name":"cpu","columns":["time","value"],"values":[["1970-01-01T00:00:01Z",100]]},{"name":"cpu","columns":["time","value"],"values":[["1970-01-01T00:00:02Z",200]]},{"name":"cpu","columns":["time","value"],"values":[["1970-01-01T00:00:03Z",300]]}]`,
		},
		{
			stmt:      `SELECT value FROM cpu`,
			chunkSize: 2,
			expected:  `[{"name":"cpu","columns":["time","value"],"values":[["1970-01-01T00:00:01Z",100],["1970-01-01T00:00:02Z",200]]},{"name":"cpu","columns":["time","value"],"values":[["1970-01-01T00:00:03Z",300]]}]`,
		},
		{
			stmt:      `SELECT mean(value),sum(value) FROM cpu`,
			chunkSize: 2,
			expected:  `[{"name":"cpu","columns":["time","mean","sum"],"values":[["1970-01-01T00:00:00Z",200,600]]}]`,
		},
	}

	for _, tt := range tests {
		if tt.skip {
			t.Logf("Skipping test %s", tt.stmt)
			continue
		}
		executor, err := query_executor.plan(mustParseSelectStatement(tt.stmt), tt.chunkSize)
		if err != nil {
			t.Fatalf("failed to plan query: %s", err.Error())
		}
		got := executeAndGetResults(executor)
		if got != tt.expected {
			t.Fatalf("Test %s\nexp: %s\ngot: %s\n", tt.stmt, tt.expected, got)
		}
	}
}
// TestWritePointsAndExecuteTwoShardsTagSetOrdering verifies the executor
// correctly orders data across shards when the tagsets are not presented in
// alphabetical order across shards: shard 0 emits tagsets "y","z" while
// shard 1 emits "x","y","z", so the executor must still produce series in
// alphabetical tagset order.
func TestWritePointsAndExecuteTwoShardsTagSetOrdering(t *testing.T) {
	// Create the mock planner and its metastore.
	store, query_executor := testStoreAndQueryExecutor()
	defer os.RemoveAll(store.path)
	query_executor.MetaStore = &testQEMetastore{
		sgFunc: func(database, policy string, min, max time.Time) (a []meta.ShardGroupInfo, err error) {
			return []meta.ShardGroupInfo{
				{
					ID: sgID,
					Shards: []meta.ShardInfo{
						{
							ID:       uint64(sID0),
							OwnerIDs: []uint64{nID},
						},
					},
				},
				{
					ID: sgID,
					Shards: []meta.ShardInfo{
						{
							ID:       uint64(sID1),
							OwnerIDs: []uint64{nID},
						},
					},
				},
			}, nil
		},
	}

	// Write tagsets "y" and "z" to first shard.
	if err := store.WriteToShard(sID0, []Point{NewPoint(
		"cpu",
		map[string]string{"host": "y"},
		map[string]interface{}{"value": 100},
		time.Unix(1, 0).UTC(),
	)}); err != nil {
		t.Fatalf(err.Error())
	}
	if err := store.WriteToShard(sID0, []Point{NewPoint(
		"cpu",
		map[string]string{"host": "z"},
		map[string]interface{}{"value": 200},
		time.Unix(1, 0).UTC(),
	)}); err != nil {
		t.Fatalf(err.Error())
	}

	// Write tagsets "x", y" and "z" to second shard.
	if err := store.WriteToShard(sID1, []Point{NewPoint(
		"cpu",
		map[string]string{"host": "x"},
		map[string]interface{}{"value": 300},
		time.Unix(2, 0).UTC(),
	)}); err != nil {
		t.Fatalf(err.Error())
	}
	if err := store.WriteToShard(sID1, []Point{NewPoint(
		"cpu",
		map[string]string{"host": "y"},
		map[string]interface{}{"value": 400},
		time.Unix(3, 0).UTC(),
	)}); err != nil {
		t.Fatalf(err.Error())
	}
	if err := store.WriteToShard(sID1, []Point{NewPoint(
		"cpu",
		map[string]string{"host": "z"},
		map[string]interface{}{"value": 500},
		time.Unix(3, 0).UTC(),
	)}); err != nil {
		t.Fatalf(err.Error())
	}

	var tests = []struct {
		skip      bool   // Skip test
		stmt      string // Query statement
		chunkSize int    // Chunk size for driving the executor
		expected  string // Expected results, rendered as a string
	}{
		{
			stmt:     `SELECT sum(value) FROM cpu GROUP BY host`,
			expected: `[{"name":"cpu","tags":{"host":"x"},"columns":["time","sum"],"values":[["1970-01-01T00:00:00Z",300]]},{"name":"cpu","tags":{"host":"y"},"columns":["time","sum"],"values":[["1970-01-01T00:00:00Z",500]]},{"name":"cpu","tags":{"host":"z"},"columns":["time","sum"],"values":[["1970-01-01T00:00:00Z",700]]}]`,
		},
		{
			stmt:     `SELECT value FROM cpu GROUP BY host`,
			expected: `[{"name":"cpu","tags":{"host":"x"},"columns":["time","value"],"values":[["1970-01-01T00:00:02Z",300]]},{"name":"cpu","tags":{"host":"y"},"columns":["time","value"],"values":[["1970-01-01T00:00:01Z",100],["1970-01-01T00:00:03Z",400]]},{"name":"cpu","tags":{"host":"z"},"columns":["time","value"],"values":[["1970-01-01T00:00:01Z",200],["1970-01-01T00:00:03Z",500]]}]`,
		},
	}

	for _, tt := range tests {
		if tt.skip {
			t.Logf("Skipping test %s", tt.stmt)
			continue
		}
		executor, err := query_executor.plan(mustParseSelectStatement(tt.stmt), tt.chunkSize)
		if err != nil {
			t.Fatalf("failed to plan query: %s", err.Error())
		}
		got := executeAndGetResults(executor)
		if got != tt.expected {
			t.Fatalf("Test %s\nexp: %s\ngot: %s\n", tt.stmt, tt.expected, got)
		}
	}
}
// TestProcessAggregateDerivative tests the processAggregateDerivative
// transformation function on the engine. This path is used for a query with
// a GROUP BY. Each case feeds [time, value] rows in and compares the
// transformed rows exactly.
func TestProcessAggregateDerivative(t *testing.T) {
	tests := []struct {
		name     string
		fn       string          // "derivative" or "non_negative_derivative"
		interval time.Duration   // normalization interval for the derivative
		in       [][]interface{} // input [time, value] rows
		exp      [][]interface{} // expected transformed rows
	}{
		{
			name:     "empty input",
			fn:       "derivative",
			interval: 24 * time.Hour,
			in:       [][]interface{}{},
			exp:      [][]interface{}{},
		},
		{
			name:     "single row returns 0.0",
			fn:       "derivative",
			interval: 24 * time.Hour,
			in: [][]interface{}{
				[]interface{}{
					time.Unix(0, 0), 1.0,
				},
			},
			exp: [][]interface{}{
				[]interface{}{
					time.Unix(0, 0), 0.0,
				},
			},
		},
		{
			name:     "basic derivative",
			fn:       "derivative",
			interval: 24 * time.Hour,
			in: [][]interface{}{
				[]interface{}{
					time.Unix(0, 0), 1.0,
				},
				[]interface{}{
					time.Unix(0, 0).Add(24 * time.Hour), 3.0,
				},
				[]interface{}{
					time.Unix(0, 0).Add(48 * time.Hour), 5.0,
				},
				[]interface{}{
					time.Unix(0, 0).Add(72 * time.Hour), 9.0,
				},
			},
			exp: [][]interface{}{
				[]interface{}{
					time.Unix(0, 0).Add(24 * time.Hour), 2.0,
				},
				[]interface{}{
					time.Unix(0, 0).Add(48 * time.Hour), 2.0,
				},
				[]interface{}{
					time.Unix(0, 0).Add(72 * time.Hour), 4.0,
				},
			},
		},
		{
			name:     "12h interval",
			fn:       "derivative",
			interval: 12 * time.Hour,
			in: [][]interface{}{
				[]interface{}{
					time.Unix(0, 0), 1.0,
				},
				[]interface{}{
					time.Unix(0, 0).Add(24 * time.Hour), 2.0,
				},
				[]interface{}{
					time.Unix(0, 0).Add(48 * time.Hour), 3.0,
				},
				[]interface{}{
					time.Unix(0, 0).Add(72 * time.Hour), 4.0,
				},
			},
			exp: [][]interface{}{
				[]interface{}{
					time.Unix(0, 0).Add(24 * time.Hour), 0.5,
				},
				[]interface{}{
					time.Unix(0, 0).Add(48 * time.Hour), 0.5,
				},
				[]interface{}{
					time.Unix(0, 0).Add(72 * time.Hour), 0.5,
				},
			},
		},
		{
			name:     "negative derivatives",
			fn:       "derivative",
			interval: 24 * time.Hour,
			in: [][]interface{}{
				[]interface{}{
					time.Unix(0, 0), 1.0,
				},
				[]interface{}{
					time.Unix(0, 0).Add(24 * time.Hour), 2.0,
				},
				[]interface{}{
					time.Unix(0, 0).Add(48 * time.Hour), 0.0,
				},
				[]interface{}{
					time.Unix(0, 0).Add(72 * time.Hour), 4.0,
				},
			},
			exp: [][]interface{}{
				[]interface{}{
					time.Unix(0, 0).Add(24 * time.Hour), 1.0,
				},
				[]interface{}{
					time.Unix(0, 0).Add(48 * time.Hour), -2.0,
				},
				[]interface{}{
					time.Unix(0, 0).Add(72 * time.Hour), 4.0,
				},
			},
		},
		{
			name:     "negative derivatives",
			fn:       "non_negative_derivative",
			interval: 24 * time.Hour,
			in: [][]interface{}{
				[]interface{}{
					time.Unix(0, 0), 1.0,
				},
				[]interface{}{
					time.Unix(0, 0).Add(24 * time.Hour), 2.0,
				},
				// This value results in a negative derivative, so
				// non_negative_derivative must drop the row from the output.
				[]interface{}{
					time.Unix(0, 0).Add(48 * time.Hour), 0.0,
				},
				[]interface{}{
					time.Unix(0, 0).Add(72 * time.Hour), 4.0,
				},
			},
			exp: [][]interface{}{
				[]interface{}{
					time.Unix(0, 0).Add(24 * time.Hour), 1.0,
				},
				[]interface{}{
					time.Unix(0, 0).Add(72 * time.Hour), 4.0,
				},
			},
		},
		{
			// NOTE(review): despite the name, the inputs after the first are
			// int64 — this case appears to exercise int64-to-float promotion
			// in the derivative; confirm intent.
			name:     "float derivatives",
			fn:       "derivative",
			interval: 24 * time.Hour,
			in: [][]interface{}{
				[]interface{}{
					time.Unix(0, 0), 1.0,
				},
				[]interface{}{
					time.Unix(0, 0).Add(24 * time.Hour), int64(3),
				},
				[]interface{}{
					time.Unix(0, 0).Add(48 * time.Hour), int64(5),
				},
				[]interface{}{
					time.Unix(0, 0).Add(72 * time.Hour), int64(9),
				},
			},
			exp: [][]interface{}{
				[]interface{}{
					time.Unix(0, 0).Add(24 * time.Hour), 2.0,
				},
				[]interface{}{
					time.Unix(0, 0).Add(48 * time.Hour), 2.0,
				},
				[]interface{}{
					time.Unix(0, 0).Add(72 * time.Hour), 4.0,
				},
			},
		},
	}

	for _, test := range tests {
		got := processAggregateDerivative(test.in, test.fn == "non_negative_derivative", test.interval)

		if len(got) != len(test.exp) {
			t.Fatalf("processAggregateDerivative(%s) - %s\nlen mismatch: got %d, exp %d", test.fn, test.name, len(got), len(test.exp))
		}

		for i := 0; i < len(test.exp); i++ {
			if test.exp[i][0] != got[i][0] || test.exp[i][1] != got[i][1] {
				t.Fatalf("processAggregateDerivative - %s results mismatch:\ngot %v\nexp %v", test.name, got, test.exp)
			}
		}
	}
}
// TestProcessRawQueryDerivative tests the rawQueryDerivativeProcessor
// transformation function on the engine. This path is used for queries that
// do not have a GROUP BY. Float comparisons use an epsilon of 1e-7.
func TestProcessRawQueryDerivative(t *testing.T) {
	tests := []struct {
		name     string
		fn       string        // "derivative" or "non_negative_derivative"
		interval time.Duration // normalization interval for the derivative
		in       []*mapperValue
		exp      []*mapperValue
	}{
		{
			name:     "empty input",
			fn:       "derivative",
			interval: 24 * time.Hour,
			in:       []*mapperValue{},
			exp:      []*mapperValue{},
		},

		{
			name:     "single row returns 0.0",
			fn:       "derivative",
			interval: 24 * time.Hour,
			in: []*mapperValue{
				{
					Time:  time.Unix(0, 0).Unix(),
					Value: 1.0,
				},
			},
			exp: []*mapperValue{
				{
					Time:  time.Unix(0, 0).Unix(),
					Value: 0.0,
				},
			},
		},
		{
			name:     "basic derivative",
			fn:       "derivative",
			interval: 24 * time.Hour,
			in: []*mapperValue{
				{
					Time:  time.Unix(0, 0).Unix(),
					Value: 0.0,
				},
				{
					Time:  time.Unix(0, 0).Add(24 * time.Hour).UnixNano(),
					Value: 3.0,
				},
				{
					Time:  time.Unix(0, 0).Add(48 * time.Hour).UnixNano(),
					Value: 5.0,
				},
				{
					Time:  time.Unix(0, 0).Add(72 * time.Hour).UnixNano(),
					Value: 9.0,
				},
			},
			exp: []*mapperValue{
				{
					Time:  time.Unix(0, 0).Add(24 * time.Hour).UnixNano(),
					Value: 3.0,
				},
				{
					Time:  time.Unix(0, 0).Add(48 * time.Hour).UnixNano(),
					Value: 2.0,
				},
				{
					Time:  time.Unix(0, 0).Add(72 * time.Hour).UnixNano(),
					Value: 4.0,
				},
			},
		},
		{
			name:     "12h interval",
			fn:       "derivative",
			interval: 12 * time.Hour,
			in: []*mapperValue{
				{
					Time:  time.Unix(0, 0).UnixNano(),
					Value: 1.0,
				},
				{
					Time:  time.Unix(0, 0).Add(24 * time.Hour).UnixNano(),
					Value: 2.0,
				},
				{
					Time:  time.Unix(0, 0).Add(48 * time.Hour).UnixNano(),
					Value: 3.0,
				},
				{
					Time:  time.Unix(0, 0).Add(72 * time.Hour).UnixNano(),
					Value: 4.0,
				},
			},
			exp: []*mapperValue{
				{
					Time:  time.Unix(0, 0).Add(24 * time.Hour).UnixNano(),
					Value: 0.5,
				},
				{
					Time:  time.Unix(0, 0).Add(48 * time.Hour).UnixNano(),
					Value: 0.5,
				},
				{
					Time:  time.Unix(0, 0).Add(72 * time.Hour).UnixNano(),
					Value: 0.5,
				},
			},
		},
		{
			name:     "negative derivatives",
			fn:       "derivative",
			interval: 24 * time.Hour,
			in: []*mapperValue{
				{
					Time:  time.Unix(0, 0).Unix(),
					Value: 1.0,
				},
				{
					Time:  time.Unix(0, 0).Add(24 * time.Hour).UnixNano(),
					Value: 2.0,
				},
				// should go negative
				{
					Time:  time.Unix(0, 0).Add(48 * time.Hour).UnixNano(),
					Value: 0.0,
				},
				{
					Time:  time.Unix(0, 0).Add(72 * time.Hour).UnixNano(),
					Value: 4.0,
				},
			},
			exp: []*mapperValue{
				{
					Time:  time.Unix(0, 0).Add(24 * time.Hour).UnixNano(),
					Value: 1.0,
				},
				{
					Time:  time.Unix(0, 0).Add(48 * time.Hour).UnixNano(),
					Value: -2.0,
				},
				{
					Time:  time.Unix(0, 0).Add(72 * time.Hour).UnixNano(),
					Value: 4.0,
				},
			},
		},
		{
			name:     "negative derivatives",
			fn:       "non_negative_derivative",
			interval: 24 * time.Hour,
			in: []*mapperValue{
				{
					Time:  time.Unix(0, 0).Unix(),
					Value: 1.0,
				},
				{
					Time:  time.Unix(0, 0).Add(24 * time.Hour).UnixNano(),
					Value: 2.0,
				},
				// should go negative; non_negative_derivative drops the row
				{
					Time:  time.Unix(0, 0).Add(48 * time.Hour).UnixNano(),
					Value: 0.0,
				},
				{
					Time:  time.Unix(0, 0).Add(72 * time.Hour).UnixNano(),
					Value: 4.0,
				},
			},
			exp: []*mapperValue{
				{
					Time:  time.Unix(0, 0).Add(24 * time.Hour).UnixNano(),
					Value: 1.0,
				},
				{
					Time:  time.Unix(0, 0).Add(72 * time.Hour).UnixNano(),
					Value: 4.0,
				},
			},
		},
	}

	for _, test := range tests {
		p := rawQueryDerivativeProcessor{
			isNonNegative:      test.fn == "non_negative_derivative",
			derivativeInterval: test.interval,
		}
		got := p.process(test.in)

		if len(got) != len(test.exp) {
			t.Fatalf("rawQueryDerivativeProcessor(%s) - %s\nlen mismatch: got %d, exp %d", test.fn, test.name, len(got), len(test.exp))
		}

		for i := 0; i < len(test.exp); i++ {
			// NOTE(review): leftover debug output below — prefer t.Logf or
			// deletion (verify the file-level "fmt" import has other uses
			// before removing).
			fmt.Println("Times:", test.exp[i].Time, got[i].Time)
			if test.exp[i].Time != got[i].Time || math.Abs((test.exp[i].Value.(float64)-got[i].Value.(float64))) > 0.0000001 {
				t.Fatalf("rawQueryDerivativeProcessor - %s results mismatch:\ngot %v\nexp %v", test.name, got, test.exp)
			}
		}
	}
}
// testQEMetastore is a mock QueryExecutor metastore. Only the shard-group
// lookup is configurable via sgFunc; every other method is a stub returning
// zero values.
type testQEMetastore struct {
	sgFunc func(database, policy string, min, max time.Time) (a []meta.ShardGroupInfo, err error)
}

// ShardGroupsByTimeRange delegates to the test-provided sgFunc.
func (t *testQEMetastore) ShardGroupsByTimeRange(database, policy string, min, max time.Time) (a []meta.ShardGroupInfo, err error) {
	return t.sgFunc(database, policy, min, max)
}

// The remaining metastore methods are inert stubs.
func (t *testQEMetastore) Database(name string) (*meta.DatabaseInfo, error) { return nil, nil }
func (t *testQEMetastore) Databases() ([]meta.DatabaseInfo, error)          { return nil, nil }
func (t *testQEMetastore) User(name string) (*meta.UserInfo, error)         { return nil, nil }
func (t *testQEMetastore) AdminUserExists() (bool, error)                   { return false, nil }
func (t *testQEMetastore) Authenticate(username, password string) (*meta.UserInfo, error) {
	return nil, nil
}
func (t *testQEMetastore) RetentionPolicy(database, name string) (rpi *meta.RetentionPolicyInfo, err error) {
	return nil, nil
}
func (t *testQEMetastore) UserCount() (int, error) { return 0, nil }

// NodeID returns the fixed test node ID.
func (t *testQEMetastore) NodeID() uint64 { return nID }
// testStoreAndQueryExecutor creates an opened Store backed by a temporary
// directory, creates the two shards the tests write to, and returns the store
// plus a QueryExecutor wired to it via a testQEShardMapper. Panics on any
// setup failure so tests fail fast (the original ignored the TempDir and
// CreateShard errors, which could mask setup problems as query failures).
func testStoreAndQueryExecutor() (*Store, *QueryExecutor) {
	path, err := ioutil.TempDir("", "")
	if err != nil {
		panic(err)
	}
	store := NewStore(path)
	if err := store.Open(); err != nil {
		panic(err)
	}

	database := "foo"
	retentionPolicy := "bar"
	if err := store.CreateShard(database, retentionPolicy, sID0); err != nil {
		panic(err)
	}
	if err := store.CreateShard(database, retentionPolicy, sID1); err != nil {
		panic(err)
	}

	executor := NewQueryExecutor(store)
	executor.ShardMapper = &testQEShardMapper{store}
	return store, executor
}
// testQEShardMapper is a ShardMapper that maps directly against the local
// test store, bypassing any remote/cluster mapping.
type testQEShardMapper struct {
	store *Store
}

// CreateMapper delegates to the store, mapping the given shard for stmt.
func (t *testQEShardMapper) CreateMapper(shard meta.ShardInfo, stmt string, chunkSize int) (Mapper, error) {
	return t.store.CreateMapper(shard.ID, stmt, chunkSize)
}
// executeAndGetResults drains the executor's output channel and returns all
// received rows rendered as a single JSON string.
func executeAndGetResults(executor Executor) string {
	var rows []*influxql.Row
	for row := range executor.Execute() {
		rows = append(rows, row)
	}
	return string(mustMarshalJSON(rows))
}
import _ "github.com/influxdb/influxdb/tsdb/engine"

View File

@ -0,0 +1,981 @@
package tsdb
import (
"fmt"
"math"
"sort"
"time"
"github.com/influxdb/influxdb/influxql"
)
const (
	// MaxGroupByPoints is the upper bound on points a GROUP BY statement may
	// select before an error is returned. Most likely the user specified a
	// group by interval without time boundaries.
	MaxGroupByPoints = 100000

	// SelectColumnCountWithOneValue is the column count when selecting only a
	// single other value — since time is always selected, it is 2.
	SelectColumnCountWithOneValue = 2

	// IgnoredChunkSize is what gets passed into Mapper.Begin for aggregate
	// queries, as they don't chunk points out.
	IgnoredChunkSize = 0
)
// Mapper is the interface all Mapper types must implement.
type Mapper interface {
	// Open prepares the mapper for reading.
	Open() error
	// TagSets returns the tagset keys available from this mapper.
	TagSets() []string
	// Fields returns the field names available from this mapper.
	Fields() []string
	// NextChunk returns the next chunk of data; a nil result signals the
	// mapper is drained. The concrete chunk type is implementation-defined.
	NextChunk() (interface{}, error)
	// Close releases the mapper's resources.
	Close()
}
// StatefulMapper encapsulates a Mapper and some state that the executor needs
// to track for that mapper.
type StatefulMapper struct {
	Mapper
	bufferedChunk *MapperOutput // last read chunk, not yet consumed
	drained       bool          // true once the mapper has no more data
}

// NextChunk returns the mapper's next chunk, normalized to a *MapperOutput.
// A nil chunk, or any value that is not a *MapperOutput (including a typed
// nil wrapped in interface{}), is returned as (nil, nil), signalling drained.
//
// The original contained `if chunk == interface{}(nil)` after a failed type
// assertion — comparing a typed-nil *MapperOutput to a nil interface is
// always false, so that branch was dead; the net behavior (return nil) is
// preserved here without the dead code.
func (sm *StatefulMapper) NextChunk() (*MapperOutput, error) {
	c, err := sm.Mapper.NextChunk()
	if err != nil {
		return nil, err
	}
	chunk, ok := c.(*MapperOutput)
	if !ok {
		// Failed assertion leaves chunk as a nil *MapperOutput; report drained.
		return nil, nil
	}
	return chunk, nil
}
// Executor drives one or more Mappers to produce the rows for a SELECT
// statement, merging their outputs in tagset and time order.
type Executor struct {
	stmt           *influxql.SelectStatement
	mappers        []*StatefulMapper   // one wrapped mapper per shard
	chunkSize      int                 // max values per emitted row chunk
	limitedTagSets map[string]struct{} // Set tagsets for which data has reached the LIMIT.
}
// NewExecutor returns a new Executor that wraps each given Mapper with the
// per-mapper state the executor tracks.
func NewExecutor(stmt *influxql.SelectStatement, mappers []Mapper, chunkSize int) *Executor {
	stateful := make([]*StatefulMapper, 0, len(mappers))
	for _, m := range mappers {
		stateful = append(stateful, &StatefulMapper{Mapper: m})
	}
	return &Executor{
		stmt:           stmt,
		mappers:        stateful,
		chunkSize:      chunkSize,
		limitedTagSets: make(map[string]struct{}),
	}
}
// Execute begins execution of the query and returns a channel to receive rows.
// Execution runs in a background goroutine; the returned channel is closed
// when the query completes.
func (e *Executor) Execute() <-chan *influxql.Row {
	// Create output channel and stream data in a separate goroutine.
	out := make(chan *influxql.Row, 0)

	// Certain operations on the SELECT statement can be performed by the Executor without
	// assistance from the Mappers. This allows the Executor to prepare aggregation functions
	// and mathematical functions.
	e.stmt.RewriteDistinct()

	// Raw (and simple-derivative) queries stream values; everything else is
	// reduced through aggregate functions.
	if (e.stmt.IsRawQuery && !e.stmt.HasDistinct()) || e.stmt.IsSimpleDerivative() {
		go e.executeRaw(out)
	} else {
		go e.executeAggregate(out)
	}
	return out
}
// mappersDrained reports whether every one of the executor's Mappers has
// been fully drained of data.
func (e *Executor) mappersDrained() bool {
	drained := true
	for _, m := range e.mappers {
		drained = drained && m.drained
	}
	return drained
}
// nextMapperTagSet returns the alphabetically lowest tagset key buffered
// across all Mappers, or "" when no mapper has a buffered chunk.
func (e *Executor) nextMapperTagSet() string {
	lowest := ""
	for _, m := range e.mappers {
		if m.bufferedChunk == nil {
			continue
		}
		key := m.bufferedChunk.key()
		if lowest == "" || key < lowest {
			lowest = key
		}
	}
	return lowest
}
// nextMapperLowestTime returns, for the given tagset, the lowest of the
// maximum buffered timestamps across all Mappers. Values up to this time can
// be emitted without risking out-of-order output.
func (e *Executor) nextMapperLowestTime(tagset string) int64 {
	minTime := int64(math.MaxInt64)
	for _, m := range e.mappers {
		if m.drained || m.bufferedChunk == nil {
			continue
		}
		if m.bufferedChunk.key() != tagset {
			continue
		}
		if t := m.bufferedChunk.Values[len(m.bufferedChunk.Values)-1].Time; t < minTime {
			minTime = t
		}
	}
	return minTime
}
// tagSetIsLimited reports whether data for the given tagset has already
// reached its LIMIT.
func (e *Executor) tagSetIsLimited(tagset string) bool {
	_, limited := e.limitedTagSets[tagset]
	return limited
}
// limitTagSet records that the given tagset has reached its LIMIT, so any
// further data for it will be discarded.
func (e *Executor) limitTagSet(tagset string) {
	e.limitedTagSets[tagset] = struct{}{}
}
// executeRaw streams raw (non-aggregate) query results to out, merging data
// from all mappers in alphabetical tagset order and, within a tagset, in time
// order. LIMIT/OFFSET/derivative handling is delegated to a per-tagset
// limitedRowWriter. The out channel is closed when execution finishes.
func (e *Executor) executeRaw(out chan *influxql.Row) {
	// It's important that all resources are released when execution completes.
	defer e.close()

	// Open the mappers.
	for _, m := range e.mappers {
		if err := m.Open(); err != nil {
			out <- &influxql.Row{Err: err}
			return
		}
	}

	// Get the distinct fields across all mappers.
	var selectFields, aliasFields []string
	if e.stmt.HasWildcard() {
		sf := newStringSet()
		for _, m := range e.mappers {
			sf.add(m.Fields()...)
		}
		selectFields = sf.list()
		aliasFields = selectFields
	} else {
		selectFields = e.stmt.Fields.Names()
		aliasFields = e.stmt.Fields.AliasNames()
	}

	// Used to read ahead chunks from mappers.
	var rowWriter *limitedRowWriter
	var currTagset string

	// Keep looping until all mappers drained.
	var err error
	for {
		// Get the next chunk from each Mapper.
		for _, m := range e.mappers {
			if m.drained {
				continue
			}

			// Set the next buffered chunk on the mapper, or mark it drained.
			for {
				if m.bufferedChunk == nil {
					m.bufferedChunk, err = m.NextChunk()
					if err != nil {
						out <- &influxql.Row{Err: err}
						return
					}
					if m.bufferedChunk == nil {
						// Mapper can do no more for us.
						m.drained = true
						break
					}

					// If the SELECT query is on more than 1 field, but the chunks values from the Mappers
					// only contain a single value, create k-v pairs using the field name of the chunk
					// and the value of the chunk. If there is only 1 SELECT field across all mappers then
					// there is no need to create k-v pairs, and there is no need to distinguish field data,
					// as it is all for the *same* field.
					if len(selectFields) > 1 && len(m.bufferedChunk.Fields) == 1 {
						fieldKey := m.bufferedChunk.Fields[0]

						for i := range m.bufferedChunk.Values {
							field := map[string]interface{}{fieldKey: m.bufferedChunk.Values[i].Value}
							m.bufferedChunk.Values[i].Value = field
						}
					}
				}

				if e.tagSetIsLimited(m.bufferedChunk.Name) {
					// chunk's tagset is limited, so no good. Try again.
					m.bufferedChunk = nil
					continue
				}
				// This mapper has a chunk available, and it is not limited.
				break
			}
		}

		// All Mappers done?
		// NOTE(review): rowWriter may still be nil here on a data-less query —
		// presumably (*limitedRowWriter).Flush tolerates a nil receiver; confirm.
		if e.mappersDrained() {
			rowWriter.Flush()
			break
		}

		// Send out data for the next alphabetically-lowest tagset. All Mappers emit data in this order,
		// so by always continuing with the lowest tagset until it is finished, we process all data in
		// the required order, and don't "miss" any.
		tagset := e.nextMapperTagSet()
		if tagset != currTagset {
			currTagset = tagset
			// Tagset has changed, time for a new rowWriter. Be sure to kick out any residual values.
			rowWriter.Flush()
			rowWriter = nil
		}

		// Process the mapper outputs. We can send out everything up to the min of the last time
		// of the chunks for the next tagset.
		minTime := e.nextMapperLowestTime(tagset)

		// Now empty out all the chunks up to the min time. Create new output struct for this data.
		var chunkedOutput *MapperOutput
		for _, m := range e.mappers {
			if m.drained {
				continue
			}

			// This mapper's next chunk is not for the next tagset, or the very first value of
			// the chunk is at a higher acceptable timestamp. Skip it.
			if m.bufferedChunk.key() != tagset || m.bufferedChunk.Values[0].Time > minTime {
				continue
			}

			// Find the index of the point up to the min.
			ind := len(m.bufferedChunk.Values)
			for i, mo := range m.bufferedChunk.Values {
				if mo.Time > minTime {
					ind = i
					break
				}
			}

			// Add up to the index to the values
			if chunkedOutput == nil {
				chunkedOutput = &MapperOutput{
					Name:      m.bufferedChunk.Name,
					Tags:      m.bufferedChunk.Tags,
					cursorKey: m.bufferedChunk.key(),
				}
				chunkedOutput.Values = m.bufferedChunk.Values[:ind]
			} else {
				chunkedOutput.Values = append(chunkedOutput.Values, m.bufferedChunk.Values[:ind]...)
			}

			// Clear out the values being sent out, keep the remainder.
			m.bufferedChunk.Values = m.bufferedChunk.Values[ind:]

			// If we emptied out all the values, clear the mapper's buffered chunk.
			if len(m.bufferedChunk.Values) == 0 {
				m.bufferedChunk = nil
			}
		}

		// Sort the values by time first so we can then handle offset and limit
		// NOTE(review): if no mapper contributed values this pass, chunkedOutput
		// is nil and this would panic — presumably nextMapperTagSet guarantees
		// at least one contributing mapper; confirm.
		sort.Sort(MapperValues(chunkedOutput.Values))

		// Now that we have full name and tag details, initialize the rowWriter.
		// The Name and Tags will be the same for all mappers.
		if rowWriter == nil {
			rowWriter = &limitedRowWriter{
				limit:       e.stmt.Limit,
				offset:      e.stmt.Offset,
				chunkSize:   e.chunkSize,
				name:        chunkedOutput.Name,
				tags:        chunkedOutput.Tags,
				selectNames: selectFields,
				aliasNames:  aliasFields,
				fields:      e.stmt.Fields,
				c:           out,
			}
		}
		if e.stmt.HasDerivative() {
			interval, err := derivativeInterval(e.stmt)
			if err != nil {
				out <- &influxql.Row{Err: err}
				return
			}
			rowWriter.transformer = &RawQueryDerivativeProcessor{
				IsNonNegative:      e.stmt.FunctionCalls()[0].Name == "non_negative_derivative",
				DerivativeInterval: interval,
			}
		}

		// Emit the data via the limiter.
		if limited := rowWriter.Add(chunkedOutput.Values); limited {
			// Limit for this tagset was reached, mark it and start draining a new tagset.
			e.limitTagSet(chunkedOutput.key())
			continue
		}
	}

	close(out)
}
// executeAggregate drives the aggregate-query path: it initializes one reduce
// function per aggregate call, opens all mappers, repeatedly drains chunks for
// the alphabetically-lowest tagset, buckets the mapped values by interval start
// time, applies the reduce functions per bucket in ascending time order, and
// emits one row per tagset on out. It always closes out before returning.
func (e *Executor) executeAggregate(out chan *influxql.Row) {
	// It's important to close all resources when execution completes.
	defer e.close()

	// Create the functions which will reduce values from mappers for
	// a given interval. The function offsets within this slice match
	// the offsets within the value slices that are returned by the
	// mapper.
	aggregates := e.stmt.FunctionCalls()
	reduceFuncs := make([]influxql.ReduceFunc, len(aggregates))
	for i, c := range aggregates {
		reduceFunc, err := influxql.InitializeReduceFunc(c)
		if err != nil {
			out <- &influxql.Row{Err: err}
			return
		}
		reduceFuncs[i] = reduceFunc
	}

	// Put together the rows to return, starting with columns.
	// Column 0 is always "time"; the rest follow the statement's fields.
	columnNames := make([]string, len(e.stmt.Fields)+1)
	columnNames[0] = "time"
	for i, f := range e.stmt.Fields {
		columnNames[i+1] = f.Name()
	}

	// Open the mappers.
	for _, m := range e.mappers {
		if err := m.Open(); err != nil {
			out <- &influxql.Row{Err: err}
			return
		}
	}

	// Build the set of available tagsets across all mappers. This is used for
	// later checks.
	availTagSets := newStringSet()
	for _, m := range e.mappers {
		for _, t := range m.TagSets() {
			availTagSets.add(t)
		}
	}

	// Prime each mapper's chunk buffer.
	var err error
	for _, m := range e.mappers {
		m.bufferedChunk, err = m.NextChunk()
		if err != nil {
			out <- &influxql.Row{Err: err}
			return
		}
		if m.bufferedChunk == nil {
			m.drained = true
		}
	}

	// Keep looping until all mappers drained.
	for !e.mappersDrained() {
		// Send out data for the next alphabetically-lowest tagset. All Mappers send out in this order
		// so collect data for this tagset, ignoring all others.
		tagset := e.nextMapperTagSet()
		chunks := []*MapperOutput{}

		// Pull as much as possible from each mapper. Stop when a mapper offers
		// data for a new tagset, or empties completely.
		for _, m := range e.mappers {
			if m.drained {
				continue
			}
			for {
				if m.bufferedChunk == nil {
					m.bufferedChunk, err = m.NextChunk()
					if err != nil {
						out <- &influxql.Row{Err: err}
						return
					}
					if m.bufferedChunk == nil {
						m.drained = true
						break
					}
				}

				// Got a chunk. Can we use it?
				if m.bufferedChunk.key() != tagset {
					// No, so just leave it in the buffer.
					break
				}

				// We can, take it.
				chunks = append(chunks, m.bufferedChunk)
				m.bufferedChunk = nil
			}
		}

		// Prep a row, ready for kicking out. It takes its name and tags from
		// the first chunk seen for this tagset.
		var row *influxql.Row

		// Prep for bucketing data by start time of the interval.
		buckets := map[int64][][]interface{}{}

		for _, chunk := range chunks {
			if row == nil {
				row = &influxql.Row{
					Name:    chunk.Name,
					Tags:    chunk.Tags,
					Columns: columnNames,
				}
			}

			// NOTE(review): only Values[0] of each chunk is consumed here —
			// aggregate chunks appear to carry one (time, values-slice) pair;
			// confirm against the mapper implementation.
			startTime := chunk.Values[0].Time
			_, ok := buckets[startTime]
			values := chunk.Values[0].Value.([]interface{})
			if !ok {
				buckets[startTime] = make([][]interface{}, len(values))
			}
			for i, v := range values {
				buckets[startTime][i] = append(buckets[startTime][i], v)
			}
		}

		// Now, after the loop above, within each time bucket is a slice. Within the element of each
		// slice is another slice of interface{}, ready for passing to the reducer functions.

		// Work each bucket of time, in time ascending order.
		tMins := make(int64arr, 0, len(buckets))
		for k, _ := range buckets {
			tMins = append(tMins, k)
		}
		sort.Sort(tMins)

		values := make([][]interface{}, len(tMins))
		for i, t := range tMins {
			values[i] = make([]interface{}, 0, len(columnNames))
			values[i] = append(values[i], time.Unix(0, t).UTC()) // Time value is always first.

			for j, f := range reduceFuncs {
				reducedVal := f(buckets[t][j])
				values[i] = append(values[i], reducedVal)
			}
		}

		// Perform any mathematics.
		values = processForMath(e.stmt.Fields, values)

		// Handle any fill options
		values = e.processFill(values)

		// process derivatives
		values = e.processDerivative(values)

		// If we have multiple tag sets we'll want to filter out the empty ones
		if len(availTagSets) > 1 && resultsEmpty(values) {
			continue
		}

		row.Values = values
		out <- row
	}

	close(out)
}
// processFill will take the results and return new results (or the same if no
// fill modifications are needed) with whatever fill options the query has.
func (e *Executor) processFill(results [][]interface{}) [][]interface{} {
	switch e.stmt.Fill {
	case influxql.NullFill:
		// Leave nulls exactly as they are.
		return results
	case influxql.NoFill:
		// Drop any row that has even one nil value. This one is tricky because
		// there could be multiple aggregates, but this option means that a row
		// with even one nil gets purged.
		kept := make([][]interface{}, 0, len(results))
		for _, row := range results {
			nilFound := false
			// Start at 1 because the first value is always time.
			for j := 1; j < len(row); j++ {
				if row[j] == nil {
					nilFound = true
					break
				}
			}
			if !nilFound {
				kept = append(kept, row)
			}
		}
		return kept
	}

	// They're either filling with previous values or a specific number.
	for i, row := range results {
		// Start at 1 because the first value is always time.
		for j := 1; j < len(row); j++ {
			if row[j] != nil {
				continue
			}
			switch e.stmt.Fill {
			case influxql.PreviousFill:
				if i != 0 {
					row[j] = results[i-1][j]
				}
			case influxql.NumberFill:
				row[j] = e.stmt.FillValue
			}
		}
	}
	return results
}
// processDerivative returns the derivatives of the results when the statement
// requests them; otherwise the results pass through untouched.
func (e *Executor) processDerivative(results [][]interface{}) [][]interface{} {
	// Nothing to do unless the statement has a derivative call.
	if !e.stmt.HasDerivative() {
		return results
	}

	interval, err := derivativeInterval(e.stmt)
	if err != nil {
		return results // XXX need to handle this better.
	}

	// non_negative_derivative drops negative differences entirely.
	nonNegative := e.stmt.FunctionCalls()[0].Name == "non_negative_derivative"
	return ProcessAggregateDerivative(results, nonNegative, interval)
}
// close closes the executor such that all resources are released. Once closed,
// an executor may not be re-used. It is safe to call on a nil receiver.
func (e *Executor) close() {
	if e == nil {
		return
	}
	for _, m := range e.mappers {
		m.Close()
	}
}
// limitedRowWriter accepts raw mapper values, and will emit those values as rows in chunks
// of the given size. If the chunk size is 0, no chunking will be performed. In addition if
// limit is reached, outstanding values will be emitted. If limit is zero, no limit is enforced.
type limitedRowWriter struct {
	chunkSize   int               // Max values per emitted row; IgnoredChunkSize disables chunking.
	limit       int               // Max total values to emit; 0 means unlimited.
	offset      int               // Number of leading values to skip before emitting.
	name        string            // Series name for emitted rows.
	tags        map[string]string // Tagset attached to emitted rows.
	fields      influxql.Fields   // Statement fields, used for math post-processing.
	selectNames []string          // Raw column names selected by the query.
	aliasNames  []string          // Aliased column names, parallel to selectNames.
	c           chan *influxql.Row

	currValues  []*MapperValue // Values buffered but not yet emitted.
	totalOffSet int            // Count of values skipped so far toward offset.
	totalSent   int            // Count of values emitted so far toward limit.

	// transformer, when non-nil, rewrites each batch of values before
	// emission (e.g. raw-query derivative processing).
	transformer interface {
		Process(input []*MapperValue) []*MapperValue
	}
}
// Add accepts a slice of values, and will emit those values as per chunking
// requirements. If limited is returned as true, the limit was also reached
// and no more values should be added. In that case only up to the limit of
// values are emitted.
func (r *limitedRowWriter) Add(values []*MapperValue) (limited bool) {
	if r.currValues == nil {
		r.currValues = make([]*MapperValue, 0, r.chunkSize)
	}

	// Enforce offset.
	if r.totalOffSet < r.offset {
		// Still some offsetting to do.
		offsetRequired := r.offset - r.totalOffSet
		if offsetRequired >= len(values) {
			// The entire batch is consumed by the offset.
			r.totalOffSet += len(values)
			return false
		}
		// Drop leading values and keep going.
		values = values[offsetRequired:]
		r.totalOffSet += offsetRequired
	}
	r.currValues = append(r.currValues, values...)

	// Check limit.
	limitReached := r.limit > 0 && r.totalSent+len(r.currValues) >= r.limit
	if limitReached {
		// Limit will be satisfied with current values. Truncate 'em.
		r.currValues = r.currValues[:r.limit-r.totalSent]
	}

	// Is chunking in effect?
	if r.chunkSize != IgnoredChunkSize {
		// Emit full chunks while enough values are buffered.
		for len(r.currValues) >= r.chunkSize {
			r.c <- r.processValues(r.currValues[:r.chunkSize])
			r.currValues = r.currValues[r.chunkSize:]
		}

		// After values have been sent out by chunking, there may still be some
		// values left, if the remainder is less than the chunk size. But if the
		// limit has been reached, kick them out.
		if len(r.currValues) > 0 && limitReached {
			r.c <- r.processValues(r.currValues)
			r.currValues = nil
		}
	} else if limitReached {
		// No chunking in effect, but the limit has been reached.
		r.c <- r.processValues(r.currValues)
		r.currValues = nil
	}

	return limitReached
}
// Flush instructs the limitedRowWriter to emit any pending values as a single
// row, adhering to any limits. Chunking is not enforced. Calling Flush on a
// nil writer is a no-op.
func (r *limitedRowWriter) Flush() {
	if r == nil {
		return
	}

	// If at least some rows were sent, and no values are pending, then don't
	// emit anything, since at least 1 row was previously emitted. This ensures
	// that if no rows were ever sent, at least 1 will be emitted, even an empty row.
	if r.totalSent != 0 && len(r.currValues) == 0 {
		return
	}

	// Honor the limit on the final emission too.
	if r.limit > 0 && len(r.currValues) > r.limit {
		r.currValues = r.currValues[:r.limit]
	}
	r.c <- r.processValues(r.currValues)
	r.currValues = nil
}
// processValues converts the given batch of raw mapper values into a single
// influxql.Row: it applies the optional transformer, forces "time" into the
// first column position, strips tag names out of the selected columns, and
// applies any math post-processing. It also advances totalSent by the number
// of values consumed from the caller's batch.
func (r *limitedRowWriter) processValues(values []*MapperValue) *influxql.Row {
	// Count against the limit based on the caller's batch size, even if the
	// transformer changes how many values are actually emitted.
	defer func() {
		r.totalSent += len(values)
	}()

	selectNames := r.selectNames
	aliasNames := r.aliasNames

	if r.transformer != nil {
		values = r.transformer.Process(values)
	}

	// ensure that time is in the select names and in the first position
	hasTime := false
	for i, n := range selectNames {
		if n == "time" {
			// Swap time to the first argument for names
			// NOTE(review): this mutates r.selectNames in place; later calls
			// observe the reordered slice.
			if i != 0 {
				selectNames[0], selectNames[i] = selectNames[i], selectNames[0]
			}
			hasTime = true
			break
		}
	}

	// time should always be in the list of names they get back
	if !hasTime {
		selectNames = append([]string{"time"}, selectNames...)
		aliasNames = append([]string{"time"}, aliasNames...)
	}

	// since selectNames can contain tags, we need to strip them out
	selectFields := make([]string, 0, len(selectNames))
	aliasFields := make([]string, 0, len(selectNames))

	for i, n := range selectNames {
		if _, found := r.tags[n]; !found {
			selectFields = append(selectFields, n)
			aliasFields = append(aliasFields, aliasNames[i])
		}
	}

	row := &influxql.Row{
		Name:    r.name,
		Tags:    r.tags,
		Columns: aliasFields,
	}

	// Kick out an empty row if no results available.
	if len(values) == 0 {
		return row
	}

	// if they've selected only a single value we have to handle things a little differently
	singleValue := len(selectFields) == SelectColumnCountWithOneValue

	// the results will have all of the raw mapper results, convert into the row
	for _, v := range values {
		vals := make([]interface{}, len(selectFields))

		if singleValue {
			// Column 0 is time; column 1 is the single selected value, either
			// pulled out of a field map or taken as the scalar value itself.
			vals[0] = time.Unix(0, v.Time).UTC()
			switch val := v.Value.(type) {
			case map[string]interface{}:
				vals[1] = val[selectFields[1]]
			default:
				vals[1] = val
			}
		} else {
			fields := v.Value.(map[string]interface{})

			// time is always the first value
			vals[0] = time.Unix(0, v.Time).UTC()

			// populate the other values, falling back to the value's own tags
			// when a selected name is not present in the field map
			for i := 1; i < len(selectFields); i++ {
				f, ok := fields[selectFields[i]]
				if ok {
					vals[i] = f
					continue
				}
				if v.Tags != nil {
					f, ok = v.Tags[selectFields[i]]
					if ok {
						vals[i] = f
					}
				}
			}
		}

		row.Values = append(row.Values, vals)
	}

	// Perform any mathematical post-processing.
	row.Values = processForMath(r.fields, row.Values)
	return row
}
// RawQueryDerivativeProcessor transforms raw (non-aggregate) query values into
// their derivatives. It is stateful: the last value seen is carried across
// chunk boundaries so derivatives remain continuous between chunks.
type RawQueryDerivativeProcessor struct {
	LastValueFromPreviousChunk *MapperValue
	IsNonNegative              bool // Whether to drop negative differences
	DerivativeInterval         time.Duration
}
// canProcess reports whether a derivative can be computed for input: there
// must be more than one value, and the field values must be numeric.
func (rqdp *RawQueryDerivativeProcessor) canProcess(input []*MapperValue) bool {
	// A single value means nothing changed; the caller emits a 0.0 row.
	if len(input) == 1 {
		return false
	}

	// The derivative is only defined over numeric field values.
	switch input[0].Value.(type) {
	case int64, float64:
		return true
	default:
		return false
	}
}
// Process computes the derivative of each successive pair of input values,
// normalized to DerivativeInterval. Non-numeric or single-value input yields
// a single 0.0 value at the first input's time. When IsNonNegative is set,
// values produced by negative differences are dropped.
func (rqdp *RawQueryDerivativeProcessor) Process(input []*MapperValue) []*MapperValue {
	if len(input) == 0 {
		return input
	}

	if !rqdp.canProcess(input) {
		return []*MapperValue{
			&MapperValue{
				Time:  input[0].Time,
				Value: 0.0,
			},
		}
	}

	// Seed the cross-chunk state on the very first chunk.
	if rqdp.LastValueFromPreviousChunk == nil {
		rqdp.LastValueFromPreviousChunk = input[0]
	}

	derivativeValues := []*MapperValue{}
	for i := 1; i < len(input); i++ {
		v := input[i]

		// Calculate the derivative of successive points by dividing the difference
		// of each value by the elapsed time normalized to the interval
		diff := int64toFloat64(v.Value) - int64toFloat64(rqdp.LastValueFromPreviousChunk.Value)

		elapsed := v.Time - rqdp.LastValueFromPreviousChunk.Time

		// Guard against zero/negative elapsed time (duplicate timestamps).
		value := 0.0
		if elapsed > 0 {
			value = diff / (float64(elapsed) / float64(rqdp.DerivativeInterval))
		}

		rqdp.LastValueFromPreviousChunk = v

		// Drop negative values for non-negative derivatives
		if rqdp.IsNonNegative && diff < 0 {
			continue
		}

		derivativeValues = append(derivativeValues, &MapperValue{
			Time:  v.Time,
			Value: value,
		})
	}

	return derivativeValues
}
// processForMath will apply any math that was specified in the select statement
// against the passed in results. If no field contains a binary or parenthesized
// expression, the results are returned unchanged.
func processForMath(fields influxql.Fields, results [][]interface{}) [][]interface{} {
	hasMath := false
	for _, f := range fields {
		if _, ok := f.Expr.(*influxql.BinaryExpr); ok {
			hasMath = true
		} else if _, ok := f.Expr.(*influxql.ParenExpr); ok {
			hasMath = true
		}
	}

	if !hasMath {
		return results
	}

	// Build one processor per field; startIndex tracks the column offset each
	// expression reads from within a result row.
	processors := make([]influxql.Processor, len(fields))
	startIndex := 1
	for i, f := range fields {
		processors[i], startIndex = influxql.GetProcessor(f.Expr, startIndex)
	}

	mathResults := make([][]interface{}, len(results))
	for i := range mathResults {
		mathResults[i] = make([]interface{}, len(fields)+1)
		// put the time in
		mathResults[i][0] = results[i][0]
		for j, p := range processors {
			mathResults[i][j+1] = p(results[i])
		}
	}

	return mathResults
}
// ProcessAggregateDerivative returns the derivatives of an aggregate result
// set. Each output row holds the time of the later point and the difference
// between consecutive values, divided by the elapsed time normalized to
// interval. Rows adjacent to a nil value are skipped, and when isNonNegative
// is true rows produced by negative differences are dropped. Empty input is
// returned unchanged; single-value or non-numeric input yields one 0.0 row.
func ProcessAggregateDerivative(results [][]interface{}, isNonNegative bool, interval time.Duration) [][]interface{} {
	// Return early if we can't calculate derivatives
	if len(results) == 0 {
		return results
	}

	// If we only have 1 value, then the value did not change, so return
	// a single row w/ 0.0
	if len(results) == 1 {
		return [][]interface{}{
			{results[0][0], 0.0},
		}
	}

	// Check the value's type to ensure it's numeric; if not, return a 0 result.
	// We only check the first value because derivatives cannot be combined with
	// other aggregates currently.
	switch results[0][1].(type) {
	case int64, float64:
		// Supported numeric types; proceed.
	default:
		return [][]interface{}{
			{results[0][0], 0.0},
		}
	}

	// Otherwise calculate the derivatives as the difference between consecutive
	// points divided by the elapsed time. Then normalize to the requested
	// interval.
	derivatives := make([][]interface{}, 0, len(results)-1)
	for i := 1; i < len(results); i++ {
		prev := results[i-1]
		cur := results[i]

		// Skip intervals with no data on either side.
		if cur[1] == nil || prev[1] == nil {
			continue
		}

		elapsed := cur[0].(time.Time).Sub(prev[0].(time.Time))
		// diff is already a float64; no further conversion is needed.
		diff := int64toFloat64(cur[1]) - int64toFloat64(prev[1])
		value := 0.0
		if elapsed > 0 {
			value = diff / (float64(elapsed) / float64(interval))
		}

		// Drop negative values for non-negative derivatives
		if isNonNegative && diff < 0 {
			continue
		}

		derivatives = append(derivatives, []interface{}{cur[0], value})
	}

	return derivatives
}
// derivativeInterval returns the time interval for the one (and only)
// derivative func. Precedence: an explicit second argument to the call,
// then the GROUP BY interval, then one second.
func derivativeInterval(stmt *influxql.SelectStatement) (time.Duration, error) {
	call := stmt.FunctionCalls()[0]
	if len(call.Args) == 2 {
		return call.Args[1].(*influxql.DurationLiteral).Val, nil
	}

	groupBy, err := stmt.GroupByInterval()
	switch {
	case err != nil:
		return 0, err
	case groupBy > 0:
		return groupBy, nil
	default:
		return time.Second, nil
	}
}
// resultsEmpty will return true if all the result values are empty or
// contain only nulls. The first element of each row (the time value)
// is ignored.
func resultsEmpty(resultValues [][]interface{}) bool {
	for _, row := range resultValues {
		if len(row) == 0 {
			continue
		}
		// Skip index 0, which always holds the timestamp.
		for _, v := range row[1:] {
			if v != nil {
				return false
			}
		}
	}
	return true
}
// int64toFloat64 converts a numeric value (int64 or float64) to float64.
// It panics on any other type; callers are expected to validate types first.
func int64toFloat64(v interface{}) float64 {
	switch val := v.(type) {
	case int64:
		return float64(val)
	case float64:
		return val
	}
	panic(fmt.Sprintf("expected either int64 or float64, got %v", v))
}
// int64arr implements sort.Interface for a slice of int64 values,
// sorting in ascending order.
type int64arr []int64

func (s int64arr) Len() int           { return len(s) }
func (s int64arr) Swap(i, j int)      { s[i], s[j] = s[j], s[i] }
func (s int64arr) Less(i, j int) bool { return s[i] < s[j] }

View File

@ -0,0 +1,991 @@
package tsdb_test
import (
"encoding/json"
"io/ioutil"
"math"
"os"
"testing"
"time"
"github.com/influxdb/influxdb/influxql"
"github.com/influxdb/influxdb/meta"
"github.com/influxdb/influxdb/tsdb"
)
// Fixed IDs used by the mock metastores and shard writes in these tests.
var sID0 = uint64(1)  // first shard ID
var sID1 = uint64(2)  // second shard ID
var sgID1 = uint64(3) // shard group ID — presumably; not referenced in this view
var sgID2 = uint64(4) // shard group ID — presumably; not referenced in this view
var nID = uint64(42)  // node ID used as the shard owner
// Simple test to ensure data can be read from two shards.
func TestWritePointsAndExecuteTwoShards(t *testing.T) {
	// Create the mock planner and its metastore
	store, queryExecutor := testStoreAndQueryExecutor()
	defer os.RemoveAll(store.Path())
	queryExecutor.MetaStore = &testQEMetastore{
		sgFunc: func(database, policy string, min, max time.Time) (a []meta.ShardGroupInfo, err error) {
			return []meta.ShardGroupInfo{
				{
					ID:        sgID,
					StartTime: time.Now().Add(-time.Hour),
					EndTime:   time.Now().Add(time.Hour),
					Shards: []meta.ShardInfo{
						{
							ID:       uint64(sID0),
							OwnerIDs: []uint64{nID},
						},
					},
				},
				{
					ID:        sgID,
					StartTime: time.Now().Add(-2 * time.Hour),
					EndTime:   time.Now().Add(-time.Hour),
					Shards: []meta.ShardInfo{
						{
							ID:       uint64(sID1),
							OwnerIDs: []uint64{nID},
						},
					},
				},
			}, nil
		},
	}

	// Write two points across shards.
	pt1time := time.Unix(1, 0).UTC()
	if err := store.WriteToShard(sID0, []tsdb.Point{tsdb.NewPoint(
		"cpu",
		map[string]string{"host": "serverA", "region": "us-east"},
		map[string]interface{}{"value": 100},
		pt1time,
	)}); err != nil {
		t.Fatal(err)
	}
	pt2time := time.Unix(2, 0).UTC()
	if err := store.WriteToShard(sID1, []tsdb.Point{tsdb.NewPoint(
		"cpu",
		map[string]string{"host": "serverB", "region": "us-east"},
		map[string]interface{}{"value": 200},
		pt2time,
	)}); err != nil {
		t.Fatal(err)
	}

	var tests = []struct {
		skip      bool   // Skip test
		stmt      string // Query statement
		chunkSize int    // Chunk size for driving the executor
		expected  string // Expected results, rendered as a string
	}{
		{
			stmt:     `SELECT value FROM cpu`,
			expected: `[{"name":"cpu","columns":["time","value"],"values":[["1970-01-01T00:00:01Z",100],["1970-01-01T00:00:02Z",200]]}]`,
		},
		{
			stmt:      `SELECT value FROM cpu`,
			chunkSize: 1,
			expected:  `[{"name":"cpu","columns":["time","value"],"values":[["1970-01-01T00:00:01Z",100]]},{"name":"cpu","columns":["time","value"],"values":[["1970-01-01T00:00:02Z",200]]}]`,
		},
		{
			stmt:     `SELECT value FROM cpu LIMIT 1`,
			expected: `[{"name":"cpu","columns":["time","value"],"values":[["1970-01-01T00:00:01Z",100]]}]`,
		},
		{
			stmt:      `SELECT value FROM cpu LIMIT 1`,
			chunkSize: 2,
			expected:  `[{"name":"cpu","columns":["time","value"],"values":[["1970-01-01T00:00:01Z",100]]}]`,
		},
		{
			stmt:     `SELECT value FROM cpu WHERE host='serverA'`,
			expected: `[{"name":"cpu","columns":["time","value"],"values":[["1970-01-01T00:00:01Z",100]]}]`,
		},
		{
			stmt:     `SELECT value FROM cpu WHERE host='serverB'`,
			expected: `[{"name":"cpu","columns":["time","value"],"values":[["1970-01-01T00:00:02Z",200]]}]`,
		},
		{
			stmt:     `SELECT value FROM cpu WHERE host='serverC'`,
			expected: `null`,
		},
		{
			stmt:     `SELECT value FROM cpu GROUP BY host`,
			expected: `[{"name":"cpu","tags":{"host":"serverA"},"columns":["time","value"],"values":[["1970-01-01T00:00:01Z",100]]},{"name":"cpu","tags":{"host":"serverB"},"columns":["time","value"],"values":[["1970-01-01T00:00:02Z",200]]}]`,
		},
		{
			stmt:     `SELECT value FROM cpu GROUP BY region`,
			expected: `[{"name":"cpu","tags":{"region":"us-east"},"columns":["time","value"],"values":[["1970-01-01T00:00:01Z",100],["1970-01-01T00:00:02Z",200]]}]`,
		},
		{
			stmt:     `SELECT value FROM cpu GROUP BY host,region`,
			expected: `[{"name":"cpu","tags":{"host":"serverA","region":"us-east"},"columns":["time","value"],"values":[["1970-01-01T00:00:01Z",100]]},{"name":"cpu","tags":{"host":"serverB","region":"us-east"},"columns":["time","value"],"values":[["1970-01-01T00:00:02Z",200]]}]`,
		},
		{
			stmt:     `SELECT value FROM cpu WHERE host='serverA' GROUP BY host`,
			expected: `[{"name":"cpu","tags":{"host":"serverA"},"columns":["time","value"],"values":[["1970-01-01T00:00:01Z",100]]}]`,
		},

		// Aggregate queries.
		{
			stmt:     `SELECT sum(value) FROM cpu`,
			expected: `[{"name":"cpu","columns":["time","sum"],"values":[["1970-01-01T00:00:00Z",300]]}]`,
		},
	}

	for _, tt := range tests {
		if tt.skip {
			t.Logf("Skipping test %s", tt.stmt)
			continue
		}
		executor, err := queryExecutor.Plan(mustParseSelectStatement(tt.stmt), tt.chunkSize)
		if err != nil {
			t.Fatalf("failed to plan query: %s", err.Error())
		}
		got := executeAndGetResults(executor)
		if got != tt.expected {
			t.Fatalf("Test %s\nexp: %s\ngot: %s\n", tt.stmt, tt.expected, got)
		}
	}
}
// Test that executor correctly orders data across shards.
func TestWritePointsAndExecuteTwoShardsAlign(t *testing.T) {
	// Create the mock planner and its metastore
	store, queryExecutor := testStoreAndQueryExecutor()
	defer os.RemoveAll(store.Path())
	queryExecutor.MetaStore = &testQEMetastore{
		sgFunc: func(database, policy string, min, max time.Time) (a []meta.ShardGroupInfo, err error) {
			return []meta.ShardGroupInfo{
				{
					ID:        sgID,
					StartTime: time.Now().Add(-2 * time.Hour),
					EndTime:   time.Now().Add(-time.Hour),
					Shards: []meta.ShardInfo{
						{
							ID:       uint64(sID1),
							OwnerIDs: []uint64{nID},
						},
					},
				},
				{
					ID:        sgID,
					StartTime: time.Now().Add(-2 * time.Hour),
					EndTime:   time.Now().Add(time.Hour),
					Shards: []meta.ShardInfo{
						{
							ID:       uint64(sID0),
							OwnerIDs: []uint64{nID},
						},
					},
				},
			}, nil
		},
	}

	// Write interleaving, by time, chunks to the shards.
	if err := store.WriteToShard(sID0, []tsdb.Point{tsdb.NewPoint(
		"cpu",
		map[string]string{"host": "serverA"},
		map[string]interface{}{"value": 100},
		time.Unix(1, 0).UTC(),
	)}); err != nil {
		t.Fatal(err)
	}
	if err := store.WriteToShard(sID1, []tsdb.Point{tsdb.NewPoint(
		"cpu",
		map[string]string{"host": "serverB"},
		map[string]interface{}{"value": 200},
		time.Unix(2, 0).UTC(),
	)}); err != nil {
		t.Fatal(err)
	}
	if err := store.WriteToShard(sID1, []tsdb.Point{tsdb.NewPoint(
		"cpu",
		map[string]string{"host": "serverA"},
		map[string]interface{}{"value": 300},
		time.Unix(3, 0).UTC(),
	)}); err != nil {
		t.Fatal(err)
	}

	var tests = []struct {
		skip      bool   // Skip test
		stmt      string // Query statement
		chunkSize int    // Chunk size for driving the executor
		expected  string // Expected results, rendered as a string
	}{
		{
			stmt:      `SELECT value FROM cpu`,
			chunkSize: 1,
			expected:  `[{"name":"cpu","columns":["time","value"],"values":[["1970-01-01T00:00:01Z",100]]},{"name":"cpu","columns":["time","value"],"values":[["1970-01-01T00:00:02Z",200]]},{"name":"cpu","columns":["time","value"],"values":[["1970-01-01T00:00:03Z",300]]}]`,
		},
		{
			stmt:      `SELECT value FROM cpu`,
			chunkSize: 2,
			expected:  `[{"name":"cpu","columns":["time","value"],"values":[["1970-01-01T00:00:01Z",100],["1970-01-01T00:00:02Z",200]]},{"name":"cpu","columns":["time","value"],"values":[["1970-01-01T00:00:03Z",300]]}]`,
		},
		{
			stmt:      `SELECT mean(value),sum(value) FROM cpu`,
			chunkSize: 2,
			expected:  `[{"name":"cpu","columns":["time","mean","sum"],"values":[["1970-01-01T00:00:00Z",200,600]]}]`,
		},
	}

	for _, tt := range tests {
		if tt.skip {
			t.Logf("Skipping test %s", tt.stmt)
			continue
		}
		executor, err := queryExecutor.Plan(mustParseSelectStatement(tt.stmt), tt.chunkSize)
		if err != nil {
			t.Fatalf("failed to plan query: %s", err.Error())
		}
		got := executeAndGetResults(executor)
		if got != tt.expected {
			t.Fatalf("Test %s\nexp: %s\ngot: %s\n", tt.stmt, tt.expected, got)
		}
	}
}
// Test to ensure the engine handles query re-writing across stores.
func TestWritePointsAndExecuteTwoShardsQueryRewrite(t *testing.T) {
	// Create two distinct stores, ensuring shard mappers will shard nothing.
	store0 := testStore()
	defer os.RemoveAll(store0.Path())
	store1 := testStore()
	defer os.RemoveAll(store1.Path())

	// Create a shard in each store.
	database := "foo"
	retentionPolicy := "bar"
	if err := store0.CreateShard(database, retentionPolicy, sID0); err != nil {
		t.Fatal(err)
	}
	if err := store1.CreateShard(database, retentionPolicy, sID1); err != nil {
		t.Fatal(err)
	}

	// Write two points across shards.
	pt1time := time.Unix(1, 0).UTC()
	if err := store0.WriteToShard(sID0, []tsdb.Point{tsdb.NewPoint(
		"cpu",
		map[string]string{"host": "serverA"},
		map[string]interface{}{"value1": 100},
		pt1time,
	)}); err != nil {
		t.Fatal(err)
	}
	pt2time := time.Unix(2, 0).UTC()
	if err := store1.WriteToShard(sID1, []tsdb.Point{tsdb.NewPoint(
		"cpu",
		map[string]string{"host": "serverB"},
		map[string]interface{}{"value2": 200},
		pt2time,
	)}); err != nil {
		t.Fatal(err)
	}

	var tests = []struct {
		skip      bool   // Skip test
		stmt      string // Query statement
		chunkSize int    // Chunk size for driving the executor
		expected  string // Expected results, rendered as a string
	}{
		{
			stmt:     `SELECT * FROM cpu`,
			expected: `[{"name":"cpu","columns":["time","host","value1","value2"],"values":[["1970-01-01T00:00:01Z","serverA",100,null],["1970-01-01T00:00:02Z","serverB",null,200]]}]`,
		},
		{
			stmt:     `SELECT * FROM cpu GROUP BY *`,
			expected: `[{"name":"cpu","tags":{"host":"serverA"},"columns":["time","value1","value2"],"values":[["1970-01-01T00:00:01Z",100,null]]},{"name":"cpu","tags":{"host":"serverB"},"columns":["time","value1","value2"],"values":[["1970-01-01T00:00:02Z",null,200]]}]`,
		},
	}

	for _, tt := range tests {
		if tt.skip {
			t.Logf("Skipping test %s", tt.stmt)
			continue
		}

		parsedSelectStmt := mustParseSelectStatement(tt.stmt)

		// Create Mappers and Executor.
		mapper0, err := store0.CreateMapper(sID0, tt.stmt, tt.chunkSize)
		if err != nil {
			t.Fatalf("failed to create mapper0: %s", err.Error())
		}
		mapper1, err := store1.CreateMapper(sID1, tt.stmt, tt.chunkSize)
		if err != nil {
			t.Fatalf("failed to create mapper1: %s", err.Error())
		}
		executor := tsdb.NewExecutor(parsedSelectStmt, []tsdb.Mapper{mapper0, mapper1}, tt.chunkSize)

		// Check the results.
		got := executeAndGetResults(executor)
		if got != tt.expected {
			t.Fatalf("Test %s\nexp: %s\ngot: %s\n", tt.stmt, tt.expected, got)
		}
	}
}
// Test that executor correctly orders data across shards when the tagsets
// are not presented in alphabetical order across shards.
func TestWritePointsAndExecuteTwoShardsTagSetOrdering(t *testing.T) {
	// Create the mock planner and its metastore
	store, queryExecutor := testStoreAndQueryExecutor()
	defer os.RemoveAll(store.Path())
	queryExecutor.MetaStore = &testQEMetastore{
		sgFunc: func(database, policy string, min, max time.Time) (a []meta.ShardGroupInfo, err error) {
			return []meta.ShardGroupInfo{
				{
					ID: sgID,
					Shards: []meta.ShardInfo{
						{
							ID:       uint64(sID0),
							OwnerIDs: []uint64{nID},
						},
					},
				},
				{
					ID: sgID,
					Shards: []meta.ShardInfo{
						{
							ID:       uint64(sID1),
							OwnerIDs: []uint64{nID},
						},
					},
				},
			}, nil
		},
	}

	// Write tagsets "y" and "z" to first shard.
	if err := store.WriteToShard(sID0, []tsdb.Point{tsdb.NewPoint(
		"cpu",
		map[string]string{"host": "y"},
		map[string]interface{}{"value": 100},
		time.Unix(1, 0).UTC(),
	)}); err != nil {
		t.Fatal(err)
	}
	if err := store.WriteToShard(sID0, []tsdb.Point{tsdb.NewPoint(
		"cpu",
		map[string]string{"host": "z"},
		map[string]interface{}{"value": 200},
		time.Unix(1, 0).UTC(),
	)}); err != nil {
		t.Fatal(err)
	}

	// Write tagsets "x", y" and "z" to second shard.
	if err := store.WriteToShard(sID1, []tsdb.Point{tsdb.NewPoint(
		"cpu",
		map[string]string{"host": "x"},
		map[string]interface{}{"value": 300},
		time.Unix(2, 0).UTC(),
	)}); err != nil {
		t.Fatal(err)
	}
	if err := store.WriteToShard(sID1, []tsdb.Point{tsdb.NewPoint(
		"cpu",
		map[string]string{"host": "y"},
		map[string]interface{}{"value": 400},
		time.Unix(3, 0).UTC(),
	)}); err != nil {
		t.Fatal(err)
	}
	if err := store.WriteToShard(sID1, []tsdb.Point{tsdb.NewPoint(
		"cpu",
		map[string]string{"host": "z"},
		map[string]interface{}{"value": 500},
		time.Unix(3, 0).UTC(),
	)}); err != nil {
		t.Fatal(err)
	}

	var tests = []struct {
		skip      bool   // Skip test
		stmt      string // Query statement
		chunkSize int    // Chunk size for driving the executor
		expected  string // Expected results, rendered as a string
	}{
		{
			stmt:     `SELECT sum(value) FROM cpu GROUP BY host`,
			expected: `[{"name":"cpu","tags":{"host":"x"},"columns":["time","sum"],"values":[["1970-01-01T00:00:00Z",300]]},{"name":"cpu","tags":{"host":"y"},"columns":["time","sum"],"values":[["1970-01-01T00:00:00Z",500]]},{"name":"cpu","tags":{"host":"z"},"columns":["time","sum"],"values":[["1970-01-01T00:00:00Z",700]]}]`,
		},
		{
			stmt:     `SELECT value FROM cpu GROUP BY host`,
			expected: `[{"name":"cpu","tags":{"host":"x"},"columns":["time","value"],"values":[["1970-01-01T00:00:02Z",300]]},{"name":"cpu","tags":{"host":"y"},"columns":["time","value"],"values":[["1970-01-01T00:00:01Z",100],["1970-01-01T00:00:03Z",400]]},{"name":"cpu","tags":{"host":"z"},"columns":["time","value"],"values":[["1970-01-01T00:00:01Z",200],["1970-01-01T00:00:03Z",500]]}]`,
		},
	}

	for _, tt := range tests {
		if tt.skip {
			t.Logf("Skipping test %s", tt.stmt)
			continue
		}
		executor, err := queryExecutor.Plan(mustParseSelectStatement(tt.stmt), tt.chunkSize)
		if err != nil {
			t.Fatalf("failed to plan query: %s", err.Error())
		}
		got := executeAndGetResults(executor)
		if got != tt.expected {
			t.Fatalf("Test %s\nexp: %s\ngot: %s\n", tt.stmt, tt.expected, got)
		}
	}
}
// TestProcessAggregateDerivative tests the ProcessAggregateDerivative
// transformation function on the engine. This path is used for queries
// with a GROUP BY.
func TestProcessAggregateDerivative(t *testing.T) {
	tests := []struct {
		name     string          // Human-readable case name, used in failure output.
		fn       string          // "derivative" or "non_negative_derivative".
		interval time.Duration   // Normalization interval.
		in       [][]interface{} // Input rows: [time, value].
		exp      [][]interface{} // Expected output rows.
	}{
		{
			name:     "empty input",
			fn:       "derivative",
			interval: 24 * time.Hour,
			in:       [][]interface{}{},
			exp:      [][]interface{}{},
		},
		{
			name:     "single row returns 0.0",
			fn:       "derivative",
			interval: 24 * time.Hour,
			in: [][]interface{}{
				[]interface{}{
					time.Unix(0, 0), 1.0,
				},
			},
			exp: [][]interface{}{
				[]interface{}{
					time.Unix(0, 0), 0.0,
				},
			},
		},
		{
			name:     "basic derivative",
			fn:       "derivative",
			interval: 24 * time.Hour,
			in: [][]interface{}{
				[]interface{}{
					time.Unix(0, 0), 1.0,
				},
				[]interface{}{
					time.Unix(0, 0).Add(24 * time.Hour), 3.0,
				},
				[]interface{}{
					time.Unix(0, 0).Add(48 * time.Hour), 5.0,
				},
				[]interface{}{
					time.Unix(0, 0).Add(72 * time.Hour), 9.0,
				},
			},
			exp: [][]interface{}{
				[]interface{}{
					time.Unix(0, 0).Add(24 * time.Hour), 2.0,
				},
				[]interface{}{
					time.Unix(0, 0).Add(48 * time.Hour), 2.0,
				},
				[]interface{}{
					time.Unix(0, 0).Add(72 * time.Hour), 4.0,
				},
			},
		},
		{
			name:     "12h interval",
			fn:       "derivative",
			interval: 12 * time.Hour,
			in: [][]interface{}{
				[]interface{}{
					time.Unix(0, 0), 1.0,
				},
				[]interface{}{
					time.Unix(0, 0).Add(24 * time.Hour), 2.0,
				},
				[]interface{}{
					time.Unix(0, 0).Add(48 * time.Hour), 3.0,
				},
				[]interface{}{
					time.Unix(0, 0).Add(72 * time.Hour), 4.0,
				},
			},
			exp: [][]interface{}{
				[]interface{}{
					time.Unix(0, 0).Add(24 * time.Hour), 0.5,
				},
				[]interface{}{
					time.Unix(0, 0).Add(48 * time.Hour), 0.5,
				},
				[]interface{}{
					time.Unix(0, 0).Add(72 * time.Hour), 0.5,
				},
			},
		},
		{
			name:     "negative derivatives",
			fn:       "derivative",
			interval: 24 * time.Hour,
			in: [][]interface{}{
				[]interface{}{
					time.Unix(0, 0), 1.0,
				},
				[]interface{}{
					time.Unix(0, 0).Add(24 * time.Hour), 2.0,
				},
				[]interface{}{
					time.Unix(0, 0).Add(48 * time.Hour), 0.0,
				},
				[]interface{}{
					time.Unix(0, 0).Add(72 * time.Hour), 4.0,
				},
			},
			exp: [][]interface{}{
				[]interface{}{
					time.Unix(0, 0).Add(24 * time.Hour), 1.0,
				},
				[]interface{}{
					time.Unix(0, 0).Add(48 * time.Hour), -2.0,
				},
				[]interface{}{
					time.Unix(0, 0).Add(72 * time.Hour), 4.0,
				},
			},
		},
		{
			name:     "negative derivatives",
			fn:       "non_negative_derivative",
			interval: 24 * time.Hour,
			in: [][]interface{}{
				[]interface{}{
					time.Unix(0, 0), 1.0,
				},
				[]interface{}{
					time.Unix(0, 0).Add(24 * time.Hour), 2.0,
				},
				// Shows up as a negative derivative, so it must be dropped.
				[]interface{}{
					time.Unix(0, 0).Add(48 * time.Hour), 0.0,
				},
				[]interface{}{
					time.Unix(0, 0).Add(72 * time.Hour), 4.0,
				},
			},
			exp: [][]interface{}{
				[]interface{}{
					time.Unix(0, 0).Add(24 * time.Hour), 1.0,
				},
				[]interface{}{
					time.Unix(0, 0).Add(72 * time.Hour), 4.0,
				},
			},
		},
		{
			name:     "integer derivatives",
			fn:       "derivative",
			interval: 24 * time.Hour,
			in: [][]interface{}{
				[]interface{}{
					time.Unix(0, 0), 1.0,
				},
				[]interface{}{
					time.Unix(0, 0).Add(24 * time.Hour), int64(3),
				},
				[]interface{}{
					time.Unix(0, 0).Add(48 * time.Hour), int64(5),
				},
				[]interface{}{
					time.Unix(0, 0).Add(72 * time.Hour), int64(9),
				},
			},
			exp: [][]interface{}{
				[]interface{}{
					time.Unix(0, 0).Add(24 * time.Hour), 2.0,
				},
				[]interface{}{
					time.Unix(0, 0).Add(48 * time.Hour), 2.0,
				},
				[]interface{}{
					time.Unix(0, 0).Add(72 * time.Hour), 4.0,
				},
			},
		},
		{
			// Non-numeric values collapse to a single 0.0 row.
			name:     "string derivatives",
			fn:       "derivative",
			interval: 24 * time.Hour,
			in: [][]interface{}{
				[]interface{}{
					time.Unix(0, 0), "1.0",
				},
				[]interface{}{
					time.Unix(0, 0).Add(24 * time.Hour), "2.0",
				},
				[]interface{}{
					time.Unix(0, 0).Add(48 * time.Hour), "3.0",
				},
				[]interface{}{
					time.Unix(0, 0).Add(72 * time.Hour), "4.0",
				},
			},
			exp: [][]interface{}{
				[]interface{}{
					time.Unix(0, 0), 0.0,
				},
			},
		},
	}

	for _, test := range tests {
		got := tsdb.ProcessAggregateDerivative(test.in, test.fn == "non_negative_derivative", test.interval)

		if len(got) != len(test.exp) {
			t.Fatalf("ProcessAggregateDerivative(%s) - %s\nlen mismatch: got %d, exp %d", test.fn, test.name, len(got), len(test.exp))
		}

		for i := 0; i < len(test.exp); i++ {
			if test.exp[i][0] != got[i][0] || test.exp[i][1] != got[i][1] {
				t.Fatalf("ProcessAggregateDerivative - %s results mismatch:\ngot %v\nexp %v", test.name, got, test.exp)
			}
		}
	}
}
// TestProcessRawQueryDerivative tests the RawQueryDerivativeProcessor transformation function on the engine.
// It is called for queries that do not have a GROUP BY.
func TestProcessRawQueryDerivative(t *testing.T) {
	tests := []struct {
		name     string              // descriptive name for the test case
		fn       string              // "derivative" or "non_negative_derivative"
		interval time.Duration       // interval the per-point delta is normalized to
		in       []*tsdb.MapperValue // raw mapper output fed to the processor
		exp      []*tsdb.MapperValue // expected processed output
	}{
		{
			name:     "empty input",
			fn:       "derivative",
			interval: 24 * time.Hour,
			in:       []*tsdb.MapperValue{},
			exp:      []*tsdb.MapperValue{},
		},
		{
			// A single point has no predecessor to diff against, so the
			// processor emits a single zero-valued result.
			name:     "single row returns 0.0",
			fn:       "derivative",
			interval: 24 * time.Hour,
			in: []*tsdb.MapperValue{
				{
					Time:  time.Unix(0, 0).Unix(),
					Value: 1.0,
				},
			},
			exp: []*tsdb.MapperValue{
				{
					Time:  time.Unix(0, 0).Unix(),
					Value: 0.0,
				},
			},
		},
		{
			// N input points yield N-1 derivatives, one per consecutive pair,
			// stamped with the later point's time.
			name:     "basic derivative",
			fn:       "derivative",
			interval: 24 * time.Hour,
			in: []*tsdb.MapperValue{
				{
					Time:  time.Unix(0, 0).Unix(),
					Value: 0.0,
				},
				{
					Time:  time.Unix(0, 0).Add(24 * time.Hour).UnixNano(),
					Value: 3.0,
				},
				{
					Time:  time.Unix(0, 0).Add(48 * time.Hour).UnixNano(),
					Value: 5.0,
				},
				{
					Time:  time.Unix(0, 0).Add(72 * time.Hour).UnixNano(),
					Value: 9.0,
				},
			},
			exp: []*tsdb.MapperValue{
				{
					Time:  time.Unix(0, 0).Add(24 * time.Hour).UnixNano(),
					Value: 3.0,
				},
				{
					Time:  time.Unix(0, 0).Add(48 * time.Hour).UnixNano(),
					Value: 2.0,
				},
				{
					Time:  time.Unix(0, 0).Add(72 * time.Hour).UnixNano(),
					Value: 4.0,
				},
			},
		},
		{
			// int64 inputs must be handled and produce float64 derivatives.
			name:     "integer derivative",
			fn:       "derivative",
			interval: 24 * time.Hour,
			in: []*tsdb.MapperValue{
				{
					Time:  time.Unix(0, 0).Unix(),
					Value: int64(0),
				},
				{
					Time:  time.Unix(0, 0).Add(24 * time.Hour).UnixNano(),
					Value: int64(3),
				},
				{
					Time:  time.Unix(0, 0).Add(48 * time.Hour).UnixNano(),
					Value: int64(5),
				},
				{
					Time:  time.Unix(0, 0).Add(72 * time.Hour).UnixNano(),
					Value: int64(9),
				},
			},
			exp: []*tsdb.MapperValue{
				{
					Time:  time.Unix(0, 0).Add(24 * time.Hour).UnixNano(),
					Value: 3.0,
				},
				{
					Time:  time.Unix(0, 0).Add(48 * time.Hour).UnixNano(),
					Value: 2.0,
				},
				{
					Time:  time.Unix(0, 0).Add(72 * time.Hour).UnixNano(),
					Value: 4.0,
				},
			},
		},
		{
			// Points are 24h apart but normalized to a 12h interval, so each
			// +1.0 delta becomes 0.5 per interval.
			name:     "12h interval",
			fn:       "derivative",
			interval: 12 * time.Hour,
			in: []*tsdb.MapperValue{
				{
					Time:  time.Unix(0, 0).UnixNano(),
					Value: 1.0,
				},
				{
					Time:  time.Unix(0, 0).Add(24 * time.Hour).UnixNano(),
					Value: 2.0,
				},
				{
					Time:  time.Unix(0, 0).Add(48 * time.Hour).UnixNano(),
					Value: 3.0,
				},
				{
					Time:  time.Unix(0, 0).Add(72 * time.Hour).UnixNano(),
					Value: 4.0,
				},
			},
			exp: []*tsdb.MapperValue{
				{
					Time:  time.Unix(0, 0).Add(24 * time.Hour).UnixNano(),
					Value: 0.5,
				},
				{
					Time:  time.Unix(0, 0).Add(48 * time.Hour).UnixNano(),
					Value: 0.5,
				},
				{
					Time:  time.Unix(0, 0).Add(72 * time.Hour).UnixNano(),
					Value: 0.5,
				},
			},
		},
		{
			// Plain "derivative" keeps negative results.
			name:     "negative derivatives",
			fn:       "derivative",
			interval: 24 * time.Hour,
			in: []*tsdb.MapperValue{
				{
					Time:  time.Unix(0, 0).Unix(),
					Value: 1.0,
				},
				{
					Time:  time.Unix(0, 0).Add(24 * time.Hour).UnixNano(),
					Value: 2.0,
				},
				// should go negative
				{
					Time:  time.Unix(0, 0).Add(48 * time.Hour).UnixNano(),
					Value: 0.0,
				},
				{
					Time:  time.Unix(0, 0).Add(72 * time.Hour).UnixNano(),
					Value: 4.0,
				},
			},
			exp: []*tsdb.MapperValue{
				{
					Time:  time.Unix(0, 0).Add(24 * time.Hour).UnixNano(),
					Value: 1.0,
				},
				{
					Time:  time.Unix(0, 0).Add(48 * time.Hour).UnixNano(),
					Value: -2.0,
				},
				{
					Time:  time.Unix(0, 0).Add(72 * time.Hour).UnixNano(),
					Value: 4.0,
				},
			},
		},
		{
			// "non_negative_derivative" drops the negative result entirely,
			// so the 48h point is absent from the expected output.
			name:     "negative derivatives",
			fn:       "non_negative_derivative",
			interval: 24 * time.Hour,
			in: []*tsdb.MapperValue{
				{
					Time:  time.Unix(0, 0).Unix(),
					Value: 1.0,
				},
				{
					Time:  time.Unix(0, 0).Add(24 * time.Hour).UnixNano(),
					Value: 2.0,
				},
				// should go negative
				{
					Time:  time.Unix(0, 0).Add(48 * time.Hour).UnixNano(),
					Value: 0.0,
				},
				{
					Time:  time.Unix(0, 0).Add(72 * time.Hour).UnixNano(),
					Value: 4.0,
				},
			},
			exp: []*tsdb.MapperValue{
				{
					Time:  time.Unix(0, 0).Add(24 * time.Hour).UnixNano(),
					Value: 1.0,
				},
				{
					Time:  time.Unix(0, 0).Add(72 * time.Hour).UnixNano(),
					Value: 4.0,
				},
			},
		},
		{
			// Non-numeric values cannot be differentiated; the processor
			// emits a single zero-valued result.
			name:     "string derivatives",
			fn:       "derivative",
			interval: 24 * time.Hour,
			in: []*tsdb.MapperValue{
				{
					Time:  time.Unix(0, 0).Unix(),
					Value: "1.0",
				},
				{
					Time:  time.Unix(0, 0).Add(24 * time.Hour).UnixNano(),
					Value: "2.0",
				},
				{
					Time:  time.Unix(0, 0).Add(48 * time.Hour).UnixNano(),
					Value: "3.0",
				},
				{
					Time:  time.Unix(0, 0).Add(72 * time.Hour).UnixNano(),
					Value: "4.0",
				},
			},
			exp: []*tsdb.MapperValue{
				{
					Time:  time.Unix(0, 0).Unix(),
					Value: 0.0,
				},
			},
		},
	}

	for _, test := range tests {
		p := tsdb.RawQueryDerivativeProcessor{
			IsNonNegative:      test.fn == "non_negative_derivative",
			DerivativeInterval: test.interval,
		}
		got := p.Process(test.in)
		if len(got) != len(test.exp) {
			t.Fatalf("RawQueryDerivativeProcessor(%s) - %s\nlen mismatch: got %d, exp %d", test.fn, test.name, len(got), len(test.exp))
		}
		// Compare float values with a small epsilon to tolerate FP rounding.
		for i := 0; i < len(test.exp); i++ {
			if test.exp[i].Time != got[i].Time || math.Abs((test.exp[i].Value.(float64)-got[i].Value.(float64))) > 0.0000001 {
				t.Fatalf("RawQueryDerivativeProcessor - %s results mismatch:\ngot %v\nexp %v", test.name, got, test.exp)
			}
		}
	}
}
// testQEMetastore is a metastore stub for QueryExecutor tests. Only shard-group
// lookup is configurable (via sgFunc); every other method returns zero values.
type testQEMetastore struct {
	sgFunc func(database, policy string, min, max time.Time) (a []meta.ShardGroupInfo, err error)
}

// ShardGroupsByTimeRange delegates to the test-supplied sgFunc.
func (t *testQEMetastore) ShardGroupsByTimeRange(database, policy string, min, max time.Time) (a []meta.ShardGroupInfo, err error) {
	return t.sgFunc(database, policy, min, max)
}

// The remaining methods are no-op stubs that satisfy the metastore interface.
func (t *testQEMetastore) Database(name string) (*meta.DatabaseInfo, error) { return nil, nil }
func (t *testQEMetastore) Databases() ([]meta.DatabaseInfo, error)         { return nil, nil }
func (t *testQEMetastore) User(name string) (*meta.UserInfo, error)        { return nil, nil }
func (t *testQEMetastore) AdminUserExists() (bool, error)                  { return false, nil }
func (t *testQEMetastore) Authenticate(username, password string) (*meta.UserInfo, error) {
	return nil, nil
}
func (t *testQEMetastore) RetentionPolicy(database, name string) (rpi *meta.RetentionPolicyInfo, err error) {
	return nil, nil
}
func (t *testQEMetastore) UserCount() (int, error) { return 0, nil }

// NodeID returns the package-level test node ID.
func (t *testQEMetastore) NodeID() uint64 { return nID }
// testStore creates and opens a tsdb.Store backed by a fresh temporary
// directory. It is a test helper, so any setup failure panics rather than
// returning an error.
func testStore() *tsdb.Store {
	// The TempDir error was previously discarded; a failure here would have
	// surfaced later as a confusing store error, so panic immediately instead.
	path, err := ioutil.TempDir("", "")
	if err != nil {
		panic(err)
	}

	store := tsdb.NewStore(path)
	if err := store.Open(); err != nil {
		panic(err)
	}
	return store
}
// testStoreAndQueryExecutor builds a store with two shards ("foo"/"bar") and a
// QueryExecutor wired to a test shard mapper over that store. As a test
// helper, it panics on any setup failure.
func testStoreAndQueryExecutor() (*tsdb.Store, *tsdb.QueryExecutor) {
	store := testStore()

	database := "foo"
	retentionPolicy := "bar"
	// CreateShard errors were previously ignored; fail fast so tests don't
	// run against a half-initialized store.
	if err := store.CreateShard(database, retentionPolicy, sID0); err != nil {
		panic(err)
	}
	if err := store.CreateShard(database, retentionPolicy, sID1); err != nil {
		panic(err)
	}

	// Renamed from snake_case query_executor to idiomatic Go camelCase.
	queryExecutor := tsdb.NewQueryExecutor(store)
	queryExecutor.ShardMapper = &testQEShardMapper{store}
	return store, queryExecutor
}
// testQEShardMapper is a ShardMapper implementation for QueryExecutor tests
// that maps shards directly through the local store.
type testQEShardMapper struct {
	store *tsdb.Store
}
// CreateMapper creates a mapper for the given shard by delegating to the
// underlying store, ignoring any remote-node information in the ShardInfo.
func (t *testQEShardMapper) CreateMapper(shard meta.ShardInfo, stmt string, chunkSize int) (tsdb.Mapper, error) {
	return t.store.CreateMapper(shard.ID, stmt, chunkSize)
}
// executeAndGetResults drains every row produced by the executor and returns
// them marshaled as a JSON string. When no rows are produced, the nil slice
// intentionally marshals to "null".
func executeAndGetResults(executor *tsdb.Executor) string {
	var rows []*influxql.Row
	for row := range executor.Execute() {
		rows = append(rows, row)
	}

	encoded, err := json.Marshal(rows)
	if err != nil {
		panic(err)
	}
	return string(encoded)
}

View File

@ -1,12 +1,12 @@
// Code generated by protoc-gen-go.
// source: meta.proto
// Code generated by protoc-gen-gogo.
// source: internal/meta.proto
// DO NOT EDIT!
/*
Package internal is a generated protocol buffer package.
It is generated from these files:
meta.proto
internal/meta.proto
It has these top-level messages:
Series
@ -16,7 +16,7 @@ It has these top-level messages:
*/
package internal
import proto "github.com/golang/protobuf/proto"
import proto "github.com/gogo/protobuf/proto"
import math "math"
// Reference imports to suppress errors if they are not otherwise used.

File diff suppressed because it is too large Load Diff

View File

@ -1,4 +1,4 @@
package tsdb
package tsdb_test
import (
"encoding/json"
@ -12,92 +12,119 @@ import (
"time"
"github.com/influxdb/influxdb/influxql"
"github.com/influxdb/influxdb/tsdb"
)
func TestShardMapper_RawMapperTagSets(t *testing.T) {
func TestShardMapper_RawMapperTagSetsFields(t *testing.T) {
tmpDir, _ := ioutil.TempDir("", "shard_test")
defer os.RemoveAll(tmpDir)
shard := mustCreateShard(tmpDir)
pt1time := time.Unix(1, 0).UTC()
pt1 := NewPoint(
pt1 := tsdb.NewPoint(
"cpu",
map[string]string{"host": "serverA", "region": "us-east"},
map[string]interface{}{"value": 42},
map[string]interface{}{"idle": 60},
pt1time,
)
pt2time := time.Unix(2, 0).UTC()
pt2 := NewPoint(
pt2 := tsdb.NewPoint(
"cpu",
map[string]string{"host": "serverB", "region": "us-east"},
map[string]interface{}{"value": 60},
map[string]interface{}{"load": 60},
pt2time,
)
err := shard.WritePoints([]Point{pt1, pt2})
err := shard.WritePoints([]tsdb.Point{pt1, pt2})
if err != nil {
t.Fatalf(err.Error())
}
var tests = []struct {
stmt string
expected []string
stmt string
expectedTags []string
expectedFields []string
}{
{
stmt: `SELECT value FROM cpu`,
expected: []string{"cpu"},
stmt: `SELECT load FROM cpu`,
expectedTags: []string{"cpu"},
expectedFields: []string{"load"},
},
{
stmt: `SELECT value FROM cpu GROUP BY host`,
expected: []string{"cpu|host|serverA", "cpu|host|serverB"},
stmt: `SELECT derivative(load) FROM cpu`,
expectedTags: []string{"cpu"},
expectedFields: []string{"load"},
},
{
stmt: `SELECT value FROM cpu GROUP BY region`,
expected: []string{"cpu|region|us-east"},
stmt: `SELECT idle,load FROM cpu`,
expectedTags: []string{"cpu"},
expectedFields: []string{"idle", "load"},
},
{
stmt: `SELECT value FROM cpu WHERE host='serverA'`,
expected: []string{"cpu"},
stmt: `SELECT load,idle FROM cpu`,
expectedTags: []string{"cpu"},
expectedFields: []string{"idle", "load"},
},
{
stmt: `SELECT value FROM cpu WHERE host='serverB'`,
expected: []string{"cpu"},
stmt: `SELECT load FROM cpu GROUP BY host`,
expectedTags: []string{"cpu|host|serverA", "cpu|host|serverB"},
expectedFields: []string{"load"},
},
{
stmt: `SELECT value FROM cpu WHERE host='serverC'`,
expected: []string{},
stmt: `SELECT load FROM cpu GROUP BY region`,
expectedTags: []string{"cpu|region|us-east"},
expectedFields: []string{"load"},
},
{
stmt: `SELECT load FROM cpu WHERE host='serverA'`,
expectedTags: []string{"cpu"},
expectedFields: []string{"load"},
},
{
stmt: `SELECT load FROM cpu WHERE host='serverB'`,
expectedTags: []string{"cpu"},
expectedFields: []string{"load"},
},
{
stmt: `SELECT load FROM cpu WHERE host='serverC'`,
expectedTags: []string{},
expectedFields: []string{"load"},
},
}
for _, tt := range tests {
stmt := mustParseSelectStatement(tt.stmt)
mapper := openRawMapperOrFail(t, shard, stmt, 0)
got := mapper.TagSets()
if !reflect.DeepEqual(got, tt.expected) {
t.Errorf("test '%s'\n\tgot %s\n\texpected %s", tt.stmt, got, tt.expected)
tags := mapper.TagSets()
if !reflect.DeepEqual(tags, tt.expectedTags) {
t.Errorf("test '%s'\n\tgot %s\n\texpected %s", tt.stmt, tags, tt.expectedTags)
}
fields := mapper.Fields()
if !reflect.DeepEqual(fields, tt.expectedFields) {
t.Errorf("test '%s'\n\tgot %s\n\texpected %s", tt.stmt, fields, tt.expectedFields)
}
}
}
func TestShardMapper_WriteAndSingleMapperRawQuery(t *testing.T) {
func TestShardMapper_WriteAndSingleMapperRawQuerySingleValue(t *testing.T) {
tmpDir, _ := ioutil.TempDir("", "shard_test")
defer os.RemoveAll(tmpDir)
shard := mustCreateShard(tmpDir)
pt1time := time.Unix(1, 0).UTC()
pt1 := NewPoint(
pt1 := tsdb.NewPoint(
"cpu",
map[string]string{"host": "serverA", "region": "us-east"},
map[string]interface{}{"value": 42},
map[string]interface{}{"load": 42},
pt1time,
)
pt2time := time.Unix(2, 0).UTC()
pt2 := NewPoint(
pt2 := tsdb.NewPoint(
"cpu",
map[string]string{"host": "serverB", "region": "us-east"},
map[string]interface{}{"value": 60},
map[string]interface{}{"load": 60},
pt2time,
)
err := shard.WritePoints([]Point{pt1, pt2})
err := shard.WritePoints([]tsdb.Point{pt1, pt2})
if err != nil {
t.Fatalf(err.Error())
}
@ -108,62 +135,65 @@ func TestShardMapper_WriteAndSingleMapperRawQuery(t *testing.T) {
expected []string
}{
{
stmt: `SELECT value FROM cpu`,
expected: []string{`{"name":"cpu","values":[{"time":1000000000,"value":42},{"time":2000000000,"value":60}]}`, `null`},
stmt: `SELECT load FROM cpu`,
expected: []string{`{"name":"cpu","fields":["load"],"values":[{"time":1000000000,"value":42,"tags":{"host":"serverA","region":"us-east"}},{"time":2000000000,"value":60,"tags":{"host":"serverB","region":"us-east"}}]}`, `null`},
},
{
stmt: `SELECT value FROM cpu`,
stmt: `SELECT load FROM cpu # chunkSize 1`,
chunkSize: 1,
expected: []string{`{"name":"cpu","values":[{"time":1000000000,"value":42}]}`, `{"name":"cpu","values":[{"time":2000000000,"value":60}]}`, `null`},
expected: []string{`{"name":"cpu","fields":["load"],"values":[{"time":1000000000,"value":42,"tags":{"host":"serverA","region":"us-east"}}]}`},
},
{
stmt: `SELECT value FROM cpu`,
stmt: `SELECT load FROM cpu # chunkSize 2`,
chunkSize: 2,
expected: []string{`{"name":"cpu","values":[{"time":1000000000,"value":42},{"time":2000000000,"value":60}]}`},
expected: []string{`{"name":"cpu","fields":["load"],"values":[{"time":1000000000,"value":42,"tags":{"host":"serverA","region":"us-east"}},{"time":2000000000,"value":60,"tags":{"host":"serverB","region":"us-east"}}]}`},
},
{
stmt: `SELECT value FROM cpu`,
stmt: `SELECT load FROM cpu # chunkSize 3`,
chunkSize: 3,
expected: []string{`{"name":"cpu","values":[{"time":1000000000,"value":42},{"time":2000000000,"value":60}]}`},
expected: []string{`{"name":"cpu","fields":["load"],"values":[{"time":1000000000,"value":42,"tags":{"host":"serverA","region":"us-east"}},{"time":2000000000,"value":60,"tags":{"host":"serverB","region":"us-east"}}]}`},
},
{
stmt: `SELECT value FROM cpu GROUP BY host`,
expected: []string{`{"name":"cpu","tags":{"host":"serverA"},"values":[{"time":1000000000,"value":42}]}`, `{"name":"cpu","tags":{"host":"serverB"},"values":[{"time":2000000000,"value":60}]}`, `null`},
stmt: `SELECT load FROM cpu GROUP BY host`,
expected: []string{
`{"name":"cpu","tags":{"host":"serverA"},"fields":["load"],"values":[{"time":1000000000,"value":42,"tags":{"host":"serverA","region":"us-east"}}]}`,
`{"name":"cpu","tags":{"host":"serverB"},"fields":["load"],"values":[{"time":2000000000,"value":60,"tags":{"host":"serverB","region":"us-east"}}]}`,
},
},
{
stmt: `SELECT value FROM cpu GROUP BY region`,
expected: []string{`{"name":"cpu","tags":{"region":"us-east"},"values":[{"time":1000000000,"value":42},{"time":2000000000,"value":60}]}`, `null`},
stmt: `SELECT load FROM cpu GROUP BY region`,
expected: []string{`{"name":"cpu","tags":{"region":"us-east"},"fields":["load"],"values":[{"time":1000000000,"value":42,"tags":{"host":"serverA","region":"us-east"}},{"time":2000000000,"value":60,"tags":{"host":"serverB","region":"us-east"}}]}`},
},
{
stmt: `SELECT value FROM cpu WHERE host='serverA'`,
expected: []string{`{"name":"cpu","values":[{"time":1000000000,"value":42}]}`, `null`},
stmt: `SELECT load FROM cpu WHERE host='serverA'`,
expected: []string{`{"name":"cpu","fields":["load"],"values":[{"time":1000000000,"value":42,"tags":{"host":"serverA","region":"us-east"}}]}`},
},
{
stmt: `SELECT value FROM cpu WHERE host='serverB'`,
expected: []string{`{"name":"cpu","values":[{"time":2000000000,"value":60}]}`, `null`},
stmt: `SELECT load FROM cpu WHERE host='serverB'`,
expected: []string{`{"name":"cpu","fields":["load"],"values":[{"time":2000000000,"value":60,"tags":{"host":"serverB","region":"us-east"}}]}`},
},
{
stmt: `SELECT value FROM cpu WHERE host='serverC'`,
stmt: `SELECT load FROM cpu WHERE host='serverC'`,
expected: []string{`null`},
},
{
stmt: `SELECT value FROM cpu WHERE value = 60`,
expected: []string{`{"name":"cpu","values":[{"time":2000000000,"value":60}]}`, `null`},
stmt: `SELECT load FROM cpu WHERE load = 60`,
expected: []string{`{"name":"cpu","fields":["load"],"values":[{"time":2000000000,"value":60,"tags":{"host":"serverB","region":"us-east"}}]}`},
},
{
stmt: `SELECT value FROM cpu WHERE value != 60`,
expected: []string{`{"name":"cpu","values":[{"time":1000000000,"value":42}]}`, `null`},
stmt: `SELECT load FROM cpu WHERE load != 60`,
expected: []string{`{"name":"cpu","fields":["load"],"values":[{"time":1000000000,"value":42,"tags":{"host":"serverA","region":"us-east"}}]}`},
},
{
stmt: fmt.Sprintf(`SELECT value FROM cpu WHERE time = '%s'`, pt1time.Format(influxql.DateTimeFormat)),
expected: []string{`{"name":"cpu","values":[{"time":1000000000,"value":42}]}`, `null`},
stmt: fmt.Sprintf(`SELECT load FROM cpu WHERE time = '%s'`, pt1time.Format(influxql.DateTimeFormat)),
expected: []string{`{"name":"cpu","fields":["load"],"values":[{"time":1000000000,"value":42,"tags":{"host":"serverA","region":"us-east"}}]}`},
},
{
stmt: fmt.Sprintf(`SELECT value FROM cpu WHERE time > '%s'`, pt1time.Format(influxql.DateTimeFormat)),
expected: []string{`{"name":"cpu","values":[{"time":2000000000,"value":60}]}`, `null`},
stmt: fmt.Sprintf(`SELECT load FROM cpu WHERE time > '%s'`, pt1time.Format(influxql.DateTimeFormat)),
expected: []string{`{"name":"cpu","fields":["load"],"values":[{"time":2000000000,"value":60,"tags":{"host":"serverB","region":"us-east"}}]}`},
},
{
stmt: fmt.Sprintf(`SELECT value FROM cpu WHERE time > '%s'`, pt2time.Format(influxql.DateTimeFormat)),
stmt: fmt.Sprintf(`SELECT load FROM cpu WHERE time > '%s'`, pt2time.Format(influxql.DateTimeFormat)),
expected: []string{`null`},
},
}
@ -188,20 +218,20 @@ func TestShardMapper_WriteAndSingleMapperRawQueryMultiValue(t *testing.T) {
shard := mustCreateShard(tmpDir)
pt1time := time.Unix(1, 0).UTC()
pt1 := NewPoint(
pt1 := tsdb.NewPoint(
"cpu",
map[string]string{"host": "serverA", "region": "us-east"},
map[string]interface{}{"foo": 42, "bar": 43},
pt1time,
)
pt2time := time.Unix(2, 0).UTC()
pt2 := NewPoint(
pt2 := tsdb.NewPoint(
"cpu",
map[string]string{"host": "serverB", "region": "us-east"},
map[string]interface{}{"foo": 60, "bar": 61},
pt2time,
)
err := shard.WritePoints([]Point{pt1, pt2})
err := shard.WritePoints([]tsdb.Point{pt1, pt2})
if err != nil {
t.Fatalf(err.Error())
}
@ -213,11 +243,11 @@ func TestShardMapper_WriteAndSingleMapperRawQueryMultiValue(t *testing.T) {
}{
{
stmt: `SELECT foo FROM cpu`,
expected: []string{`{"name":"cpu","values":[{"time":1000000000,"value":42},{"time":2000000000,"value":60}]}`, `null`},
expected: []string{`{"name":"cpu","fields":["foo"],"values":[{"time":1000000000,"value":42,"tags":{"host":"serverA","region":"us-east"}},{"time":2000000000,"value":60,"tags":{"host":"serverB","region":"us-east"}}]}`},
},
{
stmt: `SELECT foo,bar FROM cpu`,
expected: []string{`{"name":"cpu","values":[{"time":1000000000,"value":{"bar":43,"foo":42}},{"time":2000000000,"value":{"bar":61,"foo":60}}]}`, `null`},
expected: []string{`{"name":"cpu","fields":["bar","foo"],"values":[{"time":1000000000,"value":{"bar":43,"foo":42},"tags":{"host":"serverA","region":"us-east"}},{"time":2000000000,"value":{"bar":61,"foo":60},"tags":{"host":"serverB","region":"us-east"}}]}`},
},
}
@ -225,10 +255,75 @@ func TestShardMapper_WriteAndSingleMapperRawQueryMultiValue(t *testing.T) {
stmt := mustParseSelectStatement(tt.stmt)
mapper := openRawMapperOrFail(t, shard, stmt, tt.chunkSize)
for _, s := range tt.expected {
for i, s := range tt.expected {
got := nextRawChunkAsJson(t, mapper)
if got != s {
t.Errorf("test '%s'\n\tgot %s\n\texpected %s", tt.stmt, got, tt.expected)
t.Errorf("test '%s'\n\tgot %s\n\texpected %s", tt.stmt, got, tt.expected[i])
break
}
}
}
}
func TestShardMapper_WriteAndSingleMapperRawQueryMultiSource(t *testing.T) {
tmpDir, _ := ioutil.TempDir("", "shard_test")
defer os.RemoveAll(tmpDir)
shard := mustCreateShard(tmpDir)
pt1time := time.Unix(1, 0).UTC()
pt1 := tsdb.NewPoint(
"cpu0",
map[string]string{"host": "serverA", "region": "us-east"},
map[string]interface{}{"foo": 42},
pt1time,
)
pt2time := time.Unix(2, 0).UTC()
pt2 := tsdb.NewPoint(
"cpu1",
map[string]string{"host": "serverB", "region": "us-east"},
map[string]interface{}{"bar": 60},
pt2time,
)
err := shard.WritePoints([]tsdb.Point{pt1, pt2})
if err != nil {
t.Fatalf(err.Error())
}
var tests = []struct {
stmt string
chunkSize int
expected []string
}{
{
stmt: `SELECT foo FROM cpu0,cpu1`,
expected: []string{`{"name":"cpu0","fields":["foo"],"values":[{"time":1000000000,"value":42,"tags":{"host":"serverA","region":"us-east"}}]}`},
},
{
stmt: `SELECT foo FROM cpu0,cpu1 WHERE foo=42`,
expected: []string{`{"name":"cpu0","fields":["foo"],"values":[{"time":1000000000,"value":42,"tags":{"host":"serverA","region":"us-east"}}]}`},
},
{
stmt: `SELECT bar FROM cpu0,cpu1`,
expected: []string{`{"name":"cpu1","fields":["bar"],"values":[{"time":2000000000,"value":60,"tags":{"host":"serverB","region":"us-east"}}]}`},
},
{
stmt: `SELECT bar FROM cpu0,cpu1 WHERE foo=42`,
expected: []string{`null`},
},
{
stmt: `SELECT bar FROM cpu0,cpu1 WHERE bar!=60`,
expected: []string{`null`},
},
}
for _, tt := range tests {
stmt := mustParseSelectStatement(tt.stmt)
mapper := openRawMapperOrFail(t, shard, stmt, tt.chunkSize)
for i, s := range tt.expected {
got := nextRawChunkAsJson(t, mapper)
if got != s {
t.Errorf("test '%s'\n\tgot %s\n\texpected %s", tt.stmt, got, tt.expected[i])
break
}
}
@ -241,20 +336,20 @@ func TestShardMapper_WriteAndSingleMapperAggregateQuery(t *testing.T) {
shard := mustCreateShard(tmpDir)
pt1time := time.Unix(10, 0).UTC()
pt1 := NewPoint(
pt1 := tsdb.NewPoint(
"cpu",
map[string]string{"host": "serverA", "region": "us-east"},
map[string]interface{}{"value": 1},
pt1time,
)
pt2time := time.Unix(20, 0).UTC()
pt2 := NewPoint(
pt2 := tsdb.NewPoint(
"cpu",
map[string]string{"host": "serverB", "region": "us-east"},
map[string]interface{}{"value": 60},
pt2time,
)
err := shard.WritePoints([]Point{pt1, pt2})
err := shard.WritePoints([]tsdb.Point{pt1, pt2})
if err != nil {
t.Fatalf(err.Error())
}
@ -265,92 +360,92 @@ func TestShardMapper_WriteAndSingleMapperAggregateQuery(t *testing.T) {
}{
{
stmt: `SELECT sum(value) FROM cpu`,
expected: []string{`{"name":"cpu","values":[{"value":[61]}]}`, `null`},
expected: []string{`{"name":"cpu","fields":["value"],"values":[{"value":[61]}]}`, `null`},
},
{
stmt: `SELECT sum(value),mean(value) FROM cpu`,
expected: []string{`{"name":"cpu","values":[{"value":[61,{"Count":2,"Mean":30.5,"ResultType":1}]}]}`, `null`},
expected: []string{`{"name":"cpu","fields":["value"],"values":[{"value":[61,{"Count":2,"Mean":30.5,"ResultType":1}]}]}`, `null`},
},
{
stmt: `SELECT sum(value) FROM cpu GROUP BY host`,
expected: []string{
`{"name":"cpu","tags":{"host":"serverA"},"values":[{"value":[1]}]}`,
`{"name":"cpu","tags":{"host":"serverB"},"values":[{"value":[60]}]}`,
`{"name":"cpu","tags":{"host":"serverA"},"fields":["value"],"values":[{"value":[1]}]}`,
`{"name":"cpu","tags":{"host":"serverB"},"fields":["value"],"values":[{"value":[60]}]}`,
`null`},
},
{
stmt: `SELECT sum(value) FROM cpu GROUP BY region`,
expected: []string{
`{"name":"cpu","tags":{"region":"us-east"},"values":[{"value":[61]}]}`,
`{"name":"cpu","tags":{"region":"us-east"},"fields":["value"],"values":[{"value":[61]}]}`,
`null`},
},
{
stmt: `SELECT sum(value) FROM cpu GROUP BY region,host`,
expected: []string{
`{"name":"cpu","tags":{"host":"serverA","region":"us-east"},"values":[{"value":[1]}]}`,
`{"name":"cpu","tags":{"host":"serverB","region":"us-east"},"values":[{"value":[60]}]}`,
`{"name":"cpu","tags":{"host":"serverA","region":"us-east"},"fields":["value"],"values":[{"value":[1]}]}`,
`{"name":"cpu","tags":{"host":"serverB","region":"us-east"},"fields":["value"],"values":[{"value":[60]}]}`,
`null`},
},
{
stmt: `SELECT sum(value) FROM cpu WHERE host='serverB'`,
expected: []string{
`{"name":"cpu","values":[{"value":[60]}]}`,
`{"name":"cpu","fields":["value"],"values":[{"value":[60]}]}`,
`null`},
},
{
stmt: fmt.Sprintf(`SELECT sum(value) FROM cpu WHERE time = '%s'`, pt1time.Format(influxql.DateTimeFormat)),
expected: []string{
`{"name":"cpu","values":[{"time":10000000000,"value":[1]}]}`,
`{"name":"cpu","fields":["value"],"values":[{"time":10000000000,"value":[1]}]}`,
`null`},
},
{
stmt: fmt.Sprintf(`SELECT sum(value) FROM cpu WHERE time > '%s'`, pt1time.Format(influxql.DateTimeFormat)),
expected: []string{
`{"name":"cpu","values":[{"time":10000000001,"value":[60]}]}`,
`{"name":"cpu","fields":["value"],"values":[{"time":10000000001,"value":[60]}]}`,
`null`},
},
{
stmt: fmt.Sprintf(`SELECT sum(value) FROM cpu WHERE time > '%s'`, pt2time.Format(influxql.DateTimeFormat)),
expected: []string{
`{"name":"cpu","values":[{"time":20000000001,"value":[null]}]}`,
`{"name":"cpu","fields":["value"],"values":[{"time":20000000001,"value":[null]}]}`,
`null`},
},
}
for _, tt := range tests {
stmt := mustParseSelectStatement(tt.stmt)
mapper := openAggMapperOrFail(t, shard, stmt)
mapper := openLocalMapperOrFail(t, shard, stmt)
for i := range tt.expected {
got := aggIntervalAsJson(t, mapper)
if got != tt.expected[i] {
t.Errorf("test '%s'\n\tgot %s\n\texpected %s", tt.stmt, got, tt.expected[i])
t.Fatalf("test '%s'\n\tgot %s\n\texpected %s", tt.stmt, got, tt.expected[i])
break
}
}
}
}
func TestShardMapper_AggMapperTagSets(t *testing.T) {
func TestShardMapper_LocalMapperTagSets(t *testing.T) {
tmpDir, _ := ioutil.TempDir("", "shard_test")
defer os.RemoveAll(tmpDir)
shard := mustCreateShard(tmpDir)
pt1time := time.Unix(1, 0).UTC()
pt1 := NewPoint(
pt1 := tsdb.NewPoint(
"cpu",
map[string]string{"host": "serverA", "region": "us-east"},
map[string]interface{}{"value": 42},
pt1time,
)
pt2time := time.Unix(2, 0).UTC()
pt2 := NewPoint(
pt2 := tsdb.NewPoint(
"cpu",
map[string]string{"host": "serverB", "region": "us-east"},
map[string]interface{}{"value": 60},
pt2time,
)
err := shard.WritePoints([]Point{pt1, pt2})
err := shard.WritePoints([]tsdb.Point{pt1, pt2})
if err != nil {
t.Fatalf(err.Error())
}
@ -387,7 +482,7 @@ func TestShardMapper_AggMapperTagSets(t *testing.T) {
for _, tt := range tests {
stmt := mustParseSelectStatement(tt.stmt)
mapper := openAggMapperOrFail(t, shard, stmt)
mapper := openLocalMapperOrFail(t, shard, stmt)
got := mapper.TagSets()
if !reflect.DeepEqual(got, tt.expected) {
t.Errorf("test '%s'\n\tgot %s\n\texpected %s", tt.stmt, got, tt.expected)
@ -396,10 +491,10 @@ func TestShardMapper_AggMapperTagSets(t *testing.T) {
}
func mustCreateShard(dir string) *Shard {
func mustCreateShard(dir string) *tsdb.Shard {
tmpShard := path.Join(dir, "shard")
index := NewDatabaseIndex()
sh := NewShard(index, tmpShard)
index := tsdb.NewDatabaseIndex()
sh := tsdb.NewShard(index, tmpShard, tsdb.NewEngineOptions())
if err := sh.Open(); err != nil {
panic(fmt.Sprintf("error opening shard: %s", err.Error()))
}
@ -415,8 +510,8 @@ func mustParseSelectStatement(s string) *influxql.SelectStatement {
return stmt.(*influxql.SelectStatement)
}
func openRawMapperOrFail(t *testing.T, shard *Shard, stmt *influxql.SelectStatement, chunkSize int) *RawMapper {
mapper := NewRawMapper(shard, stmt, chunkSize)
func openRawMapperOrFail(t *testing.T, shard *tsdb.Shard, stmt *influxql.SelectStatement, chunkSize int) tsdb.Mapper {
mapper := tsdb.NewLocalMapper(shard, stmt, chunkSize)
if err := mapper.Open(); err != nil {
t.Fatalf("failed to open raw mapper: %s", err.Error())
@ -424,7 +519,7 @@ func openRawMapperOrFail(t *testing.T, shard *Shard, stmt *influxql.SelectStatem
return mapper
}
func nextRawChunkAsJson(t *testing.T, mapper *RawMapper) string {
func nextRawChunkAsJson(t *testing.T, mapper tsdb.Mapper) string {
r, err := mapper.NextChunk()
if err != nil {
t.Fatalf("failed to get next chunk from mapper: %s", err.Error())
@ -436,8 +531,8 @@ func nextRawChunkAsJson(t *testing.T, mapper *RawMapper) string {
return string(b)
}
func openAggMapperOrFail(t *testing.T, shard *Shard, stmt *influxql.SelectStatement) *AggMapper {
mapper := NewAggMapper(shard, stmt)
func openLocalMapperOrFail(t *testing.T, shard *tsdb.Shard, stmt *influxql.SelectStatement) *tsdb.LocalMapper {
mapper := tsdb.NewLocalMapper(shard, stmt, 0)
if err := mapper.Open(); err != nil {
t.Fatalf("failed to open aggregate mapper: %s", err.Error())
@ -445,7 +540,7 @@ func openAggMapperOrFail(t *testing.T, shard *Shard, stmt *influxql.SelectStatem
return mapper
}
func aggIntervalAsJson(t *testing.T, mapper *AggMapper) string {
func aggIntervalAsJson(t *testing.T, mapper *tsdb.LocalMapper) string {
r, err := mapper.NextChunk()
if err != nil {
t.Fatalf("failed to get chunk from aggregate mapper: %s", err.Error())

View File

@ -39,6 +39,27 @@ func NewDatabaseIndex() *DatabaseIndex {
}
}
// Names returns a sorted list of measurement names.
func (d *DatabaseIndex) Names() []string {
d.mu.RLock()
defer d.mu.RUnlock()
return d.names
}
// Series returns a series by key.
func (d *DatabaseIndex) Series(key string) *Series {
d.mu.RLock()
defer d.mu.RUnlock()
return d.series[key]
}
// SeriesN returns the number of series.
func (d *DatabaseIndex) SeriesN() int {
d.mu.RLock()
defer d.mu.RUnlock()
return len(d.series)
}
// Measurement returns the measurement object from the index by the name
func (d *DatabaseIndex) Measurement(name string) *Measurement {
d.mu.RLock()
@ -55,8 +76,8 @@ func (d *DatabaseIndex) MeasurementSeriesCounts() (nMeasurements int, nSeries in
return
}
// createSeriesIndexIfNotExists adds the series for the given measurement to the index and sets its ID or returns the existing series object
func (s *DatabaseIndex) createSeriesIndexIfNotExists(measurementName string, series *Series) *Series {
// CreateSeriesIndexIfNotExists adds the series for the given measurement to the index and sets its ID or returns the existing series object
func (s *DatabaseIndex) CreateSeriesIndexIfNotExists(measurementName string, series *Series) *Series {
// if there is a measurement for this id, it's already been added
ss := s.series[series.Key]
if ss != nil {
@ -64,7 +85,7 @@ func (s *DatabaseIndex) createSeriesIndexIfNotExists(measurementName string, ser
}
// get or create the measurement index
m := s.createMeasurementIndexIfNotExists(measurementName)
m := s.CreateMeasurementIndexIfNotExists(measurementName)
// set the in memory ID for query processing on this shard
series.id = s.lastID + 1
@ -78,8 +99,8 @@ func (s *DatabaseIndex) createSeriesIndexIfNotExists(measurementName string, ser
return series
}
// createMeasurementIndexIfNotExists creates or retrieves an in memory index object for the measurement
func (s *DatabaseIndex) createMeasurementIndexIfNotExists(name string) *Measurement {
// CreateMeasurementIndexIfNotExists creates or retrieves an in memory index object for the measurement
func (s *DatabaseIndex) CreateMeasurementIndexIfNotExists(name string) *Measurement {
name = unescapeString(name)
m := s.measurements[name]
if m == nil {
@ -263,6 +284,7 @@ func (db *DatabaseIndex) DropSeries(keys []string) {
continue
}
series.measurement.DropSeries(series.id)
delete(db.series, k)
}
}
@ -276,11 +298,10 @@ type Measurement struct {
index *DatabaseIndex
// in-memory index fields
series map[string]*Series // sorted tagset string to the series object
seriesByID map[uint64]*Series // lookup table for series by their id
measurement *Measurement
seriesByTagKeyValue map[string]map[string]seriesIDs // map from tag key to value to sorted set of series ids
seriesIDs seriesIDs // sorted list of series IDs in this measurement
seriesByTagKeyValue map[string]map[string]SeriesIDs // map from tag key to value to sorted set of series ids
seriesIDs SeriesIDs // sorted list of series IDs in this measurement
}
// NewMeasurement allocates and initializes a new Measurement.
@ -290,10 +311,9 @@ func NewMeasurement(name string, idx *DatabaseIndex) *Measurement {
fieldNames: make(map[string]struct{}),
index: idx,
series: make(map[string]*Series),
seriesByID: make(map[uint64]*Series),
seriesByTagKeyValue: make(map[string]map[string]seriesIDs),
seriesIDs: make(seriesIDs, 0),
seriesByTagKeyValue: make(map[string]map[string]SeriesIDs),
seriesIDs: make(SeriesIDs, 0),
}
}
@ -305,6 +325,13 @@ func (m *Measurement) HasField(name string) bool {
return hasField
}
// SeriesByID returns a series by identifier.
func (m *Measurement) SeriesByID(id uint64) *Series {
m.mu.RLock()
defer m.mu.RUnlock()
return m.seriesByID[id]
}
// SeriesKeys returns the keys of every series in this measurement
func (m *Measurement) SeriesKeys() []string {
m.mu.RLock()
@ -321,7 +348,7 @@ func (m *Measurement) ValidateGroupBy(stmt *influxql.SelectStatement) error {
for _, d := range stmt.Dimensions {
switch e := d.Expr.(type) {
case *influxql.VarRef:
if !m.HasTagKey(e.Val) {
if m.HasField(e.Val) {
return fmt.Errorf("can not use field in GROUP BY clause: %s", e.Val)
}
}
@ -353,8 +380,6 @@ func (m *Measurement) AddSeries(s *Series) bool {
return false
}
m.seriesByID[s.id] = s
tagset := string(marshalTags(s.Tags))
m.series[tagset] = s
m.seriesIDs = append(m.seriesIDs, s.id)
// the series ID should always be higher than all others because it's a new
@ -367,7 +392,7 @@ func (m *Measurement) AddSeries(s *Series) bool {
for k, v := range s.Tags {
valueMap := m.seriesByTagKeyValue[k]
if valueMap == nil {
valueMap = make(map[string]seriesIDs)
valueMap = make(map[string]SeriesIDs)
m.seriesByTagKeyValue[k] = valueMap
}
ids := valueMap[v]
@ -392,10 +417,6 @@ func (m *Measurement) DropSeries(seriesID uint64) {
if _, ok := m.seriesByID[seriesID]; !ok {
return
}
s := m.seriesByID[seriesID]
tagset := string(marshalTags(s.Tags))
delete(m.series, tagset)
delete(m.seriesByID, seriesID)
var ids []uint64
@ -407,7 +428,7 @@ func (m *Measurement) DropSeries(seriesID uint64) {
m.seriesIDs = ids
// remove this series id to the tag index on the measurement
// s.seriesByTagKeyValue is defined as map[string]map[string]seriesIDs
// s.seriesByTagKeyValue is defined as map[string]map[string]SeriesIDs
for k, v := range m.seriesByTagKeyValue {
values := v
for kk, vv := range values {
@ -497,7 +518,7 @@ func (m *Measurement) TagSets(stmt *influxql.SelectStatement, dimensions []strin
// Convert the TagSet to a string, so it can be added to a map allowing TagSets to be handled
// as a set.
tagsAsKey := string(marshalTags(tags))
tagsAsKey := string(MarshalTags(tags))
tagSet, ok := tagSets[tagsAsKey]
if !ok {
// This TagSet is new, create a new entry for it.
@ -507,7 +528,7 @@ func (m *Measurement) TagSets(stmt *influxql.SelectStatement, dimensions []strin
tagsForSet[k] = v
}
tagSet.Tags = tagsForSet
tagSet.Key = marshalTags(tagsForSet)
tagSet.Key = MarshalTags(tagsForSet)
}
// Associate the series and filter with the Tagset.
@ -534,11 +555,11 @@ func (m *Measurement) TagSets(stmt *influxql.SelectStatement, dimensions []strin
}
// mergeSeriesFilters merges two sets of filter expressions and culls series IDs.
func mergeSeriesFilters(op influxql.Token, ids seriesIDs, lfilters, rfilters map[uint64]influxql.Expr) (seriesIDs, map[uint64]influxql.Expr) {
func mergeSeriesFilters(op influxql.Token, ids SeriesIDs, lfilters, rfilters map[uint64]influxql.Expr) (SeriesIDs, map[uint64]influxql.Expr) {
// Create a map to hold the final set of series filter expressions.
filters := make(map[uint64]influxql.Expr, 0)
// Resulting list of series IDs
var series seriesIDs
var series SeriesIDs
// Combining logic:
// +==========+==========+==========+=======================+=======================+
@ -603,7 +624,7 @@ func mergeSeriesFilters(op influxql.Token, ids seriesIDs, lfilters, rfilters map
// idsForExpr will return a collection of series ids and a filter expression that should
// be used to filter points from those series.
func (m *Measurement) idsForExpr(n *influxql.BinaryExpr) (seriesIDs, influxql.Expr, error) {
func (m *Measurement) idsForExpr(n *influxql.BinaryExpr) (SeriesIDs, influxql.Expr, error) {
name, ok := n.LHS.(*influxql.VarRef)
value := n.RHS
if !ok {
@ -632,20 +653,20 @@ func (m *Measurement) idsForExpr(n *influxql.BinaryExpr) (seriesIDs, influxql.Ex
// if we're looking for series with a specific tag value
if str, ok := value.(*influxql.StringLiteral); ok {
var ids seriesIDs
var ids SeriesIDs
if n.Op == influxql.EQ {
// return series that have a tag of specific value.
ids = tagVals[str.Val]
} else if n.Op == influxql.NEQ {
ids = m.seriesIDs.reject(tagVals[str.Val])
ids = m.seriesIDs.Reject(tagVals[str.Val])
}
return ids, &influxql.BooleanLiteral{Val: true}, nil
}
// if we're looking for series with a tag value that matches a regex
if re, ok := value.(*influxql.RegexLiteral); ok {
var ids seriesIDs
var ids SeriesIDs
// The operation is a NEQREGEX, code must start by assuming all match, even
// series without any tags.
@ -657,9 +678,9 @@ func (m *Measurement) idsForExpr(n *influxql.BinaryExpr) (seriesIDs, influxql.Ex
match := re.Val.MatchString(k)
if match && n.Op == influxql.EQREGEX {
ids = ids.union(tagVals[k])
ids = ids.Union(tagVals[k])
} else if match && n.Op == influxql.NEQREGEX {
ids = ids.reject(tagVals[k])
ids = ids.Reject(tagVals[k])
}
}
return ids, &influxql.BooleanLiteral{Val: true}, nil
@ -671,7 +692,7 @@ func (m *Measurement) idsForExpr(n *influxql.BinaryExpr) (seriesIDs, influxql.Ex
// walkWhereForSeriesIds recursively walks the WHERE clause and returns an ordered set of series IDs and
// a map from those series IDs to filter expressions that should be used to limit points returned in
// the final query result.
func (m *Measurement) walkWhereForSeriesIds(expr influxql.Expr) (seriesIDs, map[uint64]influxql.Expr, error) {
func (m *Measurement) walkWhereForSeriesIds(expr influxql.Expr) (SeriesIDs, map[uint64]influxql.Expr, error) {
switch n := expr.(type) {
case *influxql.BinaryExpr:
switch n.Op {
@ -702,12 +723,12 @@ func (m *Measurement) walkWhereForSeriesIds(expr influxql.Expr) (seriesIDs, map[
}
// Combine the series IDs from the LHS and RHS.
var ids seriesIDs
var ids SeriesIDs
switch n.Op {
case influxql.AND:
ids = lids.intersect(rids)
ids = lids.Intersect(rids)
case influxql.OR:
ids = lids.union(rids)
ids = lids.Union(rids)
}
// Merge the filter expressions for the LHS and RHS.
@ -785,7 +806,7 @@ func expandExprWithValues(expr influxql.Expr, keys []string, tagExprs []tagExpr,
// seriesIDsAllOrByExpr walks an expressions for matching series IDs
// or, if no expressions is given, returns all series IDs for the measurement.
func (m *Measurement) seriesIDsAllOrByExpr(expr influxql.Expr) (seriesIDs, error) {
func (m *Measurement) seriesIDsAllOrByExpr(expr influxql.Expr) (SeriesIDs, error) {
// If no expression given or the measurement has no series,
// we can take just return the ids or nil accordingly.
if expr == nil {
@ -997,16 +1018,16 @@ func (s *Series) match(tags map[string]string) bool {
return true
}
// seriesIDs is a convenience type for sorting, checking equality, and doing
// SeriesIDs is a convenience type for sorting, checking equality, and doing
// union and intersection of collections of series ids.
type seriesIDs []uint64
type SeriesIDs []uint64
func (a seriesIDs) Len() int { return len(a) }
func (a seriesIDs) Less(i, j int) bool { return a[i] < a[j] }
func (a seriesIDs) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
func (a SeriesIDs) Len() int { return len(a) }
func (a SeriesIDs) Less(i, j int) bool { return a[i] < a[j] }
func (a SeriesIDs) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
// equals assumes that both are sorted.
func (a seriesIDs) equals(other seriesIDs) bool {
// Equals assumes that both are sorted.
func (a SeriesIDs) Equals(other SeriesIDs) bool {
if len(a) != len(other) {
return false
}
@ -1018,9 +1039,9 @@ func (a seriesIDs) equals(other seriesIDs) bool {
return true
}
// intersect returns a new collection of series ids in sorted order that is the intersection of the two.
// Intersect returns a new collection of series ids in sorted order that is the intersection of the two.
// The two collections must already be sorted.
func (a seriesIDs) intersect(other seriesIDs) seriesIDs {
func (a SeriesIDs) Intersect(other SeriesIDs) SeriesIDs {
l := a
r := other
@ -1047,12 +1068,12 @@ func (a seriesIDs) intersect(other seriesIDs) seriesIDs {
}
}
return seriesIDs(ids)
return SeriesIDs(ids)
}
// union returns a new collection of series ids in sorted order that is the union of the two.
// Union returns a new collection of series ids in sorted order that is the union of the two.
// The two collections must already be sorted.
func (a seriesIDs) union(other seriesIDs) seriesIDs {
func (a SeriesIDs) Union(other SeriesIDs) SeriesIDs {
l := a
r := other
ids := make([]uint64, 0, len(l)+len(r))
@ -1081,9 +1102,9 @@ func (a seriesIDs) union(other seriesIDs) seriesIDs {
return ids
}
// reject returns a new collection of series ids in sorted order with the passed in set removed from the original.
// Reject returns a new collection of series ids in sorted order with the passed in set removed from the original.
// This is useful for the NOT operator. The two collections must already be sorted.
func (a seriesIDs) reject(other seriesIDs) seriesIDs {
func (a SeriesIDs) Reject(other SeriesIDs) SeriesIDs {
l := a
r := other
var i, j int
@ -1106,7 +1127,7 @@ func (a seriesIDs) reject(other seriesIDs) seriesIDs {
ids = append(ids, l[i:]...)
}
return seriesIDs(ids)
return SeriesIDs(ids)
}
// TagFilter represents a tag filter when looking up other tags or measurements.
@ -1118,7 +1139,7 @@ type TagFilter struct {
}
// used to convert the tag set to bytes for use as a lookup key
func marshalTags(tags map[string]string) []byte {
func MarshalTags(tags map[string]string) []byte {
// Empty maps marshal to empty bytes.
if len(tags) == 0 {
return nil
@ -1169,6 +1190,13 @@ func (m *Measurement) TagKeys() []string {
return keys
}
// SetFieldName adds the field name to the measurement.
func (m *Measurement) SetFieldName(name string) {
m.mu.Lock()
m.fieldNames[name] = struct{}{}
m.mu.Unlock()
}
// FieldNames returns a list of the measurement's field names
func (m *Measurement) FieldNames() (a []string) {
m.mu.RLock()
@ -1180,7 +1208,7 @@ func (m *Measurement) FieldNames() (a []string) {
return
}
func (m *Measurement) tagValuesByKeyAndSeriesID(tagKeys []string, ids seriesIDs) map[string]stringSet {
func (m *Measurement) tagValuesByKeyAndSeriesID(tagKeys []string, ids SeriesIDs) map[string]stringSet {
// If no tag keys were passed, get all tag keys for the measurement.
if len(tagKeys) == 0 {
for k := range m.seriesByTagKeyValue {
@ -1221,9 +1249,11 @@ func newStringSet() stringSet {
return make(map[string]struct{})
}
// add adds a string to the set.
func (s stringSet) add(ss string) {
s[ss] = struct{}{}
// add adds strings to the set.
func (s stringSet) add(ss ...string) {
for _, n := range ss {
s[n] = struct{}{}
}
}
// contains returns whether the set contains the given string.
@ -1270,7 +1300,7 @@ func (s stringSet) intersect(o stringSet) stringSet {
return ns
}
func measurementFromSeriesKey(key string) string {
func MeasurementFromSeriesKey(key string) string {
idx := strings.Index(key, ",")
if idx == -1 {
return key

View File

@ -1,4 +1,4 @@
package tsdb
package tsdb_test
import (
"bytes"
@ -6,86 +6,87 @@ import (
"testing"
"github.com/influxdb/influxdb/influxql"
"github.com/influxdb/influxdb/tsdb"
)
// Test comparing seriesIDs for equality.
func Test_seriesIDs_equals(t *testing.T) {
ids1 := seriesIDs{1, 2, 3}
ids2 := seriesIDs{1, 2, 3}
ids3 := seriesIDs{4, 5, 6}
// Test comparing SeriesIDs for equality.
func Test_SeriesIDs_Equals(t *testing.T) {
ids1 := tsdb.SeriesIDs{1, 2, 3}
ids2 := tsdb.SeriesIDs{1, 2, 3}
ids3 := tsdb.SeriesIDs{4, 5, 6}
if !ids1.equals(ids2) {
if !ids1.Equals(ids2) {
t.Fatal("expected ids1 == ids2")
} else if ids1.equals(ids3) {
} else if ids1.Equals(ids3) {
t.Fatal("expected ids1 != ids3")
}
}
// Test intersecting sets of seriesIDs.
func Test_seriesIDs_intersect(t *testing.T) {
// Test intersecting sets of SeriesIDs.
func Test_SeriesIDs_Intersect(t *testing.T) {
// Test swaping l & r, all branches of if-else, and exit loop when 'j < len(r)'
ids1 := seriesIDs{1, 3, 4, 5, 6}
ids2 := seriesIDs{1, 2, 3, 7}
exp := seriesIDs{1, 3}
got := ids1.intersect(ids2)
ids1 := tsdb.SeriesIDs{1, 3, 4, 5, 6}
ids2 := tsdb.SeriesIDs{1, 2, 3, 7}
exp := tsdb.SeriesIDs{1, 3}
got := ids1.Intersect(ids2)
if !exp.equals(got) {
if !exp.Equals(got) {
t.Fatalf("exp=%v, got=%v", exp, got)
}
// Test exit for loop when 'i < len(l)'
ids1 = seriesIDs{1}
ids2 = seriesIDs{1, 2}
exp = seriesIDs{1}
got = ids1.intersect(ids2)
ids1 = tsdb.SeriesIDs{1}
ids2 = tsdb.SeriesIDs{1, 2}
exp = tsdb.SeriesIDs{1}
got = ids1.Intersect(ids2)
if !exp.equals(got) {
if !exp.Equals(got) {
t.Fatalf("exp=%v, got=%v", exp, got)
}
}
// Test union sets of seriesIDs.
func Test_seriesIDs_union(t *testing.T) {
// Test union sets of SeriesIDs.
func Test_SeriesIDs_Union(t *testing.T) {
// Test all branches of if-else, exit loop because of 'j < len(r)', and append remainder from left.
ids1 := seriesIDs{1, 2, 3, 7}
ids2 := seriesIDs{1, 3, 4, 5, 6}
exp := seriesIDs{1, 2, 3, 4, 5, 6, 7}
got := ids1.union(ids2)
ids1 := tsdb.SeriesIDs{1, 2, 3, 7}
ids2 := tsdb.SeriesIDs{1, 3, 4, 5, 6}
exp := tsdb.SeriesIDs{1, 2, 3, 4, 5, 6, 7}
got := ids1.Union(ids2)
if !exp.equals(got) {
if !exp.Equals(got) {
t.Fatalf("exp=%v, got=%v", exp, got)
}
// Test exit because of 'i < len(l)' and append remainder from right.
ids1 = seriesIDs{1}
ids2 = seriesIDs{1, 2}
exp = seriesIDs{1, 2}
got = ids1.union(ids2)
ids1 = tsdb.SeriesIDs{1}
ids2 = tsdb.SeriesIDs{1, 2}
exp = tsdb.SeriesIDs{1, 2}
got = ids1.Union(ids2)
if !exp.equals(got) {
if !exp.Equals(got) {
t.Fatalf("exp=%v, got=%v", exp, got)
}
}
// Test removing one set of seriesIDs from another.
func Test_seriesIDs_reject(t *testing.T) {
// Test removing one set of SeriesIDs from another.
func Test_SeriesIDs_Reject(t *testing.T) {
// Test all branches of if-else, exit loop because of 'j < len(r)', and append remainder from left.
ids1 := seriesIDs{1, 2, 3, 7}
ids2 := seriesIDs{1, 3, 4, 5, 6}
exp := seriesIDs{2, 7}
got := ids1.reject(ids2)
ids1 := tsdb.SeriesIDs{1, 2, 3, 7}
ids2 := tsdb.SeriesIDs{1, 3, 4, 5, 6}
exp := tsdb.SeriesIDs{2, 7}
got := ids1.Reject(ids2)
if !exp.equals(got) {
if !exp.Equals(got) {
t.Fatalf("exp=%v, got=%v", exp, got)
}
// Test exit because of 'i < len(l)'.
ids1 = seriesIDs{1}
ids2 = seriesIDs{1, 2}
exp = seriesIDs{}
got = ids1.reject(ids2)
ids1 = tsdb.SeriesIDs{1}
ids2 = tsdb.SeriesIDs{1, 2}
exp = tsdb.SeriesIDs{}
got = ids1.Reject(ids2)
if !exp.equals(got) {
if !exp.Equals(got) {
t.Fatalf("exp=%v, got=%v", exp, got)
}
}
@ -113,7 +114,7 @@ func TestMarshalTags(t *testing.T) {
result: []byte(`baz|foo|battttt|bar`),
},
} {
result := marshalTags(tt.tags)
result := tsdb.MarshalTags(tt.tags)
if !bytes.Equal(result, tt.result) {
t.Fatalf("%d. unexpected result: exp=%s, got=%s", i, tt.result, result)
}
@ -137,7 +138,7 @@ func benchmarkMarshalTags(b *testing.B, keyN int) {
// Unmarshal map into byte slice.
b.ReportAllocs()
for i := 0; i < b.N; i++ {
marshalTags(tags)
tsdb.MarshalTags(tags)
}
}
@ -154,23 +155,23 @@ func BenchmarkCreateSeriesIndex_1M(b *testing.B) {
}
func benchmarkCreateSeriesIndex(b *testing.B, series []*TestSeries) {
idxs := make([]*DatabaseIndex, 0, b.N)
idxs := make([]*tsdb.DatabaseIndex, 0, b.N)
for i := 0; i < b.N; i++ {
idxs = append(idxs, NewDatabaseIndex())
idxs = append(idxs, tsdb.NewDatabaseIndex())
}
b.ResetTimer()
for n := 0; n < b.N; n++ {
idx := idxs[n]
for _, s := range series {
idx.createSeriesIndexIfNotExists(s.Measurement, s.Series)
idx.CreateSeriesIndexIfNotExists(s.Measurement, s.Series)
}
}
}
type TestSeries struct {
Measurement string
Series *Series
Series *tsdb.Series
}
func genTestSeries(mCnt, tCnt, vCnt int) []*TestSeries {
@ -181,8 +182,8 @@ func genTestSeries(mCnt, tCnt, vCnt int) []*TestSeries {
for _, ts := range tagSets {
series = append(series, &TestSeries{
Measurement: m,
Series: &Series{
Key: fmt.Sprintf("%s:%s", m, string(marshalTags(ts))),
Series: &tsdb.Series{
Key: fmt.Sprintf("%s:%s", m, string(tsdb.MarshalTags(ts))),
Tags: ts,
},
})

View File

@ -37,6 +37,13 @@ type Point interface {
String() string
}
// Points represents a sortable list of points by timestamp.
type Points []Point
func (a Points) Len() int { return len(a) }
func (a Points) Less(i, j int) bool { return a[i].Time().Before(a[j].Time()) }
func (a Points) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
// point is the default implementation of Point.
type point struct {
time time.Time
@ -109,7 +116,7 @@ func ParsePointsWithPrecision(buf []byte, defaultTime time.Time, precision strin
block []byte
)
for {
pos, block = scanTo(buf, pos, '\n')
pos, block = scanLine(buf, pos)
pos += 1
if len(block) == 0 {
@ -117,7 +124,14 @@ func ParsePointsWithPrecision(buf []byte, defaultTime time.Time, precision strin
}
// lines which start with '#' are comments
if start := skipWhitespace(block, 0); block[start] == '#' {
start := skipWhitespace(block, 0)
// If line is all whitespace, just skip it
if start >= len(block) {
continue
}
if block[start] == '#' {
continue
}
@ -222,6 +236,10 @@ func scanKey(buf []byte, i int) (int, []byte, error) {
}
if buf[i] == '=' {
if i-1 < 0 || i-2 < 0 {
return i, buf[start:i], fmt.Errorf("missing tag name")
}
// Check for "cpu,=value" but allow "cpu,a\,=value"
if buf[i-1] == ',' && buf[i-2] != '\\' {
return i, buf[start:i], fmt.Errorf("missing tag name")
@ -254,6 +272,13 @@ func scanKey(buf []byte, i int) (int, []byte, error) {
return i, buf[start:i], fmt.Errorf("missing tag value")
}
i += 1
// grow our indices slice if we have too many tags
if commas >= len(indices) {
newIndics := make([]int, cap(indices)*2)
copy(newIndics, indices)
indices = newIndics
}
indices[commas] = i
commas += 1
@ -273,6 +298,14 @@ func scanKey(buf []byte, i int) (int, []byte, error) {
if equals > 0 && commas-1 != equals-1 {
return i, buf[start:i], fmt.Errorf("missing tag value")
}
// grow our indices slice if we have too many tags
if commas >= len(indices) {
newIndics := make([]int, cap(indices)*2)
copy(newIndics, indices)
indices = newIndics
}
indices[commas] = i + 1
break
}
@ -286,6 +319,12 @@ func scanKey(buf []byte, i int) (int, []byte, error) {
return i, buf[start:i], fmt.Errorf("invalid tag format")
}
// This check makes sure we actually received fields from the user. #3379
// This will catch invalid syntax such as: `cpu,host=serverA,region=us-west`
if i >= len(buf) {
return i, buf[start:i], fmt.Errorf("missing fields")
}
// Now we know where the key region is within buf, and the locations of tags, we
// need to deterimine if duplicate tags exist and if the tags are sorted. This iterates
// 1/2 of the list comparing each end with each other, walking towards the center from
@ -408,21 +447,20 @@ func scanFields(buf []byte, i int) (int, []byte, error) {
if isNumeric(buf[i+1]) || buf[i+1] == '-' || buf[i+1] == 'N' || buf[i+1] == 'n' {
var err error
i, _, err = scanNumber(buf, i+1)
i, err = scanNumber(buf, i+1)
if err != nil {
return i, buf[start:i], err
} else {
continue
}
// If next byte is not a double-quote, the value must be a boolean
} else if buf[i+1] != '"' {
continue
}
// If next byte is not a double-quote, the value must be a boolean
if buf[i+1] != '"' {
var err error
i, _, err = scanBoolean(buf, i+1)
if err != nil {
return i, buf[start:i], err
} else {
continue
}
continue
}
}
@ -483,8 +521,9 @@ func isNumeric(b byte) bool {
// scanNumber returns the end position within buf, start at i after
// scanning over buf for an integer, or float. It returns an
// error if a invalid number is scanned.
func scanNumber(buf []byte, i int) (int, []byte, error) {
func scanNumber(buf []byte, i int) (int, error) {
start := i
var isInt bool
// Is negative number?
if i < len(buf) && buf[i] == '-' {
@ -506,13 +545,19 @@ func scanNumber(buf []byte, i int) (int, []byte, error) {
break
}
if buf[i] == 'i' && i > start && !isInt {
isInt = true
i += 1
continue
}
if buf[i] == '.' {
decimals += 1
}
// Can't have more than 1 decimal (e.g. 1.1.1 should fail)
if decimals > 1 {
return i, buf[start:i], fmt.Errorf("invalid number")
return i, fmt.Errorf("invalid number")
}
// `e` is valid for floats but not as the first char
@ -534,36 +579,44 @@ func scanNumber(buf []byte, i int) (int, []byte, error) {
i += 3
continue
}
return i, buf[start:i], fmt.Errorf("invalid number")
return i, fmt.Errorf("invalid number")
}
if !isNumeric(buf[i]) {
return i, buf[start:i], fmt.Errorf("invalid number")
return i, fmt.Errorf("invalid number")
}
i += 1
}
if isInt && (decimals > 0 || scientific) {
return i, fmt.Errorf("invalid number")
}
// It's more common that numbers will be within min/max range for their type but we need to prevent
// out or range numbers from being parsed successfully. This uses some simple heuristics to decide
// if we should parse the number to the actual type. It does not do it all the time because it incurs
// extra allocations and we end up converting the type again when writing points to disk.
if decimals == 0 {
if isInt {
// Make sure the last char is an 'i' for integers (e.g. 9i10 is not valid)
if buf[i-1] != 'i' {
return i, fmt.Errorf("invalid number")
}
// Parse the int to check bounds the number of digits could be larger than the max range
if len(buf[start:i]) >= maxInt64Digits || len(buf[start:i]) >= minInt64Digits {
if _, err := strconv.ParseInt(string(buf[start:i]), 10, 64); err != nil {
return i, buf[start:i], fmt.Errorf("invalid integer")
// We subtract 1 from the index to remove the `i` from our tests
if len(buf[start:i-1]) >= maxInt64Digits || len(buf[start:i-1]) >= minInt64Digits {
if _, err := strconv.ParseInt(string(buf[start:i-1]), 10, 64); err != nil {
return i, fmt.Errorf("unable to parse integer %s: %s", buf[start:i-1], err)
}
}
} else {
// Parse the float to check bounds if it's scientific or the number of digits could be larger than the max range
if scientific || len(buf[start:i]) >= maxFloat64Digits || len(buf[start:i]) >= minFloat64Digits {
if _, err := strconv.ParseFloat(string(buf[start:i]), 10); err != nil {
return i, buf[start:i], fmt.Errorf("invalid float")
return i, fmt.Errorf("invalid float")
}
}
}
return i, buf[start:i], nil
return i, nil
}
// scanBoolean returns the end position within buf, start at i after
@ -633,10 +686,6 @@ func skipWhitespace(buf []byte, i int) int {
return i
}
if buf[i] == '\\' {
i += 2
continue
}
if buf[i] == ' ' || buf[i] == '\t' {
i += 1
continue
@ -646,6 +695,39 @@ func skipWhitespace(buf []byte, i int) int {
return i
}
// scanLine returns the end position in buf and the next line found within
// buf.
func scanLine(buf []byte, i int) (int, []byte) {
start := i
quoted := false
for {
// reached the end of buf?
if i >= len(buf) {
break
}
// If we see a double quote, makes sure it is not escaped
if buf[i] == '"' && buf[i-1] != '\\' {
i += 1
quoted = !quoted
continue
}
if buf[i] == '\\' {
i += 2
continue
}
if buf[i] == '\n' && !quoted {
break
}
i += 1
}
return i, buf[start:i]
}
// scanTo returns the end position in buf and the next consecutive block
// of bytes, starting from i and ending with stop byte. If there are leading
// spaces or escaped chars, they are skipped.
@ -791,7 +873,7 @@ func unescapeQuoteString(in string) string {
// NewPoint returns a new point with the given measurement name, tags, fields and timestamp
func NewPoint(name string, tags Tags, fields Fields, time time.Time) Point {
return &point{
key: makeKey([]byte(name), tags),
key: MakeKey([]byte(name), tags),
time: time,
fields: fields.MarshalBinary(),
}
@ -821,7 +903,7 @@ func (p *point) Name() string {
// SetName updates the measurement name for the point
func (p *point) SetName(name string) {
p.key = makeKey([]byte(name), p.Tags())
p.key = MakeKey([]byte(name), p.Tags())
}
// Time return the timestamp for the point
@ -863,20 +945,20 @@ func (p *point) Tags() Tags {
return tags
}
func makeKey(name []byte, tags Tags) []byte {
return append(escape(name), tags.hashKey()...)
func MakeKey(name []byte, tags Tags) []byte {
return append(escape(name), tags.HashKey()...)
}
// SetTags replaces the tags for the point
func (p *point) SetTags(tags Tags) {
p.key = makeKey(p.name(), tags)
p.key = MakeKey(p.name(), tags)
}
// AddTag adds or replaces a tag value for a point
func (p *point) AddTag(key, value string) {
tags := p.Tags()
tags[key] = value
p.key = makeKey(p.name(), tags)
p.key = MakeKey(p.name(), tags)
}
// Fields returns the fields for the point
@ -950,7 +1032,7 @@ func (p *point) UnixNano() int64 {
type Tags map[string]string
func (t Tags) hashKey() []byte {
func (t Tags) HashKey() []byte {
// Empty maps marshal to empty bytes.
if len(t) == 0 {
return nil
@ -995,6 +1077,10 @@ func (t Tags) hashKey() []byte {
type Fields map[string]interface{}
func parseNumber(val []byte) (interface{}, error) {
if val[len(val)-1] == 'i' {
val = val[:len(val)-1]
return strconv.ParseInt(string(val), 10, 64)
}
for i := 0; i < len(val); i++ {
// If there is a decimal or an N (NaN), I (Inf), parse as float
if val[i] == '.' || val[i] == 'N' || val[i] == 'n' || val[i] == 'I' || val[i] == 'i' || val[i] == 'e' {
@ -1004,7 +1090,7 @@ func parseNumber(val []byte) (interface{}, error) {
return string(val), nil
}
}
return strconv.ParseInt(string(val), 10, 64)
return strconv.ParseFloat(string(val), 64)
}
func newFieldsFromBinary(buf []byte) Fields {
@ -1024,6 +1110,7 @@ func newFieldsFromBinary(buf []byte) Fields {
if len(name) == 0 {
continue
}
name = unescape(name)
i, valueBuf = scanFieldValue(buf, i+1)
if len(valueBuf) == 0 {
@ -1051,7 +1138,7 @@ func newFieldsFromBinary(buf []byte) Fields {
panic(fmt.Sprintf("unable to parse bool value '%v': %v\n", string(valueBuf), err))
}
}
fields[string(unescape(name))] = value
fields[string(name)] = value
i += 1
}
return fields
@ -1074,12 +1161,16 @@ func (p Fields) MarshalBinary() []byte {
switch t := v.(type) {
case int:
b = append(b, []byte(strconv.FormatInt(int64(t), 10))...)
b = append(b, 'i')
case int32:
b = append(b, []byte(strconv.FormatInt(int64(t), 10))...)
b = append(b, 'i')
case uint64:
b = append(b, []byte(strconv.FormatUint(t, 10))...)
b = append(b, 'i')
case int64:
b = append(b, []byte(strconv.FormatInt(t, 10))...)
b = append(b, 'i')
case float64:
// ensure there is a decimal in the encoded for

File diff suppressed because it is too large Load Diff

View File

@ -44,13 +44,13 @@ type QueryExecutor struct {
Logger *log.Logger
// the local data store
store *Store
Store *Store
}
// NewQueryExecutor returns an initialized QueryExecutor
func NewQueryExecutor(store *Store) *QueryExecutor {
return &QueryExecutor{
store: store,
Store: store,
Logger: log.New(os.Stderr, "[query] ", log.LstdFlags),
}
}
@ -199,7 +199,7 @@ func (q *QueryExecutor) ExecuteQuery(query *influxql.Query, database string, chu
}
// Plan creates an execution plan for the given SelectStatement and returns an Executor.
func (q *QueryExecutor) plan(stmt *influxql.SelectStatement, chunkSize int) (Executor, error) {
func (q *QueryExecutor) Plan(stmt *influxql.SelectStatement, chunkSize int) (*Executor, error) {
shards := map[uint64]meta.ShardInfo{} // Shards requiring mappers.
// Replace instances of "now()" with the current time, and check the resultant times.
@ -245,31 +245,14 @@ func (q *QueryExecutor) plan(stmt *influxql.SelectStatement, chunkSize int) (Exe
mappers = append(mappers, m)
}
var executor Executor
if len(mappers) > 0 {
// All Mapper are of same type, so check first to determine correct Executor type.
if _, ok := mappers[0].(*RawMapper); ok {
executor = NewRawExecutor(stmt, mappers, chunkSize)
} else {
executor = NewAggregateExecutor(stmt, mappers)
}
} else {
// With no mappers, the Executor type doesn't matter.
executor = NewRawExecutor(stmt, nil, chunkSize)
}
executor := NewExecutor(stmt, mappers, chunkSize)
return executor, nil
}
// executeSelectStatement plans and executes a select statement against a database.
func (q *QueryExecutor) executeSelectStatement(statementID int, stmt *influxql.SelectStatement, results chan *influxql.Result, chunkSize int) error {
// Perform any necessary query re-writing.
stmt, err := q.rewriteSelectStatement(stmt)
if err != nil {
return err
}
// Plan statement execution.
e, err := q.plan(stmt, chunkSize)
e, err := q.Plan(stmt, chunkSize)
if err != nil {
return err
}
@ -282,10 +265,9 @@ func (q *QueryExecutor) executeSelectStatement(statementID int, stmt *influxql.S
for row := range ch {
if row.Err != nil {
return row.Err
} else {
resultSent = true
results <- &influxql.Result{StatementID: statementID, Series: []*influxql.Row{row}}
}
resultSent = true
results <- &influxql.Result{StatementID: statementID, Series: []*influxql.Row{row}}
}
if !resultSent {
@ -295,85 +277,6 @@ func (q *QueryExecutor) executeSelectStatement(statementID int, stmt *influxql.S
return nil
}
// rewriteSelectStatement performs any necessary query re-writing.
func (q *QueryExecutor) rewriteSelectStatement(stmt *influxql.SelectStatement) (*influxql.SelectStatement, error) {
var err error
// Expand regex expressions in the FROM clause.
sources, err := q.expandSources(stmt.Sources)
if err != nil {
return nil, err
}
stmt.Sources = sources
// Expand wildcards in the fields or GROUP BY.
if stmt.HasWildcard() {
stmt, err = q.expandWildcards(stmt)
if err != nil {
return nil, err
}
}
stmt.RewriteDistinct()
return stmt, nil
}
// expandWildcards returns a new SelectStatement with wildcards in the fields
// and/or GROUP BY expanded with actual field names.
func (q *QueryExecutor) expandWildcards(stmt *influxql.SelectStatement) (*influxql.SelectStatement, error) {
// If there are no wildcards in the statement, return it as-is.
if !stmt.HasWildcard() {
return stmt, nil
}
// Use sets to avoid duplicate field names.
fieldSet := map[string]struct{}{}
dimensionSet := map[string]struct{}{}
var fields influxql.Fields
var dimensions influxql.Dimensions
// Iterate measurements in the FROM clause getting the fields & dimensions for each.
for _, src := range stmt.Sources {
if m, ok := src.(*influxql.Measurement); ok {
// Lookup the database. The database may not exist if no data for this database
// was ever written to the shard.
db := q.store.DatabaseIndex(m.Database)
if db == nil {
return stmt, nil
}
// Lookup the measurement in the database.
mm := db.measurements[m.Name]
if mm == nil {
return nil, ErrMeasurementNotFound(m.String())
}
// Get the fields for this measurement.
for _, name := range mm.FieldNames() {
if _, ok := fieldSet[name]; ok {
continue
}
fieldSet[name] = struct{}{}
fields = append(fields, &influxql.Field{Expr: &influxql.VarRef{Val: name}})
}
// Get the dimensions for this measurement.
for _, t := range mm.TagKeys() {
if _, ok := dimensionSet[t]; ok {
continue
}
dimensionSet[t] = struct{}{}
dimensions = append(dimensions, &influxql.Dimension{Expr: &influxql.VarRef{Val: t}})
}
}
}
// Return a new SelectStatement with the wild cards rewritten.
return stmt.RewriteWildcards(fields, dimensions), nil
}
// expandSources expands regex sources and removes duplicates.
// NOTE: sources must be normalized (db and rp set) before calling this function.
func (q *QueryExecutor) expandSources(sources influxql.Sources) (influxql.Sources, error) {
@ -394,7 +297,7 @@ func (q *QueryExecutor) expandSources(sources influxql.Sources) (influxql.Source
}
// Lookup the database.
db := q.store.DatabaseIndex(src.Database)
db := q.Store.DatabaseIndex(src.Database)
if db == nil {
return nil, nil
}
@ -453,7 +356,7 @@ func (q *QueryExecutor) executeDropDatabaseStatement(stmt *influxql.DropDatabase
}
}
err = q.store.DeleteDatabase(stmt.Name, shardIDs)
err = q.Store.DeleteDatabase(stmt.Name, shardIDs)
if err != nil {
return &influxql.Result{Err: err}
}
@ -464,7 +367,7 @@ func (q *QueryExecutor) executeDropDatabaseStatement(stmt *influxql.DropDatabase
// executeDropMeasurementStatement removes the measurement and all series data from the local store for the given measurement
func (q *QueryExecutor) executeDropMeasurementStatement(stmt *influxql.DropMeasurementStatement, database string) *influxql.Result {
// Find the database.
db := q.store.DatabaseIndex(database)
db := q.Store.DatabaseIndex(database)
if db == nil {
return &influxql.Result{}
}
@ -478,7 +381,7 @@ func (q *QueryExecutor) executeDropMeasurementStatement(stmt *influxql.DropMeasu
db.DropMeasurement(m.Name)
// now drop the raw data
if err := q.store.deleteMeasurement(m.Name, m.SeriesKeys()); err != nil {
if err := q.Store.deleteMeasurement(m.Name, m.SeriesKeys()); err != nil {
return &influxql.Result{Err: err}
}
@ -488,7 +391,7 @@ func (q *QueryExecutor) executeDropMeasurementStatement(stmt *influxql.DropMeasu
// executeDropSeriesStatement removes all series from the local store that match the drop query
func (q *QueryExecutor) executeDropSeriesStatement(stmt *influxql.DropSeriesStatement, database string) *influxql.Result {
// Find the database.
db := q.store.DatabaseIndex(database)
db := q.Store.DatabaseIndex(database)
if db == nil {
return &influxql.Result{}
}
@ -506,7 +409,7 @@ func (q *QueryExecutor) executeDropSeriesStatement(stmt *influxql.DropSeriesStat
var seriesKeys []string
for _, m := range measurements {
var ids seriesIDs
var ids SeriesIDs
if stmt.Condition != nil {
// Get series IDs that match the WHERE clause.
ids, _, err = m.walkWhereForSeriesIds(stmt.Condition)
@ -524,7 +427,7 @@ func (q *QueryExecutor) executeDropSeriesStatement(stmt *influxql.DropSeriesStat
}
// delete the raw series data
if err := q.store.deleteSeries(seriesKeys); err != nil {
if err := q.Store.deleteSeries(seriesKeys); err != nil {
return &influxql.Result{Err: err}
}
// remove them from the index
@ -535,7 +438,7 @@ func (q *QueryExecutor) executeDropSeriesStatement(stmt *influxql.DropSeriesStat
func (q *QueryExecutor) executeShowSeriesStatement(stmt *influxql.ShowSeriesStatement, database string) *influxql.Result {
// Find the database.
db := q.store.DatabaseIndex(database)
db := q.Store.DatabaseIndex(database)
if db == nil {
return &influxql.Result{}
}
@ -559,7 +462,7 @@ func (q *QueryExecutor) executeShowSeriesStatement(stmt *influxql.ShowSeriesStat
// Loop through measurements to build result. One result row / measurement.
for _, m := range measurements {
var ids seriesIDs
var ids SeriesIDs
if stmt.Condition != nil {
// Get series IDs that match the WHERE clause.
@ -646,7 +549,7 @@ func (q *QueryExecutor) filterShowSeriesResult(limit, offset int, rows influxql.
func (q *QueryExecutor) executeShowMeasurementsStatement(stmt *influxql.ShowMeasurementsStatement, database string) *influxql.Result {
// Find the database.
db := q.store.DatabaseIndex(database)
db := q.Store.DatabaseIndex(database)
if db == nil {
return &influxql.Result{}
}
@ -705,7 +608,7 @@ func (q *QueryExecutor) executeShowMeasurementsStatement(stmt *influxql.ShowMeas
func (q *QueryExecutor) executeShowTagKeysStatement(stmt *influxql.ShowTagKeysStatement, database string) *influxql.Result {
// Find the database.
db := q.store.DatabaseIndex(database)
db := q.Store.DatabaseIndex(database)
if db == nil {
return &influxql.Result{}
}
@ -758,7 +661,7 @@ func (q *QueryExecutor) executeShowTagKeysStatement(stmt *influxql.ShowTagKeysSt
func (q *QueryExecutor) executeShowTagValuesStatement(stmt *influxql.ShowTagValuesStatement, database string) *influxql.Result {
// Find the database.
db := q.store.DatabaseIndex(database)
db := q.Store.DatabaseIndex(database)
if db == nil {
return &influxql.Result{}
}
@ -782,7 +685,7 @@ func (q *QueryExecutor) executeShowTagValuesStatement(stmt *influxql.ShowTagValu
tagValues := make(map[string]stringSet)
for _, m := range measurements {
var ids seriesIDs
var ids SeriesIDs
if stmt.Condition != nil {
// Get series IDs that match the WHERE clause.
@ -836,7 +739,7 @@ func (q *QueryExecutor) executeShowFieldKeysStatement(stmt *influxql.ShowFieldKe
var err error
// Find the database.
db := q.store.DatabaseIndex(database)
db := q.Store.DatabaseIndex(database)
if db == nil {
return &influxql.Result{}
}

View File

@ -1,6 +1,7 @@
package tsdb
package tsdb_test
import (
"encoding/json"
"io/ioutil"
"os"
"path/filepath"
@ -10,6 +11,7 @@ import (
"github.com/influxdb/influxdb/influxql"
"github.com/influxdb/influxdb/meta"
"github.com/influxdb/influxdb/tsdb"
)
var sgID = uint64(2)
@ -17,10 +19,10 @@ var shardID = uint64(1)
func TestWritePointsAndExecuteQuery(t *testing.T) {
store, executor := testStoreAndExecutor()
defer os.RemoveAll(store.path)
defer os.RemoveAll(store.Path())
// Write first point.
if err := store.WriteToShard(shardID, []Point{NewPoint(
if err := store.WriteToShard(shardID, []tsdb.Point{tsdb.NewPoint(
"cpu",
map[string]string{"host": "server"},
map[string]interface{}{"value": 1.0},
@ -30,7 +32,7 @@ func TestWritePointsAndExecuteQuery(t *testing.T) {
}
// Write second point.
if err := store.WriteToShard(shardID, []Point{NewPoint(
if err := store.WriteToShard(shardID, []tsdb.Point{tsdb.NewPoint(
"cpu",
map[string]string{"host": "server"},
map[string]interface{}{"value": 1.0},
@ -39,100 +41,90 @@ func TestWritePointsAndExecuteQuery(t *testing.T) {
t.Fatalf(err.Error())
}
got := executeAndGetJSON("select * from cpu", executor)
exepected := `[{"series":[{"name":"cpu","tags":{"host":"server"},"columns":["time","value"],"values":[["1970-01-01T00:00:01.000000002Z",1],["1970-01-01T00:00:02.000000003Z",1]]}]}]`
got := executeAndGetJSON("SELECT * FROM cpu", executor)
exepected := `[{"series":[{"name":"cpu","columns":["time","host","value"],"values":[["1970-01-01T00:00:01.000000002Z","server",1],["1970-01-01T00:00:02.000000003Z","server",1]]}]}]`
if exepected != got {
t.Fatalf("exp: %s\ngot: %s", exepected, got)
t.Fatalf("\nexp: %s\ngot: %s", exepected, got)
}
got = executeAndGetJSON("SELECT * FROM cpu GROUP BY *", executor)
exepected = `[{"series":[{"name":"cpu","tags":{"host":"server"},"columns":["time","value"],"values":[["1970-01-01T00:00:01.000000002Z",1],["1970-01-01T00:00:02.000000003Z",1]]}]}]`
if exepected != got {
t.Fatalf("\nexp: %s\ngot: %s", exepected, got)
}
store.Close()
store = NewStore(store.path)
store = tsdb.NewStore(store.Path())
if err := store.Open(); err != nil {
t.Fatalf(err.Error())
}
executor.store = store
executor.Store = store
executor.ShardMapper = &testShardMapper{store: store}
got = executeAndGetJSON("select * from cpu", executor)
got = executeAndGetJSON("SELECT * FROM cpu GROUP BY *", executor)
if exepected != got {
t.Fatalf("exp: %s\ngot: %s", exepected, got)
t.Fatalf("\nexp: %s\ngot: %s", exepected, got)
}
}
// Ensure that points can be written and flushed even after a restart.
func TestWritePointsAndExecuteQuery_FlushRestart(t *testing.T) {
// Ensure writing a point and updating it results in only a single point.
func TestWritePointsAndExecuteQuery_Update(t *testing.T) {
store, executor := testStoreAndExecutor()
defer os.RemoveAll(store.path)
defer os.RemoveAll(store.Path())
// Write first point.
if err := store.WriteToShard(shardID, []Point{NewPoint(
"cpu",
map[string]string{"host": "server"},
map[string]interface{}{"value": 1.0},
time.Unix(1, 2),
// Write original point.
if err := store.WriteToShard(1, []tsdb.Point{tsdb.NewPoint(
"temperature",
map[string]string{},
map[string]interface{}{"value": 100.0},
time.Unix(0, 0),
)}); err != nil {
t.Fatalf(err.Error())
}
// Write second point.
if err := store.WriteToShard(shardID, []Point{NewPoint(
"cpu",
map[string]string{"host": "server"},
map[string]interface{}{"value": 1.0},
time.Unix(2, 3),
)}); err != nil {
t.Fatalf(err.Error())
}
// Restart the store.
if err := store.Close(); err != nil {
t.Fatal(err)
} else if err = store.Open(); err != nil {
t.Fatal(err)
}
// Flush WAL data to the index.
if err := store.Flush(); err != nil {
t.Fatal(err)
}
got := executeAndGetJSON("select * from cpu", executor)
exepected := `[{"series":[{"name":"cpu","tags":{"host":"server"},"columns":["time","value"],"values":[["1970-01-01T00:00:01.000000002Z",1],["1970-01-01T00:00:02.000000003Z",1]]}]}]`
if exepected != got {
t.Fatalf("exp: %s\ngot: %s", exepected, got)
}
// Restart store.
store.Close()
store = NewStore(store.path)
store = tsdb.NewStore(store.Path())
if err := store.Open(); err != nil {
t.Fatalf(err.Error())
}
executor.store = store
executor.Store = store
executor.ShardMapper = &testShardMapper{store: store}
got = executeAndGetJSON("select * from cpu", executor)
if exepected != got {
t.Fatalf("exp: %s\ngot: %s", exepected, got)
// Rewrite point with new value.
if err := store.WriteToShard(1, []tsdb.Point{tsdb.NewPoint(
"temperature",
map[string]string{},
map[string]interface{}{"value": 200.0},
time.Unix(0, 0),
)}); err != nil {
t.Fatalf(err.Error())
}
got := executeAndGetJSON("select * from temperature", executor)
exp := `[{"series":[{"name":"temperature","columns":["time","value"],"values":[["1970-01-01T00:00:00Z",200]]}]}]`
if exp != got {
t.Fatalf("\n\nexp: %s\ngot: %s", exp, got)
}
}
func TestDropSeriesStatement(t *testing.T) {
store, executor := testStoreAndExecutor()
defer os.RemoveAll(store.path)
defer os.RemoveAll(store.Path())
pt := NewPoint(
pt := tsdb.NewPoint(
"cpu",
map[string]string{"host": "server"},
map[string]interface{}{"value": 1.0},
time.Unix(1, 2),
)
err := store.WriteToShard(shardID, []Point{pt})
err := store.WriteToShard(shardID, []tsdb.Point{pt})
if err != nil {
t.Fatalf(err.Error())
}
got := executeAndGetJSON("select * from cpu", executor)
got := executeAndGetJSON("SELECT * FROM cpu GROUP BY *", executor)
exepected := `[{"series":[{"name":"cpu","tags":{"host":"server"},"columns":["time","value"],"values":[["1970-01-01T00:00:01.000000002Z",1]]}]}]`
if exepected != got {
t.Fatalf("exp: %s\ngot: %s", exepected, got)
@ -140,7 +132,7 @@ func TestDropSeriesStatement(t *testing.T) {
got = executeAndGetJSON("drop series from cpu", executor)
got = executeAndGetJSON("select * from cpu", executor)
got = executeAndGetJSON("SELECT * FROM cpu GROUP BY *", executor)
exepected = `[{}]`
if exepected != got {
t.Fatalf("exp: %s\ngot: %s", exepected, got)
@ -153,9 +145,9 @@ func TestDropSeriesStatement(t *testing.T) {
}
store.Close()
store = NewStore(store.path)
store = tsdb.NewStore(store.Path())
store.Open()
executor.store = store
executor.Store = store
got = executeAndGetJSON("select * from cpu", executor)
exepected = `[{}]`
@ -172,22 +164,22 @@ func TestDropSeriesStatement(t *testing.T) {
func TestDropMeasurementStatement(t *testing.T) {
store, executor := testStoreAndExecutor()
defer os.RemoveAll(store.path)
defer os.RemoveAll(store.Path())
pt := NewPoint(
pt := tsdb.NewPoint(
"cpu",
map[string]string{"host": "server"},
map[string]interface{}{"value": 1.0},
time.Unix(1, 2),
)
pt2 := NewPoint(
pt2 := tsdb.NewPoint(
"memory",
map[string]string{"host": "server"},
map[string]interface{}{"value": 1.0},
time.Unix(1, 2),
)
if err := store.WriteToShard(shardID, []Point{pt, pt2}); err != nil {
if err := store.WriteToShard(shardID, []tsdb.Point{pt, pt2}); err != nil {
t.Fatal(err)
}
@ -215,7 +207,7 @@ func TestDropMeasurementStatement(t *testing.T) {
t.Fatalf("exp: %s\ngot: %s", exepected, got)
}
got = executeAndGetJSON("select * from memory", executor)
exepected = `[{"error":"measurement not found: \"foo\".\"foo\".memory"}]`
exepected = `[{}]`
if exepected != got {
t.Fatalf("exp: %s\ngot: %s", exepected, got)
}
@ -223,9 +215,9 @@ func TestDropMeasurementStatement(t *testing.T) {
validateDrop()
store.Close()
store = NewStore(store.path)
store = tsdb.NewStore(store.Path())
store.Open()
executor.store = store
executor.Store = store
validateDrop()
}
@ -240,20 +232,20 @@ func (m *metaExec) ExecuteStatement(stmt influxql.Statement) *influxql.Result {
func TestDropDatabase(t *testing.T) {
store, executor := testStoreAndExecutor()
defer os.RemoveAll(store.path)
defer os.RemoveAll(store.Path())
pt := NewPoint(
pt := tsdb.NewPoint(
"cpu",
map[string]string{"host": "server"},
map[string]interface{}{"value": 1.0},
time.Unix(1, 2),
)
if err := store.WriteToShard(shardID, []Point{pt}); err != nil {
if err := store.WriteToShard(shardID, []tsdb.Point{pt}); err != nil {
t.Fatal(err)
}
got := executeAndGetJSON("select * from cpu", executor)
got := executeAndGetJSON("SELECT * FROM cpu GROUP BY *", executor)
expected := `[{"series":[{"name":"cpu","tags":{"host":"server"},"columns":["time","value"],"values":[["1970-01-01T00:00:01.000000002Z",1]]}]}]`
if expected != got {
t.Fatalf("exp: %s\ngot: %s", expected, got)
@ -267,7 +259,7 @@ func TestDropDatabase(t *testing.T) {
executor.MetaStatementExecutor = me
// verify the database is there on disk
dbPath := filepath.Join(store.path, "foo")
dbPath := filepath.Join(store.Path(), "foo")
if _, err := os.Stat(dbPath); err != nil {
t.Fatalf("execpted database dir %s to exist", dbPath)
}
@ -287,12 +279,12 @@ func TestDropDatabase(t *testing.T) {
}
store.Close()
store = NewStore(store.path)
store = tsdb.NewStore(store.Path())
store.Open()
executor.store = store
executor.Store = store
executor.ShardMapper = &testShardMapper{store: store}
if err := store.WriteToShard(shardID, []Point{pt}); err == nil || err.Error() != "shard not found" {
if err := store.WriteToShard(shardID, []tsdb.Point{pt}); err == nil || err.Error() != "shard not found" {
t.Fatalf("expected shard to not be found")
}
}
@ -300,7 +292,7 @@ func TestDropDatabase(t *testing.T) {
// Ensure that queries for which there is no data result in an empty set.
func TestQueryNoData(t *testing.T) {
store, executor := testStoreAndExecutor()
defer os.RemoveAll(store.path)
defer os.RemoveAll(store.Path())
got := executeAndGetJSON("select * from /.*/", executor)
expected := `[{}]`
@ -321,7 +313,7 @@ func TestQueryNoData(t *testing.T) {
// to create a user.
func TestAuthenticateIfUserCountZeroAndCreateUser(t *testing.T) {
store, executor := testStoreAndExecutor()
defer os.RemoveAll(store.path)
defer os.RemoveAll(store.Path())
ms := &testMetastore{userCount: 0}
executor.MetaStore = ms
@ -348,10 +340,10 @@ func TestAuthenticateIfUserCountZeroAndCreateUser(t *testing.T) {
}
}
func testStoreAndExecutor() (*Store, *QueryExecutor) {
func testStoreAndExecutor() (*tsdb.Store, *tsdb.QueryExecutor) {
path, _ := ioutil.TempDir("", "")
store := NewStore(path)
store := tsdb.NewStore(path)
err := store.Open()
if err != nil {
panic(err)
@ -361,14 +353,14 @@ func testStoreAndExecutor() (*Store, *QueryExecutor) {
shardID := uint64(1)
store.CreateShard(database, retentionPolicy, shardID)
executor := NewQueryExecutor(store)
executor := tsdb.NewQueryExecutor(store)
executor.MetaStore = &testMetastore{}
executor.ShardMapper = &testShardMapper{store: store}
return store, executor
}
func executeAndGetJSON(query string, executor *QueryExecutor) string {
func executeAndGetJSON(query string, executor *tsdb.QueryExecutor) string {
ch, err := executor.ExecuteQuery(mustParseQuery(query), "foo", 20)
if err != nil {
panic(err.Error())
@ -378,7 +370,12 @@ func executeAndGetJSON(query string, executor *QueryExecutor) string {
for r := range ch {
results = append(results, r)
}
return string(mustMarshalJSON(results))
b, err := json.Marshal(results)
if err != nil {
panic(err)
}
return string(b)
}
type testMetastore struct {
@ -467,10 +464,10 @@ func (t *testMetastore) NodeID() uint64 {
}
type testShardMapper struct {
store *Store
store *tsdb.Store
}
func (t *testShardMapper) CreateMapper(shard meta.ShardInfo, stmt string, chunkSize int) (Mapper, error) {
func (t *testShardMapper) CreateMapper(shard meta.ShardInfo, stmt string, chunkSize int) (tsdb.Mapper, error) {
m, err := t.store.CreateMapper(shard.ID, stmt, chunkSize)
return m, err
}

View File

@ -1,19 +1,14 @@
package tsdb
import (
"bytes"
"encoding/binary"
"encoding/json"
"errors"
"fmt"
"hash/fnv"
"io"
"log"
"math"
"os"
"sort"
"sync"
"time"
"github.com/influxdb/influxdb/influxql"
"github.com/influxdb/influxdb/tsdb/internal"
@ -35,15 +30,8 @@ var (
// ErrFieldUnmappedID is returned when the system is presented, during decode, with a field ID
// there is no mapping for.
ErrFieldUnmappedID = errors.New("field ID not mapped")
// ErrWALPartitionNotFound is returns when flushing a WAL partition that
// does not exist.
ErrWALPartitionNotFound = errors.New("wal partition not found")
)
// topLevelBucketN is the number of non-series buckets in the bolt db.
const topLevelBucketN = 3
// Shard represents a self-contained time series database. An inverted index of
// the measurement and tag data is kept along with the raw time series data.
// Data can be split across many shards. The query engine in TSDB is responsible
@ -52,53 +40,27 @@ type Shard struct {
db *bolt.DB // underlying data store
index *DatabaseIndex
path string
cache map[uint8]map[string][][]byte // values by <wal partition,series>
walSize int // approximate size of the WAL, in bytes
flush chan struct{} // signals background flush
flushTimer *time.Timer // signals time-based flush
engine Engine
options EngineOptions
mu sync.RWMutex
measurementFields map[string]*measurementFields // measurement name to their fields
// These coordinate closing and waiting for running goroutines.
wg sync.WaitGroup
closing chan struct{}
// Used for out-of-band error messages.
logger *log.Logger
// The maximum size and time thresholds for flushing the WAL.
MaxWALSize int
WALFlushInterval time.Duration
WALPartitionFlushDelay time.Duration
measurementFields map[string]*MeasurementFields // measurement name to their fields
// The writer used by the logger.
LogOutput io.Writer
}
// NewShard returns a new initialized Shard
func NewShard(index *DatabaseIndex, path string) *Shard {
s := &Shard{
func NewShard(index *DatabaseIndex, path string, options EngineOptions) *Shard {
return &Shard{
index: index,
path: path,
flush: make(chan struct{}, 1),
measurementFields: make(map[string]*measurementFields),
MaxWALSize: DefaultMaxWALSize,
WALFlushInterval: DefaultWALFlushInterval,
WALPartitionFlushDelay: DefaultWALPartitionFlushDelay,
options: options,
measurementFields: make(map[string]*MeasurementFields),
LogOutput: os.Stderr,
}
// Initialize all partitions of the cache.
s.cache = make(map[uint8]map[string][][]byte)
for i := uint8(0); i < WALPartitionN; i++ {
s.cache[i] = make(map[string][][]byte)
}
return s
}
// Path returns the path set on the shard when it was created.
@ -110,87 +72,57 @@ func (s *Shard) Open() error {
s.mu.Lock()
defer s.mu.Unlock()
s.index.mu.Lock()
defer s.index.mu.Unlock()
// Return if the shard is already open
if s.db != nil {
if s.engine != nil {
return nil
}
// Open store on shard.
store, err := bolt.Open(s.path, 0666, &bolt.Options{Timeout: 1 * time.Second})
// Initialize underlying engine.
e, err := NewEngine(s.path, s.options)
if err != nil {
return err
return fmt.Errorf("new engine: %s", err)
}
s.db = store
s.engine = e
// Initialize store.
if err := s.db.Update(func(tx *bolt.Tx) error {
_, _ = tx.CreateBucketIfNotExists([]byte("series"))
_, _ = tx.CreateBucketIfNotExists([]byte("fields"))
_, _ = tx.CreateBucketIfNotExists([]byte("wal"))
// Set log output on the engine.
s.engine.SetLogOutput(s.LogOutput)
return nil
}); err != nil {
return fmt.Errorf("init: %s", err)
// Open engine.
if err := s.engine.Open(); err != nil {
return fmt.Errorf("open engine: %s", err)
}
if err := s.loadMetadataIndex(); err != nil {
// Load metadata index.
if err := s.engine.LoadMetadataIndex(s.index, s.measurementFields); err != nil {
return fmt.Errorf("load metadata index: %s", err)
}
// Initialize logger.
s.logger = log.New(s.LogOutput, "[shard] ", log.LstdFlags)
// Start flush interval timer.
s.flushTimer = time.NewTimer(s.WALFlushInterval)
// Start background goroutines.
s.wg.Add(1)
s.closing = make(chan struct{})
go s.autoflusher(s.closing)
return nil
}(); err != nil {
s.close()
return err
}
// Flush on-disk WAL before we return to the caller.
if err := s.Flush(0); err != nil {
return fmt.Errorf("flush: %s", err)
}
return nil
}
// Close shuts down the shard's store.
func (s *Shard) Close() error {
s.mu.Lock()
err := s.close()
s.mu.Unlock()
// Wait for open goroutines to finish.
s.wg.Wait()
return err
defer s.mu.Unlock()
return s.close()
}
func (s *Shard) close() error {
if s.db != nil {
s.db.Close()
}
if s.closing != nil {
close(s.closing)
s.closing = nil
if s.engine != nil {
return s.engine.Close()
}
return nil
}
// TODO: this is temporarily exported to make tx.go work. When the query engine gets refactored
// into the tsdb package this should be removed. No one outside tsdb should know the underlying store.
func (s *Shard) DB() *bolt.DB {
return s.db
}
// TODO: this is temporarily exported to make tx.go work. When the query engine gets refactored
// into the tsdb package this should be removed. No one outside tsdb should know the underlying field encoding scheme.
func (s *Shard) FieldCodec(measurementName string) *FieldCodec {
@ -198,21 +130,21 @@ func (s *Shard) FieldCodec(measurementName string) *FieldCodec {
defer s.mu.RUnlock()
m := s.measurementFields[measurementName]
if m == nil {
return nil
return NewFieldCodec(nil)
}
return m.codec
return m.Codec
}
// struct to hold information for a field to create on a measurement
type fieldCreate struct {
measurement string
field *field
type FieldCreate struct {
Measurement string
Field *Field
}
// struct to hold information for a series to create
type seriesCreate struct {
measurement string
series *Series
type SeriesCreate struct {
Measurement string
Series *Series
}
// WritePoints will write the raw data points and any new metadata to the index in the shard
@ -226,7 +158,7 @@ func (s *Shard) WritePoints(points []Point) error {
if len(seriesToCreate) > 0 {
s.index.mu.Lock()
for _, ss := range seriesToCreate {
s.index.createSeriesIndexIfNotExists(ss.measurement, ss.series)
s.index.CreateSeriesIndexIfNotExists(ss.Measurement, ss.Series)
}
s.index.mu.Unlock()
}
@ -239,262 +171,36 @@ func (s *Shard) WritePoints(points []Point) error {
// make sure all data is encoded before attempting to save to bolt
for _, p := range points {
// marshal the raw data if it hasn't been marshaled already
if p.Data() == nil {
// this was populated earlier, don't need to validate that it's there.
s.mu.RLock()
mf := s.measurementFields[p.Name()]
s.mu.RUnlock()
// If a measurement is dropped while writes for it are in progress, this could be nil
if mf == nil {
return ErrFieldNotFound
}
data, err := mf.codec.EncodeFields(p.Fields())
if err != nil {
return err
}
p.SetData(data)
// Ignore if raw data has already been marshaled.
if p.Data() != nil {
continue
}
// This was populated earlier, don't need to validate that it's there.
s.mu.RLock()
mf := s.measurementFields[p.Name()]
s.mu.RUnlock()
// If a measurement is dropped while writes for it are in progress, this could be nil
if mf == nil {
return ErrFieldNotFound
}
data, err := mf.Codec.EncodeFields(p.Fields())
if err != nil {
return err
}
p.SetData(data)
}
// save to the underlying bolt instance
if err := s.db.Update(func(tx *bolt.Tx) error {
// save any new metadata
if len(seriesToCreate) > 0 {
b := tx.Bucket([]byte("series"))
for _, sc := range seriesToCreate {
data, err := sc.series.MarshalBinary()
if err != nil {
return err
}
if err := b.Put([]byte(sc.series.Key), data); err != nil {
return err
}
}
}
if len(measurementFieldsToSave) > 0 {
b := tx.Bucket([]byte("fields"))
for name, m := range measurementFieldsToSave {
data, err := m.MarshalBinary()
if err != nil {
return err
}
if err := b.Put([]byte(name), data); err != nil {
return err
}
}
}
// Write points to WAL bucket.
wal := tx.Bucket([]byte("wal"))
for _, p := range points {
// Retrieve partition bucket.
key := p.Key()
b, err := wal.CreateBucketIfNotExists([]byte{WALPartition(key)})
if err != nil {
return fmt.Errorf("create WAL partition bucket: %s", err)
}
// Generate an autoincrementing index for the WAL partition.
id, _ := b.NextSequence()
// Append points sequentially to the WAL bucket.
v := marshalWALEntry(key, p.UnixNano(), p.Data())
if err := b.Put(u64tob(id), v); err != nil {
return fmt.Errorf("put wal: %s", err)
}
}
return nil
}); err != nil {
return err
}
// If successful then save points to in-memory cache.
if err := func() error {
s.mu.Lock()
defer s.mu.Unlock()
// tracks which in-memory caches need to be resorted
resorts := map[uint8]map[string]struct{}{}
for _, p := range points {
// Generate in-memory cache entry of <timestamp,data>.
key, data := p.Key(), p.Data()
v := make([]byte, 8+len(data))
binary.BigEndian.PutUint64(v[0:8], uint64(p.UnixNano()))
copy(v[8:], data)
// Determine if we are appending.
partitionID := WALPartition(key)
a := s.cache[partitionID][string(key)]
appending := (len(a) == 0 || bytes.Compare(a[len(a)-1], v) == -1)
// Append to cache list.
a = append(a, v)
// If not appending, keep track of cache lists that need to be resorted.
if !appending {
series := resorts[partitionID]
if series == nil {
series = map[string]struct{}{}
resorts[partitionID] = series
}
series[string(key)] = struct{}{}
}
s.cache[partitionID][string(key)] = a
// Calculate estimated WAL size.
s.walSize += len(key) + len(v)
}
// Sort by timestamp if not appending.
for partitionID, cache := range resorts {
for key, _ := range cache {
sort.Sort(byteSlices(s.cache[partitionID][key]))
}
}
// Check for flush threshold.
s.triggerAutoFlush()
return nil
}(); err != nil {
return err
// Write to the engine.
if err := s.engine.WritePoints(points, measurementFieldsToSave, seriesToCreate); err != nil {
return fmt.Errorf("engine: %s", err)
}
return nil
}
// Flush writes all points from the write ahead log to the index.
func (s *Shard) Flush(partitionFlushDelay time.Duration) error {
// Retrieve a list of WAL buckets.
var partitionIDs []uint8
if err := s.db.View(func(tx *bolt.Tx) error {
return tx.Bucket([]byte("wal")).ForEach(func(key, _ []byte) error {
partitionIDs = append(partitionIDs, uint8(key[0]))
return nil
})
}); err != nil {
return err
}
// Continue flushing until there are no more partition buckets.
for _, partitionID := range partitionIDs {
if err := s.FlushPartition(partitionID); err != nil {
return fmt.Errorf("flush partition: id=%d, err=%s", partitionID, err)
}
// Wait momentarily so other threads can process.
time.Sleep(partitionFlushDelay)
}
s.mu.Lock()
defer s.mu.Unlock()
// Reset WAL size.
s.walSize = 0
// Reset the timer.
s.flushTimer.Reset(s.WALFlushInterval)
return nil
}
// FlushPartition flushes a single WAL partition.
func (s *Shard) FlushPartition(partitionID uint8) error {
s.mu.Lock()
defer s.mu.Unlock()
startTime := time.Now()
var pointN int
if err := s.db.Update(func(tx *bolt.Tx) error {
// Retrieve partition bucket. Exit if it doesn't exist.
pb := tx.Bucket([]byte("wal")).Bucket([]byte{byte(partitionID)})
if pb == nil {
return ErrWALPartitionNotFound
}
// Iterate over keys in the WAL partition bucket.
c := pb.Cursor()
for k, v := c.First(); k != nil; k, v = c.Next() {
key, timestamp, data := unmarshalWALEntry(v)
// Create bucket for entry.
b, err := tx.CreateBucketIfNotExists(key)
if err != nil {
return fmt.Errorf("create bucket: %s", err)
}
// Write point to bucket.
if err := b.Put(u64tob(uint64(timestamp)), data); err != nil {
return fmt.Errorf("put: %s", err)
}
// Remove entry in the WAL.
if err := c.Delete(); err != nil {
return fmt.Errorf("delete: %s", err)
}
pointN++
}
return nil
}); err != nil {
return err
}
// Reset cache.
s.cache[partitionID] = make(map[string][][]byte)
if pointN > 0 {
s.logger.Printf("flush %d points in %.3fs", pointN, time.Since(startTime).Seconds())
}
return nil
}
// autoflusher waits for notification of a flush and kicks it off in the background.
// This method runs in a separate goroutine.
func (s *Shard) autoflusher(closing chan struct{}) {
defer s.wg.Done()
for {
// Wait for close or flush signal.
select {
case <-closing:
return
case <-s.flushTimer.C:
if err := s.Flush(s.WALPartitionFlushDelay); err != nil {
s.logger.Printf("flush error: %s", err)
}
case <-s.flush:
if err := s.Flush(s.WALPartitionFlushDelay); err != nil {
s.logger.Printf("flush error: %s", err)
}
}
}
}
// triggerAutoFlush signals that a flush should occur if the size is above the threshold.
// This function must be called within the context of a lock.
func (s *Shard) triggerAutoFlush() {
// Ignore if we haven't reached the threshold.
if s.walSize < s.MaxWALSize {
return
}
// Otherwise send a non-blocking signal.
select {
case s.flush <- struct{}{}:
default:
}
}
func (s *Shard) ValidateAggregateFieldsInStatement(measurementName string, stmt *influxql.SelectStatement) error {
s.mu.RLock()
defer s.mu.RUnlock()
@ -547,62 +253,27 @@ func (s *Shard) ValidateAggregateFieldsInStatement(measurementName string, stmt
return nil
}
// deleteSeries deletes the buckets and the metadata for the given series keys
func (s *Shard) deleteSeries(keys []string) error {
s.mu.Lock()
defer s.mu.Unlock()
if err := s.db.Update(func(tx *bolt.Tx) error {
b := tx.Bucket([]byte("series"))
for _, k := range keys {
if err := b.Delete([]byte(k)); err != nil {
return err
}
if err := tx.DeleteBucket([]byte(k)); err != nil && err != bolt.ErrBucketNotFound {
return err
}
delete(s.cache[WALPartition([]byte(k))], k)
}
return nil
}); err != nil {
return err
}
return nil
// DeleteSeries deletes a list of series.
func (s *Shard) DeleteSeries(keys []string) error {
return s.engine.DeleteSeries(keys)
}
// deleteMeasurement deletes the measurement field encoding information and all underlying series from the shard
func (s *Shard) deleteMeasurement(name string, seriesKeys []string) error {
// DeleteMeasurement deletes a measurement and all underlying series.
func (s *Shard) DeleteMeasurement(name string, seriesKeys []string) error {
s.mu.Lock()
defer s.mu.Unlock()
if err := s.db.Update(func(tx *bolt.Tx) error {
bm := tx.Bucket([]byte("fields"))
if err := bm.Delete([]byte(name)); err != nil {
return err
}
b := tx.Bucket([]byte("series"))
for _, k := range seriesKeys {
if err := b.Delete([]byte(k)); err != nil {
return err
}
if err := tx.DeleteBucket([]byte(k)); err != nil && err != bolt.ErrBucketNotFound {
return err
}
delete(s.cache[WALPartition([]byte(k))], k)
}
return nil
}); err != nil {
if err := s.engine.DeleteMeasurement(name, seriesKeys); err != nil {
return err
}
// Remove entry from shard index.
delete(s.measurementFields, name)
return nil
}
func (s *Shard) createFieldsAndMeasurements(fieldsToCreate []*fieldCreate) (map[string]*measurementFields, error) {
func (s *Shard) createFieldsAndMeasurements(fieldsToCreate []*FieldCreate) (map[string]*MeasurementFields, error) {
if len(fieldsToCreate) == 0 {
return nil, nil
}
@ -613,37 +284,37 @@ func (s *Shard) createFieldsAndMeasurements(fieldsToCreate []*fieldCreate) (map[
defer s.mu.Unlock()
// add fields
measurementsToSave := make(map[string]*measurementFields)
measurementsToSave := make(map[string]*MeasurementFields)
for _, f := range fieldsToCreate {
m := s.measurementFields[f.measurement]
m := s.measurementFields[f.Measurement]
if m == nil {
m = measurementsToSave[f.measurement]
m = measurementsToSave[f.Measurement]
if m == nil {
m = &measurementFields{Fields: make(map[string]*field)}
m = &MeasurementFields{Fields: make(map[string]*Field)}
}
s.measurementFields[f.measurement] = m
s.measurementFields[f.Measurement] = m
}
measurementsToSave[f.measurement] = m
measurementsToSave[f.Measurement] = m
// add the field to the in memory index
if err := m.createFieldIfNotExists(f.field.Name, f.field.Type); err != nil {
if err := m.CreateFieldIfNotExists(f.Field.Name, f.Field.Type); err != nil {
return nil, err
}
// ensure the measurement is in the index and the field is there
measurement := s.index.createMeasurementIndexIfNotExists(f.measurement)
measurement.fieldNames[f.field.Name] = struct{}{}
measurement := s.index.CreateMeasurementIndexIfNotExists(f.Measurement)
measurement.fieldNames[f.Field.Name] = struct{}{}
}
return measurementsToSave, nil
}
// validateSeriesAndFields checks which series and fields are new and whose metadata should be saved and indexed
func (s *Shard) validateSeriesAndFields(points []Point) ([]*seriesCreate, []*fieldCreate, error) {
var seriesToCreate []*seriesCreate
var fieldsToCreate []*fieldCreate
func (s *Shard) validateSeriesAndFields(points []Point) ([]*SeriesCreate, []*FieldCreate, error) {
var seriesToCreate []*SeriesCreate
var fieldsToCreate []*FieldCreate
// get the mutex for the in memory index, which is shared across shards
s.index.mu.RLock()
@ -657,14 +328,14 @@ func (s *Shard) validateSeriesAndFields(points []Point) ([]*seriesCreate, []*fie
// see if the series should be added to the index
if ss := s.index.series[string(p.Key())]; ss == nil {
series := &Series{Key: string(p.Key()), Tags: p.Tags()}
seriesToCreate = append(seriesToCreate, &seriesCreate{p.Name(), series})
seriesToCreate = append(seriesToCreate, &SeriesCreate{p.Name(), series})
}
// see if the field definitions need to be saved to the shard
mf := s.measurementFields[p.Name()]
if mf == nil {
for name, value := range p.Fields() {
fieldsToCreate = append(fieldsToCreate, &fieldCreate{p.Name(), &field{Name: name, Type: influxql.InspectDataType(value)}})
fieldsToCreate = append(fieldsToCreate, &FieldCreate{p.Name(), &Field{Name: name, Type: influxql.InspectDataType(value)}})
}
continue // skip validation since all fields are new
}
@ -680,72 +351,23 @@ func (s *Shard) validateSeriesAndFields(points []Point) ([]*seriesCreate, []*fie
continue // Field is present, and it's of the same type. Nothing more to do.
}
fieldsToCreate = append(fieldsToCreate, &fieldCreate{p.Name(), &field{Name: name, Type: influxql.InspectDataType(value)}})
fieldsToCreate = append(fieldsToCreate, &FieldCreate{p.Name(), &Field{Name: name, Type: influxql.InspectDataType(value)}})
}
}
return seriesToCreate, fieldsToCreate, nil
}
// loadsMetadataIndex loads the shard metadata into memory. This should only be called by Open
func (s *Shard) loadMetadataIndex() error {
return s.db.View(func(tx *bolt.Tx) error {
s.index.mu.Lock()
defer s.index.mu.Unlock()
// load measurement metadata
meta := tx.Bucket([]byte("fields"))
c := meta.Cursor()
for k, v := c.First(); k != nil; k, v = c.Next() {
m := s.index.createMeasurementIndexIfNotExists(string(k))
mf := &measurementFields{}
if err := mf.UnmarshalBinary(v); err != nil {
return err
}
for name, _ := range mf.Fields {
m.fieldNames[name] = struct{}{}
}
mf.codec = newFieldCodec(mf.Fields)
s.measurementFields[m.Name] = mf
}
// load series metadata
meta = tx.Bucket([]byte("series"))
c = meta.Cursor()
for k, v := c.First(); k != nil; k, v = c.Next() {
series := &Series{}
if err := series.UnmarshalBinary(v); err != nil {
return err
}
s.index.createSeriesIndexIfNotExists(measurementFromSeriesKey(string(k)), series)
}
return nil
})
}
// SeriesCount returns the number of series buckets on the shard.
// This does not include a count from the WAL.
func (s *Shard) SeriesCount() (n int, err error) {
err = s.db.View(func(tx *bolt.Tx) error {
return tx.ForEach(func(_ []byte, _ *bolt.Bucket) error {
n++
return nil
})
})
func (s *Shard) SeriesCount() (int, error) { return s.engine.SeriesCount() }
// Remove top-level buckets.
n -= topLevelBucketN
return
}
type measurementFields struct {
Fields map[string]*field `json:"fields"`
codec *FieldCodec
type MeasurementFields struct {
Fields map[string]*Field `json:"fields"`
Codec *FieldCodec
}
// MarshalBinary encodes the object to a binary format.
func (m *measurementFields) MarshalBinary() ([]byte, error) {
func (m *MeasurementFields) MarshalBinary() ([]byte, error) {
var pb internal.MeasurementFields
for _, f := range m.Fields {
id := int32(f.ID)
@ -757,22 +379,22 @@ func (m *measurementFields) MarshalBinary() ([]byte, error) {
}
// UnmarshalBinary decodes the object from a binary format.
func (m *measurementFields) UnmarshalBinary(buf []byte) error {
func (m *MeasurementFields) UnmarshalBinary(buf []byte) error {
var pb internal.MeasurementFields
if err := proto.Unmarshal(buf, &pb); err != nil {
return err
}
m.Fields = make(map[string]*field)
m.Fields = make(map[string]*Field)
for _, f := range pb.Fields {
m.Fields[f.GetName()] = &field{ID: uint8(f.GetID()), Name: f.GetName(), Type: influxql.DataType(f.GetType())}
m.Fields[f.GetName()] = &Field{ID: uint8(f.GetID()), Name: f.GetName(), Type: influxql.DataType(f.GetType())}
}
return nil
}
// createFieldIfNotExists creates a new field with an autoincrementing ID.
// CreateFieldIfNotExists creates a new field with an autoincrementing ID.
// Returns an error if 255 fields have already been created on the measurement or
// the fields already exists with a different type.
func (m *measurementFields) createFieldIfNotExists(name string, typ influxql.DataType) error {
func (m *MeasurementFields) CreateFieldIfNotExists(name string, typ influxql.DataType) error {
// Ignore if the field already exists.
if f := m.Fields[name]; f != nil {
if f.Type != typ {
@ -787,19 +409,19 @@ func (m *measurementFields) createFieldIfNotExists(name string, typ influxql.Dat
}
// Create and append a new field.
f := &field{
f := &Field{
ID: uint8(len(m.Fields) + 1),
Name: name,
Type: typ,
}
m.Fields[name] = f
m.codec = newFieldCodec(m.Fields)
m.Codec = NewFieldCodec(m.Fields)
return nil
}
// Field represents a series field.
type field struct {
type Field struct {
ID uint8 `json:"id,omitempty"`
Name string `json:"name,omitempty"`
Type influxql.DataType `json:"type,omitempty"`
@ -813,15 +435,15 @@ type field struct {
// TODO: this shouldn't be exported. nothing outside the shard should know about field encodings.
// However, this is here until tx.go and the engine get refactored into tsdb.
type FieldCodec struct {
fieldsByID map[uint8]*field
fieldsByName map[string]*field
fieldsByID map[uint8]*Field
fieldsByName map[string]*Field
}
// NewFieldCodec returns a FieldCodec for the given Measurement. Must be called with
// a RLock that protects the Measurement.
func newFieldCodec(fields map[string]*field) *FieldCodec {
fieldsByID := make(map[uint8]*field, len(fields))
fieldsByName := make(map[string]*field, len(fields))
func NewFieldCodec(fields map[string]*Field) *FieldCodec {
fieldsByID := make(map[uint8]*Field, len(fields))
fieldsByName := make(map[string]*Field, len(fields))
for _, f := range fields {
fieldsByID[f.ID] = f
fieldsByName[f.Name] = f
@ -1045,15 +667,15 @@ func (f *FieldCodec) DecodeByID(targetID uint8, b []byte) (interface{}, error) {
// DecodeByName scans a byte slice for a field with the given name, converts it to its
// expected type, and return that value.
func (f *FieldCodec) DecodeByName(name string, b []byte) (interface{}, error) {
if fi := f.fieldByName(name); fi == nil {
fi := f.fieldByName(name)
if fi == nil {
return 0, ErrFieldNotFound
} else {
return f.DecodeByID(fi.ID, b)
}
return f.DecodeByID(fi.ID, b)
}
// FieldByName returns the field by its name. It will return a nil if not found
func (f *FieldCodec) fieldByName(name string) *field {
func (f *FieldCodec) fieldByName(name string) *Field {
return f.fieldsByName[name]
}
@ -1083,136 +705,3 @@ func u64tob(v uint64) []byte {
binary.BigEndian.PutUint64(b, v)
return b
}
// marshalWALEntry encodes point data into a single byte slice.
//
// The format of the byte slice is:
//
// uint64 timestamp
// uint32 key length
// []byte key
// []byte data
//
func marshalWALEntry(key []byte, timestamp int64, data []byte) []byte {
v := make([]byte, 8+4, 8+4+len(key)+len(data))
binary.BigEndian.PutUint64(v[0:8], uint64(timestamp))
binary.BigEndian.PutUint32(v[8:12], uint32(len(key)))
v = append(v, key...)
v = append(v, data...)
return v
}
// unmarshalWALEntry decodes a WAL entry into it's separate parts.
// Returned byte slices point to the original slice.
func unmarshalWALEntry(v []byte) (key []byte, timestamp int64, data []byte) {
keyLen := binary.BigEndian.Uint32(v[8:12])
key = v[12 : 12+keyLen]
timestamp = int64(binary.BigEndian.Uint64(v[0:8]))
data = v[12+keyLen:]
return
}
// marshalCacheEntry encodes the timestamp and data to a single byte slice.
//
// The format of the byte slice is:
//
// uint64 timestamp
// []byte data
//
func marshalCacheEntry(timestamp int64, data []byte) []byte {
buf := make([]byte, 8, 8+len(data))
binary.BigEndian.PutUint64(buf[0:8], uint64(timestamp))
return append(buf, data...)
}
// unmarshalCacheEntry returns the timestamp and data from an encoded byte slice.
func unmarshalCacheEntry(buf []byte) (timestamp int64, data []byte) {
timestamp = int64(binary.BigEndian.Uint64(buf[0:8]))
data = buf[8:]
return
}
// byteSlices represents a sortable slice of byte slices.
type byteSlices [][]byte
func (a byteSlices) Len() int { return len(a) }
func (a byteSlices) Less(i, j int) bool { return bytes.Compare(a[i], a[j]) == -1 }
func (a byteSlices) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
// shardCursor provides ordered iteration across a Bolt bucket and shard cache.
type shardCursor struct {
// Bolt cursor and readahead buffer.
cursor *bolt.Cursor
buf struct {
key, value []byte
}
// Cache and current cache index.
cache [][]byte
index int
}
// Seek moves the cursor to a position and returns the closest key/value pair.
func (sc *shardCursor) Seek(seek []byte) (key, value []byte) {
// Seek bolt cursor.
if sc.cursor != nil {
sc.buf.key, sc.buf.value = sc.cursor.Seek(seek)
}
// Seek cache index.
sc.index = sort.Search(len(sc.cache), func(i int) bool {
return bytes.Compare(sc.cache[i][0:8], seek) != -1
})
return sc.read()
}
// Next returns the next key/value pair from the cursor.
func (sc *shardCursor) Next() (key, value []byte) {
// Read next bolt key/value if not bufferred.
if sc.buf.key == nil && sc.cursor != nil {
sc.buf.key, sc.buf.value = sc.cursor.Next()
}
return sc.read()
}
// read returns the next key/value in the cursor buffer or cache.
func (sc *shardCursor) read() (key, value []byte) {
// If neither a buffer or cache exists then return nil.
if sc.buf.key == nil && sc.index >= len(sc.cache) {
return nil, nil
}
// Use the buffer if it exists and there's no cache or if it is lower than the cache.
if sc.buf.key != nil && (sc.index >= len(sc.cache) || bytes.Compare(sc.buf.key, sc.cache[sc.index][0:8]) == -1) {
key, value = sc.buf.key, sc.buf.value
sc.buf.key, sc.buf.value = nil, nil
return
}
// Otherwise read from the cache.
// Continue skipping ahead through duplicate keys in the cache list.
for {
// Read the current cache key/value pair.
key, value = sc.cache[sc.index][0:8], sc.cache[sc.index][8:]
sc.index++
// Exit loop if we're at the end of the cache or the next key is different.
if sc.index >= len(sc.cache) || !bytes.Equal(key, sc.cache[sc.index][0:8]) {
break
}
}
return
}
// WALPartitionN is the number of partitions in the write ahead log.
const WALPartitionN = 8
// WALPartition returns the partition number that key belongs to.
func WALPartition(key []byte) uint8 {
h := fnv.New64a()
h.Write(key)
return uint8(h.Sum64() % WALPartitionN)
}

View File

@ -1,4 +1,4 @@
package tsdb
package tsdb_test
import (
"fmt"
@ -9,6 +9,8 @@ import (
"reflect"
"testing"
"time"
"github.com/influxdb/influxdb/tsdb"
)
func TestShardWriteAndIndex(t *testing.T) {
@ -16,42 +18,43 @@ func TestShardWriteAndIndex(t *testing.T) {
defer os.RemoveAll(tmpDir)
tmpShard := path.Join(tmpDir, "shard")
index := NewDatabaseIndex()
sh := NewShard(index, tmpShard)
index := tsdb.NewDatabaseIndex()
sh := tsdb.NewShard(index, tmpShard, tsdb.NewEngineOptions())
if err := sh.Open(); err != nil {
t.Fatalf("error openeing shard: %s", err.Error())
}
pt := NewPoint(
pt := tsdb.NewPoint(
"cpu",
map[string]string{"host": "server"},
map[string]interface{}{"value": 1.0},
time.Unix(1, 2),
)
err := sh.WritePoints([]Point{pt})
err := sh.WritePoints([]tsdb.Point{pt})
if err != nil {
t.Fatalf(err.Error())
}
pt.SetTime(time.Unix(2, 3))
err = sh.WritePoints([]Point{pt})
err = sh.WritePoints([]tsdb.Point{pt})
if err != nil {
t.Fatalf(err.Error())
}
validateIndex := func() {
if !reflect.DeepEqual(index.names, []string{"cpu"}) {
if !reflect.DeepEqual(index.Names(), []string{"cpu"}) {
t.Fatalf("measurement names in shard didn't match")
}
if len(index.series) != 1 {
if index.SeriesN() != 1 {
t.Fatalf("series wasn't in index")
}
seriesTags := index.series[string(pt.Key())].Tags
seriesTags := index.Series(string(pt.Key())).Tags
if len(seriesTags) != len(pt.Tags()) || pt.Tags()["host"] != seriesTags["host"] {
t.Fatalf("tags weren't properly saved to series index: %v, %v", pt.Tags(), index.series[string(pt.Key())].Tags)
t.Fatalf("tags weren't properly saved to series index: %v, %v", pt.Tags(), seriesTags)
}
if !reflect.DeepEqual(index.measurements["cpu"].TagKeys(), []string{"host"}) {
if !reflect.DeepEqual(index.Measurement("cpu").TagKeys(), []string{"host"}) {
t.Fatalf("tag key wasn't saved to measurement index")
}
}
@ -61,8 +64,8 @@ func TestShardWriteAndIndex(t *testing.T) {
// ensure the index gets loaded after closing and opening the shard
sh.Close()
index = NewDatabaseIndex()
sh = NewShard(index, tmpShard)
index = tsdb.NewDatabaseIndex()
sh = tsdb.NewShard(index, tmpShard, tsdb.NewEngineOptions())
if err := sh.Open(); err != nil {
t.Fatalf("error openeing shard: %s", err.Error())
}
@ -71,7 +74,7 @@ func TestShardWriteAndIndex(t *testing.T) {
// and ensure that we can still write data
pt.SetTime(time.Unix(2, 6))
err = sh.WritePoints([]Point{pt})
err = sh.WritePoints([]tsdb.Point{pt})
if err != nil {
t.Fatalf(err.Error())
}
@ -82,52 +85,52 @@ func TestShardWriteAddNewField(t *testing.T) {
defer os.RemoveAll(tmpDir)
tmpShard := path.Join(tmpDir, "shard")
index := NewDatabaseIndex()
sh := NewShard(index, tmpShard)
index := tsdb.NewDatabaseIndex()
sh := tsdb.NewShard(index, tmpShard, tsdb.NewEngineOptions())
if err := sh.Open(); err != nil {
t.Fatalf("error openeing shard: %s", err.Error())
}
defer sh.Close()
pt := NewPoint(
pt := tsdb.NewPoint(
"cpu",
map[string]string{"host": "server"},
map[string]interface{}{"value": 1.0},
time.Unix(1, 2),
)
err := sh.WritePoints([]Point{pt})
err := sh.WritePoints([]tsdb.Point{pt})
if err != nil {
t.Fatalf(err.Error())
}
pt = NewPoint(
pt = tsdb.NewPoint(
"cpu",
map[string]string{"host": "server"},
map[string]interface{}{"value": 1.0, "value2": 2.0},
time.Unix(1, 2),
)
err = sh.WritePoints([]Point{pt})
err = sh.WritePoints([]tsdb.Point{pt})
if err != nil {
t.Fatalf(err.Error())
}
if !reflect.DeepEqual(index.names, []string{"cpu"}) {
if !reflect.DeepEqual(index.Names(), []string{"cpu"}) {
t.Fatalf("measurement names in shard didn't match")
}
if len(index.series) != 1 {
if index.SeriesN() != 1 {
t.Fatalf("series wasn't in index")
}
seriesTags := index.series[string(pt.Key())].Tags
seriesTags := index.Series(string(pt.Key())).Tags
if len(seriesTags) != len(pt.Tags()) || pt.Tags()["host"] != seriesTags["host"] {
t.Fatalf("tags weren't properly saved to series index: %v, %v", pt.Tags(), index.series[string(pt.Key())].Tags)
t.Fatalf("tags weren't properly saved to series index: %v, %v", pt.Tags(), seriesTags)
}
if !reflect.DeepEqual(index.measurements["cpu"].TagKeys(), []string{"host"}) {
if !reflect.DeepEqual(index.Measurement("cpu").TagKeys(), []string{"host"}) {
t.Fatalf("tag key wasn't saved to measurement index")
}
if len(index.measurements["cpu"].FieldNames()) != 2 {
if len(index.Measurement("cpu").FieldNames()) != 2 {
t.Fatalf("field names wasn't saved to measurement index")
}
@ -139,10 +142,11 @@ func TestShard_Autoflush(t *testing.T) {
defer os.RemoveAll(path)
// Open shard with a really low size threshold, high flush interval.
sh := NewShard(NewDatabaseIndex(), filepath.Join(path, "shard"))
sh.MaxWALSize = 1024 // 1KB
sh.WALFlushInterval = 1 * time.Hour
sh.WALPartitionFlushDelay = 1 * time.Millisecond
sh := tsdb.NewShard(tsdb.NewDatabaseIndex(), filepath.Join(path, "shard"), tsdb.EngineOptions{
MaxWALSize: 1024, // 1KB
WALFlushInterval: 1 * time.Hour,
WALPartitionFlushDelay: 1 * time.Millisecond,
})
if err := sh.Open(); err != nil {
t.Fatal(err)
}
@ -150,7 +154,7 @@ func TestShard_Autoflush(t *testing.T) {
// Write a bunch of points.
for i := 0; i < 100; i++ {
if err := sh.WritePoints([]Point{NewPoint(
if err := sh.WritePoints([]tsdb.Point{tsdb.NewPoint(
fmt.Sprintf("cpu%d", i),
map[string]string{"host": "server"},
map[string]interface{}{"value": 1.0},
@ -177,10 +181,11 @@ func TestShard_Autoflush_FlushInterval(t *testing.T) {
defer os.RemoveAll(path)
// Open shard with a high size threshold, small time threshold.
sh := NewShard(NewDatabaseIndex(), filepath.Join(path, "shard"))
sh.MaxWALSize = 10 * 1024 * 1024 // 10MB
sh.WALFlushInterval = 100 * time.Millisecond
sh.WALPartitionFlushDelay = 1 * time.Millisecond
sh := tsdb.NewShard(tsdb.NewDatabaseIndex(), filepath.Join(path, "shard"), tsdb.EngineOptions{
MaxWALSize: 10 * 1024 * 1024, // 10MB
WALFlushInterval: 100 * time.Millisecond,
WALPartitionFlushDelay: 1 * time.Millisecond,
})
if err := sh.Open(); err != nil {
t.Fatal(err)
}
@ -188,7 +193,7 @@ func TestShard_Autoflush_FlushInterval(t *testing.T) {
// Write some points.
for i := 0; i < 100; i++ {
if err := sh.WritePoints([]Point{NewPoint(
if err := sh.WritePoints([]tsdb.Point{tsdb.NewPoint(
fmt.Sprintf("cpu%d", i),
map[string]string{"host": "server"},
map[string]interface{}{"value": 1.0},
@ -240,12 +245,12 @@ func benchmarkWritePoints(b *testing.B, mCnt, tkCnt, tvCnt, pntCnt int) {
// Generate test series (measurements + unique tag sets).
series := genTestSeries(mCnt, tkCnt, tvCnt)
// Create index for the shard to use.
index := NewDatabaseIndex()
index := tsdb.NewDatabaseIndex()
// Generate point data to write to the shard.
points := []Point{}
points := []tsdb.Point{}
for _, s := range series {
for val := 0.0; val < float64(pntCnt); val++ {
p := NewPoint(s.Measurement, s.Series.Tags, map[string]interface{}{"value": val}, time.Now())
p := tsdb.NewPoint(s.Measurement, s.Series.Tags, map[string]interface{}{"value": val}, time.Now())
points = append(points, p)
}
}
@ -258,7 +263,7 @@ func benchmarkWritePoints(b *testing.B, mCnt, tkCnt, tvCnt, pntCnt int) {
for n := 0; n < b.N; n++ {
tmpDir, _ := ioutil.TempDir("", "shard_test")
tmpShard := path.Join(tmpDir, "shard")
shard := NewShard(index, tmpShard)
shard := tsdb.NewShard(index, tmpShard, tsdb.NewEngineOptions())
shard.Open()
b.StartTimer()
@ -280,12 +285,12 @@ func benchmarkWritePointsExistingSeries(b *testing.B, mCnt, tkCnt, tvCnt, pntCnt
// Generate test series (measurements + unique tag sets).
series := genTestSeries(mCnt, tkCnt, tvCnt)
// Create index for the shard to use.
index := NewDatabaseIndex()
index := tsdb.NewDatabaseIndex()
// Generate point data to write to the shard.
points := []Point{}
points := []tsdb.Point{}
for _, s := range series {
for val := 0.0; val < float64(pntCnt); val++ {
p := NewPoint(s.Measurement, s.Series.Tags, map[string]interface{}{"value": val}, time.Now())
p := tsdb.NewPoint(s.Measurement, s.Series.Tags, map[string]interface{}{"value": val}, time.Now())
points = append(points, p)
}
}
@ -293,7 +298,7 @@ func benchmarkWritePointsExistingSeries(b *testing.B, mCnt, tkCnt, tvCnt, pntCnt
tmpDir, _ := ioutil.TempDir("", "")
defer os.RemoveAll(tmpDir)
tmpShard := path.Join(tmpDir, "shard")
shard := NewShard(index, tmpShard)
shard := tsdb.NewShard(index, tmpShard, tsdb.NewEngineOptions())
shard.Open()
defer shard.Close()
chunkedWrite(shard, points)
@ -314,7 +319,7 @@ func benchmarkWritePointsExistingSeries(b *testing.B, mCnt, tkCnt, tvCnt, pntCnt
}
}
func chunkedWrite(shard *Shard, points []Point) {
func chunkedWrite(shard *tsdb.Shard, points []tsdb.Point) {
nPts := len(points)
chunkSz := 10000
start := 0

View File

@ -9,18 +9,15 @@ import (
"strconv"
"strings"
"sync"
"time"
"github.com/influxdb/influxdb/influxql"
)
func NewStore(path string) *Store {
return &Store{
path: path,
MaxWALSize: DefaultMaxWALSize,
WALFlushInterval: DefaultWALFlushInterval,
WALPartitionFlushDelay: DefaultWALPartitionFlushDelay,
Logger: log.New(os.Stderr, "[store] ", log.LstdFlags),
path: path,
EngineOptions: NewEngineOptions(),
Logger: log.New(os.Stderr, "[store] ", log.LstdFlags),
}
}
@ -35,16 +32,34 @@ type Store struct {
databaseIndexes map[string]*DatabaseIndex
shards map[uint64]*Shard
MaxWALSize int
WALFlushInterval time.Duration
WALPartitionFlushDelay time.Duration
Logger *log.Logger
EngineOptions EngineOptions
Logger *log.Logger
}
// Path returns the store's root path.
func (s *Store) Path() string { return s.path }
// DatabaseIndexN returns the number of databases indicies in the store.
func (s *Store) DatabaseIndexN() int {
s.mu.RLock()
defer s.mu.RUnlock()
return len(s.databaseIndexes)
}
// Shard returns a shard by id.
func (s *Store) Shard(id uint64) *Shard {
s.mu.RLock()
defer s.mu.RUnlock()
return s.shards[id]
}
// ShardN returns the number of shard in the store.
func (s *Store) ShardN() int {
s.mu.RLock()
defer s.mu.RUnlock()
return len(s.shards)
}
func (s *Store) CreateShard(database, retentionPolicy string, shardID uint64) error {
s.mu.Lock()
defer s.mu.Unlock()
@ -67,7 +82,7 @@ func (s *Store) CreateShard(database, retentionPolicy string, shardID uint64) er
}
shardPath := filepath.Join(s.path, database, retentionPolicy, strconv.FormatUint(shardID, 10))
shard := s.newShard(db, shardPath)
shard := NewShard(db, shardPath, s.EngineOptions)
if err := shard.Open(); err != nil {
return err
}
@ -101,15 +116,6 @@ func (s *Store) DeleteShard(shardID uint64) error {
return nil
}
// newShard returns a shard and copies configuration settings from the store.
func (s *Store) newShard(index *DatabaseIndex, path string) *Shard {
sh := NewShard(index, path)
sh.MaxWALSize = s.MaxWALSize
sh.WALFlushInterval = s.WALFlushInterval
sh.WALPartitionFlushDelay = s.WALPartitionFlushDelay
return sh
}
// DeleteDatabase will close all shards associated with a database and remove the directory and files from disk.
func (s *Store) DeleteDatabase(name string, shardIDs []uint64) error {
s.mu.Lock()
@ -127,12 +133,6 @@ func (s *Store) DeleteDatabase(name string, shardIDs []uint64) error {
return nil
}
func (s *Store) Shard(shardID uint64) *Shard {
s.mu.RLock()
defer s.mu.RUnlock()
return s.shards[shardID]
}
// ShardIDs returns a slice of all ShardIDs under management.
func (s *Store) ShardIDs() []uint64 {
ids := make([]uint64, 0, len(s.shards))
@ -173,7 +173,7 @@ func (s *Store) deleteSeries(keys []string) error {
s.mu.RLock()
defer s.mu.RUnlock()
for _, sh := range s.shards {
if err := sh.deleteSeries(keys); err != nil {
if err := sh.DeleteSeries(keys); err != nil {
return err
}
}
@ -185,7 +185,7 @@ func (s *Store) deleteMeasurement(name string, seriesKeys []string) error {
s.mu.RLock()
defer s.mu.RUnlock()
for _, sh := range s.shards {
if err := sh.deleteMeasurement(name, seriesKeys); err != nil {
if err := sh.DeleteMeasurement(name, seriesKeys); err != nil {
return err
}
}
@ -236,8 +236,11 @@ func (s *Store) loadShards() error {
continue
}
shard := s.newShard(s.databaseIndexes[db], path)
shard.Open()
shard := NewShard(s.databaseIndexes[db], path, s.EngineOptions)
err = shard.Open()
if err != nil {
return fmt.Errorf("failed to open shard %d: %s", shardID, err)
}
s.shards[shardID] = shard
}
}
@ -253,6 +256,8 @@ func (s *Store) Open() error {
s.shards = map[uint64]*Shard{}
s.databaseIndexes = map[string]*DatabaseIndex{}
s.Logger.Printf("Using data dir: %v", s.Path())
// Create directory.
if err := os.MkdirAll(s.path, 0777); err != nil {
return err
@ -281,18 +286,6 @@ func (s *Store) WriteToShard(shardID uint64, points []Point) error {
return sh.WritePoints(points)
}
// Flush forces all shards to write their WAL data to the index.
func (s *Store) Flush() error {
s.mu.RLock()
defer s.mu.RUnlock()
for shardID, sh := range s.shards {
if err := sh.Flush(s.WALPartitionFlushDelay); err != nil {
return fmt.Errorf("flush: shard=%d, err=%s", shardID, err)
}
}
return nil
}
func (s *Store) CreateMapper(shardID uint64, query string, chunkSize int) (Mapper, error) {
q, err := influxql.NewParser(strings.NewReader(query)).ParseStatement()
if err != nil {
@ -309,10 +302,7 @@ func (s *Store) CreateMapper(shardID uint64, query string, chunkSize int) (Mappe
return nil, nil
}
if (stmt.IsRawQuery && !stmt.HasDistinct()) || stmt.IsSimpleDerivative() {
return NewRawMapper(shard, stmt, chunkSize), nil
}
return NewAggMapper(shard, stmt), nil
return NewLocalMapper(shard, stmt, chunkSize), nil
}
func (s *Store) Close() error {

View File

@ -1,4 +1,4 @@
package tsdb
package tsdb_test
import (
"io/ioutil"
@ -6,6 +6,8 @@ import (
"path/filepath"
"testing"
"time"
"github.com/influxdb/influxdb/tsdb"
)
func TestStoreOpen(t *testing.T) {
@ -19,13 +21,13 @@ func TestStoreOpen(t *testing.T) {
t.Fatalf("failed to create test db dir: %v", err)
}
s := NewStore(dir)
s := tsdb.NewStore(dir)
if err := s.Open(); err != nil {
t.Fatalf("Store.Open() failed: %v", err)
}
if exp := 1; len(s.databaseIndexes) != exp {
t.Fatalf("database index count mismatch: got %v, exp %v", len(s.databaseIndexes), exp)
if got, exp := s.DatabaseIndexN(), 1; got != exp {
t.Fatalf("database index count mismatch: got %v, exp %v", got, exp)
}
}
@ -46,26 +48,25 @@ func TestStoreOpenShard(t *testing.T) {
t.Fatalf("Store.Open() failed to create test shard 1: %v", err)
}
s := NewStore(dir)
s := tsdb.NewStore(dir)
if err := s.Open(); err != nil {
t.Fatalf("Store.Open() failed: %v", err)
}
if exp := 1; len(s.databaseIndexes) != exp {
t.Fatalf("Store.Open() database index count mismatch: got %v, exp %v", len(s.databaseIndexes), exp)
if got, exp := s.DatabaseIndexN(), 1; got != exp {
t.Fatalf("Store.Open() database index count mismatch: got %v, exp %v", got, exp)
}
if _, ok := s.databaseIndexes["mydb"]; !ok {
if di := s.DatabaseIndex("mydb"); di == nil {
t.Errorf("Store.Open() database myb does not exist")
}
if exp := 1; len(s.shards) != exp {
t.Fatalf("Store.Open() shard count mismatch: got %v, exp %v", len(s.shards), exp)
if got, exp := s.ShardN(), 1; got != exp {
t.Fatalf("Store.Open() shard count mismatch: got %v, exp %v", got, exp)
}
sh := s.shards[uint64(1)]
if sh.path != shardPath {
t.Errorf("Store.Open() shard path mismatch: got %v, exp %v", sh.path, shardPath)
if sh := s.Shard(1); sh.Path() != shardPath {
t.Errorf("Store.Open() shard path mismatch: got %v, exp %v", sh.Path(), shardPath)
}
}
@ -80,16 +81,16 @@ func TestStoreOpenShardCreateDelete(t *testing.T) {
t.Fatalf("Store.Open() failed to create test db dir: %v", err)
}
s := NewStore(dir)
s := tsdb.NewStore(dir)
if err := s.Open(); err != nil {
t.Fatalf("Store.Open() failed: %v", err)
}
if exp := 1; len(s.databaseIndexes) != exp {
t.Fatalf("Store.Open() database index count mismatch: got %v, exp %v", len(s.databaseIndexes), exp)
if got, exp := s.DatabaseIndexN(), 1; got != exp {
t.Fatalf("Store.Open() database index count mismatch: got %v, exp %v", got, exp)
}
if _, ok := s.databaseIndexes["mydb"]; !ok {
if di := s.DatabaseIndex("mydb"); di == nil {
t.Errorf("Store.Open() database mydb does not exist")
}
@ -97,8 +98,8 @@ func TestStoreOpenShardCreateDelete(t *testing.T) {
t.Fatalf("Store.Open() failed to create shard")
}
if exp := 1; len(s.shards) != exp {
t.Fatalf("Store.Open() shard count mismatch: got %v, exp %v", len(s.shards), exp)
if got, exp := s.ShardN(), 1; got != exp {
t.Fatalf("Store.Open() shard count mismatch: got %v, exp %v", got, exp)
}
shardIDs := s.ShardIDs()
@ -110,7 +111,7 @@ func TestStoreOpenShardCreateDelete(t *testing.T) {
t.Fatalf("Store.Open() failed to delete shard: %v", err)
}
if _, ok := s.shards[uint64(1)]; ok {
if sh := s.Shard(1); sh != nil {
t.Fatal("Store.Open() shard ID 1 still exists")
}
}
@ -127,17 +128,17 @@ func TestStoreOpenNotDatabaseDir(t *testing.T) {
t.Fatalf("Store.Open() failed to create test db dir: %v", err)
}
s := NewStore(dir)
s := tsdb.NewStore(dir)
if err := s.Open(); err != nil {
t.Fatalf("Store.Open() failed: %v", err)
}
if exp := 0; len(s.databaseIndexes) != exp {
t.Fatalf("Store.Open() database index count mismatch: got %v, exp %v", len(s.databaseIndexes), exp)
if got, exp := s.DatabaseIndexN(), 0; got != exp {
t.Fatalf("Store.Open() database index count mismatch: got %v, exp %v", got, exp)
}
if exp := 0; len(s.shards) != exp {
t.Fatalf("Store.Open() shard count mismatch: got %v, exp %v", len(s.shards), exp)
if got, exp := s.ShardN(), 0; got != exp {
t.Fatalf("Store.Open() shard count mismatch: got %v, exp %v", got, exp)
}
}
@ -157,21 +158,21 @@ func TestStoreOpenNotRPDir(t *testing.T) {
t.Fatalf("Store.Open() failed to create test retention policy directory: %v", err)
}
s := NewStore(dir)
s := tsdb.NewStore(dir)
if err := s.Open(); err != nil {
t.Fatalf("Store.Open() failed: %v", err)
}
if exp := 1; len(s.databaseIndexes) != exp {
t.Fatalf("Store.Open() database index count mismatch: got %v, exp %v", len(s.databaseIndexes), exp)
if got, exp := s.DatabaseIndexN(), 1; got != exp {
t.Fatalf("Store.Open() database index count mismatch: got %v, exp %v", got, exp)
}
if _, ok := s.databaseIndexes["mydb"]; !ok {
if di := s.DatabaseIndex("mydb"); di == nil {
t.Errorf("Store.Open() database myb does not exist")
}
if exp := 0; len(s.shards) != exp {
t.Fatalf("Store.Open() shard count mismatch: got %v, exp %v", len(s.shards), exp)
if got, exp := s.ShardN(), 0; got != exp {
t.Fatalf("Store.Open() shard count mismatch: got %v, exp %v", got, exp)
}
}
@ -193,21 +194,21 @@ func TestStoreOpenShardBadShardPath(t *testing.T) {
t.Fatalf("Store.Open() failed to create test shard 1: %v", err)
}
s := NewStore(dir)
s := tsdb.NewStore(dir)
if err := s.Open(); err != nil {
t.Fatalf("Store.Open() failed: %v", err)
}
if exp := 1; len(s.databaseIndexes) != exp {
t.Fatalf("Store.Open() database index count mismatch: got %v, exp %v", len(s.databaseIndexes), exp)
if got, exp := s.DatabaseIndexN(), 1; got != exp {
t.Fatalf("Store.Open() database index count mismatch: got %v, exp %v", got, exp)
}
if _, ok := s.databaseIndexes["mydb"]; !ok {
if di := s.DatabaseIndex("mydb"); di == nil {
t.Errorf("Store.Open() database myb does not exist")
}
if exp := 0; len(s.shards) != exp {
t.Fatalf("Store.Open() shard count mismatch: got %v, exp %v", len(s.shards), exp)
if got, exp := s.ShardN(), 0; got != exp {
t.Fatalf("Store.Open() shard count mismatch: got %v, exp %v", got, exp)
}
}
@ -218,17 +219,17 @@ func benchmarkStoreOpen(b *testing.B, mCnt, tkCnt, tvCnt, pntCnt, shardCnt int)
// Generate test series (measurements + unique tag sets).
series := genTestSeries(mCnt, tkCnt, tvCnt)
// Generate point data to write to the shards.
points := []Point{}
points := []tsdb.Point{}
for _, s := range series {
for val := 0.0; val < float64(pntCnt); val++ {
p := NewPoint(s.Measurement, s.Series.Tags, map[string]interface{}{"value": val}, time.Now())
p := tsdb.NewPoint(s.Measurement, s.Series.Tags, map[string]interface{}{"value": val}, time.Now())
points = append(points, p)
}
}
// Create a temporary directory for the test data.
dir, _ := ioutil.TempDir("", "store_test")
// Create the store.
store := NewStore(dir)
store := tsdb.NewStore(dir)
// Open the store.
if err := store.Open(); err != nil {
b.Fatalf("benchmarkStoreOpen: %s", err)
@ -249,7 +250,7 @@ func benchmarkStoreOpen(b *testing.B, mCnt, tkCnt, tvCnt, pntCnt, shardCnt int)
// Run the benchmark loop.
b.ResetTimer()
for n := 0; n < b.N; n++ {
store := NewStore(dir)
store := tsdb.NewStore(dir)
if err := store.Open(); err != nil {
b.Fatalf("benchmarkStoreOpen: %s", err)
}
@ -260,7 +261,7 @@ func benchmarkStoreOpen(b *testing.B, mCnt, tkCnt, tvCnt, pntCnt, shardCnt int)
}
}
func chunkedWriteStoreShard(store *Store, shardID int, points []Point) {
func chunkedWriteStoreShard(store *tsdb.Store, shardID int, points []tsdb.Point) {
nPts := len(points)
chunkSz := 10000
start := 0