// telegraf/Godeps/_workspace/src/github.com/influxdb/influxdb/meta/store.go


package meta
import (
"bytes"
crand "crypto/rand"
"crypto/sha256"
"encoding/binary"
"errors"
"fmt"
"io"
"io/ioutil"
"log"
"math/rand"
"net"
"os"
"path/filepath"
"strconv"
"strings"
"sync"
"time"
"github.com/gogo/protobuf/proto"
"github.com/hashicorp/raft"
"github.com/influxdb/influxdb/influxql"
"github.com/influxdb/influxdb/meta/internal"
"golang.org/x/crypto/bcrypt"
)
// tcp.Mux header bytes.
const (
MuxRaftHeader = 0
MuxExecHeader = 1
MuxRPCHeader = 5
// SaltBytes is the number of bytes used for salts
SaltBytes = 32
DefaultSyncNodeDelay = time.Second
)
// ExecMagic is the first 4 bytes sent on a remote exec connection to verify
// that it came from a remote exec client.
const ExecMagic = "EXEC"
// Retention policy settings.
const (
AutoCreateRetentionPolicyName = "default"
AutoCreateRetentionPolicyPeriod = 0
// MaxAutoCreatedRetentionPolicyReplicaN is the maximum replication factor that will
// be set for auto-created retention policies.
MaxAutoCreatedRetentionPolicyReplicaN = 3
)
// Raft configuration.
const (
raftLogCacheSize = 512
raftSnapshotsRetained = 2
raftTransportMaxPool = 3
raftTransportTimeout = 10 * time.Second
MaxRaftNodes = 3
)
// Store represents a raft-backed metastore.
type Store struct {
mu sync.RWMutex
path string
opened bool
id uint64 // local node id
// All peers in cluster. Used during bootstrapping.
peers []string
data *Data
rpc *rpc
// The address used by other nodes to reach this node.
RemoteAddr net.Addr
raftState raftState
ready chan struct{}
err chan error
closing chan struct{}
wg sync.WaitGroup
changed chan struct{}
// clusterTracingEnabled controls whether low-level cluster communication is logged.
// Useful for troubleshooting
clusterTracingEnabled bool
retentionAutoCreate bool
// The listeners to accept raft and remote exec connections from.
RaftListener net.Listener
ExecListener net.Listener
// The listener for higher-level cluster operations.
RPCListener net.Listener
// The advertised hostname of the store.
Addr net.Addr
// The amount of time before a follower starts a new election.
HeartbeatTimeout time.Duration
// The amount of time before a candidate starts a new election.
ElectionTimeout time.Duration
// The amount of time without communication to the cluster before a
// leader steps down to a follower state.
LeaderLeaseTimeout time.Duration
// The amount of time without an apply before sending a heartbeat.
CommitTimeout time.Duration
// Authentication cache.
authCache map[string]authUser
// hashPassword generates a cryptographically secure hash for password.
// Returns an error if the password is invalid or a hash cannot be generated.
hashPassword HashPasswordFn
Logger *log.Logger
}
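// authUser caches a salt and salted hash of a user's password so that repeated
// authentication attempts can avoid the full bcrypt comparison.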
type authUser struct {
salt []byte
hash []byte
}
// NewStore returns a new instance of Store.
func NewStore(c *Config) *Store {
s := &Store{
path: c.Dir,
peers: c.Peers,
data: &Data{},
ready: make(chan struct{}),
err: make(chan error),
closing: make(chan struct{}),
changed: make(chan struct{}),
clusterTracingEnabled: c.ClusterTracing,
retentionAutoCreate: c.RetentionAutoCreate,
HeartbeatTimeout: time.Duration(c.HeartbeatTimeout),
ElectionTimeout: time.Duration(c.ElectionTimeout),
LeaderLeaseTimeout: time.Duration(c.LeaderLeaseTimeout),
CommitTimeout: time.Duration(c.CommitTimeout),
authCache: make(map[string]authUser, 0),
hashPassword: func(password string) ([]byte, error) {
return bcrypt.GenerateFromPassword([]byte(password), BcryptCost)
},
Logger: log.New(os.Stderr, "[metastore] ", log.LstdFlags),
}
s.raftState = &localRaft{store: s}
s.rpc = &rpc{
store: s,
tracingEnabled: c.ClusterTracing,
logger: s.Logger,
}
return s
}
// Path returns the root path when open.
// Returns an empty string when the store is closed.
func (s *Store) Path() string { return s.path }
// IDPath returns the path to the local node ID file.
func (s *Store) IDPath() string { return filepath.Join(s.path, "id") }
// Open opens and initializes the raft store.
func (s *Store) Open() error {
// Verify that there are no more than MaxRaftNodes peers.
// https://github.com/influxdb/influxdb/issues/2750
if len(s.peers) > MaxRaftNodes {
return ErrTooManyPeers
}
// Verify listeners are set.
if s.RaftListener == nil {
panic("Store.RaftListener not set")
} else if s.ExecListener == nil {
panic("Store.ExecListener not set")
} else if s.RPCListener == nil {
panic("Store.RPCListener not set")
}
s.Logger.Printf("Using data dir: %v", s.Path())
if err := func() error {
s.mu.Lock()
defer s.mu.Unlock()
// Check if store has already been opened.
if s.opened {
return ErrStoreOpen
}
s.opened = true
// load our raft state
if err := s.loadState(); err != nil {
return err
}
// Create the root directory if it doesn't already exist.
if err := s.createRootDir(); err != nil {
return fmt.Errorf("mkdir all: %s", err)
}
// Open the raft store.
if err := s.openRaft(); err != nil {
return fmt.Errorf("raft: %s", err)
}
// Initialize the store, if necessary.
if err := s.initialize(); err != nil {
return fmt.Errorf("initialize raft: %s", err)
}
// Load existing ID, if exists.
if err := s.readID(); err != nil {
return fmt.Errorf("read id: %s", err)
}
return nil
}(); err != nil {
return err
}
// Begin serving listener.
s.wg.Add(1)
go s.serveExecListener()
s.wg.Add(1)
go s.serveRPCListener()
// Join an existing cluster if needed.
if err := s.joinCluster(); err != nil {
return fmt.Errorf("join: %v", err)
}
// If the ID doesn't exist then create a new node.
if s.id == 0 {
go s.init()
} else {
go s.syncNodeInfo()
close(s.ready)
}
// Wait for a leader to be elected so we know the raft log is loaded
// and up to date
<-s.ready
return s.WaitForLeader(0)
}
// syncNodeInfo continuously tries to update the current node's hostname
// in the meta store. It will retry until successful.
func (s *Store) syncNodeInfo() error {
<-s.ready
for {
if err := func() error {
if err := s.WaitForLeader(0); err != nil {
return err
}
ni, err := s.Node(s.id)
if err != nil {
return err
}
if ni == nil {
return ErrNodeNotFound
}
if ni.Host == s.RemoteAddr.String() {
s.Logger.Printf("Updated node id=%d hostname=%v", s.id, s.RemoteAddr.String())
return nil
}
_, err = s.UpdateNode(s.id, s.RemoteAddr.String())
if err != nil {
return err
}
return nil
}(); err != nil {
// If we get an error, the cluster has not stabilized so just try again
time.Sleep(DefaultSyncNodeDelay)
continue
}
return nil
}
}
// loadState sets the appropriate raftState from our persistent storage
func (s *Store) loadState() error {
peers, err := readPeersJSON(filepath.Join(s.path, "peers.json"))
if err != nil {
return err
}
// If we have existing peers, use those. This will override what's in the
// config.
if len(peers) > 0 {
s.peers = peers
}
// if no peers on disk, we need to start raft in order to initialize a new
// cluster or join an existing one.
if len(peers) == 0 {
s.raftState = &localRaft{store: s}
// if we have a raft database, (maybe restored), we should start raft locally
} else if _, err := os.Stat(filepath.Join(s.path, "raft.db")); err == nil {
s.raftState = &localRaft{store: s}
// otherwise, we should use remote raft
} else {
s.raftState = &remoteRaft{store: s}
}
return nil
}
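// joinCluster attempts to join the cluster through one of the configured peers,
// retrying until a join succeeds. It is a no-op if there are no peers or this
// node already has an ID.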
func (s *Store) joinCluster() error {
// No join options, so nothing to do
if len(s.peers) == 0 {
return nil
}
// We already have a node ID so we're already part of a cluster;
// don't join again so we can use our existing state.
if s.id != 0 {
s.Logger.Printf("Skipping cluster join: already member of cluster: nodeId=%v raftEnabled=%v peers=%v",
s.id, raft.PeerContained(s.peers, s.RemoteAddr.String()), s.peers)
return nil
}
s.Logger.Printf("Joining cluster at: %v", s.peers)
for {
for _, join := range s.peers {
res, err := s.rpc.join(s.RemoteAddr.String(), join)
if err != nil {
s.Logger.Printf("Join node %v failed: %v: retrying...", join, err)
continue
}
s.Logger.Printf("Joined remote node %v", join)
s.Logger.Printf("nodeId=%v raftEnabled=%v peers=%v", res.NodeID, res.RaftEnabled, res.RaftNodes)
s.peers = res.RaftNodes
s.id = res.NodeID
if err := s.writeNodeID(res.NodeID); err != nil {
s.Logger.Printf("Write node id failed: %v", err)
break
}
if !res.RaftEnabled {
// Shutdown our local raft and transition to a remote raft state
if err := s.enableRemoteRaft(); err != nil {
s.Logger.Printf("Enable remote raft failed: %v", err)
break
}
}
return nil
}
time.Sleep(time.Second)
}
}
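// enableLocalRaft switches the store to run a local raft instance.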
func (s *Store) enableLocalRaft() error {
s.mu.Lock()
defer s.mu.Unlock()
if _, ok := s.raftState.(*localRaft); ok {
return nil
}
s.Logger.Printf("Switching to local raft")
lr := &localRaft{store: s}
return s.changeState(lr)
}
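// enableRemoteRaft switches the store to a remote raft state, in which this
// node does not run raft locally.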
func (s *Store) enableRemoteRaft() error {
if _, ok := s.raftState.(*remoteRaft); ok {
return nil
}
s.Logger.Printf("Switching to remote raft")
rr := &remoteRaft{store: s}
return s.changeState(rr)
}
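// changeState closes and removes the current raft state, if any, and opens the
// given state in its place.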
func (s *Store) changeState(state raftState) error {
if s.raftState != nil {
if err := s.raftState.close(); err != nil {
return err
}
// Clear out any persistent state
if err := s.raftState.remove(); err != nil {
return err
}
}
s.raftState = state
if err := s.raftState.open(); err != nil {
return err
}
return nil
}
// openRaft initializes the raft store.
func (s *Store) openRaft() error {
return s.raftState.open()
}
// initialize attempts to bootstrap the raft store if there are no committed entries.
func (s *Store) initialize() error {
return s.raftState.initialize()
}
// Close closes the store and shuts down the node in the cluster.
func (s *Store) Close() error {
s.mu.Lock()
defer s.mu.Unlock()
return s.close()
}
// WaitForDataChanged will block the current goroutine until the metastore index has
// been updated.
func (s *Store) WaitForDataChanged() error {
s.mu.RLock()
changed := s.changed
s.mu.RUnlock()
for {
select {
case <-s.closing:
return errors.New("closing")
case <-changed:
return nil
}
}
}
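// close closes the listeners and the raft state. The caller must hold the write
// lock; close temporarily releases it while waiting for background goroutines to exit.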
func (s *Store) close() error {
// Check if store has already been closed.
if !s.opened {
return ErrStoreClosed
}
s.opened = false
// Close our exec listener
if err := s.ExecListener.Close(); err != nil {
s.Logger.Printf("error closing ExecListener %s", err)
}
// Close our RPC listener
if err := s.RPCListener.Close(); err != nil {
s.Logger.Printf("error closing ExecListener %s", err)
}
if s.raftState != nil {
s.raftState.close()
}
// Because a goroutine could have already fired in the time it took us to acquire the lock,
// it could then try to acquire another lock, and would deadlock.
// For that reason, we release our lock and signal the close so that
// all goroutines can exit cleanly and fulfill their contract to the wait group.
s.mu.Unlock()
// Notify goroutines of close.
close(s.closing)
s.wg.Wait()
// Now that all goroutines are cleaned up, re-acquire the lock to do the final cleanup and exit.
s.mu.Lock()
s.raftState = nil
return nil
}
// readID reads the local node ID from the ID file.
func (s *Store) readID() error {
b, err := ioutil.ReadFile(s.IDPath())
if os.IsNotExist(err) {
s.id = 0
return nil
} else if err != nil {
return fmt.Errorf("read file: %s", err)
}
id, err := strconv.ParseUint(string(b), 10, 64)
if err != nil {
return fmt.Errorf("parse id: %s", err)
}
s.id = id
return nil
}
// init initializes the store in a separate goroutine.
// This occurs when the store first creates or joins a cluster.
// The ready channel is closed once the store is initialized.
func (s *Store) init() {
// Create a node for this store.
if err := s.createLocalNode(); err != nil {
s.err <- fmt.Errorf("create local node: %s", err)
return
}
// Notify the ready channel.
close(s.ready)
}
// createLocalNode creates the node for this local instance.
// Writes the id of the node to file on success.
func (s *Store) createLocalNode() error {
// Wait for leader.
if err := s.WaitForLeader(0); err != nil {
return fmt.Errorf("wait for leader: %s", err)
}
// Create new node.
ni, err := s.CreateNode(s.RemoteAddr.String())
if err != nil {
return fmt.Errorf("create node: %s", err)
}
// Write node id to file.
if err := s.writeNodeID(ni.ID); err != nil {
return fmt.Errorf("write file: %s", err)
}
// Set ID locally.
s.mu.Lock()
s.id = ni.ID
s.mu.Unlock()
s.Logger.Printf("Created local node: id=%d, host=%s", s.id, s.RemoteAddr)
return nil
}
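// createRootDir creates the store's root directory if it doesn't already exist.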
func (s *Store) createRootDir() error {
return os.MkdirAll(s.path, 0777)
}
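// writeNodeID persists the node ID to the ID file, creating the root directory if needed.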
func (s *Store) writeNodeID(id uint64) error {
if err := s.createRootDir(); err != nil {
return err
}
return ioutil.WriteFile(s.IDPath(), []byte(strconv.FormatUint(id, 10)), 0666)
}
// Snapshot saves a snapshot of the current state.
func (s *Store) Snapshot() error {
return s.raftState.snapshot()
}
// WaitForLeader sleeps until a leader is found or a timeout occurs.
// timeout == 0 means to wait forever.
func (s *Store) WaitForLeader(timeout time.Duration) error {
// Begin timeout timer.
timer := time.NewTimer(timeout)
defer timer.Stop()
// Continually check for leader until timeout.
ticker := time.NewTicker(100 * time.Millisecond)
defer ticker.Stop()
for {
select {
case <-s.closing:
return errors.New("closing")
case <-timer.C:
if timeout != 0 {
return errors.New("timeout")
}
case <-ticker.C:
if s.Leader() != "" {
return nil
}
}
}
}
// Ready returns a channel that is closed once the store is initialized.
func (s *Store) Ready() <-chan struct{} { return s.ready }
// Err returns a channel for all out-of-band errors.
func (s *Store) Err() <-chan error { return s.err }
// IsLeader returns true if the store is currently the leader.
func (s *Store) IsLeader() bool {
s.mu.RLock()
defer s.mu.RUnlock()
return s.raftState.isLeader()
}
// Leader returns what the store thinks is the current leader. An empty
// string indicates no leader exists.
func (s *Store) Leader() string {
s.mu.RLock()
defer s.mu.RUnlock()
if s.raftState == nil {
return ""
}
return s.raftState.leader()
}
// SetPeers sets a list of peers in the cluster.
func (s *Store) SetPeers(addrs []string) error {
return s.raftState.setPeers(addrs)
}
// AddPeer adds addr to the list of peers in the cluster.
func (s *Store) AddPeer(addr string) error {
return s.raftState.addPeer(addr)
}
// Peers returns the list of peers in the cluster.
func (s *Store) Peers() ([]string, error) {
s.mu.RLock()
defer s.mu.RUnlock()
return s.raftState.peers()
}
// serveExecListener processes remote exec connections.
// This function runs in a separate goroutine.
func (s *Store) serveExecListener() {
defer s.wg.Done()
for {
// Accept next TCP connection.
var err error
conn, err := s.ExecListener.Accept()
if err != nil {
if strings.Contains(err.Error(), "connection closed") {
return
}
s.Logger.Printf("temporary accept error: %s", err)
continue
}
// Handle connection in a separate goroutine.
s.wg.Add(1)
go s.handleExecConn(conn)
select {
case <-s.closing:
return
default:
}
}
}
// handleExecConn reads a command from the connection and executes it.
func (s *Store) handleExecConn(conn net.Conn) {
defer s.wg.Done()
// Nodes not part of the raft cluster may initiate remote exec commands
// but may not know who the current leader of the cluster is. If we are not
// the leader, proxy the request to the current leader.
if !s.IsLeader() {
if s.Leader() == s.RemoteAddr.String() {
s.Logger.Printf("No leader")
return
}
leaderConn, err := net.DialTimeout("tcp", s.Leader(), 10*time.Second)
if err != nil {
s.Logger.Printf("Dial leader: %v", err)
return
}
defer leaderConn.Close()
leaderConn.Write([]byte{MuxExecHeader})
if err := proxy(leaderConn.(*net.TCPConn), conn.(*net.TCPConn)); err != nil {
s.Logger.Printf("Leader proxy error: %v", err)
}
conn.Close()
return
}
// Read and execute command.
err := func() error {
// Read marker message.
b := make([]byte, 4)
if _, err := io.ReadFull(conn, b); err != nil {
return fmt.Errorf("read magic: %s", err)
} else if string(b) != ExecMagic {
return fmt.Errorf("invalid exec magic: %q", string(b))
}
// Read command size.
var sz uint64
if err := binary.Read(conn, binary.BigEndian, &sz); err != nil {
return fmt.Errorf("read size: %s", err)
}
// Read command.
buf := make([]byte, sz)
if _, err := io.ReadFull(conn, buf); err != nil {
return fmt.Errorf("read command: %s", err)
}
// Ensure command can be deserialized before applying.
if err := proto.Unmarshal(buf, &internal.Command{}); err != nil {
return fmt.Errorf("unable to unmarshal command: %s", err)
}
// Apply against the raft log.
if err := s.apply(buf); err != nil {
return err
}
return nil
}()
// Build response message.
var resp internal.Response
resp.OK = proto.Bool(err == nil)
resp.Index = proto.Uint64(s.raftState.lastIndex())
if err != nil {
resp.Error = proto.String(err.Error())
}
// Encode response back to connection.
if b, err := proto.Marshal(&resp); err != nil {
panic(err)
} else if err = binary.Write(conn, binary.BigEndian, uint64(len(b))); err != nil {
s.Logger.Printf("Unable to write exec response size: %s", err)
} else if _, err = conn.Write(b); err != nil {
s.Logger.Printf("Unable to write exec response: %s", err)
}
conn.Close()
}
// serveRPCListener processes cluster RPC connections.
// This function runs in a separate goroutine.
func (s *Store) serveRPCListener() {
defer s.wg.Done()
for {
// Accept next TCP connection.
conn, err := s.RPCListener.Accept()
if err != nil {
if strings.Contains(err.Error(), "connection closed") {
return
} else {
s.Logger.Printf("temporary accept error: %s", err)
continue
}
}
// Handle connection in a separate goroutine.
s.wg.Add(1)
go func() {
defer s.wg.Done()
s.rpc.handleRPCConn(conn)
}()
select {
case <-s.closing:
return
default:
}
}
}
// MarshalBinary encodes the store's data to a binary protobuf format.
func (s *Store) MarshalBinary() ([]byte, error) {
s.mu.RLock()
defer s.mu.RUnlock()
return s.data.MarshalBinary()
}
// ClusterID returns the unique identifier for the cluster.
// This is generated once a node has been created.
func (s *Store) ClusterID() (id uint64, err error) {
err = s.read(func(data *Data) error {
id = data.ClusterID
return nil
})
return
}
// NodeID returns the identifier for the local node.
// Returns 0 if the node has not joined a cluster.
func (s *Store) NodeID() uint64 { return s.id }
// Node returns a node by id.
func (s *Store) Node(id uint64) (ni *NodeInfo, err error) {
err = s.read(func(data *Data) error {
ni = data.Node(id)
if ni == nil {
return errInvalidate
}
return nil
})
return
}
// NodeByHost returns a node by hostname.
func (s *Store) NodeByHost(host string) (ni *NodeInfo, err error) {
err = s.read(func(data *Data) error {
ni = data.NodeByHost(host)
if ni == nil {
return errInvalidate
}
return nil
})
return
}
// Nodes returns a list of all nodes.
func (s *Store) Nodes() (a []NodeInfo, err error) {
err = s.read(func(data *Data) error {
a = data.Nodes
return nil
})
return
}
// CreateNode creates a new node in the store.
func (s *Store) CreateNode(host string) (*NodeInfo, error) {
if err := s.exec(internal.Command_CreateNodeCommand, internal.E_CreateNodeCommand_Command,
&internal.CreateNodeCommand{
Host: proto.String(host),
Rand: proto.Uint64(uint64(rand.Int63())),
},
); err != nil {
return nil, err
}
return s.NodeByHost(host)
}
// UpdateNode updates an existing node in the store.
func (s *Store) UpdateNode(id uint64, host string) (*NodeInfo, error) {
if err := s.exec(internal.Command_UpdateNodeCommand, internal.E_UpdateNodeCommand_Command,
&internal.UpdateNodeCommand{
ID: proto.Uint64(id),
Host: proto.String(host),
},
); err != nil {
return nil, err
}
return s.NodeByHost(host)
}
// DeleteNode removes a node from the metastore by id.
func (s *Store) DeleteNode(id uint64, force bool) error {
ni := s.data.Node(id)
if ni == nil {
return ErrNodeNotFound
}
err := s.exec(internal.Command_DeleteNodeCommand, internal.E_DeleteNodeCommand_Command,
&internal.DeleteNodeCommand{
ID: proto.Uint64(id),
Force: proto.Bool(force),
},
)
if err != nil {
return err
}
// Need to send a second message to remove the peer
return s.exec(internal.Command_RemovePeerCommand, internal.E_RemovePeerCommand_Command,
&internal.RemovePeerCommand{
ID: proto.Uint64(id),
Addr: proto.String(ni.Host),
},
)
}
// Database returns a database by name.
func (s *Store) Database(name string) (di *DatabaseInfo, err error) {
err = s.read(func(data *Data) error {
di = data.Database(name)
if di == nil {
return errInvalidate
}
return nil
})
return
}
// Databases returns a list of all databases.
func (s *Store) Databases() (dis []DatabaseInfo, err error) {
err = s.read(func(data *Data) error {
dis = data.Databases
return nil
})
return
}
// CreateDatabase creates a new database in the store.
func (s *Store) CreateDatabase(name string) (*DatabaseInfo, error) {
if err := s.exec(internal.Command_CreateDatabaseCommand, internal.E_CreateDatabaseCommand_Command,
&internal.CreateDatabaseCommand{
Name: proto.String(name),
},
); err != nil {
return nil, err
}
s.Logger.Printf("database '%s' created", name)
if s.retentionAutoCreate {
// Read node count.
// Retention policies must be fully replicated.
var nodeN int
if err := s.read(func(data *Data) error {
nodeN = len(data.Nodes)
return nil
}); err != nil {
return nil, fmt.Errorf("read: %s", err)
}
if nodeN > MaxAutoCreatedRetentionPolicyReplicaN {
nodeN = MaxAutoCreatedRetentionPolicyReplicaN
}
// Create a retention policy.
rpi := NewRetentionPolicyInfo(AutoCreateRetentionPolicyName)
rpi.ReplicaN = nodeN
rpi.Duration = AutoCreateRetentionPolicyPeriod
if _, err := s.CreateRetentionPolicy(name, rpi); err != nil {
return nil, err
}
// Set it as the default retention policy.
if err := s.SetDefaultRetentionPolicy(name, AutoCreateRetentionPolicyName); err != nil {
return nil, err
}
}
return s.Database(name)
}
// CreateDatabaseIfNotExists creates a new database in the store if it doesn't already exist.
func (s *Store) CreateDatabaseIfNotExists(name string) (*DatabaseInfo, error) {
// Try to find database locally first.
if di, err := s.Database(name); err != nil {
return nil, err
} else if di != nil {
return di, nil
}
// Attempt to create database.
di, err := s.CreateDatabase(name)
if err == ErrDatabaseExists {
return s.Database(name)
}
return di, err
}
// DropDatabase removes a database from the metastore by name.
func (s *Store) DropDatabase(name string) error {
return s.exec(internal.Command_DropDatabaseCommand, internal.E_DropDatabaseCommand_Command,
&internal.DropDatabaseCommand{
Name: proto.String(name),
},
)
}
// RetentionPolicy returns a retention policy for a database by name.
func (s *Store) RetentionPolicy(database, name string) (rpi *RetentionPolicyInfo, err error) {
err = s.read(func(data *Data) error {
rpi, err = data.RetentionPolicy(database, name)
if err != nil {
return err
} else if rpi == nil {
return errInvalidate
}
return nil
})
return
}
// DefaultRetentionPolicy returns the default retention policy for a database.
func (s *Store) DefaultRetentionPolicy(database string) (rpi *RetentionPolicyInfo, err error) {
err = s.read(func(data *Data) error {
di := data.Database(database)
if di == nil {
return ErrDatabaseNotFound
}
for i := range di.RetentionPolicies {
if di.RetentionPolicies[i].Name == di.DefaultRetentionPolicy {
rpi = &di.RetentionPolicies[i]
return nil
}
}
return errInvalidate
})
return
}
// RetentionPolicies returns a list of all retention policies for a database.
func (s *Store) RetentionPolicies(database string) (a []RetentionPolicyInfo, err error) {
err = s.read(func(data *Data) error {
di := data.Database(database)
if di == nil {
return ErrDatabaseNotFound
}
a = di.RetentionPolicies
return nil
})
return
}
// CreateRetentionPolicy creates a new retention policy for a database.
func (s *Store) CreateRetentionPolicy(database string, rpi *RetentionPolicyInfo) (*RetentionPolicyInfo, error) {
if rpi.Duration < MinRetentionPolicyDuration && rpi.Duration != 0 {
return nil, ErrRetentionPolicyDurationTooLow
}
if err := s.exec(internal.Command_CreateRetentionPolicyCommand, internal.E_CreateRetentionPolicyCommand_Command,
&internal.CreateRetentionPolicyCommand{
Database: proto.String(database),
RetentionPolicy: rpi.marshal(),
},
); err != nil {
return nil, err
}
s.Logger.Printf("retention policy '%s' for database '%s' created", rpi.Name, database)
return s.RetentionPolicy(database, rpi.Name)
}
// CreateRetentionPolicyIfNotExists creates a new policy in the store if it doesn't already exist.
func (s *Store) CreateRetentionPolicyIfNotExists(database string, rpi *RetentionPolicyInfo) (*RetentionPolicyInfo, error) {
// Try to find policy locally first.
if rpi, err := s.RetentionPolicy(database, rpi.Name); err != nil {
return nil, err
} else if rpi != nil {
return rpi, nil
}
// Attempt to create policy.
other, err := s.CreateRetentionPolicy(database, rpi)
if err == ErrRetentionPolicyExists {
return s.RetentionPolicy(database, rpi.Name)
}
return other, err
}
// SetDefaultRetentionPolicy sets the default retention policy for a database.
func (s *Store) SetDefaultRetentionPolicy(database, name string) error {
return s.exec(internal.Command_SetDefaultRetentionPolicyCommand, internal.E_SetDefaultRetentionPolicyCommand_Command,
&internal.SetDefaultRetentionPolicyCommand{
Database: proto.String(database),
Name: proto.String(name),
},
)
}
// UpdateRetentionPolicy updates an existing retention policy.
func (s *Store) UpdateRetentionPolicy(database, name string, rpu *RetentionPolicyUpdate) error {
var newName *string
if rpu.Name != nil {
newName = rpu.Name
}
var duration *int64
if rpu.Duration != nil {
value := int64(*rpu.Duration)
duration = &value
}
var replicaN *uint32
if rpu.ReplicaN != nil {
value := uint32(*rpu.ReplicaN)
replicaN = &value
}
return s.exec(internal.Command_UpdateRetentionPolicyCommand, internal.E_UpdateRetentionPolicyCommand_Command,
&internal.UpdateRetentionPolicyCommand{
Database: proto.String(database),
Name: proto.String(name),
NewName: newName,
Duration: duration,
ReplicaN: replicaN,
},
)
}
// DropRetentionPolicy removes a policy from a database by name.
func (s *Store) DropRetentionPolicy(database, name string) error {
return s.exec(internal.Command_DropRetentionPolicyCommand, internal.E_DropRetentionPolicyCommand_Command,
&internal.DropRetentionPolicyCommand{
Database: proto.String(database),
Name: proto.String(name),
},
)
}
// CreateShardGroup creates a new shard group in a retention policy for a given time.
func (s *Store) CreateShardGroup(database, policy string, timestamp time.Time) (*ShardGroupInfo, error) {
if err := s.exec(internal.Command_CreateShardGroupCommand, internal.E_CreateShardGroupCommand_Command,
&internal.CreateShardGroupCommand{
Database: proto.String(database),
Policy: proto.String(policy),
Timestamp: proto.Int64(timestamp.UnixNano()),
},
); err != nil {
return nil, err
}
return s.ShardGroupByTimestamp(database, policy, timestamp)
}
// CreateShardGroupIfNotExists creates a new shard group if one doesn't already exist.
func (s *Store) CreateShardGroupIfNotExists(database, policy string, timestamp time.Time) (*ShardGroupInfo, error) {
// Try to find shard group locally first.
if sgi, err := s.ShardGroupByTimestamp(database, policy, timestamp); err != nil {
return nil, err
} else if sgi != nil && !sgi.Deleted() {
return sgi, nil
}
// Attempt to create the shard group.
sgi, err := s.CreateShardGroup(database, policy, timestamp)
if err == ErrShardGroupExists {
return s.ShardGroupByTimestamp(database, policy, timestamp)
}
return sgi, err
}
// DeleteShardGroup removes an existing shard group from a policy by ID.
func (s *Store) DeleteShardGroup(database, policy string, id uint64) error {
return s.exec(internal.Command_DeleteShardGroupCommand, internal.E_DeleteShardGroupCommand_Command,
&internal.DeleteShardGroupCommand{
Database: proto.String(database),
Policy: proto.String(policy),
ShardGroupID: proto.Uint64(id),
},
)
}
// ShardGroups returns a list of all shard groups for a policy by timestamp.
func (s *Store) ShardGroups(database, policy string) (a []ShardGroupInfo, err error) {
err = s.read(func(data *Data) error {
a, err = data.ShardGroups(database, policy)
if err != nil {
return err
}
return nil
})
return
}
// ShardGroupsByTimeRange returns a slice of ShardGroups that may contain data for the given time range. ShardGroups
// are sorted by start time.
func (s *Store) ShardGroupsByTimeRange(database, policy string, tmin, tmax time.Time) (a []ShardGroupInfo, err error) {
err = s.read(func(data *Data) error {
a, err = data.ShardGroupsByTimeRange(database, policy, tmin, tmax)
if err != nil {
return err
} else if a == nil {
return errInvalidate
}
return nil
})
return
}
// VisitRetentionPolicies calls the given function with full retention policy details.
func (s *Store) VisitRetentionPolicies(f func(d DatabaseInfo, r RetentionPolicyInfo)) {
s.read(func(data *Data) error {
for _, di := range data.Databases {
for _, rp := range di.RetentionPolicies {
f(di, rp)
}
}
return nil
})
return
}
// ShardGroupByTimestamp returns a shard group for a policy by timestamp.
func (s *Store) ShardGroupByTimestamp(database, policy string, timestamp time.Time) (sgi *ShardGroupInfo, err error) {
err = s.read(func(data *Data) error {
sgi, err = data.ShardGroupByTimestamp(database, policy, timestamp)
if err != nil {
return err
} else if sgi == nil {
return errInvalidate
}
return nil
})
return
}
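// ShardOwner returns the database, retention policy, and shard group that
// contain the shard with the given ID.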
func (s *Store) ShardOwner(shardID uint64) (database, policy string, sgi *ShardGroupInfo) {
s.read(func(data *Data) error {
for _, dbi := range data.Databases {
for _, rpi := range dbi.RetentionPolicies {
for _, g := range rpi.ShardGroups {
if g.Deleted() {
continue
}
for _, sh := range g.Shards {
if sh.ID == shardID {
database = dbi.Name
policy = rpi.Name
sgi = &g
return nil
}
}
}
}
}
return errInvalidate
})
return
}
// CreateContinuousQuery creates a new continuous query on the store.
func (s *Store) CreateContinuousQuery(database, name, query string) error {
return s.exec(internal.Command_CreateContinuousQueryCommand, internal.E_CreateContinuousQueryCommand_Command,
&internal.CreateContinuousQueryCommand{
Database: proto.String(database),
Name: proto.String(name),
Query: proto.String(query),
},
)
}
// DropContinuousQuery removes a continuous query from the store.
func (s *Store) DropContinuousQuery(database, name string) error {
return s.exec(internal.Command_DropContinuousQueryCommand, internal.E_DropContinuousQueryCommand_Command,
&internal.DropContinuousQueryCommand{
Database: proto.String(database),
Name: proto.String(name),
},
)
}
// CreateSubscription creates a new subscription on the store.
func (s *Store) CreateSubscription(database, rp, name, mode string, destinations []string) error {
return s.exec(internal.Command_CreateSubscriptionCommand, internal.E_CreateSubscriptionCommand_Command,
&internal.CreateSubscriptionCommand{
Database: proto.String(database),
RetentionPolicy: proto.String(rp),
Name: proto.String(name),
Mode: proto.String(mode),
Destinations: destinations,
},
)
}
// DropSubscription removes a subscription from the store.
func (s *Store) DropSubscription(database, rp, name string) error {
return s.exec(internal.Command_DropSubscriptionCommand, internal.E_DropSubscriptionCommand_Command,
&internal.DropSubscriptionCommand{
Database: proto.String(database),
RetentionPolicy: proto.String(rp),
Name: proto.String(name),
},
)
}
// User returns a user by name.
func (s *Store) User(name string) (ui *UserInfo, err error) {
err = s.read(func(data *Data) error {
ui = data.User(name)
if ui == nil {
return errInvalidate
}
return nil
})
return
}
// Users returns a list of all users.
func (s *Store) Users() (a []UserInfo, err error) {
err = s.read(func(data *Data) error {
a = data.Users
return nil
})
return
}
// AdminUserExists returns true if an admin user exists on the system.
func (s *Store) AdminUserExists() (exists bool, err error) {
err = s.read(func(data *Data) error {
for i := range data.Users {
if data.Users[i].Admin {
exists = true
break
}
}
return nil
})
return
}
// ErrAuthenticate is returned when authentication fails.
var ErrAuthenticate = errors.New("authentication failed")
// Authenticate retrieves a user with a matching username and password.
func (s *Store) Authenticate(username, password string) (ui *UserInfo, err error) {
err = s.read(func(data *Data) error {
s.mu.Lock()
defer s.mu.Unlock()
// Find user.
u := data.User(username)
if u == nil {
return ErrUserNotFound
}
// Check the local auth cache first.
if au, ok := s.authCache[username]; ok {
// verify the password using the cached salt and hash
hashed, err := s.hashWithSalt(au.salt, password)
if err != nil {
return err
}
if bytes.Equal(hashed, au.hash) {
ui = u
return nil
}
return ErrAuthenticate
}
// Compare password with user hash.
if err := bcrypt.CompareHashAndPassword([]byte(u.Hash), []byte(password)); err != nil {
return ErrAuthenticate
}
// generate a salt and hash of the password for the cache
salt, hashed, err := s.saltedHash(password)
if err != nil {
return err
}
s.authCache[username] = authUser{salt: salt, hash: hashed}
ui = u
return nil
})
return
}
// hashWithSalt returns a salted hash of password using salt
func (s *Store) hashWithSalt(salt []byte, password string) ([]byte, error) {
hasher := sha256.New()
hasher.Write(append(salt, []byte(password)...))
return hasher.Sum(nil), nil
}
// saltedHash returns a salt and salted hash of password
func (s *Store) saltedHash(password string) (salt, hash []byte, err error) {
salt = make([]byte, SaltBytes)
_, err = io.ReadFull(crand.Reader, salt)
if err != nil {
return
}
hash, err = s.hashWithSalt(salt, password)
return
}
// CreateUser creates a new user in the store.
func (s *Store) CreateUser(name, password string, admin bool) (*UserInfo, error) {
// Hash the password before serializing it.
hash, err := s.hashPassword(password)
if err != nil {
return nil, err
}
// Serialize command and send it to the leader.
if err := s.exec(internal.Command_CreateUserCommand, internal.E_CreateUserCommand_Command,
&internal.CreateUserCommand{
Name: proto.String(name),
Hash: proto.String(string(hash)),
Admin: proto.Bool(admin),
},
); err != nil {
return nil, err
}
return s.User(name)
}
// DropUser removes a user from the metastore by name.
func (s *Store) DropUser(name string) error {
return s.exec(internal.Command_DropUserCommand, internal.E_DropUserCommand_Command,
&internal.DropUserCommand{
Name: proto.String(name),
},
)
}
// UpdateUser updates an existing user in the store.
func (s *Store) UpdateUser(name, password string) error {
// Hash the password before serializing it.
hash, err := s.hashPassword(password)
if err != nil {
return err
}
// Serialize command and send it to the leader.
return s.exec(internal.Command_UpdateUserCommand, internal.E_UpdateUserCommand_Command,
&internal.UpdateUserCommand{
Name: proto.String(name),
Hash: proto.String(string(hash)),
},
)
}
// SetPrivilege sets a privilege for a user on a database.
func (s *Store) SetPrivilege(username, database string, p influxql.Privilege) error {
return s.exec(internal.Command_SetPrivilegeCommand, internal.E_SetPrivilegeCommand_Command,
&internal.SetPrivilegeCommand{
Username: proto.String(username),
Database: proto.String(database),
Privilege: proto.Int32(int32(p)),
},
)
}
// SetAdminPrivilege sets the admin privilege for a user on a database.
func (s *Store) SetAdminPrivilege(username string, admin bool) error {
return s.exec(internal.Command_SetAdminPrivilegeCommand, internal.E_SetAdminPrivilegeCommand_Command,
&internal.SetAdminPrivilegeCommand{
Username: proto.String(username),
Admin: proto.Bool(admin),
},
)
}
// UserPrivileges returns the privileges for a user, keyed by database name.
func (s *Store) UserPrivileges(username string) (p map[string]influxql.Privilege, err error) {
err = s.read(func(data *Data) error {
p, err = data.UserPrivileges(username)
return err
})
return
}
// UserPrivilege returns a user's privilege on the given database.
func (s *Store) UserPrivilege(username, database string) (p *influxql.Privilege, err error) {
err = s.read(func(data *Data) error {
p, err = data.UserPrivilege(username, database)
return err
})
return
}
// UserCount returns the number of users defined in the cluster.
func (s *Store) UserCount() (count int, err error) {
err = s.read(func(data *Data) error {
count = len(data.Users)
return nil
})
return
}
// PrecreateShardGroups creates shard groups whose endtime is before the 'to' time passed in, but
// is yet to expire before 'from'. This is to avoid the need for these shards to be created when data
// for the corresponding time range arrives. Shard creation involves Raft consensus, and precreation
// avoids taking the hit at write-time.
func (s *Store) PrecreateShardGroups(from, to time.Time) error {
s.read(func(data *Data) error {
for _, di := range data.Databases {
for _, rp := range di.RetentionPolicies {
if len(rp.ShardGroups) == 0 {
// No data was ever written to this group, or all groups have been deleted.
continue
}
g := rp.ShardGroups[len(rp.ShardGroups)-1] // Get the last group in time.
if !g.Deleted() && g.EndTime.Before(to) && g.EndTime.After(from) {
// Group is not deleted, will end before the future time, but is still yet to expire.
// This last check is important, so the system doesn't create shard groups wholly
// in the past.
// Create successive shard group.
nextShardGroupTime := g.EndTime.Add(1 * time.Nanosecond)
if newGroup, err := s.CreateShardGroupIfNotExists(di.Name, rp.Name, nextShardGroupTime); err != nil {
s.Logger.Printf("failed to precreate successive shard group for group %d: %s",
g.ID, err.Error())
} else {
s.Logger.Printf("new shard group %d successfully precreated for database %s, retention policy %s",
newGroup.ID, di.Name, rp.Name)
}
}
}
}
return nil
})
return nil
}
// SetData force overwrites the root data.
// This should only be used when restoring a snapshot.
func (s *Store) SetData(data *Data) error {
return s.exec(internal.Command_SetDataCommand, internal.E_SetDataCommand_Command,
&internal.SetDataCommand{
Data: data.marshal(),
},
)
}
// read executes a function with the current metadata.
// If an error is returned then the cache is invalidated and retried.
//
// The error returned by the retry is passed through to the original caller
// unless the error is errInvalidate. A nil error is passed through when
// errInvalidate is returned.
func (s *Store) read(fn func(*Data) error) error {
// First use the cached metadata.
s.mu.RLock()
data := s.data
s.mu.RUnlock()
// Execute fn against cached data.
// Return immediately if there was no error.
if err := fn(data); err == nil {
return nil
}
// If an error occurred then invalidate cache and retry.
if err := s.invalidate(); err != nil {
return err
}
// Re-read the metadata.
s.mu.RLock()
data = s.data
s.mu.RUnlock()
// Passthrough error unless it is a cache invalidation.
if err := fn(data); err != nil && err != errInvalidate {
return err
}
return nil
}
// errInvalidate is returned to read() when the cache should be invalidated
// but an error should not be passed through to the caller.
var errInvalidate = errors.New("invalidate cache")
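// invalidate asks the underlying raft state to refresh the cached metadata.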
func (s *Store) invalidate() error {
return s.raftState.invalidate()
}
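// exec marshals a command into a protobuf and applies it to the raft log, either
// locally if this node is the leader or by forwarding it to the current leader.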
func (s *Store) exec(typ internal.Command_Type, desc *proto.ExtensionDesc, value interface{}) error {
// Create command.
cmd := &internal.Command{Type: &typ}
err := proto.SetExtension(cmd, desc, value)
assert(err == nil, "proto.SetExtension: %s", err)
// Marshal to a byte slice.
b, err := proto.Marshal(cmd)
assert(err == nil, "proto.Marshal: %s", err)
// Apply the command if this is the leader.
// Otherwise remotely execute the command against the current leader.
if s.raftState.isLeader() {
return s.apply(b)
}
return s.remoteExec(b)
}
// apply applies a serialized command to the raft log.
func (s *Store) apply(b []byte) error {
return s.raftState.apply(b)
}
// remoteExec sends an encoded command to the remote leader.
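// The wire format is: the MuxExecHeader byte, the ExecMagic marker, a big-endian
// uint64 command length, and the marshaled command bytes. The response is a
// big-endian uint64 length followed by a marshaled internal.Response.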
func (s *Store) remoteExec(b []byte) error {
// Retrieve the current known leader.
leader := s.raftState.leader()
if leader == "" {
return errors.New("no leader detected during remoteExec")
}
// Create a connection to the leader.
conn, err := net.DialTimeout("tcp", leader, 10*time.Second)
if err != nil {
return err
}
defer conn.Close()
// Write a marker byte for exec messages.
_, err = conn.Write([]byte{MuxExecHeader})
if err != nil {
return err
}
// Write a marker message.
_, err = conn.Write([]byte(ExecMagic))
if err != nil {
return err
}
// Write command size & bytes.
if err := binary.Write(conn, binary.BigEndian, uint64(len(b))); err != nil {
return fmt.Errorf("write command size: %s", err)
} else if _, err := conn.Write(b); err != nil {
return fmt.Errorf("write command: %s", err)
}
// Read response bytes.
var sz uint64
if err := binary.Read(conn, binary.BigEndian, &sz); err != nil {
return fmt.Errorf("read response size: %s", err)
}
buf := make([]byte, sz)
if _, err := io.ReadFull(conn, buf); err != nil {
return fmt.Errorf("read response: %s", err)
}
// Unmarshal response.
var resp internal.Response
if err := proto.Unmarshal(buf, &resp); err != nil {
return fmt.Errorf("unmarshal response: %s", err)
} else if !resp.GetOK() {
return lookupError(fmt.Errorf(resp.GetError()))
}
// Wait for local FSM to sync to index.
if err := s.sync(resp.GetIndex(), 5*time.Second); err != nil {
return fmt.Errorf("sync: %s", err)
}
return nil
}
// sync polls the state machine until it reaches a given index.
func (s *Store) sync(index uint64, timeout time.Duration) error {
return s.raftState.sync(index, timeout)
}
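// cachedData returns a clone of the currently cached metadata.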
func (s *Store) cachedData() *Data {
s.mu.RLock()
defer s.mu.RUnlock()
return s.data.Clone()
}
// BcryptCost is the cost associated with generating a password hash with bcrypt.
// This setting is lowered during testing to improve test suite performance.
var BcryptCost = 10
// HashPasswordFn represents a password hashing function.
type HashPasswordFn func(password string) ([]byte, error)
// GetHashPasswordFn returns the current password hashing function.
func (s *Store) GetHashPasswordFn() HashPasswordFn {
s.mu.RLock()
defer s.mu.RUnlock()
return s.hashPassword
}
// SetHashPasswordFn sets the password hashing function.
func (s *Store) SetHashPasswordFn(fn HashPasswordFn) {
s.mu.Lock()
defer s.mu.Unlock()
s.hashPassword = fn
}
// notifyChanged closes the changed channel, which broadcasts to all waiting
// goroutines that the meta store has been updated. Callers are responsible for locking
// the meta store before calling this.
func (s *Store) notifyChanged() {
close(s.changed)
s.changed = make(chan struct{})
}
// storeFSM represents the finite state machine used by Store to interact with Raft.
type storeFSM Store
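// Apply applies a raft log entry to the FSM by dispatching on the command type,
// then records the entry's term and index in the metadata.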
func (fsm *storeFSM) Apply(l *raft.Log) interface{} {
var cmd internal.Command
if err := proto.Unmarshal(l.Data, &cmd); err != nil {
panic(fmt.Errorf("cannot marshal command: %x", l.Data))
}
// Lock the store.
s := (*Store)(fsm)
s.mu.Lock()
defer s.mu.Unlock()
err := func() interface{} {
switch cmd.GetType() {
case internal.Command_RemovePeerCommand:
return fsm.applyRemovePeerCommand(&cmd)
case internal.Command_CreateNodeCommand:
return fsm.applyCreateNodeCommand(&cmd)
case internal.Command_DeleteNodeCommand:
return fsm.applyDeleteNodeCommand(&cmd)
case internal.Command_CreateDatabaseCommand:
return fsm.applyCreateDatabaseCommand(&cmd)
case internal.Command_DropDatabaseCommand:
return fsm.applyDropDatabaseCommand(&cmd)
case internal.Command_CreateRetentionPolicyCommand:
return fsm.applyCreateRetentionPolicyCommand(&cmd)
case internal.Command_DropRetentionPolicyCommand:
return fsm.applyDropRetentionPolicyCommand(&cmd)
case internal.Command_SetDefaultRetentionPolicyCommand:
return fsm.applySetDefaultRetentionPolicyCommand(&cmd)
case internal.Command_UpdateRetentionPolicyCommand:
return fsm.applyUpdateRetentionPolicyCommand(&cmd)
case internal.Command_CreateShardGroupCommand:
return fsm.applyCreateShardGroupCommand(&cmd)
case internal.Command_DeleteShardGroupCommand:
return fsm.applyDeleteShardGroupCommand(&cmd)
case internal.Command_CreateContinuousQueryCommand:
return fsm.applyCreateContinuousQueryCommand(&cmd)
case internal.Command_DropContinuousQueryCommand:
return fsm.applyDropContinuousQueryCommand(&cmd)
case internal.Command_CreateSubscriptionCommand:
return fsm.applyCreateSubscriptionCommand(&cmd)
case internal.Command_DropSubscriptionCommand:
return fsm.applyDropSubscriptionCommand(&cmd)
case internal.Command_CreateUserCommand:
return fsm.applyCreateUserCommand(&cmd)
case internal.Command_DropUserCommand:
return fsm.applyDropUserCommand(&cmd)
case internal.Command_UpdateUserCommand:
return fsm.applyUpdateUserCommand(&cmd)
case internal.Command_SetPrivilegeCommand:
return fsm.applySetPrivilegeCommand(&cmd)
case internal.Command_SetAdminPrivilegeCommand:
return fsm.applySetAdminPrivilegeCommand(&cmd)
case internal.Command_SetDataCommand:
return fsm.applySetDataCommand(&cmd)
case internal.Command_UpdateNodeCommand:
return fsm.applyUpdateNodeCommand(&cmd)
default:
panic(fmt.Errorf("cannot apply command: %x", l.Data))
}
}()
// Copy term and index to new metadata.
fsm.data.Term = l.Term
fsm.data.Index = l.Index
s.notifyChanged()
return err
}
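// applyRemovePeerCommand removes a node from the raft peer set (leader only) and
// shuts down raft locally if this node is the one being removed.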
func (fsm *storeFSM) applyRemovePeerCommand(cmd *internal.Command) interface{} {
ext, _ := proto.GetExtension(cmd, internal.E_RemovePeerCommand_Command)
v := ext.(*internal.RemovePeerCommand)
id := v.GetID()
addr := v.GetAddr()
// Only do this if you are the leader
if fsm.raftState.isLeader() {
// Remove that node from the peers
fsm.Logger.Printf("removing peer for node id %d, %s", id, addr)
if err := fsm.raftState.removePeer(addr); err != nil {
fsm.Logger.Printf("error removing peer: %s", err)
}
}
// If this is the node being shutdown, close raft
if fsm.id == id {
fsm.Logger.Printf("shutting down raft for %s", addr)
if err := fsm.raftState.close(); err != nil {
fsm.Logger.Printf("failed to shut down raft: %s", err)
}
}
return nil
}
func (fsm *storeFSM) applyCreateNodeCommand(cmd *internal.Command) interface{} {
ext, _ := proto.GetExtension(cmd, internal.E_CreateNodeCommand_Command)
v := ext.(*internal.CreateNodeCommand)
// Copy data and update.
other := fsm.data.Clone()
if err := other.CreateNode(v.GetHost()); err != nil {
return err
}
// If the cluster ID hasn't been set then use the command's random number.
if other.ClusterID == 0 {
other.ClusterID = uint64(v.GetRand())
}
fsm.data = other
return nil
}
func (fsm *storeFSM) applyUpdateNodeCommand(cmd *internal.Command) interface{} {
ext, _ := proto.GetExtension(cmd, internal.E_UpdateNodeCommand_Command)
v := ext.(*internal.UpdateNodeCommand)
// Copy data and update.
other := fsm.data.Clone()
ni := other.Node(v.GetID())
if ni == nil {
return ErrNodeNotFound
}
ni.Host = v.GetHost()
fsm.data = other
return nil
}
func (fsm *storeFSM) applyDeleteNodeCommand(cmd *internal.Command) interface{} {
ext, _ := proto.GetExtension(cmd, internal.E_DeleteNodeCommand_Command)
v := ext.(*internal.DeleteNodeCommand)
// Copy data and update.
other := fsm.data.Clone()
if err := other.DeleteNode(v.GetID(), v.GetForce()); err != nil {
return err
}
fsm.data = other
id := v.GetID()
fsm.Logger.Printf("node '%d' removed", id)
return nil
}
func (fsm *storeFSM) applyCreateDatabaseCommand(cmd *internal.Command) interface{} {
ext, _ := proto.GetExtension(cmd, internal.E_CreateDatabaseCommand_Command)
v := ext.(*internal.CreateDatabaseCommand)
// Copy data and update.
other := fsm.data.Clone()
if err := other.CreateDatabase(v.GetName()); err != nil {
return err
}
fsm.data = other
return nil
}
func (fsm *storeFSM) applyDropDatabaseCommand(cmd *internal.Command) interface{} {
ext, _ := proto.GetExtension(cmd, internal.E_DropDatabaseCommand_Command)
v := ext.(*internal.DropDatabaseCommand)
// Copy data and update.
other := fsm.data.Clone()
if err := other.DropDatabase(v.GetName()); err != nil {
return err
}
fsm.data = other
return nil
}
func (fsm *storeFSM) applyCreateRetentionPolicyCommand(cmd *internal.Command) interface{} {
ext, _ := proto.GetExtension(cmd, internal.E_CreateRetentionPolicyCommand_Command)
v := ext.(*internal.CreateRetentionPolicyCommand)
pb := v.GetRetentionPolicy()
// Copy data and update.
other := fsm.data.Clone()
if err := other.CreateRetentionPolicy(v.GetDatabase(),
&RetentionPolicyInfo{
Name: pb.GetName(),
ReplicaN: int(pb.GetReplicaN()),
Duration: time.Duration(pb.GetDuration()),
ShardGroupDuration: time.Duration(pb.GetShardGroupDuration()),
}); err != nil {
return err
}
fsm.data = other
return nil
}
func (fsm *storeFSM) applyDropRetentionPolicyCommand(cmd *internal.Command) interface{} {
ext, _ := proto.GetExtension(cmd, internal.E_DropRetentionPolicyCommand_Command)
v := ext.(*internal.DropRetentionPolicyCommand)
// Copy data and update.
other := fsm.data.Clone()
if err := other.DropRetentionPolicy(v.GetDatabase(), v.GetName()); err != nil {
return err
}
fsm.data = other
return nil
}
func (fsm *storeFSM) applySetDefaultRetentionPolicyCommand(cmd *internal.Command) interface{} {
ext, _ := proto.GetExtension(cmd, internal.E_SetDefaultRetentionPolicyCommand_Command)
v := ext.(*internal.SetDefaultRetentionPolicyCommand)
// Copy data and update.
other := fsm.data.Clone()
if err := other.SetDefaultRetentionPolicy(v.GetDatabase(), v.GetName()); err != nil {
return err
}
fsm.data = other
return nil
}
func (fsm *storeFSM) applyUpdateRetentionPolicyCommand(cmd *internal.Command) interface{} {
ext, _ := proto.GetExtension(cmd, internal.E_UpdateRetentionPolicyCommand_Command)
v := ext.(*internal.UpdateRetentionPolicyCommand)
// Create update object.
rpu := RetentionPolicyUpdate{Name: v.NewName}
if v.Duration != nil {
value := time.Duration(v.GetDuration())
rpu.Duration = &value
}
if v.ReplicaN != nil {
value := int(v.GetReplicaN())
rpu.ReplicaN = &value
}
// Copy data and update.
other := fsm.data.Clone()
if err := other.UpdateRetentionPolicy(v.GetDatabase(), v.GetName(), &rpu); err != nil {
return err
}
fsm.data = other
return nil
}
func (fsm *storeFSM) applyCreateShardGroupCommand(cmd *internal.Command) interface{} {
ext, _ := proto.GetExtension(cmd, internal.E_CreateShardGroupCommand_Command)
v := ext.(*internal.CreateShardGroupCommand)
// Copy data and update.
other := fsm.data.Clone()
if err := other.CreateShardGroup(v.GetDatabase(), v.GetPolicy(), time.Unix(0, v.GetTimestamp())); err != nil {
return err
}
fsm.data = other
return nil
}
func (fsm *storeFSM) applyDeleteShardGroupCommand(cmd *internal.Command) interface{} {
ext, _ := proto.GetExtension(cmd, internal.E_DeleteShardGroupCommand_Command)
v := ext.(*internal.DeleteShardGroupCommand)
// Copy data and update.
other := fsm.data.Clone()
if err := other.DeleteShardGroup(v.GetDatabase(), v.GetPolicy(), v.GetShardGroupID()); err != nil {
return err
}
fsm.data = other
return nil
}
func (fsm *storeFSM) applyCreateContinuousQueryCommand(cmd *internal.Command) interface{} {
ext, _ := proto.GetExtension(cmd, internal.E_CreateContinuousQueryCommand_Command)
v := ext.(*internal.CreateContinuousQueryCommand)
// Copy data and update.
other := fsm.data.Clone()
if err := other.CreateContinuousQuery(v.GetDatabase(), v.GetName(), v.GetQuery()); err != nil {
return err
}
fsm.data = other
return nil
}
func (fsm *storeFSM) applyDropContinuousQueryCommand(cmd *internal.Command) interface{} {
ext, _ := proto.GetExtension(cmd, internal.E_DropContinuousQueryCommand_Command)
v := ext.(*internal.DropContinuousQueryCommand)
// Copy data and update.
other := fsm.data.Clone()
if err := other.DropContinuousQuery(v.GetDatabase(), v.GetName()); err != nil {
return err
}
fsm.data = other
return nil
}
func (fsm *storeFSM) applyCreateSubscriptionCommand(cmd *internal.Command) interface{} {
ext, _ := proto.GetExtension(cmd, internal.E_CreateSubscriptionCommand_Command)
v := ext.(*internal.CreateSubscriptionCommand)
// Copy data and update.
other := fsm.data.Clone()
if err := other.CreateSubscription(v.GetDatabase(), v.GetRetentionPolicy(), v.GetName(), v.GetMode(), v.GetDestinations()); err != nil {
return err
}
fsm.data = other
return nil
}
func (fsm *storeFSM) applyDropSubscriptionCommand(cmd *internal.Command) interface{} {
ext, _ := proto.GetExtension(cmd, internal.E_DropSubscriptionCommand_Command)
v := ext.(*internal.DropSubscriptionCommand)
// Copy data and update.
other := fsm.data.Clone()
if err := other.DropSubscription(v.GetDatabase(), v.GetRetentionPolicy(), v.GetName()); err != nil {
return err
}
fsm.data = other
return nil
}
func (fsm *storeFSM) applyCreateUserCommand(cmd *internal.Command) interface{} {
ext, _ := proto.GetExtension(cmd, internal.E_CreateUserCommand_Command)
v := ext.(*internal.CreateUserCommand)
// Copy data and update.
other := fsm.data.Clone()
if err := other.CreateUser(v.GetName(), v.GetHash(), v.GetAdmin()); err != nil {
return err
}
fsm.data = other
return nil
}
func (fsm *storeFSM) applyDropUserCommand(cmd *internal.Command) interface{} {
ext, _ := proto.GetExtension(cmd, internal.E_DropUserCommand_Command)
v := ext.(*internal.DropUserCommand)
// Copy data and update.
other := fsm.data.Clone()
if err := other.DropUser(v.GetName()); err != nil {
return err
}
fsm.data = other
delete(fsm.authCache, v.GetName())
return nil
}
func (fsm *storeFSM) applyUpdateUserCommand(cmd *internal.Command) interface{} {
ext, _ := proto.GetExtension(cmd, internal.E_UpdateUserCommand_Command)
v := ext.(*internal.UpdateUserCommand)
// Copy data and update.
other := fsm.data.Clone()
if err := other.UpdateUser(v.GetName(), v.GetHash()); err != nil {
return err
}
fsm.data = other
delete(fsm.authCache, v.GetName())
return nil
}
func (fsm *storeFSM) applySetPrivilegeCommand(cmd *internal.Command) interface{} {
ext, _ := proto.GetExtension(cmd, internal.E_SetPrivilegeCommand_Command)
v := ext.(*internal.SetPrivilegeCommand)
// Copy data and update.
other := fsm.data.Clone()
if err := other.SetPrivilege(v.GetUsername(), v.GetDatabase(), influxql.Privilege(v.GetPrivilege())); err != nil {
return err
}
fsm.data = other
return nil
}
func (fsm *storeFSM) applySetAdminPrivilegeCommand(cmd *internal.Command) interface{} {
ext, _ := proto.GetExtension(cmd, internal.E_SetAdminPrivilegeCommand_Command)
v := ext.(*internal.SetAdminPrivilegeCommand)
// Copy data and update.
other := fsm.data.Clone()
if err := other.SetAdminPrivilege(v.GetUsername(), v.GetAdmin()); err != nil {
return err
}
fsm.data = other
return nil
}
func (fsm *storeFSM) applySetDataCommand(cmd *internal.Command) interface{} {
ext, _ := proto.GetExtension(cmd, internal.E_SetDataCommand_Command)
v := ext.(*internal.SetDataCommand)
// Overwrite data.
fsm.data = &Data{}
fsm.data.unmarshal(v.GetData())
return nil
}
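// Snapshot returns a snapshot of the current metadata for raft to persist.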
func (fsm *storeFSM) Snapshot() (raft.FSMSnapshot, error) {
s := (*Store)(fsm)
s.mu.Lock()
defer s.mu.Unlock()
return &storeFSMSnapshot{Data: (*Store)(fsm).data}, nil
}
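// Restore replaces the FSM's metadata with the snapshot read from r.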
func (fsm *storeFSM) Restore(r io.ReadCloser) error {
// Read all bytes.
b, err := ioutil.ReadAll(r)
if err != nil {
return err
}
// Decode metadata.
data := &Data{}
if err := data.UnmarshalBinary(b); err != nil {
return err
}
// Set metadata on store.
// NOTE: No lock because Hashicorp Raft doesn't call Restore concurrently
// with any other function.
fsm.data = data
return nil
}
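// storeFSMSnapshot is a point-in-time copy of the metadata that raft persists as a snapshot.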
type storeFSMSnapshot struct {
Data *Data
}
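// Persist writes the snapshot's encoded metadata to the raft snapshot sink,
// canceling the sink on error.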
func (s *storeFSMSnapshot) Persist(sink raft.SnapshotSink) error {
err := func() error {
// Encode data.
p, err := s.Data.MarshalBinary()
if err != nil {
return err
}
// Write data to sink.
if _, err := sink.Write(p); err != nil {
return err
}
// Close the sink.
if err := sink.Close(); err != nil {
return err
}
return nil
}()
if err != nil {
sink.Cancel()
return err
}
return nil
}
// Release is invoked when we are finished with the snapshot
func (s *storeFSMSnapshot) Release() {}
// raftLayer wraps the connection so it can be re-used for forwarding.
type raftLayer struct {
ln net.Listener
addr net.Addr
conn chan net.Conn
closed chan struct{}
}
// newRaftLayer returns a new instance of raftLayer.
func newRaftLayer(ln net.Listener, addr net.Addr) *raftLayer {
return &raftLayer{
ln: ln,
addr: addr,
conn: make(chan net.Conn),
closed: make(chan struct{}),
}
}
// Addr returns the local address for the layer.
func (l *raftLayer) Addr() net.Addr { return l.addr }
// Dial creates a new network connection.
func (l *raftLayer) Dial(addr string, timeout time.Duration) (net.Conn, error) {
conn, err := net.DialTimeout("tcp", addr, timeout)
if err != nil {
return nil, err
}
// Write a marker byte for raft messages.
_, err = conn.Write([]byte{MuxRaftHeader})
if err != nil {
conn.Close()
return nil, err
}
return conn, err
}
// Accept waits for the next connection.
func (l *raftLayer) Accept() (net.Conn, error) { return l.ln.Accept() }
// Close closes the layer.
func (l *raftLayer) Close() error { return l.ln.Close() }
// RetentionPolicyUpdate represents retention policy fields to be updated.
type RetentionPolicyUpdate struct {
Name *string
Duration *time.Duration
ReplicaN *int
}
func (rpu *RetentionPolicyUpdate) SetName(v string) { rpu.Name = &v }
func (rpu *RetentionPolicyUpdate) SetDuration(v time.Duration) { rpu.Duration = &v }
func (rpu *RetentionPolicyUpdate) SetReplicaN(v int) { rpu.ReplicaN = &v }
// assert will panic with a given formatted message if the given condition is false.
func assert(condition bool, msg string, v ...interface{}) {
if !condition {
panic(fmt.Sprintf("assert failed: "+msg, v...))
}
}