diff --git a/CHANGELOG.md b/CHANGELOG.md index 85730cafd..f7fcae123 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,17 +1,36 @@ +## v0.1.4 [2015-07-09] + +### Features +- [#56](https://github.com/influxdb/telegraf/pull/56): Update README for Kafka plugin. Thanks @EmilS! + +### Bugfixes +- [#50](https://github.com/influxdb/telegraf/pull/50): Fix init.sh script to use telegraf directory. Thanks @jseriff! +- [#52](https://github.com/influxdb/telegraf/pull/52): Update CHANGELOG to reference updated directory. Thanks @benfb! + +## v0.1.3 [2015-07-05] + +### Features +- [#35](https://github.com/influxdb/telegraf/pull/35): Add Kafka plugin. Thanks @EmilS! +- [#47](https://github.com/influxdb/telegraf/pull/47): Add RethinkDB plugin. Thanks @jipperinbham! + +### Bugfixes +- [#45](https://github.com/influxdb/telegraf/pull/45): Skip disk tags that don't have a value. Thanks @jhofeditz! +- [#43](https://github.com/influxdb/telegraf/pull/43): Fix bug in MySQL plugin. Thanks @marcosnils! + ## v0.1.2 [2015-07-01] ### Features -- [#12](https://github.com/influxdb/influxdb/pull/12): Add Linux/ARM to the list of built binaries. Thanks @voxxit! -- [#14](https://github.com/influxdb/influxdb/pull/14): Clarify the S3 buckets that Telegraf is pushed to. -- [#16](https://github.com/influxdb/influxdb/pull/16): Convert Redis to use URI, support Redis AUTH. Thanks @jipperinbham! -- [#21](https://github.com/influxdb/influxdb/pull/21): Add memcached plugiun. Thanks @Yukki! +- [#12](https://github.com/influxdb/telegraf/pull/12): Add Linux/ARM to the list of built binaries. Thanks @voxxit! +- [#14](https://github.com/influxdb/telegraf/pull/14): Clarify the S3 buckets that Telegraf is pushed to. +- [#16](https://github.com/influxdb/telegraf/pull/16): Convert Redis to use URI, support Redis AUTH. Thanks @jipperinbham! +- [#21](https://github.com/influxdb/telegraf/pull/21): Add memcached plugin. Thanks @Yukki! ### Bugfixes -- [#13](https://github.com/influxdb/influxdb/pull/13): Fix the packaging script. -- [#19](https://github.com/influxdb/influxdb/pull/19): Add host name to metric tags. Thanks @sherifzain! -- [#20](https://github.com/influxdb/influxdb/pull/20): Fix race condition with accumulator mutex. Thanks @nkatsaros! -- [#23](https://github.com/influxdb/influxdb/pull/23): Change name of folder for packages. Thanks @colinrymer! -- [#32](https://github.com/influxdb/influxdb/pull/32): Fix spelling of memoory -> memory. Thanks @tylernisonoff! +- [#13](https://github.com/influxdb/telegraf/pull/13): Fix the packaging script. +- [#19](https://github.com/influxdb/telegraf/pull/19): Add host name to metric tags. Thanks @sherifzain! +- [#20](https://github.com/influxdb/telegraf/pull/20): Fix race condition with accumulator mutex. Thanks @nkatsaros! +- [#23](https://github.com/influxdb/telegraf/pull/23): Change name of folder for packages. Thanks @colinrymer! +- [#32](https://github.com/influxdb/telegraf/pull/32): Fix spelling of memoory -> memory. Thanks @tylernisonoff! ## v0.1.1 [2015-06-19] diff --git a/Makefile b/Makefile new file mode 100644 index 000000000..f31297ab0 --- /dev/null +++ b/Makefile @@ -0,0 +1,28 @@ +UNAME := $(shell sh -c 'uname') + +ifeq ($(UNAME), Darwin) + export ADVERTISED_HOST := $(shell sh -c 'boot2docker ip') +endif +ifeq ($(UNAME), Linux) + export ADVERTISED_HOST := localhost +endif + +prepare: + go get -d -v -t ./... + +docker-compose: + docker-compose up -d + +test: prepare docker-compose + go test -v ./... + +test-short: prepare + go test -v -short ./... 
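+
+# Tear down the docker-compose containers started for the integration tests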
+test-cleanup:
+	docker-compose kill
+
+update:
+	go get -u -v -d -t ./...
+
+.PHONY: test
diff --git a/README.md b/README.md
index 711903f51..b7e1e9fdb 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,4 @@
-# Telegraf - A native agent for InfluxDB
+# Telegraf - A native agent for InfluxDB [![Circle CI](https://circleci.com/gh/influxdb/telegraf.svg?style=svg)](https://circleci.com/gh/influxdb/telegraf)
 
 Telegraf is an agent written in Go for collecting metrics from the system it's
 running on or from other services and writing them into InfluxDB.
@@ -13,8 +13,8 @@ We'll eagerly accept pull requests for new plugins and will manage the set of pl
 ### Linux packages for Debian/Ubuntu and RHEL/CentOS:
 
 ```
-http://get.influxdb.org/telegraf/telegraf_0.1.2_amd64.deb
-http://get.influxdb.org/telegraf/telegraf-0.1.2-1.x86_64.rpm
+http://get.influxdb.org/telegraf/telegraf_0.1.4_amd64.deb
+http://get.influxdb.org/telegraf/telegraf-0.1.4-1.x86_64.rpm
 ```
 
 ### OSX via Homebrew:
@@ -47,8 +47,16 @@ Telegraf currently has support for collecting metrics from:
 * System (memory, CPU, network, etc.)
 * Docker
 * MySQL
+* Prometheus (client libraries and exporters)
 * PostgreSQL
 * Redis
+* Elasticsearch
+* RethinkDB
+* Kafka
+* MongoDB
+* Disque
+* Lustre2
+* Memcached
 
 We'll be adding support for many more over the coming months. Read on if you
 want to add support for another service or third-party API.
@@ -141,6 +149,7 @@ func Gather(acc plugins.Accumulator) error {
 ### Example
 
 ```go
+package simple
 // simple.go
@@ -169,7 +178,36 @@ func (s *Simple) Gather(acc plugins.Accumulator) error {
 }
 
 func init() {
-	plugins.Add("simple", func() plugins.Plugin { &Simple{} })
+	plugins.Add("simple", func() plugins.Plugin { return &Simple{} })
 }
 ```
 
+## Testing
+
+### Execute short tests:
+
+execute `make test-short`
+
+### Execute long tests:
+
+Because Telegraf collects metrics from several third-party services, mocking
+every service is impractical: some of them have complicated protocols that
+would take significant time to replicate.
+
+To overcome this situation we've decided to use docker containers to provide a
+fast and reproducible environment for testing the services that require it.
+For other situations
+(e.g. https://github.com/influxdb/telegraf/blob/master/plugins/redis/redis_test.go )
+a simple mock will suffice.
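+
+For the mock approach, a test constructs the plugin directly and gathers into
+a `testutil.Accumulator`. Below is a minimal sketch of that pattern for the
+`Simple` plugin from the example above (the measurement name and value are
+placeholders for whatever your plugin adds, not real output of this repo):
+
+```go
+package simple
+
+import (
+	"testing"
+
+	"github.com/influxdb/telegraf/testutil"
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+)
+
+func TestSimpleGeneratesMetrics(t *testing.T) {
+	s := &Simple{}
+
+	// The accumulator records every Add() call made by the plugin,
+	// so the test can assert on exactly what was gathered.
+	var acc testutil.Accumulator
+	require.NoError(t, s.Gather(&acc))
+
+	// CheckValue reports whether the named measurement was added
+	// with this value (placeholder values shown here).
+	assert.True(t, acc.CheckValue("state", "pretty good"))
+}
+```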
+
+To execute Telegraf tests follow these simple steps:
+
+- Install Docker Compose following [these](https://docs.docker.com/compose/install/) instructions
+  - NOTE: Mac users should be able to simply do `brew install boot2docker`
+    and `brew install docker-compose`
+- execute `make test`
+
+### Unit test troubleshooting:
+
+Try cleaning up your test environment by executing `make test-cleanup` and
+re-running `make test`.
diff --git a/accumulator.go b/accumulator.go
index ab5a02dae..b3f7a4511 100644
--- a/accumulator.go
+++ b/accumulator.go
@@ -10,6 +10,8 @@ import (
 	"github.com/influxdb/influxdb/client"
 )
 
+// BatchPoints is used to send a batch of data in a single write from telegraf
+// to influx
 type BatchPoints struct {
 	mu sync.Mutex
 
@@ -22,6 +24,7 @@ type BatchPoints struct {
 	Config *ConfiguredPlugin
 }
 
+// Add adds a measurement
 func (bp *BatchPoints) Add(measurement string, val interface{}, tags map[string]string) {
 	bp.mu.Lock()
 	defer bp.mu.Unlock()
@@ -55,6 +58,7 @@ func (bp *BatchPoints) Add(measurement string, val interface{}, tags map[string]
 	})
 }
 
+// AddValuesWithTime adds a measurement with a provided timestamp
 func (bp *BatchPoints) AddValuesWithTime(
 	measurement string,
 	values map[string]interface{},
diff --git a/agent.go b/agent.go
index 6b31e397d..ff8572048 100644
--- a/agent.go
+++ b/agent.go
@@ -23,8 +23,13 @@ type runningPlugin struct {
 	config *ConfiguredPlugin
 }
 
+// Agent runs telegraf and collects data based on the given config
 type Agent struct {
+
+	// Interval at which to gather information
 	Interval Duration
+
+	// Run in debug mode?
 	Debug    bool
 	Hostname string
 
@@ -34,6 +39,7 @@ type Agent struct {
 	plugins []*runningPlugin
}
 
+// NewAgent returns an Agent struct based off the given Config
 func NewAgent(config *Config) (*Agent, error) {
 	agent := &Agent{Config: config, Interval: Duration{10 * time.Second}}
 
@@ -95,6 +101,7 @@ func (a *Agent) LoadOutputs() ([]string, error) {
 	return names, nil
 }
 
+// LoadPlugins loads the agent's plugins
 func (a *Agent) LoadPlugins() ([]string, error) {
 	var names []string
 
@@ -228,10 +235,12 @@ func (a *Agent) flush(bp BatchPoints) error {
 	return outerr
 }
 
+// TestAllPlugins verifies that we can 'Gather' from all plugins with the
+// default configuration
 func (a *Agent) TestAllPlugins() error {
 	var names []string
 
-	for name, _ := range plugins.Plugins {
+	for name := range plugins.Plugins {
 		names = append(names, name)
 	}
 
@@ -257,6 +266,8 @@ func (a *Agent) TestAllPlugins() error {
 	return nil
 }
 
+// Test verifies that we can 'Gather' from all plugins with their configured
+// Config struct
 func (a *Agent) Test() error {
 	var acc BatchPoints
 
@@ -280,6 +291,7 @@ func (a *Agent) Test() error {
 	return nil
 }
 
+// Run runs the agent daemon, gathering every Interval
 func (a *Agent) Run(shutdown chan struct{}) error {
 	var wg sync.WaitGroup
 
diff --git a/circle.yml b/circle.yml
new file mode 100644
index 000000000..3b9888ffd
--- /dev/null
+++ b/circle.yml
@@ -0,0 +1,24 @@
+dependencies:
+  post:
+    # install golint
+    - go get github.com/golang/lint/golint
+    # install binaries
+    - go install ./...
+
+test:
+  pre:
+    # Vet go code for any potential errors
+    - go vet ./...
+    # Verify that all files are properly go formatted
+    - "[ `git ls-files | grep '.go$' | xargs gofmt -l 2>&1 | wc -l` -eq 0 ]"
+    # Only docker-compose up kafka, the other services are already running
+    # see: https://circleci.com/docs/environment#databases
+    # - docker-compose up -d kafka
+  override:
+    # Enforce that testutil, cmd, and main directory are fully linted
+    - golint .
+    - golint testutil/...
+    - golint cmd/...
+    # Run short unit tests
+    - make test-short
+    # TODO run full unit test suite
diff --git a/cmd/telegraf/telegraf.go b/cmd/telegraf/telegraf.go
index ea78ef6de..d4659620a 100644
--- a/cmd/telegraf/telegraf.go
+++ b/cmd/telegraf/telegraf.go
@@ -20,7 +20,10 @@ var fVersion = flag.Bool("version", false, "display the version")
 var fSampleConfig = flag.Bool("sample-config", false, "print out full sample configuration")
 var fPidfile = flag.String("pidfile", "", "file to write our pid to")
 
+// Telegraf version
 var Version = "unreleased"
+
+// Telegraf commit
 var Commit = ""
 
 func main() {
diff --git a/config.go b/config.go
index c82fb0bb0..66d4107c8 100644
--- a/config.go
+++ b/config.go
@@ -13,10 +13,12 @@ import (
 	"github.com/naoina/toml/ast"
 )
 
+// Duration just wraps time.Duration
 type Duration struct {
 	time.Duration
 }
 
+// UnmarshalTOML parses the duration from the TOML config file
 func (d *Duration) UnmarshalTOML(b []byte) error {
 	dur, err := time.ParseDuration(string(b[1 : len(b)-1]))
 	if err != nil {
@@ -28,6 +30,9 @@ func (d *Duration) UnmarshalTOML(b []byte) error {
 	return nil
 }
 
+// Config specifies the URL/user/password for the database that telegraf
+// will be logging to, as well as all the plugins that the user has
+// specified
 type Config struct {
 	Tags map[string]string
 
@@ -36,14 +41,17 @@ type Config struct {
 	outputs map[string]*ast.Table
 }
 
+// Plugins returns the configured plugins as a map of name -> plugin toml
 func (c *Config) Plugins() map[string]*ast.Table {
 	return c.plugins
 }
 
+// Outputs returns the configured outputs as a map of name -> output toml
 func (c *Config) Outputs() map[string]*ast.Table {
 	return c.outputs
 }
 
+// ConfiguredPlugin contains a plugin's name, interval, and drop/pass prefix lists
 type ConfiguredPlugin struct {
 	Name string
 
@@ -53,6 +61,7 @@ type ConfiguredPlugin struct {
 	Interval time.Duration
 }
 
+// ShouldPass returns true if the metric should pass, false if it should be dropped
 func (cp *ConfiguredPlugin) ShouldPass(measurement string) bool {
 	if cp.Pass != nil {
 		for _, pat := range cp.Pass {
@@ -77,6 +86,7 @@ func (cp *ConfiguredPlugin) ShouldPass(measurement string) bool {
 	return true
 }
 
+// ApplyOutput loads the toml config into the given interface
 func (c *Config) ApplyOutput(name string, v interface{}) error {
 	if c.outputs[name] != nil {
 		return toml.UnmarshalTable(c.outputs[name], v)
@@ -85,6 +95,7 @@ func (c *Config) ApplyOutput(name string, v interface{}) error {
 	return nil
 }
 
+// ApplyAgent loads the toml config into the given interface
 func (c *Config) ApplyAgent(v interface{}) error {
 	if c.agent != nil {
 		return toml.UnmarshalTable(c.agent, v)
@@ -93,6 +104,9 @@ func (c *Config) ApplyAgent(v interface{}) error {
 	return nil
 }
 
+// ApplyPlugin takes defined plugin names and applies them to the given
+// interface, returning a ConfiguredPlugin object in the end that can
+// be inserted into a runningPlugin by the agent.
 func (c *Config) ApplyPlugin(name string, v interface{}) (*ConfiguredPlugin, error) {
 	cp := &ConfiguredPlugin{Name: name}
 
@@ -144,10 +158,12 @@ func (c *Config) ApplyPlugin(name string, v interface{}) (*ConfiguredPlugin, err
 	return cp, nil
 }
 
+// PluginsDeclared returns the name of all plugins declared in the config.
 func (c *Config) PluginsDeclared() []string {
 	return declared(c.plugins)
 }
 
+// OutputsDeclared returns the name of all outputs declared in the config.
 func (c *Config) OutputsDeclared() []string {
 	return declared(c.outputs)
 }
@@ -164,12 +180,14 @@ func declared(endpoints map[string]*ast.Table) []string {
 	return names
 }
 
+// DefaultConfig returns an empty default configuration
 func DefaultConfig() *Config {
 	return &Config{}
 }
 
-var ErrInvalidConfig = errors.New("invalid configuration")
+var errInvalidConfig = errors.New("invalid configuration")
 
+// LoadConfig loads the given config file and returns a *Config pointer
 func LoadConfig(path string) (*Config, error) {
 	data, err := ioutil.ReadFile(path)
 	if err != nil {
@@ -189,7 +207,7 @@ func LoadConfig(path string) (*Config, error) {
 	for name, val := range tbl.Fields {
 		subtbl, ok := val.(*ast.Table)
 		if !ok {
-			return nil, ErrInvalidConfig
+			return nil, errInvalidConfig
 		}
 
 		switch name {
@@ -211,6 +229,8 @@ func LoadConfig(path string) (*Config, error) {
 	return c, nil
 }
 
+// ListTags returns a string of tags specified in the config,
+// line-protocol style
 func (c *Config) ListTags() string {
 	var tags []string
 
@@ -263,6 +283,11 @@ url = "http://localhost:8086" # required.
 # The target database for metrics. This database must already exist
 database = "telegraf" # required.
 
+# Connection timeout (for the connection with InfluxDB), formatted as a string.
+# Valid time units are "ns", "us" (or "µs"), "ms", "s", "m", "h".
+# If not provided, will default to 0 (no timeout)
+# timeout = "5s"
+
 # username = "telegraf"
 # password = "metricsmetricsmetricsmetrics"
 
@@ -285,12 +310,13 @@ database = "telegraf" # required.
 
 `
 
+// PrintSampleConfig prints the sample config
 func PrintSampleConfig() {
 	fmt.Printf(header)
 
 	var names []string
 
-	for name, _ := range plugins.Plugins {
+	for name := range plugins.Plugins {
 		names = append(names, name)
 	}
 
diff --git a/docker-compose.yml b/docker-compose.yml
new file mode 100644
index 000000000..c51a0235b
--- /dev/null
+++ b/docker-compose.yml
@@ -0,0 +1,27 @@
+mysql:
+  image: mysql
+  ports:
+    - "3306:3306"
+  environment:
+    MYSQL_ALLOW_EMPTY_PASSWORD: yes
+
+memcached:
+  image: memcached
+  ports:
+    - "11211:11211"
+
+postgres:
+  image: postgres
+  ports:
+    - "5432:5432"
+
+# advertised host env variable must be set at runtime, ie,
+# ADVERTISED_HOST=`boot2docker ip` docker-compose up -d
+kafka:
+  image: spotify/kafka
+  ports:
+    - "2181:2181"
+    - "9092:9092"
+  environment:
+    ADVERTISED_HOST:
+    ADVERTISED_PORT: 9092
diff --git a/etc/config.sample.toml b/etc/config.sample.toml
index e9628e3fa..38cfeba68 100644
--- a/etc/config.sample.toml
+++ b/etc/config.sample.toml
@@ -99,11 +99,11 @@ servers = ["localhost"]
 # postgres://[pqgotest[:password]]@localhost?sslmode=[disable|verify-ca|verify-full]
 # or a simple string:
 #   host=localhost user=pqotest password=... sslmode=...
-# 
+#
 # All connection parameters are optional. By default, the host is localhost
 # and the user is the currently running user. For localhost, we default
 # to sslmode=disable as well.
-# 
+#
 
 address = "sslmode=disable"
 
@@ -124,6 +124,14 @@ address = "sslmode=disable"
 # If no servers are specified, then localhost is used as the host.
 servers = ["localhost"]
 
+[mongodb]
+# An array of URIs to gather stats about. Specify an ip or hostname
+# with optional port and password, e.g. mongodb://user:auth_key@10.10.3.30:27017,
+# mongodb://10.10.3.33:18832, 10.0.0.1:10000, etc.
+#
+# If no servers are specified, then 127.0.0.1 is used as the host and 27017 as the port.
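+#
+# e.g. a commented-out entry with authentication (illustrative values from
+# the comment above):
+# servers = ["mongodb://user:auth_key@10.10.3.30:27017"]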
+servers = ["127.0.0.1:27017"] + # Read metrics about swap memory usage [swap] # no configuration diff --git a/package.sh b/package.sh index cea28db1e..fca719e8c 100755 --- a/package.sh +++ b/package.sh @@ -32,11 +32,13 @@ AWS_FILE=~/aws.conf -INSTALL_ROOT_DIR=/opt/influxdb -TELEGRAF_LOG_DIR=/var/log/influxdb -CONFIG_ROOT_DIR=/etc/opt/influxdb +INSTALL_ROOT_DIR=/opt/telegraf +TELEGRAF_LOG_DIR=/var/log/telegraf +CONFIG_ROOT_DIR=/etc/opt/telegraf +LOGROTATE_DIR=/etc/logrotate.d SAMPLE_CONFIGURATION=etc/config.sample.toml +LOGROTATE_CONFIGURATION=etc/logrotate.d/telegraf INITD_SCRIPT=scripts/init.sh TMP_WORK_DIR=`mktemp -d` @@ -144,6 +146,11 @@ make_dir_tree() { echo "Failed to create configuration directory -- aborting." cleanup_exit 1 fi + mkdir -p $work_dir/$LOGROTATE_DIR + if [ $? -ne 0 ]; then + echo "Failed to create configuration directory -- aborting." + cleanup_exit 1 + fi } @@ -251,6 +258,12 @@ if [ $? -ne 0 ]; then cleanup_exit 1 fi +cp $LOGROTATE_CONFIGURATION $TMP_WORK_DIR/$LOGROTATE_DIR/telegraf.conf +if [ $? -ne 0 ]; then + echo "Failed to copy $LOGROTATE_CONFIGURATION to packaging directory -- aborting." + cleanup_exit 1 +fi + generate_postinstall_script $VERSION ########################################################################### diff --git a/plugins/all/all.go b/plugins/all/all.go index 466a7166d..ab7dfcbbf 100644 --- a/plugins/all/all.go +++ b/plugins/all/all.go @@ -1,10 +1,19 @@ package all import ( + _ "github.com/influxdb/telegraf/plugins/disque" + _ "github.com/influxdb/telegraf/plugins/elasticsearch" + _ "github.com/influxdb/telegraf/plugins/haproxy" _ "github.com/influxdb/telegraf/plugins/kafka_consumer" + _ "github.com/influxdb/telegraf/plugins/lustre2" _ "github.com/influxdb/telegraf/plugins/memcached" + _ "github.com/influxdb/telegraf/plugins/mongodb" _ "github.com/influxdb/telegraf/plugins/mysql" + _ "github.com/influxdb/telegraf/plugins/nginx" _ "github.com/influxdb/telegraf/plugins/postgresql" + _ "github.com/influxdb/telegraf/plugins/prometheus" + _ "github.com/influxdb/telegraf/plugins/rabbitmq" _ "github.com/influxdb/telegraf/plugins/redis" + _ "github.com/influxdb/telegraf/plugins/rethinkdb" _ "github.com/influxdb/telegraf/plugins/system" ) diff --git a/plugins/disque/disque.go b/plugins/disque/disque.go new file mode 100644 index 000000000..292e1b363 --- /dev/null +++ b/plugins/disque/disque.go @@ -0,0 +1,202 @@ +package disque + +import ( + "bufio" + "errors" + "fmt" + "net" + "net/url" + "strconv" + "strings" + "sync" + + "github.com/influxdb/telegraf/plugins" +) + +type Disque struct { + Servers []string + + c net.Conn + buf []byte +} + +var sampleConfig = ` +# An array of URI to gather stats about. Specify an ip or hostname +# with optional port and password. ie disque://localhost, disque://10.10.3.33:18832, +# 10.0.0.1:10000, etc. +# +# If no servers are specified, then localhost is used as the host. 
+servers = ["localhost"]` + +func (r *Disque) SampleConfig() string { + return sampleConfig +} + +func (r *Disque) Description() string { + return "Read metrics from one or many disque servers" +} + +var Tracking = map[string]string{ + "uptime_in_seconds": "uptime", + "connected_clients": "clients", + "blocked_clients": "blocked_clients", + "used_memory": "used_memory", + "used_memory_rss": "used_memory_rss", + "used_memory_peak": "used_memory_peak", + "total_connections_received": "total_connections_received", + "total_commands_processed": "total_commands_processed", + "instantaneous_ops_per_sec": "instantaneous_ops_per_sec", + "latest_fork_usec": "latest_fork_usec", + "mem_fragmentation_ratio": "mem_fragmentation_ratio", + "used_cpu_sys": "used_cpu_sys", + "used_cpu_user": "used_cpu_user", + "used_cpu_sys_children": "used_cpu_sys_children", + "used_cpu_user_children": "used_cpu_user_children", + "registered_jobs": "registered_jobs", + "registered_queues": "registered_queues", +} + +var ErrProtocolError = errors.New("disque protocol error") + +// Reads stats from all configured servers accumulates stats. +// Returns one of the errors encountered while gather stats (if any). +func (g *Disque) Gather(acc plugins.Accumulator) error { + if len(g.Servers) == 0 { + url := &url.URL{ + Host: ":7711", + } + g.gatherServer(url, acc) + return nil + } + + var wg sync.WaitGroup + + var outerr error + + for _, serv := range g.Servers { + u, err := url.Parse(serv) + if err != nil { + return fmt.Errorf("Unable to parse to address '%s': %s", serv, err) + } else if u.Scheme == "" { + // fallback to simple string based address (i.e. "10.0.0.1:10000") + u.Scheme = "tcp" + u.Host = serv + u.Path = "" + } + wg.Add(1) + go func(serv string) { + defer wg.Done() + outerr = g.gatherServer(u, acc) + }(serv) + } + + wg.Wait() + + return outerr +} + +const defaultPort = "7711" + +func (g *Disque) gatherServer(addr *url.URL, acc plugins.Accumulator) error { + if g.c == nil { + + _, _, err := net.SplitHostPort(addr.Host) + if err != nil { + addr.Host = addr.Host + ":" + defaultPort + } + + c, err := net.Dial("tcp", addr.Host) + if err != nil { + return fmt.Errorf("Unable to connect to disque server '%s': %s", addr.Host, err) + } + + if addr.User != nil { + pwd, set := addr.User.Password() + if set && pwd != "" { + c.Write([]byte(fmt.Sprintf("AUTH %s\r\n", pwd))) + + r := bufio.NewReader(c) + + line, err := r.ReadString('\n') + if err != nil { + return err + } + if line[0] != '+' { + return fmt.Errorf("%s", strings.TrimSpace(line)[1:]) + } + } + } + + g.c = c + } + + g.c.Write([]byte("info\r\n")) + + r := bufio.NewReader(g.c) + + line, err := r.ReadString('\n') + if err != nil { + return err + } + + if line[0] != '$' { + return fmt.Errorf("bad line start: %s", ErrProtocolError) + } + + line = strings.TrimSpace(line) + + szStr := line[1:] + + sz, err := strconv.Atoi(szStr) + if err != nil { + return fmt.Errorf("bad size string <<%s>>: %s", szStr, ErrProtocolError) + } + + var read int + + for read < sz { + line, err := r.ReadString('\n') + if err != nil { + return err + } + + read += len(line) + + if len(line) == 1 || line[0] == '#' { + continue + } + + parts := strings.SplitN(line, ":", 2) + + name := string(parts[0]) + + metric, ok := Tracking[name] + if !ok { + continue + } + + tags := map[string]string{"host": addr.String()} + val := strings.TrimSpace(parts[1]) + + ival, err := strconv.ParseUint(val, 10, 64) + if err == nil { + acc.Add(metric, ival, tags) + continue + } + + fval, err := strconv.ParseFloat(val, 64) + 
if err != nil { + return err + } + + acc.Add(metric, fval, tags) + } + + return nil +} + +func init() { + plugins.Add("disque", func() plugins.Plugin { + return &Disque{} + }) +} diff --git a/plugins/disque/disque_test.go b/plugins/disque/disque_test.go new file mode 100644 index 000000000..257a87c84 --- /dev/null +++ b/plugins/disque/disque_test.go @@ -0,0 +1,250 @@ +package disque + +import ( + "bufio" + "fmt" + "net" + "testing" + + "github.com/influxdb/telegraf/testutil" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestDisqueGeneratesMetrics(t *testing.T) { + if testing.Short() { + t.Skip("Skipping integration test in short mode") + } + + l, err := net.Listen("tcp", ":0") + require.NoError(t, err) + + defer l.Close() + + go func() { + c, err := l.Accept() + if err != nil { + return + } + + buf := bufio.NewReader(c) + + for { + line, err := buf.ReadString('\n') + if err != nil { + return + } + + if line != "info\r\n" { + return + } + + fmt.Fprintf(c, "$%d\n", len(testOutput)) + c.Write([]byte(testOutput)) + } + }() + + addr := fmt.Sprintf("disque://%s", l.Addr().String()) + + r := &Disque{ + Servers: []string{addr}, + } + + var acc testutil.Accumulator + + err = r.Gather(&acc) + require.NoError(t, err) + + checkInt := []struct { + name string + value uint64 + }{ + {"uptime", 1452705}, + {"clients", 31}, + {"blocked_clients", 13}, + {"used_memory", 1840104}, + {"used_memory_rss", 3227648}, + {"used_memory_peak", 89603656}, + {"total_connections_received", 5062777}, + {"total_commands_processed", 12308396}, + {"instantaneous_ops_per_sec", 18}, + {"latest_fork_usec", 1644}, + {"registered_jobs", 360}, + {"registered_queues", 12}, + } + + for _, c := range checkInt { + assert.True(t, acc.CheckValue(c.name, c.value)) + } + + checkFloat := []struct { + name string + value float64 + }{ + {"mem_fragmentation_ratio", 1.75}, + {"used_cpu_sys", 19585.73}, + {"used_cpu_user", 11255.96}, + {"used_cpu_sys_children", 1.75}, + {"used_cpu_user_children", 1.91}, + } + + for _, c := range checkFloat { + assert.True(t, acc.CheckValue(c.name, c.value)) + } +} + +func TestDisqueCanPullStatsFromMultipleServers(t *testing.T) { + if testing.Short() { + t.Skip("Skipping integration test in short mode") + } + + l, err := net.Listen("tcp", ":0") + require.NoError(t, err) + + defer l.Close() + + go func() { + c, err := l.Accept() + if err != nil { + return + } + + buf := bufio.NewReader(c) + + for { + line, err := buf.ReadString('\n') + if err != nil { + return + } + + if line != "info\r\n" { + return + } + + fmt.Fprintf(c, "$%d\n", len(testOutput)) + c.Write([]byte(testOutput)) + } + }() + + addr := fmt.Sprintf("disque://%s", l.Addr().String()) + + r := &Disque{ + Servers: []string{addr}, + } + + var acc testutil.Accumulator + + err = r.Gather(&acc) + require.NoError(t, err) + + checkInt := []struct { + name string + value uint64 + }{ + {"uptime", 1452705}, + {"clients", 31}, + {"blocked_clients", 13}, + {"used_memory", 1840104}, + {"used_memory_rss", 3227648}, + {"used_memory_peak", 89603656}, + {"total_connections_received", 5062777}, + {"total_commands_processed", 12308396}, + {"instantaneous_ops_per_sec", 18}, + {"latest_fork_usec", 1644}, + {"registered_jobs", 360}, + {"registered_queues", 12}, + } + + for _, c := range checkInt { + assert.True(t, acc.CheckValue(c.name, c.value)) + } + + checkFloat := []struct { + name string + value float64 + }{ + {"mem_fragmentation_ratio", 1.75}, + {"used_cpu_sys", 19585.73}, + {"used_cpu_user", 11255.96}, + 
{"used_cpu_sys_children", 1.75}, + {"used_cpu_user_children", 1.91}, + } + + for _, c := range checkFloat { + assert.True(t, acc.CheckValue(c.name, c.value)) + } +} + +const testOutput = `# Server +disque_version:0.0.1 +disque_git_sha1:b5247598 +disque_git_dirty:0 +disque_build_id:379fda78983a60c6 +os:Linux 3.13.0-44-generic x86_64 +arch_bits:64 +multiplexing_api:epoll +gcc_version:4.8.2 +process_id:32420 +run_id:1cfdfa4c6bc3f285182db5427522a8a4c16e42e4 +tcp_port:7711 +uptime_in_seconds:1452705 +uptime_in_days:16 +hz:10 +config_file:/usr/local/etc/disque/disque.conf + +# Clients +connected_clients:31 +client_longest_output_list:0 +client_biggest_input_buf:0 +blocked_clients:13 + +# Memory +used_memory:1840104 +used_memory_human:1.75M +used_memory_rss:3227648 +used_memory_peak:89603656 +used_memory_peak_human:85.45M +mem_fragmentation_ratio:1.75 +mem_allocator:jemalloc-3.6.0 + +# Jobs +registered_jobs:360 + +# Queues +registered_queues:12 + +# Persistence +loading:0 +aof_enabled:1 +aof_state:on +aof_rewrite_in_progress:0 +aof_rewrite_scheduled:0 +aof_last_rewrite_time_sec:0 +aof_current_rewrite_time_sec:-1 +aof_last_bgrewrite_status:ok +aof_last_write_status:ok +aof_current_size:41952430 +aof_base_size:9808 +aof_pending_rewrite:0 +aof_buffer_length:0 +aof_rewrite_buffer_length:0 +aof_pending_bio_fsync:0 +aof_delayed_fsync:1 + +# Stats +total_connections_received:5062777 +total_commands_processed:12308396 +instantaneous_ops_per_sec:18 +total_net_input_bytes:1346996528 +total_net_output_bytes:1967551763 +instantaneous_input_kbps:1.38 +instantaneous_output_kbps:1.78 +rejected_connections:0 +latest_fork_usec:1644 + +# CPU +used_cpu_sys:19585.73 +used_cpu_user:11255.96 +used_cpu_sys_children:1.75 +used_cpu_user_children:1.91 +` diff --git a/plugins/elasticsearch/elasticsearch.go b/plugins/elasticsearch/elasticsearch.go new file mode 100644 index 000000000..5607532e6 --- /dev/null +++ b/plugins/elasticsearch/elasticsearch.go @@ -0,0 +1,154 @@ +package elasticsearch + +import ( + "encoding/json" + "fmt" + "net/http" + + "github.com/influxdb/telegraf/plugins" +) + +const statsPath = "/_nodes/stats" +const statsPathLocal = "/_nodes/_local/stats" + +type node struct { + Host string `json:"host"` + Name string `json:"name"` + Attributes map[string]string `json:"attributes"` + Indices interface{} `json:"indices"` + OS interface{} `json:"os"` + Process interface{} `json:"process"` + JVM interface{} `json:"jvm"` + ThreadPool interface{} `json:"thread_pool"` + Network interface{} `json:"network"` + FS interface{} `json:"fs"` + Transport interface{} `json:"transport"` + HTTP interface{} `json:"http"` + Breakers interface{} `json:"breakers"` +} + +const sampleConfig = ` +# specify a list of one or more Elasticsearch servers +servers = ["http://localhost:9200"] + +# set local to false when you want to read the indices stats from all nodes +# within the cluster +local = true +` + +// Elasticsearch is a plugin to read stats from one or many Elasticsearch +// servers. +type Elasticsearch struct { + Local bool + Servers []string + client *http.Client +} + +// NewElasticsearch return a new instance of Elasticsearch +func NewElasticsearch() *Elasticsearch { + return &Elasticsearch{client: http.DefaultClient} +} + +// SampleConfig returns sample configuration for this plugin. +func (e *Elasticsearch) SampleConfig() string { + return sampleConfig +} + +// Description returns the plugin description. 
+func (e *Elasticsearch) Description() string { + return "Read stats from one or more Elasticsearch servers or clusters" +} + +// Gather reads the stats from Elasticsearch and writes it to the +// Accumulator. +func (e *Elasticsearch) Gather(acc plugins.Accumulator) error { + for _, serv := range e.Servers { + var url string + if e.Local { + url = serv + statsPathLocal + } else { + url = serv + statsPath + } + if err := e.gatherUrl(url, acc); err != nil { + return err + } + } + return nil +} + +func (e *Elasticsearch) gatherUrl(url string, acc plugins.Accumulator) error { + r, err := e.client.Get(url) + if err != nil { + return err + } + if r.StatusCode != http.StatusOK { + return fmt.Errorf("elasticsearch: API responded with status-code %d, expected %d", r.StatusCode, http.StatusOK) + } + d := json.NewDecoder(r.Body) + esRes := &struct { + ClusterName string `json:"cluster_name"` + Nodes map[string]*node `json:"nodes"` + }{} + if err = d.Decode(esRes); err != nil { + return err + } + + for id, n := range esRes.Nodes { + tags := map[string]string{ + "node_id": id, + "node_host": n.Host, + "node_name": n.Name, + "cluster_name": esRes.ClusterName, + } + + for k, v := range n.Attributes { + tags["node_attribute_"+k] = v + } + + stats := map[string]interface{}{ + "indices": n.Indices, + "os": n.OS, + "process": n.Process, + "jvm": n.JVM, + "thread_pool": n.ThreadPool, + "network": n.Network, + "fs": n.FS, + "transport": n.Transport, + "http": n.HTTP, + "breakers": n.Breakers, + } + + for p, s := range stats { + if err := e.parseInterface(acc, p, tags, s); err != nil { + return err + } + } + } + + return nil +} + +func (e *Elasticsearch) parseInterface(acc plugins.Accumulator, prefix string, tags map[string]string, v interface{}) error { + switch t := v.(type) { + case map[string]interface{}: + for k, v := range t { + if err := e.parseInterface(acc, prefix+"_"+k, tags, v); err != nil { + return err + } + } + case float64: + acc.Add(prefix, t, tags) + case bool, string, []interface{}: + // ignored types + return nil + default: + return fmt.Errorf("elasticsearch: got unexpected type %T with value %v (%s)", t, t, prefix) + } + return nil +} + +func init() { + plugins.Add("elasticsearch", func() plugins.Plugin { + return NewElasticsearch() + }) +} diff --git a/plugins/elasticsearch/elasticsearch_test.go b/plugins/elasticsearch/elasticsearch_test.go new file mode 100644 index 000000000..9e1cf66c9 --- /dev/null +++ b/plugins/elasticsearch/elasticsearch_test.go @@ -0,0 +1,72 @@ +package elasticsearch + +import ( + "io/ioutil" + "net/http" + "strings" + "testing" + + "github.com/influxdb/telegraf/testutil" + "github.com/stretchr/testify/assert" +) + +type transportMock struct { + statusCode int + body string +} + +func newTransportMock(statusCode int, body string) http.RoundTripper { + return &transportMock{ + statusCode: statusCode, + body: body, + } +} + +func (t *transportMock) RoundTrip(r *http.Request) (*http.Response, error) { + res := &http.Response{ + Header: make(http.Header), + Request: r, + StatusCode: t.statusCode, + } + res.Header.Set("Content-Type", "application/json") + res.Body = ioutil.NopCloser(strings.NewReader(t.body)) + return res, nil +} + +func TestElasticsearch(t *testing.T) { + es := NewElasticsearch() + es.Servers = []string{"http://example.com:9200"} + es.client.Transport = newTransportMock(http.StatusOK, statsResponse) + + var acc testutil.Accumulator + if err := es.Gather(&acc); err != nil { + t.Fatal(err) + } + + tags := map[string]string{ + "cluster_name": 
"es-testcluster", + "node_attribute_master": "true", + "node_id": "SDFsfSDFsdfFSDSDfSFDSDF", + "node_name": "test.host.com", + "node_host": "test", + } + + testTables := []map[string]float64{ + indicesExpected, + osExpected, + processExpected, + jvmExpected, + threadPoolExpected, + networkExpected, + fsExpected, + transportExpected, + httpExpected, + breakersExpected, + } + + for _, testTable := range testTables { + for k, v := range testTable { + assert.NoError(t, acc.ValidateTaggedValue(k, v, tags)) + } + } +} diff --git a/plugins/elasticsearch/testdata_test.go b/plugins/elasticsearch/testdata_test.go new file mode 100644 index 000000000..87bd15692 --- /dev/null +++ b/plugins/elasticsearch/testdata_test.go @@ -0,0 +1,734 @@ +package elasticsearch + +const statsResponse = ` +{ + "cluster_name": "es-testcluster", + "nodes": { + "SDFsfSDFsdfFSDSDfSFDSDF": { + "timestamp": 1436365550135, + "name": "test.host.com", + "transport_address": "inet[/127.0.0.1:9300]", + "host": "test", + "ip": [ + "inet[/127.0.0.1:9300]", + "NONE" + ], + "attributes": { + "master": "true" + }, + "indices": { + "docs": { + "count": 29652, + "deleted": 5229 + }, + "store": { + "size_in_bytes": 37715234, + "throttle_time_in_millis": 215 + }, + "indexing": { + "index_total": 84790, + "index_time_in_millis": 29680, + "index_current": 0, + "delete_total": 13879, + "delete_time_in_millis": 1139, + "delete_current": 0, + "noop_update_total": 0, + "is_throttled": false, + "throttle_time_in_millis": 0 + }, + "get": { + "total": 1, + "time_in_millis": 2, + "exists_total": 0, + "exists_time_in_millis": 0, + "missing_total": 1, + "missing_time_in_millis": 2, + "current": 0 + }, + "search": { + "open_contexts": 0, + "query_total": 1452, + "query_time_in_millis": 5695, + "query_current": 0, + "fetch_total": 414, + "fetch_time_in_millis": 146, + "fetch_current": 0 + }, + "merges": { + "current": 0, + "current_docs": 0, + "current_size_in_bytes": 0, + "total": 133, + "total_time_in_millis": 21060, + "total_docs": 203672, + "total_size_in_bytes": 142900226 + }, + "refresh": { + "total": 1076, + "total_time_in_millis": 20078 + }, + "flush": { + "total": 115, + "total_time_in_millis": 2401 + }, + "warmer": { + "current": 0, + "total": 2319, + "total_time_in_millis": 448 + }, + "filter_cache": { + "memory_size_in_bytes": 7384, + "evictions": 0 + }, + "id_cache": { + "memory_size_in_bytes": 0 + }, + "fielddata": { + "memory_size_in_bytes": 12996, + "evictions": 0 + }, + "percolate": { + "total": 0, + "time_in_millis": 0, + "current": 0, + "memory_size_in_bytes": -1, + "memory_size": "-1b", + "queries": 0 + }, + "completion": { + "size_in_bytes": 0 + }, + "segments": { + "count": 134, + "memory_in_bytes": 1285212, + "index_writer_memory_in_bytes": 0, + "index_writer_max_memory_in_bytes": 172368955, + "version_map_memory_in_bytes": 611844, + "fixed_bit_set_memory_in_bytes": 0 + }, + "translog": { + "operations": 17702, + "size_in_bytes": 17 + }, + "suggest": { + "total": 0, + "time_in_millis": 0, + "current": 0 + }, + "query_cache": { + "memory_size_in_bytes": 0, + "evictions": 0, + "hit_count": 0, + "miss_count": 0 + }, + "recovery": { + "current_as_source": 0, + "current_as_target": 0, + "throttle_time_in_millis": 0 + } + }, + "os": { + "timestamp": 1436460392944, + "uptime_in_millis": 25092, + "load_average": [ + 0.01, + 0.04, + 0.05 + ], + "cpu": { + "sys": 0, + "user": 0, + "idle": 99, + "usage": 0, + "stolen": 0 + }, + "mem": { + "free_in_bytes": 477761536, + "used_in_bytes": 1621868544, + "free_percent": 74, + "used_percent": 25, + 
"actual_free_in_bytes": 1565470720, + "actual_used_in_bytes": 534159360 + }, + "swap": { + "used_in_bytes": 0, + "free_in_bytes": 487997440 + } + }, + "process": { + "timestamp": 1436460392945, + "open_file_descriptors": 160, + "cpu": { + "percent": 2, + "sys_in_millis": 1870, + "user_in_millis": 13610, + "total_in_millis": 15480 + }, + "mem": { + "resident_in_bytes": 246382592, + "share_in_bytes": 18747392, + "total_virtual_in_bytes": 4747890688 + } + }, + "jvm": { + "timestamp": 1436460392945, + "uptime_in_millis": 202245, + "mem": { + "heap_used_in_bytes": 52709568, + "heap_used_percent": 5, + "heap_committed_in_bytes": 259522560, + "heap_max_in_bytes": 1038876672, + "non_heap_used_in_bytes": 39634576, + "non_heap_committed_in_bytes": 40841216, + "pools": { + "young": { + "used_in_bytes": 32685760, + "max_in_bytes": 279183360, + "peak_used_in_bytes": 71630848, + "peak_max_in_bytes": 279183360 + }, + "survivor": { + "used_in_bytes": 8912880, + "max_in_bytes": 34865152, + "peak_used_in_bytes": 8912888, + "peak_max_in_bytes": 34865152 + }, + "old": { + "used_in_bytes": 11110928, + "max_in_bytes": 724828160, + "peak_used_in_bytes": 14354608, + "peak_max_in_bytes": 724828160 + } + } + }, + "threads": { + "count": 44, + "peak_count": 45 + }, + "gc": { + "collectors": { + "young": { + "collection_count": 2, + "collection_time_in_millis": 98 + }, + "old": { + "collection_count": 1, + "collection_time_in_millis": 24 + } + } + }, + "buffer_pools": { + "direct": { + "count": 40, + "used_in_bytes": 6304239, + "total_capacity_in_bytes": 6304239 + }, + "mapped": { + "count": 0, + "used_in_bytes": 0, + "total_capacity_in_bytes": 0 + } + } + }, + "thread_pool": { + "percolate": { + "threads": 123, + "queue": 23, + "active": 13, + "rejected": 235, + "largest": 23, + "completed": 33 + }, + "fetch_shard_started": { + "threads": 3, + "queue": 1, + "active": 5, + "rejected": 6, + "largest": 4, + "completed": 54 + }, + "listener": { + "threads": 1, + "queue": 2, + "active": 4, + "rejected": 8, + "largest": 1, + "completed": 1 + }, + "index": { + "threads": 6, + "queue": 8, + "active": 4, + "rejected": 2, + "largest": 3, + "completed": 6 + }, + "refresh": { + "threads": 23, + "queue": 7, + "active": 3, + "rejected": 4, + "largest": 8, + "completed": 3 + }, + "suggest": { + "threads": 2, + "queue": 7, + "active": 2, + "rejected": 1, + "largest": 8, + "completed": 3 + }, + "generic": { + "threads": 1, + "queue": 4, + "active": 6, + "rejected": 3, + "largest": 2, + "completed": 27 + }, + "warmer": { + "threads": 2, + "queue": 7, + "active": 3, + "rejected": 2, + "largest": 3, + "completed": 1 + }, + "search": { + "threads": 5, + "queue": 7, + "active": 2, + "rejected": 7, + "largest": 2, + "completed": 4 + }, + "flush": { + "threads": 3, + "queue": 8, + "active": 0, + "rejected": 1, + "largest": 5, + "completed": 3 + }, + "optimize": { + "threads": 3, + "queue": 4, + "active": 1, + "rejected": 2, + "largest": 7, + "completed": 3 + }, + "fetch_shard_store": { + "threads": 1, + "queue": 7, + "active": 4, + "rejected": 2, + "largest": 4, + "completed": 1 + }, + "management": { + "threads": 2, + "queue": 3, + "active": 1, + "rejected": 6, + "largest": 2, + "completed": 22 + }, + "get": { + "threads": 1, + "queue": 8, + "active": 4, + "rejected": 3, + "largest": 2, + "completed": 1 + }, + "merge": { + "threads": 6, + "queue": 4, + "active": 5, + "rejected": 2, + "largest": 5, + "completed": 1 + }, + "bulk": { + "threads": 4, + "queue": 5, + "active": 7, + "rejected": 3, + "largest": 1, + "completed": 4 + }, + 
"snapshot": { + "threads": 8, + "queue": 5, + "active": 6, + "rejected": 2, + "largest": 1, + "completed": 0 + } + }, + "network": { + "tcp": { + "active_opens": 13, + "passive_opens": 16, + "curr_estab": 29, + "in_segs": 113, + "out_segs": 97, + "retrans_segs": 0, + "estab_resets": 0, + "attempt_fails": 0, + "in_errs": 0, + "out_rsts": 0 + } + }, + "fs": { + "timestamp": 1436460392946, + "total": { + "total_in_bytes": 19507089408, + "free_in_bytes": 16909316096, + "available_in_bytes": 15894814720 + }, + "data": [ + { + "path": "/usr/share/elasticsearch/data/elasticsearch/nodes/0", + "mount": "/usr/share/elasticsearch/data", + "dev": "/dev/sda1", + "type": "ext4", + "total_in_bytes": 19507089408, + "free_in_bytes": 16909316096, + "available_in_bytes": 15894814720 + } + ] + }, + "transport": { + "server_open": 13, + "rx_count": 6, + "rx_size_in_bytes": 1380, + "tx_count": 6, + "tx_size_in_bytes": 1380 + }, + "http": { + "current_open": 3, + "total_opened": 3 + }, + "breakers": { + "fielddata": { + "limit_size_in_bytes": 623326003, + "limit_size": "594.4mb", + "estimated_size_in_bytes": 0, + "estimated_size": "0b", + "overhead": 1.03, + "tripped": 0 + }, + "request": { + "limit_size_in_bytes": 415550668, + "limit_size": "396.2mb", + "estimated_size_in_bytes": 0, + "estimated_size": "0b", + "overhead": 1.0, + "tripped": 0 + }, + "parent": { + "limit_size_in_bytes": 727213670, + "limit_size": "693.5mb", + "estimated_size_in_bytes": 0, + "estimated_size": "0b", + "overhead": 1.0, + "tripped": 0 + } + } + } + } +} +` + +var indicesExpected = map[string]float64{ + "indices_id_cache_memory_size_in_bytes": 0, + "indices_completion_size_in_bytes": 0, + "indices_suggest_total": 0, + "indices_suggest_time_in_millis": 0, + "indices_suggest_current": 0, + "indices_query_cache_memory_size_in_bytes": 0, + "indices_query_cache_evictions": 0, + "indices_query_cache_hit_count": 0, + "indices_query_cache_miss_count": 0, + "indices_store_size_in_bytes": 37715234, + "indices_store_throttle_time_in_millis": 215, + "indices_merges_current_docs": 0, + "indices_merges_current_size_in_bytes": 0, + "indices_merges_total": 133, + "indices_merges_total_time_in_millis": 21060, + "indices_merges_total_docs": 203672, + "indices_merges_total_size_in_bytes": 142900226, + "indices_merges_current": 0, + "indices_filter_cache_memory_size_in_bytes": 7384, + "indices_filter_cache_evictions": 0, + "indices_indexing_index_total": 84790, + "indices_indexing_index_time_in_millis": 29680, + "indices_indexing_index_current": 0, + "indices_indexing_noop_update_total": 0, + "indices_indexing_throttle_time_in_millis": 0, + "indices_indexing_delete_total": 13879, + "indices_indexing_delete_time_in_millis": 1139, + "indices_indexing_delete_current": 0, + "indices_get_exists_time_in_millis": 0, + "indices_get_missing_total": 1, + "indices_get_missing_time_in_millis": 2, + "indices_get_current": 0, + "indices_get_total": 1, + "indices_get_time_in_millis": 2, + "indices_get_exists_total": 0, + "indices_refresh_total": 1076, + "indices_refresh_total_time_in_millis": 20078, + "indices_percolate_current": 0, + "indices_percolate_memory_size_in_bytes": -1, + "indices_percolate_queries": 0, + "indices_percolate_total": 0, + "indices_percolate_time_in_millis": 0, + "indices_translog_operations": 17702, + "indices_translog_size_in_bytes": 17, + "indices_recovery_current_as_source": 0, + "indices_recovery_current_as_target": 0, + "indices_recovery_throttle_time_in_millis": 0, + "indices_docs_count": 29652, + "indices_docs_deleted": 5229, + 
"indices_flush_total_time_in_millis": 2401, + "indices_flush_total": 115, + "indices_fielddata_memory_size_in_bytes": 12996, + "indices_fielddata_evictions": 0, + "indices_search_fetch_current": 0, + "indices_search_open_contexts": 0, + "indices_search_query_total": 1452, + "indices_search_query_time_in_millis": 5695, + "indices_search_query_current": 0, + "indices_search_fetch_total": 414, + "indices_search_fetch_time_in_millis": 146, + "indices_warmer_current": 0, + "indices_warmer_total": 2319, + "indices_warmer_total_time_in_millis": 448, + "indices_segments_count": 134, + "indices_segments_memory_in_bytes": 1285212, + "indices_segments_index_writer_memory_in_bytes": 0, + "indices_segments_index_writer_max_memory_in_bytes": 172368955, + "indices_segments_version_map_memory_in_bytes": 611844, + "indices_segments_fixed_bit_set_memory_in_bytes": 0, +} + +var osExpected = map[string]float64{ + "os_swap_used_in_bytes": 0, + "os_swap_free_in_bytes": 487997440, + "os_timestamp": 1436460392944, + "os_uptime_in_millis": 25092, + "os_cpu_sys": 0, + "os_cpu_user": 0, + "os_cpu_idle": 99, + "os_cpu_usage": 0, + "os_cpu_stolen": 0, + "os_mem_free_percent": 74, + "os_mem_used_percent": 25, + "os_mem_actual_free_in_bytes": 1565470720, + "os_mem_actual_used_in_bytes": 534159360, + "os_mem_free_in_bytes": 477761536, + "os_mem_used_in_bytes": 1621868544, +} + +var processExpected = map[string]float64{ + "process_mem_resident_in_bytes": 246382592, + "process_mem_share_in_bytes": 18747392, + "process_mem_total_virtual_in_bytes": 4747890688, + "process_timestamp": 1436460392945, + "process_open_file_descriptors": 160, + "process_cpu_total_in_millis": 15480, + "process_cpu_percent": 2, + "process_cpu_sys_in_millis": 1870, + "process_cpu_user_in_millis": 13610, +} + +var jvmExpected = map[string]float64{ + "jvm_timestamp": 1436460392945, + "jvm_uptime_in_millis": 202245, + "jvm_mem_non_heap_used_in_bytes": 39634576, + "jvm_mem_non_heap_committed_in_bytes": 40841216, + "jvm_mem_pools_young_max_in_bytes": 279183360, + "jvm_mem_pools_young_peak_used_in_bytes": 71630848, + "jvm_mem_pools_young_peak_max_in_bytes": 279183360, + "jvm_mem_pools_young_used_in_bytes": 32685760, + "jvm_mem_pools_survivor_peak_used_in_bytes": 8912888, + "jvm_mem_pools_survivor_peak_max_in_bytes": 34865152, + "jvm_mem_pools_survivor_used_in_bytes": 8912880, + "jvm_mem_pools_survivor_max_in_bytes": 34865152, + "jvm_mem_pools_old_peak_max_in_bytes": 724828160, + "jvm_mem_pools_old_used_in_bytes": 11110928, + "jvm_mem_pools_old_max_in_bytes": 724828160, + "jvm_mem_pools_old_peak_used_in_bytes": 14354608, + "jvm_mem_heap_used_in_bytes": 52709568, + "jvm_mem_heap_used_percent": 5, + "jvm_mem_heap_committed_in_bytes": 259522560, + "jvm_mem_heap_max_in_bytes": 1038876672, + "jvm_threads_peak_count": 45, + "jvm_threads_count": 44, + "jvm_gc_collectors_young_collection_count": 2, + "jvm_gc_collectors_young_collection_time_in_millis": 98, + "jvm_gc_collectors_old_collection_count": 1, + "jvm_gc_collectors_old_collection_time_in_millis": 24, + "jvm_buffer_pools_direct_count": 40, + "jvm_buffer_pools_direct_used_in_bytes": 6304239, + "jvm_buffer_pools_direct_total_capacity_in_bytes": 6304239, + "jvm_buffer_pools_mapped_count": 0, + "jvm_buffer_pools_mapped_used_in_bytes": 0, + "jvm_buffer_pools_mapped_total_capacity_in_bytes": 0, +} + +var threadPoolExpected = map[string]float64{ + "thread_pool_merge_threads": 6, + "thread_pool_merge_queue": 4, + "thread_pool_merge_active": 5, + "thread_pool_merge_rejected": 2, + "thread_pool_merge_largest": 5, + 
"thread_pool_merge_completed": 1, + "thread_pool_bulk_threads": 4, + "thread_pool_bulk_queue": 5, + "thread_pool_bulk_active": 7, + "thread_pool_bulk_rejected": 3, + "thread_pool_bulk_largest": 1, + "thread_pool_bulk_completed": 4, + "thread_pool_warmer_threads": 2, + "thread_pool_warmer_queue": 7, + "thread_pool_warmer_active": 3, + "thread_pool_warmer_rejected": 2, + "thread_pool_warmer_largest": 3, + "thread_pool_warmer_completed": 1, + "thread_pool_get_largest": 2, + "thread_pool_get_completed": 1, + "thread_pool_get_threads": 1, + "thread_pool_get_queue": 8, + "thread_pool_get_active": 4, + "thread_pool_get_rejected": 3, + "thread_pool_index_threads": 6, + "thread_pool_index_queue": 8, + "thread_pool_index_active": 4, + "thread_pool_index_rejected": 2, + "thread_pool_index_largest": 3, + "thread_pool_index_completed": 6, + "thread_pool_suggest_threads": 2, + "thread_pool_suggest_queue": 7, + "thread_pool_suggest_active": 2, + "thread_pool_suggest_rejected": 1, + "thread_pool_suggest_largest": 8, + "thread_pool_suggest_completed": 3, + "thread_pool_fetch_shard_store_queue": 7, + "thread_pool_fetch_shard_store_active": 4, + "thread_pool_fetch_shard_store_rejected": 2, + "thread_pool_fetch_shard_store_largest": 4, + "thread_pool_fetch_shard_store_completed": 1, + "thread_pool_fetch_shard_store_threads": 1, + "thread_pool_management_threads": 2, + "thread_pool_management_queue": 3, + "thread_pool_management_active": 1, + "thread_pool_management_rejected": 6, + "thread_pool_management_largest": 2, + "thread_pool_management_completed": 22, + "thread_pool_percolate_queue": 23, + "thread_pool_percolate_active": 13, + "thread_pool_percolate_rejected": 235, + "thread_pool_percolate_largest": 23, + "thread_pool_percolate_completed": 33, + "thread_pool_percolate_threads": 123, + "thread_pool_listener_active": 4, + "thread_pool_listener_rejected": 8, + "thread_pool_listener_largest": 1, + "thread_pool_listener_completed": 1, + "thread_pool_listener_threads": 1, + "thread_pool_listener_queue": 2, + "thread_pool_search_rejected": 7, + "thread_pool_search_largest": 2, + "thread_pool_search_completed": 4, + "thread_pool_search_threads": 5, + "thread_pool_search_queue": 7, + "thread_pool_search_active": 2, + "thread_pool_fetch_shard_started_threads": 3, + "thread_pool_fetch_shard_started_queue": 1, + "thread_pool_fetch_shard_started_active": 5, + "thread_pool_fetch_shard_started_rejected": 6, + "thread_pool_fetch_shard_started_largest": 4, + "thread_pool_fetch_shard_started_completed": 54, + "thread_pool_refresh_rejected": 4, + "thread_pool_refresh_largest": 8, + "thread_pool_refresh_completed": 3, + "thread_pool_refresh_threads": 23, + "thread_pool_refresh_queue": 7, + "thread_pool_refresh_active": 3, + "thread_pool_optimize_threads": 3, + "thread_pool_optimize_queue": 4, + "thread_pool_optimize_active": 1, + "thread_pool_optimize_rejected": 2, + "thread_pool_optimize_largest": 7, + "thread_pool_optimize_completed": 3, + "thread_pool_snapshot_largest": 1, + "thread_pool_snapshot_completed": 0, + "thread_pool_snapshot_threads": 8, + "thread_pool_snapshot_queue": 5, + "thread_pool_snapshot_active": 6, + "thread_pool_snapshot_rejected": 2, + "thread_pool_generic_threads": 1, + "thread_pool_generic_queue": 4, + "thread_pool_generic_active": 6, + "thread_pool_generic_rejected": 3, + "thread_pool_generic_largest": 2, + "thread_pool_generic_completed": 27, + "thread_pool_flush_threads": 3, + "thread_pool_flush_queue": 8, + "thread_pool_flush_active": 0, + "thread_pool_flush_rejected": 1, + 
"thread_pool_flush_largest": 5, + "thread_pool_flush_completed": 3, +} + +var networkExpected = map[string]float64{ + "network_tcp_in_errs": 0, + "network_tcp_passive_opens": 16, + "network_tcp_curr_estab": 29, + "network_tcp_in_segs": 113, + "network_tcp_out_segs": 97, + "network_tcp_retrans_segs": 0, + "network_tcp_attempt_fails": 0, + "network_tcp_active_opens": 13, + "network_tcp_estab_resets": 0, + "network_tcp_out_rsts": 0, +} + +var fsExpected = map[string]float64{ + "fs_timestamp": 1436460392946, + "fs_total_free_in_bytes": 16909316096, + "fs_total_available_in_bytes": 15894814720, + "fs_total_total_in_bytes": 19507089408, +} + +var transportExpected = map[string]float64{ + "transport_server_open": 13, + "transport_rx_count": 6, + "transport_rx_size_in_bytes": 1380, + "transport_tx_count": 6, + "transport_tx_size_in_bytes": 1380, +} + +var httpExpected = map[string]float64{ + "http_current_open": 3, + "http_total_opened": 3, +} + +var breakersExpected = map[string]float64{ + "breakers_fielddata_estimated_size_in_bytes": 0, + "breakers_fielddata_overhead": 1.03, + "breakers_fielddata_tripped": 0, + "breakers_fielddata_limit_size_in_bytes": 623326003, + "breakers_request_estimated_size_in_bytes": 0, + "breakers_request_overhead": 1.0, + "breakers_request_tripped": 0, + "breakers_request_limit_size_in_bytes": 415550668, + "breakers_parent_overhead": 1.0, + "breakers_parent_tripped": 0, + "breakers_parent_limit_size_in_bytes": 727213670, + "breakers_parent_estimated_size_in_bytes": 0, +} diff --git a/plugins/haproxy/haproxy.go b/plugins/haproxy/haproxy.go new file mode 100644 index 000000000..e09bfe5be --- /dev/null +++ b/plugins/haproxy/haproxy.go @@ -0,0 +1,310 @@ +package haproxy + +import ( + "encoding/csv" + "fmt" + "github.com/influxdb/telegraf/plugins" + "io" + "net/http" + "net/url" + "strconv" + "sync" +) + +//CSV format: https://cbonte.github.io/haproxy-dconv/configuration-1.5.html#9.1 +const ( + HF_PXNAME = 0 // 0. pxname [LFBS]: proxy name + HF_SVNAME = 1 // 1. svname [LFBS]: service name (FRONTEND for frontend, BACKEND for backend, any name for server/listener) + HF_QCUR = 2 //2. qcur [..BS]: current queued requests. For the backend this reports the number queued without a server assigned. + HF_QMAX = 3 //3. qmax [..BS]: max value of qcur + HF_SCUR = 4 // 4. scur [LFBS]: current sessions + HF_SMAX = 5 //5. smax [LFBS]: max sessions + HF_SLIM = 6 //6. slim [LFBS]: configured session limit + HF_STOT = 7 //7. stot [LFBS]: cumulative number of connections + HF_BIN = 8 //8. bin [LFBS]: bytes in + HF_BOUT = 9 //9. bout [LFBS]: bytes out + HF_DREQ = 10 //10. dreq [LFB.]: requests denied because of security concerns. + HF_DRESP = 11 //11. dresp [LFBS]: responses denied because of security concerns. + HF_EREQ = 12 //12. ereq [LF..]: request errors. Some of the possible causes are: + HF_ECON = 13 //13. econ [..BS]: number of requests that encountered an error trying to + HF_ERESP = 14 //14. eresp [..BS]: response errors. srv_abrt will be counted here also. Some other errors are: - write error on the client socket (won't be counted for the server stat) - failure applying filters to the response. + HF_WRETR = 15 //15. wretr [..BS]: number of times a connection to a server was retried. + HF_WREDIS = 16 //16. wredis [..BS]: number of times a request was redispatched to another server. The server value counts the number of times that server was switched away from. + HF_STATUS = 17 //17. status [LFBS]: status (UP/DOWN/NOLB/MAINT/MAINT(via)...) + HF_WEIGHT = 18 //18. 
weight [..BS]: total weight (backend), server weight (server) + HF_ACT = 19 //19. act [..BS]: number of active servers (backend), server is active (server) + HF_BCK = 20 //20. bck [..BS]: number of backup servers (backend), server is backup (server) + HF_CHKFAIL = 21 //21. chkfail [...S]: number of failed checks. (Only counts checks failed when the server is up.) + HF_CHKDOWN = 22 //22. chkdown [..BS]: number of UP->DOWN transitions. The backend counter counts transitions to the whole backend being down, rather than the sum of the counters for each server. + HF_LASTCHG = 23 //23. lastchg [..BS]: number of seconds since the last UP<->DOWN transition + HF_DOWNTIME = 24 //24. downtime [..BS]: total downtime (in seconds). The value for the backend is the downtime for the whole backend, not the sum of the server downtime. + HF_QLIMIT = 25 //25. qlimit [...S]: configured maxqueue for the server, or nothing in the value is 0 (default, meaning no limit) + HF_PID = 26 //26. pid [LFBS]: process id (0 for first instance, 1 for second, ...) + HF_IID = 27 //27. iid [LFBS]: unique proxy id + HF_SID = 28 //28. sid [L..S]: server id (unique inside a proxy) + HF_THROTTLE = 29 //29. throttle [...S]: current throttle percentage for the server, when slowstart is active, or no value if not in slowstart. + HF_LBTOT = 30 //30. lbtot [..BS]: total number of times a server was selected, either for new sessions, or when re-dispatching. The server counter is the number of times that server was selected. + HF_TRACKED = 31 //31. tracked [...S]: id of proxy/server if tracking is enabled. + HF_TYPE = 32 //32. type [LFBS]: (0 = frontend, 1 = backend, 2 = server, 3 = socket/listener) + HF_RATE = 33 //33. rate [.FBS]: number of sessions per second over last elapsed second + HF_RATE_LIM = 34 //34. rate_lim [.F..]: configured limit on new sessions per second + HF_RATE_MAX = 35 //35. rate_max [.FBS]: max number of new sessions per second + HF_CHECK_STATUS = 36 //36. check_status [...S]: status of last health check, one of: + HF_CHECK_CODE = 37 //37. check_code [...S]: layer5-7 code, if available + HF_CHECK_DURATION = 38 //38. check_duration [...S]: time in ms took to finish last health check + HF_HRSP_1xx = 39 //39. hrsp_1xx [.FBS]: http responses with 1xx code + HF_HRSP_2xx = 40 //40. hrsp_2xx [.FBS]: http responses with 2xx code + HF_HRSP_3xx = 41 //41. hrsp_3xx [.FBS]: http responses with 3xx code + HF_HRSP_4xx = 42 //42. hrsp_4xx [.FBS]: http responses with 4xx code + HF_HRSP_5xx = 43 //43. hrsp_5xx [.FBS]: http responses with 5xx code + HF_HRSP_OTHER = 44 //44. hrsp_other [.FBS]: http responses with other codes (protocol error) + HF_HANAFAIL = 45 //45. hanafail [...S]: failed health checks details + HF_REQ_RATE = 46 //46. req_rate [.F..]: HTTP requests per second over last elapsed second + HF_REQ_RATE_MAX = 47 //47. req_rate_max [.F..]: max number of HTTP requests per second observed + HF_REQ_TOT = 48 //48. req_tot [.F..]: total number of HTTP requests received + HF_CLI_ABRT = 49 //49. cli_abrt [..BS]: number of data transfers aborted by the client + HF_SRV_ABRT = 50 //50. srv_abrt [..BS]: number of data transfers aborted by the server (inc. in eresp) + HF_COMP_IN = 51 //51. comp_in [.FB.]: number of HTTP response bytes fed to the compressor + HF_COMP_OUT = 52 //52. comp_out [.FB.]: number of HTTP response bytes emitted by the compressor + HF_COMP_BYP = 53 //53. comp_byp [.FB.]: number of bytes that bypassed the HTTP compressor (CPU/BW limit) + HF_COMP_RSP = 54 //54. 
comp_rsp [.FB.]: number of HTTP responses that were compressed + HF_LASTSESS = 55 //55. lastsess [..BS]: number of seconds since last session assigned to server/backend + HF_LAST_CHK = 56 //56. last_chk [...S]: last health check contents or textual error + HF_LAST_AGT = 57 //57. last_agt [...S]: last agent check contents or textual error + HF_QTIME = 58 //58. qtime [..BS]: the average queue time in ms over the 1024 last requests + HF_CTIME = 59 //59. ctime [..BS]: the average connect time in ms over the 1024 last requests + HF_RTIME = 60 //60. rtime [..BS]: the average response time in ms over the 1024 last requests (0 for TCP) + HF_TTIME = 61 //61. ttime [..BS]: the average total session time in ms over the 1024 last requests +) + +type haproxy struct { + Servers []string + + client *http.Client +} + +var sampleConfig = ` +# An array of addresses to gather stats about. Specify an ip or hostname +# with optional port, e.g. localhost, 10.10.3.33:1936, etc. +# +# If no servers are specified, then 127.0.0.1:1936 is used by default +servers = ["http://myhaproxy.com:1936", "http://anotherhaproxy.com:1936"] +# You can also use a local socket (not working yet) +# servers = ["socket:/run/haproxy/admin.sock"] +` + +func (r *haproxy) SampleConfig() string { + return sampleConfig +} + +func (r *haproxy) Description() string { + return "Read haproxy metrics via socket or csv stats page" +} + +// Gather reads stats from all configured servers and accumulates them. +// It returns one of the errors encountered while gathering stats (if any). +func (g *haproxy) Gather(acc plugins.Accumulator) error { + if len(g.Servers) == 0 { + return g.gatherServer("http://127.0.0.1:1936", acc) + } + + var wg sync.WaitGroup + + var outerr error + + for _, serv := range g.Servers { + wg.Add(1) + go func(serv string) { + defer wg.Done() + outerr = g.gatherServer(serv, acc) + }(serv) + } + + wg.Wait() + + return outerr +} + +func (g *haproxy) gatherServer(addr string, acc plugins.Accumulator) error { + if g.client == nil { + g.client = &http.Client{} + } + + u, err := url.Parse(addr) + if err != nil { + return fmt.Errorf("Unable to parse server address '%s': %s", addr, err) + } + + req, err := http.NewRequest("GET", fmt.Sprintf("%s://%s%s/;csv", u.Scheme, u.Host, u.Path), nil) + if err != nil { + return fmt.Errorf("Unable to create request for '%s': %s", addr, err) + } + if u.User != nil { + p, _ := u.User.Password() + req.SetBasicAuth(u.User.Username(), p) + } + + res, err := g.client.Do(req) + if err != nil { + return fmt.Errorf("Unable to connect to haproxy server '%s': %s", addr, err) + } + defer res.Body.Close() + + if res.StatusCode != 200 { + return fmt.Errorf("Unable to get valid stat result from '%s', HTTP status: %d", addr, res.StatusCode) + } + + _, err = importCsvResult(res.Body, acc, u.Host) + return err +} + +func importCsvResult(r io.Reader, acc plugins.Accumulator, host string) ([][]string, error) { + reader := csv.NewReader(r) + result, err := reader.ReadAll() + if err != nil { + return nil, err + } + + for _, row := range result { + + for field, v := range row { + tags := map[string]string{ + "host": host, + "proxy": row[HF_PXNAME], + "sv": row[HF_SVNAME], + } + switch field { + case HF_QCUR: + ival, err := strconv.ParseUint(v, 10, 64) + if err == nil { + acc.Add("qcur", ival, tags) + } + case HF_QMAX: + ival, err := strconv.ParseUint(v, 10, 64) + if err == nil { + acc.Add("qmax", ival, tags) + } + case HF_SCUR: + ival, err := strconv.ParseUint(v, 10, 64) + if err == nil { + acc.Add("scur", ival, tags) + } + case HF_SMAX: + ival, err := strconv.ParseUint(v, 10, 64) + if err == nil { + acc.Add("smax", ival, tags) + } + case HF_BIN: + ival, err := strconv.ParseUint(v, 10, 64) + if err == nil { + acc.Add("bin", ival, tags) + } + case HF_BOUT: + ival, err := strconv.ParseUint(v, 10, 64) + if err == nil { + acc.Add("bout", ival, tags) + } + case HF_DREQ: + ival, err :=
strconv.ParseUint(v, 10, 64) + if err == nil { + acc.Add("dreq", ival, tags) + } + case HF_DRESP: + ival, err := strconv.ParseUint(v, 10, 64) + if err == nil { + acc.Add("dresp", ival, tags) + } + case HF_RATE: + ival, err := strconv.ParseUint(v, 10, 64) + if err == nil { + acc.Add("rate", ival, tags) + } + case HF_RATE_MAX: + ival, err := strconv.ParseUint(v, 10, 64) + if err == nil { + acc.Add("rate_max", ival, tags) + } + case HF_STOT: + ival, err := strconv.ParseUint(v, 10, 64) + if err == nil { + acc.Add("stot", ival, tags) + } + case HF_HRSP_1xx: + ival, err := strconv.ParseUint(v, 10, 64) + if err == nil { + acc.Add("http_response.1xx", ival, tags) + } + case HF_HRSP_2xx: + ival, err := strconv.ParseUint(v, 10, 64) + if err == nil { + acc.Add("http_response.2xx", ival, tags) + } + case HF_HRSP_3xx: + ival, err := strconv.ParseUint(v, 10, 64) + if err == nil { + acc.Add("http_response.3xx", ival, tags) + } + case HF_HRSP_4xx: + ival, err := strconv.ParseUint(v, 10, 64) + if err == nil { + acc.Add("http_response.4xx", ival, tags) + } + case HF_EREQ: + ival, err := strconv.ParseUint(v, 10, 64) + if err == nil { + acc.Add("ereq", ival, tags) + } + case HF_ERESP: + ival, err := strconv.ParseUint(v, 10, 64) + if err == nil { + acc.Add("eresp", ival, tags) + } + case HF_ECON: + ival, err := strconv.ParseUint(v, 10, 64) + if err == nil { + acc.Add("econ", ival, tags) + } + case HF_WRETR: + ival, err := strconv.ParseUint(v, 10, 64) + if err == nil { + acc.Add("wretr", ival, tags) + } + case HF_WREDIS: + ival, err := strconv.ParseUint(v, 10, 64) + if err == nil { + acc.Add("wredis", ival, tags) + } + case HF_REQ_RATE: + ival, err := strconv.ParseUint(v, 10, 64) + if err == nil { + acc.Add("req_rate", ival, tags) + } + case HF_REQ_RATE_MAX: + ival, err := strconv.ParseUint(v, 10, 64) + if err == nil { + acc.Add("req_rate_max", ival, tags) + } + case HF_REQ_TOT: + ival, err := strconv.ParseUint(v, 10, 64) + if err == nil { + acc.Add("req_tot", ival, tags) + } + case HF_THROTTLE: + ival, err := strconv.ParseUint(v, 10, 64) + if err == nil { + acc.Add("throttle", ival, tags) + } + case HF_LBTOT: + ival, err := strconv.ParseUint(v, 10, 64) + if err == nil { + acc.Add("lbtot", ival, tags) + } + + } + + } + } + return result, err +} + +func init() { + plugins.Add("haproxy", func() plugins.Plugin { + return &haproxy{} + }) +} diff --git a/plugins/haproxy/haproxy_test.go b/plugins/haproxy/haproxy_test.go new file mode 100644 index 000000000..b54f516c9 --- /dev/null +++ b/plugins/haproxy/haproxy_test.go @@ -0,0 +1,152 @@ +package haproxy + +import ( + "fmt" + "strings" + "testing" + + "github.com/influxdb/telegraf/testutil" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "net/http" + "net/http/httptest" +) + +func TestHaproxyGeneratesMetricsWithAuthentication(t *testing.T) { + //We create a fake server to return test data + ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + username, password, ok := r.BasicAuth() + if !ok { + w.WriteHeader(http.StatusNotFound) + fmt.Fprint(w, "Unauthorized") + return + } + + if username == "user" && password == "password" { + fmt.Fprint(w, csvOutputSample) + } else { + w.WriteHeader(http.StatusNotFound) + fmt.Fprint(w, "Unauthorized") + } + })) + defer ts.Close() + + //Now we tested again above server, with our authentication data + r := &haproxy{ + Servers: []string{strings.Replace(ts.URL, "http://", "http://user:password@", 1)}, + } + + var acc testutil.Accumulator + + err := r.Gather(&acc) + 
require.NoError(t, err) + + tags := map[string]string{ + "host": ts.Listener.Addr().String(), + "proxy": "be_app", + "sv": "host0", + } + + assert.NoError(t, acc.ValidateTaggedValue("stot", uint64(171014), tags)) + + checkInt := []struct { + name string + value uint64 + }{ + {"bin", 5557055817}, + {"scur", 288}, + {"qmax", 81}, + {"http_response.1xx", 0}, + {"http_response.2xx", 1314093}, + {"http_response.3xx", 537036}, + {"http_response.4xx", 123452}, + {"dreq", 1102}, + {"dresp", 80}, + {"wretr", 17}, + {"wredis", 19}, + {"ereq", 95740}, + {"econ", 0}, + {"eresp", 0}, + {"req_rate", 35}, + {"req_rate_max", 140}, + {"req_tot", 1987928}, + {"bin", 5557055817}, + {"bout", 24096715169}, + {"rate", 18}, + {"rate_max", 102}, + + {"throttle", 13}, + {"lbtot", 114}, + } + + for _, c := range checkInt { + assert.Equal(t, true, acc.CheckValue(c.name, c.value)) + } + + //Here, we should get error because we don't pass authentication data + r = &haproxy{ + Servers: []string{ts.URL}, + } + + err = r.Gather(&acc) + require.Error(t, err) +} + +func TestHaproxyGeneratesMetricsWithoutAuthentication(t *testing.T) { + ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + fmt.Fprint(w, csvOutputSample) + })) + defer ts.Close() + + r := &haproxy{ + Servers: []string{ts.URL}, + } + + var acc testutil.Accumulator + + err := r.Gather(&acc) + require.NoError(t, err) + + tags := map[string]string{ + "proxy": "be_app", + "host": ts.Listener.Addr().String(), + "sv": "host0", + } + + assert.NoError(t, acc.ValidateTaggedValue("stot", uint64(171014), tags)) + assert.NoError(t, acc.ValidateTaggedValue("scur", uint64(1), tags)) + assert.NoError(t, acc.ValidateTaggedValue("rate", uint64(3), tags)) + assert.Equal(t, true, acc.CheckValue("bin", uint64(5557055817))) +} + +//When not passing server config, we default to localhost +//We just want to make sure we did request stat from localhost +func TestHaproxyDefaultGetFromLocalhost(t *testing.T) { + r := &haproxy{} + + var acc testutil.Accumulator + + err := r.Gather(&acc) + require.Error(t, err) + assert.Contains(t, err.Error(), "127.0.0.1:1936/;csv") +} + +const csvOutputSample = ` +# pxname,svname,qcur,qmax,scur,smax,slim,stot,bin,bout,dreq,dresp,ereq,econ,eresp,wretr,wredis,status,weight,act,bck,chkfail,chkdown,lastchg,downtime,qlimit,pid,iid,sid,throttle,lbtot,tracked,type,rate,rate_lim,rate_max,check_status,check_code,check_duration,hrsp_1xx,hrsp_2xx,hrsp_3xx,hrsp_4xx,hrsp_5xx,hrsp_other,hanafail,req_rate,req_rate_max,req_tot,cli_abrt,srv_abrt,comp_in,comp_out,comp_byp,comp_rsp,lastsess,last_chk,last_agt,qtime,ctime,rtime,ttime, +fe_app,FRONTEND,,81,288,713,2000,1094063,5557055817,24096715169,1102,80,95740,,,17,19,OPEN,,,,,,,,,2,16,113,13,114,,0,18,0,102,,,,0,1314093,537036,123452,11966,1360,,35,140,1987928,,,0,0,0,0,,,,,,,, +be_static,host0,0,0,0,3,,3209,1141294,17389596,,0,,0,0,0,0,no check,1,1,0,,,,,,2,17,1,,3209,,2,0,,7,,,,0,218,1497,1494,0,0,0,,,,0,0,,,,,2,,,0,2,23,545, +be_static,BACKEND,0,0,0,3,200,3209,1141294,17389596,0,0,,0,0,0,0,UP,1,1,0,,0,70698,0,,2,17,0,,3209,,1,0,,7,,,,0,218,1497,1494,0,0,,,,,0,0,0,0,0,0,2,,,0,2,23,545, +be_static,host0,0,0,0,1,,28,17313,466003,,0,,0,0,0,0,UP,1,1,0,0,0,70698,0,,2,18,1,,28,,2,0,,1,L4OK,,1,0,17,6,5,0,0,0,,,,0,0,,,,,2103,,,0,1,1,36, +be_static,host4,0,0,0,1,,28,15358,1281073,,0,,0,0,0,0,UP,1,1,0,0,0,70698,0,,2,18,2,,28,,2,0,,1,L4OK,,1,0,20,5,3,0,0,0,,,,0,0,,,,,2076,,,0,1,1,54, 
+be_static,host5,0,0,0,1,,28,17547,1970404,,0,,0,0,0,0,UP,1,1,0,0,0,70698,0,,2,18,3,,28,,2,0,,1,L4OK,,0,0,20,5,3,0,0,0,,,,0,0,,,,,1495,,,0,1,1,53, +be_static,host6,0,0,0,1,,28,14105,1328679,,0,,0,0,0,0,UP,1,1,0,0,0,70698,0,,2,18,4,,28,,2,0,,1,L4OK,,0,0,18,8,2,0,0,0,,,,0,0,,,,,1418,,,0,0,1,49, +be_static,host7,0,0,0,1,,28,15258,1965185,,0,,0,0,0,0,UP,1,1,0,0,0,70698,0,,2,18,5,,28,,2,0,,1,L4OK,,0,0,17,8,3,0,0,0,,,,0,0,,,,,935,,,0,0,1,28, +be_static,host8,0,0,0,1,,28,12934,1034779,,0,,0,0,0,0,UP,1,1,0,0,0,70698,0,,2,18,6,,28,,2,0,,1,L4OK,,0,0,17,9,2,0,0,0,,,,0,0,,,,,582,,,0,1,1,66, +be_static,host9,0,0,0,1,,28,13434,134063,,0,,0,0,0,0,UP,1,1,0,0,0,70698,0,,2,18,7,,28,,2,0,,1,L4OK,,0,0,17,8,3,0,0,0,,,,0,0,,,,,539,,,0,0,1,80, +be_static,host1,0,0,0,1,,28,7873,1209688,,0,,0,0,0,0,UP,1,1,0,0,0,70698,0,,2,18,8,,28,,2,0,,1,L4OK,,0,0,22,6,0,0,0,0,,,,0,0,,,,,487,,,0,0,1,36, +be_static,host2,0,0,0,1,,28,13830,1085929,,0,,0,0,0,0,UP,1,1,0,0,0,70698,0,,2,18,9,,28,,2,0,,1,L4OK,,0,0,19,6,3,0,0,0,,,,0,0,,,,,338,,,0,1,1,38, +be_static,host3,0,0,0,1,,28,17959,1259760,,0,,0,0,0,0,UP,1,1,0,0,0,70698,0,,2,18,10,,28,,2,0,,1,L4OK,,1,0,20,6,2,0,0,0,,,,0,0,,,,,92,,,0,1,1,17, +be_static,BACKEND,0,0,0,2,200,307,160276,13322728,0,0,,0,0,0,0,UP,11,11,0,,0,70698,0,,2,18,0,,307,,1,0,,4,,,,0,205,73,29,0,0,,,,,0,0,0,0,0,0,92,,,0,1,3,381, +be_app,host0,0,0,1,32,,171014,510913516,2193856571,,0,,0,1,1,0,UP,100,1,0,1,0,70698,0,,2,19,1,,171013,,2,3,,12,L7OK,301,10,0,119534,48051,2345,1056,0,0,,,,73,1,,,,,0,Moved Permanently,,0,2,312,2341, +be_app,host4,0,0,2,29,,171013,499318742,2195595896,12,34,,0,2,0,0,UP,100,1,0,2,0,70698,0,,2,19,2,,171013,,2,3,,12,L7OK,301,12,0,119572,47882,2441,1088,0,0,,,,84,2,,,,,0,Moved Permanently,,0,2,316,2355, +` diff --git a/plugins/kafka_consumer/README.md b/plugins/kafka_consumer/README.md new file mode 100644 index 000000000..15e404215 --- /dev/null +++ b/plugins/kafka_consumer/README.md @@ -0,0 +1,24 @@ +# Kafka Consumer + +The [Kafka](http://kafka.apache.org/) consumer plugin polls a specified Kafka +topic and adds messages to InfluxDB. The plugin assumes messages follow the +line protocol. [Consumer Group](http://godoc.org/github.com/wvanbergen/kafka/consumergroup) +is used to talk to the Kafka cluster so multiple instances of telegraf can read +from the same topic in parallel. + +## Testing + +Running integration tests requires running Zookeeper & Kafka. The following +commands assume you're on OS X & using [boot2docker](http://boot2docker.io/). 
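
On Linux you can usually skip boot2docker and let the peers resolve to localhost. The integration test shown further down obtains the address via `testutil.GetLocalHost()`; the sketch below is a rough illustration of how such a helper can work (an assumption for illustration, not the actual `testutil` implementation), keyed off the same `ADVERTISED_HOST` variable used in the `docker run` command below:

```go
package testutil

import "os"

// GetLocalHost returns the host the test services are reachable on:
// the ADVERTISED_HOST environment variable when set (e.g. a boot2docker
// VM IP), otherwise plain localhost. Illustrative sketch only.
func GetLocalHost() string {
	if host := os.Getenv("ADVERTISED_HOST"); host != "" {
		return host
	}
	return "localhost"
}
```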
+ +To start Kafka & Zookeeper: + +``` +docker run -d -p 2181:2181 -p 9092:9092 --env ADVERTISED_HOST=`boot2docker ip` --env ADVERTISED_PORT=9092 spotify/kafka +``` + +To run tests: + +``` +ZOOKEEPER_PEERS=$(boot2docker ip):2181 KAFKA_PEERS=$(boot2docker ip):9092 go test +``` diff --git a/plugins/kafka_consumer/kafka_consumer_integration_test.go b/plugins/kafka_consumer/kafka_consumer_integration_test.go index 1541cb127..325318014 100644 --- a/plugins/kafka_consumer/kafka_consumer_integration_test.go +++ b/plugins/kafka_consumer/kafka_consumer_integration_test.go @@ -2,8 +2,6 @@ package kafka_consumer import ( "fmt" - "os" - "strings" "testing" "time" @@ -14,19 +12,13 @@ import ( ) func TestReadsMetricsFromKafka(t *testing.T) { + if testing.Short() { + t.Skip("Skipping integration test in short mode") + } var zkPeers, brokerPeers []string - if len(os.Getenv("ZOOKEEPER_PEERS")) == 0 { - zkPeers = []string{"localhost:2181"} - } else { - zkPeers = strings.Split(os.Getenv("ZOOKEEPER_PEERS"), ",") - } - - if len(os.Getenv("KAFKA_PEERS")) == 0 { - brokerPeers = []string{"localhost:9092"} - } else { - brokerPeers = strings.Split(os.Getenv("KAFKA_PEERS"), ",") - } + zkPeers = []string{testutil.GetLocalHost() + ":2181"} + brokerPeers = []string{testutil.GetLocalHost() + ":9092"} k := &Kafka{ ConsumerGroupName: "telegraf_test_consumers", diff --git a/plugins/lustre2/lustre2.go b/plugins/lustre2/lustre2.go new file mode 100644 index 000000000..95b6bdbf7 --- /dev/null +++ b/plugins/lustre2/lustre2.go @@ -0,0 +1,234 @@ +/* +Lustre 2.x telegraf plugin + +Lustre (http://lustre.org/) is an open-source, parallel file system +for HPC environments. It stores statistics about its activity in +/proc + +*/ +package lustre2 + +import ( + "path/filepath" + "strconv" + "strings" + + "github.com/influxdb/telegraf/plugins" + common "github.com/influxdb/telegraf/plugins/system/ps/common" +) + +// Lustre proc files can change between versions, so we want to future-proof +// by letting people choose what to look at. 
+type Lustre2 struct { + Ost_procfiles []string + Mds_procfiles []string +} + +var sampleConfig = ` +# An array of /proc globs to search for Lustre stats +# If not specified, the default will work on Lustre 2.5.x +# +# ost_procfiles = ["/proc/fs/lustre/obdfilter/*/stats", "/proc/fs/lustre/osd-ldiskfs/*/stats"] +# mds_procfiles = ["/proc/fs/lustre/mdt/*/md_stats"]` + +/* The wanted fields would be a []string if not for the +lines that start with read_bytes/write_bytes and contain + both the byte count and the function call count +*/ +type mapping struct { + inProc string // What to look for at the start of a line in /proc/fs/lustre/* + field uint32 // which field to extract from that line + reportAs string // What measurement name to use + tag string // Additional tag to add for this metric +} + +var wanted_ost_fields = []*mapping{ + { + inProc: "write_bytes", + field: 6, + reportAs: "write_bytes", + }, + { // line starts with 'write_bytes', but value write_calls is in second column + inProc: "write_bytes", + field: 1, + reportAs: "write_calls", + }, + { + inProc: "read_bytes", + field: 6, + reportAs: "read_bytes", + }, + { // line starts with 'read_bytes', but value read_calls is in second column + inProc: "read_bytes", + field: 1, + reportAs: "read_calls", + }, + { + inProc: "cache_hit", + }, + { + inProc: "cache_miss", + }, + { + inProc: "cache_access", + }, +} + +var wanted_mds_fields = []*mapping{ + { + inProc: "open", + }, + { + inProc: "close", + }, + { + inProc: "mknod", + }, + { + inProc: "link", + }, + { + inProc: "unlink", + }, + { + inProc: "mkdir", + }, + { + inProc: "rmdir", + }, + { + inProc: "rename", + }, + { + inProc: "getattr", + }, + { + inProc: "setattr", + }, + { + inProc: "getxattr", + }, + { + inProc: "setxattr", + }, + { + inProc: "statfs", + }, + { + inProc: "sync", + }, + { + inProc: "samedir_rename", + }, + { + inProc: "crossdir_rename", + }, +} + +func (l *Lustre2) GetLustreProcStats(fileglob string, wanted_fields []*mapping, acc plugins.Accumulator) error { + files, err := filepath.Glob(fileglob) + if err != nil { + return err + } + + for _, file := range files { + /* Turn /proc/fs/lustre/obdfilter/<ost_name>/stats and similar + * into just the object store target name + * Assumption: the target name is always second to last, + * which is true in Lustre 2.1->2.5 + */ + path := strings.Split(file, "/") + name := path[len(path)-2] + tags := map[string]string{ + "name": name, + } + + lines, err := common.ReadLines(file) + if err != nil { + return err + } + + for _, line := range lines { + fields := strings.Fields(line) + if len(fields) == 0 { + // skip blank lines + continue + } + + for _, wanted := range wanted_fields { + var data uint64 + if fields[0] == wanted.inProc { + wanted_field := wanted.field + // if not set, assume field[1].
Shouldn't be field[0], as + // that's a string + if wanted_field == 0 { + wanted_field = 1 + } + data, err = strconv.ParseUint((fields[wanted_field]), 10, 64) + if err != nil { + return err + } + report_name := wanted.inProc + if wanted.reportAs != "" { + report_name = wanted.reportAs + } + acc.Add(report_name, data, tags) + + } + } + } + } + return nil +} + +// SampleConfig returns sample configuration message +func (l *Lustre2) SampleConfig() string { + return sampleConfig +} + +// Description returns description of Lustre2 plugin +func (l *Lustre2) Description() string { + return "Read metrics from local Lustre service on OST, MDS" +} + +// Gather reads stats from all lustre targets +func (l *Lustre2) Gather(acc plugins.Accumulator) error { + + if len(l.Ost_procfiles) == 0 { + // read/write bytes are in obdfilter//stats + err := l.GetLustreProcStats("/proc/fs/lustre/obdfilter/*/stats", wanted_ost_fields, acc) + if err != nil { + return err + } + // cache counters are in osd-ldiskfs//stats + err = l.GetLustreProcStats("/proc/fs/lustre/osd-ldiskfs/*/stats", wanted_ost_fields, acc) + if err != nil { + return err + } + } + + if len(l.Mds_procfiles) == 0 { + // Metadata server stats + err := l.GetLustreProcStats("/proc/fs/lustre/mdt/*/md_stats", wanted_mds_fields, acc) + if err != nil { + return err + } + } + + for _, procfile := range l.Ost_procfiles { + err := l.GetLustreProcStats(procfile, wanted_ost_fields, acc) + if err != nil { + return err + } + } + for _, procfile := range l.Mds_procfiles { + err := l.GetLustreProcStats(procfile, wanted_mds_fields, acc) + if err != nil { + return err + } + } + + return nil +} + +func init() { + plugins.Add("lustre2", func() plugins.Plugin { + return &Lustre2{} + }) +} diff --git a/plugins/lustre2/lustre2_test.go b/plugins/lustre2/lustre2_test.go new file mode 100644 index 000000000..850a4ff32 --- /dev/null +++ b/plugins/lustre2/lustre2_test.go @@ -0,0 +1,144 @@ +package lustre2 + +import ( + "io/ioutil" + "os" + "testing" + + "github.com/influxdb/telegraf/testutil" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// Set config file variables to point to fake directory structure instead of /proc? 
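// (The test below takes exactly that approach: it builds a throwaway
// proc-style tree under os.TempDir(), writes the fixture contents into
// it, and points the Ost_procfiles/Mds_procfiles globs at it, so no
// real /proc/fs/lustre is needed.)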
+ +const obdfilterProcContents = `snapshot_time 1438693064.430544 secs.usecs +read_bytes 203238095 samples [bytes] 4096 1048576 78026117632000 +write_bytes 71893382 samples [bytes] 1 1048576 15201500833981 +get_info 1182008495 samples [reqs] +set_info_async 2 samples [reqs] +connect 1117 samples [reqs] +reconnect 1160 samples [reqs] +disconnect 1084 samples [reqs] +statfs 3575885 samples [reqs] +create 698 samples [reqs] +destroy 3190060 samples [reqs] +setattr 605647 samples [reqs] +punch 805187 samples [reqs] +sync 6608753 samples [reqs] +preprw 275131477 samples [reqs] +commitrw 275131477 samples [reqs] +quotactl 229231 samples [reqs] +ping 78020757 samples [reqs] +` + +const osdldiskfsProcContents = `snapshot_time 1438693135.640551 secs.usecs +get_page 275132812 samples [usec] 0 3147 1320420955 22041662259 +cache_access 19047063027 samples [pages] 1 1 19047063027 +cache_hit 7393729777 samples [pages] 1 1 7393729777 +cache_miss 11653333250 samples [pages] 1 1 11653333250 +` + +const mdtProcContents = `snapshot_time 1438693238.20113 secs.usecs +open 1024577037 samples [reqs] +close 873243496 samples [reqs] +mknod 349042 samples [reqs] +link 445 samples [reqs] +unlink 3549417 samples [reqs] +mkdir 705499 samples [reqs] +rmdir 227434 samples [reqs] +rename 629196 samples [reqs] +getattr 1503663097 samples [reqs] +setattr 1898364 samples [reqs] +getxattr 6145349681 samples [reqs] +setxattr 83969 samples [reqs] +statfs 2916320 samples [reqs] +sync 434081 samples [reqs] +samedir_rename 259625 samples [reqs] +crossdir_rename 369571 samples [reqs] +` + +type metrics struct { + name string + value uint64 +} + +func TestLustre2GeneratesMetrics(t *testing.T) { + + tempdir := os.TempDir() + "/telegraf/proc/fs/lustre/" + ost_name := "OST0001" + + mdtdir := tempdir + "/mdt/" + err := os.MkdirAll(mdtdir+"/"+ost_name, 0755) + require.NoError(t, err) + + osddir := tempdir + "/osd-ldiskfs/" + err = os.MkdirAll(osddir+"/"+ost_name, 0755) + require.NoError(t, err) + + obddir := tempdir + "/obdfilter/" + err = os.MkdirAll(obddir+"/"+ost_name, 0755) + require.NoError(t, err) + + err = ioutil.WriteFile(mdtdir+"/"+ost_name+"/md_stats", []byte(mdtProcContents), 0644) + require.NoError(t, err) + + err = ioutil.WriteFile(osddir+"/"+ost_name+"/stats", []byte(osdldiskfsProcContents), 0644) + require.NoError(t, err) + + err = ioutil.WriteFile(obddir+"/"+ost_name+"/stats", []byte(obdfilterProcContents), 0644) + require.NoError(t, err) + + m := &Lustre2{ + Ost_procfiles: []string{obddir + "/*/stats", osddir + "/*/stats"}, + Mds_procfiles: []string{mdtdir + "/*/md_stats"}, + } + + var acc testutil.Accumulator + + err = m.Gather(&acc) + require.NoError(t, err) + + tags := map[string]string{ + "name": ost_name, + } + + intMetrics := []*metrics{ + { + name: "write_bytes", + value: 15201500833981, + }, + { + name: "read_bytes", + value: 78026117632000, + }, + { + name: "write_calls", + value: 71893382, + }, + { + name: "read_calls", + value: 203238095, + }, + { + name: "cache_hit", + value: 7393729777, + }, + { + name: "cache_access", + value: 19047063027, + }, + { + name: "cache_miss", + value: 11653333250, + }, + } + + for _, metric := range intMetrics { + assert.True(t, acc.HasUIntValue(metric.name), metric.name) + assert.True(t, acc.CheckTaggedValue(metric.name, metric.value, tags)) + } + + err = os.RemoveAll(os.TempDir() + "/telegraf") + require.NoError(t, err) +} diff --git a/plugins/memcached/memcached_test.go b/plugins/memcached/memcached_test.go index 08e696fb7..501fed1b9 100644 --- 
a/plugins/memcached/memcached_test.go +++ b/plugins/memcached/memcached_test.go @@ -9,8 +9,12 @@ import ( ) func TestMemcachedGeneratesMetrics(t *testing.T) { + if testing.Short() { + t.Skip("Skipping integration test in short mode") + } + m := &Memcached{ - Servers: []string{"localhost"}, + Servers: []string{testutil.GetLocalHost()}, } var acc testutil.Accumulator diff --git a/plugins/mongodb/mongodb.go b/plugins/mongodb/mongodb.go new file mode 100644 index 000000000..28bbe3af0 --- /dev/null +++ b/plugins/mongodb/mongodb.go @@ -0,0 +1,144 @@ +package mongodb + +import ( + "crypto/tls" + "crypto/x509" + "fmt" + "net" + "net/url" + "sync" + "time" + + "github.com/influxdb/telegraf/plugins" + "gopkg.in/mgo.v2" +) + +type MongoDB struct { + Servers []string + Ssl Ssl + mongos map[string]*Server +} + +type Ssl struct { + Enabled bool + CaCerts []string `toml:"cacerts"` +} + +var sampleConfig = ` +# An array of URIs to gather stats about. Specify an ip or hostname +# with optional port and password, e.g. mongodb://user:auth_key@10.10.3.30:27017, +# mongodb://10.10.3.33:18832, 10.0.0.1:10000, etc. +# +# If no servers are specified, then 127.0.0.1 is used as the host and 27017 as the port. +servers = ["127.0.0.1:27017"]` + +func (m *MongoDB) SampleConfig() string { + return sampleConfig +} + +func (*MongoDB) Description() string { + return "Read metrics from one or many MongoDB servers" +} + +var localhost = &url.URL{Host: "127.0.0.1:27017"} + +// Gather reads stats from all configured servers and accumulates them. +// It returns one of the errors encountered while gathering stats (if any). +func (m *MongoDB) Gather(acc plugins.Accumulator) error { + if len(m.Servers) == 0 { + return m.gatherServer(m.getMongoServer(localhost), acc) + } + + var wg sync.WaitGroup + + var outerr error + + for _, serv := range m.Servers { + u, err := url.Parse(serv) + if err != nil { + return fmt.Errorf("Unable to parse address '%s': %s", serv, err) + } else if u.Scheme == "" { + u.Scheme = "mongodb" + // fallback to simple string based address (i.e.
"10.0.0.1:10000") + u.Host = serv + if u.Path == u.Host { + u.Path = "" + } + } + wg.Add(1) + go func() { + defer wg.Done() + outerr = m.gatherServer(m.getMongoServer(u), acc) + }() + } + + wg.Wait() + + return outerr +} + +func (m *MongoDB) getMongoServer(url *url.URL) *Server { + if _, ok := m.mongos[url.Host]; !ok { + m.mongos[url.Host] = &Server{ + Url: url, + } + } + return m.mongos[url.Host] +} + +func (m *MongoDB) gatherServer(server *Server, acc plugins.Accumulator) error { + if server.Session == nil { + var dialAddrs []string + if server.Url.User != nil { + dialAddrs = []string{server.Url.String()} + } else { + dialAddrs = []string{server.Url.Host} + } + dialInfo, err := mgo.ParseURL(dialAddrs[0]) + if err != nil { + return fmt.Errorf("Unable to parse URL (%s), %s\n", dialAddrs[0], err.Error()) + } + dialInfo.Direct = true + dialInfo.Timeout = time.Duration(10) * time.Second + + if m.Ssl.Enabled { + tlsConfig := &tls.Config{} + if len(m.Ssl.CaCerts) > 0 { + roots := x509.NewCertPool() + for _, caCert := range m.Ssl.CaCerts { + ok := roots.AppendCertsFromPEM([]byte(caCert)) + if !ok { + return fmt.Errorf("failed to parse root certificate") + } + } + tlsConfig.RootCAs = roots + } else { + tlsConfig.InsecureSkipVerify = true + } + dialInfo.DialServer = func(addr *mgo.ServerAddr) (net.Conn, error) { + conn, err := tls.Dial("tcp", addr.String(), tlsConfig) + if err != nil { + fmt.Printf("error in Dial, %s\n", err.Error()) + } + return conn, err + } + } + + sess, err := mgo.DialWithInfo(dialInfo) + if err != nil { + fmt.Printf("error dialing over ssl, %s\n", err.Error()) + return fmt.Errorf("Unable to connect to MongoDB, %s\n", err.Error()) + } + server.Session = sess + } + return server.gatherData(acc) +} + +func init() { + plugins.Add("mongodb", func() plugins.Plugin { + return &MongoDB{ + mongos: make(map[string]*Server), + } + }) +} diff --git a/plugins/mongodb/mongodb_data.go b/plugins/mongodb/mongodb_data.go new file mode 100644 index 000000000..ba6cc8d95 --- /dev/null +++ b/plugins/mongodb/mongodb_data.go @@ -0,0 +1,100 @@ +package mongodb + +import ( + "fmt" + "reflect" + "strconv" + + "github.com/influxdb/telegraf/plugins" +) + +type MongodbData struct { + StatLine *StatLine + Tags map[string]string +} + +func NewMongodbData(statLine *StatLine, tags map[string]string) *MongodbData { + if statLine.NodeType != "" && statLine.NodeType != "UNK" { + tags["state"] = statLine.NodeType + } + return &MongodbData{ + StatLine: statLine, + Tags: tags, + } +} + +var DefaultStats = map[string]string{ + "inserts_per_sec": "Insert", + "queries_per_sec": "Query", + "updates_per_sec": "Update", + "deletes_per_sec": "Delete", + "getmores_per_sec": "GetMore", + "commands_per_sec": "Command", + "flushes_per_sec": "Flushes", + "vsize_megabytes": "Virtual", + "resident_megabytes": "Resident", + "queued_reads": "QueuedReaders", + "queued_writes": "QueuedWriters", + "active_reads": "ActiveReaders", + "active_writes": "ActiveWriters", + "net_in_bytes": "NetIn", + "net_out_bytes": "NetOut", + "open_connections": "NumConnections", +} + +var DefaultReplStats = map[string]string{ + "repl_inserts_per_sec": "InsertR", + "repl_queries_per_sec": "QueryR", + "repl_updates_per_sec": "UpdateR", + "repl_deletes_per_sec": "DeleteR", + "repl_getmores_per_sec": "GetMoreR", + "repl_commands_per_sec": "CommandR", + "member_status": "NodeType", +} + +var MmapStats = map[string]string{ + "mapped_megabytes": "Mapped", + "non-mapped_megabytes": "NonMapped", + "page_faults_per_sec": "Faults", +} + +var WiredTigerStats = 
map[string]string{ + "percent_cache_dirty": "CacheDirtyPercent", + "percent_cache_used": "CacheUsedPercent", +} + +func (d *MongodbData) AddDefaultStats(acc plugins.Accumulator) { + statLine := reflect.ValueOf(d.StatLine).Elem() + d.addStat(acc, statLine, DefaultStats) + if d.StatLine.NodeType != "" { + d.addStat(acc, statLine, DefaultReplStats) + } + if d.StatLine.StorageEngine == "mmapv1" { + d.addStat(acc, statLine, MmapStats) + } else if d.StatLine.StorageEngine == "wiredTiger" { + for key, value := range WiredTigerStats { + val := statLine.FieldByName(value).Interface() + percentVal := fmt.Sprintf("%.1f", val.(float64)*100) + floatVal, _ := strconv.ParseFloat(percentVal, 64) + d.add(acc, key, floatVal) + } + } +} + +func (d *MongodbData) addStat(acc plugins.Accumulator, statLine reflect.Value, stats map[string]string) { + for key, value := range stats { + val := statLine.FieldByName(value).Interface() + d.add(acc, key, val) + } +} + +func (d *MongodbData) add(acc plugins.Accumulator, key string, val interface{}) { + acc.AddValuesWithTime( + key, + map[string]interface{}{ + "value": val, + }, + d.Tags, + d.StatLine.Time, + ) +} diff --git a/plugins/mongodb/mongodb_data_test.go b/plugins/mongodb/mongodb_data_test.go new file mode 100644 index 000000000..9ee3f9f48 --- /dev/null +++ b/plugins/mongodb/mongodb_data_test.go @@ -0,0 +1,111 @@ +package mongodb + +import ( + "testing" + "time" + + "github.com/influxdb/telegraf/testutil" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +var tags = make(map[string]string) + +func TestAddNonReplStats(t *testing.T) { + d := NewMongodbData( + &StatLine{ + StorageEngine: "", + Time: time.Now(), + Insert: 0, + Query: 0, + Update: 0, + Delete: 0, + GetMore: 0, + Command: 0, + Flushes: 0, + Virtual: 0, + Resident: 0, + QueuedReaders: 0, + QueuedWriters: 0, + ActiveReaders: 0, + ActiveWriters: 0, + NetIn: 0, + NetOut: 0, + NumConnections: 0, + }, + tags, + ) + var acc testutil.Accumulator + + d.AddDefaultStats(&acc) + + for key, _ := range DefaultStats { + assert.True(t, acc.HasIntValue(key)) + } +} + +func TestAddReplStats(t *testing.T) { + d := NewMongodbData( + &StatLine{ + StorageEngine: "mmapv1", + Mapped: 0, + NonMapped: 0, + Faults: 0, + }, + tags, + ) + + var acc testutil.Accumulator + + d.AddDefaultStats(&acc) + + for key, _ := range MmapStats { + assert.True(t, acc.HasIntValue(key)) + } +} + +func TestAddWiredTigerStats(t *testing.T) { + d := NewMongodbData( + &StatLine{ + StorageEngine: "wiredTiger", + CacheDirtyPercent: 0, + CacheUsedPercent: 0, + }, + tags, + ) + + var acc testutil.Accumulator + + d.AddDefaultStats(&acc) + + for key, _ := range WiredTigerStats { + assert.True(t, acc.HasFloatValue(key)) + } +} + +func TestStateTag(t *testing.T) { + d := NewMongodbData( + &StatLine{ + StorageEngine: "", + Time: time.Now(), + Insert: 0, + Query: 0, + NodeType: "PRI", + }, + tags, + ) + + stats := []string{"inserts_per_sec", "queries_per_sec"} + + stateTags := make(map[string]string) + stateTags["state"] = "PRI" + + var acc testutil.Accumulator + + d.AddDefaultStats(&acc) + + for _, key := range stats { + err := acc.ValidateTaggedValue(key, int64(0), stateTags) + require.NoError(t, err) + } +} diff --git a/plugins/mongodb/mongodb_server.go b/plugins/mongodb/mongodb_server.go new file mode 100644 index 000000000..d9b0edaad --- /dev/null +++ b/plugins/mongodb/mongodb_server.go @@ -0,0 +1,50 @@ +package mongodb + +import ( + "net/url" + "time" + + "github.com/influxdb/telegraf/plugins" + "gopkg.in/mgo.v2" + 
"gopkg.in/mgo.v2/bson" +) + +type Server struct { + Url *url.URL + Session *mgo.Session + lastResult *ServerStatus +} + +func (s *Server) getDefaultTags() map[string]string { + tags := make(map[string]string) + tags["hostname"] = s.Url.Host + return tags +} + +func (s *Server) gatherData(acc plugins.Accumulator) error { + s.Session.SetMode(mgo.Eventual, true) + s.Session.SetSocketTimeout(0) + result := &ServerStatus{} + err := s.Session.DB("admin").Run(bson.D{{"serverStatus", 1}, {"recordStats", 0}}, result) + if err != nil { + return err + } + defer func() { + s.lastResult = result + }() + + result.SampleTime = time.Now() + if s.lastResult != nil && result != nil { + duration := result.SampleTime.Sub(s.lastResult.SampleTime) + durationInSeconds := int64(duration.Seconds()) + if durationInSeconds == 0 { + durationInSeconds = 1 + } + data := NewMongodbData( + NewStatLine(*s.lastResult, *result, s.Url.Host, true, durationInSeconds), + s.getDefaultTags(), + ) + data.AddDefaultStats(acc) + } + return nil +} diff --git a/plugins/mongodb/mongodb_server_test.go b/plugins/mongodb/mongodb_server_test.go new file mode 100644 index 000000000..ec536bbef --- /dev/null +++ b/plugins/mongodb/mongodb_server_test.go @@ -0,0 +1,43 @@ +// +build integration + +package mongodb + +import ( + "testing" + "time" + + "github.com/influxdb/telegraf/testutil" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestGetDefaultTags(t *testing.T) { + var tagTests = []struct { + in string + out string + }{ + {"hostname", server.Url.Host}, + } + defaultTags := server.getDefaultTags() + for _, tt := range tagTests { + if defaultTags[tt.in] != tt.out { + t.Errorf("expected %q, got %q", tt.out, defaultTags[tt.in]) + } + } +} + +func TestAddDefaultStats(t *testing.T) { + var acc testutil.Accumulator + + err := server.gatherData(&acc) + require.NoError(t, err) + + time.Sleep(time.Duration(1) * time.Second) + // need to call this twice so it can perform the diff + err = server.gatherData(&acc) + require.NoError(t, err) + + for key, _ := range DefaultStats { + assert.True(t, acc.HasIntValue(key)) + } +} diff --git a/plugins/mongodb/mongodb_test.go b/plugins/mongodb/mongodb_test.go new file mode 100644 index 000000000..174128d19 --- /dev/null +++ b/plugins/mongodb/mongodb_test.go @@ -0,0 +1,71 @@ +// +build integration + +package mongodb + +import ( + "log" + "math/rand" + "net/url" + "os" + "testing" + "time" + + "gopkg.in/mgo.v2" +) + +var connect_url string +var server *Server + +func init() { + connect_url = os.Getenv("MONGODB_URL") + if connect_url == "" { + connect_url = "127.0.0.1:27017" + server = &Server{Url: &url.URL{Host: connect_url}} + } else { + full_url, err := url.Parse(connect_url) + if err != nil { + log.Fatalf("Unable to parse URL (%s), %s\n", full_url, err.Error()) + } + server = &Server{Url: full_url} + } +} + +func testSetup(m *testing.M) { + var err error + var dialAddrs []string + if server.Url.User != nil { + dialAddrs = []string{server.Url.String()} + } else { + dialAddrs = []string{server.Url.Host} + } + dialInfo, err := mgo.ParseURL(dialAddrs[0]) + if err != nil { + log.Fatalf("Unable to parse URL (%s), %s\n", dialAddrs[0], err.Error()) + } + dialInfo.Direct = true + dialInfo.Timeout = time.Duration(10) * time.Second + sess, err := mgo.DialWithInfo(dialInfo) + if err != nil { + log.Fatalf("Unable to connect to MongoDB, %s\n", err.Error()) + } + server.Session = sess + server.Session, _ = mgo.Dial(server.Url.Host) + if err != nil { + log.Fatalln(err.Error()) + } +} 
+ +func testTeardown(m *testing.M) { + server.Session.Close() +} + +func TestMain(m *testing.M) { + // seed randomness for use with tests + rand.Seed(time.Now().UTC().UnixNano()) + + testSetup(m) + res := m.Run() + testTeardown(m) + + os.Exit(res) +} diff --git a/plugins/mongodb/mongostat.go b/plugins/mongodb/mongostat.go new file mode 100644 index 000000000..b3c990b1a --- /dev/null +++ b/plugins/mongodb/mongostat.go @@ -0,0 +1,549 @@ +/*** +The code contained here came from https://github.com/mongodb/mongo-tools/blob/master/mongostat/stat_types.go +and contains modifications so that no other dependency from that project is needed. Other modifications included +removing unnecessary code specific to formatting the output and determining the current state of the database. It +is licensed under Apache Version 2.0, http://www.apache.org/licenses/LICENSE-2.0.html +***/ + +package mongodb + +import ( + "sort" + "strings" + "time" +) + +const ( + MongosProcess = "mongos" +) + +// Flags to determine cases when to activate/deactivate columns for output. +const ( + Always = 1 << iota // always activate the column + Discover // only active when mongostat is in discover mode + Repl // only active if one of the nodes being monitored is in a replset + Locks // only active if node is capable of calculating lock info + AllOnly // only active if mongostat was run with --all option + MMAPOnly // only active if node has mmap-specific fields + WTOnly // only active if node has wiredtiger-specific fields +) + +type ServerStatus struct { + SampleTime time.Time `bson:""` + Host string `bson:"host"` + Version string `bson:"version"` + Process string `bson:"process"` + Pid int64 `bson:"pid"` + Uptime int64 `bson:"uptime"` + UptimeMillis int64 `bson:"uptimeMillis"` + UptimeEstimate int64 `bson:"uptimeEstimate"` + LocalTime time.Time `bson:"localTime"` + Asserts map[string]int64 `bson:"asserts"` + BackgroundFlushing *FlushStats `bson:"backgroundFlushing"` + ExtraInfo *ExtraInfo `bson:"extra_info"` + Connections *ConnectionStats `bson:"connections"` + Dur *DurStats `bson:"dur"` + GlobalLock *GlobalLockStats `bson:"globalLock"` + Locks map[string]LockStats `bson:"locks,omitempty"` + Network *NetworkStats `bson:"network"` + Opcounters *OpcountStats `bson:"opcounters"` + OpcountersRepl *OpcountStats `bson:"opcountersRepl"` + RecordStats *DBRecordStats `bson:"recordStats"` + Mem *MemStats `bson:"mem"` + Repl *ReplStatus `bson:"repl"` + ShardCursorType map[string]interface{} `bson:"shardCursorType"` + StorageEngine map[string]string `bson:"storageEngine"` + WiredTiger *WiredTiger `bson:"wiredTiger"` +} + +// WiredTiger stores information related to the WiredTiger storage engine. +type WiredTiger struct { + Transaction TransactionStats `bson:"transaction"` + Concurrent ConcurrentTransactions `bson:"concurrentTransactions"` + Cache CacheStats `bson:"cache"` +} + +type ConcurrentTransactions struct { + Write ConcurrentTransStats `bson:"write"` + Read ConcurrentTransStats `bson:"read"` +} + +type ConcurrentTransStats struct { + Out int64 `bson:"out"` +} + +// CacheStats stores cache statistics for WiredTiger. +type CacheStats struct { + TrackedDirtyBytes int64 `bson:"tracked dirty bytes in the cache"` + CurrentCachedBytes int64 `bson:"bytes currently in the cache"` + MaxBytesConfigured int64 `bson:"maximum bytes configured"` +} + +// TransactionStats stores transaction checkpoints in WiredTiger.
+type TransactionStats struct { + TransCheckpoints int64 `bson:"transaction checkpoints"` +} + +// ReplStatus stores data related to replica sets. +type ReplStatus struct { + SetName interface{} `bson:"setName"` + IsMaster interface{} `bson:"ismaster"` + Secondary interface{} `bson:"secondary"` + IsReplicaSet interface{} `bson:"isreplicaset"` + ArbiterOnly interface{} `bson:"arbiterOnly"` + Hosts []string `bson:"hosts"` + Passives []string `bson:"passives"` + Me string `bson:"me"` +} + +// DBRecordStats stores data related to memory operations across databases. +type DBRecordStats struct { + AccessesNotInMemory int64 `bson:"accessesNotInMemory"` + PageFaultExceptionsThrown int64 `bson:"pageFaultExceptionsThrown"` + DBRecordAccesses map[string]RecordAccesses `bson:",inline"` +} + +// RecordAccesses stores data related to memory operations scoped to a database. +type RecordAccesses struct { + AccessesNotInMemory int64 `bson:"accessesNotInMemory"` + PageFaultExceptionsThrown int64 `bson:"pageFaultExceptionsThrown"` +} + +// MemStats stores data related to memory statistics. +type MemStats struct { + Bits int64 `bson:"bits"` + Resident int64 `bson:"resident"` + Virtual int64 `bson:"virtual"` + Supported interface{} `bson:"supported"` + Mapped int64 `bson:"mapped"` + MappedWithJournal int64 `bson:"mappedWithJournal"` +} + +// FlushStats stores information about memory flushes. +type FlushStats struct { + Flushes int64 `bson:"flushes"` + TotalMs int64 `bson:"total_ms"` + AverageMs float64 `bson:"average_ms"` + LastMs int64 `bson:"last_ms"` + LastFinished time.Time `bson:"last_finished"` +} + +// ConnectionStats stores information related to incoming database connections. +type ConnectionStats struct { + Current int64 `bson:"current"` + Available int64 `bson:"available"` + TotalCreated int64 `bson:"totalCreated"` +} + +// DurTiming stores information related to journaling. +type DurTiming struct { + Dt int64 `bson:"dt"` + PrepLogBuffer int64 `bson:"prepLogBuffer"` + WriteToJournal int64 `bson:"writeToJournal"` + WriteToDataFiles int64 `bson:"writeToDataFiles"` + RemapPrivateView int64 `bson:"remapPrivateView"` +} + +// DurStats stores information related to journaling statistics. +type DurStats struct { + Commits int64 `bson:"commits"` + JournaledMB int64 `bson:"journaledMB"` + WriteToDataFilesMB int64 `bson:"writeToDataFilesMB"` + Compression int64 `bson:"compression"` + CommitsInWriteLock int64 `bson:"commitsInWriteLock"` + EarlyCommits int64 `bson:"earlyCommits"` + TimeMs DurTiming +} + +// QueueStats stores the number of queued read/write operations. +type QueueStats struct { + Total int64 `bson:"total"` + Readers int64 `bson:"readers"` + Writers int64 `bson:"writers"` +} + +// ClientStats stores the number of active read/write operations. +type ClientStats struct { + Total int64 `bson:"total"` + Readers int64 `bson:"readers"` + Writers int64 `bson:"writers"` +} + +// GlobalLockStats stores information related to locks in the MMAP storage engine. +type GlobalLockStats struct { + TotalTime int64 `bson:"totalTime"` + LockTime int64 `bson:"lockTime"` + CurrentQueue *QueueStats `bson:"currentQueue"` + ActiveClients *ClientStats `bson:"activeClients"` +} + +// NetworkStats stores information related to network traffic. +type NetworkStats struct { + BytesIn int64 `bson:"bytesIn"` + BytesOut int64 `bson:"bytesOut"` + NumRequests int64 `bson:"numRequests"` +} + +// OpcountStats stores information related to commands and basic CRUD operations.
+type OpcountStats struct { + Insert int64 `bson:"insert"` + Query int64 `bson:"query"` + Update int64 `bson:"update"` + Delete int64 `bson:"delete"` + GetMore int64 `bson:"getmore"` + Command int64 `bson:"command"` +} + +// ReadWriteLockTimes stores time spent holding read/write locks. +type ReadWriteLockTimes struct { + Read int64 `bson:"R"` + Write int64 `bson:"W"` + ReadLower int64 `bson:"r"` + WriteLower int64 `bson:"w"` +} + +// LockStats stores information related to time spent acquiring/holding locks +// for a given database. +type LockStats struct { + TimeLockedMicros ReadWriteLockTimes `bson:"timeLockedMicros"` + TimeAcquiringMicros ReadWriteLockTimes `bson:"timeAcquiringMicros"` + + // AcquireCount is a new field of the lock stats only populated on 3.0 or newer. + // Typed as a pointer so that if it is nil, mongostat can assume the field is not populated + // with real namespace data. + AcquireCount *ReadWriteLockTimes `bson:"acquireCount,omitempty"` +} + +// ExtraInfo stores additional platform specific information. +type ExtraInfo struct { + PageFaults *int64 `bson:"page_faults"` +} + +// StatHeader describes a single column for mongostat's terminal output, +// its formatting, and in which modes it should be displayed. +type StatHeader struct { + // The text to appear in the column's header cell + HeaderText string + + // Bitmask containing flags to determine if this header is active or not + ActivateFlags int +} + +// StatHeaders are the complete set of data metrics supported by mongostat. +var StatHeaders = []StatHeader{ + {"", Always}, // placeholder for hostname column (blank header text) + {"insert", Always}, + {"query", Always}, + {"update", Always}, + {"delete", Always}, + {"getmore", Always}, + {"command", Always}, + {"% dirty", WTOnly}, + {"% used", WTOnly}, + {"flushes", Always}, + {"mapped", MMAPOnly}, + {"vsize", Always}, + {"res", Always}, + {"non-mapped", MMAPOnly | AllOnly}, + {"faults", MMAPOnly}, + {" locked db", Locks}, + {"qr|qw", Always}, + {"ar|aw", Always}, + {"netIn", Always}, + {"netOut", Always}, + {"conn", Always}, + {"set", Repl}, + {"repl", Repl}, + {"time", Always}, +} + +// NamespacedLocks stores information on the LockStatus of namespaces. +type NamespacedLocks map[string]LockStatus + +// LockUsage stores information related to a namespace's lock usage. +type LockUsage struct { + Namespace string + Reads int64 + Writes int64 +} + +type lockUsages []LockUsage + +func percentageInt64(value, outOf int64) float64 { + if value == 0 || outOf == 0 { + return 0 + } + return 100 * (float64(value) / float64(outOf)) +} + +func (slice lockUsages) Len() int { + return len(slice) +} + +func (slice lockUsages) Less(i, j int) bool { + return slice[i].Reads+slice[i].Writes < slice[j].Reads+slice[j].Writes +} + +func (slice lockUsages) Swap(i, j int) { + slice[i], slice[j] = slice[j], slice[i] +} + +// LockStatus stores a database's lock statistics. +type LockStatus struct { + DBName string + Percentage float64 + Global bool +} + +// StatLine is a wrapper for all metrics reported by mongostat for monitored hosts. +type StatLine struct { + Key string + // What storage engine is being used for the node with this stat line + StorageEngine string + + Error error + IsMongos bool + Host string + + // The time at which this StatLine was generated. + Time time.Time + + // The last time at which this StatLine was printed to output. 
+ LastPrinted time.Time + + // Opcounter fields + Insert, Query, Update, Delete, GetMore, Command int64 + + // Cache utilization (wiredtiger only) + CacheDirtyPercent float64 + CacheUsedPercent float64 + + // Replicated Opcounter fields + InsertR, QueryR, UpdateR, DeleteR, GetMoreR, CommandR int64 + Flushes int64 + Mapped, Virtual, Resident, NonMapped int64 + Faults int64 + HighestLocked *LockStatus + QueuedReaders, QueuedWriters int64 + ActiveReaders, ActiveWriters int64 + NetIn, NetOut int64 + NumConnections int64 + ReplSetName string + NodeType string +} + +func parseLocks(stat ServerStatus) map[string]LockUsage { + returnVal := map[string]LockUsage{} + for namespace, lockInfo := range stat.Locks { + returnVal[namespace] = LockUsage{ + namespace, + lockInfo.TimeLockedMicros.Read + lockInfo.TimeLockedMicros.ReadLower, + lockInfo.TimeLockedMicros.Write + lockInfo.TimeLockedMicros.WriteLower, + } + } + return returnVal +} + +func computeLockDiffs(prevLocks, curLocks map[string]LockUsage) []LockUsage { + lockUsages := lockUsages(make([]LockUsage, 0, len(curLocks))) + for namespace, curUsage := range curLocks { + prevUsage, hasKey := prevLocks[namespace] + if !hasKey { + // This namespace didn't appear in the previous batch of lock info, + // so we can't compute a diff for it - skip it. + continue + } + // Calculate diff of lock usage for this namespace and add to the list + lockUsages = append(lockUsages, + LockUsage{ + namespace, + curUsage.Reads - prevUsage.Reads, + curUsage.Writes - prevUsage.Writes, + }) + } + // Sort the array in order of least to most locked + sort.Sort(lockUsages) + return lockUsages +} + +func diff(newVal, oldVal, sampleTime int64) int64 { + return (newVal - oldVal) / sampleTime +} + +// NewStatLine constructs a StatLine object from two ServerStatus objects. 
+func NewStatLine(oldStat, newStat ServerStatus, key string, all bool, sampleSecs int64) *StatLine { + returnVal := &StatLine{ + Key: key, + Host: newStat.Host, + Mapped: -1, + Virtual: -1, + Resident: -1, + NonMapped: -1, + Faults: -1, + } + + // set the storage engine appropriately + if newStat.StorageEngine != nil && newStat.StorageEngine["name"] != "" { + returnVal.StorageEngine = newStat.StorageEngine["name"] + } else { + returnVal.StorageEngine = "mmapv1" + } + + if newStat.Opcounters != nil && oldStat.Opcounters != nil { + returnVal.Insert = diff(newStat.Opcounters.Insert, oldStat.Opcounters.Insert, sampleSecs) + returnVal.Query = diff(newStat.Opcounters.Query, oldStat.Opcounters.Query, sampleSecs) + returnVal.Update = diff(newStat.Opcounters.Update, oldStat.Opcounters.Update, sampleSecs) + returnVal.Delete = diff(newStat.Opcounters.Delete, oldStat.Opcounters.Delete, sampleSecs) + returnVal.GetMore = diff(newStat.Opcounters.GetMore, oldStat.Opcounters.GetMore, sampleSecs) + returnVal.Command = diff(newStat.Opcounters.Command, oldStat.Opcounters.Command, sampleSecs) + } + + if newStat.OpcountersRepl != nil && oldStat.OpcountersRepl != nil { + returnVal.InsertR = diff(newStat.OpcountersRepl.Insert, oldStat.OpcountersRepl.Insert, sampleSecs) + returnVal.QueryR = diff(newStat.OpcountersRepl.Query, oldStat.OpcountersRepl.Query, sampleSecs) + returnVal.UpdateR = diff(newStat.OpcountersRepl.Update, oldStat.OpcountersRepl.Update, sampleSecs) + returnVal.DeleteR = diff(newStat.OpcountersRepl.Delete, oldStat.OpcountersRepl.Delete, sampleSecs) + returnVal.GetMoreR = diff(newStat.OpcountersRepl.GetMore, oldStat.OpcountersRepl.GetMore, sampleSecs) + returnVal.CommandR = diff(newStat.OpcountersRepl.Command, oldStat.OpcountersRepl.Command, sampleSecs) + } + + returnVal.CacheDirtyPercent = -1 + returnVal.CacheUsedPercent = -1 + if newStat.WiredTiger != nil && oldStat.WiredTiger != nil { + returnVal.Flushes = newStat.WiredTiger.Transaction.TransCheckpoints - oldStat.WiredTiger.Transaction.TransCheckpoints + returnVal.CacheDirtyPercent = float64(newStat.WiredTiger.Cache.TrackedDirtyBytes) / float64(newStat.WiredTiger.Cache.MaxBytesConfigured) + returnVal.CacheUsedPercent = float64(newStat.WiredTiger.Cache.CurrentCachedBytes) / float64(newStat.WiredTiger.Cache.MaxBytesConfigured) + } else if newStat.BackgroundFlushing != nil && oldStat.BackgroundFlushing != nil { + returnVal.Flushes = newStat.BackgroundFlushing.Flushes - oldStat.BackgroundFlushing.Flushes + } + + returnVal.Time = newStat.SampleTime + returnVal.IsMongos = + (newStat.ShardCursorType != nil || strings.HasPrefix(newStat.Process, MongosProcess)) + + // BEGIN code modification + if oldStat.Mem.Supported.(bool) { + // END code modification + if !returnVal.IsMongos { + returnVal.Mapped = newStat.Mem.Mapped + } + returnVal.Virtual = newStat.Mem.Virtual + returnVal.Resident = newStat.Mem.Resident + + if !returnVal.IsMongos && all { + returnVal.NonMapped = newStat.Mem.Virtual - newStat.Mem.Mapped + } + } + + if newStat.Repl != nil { + setName, isReplSet := newStat.Repl.SetName.(string) + if isReplSet { + returnVal.ReplSetName = setName + } + // BEGIN code modification + if newStat.Repl.IsMaster.(bool) { + returnVal.NodeType = "PRI" + } else if newStat.Repl.Secondary.(bool) { + returnVal.NodeType = "SEC" + } else { + returnVal.NodeType = "UNK" + } + // END code modification + } else if returnVal.IsMongos { + returnVal.NodeType = "RTR" + } + + if oldStat.ExtraInfo != nil && newStat.ExtraInfo != nil && + oldStat.ExtraInfo.PageFaults != nil && 
newStat.ExtraInfo.PageFaults != nil { + returnVal.Faults = diff(*(newStat.ExtraInfo.PageFaults), *(oldStat.ExtraInfo.PageFaults), sampleSecs) + } + if !returnVal.IsMongos && oldStat.Locks != nil { + globalCheck, hasGlobal := oldStat.Locks["Global"] + if hasGlobal && globalCheck.AcquireCount != nil { + // This appears to be a 3.0+ server so the data in these fields does *not* refer to + // actual namespaces and thus we can't compute lock %. + returnVal.HighestLocked = nil + } else { + prevLocks := parseLocks(oldStat) + curLocks := parseLocks(newStat) + lockdiffs := computeLockDiffs(prevLocks, curLocks) + if len(lockdiffs) == 0 { + if newStat.GlobalLock != nil { + returnVal.HighestLocked = &LockStatus{ + DBName: "", + Percentage: percentageInt64(newStat.GlobalLock.LockTime, newStat.GlobalLock.TotalTime), + Global: true, + } + } + } else { + // Get the entry with the highest lock + highestLocked := lockdiffs[len(lockdiffs)-1] + + timeDiffMillis := newStat.UptimeMillis - oldStat.UptimeMillis + + lockToReport := highestLocked.Writes + + // if the highest locked namespace is not '.' + if highestLocked.Namespace != "." { + for _, namespaceLockInfo := range lockdiffs { + if namespaceLockInfo.Namespace == "." { + lockToReport += namespaceLockInfo.Writes + } + } + } + + // lock data is in microseconds and uptime is in milliseconds - so + // divide by 1000 so that the units match + lockToReport /= 1000 + + returnVal.HighestLocked = &LockStatus{ + DBName: highestLocked.Namespace, + Percentage: percentageInt64(lockToReport, timeDiffMillis), + Global: false, + } + } + } + } else { + returnVal.HighestLocked = nil + } + + if newStat.GlobalLock != nil { + hasWT := (newStat.WiredTiger != nil && oldStat.WiredTiger != nil) + // If we have wiredtiger stats, use those instead + if newStat.GlobalLock.CurrentQueue != nil { + if hasWT { + returnVal.QueuedReaders = newStat.GlobalLock.CurrentQueue.Readers + newStat.GlobalLock.ActiveClients.Readers - newStat.WiredTiger.Concurrent.Read.Out + returnVal.QueuedWriters = newStat.GlobalLock.CurrentQueue.Writers + newStat.GlobalLock.ActiveClients.Writers - newStat.WiredTiger.Concurrent.Write.Out + if returnVal.QueuedReaders < 0 { + returnVal.QueuedReaders = 0 + } + if returnVal.QueuedWriters < 0 { + returnVal.QueuedWriters = 0 + } + } else { + returnVal.QueuedReaders = newStat.GlobalLock.CurrentQueue.Readers + returnVal.QueuedWriters = newStat.GlobalLock.CurrentQueue.Writers + } + } + + if hasWT { + returnVal.ActiveReaders = newStat.WiredTiger.Concurrent.Read.Out + returnVal.ActiveWriters = newStat.WiredTiger.Concurrent.Write.Out + } else if newStat.GlobalLock.ActiveClients != nil { + returnVal.ActiveReaders = newStat.GlobalLock.ActiveClients.Readers + returnVal.ActiveWriters = newStat.GlobalLock.ActiveClients.Writers + } + } + + if oldStat.Network != nil && newStat.Network != nil { + returnVal.NetIn = diff(newStat.Network.BytesIn, oldStat.Network.BytesIn, sampleSecs) + returnVal.NetOut = diff(newStat.Network.BytesOut, oldStat.Network.BytesOut, sampleSecs) + } + + if newStat.Connections != nil { + returnVal.NumConnections = newStat.Connections.Current + } + + return returnVal +} diff --git a/plugins/mysql/mysql.go b/plugins/mysql/mysql.go index 15b1af8c9..a55006a4d 100644 --- a/plugins/mysql/mysql.go +++ b/plugins/mysql/mysql.go @@ -71,6 +71,10 @@ var mappings = []*mapping{ onServer: "Innodb_", inExport: "innodb_", }, + { + onServer: "Tokudb_", + inExport: "tokudb_", + }, { onServer: "Threads_", inExport: "threads_", @@ -91,7 +95,7 @@ func (m *Mysql)
gatherServer(serv string, acc plugins.Accumulator) error { rows, err := db.Query(`SHOW /*!50002 GLOBAL */ STATUS`) if err != nil { - return nil + return err } for rows.Next() { diff --git a/plugins/mysql/mysql_test.go b/plugins/mysql/mysql_test.go index 33643861a..b4c29146e 100644 --- a/plugins/mysql/mysql_test.go +++ b/plugins/mysql/mysql_test.go @@ -1,6 +1,7 @@ package mysql import ( + "fmt" "strings" "testing" @@ -10,8 +11,12 @@ import ( ) func TestMysqlGeneratesMetrics(t *testing.T) { + if testing.Short() { + t.Skip("Skipping integration test in short mode") + } + m := &Mysql{ - Servers: []string{""}, + Servers: []string{fmt.Sprintf("root@tcp(%s:3306)/", testutil.GetLocalHost())}, } var acc testutil.Accumulator @@ -39,7 +44,7 @@ func TestMysqlGeneratesMetrics(t *testing.T) { var count int for _, p := range acc.Points { - if strings.HasPrefix(p.Name, prefix.prefix) { + if strings.HasPrefix(p.Measurement, prefix.prefix) { count++ } } @@ -53,7 +58,13 @@ func TestMysqlGeneratesMetrics(t *testing.T) { } func TestMysqlDefaultsToLocal(t *testing.T) { - m := &Mysql{} + if testing.Short() { + t.Skip("Skipping integration test in short mode") + } + + m := &Mysql{ + Servers: []string{fmt.Sprintf("root@tcp(%s:3306)/", testutil.GetLocalHost())}, + } var acc testutil.Accumulator diff --git a/plugins/nginx/nginx.go b/plugins/nginx/nginx.go new file mode 100644 index 000000000..75b17232b --- /dev/null +++ b/plugins/nginx/nginx.go @@ -0,0 +1,145 @@ +package nginx + +import ( + "bufio" + "fmt" + "net" + "net/http" + "net/url" + "strconv" + "strings" + "sync" + "time" + + "github.com/influxdb/telegraf/plugins" +) + +type Nginx struct { + Urls []string +} + +var sampleConfig = ` +# An array of Nginx stub_status URI to gather stats. +urls = ["localhost/status"]` + +func (n *Nginx) SampleConfig() string { + return sampleConfig +} + +func (n *Nginx) Description() string { + return "Read Nginx's basic status information (ngx_http_stub_status_module)" +} + +func (n *Nginx) Gather(acc plugins.Accumulator) error { + var wg sync.WaitGroup + var outerr error + + for _, u := range n.Urls { + addr, err := url.Parse(u) + if err != nil { + return fmt.Errorf("Unable to parse address '%s': %s", u, err) + } + + wg.Add(1) + go func(addr *url.URL) { + defer wg.Done() + outerr = n.gatherUrl(addr, acc) + }(addr) + } + + wg.Wait() + + return outerr +} + +var tr = &http.Transport{ + ResponseHeaderTimeout: time.Duration(3 * time.Second), +} + +var client = &http.Client{Transport: tr} + +func (n *Nginx) gatherUrl(addr *url.URL, acc plugins.Accumulator) error { + resp, err := client.Get(addr.String()) + if err != nil { + return fmt.Errorf("error making HTTP request to %s: %s", addr.String(), err) + } + defer resp.Body.Close() + if resp.StatusCode != http.StatusOK { + return fmt.Errorf("%s returned HTTP status %s", addr.String(), resp.Status) + } + r := bufio.NewReader(resp.Body) + + // Active connections + _, err = r.ReadString(':') + if err != nil { + return err + } + line, err := r.ReadString('\n') + if err != nil { + return err + } + active, err := strconv.ParseUint(strings.TrimSpace(line), 10, 64) + if err != nil { + return err + } + + // Server accepts handled requests + _, err = r.ReadString('\n') + if err != nil { + return err + } + line, err = r.ReadString('\n') + if err != nil { + return err + } + data := strings.SplitN(strings.TrimSpace(line), " ", 3) + accepts, err := strconv.ParseUint(data[0], 10, 64) + if err != nil { + return err + } + handled, err := strconv.ParseUint(data[1], 10, 64) + if err != nil { + return err 
+ } + requests, err := strconv.ParseUint(data[2], 10, 64) + if err != nil { + return err + } + + // Reading/Writing/Waiting + line, err = r.ReadString('\n') + if err != nil { + return err + } + data = strings.SplitN(strings.TrimSpace(line), " ", 6) + reading, err := strconv.ParseUint(data[1], 10, 64) + if err != nil { + return err + } + writing, err := strconv.ParseUint(data[3], 10, 64) + if err != nil { + return err + } + waiting, err := strconv.ParseUint(data[5], 10, 64) + if err != nil { + return err + } + + host, _, _ := net.SplitHostPort(addr.Host) + tags := map[string]string{"server": host} + acc.Add("active", active, tags) + acc.Add("accepts", accepts, tags) + acc.Add("handled", handled, tags) + acc.Add("requests", requests, tags) + acc.Add("reading", reading, tags) + acc.Add("writing", writing, tags) + acc.Add("waiting", waiting, tags) + + return nil +} + +func init() { + plugins.Add("nginx", func() plugins.Plugin { + return &Nginx{} + }) +} diff --git a/plugins/nginx/nginx_test.go b/plugins/nginx/nginx_test.go new file mode 100644 index 000000000..6184f9b44 --- /dev/null +++ b/plugins/nginx/nginx_test.go @@ -0,0 +1,68 @@ +package nginx + +import ( + "fmt" + "net" + "net/http" + "net/http/httptest" + "net/url" + "testing" + + "github.com/influxdb/telegraf/testutil" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +const sampleResponse = ` +Active connections: 585 +server accepts handled requests + 85340 85340 35085 +Reading: 4 Writing: 135 Waiting: 446 +` + +func TestNginxGeneratesMetrics(t *testing.T) { + ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + var rsp string + + if r.URL.Path == "/stub_status" { + rsp = sampleResponse + } else { + panic("Cannot handle request") + } + + fmt.Fprintln(w, rsp) + })) + defer ts.Close() + + n := &Nginx{ + Urls: []string{fmt.Sprintf("%s/stub_status", ts.URL)}, + } + + var acc testutil.Accumulator + + err := n.Gather(&acc) + require.NoError(t, err) + + metrics := []struct { + name string + value uint64 + }{ + {"active", 585}, + {"accepts", 85340}, + {"handled", 85340}, + {"requests", 35085}, + {"reading", 4}, + {"writing", 135}, + {"waiting", 446}, + } + addr, err := url.Parse(ts.URL) + if err != nil { + panic(err) + } + host, _, _ := net.SplitHostPort(addr.Host) + tags := map[string]string{"server": host} + + for _, m := range metrics { + assert.NoError(t, acc.ValidateTaggedValue(m.name, m.value, tags)) + } +} diff --git a/plugins/postgresql/postgresql_test.go b/plugins/postgresql/postgresql_test.go index b11200e9f..363d289f9 100644 --- a/plugins/postgresql/postgresql_test.go +++ b/plugins/postgresql/postgresql_test.go @@ -1,6 +1,7 @@ package postgresql import ( + "fmt" "testing" "github.com/influxdb/telegraf/testutil" @@ -9,10 +10,15 @@ import ( ) func TestPostgresqlGeneratesMetrics(t *testing.T) { + if testing.Short() { + t.Skip("Skipping integration test in short mode") + } + p := &Postgresql{ Servers: []*Server{ { - Address: "sslmode=disable", + Address: fmt.Sprintf("host=%s user=postgres sslmode=disable", + testutil.GetLocalHost()), Databases: []string{"postgres"}, }, }, @@ -54,10 +60,15 @@ func TestPostgresqlGeneratesMetrics(t *testing.T) { } func TestPostgresqlTagsMetricsWithDatabaseName(t *testing.T) { + if testing.Short() { + t.Skip("Skipping integration test in short mode") + } + p := &Postgresql{ Servers: []*Server{ { - Address: "sslmode=disable", + Address: fmt.Sprintf("host=%s user=postgres sslmode=disable", + testutil.GetLocalHost()), Databases: 
[]string{"postgres"}, }, }, @@ -75,10 +86,15 @@ func TestPostgresqlTagsMetricsWithDatabaseName(t *testing.T) { } func TestPostgresqlDefaultsToAllDatabases(t *testing.T) { + if testing.Short() { + t.Skip("Skipping integration test in short mode") + } + p := &Postgresql{ Servers: []*Server{ { - Address: "sslmode=disable", + Address: fmt.Sprintf("host=%s user=postgres sslmode=disable", + testutil.GetLocalHost()), }, }, } @@ -91,7 +107,7 @@ func TestPostgresqlDefaultsToAllDatabases(t *testing.T) { var found bool for _, pnt := range acc.Points { - if pnt.Name == "xact_commit" { + if pnt.Measurement == "xact_commit" { if pnt.Tags["db"] == "postgres" { found = true break diff --git a/plugins/prometheus/prometheus.go b/plugins/prometheus/prometheus.go new file mode 100644 index 000000000..4029e9932 --- /dev/null +++ b/plugins/prometheus/prometheus.go @@ -0,0 +1,105 @@ +package prometheus + +import ( + "errors" + "fmt" + "net/http" + "sync" + "time" + + "github.com/influxdb/telegraf/plugins" + "github.com/prometheus/client_golang/extraction" + "github.com/prometheus/client_golang/model" +) + +type Prometheus struct { + Urls []string +} + +var sampleConfig = ` +# An array of urls to scrape metrics from. +urls = ["http://localhost:9100/metrics"]` + +func (r *Prometheus) SampleConfig() string { + return sampleConfig +} + +func (r *Prometheus) Description() string { + return "Read metrics from one or many prometheus clients" +} + +var ErrProtocolError = errors.New("prometheus protocol error") + +// Reads stats from all configured servers accumulates stats. +// Returns one of the errors encountered while gather stats (if any). +func (g *Prometheus) Gather(acc plugins.Accumulator) error { + var wg sync.WaitGroup + + var outerr error + + for _, serv := range g.Urls { + wg.Add(1) + go func(serv string) { + defer wg.Done() + outerr = g.gatherURL(serv, acc) + }(serv) + } + + wg.Wait() + + return outerr +} + +func (g *Prometheus) gatherURL(url string, acc plugins.Accumulator) error { + resp, err := http.Get(url) + if err != nil { + return fmt.Errorf("error making HTTP request to %s: %s", url, err) + } + defer resp.Body.Close() + if resp.StatusCode != http.StatusOK { + return fmt.Errorf("%s returned HTTP status %s", url, resp.Status) + } + processor, err := extraction.ProcessorForRequestHeader(resp.Header) + if err != nil { + return fmt.Errorf("error getting extractor for %s: %s", url, err) + } + + ingestor := &Ingester{ + acc: acc, + } + + options := &extraction.ProcessOptions{ + Timestamp: model.TimestampFromTime(time.Now()), + } + + err = processor.ProcessSingle(resp.Body, ingestor, options) + if err != nil { + return fmt.Errorf("error getting processing samples for %s: %s", url, err) + } + return nil +} + +type Ingester struct { + acc plugins.Accumulator +} + +// Ingest implements an extraction.Ingester. 
+func (i *Ingester) Ingest(samples model.Samples) error { + for _, sample := range samples { + tags := map[string]string{} + for key, value := range sample.Metric { + if key == model.MetricNameLabel { + continue + } + tags[string(key)] = string(value) + } + i.acc.Add(string(sample.Metric[model.MetricNameLabel]), float64(sample.Value), tags) + } + return nil +} + +func init() { + plugins.Add("prometheus", func() plugins.Plugin { + return &Prometheus{} + }) +} diff --git a/plugins/prometheus/prometheus_test.go b/plugins/prometheus/prometheus_test.go new file mode 100644 index 000000000..4f79822c1 --- /dev/null +++ b/plugins/prometheus/prometheus_test.go @@ -0,0 +1,55 @@ +package prometheus + +import ( + "fmt" + "net/http" + "net/http/httptest" + "testing" + + "github.com/influxdb/telegraf/testutil" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +const sampleTextFormat = `# HELP go_gc_duration_seconds A summary of the GC invocation durations. +# TYPE go_gc_duration_seconds summary +go_gc_duration_seconds{quantile="0"} 0.00010425500000000001 +go_gc_duration_seconds{quantile="0.25"} 0.000139108 +go_gc_duration_seconds{quantile="0.5"} 0.00015749400000000002 +go_gc_duration_seconds{quantile="0.75"} 0.000331463 +go_gc_duration_seconds{quantile="1"} 0.000667154 +go_gc_duration_seconds_sum 0.0018183950000000002 +go_gc_duration_seconds_count 7 +# HELP go_goroutines Number of goroutines that currently exist. +# TYPE go_goroutines gauge +go_goroutines 15 +` + +func TestPrometheusGeneratesMetrics(t *testing.T) { + ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + fmt.Fprintln(w, sampleTextFormat) + })) + defer ts.Close() + + p := &Prometheus{ + Urls: []string{ts.URL}, + } + + var acc testutil.Accumulator + + err := p.Gather(&acc) + require.NoError(t, err) + + expected := []struct { + name string + value float64 + tags map[string]string + }{ + {"go_gc_duration_seconds_count", 7, map[string]string{}}, + {"go_goroutines", 15, map[string]string{}}, + } + + for _, e := range expected { + assert.NoError(t, acc.ValidateValue(e.name, e.value)) + } +} diff --git a/plugins/rabbitmq/rabbitmq.go b/plugins/rabbitmq/rabbitmq.go new file mode 100644 index 000000000..cd5ec6dc2 --- /dev/null +++ b/plugins/rabbitmq/rabbitmq.go @@ -0,0 +1,218 @@ +package rabbitmq + +import ( + "encoding/json" + "fmt" + "net/http" + + "github.com/influxdb/telegraf/plugins" +) + +const DefaultUsername = "guest" +const DefaultPassword = "guest" +const DefaultURL = "http://localhost:15672" + +type Server struct { + URL string + Username string + Password string + Nodes []string +} + +type RabbitMQ struct { + Servers []*Server + + Client *http.Client +} + +type OverviewResponse struct { + MessageStats *MessageStats `json:"message_stats"` + ObjectTotals *ObjectTotals `json:"object_totals"` + QueueTotals *QueueTotals `json:"queue_totals"` +} + +type MessageStats struct { + Ack int64 + Deliver int64 + Publish int64 +} + +type ObjectTotals struct { + Channels int64 + Connections int64 + Consumers int64 + Exchanges int64 + Queues int64 +} + +type QueueTotals struct { + Messages int64 + MessagesReady int64 `json:"messages_ready"` + MessagesUnacknowledged int64 `json:"messages_unacknowledged"` +} + +type Node struct { + Name string + + DiskFree int64 `json:"disk_free"` + DiskFreeLimit int64 `json:"disk_free_limit"` + FdTotal int64 `json:"fd_total"` + FdUsed int64 `json:"fd_used"` + MemLimit int64 `json:"mem_limit"` + MemUsed int64 `json:"mem_used"` + ProcTotal int64 
`json:"proc_total"` + ProcUsed int64 `json:"proc_used"` + RunQueue int64 `json:"run_queue"` + SocketsTotal int64 `json:"sockets_total"` + SocketsUsed int64 `json:"sockets_used"` +} + +var sampleConfig = ` +# Specify servers via an array of tables +[[rabbitmq.servers]] +# url = "http://localhost:15672" +# username = "guest" +# password = "guest" + +# A list of nodes to pull metrics about. If not specified, metrics for +# all nodes are gathered. +# nodes = ["rabbit@node1", "rabbit@node2"] +` + +func (r *RabbitMQ) SampleConfig() string { + return sampleConfig +} + +func (r *RabbitMQ) Description() string { + return "Read metrics from one or many RabbitMQ servers via the management API" +} + +var localhost = &Server{URL: DefaultURL} + +func (r *RabbitMQ) Gather(acc plugins.Accumulator) error { + if r.Client == nil { + r.Client = &http.Client{} + } + + if len(r.Servers) == 0 { + r.gatherServer(localhost, acc) + return nil + } + + for _, serv := range r.Servers { + err := r.gatherServer(serv, acc) + if err != nil { + return err + } + } + + return nil +} + +func (r *RabbitMQ) gatherServer(serv *Server, acc plugins.Accumulator) error { + overview := &OverviewResponse{} + + err := r.requestJSON(serv, "/api/overview", &overview) + if err != nil { + return err + } + + tags := map[string]string{} + + acc.Add("messages", overview.QueueTotals.Messages, tags) + acc.Add("messages_ready", overview.QueueTotals.MessagesReady, tags) + acc.Add("messages_unacked", overview.QueueTotals.MessagesUnacknowledged, tags) + + acc.Add("channels", overview.ObjectTotals.Channels, tags) + acc.Add("connections", overview.ObjectTotals.Connections, tags) + acc.Add("consumers", overview.ObjectTotals.Consumers, tags) + acc.Add("exchanges", overview.ObjectTotals.Exchanges, tags) + acc.Add("queues", overview.ObjectTotals.Queues, tags) + + if overview.MessageStats != nil { + acc.Add("messages_acked", overview.MessageStats.Ack, tags) + acc.Add("messages_delivered", overview.MessageStats.Deliver, tags) + acc.Add("messages_published", overview.MessageStats.Publish, tags) + } + + nodes := make([]Node, 0) + + err = r.requestJSON(serv, "/api/nodes", &nodes) + if err != nil { + return err + } + + for _, node := range nodes { + if !shouldGatherNode(node, serv) { + continue + } + + tags = map[string]string{"node": node.Name} + + acc.Add("disk_free", node.DiskFree, tags) + acc.Add("disk_free_limit", node.DiskFreeLimit, tags) + acc.Add("fd_total", node.FdTotal, tags) + acc.Add("fd_used", node.FdUsed, tags) + acc.Add("mem_limit", node.MemLimit, tags) + acc.Add("mem_used", node.MemUsed, tags) + acc.Add("proc_total", node.ProcTotal, tags) + acc.Add("proc_used", node.ProcUsed, tags) + acc.Add("run_queue", node.RunQueue, tags) + acc.Add("sockets_total", node.SocketsTotal, tags) + acc.Add("sockets_used", node.SocketsUsed, tags) + } + + return nil +} + +func shouldGatherNode(node Node, serv *Server) bool { + if len(serv.Nodes) == 0 { + return true + } + + for _, name := range serv.Nodes { + if name == node.Name { + return true + } + } + + return false +} + +func (r *RabbitMQ) requestJSON(serv *Server, u string, target interface{}) error { + u = fmt.Sprintf("%s%s", serv.URL, u) + + req, err := http.NewRequest("GET", u, nil) + if err != nil { + return err + } + + username := serv.Username + if username == "" { + username = DefaultUsername + } + + password := serv.Password + if password == "" { + password = DefaultPassword + } + + req.SetBasicAuth(username, password) + + resp, err := r.Client.Do(req) + if err != nil { + return err + } + + defer 
resp.Body.Close() + + json.NewDecoder(resp.Body).Decode(target) + + return nil +} + +func init() { + plugins.Add("rabbitmq", func() plugins.Plugin { + return &RabbitMQ{} + }) +} diff --git a/plugins/rabbitmq/rabbitmq_test.go b/plugins/rabbitmq/rabbitmq_test.go new file mode 100644 index 000000000..689eb71cf --- /dev/null +++ b/plugins/rabbitmq/rabbitmq_test.go @@ -0,0 +1,202 @@ +package rabbitmq + +import ( + "fmt" + "net/http" + "net/http/httptest" + "testing" + + "github.com/influxdb/telegraf/testutil" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +const sampleOverviewResponse = ` +{ + "message_stats": { + "ack": 5246, + "ack_details": { + "rate": 0.0 + }, + "deliver": 5246, + "deliver_details": { + "rate": 0.0 + }, + "deliver_get": 5246, + "deliver_get_details": { + "rate": 0.0 + }, + "publish": 5258, + "publish_details": { + "rate": 0.0 + } + }, + "object_totals": { + "channels": 44, + "connections": 44, + "consumers": 65, + "exchanges": 43, + "queues": 62 + }, + "queue_totals": { + "messages": 0, + "messages_details": { + "rate": 0.0 + }, + "messages_ready": 0, + "messages_ready_details": { + "rate": 0.0 + }, + "messages_unacknowledged": 0, + "messages_unacknowledged_details": { + "rate": 0.0 + } + } +} +` + +const sampleNodesResponse = ` +[ + { + "db_dir": "/var/lib/rabbitmq/mnesia/rabbit@vagrant-ubuntu-trusty-64", + "disk_free": 37768282112, + "disk_free_alarm": false, + "disk_free_details": { + "rate": 0.0 + }, + "disk_free_limit": 50000000, + "enabled_plugins": [ + "rabbitmq_management" + ], + "fd_total": 1024, + "fd_used": 63, + "fd_used_details": { + "rate": 0.0 + }, + "io_read_avg_time": 0, + "io_read_avg_time_details": { + "rate": 0.0 + }, + "io_read_bytes": 1, + "io_read_bytes_details": { + "rate": 0.0 + }, + "io_read_count": 1, + "io_read_count_details": { + "rate": 0.0 + }, + "io_sync_avg_time": 0, + "io_sync_avg_time_details": { + "rate": 0.0 + }, + "io_write_avg_time": 0, + "io_write_avg_time_details": { + "rate": 0.0 + }, + "log_file": "/var/log/rabbitmq/rabbit@vagrant-ubuntu-trusty-64.log", + "mem_alarm": false, + "mem_limit": 2503771750, + "mem_used": 159707080, + "mem_used_details": { + "rate": 15185.6 + }, + "mnesia_disk_tx_count": 16, + "mnesia_disk_tx_count_details": { + "rate": 0.0 + }, + "mnesia_ram_tx_count": 296, + "mnesia_ram_tx_count_details": { + "rate": 0.0 + }, + "name": "rabbit@vagrant-ubuntu-trusty-64", + "net_ticktime": 60, + "os_pid": "14244", + "partitions": [], + "proc_total": 1048576, + "proc_used": 783, + "proc_used_details": { + "rate": 0.0 + }, + "processors": 1, + "rates_mode": "basic", + "run_queue": 0, + "running": true, + "sasl_log_file": "/var/log/rabbitmq/rabbit@vagrant-ubuntu-trusty-64-sasl.log", + "sockets_total": 829, + "sockets_used": 45, + "sockets_used_details": { + "rate": 0.0 + }, + "type": "disc", + "uptime": 7464827 + } +] +` + +func TestRabbitMQGeneratesMetrics(t *testing.T) { + ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + var rsp string + + if r.URL.Path == "/api/overview" { + rsp = sampleOverviewResponse + } else if r.URL.Path == "/api/nodes" { + rsp = sampleNodesResponse + } else { + panic("Cannot handle request") + } + + fmt.Fprintln(w, rsp) + })) + defer ts.Close() + + r := &RabbitMQ{ + Servers: []*Server{ + { + URL: ts.URL, + }, + }, + } + + var acc testutil.Accumulator + + err := r.Gather(&acc) + require.NoError(t, err) + + intMetrics := []string{ + "messages", + "messages_ready", + "messages_unacked", + + "messages_acked", + 
"messages_delivered", + "messages_published", + + "channels", + "connections", + "consumers", + "exchanges", + "queues", + } + + for _, metric := range intMetrics { + assert.True(t, acc.HasIntValue(metric)) + } + + nodeIntMetrics := []string{ + "disk_free", + "disk_free_limit", + "fd_total", + "fd_used", + "mem_limit", + "mem_used", + "proc_total", + "proc_used", + "run_queue", + "sockets_total", + "sockets_used", + } + + for _, metric := range nodeIntMetrics { + assert.True(t, acc.HasIntValue(metric)) + } +} diff --git a/plugins/redis/redis.go b/plugins/redis/redis.go index 831d74dbe..d2f3dd374 100644 --- a/plugins/redis/redis.go +++ b/plugins/redis/redis.go @@ -126,7 +126,7 @@ func (g *Redis) gatherServer(addr *url.URL, acc plugins.Accumulator) error { if addr.User != nil { pwd, set := addr.User.Password() if set && pwd != "" { - c.Write([]byte(fmt.Sprintf("AUTH %s\n", pwd))) + c.Write([]byte(fmt.Sprintf("AUTH %s\r\n", pwd))) r := bufio.NewReader(c) @@ -143,7 +143,7 @@ func (g *Redis) gatherServer(addr *url.URL, acc plugins.Accumulator) error { g.c = c } - g.c.Write([]byte("info\n")) + g.c.Write([]byte("info\r\n")) r := bufio.NewReader(g.c) @@ -188,7 +188,12 @@ func (g *Redis) gatherServer(addr *url.URL, acc plugins.Accumulator) error { continue } - tags := map[string]string{"host": addr.String()} + _, rPort, err := net.SplitHostPort(addr.Host) + if err != nil { + rPort = defaultPort + } + tags := map[string]string{"host": addr.String(), "port": rPort} + val := strings.TrimSpace(parts[1]) ival, err := strconv.ParseUint(val, 10, 64) diff --git a/plugins/redis/redis_test.go b/plugins/redis/redis_test.go index 317fde783..adf38ef75 100644 --- a/plugins/redis/redis_test.go +++ b/plugins/redis/redis_test.go @@ -12,6 +12,10 @@ import ( ) func TestRedisGeneratesMetrics(t *testing.T) { + if testing.Short() { + t.Skip("Skipping integration test in short mode") + } + l, err := net.Listen("tcp", ":0") require.NoError(t, err) @@ -31,7 +35,7 @@ func TestRedisGeneratesMetrics(t *testing.T) { return } - if line != "info\n" { + if line != "info\r\n" { return } @@ -83,7 +87,7 @@ func TestRedisGeneratesMetrics(t *testing.T) { } for _, c := range checkInt { - assert.NoError(t, acc.ValidateValue(c.name, c.value)) + assert.True(t, acc.CheckValue(c.name, c.value)) } checkFloat := []struct { @@ -98,11 +102,15 @@ func TestRedisGeneratesMetrics(t *testing.T) { } for _, c := range checkFloat { - assert.NoError(t, acc.ValidateValue(c.name, c.value)) + assert.True(t, acc.CheckValue(c.name, c.value)) } } func TestRedisCanPullStatsFromMultipleServers(t *testing.T) { + if testing.Short() { + t.Skip("Skipping integration test in short mode") + } + l, err := net.Listen("tcp", ":0") require.NoError(t, err) @@ -122,7 +130,7 @@ func TestRedisCanPullStatsFromMultipleServers(t *testing.T) { return } - if line != "info\n" { + if line != "info\r\n" { return } @@ -174,7 +182,7 @@ func TestRedisCanPullStatsFromMultipleServers(t *testing.T) { } for _, c := range checkInt { - assert.NoError(t, acc.ValidateValue(c.name, c.value)) + assert.True(t, acc.CheckValue(c.name, c.value)) } checkFloat := []struct { @@ -189,7 +197,7 @@ func TestRedisCanPullStatsFromMultipleServers(t *testing.T) { } for _, c := range checkFloat { - assert.NoError(t, acc.ValidateValue(c.name, c.value)) + assert.True(t, acc.CheckValue(c.name, c.value)) } } diff --git a/plugins/rethinkdb/rethinkdb.go b/plugins/rethinkdb/rethinkdb.go new file mode 100644 index 000000000..1c46a1f49 --- /dev/null +++ b/plugins/rethinkdb/rethinkdb.go @@ -0,0 +1,92 @@ +package 
rethinkdb + +import ( + "fmt" + "net/url" + "sync" + + "github.com/influxdb/telegraf/plugins" + + "gopkg.in/dancannon/gorethink.v1" +) + +type RethinkDB struct { + Servers []string +} + +var sampleConfig = ` +# An array of URIs to gather stats about. Specify an IP or hostname +# with optional port and password. e.g. rethinkdb://user:auth_key@10.10.3.30:28105, +# rethinkdb://10.10.3.33:18832, 10.0.0.1:10000, etc. +# +# If no servers are specified, then 127.0.0.1 is used as the host and 28015 as the port. +servers = ["127.0.0.1:28015"]` + +func (r *RethinkDB) SampleConfig() string { + return sampleConfig +} + +func (r *RethinkDB) Description() string { + return "Read metrics from one or many RethinkDB servers" +} + +var localhost = &Server{Url: &url.URL{Host: "127.0.0.1:28015"}} + +// Reads stats from all configured servers and accumulates stats. +// Returns one of the errors encountered while gathering stats (if any). +func (r *RethinkDB) Gather(acc plugins.Accumulator) error { + if len(r.Servers) == 0 { + return r.gatherServer(localhost, acc) + } + + var wg sync.WaitGroup + + var outerr error + + for _, serv := range r.Servers { + u, err := url.Parse(serv) + if err != nil { + return fmt.Errorf("Unable to parse address '%s': %s", serv, err) + } else if u.Scheme == "" { + // fallback to simple string based address (i.e. "10.0.0.1:10000") + u.Host = serv + } + wg.Add(1) + go func(serv string) { + defer wg.Done() + outerr = r.gatherServer(&Server{Url: u}, acc) + }(serv) + } + + wg.Wait() + + return outerr +} + +func (r *RethinkDB) gatherServer(server *Server, acc plugins.Accumulator) error { + var err error + connectOpts := gorethink.ConnectOpts{ + Address: server.Url.Host, + DiscoverHosts: false, + } + if server.Url.User != nil { + pwd, set := server.Url.User.Password() + if set && pwd != "" { + connectOpts.AuthKey = pwd + } + } + server.session, err = gorethink.Connect(connectOpts) + if err != nil { + return fmt.Errorf("Unable to connect to RethinkDB, %s\n", err.Error()) + } + defer server.session.Close() + + return server.gatherData(acc) +} + +func init() { + plugins.Add("rethinkdb", func() plugins.Plugin { + return &RethinkDB{} + }) +} diff --git a/plugins/rethinkdb/rethinkdb_data.go b/plugins/rethinkdb/rethinkdb_data.go new file mode 100644 index 000000000..5fae28931 --- /dev/null +++ b/plugins/rethinkdb/rethinkdb_data.go @@ -0,0 +1,110 @@ +package rethinkdb + +import ( + "reflect" + "time" + + "github.com/influxdb/telegraf/plugins" +) + +type serverStatus struct { + Id string `gorethink:"id"` + Network struct { + Addresses []Address `gorethink:"canonical_addresses"` + Hostname string `gorethink:"hostname"` + DriverPort int `gorethink:"reql_port"` + } `gorethink:"network"` + Process struct { + Version string `gorethink:"version"` + RunningSince time.Time `gorethink:"time_started"` + } `gorethink:"process"` +} + +type Address struct { + Host string `gorethink:"host"` + Port int `gorethink:"port"` +} + +type stats struct { + Engine Engine `gorethink:"query_engine"` +} + +type Engine struct { + ClientConns int64 `gorethink:"client_connections,omitempty"` + ClientActive int64 `gorethink:"clients_active,omitempty"` + QueriesPerSec int64 `gorethink:"queries_per_sec,omitempty"` + TotalQueries int64 `gorethink:"queries_total,omitempty"` + ReadsPerSec int64 `gorethink:"read_docs_per_sec,omitempty"` + TotalReads int64 `gorethink:"read_docs_total,omitempty"` + WritesPerSec int64 `gorethink:"written_docs_per_sec,omitempty"` + TotalWrites int64 `gorethink:"written_docs_total,omitempty"` +} + +type
tableStatus struct { + Id string `gorethink:"id"` + DB string `gorethink:"db"` + Name string `gorethink:"name"` +} + +type tableStats struct { + Engine Engine `gorethink:"query_engine"` + Storage Storage `gorethink:"storage_engine"` +} + +type Storage struct { + Cache Cache `gorethink:"cache"` + Disk Disk `gorethink:"disk"` +} + +type Cache struct { + BytesInUse int64 `gorethink:"in_use_bytes"` +} + +type Disk struct { + ReadBytesPerSec int64 `gorethink:"read_bytes_per_sec"` + ReadBytesTotal int64 `gorethink:"read_bytes_total"` + WriteBytesPerSec int64 `gorethink:"written_bytes_per_sec"` + WriteBytesTotal int64 `gorethink:"written_bytes_total"` + SpaceUsage SpaceUsage `gorethink:"space_usage"` +} + +type SpaceUsage struct { + Data int64 `gorethink:"data_bytes"` + Garbage int64 `gorethink:"garbage_bytes"` + Metadata int64 `gorethink:"metadata_bytes"` + Prealloc int64 `gorethink:"preallocated_bytes"` +} + +var engineStats = map[string]string{ + "active_clients": "ClientActive", + "clients": "ClientConns", + "queries_per_sec": "QueriesPerSec", + "total_queries": "TotalQueries", + "read_docs_per_sec": "ReadsPerSec", + "total_reads": "TotalReads", + "written_docs_per_sec": "WritesPerSec", + "total_writes": "TotalWrites", +} + +func (e *Engine) AddEngineStats(keys []string, acc plugins.Accumulator, tags map[string]string) { + engine := reflect.ValueOf(e).Elem() + for _, key := range keys { + acc.Add( + key, + engine.FieldByName(engineStats[key]).Interface(), + tags, + ) + } +} + +func (s *Storage) AddStats(acc plugins.Accumulator, tags map[string]string) { + acc.Add("cache_bytes_in_use", s.Cache.BytesInUse, tags) + acc.Add("disk_read_bytes_per_sec", s.Disk.ReadBytesPerSec, tags) + acc.Add("disk_read_bytes_total", s.Disk.ReadBytesTotal, tags) + acc.Add("disk_written_bytes_per_sec", s.Disk.WriteBytesPerSec, tags) + acc.Add("disk_written_bytes_total", s.Disk.WriteBytesTotal, tags) + acc.Add("disk_usage_data_bytes", s.Disk.SpaceUsage.Data, tags) + acc.Add("disk_usage_garbage_bytes", s.Disk.SpaceUsage.Garbage, tags) + acc.Add("disk_usage_metadata_bytes", s.Disk.SpaceUsage.Metadata, tags) + acc.Add("disk_usage_preallocated_bytes", s.Disk.SpaceUsage.Prealloc, tags) +} diff --git a/plugins/rethinkdb/rethinkdb_data_test.go b/plugins/rethinkdb/rethinkdb_data_test.go new file mode 100644 index 000000000..4c76b2340 --- /dev/null +++ b/plugins/rethinkdb/rethinkdb_data_test.go @@ -0,0 +1,112 @@ +package rethinkdb + +import ( + "testing" + + "github.com/influxdb/telegraf/testutil" + "github.com/stretchr/testify/assert" +) + +var tags = make(map[string]string) + +func TestAddEngineStats(t *testing.T) { + engine := &Engine{ + ClientConns: 0, + ClientActive: 0, + QueriesPerSec: 0, + TotalQueries: 0, + ReadsPerSec: 0, + TotalReads: 0, + WritesPerSec: 0, + TotalWrites: 0, + } + + var acc testutil.Accumulator + + keys := []string{ + "active_clients", + "clients", + "queries_per_sec", + "total_queries", + "read_docs_per_sec", + "total_reads", + "written_docs_per_sec", + "total_writes", + } + engine.AddEngineStats(keys, &acc, tags) + + for _, metric := range keys { + assert.True(t, acc.HasIntValue(metric)) + } +} + +func TestAddEngineStatsPartial(t *testing.T) { + engine := &Engine{ + ClientConns: 0, + ClientActive: 0, + QueriesPerSec: 0, + ReadsPerSec: 0, + WritesPerSec: 0, + } + + var acc testutil.Accumulator + + keys := []string{ + "active_clients", + "clients", + "queries_per_sec", + "read_docs_per_sec", + "written_docs_per_sec", + } + + missing_keys := []string{ + "total_queries", + "total_reads",
"total_writes", + } + engine.AddEngineStats(keys, &acc, tags) + + for _, metric := range missing_keys { + assert.False(t, acc.HasIntValue(metric)) + } +} + +func TestAddStorageStats(t *testing.T) { + storage := &Storage{ + Cache: Cache{ + BytesInUse: 0, + }, + Disk: Disk{ + ReadBytesPerSec: 0, + ReadBytesTotal: 0, + WriteBytesPerSec: 0, + WriteBytesTotal: 0, + SpaceUsage: SpaceUsage{ + Data: 0, + Garbage: 0, + Metadata: 0, + Prealloc: 0, + }, + }, + } + + var acc testutil.Accumulator + + keys := []string{ + "cache_bytes_in_use", + "disk_read_bytes_per_sec", + "disk_read_bytes_total", + "disk_written_bytes_per_sec", + "disk_written_bytes_total", + "disk_usage_data_bytes", + "disk_usage_garbage_bytes", + "disk_usage_metadata_bytes", + "disk_usage_preallocated_bytes", + } + + storage.AddStats(&acc, tags) + + for _, metric := range keys { + assert.True(t, acc.HasIntValue(metric)) + } +} diff --git a/plugins/rethinkdb/rethinkdb_server.go b/plugins/rethinkdb/rethinkdb_server.go new file mode 100644 index 000000000..9285068bd --- /dev/null +++ b/plugins/rethinkdb/rethinkdb_server.go @@ -0,0 +1,193 @@ +package rethinkdb + +import ( + "errors" + "fmt" + "net" + "net/url" + "regexp" + "strconv" + "strings" + + "github.com/influxdb/telegraf/plugins" + + "gopkg.in/dancannon/gorethink.v1" +) + +type Server struct { + Url *url.URL + session *gorethink.Session + serverStatus serverStatus +} + +func (s *Server) gatherData(acc plugins.Accumulator) error { + if err := s.getServerStatus(); err != nil { + return fmt.Errorf("Failed to get server_status, %s\n", err) + } + + if err := s.validateVersion(); err != nil { + return fmt.Errorf("Failed version validation, %s\n", err.Error()) + } + + if err := s.addClusterStats(acc); err != nil { + fmt.Printf("error adding cluster stats, %s\n", err.Error()) + return fmt.Errorf("Error adding cluster stats, %s\n", err.Error()) + } + + if err := s.addMemberStats(acc); err != nil { + return fmt.Errorf("Error adding member stats, %s\n", err.Error()) + } + + if err := s.addTableStats(acc); err != nil { + return fmt.Errorf("Error adding table stats, %s\n", err.Error()) + } + + return nil +} + +func (s *Server) validateVersion() error { + if s.serverStatus.Process.Version == "" { + return errors.New("could not determine the RethinkDB server version: process.version key missing") + } + + versionRegexp := regexp.MustCompile("\\d.\\d.\\d") + versionString := versionRegexp.FindString(s.serverStatus.Process.Version) + if versionString == "" { + return fmt.Errorf("could not determine the RethinkDB server version: malformed version string (%v)", s.serverStatus.Process.Version) + } + + majorVersion, err := strconv.Atoi(strings.Split(versionString, "")[0]) + if err != nil || majorVersion < 2 { + return fmt.Errorf("unsupported major version %s\n", versionString) + } + return nil +} + +func (s *Server) getServerStatus() error { + cursor, err := gorethink.DB("rethinkdb").Table("server_status").Run(s.session) + if err != nil { + return err + } + + if cursor.IsNil() { + return errors.New("could not determine the RethinkDB server version: no rows returned from the server_status table") + } + defer cursor.Close() + var serverStatuses []serverStatus + err = cursor.All(&serverStatuses) + if err != nil { + return errors.New("could not parse server_status results") + } + host, port, err := net.SplitHostPort(s.Url.Host) + if err != nil { + return fmt.Errorf("unable to determine provided hostname from %s\n", s.Url.Host) + } + driverPort, _ := strconv.Atoi(port) + for _, ss := range serverStatuses { 
+ for _, address := range ss.Network.Addresses { + if address.Host == host && ss.Network.DriverPort == driverPort { + s.serverStatus = ss + return nil + } + } + } + + return fmt.Errorf("unable to determine host id from server_status with %s", s.Url.Host) +} + +func (s *Server) getDefaultTags() map[string]string { + tags := make(map[string]string) + tags["host"] = s.Url.Host + tags["hostname"] = s.serverStatus.Network.Hostname + return tags +} + +var ClusterTracking = []string{ + "active_clients", + "clients", + "queries_per_sec", + "read_docs_per_sec", + "written_docs_per_sec", +} + +func (s *Server) addClusterStats(acc plugins.Accumulator) error { + cursor, err := gorethink.DB("rethinkdb").Table("stats").Get([]string{"cluster"}).Run(s.session) + if err != nil { + return fmt.Errorf("cluster stats query error, %s\n", err.Error()) + } + defer cursor.Close() + var clusterStats stats + if err := cursor.One(&clusterStats); err != nil { + return fmt.Errorf("failure to parse cluster stats, %s\n", err.Error()) + } + + tags := s.getDefaultTags() + tags["type"] = "cluster" + clusterStats.Engine.AddEngineStats(ClusterTracking, acc, tags) + return nil +} + +var MemberTracking = []string{ + "active_clients", + "clients", + "queries_per_sec", + "total_queries", + "read_docs_per_sec", + "total_reads", + "written_docs_per_sec", + "total_writes", +} + +func (s *Server) addMemberStats(acc plugins.Accumulator) error { + cursor, err := gorethink.DB("rethinkdb").Table("stats").Get([]string{"server", s.serverStatus.Id}).Run(s.session) + if err != nil { + return fmt.Errorf("member stats query error, %s\n", err.Error()) + } + defer cursor.Close() + var memberStats stats + if err := cursor.One(&memberStats); err != nil { + return fmt.Errorf("failure to parse member stats, %s\n", err.Error()) + } + + tags := s.getDefaultTags() + tags["type"] = "member" + memberStats.Engine.AddEngineStats(MemberTracking, acc, tags) + return nil +} + +var TableTracking = []string{ + "read_docs_per_sec", + "total_reads", + "written_docs_per_sec", + "total_writes", +} + +func (s *Server) addTableStats(acc plugins.Accumulator) error { + tablesCursor, err := gorethink.DB("rethinkdb").Table("table_status").Run(s.session) + if err != nil { + return fmt.Errorf("table_status query error, %s\n", err.Error()) + } + defer tablesCursor.Close() + var tables []tableStatus + err = tablesCursor.All(&tables) + if err != nil { + return errors.New("could not parse table_status results") + } + for _, table := range tables { + cursor, err := gorethink.DB("rethinkdb").Table("stats"). + Get([]string{"table_server", table.Id, s.serverStatus.Id}).
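+ // Per-table stats rows are keyed by ["table_server", <table id>, <server id>], + // i.e. one table's stats as seen from this particular server.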
+ Run(s.session) + if err != nil { + return fmt.Errorf("table stats query error, %s\n", err.Error()) + } + defer cursor.Close() + var ts tableStats + if err := cursor.One(&ts); err != nil { + return fmt.Errorf("failure to parse table stats, %s\n", err.Error()) + } + + tags := s.getDefaultTags() + tags["type"] = "data" + tags["ns"] = fmt.Sprintf("%s.%s", table.DB, table.Name) + ts.Engine.AddEngineStats(TableTracking, acc, tags) + ts.Storage.AddStats(acc, tags) + } + return nil +} diff --git a/plugins/rethinkdb/rethinkdb_server_test.go b/plugins/rethinkdb/rethinkdb_server_test.go new file mode 100644 index 000000000..21ab0dbbd --- /dev/null +++ b/plugins/rethinkdb/rethinkdb_server_test.go @@ -0,0 +1,81 @@ +// +build integration + +package rethinkdb + +import ( + "testing" + + "github.com/influxdb/telegraf/testutil" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestValidateVersion(t *testing.T) { + err := server.validateVersion() + require.NoError(t, err) +} + +func TestGetDefaultTags(t *testing.T) { + var tagTests = []struct { + in string + out string + }{ + {"host", server.Url.Host}, + {"hostname", server.serverStatus.Network.Hostname}, + } + defaultTags := server.getDefaultTags() + for _, tt := range tagTests { + if defaultTags[tt.in] != tt.out { + t.Errorf("expected %q, got %q", tt.out, defaultTags[tt.in]) + } + } +} + +func TestAddClusterStats(t *testing.T) { + var acc testutil.Accumulator + + err := server.addClusterStats(&acc) + require.NoError(t, err) + + for _, metric := range ClusterTracking { + assert.True(t, acc.HasIntValue(metric)) + } +} + +func TestAddMemberStats(t *testing.T) { + var acc testutil.Accumulator + + err := server.addMemberStats(&acc) + require.NoError(t, err) + + for _, metric := range MemberTracking { + assert.True(t, acc.HasIntValue(metric)) + } +} + +func TestAddTableStats(t *testing.T) { + var acc testutil.Accumulator + + err := server.addTableStats(&acc) + require.NoError(t, err) + + for _, metric := range TableTracking { + assert.True(t, acc.HasIntValue(metric)) + } + + keys := []string{ + "cache_bytes_in_use", + "disk_read_bytes_per_sec", + "disk_read_bytes_total", + "disk_written_bytes_per_sec", + "disk_written_bytes_total", + "disk_usage_data_bytes", + "disk_usage_garbage_bytes", + "disk_usage_metadata_bytes", + "disk_usage_preallocated_bytes", + } + + for _, metric := range keys { + assert.True(t, acc.HasIntValue(metric)) + } +} diff --git a/plugins/rethinkdb/rethinkdb_test.go b/plugins/rethinkdb/rethinkdb_test.go new file mode 100644 index 000000000..85c747f42 --- /dev/null +++ b/plugins/rethinkdb/rethinkdb_test.go @@ -0,0 +1,59 @@ +// +build integration + +package rethinkdb + +import ( + "log" + "math/rand" + "net/url" + "os" + "testing" + "time" + + "gopkg.in/dancannon/gorethink.v1" +) + +var connect_url, authKey string +var server *Server + +func init() { + connect_url = os.Getenv("RETHINKDB_URL") + if connect_url == "" { + connect_url = "127.0.0.1:28015" + } + authKey = os.Getenv("RETHINKDB_AUTHKEY") +} + +func testSetup(m *testing.M) { + var err error + server = &Server{Url: &url.URL{Host: connect_url}} + server.session, err = gorethink.Connect(gorethink.ConnectOpts{ + Address: server.Url.Host, + AuthKey: authKey, + DiscoverHosts: false, + }) + if err != nil { + log.Fatalln(err.Error()) + } + + err = server.getServerStatus() + if err != nil { + log.Fatalln(err.Error()) + } +} + +func testTeardown(m *testing.M) { + server.session.Close() +} + +func TestMain(m *testing.M) { + // seed randomness for use with
tests + rand.Seed(time.Now().UTC().UnixNano()) + + testSetup(m) + res := m.Run() + testTeardown(m) + + os.Exit(res) +} diff --git a/plugins/system/disk.go b/plugins/system/disk.go index cd5475962..1c5bdaef6 100644 --- a/plugins/system/disk.go +++ b/plugins/system/disk.go @@ -55,9 +55,12 @@ func (s *DiskIOStats) Gather(acc plugins.Accumulator) error { } for _, io := range diskio { - tags := map[string]string{ - "name": io.Name, - "serial": io.SerialNumber, + tags := map[string]string{} + if len(io.Name) != 0 { + tags["name"] = io.Name + } + if len(io.SerialNumber) != 0 { + tags["serial"] = io.SerialNumber } acc.Add("reads", io.ReadCount, tags) diff --git a/plugins/system/ps/common/common_freebsd.go b/plugins/system/ps/common/common_freebsd.go index 3c1124655..8ccd40e90 100644 --- a/plugins/system/ps/common/common_freebsd.go +++ b/plugins/system/ps/common/common_freebsd.go @@ -3,9 +3,9 @@ package common import ( - "syscall" "os/exec" "strings" + "syscall" "unsafe" ) @@ -58,4 +58,3 @@ func CallSyscall(mib []int32) ([]byte, uint64, error) { return buf, length, nil } - diff --git a/plugins/system/ps/cpu/cpu_test.go b/plugins/system/ps/cpu/cpu_test.go index b1cffb543..f75dbf101 100644 --- a/plugins/system/ps/cpu/cpu_test.go +++ b/plugins/system/ps/cpu/cpu_test.go @@ -2,6 +2,7 @@ package cpu import ( "fmt" + "os" "runtime" "testing" "time" @@ -94,5 +95,10 @@ func TestCPUPercent(t *testing.T) { } func TestCPUPercentPerCpu(t *testing.T) { + // Skip Per-CPU tests when running from a Circle CI container, + // see: https://github.com/golang/go/issues/11609 + if os.Getenv("CIRCLE_BUILD_NUM") != "" { + t.Skip("Detected that we are in a circleci container, skipping Per-CPU tests") + } testCPUPercent(t, true) } diff --git a/plugins/system/ps/disk/disk_test.go b/plugins/system/ps/disk/disk_test.go index 04776b1d8..6a91bae8c 100644 --- a/plugins/system/ps/disk/disk_test.go +++ b/plugins/system/ps/disk/disk_test.go @@ -45,7 +45,7 @@ func TestDisk_io_counters(t *testing.T) { t.Errorf("error %v", err) } if len(ret) == 0 { - t.Errorf("ret is empty", ret) + t.Errorf("ret is empty: %s", ret) } empty := DiskIOCountersStat{} for part, io := range ret { diff --git a/plugins/system/ps/host/host_linux_386.go b/plugins/system/ps/host/host_linux_386.go index d8f31c2f6..fb6d7a0f6 100644 --- a/plugins/system/ps/host/host_linux_386.go +++ b/plugins/system/ps/host/host_linux_386.go @@ -6,39 +6,39 @@ package host const ( - sizeofPtr = 0x4 - sizeofShort = 0x2 - sizeofInt = 0x4 - sizeofLong = 0x4 - sizeofLongLong = 0x8 + sizeofPtr = 0x4 + sizeofShort = 0x2 + sizeofInt = 0x4 + sizeofLong = 0x4 + sizeofLongLong = 0x8 ) type ( - _C_short int16 - _C_int int32 - _C_long int32 - _C_long_long int64 + _C_short int16 + _C_int int32 + _C_long int32 + _C_long_long int64 ) type utmp struct { - Type int16 - Pad_cgo_0 [2]byte - Pid int32 - Line [32]int8 - Id [4]int8 - User [32]int8 - Host [256]int8 - Exit exit_status - Session int32 - Tv UtTv - Addr_v6 [4]int32 - X__unused [20]int8 + Type int16 + Pad_cgo_0 [2]byte + Pid int32 + Line [32]int8 + Id [4]int8 + User [32]int8 + Host [256]int8 + Exit exit_status + Session int32 + Tv UtTv + Addr_v6 [4]int32 + X__unused [20]int8 } type exit_status struct { - Termination int16 - Exit int16 + Termination int16 + Exit int16 } type UtTv struct { - TvSec int32 - TvUsec int32 + TvSec int32 + TvUsec int32 } diff --git a/plugins/system/system_test.go b/plugins/system/system_test.go index b8e0e169c..7a3d13570 100644 --- a/plugins/system/system_test.go +++ b/plugins/system/system_test.go @@ -272,7 +272,9 @@ 
func TestSystemStats_GenerateStats(t *testing.T) { require.NoError(t, err) dockertags := map[string]string{ - "id": "blah", + "name": "blah", + "id": "", + "command": "", } assert.True(t, acc.CheckTaggedValue("user", 3.1, dockertags)) diff --git a/scripts/init.sh b/scripts/init.sh index 3e5b239b0..b9339e407 100755 --- a/scripts/init.sh +++ b/scripts/init.sh @@ -42,7 +42,7 @@ if [ ! -f "$STDOUT" ]; then fi if [ -z "$STDERR" ]; then - STDERR=/var/log/influxdb/telegraf.log + STDERR=/var/log/telegraf/telegraf.log fi if [ ! -f "$STDERR" ]; then mkdir -p `dirname $STDERR` @@ -92,10 +92,10 @@ function log_success_msg() { name=telegraf # Daemon name, where is the actual executable -daemon=/opt/influxdb/telegraf +daemon=/opt/telegraf/telegraf # pid file for the daemon -pidfile=/var/run/influxdb/telegraf.pid +pidfile=/var/run/telegraf/telegraf.pid piddir=`dirname $pidfile` if [ ! -d "$piddir" ]; then @@ -104,7 +104,7 @@ if [ ! -d "$piddir" ]; then fi # Configuration file -config=/etc/opt/influxdb/telegraf.conf +config=/etc/opt/telegraf/telegraf.conf # If the daemon is not there, then exit. [ -x $daemon ] || exit 5 diff --git a/scripts/telegraf.service b/scripts/telegraf.service new file mode 100644 index 000000000..a5a764fef --- /dev/null +++ b/scripts/telegraf.service @@ -0,0 +1,13 @@ +[Unit] +Description=The plugin-driven server agent for reporting metrics into InfluxDB +Documentation=https://github.com/influxdb/telegraf +After=network.target + +[Service] +EnvironmentFile=-/etc/default/telegraf +User=telegraf +ExecStart=/opt/telegraf/telegraf -config /etc/opt/telegraf/telegraf.conf $TELEGRAF_OPTS +Restart=on-failure + +[Install] +WantedBy=multi-user.target diff --git a/testutil/accumulator.go b/testutil/accumulator.go index 645366fd5..db3a67e66 100644 --- a/testutil/accumulator.go +++ b/testutil/accumulator.go @@ -2,32 +2,39 @@ package testutil import ( "fmt" + "reflect" "time" ) +// Point defines a single point measurement type Point struct { Measurement string - Value interface{} Tags map[string]string Values map[string]interface{} Time time.Time } +// Accumulator defines a mocked out accumulator type Accumulator struct { Points []*Point } +// Add adds a measurement point to the accumulator func (a *Accumulator) Add(measurement string, value interface{}, tags map[string]string) { + if tags == nil { + tags = map[string]string{} + } a.Points = append( a.Points, &Point{ Measurement: measurement, - Value: value, + Values: map[string]interface{}{"value": value}, Tags: tags, }, ) } +// AddValuesWithTime adds a measurement point with a specified timestamp. func (a *Accumulator) AddValuesWithTime( measurement string, values map[string]interface{}, @@ -45,6 +52,7 @@ func (a *Accumulator) AddValuesWithTime( ) } +// Get gets the specified measurement point from the accumulator func (a *Accumulator) Get(measurement string) (*Point, bool) { for _, p := range a.Points { if p.Measurement == measurement { @@ -55,55 +63,64 @@ func (a *Accumulator) Get(measurement string) (*Point, bool) { return nil, false } +// CheckValue checks that the accumulators point for the given measurement +// is the same as the given value. 
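+// Only the default "value" field of the first matching point is compared; +// tags are not considered.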
func (a *Accumulator) CheckValue(measurement string, val interface{}) bool { for _, p := range a.Points { if p.Measurement == measurement { - return p.Value == val + return p.Values["value"] == val } } return false } -func (a *Accumulator) CheckTaggedValue(measurement string, val interface{}, tags map[string]string) bool { +// CheckTaggedValue calls ValidateTaggedValue +func (a *Accumulator) CheckTaggedValue( + measurement string, + val interface{}, + tags map[string]string, +) bool { return a.ValidateTaggedValue(measurement, val, tags) == nil } -func (a *Accumulator) ValidateTaggedValue(measurement string, val interface{}, tags map[string]string) error { +// ValidateTaggedValue validates that the given measurement and value exist +// in the accumulator and with the given tags. +func (a *Accumulator) ValidateTaggedValue( + measurement string, + val interface{}, + tags map[string]string, +) error { + if tags == nil { + tags = map[string]string{} + } for _, p := range a.Points { - var found bool - - if p.Tags == nil && tags == nil { - found = true - } else { - for k, v := range p.Tags { - if tags[k] == v { - found = true - break - } - } + if !reflect.DeepEqual(tags, p.Tags) { + continue } - if found && p.Measurement == measurement { - if p.Value != val { - return fmt.Errorf("%v (%T) != %v (%T)", p.Value, p.Value, val, val) + if p.Measurement == measurement { + if p.Values["value"] != val { + return fmt.Errorf("%v (%T) != %v (%T)", + p.Values["value"], p.Values["value"], val, val) } - return nil } } - return fmt.Errorf("unknown value %s with tags %v", measurement, tags) + return fmt.Errorf("unknown measurement %s with tags %v", measurement, tags) } +// ValidateValue calls ValidateTaggedValue func (a *Accumulator) ValidateValue(measurement string, val interface{}) error { return a.ValidateTaggedValue(measurement, val, nil) } +// HasIntValue returns true if the measurement has an Int value func (a *Accumulator) HasIntValue(measurement string) bool { for _, p := range a.Points { if p.Measurement == measurement { - _, ok := p.Value.(int64) + _, ok := p.Values["value"].(int64) return ok } } @@ -111,10 +128,23 @@ func (a *Accumulator) HasIntValue(measurement string) bool { return false } +// HasUIntValue returns true if the measurement has a UInt value +func (a *Accumulator) HasUIntValue(measurement string) bool { + for _, p := range a.Points { + if p.Measurement == measurement { + _, ok := p.Values["value"].(uint64) + return ok + } + } + + return false +} + +// HasFloatValue returns true if the given measurement has a float value func (a *Accumulator) HasFloatValue(measurement string) bool { for _, p := range a.Points { if p.Measurement == measurement { - _, ok := p.Value.(float64) + _, ok := p.Values["value"].(float64) return ok } } diff --git a/testutil/testutil.go b/testutil/testutil.go new file mode 100644 index 000000000..91eb4b6b9 --- /dev/null +++ b/testutil/testutil.go @@ -0,0 +1,29 @@ +package testutil + +import ( + "net" + "net/url" + "os" +) + +var localhost = "localhost" + +// GetLocalHost returns the DOCKER_HOST environment variable, parsing +// out any scheme or ports so that only the IP address is returned. 
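+// If DOCKER_HOST is unset, "localhost" is returned; if its value cannot be +// parsed as a URL or split into host and port, the raw value is returned +// unchanged.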
+func GetLocalHost() string { + if dockerHostVar := os.Getenv("DOCKER_HOST"); dockerHostVar != "" { + u, err := url.Parse(dockerHostVar) + if err != nil { + return dockerHostVar + } + + // split out the ip addr from the port + host, _, err := net.SplitHostPort(u.Host) + if err != nil { + return dockerHostVar + } + + return host + } + return localhost +} diff --git a/testutil/testutil_test.go b/testutil/testutil_test.go new file mode 100644 index 000000000..52a807514 --- /dev/null +++ b/testutil/testutil_test.go @@ -0,0 +1,34 @@ +package testutil + +import ( + "os" + "testing" +) + +func TestDockerHost(t *testing.T) { + + os.Unsetenv("DOCKER_HOST") + + host := GetLocalHost() + + if host != localhost { + t.Fatalf("Host should be localhost when DOCKER_HOST is not set. Current value [%s]", host) + } + + os.Setenv("DOCKER_HOST", "1.1.1.1") + + host = GetLocalHost() + + if host != "1.1.1.1" { + t.Fatalf("Host should take DOCKER_HOST value when set. Current value is [%s] and DOCKER_HOST is [%s]", host, os.Getenv("DOCKER_HOST")) + } + + os.Setenv("DOCKER_HOST", "tcp://1.1.1.1:8080") + + host = GetLocalHost() + + if host != "1.1.1.1" { + t.Fatalf("Host should take DOCKER_HOST value when set. Current value is [%s] and DOCKER_HOST is [%s]", host, os.Getenv("DOCKER_HOST")) + } + +}