2015-05-22 23:45:14 +00:00
|
|
|
package telegraf
|
2015-04-01 16:34:32 +00:00
|
|
|
|
|
|
|
import (
|
2015-08-26 23:43:09 +00:00
|
|
|
"errors"
|
2015-05-18 21:10:12 +00:00
|
|
|
"fmt"
|
2015-04-01 16:34:32 +00:00
|
|
|
"log"
|
2015-04-07 16:56:40 +00:00
|
|
|
"os"
|
2015-04-01 16:34:32 +00:00
|
|
|
"sort"
|
2015-05-20 05:19:32 +00:00
|
|
|
"sync"
|
2015-04-07 16:23:35 +00:00
|
|
|
"time"
|
2015-04-01 16:34:32 +00:00
|
|
|
|
2015-08-07 20:31:25 +00:00
|
|
|
"github.com/influxdb/telegraf/outputs"
|
2015-05-22 23:45:14 +00:00
|
|
|
"github.com/influxdb/telegraf/plugins"
|
2015-10-16 22:13:32 +00:00
|
|
|
|
|
|
|
"github.com/influxdb/influxdb/client/v2"
|
2015-04-01 16:34:32 +00:00
|
|
|
)
|
|
|
|
|
2015-08-07 20:31:25 +00:00
|
|
|
type runningOutput struct {
|
|
|
|
name string
|
|
|
|
output outputs.Output
|
|
|
|
}
|
|
|
|
|
2015-05-18 19:15:15 +00:00
|
|
|
type runningPlugin struct {
|
|
|
|
name string
|
|
|
|
plugin plugins.Plugin
|
2015-05-20 05:19:32 +00:00
|
|
|
config *ConfiguredPlugin
|
2015-05-18 19:15:15 +00:00
|
|
|
}
|
|
|
|
|
2015-08-04 14:58:32 +00:00
|
|
|
// Agent runs telegraf and collects data based on the given config
|
2015-04-01 16:34:32 +00:00
|
|
|
type Agent struct {
|
2015-08-04 14:58:32 +00:00
|
|
|
|
|
|
|
// Interval at which to gather information
|
2015-04-01 16:34:32 +00:00
|
|
|
Interval Duration
|
2015-08-04 14:58:32 +00:00
|
|
|
|
2015-10-16 22:13:32 +00:00
|
|
|
// Interval at which to flush data
|
|
|
|
FlushInterval Duration
|
|
|
|
|
|
|
|
// TODO(cam): Remove UTC and Precision parameters, they are no longer
|
|
|
|
// valid for the agent config. Leaving them here for now for backwards-
|
|
|
|
// compatability
|
|
|
|
|
2015-09-02 16:30:44 +00:00
|
|
|
// Option for outputting data in UTC
|
|
|
|
UTC bool `toml:"utc"`
|
|
|
|
|
|
|
|
// Precision to write data at
|
|
|
|
// Valid values for Precision are n, u, ms, s, m, and h
|
|
|
|
Precision string
|
|
|
|
|
|
|
|
// Option for running in debug mode
|
2015-04-01 16:34:32 +00:00
|
|
|
Debug bool
|
2015-04-07 16:56:40 +00:00
|
|
|
Hostname string
|
2015-04-01 16:34:32 +00:00
|
|
|
|
|
|
|
Config *Config
|
|
|
|
|
2015-08-07 20:31:25 +00:00
|
|
|
outputs []*runningOutput
|
2015-05-18 19:15:15 +00:00
|
|
|
plugins []*runningPlugin
|
2015-04-01 16:34:32 +00:00
|
|
|
}
|
|
|
|
|
2015-08-04 14:58:32 +00:00
|
|
|
// NewAgent returns an Agent struct based off the given Config
|
2015-04-06 16:32:10 +00:00
|
|
|
func NewAgent(config *Config) (*Agent, error) {
|
2015-09-02 16:30:44 +00:00
|
|
|
agent := &Agent{
|
2015-10-16 22:13:32 +00:00
|
|
|
Config: config,
|
|
|
|
Interval: Duration{10 * time.Second},
|
|
|
|
FlushInterval: Duration{10 * time.Second},
|
|
|
|
UTC: true,
|
|
|
|
Precision: "s",
|
2015-09-02 16:30:44 +00:00
|
|
|
}
|
2015-04-01 16:34:32 +00:00
|
|
|
|
2015-09-02 16:30:44 +00:00
|
|
|
// Apply the toml table to the agent config, overriding defaults
|
2015-05-20 05:19:32 +00:00
|
|
|
err := config.ApplyAgent(agent)
|
2015-04-01 16:34:32 +00:00
|
|
|
if err != nil {
|
2015-04-06 16:32:10 +00:00
|
|
|
return nil, err
|
2015-04-01 16:34:32 +00:00
|
|
|
}
|
|
|
|
|
2015-04-07 16:56:40 +00:00
|
|
|
if agent.Hostname == "" {
|
|
|
|
hostname, err := os.Hostname()
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
agent.Hostname = hostname
|
2015-05-22 23:33:38 +00:00
|
|
|
}
|
2015-04-07 16:56:40 +00:00
|
|
|
|
2015-05-22 23:33:38 +00:00
|
|
|
if config.Tags == nil {
|
|
|
|
config.Tags = map[string]string{}
|
2015-04-07 16:56:40 +00:00
|
|
|
}
|
|
|
|
|
2015-05-22 23:33:38 +00:00
|
|
|
config.Tags["host"] = agent.Hostname
|
|
|
|
|
2015-04-07 00:24:24 +00:00
|
|
|
return agent, nil
|
|
|
|
}
|
|
|
|
|
2015-08-12 17:04:25 +00:00
|
|
|
// Connect connects to all configured outputs
|
2015-08-04 14:58:32 +00:00
|
|
|
func (a *Agent) Connect() error {
|
2015-08-07 20:31:25 +00:00
|
|
|
for _, o := range a.outputs {
|
2015-09-09 21:56:10 +00:00
|
|
|
if a.Debug {
|
|
|
|
log.Printf("Attempting connection to output: %s\n", o.name)
|
|
|
|
}
|
2015-08-11 20:02:04 +00:00
|
|
|
err := o.output.Connect()
|
2015-08-07 20:31:25 +00:00
|
|
|
if err != nil {
|
2015-09-19 01:02:16 +00:00
|
|
|
log.Printf("Failed to connect to output %s, retrying in 15s\n", o.name)
|
|
|
|
time.Sleep(15 * time.Second)
|
|
|
|
err = o.output.Connect()
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
2015-08-07 20:31:25 +00:00
|
|
|
}
|
2015-08-26 17:02:10 +00:00
|
|
|
if a.Debug {
|
|
|
|
log.Printf("Successfully connected to output: %s\n", o.name)
|
|
|
|
}
|
2015-04-01 16:34:32 +00:00
|
|
|
}
|
2015-08-07 20:31:25 +00:00
|
|
|
return nil
|
|
|
|
}
|
2015-04-01 16:34:32 +00:00
|
|
|
|
2015-08-12 17:04:25 +00:00
|
|
|
// Close closes the connection to all configured outputs
|
|
|
|
func (a *Agent) Close() error {
|
|
|
|
var err error
|
|
|
|
for _, o := range a.outputs {
|
|
|
|
err = o.output.Close()
|
|
|
|
}
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
2015-08-11 20:02:04 +00:00
|
|
|
// LoadOutputs loads the agent's outputs
|
2015-09-22 01:38:57 +00:00
|
|
|
func (a *Agent) LoadOutputs(filters []string) ([]string, error) {
|
2015-08-07 20:31:25 +00:00
|
|
|
var names []string
|
2015-04-01 16:34:32 +00:00
|
|
|
|
2015-08-07 20:31:25 +00:00
|
|
|
for _, name := range a.Config.OutputsDeclared() {
|
|
|
|
creator, ok := outputs.Outputs[name]
|
|
|
|
if !ok {
|
|
|
|
return nil, fmt.Errorf("Undefined but requested output: %s", name)
|
|
|
|
}
|
|
|
|
|
2015-09-22 01:38:57 +00:00
|
|
|
if sliceContains(name, filters) || len(filters) == 0 {
|
2015-09-23 20:54:22 +00:00
|
|
|
if a.Debug {
|
|
|
|
log.Println("Output Enabled: ", name)
|
|
|
|
}
|
2015-09-22 01:38:57 +00:00
|
|
|
output := creator()
|
2015-08-07 20:31:25 +00:00
|
|
|
|
2015-09-22 01:38:57 +00:00
|
|
|
err := a.Config.ApplyOutput(name, output)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
2015-08-07 20:31:25 +00:00
|
|
|
|
2015-09-22 01:38:57 +00:00
|
|
|
a.outputs = append(a.outputs, &runningOutput{name, output})
|
|
|
|
names = append(names, name)
|
|
|
|
}
|
2015-04-06 16:32:10 +00:00
|
|
|
}
|
|
|
|
|
2015-08-11 20:02:04 +00:00
|
|
|
sort.Strings(names)
|
2015-04-01 16:34:32 +00:00
|
|
|
|
2015-08-07 20:31:25 +00:00
|
|
|
return names, nil
|
2015-04-01 16:34:32 +00:00
|
|
|
}
|
|
|
|
|
2015-08-04 14:58:32 +00:00
|
|
|
// LoadPlugins loads the agent's plugins
|
2015-09-22 01:38:57 +00:00
|
|
|
func (a *Agent) LoadPlugins(filters []string) ([]string, error) {
|
2015-04-01 16:34:32 +00:00
|
|
|
var names []string
|
|
|
|
|
2015-05-18 21:10:12 +00:00
|
|
|
for _, name := range a.Config.PluginsDeclared() {
|
|
|
|
creator, ok := plugins.Plugins[name]
|
|
|
|
if !ok {
|
|
|
|
return nil, fmt.Errorf("Undefined but requested plugin: %s", name)
|
|
|
|
}
|
2015-05-18 18:54:59 +00:00
|
|
|
|
2015-09-22 01:38:57 +00:00
|
|
|
if sliceContains(name, filters) || len(filters) == 0 {
|
2015-08-11 15:50:36 +00:00
|
|
|
plugin := creator()
|
2015-09-22 01:38:57 +00:00
|
|
|
|
2015-08-11 15:50:36 +00:00
|
|
|
config, err := a.Config.ApplyPlugin(name, plugin)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
a.plugins = append(a.plugins, &runningPlugin{name, plugin, config})
|
|
|
|
names = append(names, name)
|
|
|
|
}
|
2015-04-01 16:34:32 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
sort.Strings(names)
|
|
|
|
|
|
|
|
return names, nil
|
|
|
|
}
|
|
|
|
|
2015-10-16 22:13:32 +00:00
|
|
|
// gatherParallel runs the plugins that are using the same reporting interval
|
2015-08-26 17:02:10 +00:00
|
|
|
// as the telegraf agent.
|
2015-10-16 22:13:32 +00:00
|
|
|
func (a *Agent) gatherParallel(pointChan chan *client.Point) error {
|
2015-05-20 05:19:32 +00:00
|
|
|
var wg sync.WaitGroup
|
|
|
|
|
2015-09-28 20:08:28 +00:00
|
|
|
start := time.Now()
|
|
|
|
counter := 0
|
2015-05-20 05:19:32 +00:00
|
|
|
for _, plugin := range a.plugins {
|
|
|
|
if plugin.config.Interval != 0 {
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
|
|
|
wg.Add(1)
|
2015-09-28 20:08:28 +00:00
|
|
|
counter++
|
2015-05-20 05:19:32 +00:00
|
|
|
go func(plugin *runningPlugin) {
|
|
|
|
defer wg.Done()
|
|
|
|
|
2015-10-16 22:13:32 +00:00
|
|
|
acc := NewAccumulator(plugin.config, pointChan)
|
|
|
|
acc.SetDebug(a.Debug)
|
|
|
|
acc.SetPrefix(plugin.name + "_")
|
|
|
|
acc.SetDefaultTags(a.Config.Tags)
|
2015-05-20 05:19:32 +00:00
|
|
|
|
2015-10-16 22:13:32 +00:00
|
|
|
if err := plugin.plugin.Gather(acc); err != nil {
|
2015-08-24 17:25:15 +00:00
|
|
|
log.Printf("Error in plugin [%s]: %s", plugin.name, err)
|
2015-08-22 20:03:30 +00:00
|
|
|
}
|
|
|
|
|
2015-05-20 05:19:32 +00:00
|
|
|
}(plugin)
|
|
|
|
}
|
|
|
|
|
|
|
|
wg.Wait()
|
|
|
|
|
2015-09-28 20:08:28 +00:00
|
|
|
elapsed := time.Since(start)
|
2015-10-16 22:13:32 +00:00
|
|
|
log.Printf("Default (%s) interval, gathered metrics from %d plugins in %s\n",
|
|
|
|
a.Interval, counter, elapsed)
|
|
|
|
return nil
|
2015-04-01 16:34:32 +00:00
|
|
|
}
|
|
|
|
|
2015-10-16 22:13:32 +00:00
|
|
|
// gatherSeparate runs the plugins that have been configured with their own
|
2015-08-26 17:02:10 +00:00
|
|
|
// reporting interval.
|
2015-10-16 22:13:32 +00:00
|
|
|
func (a *Agent) gatherSeparate(
|
|
|
|
shutdown chan struct{},
|
|
|
|
plugin *runningPlugin,
|
|
|
|
pointChan chan *client.Point,
|
|
|
|
) error {
|
2015-05-20 05:19:32 +00:00
|
|
|
ticker := time.NewTicker(plugin.config.Interval)
|
|
|
|
|
|
|
|
for {
|
2015-08-26 23:43:09 +00:00
|
|
|
var outerr error
|
2015-09-28 20:08:28 +00:00
|
|
|
start := time.Now()
|
2015-05-20 05:19:32 +00:00
|
|
|
|
2015-10-16 22:13:32 +00:00
|
|
|
acc := NewAccumulator(plugin.config, pointChan)
|
|
|
|
acc.SetDebug(a.Debug)
|
|
|
|
acc.SetPrefix(plugin.name + "_")
|
|
|
|
acc.SetDefaultTags(a.Config.Tags)
|
2015-08-26 23:43:09 +00:00
|
|
|
|
2015-10-16 22:13:32 +00:00
|
|
|
if err := plugin.plugin.Gather(acc); err != nil {
|
2015-08-26 23:43:09 +00:00
|
|
|
log.Printf("Error in plugin [%s]: %s", plugin.name, err)
|
2015-09-02 16:30:44 +00:00
|
|
|
}
|
2015-05-20 05:19:32 +00:00
|
|
|
|
2015-09-28 20:08:28 +00:00
|
|
|
elapsed := time.Since(start)
|
2015-10-16 22:13:32 +00:00
|
|
|
log.Printf("Separate (%s) interval, gathered metrics from %s in %s\n",
|
|
|
|
plugin.config.Interval, plugin.name, elapsed)
|
2015-08-26 23:43:09 +00:00
|
|
|
|
|
|
|
if outerr != nil {
|
|
|
|
return outerr
|
2015-08-07 20:31:25 +00:00
|
|
|
}
|
2015-05-20 05:19:32 +00:00
|
|
|
|
|
|
|
select {
|
|
|
|
case <-shutdown:
|
|
|
|
return nil
|
|
|
|
case <-ticker.C:
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-08-04 14:58:32 +00:00
|
|
|
// Test verifies that we can 'Gather' from all plugins with their configured
|
|
|
|
// Config struct
|
2015-04-07 00:24:24 +00:00
|
|
|
func (a *Agent) Test() error {
|
2015-10-16 22:13:32 +00:00
|
|
|
shutdown := make(chan struct{})
|
|
|
|
defer close(shutdown)
|
|
|
|
pointChan := make(chan *client.Point)
|
2015-04-07 00:24:24 +00:00
|
|
|
|
2015-10-16 22:13:32 +00:00
|
|
|
go a.flusher(shutdown, pointChan)
|
2015-04-07 00:24:24 +00:00
|
|
|
|
|
|
|
for _, plugin := range a.plugins {
|
2015-10-16 22:13:32 +00:00
|
|
|
acc := NewAccumulator(plugin.config, pointChan)
|
|
|
|
acc.SetDebug(true)
|
|
|
|
acc.SetPrefix(plugin.name + "_")
|
2015-05-20 05:19:32 +00:00
|
|
|
|
2015-09-23 20:54:22 +00:00
|
|
|
fmt.Printf("* Plugin: %s, Collection 1\n", plugin.name)
|
2015-05-20 05:19:32 +00:00
|
|
|
if plugin.config.Interval != 0 {
|
|
|
|
fmt.Printf("* Internal: %s\n", plugin.config.Interval)
|
|
|
|
}
|
|
|
|
|
2015-10-16 22:13:32 +00:00
|
|
|
if err := plugin.plugin.Gather(acc); err != nil {
|
2015-04-07 00:24:24 +00:00
|
|
|
return err
|
|
|
|
}
|
2015-09-21 17:05:58 +00:00
|
|
|
|
2015-09-23 20:54:22 +00:00
|
|
|
// Special instructions for some plugins. cpu, for example, needs to be
|
|
|
|
// run twice in order to return cpu usage percentages.
|
|
|
|
switch plugin.name {
|
|
|
|
case "cpu":
|
|
|
|
time.Sleep(500 * time.Millisecond)
|
|
|
|
fmt.Printf("* Plugin: %s, Collection 2\n", plugin.name)
|
2015-10-16 22:13:32 +00:00
|
|
|
if err := plugin.plugin.Gather(acc); err != nil {
|
2015-09-23 20:54:22 +00:00
|
|
|
return err
|
|
|
|
}
|
2015-09-21 17:05:58 +00:00
|
|
|
}
|
2015-04-07 00:24:24 +00:00
|
|
|
|
2015-09-23 20:54:22 +00:00
|
|
|
}
|
2015-04-07 00:24:24 +00:00
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2015-10-16 22:13:32 +00:00
|
|
|
func (a *Agent) flush(points []*client.Point) error {
|
|
|
|
var wg sync.WaitGroup
|
|
|
|
var outerr error
|
|
|
|
|
|
|
|
for _, o := range a.outputs {
|
|
|
|
wg.Add(1)
|
|
|
|
|
|
|
|
go func(ro *runningOutput) {
|
|
|
|
defer wg.Done()
|
|
|
|
// Log all output errors:
|
|
|
|
if err := ro.output.Write(points); err != nil {
|
|
|
|
log.Printf("Error in output [%s]: %s", ro.name, err)
|
|
|
|
outerr = errors.New("Error encountered flushing outputs")
|
|
|
|
}
|
|
|
|
}(o)
|
|
|
|
}
|
|
|
|
|
|
|
|
wg.Wait()
|
|
|
|
return outerr
|
|
|
|
}
|
|
|
|
|
|
|
|
// flusher monitors the points input channel and flushes on the minimum interval
|
|
|
|
func (a *Agent) flusher(shutdown chan struct{}, pointChan chan *client.Point) error {
|
|
|
|
ticker := time.NewTicker(a.FlushInterval.Duration)
|
|
|
|
points := make([]*client.Point, 0)
|
|
|
|
for {
|
|
|
|
select {
|
|
|
|
case <-shutdown:
|
|
|
|
return nil
|
|
|
|
case <-ticker.C:
|
|
|
|
start := time.Now()
|
|
|
|
if err := a.flush(points); err != nil {
|
|
|
|
log.Printf(err.Error())
|
|
|
|
}
|
|
|
|
elapsed := time.Since(start)
|
|
|
|
log.Printf("Flushed %d metrics in %s\n", len(points), elapsed)
|
|
|
|
points = make([]*client.Point, 0)
|
|
|
|
case pt := <-pointChan:
|
|
|
|
points = append(points, pt)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-08-04 14:58:32 +00:00
|
|
|
// Run runs the agent daemon, gathering every Interval
|
2015-04-07 16:23:58 +00:00
|
|
|
func (a *Agent) Run(shutdown chan struct{}) error {
|
2015-05-20 05:19:32 +00:00
|
|
|
var wg sync.WaitGroup
|
|
|
|
|
2015-10-16 22:13:32 +00:00
|
|
|
// channel shared between all plugin threads for accumulating points
|
|
|
|
pointChan := make(chan *client.Point, 1000)
|
|
|
|
|
|
|
|
wg.Add(1)
|
|
|
|
go func() {
|
|
|
|
defer wg.Done()
|
|
|
|
if err := a.flusher(shutdown, pointChan); err != nil {
|
|
|
|
log.Printf("Flusher routine failed, exiting: %s\n", err.Error())
|
|
|
|
close(shutdown)
|
|
|
|
}
|
|
|
|
}()
|
|
|
|
|
2015-05-20 05:19:32 +00:00
|
|
|
for _, plugin := range a.plugins {
|
2015-09-24 18:06:11 +00:00
|
|
|
|
|
|
|
// Start service of any ServicePlugins
|
|
|
|
switch p := plugin.plugin.(type) {
|
|
|
|
case plugins.ServicePlugin:
|
|
|
|
if err := p.Start(); err != nil {
|
|
|
|
log.Printf("Service for plugin %s failed to start, exiting\n%s\n",
|
|
|
|
plugin.name, err.Error())
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
defer p.Stop()
|
|
|
|
}
|
|
|
|
|
|
|
|
// Special handling for plugins that have their own collection interval
|
2015-10-16 22:13:32 +00:00
|
|
|
// configured. Default intervals are handled below with gatherParallel
|
2015-05-20 05:19:32 +00:00
|
|
|
if plugin.config.Interval != 0 {
|
|
|
|
wg.Add(1)
|
|
|
|
go func(plugin *runningPlugin) {
|
|
|
|
defer wg.Done()
|
2015-10-16 22:13:32 +00:00
|
|
|
if err := a.gatherSeparate(shutdown, plugin, pointChan); err != nil {
|
2015-08-26 23:43:09 +00:00
|
|
|
log.Printf(err.Error())
|
2015-08-22 20:03:30 +00:00
|
|
|
}
|
2015-05-20 05:19:32 +00:00
|
|
|
}(plugin)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
defer wg.Wait()
|
|
|
|
|
2015-04-01 16:34:32 +00:00
|
|
|
ticker := time.NewTicker(a.Interval.Duration)
|
|
|
|
|
|
|
|
for {
|
2015-10-16 22:13:32 +00:00
|
|
|
if err := a.gatherParallel(pointChan); err != nil {
|
2015-08-26 23:43:09 +00:00
|
|
|
log.Printf(err.Error())
|
2015-04-01 16:34:32 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
select {
|
|
|
|
case <-shutdown:
|
2015-04-07 16:23:58 +00:00
|
|
|
return nil
|
2015-04-01 16:34:32 +00:00
|
|
|
case <-ticker.C:
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|