Add poller mode to Telegraf

This commit is contained in:
Thibault Cohen 2016-02-28 04:37:03 -05:00
parent bd3d0c330f
commit ec06b87554
10 changed files with 905 additions and 43 deletions

View File

@ -71,6 +71,7 @@ endif
docker run --name mqtt -p "1883:1883" -d ncarlier/mqtt docker run --name mqtt -p "1883:1883" -d ncarlier/mqtt
docker run --name riemann -p "5555:5555" -d blalor/riemann docker run --name riemann -p "5555:5555" -d blalor/riemann
docker run --name snmp -p "31161:31161/udp" -d titilambert/snmpsim docker run --name snmp -p "31161:31161/udp" -d titilambert/snmpsim
docker run --name poller -p "5673:5672" -d rabbitmq:3-management
# Run docker containers necessary for CircleCI unit tests # Run docker containers necessary for CircleCI unit tests
docker-run-circle: docker-run-circle:
@ -85,11 +86,12 @@ docker-run-circle:
docker run --name mqtt -p "1883:1883" -d ncarlier/mqtt docker run --name mqtt -p "1883:1883" -d ncarlier/mqtt
docker run --name riemann -p "5555:5555" -d blalor/riemann docker run --name riemann -p "5555:5555" -d blalor/riemann
docker run --name snmp -p "31161:31161/udp" -d titilambert/snmpsim docker run --name snmp -p "31161:31161/udp" -d titilambert/snmpsim
docker run --name poller -p "5673:5672" -d rabbitmq:3-management
# Kill all docker containers, ignore errors # Kill all docker containers, ignore errors
docker-kill: docker-kill:
-docker kill nsq aerospike redis opentsdb rabbitmq postgres memcached mysql kafka mqtt riemann snmp -docker kill nsq aerospike redis opentsdb rabbitmq postgres memcached mysql kafka mqtt riemann snmp poller
-docker rm nsq aerospike redis opentsdb rabbitmq postgres memcached mysql kafka mqtt riemann snmp -docker rm nsq aerospike redis opentsdb rabbitmq postgres memcached mysql kafka mqtt riemann snmp poller
# Run full unit tests using docker containers (includes setup and teardown) # Run full unit tests using docker containers (includes setup and teardown)
test: vet docker-kill docker-run test: vet docker-kill docker-run

View File

@ -165,7 +165,7 @@ func (ac *accumulator) SetDebug(debug bool) {
ac.debug = debug ac.debug = debug
} }
func (ac *accumulator) setDefaultTags(tags map[string]string) { func (ac *accumulator) SetDefaultTags(tags map[string]string) {
ac.defaultTags = tags ac.defaultTags = tags
} }

View File

@ -121,7 +121,7 @@ func (a *Agent) gatherParallel(metricC chan telegraf.Metric) error {
acc := NewAccumulator(input.Config, metricC) acc := NewAccumulator(input.Config, metricC)
acc.SetDebug(a.Config.Agent.Debug) acc.SetDebug(a.Config.Agent.Debug)
acc.setDefaultTags(a.Config.Tags) acc.SetDefaultTags(a.Config.Tags)
if jitter != 0 { if jitter != 0 {
nanoSleep := rand.Int63n(jitter) nanoSleep := rand.Int63n(jitter)
@ -172,7 +172,7 @@ func (a *Agent) gatherSeparate(
acc := NewAccumulator(input.Config, metricC) acc := NewAccumulator(input.Config, metricC)
acc.SetDebug(a.Config.Agent.Debug) acc.SetDebug(a.Config.Agent.Debug)
acc.setDefaultTags(a.Config.Tags) acc.SetDefaultTags(a.Config.Tags)
if err := input.Input.Gather(acc); err != nil { if err := input.Input.Gather(acc); err != nil {
log.Printf("Error in input [%s]: %s", input.Name, err) log.Printf("Error in input [%s]: %s", input.Name, err)
@ -287,9 +287,9 @@ func (a *Agent) flusher(shutdown chan struct{}, metricC chan telegraf.Metric) er
} }
} }
// jitterInterval applies the the interval jitter to the flush interval using // JitterInterval applies the the interval jitter to the flush interval using
// crypto/rand number generator // crypto/rand number generator
func jitterInterval(ininterval, injitter time.Duration) time.Duration { func JitterInterval(ininterval, injitter time.Duration) time.Duration {
var jitter int64 var jitter int64
outinterval := ininterval outinterval := ininterval
if injitter.Nanoseconds() != 0 { if injitter.Nanoseconds() != 0 {
@ -312,7 +312,7 @@ func jitterInterval(ininterval, injitter time.Duration) time.Duration {
func (a *Agent) Run(shutdown chan struct{}) error { func (a *Agent) Run(shutdown chan struct{}) error {
var wg sync.WaitGroup var wg sync.WaitGroup
a.Config.Agent.FlushInterval.Duration = jitterInterval( a.Config.Agent.FlushInterval.Duration = JitterInterval(
a.Config.Agent.FlushInterval.Duration, a.Config.Agent.FlushInterval.Duration,
a.Config.Agent.FlushJitter.Duration) a.Config.Agent.FlushJitter.Duration)
@ -330,7 +330,7 @@ func (a *Agent) Run(shutdown chan struct{}) error {
case telegraf.ServiceInput: case telegraf.ServiceInput:
acc := NewAccumulator(input.Config, metricC) acc := NewAccumulator(input.Config, metricC)
acc.SetDebug(a.Config.Agent.Debug) acc.SetDebug(a.Config.Agent.Debug)
acc.setDefaultTags(a.Config.Tags) acc.SetDefaultTags(a.Config.Tags)
if err := p.Start(acc); err != nil { if err := p.Start(acc); err != nil {
log.Printf("Service for input %s failed to start, exiting\n%s\n", log.Printf("Service for input %s failed to start, exiting\n%s\n",
input.Name, err.Error()) input.Name, err.Error())

View File

@ -103,7 +103,7 @@ func TestAgent_LoadOutput(t *testing.T) {
} }
func TestAgent_ZeroJitter(t *testing.T) { func TestAgent_ZeroJitter(t *testing.T) {
flushinterval := jitterInterval(time.Duration(10*time.Second), flushinterval := JitterInterval(time.Duration(10*time.Second),
time.Duration(0*time.Second)) time.Duration(0*time.Second))
actual := flushinterval.Nanoseconds() actual := flushinterval.Nanoseconds()
@ -119,7 +119,7 @@ func TestAgent_ZeroInterval(t *testing.T) {
max := time.Duration(5 * time.Second).Nanoseconds() max := time.Duration(5 * time.Second).Nanoseconds()
for i := 0; i < 1000; i++ { for i := 0; i < 1000; i++ {
flushinterval := jitterInterval(time.Duration(0*time.Second), flushinterval := JitterInterval(time.Duration(0*time.Second),
time.Duration(5*time.Second)) time.Duration(5*time.Second))
actual := flushinterval.Nanoseconds() actual := flushinterval.Nanoseconds()
@ -135,7 +135,7 @@ func TestAgent_ZeroInterval(t *testing.T) {
} }
func TestAgent_ZeroBoth(t *testing.T) { func TestAgent_ZeroBoth(t *testing.T) {
flushinterval := jitterInterval(time.Duration(0*time.Second), flushinterval := JitterInterval(time.Duration(0*time.Second),
time.Duration(0*time.Second)) time.Duration(0*time.Second))
actual := flushinterval actual := flushinterval
@ -150,7 +150,7 @@ func TestAgent_JitterMax(t *testing.T) {
max := time.Duration(32 * time.Second).Nanoseconds() max := time.Duration(32 * time.Second).Nanoseconds()
for i := 0; i < 1000; i++ { for i := 0; i < 1000; i++ {
flushinterval := jitterInterval(time.Duration(30*time.Second), flushinterval := JitterInterval(time.Duration(30*time.Second),
time.Duration(2*time.Second)) time.Duration(2*time.Second))
actual := flushinterval.Nanoseconds() actual := flushinterval.Nanoseconds()
if actual > max { if actual > max {
@ -164,7 +164,7 @@ func TestAgent_JitterMin(t *testing.T) {
min := time.Duration(30 * time.Second).Nanoseconds() min := time.Duration(30 * time.Second).Nanoseconds()
for i := 0; i < 1000; i++ { for i := 0; i < 1000; i++ {
flushinterval := jitterInterval(time.Duration(30*time.Second), flushinterval := JitterInterval(time.Duration(30*time.Second),
time.Duration(2*time.Second)) time.Duration(2*time.Second))
actual := flushinterval.Nanoseconds() actual := flushinterval.Nanoseconds()
if actual < min { if actual < min {

View File

@ -15,6 +15,7 @@ import (
_ "github.com/influxdata/telegraf/plugins/inputs/all" _ "github.com/influxdata/telegraf/plugins/inputs/all"
"github.com/influxdata/telegraf/plugins/outputs" "github.com/influxdata/telegraf/plugins/outputs"
_ "github.com/influxdata/telegraf/plugins/outputs/all" _ "github.com/influxdata/telegraf/plugins/outputs/all"
"github.com/influxdata/telegraf/poller"
) )
var fDebug = flag.Bool("debug", false, var fDebug = flag.Bool("debug", false,
@ -191,34 +192,63 @@ func main() {
if len(c.Outputs) == 0 { if len(c.Outputs) == 0 {
log.Fatalf("Error: no outputs found, did you provide a valid config file?") log.Fatalf("Error: no outputs found, did you provide a valid config file?")
} }
if len(c.Inputs) == 0 {
log.Fatalf("Error: no inputs found, did you provide a valid config file?") if len(c.Inputs) == 0 && c.Poller.AMQPUrl == "" {
log.Fatalf("Error: no inputs found and poller node disabled, did you provide a valid config file?")
} }
ag, err := agent.NewAgent(c) // TODO: Do we want agent and poller mode enabled in the same time ?
if err != nil { var ag *agent.Agent
log.Fatal(err) var po *poller.Poller
} if c.Poller.AMQPUrl == "" {
// Agent
if *fDebug { if c.Agent == nil {
ag.Config.Agent.Debug = true log.Fatal("err")
} }
ag, err = agent.NewAgent(c)
if *fQuiet {
ag.Config.Agent.Quiet = true
}
if *fTest {
err = ag.Test()
if err != nil { if err != nil {
log.Fatal(err) log.Fatal(err)
} }
return
}
err = ag.Connect() if *fDebug {
if err != nil { ag.Config.Agent.Debug = true
log.Fatal(err) }
if *fQuiet {
ag.Config.Agent.Quiet = true
}
if *fTest {
err = ag.Test()
if err != nil {
log.Fatal(err)
}
return
}
err = ag.Connect()
if err != nil {
log.Fatal(err)
}
} else {
// Poller
po, err = poller.NewPoller(c)
if err != nil {
log.Fatal(err)
}
if *fDebug {
po.Config.Poller.Debug = true
}
if *fQuiet {
po.Config.Poller.Quiet = true
}
err = po.Connect()
if err != nil {
log.Fatal(err)
}
} }
shutdown := make(chan struct{}) shutdown := make(chan struct{})
@ -253,7 +283,11 @@ func main() {
f.Close() f.Close()
} }
ag.Run(shutdown) if c.Poller.AMQPUrl == "" {
ag.Run(shutdown)
} else {
po.Run(shutdown)
}
} }
} }

View File

@ -31,6 +31,7 @@ type Config struct {
OutputFilters []string OutputFilters []string
Agent *AgentConfig Agent *AgentConfig
Poller *PollerConfig
Inputs []*internal_models.RunningInput Inputs []*internal_models.RunningInput
Outputs []*internal_models.RunningOutput Outputs []*internal_models.RunningOutput
} }
@ -44,6 +45,11 @@ func NewConfig() *Config {
FlushInterval: internal.Duration{Duration: 10 * time.Second}, FlushInterval: internal.Duration{Duration: 10 * time.Second},
FlushJitter: internal.Duration{Duration: 5 * time.Second}, FlushJitter: internal.Duration{Duration: 5 * time.Second},
}, },
// Poller defaults:
Poller: &PollerConfig{
FlushInterval: internal.Duration{Duration: 10 * time.Second},
FlushJitter: internal.Duration{Duration: 5 * time.Second},
},
Tags: make(map[string]string), Tags: make(map[string]string),
Inputs: make([]*internal_models.RunningInput, 0), Inputs: make([]*internal_models.RunningInput, 0),
@ -101,6 +107,40 @@ type AgentConfig struct {
Hostname string Hostname string
} }
type PollerConfig struct {
// Rabbitmq url amqp://guest:guest@localhost:5672/
AMQPUrl string `toml:"AMQPurl"`
// Rabbitmq labels
AMQPlabels []string `toml:"AMQPlabels"`
// FlushInterval is the Interval at which to flush data
FlushInterval internal.Duration
// FlushJitter Jitters the flush interval by a random amount.
// This is primarily to avoid large write spikes for users running a large
// number of telegraf instances.
// ie, a jitter of 5s and interval 10s means flushes will happen every 10-15s
FlushJitter internal.Duration
// MetricBufferLimit is the max number of metrics that each output plugin
// will cache. The buffer is cleared when a successful write occurs. When
// full, the oldest metrics will be overwritten.
MetricBufferLimit int
// FlushBufferWhenFull tells Telegraf to flush the metric buffer whenever
// it fills up, regardless of FlushInterval. Setting this option to true
// does _not_ deactivate FlushInterval.
FlushBufferWhenFull bool
// Debug is the option for running in debug mode
Debug bool
// Quiet is the option for running in quiet mode
Quiet bool
Hostname string
}
// Inputs returns a list of strings of the configured inputs. // Inputs returns a list of strings of the configured inputs.
func (c *Config) InputNames() []string { func (c *Config) InputNames() []string {
var name []string var name []string
@ -339,6 +379,11 @@ func (c *Config) LoadConfig(path string) error {
log.Printf("Could not parse [agent] config\n") log.Printf("Could not parse [agent] config\n")
return err return err
} }
case "poller":
if err = config.UnmarshalTable(subTable, c.Poller); err != nil {
log.Printf("Could not parse [poller] config\n")
return err
}
case "global_tags", "tags": case "global_tags", "tags":
if err = config.UnmarshalTable(subTable, c.Tags); err != nil { if err = config.UnmarshalTable(subTable, c.Tags); err != nil {
log.Printf("Could not parse [global_tags] config\n") log.Printf("Could not parse [global_tags] config\n")
@ -449,14 +494,14 @@ func (c *Config) addInput(name string, table *ast.Table) error {
// arbitrary types of input, so build the parser and set it. // arbitrary types of input, so build the parser and set it.
switch t := input.(type) { switch t := input.(type) {
case parsers.ParserInput: case parsers.ParserInput:
parser, err := buildParser(name, table) parser, err := BuildParser(name, table)
if err != nil { if err != nil {
return err return err
} }
t.SetParser(parser) t.SetParser(parser)
} }
pluginConfig, err := buildInput(name, table) pluginConfig, err := BuildInput(name, table)
if err != nil { if err != nil {
return err return err
} }
@ -588,10 +633,10 @@ func buildFilter(tbl *ast.Table) internal_models.Filter {
return f return f
} }
// buildInput parses input specific items from the ast.Table, // BuildInput parses input specific items from the ast.Table,
// builds the filter and returns a // builds the filter and returns a
// internal_models.InputConfig to be inserted into internal_models.RunningInput // internal_models.InputConfig to be inserted into internal_models.RunningInput
func buildInput(name string, tbl *ast.Table) (*internal_models.InputConfig, error) { func BuildInput(name string, tbl *ast.Table) (*internal_models.InputConfig, error) {
cp := &internal_models.InputConfig{Name: name} cp := &internal_models.InputConfig{Name: name}
if node, ok := tbl.Fields["interval"]; ok { if node, ok := tbl.Fields["interval"]; ok {
if kv, ok := node.(*ast.KeyValue); ok { if kv, ok := node.(*ast.KeyValue); ok {
@ -648,10 +693,10 @@ func buildInput(name string, tbl *ast.Table) (*internal_models.InputConfig, erro
return cp, nil return cp, nil
} }
// buildParser grabs the necessary entries from the ast.Table for creating // BuildParser grabs the necessary entries from the ast.Table for creating
// a parsers.Parser object, and creates it, which can then be added onto // a parsers.Parser object, and creates it, which can then be added onto
// an Input object. // an Input object.
func buildParser(name string, tbl *ast.Table) (parsers.Parser, error) { func BuildParser(name string, tbl *ast.Table) (parsers.Parser, error) {
c := &parsers.Config{} c := &parsers.Config{}
if node, ok := tbl.Fields["data_format"]; ok { if node, ok := tbl.Fields["data_format"]; ok {

View File

@ -0,0 +1,59 @@
# Telegraf configuration
# Telegraf is entirely plugin driven. All metrics are gathered from the
# declared inputs.
# Even if a plugin has no configuration, it must be declared in here
# to be active. Declaring a plugin means just specifying the name
# as a section with no variables. To deactivate a plugin, comment
# out the name and any variables.
# Use 'telegraf -config telegraf.toml -test' to see what metrics a config
# file would generate.
# One rule that plugins conform to is wherever a connection string
# can be passed, the values '' and 'localhost' are treated specially.
# They indicate to the plugin to use their own builtin configuration to
# connect to the local system.
# NOTE: The configuration has a few required parameters. They are marked
# with 'required'. Be sure to edit those to make this configuration work.
# Tags can also be specified via a normal map, but only one form at a time:
[global_tags]
dc = "us-east-1"
[poller]
AMQPurl = "amqp://guest:guest@127.0.0.1:5674/"
AMQPlabels = ["label1"]
# run telegraf in debug mode
debug = false
# Override default hostname, if empty use os.Hostname()
hostname = ""
## Default flushing interval for all outputs. You shouldn't set this below
## interval. Maximum flush_interval will be flush_interval + flush_jitter
flush_interval = "3s"
## Jitter the flush interval by a random amount. This is primarily to avoid
## large write spikes for users running a large number of telegraf instances.
## ie, a jitter of 5s and interval 10s means flushes will happen every 10-15s
flush_jitter = "0s"
###############################################################################
# OUTPUTS #
###############################################################################
[[outputs.file]]
## Files to write to, "stdout" is a specially handled file.
files = ["/tmp/metrics.out"]
## Data format to output. This can be "influx" or "graphite"
## Each data format has it's own unique set of configuration options, read
## more about them here:
## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_OUTPUT.md
data_format = "influx"

View File

@ -0,0 +1,59 @@
# Telegraf configuration
# Telegraf is entirely plugin driven. All metrics are gathered from the
# declared inputs.
# Even if a plugin has no configuration, it must be declared in here
# to be active. Declaring a plugin means just specifying the name
# as a section with no variables. To deactivate a plugin, comment
# out the name and any variables.
# Use 'telegraf -config telegraf.toml -test' to see what metrics a config
# file would generate.
# One rule that plugins conform to is wherever a connection string
# can be passed, the values '' and 'localhost' are treated specially.
# They indicate to the plugin to use their own builtin configuration to
# connect to the local system.
# NOTE: The configuration has a few required parameters. They are marked
# with 'required'. Be sure to edit those to make this configuration work.
# Tags can also be specified via a normal map, but only one form at a time:
[global_tags]
dc = "us-east-1"
[poller]
AMQPurl = "amqp://guest:guest@127.0.0.1:5673/"
AMQPlabels = ["label1"]
# run telegraf in debug mode
debug = false
# Override default hostname, if empty use os.Hostname()
hostname = ""
## Default flushing interval for all outputs. You shouldn't set this below
## interval. Maximum flush_interval will be flush_interval + flush_jitter
flush_interval = "3s"
## Jitter the flush interval by a random amount. This is primarily to avoid
## large write spikes for users running a large number of telegraf instances.
## ie, a jitter of 5s and interval 10s means flushes will happen every 10-15s
flush_jitter = "0s"
###############################################################################
# OUTPUTS #
###############################################################################
[[outputs.file]]
## Files to write to, "stdout" is a specially handled file.
files = ["/tmp/metrics.out"]
## Data format to output. This can be "influx" or "graphite"
## Each data format has it's own unique set of configuration options, read
## more about them here:
## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_OUTPUT.md
data_format = "influx"

456
poller/poller.go Normal file
View File

@ -0,0 +1,456 @@
package poller
import (
"errors"
"fmt"
"log"
"os"
"runtime"
"sync"
"time"
"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/agent"
"github.com/influxdata/telegraf/plugins/inputs"
"github.com/influxdata/telegraf/plugins/parsers"
"github.com/influxdata/telegraf/internal/config"
"github.com/influxdata/telegraf/internal/models"
influxconfig "github.com/influxdata/config"
"github.com/influxdata/toml"
"github.com/influxdata/toml/ast"
"github.com/streadway/amqp"
)
// Poller runs telegraf and collects data based on the given config
type Poller struct {
Config *config.Config
AMQPconn *amqp.Connection
AMQPchannel *amqp.Channel
rawTasks chan []byte
}
// NewPoller returns an Poller struct based off the given Config
func NewPoller(config *config.Config) (*Poller, error) {
p := &Poller{
Config: config,
}
if p.Config.Poller.Hostname == "" {
hostname, err := os.Hostname()
if err != nil {
return nil, err
}
p.Config.Poller.Hostname = hostname
}
config.Tags["host"] = p.Config.Poller.Hostname
return p, nil
}
func (p *Poller) getTask(conn *amqp.Connection, queueName string, consumerTag string, toto chan []byte) error {
// defer conn.Close()
tasks, err := p.AMQPchannel.Consume(queueName, consumerTag+"_"+queueName, false, false, false, false, nil)
if err != nil {
// TODO BETER HANDLING
return fmt.Errorf("basic.consume: %v", err)
}
for task := range tasks {
//log.Printf("%s \n", task)
toto <- task.Body
err := task.Nack(false, false)
if err != nil {
//TODO ????
}
}
return nil
}
// Conenctio AMQP server
func (p *Poller) AMQPConnect() error {
p.rawTasks = make(chan []byte)
var err error
// Prepare config
// TODO Handle vhost
conf := amqp.Config{
// Vhost: "/telegraf",
Heartbeat: time.Duration(0) * time.Second,
}
// Dial server
p.AMQPconn, err = amqp.DialConfig(p.Config.Poller.AMQPUrl, conf)
if err != nil {
return err
}
return nil
}
// Create Channel
func (p *Poller) AMQPCreateChannel() error {
var err error
// Create Channel
p.AMQPchannel, err = p.AMQPconn.Channel()
if err != nil {
return err
}
for _, AMQPlabel := range p.Config.Poller.AMQPlabels {
// Subscribing to queue
go p.getTask(p.AMQPconn, AMQPlabel, p.Config.Poller.Hostname, p.rawTasks)
}
return nil
}
// Connect connects to all configured outputs
func (p *Poller) Connect() error {
for _, o := range p.Config.Outputs {
o.Quiet = p.Config.Poller.Quiet
switch ot := o.Output.(type) {
case telegraf.ServiceOutput:
if err := ot.Start(); err != nil {
log.Printf("Service for output %s failed to start, exiting\n%s\n",
o.Name, err.Error())
return err
}
}
if p.Config.Poller.Debug {
log.Printf("Attempting connection to output: %s\n", o.Name)
}
err := o.Output.Connect()
if err != nil {
log.Printf("Failed to connect to output %s, retrying in 15s, "+
"error was '%s' \n", o.Name, err)
time.Sleep(15 * time.Second)
err = o.Output.Connect()
if err != nil {
return err
}
}
if p.Config.Poller.Debug {
log.Printf("Successfully connected to output: %s\n", o.Name)
}
}
return nil
}
// Close closes the connection to all configured outputs
func (p *Poller) Close() error {
var err error
for _, o := range p.Config.Outputs {
err = o.Output.Close()
switch ot := o.Output.(type) {
case telegraf.ServiceOutput:
ot.Stop()
}
}
// TODO close AMQP connection
return err
}
func panicRecover(input *internal_models.RunningInput) {
if err := recover(); err != nil {
trace := make([]byte, 2048)
runtime.Stack(trace, true)
log.Printf("FATAL: Input [%s] panicked: %s, Stack:\n%s\n",
input.Name, err, trace)
log.Println("PLEASE REPORT THIS PANIC ON GITHUB with " +
"stack trace, configuration, and OS information: " +
"https://github.com/influxdata/telegraf/issues/new")
}
}
func (p *Poller) gather(input *internal_models.RunningInput, metricC chan telegraf.Metric) error {
defer panicRecover(input)
var outerr error
start := time.Now()
acc := agent.NewAccumulator(input.Config, metricC)
acc.SetDebug(p.Config.Poller.Debug)
acc.SetDefaultTags(p.Config.Tags)
if err := input.Input.Gather(acc); err != nil {
log.Printf("Error in input [%s]: %s", input.Name, err)
}
elapsed := time.Since(start)
if !p.Config.Poller.Quiet {
log.Printf("Gathered metric, from polling, from %s in %s\n",
input.Name, elapsed)
}
return outerr
}
func (p *Poller) getInput(rawInput []byte) (*internal_models.RunningInput, error) {
// Transform rawInput from Message body to input plugin object
table, err := toml.Parse(rawInput)
if err != nil {
return nil, errors.New("invalid configuration")
}
for name, val := range table.Fields {
subTable, ok := val.(*ast.Table)
if !ok {
return nil, errors.New("invalid configuration")
}
switch name {
case "inputs", "plugins":
for pluginName, pluginVal := range subTable.Fields {
name := pluginName
var table *ast.Table
switch pluginSubTable := pluginVal.(type) {
case *ast.Table:
table = pluginSubTable
case []*ast.Table:
table = pluginSubTable[0]
// TODO handle this case
/*
for _, t := range pluginSubTable {
if err = c.addInput(pluginName, t); err != nil {
return err
}
}*/
default:
return nil, fmt.Errorf("Unsupported config format: %s",
pluginName)
}
// TODO factorize copy/paste from config/addInput
// Legacy support renaming io input to diskio
if name == "io" {
name = "diskio"
}
creator, ok := inputs.Inputs[name]
if !ok {
return nil, fmt.Errorf("Undefined but requested input: %s", name)
}
input := creator()
// If the input has a SetParser function, then this means it can accept
// arbitrary types of input, so build the parser and set it.
switch t := input.(type) {
case parsers.ParserInput:
parser, err := config.BuildParser(name, table)
if err != nil {
return nil, err
}
t.SetParser(parser)
}
pluginConfig, err := config.BuildInput(name, table)
if err != nil {
return nil, err
}
if err := influxconfig.UnmarshalTable(table, input); err != nil {
return nil, err
}
rp := &internal_models.RunningInput{
Name: name,
Input: input,
Config: pluginConfig,
}
return rp, nil
}
default:
// TODO log bad conf
continue
}
}
return nil, nil
}
// Test verifies that we can 'Gather' from all inputs with their configured
// Config struct
func (p *Poller) Test() error {
//TODO remove it ?????
shutdown := make(chan struct{})
defer close(shutdown)
metricC := make(chan telegraf.Metric)
// dummy receiver for the point channel
go func() {
for {
select {
case <-metricC:
// do nothing
case <-shutdown:
return
}
}
}()
for _, input := range p.Config.Inputs {
acc := agent.NewAccumulator(input.Config, metricC)
acc.SetDebug(true)
fmt.Printf("* Plugin: %s, Collection 1\n", input.Name)
if input.Config.Interval != 0 {
fmt.Printf("* Internal: %s\n", input.Config.Interval)
}
if err := input.Input.Gather(acc); err != nil {
return err
}
// Special instructions for some inputs. cpu, for example, needs to be
// run twice in order to return cpu usage percentages.
switch input.Name {
case "cpu", "mongodb", "procstat":
time.Sleep(500 * time.Millisecond)
fmt.Printf("* Plugin: %s, Collection 2\n", input.Name)
if err := input.Input.Gather(acc); err != nil {
return err
}
}
}
return nil
}
// flush writes a list of metrics to all configured outputs
func (p *Poller) flush() {
var wg sync.WaitGroup
wg.Add(len(p.Config.Outputs))
for _, o := range p.Config.Outputs {
go func(output *internal_models.RunningOutput) {
defer wg.Done()
err := output.Write()
if err != nil {
log.Printf("Error writing to output [%s]: %s\n",
output.Name, err.Error())
}
}(o)
}
wg.Wait()
}
// flusher monitors the metrics input channel and flushes on the minimum interval
func (p *Poller) flusher(shutdown chan struct{}, metricC chan telegraf.Metric) error {
// Inelegant, but this sleep is to allow the Gather threads to run, so that
// the flusher will flush after metrics are collected.
time.Sleep(time.Millisecond * 200)
ticker := time.NewTicker(p.Config.Poller.FlushInterval.Duration)
for {
select {
case <-shutdown:
log.Println("Hang on, flushing any cached metrics before shutdown")
p.flush()
return nil
case <-ticker.C:
p.flush()
case m := <-metricC:
for _, o := range p.Config.Outputs {
o.AddMetric(m)
}
}
}
}
// Run runs the agent daemon, gathering every Interval
func (p *Poller) Run(shutdown chan struct{}) error {
var wg sync.WaitGroup
p.Config.Agent.FlushInterval.Duration = agent.JitterInterval(
p.Config.Agent.FlushInterval.Duration,
p.Config.Agent.FlushJitter.Duration)
log.Printf("Agent Config: Debug:%#v, Quiet:%#v, Hostname:%#v, "+
"Flush Interval:%s \n",
p.Config.Poller.Debug, p.Config.Poller.Quiet,
p.Config.Poller.Hostname, p.Config.Poller.FlushInterval.Duration)
log.Print("Message queue connection\n")
err := p.AMQPConnect()
if err == nil {
log.Print("Channel creation\n")
err = p.AMQPCreateChannel()
}
if err == nil {
log.Print("Message queue connected\n")
} else {
log.Printf("Message queue connection error: %s\n", err)
}
// channel shared between all input threads for accumulating metrics
metricC := make(chan telegraf.Metric, 10000)
wg.Add(1)
go func() {
defer wg.Done()
if err := p.flusher(shutdown, metricC); err != nil {
log.Printf("Flusher routine failed, exiting: %s\n", err.Error())
close(shutdown)
}
}()
defer wg.Wait()
c := make(chan *amqp.Error)
reconnection:
for {
// We need to be sure that channel is open
// TODO handle channelS!!!
if p.AMQPchannel != nil && p.AMQPconn != nil {
select {
case <-shutdown:
return nil
case rawTask := <-p.rawTasks:
go func(rawTask []byte) {
// Get input obj from message
input, err := p.getInput(rawTask)
if err != nil {
log.Printf(err.Error())
} else {
// Do the check
if err := p.gather(input, metricC); err != nil {
log.Printf(err.Error())
}
}
}(rawTask)
case err := <-p.AMQPconn.NotifyClose(c):
// handle connection errors
// and reconnections
log.Printf("Connection error: %s\n", err)
break reconnection
case err := <-p.AMQPchannel.NotifyClose(c):
// handle channel errors
// and reconnections
log.Printf("Channel error: %s\n", err)
break reconnection
}
} else {
break
}
}
// Handle restart
log.Print("Message queue reconnection in 3 seconds\n")
ticker := time.NewTicker(time.Duration(3) * time.Second)
select {
case <-shutdown:
return nil
case <-ticker.C:
}
// Send shutdown signal to restart routines
log.Print("Shutdown signal send to routines\n")
shutdown <- struct{}{}
return p.Run(shutdown)
}

207
poller/poller_test.go Normal file
View File

@ -0,0 +1,207 @@
package poller
import (
"github.com/stretchr/testify/assert"
"log"
"os"
"testing"
"time"
"github.com/influxdata/telegraf/internal/config"
// needing to load the plugins
_ "github.com/influxdata/telegraf/plugins/inputs/all"
// needing to load the outputs
_ "github.com/influxdata/telegraf/plugins/outputs/all"
"github.com/streadway/amqp"
)
func AMQPcreation() {
// Connect
connection, _ := amqp.Dial("amqp://guest:guest@127.0.0.1:5673/")
// Channel
channel, _ := connection.Channel()
// SPLIT_BY_TAGS_EXCHANGE
channel.ExchangeDeclare(
"SPLIT_BY_TAGS_EXCHANGE", // name
"headers", // type
true, // durable
false, // delete when unused
false, // internal
false, // no-wait
nil, // arguments
)
// ENTRANCE
channel.ExchangeDeclare(
"ENTRANCE", // name
"fanout", // type
true, // durable
false, // delete when unused
false, // internal
false, // no-wait
nil, // arguments
)
channel.ExchangeBind(
"SPLIT_BY_TAGS_EXCHANGE",
"",
"ENTRANCE",
false,
nil,
)
//REQUEUE_AFTER_WAIT_EXCHANGE
channel.ExchangeDeclare(
"REQUEUE_AFTER_WAIT_EXCHANGE",
"fanout", // type
true, // durable
false, // delete when unused
false, // internal
false, // no-wait
nil, // arguments
)
channel.ExchangeBind(
"SPLIT_BY_TAGS_EXCHANGE",
"",
"REQUEUE_AFTER_WAIT_EXCHANGE",
false,
nil,
)
//SPLIT_BY_INTERVAL_EXCHANGE
channel.ExchangeDeclare(
"SPLIT_BY_INTERVAL_EXCHANGE", // name
"headers", // type
true, // durable
false, // delete when unused
false, // internal
false, // no-wait
nil, // arguments
)
// QUEUE wait_queue_5s
args := amqp.Table{}
args["x-dead-letter-exchange"] = "REQUEUE_AFTER_WAIT_EXCHANGE"
args["x-message-ttl"] = int64(5000)
channel.QueueDeclare(
"wait_queue_5s",
true,
false,
false,
false,
args,
)
// Purge queue
channel.QueuePurge("wait_queue_5s", false)
args = amqp.Table{}
args["interval"] = "5000"
args["x-match"] = "all"
channel.QueueBind(
"wait_queue_5s",
"",
"SPLIT_BY_INTERVAL_EXCHANGE",
false,
args,
)
// QUEUE label1
args = amqp.Table{}
args["x-dead-letter-exchange"] = "SPLIT_BY_INTERVAL_EXCHANGE"
channel.QueueDeclare(
"label1",
true,
false,
false,
false,
args,
)
// Purge queue
channel.QueuePurge("label1", false)
args = amqp.Table{}
args["label1"] = "value1"
args["x-match"] = "all"
channel.QueueBind(
"label1",
"",
"SPLIT_BY_TAGS_EXCHANGE",
false,
args,
)
// Create tasks
msg := amqp.Publishing{
Headers: amqp.Table{
"interval": "5000",
"label1": "value1",
},
Body: []byte("[[inputs.mem]]"),
}
channel.Publish(
"ENTRANCE",
"",
false,
false,
msg,
)
}
func TestPoller_Reconnection(t *testing.T) {
// Remove old file
os.Remove("/tmp/metrics.out")
// Create conf
c := config.NewConfig()
err := c.LoadConfig("../internal/config/testdata/telegraf-poller-bad.toml")
assert.NoError(t, err)
p, _ := NewPoller(c)
// Connect output
p.Connect()
// Prepare shutdown
shutdown := make(chan struct{})
go func(sdchan chan struct{}) {
log.Println("Waiting 10s before shuting down poller")
time.Sleep(time.Duration(5) * time.Second)
sdchan <- struct{}{}
close(sdchan)
}(shutdown)
err = p.Run(shutdown)
assert.NoError(t, err)
p.Config.Outputs[0].Output.Close()
// Check output
f, err := os.Open("/tmp/metrics.out")
assert.NoError(t, err)
//if err != nil {
finfo, _ := f.Stat()
assert.True(t, finfo.Size() == int64(0))
f.Close()
//}
}
func TestPoller_Polling(t *testing.T) {
// Remove old file
os.Remove("/tmp/metrics.out")
// Create AMQP flow
AMQPcreation()
// Create conf
c := config.NewConfig()
err := c.LoadConfig("../internal/config/testdata/telegraf-poller.toml")
assert.NoError(t, err)
p, _ := NewPoller(c)
// Connect output
p.Connect()
// Prepare shutdown
shutdown := make(chan struct{})
go func(sdchan chan struct{}) {
log.Println("Waiting 10s before shuting down poller")
time.Sleep(time.Duration(10) * time.Second)
// time.Sleep(time.Duration(5) * time.Second)
sdchan <- struct{}{}
//p.Config.Outputs[0].Output.Close()
// p.Close()
close(sdchan)
}(shutdown)
err = p.Run(shutdown)
assert.NoError(t, err)
p.Config.Outputs[0].Output.Close()
// Check output
f, _ := os.Open("/tmp/metrics.out")
finfo, _ := f.Stat()
assert.True(t, finfo.Size() > int64(0))
f.Close()
}