116
agent/agent.go
116
agent/agent.go
@@ -89,7 +89,7 @@ func panicRecover(input *models.RunningInput) {
|
||||
trace := make([]byte, 2048)
|
||||
runtime.Stack(trace, true)
|
||||
log.Printf("E! FATAL: Input [%s] panicked: %s, Stack:\n%s\n",
|
||||
input.Name, err, trace)
|
||||
input.Name(), err, trace)
|
||||
log.Println("E! PLEASE REPORT THIS PANIC ON GITHUB with " +
|
||||
"stack trace, configuration, and OS information: " +
|
||||
"https://github.com/influxdata/telegraf/issues/new")
|
||||
@@ -103,19 +103,18 @@ func (a *Agent) gatherer(
|
||||
input *models.RunningInput,
|
||||
interval time.Duration,
|
||||
metricC chan telegraf.Metric,
|
||||
) error {
|
||||
) {
|
||||
defer panicRecover(input)
|
||||
|
||||
ticker := time.NewTicker(interval)
|
||||
defer ticker.Stop()
|
||||
|
||||
for {
|
||||
var outerr error
|
||||
|
||||
acc := NewAccumulator(input.Config, metricC)
|
||||
acc := NewAccumulator(input, metricC)
|
||||
acc.SetPrecision(a.Config.Agent.Precision.Duration,
|
||||
a.Config.Agent.Interval.Duration)
|
||||
acc.setDefaultTags(a.Config.Tags)
|
||||
input.SetDebug(a.Config.Agent.Debug)
|
||||
input.SetDefaultTags(a.Config.Tags)
|
||||
|
||||
internal.RandomSleep(a.Config.Agent.CollectionJitter.Duration, shutdown)
|
||||
|
||||
@@ -123,15 +122,13 @@ func (a *Agent) gatherer(
|
||||
gatherWithTimeout(shutdown, input, acc, interval)
|
||||
elapsed := time.Since(start)
|
||||
|
||||
if outerr != nil {
|
||||
return outerr
|
||||
}
|
||||
|
||||
log.Printf("D! Input [%s] gathered metrics, (%s interval) in %s\n",
|
||||
input.Name, interval, elapsed)
|
||||
input.Name(), interval, elapsed)
|
||||
|
||||
select {
|
||||
case <-shutdown:
|
||||
return nil
|
||||
return
|
||||
case <-ticker.C:
|
||||
continue
|
||||
}
|
||||
@@ -160,13 +157,13 @@ func gatherWithTimeout(
|
||||
select {
|
||||
case err := <-done:
|
||||
if err != nil {
|
||||
log.Printf("E! ERROR in input [%s]: %s", input.Name, err)
|
||||
log.Printf("E! ERROR in input [%s]: %s", input.Name(), err)
|
||||
}
|
||||
return
|
||||
case <-ticker.C:
|
||||
log.Printf("E! ERROR: input [%s] took longer to collect than "+
|
||||
"collection interval (%s)",
|
||||
input.Name, timeout)
|
||||
input.Name(), timeout)
|
||||
continue
|
||||
case <-shutdown:
|
||||
return
|
||||
@@ -194,13 +191,13 @@ func (a *Agent) Test() error {
|
||||
}()
|
||||
|
||||
for _, input := range a.Config.Inputs {
|
||||
acc := NewAccumulator(input.Config, metricC)
|
||||
acc.SetTrace(true)
|
||||
acc := NewAccumulator(input, metricC)
|
||||
acc.SetPrecision(a.Config.Agent.Precision.Duration,
|
||||
a.Config.Agent.Interval.Duration)
|
||||
acc.setDefaultTags(a.Config.Tags)
|
||||
input.SetTrace(true)
|
||||
input.SetDefaultTags(a.Config.Tags)
|
||||
|
||||
fmt.Printf("* Plugin: %s, Collection 1\n", input.Name)
|
||||
fmt.Printf("* Plugin: %s, Collection 1\n", input.Name())
|
||||
if input.Config.Interval != 0 {
|
||||
fmt.Printf("* Internal: %s\n", input.Config.Interval)
|
||||
}
|
||||
@@ -214,10 +211,10 @@ func (a *Agent) Test() error {
|
||||
|
||||
// Special instructions for some inputs. cpu, for example, needs to be
|
||||
// run twice in order to return cpu usage percentages.
|
||||
switch input.Name {
|
||||
switch input.Name() {
|
||||
case "cpu", "mongodb", "procstat":
|
||||
time.Sleep(500 * time.Millisecond)
|
||||
fmt.Printf("* Plugin: %s, Collection 2\n", input.Name)
|
||||
fmt.Printf("* Plugin: %s, Collection 2\n", input.Name())
|
||||
if err := input.Input.Gather(acc); err != nil {
|
||||
return err
|
||||
}
|
||||
@@ -250,26 +247,69 @@ func (a *Agent) flush() {
|
||||
func (a *Agent) flusher(shutdown chan struct{}, metricC chan telegraf.Metric) error {
|
||||
// Inelegant, but this sleep is to allow the Gather threads to run, so that
|
||||
// the flusher will flush after metrics are collected.
|
||||
time.Sleep(time.Millisecond * 200)
|
||||
time.Sleep(time.Millisecond * 300)
|
||||
|
||||
// create an output metric channel and a gorouting that continously passes
|
||||
// each metric onto the output plugins & aggregators.
|
||||
outMetricC := make(chan telegraf.Metric, 100)
|
||||
var wg sync.WaitGroup
|
||||
wg.Add(1)
|
||||
go func() {
|
||||
defer wg.Done()
|
||||
for {
|
||||
select {
|
||||
case <-shutdown:
|
||||
for _, agg := range a.Config.Aggregators {
|
||||
agg.Aggregator.Stop()
|
||||
}
|
||||
if len(outMetricC) > 0 {
|
||||
// keep going until outMetricC is flushed
|
||||
continue
|
||||
}
|
||||
return
|
||||
case m := <-outMetricC:
|
||||
// if dropOriginal is set to true, then we will only send this
|
||||
// metric to the aggregators, not the outputs.
|
||||
var dropOriginal bool
|
||||
if !m.IsAggregate() {
|
||||
for _, agg := range a.Config.Aggregators {
|
||||
if ok := agg.Apply(copyMetric(m)); ok {
|
||||
dropOriginal = true
|
||||
}
|
||||
}
|
||||
}
|
||||
if !dropOriginal {
|
||||
for i, o := range a.Config.Outputs {
|
||||
if i == len(a.Config.Outputs)-1 {
|
||||
o.AddMetric(m)
|
||||
} else {
|
||||
o.AddMetric(copyMetric(m))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}()
|
||||
|
||||
ticker := time.NewTicker(a.Config.Agent.FlushInterval.Duration)
|
||||
|
||||
for {
|
||||
select {
|
||||
case <-shutdown:
|
||||
log.Println("I! Hang on, flushing any cached metrics before shutdown")
|
||||
// wait for outMetricC to get flushed before flushing outputs
|
||||
wg.Wait()
|
||||
a.flush()
|
||||
return nil
|
||||
case <-ticker.C:
|
||||
internal.RandomSleep(a.Config.Agent.FlushJitter.Duration, shutdown)
|
||||
a.flush()
|
||||
case m := <-metricC:
|
||||
for i, o := range a.Config.Outputs {
|
||||
if i == len(a.Config.Outputs)-1 {
|
||||
o.AddMetric(m)
|
||||
} else {
|
||||
o.AddMetric(copyMetric(m))
|
||||
case metric := <-metricC:
|
||||
mS := []telegraf.Metric{metric}
|
||||
for _, processor := range a.Config.Processors {
|
||||
mS = processor.Apply(mS...)
|
||||
}
|
||||
for _, m := range mS {
|
||||
outMetricC <- m
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -303,18 +343,18 @@ func (a *Agent) Run(shutdown chan struct{}) error {
|
||||
// channel shared between all input threads for accumulating metrics
|
||||
metricC := make(chan telegraf.Metric, 10000)
|
||||
|
||||
// Start all ServicePlugins
|
||||
for _, input := range a.Config.Inputs {
|
||||
// Start service of any ServicePlugins
|
||||
switch p := input.Input.(type) {
|
||||
case telegraf.ServiceInput:
|
||||
acc := NewAccumulator(input.Config, metricC)
|
||||
acc := NewAccumulator(input, metricC)
|
||||
// Service input plugins should set their own precision of their
|
||||
// metrics.
|
||||
acc.DisablePrecision()
|
||||
acc.setDefaultTags(a.Config.Tags)
|
||||
acc.SetPrecision(time.Nanosecond, 0)
|
||||
input.SetDefaultTags(a.Config.Tags)
|
||||
if err := p.Start(acc); err != nil {
|
||||
log.Printf("E! Service for input %s failed to start, exiting\n%s\n",
|
||||
input.Name, err.Error())
|
||||
input.Name(), err.Error())
|
||||
return err
|
||||
}
|
||||
defer p.Stop()
|
||||
@@ -327,6 +367,18 @@ func (a *Agent) Run(shutdown chan struct{}) error {
|
||||
time.Sleep(time.Duration(i - (time.Now().UnixNano() % i)))
|
||||
}
|
||||
|
||||
// Start all Aggregators
|
||||
for _, aggregator := range a.Config.Aggregators {
|
||||
acc := NewAccumulator(aggregator, metricC)
|
||||
acc.SetPrecision(a.Config.Agent.Precision.Duration,
|
||||
a.Config.Agent.Interval.Duration)
|
||||
if err := aggregator.Aggregator.Start(acc); err != nil {
|
||||
log.Printf("[%s] failed to start, exiting\n%s\n",
|
||||
aggregator.Name(), err.Error())
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
wg.Add(1)
|
||||
go func() {
|
||||
defer wg.Done()
|
||||
|
||||
Reference in New Issue
Block a user