Support Processor & Aggregator Plugins

closes #1726
2016-09-08 15:22:10 +01:00
parent 974221f0cf
commit 64a71263a1
28 changed files with 1832 additions and 654 deletions
--- a/agent/agent.go
+++ b/agent/agent.go
@@ -89,7 +89,7 @@ func panicRecover(input *models.RunningInput) {
 		trace := make([]byte, 2048)
 		runtime.Stack(trace, true)
 		log.Printf("E! FATAL: Input [%s] panicked: %s, Stack:\n%s\n",
-			input.Name, err, trace)
+			input.Name(), err, trace)
 		log.Println("E! PLEASE REPORT THIS PANIC ON GITHUB with " +
 			"stack trace, configuration, and OS information: " +
 			"https://github.com/influxdata/telegraf/issues/new")
@@ -103,19 +103,18 @@ func (a *Agent) gatherer(
 	input *models.RunningInput,
 	interval time.Duration,
 	metricC chan telegraf.Metric,
-) error {
+) {
 	defer panicRecover(input)

 	ticker := time.NewTicker(interval)
 	defer ticker.Stop()

 	for {
-		var outerr error
-
-		acc := NewAccumulator(input.Config, metricC)
+		acc := NewAccumulator(input, metricC)
 		acc.SetPrecision(a.Config.Agent.Precision.Duration,
 			a.Config.Agent.Interval.Duration)
-		acc.setDefaultTags(a.Config.Tags)
+		input.SetDebug(a.Config.Agent.Debug)
+		input.SetDefaultTags(a.Config.Tags)

 		internal.RandomSleep(a.Config.Agent.CollectionJitter.Duration, shutdown)

@@ -123,15 +122,13 @@ func (a *Agent) gatherer(
 		gatherWithTimeout(shutdown, input, acc, interval)
 		elapsed := time.Since(start)

-		if outerr != nil {
-			return outerr
-		}
+
 		log.Printf("D! Input [%s] gathered metrics, (%s interval) in %s\n",
-			input.Name, interval, elapsed)
+			input.Name(), interval, elapsed)

 		select {
 		case <-shutdown:
-			return nil
+			return
 		case <-ticker.C:
 			continue
 		}
@@ -160,13 +157,13 @@ func gatherWithTimeout(
 		select {
 		case err := <-done:
 			if err != nil {
-				log.Printf("E! ERROR in input [%s]: %s", input.Name, err)
+				log.Printf("E! ERROR in input [%s]: %s", input.Name(), err)
 			}
 			return
 		case <-ticker.C:
 			log.Printf("E! ERROR: input [%s] took longer to collect than "+
 				"collection interval (%s)",
-				input.Name, timeout)
+				input.Name(), timeout)
 			continue
 		case <-shutdown:
 			return
@@ -194,13 +191,13 @@ func (a *Agent) Test() error {
 	}()

 	for _, input := range a.Config.Inputs {
-		acc := NewAccumulator(input.Config, metricC)
-		acc.SetTrace(true)
+		acc := NewAccumulator(input, metricC)
 		acc.SetPrecision(a.Config.Agent.Precision.Duration,
 			a.Config.Agent.Interval.Duration)
-		acc.setDefaultTags(a.Config.Tags)
+		input.SetTrace(true)
+		input.SetDefaultTags(a.Config.Tags)

-		fmt.Printf("* Plugin: %s, Collection 1\n", input.Name)
+		fmt.Printf("* Plugin: %s, Collection 1\n", input.Name())
 		if input.Config.Interval != 0 {
 			fmt.Printf("* Internal: %s\n", input.Config.Interval)
 		}
@@ -214,10 +211,10 @@ func (a *Agent) Test() error {

 		// Special instructions for some inputs. cpu, for example, needs to be
 		// run twice in order to return cpu usage percentages.
-		switch input.Name {
+		switch input.Name() {
 		case "cpu", "mongodb", "procstat":
 			time.Sleep(500 * time.Millisecond)
-			fmt.Printf("* Plugin: %s, Collection 2\n", input.Name)
+			fmt.Printf("* Plugin: %s, Collection 2\n", input.Name())
 			if err := input.Input.Gather(acc); err != nil {
 				return err
 			}
@@ -250,26 +247,69 @@ func (a *Agent) flush() {
 func (a *Agent) flusher(shutdown chan struct{}, metricC chan telegraf.Metric) error {
 	// Inelegant, but this sleep is to allow the Gather threads to run, so that
 	// the flusher will flush after metrics are collected.
-	time.Sleep(time.Millisecond * 200)
+	time.Sleep(time.Millisecond * 300)
+
+	// create an output metric channel and a gorouting that continously passes
+	// each metric onto the output plugins & aggregators.
+	outMetricC := make(chan telegraf.Metric, 100)
+	var wg sync.WaitGroup
+	wg.Add(1)
+	go func() {
+		defer wg.Done()
+		for {
+			select {
+			case <-shutdown:
+				for _, agg := range a.Config.Aggregators {
+					agg.Aggregator.Stop()
+				}
+				if len(outMetricC) > 0 {
+					// keep going until outMetricC is flushed
+					continue
+				}
+				return
+			case m := <-outMetricC:
+				// if dropOriginal is set to true, then we will only send this
+				// metric to the aggregators, not the outputs.
+				var dropOriginal bool
+				if !m.IsAggregate() {
+					for _, agg := range a.Config.Aggregators {
+						if ok := agg.Apply(copyMetric(m)); ok {
+							dropOriginal = true
+						}
+					}
+				}
+				if !dropOriginal {
+					for i, o := range a.Config.Outputs {
+						if i == len(a.Config.Outputs)-1 {
+							o.AddMetric(m)
+						} else {
+							o.AddMetric(copyMetric(m))
+						}
+					}
+				}
+			}
+		}
+	}()

 	ticker := time.NewTicker(a.Config.Agent.FlushInterval.Duration)
-
 	for {
 		select {
 		case <-shutdown:
 			log.Println("I! Hang on, flushing any cached metrics before shutdown")
+			// wait for outMetricC to get flushed before flushing outputs
+			wg.Wait()
 			a.flush()
 			return nil
 		case <-ticker.C:
 			internal.RandomSleep(a.Config.Agent.FlushJitter.Duration, shutdown)
 			a.flush()
-		case m := <-metricC:
-			for i, o := range a.Config.Outputs {
-				if i == len(a.Config.Outputs)-1 {
-					o.AddMetric(m)
-				} else {
-					o.AddMetric(copyMetric(m))
+		case metric := <-metricC:
+			mS := []telegraf.Metric{metric}
+			for _, processor := range a.Config.Processors {
+				mS = processor.Apply(mS...)
 				}
+			for _, m := range mS {
+				outMetricC <- m
 			}
 		}
 	}
@@ -303,18 +343,18 @@ func (a *Agent) Run(shutdown chan struct{}) error {
 	// channel shared between all input threads for accumulating metrics
 	metricC := make(chan telegraf.Metric, 10000)

+	// Start all ServicePlugins
 	for _, input := range a.Config.Inputs {
-		// Start service of any ServicePlugins
 		switch p := input.Input.(type) {
 		case telegraf.ServiceInput:
-			acc := NewAccumulator(input.Config, metricC)
+			acc := NewAccumulator(input, metricC)
 			// Service input plugins should set their own precision of their
 			// metrics.
-			acc.DisablePrecision()
-			acc.setDefaultTags(a.Config.Tags)
+			acc.SetPrecision(time.Nanosecond, 0)
+			input.SetDefaultTags(a.Config.Tags)
 			if err := p.Start(acc); err != nil {
 				log.Printf("E! Service for input %s failed to start, exiting\n%s\n",
-					input.Name, err.Error())
+					input.Name(), err.Error())
 				return err
 			}
 			defer p.Stop()
@@ -327,6 +367,18 @@ func (a *Agent) Run(shutdown chan struct{}) error {
 		time.Sleep(time.Duration(i - (time.Now().UnixNano() % i)))
 	}

+	// Start all Aggregators
+	for _, aggregator := range a.Config.Aggregators {
+		acc := NewAccumulator(aggregator, metricC)
+		acc.SetPrecision(a.Config.Agent.Precision.Duration,
+			a.Config.Agent.Interval.Duration)
+		if err := aggregator.Aggregator.Start(acc); err != nil {
+			log.Printf("[%s] failed to start, exiting\n%s\n",
+				aggregator.Name(), err.Error())
+			return err
+		}
+	}
+
 	wg.Add(1)
 	go func() {
 		defer wg.Done()