Fix bug: too many cloudwatch metrics (#1885)

* Fix bug: too many cloudwatch metrics Cloudwatch metrics were being added incorrectly. The most obvious symptom of this was that too many metrics were being added. A simple check against the name of the metric proved to be a sufficient fix. In order to test the fix, a metric selection function was factored out. * Go fmt cloudwatch * Cloudwatch isSelected checks metric name * Move cloudwatch line in changelog to 1.2 features
2016-12-13 06:13:53 -08:00 · 2016-12-13 06:13:53 -08:00 · 9add7b9e9a
parent bf8e1b5f13
commit 9add7b9e9a
3 changed files with 112 additions and 14 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@ -14,6 +14,7 @@
 - [#2127](https://github.com/influxdata/telegraf/pull/2127): Update Go version to 1.7.4.
 - [#2126](https://github.com/influxdata/telegraf/pull/2126): Support a metric.Split function.
 - [#2026](https://github.com/influxdata/telegraf/pull/2065): elasticsearch "shield" (basic auth) support doc.
+- [#1885](https://github.com/influxdata/telegraf/pull/1885): Fix over-querying of cloudwatch metrics

 ### Bugfixes

--- a/plugins/inputs/cloudwatch/cloudwatch.go
+++ b/plugins/inputs/cloudwatch/cloudwatch.go
@ -126,11 +126,7 @@ func (c *CloudWatch) Description() string {
 	return "Pull Metric Statistics from Amazon CloudWatch"
 }

-func (c *CloudWatch) Gather(acc telegraf.Accumulator) error {
-	if c.client == nil {
-		c.initializeCloudWatch()
-	}
-
+func SelectMetrics(c *CloudWatch) ([]*cloudwatch.Metric, error) {
 	var metrics []*cloudwatch.Metric

 	// check for provided metric filter
@ -155,11 +151,11 @@ func (c *CloudWatch) Gather(acc telegraf.Accumulator) error {
 			} else {
 				allMetrics, err := c.fetchNamespaceMetrics()
 				if err != nil {
-					return err
+					return nil, err
 				}
 				for _, name := range m.MetricNames {
 					for _, metric := range allMetrics {
-						if isSelected(metric, m.Dimensions) {
+						if isSelected(name, metric, m.Dimensions) {
 							metrics = append(metrics, &cloudwatch.Metric{
 								Namespace:  aws.String(c.Namespace),
 								MetricName: aws.String(name),
@ -169,16 +165,26 @@ func (c *CloudWatch) Gather(acc telegraf.Accumulator) error {
 					}
 				}
 			}
-
 		}
 	} else {
 		var err error
 		metrics, err = c.fetchNamespaceMetrics()
 		if err != nil {
-			return err
+			return nil, err
 		}
 	}
+	return metrics, nil
+}

+func (c *CloudWatch) Gather(acc telegraf.Accumulator) error {
+	if c.client == nil {
+		c.initializeCloudWatch()
+	}
+
+	metrics, err := SelectMetrics(c)
+	if err != nil {
+		return err
+	}
 	metricCount := len(metrics)
 	errChan := errchan.New(metricCount)

@ -380,7 +386,10 @@ func hasWilcard(dimensions []*Dimension) bool {
 	return false
 }

-func isSelected(metric *cloudwatch.Metric, dimensions []*Dimension) bool {
+func isSelected(name string, metric *cloudwatch.Metric, dimensions []*Dimension) bool {
+	if name != *metric.MetricName {
+		return false
+	}
 	if len(metric.Dimensions) != len(dimensions) {
 		return false
 	}
--- a/plugins/inputs/cloudwatch/cloudwatch_test.go
+++ b/plugins/inputs/cloudwatch/cloudwatch_test.go
@ -11,9 +11,9 @@ import (
 	"github.com/stretchr/testify/assert"
 )

-type mockCloudWatchClient struct{}
+type mockGatherCloudWatchClient struct{}

-func (m *mockCloudWatchClient) ListMetrics(params *cloudwatch.ListMetricsInput) (*cloudwatch.ListMetricsOutput, error) {
+func (m *mockGatherCloudWatchClient) ListMetrics(params *cloudwatch.ListMetricsInput) (*cloudwatch.ListMetricsOutput, error) {
 	metric := &cloudwatch.Metric{
 		Namespace:  params.Namespace,
 		MetricName: aws.String("Latency"),
@ -31,7 +31,7 @@ func (m *mockCloudWatchClient) ListMetrics(params *cloudwatch.ListMetricsInput)
 	return result, nil
 }

-func (m *mockCloudWatchClient) GetMetricStatistics(params *cloudwatch.GetMetricStatisticsInput) (*cloudwatch.GetMetricStatisticsOutput, error) {
+func (m *mockGatherCloudWatchClient) GetMetricStatistics(params *cloudwatch.GetMetricStatisticsInput) (*cloudwatch.GetMetricStatisticsOutput, error) {
 	dataPoint := &cloudwatch.Datapoint{
 		Timestamp:   params.EndTime,
 		Minimum:     aws.Float64(0.1),
@ -62,7 +62,7 @@ func TestGather(t *testing.T) {
 	}

 	var acc testutil.Accumulator
-	c.client = &mockCloudWatchClient{}
+	c.client = &mockGatherCloudWatchClient{}

 	c.Gather(&acc)

@ -83,6 +83,94 @@ func TestGather(t *testing.T) {

 }

+type mockSelectMetricsCloudWatchClient struct{}
+
+func (m *mockSelectMetricsCloudWatchClient) ListMetrics(params *cloudwatch.ListMetricsInput) (*cloudwatch.ListMetricsOutput, error) {
+	metrics := []*cloudwatch.Metric{}
+	// 4 metrics are available
+	metricNames := []string{"Latency", "RequestCount", "HealthyHostCount", "UnHealthyHostCount"}
+	// for 3 ELBs
+	loadBalancers := []string{"lb-1", "lb-2", "lb-3"}
+	// in 2 AZs
+	availabilityZones := []string{"us-east-1a", "us-east-1b"}
+	for _, m := range metricNames {
+		for _, lb := range loadBalancers {
+			// For each metric/ELB pair, we get an aggregate value across all AZs.
+			metrics = append(metrics, &cloudwatch.Metric{
+				Namespace:  aws.String("AWS/ELB"),
+				MetricName: aws.String(m),
+				Dimensions: []*cloudwatch.Dimension{
+					&cloudwatch.Dimension{
+						Name:  aws.String("LoadBalancerName"),
+						Value: aws.String(lb),
+					},
+				},
+			})
+			for _, az := range availabilityZones {
+				// We get a metric for each metric/ELB/AZ triplet.
+				metrics = append(metrics, &cloudwatch.Metric{
+					Namespace:  aws.String("AWS/ELB"),
+					MetricName: aws.String(m),
+					Dimensions: []*cloudwatch.Dimension{
+						&cloudwatch.Dimension{
+							Name:  aws.String("LoadBalancerName"),
+							Value: aws.String(lb),
+						},
+						&cloudwatch.Dimension{
+							Name:  aws.String("AvailabilityZone"),
+							Value: aws.String(az),
+						},
+					},
+				})
+			}
+		}
+	}
+
+	result := &cloudwatch.ListMetricsOutput{
+		Metrics: metrics,
+	}
+	return result, nil
+}
+
+func (m *mockSelectMetricsCloudWatchClient) GetMetricStatistics(params *cloudwatch.GetMetricStatisticsInput) (*cloudwatch.GetMetricStatisticsOutput, error) {
+	return nil, nil
+}
+
+func TestSelectMetrics(t *testing.T) {
+	duration, _ := time.ParseDuration("1m")
+	internalDuration := internal.Duration{
+		Duration: duration,
+	}
+	c := &CloudWatch{
+		Region:    "us-east-1",
+		Namespace: "AWS/ELB",
+		Delay:     internalDuration,
+		Period:    internalDuration,
+		RateLimit: 10,
+		Metrics: []*Metric{
+			&Metric{
+				MetricNames: []string{"Latency", "RequestCount"},
+				Dimensions: []*Dimension{
+					&Dimension{
+						Name:  "LoadBalancerName",
+						Value: "*",
+					},
+					&Dimension{
+						Name:  "AvailabilityZone",
+						Value: "*",
+					},
+				},
+			},
+		},
+	}
+	c.client = &mockSelectMetricsCloudWatchClient{}
+	metrics, err := SelectMetrics(c)
+	// We've asked for 2 (out of 4) metrics, over all 3 load balancers in all 2
+	// AZs. We should get 12 metrics.
+	assert.Equal(t, 12, len(metrics))
+	assert.Nil(t, err)
+}
+
 func TestGenerateStatisticsInputParams(t *testing.T) {
 	d := &cloudwatch.Dimension{
 		Name:  aws.String("LoadBalancerName"),