From 03e66d5b87584a9862f8d37a5e70c962a22c1429 Mon Sep 17 00:00:00 2001
From: Cameron Sparr <cameronsparr@gmail.com>
Date: Mon, 7 Dec 2015 15:37:05 -0700
Subject: [PATCH] Implement Glob matching for pass/drop filters

---
 LICENSE_OF_DEPENDENCIES.md     |   1 +
 README.md                      |  29 +++---
 internal/config/config.go      |  12 ++-
 internal/config/config_test.go | 172 +++++++++++++++++++++++++++++++++
 internal/internal.go           |  56 +++++++++++
 internal/internal_test.go      |  44 +++++++++
 plugins/system/cpu.go          |   2 +-
 7 files changed, 299 insertions(+), 17 deletions(-)
 create mode 100644 internal/internal_test.go

diff --git a/LICENSE_OF_DEPENDENCIES.md b/LICENSE_OF_DEPENDENCIES.md
index 417d47d0e..c8f3b0926 100644
--- a/LICENSE_OF_DEPENDENCIES.md
+++ b/LICENSE_OF_DEPENDENCIES.md
@@ -29,4 +29,5 @@
 - gopkg.in/dancannon/gorethink.v1 [APACHE LICENSE](https://github.com/dancannon/gorethink/blob/v1.1.2/LICENSE)
 - gopkg.in/mgo.v2 [BSD LICENSE](https://github.com/go-mgo/mgo/blob/v2/LICENSE)
 - golang.org/x/crypto/* [BSD LICENSE](https://github.com/golang/crypto/blob/master/LICENSE)
+- internal Glob function [MIT LICENSE](https://github.com/ryanuber/go-glob/blob/master/LICENSE)
 
diff --git a/README.md b/README.md
index 138b8023d..6c7e2aa7b 100644
--- a/README.md
+++ b/README.md
@@ -94,13 +94,12 @@ InfluxDB.
 There are 5 configuration options that are configurable per plugin:
 
 * **pass**: An array of strings that is used to filter metrics generated by the
-current plugin. Each string in the array is tested as a prefix against metric names
+current plugin. Each string in the array is tested as a glob match against metric names
 and if it matches, the metric is emitted.
 * **drop**: The inverse of pass, if a metric name matches, it is not emitted.
-* **tagpass**: (added in 0.1.5) tag names and arrays of strings that are used to filter metrics by
-the current plugin. Each string in the array is tested as an exact match against
+* **tagpass**: tag names and arrays of strings that are used to filter metrics generated by the current plugin. Each string in the array is tested as a glob match against the value found under
 the tag name, and if it matches the metric is emitted.
-* **tagdrop**: (added in 0.1.5) The inverse of tagpass. If a tag matches, the metric is not emitted.
+* **tagdrop**: The inverse of tagpass. If a tag matches, the metric is not emitted.
 This is tested on metrics that have passed the tagpass test.
 * **interval**: How often to gather this metric. Normal plugins use a single
 global interval, but if one particular plugin should be run less or more often,
@@ -132,10 +131,10 @@ measurements which begin with `cpu_time`.
 [[plugins.cpu]]
   percpu = true
   totalcpu = false
-  drop = ["cpu_time"]
+  drop = ["cpu_time*"]
 ```
 
-Below is how to configure `tagpass` and `tagdrop` parameters (added in 0.1.5)
+Below is how to configure `tagpass` and `tagdrop` parameters:
 
 ```toml
 [plugins]
@@ -153,10 +152,11 @@ Below is how to configure `tagpass` and `tagdrop` parameters (added in 0.1.5)
     # If the (filesystem is ext4 or xfs) OR (the path is /opt or /home)
     # then the metric passes
     fstype = [ "ext4", "xfs" ]
-    path = [ "/opt", "/home" ]
+    # Globs can also be used on the tag values
+    path = [ "/opt", "/home*" ]
 ```
 
-Below is how to configure `pass` and `drop` parameters (added in 0.1.5)
+Below is how to configure `pass` and `drop` parameters:
 
 ```toml
 # Drop all metrics for guest CPU usage
@@ -165,17 +165,22 @@ Below is how to configure `pass` and `drop` parameters (added in 0.1.5)
 
 # Only store inode related metrics for disks
 [[plugins.disk]]
-  pass = [ "disk_inodes" ]
+  pass = [ "disk_inodes*" ]
 ```
 
 
 Additional plugins (or outputs) of the same type can be specified,
-just define another instance in the config file:
+just define more instances in the config file:
 
 ```toml
 [[plugins.cpu]]
   percpu = false
   totalcpu = true
+
+[[plugins.cpu]]
+  percpu = true
+  totalcpu = false
+  drop = ["cpu_time*"]
 ```
 
 ## Supported Plugins
@@ -246,14 +251,14 @@ Outputs also support the same configurable options as plugins (pass, drop, tagpa
   database = "telegraf"
   precision = "s"
   # Drop all measurements that start with "aerospike"
-  drop = ["aerospike"]
+  drop = ["aerospike*"]
 
 [[outputs.influxdb]]
   urls = [ "http://localhost:8086" ]
   database = "telegraf-aerospike-data"
   precision = "s"
   # Only accept aerospike data:
-  pass = ["aerospike"]
+  pass = ["aerospike*"]
 
 [[outputs.influxdb]]
   urls = [ "http://localhost:8086" ]
diff --git a/internal/config/config.go b/internal/config/config.go
index 26a1eb640..348496f0a 100644
--- a/internal/config/config.go
+++ b/internal/config/config.go
@@ -145,7 +145,9 @@ func (ro *RunningOutput) FilterPoints(points []*client.Point) []*client.Point {
 func (f Filter) ShouldPass(measurement string) bool {
 	if f.Pass != nil {
 		for _, pat := range f.Pass {
-			if strings.HasPrefix(measurement, pat) {
+			// TODO remove HasPrefix check, leaving it for now for legacy support.
+			// Cam, 2015-12-07
+			if strings.HasPrefix(measurement, pat) || internal.Glob(pat, measurement) {
 				return true
 			}
 		}
@@ -154,7 +156,9 @@ func (f Filter) ShouldPass(measurement string) bool {
 
 	if f.Drop != nil {
 		for _, pat := range f.Drop {
-			if strings.HasPrefix(measurement, pat) {
+			// TODO remove HasPrefix check, leaving it for now for legacy support.
+			// Cam, 2015-12-07
+			if strings.HasPrefix(measurement, pat) || internal.Glob(pat, measurement) {
 				return false
 			}
 		}
@@ -171,7 +175,7 @@ func (f Filter) ShouldTagsPass(tags map[string]string) bool {
 		for _, pat := range f.TagPass {
 			if tagval, ok := tags[pat.Name]; ok {
 				for _, filter := range pat.Filter {
-					if filter == tagval {
+					if internal.Glob(filter, tagval) {
 						return true
 					}
 				}
@@ -184,7 +188,7 @@ func (f Filter) ShouldTagsPass(tags map[string]string) bool {
 		for _, pat := range f.TagDrop {
 			if tagval, ok := tags[pat.Name]; ok {
 				for _, filter := range pat.Filter {
-					if filter == tagval {
+					if internal.Glob(filter, tagval) {
 						return false
 					}
 				}
diff --git a/internal/config/config_test.go b/internal/config/config_test.go
index 067592a86..f6b929976 100644
--- a/internal/config/config_test.go
+++ b/internal/config/config_test.go
@@ -122,3 +122,175 @@ func TestConfig_LoadDirectory(t *testing.T) {
 	assert.Equal(t, pConfig, c.Plugins[3].Config,
 		"Merged Testdata did not produce correct procstat metadata.")
 }
+
+// TestFilter_Empty expects a zero-value Filter to pass every measurement.
+func TestFilter_Empty(t *testing.T) {
+	var filter Filter
+	names := []string{
+		"foo",
+		"bar",
+		"barfoo",
+		"foo_bar",
+		"foo.bar",
+		"foo-bar",
+		"supercalifradjulisticexpialidocious",
+	}
+	for i := range names {
+		if filter.ShouldPass(names[i]) {
+			continue
+		}
+		t.Errorf("Expected measurement %s to pass", names[i])
+	}
+}
+
+// TestFilter_Pass expects names matching a Pass pattern to be emitted and all
+// other names to be dropped.
+func TestFilter_Pass(t *testing.T) {
+	filter := Filter{
+		Pass: []string{"foo*", "cpu_usage_idle"},
+	}
+
+	// Names that should match one of the Pass patterns.
+	for _, name := range []string{
+		"foo",
+		"foo_bar",
+		"foo.bar",
+		"foo-bar",
+		"cpu_usage_idle",
+	} {
+		if ok := filter.ShouldPass(name); !ok {
+			t.Errorf("Expected measurement %s to pass", name)
+		}
+	}
+
+	// Names that match no Pass pattern.
+	for _, name := range []string{
+		"bar",
+		"barfoo",
+		"bar_foo",
+		"cpu_usage_busy",
+	} {
+		if ok := filter.ShouldPass(name); ok {
+			t.Errorf("Expected measurement %s to drop", name)
+		}
+	}
+}
+
+// TestFilter_Drop expects names matching a Drop pattern to be dropped and all
+// other names to be emitted.
+func TestFilter_Drop(t *testing.T) {
+	filter := Filter{
+		Drop: []string{"foo*", "cpu_usage_idle"},
+	}
+
+	// Names that match no Drop pattern.
+	for _, name := range []string{
+		"bar",
+		"barfoo",
+		"bar_foo",
+		"cpu_usage_busy",
+	} {
+		if ok := filter.ShouldPass(name); !ok {
+			t.Errorf("Expected measurement %s to pass", name)
+		}
+	}
+
+	// Names that should match one of the Drop patterns.
+	for _, name := range []string{
+		"foo",
+		"foo_bar",
+		"foo.bar",
+		"foo-bar",
+		"cpu_usage_idle",
+	} {
+		if ok := filter.ShouldPass(name); ok {
+			t.Errorf("Expected measurement %s to drop", name)
+		}
+	}
+}
+
+// TestFilter_TagPass expects tag sets whose value matches a TagPass pattern
+// for that tag name to pass, and the remaining tag sets to be dropped.
+// Patterns may be globs ("cpu-*") or literals ("mem_free").
+func TestFilter_TagPass(t *testing.T) {
+	filter := Filter{
+		TagPass: []TagFilter{
+			{Name: "cpu", Filter: []string{"cpu-*"}},
+			{Name: "mem", Filter: []string{"mem_free"}},
+		},
+	}
+
+	// Tag values matching the glob or the literal pattern.
+	matching := []map[string]string{
+		{"cpu": "cpu-total"},
+		{"cpu": "cpu-0"},
+		{"cpu": "cpu-1"},
+		{"cpu": "cpu-2"},
+		{"mem": "mem_free"},
+	}
+
+	for i := range matching {
+		if !filter.ShouldTagsPass(matching[i]) {
+			t.Errorf("Expected tags %v to pass", matching[i])
+		}
+	}
+
+	// Tag values that match neither pattern: the cpu values lack the "cpu-"
+	// prefix and the mem value differs from the literal.
+	nonMatching := []map[string]string{
+		{"cpu": "cputotal"},
+		{"cpu": "cpu0"},
+		{"cpu": "cpu1"},
+		{"cpu": "cpu2"},
+		{"mem": "mem_used"},
+	}
+
+	for i := range nonMatching {
+		if filter.ShouldTagsPass(nonMatching[i]) {
+			t.Errorf("Expected tags %v to drop", nonMatching[i])
+		}
+	}
+}
+
+// TestFilter_TagDrop expects tag sets whose value matches a TagDrop pattern
+// for that tag name to be dropped, and the remaining tag sets to pass.
+// Patterns may be globs ("cpu-*") or literals ("mem_free").
+func TestFilter_TagDrop(t *testing.T) {
+	filter := Filter{
+		TagDrop: []TagFilter{
+			{Name: "cpu", Filter: []string{"cpu-*"}},
+			{Name: "mem", Filter: []string{"mem_free"}},
+		},
+	}
+
+	// Tag values that match neither pattern: the cpu values lack the "cpu-"
+	// prefix and the mem value differs from the literal.
+	nonMatching := []map[string]string{
+		{"cpu": "cputotal"},
+		{"cpu": "cpu0"},
+		{"cpu": "cpu1"},
+		{"cpu": "cpu2"},
+		{"mem": "mem_used"},
+	}
+
+	for i := range nonMatching {
+		if !filter.ShouldTagsPass(nonMatching[i]) {
+			t.Errorf("Expected tags %v to pass", nonMatching[i])
+		}
+	}
+
+	// Tag values matching the glob or the literal pattern.
+	matching := []map[string]string{
+		{"cpu": "cpu-total"},
+		{"cpu": "cpu-0"},
+		{"cpu": "cpu-1"},
+		{"cpu": "cpu-2"},
+		{"mem": "mem_free"},
+	}
+
+	for i := range matching {
+		if filter.ShouldTagsPass(matching[i]) {
+			t.Errorf("Expected tags %v to drop", matching[i])
+		}
+	}
+}
diff --git a/internal/internal.go b/internal/internal.go
index eb690fdc4..45164682b 100644
--- a/internal/internal.go
+++ b/internal/internal.go
@@ -61,3 +61,59 @@ func ReadLinesOffsetN(filename string, offset uint, n int) ([]string, error) {
 
 	return ret, nil
 }
+
+// Glob reports whether measurement matches pattern. Each "*" in pattern
+// matches any run of characters (including none); every other character must
+// match literally. An empty pattern matches only an empty measurement.
+//
+// Adapted from https://github.com/ryanuber/go-glob/blob/master/glob.go
+// thanks Ryan Uber!
+func Glob(pattern, measurement string) bool {
+	// Empty pattern can only match empty subject
+	if pattern == "" {
+		return measurement == pattern
+	}
+
+	// If the pattern _is_ a glob, it matches everything
+	if pattern == "*" {
+		return true
+	}
+
+	parts := strings.Split(pattern, "*")
+
+	if len(parts) == 1 {
+		// No globs in pattern, so test for match
+		return pattern == measurement
+	}
+
+	leadingGlob := strings.HasPrefix(pattern, "*")
+	trailingGlob := strings.HasSuffix(pattern, "*")
+	end := len(parts) - 1
+
+	for i, part := range parts {
+		switch i {
+		case 0:
+			if leadingGlob {
+				continue
+			}
+			if !strings.HasPrefix(measurement, part) {
+				return false
+			}
+		case end:
+			// Decide here even when nothing is left of measurement: falling
+			// through would accept "a*b" for "a" and panic on "*test" for "".
+			return trailingGlob || strings.HasSuffix(measurement, part)
+		default:
+			if !strings.Contains(measurement, part) {
+				return false
+			}
+		}
+
+		// Trim evaluated text from measurement as we loop over the pattern.
+		idx := strings.Index(measurement, part) + len(part)
+		measurement = measurement[idx:]
+	}
+
+	// Not reached: the loop returns at the last part. Kept for the compiler.
+	return true
+}
diff --git a/internal/internal_test.go b/internal/internal_test.go
new file mode 100644
index 000000000..7f0c687a8
--- /dev/null
+++ b/internal/internal_test.go
@@ -0,0 +1,44 @@
+package internal
+
+import "testing"
+
+func testGlobMatch(t *testing.T, pattern, subj string) {
+	if matched := Glob(pattern, subj); !matched {
+		t.Errorf("%s should match %s", pattern, subj) // non-fatal: the test keeps running
+	}
+}
+
+func testGlobNoMatch(t *testing.T, pattern, subj string) {
+	if matched := Glob(pattern, subj); matched {
+		t.Errorf("%s should not match %s", pattern, subj) // non-fatal: the test keeps running
+	}
+}
+
+func TestEmptyPattern(t *testing.T) {
+	testGlobMatch(t, "", "")       // the empty pattern is expected to match the empty subject
+	testGlobNoMatch(t, "", "test") // and to reject a non-empty subject
+}
+
+func TestPatternWithoutGlobs(t *testing.T) {
+	testGlobMatch(t, "test", "test") // a pattern with no "*" is expected to match an identical subject
+}
+
+func TestGlob(t *testing.T) {
+	const subject = "this_is_a_test" // every pattern below is checked against this
+	for _, glob := range []string{
+		"*test",           // glob at the start
+		"this*",           // glob at the end
+		"*is*a*",          // several globs
+		"**test**",        // doubled glob characters
+		"**is**a***test*", // runs of globs of differing length
+	} {
+		testGlobMatch(t, glob, subject)
+	}
+	for _, glob := range []string{
+		"test*", // "test" is not at the start of the subject
+		"*is",   // "is" is not at the end of the subject
+		"*no*",  // "no" does not occur in the subject
+	} {
+		testGlobNoMatch(t, glob, subject)
+	}
+}
diff --git a/plugins/system/cpu.go b/plugins/system/cpu.go
index 38e4ad6d7..837a1bc23 100644
--- a/plugins/system/cpu.go
+++ b/plugins/system/cpu.go
@@ -31,7 +31,7 @@ var sampleConfig = `
   # Whether to report total system cpu stats or not
   totalcpu = true
   # Comment this line if you want the raw CPU time metrics
-  drop = ["cpu_time"]
+  drop = ["cpu_time*"]
 `
 
 func (_ *CPUStats) SampleConfig() string {