@@ -1,59 +1,5 @@
# Telegraf Service Plugin: statsd

#### Description

The statsd plugin is a special type of plugin which runs a backgrounded statsd
@@ -70,10 +16,129 @@ implementation. In short, the telegraf statsd listener will accept:
- Counters
    - `deploys.test.myservice:1|c` <- increments by 1
    - `deploys.test.myservice:101|c` <- increments by 101
    - `deploys.test.myservice:1|c|@0.1` <- with sample rate, increments by 10
- Sets
    - `users.unique:101|s`
    - `users.unique:101|s`
    - `users.unique:102|s` <- would result in a count of 2 for `users.unique`
- Timings & Histograms
    - `load.time:320|ms`
    - `load.time.nanoseconds:1|h`
    - `load.time:200|ms|@0.1` <- sampled 1/10 of the time
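
For illustration, here is a minimal Go sketch (not part of the plugin) of a
client emitting lines in the formats above; it assumes a listener on the
default `:8125` address and omits real error handling:

```
package main

import (
	"fmt"
	"net"
)

func main() {
	// Each statsd metric is a single UDP datagram of the form
	// "bucket:value|type[|@samplerate]".
	conn, err := net.Dial("udp", "localhost:8125")
	if err != nil {
		panic(err)
	}
	defer conn.Close()

	for _, line := range []string{
		"deploys.test.myservice:1|c",
		"users.unique:101|s",
		"load.time:200|ms|@0.1",
	} {
		fmt.Fprint(conn, line)
	}
}
```
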
#### Influx Statsd

In order to take advantage of InfluxDB's tagging system, we have made a couple
of additions to the standard statsd protocol. First, you can specify tags in a
manner similar to the line-protocol, like this:

```
users.current,service=payroll,region=us-west:32|g
```
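
As a sketch of how such a line can be split into bucket, tags, value, and type
(the plugin's `parseName` and `parseKeyValue` functions below do the bucket/tag
part via the graphite template parser):

```
package main

import (
	"fmt"
	"strings"
)

func main() {
	line := "users.current,service=payroll,region=us-west:32|g"

	// "bucket:value" comes before the first "|"; the type follows it.
	pipesplit := strings.Split(line, "|")
	colonsplit := strings.Split(pipesplit[0], ":")
	bucket, value, mtype := colonsplit[0], colonsplit[1], pipesplit[1]

	// Tags are comma-separated key=value pairs appended to the bucket name.
	parts := strings.Split(bucket, ",")
	tags := map[string]string{}
	for _, kv := range parts[1:] {
		if pair := strings.Split(kv, "="); len(pair) == 2 {
			tags[pair[0]] = pair[1]
		}
	}
	fmt.Println(parts[0], tags, value, mtype)
	// users.current map[region:us-west service:payroll] 32 g
}
```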

COMING SOON: there will be a way to specify multiple fields.
<!-- TODO Second, you can specify multiple fields within a measurement:

```
current.users,service=payroll,server=host01:west=10,east=10,central=2,south=10|g
``` -->

#### Measurements:

Meta:
- tags: `metric_type=<gauge|set|counter|timing|histogram>`

Which measurements are output depends entirely on what the user sends, but here
is a brief rundown of what you can expect to find from each metric type:

- Gauges
    - Gauges are a constant data type. They are not subject to averaging, and
    they don't change unless you change them. That is, once you set a gauge
    value, it will be a flat line on the graph until you change it again.
- Counters
    - Counters are the most basic type. They are treated as a count of a type
    of event. They will continually increase unless you set
    `delete_counters=true`.
- Sets
    - Sets count the number of unique values passed to a key. For example, you
    could count the number of users accessing your system using
    `users:<user_id>|s`. No matter how many times the same user_id is sent, the
    count will only increase by 1.
- Timings & Histograms
    - Timers are meant to track how long something took. They are an invaluable
    tool for tracking application performance.
    - The following aggregate measurements are made for timers:
        - `statsd_<name>_lower`: The lower bound is the lowest value statsd saw
        for that stat during that interval.
        - `statsd_<name>_upper`: The upper bound is the highest value statsd
        saw for that stat during that interval.
        - `statsd_<name>_mean`: The mean is the average of all values statsd
        saw for that stat during that interval.
        - `statsd_<name>_stddev`: The stddev is the sample standard deviation
        of all values statsd saw for that stat during that interval.
        - `statsd_<name>_count`: The count is the number of timings statsd saw
        for that stat during that interval. It is not averaged.
        - `statsd_<name>_percentile_<P>`: The `Pth` percentile is a value x
        such that `P%` of all the values statsd saw for that stat during that
        time period are below x. The most common value used for `P` is `90`; it
        is a great number to try to optimize.
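
As a worked example, the percentile here is a nearest-rank estimate over the
sampled values: sort the sample and index at `len * P / 100`. This standalone
sketch mirrors `RunningStats.Percentile` in `running_stats.go` below and
reproduces the numbers used in the plugin's tests:

```
package main

import (
	"fmt"
	"sort"
)

// percentile estimates the Pth percentile by indexing into the sorted
// sample at len(values) * P / 100 (nearest-rank style).
func percentile(values []float64, p int) float64 {
	sorted := append([]float64(nil), values...)
	sort.Float64s(sorted)
	i := len(sorted) * p / 100
	if i >= len(sorted) {
		i = len(sorted) - 1
	}
	return sorted[i]
}

func main() {
	timings := []float64{10, 20, 10, 30, 20, 11, 12, 32, 45, 9, 5, 5, 5, 10, 23, 8}
	fmt.Println(percentile(timings, 90)) // 32: 90% of the 16 samples fall at or below it
	fmt.Println(percentile(timings, 50)) // 11
}
```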
#### Plugin arguments

- **service_address** string: Address to listen for statsd UDP packets on
- **delete_gauges** boolean: Delete gauges on every collection interval
- **delete_counters** boolean: Delete counters on every collection interval
- **delete_sets** boolean: Delete set counters on every collection interval
- **delete_timings** boolean: Delete timings on every collection interval
- **percentiles** []int: Percentiles to calculate for timing & histogram stats
- **allowed_pending_messages** integer: Number of messages allowed to queue up
waiting to be processed. When this fills, messages will be dropped and logged.
- **percentile_limit** integer: Number of timing/histogram values to track
per-measurement in the calculation of percentiles. Raising this limit increases
the accuracy of percentiles but also increases the memory usage and cpu time.
- **templates** []string: Templates for transforming statsd buckets into influx
measurements and tags.
#### Statsd bucket -> InfluxDB line-protocol Templates

The plugin supports specifying templates for transforming statsd buckets into
InfluxDB measurement names and tags. The templates have a _measurement_ keyword,
which can be used to specify parts of the bucket that are to be used in the
measurement name. Other words in the template are used as tag names. For example,
the following template:

```
templates = [
    "measurement.measurement.region"
]
```

would result in the following transformation:

```
cpu.load.us-west:100|g
=> cpu_load,region=us-west 100
```

Users can also filter the template to use based on the name of the bucket,
using glob matching, like so:

```
templates = [
    "cpu.* measurement.measurement.region",
    "mem.* measurement.measurement.host"
]
```

which would result in the following transformation:

```
cpu.load.us-west:100|g
=> cpu_load,region=us-west 100

mem.cached.localhost:256|g
=> mem_cached,host=localhost 256
```

There are many more options available; [more details can be found here](https://github.com/influxdb/influxdb/tree/master/services/graphite#templates).
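
Since the plugin delegates template handling to that graphite parser (see
`parseName` in the plugin code below), the transformation can be sketched
directly against it. The import path and API shape here are as used in this
change, not guaranteed stable:

```
package main

import (
	"fmt"

	"github.com/influxdb/influxdb/services/graphite"
)

func main() {
	o := graphite.Options{
		Separator: "_",
		Templates: []string{"cpu.* measurement.measurement.region"},
	}
	p, err := graphite.NewParserWithOptions(o)
	if err != nil {
		panic(err)
	}
	// "cpu.load.us-west" matches the template: two measurement parts
	// joined with "_", the third part captured as the "region" tag.
	name, tags := p.ApplyTemplate("cpu.load.us-west")
	fmt.Println(name, tags) // cpu_load map[region:us-west]
}
```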

#### plugins/statsd/running_stats.go (new file, 108 lines)

@@ -0,0 +1,108 @@
package statsd

import (
	"math"
	"math/rand"
	"sort"
)

const defaultPercentileLimit = 1000

// RunningStats calculates a running mean, variance, standard deviation,
// lower bound, upper bound, count, and can calculate estimated percentiles.
// It is based on the incremental algorithm described here:
// https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance
type RunningStats struct {
	k   float64
	n   int64
	ex  float64
	ex2 float64

	// Array used to calculate estimated percentiles
	// We will store a maximum of PercLimit values, at which point we will start
	// randomly replacing old values, hence it is an estimated percentile.
	perc      []float64
	PercLimit int

	upper float64
	lower float64

	// cache if we have sorted the list so that we never re-sort a sorted list,
	// which can have very bad performance.
	sorted bool
}

func (rs *RunningStats) AddValue(v float64) {
	// Whenever a value is added, the list is no longer sorted.
	rs.sorted = false

	if rs.n == 0 {
		// First value seen: it seeds the shift constant k and both bounds.
		rs.k = v
		rs.upper = v
		rs.lower = v
		if rs.PercLimit == 0 {
			rs.PercLimit = defaultPercentileLimit
		}
		rs.perc = make([]float64, 0, rs.PercLimit)
	}

	// These are used for the running mean and variance
	rs.n += 1
	rs.ex += v - rs.k
	rs.ex2 += (v - rs.k) * (v - rs.k)

	// track upper and lower bounds
	if v > rs.upper {
		rs.upper = v
	} else if v < rs.lower {
		rs.lower = v
	}

	if len(rs.perc) < rs.PercLimit {
		rs.perc = append(rs.perc, v)
	} else {
		// Reached limit, choose random index to overwrite in the percentile array
		rs.perc[rand.Intn(len(rs.perc))] = v
	}
}

func (rs *RunningStats) Mean() float64 {
	return rs.k + rs.ex/float64(rs.n)
}

func (rs *RunningStats) Variance() float64 {
	return (rs.ex2 - (rs.ex*rs.ex)/float64(rs.n)) / float64(rs.n)
}

func (rs *RunningStats) Stddev() float64 {
	return math.Sqrt(rs.Variance())
}

func (rs *RunningStats) Upper() float64 {
	return rs.upper
}

func (rs *RunningStats) Lower() float64 {
	return rs.lower
}

func (rs *RunningStats) Count() int64 {
	return rs.n
}

func (rs *RunningStats) Percentile(n int) float64 {
	if n > 100 {
		n = 100
	}

	if !rs.sorted {
		sort.Float64s(rs.perc)
		rs.sorted = true
	}

	i := int(float64(len(rs.perc)) * float64(n) / float64(100))
	if i < 0 {
		i = 0
	} else if i >= len(rs.perc) {
		// Guard the upper edge: n=100 would otherwise index one past the end.
		i = len(rs.perc) - 1
	}
	return rs.perc[i]
}
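
As an aside (not part of the change), a minimal sketch of how this type is
driven by the timings aggregation: values are fed in as packets arrive and the
aggregates are read out at flush time. From within the `statsd` package:

```
package statsd

import "fmt"

func ExampleRunningStats() {
	rs := RunningStats{PercLimit: 1000}
	for _, v := range []float64{320, 200, 480} {
		rs.AddValue(v)
	}
	fmt.Println(rs.Count(), rs.Lower(), rs.Upper()) // 3 200 480
	fmt.Println(rs.Mean())                          // ≈333.33
	fmt.Println(rs.Percentile(90))                  // 480
}
```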

#### plugins/statsd/running_stats_test.go (new file, 136 lines)

@@ -0,0 +1,136 @@
package statsd

import (
	"math"
	"testing"
)

// Test that a single metric is handled correctly
func TestRunningStats_Single(t *testing.T) {
	rs := RunningStats{}
	values := []float64{10.1}

	for _, v := range values {
		rs.AddValue(v)
	}

	if rs.Mean() != 10.1 {
		t.Errorf("Expected %v, got %v", 10.1, rs.Mean())
	}
	if rs.Upper() != 10.1 {
		t.Errorf("Expected %v, got %v", 10.1, rs.Upper())
	}
	if rs.Lower() != 10.1 {
		t.Errorf("Expected %v, got %v", 10.1, rs.Lower())
	}
	if rs.Percentile(90) != 10.1 {
		t.Errorf("Expected %v, got %v", 10.1, rs.Percentile(90))
	}
	if rs.Percentile(50) != 10.1 {
		t.Errorf("Expected %v, got %v", 10.1, rs.Percentile(50))
	}
	if rs.Count() != 1 {
		t.Errorf("Expected %v, got %v", 1, rs.Count())
	}
	if rs.Variance() != 0 {
		t.Errorf("Expected %v, got %v", 0, rs.Variance())
	}
	if rs.Stddev() != 0 {
		t.Errorf("Expected %v, got %v", 0, rs.Stddev())
	}
}

// Test that duplicate values are handled correctly
func TestRunningStats_Duplicate(t *testing.T) {
	rs := RunningStats{}
	values := []float64{10.1, 10.1, 10.1, 10.1}

	for _, v := range values {
		rs.AddValue(v)
	}

	if rs.Mean() != 10.1 {
		t.Errorf("Expected %v, got %v", 10.1, rs.Mean())
	}
	if rs.Upper() != 10.1 {
		t.Errorf("Expected %v, got %v", 10.1, rs.Upper())
	}
	if rs.Lower() != 10.1 {
		t.Errorf("Expected %v, got %v", 10.1, rs.Lower())
	}
	if rs.Percentile(90) != 10.1 {
		t.Errorf("Expected %v, got %v", 10.1, rs.Percentile(90))
	}
	if rs.Percentile(50) != 10.1 {
		t.Errorf("Expected %v, got %v", 10.1, rs.Percentile(50))
	}
	if rs.Count() != 4 {
		t.Errorf("Expected %v, got %v", 4, rs.Count())
	}
	if rs.Variance() != 0 {
		t.Errorf("Expected %v, got %v", 0, rs.Variance())
	}
	if rs.Stddev() != 0 {
		t.Errorf("Expected %v, got %v", 0, rs.Stddev())
	}
}

// Test a list of sample values, returns all correct values
func TestRunningStats(t *testing.T) {
	rs := RunningStats{}
	values := []float64{10, 20, 10, 30, 20, 11, 12, 32, 45, 9, 5, 5, 5, 10, 23, 8}

	for _, v := range values {
		rs.AddValue(v)
	}

	if rs.Mean() != 15.9375 {
		t.Errorf("Expected %v, got %v", 15.9375, rs.Mean())
	}
	if rs.Upper() != 45 {
		t.Errorf("Expected %v, got %v", 45, rs.Upper())
	}
	if rs.Lower() != 5 {
		t.Errorf("Expected %v, got %v", 5, rs.Lower())
	}
	if rs.Percentile(90) != 32 {
		t.Errorf("Expected %v, got %v", 32, rs.Percentile(90))
	}
	if rs.Percentile(50) != 11 {
		t.Errorf("Expected %v, got %v", 11, rs.Percentile(50))
	}
	if rs.Count() != 16 {
		t.Errorf("Expected %v, got %v", 16, rs.Count())
	}
	if !fuzzyEqual(rs.Variance(), 124.93359, .00001) {
		t.Errorf("Expected %v, got %v", 124.93359, rs.Variance())
	}
	if !fuzzyEqual(rs.Stddev(), 11.17736, .00001) {
		t.Errorf("Expected %v, got %v", 11.17736, rs.Stddev())
	}
}

// Test that the percentile limit is respected.
func TestRunningStats_PercentileLimit(t *testing.T) {
	rs := RunningStats{}
	rs.PercLimit = 10
	values := []float64{1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}

	for _, v := range values {
		rs.AddValue(v)
	}

	if rs.Count() != 11 {
		t.Errorf("Expected %v, got %v", 11, rs.Count())
	}
	if len(rs.perc) != 10 {
		t.Errorf("Expected %v, got %v", 10, len(rs.perc))
	}
}

func fuzzyEqual(a, b, epsilon float64) bool {
	return math.Abs(a-b) <= epsilon
}

#### plugins/statsd/statsd.go

@@ -26,21 +26,27 @@ type Statsd struct {
	// fills up, packets will get dropped until the next Gather interval is run.
	AllowedPendingMessages int

	// Percentiles specifies the percentiles that will be calculated for timing
	// and histogram stats.
	Percentiles     []int
	PercentileLimit int

	DeleteGauges   bool
	DeleteCounters bool
	DeleteSets     bool
	DeleteTimings  bool

	sync.Mutex

	// Channel for all incoming statsd messages
	in   chan string
	done chan struct{}

	// Cache gauges, counters & sets so they can be aggregated as they arrive
	gauges   map[string]cachedgauge
	counters map[string]cachedcounter
	sets     map[string]cachedset
	timings  map[string]cachedtimings

	// bucket -> influx templates
	Templates []string
@@ -52,10 +58,10 @@ func NewStatsd() *Statsd {
	// Make data structures
	s.done = make(chan struct{})
	s.in = make(chan string, s.AllowedPendingMessages)
	s.gauges = make(map[string]cachedgauge)
	s.counters = make(map[string]cachedcounter)
	s.sets = make(map[string]cachedset)
	s.timings = make(map[string]cachedtimings)

	return &s
}
@@ -91,10 +97,10 @@ type cachedcounter struct {
	tags map[string]string
}

type cachedtimings struct {
	name  string
	stats RunningStats
	tags  map[string]string
}

func (_ *Statsd) Description() string {
@@ -104,16 +110,29 @@ func (_ *Statsd) Description() string {
const sampleConfig = `
  # Address and port to host UDP listener on
  service_address = ":8125"
  # Delete gauges every interval (default=false)
  delete_gauges = false
  # Delete counters every interval (default=false)
  delete_counters = false
  # Delete sets every interval (default=false)
  delete_sets = false
  # Delete timings & histograms every interval (default=true)
  delete_timings = true
  # Percentiles to calculate for timing & histogram stats
  percentiles = [90]

  # templates = [
  #     "cpu.* measurement*"
  # ]

  # Number of UDP messages allowed to queue up, once filled,
  # the statsd server will start dropping packets
  allowed_pending_messages = 10000

  # Number of timing/histogram values to track per-measurement in the
  # calculation of percentiles. Raising this limit increases the accuracy
  # of percentiles but also increases the memory usage and cpu time.
  percentile_limit = 1000
`

func (_ *Statsd) SampleConfig() string {

@@ -124,35 +143,37 @@ func (s *Statsd) Gather(acc plugins.Accumulator) error {
	s.Lock()
	defer s.Unlock()

	for _, metric := range s.timings {
		acc.Add(metric.name+"_mean", metric.stats.Mean(), metric.tags)
		acc.Add(metric.name+"_stddev", metric.stats.Stddev(), metric.tags)
		acc.Add(metric.name+"_upper", metric.stats.Upper(), metric.tags)
		acc.Add(metric.name+"_lower", metric.stats.Lower(), metric.tags)
		acc.Add(metric.name+"_count", metric.stats.Count(), metric.tags)
		for _, percentile := range s.Percentiles {
			name := fmt.Sprintf("%s_percentile_%v", metric.name, percentile)
			acc.Add(name, metric.stats.Percentile(percentile), metric.tags)
		}
	}
	if s.DeleteTimings {
		s.timings = make(map[string]cachedtimings)
	}

	for _, metric := range s.gauges {
		acc.Add(metric.name, metric.value, metric.tags)
	}
	if s.DeleteGauges {
		s.gauges = make(map[string]cachedgauge)
	}

	for _, metric := range s.counters {
		acc.Add(metric.name, metric.value, metric.tags)
	}
	if s.DeleteCounters {
		s.counters = make(map[string]cachedcounter)
	}

	for _, metric := range s.sets {
		acc.Add(metric.name, int64(len(metric.set)), metric.tags)
	}
	if s.DeleteSets {
		s.sets = make(map[string]cachedset)
@@ -167,10 +188,10 @@ func (s *Statsd) Start() error {
	// Make data structures
	s.done = make(chan struct{})
	s.in = make(chan string, s.AllowedPendingMessages)
	s.gauges = make(map[string]cachedgauge)
	s.counters = make(map[string]cachedcounter)
	s.sets = make(map[string]cachedset)
	s.timings = make(map[string]cachedtimings)

	// Start the UDP listener
	go s.udpListen()
@@ -216,8 +237,7 @@ func (s *Statsd) udpListen() error {
}

// parser monitors the s.in channel, if there is a line ready, it parses the
// statsd string into a usable metric struct and aggregates the value
func (s *Statsd) parser() error {
	for {
		select {
@@ -235,14 +255,15 @@ func (s *Statsd) parseStatsdLine(line string) error {
	s.Lock()
	defer s.Unlock()

	m := metric{}

	// Validate splitting the line on "|"
	pipesplit := strings.Split(line, "|")
	if len(pipesplit) < 2 {
		log.Printf("Error: splitting '|', Unable to parse metric: %s\n", line)
		return errors.New("Error Parsing statsd line")
	} else if len(pipesplit) > 2 {
		sr := pipesplit[2]
		errmsg := "Error: parsing sample rate, %s, it must be in format like: " +
			"@0.1, @0.5, etc. Ignoring sample rate for line: %s\n"
		if strings.Contains(sr, "@") && len(sr) > 1 {

@@ -250,6 +271,7 @@ func (s *Statsd) parseStatsdLine(line string) error {
			if err != nil {
				log.Printf(errmsg, err.Error(), line)
			} else {
				// sample rate successfully parsed
				m.samplerate = samplerate
			}
		} else {

@@ -258,24 +280,24 @@ func (s *Statsd) parseStatsdLine(line string) error {
	}

	// Validate metric type
	switch pipesplit[1] {
	case "g", "c", "s", "ms", "h":
		m.mtype = pipesplit[1]
	default:
		log.Printf("Error: Statsd Metric type %s unsupported", pipesplit[1])
		return errors.New("Error Parsing statsd line")
	}

	// Validate splitting the rest of the line on ":"
	colonsplit := strings.Split(pipesplit[0], ":")
	if len(colonsplit) != 2 {
		log.Printf("Error: splitting ':', Unable to parse metric: %s\n", line)
		return errors.New("Error Parsing statsd line")
	}
	m.bucket = colonsplit[0]

	// Parse the value
	if strings.ContainsAny(colonsplit[1], "-+") {
		if m.mtype != "g" {
			log.Printf("Error: +- values are only supported for gauges: %s\n", line)
			return errors.New("Error Parsing statsd line")

@@ -285,14 +307,14 @@ func (s *Statsd) parseStatsdLine(line string) error {

	switch m.mtype {
	case "g", "ms", "h":
		v, err := strconv.ParseFloat(colonsplit[1], 64)
		if err != nil {
			log.Printf("Error: parsing value to float64: %s\n", line)
			return errors.New("Error Parsing statsd line")
		}
		m.floatvalue = v
	case "c", "s":
		v, err := strconv.ParseInt(colonsplit[1], 10, 64)
		if err != nil {
			log.Printf("Error: parsing value to int64: %s\n", line)
			return errors.New("Error Parsing statsd line")

@@ -304,8 +326,20 @@ func (s *Statsd) parseStatsdLine(line string) error {
		m.intvalue = v
	}

	// Parse the name & tags from bucket
	m.name, m.tags = s.parseName(m.bucket)
	switch m.mtype {
	case "c":
		m.tags["metric_type"] = "counter"
	case "g":
		m.tags["metric_type"] = "gauge"
	case "s":
		m.tags["metric_type"] = "set"
	case "ms":
		m.tags["metric_type"] = "timing"
	case "h":
		m.tags["metric_type"] = "histogram"
	}

	// Make a unique key for the measurement name/tags
	var tg []string

@@ -315,18 +349,7 @@ func (s *Statsd) parseStatsdLine(line string) error {
	sort.Strings(tg)
	m.hash = fmt.Sprintf("%s%s", strings.Join(tg, ""), m.name)

	// All metric types, timings included, are aggregated as they arrive.
	s.aggregate(m)
	return nil
}
@@ -334,42 +357,79 @@ func (s *Statsd) parseStatsdLine(line string) error {
// config file. If there is a match, it will parse the name of the metric and
// map of tags.
// Return values are (<name>, <tags>)
func (s *Statsd) parseName(bucket string) (string, map[string]string) {
	tags := make(map[string]string)

	bucketparts := strings.Split(bucket, ",")
	// Parse out any tags in the bucket
	if len(bucketparts) > 1 {
		for _, btag := range bucketparts[1:] {
			k, v := parseKeyValue(btag)
			if k != "" {
				tags[k] = v
			}
		}
	}

	o := graphite.Options{
		Separator:   "_",
		Templates:   s.Templates,
		DefaultTags: tags,
	}

	name := bucketparts[0]
	p, err := graphite.NewParserWithOptions(o)
	if err == nil {
		name, tags = p.ApplyTemplate(name)
	}
	name = strings.Replace(name, ".", "_", -1)
	name = strings.Replace(name, "-", "__", -1)

	return name, tags
}

// Parse the key,value out of a string that looks like "key=value"
func parseKeyValue(keyvalue string) (string, string) {
	var key, val string

	split := strings.Split(keyvalue, "=")
	// Must be exactly 2 to get anything meaningful out of them
	if len(split) == 2 {
		key = split[0]
		val = split[1]
	} else if len(split) == 1 {
		val = split[0]
	}

	return key, val
}

// aggregate takes in a metric. It then
// aggregates and caches the current value(s). It does not deal with the
// Delete* options, because those are dealt with in the Gather function.
func (s *Statsd) aggregate(m metric) {
	switch m.mtype {
	case "ms", "h":
		cached, ok := s.timings[m.hash]
		if !ok {
			cached = cachedtimings{
				name: m.name,
				tags: m.tags,
				stats: RunningStats{
					PercLimit: s.PercentileLimit,
				},
			}
		}

		if m.samplerate > 0 {
			// A sample rate of @0.1 means each line stands for 1/0.1 = 10
			// observations, so the value is added that many times.
			for i := 0; i < int(1.0/m.samplerate); i++ {
				cached.stats.AddValue(m.floatvalue)
			}
			s.timings[m.hash] = cached
		} else {
			cached.stats.AddValue(m.floatvalue)
			s.timings[m.hash] = cached
		}
	case "c":
		cached, ok := s.counters[m.hash]
		if !ok {

@@ -380,7 +440,6 @@ func (s *Statsd) aggregate(m metric) {
			}
		} else {
			cached.value += m.intvalue
			s.counters[m.hash] = cached
		}
	case "g":

@@ -397,7 +456,6 @@ func (s *Statsd) aggregate(m metric) {
			} else {
				cached.value = m.floatvalue
			}
			s.gauges[m.hash] = cached
		}
	case "s":

@@ -422,7 +480,6 @@ func (s *Statsd) Stop() {
	log.Println("Stopping the statsd service")
	close(s.done)
	close(s.in)
}

func init() {

#### plugins/statsd/statsd_test.go

@@ -121,25 +121,208 @@ func TestParse_DefaultNameParsing(t *testing.T) {
	}
}

// Test that template name transformation works
func TestParse_Template(t *testing.T) {
	s := NewStatsd()
	s.Templates = []string{
		"measurement.measurement.host.service",
	}

	lines := []string{
		"cpu.idle.localhost:1|c",
		"cpu.busy.host01.myservice:11|c",
	}

	for _, line := range lines {
		err := s.parseStatsdLine(line)
		if err != nil {
			t.Errorf("Parsing line %s should not have resulted in an error\n", line)
		}
	}

	validations := []struct {
		name  string
		value int64
	}{
		{
			"cpu_idle",
			1,
		},
		{
			"cpu_busy",
			11,
		},
	}

	// Validate counters
	for _, test := range validations {
		err := test_validate_counter(test.name, test.value, s.counters)
		if err != nil {
			t.Error(err.Error())
		}
	}
}

// Test that template filters properly
func TestParse_TemplateFilter(t *testing.T) {
	s := NewStatsd()
	s.Templates = []string{
		"cpu.idle.* measurement.measurement.host",
	}

	lines := []string{
		"cpu.idle.localhost:1|c",
		"cpu.busy.host01.myservice:11|c",
	}

	for _, line := range lines {
		err := s.parseStatsdLine(line)
		if err != nil {
			t.Errorf("Parsing line %s should not have resulted in an error\n", line)
		}
	}

	validations := []struct {
		name  string
		value int64
	}{
		{
			"cpu_idle",
			1,
		},
		{
			"cpu_busy_host01_myservice",
			11,
		},
	}

	// Validate counters
	for _, test := range validations {
		err := test_validate_counter(test.name, test.value, s.counters)
		if err != nil {
			t.Error(err.Error())
		}
	}
}

// Test that most specific template is chosen
func TestParse_TemplateSpecificity(t *testing.T) {
	s := NewStatsd()
	s.Templates = []string{
		"cpu.* measurement.foo.host",
		"cpu.idle.* measurement.measurement.host",
	}

	lines := []string{
		"cpu.idle.localhost:1|c",
	}

	for _, line := range lines {
		err := s.parseStatsdLine(line)
		if err != nil {
			t.Errorf("Parsing line %s should not have resulted in an error\n", line)
		}
	}

	validations := []struct {
		name  string
		value int64
	}{
		{
			"cpu_idle",
			1,
		},
	}

	// Validate counters
	for _, test := range validations {
		err := test_validate_counter(test.name, test.value, s.counters)
		if err != nil {
			t.Error(err.Error())
		}
	}
}

// Test that fields are parsed correctly
func TestParse_Fields(t *testing.T) {
	if false {
		t.Errorf("TODO")
	}
}

// Test that tags within the bucket are parsed correctly
func TestParse_Tags(t *testing.T) {
	s := NewStatsd()

	tests := []struct {
		bucket string
		name   string
		tags   map[string]string
	}{
		{
			"cpu.idle,host=localhost",
			"cpu_idle",
			map[string]string{
				"host": "localhost",
			},
		},
		{
			"cpu.idle,host=localhost,region=west",
			"cpu_idle",
			map[string]string{
				"host":   "localhost",
				"region": "west",
			},
		},
		{
			"cpu.idle,host=localhost,color=red,region=west",
			"cpu_idle",
			map[string]string{
				"host":   "localhost",
				"region": "west",
				"color":  "red",
			},
		},
	}

	for _, test := range tests {
		name, tags := s.parseName(test.bucket)
		if name != test.name {
			t.Errorf("Expected: %s, got %s", test.name, name)
		}

		for k, v := range test.tags {
			actual, ok := tags[k]
			if !ok {
				t.Errorf("Expected key: %s not found", k)
			}
			if actual != v {
				t.Errorf("Expected %s, got %s", v, actual)
			}
		}
	}
}

// Test that measurements with the same name, but different tags, are treated
// as different outputs
func TestParse_MeasurementsWithSameName(t *testing.T) {
	s := NewStatsd()

	// Test that counters work
	valid_lines := []string{
		"test.counter,host=localhost:1|c",
		"test.counter,host=localhost,region=west:1|c",
	}

	for _, line := range valid_lines {
		err := s.parseStatsdLine(line)
		if err != nil {
			t.Errorf("Parsing line %s should not have resulted in an error\n", line)
		}
	}

	if len(s.counters) != 2 {
		t.Errorf("Expected 2 separate measurements, found %d", len(s.counters))
	}
}
@@ -150,9 +333,8 @@ func TestParse_ValidLines(t *testing.T) {
		"valid:45|c",
		"valid:45|s",
		"valid:45|g",
		"valid.timer:45|ms",
		"valid.timer:45|h",
	}

	for _, line := range valid_lines {

@@ -163,13 +345,6 @@ func TestParse_ValidLines(t *testing.T) {
	}
}

// Tests low-level functionality of gauges
func TestParse_Gauges(t *testing.T) {
	s := NewStatsd()
@@ -340,8 +515,86 @@ func TestParse_Counters(t *testing.T) {

// Tests low-level functionality of timings
func TestParse_Timings(t *testing.T) {
	s := NewStatsd()
	s.Percentiles = []int{90}
	testacc := &testutil.Accumulator{}

	// Test that timings work
	valid_lines := []string{
		"test.timing:1|ms",
		"test.timing:1|ms",
		"test.timing:1|ms",
		"test.timing:1|ms",
		"test.timing:1|ms",
	}

	for _, line := range valid_lines {
		err := s.parseStatsdLine(line)
		if err != nil {
			t.Errorf("Parsing line %s should not have resulted in an error\n", line)
		}
	}

	s.Gather(testacc)

	tests := []struct {
		name  string
		value interface{}
	}{
		{
			"test_timing_mean",
			float64(1),
		},
		{
			"test_timing_stddev",
			float64(0),
		},
		{
			"test_timing_upper",
			float64(1),
		},
		{
			"test_timing_lower",
			float64(1),
		},
		{
			"test_timing_count",
			int64(5),
		},
		{
			"test_timing_percentile_90",
			float64(1),
		},
	}

	for _, test := range tests {
		if !testacc.CheckValue(test.name, test.value) {
			t.Errorf("Did not find measurement %s with value %v",
				test.name, test.value)
		}
	}
}

func TestParse_Timings_Delete(t *testing.T) {
	s := NewStatsd()
	s.DeleteTimings = true
	fakeacc := &testutil.Accumulator{}
	var err error

	line := "timing:100|ms"
	err = s.parseStatsdLine(line)
	if err != nil {
		t.Errorf("Parsing line %s should not have resulted in an error\n", line)
	}

	if len(s.timings) != 1 {
		t.Errorf("Should be 1 timing, found %d", len(s.timings))
	}

	s.Gather(fakeacc)

	if len(s.timings) != 0 {
		t.Errorf("All timings should have been deleted, found %d", len(s.timings))
	}
}
@@ -423,10 +676,21 @@ func TestParse_Counters_Delete(t *testing.T) {
	}
}

func TestParseKeyValue(t *testing.T) {
	k, v := parseKeyValue("foo=bar")
	if k != "foo" {
		t.Errorf("Expected %s, got %s", "foo", k)
	}
	if v != "bar" {
		t.Errorf("Expected %s, got %s", "bar", v)
	}

	k2, v2 := parseKeyValue("baz")
	if k2 != "" {
		t.Errorf("Expected %s, got %s", "", k2)
	}
	if v2 != "baz" {
		t.Errorf("Expected %s, got %s", "baz", v2)
	}
}