From 80391bfe1f7434d3eb3ebb1c29c48f1c1c05b223 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Harasimowicz?= Date: Tue, 30 Aug 2016 17:44:12 +0200 Subject: [PATCH] Fixed tags on mesos_task metrics. Tagging values by executor_id can create quite a lot of data series in InfluxDB so we should stick to framework_id and server. --- plugins/inputs/mesos/README.md | 58 +++++++++++++----------------- plugins/inputs/mesos/mesos.go | 3 +- plugins/inputs/mesos/mesos_test.go | 19 +++++----- 3 files changed, 36 insertions(+), 44 deletions(-) diff --git a/plugins/inputs/mesos/README.md b/plugins/inputs/mesos/README.md index 9151ff9a2..620d44649 100644 --- a/plugins/inputs/mesos/README.md +++ b/plugins/inputs/mesos/README.md @@ -37,6 +37,9 @@ For more information, please check the [Mesos Observability Metrics](http://meso # ] ## Include mesos tasks statistics, default is false # slave_tasks = true + ## Should tags in slave task metrics be normalized? This will remove UUIDs from + ## task_id tag so we don't generate millions of series in InfluxDB, default is false + # slave_tasks_normalize = true ``` By default this plugin is not configured to gather metrics from mesos. 
Since a mesos cluster can be deployed in numerous ways it does not provide any default @@ -238,27 +241,23 @@ Mesos slave metric groups Mesos tasks metric groups - executor_id -- executor_name -- framework_id -- source -- statistics - - cpus_limit - - cpus_system_time_secs - - cpus_user_time_secs - - mem_anon_bytes - - mem_cache_bytes - - mem_critical_pressure_counter - - mem_file_bytes - - mem_limit_bytes - - mem_low_pressure_counter - - mem_mapped_file_bytes - - mem_medium_pressure_counter - - mem_rss_bytes - - mem_swap_bytes - - mem_total_bytes - - mem_total_memsw_bytes - - mem_unevictable_bytes - - timestamp +- cpus_limit +- cpus_system_time_secs +- cpus_user_time_secs +- mem_anon_bytes +- mem_cache_bytes +- mem_critical_pressure_counter +- mem_file_bytes +- mem_limit_bytes +- mem_low_pressure_counter +- mem_mapped_file_bytes +- mem_medium_pressure_counter +- mem_rss_bytes +- mem_swap_bytes +- mem_total_bytes +- mem_total_memsw_bytes +- mem_unevictable_bytes +- timestamp ### Tags: @@ -271,14 +270,13 @@ Mesos tasks metric groups - Tasks measurements have the following tags: - server - - framework_id - - task_id + - framework_id ### Example Output: ``` $ telegraf -config ~/mesos.conf -input-filter mesos -test * Plugin: mesos, Collection 1 -mesos,role=master,state=leader,host=172.17.8.102,server=172.17.8.101 +mesos,role=master,state=leader,host=172.17.8.102,server=172.17.8.101 allocator/event_queue_dispatches=0,master/cpus_percent=0, master/cpus_revocable_percent=0,master/cpus_revocable_total=0, master/cpus_revocable_used=0,master/cpus_total=2, @@ -299,13 +297,7 @@ master/messages_deactivate_framework=0 ... 
Meoso tasks metrics (if enabled): ``` -mesos-tasks,host=172.17.8.102,server=172.17.8.101,framework_id=e3060235-c4ed-4765-9d36-784e3beca07f-0000,task_id=hello-world.e4b5b497-2ccd-11e6-a659-0242fb222ce2 -cpus_limit=0.2,cpus_system_time_secs=142.49,cpus_user_time_secs=388.14, -mem_anon_bytes=359129088,mem_cache_bytes=3964928, -mem_critical_pressure_counter=0,mem_file_bytes=3964928, -mem_limit_bytes=767557632,mem_low_pressure_counter=0, -mem_mapped_file_bytes=114688,mem_medium_pressure_counter=0, -mem_rss_bytes=359129088,mem_swap_bytes=0,mem_total_bytes=363094016, -mem_total_memsw_bytes=363094016,mem_unevictable_bytes=0, -timestamp=1465486052.70525 1465486053052811792... +> mesos_tasks,framework_id=20151016-120318-1243483658-5050-6139-0000,host=localhost,server=mesos-1 +cpus_limit=0.2,cpus_system_time_secs=84.04,cpus_user_time_secs=1161,executor_id="some_app.5d9f3cf8-6b19-11e6-8d24-0242f3fd597e", +mem_limit_bytes=348127232,mem_rss_bytes=310820864,timestamp=1472572204.22177 1472572204000000000... 
``` diff --git a/plugins/inputs/mesos/mesos.go b/plugins/inputs/mesos/mesos.go index ffcd5969b..6258c72f3 100644 --- a/plugins/inputs/mesos/mesos.go +++ b/plugins/inputs/mesos/mesos.go @@ -459,7 +459,6 @@ func (m *Mesos) gatherSlaveTaskMetrics(address string, defaultPort string, acc t } for _, task := range metrics { - tags["task_id"] = task.ExecutorID tags["framework_id"] = task.FrameworkID jf := jsonparser.JSONFlattener{} @@ -468,7 +467,9 @@ func (m *Mesos) gatherSlaveTaskMetrics(address string, defaultPort string, acc t if err != nil { return err } + timestamp := time.Unix(int64(jf.Fields["timestamp"].(float64)), 0) + jf.Fields["executor_id"] = task.ExecutorID acc.AddFields("mesos_tasks", jf.Fields, tags, timestamp) } diff --git a/plugins/inputs/mesos/mesos_test.go b/plugins/inputs/mesos/mesos_test.go index 4ea6f6e16..5f7525c80 100644 --- a/plugins/inputs/mesos/mesos_test.go +++ b/plugins/inputs/mesos/mesos_test.go @@ -9,7 +9,6 @@ import ( "os" "testing" - jsonparser "github.com/influxdata/telegraf/plugins/parsers/json" "github.com/influxdata/telegraf/testutil" ) @@ -217,10 +216,10 @@ func generateMetrics() { } slaveTaskMetrics = map[string]interface{}{ - "executor_id": fmt.Sprintf("task_%s", randUUID()), + "executor_id": fmt.Sprintf("task_name.%s", randUUID()), "executor_name": "Some task description", "framework_id": randUUID(), - "source": fmt.Sprintf("task_source_%s", randUUID()), + "source": fmt.Sprintf("task_source.%s", randUUID()), "statistics": map[string]interface{}{ "cpus_limit": rand.Float64(), "cpus_system_time_secs": rand.Float64(), @@ -338,17 +337,17 @@ func TestMesosSlave(t *testing.T) { acc.AssertContainsFields(t, "mesos", slaveMetrics) - jf := jsonparser.JSONFlattener{} - err = jf.FlattenJSON("", slaveTaskMetrics) - - if err != nil { - t.Errorf(err.Error()) + expectedFields := make(map[string]interface{}, len(slaveTaskMetrics["statistics"].(map[string]interface{}))+1) + for k, v := range slaveTaskMetrics["statistics"].(map[string]interface{}) { 
+ expectedFields[k] = v } + expectedFields["executor_id"] = slaveTaskMetrics["executor_id"] - acc.AssertContainsFields( + acc.AssertContainsTaggedFields( t, "mesos_tasks", - slaveTaskMetrics["statistics"].(map[string]interface{})) + expectedFields, + map[string]string{"server": "127.0.0.1", "framework_id": slaveTaskMetrics["framework_id"].(string)}) } func TestSlaveFilter(t *testing.T) {