Fixed tags on mesos_task metrics.

Tagging values by executor_id can create quite a lot of data series
in InfluxDB, so we should stick to framework_id and server.
This commit is contained in:
Łukasz Harasimowicz 2016-08-30 17:44:12 +02:00 committed by Cameron Sparr
parent e19845c202
commit 80391bfe1f
3 changed files with 36 additions and 44 deletions

View File

@ -37,6 +37,9 @@ For more information, please check the [Mesos Observability Metrics](http://meso
# ] # ]
## Include mesos tasks statistics, default is false ## Include mesos tasks statistics, default is false
# slave_tasks = true # slave_tasks = true
## Should tags in slave task metrics be normalized? This will remove UUIDs from
## task_id tag so we don't generate millions of series in InfluxDB, default is false
# slave_tasks_normalize = true
``` ```
By default this plugin is not configured to gather metrics from mesos. Since a mesos cluster can be deployed in numerous ways it does not provide any default By default this plugin is not configured to gather metrics from mesos. Since a mesos cluster can be deployed in numerous ways it does not provide any default
@ -238,27 +241,23 @@ Mesos slave metric groups
Mesos tasks metric groups Mesos tasks metric groups
- executor_id - executor_id
- executor_name - cpus_limit
- framework_id - cpus_system_time_secs
- source - cpus_user_time_secs
- statistics - mem_anon_bytes
- cpus_limit - mem_cache_bytes
- cpus_system_time_secs - mem_critical_pressure_counter
- cpus_user_time_secs - mem_file_bytes
- mem_anon_bytes - mem_limit_bytes
- mem_cache_bytes - mem_low_pressure_counter
- mem_critical_pressure_counter - mem_mapped_file_bytes
- mem_file_bytes - mem_medium_pressure_counter
- mem_limit_bytes - mem_rss_bytes
- mem_low_pressure_counter - mem_swap_bytes
- mem_mapped_file_bytes - mem_total_bytes
- mem_medium_pressure_counter - mem_total_memsw_bytes
- mem_rss_bytes - mem_unevictable_bytes
- mem_swap_bytes - timestamp
- mem_total_bytes
- mem_total_memsw_bytes
- mem_unevictable_bytes
- timestamp
### Tags: ### Tags:
@ -271,14 +270,13 @@ Mesos tasks metric groups
- Tasks measurements have the following tags: - Tasks measurements have the following tags:
- server - server
- framework_id - framework_id
- task_id
### Example Output: ### Example Output:
``` ```
$ telegraf -config ~/mesos.conf -input-filter mesos -test $ telegraf -config ~/mesos.conf -input-filter mesos -test
* Plugin: mesos, Collection 1 * Plugin: mesos, Collection 1
mesos,role=master,state=leader,host=172.17.8.102,server=172.17.8.101 mesos,role=master,state=leader,host=172.17.8.102,server=172.17.8.101
allocator/event_queue_dispatches=0,master/cpus_percent=0, allocator/event_queue_dispatches=0,master/cpus_percent=0,
master/cpus_revocable_percent=0,master/cpus_revocable_total=0, master/cpus_revocable_percent=0,master/cpus_revocable_total=0,
master/cpus_revocable_used=0,master/cpus_total=2, master/cpus_revocable_used=0,master/cpus_total=2,
@ -299,13 +297,7 @@ master/messages_deactivate_framework=0 ...
Mesos tasks metrics (if enabled): Mesos tasks metrics (if enabled):
``` ```
mesos-tasks,host=172.17.8.102,server=172.17.8.101,framework_id=e3060235-c4ed-4765-9d36-784e3beca07f-0000,task_id=hello-world.e4b5b497-2ccd-11e6-a659-0242fb222ce2 > mesos_tasks,framework_id=20151016-120318-1243483658-5050-6139-0000,host=localhost,server=mesos-1
cpus_limit=0.2,cpus_system_time_secs=142.49,cpus_user_time_secs=388.14, cpus_limit=0.2,cpus_system_time_secs=84.04,cpus_user_time_secs=1161,executor_id="some_app.5d9f3cf8-6b19-11e6-8d24-0242f3fd597e",
mem_anon_bytes=359129088,mem_cache_bytes=3964928, mem_limit_bytes=348127232,mem_rss_bytes=310820864,timestamp=1472572204.22177 1472572204000000000...
mem_critical_pressure_counter=0,mem_file_bytes=3964928,
mem_limit_bytes=767557632,mem_low_pressure_counter=0,
mem_mapped_file_bytes=114688,mem_medium_pressure_counter=0,
mem_rss_bytes=359129088,mem_swap_bytes=0,mem_total_bytes=363094016,
mem_total_memsw_bytes=363094016,mem_unevictable_bytes=0,
timestamp=1465486052.70525 1465486053052811792...
``` ```

View File

@ -459,7 +459,6 @@ func (m *Mesos) gatherSlaveTaskMetrics(address string, defaultPort string, acc t
} }
for _, task := range metrics { for _, task := range metrics {
tags["task_id"] = task.ExecutorID
tags["framework_id"] = task.FrameworkID tags["framework_id"] = task.FrameworkID
jf := jsonparser.JSONFlattener{} jf := jsonparser.JSONFlattener{}
@ -468,7 +467,9 @@ func (m *Mesos) gatherSlaveTaskMetrics(address string, defaultPort string, acc t
if err != nil { if err != nil {
return err return err
} }
timestamp := time.Unix(int64(jf.Fields["timestamp"].(float64)), 0) timestamp := time.Unix(int64(jf.Fields["timestamp"].(float64)), 0)
jf.Fields["executor_id"] = task.ExecutorID
acc.AddFields("mesos_tasks", jf.Fields, tags, timestamp) acc.AddFields("mesos_tasks", jf.Fields, tags, timestamp)
} }

View File

@ -9,7 +9,6 @@ import (
"os" "os"
"testing" "testing"
jsonparser "github.com/influxdata/telegraf/plugins/parsers/json"
"github.com/influxdata/telegraf/testutil" "github.com/influxdata/telegraf/testutil"
) )
@ -217,10 +216,10 @@ func generateMetrics() {
} }
slaveTaskMetrics = map[string]interface{}{ slaveTaskMetrics = map[string]interface{}{
"executor_id": fmt.Sprintf("task_%s", randUUID()), "executor_id": fmt.Sprintf("task_name.%s", randUUID()),
"executor_name": "Some task description", "executor_name": "Some task description",
"framework_id": randUUID(), "framework_id": randUUID(),
"source": fmt.Sprintf("task_source_%s", randUUID()), "source": fmt.Sprintf("task_source.%s", randUUID()),
"statistics": map[string]interface{}{ "statistics": map[string]interface{}{
"cpus_limit": rand.Float64(), "cpus_limit": rand.Float64(),
"cpus_system_time_secs": rand.Float64(), "cpus_system_time_secs": rand.Float64(),
@ -338,17 +337,17 @@ func TestMesosSlave(t *testing.T) {
acc.AssertContainsFields(t, "mesos", slaveMetrics) acc.AssertContainsFields(t, "mesos", slaveMetrics)
jf := jsonparser.JSONFlattener{} expectedFields := make(map[string]interface{}, len(slaveTaskMetrics["statistics"].(map[string]interface{}))+1)
err = jf.FlattenJSON("", slaveTaskMetrics) for k, v := range slaveTaskMetrics["statistics"].(map[string]interface{}) {
expectedFields[k] = v
if err != nil {
t.Errorf(err.Error())
} }
expectedFields["executor_id"] = slaveTaskMetrics["executor_id"]
acc.AssertContainsFields( acc.AssertContainsTaggedFields(
t, t,
"mesos_tasks", "mesos_tasks",
slaveTaskMetrics["statistics"].(map[string]interface{})) expectedFields,
map[string]string{"server": "127.0.0.1", "framework_id": slaveTaskMetrics["framework_id"].(string)})
} }
func TestSlaveFilter(t *testing.T) { func TestSlaveFilter(t *testing.T) {