Fixed tags on mesos_task metrics.

Tagging values by executor_id can create quite a lot of data series
in InfluxDB, so we should stick to framework_id and server.
This commit is contained in:
Łukasz Harasimowicz 2016-08-30 17:44:12 +02:00 committed by Cameron Sparr
parent e19845c202
commit 80391bfe1f
3 changed files with 36 additions and 44 deletions

View File

@ -37,6 +37,9 @@ For more information, please check the [Mesos Observability Metrics](http://meso
# ]
## Include mesos tasks statistics, default is false
# slave_tasks = true
## Should tags in slave task metrics be normalized? This will remove UUIDs from
## the task_id tag so we don't generate millions of series in InfluxDB, default is false
# slave_tasks_normalize = true
```
By default this plugin is not configured to gather metrics from mesos. Since a mesos cluster can be deployed in numerous ways it does not provide any default
@ -238,27 +241,23 @@ Mesos slave metric groups
Mesos tasks metric groups
- executor_id
- executor_name
- framework_id
- source
- statistics
- cpus_limit
- cpus_system_time_secs
- cpus_user_time_secs
- mem_anon_bytes
- mem_cache_bytes
- mem_critical_pressure_counter
- mem_file_bytes
- mem_limit_bytes
- mem_low_pressure_counter
- mem_mapped_file_bytes
- mem_medium_pressure_counter
- mem_rss_bytes
- mem_swap_bytes
- mem_total_bytes
- mem_total_memsw_bytes
- mem_unevictable_bytes
- timestamp
- cpus_limit
- cpus_system_time_secs
- cpus_user_time_secs
- mem_anon_bytes
- mem_cache_bytes
- mem_critical_pressure_counter
- mem_file_bytes
- mem_limit_bytes
- mem_low_pressure_counter
- mem_mapped_file_bytes
- mem_medium_pressure_counter
- mem_rss_bytes
- mem_swap_bytes
- mem_total_bytes
- mem_total_memsw_bytes
- mem_unevictable_bytes
- timestamp
### Tags:
@ -271,14 +270,13 @@ Mesos tasks metric groups
- Tasks measurements have the following tags:
- server
- framework_id
- task_id
- framework_id
### Example Output:
```
$ telegraf -config ~/mesos.conf -input-filter mesos -test
* Plugin: mesos, Collection 1
mesos,role=master,state=leader,host=172.17.8.102,server=172.17.8.101
mesos,role=master,state=leader,host=172.17.8.102,server=172.17.8.101
allocator/event_queue_dispatches=0,master/cpus_percent=0,
master/cpus_revocable_percent=0,master/cpus_revocable_total=0,
master/cpus_revocable_used=0,master/cpus_total=2,
@ -299,13 +297,7 @@ master/messages_deactivate_framework=0 ...
Mesos tasks metrics (if enabled):
```
mesos-tasks,host=172.17.8.102,server=172.17.8.101,framework_id=e3060235-c4ed-4765-9d36-784e3beca07f-0000,task_id=hello-world.e4b5b497-2ccd-11e6-a659-0242fb222ce2
cpus_limit=0.2,cpus_system_time_secs=142.49,cpus_user_time_secs=388.14,
mem_anon_bytes=359129088,mem_cache_bytes=3964928,
mem_critical_pressure_counter=0,mem_file_bytes=3964928,
mem_limit_bytes=767557632,mem_low_pressure_counter=0,
mem_mapped_file_bytes=114688,mem_medium_pressure_counter=0,
mem_rss_bytes=359129088,mem_swap_bytes=0,mem_total_bytes=363094016,
mem_total_memsw_bytes=363094016,mem_unevictable_bytes=0,
timestamp=1465486052.70525 1465486053052811792...
> mesos_tasks,framework_id=20151016-120318-1243483658-5050-6139-0000,host=localhost,server=mesos-1
cpus_limit=0.2,cpus_system_time_secs=84.04,cpus_user_time_secs=1161,executor_id="some_app.5d9f3cf8-6b19-11e6-8d24-0242f3fd597e",
mem_limit_bytes=348127232,mem_rss_bytes=310820864,timestamp=1472572204.22177 1472572204000000000...
```

View File

@ -459,7 +459,6 @@ func (m *Mesos) gatherSlaveTaskMetrics(address string, defaultPort string, acc t
}
for _, task := range metrics {
tags["task_id"] = task.ExecutorID
tags["framework_id"] = task.FrameworkID
jf := jsonparser.JSONFlattener{}
@ -468,7 +467,9 @@ func (m *Mesos) gatherSlaveTaskMetrics(address string, defaultPort string, acc t
if err != nil {
return err
}
timestamp := time.Unix(int64(jf.Fields["timestamp"].(float64)), 0)
jf.Fields["executor_id"] = task.ExecutorID
acc.AddFields("mesos_tasks", jf.Fields, tags, timestamp)
}

View File

@ -9,7 +9,6 @@ import (
"os"
"testing"
jsonparser "github.com/influxdata/telegraf/plugins/parsers/json"
"github.com/influxdata/telegraf/testutil"
)
@ -217,10 +216,10 @@ func generateMetrics() {
}
slaveTaskMetrics = map[string]interface{}{
"executor_id": fmt.Sprintf("task_%s", randUUID()),
"executor_id": fmt.Sprintf("task_name.%s", randUUID()),
"executor_name": "Some task description",
"framework_id": randUUID(),
"source": fmt.Sprintf("task_source_%s", randUUID()),
"source": fmt.Sprintf("task_source.%s", randUUID()),
"statistics": map[string]interface{}{
"cpus_limit": rand.Float64(),
"cpus_system_time_secs": rand.Float64(),
@ -338,17 +337,17 @@ func TestMesosSlave(t *testing.T) {
acc.AssertContainsFields(t, "mesos", slaveMetrics)
jf := jsonparser.JSONFlattener{}
err = jf.FlattenJSON("", slaveTaskMetrics)
if err != nil {
t.Errorf(err.Error())
expectedFields := make(map[string]interface{}, len(slaveTaskMetrics["statistics"].(map[string]interface{}))+1)
for k, v := range slaveTaskMetrics["statistics"].(map[string]interface{}) {
expectedFields[k] = v
}
expectedFields["executor_id"] = slaveTaskMetrics["executor_id"]
acc.AssertContainsFields(
acc.AssertContainsTaggedFields(
t,
"mesos_tasks",
slaveTaskMetrics["statistics"].(map[string]interface{}))
expectedFields,
map[string]string{"server": "127.0.0.1", "framework_id": slaveTaskMetrics["framework_id"].(string)})
}
func TestSlaveFilter(t *testing.T) {