Collect framework_offers and allocator metrics in mesos input (#5719)
This commit is contained in:
parent
337a579dd0
commit
f5a4d72382
|
@ -19,10 +19,12 @@ For more information, please check the [Mesos Observability Metrics](http://meso
|
|||
"system",
|
||||
"agents",
|
||||
"frameworks",
|
||||
"framework_offers",
|
||||
"tasks",
|
||||
"messages",
|
||||
"evqueue",
|
||||
"registrar",
|
||||
"allocator",
|
||||
]
|
||||
## A list of Mesos slaves, default is []
|
||||
# slaves = []
|
||||
|
@ -100,6 +102,10 @@ Mesos master metric groups
|
|||
- master/slaves_connected
|
||||
- master/slaves_disconnected
|
||||
- master/slaves_inactive
|
||||
- master/slave_unreachable_canceled
|
||||
- master/slave_unreachable_completed
|
||||
- master/slave_unreachable_scheduled
|
||||
- master/slaves_unreachable
|
||||
|
||||
- frameworks
|
||||
- master/frameworks_active
|
||||
|
@ -108,6 +114,22 @@ Mesos master metric groups
|
|||
- master/frameworks_inactive
|
||||
- master/outstanding_offers
|
||||
|
||||
- framework_offers
|
||||
- master/frameworks/subscribed
|
||||
- master/frameworks/calls_total
|
||||
- master/frameworks/calls
|
||||
- master/frameworks/events_total
|
||||
- master/frameworks/events
|
||||
- master/frameworks/operations_total
|
||||
- master/frameworks/operations
|
||||
- master/frameworks/tasks/active
|
||||
- master/frameworks/tasks/terminal
|
||||
- master/frameworks/offers/sent
|
||||
- master/frameworks/offers/accepted
|
||||
- master/frameworks/offers/declined
|
||||
- master/frameworks/offers/rescinded
|
||||
- master/frameworks/roles/suppressed
|
||||
|
||||
- tasks
|
||||
- master/tasks_error
|
||||
- master/tasks_failed
|
||||
|
@ -117,6 +139,11 @@ Mesos master metric groups
|
|||
- master/tasks_running
|
||||
- master/tasks_staging
|
||||
- master/tasks_starting
|
||||
- master/tasks_dropped
|
||||
- master/tasks_gone
|
||||
- master/tasks_gone_by_operator
|
||||
- master/tasks_killing
|
||||
- master/tasks_unreachable
|
||||
|
||||
- messages
|
||||
- master/invalid_executor_to_framework_messages
|
||||
|
@ -155,11 +182,17 @@ Mesos master metric groups
|
|||
- master/task_lost/source_master/reason_slave_removed
|
||||
- master/task_lost/source_slave/reason_executor_terminated
|
||||
- master/valid_executor_to_framework_messages
|
||||
- master/invalid_operation_status_update_acknowledgements
|
||||
- master/messages_operation_status_update_acknowledgement
|
||||
- master/messages_reconcile_operations
|
||||
- master/messages_suppress_offers
|
||||
- master/valid_operation_status_update_acknowledgements
|
||||
|
||||
- evqueue
|
||||
- master/event_queue_dispatches
|
||||
- master/event_queue_http_requests
|
||||
- master/event_queue_messages
|
||||
- master/operator_event_stream_subscribers
|
||||
|
||||
- registrar
|
||||
- registrar/state_fetch_ms
|
||||
|
@ -172,6 +205,45 @@ Mesos master metric groups
|
|||
- registrar/state_store_ms/p99
|
||||
- registrar/state_store_ms/p999
|
||||
- registrar/state_store_ms/p9999
|
||||
- registrar/state_store_ms/count
|
||||
- registrar/log/ensemble_size
|
||||
- registrar/log/recovered
|
||||
- registrar/queued_operations
|
||||
- registrar/registry_size_bytes
|
||||
|
||||
- allocator
|
||||
- allocator/allocation_run_ms
|
||||
- allocator/allocation_run_ms/count
|
||||
- allocator/allocation_run_ms/max
|
||||
- allocator/allocation_run_ms/min
|
||||
- allocator/allocation_run_ms/p50
|
||||
- allocator/allocation_run_ms/p90
|
||||
- allocator/allocation_run_ms/p95
|
||||
- allocator/allocation_run_ms/p99
|
||||
- allocator/allocation_run_ms/p999
|
||||
- allocator/allocation_run_ms/p9999
|
||||
- allocator/allocation_runs
|
||||
- allocator/allocation_run_latency_ms
|
||||
- allocator/allocation_run_latency_ms/count
|
||||
- allocator/allocation_run_latency_ms/max
|
||||
- allocator/allocation_run_latency_ms/min
|
||||
- allocator/allocation_run_latency_ms/p50
|
||||
- allocator/allocation_run_latency_ms/p90
|
||||
- allocator/allocation_run_latency_ms/p95
|
||||
- allocator/allocation_run_latency_ms/p99
|
||||
- allocator/allocation_run_latency_ms/p999
|
||||
- allocator/allocation_run_latency_ms/p9999
|
||||
- allocator/roles/shares/dominant
|
||||
- allocator/event_queue_dispatches
|
||||
- allocator/offer_filters/roles/active
|
||||
- allocator/quota/roles/resources/offered_or_allocated
|
||||
- allocator/quota/roles/resources/guarantee
|
||||
- allocator/resources/cpus/offered_or_allocated
|
||||
- allocator/resources/cpus/total
|
||||
- allocator/resources/disk/offered_or_allocated
|
||||
- allocator/resources/disk/total
|
||||
- allocator/resources/mem/offered_or_allocated
|
||||
- allocator/resources/mem/total
|
||||
|
||||
Mesos slave metric groups
|
||||
- resources
|
||||
|
|
|
@ -42,7 +42,7 @@ type Mesos struct {
|
|||
}
|
||||
|
||||
var allMetrics = map[Role][]string{
|
||||
MASTER: {"resources", "master", "system", "agents", "frameworks", "tasks", "messages", "evqueue", "registrar"},
|
||||
MASTER: {"resources", "master", "system", "agents", "frameworks", "framework_offers", "tasks", "messages", "evqueue", "registrar", "allocator"},
|
||||
SLAVE: {"resources", "agent", "system", "executors", "tasks", "messages"},
|
||||
}
|
||||
|
||||
|
@ -58,10 +58,12 @@ var sampleConfig = `
|
|||
"system",
|
||||
"agents",
|
||||
"frameworks",
|
||||
"framework_offers",
|
||||
"tasks",
|
||||
"messages",
|
||||
"evqueue",
|
||||
"registrar",
|
||||
"allocator",
|
||||
]
|
||||
## A list of Mesos slaves, default is []
|
||||
# slaves = []
|
||||
|
@ -305,6 +307,10 @@ func getMetrics(role Role, group string) []string {
|
|||
"master/slaves_connected",
|
||||
"master/slaves_disconnected",
|
||||
"master/slaves_inactive",
|
||||
"master/slave_unreachable_canceled",
|
||||
"master/slave_unreachable_completed",
|
||||
"master/slave_unreachable_scheduled",
|
||||
"master/slaves_unreachable",
|
||||
}
|
||||
|
||||
m["frameworks"] = []string{
|
||||
|
@ -315,6 +321,12 @@ func getMetrics(role Role, group string) []string {
|
|||
"master/outstanding_offers",
|
||||
}
|
||||
|
||||
// framework_offers and allocator metrics have unpredictable names, so they can't be listed here.
|
||||
// These empty groups are included to prevent the "unknown metrics group" info log below.
|
||||
// filterMetrics() filters these metrics by looking for names with the corresponding prefix.
|
||||
m["framework_offers"] = []string{}
|
||||
m["allocator"] = []string{}
|
||||
|
||||
m["tasks"] = []string{
|
||||
"master/tasks_error",
|
||||
"master/tasks_failed",
|
||||
|
@ -324,6 +336,11 @@ func getMetrics(role Role, group string) []string {
|
|||
"master/tasks_running",
|
||||
"master/tasks_staging",
|
||||
"master/tasks_starting",
|
||||
"master/tasks_dropped",
|
||||
"master/tasks_gone",
|
||||
"master/tasks_gone_by_operator",
|
||||
"master/tasks_killing",
|
||||
"master/tasks_unreachable",
|
||||
}
|
||||
|
||||
m["messages"] = []string{
|
||||
|
@ -363,12 +380,18 @@ func getMetrics(role Role, group string) []string {
|
|||
"master/task_lost/source_master/reason_slave_removed",
|
||||
"master/task_lost/source_slave/reason_executor_terminated",
|
||||
"master/valid_executor_to_framework_messages",
|
||||
"master/invalid_operation_status_update_acknowledgements",
|
||||
"master/messages_operation_status_update_acknowledgement",
|
||||
"master/messages_reconcile_operations",
|
||||
"master/messages_suppress_offers",
|
||||
"master/valid_operation_status_update_acknowledgements",
|
||||
}
|
||||
|
||||
m["evqueue"] = []string{
|
||||
"master/event_queue_dispatches",
|
||||
"master/event_queue_http_requests",
|
||||
"master/event_queue_messages",
|
||||
"master/operator_event_stream_subscribers",
|
||||
}
|
||||
|
||||
m["registrar"] = []string{
|
||||
|
@ -382,6 +405,11 @@ func getMetrics(role Role, group string) []string {
|
|||
"registrar/state_store_ms/p99",
|
||||
"registrar/state_store_ms/p999",
|
||||
"registrar/state_store_ms/p9999",
|
||||
"registrar/log/ensemble_size",
|
||||
"registrar/log/recovered",
|
||||
"registrar/queued_operations",
|
||||
"registrar/registry_size_bytes",
|
||||
"registrar/state_store_ms/count",
|
||||
}
|
||||
} else if role == SLAVE {
|
||||
m["resources"] = []string{
|
||||
|
@ -477,9 +505,27 @@ func (m *Mesos) filterMetrics(role Role, metrics *map[string]interface{}) {
|
|||
}
|
||||
|
||||
for _, k := range metricsDiff(role, selectedMetrics) {
|
||||
for _, v := range getMetrics(role, k) {
|
||||
if _, ok = (*metrics)[v]; ok {
|
||||
delete((*metrics), v)
|
||||
switch k {
|
||||
// allocator and framework_offers metrics have unpredictable names, so we have to identify them by name prefix.
|
||||
case "allocator":
|
||||
for m := range *metrics {
|
||||
if strings.HasPrefix(m, "allocator/") {
|
||||
delete((*metrics), m)
|
||||
}
|
||||
}
|
||||
case "framework_offers":
|
||||
for m := range *metrics {
|
||||
if strings.HasPrefix(m, "master/frameworks/") || strings.HasPrefix(m, "frameworks/") {
|
||||
delete((*metrics), m)
|
||||
}
|
||||
}
|
||||
|
||||
// All other metrics have predictable names. We can use getMetrics() to retrieve them.
|
||||
default:
|
||||
for _, v := range getMetrics(role, k) {
|
||||
if _, ok = (*metrics)[v]; ok {
|
||||
delete((*metrics), v)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -8,6 +8,7 @@ import (
|
|||
"net/http/httptest"
|
||||
"net/url"
|
||||
"os"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/influxdata/telegraf/testutil"
|
||||
|
@ -27,194 +28,262 @@ func randUUID() string {
|
|||
return fmt.Sprintf("%x-%x-%x-%x-%x", b[0:4], b[4:6], b[6:8], b[8:10], b[10:])
|
||||
}
|
||||
|
||||
// master metrics that will be returned by generateMetrics()
|
||||
var masterMetricNames []string = []string{
|
||||
// resources
|
||||
"master/cpus_percent",
|
||||
"master/cpus_used",
|
||||
"master/cpus_total",
|
||||
"master/cpus_revocable_percent",
|
||||
"master/cpus_revocable_total",
|
||||
"master/cpus_revocable_used",
|
||||
"master/disk_percent",
|
||||
"master/disk_used",
|
||||
"master/disk_total",
|
||||
"master/disk_revocable_percent",
|
||||
"master/disk_revocable_total",
|
||||
"master/disk_revocable_used",
|
||||
"master/gpus_percent",
|
||||
"master/gpus_used",
|
||||
"master/gpus_total",
|
||||
"master/gpus_revocable_percent",
|
||||
"master/gpus_revocable_total",
|
||||
"master/gpus_revocable_used",
|
||||
"master/mem_percent",
|
||||
"master/mem_used",
|
||||
"master/mem_total",
|
||||
"master/mem_revocable_percent",
|
||||
"master/mem_revocable_total",
|
||||
"master/mem_revocable_used",
|
||||
// master
|
||||
"master/elected",
|
||||
"master/uptime_secs",
|
||||
// system
|
||||
"system/cpus_total",
|
||||
"system/load_15min",
|
||||
"system/load_5min",
|
||||
"system/load_1min",
|
||||
"system/mem_free_bytes",
|
||||
"system/mem_total_bytes",
|
||||
// agents
|
||||
"master/slave_registrations",
|
||||
"master/slave_removals",
|
||||
"master/slave_reregistrations",
|
||||
"master/slave_shutdowns_scheduled",
|
||||
"master/slave_shutdowns_canceled",
|
||||
"master/slave_shutdowns_completed",
|
||||
"master/slaves_active",
|
||||
"master/slaves_connected",
|
||||
"master/slaves_disconnected",
|
||||
"master/slaves_inactive",
|
||||
"master/slave_unreachable_canceled",
|
||||
"master/slave_unreachable_completed",
|
||||
"master/slave_unreachable_scheduled",
|
||||
"master/slaves_unreachable",
|
||||
// frameworks
|
||||
"master/frameworks_active",
|
||||
"master/frameworks_connected",
|
||||
"master/frameworks_disconnected",
|
||||
"master/frameworks_inactive",
|
||||
"master/outstanding_offers",
|
||||
// framework offers
|
||||
"master/frameworks/marathon/abc-123/calls",
|
||||
"master/frameworks/marathon/abc-123/calls/accept",
|
||||
"master/frameworks/marathon/abc-123/events",
|
||||
"master/frameworks/marathon/abc-123/events/error",
|
||||
"master/frameworks/marathon/abc-123/offers/sent",
|
||||
"master/frameworks/marathon/abc-123/operations",
|
||||
"master/frameworks/marathon/abc-123/operations/create",
|
||||
"master/frameworks/marathon/abc-123/roles/*/suppressed",
|
||||
"master/frameworks/marathon/abc-123/subscribed",
|
||||
"master/frameworks/marathon/abc-123/tasks/active/task_killing",
|
||||
"master/frameworks/marathon/abc-123/tasks/active/task_dropped",
|
||||
"master/frameworks/marathon/abc-123/tasks/terminal/task_dropped",
|
||||
"master/frameworks/marathon/abc-123/unknown/unknown", // test case for unknown metric type
|
||||
// tasks
|
||||
"master/tasks_error",
|
||||
"master/tasks_failed",
|
||||
"master/tasks_finished",
|
||||
"master/tasks_killed",
|
||||
"master/tasks_lost",
|
||||
"master/tasks_running",
|
||||
"master/tasks_staging",
|
||||
"master/tasks_starting",
|
||||
"master/tasks_dropped",
|
||||
"master/tasks_gone",
|
||||
"master/tasks_gone_by_operator",
|
||||
"master/tasks_killing",
|
||||
"master/tasks_unreachable",
|
||||
// messages
|
||||
"master/invalid_executor_to_framework_messages",
|
||||
"master/invalid_framework_to_executor_messages",
|
||||
"master/invalid_status_update_acknowledgements",
|
||||
"master/invalid_status_updates",
|
||||
"master/dropped_messages",
|
||||
"master/messages_authenticate",
|
||||
"master/messages_deactivate_framework",
|
||||
"master/messages_decline_offers",
|
||||
"master/messages_executor_to_framework",
|
||||
"master/messages_exited_executor",
|
||||
"master/messages_framework_to_executor",
|
||||
"master/messages_kill_task",
|
||||
"master/messages_launch_tasks",
|
||||
"master/messages_reconcile_tasks",
|
||||
"master/messages_register_framework",
|
||||
"master/messages_register_slave",
|
||||
"master/messages_reregister_framework",
|
||||
"master/messages_reregister_slave",
|
||||
"master/messages_resource_request",
|
||||
"master/messages_revive_offers",
|
||||
"master/messages_status_update",
|
||||
"master/messages_status_update_acknowledgement",
|
||||
"master/messages_unregister_framework",
|
||||
"master/messages_unregister_slave",
|
||||
"master/messages_update_slave",
|
||||
"master/recovery_slave_removals",
|
||||
"master/slave_removals/reason_registered",
|
||||
"master/slave_removals/reason_unhealthy",
|
||||
"master/slave_removals/reason_unregistered",
|
||||
"master/valid_framework_to_executor_messages",
|
||||
"master/valid_status_update_acknowledgements",
|
||||
"master/valid_status_updates",
|
||||
"master/task_lost/source_master/reason_invalid_offers",
|
||||
"master/task_lost/source_master/reason_slave_removed",
|
||||
"master/task_lost/source_slave/reason_executor_terminated",
|
||||
"master/valid_executor_to_framework_messages",
|
||||
"master/invalid_operation_status_update_acknowledgements",
|
||||
"master/messages_operation_status_update_acknowledgement",
|
||||
"master/messages_reconcile_operations",
|
||||
"master/messages_suppress_offers",
|
||||
"master/valid_operation_status_update_acknowledgements",
|
||||
// evqueue
|
||||
"master/event_queue_dispatches",
|
||||
"master/event_queue_http_requests",
|
||||
"master/event_queue_messages",
|
||||
"master/operator_event_stream_subscribers",
|
||||
// registrar
|
||||
"registrar/log/ensemble_size",
|
||||
"registrar/log/recovered",
|
||||
"registrar/queued_operations",
|
||||
"registrar/registry_size_bytes",
|
||||
"registrar/state_fetch_ms",
|
||||
"registrar/state_store_ms",
|
||||
"registrar/state_store_ms/max",
|
||||
"registrar/state_store_ms/min",
|
||||
"registrar/state_store_ms/p50",
|
||||
"registrar/state_store_ms/p90",
|
||||
"registrar/state_store_ms/p95",
|
||||
"registrar/state_store_ms/p99",
|
||||
"registrar/state_store_ms/p999",
|
||||
"registrar/state_store_ms/p9999",
|
||||
"registrar/state_store_ms/count",
|
||||
// allocator
|
||||
"allocator/mesos/allocation_run_ms",
|
||||
"allocator/mesos/allocation_run_ms/count",
|
||||
"allocator/mesos/allocation_run_ms/max",
|
||||
"allocator/mesos/allocation_run_ms/min",
|
||||
"allocator/mesos/allocation_run_ms/p50",
|
||||
"allocator/mesos/allocation_run_ms/p90",
|
||||
"allocator/mesos/allocation_run_ms/p95",
|
||||
"allocator/mesos/allocation_run_ms/p99",
|
||||
"allocator/mesos/allocation_run_ms/p999",
|
||||
"allocator/mesos/allocation_run_ms/p9999",
|
||||
"allocator/mesos/allocation_runs",
|
||||
"allocator/mesos/allocation_run_latency_ms",
|
||||
"allocator/mesos/allocation_run_latency_ms/count",
|
||||
"allocator/mesos/allocation_run_latency_ms/max",
|
||||
"allocator/mesos/allocation_run_latency_ms/min",
|
||||
"allocator/mesos/allocation_run_latency_ms/p50",
|
||||
"allocator/mesos/allocation_run_latency_ms/p90",
|
||||
"allocator/mesos/allocation_run_latency_ms/p95",
|
||||
"allocator/mesos/allocation_run_latency_ms/p99",
|
||||
"allocator/mesos/allocation_run_latency_ms/p999",
|
||||
"allocator/mesos/allocation_run_latency_ms/p9999",
|
||||
"allocator/mesos/roles/*/shares/dominant",
|
||||
"allocator/mesos/event_queue_dispatches",
|
||||
"allocator/mesos/offer_filters/roles/*/active",
|
||||
"allocator/mesos/quota/roles/*/resources/disk/offered_or_allocated",
|
||||
"allocator/mesos/quota/roles/*/resources/mem/guarantee",
|
||||
"allocator/mesos/quota/roles/*/resources/disk/guarantee",
|
||||
"allocator/mesos/resources/cpus/offered_or_allocated",
|
||||
"allocator/mesos/resources/cpus/total",
|
||||
"allocator/mesos/resources/disk/offered_or_allocated",
|
||||
"allocator/mesos/resources/disk/total",
|
||||
"allocator/mesos/resources/mem/offered_or_allocated",
|
||||
"allocator/mesos/resources/mem/total",
|
||||
}
|
||||
|
||||
// slave metrics that will be returned by generateMetrics()
|
||||
var slaveMetricNames []string = []string{
|
||||
// resources
|
||||
"slave/cpus_percent",
|
||||
"slave/cpus_used",
|
||||
"slave/cpus_total",
|
||||
"slave/cpus_revocable_percent",
|
||||
"slave/cpus_revocable_total",
|
||||
"slave/cpus_revocable_used",
|
||||
"slave/disk_percent",
|
||||
"slave/disk_used",
|
||||
"slave/disk_total",
|
||||
"slave/disk_revocable_percent",
|
||||
"slave/disk_revocable_total",
|
||||
"slave/disk_revocable_used",
|
||||
"slave/gpus_percent",
|
||||
"slave/gpus_used",
|
||||
"slave/gpus_total",
|
||||
"slave/gpus_revocable_percent",
|
||||
"slave/gpus_revocable_total",
|
||||
"slave/gpus_revocable_used",
|
||||
"slave/mem_percent",
|
||||
"slave/mem_used",
|
||||
"slave/mem_total",
|
||||
"slave/mem_revocable_percent",
|
||||
"slave/mem_revocable_total",
|
||||
"slave/mem_revocable_used",
|
||||
// agent
|
||||
"slave/registered",
|
||||
"slave/uptime_secs",
|
||||
// system
|
||||
"system/cpus_total",
|
||||
"system/load_15min",
|
||||
"system/load_5min",
|
||||
"system/load_1min",
|
||||
"system/mem_free_bytes",
|
||||
"system/mem_total_bytes",
|
||||
// executors
|
||||
"containerizer/mesos/container_destroy_errors",
|
||||
"slave/container_launch_errors",
|
||||
"slave/executors_preempted",
|
||||
"slave/frameworks_active",
|
||||
"slave/executor_directory_max_allowed_age_secs",
|
||||
"slave/executors_registering",
|
||||
"slave/executors_running",
|
||||
"slave/executors_terminated",
|
||||
"slave/executors_terminating",
|
||||
"slave/recovery_errors",
|
||||
// tasks
|
||||
"slave/tasks_failed",
|
||||
"slave/tasks_finished",
|
||||
"slave/tasks_killed",
|
||||
"slave/tasks_lost",
|
||||
"slave/tasks_running",
|
||||
"slave/tasks_staging",
|
||||
"slave/tasks_starting",
|
||||
// messages
|
||||
"slave/invalid_framework_messages",
|
||||
"slave/invalid_status_updates",
|
||||
"slave/valid_framework_messages",
|
||||
"slave/valid_status_updates",
|
||||
}
|
||||
|
||||
func generateMetrics() {
|
||||
masterMetrics = make(map[string]interface{})
|
||||
|
||||
metricNames := []string{
|
||||
// resources
|
||||
"master/cpus_percent",
|
||||
"master/cpus_used",
|
||||
"master/cpus_total",
|
||||
"master/cpus_revocable_percent",
|
||||
"master/cpus_revocable_total",
|
||||
"master/cpus_revocable_used",
|
||||
"master/disk_percent",
|
||||
"master/disk_used",
|
||||
"master/disk_total",
|
||||
"master/disk_revocable_percent",
|
||||
"master/disk_revocable_total",
|
||||
"master/disk_revocable_used",
|
||||
"master/gpus_percent",
|
||||
"master/gpus_used",
|
||||
"master/gpus_total",
|
||||
"master/gpus_revocable_percent",
|
||||
"master/gpus_revocable_total",
|
||||
"master/gpus_revocable_used",
|
||||
"master/mem_percent",
|
||||
"master/mem_used",
|
||||
"master/mem_total",
|
||||
"master/mem_revocable_percent",
|
||||
"master/mem_revocable_total",
|
||||
"master/mem_revocable_used",
|
||||
// master
|
||||
"master/elected",
|
||||
"master/uptime_secs",
|
||||
// system
|
||||
"system/cpus_total",
|
||||
"system/load_15min",
|
||||
"system/load_5min",
|
||||
"system/load_1min",
|
||||
"system/mem_free_bytes",
|
||||
"system/mem_total_bytes",
|
||||
// agents
|
||||
"master/slave_registrations",
|
||||
"master/slave_removals",
|
||||
"master/slave_reregistrations",
|
||||
"master/slave_shutdowns_scheduled",
|
||||
"master/slave_shutdowns_canceled",
|
||||
"master/slave_shutdowns_completed",
|
||||
"master/slaves_active",
|
||||
"master/slaves_connected",
|
||||
"master/slaves_disconnected",
|
||||
"master/slaves_inactive",
|
||||
// frameworks
|
||||
"master/frameworks_active",
|
||||
"master/frameworks_connected",
|
||||
"master/frameworks_disconnected",
|
||||
"master/frameworks_inactive",
|
||||
"master/outstanding_offers",
|
||||
// tasks
|
||||
"master/tasks_error",
|
||||
"master/tasks_failed",
|
||||
"master/tasks_finished",
|
||||
"master/tasks_killed",
|
||||
"master/tasks_lost",
|
||||
"master/tasks_running",
|
||||
"master/tasks_staging",
|
||||
"master/tasks_starting",
|
||||
// messages
|
||||
"master/invalid_executor_to_framework_messages",
|
||||
"master/invalid_framework_to_executor_messages",
|
||||
"master/invalid_status_update_acknowledgements",
|
||||
"master/invalid_status_updates",
|
||||
"master/dropped_messages",
|
||||
"master/messages_authenticate",
|
||||
"master/messages_deactivate_framework",
|
||||
"master/messages_decline_offers",
|
||||
"master/messages_executor_to_framework",
|
||||
"master/messages_exited_executor",
|
||||
"master/messages_framework_to_executor",
|
||||
"master/messages_kill_task",
|
||||
"master/messages_launch_tasks",
|
||||
"master/messages_reconcile_tasks",
|
||||
"master/messages_register_framework",
|
||||
"master/messages_register_slave",
|
||||
"master/messages_reregister_framework",
|
||||
"master/messages_reregister_slave",
|
||||
"master/messages_resource_request",
|
||||
"master/messages_revive_offers",
|
||||
"master/messages_status_update",
|
||||
"master/messages_status_update_acknowledgement",
|
||||
"master/messages_unregister_framework",
|
||||
"master/messages_unregister_slave",
|
||||
"master/messages_update_slave",
|
||||
"master/recovery_slave_removals",
|
||||
"master/slave_removals/reason_registered",
|
||||
"master/slave_removals/reason_unhealthy",
|
||||
"master/slave_removals/reason_unregistered",
|
||||
"master/valid_framework_to_executor_messages",
|
||||
"master/valid_status_update_acknowledgements",
|
||||
"master/valid_status_updates",
|
||||
"master/task_lost/source_master/reason_invalid_offers",
|
||||
"master/task_lost/source_master/reason_slave_removed",
|
||||
"master/task_lost/source_slave/reason_executor_terminated",
|
||||
"master/valid_executor_to_framework_messages",
|
||||
// evqueue
|
||||
"master/event_queue_dispatches",
|
||||
"master/event_queue_http_requests",
|
||||
"master/event_queue_messages",
|
||||
// registrar
|
||||
"registrar/state_fetch_ms",
|
||||
"registrar/state_store_ms",
|
||||
"registrar/state_store_ms/max",
|
||||
"registrar/state_store_ms/min",
|
||||
"registrar/state_store_ms/p50",
|
||||
"registrar/state_store_ms/p90",
|
||||
"registrar/state_store_ms/p95",
|
||||
"registrar/state_store_ms/p99",
|
||||
"registrar/state_store_ms/p999",
|
||||
"registrar/state_store_ms/p9999",
|
||||
}
|
||||
|
||||
for _, k := range metricNames {
|
||||
for _, k := range masterMetricNames {
|
||||
masterMetrics[k] = rand.Float64()
|
||||
}
|
||||
|
||||
slaveMetrics = make(map[string]interface{})
|
||||
|
||||
metricNames = []string{
|
||||
// resources
|
||||
"slave/cpus_percent",
|
||||
"slave/cpus_used",
|
||||
"slave/cpus_total",
|
||||
"slave/cpus_revocable_percent",
|
||||
"slave/cpus_revocable_total",
|
||||
"slave/cpus_revocable_used",
|
||||
"slave/disk_percent",
|
||||
"slave/disk_used",
|
||||
"slave/disk_total",
|
||||
"slave/disk_revocable_percent",
|
||||
"slave/disk_revocable_total",
|
||||
"slave/disk_revocable_used",
|
||||
"slave/gpus_percent",
|
||||
"slave/gpus_used",
|
||||
"slave/gpus_total",
|
||||
"slave/gpus_revocable_percent",
|
||||
"slave/gpus_revocable_total",
|
||||
"slave/gpus_revocable_used",
|
||||
"slave/mem_percent",
|
||||
"slave/mem_used",
|
||||
"slave/mem_total",
|
||||
"slave/mem_revocable_percent",
|
||||
"slave/mem_revocable_total",
|
||||
"slave/mem_revocable_used",
|
||||
// agent
|
||||
"slave/registered",
|
||||
"slave/uptime_secs",
|
||||
// system
|
||||
"system/cpus_total",
|
||||
"system/load_15min",
|
||||
"system/load_5min",
|
||||
"system/load_1min",
|
||||
"system/mem_free_bytes",
|
||||
"system/mem_total_bytes",
|
||||
// executors
|
||||
"containerizer/mesos/container_destroy_errors",
|
||||
"slave/container_launch_errors",
|
||||
"slave/executors_preempted",
|
||||
"slave/frameworks_active",
|
||||
"slave/executor_directory_max_allowed_age_secs",
|
||||
"slave/executors_registering",
|
||||
"slave/executors_running",
|
||||
"slave/executors_terminated",
|
||||
"slave/executors_terminating",
|
||||
"slave/recovery_errors",
|
||||
// tasks
|
||||
"slave/tasks_failed",
|
||||
"slave/tasks_finished",
|
||||
"slave/tasks_killed",
|
||||
"slave/tasks_lost",
|
||||
"slave/tasks_running",
|
||||
"slave/tasks_staging",
|
||||
"slave/tasks_starting",
|
||||
// messages
|
||||
"slave/invalid_framework_messages",
|
||||
"slave/invalid_status_updates",
|
||||
"slave/valid_framework_messages",
|
||||
"slave/valid_status_updates",
|
||||
}
|
||||
|
||||
for _, k := range metricNames {
|
||||
for _, k := range slaveMetricNames {
|
||||
slaveMetrics[k] = rand.Float64()
|
||||
}
|
||||
|
||||
|
@ -296,7 +365,7 @@ func TestMesosMaster(t *testing.T) {
|
|||
func TestMasterFilter(t *testing.T) {
|
||||
m := Mesos{
|
||||
MasterCols: []string{
|
||||
"resources", "master", "registrar",
|
||||
"resources", "master", "registrar", "allocator",
|
||||
},
|
||||
}
|
||||
b := []string{
|
||||
|
@ -306,6 +375,26 @@ func TestMasterFilter(t *testing.T) {
|
|||
|
||||
m.filterMetrics(MASTER, &masterMetrics)
|
||||
|
||||
// Assert expected metrics are present.
|
||||
for _, v := range m.MasterCols {
|
||||
for _, x := range getMetrics(MASTER, v) {
|
||||
if _, ok := masterMetrics[x]; !ok {
|
||||
t.Errorf("Didn't find key %s, it should present.", x)
|
||||
}
|
||||
}
|
||||
}
|
||||
// m.MasterCols includes "allocator", so allocator metrics should be present.
|
||||
// allocator metrics have unpredictable names, so we can't rely on the list of metrics returned from
|
||||
// getMetrics(). We have to find them by checking name prefixes.
|
||||
for _, x := range masterMetricNames {
|
||||
if strings.HasPrefix(x, "allocator/") {
|
||||
if _, ok := masterMetrics[x]; !ok {
|
||||
t.Errorf("Didn't find key %s, it should be present.", x)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Assert unexpected metrics are not present.
|
||||
for _, v := range b {
|
||||
for _, x := range getMetrics(MASTER, v) {
|
||||
if _, ok := masterMetrics[x]; ok {
|
||||
|
@ -313,11 +402,12 @@ func TestMasterFilter(t *testing.T) {
|
|||
}
|
||||
}
|
||||
}
|
||||
for _, v := range m.MasterCols {
|
||||
for _, x := range getMetrics(MASTER, v) {
|
||||
if _, ok := masterMetrics[x]; !ok {
|
||||
t.Errorf("Didn't find key %s, it should present.", x)
|
||||
}
|
||||
// m.MasterCols does not include "framework_offers", so framework_offers metrics should not be present.
|
||||
// framework_offers metrics have unpredictable names, so we can't rely on the list of metrics returned from
|
||||
// getMetrics(). We have to find them by checking name prefixes.
|
||||
for k := range masterMetrics {
|
||||
if strings.HasPrefix(k, "master/frameworks/") || strings.HasPrefix(k, "frameworks/") {
|
||||
t.Errorf("Found key %s, it should be gone.", k)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -339,18 +429,6 @@ func TestMesosSlave(t *testing.T) {
|
|||
}
|
||||
|
||||
acc.AssertContainsFields(t, "mesos", slaveMetrics)
|
||||
|
||||
// expectedFields := make(map[string]interface{}, len(slaveTaskMetrics["statistics"].(map[string]interface{}))+1)
|
||||
// for k, v := range slaveTaskMetrics["statistics"].(map[string]interface{}) {
|
||||
// expectedFields[k] = v
|
||||
// }
|
||||
// expectedFields["executor_id"] = slaveTaskMetrics["executor_id"]
|
||||
|
||||
// acc.AssertContainsTaggedFields(
|
||||
// t,
|
||||
// "mesos_tasks",
|
||||
// expectedFields,
|
||||
// map[string]string{"server": "127.0.0.1", "framework_id": slaveTaskMetrics["framework_id"].(string)})
|
||||
}
|
||||
|
||||
func TestSlaveFilter(t *testing.T) {
|
||||
|
|
Loading…
Reference in New Issue