From dfb83778ea2f4054fa7b94c3db37025253dedf38 Mon Sep 17 00:00:00 2001 From: Evan Baker Date: Sun, 26 May 2019 22:01:02 -0500 Subject: [PATCH] Add ecs/fargate input plugin (#5121) --- README.md | 1 + plugins/inputs/all/all.go | 1 + plugins/inputs/docker/docker.go | 6 +- plugins/inputs/docker/stats_helpers.go | 8 +- plugins/inputs/ecs/README.md | 64 ++ plugins/inputs/ecs/client.go | 124 ++++ plugins/inputs/ecs/client_test.go | 211 ++++++ plugins/inputs/ecs/ecs.go | 251 +++++++ plugins/inputs/ecs/ecs_test.go | 767 ++++++++++++++++++++ plugins/inputs/ecs/stats.go | 295 ++++++++ plugins/inputs/ecs/stats_test.go | 226 ++++++ plugins/inputs/ecs/testdata/metadata.golden | 78 ++ plugins/inputs/ecs/testdata/stats.golden | 663 +++++++++++++++++ plugins/inputs/ecs/types.go | 75 ++ plugins/inputs/ecs/types_test.go | 61 ++ 15 files changed, 2824 insertions(+), 7 deletions(-) create mode 100644 plugins/inputs/ecs/README.md create mode 100644 plugins/inputs/ecs/client.go create mode 100644 plugins/inputs/ecs/client_test.go create mode 100644 plugins/inputs/ecs/ecs.go create mode 100644 plugins/inputs/ecs/ecs_test.go create mode 100644 plugins/inputs/ecs/stats.go create mode 100644 plugins/inputs/ecs/stats_test.go create mode 100644 plugins/inputs/ecs/testdata/metadata.golden create mode 100644 plugins/inputs/ecs/testdata/stats.golden create mode 100644 plugins/inputs/ecs/types.go create mode 100644 plugins/inputs/ecs/types_test.go diff --git a/README.md b/README.md index 1bd96896b..6b3931f42 100644 --- a/README.md +++ b/README.md @@ -165,6 +165,7 @@ For documentation on the latest development code see the [documentation index][d * [dns query time](./plugins/inputs/dns_query) * [docker](./plugins/inputs/docker) * [dovecot](./plugins/inputs/dovecot) +* [ecs](./plugins/inputs/ecs) * [elasticsearch](./plugins/inputs/elasticsearch) * [exec](./plugins/inputs/exec) (generic executable plugin, support JSON, influx, graphite and nagios) * [fail2ban](./plugins/inputs/fail2ban) diff --git 
a/plugins/inputs/all/all.go b/plugins/inputs/all/all.go index 02002a4f0..47f977f32 100644 --- a/plugins/inputs/all/all.go +++ b/plugins/inputs/all/all.go @@ -31,6 +31,7 @@ import ( _ "github.com/influxdata/telegraf/plugins/inputs/dns_query" _ "github.com/influxdata/telegraf/plugins/inputs/docker" _ "github.com/influxdata/telegraf/plugins/inputs/dovecot" + _ "github.com/influxdata/telegraf/plugins/inputs/ecs" _ "github.com/influxdata/telegraf/plugins/inputs/elasticsearch" _ "github.com/influxdata/telegraf/plugins/inputs/exec" _ "github.com/influxdata/telegraf/plugins/inputs/fail2ban" diff --git a/plugins/inputs/docker/docker.go b/plugins/inputs/docker/docker.go index 10759fc3e..117aabfb4 100644 --- a/plugins/inputs/docker/docker.go +++ b/plugins/inputs/docker/docker.go @@ -549,10 +549,10 @@ func parseContainerStats( memfields["limit"] = stat.MemoryStats.Limit memfields["max_usage"] = stat.MemoryStats.MaxUsage - mem := calculateMemUsageUnixNoCache(stat.MemoryStats) + mem := CalculateMemUsageUnixNoCache(stat.MemoryStats) memLimit := float64(stat.MemoryStats.Limit) memfields["usage"] = uint64(mem) - memfields["usage_percent"] = calculateMemPercentUnixNoCache(memLimit, mem) + memfields["usage_percent"] = CalculateMemPercentUnixNoCache(memLimit, mem) } else { memfields["commit_bytes"] = stat.MemoryStats.Commit memfields["commit_peak_bytes"] = stat.MemoryStats.CommitPeak @@ -575,7 +575,7 @@ func parseContainerStats( if daemonOSType != "windows" { previousCPU := stat.PreCPUStats.CPUUsage.TotalUsage previousSystem := stat.PreCPUStats.SystemUsage - cpuPercent := calculateCPUPercentUnix(previousCPU, previousSystem, stat) + cpuPercent := CalculateCPUPercentUnix(previousCPU, previousSystem, stat) cpufields["usage_percent"] = cpuPercent } else { cpuPercent := calculateCPUPercentWindows(stat) diff --git a/plugins/inputs/docker/stats_helpers.go b/plugins/inputs/docker/stats_helpers.go index b4c91e2fc..93ea2f219 100644 --- a/plugins/inputs/docker/stats_helpers.go +++ 
b/plugins/inputs/docker/stats_helpers.go @@ -4,7 +4,7 @@ package docker import "github.com/docker/docker/api/types" -func calculateCPUPercentUnix(previousCPU, previousSystem uint64, v *types.StatsJSON) float64 { +func CalculateCPUPercentUnix(previousCPU, previousSystem uint64, v *types.StatsJSON) float64 { var ( cpuPercent = 0.0 // calculate the change for the cpu usage of the container in between readings @@ -39,13 +39,13 @@ func calculateCPUPercentWindows(v *types.StatsJSON) float64 { return 0.00 } -// calculateMemUsageUnixNoCache calculate memory usage of the container. +// CalculateMemUsageUnixNoCache calculate memory usage of the container. // Page cache is intentionally excluded to avoid misinterpretation of the output. -func calculateMemUsageUnixNoCache(mem types.MemoryStats) float64 { +func CalculateMemUsageUnixNoCache(mem types.MemoryStats) float64 { return float64(mem.Usage - mem.Stats["cache"]) } -func calculateMemPercentUnixNoCache(limit float64, usedNoCache float64) float64 { +func CalculateMemPercentUnixNoCache(limit float64, usedNoCache float64) float64 { // MemoryStats.Limit will never be 0 unless the container is not running and we haven't // got any data from cgroup if limit != 0 { diff --git a/plugins/inputs/ecs/README.md b/plugins/inputs/ecs/README.md new file mode 100644 index 000000000..411322959 --- /dev/null +++ b/plugins/inputs/ecs/README.md @@ -0,0 +1,64 @@ +# ECS Input Plugin + +ECS, Fargate compatible, input plugin which uses the [ECS v2 metadata and stats API](https://docs.aws.amazon.com/AmazonECS/latest/developerguide/task-metadata-endpoint-v2.html) +endpoints to gather stats on running containers in a Task. + +The telegraf container must be run in the same Task as the workload it is inspecting. + +This is similar to (and reuses a few pieces of) the [Docker](../docker/README.md) input plugin, with some ECS specific modifications for AWS metadata and stats formats. 
+ + +### Configuration: + +```toml +# Read metrics about ECS containers +[[inputs.ecs]] + # endpoint_url = "http://169.254.170.2" + ## Containers to include and exclude. Globs accepted. + ## Note that an empty array for both will include all containers + container_name_include = [] + container_name_exclude = [] + + ## Container states to include and exclude. Globs accepted. + ## When empty only containers in the "running" state will be captured. + # container_status_include = [] + # container_status_exclude = [] + + ## ecs labels to include and exclude as tags. Globs accepted. + ## Note that an empty array for both will include all labels as tags + ecs_label_include = [ "com.amazonaws.ecs.*" ] + ecs_label_exclude = [] + + ## Timeout for requests to the ECS metadata and stats endpoints + timeout = "5s" +``` + +#### Environment Configuration + +The ECS client can optionally also be configured with the following env vars: + +``` +ECS_TIMEOUT +``` + + +### Example Output: + +``` +ecs_task,cluster=test,family=nginx,host=c4b301d4a123,revision=2,task_arn=arn:aws:ecs:aws-region-1:012345678901:task/a1234abc-a0a0-0a01-ab01-0abc012a0a0a revision="2",desired_status="RUNNING",known_status="RUNNING",limit_cpu=0.5,limit_mem=512 1542641488000000000 +ecs_container_mem,cluster=test,com.amazonaws.ecs.cluster=test,com.amazonaws.ecs.container-name=~internal~ecs~pause,com.amazonaws.ecs.task-arn=arn:aws:ecs:aws-region-1:012345678901:task/a1234abc-a0a0-0a01-ab01-0abc012a0a0a,com.amazonaws.ecs.task-definition-family=nginx,com.amazonaws.ecs.task-definition-version=2,family=nginx,host=c4b301d4a123,id=e6af031b91deb3136a2b7c42f262ed2ab554e2fe2736998c7d8edf4afe708dba,name=~internal~ecs~pause,revision=2,task_arn=arn:aws:ecs:aws-region-1:012345678901:task/a1234abc-a0a0-0a01-ab01-0abc012a0a0a
active_anon=40960i,active_file=8192i,cache=790528i,pgpgin=1243i,total_pgfault=1298i,total_rss=40960i,limit=1033658368i,max_usage=4825088i,hierarchical_memory_limit=536870912i,rss=40960i,total_active_file=8192i,total_mapped_file=618496i,usage_percent=0.05349543109392212,container_id="e6af031b91deb3136a2b7c42f262ed2ab554e2fe2736998c7d8edf4afe708dba",pgfault=1298i,pgmajfault=6i,pgpgout=1040i,total_active_anon=40960i,total_inactive_file=782336i,total_pgpgin=1243i,usage=552960i,inactive_file=782336i,mapped_file=618496i,total_cache=790528i,total_pgpgout=1040i 1542642001000000000 +ecs_container_cpu,cluster=test,com.amazonaws.ecs.cluster=test,com.amazonaws.ecs.container-name=~internal~ecs~pause,com.amazonaws.ecs.task-arn=arn:aws:ecs:aws-region-1:012345678901:task/a1234abc-a0a0-0a01-ab01-0abc012a0a0a,com.amazonaws.ecs.task-definition-family=nginx,com.amazonaws.ecs.task-definition-version=2,cpu=cpu-total,family=nginx,host=c4b301d4a123,id=e6af031b91deb3136a2b7c42f262ed2ab554e2fe2736998c7d8edf4afe708dba,name=~internal~ecs~pause,revision=2,task_arn=arn:aws:ecs:aws-region-1:012345678901:task/a1234abc-a0a0-0a01-ab01-0abc012a0a0a usage_in_kernelmode=0i,throttling_throttled_periods=0i,throttling_periods=0i,throttling_throttled_time=0i,container_id="e6af031b91deb3136a2b7c42f262ed2ab554e2fe2736998c7d8edf4afe708dba",usage_percent=0,usage_total=26426156i,usage_in_usermode=20000000i,usage_system=2336100000000i 1542642001000000000 +ecs_container_cpu,cluster=test,com.amazonaws.ecs.cluster=test,com.amazonaws.ecs.container-name=~internal~ecs~pause,com.amazonaws.ecs.task-arn=arn:aws:ecs:aws-region-1:012345678901:task/a1234abc-a0a0-0a01-ab01-0abc012a0a0a,com.amazonaws.ecs.task-definition-family=nginx,com.amazonaws.ecs.task-definition-version=2,cpu=cpu0,family=nginx,host=c4b301d4a123,id=e6af031b91deb3136a2b7c42f262ed2ab554e2fe2736998c7d8edf4afe708dba,name=~internal~ecs~pause,revision=2,task_arn=arn:aws:ecs:aws-region-1:012345678901:task/a1234abc-a0a0-0a01-ab01-0abc012a0a0a 
container_id="e6af031b91deb3136a2b7c42f262ed2ab554e2fe2736998c7d8edf4afe708dba",usage_total=26426156i 1542642001000000000 +ecs_container_net,cluster=test,com.amazonaws.ecs.cluster=test,com.amazonaws.ecs.container-name=~internal~ecs~pause,com.amazonaws.ecs.task-arn=arn:aws:ecs:aws-region-1:012345678901:task/a1234abc-a0a0-0a01-ab01-0abc012a0a0a,com.amazonaws.ecs.task-definition-family=nginx,com.amazonaws.ecs.task-definition-version=2,family=nginx,host=c4b301d4a123,id=e6af031b91deb3136a2b7c42f262ed2ab554e2fe2736998c7d8edf4afe708dba,name=~internal~ecs~pause,network=eth0,revision=2,task_arn=arn:aws:ecs:aws-region-1:012345678901:task/a1234abc-a0a0-0a01-ab01-0abc012a0a0a rx_errors=0i,rx_packets=36i,tx_errors=0i,tx_bytes=648i,container_id="e6af031b91deb3136a2b7c42f262ed2ab554e2fe2736998c7d8edf4afe708dba",rx_dropped=0i,rx_bytes=5338i,tx_packets=8i,tx_dropped=0i 1542642001000000000 +ecs_container_net,cluster=test,com.amazonaws.ecs.cluster=test,com.amazonaws.ecs.container-name=~internal~ecs~pause,com.amazonaws.ecs.task-arn=arn:aws:ecs:aws-region-1:012345678901:task/a1234abc-a0a0-0a01-ab01-0abc012a0a0a,com.amazonaws.ecs.task-definition-family=nginx,com.amazonaws.ecs.task-definition-version=2,family=nginx,host=c4b301d4a123,id=e6af031b91deb3136a2b7c42f262ed2ab554e2fe2736998c7d8edf4afe708dba,name=~internal~ecs~pause,network=eth5,revision=2,task_arn=arn:aws:ecs:aws-region-1:012345678901:task/a1234abc-a0a0-0a01-ab01-0abc012a0a0a rx_errors=0i,tx_packets=9i,rx_packets=26i,tx_errors=0i,rx_bytes=4641i,tx_dropped=0i,tx_bytes=690i,container_id="e6af031b91deb3136a2b7c42f262ed2ab554e2fe2736998c7d8edf4afe708dba",rx_dropped=0i 1542642001000000000 
+ecs_container_net,cluster=test,com.amazonaws.ecs.cluster=test,com.amazonaws.ecs.container-name=~internal~ecs~pause,com.amazonaws.ecs.task-arn=arn:aws:ecs:aws-region-1:012345678901:task/a1234abc-a0a0-0a01-ab01-0abc012a0a0a,com.amazonaws.ecs.task-definition-family=nginx,com.amazonaws.ecs.task-definition-version=2,family=nginx,host=c4b301d4a123,id=e6af031b91deb3136a2b7c42f262ed2ab554e2fe2736998c7d8edf4afe708dba,name=~internal~ecs~pause,network=total,revision=2,task_arn=arn:aws:ecs:aws-region-1:012345678901:task/a1234abc-a0a0-0a01-ab01-0abc012a0a0a rx_dropped=0i,rx_bytes=9979i,rx_errors=0i,rx_packets=62i,tx_bytes=1338i,container_id="e6af031b91deb3136a2b7c42f262ed2ab554e2fe2736998c7d8edf4afe708dba",tx_packets=17i,tx_dropped=0i,tx_errors=0i 1542642001000000000 +ecs_container_blkio,cluster=test,com.amazonaws.ecs.cluster=test,com.amazonaws.ecs.container-name=~internal~ecs~pause,com.amazonaws.ecs.task-arn=arn:aws:ecs:aws-region-1:012345678901:task/a1234abc-a0a0-0a01-ab01-0abc012a0a0a,com.amazonaws.ecs.task-definition-family=nginx,com.amazonaws.ecs.task-definition-version=2,device=253:1,family=nginx,host=c4b301d4a123,id=e6af031b91deb3136a2b7c42f262ed2ab554e2fe2736998c7d8edf4afe708dba,name=~internal~ecs~pause,revision=2,task_arn=arn:aws:ecs:aws-region-1:012345678901:task/a1234abc-a0a0-0a01-ab01-0abc012a0a0a io_service_bytes_recursive_sync=790528i,io_service_bytes_recursive_total=790528i,io_serviced_recursive_sync=10i,io_serviced_recursive_write=0i,io_serviced_recursive_async=0i,io_serviced_recursive_total=10i,container_id="e6af031b91deb3136a2b7c42f262ed2ab554e2fe2736998c7d8edf4afe708dba",io_service_bytes_recursive_read=790528i,io_service_bytes_recursive_write=0i,io_service_bytes_recursive_async=0i,io_serviced_recursive_read=10i 1542642001000000000 
+ecs_container_blkio,cluster=test,com.amazonaws.ecs.cluster=test,com.amazonaws.ecs.container-name=~internal~ecs~pause,com.amazonaws.ecs.task-arn=arn:aws:ecs:aws-region-1:012345678901:task/a1234abc-a0a0-0a01-ab01-0abc012a0a0a,com.amazonaws.ecs.task-definition-family=nginx,com.amazonaws.ecs.task-definition-version=2,device=253:2,family=nginx,host=c4b301d4a123,id=e6af031b91deb3136a2b7c42f262ed2ab554e2fe2736998c7d8edf4afe708dba,name=~internal~ecs~pause,revision=2,task_arn=arn:aws:ecs:aws-region-1:012345678901:task/a1234abc-a0a0-0a01-ab01-0abc012a0a0a io_service_bytes_recursive_sync=790528i,io_service_bytes_recursive_total=790528i,io_serviced_recursive_async=0i,io_serviced_recursive_total=10i,container_id="e6af031b91deb3136a2b7c42f262ed2ab554e2fe2736998c7d8edf4afe708dba",io_service_bytes_recursive_read=790528i,io_service_bytes_recursive_write=0i,io_service_bytes_recursive_async=0i,io_serviced_recursive_read=10i,io_serviced_recursive_write=0i,io_serviced_recursive_sync=10i 1542642001000000000 +ecs_container_blkio,cluster=test,com.amazonaws.ecs.cluster=test,com.amazonaws.ecs.container-name=~internal~ecs~pause,com.amazonaws.ecs.task-arn=arn:aws:ecs:aws-region-1:012345678901:task/a1234abc-a0a0-0a01-ab01-0abc012a0a0a,com.amazonaws.ecs.task-definition-family=nginx,com.amazonaws.ecs.task-definition-version=2,device=253:4,family=nginx,host=c4b301d4a123,id=e6af031b91deb3136a2b7c42f262ed2ab554e2fe2736998c7d8edf4afe708dba,name=~internal~ecs~pause,revision=2,task_arn=arn:aws:ecs:aws-region-1:012345678901:task/a1234abc-a0a0-0a01-ab01-0abc012a0a0a io_service_bytes_recursive_write=0i,io_service_bytes_recursive_sync=790528i,io_service_bytes_recursive_async=0i,io_service_bytes_recursive_total=790528i,io_serviced_recursive_async=0i,container_id="e6af031b91deb3136a2b7c42f262ed2ab554e2fe2736998c7d8edf4afe708dba",io_service_bytes_recursive_read=790528i,io_serviced_recursive_read=10i,io_serviced_recursive_write=0i,io_serviced_recursive_sync=10i,io_serviced_recursive_total=10i 
1542642001000000000 +ecs_container_blkio,cluster=test,com.amazonaws.ecs.cluster=test,com.amazonaws.ecs.container-name=~internal~ecs~pause,com.amazonaws.ecs.task-arn=arn:aws:ecs:aws-region-1:012345678901:task/a1234abc-a0a0-0a01-ab01-0abc012a0a0a,com.amazonaws.ecs.task-definition-family=nginx,com.amazonaws.ecs.task-definition-version=2,device=202:26368,family=nginx,host=c4b301d4a123,id=e6af031b91deb3136a2b7c42f262ed2ab554e2fe2736998c7d8edf4afe708dba,name=~internal~ecs~pause,revision=2,task_arn=arn:aws:ecs:aws-region-1:012345678901:task/a1234abc-a0a0-0a01-ab01-0abc012a0a0a io_serviced_recursive_read=10i,io_serviced_recursive_write=0i,io_serviced_recursive_sync=10i,io_serviced_recursive_async=0i,io_serviced_recursive_total=10i,container_id="e6af031b91deb3136a2b7c42f262ed2ab554e2fe2736998c7d8edf4afe708dba",io_service_bytes_recursive_sync=790528i,io_service_bytes_recursive_total=790528i,io_service_bytes_recursive_async=0i,io_service_bytes_recursive_read=790528i,io_service_bytes_recursive_write=0i 1542642001000000000 +ecs_container_blkio,cluster=test,com.amazonaws.ecs.cluster=test,com.amazonaws.ecs.container-name=~internal~ecs~pause,com.amazonaws.ecs.task-arn=arn:aws:ecs:aws-region-1:012345678901:task/a1234abc-a0a0-0a01-ab01-0abc012a0a0a,com.amazonaws.ecs.task-definition-family=nginx,com.amazonaws.ecs.task-definition-version=2,device=total,family=nginx,host=c4b301d4a123,id=e6af031b91deb3136a2b7c42f262ed2ab554e2fe2736998c7d8edf4afe708dba,name=~internal~ecs~pause,revision=2,task_arn=arn:aws:ecs:aws-region-1:012345678901:task/a1234abc-a0a0-0a01-ab01-0abc012a0a0a 
io_serviced_recursive_async=0i,io_serviced_recursive_read=40i,io_serviced_recursive_sync=40i,io_serviced_recursive_write=0i,io_serviced_recursive_total=40i,io_service_bytes_recursive_read=3162112i,io_service_bytes_recursive_write=0i,io_service_bytes_recursive_async=0i,container_id="e6af031b91deb3136a2b7c42f262ed2ab554e2fe2736998c7d8edf4afe708dba",io_service_bytes_recursive_sync=3162112i,io_service_bytes_recursive_total=3162112i 1542642001000000000 +ecs_container_meta,cluster=test,com.amazonaws.ecs.cluster=test,com.amazonaws.ecs.container-name=~internal~ecs~pause,com.amazonaws.ecs.task-arn=arn:aws:ecs:aws-region-1:012345678901:task/a1234abc-a0a0-0a01-ab01-0abc012a0a0a,com.amazonaws.ecs.task-definition-family=nginx,com.amazonaws.ecs.task-definition-version=2,family=nginx,host=c4b301d4a123,id=e6af031b91deb3136a2b7c42f262ed2ab554e2fe2736998c7d8edf4afe708dba,name=~internal~ecs~pause,revision=2,task_arn=arn:aws:ecs:aws-region-1:012345678901:task/a1234abc-a0a0-0a01-ab01-0abc012a0a0a limit_mem=0,type="CNI_PAUSE",container_id="e6af031b91deb3136a2b7c42f262ed2ab554e2fe2736998c7d8edf4afe708dba",docker_name="ecs-nginx-2-internalecspause",limit_cpu=0,known_status="RESOURCES_PROVISIONED",image="amazon/amazon-ecs-pause:0.1.0",image_id="",desired_status="RESOURCES_PROVISIONED" 1542642001000000000 +``` + +### Notes: +- the amazon-ecs-agent (though it _is_ a container running on the host) is not present in the metadata/stats endpoints. 
\ No newline at end of file diff --git a/plugins/inputs/ecs/client.go b/plugins/inputs/ecs/client.go new file mode 100644 index 000000000..eba0b0856 --- /dev/null +++ b/plugins/inputs/ecs/client.go @@ -0,0 +1,124 @@ +package ecs + +import ( + "log" + "net/http" + "net/url" + "os" + "time" + + "github.com/docker/docker/api/types" +) + +var ( + ecsMetadataPath, _ = url.Parse("/v2/metadata") + ecsMetaStatsPath, _ = url.Parse("/v2/stats") +) + +// Client is the ECS client contract +type Client interface { + Task() (*Task, error) + ContainerStats() (map[string]types.StatsJSON, error) +} + +type httpClient interface { + Do(req *http.Request) (*http.Response, error) +} + +// NewEnvClient configures a new Client from the env +func NewEnvClient() (*EcsClient, error) { + timeout := 5 * time.Second + if t := os.Getenv("ECS_TIMEOUT"); t != "" { + if d, err := time.ParseDuration(t); err == nil { + timeout = d + } + } + + return NewClient( + timeout, + ) +} + +// NewClient constructs an ECS client with the passed configuration params +func NewClient(timeout time.Duration) (*EcsClient, error) { + c := &http.Client{ + Timeout: timeout, + } + + return &EcsClient{ + client: c, + }, nil +} + +// EcsClient contains ECS connection config +type EcsClient struct { + client httpClient + BaseURL *url.URL + taskURL string + statsURL string +} + +// Task calls the ECS metadata endpoint and returns a populated Task +func (c *EcsClient) Task() (*Task, error) { + if c.taskURL == "" { + c.taskURL = c.BaseURL.ResolveReference(ecsMetadataPath).String() + } + + req, _ := http.NewRequest("GET", c.taskURL, nil) + resp, err := c.client.Do(req) + + if err != nil { + log.Println("failed to GET metadata endpoint", err) + return nil, err + } + + task, err := unmarshalTask(resp.Body) + if err != nil { + log.Println("failed to decode response from metadata endpoint", err) + return nil, err + } + + return task, nil +} + +// ContainerStats calls the ECS stats endpoint and returns a populated container stats 
map +func (c *EcsClient) ContainerStats() (map[string]types.StatsJSON, error) { + if c.statsURL == "" { + c.statsURL = c.BaseURL.ResolveReference(ecsMetaStatsPath).String() + } + + req, _ := http.NewRequest("GET", c.statsURL, nil) + resp, err := c.client.Do(req) + + if err != nil { + log.Println("failed to GET stats endpoint", err) + return map[string]types.StatsJSON{}, err + } + + statsMap, err := unmarshalStats(resp.Body) + if err != nil { + log.Println("failed to decode response from stats endpoint") + return map[string]types.StatsJSON{}, err + } + + return statsMap, nil +} + +// PollSync executes Task and ContainerStats in parallel. If both succeed, both structs are returned. +// If either errors, a single error is returned. +func PollSync(c Client) (*Task, map[string]types.StatsJSON, error) { + + var task *Task + var stats map[string]types.StatsJSON + var err error + + if stats, err = c.ContainerStats(); err != nil { + return nil, nil, err + } + + if task, err = c.Task(); err != nil { + return nil, nil, err + } + + return task, stats, nil +} diff --git a/plugins/inputs/ecs/client_test.go b/plugins/inputs/ecs/client_test.go new file mode 100644 index 000000000..d6fbd1165 --- /dev/null +++ b/plugins/inputs/ecs/client_test.go @@ -0,0 +1,211 @@ +package ecs + +import ( + "bytes" + "errors" + "io/ioutil" + "net/http" + "os" + "testing" + + "github.com/docker/docker/api/types" + "github.com/stretchr/testify/assert" +) + +type pollMock struct { + task func() (*Task, error) + stats func() (map[string]types.StatsJSON, error) +} + +func (p *pollMock) Task() (*Task, error) { + return p.task() +} + +func (p *pollMock) ContainerStats() (map[string]types.StatsJSON, error) { + return p.stats() +} + +func TestEcsClient_PollSync(t *testing.T) { + + tests := []struct { + name string + mock *pollMock + want *Task + want1 map[string]types.StatsJSON + wantErr bool + }{ + { + name: "success", + mock: &pollMock{ + task: func() (*Task, error) { + return &validMeta, nil + }, + stats: 
func() (map[string]types.StatsJSON, error) { + return validStats, nil + }, + }, + want: &validMeta, + want1: validStats, + }, + { + name: "task err", + mock: &pollMock{ + task: func() (*Task, error) { + return nil, errors.New("err") + }, + stats: func() (map[string]types.StatsJSON, error) { + return validStats, nil + }, + }, + wantErr: true, + }, + { + name: "stats err", + mock: &pollMock{ + task: func() (*Task, error) { + return &validMeta, nil + }, + stats: func() (map[string]types.StatsJSON, error) { + return nil, errors.New("err") + }, + }, + wantErr: true, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got, got1, err := PollSync(tt.mock) + + if (err != nil) != tt.wantErr { + t.Errorf("EcsClient.PollSync() error = %v, wantErr %v", err, tt.wantErr) + return + } + assert.Equal(t, tt.want, got, "EcsClient.PollSync() got = %v, want %v", got, tt.want) + assert.Equal(t, tt.want1, got1, "EcsClient.PollSync() got1 = %v, want %v", got1, tt.want1) + }) + } +} + +type mockDo struct { + do func(req *http.Request) (*http.Response, error) +} + +func (m mockDo) Do(req *http.Request) (*http.Response, error) { + return m.do(req) +} + +func TestEcsClient_Task(t *testing.T) { + rc, _ := os.Open("testdata/metadata.golden") + tests := []struct { + name string + client httpClient + want *Task + wantErr bool + }{ + { + name: "happy", + client: mockDo{ + do: func(req *http.Request) (*http.Response, error) { + return &http.Response{ + Body: ioutil.NopCloser(rc), + }, nil + }, + }, + want: &validMeta, + }, + { + name: "do err", + client: mockDo{ + do: func(req *http.Request) (*http.Response, error) { + return nil, errors.New("err") + }, + }, + wantErr: true, + }, + { + name: "malformed resp", + client: mockDo{ + do: func(req *http.Request) (*http.Response, error) { + return &http.Response{ + Body: ioutil.NopCloser(bytes.NewReader([]byte("foo"))), + }, nil + }, + }, + wantErr: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) 
{ + c := &EcsClient{ + client: tt.client, + taskURL: "abc", + } + got, err := c.Task() + if (err != nil) != tt.wantErr { + t.Errorf("EcsClient.Task() error = %v, wantErr %v", err, tt.wantErr) + return + } + assert.Equal(t, tt.want, got, "EcsClient.Task() = %v, want %v", got, tt.want) + }) + } +} + +func TestEcsClient_ContainerStats(t *testing.T) { + rc, _ := os.Open("testdata/stats.golden") + tests := []struct { + name string + client httpClient + want map[string]types.StatsJSON + wantErr bool + }{ + { + name: "happy", + client: mockDo{ + do: func(req *http.Request) (*http.Response, error) { + return &http.Response{ + Body: ioutil.NopCloser(rc), + }, nil + }, + }, + want: validStats, + }, + { + name: "do err", + client: mockDo{ + do: func(req *http.Request) (*http.Response, error) { + return nil, errors.New("err") + }, + }, + want: map[string]types.StatsJSON{}, + wantErr: true, + }, + { + name: "malformed resp", + client: mockDo{ + do: func(req *http.Request) (*http.Response, error) { + return &http.Response{ + Body: ioutil.NopCloser(bytes.NewReader([]byte("foo"))), + }, nil + }, + }, + want: map[string]types.StatsJSON{}, + wantErr: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + c := &EcsClient{ + client: tt.client, + statsURL: "abc", + } + got, err := c.ContainerStats() + if (err != nil) != tt.wantErr { + t.Errorf("EcsClient.ContainerStats() error = %v, wantErr %v", err, tt.wantErr) + return + } + assert.Equal(t, tt.want, got, "EcsClient.ContainerStats() = %v, want %v", got, tt.want) + }) + } +} diff --git a/plugins/inputs/ecs/ecs.go b/plugins/inputs/ecs/ecs.go new file mode 100644 index 000000000..36a51229a --- /dev/null +++ b/plugins/inputs/ecs/ecs.go @@ -0,0 +1,251 @@ +package ecs + +import ( + "log" + "net/url" + "time" + + "github.com/influxdata/telegraf" + "github.com/influxdata/telegraf/filter" + "github.com/influxdata/telegraf/internal" + "github.com/influxdata/telegraf/plugins/inputs" +) + +// Ecs config object +type 
// Decimal (SI) size multipliers: each unit is 1000 times the previous one.
const (
	KB = 1000
	MB = 1000 * KB
	GB = 1000 * MB
	TB = 1000 * GB
	PB = 1000 * TB
)

// sampleConfig is the example TOML configuration returned by SampleConfig.
var sampleConfig = `
  ## ECS metadata url
  # endpoint_url = "http://169.254.170.2"

  ## Set to true to configure from env vars
  envcfg = false

  ## Containers to include and exclude. Globs accepted.
  ## Note that an empty array for both will include all containers
  container_name_include = []
  container_name_exclude = []

  ## Container states to include and exclude. Globs accepted.
  ## When empty only containers in the "running" state will be captured.
  # container_status_include = []
  # container_status_exclude = []

  ## ecs labels to include and exclude as tags. Globs accepted.
  ## Note that an empty array for both will include all labels as tags
  ecs_label_include = [ "com.amazonaws.ecs.*" ]
  ecs_label_exclude = []

  ## Timeout for requests to the ECS metadata and stats endpoints
  ## (used when envcfg = false)
  timeout = "5s"
`
+} + +// SampleConfig returns the ECS example config +func (ecs *Ecs) SampleConfig() string { + return sampleConfig +} + +// Gather is the entrypoint for telegraf metrics collection +func (ecs *Ecs) Gather(acc telegraf.Accumulator) error { + err := initSetup(ecs) + if err != nil { + return err + } + + task, stats, err := PollSync(ecs.client) + if err != nil { + return err + } + + mergeTaskStats(task, stats) + + taskTags := map[string]string{ + "cluster": task.Cluster, + "task_arn": task.TaskARN, + "family": task.Family, + "revision": task.Revision, + } + + // accumulate metrics + ecs.accTask(task, taskTags, acc) + ecs.accContainers(task, taskTags, acc) + + return nil +} + +func initSetup(ecs *Ecs) error { + if ecs.client == nil { + var c *EcsClient + var err error + if ecs.EnvCfg { + c, err = ecs.newEnvClient() + } else { + c, err = ecs.newClient(ecs.Timeout.Duration) + } + if err != nil { + return err + } + + c.BaseURL, err = url.Parse(ecs.EndpointURL) + if err != nil { + return err + } + + ecs.client = c + } + + // Create filters + if !ecs.filtersCreated { + err := ecs.createContainerNameFilters() + if err != nil { + return err + } + err = ecs.createContainerStatusFilters() + if err != nil { + return err + } + err = ecs.createLabelFilters() + if err != nil { + return err + } + ecs.filtersCreated = true + } + + return nil +} + +func (ecs *Ecs) accTask(task *Task, tags map[string]string, acc telegraf.Accumulator) { + taskFields := map[string]interface{}{ + "revision": task.Revision, + "desired_status": task.DesiredStatus, + "known_status": task.KnownStatus, + "limit_cpu": task.Limits["CPU"], + "limit_mem": task.Limits["Memory"], + } + + acc.AddFields("ecs_task", taskFields, tags, task.PullStoppedAt) +} + +func (ecs *Ecs) accContainers(task *Task, taskTags map[string]string, acc telegraf.Accumulator) { + for _, c := range task.Containers { + if !ecs.containerNameFilter.Match(c.Name) { + log.Printf("container %v did not match name filter", c.ID) + continue + } + + if 
!ecs.statusFilter.Match(c.KnownStatus) { + log.Printf("container %v did not match status filter", c.ID) + continue + } + + // add matching ECS container Labels + containerTags := map[string]string{ + "id": c.ID, + "name": c.Name, + } + for k, v := range c.Labels { + if ecs.labelFilter.Match(k) { + containerTags[k] = v + } + } + tags := mergeTags(taskTags, containerTags) + + parseContainerStats(c, acc, tags) + } +} + +// returns a new map with the same content values as the input map +func copyTags(in map[string]string) map[string]string { + out := make(map[string]string) + for k, v := range in { + out[k] = v + } + return out +} + +// returns a new map with the merged content values of the two input maps +func mergeTags(a map[string]string, b map[string]string) map[string]string { + c := copyTags(a) + for k, v := range b { + c[k] = v + } + return c +} + +func (ecs *Ecs) createContainerNameFilters() error { + filter, err := filter.NewIncludeExcludeFilter(ecs.ContainerNameInclude, ecs.ContainerNameExclude) + if err != nil { + return err + } + ecs.containerNameFilter = filter + return nil +} + +func (ecs *Ecs) createLabelFilters() error { + filter, err := filter.NewIncludeExcludeFilter(ecs.LabelInclude, ecs.LabelExclude) + if err != nil { + return err + } + ecs.labelFilter = filter + return nil +} + +func (ecs *Ecs) createContainerStatusFilters() error { + if len(ecs.ContainerStatusInclude) == 0 && len(ecs.ContainerStatusExclude) == 0 { + ecs.ContainerStatusInclude = []string{"running"} + } + filter, err := filter.NewIncludeExcludeFilter(ecs.ContainerStatusInclude, ecs.ContainerStatusExclude) + if err != nil { + return err + } + ecs.statusFilter = filter + return nil +} + +func init() { + inputs.Add("ecs", func() telegraf.Input { + return &Ecs{ + EndpointURL: "http://169.254.170.2", + Timeout: internal.Duration{Duration: 5 * time.Second}, + EnvCfg: true, + newEnvClient: NewEnvClient, + newClient: NewClient, + filtersCreated: false, + } + }) +} diff --git 
a/plugins/inputs/ecs/ecs_test.go b/plugins/inputs/ecs/ecs_test.go new file mode 100644 index 000000000..b105a433f --- /dev/null +++ b/plugins/inputs/ecs/ecs_test.go @@ -0,0 +1,767 @@ +package ecs + +import ( + "time" + + "github.com/docker/docker/api/types" +) + +// codified golden objects for tests + +// stats +const pauseStatsKey = "e6af031b91deb3136a2b7c42f262ed2ab554e2fe2736998c7d8edf4afe708dba" +const nginxStatsKey = "fffe894e232d46c76475cfeabf4907f712e8b92618a37fca3ef0805bbbfb0299" + +var pauseStatsRead, _ = time.Parse(time.RFC3339Nano, "2018-11-19T15:40:00.936081344Z") +var pauseStatsPreRead, _ = time.Parse(time.RFC3339Nano, "2018-11-19T15:39:59.933000984Z") + +var nginxStatsRead, _ = time.Parse(time.RFC3339Nano, "2018-11-19T15:40:00.93733207Z") +var nginxStatsPreRead, _ = time.Parse(time.RFC3339Nano, "2018-11-19T15:39:59.934291009Z") + +var validStats = map[string]types.StatsJSON{ + pauseStatsKey: { + Stats: types.Stats{ + Read: pauseStatsRead, + PreRead: pauseStatsPreRead, + BlkioStats: types.BlkioStats{ + IoServiceBytesRecursive: []types.BlkioStatEntry{ + { + Major: 202, + Minor: 26368, + Op: "Read", + Value: 790528, + }, + { + Major: 202, + Minor: 26368, + Op: "Write", + }, + { + Major: 202, + Minor: 26368, + Op: "Sync", + Value: 790528, + }, + { + Major: 202, + Minor: 26368, + Op: "Async", + }, + { + Major: 202, + Minor: 26368, + Op: "Total", + Value: 790528, + }, + { + Major: 253, + Minor: 1, + Op: "Read", + Value: 790528, + }, + { + Major: 253, + Minor: 1, + Op: "Write", + }, + { + Major: 253, + Minor: 1, + Op: "Sync", + Value: 790528, + }, + { + Major: 253, + Minor: 1, + Op: "Async", + }, + { + Major: 253, + Minor: 1, + Op: "Total", + Value: 790528, + }, + { + Major: 253, + Minor: 2, + Op: "Read", + Value: 790528, + }, + { + Major: 253, + Minor: 2, + Op: "Write", + }, + { + Major: 253, + Minor: 2, + Op: "Sync", + Value: 790528, + }, + { + Major: 253, + Minor: 2, + Op: "Async", + }, + { + Major: 253, + Minor: 2, + Op: "Total", + Value: 790528, + }, + 
{ + Major: 253, + Minor: 4, + Op: "Read", + Value: 790528, + }, + { + Major: 253, + Minor: 4, + Op: "Write", + }, + { + Major: 253, + Minor: 4, + Op: "Sync", + Value: 790528, + }, + { + Major: 253, + Minor: 4, + Op: "Async", + }, + { + Major: 253, + Minor: 4, + Op: "Total", + Value: 790528, + }, + }, + IoServicedRecursive: []types.BlkioStatEntry{ + { + Major: 202, + Minor: 26368, + Op: "Read", + Value: 10, + }, + { + Major: 202, + Minor: 26368, + Op: "Write", + }, + { + Major: 202, + Minor: 26368, + Op: "Sync", + Value: 10, + }, + { + Major: 202, + Minor: 26368, + Op: "Async", + }, + { + Major: 202, + Minor: 26368, + Op: "Total", + Value: 10, + }, + { + Major: 253, + Minor: 1, + Op: "Read", + Value: 10, + }, + { + Major: 253, + Minor: 1, + Op: "Write", + }, + { + Major: 253, + Minor: 1, + Op: "Sync", + Value: 10, + }, + { + Major: 253, + Minor: 1, + Op: "Async", + }, + { + Major: 253, + Minor: 1, + Op: "Total", + Value: 10, + }, + { + Major: 253, + Minor: 2, + Op: "Read", + Value: 10, + }, + { + Major: 253, + Minor: 2, + Op: "Write", + }, + { + Major: 253, + Minor: 2, + Op: "Sync", + Value: 10, + }, + { + Major: 253, + Minor: 2, + Op: "Async", + }, + { + Major: 253, + Minor: 2, + Op: "Total", + Value: 10, + }, + { + Major: 253, + Minor: 4, + Op: "Read", + Value: 10, + }, + { + Major: 253, + Minor: 4, + Op: "Write", + }, + { + Major: 253, + Minor: 4, + Op: "Sync", + Value: 10, + }, + { + Major: 253, + Minor: 4, + Op: "Async", + }, + { + Major: 253, + Minor: 4, + Op: "Total", + Value: 10, + }, + }, + }, + CPUStats: types.CPUStats{ + CPUUsage: types.CPUUsage{ + PercpuUsage: []uint64{ + 26426156, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + }, + UsageInUsermode: 20000000, + TotalUsage: 26426156, + }, + SystemUsage: 2336100000000, + OnlineCPUs: 1, + ThrottlingData: types.ThrottlingData{}, + }, + PreCPUStats: types.CPUStats{ + CPUUsage: types.CPUUsage{ + PercpuUsage: []uint64{ + 26426156, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 
0, + 0, + 0, + }, + UsageInUsermode: 20000000, + TotalUsage: 26426156, + }, + SystemUsage: 2335090000000, + OnlineCPUs: 1, + ThrottlingData: types.ThrottlingData{}, + }, + MemoryStats: types.MemoryStats{ + Stats: map[string]uint64{ + "cache": 790528, + "mapped_file": 618496, + "total_inactive_file": 782336, + "pgpgout": 1040, + "rss": 40960, + "total_mapped_file": 618496, + "pgpgin": 1243, + "pgmajfault": 6, + "total_rss": 40960, + "hierarchical_memory_limit": 536870912, + "total_pgfault": 1298, + "total_active_file": 8192, + "active_anon": 40960, + "total_active_anon": 40960, + "total_pgpgout": 1040, + "total_cache": 790528, + "active_file": 8192, + "pgfault": 1298, + "inactive_file": 782336, + "total_pgpgin": 1243, + "hierarchical_memsw_limit": 9223372036854772000, + }, + MaxUsage: 4825088, + Usage: 1343488, + Limit: 1033658368, + }, + }, + Networks: map[string]types.NetworkStats{ + "eth0": { + RxBytes: uint64(5338), + RxDropped: uint64(0), + RxErrors: uint64(0), + RxPackets: uint64(36), + TxBytes: uint64(648), + TxDropped: uint64(0), + TxErrors: uint64(0), + TxPackets: uint64(8), + }, + "eth5": { + RxBytes: uint64(4641), + RxDropped: uint64(0), + RxErrors: uint64(0), + RxPackets: uint64(26), + TxBytes: uint64(690), + TxDropped: uint64(0), + TxErrors: uint64(0), + TxPackets: uint64(9), + }, + }, + }, + nginxStatsKey: { + Stats: types.Stats{ + Read: nginxStatsRead, + PreRead: nginxStatsPreRead, + BlkioStats: types.BlkioStats{ + IoServiceBytesRecursive: []types.BlkioStatEntry{ + { + Major: 202, + Minor: 26368, + Op: "Read", + Value: 5730304, + }, + { + Major: 202, + Minor: 26368, + Op: "Write", + }, + { + Major: 202, + Minor: 26368, + Op: "Sync", + Value: 5730304, + }, + { + Major: 202, + Minor: 26368, + Op: "Async", + }, + { + Major: 202, + Minor: 26368, + Op: "Total", + Value: 5730304, + }, + { + Major: 253, + Minor: 1, + Op: "Read", + Value: 5730304, + }, + { + Major: 253, + Minor: 1, + Op: "Write", + }, + { + Major: 253, + Minor: 1, + Op: "Sync", + Value: 
5730304, + }, + { + Major: 253, + Minor: 1, + Op: "Async", + }, + { + Major: 253, + Minor: 1, + Op: "Total", + Value: 5730304, + }, + { + Major: 253, + Minor: 2, + Op: "Read", + Value: 5730304, + }, + { + Major: 253, + Minor: 2, + Op: "Write", + }, + { + Major: 253, + Minor: 2, + Op: "Sync", + Value: 5730304, + }, + { + Major: 253, + Minor: 2, + Op: "Async", + }, + { + Major: 253, + Minor: 2, + Op: "Total", + Value: 5730304, + }, + { + Major: 253, + Minor: 5, + Op: "Read", + Value: 5730304, + }, + { + Major: 253, + Minor: 5, + Op: "Write", + }, + { + Major: 253, + Minor: 5, + Op: "Sync", + Value: 5730304, + }, + { + Major: 253, + Minor: 5, + Op: "Async", + }, + { + Major: 253, + Minor: 5, + Op: "Total", + Value: 5730304, + }, + }, + IoServicedRecursive: []types.BlkioStatEntry{ + { + Major: 202, + Minor: 26368, + Op: "Read", + Value: 156, + }, + { + Major: 202, + Minor: 26368, + Op: "Write", + }, + { + Major: 202, + Minor: 26368, + Op: "Sync", + Value: 156, + }, + { + Major: 202, + Minor: 26368, + Op: "Async", + }, + { + Major: 202, + Minor: 26368, + Op: "Total", + Value: 156, + }, + { + Major: 253, + Minor: 1, + Op: "Read", + Value: 156, + }, + { + Major: 253, + Minor: 1, + Op: "Write", + }, + { + Major: 253, + Minor: 1, + Op: "Sync", + Value: 156, + }, + { + Major: 253, + Minor: 1, + Op: "Async", + }, + { + Major: 253, + Minor: 1, + Op: "Total", + Value: 156, + }, + { + Major: 253, + Minor: 2, + Op: "Read", + Value: 156, + }, + { + Major: 253, + Minor: 2, + Op: "Write", + }, + { + Major: 253, + Minor: 2, + Op: "Sync", + Value: 156, + }, + { + Major: 253, + Minor: 2, + Op: "Async", + }, + { + Major: 253, + Minor: 2, + Op: "Total", + Value: 156, + }, + { + Major: 253, + Minor: 5, + Op: "Read", + Value: 147, + }, + { + Major: 253, + Minor: 5, + Op: "Write", + }, + { + Major: 253, + Minor: 5, + Op: "Sync", + Value: 147, + }, + { + Major: 253, + Minor: 5, + Op: "Async", + }, + { + Major: 253, + Minor: 5, + Op: "Total", + Value: 147, + }, + }, + }, + CPUStats: 
types.CPUStats{ + CPUUsage: types.CPUUsage{ + PercpuUsage: []uint64{ + 65599511, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + }, + UsageInUsermode: 40000000, + TotalUsage: 65599511, + UsageInKernelmode: 10000000, + }, + SystemUsage: 2336100000000, + OnlineCPUs: 1, + ThrottlingData: types.ThrottlingData{}, + }, + PreCPUStats: types.CPUStats{ + CPUUsage: types.CPUUsage{ + PercpuUsage: []uint64{ + 65599511, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + }, + UsageInUsermode: 40000000, + TotalUsage: 65599511, + UsageInKernelmode: 10000000, + }, + SystemUsage: 2335090000000, + OnlineCPUs: 1, + ThrottlingData: types.ThrottlingData{}, + }, + MemoryStats: types.MemoryStats{ + Stats: map[string]uint64{ + "cache": 5787648, + "mapped_file": 3616768, + "total_inactive_file": 4321280, + "pgpgout": 1674, + "rss": 1597440, + "total_mapped_file": 3616768, + "pgpgin": 3477, + "pgmajfault": 40, + "total_rss": 1597440, + "total_inactive_anon": 4096, + "hierarchical_memory_limit": 536870912, + "total_pgfault": 2924, + "total_active_file": 1462272, + "active_anon": 1597440, + "total_active_anon": 1597440, + "total_pgpgout": 1674, + "total_cache": 5787648, + "inactive_anon": 4096, + "active_file": 1462272, + "pgfault": 2924, + "inactive_file": 4321280, + "total_pgpgin": 3477, + "hierarchical_memsw_limit": 9223372036854772000, + }, + MaxUsage: 8667136, + Usage: 8179712, + Limit: 1033658368, + }, + }, + }, +} + +// meta +var metaPauseCreated, _ = time.Parse(time.RFC3339Nano, "2018-11-19T15:31:26.641964373Z") +var metaPauseStarted, _ = time.Parse(time.RFC3339Nano, "2018-11-19T15:31:27.035698679Z") +var metaCreated, _ = time.Parse(time.RFC3339Nano, "2018-11-19T15:31:27.614884084Z") +var metaStarted, _ = time.Parse(time.RFC3339Nano, "2018-11-19T15:31:27.975996351Z") +var metaPullStart, _ = time.Parse(time.RFC3339Nano, "2018-11-19T15:31:27.197327103Z") +var metaPullStop, _ = time.Parse(time.RFC3339Nano, "2018-11-19T15:31:27.609089471Z") + 
+var validMeta = Task{ + Cluster: "test", + TaskARN: "arn:aws:ecs:aws-region-1:012345678901:task/a1234abc-a0a0-0a01-ab01-0abc012a0a0a", + Family: "nginx", + Revision: "2", + DesiredStatus: "RUNNING", + KnownStatus: "RUNNING", + Containers: []Container{ + { + ID: pauseStatsKey, + Name: "~internal~ecs~pause", + DockerName: "ecs-nginx-2-internalecspause", + Image: "amazon/amazon-ecs-pause:0.1.0", + ImageID: "", + Labels: map[string]string{ + "com.amazonaws.ecs.cluster": "test", + "com.amazonaws.ecs.container-name": "~internal~ecs~pause", + "com.amazonaws.ecs.task-arn": "arn:aws:ecs:aws-region-1:012345678901:task/a1234abc-a0a0-0a01-ab01-0abc012a0a0a", + "com.amazonaws.ecs.task-definition-family": "nginx", + "com.amazonaws.ecs.task-definition-version": "2", + }, + DesiredStatus: "RESOURCES_PROVISIONED", + KnownStatus: "RESOURCES_PROVISIONED", + Limits: map[string]float64{ + "CPU": 0, + "Memory": 0, + }, + CreatedAt: metaPauseCreated, + StartedAt: metaPauseStarted, + Type: "CNI_PAUSE", + Networks: []Network{ + { + NetworkMode: "awsvpc", + IPv4Addresses: []string{ + "172.31.25.181", + }, + }, + }, + }, + { + ID: nginxStatsKey, + Name: "nginx", + DockerName: "ecs-nginx-2-nginx", + Image: "nginx:alpine", + ImageID: "sha256:e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855", + Labels: map[string]string{ + "com.amazonaws.ecs.cluster": "test", + "com.amazonaws.ecs.container-name": "nginx", + "com.amazonaws.ecs.task-arn": "arn:aws:ecs:aws-region-1:012345678901:task/a1234abc-a0a0-0a01-ab01-0abc012a0a0a", + "com.amazonaws.ecs.task-definition-family": "nginx", + "com.amazonaws.ecs.task-definition-version": "2", + }, + DesiredStatus: "RUNNING", + KnownStatus: "RUNNING", + Limits: map[string]float64{ + "CPU": 0, + "Memory": 0, + }, + CreatedAt: metaCreated, + StartedAt: metaStarted, + Type: "NORMAL", + Networks: []Network{ + { + NetworkMode: "awsvpc", + IPv4Addresses: []string{ + "172.31.25.181", + }, + }, + }, + }, + }, + Limits: map[string]float64{ + "CPU": 0.5, + 
"Memory": 512,
+	},
+	PullStartedAt: metaPullStart,
+	PullStoppedAt: metaPullStop,
+}
diff --git a/plugins/inputs/ecs/stats.go b/plugins/inputs/ecs/stats.go
new file mode 100644
index 000000000..d2a8ee5d3
--- /dev/null
+++ b/plugins/inputs/ecs/stats.go
@@ -0,0 +1,308 @@
+package ecs
+
+import (
+	"fmt"
+	"strings"
+	"time"
+
+	"github.com/docker/docker/api/types"
+	"github.com/influxdata/telegraf"
+	"github.com/influxdata/telegraf/plugins/inputs/docker"
+)
+
+// parseContainerStats emits the meta, memory, CPU, network and block IO
+// measurements for one container. All points share the container's stats read
+// time; if that time is before the Unix epoch (e.g. the zero time.Time) the
+// current time is used instead.
+func parseContainerStats(c Container, acc telegraf.Accumulator, tags map[string]string) {
+	id := c.ID
+	stats := c.Stats
+	tm := stats.Read
+
+	if tm.Before(time.Unix(0, 0)) {
+		tm = time.Now()
+	}
+
+	metastats(id, c, acc, tags, tm)
+	memstats(id, stats, acc, tags, tm)
+	cpustats(id, stats, acc, tags, tm)
+	netstats(id, stats, acc, tags, tm)
+	blkstats(id, stats, acc, tags, tm)
+}
+
+// metastats adds one ecs_container_meta point built from the container's
+// metadata fields.
+func metastats(id string, c Container, acc telegraf.Accumulator, tags map[string]string, tm time.Time) {
+	metafields := map[string]interface{}{
+		"container_id":   id,
+		"docker_name":    c.DockerName,
+		"image":          c.Image,
+		"image_id":       c.ImageID,
+		"desired_status": c.DesiredStatus,
+		"known_status":   c.KnownStatus,
+		"limit_cpu":      c.Limits["CPU"],
+		"limit_mem":      c.Limits["Memory"],
+		"created_at":     c.CreatedAt,
+		"started_at":     c.StartedAt,
+		"type":           c.Type,
+	}
+
+	acc.AddFields("ecs_container_meta", metafields, tags, tm)
+}
+
+// memstats adds one ecs_container_mem point. Only counters present in
+// stats.MemoryStats.Stats are copied, and fail_count is emitted only when it is
+// non-zero.
+func memstats(id string, stats types.StatsJSON, acc telegraf.Accumulator, tags map[string]string, tm time.Time) {
+	memfields := map[string]interface{}{
+		"container_id": id,
+	}
+
+	statNames := []string{
+		"active_anon",
+		"active_file",
+		"cache",
+		"hierarchical_memory_limit",
+		"inactive_anon",
+		"inactive_file",
+		"mapped_file",
+		"pgfault",
+		"pgmajfault",
+		"pgpgin",
+		"pgpgout",
+		"rss",
+		"rss_huge",
+		"total_active_anon",
+		"total_active_file",
+		"total_cache",
+		"total_inactive_anon",
+		"total_inactive_file",
+		"total_mapped_file",
+		"total_pgfault",
+		"total_pgmajfault",
+		"total_pgpgin",
+		"total_pgpgout",
+		"total_rss",
+		"total_rss_huge",
+		"total_unevictable",
+		"total_writeback",
+		"unevictable",
+		"writeback",
+	}
+
+	for _, field := range statNames {
+		if value, ok := stats.MemoryStats.Stats[field]; ok {
+			memfields[field] = value
+		}
+	}
+	if stats.MemoryStats.Failcnt != 0 {
+		memfields["fail_count"] = stats.MemoryStats.Failcnt
+	}
+
+	memfields["limit"] = stats.MemoryStats.Limit
+	memfields["max_usage"] = stats.MemoryStats.MaxUsage
+
+	mem := docker.CalculateMemUsageUnixNoCache(stats.MemoryStats)
+	memLimit := float64(stats.MemoryStats.Limit)
+	memfields["usage"] = uint64(mem)
+	memfields["usage_percent"] = docker.CalculateMemPercentUnixNoCache(memLimit, mem)
+
+	acc.AddFields("ecs_container_mem", memfields, tags, tm)
+}
+
+// cpustats adds the ecs_container_cpu points: one tagged cpu=cpu-total with the
+// aggregate counters and usage_percent, then one per CPU tagged cpu=cpuN.
+func cpustats(id string, stats types.StatsJSON, acc telegraf.Accumulator, tags map[string]string, tm time.Time) {
+	cpufields := map[string]interface{}{
+		"usage_total":                  stats.CPUStats.CPUUsage.TotalUsage,
+		"usage_in_usermode":            stats.CPUStats.CPUUsage.UsageInUsermode,
+		"usage_in_kernelmode":          stats.CPUStats.CPUUsage.UsageInKernelmode,
+		"usage_system":                 stats.CPUStats.SystemUsage,
+		"throttling_periods":           stats.CPUStats.ThrottlingData.Periods,
+		"throttling_throttled_periods": stats.CPUStats.ThrottlingData.ThrottledPeriods,
+		"throttling_throttled_time":    stats.CPUStats.ThrottlingData.ThrottledTime,
+		"container_id":                 id,
+	}
+
+	previousCPU := stats.PreCPUStats.CPUUsage.TotalUsage
+	previousSystem := stats.PreCPUStats.SystemUsage
+	cpuPercent := docker.CalculateCPUPercentUnix(previousCPU, previousSystem, &stats)
+	cpufields["usage_percent"] = cpuPercent
+
+	cputags := copyTags(tags)
+	cputags["cpu"] = "cpu-total"
+	acc.AddFields("ecs_container_cpu", cpufields, cputags, tm)
+
+	// If we have OnlineCPUs field, then use it to restrict stats gathering to only Online CPUs
+	// (https://github.com/moby/moby/commit/115f91d7575d6de6c7781a96a082f144fd17e400)
+	//
+	// Nothing guarantees that PercpuUsage has at least OnlineCPUs entries, so
+	// only truncate when the slice is actually longer; slicing past its
+	// capacity would panic.
+	percpuusage := stats.CPUStats.CPUUsage.PercpuUsage
+	if online := int(stats.CPUStats.OnlineCPUs); online > 0 && online < len(percpuusage) {
+		percpuusage = percpuusage[:online]
+	}
+
+	for i, percpu := range percpuusage {
+		percputags := copyTags(tags)
+		percputags["cpu"] = fmt.Sprintf("cpu%d", i)
+		fields := map[string]interface{}{
+			"usage_total":  percpu,
+			"container_id": id,
+		}
+		acc.AddFields("ecs_container_cpu", fields, percputags, tm)
+	}
+}
+
+// netstats adds the ecs_container_net points: one per network interface,
+// tagged network=<name>, plus one tagged network=total that sums the counters
+// across interfaces. The total is skipped when there are no interfaces.
+func netstats(id string, stats types.StatsJSON, acc telegraf.Accumulator, tags map[string]string, tm time.Time) {
+	totalNetworkStatMap := make(map[string]interface{})
+	for network, netstat := range stats.Networks {
+		netfields := map[string]interface{}{
+			"rx_dropped":   netstat.RxDropped,
+			"rx_bytes":     netstat.RxBytes,
+			"rx_errors":    netstat.RxErrors,
+			"tx_packets":   netstat.TxPackets,
+			"tx_dropped":   netstat.TxDropped,
+			"rx_packets":   netstat.RxPackets,
+			"tx_errors":    netstat.TxErrors,
+			"tx_bytes":     netstat.TxBytes,
+			"container_id": id,
+		}
+
+		nettags := copyTags(tags)
+		nettags["network"] = network
+		acc.AddFields("ecs_container_net", netfields, nettags, tm)
+
+		for field, value := range netfields {
+			if field == "container_id" {
+				continue
+			}
+
+			var uintV uint64
+			switch v := value.(type) {
+			case uint64:
+				uintV = v
+			case int64:
+				uintV = uint64(v)
+			default:
+				continue
+			}
+
+			_, ok := totalNetworkStatMap[field]
+			if ok {
+				totalNetworkStatMap[field] = totalNetworkStatMap[field].(uint64) + uintV
+			} else {
+				totalNetworkStatMap[field] = uintV
+			}
+		}
+	}
+
+	// totalNetworkStatMap could be empty if container is running with --net=host.
+	if len(totalNetworkStatMap) != 0 {
+		nettags := copyTags(tags)
+		nettags["network"] = "total"
+		totalNetworkStatMap["container_id"] = id
+		acc.AddFields("ecs_container_net", totalNetworkStatMap, nettags, tm)
+	}
+}
+
+// blkstats adds the ecs_container_blkio points: one per block device, tagged
+// device=<major>:<minor>, plus one tagged device=total that sums every uint64
+// field across devices.
+func blkstats(id string, stats types.StatsJSON, acc telegraf.Accumulator, tags map[string]string, tm time.Time) {
+	blkioStats := stats.BlkioStats
+	// Make a map of devices to their block io stats
+	deviceStatMap := make(map[string]map[string]interface{})
+
+	// fieldsFor returns the field map for the metric's device, creating it on
+	// first use. Every stat list goes through it because a device may appear
+	// first in any of them, and assigning into a missing (nil) inner map panics.
+	fieldsFor := func(metric types.BlkioStatEntry) map[string]interface{} {
+		device := fmt.Sprintf("%d:%d", metric.Major, metric.Minor)
+		fields, ok := deviceStatMap[device]
+		if !ok {
+			fields = make(map[string]interface{})
+			deviceStatMap[device] = fields
+		}
+		return fields
+	}
+
+	for _, metric := range blkioStats.IoServiceBytesRecursive {
+		field := fmt.Sprintf("io_service_bytes_recursive_%s", strings.ToLower(metric.Op))
+		fieldsFor(metric)[field] = metric.Value
+	}
+
+	for _, metric := range blkioStats.IoServicedRecursive {
+		field := fmt.Sprintf("io_serviced_recursive_%s", strings.ToLower(metric.Op))
+		fieldsFor(metric)[field] = metric.Value
+	}
+
+	for _, metric := range blkioStats.IoQueuedRecursive {
+		field := fmt.Sprintf("io_queue_recursive_%s", strings.ToLower(metric.Op))
+		fieldsFor(metric)[field] = metric.Value
+	}
+
+	for _, metric := range blkioStats.IoServiceTimeRecursive {
+		field := fmt.Sprintf("io_service_time_recursive_%s", strings.ToLower(metric.Op))
+		fieldsFor(metric)[field] = metric.Value
+	}
+
+	for _, metric := range blkioStats.IoWaitTimeRecursive {
+		field := fmt.Sprintf("io_wait_time_%s", strings.ToLower(metric.Op))
+		fieldsFor(metric)[field] = metric.Value
+	}
+
+	for _, metric := range blkioStats.IoMergedRecursive {
+		field := fmt.Sprintf("io_merged_recursive_%s", strings.ToLower(metric.Op))
+		fieldsFor(metric)[field] = metric.Value
+	}
+
+	for _, metric := range blkioStats.IoTimeRecursive {
+		fieldsFor(metric)["io_time_recursive"] = metric.Value
+	}
+
+	for _, metric := range blkioStats.SectorsRecursive {
+		fieldsFor(metric)["sectors_recursive"] = metric.Value
+	}
+
+	totalStatMap := make(map[string]interface{})
+	for device, fields := range deviceStatMap {
+		fields["container_id"] = id
+
+		iotags := copyTags(tags)
+		iotags["device"] = device
+		acc.AddFields("ecs_container_blkio", fields, iotags, tm)
+
+		for field, value := range fields {
+			if field == "container_id" {
+				continue
+			}
+
+			var uintV uint64
+			switch v := value.(type) {
+			case uint64:
+				uintV = v
+			case int64:
+				uintV = uint64(v)
+			default:
+				continue
+			}
+
+			_, ok := totalStatMap[field]
+			if ok {
+				totalStatMap[field] = totalStatMap[field].(uint64) + uintV
+			} else {
+				totalStatMap[field] = uintV
+			}
+		}
+	}
+
+	totalStatMap["container_id"] = id
+	iotags := copyTags(tags)
+	iotags["device"] = "total"
+	acc.AddFields("ecs_container_blkio", totalStatMap, iotags, tm)
+}
diff --git a/plugins/inputs/ecs/stats_test.go b/plugins/inputs/ecs/stats_test.go
new file mode 100644
index 000000000..04632ac61
--- /dev/null
+++ b/plugins/inputs/ecs/stats_test.go
@@ -0,0 +1,226 @@
+package ecs
+
+import (
+	"testing"
+	"time"
+
+	"github.com/influxdata/telegraf/testutil"
+)
+
+func Test_metastats(t *testing.T) {
+	var mockAcc testutil.Accumulator
+
+	tags := map[string]string{
+		"test_tag": "test",
+	}
+	tm := time.Now()
+
+	metastats(nginxStatsKey, validMeta.Containers[1], &mockAcc, tags, tm)
+	mockAcc.AssertContainsTaggedFields(
+		t,
+		"ecs_container_meta",
+		map[string]interface{}{
+			"container_id":   nginxStatsKey,
+			"docker_name":
"ecs-nginx-2-nginx", + "image": "nginx:alpine", + "image_id": "sha256:e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855", + "desired_status": "RUNNING", + "known_status": "RUNNING", + "limit_cpu": float64(0), + "limit_mem": float64(0), + "created_at": metaCreated, + "started_at": metaStarted, + "type": "NORMAL", + }, + tags, + ) +} + +func Test_memstats(t *testing.T) { + var mockAcc testutil.Accumulator + + tags := map[string]string{ + "test_tag": "test", + } + tm := time.Now() + + memstats(nginxStatsKey, validStats[nginxStatsKey], &mockAcc, tags, tm) + mockAcc.AssertContainsTaggedFields( + t, + "ecs_container_mem", + map[string]interface{}{ + "active_anon": uint64(1597440), + "active_file": uint64(1462272), + "cache": uint64(5787648), + "container_id": nginxStatsKey, + "hierarchical_memory_limit": uint64(536870912), + "inactive_anon": uint64(4096), + "inactive_file": uint64(4321280), + "limit": uint64(1033658368), + "mapped_file": uint64(3616768), + "max_usage": uint64(8667136), + "pgmajfault": uint64(40), + "pgpgin": uint64(3477), + "pgpgout": uint64(1674), + "pgfault": uint64(2924), + "rss": uint64(1597440), + "total_active_anon": uint64(1597440), + "total_active_file": uint64(1462272), + "total_cache": uint64(5787648), + "total_inactive_anon": uint64(4096), + "total_inactive_file": uint64(4321280), + "total_mapped_file": uint64(3616768), + "total_pgfault": uint64(2924), + "total_pgpgout": uint64(1674), + "total_pgpgin": uint64(3477), + "total_rss": uint64(1597440), + "usage": uint64(2392064), + "usage_percent": float64(0.23141727228778164), + }, + map[string]string{ + "test_tag": "test", + }, + ) +} + +func Test_cpustats(t *testing.T) { + var mockAcc testutil.Accumulator + + tags := map[string]string{ + "test_tag": "test", + } + tm := time.Now() + + cpustats(nginxStatsKey, validStats[nginxStatsKey], &mockAcc, tags, tm) + mockAcc.AssertContainsTaggedFields( + t, + "ecs_container_cpu", + map[string]interface{}{ + "container_id": nginxStatsKey, + 
"throttling_periods": uint64(0), + "throttling_throttled_periods": uint64(0), + "throttling_throttled_time": uint64(0), + "usage_in_usermode": uint64(40000000), + "usage_in_kernelmode": uint64(10000000), + "usage_percent": float64(0), + "usage_system": uint64(2336100000000), + "usage_total": uint64(65599511), + }, + map[string]string{ + "test_tag": "test", + "cpu": "cpu-total", + }, + ) + mockAcc.AssertContainsTaggedFields( + t, + "ecs_container_cpu", + map[string]interface{}{ + "container_id": nginxStatsKey, + "usage_total": uint64(65599511), + }, + map[string]string{ + "test_tag": "test", + "cpu": "cpu0", + }, + ) +} + +func Test_netstats(t *testing.T) { + var mockAcc testutil.Accumulator + + tags := map[string]string{ + "test_tag": "test", + } + tm := time.Now() + + netstats(pauseStatsKey, validStats[pauseStatsKey], &mockAcc, tags, tm) + mockAcc.AssertContainsTaggedFields( + t, + "ecs_container_net", + map[string]interface{}{ + "container_id": pauseStatsKey, + "rx_bytes": uint64(5338), + "rx_dropped": uint64(0), + "rx_errors": uint64(0), + "rx_packets": uint64(36), + "tx_bytes": uint64(648), + "tx_dropped": uint64(0), + "tx_errors": uint64(0), + "tx_packets": uint64(8), + }, + map[string]string{ + "test_tag": "test", + "network": "eth0", + }, + ) + mockAcc.AssertContainsTaggedFields( + t, + "ecs_container_net", + map[string]interface{}{ + "container_id": pauseStatsKey, + "rx_bytes": uint64(4641), + "rx_dropped": uint64(0), + "rx_errors": uint64(0), + "rx_packets": uint64(26), + "tx_bytes": uint64(690), + "tx_dropped": uint64(0), + "tx_errors": uint64(0), + "tx_packets": uint64(9), + }, + map[string]string{ + "test_tag": "test", + "network": "eth5", + }, + ) + mockAcc.AssertContainsTaggedFields( + t, + "ecs_container_net", + map[string]interface{}{ + "container_id": pauseStatsKey, + "rx_bytes": uint64(9979), + "rx_dropped": uint64(0), + "rx_errors": uint64(0), + "rx_packets": uint64(62), + "tx_bytes": uint64(1338), + "tx_dropped": uint64(0), + "tx_errors": 
uint64(0), + "tx_packets": uint64(17), + }, + map[string]string{ + "test_tag": "test", + "network": "total", + }, + ) +} + +func Test_blkstats(t *testing.T) { + var mockAcc testutil.Accumulator + + tags := map[string]string{ + "test_tag": "test", + } + tm := time.Now() + + blkstats(nginxStatsKey, validStats[nginxStatsKey], &mockAcc, tags, tm) + mockAcc.AssertContainsTaggedFields( + t, + "ecs_container_blkio", + map[string]interface{}{ + "container_id": nginxStatsKey, + "io_service_bytes_recursive_read": uint64(5730304), + "io_service_bytes_recursive_write": uint64(0), + "io_service_bytes_recursive_sync": uint64(5730304), + "io_service_bytes_recursive_async": uint64(0), + "io_service_bytes_recursive_total": uint64(5730304), + "io_serviced_recursive_read": uint64(156), + "io_serviced_recursive_write": uint64(0), + "io_serviced_recursive_sync": uint64(156), + "io_serviced_recursive_async": uint64(0), + "io_serviced_recursive_total": uint64(156), + }, + map[string]string{ + "test_tag": "test", + "device": "202:26368", + }, + ) +} diff --git a/plugins/inputs/ecs/testdata/metadata.golden b/plugins/inputs/ecs/testdata/metadata.golden new file mode 100644 index 000000000..6823d7e5e --- /dev/null +++ b/plugins/inputs/ecs/testdata/metadata.golden @@ -0,0 +1,78 @@ +{ + "Cluster": "test", + "TaskARN": "arn:aws:ecs:aws-region-1:012345678901:task/a1234abc-a0a0-0a01-ab01-0abc012a0a0a", + "Family": "nginx", + "Revision": "2", + "DesiredStatus": "RUNNING", + "KnownStatus": "RUNNING", + "Containers": [ + { + "DockerId": "e6af031b91deb3136a2b7c42f262ed2ab554e2fe2736998c7d8edf4afe708dba", + "Name": "~internal~ecs~pause", + "DockerName": "ecs-nginx-2-internalecspause", + "Image": "amazon/amazon-ecs-pause:0.1.0", + "ImageID": "", + "Labels": { + "com.amazonaws.ecs.cluster": "test", + "com.amazonaws.ecs.container-name": "~internal~ecs~pause", + "com.amazonaws.ecs.task-arn": "arn:aws:ecs:aws-region-1:012345678901:task/a1234abc-a0a0-0a01-ab01-0abc012a0a0a", + 
"com.amazonaws.ecs.task-definition-family": "nginx", + "com.amazonaws.ecs.task-definition-version": "2" + }, + "DesiredStatus": "RESOURCES_PROVISIONED", + "KnownStatus": "RESOURCES_PROVISIONED", + "Limits": { + "CPU": 0, + "Memory": 0 + }, + "CreatedAt": "2018-11-19T15:31:26.641964373Z", + "StartedAt": "2018-11-19T15:31:27.035698679Z", + "Type": "CNI_PAUSE", + "Networks": [ + { + "NetworkMode": "awsvpc", + "IPv4Addresses": [ + "172.31.25.181" + ] + } + ] + }, + { + "DockerId": "fffe894e232d46c76475cfeabf4907f712e8b92618a37fca3ef0805bbbfb0299", + "Name": "nginx", + "DockerName": "ecs-nginx-2-nginx", + "Image": "nginx:alpine", + "ImageID": "sha256:e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855", + "Labels": { + "com.amazonaws.ecs.cluster": "test", + "com.amazonaws.ecs.container-name": "nginx", + "com.amazonaws.ecs.task-arn": "arn:aws:ecs:aws-region-1:012345678901:task/a1234abc-a0a0-0a01-ab01-0abc012a0a0a", + "com.amazonaws.ecs.task-definition-family": "nginx", + "com.amazonaws.ecs.task-definition-version": "2" + }, + "DesiredStatus": "RUNNING", + "KnownStatus": "RUNNING", + "Limits": { + "CPU": 0, + "Memory": 0 + }, + "CreatedAt": "2018-11-19T15:31:27.614884084Z", + "StartedAt": "2018-11-19T15:31:27.975996351Z", + "Type": "NORMAL", + "Networks": [ + { + "NetworkMode": "awsvpc", + "IPv4Addresses": [ + "172.31.25.181" + ] + } + ] + } + ], + "Limits": { + "CPU": 0.5, + "Memory": 512 + }, + "PullStartedAt": "2018-11-19T15:31:27.197327103Z", + "PullStoppedAt": "2018-11-19T15:31:27.609089471Z" +} \ No newline at end of file diff --git a/plugins/inputs/ecs/testdata/stats.golden b/plugins/inputs/ecs/testdata/stats.golden new file mode 100644 index 000000000..791f4f0b3 --- /dev/null +++ b/plugins/inputs/ecs/testdata/stats.golden @@ -0,0 +1,663 @@ +{ + "e6af031b91deb3136a2b7c42f262ed2ab554e2fe2736998c7d8edf4afe708dba": { + "read": "2018-11-19T15:40:00.936081344Z", + "preread": "2018-11-19T15:39:59.933000984Z", + "num_procs": 0, + "pids_stats": {}, + 
"networks": { + "eth0": { + "rx_bytes": 5338, + "rx_dropped": 0, + "rx_errors": 0, + "rx_packets": 36, + "tx_bytes": 648, + "tx_dropped": 0, + "tx_errors": 0, + "tx_packets": 8 + }, + "eth5": { + "rx_bytes": 4641, + "rx_dropped": 0, + "rx_errors": 0, + "rx_packets": 26, + "tx_bytes": 690, + "tx_dropped": 0, + "tx_errors": 0, + "tx_packets": 9 + } + }, + "memory_stats": { + "stats": { + "cache": 790528, + "mapped_file": 618496, + "total_inactive_file": 782336, + "pgpgout": 1040, + "rss": 40960, + "total_mapped_file": 618496, + "pgpgin": 1243, + "pgmajfault": 6, + "total_rss": 40960, + "hierarchical_memory_limit": 536870912, + "total_pgfault": 1298, + "total_active_file": 8192, + "active_anon": 40960, + "total_active_anon": 40960, + "total_pgpgout": 1040, + "total_cache": 790528, + "active_file": 8192, + "pgfault": 1298, + "inactive_file": 782336, + "total_pgpgin": 1243, + "hierarchical_memsw_limit": 9223372036854772000 + }, + "max_usage": 4825088, + "usage": 1343488, + "limit": 1033658368 + }, + "blkio_stats": { + "io_service_bytes_recursive": [ + { + "major": 202, + "minor": 26368, + "op": "Read", + "value": 790528 + }, + { + "major": 202, + "minor": 26368, + "op": "Write" + }, + { + "major": 202, + "minor": 26368, + "op": "Sync", + "value": 790528 + }, + { + "major": 202, + "minor": 26368, + "op": "Async" + }, + { + "major": 202, + "minor": 26368, + "op": "Total", + "value": 790528 + }, + { + "major": 253, + "minor": 1, + "op": "Read", + "value": 790528 + }, + { + "major": 253, + "minor": 1, + "op": "Write" + }, + { + "major": 253, + "minor": 1, + "op": "Sync", + "value": 790528 + }, + { + "major": 253, + "minor": 1, + "op": "Async" + }, + { + "major": 253, + "minor": 1, + "op": "Total", + "value": 790528 + }, + { + "major": 253, + "minor": 2, + "op": "Read", + "value": 790528 + }, + { + "major": 253, + "minor": 2, + "op": "Write" + }, + { + "major": 253, + "minor": 2, + "op": "Sync", + "value": 790528 + }, + { + "major": 253, + "minor": 2, + "op": "Async" + }, + 
{ + "major": 253, + "minor": 2, + "op": "Total", + "value": 790528 + }, + { + "major": 253, + "minor": 4, + "op": "Read", + "value": 790528 + }, + { + "major": 253, + "minor": 4, + "op": "Write" + }, + { + "major": 253, + "minor": 4, + "op": "Sync", + "value": 790528 + }, + { + "major": 253, + "minor": 4, + "op": "Async" + }, + { + "major": 253, + "minor": 4, + "op": "Total", + "value": 790528 + } + ], + "io_serviced_recursive": [ + { + "major": 202, + "minor": 26368, + "op": "Read", + "value": 10 + }, + { + "major": 202, + "minor": 26368, + "op": "Write" + }, + { + "major": 202, + "minor": 26368, + "op": "Sync", + "value": 10 + }, + { + "major": 202, + "minor": 26368, + "op": "Async" + }, + { + "major": 202, + "minor": 26368, + "op": "Total", + "value": 10 + }, + { + "major": 253, + "minor": 1, + "op": "Read", + "value": 10 + }, + { + "major": 253, + "minor": 1, + "op": "Write" + }, + { + "major": 253, + "minor": 1, + "op": "Sync", + "value": 10 + }, + { + "major": 253, + "minor": 1, + "op": "Async" + }, + { + "major": 253, + "minor": 1, + "op": "Total", + "value": 10 + }, + { + "major": 253, + "minor": 2, + "op": "Read", + "value": 10 + }, + { + "major": 253, + "minor": 2, + "op": "Write" + }, + { + "major": 253, + "minor": 2, + "op": "Sync", + "value": 10 + }, + { + "major": 253, + "minor": 2, + "op": "Async" + }, + { + "major": 253, + "minor": 2, + "op": "Total", + "value": 10 + }, + { + "major": 253, + "minor": 4, + "op": "Read", + "value": 10 + }, + { + "major": 253, + "minor": 4, + "op": "Write" + }, + { + "major": 253, + "minor": 4, + "op": "Sync", + "value": 10 + }, + { + "major": 253, + "minor": 4, + "op": "Async" + }, + { + "major": 253, + "minor": 4, + "op": "Total", + "value": 10 + } + ] + }, + "cpu_stats": { + "cpu_usage": { + "percpu_usage": [ + 26426156, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0 + ], + "usage_in_usermode": 20000000, + "total_usage": 26426156 + }, + "system_cpu_usage": 2336100000000, + "online_cpus": 1, + 
"throttling_data": {} + }, + "precpu_stats": { + "cpu_usage": { + "percpu_usage": [ + 26426156, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0 + ], + "usage_in_usermode": 20000000, + "total_usage": 26426156 + }, + "system_cpu_usage": 2335090000000, + "online_cpus": 1, + "throttling_data": {} + }, + "storage_stats": {} + }, + "fffe894e232d46c76475cfeabf4907f712e8b92618a37fca3ef0805bbbfb0299": { + "read": "2018-11-19T15:40:00.93733207Z", + "preread": "2018-11-19T15:39:59.934291009Z", + "num_procs": 0, + "pids_stats": {}, + "network": {}, + "memory_stats": { + "stats": { + "cache": 5787648, + "mapped_file": 3616768, + "total_inactive_file": 4321280, + "pgpgout": 1674, + "rss": 1597440, + "total_mapped_file": 3616768, + "pgpgin": 3477, + "pgmajfault": 40, + "total_rss": 1597440, + "total_inactive_anon": 4096, + "hierarchical_memory_limit": 536870912, + "total_pgfault": 2924, + "total_active_file": 1462272, + "active_anon": 1597440, + "total_active_anon": 1597440, + "total_pgpgout": 1674, + "total_cache": 5787648, + "inactive_anon": 4096, + "active_file": 1462272, + "pgfault": 2924, + "inactive_file": 4321280, + "total_pgpgin": 3477, + "hierarchical_memsw_limit": 9223372036854772000 + }, + "max_usage": 8667136, + "usage": 8179712, + "limit": 1033658368 + }, + "blkio_stats": { + "io_service_bytes_recursive": [ + { + "major": 202, + "minor": 26368, + "op": "Read", + "value": 5730304 + }, + { + "major": 202, + "minor": 26368, + "op": "Write" + }, + { + "major": 202, + "minor": 26368, + "op": "Sync", + "value": 5730304 + }, + { + "major": 202, + "minor": 26368, + "op": "Async" + }, + { + "major": 202, + "minor": 26368, + "op": "Total", + "value": 5730304 + }, + { + "major": 253, + "minor": 1, + "op": "Read", + "value": 5730304 + }, + { + "major": 253, + "minor": 1, + "op": "Write" + }, + { + "major": 253, + "minor": 1, + "op": "Sync", + "value": 5730304 + }, + { + "major": 253, + "minor": 1, + "op": "Async" + }, + { + "major": 253, + "minor": 1, + 
"op": "Total", + "value": 5730304 + }, + { + "major": 253, + "minor": 2, + "op": "Read", + "value": 5730304 + }, + { + "major": 253, + "minor": 2, + "op": "Write" + }, + { + "major": 253, + "minor": 2, + "op": "Sync", + "value": 5730304 + }, + { + "major": 253, + "minor": 2, + "op": "Async" + }, + { + "major": 253, + "minor": 2, + "op": "Total", + "value": 5730304 + }, + { + "major": 253, + "minor": 5, + "op": "Read", + "value": 5730304 + }, + { + "major": 253, + "minor": 5, + "op": "Write" + }, + { + "major": 253, + "minor": 5, + "op": "Sync", + "value": 5730304 + }, + { + "major": 253, + "minor": 5, + "op": "Async" + }, + { + "major": 253, + "minor": 5, + "op": "Total", + "value": 5730304 + } + ], + "io_serviced_recursive": [ + { + "major": 202, + "minor": 26368, + "op": "Read", + "value": 156 + }, + { + "major": 202, + "minor": 26368, + "op": "Write" + }, + { + "major": 202, + "minor": 26368, + "op": "Sync", + "value": 156 + }, + { + "major": 202, + "minor": 26368, + "op": "Async" + }, + { + "major": 202, + "minor": 26368, + "op": "Total", + "value": 156 + }, + { + "major": 253, + "minor": 1, + "op": "Read", + "value": 156 + }, + { + "major": 253, + "minor": 1, + "op": "Write" + }, + { + "major": 253, + "minor": 1, + "op": "Sync", + "value": 156 + }, + { + "major": 253, + "minor": 1, + "op": "Async" + }, + { + "major": 253, + "minor": 1, + "op": "Total", + "value": 156 + }, + { + "major": 253, + "minor": 2, + "op": "Read", + "value": 156 + }, + { + "major": 253, + "minor": 2, + "op": "Write" + }, + { + "major": 253, + "minor": 2, + "op": "Sync", + "value": 156 + }, + { + "major": 253, + "minor": 2, + "op": "Async" + }, + { + "major": 253, + "minor": 2, + "op": "Total", + "value": 156 + }, + { + "major": 253, + "minor": 5, + "op": "Read", + "value": 147 + }, + { + "major": 253, + "minor": 5, + "op": "Write" + }, + { + "major": 253, + "minor": 5, + "op": "Sync", + "value": 147 + }, + { + "major": 253, + "minor": 5, + "op": "Async" + }, + { + "major": 253, + 
"minor": 5, + "op": "Total", + "value": 147 + } + ] + }, + "cpu_stats": { + "cpu_usage": { + "percpu_usage": [ + 65599511, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0 + ], + "usage_in_usermode": 40000000, + "total_usage": 65599511, + "usage_in_kernelmode": 10000000 + }, + "system_cpu_usage": 2336100000000, + "online_cpus": 1, + "throttling_data": {} + }, + "precpu_stats": { + "cpu_usage": { + "percpu_usage": [ + 65599511, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0 + ], + "usage_in_usermode": 40000000, + "total_usage": 65599511, + "usage_in_kernelmode": 10000000 + }, + "system_cpu_usage": 2335090000000, + "online_cpus": 1, + "throttling_data": {} + }, + "storage_stats": {} + } +} diff --git a/plugins/inputs/ecs/types.go b/plugins/inputs/ecs/types.go new file mode 100644 index 000000000..0b9b402f6 --- /dev/null +++ b/plugins/inputs/ecs/types.go @@ -0,0 +1,75 @@ +package ecs + +import ( + "encoding/json" + "io" + "strings" + "time" + + "github.com/docker/docker/api/types" +) + +// Task is the ECS task representation +type Task struct { + Cluster string + TaskARN string + Family string + Revision string + DesiredStatus string + KnownStatus string + Containers []Container + Limits map[string]float64 + PullStartedAt time.Time + PullStoppedAt time.Time +} + +// Container is the ECS metadata container representation +type Container struct { + ID string `json:"DockerId"` + Name string + DockerName string + Image string + ImageID string + Labels map[string]string + DesiredStatus string + KnownStatus string + Limits map[string]float64 + CreatedAt time.Time + StartedAt time.Time + Stats types.StatsJSON + Type string + Networks []Network +} + +// Network is a docker network configuration +type Network struct { + NetworkMode string + IPv4Addresses []string +} + +func unmarshalTask(r io.Reader) (*Task, error) { + task := &Task{} + err := json.NewDecoder(r).Decode(task) + return task, err +} + +// docker parsers +func 
unmarshalStats(r io.Reader) (map[string]types.StatsJSON, error) { + var statsMap map[string]types.StatsJSON + err := json.NewDecoder(r).Decode(&statsMap) + return statsMap, err +} + +// interleaves Stats in to the Container objects in the Task +func mergeTaskStats(task *Task, stats map[string]types.StatsJSON) { + for i, c := range task.Containers { + if strings.Trim(c.ID, " ") == "" { + continue + } + stat, ok := stats[c.ID] + if !ok { + continue + } + task.Containers[i].Stats = stat + } +} diff --git a/plugins/inputs/ecs/types_test.go b/plugins/inputs/ecs/types_test.go new file mode 100644 index 000000000..d62ac6b40 --- /dev/null +++ b/plugins/inputs/ecs/types_test.go @@ -0,0 +1,61 @@ +package ecs + +import ( + "os" + "testing" + + "github.com/stretchr/testify/assert" +) + +func Test_parseTask(t *testing.T) { + r, err := os.Open("testdata/metadata.golden") + if err != nil { + t.Errorf("error opening test files") + } + parsed, err := unmarshalTask(r) + if err != nil { + t.Errorf("error parsing task %v", err) + } + assert.Equal(t, validMeta, *parsed, "Got = %v, want = %v", parsed, validMeta) +} + +func Test_parseStats(t *testing.T) { + r, err := os.Open("testdata/stats.golden") + if err != nil { + t.Errorf("error opening test files") + } + + parsed, err := unmarshalStats(r) + if err != nil { + t.Errorf("error parsing stats %v", err) + } + assert.Equal(t, validStats, parsed, "Got = %v, want = %v", parsed, validStats) +} + +func Test_mergeTaskStats(t *testing.T) { + metadata, err := os.Open("testdata/metadata.golden") + if err != nil { + t.Errorf("error opening test files") + } + + parsedMetadata, err := unmarshalTask(metadata) + if err != nil { + t.Errorf("error parsing task %v", err) + } + + stats, err := os.Open("testdata/stats.golden") + if err != nil { + t.Errorf("error opening test files") + } + + parsedStats, err := unmarshalStats(stats) + if err != nil { + t.Errorf("error parsing stats %v", err) + } + + mergeTaskStats(parsedMetadata, parsedStats) + + for _, 
cont := range parsedMetadata.Containers { + assert.Equal(t, validStats[cont.ID], cont.Stats, "Got = %v, want = %v", cont.Stats, validStats[cont.ID]) + } +}