Tidy ECS readme and make review changes

This commit is contained in:
Daniel Nelson 2019-05-26 20:01:11 -07:00
parent dfb83778ea
commit 980b174687
No known key found for this signature in database
GPG Key ID: CAAD59C9444F6155
6 changed files with 187 additions and 88 deletions

View File

@ -8,6 +8,7 @@
#### New Inputs
- [bind](/plugins/inputs/bind/README.md) - Contributed by @dswarbrick & @danielllek
- [ecs](/plugins/inputs/ecs/README.md) - Contributed by @rbtr
- [github](/plugins/inputs/github/README.md) - Contributed by @influxdata
- [powerdns_recursor](/plugins/inputs/powerdns_recursor/README.md) - Contributed by @dupondje

View File

@ -165,7 +165,7 @@ For documentation on the latest development code see the [documentation index][d
* [dns query time](./plugins/inputs/dns_query)
* [docker](./plugins/inputs/docker)
* [dovecot](./plugins/inputs/dovecot)
* [ecs](./plugins/inputs/ecs)
* [ecs](./plugins/inputs/ecs) (Amazon Elastic Container Service, Fargate)
* [elasticsearch](./plugins/inputs/elasticsearch)
* [exec](./plugins/inputs/exec) (generic executable plugin, supports JSON, influx, graphite and nagios)
* [fail2ban](./plugins/inputs/fail2ban)

View File

@ -1,23 +1,31 @@
# ECS Input Plugin
ECS, Fargate compatible, input plugin which uses the [ECS v2 metadata and stats API](https://docs.aws.amazon.com/AmazonECS/latest/developerguide/task-metadata-endpoint-v2.html)
endpoints to gather stats on running containers in a Task.
An input plugin, compatible with both ECS and Fargate, which uses the
[ECS v2 metadata and stats API][task-metadata-endpoint-v2] endpoints to gather
stats on running containers in a Task.
The telegraf container must be run in the same Task as the workload it is inspecting.
The telegraf container must be run in the same Task as the workload it is
inspecting.
This is similar to (and reuses a few pieces of) the [Docker](../docker/README.md) input plugin, with some ECS specific modifications for AWS metadata and stats formats.
This is similar to (and reuses a few pieces of) the [Docker][docker-input]
input plugin, with some ECS-specific modifications for AWS metadata and stats
formats.
The amazon-ecs-agent (though it _is_ a container running on the host) is not
present in the metadata/stats endpoints.
### Configuration:
### Configuration
```toml
# Read metrics about ECS containers
[[inputs.ecs]]
# endpoint_url = http://
## ECS metadata url
# endpoint_url = "http://169.254.170.2"
## Containers to include and exclude. Globs accepted.
## Note that an empty array for both will include all containers
container_name_include = []
container_name_exclude = []
# container_name_include = []
# container_name_exclude = []
## Container states to include and exclude. Globs accepted.
## When empty only containers in the "running" state will be captured.
@ -33,19 +41,157 @@ This is similar to (and reuses a few pieces of) the [Docker](../docker/README.md
timeout = "5s"
```
#### Environment Configuration
### Metrics
The ECS client can optionally also be configured with the following env vars:
- ecs_task
- tags:
- cluster
- task_arn
- family
- revision
- id
- name
- fields:
- revision (string)
- desired_status (string)
- known_status (string)
- limit_cpu (float)
- limit_mem (float)
+ ecs_container_mem
- tags:
- cluster
- task_arn
- family
- revision
- id
- name
- fields:
- container_id
- active_anon
- active_file
- cache
- hierarchical_memory_limit
- inactive_anon
- inactive_file
- mapped_file
- pgfault
- pgmajfault
- pgpgin
- pgpgout
- rss
- rss_huge
- total_active_anon
- total_active_file
- total_cache
- total_inactive_anon
- total_inactive_file
- total_mapped_file
- total_pgfault
- total_pgmajfault
- total_pgpgin
- total_pgpgout
- total_rss
- total_rss_huge
- total_unevictable
- total_writeback
- unevictable
- writeback
- fail_count
- limit
- max_usage
- usage
- usage_percent
- ecs_container_cpu
- tags:
- cluster
- task_arn
- family
- revision
- id
- name
- cpu
- fields:
- container_id
- usage_total
- usage_in_usermode
- usage_in_kernelmode
- usage_system
- throttling_periods
- throttling_throttled_periods
- throttling_throttled_time
- usage_percent
- usage_total
+ ecs_container_net
- tags:
- cluster
- task_arn
- family
- revision
- id
- name
- network
- fields:
- container_id
- rx_packets
- rx_dropped
- rx_bytes
- rx_errors
- tx_packets
- tx_dropped
- tx_bytes
- tx_errors
- ecs_container_blkio
- tags:
- cluster
- task_arn
- family
- revision
- id
- name
- device
- fields:
- container_id
- io_service_bytes_recursive_async
- io_service_bytes_recursive_read
- io_service_bytes_recursive_sync
- io_service_bytes_recursive_total
- io_service_bytes_recursive_write
- io_serviced_recursive_async
- io_serviced_recursive_read
- io_serviced_recursive_sync
- io_serviced_recursive_total
- io_serviced_recursive_write
+ ecs_container_meta
- tags:
- cluster
- task_arn
- family
- revision
- id
- name
- fields:
- container_id
- docker_name
- image
- image_id
- desired_status
- known_status
- limit_cpu
- limit_mem
- created_at
- started_at
- type
### Example Output
```
ECS_TIMEOUT
```
### Example Output:
```
ecs_task_status,cluster=test,family=nginx,host=c4b301d4a123,revision=2,task_arn=arn:aws:ecs:aws-region-1:012345678901:task/a1234abc-a0a0-0a01-ab01-0abc012a0a0a revision="2",desired_status="RUNNING",known_status="RUNNING",limit_cpu=0.5,limit_mem=512 1542641488000000000
ecs_task,cluster=test,family=nginx,host=c4b301d4a123,revision=2,task_arn=arn:aws:ecs:aws-region-1:012345678901:task/a1234abc-a0a0-0a01-ab01-0abc012a0a0a revision="2",desired_status="RUNNING",known_status="RUNNING",limit_cpu=0.5,limit_mem=512 1542641488000000000
ecs_container_mem,cluster=test,com.amazonaws.ecs.cluster=test,com.amazonaws.ecs.container-name=~internal~ecs~pause,com.amazonaws.ecs.task-arn=arn:aws:ecs:aws-region-1:012345678901:task/a1234abc-a0a0-0a01-ab01-0abc012a0a0a,com.amazonaws.ecs.task-definition-family=nginx,com.amazonaws.ecs.task-definition-version=2,family=nginx,host=c4b301d4a123,id=e6af031b91deb3136a2b7c42f262ed2ab554e2fe2736998c7d8edf4afe708dba,name=~internal~ecs~pause,revision=2,task_arn=arn:aws:ecs:aws-region-1:012345678901:task/a1234abc-a0a0-0a01-ab01-0abc012a0a0a active_anon=40960i,active_file=8192i,cache=790528i,pgpgin=1243i,total_pgfault=1298i,total_rss=40960i,limit=1033658368i,max_usage=4825088i,hierarchical_memory_limit=536870912i,rss=40960i,total_active_file=8192i,total_mapped_file=618496i,usage_percent=0.05349543109392212,container_id="e6af031b91deb3136a2b7c42f262ed2ab554e2fe2736998c7d8edf4afe708dba",pgfault=1298i,pgmajfault=6i,pgpgout=1040i,total_active_anon=40960i,total_inactive_file=782336i,total_pgpgin=1243i,usage=552960i,inactive_file=782336i,mapped_file=618496i,total_cache=790528i,total_pgpgout=1040i 1542642001000000000
ecs_container_cpu,cluster=test,com.amazonaws.ecs.cluster=test,com.amazonaws.ecs.container-name=~internal~ecs~pause,com.amazonaws.ecs.task-arn=arn:aws:ecs:aws-region-1:012345678901:task/a1234abc-a0a0-0a01-ab01-0abc012a0a0a,com.amazonaws.ecs.task-definition-family=nginx,com.amazonaws.ecs.task-definition-version=2,cpu=cpu-total,family=nginx,host=c4b301d4a123,id=e6af031b91deb3136a2b7c42f262ed2ab554e2fe2736998c7d8edf4afe708dba,name=~internal~ecs~pause,revision=2,task_arn=arn:aws:ecs:aws-region-1:012345678901:task/a1234abc-a0a0-0a01-ab01-0abc012a0a0a usage_in_kernelmode=0i,throttling_throttled_periods=0i,throttling_periods=0i,throttling_throttled_time=0i,container_id="e6af031b91deb3136a2b7c42f262ed2ab554e2fe2736998c7d8edf4afe708dba",usage_percent=0,usage_total=26426156i,usage_in_usermode=20000000i,usage_system=2336100000000i 1542642001000000000
ecs_container_cpu,cluster=test,com.amazonaws.ecs.cluster=test,com.amazonaws.ecs.container-name=~internal~ecs~pause,com.amazonaws.ecs.task-arn=arn:aws:ecs:aws-region-1:012345678901:task/a1234abc-a0a0-0a01-ab01-0abc012a0a0a,com.amazonaws.ecs.task-definition-family=nginx,com.amazonaws.ecs.task-definition-version=2,cpu=cpu0,family=nginx,host=c4b301d4a123,id=e6af031b91deb3136a2b7c42f262ed2ab554e2fe2736998c7d8edf4afe708dba,name=~internal~ecs~pause,revision=2,task_arn=arn:aws:ecs:aws-region-1:012345678901:task/a1234abc-a0a0-0a01-ab01-0abc012a0a0a container_id="e6af031b91deb3136a2b7c42f262ed2ab554e2fe2736998c7d8edf4afe708dba",usage_total=26426156i 1542642001000000000
@ -60,5 +206,5 @@ ecs_container_blkio,cluster=test,com.amazonaws.ecs.cluster=test,com.amazonaws.ec
ecs_container_meta,cluster=test,com.amazonaws.ecs.cluster=test,com.amazonaws.ecs.container-name=~internal~ecs~pause,com.amazonaws.ecs.task-arn=arn:aws:ecs:aws-region-1:012345678901:task/a1234abc-a0a0-0a01-ab01-0abc012a0a0a,com.amazonaws.ecs.task-definition-family=nginx,com.amazonaws.ecs.task-definition-version=2,family=nginx,host=c4b301d4a123,id=e6af031b91deb3136a2b7c42f262ed2ab554e2fe2736998c7d8edf4afe708dba,name=~internal~ecs~pause,revision=2,task_arn=arn:aws:ecs:aws-region-1:012345678901:task/a1234abc-a0a0-0a01-ab01-0abc012a0a0a limit_mem=0,type="CNI_PAUSE",container_id="e6af031b91deb3136a2b7c42f262ed2ab554e2fe2736998c7d8edf4afe708dba",docker_name="ecs-nginx-2-internalecspause",limit_cpu=0,known_status="RESOURCES_PROVISIONED",image="amazon/amazon-ecs-pause:0.1.0",image_id="",desired_status="RESOURCES_PROVISIONED" 1542642001000000000
```
### Notes:
- the amazon-ecs-agent (though it _is_ a container running on the host) is not present in the metadata/stats endpoints.
[docker-input]: /plugins/inputs/docker/README.md
[task-metadata-endpoint-v2]: https://docs.aws.amazon.com/AmazonECS/latest/developerguide/task-metadata-endpoint-v2.html

View File

@ -1,10 +1,8 @@
package ecs
import (
"log"
"net/http"
"net/url"
"os"
"time"
"github.com/docker/docker/api/types"
@ -25,20 +23,6 @@ type httpClient interface {
Do(req *http.Request) (*http.Response, error)
}
// NewEnvClient builds an EcsClient whose timeout is taken from the
// ECS_TIMEOUT environment variable. The value is parsed with
// time.ParseDuration; if the variable is unset, empty, or cannot be parsed,
// the timeout stays at the five second default. The client and error are
// whatever NewClient returns for that timeout.
func NewEnvClient() (*EcsClient, error) {
	const defaultTimeout = 5 * time.Second

	timeout := defaultTimeout
	raw := os.Getenv("ECS_TIMEOUT")
	if raw != "" {
		// A malformed duration is ignored on purpose: fall back to the default.
		parsed, parseErr := time.ParseDuration(raw)
		if parseErr == nil {
			timeout = parsed
		}
	}

	return NewClient(timeout)
}
// NewClient constructs an ECS client with the passed configuration params
func NewClient(timeout time.Duration) (*EcsClient, error) {
c := &http.Client{
@ -68,13 +52,11 @@ func (c *EcsClient) Task() (*Task, error) {
resp, err := c.client.Do(req)
if err != nil {
log.Println("failed to GET metadata endpoint", err)
return nil, err
}
task, err := unmarshalTask(resp.Body)
if err != nil {
log.Println("failed to decode response from metadata endpoint", err)
return nil, err
}
@ -91,13 +73,11 @@ func (c *EcsClient) ContainerStats() (map[string]types.StatsJSON, error) {
resp, err := c.client.Do(req)
if err != nil {
log.Println("failed to GET stats endpoint", err)
return map[string]types.StatsJSON{}, err
}
statsMap, err := unmarshalStats(resp.Body)
if err != nil {
log.Println("failed to decode response from stats endpoint")
return map[string]types.StatsJSON{}, err
}

View File

@ -1,7 +1,6 @@
package ecs
import (
"log"
"net/url"
"time"
@ -14,7 +13,6 @@ import (
// Ecs config object
type Ecs struct {
EndpointURL string `toml:"endpoint_url"`
EnvCfg bool `toml:"envcfg"`
Timeout internal.Duration
ContainerNameInclude []string `toml:"container_name_include"`
@ -26,8 +24,7 @@ type Ecs struct {
LabelInclude []string `toml:"ecs_label_include"`
LabelExclude []string `toml:"ecs_label_exclude"`
newEnvClient func() (*EcsClient, error)
newClient func(timeout time.Duration) (*EcsClient, error)
newClient func(timeout time.Duration) (*EcsClient, error)
client Client
filtersCreated bool
@ -48,13 +45,10 @@ var sampleConfig = `
## ECS metadata url
# endpoint_url = "http://169.254.170.2"
## Set to true to configure from env vars
envcfg = false
## Containers to include and exclude. Globs accepted.
## Note that an empty array for both will include all containers
container_name_include = []
container_name_exclude = []
# container_name_include = []
# container_name_exclude = []
## Container states to include and exclude. Globs accepted.
## When empty only containers in the "running" state will be captured.
@ -110,13 +104,9 @@ func (ecs *Ecs) Gather(acc telegraf.Accumulator) error {
func initSetup(ecs *Ecs) error {
if ecs.client == nil {
var c *EcsClient
var err error
if ecs.EnvCfg {
c, err = ecs.newEnvClient()
} else {
c, err = ecs.newClient(ecs.Timeout.Duration)
}
var c *EcsClient
c, err = ecs.newClient(ecs.Timeout.Duration)
if err != nil {
return err
}
@ -164,12 +154,10 @@ func (ecs *Ecs) accTask(task *Task, tags map[string]string, acc telegraf.Accumul
func (ecs *Ecs) accContainers(task *Task, taskTags map[string]string, acc telegraf.Accumulator) {
for _, c := range task.Containers {
if !ecs.containerNameFilter.Match(c.Name) {
log.Printf("container %v did not match name filter", c.ID)
continue
}
if !ecs.statusFilter.Match(c.KnownStatus) {
log.Printf("container %v did not match status filter", c.ID)
continue
}
@ -242,8 +230,6 @@ func init() {
return &Ecs{
EndpointURL: "http://169.254.170.2",
Timeout: internal.Duration{Duration: 5 * time.Second},
EnvCfg: true,
newEnvClient: NewEnvClient,
newClient: NewClient,
filtersCreated: false,
}

View File

@ -4,58 +4,44 @@ import (
"os"
"testing"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
func Test_parseTask(t *testing.T) {
r, err := os.Open("testdata/metadata.golden")
if err != nil {
t.Errorf("error opening test files")
}
require.NoError(t, err)
parsed, err := unmarshalTask(r)
if err != nil {
t.Errorf("error parsing task %v", err)
}
assert.Equal(t, validMeta, *parsed, "Got = %v, want = %v", parsed, validMeta)
require.NoError(t, err)
require.Equal(t, validMeta, *parsed)
}
func Test_parseStats(t *testing.T) {
r, err := os.Open("testdata/stats.golden")
if err != nil {
t.Errorf("error opening test files")
}
require.NoError(t, err)
parsed, err := unmarshalStats(r)
if err != nil {
t.Errorf("error parsing stats %v", err)
}
assert.Equal(t, validStats, parsed, "Got = %v, want = %v", parsed, validStats)
require.NoError(t, err)
require.Equal(t, validStats, parsed)
}
func Test_mergeTaskStats(t *testing.T) {
metadata, err := os.Open("testdata/metadata.golden")
if err != nil {
t.Errorf("error opening test files")
}
require.NoError(t, err)
parsedMetadata, err := unmarshalTask(metadata)
if err != nil {
t.Errorf("error parsing task %v", err)
}
require.NoError(t, err)
stats, err := os.Open("testdata/stats.golden")
if err != nil {
t.Errorf("error opening test files")
}
require.NoError(t, err)
parsedStats, err := unmarshalStats(stats)
if err != nil {
t.Errorf("error parsing stats %v", err)
}
require.NoError(t, err)
mergeTaskStats(parsedMetadata, parsedStats)
for _, cont := range parsedMetadata.Containers {
assert.Equal(t, validStats[cont.ID], cont.Stats, "Got = %v, want = %v", cont.Stats, validStats[cont.ID])
require.Equal(t, validStats[cont.ID], cont.Stats)
}
}