Use a timeout for docker list & stat cmds

closes #1133
This commit is contained in:
Cameron Sparr 2016-05-01 10:20:15 -06:00
parent 4e9798d0e6
commit c114849a31
3 changed files with 20 additions and 4 deletions

View File

@ -88,6 +88,8 @@ based on _prefix_ in addition to globs. This means that a filter like
- [#1125](https://github.com/influxdata/telegraf/pull/1125): Wrap all exec command runners with a timeout, so hung os processes don't halt Telegraf. - [#1125](https://github.com/influxdata/telegraf/pull/1125): Wrap all exec command runners with a timeout, so hung os processes don't halt Telegraf.
- [#1113](https://github.com/influxdata/telegraf/pull/1113): Set MaxRetry and RequiredAcks defaults in Kafka output. - [#1113](https://github.com/influxdata/telegraf/pull/1113): Set MaxRetry and RequiredAcks defaults in Kafka output.
- [#1090](https://github.com/influxdata/telegraf/issues/1090): [agent] and [global_tags] config sometimes not getting applied. - [#1090](https://github.com/influxdata/telegraf/issues/1090): [agent] and [global_tags] config sometimes not getting applied.
- [#1133](https://github.com/influxdata/telegraf/issues/1133): Use a timeout for docker list & stat cmds.
- [#1052](https://github.com/influxdata/telegraf/issues/1052): Docker panic fix when decode fails.
## v0.12.1 [2016-04-14] ## v0.12.1 [2016-04-14]

View File

@ -565,6 +565,8 @@
# endpoint = "unix:///var/run/docker.sock" # endpoint = "unix:///var/run/docker.sock"
# ## Only collect metrics for these containers, collect all if empty # ## Only collect metrics for these containers, collect all if empty
# container_names = [] # container_names = []
# ## Timeout for docker list, info, and stats commands
# timeout = "5s"
# # Read statistics from one or many dovecot servers # # Read statistics from one or many dovecot servers
@ -600,6 +602,9 @@
# ## Commands array # ## Commands array
# commands = ["/tmp/test.sh", "/usr/bin/mycollector --foo=bar"] # commands = ["/tmp/test.sh", "/usr/bin/mycollector --foo=bar"]
# #
# ## Timeout for each command to complete.
# timeout = "5s"
#
# ## measurement name suffix (for separating different commands) # ## measurement name suffix (for separating different commands)
# name_suffix = "_mycollector" # name_suffix = "_mycollector"
# #

View File

@ -16,6 +16,7 @@ import (
"github.com/docker/engine-api/client" "github.com/docker/engine-api/client"
"github.com/docker/engine-api/types" "github.com/docker/engine-api/types"
"github.com/influxdata/telegraf" "github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/internal"
"github.com/influxdata/telegraf/plugins/inputs" "github.com/influxdata/telegraf/plugins/inputs"
) )
@ -23,6 +24,7 @@ import (
type Docker struct { type Docker struct {
Endpoint string Endpoint string
ContainerNames []string ContainerNames []string
Timeout internal.Duration
client DockerClient client DockerClient
} }
@ -54,6 +56,8 @@ var sampleConfig = `
endpoint = "unix:///var/run/docker.sock" endpoint = "unix:///var/run/docker.sock"
## Only collect metrics for these containers, collect all if empty ## Only collect metrics for these containers, collect all if empty
container_names = [] container_names = []
## Timeout for docker list, info, and stats commands
timeout = "5s"
` `
// Description returns input description // Description returns input description
@ -97,7 +101,9 @@ func (d *Docker) Gather(acc telegraf.Accumulator) error {
// List containers // List containers
opts := types.ContainerListOptions{} opts := types.ContainerListOptions{}
containers, err := d.client.ContainerList(context.Background(), opts) ctx, cancel := context.WithTimeout(context.Background(), d.Timeout.Duration)
defer cancel()
containers, err := d.client.ContainerList(ctx, opts)
if err != nil { if err != nil {
return err return err
} }
@ -106,7 +112,6 @@ func (d *Docker) Gather(acc telegraf.Accumulator) error {
var wg sync.WaitGroup var wg sync.WaitGroup
wg.Add(len(containers)) wg.Add(len(containers))
for _, container := range containers { for _, container := range containers {
go func(c types.Container) { go func(c types.Container) {
defer wg.Done() defer wg.Done()
err := d.gatherContainer(c, acc) err := d.gatherContainer(c, acc)
@ -127,7 +132,9 @@ func (d *Docker) gatherInfo(acc telegraf.Accumulator) error {
metadataFields := make(map[string]interface{}) metadataFields := make(map[string]interface{})
now := time.Now() now := time.Now()
// Get info from docker daemon // Get info from docker daemon
info, err := d.client.Info(context.Background()) ctx, cancel := context.WithTimeout(context.Background(), d.Timeout.Duration)
defer cancel()
info, err := d.client.Info(ctx)
if err != nil { if err != nil {
return err return err
} }
@ -210,7 +217,9 @@ func (d *Docker) gatherContainer(
} }
} }
r, err := d.client.ContainerStats(context.Background(), container.ID, false) ctx, cancel := context.WithTimeout(context.Background(), d.Timeout.Duration)
defer cancel()
r, err := d.client.ContainerStats(ctx, container.ID, false)
if err != nil { if err != nil {
log.Printf("Error getting docker stats: %s\n", err.Error()) log.Printf("Error getting docker stats: %s\n", err.Error())
} }