Add better user-facing errors for API timeouts (#6016)
This commit is contained in:
parent
ba39d7b6a8
commit
80089c7caf
|
@ -51,7 +51,7 @@ type Docker struct {
|
||||||
|
|
||||||
client Client
|
client Client
|
||||||
httpClient *http.Client
|
httpClient *http.Client
|
||||||
engine_host string
|
engineHost string
|
||||||
serverVersion string
|
serverVersion string
|
||||||
filtersCreated bool
|
filtersCreated bool
|
||||||
labelFilter filter.Filter
|
labelFilter filter.Filter
|
||||||
|
@ -122,12 +122,15 @@ var sampleConfig = `
|
||||||
# insecure_skip_verify = false
|
# insecure_skip_verify = false
|
||||||
`
|
`
|
||||||
|
|
||||||
|
// SampleConfig returns the default Docker TOML configuration.
|
||||||
|
func (d *Docker) SampleConfig() string { return sampleConfig }
|
||||||
|
|
||||||
|
// Description returns a one-line summary of what this plugin gathers.
|
||||||
func (d *Docker) Description() string {
|
func (d *Docker) Description() string {
|
||||||
return "Read metrics about docker containers"
|
return "Read metrics about docker containers"
|
||||||
}
|
}
|
||||||
|
|
||||||
func (d *Docker) SampleConfig() string { return sampleConfig }
|
// Gather metrics from the docker server.
|
||||||
|
|
||||||
func (d *Docker) Gather(acc telegraf.Accumulator) error {
|
func (d *Docker) Gather(acc telegraf.Accumulator) error {
|
||||||
if d.client == nil {
|
if d.client == nil {
|
||||||
c, err := d.getNewClient()
|
c, err := d.getNewClient()
|
||||||
|
@ -185,7 +188,11 @@ func (d *Docker) Gather(acc telegraf.Accumulator) error {
|
||||||
}
|
}
|
||||||
ctx, cancel := context.WithTimeout(context.Background(), d.Timeout.Duration)
|
ctx, cancel := context.WithTimeout(context.Background(), d.Timeout.Duration)
|
||||||
defer cancel()
|
defer cancel()
|
||||||
|
|
||||||
containers, err := d.client.ContainerList(ctx, opts)
|
containers, err := d.client.ContainerList(ctx, opts)
|
||||||
|
if err == context.DeadlineExceeded {
|
||||||
|
return errListTimeout
|
||||||
|
}
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
@ -196,10 +203,8 @@ func (d *Docker) Gather(acc telegraf.Accumulator) error {
|
||||||
for _, container := range containers {
|
for _, container := range containers {
|
||||||
go func(c types.Container) {
|
go func(c types.Container) {
|
||||||
defer wg.Done()
|
defer wg.Done()
|
||||||
err := d.gatherContainer(c, acc)
|
if err := d.gatherContainer(c, acc); err != nil {
|
||||||
if err != nil {
|
acc.AddError(err)
|
||||||
acc.AddError(fmt.Errorf("E! Error gathering container %s stats: %s\n",
|
|
||||||
c.Names, err.Error()))
|
|
||||||
}
|
}
|
||||||
}(container)
|
}(container)
|
||||||
}
|
}
|
||||||
|
@ -211,7 +216,11 @@ func (d *Docker) Gather(acc telegraf.Accumulator) error {
|
||||||
func (d *Docker) gatherSwarmInfo(acc telegraf.Accumulator) error {
|
func (d *Docker) gatherSwarmInfo(acc telegraf.Accumulator) error {
|
||||||
ctx, cancel := context.WithTimeout(context.Background(), d.Timeout.Duration)
|
ctx, cancel := context.WithTimeout(context.Background(), d.Timeout.Duration)
|
||||||
defer cancel()
|
defer cancel()
|
||||||
|
|
||||||
services, err := d.client.ServiceList(ctx, types.ServiceListOptions{})
|
services, err := d.client.ServiceList(ctx, types.ServiceListOptions{})
|
||||||
|
if err == context.DeadlineExceeded {
|
||||||
|
return errServiceTimeout
|
||||||
|
}
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
@ -280,19 +289,24 @@ func (d *Docker) gatherInfo(acc telegraf.Accumulator) error {
|
||||||
dataFields := make(map[string]interface{})
|
dataFields := make(map[string]interface{})
|
||||||
metadataFields := make(map[string]interface{})
|
metadataFields := make(map[string]interface{})
|
||||||
now := time.Now()
|
now := time.Now()
|
||||||
|
|
||||||
// Get info from docker daemon
|
// Get info from docker daemon
|
||||||
ctx, cancel := context.WithTimeout(context.Background(), d.Timeout.Duration)
|
ctx, cancel := context.WithTimeout(context.Background(), d.Timeout.Duration)
|
||||||
defer cancel()
|
defer cancel()
|
||||||
|
|
||||||
info, err := d.client.Info(ctx)
|
info, err := d.client.Info(ctx)
|
||||||
|
if err == context.DeadlineExceeded {
|
||||||
|
return errInfoTimeout
|
||||||
|
}
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
d.engine_host = info.Name
|
d.engineHost = info.Name
|
||||||
d.serverVersion = info.ServerVersion
|
d.serverVersion = info.ServerVersion
|
||||||
|
|
||||||
tags := map[string]string{
|
tags := map[string]string{
|
||||||
"engine_host": d.engine_host,
|
"engine_host": d.engineHost,
|
||||||
"server_version": d.serverVersion,
|
"server_version": d.serverVersion,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -403,7 +417,7 @@ func (d *Docker) gatherContainer(
|
||||||
imageName, imageVersion := parseImage(container.Image)
|
imageName, imageVersion := parseImage(container.Image)
|
||||||
|
|
||||||
tags := map[string]string{
|
tags := map[string]string{
|
||||||
"engine_host": d.engine_host,
|
"engine_host": d.engineHost,
|
||||||
"server_version": d.serverVersion,
|
"server_version": d.serverVersion,
|
||||||
"container_name": cname,
|
"container_name": cname,
|
||||||
"container_image": imageName,
|
"container_image": imageName,
|
||||||
|
@ -412,17 +426,22 @@ func (d *Docker) gatherContainer(
|
||||||
|
|
||||||
ctx, cancel := context.WithTimeout(context.Background(), d.Timeout.Duration)
|
ctx, cancel := context.WithTimeout(context.Background(), d.Timeout.Duration)
|
||||||
defer cancel()
|
defer cancel()
|
||||||
|
|
||||||
r, err := d.client.ContainerStats(ctx, container.ID, false)
|
r, err := d.client.ContainerStats(ctx, container.ID, false)
|
||||||
if err != nil {
|
if err == context.DeadlineExceeded {
|
||||||
return fmt.Errorf("Error getting docker stats: %s", err.Error())
|
return errStatsTimeout
|
||||||
}
|
}
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("error getting docker stats: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
defer r.Body.Close()
|
defer r.Body.Close()
|
||||||
dec := json.NewDecoder(r.Body)
|
dec := json.NewDecoder(r.Body)
|
||||||
if err = dec.Decode(&v); err != nil {
|
if err = dec.Decode(&v); err != nil {
|
||||||
if err == io.EOF {
|
if err == io.EOF {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
return fmt.Errorf("Error decoding: %s", err.Error())
|
return fmt.Errorf("error decoding: %v", err)
|
||||||
}
|
}
|
||||||
daemonOSType := r.OSType
|
daemonOSType := r.OSType
|
||||||
|
|
||||||
|
@ -438,19 +457,35 @@ func (d *Docker) gatherContainer(
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
return d.gatherContainerInspect(container, acc, tags, daemonOSType, v)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (d *Docker) gatherContainerInspect(
|
||||||
|
container types.Container,
|
||||||
|
acc telegraf.Accumulator,
|
||||||
|
tags map[string]string,
|
||||||
|
daemonOSType string,
|
||||||
|
v *types.StatsJSON,
|
||||||
|
) error {
|
||||||
|
ctx, cancel := context.WithTimeout(context.Background(), d.Timeout.Duration)
|
||||||
|
defer cancel()
|
||||||
|
|
||||||
info, err := d.client.ContainerInspect(ctx, container.ID)
|
info, err := d.client.ContainerInspect(ctx, container.ID)
|
||||||
|
if err == context.DeadlineExceeded {
|
||||||
|
return errInspectTimeout
|
||||||
|
}
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("Error inspecting docker container: %s", err.Error())
|
return fmt.Errorf("error inspecting docker container: %v", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Add whitelisted environment variables to tags
|
// Add whitelisted environment variables to tags
|
||||||
if len(d.TagEnvironment) > 0 {
|
if len(d.TagEnvironment) > 0 {
|
||||||
for _, envvar := range info.Config.Env {
|
for _, envvar := range info.Config.Env {
|
||||||
for _, configvar := range d.TagEnvironment {
|
for _, configvar := range d.TagEnvironment {
|
||||||
dock_env := strings.SplitN(envvar, "=", 2)
|
dockEnv := strings.SplitN(envvar, "=", 2)
|
||||||
//check for presence of tag in whitelist
|
//check for presence of tag in whitelist
|
||||||
if len(dock_env) == 2 && len(strings.TrimSpace(dock_env[1])) != 0 && configvar == dock_env[0] {
|
if len(dockEnv) == 2 && len(strings.TrimSpace(dockEnv[1])) != 0 && configvar == dockEnv[0] {
|
||||||
tags[dock_env[0]] = dock_env[1]
|
tags[dockEnv[0]] = dockEnv[1]
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -800,7 +835,7 @@ func sliceContains(in string, sl []string) bool {
|
||||||
func parseSize(sizeStr string) (int64, error) {
|
func parseSize(sizeStr string) (int64, error) {
|
||||||
matches := sizeRegex.FindStringSubmatch(sizeStr)
|
matches := sizeRegex.FindStringSubmatch(sizeStr)
|
||||||
if len(matches) != 4 {
|
if len(matches) != 4 {
|
||||||
return -1, fmt.Errorf("invalid size: '%s'", sizeStr)
|
return -1, fmt.Errorf("invalid size: %s", sizeStr)
|
||||||
}
|
}
|
||||||
|
|
||||||
size, err := strconv.ParseFloat(matches[1], 64)
|
size, err := strconv.ParseFloat(matches[1], 64)
|
||||||
|
|
|
@ -0,0 +1,11 @@
|
||||||
|
package docker
|
||||||
|
|
||||||
|
import "errors"
|
||||||
|
|
||||||
|
var (
|
||||||
|
errInfoTimeout = errors.New("timeout retrieving docker engine info")
|
||||||
|
errStatsTimeout = errors.New("timeout retrieving container stats")
|
||||||
|
errInspectTimeout = errors.New("timeout retrieving container environment")
|
||||||
|
errListTimeout = errors.New("timeout retrieving container list")
|
||||||
|
errServiceTimeout = errors.New("timeout retrieving swarm service list")
|
||||||
|
)
|
Loading…
Reference in New Issue