Fix docker memory and cpu reporting in Windows (#3043)

This commit is contained in:
Daniel Nelson
2017-07-27 15:12:29 -07:00
committed by GitHub
parent 5f88be022c
commit d6cf9f4f30
8 changed files with 819 additions and 564 deletions

View File

@@ -12,7 +12,6 @@ import (
"time"
"github.com/docker/docker/api/types"
"github.com/docker/docker/client"
"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/filter"
"github.com/influxdata/telegraf/internal"
@@ -46,61 +45,14 @@ type Docker struct {
ContainerExclude []string `toml:"container_name_exclude"`
ContainerFilter DockerContainerFilter
client *client.Client
engine_host string
newEnvClient func() (Client, error)
newClient func(host string) (Client, error)
testing bool
client Client
engine_host string
filtersCreated bool
}
// infoWrapper calls Info on the supplied docker client. When c is nil the
// call is routed to a zero-value FakeDockerClient instead, which lets tests
// run without a real client.
func infoWrapper(c *client.Client, ctx context.Context) (types.Info, error) {
	if c == nil {
		fake := FakeDockerClient{}
		return fake.Info(ctx)
	}
	return c.Info(ctx)
}
// listWrapper calls ContainerList on the supplied docker client with the
// given options. When c is nil the call is routed to a zero-value
// FakeDockerClient instead, which lets tests run without a real client.
func listWrapper(
	c *client.Client,
	ctx context.Context,
	options types.ContainerListOptions,
) ([]types.Container, error) {
	if c == nil {
		fake := FakeDockerClient{}
		return fake.ContainerList(ctx, options)
	}
	return c.ContainerList(ctx, options)
}
// statsWrapper calls ContainerStats on the supplied docker client for the
// container identified by containerID, forwarding the stream flag unchanged.
// When c is nil the call is routed to a zero-value FakeDockerClient instead,
// which lets tests run without a real client.
func statsWrapper(
	c *client.Client,
	ctx context.Context,
	containerID string,
	stream bool,
) (types.ContainerStats, error) {
	if c == nil {
		fake := FakeDockerClient{}
		return fake.ContainerStats(ctx, containerID, stream)
	}
	return c.ContainerStats(ctx, containerID, stream)
}
// inspectWrapper calls ContainerInspect on the supplied docker client for the
// container identified by containerID. When c is nil the call is routed to a
// zero-value FakeDockerClient instead, which lets tests run without a real
// client.
func inspectWrapper(
	c *client.Client,
	ctx context.Context,
	containerID string,
) (types.ContainerJSON, error) {
	if c == nil {
		fake := FakeDockerClient{}
		return fake.ContainerInspect(ctx, containerID)
	}
	return c.ContainerInspect(ctx, containerID)
}
// KB, MB, GB, TB, PB...human friendly
const (
KB = 1000
@@ -145,32 +97,28 @@ var sampleConfig = `
docker_label_exclude = []
`
// Description returns a fixed one-line summary of this input.
func (d *Docker) Description() string {
	const summary = "Read metrics about docker containers"
	return summary
}
// SampleConfig returns the package-level sampleConfig string unchanged.
func (d *Docker) SampleConfig() string {
	return sampleConfig
}
// Gather starts stats collection
func (d *Docker) Gather(acc telegraf.Accumulator) error {
if d.client == nil && !d.testing {
var c *client.Client
if d.client == nil {
var c Client
var err error
defaultHeaders := map[string]string{"User-Agent": "engine-api-cli-1.0"}
if d.Endpoint == "ENV" {
c, err = client.NewEnvClient()
c, err = d.newEnvClient()
if err != nil {
return err
}
} else if d.Endpoint == "" {
c, err = client.NewClient("unix:///var/run/docker.sock", "", nil, defaultHeaders)
c, err = d.newClient("unix:///var/run/docker.sock")
if err != nil {
return err
}
} else {
c, err = client.NewClient(d.Endpoint, "", nil, defaultHeaders)
c, err = d.newClient(d.Endpoint)
if err != nil {
return err
}
@@ -201,7 +149,7 @@ func (d *Docker) Gather(acc telegraf.Accumulator) error {
opts := types.ContainerListOptions{}
ctx, cancel := context.WithTimeout(context.Background(), d.Timeout.Duration)
defer cancel()
containers, err := listWrapper(d.client, ctx, opts)
containers, err := d.client.ContainerList(ctx, opts)
if err != nil {
return err
}
@@ -232,7 +180,7 @@ func (d *Docker) gatherInfo(acc telegraf.Accumulator) error {
// Get info from docker daemon
ctx, cancel := context.WithTimeout(context.Background(), d.Timeout.Duration)
defer cancel()
info, err := infoWrapper(d.client, ctx)
info, err := d.client.Info(ctx)
if err != nil {
return err
}
@@ -338,7 +286,7 @@ func (d *Docker) gatherContainer(
ctx, cancel := context.WithTimeout(context.Background(), d.Timeout.Duration)
defer cancel()
r, err := statsWrapper(d.client, ctx, container.ID, false)
r, err := d.client.ContainerStats(ctx, container.ID, false)
if err != nil {
return fmt.Errorf("Error getting docker stats: %s", err.Error())
}
@@ -350,6 +298,7 @@ func (d *Docker) gatherContainer(
}
return fmt.Errorf("Error decoding: %s", err.Error())
}
daemonOSType := r.OSType
// Add labels to tags
for k, label := range container.Labels {
@@ -362,7 +311,7 @@ func (d *Docker) gatherContainer(
// Add whitelisted environment variables to tags
if len(d.TagEnvironment) > 0 {
info, err := inspectWrapper(d.client, ctx, container.ID)
info, err := d.client.ContainerInspect(ctx, container.ID)
if err != nil {
return fmt.Errorf("Error inspecting docker container: %s", err.Error())
}
@@ -377,7 +326,7 @@ func (d *Docker) gatherContainer(
}
}
gatherContainerStats(v, acc, tags, container.ID, d.PerDevice, d.Total)
gatherContainerStats(v, acc, tags, container.ID, d.PerDevice, d.Total, daemonOSType)
return nil
}
@@ -389,46 +338,68 @@ func gatherContainerStats(
id string,
perDevice bool,
total bool,
daemonOSType string,
) {
now := stat.Read
memfields := map[string]interface{}{
"max_usage": stat.MemoryStats.MaxUsage,
"usage": stat.MemoryStats.Usage,
"fail_count": stat.MemoryStats.Failcnt,
"limit": stat.MemoryStats.Limit,
"total_pgmafault": stat.MemoryStats.Stats["total_pgmajfault"],
"cache": stat.MemoryStats.Stats["cache"],
"mapped_file": stat.MemoryStats.Stats["mapped_file"],
"total_inactive_file": stat.MemoryStats.Stats["total_inactive_file"],
"pgpgout": stat.MemoryStats.Stats["pagpgout"],
"rss": stat.MemoryStats.Stats["rss"],
"total_mapped_file": stat.MemoryStats.Stats["total_mapped_file"],
"writeback": stat.MemoryStats.Stats["writeback"],
"unevictable": stat.MemoryStats.Stats["unevictable"],
"pgpgin": stat.MemoryStats.Stats["pgpgin"],
"total_unevictable": stat.MemoryStats.Stats["total_unevictable"],
"pgmajfault": stat.MemoryStats.Stats["pgmajfault"],
"total_rss": stat.MemoryStats.Stats["total_rss"],
"total_rss_huge": stat.MemoryStats.Stats["total_rss_huge"],
"total_writeback": stat.MemoryStats.Stats["total_write_back"],
"total_inactive_anon": stat.MemoryStats.Stats["total_inactive_anon"],
"rss_huge": stat.MemoryStats.Stats["rss_huge"],
"hierarchical_memory_limit": stat.MemoryStats.Stats["hierarchical_memory_limit"],
"total_pgfault": stat.MemoryStats.Stats["total_pgfault"],
"total_active_file": stat.MemoryStats.Stats["total_active_file"],
"active_anon": stat.MemoryStats.Stats["active_anon"],
"total_active_anon": stat.MemoryStats.Stats["total_active_anon"],
"total_pgpgout": stat.MemoryStats.Stats["total_pgpgout"],
"total_cache": stat.MemoryStats.Stats["total_cache"],
"inactive_anon": stat.MemoryStats.Stats["inactive_anon"],
"active_file": stat.MemoryStats.Stats["active_file"],
"pgfault": stat.MemoryStats.Stats["pgfault"],
"inactive_file": stat.MemoryStats.Stats["inactive_file"],
"total_pgpgin": stat.MemoryStats.Stats["total_pgpgin"],
"usage_percent": calculateMemPercent(stat),
"container_id": id,
"container_id": id,
}
memstats := []string{
"active_anon",
"active_file",
"cache",
"hierarchical_memory_limit",
"inactive_anon",
"inactive_file",
"mapped_file",
"pgfault",
"pgmajfault",
"pgpgin",
"pgpgout",
"rss",
"rss_huge",
"total_active_anon",
"total_active_file",
"total_cache",
"total_inactive_anon",
"total_inactive_file",
"total_mapped_file",
"total_pgfault",
"total_pgmajfault",
"total_pgpgin",
"total_pgpgout",
"total_rss",
"total_rss_huge",
"total_unevictable",
"total_writeback",
"unevictable",
"writeback",
}
for _, field := range memstats {
if value, ok := stat.MemoryStats.Stats[field]; ok {
memfields[field] = value
}
}
if stat.MemoryStats.Failcnt != 0 {
memfields["fail_count"] = stat.MemoryStats.Failcnt
}
if daemonOSType != "windows" {
memfields["limit"] = stat.MemoryStats.Limit
memfields["usage"] = stat.MemoryStats.Usage
memfields["max_usage"] = stat.MemoryStats.MaxUsage
mem := calculateMemUsageUnixNoCache(stat.MemoryStats)
memLimit := float64(stat.MemoryStats.Limit)
memfields["usage_percent"] = calculateMemPercentUnixNoCache(memLimit, mem)
} else {
memfields["commit_bytes"] = stat.MemoryStats.Commit
memfields["commit_peak_bytes"] = stat.MemoryStats.CommitPeak
memfields["private_working_set"] = stat.MemoryStats.PrivateWorkingSet
}
acc.AddFields("docker_container_mem", memfields, tags, now)
cpufields := map[string]interface{}{
@@ -439,9 +410,19 @@ func gatherContainerStats(
"throttling_periods": stat.CPUStats.ThrottlingData.Periods,
"throttling_throttled_periods": stat.CPUStats.ThrottlingData.ThrottledPeriods,
"throttling_throttled_time": stat.CPUStats.ThrottlingData.ThrottledTime,
"usage_percent": calculateCPUPercent(stat),
"container_id": id,
}
if daemonOSType != "windows" {
previousCPU := stat.PreCPUStats.CPUUsage.TotalUsage
previousSystem := stat.PreCPUStats.SystemUsage
cpuPercent := calculateCPUPercentUnix(previousCPU, previousSystem, stat)
cpufields["usage_percent"] = cpuPercent
} else {
cpuPercent := calculateCPUPercentWindows(stat)
cpufields["usage_percent"] = cpuPercent
}
cputags := copyTags(tags)
cputags["cpu"] = "cpu-total"
acc.AddFields("docker_container_cpu", cpufields, cputags, now)
@@ -521,30 +502,6 @@ func gatherContainerStats(
gatherBlockIOMetrics(stat, acc, tags, now, id, perDevice, total)
}
// calculateMemPercent returns memory usage as a percentage of the memory
// limit reported in stat. It returns 0 when the limit is not positive, which
// also avoids dividing by zero.
func calculateMemPercent(stat *types.StatsJSON) float64 {
	mem := &stat.MemoryStats
	if mem.Limit > 0 {
		return float64(mem.Usage) / float64(mem.Limit) * 100.0
	}
	return 0.0
}
// calculateCPUPercent returns the container's CPU usage as a percentage,
// derived from the change in container CPU time relative to the change in
// system CPU time between the previous and current readings in stat.
//
// The ratio is scaled by OnlineCPUs when that is positive, otherwise by the
// number of entries in PercpuUsage. It returns 0 unless both deltas are
// strictly positive.
func calculateCPUPercent(stat *types.StatsJSON) float64 {
	cur, prev := &stat.CPUStats, &stat.PreCPUStats

	// Change in container and system CPU usage between the two readings.
	cpuDelta := float64(cur.CPUUsage.TotalUsage) - float64(prev.CPUUsage.TotalUsage)
	systemDelta := float64(cur.SystemUsage) - float64(prev.SystemUsage)
	if !(systemDelta > 0.0 && cpuDelta > 0.0) {
		return 0.0
	}

	// Prefer the reported online CPU count; fall back to the per-CPU sample count.
	cpuCount := float64(len(cur.CPUUsage.PercpuUsage))
	if cur.OnlineCPUs > 0 {
		cpuCount = float64(cur.OnlineCPUs)
	}
	return (cpuDelta / systemDelta) * cpuCount * 100.0
}
func gatherBlockIOMetrics(
stat *types.StatsJSON,
acc telegraf.Accumulator,
@@ -742,6 +699,8 @@ func init() {
return &Docker{
PerDevice: true,
Timeout: internal.Duration{Duration: time.Second * 5},
newEnvClient: NewEnvClient,
newClient: NewClient,
filtersCreated: false,
}
})