Fix docker memory and cpu reporting in Windows (#3043)
@@ -12,7 +12,6 @@ import (
 	"time"
 
 	"github.com/docker/docker/api/types"
-	"github.com/docker/docker/client"
 	"github.com/influxdata/telegraf"
 	"github.com/influxdata/telegraf/filter"
 	"github.com/influxdata/telegraf/internal"
@@ -46,61 +45,14 @@ type Docker struct {
 	ContainerExclude []string `toml:"container_name_exclude"`
 	ContainerFilter  DockerContainerFilter
 
-	client      *client.Client
-	engine_host string
+	newEnvClient func() (Client, error)
+	newClient    func(host string) (Client, error)
 
-	testing        bool
+	client         Client
+	engine_host    string
 	filtersCreated bool
 }
 
-// infoWrapper wraps client.Client.Info for testing.
-func infoWrapper(c *client.Client, ctx context.Context) (types.Info, error) {
-	if c != nil {
-		return c.Info(ctx)
-	}
-	fc := FakeDockerClient{}
-	return fc.Info(ctx)
-}
-
-// listWrapper wraps client.Client.ContainerList for testing.
-func listWrapper(
-	c *client.Client,
-	ctx context.Context,
-	options types.ContainerListOptions,
-) ([]types.Container, error) {
-	if c != nil {
-		return c.ContainerList(ctx, options)
-	}
-	fc := FakeDockerClient{}
-	return fc.ContainerList(ctx, options)
-}
-
-// statsWrapper wraps client.Client.ContainerStats for testing.
-func statsWrapper(
-	c *client.Client,
-	ctx context.Context,
-	containerID string,
-	stream bool,
-) (types.ContainerStats, error) {
-	if c != nil {
-		return c.ContainerStats(ctx, containerID, stream)
-	}
-	fc := FakeDockerClient{}
-	return fc.ContainerStats(ctx, containerID, stream)
-}
-
-// inspectWrapper wraps client.Client.ContainerInspect for testing.
-func inspectWrapper(
-	c *client.Client,
-	ctx context.Context,
-	containerID string,
-) (types.ContainerJSON, error) {
-	if c != nil {
-		return c.ContainerInspect(ctx, containerID)
-	}
-	fc := FakeDockerClient{}
-	return fc.ContainerInspect(ctx, containerID)
-}
-
 // KB, MB, GB, TB, PB...human friendly
 const (
 	KB = 1000
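The nil-checking wrappers above are replaced by a Client interface that is defined outside this diff. Judging from the call sites further down, a minimal sketch would be:

// Sketch of the Client abstraction this refactor implies (the actual
// definition lives in a companion file, not in this diff). It mirrors the
// four *client.Client methods the plugin calls, so tests can substitute
// a fake implementation instead of relying on nil checks.
type Client interface {
	Info(ctx context.Context) (types.Info, error)
	ContainerList(ctx context.Context, options types.ContainerListOptions) ([]types.Container, error)
	ContainerStats(ctx context.Context, containerID string, stream bool) (types.ContainerStats, error)
	ContainerInspect(ctx context.Context, containerID string) (types.ContainerJSON, error)
}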
@@ -145,32 +97,28 @@ var sampleConfig = `
   docker_label_exclude = []
 `
 
 // Description returns input description
 func (d *Docker) Description() string {
 	return "Read metrics about docker containers"
 }
 
 // SampleConfig prints sampleConfig
 func (d *Docker) SampleConfig() string { return sampleConfig }
 
 // Gather starts stats collection
 func (d *Docker) Gather(acc telegraf.Accumulator) error {
-	if d.client == nil && !d.testing {
-		var c *client.Client
+	if d.client == nil {
+		var c Client
 		var err error
-		defaultHeaders := map[string]string{"User-Agent": "engine-api-cli-1.0"}
 		if d.Endpoint == "ENV" {
-			c, err = client.NewEnvClient()
+			c, err = d.newEnvClient()
 			if err != nil {
 				return err
 			}
 		} else if d.Endpoint == "" {
-			c, err = client.NewClient("unix:///var/run/docker.sock", "", nil, defaultHeaders)
+			c, err = d.newClient("unix:///var/run/docker.sock")
 			if err != nil {
 				return err
 			}
 		} else {
-			c, err = client.NewClient(d.Endpoint, "", nil, defaultHeaders)
+			c, err = d.newClient(d.Endpoint)
 			if err != nil {
 				return err
 			}
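The newEnvClient and newClient fields are seeded with package-level constructors that are not part of this hunk. A plausible sketch, assuming a hypothetical SocketClient wrapper that adapts *client.Client to the Client interface:

// Plausible constructors backing the injected newEnvClient/newClient
// fields; the real definitions are outside this diff. SocketClient is a
// hypothetical thin wrapper satisfying the Client interface.
func NewEnvClient() (Client, error) {
	c, err := client.NewEnvClient()
	if err != nil {
		return nil, err
	}
	return &SocketClient{c}, nil
}

func NewClient(host string) (Client, error) {
	defaultHeaders := map[string]string{"User-Agent": "engine-api-cli-1.0"}
	c, err := client.NewClient(host, "", nil, defaultHeaders)
	if err != nil {
		return nil, err
	}
	return &SocketClient{c}, nil
}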
@@ -201,7 +149,7 @@ func (d *Docker) Gather(acc telegraf.Accumulator) error {
 	opts := types.ContainerListOptions{}
 	ctx, cancel := context.WithTimeout(context.Background(), d.Timeout.Duration)
 	defer cancel()
-	containers, err := listWrapper(d.client, ctx, opts)
+	containers, err := d.client.ContainerList(ctx, opts)
 	if err != nil {
 		return err
 	}
@@ -232,7 +180,7 @@ func (d *Docker) gatherInfo(acc telegraf.Accumulator) error {
 	// Get info from docker daemon
 	ctx, cancel := context.WithTimeout(context.Background(), d.Timeout.Duration)
 	defer cancel()
-	info, err := infoWrapper(d.client, ctx)
+	info, err := d.client.Info(ctx)
 	if err != nil {
 		return err
 	}
@@ -338,7 +286,7 @@ func (d *Docker) gatherContainer(
 
 	ctx, cancel := context.WithTimeout(context.Background(), d.Timeout.Duration)
 	defer cancel()
-	r, err := statsWrapper(d.client, ctx, container.ID, false)
+	r, err := d.client.ContainerStats(ctx, container.ID, false)
 	if err != nil {
 		return fmt.Errorf("Error getting docker stats: %s", err.Error())
 	}
@@ -350,6 +298,7 @@ func (d *Docker) gatherContainer(
 		}
 		return fmt.Errorf("Error decoding: %s", err.Error())
 	}
+	daemonOSType := r.OSType
 
 	// Add labels to tags
 	for k, label := range container.Labels {
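daemonOSType comes straight off the stats response: in the Docker API client of this vintage, ContainerStats returns the daemon's OS alongside the body stream, which is what lets the plugin branch between Linux and Windows fields below.

// For reference, from github.com/docker/docker/api/types:
type ContainerStats struct {
	Body   io.ReadCloser // the JSON stats stream decoded above
	OSType string        // "linux" or "windows"
}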
@@ -362,7 +311,7 @@ func (d *Docker) gatherContainer(
 
 	// Add whitelisted environment variables to tags
 	if len(d.TagEnvironment) > 0 {
-		info, err := inspectWrapper(d.client, ctx, container.ID)
+		info, err := d.client.ContainerInspect(ctx, container.ID)
 		if err != nil {
 			return fmt.Errorf("Error inspecting docker container: %s", err.Error())
 		}
@@ -377,7 +326,7 @@ func (d *Docker) gatherContainer(
 		}
 	}
 
-	gatherContainerStats(v, acc, tags, container.ID, d.PerDevice, d.Total)
+	gatherContainerStats(v, acc, tags, container.ID, d.PerDevice, d.Total, daemonOSType)
 
 	return nil
 }
@@ -389,46 +338,68 @@ func gatherContainerStats(
 	id string,
 	perDevice bool,
 	total bool,
+	daemonOSType string,
 ) {
 	now := stat.Read
 
 	memfields := map[string]interface{}{
-		"max_usage":                 stat.MemoryStats.MaxUsage,
-		"usage":                     stat.MemoryStats.Usage,
-		"fail_count":                stat.MemoryStats.Failcnt,
-		"limit":                     stat.MemoryStats.Limit,
-		"total_pgmafault":           stat.MemoryStats.Stats["total_pgmajfault"],
-		"cache":                     stat.MemoryStats.Stats["cache"],
-		"mapped_file":               stat.MemoryStats.Stats["mapped_file"],
-		"total_inactive_file":       stat.MemoryStats.Stats["total_inactive_file"],
-		"pgpgout":                   stat.MemoryStats.Stats["pagpgout"],
-		"rss":                       stat.MemoryStats.Stats["rss"],
-		"total_mapped_file":         stat.MemoryStats.Stats["total_mapped_file"],
-		"writeback":                 stat.MemoryStats.Stats["writeback"],
-		"unevictable":               stat.MemoryStats.Stats["unevictable"],
-		"pgpgin":                    stat.MemoryStats.Stats["pgpgin"],
-		"total_unevictable":         stat.MemoryStats.Stats["total_unevictable"],
-		"pgmajfault":                stat.MemoryStats.Stats["pgmajfault"],
-		"total_rss":                 stat.MemoryStats.Stats["total_rss"],
-		"total_rss_huge":            stat.MemoryStats.Stats["total_rss_huge"],
-		"total_writeback":           stat.MemoryStats.Stats["total_write_back"],
-		"total_inactive_anon":       stat.MemoryStats.Stats["total_inactive_anon"],
-		"rss_huge":                  stat.MemoryStats.Stats["rss_huge"],
-		"hierarchical_memory_limit": stat.MemoryStats.Stats["hierarchical_memory_limit"],
-		"total_pgfault":             stat.MemoryStats.Stats["total_pgfault"],
-		"total_active_file":         stat.MemoryStats.Stats["total_active_file"],
-		"active_anon":               stat.MemoryStats.Stats["active_anon"],
-		"total_active_anon":         stat.MemoryStats.Stats["total_active_anon"],
-		"total_pgpgout":             stat.MemoryStats.Stats["total_pgpgout"],
-		"total_cache":               stat.MemoryStats.Stats["total_cache"],
-		"inactive_anon":             stat.MemoryStats.Stats["inactive_anon"],
-		"active_file":               stat.MemoryStats.Stats["active_file"],
-		"pgfault":                   stat.MemoryStats.Stats["pgfault"],
-		"inactive_file":             stat.MemoryStats.Stats["inactive_file"],
-		"total_pgpgin":              stat.MemoryStats.Stats["total_pgpgin"],
-		"usage_percent":             calculateMemPercent(stat),
-		"container_id":              id,
+		"container_id": id,
 	}
+
+	memstats := []string{
+		"active_anon",
+		"active_file",
+		"cache",
+		"hierarchical_memory_limit",
+		"inactive_anon",
+		"inactive_file",
+		"mapped_file",
+		"pgfault",
+		"pgmajfault",
+		"pgpgin",
+		"pgpgout",
+		"rss",
+		"rss_huge",
+		"total_active_anon",
+		"total_active_file",
+		"total_cache",
+		"total_inactive_anon",
+		"total_inactive_file",
+		"total_mapped_file",
+		"total_pgfault",
+		"total_pgmajfault",
+		"total_pgpgin",
+		"total_pgpgout",
+		"total_rss",
+		"total_rss_huge",
+		"total_unevictable",
+		"total_writeback",
+		"unevictable",
+		"writeback",
+	}
+	for _, field := range memstats {
+		if value, ok := stat.MemoryStats.Stats[field]; ok {
+			memfields[field] = value
+		}
+	}
+	if stat.MemoryStats.Failcnt != 0 {
+		memfields["fail_count"] = stat.MemoryStats.Failcnt
+	}
+
+	if daemonOSType != "windows" {
+		memfields["limit"] = stat.MemoryStats.Limit
+		memfields["usage"] = stat.MemoryStats.Usage
+		memfields["max_usage"] = stat.MemoryStats.MaxUsage
+
+		mem := calculateMemUsageUnixNoCache(stat.MemoryStats)
+		memLimit := float64(stat.MemoryStats.Limit)
+		memfields["usage_percent"] = calculateMemPercentUnixNoCache(memLimit, mem)
+	} else {
+		memfields["commit_bytes"] = stat.MemoryStats.Commit
+		memfields["commit_peak_bytes"] = stat.MemoryStats.CommitPeak
+		memfields["private_working_set"] = stat.MemoryStats.PrivateWorkingSet
+	}
+
 	acc.AddFields("docker_container_mem", memfields, tags, now)
 
 	cpufields := map[string]interface{}{
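calculateMemUsageUnixNoCache and calculateMemPercentUnixNoCache are new helpers that this diff does not show. Going by the Docker CLI stats helpers such code is usually adapted from, they are roughly:

// Likely shape of the new memory helpers (modeled on the Docker CLI's
// stats helpers; the actual definitions live outside this diff).
// Subtracting the page cache from usage matches what `docker stats`
// reports on Linux.
func calculateMemUsageUnixNoCache(mem types.MemoryStats) float64 {
	return float64(mem.Usage - mem.Stats["cache"])
}

func calculateMemPercentUnixNoCache(memLimit float64, usedNoCache float64) float64 {
	// The limit is normally the host total when no cgroup limit is set,
	// so it should only be zero in degenerate cases; guard anyway.
	if memLimit != 0 {
		return usedNoCache / memLimit * 100.0
	}
	return 0.0
}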
@@ -439,9 +410,19 @@ func gatherContainerStats(
 		"throttling_periods":           stat.CPUStats.ThrottlingData.Periods,
 		"throttling_throttled_periods": stat.CPUStats.ThrottlingData.ThrottledPeriods,
 		"throttling_throttled_time":    stat.CPUStats.ThrottlingData.ThrottledTime,
-		"usage_percent":                calculateCPUPercent(stat),
 		"container_id":                 id,
 	}
+
+	if daemonOSType != "windows" {
+		previousCPU := stat.PreCPUStats.CPUUsage.TotalUsage
+		previousSystem := stat.PreCPUStats.SystemUsage
+		cpuPercent := calculateCPUPercentUnix(previousCPU, previousSystem, stat)
+		cpufields["usage_percent"] = cpuPercent
+	} else {
+		cpuPercent := calculateCPUPercentWindows(stat)
+		cpufields["usage_percent"] = cpuPercent
+	}
+
 	cputags := copyTags(tags)
 	cputags["cpu"] = "cpu-total"
 	acc.AddFields("docker_container_cpu", cpufields, cputags, now)
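Likewise, calculateCPUPercentUnix and calculateCPUPercentWindows are not in this diff. Sketches modeled on the Docker CLI's stats helpers, which this change presumably follows:

// Sketches of the new CPU helpers, under the assumption that they mirror
// the Docker CLI's stats helpers; the shipped definitions are elsewhere.
func calculateCPUPercentUnix(previousCPU, previousSystem uint64, stat *types.StatsJSON) float64 {
	cpuDelta := float64(stat.CPUStats.CPUUsage.TotalUsage) - float64(previousCPU)
	systemDelta := float64(stat.CPUStats.SystemUsage) - float64(previousSystem)
	if systemDelta > 0.0 && cpuDelta > 0.0 {
		return (cpuDelta / systemDelta) * float64(len(stat.CPUStats.CPUUsage.PercpuUsage)) * 100.0
	}
	return 0.0
}

func calculateCPUPercentWindows(stat *types.StatsJSON) float64 {
	// Max possible 100ns intervals between the two reads, across all
	// processors; Windows reports CPU usage in 100ns units.
	possIntervals := uint64(stat.Read.Sub(stat.PreRead).Nanoseconds()) / 100 * uint64(stat.NumProcs)
	// 100ns intervals the container actually consumed.
	intervalsUsed := stat.CPUStats.CPUUsage.TotalUsage - stat.PreCPUStats.CPUUsage.TotalUsage
	if possIntervals > 0 {
		return float64(intervalsUsed) / float64(possIntervals) * 100.0
	}
	return 0.0
}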
@@ -521,30 +502,6 @@ func gatherContainerStats(
 	gatherBlockIOMetrics(stat, acc, tags, now, id, perDevice, total)
 }
 
-func calculateMemPercent(stat *types.StatsJSON) float64 {
-	var memPercent = 0.0
-	if stat.MemoryStats.Limit > 0 {
-		memPercent = float64(stat.MemoryStats.Usage) / float64(stat.MemoryStats.Limit) * 100.0
-	}
-	return memPercent
-}
-
-func calculateCPUPercent(stat *types.StatsJSON) float64 {
-	var cpuPercent = 0.0
-	// calculate the change for the cpu and system usage of the container in between readings
-	cpuDelta := float64(stat.CPUStats.CPUUsage.TotalUsage) - float64(stat.PreCPUStats.CPUUsage.TotalUsage)
-	systemDelta := float64(stat.CPUStats.SystemUsage) - float64(stat.PreCPUStats.SystemUsage)
-
-	if systemDelta > 0.0 && cpuDelta > 0.0 {
-		if stat.CPUStats.OnlineCPUs > 0 {
-			cpuPercent = (cpuDelta / systemDelta) * float64(stat.CPUStats.OnlineCPUs) * 100.0
-		} else {
-			cpuPercent = (cpuDelta / systemDelta) * float64(len(stat.CPUStats.CPUUsage.PercpuUsage)) * 100.0
-		}
-	}
-	return cpuPercent
-}
-
 func gatherBlockIOMetrics(
 	stat *types.StatsJSON,
 	acc telegraf.Accumulator,
@@ -742,6 +699,8 @@ func init() {
 		return &Docker{
 			PerDevice:      true,
 			Timeout:        internal.Duration{Duration: time.Second * 5},
+			newEnvClient:   NewEnvClient,
+			newClient:      NewClient,
 			filtersCreated: false,
 		}
 	})
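With the constructors injected through struct fields, tests no longer need the nil-check wrappers; a unit test can hand the plugin a fake directly, along these lines:

// Hypothetical test wiring enabled by the injected constructors.
// FakeDockerClient (referenced by the removed wrappers) would now
// implement the Client interface directly.
d := Docker{
	newClient: func(host string) (Client, error) {
		return FakeDockerClient{}, nil
	},
}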