Improve scalability of vsphere input (#5113)
parent 1d6ff4fe4c
commit 78c1ffbf27
@@ -1064,7 +1064,7 @@
 version = "v1.0.0"

 [[projects]]
-digest = "1:f9fe29bf856d49f9a51d6001588cb5ee5d65c8a7ff5e8b0dd5423c3a510f0833"
+digest = "1:6af52ce6dae9a912aa3113f247a63cd82599760ddc328a6721c3ef0426d31ca2"
 name = "github.com/vmware/govmomi"
 packages = [
 ".",
@@ -1090,8 +1090,8 @@
 "vim25/xml",
 ]
 pruneopts = ""
-revision = "e3a01f9611c32b2362366434bcd671516e78955d"
-version = "v0.18.0"
+revision = "3617f28d167d448f93f282a867870f109516d2a5"
+version = "v0.19.0"

 [[projects]]
 digest = "1:c1855527c165f0224708fbc7d76843b4b20bcb74b328f212f8d0e9c855d4c49d"
@@ -228,7 +228,7 @@

 [[constraint]]
 name = "github.com/vmware/govmomi"
-version = "0.18.0"
+version = "0.19.0"

 [[constraint]]
 name = "github.com/Azure/go-autorest"
@@ -122,17 +122,17 @@ vm_metric_exclude = [ "*" ]
 ## Clusters
 # cluster_metric_include = [] ## if omitted or empty, all metrics are collected
 # cluster_metric_exclude = [] ## Nothing excluded by default
-# cluster_instances = true ## true by default
+# cluster_instances = false ## false by default

 ## Datastores
 # datastore_metric_include = [] ## if omitted or empty, all metrics are collected
 # datastore_metric_exclude = [] ## Nothing excluded by default
-# datastore_instances = false ## false by default for Datastores only
+# datastore_instances = false ## false by default

 ## Datacenters
 datacenter_metric_include = [] ## if omitted or empty, all metrics are collected
 datacenter_metric_exclude = [ "*" ] ## Datacenters are not collected by default.
-# datacenter_instances = false ## false by default for Datastores only
+# datacenter_instances = false ## false by default

 ## Plugin Settings
 ## separator character to use for measurement and field names (default: "_")
@@ -3,6 +3,7 @@ package vsphere
 import (
 "context"
 "crypto/tls"
+"fmt"
 "log"
 "net/url"
 "strconv"
@@ -18,6 +19,7 @@ import (
 "github.com/vmware/govmomi/vim25"
 "github.com/vmware/govmomi/vim25/methods"
 "github.com/vmware/govmomi/vim25/soap"
+"github.com/vmware/govmomi/vim25/types"
 )

 // The highest number of metrics we can query for, no matter what settings
@@ -76,7 +78,7 @@ func (cf *ClientFactory) GetClient(ctx context.Context) (*Client, error) {
 ctx2, cancel2 := context.WithTimeout(ctx, cf.parent.Timeout.Duration)
 defer cancel2()
 if cf.client.Client.SessionManager.Login(ctx2, url.UserPassword(cf.parent.Username, cf.parent.Password)) != nil {
-return nil, err
+return nil, fmt.Errorf("Renewing authentication failed: %v", err)
 }
 }

@@ -205,6 +207,8 @@ func (c *Client) close() {

 // GetServerTime returns the time at the vCenter server
 func (c *Client) GetServerTime(ctx context.Context) (time.Time, error) {
+ctx, cancel := context.WithTimeout(ctx, c.Timeout)
+defer cancel()
 t, err := methods.GetCurrentTime(ctx, c.Client)
 if err != nil {
 return time.Time{}, err
@@ -235,7 +239,7 @@ func (c *Client) GetMaxQueryMetrics(ctx context.Context) (int, error) {
 // Fall through version-based inference if value isn't usable
 }
 } else {
-log.Println("I! [input.vsphere] Option query for maxQueryMetrics failed. Using default")
+log.Println("D! [input.vsphere] Option query for maxQueryMetrics failed. Using default")
 }

 // No usable maxQueryMetrics setting. Infer based on version
@@ -255,3 +259,38 @@ func (c *Client) GetMaxQueryMetrics(ctx context.Context) (int, error) {
 }
 return 256, nil
 }
+
+// QueryMetrics wraps performance.Query to give it proper timeouts
+func (c *Client) QueryMetrics(ctx context.Context, pqs []types.PerfQuerySpec) ([]performance.EntityMetric, error) {
+ctx1, cancel1 := context.WithTimeout(ctx, c.Timeout)
+defer cancel1()
+metrics, err := c.Perf.Query(ctx1, pqs)
+if err != nil {
+return nil, err
+}
+
+ctx2, cancel2 := context.WithTimeout(ctx, c.Timeout)
+defer cancel2()
+return c.Perf.ToMetricSeries(ctx2, metrics)
+}
+
+// CounterInfoByName wraps performance.CounterInfoByName to give it proper timeouts
+func (c *Client) CounterInfoByName(ctx context.Context) (map[string]*types.PerfCounterInfo, error) {
+ctx1, cancel1 := context.WithTimeout(ctx, c.Timeout)
+defer cancel1()
+return c.Perf.CounterInfoByName(ctx1)
+}
+
+// CounterInfoByKey wraps performance.CounterInfoByKey to give it proper timeouts
+func (c *Client) CounterInfoByKey(ctx context.Context) (map[int32]*types.PerfCounterInfo, error) {
+ctx1, cancel1 := context.WithTimeout(ctx, c.Timeout)
+defer cancel1()
+return c.Perf.CounterInfoByKey(ctx1)
+}
+
+// ListResources wraps property.Collector.Retrieve to give it proper timeouts
+func (c *Client) ListResources(ctx context.Context, root *view.ContainerView, kind []string, ps []string, dst interface{}) error {
+ctx1, cancel1 := context.WithTimeout(ctx, c.Timeout)
+defer cancel1()
+return root.Retrieve(ctx1, kind, ps, dst)
+}
@@ -2,8 +2,10 @@ package vsphere

 import (
 "context"
+"errors"
 "fmt"
 "log"
+"math/rand"
 "net/url"
 "regexp"
 "strconv"
@@ -24,15 +26,19 @@ import (

 var isolateLUN = regexp.MustCompile(".*/([^/]+)/?$")

-const metricLookback = 3
+const metricLookback = 3 // Number of time periods to look back at for non-realtime metrics

+const rtMetricLookback = 3 // Number of time periods to look back at for realtime metrics
+
+const maxSampleConst = 10 // Absolute maximim number of samples regardless of period
+
+const maxMetadataSamples = 100 // Number of resources to sample for metric metadata
+
 // Endpoint is a high-level representation of a connected vCenter endpoint. It is backed by the lower
 // level Client type.
 type Endpoint struct {
 Parent *VSphere
 URL *url.URL
-lastColls map[string]time.Time
-instanceInfo map[string]resourceInfo
 resourceKinds map[string]resourceKind
 hwMarks *TSCache
 lun2ds map[string]string
@@ -52,8 +58,14 @@ type resourceKind struct {
 sampling int32
 objects objectMap
 filters filter.Filter
+include []string
+simple bool
+metrics performance.MetricList
 collectInstances bool
-getObjects func(context.Context, *Endpoint, *view.ContainerView) (objectMap, error)
+parent string
+getObjects func(context.Context, *Client, *Endpoint, *view.ContainerView) (objectMap, error)
+latestSample time.Time
+lastColl time.Time
 }

 type metricEntry struct {
@@ -74,33 +86,22 @@ type objectRef struct {
 dcname string
 }

-type resourceInfo struct {
-name string
-metrics performance.MetricList
-parentRef *types.ManagedObjectReference
+func (e *Endpoint) getParent(obj *objectRef, res *resourceKind) (*objectRef, bool) {
+if pKind, ok := e.resourceKinds[res.parent]; ok {
+if p, ok := pKind.objects[obj.parentRef.Value]; ok {
+return &p, true
+}
+}
+return nil, false
 }

-type metricQRequest struct {
-res *resourceKind
-obj objectRef
-}
-
-type metricQResponse struct {
-obj objectRef
-metrics *performance.MetricList
-}
-
-type multiError []error
-
 // NewEndpoint returns a new connection to a vCenter based on the URL and configuration passed
 // as parameters.
 func NewEndpoint(ctx context.Context, parent *VSphere, url *url.URL) (*Endpoint, error) {
 e := Endpoint{
 URL: url,
 Parent: parent,
-lastColls: make(map[string]time.Time),
 hwMarks: NewTSCache(1 * time.Hour),
-instanceInfo: make(map[string]resourceInfo),
 lun2ds: make(map[string]string),
 initialized: false,
 clientFactory: NewClientFactory(ctx, url, parent),
@@ -116,8 +117,11 @@ func NewEndpoint(ctx context.Context, parent *VSphere, url *url.URL) (*Endpoint,
 sampling: 300,
 objects: make(objectMap),
 filters: newFilterOrPanic(parent.DatacenterMetricInclude, parent.DatacenterMetricExclude),
+simple: isSimple(parent.DatacenterMetricInclude, parent.DatacenterMetricExclude),
+include: parent.DatacenterMetricInclude,
 collectInstances: parent.DatacenterInstances,
 getObjects: getDatacenters,
+parent: "",
 },
 "cluster": {
 name: "cluster",
@@ -128,8 +132,11 @@ func NewEndpoint(ctx context.Context, parent *VSphere, url *url.URL) (*Endpoint,
 sampling: 300,
 objects: make(objectMap),
 filters: newFilterOrPanic(parent.ClusterMetricInclude, parent.ClusterMetricExclude),
+simple: isSimple(parent.ClusterMetricInclude, parent.ClusterMetricExclude),
+include: parent.ClusterMetricInclude,
 collectInstances: parent.ClusterInstances,
 getObjects: getClusters,
+parent: "datacenter",
 },
 "host": {
 name: "host",
@@ -140,8 +147,11 @@ func NewEndpoint(ctx context.Context, parent *VSphere, url *url.URL) (*Endpoint,
 sampling: 20,
 objects: make(objectMap),
 filters: newFilterOrPanic(parent.HostMetricInclude, parent.HostMetricExclude),
+simple: isSimple(parent.HostMetricInclude, parent.HostMetricExclude),
+include: parent.HostMetricInclude,
 collectInstances: parent.HostInstances,
 getObjects: getHosts,
+parent: "cluster",
 },
 "vm": {
 name: "vm",
@@ -152,8 +162,11 @@ func NewEndpoint(ctx context.Context, parent *VSphere, url *url.URL) (*Endpoint,
 sampling: 20,
 objects: make(objectMap),
 filters: newFilterOrPanic(parent.VMMetricInclude, parent.VMMetricExclude),
+simple: isSimple(parent.VMMetricInclude, parent.VMMetricExclude),
+include: parent.VMMetricInclude,
 collectInstances: parent.VMInstances,
 getObjects: getVMs,
+parent: "host",
 },
 "datastore": {
 name: "datastore",
@@ -163,8 +176,11 @@ func NewEndpoint(ctx context.Context, parent *VSphere, url *url.URL) (*Endpoint,
 sampling: 300,
 objects: make(objectMap),
 filters: newFilterOrPanic(parent.DatastoreMetricInclude, parent.DatastoreMetricExclude),
+simple: isSimple(parent.DatastoreMetricInclude, parent.DatastoreMetricExclude),
+include: parent.DatastoreMetricInclude,
 collectInstances: parent.DatastoreInstances,
 getObjects: getDatastores,
+parent: "",
 },
 }

@@ -174,24 +190,6 @@ func NewEndpoint(ctx context.Context, parent *VSphere, url *url.URL) (*Endpoint,
 return &e, err
 }

-func (m multiError) Error() string {
-switch len(m) {
-case 0:
-return "No error recorded. Something is wrong!"
-case 1:
-return m[0].Error()
-default:
-s := "Multiple errors detected concurrently: "
-for i, e := range m {
-if i != 0 {
-s += ", "
-}
-s += e.Error()
-}
-return s
-}
-}
-
 func anythingEnabled(ex []string) bool {
 for _, s := range ex {
 if s == "*" {
@@ -209,6 +207,18 @@ func newFilterOrPanic(include []string, exclude []string) filter.Filter {
 return f
 }

+func isSimple(include []string, exclude []string) bool {
+if len(exclude) > 0 || len(include) == 0 {
+return false
+}
+for _, s := range include {
+if strings.Contains(s, "*") {
+return false
+}
+}
+return true
+}
+
 func (e *Endpoint) startDiscovery(ctx context.Context) {
 e.discoveryTicker = time.NewTicker(e.Parent.ObjectDiscoveryInterval.Duration)
 go func() {
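The isSimple check added above gates the fast metadata path: a metric selection counts as "simple" only when there is no exclude list and every include entry is a literal counter name with no wildcard, in which case the plugin can resolve metadata with a single CounterInfoByName call instead of sampling per-object metadata. A standalone sketch of the same rule; the helper name and the example counter names are illustrative, not part of the plugin:

package main

import (
	"fmt"
	"strings"
)

// isSimpleSelection mirrors the isSimple rule in the diff above:
// no excludes, a non-empty include list, and no "*" wildcards.
func isSimpleSelection(include, exclude []string) bool {
	if len(exclude) > 0 || len(include) == 0 {
		return false
	}
	for _, s := range include {
		if strings.Contains(s, "*") {
			return false
		}
	}
	return true
}

func main() {
	// Hypothetical selections, for illustration only.
	fmt.Println(isSimpleSelection([]string{"cpu.usage.average", "mem.active.average"}, nil)) // true: fast path
	fmt.Println(isSimpleSelection([]string{"cpu.*"}, nil))                                   // false: wildcard forces per-object sampling
	fmt.Println(isSimpleSelection(nil, nil))                                                 // false: empty include means "collect everything"
}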
@@ -249,7 +259,9 @@ func (e *Endpoint) init(ctx context.Context) error {
 } else {
 // Otherwise, just run it in the background. We'll probably have an incomplete first metric
 // collection this way.
-go e.initalDiscovery(ctx)
+go func() {
+e.initalDiscovery(ctx)
+}()
 }
 }
 e.initialized = true
@@ -262,10 +274,7 @@ func (e *Endpoint) getMetricNameMap(ctx context.Context) (map[int32]string, erro
 return nil, err
 }

-ctx1, cancel1 := context.WithTimeout(ctx, e.Parent.Timeout.Duration)
-defer cancel1()
-mn, err := client.Perf.CounterInfoByName(ctx1)
-
+mn, err := client.CounterInfoByName(ctx)
 if err != nil {
 return nil, err
 }
@@ -276,20 +285,19 @@
 return names, nil
 }

-func (e *Endpoint) getMetadata(ctx context.Context, in interface{}) interface{} {
+func (e *Endpoint) getMetadata(ctx context.Context, obj objectRef, sampling int32) (performance.MetricList, error) {
 client, err := e.clientFactory.GetClient(ctx)
 if err != nil {
-return err
+return nil, err
 }

-rq := in.(*metricQRequest)
 ctx1, cancel1 := context.WithTimeout(ctx, e.Parent.Timeout.Duration)
 defer cancel1()
-metrics, err := client.Perf.AvailableMetric(ctx1, rq.obj.ref.Reference(), rq.res.sampling)
-if err != nil && err != context.Canceled {
-log.Printf("E! [input.vsphere]: Error while getting metric metadata. Discovery will be incomplete. Error: %s", err)
+metrics, err := client.Perf.AvailableMetric(ctx1, obj.ref.Reference(), sampling)
+if err != nil {
+return nil, err
 }
-return &metricQResponse{metrics: &metrics, obj: rq.obj}
+return metrics, nil
 }

 func (e *Endpoint) getDatacenterName(ctx context.Context, client *Client, cache map[string]string, r types.ManagedObjectReference) string {
@@ -349,17 +357,17 @@ func (e *Endpoint) discover(ctx context.Context) error {
 }

 log.Printf("D! [input.vsphere]: Discover new objects for %s", e.URL.Host)

-instInfo := make(map[string]resourceInfo)
 resourceKinds := make(map[string]resourceKind)
 dcNameCache := make(map[string]string)

+numRes := int64(0)
+
 // Populate resource objects, and endpoint instance info.
 for k, res := range e.resourceKinds {
 log.Printf("D! [input.vsphere] Discovering resources for %s", res.name)
 // Need to do this for all resource types even if they are not enabled
 if res.enabled || k != "vm" {
-objects, err := res.getObjects(ctx, e, client.Root)
+objects, err := res.getObjects(ctx, client, e, client.Root)
 if err != nil {
 return err
 }
@@ -374,42 +382,19 @@ func (e *Endpoint) discover(ctx context.Context) error {
 }
 }

-// Set up a worker pool for processing metadata queries concurrently
-wp := NewWorkerPool(10)
-wp.Run(ctx, e.getMetadata, e.Parent.DiscoverConcurrency)
-
-// Fill the input channels with resources that need to be queried
-// for metadata.
-wp.Fill(ctx, func(ctx context.Context, f PushFunc) {
-for _, obj := range objects {
-f(ctx, &metricQRequest{obj: obj, res: &res})
-}
-})
-
-// Drain the resulting metadata and build instance infos.
-wp.Drain(ctx, func(ctx context.Context, in interface{}) bool {
-switch resp := in.(type) {
-case *metricQResponse:
-mList := make(performance.MetricList, 0)
+// No need to collect metric metadata if resource type is not enabled
 if res.enabled {
-for _, m := range *resp.metrics {
-if m.Instance != "" && !res.collectInstances {
-continue
-}
-if res.filters.Match(metricNames[m.CounterId]) {
-mList = append(mList, m)
+if res.simple {
+e.simpleMetadataSelect(ctx, client, &res)
+} else {
+e.complexMetadataSelect(ctx, &res, objects, metricNames)
 }
 }
-}
-instInfo[resp.obj.ref.Value] = resourceInfo{name: resp.obj.name, metrics: mList, parentRef: resp.obj.parentRef}
-case error:
-log.Printf("W! [input.vsphere]: Error while discovering resources: %s", resp)
-return false
-}
-return true
-})
 res.objects = objects
 resourceKinds[k] = res

+SendInternalCounterWithTags("discovered_objects", e.URL.Host, map[string]string{"type": res.name}, int64(len(objects)))
+numRes += int64(len(objects))
 }
 }

@@ -428,20 +413,100 @@ func (e *Endpoint) discover(ctx context.Context) error {
 e.collectMux.Lock()
 defer e.collectMux.Unlock()

-e.instanceInfo = instInfo
 e.resourceKinds = resourceKinds
 e.lun2ds = l2d

 sw.Stop()
-SendInternalCounter("discovered_objects", e.URL.Host, int64(len(instInfo)))
+SendInternalCounterWithTags("discovered_objects", e.URL.Host, map[string]string{"type": "instance-total"}, numRes)
 return nil
 }

-func getDatacenters(ctx context.Context, e *Endpoint, root *view.ContainerView) (objectMap, error) {
+func (e *Endpoint) simpleMetadataSelect(ctx context.Context, client *Client, res *resourceKind) {
+log.Printf("D! [input.vsphere] Using fast metric metadata selection for %s", res.name)
+m, err := client.CounterInfoByName(ctx)
+if err != nil {
+log.Printf("E! [input.vsphere]: Error while getting metric metadata. Discovery will be incomplete. Error: %s", err)
+return
+}
+res.metrics = make(performance.MetricList, 0, len(res.include))
+for _, s := range res.include {
+if pci, ok := m[s]; ok {
+cnt := types.PerfMetricId{
+CounterId: pci.Key,
+}
+if res.collectInstances {
+cnt.Instance = "*"
+} else {
+cnt.Instance = ""
+}
+res.metrics = append(res.metrics, cnt)
+} else {
+log.Printf("W! [input.vsphere] Metric name %s is unknown. Will not be collected", s)
+}
+}
+}
+
+func (e *Endpoint) complexMetadataSelect(ctx context.Context, res *resourceKind, objects objectMap, metricNames map[int32]string) {
+// We're only going to get metadata from maxMetadataSamples resources. If we have
+// more resources than that, we pick maxMetadataSamples samples at random.
+sampledObjects := make([]objectRef, len(objects))
+i := 0
+for _, obj := range objects {
+sampledObjects[i] = obj
+i++
+}
+n := len(sampledObjects)
+if n > maxMetadataSamples {
+// Shuffle samples into the maxMetadatSamples positions
+for i := 0; i < maxMetadataSamples; i++ {
+j := int(rand.Int31n(int32(i + 1)))
+t := sampledObjects[i]
+sampledObjects[i] = sampledObjects[j]
+sampledObjects[j] = t
+}
+sampledObjects = sampledObjects[0:maxMetadataSamples]
+}
+
+instInfoMux := sync.Mutex{}
+te := NewThrottledExecutor(e.Parent.DiscoverConcurrency)
+for _, obj := range sampledObjects {
+func(obj objectRef) {
+te.Run(ctx, func() {
+metrics, err := e.getMetadata(ctx, obj, res.sampling)
+if err != nil {
+log.Printf("E! [input.vsphere]: Error while getting metric metadata. Discovery will be incomplete. Error: %s", err)
+}
+mMap := make(map[string]types.PerfMetricId)
+for _, m := range metrics {
+if m.Instance != "" && res.collectInstances {
+m.Instance = "*"
+} else {
+m.Instance = ""
+}
+if res.filters.Match(metricNames[m.CounterId]) {
+mMap[strconv.Itoa(int(m.CounterId))+"|"+m.Instance] = m
+}
+}
+log.Printf("D! [input.vsphere] Found %d metrics for %s", len(mMap), obj.name)
+instInfoMux.Lock()
+defer instInfoMux.Unlock()
+if len(mMap) > len(res.metrics) {
+res.metrics = make(performance.MetricList, len(mMap))
+i := 0
+for _, m := range mMap {
+res.metrics[i] = m
+i++
+}
+}
+})
+}(obj)
+}
+te.Wait()
+}
+
+func getDatacenters(ctx context.Context, client *Client, e *Endpoint, root *view.ContainerView) (objectMap, error) {
 var resources []mo.Datacenter
-ctx1, cancel1 := context.WithTimeout(ctx, e.Parent.Timeout.Duration)
-defer cancel1()
-err := root.Retrieve(ctx1, []string{"Datacenter"}, []string{"name", "parent"}, &resources)
+err := client.ListResources(ctx, root, []string{"Datacenter"}, []string{"name", "parent"}, &resources)
 if err != nil {
 return nil, err
 }
@@ -453,11 +518,9 @@ func getDatacenters(ctx context.Context, e *Endpoint, root *view.ContainerView)
 return m, nil
 }

-func getClusters(ctx context.Context, e *Endpoint, root *view.ContainerView) (objectMap, error) {
+func getClusters(ctx context.Context, client *Client, e *Endpoint, root *view.ContainerView) (objectMap, error) {
 var resources []mo.ClusterComputeResource
-ctx1, cancel1 := context.WithTimeout(ctx, e.Parent.Timeout.Duration)
-defer cancel1()
-err := root.Retrieve(ctx1, []string{"ClusterComputeResource"}, []string{"name", "parent"}, &resources)
+err := client.ListResources(ctx, root, []string{"ClusterComputeResource"}, []string{"name", "parent"}, &resources)
 if err != nil {
 return nil, err
 }
@@ -487,9 +550,9 @@ func getClusters(ctx context.Context, e *Endpoint, root *view.ContainerView) (ob
 return m, nil
 }

-func getHosts(ctx context.Context, e *Endpoint, root *view.ContainerView) (objectMap, error) {
+func getHosts(ctx context.Context, client *Client, e *Endpoint, root *view.ContainerView) (objectMap, error) {
 var resources []mo.HostSystem
-err := root.Retrieve(ctx, []string{"HostSystem"}, []string{"name", "parent"}, &resources)
+err := client.ListResources(ctx, root, []string{"HostSystem"}, []string{"name", "parent"}, &resources)
 if err != nil {
 return nil, err
 }
@@ -501,16 +564,17 @@ func getHosts(ctx context.Context, e *Endpoint, root *view.ContainerView) (objec
 return m, nil
 }

-func getVMs(ctx context.Context, e *Endpoint, root *view.ContainerView) (objectMap, error) {
+func getVMs(ctx context.Context, client *Client, e *Endpoint, root *view.ContainerView) (objectMap, error) {
 var resources []mo.VirtualMachine
-ctx1, cancel1 := context.WithTimeout(ctx, e.Parent.Timeout.Duration)
-defer cancel1()
-err := root.Retrieve(ctx1, []string{"VirtualMachine"}, []string{"name", "runtime.host", "config.guestId", "config.uuid"}, &resources)
+err := client.ListResources(ctx, root, []string{"VirtualMachine"}, []string{"name", "runtime.host", "runtime.powerState", "config.guestId", "config.uuid"}, &resources)
 if err != nil {
 return nil, err
 }
 m := make(objectMap)
 for _, r := range resources {
+if r.Runtime.PowerState != "poweredOn" {
+continue
+}
 guest := "unknown"
 uuid := ""
 // Sometimes Config is unknown and returns a nil pointer
@@ -525,11 +589,9 @@ func getVMs(ctx context.Context, e *Endpoint, root *view.ContainerView) (objectM
 return m, nil
 }

-func getDatastores(ctx context.Context, e *Endpoint, root *view.ContainerView) (objectMap, error) {
+func getDatastores(ctx context.Context, client *Client, e *Endpoint, root *view.ContainerView) (objectMap, error) {
 var resources []mo.Datastore
-ctx1, cancel1 := context.WithTimeout(ctx, e.Parent.Timeout.Duration)
-defer cancel1()
-err := root.Retrieve(ctx1, []string{"Datastore"}, []string{"name", "parent", "info"}, &resources)
+err := client.ListResources(ctx, root, []string{"Datastore"}, []string{"name", "parent", "info"}, &resources)
 if err != nil {
 return nil, err
 }
@@ -555,10 +617,10 @@ func (e *Endpoint) Close() {

 // Collect runs a round of data collections as specified in the configuration.
 func (e *Endpoint) Collect(ctx context.Context, acc telegraf.Accumulator) error {

 // If we never managed to do a discovery, collection will be a no-op. Therefore,
 // we need to check that a connection is available, or the collection will
 // silently fail.
-//
 if _, err := e.clientFactory.GetClient(ctx); err != nil {
 return err
 }
@@ -571,28 +633,41 @@ func (e *Endpoint) Collect(ctx context.Context, acc telegraf.Accumulator) error
 }

 // If discovery interval is disabled (0), discover on each collection cycle
-//
 if e.Parent.ObjectDiscoveryInterval.Duration == 0 {
 err := e.discover(ctx)
 if err != nil {
 return err
 }
 }
+var wg sync.WaitGroup
 for k, res := range e.resourceKinds {
 if res.enabled {
+wg.Add(1)
+go func(k string) {
+defer wg.Done()
 err := e.collectResource(ctx, k, acc)
 if err != nil {
-return err
+acc.AddError(err)
 }
+}(k)
 }
 }
+wg.Wait()

 // Purge old timestamps from the cache
 e.hwMarks.Purge()
 return nil
 }

-func (e *Endpoint) chunker(ctx context.Context, f PushFunc, res *resourceKind, now time.Time, latest time.Time) {
+// Workaround to make sure pqs is a copy of the loop variable and won't change.
+func submitChunkJob(ctx context.Context, te *ThrottledExecutor, job func([]types.PerfQuerySpec), pqs []types.PerfQuerySpec) {
+te.Run(ctx, func() {
+job(pqs)
+})
+}
+
+func (e *Endpoint) chunkify(ctx context.Context, res *resourceKind, now time.Time, latest time.Time, acc telegraf.Accumulator, job func([]types.PerfQuerySpec)) {
+te := NewThrottledExecutor(e.Parent.CollectConcurrency)
 maxMetrics := e.Parent.MaxQueryMetrics
 if maxMetrics < 1 {
 maxMetrics = 1
@@ -609,38 +684,30 @@ func (e *Endpoint) chunker(ctx context.Context, f PushFunc, res *resourceKind, n
 total := 0
 nRes := 0
 for _, object := range res.objects {
-info, found := e.instanceInfo[object.ref.Value]
-if !found {
-log.Printf("E! [input.vsphere]: Internal error: Instance info not found for MOID %s", object.ref)
-}
-mr := len(info.metrics)
+mr := len(res.metrics)
 for mr > 0 {
 mc := mr
 headroom := maxMetrics - metrics
 if !res.realTime && mc > headroom { // Metric query limit only applies to non-realtime metrics
 mc = headroom
 }
-fm := len(info.metrics) - mr
+fm := len(res.metrics) - mr
 pq := types.PerfQuerySpec{
 Entity: object.ref,
-MaxSample: 1,
-MetricId: info.metrics[fm : fm+mc],
+MaxSample: maxSampleConst,
+MetricId: res.metrics[fm : fm+mc],
 IntervalId: res.sampling,
 Format: "normal",
 }

-// For non-realtime metrics, we need to look back a few samples in case
-// the vCenter is late reporting metrics.
-if !res.realTime {
-pq.MaxSample = metricLookback
+start, ok := e.hwMarks.Get(object.ref.Value)
+if !ok {
+// Look back 3 sampling periods by default
+start = latest.Add(time.Duration(-res.sampling) * time.Second * (metricLookback - 1))
 }

-// Look back 3 sampling periods
-start := latest.Add(time.Duration(-res.sampling) * time.Second * (metricLookback - 1))
-if !res.realTime {
 pq.StartTime = &start
 pq.EndTime = &now
-}
 pqs = append(pqs, pq)
 mr -= mc
 metrics += mc
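The loop above spreads one object's metric list across several PerfQuerySpecs whenever it exceeds the remaining headroom (maxMetrics - metrics). A simplified standalone model of that arithmetic for a single object, with made-up sizes; it deliberately ignores the realtime exception, the MaxQueryObjects limit and the 100,000-metric cap:

package main

import "fmt"

// splitMetrics models the chunking arithmetic from chunkify above for a single
// object: mr metrics remain, and each chunk may use at most the current headroom.
func splitMetrics(totalMetrics, maxMetrics int) [][2]int { // returns [offset, count] pairs
	chunks := make([][2]int, 0)
	mr := totalMetrics // metrics remaining for this object
	used := 0          // metrics already placed in the current query
	for mr > 0 {
		mc := mr
		if headroom := maxMetrics - used; mc > headroom {
			mc = headroom
		}
		fm := totalMetrics - mr // offset into the metric list
		chunks = append(chunks, [2]int{fm, mc})
		mr -= mc
		used += mc
		if used >= maxMetrics {
			used = 0 // the chunk is dumped and a new query starts with a fresh budget
		}
	}
	return chunks
}

func main() {
	// Hypothetical numbers: 7 metrics against a budget of 5 per query.
	fmt.Println(splitMetrics(7, 5)) // [[0 5] [5 2]]
}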
@@ -648,17 +715,18 @@ func (e *Endpoint) chunker(ctx context.Context, f PushFunc, res *resourceKind, n
 // We need to dump the current chunk of metrics for one of two reasons:
 // 1) We filled up the metric quota while processing the current resource
 // 2) We are at the last resource and have no more data to process.
-if mr > 0 || (!res.realTime && metrics >= maxMetrics) || nRes >= e.Parent.MaxQueryObjects {
+// 3) The query contains more than 100,000 individual metrics
+if mr > 0 || nRes >= e.Parent.MaxQueryObjects || len(pqs) > 100000 {
 log.Printf("D! [input.vsphere]: Queueing query: %d objects, %d metrics (%d remaining) of type %s for %s. Processed objects: %d. Total objects %d",
 len(pqs), metrics, mr, res.name, e.URL.Host, total+1, len(res.objects))

-// To prevent deadlocks, don't send work items if the context has been cancelled.
+// Don't send work items if the context has been cancelled.
 if ctx.Err() == context.Canceled {
 return
 }

-// Call push function
-f(ctx, pqs)
+// Run collection job
+submitChunkJob(ctx, te, job, pqs)
 pqs = make([]types.PerfQuerySpec, 0, e.Parent.MaxQueryObjects)
 metrics = 0
 nRes = 0
@@ -667,19 +735,19 @@ func (e *Endpoint) chunker(ctx context.Context, f PushFunc, res *resourceKind, n
 total++
 nRes++
 }
-// There may be dangling stuff in the queue. Handle them
-//
+// Handle final partially filled chunk
 if len(pqs) > 0 {
-// Call push function
+// Run collection job
 log.Printf("D! [input.vsphere]: Queuing query: %d objects, %d metrics (0 remaining) of type %s for %s. Total objects %d (final chunk)",
 len(pqs), metrics, res.name, e.URL.Host, len(res.objects))
-f(ctx, pqs)
+submitChunkJob(ctx, te, job, pqs)
 }

+// Wait for background collection to finish
+te.Wait()
 }

 func (e *Endpoint) collectResource(ctx context.Context, resourceType string, acc telegraf.Accumulator) error {

-// Do we have new data yet?
 res := e.resourceKinds[resourceType]
 client, err := e.clientFactory.GetClient(ctx)
 if err != nil {
@@ -689,13 +757,23 @@ func (e *Endpoint) collectResource(ctx context.Context, resourceType string, acc
 if err != nil {
 return err
 }
-latest, hasLatest := e.lastColls[resourceType]
-if hasLatest {
+// Estimate the interval at which we're invoked. Use local time (not server time)
+// since this is about how we got invoked locally.
+localNow := time.Now()
+estInterval := time.Duration(time.Minute)
+if !res.lastColl.IsZero() {
+estInterval = localNow.Sub(res.lastColl).Truncate(time.Duration(res.sampling) * time.Second)
+}
+log.Printf("D! [inputs.vsphere] Interval estimated to %s", estInterval)
+
+latest := res.latestSample
+if !latest.IsZero() {
 elapsed := now.Sub(latest).Seconds() + 5.0 // Allow 5 second jitter.
-log.Printf("D! [input.vsphere]: Latest: %s, elapsed: %f, resource: %s", latest, elapsed, resourceType)
+log.Printf("D! [inputs.vsphere]: Latest: %s, elapsed: %f, resource: %s", latest, elapsed, resourceType)
 if !res.realTime && elapsed < float64(res.sampling) {
-// No new data would be available. We're outta herE! [input.vsphere]:
-log.Printf("D! [input.vsphere]: Sampling period for %s of %d has not elapsed on %s",
+// No new data would be available. We're outta here!
+log.Printf("D! [inputs.vsphere]: Sampling period for %s of %d has not elapsed on %s",
 resourceType, res.sampling, e.URL.Host)
 return nil
 }
@@ -706,91 +784,108 @@ func (e *Endpoint) collectChunk(ctx context.Context, pqs []types.PerfQuerySpec,
 internalTags := map[string]string{"resourcetype": resourceType}
 sw := NewStopwatchWithTags("gather_duration", e.URL.Host, internalTags)

-log.Printf("D! [input.vsphere]: Collecting metrics for %d objects of type %s for %s",
+log.Printf("D! [inputs.vsphere]: Collecting metrics for %d objects of type %s for %s",
 len(res.objects), resourceType, e.URL.Host)

 count := int64(0)

-// Set up a worker pool for collecting chunk metrics
-wp := NewWorkerPool(10)
-wp.Run(ctx, func(ctx context.Context, in interface{}) interface{} {
-chunk := in.([]types.PerfQuerySpec)
-n, err := e.collectChunk(ctx, chunk, resourceType, res, acc)
-log.Printf("D! [input.vsphere] CollectChunk for %s returned %d metrics", resourceType, n)
+var tsMux sync.Mutex
+latestSample := time.Time{}
+
+// Divide workload into chunks and process them concurrently
+e.chunkify(ctx, &res, now, latest, acc,
+func(chunk []types.PerfQuerySpec) {
+n, localLatest, err := e.collectChunk(ctx, chunk, &res, acc, now, estInterval)
+log.Printf("D! [inputs.vsphere] CollectChunk for %s returned %d metrics", resourceType, n)
 if err != nil {
-return err
+acc.AddError(errors.New("While collecting " + res.name + ": " + err.Error()))
 }
 atomic.AddInt64(&count, int64(n))
-return nil
-}, e.Parent.CollectConcurrency)
-
-// Fill the input channel of the worker queue by running the chunking
-// logic implemented in chunker()
-wp.Fill(ctx, func(ctx context.Context, f PushFunc) {
-e.chunker(ctx, f, &res, now, latest)
-})
-
-// Drain the pool. We're getting errors back. They should all be nil
-var mux sync.Mutex
-merr := make(multiError, 0)
-wp.Drain(ctx, func(ctx context.Context, in interface{}) bool {
-if in != nil {
-mux.Lock()
-defer mux.Unlock()
-merr = append(merr, in.(error))
-return false
+tsMux.Lock()
+defer tsMux.Unlock()
+if localLatest.After(latestSample) && !localLatest.IsZero() {
+latestSample = localLatest
 }
-return true
-})
-e.lastColls[resourceType] = now // Use value captured at the beginning to avoid blind spots.
+})

+log.Printf("D! [inputs.vsphere] Latest sample for %s set to %s", resourceType, latestSample)
+if !latestSample.IsZero() {
+res.latestSample = latestSample
+}
 sw.Stop()
 SendInternalCounterWithTags("gather_count", e.URL.Host, internalTags, count)
-if len(merr) > 0 {
-return merr
-}
 return nil
 }

-func (e *Endpoint) collectChunk(ctx context.Context, pqs []types.PerfQuerySpec, resourceType string,
-res resourceKind, acc telegraf.Accumulator) (int, error) {
+func alignSamples(info []types.PerfSampleInfo, values []int64, interval time.Duration) ([]types.PerfSampleInfo, []float64) {
+rInfo := make([]types.PerfSampleInfo, 0, len(info))
+rValues := make([]float64, 0, len(values))
+bi := 1.0
+var lastBucket time.Time
+for idx := range info {
+// According to the docs, SampleInfo and Value should have the same length, but we've seen corrupted
+// data coming back with missing values. Take care of that gracefully!
+if idx >= len(values) {
+log.Printf("D! [inputs.vsphere] len(SampleInfo)>len(Value) %d > %d", len(info), len(values))
+break
+}
+v := float64(values[idx])
+if v < 0 {
+continue
+}
+ts := info[idx].Timestamp
+roundedTs := ts.Truncate(interval)
+
+// Are we still working on the same bucket?
+if roundedTs == lastBucket {
+bi++
+p := len(rValues) - 1
+rValues[p] = ((bi-1)/bi)*float64(rValues[p]) + v/bi
+} else {
+rValues = append(rValues, v)
+roundedInfo := types.PerfSampleInfo{
+Timestamp: roundedTs,
+Interval: info[idx].Interval,
+}
+rInfo = append(rInfo, roundedInfo)
+bi = 1.0
+lastBucket = roundedTs
+}
+}
+//log.Printf("D! [inputs.vsphere] Aligned samples: %d collapsed into %d", len(info), len(rInfo))
+return rInfo, rValues
+}
+
+func (e *Endpoint) collectChunk(ctx context.Context, pqs []types.PerfQuerySpec, res *resourceKind, acc telegraf.Accumulator, now time.Time, interval time.Duration) (int, time.Time, error) {
+log.Printf("D! [inputs.vsphere] Query for %s has %d QuerySpecs", res.name, len(pqs))
+latestSample := time.Time{}
 count := 0
+resourceType := res.name
 prefix := "vsphere" + e.Parent.Separator + resourceType

 client, err := e.clientFactory.GetClient(ctx)
 if err != nil {
-return 0, err
+return count, latestSample, err
 }

-ctx1, cancel1 := context.WithTimeout(ctx, e.Parent.Timeout.Duration)
-defer cancel1()
-metricInfo, err := client.Perf.CounterInfoByName(ctx1)
+metricInfo, err := client.CounterInfoByName(ctx)
 if err != nil {
-return count, err
+return count, latestSample, err
 }

-ctx2, cancel2 := context.WithTimeout(ctx, e.Parent.Timeout.Duration)
-defer cancel2()
-metrics, err := client.Perf.Query(ctx2, pqs)
+ems, err := client.QueryMetrics(ctx, pqs)
 if err != nil {
-return count, err
+return count, latestSample, err
 }

-ctx3, cancel3 := context.WithTimeout(ctx, e.Parent.Timeout.Duration)
-defer cancel3()
-ems, err := client.Perf.ToMetricSeries(ctx3, metrics)
-if err != nil {
-return count, err
-}
-log.Printf("D! [input.vsphere] Query for %s returned metrics for %d objects", resourceType, len(ems))
+log.Printf("D! [inputs.vsphere] Query for %s returned metrics for %d objects", resourceType, len(ems))

 // Iterate through results
 for _, em := range ems {
 moid := em.Entity.Reference().Value
-instInfo, found := e.instanceInfo[moid]
+instInfo, found := res.objects[moid]
 if !found {
-log.Printf("E! [input.vsphere]: MOID %s not found in cache. Skipping! (This should not happen!)", moid)
+log.Printf("E! [inputs.vsphere]: MOID %s not found in cache. Skipping! (This should not happen!)", moid)
 continue
 }
 buckets := make(map[string]metricEntry)
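The alignSamples function added above truncates each sample timestamp to the estimated interval and folds every sample landing in an existing bucket into a running mean via rValues[p] = ((bi-1)/bi)*rValues[p] + v/bi. A small standalone check with made-up values, showing that the incremental update reproduces the ordinary arithmetic mean of a bucket:

package main

import "fmt"

func main() {
	// Hypothetical samples that all truncate into the same bucket.
	values := []float64{10, 20, 40}

	// Incremental form used in alignSamples above.
	bi := 1.0
	mean := values[0]
	for _, v := range values[1:] {
		bi++
		mean = ((bi-1)/bi)*mean + v/bi
	}

	// Plain arithmetic mean for comparison.
	sum := 0.0
	for _, v := range values {
		sum += v
	}

	fmt.Println(mean, sum/float64(len(values))) // both print 23.333...
}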
@@ -805,26 +900,28 @@ func (e *Endpoint) collectChunk(ctx context.Context, pqs []types.PerfQuerySpec,
 // Populate tags
 objectRef, ok := res.objects[moid]
 if !ok {
-log.Printf("E! [input.vsphere]: MOID %s not found in cache. Skipping", moid)
+log.Printf("E! [inputs.vsphere]: MOID %s not found in cache. Skipping", moid)
 continue
 }
-e.populateTags(&objectRef, resourceType, &res, t, &v)
+e.populateTags(&objectRef, resourceType, res, t, &v)

-// Now deal with the values. Iterate backwards so we start with the latest value
-tsKey := moid + "|" + name + "|" + v.Instance
-for idx := len(v.Value) - 1; idx >= 0; idx-- {
-ts := em.SampleInfo[idx].Timestamp
+nValues := 0
+alignedInfo, alignedValues := alignSamples(em.SampleInfo, v.Value, interval) // TODO: Estimate interval

-// Since non-realtime metrics are queries with a lookback, we need to check the high-water mark
-// to determine if this should be included. Only samples not seen before should be included.
-if !(res.realTime || e.hwMarks.IsNew(tsKey, ts)) {
-continue
+for idx, sample := range alignedInfo {
+// According to the docs, SampleInfo and Value should have the same length, but we've seen corrupted
+// data coming back with missing values. Take care of that gracefully!
+if idx >= len(alignedValues) {
+log.Printf("D! [inputs.vsphere] len(SampleInfo)>len(Value) %d > %d", len(alignedInfo), len(alignedValues))
+break
 }
-value := v.Value[idx]
+ts := sample.Timestamp
+if ts.After(latestSample) {
+latestSample = ts
+}
+nValues++

 // Organize the metrics into a bucket per measurement.
-// Data SHOULD be presented to us with the same timestamp for all samples, but in case
-// they don't we use the measurement name + timestamp as the key for the bucket.
 mn, fn := e.makeMetricIdentifier(prefix, name)
 bKey := mn + " " + v.Instance + " " + strconv.FormatInt(ts.UnixNano(), 10)
 bucket, found := buckets[bKey]
@@ -832,27 +929,26 @@ func (e *Endpoint) collectChunk(ctx context.Context, pqs []types.PerfQuerySpec,
 bucket = metricEntry{name: mn, ts: ts, fields: make(map[string]interface{}), tags: t}
 buckets[bKey] = bucket
 }
-if value < 0 {
-log.Printf("D! [input.vsphere]: Negative value for %s on %s. Indicates missing samples", name, objectRef.name)
-continue
-}

 // Percentage values must be scaled down by 100.
 info, ok := metricInfo[name]
 if !ok {
-log.Printf("E! [input.vsphere]: Could not determine unit for %s. Skipping", name)
+log.Printf("E! [inputs.vsphere]: Could not determine unit for %s. Skipping", name)
 }
+v := alignedValues[idx]
 if info.UnitInfo.GetElementDescription().Key == "percent" {
-bucket.fields[fn] = float64(value) / 100.0
+bucket.fields[fn] = float64(v) / 100.0
 } else {
-bucket.fields[fn] = value
+bucket.fields[fn] = v
 }
 count++

-// Update highwater marks for non-realtime metrics.
-if !res.realTime {
-e.hwMarks.Put(tsKey, ts)
+// Update highwater marks
+e.hwMarks.Put(moid, ts)
 }
+if nValues == 0 {
+log.Printf("D! [inputs.vsphere]: Missing value for: %s, %s", name, objectRef.name)
+continue
 }
 }
 // We've iterated through all the metrics and collected buckets for each
@@ -861,17 +957,7 @@ func (e *Endpoint) collectChunk(ctx context.Context, pqs []types.PerfQuerySpec,
 acc.AddFields(bucket.name, bucket.fields, bucket.tags, bucket.ts)
 }
 }
-return count, nil
-}
-
-func (e *Endpoint) getParent(obj resourceInfo) (resourceInfo, bool) {
-p := obj.parentRef
-if p == nil {
-log.Printf("D! [input.vsphere] No parent found for %s", obj.name)
-return resourceInfo{}, false
-}
-r, ok := e.instanceInfo[p.Value]
-return r, ok
+return count, latestSample, nil
 }

 func (e *Endpoint) populateTags(objectRef *objectRef, resourceType string, resource *resourceKind, t map[string]string, v *performance.MetricSeries) {
@@ -885,14 +971,14 @@ func (e *Endpoint) populateTags(objectRef *objectRef, resourceType string, resou
 }

 // Map parent reference
-parent, found := e.instanceInfo[objectRef.parentRef.Value]
+parent, found := e.getParent(objectRef, resource)
 if found {
 t[resource.parentTag] = parent.name
 if resourceType == "vm" {
 if objectRef.guest != "" {
 t["guest"] = objectRef.guest
 }
-if c, ok := e.getParent(parent); ok {
+if c, ok := e.resourceKinds["cluster"].objects[parent.parentRef.Value]; ok {
 t["clustername"] = c.name
 }
 }
|
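Note on the populateTags hunk above: the parent lookup now goes through a getParent helper, and for VMs the clustername tag is resolved by looking the parent host's own parent up in the discovered cluster objects. The sketch below illustrates that two-step walk; the "host" kind name, the nil checks, and the helper name clusterNameForVM are illustrative assumptions rather than code from this commit, and it presumes the plugin's own package and types.

// Sketch only: resolve a VM's cluster name by walking vm -> host -> cluster
// through the discovered object maps referenced in the hunk above.
func clusterNameForVM(e *Endpoint, vm *objectRef) (string, bool) {
    if vm.parentRef == nil {
        return "", false
    }
    // Assumption: hosts are discovered under a "host" resource kind keyed by moid.
    host, ok := e.resourceKinds["host"].objects[vm.parentRef.Value]
    if !ok || host.parentRef == nil {
        return "", false
    }
    cluster, ok := e.resourceKinds["cluster"].objects[host.parentRef.Value]
    if !ok {
        return "", false
    }
    return cluster.name, true
}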
@@ -0,0 +1,45 @@
+package vsphere
+
+import (
+    "context"
+    "sync"
+)
+
+// ThrottledExecutor provides a simple mechanism for running jobs in separate
+// goroutines while limit the number of concurrent jobs running at any given time.
+type ThrottledExecutor struct {
+    limiter chan struct{}
+    wg      sync.WaitGroup
+}
+
+// NewThrottledExecutor creates a new ThrottlesExecutor with a specified maximum
+// number of concurrent jobs
+func NewThrottledExecutor(limit int) *ThrottledExecutor {
+    if limit == 0 {
+        panic("Limit must be > 0")
+    }
+    return &ThrottledExecutor{limiter: make(chan struct{}, limit)}
+}
+
+// Run schedules a job for execution as soon as possible while respecting the
+// maximum concurrency limit.
+func (t *ThrottledExecutor) Run(ctx context.Context, job func()) {
+    t.wg.Add(1)
+    go func() {
+        defer t.wg.Done()
+        select {
+        case t.limiter <- struct{}{}:
+            defer func() {
+                <-t.limiter
+            }()
+            job()
+        case <-ctx.Done():
+            return
+        }
+    }()
+}
+
+// Wait blocks until all scheduled jobs have finished
+func (t *ThrottledExecutor) Wait() {
+    t.wg.Wait()
+}
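Usage sketch for the new ThrottledExecutor (illustrative only, assuming it sits in the same package as the file above): Run schedules any number of jobs but allows at most the configured limit to execute concurrently, and Wait blocks until every scheduled job has either run or been cancelled through the context. The function name and counter below are hypothetical.

package vsphere

import (
    "context"
    "sync/atomic"
)

// throttledExecutorExample drives the ThrottledExecutor defined above:
// 100 jobs are scheduled, but no more than 4 run at the same time.
func throttledExecutorExample() int64 {
    var completed int64
    te := NewThrottledExecutor(4) // at most 4 jobs in flight at any time
    for i := 0; i < 100; i++ {
        te.Run(context.Background(), func() {
            atomic.AddInt64(&completed, 1) // the real collector would run a metric query here
        })
    }
    te.Wait() // returns once all scheduled jobs have completed
    return completed
}

The buffered channel inside ThrottledExecutor acts as a counting semaphore, which is what lets it bound concurrency without keeping a fixed pool of worker goroutines alive.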
@@ -49,6 +49,14 @@ func (t *TSCache) IsNew(key string, tm time.Time) bool {
     return !tm.Before(v)
 }

+// Get returns a timestamp (if present)
+func (t *TSCache) Get(key string) (time.Time, bool) {
+    t.mux.RLock()
+    defer t.mux.RUnlock()
+    ts, ok := t.table[key]
+    return ts, ok
+}
+
 // Put updates the latest timestamp for the supplied key.
 func (t *TSCache) Put(key string, time time.Time) {
     t.mux.Lock()
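The Get accessor added here completes the high-water-mark pattern used in collectChunk above (e.hwMarks.Put(moid, ts)): Get supplies the resume point for the next query, IsNew filters out samples that were already ingested, and Put advances the mark. A minimal sketch of that flow, assuming the same package; highWaterSketch, fallback, samples and ingest are hypothetical names, not part of this commit.

package vsphere

import "time"

// highWaterSketch shows how the TSCache methods are intended to fit together
// around a metric query for a single managed object (moid).
func highWaterSketch(hw *TSCache, moid string, fallback time.Duration, samples []time.Time, ingest func(time.Time)) {
    start := time.Now().Add(-fallback) // fallback window for the very first query
    if ts, ok := hw.Get(moid); ok {
        start = ts // resume from the newest sample previously ingested for this object
    }
    _ = start // a real caller would issue the performance query from 'start' onwards
    for _, ts := range samples {
        if hw.IsNew(moid, ts) {
            ingest(ts)
            hw.Put(moid, ts)
        }
    }
}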
@@ -155,7 +155,7 @@ var sampleConfig = `
 ## Clusters
 # cluster_metric_include = [] ## if omitted or empty, all metrics are collected
 # cluster_metric_exclude = [] ## Nothing excluded by default
-# cluster_instances = true ## true by default
+# cluster_instances = false ## false by default

 ## Datastores
 # datastore_metric_include = [] ## if omitted or empty, all metrics are collected

@@ -260,7 +260,6 @@ func (v *VSphere) Stop() {
 // Gather is the main data collection function called by the Telegraf core. It performs all
 // the data collection and writes all metrics into the Accumulator passed as an argument.
 func (v *VSphere) Gather(acc telegraf.Accumulator) error {
-    merr := make(multiError, 0)
     var wg sync.WaitGroup
     for _, ep := range v.endpoints {
         wg.Add(1)

@@ -274,15 +273,11 @@ func (v *VSphere) Gather(acc telegraf.Accumulator) error {
             }
             if err != nil {
                 acc.AddError(err)
-                merr = append(merr, err)
             }
         }(ep)
     }

     wg.Wait()
-    if len(merr) > 0 {
-        return merr
-    }
     return nil
 }

@@ -291,7 +286,7 @@ func init() {
         return &VSphere{
             Vcenters: []string{},

-            ClusterInstances:     true,
+            ClusterInstances:     false,
             ClusterMetricInclude: nil,
             ClusterMetricExclude: nil,
             HostInstances:        true,
@@ -7,8 +7,11 @@ import (
     "regexp"
     "sort"
     "strings"
+    "sync"
+    "sync/atomic"
     "testing"
     "time"
+    "unsafe"

     "github.com/influxdata/telegraf/internal"
     itls "github.com/influxdata/telegraf/internal/tls"

@@ -175,6 +178,8 @@ func defaultVSphere() *VSphere {
         ObjectDiscoveryInterval: internal.Duration{Duration: time.Second * 300},
         Timeout:                 internal.Duration{Duration: time.Second * 20},
         ForceDiscoverOnInit:     true,
+        DiscoverConcurrency:     1,
+        CollectConcurrency:      1,
     }
 }

@@ -205,32 +210,43 @@ func TestParseConfig(t *testing.T) {
 }

 func TestWorkerPool(t *testing.T) {
-    wp := NewWorkerPool(100)
-    ctx := context.Background()
-    wp.Run(ctx, func(ctx context.Context, p interface{}) interface{} {
-        return p.(int) * 2
-    }, 10)
-
-    n := 100000
-    wp.Fill(ctx, func(ctx context.Context, f PushFunc) {
-        for i := 0; i < n; i++ {
-            f(ctx, i)
-        }
-    })
-    results := make([]int, n)
-    i := 0
-    wp.Drain(ctx, func(ctx context.Context, p interface{}) bool {
-        results[i] = p.(int)
-        i++
-        return true
-    })
+    max := int64(0)
+    ngr := int64(0)
+    n := 10000
+    var mux sync.Mutex
+    results := make([]int, 0, n)
+    te := NewThrottledExecutor(5)
+    for i := 0; i < n; i++ {
+        func(i int) {
+            te.Run(context.Background(), func() {
+                atomic.AddInt64(&ngr, 1)
+                mux.Lock()
+                defer mux.Unlock()
+                results = append(results, i*2)
+                if ngr > max {
+                    max = ngr
+                }
+                time.Sleep(100 * time.Microsecond)
+                atomic.AddInt64(&ngr, -1)
+            })
+        }(i)
+    }
+    te.Wait()
     sort.Ints(results)
     for i := 0; i < n; i++ {
-        require.Equal(t, results[i], i*2)
+        require.Equal(t, results[i], i*2, "Some jobs didn't run")
     }
+    require.Equal(t, int64(5), max, "Wrong number of goroutines spawned")
 }

 func TestTimeout(t *testing.T) {
+    // Don't run test on 32-bit machines due to bug in simulator.
+    // https://github.com/vmware/govmomi/issues/1330
+    var i int
+    if unsafe.Sizeof(i) < 8 {
+        return
+    }
+
     m, s, err := createSim()
     if err != nil {
         t.Fatal(err)

@@ -245,7 +261,7 @@ func TestTimeout(t *testing.T) {
     require.NoError(t, v.Start(nil)) // We're not using the Accumulator, so it can be nil.
     defer v.Stop()
     err = v.Gather(&acc)
-    require.NotNil(t, err, "Error should not be nil here")
+    require.True(t, len(acc.Errors) > 0, "Errors should not be empty here")

     // The accumulator must contain exactly one error and it must be a deadline exceeded.
     require.Equal(t, 1, len(acc.Errors))

@@ -253,6 +269,12 @@
 }

 func TestMaxQuery(t *testing.T) {
+    // Don't run test on 32-bit machines due to bug in simulator.
+    // https://github.com/vmware/govmomi/issues/1330
+    var i int
+    if unsafe.Sizeof(i) < 8 {
+        return
+    }
     m, s, err := createSim()
     if err != nil {
         t.Fatal(err)

@@ -290,6 +312,13 @@ func TestMaxQuery(t *testing.T) {
 }

 func TestAll(t *testing.T) {
+    // Don't run test on 32-bit machines due to bug in simulator.
+    // https://github.com/vmware/govmomi/issues/1330
+    var i int
+    if unsafe.Sizeof(i) < 8 {
+        return
+    }
+
     m, s, err := createSim()
     if err != nil {
         t.Fatal(err)

@@ -300,7 +329,8 @@ func TestAll(t *testing.T) {
     var acc testutil.Accumulator
     v := defaultVSphere()
     v.Vcenters = []string{s.URL.String()}
-    v.Start(nil) // We're not using the Accumulator, so it can be nil.
+    v.Start(&acc)
     defer v.Stop()
     require.NoError(t, v.Gather(&acc))
+    require.Equal(t, 0, len(acc.Errors), fmt.Sprintf("Errors found: %s", acc.Errors))
 }
@@ -1,119 +0,0 @@
-package vsphere
-
-import (
-    "context"
-    "log"
-    "sync"
-)
-
-// WorkerFunc is a function that is supposed to do the actual work
-// of the WorkerPool. It is similar to the "map" portion of the
-// map/reduce semantics, in that it takes a single value as an input,
-// does some processing and returns a single result.
-type WorkerFunc func(context.Context, interface{}) interface{}
-
-// PushFunc is called from a FillerFunc to push a workitem onto
-// the input channel. Wraps some logic for gracefulk shutdowns.
-type PushFunc func(context.Context, interface{}) bool
-
-// DrainerFunc represents a function used to "drain" the WorkerPool,
-// i.e. pull out all the results generated by the workers and processing
-// them. The DrainerFunc is called once per result produced.
-// If the function returns false, the draining of the pool is aborted.
-type DrainerFunc func(context.Context, interface{}) bool
-
-// FillerFunc represents a function for filling the WorkerPool with jobs.
-// It is called once and is responsible for pushing jobs onto the supplied channel.
-type FillerFunc func(context.Context, PushFunc)
-
-// WorkerPool implements a simple work pooling mechanism. It runs a predefined
-// number of goroutines to process jobs. Jobs are inserted using the Fill call
-// and results are retrieved through the Drain function.
-type WorkerPool struct {
-    wg  sync.WaitGroup
-    In  chan interface{}
-    Out chan interface{}
-}
-
-// NewWorkerPool creates a worker pool
-func NewWorkerPool(bufsize int) *WorkerPool {
-    return &WorkerPool{
-        In:  make(chan interface{}, bufsize),
-        Out: make(chan interface{}, bufsize),
-    }
-}
-
-func (w *WorkerPool) push(ctx context.Context, job interface{}) bool {
-    select {
-    case w.In <- job:
-        return true
-    case <-ctx.Done():
-        return false
-    }
-}
-
-func (w *WorkerPool) pushOut(ctx context.Context, result interface{}) bool {
-    select {
-    case w.Out <- result:
-        return true
-    case <-ctx.Done():
-        return false
-    }
-}
-
-// Run takes a WorkerFunc and runs it in 'n' goroutines.
-func (w *WorkerPool) Run(ctx context.Context, f WorkerFunc, n int) bool {
-    w.wg.Add(1)
-    go func() {
-        defer w.wg.Done()
-        var localWg sync.WaitGroup
-        localWg.Add(n)
-        for i := 0; i < n; i++ {
-            go func() {
-                defer localWg.Done()
-                for {
-                    select {
-                    case job, ok := <-w.In:
-                        if !ok {
-                            return
-                        }
-                        w.pushOut(ctx, f(ctx, job))
-                    case <-ctx.Done():
-                        log.Printf("D! [input.vsphere]: Stop requested for worker pool. Exiting.")
-                        return
-                    }
-                }
-            }()
-        }
-        localWg.Wait()
-        close(w.Out)
-    }()
-    return ctx.Err() == nil
-}
-
-// Fill runs a FillerFunc responsible for supplying work to the pool. You may only
-// call Fill once. Calling it twice will panic.
-func (w *WorkerPool) Fill(ctx context.Context, f FillerFunc) bool {
-    w.wg.Add(1)
-    go func() {
-        defer w.wg.Done()
-        f(ctx, w.push)
-        close(w.In)
-    }()
-    return true
-}
-
-// Drain runs a DrainerFunc for each result generated by the workers.
-func (w *WorkerPool) Drain(ctx context.Context, f DrainerFunc) bool {
-    w.wg.Add(1)
-    go func() {
-        defer w.wg.Done()
-        for result := range w.Out {
-            if !f(ctx, result) {
-                break
-            }
-        }
-    }()
-    w.wg.Wait()
-    return ctx.Err() != nil
-}