Fix potential missing datastore metrics in vSphere plugin (#4968)
parent 0e07bbb877
commit 2d782fbaac
@@ -159,7 +159,7 @@ vm_metric_exclude = [ "*" ]
   # object_discovery_interval = "300s"
 
   ## timeout applies to any of the api request made to vcenter
-  # timeout = "20s"
+  # timeout = "60s"
 
   ## Optional SSL Config
   # ssl_ca = "/path/to/cafile"
@@ -5,10 +5,13 @@ import (
 	"crypto/tls"
 	"log"
 	"net/url"
+	"strconv"
+	"strings"
 	"sync"
 	"time"
 
 	"github.com/vmware/govmomi"
+	"github.com/vmware/govmomi/object"
 	"github.com/vmware/govmomi/performance"
 	"github.com/vmware/govmomi/session"
 	"github.com/vmware/govmomi/view"
@@ -17,6 +20,10 @@ import (
 	"github.com/vmware/govmomi/vim25/soap"
 )
 
+// The highest number of metrics we can query for, no matter what settings
+// and server say.
+const absoluteMaxMetrics = 10000
+
 // ClientFactory is used to obtain Clients to be used throughout the plugin. Typically,
 // a single Client is reused across all functions and goroutines, but the client
 // is periodically recycled to avoid authentication expiration issues.
@@ -79,6 +86,8 @@ func (cf *ClientFactory) GetClient(ctx context.Context) (*Client, error) {
 // NewClient creates a new vSphere client based on the url and setting passed as parameters.
 func NewClient(ctx context.Context, u *url.URL, vs *VSphere) (*Client, error) {
 	sw := NewStopwatch("connect", u.Host)
+	defer sw.Stop()
+
 	tlsCfg, err := vs.ClientConfig.TLSConfig()
 	if err != nil {
 		return nil, err
@@ -147,16 +156,27 @@ func NewClient(ctx context.Context, u *url.URL, vs *VSphere) (*Client, error) {
 
 	p := performance.NewManager(c.Client)
 
-	sw.Stop()
-
-	return &Client{
+	client := &Client{
 		Client:  c,
 		Views:   m,
 		Root:    v,
 		Perf:    p,
 		Valid:   true,
 		Timeout: vs.Timeout.Duration,
-	}, nil
+	}
+	// Adjust max query size if needed
+	ctx3, cancel3 := context.WithTimeout(ctx, vs.Timeout.Duration)
+	defer cancel3()
+	n, err := client.GetMaxQueryMetrics(ctx3)
+	if err != nil {
+		return nil, err
+	}
+	log.Printf("D! [input.vsphere] vCenter says max_query_metrics should be %d", n)
+	if n < vs.MaxQueryMetrics {
+		log.Printf("W! [input.vsphere] Configured max_query_metrics is %d, but server limits it to %d. Reducing.", vs.MaxQueryMetrics, n)
+		vs.MaxQueryMetrics = n
+	}
+	return client, nil
 }
 
 // Close shuts down a ClientFactory and releases any resources associated with it.
@@ -191,3 +211,47 @@ func (c *Client) GetServerTime(ctx context.Context) (time.Time, error) {
 	}
 	return *t, nil
 }
+
+// GetMaxQueryMetrics returns the max_query_metrics setting as configured in vCenter
+func (c *Client) GetMaxQueryMetrics(ctx context.Context) (int, error) {
+	ctx, cancel := context.WithTimeout(ctx, c.Timeout)
+	defer cancel()
+
+	om := object.NewOptionManager(c.Client.Client, *c.Client.Client.ServiceContent.Setting)
+	res, err := om.Query(ctx, "config.vpxd.stats.maxQueryMetrics")
+	if err == nil {
+		if len(res) > 0 {
+			if s, ok := res[0].GetOptionValue().Value.(string); ok {
+				v, err := strconv.Atoi(s)
+				if err == nil {
+					log.Printf("D! [input.vsphere] vCenter maxQueryMetrics is defined: %d", v)
+					if v == -1 {
+						// Whatever the server says, we never ask for more metrics than this.
+						return absoluteMaxMetrics, nil
+					}
+					return v, nil
+				}
+			}
+			// Fall through version-based inference if value isn't usable
+		}
+	} else {
+		log.Println("I! [input.vsphere] Option query for maxQueryMetrics failed. Using default")
+	}
+
+	// No usable maxQueryMetrics setting. Infer based on version
+	ver := c.Client.Client.ServiceContent.About.Version
+	parts := strings.Split(ver, ".")
+	if len(parts) < 2 {
+		log.Printf("W! [input.vsphere] vCenter returned an invalid version string: %s. Using default query size=64", ver)
+		return 64, nil
+	}
+	log.Printf("D! [input.vsphere] vCenter version is: %s", ver)
+	major, err := strconv.Atoi(parts[0])
+	if err != nil {
+		return 0, err
+	}
+	if major < 6 || major == 6 && parts[1] == "0" {
+		return 64, nil
+	}
+	return 256, nil
+}
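
For orientation: the new GetMaxQueryMetrics helper above reads the vCenter advanced setting config.vpxd.stats.maxQueryMetrics through govmomi's OptionManager and falls back to a version-based guess when the option is absent or unusable. The standalone sketch below shows the same lookup outside the plugin; the URL, credentials, and insecure TLS flag are placeholder assumptions, not part of this commit.

// Minimal sketch of querying the vCenter advanced setting used by GetMaxQueryMetrics.
package main

import (
	"context"
	"fmt"
	"log"
	"strconv"

	"github.com/vmware/govmomi"
	"github.com/vmware/govmomi/object"
	"github.com/vmware/govmomi/vim25/soap"
)

func main() {
	ctx := context.Background()

	// Hypothetical endpoint; substitute a real vCenter URL with credentials.
	u, err := soap.ParseURL("https://user:pass@vcenter.example.com/sdk")
	if err != nil {
		log.Fatal(err)
	}
	c, err := govmomi.NewClient(ctx, u, true) // true skips TLS verification for the sketch
	if err != nil {
		log.Fatal(err)
	}

	// Same option key the plugin now consults before sizing its queries.
	om := object.NewOptionManager(c.Client, *c.Client.ServiceContent.Setting)
	res, err := om.Query(ctx, "config.vpxd.stats.maxQueryMetrics")
	if err != nil || len(res) == 0 {
		fmt.Println("setting not available; the plugin falls back to a version-based default")
		return
	}
	if s, ok := res[0].GetOptionValue().Value.(string); ok {
		if v, err := strconv.Atoi(s); err == nil {
			fmt.Printf("vCenter allows at most %d metrics per query (-1 means unlimited)\n", v)
		}
	}
}
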
@@ -24,6 +24,8 @@ import (
 
 var isolateLUN = regexp.MustCompile(".*/([^/]+)/?$")
 
+const metricLookback = 3
+
 // Endpoint is a high-level representation of a connected vCenter endpoint. It is backed by the lower
 // level Client type.
 type Endpoint struct {
@@ -32,6 +34,7 @@ type Endpoint struct {
 	lastColls       map[string]time.Time
 	instanceInfo    map[string]resourceInfo
 	resourceKinds   map[string]resourceKind
+	hwMarks         *TSCache
 	lun2ds          map[string]string
 	discoveryTicker *time.Ticker
 	collectMux      sync.RWMutex
@@ -96,6 +99,7 @@ func NewEndpoint(ctx context.Context, parent *VSphere, url *url.URL) (*Endpoint,
 		URL:          url,
 		Parent:       parent,
 		lastColls:    make(map[string]time.Time),
+		hwMarks:      NewTSCache(1 * time.Hour),
 		instanceInfo: make(map[string]resourceInfo),
 		lun2ds:       make(map[string]string),
 		initialized:  false,
@@ -353,8 +357,8 @@ func (e *Endpoint) discover(ctx context.Context) error {
 	// Populate resource objects, and endpoint instance info.
 	for k, res := range e.resourceKinds {
 		log.Printf("D! [input.vsphere] Discovering resources for %s", res.name)
-		// Need to do this for all resource types even if they are not enabled (but datastore)
-		if res.enabled || (k != "datastore" && k != "vm") {
+		// Need to do this for all resource types even if they are not enabled
+		if res.enabled || k != "vm" {
 			objects, err := res.getObjects(ctx, e, client.Root)
 			if err != nil {
 				return err
@@ -416,7 +420,6 @@ func (e *Endpoint) discover(ctx context.Context) error {
 			url := ds.altID
 			m := isolateLUN.FindStringSubmatch(url)
 			if m != nil {
-				log.Printf("D! [input.vsphere]: LUN: %s", m[1])
 				l2d[m[1]] = ds.name
 			}
 		}
@@ -539,7 +542,6 @@ func getDatastores(ctx context.Context, e *Endpoint, root *view.ContainerView) (
 				url = info.Url
 			}
 		}
-		log.Printf("D! [input.vsphere]: DS URL: %s %s", url, r.Name)
 		m[r.ExtensibleManagedObject.Reference().Value] = objectRef{
 			name: r.Name, ref: r.ExtensibleManagedObject.Reference(), parentRef: r.Parent, altID: url}
 	}
@@ -584,10 +586,24 @@ func (e *Endpoint) Collect(ctx context.Context, acc telegraf.Accumulator) error
 			}
 		}
 	}
+
+	// Purge old timestamps from the cache
+	e.hwMarks.Purge()
 	return nil
 }
 
 func (e *Endpoint) chunker(ctx context.Context, f PushFunc, res *resourceKind, now time.Time, latest time.Time) {
+	maxMetrics := e.Parent.MaxQueryMetrics
+	if maxMetrics < 1 {
+		maxMetrics = 1
+	}
+
+	// Workaround for vCenter weirdness. Cluster metrics seem to count multiple times
+	// when checking query size, so keep it at a low value.
+	// Revisit this when we better understand the reason why vCenter counts it this way!
+	if res.name == "cluster" && maxMetrics > 10 {
+		maxMetrics = 10
+	}
 	pqs := make([]types.PerfQuerySpec, 0, e.Parent.MaxQueryObjects)
 	metrics := 0
 	total := 0
@@ -600,7 +616,7 @@ func (e *Endpoint) chunker(ctx context.Context, f PushFunc, res *resourceKind, n
 		mr := len(info.metrics)
 		for mr > 0 {
 			mc := mr
-			headroom := e.Parent.MaxQueryMetrics - metrics
+			headroom := maxMetrics - metrics
 			if !res.realTime && mc > headroom { // Metric query limit only applies to non-realtime metrics
 				mc = headroom
 			}
@@ -610,10 +626,19 @@ func (e *Endpoint) chunker(ctx context.Context, f PushFunc, res *resourceKind, n
 				MaxSample:  1,
 				MetricId:   info.metrics[fm : fm+mc],
 				IntervalId: res.sampling,
+				Format:     "normal",
 			}
 
+			// For non-realtime metrics, we need to look back a few samples in case
+			// the vCenter is late reporting metrics.
 			if !res.realTime {
-				pq.StartTime = &latest
+				pq.MaxSample = metricLookback
+			}
+
+			// Look back 3 sampling periods
+			start := latest.Add(time.Duration(-res.sampling) * time.Second * (metricLookback - 1))
+			if !res.realTime {
+				pq.StartTime = &start
 				pq.EndTime = &now
 			}
 			pqs = append(pqs, pq)
@@ -623,8 +648,8 @@ func (e *Endpoint) chunker(ctx context.Context, f PushFunc, res *resourceKind, n
 			// We need to dump the current chunk of metrics for one of two reasons:
 			// 1) We filled up the metric quota while processing the current resource
 			// 2) We are at the last resource and have no more data to process.
-			if mr > 0 || (!res.realTime && metrics >= e.Parent.MaxQueryMetrics) || nRes >= e.Parent.MaxQueryObjects {
-				log.Printf("D! [input.vsphere]: Querying %d objects, %d metrics (%d remaining) of type %s for %s. Processed objects: %d. Total objects %d",
+			if mr > 0 || (!res.realTime && metrics >= maxMetrics) || nRes >= e.Parent.MaxQueryObjects {
+				log.Printf("D! [input.vsphere]: Queueing query: %d objects, %d metrics (%d remaining) of type %s for %s. Processed objects: %d. Total objects %d",
 					len(pqs), metrics, mr, res.name, e.URL.Host, total+1, len(res.objects))
 
 				// To prevent deadlocks, don't send work items if the context has been cancelled.
@@ -646,6 +671,8 @@ func (e *Endpoint) chunker(ctx context.Context, f PushFunc, res *resourceKind, n
 	//
 	if len(pqs) > 0 {
 		// Call push function
+		log.Printf("D! [input.vsphere]: Queuing query: %d objects, %d metrics (0 remaining) of type %s for %s. Total objects %d (final chunk)",
+			len(pqs), metrics, res.name, e.URL.Host, len(res.objects))
 		f(ctx, pqs)
 	}
 }
@@ -668,7 +695,7 @@ func (e *Endpoint) collectResource(ctx context.Context, resourceType string, acc
 	log.Printf("D! [input.vsphere]: Latest: %s, elapsed: %f, resource: %s", latest, elapsed, resourceType)
 	if !res.realTime && elapsed < float64(res.sampling) {
 		// No new data would be available. We're outta herE! [input.vsphere]:
-		log.Printf("D! [input.vsphere]: Sampling period for %s of %d has not elapsed for %s",
+		log.Printf("D! [input.vsphere]: Sampling period for %s of %d has not elapsed on %s",
 			resourceType, res.sampling, e.URL.Host)
 		return nil
 	}
@@ -679,7 +706,6 @@ func (e *Endpoint) collectResource(ctx context.Context, resourceType string, acc
 	internalTags := map[string]string{"resourcetype": resourceType}
 	sw := NewStopwatchWithTags("gather_duration", e.URL.Host, internalTags)
 
-	log.Printf("D! [input.vsphere]: Start of sample period deemed to be %s", latest)
 	log.Printf("D! [input.vsphere]: Collecting metrics for %d objects of type %s for %s",
 		len(res.objects), resourceType, e.URL.Host)
 
@@ -690,7 +716,7 @@ func (e *Endpoint) collectResource(ctx context.Context, resourceType string, acc
 	wp.Run(ctx, func(ctx context.Context, in interface{}) interface{} {
 		chunk := in.([]types.PerfQuerySpec)
 		n, err := e.collectChunk(ctx, chunk, resourceType, res, acc)
-		log.Printf("D! [input.vsphere]: Query returned %d metrics", n)
+		log.Printf("D! [input.vsphere] CollectChunk for %s returned %d metrics", resourceType, n)
 		if err != nil {
 			return err
 		}
@@ -722,7 +748,7 @@ func (e *Endpoint) collectResource(ctx context.Context, resourceType string, acc
 	sw.Stop()
 	SendInternalCounterWithTags("gather_count", e.URL.Host, internalTags, count)
 	if len(merr) > 0 {
-		return err
+		return merr
 	}
 	return nil
 }
@@ -757,6 +783,7 @@ func (e *Endpoint) collectChunk(ctx context.Context, pqs []types.PerfQuerySpec,
 	if err != nil {
 		return count, err
 	}
+	log.Printf("D! [input.vsphere] Query for %s returned metrics for %d objects", resourceType, len(ems))
 
 	// Iterate through results
 	for _, em := range ems {
@@ -783,10 +810,18 @@ func (e *Endpoint) collectChunk(ctx context.Context, pqs []types.PerfQuerySpec,
 			}
 			e.populateTags(&objectRef, resourceType, &res, t, &v)
 
-			// Now deal with the values
-			for idx, value := range v.Value {
+			// Now deal with the values. Iterate backwards so we start with the latest value
+			tsKey := moid + "|" + name + "|" + v.Instance
+			for idx := len(v.Value) - 1; idx >= 0; idx-- {
 				ts := em.SampleInfo[idx].Timestamp
 
+				// Since non-realtime metrics are queries with a lookback, we need to check the high-water mark
+				// to determine if this should be included. Only samples not seen before should be included.
+				if !(res.realTime || e.hwMarks.IsNew(tsKey, ts)) {
+					continue
+				}
+				value := v.Value[idx]
+
 				// Organize the metrics into a bucket per measurement.
 				// Data SHOULD be presented to us with the same timestamp for all samples, but in case
 				// they don't we use the measurement name + timestamp as the key for the bucket.
@@ -813,6 +848,11 @@ func (e *Endpoint) collectChunk(ctx context.Context, pqs []types.PerfQuerySpec,
 					bucket.fields[fn] = value
 				}
 				count++
+
+				// Update highwater marks for non-realtime metrics.
+				if !res.realTime {
+					e.hwMarks.Put(tsKey, ts)
+				}
 			}
 		}
 	// We've iterated through all the metrics and collected buckets for each
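
The datastore fix above rests on two pieces: historical (non-realtime) resources such as datastores are now queried with MaxSample = metricLookback and a start time pushed back (metricLookback - 1) sampling periods, and the TSCache high-water marks then filter out samples that were already emitted. Below is a small sketch of the window arithmetic under assumed example values; the 300s interval is the standard vCenter historical sampling period, and the timestamps are made up.

// Sketch of the lookback window the chunker now requests for historical resources.
package main

import (
	"fmt"
	"time"
)

// Same constant the commit introduces in the endpoint code.
const metricLookback = 3

func main() {
	sampling := int32(300)                                    // seconds per historical sample (res.sampling)
	latest := time.Date(2018, 10, 31, 12, 0, 0, 0, time.UTC)  // last sample previously seen
	now := time.Date(2018, 10, 31, 12, 5, 0, 0, time.UTC)     // current collection time

	// Start (metricLookback - 1) sampling periods before the latest sample,
	// i.e. 600s earlier here, so a late-arriving datastore sample is still caught.
	start := latest.Add(time.Duration(-sampling) * time.Second * (metricLookback - 1))
	fmt.Printf("query window %s -> %s, up to %d samples per metric\n", start, now, metricLookback)
}
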
@@ -0,0 +1,57 @@
+package vsphere
+
+import (
+	"log"
+	"sync"
+	"time"
+)
+
+// TSCache is a cache of timestamps used to determine the validity of datapoints
+type TSCache struct {
+	ttl   time.Duration
+	table map[string]time.Time
+	done  chan struct{}
+	mux   sync.RWMutex
+}
+
+// NewTSCache creates a new TSCache with a specified time-to-live after which timestamps are discarded.
+func NewTSCache(ttl time.Duration) *TSCache {
+	return &TSCache{
+		ttl:   ttl,
+		table: make(map[string]time.Time),
+		done:  make(chan struct{}),
+	}
+}
+
+// Purge removes timestamps that are older than the time-to-live
+func (t *TSCache) Purge() {
+	t.mux.Lock()
+	defer t.mux.Unlock()
+	n := 0
+	for k, v := range t.table {
+		if time.Now().Sub(v) > t.ttl {
+			delete(t.table, k)
+			n++
+		}
+	}
+	log.Printf("D! [input.vsphere] Purged timestamp cache. %d deleted with %d remaining", n, len(t.table))
+}
+
+// IsNew returns true if the supplied timestamp for the supplied key is more recent than the
+// timestamp we have on record.
+func (t *TSCache) IsNew(key string, tm time.Time) bool {
+	t.mux.RLock()
+	defer t.mux.RUnlock()
+	v, ok := t.table[key]
+	if !ok {
+		return true // We've never seen this before, so consider everything a new sample
+	}
+	return !tm.Before(v)
+}
+
+// Put updates the latest timestamp for the supplied key.
+func (t *TSCache) Put(key string, time time.Time) {
+	t.mux.Lock()
+	defer t.mux.Unlock()
+	t.table[key] = time
+}
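
A brief usage sketch of the TSCache added above, mirroring how collectChunk now decides whether a sample should be emitted. The key and timestamps are example values, and the snippet assumes it compiles inside the same vsphere package since the type is unexported.

// Sketch of the high-water-mark pattern backed by TSCache.
package vsphere

import (
	"fmt"
	"time"
)

func ExampleTSCache() {
	cache := NewTSCache(1 * time.Hour)
	key := "datastore-123|disk.used.latest|" // moid + "|" + metric + "|" + instance, as built in collectChunk

	t0 := time.Now().Add(-10 * time.Minute)
	t1 := time.Now().Add(-5 * time.Minute)

	fmt.Println(cache.IsNew(key, t0)) // true: no record yet, so the sample is emitted
	cache.Put(key, t0)                // remember the high-water mark

	fmt.Println(cache.IsNew(key, t1)) // true: newer than the recorded mark
	cache.Put(key, t1)

	fmt.Println(cache.IsNew(key, t0)) // false: older than the mark, skipped on re-query
	cache.Purge()                     // drops entries older than the 1h TTL
}
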
@@ -192,7 +192,7 @@ var sampleConfig = `
   # object_discovery_interval = "300s"
 
   ## timeout applies to any of the api request made to vcenter
-  # timeout = "20s"
+  # timeout = "60s"
 
   ## Optional SSL Config
   # ssl_ca = "/path/to/cafile"
@@ -260,6 +260,7 @@ func (v *VSphere) Stop() {
 // Gather is the main data collection function called by the Telegraf core. It performs all
 // the data collection and writes all metrics into the Accumulator passed as an argument.
 func (v *VSphere) Gather(acc telegraf.Accumulator) error {
+	merr := make(multiError, 0)
 	var wg sync.WaitGroup
 	for _, ep := range v.endpoints {
 		wg.Add(1)
@@ -273,11 +274,15 @@ func (v *VSphere) Gather(acc telegraf.Accumulator) error {
 			}
 			if err != nil {
 				acc.AddError(err)
+				merr = append(merr, err)
 			}
 		}(ep)
 	}
 
 	wg.Wait()
+	if len(merr) > 0 {
+		return merr
+	}
 	return nil
 }
 
@@ -306,7 +311,7 @@ func init() {
 			DiscoverConcurrency:     1,
 			ForceDiscoverOnInit:     false,
 			ObjectDiscoveryInterval: internal.Duration{Duration: time.Second * 300},
-			Timeout:                 internal.Duration{Duration: time.Second * 20},
+			Timeout:                 internal.Duration{Duration: time.Second * 60},
 		}
 	})
 }
@@ -15,7 +15,9 @@ import (
 	"github.com/influxdata/telegraf/testutil"
 	"github.com/influxdata/toml"
 	"github.com/stretchr/testify/require"
+	"github.com/vmware/govmomi/object"
 	"github.com/vmware/govmomi/simulator"
+	"github.com/vmware/govmomi/vim25/types"
 )
 
 var configHeader = `
@@ -187,8 +189,6 @@ func createSim() (*simulator.Model, *simulator.Server, error) {
 	model.Service.TLS = new(tls.Config)
 
 	s := model.Service.NewServer()
-	//fmt.Printf("Server created at: %s\n", s.URL)
-
 	return model, s, nil
 }
 
@@ -244,13 +244,51 @@ func TestTimeout(t *testing.T) {
 	v.Timeout = internal.Duration{Duration: 1 * time.Nanosecond}
 	require.NoError(t, v.Start(nil)) // We're not using the Accumulator, so it can be nil.
 	defer v.Stop()
-	require.NoError(t, v.Gather(&acc))
+	err = v.Gather(&acc)
+	require.NotNil(t, err, "Error should not be nil here")
 
 	// The accumulator must contain exactly one error and it must be a deadline exceeded.
 	require.Equal(t, 1, len(acc.Errors))
 	require.True(t, strings.Contains(acc.Errors[0].Error(), "context deadline exceeded"))
 }
 
+func TestMaxQuery(t *testing.T) {
+	m, s, err := createSim()
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer m.Remove()
+	defer s.Close()
+
+	v := defaultVSphere()
+	v.MaxQueryMetrics = 256
+	ctx := context.Background()
+	c, err := NewClient(ctx, s.URL, v)
+	if err != nil {
+		t.Fatal(err)
+	}
+	require.Equal(t, 256, v.MaxQueryMetrics)
+
+	om := object.NewOptionManager(c.Client.Client, *c.Client.Client.ServiceContent.Setting)
+	err = om.Update(ctx, []types.BaseOptionValue{&types.OptionValue{
+		Key:   "config.vpxd.stats.maxQueryMetrics",
+		Value: "42",
+	}})
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	v.MaxQueryMetrics = 256
+	ctx = context.Background()
+	c2, err := NewClient(ctx, s.URL, v)
+	if err != nil {
+		t.Fatal(err)
+	}
+	require.Equal(t, 42, v.MaxQueryMetrics)
+	c.close()
+	c2.close()
+}
+
 func TestAll(t *testing.T) {
 	m, s, err := createSim()
 	if err != nil {