From 5f3c331f79924514ad83f6f8ed4f44250c38b877 Mon Sep 17 00:00:00 2001 From: Pontus Rydin Date: Tue, 11 Sep 2018 17:53:46 -0400 Subject: [PATCH] Add input plugin for VMware vSphere (#4141) --- Gopkg.lock | 45 ++ Gopkg.toml | 4 + plugins/inputs/all/all.go | 1 + plugins/inputs/vsphere/METRICS.MD | 287 +++++++++ plugins/inputs/vsphere/README.MD | 354 ++++++++++ plugins/inputs/vsphere/client.go | 175 +++++ plugins/inputs/vsphere/endpoint.go | 852 +++++++++++++++++++++++++ plugins/inputs/vsphere/selfhealth.go | 53 ++ plugins/inputs/vsphere/vsphere.go | 312 +++++++++ plugins/inputs/vsphere/vsphere_test.go | 246 +++++++ plugins/inputs/vsphere/workerpool.go | 119 ++++ 11 files changed, 2448 insertions(+) create mode 100644 plugins/inputs/vsphere/METRICS.MD create mode 100644 plugins/inputs/vsphere/README.MD create mode 100644 plugins/inputs/vsphere/client.go create mode 100644 plugins/inputs/vsphere/endpoint.go create mode 100644 plugins/inputs/vsphere/selfhealth.go create mode 100644 plugins/inputs/vsphere/vsphere.go create mode 100644 plugins/inputs/vsphere/vsphere_test.go create mode 100644 plugins/inputs/vsphere/workerpool.go diff --git a/Gopkg.lock b/Gopkg.lock index b592346a8..ed161e69b 100644 --- a/Gopkg.lock +++ b/Gopkg.lock @@ -448,6 +448,14 @@ revision = "3af367b6b30c263d47e8895973edcca9a49cf029" version = "v0.2.0" +[[projects]] + digest = "1:c1d7e883c50a26ea34019320d8ae40fad86c9e5d56e63a1ba2cb618cef43e986" + name = "github.com/google/uuid" + packages = ["."] + pruneopts = "" + revision = "064e2069ce9c359c118179501254f67d7d37ba24" + version = "0.2" + [[projects]] digest = "1:dbbeb8ddb0be949954c8157ee8439c2adfd8dc1c9510eb44a6e58cb68c3dce28" name = "github.com/gorilla/context" @@ -949,6 +957,36 @@ revision = "ce01e59abcf6fbc9833b7deb5e4b8ee1769bcc53" version = "v1.0.0" +[[projects]] + digest = "1:f9fe29bf856d49f9a51d6001588cb5ee5d65c8a7ff5e8b0dd5423c3a510f0833" + name = "github.com/vmware/govmomi" + packages = [ + ".", + "find", + "list", + "nfc", + "object", + "performance", + "property", + "session", + "simulator", + "simulator/esx", + "simulator/vpx", + "task", + "view", + "vim25", + "vim25/debug", + "vim25/methods", + "vim25/mo", + "vim25/progress", + "vim25/soap", + "vim25/types", + "vim25/xml", + ] + pruneopts = "" + revision = "e3a01f9611c32b2362366434bcd671516e78955d" + version = "v0.18.0" + [[projects]] branch = "master" digest = "1:98ed05e9796df287b90c1d96854e3913c8e349dbc546412d3cabb472ecf4b417" @@ -1329,6 +1367,13 @@ "github.com/stretchr/testify/require", "github.com/tidwall/gjson", "github.com/vjeantet/grok", + "github.com/vmware/govmomi", + "github.com/vmware/govmomi/performance", + "github.com/vmware/govmomi/simulator", + "github.com/vmware/govmomi/view", + "github.com/vmware/govmomi/vim25/mo", + "github.com/vmware/govmomi/vim25/soap", + "github.com/vmware/govmomi/vim25/types", "github.com/wvanbergen/kafka/consumergroup", "github.com/zensqlmonitor/go-mssqldb", "golang.org/x/net/context", diff --git a/Gopkg.toml b/Gopkg.toml index b4576ed6f..c89578397 100644 --- a/Gopkg.toml +++ b/Gopkg.toml @@ -223,6 +223,10 @@ name = "gopkg.in/fsnotify.v1" [[constraint]] + name = "github.com/vmware/govmomi" + version = "0.18.0" + + [[constraint]] name = "github.com/Azure/go-autorest" version = "10.12.0" diff --git a/plugins/inputs/all/all.go b/plugins/inputs/all/all.go index 9dcb0dbd3..02008ffd5 100644 --- a/plugins/inputs/all/all.go +++ b/plugins/inputs/all/all.go @@ -128,6 +128,7 @@ import ( _ "github.com/influxdata/telegraf/plugins/inputs/udp_listener" _ 
"github.com/influxdata/telegraf/plugins/inputs/unbound" _ "github.com/influxdata/telegraf/plugins/inputs/varnish" + _ "github.com/influxdata/telegraf/plugins/inputs/vsphere" _ "github.com/influxdata/telegraf/plugins/inputs/webhooks" _ "github.com/influxdata/telegraf/plugins/inputs/win_perf_counters" _ "github.com/influxdata/telegraf/plugins/inputs/win_services" diff --git a/plugins/inputs/vsphere/METRICS.MD b/plugins/inputs/vsphere/METRICS.MD new file mode 100644 index 000000000..0b9e0482f --- /dev/null +++ b/plugins/inputs/vsphere/METRICS.MD @@ -0,0 +1,287 @@ +# Common vSphere Performance Metrics +The set of performance metrics in vSphere is open ended. Metrics may be added or removed in new releases +and the set of available metrics may vary depending hardware, as well as what plugins and add-on products +are installed. Therefore, providing a definitive list of available metrics is difficult. The metrics listed +below are the most commonly available as of vSphere 6.5. + +To list the exact set in your environment, please use the govc tool available [here](https://github.com/vmware/govmomi/tree/master/govc) + +To obtain the set of metrics for e.g. a VM, you may use the following command: +``` +govc metric.ls vm/* +``` + +## Virtual Machine Metrics +``` +cpu.demandEntitlementRatio.latest +cpu.usage.average +cpu.ready.summation +cpu.run.summation +cpu.system.summation +cpu.swapwait.summation +cpu.costop.summation +cpu.demand.average +cpu.readiness.average +cpu.maxlimited.summation +cpu.wait.summation +cpu.usagemhz.average +cpu.latency.average +cpu.used.summation +cpu.overlap.summation +cpu.idle.summation +cpu.entitlement.latest +datastore.maxTotalLatency.latest +disk.usage.average +disk.read.average +disk.write.average +disk.maxTotalLatency.latest +mem.llSwapUsed.average +mem.swapin.average +mem.vmmemctltarget.average +mem.activewrite.average +mem.overhead.average +mem.vmmemctl.average +mem.zero.average +mem.swapoutRate.average +mem.active.average +mem.llSwapOutRate.average +mem.swapout.average +mem.llSwapInRate.average +mem.swapinRate.average +mem.granted.average +mem.latency.average +mem.overheadMax.average +mem.swapped.average +mem.compressionRate.average +mem.swaptarget.average +mem.shared.average +mem.zipSaved.latest +mem.overheadTouched.average +mem.zipped.latest +mem.consumed.average +mem.entitlement.average +mem.usage.average +mem.decompressionRate.average +mem.compressed.average +net.multicastRx.summation +net.transmitted.average +net.received.average +net.usage.average +net.broadcastTx.summation +net.broadcastRx.summation +net.packetsRx.summation +net.pnicBytesRx.average +net.multicastTx.summation +net.bytesTx.average +net.bytesRx.average +net.droppedRx.summation +net.pnicBytesTx.average +net.droppedTx.summation +net.packetsTx.summation +power.power.average +power.energy.summation +rescpu.runpk1.latest +rescpu.runpk15.latest +rescpu.maxLimited5.latest +rescpu.actpk5.latest +rescpu.samplePeriod.latest +rescpu.runav1.latest +rescpu.runav15.latest +rescpu.sampleCount.latest +rescpu.actpk1.latest +rescpu.runpk5.latest +rescpu.runav5.latest +rescpu.actav15.latest +rescpu.actav1.latest +rescpu.actpk15.latest +rescpu.actav5.latest +rescpu.maxLimited1.latest +rescpu.maxLimited15.latest +sys.osUptime.latest +sys.uptime.latest +sys.heartbeat.latest +virtualDisk.write.average +virtualDisk.read.average +``` + +## Host System Metrics +``` +cpu.corecount.contention.average +cpu.usage.average +cpu.reservedCapacity.average +cpu.usagemhz.minimum +cpu.usagemhz.maximum +cpu.usage.minimum 
+cpu.usage.maximum +cpu.capacity.provisioned.average +cpu.capacity.usage.average +cpu.capacity.demand.average +cpu.capacity.contention.average +cpu.corecount.provisioned.average +cpu.corecount.usage.average +cpu.usagemhz.average +disk.throughput.contention.average +disk.throughput.usage.average +mem.decompressionRate.average +mem.granted.average +mem.active.average +mem.shared.average +mem.zero.average +mem.swapused.average +mem.vmmemctl.average +mem.compressed.average +mem.compressionRate.average +mem.reservedCapacity.average +mem.capacity.provisioned.average +mem.capacity.usable.average +mem.capacity.usage.average +mem.capacity.entitlement.average +mem.capacity.contention.average +mem.usage.minimum +mem.overhead.minimum +mem.consumed.minimum +mem.granted.minimum +mem.active.minimum +mem.shared.minimum +mem.zero.minimum +mem.swapused.minimum +mem.consumed.average +mem.usage.maximum +mem.overhead.maximum +mem.consumed.maximum +mem.granted.maximum +mem.overhead.average +mem.shared.maximum +mem.zero.maximum +mem.swapused.maximum +mem.vmmemctl.maximum +mem.usage.average +mem.active.maximum +mem.vmmemctl.minimum +net.throughput.contention.summation +net.throughput.usage.average +net.throughput.usable.average +net.throughput.provisioned.average +power.power.average +power.powerCap.average +power.energy.summation +vmop.numShutdownGuest.latest +vmop.numPoweroff.latest +vmop.numSuspend.latest +vmop.numReset.latest +vmop.numRebootGuest.latest +vmop.numStandbyGuest.latest +vmop.numPoweron.latest +vmop.numCreate.latest +vmop.numDestroy.latest +vmop.numRegister.latest +vmop.numUnregister.latest +vmop.numReconfigure.latest +vmop.numClone.latest +vmop.numDeploy.latest +vmop.numChangeHost.latest +vmop.numChangeDS.latest +vmop.numChangeHostDS.latest +vmop.numVMotion.latest +vmop.numSVMotion.latest +vmop.numXVMotion.latest +``` + +## Cluster Metrics +``` +cpu.corecount.contention.average +cpu.usage.average +cpu.reservedCapacity.average +cpu.usagemhz.minimum +cpu.usagemhz.maximum +cpu.usage.minimum +cpu.usage.maximum +cpu.capacity.provisioned.average +cpu.capacity.usage.average +cpu.capacity.demand.average +cpu.capacity.contention.average +cpu.corecount.provisioned.average +cpu.corecount.usage.average +cpu.usagemhz.average +disk.throughput.contention.average +disk.throughput.usage.average +mem.decompressionRate.average +mem.granted.average +mem.active.average +mem.shared.average +mem.zero.average +mem.swapused.average +mem.vmmemctl.average +mem.compressed.average +mem.compressionRate.average +mem.reservedCapacity.average +mem.capacity.provisioned.average +mem.capacity.usable.average +mem.capacity.usage.average +mem.capacity.entitlement.average +mem.capacity.contention.average +mem.usage.minimum +mem.overhead.minimum +mem.consumed.minimum +mem.granted.minimum +mem.active.minimum +mem.shared.minimum +mem.zero.minimum +mem.swapused.minimum +mem.consumed.average +mem.usage.maximum +mem.overhead.maximum +mem.consumed.maximum +mem.granted.maximum +mem.overhead.average +mem.shared.maximum +mem.zero.maximum +mem.swapused.maximum +mem.vmmemctl.maximum +mem.usage.average +mem.active.maximum +mem.vmmemctl.minimum +net.throughput.contention.summation +net.throughput.usage.average +net.throughput.usable.average +net.throughput.provisioned.average +power.power.average +power.powerCap.average +power.energy.summation +vmop.numShutdownGuest.latest +vmop.numPoweroff.latest +vmop.numSuspend.latest +vmop.numReset.latest +vmop.numRebootGuest.latest +vmop.numStandbyGuest.latest +vmop.numPoweron.latest +vmop.numCreate.latest 
+vmop.numDestroy.latest +vmop.numRegister.latest +vmop.numUnregister.latest +vmop.numReconfigure.latest +vmop.numClone.latest +vmop.numDeploy.latest +vmop.numChangeHost.latest +vmop.numChangeDS.latest +vmop.numChangeHostDS.latest +vmop.numVMotion.latest +vmop.numSVMotion.latest +vmop.numXVMotion.latest +``` + +## Datastore Metrics +``` +datastore.numberReadAveraged.average +datastore.throughput.contention.average +datastore.throughput.usage.average +datastore.write.average +datastore.read.average +datastore.numberWriteAveraged.average +disk.used.latest +disk.provisioned.latest +disk.capacity.latest +disk.capacity.contention.average +disk.capacity.provisioned.average +disk.capacity.usage.average +``` \ No newline at end of file diff --git a/plugins/inputs/vsphere/README.MD b/plugins/inputs/vsphere/README.MD new file mode 100644 index 000000000..12332ea66 --- /dev/null +++ b/plugins/inputs/vsphere/README.MD @@ -0,0 +1,354 @@ +# VMware vSphere Input Plugin + +The VMware vSphere plugin uses the vSphere API to gather metrics from multiple vCenter servers. + +* Clusters +* Hosts +* VMs +* Data stores + +## Configuration + +NOTE: To disable collection of a specific resource type, simply exclude all metrics using the XX_metric_exclude. +For example, to disable collection of VMs, add this: +```vm_metric_exclude = [ "*" ]``` + +``` +# Read metrics from one or many vCenters +[[inputs.vsphere]] + ## List of vCenter URLs to be monitored. These three lines must be uncommented + ## and edited for the plugin to work. + vcenters = [ "https://vcenter.local/sdk" ] + username = "user@corp.local" + password = "secret" + + ## VMs + ## Typical VM metrics (if omitted or empty, all metrics are collected) + vm_metric_include = [ + "cpu.demand.average", + "cpu.idle.summation", + "cpu.latency.average", + "cpu.readiness.average", + "cpu.ready.summation", + "cpu.run.summation", + "cpu.usagemhz.average", + "cpu.used.summation", + "cpu.wait.summation", + "mem.active.average", + "mem.granted.average", + "mem.latency.average", + "mem.swapin.average", + "mem.swapinRate.average", + "mem.swapout.average", + "mem.swapoutRate.average", + "mem.usage.average", + "mem.vmmemctl.average", + "net.bytesRx.average", + "net.bytesTx.average", + "net.droppedRx.summation", + "net.droppedTx.summation", + "net.usage.average", + "power.power.average", + "virtualDisk.numberReadAveraged.average", + "virtualDisk.numberWriteAveraged.average", + "virtualDisk.read.average", + "virtualDisk.readOIO.latest", + "virtualDisk.throughput.usage.average", + "virtualDisk.totalReadLatency.average", + "virtualDisk.totalWriteLatency.average", + "virtualDisk.write.average", + "virtualDisk.writeOIO.latest", + "sys.uptime.latest", + ] + # vm_metric_exclude = [] ## Nothing is excluded by default + # vm_instances = true ## true by default + + ## Hosts + ## Typical host metrics (if omitted or empty, all metrics are collected) + host_metric_include = [ + "cpu.coreUtilization.average", + "cpu.costop.summation", + "cpu.demand.average", + "cpu.idle.summation", + "cpu.latency.average", + "cpu.readiness.average", + "cpu.ready.summation", + "cpu.swapwait.summation", + "cpu.usage.average", + "cpu.usagemhz.average", + "cpu.used.summation", + "cpu.utilization.average", + "cpu.wait.summation", + "disk.deviceReadLatency.average", + "disk.deviceWriteLatency.average", + "disk.kernelReadLatency.average", + "disk.kernelWriteLatency.average", + "disk.numberReadAveraged.average", + "disk.numberWriteAveraged.average", + "disk.read.average", + "disk.totalReadLatency.average", + 
"disk.totalWriteLatency.average", + "disk.write.average", + "mem.active.average", + "mem.latency.average", + "mem.state.latest", + "mem.swapin.average", + "mem.swapinRate.average", + "mem.swapout.average", + "mem.swapoutRate.average", + "mem.totalCapacity.average", + "mem.usage.average", + "mem.vmmemctl.average", + "net.bytesRx.average", + "net.bytesTx.average", + "net.droppedRx.summation", + "net.droppedTx.summation", + "net.errorsRx.summation", + "net.errorsTx.summation", + "net.usage.average", + "power.power.average", + "storageAdapter.numberReadAveraged.average", + "storageAdapter.numberWriteAveraged.average", + "storageAdapter.read.average", + "storageAdapter.write.average", + "sys.uptime.latest", + ] + # host_metric_exclude = [] ## Nothing excluded by default + # host_instances = true ## true by default + + ## Clusters + # cluster_metric_include = [] ## if omitted or empty, all metrics are collected + # cluster_metric_exclude = [] ## Nothing excluded by default + # cluster_instances = true ## true by default + + ## Datastores + # datastore_metric_include = [] ## if omitted or empty, all metrics are collected + # datastore_metric_exclude = [] ## Nothing excluded by default + # datastore_instances = false ## false by default for Datastores only + + ## Datacenters + datacenter_metric_include = [] ## if omitted or empty, all metrics are collected + datacenter_metric_exclude = [ "*" ] ## Datacenters are not collected by default. + # datacenter_instances = false ## false by default for Datastores only + + ## Plugin Settings + ## separator character to use for measurement and field names (default: "_") + # separator = "_" + + ## number of objects to retreive per query for realtime resources (vms and hosts) + ## set to 64 for vCenter 5.5 and 6.0 (default: 256) + # max_query_objects = 256 + + ## number of metrics to retreive per query for non-realtime resources (clusters and datastores) + ## set to 64 for vCenter 5.5 and 6.0 (default: 256) + # max_query_metrics = 256 + + ## number of go routines to use for collection and discovery of objects and metrics + # collect_concurrency = 1 + # discover_concurrency = 1 + + ## whether or not to force discovery of new objects on initial gather call before collecting metrics + ## when true for large environments this may cause errors for time elapsed while collecting metrics + ## when false (default) the first collection cycle may result in no or limited metrics while objects are discovered + # force_discover_on_init = false + + ## the interval before (re)discovering objects subject to metrics collection (default: 300s) + # object_discovery_interval = "300s" + + ## timeout applies to any of the api request made to vcenter + # timeout = "20s" + + ## Optional SSL Config + # ssl_ca = "/path/to/cafile" + # ssl_cert = "/path/to/certfile" + # ssl_key = "/path/to/keyfile" + ## Use SSL but skip chain & host verification + # insecure_skip_verify = false +``` + +### Objects and Metrics Per Query + +Default settings for vCenter 6.5 and above is 256. Prior versions of vCenter have this set to 64. A vCenter administrator +can change this setting, which should be reflected in this plugin. See this [VMware KB article](https://kb.vmware.com/s/article/2107096) +for more information. + +### Collection and Discovery concurrency + +On large vCenter setups it may be prudent to have multiple concurrent go routines collect performance metrics +in order to avoid potential errors for time elapsed during a collection cycle. 
This should never be greater than 8, +though the default of 1 (no concurrency) should be sufficient for most configurations. + +## Measurements & Fields + +- Cluster Stats + - Cluster services: CPU, memory, failover + - CPU: total, usage + - Memory: consumed, total, vmmemctl + - VM operations: # changes, clone, create, deploy, destroy, power, reboot, reconfigure, register, reset, shutdown, standby, vmotion +- Host Stats: + - CPU: total, usage, cost, mhz + - Datastore: iops, latency, read/write bytes, # reads/writes + - Disk: commands, latency, kernel reads/writes, # reads/writes, queues + - Memory: total, usage, active, latency, swap, shared, vmmemctl + - Network: broadcast, bytes, dropped, errors, multicast, packets, usage + - Power: energy, usage, capacity + - Res CPU: active, max, running + - Storage Adapter: commands, latency, # reads/writes + - Storage Path: commands, latency, # reads/writes + - System Resources: cpu active, cpu max, cpu running, cpu usage, mem allocated, mem consumed, mem shared, swap + - System: uptime + - Flash Module: active VMDKs +- VM Stats: + - CPU: demand, usage, readiness, cost, mhz + - Datastore: latency, # reads/writes + - Disk: commands, latency, # reads/writes, provisioned, usage + - Memory: granted, usage, active, swap, vmmemctl + - Network: broadcast, bytes, dropped, multicast, packets, usage + - Power: energy, usage + - Res CPU: active, max, running + - System: operating system uptime, uptime + - Virtual Disk: seeks, # reads/writes, latency, load +- Datastore stats: + - Disk: Capacity, provisioned, used + +For a detailed list of commonly available metrics, please refer to [METRICS.MD](METRICS.MD) + +## Tags + +- all metrics + - vcenter (vcenter url) +- all host metrics + - cluster (vcenter cluster) +- all vm metrics + - cluster (vcenter cluster) + - esxhost (name of ESXi host) + - guest (guest operating system id) +- cpu stats for Host and VM + - cpu (cpu core - not all CPU fields will have this tag) +- datastore stats for Host and VM + - datastore (id of datastore) +- disk stats for Host and VM + - disk (name of disk) +- disk.used.capacity for Datastore + - disk (type of disk) +- net stats for Host and VM + - interface (name of network interface) +- storageAdapter stats for Host + - adapter (name of storage adapter) +- storagePath stats for Host + - path (id of storage path) +- sys.resource* stats for Host + - resource (resource type) +- vflashModule stats for Host + - module (name of flash module) +- virtualDisk stats for VM + - disk (name of virtual disk) + +## Sample output + +``` +vsphere_vm_cpu,esxhostname=DC0_H0,guest=other,host=host.example.com,moid=vm-35,os=Mac,source=DC0_H0_VM0,vcenter=localhost:8989,vmname=DC0_H0_VM0 run_summation=2608i,ready_summation=129i,usage_average=5.01,used_summation=2134i,demand_average=326i 1535660299000000000 +vsphere_vm_net,esxhostname=DC0_H0,guest=other,host=host.example.com,moid=vm-35,os=Mac,source=DC0_H0_VM0,vcenter=localhost:8989,vmname=DC0_H0_VM0 bytesRx_average=321i,bytesTx_average=335i 1535660299000000000 +vsphere_vm_virtualDisk,esxhostname=DC0_H0,guest=other,host=host.example.com,moid=vm-35,os=Mac,source=DC0_H0_VM0,vcenter=localhost:8989,vmname=DC0_H0_VM0 write_average=144i,read_average=4i 1535660299000000000 +vsphere_vm_net,esxhostname=DC0_H0,guest=other,host=host.example.com,moid=vm-38,os=Mac,source=DC0_H0_VM1,vcenter=localhost:8989,vmname=DC0_H0_VM1 bytesRx_average=242i,bytesTx_average=308i 1535660299000000000 
+vsphere_vm_virtualDisk,esxhostname=DC0_H0,guest=other,host=host.example.com,moid=vm-38,os=Mac,source=DC0_H0_VM1,vcenter=localhost:8989,vmname=DC0_H0_VM1 write_average=232i,read_average=4i 1535660299000000000 +vsphere_vm_cpu,esxhostname=DC0_H0,guest=other,host=host.example.com,moid=vm-38,os=Mac,source=DC0_H0_VM1,vcenter=localhost:8989,vmname=DC0_H0_VM1 usage_average=5.49,used_summation=1804i,demand_average=308i,run_summation=2001i,ready_summation=120i 1535660299000000000 +vsphere_vm_cpu,clustername=DC0_C0,esxhostname=DC0_C0_H0,guest=other,host=host.example.com,moid=vm-41,os=Mac,source=DC0_C0_RP0_VM0,vcenter=localhost:8989,vmname=DC0_C0_RP0_VM0 usage_average=4.19,used_summation=2108i,demand_average=285i,run_summation=1793i,ready_summation=93i 1535660299000000000 +vsphere_vm_net,clustername=DC0_C0,esxhostname=DC0_C0_H0,guest=other,host=host.example.com,moid=vm-41,os=Mac,source=DC0_C0_RP0_VM0,vcenter=localhost:8989,vmname=DC0_C0_RP0_VM0 bytesRx_average=272i,bytesTx_average=419i 1535660299000000000 +vsphere_vm_virtualDisk,clustername=DC0_C0,esxhostname=DC0_C0_H0,guest=other,host=host.example.com,moid=vm-41,os=Mac,source=DC0_C0_RP0_VM0,vcenter=localhost:8989,vmname=DC0_C0_RP0_VM0 write_average=229i,read_average=4i 1535660299000000000 +vsphere_vm_cpu,clustername=DC0_C0,esxhostname=DC0_C0_H0,guest=other,host=host.example.com,moid=vm-44,os=Mac,source=DC0_C0_RP0_VM1,vcenter=localhost:8989,vmname=DC0_C0_RP0_VM1 run_summation=2277i,ready_summation=118i,usage_average=4.67,used_summation=2546i,demand_average=289i 1535660299000000000 +vsphere_vm_net,clustername=DC0_C0,esxhostname=DC0_C0_H0,guest=other,host=host.example.com,moid=vm-44,os=Mac,source=DC0_C0_RP0_VM1,vcenter=localhost:8989,vmname=DC0_C0_RP0_VM1 bytesRx_average=243i,bytesTx_average=296i 1535660299000000000 +vsphere_vm_virtualDisk,clustername=DC0_C0,esxhostname=DC0_C0_H0,guest=other,host=host.example.com,moid=vm-44,os=Mac,source=DC0_C0_RP0_VM1,vcenter=localhost:8989,vmname=DC0_C0_RP0_VM1 write_average=158i,read_average=4i 1535660299000000000 +vsphere_host_net,esxhostname=DC0_H0,host=host.example.com,interface=vmnic0,moid=host-19,os=Mac,source=DC0_H0,vcenter=localhost:8989 usage_average=1042i,bytesTx_average=753i,bytesRx_average=660i 1535660299000000000 +vsphere_host_cpu,esxhostname=DC0_H0,host=host.example.com,moid=host-19,os=Mac,source=DC0_H0,vcenter=localhost:8989 utilization_average=10.46,usage_average=22.4,readiness_average=0.4,costop_summation=2i,coreUtilization_average=19.61,wait_summation=5148518i,idle_summation=58581i,latency_average=0.6,ready_summation=13370i,used_summation=19219i 1535660299000000000 +vsphere_host_cpu,cpu=0,esxhostname=DC0_H0,host=host.example.com,moid=host-19,os=Mac,source=DC0_H0,vcenter=localhost:8989 coreUtilization_average=25.6,utilization_average=11.58,used_summation=24306i,usage_average=24.26,idle_summation=86688i 1535660299000000000 +vsphere_host_cpu,cpu=1,esxhostname=DC0_H0,host=host.example.com,moid=host-19,os=Mac,source=DC0_H0,vcenter=localhost:8989 coreUtilization_average=12.29,utilization_average=8.32,used_summation=31312i,usage_average=22.47,idle_summation=94934i 1535660299000000000 +vsphere_host_disk,esxhostname=DC0_H0,host=host.example.com,moid=host-19,os=Mac,source=DC0_H0,vcenter=localhost:8989 read_average=331i,write_average=2800i 1535660299000000000 +vsphere_host_disk,disk=/var/folders/rf/txwdm4pj409f70wnkdlp7sz80000gq/T/govcsim-DC0-LocalDS_0-367088371@folder-5,esxhostname=DC0_H0,host=host.example.com,moid=host-19,os=Mac,source=DC0_H0,vcenter=localhost:8989 write_average=2701i,read_average=258i 
1535660299000000000 +vsphere_host_mem,esxhostname=DC0_H0,host=host.example.com,moid=host-19,os=Mac,source=DC0_H0,vcenter=localhost:8989 usage_average=93.27 1535660299000000000 +vsphere_host_net,esxhostname=DC0_H0,host=host.example.com,moid=host-19,os=Mac,source=DC0_H0,vcenter=localhost:8989 bytesTx_average=650i,usage_average=1414i,bytesRx_average=569i 1535660299000000000 +vsphere_host_cpu,clustername=DC0_C0,cpu=1,esxhostname=DC0_C0_H0,host=host.example.com,moid=host-30,os=Mac,source=DC0_C0_H0,vcenter=localhost:8989 utilization_average=12.6,used_summation=25775i,usage_average=24.44,idle_summation=68886i,coreUtilization_average=17.59 1535660299000000000 +vsphere_host_disk,clustername=DC0_C0,esxhostname=DC0_C0_H0,host=host.example.com,moid=host-30,os=Mac,source=DC0_C0_H0,vcenter=localhost:8989 read_average=340i,write_average=2340i 1535660299000000000 +vsphere_host_disk,clustername=DC0_C0,disk=/var/folders/rf/txwdm4pj409f70wnkdlp7sz80000gq/T/govcsim-DC0-LocalDS_0-367088371@folder-5,esxhostname=DC0_C0_H0,host=host.example.com,moid=host-30,os=Mac,source=DC0_C0_H0,vcenter=localhost:8989 write_average=2277i,read_average=282i 1535660299000000000 +vsphere_host_mem,clustername=DC0_C0,esxhostname=DC0_C0_H0,host=host.example.com,moid=host-30,os=Mac,source=DC0_C0_H0,vcenter=localhost:8989 usage_average=104.78 1535660299000000000 +vsphere_host_net,clustername=DC0_C0,esxhostname=DC0_C0_H0,host=host.example.com,moid=host-30,os=Mac,source=DC0_C0_H0,vcenter=localhost:8989 bytesTx_average=463i,usage_average=1131i,bytesRx_average=719i 1535660299000000000 +vsphere_host_net,clustername=DC0_C0,esxhostname=DC0_C0_H0,host=host.example.com,interface=vmnic0,moid=host-30,os=Mac,source=DC0_C0_H0,vcenter=localhost:8989 usage_average=1668i,bytesTx_average=838i,bytesRx_average=921i 1535660299000000000 +vsphere_host_cpu,clustername=DC0_C0,esxhostname=DC0_C0_H0,host=host.example.com,moid=host-30,os=Mac,source=DC0_C0_H0,vcenter=localhost:8989 used_summation=28952i,utilization_average=11.36,idle_summation=93261i,latency_average=0.46,ready_summation=12837i,usage_average=21.56,readiness_average=0.39,costop_summation=2i,coreUtilization_average=27.19,wait_summation=3820829i 1535660299000000000 +vsphere_host_cpu,clustername=DC0_C0,cpu=0,esxhostname=DC0_C0_H0,host=host.example.com,moid=host-30,os=Mac,source=DC0_C0_H0,vcenter=localhost:8989 coreUtilization_average=24.12,utilization_average=13.83,used_summation=22462i,usage_average=24.69,idle_summation=96993i 1535660299000000000 +internal_vsphere,host=host.example.com,os=Mac,vcenter=localhost:8989 connect_ns=4727607i,discover_ns=65389011i,discovered_objects=8i 1535660309000000000 +internal_vsphere,host=host.example.com,os=Mac,resourcetype=datastore,vcenter=localhost:8989 gather_duration_ns=296223i,gather_count=0i 1535660309000000000 +internal_vsphere,host=host.example.com,os=Mac,resourcetype=vm,vcenter=192.168.1.151 gather_duration_ns=136050i,gather_count=0i 1535660309000000000 +internal_vsphere,host=host.example.com,os=Mac,resourcetype=host,vcenter=localhost:8989 gather_count=62i,gather_duration_ns=8788033i 1535660309000000000 +internal_vsphere,host=host.example.com,os=Mac,resourcetype=host,vcenter=192.168.1.151 gather_count=0i,gather_duration_ns=162002i 1535660309000000000 +internal_gather,host=host.example.com,input=vsphere,os=Mac gather_time_ns=17483653i,metrics_gathered=28i 1535660309000000000 +internal_vsphere,host=host.example.com,os=Mac,vcenter=192.168.1.151 connect_ns=0i 1535660309000000000 +internal_vsphere,host=host.example.com,os=Mac,resourcetype=vm,vcenter=localhost:8989 
gather_duration_ns=7291897i,gather_count=36i 1535660309000000000 +internal_vsphere,host=host.example.com,os=Mac,resourcetype=datastore,vcenter=192.168.1.151 gather_duration_ns=958474i,gather_count=0i 1535660309000000000 +vsphere_vm_cpu,esxhostname=DC0_H0,guest=other,host=host.example.com,moid=vm-38,os=Mac,source=DC0_H0_VM1,vcenter=localhost:8989,vmname=DC0_H0_VM1 usage_average=8.82,used_summation=3192i,demand_average=283i,run_summation=2419i,ready_summation=115i 1535660319000000000 +vsphere_vm_net,esxhostname=DC0_H0,guest=other,host=host.example.com,moid=vm-38,os=Mac,source=DC0_H0_VM1,vcenter=localhost:8989,vmname=DC0_H0_VM1 bytesRx_average=277i,bytesTx_average=343i 1535660319000000000 +vsphere_vm_virtualDisk,esxhostname=DC0_H0,guest=other,host=host.example.com,moid=vm-38,os=Mac,source=DC0_H0_VM1,vcenter=localhost:8989,vmname=DC0_H0_VM1 read_average=1i,write_average=741i 1535660319000000000 +vsphere_vm_net,clustername=DC0_C0,esxhostname=DC0_C0_H0,guest=other,host=host.example.com,moid=vm-41,os=Mac,source=DC0_C0_RP0_VM0,vcenter=localhost:8989,vmname=DC0_C0_RP0_VM0 bytesRx_average=386i,bytesTx_average=369i 1535660319000000000 +vsphere_vm_virtualDisk,clustername=DC0_C0,esxhostname=DC0_C0_H0,guest=other,host=host.example.com,moid=vm-41,os=Mac,source=DC0_C0_RP0_VM0,vcenter=localhost:8989,vmname=DC0_C0_RP0_VM0 write_average=814i,read_average=1i 1535660319000000000 +vsphere_vm_cpu,clustername=DC0_C0,esxhostname=DC0_C0_H0,guest=other,host=host.example.com,moid=vm-41,os=Mac,source=DC0_C0_RP0_VM0,vcenter=localhost:8989,vmname=DC0_C0_RP0_VM0 run_summation=1778i,ready_summation=111i,usage_average=7.54,used_summation=2339i,demand_average=297i 1535660319000000000 +vsphere_vm_cpu,clustername=DC0_C0,esxhostname=DC0_C0_H0,guest=other,host=host.example.com,moid=vm-44,os=Mac,source=DC0_C0_RP0_VM1,vcenter=localhost:8989,vmname=DC0_C0_RP0_VM1 usage_average=6.98,used_summation=2125i,demand_average=211i,run_summation=2990i,ready_summation=141i 1535660319000000000 +vsphere_vm_net,clustername=DC0_C0,esxhostname=DC0_C0_H0,guest=other,host=host.example.com,moid=vm-44,os=Mac,source=DC0_C0_RP0_VM1,vcenter=localhost:8989,vmname=DC0_C0_RP0_VM1 bytesRx_average=357i,bytesTx_average=268i 1535660319000000000 +vsphere_vm_virtualDisk,clustername=DC0_C0,esxhostname=DC0_C0_H0,guest=other,host=host.example.com,moid=vm-44,os=Mac,source=DC0_C0_RP0_VM1,vcenter=localhost:8989,vmname=DC0_C0_RP0_VM1 write_average=528i,read_average=1i 1535660319000000000 +vsphere_vm_cpu,esxhostname=DC0_H0,guest=other,host=host.example.com,moid=vm-35,os=Mac,source=DC0_H0_VM0,vcenter=localhost:8989,vmname=DC0_H0_VM0 used_summation=2374i,demand_average=195i,run_summation=3454i,ready_summation=110i,usage_average=7.34 1535660319000000000 +vsphere_vm_net,esxhostname=DC0_H0,guest=other,host=host.example.com,moid=vm-35,os=Mac,source=DC0_H0_VM0,vcenter=localhost:8989,vmname=DC0_H0_VM0 bytesRx_average=308i,bytesTx_average=246i 1535660319000000000 +vsphere_vm_virtualDisk,esxhostname=DC0_H0,guest=other,host=host.example.com,moid=vm-35,os=Mac,source=DC0_H0_VM0,vcenter=localhost:8989,vmname=DC0_H0_VM0 write_average=1178i,read_average=1i 1535660319000000000 +vsphere_host_net,esxhostname=DC0_H0,host=host.example.com,interface=vmnic0,moid=host-19,os=Mac,source=DC0_H0,vcenter=localhost:8989 bytesRx_average=773i,usage_average=1521i,bytesTx_average=890i 1535660319000000000 +vsphere_host_cpu,esxhostname=DC0_H0,host=host.example.com,moid=host-19,os=Mac,source=DC0_H0,vcenter=localhost:8989 
wait_summation=3421258i,idle_summation=67994i,latency_average=0.36,usage_average=29.86,readiness_average=0.37,used_summation=25244i,costop_summation=2i,coreUtilization_average=21.94,utilization_average=17.19,ready_summation=15897i 1535660319000000000 +vsphere_host_cpu,cpu=0,esxhostname=DC0_H0,host=host.example.com,moid=host-19,os=Mac,source=DC0_H0,vcenter=localhost:8989 utilization_average=11.32,used_summation=19333i,usage_average=14.29,idle_summation=92708i,coreUtilization_average=27.68 1535660319000000000 +vsphere_host_cpu,cpu=1,esxhostname=DC0_H0,host=host.example.com,moid=host-19,os=Mac,source=DC0_H0,vcenter=localhost:8989 used_summation=28596i,usage_average=25.32,idle_summation=79553i,coreUtilization_average=28.01,utilization_average=11.33 1535660319000000000 +vsphere_host_disk,esxhostname=DC0_H0,host=host.example.com,moid=host-19,os=Mac,source=DC0_H0,vcenter=localhost:8989 read_average=86i,write_average=1659i 1535660319000000000 +vsphere_host_disk,disk=/var/folders/rf/txwdm4pj409f70wnkdlp7sz80000gq/T/govcsim-DC0-LocalDS_0-367088371@folder-5,esxhostname=DC0_H0,host=host.example.com,moid=host-19,os=Mac,source=DC0_H0,vcenter=localhost:8989 write_average=1997i,read_average=58i 1535660319000000000 +vsphere_host_mem,esxhostname=DC0_H0,host=host.example.com,moid=host-19,os=Mac,source=DC0_H0,vcenter=localhost:8989 usage_average=68.45 1535660319000000000 +vsphere_host_net,esxhostname=DC0_H0,host=host.example.com,moid=host-19,os=Mac,source=DC0_H0,vcenter=localhost:8989 bytesTx_average=679i,usage_average=2286i,bytesRx_average=719i 1535660319000000000 +vsphere_host_cpu,clustername=DC0_C0,cpu=1,esxhostname=DC0_C0_H0,host=host.example.com,moid=host-30,os=Mac,source=DC0_C0_H0,vcenter=localhost:8989 utilization_average=10.52,used_summation=21693i,usage_average=23.09,idle_summation=84590i,coreUtilization_average=29.92 1535660319000000000 +vsphere_host_disk,clustername=DC0_C0,esxhostname=DC0_C0_H0,host=host.example.com,moid=host-30,os=Mac,source=DC0_C0_H0,vcenter=localhost:8989 read_average=113i,write_average=1236i 1535660319000000000 +vsphere_host_disk,clustername=DC0_C0,disk=/var/folders/rf/txwdm4pj409f70wnkdlp7sz80000gq/T/govcsim-DC0-LocalDS_0-367088371@folder-5,esxhostname=DC0_C0_H0,host=host.example.com,moid=host-30,os=Mac,source=DC0_C0_H0,vcenter=localhost:8989 write_average=1708i,read_average=110i 1535660319000000000 +vsphere_host_mem,clustername=DC0_C0,esxhostname=DC0_C0_H0,host=host.example.com,moid=host-30,os=Mac,source=DC0_C0_H0,vcenter=localhost:8989 usage_average=111.46 1535660319000000000 +vsphere_host_net,clustername=DC0_C0,esxhostname=DC0_C0_H0,host=host.example.com,moid=host-30,os=Mac,source=DC0_C0_H0,vcenter=localhost:8989 bytesTx_average=998i,usage_average=2000i,bytesRx_average=881i 1535660319000000000 +vsphere_host_net,clustername=DC0_C0,esxhostname=DC0_C0_H0,host=host.example.com,interface=vmnic0,moid=host-30,os=Mac,source=DC0_C0_H0,vcenter=localhost:8989 usage_average=1683i,bytesTx_average=675i,bytesRx_average=1078i 1535660319000000000 +vsphere_host_cpu,clustername=DC0_C0,esxhostname=DC0_C0_H0,host=host.example.com,moid=host-30,os=Mac,source=DC0_C0_H0,vcenter=localhost:8989 used_summation=28531i,wait_summation=3139129i,utilization_average=9.99,idle_summation=98579i,latency_average=0.51,costop_summation=2i,coreUtilization_average=14.35,ready_summation=16121i,usage_average=34.19,readiness_average=0.4 1535660319000000000 +vsphere_host_cpu,clustername=DC0_C0,cpu=0,esxhostname=DC0_C0_H0,host=host.example.com,moid=host-30,os=Mac,source=DC0_C0_H0,vcenter=localhost:8989 
utilization_average=12.2,used_summation=22750i,usage_average=18.84,idle_summation=99539i,coreUtilization_average=23.05 1535660319000000000 +internal_vsphere,host=host.example.com,os=Mac,resourcetype=host,vcenter=localhost:8989 gather_duration_ns=7076543i,gather_count=62i 1535660339000000000 +internal_vsphere,host=host.example.com,os=Mac,resourcetype=host,vcenter=192.168.1.151 gather_duration_ns=4051303i,gather_count=0i 1535660339000000000 +internal_gather,host=host.example.com,input=vsphere,os=Mac metrics_gathered=56i,gather_time_ns=13555029i 1535660339000000000 +internal_vsphere,host=host.example.com,os=Mac,vcenter=192.168.1.151 connect_ns=0i 1535660339000000000 +internal_vsphere,host=host.example.com,os=Mac,resourcetype=vm,vcenter=localhost:8989 gather_duration_ns=6335467i,gather_count=36i 1535660339000000000 +internal_vsphere,host=host.example.com,os=Mac,resourcetype=datastore,vcenter=192.168.1.151 gather_duration_ns=958474i,gather_count=0i 1535660339000000000 +internal_vsphere,host=host.example.com,os=Mac,vcenter=localhost:8989 discover_ns=65389011i,discovered_objects=8i,connect_ns=4727607i 1535660339000000000 +internal_vsphere,host=host.example.com,os=Mac,resourcetype=datastore,vcenter=localhost:8989 gather_duration_ns=296223i,gather_count=0i 1535660339000000000 +internal_vsphere,host=host.example.com,os=Mac,resourcetype=vm,vcenter=192.168.1.151 gather_count=0i,gather_duration_ns=1540920i 1535660339000000000 +vsphere_vm_virtualDisk,esxhostname=DC0_H0,guest=other,host=host.example.com,moid=vm-35,os=Mac,source=DC0_H0_VM0,vcenter=localhost:8989,vmname=DC0_H0_VM0 write_average=302i,read_average=11i 1535660339000000000 +vsphere_vm_cpu,esxhostname=DC0_H0,guest=other,host=host.example.com,moid=vm-35,os=Mac,source=DC0_H0_VM0,vcenter=localhost:8989,vmname=DC0_H0_VM0 usage_average=5.58,used_summation=2941i,demand_average=298i,run_summation=3255i,ready_summation=96i 1535660339000000000 +vsphere_vm_net,esxhostname=DC0_H0,guest=other,host=host.example.com,moid=vm-35,os=Mac,source=DC0_H0_VM0,vcenter=localhost:8989,vmname=DC0_H0_VM0 bytesRx_average=155i,bytesTx_average=241i 1535660339000000000 +vsphere_vm_cpu,esxhostname=DC0_H0,guest=other,host=host.example.com,moid=vm-38,os=Mac,source=DC0_H0_VM1,vcenter=localhost:8989,vmname=DC0_H0_VM1 usage_average=10.3,used_summation=3053i,demand_average=346i,run_summation=3289i,ready_summation=122i 1535660339000000000 +vsphere_vm_net,esxhostname=DC0_H0,guest=other,host=host.example.com,moid=vm-38,os=Mac,source=DC0_H0_VM1,vcenter=localhost:8989,vmname=DC0_H0_VM1 bytesRx_average=215i,bytesTx_average=275i 1535660339000000000 +vsphere_vm_virtualDisk,esxhostname=DC0_H0,guest=other,host=host.example.com,moid=vm-38,os=Mac,source=DC0_H0_VM1,vcenter=localhost:8989,vmname=DC0_H0_VM1 write_average=252i,read_average=14i 1535660339000000000 +vsphere_vm_cpu,clustername=DC0_C0,esxhostname=DC0_C0_H0,guest=other,host=host.example.com,moid=vm-41,os=Mac,source=DC0_C0_RP0_VM0,vcenter=localhost:8989,vmname=DC0_C0_RP0_VM0 usage_average=8,used_summation=2183i,demand_average=354i,run_summation=3542i,ready_summation=128i 1535660339000000000 +vsphere_vm_net,clustername=DC0_C0,esxhostname=DC0_C0_H0,guest=other,host=host.example.com,moid=vm-41,os=Mac,source=DC0_C0_RP0_VM0,vcenter=localhost:8989,vmname=DC0_C0_RP0_VM0 bytesRx_average=178i,bytesTx_average=200i 1535660339000000000 +vsphere_vm_virtualDisk,clustername=DC0_C0,esxhostname=DC0_C0_H0,guest=other,host=host.example.com,moid=vm-41,os=Mac,source=DC0_C0_RP0_VM0,vcenter=localhost:8989,vmname=DC0_C0_RP0_VM0 
write_average=283i,read_average=12i 1535660339000000000 +vsphere_vm_cpu,clustername=DC0_C0,esxhostname=DC0_C0_H0,guest=other,host=host.example.com,moid=vm-44,os=Mac,source=DC0_C0_RP0_VM1,vcenter=localhost:8989,vmname=DC0_C0_RP0_VM1 demand_average=328i,run_summation=3481i,ready_summation=122i,usage_average=7.95,used_summation=2167i 1535660339000000000 +vsphere_vm_net,clustername=DC0_C0,esxhostname=DC0_C0_H0,guest=other,host=host.example.com,moid=vm-44,os=Mac,source=DC0_C0_RP0_VM1,vcenter=localhost:8989,vmname=DC0_C0_RP0_VM1 bytesTx_average=282i,bytesRx_average=196i 1535660339000000000 +vsphere_vm_virtualDisk,clustername=DC0_C0,esxhostname=DC0_C0_H0,guest=other,host=host.example.com,moid=vm-44,os=Mac,source=DC0_C0_RP0_VM1,vcenter=localhost:8989,vmname=DC0_C0_RP0_VM1 write_average=321i,read_average=13i 1535660339000000000 +vsphere_host_disk,esxhostname=DC0_H0,host=host.example.com,moid=host-19,os=Mac,source=DC0_H0,vcenter=localhost:8989 read_average=39i,write_average=2635i 1535660339000000000 +vsphere_host_disk,disk=/var/folders/rf/txwdm4pj409f70wnkdlp7sz80000gq/T/govcsim-DC0-LocalDS_0-367088371@folder-5,esxhostname=DC0_H0,host=host.example.com,moid=host-19,os=Mac,source=DC0_H0,vcenter=localhost:8989 write_average=2635i,read_average=30i 1535660339000000000 +vsphere_host_mem,esxhostname=DC0_H0,host=host.example.com,moid=host-19,os=Mac,source=DC0_H0,vcenter=localhost:8989 usage_average=98.5 1535660339000000000 +vsphere_host_net,esxhostname=DC0_H0,host=host.example.com,moid=host-19,os=Mac,source=DC0_H0,vcenter=localhost:8989 usage_average=1887i,bytesRx_average=662i,bytesTx_average=251i 1535660339000000000 +vsphere_host_net,esxhostname=DC0_H0,host=host.example.com,interface=vmnic0,moid=host-19,os=Mac,source=DC0_H0,vcenter=localhost:8989 usage_average=1481i,bytesTx_average=899i,bytesRx_average=992i 1535660339000000000 +vsphere_host_cpu,esxhostname=DC0_H0,host=host.example.com,moid=host-19,os=Mac,source=DC0_H0,vcenter=localhost:8989 used_summation=50405i,costop_summation=2i,utilization_average=17.32,latency_average=0.61,ready_summation=14843i,usage_average=27.94,coreUtilization_average=32.12,wait_summation=3058787i,idle_summation=56600i,readiness_average=0.36 1535660339000000000 +vsphere_host_cpu,cpu=0,esxhostname=DC0_H0,host=host.example.com,moid=host-19,os=Mac,source=DC0_H0,vcenter=localhost:8989 coreUtilization_average=37.61,utilization_average=17.05,used_summation=38013i,usage_average=32.66,idle_summation=89575i 1535660339000000000 +vsphere_host_cpu,cpu=1,esxhostname=DC0_H0,host=host.example.com,moid=host-19,os=Mac,source=DC0_H0,vcenter=localhost:8989 coreUtilization_average=25.92,utilization_average=18.72,used_summation=39790i,usage_average=40.42,idle_summation=69457i 1535660339000000000 +vsphere_host_net,clustername=DC0_C0,esxhostname=DC0_C0_H0,host=host.example.com,interface=vmnic0,moid=host-30,os=Mac,source=DC0_C0_H0,vcenter=localhost:8989 usage_average=1246i,bytesTx_average=673i,bytesRx_average=781i 1535660339000000000 +vsphere_host_cpu,clustername=DC0_C0,esxhostname=DC0_C0_H0,host=host.example.com,moid=host-30,os=Mac,source=DC0_C0_H0,vcenter=localhost:8989 coreUtilization_average=33.8,idle_summation=77121i,ready_summation=15857i,readiness_average=0.39,used_summation=29554i,costop_summation=2i,wait_summation=4338417i,utilization_average=17.87,latency_average=0.44,usage_average=28.78 1535660339000000000 +vsphere_host_cpu,clustername=DC0_C0,cpu=0,esxhostname=DC0_C0_H0,host=host.example.com,moid=host-30,os=Mac,source=DC0_C0_H0,vcenter=localhost:8989 
idle_summation=86610i,coreUtilization_average=34.36,utilization_average=19.03,used_summation=28766i,usage_average=23.72 1535660339000000000 +vsphere_host_cpu,clustername=DC0_C0,cpu=1,esxhostname=DC0_C0_H0,host=host.example.com,moid=host-30,os=Mac,source=DC0_C0_H0,vcenter=localhost:8989 coreUtilization_average=33.15,utilization_average=16.8,used_summation=44282i,usage_average=30.08,idle_summation=93490i 1535660339000000000 +vsphere_host_disk,clustername=DC0_C0,esxhostname=DC0_C0_H0,host=host.example.com,moid=host-30,os=Mac,source=DC0_C0_H0,vcenter=localhost:8989 read_average=56i,write_average=1672i 1535660339000000000 +vsphere_host_disk,clustername=DC0_C0,disk=/var/folders/rf/txwdm4pj409f70wnkdlp7sz80000gq/T/govcsim-DC0-LocalDS_0-367088371@folder-5,esxhostname=DC0_C0_H0,host=host.example.com,moid=host-30,os=Mac,source=DC0_C0_H0,vcenter=localhost:8989 write_average=2110i,read_average=48i 1535660339000000000 +vsphere_host_mem,clustername=DC0_C0,esxhostname=DC0_C0_H0,host=host.example.com,moid=host-30,os=Mac,source=DC0_C0_H0,vcenter=localhost:8989 usage_average=116.21 1535660339000000000 +vsphere_host_net,clustername=DC0_C0,esxhostname=DC0_C0_H0,host=host.example.com,moid=host-30,os=Mac,source=DC0_C0_H0,vcenter=localhost:8989 bytesRx_average=726i,bytesTx_average=643i,usage_average=1504i 1535660339000000000 +vsphere_host_mem,clustername=DC0_C0,esxhostname=DC0_C0_H0,host=host.example.com,moid=host-30,os=Mac,source=DC0_C0_H0,vcenter=localhost:8989 usage_average=116.21 1535660339000000000 +vsphere_host_net,clustername=DC0_C0,esxhostname=DC0_C0_H0,host=host.example.com,moid=host-30,os=Mac,source=DC0_C0_H0,vcenter=localhost:8989 bytesRx_average=726i,bytesTx_average=643i,usage_average=1504i 1535660339000000000 +``` diff --git a/plugins/inputs/vsphere/client.go b/plugins/inputs/vsphere/client.go new file mode 100644 index 000000000..b9547b179 --- /dev/null +++ b/plugins/inputs/vsphere/client.go @@ -0,0 +1,175 @@ +package vsphere + +import ( + "context" + "crypto/tls" + "log" + "net/url" + "sync" + + "github.com/vmware/govmomi" + "github.com/vmware/govmomi/performance" + "github.com/vmware/govmomi/session" + "github.com/vmware/govmomi/view" + "github.com/vmware/govmomi/vim25" + "github.com/vmware/govmomi/vim25/methods" + "github.com/vmware/govmomi/vim25/soap" +) + +// ClientFactory is used to obtain Clients to be used throughout the plugin. Typically, +// a single Client is reused across all functions and goroutines, but the client +// is periodically recycled to avoid authentication expiration issues. +type ClientFactory struct { + client *Client + mux sync.Mutex + url *url.URL + parent *VSphere +} + +// Client represents a connection to vSphere and is backed by a govmoni connection +type Client struct { + Client *govmomi.Client + Views *view.Manager + Root *view.ContainerView + Perf *performance.Manager + Valid bool + closeGate sync.Once +} + +// NewClientFactory creates a new ClientFactory and prepares it for use. +func NewClientFactory(ctx context.Context, url *url.URL, parent *VSphere) *ClientFactory { + return &ClientFactory{ + client: nil, + parent: parent, + url: url, + } +} + +// GetClient returns a client. The caller is responsible for calling Release() +// on the client once it's done using it. 
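+//
+// A minimal usage sketch from a hypothetical caller (error handling elided; the
+// context, URL and *VSphere configuration are assumptions supplied by the caller):
+//
+//	cf := NewClientFactory(ctx, u, vs)               // u *url.URL, vs *VSphere
+//	client, err := cf.GetClient(ctx)                 // logs in (or re-authenticates) lazily
+//	counters, err := client.Perf.CounterInfoByName(ctx)
+//	defer cf.Close()                                 // logs out and destroys the container view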
+func (cf *ClientFactory) GetClient(ctx context.Context) (*Client, error) { + cf.mux.Lock() + defer cf.mux.Unlock() + if cf.client == nil { + var err error + if cf.client, err = NewClient(cf.url, cf.parent); err != nil { + return nil, err + } + } + + // Execute a dummy call against the server to make sure the client is + // still functional. If not, try to log back in. If that doesn't work, + // we give up. + if _, err := methods.GetCurrentTime(ctx, cf.client.Client); err != nil { + log.Printf("I! [input.vsphere]: Client session seems to have time out. Reauthenticating!") + if cf.client.Client.SessionManager.Login(ctx, url.UserPassword(cf.parent.Username, cf.parent.Password)) != nil { + return nil, err + } + } + + return cf.client, nil +} + +// NewClient creates a new vSphere client based on the url and setting passed as parameters. +func NewClient(u *url.URL, vs *VSphere) (*Client, error) { + sw := NewStopwatch("connect", u.Host) + tlsCfg, err := vs.ClientConfig.TLSConfig() + if err != nil { + return nil, err + } + // Use a default TLS config if it's missing + if tlsCfg == nil { + tlsCfg = &tls.Config{} + } + if vs.Username != "" { + u.User = url.UserPassword(vs.Username, vs.Password) + } + ctx := context.Background() + + log.Printf("D! [input.vsphere]: Creating client: %s", u.Host) + soapClient := soap.NewClient(u, tlsCfg.InsecureSkipVerify) + + // Add certificate if we have it. Use it to log us in. + if tlsCfg != nil && len(tlsCfg.Certificates) > 0 { + soapClient.SetCertificate(tlsCfg.Certificates[0]) + } + + // Set up custom CA chain if specified. We need to do this before we create the vim25 client, + // since it might fail on missing CA chains otherwise. + if vs.TLSCA != "" { + if err := soapClient.SetRootCAs(vs.TLSCA); err != nil { + return nil, err + } + } + + vimClient, err := vim25.NewClient(ctx, soapClient) + if err != nil { + return nil, err + } + sm := session.NewManager(vimClient) + + // If TSLKey is specified, try to log in as an extension using a cert. + if vs.TLSKey != "" { + if err := sm.LoginExtensionByCertificate(ctx, vs.TLSKey); err != nil { + return nil, err + } + } + + // Create the govmomi client. + c := &govmomi.Client{ + Client: vimClient, + SessionManager: sm, + } + + // Only login if the URL contains user information. + if u.User != nil { + if err := c.Login(ctx, u.User); err != nil { + return nil, err + } + } + + c.Timeout = vs.Timeout.Duration + m := view.NewManager(c.Client) + + v, err := m.CreateContainerView(ctx, c.ServiceContent.RootFolder, []string{}, true) + if err != nil { + return nil, err + } + + p := performance.NewManager(c.Client) + + sw.Stop() + + return &Client{ + Client: c, + Views: m, + Root: v, + Perf: p, + Valid: true, + }, nil +} + +// Close shuts down a ClientFactory and releases any resources associated with it. +func (cf *ClientFactory) Close() { + cf.mux.Lock() + defer cf.mux.Unlock() + if cf.client != nil { + cf.client.close() + } +} + +func (c *Client) close() { + + // Use a Once to prevent us from panics stemming from trying + // to close it multiple times. 
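+	// closeGate is a sync.Once, so the function below runs at most once even if
+	// close() is called concurrently; Destroy and Logout are therefore never issued twice.
+	// A fresh background context is used because the collection context may already
+	// have been cancelled by the time the client is torn down.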
+ c.closeGate.Do(func() { + ctx := context.Background() + if c.Views != nil { + c.Views.Destroy(ctx) + + } + if c.Client != nil { + c.Client.Logout(ctx) + } + }) +} diff --git a/plugins/inputs/vsphere/endpoint.go b/plugins/inputs/vsphere/endpoint.go new file mode 100644 index 000000000..cad4dec00 --- /dev/null +++ b/plugins/inputs/vsphere/endpoint.go @@ -0,0 +1,852 @@ +package vsphere + +import ( + "context" + "fmt" + "log" + "net/url" + "strconv" + "strings" + "sync" + "sync/atomic" + "time" + + "github.com/influxdata/telegraf/filter" + + "github.com/influxdata/telegraf" + "github.com/vmware/govmomi/object" + "github.com/vmware/govmomi/performance" + "github.com/vmware/govmomi/view" + "github.com/vmware/govmomi/vim25/mo" + "github.com/vmware/govmomi/vim25/types" +) + +// Endpoint is a high-level representation of a connected vCenter endpoint. It is backed by the lower +// level Client type. +type Endpoint struct { + Parent *VSphere + URL *url.URL + lastColls map[string]time.Time + instanceInfo map[string]resourceInfo + resourceKinds map[string]resourceKind + discoveryTicker *time.Ticker + collectMux sync.RWMutex + initialized bool + clientFactory *ClientFactory + busy sync.Mutex +} + +type resourceKind struct { + name string + pKey string + parentTag string + enabled bool + realTime bool + sampling int32 + objects objectMap + filters filter.Filter + collectInstances bool + getObjects func(context.Context, *view.ContainerView) (objectMap, error) +} + +type metricEntry struct { + tags map[string]string + name string + ts time.Time + fields map[string]interface{} +} + +type objectMap map[string]objectRef + +type objectRef struct { + name string + ref types.ManagedObjectReference + parentRef *types.ManagedObjectReference //Pointer because it must be nillable + guest string + dcname string +} + +type resourceInfo struct { + name string + metrics performance.MetricList + parentRef *types.ManagedObjectReference +} + +type metricQRequest struct { + res *resourceKind + obj objectRef +} + +type metricQResponse struct { + obj objectRef + metrics *performance.MetricList +} + +type multiError []error + +// NewEndpoint returns a new connection to a vCenter based on the URL and configuration passed +// as parameters. 
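+//
+// A hypothetical construction sketch (the URL, the *VSphere parent and the telegraf
+// accumulator acc are assumptions supplied by the caller, not created here):
+//
+//	u, _ := url.Parse("https://vcenter.local/sdk")
+//	ep, err := NewEndpoint(ctx, vs, u)
+//	err = ep.Collect(ctx, acc)   // gathers metrics for all enabled resource kinds
+//	ep.Close()                   // releases the underlying client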
+func NewEndpoint(ctx context.Context, parent *VSphere, url *url.URL) (*Endpoint, error) { + e := Endpoint{ + URL: url, + Parent: parent, + lastColls: make(map[string]time.Time), + instanceInfo: make(map[string]resourceInfo), + initialized: false, + clientFactory: NewClientFactory(ctx, url, parent), + } + + e.resourceKinds = map[string]resourceKind{ + "datacenter": { + name: "datacenter", + pKey: "dcname", + parentTag: "", + enabled: anythingEnabled(parent.DatacenterMetricExclude), + realTime: false, + sampling: 300, + objects: make(objectMap), + filters: newFilterOrPanic(parent.DatacenterMetricInclude, parent.DatacenterMetricExclude), + collectInstances: parent.DatacenterInstances, + getObjects: getDatacenters, + }, + "cluster": { + name: "cluster", + pKey: "clustername", + parentTag: "dcname", + enabled: anythingEnabled(parent.ClusterMetricExclude), + realTime: false, + sampling: 300, + objects: make(objectMap), + filters: newFilterOrPanic(parent.ClusterMetricInclude, parent.ClusterMetricExclude), + collectInstances: parent.ClusterInstances, + getObjects: getClusters, + }, + "host": { + name: "host", + pKey: "esxhostname", + parentTag: "clustername", + enabled: anythingEnabled(parent.HostMetricExclude), + realTime: true, + sampling: 20, + objects: make(objectMap), + filters: newFilterOrPanic(parent.HostMetricInclude, parent.HostMetricExclude), + collectInstances: parent.HostInstances, + getObjects: getHosts, + }, + "vm": { + name: "vm", + pKey: "vmname", + parentTag: "esxhostname", + enabled: anythingEnabled(parent.VMMetricExclude), + realTime: true, + sampling: 20, + objects: make(objectMap), + filters: newFilterOrPanic(parent.VMMetricInclude, parent.VMMetricExclude), + collectInstances: parent.VMInstances, + getObjects: getVMs, + }, + "datastore": { + name: "datastore", + pKey: "dsname", + enabled: anythingEnabled(parent.DatastoreMetricExclude), + realTime: false, + sampling: 300, + objects: make(objectMap), + filters: newFilterOrPanic(parent.DatastoreMetricInclude, parent.DatastoreMetricExclude), + collectInstances: parent.DatastoreInstances, + getObjects: getDatastores, + }, + } + + // Start discover and other goodness + err := e.init(ctx) + + return &e, err +} + +func (m multiError) Error() string { + switch len(m) { + case 0: + return "No error recorded. Something is wrong!" + case 1: + return m[0].Error() + default: + s := "Multiple errors detected concurrently: " + for i, e := range m { + if i != 0 { + s += ", " + } + s += e.Error() + } + return s + } +} + +func anythingEnabled(ex []string) bool { + for _, s := range ex { + if s == "*" { + return false + } + } + return true +} + +func newFilterOrPanic(include []string, exclude []string) filter.Filter { + f, err := filter.NewIncludeExcludeFilter(include, exclude) + if err != nil { + panic(fmt.Sprintf("Include/exclude filters are invalid: %s", err)) + } + return f +} + +func (e *Endpoint) startDiscovery(ctx context.Context) { + e.discoveryTicker = time.NewTicker(e.Parent.ObjectDiscoveryInterval.Duration) + go func() { + for { + select { + case <-e.discoveryTicker.C: + err := e.discover(ctx) + if err != nil && err != context.Canceled { + log.Printf("E! [input.vsphere]: Error in discovery for %s: %v", e.URL.Host, err) + } + case <-ctx.Done(): + log.Printf("D! [input.vsphere]: Exiting discovery goroutine for %s", e.URL.Host) + e.discoveryTicker.Stop() + return + } + } + }() +} + +func (e *Endpoint) initalDiscovery(ctx context.Context) { + err := e.discover(ctx) + if err != nil && err != context.Canceled { + log.Printf("E! 
[input.vsphere]: Error in discovery for %s: %v", e.URL.Host, err) + } + e.startDiscovery(ctx) +} + +func (e *Endpoint) init(ctx context.Context) error { + + if e.Parent.ObjectDiscoveryInterval.Duration > 0 { + + // Run an initial discovery. If force_discovery_on_init isn't set, we kick it off as a + // goroutine without waiting for it. This will probably cause us to report an empty + // dataset on the first collection, but it solves the issue of the first collection timing out. + if e.Parent.ForceDiscoverOnInit { + log.Printf("D! [input.vsphere]: Running initial discovery and waiting for it to finish") + e.initalDiscovery(ctx) + } else { + // Otherwise, just run it in the background. We'll probably have an incomplete first metric + // collection this way. + go e.initalDiscovery(ctx) + } + } + e.initialized = true + return nil +} + +func (e *Endpoint) getMetricNameMap(ctx context.Context) (map[int32]string, error) { + client, err := e.clientFactory.GetClient(ctx) + if err != nil { + return nil, err + } + + mn, err := client.Perf.CounterInfoByName(ctx) + + if err != nil { + return nil, err + } + names := make(map[int32]string) + for name, m := range mn { + names[m.Key] = name + } + return names, nil +} + +func (e *Endpoint) getMetadata(ctx context.Context, in interface{}) interface{} { + client, err := e.clientFactory.GetClient(ctx) + if err != nil { + return err + } + + rq := in.(*metricQRequest) + metrics, err := client.Perf.AvailableMetric(ctx, rq.obj.ref.Reference(), rq.res.sampling) + if err != nil && err != context.Canceled { + log.Printf("E! [input.vsphere]: Error while getting metric metadata. Discovery will be incomplete. Error: %s", err) + } + return &metricQResponse{metrics: &metrics, obj: rq.obj} +} + +func (e *Endpoint) getDatacenterName(ctx context.Context, client *Client, cache map[string]string, r types.ManagedObjectReference) string { + path := make([]string, 0) + returnVal := "" + here := r + for { + if name, ok := cache[here.Reference().String()]; ok { + // Populate cache for the entire chain of objects leading here. + returnVal = name + break + } + path = append(path, here.Reference().String()) + o := object.NewCommon(client.Client.Client, r) + var result mo.ManagedEntity + err := o.Properties(ctx, here, []string{"parent", "name"}, &result) + if err != nil { + log.Printf("W! [input.vsphere]: Error while resolving parent. Assuming no parent exists. Error: %s", err) + break + } + if result.Reference().Type == "Datacenter" { + // Populate cache for the entire chain of objects leading here. + returnVal = result.Name + break + } + if result.Parent == nil { + log.Printf("D! [input.vsphere]: No parent found for %s (ascending from %s)", here.Reference(), r.Reference()) + break + } + here = result.Parent.Reference() + } + for _, s := range path { + cache[s] = returnVal + } + return returnVal +} + +func (e *Endpoint) discover(ctx context.Context) error { + e.busy.Lock() + defer e.busy.Unlock() + if ctx.Err() != nil { + return ctx.Err() + } + + metricNames, err := e.getMetricNameMap(ctx) + if err != nil { + return err + } + + sw := NewStopwatch("discover", e.URL.Host) + + client, err := e.clientFactory.GetClient(ctx) + if err != nil { + return err + } + + log.Printf("D! [input.vsphere]: Discover new objects for %s", e.URL.Host) + + instInfo := make(map[string]resourceInfo) + resourceKinds := make(map[string]resourceKind) + dcNameCache := make(map[string]string) + + // Populate resource objects, and endpoint instance info. + for k, res := range e.resourceKinds { + log.Printf("D! 
[input.vsphere] Discovering resources for %s", res.name) + // Need to do this for all resource types even if they are not enabled (but datastore) + if res.enabled || (k != "datastore" && k != "vm") { + objects, err := res.getObjects(ctx, client.Root) + if err != nil { + return err + } + + // Fill in datacenter names where available (no need to do it for Datacenters) + if res.name != "Datacenter" { + for k, obj := range objects { + if obj.parentRef != nil { + obj.dcname = e.getDatacenterName(ctx, client, dcNameCache, *obj.parentRef) + objects[k] = obj + } + } + } + + // Set up a worker pool for processing metadata queries concurrently + wp := NewWorkerPool(10) + wp.Run(ctx, e.getMetadata, e.Parent.DiscoverConcurrency) + + // Fill the input channels with resources that need to be queried + // for metadata. + wp.Fill(ctx, func(ctx context.Context, f PushFunc) { + for _, obj := range objects { + f(ctx, &metricQRequest{obj: obj, res: &res}) + } + }) + + // Drain the resulting metadata and build instance infos. + wp.Drain(ctx, func(ctx context.Context, in interface{}) bool { + switch resp := in.(type) { + case *metricQResponse: + mList := make(performance.MetricList, 0) + if res.enabled { + for _, m := range *resp.metrics { + if m.Instance != "" && !res.collectInstances { + continue + } + if res.filters.Match(metricNames[m.CounterId]) { + mList = append(mList, m) + } + } + } + instInfo[resp.obj.ref.Value] = resourceInfo{name: resp.obj.name, metrics: mList, parentRef: resp.obj.parentRef} + case error: + log.Printf("W! [input.vsphere]: Error while discovering resources: %s", resp) + return false + } + return true + }) + res.objects = objects + resourceKinds[k] = res + } + } + + // Atomically swap maps + // + e.collectMux.Lock() + defer e.collectMux.Unlock() + + e.instanceInfo = instInfo + e.resourceKinds = resourceKinds + + sw.Stop() + SendInternalCounter("discovered_objects", e.URL.Host, int64(len(instInfo))) + return nil +} + +func getDatacenters(ctx context.Context, root *view.ContainerView) (objectMap, error) { + var resources []mo.Datacenter + err := root.Retrieve(ctx, []string{"Datacenter"}, []string{"name", "parent"}, &resources) + if err != nil { + return nil, err + } + m := make(objectMap, len(resources)) + for _, r := range resources { + m[r.ExtensibleManagedObject.Reference().Value] = objectRef{ + name: r.Name, ref: r.ExtensibleManagedObject.Reference(), parentRef: r.Parent, dcname: r.Name} + } + return m, nil +} + +func getClusters(ctx context.Context, root *view.ContainerView) (objectMap, error) { + var resources []mo.ClusterComputeResource + err := root.Retrieve(ctx, []string{"ClusterComputeResource"}, []string{"name", "parent"}, &resources) + if err != nil { + return nil, err + } + cache := make(map[string]*types.ManagedObjectReference) + m := make(objectMap, len(resources)) + for _, r := range resources { + // We're not interested in the immediate parent (a folder), but the data center. + p, ok := cache[r.Parent.Value] + if !ok { + o := object.NewFolder(root.Client(), *r.Parent) + var folder mo.Folder + err := o.Properties(ctx, *r.Parent, []string{"parent"}, &folder) + if err != nil { + log.Printf("W! 
[input.vsphere] Error while getting folder parent: %e", err) + p = nil + } else { + pp := folder.Parent.Reference() + p = &pp + cache[r.Parent.Value] = p + } + } + m[r.ExtensibleManagedObject.Reference().Value] = objectRef{ + name: r.Name, ref: r.ExtensibleManagedObject.Reference(), parentRef: p} + } + return m, nil +} + +func getHosts(ctx context.Context, root *view.ContainerView) (objectMap, error) { + var resources []mo.HostSystem + err := root.Retrieve(ctx, []string{"HostSystem"}, []string{"name", "parent"}, &resources) + if err != nil { + return nil, err + } + m := make(objectMap) + for _, r := range resources { + m[r.ExtensibleManagedObject.Reference().Value] = objectRef{ + name: r.Name, ref: r.ExtensibleManagedObject.Reference(), parentRef: r.Parent} + } + return m, nil +} + +func getVMs(ctx context.Context, root *view.ContainerView) (objectMap, error) { + var resources []mo.VirtualMachine + err := root.Retrieve(ctx, []string{"VirtualMachine"}, []string{"name", "runtime.host", "config.guestId"}, &resources) + if err != nil { + return nil, err + } + m := make(objectMap) + for _, r := range resources { + var guest string + // Sometimes Config is unknown and returns a nil pointer + // + if r.Config != nil { + guest = cleanGuestID(r.Config.GuestId) + } else { + guest = "unknown" + } + m[r.ExtensibleManagedObject.Reference().Value] = objectRef{ + name: r.Name, ref: r.ExtensibleManagedObject.Reference(), parentRef: r.Runtime.Host, guest: guest} + } + return m, nil +} + +func getDatastores(ctx context.Context, root *view.ContainerView) (objectMap, error) { + var resources []mo.Datastore + err := root.Retrieve(ctx, []string{"Datastore"}, []string{"name", "parent"}, &resources) + if err != nil { + return nil, err + } + m := make(objectMap) + for _, r := range resources { + m[r.ExtensibleManagedObject.Reference().Value] = objectRef{ + name: r.Name, ref: r.ExtensibleManagedObject.Reference(), parentRef: r.Parent} + } + return m, nil +} + +// Close shuts down an Endpoint and releases any resources associated with it. +func (e *Endpoint) Close() { + e.clientFactory.Close() +} + +// Collect runs a round of data collections as specified in the configuration. +func (e *Endpoint) Collect(ctx context.Context, acc telegraf.Accumulator) error { + // If we never managed to do a discovery, collection will be a no-op. Therefore, + // we need to check that a connection is available, or the collection will + // silently fail. + // + if _, err := e.clientFactory.GetClient(ctx); err != nil { + return err + } + + e.collectMux.RLock() + defer e.collectMux.RUnlock() + + if ctx.Err() != nil { + return ctx.Err() + } + + // If discovery interval is disabled (0), discover on each collection cycle + // + if e.Parent.ObjectDiscoveryInterval.Duration == 0 { + err := e.discover(ctx) + if err != nil { + return err + } + } + for k, res := range e.resourceKinds { + if res.enabled { + err := e.collectResource(ctx, k, acc) + if err != nil { + return err + } + } + } + return nil +} + +func (e *Endpoint) chunker(ctx context.Context, f PushFunc, res *resourceKind, now time.Time, latest time.Time) { + pqs := make([]types.PerfQuerySpec, 0, e.Parent.MaxQueryObjects) + metrics := 0 + total := 0 + nRes := 0 + for _, object := range res.objects { + info, found := e.instanceInfo[object.ref.Value] + if !found { + log.Printf("E! 
[input.vsphere]: Internal error: Instance info not found for MOID %s", object.ref) + } + mr := len(info.metrics) + for mr > 0 { + mc := mr + headroom := e.Parent.MaxQueryMetrics - metrics + if !res.realTime && mc > headroom { // Metric query limit only applies to non-realtime metrics + mc = headroom + } + fm := len(info.metrics) - mr + pq := types.PerfQuerySpec{ + Entity: object.ref, + MaxSample: 1, + MetricId: info.metrics[fm : fm+mc], + IntervalId: res.sampling, + } + + if !res.realTime { + pq.StartTime = &latest + pq.EndTime = &now + } + pqs = append(pqs, pq) + mr -= mc + metrics += mc + + // We need to dump the current chunk of metrics for one of two reasons: + // 1) We filled up the metric quota while processing the current resource + // 2) We are at the last resource and have no more data to process. + if mr > 0 || (!res.realTime && metrics >= e.Parent.MaxQueryMetrics) || nRes >= e.Parent.MaxQueryObjects { + log.Printf("D! [input.vsphere]: Querying %d objects, %d metrics (%d remaining) of type %s for %s. Processed objects: %d. Total objects %d", + len(pqs), metrics, mr, res.name, e.URL.Host, total+1, len(res.objects)) + + // To prevent deadlocks, don't send work items if the context has been cancelled. + if ctx.Err() == context.Canceled { + return + } + + // Call push function + f(ctx, pqs) + pqs = make([]types.PerfQuerySpec, 0, e.Parent.MaxQueryObjects) + metrics = 0 + nRes = 0 + } + } + total++ + nRes++ + } + // There may be dangling stuff in the queue. Handle them + // + if len(pqs) > 0 { + // Call push function + f(ctx, pqs) + } +} + +func (e *Endpoint) collectResource(ctx context.Context, resourceType string, acc telegraf.Accumulator) error { + + // Do we have new data yet? + res := e.resourceKinds[resourceType] + now := time.Now() + latest, hasLatest := e.lastColls[resourceType] + if hasLatest { + elapsed := time.Now().Sub(latest).Seconds() + 5.0 // Allow 5 second jitter. + log.Printf("D! [input.vsphere]: Latest: %s, elapsed: %f, resource: %s", latest, elapsed, resourceType) + if !res.realTime && elapsed < float64(res.sampling) { + // No new data would be available. We're outta herE! [input.vsphere]: + log.Printf("D! [input.vsphere]: Sampling period for %s of %d has not elapsed for %s", + resourceType, res.sampling, e.URL.Host) + return nil + } + } else { + latest = time.Now().Add(time.Duration(-res.sampling) * time.Second) + } + + internalTags := map[string]string{"resourcetype": resourceType} + sw := NewStopwatchWithTags("gather_duration", e.URL.Host, internalTags) + + log.Printf("D! [input.vsphere]: Start of sample period deemed to be %s", latest) + log.Printf("D! [input.vsphere]: Collecting metrics for %d objects of type %s for %s", + len(res.objects), resourceType, e.URL.Host) + + count := int64(0) + + // Set up a worker pool for collecting chunk metrics + wp := NewWorkerPool(10) + wp.Run(ctx, func(ctx context.Context, in interface{}) interface{} { + chunk := in.([]types.PerfQuerySpec) + n, err := e.collectChunk(ctx, chunk, resourceType, res, acc) + log.Printf("D! [input.vsphere]: Query returned %d metrics", n) + if err != nil { + return err + } + atomic.AddInt64(&count, int64(n)) + return nil + + }, e.Parent.CollectConcurrency) + + // Fill the input channel of the worker queue by running the chunking + // logic implemented in chunker() + wp.Fill(ctx, func(ctx context.Context, f PushFunc) { + e.chunker(ctx, f, &res, now, latest) + }) + + // Drain the pool. We're getting errors back. 
They should all be nil.
+	var mux sync.Mutex
+	err := make(multiError, 0)
+	wp.Drain(ctx, func(ctx context.Context, in interface{}) bool {
+		if in != nil {
+			mux.Lock()
+			defer mux.Unlock()
+			err = append(err, in.(error))
+			return false
+		}
+		return true
+	})
+	e.lastColls[resourceType] = now // Use value captured at the beginning to avoid blind spots.
+
+	sw.Stop()
+	SendInternalCounterWithTags("gather_count", e.URL.Host, internalTags, count)
+	if len(err) > 0 {
+		return err
+	}
+	return nil
+}
+
+func (e *Endpoint) collectChunk(ctx context.Context, pqs []types.PerfQuerySpec, resourceType string,
+	res resourceKind, acc telegraf.Accumulator) (int, error) {
+	count := 0
+	prefix := "vsphere" + e.Parent.Separator + resourceType
+
+	client, err := e.clientFactory.GetClient(ctx)
+	if err != nil {
+		return 0, err
+	}
+
+	metricInfo, err := client.Perf.CounterInfoByName(ctx)
+	if err != nil {
+		return count, err
+	}
+
+	metrics, err := client.Perf.Query(ctx, pqs)
+	if err != nil {
+		return count, err
+	}
+
+	ems, err := client.Perf.ToMetricSeries(ctx, metrics)
+	if err != nil {
+		return count, err
+	}
+
+	// Iterate through results
+	for _, em := range ems {
+		moid := em.Entity.Reference().Value
+		instInfo, found := e.instanceInfo[moid]
+		if !found {
+			log.Printf("E! [input.vsphere]: MOID %s not found in cache. Skipping! (This should not happen!)", moid)
+			continue
+		}
+		buckets := make(map[string]metricEntry)
+		for _, v := range em.Value {
+			name := v.Name
+			t := map[string]string{
+				"vcenter": e.URL.Host,
+				"source":  instInfo.name,
+				"moid":    moid,
+			}
+
+			// Populate tags
+			objectRef, ok := res.objects[moid]
+			if !ok {
+				log.Printf("E! [input.vsphere]: MOID %s not found in cache. Skipping", moid)
+				continue
+			}
+			e.populateTags(&objectRef, resourceType, &res, t, &v)
+
+			// Now deal with the values
+			for idx, value := range v.Value {
+				ts := em.SampleInfo[idx].Timestamp
+
+				// Organize the metrics into a bucket per measurement.
+				// Data SHOULD be presented to us with the same timestamp for all samples, but in case
+				// it isn't, we use the measurement name + instance + timestamp as the key for the bucket.
+				mn, fn := e.makeMetricIdentifier(prefix, name)
+				bKey := mn + " " + v.Instance + " " + strconv.FormatInt(ts.UnixNano(), 10)
+				bucket, found := buckets[bKey]
+				if !found {
+					bucket = metricEntry{name: mn, ts: ts, fields: make(map[string]interface{}), tags: t}
+					buckets[bKey] = bucket
+				}
+				if value < 0 {
+					log.Printf("D! [input.vsphere]: Negative value for %s on %s. Indicates missing samples", name, objectRef.name)
+					continue
+				}
+
+				// Percentage values must be scaled down by 100.
+				info, ok := metricInfo[name]
+				if !ok {
+					log.Printf("E! [input.vsphere]: Could not determine unit for %s. Skipping", name)
+					continue
+				}
+				if info.UnitInfo.GetElementDescription().Key == "percent" {
+					bucket.fields[fn] = float64(value) / 100.0
+				} else {
+					bucket.fields[fn] = value
+				}
+				count++
+			}
+		}
+		// We've iterated through all the metrics and collected buckets for each
+		// measurement name. Now emit them!
+		for _, bucket := range buckets {
+			acc.AddFields(bucket.name, bucket.fields, bucket.tags, bucket.ts)
+		}
+	}
+	return count, nil
+}
+
+func (e *Endpoint) getParent(obj resourceInfo) (resourceInfo, bool) {
+	p := obj.parentRef
+	if p == nil {
+		log.Printf("D! 
[input.vsphere] No parent found for %s", obj.name) + return resourceInfo{}, false + } + r, ok := e.instanceInfo[p.Value] + return r, ok +} + +func (e *Endpoint) populateTags(objectRef *objectRef, resourceType string, resource *resourceKind, t map[string]string, v *performance.MetricSeries) { + // Map name of object. + if resource.pKey != "" { + t[resource.pKey] = objectRef.name + } + + // Map parent reference + parent, found := e.instanceInfo[objectRef.parentRef.Value] + if found { + t[resource.parentTag] = parent.name + if resourceType == "vm" { + if objectRef.guest != "" { + t["guest"] = objectRef.guest + } + if c, ok := e.getParent(parent); ok { + t["clustername"] = c.name + } + } + } + + // Fill in Datacenter name + if objectRef.dcname != "" { + t["dcname"] = objectRef.dcname + } + + // Determine which point tag to map to the instance + name := v.Name + instance := "instance-total" + if v.Instance != "" { + instance = v.Instance + } + if strings.HasPrefix(name, "cpu.") { + t["cpu"] = instance + } else if strings.HasPrefix(name, "datastore.") { + t["lun"] = instance + } else if strings.HasPrefix(name, "disk.") { + t["disk"] = cleanDiskTag(instance) + } else if strings.HasPrefix(name, "net.") { + t["interface"] = instance + } else if strings.HasPrefix(name, "storageAdapter.") { + t["adapter"] = instance + } else if strings.HasPrefix(name, "storagePath.") { + t["path"] = instance + } else if strings.HasPrefix(name, "sys.resource") { + t["resource"] = instance + } else if strings.HasPrefix(name, "vflashModule.") { + t["module"] = instance + } else if strings.HasPrefix(name, "virtualDisk.") { + t["disk"] = instance + } else if v.Instance != "" { + // default + t["instance"] = v.Instance + } +} + +func (e *Endpoint) makeMetricIdentifier(prefix, metric string) (string, string) { + parts := strings.Split(metric, ".") + if len(parts) == 1 { + return prefix, parts[0] + } + return prefix + e.Parent.Separator + parts[0], strings.Join(parts[1:], e.Parent.Separator) +} + +func cleanGuestID(id string) string { + return strings.TrimSuffix(id, "Guest") +} + +func cleanDiskTag(disk string) string { + // Remove enclosing "<>" + return strings.TrimSuffix(strings.TrimPrefix(disk, "<"), ">") +} diff --git a/plugins/inputs/vsphere/selfhealth.go b/plugins/inputs/vsphere/selfhealth.go new file mode 100644 index 000000000..66069ca75 --- /dev/null +++ b/plugins/inputs/vsphere/selfhealth.go @@ -0,0 +1,53 @@ +package vsphere + +import ( + "time" + + "github.com/influxdata/telegraf/selfstat" +) + +// Stopwatch is a simple helper for recording timing information, +// such as gather times and discovery times. +type Stopwatch struct { + stat selfstat.Stat + start time.Time +} + +// NewStopwatch creates a new StopWatch and starts measuring time +// its creation. +func NewStopwatch(name, vCenter string) *Stopwatch { + return &Stopwatch{ + stat: selfstat.RegisterTiming("vsphere", name+"_ns", map[string]string{"vcenter": vCenter}), + start: time.Now(), + } +} + +// NewStopwatchWithTags creates a new StopWatch and starts measuring time +// its creation. Allows additional tags. +func NewStopwatchWithTags(name, vCenter string, tags map[string]string) *Stopwatch { + tags["vcenter"] = vCenter + return &Stopwatch{ + stat: selfstat.RegisterTiming("vsphere", name+"_ns", tags), + start: time.Now(), + } +} + +// Stop stops a Stopwatch and records the time. +func (s *Stopwatch) Stop() { + s.stat.Set(time.Since(s.start).Nanoseconds()) +} + +// SendInternalCounter is a convenience method for sending +// non-timing internal metrics. 
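+// The counter is registered through Telegraf's selfstat registry under the
+// "vsphere" measurement and tagged with the vCenter host.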
+func SendInternalCounter(name, vCenter string, value int64) { + s := selfstat.Register("vsphere", name, map[string]string{"vcenter": vCenter}) + s.Set(value) +} + +// SendInternalCounterWithTags is a convenience method for sending +// non-timing internal metrics. Allows additional tags +func SendInternalCounterWithTags(name, vCenter string, tags map[string]string, value int64) { + tags["vcenter"] = vCenter + s := selfstat.Register("vsphere", name, tags) + s.Set(value) +} diff --git a/plugins/inputs/vsphere/vsphere.go b/plugins/inputs/vsphere/vsphere.go new file mode 100644 index 000000000..26af1e8cc --- /dev/null +++ b/plugins/inputs/vsphere/vsphere.go @@ -0,0 +1,312 @@ +package vsphere + +import ( + "context" + "log" + "sync" + "time" + + "github.com/influxdata/telegraf" + "github.com/influxdata/telegraf/internal" + "github.com/influxdata/telegraf/internal/tls" + "github.com/influxdata/telegraf/plugins/inputs" + "github.com/vmware/govmomi/vim25/soap" +) + +// VSphere is the top level type for the vSphere input plugin. It contains all the configuration +// and a list of connected vSphere endpoints +type VSphere struct { + Vcenters []string + Username string + Password string + DatacenterInstances bool + DatacenterMetricInclude []string + DatacenterMetricExclude []string + ClusterInstances bool + ClusterMetricInclude []string + ClusterMetricExclude []string + HostInstances bool + HostMetricInclude []string + HostMetricExclude []string + VMInstances bool `toml:"vm_instances"` + VMMetricInclude []string `toml:"vm_metric_include"` + VMMetricExclude []string `toml:"vm_metric_exclude"` + DatastoreInstances bool + DatastoreMetricInclude []string + DatastoreMetricExclude []string + Separator string + + MaxQueryObjects int + MaxQueryMetrics int + CollectConcurrency int + DiscoverConcurrency int + ForceDiscoverOnInit bool + ObjectDiscoveryInterval internal.Duration + Timeout internal.Duration + + endpoints []*Endpoint + cancel context.CancelFunc + + // Mix in the TLS/SSL goodness from core + tls.ClientConfig +} + +var sampleConfig = ` + ## List of vCenter URLs to be monitored. These three lines must be uncommented + ## and edited for the plugin to work. 
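+  ## Multiple vCenter URLs may be listed; a separate endpoint is created and collected for each one.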
+ vcenters = [ "https://vcenter.local/sdk" ] + username = "user@corp.local" + password = "secret" + + ## VMs + ## Typical VM metrics (if omitted or empty, all metrics are collected) + vm_metric_include = [ + "cpu.demand.average", + "cpu.idle.summation", + "cpu.latency.average", + "cpu.readiness.average", + "cpu.ready.summation", + "cpu.run.summation", + "cpu.usagemhz.average", + "cpu.used.summation", + "cpu.wait.summation", + "mem.active.average", + "mem.granted.average", + "mem.latency.average", + "mem.swapin.average", + "mem.swapinRate.average", + "mem.swapout.average", + "mem.swapoutRate.average", + "mem.usage.average", + "mem.vmmemctl.average", + "net.bytesRx.average", + "net.bytesTx.average", + "net.droppedRx.summation", + "net.droppedTx.summation", + "net.usage.average", + "power.power.average", + "virtualDisk.numberReadAveraged.average", + "virtualDisk.numberWriteAveraged.average", + "virtualDisk.read.average", + "virtualDisk.readOIO.latest", + "virtualDisk.throughput.usage.average", + "virtualDisk.totalReadLatency.average", + "virtualDisk.totalWriteLatency.average", + "virtualDisk.write.average", + "virtualDisk.writeOIO.latest", + "sys.uptime.latest", + ] + # vm_metric_exclude = [] ## Nothing is excluded by default + # vm_instances = true ## true by default + + ## Hosts + ## Typical host metrics (if omitted or empty, all metrics are collected) + host_metric_include = [ + "cpu.coreUtilization.average", + "cpu.costop.summation", + "cpu.demand.average", + "cpu.idle.summation", + "cpu.latency.average", + "cpu.readiness.average", + "cpu.ready.summation", + "cpu.swapwait.summation", + "cpu.usage.average", + "cpu.usagemhz.average", + "cpu.used.summation", + "cpu.utilization.average", + "cpu.wait.summation", + "disk.deviceReadLatency.average", + "disk.deviceWriteLatency.average", + "disk.kernelReadLatency.average", + "disk.kernelWriteLatency.average", + "disk.numberReadAveraged.average", + "disk.numberWriteAveraged.average", + "disk.read.average", + "disk.totalReadLatency.average", + "disk.totalWriteLatency.average", + "disk.write.average", + "mem.active.average", + "mem.latency.average", + "mem.state.latest", + "mem.swapin.average", + "mem.swapinRate.average", + "mem.swapout.average", + "mem.swapoutRate.average", + "mem.totalCapacity.average", + "mem.usage.average", + "mem.vmmemctl.average", + "net.bytesRx.average", + "net.bytesTx.average", + "net.droppedRx.summation", + "net.droppedTx.summation", + "net.errorsRx.summation", + "net.errorsTx.summation", + "net.usage.average", + "power.power.average", + "storageAdapter.numberReadAveraged.average", + "storageAdapter.numberWriteAveraged.average", + "storageAdapter.read.average", + "storageAdapter.write.average", + "sys.uptime.latest", + ] + # host_metric_exclude = [] ## Nothing excluded by default + # host_instances = true ## true by default + + ## Clusters + # cluster_metric_include = [] ## if omitted or empty, all metrics are collected + # cluster_metric_exclude = [] ## Nothing excluded by default + # cluster_instances = true ## true by default + + ## Datastores + # datastore_metric_include = [] ## if omitted or empty, all metrics are collected + # datastore_metric_exclude = [] ## Nothing excluded by default + # datastore_instances = false ## false by default for Datastores only + + ## Datacenters + datacenter_metric_include = [] ## if omitted or empty, all metrics are collected + datacenter_metric_exclude = [ "*" ] ## Datacenters are not collected by default. 
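+  ## A single "*" in an exclude list disables collection for that resource type entirely.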
+  # datacenter_instances = false ## false by default for Datacenters only
+
+  ## Plugin Settings
+  ## separator character to use for measurement and field names (default: "_")
+  # separator = "_"
+
+  ## number of objects to retrieve per query for realtime resources (VMs and hosts)
+  ## set to 64 for vCenter 5.5 and 6.0 (default: 256)
+  # max_query_objects = 256
+
+  ## number of metrics to retrieve per query for non-realtime resources (clusters and datastores)
+  ## set to 64 for vCenter 5.5 and 6.0 (default: 256)
+  # max_query_metrics = 256
+
+  ## number of goroutines to use for collection and discovery of objects and metrics
+  # collect_concurrency = 1
+  # discover_concurrency = 1
+
+  ## whether or not to force discovery of new objects on the initial gather call before collecting metrics
+  ## when true, in large environments the time spent on discovery may cause the first collection to time out
+  ## when false (default), the first collection cycle may return no or limited metrics while objects are still being discovered
+  # force_discover_on_init = false
+
+  ## the interval before (re)discovering objects subject to metrics collection (default: 300s)
+  # object_discovery_interval = "300s"
+
+  ## timeout applies to any API request made to vCenter
+  # timeout = "20s"
+
+  ## Optional SSL Config
+  # ssl_ca = "/path/to/cafile"
+  # ssl_cert = "/path/to/certfile"
+  # ssl_key = "/path/to/keyfile"
+  ## Use SSL but skip chain & host verification
+  # insecure_skip_verify = false
+`
+
+// SampleConfig returns a set of default configuration to be used as a boilerplate when setting up
+// Telegraf.
+func (v *VSphere) SampleConfig() string {
+	return sampleConfig
+}
+
+// Description returns a short textual description of the plugin.
+func (v *VSphere) Description() string {
+	return "Read metrics from VMware vCenter"
+}
+
+// Start is called from telegraf core when a plugin is started and allows it to
+// perform initialization tasks.
+func (v *VSphere) Start(acc telegraf.Accumulator) error {
+	log.Println("D! [input.vsphere]: Starting plugin")
+	ctx, cancel := context.WithCancel(context.Background())
+	v.cancel = cancel
+
+	// Create endpoints, one for each vCenter we're monitoring
+	v.endpoints = make([]*Endpoint, len(v.Vcenters))
+	for i, rawURL := range v.Vcenters {
+		u, err := soap.ParseURL(rawURL)
+		if err != nil {
+			return err
+		}
+		ep, err := NewEndpoint(ctx, v, u)
+		if err != nil {
+			return err
+		}
+		v.endpoints[i] = ep
+	}
+	return nil
+}
+
+// Stop is called from telegraf core when a plugin is stopped and allows it to
+// perform shutdown tasks.
+func (v *VSphere) Stop() {
+	log.Println("D! [input.vsphere]: Stopping plugin")
+	v.cancel()
+
+	// Wait for all endpoints to finish. No need to wait for
+	// Gather() to finish here, since Stop() will only be called
+	// after the last Gather() has finished. We do, however, need to
+	// wait for any discovery to complete by trying to grab the
+	// "busy" mutex.
+	for _, ep := range v.endpoints {
+		log.Printf("D! [input.vsphere]: Waiting for endpoint %s to finish", ep.URL.Host)
+		func() {
+			ep.busy.Lock() // Wait until discovery is finished
+			defer ep.busy.Unlock()
+			ep.Close()
+		}()
+	}
+}
+
+// Gather is the main data collection function called by the Telegraf core. It performs all
+// the data collection and writes all metrics into the Accumulator passed as an argument. 
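+// Endpoints are collected concurrently, one goroutine per configured vCenter, and any
+// error other than context cancellation is reported through the Accumulator.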
+func (v *VSphere) Gather(acc telegraf.Accumulator) error { + var wg sync.WaitGroup + for _, ep := range v.endpoints { + wg.Add(1) + go func(endpoint *Endpoint) { + defer wg.Done() + err := endpoint.Collect(context.Background(), acc) + if err == context.Canceled { + + // No need to signal errors if we were merely canceled. + err = nil + } + if err != nil { + acc.AddError(err) + } + }(ep) + } + + wg.Wait() + return nil +} + +func init() { + inputs.Add("vsphere", func() telegraf.Input { + return &VSphere{ + Vcenters: []string{}, + + ClusterInstances: true, + ClusterMetricInclude: nil, + ClusterMetricExclude: nil, + HostInstances: true, + HostMetricInclude: nil, + HostMetricExclude: nil, + VMInstances: true, + VMMetricInclude: nil, + VMMetricExclude: nil, + DatastoreInstances: false, + DatastoreMetricInclude: nil, + DatastoreMetricExclude: nil, + Separator: "_", + + MaxQueryObjects: 256, + MaxQueryMetrics: 256, + CollectConcurrency: 1, + DiscoverConcurrency: 1, + ForceDiscoverOnInit: false, + ObjectDiscoveryInterval: internal.Duration{Duration: time.Second * 300}, + Timeout: internal.Duration{Duration: time.Second * 20}, + } + }) +} diff --git a/plugins/inputs/vsphere/vsphere_test.go b/plugins/inputs/vsphere/vsphere_test.go new file mode 100644 index 000000000..20c61d92b --- /dev/null +++ b/plugins/inputs/vsphere/vsphere_test.go @@ -0,0 +1,246 @@ +package vsphere + +import ( + "context" + "crypto/tls" + "fmt" + "regexp" + "sort" + "testing" + "time" + + "github.com/influxdata/telegraf/internal" + itls "github.com/influxdata/telegraf/internal/tls" + "github.com/influxdata/telegraf/testutil" + "github.com/influxdata/toml" + "github.com/stretchr/testify/require" + "github.com/vmware/govmomi/simulator" +) + +var configHeader = ` +# Telegraf Configuration +# +# Telegraf is entirely plugin driven. All metrics are gathered from the +# declared inputs, and sent to the declared outputs. +# +# Plugins must be declared in here to be active. +# To deactivate a plugin, comment out the name and any variables. +# +# Use 'telegraf -config telegraf.conf -test' to see what metrics a config +# file would generate. +# +# Environment variables can be used anywhere in this config file, simply prepend +# them with $. For strings the variable must be within quotes (ie, "$STR_VAR"), +# for numbers and booleans they should be plain (ie, $INT_VAR, $BOOL_VAR) + + +# Global tags can be specified here in key="value" format. +[global_tags] + # dc = "us-east-1" # will tag all metrics with dc=us-east-1 + # rack = "1a" + ## Environment variables can be used as tags, and throughout the config file + # user = "$USER" + + +# Configuration for telegraf agent +[agent] + ## Default data collection interval for all inputs + interval = "10s" + ## Rounds collection interval to 'interval' + ## ie, if interval="10s" then always collect on :00, :10, :20, etc. + round_interval = true + + ## Telegraf will send metrics to outputs in batches of at most + ## metric_batch_size metrics. + ## This controls the size of writes that Telegraf sends to output plugins. + metric_batch_size = 1000 + + ## For failed writes, telegraf will cache metric_buffer_limit metrics for each + ## output, and will flush this buffer on a successful write. Oldest metrics + ## are dropped first when this buffer fills. + ## This buffer only fills when writes fail to output plugin(s). + metric_buffer_limit = 10000 + + ## Collection jitter is used to jitter the collection by a random amount. + ## Each plugin will sleep for a random time within jitter before collecting. 
+ ## This can be used to avoid many plugins querying things like sysfs at the + ## same time, which can have a measurable effect on the system. + collection_jitter = "0s" + + ## Default flushing interval for all outputs. You shouldn't set this below + ## interval. Maximum flush_interval will be flush_interval + flush_jitter + flush_interval = "10s" + ## Jitter the flush interval by a random amount. This is primarily to avoid + ## large write spikes for users running a large number of telegraf instances. + ## ie, a jitter of 5s and interval 10s means flushes will happen every 10-15s + flush_jitter = "0s" + + ## By default or when set to "0s", precision will be set to the same + ## timestamp order as the collection interval, with the maximum being 1s. + ## ie, when interval = "10s", precision will be "1s" + ## when interval = "250ms", precision will be "1ms" + ## Precision will NOT be used for service inputs. It is up to each individual + ## service input to set the timestamp at the appropriate precision. + ## Valid time units are "ns", "us" (or "µs"), "ms", "s". + precision = "" + + ## Logging configuration: + ## Run telegraf with debug log messages. + debug = false + ## Run telegraf in quiet mode (error log messages only). + quiet = false + ## Specify the log file name. The empty string means to log to stderr. + logfile = "" + + ## Override default hostname, if empty use os.Hostname() + hostname = "" + ## If set to true, do no set the "host" tag in the telegraf agent. + omit_hostname = false +` + +func defaultVSphere() *VSphere { + return &VSphere{ + ClusterMetricInclude: []string{ + "cpu.usage.*", + "cpu.usagemhz.*", + "mem.usage.*", + "mem.active.*"}, + ClusterMetricExclude: nil, + HostMetricInclude: []string{ + "cpu.ready.summation.delta.millisecond", + "cpu.latency.average.rate.percent", + "cpu.coreUtilization.average.rate.percent", + "mem.usage.average.absolute.percent", + "mem.swapinRate.average.rate.kiloBytesPerSecond", + "mem.state.latest.absolute.number", + "mem.latency.average.absolute.percent", + "mem.vmmemctl.average.absolute.kiloBytes", + "disk.read.average.rate.kiloBytesPerSecond", + "disk.write.average.rate.kiloBytesPerSecond", + "disk.numberReadAveraged.average.rate.number", + "disk.numberWriteAveraged.average.rate.number", + "disk.deviceReadLatency.average.absolute.millisecond", + "disk.deviceWriteLatency.average.absolute.millisecond", + "disk.totalReadLatency.average.absolute.millisecond", + "disk.totalWriteLatency.average.absolute.millisecond", + "storageAdapter.read.average.rate.kiloBytesPerSecond", + "storageAdapter.write.average.rate.kiloBytesPerSecond", + "storageAdapter.numberReadAveraged.average.rate.number", + "storageAdapter.numberWriteAveraged.average.rate.number", + "net.errorsRx.summation.delta.number", + "net.errorsTx.summation.delta.number", + "net.bytesRx.average.rate.kiloBytesPerSecond", + "net.bytesTx.average.rate.kiloBytesPerSecond", + "cpu.used.summation.delta.millisecond", + "cpu.usage.average.rate.percent", + "cpu.utilization.average.rate.percent", + "cpu.wait.summation.delta.millisecond", + "cpu.idle.summation.delta.millisecond", + "cpu.readiness.average.rate.percent", + "cpu.costop.summation.delta.millisecond", + "cpu.swapwait.summation.delta.millisecond", + "mem.swapoutRate.average.rate.kiloBytesPerSecond", + "disk.kernelReadLatency.average.absolute.millisecond", + "disk.kernelWriteLatency.average.absolute.millisecond"}, + HostMetricExclude: nil, + VMMetricInclude: []string{ + "cpu.ready.summation.delta.millisecond", + 
"mem.swapinRate.average.rate.kiloBytesPerSecond", + "virtualDisk.numberReadAveraged.average.rate.number", + "virtualDisk.numberWriteAveraged.average.rate.number", + "virtualDisk.totalReadLatency.average.absolute.millisecond", + "virtualDisk.totalWriteLatency.average.absolute.millisecond", + "virtualDisk.readOIO.latest.absolute.number", + "virtualDisk.writeOIO.latest.absolute.number", + "net.bytesRx.average.rate.kiloBytesPerSecond", + "net.bytesTx.average.rate.kiloBytesPerSecond", + "net.droppedRx.summation.delta.number", + "net.droppedTx.summation.delta.number", + "cpu.run.summation.delta.millisecond", + "cpu.used.summation.delta.millisecond", + "mem.swapoutRate.average.rate.kiloBytesPerSecond", + "virtualDisk.read.average.rate.kiloBytesPerSecond", + "virtualDisk.write.average.rate.kiloBytesPerSecond"}, + VMMetricExclude: nil, + DatastoreMetricInclude: []string{ + "disk.used.*", + "disk.provsioned.*"}, + DatastoreMetricExclude: nil, + ClientConfig: itls.ClientConfig{InsecureSkipVerify: true}, + + MaxQueryObjects: 256, + ObjectDiscoveryInterval: internal.Duration{Duration: time.Second * 300}, + Timeout: internal.Duration{Duration: time.Second * 20}, + ForceDiscoverOnInit: true, + } +} + +func createSim() (*simulator.Model, *simulator.Server, error) { + model := simulator.VPX() + + err := model.Create() + if err != nil { + return nil, nil, err + } + + model.Service.TLS = new(tls.Config) + + s := model.Service.NewServer() + //fmt.Printf("Server created at: %s\n", s.URL) + + return model, s, nil +} + +func TestParseConfig(t *testing.T) { + v := VSphere{} + c := v.SampleConfig() + p := regexp.MustCompile("\n#") + fmt.Printf("Source=%s", p.ReplaceAllLiteralString(c, "\n")) + c = configHeader + "\n[[inputs.vsphere]]\n" + p.ReplaceAllLiteralString(c, "\n") + fmt.Printf("Source=%s", c) + tab, err := toml.Parse([]byte(c)) + require.NoError(t, err) + require.NotNil(t, tab) +} + +func TestWorkerPool(t *testing.T) { + wp := NewWorkerPool(100) + ctx := context.Background() + wp.Run(ctx, func(ctx context.Context, p interface{}) interface{} { + return p.(int) * 2 + }, 10) + + n := 100000 + wp.Fill(ctx, func(ctx context.Context, f PushFunc) { + for i := 0; i < n; i++ { + f(ctx, i) + } + }) + results := make([]int, n) + i := 0 + wp.Drain(ctx, func(ctx context.Context, p interface{}) bool { + results[i] = p.(int) + i++ + return true + }) + sort.Ints(results) + for i := 0; i < n; i++ { + require.Equal(t, results[i], i*2) + } +} + +func TestAll(t *testing.T) { + m, s, err := createSim() + if err != nil { + t.Fatal(err) + } + defer m.Remove() + defer s.Close() + + var acc testutil.Accumulator + v := defaultVSphere() + v.Vcenters = []string{s.URL.String()} + v.Start(nil) // We're not using the Accumulator, so it can be nil. + defer v.Stop() + require.NoError(t, v.Gather(&acc)) +} diff --git a/plugins/inputs/vsphere/workerpool.go b/plugins/inputs/vsphere/workerpool.go new file mode 100644 index 000000000..6695735ce --- /dev/null +++ b/plugins/inputs/vsphere/workerpool.go @@ -0,0 +1,119 @@ +package vsphere + +import ( + "context" + "log" + "sync" +) + +// WorkerFunc is a function that is supposed to do the actual work +// of the WorkerPool. It is similar to the "map" portion of the +// map/reduce semantics, in that it takes a single value as an input, +// does some processing and returns a single result. +type WorkerFunc func(context.Context, interface{}) interface{} + +// PushFunc is called from a FillerFunc to push a workitem onto +// the input channel. Wraps some logic for gracefulk shutdowns. 
+type PushFunc func(context.Context, interface{}) bool + +// DrainerFunc represents a function used to "drain" the WorkerPool, +// i.e. pull out all the results generated by the workers and processing +// them. The DrainerFunc is called once per result produced. +// If the function returns false, the draining of the pool is aborted. +type DrainerFunc func(context.Context, interface{}) bool + +// FillerFunc represents a function for filling the WorkerPool with jobs. +// It is called once and is responsible for pushing jobs onto the supplied channel. +type FillerFunc func(context.Context, PushFunc) + +// WorkerPool implements a simple work pooling mechanism. It runs a predefined +// number of goroutines to process jobs. Jobs are inserted using the Fill call +// and results are retrieved through the Drain function. +type WorkerPool struct { + wg sync.WaitGroup + In chan interface{} + Out chan interface{} +} + +// NewWorkerPool creates a worker pool +func NewWorkerPool(bufsize int) *WorkerPool { + return &WorkerPool{ + In: make(chan interface{}, bufsize), + Out: make(chan interface{}, bufsize), + } +} + +func (w *WorkerPool) push(ctx context.Context, job interface{}) bool { + select { + case w.In <- job: + return true + case <-ctx.Done(): + return false + } +} + +func (w *WorkerPool) pushOut(ctx context.Context, result interface{}) bool { + select { + case w.Out <- result: + return true + case <-ctx.Done(): + return false + } +} + +// Run takes a WorkerFunc and runs it in 'n' goroutines. +func (w *WorkerPool) Run(ctx context.Context, f WorkerFunc, n int) bool { + w.wg.Add(1) + go func() { + defer w.wg.Done() + var localWg sync.WaitGroup + localWg.Add(n) + for i := 0; i < n; i++ { + go func() { + defer localWg.Done() + for { + select { + case job, ok := <-w.In: + if !ok { + return + } + w.pushOut(ctx, f(ctx, job)) + case <-ctx.Done(): + log.Printf("D! [input.vsphere]: Stop requested for worker pool. Exiting.") + return + } + } + }() + } + localWg.Wait() + close(w.Out) + }() + return ctx.Err() == nil +} + +// Fill runs a FillerFunc responsible for supplying work to the pool. You may only +// call Fill once. Calling it twice will panic. +func (w *WorkerPool) Fill(ctx context.Context, f FillerFunc) bool { + w.wg.Add(1) + go func() { + defer w.wg.Done() + f(ctx, w.push) + close(w.In) + }() + return true +} + +// Drain runs a DrainerFunc for each result generated by the workers. +func (w *WorkerPool) Drain(ctx context.Context, f DrainerFunc) bool { + w.wg.Add(1) + go func() { + defer w.wg.Done() + for result := range w.Out { + if !f(ctx, result) { + break + } + } + }() + w.wg.Wait() + return ctx.Err() != nil +}
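For reference, the include/exclude lists in the plugin configuration are compiled by newFilterOrPanic into a Telegraf glob filter. The sketch below is illustrative only and not part of this patch; the metric names are examples, and it assumes Telegraf's filter package behaves as it does elsewhere in the code base (exclude patterns take precedence over include patterns):

```go
package main

import (
	"fmt"

	"github.com/influxdata/telegraf/filter"
)

func main() {
	// Same call the plugin makes in newFilterOrPanic(): include globs first, exclude globs second.
	f, err := filter.NewIncludeExcludeFilter(
		[]string{"cpu.*", "mem.*"},     // include
		[]string{"cpu.idle.summation"}, // exclude (takes precedence over include)
	)
	if err != nil {
		panic(err)
	}
	fmt.Println(f.Match("cpu.usage.average"))   // true: included, not excluded
	fmt.Println(f.Match("cpu.idle.summation"))  // false: explicitly excluded
	fmt.Println(f.Match("net.bytesRx.average")) // false: not covered by the include list
}
```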