Update kubernetes input docs (#4990)

This commit is contained in:
Daniel Nelson 2018-11-15 15:43:47 -08:00 committed by GitHub
parent d886055f67
commit 274af39a5e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed file with 115 additions and 245 deletions

View File

@ -1,7 +1,5 @@
# Kubernetes Input Plugin # Kubernetes Input Plugin
**This plugin is experimental and may cause high cardinality issues with moderate to large Kubernetes deployments**
This input plugin talks to the kubelet api using the `/stats/summary` endpoint to gather metrics about the running pods and containers for a single host. It is assumed that this plugin is running as part of a `daemonset` within a kubernetes installation. This means that telegraf is running on every node within the cluster. Therefore, you should configure this plugin to talk to its locally running kubelet. This input plugin talks to the kubelet api using the `/stats/summary` endpoint to gather metrics about the running pods and containers for a single host. It is assumed that this plugin is running as part of a `daemonset` within a kubernetes installation. This means that telegraf is running on every node within the cluster. Therefore, you should configure this plugin to talk to its locally running kubelet.
To find the ip address of the host you are running on you can issue a command like the following: To find the ip address of the host you are running on you can issue a command like the following:
@ -10,256 +8,128 @@ $ curl -s $API_URL/api/v1/namespaces/$POD_NAMESPACE/pods/$HOSTNAME --header "Aut
``` ```
In this case we used the downward API to pass in `$POD_NAMESPACE`; `$HOSTNAME` is the hostname of the pod, which is set by the Kubernetes API. In this case we used the downward API to pass in `$POD_NAMESPACE`; `$HOSTNAME` is the hostname of the pod, which is set by the Kubernetes API.
## Summary Data #### Series Cardinality Warning
```json This plugin may produce a high number of series which, when not controlled
{ for, will cause high load on your database. Use the following techniques to
"node": { avoid cardinality issues:
"nodeName": "node1",
"systemContainers": [ - Use [metric filtering][] options to exclude unneeded measurements and tags.
{ - Write to a database with an appropriate [retention policy][].
"name": "kubelet", - Limit series cardinality in your database using the
"startTime": "2016-08-25T18:46:52Z", [max-series-per-database][] and [max-values-per-tag][] settings.
"cpu": { - Consider using the [Time Series Index][tsi].
"time": "2016-09-27T16:57:31Z", - Monitor your database's [series cardinality][].
"usageNanoCores": 56652446, - Consult the [InfluxDB documentation][influx-docs] for the most up-to-date techniques.
"usageCoreNanoSeconds": 101437561712262
}, ### Configuration
"memory": {
"time": "2016-09-27T16:57:31Z", ```toml
"usageBytes": 62529536, [[inputs.kubernetes]]
"workingSetBytes": 62349312, ## URL for the kubelet
"rssBytes": 47509504, url = "http://127.0.0.1:10255"
"pageFaults": 4769397409,
"majorPageFaults": 13 ## Use bearer token for authorization
}, # bearer_token = "/path/to/bearer/token"
"rootfs": {
"availableBytes": 84379979776, ## Set response_timeout (default 5 seconds)
"capacityBytes": 105553100800 # response_timeout = "5s"
},
"logs": { ## Optional TLS Config
"availableBytes": 84379979776, # tls_ca = "/path/to/cafile"
"capacityBytes": 105553100800 # tls_cert = "/path/to/certfile"
}, # tls_key = "/path/to/keyfile"
"userDefinedMetrics": null ## Use TLS but skip chain & host verification
}, # insecure_skip_verify = false
{
"name": "bar",
"startTime": "2016-08-25T18:46:52Z",
"cpu": {
"time": "2016-09-27T16:57:31Z",
"usageNanoCores": 56652446,
"usageCoreNanoSeconds": 101437561712262
},
"memory": {
"time": "2016-09-27T16:57:31Z",
"usageBytes": 62529536,
"workingSetBytes": 62349312,
"rssBytes": 47509504,
"pageFaults": 4769397409,
"majorPageFaults": 13
},
"rootfs": {
"availableBytes": 84379979776,
"capacityBytes": 105553100800
},
"logs": {
"availableBytes": 84379979776,
"capacityBytes": 105553100800
},
"userDefinedMetrics": null
}
],
"startTime": "2016-08-25T18:46:52Z",
"cpu": {
"time": "2016-09-27T16:57:41Z",
"usageNanoCores": 576996212,
"usageCoreNanoSeconds": 774129887054161
},
"memory": {
"time": "2016-09-27T16:57:41Z",
"availableBytes": 10726387712,
"usageBytes": 12313182208,
"workingSetBytes": 5081538560,
"rssBytes": 35586048,
"pageFaults": 351742,
"majorPageFaults": 1236
},
"network": {
"time": "2016-09-27T16:57:41Z",
"rxBytes": 213281337459,
"rxErrors": 0,
"txBytes": 292869995684,
"txErrors": 0
},
"fs": {
"availableBytes": 84379979776,
"capacityBytes": 105553100800,
"usedBytes": 16754286592
},
"runtime": {
"imageFs": {
"availableBytes": 84379979776,
"capacityBytes": 105553100800,
"usedBytes": 5809371475
}
}
},
"pods": [
{
"podRef": {
"name": "foopod",
"namespace": "foons",
"uid": "6d305b06-8419-11e6-825c-42010af000ae"
},
"startTime": "2016-09-26T18:45:42Z",
"containers": [
{
"name": "foocontainer",
"startTime": "2016-09-26T18:46:43Z",
"cpu": {
"time": "2016-09-27T16:57:32Z",
"usageNanoCores": 846503,
"usageCoreNanoSeconds": 56507553554
},
"memory": {
"time": "2016-09-27T16:57:32Z",
"usageBytes": 30789632,
"workingSetBytes": 30789632,
"rssBytes": 30695424,
"pageFaults": 10761,
"majorPageFaults": 0
},
"rootfs": {
"availableBytes": 84379979776,
"capacityBytes": 105553100800,
"usedBytes": 57344
},
"logs": {
"availableBytes": 84379979776,
"capacityBytes": 105553100800,
"usedBytes": 24576
},
"userDefinedMetrics": null
}
],
"network": {
"time": "2016-09-27T16:57:34Z",
"rxBytes": 70749124,
"rxErrors": 0,
"txBytes": 47813506,
"txErrors": 0
},
"volume": [
{
"availableBytes": 7903948800,
"capacityBytes": 7903961088,
"usedBytes": 12288,
"name": "volume1"
},
{
"availableBytes": 7903956992,
"capacityBytes": 7903961088,
"usedBytes": 4096,
"name": "volume2"
},
{
"availableBytes": 7903948800,
"capacityBytes": 7903961088,
"usedBytes": 12288,
"name": "volume3"
},
{
"availableBytes": 7903952896,
"capacityBytes": 7903961088,
"usedBytes": 8192,
"name": "volume4"
}
]
}
]
}
``` ```
### Daemonset YAML ### DaemonSet
```yaml For recommendations on running Telegraf as a DaemonSet see [Monitoring Kubernetes
apiVersion: extensions/v1beta1 Architecture][k8s-telegraf] or view the [Helm charts][tick-charts].
kind: DaemonSet
metadata: ### Metrics
name: telegraf
namespace: telegraf - kubernetes_node
spec: - tags:
template: - node_name
metadata: - fields:
labels: - cpu_usage_nanocores
app: telegraf - cpu_usage_core_nanoseconds
spec: - memory_available_bytes
serviceAccount: telegraf - memory_usage_bytes
containers: - memory_working_set_bytes
- name: telegraf - memory_rss_bytes
image: quay.io/org/image:latest - memory_page_faults
imagePullPolicy: IfNotPresent - memory_major_page_faults
env: - network_rx_bytes
- name: POD_NAMESPACE - network_rx_errors
valueFrom: - network_tx_bytes
fieldRef: - network_tx_errors
fieldPath: metadata.namespace - fs_available_bytes
- name: "HOST_PROC" - fs_capacity_bytes
value: "/rootfs/proc" - fs_used_bytes
- name: "HOST_SYS" - runtime_image_fs_available_bytes
value: "/rootfs/sys" - runtime_image_fs_capacity_bytes
volumeMounts: - runtime_image_fs_used_bytes
- name: sysro
mountPath: /rootfs/sys - kubernetes_pod_container
readOnly: true - tags:
- name: procro - container_name
mountPath: /rootfs/proc - namespace
readOnly: true - node_name
- name: varrunutmpro - pod_name
mountPath: /var/run/utmp - fields:
readOnly: true - cpu_usage_nanocores
- name: logger-redis-creds - cpu_usage_core_nanoseconds
mountPath: /var/run/secrets/deis/redis/creds - memory_usage_bytes
volumes: - memory_working_set_bytes
- name: sysro - memory_rss_bytes
hostPath: - memory_page_faults
path: /sys - memory_major_page_faults
- name: procro - rootfs_available_bytes
hostPath: - rootfs_capacity_bytes
path: /proc - rootfs_used_bytes
- name: varrunutmpro - logsfs_avaialble_bytes
hostPath: - logsfs_capacity_bytes
path: /var/run/utmp - logsfs_used_bytes
- kubernetes_pod_volume
- tags:
- volume_name
- namespace
- node_name
- pod_name
- fields:
- available_bytes
- capacity_bytes
- used_bytes
- kubernetes_pod_network
- tags:
- namespace
- node_name
- pod_name
- fields:
- rx_bytes
- rx_errors
- tx_bytes
- tx_errors
### Example Output
```
kubernetes_pod_container,host=ip-10-0-0-0.ec2.internal,container_name=deis-controller,namespace=deis,node_name=ip-10-0-0-0.ec2.internal,pod_name=deis-controller-3058870187-xazsr cpu_usage_core_nanoseconds=2432835i,cpu_usage_nanocores=0i,logsfs_avaialble_bytes=121128271872i,logsfs_capacity_bytes=153567944704i,logsfs_used_bytes=20787200i,memory_major_page_faults=0i,memory_page_faults=175i,memory_rss_bytes=0i,memory_usage_bytes=0i,memory_working_set_bytes=0i,rootfs_available_bytes=121128271872i,rootfs_capacity_bytes=153567944704i,rootfs_used_bytes=1110016i 1476477530000000000
kubernetes_pod_volume,host=ip-10-0-0-0.ec2.internal,name=default-token-f7wts,namespace=kube-system,node_name=ip-10-0-0-0.ec2.internal,pod_name=kubernetes-dashboard-v1.1.1-t4x4t available_bytes=8415240192i,capacity_bytes=8415252480i,used_bytes=12288i 1476477530000000000
kubernetes_pod_network,host=ip-10-0-0-0.ec2.internal,namespace=deis,node_name=ip-10-0-0-0.ec2.internal,pod_name=deis-controller-3058870187-xazsr rx_bytes=120671099i,rx_errors=0i,tx_bytes=102451983i,tx_errors=0i 1476477530000000000
``` ```
### Line Protocol [metric filtering]: https://github.com/influxdata/telegraf/blob/master/docs/CONFIGURATION.md#metric-filtering
[retention policy]: https://docs.influxdata.com/influxdb/latest/guides/downsampling_and_retention/
#### kubernetes_pod_container [max-series-per-database]: https://docs.influxdata.com/influxdb/latest/administration/config/#max-series-per-database-1000000
``` [max-values-per-tag]: https://docs.influxdata.com/influxdb/latest/administration/config/#max-values-per-tag-100000
kubernetes_pod_container,host=ip-10-0-0-0.ec2.internal, [tsi]: https://docs.influxdata.com/influxdb/latest/concepts/time-series-index/
container_name=deis-controller,namespace=deis, [series cardinality]: https://docs.influxdata.com/influxdb/latest/query_language/spec/#show-cardinality
node_name=ip-10-0-0-0.ec2.internal, pod_name=deis-controller-3058870187-xazsr, cpu_usage_core_nanoseconds=2432835i,cpu_usage_nanocores=0i, [influx-docs]: https://docs.influxdata.com/influxdb/latest/
logsfs_avaialble_bytes=121128271872i,logsfs_capacity_bytes=153567944704i, [k8s-telegraf]: https://www.influxdata.com/blog/monitoring-kubernetes-architecture/
logsfs_used_bytes=20787200i,memory_major_page_faults=0i, [tick-charts]: https://github.com/influxdata/tick-charts
memory_page_faults=175i,memory_rss_bytes=0i,
memory_usage_bytes=0i,memory_working_set_bytes=0i,
rootfs_available_bytes=121128271872i,rootfs_capacity_bytes=153567944704i,
rootfs_used_bytes=1110016i 1476477530000000000
```
#### kubernetes_pod_volume
```
kubernetes_pod_volume,host=ip-10-0-0-0.ec2.internal,name=default-token-f7wts,
namespace=kube-system,node_name=ip-10-0-0-0.ec2.internal,
pod_name=kubernetes-dashboard-v1.1.1-t4x4t, available_bytes=8415240192i,
capacity_bytes=8415252480i,used_bytes=12288i 1476477530000000000
```
#### kubernetes_pod_network
```
kubernetes_pod_network,host=ip-10-0-0-0.ec2.internal,namespace=deis,
node_name=ip-10-0-0-0.ec2.internal,pod_name=deis-controller-3058870187-xazsr,
rx_bytes=120671099i,rx_errors=0i,
tx_bytes=102451983i,tx_errors=0i 1476477530000000000
```