From b71a387ca270f18d6021af01be483e90556600fd Mon Sep 17 00:00:00 2001 From: Daniel Nelson Date: Tue, 12 Nov 2019 16:13:30 -0800 Subject: [PATCH] Add additional nvidia-smi examples as testcases --- plugins/inputs/nvidia_smi/README.md | 13 +- plugins/inputs/nvidia_smi/nvidia_smi_test.go | 202 +++++++++++-------- 2 files changed, 131 insertions(+), 84 deletions(-) diff --git a/plugins/inputs/nvidia_smi/README.md b/plugins/inputs/nvidia_smi/README.md index 8afa74538..2173c904e 100644 --- a/plugins/inputs/nvidia_smi/README.md +++ b/plugins/inputs/nvidia_smi/README.md @@ -55,11 +55,20 @@ SELECT mean("temperature_gpu") FROM "nvidia_smi" WHERE time > now() - 5m GROUP B ### Troubleshooting -As the `telegraf` user run the following command. Adjust the path to `nvidia-smi` if customized. +Check the full output by running the `nvidia-smi` binary manually. + +Linux: ``` -/usr/bin/nvidia-smi --format=noheader,nounits,csv --query-gpu=fan.speed,memory.total,memory.used,memory.free,pstate,temperature.gpu,name,uuid,compute_mode,utilization.gpu,utilization.memory,index,power.draw,pcie.link.gen.current,pcie.link.width.current,encoder.stats.sessionCount,encoder.stats.averageFps,encoder.stats.averageLatency,clocks.current.graphics,clocks.current.sm,clocks.current.memory,clocks.current.video +sudo -u telegraf -- /usr/bin/nvidia-smi -q -x ``` +Windows: +``` +"C:\Program Files\NVIDIA Corporation\NVSMI\nvidia-smi.exe" -q -x +``` + +Please include the output of this command if opening a GitHub issue. 
+ ### Example Output ``` nvidia_smi,compute_mode=Default,host=8218cf,index=0,name=GeForce\ GTX\ 1070,pstate=P2,uuid=GPU-823bc202-6279-6f2c-d729-868a30f14d96 fan_speed=100i,memory_free=7563i,memory_total=8112i,memory_used=549i,temperature_gpu=53i,utilization_gpu=100i,utilization_memory=90i 1523991122000000000 diff --git a/plugins/inputs/nvidia_smi/nvidia_smi_test.go b/plugins/inputs/nvidia_smi/nvidia_smi_test.go index 7d0ec4666..6fd37b570 100644 --- a/plugins/inputs/nvidia_smi/nvidia_smi_test.go +++ b/plugins/inputs/nvidia_smi/nvidia_smi_test.go @@ -1,99 +1,137 @@ package nvidia_smi import ( - "fmt" + "io/ioutil" + "path/filepath" "testing" + "time" + "github.com/influxdata/telegraf" "github.com/influxdata/telegraf/testutil" "github.com/stretchr/testify/require" ) -var payload = []byte(` - - - - GeForce GTX 1070 Ti - GPU-f9ba66fc-a7f5-94c5-da19-019ef2f9c665 - - - - 1 - - - 16x - - - - 100 % - P8 - - 4096 MiB - 42 MiB - 4054 MiB - - Default - - 0 % - 0 % - - - 0 - 0 - 0 - - - 39 C - - - N/A - - - 135 MHz - 135 MHz - 405 MHz - 405 MHz - - -`) - -func TestGatherSMI(t *testing.T) { - var expectedMetric = struct { - tags map[string]string - fields map[string]interface{} +func TestGatherValidXML(t *testing.T) { + tests := []struct { + name string + filename string + expected []telegraf.Metric }{ - tags: map[string]string{ - "name": "GeForce GTX 1070 Ti", - "compute_mode": "Default", - "index": "0", - "pstate": "P8", - "uuid": "GPU-f9ba66fc-a7f5-94c5-da19-019ef2f9c665", + { + name: "GeForce GTX 1070 Ti", + filename: "gtx-1070-ti.xml", + expected: []telegraf.Metric{ + testutil.MustMetric( + "nvidia_smi", + map[string]string{ + "name": "GeForce GTX 1070 Ti", + "compute_mode": "Default", + "index": "0", + "pstate": "P8", + "uuid": "GPU-f9ba66fc-a7f5-94c5-da19-019ef2f9c665", + }, + map[string]interface{}{ + "clocks_current_graphics": 135, + "clocks_current_memory": 405, + "clocks_current_sm": 135, + "clocks_current_video": 405, + "encoder_stats_average_fps": 0, + 
"encoder_stats_average_latency": 0, + "encoder_stats_session_count": 0, + "fan_speed": 100, + "memory_free": 4054, + "memory_total": 4096, + "memory_used": 42, + "pcie_link_gen_current": 1, + "pcie_link_width_current": 16, + "temperature_gpu": 39, + "utilization_gpu": 0, + "utilization_memory": 0, + }, + time.Unix(0, 0)), + }, }, - fields: map[string]interface{}{ - "fan_speed": 100, - "memory_free": 4054, - "memory_used": 42, - "memory_total": 4096, - "temperature_gpu": 39, - "utilization_gpu": 0, - "utilization_memory": 0, - "pcie_link_gen_current": 1, - "pcie_link_width_current": 16, - "encoder_stats_session_count": 0, - "encoder_stats_average_fps": 0, - "encoder_stats_average_latency": 0, - "clocks_current_graphics": 135, - "clocks_current_sm": 135, - "clocks_current_memory": 405, - "clocks_current_video": 405, + { + name: "GeForce GTX 1660 Ti", + filename: "gtx-1660-ti.xml", + expected: []telegraf.Metric{ + testutil.MustMetric( + "nvidia_smi", + map[string]string{ + "compute_mode": "Default", + "index": "0", + "name": "Graphics Device", + "pstate": "P8", + "uuid": "GPU-304a277d-3545-63b8-3a36-dfde3c992989", + }, + map[string]interface{}{ + "clocks_current_graphics": 300, + "clocks_current_memory": 405, + "clocks_current_sm": 300, + "clocks_current_video": 540, + "encoder_stats_average_fps": 0, + "encoder_stats_average_latency": 0, + "encoder_stats_session_count": 0, + "fan_speed": 0, + "memory_free": 5912, + "memory_total": 5912, + "memory_used": 0, + "pcie_link_gen_current": 1, + "pcie_link_width_current": 16, + "power_draw": 8.93, + "temperature_gpu": 40, + "utilization_gpu": 0, + "utilization_memory": 1, + }, + time.Unix(0, 0)), + }, + }, + { + name: "Quadro P400", + filename: "quadro-p400.xml", + expected: []telegraf.Metric{ + testutil.MustMetric( + "nvidia_smi", + map[string]string{ + "compute_mode": "Default", + "index": "0", + "name": "Quadro P400", + "pstate": "P8", + "uuid": "GPU-8f750be4-dfbc-23b9-b33f-da729a536494", + }, + map[string]interface{}{ + 
"clocks_current_graphics": 139, + "clocks_current_memory": 405, + "clocks_current_sm": 139, + "clocks_current_video": 544, + "encoder_stats_average_fps": 0, + "encoder_stats_average_latency": 0, + "encoder_stats_session_count": 0, + "fan_speed": 34, + "memory_free": 1998, + "memory_total": 1998, + "memory_used": 0, + "pcie_link_gen_current": 1, + "pcie_link_width_current": 16, + "temperature_gpu": 33, + "utilization_gpu": 0, + "utilization_memory": 3, + }, + time.Unix(0, 0)), + }, }, } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + var acc testutil.Accumulator - acc := &testutil.Accumulator{} + octets, err := ioutil.ReadFile(filepath.Join("testdata", tt.filename)) + require.NoError(t, err) - gatherNvidiaSMI(payload, acc) - fmt.Println() + err = gatherNvidiaSMI(octets, &acc) + require.NoError(t, err) - require.Equal(t, 1, len(acc.Metrics)) - require.Equal(t, expectedMetric.fields, acc.Metrics[0].Fields) - require.Equal(t, expectedMetric.tags, acc.Metrics[0].Tags) + testutil.RequireMetricsEqual(t, tt.expected, acc.GetTelegrafMetrics(), testutil.IgnoreTime()) + }) + } }