Add additional nvidia-smi examples as testcases
This commit is contained in:
parent
2cf5116d14
commit
b71a387ca2
|
@ -55,11 +55,20 @@ SELECT mean("temperature_gpu") FROM "nvidia_smi" WHERE time > now() - 5m GROUP B
|
|||
|
||||
### Troubleshooting
|
||||
|
||||
Run the following command as the `telegraf` user. Adjust the path to `nvidia-smi` if it has been customized.
|
||||
Check the full output by running the `nvidia-smi` binary manually.
|
||||
|
||||
Linux:
|
||||
```
|
||||
/usr/bin/nvidia-smi --format=noheader,nounits,csv --query-gpu=fan.speed,memory.total,memory.used,memory.free,pstate,temperature.gpu,name,uuid,compute_mode,utilization.gpu,utilization.memory,index,power.draw,pcie.link.gen.current,pcie.link.width.current,encoder.stats.sessionCount,encoder.stats.averageFps,encoder.stats.averageLatency,clocks.current.graphics,clocks.current.sm,clocks.current.memory,clocks.current.video
|
||||
sudo -u telegraf -- /usr/bin/nvidia-smi -q -x
|
||||
```
|
||||
|
||||
Windows:
|
||||
```
|
||||
"C:\Program Files\NVIDIA Corporation\NVSMI\nvidia-smi.exe" -q -x
|
||||
```
|
||||
|
||||
Please include the output of this command when opening a GitHub issue.
|
||||
|
||||
### Example Output
|
||||
```
|
||||
nvidia_smi,compute_mode=Default,host=8218cf,index=0,name=GeForce\ GTX\ 1070,pstate=P2,uuid=GPU-823bc202-6279-6f2c-d729-868a30f14d96 fan_speed=100i,memory_free=7563i,memory_total=8112i,memory_used=549i,temperature_gpu=53i,utilization_gpu=100i,utilization_memory=90i 1523991122000000000
|
||||
|
|
|
@ -1,99 +1,137 @@
|
|||
package nvidia_smi
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"path/filepath"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/influxdata/telegraf"
|
||||
"github.com/influxdata/telegraf/testutil"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
var payload = []byte(`<?xml version="1.0" ?>
|
||||
<!DOCTYPE nvidia_smi_log SYSTEM "nvsmi_device_v10.dtd">
|
||||
<nvidia_smi_log>
|
||||
<gpu id="00000000:01:00.0">
|
||||
<product_name>GeForce GTX 1070 Ti</product_name>
|
||||
<uuid>GPU-f9ba66fc-a7f5-94c5-da19-019ef2f9c665</uuid>
|
||||
<pci>
|
||||
<pci_gpu_link_info>
|
||||
<pcie_gen>
|
||||
<current_link_gen>1</current_link_gen>
|
||||
</pcie_gen>
|
||||
<link_widths>
|
||||
<current_link_width>16x</current_link_width>
|
||||
</link_widths>
|
||||
</pci_gpu_link_info>
|
||||
</pci>
|
||||
<fan_speed>100 %</fan_speed>
|
||||
<performance_state>P8</performance_state>
|
||||
<fb_memory_usage>
|
||||
<total>4096 MiB</total>
|
||||
<used>42 MiB</used>
|
||||
<free>4054 MiB</free>
|
||||
</fb_memory_usage>
|
||||
<compute_mode>Default</compute_mode>
|
||||
<utilization>
|
||||
<gpu_util>0 %</gpu_util>
|
||||
<memory_util>0 %</memory_util>
|
||||
</utilization>
|
||||
<encoder_stats>
|
||||
<session_count>0</session_count>
|
||||
<average_fps>0</average_fps>
|
||||
<average_latency>0</average_latency>
|
||||
</encoder_stats>
|
||||
<temperature>
|
||||
<gpu_temp>39 C</gpu_temp>
|
||||
</temperature>
|
||||
<power_readings>
|
||||
<power_draw>N/A</power_draw>
|
||||
</power_readings>
|
||||
<clocks>
|
||||
<graphics_clock>135 MHz</graphics_clock>
|
||||
<sm_clock>135 MHz</sm_clock>
|
||||
<mem_clock>405 MHz</mem_clock>
|
||||
<video_clock>405 MHz</video_clock>
|
||||
</clocks>
|
||||
</gpu>
|
||||
</nvidia_smi_log>`)
|
||||
|
||||
func TestGatherSMI(t *testing.T) {
|
||||
var expectedMetric = struct {
|
||||
tags map[string]string
|
||||
fields map[string]interface{}
|
||||
func TestGatherValidXML(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
filename string
|
||||
expected []telegraf.Metric
|
||||
}{
|
||||
tags: map[string]string{
|
||||
{
|
||||
name: "GeForce GTX 1070 Ti",
|
||||
filename: "gtx-1070-ti.xml",
|
||||
expected: []telegraf.Metric{
|
||||
testutil.MustMetric(
|
||||
"nvidia_smi",
|
||||
map[string]string{
|
||||
"name": "GeForce GTX 1070 Ti",
|
||||
"compute_mode": "Default",
|
||||
"index": "0",
|
||||
"pstate": "P8",
|
||||
"uuid": "GPU-f9ba66fc-a7f5-94c5-da19-019ef2f9c665",
|
||||
},
|
||||
fields: map[string]interface{}{
|
||||
map[string]interface{}{
|
||||
"clocks_current_graphics": 135,
|
||||
"clocks_current_memory": 405,
|
||||
"clocks_current_sm": 135,
|
||||
"clocks_current_video": 405,
|
||||
"encoder_stats_average_fps": 0,
|
||||
"encoder_stats_average_latency": 0,
|
||||
"encoder_stats_session_count": 0,
|
||||
"fan_speed": 100,
|
||||
"memory_free": 4054,
|
||||
"memory_used": 42,
|
||||
"memory_total": 4096,
|
||||
"memory_used": 42,
|
||||
"pcie_link_gen_current": 1,
|
||||
"pcie_link_width_current": 16,
|
||||
"temperature_gpu": 39,
|
||||
"utilization_gpu": 0,
|
||||
"utilization_memory": 0,
|
||||
"pcie_link_gen_current": 1,
|
||||
"pcie_link_width_current": 16,
|
||||
"encoder_stats_session_count": 0,
|
||||
},
|
||||
time.Unix(0, 0)),
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "GeForce GTX 1660 Ti",
|
||||
filename: "gtx-1660-ti.xml",
|
||||
expected: []telegraf.Metric{
|
||||
testutil.MustMetric(
|
||||
"nvidia_smi",
|
||||
map[string]string{
|
||||
"compute_mode": "Default",
|
||||
"index": "0",
|
||||
"name": "Graphics Device",
|
||||
"pstate": "P8",
|
||||
"uuid": "GPU-304a277d-3545-63b8-3a36-dfde3c992989",
|
||||
},
|
||||
map[string]interface{}{
|
||||
"clocks_current_graphics": 300,
|
||||
"clocks_current_memory": 405,
|
||||
"clocks_current_sm": 300,
|
||||
"clocks_current_video": 540,
|
||||
"encoder_stats_average_fps": 0,
|
||||
"encoder_stats_average_latency": 0,
|
||||
"clocks_current_graphics": 135,
|
||||
"clocks_current_sm": 135,
|
||||
"encoder_stats_session_count": 0,
|
||||
"fan_speed": 0,
|
||||
"memory_free": 5912,
|
||||
"memory_total": 5912,
|
||||
"memory_used": 0,
|
||||
"pcie_link_gen_current": 1,
|
||||
"pcie_link_width_current": 16,
|
||||
"power_draw": 8.93,
|
||||
"temperature_gpu": 40,
|
||||
"utilization_gpu": 0,
|
||||
"utilization_memory": 1,
|
||||
},
|
||||
time.Unix(0, 0)),
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "Quadro P400",
|
||||
filename: "quadro-p400.xml",
|
||||
expected: []telegraf.Metric{
|
||||
testutil.MustMetric(
|
||||
"nvidia_smi",
|
||||
map[string]string{
|
||||
"compute_mode": "Default",
|
||||
"index": "0",
|
||||
"name": "Quadro P400",
|
||||
"pstate": "P8",
|
||||
"uuid": "GPU-8f750be4-dfbc-23b9-b33f-da729a536494",
|
||||
},
|
||||
map[string]interface{}{
|
||||
"clocks_current_graphics": 139,
|
||||
"clocks_current_memory": 405,
|
||||
"clocks_current_video": 405,
|
||||
"clocks_current_sm": 139,
|
||||
"clocks_current_video": 544,
|
||||
"encoder_stats_average_fps": 0,
|
||||
"encoder_stats_average_latency": 0,
|
||||
"encoder_stats_session_count": 0,
|
||||
"fan_speed": 34,
|
||||
"memory_free": 1998,
|
||||
"memory_total": 1998,
|
||||
"memory_used": 0,
|
||||
"pcie_link_gen_current": 1,
|
||||
"pcie_link_width_current": 16,
|
||||
"temperature_gpu": 33,
|
||||
"utilization_gpu": 0,
|
||||
"utilization_memory": 3,
|
||||
},
|
||||
time.Unix(0, 0)),
|
||||
},
|
||||
},
|
||||
}
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
var acc testutil.Accumulator
|
||||
|
||||
acc := &testutil.Accumulator{}
|
||||
octets, err := ioutil.ReadFile(filepath.Join("testdata", tt.filename))
|
||||
require.NoError(t, err)
|
||||
|
||||
gatherNvidiaSMI(payload, acc)
|
||||
fmt.Println()
|
||||
err = gatherNvidiaSMI(octets, &acc)
|
||||
require.NoError(t, err)
|
||||
|
||||
require.Equal(t, 1, len(acc.Metrics))
|
||||
require.Equal(t, expectedMetric.fields, acc.Metrics[0].Fields)
|
||||
require.Equal(t, expectedMetric.tags, acc.Metrics[0].Tags)
|
||||
testutil.RequireMetricsEqual(t, tt.expected, acc.GetTelegrafMetrics(), testutil.IgnoreTime())
|
||||
})
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue