Handle unknown error in nvidia-smi output (#6073)
This commit is contained in:
parent
1e12006ad6
commit
aa07b95e00
|
@ -59,3 +59,7 @@ nvidia_smi,compute_mode=Default,host=8218cf,index=0,name=GeForce\ GTX\ 1070,psta
|
||||||
nvidia_smi,compute_mode=Default,host=8218cf,index=1,name=GeForce\ GTX\ 1080,pstate=P2,uuid=GPU-f9ba66fc-a7f5-94c5-da19-019ef2f9c665 fan_speed=100i,memory_free=7557i,memory_total=8114i,memory_used=557i,temperature_gpu=50i,utilization_gpu=100i,utilization_memory=85i 1523991122000000000
|
nvidia_smi,compute_mode=Default,host=8218cf,index=1,name=GeForce\ GTX\ 1080,pstate=P2,uuid=GPU-f9ba66fc-a7f5-94c5-da19-019ef2f9c665 fan_speed=100i,memory_free=7557i,memory_total=8114i,memory_used=557i,temperature_gpu=50i,utilization_gpu=100i,utilization_memory=85i 1523991122000000000
|
||||||
nvidia_smi,compute_mode=Default,host=8218cf,index=2,name=GeForce\ GTX\ 1080,pstate=P2,uuid=GPU-d4cfc28d-0481-8d07-b81a-ddfc63d74adf fan_speed=100i,memory_free=7557i,memory_total=8114i,memory_used=557i,temperature_gpu=58i,utilization_gpu=100i,utilization_memory=86i 1523991122000000000
|
nvidia_smi,compute_mode=Default,host=8218cf,index=2,name=GeForce\ GTX\ 1080,pstate=P2,uuid=GPU-d4cfc28d-0481-8d07-b81a-ddfc63d74adf fan_speed=100i,memory_free=7557i,memory_total=8114i,memory_used=557i,temperature_gpu=58i,utilization_gpu=100i,utilization_memory=86i 1523991122000000000
|
||||||
```
|
```
|
||||||
|
|
||||||
|
### Limitations
|
||||||
|
Note that there appears to be an issue retrieving the current memory clock value when the memory is overclocked.
|
||||||
|
This may or may not apply to everyone, but it's confirmed to be an issue on an EVGA 2080 Ti.
|
||||||
|
|
|
@ -143,7 +143,16 @@ func parseLine(line string) (map[string]string, map[string]interface{}, error) {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
if strings.Contains(col, "[Not Supported]") {
|
// In some cases we may not be able to get data.
|
||||||
|
// One such case is when the memory is overclocked.
|
||||||
|
// nvidia-smi reads the max supported memory clock from the stock value.
|
||||||
|
// If the current memory clock is greater than the max detected memory clock, then we receive [Unknown Error] as a value.
|
||||||
|
|
||||||
|
// For example, the stock max memory clock speed on a 2080 Ti is 7000 MHz which nvidia-smi detects.
|
||||||
|
// The user has overclocked their memory using an offset of +1000, so under load the memory clock reaches 8000 MHz.
|
||||||
|
// Now when nvidia-smi tries to read the current memory clock it fails and spits back [Unknown Error] as the value.
|
||||||
|
// This value will break the parsing logic below unless it is accounted for here.
|
||||||
|
if strings.Contains(col, "[Not Supported]") || strings.Contains(col, "[Unknown Error]") {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -42,3 +42,10 @@ func TestParseLineNotSupported(t *testing.T) {
|
||||||
require.NoError(t, err)
|
require.NoError(t, err)
|
||||||
require.Equal(t, nil, fields["fan_speed"])
|
require.Equal(t, nil, fields["fan_speed"])
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestParseLineUnknownError(t *testing.T) {
|
||||||
|
line := "[Unknown Error], 11264, 1074, 10190, P8, 32, GeForce RTX 2080 Ti, GPU-c97b7f88-c06d-650f-5339-f8dd0c1315c0, Default, 1, 4, 0, 24.33, 1, 16, 0, 0, 0, 300, 300, 405, 540\n"
|
||||||
|
_, fields, err := parseLine(line)
|
||||||
|
require.NoError(t, err)
|
||||||
|
require.Equal(t, nil, fields["fan_speed"])
|
||||||
|
}
|
||||||
|
|
Loading…
Reference in New Issue