Skip to content

Commit 5f25061

Browse files
mchaker authored and jjisnow committed
webui: detect scoped-down GPU environment (Sygil-Dev#993)
* webui: detect scoped-down GPU environment

  check if we're using a scoped-down GPU environment (pynvml does not listen to CUDA_VISIBLE_DEVICES) so that we can measure memory on the correct GPU

* remove unnecessary import
1 parent 37626b9 commit 5f25061

File tree

1 file changed

+9
-1
lines changed

1 file changed

+9
-1
lines changed

scripts/webui.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -219,7 +219,15 @@ def run(self):
219219
print(f"[{self.name}] Unable to initialize NVIDIA management. No memory stats. \n")
220220
return
221221
print(f"[{self.name}] Recording max memory usage...\n")
222-
handle = pynvml.nvmlDeviceGetHandleByIndex(opt.gpu)
222+
# check if we're using a scoped-down GPU environment (pynvml does not listen to CUDA_VISIBLE_DEVICES)
223+
# so that we can measure memory on the correct GPU
224+
try:
225+
isinstance(int(os.environ["CUDA_VISIBLE_DEVICES"]), int)
226+
handle = pynvml.nvmlDeviceGetHandleByIndex(int(os.environ["CUDA_VISIBLE_DEVICES"]))
227+
except (KeyError, ValueError) as pynvmlHandleError:
228+
print("[MemMon][WARNING]", pynvmlHandleError)
229+
print("[MemMon][INFO]", "defaulting to monitoring memory on the default gpu (set via --gpu flag)")
230+
handle = pynvml.nvmlDeviceGetHandleByIndex(opt.gpu)
223231
self.total = pynvml.nvmlDeviceGetMemoryInfo(handle).total
224232
while not self.stop_flag:
225233
m = pynvml.nvmlDeviceGetMemoryInfo(handle)

0 commit comments

Comments
 (0)