From 58934c8b61e9f3cb7316c9e61ce528819e354853 Mon Sep 17 00:00:00 2001 From: drbh Date: Fri, 16 May 2025 11:48:58 -0400 Subject: [PATCH] fix: count gpu uuids if NVIDIA_VISIBLE_DEVICES env set to all (#3230) --- launcher/src/main.rs | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/launcher/src/main.rs b/launcher/src/main.rs index a82ad12f..ee80eb00 100644 --- a/launcher/src/main.rs +++ b/launcher/src/main.rs @@ -1263,7 +1263,23 @@ fn num_cuda_devices() -> Option<usize> { let devices = match env::var("CUDA_VISIBLE_DEVICES") { Ok(devices) => devices, Err(_) => match env::var("NVIDIA_VISIBLE_DEVICES") { - Ok(devices) => devices, + Ok(devices) => { + if devices.trim() == "all" { + // Count the number of all GPUs via nvidia-smi + let output = Command::new("nvidia-smi") + .args(["--query-gpu=uuid", "--format=csv,noheader"]) + .output() + .ok()?; + + String::from_utf8_lossy(&output.stdout) + .lines() + .filter(|line| !line.trim().is_empty()) + .count() + .to_string() + } else { + devices + } + } Err(_) => env::var("ZE_AFFINITY_MASK").ok()?, }, };