diff --git a/backends/v3/src/block_allocator.rs b/backends/v3/src/block_allocator.rs
index e7f3d85a..6da2b51d 100644
--- a/backends/v3/src/block_allocator.rs
+++ b/backends/v3/src/block_allocator.rs
@@ -2,7 +2,7 @@ use std::sync::Arc;
 use tokio::sync::{mpsc, oneshot};
 
 use crate::radix::RadixAllocator;
-
+use text_generation_router::usage_stats::Env;
 #[derive(Debug, Clone)]
 pub struct BlockAllocation {
     pub allocation_id: u64,
@@ -141,6 +141,7 @@ pub struct SimpleAllocator {
     free_blocks: Vec<u32>,
     block_size: u32,
     window_size: Option<u32>,
+    is_hpu_device: bool,
 }
 
 impl SimpleAllocator {
@@ -150,6 +151,7 @@ impl SimpleAllocator {
             // Block 0 is reserved for health checks
             free_blocks: (1..blocks).collect(),
             window_size,
+            is_hpu_device: Env::new().is_hpu_device(),
         }
     }
 }
@@ -179,9 +181,18 @@ impl Allocator for SimpleAllocator {
         if required_blocks > self.free_blocks.len() as u32 {
             None
         } else {
-            let blocks = self
+            if self.is_hpu_device {
+                // Descending order leaves the lowest free block ids at the tail,
+                // which is the part `split_off` hands out below.
+                self.free_blocks.sort_unstable_by(|a, b| b.cmp(a));
+            }
+            let mut blocks = self
                 .free_blocks
                 .split_off(self.free_blocks.len() - required_blocks as usize);
+            if self.is_hpu_device {
+                // Return the allocated block ids in ascending order.
+                blocks.sort_unstable();
+            }
             let mut slots =
                 Vec::with_capacity((required_blocks * self.block_size * repeats as u32) as usize);
 
diff --git a/router/src/usage_stats.rs b/router/src/usage_stats.rs
index 353e9e37..a17aade9 100644
--- a/router/src/usage_stats.rs
+++ b/router/src/usage_stats.rs
@@ -157,6 +157,7 @@ pub struct Env {
     docker_label: &'static str,
     nvidia_info: Option<Vec<NvidiaSmiInfo>>,
     xpu_info: Option<Vec<XpuSmiInfo>>,
+    hpu_info: Option<Vec<HpuSmiInfo>>,
     system_env: SystemInfo,
 }
 
@@ -289,6 +290,67 @@ impl XpuSmiInfo {
     }
 }
 
+/// One record of `hl-smi` CSV output; each field keeps the raw text.
+#[derive(Debug, Serialize, Clone)]
+struct HpuSmiInfo {
+    name: String,
+    pci_bus_id: String,
+    driver_version: String,
+    temperature: String,
+    utilization: String,
+    memory_total: String,
+    memory_free: String,
+    memory_used: String,
+    power_draw_instant: String,
+}
+
+impl HpuSmiInfo {
+    /// Queries `hl-smi` and parses its CSV output into one entry per record.
+    ///
+    /// Returns `None` if the command cannot be run, exits unsuccessfully, prints
+    /// non-UTF-8 output, or yields a record that fails to parse or has fewer
+    /// than nine fields.
+    fn new() -> Option<Vec<HpuSmiInfo>> {
+        let output = Command::new("hl-smi")
+            .args([
+                "--query-aip=name,bus_id,driver_version,temperature.aip,utilization.aip,memory.total,memory.free,memory.used,power.draw",
+                "--format=csv"
+            ])
+            .output()
+            .ok()?;
+
+        if !output.status.success() {
+            return None;
+        }
+
+        let stdout = String::from_utf8(output.stdout).ok()?;
+
+        let mut rdr = ReaderBuilder::new()
+            .has_headers(true)
+            .from_reader(stdout.as_bytes());
+
+        let mut infos = Vec::new();
+
+        for result in rdr.records() {
+            let record = result.ok()?;
+            // `get` rather than indexing: a short record yields `None`, not a panic.
+            infos.push(HpuSmiInfo {
+                name: record.get(0)?.to_string(),
+                pci_bus_id: record.get(1)?.to_string(),
+                driver_version: record.get(2)?.to_string(),
+                temperature: record.get(3)?.to_string(),
+                utilization: record.get(4)?.to_string(),
+                memory_total: record.get(5)?.to_string(),
+                memory_free: record.get(6)?.to_string(),
+                memory_used: record.get(7)?.to_string(),
+                power_draw_instant: record.get(8)?.to_string(),
+            });
+        }
+
+        Some(infos)
+    }
+}
+
 #[derive(Serialize, Debug, Clone)]
 pub struct SystemInfo {
     cpu_count: usize,
@@ -335,10 +397,15 @@ impl Env {
             system_env: SystemInfo::new(),
             nvidia_info: NvidiaSmiInfo::new(),
             xpu_info: XpuSmiInfo::new(),
+            hpu_info: HpuSmiInfo::new(),
             git_sha: option_env!("VERGEN_GIT_SHA").unwrap_or("N/A"),
             docker_label: option_env!("DOCKER_LABEL").unwrap_or("N/A"),
         }
     }
+    /// Returns `true` when the `hl-smi` query made by `Env::new` succeeded (`hpu_info` is `Some`).
+    pub fn is_hpu_device(&self) -> bool {
+        self.hpu_info.is_some()
+    }
 }
 
 pub fn is_container() -> io::Result<bool> {