Adjust the block table on HPU devices to improve performance

Signed-off-by: Wang, Yi A <yi.a.wang@intel.com>
This commit is contained in:
Wang, Yi A 2025-03-16 19:40:40 -07:00
parent b7fea6fc2f
commit 5d3653943c
2 changed files with 69 additions and 2 deletions

View File

@ -2,7 +2,7 @@ use std::sync::Arc;
use tokio::sync::{mpsc, oneshot};
use crate::radix::RadixAllocator;
use text_generation_router::usage_stats::Env;
#[derive(Debug, Clone)]
pub struct BlockAllocation {
pub allocation_id: u64,
@ -141,6 +141,7 @@ pub struct SimpleAllocator {
free_blocks: Vec<u32>,
block_size: u32,
window_size: Option<u32>,
is_hpu_device: bool,
}
impl SimpleAllocator {
@ -150,6 +151,7 @@ impl SimpleAllocator {
// Block 0 is reserved for health checks
free_blocks: (1..blocks).collect(),
window_size,
is_hpu_device: Env::new().is_hpu_device(),
}
}
}
@ -179,9 +181,15 @@ impl Allocator for SimpleAllocator {
if required_blocks > self.free_blocks.len() as u32 {
None
} else {
let blocks = self
if self.is_hpu_device {
self.free_blocks.sort_by(|a, b| b.cmp(a));
}
let mut blocks = self
.free_blocks
.split_off(self.free_blocks.len() - required_blocks as usize);
if self.is_hpu_device {
blocks.sort();
}
let mut slots =
Vec::with_capacity((required_blocks * self.block_size * repeats as u32) as usize);

View File

@ -157,6 +157,7 @@ pub struct Env {
docker_label: &'static str,
nvidia_info: Option<Vec<NvidiaSmiInfo>>,
xpu_info: Option<Vec<XpuSmiInfo>>,
hpu_info: Option<Vec<HpuSmiInfo>>,
system_env: SystemInfo,
}
@ -289,6 +290,60 @@ impl XpuSmiInfo {
}
}
/// One row of device information as reported by the `hl-smi` command-line tool.
///
/// Every field holds the raw text of the corresponding CSV column requested via
/// `--query-aip`; values are stored verbatim as strings and are not parsed here.
#[derive(Debug, Serialize, Clone)]
struct HpuSmiInfo {
/// Value of the `name` column.
name: String,
/// Value of the `bus_id` column.
pci_bus_id: String,
/// Value of the `driver_version` column.
driver_version: String,
/// Value of the `temperature.aip` column.
temperature: String,
/// Value of the `utilization.aip` column.
utilization: String,
/// Value of the `memory.total` column.
memory_total: String,
/// Value of the `memory.free` column.
memory_free: String,
/// Value of the `memory.used` column.
memory_used: String,
/// Value of the `power.draw` column.
power_draw_instant: String,
}
impl HpuSmiInfo {
    /// Queries `hl-smi` for per-device information and parses its CSV output.
    ///
    /// Returns `None` when the command cannot be spawned, exits with a
    /// non-success status, prints non-UTF-8 output, or prints a row that
    /// cannot be parsed or that has fewer than the nine requested columns.
    /// Otherwise returns one [`HpuSmiInfo`] per data row (the header row is
    /// skipped), which may be an empty vector if no rows were printed.
    fn new() -> Option<Vec<HpuSmiInfo>> {
        let output = Command::new("hl-smi")
            .args([
                "--query-aip=name,bus_id,driver_version,temperature.aip,utilization.aip,memory.total,memory.free,memory.used,power.draw",
                "--format=csv",
            ])
            .output()
            .ok()?;
        if !output.status.success() {
            return None;
        }

        let stdout = String::from_utf8(output.stdout).ok()?;
        let mut rdr = ReaderBuilder::new()
            .has_headers(true)
            .from_reader(stdout.as_bytes());

        // Collecting into `Option<Vec<_>>` stops at the first bad row and
        // yields `None`, matching the other failure paths of this function.
        rdr.records()
            .map(|result| {
                let record = result.ok()?;
                // Checked access: a short row yields `None` instead of the
                // out-of-bounds panic that `record[i]` would raise.
                let field = |idx: usize| record.get(idx).map(str::to_owned);
                Some(HpuSmiInfo {
                    name: field(0)?,
                    pci_bus_id: field(1)?,
                    driver_version: field(2)?,
                    temperature: field(3)?,
                    utilization: field(4)?,
                    memory_total: field(5)?,
                    memory_free: field(6)?,
                    memory_used: field(7)?,
                    power_draw_instant: field(8)?,
                })
            })
            .collect()
    }
}
#[derive(Serialize, Debug, Clone)]
pub struct SystemInfo {
cpu_count: usize,
@ -335,10 +390,14 @@ impl Env {
system_env: SystemInfo::new(),
nvidia_info: NvidiaSmiInfo::new(),
xpu_info: XpuSmiInfo::new(),
hpu_info: HpuSmiInfo::new(),
git_sha: option_env!("VERGEN_GIT_SHA").unwrap_or("N/A"),
docker_label: option_env!("DOCKER_LABEL").unwrap_or("N/A"),
}
}
/// Returns `true` when HPU information was collected for this environment,
/// i.e. when the `hpu_info` field is `Some`.
pub fn is_hpu_device(&self) -> bool {
    matches!(self.hpu_info, Some(_))
}
}
pub fn is_container() -> io::Result<bool> {