mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-04-26 12:32:10 +00:00
adjust block table in hpu to improve performance
Signed-off-by: Wang, Yi A <yi.a.wang@intel.com>
This commit is contained in:
parent
b7fea6fc2f
commit
5d3653943c
@ -2,7 +2,7 @@ use std::sync::Arc;
|
||||
use tokio::sync::{mpsc, oneshot};
|
||||
|
||||
use crate::radix::RadixAllocator;
|
||||
|
||||
use text_generation_router::usage_stats::Env;
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct BlockAllocation {
|
||||
pub allocation_id: u64,
|
||||
@ -141,6 +141,7 @@ pub struct SimpleAllocator {
|
||||
free_blocks: Vec<u32>,
|
||||
block_size: u32,
|
||||
window_size: Option<u32>,
|
||||
is_hpu_device: bool,
|
||||
}
|
||||
|
||||
impl SimpleAllocator {
|
||||
@ -150,6 +151,7 @@ impl SimpleAllocator {
|
||||
// Block 0 is reserved for health checks
|
||||
free_blocks: (1..blocks).collect(),
|
||||
window_size,
|
||||
is_hpu_device: Env::new().is_hpu_device(),
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -179,9 +181,15 @@ impl Allocator for SimpleAllocator {
|
||||
if required_blocks > self.free_blocks.len() as u32 {
|
||||
None
|
||||
} else {
|
||||
let blocks = self
|
||||
if self.is_hpu_device {
|
||||
self.free_blocks.sort_by(|a, b| b.cmp(a));
|
||||
}
|
||||
let mut blocks = self
|
||||
.free_blocks
|
||||
.split_off(self.free_blocks.len() - required_blocks as usize);
|
||||
if self.is_hpu_device {
|
||||
blocks.sort();
|
||||
}
|
||||
let mut slots =
|
||||
Vec::with_capacity((required_blocks * self.block_size * repeats as u32) as usize);
|
||||
|
||||
|
@ -157,6 +157,7 @@ pub struct Env {
|
||||
docker_label: &'static str,
|
||||
nvidia_info: Option<Vec<NvidiaSmiInfo>>,
|
||||
xpu_info: Option<Vec<XpuSmiInfo>>,
|
||||
hpu_info: Option<Vec<HpuSmiInfo>>,
|
||||
system_env: SystemInfo,
|
||||
}
|
||||
|
||||
@ -289,6 +290,60 @@ impl XpuSmiInfo {
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Clone)]
|
||||
struct HpuSmiInfo {
|
||||
name: String,
|
||||
pci_bus_id: String,
|
||||
driver_version: String,
|
||||
temperature: String,
|
||||
utilization: String,
|
||||
memory_total: String,
|
||||
memory_free: String,
|
||||
memory_used: String,
|
||||
power_draw_instant: String,
|
||||
}
|
||||
|
||||
impl HpuSmiInfo {
|
||||
fn new() -> Option<Vec<HpuSmiInfo>> {
|
||||
let output = Command::new("hl-smi")
|
||||
.args([
|
||||
"--query-aip=name,bus_id,driver_version,temperature.aip,utilization.aip,memory.total,memory.free,memory.used,power.draw",
|
||||
"--format=csv"
|
||||
])
|
||||
.output()
|
||||
.ok()?;
|
||||
|
||||
if !output.status.success() {
|
||||
return None;
|
||||
}
|
||||
|
||||
let stdout = String::from_utf8(output.stdout).ok()?;
|
||||
|
||||
let mut rdr = ReaderBuilder::new()
|
||||
.has_headers(true)
|
||||
.from_reader(stdout.as_bytes());
|
||||
|
||||
let mut infos = Vec::new();
|
||||
|
||||
for result in rdr.records() {
|
||||
let record = result.ok()?;
|
||||
infos.push(HpuSmiInfo {
|
||||
name: record[0].to_string(),
|
||||
pci_bus_id: record[1].to_string(),
|
||||
driver_version: record[2].to_string(),
|
||||
temperature: record[3].to_string(),
|
||||
utilization: record[4].to_string(),
|
||||
memory_total: record[5].to_string(),
|
||||
memory_free: record[6].to_string(),
|
||||
memory_used: record[7].to_string(),
|
||||
power_draw_instant: record[8].to_string(),
|
||||
});
|
||||
}
|
||||
|
||||
Some(infos)
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Serialize, Debug, Clone)]
|
||||
pub struct SystemInfo {
|
||||
cpu_count: usize,
|
||||
@ -335,10 +390,14 @@ impl Env {
|
||||
system_env: SystemInfo::new(),
|
||||
nvidia_info: NvidiaSmiInfo::new(),
|
||||
xpu_info: XpuSmiInfo::new(),
|
||||
hpu_info: HpuSmiInfo::new(),
|
||||
git_sha: option_env!("VERGEN_GIT_SHA").unwrap_or("N/A"),
|
||||
docker_label: option_env!("DOCKER_LABEL").unwrap_or("N/A"),
|
||||
}
|
||||
}
|
||||
pub fn is_hpu_device(&self) -> bool {
|
||||
self.hpu_info.is_some()
|
||||
}
|
||||
}
|
||||
|
||||
pub fn is_container() -> io::Result<bool> {
|
||||
|
Loading…
Reference in New Issue
Block a user