mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-04-27 21:12:07 +00:00
adjust block table in hpu to improve performance
Signed-off-by: Wang, Yi A <yi.a.wang@intel.com>
This commit is contained in:
parent
b7fea6fc2f
commit
5d3653943c
@ -2,7 +2,7 @@ use std::sync::Arc;
|
|||||||
use tokio::sync::{mpsc, oneshot};
|
use tokio::sync::{mpsc, oneshot};
|
||||||
|
|
||||||
use crate::radix::RadixAllocator;
|
use crate::radix::RadixAllocator;
|
||||||
|
use text_generation_router::usage_stats::Env;
|
||||||
#[derive(Debug, Clone)]
|
#[derive(Debug, Clone)]
|
||||||
pub struct BlockAllocation {
|
pub struct BlockAllocation {
|
||||||
pub allocation_id: u64,
|
pub allocation_id: u64,
|
||||||
@ -141,6 +141,7 @@ pub struct SimpleAllocator {
|
|||||||
free_blocks: Vec<u32>,
|
free_blocks: Vec<u32>,
|
||||||
block_size: u32,
|
block_size: u32,
|
||||||
window_size: Option<u32>,
|
window_size: Option<u32>,
|
||||||
|
is_hpu_device: bool,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl SimpleAllocator {
|
impl SimpleAllocator {
|
||||||
@ -150,6 +151,7 @@ impl SimpleAllocator {
|
|||||||
// Block 0 is reserved for health checks
|
// Block 0 is reserved for health checks
|
||||||
free_blocks: (1..blocks).collect(),
|
free_blocks: (1..blocks).collect(),
|
||||||
window_size,
|
window_size,
|
||||||
|
is_hpu_device: Env::new().is_hpu_device(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -179,9 +181,15 @@ impl Allocator for SimpleAllocator {
|
|||||||
if required_blocks > self.free_blocks.len() as u32 {
|
if required_blocks > self.free_blocks.len() as u32 {
|
||||||
None
|
None
|
||||||
} else {
|
} else {
|
||||||
let blocks = self
|
if self.is_hpu_device {
|
||||||
|
self.free_blocks.sort_by(|a, b| b.cmp(a));
|
||||||
|
}
|
||||||
|
let mut blocks = self
|
||||||
.free_blocks
|
.free_blocks
|
||||||
.split_off(self.free_blocks.len() - required_blocks as usize);
|
.split_off(self.free_blocks.len() - required_blocks as usize);
|
||||||
|
if self.is_hpu_device {
|
||||||
|
blocks.sort();
|
||||||
|
}
|
||||||
let mut slots =
|
let mut slots =
|
||||||
Vec::with_capacity((required_blocks * self.block_size * repeats as u32) as usize);
|
Vec::with_capacity((required_blocks * self.block_size * repeats as u32) as usize);
|
||||||
|
|
||||||
|
@ -157,6 +157,7 @@ pub struct Env {
|
|||||||
docker_label: &'static str,
|
docker_label: &'static str,
|
||||||
nvidia_info: Option<Vec<NvidiaSmiInfo>>,
|
nvidia_info: Option<Vec<NvidiaSmiInfo>>,
|
||||||
xpu_info: Option<Vec<XpuSmiInfo>>,
|
xpu_info: Option<Vec<XpuSmiInfo>>,
|
||||||
|
hpu_info: Option<Vec<HpuSmiInfo>>,
|
||||||
system_env: SystemInfo,
|
system_env: SystemInfo,
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -289,6 +290,60 @@ impl XpuSmiInfo {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Serialize, Clone)]
|
||||||
|
struct HpuSmiInfo {
|
||||||
|
name: String,
|
||||||
|
pci_bus_id: String,
|
||||||
|
driver_version: String,
|
||||||
|
temperature: String,
|
||||||
|
utilization: String,
|
||||||
|
memory_total: String,
|
||||||
|
memory_free: String,
|
||||||
|
memory_used: String,
|
||||||
|
power_draw_instant: String,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl HpuSmiInfo {
|
||||||
|
fn new() -> Option<Vec<HpuSmiInfo>> {
|
||||||
|
let output = Command::new("hl-smi")
|
||||||
|
.args([
|
||||||
|
"--query-aip=name,bus_id,driver_version,temperature.aip,utilization.aip,memory.total,memory.free,memory.used,power.draw",
|
||||||
|
"--format=csv"
|
||||||
|
])
|
||||||
|
.output()
|
||||||
|
.ok()?;
|
||||||
|
|
||||||
|
if !output.status.success() {
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
|
||||||
|
let stdout = String::from_utf8(output.stdout).ok()?;
|
||||||
|
|
||||||
|
let mut rdr = ReaderBuilder::new()
|
||||||
|
.has_headers(true)
|
||||||
|
.from_reader(stdout.as_bytes());
|
||||||
|
|
||||||
|
let mut infos = Vec::new();
|
||||||
|
|
||||||
|
for result in rdr.records() {
|
||||||
|
let record = result.ok()?;
|
||||||
|
infos.push(HpuSmiInfo {
|
||||||
|
name: record[0].to_string(),
|
||||||
|
pci_bus_id: record[1].to_string(),
|
||||||
|
driver_version: record[2].to_string(),
|
||||||
|
temperature: record[3].to_string(),
|
||||||
|
utilization: record[4].to_string(),
|
||||||
|
memory_total: record[5].to_string(),
|
||||||
|
memory_free: record[6].to_string(),
|
||||||
|
memory_used: record[7].to_string(),
|
||||||
|
power_draw_instant: record[8].to_string(),
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
Some(infos)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#[derive(Serialize, Debug, Clone)]
|
#[derive(Serialize, Debug, Clone)]
|
||||||
pub struct SystemInfo {
|
pub struct SystemInfo {
|
||||||
cpu_count: usize,
|
cpu_count: usize,
|
||||||
@ -335,10 +390,14 @@ impl Env {
|
|||||||
system_env: SystemInfo::new(),
|
system_env: SystemInfo::new(),
|
||||||
nvidia_info: NvidiaSmiInfo::new(),
|
nvidia_info: NvidiaSmiInfo::new(),
|
||||||
xpu_info: XpuSmiInfo::new(),
|
xpu_info: XpuSmiInfo::new(),
|
||||||
|
hpu_info: HpuSmiInfo::new(),
|
||||||
git_sha: option_env!("VERGEN_GIT_SHA").unwrap_or("N/A"),
|
git_sha: option_env!("VERGEN_GIT_SHA").unwrap_or("N/A"),
|
||||||
docker_label: option_env!("DOCKER_LABEL").unwrap_or("N/A"),
|
docker_label: option_env!("DOCKER_LABEL").unwrap_or("N/A"),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
pub fn is_hpu_device(&self) -> bool {
|
||||||
|
self.hpu_info.is_some()
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn is_container() -> io::Result<bool> {
|
pub fn is_container() -> io::Result<bool> {
|
||||||
|
Loading…
Reference in New Issue
Block a user