use std::fmt;
use std::process::Command;

/// Build metadata plus accelerator diagnostics collected at launch.
/// Each `*_env` field holds the raw output of the matching vendor tool
/// (`nvidia-smi`, `xpu-smi`, `hl-smi`), or "N/A" when the tool is unavailable.
pub(crate) struct Env {
    cargo_target: &'static str,
    cargo_version: &'static str,
    git_sha: &'static str,
    docker_label: &'static str,
    nvidia_env: String,
    xpu_env: String,
    hpu_env: String,
}
impl Env {
    pub fn new() -> Self {
        // Probe each vendor CLI; a missing tool simply degrades to "N/A".
        let nvidia_env = nvidia_smi();
        let xpu_env = xpu_smi();
        let hpu_env = hl_smi();

        Self {
            nvidia_env: nvidia_env.unwrap_or("N/A".to_string()),
            xpu_env: xpu_env.unwrap_or("N/A".to_string()),
            hpu_env: hpu_env.unwrap_or("N/A".to_string()),
            cargo_target: env!("VERGEN_CARGO_TARGET_TRIPLE"),
            cargo_version: env!("VERGEN_RUSTC_SEMVER"),
            git_sha: option_env!("VERGEN_GIT_SHA").unwrap_or("N/A"),
            docker_label: option_env!("DOCKER_LABEL").unwrap_or("N/A"),
        }
    }
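
    // Note: the `VERGEN_*` values above are injected at build time (for
    // example by the `vergen` crate in build.rs); `env!` fails the build if
    // a variable is missing, while `option_env!` tolerates its absence.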
    /// Gaudi (HPU) should run as a single shard unless paged attention has
    /// been selected via the `ATTENTION` environment variable.
    pub fn should_start_a_single_hpu_shard(&self) -> bool {
        self.hpu_env != "N/A" && std::env::var("ATTENTION").as_deref() != Ok("paged")
    }
}
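
// A minimal usage sketch (hypothetical caller, not part of this file): the
// launcher can build `Env` once at startup and print it through the
// `Display` impl below.
//
//     let env_runtime = Env::new();
//     println!("{env_runtime}");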
impl fmt::Display for Env {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        writeln!(f, "Runtime environment:")?;
        writeln!(f, "Target: {}", self.cargo_target)?;
        writeln!(f, "Cargo version: {}", self.cargo_version)?;
        writeln!(f, "Commit sha: {}", self.git_sha)?;
        writeln!(f, "Docker label: {}", self.docker_label)?;
        writeln!(f, "nvidia-smi:\n{}", self.nvidia_env)?;
        writeln!(f, "xpu-smi:\n{}", self.xpu_env)?;
        writeln!(f, "hpu-smi:\n{}", self.hpu_env)?;

        Ok(())
    }
}
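
// For reference, the report produced by `Display` looks roughly like this
// (values are illustrative, not real output):
//
//     Runtime environment:
//     Target: x86_64-unknown-linux-gnu
//     Cargo version: 1.75.0
//     Commit sha: N/A
//     Docker label: N/A
//     nvidia-smi:
//     N/A
//     xpu-smi:
//     N/A
//     hpu-smi:
//     N/A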
fn nvidia_smi() -> Option<String> {
    let output = Command::new("nvidia-smi").output().ok()?;
    let nvidia_smi = String::from_utf8(output.stdout).ok()?;
    // Prefix continuation lines with a space so the multi-line report stays
    // visually grouped under its "nvidia-smi:" header in the Display output.
    let output = nvidia_smi.replace('\n', "\n ");
    Some(output.trim().to_string())
}
fn xpu_smi() -> Option<String> {
    let output = Command::new("xpu-smi").arg("discovery").output().ok()?;
    let xpu_smi = String::from_utf8(output.stdout).ok()?;
    let output = xpu_smi.replace('\n', "\n ");
    Some(output.trim().to_string())
}
fn hl_smi() -> Option<String> {
    let output = Command::new("hl-smi").output().ok()?;
    let hl_smi = String::from_utf8(output.stdout).ok()?;
    let output = hl_smi.replace('\n', "\n ");
    Some(output.trim().to_string())
}
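
// The three probes above share one shape; a possible consolidation (a sketch
// only, not part of the original file) would route them through one helper:
//
//     fn smi_output(cmd: &str, args: &[&str]) -> Option<String> {
//         let output = Command::new(cmd).args(args).output().ok()?;
//         let text = String::from_utf8(output.stdout).ok()?;
//         Some(text.replace('\n', "\n ").trim().to_string())
//     }
//
// with `nvidia_smi()` becoming `smi_output("nvidia-smi", &[])` and
// `xpu_smi()` becoming `smi_output("xpu-smi", &["discovery"])`.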