mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-06-19 15:52:08 +00:00
Remove the workaround for HPU distributed.
Signed-off-by: yuanwu <yuan.wu@intel.com>
This commit is contained in:
parent
e202b5f98f
commit
5f26a72876
@ -27,10 +27,6 @@ impl Env {
|
|||||||
docker_label: option_env!("DOCKER_LABEL").unwrap_or("N/A"),
|
docker_label: option_env!("DOCKER_LABEL").unwrap_or("N/A"),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn should_start_a_single_hpu_shard(&self) -> bool {
|
|
||||||
self.hpu_env != "N/A" && std::env::var("ATTENTION").as_deref() != Ok("paged")
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
impl fmt::Display for Env {
|
impl fmt::Display for Env {
|
||||||
|
@ -1590,11 +1590,6 @@ fn spawn_shards(
|
|||||||
) -> Result<(), LauncherError> {
|
) -> Result<(), LauncherError> {
|
||||||
// Start shard processes
|
// Start shard processes
|
||||||
for rank in 0..num_shard {
|
for rank in 0..num_shard {
|
||||||
if rank != 0 && env_runtime::Env::new().should_start_a_single_hpu_shard() {
|
|
||||||
tracing::info!("Running on HPU, the launcher will not do any sharding as actual sharding is done in the server");
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
let model_id = args.model_id.clone();
|
let model_id = args.model_id.clone();
|
||||||
let revision = args.revision.clone();
|
let revision = args.revision.clone();
|
||||||
let uds_path = args.shard_uds_path.clone();
|
let uds_path = args.shard_uds_path.clone();
|
||||||
@ -1670,10 +1665,6 @@ fn spawn_shards(
|
|||||||
if shard_ready == num_shard {
|
if shard_ready == num_shard {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
if env_runtime::Env::new().should_start_a_single_hpu_shard() {
|
|
||||||
tracing::info!("HPU detected, shard is ready");
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
Err(TryRecvError::Empty) => {
|
Err(TryRecvError::Empty) => {
|
||||||
sleep(Duration::from_millis(100));
|
sleep(Duration::from_millis(100));
|
||||||
|
Loading…
Reference in New Issue
Block a user