mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-04-20 22:32:07 +00:00
fix: use TORCH_NCCL_AVOID_RECORD_STREAMS=1
This commit is contained in:
parent
91d7267534
commit
65db02f192
@@ -477,6 +477,7 @@ fn shard_manager(
|
||||
envs.push(("MASTER_ADDR".into(), master_addr.into()));
|
||||
envs.push(("MASTER_PORT".into(), master_port.to_string().into()));
|
||||
envs.push(("NCCL_ASYNC_ERROR_HANDLING".into(), "1".into()));
|
||||
envs.push(("TORCH_NCCL_AVOID_RECORD_STREAMS".into(), "1".into()));
|
||||
|
||||
// CUDA memory fraction
|
||||
envs.push((
|
||||
|
Loading…
Reference in New Issue
Block a user