mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-09-11 12:24:53 +00:00
fix: use TORCH_NCCL_AVOID_RECORD_STREAMS=1
This commit is contained in:
parent
532146338b
commit
1d929a243a
@ -489,6 +489,7 @@ fn shard_manager(
|
||||
envs.push(("MASTER_ADDR".into(), master_addr.into()));
|
||||
envs.push(("MASTER_PORT".into(), master_port.to_string().into()));
|
||||
envs.push(("NCCL_ASYNC_ERROR_HANDLING".into(), "1".into()));
|
||||
// Add TORCH_NCCL_AVOID_RECORD_STREAMS=1 to the env list next to the other
// distributed settings. The statement must end with `;` like its siblings:
// more statements follow, so this is not a tail expression.
envs.push(("TORCH_NCCL_AVOID_RECORD_STREAMS".into(), "1".into()));
|
||||
|
||||
// CUDA memory fraction
|
||||
envs.push((
|
||||
|
Loading…
Reference in New Issue
Block a user