diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index abe161db..6c968053 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -183,4 +183,3 @@ jobs: export HF_TOKEN=${{ secrets.HF_TOKEN }} echo $DOCKER_IMAGE pytest -s -vv integration-tests ${PYTEST_FLAGS} - diff --git a/server/text_generation_server/utils/dist.py b/server/text_generation_server/utils/dist.py index 36d63e86..82aeba6c 100644 --- a/server/text_generation_server/utils/dist.py +++ b/server/text_generation_server/utils/dist.py @@ -56,7 +56,7 @@ def initialize_torch_distributed(): backend = "nccl" options = ProcessGroupNCCL.Options() options.is_high_priority_stream = True - options._timeout = timedelta(seconds=60) + options._timeout = timedelta(seconds=120) else: backend = "gloo" options = None @@ -76,7 +76,7 @@ def initialize_torch_distributed(): backend="ccl", world_size=WORLD_SIZE, rank=RANK, - timeout=timedelta(seconds=60), + timeout=timedelta(seconds=120), pg_options=options, ) else: @@ -84,7 +84,7 @@ def initialize_torch_distributed(): backend=backend, world_size=WORLD_SIZE, rank=RANK, - timeout=timedelta(seconds=60), + timeout=timedelta(seconds=120), pg_options=options, ) else: