From 10bec164a981e39157eb7e91bd1afb053854604a Mon Sep 17 00:00:00 2001 From: OlivierDehaene <23298448+OlivierDehaene@users.noreply.github.com> Date: Sat, 20 Jul 2024 10:09:03 +0200 Subject: [PATCH] increase timeout --- .github/workflows/build.yaml | 1 - server/text_generation_server/utils/dist.py | 6 +++--- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index abe161db..6c968053 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -183,4 +183,3 @@ jobs: export HF_TOKEN=${{ secrets.HF_TOKEN }} echo $DOCKER_IMAGE pytest -s -vv integration-tests ${PYTEST_FLAGS} - diff --git a/server/text_generation_server/utils/dist.py b/server/text_generation_server/utils/dist.py index 36d63e86..82aeba6c 100644 --- a/server/text_generation_server/utils/dist.py +++ b/server/text_generation_server/utils/dist.py @@ -56,7 +56,7 @@ def initialize_torch_distributed(): backend = "nccl" options = ProcessGroupNCCL.Options() options.is_high_priority_stream = True - options._timeout = timedelta(seconds=60) + options._timeout = timedelta(seconds=120) else: backend = "gloo" options = None @@ -76,7 +76,7 @@ def initialize_torch_distributed(): backend="ccl", world_size=WORLD_SIZE, rank=RANK, - timeout=timedelta(seconds=60), + timeout=timedelta(seconds=120), pg_options=options, ) else: @@ -84,7 +84,7 @@ def initialize_torch_distributed(): backend=backend, world_size=WORLD_SIZE, rank=RANK, - timeout=timedelta(seconds=60), + timeout=timedelta(seconds=120), pg_options=options, ) else: