From 284894303a050179ff8d9931828e94048e37bd35 Mon Sep 17 00:00:00 2001
From: Felix Marty <9808326+fxmarty@users.noreply.github.com>
Date: Fri, 21 Jun 2024 12:31:08 +0000
Subject: [PATCH] remove require_backend decorators on handles; for some
 reason they fail in GitHub Actions
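
The require_backend / require_backend_async decorators are dropped only
from the launcher-handle fixtures, where they fail in GitHub Actions for
a reason that is not fully understood; the test functions themselves keep
using them (the imports for the test-level decorators stay).

One plausible, unverified explanation, assuming require_backend is the
usual "skip unless the detected backend matches" wrapper. The sketch
below is hypothetical and not necessarily the actual testing_utils
implementation:

    # Hypothetical sketch of a backend gate, NOT the confirmed
    # testing_utils implementation. SYSTEM is the detected backend
    # exported by testing_utils (e.g. "cuda", "rocm" or "xpu").
    import functools

    import pytest

    def require_backend(*backends):
        def decorator(func):
            @functools.wraps(func)
            def wrapper(*args, **kwargs):
                if SYSTEM not in backends:
                    pytest.skip(f"this test requires one of {backends}")
                return func(*args, **kwargs)

            return wrapper

        return decorator

If the decorator has this shape, the wrapper is an ordinary function, so
inspect.isgeneratorfunction() no longer reports a yield-based fixture as
a generator, and pytest would hand dependent fixtures the raw generator
object instead of the yielded handle. Whatever the exact mechanism,
gating inside the fixture body (as flash_llama_awq_handle_sharded
already does with `if SYSTEM == "rocm":`) does not have this problem, so
only the decorators are removed here.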
---
 .github/workflows/build.yaml                       | 4 ++--
 integration-tests/models/test_bloom_560m.py        | 4 +---
 integration-tests/models/test_flash_awq_sharded.py | 4 +---
 integration-tests/models/test_flash_gemma.py       | 4 +---
 integration-tests/models/test_flash_gemma_gptq.py  | 4 +---
 integration-tests/models/test_flash_llama_exl2.py  | 2 --
 integration-tests/models/test_flash_pali_gemma.py  | 4 +---
 integration-tests/models/test_flash_phi.py         | 4 +---
 integration-tests/models/test_mamba.py             | 2 --
 9 files changed, 8 insertions(+), 24 deletions(-)

diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml
index 63f245ae..9d9ebd07 100644
--- a/.github/workflows/build.yaml
+++ b/.github/workflows/build.yaml
@@ -191,7 +191,7 @@ jobs:
           pwd
           echo "ls:"
           ls
-          python integration-tests/clean_cache_and_download.py --token ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
+          python integration-tests/clean_cache_and_download.py --token ${{ secrets.HF_TOKEN }}
         fi

@@ -239,7 +239,7 @@ jobs:
       - name: Run tests
         run: |
           export DOCKER_DEVICES=${{ needs.build-and-push.outputs.docker_devices }}
-          export HUGGING_FACE_HUB_TOKEN=${{ secrets.HUGGING_FACE_HUB_TOKEN }}
+          export HUGGING_FACE_HUB_TOKEN=${{ secrets.HF_TOKEN }}
           export DOCKER_IMAGE=${{ needs.build-and-push.outputs.docker_image }}
           echo "DOCKER_IMAGE:"
diff --git a/integration-tests/models/test_bloom_560m.py b/integration-tests/models/test_bloom_560m.py
index f0174730..9d508f01 100644
--- a/integration-tests/models/test_bloom_560m.py
+++ b/integration-tests/models/test_bloom_560m.py
@@ -1,17 +1,15 @@
 import pytest

-from testing_utils import require_backend_async, require_backend
+from testing_utils import require_backend_async


 @pytest.fixture(scope="module")
-@require_backend("cuda")
 def bloom_560_handle(launcher):
     with launcher("bigscience/bloom-560m") as handle:
         yield handle


 @pytest.fixture(scope="module")
-@require_backend_async("cuda")
 async def bloom_560(bloom_560_handle):
     await bloom_560_handle.health(240)
     return bloom_560_handle.client
diff --git a/integration-tests/models/test_flash_awq_sharded.py b/integration-tests/models/test_flash_awq_sharded.py
index d7104ff1..a76121e6 100644
--- a/integration-tests/models/test_flash_awq_sharded.py
+++ b/integration-tests/models/test_flash_awq_sharded.py
@@ -1,10 +1,9 @@
 import pytest

-from testing_utils import SYSTEM, is_flaky_async, require_backend_async, require_backend
+from testing_utils import SYSTEM, is_flaky_async, require_backend_async


 @pytest.fixture(scope="module")
-@require_backend("cuda", "rocm")
 def flash_llama_awq_handle_sharded(launcher):
     if SYSTEM == "rocm":
         # On ROCm, for awq checkpoints, we need to use gptq kernel that supports ROCm.
@@ -21,7 +20,6 @@ def flash_llama_awq_handle_sharded(launcher):


 @pytest.fixture(scope="module")
-@require_backend_async("cuda", "rocm")
 async def flash_llama_awq_sharded(flash_llama_awq_handle_sharded):
     await flash_llama_awq_handle_sharded.health(300)
     return flash_llama_awq_handle_sharded.client
diff --git a/integration-tests/models/test_flash_gemma.py b/integration-tests/models/test_flash_gemma.py
index f6888efe..fdab66d8 100644
--- a/integration-tests/models/test_flash_gemma.py
+++ b/integration-tests/models/test_flash_gemma.py
@@ -1,19 +1,17 @@
 import pytest

-from testing_utils import require_backend_async, require_backend
+from testing_utils import require_backend_async

 # These tests do not pass on ROCm, that does not support head_dim > 128 (2b model is 256).


 @pytest.fixture(scope="module")
-@require_backend("cuda", "xpu")
 def flash_gemma_handle(launcher):
     with launcher("google/gemma-2b", num_shard=1) as handle:
         yield handle


 @pytest.fixture(scope="module")
-@require_backend_async("cuda", "xpu")
 async def flash_gemma(flash_gemma_handle):
     await flash_gemma_handle.health(300)
     return flash_gemma_handle.client
diff --git a/integration-tests/models/test_flash_gemma_gptq.py b/integration-tests/models/test_flash_gemma_gptq.py
index 8dc674b6..ba39fe46 100644
--- a/integration-tests/models/test_flash_gemma_gptq.py
+++ b/integration-tests/models/test_flash_gemma_gptq.py
@@ -1,19 +1,17 @@
 import pytest

-from testing_utils import require_backend_async, require_backend
+from testing_utils import require_backend_async

 # These tests do not pass on ROCm, that does not support head_dim > 128 (2b model is 256).


 @pytest.fixture(scope="module")
-@require_backend("cuda", "xpu")
 def flash_gemma_gptq_handle(launcher):
     with launcher("TechxGenus/gemma-2b-GPTQ", num_shard=1, quantize="gptq") as handle:
         yield handle


 @pytest.fixture(scope="module")
-@require_backend_async("cuda", "xpu")
 async def flash_gemma_gptq(flash_gemma_gptq_handle):
     await flash_gemma_gptq_handle.health(300)
     return flash_gemma_gptq_handle.client
diff --git a/integration-tests/models/test_flash_llama_exl2.py b/integration-tests/models/test_flash_llama_exl2.py
index 2db40257..9d625977 100644
--- a/integration-tests/models/test_flash_llama_exl2.py
+++ b/integration-tests/models/test_flash_llama_exl2.py
@@ -3,7 +3,6 @@ from testing_utils import require_backend_async, require_backend


 @pytest.fixture(scope="module")
-@require_backend("cuda")
 def flash_llama_exl2_handle(launcher):
     with launcher(
         "turboderp/Llama-3-8B-Instruct-exl2",
@@ -18,7 +17,6 @@ def flash_llama_exl2_handle(launcher):


 @pytest.fixture(scope="module")
-@require_backend_async("cuda")
 async def flash_llama_exl2(flash_llama_exl2_handle):
     await flash_llama_exl2_handle.health(300)
     return flash_llama_exl2_handle.client
diff --git a/integration-tests/models/test_flash_pali_gemma.py b/integration-tests/models/test_flash_pali_gemma.py
index 00c12821..9c080e7e 100644
--- a/integration-tests/models/test_flash_pali_gemma.py
+++ b/integration-tests/models/test_flash_pali_gemma.py
@@ -3,13 +3,12 @@ import requests
 import io
 import base64

-from testing_utils import require_backend_async, require_backend
+from testing_utils import require_backend_async

 # These tests do not pass on ROCm, that does not support head_dim > 128 (2b model is 256).

 @pytest.fixture(scope="module")
-@require_backend("cuda", "xpu")
 def flash_pali_gemma_handle(launcher):
     with launcher(
         "google/paligemma-3b-pt-224",
@@ -22,7 +21,6 @@ def flash_pali_gemma_handle(launcher):


 @pytest.fixture(scope="module")
-@require_backend_async("cuda", "xpu")
 async def flash_pali_gemma(flash_pali_gemma_handle):
     await flash_pali_gemma_handle.health(300)
     return flash_pali_gemma_handle.client
diff --git a/integration-tests/models/test_flash_phi.py b/integration-tests/models/test_flash_phi.py
index 9d0abfb3..eee908d4 100644
--- a/integration-tests/models/test_flash_phi.py
+++ b/integration-tests/models/test_flash_phi.py
@@ -1,19 +1,17 @@
 import pytest

-from testing_utils import require_backend_async, require_backend
+from testing_utils import require_backend_async

 # These tests do not pass on ROCm, with different generations.


 @pytest.fixture(scope="module")
-@require_backend("cuda")
 def flash_phi_handle(launcher):
     with launcher("microsoft/phi-2", num_shard=1) as handle:
         yield handle


 @pytest.fixture(scope="module")
-@require_backend_async("cuda")
 async def flash_phi(flash_phi_handle):
     await flash_phi_handle.health(300)
     return flash_phi_handle.client
diff --git a/integration-tests/models/test_mamba.py b/integration-tests/models/test_mamba.py
index 0f939705..bdb29f54 100644
--- a/integration-tests/models/test_mamba.py
+++ b/integration-tests/models/test_mamba.py
@@ -4,14 +4,12 @@ from testing_utils import require_backend_async, require_backend


 @pytest.fixture(scope="module")
-@require_backend("cuda")
 def fused_kernel_mamba_handle(launcher):
     with launcher("state-spaces/mamba-130m", num_shard=1) as handle:
         yield handle


 @pytest.fixture(scope="module")
-@require_backend_async("cuda")
 async def fused_kernel_mamba(fused_kernel_mamba_handle):
     await fused_kernel_mamba_handle.health(300)
     return fused_kernel_mamba_handle.client