Remove require_backend decorators on handles; for some reason they fail in GitHub Actions

2025-09-10 20:04:52 +00:00 · 2024-06-21 12:31:08 +00:00 · 2024-06-21 12:31:08 +00:00 · 284894303a
commit 284894303a
parent 7e0f4f25c7
9 changed files with 8 additions and 24 deletions
--- a/.github/workflows/build.yaml
+++ b/.github/workflows/build.yaml
@ -191,7 +191,7 @@ jobs:
            pwd
            echo "ls:"
            ls
-            python integration-tests/clean_cache_and_download.py --token ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
+            python integration-tests/clean_cache_and_download.py --token ${{ secrets.HF_TOKEN }}
          fi


@ -239,7 +239,7 @@ jobs:
      - name: Run tests
        run: |
          export DOCKER_DEVICES=${{ needs.build-and-push.outputs.docker_devices }}
-          export HUGGING_FACE_HUB_TOKEN=${{ secrets.HUGGING_FACE_HUB_TOKEN }}
+          export HUGGING_FACE_HUB_TOKEN=${{ secrets.HF_TOKEN }}

          export DOCKER_IMAGE=${{ needs.build-and-push.outputs.docker_image }}
          echo "DOCKER_IMAGE:"
--- a/integration-tests/models/test_bloom_560m.py
+++ b/integration-tests/models/test_bloom_560m.py
@ -1,17 +1,15 @@
 import pytest

-from testing_utils import require_backend_async, require_backend
+from testing_utils import require_backend_async


@pytest.fixture(scope="module")
-@require_backend("cuda")
 def bloom_560_handle(launcher):
    with launcher("bigscience/bloom-560m") as handle:
        yield handle


@pytest.fixture(scope="module")
-@require_backend_async("cuda")
 async def bloom_560(bloom_560_handle):
    await bloom_560_handle.health(240)
    return bloom_560_handle.client
--- a/integration-tests/models/test_flash_awq_sharded.py
+++ b/integration-tests/models/test_flash_awq_sharded.py
@ -1,10 +1,9 @@
 import pytest

-from testing_utils import SYSTEM, is_flaky_async, require_backend_async, require_backend
+from testing_utils import SYSTEM, is_flaky_async, require_backend_async


@pytest.fixture(scope="module")
-@require_backend("cuda", "rocm")
 def flash_llama_awq_handle_sharded(launcher):
    if SYSTEM == "rocm":
        # On ROCm, for awq checkpoints, we need to use gptq kernel that supports ROCm.
@ -21,7 +20,6 @@ def flash_llama_awq_handle_sharded(launcher):


@pytest.fixture(scope="module")
-@require_backend_async("cuda", "rocm")
 async def flash_llama_awq_sharded(flash_llama_awq_handle_sharded):
    await flash_llama_awq_handle_sharded.health(300)
    return flash_llama_awq_handle_sharded.client
--- a/integration-tests/models/test_flash_gemma.py
+++ b/integration-tests/models/test_flash_gemma.py
@ -1,19 +1,17 @@
 import pytest

-from testing_utils import require_backend_async, require_backend
+from testing_utils import require_backend_async

 # These tests do not pass on ROCm, that does not support head_dim > 128 (2b model is 256).


@pytest.fixture(scope="module")
-@require_backend("cuda", "xpu")
 def flash_gemma_handle(launcher):
    with launcher("google/gemma-2b", num_shard=1) as handle:
        yield handle


@pytest.fixture(scope="module")
-@require_backend_async("cuda", "xpu")
 async def flash_gemma(flash_gemma_handle):
    await flash_gemma_handle.health(300)
    return flash_gemma_handle.client
--- a/integration-tests/models/test_flash_gemma_gptq.py
+++ b/integration-tests/models/test_flash_gemma_gptq.py
@ -1,19 +1,17 @@
 import pytest

-from testing_utils import require_backend_async, require_backend
+from testing_utils import require_backend_async

 # These tests do not pass on ROCm, that does not support head_dim > 128 (2b model is 256).


@pytest.fixture(scope="module")
-@require_backend("cuda", "xpu")
 def flash_gemma_gptq_handle(launcher):
    with launcher("TechxGenus/gemma-2b-GPTQ", num_shard=1, quantize="gptq") as handle:
        yield handle


@pytest.fixture(scope="module")
-@require_backend_async("cuda", "xpu")
 async def flash_gemma_gptq(flash_gemma_gptq_handle):
    await flash_gemma_gptq_handle.health(300)
    return flash_gemma_gptq_handle.client
--- a/integration-tests/models/test_flash_llama_exl2.py
+++ b/integration-tests/models/test_flash_llama_exl2.py
@ -3,7 +3,6 @@ from testing_utils import require_backend_async, require_backend


@pytest.fixture(scope="module")
-@require_backend("cuda")
 def flash_llama_exl2_handle(launcher):
    with launcher(
        "turboderp/Llama-3-8B-Instruct-exl2",
@ -18,7 +17,6 @@ def flash_llama_exl2_handle(launcher):


@pytest.fixture(scope="module")
-@require_backend_async("cuda")
 async def flash_llama_exl2(flash_llama_exl2_handle):
    await flash_llama_exl2_handle.health(300)
    return flash_llama_exl2_handle.client
--- a/integration-tests/models/test_flash_pali_gemma.py
+++ b/integration-tests/models/test_flash_pali_gemma.py
@ -3,13 +3,12 @@ import requests
 import io
 import base64

-from testing_utils import require_backend_async, require_backend
+from testing_utils import require_backend_async

 # These tests do not pass on ROCm, that does not support head_dim > 128 (2b model is 256).


@pytest.fixture(scope="module")
-@require_backend("cuda", "xpu")
 def flash_pali_gemma_handle(launcher):
    with launcher(
        "google/paligemma-3b-pt-224",
@ -22,7 +21,6 @@ def flash_pali_gemma_handle(launcher):


@pytest.fixture(scope="module")
-@require_backend_async("cuda", "xpu")
 async def flash_pali_gemma(flash_pali_gemma_handle):
    await flash_pali_gemma_handle.health(300)
    return flash_pali_gemma_handle.client
--- a/integration-tests/models/test_flash_phi.py
+++ b/integration-tests/models/test_flash_phi.py
@ -1,19 +1,17 @@
 import pytest

-from testing_utils import require_backend_async, require_backend
+from testing_utils import require_backend_async

 # These tests do not pass on ROCm, with different generations.


@pytest.fixture(scope="module")
-@require_backend("cuda")
 def flash_phi_handle(launcher):
    with launcher("microsoft/phi-2", num_shard=1) as handle:
        yield handle


@pytest.fixture(scope="module")
-@require_backend_async("cuda")
 async def flash_phi(flash_phi_handle):
    await flash_phi_handle.health(300)
    return flash_phi_handle.client
--- a/integration-tests/models/test_mamba.py
+++ b/integration-tests/models/test_mamba.py
@ -4,14 +4,12 @@ from testing_utils import require_backend_async, require_backend


@pytest.fixture(scope="module")
-@require_backend("cuda")
 def fused_kernel_mamba_handle(launcher):
    with launcher("state-spaces/mamba-130m", num_shard=1) as handle:
        yield handle


@pytest.fixture(scope="module")
-@require_backend_async("cuda")
 async def fused_kernel_mamba(fused_kernel_mamba_handle):
    await fused_kernel_mamba_handle.health(300)
    return fused_kernel_mamba_handle.client