From 284894303a050179ff8d9931828e94048e37bd35 Mon Sep 17 00:00:00 2001
From: Felix Marty <9808326+fxmarty@users.noreply.github.com>
Date: Fri, 21 Jun 2024 12:31:08 +0000
Subject: [PATCH] remove require_backend decorators on handles; for some
 reason they fail in GitHub Actions
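
The require_backend / require_backend_async decorators are dropped only
from the launcher-handle fixtures, where they fail in GitHub Actions for
a reason that is not fully understood; the test functions themselves keep
using them (the imports for the test-level decorators stay).

One plausible, unverified explanation, assuming require_backend is the
usual "skip unless the detected backend matches" wrapper. The sketch
below is hypothetical and not necessarily the actual testing_utils
implementation:

    # Hypothetical sketch of a backend gate, NOT the confirmed
    # testing_utils implementation. SYSTEM is the detected backend
    # exported by testing_utils (e.g. "cuda", "rocm" or "xpu").
    import functools

    import pytest

    def require_backend(*backends):
        def decorator(func):
            @functools.wraps(func)
            def wrapper(*args, **kwargs):
                if SYSTEM not in backends:
                    pytest.skip(f"this test requires one of {backends}")
                return func(*args, **kwargs)

            return wrapper

        return decorator

If the decorator has this shape, the wrapper is an ordinary function, so
inspect.isgeneratorfunction() no longer reports a yield-based fixture as
a generator, and pytest would hand dependent fixtures the raw generator
object instead of the yielded handle. Whatever the exact mechanism,
gating inside the fixture body (as flash_llama_awq_handle_sharded
already does with `if SYSTEM == "rocm":`) does not have this problem, so
only the decorators are removed here.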
---
 .github/workflows/build.yaml                       | 4 ++--
 integration-tests/models/test_bloom_560m.py        | 4 +---
 integration-tests/models/test_flash_awq_sharded.py | 4 +---
 integration-tests/models/test_flash_gemma.py       | 4 +---
 integration-tests/models/test_flash_gemma_gptq.py  | 4 +---
 integration-tests/models/test_flash_llama_exl2.py  | 2 --
 integration-tests/models/test_flash_pali_gemma.py  | 4 +---
 integration-tests/models/test_flash_phi.py         | 4 +---
 integration-tests/models/test_mamba.py             | 2 --
 9 files changed, 8 insertions(+), 24 deletions(-)

diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml
index 63f245ae..9d9ebd07 100644
--- a/.github/workflows/build.yaml
+++ b/.github/workflows/build.yaml
@@ -191,7 +191,7 @@ jobs:
           pwd
           echo "ls:"
           ls
-          python integration-tests/clean_cache_and_download.py --token ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
+          python integration-tests/clean_cache_and_download.py --token ${{ secrets.HF_TOKEN }}
         fi

@@ -239,7 +239,7 @@ jobs:
       - name: Run tests
         run: |
           export DOCKER_DEVICES=${{ needs.build-and-push.outputs.docker_devices }}
-          export HUGGING_FACE_HUB_TOKEN=${{ secrets.HUGGING_FACE_HUB_TOKEN }}
+          export HUGGING_FACE_HUB_TOKEN=${{ secrets.HF_TOKEN }}
           export DOCKER_IMAGE=${{ needs.build-and-push.outputs.docker_image }}
           echo "DOCKER_IMAGE:"
diff --git a/integration-tests/models/test_bloom_560m.py b/integration-tests/models/test_bloom_560m.py
index f0174730..9d508f01 100644
--- a/integration-tests/models/test_bloom_560m.py
+++ b/integration-tests/models/test_bloom_560m.py
@@ -1,17 +1,15 @@
 import pytest

-from testing_utils import require_backend_async, require_backend
+from testing_utils import require_backend_async


 @pytest.fixture(scope="module")
-@require_backend("cuda")
 def bloom_560_handle(launcher):
     with launcher("bigscience/bloom-560m") as handle:
         yield handle


 @pytest.fixture(scope="module")
-@require_backend_async("cuda")
 async def bloom_560(bloom_560_handle):
     await bloom_560_handle.health(240)
     return bloom_560_handle.client
diff --git a/integration-tests/models/test_flash_awq_sharded.py b/integration-tests/models/test_flash_awq_sharded.py
index d7104ff1..a76121e6 100644
--- a/integration-tests/models/test_flash_awq_sharded.py
+++ b/integration-tests/models/test_flash_awq_sharded.py
@@ -1,10 +1,9 @@
 import pytest

-from testing_utils import SYSTEM, is_flaky_async, require_backend_async, require_backend
+from testing_utils import SYSTEM, is_flaky_async, require_backend_async


 @pytest.fixture(scope="module")
-@require_backend("cuda", "rocm")
 def flash_llama_awq_handle_sharded(launcher):
     if SYSTEM == "rocm":
         # On ROCm, for awq checkpoints, we need to use gptq kernel that supports ROCm.
@@ -21,7 +20,6 @@ def flash_llama_awq_handle_sharded(launcher):


 @pytest.fixture(scope="module")
-@require_backend_async("cuda", "rocm")
 async def flash_llama_awq_sharded(flash_llama_awq_handle_sharded):
     await flash_llama_awq_handle_sharded.health(300)
     return flash_llama_awq_handle_sharded.client
diff --git a/integration-tests/models/test_flash_gemma.py b/integration-tests/models/test_flash_gemma.py
index f6888efe..fdab66d8 100644
--- a/integration-tests/models/test_flash_gemma.py
+++ b/integration-tests/models/test_flash_gemma.py
@@ -1,19 +1,17 @@
 import pytest

-from testing_utils import require_backend_async, require_backend
+from testing_utils import require_backend_async

 # These tests do not pass on ROCm, that does not support head_dim > 128 (2b model is 256).


 @pytest.fixture(scope="module")
-@require_backend("cuda", "xpu")
 def flash_gemma_handle(launcher):
     with launcher("google/gemma-2b", num_shard=1) as handle:
         yield handle


 @pytest.fixture(scope="module")
-@require_backend_async("cuda", "xpu")
 async def flash_gemma(flash_gemma_handle):
     await flash_gemma_handle.health(300)
     return flash_gemma_handle.client
diff --git a/integration-tests/models/test_flash_gemma_gptq.py b/integration-tests/models/test_flash_gemma_gptq.py
index 8dc674b6..ba39fe46 100644
--- a/integration-tests/models/test_flash_gemma_gptq.py
+++ b/integration-tests/models/test_flash_gemma_gptq.py
@@ -1,19 +1,17 @@
 import pytest

-from testing_utils import require_backend_async, require_backend
+from testing_utils import require_backend_async

 # These tests do not pass on ROCm, that does not support head_dim > 128 (2b model is 256).


 @pytest.fixture(scope="module")
-@require_backend("cuda", "xpu")
 def flash_gemma_gptq_handle(launcher):
     with launcher("TechxGenus/gemma-2b-GPTQ", num_shard=1, quantize="gptq") as handle:
         yield handle


 @pytest.fixture(scope="module")
-@require_backend_async("cuda", "xpu")
 async def flash_gemma_gptq(flash_gemma_gptq_handle):
     await flash_gemma_gptq_handle.health(300)
     return flash_gemma_gptq_handle.client
diff --git a/integration-tests/models/test_flash_llama_exl2.py b/integration-tests/models/test_flash_llama_exl2.py
index 2db40257..9d625977 100644
--- a/integration-tests/models/test_flash_llama_exl2.py
+++ b/integration-tests/models/test_flash_llama_exl2.py
@@ -3,7 +3,6 @@ from testing_utils import require_backend_async, require_backend


 @pytest.fixture(scope="module")
-@require_backend("cuda")
 def flash_llama_exl2_handle(launcher):
     with launcher(
         "turboderp/Llama-3-8B-Instruct-exl2",
@@ -18,7 +17,6 @@ def flash_llama_exl2_handle(launcher):


 @pytest.fixture(scope="module")
-@require_backend_async("cuda")
 async def flash_llama_exl2(flash_llama_exl2_handle):
     await flash_llama_exl2_handle.health(300)
     return flash_llama_exl2_handle.client
diff --git a/integration-tests/models/test_flash_pali_gemma.py b/integration-tests/models/test_flash_pali_gemma.py
index 00c12821..9c080e7e 100644
--- a/integration-tests/models/test_flash_pali_gemma.py
+++ b/integration-tests/models/test_flash_pali_gemma.py
@@ -3,13 +3,12 @@ import requests
 import io
 import base64

-from testing_utils import require_backend_async, require_backend
+from testing_utils import require_backend_async

 # These tests do not pass on ROCm, that does not support head_dim > 128 (2b model is 256).

 @pytest.fixture(scope="module")
-@require_backend("cuda", "xpu")
 def flash_pali_gemma_handle(launcher):
     with launcher(
         "google/paligemma-3b-pt-224",
@@ -22,7 +21,6 @@ def flash_pali_gemma_handle(launcher):


 @pytest.fixture(scope="module")
-@require_backend_async("cuda", "xpu")
 async def flash_pali_gemma(flash_pali_gemma_handle):
     await flash_pali_gemma_handle.health(300)
     return flash_pali_gemma_handle.client
diff --git a/integration-tests/models/test_flash_phi.py b/integration-tests/models/test_flash_phi.py
index 9d0abfb3..eee908d4 100644
--- a/integration-tests/models/test_flash_phi.py
+++ b/integration-tests/models/test_flash_phi.py
@@ -1,19 +1,17 @@
 import pytest

-from testing_utils import require_backend_async, require_backend
+from testing_utils import require_backend_async

 # These tests do not pass on ROCm, with different generations.


 @pytest.fixture(scope="module")
-@require_backend("cuda")
 def flash_phi_handle(launcher):
     with launcher("microsoft/phi-2", num_shard=1) as handle:
         yield handle


 @pytest.fixture(scope="module")
-@require_backend_async("cuda")
 async def flash_phi(flash_phi_handle):
     await flash_phi_handle.health(300)
     return flash_phi_handle.client
diff --git a/integration-tests/models/test_mamba.py b/integration-tests/models/test_mamba.py
index 0f939705..bdb29f54 100644
--- a/integration-tests/models/test_mamba.py
+++ b/integration-tests/models/test_mamba.py
@@ -4,14 +4,12 @@ from testing_utils import require_backend_async, require_backend


 @pytest.fixture(scope="module")
-@require_backend("cuda")
 def fused_kernel_mamba_handle(launcher):
     with launcher("state-spaces/mamba-130m", num_shard=1) as handle:
         yield handle


 @pytest.fixture(scope="module")
-@require_backend_async("cuda")
 async def fused_kernel_mamba(fused_kernel_mamba_handle):
     await fused_kernel_mamba_handle.health(300)
     return fused_kernel_mamba_handle.client