Remove require_backend decorators on handle fixtures; for some reason they fail in GitHub Actions

This commit is contained in:
Felix Marty 2024-06-21 12:31:08 +00:00 committed by Nicolas Patry
parent 7e0f4f25c7
commit 284894303a
No known key found for this signature in database
GPG Key ID: E939E8CC91A1C674
9 changed files with 8 additions and 24 deletions

View File

@ -191,7 +191,7 @@ jobs:
pwd pwd
echo "ls:" echo "ls:"
ls ls
python integration-tests/clean_cache_and_download.py --token ${{ secrets.HUGGING_FACE_HUB_TOKEN }} python integration-tests/clean_cache_and_download.py --token ${{ secrets.HF_TOKEN }}
fi fi
@ -239,7 +239,7 @@ jobs:
- name: Run tests - name: Run tests
run: | run: |
export DOCKER_DEVICES=${{ needs.build-and-push.outputs.docker_devices }} export DOCKER_DEVICES=${{ needs.build-and-push.outputs.docker_devices }}
export HUGGING_FACE_HUB_TOKEN=${{ secrets.HUGGING_FACE_HUB_TOKEN }} export HUGGING_FACE_HUB_TOKEN=${{ secrets.HF_TOKEN }}
export DOCKER_IMAGE=${{ needs.build-and-push.outputs.docker_image }} export DOCKER_IMAGE=${{ needs.build-and-push.outputs.docker_image }}
echo "DOCKER_IMAGE:" echo "DOCKER_IMAGE:"

View File

@ -1,17 +1,15 @@
import pytest import pytest
from testing_utils import require_backend_async, require_backend from testing_utils import require_backend_async
@pytest.fixture(scope="module") @pytest.fixture(scope="module")
@require_backend("cuda")
def bloom_560_handle(launcher): def bloom_560_handle(launcher):
with launcher("bigscience/bloom-560m") as handle: with launcher("bigscience/bloom-560m") as handle:
yield handle yield handle
@pytest.fixture(scope="module") @pytest.fixture(scope="module")
@require_backend_async("cuda")
async def bloom_560(bloom_560_handle): async def bloom_560(bloom_560_handle):
await bloom_560_handle.health(240) await bloom_560_handle.health(240)
return bloom_560_handle.client return bloom_560_handle.client

View File

@ -1,10 +1,9 @@
import pytest import pytest
from testing_utils import SYSTEM, is_flaky_async, require_backend_async, require_backend from testing_utils import SYSTEM, is_flaky_async, require_backend_async
@pytest.fixture(scope="module") @pytest.fixture(scope="module")
@require_backend("cuda", "rocm")
def flash_llama_awq_handle_sharded(launcher): def flash_llama_awq_handle_sharded(launcher):
if SYSTEM == "rocm": if SYSTEM == "rocm":
# On ROCm, for awq checkpoints, we need to use gptq kernel that supports ROCm. # On ROCm, for awq checkpoints, we need to use gptq kernel that supports ROCm.
@ -21,7 +20,6 @@ def flash_llama_awq_handle_sharded(launcher):
@pytest.fixture(scope="module") @pytest.fixture(scope="module")
@require_backend_async("cuda", "rocm")
async def flash_llama_awq_sharded(flash_llama_awq_handle_sharded): async def flash_llama_awq_sharded(flash_llama_awq_handle_sharded):
await flash_llama_awq_handle_sharded.health(300) await flash_llama_awq_handle_sharded.health(300)
return flash_llama_awq_handle_sharded.client return flash_llama_awq_handle_sharded.client

View File

@ -1,19 +1,17 @@
import pytest import pytest
from testing_utils import require_backend_async, require_backend from testing_utils import require_backend_async
# These tests do not pass on ROCm, that does not support head_dim > 128 (2b model is 256). # These tests do not pass on ROCm, that does not support head_dim > 128 (2b model is 256).
@pytest.fixture(scope="module") @pytest.fixture(scope="module")
@require_backend("cuda", "xpu")
def flash_gemma_handle(launcher): def flash_gemma_handle(launcher):
with launcher("google/gemma-2b", num_shard=1) as handle: with launcher("google/gemma-2b", num_shard=1) as handle:
yield handle yield handle
@pytest.fixture(scope="module") @pytest.fixture(scope="module")
@require_backend_async("cuda", "xpu")
async def flash_gemma(flash_gemma_handle): async def flash_gemma(flash_gemma_handle):
await flash_gemma_handle.health(300) await flash_gemma_handle.health(300)
return flash_gemma_handle.client return flash_gemma_handle.client

View File

@ -1,19 +1,17 @@
import pytest import pytest
from testing_utils import require_backend_async, require_backend from testing_utils import require_backend_async
# These tests do not pass on ROCm, that does not support head_dim > 128 (2b model is 256). # These tests do not pass on ROCm, that does not support head_dim > 128 (2b model is 256).
@pytest.fixture(scope="module") @pytest.fixture(scope="module")
@require_backend("cuda", "xpu")
def flash_gemma_gptq_handle(launcher): def flash_gemma_gptq_handle(launcher):
with launcher("TechxGenus/gemma-2b-GPTQ", num_shard=1, quantize="gptq") as handle: with launcher("TechxGenus/gemma-2b-GPTQ", num_shard=1, quantize="gptq") as handle:
yield handle yield handle
@pytest.fixture(scope="module") @pytest.fixture(scope="module")
@require_backend_async("cuda", "xpu")
async def flash_gemma_gptq(flash_gemma_gptq_handle): async def flash_gemma_gptq(flash_gemma_gptq_handle):
await flash_gemma_gptq_handle.health(300) await flash_gemma_gptq_handle.health(300)
return flash_gemma_gptq_handle.client return flash_gemma_gptq_handle.client

View File

@ -3,7 +3,6 @@ from testing_utils import require_backend_async, require_backend
@pytest.fixture(scope="module") @pytest.fixture(scope="module")
@require_backend("cuda")
def flash_llama_exl2_handle(launcher): def flash_llama_exl2_handle(launcher):
with launcher( with launcher(
"turboderp/Llama-3-8B-Instruct-exl2", "turboderp/Llama-3-8B-Instruct-exl2",
@ -18,7 +17,6 @@ def flash_llama_exl2_handle(launcher):
@pytest.fixture(scope="module") @pytest.fixture(scope="module")
@require_backend_async("cuda")
async def flash_llama_exl2(flash_llama_exl2_handle): async def flash_llama_exl2(flash_llama_exl2_handle):
await flash_llama_exl2_handle.health(300) await flash_llama_exl2_handle.health(300)
return flash_llama_exl2_handle.client return flash_llama_exl2_handle.client

View File

@ -3,13 +3,12 @@ import requests
import io import io
import base64 import base64
from testing_utils import require_backend_async, require_backend from testing_utils import require_backend_async
# These tests do not pass on ROCm, that does not support head_dim > 128 (2b model is 256). # These tests do not pass on ROCm, that does not support head_dim > 128 (2b model is 256).
@pytest.fixture(scope="module") @pytest.fixture(scope="module")
@require_backend("cuda", "xpu")
def flash_pali_gemma_handle(launcher): def flash_pali_gemma_handle(launcher):
with launcher( with launcher(
"google/paligemma-3b-pt-224", "google/paligemma-3b-pt-224",
@ -22,7 +21,6 @@ def flash_pali_gemma_handle(launcher):
@pytest.fixture(scope="module") @pytest.fixture(scope="module")
@require_backend_async("cuda", "xpu")
async def flash_pali_gemma(flash_pali_gemma_handle): async def flash_pali_gemma(flash_pali_gemma_handle):
await flash_pali_gemma_handle.health(300) await flash_pali_gemma_handle.health(300)
return flash_pali_gemma_handle.client return flash_pali_gemma_handle.client

View File

@ -1,19 +1,17 @@
import pytest import pytest
from testing_utils import require_backend_async, require_backend from testing_utils import require_backend_async
# These tests do not pass on ROCm, with different generations. # These tests do not pass on ROCm, with different generations.
@pytest.fixture(scope="module") @pytest.fixture(scope="module")
@require_backend("cuda")
def flash_phi_handle(launcher): def flash_phi_handle(launcher):
with launcher("microsoft/phi-2", num_shard=1) as handle: with launcher("microsoft/phi-2", num_shard=1) as handle:
yield handle yield handle
@pytest.fixture(scope="module") @pytest.fixture(scope="module")
@require_backend_async("cuda")
async def flash_phi(flash_phi_handle): async def flash_phi(flash_phi_handle):
await flash_phi_handle.health(300) await flash_phi_handle.health(300)
return flash_phi_handle.client return flash_phi_handle.client

View File

@ -4,14 +4,12 @@ from testing_utils import require_backend_async, require_backend
@pytest.fixture(scope="module") @pytest.fixture(scope="module")
@require_backend("cuda")
def fused_kernel_mamba_handle(launcher): def fused_kernel_mamba_handle(launcher):
with launcher("state-spaces/mamba-130m", num_shard=1) as handle: with launcher("state-spaces/mamba-130m", num_shard=1) as handle:
yield handle yield handle
@pytest.fixture(scope="module") @pytest.fixture(scope="module")
@require_backend_async("cuda")
async def fused_kernel_mamba(fused_kernel_mamba_handle): async def fused_kernel_mamba(fused_kernel_mamba_handle):
await fused_kernel_mamba_handle.health(300) await fused_kernel_mamba_handle.health(300)
return fused_kernel_mamba_handle.client return fused_kernel_mamba_handle.client