mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-09-10 20:04:52 +00:00
Remove require_backend decorators on handles; for some reason they fail in GitHub Actions
This commit is contained in:
parent
7e0f4f25c7
commit
284894303a
4
.github/workflows/build.yaml
vendored
4
.github/workflows/build.yaml
vendored
@ -191,7 +191,7 @@ jobs:
|
|||||||
pwd
|
pwd
|
||||||
echo "ls:"
|
echo "ls:"
|
||||||
ls
|
ls
|
||||||
python integration-tests/clean_cache_and_download.py --token ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
|
python integration-tests/clean_cache_and_download.py --token ${{ secrets.HF_TOKEN }}
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
|
||||||
@ -239,7 +239,7 @@ jobs:
|
|||||||
- name: Run tests
|
- name: Run tests
|
||||||
run: |
|
run: |
|
||||||
export DOCKER_DEVICES=${{ needs.build-and-push.outputs.docker_devices }}
|
export DOCKER_DEVICES=${{ needs.build-and-push.outputs.docker_devices }}
|
||||||
export HUGGING_FACE_HUB_TOKEN=${{ secrets.HUGGING_FACE_HUB_TOKEN }}
|
export HUGGING_FACE_HUB_TOKEN=${{ secrets.HF_TOKEN }}
|
||||||
|
|
||||||
export DOCKER_IMAGE=${{ needs.build-and-push.outputs.docker_image }}
|
export DOCKER_IMAGE=${{ needs.build-and-push.outputs.docker_image }}
|
||||||
echo "DOCKER_IMAGE:"
|
echo "DOCKER_IMAGE:"
|
||||||
|
@ -1,17 +1,15 @@
|
|||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
from testing_utils import require_backend_async, require_backend
|
from testing_utils import require_backend_async
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture(scope="module")
|
@pytest.fixture(scope="module")
|
||||||
@require_backend("cuda")
|
|
||||||
def bloom_560_handle(launcher):
|
def bloom_560_handle(launcher):
|
||||||
with launcher("bigscience/bloom-560m") as handle:
|
with launcher("bigscience/bloom-560m") as handle:
|
||||||
yield handle
|
yield handle
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture(scope="module")
|
@pytest.fixture(scope="module")
|
||||||
@require_backend_async("cuda")
|
|
||||||
async def bloom_560(bloom_560_handle):
|
async def bloom_560(bloom_560_handle):
|
||||||
await bloom_560_handle.health(240)
|
await bloom_560_handle.health(240)
|
||||||
return bloom_560_handle.client
|
return bloom_560_handle.client
|
||||||
|
@ -1,10 +1,9 @@
|
|||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
from testing_utils import SYSTEM, is_flaky_async, require_backend_async, require_backend
|
from testing_utils import SYSTEM, is_flaky_async, require_backend_async
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture(scope="module")
|
@pytest.fixture(scope="module")
|
||||||
@require_backend("cuda", "rocm")
|
|
||||||
def flash_llama_awq_handle_sharded(launcher):
|
def flash_llama_awq_handle_sharded(launcher):
|
||||||
if SYSTEM == "rocm":
|
if SYSTEM == "rocm":
|
||||||
# On ROCm, for awq checkpoints, we need to use gptq kernel that supports ROCm.
|
# On ROCm, for awq checkpoints, we need to use gptq kernel that supports ROCm.
|
||||||
@ -21,7 +20,6 @@ def flash_llama_awq_handle_sharded(launcher):
|
|||||||
|
|
||||||
|
|
||||||
@pytest.fixture(scope="module")
|
@pytest.fixture(scope="module")
|
||||||
@require_backend_async("cuda", "rocm")
|
|
||||||
async def flash_llama_awq_sharded(flash_llama_awq_handle_sharded):
|
async def flash_llama_awq_sharded(flash_llama_awq_handle_sharded):
|
||||||
await flash_llama_awq_handle_sharded.health(300)
|
await flash_llama_awq_handle_sharded.health(300)
|
||||||
return flash_llama_awq_handle_sharded.client
|
return flash_llama_awq_handle_sharded.client
|
||||||
|
@ -1,19 +1,17 @@
|
|||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
from testing_utils import require_backend_async, require_backend
|
from testing_utils import require_backend_async
|
||||||
|
|
||||||
# These tests do not pass on ROCm, that does not support head_dim > 128 (2b model is 256).
|
# These tests do not pass on ROCm, that does not support head_dim > 128 (2b model is 256).
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture(scope="module")
|
@pytest.fixture(scope="module")
|
||||||
@require_backend("cuda", "xpu")
|
|
||||||
def flash_gemma_handle(launcher):
|
def flash_gemma_handle(launcher):
|
||||||
with launcher("google/gemma-2b", num_shard=1) as handle:
|
with launcher("google/gemma-2b", num_shard=1) as handle:
|
||||||
yield handle
|
yield handle
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture(scope="module")
|
@pytest.fixture(scope="module")
|
||||||
@require_backend_async("cuda", "xpu")
|
|
||||||
async def flash_gemma(flash_gemma_handle):
|
async def flash_gemma(flash_gemma_handle):
|
||||||
await flash_gemma_handle.health(300)
|
await flash_gemma_handle.health(300)
|
||||||
return flash_gemma_handle.client
|
return flash_gemma_handle.client
|
||||||
|
@ -1,19 +1,17 @@
|
|||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
from testing_utils import require_backend_async, require_backend
|
from testing_utils import require_backend_async
|
||||||
|
|
||||||
# These tests do not pass on ROCm, that does not support head_dim > 128 (2b model is 256).
|
# These tests do not pass on ROCm, that does not support head_dim > 128 (2b model is 256).
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture(scope="module")
|
@pytest.fixture(scope="module")
|
||||||
@require_backend("cuda", "xpu")
|
|
||||||
def flash_gemma_gptq_handle(launcher):
|
def flash_gemma_gptq_handle(launcher):
|
||||||
with launcher("TechxGenus/gemma-2b-GPTQ", num_shard=1, quantize="gptq") as handle:
|
with launcher("TechxGenus/gemma-2b-GPTQ", num_shard=1, quantize="gptq") as handle:
|
||||||
yield handle
|
yield handle
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture(scope="module")
|
@pytest.fixture(scope="module")
|
||||||
@require_backend_async("cuda", "xpu")
|
|
||||||
async def flash_gemma_gptq(flash_gemma_gptq_handle):
|
async def flash_gemma_gptq(flash_gemma_gptq_handle):
|
||||||
await flash_gemma_gptq_handle.health(300)
|
await flash_gemma_gptq_handle.health(300)
|
||||||
return flash_gemma_gptq_handle.client
|
return flash_gemma_gptq_handle.client
|
||||||
|
@ -3,7 +3,6 @@ from testing_utils import require_backend_async, require_backend
|
|||||||
|
|
||||||
|
|
||||||
@pytest.fixture(scope="module")
|
@pytest.fixture(scope="module")
|
||||||
@require_backend("cuda")
|
|
||||||
def flash_llama_exl2_handle(launcher):
|
def flash_llama_exl2_handle(launcher):
|
||||||
with launcher(
|
with launcher(
|
||||||
"turboderp/Llama-3-8B-Instruct-exl2",
|
"turboderp/Llama-3-8B-Instruct-exl2",
|
||||||
@ -18,7 +17,6 @@ def flash_llama_exl2_handle(launcher):
|
|||||||
|
|
||||||
|
|
||||||
@pytest.fixture(scope="module")
|
@pytest.fixture(scope="module")
|
||||||
@require_backend_async("cuda")
|
|
||||||
async def flash_llama_exl2(flash_llama_exl2_handle):
|
async def flash_llama_exl2(flash_llama_exl2_handle):
|
||||||
await flash_llama_exl2_handle.health(300)
|
await flash_llama_exl2_handle.health(300)
|
||||||
return flash_llama_exl2_handle.client
|
return flash_llama_exl2_handle.client
|
||||||
|
@ -3,13 +3,12 @@ import requests
|
|||||||
import io
|
import io
|
||||||
import base64
|
import base64
|
||||||
|
|
||||||
from testing_utils import require_backend_async, require_backend
|
from testing_utils import require_backend_async
|
||||||
|
|
||||||
# These tests do not pass on ROCm, that does not support head_dim > 128 (2b model is 256).
|
# These tests do not pass on ROCm, that does not support head_dim > 128 (2b model is 256).
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture(scope="module")
|
@pytest.fixture(scope="module")
|
||||||
@require_backend("cuda", "xpu")
|
|
||||||
def flash_pali_gemma_handle(launcher):
|
def flash_pali_gemma_handle(launcher):
|
||||||
with launcher(
|
with launcher(
|
||||||
"google/paligemma-3b-pt-224",
|
"google/paligemma-3b-pt-224",
|
||||||
@ -22,7 +21,6 @@ def flash_pali_gemma_handle(launcher):
|
|||||||
|
|
||||||
|
|
||||||
@pytest.fixture(scope="module")
|
@pytest.fixture(scope="module")
|
||||||
@require_backend_async("cuda", "xpu")
|
|
||||||
async def flash_pali_gemma(flash_pali_gemma_handle):
|
async def flash_pali_gemma(flash_pali_gemma_handle):
|
||||||
await flash_pali_gemma_handle.health(300)
|
await flash_pali_gemma_handle.health(300)
|
||||||
return flash_pali_gemma_handle.client
|
return flash_pali_gemma_handle.client
|
||||||
|
@ -1,19 +1,17 @@
|
|||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
from testing_utils import require_backend_async, require_backend
|
from testing_utils import require_backend_async
|
||||||
|
|
||||||
# These tests do not pass on ROCm, with different generations.
|
# These tests do not pass on ROCm, with different generations.
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture(scope="module")
|
@pytest.fixture(scope="module")
|
||||||
@require_backend("cuda")
|
|
||||||
def flash_phi_handle(launcher):
|
def flash_phi_handle(launcher):
|
||||||
with launcher("microsoft/phi-2", num_shard=1) as handle:
|
with launcher("microsoft/phi-2", num_shard=1) as handle:
|
||||||
yield handle
|
yield handle
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture(scope="module")
|
@pytest.fixture(scope="module")
|
||||||
@require_backend_async("cuda")
|
|
||||||
async def flash_phi(flash_phi_handle):
|
async def flash_phi(flash_phi_handle):
|
||||||
await flash_phi_handle.health(300)
|
await flash_phi_handle.health(300)
|
||||||
return flash_phi_handle.client
|
return flash_phi_handle.client
|
||||||
|
@ -4,14 +4,12 @@ from testing_utils import require_backend_async, require_backend
|
|||||||
|
|
||||||
|
|
||||||
@pytest.fixture(scope="module")
|
@pytest.fixture(scope="module")
|
||||||
@require_backend("cuda")
|
|
||||||
def fused_kernel_mamba_handle(launcher):
|
def fused_kernel_mamba_handle(launcher):
|
||||||
with launcher("state-spaces/mamba-130m", num_shard=1) as handle:
|
with launcher("state-spaces/mamba-130m", num_shard=1) as handle:
|
||||||
yield handle
|
yield handle
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture(scope="module")
|
@pytest.fixture(scope="module")
|
||||||
@require_backend_async("cuda")
|
|
||||||
async def fused_kernel_mamba(fused_kernel_mamba_handle):
|
async def fused_kernel_mamba(fused_kernel_mamba_handle):
|
||||||
await fused_kernel_mamba_handle.health(300)
|
await fused_kernel_mamba_handle.health(300)
|
||||||
return fused_kernel_mamba_handle.client
|
return fused_kernel_mamba_handle.client
|
||||||
|
Loading…
Reference in New Issue
Block a user