diff --git a/backends/gaudi/Makefile b/backends/gaudi/Makefile index 7652a7d2..3ece5a7e 100644 --- a/backends/gaudi/Makefile +++ b/backends/gaudi/Makefile @@ -54,7 +54,7 @@ run-integration-tests: uv pip install -r ${root_dir}/backends/gaudi/server/integration-tests/requirements.txt DOCKER_VOLUME=${root_dir}/data \ HF_TOKEN=`cat ${HOME}/.cache/huggingface/token` \ - pytest --durations=0 -s -vv integration-tests --gaudi + pytest --durations=0 -s -vv ${root_dir}/integration-tests --gaudi # This is used to capture the expected outputs for the integration tests offering an easy way to add more models to the integration tests capture-expected-outputs-for-integration-tests: diff --git a/integration-tests/conftest.py b/integration-tests/conftest.py index 84d24637..594ffd49 100644 --- a/integration-tests/conftest.py +++ b/integration-tests/conftest.py @@ -1,4 +1,8 @@ -pytest_plugins = ["fixtures.neuron.service", "fixtures.neuron.export_models"] +pytest_plugins = [ + "fixtures.neuron.service", + "fixtures.neuron.export_models", + "fixtures.gaudi.service", +] # ruff: noqa: E402 from _pytest.fixtures import SubRequest from huggingface_hub.inference._generated.types.chat_completion import ( diff --git a/integration-tests/fixtures/gaudi/service.py b/integration-tests/fixtures/gaudi/service.py index 6b39a1e6..44c7f999 100644 --- a/integration-tests/fixtures/gaudi/service.py +++ b/integration-tests/fixtures/gaudi/service.py @@ -15,9 +15,10 @@ import pytest from aiohttp import ClientConnectorError, ClientOSError, ServerDisconnectedError from docker.errors import NotFound import logging -from gaudi.test_generate import TEST_CONFIGS +from gaudi.test_gaudi_generate import TEST_CONFIGS from text_generation import AsyncClient from text_generation.types import Response +import huggingface_hub logging.basicConfig( level=logging.INFO, @@ -29,7 +30,7 @@ logger = logging.getLogger(__file__) # Use the latest image from the local docker build DOCKER_IMAGE = os.getenv("DOCKER_IMAGE", "tgi-gaudi") DOCKER_VOLUME = os.getenv("DOCKER_VOLUME", None) -HF_TOKEN = os.getenv("HF_TOKEN", None) +HF_TOKEN = huggingface_hub.get_token() assert ( HF_TOKEN is not None @@ -152,7 +153,7 @@ def data_volume(): @pytest.fixture(scope="module") -def launcher(data_volume): +def gaudi_launcher(event_loop): @contextlib.contextmanager def docker_launcher( model_id: str, @@ -272,7 +273,7 @@ def launcher(data_volume): @pytest.fixture(scope="module") -def generate_load(): +def gaudi_generate_load(): async def generate_load_inner( client: AsyncClient, prompt: str, max_new_tokens: int, n: int ) -> List[Response]: