feat(ci): llama3 test working

This commit is contained in:
baptiste 2025-04-10 08:32:28 +00:00 committed by baptiste
parent b4917f67e4
commit 7779d0c786
3 changed files with 11 additions and 6 deletions

View File

@@ -54,7 +54,7 @@ run-integration-tests:
uv pip install -r ${root_dir}/backends/gaudi/server/integration-tests/requirements.txt
DOCKER_VOLUME=${root_dir}/data \
HF_TOKEN=`cat ${HOME}/.cache/huggingface/token` \
pytest --durations=0 -s -vv integration-tests --gaudi
pytest --durations=0 -s -vv ${root_dir}/integration-tests --gaudi
# Captures the expected outputs for the integration tests, providing an easy way to add more models to the test suite.
capture-expected-outputs-for-integration-tests:

View File

@@ -1,4 +1,8 @@
pytest_plugins = ["fixtures.neuron.service", "fixtures.neuron.export_models"]
pytest_plugins = [
"fixtures.neuron.service",
"fixtures.neuron.export_models",
"fixtures.gaudi.service",
]
# ruff: noqa: E402
from _pytest.fixtures import SubRequest
from huggingface_hub.inference._generated.types.chat_completion import (

View File

@@ -15,9 +15,10 @@ import pytest
from aiohttp import ClientConnectorError, ClientOSError, ServerDisconnectedError
from docker.errors import NotFound
import logging
from gaudi.test_generate import TEST_CONFIGS
from gaudi.test_gaudi_generate import TEST_CONFIGS
from text_generation import AsyncClient
from text_generation.types import Response
import huggingface_hub
logging.basicConfig(
level=logging.INFO,
@@ -29,7 +30,7 @@ logger = logging.getLogger(__file__)
# Use the latest image from the local docker build
DOCKER_IMAGE = os.getenv("DOCKER_IMAGE", "tgi-gaudi")
DOCKER_VOLUME = os.getenv("DOCKER_VOLUME", None)
HF_TOKEN = os.getenv("HF_TOKEN", None)
HF_TOKEN = huggingface_hub.get_token()
assert (
HF_TOKEN is not None
@@ -152,7 +153,7 @@ def data_volume():
@pytest.fixture(scope="module")
def launcher(data_volume):
def gaudi_launcher(event_loop):
@contextlib.contextmanager
def docker_launcher(
model_id: str,
@@ -272,7 +273,7 @@ def launcher(data_volume):
@pytest.fixture(scope="module")
def generate_load():
def gaudi_generate_load():
async def generate_load_inner(
client: AsyncClient, prompt: str, max_new_tokens: int, n: int
) -> List[Response]: