From 918b29a0afd58c0ec2e6b3a3b96902dff992e0d2 Mon Sep 17 00:00:00 2001
From: baptiste
Date: Thu, 10 Apr 2025 07:46:59 +0000
Subject: [PATCH] wip(test): adding test to ci

---
 .github/workflows/build.yaml                  |  4 +--
 backends/gaudi/Makefile                       |  3 +-
 .../gaudi/server/integration-tests/pytest.ini |  2 --
 .../server/integration-tests/requirements.txt |  7 -----
 integration-tests/conftest.py                 | 28 +++++++++++--------
 .../fixtures/gaudi/service.py                 | 17 +++++------
 .../gaudi}/capture_expected_outputs.py        |  2 +-
 .../gaudi}/test_model.py                      |  5 +---
 8 files changed, 30 insertions(+), 38 deletions(-)
 delete mode 100644 backends/gaudi/server/integration-tests/pytest.ini
 delete mode 100644 backends/gaudi/server/integration-tests/requirements.txt
 rename backends/gaudi/server/integration-tests/conftest.py => integration-tests/fixtures/gaudi/service.py (98%)
 rename {backends/gaudi/server/integration-tests => integration-tests/gaudi}/capture_expected_outputs.py (98%)
 rename {backends/gaudi/server/integration-tests => integration-tests/gaudi}/test_model.py (97%)

diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml
index a87191c2..59fd66ce 100644
--- a/.github/workflows/build.yaml
+++ b/.github/workflows/build.yaml
@@ -129,9 +129,9 @@ jobs:
             export label_extension="-gaudi"
             export docker_volume="/mnt/cache"
             export docker_devices=""
-            export runs_on="ubuntu-latest"
+            export runs_on="aws-dl1-24xlarge"
             export platform=""
-            export extra_pytest=""
+            export extra_pytest="--gaudi"
             export target=""
           esac
           echo $dockerfile
diff --git a/backends/gaudi/Makefile b/backends/gaudi/Makefile
index f760f4d6..bae0cdad 100644
--- a/backends/gaudi/Makefile
+++ b/backends/gaudi/Makefile
@@ -50,10 +50,9 @@ local-dev-install: install-dependencies
 
 # In order to run the integration tests, you need to first build the image (make -C backends/gaudi image)
 run-integration-tests:
-	uv pip install -r ${root_dir}/backends/gaudi/server/integration-tests/requirements.txt
 	DOCKER_VOLUME=${root_dir}/data \
 	HF_TOKEN=`cat ${HOME}/.cache/huggingface/token` \
-	uv run pytest --durations=0 -sv ${root_dir}/backends/gaudi/server/integration-tests
+	pytest --durations=0 -s -vv ${root_dir}/integration-tests --gaudi
 
 # This is used to capture the expected outputs for the integration tests offering an easy way to add more models to the integration tests
 capture-expected-outputs-for-integration-tests:
diff --git a/backends/gaudi/server/integration-tests/pytest.ini b/backends/gaudi/server/integration-tests/pytest.ini
deleted file mode 100644
index 2f4c80e3..00000000
--- a/backends/gaudi/server/integration-tests/pytest.ini
+++ /dev/null
@@ -1,2 +0,0 @@
-[pytest]
-asyncio_mode = auto
diff --git a/backends/gaudi/server/integration-tests/requirements.txt b/backends/gaudi/server/integration-tests/requirements.txt
deleted file mode 100644
index b67d2d8c..00000000
--- a/backends/gaudi/server/integration-tests/requirements.txt
+++ /dev/null
@@ -1,7 +0,0 @@
-pytest >= 8.3.5
-pytest-asyncio >= 0.26.0
-docker >= 7.1.0
-Levenshtein >= 0.27.1
-loguru >= 0.7.3
-aiohttp >= 3.11.14
-text-generation
diff --git a/integration-tests/conftest.py b/integration-tests/conftest.py
index f7852441..b9466ae3 100644
--- a/integration-tests/conftest.py
+++ b/integration-tests/conftest.py
@@ -47,7 +47,6 @@ from text_generation.types import (
     ChatComplete,
     ChatCompletionChunk,
     ChatCompletionComplete,
-    Completion,
     Details,
     Grammar,
     InputToken,
@@ -68,6 +67,9 @@ def pytest_addoption(parser):
     parser.addoption(
         "--neuron", action="store_true", default=False, help="run neuron tests"
     )
+    parser.addoption(
+        "--gaudi", action="store_true", default=False, help="run gaudi tests"
+    )
 
 
 def pytest_configure(config):
@@ -84,6 +86,14 @@ def pytest_collection_modifyitems(config, items):
                 item.add_marker(pytest.mark.skip(reason="need --release option to run"))
 
         selectors.append(skip_release)
+
+    if config.getoption("--gaudi"):
+
+        def skip_not_gaudi(item):
+            if "gaudi" not in item.keywords:
+                item.add_marker(pytest.mark.skip(reason="requires --gaudi to run"))
+
+        selectors.append(skip_not_gaudi)
     if config.getoption("--neuron"):
 
         def skip_not_neuron(item):
@@ -99,7 +109,12 @@ def pytest_collection_modifyitems(config, items):
             if "neuron" in item.keywords:
                 item.add_marker(pytest.mark.skip(reason="requires --neuron to run"))
 
+        def skip_gaudi(item):
+            if "gaudi" in item.keywords:
+                item.add_marker(pytest.mark.skip(reason="requires --gaudi to run"))
+
         selectors.append(skip_neuron)
+        selectors.append(skip_gaudi)
     for item in items:
         for selector in selectors:
             selector(item)
@@ -131,7 +146,6 @@ class ResponseComparator(JSONSnapshotExtension):
             or isinstance(data, ChatComplete)
             or isinstance(data, ChatCompletionChunk)
             or isinstance(data, ChatCompletionComplete)
-            or isinstance(data, Completion)
             or isinstance(data, OAIChatCompletionChunk)
             or isinstance(data, OAICompletion)
         ):
@@ -188,8 +202,6 @@ class ResponseComparator(JSONSnapshotExtension):
             if isinstance(choices, List) and len(choices) >= 1:
                 if "delta" in choices[0]:
                     return ChatCompletionChunk(**data)
-                if "text" in choices[0]:
-                    return Completion(**data)
                 return ChatComplete(**data)
             else:
                 return Response(**data)
@@ -282,9 +294,6 @@ class ResponseComparator(JSONSnapshotExtension):
                 )
             )
 
-        def eq_completion(response: Completion, other: Completion) -> bool:
-            return response.choices[0].text == other.choices[0].text
-
         def eq_chat_complete(response: ChatComplete, other: ChatComplete) -> bool:
             return (
                 response.choices[0].message.content == other.choices[0].message.content
@@ -329,11 +338,6 @@ class ResponseComparator(JSONSnapshotExtension):
         if len(serialized_data) == 0:
             return len(snapshot_data) == len(serialized_data)
 
-        if isinstance(serialized_data[0], Completion):
-            return len(snapshot_data) == len(serialized_data) and all(
-                [eq_completion(r, o) for r, o in zip(serialized_data, snapshot_data)]
-            )
-
         if isinstance(serialized_data[0], ChatComplete):
             return len(snapshot_data) == len(serialized_data) and all(
                 [eq_chat_complete(r, o) for r, o in zip(serialized_data, snapshot_data)]
diff --git a/backends/gaudi/server/integration-tests/conftest.py b/integration-tests/fixtures/gaudi/service.py
similarity index 98%
rename from backends/gaudi/server/integration-tests/conftest.py
rename to integration-tests/fixtures/gaudi/service.py
index c7daf70e..6b39a1e6 100644
--- a/backends/gaudi/server/integration-tests/conftest.py
+++ b/integration-tests/fixtures/gaudi/service.py
@@ -14,11 +14,18 @@ import docker
 import pytest
 from aiohttp import ClientConnectorError, ClientOSError, ServerDisconnectedError
 from docker.errors import NotFound
-from loguru import logger
-from test_model import TEST_CONFIGS
+import logging
+from gaudi.test_generate import TEST_CONFIGS
 from text_generation import AsyncClient
 from text_generation.types import Response
 
+logging.basicConfig(
+    level=logging.INFO,
+    format="%(asctime)s | %(levelname)-8s | %(name)s:%(funcName)s:%(lineno)d - %(message)s",
+    stream=sys.stdout,
+)
+logger = logging.getLogger(__file__)
+
 # Use the latest image from the local docker build
 DOCKER_IMAGE = os.getenv("DOCKER_IMAGE", "tgi-gaudi")
 DOCKER_VOLUME = os.getenv("DOCKER_VOLUME", None)
os.getenv("DOCKER_VOLUME", None) @@ -48,12 +55,6 @@ HABANA_RUN_ARGS = { "cap_add": ["sys_nice"], } -logger.add( - sys.stderr, - format="{time:YYYY-MM-DD HH:mm:ss} | {level: <8} | {name}:{function}:{line} - {message}", - level="INFO", -) - def stream_container_logs(container, test_name): """Stream container logs in a separate thread.""" diff --git a/backends/gaudi/server/integration-tests/capture_expected_outputs.py b/integration-tests/gaudi/capture_expected_outputs.py similarity index 98% rename from backends/gaudi/server/integration-tests/capture_expected_outputs.py rename to integration-tests/gaudi/capture_expected_outputs.py index 051b9d69..6a5d4a68 100644 --- a/backends/gaudi/server/integration-tests/capture_expected_outputs.py +++ b/integration-tests/gaudi/capture_expected_outputs.py @@ -3,7 +3,7 @@ import os from typing import Dict, Any, Generator import pytest -from test_model import TEST_CONFIGS +from test_generate import TEST_CONFIGS UNKNOWN_CONFIGS = { name: config diff --git a/backends/gaudi/server/integration-tests/test_model.py b/integration-tests/gaudi/test_model.py similarity index 97% rename from backends/gaudi/server/integration-tests/test_model.py rename to integration-tests/gaudi/test_model.py index cb2bf6a9..cfdb0554 100644 --- a/backends/gaudi/server/integration-tests/test_model.py +++ b/integration-tests/gaudi/test_model.py @@ -2,7 +2,6 @@ from typing import Any, Dict from text_generation import AsyncClient import pytest -from Levenshtein import distance as levenshtein_distance # The "args" config is not optimized for speed but only check that the inference is working for the different models architectures TEST_CONFIGS = { @@ -271,6 +270,4 @@ async def test_model_multiple_requests( expected = expected_outputs["batch"] for r in responses: assert r.details.generated_tokens == 32 - # Compute the similarity with the expectation using the levenshtein distance - # We should not have more than two substitutions or additions - assert levenshtein_distance(r.generated_text, expected) < 3 + assert r.generated_text == expected