diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml
index a87191c2..59fd66ce 100644
--- a/.github/workflows/build.yaml
+++ b/.github/workflows/build.yaml
@@ -129,9 +129,9 @@ jobs:
export label_extension="-gaudi"
export docker_volume="/mnt/cache"
export docker_devices=""
- export runs_on="ubuntu-latest"
+ export runs_on="aws-dl1-24xlarge"
export platform=""
- export extra_pytest=""
+ export extra_pytest="--gaudi"
export target=""
esac
echo $dockerfile
diff --git a/backends/gaudi/Makefile b/backends/gaudi/Makefile
index f760f4d6..bae0cdad 100644
--- a/backends/gaudi/Makefile
+++ b/backends/gaudi/Makefile
@@ -50,10 +50,9 @@ local-dev-install: install-dependencies
# In order to run the integration tests, you need to first build the image (make -C backends/gaudi image)
run-integration-tests:
- uv pip install -r ${root_dir}/backends/gaudi/server/integration-tests/requirements.txt
DOCKER_VOLUME=${root_dir}/data \
HF_TOKEN=`cat ${HOME}/.cache/huggingface/token` \
- uv run pytest --durations=0 -sv ${root_dir}/backends/gaudi/server/integration-tests
+ pytest --durations=0 -s -vv ${root_dir}/integration-tests --gaudi
# This is used to capture the expected outputs for the integration tests offering an easy way to add more models to the integration tests
capture-expected-outputs-for-integration-tests:
diff --git a/backends/gaudi/server/integration-tests/pytest.ini b/backends/gaudi/server/integration-tests/pytest.ini
deleted file mode 100644
index 2f4c80e3..00000000
--- a/backends/gaudi/server/integration-tests/pytest.ini
+++ /dev/null
@@ -1,2 +0,0 @@
-[pytest]
-asyncio_mode = auto
diff --git a/backends/gaudi/server/integration-tests/requirements.txt b/backends/gaudi/server/integration-tests/requirements.txt
deleted file mode 100644
index b67d2d8c..00000000
--- a/backends/gaudi/server/integration-tests/requirements.txt
+++ /dev/null
@@ -1,7 +0,0 @@
-pytest >= 8.3.5
-pytest-asyncio >= 0.26.0
-docker >= 7.1.0
-Levenshtein >= 0.27.1
-loguru >= 0.7.3
-aiohttp >= 3.11.14
-text-generation
diff --git a/integration-tests/conftest.py b/integration-tests/conftest.py
index f7852441..b9466ae3 100644
--- a/integration-tests/conftest.py
+++ b/integration-tests/conftest.py
@@ -47,7 +47,6 @@ from text_generation.types import (
ChatComplete,
ChatCompletionChunk,
ChatCompletionComplete,
- Completion,
Details,
Grammar,
InputToken,
@@ -68,6 +67,9 @@ def pytest_addoption(parser):
parser.addoption(
"--neuron", action="store_true", default=False, help="run neuron tests"
)
+ parser.addoption(
+ "--gaudi", action="store_true", default=False, help="run gaudi tests"
+ )
def pytest_configure(config):
@@ -84,6 +86,14 @@ def pytest_collection_modifyitems(config, items):
item.add_marker(pytest.mark.skip(reason="need --release option to run"))
selectors.append(skip_release)
+
+ if config.getoption("--gaudi"):
+
+ def skip_not_gaudi(item):
+ if "gaudi" not in item.keywords:
+                item.add_marker(pytest.mark.skip(reason="skipping non-gaudi test (--gaudi was passed)"))
+
+ selectors.append(skip_not_gaudi)
if config.getoption("--neuron"):
def skip_not_neuron(item):
@@ -99,7 +109,12 @@ def pytest_collection_modifyitems(config, items):
if "neuron" in item.keywords:
item.add_marker(pytest.mark.skip(reason="requires --neuron to run"))
+ def skip_gaudi(item):
+ if "gaudi" in item.keywords:
+ item.add_marker(pytest.mark.skip(reason="requires --gaudi to run"))
+
selectors.append(skip_neuron)
+ selectors.append(skip_gaudi)
for item in items:
for selector in selectors:
selector(item)
@@ -131,7 +146,6 @@ class ResponseComparator(JSONSnapshotExtension):
or isinstance(data, ChatComplete)
or isinstance(data, ChatCompletionChunk)
or isinstance(data, ChatCompletionComplete)
- or isinstance(data, Completion)
or isinstance(data, OAIChatCompletionChunk)
or isinstance(data, OAICompletion)
):
@@ -188,8 +202,6 @@ class ResponseComparator(JSONSnapshotExtension):
if isinstance(choices, List) and len(choices) >= 1:
if "delta" in choices[0]:
return ChatCompletionChunk(**data)
- if "text" in choices[0]:
- return Completion(**data)
return ChatComplete(**data)
else:
return Response(**data)
@@ -282,9 +294,6 @@ class ResponseComparator(JSONSnapshotExtension):
)
)
- def eq_completion(response: Completion, other: Completion) -> bool:
- return response.choices[0].text == other.choices[0].text
-
def eq_chat_complete(response: ChatComplete, other: ChatComplete) -> bool:
return (
response.choices[0].message.content == other.choices[0].message.content
@@ -329,11 +338,6 @@ class ResponseComparator(JSONSnapshotExtension):
if len(serialized_data) == 0:
return len(snapshot_data) == len(serialized_data)
- if isinstance(serialized_data[0], Completion):
- return len(snapshot_data) == len(serialized_data) and all(
- [eq_completion(r, o) for r, o in zip(serialized_data, snapshot_data)]
- )
-
if isinstance(serialized_data[0], ChatComplete):
return len(snapshot_data) == len(serialized_data) and all(
[eq_chat_complete(r, o) for r, o in zip(serialized_data, snapshot_data)]
diff --git a/backends/gaudi/server/integration-tests/conftest.py b/integration-tests/fixtures/gaudi/service.py
similarity index 98%
rename from backends/gaudi/server/integration-tests/conftest.py
rename to integration-tests/fixtures/gaudi/service.py
index c7daf70e..6b39a1e6 100644
--- a/backends/gaudi/server/integration-tests/conftest.py
+++ b/integration-tests/fixtures/gaudi/service.py
@@ -14,11 +14,18 @@ import docker
import pytest
from aiohttp import ClientConnectorError, ClientOSError, ServerDisconnectedError
from docker.errors import NotFound
-from loguru import logger
-from test_model import TEST_CONFIGS
+import logging
+from gaudi.test_model import TEST_CONFIGS
from text_generation import AsyncClient
from text_generation.types import Response
+logging.basicConfig(
+    level=logging.INFO,
+    format="%(asctime)s | %(levelname)-8s | %(name)s:%(funcName)s:%(lineno)d - %(message)s",
+    stream=sys.stdout,
+)
+logger = logging.getLogger(__name__)
+
# Use the latest image from the local docker build
DOCKER_IMAGE = os.getenv("DOCKER_IMAGE", "tgi-gaudi")
DOCKER_VOLUME = os.getenv("DOCKER_VOLUME", None)
@@ -48,12 +55,6 @@ HABANA_RUN_ARGS = {
"cap_add": ["sys_nice"],
}
-logger.add(
- sys.stderr,
- format="{time:YYYY-MM-DD HH:mm:ss} | {level: <8} | {name}:{function}:{line} - {message}",
- level="INFO",
-)
-
def stream_container_logs(container, test_name):
"""Stream container logs in a separate thread."""
diff --git a/backends/gaudi/server/integration-tests/capture_expected_outputs.py b/integration-tests/gaudi/capture_expected_outputs.py
similarity index 98%
rename from backends/gaudi/server/integration-tests/capture_expected_outputs.py
rename to integration-tests/gaudi/capture_expected_outputs.py
index 051b9d69..6a5d4a68 100644
--- a/backends/gaudi/server/integration-tests/capture_expected_outputs.py
+++ b/integration-tests/gaudi/capture_expected_outputs.py
@@ -3,7 +3,7 @@ import os
from typing import Dict, Any, Generator
import pytest
-from test_model import TEST_CONFIGS
+from test_model import TEST_CONFIGS
UNKNOWN_CONFIGS = {
name: config
diff --git a/backends/gaudi/server/integration-tests/test_model.py b/integration-tests/gaudi/test_model.py
similarity index 97%
rename from backends/gaudi/server/integration-tests/test_model.py
rename to integration-tests/gaudi/test_model.py
index cb2bf6a9..cfdb0554 100644
--- a/backends/gaudi/server/integration-tests/test_model.py
+++ b/integration-tests/gaudi/test_model.py
@@ -2,7 +2,6 @@ from typing import Any, Dict
from text_generation import AsyncClient
import pytest
-from Levenshtein import distance as levenshtein_distance
# The "args" config is not optimized for speed but only check that the inference is working for the different models architectures
TEST_CONFIGS = {
@@ -271,6 +270,4 @@ async def test_model_multiple_requests(
expected = expected_outputs["batch"]
for r in responses:
assert r.details.generated_tokens == 32
- # Compute the similarity with the expectation using the levenshtein distance
- # We should not have more than two substitutions or additions
- assert levenshtein_distance(r.generated_text, expected) < 3
+ assert r.generated_text == expected