From 4e40467c6d6979dff2927cc853a10ad692972824 Mon Sep 17 00:00:00 2001 From: baptiste Date: Thu, 10 Apr 2025 07:46:59 +0000 Subject: [PATCH 01/19] wip(test): adding test to ci --- .github/workflows/build.yaml | 4 +-- backends/gaudi/Makefile | 2 +- .../gaudi/server/integration-tests/pytest.ini | 2 -- .../server/integration-tests/requirements.txt | 7 ----- integration-tests/conftest.py | 28 +++++++++++-------- .../fixtures/gaudi/service.py | 17 +++++------ .../gaudi}/capture_expected_outputs.py | 2 +- .../gaudi}/test_model.py | 5 +--- 8 files changed, 30 insertions(+), 37 deletions(-) delete mode 100644 backends/gaudi/server/integration-tests/pytest.ini delete mode 100644 backends/gaudi/server/integration-tests/requirements.txt rename backends/gaudi/server/integration-tests/conftest.py => integration-tests/fixtures/gaudi/service.py (98%) rename {backends/gaudi/server/integration-tests => integration-tests/gaudi}/capture_expected_outputs.py (98%) rename {backends/gaudi/server/integration-tests => integration-tests/gaudi}/test_model.py (97%) diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index a87191c2..59fd66ce 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -129,9 +129,9 @@ jobs: export label_extension="-gaudi" export docker_volume="/mnt/cache" export docker_devices="" - export runs_on="ubuntu-latest" + export runs_on="aws-dl1-24xlarge" export platform="" - export extra_pytest="" + export extra_pytest="--gaudi" export target="" esac echo $dockerfile diff --git a/backends/gaudi/Makefile b/backends/gaudi/Makefile index e135f16e..7652a7d2 100644 --- a/backends/gaudi/Makefile +++ b/backends/gaudi/Makefile @@ -54,7 +54,7 @@ run-integration-tests: uv pip install -r ${root_dir}/backends/gaudi/server/integration-tests/requirements.txt DOCKER_VOLUME=${root_dir}/data \ HF_TOKEN=`cat ${HOME}/.cache/huggingface/token` \ - uv run pytest --durations=0 -sv ${root_dir}/backends/gaudi/server/integration-tests + pytest 
--durations=0 -s -vv integration-tests --gaudi # This is used to capture the expected outputs for the integration tests offering an easy way to add more models to the integration tests capture-expected-outputs-for-integration-tests: diff --git a/backends/gaudi/server/integration-tests/pytest.ini b/backends/gaudi/server/integration-tests/pytest.ini deleted file mode 100644 index 2f4c80e3..00000000 --- a/backends/gaudi/server/integration-tests/pytest.ini +++ /dev/null @@ -1,2 +0,0 @@ -[pytest] -asyncio_mode = auto diff --git a/backends/gaudi/server/integration-tests/requirements.txt b/backends/gaudi/server/integration-tests/requirements.txt deleted file mode 100644 index b67d2d8c..00000000 --- a/backends/gaudi/server/integration-tests/requirements.txt +++ /dev/null @@ -1,7 +0,0 @@ -pytest >= 8.3.5 -pytest-asyncio >= 0.26.0 -docker >= 7.1.0 -Levenshtein >= 0.27.1 -loguru >= 0.7.3 -aiohttp >= 3.11.14 -text-generation diff --git a/integration-tests/conftest.py b/integration-tests/conftest.py index f7852441..b9466ae3 100644 --- a/integration-tests/conftest.py +++ b/integration-tests/conftest.py @@ -47,7 +47,6 @@ from text_generation.types import ( ChatComplete, ChatCompletionChunk, ChatCompletionComplete, - Completion, Details, Grammar, InputToken, @@ -68,6 +67,9 @@ def pytest_addoption(parser): parser.addoption( "--neuron", action="store_true", default=False, help="run neuron tests" ) + parser.addoption( + "--gaudi", action="store_true", default=False, help="run gaudi tests" + ) def pytest_configure(config): @@ -84,6 +86,14 @@ def pytest_collection_modifyitems(config, items): item.add_marker(pytest.mark.skip(reason="need --release option to run")) selectors.append(skip_release) + + if config.getoption("--gaudi"): + + def skip_not_gaudi(item): + if "gaudi" not in item.keywords: + item.add_marker(pytest.mark.skip(reason="requires --gaudi to run")) + + selectors.append(skip_not_gaudi) if config.getoption("--neuron"): def skip_not_neuron(item): @@ -99,7 +109,12 @@ def 
pytest_collection_modifyitems(config, items): if "neuron" in item.keywords: item.add_marker(pytest.mark.skip(reason="requires --neuron to run")) + def skip_gaudi(item): + if "gaudi" in item.keywords: + item.add_marker(pytest.mark.skip(reason="requires --gaudi to run")) + selectors.append(skip_neuron) + selectors.append(skip_gaudi) for item in items: for selector in selectors: selector(item) @@ -131,7 +146,6 @@ class ResponseComparator(JSONSnapshotExtension): or isinstance(data, ChatComplete) or isinstance(data, ChatCompletionChunk) or isinstance(data, ChatCompletionComplete) - or isinstance(data, Completion) or isinstance(data, OAIChatCompletionChunk) or isinstance(data, OAICompletion) ): @@ -188,8 +202,6 @@ class ResponseComparator(JSONSnapshotExtension): if isinstance(choices, List) and len(choices) >= 1: if "delta" in choices[0]: return ChatCompletionChunk(**data) - if "text" in choices[0]: - return Completion(**data) return ChatComplete(**data) else: return Response(**data) @@ -282,9 +294,6 @@ class ResponseComparator(JSONSnapshotExtension): ) ) - def eq_completion(response: Completion, other: Completion) -> bool: - return response.choices[0].text == other.choices[0].text - def eq_chat_complete(response: ChatComplete, other: ChatComplete) -> bool: return ( response.choices[0].message.content == other.choices[0].message.content @@ -329,11 +338,6 @@ class ResponseComparator(JSONSnapshotExtension): if len(serialized_data) == 0: return len(snapshot_data) == len(serialized_data) - if isinstance(serialized_data[0], Completion): - return len(snapshot_data) == len(serialized_data) and all( - [eq_completion(r, o) for r, o in zip(serialized_data, snapshot_data)] - ) - if isinstance(serialized_data[0], ChatComplete): return len(snapshot_data) == len(serialized_data) and all( [eq_chat_complete(r, o) for r, o in zip(serialized_data, snapshot_data)] diff --git a/backends/gaudi/server/integration-tests/conftest.py b/integration-tests/fixtures/gaudi/service.py similarity index 
98% rename from backends/gaudi/server/integration-tests/conftest.py rename to integration-tests/fixtures/gaudi/service.py index c7daf70e..6b39a1e6 100644 --- a/backends/gaudi/server/integration-tests/conftest.py +++ b/integration-tests/fixtures/gaudi/service.py @@ -14,11 +14,18 @@ import docker import pytest from aiohttp import ClientConnectorError, ClientOSError, ServerDisconnectedError from docker.errors import NotFound -from loguru import logger -from test_model import TEST_CONFIGS +import logging +from gaudi.test_generate import TEST_CONFIGS from text_generation import AsyncClient from text_generation.types import Response +logging.basicConfig( + level=logging.INFO, + format="{time:YYYY-MM-DD HH:mm:ss} | {level: <8} | {name}:{function}:{line} - {message}", + stream=sys.stdout, +) +logger = logging.getLogger(__file__) + # Use the latest image from the local docker build DOCKER_IMAGE = os.getenv("DOCKER_IMAGE", "tgi-gaudi") DOCKER_VOLUME = os.getenv("DOCKER_VOLUME", None) @@ -48,12 +55,6 @@ HABANA_RUN_ARGS = { "cap_add": ["sys_nice"], } -logger.add( - sys.stderr, - format="{time:YYYY-MM-DD HH:mm:ss} | {level: <8} | {name}:{function}:{line} - {message}", - level="INFO", -) - def stream_container_logs(container, test_name): """Stream container logs in a separate thread.""" diff --git a/backends/gaudi/server/integration-tests/capture_expected_outputs.py b/integration-tests/gaudi/capture_expected_outputs.py similarity index 98% rename from backends/gaudi/server/integration-tests/capture_expected_outputs.py rename to integration-tests/gaudi/capture_expected_outputs.py index 051b9d69..6a5d4a68 100644 --- a/backends/gaudi/server/integration-tests/capture_expected_outputs.py +++ b/integration-tests/gaudi/capture_expected_outputs.py @@ -3,7 +3,7 @@ import os from typing import Dict, Any, Generator import pytest -from test_model import TEST_CONFIGS +from test_generate import TEST_CONFIGS UNKNOWN_CONFIGS = { name: config diff --git 
a/backends/gaudi/server/integration-tests/test_model.py b/integration-tests/gaudi/test_model.py similarity index 97% rename from backends/gaudi/server/integration-tests/test_model.py rename to integration-tests/gaudi/test_model.py index 40b27164..407bccc2 100644 --- a/backends/gaudi/server/integration-tests/test_model.py +++ b/integration-tests/gaudi/test_model.py @@ -2,7 +2,6 @@ from typing import Any, Dict from text_generation import AsyncClient import pytest -from Levenshtein import distance as levenshtein_distance # The "args" config is not optimized for speed but only check that the inference is working for the different models architectures TEST_CONFIGS = { @@ -257,6 +256,4 @@ async def test_model_multiple_requests( expected = expected_outputs["batch"] for r in responses: assert r.details.generated_tokens == 32 - # Compute the similarity with the expectation using the levenshtein distance - # We should not have more than two substitutions or additions - assert levenshtein_distance(r.generated_text, expected) < 3 + assert r.generated_text == expected From b4917f67e49f97bdf36c1a6dc5c4a362973b73ef Mon Sep 17 00:00:00 2001 From: baptiste Date: Thu, 10 Apr 2025 07:52:20 +0000 Subject: [PATCH 02/19] wip: able to launch gaudi tests --- integration-tests/conftest.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/integration-tests/conftest.py b/integration-tests/conftest.py index b9466ae3..84d24637 100644 --- a/integration-tests/conftest.py +++ b/integration-tests/conftest.py @@ -94,6 +94,14 @@ def pytest_collection_modifyitems(config, items): item.add_marker(pytest.mark.skip(reason="requires --gaudi to run")) selectors.append(skip_not_gaudi) + else: + + def skip_gaudi(item): + if "gaudi" in item.keywords: + item.add_marker(pytest.mark.skip(reason="requires --gaudi to run")) + + selectors.append(skip_gaudi) + if config.getoption("--neuron"): def skip_not_neuron(item): @@ -109,12 +117,8 @@ def pytest_collection_modifyitems(config, 
items): if "neuron" in item.keywords: item.add_marker(pytest.mark.skip(reason="requires --neuron to run")) - def skip_gaudi(item): - if "gaudi" in item.keywords: - item.add_marker(pytest.mark.skip(reason="requires --gaudi to run")) - selectors.append(skip_neuron) - selectors.append(skip_gaudi) + for item in items: for selector in selectors: selector(item) From 7779d0c786e3056a29af08c83f4056460dd90120 Mon Sep 17 00:00:00 2001 From: baptiste Date: Thu, 10 Apr 2025 08:32:28 +0000 Subject: [PATCH 03/19] feat(ci): llama3 test working --- backends/gaudi/Makefile | 2 +- integration-tests/conftest.py | 6 +++++- integration-tests/fixtures/gaudi/service.py | 9 +++++---- 3 files changed, 11 insertions(+), 6 deletions(-) diff --git a/backends/gaudi/Makefile b/backends/gaudi/Makefile index 7652a7d2..3ece5a7e 100644 --- a/backends/gaudi/Makefile +++ b/backends/gaudi/Makefile @@ -54,7 +54,7 @@ run-integration-tests: uv pip install -r ${root_dir}/backends/gaudi/server/integration-tests/requirements.txt DOCKER_VOLUME=${root_dir}/data \ HF_TOKEN=`cat ${HOME}/.cache/huggingface/token` \ - pytest --durations=0 -s -vv integration-tests --gaudi + pytest --durations=0 -s -vv ${root_dir}/integration-tests --gaudi # This is used to capture the expected outputs for the integration tests offering an easy way to add more models to the integration tests capture-expected-outputs-for-integration-tests: diff --git a/integration-tests/conftest.py b/integration-tests/conftest.py index 84d24637..594ffd49 100644 --- a/integration-tests/conftest.py +++ b/integration-tests/conftest.py @@ -1,4 +1,8 @@ -pytest_plugins = ["fixtures.neuron.service", "fixtures.neuron.export_models"] +pytest_plugins = [ + "fixtures.neuron.service", + "fixtures.neuron.export_models", + "fixtures.gaudi.service", +] # ruff: noqa: E402 from _pytest.fixtures import SubRequest from huggingface_hub.inference._generated.types.chat_completion import ( diff --git a/integration-tests/fixtures/gaudi/service.py 
b/integration-tests/fixtures/gaudi/service.py index 6b39a1e6..44c7f999 100644 --- a/integration-tests/fixtures/gaudi/service.py +++ b/integration-tests/fixtures/gaudi/service.py @@ -15,9 +15,10 @@ import pytest from aiohttp import ClientConnectorError, ClientOSError, ServerDisconnectedError from docker.errors import NotFound import logging -from gaudi.test_generate import TEST_CONFIGS +from gaudi.test_gaudi_generate import TEST_CONFIGS from text_generation import AsyncClient from text_generation.types import Response +import huggingface_hub logging.basicConfig( level=logging.INFO, @@ -29,7 +30,7 @@ logger = logging.getLogger(__file__) # Use the latest image from the local docker build DOCKER_IMAGE = os.getenv("DOCKER_IMAGE", "tgi-gaudi") DOCKER_VOLUME = os.getenv("DOCKER_VOLUME", None) -HF_TOKEN = os.getenv("HF_TOKEN", None) +HF_TOKEN = huggingface_hub.get_token() assert ( HF_TOKEN is not None @@ -152,7 +153,7 @@ def data_volume(): @pytest.fixture(scope="module") -def launcher(data_volume): +def gaudi_launcher(event_loop): @contextlib.contextmanager def docker_launcher( model_id: str, @@ -272,7 +273,7 @@ def launcher(data_volume): @pytest.fixture(scope="module") -def generate_load(): +def gaudi_generate_load(): async def generate_load_inner( client: AsyncClient, prompt: str, max_new_tokens: int, n: int ) -> List[Response]: From 781dd203e96379b55d825073a824c655bb43d0ce Mon Sep 17 00:00:00 2001 From: baptiste Date: Thu, 10 Apr 2025 08:32:37 +0000 Subject: [PATCH 04/19] feat(ci): llama3 test working --- .../gaudi/test_gaudi_generate.py | 273 ++++++++++++++++++ 1 file changed, 273 insertions(+) create mode 100644 integration-tests/gaudi/test_gaudi_generate.py diff --git a/integration-tests/gaudi/test_gaudi_generate.py b/integration-tests/gaudi/test_gaudi_generate.py new file mode 100644 index 00000000..423ac17f --- /dev/null +++ b/integration-tests/gaudi/test_gaudi_generate.py @@ -0,0 +1,273 @@ +from typing import Any, Dict + +from text_generation import AsyncClient 
+import pytest + +# The "args" config is not optimized for speed but only check that the inference is working for the different models architectures +TEST_CONFIGS = { + "meta-llama/Llama-3.1-8B-Instruct-shared": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "input": "What is Deep Learning?", + "expected_greedy_output": " A Beginner’s Guide\nDeep learning is a subset of machine learning that involves the use", + "expected_batch_output": " A Beginner’s Guide\nDeep learning is a subset of machine learning that involves the use", + "args": [ + "--sharded", + "true", + "--num-shard", + "8", + "--max-input-tokens", + "512", + "--max-total-tokens", + "1024", + "--max-batch-size", + "8", + "--max-batch-prefill-tokens", + "2048", + ], + }, + "meta-llama/Llama-3.1-8B-Instruct": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "input": "What is Deep Learning?", + "expected_greedy_output": " A Beginner’s Guide\nDeep learning is a subset of machine learning that involves the use of artificial neural networks to analyze and interpret data. It is a type of", + "expected_batch_output": " A Beginner’s Guide\nDeep learning is a subset of machine learning that involves the use of artificial neural networks to analyze and interpret data. 
It is a type of", + "env_config": {}, + "args": [ + "--max-input-tokens", + "512", + "--max-total-tokens", + "1024", + "--max-batch-size", + "4", + "--max-batch-prefill-tokens", + "2048", + ], + }, + "meta-llama/Llama-2-7b-chat-hf": { + "model_id": "meta-llama/Llama-2-7b-chat-hf", + "input": "What is Deep Learning?", + "expected_greedy_output": "\n\nDeep learning (also known as deep structured learning) is part of a broader family of machine learning techniques based on artificial neural networks\u2014specific", + "expected_batch_output": "\n\nDeep learning (also known as deep structured learning) is part of a broader family of machine learning techniques based on artificial neural networks\u2014specific", + "args": [ + "--max-input-tokens", + "512", + "--max-total-tokens", + "1024", + "--max-batch-size", + "4", + "--max-batch-prefill-tokens", + "2048", + ], + }, + "mistralai/Mistral-7B-Instruct-v0.3": { + "model_id": "mistralai/Mistral-7B-Instruct-v0.3", + "input": "What is Deep Learning?", + "expected_greedy_output": "\n\nDeep learning is a subset of machine learning in artificial intelligence (AI) that has networks capable of learning unsupervised from data that is unstructured", + "expected_batch_output": "\n\nDeep learning is a subset of machine learning in artificial intelligence (AI) that has networks capable of learning unsupervised from data that is unstructured", + "args": [ + "--max-input-tokens", + "512", + "--max-total-tokens", + "1024", + "--max-batch-size", + "4", + "--max-batch-prefill-tokens", + "2048", + ], + }, + "bigcode/starcoder2-3b": { + "model_id": "bigcode/starcoder2-3b", + "input": "What is Deep Learning?", + "expected_greedy_output": "\n\nDeep learning is a subset of machine learning that uses artificial neural networks to perform tasks.\n\nNeural networks are a type of machine learning algorithm that", + "expected_batch_output": "\n\nDeep learning is a subset of machine learning that uses artificial neural networks to perform 
tasks.\n\nNeural networks are a type of machine learning algorithm that", + "args": [ + "--max-input-tokens", + "512", + "--max-total-tokens", + "1024", + "--max-batch-size", + "4", + "--max-batch-prefill-tokens", + "2048", + ], + }, + "google/gemma-7b-it": { + "model_id": "google/gemma-7b-it", + "input": "What is Deep Learning?", + "expected_greedy_output": "\n\nDeep learning is a subset of machine learning that uses artificial neural networks to learn from large amounts of data. Neural networks are inspired by the structure and function of", + "expected_batch_output": "\n\nDeep learning is a subset of machine learning that uses artificial neural networks to learn from large amounts of data. Neural networks are inspired by the structure and function of", + "args": [ + "--max-input-tokens", + "512", + "--max-total-tokens", + "1024", + "--max-batch-size", + "4", + "--max-batch-prefill-tokens", + "2048", + ], + }, + "Qwen/Qwen2-0.5B-Instruct": { + "model_id": "Qwen/Qwen2-0.5B-Instruct", + "input": "What is Deep Learning?", + "expected_greedy_output": " Deep Learning is a type of machine learning that is based on the principles of artificial neural networks. It is a type of machine learning that is used to train models", + "expected_batch_output": " Deep Learning is a type of machine learning that is based on the principles of artificial neural networks. It is a type of machine learning that is used to train models", + "args": [ + "--max-input-tokens", + "512", + "--max-total-tokens", + "1024", + "--max-batch-size", + "4", + "--max-batch-prefill-tokens", + "2048", + ], + }, + "tiiuae/falcon-7b-instruct": { + "model_id": "tiiuae/falcon-7b-instruct", + "input": "What is Deep Learning?", + "expected_greedy_output": "\nDeep learning is a branch of machine learning that uses artificial neural networks to learn and make decisions. 
It is based on the concept of hierarchical learning, where a", + "expected_batch_output": "\nDeep learning is a branch of machine learning that uses artificial neural networks to learn and make decisions. It is based on the concept of hierarchical learning, where a", + "args": [ + "--max-input-tokens", + "512", + "--max-total-tokens", + "1024", + "--max-batch-size", + "4", + ], + }, + "microsoft/phi-1_5": { + "model_id": "microsoft/phi-1_5", + "input": "What is Deep Learning?", + "expected_greedy_output": "\n\nDeep Learning is a subfield of Machine Learning that focuses on building neural networks with multiple layers of interconnected nodes. These networks are designed to learn from large", + "expected_batch_output": "\n\nDeep Learning is a subfield of Machine Learning that focuses on building neural networks with multiple layers of interconnected nodes. These networks are designed to learn from large", + "args": [ + "--max-input-tokens", + "512", + "--max-total-tokens", + "1024", + "--max-batch-size", + "4", + ], + }, + "openai-community/gpt2": { + "model_id": "openai-community/gpt2", + "input": "What is Deep Learning?", + "expected_greedy_output": "\n\nDeep learning is a new field of research that has been around for a long time. It is a new field of research that has been around for a", + "expected_batch_output": "\n\nDeep learning is a new field of research that has been around for a long time. 
It is a new field of research that has been around for a", + "args": [ + "--max-input-tokens", + "512", + "--max-total-tokens", + "1024", + "--max-batch-size", + "4", + ], + }, + "facebook/opt-125m": { + "model_id": "facebook/opt-125m", + "input": "What is Deep Learning?", + "expected_greedy_output": "\nAbout the Author\n\nAbout the Author\n\nAbout the Author\n\nAbout the Author\n\nAbout the Author\n\nAbout the Author\n\nAbout", + "expected_batch_output": "\nAbout the Author\n\nAbout the Author\n\nAbout the Author\n\nAbout the Author\n\nAbout the Author\n\nAbout the Author\n\nAbout", + "args": [ + "--max-input-tokens", + "512", + "--max-total-tokens", + "1024", + "--max-batch-size", + "4", + ], + }, + "EleutherAI/gpt-j-6b": { + "model_id": "EleutherAI/gpt-j-6b", + "input": "What is Deep Learning?", + "expected_greedy_output": "\n\nDeep learning is a subset of machine learning that is based on the idea of neural networks. Neural networks are a type of artificial intelligence that is inspired by", + "expected_batch_output": "\n\nDeep learning is a subset of machine learning that is based on the idea of neural networks. 
Neural networks are a type of artificial intelligence that is inspired by", + "args": [ + "--max-input-tokens", + "512", + "--max-total-tokens", + "1024", + "--max-batch-size", + "4", + ], + }, +} + +print(f"Testing {len(TEST_CONFIGS)} models") + + +@pytest.fixture(scope="module", params=TEST_CONFIGS.keys()) +def test_config(request) -> Dict[str, Any]: + """Fixture that provides model configurations for testing.""" + test_config = TEST_CONFIGS[request.param] + test_config["test_name"] = request.param + return test_config + + +@pytest.fixture(scope="module") +def model_id(test_config): + yield test_config["model_id"] + + +@pytest.fixture(scope="module") +def test_name(test_config): + yield test_config["test_name"] + + +@pytest.fixture(scope="module") +def expected_outputs(test_config): + return { + "greedy": test_config["expected_greedy_output"], + # "sampling": model_config["expected_sampling_output"], + "batch": test_config["expected_batch_output"], + } + + +@pytest.fixture(scope="module") +def input(test_config): + return test_config["input"] + + +@pytest.fixture(scope="module") +def tgi_service(gaudi_launcher, model_id, test_name): + with gaudi_launcher(model_id, test_name) as tgi_service: + yield tgi_service + + +@pytest.fixture(scope="module") +async def tgi_client(tgi_service) -> AsyncClient: + await tgi_service.health(1000) + return tgi_service.client + + +@pytest.mark.asyncio +async def test_model_single_request( + tgi_client: AsyncClient, expected_outputs: Dict[str, Any], input: str +): + # Bounded greedy decoding without input + response = await tgi_client.generate( + input, + max_new_tokens=32, + ) + assert response.details.generated_tokens == 32 + assert response.generated_text == expected_outputs["greedy"] + + +@pytest.mark.asyncio +async def test_model_multiple_requests( + tgi_client, gaudi_generate_load, expected_outputs, input +): + num_requests = 4 + responses = await gaudi_generate_load( + tgi_client, + input, + max_new_tokens=32, + 
n=num_requests, + ) + + assert len(responses) == 4 + expected = expected_outputs["batch"] + for r in responses: + assert r.details.generated_tokens == 32 + assert r.generated_text == expected From 8568f910a7caf5437b0496ead59ee6442fa145d0 Mon Sep 17 00:00:00 2001 From: baptiste Date: Thu, 10 Apr 2025 09:03:49 +0000 Subject: [PATCH 05/19] fix llama failing test --- integration-tests/gaudi/test_gaudi_generate.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/integration-tests/gaudi/test_gaudi_generate.py b/integration-tests/gaudi/test_gaudi_generate.py index 423ac17f..d2adf2b2 100644 --- a/integration-tests/gaudi/test_gaudi_generate.py +++ b/integration-tests/gaudi/test_gaudi_generate.py @@ -8,8 +8,8 @@ TEST_CONFIGS = { "meta-llama/Llama-3.1-8B-Instruct-shared": { "model_id": "meta-llama/Llama-3.1-8B-Instruct", "input": "What is Deep Learning?", - "expected_greedy_output": " A Beginner’s Guide\nDeep learning is a subset of machine learning that involves the use", - "expected_batch_output": " A Beginner’s Guide\nDeep learning is a subset of machine learning that involves the use", + "expected_greedy_output": " A Beginner’s Guide\nDeep learning is a subset of machine learning that involves the use of artificial neural networks to analyze and interpret data. It is a type of", + "expected_batch_output": " A Beginner’s Guide\nDeep learning is a subset of machine learning that involves the use of artificial neural networks to analyze and interpret data. 
It is a type of", "args": [ "--sharded", "true", From 76d155e660f40993275f067b02ba28443d98005a Mon Sep 17 00:00:00 2001 From: baptiste Date: Thu, 10 Apr 2025 11:47:40 +0000 Subject: [PATCH 06/19] wip(ci): rerun ci to debug --- integration-tests/gaudi/test_gaudi_generate.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/integration-tests/gaudi/test_gaudi_generate.py b/integration-tests/gaudi/test_gaudi_generate.py index d2adf2b2..4ed37dad 100644 --- a/integration-tests/gaudi/test_gaudi_generate.py +++ b/integration-tests/gaudi/test_gaudi_generate.py @@ -3,7 +3,7 @@ from typing import Any, Dict from text_generation import AsyncClient import pytest -# The "args" config is not optimized for speed but only check that the inference is working for the different models architectures +# The "args" values in TEST_CONFIGS are not optimized for speed but only check that the inference is working for the different models architectures. TEST_CONFIGS = { "meta-llama/Llama-3.1-8B-Instruct-shared": { "model_id": "meta-llama/Llama-3.1-8B-Instruct", From 1bd2ad9635e423553e26d03179181fc3a6d5cd9f Mon Sep 17 00:00:00 2001 From: Pauline Bailly-Masson <155966238+paulinebm@users.noreply.github.com> Date: Thu, 10 Apr 2025 15:16:14 +0200 Subject: [PATCH 07/19] Update tests.yaml --- .github/workflows/tests.yaml | 136 +++++++++++++++++++---------------- 1 file changed, 74 insertions(+), 62 deletions(-) diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml index 3e431c86..128952d6 100644 --- a/.github/workflows/tests.yaml +++ b/.github/workflows/tests.yaml @@ -1,67 +1,79 @@ -name: Server Tests - +--- +name: Ci-test on: - pull_request: - paths: - - ".github/workflows/tests.yaml" - - "server/**" - - "proto/**" - - "router/**" - - "launcher/**" - - "backends/**" - - "Cargo.lock" - - "rust-toolchain.toml" - -concurrency: - group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} - cancel-in-progress: true + workflow_dispatch: jobs: - run_tests: + 
Ci-test: runs-on: - group: aws-highmemory-32-plus-priv + group: aws-dl1-24xlarge steps: - - uses: actions/checkout@v4 - - name: Set up Python - uses: actions/setup-python@v4 - id: python - with: - python-version: 3.11 - - uses: dtolnay/rust-toolchain@1.85.0 - with: - components: rustfmt, clippy - - name: Install Protoc - uses: arduino/setup-protoc@v1 - - name: Clean unused files - run: | - sudo rm -rf /usr/local/lib/android # will release about 10 GB if you don't need Android - sudo rm -rf /usr/share/dotnet # will release about 20GB if you don't need .NET - - name: Install - run: | - sudo apt update - sudo apt install python3.11-dev -y - pip install -U pip uv - uv venv - source ./.venv/bin/activate - make install-cpu - - name: Download locked kernels - run: | - source ./.venv/bin/activate - kernels download server - - name: Run server tests - run: | - source ./.venv/bin/activate - uv pip install pytest - export HF_TOKEN=${{ secrets.HF_TOKEN }} - pytest -s -vv server/tests - - name: Pre-commit checks - run: | - pip install pre-commit - pre-commit install - pre-commit run --all-files - - name: Run Rust tests - run: | - cargo test - - name: Run Rust tests with google feature - run: | - cargo test --features google + - run: | + echo "🎉 This job uses runner scale set runners!" 
+# name: Server Tests + +# on: +# pull_request: +# paths: +# - ".github/workflows/tests.yaml" +# - "server/**" +# - "proto/**" +# - "router/**" +# - "launcher/**" +# - "backends/**" +# - "Cargo.lock" +# - "rust-toolchain.toml" + +# concurrency: +# group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} +# cancel-in-progress: true + +# jobs: +# run_tests: +# runs-on: +# group: aws-highmemory-32-plus-priv +# steps: +# - uses: actions/checkout@v4 +# - name: Set up Python +# uses: actions/setup-python@v4 +# id: python +# with: +# python-version: 3.11 +# - uses: dtolnay/rust-toolchain@1.85.0 +# with: +# components: rustfmt, clippy +# - name: Install Protoc +# uses: arduino/setup-protoc@v1 +# - name: Clean unused files +# run: | +# sudo rm -rf /usr/local/lib/android # will release about 10 GB if you don't need Android +# sudo rm -rf /usr/share/dotnet # will release about 20GB if you don't need .NET +# - name: Install +# run: | +# sudo apt update +# sudo apt install python3.11-dev -y +# pip install -U pip uv +# uv venv +# source ./.venv/bin/activate +# make install-cpu +# - name: Download locked kernels +# run: | +# source ./.venv/bin/activate +# kernels download server +# - name: Run server tests +# run: | +# source ./.venv/bin/activate +# uv pip install pytest +# export HF_TOKEN=${{ secrets.HF_TOKEN }} +# pytest -s -vv server/tests +# - name: Pre-commit checks +# run: | +# pip install pre-commit +# pre-commit install +# pre-commit run --all-files +# - name: Run Rust tests +# run: | +# cargo test +# - name: Run Rust tests with google feature +# run: | +# cargo test --features google From 2c2cfc09c5b7024e06ce788ba4d74021be17d242 Mon Sep 17 00:00:00 2001 From: Pauline Bailly-Masson <155966238+paulinebm@users.noreply.github.com> Date: Thu, 10 Apr 2025 15:23:17 +0200 Subject: [PATCH 08/19] Update tests.yaml --- .github/workflows/tests.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/tests.yaml 
b/.github/workflows/tests.yaml index 128952d6..70b2a536 100644 --- a/.github/workflows/tests.yaml +++ b/.github/workflows/tests.yaml @@ -1,7 +1,7 @@ --- name: Ci-test on: - workflow_dispatch: + push: jobs: Ci-test: From 4b5e812fe12b961b48da103326b89650ea39fde4 Mon Sep 17 00:00:00 2001 From: Baptiste Colle Date: Thu, 10 Apr 2025 16:08:06 +0200 Subject: [PATCH 09/19] wip(ci): debug the ci --- .github/workflows/tests.yaml | 136 ++++++++++++++++------------------- 1 file changed, 62 insertions(+), 74 deletions(-) diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml index 70b2a536..3e431c86 100644 --- a/.github/workflows/tests.yaml +++ b/.github/workflows/tests.yaml @@ -1,79 +1,67 @@ ---- -name: Ci-test +name: Server Tests + on: - push: + pull_request: + paths: + - ".github/workflows/tests.yaml" + - "server/**" + - "proto/**" + - "router/**" + - "launcher/**" + - "backends/**" + - "Cargo.lock" + - "rust-toolchain.toml" + +concurrency: + group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} + cancel-in-progress: true jobs: - Ci-test: + run_tests: runs-on: - group: aws-dl1-24xlarge + group: aws-highmemory-32-plus-priv steps: - - run: | - echo "🎉 This job uses runner scale set runners!" 
-# name: Server Tests - -# on: -# pull_request: -# paths: -# - ".github/workflows/tests.yaml" -# - "server/**" -# - "proto/**" -# - "router/**" -# - "launcher/**" -# - "backends/**" -# - "Cargo.lock" -# - "rust-toolchain.toml" - -# concurrency: -# group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} -# cancel-in-progress: true - -# jobs: -# run_tests: -# runs-on: -# group: aws-highmemory-32-plus-priv -# steps: -# - uses: actions/checkout@v4 -# - name: Set up Python -# uses: actions/setup-python@v4 -# id: python -# with: -# python-version: 3.11 -# - uses: dtolnay/rust-toolchain@1.85.0 -# with: -# components: rustfmt, clippy -# - name: Install Protoc -# uses: arduino/setup-protoc@v1 -# - name: Clean unused files -# run: | -# sudo rm -rf /usr/local/lib/android # will release about 10 GB if you don't need Android -# sudo rm -rf /usr/share/dotnet # will release about 20GB if you don't need .NET -# - name: Install -# run: | -# sudo apt update -# sudo apt install python3.11-dev -y -# pip install -U pip uv -# uv venv -# source ./.venv/bin/activate -# make install-cpu -# - name: Download locked kernels -# run: | -# source ./.venv/bin/activate -# kernels download server -# - name: Run server tests -# run: | -# source ./.venv/bin/activate -# uv pip install pytest -# export HF_TOKEN=${{ secrets.HF_TOKEN }} -# pytest -s -vv server/tests -# - name: Pre-commit checks -# run: | -# pip install pre-commit -# pre-commit install -# pre-commit run --all-files -# - name: Run Rust tests -# run: | -# cargo test -# - name: Run Rust tests with google feature -# run: | -# cargo test --features google + - uses: actions/checkout@v4 + - name: Set up Python + uses: actions/setup-python@v4 + id: python + with: + python-version: 3.11 + - uses: dtolnay/rust-toolchain@1.85.0 + with: + components: rustfmt, clippy + - name: Install Protoc + uses: arduino/setup-protoc@v1 + - name: Clean unused files + run: | + sudo rm -rf /usr/local/lib/android # will release about 10 GB if you don't 
need Android + sudo rm -rf /usr/share/dotnet # will release about 20GB if you don't need .NET + - name: Install + run: | + sudo apt update + sudo apt install python3.11-dev -y + pip install -U pip uv + uv venv + source ./.venv/bin/activate + make install-cpu + - name: Download locked kernels + run: | + source ./.venv/bin/activate + kernels download server + - name: Run server tests + run: | + source ./.venv/bin/activate + uv pip install pytest + export HF_TOKEN=${{ secrets.HF_TOKEN }} + pytest -s -vv server/tests + - name: Pre-commit checks + run: | + pip install pre-commit + pre-commit install + pre-commit run --all-files + - name: Run Rust tests + run: | + cargo test + - name: Run Rust tests with google feature + run: | + cargo test --features google From a2a5772cd719883dcba61a30730b51e6d23b3d71 Mon Sep 17 00:00:00 2001 From: Baptiste Colle Date: Thu, 10 Apr 2025 17:17:16 +0200 Subject: [PATCH 10/19] wip(ci): debug the ci --- .../gaudi/test_gaudi_generate.py | 340 +++++++++--------- 1 file changed, 170 insertions(+), 170 deletions(-) diff --git a/integration-tests/gaudi/test_gaudi_generate.py b/integration-tests/gaudi/test_gaudi_generate.py index 4ed37dad..184cbf15 100644 --- a/integration-tests/gaudi/test_gaudi_generate.py +++ b/integration-tests/gaudi/test_gaudi_generate.py @@ -5,26 +5,26 @@ import pytest # The "args" values in TEST_CONFIGS are not optimized for speed but only check that the inference is working for the different models architectures. TEST_CONFIGS = { - "meta-llama/Llama-3.1-8B-Instruct-shared": { - "model_id": "meta-llama/Llama-3.1-8B-Instruct", - "input": "What is Deep Learning?", - "expected_greedy_output": " A Beginner’s Guide\nDeep learning is a subset of machine learning that involves the use of artificial neural networks to analyze and interpret data. 
It is a type of", - "expected_batch_output": " A Beginner’s Guide\nDeep learning is a subset of machine learning that involves the use of artificial neural networks to analyze and interpret data. It is a type of", - "args": [ - "--sharded", - "true", - "--num-shard", - "8", - "--max-input-tokens", - "512", - "--max-total-tokens", - "1024", - "--max-batch-size", - "8", - "--max-batch-prefill-tokens", - "2048", - ], - }, + # "meta-llama/Llama-3.1-8B-Instruct-shared": { + # "model_id": "meta-llama/Llama-3.1-8B-Instruct", + # "input": "What is Deep Learning?", + # "expected_greedy_output": " A Beginner’s Guide\nDeep learning is a subset of machine learning that involves the use of artificial neural networks to analyze and interpret data. It is a type of", + # "expected_batch_output": " A Beginner’s Guide\nDeep learning is a subset of machine learning that involves the use of artificial neural networks to analyze and interpret data. It is a type of", + # "args": [ + # "--sharded", + # "true", + # "--num-shard", + # "8", + # "--max-input-tokens", + # "512", + # "--max-total-tokens", + # "1024", + # "--max-batch-size", + # "8", + # "--max-batch-prefill-tokens", + # "2048", + # ], + # }, "meta-llama/Llama-3.1-8B-Instruct": { "model_id": "meta-llama/Llama-3.1-8B-Instruct", "input": "What is Deep Learning?", @@ -42,156 +42,156 @@ TEST_CONFIGS = { "2048", ], }, - "meta-llama/Llama-2-7b-chat-hf": { - "model_id": "meta-llama/Llama-2-7b-chat-hf", - "input": "What is Deep Learning?", - "expected_greedy_output": "\n\nDeep learning (also known as deep structured learning) is part of a broader family of machine learning techniques based on artificial neural networks\u2014specific", - "expected_batch_output": "\n\nDeep learning (also known as deep structured learning) is part of a broader family of machine learning techniques based on artificial neural networks\u2014specific", - "args": [ - "--max-input-tokens", - "512", - "--max-total-tokens", - "1024", - "--max-batch-size", - "4", 
- "--max-batch-prefill-tokens", - "2048", - ], - }, - "mistralai/Mistral-7B-Instruct-v0.3": { - "model_id": "mistralai/Mistral-7B-Instruct-v0.3", - "input": "What is Deep Learning?", - "expected_greedy_output": "\n\nDeep learning is a subset of machine learning in artificial intelligence (AI) that has networks capable of learning unsupervised from data that is unstructured", - "expected_batch_output": "\n\nDeep learning is a subset of machine learning in artificial intelligence (AI) that has networks capable of learning unsupervised from data that is unstructured", - "args": [ - "--max-input-tokens", - "512", - "--max-total-tokens", - "1024", - "--max-batch-size", - "4", - "--max-batch-prefill-tokens", - "2048", - ], - }, - "bigcode/starcoder2-3b": { - "model_id": "bigcode/starcoder2-3b", - "input": "What is Deep Learning?", - "expected_greedy_output": "\n\nDeep learning is a subset of machine learning that uses artificial neural networks to perform tasks.\n\nNeural networks are a type of machine learning algorithm that", - "expected_batch_output": "\n\nDeep learning is a subset of machine learning that uses artificial neural networks to perform tasks.\n\nNeural networks are a type of machine learning algorithm that", - "args": [ - "--max-input-tokens", - "512", - "--max-total-tokens", - "1024", - "--max-batch-size", - "4", - "--max-batch-prefill-tokens", - "2048", - ], - }, - "google/gemma-7b-it": { - "model_id": "google/gemma-7b-it", - "input": "What is Deep Learning?", - "expected_greedy_output": "\n\nDeep learning is a subset of machine learning that uses artificial neural networks to learn from large amounts of data. Neural networks are inspired by the structure and function of", - "expected_batch_output": "\n\nDeep learning is a subset of machine learning that uses artificial neural networks to learn from large amounts of data. 
Neural networks are inspired by the structure and function of", - "args": [ - "--max-input-tokens", - "512", - "--max-total-tokens", - "1024", - "--max-batch-size", - "4", - "--max-batch-prefill-tokens", - "2048", - ], - }, - "Qwen/Qwen2-0.5B-Instruct": { - "model_id": "Qwen/Qwen2-0.5B-Instruct", - "input": "What is Deep Learning?", - "expected_greedy_output": " Deep Learning is a type of machine learning that is based on the principles of artificial neural networks. It is a type of machine learning that is used to train models", - "expected_batch_output": " Deep Learning is a type of machine learning that is based on the principles of artificial neural networks. It is a type of machine learning that is used to train models", - "args": [ - "--max-input-tokens", - "512", - "--max-total-tokens", - "1024", - "--max-batch-size", - "4", - "--max-batch-prefill-tokens", - "2048", - ], - }, - "tiiuae/falcon-7b-instruct": { - "model_id": "tiiuae/falcon-7b-instruct", - "input": "What is Deep Learning?", - "expected_greedy_output": "\nDeep learning is a branch of machine learning that uses artificial neural networks to learn and make decisions. It is based on the concept of hierarchical learning, where a", - "expected_batch_output": "\nDeep learning is a branch of machine learning that uses artificial neural networks to learn and make decisions. It is based on the concept of hierarchical learning, where a", - "args": [ - "--max-input-tokens", - "512", - "--max-total-tokens", - "1024", - "--max-batch-size", - "4", - ], - }, - "microsoft/phi-1_5": { - "model_id": "microsoft/phi-1_5", - "input": "What is Deep Learning?", - "expected_greedy_output": "\n\nDeep Learning is a subfield of Machine Learning that focuses on building neural networks with multiple layers of interconnected nodes. 
These networks are designed to learn from large", - "expected_batch_output": "\n\nDeep Learning is a subfield of Machine Learning that focuses on building neural networks with multiple layers of interconnected nodes. These networks are designed to learn from large", - "args": [ - "--max-input-tokens", - "512", - "--max-total-tokens", - "1024", - "--max-batch-size", - "4", - ], - }, - "openai-community/gpt2": { - "model_id": "openai-community/gpt2", - "input": "What is Deep Learning?", - "expected_greedy_output": "\n\nDeep learning is a new field of research that has been around for a long time. It is a new field of research that has been around for a", - "expected_batch_output": "\n\nDeep learning is a new field of research that has been around for a long time. It is a new field of research that has been around for a", - "args": [ - "--max-input-tokens", - "512", - "--max-total-tokens", - "1024", - "--max-batch-size", - "4", - ], - }, - "facebook/opt-125m": { - "model_id": "facebook/opt-125m", - "input": "What is Deep Learning?", - "expected_greedy_output": "\nAbout the Author\n\nAbout the Author\n\nAbout the Author\n\nAbout the Author\n\nAbout the Author\n\nAbout the Author\n\nAbout", - "expected_batch_output": "\nAbout the Author\n\nAbout the Author\n\nAbout the Author\n\nAbout the Author\n\nAbout the Author\n\nAbout the Author\n\nAbout", - "args": [ - "--max-input-tokens", - "512", - "--max-total-tokens", - "1024", - "--max-batch-size", - "4", - ], - }, - "EleutherAI/gpt-j-6b": { - "model_id": "EleutherAI/gpt-j-6b", - "input": "What is Deep Learning?", - "expected_greedy_output": "\n\nDeep learning is a subset of machine learning that is based on the idea of neural networks. Neural networks are a type of artificial intelligence that is inspired by", - "expected_batch_output": "\n\nDeep learning is a subset of machine learning that is based on the idea of neural networks. 
Neural networks are a type of artificial intelligence that is inspired by", - "args": [ - "--max-input-tokens", - "512", - "--max-total-tokens", - "1024", - "--max-batch-size", - "4", - ], - }, + # "meta-llama/Llama-2-7b-chat-hf": { + # "model_id": "meta-llama/Llama-2-7b-chat-hf", + # "input": "What is Deep Learning?", + # "expected_greedy_output": "\n\nDeep learning (also known as deep structured learning) is part of a broader family of machine learning techniques based on artificial neural networks\u2014specific", + # "expected_batch_output": "\n\nDeep learning (also known as deep structured learning) is part of a broader family of machine learning techniques based on artificial neural networks\u2014specific", + # "args": [ + # "--max-input-tokens", + # "512", + # "--max-total-tokens", + # "1024", + # "--max-batch-size", + # "4", + # "--max-batch-prefill-tokens", + # "2048", + # ], + # }, + # "mistralai/Mistral-7B-Instruct-v0.3": { + # "model_id": "mistralai/Mistral-7B-Instruct-v0.3", + # "input": "What is Deep Learning?", + # "expected_greedy_output": "\n\nDeep learning is a subset of machine learning in artificial intelligence (AI) that has networks capable of learning unsupervised from data that is unstructured", + # "expected_batch_output": "\n\nDeep learning is a subset of machine learning in artificial intelligence (AI) that has networks capable of learning unsupervised from data that is unstructured", + # "args": [ + # "--max-input-tokens", + # "512", + # "--max-total-tokens", + # "1024", + # "--max-batch-size", + # "4", + # "--max-batch-prefill-tokens", + # "2048", + # ], + # }, + # "bigcode/starcoder2-3b": { + # "model_id": "bigcode/starcoder2-3b", + # "input": "What is Deep Learning?", + # "expected_greedy_output": "\n\nDeep learning is a subset of machine learning that uses artificial neural networks to perform tasks.\n\nNeural networks are a type of machine learning algorithm that", + # "expected_batch_output": "\n\nDeep learning is a subset of 
machine learning that uses artificial neural networks to perform tasks.\n\nNeural networks are a type of machine learning algorithm that", + # "args": [ + # "--max-input-tokens", + # "512", + # "--max-total-tokens", + # "1024", + # "--max-batch-size", + # "4", + # "--max-batch-prefill-tokens", + # "2048", + # ], + # }, + # "google/gemma-7b-it": { + # "model_id": "google/gemma-7b-it", + # "input": "What is Deep Learning?", + # "expected_greedy_output": "\n\nDeep learning is a subset of machine learning that uses artificial neural networks to learn from large amounts of data. Neural networks are inspired by the structure and function of", + # "expected_batch_output": "\n\nDeep learning is a subset of machine learning that uses artificial neural networks to learn from large amounts of data. Neural networks are inspired by the structure and function of", + # "args": [ + # "--max-input-tokens", + # "512", + # "--max-total-tokens", + # "1024", + # "--max-batch-size", + # "4", + # "--max-batch-prefill-tokens", + # "2048", + # ], + # }, + # "Qwen/Qwen2-0.5B-Instruct": { + # "model_id": "Qwen/Qwen2-0.5B-Instruct", + # "input": "What is Deep Learning?", + # "expected_greedy_output": " Deep Learning is a type of machine learning that is based on the principles of artificial neural networks. It is a type of machine learning that is used to train models", + # "expected_batch_output": " Deep Learning is a type of machine learning that is based on the principles of artificial neural networks. 
It is a type of machine learning that is used to train models", + # "args": [ + # "--max-input-tokens", + # "512", + # "--max-total-tokens", + # "1024", + # "--max-batch-size", + # "4", + # "--max-batch-prefill-tokens", + # "2048", + # ], + # }, + # "tiiuae/falcon-7b-instruct": { + # "model_id": "tiiuae/falcon-7b-instruct", + # "input": "What is Deep Learning?", + # "expected_greedy_output": "\nDeep learning is a branch of machine learning that uses artificial neural networks to learn and make decisions. It is based on the concept of hierarchical learning, where a", + # "expected_batch_output": "\nDeep learning is a branch of machine learning that uses artificial neural networks to learn and make decisions. It is based on the concept of hierarchical learning, where a", + # "args": [ + # "--max-input-tokens", + # "512", + # "--max-total-tokens", + # "1024", + # "--max-batch-size", + # "4", + # ], + # }, + # "microsoft/phi-1_5": { + # "model_id": "microsoft/phi-1_5", + # "input": "What is Deep Learning?", + # "expected_greedy_output": "\n\nDeep Learning is a subfield of Machine Learning that focuses on building neural networks with multiple layers of interconnected nodes. These networks are designed to learn from large", + # "expected_batch_output": "\n\nDeep Learning is a subfield of Machine Learning that focuses on building neural networks with multiple layers of interconnected nodes. These networks are designed to learn from large", + # "args": [ + # "--max-input-tokens", + # "512", + # "--max-total-tokens", + # "1024", + # "--max-batch-size", + # "4", + # ], + # }, + # "openai-community/gpt2": { + # "model_id": "openai-community/gpt2", + # "input": "What is Deep Learning?", + # "expected_greedy_output": "\n\nDeep learning is a new field of research that has been around for a long time. It is a new field of research that has been around for a", + # "expected_batch_output": "\n\nDeep learning is a new field of research that has been around for a long time. 
It is a new field of research that has been around for a", + # "args": [ + # "--max-input-tokens", + # "512", + # "--max-total-tokens", + # "1024", + # "--max-batch-size", + # "4", + # ], + # }, + # "facebook/opt-125m": { + # "model_id": "facebook/opt-125m", + # "input": "What is Deep Learning?", + # "expected_greedy_output": "\nAbout the Author\n\nAbout the Author\n\nAbout the Author\n\nAbout the Author\n\nAbout the Author\n\nAbout the Author\n\nAbout", + # "expected_batch_output": "\nAbout the Author\n\nAbout the Author\n\nAbout the Author\n\nAbout the Author\n\nAbout the Author\n\nAbout the Author\n\nAbout", + # "args": [ + # "--max-input-tokens", + # "512", + # "--max-total-tokens", + # "1024", + # "--max-batch-size", + # "4", + # ], + # }, + # "EleutherAI/gpt-j-6b": { + # "model_id": "EleutherAI/gpt-j-6b", + # "input": "What is Deep Learning?", + # "expected_greedy_output": "\n\nDeep learning is a subset of machine learning that is based on the idea of neural networks. Neural networks are a type of artificial intelligence that is inspired by", + # "expected_batch_output": "\n\nDeep learning is a subset of machine learning that is based on the idea of neural networks. 
Neural networks are a type of artificial intelligence that is inspired by", + # "args": [ + # "--max-input-tokens", + # "512", + # "--max-total-tokens", + # "1024", + # "--max-batch-size", + # "4", + # ], + # }, } print(f"Testing {len(TEST_CONFIGS)} models") From 9c6776375eb7ae52f0c16b42acdbebd3b1200360 Mon Sep 17 00:00:00 2001 From: baptiste Date: Tue, 22 Apr 2025 08:15:11 +0000 Subject: [PATCH 11/19] change default behaviour to only run a subset of all the models --- backends/gaudi/Makefile | 5 + integration-tests/conftest.py | 6 + .../gaudi/test_gaudi_generate.py | 381 +++++++++--------- 3 files changed, 208 insertions(+), 184 deletions(-) diff --git a/backends/gaudi/Makefile b/backends/gaudi/Makefile index 3ece5a7e..2eb5506f 100644 --- a/backends/gaudi/Makefile +++ b/backends/gaudi/Makefile @@ -56,6 +56,11 @@ run-integration-tests: HF_TOKEN=`cat ${HOME}/.cache/huggingface/token` \ pytest --durations=0 -s -vv ${root_dir}/integration-tests --gaudi +run-integration-tests-with-all-models: + DOCKER_VOLUME=${root_dir}/data \ + HF_TOKEN=`cat ${HOME}/.cache/huggingface/token` \ + pytest --durations=0 -s -vv ${root_dir}/integration-tests --gaudi --gaudi-all-models + # This is used to capture the expected outputs for the integration tests offering an easy way to add more models to the integration tests capture-expected-outputs-for-integration-tests: pip install -U pip uv diff --git a/integration-tests/conftest.py b/integration-tests/conftest.py index 594ffd49..534aaaea 100644 --- a/integration-tests/conftest.py +++ b/integration-tests/conftest.py @@ -74,6 +74,12 @@ def pytest_addoption(parser): parser.addoption( "--gaudi", action="store_true", default=False, help="run gaudi tests" ) + parser.addoption( + "--gaudi-all-models", + action="store_true", + default=False, + help="Run tests for all models instead of just the default subset", + ) def pytest_configure(config): diff --git a/integration-tests/gaudi/test_gaudi_generate.py
b/integration-tests/gaudi/test_gaudi_generate.py index 184cbf15..c2d768d0 100644 --- a/integration-tests/gaudi/test_gaudi_generate.py +++ b/integration-tests/gaudi/test_gaudi_generate.py @@ -1,30 +1,39 @@ -from typing import Any, Dict +from typing import Any, Dict, Generator +from _pytest.fixtures import SubRequest from text_generation import AsyncClient import pytest + +def pytest_configure(config): + config.addinivalue_line( + "markers", "gaudi_all_models: mark test to run with all models" + ) + + # The "args" values in TEST_CONFIGS are not optimized for speed but only check that the inference is working for the different models architectures. TEST_CONFIGS = { - # "meta-llama/Llama-3.1-8B-Instruct-shared": { - # "model_id": "meta-llama/Llama-3.1-8B-Instruct", - # "input": "What is Deep Learning?", - # "expected_greedy_output": " A Beginner’s Guide\nDeep learning is a subset of machine learning that involves the use of artificial neural networks to analyze and interpret data. It is a type of", - # "expected_batch_output": " A Beginner’s Guide\nDeep learning is a subset of machine learning that involves the use of artificial neural networks to analyze and interpret data. It is a type of", - # "args": [ - # "--sharded", - # "true", - # "--num-shard", - # "8", - # "--max-input-tokens", - # "512", - # "--max-total-tokens", - # "1024", - # "--max-batch-size", - # "8", - # "--max-batch-prefill-tokens", - # "2048", - # ], - # }, + "meta-llama/Llama-3.1-8B-Instruct-shared": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "input": "What is Deep Learning?", + "expected_greedy_output": " A Beginner’s Guide\nDeep learning is a subset of machine learning that involves the use of artificial neural networks to analyze and interpret data. It is a type of", + "expected_batch_output": " A Beginner’s Guide\nDeep learning is a subset of machine learning that involves the use of artificial neural networks to analyze and interpret data. 
It is a type of", + "args": [ + "--sharded", + "true", + "--num-shard", + "8", + "--max-input-tokens", + "512", + "--max-total-tokens", + "1024", + "--max-batch-size", + "8", + "--max-batch-prefill-tokens", + "2048", + ], + "run_by_default": True, + }, "meta-llama/Llama-3.1-8B-Instruct": { "model_id": "meta-llama/Llama-3.1-8B-Instruct", "input": "What is Deep Learning?", @@ -41,196 +50,195 @@ TEST_CONFIGS = { "--max-batch-prefill-tokens", "2048", ], + "run_by_default": True, + }, + "meta-llama/Llama-2-7b-chat-hf": { + "model_id": "meta-llama/Llama-2-7b-chat-hf", + "input": "What is Deep Learning?", + "expected_greedy_output": "\n\nDeep learning (also known as deep structured learning) is part of a broader family of machine learning techniques based on artificial neural networks\u2014specific", + "expected_batch_output": "\n\nDeep learning (also known as deep structured learning) is part of a broader family of machine learning techniques based on artificial neural networks\u2014specific", + "args": [ + "--max-input-tokens", + "512", + "--max-total-tokens", + "1024", + "--max-batch-size", + "4", + "--max-batch-prefill-tokens", + "2048", + ], + }, + "mistralai/Mistral-7B-Instruct-v0.3": { + "model_id": "mistralai/Mistral-7B-Instruct-v0.3", + "input": "What is Deep Learning?", + "expected_greedy_output": "\n\nDeep learning is a subset of machine learning in artificial intelligence (AI) that has networks capable of learning unsupervised from data that is unstructured", + "expected_batch_output": "\n\nDeep learning is a subset of machine learning in artificial intelligence (AI) that has networks capable of learning unsupervised from data that is unstructured", + "args": [ + "--max-input-tokens", + "512", + "--max-total-tokens", + "1024", + "--max-batch-size", + "4", + "--max-batch-prefill-tokens", + "2048", + ], + }, + "bigcode/starcoder2-3b": { + "model_id": "bigcode/starcoder2-3b", + "input": "What is Deep Learning?", + "expected_greedy_output": "\n\nDeep learning is a 
subset of machine learning that uses artificial neural networks to perform tasks.\n\nNeural networks are a type of machine learning algorithm that", + "expected_batch_output": "\n\nDeep learning is a subset of machine learning that uses artificial neural networks to perform tasks.\n\nNeural networks are a type of machine learning algorithm that", + "args": [ + "--max-input-tokens", + "512", + "--max-total-tokens", + "1024", + "--max-batch-size", + "4", + "--max-batch-prefill-tokens", + "2048", + ], + }, + "google/gemma-7b-it": { + "model_id": "google/gemma-7b-it", + "input": "What is Deep Learning?", + "expected_greedy_output": "\n\nDeep learning is a subset of machine learning that uses artificial neural networks to learn from large amounts of data. Neural networks are inspired by the structure and function of", + "expected_batch_output": "\n\nDeep learning is a subset of machine learning that uses artificial neural networks to learn from large amounts of data. Neural networks are inspired by the structure and function of", + "args": [ + "--max-input-tokens", + "512", + "--max-total-tokens", + "1024", + "--max-batch-size", + "4", + "--max-batch-prefill-tokens", + "2048", + ], + }, + "Qwen/Qwen2-0.5B-Instruct": { + "model_id": "Qwen/Qwen2-0.5B-Instruct", + "input": "What is Deep Learning?", + "expected_greedy_output": " Deep Learning is a type of machine learning that is based on the principles of artificial neural networks. It is a type of machine learning that is used to train models", + "expected_batch_output": " Deep Learning is a type of machine learning that is based on the principles of artificial neural networks. 
It is a type of machine learning that is used to train models", + "args": [ + "--max-input-tokens", + "512", + "--max-total-tokens", + "1024", + "--max-batch-size", + "4", + "--max-batch-prefill-tokens", + "2048", + ], + }, + "tiiuae/falcon-7b-instruct": { + "model_id": "tiiuae/falcon-7b-instruct", + "input": "What is Deep Learning?", + "expected_greedy_output": "\nDeep learning is a branch of machine learning that uses artificial neural networks to learn and make decisions. It is based on the concept of hierarchical learning, where a", + "expected_batch_output": "\nDeep learning is a branch of machine learning that uses artificial neural networks to learn and make decisions. It is based on the concept of hierarchical learning, where a", + "args": [ + "--max-input-tokens", + "512", + "--max-total-tokens", + "1024", + "--max-batch-size", + "4", + ], + }, + "microsoft/phi-1_5": { + "model_id": "microsoft/phi-1_5", + "input": "What is Deep Learning?", + "expected_greedy_output": "\n\nDeep Learning is a subfield of Machine Learning that focuses on building neural networks with multiple layers of interconnected nodes. These networks are designed to learn from large", + "expected_batch_output": "\n\nDeep Learning is a subfield of Machine Learning that focuses on building neural networks with multiple layers of interconnected nodes. These networks are designed to learn from large", + "args": [ + "--max-input-tokens", + "512", + "--max-total-tokens", + "1024", + "--max-batch-size", + "4", + ], + }, + "openai-community/gpt2": { + "model_id": "openai-community/gpt2", + "input": "What is Deep Learning?", + "expected_greedy_output": "\n\nDeep learning is a new field of research that has been around for a long time. It is a new field of research that has been around for a", + "expected_batch_output": "\n\nDeep learning is a new field of research that has been around for a long time. 
It is a new field of research that has been around for a", + "args": [ + "--max-input-tokens", + "512", + "--max-total-tokens", + "1024", + "--max-batch-size", + "4", + ], + }, + "EleutherAI/gpt-j-6b": { + "model_id": "EleutherAI/gpt-j-6b", + "input": "What is Deep Learning?", + "expected_greedy_output": "\n\nDeep learning is a subset of machine learning that is based on the idea of neural networks. Neural networks are a type of artificial intelligence that is inspired by", + "expected_batch_output": "\n\nDeep learning is a subset of machine learning that is based on the idea of neural networks. Neural networks are a type of artificial intelligence that is inspired by", + "args": [ + "--max-input-tokens", + "512", + "--max-total-tokens", + "1024", + "--max-batch-size", + "4", + ], }, - # "meta-llama/Llama-2-7b-chat-hf": { - # "model_id": "meta-llama/Llama-2-7b-chat-hf", - # "input": "What is Deep Learning?", - # "expected_greedy_output": "\n\nDeep learning (also known as deep structured learning) is part of a broader family of machine learning techniques based on artificial neural networks\u2014specific", - # "expected_batch_output": "\n\nDeep learning (also known as deep structured learning) is part of a broader family of machine learning techniques based on artificial neural networks\u2014specific", - # "args": [ - # "--max-input-tokens", - # "512", - # "--max-total-tokens", - # "1024", - # "--max-batch-size", - # "4", - # "--max-batch-prefill-tokens", - # "2048", - # ], - # }, - # "mistralai/Mistral-7B-Instruct-v0.3": { - # "model_id": "mistralai/Mistral-7B-Instruct-v0.3", - # "input": "What is Deep Learning?", - # "expected_greedy_output": "\n\nDeep learning is a subset of machine learning in artificial intelligence (AI) that has networks capable of learning unsupervised from data that is unstructured", - # "expected_batch_output": "\n\nDeep learning is a subset of machine learning in artificial intelligence (AI) that has networks capable of learning 
unsupervised from data that is unstructured", - # "args": [ - # "--max-input-tokens", - # "512", - # "--max-total-tokens", - # "1024", - # "--max-batch-size", - # "4", - # "--max-batch-prefill-tokens", - # "2048", - # ], - # }, - # "bigcode/starcoder2-3b": { - # "model_id": "bigcode/starcoder2-3b", - # "input": "What is Deep Learning?", - # "expected_greedy_output": "\n\nDeep learning is a subset of machine learning that uses artificial neural networks to perform tasks.\n\nNeural networks are a type of machine learning algorithm that", - # "expected_batch_output": "\n\nDeep learning is a subset of machine learning that uses artificial neural networks to perform tasks.\n\nNeural networks are a type of machine learning algorithm that", - # "args": [ - # "--max-input-tokens", - # "512", - # "--max-total-tokens", - # "1024", - # "--max-batch-size", - # "4", - # "--max-batch-prefill-tokens", - # "2048", - # ], - # }, - # "google/gemma-7b-it": { - # "model_id": "google/gemma-7b-it", - # "input": "What is Deep Learning?", - # "expected_greedy_output": "\n\nDeep learning is a subset of machine learning that uses artificial neural networks to learn from large amounts of data. Neural networks are inspired by the structure and function of", - # "expected_batch_output": "\n\nDeep learning is a subset of machine learning that uses artificial neural networks to learn from large amounts of data. Neural networks are inspired by the structure and function of", - # "args": [ - # "--max-input-tokens", - # "512", - # "--max-total-tokens", - # "1024", - # "--max-batch-size", - # "4", - # "--max-batch-prefill-tokens", - # "2048", - # ], - # }, - # "Qwen/Qwen2-0.5B-Instruct": { - # "model_id": "Qwen/Qwen2-0.5B-Instruct", - # "input": "What is Deep Learning?", - # "expected_greedy_output": " Deep Learning is a type of machine learning that is based on the principles of artificial neural networks. 
It is a type of machine learning that is used to train models", - # "expected_batch_output": " Deep Learning is a type of machine learning that is based on the principles of artificial neural networks. It is a type of machine learning that is used to train models", - # "args": [ - # "--max-input-tokens", - # "512", - # "--max-total-tokens", - # "1024", - # "--max-batch-size", - # "4", - # "--max-batch-prefill-tokens", - # "2048", - # ], - # }, - # "tiiuae/falcon-7b-instruct": { - # "model_id": "tiiuae/falcon-7b-instruct", - # "input": "What is Deep Learning?", - # "expected_greedy_output": "\nDeep learning is a branch of machine learning that uses artificial neural networks to learn and make decisions. It is based on the concept of hierarchical learning, where a", - # "expected_batch_output": "\nDeep learning is a branch of machine learning that uses artificial neural networks to learn and make decisions. It is based on the concept of hierarchical learning, where a", - # "args": [ - # "--max-input-tokens", - # "512", - # "--max-total-tokens", - # "1024", - # "--max-batch-size", - # "4", - # ], - # }, - # "microsoft/phi-1_5": { - # "model_id": "microsoft/phi-1_5", - # "input": "What is Deep Learning?", - # "expected_greedy_output": "\n\nDeep Learning is a subfield of Machine Learning that focuses on building neural networks with multiple layers of interconnected nodes. These networks are designed to learn from large", - # "expected_batch_output": "\n\nDeep Learning is a subfield of Machine Learning that focuses on building neural networks with multiple layers of interconnected nodes. 
These networks are designed to learn from large", - # "args": [ - # "--max-input-tokens", - # "512", - # "--max-total-tokens", - # "1024", - # "--max-batch-size", - # "4", - # ], - # }, - # "openai-community/gpt2": { - # "model_id": "openai-community/gpt2", - # "input": "What is Deep Learning?", - # "expected_greedy_output": "\n\nDeep learning is a new field of research that has been around for a long time. It is a new field of research that has been around for a", - # "expected_batch_output": "\n\nDeep learning is a new field of research that has been around for a long time. It is a new field of research that has been around for a", - # "args": [ - # "--max-input-tokens", - # "512", - # "--max-total-tokens", - # "1024", - # "--max-batch-size", - # "4", - # ], - # }, - # "facebook/opt-125m": { - # "model_id": "facebook/opt-125m", - # "input": "What is Deep Learning?", - # "expected_greedy_output": "\nAbout the Author\n\nAbout the Author\n\nAbout the Author\n\nAbout the Author\n\nAbout the Author\n\nAbout the Author\n\nAbout", - # "expected_batch_output": "\nAbout the Author\n\nAbout the Author\n\nAbout the Author\n\nAbout the Author\n\nAbout the Author\n\nAbout the Author\n\nAbout", - # "args": [ - # "--max-input-tokens", - # "512", - # "--max-total-tokens", - # "1024", - # "--max-batch-size", - # "4", - # ], - # }, - # "EleutherAI/gpt-j-6b": { - # "model_id": "EleutherAI/gpt-j-6b", - # "input": "What is Deep Learning?", - # "expected_greedy_output": "\n\nDeep learning is a subset of machine learning that is based on the idea of neural networks. Neural networks are a type of artificial intelligence that is inspired by", - # "expected_batch_output": "\n\nDeep learning is a subset of machine learning that is based on the idea of neural networks. 
Neural networks are a type of artificial intelligence that is inspired by", - # "args": [ - # "--max-input-tokens", - # "512", - # "--max-total-tokens", - # "1024", - # "--max-batch-size", - # "4", - # ], - # }, } -print(f"Testing {len(TEST_CONFIGS)} models") + +def pytest_generate_tests(metafunc): + if "test_config" in metafunc.fixturenames: + if metafunc.config.getoption("--gaudi-all-models"): + models = list(TEST_CONFIGS.keys()) + else: + models = [ + name + for name, config in TEST_CONFIGS.items() + if config.get("run_by_default", False) + ] + print(f"Testing {len(models)} models") + metafunc.parametrize("test_config", models, indirect=True) -@pytest.fixture(scope="module", params=TEST_CONFIGS.keys()) -def test_config(request) -> Dict[str, Any]: +@pytest.fixture(scope="module") +def test_config(request: SubRequest) -> Dict[str, Any]: """Fixture that provides model configurations for testing.""" - test_config = TEST_CONFIGS[request.param] - test_config["test_name"] = request.param + model_name = request.param + test_config = TEST_CONFIGS[model_name] + test_config["test_name"] = model_name return test_config @pytest.fixture(scope="module") -def model_id(test_config): +def model_id(test_config: Dict[str, Any]) -> Generator[str, None, None]: yield test_config["model_id"] @pytest.fixture(scope="module") -def test_name(test_config): +def test_name(test_config: Dict[str, Any]) -> Generator[str, None, None]: yield test_config["test_name"] @pytest.fixture(scope="module") -def expected_outputs(test_config): +def expected_outputs(test_config: Dict[str, Any]) -> Dict[str, str]: return { "greedy": test_config["expected_greedy_output"], - # "sampling": model_config["expected_sampling_output"], "batch": test_config["expected_batch_output"], } @pytest.fixture(scope="module") -def input(test_config): +def input(test_config: Dict[str, Any]) -> str: return test_config["input"] @pytest.fixture(scope="module") -def tgi_service(gaudi_launcher, model_id, test_name): +def 
tgi_service(gaudi_launcher, model_id: str, test_name: str): with gaudi_launcher(model_id, test_name) as tgi_service: yield tgi_service @@ -242,8 +250,9 @@ async def tgi_client(tgi_service) -> AsyncClient: @pytest.mark.asyncio +@pytest.mark.all_models async def test_model_single_request( - tgi_client: AsyncClient, expected_outputs: Dict[str, Any], input: str + tgi_client: AsyncClient, expected_outputs: Dict[str, str], input: str ): # Bounded greedy decoding without input response = await tgi_client.generate( @@ -255,8 +264,12 @@ async def test_model_single_request( @pytest.mark.asyncio +@pytest.mark.all_models async def test_model_multiple_requests( - tgi_client, gaudi_generate_load, expected_outputs, input + tgi_client: AsyncClient, + gaudi_generate_load, + expected_outputs: Dict[str, str], + input: str, ): num_requests = 4 responses = await gaudi_generate_load( From 59dc8c2699b0e43c33403e6e9d1dfaa0fa4f4680 Mon Sep 17 00:00:00 2001 From: baptiste Date: Tue, 22 Apr 2025 08:16:17 +0000 Subject: [PATCH 12/19] change defualt behaviour to only run a subset of all the models --- backends/gaudi/README.md | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/backends/gaudi/README.md b/backends/gaudi/README.md index ba890f0b..fa68c0a9 100644 --- a/backends/gaudi/README.md +++ b/backends/gaudi/README.md @@ -104,11 +104,16 @@ To run the integration tests, you need to first build the image: make -C backends/gaudi image ``` -Then run the following command to run the integration tests: +Then run the following command to run the integration tests (CI tests): ```bash make -C backends/gaudi run-integration-tests ``` +To run the integration tests with all models, you can run the following command: +```bash +make -C backends/gaudi run-integration-tests-with-all-models +``` + To capture the expected outputs for the integration tests, you can run the following command: ```bash make -C backends/gaudi capture-expected-outputs-for-integration-tests From 
fcf6870d20976336601545e39a35af4c553b2314 Mon Sep 17 00:00:00 2001 From: baptiste Date: Tue, 22 Apr 2025 08:43:45 +0000 Subject: [PATCH 13/19] testing --- integration-tests/gaudi/test_gaudi_generate.py | 1 - 1 file changed, 1 deletion(-) diff --git a/integration-tests/gaudi/test_gaudi_generate.py b/integration-tests/gaudi/test_gaudi_generate.py index c2d768d0..26ba47fe 100644 --- a/integration-tests/gaudi/test_gaudi_generate.py +++ b/integration-tests/gaudi/test_gaudi_generate.py @@ -32,7 +32,6 @@ TEST_CONFIGS = { "--max-batch-prefill-tokens", "2048", ], - "run_by_default": True, }, "meta-llama/Llama-3.1-8B-Instruct": { "model_id": "meta-llama/Llama-3.1-8B-Instruct", From 9c235f4d66bef213537496e11f36cf5d7fc04168 Mon Sep 17 00:00:00 2001 From: baptiste Date: Tue, 22 Apr 2025 09:17:44 +0000 Subject: [PATCH 14/19] feat(gaudi/ci): added ci for gaudi device --- integration-tests/fixtures/gaudi/service.py | 15 ++++++++------- .../gaudi/capture_expected_outputs.py | 2 +- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/integration-tests/fixtures/gaudi/service.py b/integration-tests/fixtures/gaudi/service.py index 44c7f999..b6942dbe 100644 --- a/integration-tests/fixtures/gaudi/service.py +++ b/integration-tests/fixtures/gaudi/service.py @@ -190,11 +190,7 @@ def gaudi_launcher(event_loop): except Exception as e: logger.error(f"Error handling existing container: {str(e)}") - model_name = next( - name for name, cfg in TEST_CONFIGS.items() if cfg["model_id"] == model_id - ) - - tgi_args = TEST_CONFIGS[model_name]["args"].copy() + tgi_args = TEST_CONFIGS[test_name]["args"].copy() env = BASE_ENV.copy() @@ -202,15 +198,20 @@ def gaudi_launcher(event_loop): env["MODEL_ID"] = model_id # Add env config that is definied in the fixture parameter - if "env_config" in TEST_CONFIGS[model_name]: - env.update(TEST_CONFIGS[model_name]["env_config"].copy()) + if "env_config" in TEST_CONFIGS[test_name]: + env.update(TEST_CONFIGS[test_name]["env_config"].copy()) volumes = 
[f"{DOCKER_VOLUME}:/data"] logger.debug(f"Using volume {volumes}") try: + logger.debug(f"Using command {tgi_args}") logger.info(f"Creating container with name {container_name}") + logger.debug(f"Using environment {env}") + logger.debug(f"Using volumes {volumes}") + logger.debug(f"HABANA_RUN_ARGS {HABANA_RUN_ARGS}") + # Log equivalent docker run command for debugging, this is not actually executed container = client.containers.run( DOCKER_IMAGE, diff --git a/integration-tests/gaudi/capture_expected_outputs.py b/integration-tests/gaudi/capture_expected_outputs.py index 6a5d4a68..5a5fd179 100644 --- a/integration-tests/gaudi/capture_expected_outputs.py +++ b/integration-tests/gaudi/capture_expected_outputs.py @@ -3,7 +3,7 @@ import os from typing import Dict, Any, Generator import pytest -from test_generate import TEST_CONFIGS +from test_gaudi_generate import TEST_CONFIGS UNKNOWN_CONFIGS = { name: config From 8768085c8c085419cb3471e74b2265d434b3d053 Mon Sep 17 00:00:00 2001 From: baptiste Date: Wed, 21 May 2025 11:27:11 +0000 Subject: [PATCH 15/19] add new gaudi3 runners --- .github/workflows/build.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index 59fd66ce..c7ccf764 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -129,7 +129,7 @@ jobs: export label_extension="-gaudi" export docker_volume="/mnt/cache" export docker_devices="" - export runs_on="aws-dl1-24xlarge" + export runs_on="itac-bm-emr-gaudi3-dell-1gaudi" export platform="" export extra_pytest="--gaudi" export target="" From 1f03afe94dc2070767aa28d479868408c341890b Mon Sep 17 00:00:00 2001 From: baptiste Date: Wed, 21 May 2025 15:28:58 +0000 Subject: [PATCH 16/19] enable multi-card test --- .github/workflows/build.yaml | 2 +- integration-tests/gaudi/test_gaudi_generate.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml 
index c7ccf764..14c69a2b 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -129,7 +129,7 @@ jobs: export label_extension="-gaudi" export docker_volume="/mnt/cache" export docker_devices="" - export runs_on="itac-bm-emr-gaudi3-dell-1gaudi" + export runs_on="itac-bm-emr-gaudi3-dell-8gaudi" export platform="" export extra_pytest="--gaudi" export target="" diff --git a/integration-tests/gaudi/test_gaudi_generate.py b/integration-tests/gaudi/test_gaudi_generate.py index 26ba47fe..c2d768d0 100644 --- a/integration-tests/gaudi/test_gaudi_generate.py +++ b/integration-tests/gaudi/test_gaudi_generate.py @@ -32,6 +32,7 @@ TEST_CONFIGS = { "--max-batch-prefill-tokens", "2048", ], + "run_by_default": True, }, "meta-llama/Llama-3.1-8B-Instruct": { "model_id": "meta-llama/Llama-3.1-8B-Instruct", From 0295bf243f38341185b597e7c613d6283f0d7c5d Mon Sep 17 00:00:00 2001 From: baptiste Date: Mon, 23 Jun 2025 12:10:14 +0000 Subject: [PATCH 17/19] fix broken test --- backends/gaudi/Makefile | 2 - backends/gaudi/README.md | 5 + integration-tests/fixtures/gaudi/service.py | 49 ++-- .../gaudi/test_gaudi_generate.py | 22 +- integration-tests/gaudi/test_model.py | 259 ------------------ 5 files changed, 52 insertions(+), 285 deletions(-) delete mode 100644 integration-tests/gaudi/test_model.py diff --git a/backends/gaudi/Makefile b/backends/gaudi/Makefile index 2eb5506f..40d17f61 100644 --- a/backends/gaudi/Makefile +++ b/backends/gaudi/Makefile @@ -50,8 +50,6 @@ local-dev-install: install-dependencies # In order to run the integration tests, you need to first build the image (make -C backends/gaudi image) run-integration-tests: - pip install -U pip uv - uv pip install -r ${root_dir}/backends/gaudi/server/integration-tests/requirements.txt DOCKER_VOLUME=${root_dir}/data \ HF_TOKEN=`cat ${HOME}/.cache/huggingface/token` \ pytest --durations=0 -s -vv ${root_dir}/integration-tests --gaudi diff --git a/backends/gaudi/README.md b/backends/gaudi/README.md index 
fa68c0a9..7713040f 100644 --- a/backends/gaudi/README.md +++ b/backends/gaudi/README.md @@ -99,6 +99,11 @@ curl 127.0.0.1:8080/generate \ ### Integration tests +Install the dependencies: +```bash +pip install -r integration-tests/requirements.txt +``` + To run the integration tests, you need to first build the image: ```bash make -C backends/gaudi image diff --git a/integration-tests/fixtures/gaudi/service.py b/integration-tests/fixtures/gaudi/service.py index b6942dbe..5c7d729b 100644 --- a/integration-tests/fixtures/gaudi/service.py +++ b/integration-tests/fixtures/gaudi/service.py @@ -16,8 +16,7 @@ from aiohttp import ClientConnectorError, ClientOSError, ServerDisconnectedError from docker.errors import NotFound import logging from gaudi.test_gaudi_generate import TEST_CONFIGS -from text_generation import AsyncClient -from text_generation.types import Response +from huggingface_hub import AsyncInferenceClient, TextGenerationOutput import huggingface_hub logging.basicConfig( @@ -71,9 +70,15 @@ def stream_container_logs(container, test_name): logger.error(f"Error streaming container logs: {str(e)}") +class TestClient(AsyncInferenceClient): + def __init__(self, service_name: str, base_url: str): + super().__init__(model=base_url) + self.service_name = service_name + + class LauncherHandle: - def __init__(self, port: int): - self.client = AsyncClient(f"http://localhost:{port}", timeout=3600) + def __init__(self, service_name: str, port: int): + self.client = TestClient(service_name, f"http://localhost:{port}") def _inner_health(self): raise NotImplementedError @@ -89,7 +94,7 @@ class LauncherHandle: raise RuntimeError("Launcher crashed") try: - await self.client.generate("test") + await self.client.text_generation("test", max_new_tokens=1) elapsed = time.time() - start_time logger.info(f"Health check passed after {elapsed:.1f}s") return @@ -113,7 +118,8 @@ class LauncherHandle: class ContainerLauncherHandle(LauncherHandle): def __init__(self, docker_client, 
container_name, port: int): - super(ContainerLauncherHandle, self).__init__(port) + service_name = container_name # Use container name as service name + super(ContainerLauncherHandle, self).__init__(service_name, port) self.docker_client = docker_client self.container_name = container_name @@ -134,7 +140,8 @@ class ContainerLauncherHandle(LauncherHandle): class ProcessLauncherHandle(LauncherHandle): def __init__(self, process, port: int): - super(ProcessLauncherHandle, self).__init__(port) + service_name = "process" # Use generic name for process launcher + super(ProcessLauncherHandle, self).__init__(service_name, port) self.process = process def _inner_health(self) -> bool: @@ -153,11 +160,13 @@ def data_volume(): @pytest.fixture(scope="module") -def gaudi_launcher(event_loop): +def gaudi_launcher(): @contextlib.contextmanager def docker_launcher( model_id: str, test_name: str, + tgi_args: List[str] = None, + env_config: dict = None ): logger.info( f"Starting docker launcher for model {model_id} and test {test_name}" @@ -185,23 +194,30 @@ def gaudi_launcher(event_loop): ) container.stop() container.wait() + container.remove() + logger.info(f"Removed existing container {container_name}") except NotFound: pass except Exception as e: logger.error(f"Error handling existing container: {str(e)}") - tgi_args = TEST_CONFIGS[test_name]["args"].copy() + if tgi_args is None: + tgi_args = [] + else: + tgi_args = tgi_args.copy() env = BASE_ENV.copy() # Add model_id to env env["MODEL_ID"] = model_id - # Add env config that is definied in the fixture parameter - if "env_config" in TEST_CONFIGS[test_name]: - env.update(TEST_CONFIGS[test_name]["env_config"].copy()) + # Add env config that is defined in the fixture parameter + if env_config is not None: + env.update(env_config.copy()) - volumes = [f"{DOCKER_VOLUME}:/data"] + volumes = [] + if DOCKER_VOLUME: + volumes = [f"{DOCKER_VOLUME}:/data"] logger.debug(f"Using volume {volumes}") try: @@ -276,13 +292,14 @@ def 
gaudi_launcher(event_loop): @pytest.fixture(scope="module") def gaudi_generate_load(): async def generate_load_inner( - client: AsyncClient, prompt: str, max_new_tokens: int, n: int - ) -> List[Response]: + client: AsyncInferenceClient, prompt: str, max_new_tokens: int, n: int + ) -> List[TextGenerationOutput]: try: futures = [ - client.generate( + client.text_generation( prompt, max_new_tokens=max_new_tokens, + details=True, decoder_input_details=True, ) for _ in range(n) diff --git a/integration-tests/gaudi/test_gaudi_generate.py b/integration-tests/gaudi/test_gaudi_generate.py index c2d768d0..f5d71ab7 100644 --- a/integration-tests/gaudi/test_gaudi_generate.py +++ b/integration-tests/gaudi/test_gaudi_generate.py @@ -1,7 +1,6 @@ from typing import Any, Dict, Generator from _pytest.fixtures import SubRequest - -from text_generation import AsyncClient +from huggingface_hub import AsyncInferenceClient, TextGenerationOutput import pytest @@ -238,13 +237,18 @@ def input(test_config: Dict[str, Any]) -> str: @pytest.fixture(scope="module") -def tgi_service(gaudi_launcher, model_id: str, test_name: str): - with gaudi_launcher(model_id, test_name) as tgi_service: +def tgi_service(gaudi_launcher, model_id: str, test_name: str, test_config: Dict[str, Any]): + with gaudi_launcher( + model_id, + test_name, + tgi_args=test_config.get("args", []), + env_config=test_config.get("env_config", {}) + ) as tgi_service: yield tgi_service @pytest.fixture(scope="module") -async def tgi_client(tgi_service) -> AsyncClient: +async def tgi_client(tgi_service) -> AsyncInferenceClient: await tgi_service.health(1000) return tgi_service.client @@ -252,12 +256,14 @@ async def tgi_client(tgi_service) -> AsyncClient: @pytest.mark.asyncio @pytest.mark.all_models async def test_model_single_request( - tgi_client: AsyncClient, expected_outputs: Dict[str, str], input: str + tgi_client: AsyncInferenceClient, expected_outputs: Dict[str, str], input: str ): # Bounded greedy decoding without input - 
response = await tgi_client.generate( + response = await tgi_client.text_generation( input, max_new_tokens=32, + details=True, + decoder_input_details=True, ) assert response.details.generated_tokens == 32 assert response.generated_text == expected_outputs["greedy"] @@ -266,7 +272,7 @@ async def test_model_single_request( @pytest.mark.asyncio @pytest.mark.all_models async def test_model_multiple_requests( - tgi_client: AsyncClient, + tgi_client: AsyncInferenceClient, gaudi_generate_load, expected_outputs: Dict[str, str], input: str, diff --git a/integration-tests/gaudi/test_model.py b/integration-tests/gaudi/test_model.py deleted file mode 100644 index 407bccc2..00000000 --- a/integration-tests/gaudi/test_model.py +++ /dev/null @@ -1,259 +0,0 @@ -from typing import Any, Dict - -from text_generation import AsyncClient -import pytest - -# The "args" config is not optimized for speed but only check that the inference is working for the different models architectures -TEST_CONFIGS = { - "meta-llama/Llama-3.1-8B-Instruct-shared": { - "model_id": "meta-llama/Llama-3.1-8B-Instruct", - "input": "What is Deep Learning?", - "expected_greedy_output": " A Beginner’s Guide\nDeep learning is a subset of machine learning that involves the use of artificial neural networks to analyze and interpret data. It is a type of", - "expected_batch_output": " A Beginner’s Guide\nDeep learning is a subset of machine learning that involves the use of artificial neural networks to analyze and interpret data. 
It is a type of", - "args": [ - "--sharded", - "true", - "--num-shard", - "8", - "--max-input-tokens", - "512", - "--max-total-tokens", - "1024", - "--max-batch-size", - "8", - "--max-batch-prefill-tokens", - "2048", - ], - }, - "meta-llama/Llama-3.1-8B-Instruct": { - "model_id": "meta-llama/Llama-3.1-8B-Instruct", - "input": "What is Deep Learning?", - "expected_greedy_output": " A Beginner’s Guide\nDeep learning is a subset of machine learning that involves the use of artificial neural networks to analyze and interpret data. It is a type of", - "expected_batch_output": " A Beginner’s Guide\nDeep learning is a subset of machine learning that involves the use of artificial neural networks to analyze and interpret data. It is a type of", - "env_config": {}, - "args": [ - "--max-input-tokens", - "512", - "--max-total-tokens", - "1024", - "--max-batch-size", - "4", - "--max-batch-prefill-tokens", - "2048", - ], - }, - "meta-llama/Llama-2-7b-chat-hf": { - "model_id": "meta-llama/Llama-2-7b-chat-hf", - "input": "What is Deep Learning?", - "expected_greedy_output": "\n\nDeep learning (also known as deep structured learning) is part of a broader family of machine learning techniques based on artificial neural networks\u2014specific", - "expected_batch_output": "\n\nDeep learning (also known as deep structured learning) is part of a broader family of machine learning techniques based on artificial neural networks\u2014specific", - "args": [ - "--max-input-tokens", - "512", - "--max-total-tokens", - "1024", - "--max-batch-size", - "4", - "--max-batch-prefill-tokens", - "2048", - ], - }, - "mistralai/Mistral-7B-Instruct-v0.3": { - "model_id": "mistralai/Mistral-7B-Instruct-v0.3", - "input": "What is Deep Learning?", - "expected_greedy_output": "\n\nDeep learning is a subset of machine learning in artificial intelligence (AI) that has networks capable of learning unsupervised from data that is unstructured", - "expected_batch_output": "\n\nDeep learning is a subset of machine 
learning in artificial intelligence (AI) that has networks capable of learning unsupervised from data that is unstructured", - "args": [ - "--max-input-tokens", - "512", - "--max-total-tokens", - "1024", - "--max-batch-size", - "4", - "--max-batch-prefill-tokens", - "2048", - ], - }, - "bigcode/starcoder2-3b": { - "model_id": "bigcode/starcoder2-3b", - "input": "What is Deep Learning?", - "expected_greedy_output": "\n\nDeep learning is a subset of machine learning that uses artificial neural networks to perform tasks.\n\nNeural networks are a type of machine learning algorithm that", - "expected_batch_output": "\n\nDeep learning is a subset of machine learning that uses artificial neural networks to perform tasks.\n\nNeural networks are a type of machine learning algorithm that", - "args": [ - "--max-input-tokens", - "512", - "--max-total-tokens", - "1024", - "--max-batch-size", - "4", - "--max-batch-prefill-tokens", - "2048", - ], - }, - "google/gemma-7b-it": { - "model_id": "google/gemma-7b-it", - "input": "What is Deep Learning?", - "expected_greedy_output": "\n\nDeep learning is a subset of machine learning that uses artificial neural networks to learn from large amounts of data. Neural networks are inspired by the structure and function of", - "expected_batch_output": "\n\nDeep learning is a subset of machine learning that uses artificial neural networks to learn from large amounts of data. Neural networks are inspired by the structure and function of", - "args": [ - "--max-input-tokens", - "512", - "--max-total-tokens", - "1024", - "--max-batch-size", - "4", - "--max-batch-prefill-tokens", - "2048", - ], - }, - "Qwen/Qwen2-0.5B-Instruct": { - "model_id": "Qwen/Qwen2-0.5B-Instruct", - "input": "What is Deep Learning?", - "expected_greedy_output": " Deep Learning is a type of machine learning that is based on the principles of artificial neural networks. 
It is a type of machine learning that is used to train models", - "expected_batch_output": " Deep Learning is a type of machine learning that is based on the principles of artificial neural networks. It is a type of machine learning that is used to train models", - "args": [ - "--max-input-tokens", - "512", - "--max-total-tokens", - "1024", - "--max-batch-size", - "4", - "--max-batch-prefill-tokens", - "2048", - ], - }, - "tiiuae/falcon-7b-instruct": { - "model_id": "tiiuae/falcon-7b-instruct", - "input": "What is Deep Learning?", - "expected_greedy_output": "\nDeep learning is a branch of machine learning that uses artificial neural networks to learn and make decisions. It is based on the concept of hierarchical learning, where a", - "expected_batch_output": "\nDeep learning is a branch of machine learning that uses artificial neural networks to learn and make decisions. It is based on the concept of hierarchical learning, where a", - "args": [ - "--max-input-tokens", - "512", - "--max-total-tokens", - "1024", - "--max-batch-size", - "4", - ], - }, - "microsoft/phi-1_5": { - "model_id": "microsoft/phi-1_5", - "input": "What is Deep Learning?", - "expected_greedy_output": "\n\nDeep Learning is a subfield of Machine Learning that focuses on building neural networks with multiple layers of interconnected nodes. These networks are designed to learn from large", - "expected_batch_output": "\n\nDeep Learning is a subfield of Machine Learning that focuses on building neural networks with multiple layers of interconnected nodes. These networks are designed to learn from large", - "args": [ - "--max-input-tokens", - "512", - "--max-total-tokens", - "1024", - "--max-batch-size", - "4", - ], - }, - "openai-community/gpt2": { - "model_id": "openai-community/gpt2", - "input": "What is Deep Learning?", - "expected_greedy_output": "\n\nDeep learning is a new field of research that has been around for a long time. 
It is a new field of research that has been around for a", - "expected_batch_output": "\n\nDeep learning is a new field of research that has been around for a long time. It is a new field of research that has been around for a", - "args": [ - "--max-input-tokens", - "512", - "--max-total-tokens", - "1024", - "--max-batch-size", - "4", - ], - }, - "EleutherAI/gpt-j-6b": { - "model_id": "EleutherAI/gpt-j-6b", - "input": "What is Deep Learning?", - "expected_greedy_output": "\n\nDeep learning is a subset of machine learning that is based on the idea of neural networks. Neural networks are a type of artificial intelligence that is inspired by", - "expected_batch_output": "\n\nDeep learning is a subset of machine learning that is based on the idea of neural networks. Neural networks are a type of artificial intelligence that is inspired by", - "args": [ - "--max-input-tokens", - "512", - "--max-total-tokens", - "1024", - "--max-batch-size", - "4", - ], - }, -} - -print(f"Testing {len(TEST_CONFIGS)} models") - - -@pytest.fixture(scope="module", params=TEST_CONFIGS.keys()) -def test_config(request) -> Dict[str, Any]: - """Fixture that provides model configurations for testing.""" - test_config = TEST_CONFIGS[request.param] - test_config["test_name"] = request.param - return test_config - - -@pytest.fixture(scope="module") -def model_id(test_config): - yield test_config["model_id"] - - -@pytest.fixture(scope="module") -def test_name(test_config): - yield test_config["test_name"] - - -@pytest.fixture(scope="module") -def expected_outputs(test_config): - return { - "greedy": test_config["expected_greedy_output"], - # "sampling": model_config["expected_sampling_output"], - "batch": test_config["expected_batch_output"], - } - - -@pytest.fixture(scope="module") -def input(test_config): - return test_config["input"] - - -@pytest.fixture(scope="module") -def tgi_service(launcher, model_id, test_name): - with launcher(model_id, test_name) as tgi_service: - yield tgi_service - - 
-@pytest.fixture(scope="module") -async def tgi_client(tgi_service) -> AsyncClient: - await tgi_service.health(1000) - return tgi_service.client - - -@pytest.mark.asyncio -async def test_model_single_request( - tgi_client: AsyncClient, expected_outputs: Dict[str, Any], input: str -): - # Bounded greedy decoding without input - response = await tgi_client.generate( - input, - max_new_tokens=32, - ) - assert response.details.generated_tokens == 32 - assert response.generated_text == expected_outputs["greedy"] - - -@pytest.mark.asyncio -async def test_model_multiple_requests( - tgi_client, generate_load, expected_outputs, input -): - num_requests = 4 - responses = await generate_load( - tgi_client, - input, - max_new_tokens=32, - n=num_requests, - ) - - assert len(responses) == 4 - expected = expected_outputs["batch"] - for r in responses: - assert r.details.generated_tokens == 32 - assert r.generated_text == expected From a32025f931f9e27e2595bd11f31fc1ce4dd3cf8e Mon Sep 17 00:00:00 2001 From: baptiste Date: Mon, 23 Jun 2025 12:26:06 +0000 Subject: [PATCH 18/19] fix style --- integration-tests/fixtures/gaudi/service.py | 3 +-- integration-tests/gaudi/test_gaudi_generate.py | 12 +++++++----- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/integration-tests/fixtures/gaudi/service.py b/integration-tests/fixtures/gaudi/service.py index 5c7d729b..f4f43691 100644 --- a/integration-tests/fixtures/gaudi/service.py +++ b/integration-tests/fixtures/gaudi/service.py @@ -15,7 +15,6 @@ import pytest from aiohttp import ClientConnectorError, ClientOSError, ServerDisconnectedError from docker.errors import NotFound import logging -from gaudi.test_gaudi_generate import TEST_CONFIGS from huggingface_hub import AsyncInferenceClient, TextGenerationOutput import huggingface_hub @@ -166,7 +165,7 @@ def gaudi_launcher(): model_id: str, test_name: str, tgi_args: List[str] = None, - env_config: dict = None + env_config: dict = None, ): logger.info( f"Starting docker launcher 
for model {model_id} and test {test_name}" diff --git a/integration-tests/gaudi/test_gaudi_generate.py b/integration-tests/gaudi/test_gaudi_generate.py index f5d71ab7..2b8b0c76 100644 --- a/integration-tests/gaudi/test_gaudi_generate.py +++ b/integration-tests/gaudi/test_gaudi_generate.py @@ -1,6 +1,6 @@ from typing import Any, Dict, Generator from _pytest.fixtures import SubRequest -from huggingface_hub import AsyncInferenceClient, TextGenerationOutput +from huggingface_hub import AsyncInferenceClient import pytest @@ -237,12 +237,14 @@ def input(test_config: Dict[str, Any]) -> str: @pytest.fixture(scope="module") -def tgi_service(gaudi_launcher, model_id: str, test_name: str, test_config: Dict[str, Any]): +def tgi_service( + gaudi_launcher, model_id: str, test_name: str, test_config: Dict[str, Any] +): with gaudi_launcher( - model_id, - test_name, + model_id, + test_name, tgi_args=test_config.get("args", []), - env_config=test_config.get("env_config", {}) + env_config=test_config.get("env_config", {}), ) as tgi_service: yield tgi_service From ae7f3aeba1ccfa38d9da86bcdb398f71afc99f41 Mon Sep 17 00:00:00 2001 From: baptiste Date: Mon, 23 Jun 2025 12:27:32 +0000 Subject: [PATCH 19/19] update conftest --- integration-tests/conftest.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/integration-tests/conftest.py b/integration-tests/conftest.py index 534aaaea..9cc33416 100644 --- a/integration-tests/conftest.py +++ b/integration-tests/conftest.py @@ -51,6 +51,7 @@ from text_generation.types import ( ChatComplete, ChatCompletionChunk, ChatCompletionComplete, + Completion, Details, Grammar, InputToken, @@ -160,6 +161,7 @@ class ResponseComparator(JSONSnapshotExtension): or isinstance(data, ChatComplete) or isinstance(data, ChatCompletionChunk) or isinstance(data, ChatCompletionComplete) + or isinstance(data, Completion) or isinstance(data, OAIChatCompletionChunk) or isinstance(data, OAICompletion) ): @@ -216,6 +218,8 @@ class 
ResponseComparator(JSONSnapshotExtension): if isinstance(choices, List) and len(choices) >= 1: if "delta" in choices[0]: return ChatCompletionChunk(**data) + if "text" in choices[0]: + return Completion(**data) return ChatComplete(**data) else: return Response(**data) @@ -308,6 +312,9 @@ class ResponseComparator(JSONSnapshotExtension): ) ) + def eq_completion(response: Completion, other: Completion) -> bool: + return response.choices[0].text == other.choices[0].text + def eq_chat_complete(response: ChatComplete, other: ChatComplete) -> bool: return ( response.choices[0].message.content == other.choices[0].message.content @@ -352,6 +359,11 @@ class ResponseComparator(JSONSnapshotExtension): if len(serialized_data) == 0: return len(snapshot_data) == len(serialized_data) + if isinstance(serialized_data[0], Completion): + return len(snapshot_data) == len(serialized_data) and all( + [eq_completion(r, o) for r, o in zip(serialized_data, snapshot_data)] + ) + if isinstance(serialized_data[0], ChatComplete): return len(snapshot_data) == len(serialized_data) and all( [eq_chat_complete(r, o) for r, o in zip(serialized_data, snapshot_data)]