diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index c2aba1606..79b3c7773 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -10,6 +10,7 @@ on: pull_request: paths: - ".github/workflows/build.yaml" + - "integration-tests/**" - "server/**" - "proto/**" - "router/**" diff --git a/clients/python/pyproject.toml b/clients/python/pyproject.toml index 00bb99fc5..06d5f9cbf 100644 --- a/clients/python/pyproject.toml +++ b/clients/python/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "text-generation" -version = "0.5.1" +version = "0.5.2" description = "Hugging Face Text Generation Python Client" license = "Apache-2.0" authors = ["Olivier Dehaene "] diff --git a/clients/python/tests/test_client.py b/clients/python/tests/test_client.py index 8972dfd1d..32462f147 100644 --- a/clients/python/tests/test_client.py +++ b/clients/python/tests/test_client.py @@ -16,9 +16,9 @@ def test_generate(flan_t5_xxl_url, hf_headers): assert len(response.details.prefill) == 1 assert response.details.prefill[0] == PrefillToken(id=0, text="", logprob=None) assert len(response.details.tokens) == 1 - assert response.details.tokens[0] == Token( - id=3, text="", logprob=-1.984375, special=False - ) + assert response.details.tokens[0].id == 3 + assert response.details.tokens[0].text == "" + assert not response.details.tokens[0].special def test_generate_best_of(flan_t5_xxl_url, hf_headers): @@ -82,9 +82,9 @@ async def test_generate_async(flan_t5_xxl_url, hf_headers): assert len(response.details.prefill) == 1 assert response.details.prefill[0] == PrefillToken(id=0, text="", logprob=None) assert len(response.details.tokens) == 1 - assert response.details.tokens[0] == Token( - id=3, text="", logprob=-1.984375, special=False - ) + assert response.details.tokens[0].id == 3 + assert response.details.tokens[0].text == "" + assert not response.details.tokens[0].special @pytest.mark.asyncio diff --git a/clients/python/text_generation/types.py b/clients/python/text_generation/types.py index f3f9dcb5a..ad3cd09b1 100644 --- a/clients/python/text_generation/types.py +++ b/clients/python/text_generation/types.py @@ -154,7 +154,7 @@ class Token(BaseModel): # Generation finish reason -class FinishReason(Enum): +class FinishReason(str, Enum): # number of generated tokens == `max_new_tokens` Length = "length" # the model generated its end of sequence token diff --git a/integration-tests/conftest.py b/integration-tests/conftest.py index e9c51c373..ba1abca99 100644 --- a/integration-tests/conftest.py +++ b/integration-tests/conftest.py @@ -4,22 +4,192 @@ import pytest import asyncio import os import docker +import json +import math +import time from docker.errors import NotFound -from typing import Optional, List -from syrupy.filters import props +from typing import Optional, List, Dict +from syrupy.extensions.json import JSONSnapshotExtension +from aiohttp import ClientConnectorError, ClientOSError, ServerDisconnectedError from text_generation import AsyncClient -from text_generation.types import Response +from text_generation.types import Response, Details, PrefillToken, Token, BestOfSequence DOCKER_IMAGE = os.getenv("DOCKER_IMAGE", None) HUGGING_FACE_HUB_TOKEN = os.getenv("HUGGING_FACE_HUB_TOKEN", None) DOCKER_VOLUME = os.getenv("DOCKER_VOLUME", "/data") +class ResponseComparator(JSONSnapshotExtension): + def serialize( + self, + data, + *, + exclude=None, + matcher=None, + ): + if isinstance(data, List): + data = [d.dict() for d in data] + + data = self._filter( + data=data, depth=0, path=(), exclude=exclude, matcher=matcher + ) + return json.dumps(data, indent=2, ensure_ascii=False, sort_keys=False) + "\n" + + def matches( + self, + *, + serialized_data, + snapshot_data, + ) -> bool: + def convert_data(data): + data = json.loads(data) + + if isinstance(data, Dict): + return Response(**data) + if isinstance(data, List): + return [Response(**d) for d in data] + raise NotImplementedError + + def eq_token(token: Token, other: Token) -> bool: + return ( + token.id == other.id + and token.text == other.text + and math.isclose(token.logprob, other.logprob, rel_tol=0.2) + and token.special == other.special + ) + + def eq_prefill_token(prefill_token: PrefillToken, other: PrefillToken) -> bool: + try: + return ( + prefill_token.id == other.id + and prefill_token.text == other.text + and ( + math.isclose(prefill_token.logprob, other.logprob, rel_tol=0.2) + if prefill_token.logprob is not None + else prefill_token.logprob == other.logprob + ) + ) + except TypeError: + return False + + def eq_best_of(details: BestOfSequence, other: BestOfSequence) -> bool: + return ( + details.finish_reason == other.finish_reason + and details.generated_tokens == other.generated_tokens + and details.seed == other.seed + and len(details.prefill) == len(other.prefill) + and all( + [ + eq_prefill_token(d, o) + for d, o in zip(details.prefill, other.prefill) + ] + ) + and len(details.tokens) == len(other.tokens) + and all([eq_token(d, o) for d, o in zip(details.tokens, other.tokens)]) + ) + + def eq_details(details: Details, other: Details) -> bool: + return ( + details.finish_reason == other.finish_reason + and details.generated_tokens == other.generated_tokens + and details.seed == other.seed + and len(details.prefill) == len(other.prefill) + and all( + [ + eq_prefill_token(d, o) + for d, o in zip(details.prefill, other.prefill) + ] + ) + and len(details.tokens) == len(other.tokens) + and all([eq_token(d, o) for d, o in zip(details.tokens, other.tokens)]) + and ( + len(details.best_of_sequences) + if details.best_of_sequences is not None + else 0 + ) + == ( + len(other.best_of_sequences) + if other.best_of_sequences is not None + else 0 + ) + and ( + all( + [ + eq_best_of(d, o) + for d, o in zip( + details.best_of_sequences, other.best_of_sequences + ) + ] + ) + if details.best_of_sequences is not None + else details.best_of_sequences == other.best_of_sequences + ) + ) + + def eq_response(response: Response, other: Response) -> bool: + return response.generated_text == other.generated_text and eq_details( + response.details, other.details + ) + + serialized_data = convert_data(serialized_data) + snapshot_data = convert_data(snapshot_data) + + if not isinstance(serialized_data, List): + serialized_data = [serialized_data] + if not isinstance(snapshot_data, List): + snapshot_data = [snapshot_data] + + return len(snapshot_data) == len(serialized_data) and all( + [eq_response(r, o) for r, o in zip(serialized_data, snapshot_data)] + ) + + +class LauncherHandle: + def __init__(self, port: int): + self.client = AsyncClient(f"http://localhost:{port}") + + def _inner_health(self): + raise NotImplementedError + + async def health(self, timeout: int = 60): + assert timeout > 0 + for _ in range(timeout): + if not self._inner_health(): + raise RuntimeError("Launcher crashed") + + try: + await self.client.generate("test") + return + except (ClientConnectorError, ClientOSError, ServerDisconnectedError) as e: + time.sleep(1) + raise RuntimeError("Health check failed") + + +class ContainerLauncherHandle(LauncherHandle): + def __init__(self, docker_client, container_name, port: int): + super(ContainerLauncherHandle, self).__init__(port) + self.docker_client = docker_client + self.container_name = container_name + + def _inner_health(self) -> bool: + container = self.docker_client.containers.get(self.container_name) + return container.status in ["running", "created"] + + +class ProcessLauncherHandle(LauncherHandle): + def __init__(self, process, port: int): + super(ProcessLauncherHandle, self).__init__(port) + self.process = process + + def _inner_health(self) -> bool: + return self.process.poll() is None + + @pytest.fixture -def snapshot_test(snapshot): - return lambda value: value == snapshot(exclude=props("logprob")) +def response_snapshot(snapshot): + return snapshot.use_extension(ResponseComparator) @pytest.fixture(scope="module") @@ -60,7 +230,7 @@ def launcher(event_loop): with subprocess.Popen( args, stdout=subprocess.PIPE, stderr=subprocess.PIPE ) as process: - yield AsyncClient(f"http://localhost:{port}") + yield ProcessLauncherHandle(process, port) process.terminate() process.wait(60) @@ -110,7 +280,7 @@ def launcher(event_loop): command=args, name=container_name, environment=env, - auto_remove=True, + auto_remove=False, detach=True, device_requests=[ docker.types.DeviceRequest(count=gpu_count, capabilities=[["gpu"]]) @@ -119,13 +289,19 @@ def launcher(event_loop): ports={"80/tcp": port}, ) - yield AsyncClient(f"http://localhost:{port}") + yield ContainerLauncherHandle(client, container.name, port) - container.stop() + try: + container.stop() + container.wait() + except NotFound: + pass container_output = container.logs().decode("utf-8") print(container_output) + container.remove() + if DOCKER_IMAGE is not None: return docker_launcher return local_launcher @@ -140,7 +316,6 @@ def generate_load(): client.generate(prompt, max_new_tokens=max_new_tokens) for _ in range(n) ] - results = await asyncio.gather(*futures) - return [r.dict() for r in results] + return await asyncio.gather(*futures) return generate_load_inner diff --git a/integration-tests/models/__snapshots__/test_bloom_560m.ambr b/integration-tests/models/__snapshots__/test_bloom_560m.ambr deleted file mode 100644 index 1067513dd..000000000 --- a/integration-tests/models/__snapshots__/test_bloom_560m.ambr +++ /dev/null @@ -1,627 +0,0 @@ -# serializer version: 1 -# name: test_bloom_560m - dict({ - 'details': dict({ - 'best_of_sequences': None, - 'finish_reason': , - 'generated_tokens': 10, - 'prefill': list([ - dict({ - 'id': 17934, - 'text': 'Pour', - }), - dict({ - 'id': 49833, - 'text': ' dég', - }), - dict({ - 'id': 21543, - 'text': 'uster', - }), - dict({ - 'id': 447, - 'text': ' un', - }), - dict({ - 'id': 46341, - 'text': ' ort', - }), - dict({ - 'id': 35567, - 'text': 'olan', - }), - dict({ - 'id': 15, - 'text': ',', - }), - dict({ - 'id': 1669, - 'text': ' il', - }), - dict({ - 'id': 11580, - 'text': ' faut', - }), - dict({ - 'id': 3913, - 'text': ' tout', - }), - dict({ - 'id': 39261, - 'text': " d'abord", - }), - ]), - 'seed': 0, - 'tokens': list([ - dict({ - 'id': 578, - 'special': False, - 'text': ' le', - }), - dict({ - 'id': 5608, - 'special': False, - 'text': ' faire', - }), - dict({ - 'id': 159570, - 'special': False, - 'text': ' réch', - }), - dict({ - 'id': 810, - 'special': False, - 'text': 'au', - }), - dict({ - 'id': 12736, - 'special': False, - 'text': 'ffer', - }), - dict({ - 'id': 1742, - 'special': False, - 'text': ' au', - }), - dict({ - 'id': 6105, - 'special': False, - 'text': ' bain', - }), - dict({ - 'id': 88254, - 'special': False, - 'text': '-mar', - }), - dict({ - 'id': 641, - 'special': False, - 'text': 'ie', - }), - dict({ - 'id': 2940, - 'special': False, - 'text': ' avec', - }), - ]), - }), - 'generated_text': ' le faire réchauffer au bain-marie avec', - }) -# --- -# name: test_bloom_560m_all_params - dict({ - 'details': dict({ - 'best_of_sequences': None, - 'finish_reason': , - 'generated_tokens': 10, - 'prefill': list([ - dict({ - 'id': 15, - 'text': ',', - }), - dict({ - 'id': 1669, - 'text': ' il', - }), - dict({ - 'id': 11580, - 'text': ' faut', - }), - dict({ - 'id': 3913, - 'text': ' tout', - }), - dict({ - 'id': 39261, - 'text': " d'abord", - }), - ]), - 'seed': 0, - 'tokens': list([ - dict({ - 'id': 408, - 'special': False, - 'text': ' que', - }), - dict({ - 'id': 20288, - 'special': False, - 'text': " l'on", - }), - dict({ - 'id': 22255, - 'special': False, - 'text': ' trouve', - }), - dict({ - 'id': 1622, - 'special': False, - 'text': ' une', - }), - dict({ - 'id': 187079, - 'special': False, - 'text': ' posture', - }), - dict({ - 'id': 501, - 'special': False, - 'text': ' par', - }), - dict({ - 'id': 8741, - 'special': False, - 'text': ' rapport', - }), - dict({ - 'id': 693, - 'special': False, - 'text': ' à', - }), - dict({ - 'id': 366, - 'special': False, - 'text': ' la', - }), - dict({ - 'id': 36503, - 'special': False, - 'text': ' pratique', - }), - ]), - }), - 'generated_text': "Pour déguster un ortolan, il faut tout d'abord que l'on trouve une posture par rapport à la pratique", - }) -# --- -# name: test_bloom_560m_load - list([ - dict({ - 'details': dict({ - 'best_of_sequences': None, - 'finish_reason': , - 'generated_tokens': 10, - 'prefill': list([ - dict({ - 'id': 17934, - 'text': 'Pour', - }), - dict({ - 'id': 49833, - 'text': ' dég', - }), - dict({ - 'id': 21543, - 'text': 'uster', - }), - dict({ - 'id': 447, - 'text': ' un', - }), - dict({ - 'id': 46341, - 'text': ' ort', - }), - dict({ - 'id': 35567, - 'text': 'olan', - }), - dict({ - 'id': 15, - 'text': ',', - }), - dict({ - 'id': 1669, - 'text': ' il', - }), - dict({ - 'id': 11580, - 'text': ' faut', - }), - dict({ - 'id': 3913, - 'text': ' tout', - }), - dict({ - 'id': 39261, - 'text': " d'abord", - }), - ]), - 'seed': None, - 'tokens': list([ - dict({ - 'id': 578, - 'special': False, - 'text': ' le', - }), - dict({ - 'id': 5608, - 'special': False, - 'text': ' faire', - }), - dict({ - 'id': 1767, - 'special': False, - 'text': ' cu', - }), - dict({ - 'id': 1273, - 'special': False, - 'text': 'ire', - }), - dict({ - 'id': 1486, - 'special': False, - 'text': ' dans', - }), - dict({ - 'id': 283, - 'special': False, - 'text': ' de', - }), - dict({ - 'id': 40410, - 'special': False, - 'text': " l'eau", - }), - dict({ - 'id': 20226, - 'special': False, - 'text': ' bou', - }), - dict({ - 'id': 172483, - 'special': False, - 'text': 'illante', - }), - dict({ - 'id': 2805, - 'special': False, - 'text': ' sal', - }), - ]), - }), - 'generated_text': " le faire cuire dans de l'eau bouillante sal", - }), - dict({ - 'details': dict({ - 'best_of_sequences': None, - 'finish_reason': , - 'generated_tokens': 10, - 'prefill': list([ - dict({ - 'id': 17934, - 'text': 'Pour', - }), - dict({ - 'id': 49833, - 'text': ' dég', - }), - dict({ - 'id': 21543, - 'text': 'uster', - }), - dict({ - 'id': 447, - 'text': ' un', - }), - dict({ - 'id': 46341, - 'text': ' ort', - }), - dict({ - 'id': 35567, - 'text': 'olan', - }), - dict({ - 'id': 15, - 'text': ',', - }), - dict({ - 'id': 1669, - 'text': ' il', - }), - dict({ - 'id': 11580, - 'text': ' faut', - }), - dict({ - 'id': 3913, - 'text': ' tout', - }), - dict({ - 'id': 39261, - 'text': " d'abord", - }), - ]), - 'seed': None, - 'tokens': list([ - dict({ - 'id': 578, - 'special': False, - 'text': ' le', - }), - dict({ - 'id': 5608, - 'special': False, - 'text': ' faire', - }), - dict({ - 'id': 1767, - 'special': False, - 'text': ' cu', - }), - dict({ - 'id': 1273, - 'special': False, - 'text': 'ire', - }), - dict({ - 'id': 1486, - 'special': False, - 'text': ' dans', - }), - dict({ - 'id': 283, - 'special': False, - 'text': ' de', - }), - dict({ - 'id': 40410, - 'special': False, - 'text': " l'eau", - }), - dict({ - 'id': 20226, - 'special': False, - 'text': ' bou', - }), - dict({ - 'id': 172483, - 'special': False, - 'text': 'illante', - }), - dict({ - 'id': 2805, - 'special': False, - 'text': ' sal', - }), - ]), - }), - 'generated_text': " le faire cuire dans de l'eau bouillante sal", - }), - dict({ - 'details': dict({ - 'best_of_sequences': None, - 'finish_reason': , - 'generated_tokens': 10, - 'prefill': list([ - dict({ - 'id': 17934, - 'text': 'Pour', - }), - dict({ - 'id': 49833, - 'text': ' dég', - }), - dict({ - 'id': 21543, - 'text': 'uster', - }), - dict({ - 'id': 447, - 'text': ' un', - }), - dict({ - 'id': 46341, - 'text': ' ort', - }), - dict({ - 'id': 35567, - 'text': 'olan', - }), - dict({ - 'id': 15, - 'text': ',', - }), - dict({ - 'id': 1669, - 'text': ' il', - }), - dict({ - 'id': 11580, - 'text': ' faut', - }), - dict({ - 'id': 3913, - 'text': ' tout', - }), - dict({ - 'id': 39261, - 'text': " d'abord", - }), - ]), - 'seed': None, - 'tokens': list([ - dict({ - 'id': 578, - 'special': False, - 'text': ' le', - }), - dict({ - 'id': 5608, - 'special': False, - 'text': ' faire', - }), - dict({ - 'id': 1767, - 'special': False, - 'text': ' cu', - }), - dict({ - 'id': 1273, - 'special': False, - 'text': 'ire', - }), - dict({ - 'id': 1486, - 'special': False, - 'text': ' dans', - }), - dict({ - 'id': 283, - 'special': False, - 'text': ' de', - }), - dict({ - 'id': 40410, - 'special': False, - 'text': " l'eau", - }), - dict({ - 'id': 20226, - 'special': False, - 'text': ' bou', - }), - dict({ - 'id': 172483, - 'special': False, - 'text': 'illante', - }), - dict({ - 'id': 2805, - 'special': False, - 'text': ' sal', - }), - ]), - }), - 'generated_text': " le faire cuire dans de l'eau bouillante sal", - }), - dict({ - 'details': dict({ - 'best_of_sequences': None, - 'finish_reason': , - 'generated_tokens': 10, - 'prefill': list([ - dict({ - 'id': 17934, - 'text': 'Pour', - }), - dict({ - 'id': 49833, - 'text': ' dég', - }), - dict({ - 'id': 21543, - 'text': 'uster', - }), - dict({ - 'id': 447, - 'text': ' un', - }), - dict({ - 'id': 46341, - 'text': ' ort', - }), - dict({ - 'id': 35567, - 'text': 'olan', - }), - dict({ - 'id': 15, - 'text': ',', - }), - dict({ - 'id': 1669, - 'text': ' il', - }), - dict({ - 'id': 11580, - 'text': ' faut', - }), - dict({ - 'id': 3913, - 'text': ' tout', - }), - dict({ - 'id': 39261, - 'text': " d'abord", - }), - ]), - 'seed': None, - 'tokens': list([ - dict({ - 'id': 578, - 'special': False, - 'text': ' le', - }), - dict({ - 'id': 5608, - 'special': False, - 'text': ' faire', - }), - dict({ - 'id': 1767, - 'special': False, - 'text': ' cu', - }), - dict({ - 'id': 1273, - 'special': False, - 'text': 'ire', - }), - dict({ - 'id': 1486, - 'special': False, - 'text': ' dans', - }), - dict({ - 'id': 283, - 'special': False, - 'text': ' de', - }), - dict({ - 'id': 40410, - 'special': False, - 'text': " l'eau", - }), - dict({ - 'id': 20226, - 'special': False, - 'text': ' bou', - }), - dict({ - 'id': 172483, - 'special': False, - 'text': 'illante', - }), - dict({ - 'id': 2805, - 'special': False, - 'text': ' sal', - }), - ]), - }), - 'generated_text': " le faire cuire dans de l'eau bouillante sal", - }), - ]) -# --- diff --git a/integration-tests/models/__snapshots__/test_bloom_560m/test_bloom_560m.json b/integration-tests/models/__snapshots__/test_bloom_560m/test_bloom_560m.json new file mode 100644 index 000000000..53a4ab854 --- /dev/null +++ b/integration-tests/models/__snapshots__/test_bloom_560m/test_bloom_560m.json @@ -0,0 +1,128 @@ +{ + "details": { + "best_of_sequences": null, + "finish_reason": "length", + "generated_tokens": 10, + "prefill": [ + { + "id": 17934, + "logprob": null, + "text": "Pour" + }, + { + "id": 49833, + "logprob": -10.5625, + "text": " dég" + }, + { + "id": 21543, + "logprob": -0.14770508, + "text": "uster" + }, + { + "id": 447, + "logprob": -1.9287109, + "text": " un" + }, + { + "id": 46341, + "logprob": -15.4609375, + "text": " ort" + }, + { + "id": 35567, + "logprob": -7.5585938, + "text": "olan" + }, + { + "id": 15, + "logprob": -1.4003906, + "text": "," + }, + { + "id": 1669, + "logprob": -1.5673828, + "text": " il" + }, + { + "id": 11580, + "logprob": -0.94628906, + "text": " faut" + }, + { + "id": 3913, + "logprob": -3.703125, + "text": " tout" + }, + { + "id": 39261, + "logprob": -1.5732422, + "text": " d'abord" + } + ], + "seed": 0, + "tokens": [ + { + "id": 578, + "logprob": -1.6591797, + "special": false, + "text": " le" + }, + { + "id": 5608, + "logprob": -2.4492188, + "special": false, + "text": " faire" + }, + { + "id": 159570, + "logprob": -6.6835938, + "special": false, + "text": " réch" + }, + { + "id": 810, + "logprob": 0.0, + "special": false, + "text": "au" + }, + { + "id": 12736, + "logprob": 0.0, + "special": false, + "text": "ffer" + }, + { + "id": 1742, + "logprob": -2.5175781, + "special": false, + "text": " au" + }, + { + "id": 6105, + "logprob": -2.0078125, + "special": false, + "text": " bain" + }, + { + "id": 88254, + "logprob": -0.12695312, + "special": false, + "text": "-mar" + }, + { + "id": 641, + "logprob": 0.0, + "special": false, + "text": "ie" + }, + { + "id": 2940, + "logprob": -3.5175781, + "special": false, + "text": " avec" + } + ] + }, + "generated_text": " le faire réchauffer au bain-marie avec" +} diff --git a/integration-tests/models/__snapshots__/test_bloom_560m/test_bloom_560m_all_params.json b/integration-tests/models/__snapshots__/test_bloom_560m/test_bloom_560m_all_params.json new file mode 100644 index 000000000..93a958048 --- /dev/null +++ b/integration-tests/models/__snapshots__/test_bloom_560m/test_bloom_560m_all_params.json @@ -0,0 +1,98 @@ +{ + "details": { + "best_of_sequences": null, + "finish_reason": "length", + "generated_tokens": 10, + "prefill": [ + { + "id": 15, + "logprob": null, + "text": "," + }, + { + "id": 1669, + "logprob": -5.4414062, + "text": " il" + }, + { + "id": 11580, + "logprob": -2.3378906, + "text": " faut" + }, + { + "id": 3913, + "logprob": -4.3554688, + "text": " tout" + }, + { + "id": 39261, + "logprob": -2.9238281, + "text": " d'abord" + } + ], + "seed": 0, + "tokens": [ + { + "id": 408, + "logprob": -1.9267578, + "special": false, + "text": " que" + }, + { + "id": 20288, + "logprob": -2.9257812, + "special": false, + "text": " l'on" + }, + { + "id": 22255, + "logprob": -2.8964844, + "special": false, + "text": " trouve" + }, + { + "id": 1622, + "logprob": -1.1083984, + "special": false, + "text": " une" + }, + { + "id": 187079, + "logprob": -7.796875, + "special": false, + "text": " posture" + }, + { + "id": 501, + "logprob": -5.390625, + "special": false, + "text": " par" + }, + { + "id": 8741, + "logprob": -0.34936523, + "special": false, + "text": " rapport" + }, + { + "id": 693, + "logprob": 0.0, + "special": false, + "text": " à" + }, + { + "id": 366, + "logprob": -2.3378906, + "special": false, + "text": " la" + }, + { + "id": 36503, + "logprob": -3.6640625, + "special": false, + "text": " pratique" + } + ] + }, + "generated_text": "Pour déguster un ortolan, il faut tout d'abord que l'on trouve une posture par rapport à la pratique" +} diff --git a/integration-tests/models/__snapshots__/test_bloom_560m/test_bloom_560m_load.json b/integration-tests/models/__snapshots__/test_bloom_560m/test_bloom_560m_load.json new file mode 100644 index 000000000..0a86bef8e --- /dev/null +++ b/integration-tests/models/__snapshots__/test_bloom_560m/test_bloom_560m_load.json @@ -0,0 +1,514 @@ +[ + { + "details": { + "best_of_sequences": null, + "finish_reason": "length", + "generated_tokens": 10, + "prefill": [ + { + "id": 17934, + "logprob": null, + "text": "Pour" + }, + { + "id": 49833, + "logprob": -10.5625, + "text": " dég" + }, + { + "id": 21543, + "logprob": -0.14770508, + "text": "uster" + }, + { + "id": 447, + "logprob": -1.9287109, + "text": " un" + }, + { + "id": 46341, + "logprob": -15.4609375, + "text": " ort" + }, + { + "id": 35567, + "logprob": -7.5585938, + "text": "olan" + }, + { + "id": 15, + "logprob": -1.4003906, + "text": "," + }, + { + "id": 1669, + "logprob": -1.5673828, + "text": " il" + }, + { + "id": 11580, + "logprob": -0.94628906, + "text": " faut" + }, + { + "id": 3913, + "logprob": -3.703125, + "text": " tout" + }, + { + "id": 39261, + "logprob": -1.5732422, + "text": " d'abord" + } + ], + "seed": null, + "tokens": [ + { + "id": 578, + "logprob": -1.7646484, + "special": false, + "text": " le" + }, + { + "id": 5608, + "logprob": -2.6113281, + "special": false, + "text": " faire" + }, + { + "id": 1767, + "logprob": -1.5263672, + "special": false, + "text": " cu" + }, + { + "id": 1273, + "logprob": -0.00010049343, + "special": false, + "text": "ire" + }, + { + "id": 1486, + "logprob": -1.4707031, + "special": false, + "text": " dans" + }, + { + "id": 283, + "logprob": -1.2119141, + "special": false, + "text": " de" + }, + { + "id": 40410, + "logprob": -0.11883545, + "special": false, + "text": " l'eau" + }, + { + "id": 20226, + "logprob": -0.40844727, + "special": false, + "text": " bou" + }, + { + "id": 172483, + "logprob": -0.0037841797, + "special": false, + "text": "illante" + }, + { + "id": 2805, + "logprob": -1.0195312, + "special": false, + "text": " sal" + } + ] + }, + "generated_text": " le faire cuire dans de l'eau bouillante sal" + }, + { + "details": { + "best_of_sequences": null, + "finish_reason": "length", + "generated_tokens": 10, + "prefill": [ + { + "id": 17934, + "logprob": null, + "text": "Pour" + }, + { + "id": 49833, + "logprob": -10.53125, + "text": " dég" + }, + { + "id": 21543, + "logprob": -0.14770508, + "text": "uster" + }, + { + "id": 447, + "logprob": -1.9287109, + "text": " un" + }, + { + "id": 46341, + "logprob": -15.4140625, + "text": " ort" + }, + { + "id": 35567, + "logprob": -7.5234375, + "text": "olan" + }, + { + "id": 15, + "logprob": -1.3613281, + "text": "," + }, + { + "id": 1669, + "logprob": -1.5458984, + "text": " il" + }, + { + "id": 11580, + "logprob": -0.94189453, + "text": " faut" + }, + { + "id": 3913, + "logprob": -3.7011719, + "text": " tout" + }, + { + "id": 39261, + "logprob": -1.5732422, + "text": " d'abord" + } + ], + "seed": null, + "tokens": [ + { + "id": 578, + "logprob": -1.7548828, + "special": false, + "text": " le" + }, + { + "id": 5608, + "logprob": -2.578125, + "special": false, + "text": " faire" + }, + { + "id": 1767, + "logprob": -1.5117188, + "special": false, + "text": " cu" + }, + { + "id": 1273, + "logprob": -0.00010049343, + "special": false, + "text": "ire" + }, + { + "id": 1486, + "logprob": -1.4707031, + "special": false, + "text": " dans" + }, + { + "id": 283, + "logprob": -1.1982422, + "special": false, + "text": " de" + }, + { + "id": 40410, + "logprob": -0.11004639, + "special": false, + "text": " l'eau" + }, + { + "id": 20226, + "logprob": -0.4506836, + "special": false, + "text": " bou" + }, + { + "id": 172483, + "logprob": -0.003047943, + "special": false, + "text": "illante" + }, + { + "id": 2805, + "logprob": -1.0185547, + "special": false, + "text": " sal" + } + ] + }, + "generated_text": " le faire cuire dans de l'eau bouillante sal" + }, + { + "details": { + "best_of_sequences": null, + "finish_reason": "length", + "generated_tokens": 10, + "prefill": [ + { + "id": 17934, + "logprob": null, + "text": "Pour" + }, + { + "id": 49833, + "logprob": -10.53125, + "text": " dég" + }, + { + "id": 21543, + "logprob": -0.14770508, + "text": "uster" + }, + { + "id": 447, + "logprob": -1.9287109, + "text": " un" + }, + { + "id": 46341, + "logprob": -15.4140625, + "text": " ort" + }, + { + "id": 35567, + "logprob": -7.5234375, + "text": "olan" + }, + { + "id": 15, + "logprob": -1.3613281, + "text": "," + }, + { + "id": 1669, + "logprob": -1.5458984, + "text": " il" + }, + { + "id": 11580, + "logprob": -0.94189453, + "text": " faut" + }, + { + "id": 3913, + "logprob": -3.7011719, + "text": " tout" + }, + { + "id": 39261, + "logprob": -1.5732422, + "text": " d'abord" + } + ], + "seed": null, + "tokens": [ + { + "id": 578, + "logprob": -1.7548828, + "special": false, + "text": " le" + }, + { + "id": 5608, + "logprob": -2.578125, + "special": false, + "text": " faire" + }, + { + "id": 1767, + "logprob": -1.5117188, + "special": false, + "text": " cu" + }, + { + "id": 1273, + "logprob": -0.00010049343, + "special": false, + "text": "ire" + }, + { + "id": 1486, + "logprob": -1.4707031, + "special": false, + "text": " dans" + }, + { + "id": 283, + "logprob": -1.1982422, + "special": false, + "text": " de" + }, + { + "id": 40410, + "logprob": -0.11004639, + "special": false, + "text": " l'eau" + }, + { + "id": 20226, + "logprob": -0.4506836, + "special": false, + "text": " bou" + }, + { + "id": 172483, + "logprob": -0.003047943, + "special": false, + "text": "illante" + }, + { + "id": 2805, + "logprob": -1.0185547, + "special": false, + "text": " sal" + } + ] + }, + "generated_text": " le faire cuire dans de l'eau bouillante sal" + }, + { + "details": { + "best_of_sequences": null, + "finish_reason": "length", + "generated_tokens": 10, + "prefill": [ + { + "id": 17934, + "logprob": null, + "text": "Pour" + }, + { + "id": 49833, + "logprob": -10.53125, + "text": " dég" + }, + { + "id": 21543, + "logprob": -0.14770508, + "text": "uster" + }, + { + "id": 447, + "logprob": -1.9287109, + "text": " un" + }, + { + "id": 46341, + "logprob": -15.4140625, + "text": " ort" + }, + { + "id": 35567, + "logprob": -7.5234375, + "text": "olan" + }, + { + "id": 15, + "logprob": -1.3613281, + "text": "," + }, + { + "id": 1669, + "logprob": -1.5458984, + "text": " il" + }, + { + "id": 11580, + "logprob": -0.94189453, + "text": " faut" + }, + { + "id": 3913, + "logprob": -3.7011719, + "text": " tout" + }, + { + "id": 39261, + "logprob": -1.5732422, + "text": " d'abord" + } + ], + "seed": null, + "tokens": [ + { + "id": 578, + "logprob": -1.7548828, + "special": false, + "text": " le" + }, + { + "id": 5608, + "logprob": -2.578125, + "special": false, + "text": " faire" + }, + { + "id": 1767, + "logprob": -1.5117188, + "special": false, + "text": " cu" + }, + { + "id": 1273, + "logprob": -0.00010049343, + "special": false, + "text": "ire" + }, + { + "id": 1486, + "logprob": -1.4707031, + "special": false, + "text": " dans" + }, + { + "id": 283, + "logprob": -1.1982422, + "special": false, + "text": " de" + }, + { + "id": 40410, + "logprob": -0.11004639, + "special": false, + "text": " l'eau" + }, + { + "id": 20226, + "logprob": -0.4506836, + "special": false, + "text": " bou" + }, + { + "id": 172483, + "logprob": -0.003047943, + "special": false, + "text": "illante" + }, + { + "id": 2805, + "logprob": -1.0185547, + "special": false, + "text": " sal" + } + ] + }, + "generated_text": " le faire cuire dans de l'eau bouillante sal" + } +] diff --git a/integration-tests/models/__snapshots__/test_bloom_560m_sharded.ambr b/integration-tests/models/__snapshots__/test_bloom_560m_sharded.ambr deleted file mode 100644 index 667a0373b..000000000 --- a/integration-tests/models/__snapshots__/test_bloom_560m_sharded.ambr +++ /dev/null @@ -1,542 +0,0 @@ -# serializer version: 1 -# name: test_bloom_560m_sharded - dict({ - 'details': dict({ - 'best_of_sequences': None, - 'finish_reason': , - 'generated_tokens': 10, - 'prefill': list([ - dict({ - 'id': 17934, - 'text': 'Pour', - }), - dict({ - 'id': 49833, - 'text': ' dég', - }), - dict({ - 'id': 21543, - 'text': 'uster', - }), - dict({ - 'id': 447, - 'text': ' un', - }), - dict({ - 'id': 46341, - 'text': ' ort', - }), - dict({ - 'id': 35567, - 'text': 'olan', - }), - dict({ - 'id': 15, - 'text': ',', - }), - dict({ - 'id': 1669, - 'text': ' il', - }), - dict({ - 'id': 11580, - 'text': ' faut', - }), - dict({ - 'id': 3913, - 'text': ' tout', - }), - dict({ - 'id': 39261, - 'text': " d'abord", - }), - ]), - 'seed': 0, - 'tokens': list([ - dict({ - 'id': 578, - 'special': False, - 'text': ' le', - }), - dict({ - 'id': 5608, - 'special': False, - 'text': ' faire', - }), - dict({ - 'id': 159570, - 'special': False, - 'text': ' réch', - }), - dict({ - 'id': 810, - 'special': False, - 'text': 'au', - }), - dict({ - 'id': 12736, - 'special': False, - 'text': 'ffer', - }), - dict({ - 'id': 1742, - 'special': False, - 'text': ' au', - }), - dict({ - 'id': 6105, - 'special': False, - 'text': ' bain', - }), - dict({ - 'id': 88254, - 'special': False, - 'text': '-mar', - }), - dict({ - 'id': 641, - 'special': False, - 'text': 'ie', - }), - dict({ - 'id': 2940, - 'special': False, - 'text': ' avec', - }), - ]), - }), - 'generated_text': ' le faire réchauffer au bain-marie avec', - }) -# --- -# name: test_bloom_560m_sharded_load - list([ - dict({ - 'details': dict({ - 'best_of_sequences': None, - 'finish_reason': , - 'generated_tokens': 10, - 'prefill': list([ - dict({ - 'id': 17934, - 'text': 'Pour', - }), - dict({ - 'id': 49833, - 'text': ' dég', - }), - dict({ - 'id': 21543, - 'text': 'uster', - }), - dict({ - 'id': 447, - 'text': ' un', - }), - dict({ - 'id': 46341, - 'text': ' ort', - }), - dict({ - 'id': 35567, - 'text': 'olan', - }), - dict({ - 'id': 15, - 'text': ',', - }), - dict({ - 'id': 1669, - 'text': ' il', - }), - dict({ - 'id': 11580, - 'text': ' faut', - }), - dict({ - 'id': 3913, - 'text': ' tout', - }), - dict({ - 'id': 39261, - 'text': " d'abord", - }), - ]), - 'seed': None, - 'tokens': list([ - dict({ - 'id': 578, - 'special': False, - 'text': ' le', - }), - dict({ - 'id': 5608, - 'special': False, - 'text': ' faire', - }), - dict({ - 'id': 1767, - 'special': False, - 'text': ' cu', - }), - dict({ - 'id': 1273, - 'special': False, - 'text': 'ire', - }), - dict({ - 'id': 1486, - 'special': False, - 'text': ' dans', - }), - dict({ - 'id': 283, - 'special': False, - 'text': ' de', - }), - dict({ - 'id': 40410, - 'special': False, - 'text': " l'eau", - }), - dict({ - 'id': 20226, - 'special': False, - 'text': ' bou', - }), - dict({ - 'id': 172483, - 'special': False, - 'text': 'illante', - }), - dict({ - 'id': 2805, - 'special': False, - 'text': ' sal', - }), - ]), - }), - 'generated_text': " le faire cuire dans de l'eau bouillante sal", - }), - dict({ - 'details': dict({ - 'best_of_sequences': None, - 'finish_reason': , - 'generated_tokens': 10, - 'prefill': list([ - dict({ - 'id': 17934, - 'text': 'Pour', - }), - dict({ - 'id': 49833, - 'text': ' dég', - }), - dict({ - 'id': 21543, - 'text': 'uster', - }), - dict({ - 'id': 447, - 'text': ' un', - }), - dict({ - 'id': 46341, - 'text': ' ort', - }), - dict({ - 'id': 35567, - 'text': 'olan', - }), - dict({ - 'id': 15, - 'text': ',', - }), - dict({ - 'id': 1669, - 'text': ' il', - }), - dict({ - 'id': 11580, - 'text': ' faut', - }), - dict({ - 'id': 3913, - 'text': ' tout', - }), - dict({ - 'id': 39261, - 'text': " d'abord", - }), - ]), - 'seed': None, - 'tokens': list([ - dict({ - 'id': 578, - 'special': False, - 'text': ' le', - }), - dict({ - 'id': 5608, - 'special': False, - 'text': ' faire', - }), - dict({ - 'id': 1767, - 'special': False, - 'text': ' cu', - }), - dict({ - 'id': 1273, - 'special': False, - 'text': 'ire', - }), - dict({ - 'id': 1486, - 'special': False, - 'text': ' dans', - }), - dict({ - 'id': 283, - 'special': False, - 'text': ' de', - }), - dict({ - 'id': 40410, - 'special': False, - 'text': " l'eau", - }), - dict({ - 'id': 20226, - 'special': False, - 'text': ' bou', - }), - dict({ - 'id': 172483, - 'special': False, - 'text': 'illante', - }), - dict({ - 'id': 2805, - 'special': False, - 'text': ' sal', - }), - ]), - }), - 'generated_text': " le faire cuire dans de l'eau bouillante sal", - }), - dict({ - 'details': dict({ - 'best_of_sequences': None, - 'finish_reason': , - 'generated_tokens': 10, - 'prefill': list([ - dict({ - 'id': 17934, - 'text': 'Pour', - }), - dict({ - 'id': 49833, - 'text': ' dég', - }), - dict({ - 'id': 21543, - 'text': 'uster', - }), - dict({ - 'id': 447, - 'text': ' un', - }), - dict({ - 'id': 46341, - 'text': ' ort', - }), - dict({ - 'id': 35567, - 'text': 'olan', - }), - dict({ - 'id': 15, - 'text': ',', - }), - dict({ - 'id': 1669, - 'text': ' il', - }), - dict({ - 'id': 11580, - 'text': ' faut', - }), - dict({ - 'id': 3913, - 'text': ' tout', - }), - dict({ - 'id': 39261, - 'text': " d'abord", - }), - ]), - 'seed': None, - 'tokens': list([ - dict({ - 'id': 578, - 'special': False, - 'text': ' le', - }), - dict({ - 'id': 5608, - 'special': False, - 'text': ' faire', - }), - dict({ - 'id': 1767, - 'special': False, - 'text': ' cu', - }), - dict({ - 'id': 1273, - 'special': False, - 'text': 'ire', - }), - dict({ - 'id': 1486, - 'special': False, - 'text': ' dans', - }), - dict({ - 'id': 283, - 'special': False, - 'text': ' de', - }), - dict({ - 'id': 40410, - 'special': False, - 'text': " l'eau", - }), - dict({ - 'id': 20226, - 'special': False, - 'text': ' bou', - }), - dict({ - 'id': 172483, - 'special': False, - 'text': 'illante', - }), - dict({ - 'id': 2805, - 'special': False, - 'text': ' sal', - }), - ]), - }), - 'generated_text': " le faire cuire dans de l'eau bouillante sal", - }), - dict({ - 'details': dict({ - 'best_of_sequences': None, - 'finish_reason': , - 'generated_tokens': 10, - 'prefill': list([ - dict({ - 'id': 17934, - 'text': 'Pour', - }), - dict({ - 'id': 49833, - 'text': ' dég', - }), - dict({ - 'id': 21543, - 'text': 'uster', - }), - dict({ - 'id': 447, - 'text': ' un', - }), - dict({ - 'id': 46341, - 'text': ' ort', - }), - dict({ - 'id': 35567, - 'text': 'olan', - }), - dict({ - 'id': 15, - 'text': ',', - }), - dict({ - 'id': 1669, - 'text': ' il', - }), - dict({ - 'id': 11580, - 'text': ' faut', - }), - dict({ - 'id': 3913, - 'text': ' tout', - }), - dict({ - 'id': 39261, - 'text': " d'abord", - }), - ]), - 'seed': None, - 'tokens': list([ - dict({ - 'id': 578, - 'special': False, - 'text': ' le', - }), - dict({ - 'id': 5608, - 'special': False, - 'text': ' faire', - }), - dict({ - 'id': 1767, - 'special': False, - 'text': ' cu', - }), - dict({ - 'id': 1273, - 'special': False, - 'text': 'ire', - }), - dict({ - 'id': 1486, - 'special': False, - 'text': ' dans', - }), - dict({ - 'id': 283, - 'special': False, - 'text': ' de', - }), - dict({ - 'id': 40410, - 'special': False, - 'text': " l'eau", - }), - dict({ - 'id': 20226, - 'special': False, - 'text': ' bou', - }), - dict({ - 'id': 172483, - 'special': False, - 'text': 'illante', - }), - dict({ - 'id': 2805, - 'special': False, - 'text': ' sal', - }), - ]), - }), - 'generated_text': " le faire cuire dans de l'eau bouillante sal", - }), - ]) -# --- diff --git a/integration-tests/models/__snapshots__/test_bloom_560m_sharded/test_bloom_560m_sharded.json b/integration-tests/models/__snapshots__/test_bloom_560m_sharded/test_bloom_560m_sharded.json new file mode 100644 index 000000000..dd8936afd --- /dev/null +++ b/integration-tests/models/__snapshots__/test_bloom_560m_sharded/test_bloom_560m_sharded.json @@ -0,0 +1,128 @@ +{ + "details": { + "best_of_sequences": null, + "finish_reason": "length", + "generated_tokens": 10, + "prefill": [ + { + "id": 17934, + "logprob": null, + "text": "Pour" + }, + { + "id": 49833, + "logprob": -10.5390625, + "text": " dég" + }, + { + "id": 21543, + "logprob": -0.14758301, + "text": "uster" + }, + { + "id": 447, + "logprob": -1.9296875, + "text": " un" + }, + { + "id": 46341, + "logprob": -15.4453125, + "text": " ort" + }, + { + "id": 35567, + "logprob": -7.59375, + "text": "olan" + }, + { + "id": 15, + "logprob": -1.3994141, + "text": "," + }, + { + "id": 1669, + "logprob": -1.578125, + "text": " il" + }, + { + "id": 11580, + "logprob": -0.9453125, + "text": " faut" + }, + { + "id": 3913, + "logprob": -3.7011719, + "text": " tout" + }, + { + "id": 39261, + "logprob": -1.5732422, + "text": " d'abord" + } + ], + "seed": 0, + "tokens": [ + { + "id": 578, + "logprob": -1.6474609, + "special": false, + "text": " le" + }, + { + "id": 5608, + "logprob": -2.5097656, + "special": false, + "text": " faire" + }, + { + "id": 159570, + "logprob": -6.65625, + "special": false, + "text": " réch" + }, + { + "id": 810, + "logprob": 0.0, + "special": false, + "text": "au" + }, + { + "id": 12736, + "logprob": 0.0, + "special": false, + "text": "ffer" + }, + { + "id": 1742, + "logprob": -2.5859375, + "special": false, + "text": " au" + }, + { + "id": 6105, + "logprob": -2.03125, + "special": false, + "text": " bain" + }, + { + "id": 88254, + "logprob": -0.12695312, + "special": false, + "text": "-mar" + }, + { + "id": 641, + "logprob": 0.0, + "special": false, + "text": "ie" + }, + { + "id": 2940, + "logprob": -3.5175781, + "special": false, + "text": " avec" + } + ] + }, + "generated_text": " le faire réchauffer au bain-marie avec" +} diff --git a/integration-tests/models/__snapshots__/test_bloom_560m_sharded/test_bloom_560m_sharded_load.json b/integration-tests/models/__snapshots__/test_bloom_560m_sharded/test_bloom_560m_sharded_load.json new file mode 100644 index 000000000..2dd480b9e --- /dev/null +++ b/integration-tests/models/__snapshots__/test_bloom_560m_sharded/test_bloom_560m_sharded_load.json @@ -0,0 +1,514 @@ +[ + { + "details": { + "best_of_sequences": null, + "finish_reason": "length", + "generated_tokens": 10, + "prefill": [ + { + "id": 17934, + "logprob": null, + "text": "Pour" + }, + { + "id": 49833, + "logprob": -10.5390625, + "text": " dég" + }, + { + "id": 21543, + "logprob": -0.14758301, + "text": "uster" + }, + { + "id": 447, + "logprob": -1.9296875, + "text": " un" + }, + { + "id": 46341, + "logprob": -15.4453125, + "text": " ort" + }, + { + "id": 35567, + "logprob": -7.59375, + "text": "olan" + }, + { + "id": 15, + "logprob": -1.3994141, + "text": "," + }, + { + "id": 1669, + "logprob": -1.578125, + "text": " il" + }, + { + "id": 11580, + "logprob": -0.9453125, + "text": " faut" + }, + { + "id": 3913, + "logprob": -3.7011719, + "text": " tout" + }, + { + "id": 39261, + "logprob": -1.5732422, + "text": " d'abord" + } + ], + "seed": null, + "tokens": [ + { + "id": 578, + "logprob": -1.7529297, + "special": false, + "text": " le" + }, + { + "id": 5608, + "logprob": -2.6054688, + "special": false, + "text": " faire" + }, + { + "id": 1767, + "logprob": -1.5283203, + "special": false, + "text": " cu" + }, + { + "id": 1273, + "logprob": -0.00010049343, + "special": false, + "text": "ire" + }, + { + "id": 1486, + "logprob": -1.4716797, + "special": false, + "text": " dans" + }, + { + "id": 283, + "logprob": -1.1982422, + "special": false, + "text": " de" + }, + { + "id": 40410, + "logprob": -0.11853027, + "special": false, + "text": " l'eau" + }, + { + "id": 20226, + "logprob": -0.41210938, + "special": false, + "text": " bou" + }, + { + "id": 172483, + "logprob": -0.0037765503, + "special": false, + "text": "illante" + }, + { + "id": 2805, + "logprob": -1.0166016, + "special": false, + "text": " sal" + } + ] + }, + "generated_text": " le faire cuire dans de l'eau bouillante sal" + }, + { + "details": { + "best_of_sequences": null, + "finish_reason": "length", + "generated_tokens": 10, + "prefill": [ + { + "id": 17934, + "logprob": null, + "text": "Pour" + }, + { + "id": 49833, + "logprob": -10.515625, + "text": " dég" + }, + { + "id": 21543, + "logprob": -0.1484375, + "text": "uster" + }, + { + "id": 447, + "logprob": -1.9287109, + "text": " un" + }, + { + "id": 46341, + "logprob": -15.34375, + "text": " ort" + }, + { + "id": 35567, + "logprob": -7.515625, + "text": "olan" + }, + { + "id": 15, + "logprob": -1.4199219, + "text": "," + }, + { + "id": 1669, + "logprob": -1.5664062, + "text": " il" + }, + { + "id": 11580, + "logprob": -0.94091797, + "text": " faut" + }, + { + "id": 3913, + "logprob": -3.6660156, + "text": " tout" + }, + { + "id": 39261, + "logprob": -1.7753906, + "text": " d'abord" + } + ], + "seed": null, + "tokens": [ + { + "id": 578, + "logprob": -1.7626953, + "special": false, + "text": " le" + }, + { + "id": 5608, + "logprob": -2.5820312, + "special": false, + "text": " faire" + }, + { + "id": 1767, + "logprob": -1.5097656, + "special": false, + "text": " cu" + }, + { + "id": 1273, + "logprob": -9.393692e-05, + "special": false, + "text": "ire" + }, + { + "id": 1486, + "logprob": -1.5175781, + "special": false, + "text": " dans" + }, + { + "id": 283, + "logprob": -1.1982422, + "special": false, + "text": " de" + }, + { + "id": 40410, + "logprob": -0.11883545, + "special": false, + "text": " l'eau" + }, + { + "id": 20226, + "logprob": -0.4909668, + "special": false, + "text": " bou" + }, + { + "id": 172483, + "logprob": -0.003047943, + "special": false, + "text": "illante" + }, + { + "id": 2805, + "logprob": -1.0185547, + "special": false, + "text": " sal" + } + ] + }, + "generated_text": " le faire cuire dans de l'eau bouillante sal" + }, + { + "details": { + "best_of_sequences": null, + "finish_reason": "length", + "generated_tokens": 10, + "prefill": [ + { + "id": 17934, + "logprob": null, + "text": "Pour" + }, + { + "id": 49833, + "logprob": -10.515625, + "text": " dég" + }, + { + "id": 21543, + "logprob": -0.1484375, + "text": "uster" + }, + { + "id": 447, + "logprob": -1.9287109, + "text": " un" + }, + { + "id": 46341, + "logprob": -15.34375, + "text": " ort" + }, + { + "id": 35567, + "logprob": -7.515625, + "text": "olan" + }, + { + "id": 15, + "logprob": -1.4199219, + "text": "," + }, + { + "id": 1669, + "logprob": -1.5664062, + "text": " il" + }, + { + "id": 11580, + "logprob": -0.94091797, + "text": " faut" + }, + { + "id": 3913, + "logprob": -3.6660156, + "text": " tout" + }, + { + "id": 39261, + "logprob": -1.7753906, + "text": " d'abord" + } + ], + "seed": null, + "tokens": [ + { + "id": 578, + "logprob": -1.7626953, + "special": false, + "text": " le" + }, + { + "id": 5608, + "logprob": -2.5820312, + "special": false, + "text": " faire" + }, + { + "id": 1767, + "logprob": -1.5097656, + "special": false, + "text": " cu" + }, + { + "id": 1273, + "logprob": -9.393692e-05, + "special": false, + "text": "ire" + }, + { + "id": 1486, + "logprob": -1.5175781, + "special": false, + "text": " dans" + }, + { + "id": 283, + "logprob": -1.1982422, + "special": false, + "text": " de" + }, + { + "id": 40410, + "logprob": -0.11883545, + "special": false, + "text": " l'eau" + }, + { + "id": 20226, + "logprob": -0.4909668, + "special": false, + "text": " bou" + }, + { + "id": 172483, + "logprob": -0.003047943, + "special": false, + "text": "illante" + }, + { + "id": 2805, + "logprob": -1.0185547, + "special": false, + "text": " sal" + } + ] + }, + "generated_text": " le faire cuire dans de l'eau bouillante sal" + }, + { + "details": { + "best_of_sequences": null, + "finish_reason": "length", + "generated_tokens": 10, + "prefill": [ + { + "id": 17934, + "logprob": null, + "text": "Pour" + }, + { + "id": 49833, + "logprob": -10.515625, + "text": " dég" + }, + { + "id": 21543, + "logprob": -0.1484375, + "text": "uster" + }, + { + "id": 447, + "logprob": -1.9287109, + "text": " un" + }, + { + "id": 46341, + "logprob": -15.34375, + "text": " ort" + }, + { + "id": 35567, + "logprob": -7.515625, + "text": "olan" + }, + { + "id": 15, + "logprob": -1.4199219, + "text": "," + }, + { + "id": 1669, + "logprob": -1.5664062, + "text": " il" + }, + { + "id": 11580, + "logprob": -0.94091797, + "text": " faut" + }, + { + "id": 3913, + "logprob": -3.6660156, + "text": " tout" + }, + { + "id": 39261, + "logprob": -1.7753906, + "text": " d'abord" + } + ], + "seed": null, + "tokens": [ + { + "id": 578, + "logprob": -1.7626953, + "special": false, + "text": " le" + }, + { + "id": 5608, + "logprob": -2.5820312, + "special": false, + "text": " faire" + }, + { + "id": 1767, + "logprob": -1.5097656, + "special": false, + "text": " cu" + }, + { + "id": 1273, + "logprob": -9.393692e-05, + "special": false, + "text": "ire" + }, + { + "id": 1486, + "logprob": -1.5175781, + "special": false, + "text": " dans" + }, + { + "id": 283, + "logprob": -1.1982422, + "special": false, + "text": " de" + }, + { + "id": 40410, + "logprob": -0.11883545, + "special": false, + "text": " l'eau" + }, + { + "id": 20226, + "logprob": -0.4909668, + "special": false, + "text": " bou" + }, + { + "id": 172483, + "logprob": -0.003047943, + "special": false, + "text": "illante" + }, + { + "id": 2805, + "logprob": -1.0185547, + "special": false, + "text": " sal" + } + ] + }, + "generated_text": " le faire cuire dans de l'eau bouillante sal" + } +] diff --git a/integration-tests/models/__snapshots__/test_flash_llama.ambr b/integration-tests/models/__snapshots__/test_flash_llama.ambr deleted file mode 100644 index f4e3a4c1e..000000000 --- a/integration-tests/models/__snapshots__/test_flash_llama.ambr +++ /dev/null @@ -1,465 +0,0 @@ -# serializer version: 1 -# name: test_flash_llama - dict({ - 'details': dict({ - 'best_of_sequences': None, - 'finish_reason': , - 'generated_tokens': 10, - 'prefill': list([ - dict({ - 'id': 1, - 'text': '', - }), - dict({ - 'id': 4321, - 'text': 'Test', - }), - dict({ - 'id': 2009, - 'text': 'request', - }), - ]), - 'seed': None, - 'tokens': list([ - dict({ - 'id': 363, - 'special': False, - 'text': ' for', - }), - dict({ - 'id': 847, - 'special': False, - 'text': ' /', - }), - dict({ - 'id': 2754, - 'special': False, - 'text': 'api', - }), - dict({ - 'id': 29914, - 'special': False, - 'text': '/', - }), - dict({ - 'id': 29894, - 'special': False, - 'text': 'v', - }), - dict({ - 'id': 29896, - 'special': False, - 'text': '1', - }), - dict({ - 'id': 29914, - 'special': False, - 'text': '/', - }), - dict({ - 'id': 16418, - 'special': False, - 'text': 'projects', - }), - dict({ - 'id': 29914, - 'special': False, - 'text': '/', - }), - dict({ - 'id': 29896, - 'special': False, - 'text': '1', - }), - ]), - }), - 'generated_text': 'for /api/v1/projects/1', - }) -# --- -# name: test_flash_llama_all_params - dict({ - 'details': dict({ - 'best_of_sequences': None, - 'finish_reason': , - 'generated_tokens': 10, - 'prefill': list([ - dict({ - 'id': 1, - 'text': '', - }), - dict({ - 'id': 4321, - 'text': 'Test', - }), - dict({ - 'id': 2009, - 'text': 'request', - }), - ]), - 'seed': 0, - 'tokens': list([ - dict({ - 'id': 5229, - 'special': False, - 'text': ' failed', - }), - dict({ - 'id': 363, - 'special': False, - 'text': ' for', - }), - dict({ - 'id': 5641, - 'special': False, - 'text': ' IP', - }), - dict({ - 'id': 16428, - 'special': False, - 'text': ' Address', - }), - dict({ - 'id': 29901, - 'special': False, - 'text': ':', - }), - dict({ - 'id': 525, - 'special': False, - 'text': " '", - }), - dict({ - 'id': 8516, - 'special': False, - 'text': 'None', - }), - dict({ - 'id': 4286, - 'special': False, - 'text': "'.", - }), - dict({ - 'id': 13, - 'special': False, - 'text': ''' - - - ''', - }), - dict({ - 'id': 294, - 'special': False, - 'text': 'as', - }), - ]), - }), - 'generated_text': ''' - Test requestfailed for IP Address: 'None'. - as - ''', - }) -# --- -# name: test_flash_llama_load - list([ - dict({ - 'details': dict({ - 'best_of_sequences': None, - 'finish_reason': , - 'generated_tokens': 10, - 'prefill': list([ - dict({ - 'id': 1, - 'text': '', - }), - dict({ - 'id': 4321, - 'text': 'Test', - }), - dict({ - 'id': 2009, - 'text': 'request', - }), - ]), - 'seed': None, - 'tokens': list([ - dict({ - 'id': 363, - 'special': False, - 'text': ' for', - }), - dict({ - 'id': 847, - 'special': False, - 'text': ' /', - }), - dict({ - 'id': 2754, - 'special': False, - 'text': 'api', - }), - dict({ - 'id': 29914, - 'special': False, - 'text': '/', - }), - dict({ - 'id': 29894, - 'special': False, - 'text': 'v', - }), - dict({ - 'id': 29896, - 'special': False, - 'text': '1', - }), - dict({ - 'id': 29914, - 'special': False, - 'text': '/', - }), - dict({ - 'id': 16418, - 'special': False, - 'text': 'projects', - }), - dict({ - 'id': 29914, - 'special': False, - 'text': '/', - }), - dict({ - 'id': 29896, - 'special': False, - 'text': '1', - }), - ]), - }), - 'generated_text': 'for /api/v1/projects/1', - }), - dict({ - 'details': dict({ - 'best_of_sequences': None, - 'finish_reason': , - 'generated_tokens': 10, - 'prefill': list([ - dict({ - 'id': 1, - 'text': '', - }), - dict({ - 'id': 4321, - 'text': 'Test', - }), - dict({ - 'id': 2009, - 'text': 'request', - }), - ]), - 'seed': None, - 'tokens': list([ - dict({ - 'id': 363, - 'special': False, - 'text': ' for', - }), - dict({ - 'id': 847, - 'special': False, - 'text': ' /', - }), - dict({ - 'id': 2754, - 'special': False, - 'text': 'api', - }), - dict({ - 'id': 29914, - 'special': False, - 'text': '/', - }), - dict({ - 'id': 29894, - 'special': False, - 'text': 'v', - }), - dict({ - 'id': 29896, - 'special': False, - 'text': '1', - }), - dict({ - 'id': 29914, - 'special': False, - 'text': '/', - }), - dict({ - 'id': 16418, - 'special': False, - 'text': 'projects', - }), - dict({ - 'id': 29914, - 'special': False, - 'text': '/', - }), - dict({ - 'id': 29896, - 'special': False, - 'text': '1', - }), - ]), - }), - 'generated_text': 'for /api/v1/projects/1', - }), - dict({ - 'details': dict({ - 'best_of_sequences': None, - 'finish_reason': , - 'generated_tokens': 10, - 'prefill': list([ - dict({ - 'id': 1, - 'text': '', - }), - dict({ - 'id': 4321, - 'text': 'Test', - }), - dict({ - 'id': 2009, - 'text': 'request', - }), - ]), - 'seed': None, - 'tokens': list([ - dict({ - 'id': 363, - 'special': False, - 'text': ' for', - }), - dict({ - 'id': 847, - 'special': False, - 'text': ' /', - }), - dict({ - 'id': 2754, - 'special': False, - 'text': 'api', - }), - dict({ - 'id': 29914, - 'special': False, - 'text': '/', - }), - dict({ - 'id': 29894, - 'special': False, - 'text': 'v', - }), - dict({ - 'id': 29896, - 'special': False, - 'text': '1', - }), - dict({ - 'id': 29914, - 'special': False, - 'text': '/', - }), - dict({ - 'id': 16418, - 'special': False, - 'text': 'projects', - }), - dict({ - 'id': 29914, - 'special': False, - 'text': '/', - }), - dict({ - 'id': 29896, - 'special': False, - 'text': '1', - }), - ]), - }), - 'generated_text': 'for /api/v1/projects/1', - }), - dict({ - 'details': dict({ - 'best_of_sequences': None, - 'finish_reason': , - 'generated_tokens': 10, - 'prefill': list([ - dict({ - 'id': 1, - 'text': '', - }), - dict({ - 'id': 4321, - 'text': 'Test', - }), - dict({ - 'id': 2009, - 'text': 'request', - }), - ]), - 'seed': None, - 'tokens': list([ - dict({ - 'id': 363, - 'special': False, - 'text': ' for', - }), - dict({ - 'id': 847, - 'special': False, - 'text': ' /', - }), - dict({ - 'id': 2754, - 'special': False, - 'text': 'api', - }), - dict({ - 'id': 29914, - 'special': False, - 'text': '/', - }), - dict({ - 'id': 29894, - 'special': False, - 'text': 'v', - }), - dict({ - 'id': 29896, - 'special': False, - 'text': '1', - }), - dict({ - 'id': 29914, - 'special': False, - 'text': '/', - }), - dict({ - 'id': 16418, - 'special': False, - 'text': 'projects', - }), - dict({ - 'id': 29914, - 'special': False, - 'text': '/', - }), - dict({ - 'id': 29896, - 'special': False, - 'text': '1', - }), - ]), - }), - 'generated_text': 'for /api/v1/projects/1', - }), - ]) -# --- diff --git a/integration-tests/models/__snapshots__/test_flash_llama/test_flash_llama.json b/integration-tests/models/__snapshots__/test_flash_llama/test_flash_llama.json new file mode 100644 index 000000000..49bc996c1 --- /dev/null +++ b/integration-tests/models/__snapshots__/test_flash_llama/test_flash_llama.json @@ -0,0 +1,88 @@ +{ + "details": { + "best_of_sequences": null, + "finish_reason": "length", + "generated_tokens": 10, + "prefill": [ + { + "id": 1, + "logprob": null, + "text": "" + }, + { + "id": 4321, + "logprob": -8.6875, + "text": "Test" + }, + { + "id": 2009, + "logprob": -11.5546875, + "text": "request" + } + ], + "seed": null, + "tokens": [ + { + "id": 363, + "logprob": -1.5380859, + "special": false, + "text": " for" + }, + { + "id": 847, + "logprob": -2.5917969, + "special": false, + "text": " /" + }, + { + "id": 2754, + "logprob": -2.2773438, + "special": false, + "text": "api" + }, + { + "id": 29914, + "logprob": -0.034362793, + "special": false, + "text": "/" + }, + { + "id": 29894, + "logprob": -0.96533203, + "special": false, + "text": "v" + }, + { + "id": 29896, + "logprob": -0.36669922, + "special": false, + "text": "1" + }, + { + "id": 29914, + "logprob": -0.013122559, + "special": false, + "text": "/" + }, + { + "id": 16418, + "logprob": -3.1503906, + "special": false, + "text": "projects" + }, + { + "id": 29914, + "logprob": -0.43652344, + "special": false, + "text": "/" + }, + { + "id": 29896, + "logprob": -1.9404297, + "special": false, + "text": "1" + } + ] + }, + "generated_text": "for /api/v1/projects/1" +} diff --git a/integration-tests/models/__snapshots__/test_flash_llama/test_flash_llama_all_params.json b/integration-tests/models/__snapshots__/test_flash_llama/test_flash_llama_all_params.json new file mode 100644 index 000000000..1b6b51a37 --- /dev/null +++ b/integration-tests/models/__snapshots__/test_flash_llama/test_flash_llama_all_params.json @@ -0,0 +1,88 @@ +{ + "details": { + "best_of_sequences": null, + "finish_reason": "length", + "generated_tokens": 10, + "prefill": [ + { + "id": 1, + "logprob": null, + "text": "" + }, + { + "id": 4321, + "logprob": -8.6875, + "text": "Test" + }, + { + "id": 2009, + "logprob": -11.5546875, + "text": "request" + } + ], + "seed": 0, + "tokens": [ + { + "id": 5229, + "logprob": -3.3085938, + "special": false, + "text": " failed" + }, + { + "id": 363, + "logprob": -3.984375, + "special": false, + "text": " for" + }, + { + "id": 5641, + "logprob": -6.53125, + "special": false, + "text": " IP" + }, + { + "id": 16428, + "logprob": -3.1835938, + "special": false, + "text": " Address" + }, + { + "id": 29901, + "logprob": -1.2324219, + "special": false, + "text": ":" + }, + { + "id": 525, + "logprob": -2.6855469, + "special": false, + "text": " '" + }, + { + "id": 8516, + "logprob": -7.1601562, + "special": false, + "text": "None" + }, + { + "id": 4286, + "logprob": -2.4433594, + "special": false, + "text": "'." + }, + { + "id": 13, + "logprob": -0.06530762, + "special": false, + "text": "\n" + }, + { + "id": 294, + "logprob": -7.953125, + "special": false, + "text": "as" + } + ] + }, + "generated_text": "Test requestfailed for IP Address: 'None'.\nas" +} diff --git a/integration-tests/models/__snapshots__/test_flash_llama/test_flash_llama_load.json b/integration-tests/models/__snapshots__/test_flash_llama/test_flash_llama_load.json new file mode 100644 index 000000000..5a8ba2175 --- /dev/null +++ b/integration-tests/models/__snapshots__/test_flash_llama/test_flash_llama_load.json @@ -0,0 +1,354 @@ +[ + { + "details": { + "best_of_sequences": null, + "finish_reason": "length", + "generated_tokens": 10, + "prefill": [ + { + "id": 1, + "logprob": null, + "text": "" + }, + { + "id": 4321, + "logprob": -8.6875, + "text": "Test" + }, + { + "id": 2009, + "logprob": -11.5546875, + "text": "request" + } + ], + "seed": null, + "tokens": [ + { + "id": 363, + "logprob": -1.5322266, + "special": false, + "text": " for" + }, + { + "id": 847, + "logprob": -2.5585938, + "special": false, + "text": " /" + }, + { + "id": 2754, + "logprob": -2.265625, + "special": false, + "text": "api" + }, + { + "id": 29914, + "logprob": -0.034088135, + "special": false, + "text": "/" + }, + { + "id": 29894, + "logprob": -0.96240234, + "special": false, + "text": "v" + }, + { + "id": 29896, + "logprob": -0.36816406, + "special": false, + "text": "1" + }, + { + "id": 29914, + "logprob": -0.013191223, + "special": false, + "text": "/" + }, + { + "id": 16418, + "logprob": -3.15625, + "special": false, + "text": "projects" + }, + { + "id": 29914, + "logprob": -0.43774414, + "special": false, + "text": "/" + }, + { + "id": 29896, + "logprob": -1.9443359, + "special": false, + "text": "1" + } + ] + }, + "generated_text": "for /api/v1/projects/1" + }, + { + "details": { + "best_of_sequences": null, + "finish_reason": "length", + "generated_tokens": 10, + "prefill": [ + { + "id": 1, + "logprob": null, + "text": "" + }, + { + "id": 4321, + "logprob": -8.6875, + "text": "Test" + }, + { + "id": 2009, + "logprob": -11.5546875, + "text": "request" + } + ], + "seed": null, + "tokens": [ + { + "id": 363, + "logprob": -1.5380859, + "special": false, + "text": " for" + }, + { + "id": 847, + "logprob": -2.5859375, + "special": false, + "text": " /" + }, + { + "id": 2754, + "logprob": -2.2695312, + "special": false, + "text": "api" + }, + { + "id": 29914, + "logprob": -0.03439331, + "special": false, + "text": "/" + }, + { + "id": 29894, + "logprob": -0.96240234, + "special": false, + "text": "v" + }, + { + "id": 29896, + "logprob": -0.36694336, + "special": false, + "text": "1" + }, + { + "id": 29914, + "logprob": -0.013114929, + "special": false, + "text": "/" + }, + { + "id": 16418, + "logprob": -3.1542969, + "special": false, + "text": "projects" + }, + { + "id": 29914, + "logprob": -0.43847656, + "special": false, + "text": "/" + }, + { + "id": 29896, + "logprob": -1.9433594, + "special": false, + "text": "1" + } + ] + }, + "generated_text": "for /api/v1/projects/1" + }, + { + "details": { + "best_of_sequences": null, + "finish_reason": "length", + "generated_tokens": 10, + "prefill": [ + { + "id": 1, + "logprob": null, + "text": "" + }, + { + "id": 4321, + "logprob": -8.6875, + "text": "Test" + }, + { + "id": 2009, + "logprob": -11.5546875, + "text": "request" + } + ], + "seed": null, + "tokens": [ + { + "id": 363, + "logprob": -1.5322266, + "special": false, + "text": " for" + }, + { + "id": 847, + "logprob": -2.5585938, + "special": false, + "text": " /" + }, + { + "id": 2754, + "logprob": -2.265625, + "special": false, + "text": "api" + }, + { + "id": 29914, + "logprob": -0.034088135, + "special": false, + "text": "/" + }, + { + "id": 29894, + "logprob": -0.96240234, + "special": false, + "text": "v" + }, + { + "id": 29896, + "logprob": -0.36816406, + "special": false, + "text": "1" + }, + { + "id": 29914, + "logprob": -0.013191223, + "special": false, + "text": "/" + }, + { + "id": 16418, + "logprob": -3.15625, + "special": false, + "text": "projects" + }, + { + "id": 29914, + "logprob": -0.43774414, + "special": false, + "text": "/" + }, + { + "id": 29896, + "logprob": -1.9443359, + "special": false, + "text": "1" + } + ] + }, + "generated_text": "for /api/v1/projects/1" + }, + { + "details": { + "best_of_sequences": null, + "finish_reason": "length", + "generated_tokens": 10, + "prefill": [ + { + "id": 1, + "logprob": null, + "text": "" + }, + { + "id": 4321, + "logprob": -8.6875, + "text": "Test" + }, + { + "id": 2009, + "logprob": -11.5546875, + "text": "request" + } + ], + "seed": null, + "tokens": [ + { + "id": 363, + "logprob": -1.5322266, + "special": false, + "text": " for" + }, + { + "id": 847, + "logprob": -2.5585938, + "special": false, + "text": " /" + }, + { + "id": 2754, + "logprob": -2.265625, + "special": false, + "text": "api" + }, + { + "id": 29914, + "logprob": -0.034088135, + "special": false, + "text": "/" + }, + { + "id": 29894, + "logprob": -0.96240234, + "special": false, + "text": "v" + }, + { + "id": 29896, + "logprob": -0.36816406, + "special": false, + "text": "1" + }, + { + "id": 29914, + "logprob": -0.013191223, + "special": false, + "text": "/" + }, + { + "id": 16418, + "logprob": -3.15625, + "special": false, + "text": "projects" + }, + { + "id": 29914, + "logprob": -0.43774414, + "special": false, + "text": "/" + }, + { + "id": 29896, + "logprob": -1.9443359, + "special": false, + "text": "1" + } + ] + }, + "generated_text": "for /api/v1/projects/1" + } +] diff --git a/integration-tests/models/__snapshots__/test_flash_neox.ambr b/integration-tests/models/__snapshots__/test_flash_neox.ambr deleted file mode 100644 index 4330db6bf..000000000 --- a/integration-tests/models/__snapshots__/test_flash_neox.ambr +++ /dev/null @@ -1,682 +0,0 @@ -# serializer version: 1 -# name: test_flash_neox - dict({ - 'details': dict({ - 'best_of_sequences': None, - 'finish_reason': , - 'generated_tokens': 10, - 'prefill': list([ - dict({ - 'id': 50278, - 'text': '<|prompter|>', - }), - dict({ - 'id': 1276, - 'text': 'What', - }), - dict({ - 'id': 310, - 'text': ' is', - }), - dict({ - 'id': 247, - 'text': ' a', - }), - dict({ - 'id': 1167, - 'text': ' mem', - }), - dict({ - 'id': 70, - 'text': 'e', - }), - dict({ - 'id': 13, - 'text': ',', - }), - dict({ - 'id': 285, - 'text': ' and', - }), - dict({ - 'id': 752, - 'text': ' what', - }), - dict({ - 'id': 434, - 'text': "'s", - }), - dict({ - 'id': 253, - 'text': ' the', - }), - dict({ - 'id': 2892, - 'text': ' history', - }), - dict({ - 'id': 3212, - 'text': ' behind', - }), - dict({ - 'id': 436, - 'text': ' this', - }), - dict({ - 'id': 3159, - 'text': ' word', - }), - dict({ - 'id': 32, - 'text': '?', - }), - dict({ - 'id': 0, - 'text': '<|endoftext|>', - }), - dict({ - 'id': 50281, - 'text': '<|assistant|>', - }), - ]), - 'seed': None, - 'tokens': list([ - dict({ - 'id': 510, - 'special': False, - 'text': 'The', - }), - dict({ - 'id': 3159, - 'special': False, - 'text': ' word', - }), - dict({ - 'id': 346, - 'special': False, - 'text': ' "', - }), - dict({ - 'id': 6441, - 'special': False, - 'text': 'mem', - }), - dict({ - 'id': 70, - 'special': False, - 'text': 'e', - }), - dict({ - 'id': 3, - 'special': False, - 'text': '"', - }), - dict({ - 'id': 369, - 'special': False, - 'text': ' was', - }), - dict({ - 'id': 806, - 'special': False, - 'text': ' first', - }), - dict({ - 'id': 908, - 'special': False, - 'text': ' used', - }), - dict({ - 'id': 275, - 'special': False, - 'text': ' in', - }), - ]), - }), - 'generated_text': 'The word "meme" was first used in', - }) -# --- -# name: test_flash_neox_load - list([ - dict({ - 'details': dict({ - 'best_of_sequences': None, - 'finish_reason': , - 'generated_tokens': 10, - 'prefill': list([ - dict({ - 'id': 50278, - 'text': '<|prompter|>', - }), - dict({ - 'id': 1276, - 'text': 'What', - }), - dict({ - 'id': 310, - 'text': ' is', - }), - dict({ - 'id': 247, - 'text': ' a', - }), - dict({ - 'id': 1167, - 'text': ' mem', - }), - dict({ - 'id': 70, - 'text': 'e', - }), - dict({ - 'id': 13, - 'text': ',', - }), - dict({ - 'id': 285, - 'text': ' and', - }), - dict({ - 'id': 752, - 'text': ' what', - }), - dict({ - 'id': 434, - 'text': "'s", - }), - dict({ - 'id': 253, - 'text': ' the', - }), - dict({ - 'id': 2892, - 'text': ' history', - }), - dict({ - 'id': 3212, - 'text': ' behind', - }), - dict({ - 'id': 436, - 'text': ' this', - }), - dict({ - 'id': 3159, - 'text': ' word', - }), - dict({ - 'id': 32, - 'text': '?', - }), - dict({ - 'id': 0, - 'text': '<|endoftext|>', - }), - dict({ - 'id': 50281, - 'text': '<|assistant|>', - }), - ]), - 'seed': None, - 'tokens': list([ - dict({ - 'id': 510, - 'special': False, - 'text': 'The', - }), - dict({ - 'id': 3159, - 'special': False, - 'text': ' word', - }), - dict({ - 'id': 346, - 'special': False, - 'text': ' "', - }), - dict({ - 'id': 6441, - 'special': False, - 'text': 'mem', - }), - dict({ - 'id': 70, - 'special': False, - 'text': 'e', - }), - dict({ - 'id': 3, - 'special': False, - 'text': '"', - }), - dict({ - 'id': 369, - 'special': False, - 'text': ' was', - }), - dict({ - 'id': 806, - 'special': False, - 'text': ' first', - }), - dict({ - 'id': 908, - 'special': False, - 'text': ' used', - }), - dict({ - 'id': 275, - 'special': False, - 'text': ' in', - }), - ]), - }), - 'generated_text': 'The word "meme" was first used in', - }), - dict({ - 'details': dict({ - 'best_of_sequences': None, - 'finish_reason': , - 'generated_tokens': 10, - 'prefill': list([ - dict({ - 'id': 50278, - 'text': '<|prompter|>', - }), - dict({ - 'id': 1276, - 'text': 'What', - }), - dict({ - 'id': 310, - 'text': ' is', - }), - dict({ - 'id': 247, - 'text': ' a', - }), - dict({ - 'id': 1167, - 'text': ' mem', - }), - dict({ - 'id': 70, - 'text': 'e', - }), - dict({ - 'id': 13, - 'text': ',', - }), - dict({ - 'id': 285, - 'text': ' and', - }), - dict({ - 'id': 752, - 'text': ' what', - }), - dict({ - 'id': 434, - 'text': "'s", - }), - dict({ - 'id': 253, - 'text': ' the', - }), - dict({ - 'id': 2892, - 'text': ' history', - }), - dict({ - 'id': 3212, - 'text': ' behind', - }), - dict({ - 'id': 436, - 'text': ' this', - }), - dict({ - 'id': 3159, - 'text': ' word', - }), - dict({ - 'id': 32, - 'text': '?', - }), - dict({ - 'id': 0, - 'text': '<|endoftext|>', - }), - dict({ - 'id': 50281, - 'text': '<|assistant|>', - }), - ]), - 'seed': None, - 'tokens': list([ - dict({ - 'id': 510, - 'special': False, - 'text': 'The', - }), - dict({ - 'id': 3159, - 'special': False, - 'text': ' word', - }), - dict({ - 'id': 346, - 'special': False, - 'text': ' "', - }), - dict({ - 'id': 6441, - 'special': False, - 'text': 'mem', - }), - dict({ - 'id': 70, - 'special': False, - 'text': 'e', - }), - dict({ - 'id': 3, - 'special': False, - 'text': '"', - }), - dict({ - 'id': 369, - 'special': False, - 'text': ' was', - }), - dict({ - 'id': 806, - 'special': False, - 'text': ' first', - }), - dict({ - 'id': 908, - 'special': False, - 'text': ' used', - }), - dict({ - 'id': 275, - 'special': False, - 'text': ' in', - }), - ]), - }), - 'generated_text': 'The word "meme" was first used in', - }), - dict({ - 'details': dict({ - 'best_of_sequences': None, - 'finish_reason': , - 'generated_tokens': 10, - 'prefill': list([ - dict({ - 'id': 50278, - 'text': '<|prompter|>', - }), - dict({ - 'id': 1276, - 'text': 'What', - }), - dict({ - 'id': 310, - 'text': ' is', - }), - dict({ - 'id': 247, - 'text': ' a', - }), - dict({ - 'id': 1167, - 'text': ' mem', - }), - dict({ - 'id': 70, - 'text': 'e', - }), - dict({ - 'id': 13, - 'text': ',', - }), - dict({ - 'id': 285, - 'text': ' and', - }), - dict({ - 'id': 752, - 'text': ' what', - }), - dict({ - 'id': 434, - 'text': "'s", - }), - dict({ - 'id': 253, - 'text': ' the', - }), - dict({ - 'id': 2892, - 'text': ' history', - }), - dict({ - 'id': 3212, - 'text': ' behind', - }), - dict({ - 'id': 436, - 'text': ' this', - }), - dict({ - 'id': 3159, - 'text': ' word', - }), - dict({ - 'id': 32, - 'text': '?', - }), - dict({ - 'id': 0, - 'text': '<|endoftext|>', - }), - dict({ - 'id': 50281, - 'text': '<|assistant|>', - }), - ]), - 'seed': None, - 'tokens': list([ - dict({ - 'id': 510, - 'special': False, - 'text': 'The', - }), - dict({ - 'id': 3159, - 'special': False, - 'text': ' word', - }), - dict({ - 'id': 346, - 'special': False, - 'text': ' "', - }), - dict({ - 'id': 6441, - 'special': False, - 'text': 'mem', - }), - dict({ - 'id': 70, - 'special': False, - 'text': 'e', - }), - dict({ - 'id': 3, - 'special': False, - 'text': '"', - }), - dict({ - 'id': 369, - 'special': False, - 'text': ' was', - }), - dict({ - 'id': 806, - 'special': False, - 'text': ' first', - }), - dict({ - 'id': 908, - 'special': False, - 'text': ' used', - }), - dict({ - 'id': 275, - 'special': False, - 'text': ' in', - }), - ]), - }), - 'generated_text': 'The word "meme" was first used in', - }), - dict({ - 'details': dict({ - 'best_of_sequences': None, - 'finish_reason': , - 'generated_tokens': 10, - 'prefill': list([ - dict({ - 'id': 50278, - 'text': '<|prompter|>', - }), - dict({ - 'id': 1276, - 'text': 'What', - }), - dict({ - 'id': 310, - 'text': ' is', - }), - dict({ - 'id': 247, - 'text': ' a', - }), - dict({ - 'id': 1167, - 'text': ' mem', - }), - dict({ - 'id': 70, - 'text': 'e', - }), - dict({ - 'id': 13, - 'text': ',', - }), - dict({ - 'id': 285, - 'text': ' and', - }), - dict({ - 'id': 752, - 'text': ' what', - }), - dict({ - 'id': 434, - 'text': "'s", - }), - dict({ - 'id': 253, - 'text': ' the', - }), - dict({ - 'id': 2892, - 'text': ' history', - }), - dict({ - 'id': 3212, - 'text': ' behind', - }), - dict({ - 'id': 436, - 'text': ' this', - }), - dict({ - 'id': 3159, - 'text': ' word', - }), - dict({ - 'id': 32, - 'text': '?', - }), - dict({ - 'id': 0, - 'text': '<|endoftext|>', - }), - dict({ - 'id': 50281, - 'text': '<|assistant|>', - }), - ]), - 'seed': None, - 'tokens': list([ - dict({ - 'id': 510, - 'special': False, - 'text': 'The', - }), - dict({ - 'id': 3159, - 'special': False, - 'text': ' word', - }), - dict({ - 'id': 346, - 'special': False, - 'text': ' "', - }), - dict({ - 'id': 6441, - 'special': False, - 'text': 'mem', - }), - dict({ - 'id': 70, - 'special': False, - 'text': 'e', - }), - dict({ - 'id': 3, - 'special': False, - 'text': '"', - }), - dict({ - 'id': 369, - 'special': False, - 'text': ' was', - }), - dict({ - 'id': 806, - 'special': False, - 'text': ' first', - }), - dict({ - 'id': 908, - 'special': False, - 'text': ' used', - }), - dict({ - 'id': 275, - 'special': False, - 'text': ' in', - }), - ]), - }), - 'generated_text': 'The word "meme" was first used in', - }), - ]) -# --- diff --git a/integration-tests/models/__snapshots__/test_flash_neox/test_flash_neox.json b/integration-tests/models/__snapshots__/test_flash_neox/test_flash_neox.json new file mode 100644 index 000000000..787704ce0 --- /dev/null +++ b/integration-tests/models/__snapshots__/test_flash_neox/test_flash_neox.json @@ -0,0 +1,163 @@ +{ + "details": { + "best_of_sequences": null, + "finish_reason": "length", + "generated_tokens": 10, + "prefill": [ + { + "id": 50278, + "logprob": null, + "text": "<|prompter|>" + }, + { + "id": 1276, + "logprob": -8.03125, + "text": "What" + }, + { + "id": 310, + "logprob": -5.421875, + "text": " is" + }, + { + "id": 247, + "logprob": -2.1601562, + "text": " a" + }, + { + "id": 1167, + "logprob": -5.4609375, + "text": " mem" + }, + { + "id": 70, + "logprob": -0.005657196, + "text": "e" + }, + { + "id": 13, + "logprob": -7.28125, + "text": "," + }, + { + "id": 285, + "logprob": -0.2980957, + "text": " and" + }, + { + "id": 752, + "logprob": -2.1679688, + "text": " what" + }, + { + "id": 434, + "logprob": -5.6210938, + "text": "'s" + }, + { + "id": 253, + "logprob": -0.81103516, + "text": " the" + }, + { + "id": 2892, + "logprob": -6.6640625, + "text": " history" + }, + { + "id": 3212, + "logprob": -2.265625, + "text": " behind" + }, + { + "id": 436, + "logprob": -11.5078125, + "text": " this" + }, + { + "id": 3159, + "logprob": -2.1582031, + "text": " word" + }, + { + "id": 32, + "logprob": -0.008720398, + "text": "?" + }, + { + "id": 0, + "logprob": -2.4726562, + "text": "<|endoftext|>" + }, + { + "id": 50281, + "logprob": -18.265625, + "text": "<|assistant|>" + } + ], + "seed": null, + "tokens": [ + { + "id": 510, + "logprob": -0.63183594, + "special": false, + "text": "The" + }, + { + "id": 3159, + "logprob": -0.5390625, + "special": false, + "text": " word" + }, + { + "id": 346, + "logprob": -0.045684814, + "special": false, + "text": " \"" + }, + { + "id": 6441, + "logprob": -0.002090454, + "special": false, + "text": "mem" + }, + { + "id": 70, + "logprob": -1.3589859e-05, + "special": false, + "text": "e" + }, + { + "id": 3, + "logprob": -0.0009455681, + "special": false, + "text": "\"" + }, + { + "id": 369, + "logprob": -0.088012695, + "special": false, + "text": " was" + }, + { + "id": 806, + "logprob": -0.12585449, + "special": false, + "text": " first" + }, + { + "id": 908, + "logprob": -0.017196655, + "special": false, + "text": " used" + }, + { + "id": 275, + "logprob": -0.49731445, + "special": false, + "text": " in" + } + ] + }, + "generated_text": "The word \"meme\" was first used in" +} diff --git a/integration-tests/models/__snapshots__/test_flash_neox/test_flash_neox_load.json b/integration-tests/models/__snapshots__/test_flash_neox/test_flash_neox_load.json new file mode 100644 index 000000000..47d6a77e1 --- /dev/null +++ b/integration-tests/models/__snapshots__/test_flash_neox/test_flash_neox_load.json @@ -0,0 +1,654 @@ +[ + { + "details": { + "best_of_sequences": null, + "finish_reason": "length", + "generated_tokens": 10, + "prefill": [ + { + "id": 50278, + "logprob": null, + "text": "<|prompter|>" + }, + { + "id": 1276, + "logprob": -8.03125, + "text": "What" + }, + { + "id": 310, + "logprob": -5.421875, + "text": " is" + }, + { + "id": 247, + "logprob": -2.1601562, + "text": " a" + }, + { + "id": 1167, + "logprob": -5.4609375, + "text": " mem" + }, + { + "id": 70, + "logprob": -0.005657196, + "text": "e" + }, + { + "id": 13, + "logprob": -7.28125, + "text": "," + }, + { + "id": 285, + "logprob": -0.2980957, + "text": " and" + }, + { + "id": 752, + "logprob": -2.1679688, + "text": " what" + }, + { + "id": 434, + "logprob": -5.6210938, + "text": "'s" + }, + { + "id": 253, + "logprob": -0.81103516, + "text": " the" + }, + { + "id": 2892, + "logprob": -6.6640625, + "text": " history" + }, + { + "id": 3212, + "logprob": -2.265625, + "text": " behind" + }, + { + "id": 436, + "logprob": -11.5078125, + "text": " this" + }, + { + "id": 3159, + "logprob": -2.1582031, + "text": " word" + }, + { + "id": 32, + "logprob": -0.008720398, + "text": "?" + }, + { + "id": 0, + "logprob": -2.4726562, + "text": "<|endoftext|>" + }, + { + "id": 50281, + "logprob": -18.265625, + "text": "<|assistant|>" + } + ], + "seed": null, + "tokens": [ + { + "id": 510, + "logprob": -0.63183594, + "special": false, + "text": "The" + }, + { + "id": 3159, + "logprob": -0.5488281, + "special": false, + "text": " word" + }, + { + "id": 346, + "logprob": -0.045684814, + "special": false, + "text": " \"" + }, + { + "id": 6441, + "logprob": -0.00207901, + "special": false, + "text": "mem" + }, + { + "id": 70, + "logprob": -1.335144e-05, + "special": false, + "text": "e" + }, + { + "id": 3, + "logprob": -0.00097227097, + "special": false, + "text": "\"" + }, + { + "id": 369, + "logprob": -0.0892334, + "special": false, + "text": " was" + }, + { + "id": 806, + "logprob": -0.12463379, + "special": false, + "text": " first" + }, + { + "id": 908, + "logprob": -0.01737976, + "special": false, + "text": " used" + }, + { + "id": 275, + "logprob": -0.50341797, + "special": false, + "text": " in" + } + ] + }, + "generated_text": "The word \"meme\" was first used in" + }, + { + "details": { + "best_of_sequences": null, + "finish_reason": "length", + "generated_tokens": 10, + "prefill": [ + { + "id": 50278, + "logprob": null, + "text": "<|prompter|>" + }, + { + "id": 1276, + "logprob": -8.03125, + "text": "What" + }, + { + "id": 310, + "logprob": -5.421875, + "text": " is" + }, + { + "id": 247, + "logprob": -2.1601562, + "text": " a" + }, + { + "id": 1167, + "logprob": -5.4609375, + "text": " mem" + }, + { + "id": 70, + "logprob": -0.005657196, + "text": "e" + }, + { + "id": 13, + "logprob": -7.28125, + "text": "," + }, + { + "id": 285, + "logprob": -0.2980957, + "text": " and" + }, + { + "id": 752, + "logprob": -2.1679688, + "text": " what" + }, + { + "id": 434, + "logprob": -5.6210938, + "text": "'s" + }, + { + "id": 253, + "logprob": -0.81103516, + "text": " the" + }, + { + "id": 2892, + "logprob": -6.6640625, + "text": " history" + }, + { + "id": 3212, + "logprob": -2.265625, + "text": " behind" + }, + { + "id": 436, + "logprob": -11.5078125, + "text": " this" + }, + { + "id": 3159, + "logprob": -2.1582031, + "text": " word" + }, + { + "id": 32, + "logprob": -0.008720398, + "text": "?" + }, + { + "id": 0, + "logprob": -2.4726562, + "text": "<|endoftext|>" + }, + { + "id": 50281, + "logprob": -18.265625, + "text": "<|assistant|>" + } + ], + "seed": null, + "tokens": [ + { + "id": 510, + "logprob": -0.63183594, + "special": false, + "text": "The" + }, + { + "id": 3159, + "logprob": -0.5488281, + "special": false, + "text": " word" + }, + { + "id": 346, + "logprob": -0.045684814, + "special": false, + "text": " \"" + }, + { + "id": 6441, + "logprob": -0.00207901, + "special": false, + "text": "mem" + }, + { + "id": 70, + "logprob": -1.335144e-05, + "special": false, + "text": "e" + }, + { + "id": 3, + "logprob": -0.00097227097, + "special": false, + "text": "\"" + }, + { + "id": 369, + "logprob": -0.0892334, + "special": false, + "text": " was" + }, + { + "id": 806, + "logprob": -0.12463379, + "special": false, + "text": " first" + }, + { + "id": 908, + "logprob": -0.01737976, + "special": false, + "text": " used" + }, + { + "id": 275, + "logprob": -0.50341797, + "special": false, + "text": " in" + } + ] + }, + "generated_text": "The word \"meme\" was first used in" + }, + { + "details": { + "best_of_sequences": null, + "finish_reason": "length", + "generated_tokens": 10, + "prefill": [ + { + "id": 50278, + "logprob": null, + "text": "<|prompter|>" + }, + { + "id": 1276, + "logprob": -8.03125, + "text": "What" + }, + { + "id": 310, + "logprob": -5.421875, + "text": " is" + }, + { + "id": 247, + "logprob": -2.1601562, + "text": " a" + }, + { + "id": 1167, + "logprob": -5.4609375, + "text": " mem" + }, + { + "id": 70, + "logprob": -0.005657196, + "text": "e" + }, + { + "id": 13, + "logprob": -7.28125, + "text": "," + }, + { + "id": 285, + "logprob": -0.2980957, + "text": " and" + }, + { + "id": 752, + "logprob": -2.1679688, + "text": " what" + }, + { + "id": 434, + "logprob": -5.6210938, + "text": "'s" + }, + { + "id": 253, + "logprob": -0.81103516, + "text": " the" + }, + { + "id": 2892, + "logprob": -6.6640625, + "text": " history" + }, + { + "id": 3212, + "logprob": -2.265625, + "text": " behind" + }, + { + "id": 436, + "logprob": -11.5078125, + "text": " this" + }, + { + "id": 3159, + "logprob": -2.1582031, + "text": " word" + }, + { + "id": 32, + "logprob": -0.008720398, + "text": "?" + }, + { + "id": 0, + "logprob": -2.4726562, + "text": "<|endoftext|>" + }, + { + "id": 50281, + "logprob": -18.265625, + "text": "<|assistant|>" + } + ], + "seed": null, + "tokens": [ + { + "id": 510, + "logprob": -0.63183594, + "special": false, + "text": "The" + }, + { + "id": 3159, + "logprob": -0.5488281, + "special": false, + "text": " word" + }, + { + "id": 346, + "logprob": -0.045684814, + "special": false, + "text": " \"" + }, + { + "id": 6441, + "logprob": -0.00207901, + "special": false, + "text": "mem" + }, + { + "id": 70, + "logprob": -1.335144e-05, + "special": false, + "text": "e" + }, + { + "id": 3, + "logprob": -0.00097227097, + "special": false, + "text": "\"" + }, + { + "id": 369, + "logprob": -0.0892334, + "special": false, + "text": " was" + }, + { + "id": 806, + "logprob": -0.12463379, + "special": false, + "text": " first" + }, + { + "id": 908, + "logprob": -0.01737976, + "special": false, + "text": " used" + }, + { + "id": 275, + "logprob": -0.50341797, + "special": false, + "text": " in" + } + ] + }, + "generated_text": "The word \"meme\" was first used in" + }, + { + "details": { + "best_of_sequences": null, + "finish_reason": "length", + "generated_tokens": 10, + "prefill": [ + { + "id": 50278, + "logprob": null, + "text": "<|prompter|>" + }, + { + "id": 1276, + "logprob": -8.03125, + "text": "What" + }, + { + "id": 310, + "logprob": -5.421875, + "text": " is" + }, + { + "id": 247, + "logprob": -2.1601562, + "text": " a" + }, + { + "id": 1167, + "logprob": -5.4609375, + "text": " mem" + }, + { + "id": 70, + "logprob": -0.005657196, + "text": "e" + }, + { + "id": 13, + "logprob": -7.28125, + "text": "," + }, + { + "id": 285, + "logprob": -0.2980957, + "text": " and" + }, + { + "id": 752, + "logprob": -2.1679688, + "text": " what" + }, + { + "id": 434, + "logprob": -5.6210938, + "text": "'s" + }, + { + "id": 253, + "logprob": -0.81103516, + "text": " the" + }, + { + "id": 2892, + "logprob": -6.6640625, + "text": " history" + }, + { + "id": 3212, + "logprob": -2.265625, + "text": " behind" + }, + { + "id": 436, + "logprob": -11.5078125, + "text": " this" + }, + { + "id": 3159, + "logprob": -2.1582031, + "text": " word" + }, + { + "id": 32, + "logprob": -0.008720398, + "text": "?" + }, + { + "id": 0, + "logprob": -2.4726562, + "text": "<|endoftext|>" + }, + { + "id": 50281, + "logprob": -18.265625, + "text": "<|assistant|>" + } + ], + "seed": null, + "tokens": [ + { + "id": 510, + "logprob": -0.63183594, + "special": false, + "text": "The" + }, + { + "id": 3159, + "logprob": -0.5488281, + "special": false, + "text": " word" + }, + { + "id": 346, + "logprob": -0.045684814, + "special": false, + "text": " \"" + }, + { + "id": 6441, + "logprob": -0.00207901, + "special": false, + "text": "mem" + }, + { + "id": 70, + "logprob": -1.335144e-05, + "special": false, + "text": "e" + }, + { + "id": 3, + "logprob": -0.00097227097, + "special": false, + "text": "\"" + }, + { + "id": 369, + "logprob": -0.0892334, + "special": false, + "text": " was" + }, + { + "id": 806, + "logprob": -0.12463379, + "special": false, + "text": " first" + }, + { + "id": 908, + "logprob": -0.01737976, + "special": false, + "text": " used" + }, + { + "id": 275, + "logprob": -0.50341797, + "special": false, + "text": " in" + } + ] + }, + "generated_text": "The word \"meme\" was first used in" + } +] diff --git a/integration-tests/models/__snapshots__/test_flash_santacoder.ambr b/integration-tests/models/__snapshots__/test_flash_santacoder.ambr deleted file mode 100644 index 030820cb0..000000000 --- a/integration-tests/models/__snapshots__/test_flash_santacoder.ambr +++ /dev/null @@ -1,472 +0,0 @@ -# serializer version: 1 -# name: test_flash_santacoder - dict({ - 'details': dict({ - 'best_of_sequences': None, - 'finish_reason': , - 'generated_tokens': 10, - 'prefill': list([ - dict({ - 'id': 563, - 'text': 'def', - }), - dict({ - 'id': 942, - 'text': ' print', - }), - dict({ - 'id': 62, - 'text': '_', - }), - dict({ - 'id': 7196, - 'text': 'hello', - }), - ]), - 'seed': None, - 'tokens': list([ - dict({ - 'id': 1241, - 'special': False, - 'text': '():', - }), - dict({ - 'id': 258, - 'special': False, - 'text': ''' - - - ''', - }), - dict({ - 'id': 942, - 'special': False, - 'text': ' print', - }), - dict({ - 'id': 372, - 'special': False, - 'text': '("', - }), - dict({ - 'id': 7371, - 'special': False, - 'text': 'Hello', - }), - dict({ - 'id': 9956, - 'special': False, - 'text': ' World', - }), - dict({ - 'id': 8657, - 'special': False, - 'text': '!")', - }), - dict({ - 'id': 185, - 'special': False, - 'text': ''' - - - ''', - }), - dict({ - 'id': 185, - 'special': False, - 'text': ''' - - - ''', - }), - dict({ - 'id': 1018, - 'special': False, - 'text': 'print', - }), - ]), - }), - 'generated_text': ''' - (): - print("Hello World!") - - print - ''', - }) -# --- -# name: test_flash_santacoder_load - list([ - dict({ - 'details': dict({ - 'best_of_sequences': None, - 'finish_reason': , - 'generated_tokens': 10, - 'prefill': list([ - dict({ - 'id': 563, - 'text': 'def', - }), - dict({ - 'id': 942, - 'text': ' print', - }), - dict({ - 'id': 62, - 'text': '_', - }), - dict({ - 'id': 7196, - 'text': 'hello', - }), - ]), - 'seed': None, - 'tokens': list([ - dict({ - 'id': 1241, - 'special': False, - 'text': '():', - }), - dict({ - 'id': 258, - 'special': False, - 'text': ''' - - - ''', - }), - dict({ - 'id': 942, - 'special': False, - 'text': ' print', - }), - dict({ - 'id': 372, - 'special': False, - 'text': '("', - }), - dict({ - 'id': 7371, - 'special': False, - 'text': 'Hello', - }), - dict({ - 'id': 9956, - 'special': False, - 'text': ' World', - }), - dict({ - 'id': 8657, - 'special': False, - 'text': '!")', - }), - dict({ - 'id': 185, - 'special': False, - 'text': ''' - - - ''', - }), - dict({ - 'id': 185, - 'special': False, - 'text': ''' - - - ''', - }), - dict({ - 'id': 1018, - 'special': False, - 'text': 'print', - }), - ]), - }), - 'generated_text': ''' - (): - print("Hello World!") - - print - ''', - }), - dict({ - 'details': dict({ - 'best_of_sequences': None, - 'finish_reason': , - 'generated_tokens': 10, - 'prefill': list([ - dict({ - 'id': 563, - 'text': 'def', - }), - dict({ - 'id': 942, - 'text': ' print', - }), - dict({ - 'id': 62, - 'text': '_', - }), - dict({ - 'id': 7196, - 'text': 'hello', - }), - ]), - 'seed': None, - 'tokens': list([ - dict({ - 'id': 1241, - 'special': False, - 'text': '():', - }), - dict({ - 'id': 258, - 'special': False, - 'text': ''' - - - ''', - }), - dict({ - 'id': 942, - 'special': False, - 'text': ' print', - }), - dict({ - 'id': 372, - 'special': False, - 'text': '("', - }), - dict({ - 'id': 7371, - 'special': False, - 'text': 'Hello', - }), - dict({ - 'id': 9956, - 'special': False, - 'text': ' World', - }), - dict({ - 'id': 8657, - 'special': False, - 'text': '!")', - }), - dict({ - 'id': 185, - 'special': False, - 'text': ''' - - - ''', - }), - dict({ - 'id': 185, - 'special': False, - 'text': ''' - - - ''', - }), - dict({ - 'id': 1018, - 'special': False, - 'text': 'print', - }), - ]), - }), - 'generated_text': ''' - (): - print("Hello World!") - - print - ''', - }), - dict({ - 'details': dict({ - 'best_of_sequences': None, - 'finish_reason': , - 'generated_tokens': 10, - 'prefill': list([ - dict({ - 'id': 563, - 'text': 'def', - }), - dict({ - 'id': 942, - 'text': ' print', - }), - dict({ - 'id': 62, - 'text': '_', - }), - dict({ - 'id': 7196, - 'text': 'hello', - }), - ]), - 'seed': None, - 'tokens': list([ - dict({ - 'id': 1241, - 'special': False, - 'text': '():', - }), - dict({ - 'id': 258, - 'special': False, - 'text': ''' - - - ''', - }), - dict({ - 'id': 942, - 'special': False, - 'text': ' print', - }), - dict({ - 'id': 372, - 'special': False, - 'text': '("', - }), - dict({ - 'id': 7371, - 'special': False, - 'text': 'Hello', - }), - dict({ - 'id': 9956, - 'special': False, - 'text': ' World', - }), - dict({ - 'id': 8657, - 'special': False, - 'text': '!")', - }), - dict({ - 'id': 185, - 'special': False, - 'text': ''' - - - ''', - }), - dict({ - 'id': 185, - 'special': False, - 'text': ''' - - - ''', - }), - dict({ - 'id': 1018, - 'special': False, - 'text': 'print', - }), - ]), - }), - 'generated_text': ''' - (): - print("Hello World!") - - print - ''', - }), - dict({ - 'details': dict({ - 'best_of_sequences': None, - 'finish_reason': , - 'generated_tokens': 10, - 'prefill': list([ - dict({ - 'id': 563, - 'text': 'def', - }), - dict({ - 'id': 942, - 'text': ' print', - }), - dict({ - 'id': 62, - 'text': '_', - }), - dict({ - 'id': 7196, - 'text': 'hello', - }), - ]), - 'seed': None, - 'tokens': list([ - dict({ - 'id': 1241, - 'special': False, - 'text': '():', - }), - dict({ - 'id': 258, - 'special': False, - 'text': ''' - - - ''', - }), - dict({ - 'id': 942, - 'special': False, - 'text': ' print', - }), - dict({ - 'id': 372, - 'special': False, - 'text': '("', - }), - dict({ - 'id': 7371, - 'special': False, - 'text': 'Hello', - }), - dict({ - 'id': 9956, - 'special': False, - 'text': ' World', - }), - dict({ - 'id': 8657, - 'special': False, - 'text': '!")', - }), - dict({ - 'id': 185, - 'special': False, - 'text': ''' - - - ''', - }), - dict({ - 'id': 185, - 'special': False, - 'text': ''' - - - ''', - }), - dict({ - 'id': 1018, - 'special': False, - 'text': 'print', - }), - ]), - }), - 'generated_text': ''' - (): - print("Hello World!") - - print - ''', - }), - ]) -# --- diff --git a/integration-tests/models/__snapshots__/test_flash_santacoder/test_flash_santacoder.json b/integration-tests/models/__snapshots__/test_flash_santacoder/test_flash_santacoder.json new file mode 100644 index 000000000..0293e35ab --- /dev/null +++ b/integration-tests/models/__snapshots__/test_flash_santacoder/test_flash_santacoder.json @@ -0,0 +1,93 @@ +{ + "details": { + "best_of_sequences": null, + "finish_reason": "length", + "generated_tokens": 10, + "prefill": [ + { + "id": 563, + "logprob": null, + "text": "def" + }, + { + "id": 942, + "logprob": -5.1367188, + "text": " print" + }, + { + "id": 62, + "logprob": -0.24450684, + "text": "_" + }, + { + "id": 7196, + "logprob": -6.9609375, + "text": "hello" + } + ], + "seed": null, + "tokens": [ + { + "id": 1241, + "logprob": -0.9863281, + "special": false, + "text": "():" + }, + { + "id": 258, + "logprob": -0.21447754, + "special": false, + "text": "\n " + }, + { + "id": 942, + "logprob": -0.43701172, + "special": false, + "text": " print" + }, + { + "id": 372, + "logprob": -0.5361328, + "special": false, + "text": "(\"" + }, + { + "id": 7371, + "logprob": -0.44555664, + "special": false, + "text": "Hello" + }, + { + "id": 9956, + "logprob": -1.2412109, + "special": false, + "text": " World" + }, + { + "id": 8657, + "logprob": -0.7583008, + "special": false, + "text": "!\")" + }, + { + "id": 185, + "logprob": -0.76171875, + "special": false, + "text": "\n" + }, + { + "id": 185, + "logprob": -0.20837402, + "special": false, + "text": "\n" + }, + { + "id": 1018, + "logprob": -1.2470703, + "special": false, + "text": "print" + } + ] + }, + "generated_text": "():\n print(\"Hello World!\")\n\nprint" +} diff --git a/integration-tests/models/__snapshots__/test_flash_santacoder/test_flash_santacoder_load.json b/integration-tests/models/__snapshots__/test_flash_santacoder/test_flash_santacoder_load.json new file mode 100644 index 000000000..a03580b3d --- /dev/null +++ b/integration-tests/models/__snapshots__/test_flash_santacoder/test_flash_santacoder_load.json @@ -0,0 +1,374 @@ +[ + { + "details": { + "best_of_sequences": null, + "finish_reason": "length", + "generated_tokens": 10, + "prefill": [ + { + "id": 563, + "logprob": null, + "text": "def" + }, + { + "id": 942, + "logprob": -5.1367188, + "text": " print" + }, + { + "id": 62, + "logprob": -0.24450684, + "text": "_" + }, + { + "id": 7196, + "logprob": -6.9609375, + "text": "hello" + } + ], + "seed": null, + "tokens": [ + { + "id": 1241, + "logprob": -0.9863281, + "special": false, + "text": "():" + }, + { + "id": 258, + "logprob": -0.21362305, + "special": false, + "text": "\n " + }, + { + "id": 942, + "logprob": -0.44360352, + "special": false, + "text": " print" + }, + { + "id": 372, + "logprob": -0.54248047, + "special": false, + "text": "(\"" + }, + { + "id": 7371, + "logprob": -0.44555664, + "special": false, + "text": "Hello" + }, + { + "id": 9956, + "logprob": -1.2441406, + "special": false, + "text": " World" + }, + { + "id": 8657, + "logprob": -0.75878906, + "special": false, + "text": "!\")" + }, + { + "id": 185, + "logprob": -0.76171875, + "special": false, + "text": "\n" + }, + { + "id": 185, + "logprob": -0.2084961, + "special": false, + "text": "\n" + }, + { + "id": 1018, + "logprob": -1.2460938, + "special": false, + "text": "print" + } + ] + }, + "generated_text": "():\n print(\"Hello World!\")\n\nprint" + }, + { + "details": { + "best_of_sequences": null, + "finish_reason": "length", + "generated_tokens": 10, + "prefill": [ + { + "id": 563, + "logprob": null, + "text": "def" + }, + { + "id": 942, + "logprob": -5.1367188, + "text": " print" + }, + { + "id": 62, + "logprob": -0.24450684, + "text": "_" + }, + { + "id": 7196, + "logprob": -6.9609375, + "text": "hello" + } + ], + "seed": null, + "tokens": [ + { + "id": 1241, + "logprob": -0.9863281, + "special": false, + "text": "():" + }, + { + "id": 258, + "logprob": -0.21362305, + "special": false, + "text": "\n " + }, + { + "id": 942, + "logprob": -0.44360352, + "special": false, + "text": " print" + }, + { + "id": 372, + "logprob": -0.54248047, + "special": false, + "text": "(\"" + }, + { + "id": 7371, + "logprob": -0.44555664, + "special": false, + "text": "Hello" + }, + { + "id": 9956, + "logprob": -1.2441406, + "special": false, + "text": " World" + }, + { + "id": 8657, + "logprob": -0.75878906, + "special": false, + "text": "!\")" + }, + { + "id": 185, + "logprob": -0.76171875, + "special": false, + "text": "\n" + }, + { + "id": 185, + "logprob": -0.2084961, + "special": false, + "text": "\n" + }, + { + "id": 1018, + "logprob": -1.2460938, + "special": false, + "text": "print" + } + ] + }, + "generated_text": "():\n print(\"Hello World!\")\n\nprint" + }, + { + "details": { + "best_of_sequences": null, + "finish_reason": "length", + "generated_tokens": 10, + "prefill": [ + { + "id": 563, + "logprob": null, + "text": "def" + }, + { + "id": 942, + "logprob": -5.1367188, + "text": " print" + }, + { + "id": 62, + "logprob": -0.24450684, + "text": "_" + }, + { + "id": 7196, + "logprob": -6.9609375, + "text": "hello" + } + ], + "seed": null, + "tokens": [ + { + "id": 1241, + "logprob": -0.9863281, + "special": false, + "text": "():" + }, + { + "id": 258, + "logprob": -0.21362305, + "special": false, + "text": "\n " + }, + { + "id": 942, + "logprob": -0.44360352, + "special": false, + "text": " print" + }, + { + "id": 372, + "logprob": -0.54248047, + "special": false, + "text": "(\"" + }, + { + "id": 7371, + "logprob": -0.44555664, + "special": false, + "text": "Hello" + }, + { + "id": 9956, + "logprob": -1.2441406, + "special": false, + "text": " World" + }, + { + "id": 8657, + "logprob": -0.75878906, + "special": false, + "text": "!\")" + }, + { + "id": 185, + "logprob": -0.76171875, + "special": false, + "text": "\n" + }, + { + "id": 185, + "logprob": -0.2084961, + "special": false, + "text": "\n" + }, + { + "id": 1018, + "logprob": -1.2460938, + "special": false, + "text": "print" + } + ] + }, + "generated_text": "():\n print(\"Hello World!\")\n\nprint" + }, + { + "details": { + "best_of_sequences": null, + "finish_reason": "length", + "generated_tokens": 10, + "prefill": [ + { + "id": 563, + "logprob": null, + "text": "def" + }, + { + "id": 942, + "logprob": -5.1367188, + "text": " print" + }, + { + "id": 62, + "logprob": -0.24450684, + "text": "_" + }, + { + "id": 7196, + "logprob": -6.9609375, + "text": "hello" + } + ], + "seed": null, + "tokens": [ + { + "id": 1241, + "logprob": -0.9863281, + "special": false, + "text": "():" + }, + { + "id": 258, + "logprob": -0.21362305, + "special": false, + "text": "\n " + }, + { + "id": 942, + "logprob": -0.44360352, + "special": false, + "text": " print" + }, + { + "id": 372, + "logprob": -0.54248047, + "special": false, + "text": "(\"" + }, + { + "id": 7371, + "logprob": -0.44555664, + "special": false, + "text": "Hello" + }, + { + "id": 9956, + "logprob": -1.2441406, + "special": false, + "text": " World" + }, + { + "id": 8657, + "logprob": -0.75878906, + "special": false, + "text": "!\")" + }, + { + "id": 185, + "logprob": -0.76171875, + "special": false, + "text": "\n" + }, + { + "id": 185, + "logprob": -0.2084961, + "special": false, + "text": "\n" + }, + { + "id": 1018, + "logprob": -1.2460938, + "special": false, + "text": "print" + } + ] + }, + "generated_text": "():\n print(\"Hello World!\")\n\nprint" + } +] diff --git a/integration-tests/models/__snapshots__/test_flash_starcoder.ambr b/integration-tests/models/__snapshots__/test_flash_starcoder.ambr deleted file mode 100644 index e0f4b5685..000000000 --- a/integration-tests/models/__snapshots__/test_flash_starcoder.ambr +++ /dev/null @@ -1,573 +0,0 @@ -# serializer version: 1 -# name: test_flash_starcoder - dict({ - 'details': dict({ - 'best_of_sequences': None, - 'finish_reason': , - 'generated_tokens': 10, - 'prefill': list([ - dict({ - 'id': 589, - 'text': 'def', - }), - dict({ - 'id': 1459, - 'text': ' print', - }), - dict({ - 'id': 81, - 'text': '_', - }), - dict({ - 'id': 7656, - 'text': 'hello', - }), - ]), - 'seed': None, - 'tokens': list([ - dict({ - 'id': 2262, - 'special': False, - 'text': '():', - }), - dict({ - 'id': 284, - 'special': False, - 'text': ''' - - - ''', - }), - dict({ - 'id': 1459, - 'special': False, - 'text': ' print', - }), - dict({ - 'id': 440, - 'special': False, - 'text': '("', - }), - dict({ - 'id': 8279, - 'special': False, - 'text': 'Hello', - }), - dict({ - 'id': 10896, - 'special': False, - 'text': ' World', - }), - dict({ - 'id': 657, - 'special': False, - 'text': '")', - }), - dict({ - 'id': 203, - 'special': False, - 'text': ''' - - - ''', - }), - dict({ - 'id': 203, - 'special': False, - 'text': ''' - - - ''', - }), - dict({ - 'id': 589, - 'special': False, - 'text': 'def', - }), - ]), - }), - 'generated_text': ''' - (): - print("Hello World") - - def - ''', - }) -# --- -# name: test_flash_starcoder_default_params - dict({ - 'details': dict({ - 'best_of_sequences': None, - 'finish_reason': , - 'generated_tokens': 12, - 'prefill': list([ - dict({ - 'id': 589, - 'text': 'def', - }), - dict({ - 'id': 1459, - 'text': ' print', - }), - dict({ - 'id': 81, - 'text': '_', - }), - dict({ - 'id': 7656, - 'text': 'hello', - }), - ]), - 'seed': 0, - 'tokens': list([ - dict({ - 'id': 2262, - 'special': False, - 'text': '():', - }), - dict({ - 'id': 284, - 'special': False, - 'text': ''' - - - ''', - }), - dict({ - 'id': 5741, - 'special': False, - 'text': ' logging', - }), - dict({ - 'id': 32, - 'special': False, - 'text': '.', - }), - dict({ - 'id': 1338, - 'special': False, - 'text': 'info', - }), - dict({ - 'id': 463, - 'special': False, - 'text': "('", - }), - dict({ - 'id': 8279, - 'special': False, - 'text': 'Hello', - }), - dict({ - 'id': 30, - 'special': False, - 'text': ',', - }), - dict({ - 'id': 10896, - 'special': False, - 'text': ' World', - }), - dict({ - 'id': 683, - 'special': False, - 'text': "')", - }), - dict({ - 'id': 203, - 'special': False, - 'text': ''' - - - ''', - }), - dict({ - 'id': 0, - 'special': True, - 'text': '<|endoftext|>', - }), - ]), - }), - 'generated_text': ''' - (): - logging.info('Hello, World') - <|endoftext|> - ''', - }) -# --- -# name: test_flash_starcoder_load - list([ - dict({ - 'details': dict({ - 'best_of_sequences': None, - 'finish_reason': , - 'generated_tokens': 10, - 'prefill': list([ - dict({ - 'id': 589, - 'text': 'def', - }), - dict({ - 'id': 1459, - 'text': ' print', - }), - dict({ - 'id': 81, - 'text': '_', - }), - dict({ - 'id': 7656, - 'text': 'hello', - }), - ]), - 'seed': None, - 'tokens': list([ - dict({ - 'id': 2262, - 'special': False, - 'text': '():', - }), - dict({ - 'id': 284, - 'special': False, - 'text': ''' - - - ''', - }), - dict({ - 'id': 1459, - 'special': False, - 'text': ' print', - }), - dict({ - 'id': 440, - 'special': False, - 'text': '("', - }), - dict({ - 'id': 8279, - 'special': False, - 'text': 'Hello', - }), - dict({ - 'id': 10896, - 'special': False, - 'text': ' World', - }), - dict({ - 'id': 657, - 'special': False, - 'text': '")', - }), - dict({ - 'id': 203, - 'special': False, - 'text': ''' - - - ''', - }), - dict({ - 'id': 203, - 'special': False, - 'text': ''' - - - ''', - }), - dict({ - 'id': 589, - 'special': False, - 'text': 'def', - }), - ]), - }), - 'generated_text': ''' - (): - print("Hello World") - - def - ''', - }), - dict({ - 'details': dict({ - 'best_of_sequences': None, - 'finish_reason': , - 'generated_tokens': 10, - 'prefill': list([ - dict({ - 'id': 589, - 'text': 'def', - }), - dict({ - 'id': 1459, - 'text': ' print', - }), - dict({ - 'id': 81, - 'text': '_', - }), - dict({ - 'id': 7656, - 'text': 'hello', - }), - ]), - 'seed': None, - 'tokens': list([ - dict({ - 'id': 2262, - 'special': False, - 'text': '():', - }), - dict({ - 'id': 284, - 'special': False, - 'text': ''' - - - ''', - }), - dict({ - 'id': 1459, - 'special': False, - 'text': ' print', - }), - dict({ - 'id': 440, - 'special': False, - 'text': '("', - }), - dict({ - 'id': 8279, - 'special': False, - 'text': 'Hello', - }), - dict({ - 'id': 10896, - 'special': False, - 'text': ' World', - }), - dict({ - 'id': 657, - 'special': False, - 'text': '")', - }), - dict({ - 'id': 203, - 'special': False, - 'text': ''' - - - ''', - }), - dict({ - 'id': 203, - 'special': False, - 'text': ''' - - - ''', - }), - dict({ - 'id': 589, - 'special': False, - 'text': 'def', - }), - ]), - }), - 'generated_text': ''' - (): - print("Hello World") - - def - ''', - }), - dict({ - 'details': dict({ - 'best_of_sequences': None, - 'finish_reason': , - 'generated_tokens': 10, - 'prefill': list([ - dict({ - 'id': 589, - 'text': 'def', - }), - dict({ - 'id': 1459, - 'text': ' print', - }), - dict({ - 'id': 81, - 'text': '_', - }), - dict({ - 'id': 7656, - 'text': 'hello', - }), - ]), - 'seed': None, - 'tokens': list([ - dict({ - 'id': 2262, - 'special': False, - 'text': '():', - }), - dict({ - 'id': 284, - 'special': False, - 'text': ''' - - - ''', - }), - dict({ - 'id': 1459, - 'special': False, - 'text': ' print', - }), - dict({ - 'id': 440, - 'special': False, - 'text': '("', - }), - dict({ - 'id': 8279, - 'special': False, - 'text': 'Hello', - }), - dict({ - 'id': 10896, - 'special': False, - 'text': ' World', - }), - dict({ - 'id': 657, - 'special': False, - 'text': '")', - }), - dict({ - 'id': 203, - 'special': False, - 'text': ''' - - - ''', - }), - dict({ - 'id': 203, - 'special': False, - 'text': ''' - - - ''', - }), - dict({ - 'id': 589, - 'special': False, - 'text': 'def', - }), - ]), - }), - 'generated_text': ''' - (): - print("Hello World") - - def - ''', - }), - dict({ - 'details': dict({ - 'best_of_sequences': None, - 'finish_reason': , - 'generated_tokens': 10, - 'prefill': list([ - dict({ - 'id': 589, - 'text': 'def', - }), - dict({ - 'id': 1459, - 'text': ' print', - }), - dict({ - 'id': 81, - 'text': '_', - }), - dict({ - 'id': 7656, - 'text': 'hello', - }), - ]), - 'seed': None, - 'tokens': list([ - dict({ - 'id': 2262, - 'special': False, - 'text': '():', - }), - dict({ - 'id': 284, - 'special': False, - 'text': ''' - - - ''', - }), - dict({ - 'id': 1459, - 'special': False, - 'text': ' print', - }), - dict({ - 'id': 440, - 'special': False, - 'text': '("', - }), - dict({ - 'id': 8279, - 'special': False, - 'text': 'Hello', - }), - dict({ - 'id': 10896, - 'special': False, - 'text': ' World', - }), - dict({ - 'id': 657, - 'special': False, - 'text': '")', - }), - dict({ - 'id': 203, - 'special': False, - 'text': ''' - - - ''', - }), - dict({ - 'id': 203, - 'special': False, - 'text': ''' - - - ''', - }), - dict({ - 'id': 589, - 'special': False, - 'text': 'def', - }), - ]), - }), - 'generated_text': ''' - (): - print("Hello World") - - def - ''', - }), - ]) -# --- diff --git a/integration-tests/models/__snapshots__/test_flash_starcoder/test_flash_starcoder.json b/integration-tests/models/__snapshots__/test_flash_starcoder/test_flash_starcoder.json new file mode 100644 index 000000000..8505c1db9 --- /dev/null +++ b/integration-tests/models/__snapshots__/test_flash_starcoder/test_flash_starcoder.json @@ -0,0 +1,93 @@ +{ + "details": { + "best_of_sequences": null, + "finish_reason": "length", + "generated_tokens": 10, + "prefill": [ + { + "id": 589, + "logprob": null, + "text": "def" + }, + { + "id": 1459, + "logprob": -5.6289062, + "text": " print" + }, + { + "id": 81, + "logprob": -1.6005859, + "text": "_" + }, + { + "id": 7656, + "logprob": -5.9921875, + "text": "hello" + } + ], + "seed": null, + "tokens": [ + { + "id": 2262, + "logprob": -0.7705078, + "special": false, + "text": "():" + }, + { + "id": 284, + "logprob": -0.2590332, + "special": false, + "text": "\n " + }, + { + "id": 1459, + "logprob": -0.39379883, + "special": false, + "text": " print" + }, + { + "id": 440, + "logprob": -0.61376953, + "special": false, + "text": "(\"" + }, + { + "id": 8279, + "logprob": -0.47338867, + "special": false, + "text": "Hello" + }, + { + "id": 10896, + "logprob": -1.5068359, + "special": false, + "text": " World" + }, + { + "id": 657, + "logprob": -0.80810547, + "special": false, + "text": "\")" + }, + { + "id": 203, + "logprob": -0.7397461, + "special": false, + "text": "\n" + }, + { + "id": 203, + "logprob": -0.35229492, + "special": false, + "text": "\n" + }, + { + "id": 589, + "logprob": -1.0371094, + "special": false, + "text": "def" + } + ] + }, + "generated_text": "():\n print(\"Hello World\")\n\ndef" +} diff --git a/integration-tests/models/__snapshots__/test_flash_starcoder/test_flash_starcoder_default_params.json b/integration-tests/models/__snapshots__/test_flash_starcoder/test_flash_starcoder_default_params.json new file mode 100644 index 000000000..21bb509b4 --- /dev/null +++ b/integration-tests/models/__snapshots__/test_flash_starcoder/test_flash_starcoder_default_params.json @@ -0,0 +1,105 @@ +{ + "details": { + "best_of_sequences": null, + "finish_reason": "eos_token", + "generated_tokens": 12, + "prefill": [ + { + "id": 589, + "logprob": null, + "text": "def" + }, + { + "id": 1459, + "logprob": -5.6289062, + "text": " print" + }, + { + "id": 81, + "logprob": -1.6005859, + "text": "_" + }, + { + "id": 7656, + "logprob": -5.9921875, + "text": "hello" + } + ], + "seed": 0, + "tokens": [ + { + "id": 2262, + "logprob": -0.7451172, + "special": false, + "text": "():" + }, + { + "id": 284, + "logprob": -0.21325684, + "special": false, + "text": "\n " + }, + { + "id": 5741, + "logprob": -5.734375, + "special": false, + "text": " logging" + }, + { + "id": 32, + "logprob": 0.0, + "special": false, + "text": "." + }, + { + "id": 1338, + "logprob": -0.3232422, + "special": false, + "text": "info" + }, + { + "id": 463, + "logprob": -1.0380859, + "special": false, + "text": "('" + }, + { + "id": 8279, + "logprob": -0.8378906, + "special": false, + "text": "Hello" + }, + { + "id": 30, + "logprob": -1.9501953, + "special": false, + "text": "," + }, + { + "id": 10896, + "logprob": -1.3476562, + "special": false, + "text": " World" + }, + { + "id": 683, + "logprob": -1.796875, + "special": false, + "text": "')" + }, + { + "id": 203, + "logprob": -0.9873047, + "special": false, + "text": "\n" + }, + { + "id": 0, + "logprob": -0.7495117, + "special": true, + "text": "<|endoftext|>" + } + ] + }, + "generated_text": "():\n logging.info('Hello, World')\n<|endoftext|>" +} diff --git a/integration-tests/models/__snapshots__/test_flash_starcoder/test_flash_starcoder_load.json b/integration-tests/models/__snapshots__/test_flash_starcoder/test_flash_starcoder_load.json new file mode 100644 index 000000000..0b3ad554d --- /dev/null +++ b/integration-tests/models/__snapshots__/test_flash_starcoder/test_flash_starcoder_load.json @@ -0,0 +1,374 @@ +[ + { + "details": { + "best_of_sequences": null, + "finish_reason": "length", + "generated_tokens": 10, + "prefill": [ + { + "id": 589, + "logprob": null, + "text": "def" + }, + { + "id": 1459, + "logprob": -5.6289062, + "text": " print" + }, + { + "id": 81, + "logprob": -1.6005859, + "text": "_" + }, + { + "id": 7656, + "logprob": -5.9921875, + "text": "hello" + } + ], + "seed": null, + "tokens": [ + { + "id": 2262, + "logprob": -0.7705078, + "special": false, + "text": "():" + }, + { + "id": 284, + "logprob": -0.2602539, + "special": false, + "text": "\n " + }, + { + "id": 1459, + "logprob": -0.39282227, + "special": false, + "text": " print" + }, + { + "id": 440, + "logprob": -0.6113281, + "special": false, + "text": "(\"" + }, + { + "id": 8279, + "logprob": -0.4765625, + "special": false, + "text": "Hello" + }, + { + "id": 10896, + "logprob": -1.5068359, + "special": false, + "text": " World" + }, + { + "id": 657, + "logprob": -0.8154297, + "special": false, + "text": "\")" + }, + { + "id": 203, + "logprob": -0.7319336, + "special": false, + "text": "\n" + }, + { + "id": 203, + "logprob": -0.35229492, + "special": false, + "text": "\n" + }, + { + "id": 589, + "logprob": -1.0380859, + "special": false, + "text": "def" + } + ] + }, + "generated_text": "():\n print(\"Hello World\")\n\ndef" + }, + { + "details": { + "best_of_sequences": null, + "finish_reason": "length", + "generated_tokens": 10, + "prefill": [ + { + "id": 589, + "logprob": null, + "text": "def" + }, + { + "id": 1459, + "logprob": -5.6289062, + "text": " print" + }, + { + "id": 81, + "logprob": -1.6005859, + "text": "_" + }, + { + "id": 7656, + "logprob": -5.9921875, + "text": "hello" + } + ], + "seed": null, + "tokens": [ + { + "id": 2262, + "logprob": -0.7705078, + "special": false, + "text": "():" + }, + { + "id": 284, + "logprob": -0.2602539, + "special": false, + "text": "\n " + }, + { + "id": 1459, + "logprob": -0.39282227, + "special": false, + "text": " print" + }, + { + "id": 440, + "logprob": -0.6113281, + "special": false, + "text": "(\"" + }, + { + "id": 8279, + "logprob": -0.4765625, + "special": false, + "text": "Hello" + }, + { + "id": 10896, + "logprob": -1.5068359, + "special": false, + "text": " World" + }, + { + "id": 657, + "logprob": -0.8154297, + "special": false, + "text": "\")" + }, + { + "id": 203, + "logprob": -0.7319336, + "special": false, + "text": "\n" + }, + { + "id": 203, + "logprob": -0.35229492, + "special": false, + "text": "\n" + }, + { + "id": 589, + "logprob": -1.0380859, + "special": false, + "text": "def" + } + ] + }, + "generated_text": "():\n print(\"Hello World\")\n\ndef" + }, + { + "details": { + "best_of_sequences": null, + "finish_reason": "length", + "generated_tokens": 10, + "prefill": [ + { + "id": 589, + "logprob": null, + "text": "def" + }, + { + "id": 1459, + "logprob": -5.6289062, + "text": " print" + }, + { + "id": 81, + "logprob": -1.6005859, + "text": "_" + }, + { + "id": 7656, + "logprob": -5.9921875, + "text": "hello" + } + ], + "seed": null, + "tokens": [ + { + "id": 2262, + "logprob": -0.7705078, + "special": false, + "text": "():" + }, + { + "id": 284, + "logprob": -0.2602539, + "special": false, + "text": "\n " + }, + { + "id": 1459, + "logprob": -0.39282227, + "special": false, + "text": " print" + }, + { + "id": 440, + "logprob": -0.6113281, + "special": false, + "text": "(\"" + }, + { + "id": 8279, + "logprob": -0.4765625, + "special": false, + "text": "Hello" + }, + { + "id": 10896, + "logprob": -1.5068359, + "special": false, + "text": " World" + }, + { + "id": 657, + "logprob": -0.8154297, + "special": false, + "text": "\")" + }, + { + "id": 203, + "logprob": -0.7319336, + "special": false, + "text": "\n" + }, + { + "id": 203, + "logprob": -0.35229492, + "special": false, + "text": "\n" + }, + { + "id": 589, + "logprob": -1.0380859, + "special": false, + "text": "def" + } + ] + }, + "generated_text": "():\n print(\"Hello World\")\n\ndef" + }, + { + "details": { + "best_of_sequences": null, + "finish_reason": "length", + "generated_tokens": 10, + "prefill": [ + { + "id": 589, + "logprob": null, + "text": "def" + }, + { + "id": 1459, + "logprob": -5.6289062, + "text": " print" + }, + { + "id": 81, + "logprob": -1.6005859, + "text": "_" + }, + { + "id": 7656, + "logprob": -5.9921875, + "text": "hello" + } + ], + "seed": null, + "tokens": [ + { + "id": 2262, + "logprob": -0.7705078, + "special": false, + "text": "():" + }, + { + "id": 284, + "logprob": -0.2602539, + "special": false, + "text": "\n " + }, + { + "id": 1459, + "logprob": -0.39282227, + "special": false, + "text": " print" + }, + { + "id": 440, + "logprob": -0.6113281, + "special": false, + "text": "(\"" + }, + { + "id": 8279, + "logprob": -0.4765625, + "special": false, + "text": "Hello" + }, + { + "id": 10896, + "logprob": -1.5068359, + "special": false, + "text": " World" + }, + { + "id": 657, + "logprob": -0.8154297, + "special": false, + "text": "\")" + }, + { + "id": 203, + "logprob": -0.7319336, + "special": false, + "text": "\n" + }, + { + "id": 203, + "logprob": -0.35229492, + "special": false, + "text": "\n" + }, + { + "id": 589, + "logprob": -1.0380859, + "special": false, + "text": "def" + } + ] + }, + "generated_text": "():\n print(\"Hello World\")\n\ndef" + } +] diff --git a/integration-tests/models/__snapshots__/test_mt0_base.ambr b/integration-tests/models/__snapshots__/test_mt0_base.ambr deleted file mode 100644 index d7c6eaf6d..000000000 --- a/integration-tests/models/__snapshots__/test_mt0_base.ambr +++ /dev/null @@ -1,306 +0,0 @@ -# serializer version: 1 -# name: test_mt0_base - dict({ - 'details': dict({ - 'best_of_sequences': None, - 'finish_reason': , - 'generated_tokens': 5, - 'prefill': list([ - dict({ - 'id': 0, - 'text': '', - }), - ]), - 'seed': 0, - 'tokens': list([ - dict({ - 'id': 926, - 'special': False, - 'text': 'To', - }), - dict({ - 'id': 18295, - 'special': False, - 'text': ' sell', - }), - dict({ - 'id': 7868, - 'special': False, - 'text': ' things', - }), - dict({ - 'id': 260, - 'special': False, - 'text': '.', - }), - dict({ - 'id': 1, - 'special': True, - 'text': '', - }), - ]), - }), - 'generated_text': 'To sell things.', - }) -# --- -# name: test_mt0_base_all_params - dict({ - 'details': dict({ - 'best_of_sequences': None, - 'finish_reason': , - 'generated_tokens': 10, - 'prefill': list([ - dict({ - 'id': 0, - 'text': '', - }), - ]), - 'seed': 0, - 'tokens': list([ - dict({ - 'id': 16017, - 'special': False, - 'text': 'blue', - }), - dict({ - 'id': 20495, - 'special': False, - 'text': ' sky', - }), - dict({ - 'id': 259, - 'special': False, - 'text': ' ', - }), - dict({ - 'id': 15484, - 'special': False, - 'text': 'appear', - }), - dict({ - 'id': 345, - 'special': False, - 'text': 'ed', - }), - dict({ - 'id': 288, - 'special': False, - 'text': ' to', - }), - dict({ - 'id': 35622, - 'special': False, - 'text': ' cloud', - }), - dict({ - 'id': 263, - 'special': False, - 'text': 's', - }), - dict({ - 'id': 14701, - 'special': False, - 'text': ' above', - }), - dict({ - 'id': 751, - 'special': False, - 'text': ' all', - }), - ]), - }), - 'generated_text': 'Why is the sky blue?blue sky appeared to clouds above all', - }) -# --- -# name: test_mt0_base_load - list([ - dict({ - 'details': dict({ - 'best_of_sequences': None, - 'finish_reason': , - 'generated_tokens': 6, - 'prefill': list([ - dict({ - 'id': 0, - 'text': '', - }), - ]), - 'seed': None, - 'tokens': list([ - dict({ - 'id': 259, - 'special': False, - 'text': '', - }), - dict({ - 'id': 39261, - 'special': False, - 'text': 'Because', - }), - dict({ - 'id': 609, - 'special': False, - 'text': ' it', - }), - dict({ - 'id': 339, - 'special': False, - 'text': ' is', - }), - dict({ - 'id': 16017, - 'special': False, - 'text': ' blue', - }), - dict({ - 'id': 1, - 'special': True, - 'text': '', - }), - ]), - }), - 'generated_text': 'Because it is blue', - }), - dict({ - 'details': dict({ - 'best_of_sequences': None, - 'finish_reason': , - 'generated_tokens': 6, - 'prefill': list([ - dict({ - 'id': 0, - 'text': '', - }), - ]), - 'seed': None, - 'tokens': list([ - dict({ - 'id': 259, - 'special': False, - 'text': '', - }), - dict({ - 'id': 39261, - 'special': False, - 'text': 'Because', - }), - dict({ - 'id': 609, - 'special': False, - 'text': ' it', - }), - dict({ - 'id': 339, - 'special': False, - 'text': ' is', - }), - dict({ - 'id': 16017, - 'special': False, - 'text': ' blue', - }), - dict({ - 'id': 1, - 'special': True, - 'text': '', - }), - ]), - }), - 'generated_text': 'Because it is blue', - }), - dict({ - 'details': dict({ - 'best_of_sequences': None, - 'finish_reason': , - 'generated_tokens': 6, - 'prefill': list([ - dict({ - 'id': 0, - 'text': '', - }), - ]), - 'seed': None, - 'tokens': list([ - dict({ - 'id': 259, - 'special': False, - 'text': '', - }), - dict({ - 'id': 39261, - 'special': False, - 'text': 'Because', - }), - dict({ - 'id': 609, - 'special': False, - 'text': ' it', - }), - dict({ - 'id': 339, - 'special': False, - 'text': ' is', - }), - dict({ - 'id': 16017, - 'special': False, - 'text': ' blue', - }), - dict({ - 'id': 1, - 'special': True, - 'text': '', - }), - ]), - }), - 'generated_text': 'Because it is blue', - }), - dict({ - 'details': dict({ - 'best_of_sequences': None, - 'finish_reason': , - 'generated_tokens': 6, - 'prefill': list([ - dict({ - 'id': 0, - 'text': '', - }), - ]), - 'seed': None, - 'tokens': list([ - dict({ - 'id': 259, - 'special': False, - 'text': '', - }), - dict({ - 'id': 39261, - 'special': False, - 'text': 'Because', - }), - dict({ - 'id': 609, - 'special': False, - 'text': ' it', - }), - dict({ - 'id': 339, - 'special': False, - 'text': ' is', - }), - dict({ - 'id': 16017, - 'special': False, - 'text': ' blue', - }), - dict({ - 'id': 1, - 'special': True, - 'text': '', - }), - ]), - }), - 'generated_text': 'Because it is blue', - }), - ]) -# --- diff --git a/integration-tests/models/__snapshots__/test_mt0_base/test_mt0_base.json b/integration-tests/models/__snapshots__/test_mt0_base/test_mt0_base.json new file mode 100644 index 000000000..2a26e3db9 --- /dev/null +++ b/integration-tests/models/__snapshots__/test_mt0_base/test_mt0_base.json @@ -0,0 +1,48 @@ +{ + "details": { + "best_of_sequences": null, + "finish_reason": "eos_token", + "generated_tokens": 5, + "prefill": [ + { + "id": 0, + "logprob": null, + "text": "" + } + ], + "seed": 0, + "tokens": [ + { + "id": 926, + "logprob": -4.3554688, + "special": false, + "text": "To" + }, + { + "id": 18295, + "logprob": -7.7734375, + "special": false, + "text": " sell" + }, + { + "id": 7868, + "logprob": -3.9257812, + "special": false, + "text": " things" + }, + { + "id": 260, + "logprob": -2.4179688, + "special": false, + "text": "." + }, + { + "id": 1, + "logprob": 0.0, + "special": true, + "text": "" + } + ] + }, + "generated_text": "To sell things." +} diff --git a/integration-tests/models/__snapshots__/test_mt0_base/test_mt0_base_all_params.json b/integration-tests/models/__snapshots__/test_mt0_base/test_mt0_base_all_params.json new file mode 100644 index 000000000..fd77252d1 --- /dev/null +++ b/integration-tests/models/__snapshots__/test_mt0_base/test_mt0_base_all_params.json @@ -0,0 +1,78 @@ +{ + "details": { + "best_of_sequences": null, + "finish_reason": "length", + "generated_tokens": 10, + "prefill": [ + { + "id": 0, + "logprob": null, + "text": "" + } + ], + "seed": 0, + "tokens": [ + { + "id": 16017, + "logprob": -1.3505859, + "special": false, + "text": "blue" + }, + { + "id": 20495, + "logprob": -0.50439453, + "special": false, + "text": " sky" + }, + { + "id": 259, + "logprob": -1.2011719, + "special": false, + "text": " " + }, + { + "id": 15484, + "logprob": -2.8378906, + "special": false, + "text": "appear" + }, + { + "id": 345, + "logprob": -0.87597656, + "special": false, + "text": "ed" + }, + { + "id": 288, + "logprob": -1.8447266, + "special": false, + "text": " to" + }, + { + "id": 35622, + "logprob": -7.1445312, + "special": false, + "text": " cloud" + }, + { + "id": 263, + "logprob": -1.2929688, + "special": false, + "text": "s" + }, + { + "id": 14701, + "logprob": -3.0761719, + "special": false, + "text": " above" + }, + { + "id": 751, + "logprob": -4.4375, + "special": false, + "text": " all" + } + ] + }, + "generated_text": "Why is the sky blue?blue sky appeared to clouds above all" +} diff --git a/integration-tests/models/__snapshots__/test_mt0_base/test_mt0_base_load.json b/integration-tests/models/__snapshots__/test_mt0_base/test_mt0_base_load.json new file mode 100644 index 000000000..c9e552b64 --- /dev/null +++ b/integration-tests/models/__snapshots__/test_mt0_base/test_mt0_base_load.json @@ -0,0 +1,218 @@ +[ + { + "details": { + "best_of_sequences": null, + "finish_reason": "eos_token", + "generated_tokens": 6, + "prefill": [ + { + "id": 0, + "logprob": null, + "text": "" + } + ], + "seed": null, + "tokens": [ + { + "id": 259, + "logprob": -1.3789062, + "special": false, + "text": "" + }, + { + "id": 39261, + "logprob": -0.36279297, + "special": false, + "text": "Because" + }, + { + "id": 609, + "logprob": -1.0966797, + "special": false, + "text": " it" + }, + { + "id": 339, + "logprob": -0.8276367, + "special": false, + "text": " is" + }, + { + "id": 16017, + "logprob": -1.6845703, + "special": false, + "text": " blue" + }, + { + "id": 1, + "logprob": -0.72753906, + "special": true, + "text": "" + } + ] + }, + "generated_text": "Because it is blue" + }, + { + "details": { + "best_of_sequences": null, + "finish_reason": "eos_token", + "generated_tokens": 6, + "prefill": [ + { + "id": 0, + "logprob": null, + "text": "" + } + ], + "seed": null, + "tokens": [ + { + "id": 259, + "logprob": -1.3798828, + "special": false, + "text": "" + }, + { + "id": 39261, + "logprob": -0.36328125, + "special": false, + "text": "Because" + }, + { + "id": 609, + "logprob": -1.0947266, + "special": false, + "text": " it" + }, + { + "id": 339, + "logprob": -0.8286133, + "special": false, + "text": " is" + }, + { + "id": 16017, + "logprob": -1.6826172, + "special": false, + "text": " blue" + }, + { + "id": 1, + "logprob": -0.7290039, + "special": true, + "text": "" + } + ] + }, + "generated_text": "Because it is blue" + }, + { + "details": { + "best_of_sequences": null, + "finish_reason": "eos_token", + "generated_tokens": 6, + "prefill": [ + { + "id": 0, + "logprob": null, + "text": "" + } + ], + "seed": null, + "tokens": [ + { + "id": 259, + "logprob": -1.3789062, + "special": false, + "text": "" + }, + { + "id": 39261, + "logprob": -0.36279297, + "special": false, + "text": "Because" + }, + { + "id": 609, + "logprob": -1.0966797, + "special": false, + "text": " it" + }, + { + "id": 339, + "logprob": -0.8276367, + "special": false, + "text": " is" + }, + { + "id": 16017, + "logprob": -1.6845703, + "special": false, + "text": " blue" + }, + { + "id": 1, + "logprob": -0.72753906, + "special": true, + "text": "" + } + ] + }, + "generated_text": "Because it is blue" + }, + { + "details": { + "best_of_sequences": null, + "finish_reason": "eos_token", + "generated_tokens": 6, + "prefill": [ + { + "id": 0, + "logprob": null, + "text": "" + } + ], + "seed": null, + "tokens": [ + { + "id": 259, + "logprob": -1.3789062, + "special": false, + "text": "" + }, + { + "id": 39261, + "logprob": -0.36279297, + "special": false, + "text": "Because" + }, + { + "id": 609, + "logprob": -1.0966797, + "special": false, + "text": " it" + }, + { + "id": 339, + "logprob": -0.8276367, + "special": false, + "text": " is" + }, + { + "id": 16017, + "logprob": -1.6845703, + "special": false, + "text": " blue" + }, + { + "id": 1, + "logprob": -0.72753906, + "special": true, + "text": "" + } + ] + }, + "generated_text": "Because it is blue" + } +] diff --git a/integration-tests/models/test_bloom_560m.py b/integration-tests/models/test_bloom_560m.py index e13606f78..3c598c045 100644 --- a/integration-tests/models/test_bloom_560m.py +++ b/integration-tests/models/test_bloom_560m.py @@ -1,18 +1,20 @@ import pytest -from utils import health_check - @pytest.fixture(scope="module") -def bloom_560(launcher): - with launcher("bigscience/bloom-560m") as client: - yield client +def bloom_560_handle(launcher): + with launcher("bigscience/bloom-560m") as handle: + yield handle + + +@pytest.fixture(scope="module") +async def bloom_560(bloom_560_handle): + await bloom_560_handle.health(60) + return bloom_560_handle.client @pytest.mark.asyncio -async def test_bloom_560m(bloom_560, snapshot_test): - await health_check(bloom_560, 60) - +async def test_bloom_560m(bloom_560, response_snapshot): response = await bloom_560.generate( "Pour déguster un ortolan, il faut tout d'abord", max_new_tokens=10, @@ -21,13 +23,11 @@ async def test_bloom_560m(bloom_560, snapshot_test): ) assert response.details.generated_tokens == 10 - assert snapshot_test(response) + assert response == response_snapshot @pytest.mark.asyncio -async def test_bloom_560m_all_params(bloom_560, snapshot_test): - await health_check(bloom_560, 60) - +async def test_bloom_560m_all_params(bloom_560, response_snapshot): response = await bloom_560.generate( "Pour déguster un ortolan, il faut tout d'abord", max_new_tokens=10, @@ -44,13 +44,11 @@ async def test_bloom_560m_all_params(bloom_560, snapshot_test): ) assert response.details.generated_tokens == 10 - assert snapshot_test(response) + assert response == response_snapshot @pytest.mark.asyncio -async def test_bloom_560m_load(bloom_560, generate_load, snapshot_test): - await health_check(bloom_560, 60) - +async def test_bloom_560m_load(bloom_560, generate_load, response_snapshot): responses = await generate_load( bloom_560, "Pour déguster un ortolan, il faut tout d'abord", @@ -59,5 +57,6 @@ async def test_bloom_560m_load(bloom_560, generate_load, snapshot_test): ) assert len(responses) == 4 + assert all([r.generated_text == responses[0].generated_text for r in responses]) - assert snapshot_test(responses) + assert responses == response_snapshot diff --git a/integration-tests/models/test_bloom_560m_sharded.py b/integration-tests/models/test_bloom_560m_sharded.py index bfb702536..25f6b2d7e 100644 --- a/integration-tests/models/test_bloom_560m_sharded.py +++ b/integration-tests/models/test_bloom_560m_sharded.py @@ -1,18 +1,20 @@ import pytest -from utils import health_check - @pytest.fixture(scope="module") -def bloom_560m_sharded(launcher): - with launcher("bigscience/bloom-560m", num_shard=2) as client: - yield client +def bloom_560m_sharded_handle(launcher): + with launcher("bigscience/bloom-560m", num_shard=2) as handle: + yield handle + + +@pytest.fixture(scope="module") +async def bloom_560m_sharded(bloom_560m_sharded_handle): + await bloom_560m_sharded_handle.health(60) + return bloom_560m_sharded_handle.client @pytest.mark.asyncio -async def test_bloom_560m_sharded(bloom_560m_sharded, snapshot_test): - await health_check(bloom_560m_sharded, 60) - +async def test_bloom_560m_sharded(bloom_560m_sharded, response_snapshot): response = await bloom_560m_sharded.generate( "Pour déguster un ortolan, il faut tout d'abord", max_new_tokens=10, @@ -21,15 +23,13 @@ async def test_bloom_560m_sharded(bloom_560m_sharded, snapshot_test): ) assert response.details.generated_tokens == 10 - assert snapshot_test(response) + assert response == response_snapshot @pytest.mark.asyncio async def test_bloom_560m_sharded_load( - bloom_560m_sharded, generate_load, snapshot_test + bloom_560m_sharded, generate_load, response_snapshot ): - await health_check(bloom_560m_sharded, 60) - responses = await generate_load( bloom_560m_sharded, "Pour déguster un ortolan, il faut tout d'abord", @@ -38,5 +38,6 @@ async def test_bloom_560m_sharded_load( ) assert len(responses) == 4 + assert all([r.generated_text == responses[0].generated_text for r in responses]) - assert snapshot_test(responses) + assert responses == response_snapshot diff --git a/integration-tests/models/test_flash_llama.py b/integration-tests/models/test_flash_llama.py index 4d1f2bcfd..374684551 100644 --- a/integration-tests/models/test_flash_llama.py +++ b/integration-tests/models/test_flash_llama.py @@ -1,30 +1,30 @@ import pytest -from utils import health_check - @pytest.fixture(scope="module") -def flash_llama(launcher): - with launcher("huggingface/llama-7b", num_shard=2) as client: - yield client +def flash_llama_handle(launcher): + with launcher("huggingface/llama-7b", num_shard=2) as handle: + yield handle + + +@pytest.fixture(scope="module") +async def flash_llama(flash_llama_handle): + await flash_llama_handle.health(120) + return flash_llama_handle.client @pytest.mark.asyncio @pytest.mark.private -async def test_flash_llama(flash_llama, snapshot_test): - await health_check(flash_llama, 120) - +async def test_flash_llama(flash_llama, response_snapshot): response = await flash_llama.generate("Test request", max_new_tokens=10) assert response.details.generated_tokens == 10 - assert snapshot_test(response) + assert response == response_snapshot @pytest.mark.asyncio @pytest.mark.private -async def test_flash_llama_all_params(flash_llama, snapshot_test): - await health_check(flash_llama, 120) - +async def test_flash_llama_all_params(flash_llama, response_snapshot): response = await flash_llama.generate( "Test request", max_new_tokens=10, @@ -41,16 +41,15 @@ async def test_flash_llama_all_params(flash_llama, snapshot_test): ) assert response.details.generated_tokens == 10 - assert snapshot_test(response) + assert response == response_snapshot @pytest.mark.asyncio @pytest.mark.private -async def test_flash_llama_load(flash_llama, generate_load, snapshot_test): - await health_check(flash_llama, 120) - +async def test_flash_llama_load(flash_llama, generate_load, response_snapshot): responses = await generate_load(flash_llama, "Test request", max_new_tokens=10, n=4) assert len(responses) == 4 + assert all([r.generated_text == responses[0].generated_text for r in responses]) - assert snapshot_test(responses) + assert responses == response_snapshot diff --git a/integration-tests/models/test_flash_neox.py b/integration-tests/models/test_flash_neox.py index 8c9810287..56cbf270a 100644 --- a/integration-tests/models/test_flash_neox.py +++ b/integration-tests/models/test_flash_neox.py @@ -1,31 +1,31 @@ import pytest -from utils import health_check - @pytest.fixture(scope="module") -def flash_neox(launcher): - with launcher("OpenAssistant/oasst-sft-1-pythia-12b", num_shard=2) as client: - yield client +def flash_neox_handle(launcher): + with launcher("OpenAssistant/oasst-sft-1-pythia-12b", num_shard=2) as handle: + yield handle + + +@pytest.fixture(scope="module") +async def flash_neox(flash_neox_handle): + await flash_neox_handle.health(240) + return flash_neox_handle.client @pytest.mark.asyncio -async def test_flash_neox(flash_neox, snapshot_test): - await health_check(flash_neox, 240) - +async def test_flash_neox(flash_neox, response_snapshot): response = await flash_neox.generate( "<|prompter|>What is a meme, and what's the history behind this word?<|endoftext|><|assistant|>", max_new_tokens=10, ) assert response.details.generated_tokens == 10 - assert snapshot_test(response) + assert response == response_snapshot @pytest.mark.asyncio -async def test_flash_neox_load(flash_neox, generate_load, snapshot_test): - await health_check(flash_neox, 240) - +async def test_flash_neox_load(flash_neox, generate_load, response_snapshot): responses = await generate_load( flash_neox, "<|prompter|>What is a meme, and what's the history behind this word?<|endoftext|><|assistant|>", @@ -34,5 +34,6 @@ async def test_flash_neox_load(flash_neox, generate_load, snapshot_test): ) assert len(responses) == 4 + assert all([r.generated_text == responses[0].generated_text for r in responses]) - assert snapshot_test(responses) + assert responses == response_snapshot diff --git a/integration-tests/models/test_flash_santacoder.py b/integration-tests/models/test_flash_santacoder.py index 64a59d781..b0cb45227 100644 --- a/integration-tests/models/test_flash_santacoder.py +++ b/integration-tests/models/test_flash_santacoder.py @@ -1,32 +1,35 @@ import pytest -from utils import health_check - @pytest.fixture(scope="module") -def flash_santacoder(launcher): - with launcher("bigcode/santacoder") as client: - yield client +def flash_santacoder_handle(launcher): + with launcher("bigcode/santacoder") as handle: + yield handle + + +@pytest.fixture(scope="module") +async def flash_santacoder(flash_santacoder_handle): + await flash_santacoder_handle.health(240) + return flash_santacoder_handle.client @pytest.mark.asyncio -async def test_flash_santacoder(flash_santacoder, snapshot_test): - await health_check(flash_santacoder, 60) - +async def test_flash_santacoder(flash_santacoder, response_snapshot): response = await flash_santacoder.generate("def print_hello", max_new_tokens=10) assert response.details.generated_tokens == 10 - assert snapshot_test(response) + assert response == response_snapshot @pytest.mark.asyncio -async def test_flash_santacoder_load(flash_santacoder, generate_load, snapshot_test): - await health_check(flash_santacoder, 60) - +async def test_flash_santacoder_load( + flash_santacoder, generate_load, response_snapshot +): responses = await generate_load( flash_santacoder, "def print_hello", max_new_tokens=10, n=4 ) assert len(responses) == 4 + assert all([r.generated_text == responses[0].generated_text for r in responses]) - assert snapshot_test(responses) + assert responses == response_snapshot diff --git a/integration-tests/models/test_flash_starcoder.py b/integration-tests/models/test_flash_starcoder.py index d43e92dc8..4c7393a71 100644 --- a/integration-tests/models/test_flash_starcoder.py +++ b/integration-tests/models/test_flash_starcoder.py @@ -1,47 +1,46 @@ import pytest -from utils import health_check - @pytest.fixture(scope="module") -def flash_starcoder(launcher): - with launcher("bigcode/starcoder", num_shard=2) as client: - yield client +def flash_starcoder_handle(launcher): + with launcher("bigcode/starcoder", num_shard=2) as handle: + yield handle + + +@pytest.fixture(scope="module") +async def flash_starcoder(flash_starcoder_handle): + await flash_starcoder_handle.health(240) + return flash_starcoder_handle.client @pytest.mark.asyncio @pytest.mark.private -async def test_flash_starcoder(flash_starcoder, snapshot_test): - await health_check(flash_starcoder, 240) - +async def test_flash_starcoder(flash_starcoder, response_snapshot): response = await flash_starcoder.generate("def print_hello", max_new_tokens=10) assert response.details.generated_tokens == 10 - assert snapshot_test(response) + assert response == response_snapshot @pytest.mark.asyncio @pytest.mark.private -async def test_flash_starcoder_default_params(flash_starcoder, snapshot_test): - await health_check(flash_starcoder, 240) - +async def test_flash_starcoder_default_params(flash_starcoder, response_snapshot): response = await flash_starcoder.generate( "def print_hello", max_new_tokens=60, temperature=0.2, top_p=0.95, seed=0 ) assert response.details.generated_tokens == 12 - assert snapshot_test(response) + assert response == response_snapshot @pytest.mark.asyncio @pytest.mark.private -async def test_flash_starcoder_load(flash_starcoder, generate_load, snapshot_test): - await health_check(flash_starcoder, 240) - +async def test_flash_starcoder_load(flash_starcoder, generate_load, response_snapshot): responses = await generate_load( flash_starcoder, "def print_hello", max_new_tokens=10, n=4 ) assert len(responses) == 4 + assert all([r.generated_text == responses[0].generated_text for r in responses]) - assert snapshot_test(responses) + assert responses == response_snapshot diff --git a/integration-tests/models/test_mt0_base.py b/integration-tests/models/test_mt0_base.py index 7310a30f1..15410f73e 100644 --- a/integration-tests/models/test_mt0_base.py +++ b/integration-tests/models/test_mt0_base.py @@ -1,18 +1,20 @@ import pytest -from utils import health_check - @pytest.fixture(scope="module") -def mt0_base(launcher): - with launcher("bigscience/mt0-base") as client: - yield client +def mt0_base_handle(launcher): + with launcher("bigscience/mt0-base") as handle: + yield handle + + +@pytest.fixture(scope="module") +async def mt0_base(mt0_base_handle): + await mt0_base_handle.health(60) + return mt0_base_handle.client @pytest.mark.asyncio -async def test_mt0_base(mt0_base, snapshot_test): - await health_check(mt0_base, 60) - +async def test_mt0_base(mt0_base, response_snapshot): response = await mt0_base.generate( "Why is the sky blue?", max_new_tokens=10, @@ -21,13 +23,11 @@ async def test_mt0_base(mt0_base, snapshot_test): ) assert response.details.generated_tokens == 5 - assert snapshot_test(response) + assert response == response_snapshot @pytest.mark.asyncio -async def test_mt0_base_all_params(mt0_base, snapshot_test): - await health_check(mt0_base, 60) - +async def test_mt0_base_all_params(mt0_base, response_snapshot): response = await mt0_base.generate( "Why is the sky blue?", max_new_tokens=10, @@ -44,13 +44,11 @@ async def test_mt0_base_all_params(mt0_base, snapshot_test): ) assert response.details.generated_tokens == 10 - assert snapshot_test(response) + assert response == response_snapshot @pytest.mark.asyncio -async def test_mt0_base_load(mt0_base, generate_load, snapshot_test): - await health_check(mt0_base, 60) - +async def test_mt0_base_load(mt0_base, generate_load, response_snapshot): responses = await generate_load( mt0_base, "Why is the sky blue?", @@ -59,5 +57,6 @@ async def test_mt0_base_load(mt0_base, generate_load, snapshot_test): ) assert len(responses) == 4 + assert all([r.generated_text == responses[0].generated_text for r in responses]) - assert snapshot_test(responses) + assert responses == response_snapshot diff --git a/integration-tests/models/utils.py b/integration-tests/models/utils.py deleted file mode 100644 index c47e48712..000000000 --- a/integration-tests/models/utils.py +++ /dev/null @@ -1,15 +0,0 @@ -import time - -from aiohttp import ClientConnectorError, ClientOSError, ServerDisconnectedError -from text_generation import AsyncClient - - -async def health_check(client: AsyncClient, timeout: int = 60): - assert timeout > 0 - for _ in range(timeout): - try: - await client.generate("test") - return - except (ClientConnectorError, ClientOSError, ServerDisconnectedError) as e: - time.sleep(1) - raise RuntimeError("Health check failed") diff --git a/integration-tests/requirements.txt b/integration-tests/requirements.txt index 9ecbb2ee1..051730ffe 100644 --- a/integration-tests/requirements.txt +++ b/integration-tests/requirements.txt @@ -1,5 +1,5 @@ syrupy -text-generation==0.5.1 +text-generation==0.5.2 pytest pytest-asyncio==0.17.2 docker \ No newline at end of file