diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml
index c2aba160..79b3c777 100644
--- a/.github/workflows/build.yaml
+++ b/.github/workflows/build.yaml
@@ -10,6 +10,7 @@ on:
   pull_request:
     paths:
       - ".github/workflows/build.yaml"
+      - "integration-tests/**"
       - "server/**"
       - "proto/**"
       - "router/**"
diff --git a/clients/python/pyproject.toml b/clients/python/pyproject.toml
index 00bb99fc..06d5f9cb 100644
--- a/clients/python/pyproject.toml
+++ b/clients/python/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "text-generation"
-version = "0.5.1"
+version = "0.5.2"
 description = "Hugging Face Text Generation Python Client"
 license = "Apache-2.0"
 authors = ["Olivier Dehaene <olivier@huggingface.co>"]
diff --git a/clients/python/tests/test_client.py b/clients/python/tests/test_client.py
index 8972dfd1..32462f14 100644
--- a/clients/python/tests/test_client.py
+++ b/clients/python/tests/test_client.py
@@ -16,9 +16,9 @@ def test_generate(flan_t5_xxl_url, hf_headers):
     assert len(response.details.prefill) == 1
     assert response.details.prefill[0] == PrefillToken(id=0, text="<pad>", logprob=None)
     assert len(response.details.tokens) == 1
-    assert response.details.tokens[0] == Token(
-        id=3, text="", logprob=-1.984375, special=False
-    )
+    assert response.details.tokens[0].id == 3
+    assert response.details.tokens[0].text == ""
+    assert not response.details.tokens[0].special
 
 
 def test_generate_best_of(flan_t5_xxl_url, hf_headers):
@@ -82,9 +82,9 @@ async def test_generate_async(flan_t5_xxl_url, hf_headers):
     assert len(response.details.prefill) == 1
     assert response.details.prefill[0] == PrefillToken(id=0, text="<pad>", logprob=None)
     assert len(response.details.tokens) == 1
-    assert response.details.tokens[0] == Token(
-        id=3, text="", logprob=-1.984375, special=False
-    )
+    assert response.details.tokens[0].id == 3
+    assert response.details.tokens[0].text == ""
+    assert not response.details.tokens[0].special
 
 
 @pytest.mark.asyncio
diff --git a/clients/python/text_generation/types.py b/clients/python/text_generation/types.py
index f3f9dcb5..ad3cd09b 100644
--- a/clients/python/text_generation/types.py
+++ b/clients/python/text_generation/types.py
@@ -154,7 +154,7 @@ class Token(BaseModel):
 
 
 # Generation finish reason
-class FinishReason(Enum):
+class FinishReason(str, Enum):
     # number of generated tokens == `max_new_tokens`
     Length = "length"
     # the model generated its end of sequence token
diff --git a/integration-tests/conftest.py b/integration-tests/conftest.py
index e9c51c37..ba1abca9 100644
--- a/integration-tests/conftest.py
+++ b/integration-tests/conftest.py
@@ -4,22 +4,192 @@ import pytest
 import asyncio
 import os
 import docker
+import json
+import math
+import time
 
 from docker.errors import NotFound
-from typing import Optional, List
-from syrupy.filters import props
+from typing import Optional, List, Dict
+from syrupy.extensions.json import JSONSnapshotExtension
+from aiohttp import ClientConnectorError, ClientOSError, ServerDisconnectedError
 
 from text_generation import AsyncClient
-from text_generation.types import Response
+from text_generation.types import Response, Details, PrefillToken, Token, BestOfSequence
 
 DOCKER_IMAGE = os.getenv("DOCKER_IMAGE", None)
 HUGGING_FACE_HUB_TOKEN = os.getenv("HUGGING_FACE_HUB_TOKEN", None)
 DOCKER_VOLUME = os.getenv("DOCKER_VOLUME", "/data")
 
 
+class ResponseComparator(JSONSnapshotExtension):  # JSON snapshots, logprobs compared approximately
+    def serialize(
+        self,
+        data,
+        *,
+        exclude=None,
+        matcher=None,
+    ):
+        if isinstance(data, List):
+            data = [d.dict() for d in data]  # every list element must expose .dict()
+
+        data = self._filter(
+            data=data, depth=0, path=(), exclude=exclude, matcher=matcher
+        )
+        return json.dumps(data, indent=2, ensure_ascii=False, sort_keys=False) + "\n"
+
+    def matches(
+        self,
+        *,
+        serialized_data,
+        snapshot_data,
+    ) -> bool:
+        def convert_data(data):  # JSON text -> Response, or list of Response
+            data = json.loads(data)
+
+            if isinstance(data, Dict):
+                return Response(**data)
+            if isinstance(data, List):
+                return [Response(**d) for d in data]
+            raise NotImplementedError  # payload is neither a JSON object nor an array
+
+        def eq_token(token: Token, other: Token) -> bool:  # exact fields + approximate logprob
+            return (
+                token.id == other.id
+                and token.text == other.text
+                and math.isclose(token.logprob, other.logprob, rel_tol=0.2)  # NOTE(review): no abs_tol, so 0.0 only matches 0.0
+                and token.special == other.special
+            )
+
+        def eq_prefill_token(prefill_token: PrefillToken, other: PrefillToken) -> bool:
+            try:
+                return (
+                    prefill_token.id == other.id
+                    and prefill_token.text == other.text
+                    and (
+                        math.isclose(prefill_token.logprob, other.logprob, rel_tol=0.2)
+                        if prefill_token.logprob is not None
+                        else prefill_token.logprob == other.logprob  # None must pair with None
+                    )
+                )
+            except TypeError:  # isclose() was handed None for other.logprob
+                return False
+
+        def eq_best_of(details: BestOfSequence, other: BestOfSequence) -> bool:
+            return (
+                details.finish_reason == other.finish_reason
+                and details.generated_tokens == other.generated_tokens
+                and details.seed == other.seed
+                and len(details.prefill) == len(other.prefill)  # zip() below would silently truncate
+                and all(
+                    [
+                        eq_prefill_token(d, o)
+                        for d, o in zip(details.prefill, other.prefill)
+                    ]
+                )
+                and len(details.tokens) == len(other.tokens)
+                and all([eq_token(d, o) for d, o in zip(details.tokens, other.tokens)])
+            )
+
+        def eq_details(details: Details, other: Details) -> bool:
+            return (
+                details.finish_reason == other.finish_reason
+                and details.generated_tokens == other.generated_tokens
+                and details.seed == other.seed
+                and len(details.prefill) == len(other.prefill)
+                and all(
+                    [
+                        eq_prefill_token(d, o)
+                        for d, o in zip(details.prefill, other.prefill)
+                    ]
+                )
+                and len(details.tokens) == len(other.tokens)
+                and all([eq_token(d, o) for d, o in zip(details.tokens, other.tokens)])
+                and (
+                    len(details.best_of_sequences)
+                    if details.best_of_sequences is not None
+                    else 0  # a missing best_of list counts as length 0 for this check
+                )
+                == (
+                    len(other.best_of_sequences)
+                    if other.best_of_sequences is not None
+                    else 0
+                )
+                and (
+                    all(
+                        [
+                            eq_best_of(d, o)
+                            for d, o in zip(  # NOTE(review): TypeError if only other.best_of_sequences is None
+                                details.best_of_sequences, other.best_of_sequences
+                            )
+                        ]
+                    )
+                    if details.best_of_sequences is not None
+                    else details.best_of_sequences == other.best_of_sequences
+                )
+            )
+
+        def eq_response(response: Response, other: Response) -> bool:
+            return response.generated_text == other.generated_text and eq_details(
+                response.details, other.details
+            )
+
+        serialized_data = convert_data(serialized_data)
+        snapshot_data = convert_data(snapshot_data)
+
+        if not isinstance(serialized_data, List):  # wrap single responses so both sides are lists
+            serialized_data = [serialized_data]
+        if not isinstance(snapshot_data, List):
+            snapshot_data = [snapshot_data]
+
+        return len(snapshot_data) == len(serialized_data) and all(
+            [eq_response(r, o) for r, o in zip(serialized_data, snapshot_data)]
+        )
+
+
+class LauncherHandle:  # Base handle: an AsyncClient bound to a launched server on localhost.
+    def __init__(self, port: int):
+        self.client = AsyncClient(f"http://localhost:{port}")
+
+    def _inner_health(self):  # Subclass hook; must be truthy while the launcher is alive.
+        raise NotImplementedError
+
+    async def health(self, timeout: int = 60):  # Tries up to `timeout` times, pausing 1s after each failure.
+        assert timeout > 0
+        for _ in range(timeout):
+            if not self._inner_health():
+                raise RuntimeError("Launcher crashed")
+
+            try:
+                await self.client.generate("test")
+                return
+            except (ClientConnectorError, ClientOSError, ServerDisconnectedError):
+                await asyncio.sleep(1)  # non-blocking pause; time.sleep would stall the event loop
+        raise RuntimeError("Health check failed")
+
+
+class ContainerLauncherHandle(LauncherHandle):  # Handle whose liveness is read from a Docker container.
+    def __init__(self, docker_client, container_name, port: int):
+        super().__init__(port)
+        self.container_name = container_name
+        self.docker_client = docker_client
+
+    def _inner_health(self) -> bool:  # Looks the container up by name on every call.
+        status = self.docker_client.containers.get(self.container_name).status
+        return status in ("running", "created")
+
+
+class ProcessLauncherHandle(LauncherHandle):  # Handle whose liveness is read from a process object.
+    def __init__(self, process, port: int):
+        super().__init__(port)
+        self.process = process
+
+    def _inner_health(self) -> bool:  # Alive for as long as poll() keeps returning None.
+        return self.process.poll() is None
+
+
 @pytest.fixture
-def snapshot_test(snapshot):
-    return lambda value: value == snapshot(exclude=props("logprob"))
+def response_snapshot(snapshot):  # Snapshot fixture switched to the ResponseComparator extension.
+    return snapshot.use_extension(ResponseComparator)
 
 
 @pytest.fixture(scope="module")
@@ -60,7 +230,7 @@ def launcher(event_loop):
         with subprocess.Popen(
             args, stdout=subprocess.PIPE, stderr=subprocess.PIPE
         ) as process:
-            yield AsyncClient(f"http://localhost:{port}")
+            yield ProcessLauncherHandle(process, port)
 
             process.terminate()
             process.wait(60)
@@ -110,7 +280,7 @@ def launcher(event_loop):
             command=args,
             name=container_name,
             environment=env,
-            auto_remove=True,
+            auto_remove=False,
             detach=True,
             device_requests=[
                 docker.types.DeviceRequest(count=gpu_count, capabilities=[["gpu"]])
@@ -119,13 +289,19 @@ def launcher(event_loop):
             ports={"80/tcp": port},
         )
 
-        yield AsyncClient(f"http://localhost:{port}")
+        yield ContainerLauncherHandle(client, container.name, port)
 
-        container.stop()
+        try:
+            container.stop()
+            container.wait()
+        except NotFound:
+            pass
 
         container_output = container.logs().decode("utf-8")
         print(container_output)
 
+        container.remove()
+
     if DOCKER_IMAGE is not None:
         return docker_launcher
     return local_launcher
@@ -140,7 +316,6 @@ def generate_load():
             client.generate(prompt, max_new_tokens=max_new_tokens) for _ in range(n)
         ]
 
-        results = await asyncio.gather(*futures)
-        return [r.dict() for r in results]
+        return await asyncio.gather(*futures)
 
     return generate_load_inner
diff --git a/integration-tests/models/__snapshots__/test_bloom_560m.ambr b/integration-tests/models/__snapshots__/test_bloom_560m.ambr
deleted file mode 100644
index 1067513d..00000000
--- a/integration-tests/models/__snapshots__/test_bloom_560m.ambr
+++ /dev/null
@@ -1,627 +0,0 @@
-# serializer version: 1
-# name: test_bloom_560m
-  dict({
-    'details': dict({
-      'best_of_sequences': None,
-      'finish_reason': <FinishReason.Length: 'length'>,
-      'generated_tokens': 10,
-      'prefill': list([
-        dict({
-          'id': 17934,
-          'text': 'Pour',
-        }),
-        dict({
-          'id': 49833,
-          'text': ' dég',
-        }),
-        dict({
-          'id': 21543,
-          'text': 'uster',
-        }),
-        dict({
-          'id': 447,
-          'text': ' un',
-        }),
-        dict({
-          'id': 46341,
-          'text': ' ort',
-        }),
-        dict({
-          'id': 35567,
-          'text': 'olan',
-        }),
-        dict({
-          'id': 15,
-          'text': ',',
-        }),
-        dict({
-          'id': 1669,
-          'text': ' il',
-        }),
-        dict({
-          'id': 11580,
-          'text': ' faut',
-        }),
-        dict({
-          'id': 3913,
-          'text': ' tout',
-        }),
-        dict({
-          'id': 39261,
-          'text': " d'abord",
-        }),
-      ]),
-      'seed': 0,
-      'tokens': list([
-        dict({
-          'id': 578,
-          'special': False,
-          'text': ' le',
-        }),
-        dict({
-          'id': 5608,
-          'special': False,
-          'text': ' faire',
-        }),
-        dict({
-          'id': 159570,
-          'special': False,
-          'text': ' réch',
-        }),
-        dict({
-          'id': 810,
-          'special': False,
-          'text': 'au',
-        }),
-        dict({
-          'id': 12736,
-          'special': False,
-          'text': 'ffer',
-        }),
-        dict({
-          'id': 1742,
-          'special': False,
-          'text': ' au',
-        }),
-        dict({
-          'id': 6105,
-          'special': False,
-          'text': ' bain',
-        }),
-        dict({
-          'id': 88254,
-          'special': False,
-          'text': '-mar',
-        }),
-        dict({
-          'id': 641,
-          'special': False,
-          'text': 'ie',
-        }),
-        dict({
-          'id': 2940,
-          'special': False,
-          'text': ' avec',
-        }),
-      ]),
-    }),
-    'generated_text': ' le faire réchauffer au bain-marie avec',
-  })
-# ---
-# name: test_bloom_560m_all_params
-  dict({
-    'details': dict({
-      'best_of_sequences': None,
-      'finish_reason': <FinishReason.Length: 'length'>,
-      'generated_tokens': 10,
-      'prefill': list([
-        dict({
-          'id': 15,
-          'text': ',',
-        }),
-        dict({
-          'id': 1669,
-          'text': ' il',
-        }),
-        dict({
-          'id': 11580,
-          'text': ' faut',
-        }),
-        dict({
-          'id': 3913,
-          'text': ' tout',
-        }),
-        dict({
-          'id': 39261,
-          'text': " d'abord",
-        }),
-      ]),
-      'seed': 0,
-      'tokens': list([
-        dict({
-          'id': 408,
-          'special': False,
-          'text': ' que',
-        }),
-        dict({
-          'id': 20288,
-          'special': False,
-          'text': " l'on",
-        }),
-        dict({
-          'id': 22255,
-          'special': False,
-          'text': ' trouve',
-        }),
-        dict({
-          'id': 1622,
-          'special': False,
-          'text': ' une',
-        }),
-        dict({
-          'id': 187079,
-          'special': False,
-          'text': ' posture',
-        }),
-        dict({
-          'id': 501,
-          'special': False,
-          'text': ' par',
-        }),
-        dict({
-          'id': 8741,
-          'special': False,
-          'text': ' rapport',
-        }),
-        dict({
-          'id': 693,
-          'special': False,
-          'text': ' à',
-        }),
-        dict({
-          'id': 366,
-          'special': False,
-          'text': ' la',
-        }),
-        dict({
-          'id': 36503,
-          'special': False,
-          'text': ' pratique',
-        }),
-      ]),
-    }),
-    'generated_text': "Pour déguster un ortolan, il faut tout d'abord que l'on trouve une posture par rapport à la pratique",
-  })
-# ---
-# name: test_bloom_560m_load
-  list([
-    dict({
-      'details': dict({
-        'best_of_sequences': None,
-        'finish_reason': <FinishReason.Length: 'length'>,
-        'generated_tokens': 10,
-        'prefill': list([
-          dict({
-            'id': 17934,
-            'text': 'Pour',
-          }),
-          dict({
-            'id': 49833,
-            'text': ' dég',
-          }),
-          dict({
-            'id': 21543,
-            'text': 'uster',
-          }),
-          dict({
-            'id': 447,
-            'text': ' un',
-          }),
-          dict({
-            'id': 46341,
-            'text': ' ort',
-          }),
-          dict({
-            'id': 35567,
-            'text': 'olan',
-          }),
-          dict({
-            'id': 15,
-            'text': ',',
-          }),
-          dict({
-            'id': 1669,
-            'text': ' il',
-          }),
-          dict({
-            'id': 11580,
-            'text': ' faut',
-          }),
-          dict({
-            'id': 3913,
-            'text': ' tout',
-          }),
-          dict({
-            'id': 39261,
-            'text': " d'abord",
-          }),
-        ]),
-        'seed': None,
-        'tokens': list([
-          dict({
-            'id': 578,
-            'special': False,
-            'text': ' le',
-          }),
-          dict({
-            'id': 5608,
-            'special': False,
-            'text': ' faire',
-          }),
-          dict({
-            'id': 1767,
-            'special': False,
-            'text': ' cu',
-          }),
-          dict({
-            'id': 1273,
-            'special': False,
-            'text': 'ire',
-          }),
-          dict({
-            'id': 1486,
-            'special': False,
-            'text': ' dans',
-          }),
-          dict({
-            'id': 283,
-            'special': False,
-            'text': ' de',
-          }),
-          dict({
-            'id': 40410,
-            'special': False,
-            'text': " l'eau",
-          }),
-          dict({
-            'id': 20226,
-            'special': False,
-            'text': ' bou',
-          }),
-          dict({
-            'id': 172483,
-            'special': False,
-            'text': 'illante',
-          }),
-          dict({
-            'id': 2805,
-            'special': False,
-            'text': ' sal',
-          }),
-        ]),
-      }),
-      'generated_text': " le faire cuire dans de l'eau bouillante sal",
-    }),
-    dict({
-      'details': dict({
-        'best_of_sequences': None,
-        'finish_reason': <FinishReason.Length: 'length'>,
-        'generated_tokens': 10,
-        'prefill': list([
-          dict({
-            'id': 17934,
-            'text': 'Pour',
-          }),
-          dict({
-            'id': 49833,
-            'text': ' dég',
-          }),
-          dict({
-            'id': 21543,
-            'text': 'uster',
-          }),
-          dict({
-            'id': 447,
-            'text': ' un',
-          }),
-          dict({
-            'id': 46341,
-            'text': ' ort',
-          }),
-          dict({
-            'id': 35567,
-            'text': 'olan',
-          }),
-          dict({
-            'id': 15,
-            'text': ',',
-          }),
-          dict({
-            'id': 1669,
-            'text': ' il',
-          }),
-          dict({
-            'id': 11580,
-            'text': ' faut',
-          }),
-          dict({
-            'id': 3913,
-            'text': ' tout',
-          }),
-          dict({
-            'id': 39261,
-            'text': " d'abord",
-          }),
-        ]),
-        'seed': None,
-        'tokens': list([
-          dict({
-            'id': 578,
-            'special': False,
-            'text': ' le',
-          }),
-          dict({
-            'id': 5608,
-            'special': False,
-            'text': ' faire',
-          }),
-          dict({
-            'id': 1767,
-            'special': False,
-            'text': ' cu',
-          }),
-          dict({
-            'id': 1273,
-            'special': False,
-            'text': 'ire',
-          }),
-          dict({
-            'id': 1486,
-            'special': False,
-            'text': ' dans',
-          }),
-          dict({
-            'id': 283,
-            'special': False,
-            'text': ' de',
-          }),
-          dict({
-            'id': 40410,
-            'special': False,
-            'text': " l'eau",
-          }),
-          dict({
-            'id': 20226,
-            'special': False,
-            'text': ' bou',
-          }),
-          dict({
-            'id': 172483,
-            'special': False,
-            'text': 'illante',
-          }),
-          dict({
-            'id': 2805,
-            'special': False,
-            'text': ' sal',
-          }),
-        ]),
-      }),
-      'generated_text': " le faire cuire dans de l'eau bouillante sal",
-    }),
-    dict({
-      'details': dict({
-        'best_of_sequences': None,
-        'finish_reason': <FinishReason.Length: 'length'>,
-        'generated_tokens': 10,
-        'prefill': list([
-          dict({
-            'id': 17934,
-            'text': 'Pour',
-          }),
-          dict({
-            'id': 49833,
-            'text': ' dég',
-          }),
-          dict({
-            'id': 21543,
-            'text': 'uster',
-          }),
-          dict({
-            'id': 447,
-            'text': ' un',
-          }),
-          dict({
-            'id': 46341,
-            'text': ' ort',
-          }),
-          dict({
-            'id': 35567,
-            'text': 'olan',
-          }),
-          dict({
-            'id': 15,
-            'text': ',',
-          }),
-          dict({
-            'id': 1669,
-            'text': ' il',
-          }),
-          dict({
-            'id': 11580,
-            'text': ' faut',
-          }),
-          dict({
-            'id': 3913,
-            'text': ' tout',
-          }),
-          dict({
-            'id': 39261,
-            'text': " d'abord",
-          }),
-        ]),
-        'seed': None,
-        'tokens': list([
-          dict({
-            'id': 578,
-            'special': False,
-            'text': ' le',
-          }),
-          dict({
-            'id': 5608,
-            'special': False,
-            'text': ' faire',
-          }),
-          dict({
-            'id': 1767,
-            'special': False,
-            'text': ' cu',
-          }),
-          dict({
-            'id': 1273,
-            'special': False,
-            'text': 'ire',
-          }),
-          dict({
-            'id': 1486,
-            'special': False,
-            'text': ' dans',
-          }),
-          dict({
-            'id': 283,
-            'special': False,
-            'text': ' de',
-          }),
-          dict({
-            'id': 40410,
-            'special': False,
-            'text': " l'eau",
-          }),
-          dict({
-            'id': 20226,
-            'special': False,
-            'text': ' bou',
-          }),
-          dict({
-            'id': 172483,
-            'special': False,
-            'text': 'illante',
-          }),
-          dict({
-            'id': 2805,
-            'special': False,
-            'text': ' sal',
-          }),
-        ]),
-      }),
-      'generated_text': " le faire cuire dans de l'eau bouillante sal",
-    }),
-    dict({
-      'details': dict({
-        'best_of_sequences': None,
-        'finish_reason': <FinishReason.Length: 'length'>,
-        'generated_tokens': 10,
-        'prefill': list([
-          dict({
-            'id': 17934,
-            'text': 'Pour',
-          }),
-          dict({
-            'id': 49833,
-            'text': ' dég',
-          }),
-          dict({
-            'id': 21543,
-            'text': 'uster',
-          }),
-          dict({
-            'id': 447,
-            'text': ' un',
-          }),
-          dict({
-            'id': 46341,
-            'text': ' ort',
-          }),
-          dict({
-            'id': 35567,
-            'text': 'olan',
-          }),
-          dict({
-            'id': 15,
-            'text': ',',
-          }),
-          dict({
-            'id': 1669,
-            'text': ' il',
-          }),
-          dict({
-            'id': 11580,
-            'text': ' faut',
-          }),
-          dict({
-            'id': 3913,
-            'text': ' tout',
-          }),
-          dict({
-            'id': 39261,
-            'text': " d'abord",
-          }),
-        ]),
-        'seed': None,
-        'tokens': list([
-          dict({
-            'id': 578,
-            'special': False,
-            'text': ' le',
-          }),
-          dict({
-            'id': 5608,
-            'special': False,
-            'text': ' faire',
-          }),
-          dict({
-            'id': 1767,
-            'special': False,
-            'text': ' cu',
-          }),
-          dict({
-            'id': 1273,
-            'special': False,
-            'text': 'ire',
-          }),
-          dict({
-            'id': 1486,
-            'special': False,
-            'text': ' dans',
-          }),
-          dict({
-            'id': 283,
-            'special': False,
-            'text': ' de',
-          }),
-          dict({
-            'id': 40410,
-            'special': False,
-            'text': " l'eau",
-          }),
-          dict({
-            'id': 20226,
-            'special': False,
-            'text': ' bou',
-          }),
-          dict({
-            'id': 172483,
-            'special': False,
-            'text': 'illante',
-          }),
-          dict({
-            'id': 2805,
-            'special': False,
-            'text': ' sal',
-          }),
-        ]),
-      }),
-      'generated_text': " le faire cuire dans de l'eau bouillante sal",
-    }),
-  ])
-# ---
diff --git a/integration-tests/models/__snapshots__/test_bloom_560m/test_bloom_560m.json b/integration-tests/models/__snapshots__/test_bloom_560m/test_bloom_560m.json
new file mode 100644
index 00000000..53a4ab85
--- /dev/null
+++ b/integration-tests/models/__snapshots__/test_bloom_560m/test_bloom_560m.json
@@ -0,0 +1,128 @@
+{
+  "details": {
+    "best_of_sequences": null,
+    "finish_reason": "length",
+    "generated_tokens": 10,
+    "prefill": [
+      {
+        "id": 17934,
+        "logprob": null,
+        "text": "Pour"
+      },
+      {
+        "id": 49833,
+        "logprob": -10.5625,
+        "text": " dég"
+      },
+      {
+        "id": 21543,
+        "logprob": -0.14770508,
+        "text": "uster"
+      },
+      {
+        "id": 447,
+        "logprob": -1.9287109,
+        "text": " un"
+      },
+      {
+        "id": 46341,
+        "logprob": -15.4609375,
+        "text": " ort"
+      },
+      {
+        "id": 35567,
+        "logprob": -7.5585938,
+        "text": "olan"
+      },
+      {
+        "id": 15,
+        "logprob": -1.4003906,
+        "text": ","
+      },
+      {
+        "id": 1669,
+        "logprob": -1.5673828,
+        "text": " il"
+      },
+      {
+        "id": 11580,
+        "logprob": -0.94628906,
+        "text": " faut"
+      },
+      {
+        "id": 3913,
+        "logprob": -3.703125,
+        "text": " tout"
+      },
+      {
+        "id": 39261,
+        "logprob": -1.5732422,
+        "text": " d'abord"
+      }
+    ],
+    "seed": 0,
+    "tokens": [
+      {
+        "id": 578,
+        "logprob": -1.6591797,
+        "special": false,
+        "text": " le"
+      },
+      {
+        "id": 5608,
+        "logprob": -2.4492188,
+        "special": false,
+        "text": " faire"
+      },
+      {
+        "id": 159570,
+        "logprob": -6.6835938,
+        "special": false,
+        "text": " réch"
+      },
+      {
+        "id": 810,
+        "logprob": 0.0,
+        "special": false,
+        "text": "au"
+      },
+      {
+        "id": 12736,
+        "logprob": 0.0,
+        "special": false,
+        "text": "ffer"
+      },
+      {
+        "id": 1742,
+        "logprob": -2.5175781,
+        "special": false,
+        "text": " au"
+      },
+      {
+        "id": 6105,
+        "logprob": -2.0078125,
+        "special": false,
+        "text": " bain"
+      },
+      {
+        "id": 88254,
+        "logprob": -0.12695312,
+        "special": false,
+        "text": "-mar"
+      },
+      {
+        "id": 641,
+        "logprob": 0.0,
+        "special": false,
+        "text": "ie"
+      },
+      {
+        "id": 2940,
+        "logprob": -3.5175781,
+        "special": false,
+        "text": " avec"
+      }
+    ]
+  },
+  "generated_text": " le faire réchauffer au bain-marie avec"
+}
diff --git a/integration-tests/models/__snapshots__/test_bloom_560m/test_bloom_560m_all_params.json b/integration-tests/models/__snapshots__/test_bloom_560m/test_bloom_560m_all_params.json
new file mode 100644
index 00000000..93a95804
--- /dev/null
+++ b/integration-tests/models/__snapshots__/test_bloom_560m/test_bloom_560m_all_params.json
@@ -0,0 +1,98 @@
+{
+  "details": {
+    "best_of_sequences": null,
+    "finish_reason": "length",
+    "generated_tokens": 10,
+    "prefill": [
+      {
+        "id": 15,
+        "logprob": null,
+        "text": ","
+      },
+      {
+        "id": 1669,
+        "logprob": -5.4414062,
+        "text": " il"
+      },
+      {
+        "id": 11580,
+        "logprob": -2.3378906,
+        "text": " faut"
+      },
+      {
+        "id": 3913,
+        "logprob": -4.3554688,
+        "text": " tout"
+      },
+      {
+        "id": 39261,
+        "logprob": -2.9238281,
+        "text": " d'abord"
+      }
+    ],
+    "seed": 0,
+    "tokens": [
+      {
+        "id": 408,
+        "logprob": -1.9267578,
+        "special": false,
+        "text": " que"
+      },
+      {
+        "id": 20288,
+        "logprob": -2.9257812,
+        "special": false,
+        "text": " l'on"
+      },
+      {
+        "id": 22255,
+        "logprob": -2.8964844,
+        "special": false,
+        "text": " trouve"
+      },
+      {
+        "id": 1622,
+        "logprob": -1.1083984,
+        "special": false,
+        "text": " une"
+      },
+      {
+        "id": 187079,
+        "logprob": -7.796875,
+        "special": false,
+        "text": " posture"
+      },
+      {
+        "id": 501,
+        "logprob": -5.390625,
+        "special": false,
+        "text": " par"
+      },
+      {
+        "id": 8741,
+        "logprob": -0.34936523,
+        "special": false,
+        "text": " rapport"
+      },
+      {
+        "id": 693,
+        "logprob": 0.0,
+        "special": false,
+        "text": " à"
+      },
+      {
+        "id": 366,
+        "logprob": -2.3378906,
+        "special": false,
+        "text": " la"
+      },
+      {
+        "id": 36503,
+        "logprob": -3.6640625,
+        "special": false,
+        "text": " pratique"
+      }
+    ]
+  },
+  "generated_text": "Pour déguster un ortolan, il faut tout d'abord que l'on trouve une posture par rapport à la pratique"
+}
diff --git a/integration-tests/models/__snapshots__/test_bloom_560m/test_bloom_560m_load.json b/integration-tests/models/__snapshots__/test_bloom_560m/test_bloom_560m_load.json
new file mode 100644
index 00000000..0a86bef8
--- /dev/null
+++ b/integration-tests/models/__snapshots__/test_bloom_560m/test_bloom_560m_load.json
@@ -0,0 +1,514 @@
+[
+  {
+    "details": {
+      "best_of_sequences": null,
+      "finish_reason": "length",
+      "generated_tokens": 10,
+      "prefill": [
+        {
+          "id": 17934,
+          "logprob": null,
+          "text": "Pour"
+        },
+        {
+          "id": 49833,
+          "logprob": -10.5625,
+          "text": " dég"
+        },
+        {
+          "id": 21543,
+          "logprob": -0.14770508,
+          "text": "uster"
+        },
+        {
+          "id": 447,
+          "logprob": -1.9287109,
+          "text": " un"
+        },
+        {
+          "id": 46341,
+          "logprob": -15.4609375,
+          "text": " ort"
+        },
+        {
+          "id": 35567,
+          "logprob": -7.5585938,
+          "text": "olan"
+        },
+        {
+          "id": 15,
+          "logprob": -1.4003906,
+          "text": ","
+        },
+        {
+          "id": 1669,
+          "logprob": -1.5673828,
+          "text": " il"
+        },
+        {
+          "id": 11580,
+          "logprob": -0.94628906,
+          "text": " faut"
+        },
+        {
+          "id": 3913,
+          "logprob": -3.703125,
+          "text": " tout"
+        },
+        {
+          "id": 39261,
+          "logprob": -1.5732422,
+          "text": " d'abord"
+        }
+      ],
+      "seed": null,
+      "tokens": [
+        {
+          "id": 578,
+          "logprob": -1.7646484,
+          "special": false,
+          "text": " le"
+        },
+        {
+          "id": 5608,
+          "logprob": -2.6113281,
+          "special": false,
+          "text": " faire"
+        },
+        {
+          "id": 1767,
+          "logprob": -1.5263672,
+          "special": false,
+          "text": " cu"
+        },
+        {
+          "id": 1273,
+          "logprob": -0.00010049343,
+          "special": false,
+          "text": "ire"
+        },
+        {
+          "id": 1486,
+          "logprob": -1.4707031,
+          "special": false,
+          "text": " dans"
+        },
+        {
+          "id": 283,
+          "logprob": -1.2119141,
+          "special": false,
+          "text": " de"
+        },
+        {
+          "id": 40410,
+          "logprob": -0.11883545,
+          "special": false,
+          "text": " l'eau"
+        },
+        {
+          "id": 20226,
+          "logprob": -0.40844727,
+          "special": false,
+          "text": " bou"
+        },
+        {
+          "id": 172483,
+          "logprob": -0.0037841797,
+          "special": false,
+          "text": "illante"
+        },
+        {
+          "id": 2805,
+          "logprob": -1.0195312,
+          "special": false,
+          "text": " sal"
+        }
+      ]
+    },
+    "generated_text": " le faire cuire dans de l'eau bouillante sal"
+  },
+  {
+    "details": {
+      "best_of_sequences": null,
+      "finish_reason": "length",
+      "generated_tokens": 10,
+      "prefill": [
+        {
+          "id": 17934,
+          "logprob": null,
+          "text": "Pour"
+        },
+        {
+          "id": 49833,
+          "logprob": -10.53125,
+          "text": " dég"
+        },
+        {
+          "id": 21543,
+          "logprob": -0.14770508,
+          "text": "uster"
+        },
+        {
+          "id": 447,
+          "logprob": -1.9287109,
+          "text": " un"
+        },
+        {
+          "id": 46341,
+          "logprob": -15.4140625,
+          "text": " ort"
+        },
+        {
+          "id": 35567,
+          "logprob": -7.5234375,
+          "text": "olan"
+        },
+        {
+          "id": 15,
+          "logprob": -1.3613281,
+          "text": ","
+        },
+        {
+          "id": 1669,
+          "logprob": -1.5458984,
+          "text": " il"
+        },
+        {
+          "id": 11580,
+          "logprob": -0.94189453,
+          "text": " faut"
+        },
+        {
+          "id": 3913,
+          "logprob": -3.7011719,
+          "text": " tout"
+        },
+        {
+          "id": 39261,
+          "logprob": -1.5732422,
+          "text": " d'abord"
+        }
+      ],
+      "seed": null,
+      "tokens": [
+        {
+          "id": 578,
+          "logprob": -1.7548828,
+          "special": false,
+          "text": " le"
+        },
+        {
+          "id": 5608,
+          "logprob": -2.578125,
+          "special": false,
+          "text": " faire"
+        },
+        {
+          "id": 1767,
+          "logprob": -1.5117188,
+          "special": false,
+          "text": " cu"
+        },
+        {
+          "id": 1273,
+          "logprob": -0.00010049343,
+          "special": false,
+          "text": "ire"
+        },
+        {
+          "id": 1486,
+          "logprob": -1.4707031,
+          "special": false,
+          "text": " dans"
+        },
+        {
+          "id": 283,
+          "logprob": -1.1982422,
+          "special": false,
+          "text": " de"
+        },
+        {
+          "id": 40410,
+          "logprob": -0.11004639,
+          "special": false,
+          "text": " l'eau"
+        },
+        {
+          "id": 20226,
+          "logprob": -0.4506836,
+          "special": false,
+          "text": " bou"
+        },
+        {
+          "id": 172483,
+          "logprob": -0.003047943,
+          "special": false,
+          "text": "illante"
+        },
+        {
+          "id": 2805,
+          "logprob": -1.0185547,
+          "special": false,
+          "text": " sal"
+        }
+      ]
+    },
+    "generated_text": " le faire cuire dans de l'eau bouillante sal"
+  },
+  {
+    "details": {
+      "best_of_sequences": null,
+      "finish_reason": "length",
+      "generated_tokens": 10,
+      "prefill": [
+        {
+          "id": 17934,
+          "logprob": null,
+          "text": "Pour"
+        },
+        {
+          "id": 49833,
+          "logprob": -10.53125,
+          "text": " dég"
+        },
+        {
+          "id": 21543,
+          "logprob": -0.14770508,
+          "text": "uster"
+        },
+        {
+          "id": 447,
+          "logprob": -1.9287109,
+          "text": " un"
+        },
+        {
+          "id": 46341,
+          "logprob": -15.4140625,
+          "text": " ort"
+        },
+        {
+          "id": 35567,
+          "logprob": -7.5234375,
+          "text": "olan"
+        },
+        {
+          "id": 15,
+          "logprob": -1.3613281,
+          "text": ","
+        },
+        {
+          "id": 1669,
+          "logprob": -1.5458984,
+          "text": " il"
+        },
+        {
+          "id": 11580,
+          "logprob": -0.94189453,
+          "text": " faut"
+        },
+        {
+          "id": 3913,
+          "logprob": -3.7011719,
+          "text": " tout"
+        },
+        {
+          "id": 39261,
+          "logprob": -1.5732422,
+          "text": " d'abord"
+        }
+      ],
+      "seed": null,
+      "tokens": [
+        {
+          "id": 578,
+          "logprob": -1.7548828,
+          "special": false,
+          "text": " le"
+        },
+        {
+          "id": 5608,
+          "logprob": -2.578125,
+          "special": false,
+          "text": " faire"
+        },
+        {
+          "id": 1767,
+          "logprob": -1.5117188,
+          "special": false,
+          "text": " cu"
+        },
+        {
+          "id": 1273,
+          "logprob": -0.00010049343,
+          "special": false,
+          "text": "ire"
+        },
+        {
+          "id": 1486,
+          "logprob": -1.4707031,
+          "special": false,
+          "text": " dans"
+        },
+        {
+          "id": 283,
+          "logprob": -1.1982422,
+          "special": false,
+          "text": " de"
+        },
+        {
+          "id": 40410,
+          "logprob": -0.11004639,
+          "special": false,
+          "text": " l'eau"
+        },
+        {
+          "id": 20226,
+          "logprob": -0.4506836,
+          "special": false,
+          "text": " bou"
+        },
+        {
+          "id": 172483,
+          "logprob": -0.003047943,
+          "special": false,
+          "text": "illante"
+        },
+        {
+          "id": 2805,
+          "logprob": -1.0185547,
+          "special": false,
+          "text": " sal"
+        }
+      ]
+    },
+    "generated_text": " le faire cuire dans de l'eau bouillante sal"
+  },
+  {
+    "details": {
+      "best_of_sequences": null,
+      "finish_reason": "length",
+      "generated_tokens": 10,
+      "prefill": [
+        {
+          "id": 17934,
+          "logprob": null,
+          "text": "Pour"
+        },
+        {
+          "id": 49833,
+          "logprob": -10.53125,
+          "text": " dég"
+        },
+        {
+          "id": 21543,
+          "logprob": -0.14770508,
+          "text": "uster"
+        },
+        {
+          "id": 447,
+          "logprob": -1.9287109,
+          "text": " un"
+        },
+        {
+          "id": 46341,
+          "logprob": -15.4140625,
+          "text": " ort"
+        },
+        {
+          "id": 35567,
+          "logprob": -7.5234375,
+          "text": "olan"
+        },
+        {
+          "id": 15,
+          "logprob": -1.3613281,
+          "text": ","
+        },
+        {
+          "id": 1669,
+          "logprob": -1.5458984,
+          "text": " il"
+        },
+        {
+          "id": 11580,
+          "logprob": -0.94189453,
+          "text": " faut"
+        },
+        {
+          "id": 3913,
+          "logprob": -3.7011719,
+          "text": " tout"
+        },
+        {
+          "id": 39261,
+          "logprob": -1.5732422,
+          "text": " d'abord"
+        }
+      ],
+      "seed": null,
+      "tokens": [
+        {
+          "id": 578,
+          "logprob": -1.7548828,
+          "special": false,
+          "text": " le"
+        },
+        {
+          "id": 5608,
+          "logprob": -2.578125,
+          "special": false,
+          "text": " faire"
+        },
+        {
+          "id": 1767,
+          "logprob": -1.5117188,
+          "special": false,
+          "text": " cu"
+        },
+        {
+          "id": 1273,
+          "logprob": -0.00010049343,
+          "special": false,
+          "text": "ire"
+        },
+        {
+          "id": 1486,
+          "logprob": -1.4707031,
+          "special": false,
+          "text": " dans"
+        },
+        {
+          "id": 283,
+          "logprob": -1.1982422,
+          "special": false,
+          "text": " de"
+        },
+        {
+          "id": 40410,
+          "logprob": -0.11004639,
+          "special": false,
+          "text": " l'eau"
+        },
+        {
+          "id": 20226,
+          "logprob": -0.4506836,
+          "special": false,
+          "text": " bou"
+        },
+        {
+          "id": 172483,
+          "logprob": -0.003047943,
+          "special": false,
+          "text": "illante"
+        },
+        {
+          "id": 2805,
+          "logprob": -1.0185547,
+          "special": false,
+          "text": " sal"
+        }
+      ]
+    },
+    "generated_text": " le faire cuire dans de l'eau bouillante sal"
+  }
+]
diff --git a/integration-tests/models/__snapshots__/test_bloom_560m_sharded.ambr b/integration-tests/models/__snapshots__/test_bloom_560m_sharded.ambr
deleted file mode 100644
index 667a0373..00000000
--- a/integration-tests/models/__snapshots__/test_bloom_560m_sharded.ambr
+++ /dev/null
@@ -1,542 +0,0 @@
-# serializer version: 1
-# name: test_bloom_560m_sharded
-  dict({
-    'details': dict({
-      'best_of_sequences': None,
-      'finish_reason': <FinishReason.Length: 'length'>,
-      'generated_tokens': 10,
-      'prefill': list([
-        dict({
-          'id': 17934,
-          'text': 'Pour',
-        }),
-        dict({
-          'id': 49833,
-          'text': ' dég',
-        }),
-        dict({
-          'id': 21543,
-          'text': 'uster',
-        }),
-        dict({
-          'id': 447,
-          'text': ' un',
-        }),
-        dict({
-          'id': 46341,
-          'text': ' ort',
-        }),
-        dict({
-          'id': 35567,
-          'text': 'olan',
-        }),
-        dict({
-          'id': 15,
-          'text': ',',
-        }),
-        dict({
-          'id': 1669,
-          'text': ' il',
-        }),
-        dict({
-          'id': 11580,
-          'text': ' faut',
-        }),
-        dict({
-          'id': 3913,
-          'text': ' tout',
-        }),
-        dict({
-          'id': 39261,
-          'text': " d'abord",
-        }),
-      ]),
-      'seed': 0,
-      'tokens': list([
-        dict({
-          'id': 578,
-          'special': False,
-          'text': ' le',
-        }),
-        dict({
-          'id': 5608,
-          'special': False,
-          'text': ' faire',
-        }),
-        dict({
-          'id': 159570,
-          'special': False,
-          'text': ' réch',
-        }),
-        dict({
-          'id': 810,
-          'special': False,
-          'text': 'au',
-        }),
-        dict({
-          'id': 12736,
-          'special': False,
-          'text': 'ffer',
-        }),
-        dict({
-          'id': 1742,
-          'special': False,
-          'text': ' au',
-        }),
-        dict({
-          'id': 6105,
-          'special': False,
-          'text': ' bain',
-        }),
-        dict({
-          'id': 88254,
-          'special': False,
-          'text': '-mar',
-        }),
-        dict({
-          'id': 641,
-          'special': False,
-          'text': 'ie',
-        }),
-        dict({
-          'id': 2940,
-          'special': False,
-          'text': ' avec',
-        }),
-      ]),
-    }),
-    'generated_text': ' le faire réchauffer au bain-marie avec',
-  })
-# ---
-# name: test_bloom_560m_sharded_load
-  list([
-    dict({
-      'details': dict({
-        'best_of_sequences': None,
-        'finish_reason': <FinishReason.Length: 'length'>,
-        'generated_tokens': 10,
-        'prefill': list([
-          dict({
-            'id': 17934,
-            'text': 'Pour',
-          }),
-          dict({
-            'id': 49833,
-            'text': ' dég',
-          }),
-          dict({
-            'id': 21543,
-            'text': 'uster',
-          }),
-          dict({
-            'id': 447,
-            'text': ' un',
-          }),
-          dict({
-            'id': 46341,
-            'text': ' ort',
-          }),
-          dict({
-            'id': 35567,
-            'text': 'olan',
-          }),
-          dict({
-            'id': 15,
-            'text': ',',
-          }),
-          dict({
-            'id': 1669,
-            'text': ' il',
-          }),
-          dict({
-            'id': 11580,
-            'text': ' faut',
-          }),
-          dict({
-            'id': 3913,
-            'text': ' tout',
-          }),
-          dict({
-            'id': 39261,
-            'text': " d'abord",
-          }),
-        ]),
-        'seed': None,
-        'tokens': list([
-          dict({
-            'id': 578,
-            'special': False,
-            'text': ' le',
-          }),
-          dict({
-            'id': 5608,
-            'special': False,
-            'text': ' faire',
-          }),
-          dict({
-            'id': 1767,
-            'special': False,
-            'text': ' cu',
-          }),
-          dict({
-            'id': 1273,
-            'special': False,
-            'text': 'ire',
-          }),
-          dict({
-            'id': 1486,
-            'special': False,
-            'text': ' dans',
-          }),
-          dict({
-            'id': 283,
-            'special': False,
-            'text': ' de',
-          }),
-          dict({
-            'id': 40410,
-            'special': False,
-            'text': " l'eau",
-          }),
-          dict({
-            'id': 20226,
-            'special': False,
-            'text': ' bou',
-          }),
-          dict({
-            'id': 172483,
-            'special': False,
-            'text': 'illante',
-          }),
-          dict({
-            'id': 2805,
-            'special': False,
-            'text': ' sal',
-          }),
-        ]),
-      }),
-      'generated_text': " le faire cuire dans de l'eau bouillante sal",
-    }),
-    dict({
-      'details': dict({
-        'best_of_sequences': None,
-        'finish_reason': <FinishReason.Length: 'length'>,
-        'generated_tokens': 10,
-        'prefill': list([
-          dict({
-            'id': 17934,
-            'text': 'Pour',
-          }),
-          dict({
-            'id': 49833,
-            'text': ' dég',
-          }),
-          dict({
-            'id': 21543,
-            'text': 'uster',
-          }),
-          dict({
-            'id': 447,
-            'text': ' un',
-          }),
-          dict({
-            'id': 46341,
-            'text': ' ort',
-          }),
-          dict({
-            'id': 35567,
-            'text': 'olan',
-          }),
-          dict({
-            'id': 15,
-            'text': ',',
-          }),
-          dict({
-            'id': 1669,
-            'text': ' il',
-          }),
-          dict({
-            'id': 11580,
-            'text': ' faut',
-          }),
-          dict({
-            'id': 3913,
-            'text': ' tout',
-          }),
-          dict({
-            'id': 39261,
-            'text': " d'abord",
-          }),
-        ]),
-        'seed': None,
-        'tokens': list([
-          dict({
-            'id': 578,
-            'special': False,
-            'text': ' le',
-          }),
-          dict({
-            'id': 5608,
-            'special': False,
-            'text': ' faire',
-          }),
-          dict({
-            'id': 1767,
-            'special': False,
-            'text': ' cu',
-          }),
-          dict({
-            'id': 1273,
-            'special': False,
-            'text': 'ire',
-          }),
-          dict({
-            'id': 1486,
-            'special': False,
-            'text': ' dans',
-          }),
-          dict({
-            'id': 283,
-            'special': False,
-            'text': ' de',
-          }),
-          dict({
-            'id': 40410,
-            'special': False,
-            'text': " l'eau",
-          }),
-          dict({
-            'id': 20226,
-            'special': False,
-            'text': ' bou',
-          }),
-          dict({
-            'id': 172483,
-            'special': False,
-            'text': 'illante',
-          }),
-          dict({
-            'id': 2805,
-            'special': False,
-            'text': ' sal',
-          }),
-        ]),
-      }),
-      'generated_text': " le faire cuire dans de l'eau bouillante sal",
-    }),
-    dict({
-      'details': dict({
-        'best_of_sequences': None,
-        'finish_reason': <FinishReason.Length: 'length'>,
-        'generated_tokens': 10,
-        'prefill': list([
-          dict({
-            'id': 17934,
-            'text': 'Pour',
-          }),
-          dict({
-            'id': 49833,
-            'text': ' dég',
-          }),
-          dict({
-            'id': 21543,
-            'text': 'uster',
-          }),
-          dict({
-            'id': 447,
-            'text': ' un',
-          }),
-          dict({
-            'id': 46341,
-            'text': ' ort',
-          }),
-          dict({
-            'id': 35567,
-            'text': 'olan',
-          }),
-          dict({
-            'id': 15,
-            'text': ',',
-          }),
-          dict({
-            'id': 1669,
-            'text': ' il',
-          }),
-          dict({
-            'id': 11580,
-            'text': ' faut',
-          }),
-          dict({
-            'id': 3913,
-            'text': ' tout',
-          }),
-          dict({
-            'id': 39261,
-            'text': " d'abord",
-          }),
-        ]),
-        'seed': None,
-        'tokens': list([
-          dict({
-            'id': 578,
-            'special': False,
-            'text': ' le',
-          }),
-          dict({
-            'id': 5608,
-            'special': False,
-            'text': ' faire',
-          }),
-          dict({
-            'id': 1767,
-            'special': False,
-            'text': ' cu',
-          }),
-          dict({
-            'id': 1273,
-            'special': False,
-            'text': 'ire',
-          }),
-          dict({
-            'id': 1486,
-            'special': False,
-            'text': ' dans',
-          }),
-          dict({
-            'id': 283,
-            'special': False,
-            'text': ' de',
-          }),
-          dict({
-            'id': 40410,
-            'special': False,
-            'text': " l'eau",
-          }),
-          dict({
-            'id': 20226,
-            'special': False,
-            'text': ' bou',
-          }),
-          dict({
-            'id': 172483,
-            'special': False,
-            'text': 'illante',
-          }),
-          dict({
-            'id': 2805,
-            'special': False,
-            'text': ' sal',
-          }),
-        ]),
-      }),
-      'generated_text': " le faire cuire dans de l'eau bouillante sal",
-    }),
-    dict({
-      'details': dict({
-        'best_of_sequences': None,
-        'finish_reason': <FinishReason.Length: 'length'>,
-        'generated_tokens': 10,
-        'prefill': list([
-          dict({
-            'id': 17934,
-            'text': 'Pour',
-          }),
-          dict({
-            'id': 49833,
-            'text': ' dég',
-          }),
-          dict({
-            'id': 21543,
-            'text': 'uster',
-          }),
-          dict({
-            'id': 447,
-            'text': ' un',
-          }),
-          dict({
-            'id': 46341,
-            'text': ' ort',
-          }),
-          dict({
-            'id': 35567,
-            'text': 'olan',
-          }),
-          dict({
-            'id': 15,
-            'text': ',',
-          }),
-          dict({
-            'id': 1669,
-            'text': ' il',
-          }),
-          dict({
-            'id': 11580,
-            'text': ' faut',
-          }),
-          dict({
-            'id': 3913,
-            'text': ' tout',
-          }),
-          dict({
-            'id': 39261,
-            'text': " d'abord",
-          }),
-        ]),
-        'seed': None,
-        'tokens': list([
-          dict({
-            'id': 578,
-            'special': False,
-            'text': ' le',
-          }),
-          dict({
-            'id': 5608,
-            'special': False,
-            'text': ' faire',
-          }),
-          dict({
-            'id': 1767,
-            'special': False,
-            'text': ' cu',
-          }),
-          dict({
-            'id': 1273,
-            'special': False,
-            'text': 'ire',
-          }),
-          dict({
-            'id': 1486,
-            'special': False,
-            'text': ' dans',
-          }),
-          dict({
-            'id': 283,
-            'special': False,
-            'text': ' de',
-          }),
-          dict({
-            'id': 40410,
-            'special': False,
-            'text': " l'eau",
-          }),
-          dict({
-            'id': 20226,
-            'special': False,
-            'text': ' bou',
-          }),
-          dict({
-            'id': 172483,
-            'special': False,
-            'text': 'illante',
-          }),
-          dict({
-            'id': 2805,
-            'special': False,
-            'text': ' sal',
-          }),
-        ]),
-      }),
-      'generated_text': " le faire cuire dans de l'eau bouillante sal",
-    }),
-  ])
-# ---
diff --git a/integration-tests/models/__snapshots__/test_bloom_560m_sharded/test_bloom_560m_sharded.json b/integration-tests/models/__snapshots__/test_bloom_560m_sharded/test_bloom_560m_sharded.json
new file mode 100644
index 00000000..dd8936af
--- /dev/null
+++ b/integration-tests/models/__snapshots__/test_bloom_560m_sharded/test_bloom_560m_sharded.json
@@ -0,0 +1,128 @@
+{
+  "details": {
+    "best_of_sequences": null,
+    "finish_reason": "length",
+    "generated_tokens": 10,
+    "prefill": [
+      {
+        "id": 17934,
+        "logprob": null,
+        "text": "Pour"
+      },
+      {
+        "id": 49833,
+        "logprob": -10.5390625,
+        "text": " dég"
+      },
+      {
+        "id": 21543,
+        "logprob": -0.14758301,
+        "text": "uster"
+      },
+      {
+        "id": 447,
+        "logprob": -1.9296875,
+        "text": " un"
+      },
+      {
+        "id": 46341,
+        "logprob": -15.4453125,
+        "text": " ort"
+      },
+      {
+        "id": 35567,
+        "logprob": -7.59375,
+        "text": "olan"
+      },
+      {
+        "id": 15,
+        "logprob": -1.3994141,
+        "text": ","
+      },
+      {
+        "id": 1669,
+        "logprob": -1.578125,
+        "text": " il"
+      },
+      {
+        "id": 11580,
+        "logprob": -0.9453125,
+        "text": " faut"
+      },
+      {
+        "id": 3913,
+        "logprob": -3.7011719,
+        "text": " tout"
+      },
+      {
+        "id": 39261,
+        "logprob": -1.5732422,
+        "text": " d'abord"
+      }
+    ],
+    "seed": 0,
+    "tokens": [
+      {
+        "id": 578,
+        "logprob": -1.6474609,
+        "special": false,
+        "text": " le"
+      },
+      {
+        "id": 5608,
+        "logprob": -2.5097656,
+        "special": false,
+        "text": " faire"
+      },
+      {
+        "id": 159570,
+        "logprob": -6.65625,
+        "special": false,
+        "text": " réch"
+      },
+      {
+        "id": 810,
+        "logprob": 0.0,
+        "special": false,
+        "text": "au"
+      },
+      {
+        "id": 12736,
+        "logprob": 0.0,
+        "special": false,
+        "text": "ffer"
+      },
+      {
+        "id": 1742,
+        "logprob": -2.5859375,
+        "special": false,
+        "text": " au"
+      },
+      {
+        "id": 6105,
+        "logprob": -2.03125,
+        "special": false,
+        "text": " bain"
+      },
+      {
+        "id": 88254,
+        "logprob": -0.12695312,
+        "special": false,
+        "text": "-mar"
+      },
+      {
+        "id": 641,
+        "logprob": 0.0,
+        "special": false,
+        "text": "ie"
+      },
+      {
+        "id": 2940,
+        "logprob": -3.5175781,
+        "special": false,
+        "text": " avec"
+      }
+    ]
+  },
+  "generated_text": " le faire réchauffer au bain-marie avec"
+}
diff --git a/integration-tests/models/__snapshots__/test_bloom_560m_sharded/test_bloom_560m_sharded_load.json b/integration-tests/models/__snapshots__/test_bloom_560m_sharded/test_bloom_560m_sharded_load.json
new file mode 100644
index 00000000..2dd480b9
--- /dev/null
+++ b/integration-tests/models/__snapshots__/test_bloom_560m_sharded/test_bloom_560m_sharded_load.json
@@ -0,0 +1,514 @@
+[
+  {
+    "details": {
+      "best_of_sequences": null,
+      "finish_reason": "length",
+      "generated_tokens": 10,
+      "prefill": [
+        {
+          "id": 17934,
+          "logprob": null,
+          "text": "Pour"
+        },
+        {
+          "id": 49833,
+          "logprob": -10.5390625,
+          "text": " dég"
+        },
+        {
+          "id": 21543,
+          "logprob": -0.14758301,
+          "text": "uster"
+        },
+        {
+          "id": 447,
+          "logprob": -1.9296875,
+          "text": " un"
+        },
+        {
+          "id": 46341,
+          "logprob": -15.4453125,
+          "text": " ort"
+        },
+        {
+          "id": 35567,
+          "logprob": -7.59375,
+          "text": "olan"
+        },
+        {
+          "id": 15,
+          "logprob": -1.3994141,
+          "text": ","
+        },
+        {
+          "id": 1669,
+          "logprob": -1.578125,
+          "text": " il"
+        },
+        {
+          "id": 11580,
+          "logprob": -0.9453125,
+          "text": " faut"
+        },
+        {
+          "id": 3913,
+          "logprob": -3.7011719,
+          "text": " tout"
+        },
+        {
+          "id": 39261,
+          "logprob": -1.5732422,
+          "text": " d'abord"
+        }
+      ],
+      "seed": null,
+      "tokens": [
+        {
+          "id": 578,
+          "logprob": -1.7529297,
+          "special": false,
+          "text": " le"
+        },
+        {
+          "id": 5608,
+          "logprob": -2.6054688,
+          "special": false,
+          "text": " faire"
+        },
+        {
+          "id": 1767,
+          "logprob": -1.5283203,
+          "special": false,
+          "text": " cu"
+        },
+        {
+          "id": 1273,
+          "logprob": -0.00010049343,
+          "special": false,
+          "text": "ire"
+        },
+        {
+          "id": 1486,
+          "logprob": -1.4716797,
+          "special": false,
+          "text": " dans"
+        },
+        {
+          "id": 283,
+          "logprob": -1.1982422,
+          "special": false,
+          "text": " de"
+        },
+        {
+          "id": 40410,
+          "logprob": -0.11853027,
+          "special": false,
+          "text": " l'eau"
+        },
+        {
+          "id": 20226,
+          "logprob": -0.41210938,
+          "special": false,
+          "text": " bou"
+        },
+        {
+          "id": 172483,
+          "logprob": -0.0037765503,
+          "special": false,
+          "text": "illante"
+        },
+        {
+          "id": 2805,
+          "logprob": -1.0166016,
+          "special": false,
+          "text": " sal"
+        }
+      ]
+    },
+    "generated_text": " le faire cuire dans de l'eau bouillante sal"
+  },
+  {
+    "details": {
+      "best_of_sequences": null,
+      "finish_reason": "length",
+      "generated_tokens": 10,
+      "prefill": [
+        {
+          "id": 17934,
+          "logprob": null,
+          "text": "Pour"
+        },
+        {
+          "id": 49833,
+          "logprob": -10.515625,
+          "text": " dég"
+        },
+        {
+          "id": 21543,
+          "logprob": -0.1484375,
+          "text": "uster"
+        },
+        {
+          "id": 447,
+          "logprob": -1.9287109,
+          "text": " un"
+        },
+        {
+          "id": 46341,
+          "logprob": -15.34375,
+          "text": " ort"
+        },
+        {
+          "id": 35567,
+          "logprob": -7.515625,
+          "text": "olan"
+        },
+        {
+          "id": 15,
+          "logprob": -1.4199219,
+          "text": ","
+        },
+        {
+          "id": 1669,
+          "logprob": -1.5664062,
+          "text": " il"
+        },
+        {
+          "id": 11580,
+          "logprob": -0.94091797,
+          "text": " faut"
+        },
+        {
+          "id": 3913,
+          "logprob": -3.6660156,
+          "text": " tout"
+        },
+        {
+          "id": 39261,
+          "logprob": -1.7753906,
+          "text": " d'abord"
+        }
+      ],
+      "seed": null,
+      "tokens": [
+        {
+          "id": 578,
+          "logprob": -1.7626953,
+          "special": false,
+          "text": " le"
+        },
+        {
+          "id": 5608,
+          "logprob": -2.5820312,
+          "special": false,
+          "text": " faire"
+        },
+        {
+          "id": 1767,
+          "logprob": -1.5097656,
+          "special": false,
+          "text": " cu"
+        },
+        {
+          "id": 1273,
+          "logprob": -9.393692e-05,
+          "special": false,
+          "text": "ire"
+        },
+        {
+          "id": 1486,
+          "logprob": -1.5175781,
+          "special": false,
+          "text": " dans"
+        },
+        {
+          "id": 283,
+          "logprob": -1.1982422,
+          "special": false,
+          "text": " de"
+        },
+        {
+          "id": 40410,
+          "logprob": -0.11883545,
+          "special": false,
+          "text": " l'eau"
+        },
+        {
+          "id": 20226,
+          "logprob": -0.4909668,
+          "special": false,
+          "text": " bou"
+        },
+        {
+          "id": 172483,
+          "logprob": -0.003047943,
+          "special": false,
+          "text": "illante"
+        },
+        {
+          "id": 2805,
+          "logprob": -1.0185547,
+          "special": false,
+          "text": " sal"
+        }
+      ]
+    },
+    "generated_text": " le faire cuire dans de l'eau bouillante sal"
+  },
+  {
+    "details": {
+      "best_of_sequences": null,
+      "finish_reason": "length",
+      "generated_tokens": 10,
+      "prefill": [
+        {
+          "id": 17934,
+          "logprob": null,
+          "text": "Pour"
+        },
+        {
+          "id": 49833,
+          "logprob": -10.515625,
+          "text": " dég"
+        },
+        {
+          "id": 21543,
+          "logprob": -0.1484375,
+          "text": "uster"
+        },
+        {
+          "id": 447,
+          "logprob": -1.9287109,
+          "text": " un"
+        },
+        {
+          "id": 46341,
+          "logprob": -15.34375,
+          "text": " ort"
+        },
+        {
+          "id": 35567,
+          "logprob": -7.515625,
+          "text": "olan"
+        },
+        {
+          "id": 15,
+          "logprob": -1.4199219,
+          "text": ","
+        },
+        {
+          "id": 1669,
+          "logprob": -1.5664062,
+          "text": " il"
+        },
+        {
+          "id": 11580,
+          "logprob": -0.94091797,
+          "text": " faut"
+        },
+        {
+          "id": 3913,
+          "logprob": -3.6660156,
+          "text": " tout"
+        },
+        {
+          "id": 39261,
+          "logprob": -1.7753906,
+          "text": " d'abord"
+        }
+      ],
+      "seed": null,
+      "tokens": [
+        {
+          "id": 578,
+          "logprob": -1.7626953,
+          "special": false,
+          "text": " le"
+        },
+        {
+          "id": 5608,
+          "logprob": -2.5820312,
+          "special": false,
+          "text": " faire"
+        },
+        {
+          "id": 1767,
+          "logprob": -1.5097656,
+          "special": false,
+          "text": " cu"
+        },
+        {
+          "id": 1273,
+          "logprob": -9.393692e-05,
+          "special": false,
+          "text": "ire"
+        },
+        {
+          "id": 1486,
+          "logprob": -1.5175781,
+          "special": false,
+          "text": " dans"
+        },
+        {
+          "id": 283,
+          "logprob": -1.1982422,
+          "special": false,
+          "text": " de"
+        },
+        {
+          "id": 40410,
+          "logprob": -0.11883545,
+          "special": false,
+          "text": " l'eau"
+        },
+        {
+          "id": 20226,
+          "logprob": -0.4909668,
+          "special": false,
+          "text": " bou"
+        },
+        {
+          "id": 172483,
+          "logprob": -0.003047943,
+          "special": false,
+          "text": "illante"
+        },
+        {
+          "id": 2805,
+          "logprob": -1.0185547,
+          "special": false,
+          "text": " sal"
+        }
+      ]
+    },
+    "generated_text": " le faire cuire dans de l'eau bouillante sal"
+  },
+  {
+    "details": {
+      "best_of_sequences": null,
+      "finish_reason": "length",
+      "generated_tokens": 10,
+      "prefill": [
+        {
+          "id": 17934,
+          "logprob": null,
+          "text": "Pour"
+        },
+        {
+          "id": 49833,
+          "logprob": -10.515625,
+          "text": " dég"
+        },
+        {
+          "id": 21543,
+          "logprob": -0.1484375,
+          "text": "uster"
+        },
+        {
+          "id": 447,
+          "logprob": -1.9287109,
+          "text": " un"
+        },
+        {
+          "id": 46341,
+          "logprob": -15.34375,
+          "text": " ort"
+        },
+        {
+          "id": 35567,
+          "logprob": -7.515625,
+          "text": "olan"
+        },
+        {
+          "id": 15,
+          "logprob": -1.4199219,
+          "text": ","
+        },
+        {
+          "id": 1669,
+          "logprob": -1.5664062,
+          "text": " il"
+        },
+        {
+          "id": 11580,
+          "logprob": -0.94091797,
+          "text": " faut"
+        },
+        {
+          "id": 3913,
+          "logprob": -3.6660156,
+          "text": " tout"
+        },
+        {
+          "id": 39261,
+          "logprob": -1.7753906,
+          "text": " d'abord"
+        }
+      ],
+      "seed": null,
+      "tokens": [
+        {
+          "id": 578,
+          "logprob": -1.7626953,
+          "special": false,
+          "text": " le"
+        },
+        {
+          "id": 5608,
+          "logprob": -2.5820312,
+          "special": false,
+          "text": " faire"
+        },
+        {
+          "id": 1767,
+          "logprob": -1.5097656,
+          "special": false,
+          "text": " cu"
+        },
+        {
+          "id": 1273,
+          "logprob": -9.393692e-05,
+          "special": false,
+          "text": "ire"
+        },
+        {
+          "id": 1486,
+          "logprob": -1.5175781,
+          "special": false,
+          "text": " dans"
+        },
+        {
+          "id": 283,
+          "logprob": -1.1982422,
+          "special": false,
+          "text": " de"
+        },
+        {
+          "id": 40410,
+          "logprob": -0.11883545,
+          "special": false,
+          "text": " l'eau"
+        },
+        {
+          "id": 20226,
+          "logprob": -0.4909668,
+          "special": false,
+          "text": " bou"
+        },
+        {
+          "id": 172483,
+          "logprob": -0.003047943,
+          "special": false,
+          "text": "illante"
+        },
+        {
+          "id": 2805,
+          "logprob": -1.0185547,
+          "special": false,
+          "text": " sal"
+        }
+      ]
+    },
+    "generated_text": " le faire cuire dans de l'eau bouillante sal"
+  }
+]
diff --git a/integration-tests/models/__snapshots__/test_flash_llama.ambr b/integration-tests/models/__snapshots__/test_flash_llama.ambr
deleted file mode 100644
index f4e3a4c1..00000000
--- a/integration-tests/models/__snapshots__/test_flash_llama.ambr
+++ /dev/null
@@ -1,465 +0,0 @@
-# serializer version: 1
-# name: test_flash_llama
-  dict({
-    'details': dict({
-      'best_of_sequences': None,
-      'finish_reason': <FinishReason.Length: 'length'>,
-      'generated_tokens': 10,
-      'prefill': list([
-        dict({
-          'id': 1,
-          'text': '<s>',
-        }),
-        dict({
-          'id': 4321,
-          'text': 'Test',
-        }),
-        dict({
-          'id': 2009,
-          'text': 'request',
-        }),
-      ]),
-      'seed': None,
-      'tokens': list([
-        dict({
-          'id': 363,
-          'special': False,
-          'text': ' for',
-        }),
-        dict({
-          'id': 847,
-          'special': False,
-          'text': ' /',
-        }),
-        dict({
-          'id': 2754,
-          'special': False,
-          'text': 'api',
-        }),
-        dict({
-          'id': 29914,
-          'special': False,
-          'text': '/',
-        }),
-        dict({
-          'id': 29894,
-          'special': False,
-          'text': 'v',
-        }),
-        dict({
-          'id': 29896,
-          'special': False,
-          'text': '1',
-        }),
-        dict({
-          'id': 29914,
-          'special': False,
-          'text': '/',
-        }),
-        dict({
-          'id': 16418,
-          'special': False,
-          'text': 'projects',
-        }),
-        dict({
-          'id': 29914,
-          'special': False,
-          'text': '/',
-        }),
-        dict({
-          'id': 29896,
-          'special': False,
-          'text': '1',
-        }),
-      ]),
-    }),
-    'generated_text': 'for /api/v1/projects/1',
-  })
-# ---
-# name: test_flash_llama_all_params
-  dict({
-    'details': dict({
-      'best_of_sequences': None,
-      'finish_reason': <FinishReason.Length: 'length'>,
-      'generated_tokens': 10,
-      'prefill': list([
-        dict({
-          'id': 1,
-          'text': '<s>',
-        }),
-        dict({
-          'id': 4321,
-          'text': 'Test',
-        }),
-        dict({
-          'id': 2009,
-          'text': 'request',
-        }),
-      ]),
-      'seed': 0,
-      'tokens': list([
-        dict({
-          'id': 5229,
-          'special': False,
-          'text': ' failed',
-        }),
-        dict({
-          'id': 363,
-          'special': False,
-          'text': ' for',
-        }),
-        dict({
-          'id': 5641,
-          'special': False,
-          'text': ' IP',
-        }),
-        dict({
-          'id': 16428,
-          'special': False,
-          'text': ' Address',
-        }),
-        dict({
-          'id': 29901,
-          'special': False,
-          'text': ':',
-        }),
-        dict({
-          'id': 525,
-          'special': False,
-          'text': " '",
-        }),
-        dict({
-          'id': 8516,
-          'special': False,
-          'text': 'None',
-        }),
-        dict({
-          'id': 4286,
-          'special': False,
-          'text': "'.",
-        }),
-        dict({
-          'id': 13,
-          'special': False,
-          'text': '''
-            
-  
-          ''',
-        }),
-        dict({
-          'id': 294,
-          'special': False,
-          'text': 'as',
-        }),
-      ]),
-    }),
-    'generated_text': '''
-      Test requestfailed for IP Address: 'None'.
-      as
-    ''',
-  })
-# ---
-# name: test_flash_llama_load
-  list([
-    dict({
-      'details': dict({
-        'best_of_sequences': None,
-        'finish_reason': <FinishReason.Length: 'length'>,
-        'generated_tokens': 10,
-        'prefill': list([
-          dict({
-            'id': 1,
-            'text': '<s>',
-          }),
-          dict({
-            'id': 4321,
-            'text': 'Test',
-          }),
-          dict({
-            'id': 2009,
-            'text': 'request',
-          }),
-        ]),
-        'seed': None,
-        'tokens': list([
-          dict({
-            'id': 363,
-            'special': False,
-            'text': ' for',
-          }),
-          dict({
-            'id': 847,
-            'special': False,
-            'text': ' /',
-          }),
-          dict({
-            'id': 2754,
-            'special': False,
-            'text': 'api',
-          }),
-          dict({
-            'id': 29914,
-            'special': False,
-            'text': '/',
-          }),
-          dict({
-            'id': 29894,
-            'special': False,
-            'text': 'v',
-          }),
-          dict({
-            'id': 29896,
-            'special': False,
-            'text': '1',
-          }),
-          dict({
-            'id': 29914,
-            'special': False,
-            'text': '/',
-          }),
-          dict({
-            'id': 16418,
-            'special': False,
-            'text': 'projects',
-          }),
-          dict({
-            'id': 29914,
-            'special': False,
-            'text': '/',
-          }),
-          dict({
-            'id': 29896,
-            'special': False,
-            'text': '1',
-          }),
-        ]),
-      }),
-      'generated_text': 'for /api/v1/projects/1',
-    }),
-    dict({
-      'details': dict({
-        'best_of_sequences': None,
-        'finish_reason': <FinishReason.Length: 'length'>,
-        'generated_tokens': 10,
-        'prefill': list([
-          dict({
-            'id': 1,
-            'text': '<s>',
-          }),
-          dict({
-            'id': 4321,
-            'text': 'Test',
-          }),
-          dict({
-            'id': 2009,
-            'text': 'request',
-          }),
-        ]),
-        'seed': None,
-        'tokens': list([
-          dict({
-            'id': 363,
-            'special': False,
-            'text': ' for',
-          }),
-          dict({
-            'id': 847,
-            'special': False,
-            'text': ' /',
-          }),
-          dict({
-            'id': 2754,
-            'special': False,
-            'text': 'api',
-          }),
-          dict({
-            'id': 29914,
-            'special': False,
-            'text': '/',
-          }),
-          dict({
-            'id': 29894,
-            'special': False,
-            'text': 'v',
-          }),
-          dict({
-            'id': 29896,
-            'special': False,
-            'text': '1',
-          }),
-          dict({
-            'id': 29914,
-            'special': False,
-            'text': '/',
-          }),
-          dict({
-            'id': 16418,
-            'special': False,
-            'text': 'projects',
-          }),
-          dict({
-            'id': 29914,
-            'special': False,
-            'text': '/',
-          }),
-          dict({
-            'id': 29896,
-            'special': False,
-            'text': '1',
-          }),
-        ]),
-      }),
-      'generated_text': 'for /api/v1/projects/1',
-    }),
-    dict({
-      'details': dict({
-        'best_of_sequences': None,
-        'finish_reason': <FinishReason.Length: 'length'>,
-        'generated_tokens': 10,
-        'prefill': list([
-          dict({
-            'id': 1,
-            'text': '<s>',
-          }),
-          dict({
-            'id': 4321,
-            'text': 'Test',
-          }),
-          dict({
-            'id': 2009,
-            'text': 'request',
-          }),
-        ]),
-        'seed': None,
-        'tokens': list([
-          dict({
-            'id': 363,
-            'special': False,
-            'text': ' for',
-          }),
-          dict({
-            'id': 847,
-            'special': False,
-            'text': ' /',
-          }),
-          dict({
-            'id': 2754,
-            'special': False,
-            'text': 'api',
-          }),
-          dict({
-            'id': 29914,
-            'special': False,
-            'text': '/',
-          }),
-          dict({
-            'id': 29894,
-            'special': False,
-            'text': 'v',
-          }),
-          dict({
-            'id': 29896,
-            'special': False,
-            'text': '1',
-          }),
-          dict({
-            'id': 29914,
-            'special': False,
-            'text': '/',
-          }),
-          dict({
-            'id': 16418,
-            'special': False,
-            'text': 'projects',
-          }),
-          dict({
-            'id': 29914,
-            'special': False,
-            'text': '/',
-          }),
-          dict({
-            'id': 29896,
-            'special': False,
-            'text': '1',
-          }),
-        ]),
-      }),
-      'generated_text': 'for /api/v1/projects/1',
-    }),
-    dict({
-      'details': dict({
-        'best_of_sequences': None,
-        'finish_reason': <FinishReason.Length: 'length'>,
-        'generated_tokens': 10,
-        'prefill': list([
-          dict({
-            'id': 1,
-            'text': '<s>',
-          }),
-          dict({
-            'id': 4321,
-            'text': 'Test',
-          }),
-          dict({
-            'id': 2009,
-            'text': 'request',
-          }),
-        ]),
-        'seed': None,
-        'tokens': list([
-          dict({
-            'id': 363,
-            'special': False,
-            'text': ' for',
-          }),
-          dict({
-            'id': 847,
-            'special': False,
-            'text': ' /',
-          }),
-          dict({
-            'id': 2754,
-            'special': False,
-            'text': 'api',
-          }),
-          dict({
-            'id': 29914,
-            'special': False,
-            'text': '/',
-          }),
-          dict({
-            'id': 29894,
-            'special': False,
-            'text': 'v',
-          }),
-          dict({
-            'id': 29896,
-            'special': False,
-            'text': '1',
-          }),
-          dict({
-            'id': 29914,
-            'special': False,
-            'text': '/',
-          }),
-          dict({
-            'id': 16418,
-            'special': False,
-            'text': 'projects',
-          }),
-          dict({
-            'id': 29914,
-            'special': False,
-            'text': '/',
-          }),
-          dict({
-            'id': 29896,
-            'special': False,
-            'text': '1',
-          }),
-        ]),
-      }),
-      'generated_text': 'for /api/v1/projects/1',
-    }),
-  ])
-# ---
diff --git a/integration-tests/models/__snapshots__/test_flash_llama/test_flash_llama.json b/integration-tests/models/__snapshots__/test_flash_llama/test_flash_llama.json
new file mode 100644
index 00000000..49bc996c
--- /dev/null
+++ b/integration-tests/models/__snapshots__/test_flash_llama/test_flash_llama.json
@@ -0,0 +1,88 @@
+{
+  "details": {
+    "best_of_sequences": null,
+    "finish_reason": "length",
+    "generated_tokens": 10,
+    "prefill": [
+      {
+        "id": 1,
+        "logprob": null,
+        "text": "<s>"
+      },
+      {
+        "id": 4321,
+        "logprob": -8.6875,
+        "text": "Test"
+      },
+      {
+        "id": 2009,
+        "logprob": -11.5546875,
+        "text": "request"
+      }
+    ],
+    "seed": null,
+    "tokens": [
+      {
+        "id": 363,
+        "logprob": -1.5380859,
+        "special": false,
+        "text": " for"
+      },
+      {
+        "id": 847,
+        "logprob": -2.5917969,
+        "special": false,
+        "text": " /"
+      },
+      {
+        "id": 2754,
+        "logprob": -2.2773438,
+        "special": false,
+        "text": "api"
+      },
+      {
+        "id": 29914,
+        "logprob": -0.034362793,
+        "special": false,
+        "text": "/"
+      },
+      {
+        "id": 29894,
+        "logprob": -0.96533203,
+        "special": false,
+        "text": "v"
+      },
+      {
+        "id": 29896,
+        "logprob": -0.36669922,
+        "special": false,
+        "text": "1"
+      },
+      {
+        "id": 29914,
+        "logprob": -0.013122559,
+        "special": false,
+        "text": "/"
+      },
+      {
+        "id": 16418,
+        "logprob": -3.1503906,
+        "special": false,
+        "text": "projects"
+      },
+      {
+        "id": 29914,
+        "logprob": -0.43652344,
+        "special": false,
+        "text": "/"
+      },
+      {
+        "id": 29896,
+        "logprob": -1.9404297,
+        "special": false,
+        "text": "1"
+      }
+    ]
+  },
+  "generated_text": "for /api/v1/projects/1"
+}
diff --git a/integration-tests/models/__snapshots__/test_flash_llama/test_flash_llama_all_params.json b/integration-tests/models/__snapshots__/test_flash_llama/test_flash_llama_all_params.json
new file mode 100644
index 00000000..1b6b51a3
--- /dev/null
+++ b/integration-tests/models/__snapshots__/test_flash_llama/test_flash_llama_all_params.json
@@ -0,0 +1,88 @@
+{
+  "details": {
+    "best_of_sequences": null,
+    "finish_reason": "length",
+    "generated_tokens": 10,
+    "prefill": [
+      {
+        "id": 1,
+        "logprob": null,
+        "text": "<s>"
+      },
+      {
+        "id": 4321,
+        "logprob": -8.6875,
+        "text": "Test"
+      },
+      {
+        "id": 2009,
+        "logprob": -11.5546875,
+        "text": "request"
+      }
+    ],
+    "seed": 0,
+    "tokens": [
+      {
+        "id": 5229,
+        "logprob": -3.3085938,
+        "special": false,
+        "text": " failed"
+      },
+      {
+        "id": 363,
+        "logprob": -3.984375,
+        "special": false,
+        "text": " for"
+      },
+      {
+        "id": 5641,
+        "logprob": -6.53125,
+        "special": false,
+        "text": " IP"
+      },
+      {
+        "id": 16428,
+        "logprob": -3.1835938,
+        "special": false,
+        "text": " Address"
+      },
+      {
+        "id": 29901,
+        "logprob": -1.2324219,
+        "special": false,
+        "text": ":"
+      },
+      {
+        "id": 525,
+        "logprob": -2.6855469,
+        "special": false,
+        "text": " '"
+      },
+      {
+        "id": 8516,
+        "logprob": -7.1601562,
+        "special": false,
+        "text": "None"
+      },
+      {
+        "id": 4286,
+        "logprob": -2.4433594,
+        "special": false,
+        "text": "'."
+      },
+      {
+        "id": 13,
+        "logprob": -0.06530762,
+        "special": false,
+        "text": "\n"
+      },
+      {
+        "id": 294,
+        "logprob": -7.953125,
+        "special": false,
+        "text": "as"
+      }
+    ]
+  },
+  "generated_text": "Test requestfailed for IP Address: 'None'.\nas"
+}
diff --git a/integration-tests/models/__snapshots__/test_flash_llama/test_flash_llama_load.json b/integration-tests/models/__snapshots__/test_flash_llama/test_flash_llama_load.json
new file mode 100644
index 00000000..5a8ba217
--- /dev/null
+++ b/integration-tests/models/__snapshots__/test_flash_llama/test_flash_llama_load.json
@@ -0,0 +1,354 @@
+[
+  {
+    "details": {
+      "best_of_sequences": null,
+      "finish_reason": "length",
+      "generated_tokens": 10,
+      "prefill": [
+        {
+          "id": 1,
+          "logprob": null,
+          "text": "<s>"
+        },
+        {
+          "id": 4321,
+          "logprob": -8.6875,
+          "text": "Test"
+        },
+        {
+          "id": 2009,
+          "logprob": -11.5546875,
+          "text": "request"
+        }
+      ],
+      "seed": null,
+      "tokens": [
+        {
+          "id": 363,
+          "logprob": -1.5322266,
+          "special": false,
+          "text": " for"
+        },
+        {
+          "id": 847,
+          "logprob": -2.5585938,
+          "special": false,
+          "text": " /"
+        },
+        {
+          "id": 2754,
+          "logprob": -2.265625,
+          "special": false,
+          "text": "api"
+        },
+        {
+          "id": 29914,
+          "logprob": -0.034088135,
+          "special": false,
+          "text": "/"
+        },
+        {
+          "id": 29894,
+          "logprob": -0.96240234,
+          "special": false,
+          "text": "v"
+        },
+        {
+          "id": 29896,
+          "logprob": -0.36816406,
+          "special": false,
+          "text": "1"
+        },
+        {
+          "id": 29914,
+          "logprob": -0.013191223,
+          "special": false,
+          "text": "/"
+        },
+        {
+          "id": 16418,
+          "logprob": -3.15625,
+          "special": false,
+          "text": "projects"
+        },
+        {
+          "id": 29914,
+          "logprob": -0.43774414,
+          "special": false,
+          "text": "/"
+        },
+        {
+          "id": 29896,
+          "logprob": -1.9443359,
+          "special": false,
+          "text": "1"
+        }
+      ]
+    },
+    "generated_text": "for /api/v1/projects/1"
+  },
+  {
+    "details": {
+      "best_of_sequences": null,
+      "finish_reason": "length",
+      "generated_tokens": 10,
+      "prefill": [
+        {
+          "id": 1,
+          "logprob": null,
+          "text": "<s>"
+        },
+        {
+          "id": 4321,
+          "logprob": -8.6875,
+          "text": "Test"
+        },
+        {
+          "id": 2009,
+          "logprob": -11.5546875,
+          "text": "request"
+        }
+      ],
+      "seed": null,
+      "tokens": [
+        {
+          "id": 363,
+          "logprob": -1.5380859,
+          "special": false,
+          "text": " for"
+        },
+        {
+          "id": 847,
+          "logprob": -2.5859375,
+          "special": false,
+          "text": " /"
+        },
+        {
+          "id": 2754,
+          "logprob": -2.2695312,
+          "special": false,
+          "text": "api"
+        },
+        {
+          "id": 29914,
+          "logprob": -0.03439331,
+          "special": false,
+          "text": "/"
+        },
+        {
+          "id": 29894,
+          "logprob": -0.96240234,
+          "special": false,
+          "text": "v"
+        },
+        {
+          "id": 29896,
+          "logprob": -0.36694336,
+          "special": false,
+          "text": "1"
+        },
+        {
+          "id": 29914,
+          "logprob": -0.013114929,
+          "special": false,
+          "text": "/"
+        },
+        {
+          "id": 16418,
+          "logprob": -3.1542969,
+          "special": false,
+          "text": "projects"
+        },
+        {
+          "id": 29914,
+          "logprob": -0.43847656,
+          "special": false,
+          "text": "/"
+        },
+        {
+          "id": 29896,
+          "logprob": -1.9433594,
+          "special": false,
+          "text": "1"
+        }
+      ]
+    },
+    "generated_text": "for /api/v1/projects/1"
+  },
+  {
+    "details": {
+      "best_of_sequences": null,
+      "finish_reason": "length",
+      "generated_tokens": 10,
+      "prefill": [
+        {
+          "id": 1,
+          "logprob": null,
+          "text": "<s>"
+        },
+        {
+          "id": 4321,
+          "logprob": -8.6875,
+          "text": "Test"
+        },
+        {
+          "id": 2009,
+          "logprob": -11.5546875,
+          "text": "request"
+        }
+      ],
+      "seed": null,
+      "tokens": [
+        {
+          "id": 363,
+          "logprob": -1.5322266,
+          "special": false,
+          "text": " for"
+        },
+        {
+          "id": 847,
+          "logprob": -2.5585938,
+          "special": false,
+          "text": " /"
+        },
+        {
+          "id": 2754,
+          "logprob": -2.265625,
+          "special": false,
+          "text": "api"
+        },
+        {
+          "id": 29914,
+          "logprob": -0.034088135,
+          "special": false,
+          "text": "/"
+        },
+        {
+          "id": 29894,
+          "logprob": -0.96240234,
+          "special": false,
+          "text": "v"
+        },
+        {
+          "id": 29896,
+          "logprob": -0.36816406,
+          "special": false,
+          "text": "1"
+        },
+        {
+          "id": 29914,
+          "logprob": -0.013191223,
+          "special": false,
+          "text": "/"
+        },
+        {
+          "id": 16418,
+          "logprob": -3.15625,
+          "special": false,
+          "text": "projects"
+        },
+        {
+          "id": 29914,
+          "logprob": -0.43774414,
+          "special": false,
+          "text": "/"
+        },
+        {
+          "id": 29896,
+          "logprob": -1.9443359,
+          "special": false,
+          "text": "1"
+        }
+      ]
+    },
+    "generated_text": "for /api/v1/projects/1"
+  },
+  {
+    "details": {
+      "best_of_sequences": null,
+      "finish_reason": "length",
+      "generated_tokens": 10,
+      "prefill": [
+        {
+          "id": 1,
+          "logprob": null,
+          "text": "<s>"
+        },
+        {
+          "id": 4321,
+          "logprob": -8.6875,
+          "text": "Test"
+        },
+        {
+          "id": 2009,
+          "logprob": -11.5546875,
+          "text": "request"
+        }
+      ],
+      "seed": null,
+      "tokens": [
+        {
+          "id": 363,
+          "logprob": -1.5322266,
+          "special": false,
+          "text": " for"
+        },
+        {
+          "id": 847,
+          "logprob": -2.5585938,
+          "special": false,
+          "text": " /"
+        },
+        {
+          "id": 2754,
+          "logprob": -2.265625,
+          "special": false,
+          "text": "api"
+        },
+        {
+          "id": 29914,
+          "logprob": -0.034088135,
+          "special": false,
+          "text": "/"
+        },
+        {
+          "id": 29894,
+          "logprob": -0.96240234,
+          "special": false,
+          "text": "v"
+        },
+        {
+          "id": 29896,
+          "logprob": -0.36816406,
+          "special": false,
+          "text": "1"
+        },
+        {
+          "id": 29914,
+          "logprob": -0.013191223,
+          "special": false,
+          "text": "/"
+        },
+        {
+          "id": 16418,
+          "logprob": -3.15625,
+          "special": false,
+          "text": "projects"
+        },
+        {
+          "id": 29914,
+          "logprob": -0.43774414,
+          "special": false,
+          "text": "/"
+        },
+        {
+          "id": 29896,
+          "logprob": -1.9443359,
+          "special": false,
+          "text": "1"
+        }
+      ]
+    },
+    "generated_text": "for /api/v1/projects/1"
+  }
+]
diff --git a/integration-tests/models/__snapshots__/test_flash_neox.ambr b/integration-tests/models/__snapshots__/test_flash_neox.ambr
deleted file mode 100644
index 4330db6b..00000000
--- a/integration-tests/models/__snapshots__/test_flash_neox.ambr
+++ /dev/null
@@ -1,682 +0,0 @@
-# serializer version: 1
-# name: test_flash_neox
-  dict({
-    'details': dict({
-      'best_of_sequences': None,
-      'finish_reason': <FinishReason.Length: 'length'>,
-      'generated_tokens': 10,
-      'prefill': list([
-        dict({
-          'id': 50278,
-          'text': '<|prompter|>',
-        }),
-        dict({
-          'id': 1276,
-          'text': 'What',
-        }),
-        dict({
-          'id': 310,
-          'text': ' is',
-        }),
-        dict({
-          'id': 247,
-          'text': ' a',
-        }),
-        dict({
-          'id': 1167,
-          'text': ' mem',
-        }),
-        dict({
-          'id': 70,
-          'text': 'e',
-        }),
-        dict({
-          'id': 13,
-          'text': ',',
-        }),
-        dict({
-          'id': 285,
-          'text': ' and',
-        }),
-        dict({
-          'id': 752,
-          'text': ' what',
-        }),
-        dict({
-          'id': 434,
-          'text': "'s",
-        }),
-        dict({
-          'id': 253,
-          'text': ' the',
-        }),
-        dict({
-          'id': 2892,
-          'text': ' history',
-        }),
-        dict({
-          'id': 3212,
-          'text': ' behind',
-        }),
-        dict({
-          'id': 436,
-          'text': ' this',
-        }),
-        dict({
-          'id': 3159,
-          'text': ' word',
-        }),
-        dict({
-          'id': 32,
-          'text': '?',
-        }),
-        dict({
-          'id': 0,
-          'text': '<|endoftext|>',
-        }),
-        dict({
-          'id': 50281,
-          'text': '<|assistant|>',
-        }),
-      ]),
-      'seed': None,
-      'tokens': list([
-        dict({
-          'id': 510,
-          'special': False,
-          'text': 'The',
-        }),
-        dict({
-          'id': 3159,
-          'special': False,
-          'text': ' word',
-        }),
-        dict({
-          'id': 346,
-          'special': False,
-          'text': ' "',
-        }),
-        dict({
-          'id': 6441,
-          'special': False,
-          'text': 'mem',
-        }),
-        dict({
-          'id': 70,
-          'special': False,
-          'text': 'e',
-        }),
-        dict({
-          'id': 3,
-          'special': False,
-          'text': '"',
-        }),
-        dict({
-          'id': 369,
-          'special': False,
-          'text': ' was',
-        }),
-        dict({
-          'id': 806,
-          'special': False,
-          'text': ' first',
-        }),
-        dict({
-          'id': 908,
-          'special': False,
-          'text': ' used',
-        }),
-        dict({
-          'id': 275,
-          'special': False,
-          'text': ' in',
-        }),
-      ]),
-    }),
-    'generated_text': 'The word "meme" was first used in',
-  })
-# ---
-# name: test_flash_neox_load
-  list([
-    dict({
-      'details': dict({
-        'best_of_sequences': None,
-        'finish_reason': <FinishReason.Length: 'length'>,
-        'generated_tokens': 10,
-        'prefill': list([
-          dict({
-            'id': 50278,
-            'text': '<|prompter|>',
-          }),
-          dict({
-            'id': 1276,
-            'text': 'What',
-          }),
-          dict({
-            'id': 310,
-            'text': ' is',
-          }),
-          dict({
-            'id': 247,
-            'text': ' a',
-          }),
-          dict({
-            'id': 1167,
-            'text': ' mem',
-          }),
-          dict({
-            'id': 70,
-            'text': 'e',
-          }),
-          dict({
-            'id': 13,
-            'text': ',',
-          }),
-          dict({
-            'id': 285,
-            'text': ' and',
-          }),
-          dict({
-            'id': 752,
-            'text': ' what',
-          }),
-          dict({
-            'id': 434,
-            'text': "'s",
-          }),
-          dict({
-            'id': 253,
-            'text': ' the',
-          }),
-          dict({
-            'id': 2892,
-            'text': ' history',
-          }),
-          dict({
-            'id': 3212,
-            'text': ' behind',
-          }),
-          dict({
-            'id': 436,
-            'text': ' this',
-          }),
-          dict({
-            'id': 3159,
-            'text': ' word',
-          }),
-          dict({
-            'id': 32,
-            'text': '?',
-          }),
-          dict({
-            'id': 0,
-            'text': '<|endoftext|>',
-          }),
-          dict({
-            'id': 50281,
-            'text': '<|assistant|>',
-          }),
-        ]),
-        'seed': None,
-        'tokens': list([
-          dict({
-            'id': 510,
-            'special': False,
-            'text': 'The',
-          }),
-          dict({
-            'id': 3159,
-            'special': False,
-            'text': ' word',
-          }),
-          dict({
-            'id': 346,
-            'special': False,
-            'text': ' "',
-          }),
-          dict({
-            'id': 6441,
-            'special': False,
-            'text': 'mem',
-          }),
-          dict({
-            'id': 70,
-            'special': False,
-            'text': 'e',
-          }),
-          dict({
-            'id': 3,
-            'special': False,
-            'text': '"',
-          }),
-          dict({
-            'id': 369,
-            'special': False,
-            'text': ' was',
-          }),
-          dict({
-            'id': 806,
-            'special': False,
-            'text': ' first',
-          }),
-          dict({
-            'id': 908,
-            'special': False,
-            'text': ' used',
-          }),
-          dict({
-            'id': 275,
-            'special': False,
-            'text': ' in',
-          }),
-        ]),
-      }),
-      'generated_text': 'The word "meme" was first used in',
-    }),
-    dict({
-      'details': dict({
-        'best_of_sequences': None,
-        'finish_reason': <FinishReason.Length: 'length'>,
-        'generated_tokens': 10,
-        'prefill': list([
-          dict({
-            'id': 50278,
-            'text': '<|prompter|>',
-          }),
-          dict({
-            'id': 1276,
-            'text': 'What',
-          }),
-          dict({
-            'id': 310,
-            'text': ' is',
-          }),
-          dict({
-            'id': 247,
-            'text': ' a',
-          }),
-          dict({
-            'id': 1167,
-            'text': ' mem',
-          }),
-          dict({
-            'id': 70,
-            'text': 'e',
-          }),
-          dict({
-            'id': 13,
-            'text': ',',
-          }),
-          dict({
-            'id': 285,
-            'text': ' and',
-          }),
-          dict({
-            'id': 752,
-            'text': ' what',
-          }),
-          dict({
-            'id': 434,
-            'text': "'s",
-          }),
-          dict({
-            'id': 253,
-            'text': ' the',
-          }),
-          dict({
-            'id': 2892,
-            'text': ' history',
-          }),
-          dict({
-            'id': 3212,
-            'text': ' behind',
-          }),
-          dict({
-            'id': 436,
-            'text': ' this',
-          }),
-          dict({
-            'id': 3159,
-            'text': ' word',
-          }),
-          dict({
-            'id': 32,
-            'text': '?',
-          }),
-          dict({
-            'id': 0,
-            'text': '<|endoftext|>',
-          }),
-          dict({
-            'id': 50281,
-            'text': '<|assistant|>',
-          }),
-        ]),
-        'seed': None,
-        'tokens': list([
-          dict({
-            'id': 510,
-            'special': False,
-            'text': 'The',
-          }),
-          dict({
-            'id': 3159,
-            'special': False,
-            'text': ' word',
-          }),
-          dict({
-            'id': 346,
-            'special': False,
-            'text': ' "',
-          }),
-          dict({
-            'id': 6441,
-            'special': False,
-            'text': 'mem',
-          }),
-          dict({
-            'id': 70,
-            'special': False,
-            'text': 'e',
-          }),
-          dict({
-            'id': 3,
-            'special': False,
-            'text': '"',
-          }),
-          dict({
-            'id': 369,
-            'special': False,
-            'text': ' was',
-          }),
-          dict({
-            'id': 806,
-            'special': False,
-            'text': ' first',
-          }),
-          dict({
-            'id': 908,
-            'special': False,
-            'text': ' used',
-          }),
-          dict({
-            'id': 275,
-            'special': False,
-            'text': ' in',
-          }),
-        ]),
-      }),
-      'generated_text': 'The word "meme" was first used in',
-    }),
-    dict({
-      'details': dict({
-        'best_of_sequences': None,
-        'finish_reason': <FinishReason.Length: 'length'>,
-        'generated_tokens': 10,
-        'prefill': list([
-          dict({
-            'id': 50278,
-            'text': '<|prompter|>',
-          }),
-          dict({
-            'id': 1276,
-            'text': 'What',
-          }),
-          dict({
-            'id': 310,
-            'text': ' is',
-          }),
-          dict({
-            'id': 247,
-            'text': ' a',
-          }),
-          dict({
-            'id': 1167,
-            'text': ' mem',
-          }),
-          dict({
-            'id': 70,
-            'text': 'e',
-          }),
-          dict({
-            'id': 13,
-            'text': ',',
-          }),
-          dict({
-            'id': 285,
-            'text': ' and',
-          }),
-          dict({
-            'id': 752,
-            'text': ' what',
-          }),
-          dict({
-            'id': 434,
-            'text': "'s",
-          }),
-          dict({
-            'id': 253,
-            'text': ' the',
-          }),
-          dict({
-            'id': 2892,
-            'text': ' history',
-          }),
-          dict({
-            'id': 3212,
-            'text': ' behind',
-          }),
-          dict({
-            'id': 436,
-            'text': ' this',
-          }),
-          dict({
-            'id': 3159,
-            'text': ' word',
-          }),
-          dict({
-            'id': 32,
-            'text': '?',
-          }),
-          dict({
-            'id': 0,
-            'text': '<|endoftext|>',
-          }),
-          dict({
-            'id': 50281,
-            'text': '<|assistant|>',
-          }),
-        ]),
-        'seed': None,
-        'tokens': list([
-          dict({
-            'id': 510,
-            'special': False,
-            'text': 'The',
-          }),
-          dict({
-            'id': 3159,
-            'special': False,
-            'text': ' word',
-          }),
-          dict({
-            'id': 346,
-            'special': False,
-            'text': ' "',
-          }),
-          dict({
-            'id': 6441,
-            'special': False,
-            'text': 'mem',
-          }),
-          dict({
-            'id': 70,
-            'special': False,
-            'text': 'e',
-          }),
-          dict({
-            'id': 3,
-            'special': False,
-            'text': '"',
-          }),
-          dict({
-            'id': 369,
-            'special': False,
-            'text': ' was',
-          }),
-          dict({
-            'id': 806,
-            'special': False,
-            'text': ' first',
-          }),
-          dict({
-            'id': 908,
-            'special': False,
-            'text': ' used',
-          }),
-          dict({
-            'id': 275,
-            'special': False,
-            'text': ' in',
-          }),
-        ]),
-      }),
-      'generated_text': 'The word "meme" was first used in',
-    }),
-    dict({
-      'details': dict({
-        'best_of_sequences': None,
-        'finish_reason': <FinishReason.Length: 'length'>,
-        'generated_tokens': 10,
-        'prefill': list([
-          dict({
-            'id': 50278,
-            'text': '<|prompter|>',
-          }),
-          dict({
-            'id': 1276,
-            'text': 'What',
-          }),
-          dict({
-            'id': 310,
-            'text': ' is',
-          }),
-          dict({
-            'id': 247,
-            'text': ' a',
-          }),
-          dict({
-            'id': 1167,
-            'text': ' mem',
-          }),
-          dict({
-            'id': 70,
-            'text': 'e',
-          }),
-          dict({
-            'id': 13,
-            'text': ',',
-          }),
-          dict({
-            'id': 285,
-            'text': ' and',
-          }),
-          dict({
-            'id': 752,
-            'text': ' what',
-          }),
-          dict({
-            'id': 434,
-            'text': "'s",
-          }),
-          dict({
-            'id': 253,
-            'text': ' the',
-          }),
-          dict({
-            'id': 2892,
-            'text': ' history',
-          }),
-          dict({
-            'id': 3212,
-            'text': ' behind',
-          }),
-          dict({
-            'id': 436,
-            'text': ' this',
-          }),
-          dict({
-            'id': 3159,
-            'text': ' word',
-          }),
-          dict({
-            'id': 32,
-            'text': '?',
-          }),
-          dict({
-            'id': 0,
-            'text': '<|endoftext|>',
-          }),
-          dict({
-            'id': 50281,
-            'text': '<|assistant|>',
-          }),
-        ]),
-        'seed': None,
-        'tokens': list([
-          dict({
-            'id': 510,
-            'special': False,
-            'text': 'The',
-          }),
-          dict({
-            'id': 3159,
-            'special': False,
-            'text': ' word',
-          }),
-          dict({
-            'id': 346,
-            'special': False,
-            'text': ' "',
-          }),
-          dict({
-            'id': 6441,
-            'special': False,
-            'text': 'mem',
-          }),
-          dict({
-            'id': 70,
-            'special': False,
-            'text': 'e',
-          }),
-          dict({
-            'id': 3,
-            'special': False,
-            'text': '"',
-          }),
-          dict({
-            'id': 369,
-            'special': False,
-            'text': ' was',
-          }),
-          dict({
-            'id': 806,
-            'special': False,
-            'text': ' first',
-          }),
-          dict({
-            'id': 908,
-            'special': False,
-            'text': ' used',
-          }),
-          dict({
-            'id': 275,
-            'special': False,
-            'text': ' in',
-          }),
-        ]),
-      }),
-      'generated_text': 'The word "meme" was first used in',
-    }),
-  ])
-# ---
diff --git a/integration-tests/models/__snapshots__/test_flash_neox/test_flash_neox.json b/integration-tests/models/__snapshots__/test_flash_neox/test_flash_neox.json
new file mode 100644
index 00000000..787704ce
--- /dev/null
+++ b/integration-tests/models/__snapshots__/test_flash_neox/test_flash_neox.json
@@ -0,0 +1,163 @@
+{
+  "details": {
+    "best_of_sequences": null,
+    "finish_reason": "length",
+    "generated_tokens": 10,
+    "prefill": [
+      {
+        "id": 50278,
+        "logprob": null,
+        "text": "<|prompter|>"
+      },
+      {
+        "id": 1276,
+        "logprob": -8.03125,
+        "text": "What"
+      },
+      {
+        "id": 310,
+        "logprob": -5.421875,
+        "text": " is"
+      },
+      {
+        "id": 247,
+        "logprob": -2.1601562,
+        "text": " a"
+      },
+      {
+        "id": 1167,
+        "logprob": -5.4609375,
+        "text": " mem"
+      },
+      {
+        "id": 70,
+        "logprob": -0.005657196,
+        "text": "e"
+      },
+      {
+        "id": 13,
+        "logprob": -7.28125,
+        "text": ","
+      },
+      {
+        "id": 285,
+        "logprob": -0.2980957,
+        "text": " and"
+      },
+      {
+        "id": 752,
+        "logprob": -2.1679688,
+        "text": " what"
+      },
+      {
+        "id": 434,
+        "logprob": -5.6210938,
+        "text": "'s"
+      },
+      {
+        "id": 253,
+        "logprob": -0.81103516,
+        "text": " the"
+      },
+      {
+        "id": 2892,
+        "logprob": -6.6640625,
+        "text": " history"
+      },
+      {
+        "id": 3212,
+        "logprob": -2.265625,
+        "text": " behind"
+      },
+      {
+        "id": 436,
+        "logprob": -11.5078125,
+        "text": " this"
+      },
+      {
+        "id": 3159,
+        "logprob": -2.1582031,
+        "text": " word"
+      },
+      {
+        "id": 32,
+        "logprob": -0.008720398,
+        "text": "?"
+      },
+      {
+        "id": 0,
+        "logprob": -2.4726562,
+        "text": "<|endoftext|>"
+      },
+      {
+        "id": 50281,
+        "logprob": -18.265625,
+        "text": "<|assistant|>"
+      }
+    ],
+    "seed": null,
+    "tokens": [
+      {
+        "id": 510,
+        "logprob": -0.63183594,
+        "special": false,
+        "text": "The"
+      },
+      {
+        "id": 3159,
+        "logprob": -0.5390625,
+        "special": false,
+        "text": " word"
+      },
+      {
+        "id": 346,
+        "logprob": -0.045684814,
+        "special": false,
+        "text": " \""
+      },
+      {
+        "id": 6441,
+        "logprob": -0.002090454,
+        "special": false,
+        "text": "mem"
+      },
+      {
+        "id": 70,
+        "logprob": -1.3589859e-05,
+        "special": false,
+        "text": "e"
+      },
+      {
+        "id": 3,
+        "logprob": -0.0009455681,
+        "special": false,
+        "text": "\""
+      },
+      {
+        "id": 369,
+        "logprob": -0.088012695,
+        "special": false,
+        "text": " was"
+      },
+      {
+        "id": 806,
+        "logprob": -0.12585449,
+        "special": false,
+        "text": " first"
+      },
+      {
+        "id": 908,
+        "logprob": -0.017196655,
+        "special": false,
+        "text": " used"
+      },
+      {
+        "id": 275,
+        "logprob": -0.49731445,
+        "special": false,
+        "text": " in"
+      }
+    ]
+  },
+  "generated_text": "The word \"meme\" was first used in"
+}
diff --git a/integration-tests/models/__snapshots__/test_flash_neox/test_flash_neox_load.json b/integration-tests/models/__snapshots__/test_flash_neox/test_flash_neox_load.json
new file mode 100644
index 00000000..47d6a77e
--- /dev/null
+++ b/integration-tests/models/__snapshots__/test_flash_neox/test_flash_neox_load.json
@@ -0,0 +1,654 @@
+[
+  {
+    "details": {
+      "best_of_sequences": null,
+      "finish_reason": "length",
+      "generated_tokens": 10,
+      "prefill": [
+        {
+          "id": 50278,
+          "logprob": null,
+          "text": "<|prompter|>"
+        },
+        {
+          "id": 1276,
+          "logprob": -8.03125,
+          "text": "What"
+        },
+        {
+          "id": 310,
+          "logprob": -5.421875,
+          "text": " is"
+        },
+        {
+          "id": 247,
+          "logprob": -2.1601562,
+          "text": " a"
+        },
+        {
+          "id": 1167,
+          "logprob": -5.4609375,
+          "text": " mem"
+        },
+        {
+          "id": 70,
+          "logprob": -0.005657196,
+          "text": "e"
+        },
+        {
+          "id": 13,
+          "logprob": -7.28125,
+          "text": ","
+        },
+        {
+          "id": 285,
+          "logprob": -0.2980957,
+          "text": " and"
+        },
+        {
+          "id": 752,
+          "logprob": -2.1679688,
+          "text": " what"
+        },
+        {
+          "id": 434,
+          "logprob": -5.6210938,
+          "text": "'s"
+        },
+        {
+          "id": 253,
+          "logprob": -0.81103516,
+          "text": " the"
+        },
+        {
+          "id": 2892,
+          "logprob": -6.6640625,
+          "text": " history"
+        },
+        {
+          "id": 3212,
+          "logprob": -2.265625,
+          "text": " behind"
+        },
+        {
+          "id": 436,
+          "logprob": -11.5078125,
+          "text": " this"
+        },
+        {
+          "id": 3159,
+          "logprob": -2.1582031,
+          "text": " word"
+        },
+        {
+          "id": 32,
+          "logprob": -0.008720398,
+          "text": "?"
+        },
+        {
+          "id": 0,
+          "logprob": -2.4726562,
+          "text": "<|endoftext|>"
+        },
+        {
+          "id": 50281,
+          "logprob": -18.265625,
+          "text": "<|assistant|>"
+        }
+      ],
+      "seed": null,
+      "tokens": [
+        {
+          "id": 510,
+          "logprob": -0.63183594,
+          "special": false,
+          "text": "The"
+        },
+        {
+          "id": 3159,
+          "logprob": -0.5488281,
+          "special": false,
+          "text": " word"
+        },
+        {
+          "id": 346,
+          "logprob": -0.045684814,
+          "special": false,
+          "text": " \""
+        },
+        {
+          "id": 6441,
+          "logprob": -0.00207901,
+          "special": false,
+          "text": "mem"
+        },
+        {
+          "id": 70,
+          "logprob": -1.335144e-05,
+          "special": false,
+          "text": "e"
+        },
+        {
+          "id": 3,
+          "logprob": -0.00097227097,
+          "special": false,
+          "text": "\""
+        },
+        {
+          "id": 369,
+          "logprob": -0.0892334,
+          "special": false,
+          "text": " was"
+        },
+        {
+          "id": 806,
+          "logprob": -0.12463379,
+          "special": false,
+          "text": " first"
+        },
+        {
+          "id": 908,
+          "logprob": -0.01737976,
+          "special": false,
+          "text": " used"
+        },
+        {
+          "id": 275,
+          "logprob": -0.50341797,
+          "special": false,
+          "text": " in"
+        }
+      ]
+    },
+    "generated_text": "The word \"meme\" was first used in"
+  },
+  {
+    "details": {
+      "best_of_sequences": null,
+      "finish_reason": "length",
+      "generated_tokens": 10,
+      "prefill": [
+        {
+          "id": 50278,
+          "logprob": null,
+          "text": "<|prompter|>"
+        },
+        {
+          "id": 1276,
+          "logprob": -8.03125,
+          "text": "What"
+        },
+        {
+          "id": 310,
+          "logprob": -5.421875,
+          "text": " is"
+        },
+        {
+          "id": 247,
+          "logprob": -2.1601562,
+          "text": " a"
+        },
+        {
+          "id": 1167,
+          "logprob": -5.4609375,
+          "text": " mem"
+        },
+        {
+          "id": 70,
+          "logprob": -0.005657196,
+          "text": "e"
+        },
+        {
+          "id": 13,
+          "logprob": -7.28125,
+          "text": ","
+        },
+        {
+          "id": 285,
+          "logprob": -0.2980957,
+          "text": " and"
+        },
+        {
+          "id": 752,
+          "logprob": -2.1679688,
+          "text": " what"
+        },
+        {
+          "id": 434,
+          "logprob": -5.6210938,
+          "text": "'s"
+        },
+        {
+          "id": 253,
+          "logprob": -0.81103516,
+          "text": " the"
+        },
+        {
+          "id": 2892,
+          "logprob": -6.6640625,
+          "text": " history"
+        },
+        {
+          "id": 3212,
+          "logprob": -2.265625,
+          "text": " behind"
+        },
+        {
+          "id": 436,
+          "logprob": -11.5078125,
+          "text": " this"
+        },
+        {
+          "id": 3159,
+          "logprob": -2.1582031,
+          "text": " word"
+        },
+        {
+          "id": 32,
+          "logprob": -0.008720398,
+          "text": "?"
+        },
+        {
+          "id": 0,
+          "logprob": -2.4726562,
+          "text": "<|endoftext|>"
+        },
+        {
+          "id": 50281,
+          "logprob": -18.265625,
+          "text": "<|assistant|>"
+        }
+      ],
+      "seed": null,
+      "tokens": [
+        {
+          "id": 510,
+          "logprob": -0.63183594,
+          "special": false,
+          "text": "The"
+        },
+        {
+          "id": 3159,
+          "logprob": -0.5488281,
+          "special": false,
+          "text": " word"
+        },
+        {
+          "id": 346,
+          "logprob": -0.045684814,
+          "special": false,
+          "text": " \""
+        },
+        {
+          "id": 6441,
+          "logprob": -0.00207901,
+          "special": false,
+          "text": "mem"
+        },
+        {
+          "id": 70,
+          "logprob": -1.335144e-05,
+          "special": false,
+          "text": "e"
+        },
+        {
+          "id": 3,
+          "logprob": -0.00097227097,
+          "special": false,
+          "text": "\""
+        },
+        {
+          "id": 369,
+          "logprob": -0.0892334,
+          "special": false,
+          "text": " was"
+        },
+        {
+          "id": 806,
+          "logprob": -0.12463379,
+          "special": false,
+          "text": " first"
+        },
+        {
+          "id": 908,
+          "logprob": -0.01737976,
+          "special": false,
+          "text": " used"
+        },
+        {
+          "id": 275,
+          "logprob": -0.50341797,
+          "special": false,
+          "text": " in"
+        }
+      ]
+    },
+    "generated_text": "The word \"meme\" was first used in"
+  },
+  {
+    "details": {
+      "best_of_sequences": null,
+      "finish_reason": "length",
+      "generated_tokens": 10,
+      "prefill": [
+        {
+          "id": 50278,
+          "logprob": null,
+          "text": "<|prompter|>"
+        },
+        {
+          "id": 1276,
+          "logprob": -8.03125,
+          "text": "What"
+        },
+        {
+          "id": 310,
+          "logprob": -5.421875,
+          "text": " is"
+        },
+        {
+          "id": 247,
+          "logprob": -2.1601562,
+          "text": " a"
+        },
+        {
+          "id": 1167,
+          "logprob": -5.4609375,
+          "text": " mem"
+        },
+        {
+          "id": 70,
+          "logprob": -0.005657196,
+          "text": "e"
+        },
+        {
+          "id": 13,
+          "logprob": -7.28125,
+          "text": ","
+        },
+        {
+          "id": 285,
+          "logprob": -0.2980957,
+          "text": " and"
+        },
+        {
+          "id": 752,
+          "logprob": -2.1679688,
+          "text": " what"
+        },
+        {
+          "id": 434,
+          "logprob": -5.6210938,
+          "text": "'s"
+        },
+        {
+          "id": 253,
+          "logprob": -0.81103516,
+          "text": " the"
+        },
+        {
+          "id": 2892,
+          "logprob": -6.6640625,
+          "text": " history"
+        },
+        {
+          "id": 3212,
+          "logprob": -2.265625,
+          "text": " behind"
+        },
+        {
+          "id": 436,
+          "logprob": -11.5078125,
+          "text": " this"
+        },
+        {
+          "id": 3159,
+          "logprob": -2.1582031,
+          "text": " word"
+        },
+        {
+          "id": 32,
+          "logprob": -0.008720398,
+          "text": "?"
+        },
+        {
+          "id": 0,
+          "logprob": -2.4726562,
+          "text": "<|endoftext|>"
+        },
+        {
+          "id": 50281,
+          "logprob": -18.265625,
+          "text": "<|assistant|>"
+        }
+      ],
+      "seed": null,
+      "tokens": [
+        {
+          "id": 510,
+          "logprob": -0.63183594,
+          "special": false,
+          "text": "The"
+        },
+        {
+          "id": 3159,
+          "logprob": -0.5488281,
+          "special": false,
+          "text": " word"
+        },
+        {
+          "id": 346,
+          "logprob": -0.045684814,
+          "special": false,
+          "text": " \""
+        },
+        {
+          "id": 6441,
+          "logprob": -0.00207901,
+          "special": false,
+          "text": "mem"
+        },
+        {
+          "id": 70,
+          "logprob": -1.335144e-05,
+          "special": false,
+          "text": "e"
+        },
+        {
+          "id": 3,
+          "logprob": -0.00097227097,
+          "special": false,
+          "text": "\""
+        },
+        {
+          "id": 369,
+          "logprob": -0.0892334,
+          "special": false,
+          "text": " was"
+        },
+        {
+          "id": 806,
+          "logprob": -0.12463379,
+          "special": false,
+          "text": " first"
+        },
+        {
+          "id": 908,
+          "logprob": -0.01737976,
+          "special": false,
+          "text": " used"
+        },
+        {
+          "id": 275,
+          "logprob": -0.50341797,
+          "special": false,
+          "text": " in"
+        }
+      ]
+    },
+    "generated_text": "The word \"meme\" was first used in"
+  },
+  {
+    "details": {
+      "best_of_sequences": null,
+      "finish_reason": "length",
+      "generated_tokens": 10,
+      "prefill": [
+        {
+          "id": 50278,
+          "logprob": null,
+          "text": "<|prompter|>"
+        },
+        {
+          "id": 1276,
+          "logprob": -8.03125,
+          "text": "What"
+        },
+        {
+          "id": 310,
+          "logprob": -5.421875,
+          "text": " is"
+        },
+        {
+          "id": 247,
+          "logprob": -2.1601562,
+          "text": " a"
+        },
+        {
+          "id": 1167,
+          "logprob": -5.4609375,
+          "text": " mem"
+        },
+        {
+          "id": 70,
+          "logprob": -0.005657196,
+          "text": "e"
+        },
+        {
+          "id": 13,
+          "logprob": -7.28125,
+          "text": ","
+        },
+        {
+          "id": 285,
+          "logprob": -0.2980957,
+          "text": " and"
+        },
+        {
+          "id": 752,
+          "logprob": -2.1679688,
+          "text": " what"
+        },
+        {
+          "id": 434,
+          "logprob": -5.6210938,
+          "text": "'s"
+        },
+        {
+          "id": 253,
+          "logprob": -0.81103516,
+          "text": " the"
+        },
+        {
+          "id": 2892,
+          "logprob": -6.6640625,
+          "text": " history"
+        },
+        {
+          "id": 3212,
+          "logprob": -2.265625,
+          "text": " behind"
+        },
+        {
+          "id": 436,
+          "logprob": -11.5078125,
+          "text": " this"
+        },
+        {
+          "id": 3159,
+          "logprob": -2.1582031,
+          "text": " word"
+        },
+        {
+          "id": 32,
+          "logprob": -0.008720398,
+          "text": "?"
+        },
+        {
+          "id": 0,
+          "logprob": -2.4726562,
+          "text": "<|endoftext|>"
+        },
+        {
+          "id": 50281,
+          "logprob": -18.265625,
+          "text": "<|assistant|>"
+        }
+      ],
+      "seed": null,
+      "tokens": [
+        {
+          "id": 510,
+          "logprob": -0.63183594,
+          "special": false,
+          "text": "The"
+        },
+        {
+          "id": 3159,
+          "logprob": -0.5488281,
+          "special": false,
+          "text": " word"
+        },
+        {
+          "id": 346,
+          "logprob": -0.045684814,
+          "special": false,
+          "text": " \""
+        },
+        {
+          "id": 6441,
+          "logprob": -0.00207901,
+          "special": false,
+          "text": "mem"
+        },
+        {
+          "id": 70,
+          "logprob": -1.335144e-05,
+          "special": false,
+          "text": "e"
+        },
+        {
+          "id": 3,
+          "logprob": -0.00097227097,
+          "special": false,
+          "text": "\""
+        },
+        {
+          "id": 369,
+          "logprob": -0.0892334,
+          "special": false,
+          "text": " was"
+        },
+        {
+          "id": 806,
+          "logprob": -0.12463379,
+          "special": false,
+          "text": " first"
+        },
+        {
+          "id": 908,
+          "logprob": -0.01737976,
+          "special": false,
+          "text": " used"
+        },
+        {
+          "id": 275,
+          "logprob": -0.50341797,
+          "special": false,
+          "text": " in"
+        }
+      ]
+    },
+    "generated_text": "The word \"meme\" was first used in"
+  }
+]
diff --git a/integration-tests/models/__snapshots__/test_flash_santacoder.ambr b/integration-tests/models/__snapshots__/test_flash_santacoder.ambr
deleted file mode 100644
index 030820cb..00000000
--- a/integration-tests/models/__snapshots__/test_flash_santacoder.ambr
+++ /dev/null
@@ -1,472 +0,0 @@
-# serializer version: 1
-# name: test_flash_santacoder
-  dict({
-    'details': dict({
-      'best_of_sequences': None,
-      'finish_reason': <FinishReason.Length: 'length'>,
-      'generated_tokens': 10,
-      'prefill': list([
-        dict({
-          'id': 563,
-          'text': 'def',
-        }),
-        dict({
-          'id': 942,
-          'text': ' print',
-        }),
-        dict({
-          'id': 62,
-          'text': '_',
-        }),
-        dict({
-          'id': 7196,
-          'text': 'hello',
-        }),
-      ]),
-      'seed': None,
-      'tokens': list([
-        dict({
-          'id': 1241,
-          'special': False,
-          'text': '():',
-        }),
-        dict({
-          'id': 258,
-          'special': False,
-          'text': '''
-            
-               
-          ''',
-        }),
-        dict({
-          'id': 942,
-          'special': False,
-          'text': ' print',
-        }),
-        dict({
-          'id': 372,
-          'special': False,
-          'text': '("',
-        }),
-        dict({
-          'id': 7371,
-          'special': False,
-          'text': 'Hello',
-        }),
-        dict({
-          'id': 9956,
-          'special': False,
-          'text': ' World',
-        }),
-        dict({
-          'id': 8657,
-          'special': False,
-          'text': '!")',
-        }),
-        dict({
-          'id': 185,
-          'special': False,
-          'text': '''
-            
-  
-          ''',
-        }),
-        dict({
-          'id': 185,
-          'special': False,
-          'text': '''
-            
-  
-          ''',
-        }),
-        dict({
-          'id': 1018,
-          'special': False,
-          'text': 'print',
-        }),
-      ]),
-    }),
-    'generated_text': '''
-      ():
-          print("Hello World!")
-      
-      print
-    ''',
-  })
-# ---
-# name: test_flash_santacoder_load
-  list([
-    dict({
-      'details': dict({
-        'best_of_sequences': None,
-        'finish_reason': <FinishReason.Length: 'length'>,
-        'generated_tokens': 10,
-        'prefill': list([
-          dict({
-            'id': 563,
-            'text': 'def',
-          }),
-          dict({
-            'id': 942,
-            'text': ' print',
-          }),
-          dict({
-            'id': 62,
-            'text': '_',
-          }),
-          dict({
-            'id': 7196,
-            'text': 'hello',
-          }),
-        ]),
-        'seed': None,
-        'tokens': list([
-          dict({
-            'id': 1241,
-            'special': False,
-            'text': '():',
-          }),
-          dict({
-            'id': 258,
-            'special': False,
-            'text': '''
-              
-                 
-            ''',
-          }),
-          dict({
-            'id': 942,
-            'special': False,
-            'text': ' print',
-          }),
-          dict({
-            'id': 372,
-            'special': False,
-            'text': '("',
-          }),
-          dict({
-            'id': 7371,
-            'special': False,
-            'text': 'Hello',
-          }),
-          dict({
-            'id': 9956,
-            'special': False,
-            'text': ' World',
-          }),
-          dict({
-            'id': 8657,
-            'special': False,
-            'text': '!")',
-          }),
-          dict({
-            'id': 185,
-            'special': False,
-            'text': '''
-              
-  
-            ''',
-          }),
-          dict({
-            'id': 185,
-            'special': False,
-            'text': '''
-              
-  
-            ''',
-          }),
-          dict({
-            'id': 1018,
-            'special': False,
-            'text': 'print',
-          }),
-        ]),
-      }),
-      'generated_text': '''
-        ():
-            print("Hello World!")
-        
-        print
-      ''',
-    }),
-    dict({
-      'details': dict({
-        'best_of_sequences': None,
-        'finish_reason': <FinishReason.Length: 'length'>,
-        'generated_tokens': 10,
-        'prefill': list([
-          dict({
-            'id': 563,
-            'text': 'def',
-          }),
-          dict({
-            'id': 942,
-            'text': ' print',
-          }),
-          dict({
-            'id': 62,
-            'text': '_',
-          }),
-          dict({
-            'id': 7196,
-            'text': 'hello',
-          }),
-        ]),
-        'seed': None,
-        'tokens': list([
-          dict({
-            'id': 1241,
-            'special': False,
-            'text': '():',
-          }),
-          dict({
-            'id': 258,
-            'special': False,
-            'text': '''
-              
-                 
-            ''',
-          }),
-          dict({
-            'id': 942,
-            'special': False,
-            'text': ' print',
-          }),
-          dict({
-            'id': 372,
-            'special': False,
-            'text': '("',
-          }),
-          dict({
-            'id': 7371,
-            'special': False,
-            'text': 'Hello',
-          }),
-          dict({
-            'id': 9956,
-            'special': False,
-            'text': ' World',
-          }),
-          dict({
-            'id': 8657,
-            'special': False,
-            'text': '!")',
-          }),
-          dict({
-            'id': 185,
-            'special': False,
-            'text': '''
-              
-  
-            ''',
-          }),
-          dict({
-            'id': 185,
-            'special': False,
-            'text': '''
-              
-  
-            ''',
-          }),
-          dict({
-            'id': 1018,
-            'special': False,
-            'text': 'print',
-          }),
-        ]),
-      }),
-      'generated_text': '''
-        ():
-            print("Hello World!")
-        
-        print
-      ''',
-    }),
-    dict({
-      'details': dict({
-        'best_of_sequences': None,
-        'finish_reason': <FinishReason.Length: 'length'>,
-        'generated_tokens': 10,
-        'prefill': list([
-          dict({
-            'id': 563,
-            'text': 'def',
-          }),
-          dict({
-            'id': 942,
-            'text': ' print',
-          }),
-          dict({
-            'id': 62,
-            'text': '_',
-          }),
-          dict({
-            'id': 7196,
-            'text': 'hello',
-          }),
-        ]),
-        'seed': None,
-        'tokens': list([
-          dict({
-            'id': 1241,
-            'special': False,
-            'text': '():',
-          }),
-          dict({
-            'id': 258,
-            'special': False,
-            'text': '''
-              
-                 
-            ''',
-          }),
-          dict({
-            'id': 942,
-            'special': False,
-            'text': ' print',
-          }),
-          dict({
-            'id': 372,
-            'special': False,
-            'text': '("',
-          }),
-          dict({
-            'id': 7371,
-            'special': False,
-            'text': 'Hello',
-          }),
-          dict({
-            'id': 9956,
-            'special': False,
-            'text': ' World',
-          }),
-          dict({
-            'id': 8657,
-            'special': False,
-            'text': '!")',
-          }),
-          dict({
-            'id': 185,
-            'special': False,
-            'text': '''
-              
-  
-            ''',
-          }),
-          dict({
-            'id': 185,
-            'special': False,
-            'text': '''
-              
-  
-            ''',
-          }),
-          dict({
-            'id': 1018,
-            'special': False,
-            'text': 'print',
-          }),
-        ]),
-      }),
-      'generated_text': '''
-        ():
-            print("Hello World!")
-        
-        print
-      ''',
-    }),
-    dict({
-      'details': dict({
-        'best_of_sequences': None,
-        'finish_reason': <FinishReason.Length: 'length'>,
-        'generated_tokens': 10,
-        'prefill': list([
-          dict({
-            'id': 563,
-            'text': 'def',
-          }),
-          dict({
-            'id': 942,
-            'text': ' print',
-          }),
-          dict({
-            'id': 62,
-            'text': '_',
-          }),
-          dict({
-            'id': 7196,
-            'text': 'hello',
-          }),
-        ]),
-        'seed': None,
-        'tokens': list([
-          dict({
-            'id': 1241,
-            'special': False,
-            'text': '():',
-          }),
-          dict({
-            'id': 258,
-            'special': False,
-            'text': '''
-              
-                 
-            ''',
-          }),
-          dict({
-            'id': 942,
-            'special': False,
-            'text': ' print',
-          }),
-          dict({
-            'id': 372,
-            'special': False,
-            'text': '("',
-          }),
-          dict({
-            'id': 7371,
-            'special': False,
-            'text': 'Hello',
-          }),
-          dict({
-            'id': 9956,
-            'special': False,
-            'text': ' World',
-          }),
-          dict({
-            'id': 8657,
-            'special': False,
-            'text': '!")',
-          }),
-          dict({
-            'id': 185,
-            'special': False,
-            'text': '''
-              
-  
-            ''',
-          }),
-          dict({
-            'id': 185,
-            'special': False,
-            'text': '''
-              
-  
-            ''',
-          }),
-          dict({
-            'id': 1018,
-            'special': False,
-            'text': 'print',
-          }),
-        ]),
-      }),
-      'generated_text': '''
-        ():
-            print("Hello World!")
-        
-        print
-      ''',
-    }),
-  ])
-# ---
diff --git a/integration-tests/models/__snapshots__/test_flash_santacoder/test_flash_santacoder.json b/integration-tests/models/__snapshots__/test_flash_santacoder/test_flash_santacoder.json
new file mode 100644
index 00000000..0293e35a
--- /dev/null
+++ b/integration-tests/models/__snapshots__/test_flash_santacoder/test_flash_santacoder.json
@@ -0,0 +1,93 @@
+{
+  "details": {
+    "best_of_sequences": null,
+    "finish_reason": "length",
+    "generated_tokens": 10,
+    "prefill": [
+      {
+        "id": 563,
+        "logprob": null,
+        "text": "def"
+      },
+      {
+        "id": 942,
+        "logprob": -5.1367188,
+        "text": " print"
+      },
+      {
+        "id": 62,
+        "logprob": -0.24450684,
+        "text": "_"
+      },
+      {
+        "id": 7196,
+        "logprob": -6.9609375,
+        "text": "hello"
+      }
+    ],
+    "seed": null,
+    "tokens": [
+      {
+        "id": 1241,
+        "logprob": -0.9863281,
+        "special": false,
+        "text": "():"
+      },
+      {
+        "id": 258,
+        "logprob": -0.21447754,
+        "special": false,
+        "text": "\n   "
+      },
+      {
+        "id": 942,
+        "logprob": -0.43701172,
+        "special": false,
+        "text": " print"
+      },
+      {
+        "id": 372,
+        "logprob": -0.5361328,
+        "special": false,
+        "text": "(\""
+      },
+      {
+        "id": 7371,
+        "logprob": -0.44555664,
+        "special": false,
+        "text": "Hello"
+      },
+      {
+        "id": 9956,
+        "logprob": -1.2412109,
+        "special": false,
+        "text": " World"
+      },
+      {
+        "id": 8657,
+        "logprob": -0.7583008,
+        "special": false,
+        "text": "!\")"
+      },
+      {
+        "id": 185,
+        "logprob": -0.76171875,
+        "special": false,
+        "text": "\n"
+      },
+      {
+        "id": 185,
+        "logprob": -0.20837402,
+        "special": false,
+        "text": "\n"
+      },
+      {
+        "id": 1018,
+        "logprob": -1.2470703,
+        "special": false,
+        "text": "print"
+      }
+    ]
+  },
+  "generated_text": "():\n    print(\"Hello World!\")\n\nprint"
+}
diff --git a/integration-tests/models/__snapshots__/test_flash_santacoder/test_flash_santacoder_load.json b/integration-tests/models/__snapshots__/test_flash_santacoder/test_flash_santacoder_load.json
new file mode 100644
index 00000000..a03580b3
--- /dev/null
+++ b/integration-tests/models/__snapshots__/test_flash_santacoder/test_flash_santacoder_load.json
@@ -0,0 +1,374 @@
+[
+  {
+    "details": {
+      "best_of_sequences": null,
+      "finish_reason": "length",
+      "generated_tokens": 10,
+      "prefill": [
+        {
+          "id": 563,
+          "logprob": null,
+          "text": "def"
+        },
+        {
+          "id": 942,
+          "logprob": -5.1367188,
+          "text": " print"
+        },
+        {
+          "id": 62,
+          "logprob": -0.24450684,
+          "text": "_"
+        },
+        {
+          "id": 7196,
+          "logprob": -6.9609375,
+          "text": "hello"
+        }
+      ],
+      "seed": null,
+      "tokens": [
+        {
+          "id": 1241,
+          "logprob": -0.9863281,
+          "special": false,
+          "text": "():"
+        },
+        {
+          "id": 258,
+          "logprob": -0.21362305,
+          "special": false,
+          "text": "\n   "
+        },
+        {
+          "id": 942,
+          "logprob": -0.44360352,
+          "special": false,
+          "text": " print"
+        },
+        {
+          "id": 372,
+          "logprob": -0.54248047,
+          "special": false,
+          "text": "(\""
+        },
+        {
+          "id": 7371,
+          "logprob": -0.44555664,
+          "special": false,
+          "text": "Hello"
+        },
+        {
+          "id": 9956,
+          "logprob": -1.2441406,
+          "special": false,
+          "text": " World"
+        },
+        {
+          "id": 8657,
+          "logprob": -0.75878906,
+          "special": false,
+          "text": "!\")"
+        },
+        {
+          "id": 185,
+          "logprob": -0.76171875,
+          "special": false,
+          "text": "\n"
+        },
+        {
+          "id": 185,
+          "logprob": -0.2084961,
+          "special": false,
+          "text": "\n"
+        },
+        {
+          "id": 1018,
+          "logprob": -1.2460938,
+          "special": false,
+          "text": "print"
+        }
+      ]
+    },
+    "generated_text": "():\n    print(\"Hello World!\")\n\nprint"
+  },
+  {
+    "details": {
+      "best_of_sequences": null,
+      "finish_reason": "length",
+      "generated_tokens": 10,
+      "prefill": [
+        {
+          "id": 563,
+          "logprob": null,
+          "text": "def"
+        },
+        {
+          "id": 942,
+          "logprob": -5.1367188,
+          "text": " print"
+        },
+        {
+          "id": 62,
+          "logprob": -0.24450684,
+          "text": "_"
+        },
+        {
+          "id": 7196,
+          "logprob": -6.9609375,
+          "text": "hello"
+        }
+      ],
+      "seed": null,
+      "tokens": [
+        {
+          "id": 1241,
+          "logprob": -0.9863281,
+          "special": false,
+          "text": "():"
+        },
+        {
+          "id": 258,
+          "logprob": -0.21362305,
+          "special": false,
+          "text": "\n   "
+        },
+        {
+          "id": 942,
+          "logprob": -0.44360352,
+          "special": false,
+          "text": " print"
+        },
+        {
+          "id": 372,
+          "logprob": -0.54248047,
+          "special": false,
+          "text": "(\""
+        },
+        {
+          "id": 7371,
+          "logprob": -0.44555664,
+          "special": false,
+          "text": "Hello"
+        },
+        {
+          "id": 9956,
+          "logprob": -1.2441406,
+          "special": false,
+          "text": " World"
+        },
+        {
+          "id": 8657,
+          "logprob": -0.75878906,
+          "special": false,
+          "text": "!\")"
+        },
+        {
+          "id": 185,
+          "logprob": -0.76171875,
+          "special": false,
+          "text": "\n"
+        },
+        {
+          "id": 185,
+          "logprob": -0.2084961,
+          "special": false,
+          "text": "\n"
+        },
+        {
+          "id": 1018,
+          "logprob": -1.2460938,
+          "special": false,
+          "text": "print"
+        }
+      ]
+    },
+    "generated_text": "():\n    print(\"Hello World!\")\n\nprint"
+  },
+  {
+    "details": {
+      "best_of_sequences": null,
+      "finish_reason": "length",
+      "generated_tokens": 10,
+      "prefill": [
+        {
+          "id": 563,
+          "logprob": null,
+          "text": "def"
+        },
+        {
+          "id": 942,
+          "logprob": -5.1367188,
+          "text": " print"
+        },
+        {
+          "id": 62,
+          "logprob": -0.24450684,
+          "text": "_"
+        },
+        {
+          "id": 7196,
+          "logprob": -6.9609375,
+          "text": "hello"
+        }
+      ],
+      "seed": null,
+      "tokens": [
+        {
+          "id": 1241,
+          "logprob": -0.9863281,
+          "special": false,
+          "text": "():"
+        },
+        {
+          "id": 258,
+          "logprob": -0.21362305,
+          "special": false,
+          "text": "\n   "
+        },
+        {
+          "id": 942,
+          "logprob": -0.44360352,
+          "special": false,
+          "text": " print"
+        },
+        {
+          "id": 372,
+          "logprob": -0.54248047,
+          "special": false,
+          "text": "(\""
+        },
+        {
+          "id": 7371,
+          "logprob": -0.44555664,
+          "special": false,
+          "text": "Hello"
+        },
+        {
+          "id": 9956,
+          "logprob": -1.2441406,
+          "special": false,
+          "text": " World"
+        },
+        {
+          "id": 8657,
+          "logprob": -0.75878906,
+          "special": false,
+          "text": "!\")"
+        },
+        {
+          "id": 185,
+          "logprob": -0.76171875,
+          "special": false,
+          "text": "\n"
+        },
+        {
+          "id": 185,
+          "logprob": -0.2084961,
+          "special": false,
+          "text": "\n"
+        },
+        {
+          "id": 1018,
+          "logprob": -1.2460938,
+          "special": false,
+          "text": "print"
+        }
+      ]
+    },
+    "generated_text": "():\n    print(\"Hello World!\")\n\nprint"
+  },
+  {
+    "details": {
+      "best_of_sequences": null,
+      "finish_reason": "length",
+      "generated_tokens": 10,
+      "prefill": [
+        {
+          "id": 563,
+          "logprob": null,
+          "text": "def"
+        },
+        {
+          "id": 942,
+          "logprob": -5.1367188,
+          "text": " print"
+        },
+        {
+          "id": 62,
+          "logprob": -0.24450684,
+          "text": "_"
+        },
+        {
+          "id": 7196,
+          "logprob": -6.9609375,
+          "text": "hello"
+        }
+      ],
+      "seed": null,
+      "tokens": [
+        {
+          "id": 1241,
+          "logprob": -0.9863281,
+          "special": false,
+          "text": "():"
+        },
+        {
+          "id": 258,
+          "logprob": -0.21362305,
+          "special": false,
+          "text": "\n   "
+        },
+        {
+          "id": 942,
+          "logprob": -0.44360352,
+          "special": false,
+          "text": " print"
+        },
+        {
+          "id": 372,
+          "logprob": -0.54248047,
+          "special": false,
+          "text": "(\""
+        },
+        {
+          "id": 7371,
+          "logprob": -0.44555664,
+          "special": false,
+          "text": "Hello"
+        },
+        {
+          "id": 9956,
+          "logprob": -1.2441406,
+          "special": false,
+          "text": " World"
+        },
+        {
+          "id": 8657,
+          "logprob": -0.75878906,
+          "special": false,
+          "text": "!\")"
+        },
+        {
+          "id": 185,
+          "logprob": -0.76171875,
+          "special": false,
+          "text": "\n"
+        },
+        {
+          "id": 185,
+          "logprob": -0.2084961,
+          "special": false,
+          "text": "\n"
+        },
+        {
+          "id": 1018,
+          "logprob": -1.2460938,
+          "special": false,
+          "text": "print"
+        }
+      ]
+    },
+    "generated_text": "():\n    print(\"Hello World!\")\n\nprint"
+  }
+]
diff --git a/integration-tests/models/__snapshots__/test_flash_starcoder.ambr b/integration-tests/models/__snapshots__/test_flash_starcoder.ambr
deleted file mode 100644
index e0f4b568..00000000
--- a/integration-tests/models/__snapshots__/test_flash_starcoder.ambr
+++ /dev/null
@@ -1,573 +0,0 @@
-# serializer version: 1
-# name: test_flash_starcoder
-  dict({
-    'details': dict({
-      'best_of_sequences': None,
-      'finish_reason': <FinishReason.Length: 'length'>,
-      'generated_tokens': 10,
-      'prefill': list([
-        dict({
-          'id': 589,
-          'text': 'def',
-        }),
-        dict({
-          'id': 1459,
-          'text': ' print',
-        }),
-        dict({
-          'id': 81,
-          'text': '_',
-        }),
-        dict({
-          'id': 7656,
-          'text': 'hello',
-        }),
-      ]),
-      'seed': None,
-      'tokens': list([
-        dict({
-          'id': 2262,
-          'special': False,
-          'text': '():',
-        }),
-        dict({
-          'id': 284,
-          'special': False,
-          'text': '''
-            
-               
-          ''',
-        }),
-        dict({
-          'id': 1459,
-          'special': False,
-          'text': ' print',
-        }),
-        dict({
-          'id': 440,
-          'special': False,
-          'text': '("',
-        }),
-        dict({
-          'id': 8279,
-          'special': False,
-          'text': 'Hello',
-        }),
-        dict({
-          'id': 10896,
-          'special': False,
-          'text': ' World',
-        }),
-        dict({
-          'id': 657,
-          'special': False,
-          'text': '")',
-        }),
-        dict({
-          'id': 203,
-          'special': False,
-          'text': '''
-            
-  
-          ''',
-        }),
-        dict({
-          'id': 203,
-          'special': False,
-          'text': '''
-            
-  
-          ''',
-        }),
-        dict({
-          'id': 589,
-          'special': False,
-          'text': 'def',
-        }),
-      ]),
-    }),
-    'generated_text': '''
-      ():
-          print("Hello World")
-      
-      def
-    ''',
-  })
-# ---
-# name: test_flash_starcoder_default_params
-  dict({
-    'details': dict({
-      'best_of_sequences': None,
-      'finish_reason': <FinishReason.EndOfSequenceToken: 'eos_token'>,
-      'generated_tokens': 12,
-      'prefill': list([
-        dict({
-          'id': 589,
-          'text': 'def',
-        }),
-        dict({
-          'id': 1459,
-          'text': ' print',
-        }),
-        dict({
-          'id': 81,
-          'text': '_',
-        }),
-        dict({
-          'id': 7656,
-          'text': 'hello',
-        }),
-      ]),
-      'seed': 0,
-      'tokens': list([
-        dict({
-          'id': 2262,
-          'special': False,
-          'text': '():',
-        }),
-        dict({
-          'id': 284,
-          'special': False,
-          'text': '''
-            
-               
-          ''',
-        }),
-        dict({
-          'id': 5741,
-          'special': False,
-          'text': ' logging',
-        }),
-        dict({
-          'id': 32,
-          'special': False,
-          'text': '.',
-        }),
-        dict({
-          'id': 1338,
-          'special': False,
-          'text': 'info',
-        }),
-        dict({
-          'id': 463,
-          'special': False,
-          'text': "('",
-        }),
-        dict({
-          'id': 8279,
-          'special': False,
-          'text': 'Hello',
-        }),
-        dict({
-          'id': 30,
-          'special': False,
-          'text': ',',
-        }),
-        dict({
-          'id': 10896,
-          'special': False,
-          'text': ' World',
-        }),
-        dict({
-          'id': 683,
-          'special': False,
-          'text': "')",
-        }),
-        dict({
-          'id': 203,
-          'special': False,
-          'text': '''
-            
-  
-          ''',
-        }),
-        dict({
-          'id': 0,
-          'special': True,
-          'text': '<|endoftext|>',
-        }),
-      ]),
-    }),
-    'generated_text': '''
-      ():
-          logging.info('Hello, World')
-      <|endoftext|>
-    ''',
-  })
-# ---
-# name: test_flash_starcoder_load
-  list([
-    dict({
-      'details': dict({
-        'best_of_sequences': None,
-        'finish_reason': <FinishReason.Length: 'length'>,
-        'generated_tokens': 10,
-        'prefill': list([
-          dict({
-            'id': 589,
-            'text': 'def',
-          }),
-          dict({
-            'id': 1459,
-            'text': ' print',
-          }),
-          dict({
-            'id': 81,
-            'text': '_',
-          }),
-          dict({
-            'id': 7656,
-            'text': 'hello',
-          }),
-        ]),
-        'seed': None,
-        'tokens': list([
-          dict({
-            'id': 2262,
-            'special': False,
-            'text': '():',
-          }),
-          dict({
-            'id': 284,
-            'special': False,
-            'text': '''
-              
-                 
-            ''',
-          }),
-          dict({
-            'id': 1459,
-            'special': False,
-            'text': ' print',
-          }),
-          dict({
-            'id': 440,
-            'special': False,
-            'text': '("',
-          }),
-          dict({
-            'id': 8279,
-            'special': False,
-            'text': 'Hello',
-          }),
-          dict({
-            'id': 10896,
-            'special': False,
-            'text': ' World',
-          }),
-          dict({
-            'id': 657,
-            'special': False,
-            'text': '")',
-          }),
-          dict({
-            'id': 203,
-            'special': False,
-            'text': '''
-              
-  
-            ''',
-          }),
-          dict({
-            'id': 203,
-            'special': False,
-            'text': '''
-              
-  
-            ''',
-          }),
-          dict({
-            'id': 589,
-            'special': False,
-            'text': 'def',
-          }),
-        ]),
-      }),
-      'generated_text': '''
-        ():
-            print("Hello World")
-        
-        def
-      ''',
-    }),
-    dict({
-      'details': dict({
-        'best_of_sequences': None,
-        'finish_reason': <FinishReason.Length: 'length'>,
-        'generated_tokens': 10,
-        'prefill': list([
-          dict({
-            'id': 589,
-            'text': 'def',
-          }),
-          dict({
-            'id': 1459,
-            'text': ' print',
-          }),
-          dict({
-            'id': 81,
-            'text': '_',
-          }),
-          dict({
-            'id': 7656,
-            'text': 'hello',
-          }),
-        ]),
-        'seed': None,
-        'tokens': list([
-          dict({
-            'id': 2262,
-            'special': False,
-            'text': '():',
-          }),
-          dict({
-            'id': 284,
-            'special': False,
-            'text': '''
-              
-                 
-            ''',
-          }),
-          dict({
-            'id': 1459,
-            'special': False,
-            'text': ' print',
-          }),
-          dict({
-            'id': 440,
-            'special': False,
-            'text': '("',
-          }),
-          dict({
-            'id': 8279,
-            'special': False,
-            'text': 'Hello',
-          }),
-          dict({
-            'id': 10896,
-            'special': False,
-            'text': ' World',
-          }),
-          dict({
-            'id': 657,
-            'special': False,
-            'text': '")',
-          }),
-          dict({
-            'id': 203,
-            'special': False,
-            'text': '''
-              
-  
-            ''',
-          }),
-          dict({
-            'id': 203,
-            'special': False,
-            'text': '''
-              
-  
-            ''',
-          }),
-          dict({
-            'id': 589,
-            'special': False,
-            'text': 'def',
-          }),
-        ]),
-      }),
-      'generated_text': '''
-        ():
-            print("Hello World")
-        
-        def
-      ''',
-    }),
-    dict({
-      'details': dict({
-        'best_of_sequences': None,
-        'finish_reason': <FinishReason.Length: 'length'>,
-        'generated_tokens': 10,
-        'prefill': list([
-          dict({
-            'id': 589,
-            'text': 'def',
-          }),
-          dict({
-            'id': 1459,
-            'text': ' print',
-          }),
-          dict({
-            'id': 81,
-            'text': '_',
-          }),
-          dict({
-            'id': 7656,
-            'text': 'hello',
-          }),
-        ]),
-        'seed': None,
-        'tokens': list([
-          dict({
-            'id': 2262,
-            'special': False,
-            'text': '():',
-          }),
-          dict({
-            'id': 284,
-            'special': False,
-            'text': '''
-              
-                 
-            ''',
-          }),
-          dict({
-            'id': 1459,
-            'special': False,
-            'text': ' print',
-          }),
-          dict({
-            'id': 440,
-            'special': False,
-            'text': '("',
-          }),
-          dict({
-            'id': 8279,
-            'special': False,
-            'text': 'Hello',
-          }),
-          dict({
-            'id': 10896,
-            'special': False,
-            'text': ' World',
-          }),
-          dict({
-            'id': 657,
-            'special': False,
-            'text': '")',
-          }),
-          dict({
-            'id': 203,
-            'special': False,
-            'text': '''
-              
-  
-            ''',
-          }),
-          dict({
-            'id': 203,
-            'special': False,
-            'text': '''
-              
-  
-            ''',
-          }),
-          dict({
-            'id': 589,
-            'special': False,
-            'text': 'def',
-          }),
-        ]),
-      }),
-      'generated_text': '''
-        ():
-            print("Hello World")
-        
-        def
-      ''',
-    }),
-    dict({
-      'details': dict({
-        'best_of_sequences': None,
-        'finish_reason': <FinishReason.Length: 'length'>,
-        'generated_tokens': 10,
-        'prefill': list([
-          dict({
-            'id': 589,
-            'text': 'def',
-          }),
-          dict({
-            'id': 1459,
-            'text': ' print',
-          }),
-          dict({
-            'id': 81,
-            'text': '_',
-          }),
-          dict({
-            'id': 7656,
-            'text': 'hello',
-          }),
-        ]),
-        'seed': None,
-        'tokens': list([
-          dict({
-            'id': 2262,
-            'special': False,
-            'text': '():',
-          }),
-          dict({
-            'id': 284,
-            'special': False,
-            'text': '''
-              
-                 
-            ''',
-          }),
-          dict({
-            'id': 1459,
-            'special': False,
-            'text': ' print',
-          }),
-          dict({
-            'id': 440,
-            'special': False,
-            'text': '("',
-          }),
-          dict({
-            'id': 8279,
-            'special': False,
-            'text': 'Hello',
-          }),
-          dict({
-            'id': 10896,
-            'special': False,
-            'text': ' World',
-          }),
-          dict({
-            'id': 657,
-            'special': False,
-            'text': '")',
-          }),
-          dict({
-            'id': 203,
-            'special': False,
-            'text': '''
-              
-  
-            ''',
-          }),
-          dict({
-            'id': 203,
-            'special': False,
-            'text': '''
-              
-  
-            ''',
-          }),
-          dict({
-            'id': 589,
-            'special': False,
-            'text': 'def',
-          }),
-        ]),
-      }),
-      'generated_text': '''
-        ():
-            print("Hello World")
-        
-        def
-      ''',
-    }),
-  ])
-# ---
diff --git a/integration-tests/models/__snapshots__/test_flash_starcoder/test_flash_starcoder.json b/integration-tests/models/__snapshots__/test_flash_starcoder/test_flash_starcoder.json
new file mode 100644
index 00000000..8505c1db
--- /dev/null
+++ b/integration-tests/models/__snapshots__/test_flash_starcoder/test_flash_starcoder.json
@@ -0,0 +1,93 @@
+{
+  "details": {
+    "best_of_sequences": null,
+    "finish_reason": "length",
+    "generated_tokens": 10,
+    "prefill": [
+      {
+        "id": 589,
+        "logprob": null,
+        "text": "def"
+      },
+      {
+        "id": 1459,
+        "logprob": -5.6289062,
+        "text": " print"
+      },
+      {
+        "id": 81,
+        "logprob": -1.6005859,
+        "text": "_"
+      },
+      {
+        "id": 7656,
+        "logprob": -5.9921875,
+        "text": "hello"
+      }
+    ],
+    "seed": null,
+    "tokens": [
+      {
+        "id": 2262,
+        "logprob": -0.7705078,
+        "special": false,
+        "text": "():"
+      },
+      {
+        "id": 284,
+        "logprob": -0.2590332,
+        "special": false,
+        "text": "\n   "
+      },
+      {
+        "id": 1459,
+        "logprob": -0.39379883,
+        "special": false,
+        "text": " print"
+      },
+      {
+        "id": 440,
+        "logprob": -0.61376953,
+        "special": false,
+        "text": "(\""
+      },
+      {
+        "id": 8279,
+        "logprob": -0.47338867,
+        "special": false,
+        "text": "Hello"
+      },
+      {
+        "id": 10896,
+        "logprob": -1.5068359,
+        "special": false,
+        "text": " World"
+      },
+      {
+        "id": 657,
+        "logprob": -0.80810547,
+        "special": false,
+        "text": "\")"
+      },
+      {
+        "id": 203,
+        "logprob": -0.7397461,
+        "special": false,
+        "text": "\n"
+      },
+      {
+        "id": 203,
+        "logprob": -0.35229492,
+        "special": false,
+        "text": "\n"
+      },
+      {
+        "id": 589,
+        "logprob": -1.0371094,
+        "special": false,
+        "text": "def"
+      }
+    ]
+  },
+  "generated_text": "():\n    print(\"Hello World\")\n\ndef"
+}
diff --git a/integration-tests/models/__snapshots__/test_flash_starcoder/test_flash_starcoder_default_params.json b/integration-tests/models/__snapshots__/test_flash_starcoder/test_flash_starcoder_default_params.json
new file mode 100644
index 00000000..21bb509b
--- /dev/null
+++ b/integration-tests/models/__snapshots__/test_flash_starcoder/test_flash_starcoder_default_params.json
@@ -0,0 +1,105 @@
+{
+  "details": {
+    "best_of_sequences": null,
+    "finish_reason": "eos_token",
+    "generated_tokens": 12,
+    "prefill": [
+      {
+        "id": 589,
+        "logprob": null,
+        "text": "def"
+      },
+      {
+        "id": 1459,
+        "logprob": -5.6289062,
+        "text": " print"
+      },
+      {
+        "id": 81,
+        "logprob": -1.6005859,
+        "text": "_"
+      },
+      {
+        "id": 7656,
+        "logprob": -5.9921875,
+        "text": "hello"
+      }
+    ],
+    "seed": 0,
+    "tokens": [
+      {
+        "id": 2262,
+        "logprob": -0.7451172,
+        "special": false,
+        "text": "():"
+      },
+      {
+        "id": 284,
+        "logprob": -0.21325684,
+        "special": false,
+        "text": "\n   "
+      },
+      {
+        "id": 5741,
+        "logprob": -5.734375,
+        "special": false,
+        "text": " logging"
+      },
+      {
+        "id": 32,
+        "logprob": 0.0,
+        "special": false,
+        "text": "."
+      },
+      {
+        "id": 1338,
+        "logprob": -0.3232422,
+        "special": false,
+        "text": "info"
+      },
+      {
+        "id": 463,
+        "logprob": -1.0380859,
+        "special": false,
+        "text": "('"
+      },
+      {
+        "id": 8279,
+        "logprob": -0.8378906,
+        "special": false,
+        "text": "Hello"
+      },
+      {
+        "id": 30,
+        "logprob": -1.9501953,
+        "special": false,
+        "text": ","
+      },
+      {
+        "id": 10896,
+        "logprob": -1.3476562,
+        "special": false,
+        "text": " World"
+      },
+      {
+        "id": 683,
+        "logprob": -1.796875,
+        "special": false,
+        "text": "')"
+      },
+      {
+        "id": 203,
+        "logprob": -0.9873047,
+        "special": false,
+        "text": "\n"
+      },
+      {
+        "id": 0,
+        "logprob": -0.7495117,
+        "special": true,
+        "text": "<|endoftext|>"
+      }
+    ]
+  },
+  "generated_text": "():\n    logging.info('Hello, World')\n<|endoftext|>"
+}
diff --git a/integration-tests/models/__snapshots__/test_flash_starcoder/test_flash_starcoder_load.json b/integration-tests/models/__snapshots__/test_flash_starcoder/test_flash_starcoder_load.json
new file mode 100644
index 00000000..0b3ad554
--- /dev/null
+++ b/integration-tests/models/__snapshots__/test_flash_starcoder/test_flash_starcoder_load.json
@@ -0,0 +1,374 @@
+[
+  {
+    "details": {
+      "best_of_sequences": null,
+      "finish_reason": "length",
+      "generated_tokens": 10,
+      "prefill": [
+        {
+          "id": 589,
+          "logprob": null,
+          "text": "def"
+        },
+        {
+          "id": 1459,
+          "logprob": -5.6289062,
+          "text": " print"
+        },
+        {
+          "id": 81,
+          "logprob": -1.6005859,
+          "text": "_"
+        },
+        {
+          "id": 7656,
+          "logprob": -5.9921875,
+          "text": "hello"
+        }
+      ],
+      "seed": null,
+      "tokens": [
+        {
+          "id": 2262,
+          "logprob": -0.7705078,
+          "special": false,
+          "text": "():"
+        },
+        {
+          "id": 284,
+          "logprob": -0.2602539,
+          "special": false,
+          "text": "\n   "
+        },
+        {
+          "id": 1459,
+          "logprob": -0.39282227,
+          "special": false,
+          "text": " print"
+        },
+        {
+          "id": 440,
+          "logprob": -0.6113281,
+          "special": false,
+          "text": "(\""
+        },
+        {
+          "id": 8279,
+          "logprob": -0.4765625,
+          "special": false,
+          "text": "Hello"
+        },
+        {
+          "id": 10896,
+          "logprob": -1.5068359,
+          "special": false,
+          "text": " World"
+        },
+        {
+          "id": 657,
+          "logprob": -0.8154297,
+          "special": false,
+          "text": "\")"
+        },
+        {
+          "id": 203,
+          "logprob": -0.7319336,
+          "special": false,
+          "text": "\n"
+        },
+        {
+          "id": 203,
+          "logprob": -0.35229492,
+          "special": false,
+          "text": "\n"
+        },
+        {
+          "id": 589,
+          "logprob": -1.0380859,
+          "special": false,
+          "text": "def"
+        }
+      ]
+    },
+    "generated_text": "():\n    print(\"Hello World\")\n\ndef"
+  },
+  {
+    "details": {
+      "best_of_sequences": null,
+      "finish_reason": "length",
+      "generated_tokens": 10,
+      "prefill": [
+        {
+          "id": 589,
+          "logprob": null,
+          "text": "def"
+        },
+        {
+          "id": 1459,
+          "logprob": -5.6289062,
+          "text": " print"
+        },
+        {
+          "id": 81,
+          "logprob": -1.6005859,
+          "text": "_"
+        },
+        {
+          "id": 7656,
+          "logprob": -5.9921875,
+          "text": "hello"
+        }
+      ],
+      "seed": null,
+      "tokens": [
+        {
+          "id": 2262,
+          "logprob": -0.7705078,
+          "special": false,
+          "text": "():"
+        },
+        {
+          "id": 284,
+          "logprob": -0.2602539,
+          "special": false,
+          "text": "\n   "
+        },
+        {
+          "id": 1459,
+          "logprob": -0.39282227,
+          "special": false,
+          "text": " print"
+        },
+        {
+          "id": 440,
+          "logprob": -0.6113281,
+          "special": false,
+          "text": "(\""
+        },
+        {
+          "id": 8279,
+          "logprob": -0.4765625,
+          "special": false,
+          "text": "Hello"
+        },
+        {
+          "id": 10896,
+          "logprob": -1.5068359,
+          "special": false,
+          "text": " World"
+        },
+        {
+          "id": 657,
+          "logprob": -0.8154297,
+          "special": false,
+          "text": "\")"
+        },
+        {
+          "id": 203,
+          "logprob": -0.7319336,
+          "special": false,
+          "text": "\n"
+        },
+        {
+          "id": 203,
+          "logprob": -0.35229492,
+          "special": false,
+          "text": "\n"
+        },
+        {
+          "id": 589,
+          "logprob": -1.0380859,
+          "special": false,
+          "text": "def"
+        }
+      ]
+    },
+    "generated_text": "():\n    print(\"Hello World\")\n\ndef"
+  },
+  {
+    "details": {
+      "best_of_sequences": null,
+      "finish_reason": "length",
+      "generated_tokens": 10,
+      "prefill": [
+        {
+          "id": 589,
+          "logprob": null,
+          "text": "def"
+        },
+        {
+          "id": 1459,
+          "logprob": -5.6289062,
+          "text": " print"
+        },
+        {
+          "id": 81,
+          "logprob": -1.6005859,
+          "text": "_"
+        },
+        {
+          "id": 7656,
+          "logprob": -5.9921875,
+          "text": "hello"
+        }
+      ],
+      "seed": null,
+      "tokens": [
+        {
+          "id": 2262,
+          "logprob": -0.7705078,
+          "special": false,
+          "text": "():"
+        },
+        {
+          "id": 284,
+          "logprob": -0.2602539,
+          "special": false,
+          "text": "\n   "
+        },
+        {
+          "id": 1459,
+          "logprob": -0.39282227,
+          "special": false,
+          "text": " print"
+        },
+        {
+          "id": 440,
+          "logprob": -0.6113281,
+          "special": false,
+          "text": "(\""
+        },
+        {
+          "id": 8279,
+          "logprob": -0.4765625,
+          "special": false,
+          "text": "Hello"
+        },
+        {
+          "id": 10896,
+          "logprob": -1.5068359,
+          "special": false,
+          "text": " World"
+        },
+        {
+          "id": 657,
+          "logprob": -0.8154297,
+          "special": false,
+          "text": "\")"
+        },
+        {
+          "id": 203,
+          "logprob": -0.7319336,
+          "special": false,
+          "text": "\n"
+        },
+        {
+          "id": 203,
+          "logprob": -0.35229492,
+          "special": false,
+          "text": "\n"
+        },
+        {
+          "id": 589,
+          "logprob": -1.0380859,
+          "special": false,
+          "text": "def"
+        }
+      ]
+    },
+    "generated_text": "():\n    print(\"Hello World\")\n\ndef"
+  },
+  {
+    "details": {
+      "best_of_sequences": null,
+      "finish_reason": "length",
+      "generated_tokens": 10,
+      "prefill": [
+        {
+          "id": 589,
+          "logprob": null,
+          "text": "def"
+        },
+        {
+          "id": 1459,
+          "logprob": -5.6289062,
+          "text": " print"
+        },
+        {
+          "id": 81,
+          "logprob": -1.6005859,
+          "text": "_"
+        },
+        {
+          "id": 7656,
+          "logprob": -5.9921875,
+          "text": "hello"
+        }
+      ],
+      "seed": null,
+      "tokens": [
+        {
+          "id": 2262,
+          "logprob": -0.7705078,
+          "special": false,
+          "text": "():"
+        },
+        {
+          "id": 284,
+          "logprob": -0.2602539,
+          "special": false,
+          "text": "\n   "
+        },
+        {
+          "id": 1459,
+          "logprob": -0.39282227,
+          "special": false,
+          "text": " print"
+        },
+        {
+          "id": 440,
+          "logprob": -0.6113281,
+          "special": false,
+          "text": "(\""
+        },
+        {
+          "id": 8279,
+          "logprob": -0.4765625,
+          "special": false,
+          "text": "Hello"
+        },
+        {
+          "id": 10896,
+          "logprob": -1.5068359,
+          "special": false,
+          "text": " World"
+        },
+        {
+          "id": 657,
+          "logprob": -0.8154297,
+          "special": false,
+          "text": "\")"
+        },
+        {
+          "id": 203,
+          "logprob": -0.7319336,
+          "special": false,
+          "text": "\n"
+        },
+        {
+          "id": 203,
+          "logprob": -0.35229492,
+          "special": false,
+          "text": "\n"
+        },
+        {
+          "id": 589,
+          "logprob": -1.0380859,
+          "special": false,
+          "text": "def"
+        }
+      ]
+    },
+    "generated_text": "():\n    print(\"Hello World\")\n\ndef"
+  }
+]
diff --git a/integration-tests/models/__snapshots__/test_mt0_base.ambr b/integration-tests/models/__snapshots__/test_mt0_base.ambr
deleted file mode 100644
index d7c6eaf6..00000000
--- a/integration-tests/models/__snapshots__/test_mt0_base.ambr
+++ /dev/null
@@ -1,306 +0,0 @@
-# serializer version: 1
-# name: test_mt0_base
-  dict({
-    'details': dict({
-      'best_of_sequences': None,
-      'finish_reason': <FinishReason.EndOfSequenceToken: 'eos_token'>,
-      'generated_tokens': 5,
-      'prefill': list([
-        dict({
-          'id': 0,
-          'text': '<pad>',
-        }),
-      ]),
-      'seed': 0,
-      'tokens': list([
-        dict({
-          'id': 926,
-          'special': False,
-          'text': 'To',
-        }),
-        dict({
-          'id': 18295,
-          'special': False,
-          'text': ' sell',
-        }),
-        dict({
-          'id': 7868,
-          'special': False,
-          'text': ' things',
-        }),
-        dict({
-          'id': 260,
-          'special': False,
-          'text': '.',
-        }),
-        dict({
-          'id': 1,
-          'special': True,
-          'text': '</s>',
-        }),
-      ]),
-    }),
-    'generated_text': 'To sell things.',
-  })
-# ---
-# name: test_mt0_base_all_params
-  dict({
-    'details': dict({
-      'best_of_sequences': None,
-      'finish_reason': <FinishReason.Length: 'length'>,
-      'generated_tokens': 10,
-      'prefill': list([
-        dict({
-          'id': 0,
-          'text': '<pad>',
-        }),
-      ]),
-      'seed': 0,
-      'tokens': list([
-        dict({
-          'id': 16017,
-          'special': False,
-          'text': 'blue',
-        }),
-        dict({
-          'id': 20495,
-          'special': False,
-          'text': ' sky',
-        }),
-        dict({
-          'id': 259,
-          'special': False,
-          'text': ' ',
-        }),
-        dict({
-          'id': 15484,
-          'special': False,
-          'text': 'appear',
-        }),
-        dict({
-          'id': 345,
-          'special': False,
-          'text': 'ed',
-        }),
-        dict({
-          'id': 288,
-          'special': False,
-          'text': ' to',
-        }),
-        dict({
-          'id': 35622,
-          'special': False,
-          'text': ' cloud',
-        }),
-        dict({
-          'id': 263,
-          'special': False,
-          'text': 's',
-        }),
-        dict({
-          'id': 14701,
-          'special': False,
-          'text': ' above',
-        }),
-        dict({
-          'id': 751,
-          'special': False,
-          'text': ' all',
-        }),
-      ]),
-    }),
-    'generated_text': 'Why is the sky blue?blue sky appeared to clouds above all',
-  })
-# ---
-# name: test_mt0_base_load
-  list([
-    dict({
-      'details': dict({
-        'best_of_sequences': None,
-        'finish_reason': <FinishReason.EndOfSequenceToken: 'eos_token'>,
-        'generated_tokens': 6,
-        'prefill': list([
-          dict({
-            'id': 0,
-            'text': '<pad>',
-          }),
-        ]),
-        'seed': None,
-        'tokens': list([
-          dict({
-            'id': 259,
-            'special': False,
-            'text': '',
-          }),
-          dict({
-            'id': 39261,
-            'special': False,
-            'text': 'Because',
-          }),
-          dict({
-            'id': 609,
-            'special': False,
-            'text': ' it',
-          }),
-          dict({
-            'id': 339,
-            'special': False,
-            'text': ' is',
-          }),
-          dict({
-            'id': 16017,
-            'special': False,
-            'text': ' blue',
-          }),
-          dict({
-            'id': 1,
-            'special': True,
-            'text': '</s>',
-          }),
-        ]),
-      }),
-      'generated_text': 'Because it is blue',
-    }),
-    dict({
-      'details': dict({
-        'best_of_sequences': None,
-        'finish_reason': <FinishReason.EndOfSequenceToken: 'eos_token'>,
-        'generated_tokens': 6,
-        'prefill': list([
-          dict({
-            'id': 0,
-            'text': '<pad>',
-          }),
-        ]),
-        'seed': None,
-        'tokens': list([
-          dict({
-            'id': 259,
-            'special': False,
-            'text': '',
-          }),
-          dict({
-            'id': 39261,
-            'special': False,
-            'text': 'Because',
-          }),
-          dict({
-            'id': 609,
-            'special': False,
-            'text': ' it',
-          }),
-          dict({
-            'id': 339,
-            'special': False,
-            'text': ' is',
-          }),
-          dict({
-            'id': 16017,
-            'special': False,
-            'text': ' blue',
-          }),
-          dict({
-            'id': 1,
-            'special': True,
-            'text': '</s>',
-          }),
-        ]),
-      }),
-      'generated_text': 'Because it is blue',
-    }),
-    dict({
-      'details': dict({
-        'best_of_sequences': None,
-        'finish_reason': <FinishReason.EndOfSequenceToken: 'eos_token'>,
-        'generated_tokens': 6,
-        'prefill': list([
-          dict({
-            'id': 0,
-            'text': '<pad>',
-          }),
-        ]),
-        'seed': None,
-        'tokens': list([
-          dict({
-            'id': 259,
-            'special': False,
-            'text': '',
-          }),
-          dict({
-            'id': 39261,
-            'special': False,
-            'text': 'Because',
-          }),
-          dict({
-            'id': 609,
-            'special': False,
-            'text': ' it',
-          }),
-          dict({
-            'id': 339,
-            'special': False,
-            'text': ' is',
-          }),
-          dict({
-            'id': 16017,
-            'special': False,
-            'text': ' blue',
-          }),
-          dict({
-            'id': 1,
-            'special': True,
-            'text': '</s>',
-          }),
-        ]),
-      }),
-      'generated_text': 'Because it is blue',
-    }),
-    dict({
-      'details': dict({
-        'best_of_sequences': None,
-        'finish_reason': <FinishReason.EndOfSequenceToken: 'eos_token'>,
-        'generated_tokens': 6,
-        'prefill': list([
-          dict({
-            'id': 0,
-            'text': '<pad>',
-          }),
-        ]),
-        'seed': None,
-        'tokens': list([
-          dict({
-            'id': 259,
-            'special': False,
-            'text': '',
-          }),
-          dict({
-            'id': 39261,
-            'special': False,
-            'text': 'Because',
-          }),
-          dict({
-            'id': 609,
-            'special': False,
-            'text': ' it',
-          }),
-          dict({
-            'id': 339,
-            'special': False,
-            'text': ' is',
-          }),
-          dict({
-            'id': 16017,
-            'special': False,
-            'text': ' blue',
-          }),
-          dict({
-            'id': 1,
-            'special': True,
-            'text': '</s>',
-          }),
-        ]),
-      }),
-      'generated_text': 'Because it is blue',
-    }),
-  ])
-# ---
diff --git a/integration-tests/models/__snapshots__/test_mt0_base/test_mt0_base.json b/integration-tests/models/__snapshots__/test_mt0_base/test_mt0_base.json
new file mode 100644
index 00000000..2a26e3db
--- /dev/null
+++ b/integration-tests/models/__snapshots__/test_mt0_base/test_mt0_base.json
@@ -0,0 +1,48 @@
+{
+  "details": {
+    "best_of_sequences": null,
+    "finish_reason": "eos_token",
+    "generated_tokens": 5,
+    "prefill": [
+      {
+        "id": 0,
+        "logprob": null,
+        "text": "<pad>"
+      }
+    ],
+    "seed": 0,
+    "tokens": [
+      {
+        "id": 926,
+        "logprob": -4.3554688,
+        "special": false,
+        "text": "To"
+      },
+      {
+        "id": 18295,
+        "logprob": -7.7734375,
+        "special": false,
+        "text": " sell"
+      },
+      {
+        "id": 7868,
+        "logprob": -3.9257812,
+        "special": false,
+        "text": " things"
+      },
+      {
+        "id": 260,
+        "logprob": -2.4179688,
+        "special": false,
+        "text": "."
+      },
+      {
+        "id": 1,
+        "logprob": 0.0,
+        "special": true,
+        "text": "</s>"
+      }
+    ]
+  },
+  "generated_text": "To sell things."
+}
diff --git a/integration-tests/models/__snapshots__/test_mt0_base/test_mt0_base_all_params.json b/integration-tests/models/__snapshots__/test_mt0_base/test_mt0_base_all_params.json
new file mode 100644
index 00000000..fd77252d
--- /dev/null
+++ b/integration-tests/models/__snapshots__/test_mt0_base/test_mt0_base_all_params.json
@@ -0,0 +1,78 @@
+{
+  "details": {
+    "best_of_sequences": null,
+    "finish_reason": "length",
+    "generated_tokens": 10,
+    "prefill": [
+      {
+        "id": 0,
+        "logprob": null,
+        "text": "<pad>"
+      }
+    ],
+    "seed": 0,
+    "tokens": [
+      {
+        "id": 16017,
+        "logprob": -1.3505859,
+        "special": false,
+        "text": "blue"
+      },
+      {
+        "id": 20495,
+        "logprob": -0.50439453,
+        "special": false,
+        "text": " sky"
+      },
+      {
+        "id": 259,
+        "logprob": -1.2011719,
+        "special": false,
+        "text": " "
+      },
+      {
+        "id": 15484,
+        "logprob": -2.8378906,
+        "special": false,
+        "text": "appear"
+      },
+      {
+        "id": 345,
+        "logprob": -0.87597656,
+        "special": false,
+        "text": "ed"
+      },
+      {
+        "id": 288,
+        "logprob": -1.8447266,
+        "special": false,
+        "text": " to"
+      },
+      {
+        "id": 35622,
+        "logprob": -7.1445312,
+        "special": false,
+        "text": " cloud"
+      },
+      {
+        "id": 263,
+        "logprob": -1.2929688,
+        "special": false,
+        "text": "s"
+      },
+      {
+        "id": 14701,
+        "logprob": -3.0761719,
+        "special": false,
+        "text": " above"
+      },
+      {
+        "id": 751,
+        "logprob": -4.4375,
+        "special": false,
+        "text": " all"
+      }
+    ]
+  },
+  "generated_text": "Why is the sky blue?blue sky appeared to clouds above all"
+}
diff --git a/integration-tests/models/__snapshots__/test_mt0_base/test_mt0_base_load.json b/integration-tests/models/__snapshots__/test_mt0_base/test_mt0_base_load.json
new file mode 100644
index 00000000..c9e552b6
--- /dev/null
+++ b/integration-tests/models/__snapshots__/test_mt0_base/test_mt0_base_load.json
@@ -0,0 +1,218 @@
+[
+  {
+    "details": {
+      "best_of_sequences": null,
+      "finish_reason": "eos_token",
+      "generated_tokens": 6,
+      "prefill": [
+        {
+          "id": 0,
+          "logprob": null,
+          "text": "<pad>"
+        }
+      ],
+      "seed": null,
+      "tokens": [
+        {
+          "id": 259,
+          "logprob": -1.3789062,
+          "special": false,
+          "text": ""
+        },
+        {
+          "id": 39261,
+          "logprob": -0.36279297,
+          "special": false,
+          "text": "Because"
+        },
+        {
+          "id": 609,
+          "logprob": -1.0966797,
+          "special": false,
+          "text": " it"
+        },
+        {
+          "id": 339,
+          "logprob": -0.8276367,
+          "special": false,
+          "text": " is"
+        },
+        {
+          "id": 16017,
+          "logprob": -1.6845703,
+          "special": false,
+          "text": " blue"
+        },
+        {
+          "id": 1,
+          "logprob": -0.72753906,
+          "special": true,
+          "text": "</s>"
+        }
+      ]
+    },
+    "generated_text": "Because it is blue"
+  },
+  {
+    "details": {
+      "best_of_sequences": null,
+      "finish_reason": "eos_token",
+      "generated_tokens": 6,
+      "prefill": [
+        {
+          "id": 0,
+          "logprob": null,
+          "text": "<pad>"
+        }
+      ],
+      "seed": null,
+      "tokens": [
+        {
+          "id": 259,
+          "logprob": -1.3798828,
+          "special": false,
+          "text": ""
+        },
+        {
+          "id": 39261,
+          "logprob": -0.36328125,
+          "special": false,
+          "text": "Because"
+        },
+        {
+          "id": 609,
+          "logprob": -1.0947266,
+          "special": false,
+          "text": " it"
+        },
+        {
+          "id": 339,
+          "logprob": -0.8286133,
+          "special": false,
+          "text": " is"
+        },
+        {
+          "id": 16017,
+          "logprob": -1.6826172,
+          "special": false,
+          "text": " blue"
+        },
+        {
+          "id": 1,
+          "logprob": -0.7290039,
+          "special": true,
+          "text": "</s>"
+        }
+      ]
+    },
+    "generated_text": "Because it is blue"
+  },
+  {
+    "details": {
+      "best_of_sequences": null,
+      "finish_reason": "eos_token",
+      "generated_tokens": 6,
+      "prefill": [
+        {
+          "id": 0,
+          "logprob": null,
+          "text": "<pad>"
+        }
+      ],
+      "seed": null,
+      "tokens": [
+        {
+          "id": 259,
+          "logprob": -1.3789062,
+          "special": false,
+          "text": ""
+        },
+        {
+          "id": 39261,
+          "logprob": -0.36279297,
+          "special": false,
+          "text": "Because"
+        },
+        {
+          "id": 609,
+          "logprob": -1.0966797,
+          "special": false,
+          "text": " it"
+        },
+        {
+          "id": 339,
+          "logprob": -0.8276367,
+          "special": false,
+          "text": " is"
+        },
+        {
+          "id": 16017,
+          "logprob": -1.6845703,
+          "special": false,
+          "text": " blue"
+        },
+        {
+          "id": 1,
+          "logprob": -0.72753906,
+          "special": true,
+          "text": "</s>"
+        }
+      ]
+    },
+    "generated_text": "Because it is blue"
+  },
+  {
+    "details": {
+      "best_of_sequences": null,
+      "finish_reason": "eos_token",
+      "generated_tokens": 6,
+      "prefill": [
+        {
+          "id": 0,
+          "logprob": null,
+          "text": "<pad>"
+        }
+      ],
+      "seed": null,
+      "tokens": [
+        {
+          "id": 259,
+          "logprob": -1.3789062,
+          "special": false,
+          "text": ""
+        },
+        {
+          "id": 39261,
+          "logprob": -0.36279297,
+          "special": false,
+          "text": "Because"
+        },
+        {
+          "id": 609,
+          "logprob": -1.0966797,
+          "special": false,
+          "text": " it"
+        },
+        {
+          "id": 339,
+          "logprob": -0.8276367,
+          "special": false,
+          "text": " is"
+        },
+        {
+          "id": 16017,
+          "logprob": -1.6845703,
+          "special": false,
+          "text": " blue"
+        },
+        {
+          "id": 1,
+          "logprob": -0.72753906,
+          "special": true,
+          "text": "</s>"
+        }
+      ]
+    },
+    "generated_text": "Because it is blue"
+  }
+]
diff --git a/integration-tests/models/test_bloom_560m.py b/integration-tests/models/test_bloom_560m.py
index e13606f7..3c598c04 100644
--- a/integration-tests/models/test_bloom_560m.py
+++ b/integration-tests/models/test_bloom_560m.py
@@ -1,18 +1,20 @@
 import pytest
 
-from utils import health_check
-
 
 @pytest.fixture(scope="module")
-def bloom_560(launcher):
-    with launcher("bigscience/bloom-560m") as client:
-        yield client
+def bloom_560_handle(launcher):
+    with launcher("bigscience/bloom-560m") as handle:
+        yield handle
+
+
+@pytest.fixture(scope="module")
+async def bloom_560(bloom_560_handle):
+    await bloom_560_handle.health(60)
+    return bloom_560_handle.client
 
 
 @pytest.mark.asyncio
-async def test_bloom_560m(bloom_560, snapshot_test):
-    await health_check(bloom_560, 60)
-
+async def test_bloom_560m(bloom_560, response_snapshot):
     response = await bloom_560.generate(
         "Pour déguster un ortolan, il faut tout d'abord",
         max_new_tokens=10,
@@ -21,13 +23,11 @@ async def test_bloom_560m(bloom_560, snapshot_test):
     )
 
     assert response.details.generated_tokens == 10
-    assert snapshot_test(response)
+    assert response == response_snapshot
 
 
 @pytest.mark.asyncio
-async def test_bloom_560m_all_params(bloom_560, snapshot_test):
-    await health_check(bloom_560, 60)
-
+async def test_bloom_560m_all_params(bloom_560, response_snapshot):
     response = await bloom_560.generate(
         "Pour déguster un ortolan, il faut tout d'abord",
         max_new_tokens=10,
@@ -44,13 +44,11 @@ async def test_bloom_560m_all_params(bloom_560, snapshot_test):
     )
 
     assert response.details.generated_tokens == 10
-    assert snapshot_test(response)
+    assert response == response_snapshot
 
 
 @pytest.mark.asyncio
-async def test_bloom_560m_load(bloom_560, generate_load, snapshot_test):
-    await health_check(bloom_560, 60)
-
+async def test_bloom_560m_load(bloom_560, generate_load, response_snapshot):
     responses = await generate_load(
         bloom_560,
         "Pour déguster un ortolan, il faut tout d'abord",
@@ -59,5 +57,6 @@ async def test_bloom_560m_load(bloom_560, generate_load, snapshot_test):
     )
 
     assert len(responses) == 4
+    assert all([r.generated_text == responses[0].generated_text for r in responses])
 
-    assert snapshot_test(responses)
+    assert responses == response_snapshot
diff --git a/integration-tests/models/test_bloom_560m_sharded.py b/integration-tests/models/test_bloom_560m_sharded.py
index bfb70253..25f6b2d7 100644
--- a/integration-tests/models/test_bloom_560m_sharded.py
+++ b/integration-tests/models/test_bloom_560m_sharded.py
@@ -1,18 +1,20 @@
 import pytest
 
-from utils import health_check
-
 
 @pytest.fixture(scope="module")
-def bloom_560m_sharded(launcher):
-    with launcher("bigscience/bloom-560m", num_shard=2) as client:
-        yield client
+def bloom_560m_sharded_handle(launcher):
+    with launcher("bigscience/bloom-560m", num_shard=2) as handle:
+        yield handle
+
+
+@pytest.fixture(scope="module")
+async def bloom_560m_sharded(bloom_560m_sharded_handle):
+    await bloom_560m_sharded_handle.health(60)
+    return bloom_560m_sharded_handle.client
 
 
 @pytest.mark.asyncio
-async def test_bloom_560m_sharded(bloom_560m_sharded, snapshot_test):
-    await health_check(bloom_560m_sharded, 60)
-
+async def test_bloom_560m_sharded(bloom_560m_sharded, response_snapshot):
     response = await bloom_560m_sharded.generate(
         "Pour déguster un ortolan, il faut tout d'abord",
         max_new_tokens=10,
@@ -21,15 +23,13 @@ async def test_bloom_560m_sharded(bloom_560m_sharded, snapshot_test):
     )
 
     assert response.details.generated_tokens == 10
-    assert snapshot_test(response)
+    assert response == response_snapshot
 
 
 @pytest.mark.asyncio
 async def test_bloom_560m_sharded_load(
-    bloom_560m_sharded, generate_load, snapshot_test
+    bloom_560m_sharded, generate_load, response_snapshot
 ):
-    await health_check(bloom_560m_sharded, 60)
-
     responses = await generate_load(
         bloom_560m_sharded,
         "Pour déguster un ortolan, il faut tout d'abord",
@@ -38,5 +38,6 @@ async def test_bloom_560m_sharded_load(
     )
 
     assert len(responses) == 4
+    assert all([r.generated_text == responses[0].generated_text for r in responses])
 
-    assert snapshot_test(responses)
+    assert responses == response_snapshot
diff --git a/integration-tests/models/test_flash_llama.py b/integration-tests/models/test_flash_llama.py
index 4d1f2bcf..37468455 100644
--- a/integration-tests/models/test_flash_llama.py
+++ b/integration-tests/models/test_flash_llama.py
@@ -1,30 +1,30 @@
 import pytest
 
-from utils import health_check
-
 
 @pytest.fixture(scope="module")
-def flash_llama(launcher):
-    with launcher("huggingface/llama-7b", num_shard=2) as client:
-        yield client
+def flash_llama_handle(launcher):
+    with launcher("huggingface/llama-7b", num_shard=2) as handle:
+        yield handle
+
+
+@pytest.fixture(scope="module")
+async def flash_llama(flash_llama_handle):
+    await flash_llama_handle.health(120)
+    return flash_llama_handle.client
 
 
 @pytest.mark.asyncio
 @pytest.mark.private
-async def test_flash_llama(flash_llama, snapshot_test):
-    await health_check(flash_llama, 120)
-
+async def test_flash_llama(flash_llama, response_snapshot):
     response = await flash_llama.generate("Test request", max_new_tokens=10)
 
     assert response.details.generated_tokens == 10
-    assert snapshot_test(response)
+    assert response == response_snapshot
 
 
 @pytest.mark.asyncio
 @pytest.mark.private
-async def test_flash_llama_all_params(flash_llama, snapshot_test):
-    await health_check(flash_llama, 120)
-
+async def test_flash_llama_all_params(flash_llama, response_snapshot):
     response = await flash_llama.generate(
         "Test request",
         max_new_tokens=10,
@@ -41,16 +41,15 @@ async def test_flash_llama_all_params(flash_llama, snapshot_test):
     )
 
     assert response.details.generated_tokens == 10
-    assert snapshot_test(response)
+    assert response == response_snapshot
 
 
 @pytest.mark.asyncio
 @pytest.mark.private
-async def test_flash_llama_load(flash_llama, generate_load, snapshot_test):
-    await health_check(flash_llama, 120)
-
+async def test_flash_llama_load(flash_llama, generate_load, response_snapshot):
     responses = await generate_load(flash_llama, "Test request", max_new_tokens=10, n=4)
 
     assert len(responses) == 4
+    assert all([r.generated_text == responses[0].generated_text for r in responses])
 
-    assert snapshot_test(responses)
+    assert responses == response_snapshot
diff --git a/integration-tests/models/test_flash_neox.py b/integration-tests/models/test_flash_neox.py
index 8c981028..56cbf270 100644
--- a/integration-tests/models/test_flash_neox.py
+++ b/integration-tests/models/test_flash_neox.py
@@ -1,31 +1,31 @@
 import pytest
 
-from utils import health_check
-
 
 @pytest.fixture(scope="module")
-def flash_neox(launcher):
-    with launcher("OpenAssistant/oasst-sft-1-pythia-12b", num_shard=2) as client:
-        yield client
+def flash_neox_handle(launcher):
+    with launcher("OpenAssistant/oasst-sft-1-pythia-12b", num_shard=2) as handle:
+        yield handle
+
+
+@pytest.fixture(scope="module")
+async def flash_neox(flash_neox_handle):
+    await flash_neox_handle.health(240)
+    return flash_neox_handle.client
 
 
 @pytest.mark.asyncio
-async def test_flash_neox(flash_neox, snapshot_test):
-    await health_check(flash_neox, 240)
-
+async def test_flash_neox(flash_neox, response_snapshot):
     response = await flash_neox.generate(
         "<|prompter|>What is a meme, and what's the history behind this word?<|endoftext|><|assistant|>",
         max_new_tokens=10,
     )
 
     assert response.details.generated_tokens == 10
-    assert snapshot_test(response)
+    assert response == response_snapshot
 
 
 @pytest.mark.asyncio
-async def test_flash_neox_load(flash_neox, generate_load, snapshot_test):
-    await health_check(flash_neox, 240)
-
+async def test_flash_neox_load(flash_neox, generate_load, response_snapshot):
     responses = await generate_load(
         flash_neox,
         "<|prompter|>What is a meme, and what's the history behind this word?<|endoftext|><|assistant|>",
@@ -34,5 +34,6 @@ async def test_flash_neox_load(flash_neox, generate_load, snapshot_test):
     )
 
     assert len(responses) == 4
+    assert all([r.generated_text == responses[0].generated_text for r in responses])
 
-    assert snapshot_test(responses)
+    assert responses == response_snapshot
diff --git a/integration-tests/models/test_flash_santacoder.py b/integration-tests/models/test_flash_santacoder.py
index 64a59d78..b0cb4522 100644
--- a/integration-tests/models/test_flash_santacoder.py
+++ b/integration-tests/models/test_flash_santacoder.py
@@ -1,32 +1,35 @@
 import pytest
 
-from utils import health_check
-
 
 @pytest.fixture(scope="module")
-def flash_santacoder(launcher):
-    with launcher("bigcode/santacoder") as client:
-        yield client
+def flash_santacoder_handle(launcher):
+    with launcher("bigcode/santacoder") as handle:
+        yield handle
+
+
+@pytest.fixture(scope="module")
+async def flash_santacoder(flash_santacoder_handle):
+    await flash_santacoder_handle.health(240)
+    return flash_santacoder_handle.client
 
 
 @pytest.mark.asyncio
-async def test_flash_santacoder(flash_santacoder, snapshot_test):
-    await health_check(flash_santacoder, 60)
-
+async def test_flash_santacoder(flash_santacoder, response_snapshot):
     response = await flash_santacoder.generate("def print_hello", max_new_tokens=10)
 
     assert response.details.generated_tokens == 10
-    assert snapshot_test(response)
+    assert response == response_snapshot
 
 
 @pytest.mark.asyncio
-async def test_flash_santacoder_load(flash_santacoder, generate_load, snapshot_test):
-    await health_check(flash_santacoder, 60)
-
+async def test_flash_santacoder_load(
+    flash_santacoder, generate_load, response_snapshot
+):
     responses = await generate_load(
         flash_santacoder, "def print_hello", max_new_tokens=10, n=4
     )
 
     assert len(responses) == 4
+    assert all([r.generated_text == responses[0].generated_text for r in responses])
 
-    assert snapshot_test(responses)
+    assert responses == response_snapshot
diff --git a/integration-tests/models/test_flash_starcoder.py b/integration-tests/models/test_flash_starcoder.py
index d43e92dc..4c7393a7 100644
--- a/integration-tests/models/test_flash_starcoder.py
+++ b/integration-tests/models/test_flash_starcoder.py
@@ -1,47 +1,46 @@
 import pytest
 
-from utils import health_check
-
 
 @pytest.fixture(scope="module")
-def flash_starcoder(launcher):
-    with launcher("bigcode/starcoder", num_shard=2) as client:
-        yield client
+def flash_starcoder_handle(launcher):
+    with launcher("bigcode/starcoder", num_shard=2) as handle:
+        yield handle
+
+
+@pytest.fixture(scope="module")
+async def flash_starcoder(flash_starcoder_handle):
+    await flash_starcoder_handle.health(240)
+    return flash_starcoder_handle.client
 
 
 @pytest.mark.asyncio
 @pytest.mark.private
-async def test_flash_starcoder(flash_starcoder, snapshot_test):
-    await health_check(flash_starcoder, 240)
-
+async def test_flash_starcoder(flash_starcoder, response_snapshot):
     response = await flash_starcoder.generate("def print_hello", max_new_tokens=10)
 
     assert response.details.generated_tokens == 10
-    assert snapshot_test(response)
+    assert response == response_snapshot
 
 
 @pytest.mark.asyncio
 @pytest.mark.private
-async def test_flash_starcoder_default_params(flash_starcoder, snapshot_test):
-    await health_check(flash_starcoder, 240)
-
+async def test_flash_starcoder_default_params(flash_starcoder, response_snapshot):
     response = await flash_starcoder.generate(
         "def print_hello", max_new_tokens=60, temperature=0.2, top_p=0.95, seed=0
     )
 
     assert response.details.generated_tokens == 12
-    assert snapshot_test(response)
+    assert response == response_snapshot
 
 
 @pytest.mark.asyncio
 @pytest.mark.private
-async def test_flash_starcoder_load(flash_starcoder, generate_load, snapshot_test):
-    await health_check(flash_starcoder, 240)
-
+async def test_flash_starcoder_load(flash_starcoder, generate_load, response_snapshot):
     responses = await generate_load(
         flash_starcoder, "def print_hello", max_new_tokens=10, n=4
     )
 
     assert len(responses) == 4
+    assert all([r.generated_text == responses[0].generated_text for r in responses])
 
-    assert snapshot_test(responses)
+    assert responses == response_snapshot
diff --git a/integration-tests/models/test_mt0_base.py b/integration-tests/models/test_mt0_base.py
index 7310a30f..15410f73 100644
--- a/integration-tests/models/test_mt0_base.py
+++ b/integration-tests/models/test_mt0_base.py
@@ -1,18 +1,20 @@
 import pytest
 
-from utils import health_check
-
 
 @pytest.fixture(scope="module")
-def mt0_base(launcher):
-    with launcher("bigscience/mt0-base") as client:
-        yield client
+def mt0_base_handle(launcher):
+    with launcher("bigscience/mt0-base") as handle:
+        yield handle
+
+
+@pytest.fixture(scope="module")
+async def mt0_base(mt0_base_handle):
+    await mt0_base_handle.health(60)
+    return mt0_base_handle.client
 
 
 @pytest.mark.asyncio
-async def test_mt0_base(mt0_base, snapshot_test):
-    await health_check(mt0_base, 60)
-
+async def test_mt0_base(mt0_base, response_snapshot):
     response = await mt0_base.generate(
         "Why is the sky blue?",
         max_new_tokens=10,
@@ -21,13 +23,11 @@ async def test_mt0_base(mt0_base, snapshot_test):
     )
 
     assert response.details.generated_tokens == 5
-    assert snapshot_test(response)
+    assert response == response_snapshot
 
 
 @pytest.mark.asyncio
-async def test_mt0_base_all_params(mt0_base, snapshot_test):
-    await health_check(mt0_base, 60)
-
+async def test_mt0_base_all_params(mt0_base, response_snapshot):
     response = await mt0_base.generate(
         "Why is the sky blue?",
         max_new_tokens=10,
@@ -44,13 +44,11 @@ async def test_mt0_base_all_params(mt0_base, snapshot_test):
     )
 
     assert response.details.generated_tokens == 10
-    assert snapshot_test(response)
+    assert response == response_snapshot
 
 
 @pytest.mark.asyncio
-async def test_mt0_base_load(mt0_base, generate_load, snapshot_test):
-    await health_check(mt0_base, 60)
-
+async def test_mt0_base_load(mt0_base, generate_load, response_snapshot):
     responses = await generate_load(
         mt0_base,
         "Why is the sky blue?",
@@ -59,5 +57,6 @@ async def test_mt0_base_load(mt0_base, generate_load, snapshot_test):
     )
 
     assert len(responses) == 4
+    assert all([r.generated_text == responses[0].generated_text for r in responses])
 
-    assert snapshot_test(responses)
+    assert responses == response_snapshot
diff --git a/integration-tests/models/utils.py b/integration-tests/models/utils.py
deleted file mode 100644
index c47e4871..00000000
--- a/integration-tests/models/utils.py
+++ /dev/null
@@ -1,15 +0,0 @@
-import time
-
-from aiohttp import ClientConnectorError, ClientOSError, ServerDisconnectedError
-from text_generation import AsyncClient
-
-
-async def health_check(client: AsyncClient, timeout: int = 60):
-    assert timeout > 0
-    for _ in range(timeout):
-        try:
-            await client.generate("test")
-            return
-        except (ClientConnectorError, ClientOSError, ServerDisconnectedError) as e:
-            time.sleep(1)
-    raise RuntimeError("Health check failed")
diff --git a/integration-tests/requirements.txt b/integration-tests/requirements.txt
index 9ecbb2ee..051730ff 100644
--- a/integration-tests/requirements.txt
+++ b/integration-tests/requirements.txt
@@ -1,5 +1,5 @@
 syrupy
-text-generation==0.5.1
+text-generation==0.5.2
 pytest
 pytest-asyncio==0.17.2
 docker
\ No newline at end of file