diff --git a/integration-tests/models/__snapshots__/test_completion_prompts/test_flash_llama_completion_many_prompts.json b/integration-tests/models/__snapshots__/test_completion_prompts/test_flash_llama_completion_many_prompts.json
index 4aec05b8..2d32b332 100644
--- a/integration-tests/models/__snapshots__/test_completion_prompts/test_flash_llama_completion_many_prompts.json
+++ b/integration-tests/models/__snapshots__/test_completion_prompts/test_flash_llama_completion_many_prompts.json
@@ -2,13 +2,19 @@
   "choices": [
     {
       "finish_reason": "eos_token",
-      "index": 0,
+      "index": 1,
       "logprobs": null,
       "text": " PR for more information?"
     },
     {
       "finish_reason": "length",
-      "index": 1,
+      "index": 3,
+      "logprobs": null,
+      "text": "hd20220811-"
+    },
+    {
+      "finish_reason": "length",
+      "index": 0,
       "logprobs": null,
       "text": "le Business Incubator is providing a workspace"
     },
@@ -16,29 +22,17 @@
       "finish_reason": "length",
       "index": 2,
       "logprobs": null,
-      "text": "hd20220811-"
-    },
-    {
-      "finish_reason": "length",
-      "index": 3,
-      "logprobs": null,
       "text": " severely flawed and often has a substandard"
-    },
-    {
-      "finish_reason": "length",
-      "index": 4,
-      "logprobs": null,
-      "text": "](https://i.imgur.com/as"
     }
   ],
-  "created": 1712862968,
+  "created": 1712875413,
   "id": "",
   "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
   "object": "text_completion",
   "system_fingerprint": "1.4.5-native",
   "usage": {
-    "completion_tokens": 46,
-    "prompt_tokens": 10,
-    "total_tokens": 56
+    "completion_tokens": 36,
+    "prompt_tokens": 8,
+    "total_tokens": 44
   }
 }
diff --git a/integration-tests/models/__snapshots__/test_completion_prompts/test_flash_llama_completion_many_prompts_stream.json b/integration-tests/models/__snapshots__/test_completion_prompts/test_flash_llama_completion_many_prompts_stream.json
index bdea1b77..b0850cc9 100644
--- a/integration-tests/models/__snapshots__/test_completion_prompts/test_flash_llama_completion_many_prompts_stream.json
+++ b/integration-tests/models/__snapshots__/test_completion_prompts/test_flash_llama_completion_many_prompts_stream.json
@@ -1 +1 @@
-"<ClientResponse(http://localhost:9483/v1/completions) [200 OK]>\n<CIMultiDictProxy('Content-Type': 'text/event-stream', 'Cache-Control': 'no-cache', 'x-compute-type': '1-nvidia-a10g', 'x-compute-characters': '72', 'x-accel-buffering': 'no', 'Access-Control-Allow-Origin': '*', 'Vary': 'origin', 'Vary': 'access-control-request-method', 'Vary': 'access-control-request-headers', 'Transfer-Encoding': 'chunked', 'Date': 'Thu, 11 Apr 2024 19:19:32 GMT')>\n"
+"<ClientResponse(http://localhost:8041/v1/completions) [200 OK]>\n<CIMultiDictProxy('Content-Type': 'text/event-stream', 'Cache-Control': 'no-cache', 'x-compute-type': '1-nvidia-a10g', 'x-compute-characters': '72', 'x-accel-buffering': 'no', 'Access-Control-Allow-Origin': '*', 'Vary': 'origin', 'Vary': 'access-control-request-method', 'Vary': 'access-control-request-headers', 'Transfer-Encoding': 'chunked', 'Date': 'Thu, 11 Apr 2024 22:43:33 GMT')>\n"
diff --git a/integration-tests/models/__snapshots__/test_completion_prompts/test_flash_llama_completion_single_prompt.json b/integration-tests/models/__snapshots__/test_completion_prompts/test_flash_llama_completion_single_prompt.json
index 51193b0c..925f7db7 100644
--- a/integration-tests/models/__snapshots__/test_completion_prompts/test_flash_llama_completion_single_prompt.json
+++ b/integration-tests/models/__snapshots__/test_completion_prompts/test_flash_llama_completion_single_prompt.json
@@ -7,7 +7,7 @@
       "text": " PR for flake8"
     }
   ],
-  "created": 1712862926,
+  "created": 1712875413,
   "id": "",
   "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
   "object": "text_completion",
diff --git a/integration-tests/models/test_completion_prompts.py b/integration-tests/models/test_completion_prompts.py
index 7d1f2c1d..daa1209a 100644
--- a/integration-tests/models/test_completion_prompts.py
+++ b/integration-tests/models/test_completion_prompts.py
@@ -47,7 +47,7 @@ def test_flash_llama_completion_many_prompts(flash_llama_completion, response_sn
         f"{flash_llama_completion.base_url}/v1/completions",
         json={
             "model": "tgi",
-            "prompt": ["Say", "this", "is", "a", "test"],
+            "prompt": ["Say", "this", "is", "a"],
             "max_tokens": 10,
             "seed": 0,
         },
@@ -55,11 +55,11 @@ def test_flash_llama_completion_many_prompts(flash_llama_completion, response_sn
         stream=False,
     )
     response = response.json()
-    assert len(response["choices"]) == 5
+    assert len(response["choices"]) == 4
 
     all_indexes = [choice["index"] for choice in response["choices"]]
     all_indexes.sort()
-    assert all_indexes == [0, 1, 2, 3, 4]
+    assert all_indexes == [0, 1, 2, 3]
 
     response == response_snapshot