From f0181ed2d78af10d5afd4236bce9f486eb4c863b Mon Sep 17 00:00:00 2001
From: Nicolas Patry <patry.nicolas@protonmail.com>
Date: Thu, 15 Aug 2024 13:28:42 +0200
Subject: [PATCH] Upgrading the tests to match the current workings. (#2423)

---
 .../test_bloom_560m/test_bloom_560m.json      |  30 +-
 .../test_flash_deepseek_v2_all_params.json    |   8 +-
 .../test_flash_gemma/test_flash_gemma.json    |  32 +-
 .../test_flash_starcoder_gptq.json            | 146 +-------
 ...t_flash_starcoder_gptq_default_params.json | 144 +-------
 .../test_flash_starcoder_gptq_load.json       | 344 ++++--------------
 .../test_mamba/test_mamba_all_params.json     |  24 +-
 .../test_mt0_base_all_params.json             |   6 +-
 .../models/test_flash_starcoder_gptq.py       |   4 +-
 integration-tests/models/test_mamba.py        |   2 +-
 10 files changed, 166 insertions(+), 574 deletions(-)

diff --git a/integration-tests/models/__snapshots__/test_bloom_560m/test_bloom_560m.json b/integration-tests/models/__snapshots__/test_bloom_560m/test_bloom_560m.json
index b274992e..5d0eeef6 100644
--- a/integration-tests/models/__snapshots__/test_bloom_560m/test_bloom_560m.json
+++ b/integration-tests/models/__snapshots__/test_bloom_560m/test_bloom_560m.json
@@ -11,52 +11,52 @@
       },
       {
         "id": 49833,
-        "logprob": -10.5703125,
+        "logprob": -10.546875,
         "text": " dég"
       },
       {
         "id": 21543,
-        "logprob": -0.14746094,
+        "logprob": -0.18457031,
         "text": "uster"
       },
       {
         "id": 447,
-        "logprob": -1.9277344,
+        "logprob": -1.9287109,
         "text": " un"
       },
       {
         "id": 46341,
-        "logprob": -15.421875,
+        "logprob": -15.4296875,
         "text": " ort"
       },
       {
         "id": 35567,
-        "logprob": -7.5820312,
+        "logprob": -7.578125,
         "text": "olan"
       },
       {
         "id": 15,
-        "logprob": -1.4013672,
+        "logprob": -1.4003906,
         "text": ","
       },
       {
         "id": 1669,
-        "logprob": -1.5664062,
+        "logprob": -1.5439453,
         "text": " il"
       },
       {
         "id": 11580,
-        "logprob": -0.94189453,
+        "logprob": -0.93896484,
         "text": " faut"
       },
       {
         "id": 3913,
-        "logprob": -3.6816406,
+        "logprob": -3.7207031,
         "text": " tout"
       },
       {
         "id": 39261,
-        "logprob": -1.7753906,
+        "logprob": -1.5742188,
         "text": " d'abord"
       }
     ],
@@ -64,13 +64,13 @@
     "tokens": [
       {
         "id": 578,
-        "logprob": -1.6318359,
+        "logprob": -1.6474609,
         "special": false,
         "text": " le"
       },
       {
         "id": 5608,
-        "logprob": -2.4882812,
+        "logprob": -2.4707031,
         "special": false,
         "text": " faire"
       },
@@ -88,19 +88,19 @@
       },
       {
         "id": 693,
-        "logprob": -2.4472656,
+        "logprob": -2.4628906,
         "special": false,
         "text": " à"
       },
       {
         "id": 366,
-        "logprob": -1.1972656,
+        "logprob": -1.1953125,
         "special": false,
         "text": " la"
       },
       {
         "id": 48844,
-        "logprob": -1.7890625,
+        "logprob": -1.7978516,
         "special": false,
         "text": " cass"
       },
diff --git a/integration-tests/models/__snapshots__/test_flash_deepseek_v2/test_flash_deepseek_v2_all_params.json b/integration-tests/models/__snapshots__/test_flash_deepseek_v2/test_flash_deepseek_v2_all_params.json
index 6b45cf6b..3ac8d050 100644
--- a/integration-tests/models/__snapshots__/test_flash_deepseek_v2/test_flash_deepseek_v2_all_params.json
+++ b/integration-tests/models/__snapshots__/test_flash_deepseek_v2/test_flash_deepseek_v2_all_params.json
@@ -11,7 +11,7 @@
       },
       {
         "id": 3533,
-        "logprob": -9.625,
+        "logprob": -9.5625,
         "text": "Test"
       },
       {
@@ -24,13 +24,13 @@
     "tokens": [
       {
         "id": 2143,
-        "logprob": -1.828125,
+        "logprob": -1.8203125,
         "special": false,
         "text": " sent"
       },
       {
         "id": 10081,
-        "logprob": -0.41210938,
+        "logprob": -0.55078125,
         "special": false,
         "text": " successfully"
       },
@@ -42,7 +42,7 @@
       },
       {
         "id": 100001,
-        "logprob": -0.16015625,
+        "logprob": -0.12695312,
         "special": true,
         "text": "<｜end▁of▁sentence｜>"
       }
diff --git a/integration-tests/models/__snapshots__/test_flash_gemma/test_flash_gemma.json b/integration-tests/models/__snapshots__/test_flash_gemma/test_flash_gemma.json
index 8829f9fe..96f2ce17 100644
--- a/integration-tests/models/__snapshots__/test_flash_gemma/test_flash_gemma.json
+++ b/integration-tests/models/__snapshots__/test_flash_gemma/test_flash_gemma.json
@@ -24,13 +24,13 @@
     "tokens": [
       {
         "id": 1736,
-        "logprob": -2.03125,
+        "logprob": -2.046875,
         "special": false,
         "text": " form"
       },
       {
         "id": 109,
-        "logprob": -1.8671875,
+        "logprob": -1.8828125,
         "special": false,
         "text": "\n\n"
       },
@@ -42,48 +42,48 @@
       },
       {
         "id": 2121,
-        "logprob": -1.8125,
+        "logprob": -1.78125,
         "special": false,
         "text": " test"
       },
       {
         "id": 3853,
-        "logprob": -0.24121094,
+        "logprob": -0.23632812,
         "special": false,
         "text": " request"
       },
       {
         "id": 1736,
-        "logprob": -0.100097656,
+        "logprob": -0.09326172,
         "special": false,
         "text": " form"
       },
       {
         "id": 603,
-        "logprob": -0.9453125,
+        "logprob": -0.8828125,
         "special": false,
         "text": " is"
       },
       {
-        "id": 476,
-        "logprob": -1.703125,
+        "id": 1671,
+        "logprob": -1.6171875,
         "special": false,
-        "text": " a"
+        "text": " used"
       },
       {
-        "id": 4551,
-        "logprob": -2.453125,
+        "id": 577,
+        "logprob": -0.390625,
         "special": false,
-        "text": " document"
+        "text": " to"
       },
       {
-        "id": 674,
-        "logprob": -0.796875,
+        "id": 3853,
+        "logprob": -1.2265625,
         "special": false,
-        "text": " that"
+        "text": " request"
       }
     ],
     "top_tokens": null
   },
-  "generated_text": " form\n\nThe test request form is a document that"
+  "generated_text": " form\n\nThe test request form is used to request"
 }
diff --git a/integration-tests/models/__snapshots__/test_flash_starcoder_gptq/test_flash_starcoder_gptq.json b/integration-tests/models/__snapshots__/test_flash_starcoder_gptq/test_flash_starcoder_gptq.json
index 5e537bb7..26224118 100644
--- a/integration-tests/models/__snapshots__/test_flash_starcoder_gptq/test_flash_starcoder_gptq.json
+++ b/integration-tests/models/__snapshots__/test_flash_starcoder_gptq/test_flash_starcoder_gptq.json
@@ -1,8 +1,8 @@
 {
   "details": {
     "best_of_sequences": null,
-    "finish_reason": "length",
-    "generated_tokens": 20,
+    "finish_reason": "eos_token",
+    "generated_tokens": 2,
     "prefill": [
       {
         "id": 589,
@@ -11,57 +11,57 @@
       },
       {
         "id": 3226,
-        "logprob": -8.5859375,
+        "logprob": -8.9453125,
         "text": " ge"
       },
       {
         "id": 21017,
-        "logprob": -7.5859375,
+        "logprob": -8.8515625,
         "text": "ometric"
       },
       {
         "id": 81,
-        "logprob": -0.2668457,
+        "logprob": -0.21875,
         "text": "_"
       },
       {
         "id": 6009,
-        "logprob": -1.6416016,
+        "logprob": -1.2773438,
         "text": "mean"
       },
       {
         "id": 26,
-        "logprob": -0.22705078,
+        "logprob": -0.25195312,
         "text": "("
       },
       {
         "id": 62,
-        "logprob": -5.2304688,
+        "logprob": -4.8203125,
         "text": "L"
       },
       {
         "id": 44,
-        "logprob": -3.0976562,
+        "logprob": -3.7734375,
         "text": ":"
       },
       {
         "id": 1682,
-        "logprob": -1.1044922,
+        "logprob": -0.8310547,
         "text": " List"
       },
       {
         "id": 77,
-        "logprob": -0.14294434,
+        "logprob": -0.22766113,
         "text": "["
       },
       {
         "id": 1808,
-        "logprob": -0.32299805,
+        "logprob": -0.46240234,
         "text": "float"
       },
       {
         "id": 10794,
-        "logprob": -2.8164062,
+        "logprob": -3.0234375,
         "text": "]):"
       }
     ],
@@ -69,126 +69,18 @@
     "tokens": [
       {
         "id": 284,
-        "logprob": -0.1282959,
+        "logprob": -0.04626465,
         "special": false,
         "text": "\n   "
       },
       {
-        "id": 1524,
-        "logprob": -0.97998047,
-        "special": false,
-        "text": " \"\"\""
-      },
-      {
-        "id": 284,
-        "logprob": -0.7006836,
-        "special": false,
-        "text": "\n   "
-      },
-      {
-        "id": 14883,
-        "logprob": -2.1933594,
-        "special": false,
-        "text": " Calculate"
-      },
-      {
-        "id": 322,
-        "logprob": -0.2697754,
-        "special": false,
-        "text": " the"
-      },
-      {
-        "id": 3226,
-        "logprob": -0.0836792,
-        "special": false,
-        "text": " ge"
-      },
-      {
-        "id": 21017,
-        "logprob": -0.018737793,
-        "special": false,
-        "text": "ometric"
-      },
-      {
-        "id": 5651,
-        "logprob": -0.028640747,
-        "special": false,
-        "text": " mean"
-      },
-      {
-        "id": 432,
-        "logprob": -0.29467773,
-        "special": false,
-        "text": " of"
-      },
-      {
-        "id": 312,
-        "logprob": -0.31518555,
-        "special": false,
-        "text": " a"
-      },
-      {
-        "id": 1149,
-        "logprob": -0.20605469,
-        "special": false,
-        "text": " list"
-      },
-      {
-        "id": 432,
-        "logprob": -0.23254395,
-        "special": false,
-        "text": " of"
-      },
-      {
-        "id": 7515,
-        "logprob": -0.4489746,
-        "special": false,
-        "text": " numbers"
-      },
-      {
-        "id": 32,
-        "logprob": -0.6044922,
-        "special": false,
-        "text": "."
-      },
-      {
-        "id": 446,
-        "logprob": -0.63964844,
-        "special": false,
-        "text": "\n\n   "
-      },
-      {
-        "id": 499,
-        "logprob": -1.1953125,
-        "special": false,
-        "text": " :"
-      },
-      {
-        "id": 753,
-        "logprob": -0.03515625,
-        "special": false,
-        "text": "param"
-      },
-      {
-        "id": 498,
-        "logprob": -0.06311035,
-        "special": false,
-        "text": " L"
-      },
-      {
-        "id": 44,
-        "logprob": -0.003414154,
-        "special": false,
-        "text": ":"
-      },
-      {
-        "id": 1682,
-        "logprob": -1.3310547,
-        "special": false,
-        "text": " List"
+        "id": 0,
+        "logprob": null,
+        "special": true,
+        "text": "<|endoftext|>"
       }
     ],
     "top_tokens": null
   },
-  "generated_text": "\n    \"\"\"\n    Calculate the geometric mean of a list of numbers.\n\n    :param L: List"
+  "generated_text": "\n   "
 }
diff --git a/integration-tests/models/__snapshots__/test_flash_starcoder_gptq/test_flash_starcoder_gptq_default_params.json b/integration-tests/models/__snapshots__/test_flash_starcoder_gptq/test_flash_starcoder_gptq_default_params.json
index bf0f5146..015912f8 100644
--- a/integration-tests/models/__snapshots__/test_flash_starcoder_gptq/test_flash_starcoder_gptq_default_params.json
+++ b/integration-tests/models/__snapshots__/test_flash_starcoder_gptq/test_flash_starcoder_gptq_default_params.json
@@ -1,8 +1,8 @@
 {
   "details": {
     "best_of_sequences": null,
-    "finish_reason": "length",
-    "generated_tokens": 20,
+    "finish_reason": "eos_token",
+    "generated_tokens": 2,
     "prefill": [
       {
         "id": 589,
@@ -11,57 +11,57 @@
       },
       {
         "id": 3226,
-        "logprob": -8.5859375,
+        "logprob": -8.9453125,
         "text": " ge"
       },
       {
         "id": 21017,
-        "logprob": -7.5898438,
+        "logprob": -8.859375,
         "text": "ometric"
       },
       {
         "id": 81,
-        "logprob": -0.26586914,
+        "logprob": -0.21984863,
         "text": "_"
       },
       {
         "id": 6009,
-        "logprob": -1.6347656,
+        "logprob": -1.2861328,
         "text": "mean"
       },
       {
         "id": 26,
-        "logprob": -0.22705078,
+        "logprob": -0.25219727,
         "text": "("
       },
       {
         "id": 62,
-        "logprob": -5.2382812,
+        "logprob": -4.8007812,
         "text": "L"
       },
       {
         "id": 44,
-        "logprob": -3.0996094,
+        "logprob": -3.7949219,
         "text": ":"
       },
       {
         "id": 1682,
-        "logprob": -1.1025391,
+        "logprob": -0.8046875,
         "text": " List"
       },
       {
         "id": 77,
-        "logprob": -0.14294434,
+        "logprob": -0.22424316,
         "text": "["
       },
       {
         "id": 1808,
-        "logprob": -0.32226562,
+        "logprob": -0.46191406,
         "text": "float"
       },
       {
         "id": 10794,
-        "logprob": -2.8164062,
+        "logprob": -3.0253906,
         "text": "]):"
       }
     ],
@@ -74,121 +74,13 @@
         "text": "\n   "
       },
       {
-        "id": 442,
-        "logprob": -1.3134766,
-        "special": false,
-        "text": " return"
-      },
-      {
-        "id": 11665,
-        "logprob": -0.10021973,
-        "special": false,
-        "text": " reduce"
-      },
-      {
-        "id": 26,
-        "logprob": 0.0,
-        "special": false,
-        "text": "("
-      },
-      {
-        "id": 5962,
-        "logprob": 0.0,
-        "special": false,
-        "text": "lambda"
-      },
-      {
-        "id": 816,
-        "logprob": 0.0,
-        "special": false,
-        "text": " x"
-      },
-      {
-        "id": 30,
-        "logprob": 0.0,
-        "special": false,
-        "text": ","
-      },
-      {
-        "id": 533,
-        "logprob": 0.0,
-        "special": false,
-        "text": " y"
-      },
-      {
-        "id": 44,
-        "logprob": 0.0,
-        "special": false,
-        "text": ":"
-      },
-      {
-        "id": 816,
-        "logprob": 0.0,
-        "special": false,
-        "text": " x"
-      },
-      {
-        "id": 319,
-        "logprob": -0.42871094,
-        "special": false,
-        "text": " *"
-      },
-      {
-        "id": 533,
-        "logprob": 0.0,
-        "special": false,
-        "text": " y"
-      },
-      {
-        "id": 30,
-        "logprob": 0.0,
-        "special": false,
-        "text": ","
-      },
-      {
-        "id": 498,
-        "logprob": 0.0,
-        "special": false,
-        "text": " L"
-      },
-      {
-        "id": 27,
-        "logprob": 0.0,
-        "special": false,
-        "text": ")"
-      },
-      {
-        "id": 1115,
-        "logprob": 0.0,
-        "special": false,
-        "text": " **"
-      },
-      {
-        "id": 308,
-        "logprob": 0.0,
-        "special": false,
-        "text": " ("
-      },
-      {
-        "id": 35,
-        "logprob": 0.0,
-        "special": false,
-        "text": "1"
-      },
-      {
-        "id": 32,
-        "logprob": -0.31323242,
-        "special": false,
-        "text": "."
-      },
-      {
-        "id": 34,
-        "logprob": 0.0,
-        "special": false,
-        "text": "0"
+        "id": 0,
+        "logprob": null,
+        "special": true,
+        "text": "<|endoftext|>"
       }
     ],
     "top_tokens": null
   },
-  "generated_text": "\n    return reduce(lambda x, y: x * y, L) ** (1.0"
+  "generated_text": "\n   "
 }
diff --git a/integration-tests/models/__snapshots__/test_flash_starcoder_gptq/test_flash_starcoder_gptq_load.json b/integration-tests/models/__snapshots__/test_flash_starcoder_gptq/test_flash_starcoder_gptq_load.json
index 46a21ed8..c9b5ab20 100644
--- a/integration-tests/models/__snapshots__/test_flash_starcoder_gptq/test_flash_starcoder_gptq_load.json
+++ b/integration-tests/models/__snapshots__/test_flash_starcoder_gptq/test_flash_starcoder_gptq_load.json
@@ -2,8 +2,8 @@
   {
     "details": {
       "best_of_sequences": null,
-      "finish_reason": "length",
-      "generated_tokens": 10,
+      "finish_reason": "eos_token",
+      "generated_tokens": 2,
       "prefill": [
         {
           "id": 589,
@@ -12,57 +12,57 @@
         },
         {
           "id": 3226,
-          "logprob": -8.5859375,
+          "logprob": -8.9453125,
           "text": " ge"
         },
         {
           "id": 21017,
-          "logprob": -7.5820312,
+          "logprob": -8.859375,
           "text": "ometric"
         },
         {
           "id": 81,
-          "logprob": -0.26708984,
+          "logprob": -0.21826172,
           "text": "_"
         },
         {
           "id": 6009,
-          "logprob": -1.6386719,
+          "logprob": -1.3085938,
           "text": "mean"
         },
         {
           "id": 26,
-          "logprob": -0.22717285,
+          "logprob": -0.2548828,
           "text": "("
         },
         {
           "id": 62,
-          "logprob": -5.234375,
+          "logprob": -4.8007812,
           "text": "L"
         },
         {
           "id": 44,
-          "logprob": -3.1015625,
+          "logprob": -3.7871094,
           "text": ":"
         },
         {
           "id": 1682,
-          "logprob": -1.1083984,
+          "logprob": -0.81152344,
           "text": " List"
         },
         {
           "id": 77,
-          "logprob": -0.14294434,
+          "logprob": -0.22644043,
           "text": "["
         },
         {
           "id": 1808,
-          "logprob": -0.32592773,
+          "logprob": -0.46313477,
           "text": "float"
         },
         {
           "id": 10794,
-          "logprob": -2.8164062,
+          "logprob": -3.0253906,
           "text": "]):"
         }
       ],
@@ -70,74 +70,26 @@
       "tokens": [
         {
           "id": 284,
-          "logprob": -0.12817383,
+          "logprob": -0.046936035,
           "special": false,
           "text": "\n   "
         },
         {
-          "id": 1524,
-          "logprob": -0.9863281,
-          "special": false,
-          "text": " \"\"\""
-        },
-        {
-          "id": 284,
-          "logprob": -0.7011719,
-          "special": false,
-          "text": "\n   "
-        },
-        {
-          "id": 14883,
-          "logprob": -2.2050781,
-          "special": false,
-          "text": " Calculate"
-        },
-        {
-          "id": 322,
-          "logprob": -0.2668457,
-          "special": false,
-          "text": " the"
-        },
-        {
-          "id": 3226,
-          "logprob": -0.08465576,
-          "special": false,
-          "text": " ge"
-        },
-        {
-          "id": 21017,
-          "logprob": -0.019012451,
-          "special": false,
-          "text": "ometric"
-        },
-        {
-          "id": 5651,
-          "logprob": -0.028625488,
-          "special": false,
-          "text": " mean"
-        },
-        {
-          "id": 432,
-          "logprob": -0.29418945,
-          "special": false,
-          "text": " of"
-        },
-        {
-          "id": 312,
-          "logprob": -0.3161621,
-          "special": false,
-          "text": " a"
+          "id": 0,
+          "logprob": null,
+          "special": true,
+          "text": "<|endoftext|>"
         }
       ],
       "top_tokens": null
     },
-    "generated_text": "\n    \"\"\"\n    Calculate the geometric mean of a"
+    "generated_text": "\n   "
   },
   {
     "details": {
       "best_of_sequences": null,
-      "finish_reason": "length",
-      "generated_tokens": 10,
+      "finish_reason": "eos_token",
+      "generated_tokens": 2,
       "prefill": [
         {
           "id": 589,
@@ -146,57 +98,57 @@
         },
         {
           "id": 3226,
-          "logprob": -8.5859375,
+          "logprob": -8.9375,
           "text": " ge"
         },
         {
           "id": 21017,
-          "logprob": -7.59375,
+          "logprob": -8.859375,
           "text": "ometric"
         },
         {
           "id": 81,
-          "logprob": -0.26953125,
+          "logprob": -0.21899414,
           "text": "_"
         },
         {
           "id": 6009,
-          "logprob": -1.640625,
+          "logprob": -1.3105469,
           "text": "mean"
         },
         {
           "id": 26,
-          "logprob": -0.22705078,
+          "logprob": -0.25561523,
           "text": "("
         },
         {
           "id": 62,
-          "logprob": -5.234375,
+          "logprob": -4.8085938,
           "text": "L"
         },
         {
           "id": 44,
-          "logprob": -3.1132812,
+          "logprob": -3.7890625,
           "text": ":"
         },
         {
           "id": 1682,
-          "logprob": -1.1123047,
+          "logprob": -0.80615234,
           "text": " List"
         },
         {
           "id": 77,
-          "logprob": -0.14294434,
+          "logprob": -0.22375488,
           "text": "["
         },
         {
           "id": 1808,
-          "logprob": -0.32299805,
+          "logprob": -0.46801758,
           "text": "float"
         },
         {
           "id": 10794,
-          "logprob": -2.8164062,
+          "logprob": -3.0253906,
           "text": "]):"
         }
       ],
@@ -204,74 +156,26 @@
       "tokens": [
         {
           "id": 284,
-          "logprob": -0.12854004,
+          "logprob": -0.046447754,
           "special": false,
           "text": "\n   "
         },
         {
-          "id": 1524,
-          "logprob": -0.9897461,
-          "special": false,
-          "text": " \"\"\""
-        },
-        {
-          "id": 284,
-          "logprob": -0.69970703,
-          "special": false,
-          "text": "\n   "
-        },
-        {
-          "id": 14883,
-          "logprob": -2.2050781,
-          "special": false,
-          "text": " Calculate"
-        },
-        {
-          "id": 322,
-          "logprob": -0.2668457,
-          "special": false,
-          "text": " the"
-        },
-        {
-          "id": 3226,
-          "logprob": -0.08496094,
-          "special": false,
-          "text": " ge"
-        },
-        {
-          "id": 21017,
-          "logprob": -0.019012451,
-          "special": false,
-          "text": "ometric"
-        },
-        {
-          "id": 5651,
-          "logprob": -0.029037476,
-          "special": false,
-          "text": " mean"
-        },
-        {
-          "id": 432,
-          "logprob": -0.2939453,
-          "special": false,
-          "text": " of"
-        },
-        {
-          "id": 312,
-          "logprob": -0.31591797,
-          "special": false,
-          "text": " a"
+          "id": 0,
+          "logprob": null,
+          "special": true,
+          "text": "<|endoftext|>"
         }
       ],
       "top_tokens": null
     },
-    "generated_text": "\n    \"\"\"\n    Calculate the geometric mean of a"
+    "generated_text": "\n   "
   },
   {
     "details": {
       "best_of_sequences": null,
-      "finish_reason": "length",
-      "generated_tokens": 10,
+      "finish_reason": "eos_token",
+      "generated_tokens": 2,
       "prefill": [
         {
           "id": 589,
@@ -280,57 +184,57 @@
         },
         {
           "id": 3226,
-          "logprob": -8.5859375,
+          "logprob": -8.9453125,
           "text": " ge"
         },
         {
           "id": 21017,
-          "logprob": -7.5859375,
+          "logprob": -8.859375,
           "text": "ometric"
         },
         {
           "id": 81,
-          "logprob": -0.26586914,
+          "logprob": -0.2163086,
           "text": "_"
         },
         {
           "id": 6009,
-          "logprob": -1.6347656,
+          "logprob": -1.2958984,
           "text": "mean"
         },
         {
           "id": 26,
-          "logprob": -0.22766113,
+          "logprob": -0.2529297,
           "text": "("
         },
         {
           "id": 62,
-          "logprob": -5.2265625,
+          "logprob": -4.796875,
           "text": "L"
         },
         {
           "id": 44,
-          "logprob": -3.0976562,
+          "logprob": -3.7910156,
           "text": ":"
         },
         {
           "id": 1682,
-          "logprob": -1.1025391,
+          "logprob": -0.8076172,
           "text": " List"
         },
         {
           "id": 77,
-          "logprob": -0.1427002,
+          "logprob": -0.22375488,
           "text": "["
         },
         {
           "id": 1808,
-          "logprob": -0.32592773,
+          "logprob": -0.46655273,
           "text": "float"
         },
         {
           "id": 10794,
-          "logprob": -2.8164062,
+          "logprob": -3.0234375,
           "text": "]):"
         }
       ],
@@ -338,74 +242,26 @@
       "tokens": [
         {
           "id": 284,
-          "logprob": -0.13012695,
+          "logprob": -0.0463562,
           "special": false,
           "text": "\n   "
         },
         {
-          "id": 1524,
-          "logprob": -0.98046875,
-          "special": false,
-          "text": " \"\"\""
-        },
-        {
-          "id": 284,
-          "logprob": -0.69921875,
-          "special": false,
-          "text": "\n   "
-        },
-        {
-          "id": 14883,
-          "logprob": -2.1992188,
-          "special": false,
-          "text": " Calculate"
-        },
-        {
-          "id": 322,
-          "logprob": -0.2668457,
-          "special": false,
-          "text": " the"
-        },
-        {
-          "id": 3226,
-          "logprob": -0.083496094,
-          "special": false,
-          "text": " ge"
-        },
-        {
-          "id": 21017,
-          "logprob": -0.01902771,
-          "special": false,
-          "text": "ometric"
-        },
-        {
-          "id": 5651,
-          "logprob": -0.029006958,
-          "special": false,
-          "text": " mean"
-        },
-        {
-          "id": 432,
-          "logprob": -0.29248047,
-          "special": false,
-          "text": " of"
-        },
-        {
-          "id": 312,
-          "logprob": -0.3161621,
-          "special": false,
-          "text": " a"
+          "id": 0,
+          "logprob": null,
+          "special": true,
+          "text": "<|endoftext|>"
         }
       ],
       "top_tokens": null
     },
-    "generated_text": "\n    \"\"\"\n    Calculate the geometric mean of a"
+    "generated_text": "\n   "
   },
   {
     "details": {
       "best_of_sequences": null,
-      "finish_reason": "length",
-      "generated_tokens": 10,
+      "finish_reason": "eos_token",
+      "generated_tokens": 2,
       "prefill": [
         {
           "id": 589,
@@ -414,57 +270,57 @@
         },
         {
           "id": 3226,
-          "logprob": -8.5859375,
+          "logprob": -8.9453125,
           "text": " ge"
         },
         {
           "id": 21017,
-          "logprob": -7.5859375,
+          "logprob": -8.859375,
           "text": "ometric"
         },
         {
           "id": 81,
-          "logprob": -0.26904297,
+          "logprob": -0.21862793,
           "text": "_"
         },
         {
           "id": 6009,
-          "logprob": -1.6386719,
+          "logprob": -1.3095703,
           "text": "mean"
         },
         {
           "id": 26,
-          "logprob": -0.22705078,
+          "logprob": -0.25512695,
           "text": "("
         },
         {
           "id": 62,
-          "logprob": -5.234375,
+          "logprob": -4.796875,
           "text": "L"
         },
         {
           "id": 44,
-          "logprob": -3.1132812,
+          "logprob": -3.7890625,
           "text": ":"
         },
         {
           "id": 1682,
-          "logprob": -1.1074219,
+          "logprob": -0.79589844,
           "text": " List"
         },
         {
           "id": 77,
-          "logprob": -0.14477539,
+          "logprob": -0.22692871,
           "text": "["
         },
         {
           "id": 1808,
-          "logprob": -0.3256836,
+          "logprob": -0.46801758,
           "text": "float"
         },
         {
           "id": 10794,
-          "logprob": -2.8027344,
+          "logprob": -3.0097656,
           "text": "]):"
         }
       ],
@@ -472,67 +328,19 @@
       "tokens": [
         {
           "id": 284,
-          "logprob": -0.12915039,
+          "logprob": -0.04638672,
           "special": false,
           "text": "\n   "
         },
         {
-          "id": 1524,
-          "logprob": -0.98535156,
-          "special": false,
-          "text": " \"\"\""
-        },
-        {
-          "id": 284,
-          "logprob": -0.69921875,
-          "special": false,
-          "text": "\n   "
-        },
-        {
-          "id": 14883,
-          "logprob": -2.2011719,
-          "special": false,
-          "text": " Calculate"
-        },
-        {
-          "id": 322,
-          "logprob": -0.26708984,
-          "special": false,
-          "text": " the"
-        },
-        {
-          "id": 3226,
-          "logprob": -0.08502197,
-          "special": false,
-          "text": " ge"
-        },
-        {
-          "id": 21017,
-          "logprob": -0.019012451,
-          "special": false,
-          "text": "ometric"
-        },
-        {
-          "id": 5651,
-          "logprob": -0.028625488,
-          "special": false,
-          "text": " mean"
-        },
-        {
-          "id": 432,
-          "logprob": -0.29589844,
-          "special": false,
-          "text": " of"
-        },
-        {
-          "id": 312,
-          "logprob": -0.31591797,
-          "special": false,
-          "text": " a"
+          "id": 0,
+          "logprob": null,
+          "special": true,
+          "text": "<|endoftext|>"
         }
       ],
       "top_tokens": null
     },
-    "generated_text": "\n    \"\"\"\n    Calculate the geometric mean of a"
+    "generated_text": "\n   "
   }
 ]
diff --git a/integration-tests/models/__snapshots__/test_mamba/test_mamba_all_params.json b/integration-tests/models/__snapshots__/test_mamba/test_mamba_all_params.json
index ef88926c..93724fe4 100644
--- a/integration-tests/models/__snapshots__/test_mamba/test_mamba_all_params.json
+++ b/integration-tests/models/__snapshots__/test_mamba/test_mamba_all_params.json
@@ -11,22 +11,22 @@
       },
       {
         "id": 13,
-        "logprob": -2.734375,
+        "logprob": -2.59375,
         "text": ","
       },
       {
         "id": 8862,
-        "logprob": -3.6875,
+        "logprob": -3.5625,
         "text": " yellow"
       },
       {
         "id": 13,
-        "logprob": -0.40234375,
+        "logprob": -0.44726562,
         "text": ","
       },
       {
         "id": 209,
-        "logprob": -8.25,
+        "logprob": -8.0,
         "text": " "
       }
     ],
@@ -52,7 +52,7 @@
       },
       {
         "id": 9830,
-        "logprob": -2.25,
+        "logprob": -2.03125,
         "special": false,
         "text": " colors"
       },
@@ -64,13 +64,13 @@
       },
       {
         "id": 329,
-        "logprob": -2.171875,
+        "logprob": -2.734375,
         "special": false,
         "text": " A"
       },
       {
         "id": 1180,
-        "logprob": -2.046875,
+        "logprob": -2.0,
         "special": false,
         "text": " number"
       },
@@ -81,19 +81,19 @@
         "text": " of"
       },
       {
-        "id": 1027,
-        "logprob": -1.5546875,
+        "id": 253,
+        "logprob": -0.69140625,
         "special": false,
-        "text": " different"
+        "text": " the"
       },
       {
         "id": 3295,
-        "logprob": -0.97265625,
+        "logprob": -0.8203125,
         "special": false,
         "text": " color"
       }
     ],
     "top_tokens": null
   },
-  "generated_text": "blue, red, yellow, \nand blue colors. A number of different color"
+  "generated_text": "blue, red, yellow, \nand blue colors. A number of the color"
 }
diff --git a/integration-tests/models/__snapshots__/test_mt0_base/test_mt0_base_all_params.json b/integration-tests/models/__snapshots__/test_mt0_base/test_mt0_base_all_params.json
index 5cacf3e9..40ec7e2f 100644
--- a/integration-tests/models/__snapshots__/test_mt0_base/test_mt0_base_all_params.json
+++ b/integration-tests/models/__snapshots__/test_mt0_base/test_mt0_base_all_params.json
@@ -26,13 +26,13 @@
       },
       {
         "id": 259,
-        "logprob": -0.4716797,
+        "logprob": -0.46948242,
         "special": false,
         "text": " "
       },
       {
         "id": 261,
-        "logprob": -0.044677734,
+        "logprob": -0.15307617,
         "special": false,
         "text": ","
       },
@@ -56,7 +56,7 @@
       },
       {
         "id": 35622,
-        "logprob": -1.1630859,
+        "logprob": -1.2998047,
         "special": false,
         "text": " cloud"
       },
diff --git a/integration-tests/models/test_flash_starcoder_gptq.py b/integration-tests/models/test_flash_starcoder_gptq.py
index f1007d6e..6d46e54d 100644
--- a/integration-tests/models/test_flash_starcoder_gptq.py
+++ b/integration-tests/models/test_flash_starcoder_gptq.py
@@ -21,7 +21,7 @@ async def test_flash_starcoder_gptq(flash_starcoder_gptq, generous_response_snap
         max_new_tokens=20,
         decoder_input_details=True,
     )
-    assert response.details.generated_tokens == 20
+    assert response.details.generated_tokens == 2
     assert response == generous_response_snapshot
 
 
@@ -38,7 +38,7 @@ async def test_flash_starcoder_gptq_default_params(
         decoder_input_details=True,
         seed=0,
     )
-    assert response.details.generated_tokens == 20
+    assert response.details.generated_tokens == 2
     assert response == generous_response_snapshot
 
 
diff --git a/integration-tests/models/test_mamba.py b/integration-tests/models/test_mamba.py
index bc946de8..8548970a 100644
--- a/integration-tests/models/test_mamba.py
+++ b/integration-tests/models/test_mamba.py
@@ -47,7 +47,7 @@ async def test_mamba_all_params(fused_kernel_mamba, response_snapshot):
     assert response.details.generated_tokens == 10
     assert (
         response.generated_text
-        == "blue, red, yellow, \nand blue colors. A number of different color"
+        == "blue, red, yellow, \nand blue colors. A number of the color"
     )
     assert response == response_snapshot