diff --git a/integration-tests/models/__snapshots__/test_flash_mixtral_gptq/test_flash_mixtral_gptq.json b/integration-tests/models/__snapshots__/test_flash_mixtral_gptq/test_flash_mixtral_gptq.json
index 993bdaddc..b835bf075 100644
--- a/integration-tests/models/__snapshots__/test_flash_mixtral_gptq/test_flash_mixtral_gptq.json
+++ b/integration-tests/models/__snapshots__/test_flash_mixtral_gptq/test_flash_mixtral_gptq.json
@@ -10,80 +10,95 @@
         "text": "<s>"
       },
       {
-        "id": 3735,
-        "logprob": -11.0078125,
-        "text": "Test"
+        "id": 1824,
+        "logprob": -9.2890625,
+        "text": "What"
       },
       {
-        "id": 2159,
-        "logprob": -13.59375,
-        "text": "request"
+        "id": 349,
+        "logprob": -1.1503906,
+        "text": "is"
+      },
+      {
+        "id": 3534,
+        "logprob": -9.5859375,
+        "text": "deep"
+      },
+      {
+        "id": 5168,
+        "logprob": -1.3945312,
+        "text": "learning"
+      },
+      {
+        "id": 28804,
+        "logprob": -0.4555664,
+        "text": "?"
       }
     ],
     "seed": null,
     "tokens": [
       {
         "id": 13,
-        "logprob": -1.7089844,
+        "logprob": -0.6953125,
         "special": false,
         "text": "\n"
       },
       {
         "id": 13,
-        "logprob": -0.68847656,
+        "logprob": -0.4777832,
         "special": false,
         "text": "\n"
       },
       {
-        "id": 28771,
-        "logprob": -1.9394531,
+        "id": 23229,
+        "logprob": -0.13256836,
         "special": false,
-        "text": "#"
+        "text": "Deep"
       },
       {
-        "id": 3735,
-        "logprob": -2.8808594,
+        "id": 5168,
+        "logprob": -0.023849487,
         "special": false,
-        "text": " Test"
+        "text": " learning"
       },
       {
-        "id": 2159,
-        "logprob": -0.37280273,
+        "id": 349,
+        "logprob": -0.13977051,
         "special": false,
-        "text": " request"
+        "text": " is"
       },
       {
-        "id": 13,
-        "logprob": -0.26098633,
+        "id": 264,
+        "logprob": -0.14489746,
         "special": false,
-        "text": "\n"
+        "text": " a"
       },
       {
-        "id": 13,
-        "logprob": -0.0017137527,
+        "id": 19804,
+        "logprob": -0.63183594,
         "special": false,
-        "text": "\n"
+        "text": " subset"
       },
       {
-        "id": 1064,
-        "logprob": -2.2695312,
+        "id": 302,
+        "logprob": -0.010314941,
         "special": false,
-        "text": "##"
+        "text": " of"
       },
       {
-        "id": 3735,
-        "logprob": -1.9238281,
+        "id": 5599,
+        "logprob": -0.0635376,
         "special": false,
-        "text": " Test"
+        "text": " machine"
       },
       {
-        "id": 2159,
-        "logprob": -0.48828125,
+        "id": 5168,
+        "logprob": -0.0028572083,
         "special": false,
-        "text": " request"
+        "text": " learning"
       }
     ],
     "top_tokens": null
   },
-  "generated_text": "\n\n# Test request\n\n## Test request"
+  "generated_text": "\n\nDeep learning is a subset of machine learning"
 }
diff --git a/integration-tests/models/__snapshots__/test_flash_mixtral_gptq/test_flash_mixtral_gptq_all_params.json b/integration-tests/models/__snapshots__/test_flash_mixtral_gptq/test_flash_mixtral_gptq_all_params.json
index 94411eefb..77c885990 100644
--- a/integration-tests/models/__snapshots__/test_flash_mixtral_gptq/test_flash_mixtral_gptq_all_params.json
+++ b/integration-tests/models/__snapshots__/test_flash_mixtral_gptq/test_flash_mixtral_gptq_all_params.json
@@ -10,42 +10,28 @@
         "text": "<s>"
       },
       {
-        "id": 3735,
-        "logprob": -11.0078125,
-        "text": "Test"
+        "id": 349,
+        "logprob": -12.0546875,
+        "text": "is"
       },
       {
-        "id": 2159,
-        "logprob": -13.59375,
-        "text": "request"
+        "id": 3534,
+        "logprob": -10.53125,
+        "text": "deep"
+      },
+      {
+        "id": 5168,
+        "logprob": -2.71875,
+        "text": "learning"
+      },
+      {
+        "id": 28804,
+        "logprob": -5.0078125,
+        "text": "?"
       }
     ],
     "seed": 0,
     "tokens": [
-      {
-        "id": 13,
-        "logprob": -0.34838867,
-        "special": false,
-        "text": "\n"
-      },
-      {
-        "id": 13940,
-        "logprob": -0.38916016,
-        "special": false,
-        "text": "``"
-      },
-      {
-        "id": 28832,
-        "logprob": 0.0,
-        "special": false,
-        "text": "`"
-      },
-      {
-        "id": 3371,
-        "logprob": -1.2529297,
-        "special": false,
-        "text": "json"
-      },
       {
         "id": 13,
         "logprob": 0.0,
@@ -53,37 +39,61 @@
         "text": "\n"
       },
       {
-        "id": 28751,
-        "logprob": 0.0,
+        "id": 23229,
+        "logprob": -0.18237305,
         "special": false,
-        "text": "{"
+        "text": "Deep"
       },
       {
-        "id": 13,
+        "id": 17504,
         "logprob": 0.0,
         "special": false,
-        "text": "\n"
+        "text": " Learning"
       },
       {
-        "id": 2287,
+        "id": 349,
         "logprob": 0.0,
         "special": false,
-        "text": "   "
+        "text": " is"
       },
       {
-        "id": 345,
+        "id": 264,
         "logprob": 0.0,
         "special": false,
-        "text": " \""
+        "text": " a"
       },
       {
-        "id": 3134,
-        "logprob": -0.640625,
+        "id": 19804,
+        "logprob": 0.0,
         "special": false,
-        "text": "request"
+        "text": " subset"
+      },
+      {
+        "id": 302,
+        "logprob": 0.0,
+        "special": false,
+        "text": " of"
+      },
+      {
+        "id": 13253,
+        "logprob": -0.6040039,
+        "special": false,
+        "text": " Machine"
+      },
+      {
+        "id": 17504,
+        "logprob": 0.0,
+        "special": false,
+        "text": " Learning"
+      },
+      {
+        "id": 28725,
+        "logprob": -0.11621094,
+        "special": false,
+        "text": ","
       }
     ],
     "top_tokens": null
   },
-  "generated_text": "Test request\n```json\n{\n    \"request"
+  "generated_text": "What is deep learning?\nDeep Learning is a subset of Machine Learning,"
 }
diff --git a/integration-tests/models/__snapshots__/test_flash_mixtral_gptq/test_flash_mixtral_gptq_load.json b/integration-tests/models/__snapshots__/test_flash_mixtral_gptq/test_flash_mixtral_gptq_load.json
index 19e306a38..959e3c557 100644
--- a/integration-tests/models/__snapshots__/test_flash_mixtral_gptq/test_flash_mixtral_gptq_load.json
+++ b/integration-tests/models/__snapshots__/test_flash_mixtral_gptq/test_flash_mixtral_gptq_load.json
@@ -11,82 +11,97 @@
           "text": "<s>"
         },
         {
-          "id": 3735,
-          "logprob": -11.0078125,
-          "text": "Test"
+          "id": 1824,
+          "logprob": -9.2890625,
+          "text": "What"
         },
         {
-          "id": 2159,
-          "logprob": -13.59375,
-          "text": "request"
+          "id": 349,
+          "logprob": -1.1503906,
+          "text": "is"
+        },
+        {
+          "id": 3534,
+          "logprob": -9.5859375,
+          "text": "deep"
+        },
+        {
+          "id": 5168,
+          "logprob": -1.3945312,
+          "text": "learning"
+        },
+        {
+          "id": 28804,
+          "logprob": -0.4555664,
+          "text": "?"
         }
       ],
       "seed": null,
       "tokens": [
         {
           "id": 13,
-          "logprob": -1.7089844,
+          "logprob": -0.6953125,
           "special": false,
           "text": "\n"
         },
         {
           "id": 13,
-          "logprob": -0.68847656,
+          "logprob": -0.4777832,
           "special": false,
           "text": "\n"
         },
         {
-          "id": 28771,
-          "logprob": -1.9394531,
+          "id": 23229,
+          "logprob": -0.13232422,
           "special": false,
-          "text": "#"
+          "text": "Deep"
         },
         {
-          "id": 3735,
-          "logprob": -2.8828125,
+          "id": 5168,
+          "logprob": -0.023834229,
           "special": false,
-          "text": " Test"
+          "text": " learning"
         },
         {
-          "id": 2159,
-          "logprob": -0.37329102,
+          "id": 349,
+          "logprob": -0.13977051,
           "special": false,
-          "text": " request"
+          "text": " is"
         },
         {
-          "id": 13,
-          "logprob": -0.2602539,
+          "id": 264,
+          "logprob": -0.14416504,
           "special": false,
-          "text": "\n"
+          "text": " a"
         },
         {
-          "id": 13,
-          "logprob": -0.0017185211,
+          "id": 19804,
+          "logprob": -0.63183594,
           "special": false,
-          "text": "\n"
+          "text": " subset"
         },
         {
-          "id": 1064,
-          "logprob": -2.2753906,
+          "id": 302,
+          "logprob": -0.010223389,
           "special": false,
-          "text": "##"
+          "text": " of"
         },
         {
-          "id": 3735,
-          "logprob": -1.9316406,
+          "id": 5599,
+          "logprob": -0.064208984,
           "special": false,
-          "text": " Test"
+          "text": " machine"
         },
         {
-          "id": 2159,
-          "logprob": -0.48217773,
+          "id": 5168,
+          "logprob": -0.0028266907,
           "special": false,
-          "text": " request"
+          "text": " learning"
         }
       ],
       "top_tokens": null
     },
-    "generated_text": "\n\n# Test request\n\n## Test request"
+    "generated_text": "\n\nDeep learning is a subset of machine learning"
   },
   {
     "details": {
@@ -100,82 +115,97 @@
           "text": "<s>"
         },
         {
-          "id": 3735,
-          "logprob": -11.0078125,
-          "text": "Test"
+          "id": 1824,
+          "logprob": -9.2890625,
+          "text": "What"
         },
         {
-          "id": 2159,
-          "logprob": -13.59375,
-          "text": "request"
+          "id": 349,
+          "logprob": -1.1425781,
+          "text": "is"
+        },
+        {
+          "id": 3534,
+          "logprob": -9.59375,
+          "text": "deep"
+        },
+        {
+          "id": 5168,
+          "logprob": -1.390625,
+          "text": "learning"
+        },
+        {
+          "id": 28804,
+          "logprob": -0.45532227,
+          "text": "?"
         }
       ],
       "seed": null,
       "tokens": [
         {
           "id": 13,
-          "logprob": -1.7089844,
+          "logprob": -0.6953125,
           "special": false,
           "text": "\n"
         },
         {
           "id": 13,
-          "logprob": -0.68847656,
+          "logprob": -0.48339844,
           "special": false,
           "text": "\n"
         },
         {
-          "id": 28771,
-          "logprob": -1.9394531,
+          "id": 23229,
+          "logprob": -0.13256836,
           "special": false,
-          "text": "#"
+          "text": "Deep"
         },
         {
-          "id": 3735,
-          "logprob": -2.8828125,
+          "id": 5168,
+          "logprob": -0.02420044,
           "special": false,
-          "text": " Test"
+          "text": " learning"
         },
         {
-          "id": 2159,
-          "logprob": -0.37329102,
+          "id": 349,
+          "logprob": -0.13977051,
           "special": false,
-          "text": " request"
+          "text": " is"
         },
         {
-          "id": 13,
-          "logprob": -0.2602539,
+          "id": 264,
+          "logprob": -0.14501953,
           "special": false,
-          "text": "\n"
+          "text": " a"
         },
         {
-          "id": 13,
-          "logprob": -0.0017185211,
+          "id": 19804,
+          "logprob": -0.63134766,
           "special": false,
-          "text": "\n"
+          "text": " subset"
         },
         {
-          "id": 1064,
-          "logprob": -2.2753906,
+          "id": 302,
+          "logprob": -0.010223389,
           "special": false,
-          "text": "##"
+          "text": " of"
         },
         {
-          "id": 3735,
-          "logprob": -1.9316406,
+          "id": 5599,
+          "logprob": -0.06427002,
           "special": false,
-          "text": " Test"
+          "text": " machine"
         },
         {
-          "id": 2159,
-          "logprob": -0.48217773,
+          "id": 5168,
+          "logprob": -0.002817154,
           "special": false,
-          "text": " request"
+          "text": " learning"
         }
       ],
       "top_tokens": null
     },
-    "generated_text": "\n\n# Test request\n\n## Test request"
+    "generated_text": "\n\nDeep learning is a subset of machine learning"
   },
   {
     "details": {
@@ -189,82 +219,97 @@
           "text": "<s>"
         },
         {
-          "id": 3735,
-          "logprob": -11.0078125,
-          "text": "Test"
+          "id": 1824,
+          "logprob": -9.2890625,
+          "text": "What"
         },
         {
-          "id": 2159,
-          "logprob": -13.59375,
-          "text": "request"
+          "id": 349,
+          "logprob": -1.1425781,
+          "text": "is"
+        },
+        {
+          "id": 3534,
+          "logprob": -9.59375,
+          "text": "deep"
+        },
+        {
+          "id": 5168,
+          "logprob": -1.390625,
+          "text": "learning"
+        },
+        {
+          "id": 28804,
+          "logprob": -0.45532227,
+          "text": "?"
         }
       ],
       "seed": null,
       "tokens": [
         {
           "id": 13,
-          "logprob": -1.7089844,
+          "logprob": -0.6953125,
           "special": false,
           "text": "\n"
         },
         {
           "id": 13,
-          "logprob": -0.68847656,
+          "logprob": -0.48339844,
           "special": false,
           "text": "\n"
         },
         {
-          "id": 28771,
-          "logprob": -1.9394531,
+          "id": 23229,
+          "logprob": -0.13256836,
           "special": false,
-          "text": "#"
+          "text": "Deep"
         },
         {
-          "id": 3735,
-          "logprob": -2.8828125,
+          "id": 5168,
+          "logprob": -0.02420044,
           "special": false,
-          "text": " Test"
+          "text": " learning"
         },
         {
-          "id": 2159,
-          "logprob": -0.37329102,
+          "id": 349,
+          "logprob": -0.13977051,
           "special": false,
-          "text": " request"
+          "text": " is"
         },
         {
-          "id": 13,
-          "logprob": -0.2602539,
+          "id": 264,
+          "logprob": -0.14501953,
           "special": false,
-          "text": "\n"
+          "text": " a"
         },
         {
-          "id": 13,
-          "logprob": -0.0017185211,
+          "id": 19804,
+          "logprob": -0.63134766,
           "special": false,
-          "text": "\n"
+          "text": " subset"
         },
         {
-          "id": 1064,
-          "logprob": -2.2753906,
+          "id": 302,
+          "logprob": -0.010223389,
           "special": false,
-          "text": "##"
+          "text": " of"
         },
         {
-          "id": 3735,
-          "logprob": -1.9316406,
+          "id": 5599,
+          "logprob": -0.06427002,
           "special": false,
-          "text": " Test"
+          "text": " machine"
         },
         {
-          "id": 2159,
-          "logprob": -0.48217773,
+          "id": 5168,
+          "logprob": -0.002817154,
           "special": false,
-          "text": " request"
+          "text": " learning"
         }
       ],
       "top_tokens": null
     },
-    "generated_text": "\n\n# Test request\n\n## Test request"
+    "generated_text": "\n\nDeep learning is a subset of machine learning"
   },
   {
     "details": {
@@ -278,81 +323,96 @@
           "text": "<s>"
         },
         {
-          "id": 3735,
-          "logprob": -11.0078125,
-          "text": "Test"
+          "id": 1824,
+          "logprob": -9.2890625,
+          "text": "What"
         },
         {
-          "id": 2159,
-          "logprob": -13.59375,
-          "text": "request"
+          "id": 349,
+          "logprob": -1.1425781,
+          "text": "is"
+        },
+        {
+          "id": 3534,
+          "logprob": -9.59375,
+          "text": "deep"
+        },
+        {
+          "id": 5168,
+          "logprob": -1.390625,
+          "text": "learning"
+        },
+        {
+          "id": 28804,
+          "logprob": -0.45532227,
+          "text": "?"
         }
       ],
       "seed": null,
       "tokens": [
         {
           "id": 13,
-          "logprob": -1.7089844,
+          "logprob": -0.6953125,
           "special": false,
           "text": "\n"
         },
         {
           "id": 13,
-          "logprob": -0.68847656,
+          "logprob": -0.48339844,
           "special": false,
           "text": "\n"
         },
         {
-          "id": 28771,
-          "logprob": -1.9394531,
+          "id": 23229,
+          "logprob": -0.13256836,
           "special": false,
-          "text": "#"
+          "text": "Deep"
         },
         {
-          "id": 3735,
-          "logprob": -2.8828125,
+          "id": 5168,
+          "logprob": -0.02420044,
           "special": false,
-          "text": " Test"
+          "text": " learning"
         },
         {
-          "id": 2159,
-          "logprob": -0.37329102,
+          "id": 349,
+          "logprob": -0.13977051,
           "special": false,
-          "text": " request"
+          "text": " is"
         },
         {
-          "id": 13,
-          "logprob": -0.2602539,
+          "id": 264,
+          "logprob": -0.14501953,
           "special": false,
-          "text": "\n"
+          "text": " a"
         },
         {
-          "id": 13,
-          "logprob": -0.0017185211,
+          "id": 19804,
+          "logprob": -0.63134766,
           "special": false,
-          "text": "\n"
+          "text": " subset"
         },
         {
-          "id": 1064,
-          "logprob": -2.2753906,
+          "id": 302,
+          "logprob": -0.010223389,
           "special": false,
-          "text": "##"
+          "text": " of"
         },
         {
-          "id": 3735,
-          "logprob": -1.9316406,
+          "id": 5599,
+          "logprob": -0.06427002,
           "special": false,
-          "text": " Test"
+          "text": " machine"
         },
         {
-          "id": 2159,
-          "logprob": -0.48217773,
+          "id": 5168,
+          "logprob": -0.002817154,
           "special": false,
-          "text": " request"
+          "text": " learning"
         }
       ],
       "top_tokens": null
     },
-    "generated_text": "\n\n# Test request\n\n## Test request"
+    "generated_text": "\n\nDeep learning is a subset of machine learning"
   }
 ]
diff --git a/integration-tests/models/test_flash_mixtral_gptq.py b/integration-tests/models/test_flash_mixtral_gptq.py
index eb8806284..47bcb0bf3 100644
--- a/integration-tests/models/test_flash_mixtral_gptq.py
+++ b/integration-tests/models/test_flash_mixtral_gptq.py
@@ -3,7 +3,11 @@ import pytest
 
 @pytest.fixture(scope="module")
 def flash_mixtral_gptq_handle(launcher):
-    with launcher("TheBloke/Mixtral-8x7B-Instruct-v0.1-GPTQ", num_shard=2) as handle:
+    with launcher(
+        "TheBloke/Mixtral-8x7B-Instruct-v0.1-GPTQ",
+        revision="gptq-4bit-128g-actorder_True",
+        num_shard=2,
+    ) as handle:
         yield handle
 
 
@@ -16,7 +20,12 @@ async def flash_mixtral_gptq(flash_mixtral_gptq_handle):
 @pytest.mark.asyncio
 async def test_flash_mixtral_gptq(flash_mixtral_gptq, response_snapshot):
     response = await flash_mixtral_gptq.generate(
-        "Test request", max_new_tokens=10, decoder_input_details=True
+        "What is deep learning?", max_new_tokens=10, decoder_input_details=True
+    )
+
+    assert response.details.generated_tokens == 10
+    assert (
+        response.generated_text == "\n\nDeep learning is a subset of machine learning"
     )
 
     assert response == response_snapshot
@@ -25,7 +34,7 @@ async def test_flash_mixtral_gptq(flash_mixtral_gptq, response_snapshot):
 @pytest.mark.asyncio
 async def test_flash_mixtral_gptq_all_params(flash_mixtral_gptq, response_snapshot):
     response = await flash_mixtral_gptq.generate(
-        "Test request",
+        "What is deep learning?",
         max_new_tokens=10,
         repetition_penalty=1.2,
         return_full_text=True,
@@ -41,6 +50,10 @@ async def test_flash_mixtral_gptq_all_params(flash_mixtral_gptq, response_snapsh
     )
 
     assert response.details.generated_tokens == 10
+    assert (
+        response.generated_text
+        == "What is deep learning?\nDeep Learning is a subset of Machine Learning,"
+    )
     assert response == response_snapshot
 
 
@@ -49,10 +62,14 @@ async def test_flash_mixtral_gptq_load(
     flash_mixtral_gptq, generate_load, response_snapshot
 ):
     responses = await generate_load(
-        flash_mixtral_gptq, "Test request", max_new_tokens=10, n=4
+        flash_mixtral_gptq, "What is deep learning?", max_new_tokens=10, n=4
     )
 
     assert len(responses) == 4
+    assert (
+        responses[0].generated_text
+        == "\n\nDeep learning is a subset of machine learning"
+    )
     assert all(
         [r.generated_text == responses[0].generated_text for r in responses]
     ), f"{[r.generated_text  for r in responses]}"