diff --git a/integration-tests/models/__snapshots__/test_flash_neox/test_flash_neox.json b/integration-tests/models/__snapshots__/test_flash_neox/test_flash_neox.json
index 787704ce..66ddbaef 100644
--- a/integration-tests/models/__snapshots__/test_flash_neox/test_flash_neox.json
+++ b/integration-tests/models/__snapshots__/test_flash_neox/test_flash_neox.json
@@ -7,157 +7,107 @@
       {
         "id": 50278,
         "logprob": null,
-        "text": "<|prompter|>"
+        "text": "<|USER|>"
       },
       {
         "id": 1276,
-        "logprob": -8.03125,
+        "logprob": -4.5546875,
         "text": "What"
       },
-      {
-        "id": 310,
-        "logprob": -5.421875,
-        "text": " is"
-      },
-      {
-        "id": 247,
-        "logprob": -2.1601562,
-        "text": " a"
-      },
-      {
-        "id": 1167,
-        "logprob": -5.4609375,
-        "text": " mem"
-      },
-      {
-        "id": 70,
-        "logprob": -0.005657196,
-        "text": "e"
-      },
-      {
-        "id": 13,
-        "logprob": -7.28125,
-        "text": ","
-      },
-      {
-        "id": 285,
-        "logprob": -0.2980957,
-        "text": " and"
-      },
-      {
-        "id": 752,
-        "logprob": -2.1679688,
-        "text": " what"
-      },
       {
         "id": 434,
-        "logprob": -5.6210938,
+        "logprob": -4.234375,
         "text": "'s"
       },
       {
-        "id": 253,
-        "logprob": -0.81103516,
-        "text": " the"
+        "id": 634,
+        "logprob": -5.1054688,
+        "text": " your"
       },
       {
-        "id": 2892,
-        "logprob": -6.6640625,
-        "text": " history"
+        "id": 12315,
+        "logprob": -9.953125,
+        "text": " mood"
       },
       {
-        "id": 3212,
-        "logprob": -2.265625,
-        "text": " behind"
-      },
-      {
-        "id": 436,
-        "logprob": -11.5078125,
-        "text": " this"
-      },
-      {
-        "id": 3159,
-        "logprob": -2.1582031,
-        "text": " word"
+        "id": 3063,
+        "logprob": -4.0820312,
+        "text": " today"
       },
       {
         "id": 32,
-        "logprob": -0.008720398,
+        "logprob": -0.15148926,
         "text": "?"
       },
       {
-        "id": 0,
-        "logprob": -2.4726562,
-        "text": "<|endoftext|>"
-      },
-      {
-        "id": 50281,
-        "logprob": -18.265625,
-        "text": "<|assistant|>"
+        "id": 50279,
+        "logprob": -0.27026367,
+        "text": "<|ASSISTANT|>"
       }
     ],
     "seed": null,
     "tokens": [
       {
-        "id": 510,
-        "logprob": -0.63183594,
+        "id": 42,
+        "logprob": -0.88378906,
         "special": false,
-        "text": "The"
+        "text": "I"
       },
       {
-        "id": 3159,
-        "logprob": -0.5390625,
+        "id": 1353,
+        "logprob": -0.94921875,
         "special": false,
-        "text": " word"
+        "text": "'m"
       },
       {
-        "id": 346,
-        "logprob": -0.045684814,
+        "id": 417,
+        "logprob": -2.2402344,
         "special": false,
-        "text": " \""
+        "text": " not"
       },
       {
-        "id": 6441,
-        "logprob": -0.002090454,
+        "id": 2119,
+        "logprob": -0.3725586,
         "special": false,
-        "text": "mem"
+        "text": " sure"
       },
       {
-        "id": 70,
-        "logprob": -1.3589859e-05,
+        "id": 13,
+        "logprob": -1.078125,
         "special": false,
-        "text": "e"
+        "text": ","
       },
       {
-        "id": 3,
-        "logprob": -0.0009455681,
+        "id": 534,
+        "logprob": -0.67822266,
         "special": false,
-        "text": "\""
+        "text": " which"
       },
       {
-        "id": 369,
-        "logprob": -0.088012695,
+        "id": 310,
+        "logprob": -1.3837891,
         "special": false,
-        "text": " was"
+        "text": " is"
       },
       {
-        "id": 806,
-        "logprob": -0.12585449,
+        "id": 253,
+        "logprob": -1.7050781,
         "special": false,
-        "text": " first"
+        "text": " the"
       },
       {
-        "id": 908,
-        "logprob": -0.017196655,
+        "id": 1682,
+        "logprob": -0.052001953,
         "special": false,
-        "text": " used"
+        "text": " best"
       },
       {
-        "id": 275,
-        "logprob": -0.49731445,
+        "id": 1039,
+        "logprob": -2.0390625,
         "special": false,
-        "text": " in"
+        "text": " way"
       }
     ]
   },
-  "generated_text": "The word \"meme\" was first used in"
+  "generated_text": "I'm not sure, which is the best way"
 }
diff --git a/integration-tests/models/__snapshots__/test_flash_neox/test_flash_neox_load.json b/integration-tests/models/__snapshots__/test_flash_neox/test_flash_neox_load.json
index 47d6a77e..5ef6b3a2 100644
--- a/integration-tests/models/__snapshots__/test_flash_neox/test_flash_neox_load.json
+++ b/integration-tests/models/__snapshots__/test_flash_neox/test_flash_neox_load.json
@@ -8,159 +8,109 @@
         {
           "id": 50278,
           "logprob": null,
-          "text": "<|prompter|>"
+          "text": "<|USER|>"
         },
         {
           "id": 1276,
-          "logprob": -8.03125,
+          "logprob": -4.5546875,
           "text": "What"
         },
-        {
-          "id": 310,
-          "logprob": -5.421875,
-          "text": " is"
-        },
-        {
-          "id": 247,
-          "logprob": -2.1601562,
-          "text": " a"
-        },
-        {
-          "id": 1167,
-          "logprob": -5.4609375,
-          "text": " mem"
-        },
-        {
-          "id": 70,
-          "logprob": -0.005657196,
-          "text": "e"
-        },
-        {
-          "id": 13,
-          "logprob": -7.28125,
-          "text": ","
-        },
-        {
-          "id": 285,
-          "logprob": -0.2980957,
-          "text": " and"
-        },
-        {
-          "id": 752,
-          "logprob": -2.1679688,
-          "text": " what"
-        },
         {
           "id": 434,
-          "logprob": -5.6210938,
+          "logprob": -4.234375,
           "text": "'s"
         },
         {
-          "id": 253,
-          "logprob": -0.81103516,
-          "text": " the"
+          "id": 634,
+          "logprob": -5.21875,
+          "text": " your"
         },
         {
-          "id": 2892,
-          "logprob": -6.6640625,
-          "text": " history"
+          "id": 12315,
+          "logprob": -9.9375,
+          "text": " mood"
         },
         {
-          "id": 3212,
-          "logprob": -2.265625,
-          "text": " behind"
-        },
-        {
-          "id": 436,
-          "logprob": -11.5078125,
-          "text": " this"
-        },
-        {
-          "id": 3159,
-          "logprob": -2.1582031,
-          "text": " word"
+          "id": 3063,
+          "logprob": -4.1015625,
+          "text": " today"
         },
         {
           "id": 32,
-          "logprob": -0.008720398,
+          "logprob": -0.15319824,
           "text": "?"
         },
         {
-          "id": 0,
-          "logprob": -2.4726562,
-          "text": "<|endoftext|>"
-        },
-        {
-          "id": 50281,
-          "logprob": -18.265625,
-          "text": "<|assistant|>"
+          "id": 50279,
+          "logprob": -0.2614746,
+          "text": "<|ASSISTANT|>"
         }
       ],
       "seed": null,
       "tokens": [
         {
-          "id": 510,
-          "logprob": -0.63183594,
+          "id": 42,
+          "logprob": -0.8886719,
           "special": false,
-          "text": "The"
+          "text": "I"
         },
         {
-          "id": 3159,
-          "logprob": -0.5488281,
+          "id": 1353,
+          "logprob": -0.98046875,
           "special": false,
-          "text": " word"
+          "text": "'m"
         },
         {
-          "id": 346,
-          "logprob": -0.045684814,
+          "id": 417,
+          "logprob": -2.2265625,
           "special": false,
-          "text": " \""
+          "text": " not"
         },
         {
-          "id": 6441,
-          "logprob": -0.00207901,
+          "id": 2119,
+          "logprob": -0.3479004,
           "special": false,
-          "text": "mem"
+          "text": " sure"
         },
         {
-          "id": 70,
-          "logprob": -1.335144e-05,
+          "id": 13,
+          "logprob": -1.0117188,
           "special": false,
-          "text": "e"
+          "text": ","
         },
         {
-          "id": 3,
-          "logprob": -0.00097227097,
+          "id": 534,
+          "logprob": -0.67871094,
           "special": false,
-          "text": "\""
+          "text": " which"
         },
         {
-          "id": 369,
-          "logprob": -0.0892334,
+          "id": 310,
+          "logprob": -1.421875,
           "special": false,
-          "text": " was"
+          "text": " is"
         },
         {
-          "id": 806,
-          "logprob": -0.12463379,
+          "id": 253,
+          "logprob": -1.7382812,
           "special": false,
-          "text": " first"
+          "text": " the"
         },
         {
-          "id": 908,
-          "logprob": -0.01737976,
+          "id": 1682,
+          "logprob": -0.051330566,
           "special": false,
-          "text": " used"
+          "text": " best"
         },
         {
-          "id": 275,
-          "logprob": -0.50341797,
+          "id": 1039,
+          "logprob": -2.0390625,
           "special": false,
-          "text": " in"
+          "text": " way"
         }
       ]
     },
-    "generated_text": "The word \"meme\" was first used in"
+    "generated_text": "I'm not sure, which is the best way"
   },
   {
     "details": {
@@ -171,159 +121,109 @@
         {
           "id": 50278,
           "logprob": null,
-          "text": "<|prompter|>"
+          "text": "<|USER|>"
         },
         {
           "id": 1276,
-          "logprob": -8.03125,
+          "logprob": -4.5546875,
           "text": "What"
         },
-        {
-          "id": 310,
-          "logprob": -5.421875,
-          "text": " is"
-        },
-        {
-          "id": 247,
-          "logprob": -2.1601562,
-          "text": " a"
-        },
-        {
-          "id": 1167,
-          "logprob": -5.4609375,
-          "text": " mem"
-        },
-        {
-          "id": 70,
-          "logprob": -0.005657196,
-          "text": "e"
-        },
-        {
-          "id": 13,
-          "logprob": -7.28125,
-          "text": ","
-        },
-        {
-          "id": 285,
-          "logprob": -0.2980957,
-          "text": " and"
-        },
-        {
-          "id": 752,
-          "logprob": -2.1679688,
-          "text": " what"
-        },
         {
           "id": 434,
-          "logprob": -5.6210938,
+          "logprob": -4.234375,
           "text": "'s"
         },
         {
-          "id": 253,
-          "logprob": -0.81103516,
-          "text": " the"
+          "id": 634,
+          "logprob": -5.1054688,
+          "text": " your"
         },
         {
-          "id": 2892,
-          "logprob": -6.6640625,
-          "text": " history"
+          "id": 12315,
+          "logprob": -9.953125,
+          "text": " mood"
         },
         {
-          "id": 3212,
-          "logprob": -2.265625,
-          "text": " behind"
-        },
-        {
-          "id": 436,
-          "logprob": -11.5078125,
-          "text": " this"
-        },
-        {
-          "id": 3159,
-          "logprob": -2.1582031,
-          "text": " word"
+          "id": 3063,
+          "logprob": -4.0820312,
+          "text": " today"
         },
         {
           "id": 32,
-          "logprob": -0.008720398,
+          "logprob": -0.15148926,
           "text": "?"
         },
         {
-          "id": 0,
-          "logprob": -2.4726562,
-          "text": "<|endoftext|>"
-        },
-        {
-          "id": 50281,
-          "logprob": -18.265625,
-          "text": "<|assistant|>"
+          "id": 50279,
+          "logprob": -0.27026367,
+          "text": "<|ASSISTANT|>"
         }
       ],
       "seed": null,
       "tokens": [
         {
-          "id": 510,
-          "logprob": -0.63183594,
+          "id": 42,
+          "logprob": -0.88378906,
           "special": false,
-          "text": "The"
+          "text": "I"
         },
         {
-          "id": 3159,
-          "logprob": -0.5488281,
+          "id": 1353,
+          "logprob": -0.9819336,
           "special": false,
-          "text": " word"
+          "text": "'m"
         },
         {
-          "id": 346,
-          "logprob": -0.045684814,
+          "id": 417,
+          "logprob": -2.2421875,
           "special": false,
-          "text": " \""
+          "text": " not"
         },
         {
-          "id": 6441,
-          "logprob": -0.00207901,
+          "id": 2119,
+          "logprob": -0.3474121,
           "special": false,
-          "text": "mem"
+          "text": " sure"
         },
         {
-          "id": 70,
-          "logprob": -1.335144e-05,
+          "id": 13,
+          "logprob": -1.078125,
           "special": false,
-          "text": "e"
+          "text": ","
         },
         {
-          "id": 3,
-          "logprob": -0.00097227097,
+          "id": 534,
+          "logprob": -0.69140625,
           "special": false,
-          "text": "\""
+          "text": " which"
         },
         {
-          "id": 369,
-          "logprob": -0.0892334,
+          "id": 310,
+          "logprob": -1.4072266,
           "special": false,
-          "text": " was"
+          "text": " is"
         },
         {
-          "id": 806,
-          "logprob": -0.12463379,
+          "id": 253,
+          "logprob": -1.7041016,
           "special": false,
-          "text": " first"
+          "text": " the"
         },
         {
-          "id": 908,
-          "logprob": -0.01737976,
+          "id": 1682,
+          "logprob": -0.053375244,
           "special": false,
-          "text": " used"
+          "text": " best"
         },
         {
-          "id": 275,
-          "logprob": -0.50341797,
+          "id": 1039,
+          "logprob": -2.0351562,
           "special": false,
-          "text": " in"
+          "text": " way"
         }
       ]
     },
-    "generated_text": "The word \"meme\" was first used in"
+    "generated_text": "I'm not sure, which is the best way"
   },
   {
     "details": {
@@ -334,159 +234,109 @@
         {
           "id": 50278,
           "logprob": null,
-          "text": "<|prompter|>"
+          "text": "<|USER|>"
         },
         {
           "id": 1276,
-          "logprob": -8.03125,
+          "logprob": -4.5546875,
           "text": "What"
         },
-        {
-          "id": 310,
-          "logprob": -5.421875,
-          "text": " is"
-        },
-        {
-          "id": 247,
-          "logprob": -2.1601562,
-          "text": " a"
-        },
-        {
-          "id": 1167,
-          "logprob": -5.4609375,
-          "text": " mem"
-        },
-        {
-          "id": 70,
-          "logprob": -0.005657196,
-          "text": "e"
-        },
-        {
-          "id": 13,
-          "logprob": -7.28125,
-          "text": ","
-        },
-        {
-          "id": 285,
-          "logprob": -0.2980957,
-          "text": " and"
-        },
-        {
-          "id": 752,
-          "logprob": -2.1679688,
-          "text": " what"
-        },
         {
           "id": 434,
-          "logprob": -5.6210938,
+          "logprob": -4.234375,
           "text": "'s"
         },
         {
-          "id": 253,
-          "logprob": -0.81103516,
-          "text": " the"
+          "id": 634,
+          "logprob": -5.21875,
+          "text": " your"
         },
         {
-          "id": 2892,
-          "logprob": -6.6640625,
-          "text": " history"
+          "id": 12315,
+          "logprob": -9.9375,
+          "text": " mood"
         },
         {
-          "id": 3212,
-          "logprob": -2.265625,
-          "text": " behind"
-        },
-        {
-          "id": 436,
-          "logprob": -11.5078125,
-          "text": " this"
-        },
-        {
-          "id": 3159,
-          "logprob": -2.1582031,
-          "text": " word"
+          "id": 3063,
+          "logprob": -4.1015625,
+          "text": " today"
         },
         {
           "id": 32,
-          "logprob": -0.008720398,
+          "logprob": -0.15319824,
           "text": "?"
         },
         {
-          "id": 0,
-          "logprob": -2.4726562,
-          "text": "<|endoftext|>"
-        },
-        {
-          "id": 50281,
-          "logprob": -18.265625,
-          "text": "<|assistant|>"
+          "id": 50279,
+          "logprob": -0.2614746,
+          "text": "<|ASSISTANT|>"
         }
       ],
       "seed": null,
       "tokens": [
         {
-          "id": 510,
-          "logprob": -0.63183594,
+          "id": 42,
+          "logprob": -0.8886719,
           "special": false,
-          "text": "The"
+          "text": "I"
         },
         {
-          "id": 3159,
-          "logprob": -0.5488281,
+          "id": 1353,
+          "logprob": -0.98046875,
           "special": false,
-          "text": " word"
+          "text": "'m"
         },
         {
-          "id": 346,
-          "logprob": -0.045684814,
+          "id": 417,
+          "logprob": -2.2265625,
           "special": false,
-          "text": " \""
+          "text": " not"
         },
         {
-          "id": 6441,
-          "logprob": -0.00207901,
+          "id": 2119,
+          "logprob": -0.3479004,
           "special": false,
-          "text": "mem"
+          "text": " sure"
         },
         {
-          "id": 70,
-          "logprob": -1.335144e-05,
+          "id": 13,
+          "logprob": -1.0117188,
           "special": false,
-          "text": "e"
+          "text": ","
         },
         {
-          "id": 3,
-          "logprob": -0.00097227097,
+          "id": 534,
+          "logprob": -0.67871094,
           "special": false,
-          "text": "\""
+          "text": " which"
         },
         {
-          "id": 369,
-          "logprob": -0.0892334,
+          "id": 310,
+          "logprob": -1.421875,
           "special": false,
-          "text": " was"
+          "text": " is"
         },
         {
-          "id": 806,
-          "logprob": -0.12463379,
+          "id": 253,
+          "logprob": -1.7382812,
           "special": false,
-          "text": " first"
+          "text": " the"
         },
         {
-          "id": 908,
-          "logprob": -0.01737976,
+          "id": 1682,
+          "logprob": -0.051330566,
           "special": false,
-          "text": " used"
+          "text": " best"
         },
         {
-          "id": 275,
-          "logprob": -0.50341797,
+          "id": 1039,
+          "logprob": -2.0390625,
           "special": false,
-          "text": " in"
+          "text": " way"
         }
       ]
     },
-    "generated_text": "The word \"meme\" was first used in"
+    "generated_text": "I'm not sure, which is the best way"
   },
   {
     "details": {
@@ -497,158 +347,108 @@
         {
           "id": 50278,
           "logprob": null,
-          "text": "<|prompter|>"
+          "text": "<|USER|>"
         },
         {
           "id": 1276,
-          "logprob": -8.03125,
+          "logprob": -4.5546875,
           "text": "What"
         },
-        {
-          "id": 310,
-          "logprob": -5.421875,
-          "text": " is"
-        },
-        {
-          "id": 247,
-          "logprob": -2.1601562,
-          "text": " a"
-        },
-        {
-          "id": 1167,
-          "logprob": -5.4609375,
-          "text": " mem"
-        },
-        {
-          "id": 70,
-          "logprob": -0.005657196,
-          "text": "e"
-        },
-        {
-          "id": 13,
-          "logprob": -7.28125,
-          "text": ","
-        },
-        {
-          "id": 285,
-          "logprob": -0.2980957,
-          "text": " and"
-        },
-        {
-          "id": 752,
-          "logprob": -2.1679688,
-          "text": " what"
-        },
         {
           "id": 434,
-          "logprob": -5.6210938,
+          "logprob": -4.234375,
           "text": "'s"
         },
         {
-          "id": 253,
-          "logprob": -0.81103516,
-          "text": " the"
+          "id": 634,
+          "logprob": -5.21875,
+          "text": " your"
         },
         {
-          "id": 2892,
-          "logprob": -6.6640625,
-          "text": " history"
+          "id": 12315,
+          "logprob": -9.9375,
+          "text": " mood"
         },
         {
-          "id": 3212,
-          "logprob": -2.265625,
-          "text": " behind"
-        },
-        {
-          "id": 436,
-          "logprob": -11.5078125,
-          "text": " this"
-        },
-        {
-          "id": 3159,
-          "logprob": -2.1582031,
-          "text": " word"
+          "id": 3063,
+          "logprob": -4.1015625,
+          "text": " today"
         },
         {
           "id": 32,
-          "logprob": -0.008720398,
+          "logprob": -0.15319824,
           "text": "?"
         },
         {
-          "id": 0,
-          "logprob": -2.4726562,
-          "text": "<|endoftext|>"
-        },
-        {
-          "id": 50281,
-          "logprob": -18.265625,
-          "text": "<|assistant|>"
+          "id": 50279,
+          "logprob": -0.2614746,
+          "text": "<|ASSISTANT|>"
         }
       ],
       "seed": null,
       "tokens": [
         {
-          "id": 510,
-          "logprob": -0.63183594,
+          "id": 42,
+          "logprob": -0.8886719,
           "special": false,
-          "text": "The"
+          "text": "I"
         },
         {
-          "id": 3159,
-          "logprob": -0.5488281,
+          "id": 1353,
+          "logprob": -0.98046875,
           "special": false,
-          "text": " word"
+          "text": "'m"
         },
         {
-          "id": 346,
-          "logprob": -0.045684814,
+          "id": 417,
+          "logprob": -2.2265625,
           "special": false,
-          "text": " \""
+          "text": " not"
         },
         {
-          "id": 6441,
-          "logprob": -0.00207901,
+          "id": 2119,
+          "logprob": -0.3479004,
           "special": false,
-          "text": "mem"
+          "text": " sure"
         },
         {
-          "id": 70,
-          "logprob": -1.335144e-05,
+          "id": 13,
+          "logprob": -1.0117188,
           "special": false,
-          "text": "e"
+          "text": ","
         },
         {
-          "id": 3,
-          "logprob": -0.00097227097,
+          "id": 534,
+          "logprob": -0.67871094,
           "special": false,
-          "text": "\""
+          "text": " which"
         },
         {
-          "id": 369,
-          "logprob": -0.0892334,
+          "id": 310,
+          "logprob": -1.421875,
           "special": false,
-          "text": " was"
+          "text": " is"
         },
         {
-          "id": 806,
-          "logprob": -0.12463379,
+          "id": 253,
+          "logprob": -1.7382812,
           "special": false,
-          "text": " first"
+          "text": " the"
         },
         {
-          "id": 908,
-          "logprob": -0.01737976,
+          "id": 1682,
+          "logprob": -0.051330566,
           "special": false,
-          "text": " used"
+          "text": " best"
         },
         {
-          "id": 275,
-          "logprob": -0.50341797,
+          "id": 1039,
+          "logprob": -2.0390625,
           "special": false,
-          "text": " in"
+          "text": " way"
         }
       ]
     },
-    "generated_text": "The word \"meme\" was first used in"
+    "generated_text": "I'm not sure, which is the best way"
   }
 ]
diff --git a/integration-tests/models/__snapshots__/test_flash_neox_sharded/test_flash_neox.json b/integration-tests/models/__snapshots__/test_flash_neox_sharded/test_flash_neox.json
new file mode 100644
index 00000000..787704ce
--- /dev/null
+++ b/integration-tests/models/__snapshots__/test_flash_neox_sharded/test_flash_neox.json
@@ -0,0 +1,163 @@
+{
+  "details": {
+    "best_of_sequences": null,
+    "finish_reason": "length",
+    "generated_tokens": 10,
+    "prefill": [
+      {
+        "id": 50278,
+        "logprob": null,
+        "text": "<|prompter|>"
+      },
+      {
+        "id": 1276,
+        "logprob": -8.03125,
+        "text": "What"
+      },
+      {
+        "id": 310,
+        "logprob": -5.421875,
+        "text": " is"
+      },
+      {
+        "id": 247,
+        "logprob": -2.1601562,
+        "text": " a"
+      },
+      {
+        "id": 1167,
+        "logprob": -5.4609375,
+        "text": " mem"
+      },
+      {
+        "id": 70,
+        "logprob": -0.005657196,
+        "text": "e"
+      },
+      {
+        "id": 13,
+        "logprob": -7.28125,
+        "text": ","
+      },
+      {
+        "id": 285,
+        "logprob": -0.2980957,
+        "text": " and"
+      },
+      {
+        "id": 752,
+        "logprob": -2.1679688,
+        "text": " what"
+      },
+      {
+        "id": 434,
+        "logprob": -5.6210938,
+        "text": "'s"
+      },
+      {
+        "id": 253,
+        "logprob": -0.81103516,
+        "text": " the"
+      },
+      {
+        "id": 2892,
+        "logprob": -6.6640625,
+        "text": " history"
+      },
+      {
+        "id": 3212,
+        "logprob": -2.265625,
+        "text": " behind"
+      },
+      {
+        "id": 436,
+        "logprob": -11.5078125,
+        "text": " this"
+      },
+      {
+        "id": 3159,
+        "logprob": -2.1582031,
+        "text": " word"
+      },
+      {
+        "id": 32,
+        "logprob": -0.008720398,
+        "text": "?"
+      },
+      {
+        "id": 0,
+        "logprob": -2.4726562,
+        "text": "<|endoftext|>"
+      },
+      {
+        "id": 50281,
+        "logprob": -18.265625,
+        "text": "<|assistant|>"
+      }
+    ],
+    "seed": null,
+    "tokens": [
+      {
+        "id": 510,
+        "logprob": -0.63183594,
+        "special": false,
+        "text": "The"
+      },
+      {
+        "id": 3159,
+        "logprob": -0.5390625,
+        "special": false,
+        "text": " word"
+      },
+      {
+        "id": 346,
+        "logprob": -0.045684814,
+        "special": false,
+        "text": " \""
+      },
+      {
+        "id": 6441,
+        "logprob": -0.002090454,
+        "special": false,
+        "text": "mem"
+      },
+      {
+        "id": 70,
+        "logprob": -1.3589859e-05,
+        "special": false,
+        "text": "e"
+      },
+      {
+        "id": 3,
+        "logprob": -0.0009455681,
+        "special": false,
+        "text": "\""
+      },
+      {
+        "id": 369,
+        "logprob": -0.088012695,
+        "special": false,
+        "text": " was"
+      },
+      {
+        "id": 806,
+        "logprob": -0.12585449,
+        "special": false,
+        "text": " first"
+      },
+      {
+        "id": 908,
+        "logprob": -0.017196655,
+        "special": false,
+        "text": " used"
+      },
+      {
+        "id": 275,
+        "logprob": -0.49731445,
+        "special": false,
+        "text": " in"
+      }
+    ]
+  },
+  "generated_text": "The word \"meme\" was first used in"
+}
diff --git a/integration-tests/models/__snapshots__/test_flash_neox_sharded/test_flash_neox_load.json b/integration-tests/models/__snapshots__/test_flash_neox_sharded/test_flash_neox_load.json
new file mode 100644
index 00000000..47d6a77e
--- /dev/null
+++ b/integration-tests/models/__snapshots__/test_flash_neox_sharded/test_flash_neox_load.json
@@ -0,0 +1,654 @@
+[
+  {
+    "details": {
+      "best_of_sequences": null,
+      "finish_reason": "length",
+      "generated_tokens": 10,
+      "prefill": [
+        {
+          "id": 50278,
+          "logprob": null,
+          "text": "<|prompter|>"
+        },
+        {
+          "id": 1276,
+          "logprob": -8.03125,
+          "text": "What"
+        },
+        {
+          "id": 310,
+          "logprob": -5.421875,
+          "text": " is"
+        },
+        {
+          "id": 247,
+          "logprob": -2.1601562,
+          "text": " a"
+        },
+        {
+          "id": 1167,
+          "logprob": -5.4609375,
+          "text": " mem"
+        },
+        {
+          "id": 70,
+          "logprob": -0.005657196,
+          "text": "e"
+        },
+        {
+          "id": 13,
+          "logprob": -7.28125,
+          "text": ","
+        },
+        {
+          "id": 285,
+          "logprob": -0.2980957,
+          "text": " and"
+        },
+        {
+          "id": 752,
+          "logprob": -2.1679688,
+          "text": " what"
+        },
+        {
+          "id": 434,
+          "logprob": -5.6210938,
+          "text": "'s"
+        },
+        {
+          "id": 253,
+          "logprob": -0.81103516,
+          "text": " the"
+        },
+        {
+          "id": 2892,
+          "logprob": -6.6640625,
+          "text": " history"
+        },
+        {
+          "id": 3212,
+          "logprob": -2.265625,
+          "text": " behind"
+        },
+        {
+          "id": 436,
+          "logprob": -11.5078125,
+          "text": " this"
+        },
+        {
+          "id": 3159,
+          "logprob": -2.1582031,
+          "text": " word"
+        },
+        {
+          "id": 32,
+          "logprob": -0.008720398,
+          "text": "?"
+        },
+        {
+          "id": 0,
+          "logprob": -2.4726562,
+          "text": "<|endoftext|>"
+        },
+        {
+          "id": 50281,
+          "logprob": -18.265625,
+          "text": "<|assistant|>"
+        }
+      ],
+      "seed": null,
+      "tokens": [
+        {
+          "id": 510,
+          "logprob": -0.63183594,
+          "special": false,
+          "text": "The"
+        },
+        {
+          "id": 3159,
+          "logprob": -0.5488281,
+          "special": false,
+          "text": " word"
+        },
+        {
+          "id": 346,
+          "logprob": -0.045684814,
+          "special": false,
+          "text": " \""
+        },
+        {
+          "id": 6441,
+          "logprob": -0.00207901,
+          "special": false,
+          "text": "mem"
+        },
+        {
+          "id": 70,
+          "logprob": -1.335144e-05,
+          "special": false,
+          "text": "e"
+        },
+        {
+          "id": 3,
+          "logprob": -0.00097227097,
+          "special": false,
+          "text": "\""
+        },
+        {
+          "id": 369,
+          "logprob": -0.0892334,
+          "special": false,
+          "text": " was"
+        },
+        {
+          "id": 806,
+          "logprob": -0.12463379,
+          "special": false,
+          "text": " first"
+        },
+        {
+          "id": 908,
+          "logprob": -0.01737976,
+          "special": false,
+          "text": " used"
+        },
+        {
+          "id": 275,
+          "logprob": -0.50341797,
+          "special": false,
+          "text": " in"
+        }
+      ]
+    },
+    "generated_text": "The word \"meme\" was first used in"
+  },
+  {
+    "details": {
+      "best_of_sequences": null,
+      "finish_reason": "length",
+      "generated_tokens": 10,
+      "prefill": [
+        {
+          "id": 50278,
+          "logprob": null,
+          "text": "<|prompter|>"
+        },
+        {
+          "id": 1276,
+          "logprob": -8.03125,
+          "text": "What"
+        },
+        {
+          "id": 310,
+          "logprob": -5.421875,
+          "text": " is"
+        },
+        {
+          "id": 247,
+          "logprob": -2.1601562,
+          "text": " a"
+        },
+        {
+          "id": 1167,
+          "logprob": -5.4609375,
+          "text": " mem"
+        },
+        {
+          "id": 70,
+          "logprob": -0.005657196,
+          "text": "e"
+        },
+        {
+          "id": 13,
+          "logprob": -7.28125,
+          "text": ","
+        },
+        {
+          "id": 285,
+          "logprob": -0.2980957,
+          "text": " and"
+        },
+        {
+          "id": 752,
+          "logprob": -2.1679688,
+          "text": " what"
+        },
+        {
+          "id": 434,
+          "logprob": -5.6210938,
+          "text": "'s"
+        },
+        {
+          "id": 253,
+          "logprob": -0.81103516,
+          "text": " the"
+        },
+        {
+          "id": 2892,
+          "logprob": -6.6640625,
+          "text": " history"
+        },
+        {
+          "id": 3212,
+          "logprob": -2.265625,
+          "text": " behind"
+        },
+        {
+          "id": 436,
+          "logprob": -11.5078125,
+          "text": " this"
+        },
+        {
+          "id": 3159,
+          "logprob": -2.1582031,
+          "text": " word"
+        },
+        {
+          "id": 32,
+          "logprob": -0.008720398,
+          "text": "?"
+        },
+        {
+          "id": 0,
+          "logprob": -2.4726562,
+          "text": "<|endoftext|>"
+        },
+        {
+          "id": 50281,
+          "logprob": -18.265625,
+          "text": "<|assistant|>"
+        }
+      ],
+      "seed": null,
+      "tokens": [
+        {
+          "id": 510,
+          "logprob": -0.63183594,
+          "special": false,
+          "text": "The"
+        },
+        {
+          "id": 3159,
+          "logprob": -0.5488281,
+          "special": false,
+          "text": " word"
+        },
+        {
+          "id": 346,
+          "logprob": -0.045684814,
+          "special": false,
+          "text": " \""
+        },
+        {
+          "id": 6441,
+          "logprob": -0.00207901,
+          "special": false,
+          "text": "mem"
+        },
+        {
+          "id": 70,
+          "logprob": -1.335144e-05,
+          "special": false,
+          "text": "e"
+        },
+        {
+          "id": 3,
+          "logprob": -0.00097227097,
+          "special": false,
+          "text": "\""
+        },
+        {
+          "id": 369,
+          "logprob": -0.0892334,
+          "special": false,
+          "text": " was"
+        },
+        {
+          "id": 806,
+          "logprob": -0.12463379,
+          "special": false,
+          "text": " first"
+        },
+        {
+          "id": 908,
+          "logprob": -0.01737976,
+          "special": false,
+          "text": " used"
+        },
+        {
+          "id": 275,
+          "logprob": -0.50341797,
+          "special": false,
+          "text": " in"
+        }
+      ]
+    },
+    "generated_text": "The word \"meme\" was first used in"
+  },
+  {
+    "details": {
+      "best_of_sequences": null,
+      "finish_reason": "length",
+      "generated_tokens": 10,
+      "prefill": [
+        {
+          "id": 50278,
+          "logprob": null,
+          "text": "<|prompter|>"
+        },
+        {
+          "id": 1276,
+          "logprob": -8.03125,
+          "text": "What"
+        },
+        {
+          "id": 310,
+          "logprob": -5.421875,
+          "text": " is"
+        },
+        {
+          "id": 247,
+          "logprob": -2.1601562,
+          "text": " a"
+        },
+        {
+          "id": 1167,
+          "logprob": -5.4609375,
+          "text": " mem"
+        },
+        {
+          "id": 70,
+          "logprob": -0.005657196,
+          "text": "e"
+        },
+        {
+          "id": 13,
+          "logprob": -7.28125,
+          "text": ","
+        },
+        {
+          "id": 285,
+          "logprob": -0.2980957,
+          "text": " and"
+        },
+        {
+          "id": 752,
+          "logprob": -2.1679688,
+          "text": " what"
+        },
+        {
+          "id": 434,
+          "logprob": -5.6210938,
+          "text": "'s"
+        },
+        {
+          "id": 253,
+          "logprob": -0.81103516,
+          "text": " the"
+        },
+        {
+          "id": 2892,
+          "logprob": -6.6640625,
+          "text": " history"
+        },
+        {
+          "id": 3212,
+          "logprob": -2.265625,
+          "text": " behind"
+        },
+        {
+          "id": 436,
+          "logprob": -11.5078125,
+          "text": " this"
+        },
+        {
+          "id": 3159,
+          "logprob": -2.1582031,
+          "text": " word"
+        },
+        {
+          "id": 32,
+          "logprob": -0.008720398,
+          "text": "?"
+        },
+        {
+          "id": 0,
+          "logprob": -2.4726562,
+          "text": "<|endoftext|>"
+        },
+        {
+          "id": 50281,
+          "logprob": -18.265625,
+          "text": "<|assistant|>"
+        }
+      ],
+      "seed": null,
+      "tokens": [
+        {
+          "id": 510,
+          "logprob": -0.63183594,
+          "special": false,
+          "text": "The"
+        },
+        {
+          "id": 3159,
+          "logprob": -0.5488281,
+          "special": false,
+          "text": " word"
+        },
+        {
+          "id": 346,
+          "logprob": -0.045684814,
+          "special": false,
+          "text": " \""
+        },
+        {
+          "id": 6441,
+          "logprob": -0.00207901,
+          "special": false,
+          "text": "mem"
+        },
+        {
+          "id": 70,
+          "logprob": -1.335144e-05,
+          "special": false,
+          "text": "e"
+        },
+        {
+          "id": 3,
+          "logprob": -0.00097227097,
+          "special": false,
+          "text": "\""
+        },
+        {
+          "id": 369,
+          "logprob": -0.0892334,
+          "special": false,
+          "text": " was"
+        },
+        {
+          "id": 806,
+          "logprob": -0.12463379,
+          "special": false,
+          "text": " first"
+        },
+        {
+          "id": 908,
+          "logprob": -0.01737976,
+          "special": false,
+          "text": " used"
+        },
+        {
+          "id": 275,
+          "logprob": -0.50341797,
+          "special": false,
+          "text": " in"
+        }
+      ]
+    },
+    "generated_text": "The word \"meme\" was first used in"
+  },
+  {
+    "details": {
+      "best_of_sequences": null,
+      "finish_reason": "length",
+      "generated_tokens": 10,
+      "prefill": [
+        {
+          "id": 50278,
+          "logprob": null,
+          "text": "<|prompter|>"
+        },
+        {
+          "id": 1276,
+          "logprob": -8.03125,
+          "text": "What"
+        },
+        {
+          "id": 310,
+          "logprob": -5.421875,
+          "text": " is"
+        },
+        {
+          "id": 247,
+          "logprob": -2.1601562,
+          "text": " a"
+        },
+        {
+          "id": 1167,
+          "logprob": -5.4609375,
+          "text": " mem"
+        },
+        {
+          "id": 70,
+          "logprob": -0.005657196,
+          "text": "e"
+        },
+        {
+          "id": 13,
+          "logprob": -7.28125,
+          "text": ","
+        },
+        {
+          "id": 285,
+          "logprob": -0.2980957,
+          "text": " and"
+        },
+        {
+          "id": 752,
+          "logprob": -2.1679688,
+          "text": " what"
+        },
+        {
+          "id": 434,
+          "logprob": -5.6210938,
+          "text": "'s"
+        },
+        {
+          "id": 253,
+          "logprob": -0.81103516,
+          "text": " the"
+        },
+        {
+          "id": 2892,
+          "logprob": -6.6640625,
+          "text": " history"
+        },
+        {
+          "id": 3212,
+          "logprob": -2.265625,
+          "text": " behind"
+        },
+        {
+          "id": 436,
+          "logprob": -11.5078125,
+          "text": " this"
+        },
+        {
+          "id": 3159,
+          "logprob": -2.1582031,
+          "text": " word"
+        },
+        {
+          "id": 32,
+          "logprob": -0.008720398,
+          "text": "?"
+        },
+        {
+          "id": 0,
+          "logprob": -2.4726562,
+          "text": "<|endoftext|>"
+        },
+        {
+          "id": 50281,
+          "logprob": -18.265625,
+          "text": "<|assistant|>"
+        }
+      ],
+      "seed": null,
+      "tokens": [
+        {
+          "id": 510,
+          "logprob": -0.63183594,
+          "special": false,
+          "text": "The"
+        },
+        {
+          "id": 3159,
+          "logprob": -0.5488281,
+          "special": false,
+          "text": " word"
+        },
+        {
+          "id": 346,
+          "logprob": -0.045684814,
+          "special": false,
+          "text": " \""
+        },
+        {
+          "id": 6441,
+          "logprob": -0.00207901,
+          "special": false,
+          "text": "mem"
+        },
+        {
+          "id": 70,
+          "logprob": -1.335144e-05,
+          "special": false,
+          "text": "e"
+        },
+        {
+          "id": 3,
+          "logprob": -0.00097227097,
+          "special": false,
+          "text": "\""
+        },
+        {
+          "id": 369,
+          "logprob": -0.0892334,
+          "special": false,
+          "text": " was"
+        },
+        {
+          "id": 806,
+          "logprob": -0.12463379,
+          "special": false,
+          "text": " first"
+        },
+        {
+          "id": 908,
+          "logprob": -0.01737976,
+          "special": false,
+          "text": " used"
+        },
+        {
+          "id": 275,
+          "logprob": -0.50341797,
+          "special": false,
+          "text": " in"
+        }
+      ]
+    },
+    "generated_text": "The word \"meme\" was first used in"
+  }
+]
diff --git a/integration-tests/models/test_flash_neox.py b/integration-tests/models/test_flash_neox.py
index 56cbf270..ca5b33c1 100644
--- a/integration-tests/models/test_flash_neox.py
+++ b/integration-tests/models/test_flash_neox.py
@@ -3,7 +3,7 @@ import pytest
 
 @pytest.fixture(scope="module")
 def flash_neox_handle(launcher):
-    with launcher("OpenAssistant/oasst-sft-1-pythia-12b", num_shard=2) as handle:
+    with launcher("stabilityai/stablelm-tuned-alpha-3b", num_shard=1) as handle:
         yield handle
 
 
@@ -16,7 +16,7 @@ async def flash_neox(flash_neox_handle):
 @pytest.mark.asyncio
 async def test_flash_neox(flash_neox, response_snapshot):
     response = await flash_neox.generate(
-        "<|prompter|>What is a meme, and what's the history behind this word?<|endoftext|><|assistant|>",
+        "<|USER|>What's your mood today?<|ASSISTANT|>",
         max_new_tokens=10,
     )
 
@@ -28,12 +28,14 @@ async def test_flash_neox(flash_neox, response_snapshot):
 async def test_flash_neox_load(flash_neox, generate_load, response_snapshot):
     responses = await generate_load(
         flash_neox,
-        "<|prompter|>What is a meme, and what's the history behind this word?<|endoftext|><|assistant|>",
+        "<|USER|>What's your mood today?<|ASSISTANT|>",
         max_new_tokens=10,
         n=4,
     )
 
-    assert len(responses) == 4
-    assert all([r.generated_text == responses[0].generated_text for r in responses])
+    generated_texts = [r.generated_text for r in responses]
+
+    assert len(generated_texts) == 4
+    assert all(text == generated_texts[0] for text in generated_texts), generated_texts  # condition first; texts are only the failure message
 
     assert responses == response_snapshot
diff --git a/integration-tests/models/test_flash_neox_sharded.py b/integration-tests/models/test_flash_neox_sharded.py
new file mode 100644
index 00000000..513aeaaf
--- /dev/null
+++ b/integration-tests/models/test_flash_neox_sharded.py
@@ -0,0 +1,39 @@
+import pytest
+
+
+@pytest.fixture(scope="module")
+def flash_neox_sharded_handle(launcher):
+    with launcher("OpenAssistant/oasst-sft-1-pythia-12b", num_shard=2) as handle:
+        yield handle
+
+
+@pytest.fixture(scope="module")
+async def flash_neox_sharded(flash_neox_sharded_handle):
+    await flash_neox_sharded_handle.health(240)
+    return flash_neox_sharded_handle.client
+
+
+@pytest.mark.asyncio
+async def test_flash_neox(flash_neox_sharded, response_snapshot):
+    response = await flash_neox_sharded.generate(
+        "<|prompter|>What is a meme, and what's the history behind this word?<|endoftext|><|assistant|>",
+        max_new_tokens=10,
+    )
+
+    assert response.details.generated_tokens == 10
+    assert response == response_snapshot
+
+
+@pytest.mark.asyncio
+async def test_flash_neox_load(flash_neox_sharded, generate_load, response_snapshot):
+    responses = await generate_load(
+        flash_neox_sharded,
+        "<|prompter|>What is a meme, and what's the history behind this word?<|endoftext|><|assistant|>",
+        max_new_tokens=10,
+        n=4,
+    )
+
+    assert len(responses) == 4
+    assert all([r.generated_text == responses[0].generated_text for r in responses])
+
+    assert responses == response_snapshot
diff --git a/integration-tests/pytest.ini b/integration-tests/pytest.ini
new file mode 100644
index 00000000..485e6017
--- /dev/null
+++ b/integration-tests/pytest.ini
@@ -0,0 +1,4 @@
+[pytest]
+asyncio_mode = auto
+markers =
+    private: marks tests as requiring an admin hf token (deselect with '-m "not private"')
\ No newline at end of file
diff --git a/server/pyproject.toml b/server/pyproject.toml
index 22aa41db..b8e15230 100644
--- a/server/pyproject.toml
+++ b/server/pyproject.toml
@@ -35,6 +35,9 @@ bnb = ["bitsandbytes"]
 grpcio-tools = "^1.51.1"
 pytest = "^7.3.0"
 
+[tool.pytest.ini_options]
+markers = ["private: marks tests as requiring an admin hf token (deselect with '-m \"not private\"')"]
+
 [build-system]
 requires = ["poetry-core>=1.0.0"]
 build-backend = "poetry.core.masonry.api"
diff --git a/server/text_generation_server/models/custom_modeling/flash_neox_modeling.py b/server/text_generation_server/models/custom_modeling/flash_neox_modeling.py
index 2c6b8da6..b7834157 100644
--- a/server/text_generation_server/models/custom_modeling/flash_neox_modeling.py
+++ b/server/text_generation_server/models/custom_modeling/flash_neox_modeling.py
@@ -362,7 +362,7 @@ class FlashGPTNeoXModel(FlashGPTNeoXPreTrainedModel):
             pretrained_model_name_or_path, load_in_8bit=False, *model_args, **kwargs
         )
 
-        model.post_load_weights(load_in_8bit)
+        model.post_load_weights("bitsandbytes" if load_in_8bit else None)
         return model
 
     def forward(
@@ -466,7 +466,7 @@ class FlashGPTNeoXForCausalLM(FlashGPTNeoXPreTrainedModel):
         model = super(FlashGPTNeoXForCausalLM, cls).from_pretrained(
             pretrained_model_name_or_path, load_in_8bit=False, *model_args, **kwargs
         )
-        model.post_load_weights(load_in_8bit)
+        model.post_load_weights("bitsandbytes" if load_in_8bit else None)
         return model
 
     def forward(
diff --git a/server/text_generation_server/models/flash_neox.py b/server/text_generation_server/models/flash_neox.py
index cac40bab..8c1c1a00 100644
--- a/server/text_generation_server/models/flash_neox.py
+++ b/server/text_generation_server/models/flash_neox.py
@@ -88,7 +88,7 @@ class FlashNeoXSharded(FlashNeoX):
     def load_weights(
         model,
         filenames: List[str],
-        quantize: bool,
+        quantize: Optional[str],
         device: torch.device,
         dtype: torch.dtype,
         rank: int,
diff --git a/server/text_generation_server/models/flash_santacoder.py b/server/text_generation_server/models/flash_santacoder.py
index 5dc31309..1fbaf252 100644
--- a/server/text_generation_server/models/flash_santacoder.py
+++ b/server/text_generation_server/models/flash_santacoder.py
@@ -80,7 +80,7 @@ class FlashSantacoder(FlashCausalLM):
     def load_weights(
         model: FlashSantacoderForCausalLM,
         filenames: List[Path],
-        quantize: bool,
+        quantize: Optional[str],
         device: torch.device,
         dtype: torch.dtype,
         transpose: bool,
diff --git a/server/text_generation_server/models/opt.py b/server/text_generation_server/models/opt.py
index fdae795b..8c676a51 100644
--- a/server/text_generation_server/models/opt.py
+++ b/server/text_generation_server/models/opt.py
@@ -101,7 +101,7 @@ class OPTSharded(OPT):
     def load_weights(
         model,
         filenames: List[str],
-        quantize: bool,
+        quantize: Optional[str],
         device: torch.device,
         dtype: torch.dtype,
         rank: int,