diff --git a/integration-tests/models/__snapshots__/test_flash_awq/test_flash_llama_awq.json b/integration-tests/models/__snapshots__/test_flash_awq/test_flash_llama_awq.json
index 0d8c05ed..dcd37cb9 100644
--- a/integration-tests/models/__snapshots__/test_flash_awq/test_flash_llama_awq.json
+++ b/integration-tests/models/__snapshots__/test_flash_awq/test_flash_llama_awq.json
@@ -10,80 +10,95 @@
         "text": "<s>"
       },
       {
-        "id": 4321,
-        "logprob": -8.515625,
-        "text": "Test"
+        "id": 1724,
+        "logprob": -7.703125,
+        "text": "What"
       },
       {
-        "id": 2009,
-        "logprob": -15.4140625,
-        "text": "request"
+        "id": 338,
+        "logprob": -1.4765625,
+        "text": "is"
+      },
+      {
+        "id": 21784,
+        "logprob": -9.390625,
+        "text": "Deep"
+      },
+      {
+        "id": 29257,
+        "logprob": -1.8583984,
+        "text": "Learning"
+      },
+      {
+        "id": 29973,
+        "logprob": -0.7548828,
+        "text": "?"
       }
     ],
     "seed": null,
     "tokens": [
-      {
-        "id": 29896,
-        "logprob": -2.0292969,
-        "special": false,
-        "text": "1"
-      },
       {
         "id": 13,
-        "logprob": -2.2597656,
+        "logprob": -1.9306641,
         "special": false,
         "text": "\n"
       },
       {
-        "id": 30166,
-        "logprob": -3.8671875,
+        "id": 5618,
+        "logprob": -2.4550781,
         "special": false,
-        "text": "​"
+        "text": "What"
       },
       {
-        "id": 30166,
-        "logprob": -1.0488281,
+        "id": 338,
+        "logprob": -0.5732422,
         "special": false,
-        "text": "​"
+        "text": " is"
       },
       {
-        "id": 30166,
-        "logprob": -0.24523926,
+        "id": 278,
+        "logprob": -1.5761719,
         "special": false,
-        "text": "​"
+        "text": " the"
       },
       {
-        "id": 30166,
-        "logprob": -0.07897949,
+        "id": 4328,
+        "logprob": -1.5888672,
         "special": false,
-        "text": "​"
+        "text": " difference"
       },
       {
-        "id": 30166,
-        "logprob": -0.023513794,
+        "id": 1546,
+        "logprob": -0.026504517,
         "special": false,
-        "text": "​"
+        "text": " between"
       },
       {
-        "id": 30166,
-        "logprob": -0.011444092,
+        "id": 21784,
+        "logprob": -1.4287109,
         "special": false,
-        "text": "​"
+        "text": " Deep"
       },
       {
-        "id": 30166,
-        "logprob": -0.008430481,
+        "id": 29257,
+        "logprob": -0.15856934,
         "special": false,
-        "text": "​"
+        "text": " Learning"
       },
       {
-        "id": 30166,
-        "logprob": -0.007648468,
+        "id": 322,
+        "logprob": -0.17456055,
         "special": false,
-        "text": "​"
+        "text": " and"
+      },
+      {
+        "id": 6189,
+        "logprob": -0.62646484,
+        "special": false,
+        "text": " Machine"
       }
     ],
     "top_tokens": null
   },
-  "generated_text": "1\n​​​​​​​​"
+  "generated_text": "\nWhat is the difference between Deep Learning and Machine"
 }
diff --git a/integration-tests/models/__snapshots__/test_flash_awq/test_flash_llama_awq_all_params.json b/integration-tests/models/__snapshots__/test_flash_awq/test_flash_llama_awq_all_params.json
index 8e3f5571..d16d34f9 100644
--- a/integration-tests/models/__snapshots__/test_flash_awq/test_flash_llama_awq_all_params.json
+++ b/integration-tests/models/__snapshots__/test_flash_awq/test_flash_llama_awq_all_params.json
@@ -10,80 +10,90 @@
         "text": "<s>"
       },
       {
-        "id": 4321,
-        "logprob": -8.515625,
-        "text": "Test"
+        "id": 338,
+        "logprob": -9.0859375,
+        "text": "is"
       },
       {
-        "id": 2009,
-        "logprob": -15.4140625,
-        "text": "request"
+        "id": 21784,
+        "logprob": -10.90625,
+        "text": "Deep"
+      },
+      {
+        "id": 29257,
+        "logprob": -2.65625,
+        "text": "Learning"
+      },
+      {
+        "id": 29973,
+        "logprob": -4.8085938,
+        "text": "?"
       }
     ],
     "seed": 0,
     "tokens": [
-      {
-        "id": 29896,
-        "logprob": 0.0,
-        "special": false,
-        "text": "1"
-      },
       {
         "id": 13,
-        "logprob": -0.6254883,
+        "logprob": -0.19958496,
         "special": false,
         "text": "\n"
       },
       {
-        "id": 30166,
+        "id": 4013,
+        "logprob": -2.203125,
+        "special": false,
+        "text": "This"
+      },
+      {
+        "id": 1139,
+        "logprob": -0.23693848,
+        "special": false,
+        "text": " question"
+      },
+      {
+        "id": 756,
         "logprob": 0.0,
         "special": false,
-        "text": "​"
+        "text": " has"
       },
       {
-        "id": 29918,
-        "logprob": -0.20141602,
+        "id": 1063,
+        "logprob": -0.076538086,
         "special": false,
-        "text": "_"
+        "text": " been"
       },
       {
-        "id": 29906,
-        "logprob": -0.6254883,
-        "special": false,
-        "text": "2"
-      },
-      {
-        "id": 29871,
+        "id": 4433,
         "logprob": 0.0,
         "special": false,
-        "text": " "
+        "text": " asked"
       },
       {
-        "id": 30166,
-        "logprob": 0.0,
+        "id": 1784,
+        "logprob": -1.1367188,
         "special": false,
-        "text": "​"
+        "text": " many"
       },
       {
-        "id": 30166,
+        "id": 3064,
         "logprob": 0.0,
         "special": false,
-        "text": "​"
+        "text": " times"
       },
       {
-        "id": 30166,
-        "logprob": 0.0,
+        "id": 322,
+        "logprob": -1.7460938,
         "special": false,
-        "text": "​"
+        "text": " and"
       },
       {
-        "id": 30166,
+        "id": 306,
         "logprob": 0.0,
         "special": false,
-        "text": "​"
+        "text": " I"
       }
     ],
     "top_tokens": null
   },
-  "generated_text": "Test request1\n​_2 ​​​​"
+  "generated_text": "What is Deep Learning?\nThis question has been asked many times and I"
 }
diff --git a/integration-tests/models/__snapshots__/test_flash_awq/test_flash_llama_awq_load.json b/integration-tests/models/__snapshots__/test_flash_awq/test_flash_llama_awq_load.json
index 42b085f8..e6fb3dc0 100644
--- a/integration-tests/models/__snapshots__/test_flash_awq/test_flash_llama_awq_load.json
+++ b/integration-tests/models/__snapshots__/test_flash_awq/test_flash_llama_awq_load.json
@@ -11,82 +11,97 @@
           "text": "<s>"
         },
         {
-          "id": 4321,
-          "logprob": -8.515625,
-          "text": "Test"
+          "id": 1724,
+          "logprob": -7.703125,
+          "text": "What"
         },
         {
-          "id": 2009,
-          "logprob": -15.4140625,
-          "text": "request"
+          "id": 338,
+          "logprob": -1.4765625,
+          "text": "is"
+        },
+        {
+          "id": 21784,
+          "logprob": -9.390625,
+          "text": "Deep"
+        },
+        {
+          "id": 29257,
+          "logprob": -1.8652344,
+          "text": "Learning"
+        },
+        {
+          "id": 29973,
+          "logprob": -0.7548828,
+          "text": "?"
         }
       ],
       "seed": null,
       "tokens": [
-        {
-          "id": 29896,
-          "logprob": -2.0292969,
-          "special": false,
-          "text": "1"
-        },
         {
           "id": 13,
-          "logprob": -2.2617188,
+          "logprob": -1.9306641,
           "special": false,
           "text": "\n"
         },
         {
-          "id": 30166,
-          "logprob": -3.8671875,
+          "id": 5618,
+          "logprob": -2.4550781,
           "special": false,
-          "text": "​"
+          "text": "What"
         },
         {
-          "id": 30166,
-          "logprob": -1.0498047,
+          "id": 338,
+          "logprob": -0.5732422,
           "special": false,
-          "text": "​"
+          "text": " is"
         },
         {
-          "id": 30166,
-          "logprob": -0.24523926,
+          "id": 278,
+          "logprob": -1.5761719,
           "special": false,
-          "text": "​"
+          "text": " the"
         },
         {
-          "id": 30166,
-          "logprob": -0.07897949,
+          "id": 4328,
+          "logprob": -1.5888672,
           "special": false,
-          "text": "​"
+          "text": " difference"
         },
         {
-          "id": 30166,
-          "logprob": -0.023529053,
+          "id": 1546,
+          "logprob": -0.026504517,
           "special": false,
-          "text": "​"
+          "text": " between"
         },
         {
-          "id": 30166,
-          "logprob": -0.011444092,
+          "id": 21784,
+          "logprob": -1.4287109,
           "special": false,
-          "text": "​"
+          "text": " Deep"
         },
         {
-          "id": 30166,
-          "logprob": -0.008300781,
+          "id": 29257,
+          "logprob": -0.15856934,
           "special": false,
-          "text": "​"
+          "text": " Learning"
         },
         {
-          "id": 30166,
-          "logprob": -0.007648468,
+          "id": 322,
+          "logprob": -0.17456055,
           "special": false,
-          "text": "​"
+          "text": " and"
+        },
+        {
+          "id": 6189,
+          "logprob": -0.62646484,
+          "special": false,
+          "text": " Machine"
         }
       ],
       "top_tokens": null
     },
-    "generated_text": "1\n​​​​​​​​"
+    "generated_text": "\nWhat is the difference between Deep Learning and Machine"
   },
   {
     "details": {
@@ -100,82 +115,97 @@
           "text": "<s>"
         },
         {
-          "id": 4321,
-          "logprob": -8.515625,
-          "text": "Test"
+          "id": 1724,
+          "logprob": -7.703125,
+          "text": "What"
         },
         {
-          "id": 2009,
-          "logprob": -15.4140625,
-          "text": "request"
+          "id": 338,
+          "logprob": -1.4765625,
+          "text": "is"
+        },
+        {
+          "id": 21784,
+          "logprob": -9.390625,
+          "text": "Deep"
+        },
+        {
+          "id": 29257,
+          "logprob": -1.8583984,
+          "text": "Learning"
+        },
+        {
+          "id": 29973,
+          "logprob": -0.7548828,
+          "text": "?"
         }
       ],
       "seed": null,
       "tokens": [
-        {
-          "id": 29896,
-          "logprob": -2.0292969,
-          "special": false,
-          "text": "1"
-        },
         {
           "id": 13,
-          "logprob": -2.2617188,
+          "logprob": -1.9306641,
           "special": false,
           "text": "\n"
         },
         {
-          "id": 30166,
-          "logprob": -3.8671875,
+          "id": 5618,
+          "logprob": -2.4550781,
           "special": false,
-          "text": "​"
+          "text": "What"
         },
         {
-          "id": 30166,
-          "logprob": -1.0498047,
+          "id": 338,
+          "logprob": -0.5732422,
           "special": false,
-          "text": "​"
+          "text": " is"
         },
         {
-          "id": 30166,
-          "logprob": -0.24523926,
+          "id": 278,
+          "logprob": -1.5761719,
           "special": false,
-          "text": "​"
+          "text": " the"
         },
         {
-          "id": 30166,
-          "logprob": -0.07897949,
+          "id": 4328,
+          "logprob": -1.5888672,
           "special": false,
-          "text": "​"
+          "text": " difference"
         },
         {
-          "id": 30166,
-          "logprob": -0.023529053,
+          "id": 1546,
+          "logprob": -0.026504517,
           "special": false,
-          "text": "​"
+          "text": " between"
         },
         {
-          "id": 30166,
-          "logprob": -0.011444092,
+          "id": 21784,
+          "logprob": -1.4287109,
           "special": false,
-          "text": "​"
+          "text": " Deep"
         },
         {
-          "id": 30166,
-          "logprob": -0.008300781,
+          "id": 29257,
+          "logprob": -0.15856934,
           "special": false,
-          "text": "​"
+          "text": " Learning"
         },
         {
-          "id": 30166,
-          "logprob": -0.007648468,
+          "id": 322,
+          "logprob": -0.17456055,
           "special": false,
-          "text": "​"
+          "text": " and"
+        },
+        {
+          "id": 6189,
+          "logprob": -0.62646484,
+          "special": false,
+          "text": " Machine"
         }
       ],
       "top_tokens": null
     },
-    "generated_text": "1\n​​​​​​​​"
+    "generated_text": "\nWhat is the difference between Deep Learning and Machine"
   },
   {
     "details": {
@@ -189,82 +219,97 @@
           "text": "<s>"
         },
         {
-          "id": 4321,
-          "logprob": -8.515625,
-          "text": "Test"
+          "id": 1724,
+          "logprob": -7.703125,
+          "text": "What"
         },
         {
-          "id": 2009,
-          "logprob": -15.4140625,
-          "text": "request"
+          "id": 338,
+          "logprob": -1.4765625,
+          "text": "is"
+        },
+        {
+          "id": 21784,
+          "logprob": -9.390625,
+          "text": "Deep"
+        },
+        {
+          "id": 29257,
+          "logprob": -1.8652344,
+          "text": "Learning"
+        },
+        {
+          "id": 29973,
+          "logprob": -0.7548828,
+          "text": "?"
         }
       ],
       "seed": null,
       "tokens": [
-        {
-          "id": 29896,
-          "logprob": -2.0292969,
-          "special": false,
-          "text": "1"
-        },
         {
           "id": 13,
-          "logprob": -2.2617188,
+          "logprob": -1.9306641,
           "special": false,
           "text": "\n"
         },
         {
-          "id": 30166,
-          "logprob": -3.8671875,
+          "id": 5618,
+          "logprob": -2.4550781,
           "special": false,
-          "text": "​"
+          "text": "What"
         },
         {
-          "id": 30166,
-          "logprob": -1.0498047,
+          "id": 338,
+          "logprob": -0.5732422,
           "special": false,
-          "text": "​"
+          "text": " is"
         },
         {
-          "id": 30166,
-          "logprob": -0.24523926,
+          "id": 278,
+          "logprob": -1.5761719,
           "special": false,
-          "text": "​"
+          "text": " the"
         },
         {
-          "id": 30166,
-          "logprob": -0.07897949,
+          "id": 4328,
+          "logprob": -1.5888672,
           "special": false,
-          "text": "​"
+          "text": " difference"
         },
         {
-          "id": 30166,
-          "logprob": -0.023529053,
+          "id": 1546,
+          "logprob": -0.026504517,
           "special": false,
-          "text": "​"
+          "text": " between"
         },
         {
-          "id": 30166,
-          "logprob": -0.011444092,
+          "id": 21784,
+          "logprob": -1.4287109,
           "special": false,
-          "text": "​"
+          "text": " Deep"
         },
         {
-          "id": 30166,
-          "logprob": -0.008300781,
+          "id": 29257,
+          "logprob": -0.15856934,
           "special": false,
-          "text": "​"
+          "text": " Learning"
         },
         {
-          "id": 30166,
-          "logprob": -0.007648468,
+          "id": 322,
+          "logprob": -0.17456055,
           "special": false,
-          "text": "​"
+          "text": " and"
+        },
+        {
+          "id": 6189,
+          "logprob": -0.62646484,
+          "special": false,
+          "text": " Machine"
         }
       ],
       "top_tokens": null
     },
-    "generated_text": "1\n​​​​​​​​"
+    "generated_text": "\nWhat is the difference between Deep Learning and Machine"
   },
   {
     "details": {
@@ -278,81 +323,96 @@
           "text": "<s>"
         },
         {
-          "id": 4321,
-          "logprob": -8.515625,
-          "text": "Test"
+          "id": 1724,
+          "logprob": -7.703125,
+          "text": "What"
         },
         {
-          "id": 2009,
-          "logprob": -15.4140625,
-          "text": "request"
+          "id": 338,
+          "logprob": -1.4765625,
+          "text": "is"
+        },
+        {
+          "id": 21784,
+          "logprob": -9.390625,
+          "text": "Deep"
+        },
+        {
+          "id": 29257,
+          "logprob": -1.8652344,
+          "text": "Learning"
+        },
+        {
+          "id": 29973,
+          "logprob": -0.7548828,
+          "text": "?"
         }
       ],
       "seed": null,
       "tokens": [
-        {
-          "id": 29896,
-          "logprob": -2.0292969,
-          "special": false,
-          "text": "1"
-        },
         {
           "id": 13,
-          "logprob": -2.2617188,
+          "logprob": -1.9306641,
           "special": false,
           "text": "\n"
         },
         {
-          "id": 30166,
-          "logprob": -3.8671875,
+          "id": 5618,
+          "logprob": -2.4550781,
           "special": false,
-          "text": "​"
+          "text": "What"
         },
         {
-          "id": 30166,
-          "logprob": -1.0498047,
+          "id": 338,
+          "logprob": -0.5732422,
           "special": false,
-          "text": "​"
+          "text": " is"
         },
         {
-          "id": 30166,
-          "logprob": -0.24523926,
+          "id": 278,
+          "logprob": -1.5761719,
           "special": false,
-          "text": "​"
+          "text": " the"
         },
         {
-          "id": 30166,
-          "logprob": -0.07897949,
+          "id": 4328,
+          "logprob": -1.5888672,
           "special": false,
-          "text": "​"
+          "text": " difference"
         },
         {
-          "id": 30166,
-          "logprob": -0.023529053,
+          "id": 1546,
+          "logprob": -0.026504517,
           "special": false,
-          "text": "​"
+          "text": " between"
         },
         {
-          "id": 30166,
-          "logprob": -0.011444092,
+          "id": 21784,
+          "logprob": -1.4287109,
           "special": false,
-          "text": "​"
+          "text": " Deep"
         },
         {
-          "id": 30166,
-          "logprob": -0.008300781,
+          "id": 29257,
+          "logprob": -0.15856934,
           "special": false,
-          "text": "​"
+          "text": " Learning"
         },
         {
-          "id": 30166,
-          "logprob": -0.007648468,
+          "id": 322,
+          "logprob": -0.17456055,
           "special": false,
-          "text": "​"
+          "text": " and"
+        },
+        {
+          "id": 6189,
+          "logprob": -0.62646484,
+          "special": false,
+          "text": " Machine"
         }
       ],
       "top_tokens": null
     },
-    "generated_text": "1\n​​​​​​​​"
+    "generated_text": "\nWhat is the difference between Deep Learning and Machine"
   }
 ]
diff --git a/integration-tests/models/__snapshots__/test_flash_awq_sharded/test_flash_llama_awq_load_sharded.json b/integration-tests/models/__snapshots__/test_flash_awq_sharded/test_flash_llama_awq_load_sharded.json
new file mode 100644
index 00000000..f1d9129d
--- /dev/null
+++ b/integration-tests/models/__snapshots__/test_flash_awq_sharded/test_flash_llama_awq_load_sharded.json
@@ -0,0 +1,418 @@
+[
+  {
+    "details": {
+      "best_of_sequences": null,
+      "finish_reason": "length",
+      "generated_tokens": 10,
+      "prefill": [
+        {
+          "id": 1,
+          "logprob": null,
+          "text": "<s>"
+        },
+        {
+          "id": 1724,
+          "logprob": -7.6914062,
+          "text": "What"
+        },
+        {
+          "id": 338,
+          "logprob": -1.4746094,
+          "text": "is"
+        },
+        {
+          "id": 21784,
+          "logprob": -9.390625,
+          "text": "Deep"
+        },
+        {
+          "id": 29257,
+          "logprob": -1.8623047,
+          "text": "Learning"
+        },
+        {
+          "id": 29973,
+          "logprob": -0.7558594,
+          "text": "?"
+        }
+      ],
+      "seed": null,
+      "tokens": [
+        {
+          "id": 13,
+          "logprob": -1.9228516,
+          "special": false,
+          "text": "\n"
+        },
+        {
+          "id": 5618,
+          "logprob": -2.4609375,
+          "special": false,
+          "text": "What"
+        },
+        {
+          "id": 338,
+          "logprob": -0.57177734,
+          "special": false,
+          "text": " is"
+        },
+        {
+          "id": 278,
+          "logprob": -1.5722656,
+          "special": false,
+          "text": " the"
+        },
+        {
+          "id": 4328,
+          "logprob": -1.5859375,
+          "special": false,
+          "text": " difference"
+        },
+        {
+          "id": 1546,
+          "logprob": -0.02633667,
+          "special": false,
+          "text": " between"
+        },
+        {
+          "id": 21784,
+          "logprob": -1.4335938,
+          "special": false,
+          "text": " Deep"
+        },
+        {
+          "id": 29257,
+          "logprob": -0.15991211,
+          "special": false,
+          "text": " Learning"
+        },
+        {
+          "id": 322,
+          "logprob": -0.17456055,
+          "special": false,
+          "text": " and"
+        },
+        {
+          "id": 6189,
+          "logprob": -0.62060547,
+          "special": false,
+          "text": " Machine"
+        }
+      ],
+      "top_tokens": null
+    },
+    "generated_text": "\nWhat is the difference between Deep Learning and Machine"
+  },
+  {
+    "details": {
+      "best_of_sequences": null,
+      "finish_reason": "length",
+      "generated_tokens": 10,
+      "prefill": [
+        {
+          "id": 1,
+          "logprob": null,
+          "text": "<s>"
+        },
+        {
+          "id": 1724,
+          "logprob": -7.6914062,
+          "text": "What"
+        },
+        {
+          "id": 338,
+          "logprob": -1.4746094,
+          "text": "is"
+        },
+        {
+          "id": 21784,
+          "logprob": -9.390625,
+          "text": "Deep"
+        },
+        {
+          "id": 29257,
+          "logprob": -1.8623047,
+          "text": "Learning"
+        },
+        {
+          "id": 29973,
+          "logprob": -0.7558594,
+          "text": "?"
+        }
+      ],
+      "seed": null,
+      "tokens": [
+        {
+          "id": 13,
+          "logprob": -1.9228516,
+          "special": false,
+          "text": "\n"
+        },
+        {
+          "id": 5618,
+          "logprob": -2.4609375,
+          "special": false,
+          "text": "What"
+        },
+        {
+          "id": 338,
+          "logprob": -0.57177734,
+          "special": false,
+          "text": " is"
+        },
+        {
+          "id": 278,
+          "logprob": -1.5722656,
+          "special": false,
+          "text": " the"
+        },
+        {
+          "id": 4328,
+          "logprob": -1.5859375,
+          "special": false,
+          "text": " difference"
+        },
+        {
+          "id": 1546,
+          "logprob": -0.02633667,
+          "special": false,
+          "text": " between"
+        },
+        {
+          "id": 21784,
+          "logprob": -1.4335938,
+          "special": false,
+          "text": " Deep"
+        },
+        {
+          "id": 29257,
+          "logprob": -0.15991211,
+          "special": false,
+          "text": " Learning"
+        },
+        {
+          "id": 322,
+          "logprob": -0.17456055,
+          "special": false,
+          "text": " and"
+        },
+        {
+          "id": 6189,
+          "logprob": -0.62060547,
+          "special": false,
+          "text": " Machine"
+        }
+      ],
+      "top_tokens": null
+    },
+    "generated_text": "\nWhat is the difference between Deep Learning and Machine"
+  },
+  {
+    "details": {
+      "best_of_sequences": null,
+      "finish_reason": "length",
+      "generated_tokens": 10,
+      "prefill": [
+        {
+          "id": 1,
+          "logprob": null,
+          "text": "<s>"
+        },
+        {
+          "id": 1724,
+          "logprob": -7.6914062,
+          "text": "What"
+        },
+        {
+          "id": 338,
+          "logprob": -1.4746094,
+          "text": "is"
+        },
+        {
+          "id": 21784,
+          "logprob": -9.390625,
+          "text": "Deep"
+        },
+        {
+          "id": 29257,
+          "logprob": -1.8623047,
+          "text": "Learning"
+        },
+        {
+          "id": 29973,
+          "logprob": -0.7558594,
+          "text": "?"
+        }
+      ],
+      "seed": null,
+      "tokens": [
+        {
+          "id": 13,
+          "logprob": -1.9228516,
+          "special": false,
+          "text": "\n"
+        },
+        {
+          "id": 5618,
+          "logprob": -2.4609375,
+          "special": false,
+          "text": "What"
+        },
+        {
+          "id": 338,
+          "logprob": -0.57177734,
+          "special": false,
+          "text": " is"
+        },
+        {
+          "id": 278,
+          "logprob": -1.5722656,
+          "special": false,
+          "text": " the"
+        },
+        {
+          "id": 4328,
+          "logprob": -1.5859375,
+          "special": false,
+          "text": " difference"
+        },
+        {
+          "id": 1546,
+          "logprob": -0.02633667,
+          "special": false,
+          "text": " between"
+        },
+        {
+          "id": 21784,
+          "logprob": -1.4335938,
+          "special": false,
+          "text": " Deep"
+        },
+        {
+          "id": 29257,
+          "logprob": -0.15991211,
+          "special": false,
+          "text": " Learning"
+        },
+        {
+          "id": 322,
+          "logprob": -0.17456055,
+          "special": false,
+          "text": " and"
+        },
+        {
+          "id": 6189,
+          "logprob": -0.62060547,
+          "special": false,
+          "text": " Machine"
+        }
+      ],
+      "top_tokens": null
+    },
+    "generated_text": "\nWhat is the difference between Deep Learning and Machine"
+  },
+  {
+    "details": {
+      "best_of_sequences": null,
+      "finish_reason": "length",
+      "generated_tokens": 10,
+      "prefill": [
+        {
+          "id": 1,
+          "logprob": null,
+          "text": "<s>"
+        },
+        {
+          "id": 1724,
+          "logprob": -7.6914062,
+          "text": "What"
+        },
+        {
+          "id": 338,
+          "logprob": -1.4746094,
+          "text": "is"
+        },
+        {
+          "id": 21784,
+          "logprob": -9.390625,
+          "text": "Deep"
+        },
+        {
+          "id": 29257,
+          "logprob": -1.8623047,
+          "text": "Learning"
+        },
+        {
+          "id": 29973,
+          "logprob": -0.7558594,
+          "text": "?"
+        }
+      ],
+      "seed": null,
+      "tokens": [
+        {
+          "id": 13,
+          "logprob": -1.9228516,
+          "special": false,
+          "text": "\n"
+        },
+        {
+          "id": 5618,
+          "logprob": -2.4609375,
+          "special": false,
+          "text": "What"
+        },
+        {
+          "id": 338,
+          "logprob": -0.57177734,
+          "special": false,
+          "text": " is"
+        },
+        {
+          "id": 278,
+          "logprob": -1.5722656,
+          "special": false,
+          "text": " the"
+        },
+        {
+          "id": 4328,
+          "logprob": -1.5859375,
+          "special": false,
+          "text": " difference"
+        },
+        {
+          "id": 1546,
+          "logprob": -0.02633667,
+          "special": false,
+          "text": " between"
+        },
+        {
+          "id": 21784,
+          "logprob": -1.4335938,
+          "special": false,
+          "text": " Deep"
+        },
+        {
+          "id": 29257,
+          "logprob": -0.15991211,
+          "special": false,
+          "text": " Learning"
+        },
+        {
+          "id": 322,
+          "logprob": -0.17456055,
+          "special": false,
+          "text": " and"
+        },
+        {
+          "id": 6189,
+          "logprob": -0.62060547,
+          "special": false,
+          "text": " Machine"
+        }
+      ],
+      "top_tokens": null
+    },
+    "generated_text": "\nWhat is the difference between Deep Learning and Machine"
+  }
+]
diff --git a/integration-tests/models/__snapshots__/test_flash_awq_sharded/test_flash_llama_awq_sharded.json b/integration-tests/models/__snapshots__/test_flash_awq_sharded/test_flash_llama_awq_sharded.json
new file mode 100644
index 00000000..0f91eb36
--- /dev/null
+++ b/integration-tests/models/__snapshots__/test_flash_awq_sharded/test_flash_llama_awq_sharded.json
@@ -0,0 +1,104 @@
+{
+  "details": {
+    "best_of_sequences": null,
+    "finish_reason": "length",
+    "generated_tokens": 10,
+    "prefill": [
+      {
+        "id": 1,
+        "logprob": null,
+        "text": "<s>"
+      },
+      {
+        "id": 1724,
+        "logprob": -7.6914062,
+        "text": "What"
+      },
+      {
+        "id": 338,
+        "logprob": -1.4746094,
+        "text": "is"
+      },
+      {
+        "id": 21784,
+        "logprob": -9.390625,
+        "text": "Deep"
+      },
+      {
+        "id": 29257,
+        "logprob": -1.8623047,
+        "text": "Learning"
+      },
+      {
+        "id": 29973,
+        "logprob": -0.7558594,
+        "text": "?"
+      }
+    ],
+    "seed": null,
+    "tokens": [
+      {
+        "id": 13,
+        "logprob": -1.9228516,
+        "special": false,
+        "text": "\n"
+      },
+      {
+        "id": 5618,
+        "logprob": -2.4609375,
+        "special": false,
+        "text": "What"
+      },
+      {
+        "id": 338,
+        "logprob": -0.57177734,
+        "special": false,
+        "text": " is"
+      },
+      {
+        "id": 278,
+        "logprob": -1.5722656,
+        "special": false,
+        "text": " the"
+      },
+      {
+        "id": 4328,
+        "logprob": -1.5927734,
+        "special": false,
+        "text": " difference"
+      },
+      {
+        "id": 1546,
+        "logprob": -0.026428223,
+        "special": false,
+        "text": " between"
+      },
+      {
+        "id": 21784,
+        "logprob": -1.4267578,
+        "special": false,
+        "text": " Deep"
+      },
+      {
+        "id": 29257,
+        "logprob": -0.16015625,
+        "special": false,
+        "text": " Learning"
+      },
+      {
+        "id": 322,
+        "logprob": -0.17382812,
+        "special": false,
+        "text": " and"
+      },
+      {
+        "id": 6189,
+        "logprob": -0.62060547,
+        "special": false,
+        "text": " Machine"
+      }
+    ],
+    "top_tokens": null
+  },
+  "generated_text": "\nWhat is the difference between Deep Learning and Machine"
+}
diff --git a/integration-tests/models/test_flash_awq.py b/integration-tests/models/test_flash_awq.py
index ca474d37..f0b99a3b 100644
--- a/integration-tests/models/test_flash_awq.py
+++ b/integration-tests/models/test_flash_awq.py
@@ -3,7 +3,7 @@ import pytest
 
 @pytest.fixture(scope="module")
 def flash_llama_awq_handle(launcher):
-    with launcher("abhinavkulkarni/codellama-CodeLlama-7b-Python-hf-w4-g128-awq", num_shard=2, quantize="awq") as handle:
+    with launcher("abhinavkulkarni/codellama-CodeLlama-7b-Python-hf-w4-g128-awq", num_shard=1, quantize="awq") as handle:
         yield handle
 
 
@@ -12,23 +12,24 @@ async def flash_llama_awq(flash_llama_awq_handle):
     await flash_llama_awq_handle.health(300)
     return flash_llama_awq_handle.client
 
-
 @pytest.mark.asyncio
 @pytest.mark.private
 async def test_flash_llama_awq(flash_llama_awq, response_snapshot):
     response = await flash_llama_awq.generate(
-        "Test request", max_new_tokens=10, decoder_input_details=True
+        "What is Deep Learning?", max_new_tokens=10, decoder_input_details=True
     )
 
     assert response.details.generated_tokens == 10
+    assert response.generated_text == "\nWhat is the difference between Deep Learning and Machine"
     assert response == response_snapshot
 
 
+
 @pytest.mark.asyncio
 @pytest.mark.private
 async def test_flash_llama_awq_all_params(flash_llama_awq, response_snapshot):
     response = await flash_llama_awq.generate(
-        "Test request",
+        "What is Deep Learning?",
         max_new_tokens=10,
         repetition_penalty=1.2,
         return_full_text=True,
@@ -52,10 +53,12 @@ async def test_flash_llama_awq_load(
     flash_llama_awq, generate_load, response_snapshot
 ):
     responses = await generate_load(
-        flash_llama_awq, "Test request", max_new_tokens=10, n=4
+        flash_llama_awq, "What is Deep Learning?", max_new_tokens=10, n=4
     )
 
     assert len(responses) == 4
-    assert all([r.generated_text == responses[0].generated_text for r in responses])
+    assert all([r.generated_text ==  "\nWhat is the difference between Deep Learning and Machine" for r in responses])
 
     assert responses == response_snapshot
+
+
diff --git a/integration-tests/models/test_flash_awq_sharded.py b/integration-tests/models/test_flash_awq_sharded.py
new file mode 100644
index 00000000..39ea464a
--- /dev/null
+++ b/integration-tests/models/test_flash_awq_sharded.py
@@ -0,0 +1,36 @@
+import pytest
+
+@pytest.fixture(scope="module")
+def flash_llama_awq_handle_sharded(launcher):
+    with launcher("abhinavkulkarni/codellama-CodeLlama-7b-Python-hf-w4-g128-awq", num_shard=2, quantize="awq") as handle:  # num_shard=2: this module covers the sharded AWQ setup
+        yield handle  # yielded inside the `with`, so the launcher context only exits at fixture teardown
+
+@pytest.fixture(scope="module")
+async def flash_llama_awq_sharded(flash_llama_awq_handle_sharded):
+    await flash_llama_awq_handle_sharded.health(300)  # NOTE(review): 300 is presumably a timeout in seconds -- confirm against the launcher handle
+    return flash_llama_awq_handle_sharded.client  # the tests in this module call generate through this client
+
+@pytest.mark.asyncio
+@pytest.mark.private
+async def test_flash_llama_awq_sharded(flash_llama_awq_sharded, response_snapshot):
+    """One 10-token generation on the 2-shard AWQ server must match the pinned text and snapshot."""
+    result = await flash_llama_awq_sharded.generate(
+        "What is Deep Learning?", max_new_tokens=10, decoder_input_details=True
+    )
+    assert result.details.generated_tokens == 10
+    assert result.generated_text == "\nWhat is the difference between Deep Learning and Machine"
+    assert result == response_snapshot
+
+@pytest.mark.asyncio
+@pytest.mark.private
+async def test_flash_llama_awq_load_sharded(
+    flash_llama_awq_sharded, generate_load, response_snapshot
+):
+    """Run the prompt through generate_load with n=4 on the 2-shard server; all 4 replies must equal the pinned text."""
+    responses = await generate_load(
+        flash_llama_awq_sharded, "What is Deep Learning?", max_new_tokens=10, n=4
+    )
+
+    assert len(responses) == 4
+    assert all(r.generated_text == "\nWhat is the difference between Deep Learning and Machine" for r in responses)
+    assert responses == response_snapshot
diff --git a/server/text_generation_server/utils/weights.py b/server/text_generation_server/utils/weights.py
index 45807949..266fcccb 100644
--- a/server/text_generation_server/utils/weights.py
+++ b/server/text_generation_server/utils/weights.py
@@ -299,8 +299,8 @@ class Weights:
                     "Cannot load `awq` weight, make sure the model is already quantized"
                 )
 
-            qzeros = self.get_tensor(f"{prefix}.qzeros")
-            scales = self.get_tensor(f"{prefix}.scales")
+            qzeros = self.get_sharded(f"{prefix}.qzeros", dim=0)
+            scales = self.get_sharded(f"{prefix}.scales", dim=0)
             g_idx = None
             use_exllama = False