diff --git a/Dockerfile b/Dockerfile
index b2d274d7..ed0c4aa3 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -40,14 +40,14 @@ RUN cargo build --profile release-opt
 
 # Python builder
 # Adapted from: https://github.com/pytorch/pytorch/blob/master/Dockerfile
-FROM nvidia/cuda:12.1.0-devel-ubuntu22.04 AS pytorch-install
+FROM nvidia/cuda:12.4.1-devel-ubuntu22.04 AS pytorch-install
 
 # NOTE: When updating PyTorch version, beware to remove `pip install nvidia-nccl-cu12==2.22.3` below in the Dockerfile. Context: https://github.com/huggingface/text-generation-inference/pull/2099
 ARG PYTORCH_VERSION=2.4.0
 
 ARG PYTHON_VERSION=3.10
 # Keep in sync with `server/pyproject.toml
-ARG CUDA_VERSION=12.1
+ARG CUDA_VERSION=12.4
 ARG MAMBA_VERSION=24.3.0-0
 ARG CUDA_CHANNEL=nvidia
 ARG INSTALL_CHANNEL=pytorch
diff --git a/integration-tests/conftest.py b/integration-tests/conftest.py
index 46a8769f..15af1cad 100644
--- a/integration-tests/conftest.py
+++ b/integration-tests/conftest.py
@@ -118,6 +118,7 @@ class ResponseComparator(JSONSnapshotExtension):
                 and token.text == other.text
                 and (
                     self.ignore_logprob
+                    or (token.logprob == other.logprob and token.logprob is None)
                     or math.isclose(token.logprob, other.logprob, rel_tol=self.rtol)
                 )
                 and token.special == other.special
diff --git a/integration-tests/models/__snapshots__/test_flash_llama_fp8/test_flash_llama_fp8_load.json b/integration-tests/models/__snapshots__/test_flash_llama_fp8/test_flash_llama_fp8_load.json
index 36c87c09..1e9ff6c0 100644
--- a/integration-tests/models/__snapshots__/test_flash_llama_fp8/test_flash_llama_fp8_load.json
+++ b/integration-tests/models/__snapshots__/test_flash_llama_fp8/test_flash_llama_fp8_load.json
@@ -12,12 +12,12 @@
         },
         {
           "id": 2323,
-          "logprob": -9.421875,
+          "logprob": -9.5625,
           "text": "Test"
         },
         {
           "id": 1715,
-          "logprob": -10.546875,
+          "logprob": -10.375,
           "text": " request"
         }
       ],
@@ -25,61 +25,61 @@
       "tokens": [
         {
           "id": 369,
-          "logprob": -2.1816406,
+          "logprob": -2.15625,
           "special": false,
           "text": " for"
         },
         {
           "id": 279,
-          "logprob": -2.6992188,
+          "logprob": -2.703125,
           "special": false,
           "text": " the"
         },
         {
           "id": 220,
-          "logprob": -3.6308594,
+          "logprob": -3.640625,
           "special": false,
           "text": " "
         },
         {
           "id": 679,
-          "logprob": -1.7988281,
+          "logprob": -1.703125,
           "special": false,
           "text": "201"
         },
         {
           "id": 24,
-          "logprob": -1.3535156,
+          "logprob": -1.421875,
           "special": false,
           "text": "9"
         },
         {
           "id": 12,
-          "logprob": -2.0058594,
+          "logprob": -2.03125,
           "special": false,
           "text": "-"
         },
         {
           "id": 2366,
-          "logprob": -0.45410156,
+          "logprob": -0.49023438,
           "special": false,
           "text": "202"
         },
         {
           "id": 15,
-          "logprob": -0.037109375,
+          "logprob": -0.041503906,
           "special": false,
           "text": "0"
         },
         {
           "id": 2978,
-          "logprob": -0.8095703,
+          "logprob": -0.87109375,
           "special": false,
           "text": " school"
         },
         {
           "id": 1060,
-          "logprob": -0.013053894,
+          "logprob": -0.012939453,
           "special": false,
           "text": " year"
         }
@@ -101,12 +101,12 @@
         },
         {
           "id": 2323,
-          "logprob": -9.421875,
+          "logprob": -9.5625,
           "text": "Test"
         },
         {
           "id": 1715,
-          "logprob": -10.546875,
+          "logprob": -10.375,
           "text": " request"
         }
       ],
@@ -114,61 +114,61 @@
       "tokens": [
         {
           "id": 369,
-          "logprob": -2.1816406,
+          "logprob": -2.15625,
           "special": false,
           "text": " for"
         },
         {
           "id": 279,
-          "logprob": -2.6992188,
+          "logprob": -2.703125,
           "special": false,
           "text": " the"
         },
         {
           "id": 220,
-          "logprob": -3.6308594,
+          "logprob": -3.640625,
           "special": false,
           "text": " "
         },
         {
           "id": 679,
-          "logprob": -1.7988281,
+          "logprob": -1.703125,
           "special": false,
           "text": "201"
         },
         {
           "id": 24,
-          "logprob": -1.3535156,
+          "logprob": -1.421875,
           "special": false,
           "text": "9"
         },
         {
           "id": 12,
-          "logprob": -2.0058594,
+          "logprob": -2.03125,
           "special": false,
           "text": "-"
         },
         {
           "id": 2366,
-          "logprob": -0.45410156,
+          "logprob": -0.49023438,
           "special": false,
           "text": "202"
         },
         {
           "id": 15,
-          "logprob": -0.037109375,
+          "logprob": -0.041503906,
           "special": false,
           "text": "0"
         },
         {
           "id": 2978,
-          "logprob": -0.8095703,
+          "logprob": -0.87109375,
           "special": false,
           "text": " school"
         },
         {
           "id": 1060,
-          "logprob": -0.013053894,
+          "logprob": -0.012939453,
           "special": false,
           "text": " year"
         }
@@ -190,12 +190,12 @@
         },
         {
           "id": 2323,
-          "logprob": -9.421875,
+          "logprob": -9.5625,
           "text": "Test"
         },
         {
           "id": 1715,
-          "logprob": -10.546875,
+          "logprob": -10.375,
           "text": " request"
         }
       ],
@@ -203,61 +203,61 @@
       "tokens": [
         {
           "id": 369,
-          "logprob": -2.1816406,
+          "logprob": -2.15625,
           "special": false,
           "text": " for"
         },
         {
           "id": 279,
-          "logprob": -2.6992188,
+          "logprob": -2.703125,
           "special": false,
           "text": " the"
         },
         {
           "id": 220,
-          "logprob": -3.6308594,
+          "logprob": -3.640625,
           "special": false,
           "text": " "
         },
         {
           "id": 679,
-          "logprob": -1.7988281,
+          "logprob": -1.703125,
           "special": false,
           "text": "201"
         },
         {
           "id": 24,
-          "logprob": -1.3535156,
+          "logprob": -1.421875,
           "special": false,
           "text": "9"
         },
         {
           "id": 12,
-          "logprob": -2.0058594,
+          "logprob": -2.03125,
           "special": false,
           "text": "-"
         },
         {
           "id": 2366,
-          "logprob": -0.45410156,
+          "logprob": -0.49023438,
           "special": false,
           "text": "202"
         },
         {
           "id": 15,
-          "logprob": -0.037109375,
+          "logprob": -0.041503906,
           "special": false,
           "text": "0"
         },
         {
           "id": 2978,
-          "logprob": -0.8095703,
+          "logprob": -0.87109375,
           "special": false,
           "text": " school"
         },
         {
           "id": 1060,
-          "logprob": -0.013053894,
+          "logprob": -0.012939453,
           "special": false,
           "text": " year"
         }
@@ -279,12 +279,12 @@
         },
         {
           "id": 2323,
-          "logprob": -9.421875,
+          "logprob": -9.5625,
           "text": "Test"
         },
         {
           "id": 1715,
-          "logprob": -10.546875,
+          "logprob": -10.375,
           "text": " request"
         }
       ],
@@ -292,61 +292,61 @@
       "tokens": [
         {
           "id": 369,
-          "logprob": -2.1816406,
+          "logprob": -2.15625,
           "special": false,
           "text": " for"
         },
         {
           "id": 279,
-          "logprob": -2.6992188,
+          "logprob": -2.703125,
           "special": false,
           "text": " the"
         },
         {
           "id": 220,
-          "logprob": -3.6308594,
+          "logprob": -3.640625,
           "special": false,
           "text": " "
         },
         {
           "id": 679,
-          "logprob": -1.7988281,
+          "logprob": -1.703125,
           "special": false,
           "text": "201"
         },
         {
           "id": 24,
-          "logprob": -1.3535156,
+          "logprob": -1.421875,
           "special": false,
           "text": "9"
         },
         {
           "id": 12,
-          "logprob": -2.0058594,
+          "logprob": -2.03125,
           "special": false,
           "text": "-"
         },
         {
           "id": 2366,
-          "logprob": -0.45410156,
+          "logprob": -0.49023438,
           "special": false,
           "text": "202"
         },
         {
           "id": 15,
-          "logprob": -0.037109375,
+          "logprob": -0.041503906,
           "special": false,
           "text": "0"
         },
         {
           "id": 2978,
-          "logprob": -0.8095703,
+          "logprob": -0.87109375,
           "special": false,
           "text": " school"
         },
         {
           "id": 1060,
-          "logprob": -0.013053894,
+          "logprob": -0.012939453,
           "special": false,
           "text": " year"
         }
diff --git a/integration-tests/models/__snapshots__/test_flash_starcoder_gptq/test_flash_starcoder_gptq_load.json b/integration-tests/models/__snapshots__/test_flash_starcoder_gptq/test_flash_starcoder_gptq_load.json
index c9b5ab20..d9072c52 100644
--- a/integration-tests/models/__snapshots__/test_flash_starcoder_gptq/test_flash_starcoder_gptq_load.json
+++ b/integration-tests/models/__snapshots__/test_flash_starcoder_gptq/test_flash_starcoder_gptq_load.json
@@ -17,37 +17,37 @@
         },
         {
           "id": 21017,
-          "logprob": -8.859375,
+          "logprob": -8.8515625,
           "text": "ometric"
         },
         {
           "id": 81,
-          "logprob": -0.21826172,
+          "logprob": -0.22033691,
           "text": "_"
         },
         {
           "id": 6009,
-          "logprob": -1.3085938,
+          "logprob": -1.2939453,
           "text": "mean"
         },
         {
           "id": 26,
-          "logprob": -0.2548828,
+          "logprob": -0.25268555,
           "text": "("
         },
         {
           "id": 62,
-          "logprob": -4.8007812,
+          "logprob": -4.796875,
           "text": "L"
         },
         {
           "id": 44,
-          "logprob": -3.7871094,
+          "logprob": -3.796875,
           "text": ":"
         },
         {
           "id": 1682,
-          "logprob": -0.81152344,
+          "logprob": -0.8066406,
           "text": " List"
         },
         {
@@ -57,7 +57,7 @@
         },
         {
           "id": 1808,
-          "logprob": -0.46313477,
+          "logprob": -0.46166992,
           "text": "float"
         },
         {
@@ -70,7 +70,7 @@
       "tokens": [
         {
           "id": 284,
-          "logprob": -0.046936035,
+          "logprob": -0.046844482,
           "special": false,
           "text": "\n   "
         },
@@ -103,22 +103,22 @@
         },
         {
           "id": 21017,
-          "logprob": -8.859375,
+          "logprob": -8.8515625,
           "text": "ometric"
         },
         {
           "id": 81,
-          "logprob": -0.21899414,
+          "logprob": -0.21826172,
           "text": "_"
         },
         {
           "id": 6009,
-          "logprob": -1.3105469,
+          "logprob": -1.2871094,
           "text": "mean"
         },
         {
           "id": 26,
-          "logprob": -0.25561523,
+          "logprob": -0.25390625,
           "text": "("
         },
         {
@@ -131,92 +131,6 @@
           "logprob": -3.7890625,
           "text": ":"
         },
-        {
-          "id": 1682,
-          "logprob": -0.80615234,
-          "text": " List"
-        },
-        {
-          "id": 77,
-          "logprob": -0.22375488,
-          "text": "["
-        },
-        {
-          "id": 1808,
-          "logprob": -0.46801758,
-          "text": "float"
-        },
-        {
-          "id": 10794,
-          "logprob": -3.0253906,
-          "text": "]):"
-        }
-      ],
-      "seed": null,
-      "tokens": [
-        {
-          "id": 284,
-          "logprob": -0.046447754,
-          "special": false,
-          "text": "\n   "
-        },
-        {
-          "id": 0,
-          "logprob": null,
-          "special": true,
-          "text": "<|endoftext|>"
-        }
-      ],
-      "top_tokens": null
-    },
-    "generated_text": "\n   "
-  },
-  {
-    "details": {
-      "best_of_sequences": null,
-      "finish_reason": "eos_token",
-      "generated_tokens": 2,
-      "prefill": [
-        {
-          "id": 589,
-          "logprob": null,
-          "text": "def"
-        },
-        {
-          "id": 3226,
-          "logprob": -8.9453125,
-          "text": " ge"
-        },
-        {
-          "id": 21017,
-          "logprob": -8.859375,
-          "text": "ometric"
-        },
-        {
-          "id": 81,
-          "logprob": -0.2163086,
-          "text": "_"
-        },
-        {
-          "id": 6009,
-          "logprob": -1.2958984,
-          "text": "mean"
-        },
-        {
-          "id": 26,
-          "logprob": -0.2529297,
-          "text": "("
-        },
-        {
-          "id": 62,
-          "logprob": -4.796875,
-          "text": "L"
-        },
-        {
-          "id": 44,
-          "logprob": -3.7910156,
-          "text": ":"
-        },
         {
           "id": 1682,
           "logprob": -0.8076172,
@@ -224,12 +138,12 @@
         },
         {
           "id": 77,
-          "logprob": -0.22375488,
+          "logprob": -0.22302246,
           "text": "["
         },
         {
           "id": 1808,
-          "logprob": -0.46655273,
+          "logprob": -0.46435547,
           "text": "float"
         },
         {
@@ -242,7 +156,7 @@
       "tokens": [
         {
           "id": 284,
-          "logprob": -0.0463562,
+          "logprob": -0.046722412,
           "special": false,
           "text": "\n   "
         },
@@ -275,47 +189,133 @@
         },
         {
           "id": 21017,
-          "logprob": -8.859375,
+          "logprob": -8.8515625,
           "text": "ometric"
         },
         {
           "id": 81,
-          "logprob": -0.21862793,
+          "logprob": -0.21813965,
           "text": "_"
         },
         {
           "id": 6009,
-          "logprob": -1.3095703,
+          "logprob": -1.2744141,
           "text": "mean"
         },
         {
           "id": 26,
-          "logprob": -0.25512695,
+          "logprob": -0.2512207,
           "text": "("
         },
         {
           "id": 62,
-          "logprob": -4.796875,
+          "logprob": -4.8046875,
           "text": "L"
         },
         {
           "id": 44,
-          "logprob": -3.7890625,
+          "logprob": -3.7851562,
           "text": ":"
         },
         {
           "id": 1682,
-          "logprob": -0.79589844,
+          "logprob": -0.81396484,
           "text": " List"
         },
         {
           "id": 77,
-          "logprob": -0.22692871,
+          "logprob": -0.22570801,
           "text": "["
         },
         {
           "id": 1808,
-          "logprob": -0.46801758,
+          "logprob": -0.46044922,
+          "text": "float"
+        },
+        {
+          "id": 10794,
+          "logprob": -3.0234375,
+          "text": "]):"
+        }
+      ],
+      "seed": null,
+      "tokens": [
+        {
+          "id": 284,
+          "logprob": -0.04650879,
+          "special": false,
+          "text": "\n   "
+        },
+        {
+          "id": 0,
+          "logprob": null,
+          "special": true,
+          "text": "<|endoftext|>"
+        }
+      ],
+      "top_tokens": null
+    },
+    "generated_text": "\n   "
+  },
+  {
+    "details": {
+      "best_of_sequences": null,
+      "finish_reason": "eos_token",
+      "generated_tokens": 2,
+      "prefill": [
+        {
+          "id": 589,
+          "logprob": null,
+          "text": "def"
+        },
+        {
+          "id": 3226,
+          "logprob": -8.9453125,
+          "text": " ge"
+        },
+        {
+          "id": 21017,
+          "logprob": -8.8515625,
+          "text": "ometric"
+        },
+        {
+          "id": 81,
+          "logprob": -0.21960449,
+          "text": "_"
+        },
+        {
+          "id": 6009,
+          "logprob": -1.2890625,
+          "text": "mean"
+        },
+        {
+          "id": 26,
+          "logprob": -0.25073242,
+          "text": "("
+        },
+        {
+          "id": 62,
+          "logprob": -4.8085938,
+          "text": "L"
+        },
+        {
+          "id": 44,
+          "logprob": -3.8046875,
+          "text": ":"
+        },
+        {
+          "id": 1682,
+          "logprob": -0.8071289,
+          "text": " List"
+        },
+        {
+          "id": 77,
+          "logprob": -0.22570801,
+          "text": "["
+        },
+        {
+          "id": 1808,
+          "logprob": -0.46118164,
           "text": "float"
         },
         {
@@ -328,7 +328,7 @@
       "tokens": [
         {
           "id": 284,
-          "logprob": -0.04638672,
+          "logprob": -0.046539307,
           "special": false,
           "text": "\n   "
         },
diff --git a/integration-tests/models/test_flash_llama_fp8.py b/integration-tests/models/test_flash_llama_fp8.py
index fe5df590..1980846d 100644
--- a/integration-tests/models/test_flash_llama_fp8.py
+++ b/integration-tests/models/test_flash_llama_fp8.py
@@ -21,6 +21,7 @@ async def test_flash_llama_fp8(flash_llama_fp8, response_snapshot):
         "Test request", max_new_tokens=10, decoder_input_details=True
     )
 
+    assert response.generated_text == " for the 2019-2020 school year"
     assert response.details.generated_tokens == 10
     assert response == response_snapshot
 
@@ -57,6 +58,8 @@ async def test_flash_llama_fp8_load(flash_llama_fp8, generate_load, response_sna
     )
 
     assert len(responses) == 4
-    assert all([r.generated_text == responses[0].generated_text for r in responses])
-
+    assert responses[0].generated_text == " for the 2019-2020 school year"
+    assert all(
+        [r.generated_text == responses[0].generated_text for r in responses]
+    ), f"Different messages : {[r.generated_text for r in responses]}"
     assert responses == response_snapshot