diff --git a/Cargo.lock b/Cargo.lock
index ca041d98..e5f47197 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -3406,7 +3406,7 @@ dependencies = [
 
 [[package]]
 name = "text-generation-benchmark"
-version = "1.4.5"
+version = "2.0.0"
 dependencies = [
  "average",
  "clap",
@@ -3427,7 +3427,7 @@ dependencies = [
 
 [[package]]
 name = "text-generation-client"
-version = "1.4.5"
+version = "2.0.0"
 dependencies = [
  "futures",
  "grpc-metadata",
@@ -3444,7 +3444,7 @@ dependencies = [
 
 [[package]]
 name = "text-generation-launcher"
-version = "1.4.5"
+version = "2.0.0"
 dependencies = [
  "clap",
  "ctrlc",
@@ -3462,7 +3462,7 @@ dependencies = [
 
 [[package]]
 name = "text-generation-router"
-version = "1.4.5"
+version = "2.0.0"
 dependencies = [
  "async-stream",
  "axum",
@@ -4657,9 +4657,9 @@ dependencies = [
 
 [[package]]
 name = "zeroize"
-version = "1.8.0"
+version = "1.7.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "63381fa6624bf92130a6b87c0d07380116f80b565c42cf0d754136f0238359ef"
+checksum = "525b4ec142c6b68a2d10f01f7bbf6755599ca3f81ea53b8431b7dd348f5fdb2d"
 
 [[package]]
 name = "zip"
diff --git a/Cargo.toml b/Cargo.toml
index a9e7f1c8..ecb4878f 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -9,7 +9,7 @@ members = [
 resolver = "2"
 
 [workspace.package]
-version = "1.4.5"
+version = "2.0.0"
 edition = "2021"
 authors = ["Olivier Dehaene"]
 homepage = "https://github.com/huggingface/text-generation-inference"
diff --git a/docs/openapi.json b/docs/openapi.json
index fdf1c804..34b030f2 100644
--- a/docs/openapi.json
+++ b/docs/openapi.json
@@ -10,7 +10,7 @@
       "name": "Apache 2.0",
       "url": "https://www.apache.org/licenses/LICENSE-2.0"
     },
-    "version": "1.4.5"
+    "version": "2.0.0"
   },
   "paths": {
     "/": {
diff --git a/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_no_tools.json b/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_no_tools.json
index 543be115..153a508d 100644
--- a/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_no_tools.json
+++ b/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_no_tools.json
@@ -17,7 +17,7 @@
   "id": "",
   "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
   "object": "text_completion",
-  "system_fingerprint": "1.4.5-native",
+  "system_fingerprint": "2.0.0-native",
   "usage": {
     "completion_tokens": 100,
     "prompt_tokens": 60,
diff --git a/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools.json b/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools.json
index 728e90a4..56920b3e 100644
--- a/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools.json
+++ b/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools.json
@@ -31,7 +31,7 @@
   "id": "",
   "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
   "object": "text_completion",
-  "system_fingerprint": "1.4.5-native",
+  "system_fingerprint": "2.0.0-native",
   "usage": {
     "completion_tokens": 29,
     "prompt_tokens": 316,
diff --git a/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_auto.json b/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_auto.json
index 2e0efb86..fe679362 100644
--- a/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_auto.json
+++ b/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_auto.json
@@ -31,7 +31,7 @@
   "id": "",
   "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
   "object": "text_completion",
-  "system_fingerprint": "1.4.5-native",
+  "system_fingerprint": "2.0.0-native",
   "usage": {
     "completion_tokens": 29,
     "prompt_tokens": 316,
diff --git a/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_choice.json b/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_choice.json
index 91854223..e48a1e7d 100644
--- a/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_choice.json
+++ b/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_choice.json
@@ -30,7 +30,7 @@
   "id": "",
   "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
   "object": "text_completion",
-  "system_fingerprint": "1.4.5-native",
+  "system_fingerprint": "2.0.0-native",
   "usage": {
     "completion_tokens": 21,
     "prompt_tokens": 187,
diff --git a/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_stream.json b/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_stream.json
index e0c7aed6..cfebc05f 100644
--- a/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_stream.json
+++ b/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_stream.json
@@ -23,5 +23,5 @@
   "id": "",
   "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
   "object": "text_completion",
-  "system_fingerprint": "1.4.5-native"
+  "system_fingerprint": "2.0.0-native"
 }
diff --git a/integration-tests/pyproject.toml b/integration-tests/pyproject.toml
index ad217072..6923ff23 100644
--- a/integration-tests/pyproject.toml
+++ b/integration-tests/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "text-generation-integration-tests"
-version = "1.4.5"
+version = "2.0.0"
 description = "Text Generation Inference integration tests"
 authors = ["Nicolas Patry "]
 
diff --git a/server/pyproject.toml b/server/pyproject.toml
index 6f892c14..57f3590e 100644
--- a/server/pyproject.toml
+++ b/server/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "text-generation-server"
-version = "1.4.5"
+version = "2.0.0"
 description = "Text Generation Inference Python gRPC Server"
 authors = ["Olivier Dehaene "]
 
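The version bump above is mechanical, but it is observable at runtime: TGI reports its version through the /info endpoint and stamps OpenAI-compatible responses with a system_fingerprint, which is why the integration-test snapshots change. A minimal sketch of checking a deployed server, assuming an instance listening on localhost:8080 and the requests package (not part of this change set):

import requests

# /info exposes the server build, including the crate version bumped above.
info = requests.get("http://localhost:8080/info").json()
assert info["version"] == "2.0.0"

# The OpenAI-compatible route stamps responses with "<version>-<build>",
# e.g. "2.0.0-native", as pinned by the snapshots above.
resp = requests.post(
    "http://localhost:8080/v1/chat/completions",
    json={
        "model": "tgi",
        "messages": [{"role": "user", "content": "Hello"}],
        "max_tokens": 8,
    },
).json()
assert resp["system_fingerprint"].startswith("2.0.0")
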
"TinyLlama/TinyLlama-1.1B-Chat-v1.0", "object": "text_completion", - "system_fingerprint": "1.4.5-native", + "system_fingerprint": "2.0.0-native", "usage": { "completion_tokens": 29, "prompt_tokens": 316, diff --git a/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_choice.json b/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_choice.json index 91854223..e48a1e7d 100644 --- a/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_choice.json +++ b/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_choice.json @@ -30,7 +30,7 @@ "id": "", "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0", "object": "text_completion", - "system_fingerprint": "1.4.5-native", + "system_fingerprint": "2.0.0-native", "usage": { "completion_tokens": 21, "prompt_tokens": 187, diff --git a/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_stream.json b/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_stream.json index e0c7aed6..cfebc05f 100644 --- a/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_stream.json +++ b/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_stream.json @@ -23,5 +23,5 @@ "id": "", "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0", "object": "text_completion", - "system_fingerprint": "1.4.5-native" + "system_fingerprint": "2.0.0-native" } diff --git a/integration-tests/pyproject.toml b/integration-tests/pyproject.toml index ad217072..6923ff23 100644 --- a/integration-tests/pyproject.toml +++ b/integration-tests/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "text-generation-integration-tests" -version = "1.4.5" +version = "2.0.0" description = "Text Generation Inference integration tests" authors = ["Nicolas Patry "] diff --git a/server/pyproject.toml b/server/pyproject.toml index 6f892c14..57f3590e 100644 --- a/server/pyproject.toml +++ b/server/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "text-generation-server" -version = "1.4.5" +version = "2.0.0" description = "Text Generation Inference Python gRPC Server" authors = ["Olivier Dehaene "] diff --git a/server/text_generation_server/interceptor.py b/server/text_generation_server/interceptor.py index cde71de3..05339282 100644 --- a/server/text_generation_server/interceptor.py +++ b/server/text_generation_server/interceptor.py @@ -28,6 +28,10 @@ class ExceptionInterceptor(AsyncServerInterceptor): method_name = method_name.split("/")[-1] logger.exception(f"Method {method_name} encountered an error.") + # Runtime Error cannot be recovered from + if isinstance(err, RuntimeError): + exit(1) + if torch.cuda.is_available(): torch.cuda.empty_cache() diff --git a/server/text_generation_server/models/cache_manager.py b/server/text_generation_server/models/cache_manager.py index 4be8b1b9..85e1b19b 100644 --- a/server/text_generation_server/models/cache_manager.py +++ b/server/text_generation_server/models/cache_manager.py @@ -55,9 +55,10 @@ class CacheManager: ): # Get free blocks indices by finding values in mask that are not set to 0 free_block_indices = self.free_block_mask.nonzero() - assert ( - len(free_block_indices) >= blocks - ), f"Out of available cache blocks: asked {blocks}, only {len(free_block_indices)} free blocks" + if blocks > len(free_block_indices): + raise RuntimeError( + f"Out of available cache blocks: asked {blocks}, only 
diff --git a/server/text_generation_server/utils/layers.py b/server/text_generation_server/utils/layers.py
index 2c12984b..9cf5c80f 100644
--- a/server/text_generation_server/utils/layers.py
+++ b/server/text_generation_server/utils/layers.py
@@ -503,6 +503,10 @@ class MedusaHeadV1(nn.Module):
         self, input: torch.Tensor
     ) -> Tuple[torch.Tensor, Optional[torch.Tensor]]:
         logits = self.lm_head(input)
+        # If we have too many tokens, we skip speculative logits
+        if input.shape[0] > 128:
+            return logits, None
+
         speculative_logits = self.medusa(input)
         return logits, speculative_logits
 
@@ -549,6 +553,11 @@ class MedusaHeadV2(nn.Module):
         self.lm_head = TensorParallelHead.load(config, prefix, weights)
 
     def forward(self, x):
+        # If we have too many tokens, we skip speculative logits
+        if x.shape[0] > 128:
+            logits = self.lm_head(x)
+            return logits, None
+
         size = x.shape[-1]
         block_size = (size + self.world_size - 1) // self.world_size
         start = self.rank * block_size
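Both Medusa head variants get the same guard: past 128 input rows the speculative heads are skipped and only the regular logits are returned. Speculative decoding mainly pays off when decoding is memory-bound at small batch sizes; with a large flattened batch the extra head computation buys little and costs memory. A self-contained sketch of the gating pattern, with a hypothetical class name and an assumed input shape of [num_tokens, hidden_size]:

from typing import Optional, Tuple

import torch
import torch.nn as nn

class GatedSpeculativeHead(nn.Module):
    """Wraps an lm_head and a speculative head behind a token-count gate."""

    def __init__(self, lm_head: nn.Module, medusa: nn.Module, max_tokens: int = 128):
        super().__init__()
        self.lm_head = lm_head
        self.medusa = medusa
        self.max_tokens = max_tokens

    def forward(self, hidden: torch.Tensor) -> Tuple[torch.Tensor, Optional[torch.Tensor]]:
        logits = self.lm_head(hidden)
        # Large batches keep the GPU busy already; skip speculation and
        # return None in place of the speculative logits.
        if hidden.shape[0] > self.max_tokens:
            return logits, None
        return logits, self.medusa(hidden)
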