From c67bec168e20989ea4b2490fa38ba792886a25dd Mon Sep 17 00:00:00 2001
From: Nicolas Patry <patry.nicolas@protonmail.com>
Date: Sat, 7 Sep 2024 00:53:12 +0200
Subject: [PATCH] Remove a radix.rs comment, stop forcing USE_PREFIX_CACHING=0 in integration tests, bump flashinfer to 0.1.5

---
 backends/v3/src/radix.rs      | 3 ---
 integration-tests/conftest.py | 1 -
 server/Makefile-flashinfer    | 2 +-
 3 files changed, 1 insertion(+), 5 deletions(-)

diff --git a/backends/v3/src/radix.rs b/backends/v3/src/radix.rs
index 189b8082..7fe732f1 100644
--- a/backends/v3/src/radix.rs
+++ b/backends/v3/src/radix.rs
@@ -76,9 +76,6 @@ impl Allocator for RadixAllocator {
                 &prefill_tokens.as_slice()[..prefill_tokens.len().saturating_sub(1)],
                 &mut blocks,
             );
-            // Even if this allocation fails below, we need to increase he
-            // refcount to ensure that the prefix that was found is not evicted.
-
             node_id
         } else {
             self.cache_blocks.root_id()
diff --git a/integration-tests/conftest.py b/integration-tests/conftest.py
index 94143058..7f636373 100644
--- a/integration-tests/conftest.py
+++ b/integration-tests/conftest.py
@@ -398,7 +398,6 @@ def launcher(event_loop):
 
         if not use_flash_attention:
             env["USE_FLASH_ATTENTION"] = "false"
-        env["USE_PREFIX_CACHING"] = "0"
 
         with tempfile.TemporaryFile("w+") as tmp:
             # We'll output stdout/stderr to a temporary file. Using a pipe
diff --git a/server/Makefile-flashinfer b/server/Makefile-flashinfer
index 3c279a59..3abb0491 100644
--- a/server/Makefile-flashinfer
+++ b/server/Makefile-flashinfer
@@ -1,2 +1,2 @@
 install-flashinfer:
-	pip install flashinfer==0.1.4 -i https://flashinfer.ai/whl/cu124/torch2.4
+	pip install flashinfer==0.1.5 -i https://flashinfer.ai/whl/cu124/torch2.4