From c67bec168e20989ea4b2490fa38ba792886a25dd Mon Sep 17 00:00:00 2001 From: Nicolas Patry Date: Sat, 7 Sep 2024 00:53:12 +0200 Subject: [PATCH] Remove some comments. --- backends/v3/src/radix.rs | 3 --- integration-tests/conftest.py | 1 - server/Makefile-flashinfer | 2 +- 3 files changed, 1 insertion(+), 5 deletions(-) diff --git a/backends/v3/src/radix.rs b/backends/v3/src/radix.rs index 189b8082..7fe732f1 100644 --- a/backends/v3/src/radix.rs +++ b/backends/v3/src/radix.rs @@ -76,9 +76,6 @@ impl Allocator for RadixAllocator { &prefill_tokens.as_slice()[..prefill_tokens.len().saturating_sub(1)], &mut blocks, ); - // Even if this allocation fails below, we need to increase he - // refcount to ensure that the prefix that was found is not evicted. - node_id } else { self.cache_blocks.root_id() diff --git a/integration-tests/conftest.py b/integration-tests/conftest.py index 94143058..7f636373 100644 --- a/integration-tests/conftest.py +++ b/integration-tests/conftest.py @@ -398,7 +398,6 @@ def launcher(event_loop): if not use_flash_attention: env["USE_FLASH_ATTENTION"] = "false" - env["USE_PREFIX_CACHING"] = "0" with tempfile.TemporaryFile("w+") as tmp: # We'll output stdout/stderr to a temporary file. Using a pipe diff --git a/server/Makefile-flashinfer b/server/Makefile-flashinfer index 3c279a59..3abb0491 100644 --- a/server/Makefile-flashinfer +++ b/server/Makefile-flashinfer @@ -1,2 +1,2 @@ install-flashinfer: - pip install flashinfer==0.1.4 -i https://flashinfer.ai/whl/cu124/torch2.4 + pip install flashinfer==0.1.5 -i https://flashinfer.ai/whl/cu124/torch2.4