Fixing the free algorithm to handle times where the common prefix is smaller.
2025-09-17 23:34:52 +00:00 · 2024-08-29 09:17:00 +02:00 · 2024-08-29 09:17:00 +02:00 · bef2f6bdaa
commit bef2f6bdaa
parent 9c839ca5df
1 changed file with 6 additions and 5 deletions
--- a/backends/v3/src/radix.rs
+++ b/backends/v3/src/radix.rs
@@ -172,7 +172,6 @@ impl Allocator for RadixAllocator {
                        )
                        // Unwrap, failing is a programming error.
                        .expect("Failed to store prefill tokens");
                    // We can have a prefill with the following structure:
                    //
                    // |---| From the prefix cache.
@@ -182,12 +181,14 @@ impl Allocator for RadixAllocator {
                    // This means that while processing this request there was a
                    // partially overlapping request that had A..=E in its
                    // prefill. In this case we need to free the blocks D E.
                    if prefix_len > allocation.cached_prefix_len {
                        self.free_blocks.extend(
                            &blocks[allocation.cached_prefix_len / self.block_size as usize
                                ..prefix_len / self.block_size as usize],
                        );
                    }
                }
            }
            // Free non-prefill blocks.
            self.free_blocks