Fix the free algorithm to handle cases where the common prefix is smaller than the cached prefix.
This commit is contained in:
Nicolas Patry 2024-08-29 09:17:00 +02:00
parent 9c839ca5df
commit bef2f6bdaa
No known key found for this signature in database
GPG Key ID: 64AF4752B2967863

View File

@@ -172,7 +172,6 @@ impl Allocator for RadixAllocator {
) )
// Unwrap, failing is a programming error. // Unwrap, failing is a programming error.
.expect("Failed to store prefill tokens"); .expect("Failed to store prefill tokens");
// We can have a prefill with the following structure: // We can have a prefill with the following structure:
// //
// |---| From the prefix cache. // |---| From the prefix cache.
@@ -182,12 +181,14 @@ impl Allocator for RadixAllocator {
// This means that while processing this request there was a // This means that while processing this request there was a
// partially overlapping request that had A..=E in its // partially overlapping request that had A..=E in its
// prefill. In this case we need to free the blocks D E. // prefill. In this case we need to free the blocks D E.
if prefix_len > allocation.cached_prefix_len {
self.free_blocks.extend( self.free_blocks.extend(
&blocks[allocation.cached_prefix_len / self.block_size as usize &blocks[allocation.cached_prefix_len / self.block_size as usize
..prefix_len / self.block_size as usize], ..prefix_len / self.block_size as usize],
); );
} }
} }
}
// Free non-prefill blocks. // Free non-prefill blocks.
self.free_blocks self.free_blocks