mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-09-12 04:44:52 +00:00
Renaming, window size
This commit is contained in:
parent
083806aa42
commit
05611f6b40
@ -190,7 +190,7 @@ struct PrefixBlockState {
|
|||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
pub struct PrefixCache {
|
pub struct PrefixCacheAllocator {
|
||||||
/// Size of a paged attention block.
|
/// Size of a paged attention block.
|
||||||
block_size: usize,
|
block_size: usize,
|
||||||
|
|
||||||
@ -210,9 +210,13 @@ pub struct PrefixCache {
|
|||||||
time: u64,
|
time: u64,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl PrefixCache {
|
impl PrefixCacheAllocator {
|
||||||
pub fn new(block_size: usize, n_blocks: usize) -> Self {
|
pub fn new(block_size: usize, n_blocks: usize, window_size: Option<u32>) -> Self {
|
||||||
PrefixCache {
|
if window_size.is_some() {
|
||||||
|
unimplemented!("Window size not supported in the prefix-caching block allocator yet");
|
||||||
|
}
|
||||||
|
|
||||||
|
PrefixCacheAllocator {
|
||||||
block_size,
|
block_size,
|
||||||
cache_blocks: HashMap::new(),
|
cache_blocks: HashMap::new(),
|
||||||
free_blocks: (1..n_blocks as u32).collect(),
|
free_blocks: (1..n_blocks as u32).collect(),
|
||||||
@ -386,11 +390,11 @@ impl PrefixCache {
|
|||||||
mod tests {
|
mod tests {
|
||||||
use crate::infer::v3::block_allocator::BlockAllocationWithCache;
|
use crate::infer::v3::block_allocator::BlockAllocationWithCache;
|
||||||
|
|
||||||
use super::PrefixCache;
|
use super::PrefixCacheAllocator;
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_prefix_cache() {
|
fn test_prefix_cache() {
|
||||||
let mut cache = PrefixCache::new(4, 3);
|
let mut cache = PrefixCacheAllocator::new(4, 3, None);
|
||||||
let allocation = cache.alloc(8, &[0, 1, 2, 3]);
|
let allocation = cache.alloc(8, &[0, 1, 2, 3]);
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
allocation,
|
allocation,
|
||||||
@ -413,7 +417,7 @@ mod tests {
|
|||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_older_prefixes_are_collected_first() {
|
fn test_older_prefixes_are_collected_first() {
|
||||||
let mut cache = PrefixCache::new(2, 4);
|
let mut cache = PrefixCacheAllocator::new(2, 4, None);
|
||||||
let allocation1 = cache.alloc(4, &[0, 1, 2, 3]);
|
let allocation1 = cache.alloc(4, &[0, 1, 2, 3]);
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
allocation1,
|
allocation1,
|
||||||
|
Loading…
Reference in New Issue
Block a user