mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-09-12 04:44:52 +00:00
Traitify the current allocator in preparation for swappable alloc
This commit is contained in:
parent
3b4754cd31
commit
083806aa42
@ -1,8 +1,6 @@
|
|||||||
use itertools::Itertools;
|
|
||||||
use std::{
|
use std::{
|
||||||
borrow::BorrowMut,
|
cmp::min,
|
||||||
cmp::{min, Reverse},
|
collections::{hash_map::Entry, BTreeSet, HashMap},
|
||||||
collections::{hash_map::Entry, BTreeMap, BTreeSet, BinaryHeap, HashMap, HashSet},
|
|
||||||
hash::{DefaultHasher, Hash, Hasher},
|
hash::{DefaultHasher, Hash, Hasher},
|
||||||
};
|
};
|
||||||
use tokio::sync::{mpsc, oneshot};
|
use tokio::sync::{mpsc, oneshot};
|
||||||
@ -80,18 +78,48 @@ async fn block_allocator_task(
|
|||||||
window_size: Option<u32>,
|
window_size: Option<u32>,
|
||||||
mut receiver: mpsc::UnboundedReceiver<BlockAllocatorCommand>,
|
mut receiver: mpsc::UnboundedReceiver<BlockAllocatorCommand>,
|
||||||
) {
|
) {
|
||||||
// Block 0 is reserved for health checks
|
let mut allocator = SimpleAllocator::new(blocks, block_size, window_size);
|
||||||
let mut free_blocks: Vec<u32> = (1..blocks).collect();
|
|
||||||
while let Some(cmd) = receiver.recv().await {
|
while let Some(cmd) = receiver.recv().await {
|
||||||
match cmd {
|
match cmd {
|
||||||
BlockAllocatorCommand::Free { blocks } => free_blocks.extend(blocks),
|
BlockAllocatorCommand::Free { blocks } => allocator.free(blocks),
|
||||||
BlockAllocatorCommand::Allocate {
|
BlockAllocatorCommand::Allocate {
|
||||||
tokens,
|
tokens,
|
||||||
response_sender,
|
response_sender,
|
||||||
} => {
|
} => {
|
||||||
|
response_sender.send(allocator.allocate(tokens)).unwrap();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub trait Allocator {
|
||||||
|
fn allocate(&mut self, tokens: u32) -> Option<(Vec<u32>, Vec<u32>)>;
|
||||||
|
|
||||||
|
fn free(&mut self, blocks: Vec<u32>);
|
||||||
|
}
|
||||||
|
|
||||||
|
pub struct SimpleAllocator {
|
||||||
|
free_blocks: Vec<u32>,
|
||||||
|
block_size: u32,
|
||||||
|
window_size: Option<u32>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl SimpleAllocator {
|
||||||
|
fn new(blocks: u32, block_size: u32, window_size: Option<u32>) -> Self {
|
||||||
|
SimpleAllocator {
|
||||||
|
block_size,
|
||||||
|
// Block 0 is reserved for health checks
|
||||||
|
free_blocks: (1..blocks).collect(),
|
||||||
|
window_size,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Allocator for SimpleAllocator {
|
||||||
|
fn allocate(&mut self, tokens: u32) -> Option<(Vec<u32>, Vec<u32>)> {
|
||||||
// Apply window size
|
// Apply window size
|
||||||
let (required_blocks, repeats) = {
|
let (required_blocks, repeats) = {
|
||||||
let (tokens, repeats) = match window_size {
|
let (tokens, repeats) = match self.window_size {
|
||||||
None => (tokens, 1),
|
None => (tokens, 1),
|
||||||
Some(window_size) => {
|
Some(window_size) => {
|
||||||
let repeats = (tokens + window_size - 1) / window_size;
|
let repeats = (tokens + window_size - 1) / window_size;
|
||||||
@ -100,22 +128,22 @@ async fn block_allocator_task(
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
// Pad to a multiple of block size
|
// Pad to a multiple of block size
|
||||||
let required_blocks = (tokens + block_size - 1) / block_size;
|
let required_blocks = (tokens + self.block_size - 1) / self.block_size;
|
||||||
(required_blocks, repeats)
|
(required_blocks, repeats)
|
||||||
};
|
};
|
||||||
|
|
||||||
let tokens = tokens as usize;
|
let tokens = tokens as usize;
|
||||||
let allocation = if required_blocks > free_blocks.len() as u32 {
|
if required_blocks > self.free_blocks.len() as u32 {
|
||||||
None
|
None
|
||||||
} else {
|
} else {
|
||||||
let blocks =
|
let blocks = self
|
||||||
free_blocks.split_off(free_blocks.len() - required_blocks as usize);
|
.free_blocks
|
||||||
let mut slots = Vec::with_capacity(
|
.split_off(self.free_blocks.len() - required_blocks as usize);
|
||||||
(required_blocks * block_size * repeats as u32) as usize,
|
let mut slots =
|
||||||
);
|
Vec::with_capacity((required_blocks * self.block_size * repeats as u32) as usize);
|
||||||
|
|
||||||
'slots: for block_id in blocks.repeat(repeats).iter() {
|
'slots: for block_id in blocks.repeat(repeats).iter() {
|
||||||
for s in (block_id * block_size)..((block_id + 1) * block_size) {
|
for s in (block_id * self.block_size)..((block_id + 1) * self.block_size) {
|
||||||
slots.push(s);
|
slots.push(s);
|
||||||
if slots.len() == tokens {
|
if slots.len() == tokens {
|
||||||
break 'slots;
|
break 'slots;
|
||||||
@ -123,10 +151,11 @@ async fn block_allocator_task(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
Some((blocks, slots))
|
Some((blocks, slots))
|
||||||
};
|
|
||||||
response_sender.send(allocation).unwrap();
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn free(&mut self, blocks: Vec<u32>) {
|
||||||
|
self.free_blocks.extend(blocks)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user