mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-09-12 04:44:52 +00:00
Merge commit 'refs/pull/2352/head' of github.com:huggingface/text-generation-inference into pr-2352-ci-branch
This commit is contained in:
commit
6497ae61e2
@ -205,6 +205,13 @@ impl State {
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(max_size) = max_size {
|
||||
if max_size == 0 {
|
||||
tracing::debug!("No capacity");
|
||||
return None;
|
||||
}
|
||||
}
|
||||
|
||||
// Pad prefill_token_budget to be a multiple of block size
|
||||
let prefill_token_budget =
|
||||
((prefill_token_budget + self.block_size - 1) / self.block_size) * self.block_size;
|
||||
@ -297,7 +304,7 @@ impl State {
|
||||
batch_entries.insert(id, entry);
|
||||
|
||||
// Check if max_size
|
||||
if Some(batch_requests.len()) == max_size {
|
||||
if Some(batch_requests.len()) >= max_size {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
@ -161,7 +161,10 @@ pub(crate) async fn batching_task(
|
||||
};
|
||||
|
||||
let token_budget = max_batch_total_tokens.saturating_sub(batch_max_tokens);
|
||||
let max_size = max_batch_size.map(|max_size| max_size - batch_size as usize);
|
||||
let max_size = max_batch_size.map(|max_size| {
|
||||
if batch_size as usize > max_size { 0 } else { max_size - batch_size as usize }
|
||||
});
|
||||
|
||||
|
||||
// Try to get a new batch
|
||||
if let Some((mut new_entries, new_batch, span)) = queue
|
||||
|
Loading…
Reference in New Issue
Block a user