mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-09-09 19:34:53 +00:00
Obey max batch size
This commit is contained in:
parent
fcc2c5fcbf
commit
dff8f7cde7
@@ -128,7 +128,7 @@ async fn batching_task(
|
||||
|
||||
// Try to get a new batch
|
||||
if let Some((new_request_ids, new_batch)) =
|
||||
- db.next_batch(min_size, max_batch_size)
|
||||
+ db.next_batch(min_size, max_batch_size - batch_size as usize)
|
||||
{
|
||||
// Generate one token for this new batch to have the attention past in cache
|
||||
let new_cached_batch =
|
||||
|
Loading…
Reference in New Issue
Block a user