mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-09-09 19:34:53 +00:00
Obey max batch size
This commit is contained in:
parent
fcc2c5fcbf
commit
dff8f7cde7
@@ -128,7 +128,7 @@ async fn batching_task(
 
 // Try to get a new batch
 if let Some((new_request_ids, new_batch)) =
-    db.next_batch(min_size, max_batch_size)
+    db.next_batch(min_size, max_batch_size - batch_size as usize)
 {
     // Generate one token for this new batch to have the attention past in cache
     let new_cached_batch =
Loading…
Reference in New Issue
Block a user