mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-09-09 19:34:53 +00:00
really fix pruning of cancelled requests from queue
This commit is contained in:
parent
51355ea722
commit
9b1868b2c2
@@ -456,7 +456,7 @@ impl<B: BatchType> State<B> {
|
||||
|
||||
batch_stats = next_stats;
|
||||
|
||||
chosen_indices.push(queue_index - 1);
|
||||
chosen_indices.push(checked_up_to_index - 1);
|
||||
total_count += 1;
|
||||
if total_count >= config.size_limit {
|
||||
break
|
||||
|
Loading…
Reference in New Issue
Block a user