mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-09-10 03:44:54 +00:00
really fix pruning of cancelled requests from queue
This commit is contained in:
parent
51355ea722
commit
9b1868b2c2
@@ -456,7 +456,7 @@ impl<B: BatchType> State<B> {
 
 batch_stats = next_stats;
 
-chosen_indices.push(queue_index - 1);
+chosen_indices.push(checked_up_to_index - 1);
 total_count += 1;
 if total_count >= config.size_limit {
     break
Loading…
Reference in New Issue
Block a user