mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-09-10 03:44:54 +00:00
small change to handling of cancelled requests
This commit is contained in:
parent
7552483dde
commit
c207b88b3a
@ -332,11 +332,10 @@ impl<B: BatchType> State<B> {
|
|||||||
self.last_seen_batch_size = total_count
|
self.last_seen_batch_size = total_count
|
||||||
}
|
}
|
||||||
|
|
||||||
// Filter entries where the response receiver was dropped (== entries where the request
|
// Filter cancelled entries from the front of the queue,
|
||||||
// was dropped by the client)
|
// so that next-entry waiting time is accurate
|
||||||
let queue_len_before = self.entries.len();
|
while matches!(self.entries.front(), Some((_,entry)) if entry.response_tx.is_disconnected()) {
|
||||||
self.entries.retain_mut(|(_, entry)| !entry.response_tx.is_disconnected());
|
self.entries.pop_front();
|
||||||
if queue_len_before != self.entries.len() {
|
|
||||||
// Reset the count of checked requests if any in the queue were cancelled since last check
|
// Reset the count of checked requests if any in the queue were cancelled since last check
|
||||||
self.checked_request_count = 0;
|
self.checked_request_count = 0;
|
||||||
}
|
}
|
||||||
@ -354,7 +353,7 @@ impl<B: BatchType> State<B> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Indices into buffer of entries chosen to add to next batch or remove due to expiry
|
// Indices into buffer of entries chosen to add to next batch
|
||||||
let mut chosen_indices = vec![];
|
let mut chosen_indices = vec![];
|
||||||
// Indices to drop due to client cancellation
|
// Indices to drop due to client cancellation
|
||||||
let mut indices_to_drop = vec![];
|
let mut indices_to_drop = vec![];
|
||||||
|
Loading…
Reference in New Issue
Block a user