Mirror of https://github.com/huggingface/text-generation-inference.git (synced 2025-09-10 03:44:54 +00:00)
small thing missed during rebase; add another comment
commit 47fb2fb986
parent ba1aae3e78
@@ -235,6 +235,8 @@ impl State {
         // that don't fit in the current batch to reach smaller entries that do
         let mut queue_index = checked_up_to_index;
         'queue_loop: for (entry_id, entry) in self.entries.range(queue_index..) {
+            // If we have skipped over an entry, stop when we reach requests
+            // that came in too long after it
             if matches!(time_cutoff, Some(t) if entry.queue_time > t) {
                 break
             }
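The comment added in this hunk documents the queue's anti-starvation rule: once an entry has been skipped for not fitting, scanning stops at requests that arrived too long after it, so newer requests cannot indefinitely jump ahead of an older one. Below is a minimal sketch of that idea; `Entry`, `select_fitting`, `grace`, and the token-count budget are illustrative stand-ins, not the router's actual types.

```rust
use std::time::{Duration, Instant};

struct Entry {
    queue_time: Instant,
    input_len: usize,
}

/// Walk the queue in arrival order, skipping entries that don't fit,
/// but stop once we reach requests that arrived too long after the
/// first skipped entry, so the skipped (older) request is not starved.
fn select_fitting(entries: &[Entry], budget: usize, grace: Duration) -> Vec<usize> {
    let mut chosen = Vec::new();
    let mut used = 0;
    let mut time_cutoff: Option<Instant> = None;
    for (i, entry) in entries.iter().enumerate() {
        // Same shape as the check in the hunk above.
        if matches!(time_cutoff, Some(t) if entry.queue_time > t) {
            break;
        }
        if used + entry.input_len <= budget {
            used += entry.input_len;
            chosen.push(i);
        } else {
            // The first skip establishes the cutoff window.
            time_cutoff.get_or_insert(entry.queue_time + grace);
        }
    }
    chosen
}
```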
@@ -303,9 +305,9 @@ impl State {
             }
             // Here, we can add this request to the batch without breaching memory limit

+            // Also check whether adding this request will make the batch of new requests
+            // too expensive latency-wise to perform in a single forward-pass.
             if config.prefill_weight_limit > 0 {
-                // Also check whether adding this request will make the batch of new requests
-                // too expensive latency-wise to perform in a single forward-pass.
                 if prefill_size + input_len > config.prefill_weight_limit {
                     if let Some(tree) = btree.as_mut() {
                         // Remove our tuple from the set
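The two comments moved in this hunk describe a gate that runs after the memory check passes: a non-zero `prefill_weight_limit` caps how much prefill work a single forward pass may take on, trading batch growth for latency. A hedged sketch of that gate, assuming a hypothetical `Config` struct and that prefill cost is approximated by total input tokens in the new batch:

```rust
/// Hypothetical config; only the field used by the gate is shown.
struct Config {
    prefill_weight_limit: usize,
}

/// Returns true if the request can join the batch of new requests
/// without making the prefill forward pass too expensive.
/// A limit of 0 disables the check, matching the `> 0` guard above.
fn fits_prefill(config: &Config, prefill_size: usize, input_len: usize) -> bool {
    config.prefill_weight_limit == 0
        || prefill_size + input_len <= config.prefill_weight_limit
}
```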
@@ -367,8 +369,7 @@ impl State {
         let batch_requests = chosen_indices.iter().enumerate().map(|(i, index)| {
             let (id, mut entry) = self.entries.remove(index - i).expect("bug");
             // Create a new span to link the batch back to this entry
-            let entry_batch_span =
-                info_span!(parent: &entry.span, "infer", batch_size = next_batch_size);
+            let entry_batch_span = info_span!(parent: &entry.span, "infer");
             // Add relationships
             next_batch_span.follows_from(&entry_batch_span);
             entry_batch_span.follows_from(&next_batch_span);
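This hunk is likely the "small thing missed during rebase" from the commit title: the per-entry span loses its `batch_size` field and collapses to one line. `info_span!(parent: ...)` and `Span::follows_from` are real `tracing` APIs; the sketch below isolates just the two-way linkage, with the surrounding batching logic elided.

```rust
use tracing::{info_span, Span};

/// Create a per-entry "infer" span and link it to the batch span in
/// both directions, so a trace can be followed from the batch to each
/// entry and from an entry back to its batch.
fn link_spans(entry_span: &Span, next_batch_span: &Span) -> Span {
    // Create a new span to link the batch back to this entry.
    let entry_batch_span = info_span!(parent: entry_span, "infer");
    // Add relationships in both directions.
    next_batch_span.follows_from(&entry_batch_span);
    entry_batch_span.follows_from(next_batch_span);
    entry_batch_span
}
```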