This commit is contained in:
OlivierDehaene 2023-04-24 16:19:54 +02:00
parent c3ad942e9f
commit 61ff239724

View File

@@ -267,12 +267,13 @@ async fn batching_task(
     metrics::gauge!("tgi_batch_current_size", batch_size as f64);
     metrics::gauge!("tgi_batch_current_max_tokens", batch_max_tokens as f64);
-    let min_size = match waiting_tokens {
+    let min_size = if waiting_tokens >= max_waiting_tokens {
         // If we didn't onboard any new requests since >= max_waiting_tokens, we try
         // to add a new batch even though its size might be small
-        _ if waiting_tokens >= max_waiting_tokens => None,
+        None
+    } else {
         // Minimum batch size
-        _ => Some((batch_size as f32 * waiting_served_ratio).floor() as usize),
+        Some((batch_size as f32 * waiting_served_ratio).floor() as usize)
     };
     let token_budget = max_batch_total_tokens - batch_max_tokens;