diff --git a/router/src/infer.rs b/router/src/infer.rs index d6dbd842..d7b9b52b 100644 --- a/router/src/infer.rs +++ b/router/src/infer.rs @@ -387,7 +387,7 @@ async fn batching_task( }; let token_budget = max_batch_total_tokens.saturating_sub(batch_max_tokens); - let max_size = max_batch_size.map(|max_size| batch_size as usize - max_size); + let max_size = max_batch_size.map(|max_size| max_size - batch_size as usize); // Try to get a new batch if let Some((mut new_entries, new_batch, span)) = queue