Fix unsigned integer underflow

Passing --max-batch-size to the launcher actually had no effect because after a few requests the max_size passed to State::next_batch would underflow, becoming a large positive number. In the scheduler, as soon as the cached batch size reached the max_batch_size, the max_size passed to next_batch becomes 0. Since the only check in that function is ``` if Some(batch_requests.len()) == max_size { break; } ``` and it's called after the `batch_requests.len()` has become 1, it doesn't do anything to prevent more than 0 requests from being batched. Now we have a cached batch in the server that is larger than max_batch_size and `max_size - batch_size as usize` underflows. Signed-off-by: Max de Bayser <mbayser@br.ibm.com>
2025-09-12 12:54:52 +00:00 · 2024-08-01 15:53:32 -03:00 · 2024-08-01 15:53:32 -03:00 · fe5c19d155
commit fe5c19d155
parent 47447ef017
2 changed files with 12 additions and 2 deletions
--- a/router/src/infer/v2/queue.rs
+++ b/router/src/infer/v2/queue.rs
@ -205,6 +205,13 @@ impl State {
            }
        }
        if let Some(max_size) = max_size {
            if max_size == 0 {
                tracing::debug!("No capacity");
                return None;
            }
        }
        // Pad prefill_token_budget to be a multiple of block size
        let prefill_token_budget =
            ((prefill_token_budget + self.block_size - 1) / self.block_size) * self.block_size;
@ -297,7 +304,7 @@ impl State {
            batch_entries.insert(id, entry);
            // Check if max_size
-            if Some(batch_requests.len()) == max_size {
+            if Some(batch_requests.len()) >= max_size {
                break;
            }
        }
--- a/router/src/infer/v2/scheduler.rs
+++ b/router/src/infer/v2/scheduler.rs
@ -161,7 +161,10 @@ pub(crate) async fn batching_task(
                };
                let token_budget = max_batch_total_tokens.saturating_sub(batch_max_tokens);
-                let max_size = max_batch_size.map(|max_size| max_size - batch_size as usize);
+                let max_size = max_batch_size.map(|max_size| {
                    if batch_size as usize > max_size { 0 } else { max_size - batch_size as usize }
                });
                // Try to get a new batch
                if let Some((mut new_entries, new_batch, span)) = queue