Use blocking_recv in the looper to consume at most awaiting_requests requests in a single step before pulling

2025-07-12 02:40:16 +00:00 · 2024-08-08 00:33:10 +02:00 · 2024-08-08 00:33:10 +02:00 · 291eaa99fb
commit 291eaa99fb
parent 7bebc629af
1 changed files with 3 additions and 6 deletions
--- a/backends/trtllm/src/looper.rs
+++ b/backends/trtllm/src/looper.rs
@@ -56,13 +56,10 @@ fn executor_status_poller(
        span!(Level::DEBUG, "[in-flight][submit]").in_scope(|| {
            // Is there any request pending to be scheduled?
            let awaiting_requests = waiting_requests.len();
-            if awaiting_requests > 0 {
+            for _ in 0..awaiting_requests {
                // Retrieve all the requests
-                let mut requests = Vec::with_capacity(awaiting_requests);
+                if let Some(ctx) = waiting_requests.blocking_recv() {
-                let _ = waiting_requests.recv_many(&mut requests, awaiting_requests);
+                    // Submit all the request to the executor and move the context to the in-flight tracker
                // Submit all the request to the executor and move the context to the in-flight tracker
                for ctx in requests {
                    let request = &ctx.request;
                    let generation_params = &request.inner.parameters;
                    let stopping_params = &request.inner.stopping_parameters;