From fab395b41fbe9b9929869cbfc8c84816997738fb Mon Sep 17 00:00:00 2001 From: Tzu-Yu Lee Date: Sun, 25 May 2025 22:07:54 +0800 Subject: [PATCH] perf(trtllm): reduce futile loop iterations The executor_status_looper runs a spin loop, even if there are no active requests. This makes the service constantly waste a CPU core. Make the loop block on receiving requests if there are no running ones to reduce CPU usage when idle. --- backends/trtllm/src/looper.rs | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/backends/trtllm/src/looper.rs b/backends/trtllm/src/looper.rs index 34fcf34e..267f9fa9 100644 --- a/backends/trtllm/src/looper.rs +++ b/backends/trtllm/src/looper.rs @@ -90,7 +90,12 @@ fn executor_status_looper( 'scheduler: loop { // Is there any request pending to be scheduled? - let awaiting_requests = backlog.len(); + let mut awaiting_requests = backlog.len(); + if awaiting_requests == 0 && in_flights.is_empty() { + // Wait for 1 request if we are not waiting for any response, + // so that the loop blocks at receive from backlog. + awaiting_requests += 1; + } for _ in 0..awaiting_requests { // Retrieve all the requests if let Some(ctx) = backlog.blocking_recv() {