mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-06-19 15:52:08 +00:00
perf(trtllm): reduce futile loop iterations
The executor_status_looper runs a spin loop even when there are no active requests, which makes the service constantly waste a CPU core. Make the loop block on receiving a request when no requests are running, to reduce CPU usage when idle.
This commit is contained in:
parent
f7bd82a90e
commit
fab395b41f
@ -90,7 +90,12 @@ fn executor_status_looper(
|
|||||||
|
|
||||||
'scheduler: loop {
|
'scheduler: loop {
|
||||||
// Is there any request pending to be scheduled?
|
// Is there any request pending to be scheduled?
|
||||||
let awaiting_requests = backlog.len();
|
let mut awaiting_requests = backlog.len();
|
||||||
|
if awaiting_requests == 0 && in_flights.is_empty() {
|
||||||
|
// Wait for 1 request if we are not waiting for any response,
|
||||||
|
// so that the loop blocks at receive from backlog.
|
||||||
|
awaiting_requests += 1;
|
||||||
|
}
|
||||||
for _ in 0..awaiting_requests {
|
for _ in 0..awaiting_requests {
|
||||||
// Retrieve all the requests
|
// Retrieve all the requests
|
||||||
if let Some(ctx) = backlog.blocking_recv() {
|
if let Some(ctx) = backlog.blocking_recv() {
|
||||||
|
Loading…
Reference in New Issue
Block a user