Mirror of https://github.com/huggingface/text-generation-inference.git
use blocking_recv in looper to consume awaiting_requests at max before pulling in a single step
parent 7bebc629af, commit 291eaa99fb
@@ -56,13 +56,10 @@ fn executor_status_poller(
     span!(Level::DEBUG, "[in-flight][submit]").in_scope(|| {
         // Is there any request pending to be scheduled?
         let awaiting_requests = waiting_requests.len();
-        if awaiting_requests > 0 {
+        for _ in 0..awaiting_requests {
             // Retrieve all the requests
-            let mut requests = Vec::with_capacity(awaiting_requests);
-            let _ = waiting_requests.recv_many(&mut requests, awaiting_requests);
-
+            if let Some(ctx) = waiting_requests.blocking_recv() {
                 // Submit all the request to the executor and move the context to the in-flight tracker
-            for ctx in requests {
                 let request = &ctx.request;
                 let generation_params = &request.inner.parameters;
                 let stopping_params = &request.inner.stopping_parameters;
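Below is a minimal, self-contained sketch of the scheduling pattern this commit moves to, not the TGI looper itself: a dedicated OS thread snapshots the number of pending requests with len(), drains at most that many with blocking_recv(), then performs a single executor step. It assumes tokio 1.37+ with the "sync" feature (UnboundedReceiver::len()); GenerationContext, the print-based "submit", and the step placeholder are illustrative only. A likely side benefit of the change: tokio's recv_many is an async method, so calling it from a synchronous loop and discarding the result with let _ would only create and drop a future, whereas blocking_recv is the synchronous counterpart of recv.

// Minimal sketch of the draining pattern above, not the actual TGI looper.
// Assumes tokio 1.37+ ("sync" feature) for UnboundedReceiver::len();
// GenerationContext and the submit/step placeholders are illustrative only.
use std::thread;
use tokio::sync::mpsc::{unbounded_channel, UnboundedReceiver};

#[derive(Debug)]
struct GenerationContext {
    prompt: String,
}

// Runs on a dedicated OS thread (not inside the tokio runtime), so the
// synchronous blocking_recv() is allowed here.
fn scheduler_loop(mut waiting_requests: UnboundedReceiver<GenerationContext>) {
    loop {
        // Snapshot how many requests are pending right now: the loop below
        // consumes at most that many, so a continuous stream of new arrivals
        // cannot postpone the executor step that follows.
        let awaiting_requests = waiting_requests.len();
        for _ in 0..awaiting_requests {
            match waiting_requests.blocking_recv() {
                // Placeholder for "submit to the executor and track in-flight".
                Some(ctx) => println!("submitting {:?}", ctx),
                None => return, // channel closed: stop the scheduler
            }
        }

        // ... a single executor step (poll in-flight generations) would go here ...

        if awaiting_requests == 0 {
            // Nothing was pending: block until the next request so this sketch
            // does not spin; in the real looper the executor step provides the
            // natural pacing instead.
            match waiting_requests.blocking_recv() {
                Some(ctx) => println!("submitting {:?}", ctx),
                None => return,
            }
        }
    }
}

fn main() {
    let (tx, rx) = unbounded_channel();
    let scheduler = thread::spawn(move || scheduler_loop(rx));

    for i in 0..3 {
        tx.send(GenerationContext { prompt: format!("request {i}") }).unwrap();
    }
    drop(tx); // closing the channel lets the scheduler thread exit
    scheduler.join().unwrap();
}

In this shape the len() snapshot bounds the number of blocking_recv calls per iteration, and because that many items are already queued and this thread is the only consumer, none of those calls actually blocks; the channel only parks the thread when the backlog is empty.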