effectively cancel the request on the executor

This commit is contained in:
Morgan Funtowicz 2024-12-04 14:29:04 +01:00
parent b6dbf605af
commit 460f290d5b
2 changed files with 5 additions and 1 deletion

View File

@ -62,5 +62,7 @@ mod ffi {
/// Retrieves generation steps from the backend.
///
/// The status looper calls this after `num_tokens_ready()` reports a positive
/// count and then iterates over the returned steps.
///
/// # Returns
/// A `CxxVector<GenerationStep>` owned through a `UniquePtr`.
///
/// # Errors
/// Returns `Err` if the bridged call reports a failure.
///
/// NOTE(review): the receiver is `Pin<&mut Self>` taken by value; a caller that
/// wants to keep using the same pinned handle afterwards has to reborrow it
/// (e.g. `backend.as_mut().pull_tokens()`).
fn pull_tokens(
self: Pin<&mut TensorRtLlmBackendImpl>,
) -> Result<UniquePtr<CxxVector<GenerationStep>>>;
/// Cancels the request identified by `request_id` on the executor.
///
/// The status looper invokes this right before dropping a request from its
/// in-flight map, i.e. when the step is final or when posting the step to the
/// client failed. Nothing is returned to the caller.
///
/// NOTE(review): this is also called for requests whose final step was already
/// received — presumably cancelling a finished request is harmless; confirm
/// against the C++ implementation.
///
/// NOTE(review): the receiver is `Pin<&mut Self>` taken by value, so calling it
/// repeatedly (e.g. inside a loop) on one pinned handle requires a reborrow via
/// `.as_mut()` on each call.
fn cancel(self: Pin<&mut TensorRtLlmBackendImpl>, request_id: u64);
}
}

View File

@ -121,7 +121,8 @@ fn executor_status_looper(
}
if backend.num_tokens_ready() > 0 {
match backend.pin_mut().pull_tokens() {
let backend = backend.pin_mut();
match backend.pull_tokens() {
Ok(responses) => {
// Iterate through all the decoded token
for step in responses.deref() {
@ -140,6 +141,7 @@ fn executor_status_looper(
if posted.is_err() || step.is_final {
debug!("Removing {}", step.request_id);
backend.cancel(step.request_id);
let _ = in_flights.remove(&step.request_id);
}
} else {