effectively cancel the request on the executor

2025-07-09 01:10:17 +00:00 · 2024-12-04 14:29:04 +01:00 · 2024-12-04 14:29:04 +01:00 · 460f290d5b
commit 460f290d5b
parent b6dbf605af
2 changed files with 5 additions and 1 deletions
--- a/backends/trtllm/src/lib.rs
+++ b/backends/trtllm/src/lib.rs
@ -62,5 +62,7 @@ mod ffi {
        fn pull_tokens(
            self: Pin<&mut TensorRtLlmBackendImpl>,
        ) -> Result<UniquePtr<CxxVector<GenerationStep>>>;
+
+        fn cancel(self: Pin<&mut TensorRtLlmBackendImpl>, request_id: u64);
    }
 }
--- a/backends/trtllm/src/looper.rs
+++ b/backends/trtllm/src/looper.rs
@ -121,7 +121,8 @@ fn executor_status_looper(
        }

        if backend.num_tokens_ready() > 0 {
-            match backend.pin_mut().pull_tokens() {
+            let backend = backend.pin_mut();
+            match backend.pull_tokens() {
                Ok(responses) => {
                    // Iterate through all the decoded tokens
                    for step in responses.deref() {
@ -140,6 +141,7 @@ fn executor_status_looper(

                            if posted.is_err() || step.is_final {
                                debug!("Removing {}", step.request_id);
+                                backend.cancel(step.request_id);
                                let _ = in_flights.remove(&step.request_id);
                            }
                        } else {