Mirror of https://github.com/huggingface/text-generation-inference.git (synced 2025-04-25 03:52:08 +00:00)
effectively cancel the request on the executor
This commit is contained in:
parent
b6dbf605af
commit
460f290d5b
@ -62,5 +62,7 @@ mod ffi {
|
|||||||
fn pull_tokens(
|
fn pull_tokens(
|
||||||
self: Pin<&mut TensorRtLlmBackendImpl>,
|
self: Pin<&mut TensorRtLlmBackendImpl>,
|
||||||
) -> Result<UniquePtr<CxxVector<GenerationStep>>>;
|
) -> Result<UniquePtr<CxxVector<GenerationStep>>>;
|
||||||
|
|
||||||
|
fn cancel(self: Pin<&mut TensorRtLlmBackendImpl>, request_id: u64);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -121,7 +121,8 @@ fn executor_status_looper(
|
|||||||
}
|
}
|
||||||
|
|
||||||
if backend.num_tokens_ready() > 0 {
|
if backend.num_tokens_ready() > 0 {
|
||||||
match backend.pin_mut().pull_tokens() {
|
let backend = backend.pin_mut();
|
||||||
|
match backend.pull_tokens() {
|
||||||
Ok(responses) => {
|
Ok(responses) => {
|
||||||
// Iterate through all the decoded token
|
// Iterate through all the decoded token
|
||||||
for step in responses.deref() {
|
for step in responses.deref() {
|
||||||
@ -140,6 +141,7 @@ fn executor_status_looper(
|
|||||||
|
|
||||||
if posted.is_err() || step.is_final {
|
if posted.is_err() || step.is_final {
|
||||||
debug!("Removing {}", step.request_id);
|
debug!("Removing {}", step.request_id);
|
||||||
|
backend.cancel(step.request_id);
|
||||||
let _ = in_flights.remove(&step.request_id);
|
let _ = in_flights.remove(&step.request_id);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
|
Loading…
Reference in New Issue
Block a user