Mirror of https://github.com/huggingface/text-generation-inference.git (synced 2025-06-19 15:52:08 +00:00)
expose shutdown function at ffi layer
commit 31d9f4d5dc
parent b291be64a0
@@ -92,6 +92,11 @@ namespace huggingface::tgi::backends {
          * @return Global number of generated tokens for this request id
          */
         uint32_t Stream(RequestId reqId, std::function<TokenStreamingCallback> &cb);
 
+        /***
+         * Stop the underlying executor
+         */
+        void Shutdown();
+
     };
 }
@@ -136,3 +136,9 @@ uint32_t huggingface::tgi::backends::TensorRtLlmBackend::Stream(const tle::IdTyp
 std::vector<tle::Response> huggingface::tgi::backends::TensorRtLlmBackend::Poll(const tle::IdType requestId) {
     return executor.awaitResponses(requestId);
 }
+
+
+void huggingface::tgi::backends::TensorRtLlmBackend::Shutdown() {
+    SPDLOG_INFO("Shutting down executor");
+    executor.shutdown();
+}
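
For context, a minimal sketch of how this new method can be driven from the Rust side once the cxx bridge binding in the last hunk below is in place. The helper function and its name are hypothetical and not part of this commit; only the `shutdown` binding itself is added here.

    use cxx::UniquePtr;

    // Hypothetical call site (not part of this commit). Assumes the backend
    // handle is held in a cxx::UniquePtr, as cxx typically hands out for
    // opaque C++ types constructed on the heap.
    fn stop_backend(mut backend: UniquePtr<ffi::TensorRtLlmBackendImpl>) {
        // as_mut() yields Option<Pin<&mut TensorRtLlmBackendImpl>>, the pinned
        // receiver the #[rust_name = "shutdown"] binding expects; it returns
        // None when the pointer is null, so the call is safe unconditionally.
        if let Some(handle) = backend.as_mut() {
            handle.shutdown();
        }
    }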
@@ -7,8 +7,6 @@
 #include <filesystem>
 #include <vector>
 
-//#include "rust/cxx.h"
-//#include "../include/ffi.h"
 #include "backends/trtllm/include/ffi.h"
@@ -1,4 +1,4 @@
-pub use backend::TrtLLmBackend;
+pub use backend::TensorRtLlmBackend;
 
 use crate::backend::GenerationContext;
@@ -58,5 +58,8 @@ mod ffi {
             request_id: u64,
             callback: fn(Box<GenerationContext>, u32, u32, bool),
         ) -> u32;
+
+        #[rust_name = "shutdown"]
+        fn Shutdown(self: Pin<&mut TensorRtLlmBackendImpl>);
     }
 }
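
With the binding in place, a natural way to consume it is to tie shutdown to Rust's ownership model rather than the explicit call sketched earlier. A minimal sketch, again assuming the handle lives in a `cxx::UniquePtr`; the `BackendHandle` wrapper is hypothetical and not part of this commit:

    use cxx::UniquePtr;

    // Hypothetical RAII wrapper: the executor is stopped whenever the handle
    // goes out of scope, including on early returns and panics.
    struct BackendHandle {
        inner: UniquePtr<ffi::TensorRtLlmBackendImpl>,
    }

    impl Drop for BackendHandle {
        fn drop(&mut self) {
            // as_mut() returns None for a null pointer, so the drop path is
            // safe even if the inner handle was already moved out.
            if let Some(backend) = self.inner.as_mut() {
                backend.shutdown();
            }
        }
    }

One design note: if shutdown is tied to Drop like this, the C++ `Shutdown()` should ideally be idempotent, since the underlying executor may also be stopped when the UniquePtr releases the C++ object.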