mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-06-19 15:52:08 +00:00
expose shutdown function at ffi layer
This commit is contained in:
parent
b291be64a0
commit
31d9f4d5dc
@ -92,6 +92,11 @@ namespace huggingface::tgi::backends {
|
|||||||
* @return Global number of generated tokens for this request id
|
* @return Global number of generated tokens for this request id
|
||||||
*/
|
*/
|
||||||
uint32_t Stream(RequestId reqId, std::function<TokenStreamingCallback> &cb);
|
uint32_t Stream(RequestId reqId, std::function<TokenStreamingCallback> &cb);
|
||||||
|
|
||||||
|
/***
|
||||||
|
* Stop the underlying executor
|
||||||
|
*/
|
||||||
|
void Shutdown();
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -136,3 +136,9 @@ uint32_t huggingface::tgi::backends::TensorRtLlmBackend::Stream(const tle::IdTyp
|
|||||||
std::vector<tle::Response> huggingface::tgi::backends::TensorRtLlmBackend::Poll(const tle::IdType requestId) {
|
std::vector<tle::Response> huggingface::tgi::backends::TensorRtLlmBackend::Poll(const tle::IdType requestId) {
|
||||||
return executor.awaitResponses(requestId);
|
return executor.awaitResponses(requestId);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void huggingface::tgi::backends::TensorRtLlmBackend::Shutdown() {
|
||||||
|
SPDLOG_INFO("Shutting down executor");
|
||||||
|
executor.shutdown();
|
||||||
|
}
|
@ -7,8 +7,6 @@
|
|||||||
#include <filesystem>
|
#include <filesystem>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
//#include "rust/cxx.h"
|
|
||||||
//#include "../include/ffi.h"
|
|
||||||
#include "backends/trtllm/include/ffi.h"
|
#include "backends/trtllm/include/ffi.h"
|
||||||
|
|
||||||
|
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
pub use backend::TrtLLmBackend;
|
pub use backend::TensorRtLlmBackend;
|
||||||
|
|
||||||
use crate::backend::GenerationContext;
|
use crate::backend::GenerationContext;
|
||||||
|
|
||||||
@ -58,5 +58,8 @@ mod ffi {
|
|||||||
request_id: u64,
|
request_id: u64,
|
||||||
callback: fn(Box<GenerationContext>, u32, u32, bool),
|
callback: fn(Box<GenerationContext>, u32, u32, bool),
|
||||||
) -> u32;
|
) -> u32;
|
||||||
|
|
||||||
|
#[rust_name = "shutdown"]
|
||||||
|
fn Shutdown(self: Pin<&mut TensorRtLlmBackendImpl>);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user