Mirror of https://github.com/huggingface/text-generation-inference.git (synced 2025-06-19 15:52:08 +00:00)
expose shutdown function at ffi layer
commit 31d9f4d5dc
parent b291be64a0
@@ -92,6 +92,11 @@ namespace huggingface::tgi::backends {
          * @return Global number of generated tokens for this request id
          */
         uint32_t Stream(RequestId reqId, std::function<TokenStreamingCallback> &cb);
 
+        /***
+         * Stop the underlying executor
+         */
+        void Shutdown();
+
     };
 }
@@ -136,3 +136,9 @@ uint32_t huggingface::tgi::backends::TensorRtLlmBackend::Stream(const tle::IdTyp
 std::vector<tle::Response> huggingface::tgi::backends::TensorRtLlmBackend::Poll(const tle::IdType requestId) {
     return executor.awaitResponses(requestId);
 }
+
+
+void huggingface::tgi::backends::TensorRtLlmBackend::Shutdown() {
+    SPDLOG_INFO("Shutting down executor");
+    executor.shutdown();
+}
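
For context, a minimal sketch of how this new method can be driven from the Rust side once the cxx bridge binding in the last hunk below is in place. The helper function and its name are hypothetical and not part of this commit; only the `shutdown` binding itself is added here.

    use cxx::UniquePtr;

    // Hypothetical call site (not part of this commit). Assumes the backend
    // handle is held in a cxx::UniquePtr, as cxx typically hands out for
    // opaque C++ types constructed on the heap.
    fn stop_backend(mut backend: UniquePtr<ffi::TensorRtLlmBackendImpl>) {
        // as_mut() yields Option<Pin<&mut TensorRtLlmBackendImpl>>, the pinned
        // receiver the #[rust_name = "shutdown"] binding expects; it returns
        // None when the pointer is null, so the call is safe unconditionally.
        if let Some(handle) = backend.as_mut() {
            handle.shutdown();
        }
    }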
@@ -7,8 +7,6 @@
 #include <filesystem>
 #include <vector>
 
-//#include "rust/cxx.h"
-//#include "../include/ffi.h"
 #include "backends/trtllm/include/ffi.h"
@@ -1,4 +1,4 @@
-pub use backend::TrtLLmBackend;
+pub use backend::TensorRtLlmBackend;
 
 use crate::backend::GenerationContext;
@@ -58,5 +58,8 @@ mod ffi {
             request_id: u64,
             callback: fn(Box<GenerationContext>, u32, u32, bool),
         ) -> u32;
+
+        #[rust_name = "shutdown"]
+        fn Shutdown(self: Pin<&mut TensorRtLlmBackendImpl>);
     }
 }
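
With the binding in place, a natural way to consume it is to tie shutdown to Rust's ownership model rather than the explicit call sketched earlier. A minimal sketch, again assuming the handle lives in a `cxx::UniquePtr`; the `BackendHandle` wrapper is hypothetical and not part of this commit:

    use cxx::UniquePtr;

    // Hypothetical RAII wrapper: the executor is stopped whenever the handle
    // goes out of scope, including on early returns and panics.
    struct BackendHandle {
        inner: UniquePtr<ffi::TensorRtLlmBackendImpl>,
    }

    impl Drop for BackendHandle {
        fn drop(&mut self) {
            // as_mut() returns None for a null pointer, so the drop path is
            // safe even if the inner handle was already moved out.
            if let Some(backend) = self.inner.as_mut() {
                backend.shutdown();
            }
        }
    }

One design note: if shutdown is tied to Drop like this, the C++ `Shutdown()` should ideally be idempotent, since the underlying executor may also be stopped when the UniquePtr releases the C++ object.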