backend(trtllm): link against decoder_attention_{0|1}

This commit is contained in:
Morgan Funtowicz 2025-02-05 16:15:31 +01:00
parent 11c9acab42
commit d299b52cb5
2 changed files with 5 additions and 2 deletions

View File

@@ -59,6 +59,8 @@ target_link_libraries(tgi_trtllm_backend_impl PRIVATE tensorrt_llm nvinfer_plugi
# This installs all the artifacts in CMAKE_INSTALL_PREFIX under include/ lib/ bin/ to make it easy to link / find them back
install(TARGETS tgi_trtllm_backend_impl)
#install(TARGETS cutlass_src fb_gemm_src fpA_intB_gemm_src gemm_swiglu_sm90_src kernels_src)
install(TARGETS decoder_attention_0 decoder_attention_1)
install(TARGETS tensorrt_llm nvinfer_plugin_tensorrt_llm decoder_attention_src executorWorker)
install(FILES ${TRTLLM_NVRTC_WRAPPER_LIBRARY_PATH} TYPE LIB)
if (NOT ${TGI_TRTLLM_BACKEND_DEBUG})

View File

@@ -25,11 +25,12 @@ const IS_GHA_BUILD: LazyLock<bool> = LazyLock::new(|| {
// Dependencies
const BACKEND_DEPS: &str = "tgi_trtllm_backend_impl";
const CUDA_TRANSITIVE_DEPS: [&str; 4] = ["cuda", "cudart", "cublas", "nvidia-ml"];
const TENSORRT_LLM_TRANSITIVE_DEPS: [(&str, &str); 4] = [
const TENSORRT_LLM_TRANSITIVE_DEPS: [(&str, &str); 5] = [
("dylib", "tensorrt_llm"),
("dylib", "tensorrt_llm_nvrtc_wrapper"),
("dylib", "nvinfer_plugin_tensorrt_llm"),
("dylib", "decoder_attention"),
("dylib", "decoder_attention_0"),
("dylib", "decoder_attention_1"),
];
macro_rules! probe {