mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-06-19 15:52:08 +00:00
backend(trtllm): link against decoder_attention_{0|1}
This commit is contained in:
parent
11c9acab42
commit
d299b52cb5
@ -59,6 +59,8 @@ target_link_libraries(tgi_trtllm_backend_impl PRIVATE tensorrt_llm nvinfer_plugi
|
||||
|
||||
# This installs all the artifacts into CMAKE_INSTALL_PREFIX under include/, lib/ and bin/ to make them easy to link against and find again
|
||||
install(TARGETS tgi_trtllm_backend_impl)
|
||||
#install(TARGETS cutlass_src fb_gemm_src fpA_intB_gemm_src gemm_swiglu_sm90_src kernels_src)
|
||||
install(TARGETS decoder_attention_0 decoder_attention_1)
|
||||
install(TARGETS tensorrt_llm nvinfer_plugin_tensorrt_llm decoder_attention_src executorWorker)
|
||||
install(FILES ${TRTLLM_NVRTC_WRAPPER_LIBRARY_PATH} TYPE LIB)
|
||||
if (NOT ${TGI_TRTLLM_BACKEND_DEBUG})
|
||||
|
@ -25,11 +25,12 @@ const IS_GHA_BUILD: LazyLock<bool> = LazyLock::new(|| {
|
||||
// Dependencies
|
||||
const BACKEND_DEPS: &str = "tgi_trtllm_backend_impl";
|
||||
const CUDA_TRANSITIVE_DEPS: [&str; 4] = ["cuda", "cudart", "cublas", "nvidia-ml"];
|
||||
const TENSORRT_LLM_TRANSITIVE_DEPS: [(&str, &str); 4] = [
|
||||
const TENSORRT_LLM_TRANSITIVE_DEPS: [(&str, &str); 5] = [
|
||||
("dylib", "tensorrt_llm"),
|
||||
("dylib", "tensorrt_llm_nvrtc_wrapper"),
|
||||
("dylib", "nvinfer_plugin_tensorrt_llm"),
|
||||
("dylib", "decoder_attention"),
|
||||
("dylib", "decoder_attention_0"),
|
||||
("dylib", "decoder_attention_1"),
|
||||
];
|
||||
|
||||
macro_rules! probe {
|
||||
|
Loading…
Reference in New Issue
Block a user