diff --git a/Dockerfile_trtllm b/Dockerfile_trtllm
index d7049cb7..89e4fe5a 100644
--- a/Dockerfile_trtllm
+++ b/Dockerfile_trtllm
@@ -16,8 +16,11 @@ RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
     g++-14 \
     git \
     git-lfs \
+    lld \
     libssl-dev \
     libucx-dev \
+    libasan8 \
+    libubsan1 \
     ninja-build \
     pkg-config \
     pipx \
@@ -79,12 +82,13 @@ ENV CMAKE_PREFIX_PATH="/usr/local/mpi:/usr/local/tensorrt:$CMAKE_PREFIX_PATH"
 ENV CUDA_ARCH_LIST=${cuda_arch_list}
 ENV LD_LIBRARY_PATH="/usr/local/mpi/lib:$LD_LIBRARY_PATH"
 ENV PKG_CONFIG_PATH="/usr/local/mpi/lib/pkgconfig:$PKG_CONFIG_PATH"
+ENV USE_LLD_LINKER=ON
 COPY . .
 COPY --from=trt-builder /usr/local/tensorrt /usr/local/tensorrt
 COPY --from=mpi-builder /usr/local/mpi /usr/local/mpi

 RUN mkdir $TGI_INSTALL_PREFIX && mkdir "$TGI_INSTALL_PREFIX/include" && mkdir "$TGI_INSTALL_PREFIX/lib" && \
-    python3 scripts/setup_sccache.py --is-gha-build ${is_gha_build} -k ${aws_access_key_id} -s ${aws_secret_key_id} -t ${aws_session_token} -b ${sccache_bucket} -r ${sscache_region} -p ${sccache_s3_key_prefix }&& \
+    python3 scripts/setup_sccache.py --is-gha-build ${is_gha_build} -k ${aws_access_key_id} -s ${aws_secret_key_id} -t ${aws_session_token} -b ${sccache_bucket} -r ${sscache_region} -p ${sccache_s3_key_prefix} && \
     RUSTC_WRAPPER=sccache CMAKE_INSTALL_PREFIX=$TGI_INSTALL_PREFIX cargo build --profile ${build_type} --package text-generation-backends-trtllm --bin text-generation-backends-trtllm

 FROM nvidia/cuda:12.6.3-cudnn-runtime-ubuntu24.04 AS runtime
diff --git a/backends/trtllm/CMakeLists.txt b/backends/trtllm/CMakeLists.txt
index 24c8dc7f..c9b55cb5 100644
--- a/backends/trtllm/CMakeLists.txt
+++ b/backends/trtllm/CMakeLists.txt
@@ -80,7 +80,8 @@ target_link_libraries(tgi_trtllm_backend_impl PUBLIC nlohmann_json::nlohmann_jso
 target_link_libraries(tgi_trtllm_backend_impl PRIVATE tensorrt_llm nvinfer_plugin_tensorrt_llm tensorrt_llm_nvrtc_wrapper)

 # This install all the artifacts in CMAKE_INSTALL_PREFIX under include/ lib/ bin/ to make easy to link / find it back
-install(TARGETS tgi_trtllm_backend_impl tensorrt_llm nvinfer_plugin_tensorrt_llm decoder_attention executorWorker)
+install(TARGETS tgi_trtllm_backend_impl)
+install(TARGETS tensorrt_llm nvinfer_plugin_tensorrt_llm decoder_attention executorWorker)
 install(FILES ${TRTLLM_NVRTC_WRAPPER_LIBRARY_PATH} TYPE LIB)
 if (NOT ${TGI_TRTLLM_BACKEND_DEBUG})
     install(FILES ${TRTLLM_EXECUTOR_STATIC_LIBRARY_PATH} TYPE LIB)
@@ -108,9 +109,9 @@ if (${TGI_TRTLLM_BACKEND_BUILD_TESTS})
     target_link_libraries(tgi_trtllm_backend_tests PRIVATE tensorrt_llm nvinfer_plugin_tensorrt_llm tensorrt_llm_nvrtc_wrapper)

     if (CMAKE_BUILD_TYPE MATCHES "Debug")
-        # set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fsanitize=address -fsanitize=undefined")
-        # set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=address -fsanitize=undefined")
-        # target_link_options(tgi_trtllm_backend_tests BEFORE PUBLIC -fsanitize=undefined PUBLIC -fsanitize=address)
+        set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fsanitize=address -fsanitize=undefined")
+        set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=address -fsanitize=undefined")
+        target_link_options(tgi_trtllm_backend_tests BEFORE PUBLIC -fsanitize=undefined PUBLIC -fsanitize=address)
     endif ()

     list(APPEND CMAKE_MODULE_PATH ${catch2_SOURCE_DIR}/extras)
diff --git a/backends/trtllm/build.rs b/backends/trtllm/build.rs
index ee4e49d1..ab0005f3 100644
--- a/backends/trtllm/build.rs
+++ b/backends/trtllm/build.rs
@@ -13,7 +13,7 @@ const TENSORRT_ROOT_DIR: Option<&str> = option_env!("TENSORRT_ROOT_DIR");
 const NCCL_ROOT_DIR: Option<&str> = option_env!("NCCL_ROOT_DIR");

 // Dependencies
-const BACKEND_DEPS: [&str; 2] = ["tgi_trtllm_backend_impl", "tgi_trtllm_backend"];
+const BACKEND_DEPS: &str = "tgi_trtllm_backend_impl";
 const CUDA_TRANSITIVE_DEPS: [&str; 4] = ["cuda", "cudart", "cublas", "nvidia-ml"];
 const TENSORRT_LLM_TRANSITIVE_DEPS: [(&str, &str); 4] = [
     ("dylib", "tensorrt_llm"),
@@ -42,16 +42,6 @@ fn get_compiler_flag(
     }
 }

-#[cfg(target_arch = "x86_64")]
-fn get_system_install_path(install_path: &PathBuf) -> PathBuf {
-    install_path.join("lib64")
-}
-
-#[cfg(not(target_arch = "x86_64"))]
-fn get_system_install_path(install_path: &PathBuf) -> PathBuf {
-    install_path.join("lib")
-}
-
 fn get_library_architecture() -> &'static str {
     let os = env::var("CARGO_CFG_TARGET_OS").unwrap();
     let arch = env::var("CARGO_CFG_TARGET_ARCH").unwrap();
@@ -113,6 +103,11 @@ fn build_backend(is_debug: bool, opt_level: &str, out_dir: &PathBuf) -> (PathBuf
         )
         .define("TGI_TRTLLM_BACKEND_TRT_ROOT", tensorrt_path);

+    if option_env!("USE_LLD_LINKER").is_some() {
+        println!("cargo:warning=Using lld linker");
+        config.define("TGI_TRTLLM_BACKEND_BUILD_USE_LLD", "ON");
+    }
+
     if let Some(nvcc_host_compiler) = option_env!("CMAKE_CUDA_HOST_COMPILER") {
         config.define("CMAKE_CUDA_HOST_COMPILER", nvcc_host_compiler);
     }
@@ -141,15 +136,14 @@ fn build_backend(is_debug: bool, opt_level: &str, out_dir: &PathBuf) -> (PathBuf
     }

     // Emit linkage information from the artifacts we just built
-
-    let install_lib_path = get_system_install_path(&install_path);
-
-    println!(
-        r"cargo:warning=Adding link search path: {}",
-        install_lib_path.display()
-    );
-    println!(r"cargo:rustc-link-search={}", install_lib_path.display());
-
+    for path in ["lib", "lib64"] {
+        let install_lib_path = install_path.join(path);
+        println!(
+            r"cargo:warning=Adding link search path: {}",
+            install_lib_path.display()
+        );
+        println!(r"cargo:rustc-link-search={}", install_lib_path.display());
+    }
     (PathBuf::from(install_path), deps_folder)
 }

@@ -223,7 +217,5 @@ fn main() {
     });

     // Backend
-    BACKEND_DEPS.iter().for_each(|name| {
-        println!("cargo:rustc-link-lib=static={}", name);
-    });
+    println!("cargo:rustc-link-lib=static={}", &BACKEND_DEPS);
 }
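
Note: build.rs now defines TGI_TRTLLM_BACKEND_BUILD_USE_LLD whenever the USE_LLD_LINKER environment variable is set (the Dockerfile exports it via ENV USE_LLD_LINKER=ON), but the CMake code that consumes the option is not shown in this diff. A minimal sketch of what that consumer could look like, assuming a plain -fuse-ld=lld opt-in; the option name comes from this patch, everything else below is an illustrative assumption:

    # Hypothetical handling in backends/trtllm/CMakeLists.txt -- not part of this diff.
    option(TGI_TRTLLM_BACKEND_BUILD_USE_LLD "Link the backend with lld" OFF)
    if (TGI_TRTLLM_BACKEND_BUILD_USE_LLD)
        # lld itself is installed by the builder image's apt step above.
        add_link_options("-fuse-ld=lld")
    endif ()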
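
Note: with the Debug-only sanitizer flags re-enabled in CMakeLists.txt, and the matching libasan8/libubsan1 runtime packages installed in the image, the tgi_trtllm_backend_tests binary is instrumented with ASan and UBSan. A sketch of running it, assuming a Debug build with TGI_TRTLLM_BACKEND_BUILD_TESTS=ON; the environment variables are standard sanitizer runtime knobs, not something this patch adds:

    # ASAN_OPTIONS / UBSAN_OPTIONS are the standard GCC/LLVM sanitizer runtime options.
    ASAN_OPTIONS=detect_leaks=1 UBSAN_OPTIONS=print_stacktrace=1 ./tgi_trtllm_backend_tests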