Patch summary (text before the first "diff --git" line is ignored by patch tools):

  * Dockerfile_trtllm: lower-cases the build ARG names, defaults is_gha_build
    to "false" in the TGI build stage and exports it as IS_GHA_BUILD.
  * backends/trtllm/CMakeLists.txt: replaces the ASan/UBSan/MSan flag probing
    (and the commented-out libasan/libubsan lookup) with three explicit
    TGI_TRTLLM_BACKEND_ENABLE_* options, honoured only for Debug test builds.
  * backends/trtllm/build.rs: turns those options ON when the matching
    ENABLE_* variable is set on a debug build, or when IS_GHA_BUILD is truthy.

  NOTE(review): with IS_GHA_BUILD truthy all three sanitizers are requested at
  once; Clang normally rejects -fsanitize=address combined with
  -fsanitize=memory -- confirm the Debug test link still succeeds on CI.

diff --git a/Dockerfile_trtllm b/Dockerfile_trtllm
index af2a0141..818f0cb3 100644
--- a/Dockerfile_trtllm
+++ b/Dockerfile_trtllm
@@ -1,7 +1,7 @@
-ARG CUDA_ARCH_LIST="75-real;80-real;86-real;89-real;90-real"
-ARG OMPI_VERSION="4.1.7rc1"
-ARG BUILD_TYPE=release
-ARG IS_GHA_BUILD=false
+ARG cuda_arch_list="75-real;80-real;86-real;89-real;90-real"
+ARG ompi_version="4.1.7rc1"
+ARG build_type=release
+ARG is_gha_build=false
 
 # CUDA dependent dependencies resolver stage
 FROM nvidia/cuda:12.6.3-cudnn-devel-ubuntu24.04 AS cuda-builder
@@ -36,9 +36,9 @@ ENV TENSORRT_INSTALL_PREFIX=/usr/local/tensorrt
 
 # Install OpenMPI
 FROM cuda-builder AS mpi-builder
-ARG OMPI_VERSION
+ARG ompi_version
 
-ENV OMPI_TARBALL_FILENAME="openmpi-$OMPI_VERSION.tar.bz2"
+ENV OMPI_TARBALL_FILENAME="openmpi-$ompi_version.tar.bz2"
 RUN wget "https://download.open-mpi.org/release/open-mpi/v4.1/$OMPI_TARBALL_FILENAME" -P /opt/src && \
     mkdir /usr/src/mpi && \
     tar -xf "/opt/src/$OMPI_TARBALL_FILENAME" -C /usr/src/mpi --strip-components=1 && \
@@ -68,7 +68,7 @@ RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | bash -s -- -y &&
 # Build actual TGI
 ARG build_type
 ARG cuda_arch_list
-ARG is_gha_build
+ARG is_gha_build="false"
 
 # SCCACHE Specifics args - before finding a better, more generic, way...
 ARG aws_access_key_id
@@ -80,6 +80,7 @@ ARG sscache_region
 
 ENV CMAKE_PREFIX_PATH="/usr/local/mpi:/usr/local/tensorrt:$CMAKE_PREFIX_PATH"
 ENV CUDA_ARCH_LIST=${cuda_arch_list}
+ENV IS_GHA_BUILD=${is_gha_build}
 ENV LD_LIBRARY_PATH="/usr/local/mpi/lib:$LD_LIBRARY_PATH"
 ENV PKG_CONFIG_PATH="/usr/local/mpi/lib/pkgconfig:$PKG_CONFIG_PATH"
 ENV USE_LLD_LINKER=ON
diff --git a/backends/trtllm/CMakeLists.txt b/backends/trtllm/CMakeLists.txt
index b2ce97c0..6a0e4c8a 100644
--- a/backends/trtllm/CMakeLists.txt
+++ b/backends/trtllm/CMakeLists.txt
@@ -85,6 +85,10 @@ endif ()
 #### Unit Tests ####
 if (${TGI_TRTLLM_BACKEND_BUILD_TESTS})
     message(STATUS "Building tests")
+    option(TGI_TRTLLM_BACKEND_ENABLE_ASAN "Enable AddressSanitizer")
+    option(TGI_TRTLLM_BACKEND_ENABLE_UBSAN "Enable UndefinedSanitizer")
+    option(TGI_TRTLLM_BACKEND_ENABLE_MSAN "Enable MemorySanitizer")
+
     FetchContent_Declare(
             Catch2
             URL https://github.com/catchorg/Catch2/archive/refs/tags/v3.7.1.tar.gz
@@ -112,32 +116,20 @@ if (${TGI_TRTLLM_BACKEND_BUILD_TESTS})
     target_link_libraries(tgi_trtllm_backend_tests PRIVATE tensorrt_llm nvinfer_plugin_tensorrt_llm tensorrt_llm_nvrtc_wrapper)
 
     if (CMAKE_BUILD_TYPE MATCHES "Debug")
-        if (NOT "${ASan_${CMAKE_C_COMPILER_ID}_FLAGS}" STREQUAL "")
+        if (${TGI_TRTLLM_BACKEND_ENABLE_ASAN})
+            message(STATUS "Enabled AddressSanitizer")
             target_link_options(tgi_trtllm_backend_tests BEFORE PUBLIC -fsanitize=address)
         endif ()
 
-        if (NOT "${UBSan_${CMAKE_C_COMPILER_ID}_FLAGS}" STREQUAL "")
+        if (${TGI_TRTLLM_BACKEND_ENABLE_UBSAN})
+            message(STATUS "Enabled UndefinedSanitizer")
             target_link_options(tgi_trtllm_backend_tests BEFORE PUBLIC -fsanitize=undefined)
         endif ()
 
-        if (NOT "${MSan_${CMAKE_C_COMPILER_ID}_FLAGS}" STREQUAL "")
+        if (${TGI_TRTLLM_BACKEND_ENABLE_MSAN})
+            message(STATUS "Enabled MemorySanitizer")
             target_link_options(tgi_trtllm_backend_tests BEFORE PUBLIC -fsanitize=memory)
         endif ()
-
-        # execute_process(COMMAND CMAKE_C_COMPILER --print-file-name=libasan.so OUTPUT_VARIABLE LIBASAN_PATH OUTPUT_STRIP_TRAILING_WHITESPACE RESULT_VARIABLE LIBASAN_FOUND)
-        # execute_process(COMMAND CMAKE_C_COMPILER --print-file-name=libubsan.so OUTPUT_VARIABLE LIBUBSAN_PATH OUTPUT_STRIP_TRAILING_WHITESPACE RESULT_VARIABLE LIBUBSAN_FOUND)
-
-        # if (LIBASAN_FOUND)
-        #     set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fsanitize=address")
-        #     set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=address")
-        #     target_link_options(tgi_trtllm_backend_tests BEFORE PUBLIC -fsanitize=address)
-        # endif ()
-
-        # if (LIBUBSAN_FOUND)
-        #     set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fsanitize=undefined")
-        #     set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=undefined")
-        #     target_link_options(tgi_trtllm_backend_tests BEFORE PUBLIC -fsanitize=undefined)
-        # endif ()
     endif ()
 
     install(TARGETS tgi_trtllm_backend_tests)
diff --git a/backends/trtllm/build.rs b/backends/trtllm/build.rs
index e4d61982..b87e21ec 100644
--- a/backends/trtllm/build.rs
+++ b/backends/trtllm/build.rs
@@ -3,6 +3,7 @@ use pkg_config;
 use std::env;
 use std::env::consts::ARCH;
 use std::path::{absolute, PathBuf};
+use std::sync::LazyLock;
 
 const ADDITIONAL_BACKEND_LINK_LIBRARIES: [&str; 1] = ["spdlog"];
 const CUDA_ARCH_LIST: Option<&str> = option_env!("CUDA_ARCH_LIST");
@@ -12,6 +13,17 @@ const INSTALL_PREFIX: Option<&str> = option_env!("CMAKE_INSTALL_PREFIX");
 const TENSORRT_ROOT_DIR: Option<&str> = option_env!("TENSORRT_ROOT_DIR");
 const NCCL_ROOT_DIR: Option<&str> = option_env!("NCCL_ROOT_DIR");
 
+/// `true` when `IS_GHA_BUILD` was set to "on", "true" or "1" (case-insensitive);
+/// `false` when it is unset or holds any other value.
+static IS_GHA_BUILD: LazyLock<bool> = LazyLock::new(|| {
+    option_env!("IS_GHA_BUILD").map_or(false, |value| match value.to_lowercase().as_str() {
+        "on" => true,
+        "true" => true,
+        "1" => true,
+        _ => false,
+    })
+});
+
 // Dependencies
 const BACKEND_DEPS: &str = "tgi_trtllm_backend_impl";
 const CUDA_TRANSITIVE_DEPS: [&str; 4] = ["cuda", "cudart", "cublas", "nvidia-ml"];
@@ -112,6 +124,21 @@ fn build_backend(is_debug: bool, opt_level: &str, out_dir: &PathBuf) -> (PathBuf
         config.define("TGI_TRTLLM_BACKEND_BUILD_USE_LLD", "ON");
     }
 
+    if (is_debug && option_env!("ENABLE_ASAN").is_some()) || *IS_GHA_BUILD {
+        println!("cargo:warning=Enabling Address Sanitizer");
+        config.define("TGI_TRTLLM_BACKEND_ENABLE_ASAN", "ON");
+    }
+
+    if (is_debug && option_env!("ENABLE_UBSAN").is_some()) || *IS_GHA_BUILD {
+        println!("cargo:warning=Enabling Undefined Sanitizer");
+        config.define("TGI_TRTLLM_BACKEND_ENABLE_UBSAN", "ON");
+    }
+
+    if (is_debug && option_env!("ENABLE_MSAN").is_some()) || *IS_GHA_BUILD {
+        println!("cargo:warning=Enabling Memory Sanitizer");
+        config.define("TGI_TRTLLM_BACKEND_ENABLE_MSAN", "ON");
+    }
+
     if let Some(nvcc_host_compiler) = option_env!("CMAKE_CUDA_HOST_COMPILER") {
         config.define("CMAKE_CUDA_HOST_COMPILER", nvcc_host_compiler);
     }