From 03935f670535518f558cdd142a8d9ac38d585214 Mon Sep 17 00:00:00 2001 From: Morgan Funtowicz Date: Tue, 23 Jul 2024 22:13:02 +0000 Subject: [PATCH] update TensorRT-LLM to latest version --- backends/trtllm/Dockerfile | 4 ++-- backends/trtllm/cmake/trtllm.cmake | 2 +- backends/trtllm/scripts/install_tensorrt.sh | 8 ++++---- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/backends/trtllm/Dockerfile b/backends/trtllm/Dockerfile index 1cc9abf6..60ad03f7 100644 --- a/backends/trtllm/Dockerfile +++ b/backends/trtllm/Dockerfile @@ -10,7 +10,7 @@ COPY . . RUN cargo chef prepare --recipe-path recipe.json # CUDA dependent dependencies resolver stage -FROM nvidia/cuda:12.4.1-cudnn-devel-ubuntu22.04 AS cuda-builder +FROM nvidia/cuda:12.5.1-cudnn-devel-ubuntu22.04 AS cuda-builder RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \ --mount=type=cache,target=/var/lib/apt,sharing=locked \ @@ -81,7 +81,7 @@ COPY --from=mpi-builder /usr/local/mpi /usr/local/mpi RUN mkdir $TGI_INSTALL_PREFIX && mkdir "$TGI_INSTALL_PREFIX/include" && mkdir "$TGI_INSTALL_PREFIX/lib" && \ CMAKE_INSTALL_PREFIX=$TGI_INSTALL_PREFIX cargo build --release --bin text-generation-backends-trtllm -FROM nvidia/cuda:12.4.1-cudnn-runtime-ubuntu22.04 AS runtime +FROM nvidia/cuda:12.5.1-cudnn-runtime-ubuntu22.04 AS runtime WORKDIR /usr/local/tgi/bin ENV LD_LIBRARY_PATH="/usr/local/tgi/lib:/usr/local/tensorrt/lib:/usr/local/cuda/lib64/stubs:$LD_LIBRARY_PATH" diff --git a/backends/trtllm/cmake/trtllm.cmake b/backends/trtllm/cmake/trtllm.cmake index ecf5a452..c7907c00 100644 --- a/backends/trtllm/cmake/trtllm.cmake +++ b/backends/trtllm/cmake/trtllm.cmake @@ -23,7 +23,7 @@ endif () fetchcontent_declare( trtllm GIT_REPOSITORY https://github.com/NVIDIA/TensorRT-LLM.git - GIT_TAG 05316d3313360012536ace46c781518f5afae75e + GIT_TAG bca9a33b022dc6a924bf7913137feed3d28b602d GIT_SHALLOW FALSE ) fetchcontent_makeavailable(trtllm) diff --git a/backends/trtllm/scripts/install_tensorrt.sh b/backends/trtllm/scripts/install_tensorrt.sh index 4472bf9a..12c914b1 100755 --- a/backends/trtllm/scripts/install_tensorrt.sh +++ b/backends/trtllm/scripts/install_tensorrt.sh @@ -2,10 +2,10 @@ set -ex -TRT_VER="10.1.0.27" +TRT_VER="10.2.0.19" # Align with the pre-installed cuDNN / cuBLAS / NCCL versions from # https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/rel-24-05.html#rel-24-05 -CUDA_VER="12.4" # 12.4.1 +CUDA_VER="12.5" # 12.5.1 # Keep the installation for cuDNN if users want to install PyTorch with source codes. # PyTorch 2.3.x can compile with cuDNN v9. CUDNN_VER="9.1.0.70-1" @@ -77,7 +77,7 @@ install_centos_requirements() { install_tensorrt() { #PY_VERSION=$(python3 -c 'import sys; print(".".join(map(str, sys.version_info[0:2])))') #PARSED_PY_VERSION=$(echo "${PY_VERSION//./}") - TRT_CUDA_VERSION="12.4" + TRT_CUDA_VERSION="12.5" if [ -z "$RELEASE_URL_TRT" ];then ARCH=${TRT_TARGETARCH} @@ -86,7 +86,7 @@ install_tensorrt() { if [ "$ARCH" = "amd64" ];then ARCH="x86_64";fi if [ "$ARCH" = "x86_64" ];then DIR_NAME="x64-agnostic"; else DIR_NAME=${ARCH};fi if [ "$ARCH" = "aarch64" ];then OS1="Ubuntu22_04" && OS2="Ubuntu-22.04" && OS="ubuntu-22.04"; else OS1="Linux" && OS2="Linux" && OS="linux";fi - RELEASE_URL_TRT=https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.1.0/tars/TensorRT-${TRT_VER}.${OS2}.${ARCH}-gnu.cuda-${TRT_CUDA_VERSION}.tar.gz + RELEASE_URL_TRT=https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.2.0/tars/TensorRT-${TRT_VER}.${OS2}.${ARCH}-gnu.cuda-${TRT_CUDA_VERSION}.tar.gz fi wget --no-verbose ${RELEASE_URL_TRT} -O /tmp/TensorRT.tar tar -xf /tmp/TensorRT.tar -C /usr/local/