From b6dbf605af078b910d24d92fc01910d972d73eab Mon Sep 17 00:00:00 2001 From: Morgan Funtowicz Date: Wed, 4 Dec 2024 12:02:42 +0100 Subject: [PATCH] chore(trtllm): update dependency towards 0.15.0 --- Dockerfile_trtllm | 9 +++++---- backends/trtllm/cmake/trtllm.cmake | 6 +++--- backends/trtllm/scripts/install_tensorrt.sh | 2 +- 3 files changed, 9 insertions(+), 8 deletions(-) diff --git a/Dockerfile_trtllm b/Dockerfile_trtllm index 7e1ffd61..b4523ea5 100644 --- a/Dockerfile_trtllm +++ b/Dockerfile_trtllm @@ -10,7 +10,7 @@ COPY . . RUN cargo chef prepare --recipe-path recipe.json # CUDA dependent dependencies resolver stage -FROM nvidia/cuda:12.6.1-cudnn-devel-ubuntu24.04 AS cuda-builder +FROM nvidia/cuda:12.6.3-cudnn-devel-ubuntu24.04 AS cuda-builder RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \ --mount=type=cache,target=/var/lib/apt,sharing=locked \ @@ -18,8 +18,8 @@ RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \ build-essential \ cmake \ curl \ - gcc \ - g++ \ + gcc-14 \ + g++-14 \ git \ git-lfs \ libssl-dev \ @@ -86,7 +86,7 @@ RUN mkdir $TGI_INSTALL_PREFIX && mkdir "$TGI_INSTALL_PREFIX/include" && mkdir "$ cd backends/trtllm && \ CMAKE_INSTALL_PREFIX=$TGI_INSTALL_PREFIX cargo build --release -FROM nvidia/cuda:12.6.1-cudnn-runtime-ubuntu24.04 AS runtime +FROM nvidia/cuda:12.6.3-cudnn-runtime-ubuntu24.04 AS runtime RUN apt update && apt install -y libucx0 pipx python3-minimal python3-dev python3-pip python3-venv && \ rm -rf /var/lib/{apt,dpkg,cache,log}/ && \ pipx ensurepath && \ @@ -94,6 +94,7 @@ RUN apt update && apt install -y libucx0 pipx python3-minimal python3-dev python WORKDIR /usr/local/tgi/bin +ENV PATH=/root/.local/share/pipx/venvs/transformers/bin/:$PATH ENV LD_LIBRARY_PATH="/usr/local/tgi/lib:/usr/local/mpi/lib:/usr/local/tensorrt/lib:/usr/local/cuda/lib64/stubs:$LD_LIBRARY_PATH" ENV TOKENIZERS_PARALLELISM=false ENV OMPI_MCA_plm_rsh_agent="" diff --git a/backends/trtllm/cmake/trtllm.cmake b/backends/trtllm/cmake/trtllm.cmake index 78f8df85..4217892b 100644 --- a/backends/trtllm/cmake/trtllm.cmake +++ b/backends/trtllm/cmake/trtllm.cmake @@ -25,9 +25,9 @@ find_package(Python3 REQUIRED Interpreter) fetchcontent_declare( trtllm - GIT_REPOSITORY https://github.com/NVIDIA/TensorRT-LLM.git - GIT_TAG 385626572df16175dd327fa785e4434cb7866a64 - GIT_SHALLOW OFF + GIT_REPOSITORY https://github.com/huggingface/TensorRT-LLM.git + GIT_TAG 1bb9ca4688805444f203647674bac1d7219d0579 + GIT_SHALLOW ON DOWNLOAD_EXTRACT_TIMESTAMP ) fetchcontent_makeavailable(trtllm) diff --git a/backends/trtllm/scripts/install_tensorrt.sh b/backends/trtllm/scripts/install_tensorrt.sh index 4c2dc26b..7deb2fe8 100755 --- a/backends/trtllm/scripts/install_tensorrt.sh +++ b/backends/trtllm/scripts/install_tensorrt.sh @@ -2,7 +2,7 @@ set -ex -TRT_VER_BASE="10.4.0" +TRT_VER_BASE="10.6.0" TRT_VER_FULL="${TRT_VER_BASE}.26" CUDA_VER="12.6" CUDNN_VER="9.5.0.50-1"