commit 03935f6705 (parent ef1876346c)
https://github.com/huggingface/text-generation-inference.git

    update TensorRT-LLM to latest version
@@ -10,7 +10,7 @@ COPY . .
 RUN cargo chef prepare --recipe-path recipe.json
 
 # CUDA dependent dependencies resolver stage
-FROM nvidia/cuda:12.4.1-cudnn-devel-ubuntu22.04 AS cuda-builder
+FROM nvidia/cuda:12.5.1-cudnn-devel-ubuntu22.04 AS cuda-builder
 
 RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
     --mount=type=cache,target=/var/lib/apt,sharing=locked \
@@ -81,7 +81,7 @@ COPY --from=mpi-builder /usr/local/mpi /usr/local/mpi
 RUN mkdir $TGI_INSTALL_PREFIX && mkdir "$TGI_INSTALL_PREFIX/include" && mkdir "$TGI_INSTALL_PREFIX/lib" && \
     CMAKE_INSTALL_PREFIX=$TGI_INSTALL_PREFIX cargo build --release --bin text-generation-backends-trtllm
 
-FROM nvidia/cuda:12.4.1-cudnn-runtime-ubuntu22.04 AS runtime
+FROM nvidia/cuda:12.5.1-cudnn-runtime-ubuntu22.04 AS runtime
 WORKDIR /usr/local/tgi/bin
 
 ENV LD_LIBRARY_PATH="/usr/local/tgi/lib:/usr/local/tensorrt/lib:/usr/local/cuda/lib64/stubs:$LD_LIBRARY_PATH"

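The two FROM lines above bump both the builder and runtime stages from CUDA 12.4.1 to 12.5.1. A minimal sketch for sanity-checking the new base tags locally, assuming a Docker daemon is available and the tags are published on Docker Hub (the expected-output comments are assumptions, not captured logs):

# The devel image ships nvcc, so it can report the toolkit version directly.
docker run --rm nvidia/cuda:12.5.1-cudnn-devel-ubuntu22.04 nvcc --version
# expected to mention "release 12.5"

# The runtime image has no compiler; inspect the installed CUDA packages instead.
docker run --rm nvidia/cuda:12.5.1-cudnn-runtime-ubuntu22.04 \
    sh -c 'dpkg -l | grep -E "cuda-cudart|libcudnn"'
# expected to list cuda-cudart-12-5 and a cuDNN 9 runtime package
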
@@ -23,7 +23,7 @@ endif ()
 fetchcontent_declare(
         trtllm
         GIT_REPOSITORY https://github.com/NVIDIA/TensorRT-LLM.git
-        GIT_TAG 05316d3313360012536ace46c781518f5afae75e
+        GIT_TAG bca9a33b022dc6a924bf7913137feed3d28b602d
         GIT_SHALLOW FALSE
 )
 fetchcontent_makeavailable(trtllm)

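The FetchContent declaration keeps GIT_SHALLOW FALSE, which matters here: CMake cannot check out an arbitrary commit SHA such as the new GIT_TAG from a shallow clone. A hedged sketch for confirming the pinned commit is reachable upstream before a long configure run (the /tmp/trtllm-check path is illustrative; the URL and SHA come from the diff):

# Blob-less clone keeps the download small while still fetching all commit objects.
git clone --filter=blob:none https://github.com/NVIDIA/TensorRT-LLM.git /tmp/trtllm-check
git -C /tmp/trtllm-check cat-file -t bca9a33b022dc6a924bf7913137feed3d28b602d
# prints "commit" if the pin exists
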
@@ -2,10 +2,10 @@
 
 set -ex
 
-TRT_VER="10.1.0.27"
+TRT_VER="10.2.0.19"
 # Align with the pre-installed cuDNN / cuBLAS / NCCL versions from
 # https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/rel-24-05.html#rel-24-05
-CUDA_VER="12.4" # 12.4.1
+CUDA_VER="12.5" # 12.5.1
 # Keep the installation for cuDNN if users want to install PyTorch with source codes.
 # PyTorch 2.3.x can compile with cuDNN v9.
 CUDNN_VER="9.1.0.70-1"
@@ -77,7 +77,7 @@ install_centos_requirements() {
 install_tensorrt() {
     #PY_VERSION=$(python3 -c 'import sys; print(".".join(map(str, sys.version_info[0:2])))')
     #PARSED_PY_VERSION=$(echo "${PY_VERSION//./}")
-    TRT_CUDA_VERSION="12.4"
+    TRT_CUDA_VERSION="12.5"
 
     if [ -z "$RELEASE_URL_TRT" ];then
         ARCH=${TRT_TARGETARCH}
@@ -86,7 +86,7 @@ install_tensorrt() {
         if [ "$ARCH" = "amd64" ];then ARCH="x86_64";fi
         if [ "$ARCH" = "x86_64" ];then DIR_NAME="x64-agnostic"; else DIR_NAME=${ARCH};fi
         if [ "$ARCH" = "aarch64" ];then OS1="Ubuntu22_04" && OS2="Ubuntu-22.04" && OS="ubuntu-22.04"; else OS1="Linux" && OS2="Linux" && OS="linux";fi
-        RELEASE_URL_TRT=https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.1.0/tars/TensorRT-${TRT_VER}.${OS2}.${ARCH}-gnu.cuda-${TRT_CUDA_VERSION}.tar.gz
+        RELEASE_URL_TRT=https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.2.0/tars/TensorRT-${TRT_VER}.${OS2}.${ARCH}-gnu.cuda-${TRT_CUDA_VERSION}.tar.gz
     fi
     wget --no-verbose ${RELEASE_URL_TRT} -O /tmp/TensorRT.tar
     tar -xf /tmp/TensorRT.tar -C /usr/local/

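With the updated variables, the pieces of RELEASE_URL_TRT compose as shown below for the x86_64 / Linux branch of the script; a minimal sketch with the values substituted by hand from the diff (no new behaviour, just the string the script ends up downloading):

# Values taken from the hunks above; OS2/ARCH follow the non-aarch64 branches.
TRT_VER="10.2.0.19"
TRT_CUDA_VERSION="12.5"
OS2="Linux"
ARCH="x86_64"
echo "https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.2.0/tars/TensorRT-${TRT_VER}.${OS2}.${ARCH}-gnu.cuda-${TRT_CUDA_VERSION}.tar.gz"
# -> ...tensorrt/10.2.0/tars/TensorRT-10.2.0.19.Linux.x86_64-gnu.cuda-12.5.tar.gz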