mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-06-19 15:52:08 +00:00
backend(trtllm): bump TRTLLM to v.0.17.0
This commit is contained in:
parent
c1cf36c0dc
commit
6168ffc23f
@ -105,7 +105,7 @@ RUN export CMAKE_C_COMPILER_LAUNCHER=sccache && \
|
|||||||
cargo build --profile ${build_type} --package text-generation-backends-trtllm --bin text-generation-backends-trtllm && \
|
cargo build --profile ${build_type} --package text-generation-backends-trtllm --bin text-generation-backends-trtllm && \
|
||||||
sccache --show-stats
|
sccache --show-stats
|
||||||
|
|
||||||
FROM nvidia/cuda:12.6.3-cudnn-runtime-ubuntu24.04 AS runtime
|
FROM nvidia/cuda:12.8.0-cudnn-runtime-ubuntu24.04 AS runtime
|
||||||
RUN apt update && apt install -y libucx0 pipx python3-minimal python3-dev python3-pip python3-venv && \
|
RUN apt update && apt install -y libucx0 pipx python3-minimal python3-dev python3-pip python3-venv && \
|
||||||
rm -rf /var/lib/{apt,dpkg,cache,log}/ && \
|
rm -rf /var/lib/{apt,dpkg,cache,log}/ && \
|
||||||
pipx ensurepath && \
|
pipx ensurepath && \
|
||||||
@ -124,7 +124,7 @@ COPY --from=tgi-builder /usr/local/tgi /usr/local/tgi
|
|||||||
COPY --from=tgi-builder /usr/src/text-generation-inference/target/release/text-generation-backends-trtllm /usr/local/tgi/bin/text-generation-launcher
|
COPY --from=tgi-builder /usr/src/text-generation-inference/target/release/text-generation-backends-trtllm /usr/local/tgi/bin/text-generation-launcher
|
||||||
|
|
||||||
# This is used only for the CI/CD
|
# This is used only for the CI/CD
|
||||||
FROM nvidia/cuda:12.6.3-cudnn-runtime-ubuntu24.04 AS ci-runtime
|
FROM nvidia/cuda:12.8.0-cudnn-runtime-ubuntu24.04 AS ci-runtime
|
||||||
RUN apt update && apt install -y libasan8 libubsan1 libucx0 pipx python3-minimal python3-dev python3-pip python3-venv && \
|
RUN apt update && apt install -y libasan8 libubsan1 libucx0 pipx python3-minimal python3-dev python3-pip python3-venv && \
|
||||||
rm -rf /var/lib/{apt,dpkg,cache,log}/ && \
|
rm -rf /var/lib/{apt,dpkg,cache,log}/ && \
|
||||||
pipx ensurepath && \
|
pipx ensurepath && \
|
||||||
|
@ -28,7 +28,7 @@ find_package(Python3 REQUIRED Interpreter)
|
|||||||
fetchcontent_declare(
|
fetchcontent_declare(
|
||||||
trtllm
|
trtllm
|
||||||
GIT_REPOSITORY https://github.com/nvidia/TensorRT-LLM.git
|
GIT_REPOSITORY https://github.com/nvidia/TensorRT-LLM.git
|
||||||
GIT_TAG v0.16.0
|
GIT_TAG v0.17.0
|
||||||
GIT_SHALLOW ON
|
GIT_SHALLOW ON
|
||||||
DOWNLOAD_EXTRACT_TIMESTAMP
|
DOWNLOAD_EXTRACT_TIMESTAMP
|
||||||
)
|
)
|
||||||
|
@ -2,13 +2,13 @@
|
|||||||
|
|
||||||
set -ex
|
set -ex
|
||||||
|
|
||||||
TRT_VER_BASE="10.7.0"
|
TRT_VER_BASE="10.8.0"
|
||||||
TRT_VER_FULL="${TRT_VER_BASE}.23"
|
TRT_VER_FULL="${TRT_VER_BASE}.43"
|
||||||
CUDA_VER="12.6"
|
CUDA_VER="12.8"
|
||||||
CUDNN_VER="9.5.0.50-1"
|
CUDNN_VER="9.7.0.66-1"
|
||||||
NCCL_VER="2.22.3-1+cuda12.6"
|
NCCL_VER="2.25.1-1+cuda${CUDA_VER}"
|
||||||
CUBLAS_VER="12.6.3.3-1"
|
CUBLAS_VER="${CUDA_VER}.3.14-1"
|
||||||
NVRTC_VER="12.6.77-1"
|
NVRTC_VER="${CUDA_VER}.61-1"
|
||||||
|
|
||||||
for i in "$@"; do
|
for i in "$@"; do
|
||||||
case $i in
|
case $i in
|
||||||
@ -73,7 +73,7 @@ install_centos_requirements() {
|
|||||||
install_tensorrt() {
|
install_tensorrt() {
|
||||||
#PY_VERSION=$(python3 -c 'import sys; print(".".join(map(str, sys.version_info[0:2])))')
|
#PY_VERSION=$(python3 -c 'import sys; print(".".join(map(str, sys.version_info[0:2])))')
|
||||||
#PARSED_PY_VERSION=$(echo "${PY_VERSION//./}")
|
#PARSED_PY_VERSION=$(echo "${PY_VERSION//./}")
|
||||||
TRT_CUDA_VERSION="12.6"
|
TRT_CUDA_VERSION="12.8"
|
||||||
|
|
||||||
if [ -z "$RELEASE_URL_TRT" ];then
|
if [ -z "$RELEASE_URL_TRT" ];then
|
||||||
ARCH=${TRT_TARGETARCH}
|
ARCH=${TRT_TARGETARCH}
|
||||||
|
Loading…
Reference in New Issue
Block a user