mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-06-19 15:52:08 +00:00
backend(trtllm): bump TRTLLM to v.0.17.0
This commit is contained in:
parent
c1cf36c0dc
commit
6168ffc23f
@ -105,7 +105,7 @@ RUN export CMAKE_C_COMPILER_LAUNCHER=sccache && \
|
||||
cargo build --profile ${build_type} --package text-generation-backends-trtllm --bin text-generation-backends-trtllm && \
|
||||
sccache --show-stats
|
||||
|
||||
FROM nvidia/cuda:12.6.3-cudnn-runtime-ubuntu24.04 AS runtime
|
||||
FROM nvidia/cuda:12.8.0-cudnn-runtime-ubuntu24.04 AS runtime
|
||||
RUN apt update && apt install -y libucx0 pipx python3-minimal python3-dev python3-pip python3-venv && \
|
||||
rm -rf /var/lib/{apt,dpkg,cache,log}/ && \
|
||||
pipx ensurepath && \
|
||||
@ -124,7 +124,7 @@ COPY --from=tgi-builder /usr/local/tgi /usr/local/tgi
|
||||
COPY --from=tgi-builder /usr/src/text-generation-inference/target/release/text-generation-backends-trtllm /usr/local/tgi/bin/text-generation-launcher
|
||||
|
||||
# This is used only for the CI/CD
|
||||
FROM nvidia/cuda:12.6.3-cudnn-runtime-ubuntu24.04 AS ci-runtime
|
||||
FROM nvidia/cuda:12.8.0-cudnn-runtime-ubuntu24.04 AS ci-runtime
|
||||
RUN apt update && apt install -y libasan8 libubsan1 libucx0 pipx python3-minimal python3-dev python3-pip python3-venv && \
|
||||
rm -rf /var/lib/{apt,dpkg,cache,log}/ && \
|
||||
pipx ensurepath && \
|
||||
|
@ -28,7 +28,7 @@ find_package(Python3 REQUIRED Interpreter)
|
||||
fetchcontent_declare(
|
||||
trtllm
|
||||
GIT_REPOSITORY https://github.com/nvidia/TensorRT-LLM.git
|
||||
GIT_TAG v0.16.0
|
||||
GIT_TAG v0.17.0
|
||||
GIT_SHALLOW ON
|
||||
DOWNLOAD_EXTRACT_TIMESTAMP
|
||||
)
|
||||
|
@ -2,13 +2,13 @@
|
||||
|
||||
set -ex
|
||||
|
||||
TRT_VER_BASE="10.7.0"
|
||||
TRT_VER_FULL="${TRT_VER_BASE}.23"
|
||||
CUDA_VER="12.6"
|
||||
CUDNN_VER="9.5.0.50-1"
|
||||
NCCL_VER="2.22.3-1+cuda12.6"
|
||||
CUBLAS_VER="12.6.3.3-1"
|
||||
NVRTC_VER="12.6.77-1"
|
||||
TRT_VER_BASE="10.8.0"
|
||||
TRT_VER_FULL="${TRT_VER_BASE}.43"
|
||||
CUDA_VER="12.8"
|
||||
CUDNN_VER="9.7.0.66-1"
|
||||
NCCL_VER="2.25.1-1+cuda${CUDA_VER}"
|
||||
CUBLAS_VER="${CUDA_VER}.3.14-1"
|
||||
NVRTC_VER="${CUDA_VER}.61-1"
|
||||
|
||||
for i in "$@"; do
|
||||
case $i in
|
||||
@ -73,7 +73,7 @@ install_centos_requirements() {
|
||||
install_tensorrt() {
|
||||
#PY_VERSION=$(python3 -c 'import sys; print(".".join(map(str, sys.version_info[0:2])))')
|
||||
#PARSED_PY_VERSION=$(echo "${PY_VERSION//./}")
|
||||
TRT_CUDA_VERSION="12.6"
|
||||
TRT_CUDA_VERSION="12.8"
|
||||
|
||||
if [ -z "$RELEASE_URL_TRT" ];then
|
||||
ARCH=${TRT_TARGETARCH}
|
||||
|
Loading…
Reference in New Issue
Block a user