mirror of
				https://github.com/huggingface/text-generation-inference.git
				synced 2025-10-20 20:35:24 +00:00 
			
		
		
		
	* Ensure that `sccache` version is 0.10.0 or higher * Rename `ACTIONS_CACHE_URL` to `ACTIONS_RESULTS_URL`
		
			
				
	
	
		
			159 lines
		
	
	
		
			5.6 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
	
	
			
		
		
	
	
			159 lines
		
	
	
		
			5.6 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
	
	
# Global build arguments. They are declared before the first FROM, so a stage
# that needs one of them must re-declare it with a bare `ARG <name>`.

# Base image / third-party versions
ARG cuda_base=12.8.0
ARG ompi_version=4.1.7

# Backend build configuration
ARG build_type=release
ARG cuda_arch_list="75-real;80-real;86-real;89-real;90-real;100-real;120-real"

# sccache <-> GitHub Actions cache wiring (off by default)
ARG sccache_gha_enabled=off
ARG actions_results_url=""
ARG actions_runtime_token=""
 | |
| 
 | |
# CUDA dependent dependencies resolver stage
# Common toolchain image that every builder stage below derives from.
FROM nvidia/cuda:${cuda_base}-cudnn-devel-ubuntu24.04 AS cuda-builder

# One package per line, sorted alphabetically to keep diffs readable.
RUN apt-get update && \
    DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        build-essential \
        cmake \
        curl \
        g++-14 \
        gcc-14 \
        git \
        git-lfs \
        libasan8 \
        libssl-dev \
        libubsan1 \
        libucx-dev \
        lld \
        ninja-build \
        pipx \
        pkg-config \
        python3 \
        python3-dev \
        python3-setuptools \
        tar \
        wget && \
    pipx ensurepath

# Install prefixes shared by the derived stages
ENV TGI_INSTALL_PREFIX=/usr/local/tgi \
    TENSORRT_INSTALL_PREFIX=/usr/local/tensorrt
 | |
| 
 | |
# Install OpenMPI
# Builds OpenMPI from the release tarball and installs it under /usr/local/mpi,
# which later stages pick up with `COPY --from=mpi-builder`.
FROM cuda-builder AS mpi-builder
WORKDIR /opt/src/mpi

ARG ompi_version
ENV OMPI_VERSION=${ompi_version}
ENV OMPI_TARBALL_FILENAME=openmpi-${OMPI_VERSION}.tar.bz2
# NOTE: the checksum and the `v4.1` path segment are tied to the default
# ompi_version; both must be updated when the version is overridden.
ADD --checksum=sha256:54a33cb7ad81ff0976f15a6cc8003c3922f0f3d8ceed14e1813ef3603f22cd34 \
    https://download.open-mpi.org/release/open-mpi/v4.1/${OMPI_TARBALL_FILENAME} .

# - `make -j"$(nproc)"` bounds the number of parallel jobs to the available
#   CPUs instead of spawning an unlimited number of them.
# - The cleanup wipes the tarball and the extracted sources once the install
#   is done. The previous `${OMPI_TARBALL_FILENAME}/..` path pointed below a
#   regular file, so it never resolved and nothing was actually removed.
RUN tar --strip-components=1 -xf "${OMPI_TARBALL_FILENAME}" && \
    ./configure --prefix=/usr/local/mpi --with-cuda=/usr/local/cuda --with-slurm && \
    make -j"$(nproc)" all && \
    make install && \
    find /opt/src/mpi -mindepth 1 -delete
 | |
| 
 | |
# Install TensorRT
# Runs the repository's install script; later stages copy /usr/local/tensorrt
# out of this stage.
FROM cuda-builder AS trt-builder
# Set the executable bit at copy time rather than with a follow-up `chmod`.
COPY --chmod=0755 backends/trtllm/scripts/install_tensorrt.sh /opt/install_tensorrt.sh
RUN /opt/install_tensorrt.sh
 | |
| 
 | |
# Build Backend
FROM cuda-builder AS tgi-builder
WORKDIR /usr/src/text-generation-inference

# Install Rust
# The toolchain is installed BEFORE the build args are brought into scope:
# every RUN that follows an ARG declaration implicitly receives that ARG, so a
# value that changes on every CI run (actions_runtime_token) would otherwise
# invalidate the cache of this expensive layer each time.
# The installer is downloaded to a file first so that a failed download makes
# the step fail, instead of being masked by a `curl | sh` pipeline.
ENV PATH="/root/.cargo/bin:$PATH"
RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs -o /tmp/rustup-init.sh && \
    sh /tmp/rustup-init.sh --default-toolchain 1.85.1 --profile minimal -y && \
    rm -f /tmp/rustup-init.sh && \
    chmod -R a+w /root/.rustup && \
    chmod -R a+w /root/.cargo && \
    cargo install sccache --version ">=0.10.0" --locked

# Scoped global args reuse
ARG cuda_arch_list
ARG build_type
ARG sccache_gha_enabled
ARG actions_results_url
ARG actions_runtime_token
 | |
| 
 | |
# Make the OpenMPI / TensorRT installs (copied into this stage from the
# mpi-builder and trt-builder stages) discoverable by the loader, pkg-config
# and CMake.
ENV LD_LIBRARY_PATH="/usr/local/mpi/lib:$LD_LIBRARY_PATH" \
    PKG_CONFIG_PATH="/usr/local/mpi/lib/pkgconfig" \
    CMAKE_PREFIX_PATH="/usr/local/mpi:/usr/local/tensorrt"

# Build switches; CUDA_ARCH_LIST mirrors the cuda_arch_list build arg.
ENV USE_LLD_LINKER=ON \
    CUDA_ARCH_LIST=${cuda_arch_list}

# SCCACHE Specifics args - before finding a better, more generic, way...
# NOTE(review): ACTIONS_RUNTIME_TOKEN looks like a credential; as an ENV it is
# recorded in this stage's image configuration. Consider a BuildKit secret
# mount if this stage is ever pushed or its cache exported - confirm with CI.
ENV SCCACHE_GHA_ENABLED=${sccache_gha_enabled} \
    ACTIONS_RESULTS_URL=${actions_results_url} \
    ACTIONS_RUNTIME_TOKEN=${actions_runtime_token}
 | |
| 
 | |
# Prebuilt third-party dependencies produced by the sibling builder stages
COPY --from=mpi-builder /usr/local/mpi /usr/local/mpi
COPY --from=trt-builder /usr/local/tensorrt /usr/local/tensorrt

# Workspace manifests and toolchain pin, copied into the current WORKDIR
COPY Cargo.lock Cargo.toml rust-toolchain.toml ./

# Workspace members
COPY backends backends
COPY benchmark benchmark
COPY launcher launcher
COPY router router
 | |
| 
 | |
# Route rustc through sccache and point CMake installs at the TGI prefix.
ENV RUSTC_WRAPPER=sccache
ENV CMAKE_INSTALL_PREFIX=$TGI_INSTALL_PREFIX

# Build the TensorRT-LLM backend binary with gcc/g++ 14 and sccache as the
# compiler launcher for C, C++ and CUDA, then print the cache statistics.
# Every `export` is its own `&&`-chained command: previously the first three
# were missing the `&&` and were folded into a single `export` invocation,
# which only worked by accident.
# `mkdir -p` creates the prefix and its sub-directories in one idempotent call.
RUN export CC=gcc-14 && \
    export CXX=g++-14 && \
    export CMAKE_C_COMPILER_LAUNCHER=sccache && \
    export CMAKE_CXX_COMPILER_LAUNCHER=sccache && \
    export CMAKE_CUDA_COMPILER_LAUNCHER=sccache && \
    mkdir -p "$TGI_INSTALL_PREFIX/include" "$TGI_INSTALL_PREFIX/lib" && \
    cargo build --profile ${build_type} --package text-generation-backends-trtllm --bin text-generation-backends-trtllm && \
    sccache --show-stats
 | |
| 
 | |
# Release runtime image: CUDA runtime base plus the artifacts produced by the
# builder stages.
FROM nvidia/cuda:${cuda_base}-cudnn-runtime-ubuntu24.04 AS runtime

# `apt-get` is the stable scripting interface (`apt` is meant for interactive
# use). The cleanup removes only the package index: the former
# `/var/lib/{apt,dpkg,cache,log}/` relied on brace expansion, which the default
# /bin/sh does not perform, so it removed nothing - and had it expanded, it
# would have deleted the dpkg database.
RUN apt-get update && \
    DEBIAN_FRONTEND=noninteractive apt-get install -y \
        libucx0 \
        pipx \
        python3-dev \
        python3-minimal \
        python3-pip \
        python3-venv && \
    rm -rf /var/lib/apt/lists/* && \
    pipx ensurepath && \
    pipx install --include-deps transformers tokenizers

WORKDIR /usr/local/tgi/bin

# Expose the pipx-managed transformers venv and the bundled native libraries.
ENV PATH=/root/.local/share/pipx/venvs/transformers/bin/:$PATH
ENV LD_LIBRARY_PATH="/usr/local/tgi/lib:/usr/local/mpi/lib:/usr/local/tensorrt/lib:/usr/local/cuda/lib64/stubs:$LD_LIBRARY_PATH"
ENV TOKENIZERS_PARALLELISM=false
ENV OMPI_MCA_plm_rsh_agent=""

COPY --from=mpi-builder /usr/local/mpi /usr/local/mpi
COPY --from=trt-builder /usr/local/tensorrt /usr/local/tensorrt
COPY --from=tgi-builder /usr/local/tgi /usr/local/tgi
# The release-profile binary is shipped under the launcher's name.
COPY --from=tgi-builder /usr/src/text-generation-inference/target/release/text-generation-backends-trtllm /usr/local/tgi/bin/text-generation-launcher
 | |
| 
 | |
# This is used only for the CI/CD
# Same layout as the release runtime stage, plus the ASan/UBSan runtime
# libraries, and it ships the debug-profile binary.
FROM nvidia/cuda:${cuda_base}-cudnn-runtime-ubuntu24.04 AS ci-runtime

# `apt-get` is the stable scripting interface (`apt` is meant for interactive
# use). The cleanup removes only the package index: the former
# `/var/lib/{apt,dpkg,cache,log}/` relied on brace expansion, which the default
# /bin/sh does not perform, so it removed nothing - and had it expanded, it
# would have deleted the dpkg database.
RUN apt-get update && \
    DEBIAN_FRONTEND=noninteractive apt-get install -y \
        libasan8 \
        libubsan1 \
        libucx0 \
        pipx \
        python3-dev \
        python3-minimal \
        python3-pip \
        python3-venv && \
    rm -rf /var/lib/apt/lists/* && \
    pipx ensurepath && \
    pipx install --include-deps transformers tokenizers

WORKDIR /usr/local/tgi/bin

# Expose the pipx-managed transformers venv and the bundled native libraries.
ENV PATH=/root/.local/share/pipx/venvs/transformers/bin/:$PATH
ENV LD_LIBRARY_PATH="/usr/local/tgi/lib:/usr/local/mpi/lib:/usr/local/tensorrt/lib:/usr/local/cuda/lib64/stubs:$LD_LIBRARY_PATH"
ENV TOKENIZERS_PARALLELISM=false
ENV OMPI_MCA_plm_rsh_agent=""

COPY --from=mpi-builder /usr/local/mpi /usr/local/mpi
COPY --from=trt-builder /usr/local/tensorrt /usr/local/tensorrt
COPY --from=tgi-builder /usr/local/tgi /usr/local/tgi

# Basically we copy from target/debug instead of target/release
COPY --from=tgi-builder /usr/src/text-generation-inference/target/debug/text-generation-backends-trtllm /usr/local/tgi/bin/text-generation-launcher
 | |
| 
 | |
# This is the final image
FROM runtime

# Image metadata. The title uses the OCI-standard key
# `org.opencontainers.image.title`; the former `org.opencontainers.title` is
# not a key defined by the OCI image spec.
LABEL co.huggingface.vendor="Hugging Face Inc." \
      org.opencontainers.image.authors="hardware@hf.co" \
      org.opencontainers.image.title="Text-Generation-Inference TensorRT-LLM Backend"

# The relative entrypoint resolves against the WORKDIR inherited from the
# runtime stage (/usr/local/tgi/bin). CMD supplies default arguments that can
# be overridden at `docker run` time.
ENTRYPOINT ["./text-generation-launcher"]
CMD ["--executor-worker", "/usr/local/tgi/bin/executorWorker"]
 |