Fixing TRTLLM dockerfile. (#2922)

* Fixing TRTLLM dockerfile. * Fixed. * Creating a dummy modification to check CI runs. * Removing the cache directive. * Modifying this should cache hit. * Revert "Modifying this should cache hit." This reverts commit 46a2bde108. * Modifying this should cache hit. * Unwanted files.
2025-09-12 21:04:53 +00:00 · 2025-01-20 11:13:46 +01:00 · 2025-01-20 11:13:46 +01:00 · 447a5b2f87
commit 447a5b2f87
parent 630f198624
4 changed files with 16 additions and 100 deletions
--- a/.devcontainer/Dockerfile_trtllm
+++ b/.devcontainer/Dockerfile_trtllm
@ -1,75 +0,0 @@
 # Global build arguments. An ARG declared before the first FROM is only visible
 # to FROM lines; a stage has to redeclare it (bare `ARG NAME`) to read the value.
 ARG CUDA_ARCH_LIST="75-real;80-real;86-real;89-real;90-real"
 ARG OMPI_VERSION="4.1.7rc1"
 # Build dependencies resolver stage
 # The base image is pinned to an explicit Rust toolchain tag instead of the
 # floating `latest`, so a rebuild does not silently pick up a different toolchain.
 FROM lukemathwalker/cargo-chef:latest-rust-1.84.0 AS chef
 # Everything derived from `chef` works from the TensorRT-LLM backend directory.
 WORKDIR /usr/src/text-generation-inference/backends/trtllm
 # Planner stage: copy the build context into the working directory and have
 # cargo-chef write the dependency recipe to recipe.json.
 FROM chef AS planner
 COPY . ./
 RUN ["cargo", "chef", "prepare", "--recipe-path", "recipe.json"]
 # CUDA dependent dependencies resolver stage
 # Base image for the mpi-builder, trt-builder and tgi-builder stages.
 FROM nvidia/cuda:12.6.3-cudnn-devel-ubuntu24.04 AS cuda-builder
 # Install the compiler toolchain and system libraries.
 # - apt-get is used rather than apt: apt's command-line interface is meant for
 #   interactive use and is not guaranteed stable for scripts.
 # - DEBIAN_FRONTEND is set inline so package configuration never waits for
 #   input, without persisting the variable into the image environment.
 # - Both apt directories are cache mounts, so their contents are not stored in
 #   the resulting layer.
 RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
     --mount=type=cache,target=/var/lib/apt,sharing=locked \
     apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y \
     build-essential \
     cmake \
     curl \
     gcc-14 \
     g++-14 \
     git \
     git-lfs \
     libssl-dev \
     libucx-dev \
     ninja-build \
     pkg-config \
     pipx \
     python3 \
     python3-dev \
     python3-setuptools \
     tar \
     wget && \
     pipx ensurepath
 # Install prefixes, inherited by every stage built FROM cuda-builder.
 # NOTE(review): TENSORRT_INSTALL_PREFIX is presumably read by install_tensorrt.sh - confirm.
 ENV TGI_INSTALL_PREFIX=/usr/local/tgi
 ENV TENSORRT_INSTALL_PREFIX=/usr/local/tensorrt
 # Install OpenMPI
 # Downloads the Open MPI source tarball, builds it with CUDA and Slurm support
 # enabled, and installs it under /usr/local/mpi.
 FROM cuda-builder AS mpi-builder
 # Redeclare the global ARG so its value is visible inside this stage.
 ARG OMPI_VERSION
 ENV OMPI_TARBALL_FILENAME="openmpi-$OMPI_VERSION.tar.bz2"
 # NOTE: the download URL hard-codes the v4.1 release series; an OMPI_VERSION
 # from another series needs the URL updated as well.
 # make is capped at one job per available CPU: a bare `-j` places no limit on
 # the number of parallel jobs and can exhaust memory on the build host.
 # The tarball is removed in the same RUN so it does not remain in the layer.
 RUN wget "https://download.open-mpi.org/release/open-mpi/v4.1/$OMPI_TARBALL_FILENAME" -P /opt/src && \
     mkdir /usr/src/mpi && \
     tar -xf "/opt/src/$OMPI_TARBALL_FILENAME" -C /usr/src/mpi --strip-components=1 && \
     cd /usr/src/mpi && \
     ./configure --prefix=/usr/local/mpi --with-cuda=/usr/local/cuda --with-slurm && \
     make -j "$(nproc)" all && \
     make install && \
     rm -rf "/opt/src/$OMPI_TARBALL_FILENAME"
 # Install TensorRT
 # Copies the repository's install script into the image, marks it executable
 # and runs it on top of the cuda-builder toolchain.
 FROM cuda-builder AS trt-builder
 COPY backends/trtllm/scripts/install_tensorrt.sh /opt/install_tensorrt.sh
 RUN chmod +x /opt/install_tensorrt.sh \
     && /opt/install_tensorrt.sh
 # Build Backend
 # Combines the Rust toolchain with the TensorRT and Open MPI installs produced
 # by the trt-builder and mpi-builder stages.
 FROM cuda-builder AS tgi-builder
 WORKDIR /usr/src/text-generation-inference
 # Install Rust
 # rustup runs unattended (-y); afterwards /root/.rustup and /root/.cargo are
 # made writable by all users.
 RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | bash -s -- -y \
     && chmod -R a+w /root/.rustup \
     && chmod -R a+w /root/.cargo
 # Put the cargo-installed binaries first on PATH for the following steps.
 ENV PATH="/root/.cargo/bin:$PATH"
 RUN cargo install cargo-chef
 # Pull the prebuilt TensorRT and Open MPI trees out of their builder stages.
 COPY --from=trt-builder /usr/local/tensorrt /usr/local/tensorrt
 COPY --from=mpi-builder /usr/local/mpi /usr/local/mpi
 ENV MPI_HOME=/usr/local/mpi
--- a/.devcontainer/devcontainer.json
+++ b/.devcontainer/devcontainer.json
@ -1,19 +0,0 @@
 // For format details, see https://aka.ms/devcontainer.json. For config options, see the
 // README at: https://github.com/devcontainers/templates/tree/main/src/cpp
 {
  "name": "CUDA",
  // Image definition: Dockerfile_trtllm, built with ".." as the build context.
  "build": {
    "dockerfile": "Dockerfile_trtllm",
    "context": ".."
  },
  // Environment for processes started by the dev container tooling.
  // devcontainer.json does not expand shell-style $VAR references; a value that
  // already exists in the container has to be referenced as ${containerEnv:VAR}.
  "remoteEnv": {
    "PATH": "${containerEnv:PATH}:/usr/local/cuda/bin",
    "LD_LIBRARY_PATH": "${containerEnv:LD_LIBRARY_PATH}:/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64",
    "XLA_FLAGS": "--xla_gpu_cuda_data_dir=/usr/local/cuda"
  },
  "customizations" : {
    "jetbrains" : {
      "backend" : "CLion"
    }
  }
 }
--- a/20
+++ b/20
@ -6,15 +6,19 @@ FROM lukemathwalker/cargo-chef:latest-rust-1.84.0 AS chef
 WORKDIR /usr/src/text-generation-inference/backends/trtllm
 FROM chef AS planner
-COPY . .
+COPY Cargo.lock Cargo.lock
 COPY Cargo.toml Cargo.toml
 COPY rust-toolchain.toml rust-toolchain.toml
 COPY router router
 COPY benchmark/ benchmark/
 COPY backends/ backends/
 COPY launcher/ launcher/
 RUN cargo chef prepare --recipe-path recipe.json
 # CUDA dependent dependencies resolver stage
 FROM nvidia/cuda:12.6.3-cudnn-devel-ubuntu24.04 AS cuda-builder
-RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
+RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y \
    --mount=type=cache,target=/var/lib/apt,sharing=locked \
    apt update && apt install -y \
    build-essential \
    cmake \
    curl \
@ -31,7 +35,7 @@ RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
    python3-dev \
    python3-setuptools \
    tar \
-    wget && \
+    wget --no-install-recommends && \
    pipx ensurepath
 ENV TGI_INSTALL_PREFIX=/usr/local/tgi
@ -79,7 +83,11 @@ ENV CMAKE_PREFIX_PATH="/usr/local/mpi:/usr/local/tensorrt:$CMAKE_PREFIX_PATH"
 ENV LD_LIBRARY_PATH="/usr/local/mpi/lib:$LD_LIBRARY_PATH"
 ENV PKG_CONFIG_PATH="/usr/local/mpi/lib/pkgconfig:$PKG_CONFIG_PATH"
-COPY . .
+COPY Cargo.lock Cargo.lock
 COPY Cargo.toml Cargo.toml
 COPY rust-toolchain.toml rust-toolchain.toml
 COPY router router
 COPY backends/trtllm backends/trtllm
 COPY --from=trt-builder /usr/local/tensorrt /usr/local/tensorrt
 COPY --from=mpi-builder /usr/local/mpi /usr/local/mpi
 RUN mkdir $TGI_INSTALL_PREFIX && mkdir "$TGI_INSTALL_PREFIX/include" && mkdir "$TGI_INSTALL_PREFIX/lib" && \
--- a/server/text_generation_server/cli.py
+++ b/server/text_generation_server/cli.py
@ -9,6 +9,8 @@ from enum import Enum
 from huggingface_hub import hf_hub_download
 from text_generation_server.utils.adapter import parse_lora_adapters
 # Dummy change should cache hit.
 app = typer.Typer()