misc(backend): let's build for ci-runtime

2025-09-11 12:24:53 +00:00 · 2024-12-19 09:42:43 +01:00 · 2024-12-19 09:42:43 +01:00 · a5e3e6ac24
commit a5e3e6ac24
parent 076457afb5
2 changed files with 25 additions and 2 deletions
--- a/.github/workflows/build_trtllm.yaml
+++ b/.github/workflows/build_trtllm.yaml
@@ -58,7 +58,7 @@ jobs:
        with:
          context: .
          file: Dockerfile_trtllm
-          target: runtime
+          target: ci-runtime
          push: false
          load: true
          platforms: 'linux/amd64'
--- a/Dockerfile_trtllm
+++ b/Dockerfile_trtllm
@@ -88,7 +88,7 @@ COPY . .
 COPY --from=trt-builder /usr/local/tensorrt /usr/local/tensorrt
 COPY --from=mpi-builder /usr/local/mpi /usr/local/mpi
 RUN mkdir $TGI_INSTALL_PREFIX && mkdir "$TGI_INSTALL_PREFIX/include" && mkdir "$TGI_INSTALL_PREFIX/lib" && \
-    python3 scripts/setup_sccache.py --is-gha-build ${is_gha_build} -k ${aws_access_key_id} -s ${aws_secret_key_id} -t ${aws_session_token} -b ${sccache_bucket} -r ${sscache_region} -p ${sccache_s3_key_prefix } && \
+#    python3 scripts/setup_sccache.py --is-gha-build ${is_gha_build} -k ${aws_access_key_id} -s ${aws_secret_key_id} -t ${aws_session_token} -b ${sccache_bucket} -r ${sscache_region} -p ${sccache_s3_key_prefix } && \
    RUSTC_WRAPPER=sccache CMAKE_INSTALL_PREFIX=$TGI_INSTALL_PREFIX cargo build --profile ${build_type} --package text-generation-backends-trtllm --bin text-generation-backends-trtllm

 FROM nvidia/cuda:12.6.3-cudnn-runtime-ubuntu24.04 AS runtime
@@ -99,6 +99,8 @@ RUN apt update && apt install -y libucx0 pipx python3-minimal python3-dev python

 WORKDIR /usr/local/tgi/bin

+ARG build_type
+
 ENV PATH=/root/.local/share/pipx/venvs/transformers/bin/:$PATH
 ENV LD_LIBRARY_PATH="/usr/local/tgi/lib:/usr/local/mpi/lib:/usr/local/tensorrt/lib:/usr/local/cuda/lib64/stubs:$LD_LIBRARY_PATH"
 ENV TOKENIZERS_PARALLELISM=false
@@ -116,3 +118,24 @@ LABEL org.opencontainers.image.authors="hardware@hf.co"

 ENTRYPOINT ["./text-generation-launcher"]
 CMD ["--executor-worker", "/usr/local/tgi/bin/executorWorker"]
+
+# CI/CD-only image (sanitizer runtimes + debug binary). RUN uses /bin/sh, which has no {a,b} brace expansion, so the apt cleanup below names its path explicitly.
+FROM nvidia/cuda:12.6.3-cudnn-runtime-ubuntu24.04 AS ci-runtime
+RUN apt-get update && apt-get install -y libasan8 libubsan1 libucx0 pipx python3-minimal python3-dev python3-pip python3-venv && \
+    rm -rf /var/lib/apt/lists/* && \
+    pipx ensurepath && \
+    pipx install --include-deps transformers tokenizers
+
+WORKDIR /usr/local/tgi/bin
+
+ENV PATH=/root/.local/share/pipx/venvs/transformers/bin/:$PATH
+ENV LD_LIBRARY_PATH="/usr/local/tgi/lib:/usr/local/mpi/lib:/usr/local/tensorrt/lib:/usr/local/cuda/lib64/stubs:$LD_LIBRARY_PATH"
+ENV TOKENIZERS_PARALLELISM=false
+ENV OMPI_MCA_plm_rsh_agent=""
+
+COPY --from=mpi-builder /usr/local/mpi /usr/local/mpi
+COPY --from=trt-builder /usr/local/tensorrt /usr/local/tensorrt
+COPY --from=tgi-builder /usr/local/tgi /usr/local/tgi
+
+# The CI image ships the debug build: the launcher is taken from target/debug instead of target/release
+COPY --from=tgi-builder /usr/src/text-generation-inference/target/debug/text-generation-backends-trtllm /usr/local/tgi/bin/text-generation-launcher