From 80c357ab59d8768d9a8fe26a722346a8f4a48213 Mon Sep 17 00:00:00 2001
From: Morgan Funtowicz <funtowiczmo@gmail.com>
Date: Thu, 19 Dec 2024 09:42:43 +0100
Subject: [PATCH] misc(backend): let's build for ci-runtime

---
 .github/workflows/build_trtllm.yaml |  2 +-
 Dockerfile_trtllm                   | 25 ++++++++++++++++++++++++-
 2 files changed, 25 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/build_trtllm.yaml b/.github/workflows/build_trtllm.yaml
index 88bd2834..12c86faa 100644
--- a/.github/workflows/build_trtllm.yaml
+++ b/.github/workflows/build_trtllm.yaml
@@ -58,7 +58,7 @@ jobs:
         with:
           context: .
           file: Dockerfile_trtllm
-          target: runtime
+          target: ci-runtime
           push: false
           load: true
           platforms: 'linux/amd64'
diff --git a/Dockerfile_trtllm b/Dockerfile_trtllm
index 89e4fe5a..f537b350 100644
--- a/Dockerfile_trtllm
+++ b/Dockerfile_trtllm
@@ -88,7 +88,7 @@ COPY . .
 COPY --from=trt-builder /usr/local/tensorrt /usr/local/tensorrt
 COPY --from=mpi-builder /usr/local/mpi /usr/local/mpi
 RUN mkdir $TGI_INSTALL_PREFIX && mkdir "$TGI_INSTALL_PREFIX/include" && mkdir "$TGI_INSTALL_PREFIX/lib" && \
-    python3 scripts/setup_sccache.py --is-gha-build ${is_gha_build} -k ${aws_access_key_id} -s ${aws_secret_key_id} -t ${aws_session_token} -b ${sccache_bucket} -r ${sscache_region} -p ${sccache_s3_key_prefix } && \
+#    NOTE(review): sccache setup is disabled here while RUSTC_WRAPPER=sccache is still set on the next line - confirm this is intended. Disabled command: python3 scripts/setup_sccache.py --is-gha-build ${is_gha_build} -k ${aws_access_key_id} -s ${aws_secret_key_id} -t ${aws_session_token} -b ${sccache_bucket} -r ${sscache_region} -p ${sccache_s3_key_prefix } && \
     RUSTC_WRAPPER=sccache CMAKE_INSTALL_PREFIX=$TGI_INSTALL_PREFIX cargo build --profile ${build_type} --package text-generation-backends-trtllm --bin text-generation-backends-trtllm
 
 FROM nvidia/cuda:12.6.3-cudnn-runtime-ubuntu24.04 AS runtime
@@ -99,6 +99,8 @@ RUN apt update && apt install -y libucx0 pipx python3-minimal python3-dev python
 
 WORKDIR /usr/local/tgi/bin
 
+ARG build_type
+
 ENV PATH=/root/.local/share/pipx/venvs/transformers/bin/:$PATH
 ENV LD_LIBRARY_PATH="/usr/local/tgi/lib:/usr/local/mpi/lib:/usr/local/tensorrt/lib:/usr/local/cuda/lib64/stubs:$LD_LIBRARY_PATH"
 ENV TOKENIZERS_PARALLELISM=false
@@ -116,3 +118,24 @@ LABEL org.opencontainers.image.authors="hardware@hf.co"
 
 ENTRYPOINT ["./text-generation-launcher"]
 CMD ["--executor-worker", "/usr/local/tgi/bin/executorWorker"]
+
+# CI/CD-only stage: ships the debug binary together with the ASan/UBSan runtime libraries.
+FROM nvidia/cuda:12.6.3-cudnn-runtime-ubuntu24.04 AS ci-runtime
+# Clean the apt lists with an explicit path: RUN uses /bin/sh, which does not expand {a,b} brace patterns.
+RUN apt-get update && apt-get install -y libasan8 libubsan1 libucx0 pipx python3-minimal python3-dev python3-pip python3-venv && \
+    rm -rf /var/lib/apt/lists/* && \
+    pipx ensurepath && \
+    pipx install --include-deps transformers tokenizers
+
+WORKDIR /usr/local/tgi/bin
+
+ENV PATH=/root/.local/share/pipx/venvs/transformers/bin/:$PATH
+ENV LD_LIBRARY_PATH="/usr/local/tgi/lib:/usr/local/mpi/lib:/usr/local/tensorrt/lib:/usr/local/cuda/lib64/stubs:$LD_LIBRARY_PATH"
+ENV TOKENIZERS_PARALLELISM=false
+ENV OMPI_MCA_plm_rsh_agent=""
+
+COPY --from=mpi-builder /usr/local/mpi /usr/local/mpi
+COPY --from=trt-builder /usr/local/tensorrt /usr/local/tensorrt
+COPY --from=tgi-builder /usr/local/tgi /usr/local/tgi
+# The launcher is taken from target/debug instead of target/release.
+COPY --from=tgi-builder /usr/src/text-generation-inference/target/debug/text-generation-backends-trtllm /usr/local/tgi/bin/text-generation-launcher