From 80c357ab59d8768d9a8fe26a722346a8f4a48213 Mon Sep 17 00:00:00 2001
From: Morgan Funtowicz
Date: Thu, 19 Dec 2024 09:42:43 +0100
Subject: [PATCH] misc(backend): let's build for ci-runtime

---
Review notes (free-form text in this section is ignored when the patch is applied):
 * This patch was stored with its line breaks collapsed; the one-line-per-diff-line
   layout has been restored. Leading whitespace of context lines (notably in the
   YAML hunk) is reconstructed -- verify it against the target tree, or apply with
   `git apply --ignore-whitespace`.
 * ci-runtime stage: `apt` was replaced by `apt-get`, which is the interface
   intended for non-interactive use.
 * ci-runtime stage: `rm -rf /var/lib/{apt,dpkg,cache,log}/` was replaced by
   `rm -rf /var/lib/apt/lists/*`. RUN executes through /bin/sh, which does not
   perform brace expansion on the Ubuntu base image, so the original command
   removed nothing (and would have deleted the dpkg database had it expanded).
 * Dockerfile_trtllm now ends with a trailing newline.
 * Because added lines were edited, the new-side blob hash on the
   Dockerfile_trtllm `index` line no longer corresponds to the patched content.

 .github/workflows/build_trtllm.yaml |  2 +-
 Dockerfile_trtllm                   | 25 ++++++++++++++++++++++++-
 2 files changed, 25 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/build_trtllm.yaml b/.github/workflows/build_trtllm.yaml
index 88bd2834..12c86faa 100644
--- a/.github/workflows/build_trtllm.yaml
+++ b/.github/workflows/build_trtllm.yaml
@@ -58,7 +58,7 @@ jobs:
         with:
           context: .
           file: Dockerfile_trtllm
-          target: runtime
+          target: ci-runtime
           push: false
           load: true
           platforms: 'linux/amd64'
diff --git a/Dockerfile_trtllm b/Dockerfile_trtllm
index 89e4fe5a..f537b350 100644
--- a/Dockerfile_trtllm
+++ b/Dockerfile_trtllm
@@ -88,7 +88,7 @@ COPY . .
 COPY --from=trt-builder /usr/local/tensorrt /usr/local/tensorrt
 COPY --from=mpi-builder /usr/local/mpi /usr/local/mpi
 RUN mkdir $TGI_INSTALL_PREFIX && mkdir "$TGI_INSTALL_PREFIX/include" && mkdir "$TGI_INSTALL_PREFIX/lib" && \
-    python3 scripts/setup_sccache.py --is-gha-build ${is_gha_build} -k ${aws_access_key_id} -s ${aws_secret_key_id} -t ${aws_session_token} -b ${sccache_bucket} -r ${sscache_region} -p ${sccache_s3_key_prefix } && \
+#    python3 scripts/setup_sccache.py --is-gha-build ${is_gha_build} -k ${aws_access_key_id} -s ${aws_secret_key_id} -t ${aws_session_token} -b ${sccache_bucket} -r ${sscache_region} -p ${sccache_s3_key_prefix } && \
     RUSTC_WRAPPER=sccache CMAKE_INSTALL_PREFIX=$TGI_INSTALL_PREFIX cargo build --profile ${build_type} --package text-generation-backends-trtllm --bin text-generation-backends-trtllm
 
 FROM nvidia/cuda:12.6.3-cudnn-runtime-ubuntu24.04 AS runtime
@@ -99,6 +99,8 @@ RUN apt update && apt install -y libucx0 pipx python3-minimal python3-dev python
 
 WORKDIR /usr/local/tgi/bin
 
+ARG build_type
+
 ENV PATH=/root/.local/share/pipx/venvs/transformers/bin/:$PATH
 ENV LD_LIBRARY_PATH="/usr/local/tgi/lib:/usr/local/mpi/lib:/usr/local/tensorrt/lib:/usr/local/cuda/lib64/stubs:$LD_LIBRARY_PATH"
 ENV TOKENIZERS_PARALLELISM=false
@@ -116,3 +118,24 @@ LABEL org.opencontainers.image.authors="hardware@hf.co"
 
 ENTRYPOINT ["./text-generation-launcher"]
 CMD ["--executor-worker", "/usr/local/tgi/bin/executorWorker"]
+
+# This is used only for the CI/CD
+FROM nvidia/cuda:12.6.3-cudnn-runtime-ubuntu24.04 AS ci-runtime
+RUN apt-get update && apt-get install -y libasan8 libubsan1 libucx0 pipx python3-minimal python3-dev python3-pip python3-venv && \
+    rm -rf /var/lib/apt/lists/* && \
+    pipx ensurepath && \
+    pipx install --include-deps transformers tokenizers
+
+WORKDIR /usr/local/tgi/bin
+
+ENV PATH=/root/.local/share/pipx/venvs/transformers/bin/:$PATH
+ENV LD_LIBRARY_PATH="/usr/local/tgi/lib:/usr/local/mpi/lib:/usr/local/tensorrt/lib:/usr/local/cuda/lib64/stubs:$LD_LIBRARY_PATH"
+ENV TOKENIZERS_PARALLELISM=false
+ENV OMPI_MCA_plm_rsh_agent=""
+
+COPY --from=mpi-builder /usr/local/mpi /usr/local/mpi
+COPY --from=trt-builder /usr/local/tensorrt /usr/local/tensorrt
+COPY --from=tgi-builder /usr/local/tgi /usr/local/tgi
+
+# Basically we copy from target/debug instead of target/release
+COPY --from=tgi-builder /usr/src/text-generation-inference/target/debug/text-generation-backends-trtllm /usr/local/tgi/bin/text-generation-launcher