backend(trtllm): attempt to remove the flaky AWS S3 cache for sccache

Morgan Funtowicz 2025-01-24 15:50:28 +01:00
parent 6cb41a80a1
commit 556a61d143
2 changed files with 28 additions and 45 deletions
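
In short: sccache previously wrote its compilation cache to an S3 bucket, which meant assuming an AWS role in the workflow and threading credentials through Docker build args, and that cache turned out to be flaky. The workflow diff and the Dockerfile diff below switch to sccache's GitHub Actions cache backend, which only needs the runner-provided ACTIONS_CACHE_URL and ACTIONS_RUNTIME_TOKEN values plus an enable flag. A minimal sketch of what that backend needs, assuming sccache's documented SCCACHE_GHA_ENABLED switch and a job that has already exported the two ACTIONS_* variables (they are not visible to plain run steps by default, hence the github-script step added below):

    # A minimal sketch, not the commit's code: what sccache's GitHub Actions cache
    # backend needs at runtime (SCCACHE_GHA_ENABLED name taken from sccache's docs).
    : "${ACTIONS_CACHE_URL:?export it first, e.g. via actions/github-script}"
    : "${ACTIONS_RUNTIME_TOKEN:?export it first, e.g. via actions/github-script}"
    export SCCACHE_GHA_ENABLED=on      # no AWS credentials, bucket or region involved
    RUSTC_WRAPPER=sccache cargo build --release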


@@ -6,11 +6,11 @@ on:
       hardware:
         type: string
         description: Hardware
         # options:
         # - cuda
         # - cuda-trtllm
         # - rocm
         # - intel
         required: true
       release-tests:
         description: "Run release integration tests"
@@ -41,19 +41,18 @@ jobs:
         uses: actions/checkout@v4
       - name: Inject slug/short variables
         uses: rlespinasse/github-slug-action@v4.4.1
+      - name: Inject required variables for sccache to interact with Github Actions Cache
+        uses: actions/github-script@v7
+        with:
+          script: |
+            core.exportVariable('ACTIONS_CACHE_URL', process.env.ACTIONS_CACHE_URL || '');
+            core.exportVariable('ACTIONS_RUNTIME_TOKEN', process.env.ACTIONS_RUNTIME_TOKEN || '');
       - name: Extract TensorRT-LLM version
         run: |
           echo "TENSORRT_LLM_VERSION=$(grep -oP '([a-z,0-9]{40})' $GITHUB_WORKSPACE/backends/trtllm/cmake/trtllm.cmake)" >> $GITHUB_ENV
           echo "TensorRT-LLM version: ${{ env.TENSORRT_LLM_VERSION }}"
-      - name: "Configure AWS Credentials"
-        id: aws-creds
-        uses: aws-actions/configure-aws-credentials@v4
-        with:
-          aws-region: us-east-1
-          role-to-assume: ${{ secrets.AWS_ROLE_GITHUB_TGI_TEST }}
-          role-duration-seconds: 7200
-          output-credentials: true
-      - name: Construct harware variables
+      - name: Construct hardware variables
         shell: bash
         run: |
           case ${{ inputs.hardware }} in
@@ -75,9 +74,6 @@ jobs:
               export runs_on="ubuntu-latest"
               export platform=""
               export extra_pytest=""
-              export target="ci-runtime"
-              export sccache_s3_key_prefix="trtllm"
-              export sccache_region="us-east-1"
               export build_type="dev"
               ;;
             rocm)
@@ -128,8 +124,6 @@ jobs:
           echo "EXTRA_PYTEST=${extra_pytest}" >> $GITHUB_ENV
           echo REGISTRY_MIRROR=$REGISTRY_MIRROR >> $GITHUB_ENV
           echo "TARGET=${target}" >> $GITHUB_ENV
-          echo "SCCACHE_S3_KEY_PREFIX=${sccache_s3_key_prefix}" >> $GITHUB_ENV
-          echo "SCCACHE_REGION=${sccache_region}" >> $GITHUB_ENV
           echo "BUILD_TYPE=${build_type}" >> $GITHUB_ENV
       - name: Initialize Docker Buildx
         uses: docker/setup-buildx-action@v3
@@ -196,13 +190,10 @@ jobs:
             DOCKER_LABEL=sha-${{ env.GITHUB_SHA_SHORT }}${{ env.LABEL }}
             PLATFORM=${{ env.PLATFORM }}
             build_type=${{ env.BUILD_TYPE }}
-            is_gha_build=true
-            aws_access_key_id=${{ steps.aws-creds.outputs.aws-access-key-id }}
-            aws_secret_access_key=${{ steps.aws-creds.outputs.aws-secret-access-key }}
-            aws_session_token=${{ steps.aws-creds.outputs.aws-session-token }}
-            sccache_bucket=${{ secrets.AWS_S3_BUCKET_GITHUB_TGI_TEST }}
-            sccache_s3_key_prefix=${{ env.SCCACHE_S3_KEY_PREFIX }}
-            sccache_region=${{ env.SCCACHE_REGION }}
+            sccache_gha_enabled=on
+            actions_cache_url=${{ env.ACTIONS_CACHE_URL }}
+            actions_runtime_token=${{ env.ACTIONS_RUNTIME_TOKEN }}
           tags: ${{ steps.meta.outputs.tags || steps.meta-pr.outputs.tags }}
           labels: ${{ steps.meta.outputs.labels || steps.meta-pr.outputs.labels }}
           cache-from: type=s3,region=us-east-1,bucket=ci-docker-buildx-cache,name=text-generation-inference-cache${{ env.LABEL }},mode=min,access_key_id=${{ secrets.S3_CI_DOCKER_BUILDX_CACHE_ACCESS_KEY_ID }},secret_access_key=${{ secrets.S3_CI_DOCKER_BUILDX_CACHE_SECRET_ACCESS_KEY }},mode=min
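
The three new build-args map directly onto the ARGs added to the Dockerfile below. A hypothetical local equivalent of this image-build step, with the Dockerfile path and values chosen purely for illustration (outside a GitHub runner the two ACTIONS_* variables are simply empty):

    # Hypothetical local repro of the CI image build; path and values are assumptions.
    docker buildx build \
      --build-arg build_type=dev \
      --build-arg sccache_gha_enabled=on \
      --build-arg actions_cache_url="${ACTIONS_CACHE_URL:-}" \
      --build-arg actions_runtime_token="${ACTIONS_RUNTIME_TOKEN:-}" \
      -f Dockerfile_trtllm .   # Dockerfile name assumed for illustration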


@@ -1,7 +1,9 @@
 ARG cuda_arch_list="75-real;80-real;86-real;89-real;90-real"
 ARG ompi_version="4.1.7"
 ARG build_type=release
-ARG is_gha_build=false
+ARG sccache_gha_enabled=no
+ARG actions_cache_url=""
+ARG actions_runtime_token=""
 
 # CUDA dependent dependencies resolver stage
 FROM nvidia/cuda:12.6.3-cudnn-devel-ubuntu24.04 AS cuda-builder
@@ -59,8 +61,10 @@ FROM cuda-builder AS tgi-builder
 WORKDIR /usr/src/text-generation-inference
 
 # Scoped global args reuse
-ARG is_gha_build
 ARG build_type
+ARG sccache_gha_enabled
+ARG actions_cache_url
+ARG actions_runtime_token
 
 # Install Rust
 ENV PATH="/root/.cargo/bin:$PATH"
@@ -69,28 +73,17 @@ RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | bash -s -- -y &&
     chmod -R a+w /root/.cargo && \
     cargo install sccache --locked
 
-# SCCACHE Specifics args - before finding a better, more generic, way...
-ARG aws_access_key_id
-ARG aws_secret_access_key
-ARG aws_session_token
-ARG sccache_bucket
-ARG sccache_s3_key_prefix
-ARG sccache_region
-
-ENV AWS_ACCESS_KEY_ID=$aws_access_key_id
-ENV AWS_SECRET_ACCESS_KEY=$aws_secret_access_key
-ENV AWS_SESSION_TOKEN=$aws_session_token
-ENV SCCACHE_BUCKET=$sccache_bucket
-ENV SCCACHE_S3_KEY_PREFIX=$sccache_s3_key_prefix
-ENV SCCACHE_REGION=$sccache_region
-
 ENV LD_LIBRARY_PATH="/usr/local/mpi/lib:$LD_LIBRARY_PATH"
 ENV PKG_CONFIG_PATH="/usr/local/mpi/lib/pkgconfig:$PKG_CONFIG_PATH"
 ENV CMAKE_PREFIX_PATH="/usr/local/mpi:/usr/local/tensorrt:$CMAKE_PREFIX_PATH"
 
 ENV USE_LLD_LINKER=ON
 ENV CUDA_ARCH_LIST=${cuda_arch_list}
-ENV IS_GHA_BUILD=${is_gha_build}
 
+# SCCACHE Specifics args - before finding a better, more generic, way...
+ENV SCCACHE_GHA_ENABLE=${sccache_gha_enabled}
+ENV ACTIONS_CACHE_URL=${actions_cache_url}
+ENV ACTIONS_RUNTIME_TOKEN=${actions_runtime_token}
+
 COPY Cargo.lock Cargo.lock
 COPY Cargo.toml Cargo.toml
@@ -103,7 +96,6 @@ COPY --from=trt-builder /usr/local/tensorrt /usr/local/tensorrt
 COPY --from=mpi-builder /usr/local/mpi /usr/local/mpi
 
 RUN mkdir $TGI_INSTALL_PREFIX && mkdir "$TGI_INSTALL_PREFIX/include" && mkdir "$TGI_INSTALL_PREFIX/lib" && \
-    python3 backends/trtllm/scripts/setup_sccache.py --is-gha-build ${is_gha_build} && \
     CMAKE_INSTALL_PREFIX=$TGI_INSTALL_PREFIX \
     RUSTC_WRAPPER=sccache \
     cargo build --profile ${build_type} --package text-generation-backends-trtllm --bin text-generation-backends-trtllm && \
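
One way to see whether the new backend actually helps is to dump sccache's statistics right after the cargo build, inside the same RUN layer. A sketch only, not part of this commit:

    # Sketch: appended to the existing RUN layer after the cargo build step.
    RUSTC_WRAPPER=sccache cargo build --profile "${build_type}" \
        --package text-generation-backends-trtllm --bin text-generation-backends-trtllm
    sccache --show-stats   # reports cache hits/misses and the storage backend in use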