From 556a61d143f8ba86d600c90eb22c1b196a25a49f Mon Sep 17 00:00:00 2001
From: Morgan Funtowicz <funtowiczmo@gmail.com>
Date: Fri, 24 Jan 2025 15:50:28 +0100
Subject: [PATCH] backend(trtllm): attempt to remove flaky AWS S3 cache for
 sccache

---
 .github/workflows/build.yaml | 43 ++++++++++++++----------------------
 Dockerfile_trtllm            | 30 +++++++++----------------
 2 files changed, 28 insertions(+), 45 deletions(-)

diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml
index 27907e39..92696497 100644
--- a/.github/workflows/build.yaml
+++ b/.github/workflows/build.yaml
@@ -6,11 +6,11 @@ on:
       hardware:
         type: string
         description: Hardware
-          # options:
-          # - cuda
-          # - cuda-trtllm
-          # - rocm
-          # - intel
+        # options:
+        # - cuda
+        # - cuda-trtllm
+        # - rocm
+        # - intel
         required: true
       release-tests:
         description: "Run release integration tests"
@@ -41,19 +41,18 @@ jobs:
         uses: actions/checkout@v4
       - name: Inject slug/short variables
         uses: rlespinasse/github-slug-action@v4.4.1
+      - name: Inject required variables for sccache to interact with Github Actions Cache
+        uses: actions/github-script@v7
+        with:
+          script: |
+            core.exportVariable('ACTIONS_CACHE_URL', process.env.ACTIONS_CACHE_URL || '');
+            core.exportVariable('ACTIONS_RUNTIME_TOKEN', process.env.ACTIONS_RUNTIME_TOKEN || '');
+
       - name: Extract TensorRT-LLM version
         run: |
           echo "TENSORRT_LLM_VERSION=$(grep -oP '([a-z,0-9]{40})' $GITHUB_WORKSPACE/backends/trtllm/cmake/trtllm.cmake)" >> $GITHUB_ENV
           echo "TensorRT-LLM version: ${{ env.TENSORRT_LLM_VERSION }}"
-      - name: "Configure AWS Credentials"
-        id: aws-creds
-        uses: aws-actions/configure-aws-credentials@v4
-        with:
-          aws-region: us-east-1
-          role-to-assume: ${{ secrets.AWS_ROLE_GITHUB_TGI_TEST }}
-          role-duration-seconds: 7200
-          output-credentials: true
-      - name: Construct harware variables
+      - name: Construct hardware variables
         shell: bash
         run: |
           case ${{ inputs.hardware }} in
@@ -75,9 +74,6 @@ jobs:
                 export runs_on="ubuntu-latest"
                 export platform=""
                 export extra_pytest=""
-                export target="ci-runtime"
-                export sccache_s3_key_prefix="trtllm"
-                export sccache_region="us-east-1"
                 export build_type="dev"
                 ;;
             rocm)
@@ -128,8 +124,6 @@ jobs:
           echo "EXTRA_PYTEST=${extra_pytest}" >> $GITHUB_ENV
           echo REGISTRY_MIRROR=$REGISTRY_MIRROR >> $GITHUB_ENV
           echo "TARGET=${target}" >> $GITHUB_ENV
-          echo "SCCACHE_S3_KEY_PREFIX=${sccache_s3_key_prefix}" >> $GITHUB_ENV
-          echo "SCCACHE_REGION=${sccache_region}" >> $GITHUB_ENV
           echo "BUILD_TYPE=${build_type}" >> $GITHUB_ENV
       - name: Initialize Docker Buildx
         uses: docker/setup-buildx-action@v3
@@ -196,13 +190,10 @@ jobs:
             DOCKER_LABEL=sha-${{ env.GITHUB_SHA_SHORT }}${{ env.LABEL }}
             PLATFORM=${{ env.PLATFORM }}
             build_type=${{ env.BUILD_TYPE }}
-            is_gha_build=true
-            aws_access_key_id=${{ steps.aws-creds.outputs.aws-access-key-id }}
-            aws_secret_access_key=${{ steps.aws-creds.outputs.aws-secret-access-key }}
-            aws_session_token=${{ steps.aws-creds.outputs.aws-session-token }}
-            sccache_bucket=${{ secrets.AWS_S3_BUCKET_GITHUB_TGI_TEST }}
-            sccache_s3_key_prefix=${{ env.SCCACHE_S3_KEY_PREFIX }}
-            sccache_region=${{ env.SCCACHE_REGION }}
+            sccache_gha_enabled=on
+            actions_cache_url=${{ env.ACTIONS_CACHE_URL }}
+            actions_runtime_token=${{ env.ACTIONS_RUNTIME_TOKEN }}
+
           tags: ${{ steps.meta.outputs.tags || steps.meta-pr.outputs.tags }}
           labels: ${{ steps.meta.outputs.labels || steps.meta-pr.outputs.labels }}
           cache-from: type=s3,region=us-east-1,bucket=ci-docker-buildx-cache,name=text-generation-inference-cache${{ env.LABEL }},mode=min,access_key_id=${{ secrets.S3_CI_DOCKER_BUILDX_CACHE_ACCESS_KEY_ID }},secret_access_key=${{ secrets.S3_CI_DOCKER_BUILDX_CACHE_SECRET_ACCESS_KEY }},mode=min
diff --git a/Dockerfile_trtllm b/Dockerfile_trtllm
index dd977a81..c3cd1f46 100644
--- a/Dockerfile_trtllm
+++ b/Dockerfile_trtllm
@@ -1,7 +1,9 @@
 ARG cuda_arch_list="75-real;80-real;86-real;89-real;90-real"
 ARG ompi_version="4.1.7"
 ARG build_type=release
-ARG is_gha_build=false
+ARG sccache_gha_enabled=off
+ARG actions_cache_url=""
+ARG actions_runtime_token=""
 
 # CUDA dependent dependencies resolver stage
 FROM nvidia/cuda:12.6.3-cudnn-devel-ubuntu24.04 AS cuda-builder
@@ -59,8 +61,10 @@ FROM cuda-builder AS tgi-builder
 WORKDIR /usr/src/text-generation-inference
 
 # Scoped global args reuse
-ARG is_gha_build
 ARG build_type
+ARG sccache_gha_enabled
+ARG actions_cache_url
+ARG actions_runtime_token
 
 # Install Rust
 ENV PATH="/root/.cargo/bin:$PATH"
@@ -69,28 +73,17 @@ RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | bash -s -- -y &&
     chmod -R a+w /root/.cargo && \
     cargo install sccache --locked
 
-# SCCACHE Specifics args - before finding a better, more generic, way...
-ARG aws_access_key_id
-ARG aws_secret_access_key
-ARG aws_session_token
-ARG sccache_bucket
-ARG sccache_s3_key_prefix
-ARG sccache_region
-
-ENV AWS_ACCESS_KEY_ID=$aws_access_key_id
-ENV AWS_SECRET_ACCESS_KEY=$aws_secret_access_key
-ENV AWS_SESSION_TOKEN=$aws_session_token
-ENV SCCACHE_BUCKET=$sccache_bucket
-ENV SCCACHE_S3_KEY_PREFIX=$sccache_s3_key_prefix
-ENV SCCACHE_REGION=$sccache_region
-
 ENV LD_LIBRARY_PATH="/usr/local/mpi/lib:$LD_LIBRARY_PATH"
 ENV PKG_CONFIG_PATH="/usr/local/mpi/lib/pkgconfig:$PKG_CONFIG_PATH"
 ENV CMAKE_PREFIX_PATH="/usr/local/mpi:/usr/local/tensorrt:$CMAKE_PREFIX_PATH"
 
 ENV USE_LLD_LINKER=ON
 ENV CUDA_ARCH_LIST=${cuda_arch_list}
-ENV IS_GHA_BUILD=${is_gha_build}
+
+# sccache GitHub Actions cache backend: sccache reads SCCACHE_GHA_ENABLED (on/off), ACTIONS_CACHE_URL and ACTIONS_RUNTIME_TOKEN
+ENV SCCACHE_GHA_ENABLED=${sccache_gha_enabled}
+ENV ACTIONS_CACHE_URL=${actions_cache_url}
+ENV ACTIONS_RUNTIME_TOKEN=${actions_runtime_token}
 
 COPY Cargo.lock Cargo.lock
 COPY Cargo.toml Cargo.toml
@@ -103,7 +96,6 @@ COPY --from=trt-builder /usr/local/tensorrt /usr/local/tensorrt
 COPY --from=mpi-builder /usr/local/mpi /usr/local/mpi
 
 RUN mkdir $TGI_INSTALL_PREFIX && mkdir "$TGI_INSTALL_PREFIX/include" && mkdir "$TGI_INSTALL_PREFIX/lib" && \
-    python3 backends/trtllm/scripts/setup_sccache.py --is-gha-build ${is_gha_build} && \
     CMAKE_INSTALL_PREFIX=$TGI_INSTALL_PREFIX \
     RUSTC_WRAPPER=sccache \
     cargo build --profile ${build_type} --package text-generation-backends-trtllm --bin text-generation-backends-trtllm && \