Merge TRTLLM in standard CI

2025-09-11 12:24:53 +00:00 · 2025-01-16 00:59:28 +01:00 · 2025-01-16 00:59:28 +01:00 · d969dad634
commit d969dad634
parent 0159843449
2 changed files with 32 additions and 11 deletions
--- a/.github/workflows/build.yaml
+++ b/.github/workflows/build.yaml
@ -40,6 +40,18 @@ jobs:
        uses: actions/checkout@v4
      - name: Inject slug/short variables
        uses: rlespinasse/github-slug-action@v4.4.1
      # Reads the 40-character revision id from the TensorRT-LLM CMake file and
      # exports it as TENSORRT_LLM_VERSION for the steps that follow.
      - name: Extract TensorRT-LLM version
        run: |
          # Presumably a git commit id (40 lowercase hex characters) — confirm against trtllm.cmake.
          # Only the first match is kept so the env file entry stays on a single line.
          trtllm_version="$(grep -oP '[0-9a-f]{40}' "$GITHUB_WORKSPACE/backends/trtllm/cmake/trtllm.cmake" | head -n 1)"
          echo "TENSORRT_LLM_VERSION=${trtllm_version}" >> "$GITHUB_ENV"
          # Values appended to GITHUB_ENV are only visible to later steps, and a
          # ${{ env.* }} expression is expanded before this script starts, so log
          # the shell variable instead.
          echo "TensorRT-LLM version: ${trtllm_version}"
      # Assumes the IAM role stored in the AWS_ROLE_GITHUB_TGI_TEST secret, in us-east-1.
      - name: Configure AWS Credentials
        id: aws-creds
        uses: aws-actions/configure-aws-credentials@v4
        with:
          role-to-assume: ${{ secrets.AWS_ROLE_GITHUB_TGI_TEST }}
          # 7200 seconds = 2 hours.
          role-duration-seconds: 7200
          aws-region: us-east-1
          # NOTE(review): presumably enabled so later steps can read the credentials
          # from this step's outputs (steps.aws-creds.outputs.*) — confirm usage.
          output-credentials: true
      - name: Construct harware variables
        shell: bash
        run: |
@ -52,6 +64,7 @@ jobs:
                export runs_on="aws-g6-12xl-plus-priv-cache"
                export platform=""
                export extra_pytest=""
                # NOTE(review): "nil" presumably means "no specific Docker build target" — confirm against the build step.
                export target="nil"
                ;;
            cuda-trtllm)
                export dockerfile="Dockerfile_trtllm"
@ -61,6 +74,11 @@ jobs:
                export runs_on="ubuntu-latest"
                export platform=""
                export extra_pytest=""
                # Written to GITHUB_ENV as TARGET further down; presumably the Dockerfile stage to build — TODO confirm.
                export target="ci-runtime"
                # sccache settings: written to GITHUB_ENV as SCCACHE_S3_KEY_PREFIX / SCCACHE_REGION
                # further down and then passed to the image build as build args.
                export sccache_s3_key_prefix="trtllm"
                export sccache_region="us-east-1"
                # Written to GITHUB_ENV as BUILD_TYPE / IS_GHA_BUILD and passed on as the
                # build_type / is_gha_build build args.
                export build_type="dev"
                export is_gha_build="TRUE"
                ;;
            rocm)
                export dockerfile="Dockerfile_amd"
@ -71,6 +89,7 @@ jobs:
                export runs_on="ubuntu-latest"
                export platform=""
                export extra_pytest="-k test_flash_gemma_gptq_load"
                export target="nil"
                ;;
            intel-xpu)
                export dockerfile="Dockerfile_intel"
@ -80,6 +99,7 @@ jobs:
                export runs_on="ubuntu-latest"
                export platform="xpu"
                export extra_pytest=""
                export target="nil"
                ;;
            intel-cpu)
                export dockerfile="Dockerfile_intel"
@ -90,6 +110,7 @@ jobs:
                export runs_on="aws-highmemory-32-plus-priv"
                export platform="cpu"
                export extra_pytest="-k test_flash_gemma_simple"
                export target="nil"
                ;;
          esac
          echo $dockerfile
@ -106,6 +127,11 @@ jobs:
          echo "RUNS_ON=${runs_on}" >> $GITHUB_ENV
          echo "EXTRA_PYTEST=${extra_pytest}" >> $GITHUB_ENV
          echo REGISTRY_MIRROR=$REGISTRY_MIRROR >> $GITHUB_ENV
          # Publish the per-hardware settings to later steps through GITHUB_ENV.
          echo "TARGET=${target}" >> $GITHUB_ENV
          # Among the case arms visible here, only cuda-trtllm assigns the four variables
          # below, so for other hardware they are presumably written as empty values — TODO confirm.
          echo "SCCACHE_S3_KEY_PREFIX=${sccache_s3_key_prefix}" >> $GITHUB_ENV
          echo "SCCACHE_REGION=${sccache_region}" >> $GITHUB_ENV
          echo "BUILD_TYPE=${build_type}" >> $GITHUB_ENV
          echo "IS_GHA_BUILD=${is_gha_build}" >> $GITHUB_ENV
      - name: Initialize Docker Buildx
        uses: docker/setup-buildx-action@v3
        with:
@ -170,6 +196,11 @@ jobs:
            GIT_SHA=${{ env.GITHUB_SHA }}
            DOCKER_LABEL=sha-${{ env.GITHUB_SHA_SHORT }}${{ env.LABEL }}
            PLATFORM=${{ env.PLATFORM }}
            build_type=${{ env.BUILD_TYPE }}
            is_gha_build=${{ env.IS_GHA_BUILD }}
            sccache_bucket=${{ secrets.AWS_S3_BUCKET_GITHUB_TGI_TEST }}
            sccache_s3_key_prefix=${{ env.SCCACHE_S3_KEY_PREFIX }}
            sccache_region=${{ env.SCCACHE_REGION }}
          tags: ${{ steps.meta.outputs.tags || steps.meta-pr.outputs.tags }}
          labels: ${{ steps.meta.outputs.labels || steps.meta-pr.outputs.labels }}
          # Import the build cache from the S3 bucket; the cache name is suffixed with env.LABEL.
          # Each attribute appears once in the list.
          cache-from: type=s3,region=us-east-1,bucket=ci-docker-buildx-cache,name=text-generation-inference-cache${{ env.LABEL }},mode=min,access_key_id=${{ secrets.S3_CI_DOCKER_BUILDX_CACHE_ACCESS_KEY_ID }},secret_access_key=${{ secrets.S3_CI_DOCKER_BUILDX_CACHE_SECRET_ACCESS_KEY }}
--- a/.github/workflows/ci_build.yaml
+++ b/.github/workflows/ci_build.yaml
@ -37,7 +37,7 @@ jobs:
      # fail-fast is true by default
      fail-fast: false
      matrix:
-        hardware: ["cuda", "rocm", "intel-xpu", "intel-cpu"]
+        hardware: ["cuda", "cuda-trtllm", "rocm", "intel-xpu", "intel-cpu"]
    uses: ./.github/workflows/build.yaml # calls the one above ^
    permissions:
      contents: write
@ -47,13 +47,3 @@ jobs:
      # https://github.com/actions/runner/issues/2206
      release-tests: ${{ inputs.release-tests == true }}
    secrets: inherit
  build-trtllm:
    uses: ./.github/workflows/build_trtllm.yaml
    with:
      runs-on: aws-highmemory-64-plus-priv
    permissions:
      contents: write
      packages: write
      id-token: write
    secrets: inherit