mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-09-11 12:24:53 +00:00
Merge TRTLLM in standard CI
This commit is contained in:
parent
0159843449
commit
d969dad634
31
.github/workflows/build.yaml
vendored
31
.github/workflows/build.yaml
vendored
@ -40,6 +40,18 @@ jobs:
|
|||||||
uses: actions/checkout@v4
|
uses: actions/checkout@v4
|
||||||
- name: Inject slug/short variables
|
- name: Inject slug/short variables
|
||||||
uses: rlespinasse/github-slug-action@v4.4.1
|
uses: rlespinasse/github-slug-action@v4.4.1
|
||||||
|
- name: Extract TensorRT-LLM version
  # Reads the pinned TensorRT-LLM commit (a 40-character hex SHA) from
  # backends/trtllm/cmake/trtllm.cmake and exports it as TENSORRT_LLM_VERSION
  # for the steps that follow.
  run: |
    # Match hex digits only, and keep just the first hit so exactly one
    # KEY=value line is appended to GITHUB_ENV.
    TENSORRT_LLM_VERSION="$(grep -oP '[a-f0-9]{40}' "$GITHUB_WORKSPACE/backends/trtllm/cmake/trtllm.cmake" | head -n 1)"
    echo "TENSORRT_LLM_VERSION=${TENSORRT_LLM_VERSION}" >> "$GITHUB_ENV"
    # Values appended to GITHUB_ENV only become visible in later steps, so
    # log the local shell variable; the env context would still be empty here.
    echo "TensorRT-LLM version: ${TENSORRT_LLM_VERSION}"
|
||||||
|
- name: Configure AWS Credentials
  # Assumes the IAM role stored in the AWS_ROLE_GITHUB_TGI_TEST secret and,
  # via output-credentials, exposes the temporary credentials as outputs of
  # this step (referenced through the `aws-creds` id).
  # NOTE(review): presumably consumed by the sccache-enabled Docker build
  # further down; confirm against the build step.
  id: aws-creds
  uses: aws-actions/configure-aws-credentials@v4
  with:
    role-to-assume: ${{ secrets.AWS_ROLE_GITHUB_TGI_TEST }}
    # Session lifetime requested for the assumed role, in seconds.
    role-duration-seconds: 7200
    aws-region: us-east-1
    output-credentials: true
|
||||||
- name: Construct harware variables
|
- name: Construct harware variables
|
||||||
shell: bash
|
shell: bash
|
||||||
run: |
|
run: |
|
||||||
@ -52,6 +64,7 @@ jobs:
|
|||||||
export runs_on="aws-g6-12xl-plus-priv-cache"
|
export runs_on="aws-g6-12xl-plus-priv-cache"
|
||||||
export platform=""
|
export platform=""
|
||||||
export extra_pytest=""
|
export extra_pytest=""
|
||||||
|
export target="nil"
|
||||||
;;
|
;;
|
||||||
cuda-trtllm)
|
cuda-trtllm)
|
||||||
export dockerfile="Dockerfile_trtllm"
|
export dockerfile="Dockerfile_trtllm"
|
||||||
@ -61,6 +74,11 @@ jobs:
|
|||||||
export runs_on="ubuntu-latest"
|
export runs_on="ubuntu-latest"
|
||||||
export platform=""
|
export platform=""
|
||||||
export extra_pytest=""
|
export extra_pytest=""
|
||||||
|
export target="ci-runtime"
|
||||||
|
export sccache_s3_key_prefix="trtllm"
|
||||||
|
export sccache_region="us-east-1"
|
||||||
|
export build_type="dev"
|
||||||
|
export is_gha_build="TRUE"
|
||||||
;;
|
;;
|
||||||
rocm)
|
rocm)
|
||||||
export dockerfile="Dockerfile_amd"
|
export dockerfile="Dockerfile_amd"
|
||||||
@ -71,6 +89,7 @@ jobs:
|
|||||||
export runs_on="ubuntu-latest"
|
export runs_on="ubuntu-latest"
|
||||||
export platform=""
|
export platform=""
|
||||||
export extra_pytest="-k test_flash_gemma_gptq_load"
|
export extra_pytest="-k test_flash_gemma_gptq_load"
|
||||||
|
export target="nil"
|
||||||
;;
|
;;
|
||||||
intel-xpu)
|
intel-xpu)
|
||||||
export dockerfile="Dockerfile_intel"
|
export dockerfile="Dockerfile_intel"
|
||||||
@ -80,6 +99,7 @@ jobs:
|
|||||||
export runs_on="ubuntu-latest"
|
export runs_on="ubuntu-latest"
|
||||||
export platform="xpu"
|
export platform="xpu"
|
||||||
export extra_pytest=""
|
export extra_pytest=""
|
||||||
|
export target="nil"
|
||||||
;;
|
;;
|
||||||
intel-cpu)
|
intel-cpu)
|
||||||
export dockerfile="Dockerfile_intel"
|
export dockerfile="Dockerfile_intel"
|
||||||
@ -90,6 +110,7 @@ jobs:
|
|||||||
export runs_on="aws-highmemory-32-plus-priv"
|
export runs_on="aws-highmemory-32-plus-priv"
|
||||||
export platform="cpu"
|
export platform="cpu"
|
||||||
export extra_pytest="-k test_flash_gemma_simple"
|
export extra_pytest="-k test_flash_gemma_simple"
|
||||||
|
export target="nil"
|
||||||
;;
|
;;
|
||||||
esac
|
esac
|
||||||
echo $dockerfile
|
echo $dockerfile
|
||||||
@ -106,6 +127,11 @@ jobs:
|
|||||||
echo "RUNS_ON=${runs_on}" >> $GITHUB_ENV
|
echo "RUNS_ON=${runs_on}" >> $GITHUB_ENV
|
||||||
echo "EXTRA_PYTEST=${extra_pytest}" >> $GITHUB_ENV
|
echo "EXTRA_PYTEST=${extra_pytest}" >> $GITHUB_ENV
|
||||||
echo REGISTRY_MIRROR=$REGISTRY_MIRROR >> $GITHUB_ENV
|
echo REGISTRY_MIRROR=$REGISTRY_MIRROR >> $GITHUB_ENV
|
||||||
|
echo "TARGET=${target}" >> $GITHUB_ENV
|
||||||
|
echo "SCCACHE_S3_KEY_PREFIX=${sccache_s3_key_prefix}" >> $GITHUB_ENV
|
||||||
|
echo "SCCACHE_REGION=${sccache_region}" >> $GITHUB_ENV
|
||||||
|
echo "BUILD_TYPE=${build_type}" >> $GITHUB_ENV
|
||||||
|
echo "IS_GHA_BUILD=${is_gha_build}" >> $GITHUB_ENV
|
||||||
- name: Initialize Docker Buildx
|
- name: Initialize Docker Buildx
|
||||||
uses: docker/setup-buildx-action@v3
|
uses: docker/setup-buildx-action@v3
|
||||||
with:
|
with:
|
||||||
@ -170,6 +196,11 @@ jobs:
|
|||||||
GIT_SHA=${{ env.GITHUB_SHA }}
|
GIT_SHA=${{ env.GITHUB_SHA }}
|
||||||
DOCKER_LABEL=sha-${{ env.GITHUB_SHA_SHORT }}${{ env.LABEL }}
|
DOCKER_LABEL=sha-${{ env.GITHUB_SHA_SHORT }}${{ env.LABEL }}
|
||||||
PLATFORM=${{ env.PLATFORM }}
|
PLATFORM=${{ env.PLATFORM }}
|
||||||
|
build_type=${{ env.BUILD_TYPE }}
|
||||||
|
is_gha_build=${{ env.IS_GHA_BUILD }}
|
||||||
|
sccache_bucket=${{ secrets.AWS_S3_BUCKET_GITHUB_TGI_TEST }}
|
||||||
|
sccache_s3_key_prefix=${{ env.SCCACHE_S3_KEY_PREFIX }}
|
||||||
|
sccache_region=${{ env.SCCACHE_REGION }}
|
||||||
tags: ${{ steps.meta.outputs.tags || steps.meta-pr.outputs.tags }}
|
tags: ${{ steps.meta.outputs.tags || steps.meta-pr.outputs.tags }}
|
||||||
labels: ${{ steps.meta.outputs.labels || steps.meta-pr.outputs.labels }}
|
labels: ${{ steps.meta.outputs.labels || steps.meta-pr.outputs.labels }}
|
||||||
cache-from: type=s3,region=us-east-1,bucket=ci-docker-buildx-cache,name=text-generation-inference-cache${{ env.LABEL }},mode=min,access_key_id=${{ secrets.S3_CI_DOCKER_BUILDX_CACHE_ACCESS_KEY_ID }},secret_access_key=${{ secrets.S3_CI_DOCKER_BUILDX_CACHE_SECRET_ACCESS_KEY }},mode=min
|
cache-from: type=s3,region=us-east-1,bucket=ci-docker-buildx-cache,name=text-generation-inference-cache${{ env.LABEL }},mode=min,access_key_id=${{ secrets.S3_CI_DOCKER_BUILDX_CACHE_ACCESS_KEY_ID }},secret_access_key=${{ secrets.S3_CI_DOCKER_BUILDX_CACHE_SECRET_ACCESS_KEY }},mode=min
|
||||||
|
12
.github/workflows/ci_build.yaml
vendored
12
.github/workflows/ci_build.yaml
vendored
@ -37,7 +37,7 @@ jobs:
|
|||||||
# fail-fast is true by default
|
# fail-fast is true by default
|
||||||
fail-fast: false
|
fail-fast: false
|
||||||
matrix:
|
matrix:
|
||||||
hardware: ["cuda", "rocm", "intel-xpu", "intel-cpu"]
|
hardware: ["cuda", "cuda-trtllm", "rocm", "intel-xpu", "intel-cpu"]
|
||||||
uses: ./.github/workflows/build.yaml # calls the one above ^
|
uses: ./.github/workflows/build.yaml # calls the one above ^
|
||||||
permissions:
|
permissions:
|
||||||
contents: write
|
contents: write
|
||||||
@ -47,13 +47,3 @@ jobs:
|
|||||||
# https://github.com/actions/runner/issues/2206
|
# https://github.com/actions/runner/issues/2206
|
||||||
release-tests: ${{ inputs.release-tests == true }}
|
release-tests: ${{ inputs.release-tests == true }}
|
||||||
secrets: inherit
|
secrets: inherit
|
||||||
|
|
||||||
build-trtllm:
|
|
||||||
uses: ./.github/workflows/build_trtllm.yaml
|
|
||||||
with:
|
|
||||||
runs-on: aws-highmemory-64-plus-priv
|
|
||||||
permissions:
|
|
||||||
contents: write
|
|
||||||
packages: write
|
|
||||||
id-token: write
|
|
||||||
secrets: inherit
|
|
||||||
|
Loading…
Reference in New Issue
Block a user