# Patch summary (free text before the first "diff --git" header):
#
# 1. .github/workflows/build_trtllm.yaml
#    - Renames the job `build-trtllm-and-push` to `build-and-push`, the name
#      already used by the `jobs.build-and-push.outputs.label` reference.
#    - Appends a `run-tests` job that depends on `build-and-push`, starts a
#      container from that job's `docker_image` output, lists /usr/local/tgi
#      and runs /usr/local/tgi/bin/tgi_trtllm_backend_tests.
#
# 2. .github/workflows/trtllm_tests.yaml
#    - Deleted. It called build_trtllm.yaml as a reusable workflow and held
#      the same `run-tests` job, keyed on `build-backend` instead.
#
# NOTE(review): line breaks, indentation widths and blank context lines were
# rebuilt from a whitespace-collapsed copy; confirm against the real commit
# before applying.

diff --git a/.github/workflows/build_trtllm.yaml b/.github/workflows/build_trtllm.yaml
index 7e992024..bb300dde 100644
--- a/.github/workflows/build_trtllm.yaml
+++ b/.github/workflows/build_trtllm.yaml
@@ -16,7 +16,7 @@ on:
         value: ${{ jobs.build-and-push.outputs.label }}
 
 jobs:
-  build-trtllm-and-push:
+  build-and-push:
     permissions:
       contents: read # Required to check out repository.
       id-token: write # Required to authenticate via OIDC.
@@ -131,4 +131,28 @@ jobs:
           echo "docker_image=registry.internal.huggingface.tech/api-inference/community/text-generation-inference/tensorrt-llm:sha-${{ env.GITHUB_SHA_SHORT }}${{ env.LABEL }}" >> "$GITHUB_OUTPUT"
           echo "label=${{ env.LABEL }}" >> "$GITHUB_OUTPUT"
 
+  run-tests:
+    needs: build-and-push
+    concurrency:
+      group: ${{ github.workflow }}-${{ github.job }}-trtllm-${{ github.head_ref || github.run_id }}
+      cancel-in-progress: true
+    runs-on:
+      group: aws-g6-12xl-plus-priv-cache
+    container:
+      image: ${{ needs.build-and-push.outputs.docker_image }}
+      credentials:
+        username: ${{ secrets.REGISTRY_USERNAME }}
+        password: ${{ secrets.REGISTRY_PASSWORD }}
+      options: --gpus all --shm-size=8g
+
+    steps:
+      - name: whoami
+        run: |
+          echo "Container: ${{ needs.build-and-push.outputs.docker_image }}"
+
+      - name: List binaries
+        run: ls -alh /usr/local/tgi
+
+      - name: Run C++/CUDA tests
+        run: /usr/local/tgi/bin/tgi_trtllm_backend_tests
 
diff --git a/.github/workflows/trtllm_tests.yaml b/.github/workflows/trtllm_tests.yaml
deleted file mode 100644
index 9c9048f6..00000000
--- a/.github/workflows/trtllm_tests.yaml
+++ /dev/null
@@ -1,56 +0,0 @@
-name: "TensorRT-LLM C/C++/CUDA Test Suite"
-on:
-  push:
-    branches:
-      - 'main'
-      - 'trtllm/ci'
-    tags:
-      - 'v*'
-  pull_request:
-    paths:
-      - "backends/trtllm"
-      - "server/**"
-      - "proto/**"
-      - "router/**"
-      - "launcher/**"
-      - "Cargo.lock"
-      - "rust-toolchain.toml"
-      - ".github/workflows/build_trtllm.yaml"
-      - ".github/workflows/trtllm_tests.yaml"
-    branches:
-      - "main"
-
-jobs:
-  build-backend:
-    permissions:
-      contents: write
-      packages: write
-      id-token: write
-    uses: ./.github/workflows/build_trtllm.yaml
-    with:
-      runs-on: aws-highmemory-64-plus-priv
-    secrets: inherit
-
-  run-tests:
-    needs: build-backend
-    concurrency:
-      group: ${{ github.workflow }}-${{ github.job }}-trtllm-${{ github.head_ref || github.run_id }}
-      cancel-in-progress: true
-    runs-on:
-      group: aws-g6-12xl-plus-priv-cache
-    container:
-      image: ${{ needs.build-backend.outputs.docker_image }}
-      credentials:
-        username: ${{ secrets.REGISTRY_USERNAME }}
-        password: ${{ secrets.REGISTRY_PASSWORD }}
-      options: --gpus all --shm-size=8g
-
-    steps:
-      - name: whoami
-        run: |
-          echo "Container: ${{ needs.build-backend.outputs.docker_image }}"
-      - name: List binaries
-        run: ls -alh /usr/local/tgi
-
-      - name: Run C++/CUDA tests
-        run: /usr/local/tgi/bin/tgi_trtllm_backend_tests
\ No newline at end of file