diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index cd9f19ba..abe161db 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -27,8 +27,8 @@ jobs: concurrency: group: ${{ github.workflow }}-build-and-push-image-${{ inputs.hardware }}-${{ github.head_ref || github.run_id }} cancel-in-progress: true - # TODO see with @Glegendre to get CPU runner here instead - runs-on: [self-hosted, intel-cpu, 32-cpu, 256-ram, ci] + runs-on: + group: aws-r7i-8xlarge-priv permissions: contents: write packages: write @@ -49,7 +49,7 @@ jobs: export dockerfile="Dockerfile" export label_extension="" export docker_devices="" - export runs_on="nvidia-gpu" + export runs_on="aws-g5-12xlarge" ;; rocm) export dockerfile="Dockerfile_amd" @@ -79,9 +79,15 @@ jobs: uses: docker/setup-buildx-action@v3 with: install: true - config-inline: | + buildkitd-config-inline: | [registry."docker.io"] - mirrors = ["registry.github-runners.huggingface.tech"] + mirrors = ["registry-us-east-1-mirror.prod.aws.ci.huggingface.tech"] + - name: Login to internal Container Registry + uses: docker/login-action@v3 + with: + username: ${{ secrets.REGISTRY_USERNAME }} + password: ${{ secrets.REGISTRY_PASSWORD }} + registry: registry.internal.huggingface.tech - name: Login to GitHub Container Registry if: github.event_name != 'pull_request' uses: docker/login-action@v3 @@ -103,7 +109,8 @@ jobs: uses: docker/metadata-action@v5 with: images: | - registry-push.github-runners.huggingface.tech/api-inference/community/text-generation-inference + registry-us-east-1.prod.aws.ci.huggingface.tech/api-inference/community/text-generation-inference + registry.internal.huggingface.tech/api-inference/community/text-generation-inference tags: | type=raw,value=sha-${{ env.GITHUB_SHA_SHORT }}${{ env.LABEL }} # If main, release or tag @@ -115,7 +122,8 @@ jobs: flavor: | latest=auto images: | - registry-push.github-runners.huggingface.tech/api-inference/community/text-generation-inference + 
registry-us-east-1.prod.aws.ci.huggingface.tech/api-inference/community/text-generation-inference + registry.internal.huggingface.tech/api-inference/community/text-generation-inference ghcr.io/huggingface/text-generation-inference db4c2190dd824d1f950f5d1555fbadf0.azurecr.io/text-generation-inference tags: | @@ -141,7 +149,7 @@ jobs: - name: Final id: final run: | - echo "docker_image=registry-push.github-runners.huggingface.tech/api-inference/community/text-generation-inference:sha-${{ env.GITHUB_SHA_SHORT}}${{ env.LABEL }}" >> "$GITHUB_OUTPUT" + echo "docker_image=registry-us-east-1.prod.aws.ci.huggingface.tech/api-inference/community/text-generation-inference:sha-${{ env.GITHUB_SHA_SHORT}}${{ env.LABEL }}" >> "$GITHUB_OUTPUT" echo "docker_devices=${{ env.DOCKER_DEVICES }}" >> "$GITHUB_OUTPUT" echo "runs_on=${{ env.RUNS_ON }}" >> "$GITHUB_OUTPUT" echo "label=${{ env.LABEL }}" >> "$GITHUB_OUTPUT" @@ -150,7 +158,8 @@ jobs: group: ${{ github.workflow }}-${{ github.job }}-${{ needs.build-and-push.outputs.label }}-${{ github.head_ref || github.run_id }} cancel-in-progress: true needs: build-and-push - runs-on: ["self-hosted", "${{ needs.build-and-push.outputs.runs_on }}", "multi-gpu"] + runs-on: + group: ${{ needs.build-and-push.outputs.runs_on }} if: needs.build-and-push.outputs.runs_on != 'ubuntu-latest' env: PYTEST_FLAGS: ${{ (startsWith(github.ref, 'refs/tags/') || github.ref == 'refs/heads/main' || inputs.release-tests == true) && '--release' || '' }} @@ -174,3 +183,4 @@ jobs: export HF_TOKEN=${{ secrets.HF_TOKEN }} echo $DOCKER_IMAGE pytest -s -vv integration-tests ${PYTEST_FLAGS} + diff --git a/.github/workflows/load_test.yaml b/.github/workflows/load_test.yaml index 637df472..ecfe0fda 100644 --- a/.github/workflows/load_test.yaml +++ b/.github/workflows/load_test.yaml @@ -15,7 +15,8 @@ jobs: concurrency: group: ${{ github.workflow }}-${{ github.job }}-${{ github.head_ref || github.run_id }} cancel-in-progress: true - runs-on: [self-hosted, nvidia-gpu , 
multi-gpu, 4-a10, ci] + runs-on: + group: aws-g5-12xlarge env: DOCKER_VOLUME: /cache steps: