Fix workflow

This commit is contained in:
orangetin 2023-07-26 19:58:44 -07:00 committed by GitHub
parent e64a65891b
commit 3f031ad51f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -1,246 +1,77 @@
name: Build and push docker image to internal registry name: DockerHub Release
on: on:
workflow_dispatch:
push: push:
branches: branches:
- 'main' - main
tags: paths-ignore:
- 'v*' - '**/README.md'
pull_request:
paths: env:
- ".github/workflows/build.yaml" AWS_REGION: ${{ secrets.AWS_REGION }}
- "integration-tests/**" AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
- "server/**" AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
- "proto/**" DOCKERHUB_REPOSITORY: text-generation-inference
- "router/**" DOCKERHUB_ORGANIZATION: togethercomputer
- "launcher/**" DOCKERFILE: Dockerfile
- "Cargo.lock"
- "rust-toolchain.toml" defaults:
- "Dockerfile" run:
branches: shell: bash
- 'main'
jobs: jobs:
start-runner: build:
name: Start self-hosted EC2 runner name: Build and push intermediate image
runs-on: ubuntu-latest runs-on:
env: labels: ubuntu-22.04-4core
AWS_REGION: us-east-1
EC2_AMI_ID: ami-03cfed9ea28f4b002
EC2_INSTANCE_TYPE: g5.12xlarge
EC2_SUBNET_ID: subnet-931b34f5,subnet-ecb993cd,subnet-943dc2d8,subnet-45371f1a,subnet-ee93e0df,subnet-fddc3dfc
EC2_SECURITY_GROUP: sg-030175c435ac141d6
outputs:
label: ${{ steps.start-ec2-runner.outputs.label }}
ec2-instance-id: ${{ steps.start-ec2-runner.outputs.ec2-instance-id }}
steps: steps:
- name: Checkout
uses: actions/checkout@v2
- name: Set up QEMU
uses: docker/setup-qemu-action@v2
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v2
- name: Configure AWS credentials - name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@v1 uses: aws-actions/configure-aws-credentials@v1
with: with:
aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} aws-access-key-id: ${{ env.AWS_ACCESS_KEY_ID }}
aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} aws-secret-access-key: ${{ env.AWS_SECRET_ACCESS_KEY }}
aws-region: ${{ env.AWS_REGION }} aws-region: ${{ env.AWS_REGION }}
- name: Start EC2 runner
id: start-ec2-runner
uses: philschmid/philschmid-ec2-github-runner@main
with:
mode: start
github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }}
ec2-image-id: ${{ env.EC2_AMI_ID }}
ec2-instance-type: ${{ env.EC2_INSTANCE_TYPE }}
subnet-id: ${{ env.EC2_SUBNET_ID }}
security-group-id: ${{ env.EC2_SECURITY_GROUP }}
aws-resource-tags: > # optional, requires additional permissions
[
{"Key": "Name", "Value": "ec2-tgi-github-runner"},
{"Key": "GitHubRepository", "Value": "${{ github.repository }}"}
]
build-and-push-image: - name: Login to Amazon ECR
concurrency: id: login-ecr
group: ${{ github.workflow }}-${{ github.job }}-${{ github.head_ref || github.run_id }} uses: aws-actions/amazon-ecr-login@v1
cancel-in-progress: true
needs: start-runner # required to start the main job when the runner is ready - name: Login to DockerHub
runs-on: ${{ needs.start-runner.outputs.label }} # run the job on the newly created runner id: login-dockerhub
permissions:
contents: write
packages: write
# This is used to complete the identity challenge
# with sigstore/fulcio when running outside of PRs.
id-token: write
security-events: write
steps:
- name: Checkout repository
uses: actions/checkout@v3
- name: Initialize Docker Buildx
uses: docker/setup-buildx-action@v2.0.0
with:
install: true
- name: Inject slug/short variables
uses: rlespinasse/github-slug-action@v4.4.1
- name: Install cosign
if: github.event_name != 'pull_request'
uses: sigstore/cosign-installer@f3c664df7af409cb4873aa5068053ba9d61a57b6 #v2.6.0
with:
cosign-release: 'v1.13.1'
- name: Tailscale
uses: tailscale/github-action@7bd8039bf25c23c4ab1b8d6e2cc2da2280601966
with:
authkey: ${{ secrets.TAILSCALE_AUTHKEY }}
- name: Login to GitHub Container Registry
if: github.event_name != 'pull_request'
uses: docker/login-action@v2 uses: docker/login-action@v2
with: with:
registry: ghcr.io username: ${{ secrets.DOCKERHUB_USERNAME }}
username: ${{ github.actor }} password: ${{ secrets.DOCKERHUB_TOKEN }}
password: ${{ secrets.GITHUB_TOKEN }}
- name: Login to internal Container Registry - name: Create and push git version tag
uses: docker/login-action@v2.1.0 id: build-image
with: run: |
username: ${{ secrets.TAILSCALE_DOCKER_USERNAME }} export HOME=$(pwd)
password: ${{ secrets.TAILSCALE_DOCKER_PASSWORD }} git fetch --tags --unshallow --prune
registry: registry.internal.huggingface.tech curl -sL https://git.io/autotag-install | sh --
- name: Login to Azure Container Registry TAG_NAME=v$(bin/autotag -b main -p main -T datetime)
if: github.event_name != 'pull_request' echo $TAG_NAME
uses: docker/login-action@v2.1.0 echo "TAG_NAME=$TAG_NAME" >> $GITHUB_ENV
with: echo "${{ env.TAG_NAME }}"
username: ${{ secrets.AZURE_DOCKER_USERNAME }} git config --global user.name 'Together Robot'
password: ${{ secrets.AZURE_DOCKER_PASSWORD }} git push --tags
registry: db4c2190dd824d1f950f5d1555fbadf0.azurecr.io
# If pull request - name: build and push to dockerhub
- name: Extract metadata (tags, labels) for Docker
if: ${{ github.event_name == 'pull_request' }}
id: meta-pr
uses: docker/metadata-action@v4.3.0
with:
images: |
registry.internal.huggingface.tech/api-inference/community/text-generation-inference
tags: |
type=raw,value=sha-${{ env.GITHUB_SHA_SHORT }}
# If main, release or tag
- name: Extract metadata (tags, labels) for Docker
if: ${{ github.event_name != 'pull_request' }}
id: meta
uses: docker/metadata-action@v4.3.0
with:
flavor: |
latest=auto
images: |
registry.internal.huggingface.tech/api-inference/community/text-generation-inference
ghcr.io/huggingface/text-generation-inference
db4c2190dd824d1f950f5d1555fbadf0.azurecr.io/text-generation-inference
tags: |
type=semver,pattern={{version}}
type=semver,pattern={{major}}.{{minor}}
type=raw,value=latest,enable=${{ github.ref == format('refs/heads/{0}', github.event.repository.default_branch) }}
type=raw,value=sha-${{ env.GITHUB_SHA_SHORT }}
- name: Build and push Docker image
id: build-and-push
uses: docker/build-push-action@v4 uses: docker/build-push-action@v4
with: with:
context: . file: ${{ env.DOCKERFILE }}
file: Dockerfile
push: true push: true
platforms: 'linux/amd64' tags: |
build-args: | ${{ env.DOCKERHUB_ORGANIZATION }}/${{ env.DOCKERHUB_REPOSITORY}}:${{ env.TAG_NAME}}
GIT_SHA=${{ env.GITHUB_SHA }} ${{ env.DOCKERHUB_ORGANIZATION }}/${{ env.DOCKERHUB_REPOSITORY}}:latest
DOCKER_LABEL=sha-${{ env.GITHUB_SHA_SHORT }} cache-from: type=registry,ref=${{ env.DOCKERHUB_ORGANIZATION }}/${{ env.DOCKERHUB_REPOSITORY}}:cache
tags: ${{ steps.meta.outputs.tags || steps.meta-pr.outputs.tags }} cache-to: type=registry,ref=${{ env.DOCKERHUB_ORGANIZATION }}/${{ env.DOCKERHUB_REPOSITORY}}:cache,mode=max
labels: ${{ steps.meta.outputs.labels || steps.meta-pr.outputs.labels }}
cache-from: type=registry,ref=registry.internal.huggingface.tech/api-inference/community/text-generation-inference:cache,mode=min
cache-to: type=registry,ref=registry.internal.huggingface.tech/api-inference/community/text-generation-inference:cache,mode=min
# Sign the resulting Docker image digest except on PRs.
# This will only write to the public Rekor transparency log when the Docker
# repository is public to avoid leaking data.
- name: Sign the published Docker image
if: ${{ github.event_name != 'pull_request' }}
env:
COSIGN_EXPERIMENTAL: "true"
# This step uses the identity token to provision an ephemeral certificate
# against the sigstore community Fulcio instance.
run: echo "${{ steps.meta.outputs.tags }}" | xargs -I {} cosign sign {}@${{ steps.build-and-push.outputs.digest }}
- name: Run Trivy in GitHub SBOM mode and submit results to Dependency Graph
uses: aquasecurity/trivy-action@master
if: ${{ github.event_name != 'pull_request' }}
with:
image-ref: 'ghcr.io/huggingface/text-generation-inference:sha-${{ env.GITHUB_SHA_SHORT }}'
format: 'github'
output: 'dependency-results.sbom.json'
github-pat: ${{ secrets.GITHUB_TOKEN }}
scanners: 'vuln'
- name: Run Trivy vulnerability scanner
uses: aquasecurity/trivy-action@master
if: ${{ github.event_name != 'pull_request' }}
with:
image-ref: 'ghcr.io/huggingface/text-generation-inference:sha-${{ env.GITHUB_SHA_SHORT }}'
format: 'sarif'
output: 'trivy-results.sarif'
severity: 'CRITICAL'
scanners: 'vuln'
- name: Upload Trivy scan results to GitHub Security tab
uses: github/codeql-action/upload-sarif@v2
if: ${{ github.event_name != 'pull_request' }}
with:
sarif_file: 'trivy-results.sarif'
integration-tests:
concurrency:
group: ${{ github.workflow }}-${{ github.job }}-${{ github.head_ref || github.run_id }}
cancel-in-progress: true
needs:
- start-runner
- build-and-push-image # Wait for the docker image to be built
runs-on: ${{ needs.start-runner.outputs.label }} # run the job on the newly created runner
env:
DOCKER_VOLUME: /cache
steps:
- uses: actions/checkout@v2
- name: Inject slug/short variables
uses: rlespinasse/github-slug-action@v4.4.1
- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: 3.9
- name: Tailscale
uses: tailscale/github-action@7bd8039bf25c23c4ab1b8d6e2cc2da2280601966
with:
authkey: ${{ secrets.TAILSCALE_AUTHKEY }}
- name: Prepare disks
run: |
sudo mkfs -t ext4 /dev/nvme1n1
sudo mkdir ${{ env.DOCKER_VOLUME }}
sudo mount /dev/nvme1n1 ${{ env.DOCKER_VOLUME }}
- name: Install
run: |
make install-integration-tests
- name: Run tests
run: |
export DOCKER_IMAGE=registry.internal.huggingface.tech/api-inference/community/text-generation-inference:sha-${{ env.GITHUB_SHA_SHORT }}
export HUGGING_FACE_HUB_TOKEN=${{ secrets.HUGGING_FACE_HUB_TOKEN }}
pytest -s -vv integration-tests
stop-runner:
name: Stop self-hosted EC2 runner
needs:
- start-runner
- build-and-push-image
- integration-tests
runs-on: ubuntu-latest
env:
AWS_REGION: us-east-1
if: ${{ always() }} # required to stop the runner even if the error happened in the previous jobs
steps:
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@v1
with:
aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
aws-region: ${{ env.AWS_REGION }}
- name: Stop EC2 runner
uses: philschmid/philschmid-ec2-github-runner@main
with:
mode: stop
github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }}
label: ${{ needs.start-runner.outputs.label }}
ec2-instance-id: ${{ needs.start-runner.outputs.ec2-instance-id }}