diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml
index 17242328..504e64fb 100644
--- a/.github/workflows/build.yaml
+++ b/.github/workflows/build.yaml
@@ -27,7 +27,7 @@ jobs:
     runs-on: ubuntu-latest
     env:
       AWS_REGION: us-east-1
-      EC2_AMI_ID: ami-0471c7c76be300c9f
+      EC2_AMI_ID: ami-0789b6925c11b1fb2
       EC2_INSTANCE_TYPE: g5.12xlarge
       EC2_SUBNET_ID: subnet-931b34f5,subnet-ecb993cd,subnet-943dc2d8,subnet-45371f1a,subnet-ee93e0df,subnet-fddc3dfc
       EC2_SECURITY_GROUP: sg-030175c435ac141d6
@@ -130,6 +130,15 @@ jobs:
             type=semver,pattern={{major}}.{{minor}}
             type=raw,value=latest,enable=${{ github.ref == format('refs/heads/{0}', github.event.repository.default_branch) }}
             type=raw,value=sha-${{ env.GITHUB_SHA_SHORT }}
+      # Temporary debugging aid: opens an interactive tmate (SSH) session on
+      # the job's runner ahead of the image build. Access is limited to the
+      # SSH keys of the user who triggered the run, and the step is capped so
+      # a forgotten session cannot hold the job open indefinitely.
+      - name: Setup tmate session
+        uses: mxschmitt/action-tmate@v3
+        timeout-minutes: 30
+        with:
+          limit-access-to-actor: true
       - name: Build and push Docker image
         id: build-and-push
         uses: docker/build-push-action@v4
diff --git a/Dockerfile b/Dockerfile
index 630372b7..c8327c22 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -96,7 +96,6 @@ WORKDIR /usr/src
 COPY server/Makefile-flash-att Makefile
 
 # Build specific version of flash attention
-ENV TORCH_CUDA_ARCH_LIST="7.0 7.5 8.0 8.6 8.9 9.0+PTX"
 RUN make build-flash-attention
 
 # Build Flash Attention v2 CUDA kernels
@@ -108,7 +107,6 @@ COPY server/Makefile-flash-att-v2 Makefile
 
 # Build specific version of flash attention v2
 RUN make build-flash-attention-v2-cuda
-RUN TORCH_CUDA_ARCH_LIST="7.5;8.0;8.6;9.0+PTX" make build-flash-attention
 
 # Build Transformers exllama kernels
 FROM kernel-builder as exllama-kernels-builder