SSHing into a CUDA 12.4 AMI

Nicolas Patry 2024-05-15 17:17:32 +00:00
parent fcb62c71e2
commit 90059707a8
2 changed files with 3 additions and 3 deletions


@@ -27,7 +27,7 @@ jobs:
runs-on: ubuntu-latest
env:
AWS_REGION: us-east-1
-EC2_AMI_ID: ami-0471c7c76be300c9f
+EC2_AMI_ID: ami-0789b6925c11b1fb2
EC2_INSTANCE_TYPE: g5.12xlarge
EC2_SUBNET_ID: subnet-931b34f5,subnet-ecb993cd,subnet-943dc2d8,subnet-45371f1a,subnet-ee93e0df,subnet-fddc3dfc
EC2_SECURITY_GROUP: sg-030175c435ac141d6
@@ -130,6 +130,8 @@ jobs:
type=semver,pattern={{major}}.{{minor}}
type=raw,value=latest,enable=${{ github.ref == format('refs/heads/{0}', github.event.repository.default_branch) }}
type=raw,value=sha-${{ env.GITHUB_SHA_SHORT }}
+- name: Setup tmate session
+  uses: mxschmitt/action-tmate@v3
- name: Build and push Docker image
id: build-and-push
uses: docker/build-push-action@v4
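
For context: mxschmitt/action-tmate opens an interactive tmate session on the runner and prints SSH connection details in the job log, which is the usual way to get a shell on a runner mid-job; that matches this commit's goal of SSHing into the CUDA 12.4 AMI to debug the build. A minimal sketch of how such a step is typically wired into a standalone workflow (the workflow name, the workflow_dispatch trigger, and the optional limit-access-to-actor input are illustrative assumptions, not part of this commit):

# Hypothetical debug workflow; not part of this change.
name: debug-runner
on: workflow_dispatch

jobs:
  debug:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      # The job pauses here and the log prints the tmate SSH connection string.
      - name: Setup tmate session
        uses: mxschmitt/action-tmate@v3
        with:
          # Restrict the session to the GitHub user who triggered the run.
          limit-access-to-actor: true

In this commit the step is inserted just before the "Build and push Docker image" step, so the job pauses for inspection before the image is built (action-tmate blocks by default until the session is closed).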


@@ -96,7 +96,6 @@ WORKDIR /usr/src
COPY server/Makefile-flash-att Makefile
# Build specific version of flash attention
-ENV TORCH_CUDA_ARCH_LIST="7.0 7.5 8.0 8.6 8.9 9.0+PTX"
RUN make build-flash-attention
# Build Flash Attention v2 CUDA kernels
@@ -108,7 +107,6 @@ COPY server/Makefile-flash-att-v2 Makefile
# Build specific version of flash attention v2
RUN make build-flash-attention-v2-cuda
-RUN TORCH_CUDA_ARCH_LIST="7.5;8.0;8.6;9.0+PTX" make build-flash-attention
# Build Transformers exllama kernels
FROM kernel-builder as exllama-kernels-builder