SSHing into a CUDA 12.4 AMI

Nicolas Patry 2024-05-15 17:17:32 +00:00
parent fcb62c71e2
commit 90059707a8
2 changed files with 3 additions and 3 deletions


@@ -27,7 +27,7 @@ jobs:
runs-on: ubuntu-latest
env:
AWS_REGION: us-east-1
-EC2_AMI_ID: ami-0471c7c76be300c9f
+EC2_AMI_ID: ami-0789b6925c11b1fb2
EC2_INSTANCE_TYPE: g5.12xlarge
EC2_SUBNET_ID: subnet-931b34f5,subnet-ecb993cd,subnet-943dc2d8,subnet-45371f1a,subnet-ee93e0df,subnet-fddc3dfc
EC2_SECURITY_GROUP: sg-030175c435ac141d6
@@ -130,6 +130,8 @@ jobs:
type=semver,pattern={{major}}.{{minor}}
type=raw,value=latest,enable=${{ github.ref == format('refs/heads/{0}', github.event.repository.default_branch) }}
type=raw,value=sha-${{ env.GITHUB_SHA_SHORT }}
+- name: Setup tmate session
+  uses: mxschmitt/action-tmate@v3
- name: Build and push Docker image
id: build-and-push
uses: docker/build-push-action@v4
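
For context: mxschmitt/action-tmate opens an interactive tmate session on the runner and prints SSH connection details in the job log, which is the usual way to get a shell on a runner mid-job; that matches this commit's goal of SSHing into the CUDA 12.4 AMI to debug the build. A minimal sketch of how such a step is typically wired into a standalone workflow (the workflow name, the workflow_dispatch trigger, and the optional limit-access-to-actor input are illustrative assumptions, not part of this commit):

# Hypothetical debug workflow; not part of this change.
name: debug-runner
on: workflow_dispatch

jobs:
  debug:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      # The job pauses here and the log prints the tmate SSH connection string.
      - name: Setup tmate session
        uses: mxschmitt/action-tmate@v3
        with:
          # Restrict the session to the GitHub user who triggered the run.
          limit-access-to-actor: true

In this commit the step is inserted just before the "Build and push Docker image" step, so the job pauses for inspection before the image is built (action-tmate blocks by default until the session is closed).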


@@ -96,7 +96,6 @@ WORKDIR /usr/src
COPY server/Makefile-flash-att Makefile
# Build specific version of flash attention
-ENV TORCH_CUDA_ARCH_LIST="7.0 7.5 8.0 8.6 8.9 9.0+PTX"
RUN make build-flash-attention
# Build Flash Attention v2 CUDA kernels
@@ -108,7 +107,6 @@ COPY server/Makefile-flash-att-v2 Makefile
# Build specific version of flash attention v2
RUN make build-flash-attention-v2-cuda
-RUN TORCH_CUDA_ARCH_LIST="7.5;8.0;8.6;9.0+PTX" make build-flash-attention
# Build Transformers exllama kernels
FROM kernel-builder as exllama-kernels-builder