mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-09-11 20:34:54 +00:00
SSHing into a CUDA 12.4 runner
This commit is contained in:
parent
fcb62c71e2
commit
90059707a8
4
.github/workflows/build.yaml
vendored
4
.github/workflows/build.yaml
vendored
@ -27,7 +27,7 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
env:
|
||||
AWS_REGION: us-east-1
|
||||
EC2_AMI_ID: ami-0471c7c76be300c9f
|
||||
EC2_AMI_ID: ami-0789b6925c11b1fb2
|
||||
EC2_INSTANCE_TYPE: g5.12xlarge
|
||||
EC2_SUBNET_ID: subnet-931b34f5,subnet-ecb993cd,subnet-943dc2d8,subnet-45371f1a,subnet-ee93e0df,subnet-fddc3dfc
|
||||
EC2_SECURITY_GROUP: sg-030175c435ac141d6
|
||||
@ -130,6 +130,8 @@ jobs:
|
||||
type=semver,pattern={{major}}.{{minor}}
|
||||
type=raw,value=latest,enable=${{ github.ref == format('refs/heads/{0}', github.event.repository.default_branch) }}
|
||||
type=raw,value=sha-${{ env.GITHUB_SHA_SHORT }}
|
||||
- name: Setup tmate session
|
||||
uses: mxschmitt/action-tmate@v3
|
||||
- name: Build and push Docker image
|
||||
id: build-and-push
|
||||
uses: docker/build-push-action@v4
|
||||
|
@ -96,7 +96,6 @@ WORKDIR /usr/src
|
||||
COPY server/Makefile-flash-att Makefile
|
||||
|
||||
# Build specific version of flash attention
|
||||
ENV TORCH_CUDA_ARCH_LIST="7.0 7.5 8.0 8.6 8.9 9.0+PTX"
|
||||
RUN make build-flash-attention
|
||||
|
||||
# Build Flash Attention v2 CUDA kernels
|
||||
@ -108,7 +107,6 @@ COPY server/Makefile-flash-att-v2 Makefile
|
||||
|
||||
# Build specific version of flash attention v2
|
||||
RUN make build-flash-attention-v2-cuda
|
||||
RUN TORCH_CUDA_ARCH_LIST="7.5;8.0;8.6;9.0+PTX" make build-flash-attention
|
||||
|
||||
# Build Transformers exllama kernels
|
||||
FROM kernel-builder as exllama-kernels-builder
|
||||
|
Loading…
Reference in New Issue
Block a user