Mirror of https://github.com/huggingface/text-generation-inference.git (synced 2025-09-11 20:34:54 +00:00)
Sshing a cuda 12.4

parent fcb62c71e2
commit 90059707a8
.github/workflows/build.yaml (vendored): 4 changes
@@ -27,7 +27,7 @@ jobs:
     runs-on: ubuntu-latest
     env:
       AWS_REGION: us-east-1
-      EC2_AMI_ID: ami-0471c7c76be300c9f
+      EC2_AMI_ID: ami-0789b6925c11b1fb2
       EC2_INSTANCE_TYPE: g5.12xlarge
       EC2_SUBNET_ID: subnet-931b34f5,subnet-ecb993cd,subnet-943dc2d8,subnet-45371f1a,subnet-ee93e0df,subnet-fddc3dfc
       EC2_SECURITY_GROUP: sg-030175c435ac141d6

@@ -130,6 +130,8 @@ jobs:
             type=semver,pattern={{major}}.{{minor}}
             type=raw,value=latest,enable=${{ github.ref == format('refs/heads/{0}', github.event.repository.default_branch) }}
             type=raw,value=sha-${{ env.GITHUB_SHA_SHORT }}
+      - name: Setup tmate session
+        uses: mxschmitt/action-tmate@v3
       - name: Build and push Docker image
         id: build-and-push
         uses: docker/build-push-action@v4
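For context on the added step above: mxschmitt/action-tmate opens an interactive tmate (SSH) session on the GitHub Actions runner, which is what the commit title's "Sshing" refers to; it lets a maintainer log into the CI machine and inspect the CUDA 12.4 build environment by hand. A minimal standalone sketch of how such a debug step is usually wired up follows; the workflow name, trigger, timeout, and the limit-access-to-actor option are illustrative assumptions, not part of this commit:

name: debug-cuda-build            # illustrative workflow, not from this repository
on: workflow_dispatch             # run on demand only

jobs:
  debug:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      # action-tmate prints SSH/web connection strings into the job log and
      # blocks here until the session is closed or the step times out.
      - name: Setup tmate session
        uses: mxschmitt/action-tmate@v3
        timeout-minutes: 30
        with:
          limit-access-to-actor: true   # only the user who triggered the run may attach

In the commit itself the step is placed immediately before "Build and push Docker image", so the workflow waits for an interactive session right before the image build starts.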
@@ -96,7 +96,6 @@ WORKDIR /usr/src
 COPY server/Makefile-flash-att Makefile
 
 # Build specific version of flash attention
-ENV TORCH_CUDA_ARCH_LIST="7.0 7.5 8.0 8.6 8.9 9.0+PTX"
 RUN make build-flash-attention
 
 # Build Flash Attention v2 CUDA kernels

@@ -108,7 +107,6 @@ COPY server/Makefile-flash-att-v2 Makefile
 
 # Build specific version of flash attention v2
 RUN make build-flash-attention-v2-cuda
-RUN TORCH_CUDA_ARCH_LIST="7.5;8.0;8.6;9.0+PTX" make build-flash-attention
 
 # Build Transformers exllama kernels
 FROM kernel-builder as exllama-kernels-builder
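The two removed lines were the only places these stages pinned TORCH_CUDA_ARCH_LIST, the variable PyTorch's extension builder reads to decide which GPU architectures to compile kernels for (it accepts both the space-separated and the semicolon-separated spelling seen above). The hunks appear to belong to the repository's CUDA Dockerfile, although the filename was not preserved in this capture. Below is a minimal sketch of a kernel-builder stage that sets the variable explicitly; the base image tag and the arch list are placeholder assumptions, not values from this commit:

# Sketch only: the base image tag and arch list are placeholders, not taken
# from the text-generation-inference Dockerfile.
FROM nvidia/cuda:12.4.1-devel-ubuntu22.04 AS kernel-builder

WORKDIR /usr/src
COPY server/Makefile-flash-att Makefile

# Limit the CUDA architectures the flash-attention kernels are compiled for;
# fewer targets means shorter builds and smaller binaries, and "+PTX" keeps a
# forward-compatible fallback for GPUs newer than the listed ones.
ENV TORCH_CUDA_ARCH_LIST="8.0;8.6;9.0+PTX"
RUN make build-flash-attention

After this commit the flash-attention stages no longer override the arch list themselves; they inherit whatever the surrounding builder stage or the Makefile sets, if anything.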