Mirror of https://github.com/huggingface/text-generation-inference.git (synced 2025-09-11 20:34:54 +00:00)
Change the Dockerfile. It builds locally; something might be up in the AWS env.
commit dc0b8d76b5
parent 368c057cbf
@@ -96,6 +96,7 @@ WORKDIR /usr/src
 COPY server/Makefile-flash-att Makefile

 # Build specific version of flash attention
+ENV TORCH_CUDA_ARCH_LIST="7.0 7.5 8.0 8.6 8.9 9.0+PTX"
 RUN make build-flash-attention

 # Build Flash Attention v2 CUDA kernels
@@ -107,6 +108,7 @@ COPY server/Makefile-flash-att-v2 Makefile

 # Build specific version of flash attention v2
 RUN make build-flash-attention-v2-cuda
+RUN TORCH_CUDA_ARCH_LIST="7.5;8.0;8.6;9.0+PTX" make build-flash-attention

 # Build Transformers exllama kernels
 FROM kernel-builder as exllama-kernels-builder
@@ -181,7 +183,7 @@ RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-ins
         ca-certificates \
         make \
         curl \
-        build-essential \
+        git \
         && rm -rf /var/lib/apt/lists/*

 # Copy conda with PyTorch installed
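For context, the first hunk sets TORCH_CUDA_ARCH_LIST with ENV, which persists for every subsequent RUN in that build stage, while the second hunk scopes the variable to a single RUN (PyTorch's extension builder accepts both the space-separated and the semicolon-separated forms). Below is a minimal sketch, not the actual Dockerfile, of how the flash-attention builder stage would read after this change; the base image tag is an assumption, and only the lines touched by the diff plus their immediate context are shown.

# Minimal sketch of the flash-attention builder stage after this commit.
# The base image is an assumption; the real stage derives from an earlier
# PyTorch-enabled stage in the same Dockerfile.
FROM pytorch/pytorch:2.0.1-cuda11.8-cudnn8-devel as kernel-builder

WORKDIR /usr/src
COPY server/Makefile-flash-att Makefile

# Build specific version of flash attention.
# ENV persists for every later RUN in this stage, so the same list of CUDA
# compute capabilities is used by the builds below.
ENV TORCH_CUDA_ARCH_LIST="7.0 7.5 8.0 8.6 8.9 9.0+PTX"
RUN make build-flash-attention

COPY server/Makefile-flash-att-v2 Makefile

# Build specific version of flash attention v2
RUN make build-flash-attention-v2-cuda

# Scoping the variable to a single command instead, as the second hunk does
# (semicolon-separated form; PyTorch accepts either separator):
RUN TORCH_CUDA_ARCH_LIST="7.5;8.0;8.6;9.0+PTX" make build-flash-attention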