mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-09-10 20:04:52 +00:00
fix dockerfile
This commit is contained in:
parent
d186b13c59
commit
751f26b66c
14
Dockerfile
14
Dockerfile
@ -98,13 +98,6 @@ COPY server/Makefile-flash-att Makefile
|
||||
# Build specific version of flash attention
|
||||
RUN make build-flash-attention
|
||||
|
||||
# Build Transformers CUDA kernels
|
||||
FROM kernel-builder as custom-kernels-builder
|
||||
|
||||
WORKDIR /usr/src
|
||||
|
||||
COPY server/custom_kernels/ .
|
||||
|
||||
# Build Flash Attention v2 CUDA kernels
|
||||
FROM kernel-builder as flash-att-v2-builder
|
||||
|
||||
@ -115,6 +108,13 @@ COPY server/Makefile-flash-att-v2 Makefile
|
||||
# Build specific version of flash attention v2
|
||||
RUN make build-flash-attention-v2
|
||||
|
||||
# Build Transformers CUDA kernels
|
||||
FROM kernel-builder as custom-kernels-builder
|
||||
|
||||
WORKDIR /usr/src
|
||||
|
||||
COPY server/custom_kernels/ .
|
||||
|
||||
# Build specific version of transformers
|
||||
RUN python setup.py build
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user