Move the custom-kernels-builder stage below the flash-att-v2-builder stage so
that the trailing `RUN python setup.py build` step directly follows the
`COPY server/custom_kernels/ .` of the custom-kernels-builder stage instead of
following the flash-attention-v2 build step.

diff --git a/Dockerfile b/Dockerfile
index 7ba4239c..168f2f97 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -98,13 +98,6 @@ COPY server/Makefile-flash-att Makefile
 # Build specific version of flash attention
 RUN make build-flash-attention
 
-# Build Transformers CUDA kernels
-FROM kernel-builder as custom-kernels-builder
-
-WORKDIR /usr/src
-
-COPY server/custom_kernels/ .
-
 # Build Flash Attention v2 CUDA kernels
 FROM kernel-builder as flash-att-v2-builder
 
@@ -115,6 +108,13 @@ COPY server/Makefile-flash-att-v2 Makefile
 # Build specific version of flash attention v2
 RUN make build-flash-attention-v2
 
+# Build Transformers CUDA kernels
+FROM kernel-builder as custom-kernels-builder
+
+WORKDIR /usr/src
+
+COPY server/custom_kernels/ .
+
 # Build specific version of transformers
 RUN python setup.py build
 