Fix RAM exhaustion during build

This commit is contained in:
Łukasz Olszewski 2023-12-23 13:19:25 +01:00
parent 43277c6c6a
commit 76590818a3

View File

@ -106,6 +106,7 @@ WORKDIR /usr/src
COPY server/Makefile-flash-att-v2 Makefile COPY server/Makefile-flash-att-v2 Makefile
# Build specific version of flash attention v2 # Build specific version of flash attention v2
# Set MAX_JOBS for the flash-attention build below; this was introduced to
# address RAM exhaustion during the build (presumably the build honors
# MAX_JOBS as its parallel-job limit -- TODO confirm).
ENV MAX_JOBS=8
RUN make build-flash-attention-v2-cuda RUN make build-flash-attention-v2-cuda
# Build Transformers exllama kernels # Build Transformers exllama kernels
@ -180,6 +181,8 @@ RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-ins
curl \ curl \
&& rm -rf /var/lib/apt/lists/* && rm -rf /var/lib/apt/lists/*
# Set MAX_JOBS for the remaining build steps in this stage; this was introduced
# to address RAM exhaustion during the build.
# NOTE(review): ENV persists into the resulting image's environment; if this
# value is only needed at build time, consider ARG instead -- confirm nothing
# reads MAX_JOBS at runtime first.
ENV MAX_JOBS=14
# Copy conda with PyTorch and Megablocks installed # Copy conda with PyTorch and Megablocks installed
COPY --from=megablocks-builder /opt/conda /opt/conda COPY --from=megablocks-builder /opt/conda /opt/conda
@ -208,6 +211,11 @@ COPY --from=vllm-builder /usr/src/vllm/build/lib.linux-x86_64-cpython-310 /opt/c
# Install flash-attention dependencies # Install flash-attention dependencies
RUN pip install einops --no-cache-dir RUN pip install einops --no-cache-dir
# Install git for the steps that follow.
# --no-install-recommends keeps optional extras (pager, ssh client, ...) out of
# the image; ca-certificates is listed explicitly because it is only a
# "Recommends" of git and HTTPS clones need it. The package lists and temp
# files are removed in the same layer so they never reach the image.
RUN apt-get update && \
    DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        ca-certificates \
        git && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
# Install server # Install server
COPY proto proto COPY proto proto
COPY server server COPY server server