From ffa385031cc6a772f5608048358a07c25a04f54d Mon Sep 17 00:00:00 2001
From: OlivierDehaene <23298448+OlivierDehaene@users.noreply.github.com>
Date: Fri, 7 Apr 2023 14:11:44 +0200
Subject: [PATCH] fea(dockerfile): better layer caching

---
 Dockerfile                   | 155 ++++++++++++++++++++++++++++-------
 server/Makefile-flash-att    |  23 +++++-
 server/Makefile-transformers |  13 ++-
 3 files changed, 156 insertions(+), 35 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index 9fe0b49b..3d1fbcf0 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,3 +1,4 @@
+# Rust builder
 FROM lukemathwalker/cargo-chef:latest-rust-1.67 AS chef
 WORKDIR /usr/src
 
@@ -27,51 +28,149 @@ COPY router router
 COPY launcher launcher
 RUN cargo build --release
 
-FROM nvidia/cuda:11.8.0-devel-ubuntu22.04 as base
+# Python builder
+# Adapted from: https://github.com/pytorch/pytorch/blob/master/Dockerfile
+FROM ubuntu:22.04 as dev-base
+RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
+        build-essential \
+        ca-certificates \
+        ccache \
+        cmake \
+        curl \
+        git \
+        libjpeg-dev \
+        libpng-dev && \
+    rm -rf /var/lib/apt/lists/*
+RUN /usr/sbin/update-ccache-symlinks
+RUN mkdir /opt/ccache && ccache --set-config=cache_dir=/opt/ccache
+ENV PATH /opt/conda/bin:$PATH
 
-ENV LANG=C.UTF-8 \
-    LC_ALL=C.UTF-8 \
-    DEBIAN_FRONTEND=noninteractive \
-    HUGGINGFACE_HUB_CACHE=/data \
+# Install conda
+FROM dev-base as conda
+ARG PYTHON_VERSION=3.9
+# Automatically set by buildx
+ARG TARGETPLATFORM
+# translating Docker's TARGETPLATFORM into miniconda arches
+RUN case ${TARGETPLATFORM} in \
+        "linux/arm64") MINICONDA_ARCH=aarch64 ;; \
+        *)             MINICONDA_ARCH=x86_64  ;; \
+    esac && \
+    curl -fsSL -v -o ~/miniconda.sh -O "https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-${MINICONDA_ARCH}.sh"
+# Manually invoke bash on miniconda script per https://github.com/conda/conda/issues/10431
+RUN chmod +x ~/miniconda.sh && \
+    bash ~/miniconda.sh -b -p /opt/conda && \
+    rm ~/miniconda.sh
+#    /opt/conda/bin/conda install -y python=${PYTHON_VERSION} cmake conda-build pyyaml numpy ipython && \
+#    /opt/conda/bin/conda clean -ya
+
+
+# Install pytorch
+FROM conda as pytorch-install
+ARG PYTHON_VERSION=3.9
+ARG CUDA_VERSION=11.8
+ARG CUDA_CHANNEL=nvidia
+ARG INSTALL_CHANNEL=pytorch
+ARG PYTORCH_VERSION=2.0.0
+RUN /opt/conda/bin/conda update -y conda
+RUN /opt/conda/bin/conda install -c "${INSTALL_CHANNEL}" -y python=${PYTHON_VERSION}
+# Automatically set by buildx
+ARG TARGETPLATFORM
+
+# On arm64 we exit with an error code
+RUN case ${TARGETPLATFORM} in \
+        "linux/arm64") exit 1 ;; \
+        *) /opt/conda/bin/conda install -c "${INSTALL_CHANNEL}" -c "${CUDA_CHANNEL}" -y "python=${PYTHON_VERSION}" pytorch==$PYTORCH_VERSION "pytorch-cuda=$(echo $CUDA_VERSION | cut -d'.' -f 1-2)" ;; \
+    esac && \
+    /opt/conda/bin/conda clean -ya
+
+
+FROM ubuntu:22.04 as final-pytorch-image
+ARG TARGETPLATFORM
+ARG PYTORCH_VERSION=2.0.0
+ARG CUDA_VERSION=11.8
+LABEL com.nvidia.volumes.needed="nvidia_driver"
+RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
+        ca-certificates \
+        && rm -rf /var/lib/apt/lists/*
+COPY --from=pytorch-install /opt/conda /opt/conda
+ENV PATH /opt/conda/bin:$PATH
+ENV NVIDIA_VISIBLE_DEVICES all
+ENV NVIDIA_DRIVER_CAPABILITIES compute,utility
+ENV LD_LIBRARY_PATH /usr/local/nvidia/lib:/usr/local/nvidia/lib64
+ENV PYTORCH_VERSION ${PYTORCH_VERSION}
+WORKDIR /workspace
+
+# CUDA kernels builder image
+FROM final-pytorch-image as kernel-builder
+
+RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
+        build-essential \
+        ninja-build \
+        git \
+        && rm -rf /var/lib/apt/lists/*
+
+RUN /opt/conda/bin/conda install -c "nvidia/label/cuda-11.8.0" cuda==11.8 && \
+    /opt/conda/bin/conda clean -ya
+
+
+# Build Flash Attention CUDA kernels
+FROM kernel-builder as flash-att-builder
+
+WORKDIR /usr/src
+
+COPY server/Makefile-flash-att Makefile
+
+# Build specific version of flash attention
+RUN make build-flash-attention
+
+# Build Transformers CUDA kernels
+FROM kernel-builder as transformers-builder
+
+WORKDIR /usr/src
+
+COPY server/Makefile-transformers Makefile
+
+# Build specific version of transformers
+RUN BUILD_EXTENSIONS="True" make build-transformers
+
+# Text Generation Inference base image
+FROM final-pytorch-image as base
+
+ENV HUGGINGFACE_HUB_CACHE=/data \
     HF_HUB_ENABLE_HF_TRANSFER=1 \
     MODEL_ID=bigscience/bloom-560m \
     QUANTIZE=false \
     NUM_SHARD=1 \
-    PORT=80 \
-    CUDA_HOME=/usr/local/cuda \
-    LD_LIBRARY_PATH="/opt/miniconda/envs/text-generation/lib:/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64:$LD_LIBRARY_PATH" \
-    CONDA_DEFAULT_ENV=text-generation \
-    PATH=$PATH:/opt/miniconda/envs/text-generation/bin:/opt/miniconda/bin:/usr/local/cuda/bin
+    PORT=80
 
-RUN apt-get update && apt-get install -y git curl libssl-dev && rm -rf /var/lib/apt/lists/*
-
-RUN cd ~ && \
-    curl -L -O https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh && \
-    chmod +x Miniconda3-latest-Linux-x86_64.sh && \
-    bash ./Miniconda3-latest-Linux-x86_64.sh -bf -p /opt/miniconda && \
-    conda create -n text-generation python=3.9 -y
+RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
+        libssl-dev \
+        make \
+        && rm -rf /var/lib/apt/lists/*
 
 WORKDIR /usr/src
 
-# Install torch
-RUN pip install torch --extra-index-url https://download.pytorch.org/whl/cu118 --no-cache-dir
+# Copy build artifacts from flash attention builder
+COPY --from=flash-att-builder /usr/src/flash-attention/build/lib.linux-x86_64-cpython-39 /opt/conda/lib/python3.9/site-packages
+COPY --from=flash-att-builder /usr/src/flash-attention/csrc/layer_norm/build/lib.linux-x86_64-cpython-39 /opt/conda/lib/python3.9/site-packages
+COPY --from=flash-att-builder /usr/src/flash-attention/csrc/rotary/build/lib.linux-x86_64-cpython-39 /opt/conda/lib/python3.9/site-packages
 
-# Install specific version of flash attention
-COPY server/Makefile-flash-att server/Makefile
-RUN cd server && make install-flash-attention
+# Copy build artifacts from transformers builder
+COPY --from=transformers-builder /usr/src/transformers /usr/src/transformers
+COPY --from=transformers-builder /usr/src/transformers/build/lib.linux-x86_64-cpython-39/transformers /usr/src/transformers/src/transformers
 
-# Install specific version of transformers
-COPY server/Makefile-transformers server/Makefile
-RUN cd server && BUILD_EXTENSIONS="True" make install-transformers
+# Install transformers dependencies
+RUN cd /usr/src/transformers && pip install -e . --no-cache-dir
 
-COPY server/Makefile server/Makefile
+#RUN cd server && make install-flash-attention && BUILD_EXTENSIONS="True" make install-transformers
 
 # Install server
 COPY proto proto
 COPY server server
+COPY server/Makefile server/Makefile
 RUN cd server && \
     make gen-server && \
-    /opt/miniconda/envs/text-generation/bin/pip install ".[bnb]" --no-cache-dir
+    pip install ".[bnb]" --no-cache-dir
 
 # Install router
 COPY --from=builder /usr/src/target/release/text-generation-router /usr/local/bin/text-generation-router
@@ -86,7 +185,7 @@ RUN chmod +x entrypoint.sh
 
 ENTRYPOINT ["./entrypoint.sh"]
 
-# Original image
+# Final image
 FROM base
 
 ENTRYPOINT ["text-generation-launcher"]
diff --git a/server/Makefile-flash-att b/server/Makefile-flash-att
index 297fd9d0..24b694ff 100644
--- a/server/Makefile-flash-att
+++ b/server/Makefile-flash-att
@@ -1,10 +1,25 @@
 flash_att_commit := d478eeec8f16c7939c54e4617dbd36f59b8eeed7
 
-install-flash-attention:
-	# Install specific version of flash attention
+flash-attention:
+	# Clone specific version of flash attention
 	pip install packaging
-	pip uninstall flash_attn rotary_emb dropout_layer_norm -y || true
-	rm -rf flash-attention || true
 	git clone https://github.com/HazyResearch/flash-attention.git
 	cd flash-attention && git checkout $(flash_att_commit)
+
+flash-attention/build: flash-attention
+	cd flash-attention && git checkout $(flash_att_commit)
+	cd flash-attention && python setup.py build
+
+flash-attention/csrc/layer_norm/build: flash-attention
+	cd flash-attention && git checkout $(flash_att_commit)
+	cd flash-attention/csrc/layer_norm && python setup.py build
+
+flash-attention/csrc/rotary/build: flash-attention
+	cd flash-attention && git checkout $(flash_att_commit)
+	cd flash-attention/csrc/rotary && python setup.py build
+
+build-flash-attention: flash-attention/build flash-attention/csrc/layer_norm/build flash-attention/csrc/rotary/build
+
+install-flash-attention: build-flash-attention
+	pip uninstall flash_attn rotary_emb dropout_layer_norm -y || true
 	cd flash-attention && python setup.py install && cd csrc/layer_norm && python setup.py install && cd ../rotary && python setup.py install
\ No newline at end of file
diff --git a/server/Makefile-transformers b/server/Makefile-transformers
index 1e081336..cb5a6313 100644
--- a/server/Makefile-transformers
+++ b/server/Makefile-transformers
@@ -1,10 +1,17 @@
 transformers_commit := b8d969ff47c6a9d40538a6ea33df021953363afc
 
-install-transformers:
+transformers:
 	# Install specific version of transformers with custom cuda kernels
 	pip install --upgrade setuptools
-	pip uninstall transformers -y || true
-	rm -rf transformers || true
 	git clone https://github.com/OlivierDehaene/transformers.git
 	cd transformers && git checkout $(transformers_commit)
+
+transformers/build: transformers
+	cd transformers && git checkout $(transformers_commit)
+	cd transformers && python setup.py build
+
+build-transformers: transformers/build
+
+install-transformers: build-transformers
+	pip uninstall transformers -y || true
 	cd transformers && python setup.py install
\ No newline at end of file