Add poetry.lock export into requirements_poetry.txt

Installing from a `requirements.txt`-style file exported from
`poetry.lock` lets the image build reuse the pinned dependencies
recorded in the lock file. This avoids dependency issues or conflicts
when rebuilding an outdated Dockerfile whose dependencies are only
loosely constrained, i.e. any potential conflict has already been
resolved in the lock file before the release.
This commit is contained in:
Alvaro Bartolome 2024-11-29 12:53:42 +01:00
parent d471805134
commit ebed60b8a9
No known key found for this signature in database

View File

@ -174,7 +174,7 @@ COPY server/Makefile-flashinfer Makefile
RUN make install-flashinfer RUN make install-flashinfer
# Text Generation Inference base image # Text Generation Inference base image
FROM nvidia/cuda:12.1.0-base-ubuntu22.04 AS base FROM nvidia/cuda:12.1.0-base-ubuntu22.04 AS conda-install
# Conda env # Conda env
ENV PATH=/opt/conda/bin:$PATH \ ENV PATH=/opt/conda/bin:$PATH \
@ -198,6 +198,21 @@ RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-ins
# Copy conda with PyTorch installed # Copy conda with PyTorch installed
COPY --from=pytorch-install /opt/conda /opt/conda COPY --from=pytorch-install /opt/conda /opt/conda
# Export the text-generation-server Python requirements from the Poetry lock
# file, so the runtime image installs exactly the versions pinned in
# poetry.lock instead of re-resolving loose constraints at build time.
FROM poetry-install AS poetry-requirements

COPY server/poetry.lock poetry.lock
COPY server/pyproject.toml pyproject.toml

# `poetry export` is provided by poetry-plugin-export, which Poetry 2.x no
# longer installs by default, so it is requested explicitly next to Poetry.
# The --extras value is split on whitespace only; a comma-separated list would
# leave a trailing comma on each name and be rejected as an unknown extra.
RUN pip install --no-cache-dir poetry poetry-plugin-export && \
    poetry export -f requirements.txt \
        --extras "attention bnb accelerate compressed-tensors marlin moe quantize peft outlines" \
        --output requirements_poetry.txt

FROM conda-install AS base

# Copy the requirements file generated from the Poetry lock file into server/:
# the server install step runs `cd server` before
# `pip install -r requirements_poetry.txt`, so the file has to live next to
# requirements_cuda.txt.
# NOTE(review): the source path assumes the poetry-install stage uses
# WORKDIR /usr/src — confirm against that stage.
COPY --from=poetry-requirements /usr/src/requirements_poetry.txt server/requirements_poetry.txt
# Copy build artifacts from flash attention builder # Copy build artifacts from flash attention builder
COPY --from=flash-att-builder /usr/src/flash-attention/build/lib.linux-x86_64-cpython-311 /opt/conda/lib/python3.11/site-packages COPY --from=flash-att-builder /usr/src/flash-attention/build/lib.linux-x86_64-cpython-311 /opt/conda/lib/python3.11/site-packages
COPY --from=flash-att-builder /usr/src/flash-attention/csrc/layer_norm/build/lib.linux-x86_64-cpython-311 /opt/conda/lib/python3.11/site-packages COPY --from=flash-att-builder /usr/src/flash-attention/csrc/layer_norm/build/lib.linux-x86_64-cpython-311 /opt/conda/lib/python3.11/site-packages
@ -233,7 +248,8 @@ COPY server/Makefile server/Makefile
RUN cd server && \ RUN cd server && \
make gen-server && \ make gen-server && \
pip install -r requirements_cuda.txt && \ pip install -r requirements_cuda.txt && \
pip install ".[attention, bnb, accelerate, compressed-tensors, marlin, moe, quantize, peft, outlines]" --no-cache-dir && \ pip install -r requirements_poetry.txt --no-cache-dir && \
pip install . --no-cache-dir && \
pip install nvidia-nccl-cu12==2.22.3 pip install nvidia-nccl-cu12==2.22.3
ENV LD_PRELOAD=/opt/conda/lib/python3.11/site-packages/nvidia/nccl/lib/libnccl.so.2 ENV LD_PRELOAD=/opt/conda/lib/python3.11/site-packages/nvidia/nccl/lib/libnccl.so.2
@ -258,7 +274,6 @@ COPY --from=builder /usr/src/target/release-opt/text-generation-router /usr/loca
# Install launcher # Install launcher
COPY --from=builder /usr/src/target/release-opt/text-generation-launcher /usr/local/bin/text-generation-launcher COPY --from=builder /usr/src/target/release-opt/text-generation-launcher /usr/local/bin/text-generation-launcher
# AWS Sagemaker compatible image # AWS Sagemaker compatible image
FROM base AS sagemaker FROM base AS sagemaker