Add poetry.lock export into requirements_poetry.txt

Installing from a `requirements.txt`-style file exported from the
`poetry.lock` makes it possible to reuse the generated lock with its
pinned dependencies, avoiding dependency issues or conflicts when
rebuilding outdated Dockerfiles in which a loosely pinned dependency
could otherwise resolve to an incompatible version, i.e. any potential
conflict is always resolved before the release rather than at image
build time.
Alvaro Bartolome 2024-11-29 12:53:42 +01:00
parent d471805134
commit ebed60b8a9

@@ -174,7 +174,7 @@ COPY server/Makefile-flashinfer Makefile
RUN make install-flashinfer
# Text Generation Inference base image
-FROM nvidia/cuda:12.1.0-base-ubuntu22.04 AS base
+FROM nvidia/cuda:12.1.0-base-ubuntu22.04 AS conda-install
# Conda env
ENV PATH=/opt/conda/bin:$PATH \
@@ -198,6 +198,21 @@ RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-ins
# Copy conda with PyTorch installed
COPY --from=pytorch-install /opt/conda /opt/conda
+# Export text-generation-server Python requirements from poetry lock file
+FROM poetry-install AS poetry-requirements
+COPY server/poetry.lock poetry.lock
+COPY server/pyproject.toml pyproject.toml
+RUN pip install poetry && poetry export -f requirements.txt \
+    --extras "attention, bnb, accelerate, compressed-tensors, marlin, moe, quantize, peft, outlines" \
+    --output requirements_poetry.txt
+FROM conda-install AS base
+# Copy the requirements file generated from the poetry lock
+COPY --from=poetry-requirements /usr/src/requirements_poetry.txt requirements_poetry.txt
# Copy build artifacts from flash attention builder
COPY --from=flash-att-builder /usr/src/flash-attention/build/lib.linux-x86_64-cpython-311 /opt/conda/lib/python3.11/site-packages
COPY --from=flash-att-builder /usr/src/flash-attention/csrc/layer_norm/build/lib.linux-x86_64-cpython-311 /opt/conda/lib/python3.11/site-packages
@@ -233,7 +248,8 @@ COPY server/Makefile server/Makefile
RUN cd server && \
make gen-server && \
pip install -r requirements_cuda.txt && \
-pip install ".[attention, bnb, accelerate, compressed-tensors, marlin, moe, quantize, peft, outlines]" --no-cache-dir && \
+pip install -r requirements_poetry.txt --no-cache-dir && \
+pip install . --no-cache-dir && \
pip install nvidia-nccl-cu12==2.22.3
ENV LD_PRELOAD=/opt/conda/lib/python3.11/site-packages/nvidia/nccl/lib/libnccl.so.2
@@ -258,7 +274,6 @@ COPY --from=builder /usr/src/target/release-opt/text-generation-router /usr/loca
# Install launcher
COPY --from=builder /usr/src/target/release-opt/text-generation-launcher /usr/local/bin/text-generation-launcher
# AWS Sagemaker compatible image
FROM base AS sagemaker
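
As a usage note (hypothetical commands and image tag, not part of this
commit): since the export runs in its own poetry-requirements build
stage, Docker's layer cache only re-runs it when server/poetry.lock,
server/pyproject.toml, or the underlying poetry-install stage change,
and the exported file can be inspected by building that stage alone:

    # Build only the poetry-requirements stage and print the exported pins
    docker build --target poetry-requirements -t tgi-poetry-requirements .
    docker run --rm tgi-poetry-requirements cat /usr/src/requirements_poetry.txt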