Add poetry.lock export into requirements_poetry.txt

Installing from a `requirements.txt`-style file exported from `poetry.lock` lets the image reuse the lock file's pinned dependencies. This avoids dependency conflicts when building older Dockerfiles: if any dependency is only loosely constrained, a later build could otherwise resolve to incompatible versions. With the export, every potential conflict is resolved before the release.
2025-09-11 20:34:54 +00:00 · 2024-11-29 12:53:42 +01:00 · 2024-11-29 12:53:42 +01:00 · ebed60b8a9
commit ebed60b8a9
parent d471805134
1 changed files with 18 additions and 3 deletions
--- a/21
+++ b/21
@ -174,7 +174,7 @@ COPY server/Makefile-flashinfer Makefile
 RUN make install-flashinfer
 # Text Generation Inference base image
-FROM nvidia/cuda:12.1.0-base-ubuntu22.04 AS base
+FROM nvidia/cuda:12.1.0-base-ubuntu22.04 AS conda-install
 # Conda env
 ENV PATH=/opt/conda/bin:$PATH \
@ -198,6 +198,21 @@ RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-ins
 # Copy conda with PyTorch installed
 COPY --from=pytorch-install /opt/conda /opt/conda
 # Export text-generation-server Python requirements from poetry lock file.
 # This stage only needs poetry.lock and pyproject.toml; it writes
 # requirements_poetry.txt (pinned versions for the selected extras) into the
 # stage's working directory so a later stage can copy it out.
 # NOTE(review): the `poetry-install` parent stage is not defined in the visible
 # part of this Dockerfile - confirm it exists (`pytorch-install` may have been intended).
 FROM poetry-install AS poetry-requirements
 COPY server/poetry.lock poetry.lock
 COPY server/pyproject.toml pyproject.toml
 # `poetry export` is implemented by poetry-plugin-export, which Poetry >= 2.0 no
 # longer bundles, so it is installed explicitly next to poetry.
 # Poetry splits the --extras value on whitespace, so the extras are separated by
 # spaces only (the comma-separated form is pip's `.[a, b]` syntax).
 RUN pip install --no-cache-dir poetry poetry-plugin-export && \
    poetry export -f requirements.txt \
    --extras "attention bnb accelerate compressed-tensors marlin moe quantize peft outlines" \
    --output requirements_poetry.txt
 FROM conda-install AS base
 # Copy the requirements file generated from the poetry lock
 COPY --from=poetry-requirements /usr/src/requirements_poetry.txt requirements_poetry.txt
 # Copy build artifacts from flash attention builder
 COPY --from=flash-att-builder /usr/src/flash-attention/build/lib.linux-x86_64-cpython-311 /opt/conda/lib/python3.11/site-packages
 COPY --from=flash-att-builder /usr/src/flash-attention/csrc/layer_norm/build/lib.linux-x86_64-cpython-311 /opt/conda/lib/python3.11/site-packages
@ -233,7 +248,8 @@ COPY server/Makefile server/Makefile
 RUN cd server && \
    make gen-server && \
    pip install -r requirements_cuda.txt && \
-    pip install ".[attention, bnb, accelerate, compressed-tensors, marlin, moe, quantize, peft, outlines]" --no-cache-dir && \
+    # requirements_poetry.txt is copied into the stage's working directory (the
+    # parent of server/), so after `cd server` it must be referenced as ../
+    # NOTE(review): assumes WORKDIR is unchanged since that COPY - confirm.
+    pip install -r ../requirements_poetry.txt --no-cache-dir && \
    pip install . --no-cache-dir && \
    pip install nvidia-nccl-cu12==2.22.3
 ENV LD_PRELOAD=/opt/conda/lib/python3.11/site-packages/nvidia/nccl/lib/libnccl.so.2
@ -258,7 +274,6 @@ COPY --from=builder /usr/src/target/release-opt/text-generation-router /usr/loca
 # Install launcher
 COPY --from=builder /usr/src/target/release-opt/text-generation-launcher /usr/local/bin/text-generation-launcher
 # AWS Sagemaker compatible image
 FROM base AS sagemaker