mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-09-11 20:34:54 +00:00
Add `poetry.lock` export into `requirements_poetry.txt`
Installing from a `requirements.txt`-style file exported from `poetry.lock` lets the Docker build reuse the lock file's pinned dependencies. This avoids dependency conflicts when building older Dockerfiles in which a loosely pinned dependency would otherwise resolve to an incompatible version, i.e. any potential conflict is always resolved before the release.
This commit is contained in:
parent
d471805134
commit
ebed60b8a9
21
Dockerfile
21
Dockerfile
@ -174,7 +174,7 @@ COPY server/Makefile-flashinfer Makefile
|
||||
RUN make install-flashinfer
|
||||
|
||||
# Text Generation Inference base image
|
||||
FROM nvidia/cuda:12.1.0-base-ubuntu22.04 AS base
|
||||
FROM nvidia/cuda:12.1.0-base-ubuntu22.04 AS conda-install
|
||||
|
||||
# Conda env
|
||||
ENV PATH=/opt/conda/bin:$PATH \
|
||||
@ -198,6 +198,21 @@ RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-ins
|
||||
# Copy conda with PyTorch installed
|
||||
COPY --from=pytorch-install /opt/conda /opt/conda
|
||||
|
||||
# Export text-generation-server Python requirements from poetry lock file
|
||||
FROM poetry-install AS poetry-requirements
|
||||
|
||||
COPY server/poetry.lock poetry.lock
|
||||
COPY server/pyproject.toml pyproject.toml
|
||||
|
||||
RUN pip install poetry && poetry export -f requirements.txt \
|
||||
--extras "attention, bnb, accelerate, compressed-tensors, marlin, moe, quantize, peft, outlines" \
|
||||
--output requirements_poetry.txt
|
||||
|
||||
FROM conda-install AS base
|
||||
|
||||
# Copy the requirements file generated from the poetry lock
|
||||
COPY --from=poetry-requirements /usr/src/requirements_poetry.txt requirements_poetry.txt
|
||||
|
||||
# Copy build artifacts from flash attention builder
|
||||
COPY --from=flash-att-builder /usr/src/flash-attention/build/lib.linux-x86_64-cpython-311 /opt/conda/lib/python3.11/site-packages
|
||||
COPY --from=flash-att-builder /usr/src/flash-attention/csrc/layer_norm/build/lib.linux-x86_64-cpython-311 /opt/conda/lib/python3.11/site-packages
|
||||
@ -233,7 +248,8 @@ COPY server/Makefile server/Makefile
|
||||
RUN cd server && \
|
||||
make gen-server && \
|
||||
pip install -r requirements_cuda.txt && \
|
||||
pip install ".[attention, bnb, accelerate, compressed-tensors, marlin, moe, quantize, peft, outlines]" --no-cache-dir && \
|
||||
pip install -r requirements_poetry.txt --no-cache-dir && \
|
||||
pip install . --no-cache-dir && \
|
||||
pip install nvidia-nccl-cu12==2.22.3
|
||||
|
||||
ENV LD_PRELOAD=/opt/conda/lib/python3.11/site-packages/nvidia/nccl/lib/libnccl.so.2
|
||||
@ -258,7 +274,6 @@ COPY --from=builder /usr/src/target/release-opt/text-generation-router /usr/loca
|
||||
# Install launcher
|
||||
COPY --from=builder /usr/src/target/release-opt/text-generation-launcher /usr/local/bin/text-generation-launcher
|
||||
|
||||
|
||||
# AWS Sagemaker compatible image
|
||||
FROM base AS sagemaker
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user