mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-09-11 04:14:52 +00:00
dockerfile change to ipex cpu/xpu
Signed-off-by: Wang, Yi A <yi.a.wang@intel.com>
This commit is contained in:
parent
4b8cda684b
commit
a31641c1b6
@ -97,24 +97,34 @@ ENV HF_HOME=/data \
|
|||||||
|
|
||||||
|
|
||||||
WORKDIR /usr/src
|
WORKDIR /usr/src
|
||||||
RUN pip install https://intel-optimized-pytorch.s3.cn-north-1.amazonaws.com.cn/ipex_dev/xpu/torch-2.5.0a0%2Bgite84e33f-cp311-cp311-linux_x86_64.whl --no-cache-dir
|
|
||||||
RUN pip install https://intel-optimized-pytorch.s3.cn-north-1.amazonaws.com.cn/ipex_dev/xpu/torchaudio-2.5.0a0%2B56bc006-cp311-cp311-linux_x86_64.whl --no-cache-dir
|
|
||||||
RUN pip install https://intel-optimized-pytorch.s3.cn-north-1.amazonaws.com.cn/ipex_dev/xpu/torchvision-0.20.0a0%2B8e8a208-cp311-cp311-linux_x86_64.whl --no-cache-dir
|
|
||||||
RUN pip install https://intel-optimized-pytorch.s3.cn-north-1.amazonaws.com.cn/ipex_dev/xpu/oneccl_bind_pt-2.5.0%2Bxpu-cp311-cp311-linux_x86_64.whl --no-cache-dir
|
|
||||||
|
|
||||||
RUN pip install triton-xpu==3.0.0b2 --no-cache-dir
|
|
||||||
|
|
||||||
# Install server
|
# Install server
|
||||||
COPY proto proto
|
COPY proto proto
|
||||||
COPY server server
|
COPY server server
|
||||||
COPY server/Makefile server/Makefile
|
COPY server/Makefile server/Makefile
|
||||||
ENV UV_SYSTEM_PYTHON=1
|
ENV UV_SYSTEM_PYTHON=1
|
||||||
RUN cd server && \
|
|
||||||
make gen-server && \
|
|
||||||
pip install -U pip uv && \
|
|
||||||
uv pip install -e ".[accelerate, compressed-tensors, peft, outlines]" --no-cache-dir
|
|
||||||
|
|
||||||
ENV LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/opt/intel/oneapi/pti/0.9/lib:/opt/conda/lib
|
RUN cd server && \
|
||||||
|
pip install -U pip uv && uv venv --seed && \
|
||||||
|
uv sync --frozen --extra gen --extra accelerate --extra compressed-tensors --extra quantize --extra peft --extra outlines --no-install-project && \
|
||||||
|
. ./.venv/bin/activate && \
|
||||||
|
make gen-server-raw
|
||||||
|
|
||||||
|
RUN cd server && \
|
||||||
|
uv sync --frozen --extra gen --extra accelerate --extra compressed-tensors --extra quantize --extra peft --extra outlines && \
|
||||||
|
. ./.venv/bin/activate && \
|
||||||
|
pwd && \
|
||||||
|
text-generation-server --help
|
||||||
|
|
||||||
|
ENV PATH=/usr/src/server/.venv/bin:$PATH
|
||||||
|
RUN pip install https://intel-optimized-pytorch.s3.cn-north-1.amazonaws.com.cn/ipex_dev/xpu/torch-2.5.0a0%2Bgite84e33f-cp311-cp311-linux_x86_64.whl --no-cache-dir
|
||||||
|
RUN pip install https://intel-optimized-pytorch.s3.cn-north-1.amazonaws.com.cn/ipex_dev/xpu/torchaudio-2.5.0a0%2B56bc006-cp311-cp311-linux_x86_64.whl --no-cache-dir
|
||||||
|
RUN pip install https://intel-optimized-pytorch.s3.cn-north-1.amazonaws.com.cn/ipex_dev/xpu/torchvision-0.20.0a0%2B8e8a208-cp311-cp311-linux_x86_64.whl --no-cache-dir
|
||||||
|
RUN pip install https://intel-optimized-pytorch.s3.cn-north-1.amazonaws.com.cn/ipex_dev/xpu/oneccl_bind_pt-2.5.0%2Bxpu-cp311-cp311-linux_x86_64.whl --no-cache-dir
|
||||||
|
|
||||||
|
RUN pip uninstall -y triton && pip install ruamel.yaml triton-xpu==3.0.0b2 --no-cache-dir
|
||||||
|
|
||||||
|
ENV LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/opt/intel/oneapi/pti/0.9/lib:/opt/conda/lib:/usr/src/server/.venv/lib
|
||||||
ENV CCL_ZE_IPC_EXCHANGE=sockets
|
ENV CCL_ZE_IPC_EXCHANGE=sockets
|
||||||
#ENV TORCH_LLM_ALLREDUCE=1
|
#ENV TORCH_LLM_ALLREDUCE=1
|
||||||
#ENV CCL_TOPO_FABRIC_VERTEX_CONNECTION_CHECK=0
|
#ENV CCL_TOPO_FABRIC_VERTEX_CONNECTION_CHECK=0
|
||||||
@ -162,7 +172,7 @@ ARG MAMBA_VERSION=23.1.0-1
|
|||||||
ARG PYTHON_VERSION='3.11.10'
|
ARG PYTHON_VERSION='3.11.10'
|
||||||
# Automatically set by buildx
|
# Automatically set by buildx
|
||||||
ARG TARGETPLATFORM
|
ARG TARGETPLATFORM
|
||||||
ENV PATH /opt/conda/bin:$PATH
|
ENV PATH=/opt/conda/bin:$PATH
|
||||||
|
|
||||||
# TGI seem to require libssl.so.1.1 instead of libssl.so.3 so we can't use ubuntu 22.04. Ubuntu 20.04 has python==3.8, and TGI requires python>=3.9, hence the need for miniconda.
|
# TGI seem to require libssl.so.1.1 instead of libssl.so.3 so we can't use ubuntu 22.04. Ubuntu 20.04 has python==3.8, and TGI requires python>=3.9, hence the need for miniconda.
|
||||||
# Install mamba
|
# Install mamba
|
||||||
@ -186,11 +196,6 @@ RUN case ${TARGETPLATFORM} in \
|
|||||||
RUN conda install -c conda-forge gperftools mkl
|
RUN conda install -c conda-forge gperftools mkl
|
||||||
|
|
||||||
|
|
||||||
RUN pip install https://download.pytorch.org/whl/nightly/cpu/torch-2.5.0.dev20240815%2Bcpu-cp311-cp311-linux_x86_64.whl
|
|
||||||
RUN pip install https://download.pytorch.org/whl/nightly/cpu/torchvision-0.20.0.dev20240815%2Bcpu-cp311-cp311-linux_x86_64.whl
|
|
||||||
RUN pip install https://download.pytorch.org/whl/nightly/cpu/torchaudio-2.4.0.dev20240815%2Bcpu-cp311-cp311-linux_x86_64.whl
|
|
||||||
|
|
||||||
RUN pip install triton==3.1.0 py-libnuma
|
|
||||||
|
|
||||||
WORKDIR /usr/src
|
WORKDIR /usr/src
|
||||||
|
|
||||||
@ -198,16 +203,8 @@ RUN git clone https://github.com/intel/intel-extension-for-pytorch && cd intel-e
|
|||||||
RUN git clone https://github.com/intel/torch-ccl.git && cd torch-ccl && git checkout v2.4.0+cpu+rc0
|
RUN git clone https://github.com/intel/torch-ccl.git && cd torch-ccl && git checkout v2.4.0+cpu+rc0
|
||||||
|
|
||||||
RUN sed -i 's/VERSION_MINOR 6/VERSION_MINOR 5/' intel-extension-for-pytorch/version.txt
|
RUN sed -i 's/VERSION_MINOR 6/VERSION_MINOR 5/' intel-extension-for-pytorch/version.txt
|
||||||
RUN cd intel-extension-for-pytorch && git submodule sync && git submodule update --init --recursive && python setup.py install
|
|
||||||
|
|
||||||
RUN cd torch-ccl && git submodule sync && git submodule update --init --recursive && pip install .
|
|
||||||
|
|
||||||
ENV LD_PRELOAD=/opt/conda/lib/libtcmalloc.so
|
ENV LD_PRELOAD=/opt/conda/lib/libtcmalloc.so
|
||||||
ENV CCL_ROOT=/opt/conda/lib/python3.11/site-packages/oneccl_bindings_for_pytorch
|
|
||||||
ENV I_MPI_ROOT=/opt/conda/lib/python3.11/site-packages/oneccl_bindings_for_pytorch
|
|
||||||
ENV FI_PROVIDER_PATH=/opt/conda/lib/python3.11/site-packages/oneccl_bindings_for_pytorch/opt/mpi/libfabric/lib/prov:/usr/lib64/libfabric
|
|
||||||
ENV LD_LIBRARY_PATH=/opt/conda/lib/python3.11/site-packages/oneccl_bindings_for_pytorch/opt/mpi/libfabric/lib:/opt/conda/lib/python3.11/site-packages/oneccl_bindings_for_pytorch/lib
|
|
||||||
ENV LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/opt/conda/lib/"
|
|
||||||
|
|
||||||
# Install server
|
# Install server
|
||||||
COPY proto proto
|
COPY proto proto
|
||||||
@ -215,9 +212,34 @@ COPY server server
|
|||||||
COPY server/Makefile server/Makefile
|
COPY server/Makefile server/Makefile
|
||||||
ENV UV_SYSTEM_PYTHON=1
|
ENV UV_SYSTEM_PYTHON=1
|
||||||
RUN cd server && \
|
RUN cd server && \
|
||||||
make gen-server && \
|
pip install -U pip uv && uv venv --seed && \
|
||||||
pip install -U pip uv && \
|
uv sync --frozen --extra gen --extra accelerate --extra compressed-tensors --extra quantize --extra peft --extra outlines --no-install-project && \
|
||||||
uv pip install -e ".[accelerate, compressed-tensors, peft, outlines]" --no-cache-dir
|
. ./.venv/bin/activate && \
|
||||||
|
make gen-server-raw
|
||||||
|
|
||||||
|
RUN cd server && \
|
||||||
|
uv sync --frozen --extra gen --extra accelerate --extra compressed-tensors --extra quantize --extra peft --extra outlines && \
|
||||||
|
. ./.venv/bin/activate && \
|
||||||
|
pwd && \
|
||||||
|
text-generation-server --help
|
||||||
|
|
||||||
|
ENV PATH=/usr/src/server/.venv/bin:$PATH
|
||||||
|
|
||||||
|
RUN pip install https://download.pytorch.org/whl/nightly/cpu/torch-2.5.0.dev20240815%2Bcpu-cp311-cp311-linux_x86_64.whl
|
||||||
|
RUN pip install https://download.pytorch.org/whl/nightly/cpu/torchvision-0.20.0.dev20240815%2Bcpu-cp311-cp311-linux_x86_64.whl
|
||||||
|
RUN pip install https://download.pytorch.org/whl/nightly/cpu/torchaudio-2.4.0.dev20240815%2Bcpu-cp311-cp311-linux_x86_64.whl
|
||||||
|
|
||||||
|
RUN pip install triton==3.1.0 py-libnuma
|
||||||
|
|
||||||
|
RUN cd intel-extension-for-pytorch && git submodule sync && git submodule update --init --recursive && python3 setup.py install
|
||||||
|
|
||||||
|
RUN cd torch-ccl && git submodule sync && git submodule update --init --recursive && pip install .
|
||||||
|
|
||||||
|
ENV CCL_ROOT=/usr/src/server/.venv/lib/python3.11/site-packages/oneccl_bindings_for_pytorch
|
||||||
|
ENV I_MPI_ROOT=/usr/src/server/.venv/lib/python3.11/site-packages/oneccl_bindings_for_pytorch
|
||||||
|
ENV FI_PROVIDER_PATH=/usr/src/server/.venv/lib/python3.11/site-packages/oneccl_bindings_for_pytorch/opt/mpi/libfabric/lib/prov:/usr/lib64/libfabric
|
||||||
|
ENV LD_LIBRARY_PATH=/usr/src/server/.venv/lib/python3.11/site-packages/oneccl_bindings_for_pytorch/opt/mpi/libfabric/lib:/opt/conda/lib/python3.11/site-packages/oneccl_bindings_for_pytorch/lib
|
||||||
|
ENV LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/opt/conda/lib:/usr/src/server/.venv/lib"
|
||||||
|
|
||||||
# Install benchmarker
|
# Install benchmarker
|
||||||
COPY --from=builder /usr/src/target/release-opt/text-generation-benchmark /usr/local/bin/text-generation-benchmark
|
COPY --from=builder /usr/src/target/release-opt/text-generation-benchmark /usr/local/bin/text-generation-benchmark
|
||||||
@ -231,5 +253,8 @@ ENV ATTENTION=flashdecoding-ipex
|
|||||||
ENV PREFIX_CACHING=1
|
ENV PREFIX_CACHING=1
|
||||||
ENV PREFILL_CHUNKING=1
|
ENV PREFILL_CHUNKING=1
|
||||||
ENV CUDA_GRAPHS=0
|
ENV CUDA_GRAPHS=0
|
||||||
ENTRYPOINT ["text-generation-launcher"]
|
COPY ./tgi-entrypoint.sh /tgi-entrypoint.sh
|
||||||
|
RUN chmod +x /tgi-entrypoint.sh
|
||||||
|
|
||||||
|
ENTRYPOINT ["/tgi-entrypoint.sh"]
|
||||||
CMD ["--json-output"]
|
CMD ["--json-output"]
|
||||||
|
Loading…
Reference in New Issue
Block a user