From a31641c1b66aba072f44c1f44291ae2c1f98d2cb Mon Sep 17 00:00:00 2001
From: "Wang, Yi A"
Date: Tue, 11 Feb 2025 19:51:05 +0000
Subject: [PATCH] dockerfile change to ipex cpu/xpu

Build the server into a uv-managed virtualenv at /usr/src/server/.venv in
both the XPU-wheel and the CPU-wheel sections of Dockerfile_intel:

- replace `make gen-server` + `uv pip install -e` with `uv venv --seed`,
  `uv sync --frozen` and `make gen-server-raw`, then run
  `text-generation-server --help` as a build-time smoke check
- put /usr/src/server/.venv/bin first on PATH and install the torch,
  triton, IPEX and oneCCL packages after the server sync
- point CCL_ROOT, I_MPI_ROOT, FI_PROVIDER_PATH and LD_LIBRARY_PATH at the
  venv site-packages in the CPU-wheel section
- switch `ENV PATH /opt/conda/bin:$PATH` to the key=value form
- replace the `text-generation-launcher` entrypoint touched by this patch
  with /tgi-entrypoint.sh

Signed-off-by: Wang, Yi A
---
Review note: in the CPU-wheel section the second LD_LIBRARY_PATH entry
(oneccl_bindings_for_pytorch/lib) now uses the /usr/src/server/.venv prefix
like CCL_ROOT, I_MPI_ROOT and FI_PROVIDER_PATH; it previously still pointed
at /opt/conda. The `index` line below was not regenerated for that edit.

 Dockerfile_intel | 83 +++++++++++++++++++++++++++++++-----------------
 1 file changed, 54 insertions(+), 29 deletions(-)

diff --git a/Dockerfile_intel b/Dockerfile_intel
index be248866..ac24a7c3 100644
--- a/Dockerfile_intel
+++ b/Dockerfile_intel
@@ -97,24 +97,34 @@ ENV HF_HOME=/data \
 
 
 WORKDIR /usr/src
-RUN pip install https://intel-optimized-pytorch.s3.cn-north-1.amazonaws.com.cn/ipex_dev/xpu/torch-2.5.0a0%2Bgite84e33f-cp311-cp311-linux_x86_64.whl --no-cache-dir
-RUN pip install https://intel-optimized-pytorch.s3.cn-north-1.amazonaws.com.cn/ipex_dev/xpu/torchaudio-2.5.0a0%2B56bc006-cp311-cp311-linux_x86_64.whl --no-cache-dir
-RUN pip install https://intel-optimized-pytorch.s3.cn-north-1.amazonaws.com.cn/ipex_dev/xpu/torchvision-0.20.0a0%2B8e8a208-cp311-cp311-linux_x86_64.whl --no-cache-dir
-RUN pip install https://intel-optimized-pytorch.s3.cn-north-1.amazonaws.com.cn/ipex_dev/xpu/oneccl_bind_pt-2.5.0%2Bxpu-cp311-cp311-linux_x86_64.whl --no-cache-dir
-
-RUN pip install triton-xpu==3.0.0b2 --no-cache-dir
 
 # Install server
 COPY proto proto
 COPY server server
 COPY server/Makefile server/Makefile
 ENV UV_SYSTEM_PYTHON=1
-RUN cd server && \
-    make gen-server && \
-    pip install -U pip uv && \
-    uv pip install -e ".[accelerate, compressed-tensors, peft, outlines]" --no-cache-dir
 
-ENV LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/opt/intel/oneapi/pti/0.9/lib:/opt/conda/lib
+RUN cd server && \
+    pip install -U pip uv && uv venv --seed && \
+    uv sync --frozen --extra gen --extra accelerate --extra compressed-tensors --extra quantize --extra peft --extra outlines --no-install-project && \
+    . ./.venv/bin/activate && \
+    make gen-server-raw
+
+RUN cd server && \
+    uv sync --frozen --extra gen --extra accelerate --extra compressed-tensors --extra quantize --extra peft --extra outlines && \
+    . ./.venv/bin/activate && \
+    pwd && \
+    text-generation-server --help
+
+ENV PATH=/usr/src/server/.venv/bin:$PATH
+RUN pip install https://intel-optimized-pytorch.s3.cn-north-1.amazonaws.com.cn/ipex_dev/xpu/torch-2.5.0a0%2Bgite84e33f-cp311-cp311-linux_x86_64.whl --no-cache-dir
+RUN pip install https://intel-optimized-pytorch.s3.cn-north-1.amazonaws.com.cn/ipex_dev/xpu/torchaudio-2.5.0a0%2B56bc006-cp311-cp311-linux_x86_64.whl --no-cache-dir
+RUN pip install https://intel-optimized-pytorch.s3.cn-north-1.amazonaws.com.cn/ipex_dev/xpu/torchvision-0.20.0a0%2B8e8a208-cp311-cp311-linux_x86_64.whl --no-cache-dir
+RUN pip install https://intel-optimized-pytorch.s3.cn-north-1.amazonaws.com.cn/ipex_dev/xpu/oneccl_bind_pt-2.5.0%2Bxpu-cp311-cp311-linux_x86_64.whl --no-cache-dir
+
+RUN pip uninstall -y triton && pip install ruamel.yaml triton-xpu==3.0.0b2 --no-cache-dir
+
+ENV LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/opt/intel/oneapi/pti/0.9/lib:/opt/conda/lib:/usr/src/server/.venv/lib
 ENV CCL_ZE_IPC_EXCHANGE=sockets
 #ENV TORCH_LLM_ALLREDUCE=1
 #ENV CCL_TOPO_FABRIC_VERTEX_CONNECTION_CHECK=0
@@ -162,7 +172,7 @@ ARG MAMBA_VERSION=23.1.0-1
 ARG PYTHON_VERSION='3.11.10'
 # Automatically set by buildx
 ARG TARGETPLATFORM
-ENV PATH /opt/conda/bin:$PATH
+ENV PATH=/opt/conda/bin:$PATH
 
 # TGI seem to require libssl.so.1.1 instead of libssl.so.3 so we can't use ubuntu 22.04. Ubuntu 20.04 has python==3.8, and TGI requires python>=3.9, hence the need for miniconda.
 # Install mamba
@@ -186,11 +196,6 @@ RUN case ${TARGETPLATFORM} in \
 
 RUN conda install -c conda-forge gperftools mkl
 
-RUN pip install https://download.pytorch.org/whl/nightly/cpu/torch-2.5.0.dev20240815%2Bcpu-cp311-cp311-linux_x86_64.whl
-RUN pip install https://download.pytorch.org/whl/nightly/cpu/torchvision-0.20.0.dev20240815%2Bcpu-cp311-cp311-linux_x86_64.whl
-RUN pip install https://download.pytorch.org/whl/nightly/cpu/torchaudio-2.4.0.dev20240815%2Bcpu-cp311-cp311-linux_x86_64.whl
-
-RUN pip install triton==3.1.0 py-libnuma
 
 WORKDIR /usr/src
 
@@ -198,16 +203,8 @@ RUN git clone https://github.com/intel/intel-extension-for-pytorch && cd intel-e
 RUN git clone https://github.com/intel/torch-ccl.git && cd torch-ccl && git checkout v2.4.0+cpu+rc0
 
 RUN sed -i 's/VERSION_MINOR 6/VERSION_MINOR 5/' intel-extension-for-pytorch/version.txt
-RUN cd intel-extension-for-pytorch && git submodule sync && git submodule update --init --recursive && python setup.py install
-
-RUN cd torch-ccl && git submodule sync && git submodule update --init --recursive && pip install .
 
 ENV LD_PRELOAD=/opt/conda/lib/libtcmalloc.so
-ENV CCL_ROOT=/opt/conda/lib/python3.11/site-packages/oneccl_bindings_for_pytorch
-ENV I_MPI_ROOT=/opt/conda/lib/python3.11/site-packages/oneccl_bindings_for_pytorch
-ENV FI_PROVIDER_PATH=/opt/conda/lib/python3.11/site-packages/oneccl_bindings_for_pytorch/opt/mpi/libfabric/lib/prov:/usr/lib64/libfabric
-ENV LD_LIBRARY_PATH=/opt/conda/lib/python3.11/site-packages/oneccl_bindings_for_pytorch/opt/mpi/libfabric/lib:/opt/conda/lib/python3.11/site-packages/oneccl_bindings_for_pytorch/lib
-ENV LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/opt/conda/lib/"
 
 # Install server
 COPY proto proto
@@ -215,9 +212,34 @@ COPY server server
 COPY server/Makefile server/Makefile
 ENV UV_SYSTEM_PYTHON=1
 RUN cd server && \
-    make gen-server && \
-    pip install -U pip uv && \
-    uv pip install -e ".[accelerate, compressed-tensors, peft, outlines]" --no-cache-dir
+    pip install -U pip uv && uv venv --seed && \
+    uv sync --frozen --extra gen --extra accelerate --extra compressed-tensors --extra quantize --extra peft --extra outlines --no-install-project && \
+    . ./.venv/bin/activate && \
+    make gen-server-raw
+
+RUN cd server && \
+    uv sync --frozen --extra gen --extra accelerate --extra compressed-tensors --extra quantize --extra peft --extra outlines && \
+    . ./.venv/bin/activate && \
+    pwd && \
+    text-generation-server --help
+
+ENV PATH=/usr/src/server/.venv/bin:$PATH
+
+RUN pip install https://download.pytorch.org/whl/nightly/cpu/torch-2.5.0.dev20240815%2Bcpu-cp311-cp311-linux_x86_64.whl
+RUN pip install https://download.pytorch.org/whl/nightly/cpu/torchvision-0.20.0.dev20240815%2Bcpu-cp311-cp311-linux_x86_64.whl
+RUN pip install https://download.pytorch.org/whl/nightly/cpu/torchaudio-2.4.0.dev20240815%2Bcpu-cp311-cp311-linux_x86_64.whl
+
+RUN pip install triton==3.1.0 py-libnuma
+
+RUN cd intel-extension-for-pytorch && git submodule sync && git submodule update --init --recursive && python3 setup.py install
+
+RUN cd torch-ccl && git submodule sync && git submodule update --init --recursive && pip install .
+
+ENV CCL_ROOT=/usr/src/server/.venv/lib/python3.11/site-packages/oneccl_bindings_for_pytorch
+ENV I_MPI_ROOT=/usr/src/server/.venv/lib/python3.11/site-packages/oneccl_bindings_for_pytorch
+ENV FI_PROVIDER_PATH=/usr/src/server/.venv/lib/python3.11/site-packages/oneccl_bindings_for_pytorch/opt/mpi/libfabric/lib/prov:/usr/lib64/libfabric
+ENV LD_LIBRARY_PATH=/usr/src/server/.venv/lib/python3.11/site-packages/oneccl_bindings_for_pytorch/opt/mpi/libfabric/lib:/usr/src/server/.venv/lib/python3.11/site-packages/oneccl_bindings_for_pytorch/lib
+ENV LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/opt/conda/lib:/usr/src/server/.venv/lib"
 
 # Install benchmarker
 COPY --from=builder /usr/src/target/release-opt/text-generation-benchmark /usr/local/bin/text-generation-benchmark
@@ -231,5 +253,8 @@ ENV ATTENTION=flashdecoding-ipex
 ENV PREFIX_CACHING=1
 ENV PREFILL_CHUNKING=1
 ENV CUDA_GRAPHS=0
-ENTRYPOINT ["text-generation-launcher"]
+COPY ./tgi-entrypoint.sh /tgi-entrypoint.sh
+RUN chmod +x /tgi-entrypoint.sh
+
+ENTRYPOINT ["/tgi-entrypoint.sh"]
 CMD ["--json-output"]