mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-09-11 12:24:53 +00:00
update docker file
Signed-off-by: Wang, Yi A <yi.a.wang@intel.com>
This commit is contained in:
parent
23a1cb0511
commit
02537ec663
@ -1,5 +1,4 @@
|
|||||||
# Rust builder
|
FROM lukemathwalker/cargo-chef:latest-rust-1.75 AS chef
|
||||||
FROM lukemathwalker/cargo-chef:latest-rust-1.71 AS chef
|
|
||||||
WORKDIR /usr/src
|
WORKDIR /usr/src
|
||||||
|
|
||||||
ARG CARGO_REGISTRIES_CRATES_IO_PROTOCOL=sparse
|
ARG CARGO_REGISTRIES_CRATES_IO_PROTOCOL=sparse
|
||||||
@ -35,6 +34,7 @@ COPY router router
|
|||||||
COPY launcher launcher
|
COPY launcher launcher
|
||||||
RUN cargo build --release
|
RUN cargo build --release
|
||||||
|
|
||||||
|
|
||||||
# Text Generation Inference base image for Intel
|
# Text Generation Inference base image for Intel
|
||||||
FROM intel/intel-extension-for-pytorch:2.1.10-xpu as base
|
FROM intel/intel-extension-for-pytorch:2.1.10-xpu as base
|
||||||
|
|
||||||
@ -47,7 +47,7 @@ RUN wget http://nz2.archive.ubuntu.com/ubuntu/pool/main/o/openssl/libssl1.1_1.1.
|
|||||||
RUN wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB \
|
RUN wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB \
|
||||||
| gpg --dearmor | tee /usr/share/keyrings/oneapi-archive-keyring.gpg > /dev/null && echo "deb [signed-by=/usr/share/keyrings/oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main" | tee /etc/apt/sources.list.d/oneAPI.list
|
| gpg --dearmor | tee /usr/share/keyrings/oneapi-archive-keyring.gpg > /dev/null && echo "deb [signed-by=/usr/share/keyrings/oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main" | tee /etc/apt/sources.list.d/oneAPI.list
|
||||||
|
|
||||||
RUN apt-get update && apt install -y intel-basekit xpu-smi
|
RUN apt-get update && apt install -y intel-basekit xpu-smi cmake python3-dev
|
||||||
|
|
||||||
# Text Generation Inference base env
|
# Text Generation Inference base env
|
||||||
ENV HUGGINGFACE_HUB_CACHE=/data \
|
ENV HUGGINGFACE_HUB_CACHE=/data \
|
||||||
@ -55,6 +55,11 @@ ENV HUGGINGFACE_HUB_CACHE=/data \
|
|||||||
PORT=80
|
PORT=80
|
||||||
|
|
||||||
|
|
||||||
|
WORKDIR /usr/src
|
||||||
|
# Build pytorch and ipex
|
||||||
|
RUN git clone https://github.com/intel/intel-extension-for-pytorch && cd intel-extension-for-pytorch && git checkout -b xpu_main origin/xpu-main
|
||||||
|
RUN git clone https://github.com/pytorch/pytorch.git && cd pytorch && git checkout 209f2fa8ff86652f67d75c2f19bf9cb9942fd018 && git apply /usr/src/intel-extension-for-pytorch/torch_patches/00*.patch
|
||||||
|
|
||||||
# Install server
|
# Install server
|
||||||
COPY proto proto
|
COPY proto proto
|
||||||
COPY server server
|
COPY server server
|
||||||
@ -62,7 +67,29 @@ COPY server/Makefile server/Makefile
|
|||||||
RUN cd server && \
|
RUN cd server && \
|
||||||
make gen-server && \
|
make gen-server && \
|
||||||
pip install -r requirements_common.txt && \
|
pip install -r requirements_common.txt && \
|
||||||
pip install ".[accelerate, peft]" --no-cache-dir
|
pip install ".[accelerate, peft, outlines]" --no-cache-dir
|
||||||
|
|
||||||
|
ENV CCL_ROOT=/opt/intel/oneapi/ccl/2021.11
|
||||||
|
ENV I_MPI_ROOT=/opt/intel/oneapi/mpi/2021.11
|
||||||
|
ENV FI_PROVIDER_PATH=/opt/intel/oneapi/mpi/2021.11/opt/mpi/libfabric/lib/prov:/usr/lib/x86_64-linux-gnu/libfabric
|
||||||
|
ENV DIAGUTIL_PATH=/opt/intel/oneapi/compiler/2024.0/etc/compiler/sys_check/sys_check.sh
|
||||||
|
ENV CCL_CONFIGURATION=cpu_gpu_dpcpp
|
||||||
|
ENV MANPATH=/opt/intel/oneapi/mpi/2021.11/share/man:/opt/intel/oneapi/mpi/2021.11/share/man:/opt/intel/oneapi/compiler/2024.0/documentation/en/man/common:
|
||||||
|
ENV CMAKE_PREFIX_PATH=/opt/intel/oneapi/mkl/2024.0/lib/cmake:/opt/intel/oneapi/compiler/2024.0
|
||||||
|
ENV CMPLR_ROOT=/opt/intel/oneapi/compiler/2024.0
|
||||||
|
ENV LIBRARY_PATH=/opt/intel/oneapi/mpi/2021.11/lib:/opt/intel/oneapi/ccl/2021.11/lib/:/opt/intel/oneapi/mkl/2024.0/lib/:/opt/intel/oneapi/compiler/2024.0/lib
|
||||||
|
ENV OCL_ICD_FILENAMES=libintelocl_emu.so:libalteracl.so:/opt/intel/oneapi/compiler/2024.0/lib/libintelocl.so
|
||||||
|
ENV CLASSPATH=/opt/intel/oneapi/mpi/2021.11/share/java/mpi.jar:/opt/intel/oneapi/mpi/2021.11/share/java/mpi.jar
|
||||||
|
ENV LD_LIBRARY_PATH=/opt/intel/oneapi/ccl/2021.11/lib/:/opt/intel/oneapi/mpi/2021.11/opt/mpi/libfabric/lib:/opt/intel/oneapi/mpi/2021.11/lib:/opt/intel/oneapi/mkl/2024.0/lib:/opt/intel/oneapi/compiler/2024.0/opt/compiler/lib:/opt/intel/oneapi/compiler/2024.0/lib:/opt/intel/oneapi/lib:/opt/intel/oneapi/lib/intel64:
|
||||||
|
ENV MKLROOT=/opt/intel/oneapi/mkl/2024.0
|
||||||
|
ENV NLSPATH=/opt/intel/oneapi/mkl/2024.0/share/locale/%l_%t/%N:/opt/intel/oneapi/compiler/2024.0/lib/locale/%l_%t/%N
|
||||||
|
ENV PATH=/opt/intel/oneapi/mpi/2021.11/opt/mpi/libfabric/bin:/opt/intel/oneapi/mpi/2021.11/bin:/opt/intel/oneapi/mpi/2021.11/opt/mpi/libfabric/bin:/opt/intel/oneapi/mkl/2024.0/bin/:/opt/intel/oneapi/compiler/2024.0/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
|
||||||
|
ENV CPATH=/opt/intel/oneapi/mpi/2021.11/include:/opt/intel/oneapi/ccl/2021.11/include:/opt/intel/oneapi/mkl/2024.0/include
|
||||||
|
ENV CCL_ZE_IPC_EXCHANGE=sockets
|
||||||
|
|
||||||
|
|
||||||
|
RUN pip uninstall -y torch && cd pytorch && git submodule update --init --recursive && python setup.py install
|
||||||
|
RUN pip uninstall -y intel-extension-for-pytorch && cd intel-extension-for-pytorch && git submodule update --init --recursive && USE_AOT_DEVLIST='pvc' BUILD_SEPARATE_OPS=ON BUILD_WITH_CPU=ON USE_XETLA=ON python setup.py install
|
||||||
|
|
||||||
# Install benchmarker
|
# Install benchmarker
|
||||||
COPY --from=builder /usr/src/target/release/text-generation-benchmark /usr/local/bin/text-generation-benchmark
|
COPY --from=builder /usr/src/target/release/text-generation-benchmark /usr/local/bin/text-generation-benchmark
|
||||||
|
@ -35,7 +35,7 @@ SLIDING_WINDOW: Optional[int] = None
|
|||||||
SLIDING_WINDOW_BLOCKS: Optional[int] = None
|
SLIDING_WINDOW_BLOCKS: Optional[int] = None
|
||||||
from text_generation_server.utils.import_utils import IS_XPU_SYSTEM
|
from text_generation_server.utils.import_utils import IS_XPU_SYSTEM
|
||||||
|
|
||||||
MEM_POOL = torch.cuda.graph_pool_handle()
|
MEM_POOL = torch.cuda.graph_pool_handle() if torch.cuda.is_available() else None
|
||||||
|
|
||||||
|
|
||||||
def set_sliding_window(sliding_window: int, sliding_window_blocks: int):
|
def set_sliding_window(sliding_window: int, sliding_window_blocks: int):
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
import torch
|
import torch
|
||||||
import os
|
import os
|
||||||
|
|
||||||
MEM_POOL = torch.cuda.graph_pool_handle()
|
MEM_POOL = torch.cuda.graph_pool_handle() if torch.cuda.is_available() else None
|
||||||
# This is overridden by the cli
|
# This is overridden by the cli
|
||||||
cuda_graphs = os.getenv("CUDA_GRAPHS")
|
cuda_graphs = os.getenv("CUDA_GRAPHS")
|
||||||
if cuda_graphs is not None and cuda_graphs != "0":
|
if cuda_graphs is not None and cuda_graphs != "0":
|
||||||
|
@ -883,11 +883,10 @@ try:
|
|||||||
residual = hidden_states
|
residual = hidden_states
|
||||||
out = ipex.llm.modules.RMSNorm.apply(
|
out = ipex.llm.modules.RMSNorm.apply(
|
||||||
hidden_states,
|
hidden_states,
|
||||||
[hidden_states.size(-1)],
|
|
||||||
self.weight,
|
self.weight,
|
||||||
self.variance_epsilon,
|
self.variance_epsilon,
|
||||||
)
|
)
|
||||||
return out[0], residual
|
return out, residual
|
||||||
elif hidden_states.shape[-1] > 8192:
|
elif hidden_states.shape[-1] > 8192:
|
||||||
if residual is not None:
|
if residual is not None:
|
||||||
hidden_states += residual
|
hidden_states += residual
|
||||||
|
Loading…
Reference in New Issue
Block a user