From 0343a4b71c9f1b448064055001fa7e10e74fa3c9 Mon Sep 17 00:00:00 2001 From: "Wang, Yi A" Date: Sun, 31 Mar 2024 22:48:42 -0700 Subject: [PATCH] Switch XPU ops to the ipex.llm.functional API and update Dockerfile_intel Signed-off-by: Wang, Yi A --- Dockerfile_intel | 32 +++++++++---------- .../utils/flash_attn.py | 2 +- server/text_generation_server/utils/layers.py | 31 +++++++++--------- 3 files changed, 32 insertions(+), 33 deletions(-) diff --git a/Dockerfile_intel b/Dockerfile_intel index 08a29dc0..0411a798 100644 --- a/Dockerfile_intel +++ b/Dockerfile_intel @@ -47,7 +47,7 @@ RUN wget http://nz2.archive.ubuntu.com/ubuntu/pool/main/o/openssl/libssl1.1_1.1. RUN wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB \ | gpg --dearmor | tee /usr/share/keyrings/oneapi-archive-keyring.gpg > /dev/null && echo "deb [signed-by=/usr/share/keyrings/oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main" | tee /etc/apt/sources.list.d/oneAPI.list -RUN apt-get update && apt install -y intel-basekit xpu-smi cmake python3-dev +RUN apt-get update && apt install -y intel-basekit xpu-smi cmake python3-dev ninja-build # Text Generation Inference base env ENV HUGGINGFACE_HUB_CACHE=/data \ @@ -69,22 +69,22 @@ RUN cd server && \ pip install -r requirements_common.txt && \ pip install ".[accelerate, peft, outlines]" --no-cache-dir -ENV CCL_ROOT=/opt/intel/oneapi/ccl/2021.11 -ENV I_MPI_ROOT=/opt/intel/oneapi/mpi/2021.11 -ENV FI_PROVIDER_PATH=/opt/intel/oneapi/mpi/2021.11/opt/mpi/libfabric/lib/prov:/usr/lib/x86_64-linux-gnu/libfabric -ENV DIAGUTIL_PATH=/opt/intel/oneapi/compiler/2024.0/etc/compiler/sys_check/sys_check.sh +ENV CCL_ROOT=/opt/intel/oneapi/ccl/latest +ENV I_MPI_ROOT=/opt/intel/oneapi/mpi/latest +ENV FI_PROVIDER_PATH=/opt/intel/oneapi/mpi/latest/opt/mpi/libfabric/lib/prov:/usr/lib/x86_64-linux-gnu/libfabric +ENV DIAGUTIL_PATH=/opt/intel/oneapi/compiler/latest/etc/compiler/sys_check/sys_check.sh ENV CCL_CONFIGURATION=cpu_gpu_dpcpp -ENV 
MANPATH=/opt/intel/oneapi/mpi/2021.11/share/man:/opt/intel/oneapi/mpi/2021.11/share/man:/opt/intel/oneapi/compiler/2024.0/documentation/en/man/common: -ENV CMAKE_PREFIX_PATH=/opt/intel/oneapi/mkl/2024.0/lib/cmake:/opt/intel/oneapi/compiler/2024.0 -ENV CMPLR_ROOT=/opt/intel/oneapi/compiler/2024.0 -ENV LIBRARY_PATH=/opt/intel/oneapi/mpi/2021.11/lib:/opt/intel/oneapi/ccl/2021.11/lib/:/opt/intel/oneapi/mkl/2024.0/lib/:/opt/intel/oneapi/compiler/2024.0/lib -ENV OCL_ICD_FILENAMES=libintelocl_emu.so:libalteracl.so:/opt/intel/oneapi/compiler/2024.0/lib/libintelocl.so -ENV CLASSPATH=/opt/intel/oneapi/mpi/2021.11/share/java/mpi.jar:/opt/intel/oneapi/mpi/2021.11/share/java/mpi.jar -ENV LD_LIBRARY_PATH=/opt/intel/oneapi/ccl/2021.11/lib/:/opt/intel/oneapi/mpi/2021.11/opt/mpi/libfabric/lib:/opt/intel/oneapi/mpi/2021.11/lib:/opt/intel/oneapi/mkl/2024.0/lib:/opt/intel/oneapi/compiler/2024.0/opt/compiler/lib:/opt/intel/oneapi/compiler/2024.0/lib:/opt/intel/oneapi/lib:/opt/intel/oneapi/lib/intel64: -ENV MKLROOT=/opt/intel/oneapi/mkl/2024.0 -ENV NLSPATH=/opt/intel/oneapi/mkl/2024.0/share/locale/%l_%t/%N:/opt/intel/oneapi/compiler/2024.0/lib/locale/%l_%t/%N -ENV PATH=/opt/intel/oneapi/mpi/2021.11/opt/mpi/libfabric/bin:/opt/intel/oneapi/mpi/2021.11/bin:/opt/intel/oneapi/mpi/2021.11/opt/mpi/libfabric/bin:/opt/intel/oneapi/mkl/2024.0/bin/:/opt/intel/oneapi/compiler/2024.0/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin -ENV CPATH=/opt/intel/oneapi/mpi/2021.11/include:/opt/intel/oneapi/ccl/2021.11/include:/opt/intel/oneapi/mkl/2024.0/include +ENV MANPATH=/opt/intel/oneapi/mpi/latest/share/man:/opt/intel/oneapi/mpi/latest/share/man:/opt/intel/oneapi/compiler/latest/share/man +ENV CMAKE_PREFIX_PATH=/opt/intel/oneapi/mkl/latest/lib/cmake:/opt/intel/oneapi/compiler/latest +ENV CMPLR_ROOT=/opt/intel/oneapi/compiler/latest +ENV LIBRARY_PATH=/opt/intel/oneapi/mpi/latest/lib:/opt/intel/oneapi/ccl/latest/lib/:/opt/intel/oneapi/mkl/latest/lib/:/opt/intel/oneapi/compiler/latest/lib 
+ENV OCL_ICD_FILENAMES=libintelocl_emu.so:libalteracl.so:/opt/intel/oneapi/compiler/latest/lib/libintelocl.so +ENV CLASSPATH=/opt/intel/oneapi/mpi/latest/share/java/mpi.jar:/opt/intel/oneapi/mpi/latest/share/java/mpi.jar +ENV LD_LIBRARY_PATH=/opt/intel/oneapi/ccl/latest/lib/:/opt/intel/oneapi/mpi/latest/opt/mpi/libfabric/lib:/opt/intel/oneapi/mpi/latest/lib:/opt/intel/oneapi/mkl/latest/lib:/opt/intel/oneapi/compiler/latest/opt/compiler/lib:/opt/intel/oneapi/compiler/latest/lib:/opt/intel/oneapi/lib:/opt/intel/oneapi/lib/intel64: +ENV MKLROOT=/opt/intel/oneapi/mkl/latest +ENV NLSPATH=/opt/intel/oneapi/mkl/latest/share/locale/%l_%t/%N:/opt/intel/oneapi/compiler/latest/lib/locale/%l_%t/%N +ENV PATH=/opt/intel/oneapi/mpi/latest/opt/mpi/libfabric/bin:/opt/intel/oneapi/mpi/latest/bin:/opt/intel/oneapi/mpi/latest/opt/mpi/libfabric/bin:/opt/intel/oneapi/mkl/latest/bin/:/opt/intel/oneapi/compiler/latest/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin +ENV CPATH=/opt/intel/oneapi/mpi/latest/include:/opt/intel/oneapi/ccl/latest/include:/opt/intel/oneapi/mkl/latest/include ENV CCL_ZE_IPC_EXCHANGE=sockets diff --git a/server/text_generation_server/utils/flash_attn.py b/server/text_generation_server/utils/flash_attn.py index e49447b9..583a8f91 100644 --- a/server/text_generation_server/utils/flash_attn.py +++ b/server/text_generation_server/utils/flash_attn.py @@ -97,7 +97,7 @@ def attention( raise ValueError( f"XPU version of Flash Attention does not support window attention (window_size_left != -1, got window_size_left={window_size_left})." 
) - return ipex.llm.modules.VarlenAttention.apply( + return ipex.llm.functional.varlen_attention( q, k, v, diff --git a/server/text_generation_server/utils/layers.py b/server/text_generation_server/utils/layers.py index edd18090..638cb0a0 100644 --- a/server/text_generation_server/utils/layers.py +++ b/server/text_generation_server/utils/layers.py @@ -820,17 +820,13 @@ try: class FastLayerNorm(nn.LayerNorm): def forward(self, hidden_states, residual=None): if IS_XPU_SYSTEM: - if residual is not None: - hidden_states += residual - residual = hidden_states - out = ipex.llm.modules.FastLayerNorm.apply( - hidden_states, - self.normalized_shape, - self.eps, - self.weight, - self.bias, + res_out = hidden_states + out = ipex.llm.functional.add_layer_norm( + residual, hidden_states, self.weight, self.bias, self.eps, True ) - return out, residual + if residual is not None: + res_out = residual + return out, res_out elif hidden_states.shape[-1] > 8192 or IS_ROCM_SYSTEM: if residual is not None: hidden_states += residual @@ -878,15 +874,18 @@ try: def forward(self, hidden_states, residual=None): if IS_XPU_SYSTEM: - if residual is not None: - hidden_states += residual - residual = hidden_states - out = ipex.llm.modules.RMSNorm.apply( + residual_out = hidden_states + out = ipex.llm.functional.add_rms_norm( + residual, hidden_states, self.weight, + None, self.variance_epsilon, + True, ) - return out, residual + if residual is not None: + residual_out = residual + return out, residual_out elif hidden_states.shape[-1] > 8192: if residual is not None: hidden_states += residual @@ -1014,7 +1013,7 @@ try: # Inplace operation, updating query and key. pos_encoding_ops.rotary_embedding(query, key, head_size, cos, sin, True) elif IS_XPU_SYSTEM: - ipex.llm.modules.RotaryEmbedding.apply( + ipex.llm.functional.rotary_embedding( query, key, sin, cos, query.size(-1), True ) else: