From f8c8c3d3974077255e5f7baf1cc150d233f308d2 Mon Sep 17 00:00:00 2001 From: "Wang, Yi A" Date: Tue, 8 Apr 2025 22:42:03 -0700 Subject: [PATCH] softcap default -1.0 Signed-off-by: Wang, Yi A --- server/text_generation_server/layers/attention/ipex.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/server/text_generation_server/layers/attention/ipex.py b/server/text_generation_server/layers/attention/ipex.py index 6ca02afe2..31b745f0c 100644 --- a/server/text_generation_server/layers/attention/ipex.py +++ b/server/text_generation_server/layers/attention/ipex.py @@ -105,6 +105,8 @@ def paged_attention( kv_cache_dtype = "fp8_e4m3" if ATTENTION == "flashdecoding-ipex": window_size_right = -1 if window_size_left == -1 else 0 + if softcap is None: + softcap = -1.0 ipex.llm.modules.PagedAttention.flash_attn_varlen_func( out, query.contiguous() if query.device.type == "xpu" else query,