softcap default -1.0

Signed-off-by: Wang, Yi A <yi.a.wang@intel.com>
This commit is contained in:
Wang, Yi A 2025-04-08 22:42:03 -07:00
parent 8d36856d57
commit f8c8c3d397

View File

@@ -105,6 +105,8 @@ def paged_attention(
kv_cache_dtype = "fp8_e4m3" kv_cache_dtype = "fp8_e4m3"
if ATTENTION == "flashdecoding-ipex": if ATTENTION == "flashdecoding-ipex":
window_size_right = -1 if window_size_left == -1 else 0 window_size_right = -1 if window_size_left == -1 else 0
if softcap is None:
softcap = -1.0
ipex.llm.modules.PagedAttention.flash_attn_varlen_func( ipex.llm.modules.PagedAttention.flash_attn_varlen_func(
out, out,
query.contiguous() if query.device.type == "xpu" else query, query.contiguous() if query.device.type == "xpu" else query,