Default `softcap` to -1.0 when it is None (flashdecoding-ipex attention)

Signed-off-by: Wang, Yi A <yi.a.wang@intel.com>
This commit is contained in:
Wang, Yi A 2025-04-13 20:02:05 -07:00
parent f8c8c3d397
commit ce8548f5c4

View File

@ -39,6 +39,8 @@ def attention(
# We do not need to check window_size_left (not supported) here, so it is already checked ahead of time at model load.
if ATTENTION == "flashdecoding-ipex":
window_size_right = -1 if window_size_left == -1 else 0
if softcap is None:
softcap = -1.0
ipex.llm.modules.PagedAttention.flash_attn_varlen_func(
out,
query.contiguous() if query.device.type == "xpu" else query,