mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-04-22 15:32:08 +00:00
softcap default -1.0
Signed-off-by: Wang, Yi A <yi.a.wang@intel.com>
This commit is contained in:
parent
f8c8c3d397
commit
ce8548f5c4
@@ -39,6 +39,8 @@ def attention(
|
||||
# We do not need to check window_size_left (not supported) here, because it is already checked ahead of time at model load.
|
||||
if ATTENTION == "flashdecoding-ipex":
|
||||
window_size_right = -1 if window_size_left == -1 else 0
|
||||
if softcap is None:
|
||||
softcap = -1.0
|
||||
ipex.llm.modules.PagedAttention.flash_attn_varlen_func(
|
||||
out,
|
||||
query.contiguous() if query.device.type == "xpu" else query,
|
||||
|
Loading…
Reference in New Issue
Block a user