mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-04-21 06:42:10 +00:00
softcap default -1.0
Signed-off-by: Wang, Yi A <yi.a.wang@intel.com>
This commit is contained in:
parent
f8c8c3d397
commit
ce8548f5c4
@@ -39,6 +39,8 @@ def attention(
|
|||||||
# We do not need to check window_size_left (not supported) here, so it is already checked ahead of time at model load.
|
# We do not need to check window_size_left (not supported) here, so it is already checked ahead of time at model load.
|
||||||
if ATTENTION == "flashdecoding-ipex":
|
if ATTENTION == "flashdecoding-ipex":
|
||||||
window_size_right = -1 if window_size_left == -1 else 0
|
window_size_right = -1 if window_size_left == -1 else 0
|
||||||
|
if softcap is None:
|
||||||
|
softcap = -1.0
|
||||||
ipex.llm.modules.PagedAttention.flash_attn_varlen_func(
|
ipex.llm.modules.PagedAttention.flash_attn_varlen_func(
|
||||||
out,
|
out,
|
||||||
query.contiguous() if query.device.type == "xpu" else query,
|
query.contiguous() if query.device.type == "xpu" else query,
|
||||||
|
Loading…
Reference in New Issue
Block a user