mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-04-19 22:02:06 +00:00
softcap default -1.0
Signed-off-by: Wang, Yi A <yi.a.wang@intel.com>
This commit is contained in:
parent
8d36856d57
commit
f8c8c3d397
@@ -105,6 +105,8 @@ def paged_attention(
|
|||||||
kv_cache_dtype = "fp8_e4m3"
|
kv_cache_dtype = "fp8_e4m3"
|
||||||
if ATTENTION == "flashdecoding-ipex":
|
if ATTENTION == "flashdecoding-ipex":
|
||||||
window_size_right = -1 if window_size_left == -1 else 0
|
window_size_right = -1 if window_size_left == -1 else 0
|
||||||
|
if softcap is None:
|
||||||
|
softcap = -1.0
|
||||||
ipex.llm.modules.PagedAttention.flash_attn_varlen_func(
|
ipex.llm.modules.PagedAttention.flash_attn_varlen_func(
|
||||||
out,
|
out,
|
||||||
query.contiguous() if query.device.type == "xpu" else query,
|
query.contiguous() if query.device.type == "xpu" else query,
|
||||||
|
Loading…
Reference in New Issue
Block a user