fix a regression in ipex flashattention

Signed-off-by: Wang, Yi A <yi.a.wang@intel.com>
Wang, Yi A 2024-08-06 22:00:36 -07:00
parent 30e70f2ceb
commit 6abcab843d


@@ -21,7 +21,7 @@ def attention(
     out = torch.empty_like(q)
     # We do not need to check window_size_left (not supported) here, so it is already checked ahead of time at model load.
-    return ipex.llm.functional.varlen_attention(
+    ipex.llm.functional.varlen_attention(
         q,
         k,
         v,
@@ -38,6 +38,8 @@ def attention(
         None,
     )
+    return out
 def reshape_and_cache(
     key: torch.Tensor,
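
Note on the pattern (not part of the commit): the fix assumes that ipex.llm.functional.varlen_attention writes the attention result into the preallocated out buffer and does not return that tensor, so returning the call's result directly was the regression; the caller has to return out itself. Below is a minimal, self-contained sketch of the same pattern. fake_varlen_attention and the tensor shapes are hypothetical stand-ins for illustration only, not the IPEX API.

import torch


def fake_varlen_attention(q, k, v, out):
    # Hypothetical stand-in for an in-place kernel such as
    # ipex.llm.functional.varlen_attention: it fills `out` and
    # does not return the result tensor.
    out.copy_(torch.softmax(q @ k.transpose(-1, -2), dim=-1) @ v)


def attention(q, k, v):
    out = torch.empty_like(q)
    fake_varlen_attention(q, k, v, out)
    # Return the buffer we allocated, not the kernel's return value
    # (which may be None), mirroring the fix in this commit.
    return out


if __name__ == "__main__":
    q = torch.randn(2, 4, 8)
    k = torch.randn(2, 4, 8)
    v = torch.randn(2, 4, 8)
    print(attention(q, k, v).shape)  # torch.Size([2, 4, 8])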