mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-04-23 16:02:10 +00:00
kv scale in pageattn
Signed-off-by: Wang, Yi A <yi.a.wang@intel.com>
This commit is contained in:
parent
d9e47b651c
commit
ad15a9c0af
@@ -143,6 +143,8 @@ def paged_attention(
|
|||||||
BLOCK_SIZE,
|
BLOCK_SIZE,
|
||||||
max_s,
|
max_s,
|
||||||
None,
|
None,
|
||||||
|
k_scale=kv_scales.key_scale_cpu,
|
||||||
|
v_scale=kv_scales.value_scale_cpu,
|
||||||
)
|
)
|
||||||
return out
|
return out
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user