mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-04-22 15:32:08 +00:00
kv scale in pageattn
Signed-off-by: Wang, Yi A <yi.a.wang@intel.com>
This commit is contained in:
parent
d9e47b651c
commit
ad15a9c0af
@ -143,6 +143,8 @@ def paged_attention(
|
||||
BLOCK_SIZE,
|
||||
max_s,
|
||||
None,
|
||||
k_scale=kv_scales.key_scale_cpu,
|
||||
v_scale=kv_scales.value_scale_cpu,
|
||||
)
|
||||
return out
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user