From ad15a9c0afbb3a7ead90425161836c7c2577f143 Mon Sep 17 00:00:00 2001 From: "Wang, Yi A" Date: Mon, 7 Apr 2025 22:47:11 -0700 Subject: [PATCH] kv scale in pageattn Signed-off-by: Wang, Yi A --- server/text_generation_server/layers/attention/ipex.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/server/text_generation_server/layers/attention/ipex.py b/server/text_generation_server/layers/attention/ipex.py index 479d6566..6ca02afe 100644 --- a/server/text_generation_server/layers/attention/ipex.py +++ b/server/text_generation_server/layers/attention/ipex.py @@ -143,6 +143,8 @@ def paged_attention( BLOCK_SIZE, max_s, None, + k_scale=kv_scales.key_scale_cpu, + v_scale=kv_scales.value_scale_cpu, ) return out