diff --git a/server/text_generation_server/layers/attention/ipex.py b/server/text_generation_server/layers/attention/ipex.py index 479d6566..6ca02afe 100644 --- a/server/text_generation_server/layers/attention/ipex.py +++ b/server/text_generation_server/layers/attention/ipex.py @@ -143,6 +143,8 @@ def paged_attention( BLOCK_SIZE, max_s, None, + k_scale=kv_scales.key_scale_cpu, + v_scale=kv_scales.value_scale_cpu, ) return out