diff --git a/server/text_generation_server/layers/attention/ipex.py b/server/text_generation_server/layers/attention/ipex.py index 54422308..2b89060e 100644 --- a/server/text_generation_server/layers/attention/ipex.py +++ b/server/text_generation_server/layers/attention/ipex.py @@ -78,6 +78,7 @@ def paged_attention( *, kv_scales: KVScales, softcap: Optional[float] = None, + window_size_left: Optional[int] = -1, ): if softcap is not None: raise NotImplementedError("softcap is not available in IPEX") diff --git a/server/text_generation_server/layers/attention/rocm.py b/server/text_generation_server/layers/attention/rocm.py index 65f3ea41..682aade2 100644 --- a/server/text_generation_server/layers/attention/rocm.py +++ b/server/text_generation_server/layers/attention/rocm.py @@ -59,6 +59,7 @@ def paged_attention( *, kv_scales: KVScales, softcap: Optional[float] = None, + window_size_left: Optional[int] = -1, ): # Adapted from: https://github.com/vllm-project/vllm/blob/f8a1e39fae05ca610be8d5a78be9d40f5274e5fc/vllm/model_executor/layers/attention.py # Copyright 2023 The vLLM team. All rights