From b30cdabf68fdbe36bea5de3ae12485e045aa2698 Mon Sep 17 00:00:00 2001
From: Mohit Sharma
Date: Fri, 14 Mar 2025 07:47:45 +0000
Subject: [PATCH] Add window_size_left param ipex rocm

---
 server/text_generation_server/layers/attention/ipex.py | 1 +
 server/text_generation_server/layers/attention/rocm.py | 1 +
 2 files changed, 2 insertions(+)

diff --git a/server/text_generation_server/layers/attention/ipex.py b/server/text_generation_server/layers/attention/ipex.py
index 54422308..2b89060e 100644
--- a/server/text_generation_server/layers/attention/ipex.py
+++ b/server/text_generation_server/layers/attention/ipex.py
@@ -78,6 +78,7 @@ def paged_attention(
     *,
     kv_scales: KVScales,
     softcap: Optional[float] = None,
+    window_size_left: Optional[int] = -1,
 ):
     if softcap is not None:
         raise NotImplementedError("softcap is not available in IPEX")
diff --git a/server/text_generation_server/layers/attention/rocm.py b/server/text_generation_server/layers/attention/rocm.py
index 65f3ea41..682aade2 100644
--- a/server/text_generation_server/layers/attention/rocm.py
+++ b/server/text_generation_server/layers/attention/rocm.py
@@ -59,6 +59,7 @@ def paged_attention(
     *,
     kv_scales: KVScales,
     softcap: Optional[float] = None,
+    window_size_left: Optional[int] = -1,
 ):
     # Adapted from: https://github.com/vllm-project/vllm/blob/f8a1e39fae05ca610be8d5a78be9d40f5274e5fc/vllm/model_executor/layers/attention.py
     # Copyright 2023 The vLLM team. All rights