From 8a3449def9b99b3b8193fd89054424a2526cf340 Mon Sep 17 00:00:00 2001 From: OlivierDehaene <23298448+OlivierDehaene@users.noreply.github.com> Date: Thu, 28 Sep 2023 09:24:32 +0200 Subject: [PATCH] fix default window size --- server/text_generation_server/utils/flash_attn.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/text_generation_server/utils/flash_attn.py b/server/text_generation_server/utils/flash_attn.py index bde0aa76..caf072b7 100644 --- a/server/text_generation_server/utils/flash_attn.py +++ b/server/text_generation_server/utils/flash_attn.py @@ -57,7 +57,7 @@ def attention( cu_seqlens, max_s, softmax_scale, - window_size_left=0, + window_size_left=-1, ): if HAS_FLASH_ATTN_V2: return flash_attn_2_cuda.varlen_fwd(