From 891b18a0c3426e6ac5ec0e8da32229a3b80d8cc7 Mon Sep 17 00:00:00 2001
From: Peter Lowrance <46451172+peterlowrance@users.noreply.github.com>
Date: Mon, 2 Oct 2023 11:46:04 -0400
Subject: [PATCH] Fix window_size_left for flash attention v1

This fixes flash attention v1 by not raising NotImplementedError when window_size_left is left at its default of -1
---
 server/text_generation_server/utils/flash_attn.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/server/text_generation_server/utils/flash_attn.py b/server/text_generation_server/utils/flash_attn.py
index caf072b7..8f0fcee6 100644
--- a/server/text_generation_server/utils/flash_attn.py
+++ b/server/text_generation_server/utils/flash_attn.py
@@ -80,7 +80,7 @@ def attention(
         )
 
     if HAS_FLASH_ATTN:
-        if window_size_left != 0:
+        if window_size_left != -1:
             raise NotImplementedError(
                 "window_size_left is only available with flash attn v2"
             )