From 519e5ac05b7604e9a3098687519a5c1e4192bfd2 Mon Sep 17 00:00:00 2001
From: drbh
Date: Tue, 13 Aug 2024 00:56:15 +0000
Subject: [PATCH] fix: adds causal to attention params to check when using
 flash attn v1

---
 server/text_generation_server/layers/attention/cuda.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/server/text_generation_server/layers/attention/cuda.py b/server/text_generation_server/layers/attention/cuda.py
index d039e1e7..8703eb94 100644
--- a/server/text_generation_server/layers/attention/cuda.py
+++ b/server/text_generation_server/layers/attention/cuda.py
@@ -293,6 +293,7 @@ else:
         max_s,
         softmax_scale,
         window_size_left=-1,
+        causal=None,
         softcap=None,
     ):
         if window_size_left != -1:
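
Context for the change (illustrative sketch, not the actual TGI call site or kernel): the flash attn v2 code path already accepts `causal`, and shared call sites pass it as a keyword argument, so the v1 fallback must accept the same keyword or the call fails with a TypeError. The function name `attention_v1`, its placeholder body, and the NotImplementedError message below are assumptions for illustration only.

# Minimal sketch under the assumptions above -- not the TGI implementation.
def attention_v1(
    q,
    k,
    v,
    cu_seqlens,
    max_s,
    softmax_scale,
    window_size_left=-1,
    causal=None,  # accepted for keyword parity with the v2 signature (this patch)
    softcap=None,
):
    # Flash attn v1 has no windowed attention; the patch's context line suggests
    # a check like this, though the exact handling is an assumption here.
    if window_size_left != -1:
        raise NotImplementedError("window_size_left is only available with flash attn v2")
    return q  # placeholder; the real function dispatches to the v1 kernel


# Without `causal` in the signature, a call like this would raise:
# TypeError: attention_v1() got an unexpected keyword argument 'causal'
out = attention_v1(None, None, None, None, max_s=128, softmax_scale=1.0, causal=True)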