diff --git a/server/text_generation_server/layers/attention/cuda.py b/server/text_generation_server/layers/attention/cuda.py index 7e42eddf..e74c9ba9 100644 --- a/server/text_generation_server/layers/attention/cuda.py +++ b/server/text_generation_server/layers/attention/cuda.py @@ -261,7 +261,6 @@ else: raise NotImplementedError( "window_size_left is only available with flash attn v2" ) - if softcap is not None: raise NotImplementedError("softcap is only available with flash attn v2")