Mirror of https://github.com/huggingface/text-generation-inference.git, synced 2025-09-12 04:44:52 +00:00
No need to recreate anything actually.
parent ef8bce0b41
commit 1c7c21d596
@@ -31,7 +31,8 @@ if FLASH_DECODING:
             self.cu_seqlen_k = cu_seqlen_k

         def clamp(self, max):
-            return Seqlen(torch.clamp(self.input_lengths, max=max))
+            # Flash decoding doesn't need to clamp
+            return self

 else:

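For context, the change makes Seqlen.clamp a no-op under flash decoding: instead of building a fresh Seqlen from clamped lengths, the existing object is returned as-is, which is what the commit message refers to. Below is a minimal sketch of how the two branches could look; the @dataclass form, the field list, the FLASH_DECODING stand-in flag, and the usage lines are assumptions, only input_lengths, cu_seqlen_k, and the two clamp bodies come from the diff.

from dataclasses import dataclass

import torch

FLASH_DECODING = True  # assumption: stand-in for the repo's flash-decoding switch


if FLASH_DECODING:

    @dataclass
    class Seqlen:
        # Sketch: only input_lengths and cu_seqlen_k appear in the diff hunk.
        input_lengths: torch.Tensor
        cu_seqlen_k: torch.Tensor

        def clamp(self, max):
            # Flash decoding doesn't need to clamp, so no new Seqlen is created.
            return self

else:

    @dataclass
    class Seqlen:
        input_lengths: torch.Tensor

        def clamp(self, max):
            # Behaviour outside flash decoding: rebuild with clamped lengths.
            return Seqlen(torch.clamp(self.input_lengths, max=max))


# Usage: under FLASH_DECODING, clamp hands back the very same object.
lengths = torch.tensor([3, 5, 2], dtype=torch.int32)
seqlen = Seqlen(lengths, torch.tensor([0, 3, 8, 10], dtype=torch.int32))
assert seqlen.clamp(4) is seqlen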