diff --git a/server/text_generation_server/layers/attention/common.py b/server/text_generation_server/layers/attention/common.py
index d6e512c01..c8ac0c2ab 100644
--- a/server/text_generation_server/layers/attention/common.py
+++ b/server/text_generation_server/layers/attention/common.py
@@ -68,5 +68,5 @@ else:
         def clamp(self, max):
             if SYSTEM == "rocm":
                 return self
-            raise NotImplementedError("Not implemented seqlen for paged")
-            return Seqlen(torch.clamp(self.input_lengths, max=max))
+            self.input_lengths = torch.clamp(self.input_lengths, max=max)
+            return self