Mirror of https://github.com/huggingface/text-generation-inference.git (synced 2025-09-12 04:44:52 +00:00)
Update mistral past.
This commit is contained in:
parent 8fa8cda660
commit 1bd52157d8
@@ -512,7 +512,9 @@ class FlashMistralForCausalLM(torch.nn.Module):
         elif self.max_past is not None:
             # Clamp in decode mode as paged attention requires clamped values whereas the flash attention
             # kernel requires the true values
-            input_lengths = torch.clamp(input_lengths, max=self.max_past_tensor)
+            input_lengths.input_lengths = torch.clamp(
+                input_lengths.input_lengths, max=self.max_past_tensor
+            )

         inputs_embeds = self.embed_tokens(input_ids)
         hidden_states = self.model(
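For context, here is a minimal sketch of the decode-time clamping this hunk performs, assuming the lengths now travel inside a Seqlen-style wrapper object rather than as a bare tensor. The Seqlen class and clamp_for_decode helper below are hypothetical stand-ins for illustration, not the repository's actual API.

    import torch
    from dataclasses import dataclass

    @dataclass
    class Seqlen:
        # Hypothetical stand-in for the object that now carries per-request lengths.
        input_lengths: torch.Tensor

    def clamp_for_decode(seqlen: Seqlen, max_past_tensor: torch.Tensor) -> None:
        # Paged attention in decode mode only ever sees the last `max_past`
        # (sliding-window) KV entries, so the lengths are clamped in place here;
        # the flash attention prefill path keeps the true, unclamped lengths.
        seqlen.input_lengths = torch.clamp(seqlen.input_lengths, max=max_past_tensor)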
@@ -647,7 +647,9 @@ class FlashMixtralForCausalLM(torch.nn.Module):
         elif self.max_past is not None:
             # Clamp in decode mode as paged attention requires clamped values whereas the flash attention
             # kernel requires the true values
-            input_lengths = torch.clamp(input_lengths, max=self.max_past_tensor)
+            input_lengths.input_lengths = torch.clamp(
+                input_lengths.input_lengths, max=self.max_past_tensor
+            )

         hidden_states = self.model(
             input_ids,
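As a quick illustration of the effect of the clamp itself (the 4096 window size and the lengths below are made up for the example, not taken from the commit): only requests whose length exceeds the sliding window are changed.

    import torch

    max_past_tensor = torch.tensor(4096)            # assumed sliding-window size
    input_lengths = torch.tensor([12, 5000, 4096])  # assumed per-request lengths

    clamped = torch.clamp(input_lengths, max=max_past_tensor)
    print(clamped)  # tensor([  12, 4096, 4096])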