mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-09-11 12:24:53 +00:00
Fix Mistral for long prefills whose length exceeds window_size (rotary
doesn't create long enough cos/sin tables).
This commit is contained in:
parent
4139054b82
commit
2804a74276
@ -460,8 +460,8 @@ class BaseFlashMistral(FlashCausalLM):
|
|||||||
max_s = batch.max_seqlen
|
max_s = batch.max_seqlen
|
||||||
lm_head_indices = batch.prefill_head_indices
|
lm_head_indices = batch.prefill_head_indices
|
||||||
|
|
||||||
if self.model.max_past is not None:
|
# if self.model.max_past is not None:
|
||||||
max_s = min(self.model.max_past, max_s)
|
# max_s = min(self.model.max_past, max_s)
|
||||||
|
|
||||||
bs = input_ids.shape[0]
|
bs = input_ids.shape[0]
|
||||||
padded_bs = bs
|
padded_bs = bs
|
||||||
|
Loading…
Reference in New Issue
Block a user