mirror of https://github.com/huggingface/text-generation-inference.git (synced 2025-09-11 12:24:53 +00:00)
WhaT?
This commit is contained in:
parent ee47973a2f
commit 66b2015586
@@ -511,21 +511,6 @@ class BaseFlashMistral(FlashCausalLM):
        cuda_graph = self.cuda_graphs.get(padded_bs, None)

        if cu_seqlen_prefill is not None or cuda_graph is None:
            if cu_seqlen_prefill is None:
                logits, speculative_logits = self.compiled_model(
                    input_ids=input_ids,
                    position_ids=position_ids,
                    cu_seqlen_prefill=cu_seqlen_prefill,
                    kv_cache=kv_cache,
                    block_tables=block_tables,
                    slots=slots,
                    input_lengths=input_lengths,
                    max_s=max_s,
                    prefill_cache_indices=batch.prefill_cache_indices,
                    lm_head_indices=lm_head_indices,
                )
            else:
                logits, speculative_logits = self.model.forward(
                    input_ids=input_ids,
                    position_ids=position_ids,
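For context on the control flow this hunk touches, here is a minimal, self-contained sketch of the dispatch pattern: a decode step with no captured CUDA graph went through a torch.compile'd model (the branch the hunk appears to remove), a prefill step used the regular eager forward, and a decode step with a captured graph replays it. The ForwardDispatcher class, its constructor, and the simplified signature (input_ids, cu_seqlen_prefill, padded_bs) are illustrative assumptions, not the upstream text-generation-inference API.

# Minimal sketch of the compiled-model / eager / CUDA-graph dispatch.
# Names (ForwardDispatcher, cuda_graphs dict layout) are assumptions for
# illustration only, not the actual BaseFlashMistral implementation.
from typing import Optional

import torch


class ForwardDispatcher:
    """Chooses between an eager forward, a torch.compile'd forward, and a
    pre-captured CUDA-graph replay, mirroring the branching shown in the
    diff above (assumed behaviour, not the upstream code)."""

    def __init__(self, model: torch.nn.Module):
        self.model = model
        # Decode-only path compiled ahead of time, as the removed branch did.
        self.compiled_model = torch.compile(model)
        # padded batch size -> captured CUDA-graph state (populated elsewhere).
        self.cuda_graphs: dict[int, dict] = {}

    def forward(
        self,
        input_ids: torch.Tensor,
        cu_seqlen_prefill: Optional[torch.Tensor],
        padded_bs: int,
    ) -> torch.Tensor:
        cuda_graph = self.cuda_graphs.get(padded_bs, None)

        if cu_seqlen_prefill is not None or cuda_graph is None:
            if cu_seqlen_prefill is None:
                # Decode step with no captured graph: use the compiled model
                # (the branch this commit removes).
                return self.compiled_model(input_ids)
            # Prefill step: always run the regular eager forward.
            return self.model(input_ids)

        # Decode step with a captured graph: copy inputs into the graph's
        # static buffers and replay the recorded kernels.
        cuda_graph["input_ids"].copy_(input_ids)
        cuda_graph["graph"].replay()
        return cuda_graph["logits"]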