Update server/text_generation_server/models/flash_causal_lm.py

Co-authored-by: Daniël de Kok <me@github.danieldk.eu>
Author: Wang, Yi (2025-01-14 08:58:48 +08:00), committed by GitHub
parent 5ad8c9a40b
commit 22ed5703de

@@ -1595,11 +1595,7 @@ class FlashCausalLM(Model):
         if max_total_tokens is None:
             if get_support_chunking():
                 model_max_length = self.tokenizer.model_max_length
-                max_position_embeddings = (
-                    self.config.max_position_embeddings
-                    if hasattr(self.config, "max_position_embeddings")
-                    else model_max_length
-                )
+                max_position_embeddings = getattr(self.config, "max_position_embeddings", model_max_length)
                 max_total_tokens = min(
                     num_blocks * BLOCK_SIZE, model_max_length, max_position_embeddings
                 )
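
For context on the change: getattr with a default value is behaviorally equivalent to the hasattr-guarded conditional expression it replaces, collapsing four lines into one call. Below is a minimal standalone sketch of the equivalence; the ConfigWithLimit/ConfigWithoutLimit classes and the numeric values are illustrative stand-ins, not taken from the repository.

class ConfigWithLimit:
    # Stand-in for a model config that defines the attribute.
    max_position_embeddings = 4096

class ConfigWithoutLimit:
    # Stand-in for a config lacking the attribute, to exercise the fallback.
    pass

model_max_length = 2048

# Old form: explicit hasattr check wrapped in a conditional expression.
old_style = (
    ConfigWithLimit.max_position_embeddings
    if hasattr(ConfigWithLimit, "max_position_embeddings")
    else model_max_length
)

# New form: getattr's third argument is the fallback, so one call suffices.
new_style = getattr(ConfigWithLimit, "max_position_embeddings", model_max_length)
assert old_style == new_style == 4096

# When the attribute is missing, both forms fall back to model_max_length.
assert getattr(ConfigWithoutLimit, "max_position_embeddings", model_max_length) == 2048

# Downstream, max_total_tokens is capped by the KV-cache capacity and both
# length limits; e.g. with a hypothetical num_blocks = 813 and BLOCK_SIZE = 16:
assert min(813 * 16, model_max_length, 4096) == 2048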