Update server/text_generation_server/models/flash_causal_lm.py

Co-authored-by: Daniël de Kok <me@github.danieldk.eu>
Author: Wang, Yi (2025-01-14 08:58:48 +08:00), committed by GitHub
parent 5ad8c9a40b
commit 22ed5703de

@@ -1595,11 +1595,7 @@ class FlashCausalLM(Model):
         if max_total_tokens is None:
             if get_support_chunking():
                 model_max_length = self.tokenizer.model_max_length
-                max_position_embeddings = (
-                    self.config.max_position_embeddings
-                    if hasattr(self.config, "max_position_embeddings")
-                    else model_max_length
-                )
+                max_position_embeddings = getattr(self.config, "max_position_embeddings", model_max_length)
                 max_total_tokens = min(
                     num_blocks * BLOCK_SIZE, model_max_length, max_position_embeddings
                 )
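
For context on the change: getattr with a default value is behaviorally equivalent to the hasattr-guarded conditional expression it replaces, collapsing four lines into one call. Below is a minimal standalone sketch of the equivalence; the ConfigWithLimit/ConfigWithoutLimit classes and the numeric values are illustrative stand-ins, not taken from the repository.

class ConfigWithLimit:
    # Stand-in for a model config that defines the attribute.
    max_position_embeddings = 4096

class ConfigWithoutLimit:
    # Stand-in for a config lacking the attribute, to exercise the fallback.
    pass

model_max_length = 2048

# Old form: explicit hasattr check wrapped in a conditional expression.
old_style = (
    ConfigWithLimit.max_position_embeddings
    if hasattr(ConfigWithLimit, "max_position_embeddings")
    else model_max_length
)

# New form: getattr's third argument is the fallback, so one call suffices.
new_style = getattr(ConfigWithLimit, "max_position_embeddings", model_max_length)
assert old_style == new_style == 4096

# When the attribute is missing, both forms fall back to model_max_length.
assert getattr(ConfigWithoutLimit, "max_position_embeddings", model_max_length) == 2048

# Downstream, max_total_tokens is capped by the KV-cache capacity and both
# length limits; e.g. with a hypothetical num_blocks = 813 and BLOCK_SIZE = 16:
assert min(813 * 16, model_max_length, 4096) == 2048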