fix: prefer patch to be vlm specific

2025-09-10 03:44:54 +00:00 · 2025-05-06 00:02:38 +00:00 · 2025-05-06 00:02:38 +00:00 · 783ca66926
commit 783ca66926
parent b32cd97b71
2 changed files with 9 additions and 9 deletions
--- a/server/text_generation_server/models/flash_causal_lm.py
+++ b/server/text_generation_server/models/flash_causal_lm.py
@ -1267,15 +1267,6 @@ class FlashCausalLM(Model):
        prefix = None
        model = model_class(prefix, config, weights)
        if model.config.vocab_size != tokenizer.vocab_size:
            logger.warning(
                f"Tokenizer vocab size {tokenizer.vocab_size} does not match model vocab size {model.config.vocab_size}. Updating tokenizer vocab size."
            )
            # TODO: HUGE HACK! This is a workaround for the fact that Qwen2TokenizerFast
            # returns the incorrect vocab size for the 2B model.
            tokenizer._vocab_size = model.config.vocab_size
        torch.distributed.barrier(group=self.process_group)
        # VLM models define the config we care about in their text_config
--- a/server/text_generation_server/models/vlm_causal_lm.py
+++ b/server/text_generation_server/models/vlm_causal_lm.py
@ -414,6 +414,15 @@ class VlmCausalLM(FlashCausalLM):
            **kwargs,
        )
        if self.config.vocab_size != self.tokenizer.vocab_size:
            logger.warning(
                f"Tokenizer vocab size {self.tokenizer.vocab_size} does not match model vocab size {self.config.vocab_size}. Updating tokenizer vocab size."
            )
            # TODO: HUGE HACK! This is a workaround to update the vocab size
            # in the tokenizer. When the tokenizer is updated within the model,
            # the vocab size is not updated in the tokenizer.
            self.tokenizer._vocab_size = self.config.vocab_size
    @property
    def batch_type(self) -> Type[VlmCausalLMBatch]:
        """Return the batch class stored on this instance as ``batch_class``."""
        batch_cls = self.batch_class
        return batch_cls