Mirror of https://github.com/huggingface/text-generation-inference.git, synced 2025-06-17 23:02:07 +00:00
fix: prefer patch to be vlm specific
This commit is contained in:
parent b32cd97b71
commit 783ca66926
@@ -1267,15 +1267,6 @@ class FlashCausalLM(Model):
 
         prefix = None
         model = model_class(prefix, config, weights)
-
-        if model.config.vocab_size != tokenizer.vocab_size:
-            logger.warning(
-                f"Tokenizer vocab size {tokenizer.vocab_size} does not match model vocab size {model.config.vocab_size}. Updating tokenizer vocab size."
-            )
-            # TODO: HUGE HACK! This is a workaround for the fact that Qwen2TokenizerFast
-            # returns the incorrect vocab size for the 2B model.
-            tokenizer._vocab_size = model.config.vocab_size
-
         torch.distributed.barrier(group=self.process_group)
 
         # VLM models define the config we care about in their text_config
@@ -414,6 +414,15 @@ class VlmCausalLM(FlashCausalLM):
             **kwargs,
         )
 
+        if self.config.vocab_size != self.tokenizer.vocab_size:
+            logger.warning(
+                f"Tokenizer vocab size {self.tokenizer.vocab_size} does not match model vocab size {self.config.vocab_size}. Updating tokenizer vocab size."
+            )
+            # TODO: HUGE HACK! This is a workaround to update the vocab size
+            # in the tokenizer. When the tokenizer is updated within the model
+            # the vocab size is not updated in the tokenizer.
+            self.tokenizer._vocab_size = self.config.vocab_size
+
     @property
     def batch_type(self) -> Type[VlmCausalLMBatch]:
         return self.batch_class
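For readers who want to see the moved workaround outside the diff, below is a minimal, self-contained sketch of the idea: compare the tokenizer's reported vocab size against the model config's vocab size and overwrite the tokenizer's private _vocab_size attribute when they disagree, now only in the VLM-specific class rather than in the generic FlashCausalLM path. The DummyConfig and DummyTokenizer classes, the plain logging logger, and the example vocab sizes are illustrative stand-ins, not TGI's real types; only the comparison-and-patch logic mirrors the added hunk above.

# Minimal sketch of the vocab-size reconciliation moved into the VLM-specific path.
# DummyConfig / DummyTokenizer and the example numbers are illustrative stand-ins.
import logging
from dataclasses import dataclass

logger = logging.getLogger(__name__)


@dataclass
class DummyConfig:
    vocab_size: int


class DummyTokenizer:
    def __init__(self, vocab_size: int):
        self._vocab_size = vocab_size

    @property
    def vocab_size(self) -> int:
        # Reads the private attribute, like the tokenizer attribute patched in the diff.
        return self._vocab_size


def patch_tokenizer_vocab_size(config: DummyConfig, tokenizer: DummyTokenizer) -> None:
    """Keep tokenizer.vocab_size in sync with the model config's vocab_size."""
    if config.vocab_size != tokenizer.vocab_size:
        logger.warning(
            f"Tokenizer vocab size {tokenizer.vocab_size} does not match model "
            f"vocab size {config.vocab_size}. Updating tokenizer vocab size."
        )
        # Same trick as the diff: overwrite the private attribute backing vocab_size.
        tokenizer._vocab_size = config.vocab_size


if __name__ == "__main__":
    config = DummyConfig(vocab_size=152064)      # illustrative model vocab size
    tokenizer = DummyTokenizer(vocab_size=151936)  # illustrative tokenizer vocab size
    patch_tokenizer_vocab_size(config, tokenizer)
    assert tokenizer.vocab_size == config.vocab_size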