diff --git a/server/text_generation_server/models/vlm_causal_lm.py b/server/text_generation_server/models/vlm_causal_lm.py index 81b4369b..be5843c7 100644 --- a/server/text_generation_server/models/vlm_causal_lm.py +++ b/server/text_generation_server/models/vlm_causal_lm.py @@ -212,6 +212,9 @@ class VlmCausalLMBatch(FlashCausalLMBatch): processor, image_inputs, config, image_id ) image_id += 1 + elif chunk_type == "video" and config.model_type == "qwen2_vl": + # Based on Qwen2VL's video token format + full_text += f"" full_text = image_text_replacement_fixup(config, full_text)