From 05464d26bf8a4bd264e64992ba2cdbb4a7b235fe Mon Sep 17 00:00:00 2001
From: Miquel Farre <miquel.farre@huggingface.co>
Date: Thu, 14 Nov 2024 10:48:18 +0000
Subject: [PATCH] Append video chunks to the prompt text for Qwen2-VL

---
 server/text_generation_server/models/vlm_causal_lm.py | 3 +++
 1 file changed, 3 insertions(+)
diff --git a/server/text_generation_server/models/vlm_causal_lm.py b/server/text_generation_server/models/vlm_causal_lm.py
index 81b4369b..be5843c7 100644
--- a/server/text_generation_server/models/vlm_causal_lm.py
+++ b/server/text_generation_server/models/vlm_causal_lm.py
@@ -212,6 +212,9 @@ class VlmCausalLMBatch(FlashCausalLMBatch):
                         processor, image_inputs, config, image_id
                     )
                     image_id += 1
+                elif chunk_type == "video" and config.model_type == "qwen2_vl":
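+                    # Qwen2-VL only: inline the video chunk wrapped in <video>...</video> tags (NOTE(review): confirm the tags match the model's chat template)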
+                    full_text += f"<video>{chunk.video}</video>"
 
             full_text = image_text_replacement_fixup(config, full_text)