From b9c8152ac68c10cb9687dbfa3952399a90af5789 Mon Sep 17 00:00:00 2001
From: Miquel Farre
Date: Thu, 14 Nov 2024 11:36:11 +0000
Subject: [PATCH] downloading videos

---
 .../models/vlm_causal_lm.py | 32 ++++++++++++++++++---
 1 file changed, 29 insertions(+), 3 deletions(-)

diff --git a/server/text_generation_server/models/vlm_causal_lm.py b/server/text_generation_server/models/vlm_causal_lm.py
index 14877e1a..c3b184a3 100644
--- a/server/text_generation_server/models/vlm_causal_lm.py
+++ b/server/text_generation_server/models/vlm_causal_lm.py
@@ -1,7 +1,11 @@
 import torch
+import os
+import requests
+import tempfile
 
 from PIL import Image
 from io import BytesIO
+from contextlib import contextmanager
 
 from opentelemetry import trace
 from typing import Iterable, Optional, Tuple, List, Type, Dict
@@ -218,8 +222,10 @@
                     )
                     image_id += 1
                 elif chunk_type == "video" and config.model_type == "qwen2_vl":
-                    # Based on Qwen2VL's video token format
-                    full_text += f""
+                    # Download and process video in a temporary context
+                    with cls.temp_video_download(chunk.video) as local_path:
+                        # Now the video is available at local_path for processing
+                        full_text += f""
 
             full_text = image_text_replacement_fixup(config, full_text)
 
@@ -271,7 +277,27 @@
         batch.image_sizes = None
         batch.image_grid_thw = None
         return batch
-
+
+    @staticmethod
+    @contextmanager
+    def temp_video_download(url: str) -> str:
+        """Downloads video to a temporary file and cleans it up after use."""
+        with tempfile.NamedTemporaryFile(suffix=os.path.splitext(url)[1], delete=False) as tmp_file:
+            try:
+                # Download video
+                with requests.get(url, stream=True) as r:
+                    r.raise_for_status()
+                    for chunk in r.iter_content(chunk_size=8192):
+                        if chunk:
+                            tmp_file.write(chunk)
+                tmp_file.flush()
+                yield tmp_file.name
+            finally:
+                # Clean up temp file
+                try:
+                    os.unlink(tmp_file.name)
+                except OSError:
+                    pass
 
 class VlmCausalLM(FlashCausalLM):
     def __init__(
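
Usage note (not part of the patch): a minimal sketch of how the context manager added above could be consumed by downstream batch code; the example URL and the process_video helper are hypothetical placeholders, not TGI APIs.

    # Hypothetical illustration only: download a remote clip, hand the local
    # path to downstream processing, and let the context manager delete the
    # temporary file once the block exits.
    with VlmCausalLMBatch.temp_video_download("https://example.com/clip.mp4") as local_path:
        frames = process_video(local_path)  # process_video is a hypothetical helper
    # at this point the temporary file has already been removed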