From 77eb07f73b21a785ba7b4b8f31c725ed01656d4d Mon Sep 17 00:00:00 2001
From: David Holtz <david.richard.holtz@gmail.com>
Date: Tue, 29 Oct 2024 15:47:32 +0000
Subject: [PATCH] fix: adjust resize case for qwen2_vl warmup

---
 .../text_generation_server/models/vlm_causal_lm.py  | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/server/text_generation_server/models/vlm_causal_lm.py b/server/text_generation_server/models/vlm_causal_lm.py
index df2c2a2c..9a3db502 100644
--- a/server/text_generation_server/models/vlm_causal_lm.py
+++ b/server/text_generation_server/models/vlm_causal_lm.py
@@ -177,6 +177,14 @@ class VlmCausalLMBatch(FlashCausalLMBatch):
                     pass
                 elif chunk_type == "image":
                     image = Image.open(BytesIO(chunk.image.data))
+                    # qwen2_vl expects images larger than 20 pixels, so double any image whose width is
+                    # <= 20; this is needed for warmup, where the default warmup image is 20x20.
+                    if config.model_type == "qwen2_vl":
+                        if image.width <= 20:
+                            w = image.width * 2
+                            h = image.height * 2
+                            image = image.resize((w, h))
+
                     if config.model_type == "llava_next":
                         images.append(image)
                     else:
@@ -185,11 +193,6 @@ class VlmCausalLMBatch(FlashCausalLMBatch):
                     raise RuntimeError(f"Invalid chunk type {chunk_type}")
 
         if images:
-            if images[0][0].width <= 20:
-                # TODO: provide a better way to handle the issue of the prefill image being too small
-                images = images[0][0].resize(
-                    (images[0][0].width * 2, images[0][0].height * 2)
-                )
             image_inputs = processor.image_processor(images, return_tensors="pt")
         else:
             image_inputs = None