From 76d526d9314293756c3ef793b99d90ce2f82ef40 Mon Sep 17 00:00:00 2001
From: drbh
Date: Wed, 5 Feb 2025 02:27:29 +0000
Subject: [PATCH] feat: check before rope type adjustment and small refactors

---
 launcher/src/main.rs                           | 2 +-
 server/text_generation_server/layers/rotary.py | 1 -
 .../models/custom_modeling/qwen2_5_vl.py       | 8 ++++++--
 .../models/custom_modeling/qwen2_vl.py         | 8 ++++++--
 4 files changed, 13 insertions(+), 6 deletions(-)

diff --git a/launcher/src/main.rs b/launcher/src/main.rs
index 3c9ee850..fbbe8a2d 100644
--- a/launcher/src/main.rs
+++ b/launcher/src/main.rs
@@ -2053,7 +2053,7 @@ fn main() -> Result<(), LauncherError> {
     // this is a short term temporary fix to enable vlms to avoid rejecting images
     let default_optimal = match config {
         Some(ref config) => match config.model_type.as_deref() {
-            Some("qwen2_vl") => 10_000,
+            Some("qwen2_vl") | Some("qwen2_5_vl") => 10_000,
             _ => 4096,
         },
         None => 4096,
diff --git a/server/text_generation_server/layers/rotary.py b/server/text_generation_server/layers/rotary.py
index f38f6859..1cee08a0 100644
--- a/server/text_generation_server/layers/rotary.py
+++ b/server/text_generation_server/layers/rotary.py
@@ -86,7 +86,6 @@ class PositionRotaryEmbedding(nn.Module):
             # `rope_type` is now standard in transformers, but some existing models
             # have `type` instead.
             rope_type = rope_scaling.get("rope_type", rope_scaling.get("type", None))
-            mrope_section = rope_scaling.get("mrope_section", None)

             if rope_type == "linear":
                 pass
diff --git a/server/text_generation_server/models/custom_modeling/qwen2_5_vl.py b/server/text_generation_server/models/custom_modeling/qwen2_5_vl.py
index ad2f6039..11ec308c 100644
--- a/server/text_generation_server/models/custom_modeling/qwen2_5_vl.py
+++ b/server/text_generation_server/models/custom_modeling/qwen2_5_vl.py
@@ -475,7 +475,12 @@ class Qwen2_5VLForConditionalGeneration(nn.Module):
         config.vision_config.speculator = config.speculator
         # set rope_scaling.type == "mrope" since AutoConfig.from_pretrained incorrectly
         # returns rope_scaling.type == "default" for Qwen2_5-VL model at the moment
-        config.rope_scaling.update({"rope_type": "mrope"})
+        if (
+            hasattr(config, "rope_scaling")
+            and config.rope_scaling is not None
+            and config.rope_scaling.get("type", None) == "default"
+        ):
+            config.rope_scaling.update({"rope_type": "mrope"})
         self.hidden_size = config.hidden_size
         self.vision_start_token_id = config.vision_start_token_id
         self.vision_end_token_id = config.vision_end_token_id
@@ -616,7 +621,6 @@ class Qwen2_5VLForConditionalGeneration(nn.Module):

         # apply the visual model to the pixel values if they are provided
         if pixel_values is not None and len(pixel_values) > 0:
-            pixel_values = pixel_values.to(inputs_embeds.dtype)
             if pixel_values is not None:
                 image_embeds = self.visual(
                     pixel_values, grid_thw=image_grid_thw
diff --git a/server/text_generation_server/models/custom_modeling/qwen2_vl.py b/server/text_generation_server/models/custom_modeling/qwen2_vl.py
index 2d017e38..a72e0e55 100644
--- a/server/text_generation_server/models/custom_modeling/qwen2_vl.py
+++ b/server/text_generation_server/models/custom_modeling/qwen2_vl.py
@@ -379,7 +379,12 @@ class Qwen2VLForConditionalGeneration(nn.Module):
         config.vision_config.speculator = config.speculator
         # set rope_scaling.type == "mrope" since AutoConfig.from_pretrained incorrectly
         # returns rope_scaling.type == "default" for Qwen2-VL model at the moment
-        config.rope_scaling.update({"rope_type": "mrope"})
+        if (
+            hasattr(config, "rope_scaling")
+            and config.rope_scaling is not None
+            and config.rope_scaling.get("type", None) == "default"
+        ):
+            config.rope_scaling.update({"rope_type": "mrope"})
         self.hidden_size = config.hidden_size
         self.vision_start_token_id = config.vision_start_token_id
         self.vision_end_token_id = config.vision_end_token_id
@@ -520,7 +525,6 @@ class Qwen2VLForConditionalGeneration(nn.Module):

         # apply the visual model to the pixel values if they are provided
         if pixel_values is not None and len(pixel_values) > 0:
-            pixel_values = pixel_values.to(inputs_embeds.dtype)
             if pixel_values is not None:
                 image_embeds = self.visual(
                     pixel_values, grid_thw=image_grid_thw
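
Note (illustration, not part of the patch): the guard added to both Qwen2-VL modeling
files only rewrites rope_scaling when the attribute exists, is non-None, and AutoConfig
reported the incorrect "default" type; explicit or missing settings are left untouched.
The sketch below shows the same check in isolation. force_mrope and the
SimpleNamespace-based config are hypothetical stand-ins, not TGI code.

# Minimal sketch of the guarded rope type adjustment, under the assumptions above.
from types import SimpleNamespace


def force_mrope(config):
    # Only patch rope_scaling when it exists and was reported with the
    # incorrect "default" type; otherwise leave the config untouched.
    if (
        hasattr(config, "rope_scaling")
        and config.rope_scaling is not None
        and config.rope_scaling.get("type", None) == "default"
    ):
        config.rope_scaling.update({"rope_type": "mrope"})
    return config


# Example with a stand-in config; real Qwen2-VL configs carry more fields.
config = SimpleNamespace(rope_scaling={"type": "default"})
force_mrope(config)
print(config.rope_scaling)  # {'type': 'default', 'rope_type': 'mrope'}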