From 76d526d9314293756c3ef793b99d90ce2f82ef40 Mon Sep 17 00:00:00 2001
From: drbh
Date: Wed, 5 Feb 2025 02:27:29 +0000
Subject: [PATCH] feat: check before rope type adjustment and small refactors

---
 launcher/src/main.rs                           | 2 +-
 server/text_generation_server/layers/rotary.py | 1 -
 .../models/custom_modeling/qwen2_5_vl.py       | 8 ++++++--
 .../models/custom_modeling/qwen2_vl.py         | 8 ++++++--
 4 files changed, 13 insertions(+), 6 deletions(-)

diff --git a/launcher/src/main.rs b/launcher/src/main.rs
index 3c9ee850..fbbe8a2d 100644
--- a/launcher/src/main.rs
+++ b/launcher/src/main.rs
@@ -2053,7 +2053,7 @@ fn main() -> Result<(), LauncherError> {
     // this is a short term temporary fix to enable vlms to avoid rejecting images
     let default_optimal = match config {
         Some(ref config) => match config.model_type.as_deref() {
-            Some("qwen2_vl") => 10_000,
+            Some("qwen2_vl") | Some("qwen2_5_vl") => 10_000,
             _ => 4096,
         },
         None => 4096,
diff --git a/server/text_generation_server/layers/rotary.py b/server/text_generation_server/layers/rotary.py
index f38f6859..1cee08a0 100644
--- a/server/text_generation_server/layers/rotary.py
+++ b/server/text_generation_server/layers/rotary.py
@@ -86,7 +86,6 @@ class PositionRotaryEmbedding(nn.Module):
             # `rope_type` is now standard in transformers, but some existing models
             # have `type` instead.
             rope_type = rope_scaling.get("rope_type", rope_scaling.get("type", None))
-            mrope_section = rope_scaling.get("mrope_section", None)

             if rope_type == "linear":
                 pass
diff --git a/server/text_generation_server/models/custom_modeling/qwen2_5_vl.py b/server/text_generation_server/models/custom_modeling/qwen2_5_vl.py
index ad2f6039..11ec308c 100644
--- a/server/text_generation_server/models/custom_modeling/qwen2_5_vl.py
+++ b/server/text_generation_server/models/custom_modeling/qwen2_5_vl.py
@@ -475,7 +475,12 @@ class Qwen2_5VLForConditionalGeneration(nn.Module):
         config.vision_config.speculator = config.speculator
         # set rope_scaling.type == "mrope" since AutoConfig.from_pretrained incorrectly
         # returns rope_scaling.type == "default" for Qwen2_5-VL model at the moment
-        config.rope_scaling.update({"rope_type": "mrope"})
+        if (
+            hasattr(config, "rope_scaling")
+            and config.rope_scaling is not None
+            and config.rope_scaling.get("type", None) == "default"
+        ):
+            config.rope_scaling.update({"rope_type": "mrope"})
         self.hidden_size = config.hidden_size
         self.vision_start_token_id = config.vision_start_token_id
         self.vision_end_token_id = config.vision_end_token_id
@@ -616,7 +621,6 @@ class Qwen2_5VLForConditionalGeneration(nn.Module):

         # apply the visual model to the pixel values if they are provided
         if pixel_values is not None and len(pixel_values) > 0:
-            pixel_values = pixel_values.to(inputs_embeds.dtype)
             if pixel_values is not None:
                 image_embeds = self.visual(
                     pixel_values, grid_thw=image_grid_thw
diff --git a/server/text_generation_server/models/custom_modeling/qwen2_vl.py b/server/text_generation_server/models/custom_modeling/qwen2_vl.py
index 2d017e38..a72e0e55 100644
--- a/server/text_generation_server/models/custom_modeling/qwen2_vl.py
+++ b/server/text_generation_server/models/custom_modeling/qwen2_vl.py
@@ -379,7 +379,12 @@ class Qwen2VLForConditionalGeneration(nn.Module):
         config.vision_config.speculator = config.speculator
         # set rope_scaling.type == "mrope" since AutoConfig.from_pretrained incorrectly
         # returns rope_scaling.type == "default" for Qwen2-VL model at the moment
-        config.rope_scaling.update({"rope_type": "mrope"})
+        if (
+            hasattr(config, "rope_scaling")
+            and config.rope_scaling is not None
+            and config.rope_scaling.get("type", None) == "default"
+        ):
+            config.rope_scaling.update({"rope_type": "mrope"})
         self.hidden_size = config.hidden_size
         self.vision_start_token_id = config.vision_start_token_id
         self.vision_end_token_id = config.vision_end_token_id
@@ -520,7 +525,6 @@ class Qwen2VLForConditionalGeneration(nn.Module):

         # apply the visual model to the pixel values if they are provided
         if pixel_values is not None and len(pixel_values) > 0:
-            pixel_values = pixel_values.to(inputs_embeds.dtype)
             if pixel_values is not None:
                 image_embeds = self.visual(
                     pixel_values, grid_thw=image_grid_thw
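
Note (illustration, not part of the patch): the guard added to both Qwen2-VL modeling
files only rewrites rope_scaling when the attribute exists, is non-None, and AutoConfig
reported the incorrect "default" type; explicit or missing settings are left untouched.
The sketch below shows the same check in isolation. force_mrope and the
SimpleNamespace-based config are hypothetical stand-ins, not TGI code.

# Minimal sketch of the guarded rope type adjustment, under the assumptions above.
from types import SimpleNamespace


def force_mrope(config):
    # Only patch rope_scaling when it exists and was reported with the
    # incorrect "default" type; otherwise leave the config untouched.
    if (
        hasattr(config, "rope_scaling")
        and config.rope_scaling is not None
        and config.rope_scaling.get("type", None) == "default"
    ):
        config.rope_scaling.update({"rope_type": "mrope"})
    return config


# Example with a stand-in config; real Qwen2-VL configs carry more fields.
config = SimpleNamespace(rope_scaling={"type": "default"})
force_mrope(config)
print(config.rope_scaling)  # {'type': 'default', 'rope_type': 'mrope'}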