mirror of https://github.com/huggingface/text-generation-inference.git
synced 2025-06-19 15:52:08 +00:00
feat: check before rope type adjustment and small refactors
This commit is contained in:
parent 1f585775b8
commit 76d526d931
@@ -2053,7 +2053,7 @@ fn main() -> Result<(), LauncherError> {
     // this is a short term temporary fix to enable vlms to avoid rejecting images
     let default_optimal = match config {
         Some(ref config) => match config.model_type.as_deref() {
-            Some("qwen2_vl") => 10_000,
+            Some("qwen2_vl") | Some("qwen2_5_vl") => 10_000,
             _ => 4096,
         },
         None => 4096,

@@ -86,7 +86,6 @@ class PositionRotaryEmbedding(nn.Module):
             # `rope_type` is now standard in transformers, but some existing models
             # have `type` instead.
             rope_type = rope_scaling.get("rope_type", rope_scaling.get("type", None))
-            mrope_section = rope_scaling.get("mrope_section", None)

             if rope_type == "linear":
                 pass

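Note on the hunk above: the surviving lookup prefers the standard `rope_type` key and falls back to the legacy `type` key, while the (apparently unused) `mrope_section` read is dropped. A minimal sketch of that fallback, with hypothetical `rope_scaling` dicts:

    def resolve_rope_type(rope_scaling: dict):
        # Prefer the standard "rope_type" key; fall back to the legacy "type" key.
        return rope_scaling.get("rope_type", rope_scaling.get("type", None))

    assert resolve_rope_type({"rope_type": "linear"}) == "linear"
    assert resolve_rope_type({"type": "dynamic"}) == "dynamic"  # legacy key only
    assert resolve_rope_type({}) is None  # neither key present
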
@@ -475,7 +475,12 @@ class Qwen2_5VLForConditionalGeneration(nn.Module):
         config.vision_config.speculator = config.speculator
         # set rope_scaling.type == "mrope" since AutoConfig.from_pretrained incorrectly
         # returns rope_scaling.type == "default" for Qwen2_5-VL model at the moment
-        config.rope_scaling.update({"rope_type": "mrope"})
+        if (
+            hasattr(config, "rope_scaling")
+            and config.rope_scaling is not None
+            and config.rope_scaling.get("type", None) == "default"
+        ):
+            config.rope_scaling.update({"rope_type": "mrope"})
         self.hidden_size = config.hidden_size
         self.vision_start_token_id = config.vision_start_token_id
         self.vision_end_token_id = config.vision_end_token_id

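This is the check the commit title refers to: instead of unconditionally forcing `rope_type` to "mrope", the constructor now verifies that a `rope_scaling` section exists and still reports the (incorrect) "default" type before overriding it, so configs that already carry an explicit rope type are left untouched. A minimal sketch of the guard, using a plain dict in place of `config.rope_scaling` (sample values are hypothetical):

    def maybe_force_mrope(rope_scaling):
        # Only override when a rope_scaling section exists and reports "default".
        if rope_scaling is not None and rope_scaling.get("type", None) == "default":
            rope_scaling.update({"rope_type": "mrope"})
        return rope_scaling

    print(maybe_force_mrope({"type": "default"}))  # {'type': 'default', 'rope_type': 'mrope'}
    print(maybe_force_mrope({"type": "mrope"}))    # left as-is: {'type': 'mrope'}
    print(maybe_force_mrope(None))                 # None: nothing to adjust
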
@@ -616,7 +621,6 @@ class Qwen2_5VLForConditionalGeneration(nn.Module):

         # apply the visual model to the pixel values if they are provided
         if pixel_values is not None and len(pixel_values) > 0:
-            pixel_values = pixel_values.to(inputs_embeds.dtype)
             if pixel_values is not None:
                 image_embeds = self.visual(
                     pixel_values, grid_thw=image_grid_thw

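The refactor here drops the dtype cast on `pixel_values`; note also that the inner `is not None` check kept as context is already implied by the outer condition. A small stand-in illustration of the control flow, with plain Python lists instead of tensors:

    def visual_branch(pixel_values):
        # Mirrors the guard structure above with plain Python values.
        if pixel_values is not None and len(pixel_values) > 0:
            # Once the outer condition holds, the inner check cannot fail.
            if pixel_values is not None:
                return "visual model applied"
        return "visual model skipped"

    assert visual_branch(None) == "visual model skipped"
    assert visual_branch([]) == "visual model skipped"
    assert visual_branch([0.1]) == "visual model applied"
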
@@ -379,7 +379,12 @@ class Qwen2VLForConditionalGeneration(nn.Module):
         config.vision_config.speculator = config.speculator
         # set rope_scaling.type == "mrope" since AutoConfig.from_pretrained incorrectly
         # returns rope_scaling.type == "default" for Qwen2-VL model at the moment
-        config.rope_scaling.update({"rope_type": "mrope"})
+        if (
+            hasattr(config, "rope_scaling")
+            and config.rope_scaling is not None
+            and config.rope_scaling.get("type", None) == "default"
+        ):
+            config.rope_scaling.update({"rope_type": "mrope"})
         self.hidden_size = config.hidden_size
         self.vision_start_token_id = config.vision_start_token_id
         self.vision_end_token_id = config.vision_end_token_id

@@ -520,7 +525,6 @@ class Qwen2VLForConditionalGeneration(nn.Module):

         # apply the visual model to the pixel values if they are provided
         if pixel_values is not None and len(pixel_values) > 0:
-            pixel_values = pixel_values.to(inputs_embeds.dtype)
             if pixel_values is not None:
                 image_embeds = self.visual(
                     pixel_values, grid_thw=image_grid_thw