mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-04-27 21:12:07 +00:00
Fix runtime error when Qwen2-VL was prompted with multiple images
Fix a runtime error that occurs when the Qwen2-VL model is given a prompt containing more than one image. The runtime error was:
File "text-generation-inference/server/text_generation_server/models/custom_modeling/qwen2_vl.py", line 459, in get_position_ids
text_pos_ids = torch.arange(text_length, device=d)
RuntimeError: upper bound and larger bound inconsistent with step sign
The error was caused by the text_length variable becoming negative when multiple images caused the main loop of the get_position_ids function to run more than once. It is a simple logic mistake: next_image_pos is initialized as a relative offset from current_pos, but it was used as if it were an absolute position counted from zero.
This commit is contained in:
parent
a72f339c79
commit
fca2218fa9
@ -450,7 +450,7 @@ class Qwen2VLForConditionalGeneration(nn.Module):
|
||||
width //= self.spatial_merge_size
|
||||
|
||||
# calculate the length of the text and image tokens
|
||||
text_length = next_image_pos - current_pos
|
||||
text_length = next_image_pos
|
||||
start_idx = (
|
||||
llm_pos_ids_list[-1].max() + 1 if llm_pos_ids_list else 0
|
||||
)
|
||||
@ -480,7 +480,7 @@ class Qwen2VLForConditionalGeneration(nn.Module):
|
||||
)
|
||||
llm_pos_ids_list.append(image_pos_ids)
|
||||
|
||||
current_pos = next_image_pos + time_steps * height * width
|
||||
current_pos += next_image_pos + time_steps * height * width
|
||||
image_index += 1
|
||||
|
||||
if current_pos < batch_input_ids.size(1):
|
||||
|
Loading…
Reference in New Issue
Block a user