From 3c8d1f4b2fd9508c7caf7bce42d1b12a12ec10f8 Mon Sep 17 00:00:00 2001 From: David Holtz Date: Fri, 1 Nov 2024 21:11:15 +0000 Subject: [PATCH] fix: prefer repeat over expand to avoid clone --- .../text_generation_server/models/custom_modeling/qwen2_vl.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/text_generation_server/models/custom_modeling/qwen2_vl.py b/server/text_generation_server/models/custom_modeling/qwen2_vl.py index 085b70d6..73325c88 100644 --- a/server/text_generation_server/models/custom_modeling/qwen2_vl.py +++ b/server/text_generation_server/models/custom_modeling/qwen2_vl.py @@ -472,7 +472,7 @@ class Qwen2VLForConditionalGeneration(nn.Module): position_ids = ( torch.arange(batch_input_ids.shape[1], device=batch_input_ids.device) .view(1, 1, -1) - .expand(3, batch_input_ids.shape[0], -1) + .repeat(3, batch_input_ids.shape[0], 1) ) return position_ids