From 26212b9f351cd107b994fd83ea570a9689e45f36 Mon Sep 17 00:00:00 2001
From: Mohit Sharma
Date: Tue, 22 Apr 2025 02:03:34 +0530
Subject: [PATCH] fix inputs_embeds

---
 server/text_generation_server/models/vlm_causal_lm.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/server/text_generation_server/models/vlm_causal_lm.py b/server/text_generation_server/models/vlm_causal_lm.py
index ac46a717..d8c5103f 100644
--- a/server/text_generation_server/models/vlm_causal_lm.py
+++ b/server/text_generation_server/models/vlm_causal_lm.py
@@ -1188,7 +1188,7 @@ class VlmCausalLM(FlashCausalLM):
         # Copy inputs to the static inputs of the cuda graph
         # Static inputs are potentially padded
         cuda_graph["input_ids"][: input_ids.shape[0]] = input_ids
-        cuda_graph["input_embeds"][: inputs_embeds.shape[0]] = inputs_embeds
+        cuda_graph["inputs_embeds"][: inputs_embeds.shape[0]] = inputs_embeds
         cuda_graph["position_ids"][: position_ids.shape[0]] = position_ids
         if ATTENTION == "flashinfer":
             block_tables = block_tables_to_ragged(
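
For context, a minimal sketch (not the repository's actual code) of the static-buffer pattern this one-character rename touches: the CUDA-graph state is a dict whose keys must match exactly between allocation and the per-step copy, so the old "input_embeds" spelling would index a key that was never allocated. Buffer names, shapes, and sizes below are hypothetical; only the dict keys and the slicing pattern come from the diff.

# Sketch only: illustrates why the key rename matters. Requires torch;
# the sizes here are made up for illustration.
import torch

MAX_TOKENS, HIDDEN = 16, 8

# Static buffers are allocated once, keyed "inputs_embeds".
cuda_graph = {
    "input_ids": torch.zeros(MAX_TOKENS, dtype=torch.long),
    "inputs_embeds": torch.zeros(MAX_TOKENS, HIDDEN),
    "position_ids": torch.zeros(MAX_TOKENS, dtype=torch.long),
}

# Per-step inputs may be shorter than the padded static buffers.
input_ids = torch.arange(4, dtype=torch.long)
inputs_embeds = torch.randn(4, HIDDEN)
position_ids = torch.arange(4, dtype=torch.long)

# Copy into the static buffers; each key must match its allocation above.
cuda_graph["input_ids"][: input_ids.shape[0]] = input_ids
cuda_graph["inputs_embeds"][: inputs_embeds.shape[0]] = inputs_embeds
cuda_graph["position_ids"][: position_ids.shape[0]] = position_ids

# The pre-fix spelling would fail at copy time, since that key was
# never allocated:
#   cuda_graph["input_embeds"][: inputs_embeds.shape[0]] = inputs_embeds
#   -> KeyError: 'input_embeds'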