From 26212b9f351cd107b994fd83ea570a9689e45f36 Mon Sep 17 00:00:00 2001
From: Mohit Sharma
Date: Tue, 22 Apr 2025 02:03:34 +0530
Subject: [PATCH] fix inputs_embeds

---
 server/text_generation_server/models/vlm_causal_lm.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/server/text_generation_server/models/vlm_causal_lm.py b/server/text_generation_server/models/vlm_causal_lm.py
index ac46a717..d8c5103f 100644
--- a/server/text_generation_server/models/vlm_causal_lm.py
+++ b/server/text_generation_server/models/vlm_causal_lm.py
@@ -1188,7 +1188,7 @@ class VlmCausalLM(FlashCausalLM):
         # Copy inputs to the static inputs of the cuda graph
         # Static inputs are potentially padded
         cuda_graph["input_ids"][: input_ids.shape[0]] = input_ids
-        cuda_graph["input_embeds"][: inputs_embeds.shape[0]] = inputs_embeds
+        cuda_graph["inputs_embeds"][: inputs_embeds.shape[0]] = inputs_embeds
         cuda_graph["position_ids"][: position_ids.shape[0]] = position_ids
         if ATTENTION == "flashinfer":
             block_tables = block_tables_to_ragged(
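
For context, a minimal sketch (not the repository's actual code) of the static-buffer pattern this one-character rename touches: the CUDA-graph state is a dict whose keys must match exactly between allocation and the per-step copy, so the old "input_embeds" spelling would index a key that was never allocated. Buffer names, shapes, and sizes below are hypothetical; only the dict keys and the slicing pattern come from the diff.

# Sketch only: illustrates why the key rename matters. Requires torch;
# the sizes here are made up for illustration.
import torch

MAX_TOKENS, HIDDEN = 16, 8

# Static buffers are allocated once, keyed "inputs_embeds".
cuda_graph = {
    "input_ids": torch.zeros(MAX_TOKENS, dtype=torch.long),
    "inputs_embeds": torch.zeros(MAX_TOKENS, HIDDEN),
    "position_ids": torch.zeros(MAX_TOKENS, dtype=torch.long),
}

# Per-step inputs may be shorter than the padded static buffers.
input_ids = torch.arange(4, dtype=torch.long)
inputs_embeds = torch.randn(4, HIDDEN)
position_ids = torch.arange(4, dtype=torch.long)

# Copy into the static buffers; each key must match its allocation above.
cuda_graph["input_ids"][: input_ids.shape[0]] = input_ids
cuda_graph["inputs_embeds"][: inputs_embeds.shape[0]] = inputs_embeds
cuda_graph["position_ids"][: position_ids.shape[0]] = position_ids

# The pre-fix spelling would fail at copy time, since that key was
# never allocated:
#   cuda_graph["input_embeds"][: inputs_embeds.shape[0]] = inputs_embeds
#   -> KeyError: 'input_embeds'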