mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-09-09 03:14:53 +00:00
fix inputs_embeds
This commit is contained in:
parent
f34b06ca3b
commit
26212b9f35
@ -1188,7 +1188,7 @@ class VlmCausalLM(FlashCausalLM):
|
||||
# Copy inputs to the static inputs of the cuda graph
|
||||
# Static inputs are potentially padded
|
||||
cuda_graph["input_ids"][: input_ids.shape[0]] = input_ids
|
||||
- cuda_graph["input_embeds"][: inputs_embeds.shape[0]] = inputs_embeds
|
||||
+ cuda_graph["inputs_embeds"][: inputs_embeds.shape[0]] = inputs_embeds
|
||||
cuda_graph["position_ids"][: position_ids.shape[0]] = position_ids
|
||||
if ATTENTION == "flashinfer":
|
||||
block_tables = block_tables_to_ragged(
|
||||
|
Loading…
Reference in New Issue
Block a user