mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-09-09 11:24:53 +00:00
fix inputs_embeds
This commit is contained in:
parent
f34b06ca3b
commit
26212b9f35
@@ -1188,7 +1188,7 @@ class VlmCausalLM(FlashCausalLM):
|
|||||||
# Copy inputs to the static inputs of the cuda graph
|
# Copy inputs to the static inputs of the cuda graph
|
||||||
# Static inputs are potentially padded
|
# Static inputs are potentially padded
|
||||||
cuda_graph["input_ids"][: input_ids.shape[0]] = input_ids
|
cuda_graph["input_ids"][: input_ids.shape[0]] = input_ids
|
||||||
cuda_graph["input_embeds"][: inputs_embeds.shape[0]] = inputs_embeds
|
cuda_graph["inputs_embeds"][: inputs_embeds.shape[0]] = inputs_embeds
|
||||||
cuda_graph["position_ids"][: position_ids.shape[0]] = position_ids
|
cuda_graph["position_ids"][: position_ids.shape[0]] = position_ids
|
||||||
if ATTENTION == "flashinfer":
|
if ATTENTION == "flashinfer":
|
||||||
block_tables = block_tables_to_ragged(
|
block_tables = block_tables_to_ragged(
|
||||||
|
Loading…
Reference in New Issue
Block a user