mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-09-12 04:44:52 +00:00
Just medusa values now.
This commit is contained in:
parent
549f0e9ca7
commit
4c8dcbb76d
@@ -1507,7 +1507,7 @@ class FlashCausalLM(Model):
|
|||||||
input_lengths_tensor=cuda_graph["input_lengths"],
|
input_lengths_tensor=cuda_graph["input_lengths"],
|
||||||
prefix_lens=batch.prefix_lens,
|
prefix_lens=batch.prefix_lens,
|
||||||
prefix_lens_tensor=prefix_lens_tensor,
|
prefix_lens_tensor=prefix_lens_tensor,
|
||||||
state=cuda_graph["state"],
|
state=cuda_graph.get("state"),
|
||||||
):
|
):
|
||||||
# Replay the graph
|
# Replay the graph
|
||||||
cuda_graph["graph"].replay()
|
cuda_graph["graph"].replay()
|
||||||
|
Loading…
Reference in New Issue
Block a user