mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-09-12 04:44:52 +00:00
Just medusa values now.
This commit is contained in:
parent
549f0e9ca7
commit
4c8dcbb76d
@@ -1507,7 +1507,7 @@ class FlashCausalLM(Model):
 input_lengths_tensor=cuda_graph["input_lengths"],
 prefix_lens=batch.prefix_lens,
 prefix_lens_tensor=prefix_lens_tensor,
-state=cuda_graph["state"],
+state=cuda_graph.get("state"),
 ):
 # Replay the graph
 cuda_graph["graph"].replay()
|
Loading…
Reference in New Issue
Block a user