From 4c8dcbb76d1b788b882b669d8c48c46d4fc4d669 Mon Sep 17 00:00:00 2001 From: Nicolas Patry Date: Tue, 13 Aug 2024 13:02:48 +0200 Subject: [PATCH] Just medusa values now. --- server/text_generation_server/models/flash_causal_lm.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/text_generation_server/models/flash_causal_lm.py b/server/text_generation_server/models/flash_causal_lm.py index 387118c2..830dc6c2 100644 --- a/server/text_generation_server/models/flash_causal_lm.py +++ b/server/text_generation_server/models/flash_causal_lm.py @@ -1507,7 +1507,7 @@ class FlashCausalLM(Model): input_lengths_tensor=cuda_graph["input_lengths"], prefix_lens=batch.prefix_lens, prefix_lens_tensor=prefix_lens_tensor, - state=cuda_graph["state"], + state=cuda_graph.get("state"), ): # Replay the graph cuda_graph["graph"].replay()