diff --git a/server/text_generation_server/models/flash_causal_lm.py b/server/text_generation_server/models/flash_causal_lm.py index 5a1ac4d8..7cab1c4b 100644 --- a/server/text_generation_server/models/flash_causal_lm.py +++ b/server/text_generation_server/models/flash_causal_lm.py @@ -1027,6 +1027,7 @@ class FlashCausalLM(Model): create_decode_state, create_prefill_with_paged_kv_state, ) + self.prefill_state = create_prefill_state(device=device) self.prefill_with_paged_kv_state = create_prefill_with_paged_kv_state( device=device