diff --git a/server/text_generation_server/models/flash_causal_lm.py b/server/text_generation_server/models/flash_causal_lm.py index ef28ac4d..5f0e46da 100644 --- a/server/text_generation_server/models/flash_causal_lm.py +++ b/server/text_generation_server/models/flash_causal_lm.py @@ -188,6 +188,7 @@ class FlashCausalLMBatch(Batch): max_seqlen=max_seqlen, past_key_values=past_key_values, input_lengths=input_lengths, + offsets=offsets, all_input_ids=all_input_ids, all_input_ids_tensor=all_input_ids_tensor, next_token_choosers=next_token_choosers,