A bit more simplification of flash_neox generate_tokens()

2025-09-09 19:34:53 +00:00 · 2023-03-27 16:30:11 -07:00 · 2023-03-27 16:30:11 -07:00 · f786d1ddf5
commit f786d1ddf5
parent 9895569c8b
1 changed file with 0 additions and 6 deletions
--- a/server/text_generation_server/models/flash_neox.py
+++ b/server/text_generation_server/models/flash_neox.py
@@ -301,8 +301,6 @@ class FlashNeoX(Model):
        next_batch_cu_seqlens = [0]
        next_batch_past_key_values = []
        next_batch_input_lengths = []
-        next_batch_all_input_ids = []
-        next_batch_all_input_ids_tensor = []

        # Cumulative length
        cumulative_length = 0
@@ -368,8 +366,6 @@ class FlashNeoX(Model):
                next_batch_cu_seqlens[-1] + new_input_length
            )
            next_batch_input_lengths.append(new_input_length)
-            next_batch_all_input_ids.append(all_input_ids)
-            next_batch_all_input_ids_tensor.append(all_input_ids_tensor)

            # Prefill
            if prefill:
@@ -411,8 +407,6 @@ class FlashNeoX(Model):
        batch.max_seqlen += 1
        batch.past_key_values = next_batch_past_key_values
        batch.input_lengths = next_batch_input_lengths
-        batch.all_input_ids = next_batch_all_input_ids
-        batch.all_input_ids_tensor = next_batch_all_input_ids_tensor

        return generations