diff --git a/integration-tests/models/__snapshots__/test_completion_prompts/test_flash_llama_completion_many_prompts_stream.json b/integration-tests/models/__snapshots__/test_completion_prompts/test_flash_llama_completion_many_prompts_stream.json index b7d5cfe4..e7fb5740 100644 --- a/integration-tests/models/__snapshots__/test_completion_prompts/test_flash_llama_completion_many_prompts_stream.json +++ b/integration-tests/models/__snapshots__/test_completion_prompts/test_flash_llama_completion_many_prompts_stream.json @@ -8,7 +8,7 @@ "text": "\n" } ], - "created": 1724158270, + "created": 1724833943, "id": "", "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0", "object": "text_completion", @@ -23,7 +23,7 @@ "text": "\n" } ], - "created": 1724158270, + "created": 1724833943, "id": "", "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0", "object": "text_completion", @@ -38,7 +38,7 @@ "text": "\n" } ], - "created": 1724158270, + "created": 1724833943, "id": "", "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0", "object": "text_completion", @@ -53,7 +53,7 @@ "text": "hd" } ], - "created": 1724158270, + "created": 1724833943, "id": "", "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0", "object": "text_completion", @@ -68,7 +68,7 @@ "text": "\n" } ], - "created": 1724158270, + "created": 1724833943, "id": "", "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0", "object": "text_completion", @@ -83,7 +83,7 @@ "text": "\n" } ], - "created": 1724158270, + "created": 1724833943, "id": "", "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0", "object": "text_completion", @@ -98,7 +98,7 @@ "text": "\n" } ], - "created": 1724158270, + "created": 1724833943, "id": "", "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0", "object": "text_completion", @@ -113,7 +113,7 @@ "text": "aho" } ], - "created": 1724158270, + "created": 1724833943, "id": "", "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0", "object": "text_completion", @@ -128,7 +128,7 @@ "text": "2" } ], - "created": 1724158270, + "created": 1724833943, "id": "", "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0", "object": "text_completion", @@ -143,7 +143,7 @@ "text": "2" } ], - "created": 1724158270, + "created": 1724833943, "id": "", "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0", "object": "text_completion", @@ -158,7 +158,7 @@ "text": "2" } ], - "created": 1724158270, + "created": 1724833943, "id": "", "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0", "object": "text_completion", @@ -173,7 +173,7 @@ "text": "ima" } ], - "created": 1724158270, + "created": 1724833943, "id": "", "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0", "object": "text_completion", @@ -188,7 +188,7 @@ "text": "." } ], - "created": 1724158270, + "created": 1724833943, "id": "", "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0", "object": "text_completion", @@ -203,7 +203,7 @@ "text": "." } ], - "created": 1724158270, + "created": 1724833943, "id": "", "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0", "object": "text_completion", @@ -218,7 +218,7 @@ "text": "." } ], - "created": 1724158270, + "created": 1724833943, "id": "", "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0", "object": "text_completion", @@ -233,7 +233,7 @@ "text": "\n" } ], - "created": 1724158270, + "created": 1724833943, "id": "", "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0", "object": "text_completion", @@ -248,7 +248,7 @@ "text": " Sarah" } ], - "created": 1724158270, + "created": 1724833943, "id": "", "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0", "object": "text_completion", @@ -263,7 +263,7 @@ "text": " Yes" } ], - "created": 1724158270, + "created": 1724833943, "id": "", "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0", "object": "text_completion", @@ -278,7 +278,7 @@ "text": " And" } ], - "created": 1724158270, + "created": 1724833943, "id": "", "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0", "object": "text_completion", @@ -293,7 +293,7 @@ "text": "i" } ], - "created": 1724158270, + "created": 1724833943, "id": "", "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0", "object": "text_completion", @@ -308,7 +308,7 @@ "text": "'" } ], - "created": 1724158270, + "created": 1724833943, "id": "", "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0", "object": "text_completion", @@ -323,7 +323,7 @@ "text": "," } ], - "created": 1724158270, + "created": 1724833943, "id": "", "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0", "object": "text_completion", @@ -338,7 +338,7 @@ "text": " what" } ], - "created": 1724158270, + "created": 1724833943, "id": "", "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0", "object": "text_completion", @@ -353,7 +353,7 @@ "text": "'" } ], - "created": 1724158270, + "created": 1724833943, "id": "", "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0", "object": "text_completion", @@ -368,7 +368,7 @@ "text": "s" } ], - "created": 1724158270, + "created": 1724833943, "id": "", "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0", "object": "text_completion", @@ -383,7 +383,7 @@ "text": " Moh" } ], - "created": 1724158270, + "created": 1724833943, "id": "", "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0", "object": "text_completion", @@ -398,7 +398,7 @@ "text": " is" } ], - "created": 1724158270, + "created": 1724833943, "id": "", "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0", "object": "text_completion", @@ -413,7 +413,7 @@ "text": "m" } ], - "created": 1724158270, + "created": 1724833943, "id": "", "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0", "object": "text_completion", @@ -428,7 +428,7 @@ "text": " Room" } ], - "created": 1724158270, + "created": 1724833943, "id": "", "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0", "object": "text_completion", @@ -443,7 +443,7 @@ "text": "s" } ], - "created": 1724158270, + "created": 1724833943, "id": "", "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0", "object": "text_completion", @@ -458,7 +458,7 @@ "text": " the" } ], - "created": 1724158270, + "created": 1724833943, "id": "", "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0", "object": "text_completion", @@ -473,7 +473,7 @@ "text": " tired" } ], - "created": 1724158270, + "created": 1724833943, "id": "", "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0", "object": "text_completion", @@ -488,7 +488,7 @@ "text": ":" } ], - "created": 1724158270, + "created": 1724833943, "id": "", "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0", "object": "text_completion", @@ -503,7 +503,7 @@ "text": "'" } ], - "created": 1724158270, + "created": 1724833943, "id": "", "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0", "object": "text_completion", @@ -518,7 +518,7 @@ "text": " capital" } ], - "created": 1724158270, + "created": 1724833943, "id": "", "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0", "object": "text_completion", @@ -533,7 +533,7 @@ "text": "," } ], - "created": 1724158270, + "created": 1724833943, "id": "", "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0", "object": "text_completion", @@ -548,7 +548,7 @@ "text": " She" } ], - "created": 1724158270, + "created": 1724833943, "id": "", "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0", "object": "text_completion", @@ -563,7 +563,7 @@ "text": " scale" } ], - "created": 1724158270, + "created": 1724833943, "id": "", "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0", "object": "text_completion", @@ -578,7 +578,7 @@ "text": " of" } ], - "created": 1724158270, + "created": 1724833943, "id": "", "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0", "object": "text_completion", @@ -593,7 +593,7 @@ "text": " its" } ], - "created": 1724158270, + "created": 1724833943, "id": "", "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0", "object": "text_completion", diff --git a/server/text_generation_server/models/flash_causal_lm.py b/server/text_generation_server/models/flash_causal_lm.py index 6b0e1e86..409fe2e3 100644 --- a/server/text_generation_server/models/flash_causal_lm.py +++ b/server/text_generation_server/models/flash_causal_lm.py @@ -188,20 +188,18 @@ class FlashCausalLMBatch(Batch): def batch_tokenized_inputs( cls, requests: Iterable[generate_pb2.Request], tokenizer ): - batch_inputs = [] max_length = 0 all_input_ids = [] batch_size = 0 for r in requests: batch_size += 1 - batch_inputs.append(concat_text_chunks(r.input_chunks.chunks)) - + inputs = concat_text_chunks(r.input_chunks.chunks) input_ids = tokenizer( - batch_inputs, + inputs, truncation=True, max_length=r.truncate, add_special_tokens=r.add_special_tokens, - )["input_ids"][0] + )["input_ids"] max_length = max(max_length, len(input_ids)) all_input_ids.append(input_ids) return all_input_ids