[WIP] tmp dump of integration load tests.

This commit is contained in:
Nicolas Patry 2024-09-05 14:23:06 +02:00
parent 3669d078e0
commit d45408e935
No known key found for this signature in database
GPG Key ID: 64AF4752B2967863
5 changed files with 108 additions and 111 deletions

View File

@ -168,8 +168,6 @@ pub(crate) async fn batching_task(
None
} else {
// Minimum batch size
// TODO: temporarily disable to avoid incorrect deallocation +
// reallocation when using prefix caching.
Some((batch_size as f32 * waiting_served_ratio).floor() as usize)
};

View File

@ -70,9 +70,13 @@ impl Allocator for RadixAllocator {
) -> Option<BlockAllocation> {
let mut blocks = vec![];
let prefix_node = if let Some(prefill_tokens) = prefill_tokens.as_ref() {
let node_id = self
.cache_blocks
.find(prefill_tokens.as_slice(), &mut blocks);
let node_id = self.cache_blocks.find(
&prefill_tokens.as_slice()[..prefill_tokens.len().saturating_sub(1)],
&mut blocks,
);
// Even if this allocation fails below, we need to increase the
// refcount to ensure that the prefix that was found is not evicted.
node_id
} else {
self.cache_blocks.root_id()
@ -89,8 +93,6 @@ impl Allocator for RadixAllocator {
let suffix_blocks = (suffix_len + self.block_size - 1) / self.block_size;
tracing::info!("Prefix {prefix_len} - Suffix {suffix_len}");
match self.alloc_or_reclaim(suffix_blocks as usize) {
Some(suffix_blocks) => blocks.extend(suffix_blocks),
None => {

View File

@ -14,7 +14,7 @@
"usage": null
}
],
"created": 1725522218,
"created": 1725525936,
"id": "",
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
"object": "chat.completion",
@ -40,7 +40,7 @@
"usage": null
}
],
"created": 1725522218,
"created": 1725525943,
"id": "",
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
"object": "chat.completion",
@ -66,7 +66,7 @@
"usage": null
}
],
"created": 1725522218,
"created": 1725525943,
"id": "",
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
"object": "chat.completion",
@ -92,7 +92,7 @@
"usage": null
}
],
"created": 1725522218,
"created": 1725525935,
"id": "",
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
"object": "chat.completion",
@ -118,7 +118,7 @@
"usage": null
}
],
"created": 1725522218,
"created": 1725525943,
"id": "",
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
"object": "chat.completion",
@ -144,7 +144,7 @@
"usage": null
}
],
"created": 1725522218,
"created": 1725525941,
"id": "",
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
"object": "chat.completion",
@ -170,7 +170,7 @@
"usage": null
}
],
"created": 1725522218,
"created": 1725525942,
"id": "",
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
"object": "chat.completion",
@ -196,7 +196,7 @@
"usage": null
}
],
"created": 1725522218,
"created": 1725525943,
"id": "",
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
"object": "chat.completion",
@ -222,7 +222,7 @@
"usage": null
}
],
"created": 1725522218,
"created": 1725525942,
"id": "",
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
"object": "chat.completion",
@ -248,7 +248,7 @@
"usage": null
}
],
"created": 1725522218,
"created": 1725525942,
"id": "",
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
"object": "chat.completion",
@ -274,7 +274,7 @@
"usage": null
}
],
"created": 1725522218,
"created": 1725525943,
"id": "",
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
"object": "chat.completion",
@ -300,7 +300,7 @@
"usage": null
}
],
"created": 1725522218,
"created": 1725525935,
"id": "",
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
"object": "chat.completion",
@ -326,7 +326,7 @@
"usage": null
}
],
"created": 1725522218,
"created": 1725525935,
"id": "",
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
"object": "chat.completion",
@ -352,7 +352,7 @@
"usage": null
}
],
"created": 1725522218,
"created": 1725525936,
"id": "",
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
"object": "chat.completion",
@ -378,7 +378,7 @@
"usage": null
}
],
"created": 1725522218,
"created": 1725525936,
"id": "",
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
"object": "chat.completion",
@ -404,7 +404,7 @@
"usage": null
}
],
"created": 1725522218,
"created": 1725525943,
"id": "",
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
"object": "chat.completion",
@ -430,7 +430,7 @@
"usage": null
}
],
"created": 1725522217,
"created": 1725525935,
"id": "",
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
"object": "chat.completion",
@ -456,7 +456,7 @@
"usage": null
}
],
"created": 1725522218,
"created": 1725525941,
"id": "",
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
"object": "chat.completion",
@ -482,7 +482,7 @@
"usage": null
}
],
"created": 1725522227,
"created": 1725525941,
"id": "",
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
"object": "chat.completion",
@ -508,7 +508,7 @@
"usage": null
}
],
"created": 1725522227,
"created": 1725525942,
"id": "",
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
"object": "chat.completion",
@ -534,7 +534,7 @@
"usage": null
}
],
"created": 1725522227,
"created": 1725525942,
"id": "",
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
"object": "chat.completion",
@ -560,7 +560,7 @@
"usage": null
}
],
"created": 1725522227,
"created": 1725525942,
"id": "",
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
"object": "chat.completion",
@ -586,7 +586,7 @@
"usage": null
}
],
"created": 1725522227,
"created": 1725525935,
"id": "",
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
"object": "chat.completion",
@ -604,7 +604,7 @@
"index": 0,
"logprobs": null,
"message": {
"content": "/u/Cr!!!!!!!",
"content": "/u/CruxHub: Hey Alice, I",
"name": null,
"role": "assistant",
"tool_calls": null
@ -612,7 +612,7 @@
"usage": null
}
],
"created": 1725522227,
"created": 1725525942,
"id": "",
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
"object": "chat.completion",
@ -638,7 +638,7 @@
"usage": null
}
],
"created": 1725522227,
"created": 1725525936,
"id": "",
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
"object": "chat.completion",
@ -664,7 +664,7 @@
"usage": null
}
],
"created": 1725522227,
"created": 1725525943,
"id": "",
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
"object": "chat.completion",
@ -690,7 +690,7 @@
"usage": null
}
],
"created": 1725522227,
"created": 1725525941,
"id": "",
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
"object": "chat.completion",
@ -716,7 +716,7 @@
"usage": null
}
],
"created": 1725522227,
"created": 1725525941,
"id": "",
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
"object": "chat.completion",
@ -742,7 +742,7 @@
"usage": null
}
],
"created": 1725522227,
"created": 1725525942,
"id": "",
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
"object": "chat.completion",
@ -768,7 +768,7 @@
"usage": null
}
],
"created": 1725522227,
"created": 1725525935,
"id": "",
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
"object": "chat.completion",
@ -794,7 +794,7 @@
"usage": null
}
],
"created": 1725522227,
"created": 1725525942,
"id": "",
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
"object": "chat.completion",
@ -820,7 +820,7 @@
"usage": null
}
],
"created": 1725522227,
"created": 1725525943,
"id": "",
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
"object": "chat.completion",
@ -846,7 +846,7 @@
"usage": null
}
],
"created": 1725522227,
"created": 1725525943,
"id": "",
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
"object": "chat.completion",
@ -872,7 +872,7 @@
"usage": null
}
],
"created": 1725522227,
"created": 1725525941,
"id": "",
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
"object": "chat.completion",
@ -898,7 +898,7 @@
"usage": null
}
],
"created": 1725522227,
"created": 1725525935,
"id": "",
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
"object": "chat.completion",
@ -924,7 +924,7 @@
"usage": null
}
],
"created": 1725522227,
"created": 1725525942,
"id": "",
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
"object": "chat.completion",
@ -950,7 +950,7 @@
"usage": null
}
],
"created": 1725522227,
"created": 1725525936,
"id": "",
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
"object": "chat.completion",
@ -976,7 +976,7 @@
"usage": null
}
],
"created": 1725522227,
"created": 1725525941,
"id": "",
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
"object": "chat.completion",
@ -1002,7 +1002,7 @@
"usage": null
}
],
"created": 1725522227,
"created": 1725525942,
"id": "",
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
"object": "chat.completion",
@ -1028,7 +1028,7 @@
"usage": null
}
],
"created": 1725522227,
"created": 1725525936,
"id": "",
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
"object": "chat.completion",
@ -1054,7 +1054,7 @@
"usage": null
}
],
"created": 1725522227,
"created": 1725525941,
"id": "",
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
"object": "chat.completion",
@ -1080,7 +1080,7 @@
"usage": null
}
],
"created": 1725522227,
"created": 1725525936,
"id": "",
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
"object": "chat.completion",
@ -1106,7 +1106,7 @@
"usage": null
}
],
"created": 1725522227,
"created": 1725525943,
"id": "",
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
"object": "chat.completion",
@ -1132,7 +1132,7 @@
"usage": null
}
],
"created": 1725522227,
"created": 1725525943,
"id": "",
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
"object": "chat.completion",
@ -1158,7 +1158,7 @@
"usage": null
}
],
"created": 1725522227,
"created": 1725525943,
"id": "",
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
"object": "chat.completion",
@ -1184,7 +1184,7 @@
"usage": null
}
],
"created": 1725522227,
"created": 1725525941,
"id": "",
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
"object": "chat.completion",
@ -1210,7 +1210,7 @@
"usage": null
}
],
"created": 1725522227,
"created": 1725525935,
"id": "",
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
"object": "chat.completion",
@ -1236,7 +1236,7 @@
"usage": null
}
],
"created": 1725522227,
"created": 1725525943,
"id": "",
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
"object": "chat.completion",
@ -1262,7 +1262,7 @@
"usage": null
}
],
"created": 1725522227,
"created": 1725525942,
"id": "",
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
"object": "chat.completion",
@ -1288,7 +1288,7 @@
"usage": null
}
],
"created": 1725522227,
"created": 1725525943,
"id": "",
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
"object": "chat.completion",
@ -1314,7 +1314,7 @@
"usage": null
}
],
"created": 1725522227,
"created": 1725525943,
"id": "",
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
"object": "chat.completion",
@ -1340,7 +1340,7 @@
"usage": null
}
],
"created": 1725522227,
"created": 1725525936,
"id": "",
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
"object": "chat.completion",
@ -1366,7 +1366,7 @@
"usage": null
}
],
"created": 1725522227,
"created": 1725525943,
"id": "",
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
"object": "chat.completion",
@ -1392,7 +1392,7 @@
"usage": null
}
],
"created": 1725522227,
"created": 1725525943,
"id": "",
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
"object": "chat.completion",
@ -1418,7 +1418,7 @@
"usage": null
}
],
"created": 1725522227,
"created": 1725525943,
"id": "",
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
"object": "chat.completion",
@ -1444,7 +1444,7 @@
"usage": null
}
],
"created": 1725522227,
"created": 1725525936,
"id": "",
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
"object": "chat.completion",
@ -1470,7 +1470,7 @@
"usage": null
}
],
"created": 1725522227,
"created": 1725525935,
"id": "",
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
"object": "chat.completion",
@ -1496,7 +1496,7 @@
"usage": null
}
],
"created": 1725522227,
"created": 1725525943,
"id": "",
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
"object": "chat.completion",
@ -1522,7 +1522,7 @@
"usage": null
}
],
"created": 1725522227,
"created": 1725525942,
"id": "",
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
"object": "chat.completion",
@ -1548,7 +1548,7 @@
"usage": null
}
],
"created": 1725522227,
"created": 1725525943,
"id": "",
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
"object": "chat.completion",
@ -1574,7 +1574,7 @@
"usage": null
}
],
"created": 1725522227,
"created": 1725525941,
"id": "",
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
"object": "chat.completion",
@ -1600,7 +1600,7 @@
"usage": null
}
],
"created": 1725522227,
"created": 1725525943,
"id": "",
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
"object": "chat.completion",
@ -1626,7 +1626,7 @@
"usage": null
}
],
"created": 1725522227,
"created": 1725525941,
"id": "",
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
"object": "chat.completion",
@ -1652,7 +1652,7 @@
"usage": null
}
],
"created": 1725522227,
"created": 1725525936,
"id": "",
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
"object": "chat.completion",
@ -1678,7 +1678,7 @@
"usage": null
}
],
"created": 1725522227,
"created": 1725525935,
"id": "",
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
"object": "chat.completion",
@ -1704,7 +1704,7 @@
"usage": null
}
],
"created": 1725522227,
"created": 1725525941,
"id": "",
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
"object": "chat.completion",
@ -1730,7 +1730,7 @@
"usage": null
}
],
"created": 1725522227,
"created": 1725525941,
"id": "",
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
"object": "chat.completion",
@ -1756,7 +1756,7 @@
"usage": null
}
],
"created": 1725522227,
"created": 1725525942,
"id": "",
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
"object": "chat.completion",
@ -1782,7 +1782,7 @@
"usage": null
}
],
"created": 1725522227,
"created": 1725525943,
"id": "",
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
"object": "chat.completion",
@ -1808,7 +1808,7 @@
"usage": null
}
],
"created": 1725522227,
"created": 1725525943,
"id": "",
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
"object": "chat.completion",
@ -1834,7 +1834,7 @@
"usage": null
}
],
"created": 1725522227,
"created": 1725525941,
"id": "",
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
"object": "chat.completion",
@ -1860,7 +1860,7 @@
"usage": null
}
],
"created": 1725522227,
"created": 1725525943,
"id": "",
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
"object": "chat.completion",
@ -1886,7 +1886,7 @@
"usage": null
}
],
"created": 1725522227,
"created": 1725525943,
"id": "",
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
"object": "chat.completion",
@ -1912,7 +1912,7 @@
"usage": null
}
],
"created": 1725522227,
"created": 1725525941,
"id": "",
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
"object": "chat.completion",
@ -1938,7 +1938,7 @@
"usage": null
}
],
"created": 1725522227,
"created": 1725525942,
"id": "",
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
"object": "chat.completion",
@ -1964,7 +1964,7 @@
"usage": null
}
],
"created": 1725522227,
"created": 1725525942,
"id": "",
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
"object": "chat.completion",
@ -1990,7 +1990,7 @@
"usage": null
}
],
"created": 1725522227,
"created": 1725525942,
"id": "",
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
"object": "chat.completion",
@ -2016,7 +2016,7 @@
"usage": null
}
],
"created": 1725522227,
"created": 1725525936,
"id": "",
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
"object": "chat.completion",
@ -2042,7 +2042,7 @@
"usage": null
}
],
"created": 1725522227,
"created": 1725525942,
"id": "",
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
"object": "chat.completion",
@ -2068,7 +2068,7 @@
"usage": null
}
],
"created": 1725522227,
"created": 1725525941,
"id": "",
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
"object": "chat.completion",
@ -2094,7 +2094,7 @@
"usage": null
}
],
"created": 1725522227,
"created": 1725525943,
"id": "",
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
"object": "chat.completion",
@ -2120,7 +2120,7 @@
"usage": null
}
],
"created": 1725522227,
"created": 1725525935,
"id": "",
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
"object": "chat.completion",
@ -2146,7 +2146,7 @@
"usage": null
}
],
"created": 1725522227,
"created": 1725525943,
"id": "",
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
"object": "chat.completion",
@ -2172,7 +2172,7 @@
"usage": null
}
],
"created": 1725522227,
"created": 1725525936,
"id": "",
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
"object": "chat.completion",
@ -2198,7 +2198,7 @@
"usage": null
}
],
"created": 1725522227,
"created": 1725525935,
"id": "",
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
"object": "chat.completion",
@ -2224,7 +2224,7 @@
"usage": null
}
],
"created": 1725522227,
"created": 1725525942,
"id": "",
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
"object": "chat.completion",
@ -2250,7 +2250,7 @@
"usage": null
}
],
"created": 1725522227,
"created": 1725525943,
"id": "",
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
"object": "chat.completion",
@ -2276,7 +2276,7 @@
"usage": null
}
],
"created": 1725522227,
"created": 1725525943,
"id": "",
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
"object": "chat.completion",
@ -2302,7 +2302,7 @@
"usage": null
}
],
"created": 1725522227,
"created": 1725525943,
"id": "",
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
"object": "chat.completion",
@ -2328,7 +2328,7 @@
"usage": null
}
],
"created": 1725522227,
"created": 1725525942,
"id": "",
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
"object": "chat.completion",
@ -2354,7 +2354,7 @@
"usage": null
}
],
"created": 1725522227,
"created": 1725525941,
"id": "",
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
"object": "chat.completion",
@ -2380,7 +2380,7 @@
"usage": null
}
],
"created": 1725522227,
"created": 1725525936,
"id": "",
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
"object": "chat.completion",
@ -2406,7 +2406,7 @@
"usage": null
}
],
"created": 1725522227,
"created": 1725525941,
"id": "",
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
"object": "chat.completion",
@ -2432,7 +2432,7 @@
"usage": null
}
],
"created": 1725522227,
"created": 1725525943,
"id": "",
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
"object": "chat.completion",
@ -2458,7 +2458,7 @@
"usage": null
}
],
"created": 1725522228,
"created": 1725525936,
"id": "",
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
"object": "chat.completion",
@ -2484,7 +2484,7 @@
"usage": null
}
],
"created": 1725522227,
"created": 1725525935,
"id": "",
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
"object": "chat.completion",
@ -2510,7 +2510,7 @@
"usage": null
}
],
"created": 1725522228,
"created": 1725525941,
"id": "",
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
"object": "chat.completion",
@ -2536,7 +2536,7 @@
"usage": null
}
],
"created": 1725522227,
"created": 1725525942,
"id": "",
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
"object": "chat.completion",
@ -2562,7 +2562,7 @@
"usage": null
}
],
"created": 1725522228,
"created": 1725525935,
"id": "",
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
"object": "chat.completion",

View File

@ -3,7 +3,7 @@ import pytest
@pytest.fixture(scope="module")
def flash_llama_handle(launcher):
with launcher("meta-llama/Meta-Llama-3.1-8B-Instruct", num_shard=4) as handle:
with launcher("meta-llama/Meta-Llama-3.1-8B-Instruct", num_shard=2) as handle:
yield handle

View File

@ -268,9 +268,6 @@ class FlashCausalLMBatch(Batch):
assert (
prefix_len <= orig_input_length
), f"Prefix {prefix_len} vs input {orig_input_length}"
if prefix_len == orig_input_length:
assert prefix_len > 0
prefix_len -= 1
prefix_ids.append(tokenized_input[:prefix_len])
tokenized_input = tokenized_input[prefix_len:]