mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-09-12 04:44:52 +00:00
[WIP] tmp dump of integration load tests.
This commit is contained in:
parent
3669d078e0
commit
d45408e935
@ -168,8 +168,6 @@ pub(crate) async fn batching_task(
|
||||
None
|
||||
} else {
|
||||
// Minimum batch size
|
||||
// TODO: temporarily disable to avoid incorrect deallocation +
|
||||
// reallocation when using prefix caching.
|
||||
Some((batch_size as f32 * waiting_served_ratio).floor() as usize)
|
||||
};
|
||||
|
||||
|
@ -70,9 +70,13 @@ impl Allocator for RadixAllocator {
|
||||
) -> Option<BlockAllocation> {
|
||||
let mut blocks = vec![];
|
||||
let prefix_node = if let Some(prefill_tokens) = prefill_tokens.as_ref() {
|
||||
let node_id = self
|
||||
.cache_blocks
|
||||
.find(prefill_tokens.as_slice(), &mut blocks);
|
||||
let node_id = self.cache_blocks.find(
|
||||
&prefill_tokens.as_slice()[..prefill_tokens.len().saturating_sub(1)],
|
||||
&mut blocks,
|
||||
);
|
||||
// Even if this allocation fails below, we need to increase the
|
||||
// refcount to ensure that the prefix that was found is not evicted.
|
||||
|
||||
node_id
|
||||
} else {
|
||||
self.cache_blocks.root_id()
|
||||
@ -89,8 +93,6 @@ impl Allocator for RadixAllocator {
|
||||
|
||||
let suffix_blocks = (suffix_len + self.block_size - 1) / self.block_size;
|
||||
|
||||
tracing::info!("Prefix {prefix_len} - Suffix {suffix_len}");
|
||||
|
||||
match self.alloc_or_reclaim(suffix_blocks as usize) {
|
||||
Some(suffix_blocks) => blocks.extend(suffix_blocks),
|
||||
None => {
|
||||
|
@ -14,7 +14,7 @@
|
||||
"usage": null
|
||||
}
|
||||
],
|
||||
"created": 1725522218,
|
||||
"created": 1725525936,
|
||||
"id": "",
|
||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
||||
"object": "chat.completion",
|
||||
@ -40,7 +40,7 @@
|
||||
"usage": null
|
||||
}
|
||||
],
|
||||
"created": 1725522218,
|
||||
"created": 1725525943,
|
||||
"id": "",
|
||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
||||
"object": "chat.completion",
|
||||
@ -66,7 +66,7 @@
|
||||
"usage": null
|
||||
}
|
||||
],
|
||||
"created": 1725522218,
|
||||
"created": 1725525943,
|
||||
"id": "",
|
||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
||||
"object": "chat.completion",
|
||||
@ -92,7 +92,7 @@
|
||||
"usage": null
|
||||
}
|
||||
],
|
||||
"created": 1725522218,
|
||||
"created": 1725525935,
|
||||
"id": "",
|
||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
||||
"object": "chat.completion",
|
||||
@ -118,7 +118,7 @@
|
||||
"usage": null
|
||||
}
|
||||
],
|
||||
"created": 1725522218,
|
||||
"created": 1725525943,
|
||||
"id": "",
|
||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
||||
"object": "chat.completion",
|
||||
@ -144,7 +144,7 @@
|
||||
"usage": null
|
||||
}
|
||||
],
|
||||
"created": 1725522218,
|
||||
"created": 1725525941,
|
||||
"id": "",
|
||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
||||
"object": "chat.completion",
|
||||
@ -170,7 +170,7 @@
|
||||
"usage": null
|
||||
}
|
||||
],
|
||||
"created": 1725522218,
|
||||
"created": 1725525942,
|
||||
"id": "",
|
||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
||||
"object": "chat.completion",
|
||||
@ -196,7 +196,7 @@
|
||||
"usage": null
|
||||
}
|
||||
],
|
||||
"created": 1725522218,
|
||||
"created": 1725525943,
|
||||
"id": "",
|
||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
||||
"object": "chat.completion",
|
||||
@ -222,7 +222,7 @@
|
||||
"usage": null
|
||||
}
|
||||
],
|
||||
"created": 1725522218,
|
||||
"created": 1725525942,
|
||||
"id": "",
|
||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
||||
"object": "chat.completion",
|
||||
@ -248,7 +248,7 @@
|
||||
"usage": null
|
||||
}
|
||||
],
|
||||
"created": 1725522218,
|
||||
"created": 1725525942,
|
||||
"id": "",
|
||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
||||
"object": "chat.completion",
|
||||
@ -274,7 +274,7 @@
|
||||
"usage": null
|
||||
}
|
||||
],
|
||||
"created": 1725522218,
|
||||
"created": 1725525943,
|
||||
"id": "",
|
||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
||||
"object": "chat.completion",
|
||||
@ -300,7 +300,7 @@
|
||||
"usage": null
|
||||
}
|
||||
],
|
||||
"created": 1725522218,
|
||||
"created": 1725525935,
|
||||
"id": "",
|
||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
||||
"object": "chat.completion",
|
||||
@ -326,7 +326,7 @@
|
||||
"usage": null
|
||||
}
|
||||
],
|
||||
"created": 1725522218,
|
||||
"created": 1725525935,
|
||||
"id": "",
|
||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
||||
"object": "chat.completion",
|
||||
@ -352,7 +352,7 @@
|
||||
"usage": null
|
||||
}
|
||||
],
|
||||
"created": 1725522218,
|
||||
"created": 1725525936,
|
||||
"id": "",
|
||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
||||
"object": "chat.completion",
|
||||
@ -378,7 +378,7 @@
|
||||
"usage": null
|
||||
}
|
||||
],
|
||||
"created": 1725522218,
|
||||
"created": 1725525936,
|
||||
"id": "",
|
||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
||||
"object": "chat.completion",
|
||||
@ -404,7 +404,7 @@
|
||||
"usage": null
|
||||
}
|
||||
],
|
||||
"created": 1725522218,
|
||||
"created": 1725525943,
|
||||
"id": "",
|
||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
||||
"object": "chat.completion",
|
||||
@ -430,7 +430,7 @@
|
||||
"usage": null
|
||||
}
|
||||
],
|
||||
"created": 1725522217,
|
||||
"created": 1725525935,
|
||||
"id": "",
|
||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
||||
"object": "chat.completion",
|
||||
@ -456,7 +456,7 @@
|
||||
"usage": null
|
||||
}
|
||||
],
|
||||
"created": 1725522218,
|
||||
"created": 1725525941,
|
||||
"id": "",
|
||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
||||
"object": "chat.completion",
|
||||
@ -482,7 +482,7 @@
|
||||
"usage": null
|
||||
}
|
||||
],
|
||||
"created": 1725522227,
|
||||
"created": 1725525941,
|
||||
"id": "",
|
||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
||||
"object": "chat.completion",
|
||||
@ -508,7 +508,7 @@
|
||||
"usage": null
|
||||
}
|
||||
],
|
||||
"created": 1725522227,
|
||||
"created": 1725525942,
|
||||
"id": "",
|
||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
||||
"object": "chat.completion",
|
||||
@ -534,7 +534,7 @@
|
||||
"usage": null
|
||||
}
|
||||
],
|
||||
"created": 1725522227,
|
||||
"created": 1725525942,
|
||||
"id": "",
|
||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
||||
"object": "chat.completion",
|
||||
@ -560,7 +560,7 @@
|
||||
"usage": null
|
||||
}
|
||||
],
|
||||
"created": 1725522227,
|
||||
"created": 1725525942,
|
||||
"id": "",
|
||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
||||
"object": "chat.completion",
|
||||
@ -586,7 +586,7 @@
|
||||
"usage": null
|
||||
}
|
||||
],
|
||||
"created": 1725522227,
|
||||
"created": 1725525935,
|
||||
"id": "",
|
||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
||||
"object": "chat.completion",
|
||||
@ -604,7 +604,7 @@
|
||||
"index": 0,
|
||||
"logprobs": null,
|
||||
"message": {
|
||||
"content": "/u/Cr!!!!!!!",
|
||||
"content": "/u/CruxHub: Hey Alice, I",
|
||||
"name": null,
|
||||
"role": "assistant",
|
||||
"tool_calls": null
|
||||
@ -612,7 +612,7 @@
|
||||
"usage": null
|
||||
}
|
||||
],
|
||||
"created": 1725522227,
|
||||
"created": 1725525942,
|
||||
"id": "",
|
||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
||||
"object": "chat.completion",
|
||||
@ -638,7 +638,7 @@
|
||||
"usage": null
|
||||
}
|
||||
],
|
||||
"created": 1725522227,
|
||||
"created": 1725525936,
|
||||
"id": "",
|
||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
||||
"object": "chat.completion",
|
||||
@ -664,7 +664,7 @@
|
||||
"usage": null
|
||||
}
|
||||
],
|
||||
"created": 1725522227,
|
||||
"created": 1725525943,
|
||||
"id": "",
|
||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
||||
"object": "chat.completion",
|
||||
@ -690,7 +690,7 @@
|
||||
"usage": null
|
||||
}
|
||||
],
|
||||
"created": 1725522227,
|
||||
"created": 1725525941,
|
||||
"id": "",
|
||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
||||
"object": "chat.completion",
|
||||
@ -716,7 +716,7 @@
|
||||
"usage": null
|
||||
}
|
||||
],
|
||||
"created": 1725522227,
|
||||
"created": 1725525941,
|
||||
"id": "",
|
||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
||||
"object": "chat.completion",
|
||||
@ -742,7 +742,7 @@
|
||||
"usage": null
|
||||
}
|
||||
],
|
||||
"created": 1725522227,
|
||||
"created": 1725525942,
|
||||
"id": "",
|
||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
||||
"object": "chat.completion",
|
||||
@ -768,7 +768,7 @@
|
||||
"usage": null
|
||||
}
|
||||
],
|
||||
"created": 1725522227,
|
||||
"created": 1725525935,
|
||||
"id": "",
|
||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
||||
"object": "chat.completion",
|
||||
@ -794,7 +794,7 @@
|
||||
"usage": null
|
||||
}
|
||||
],
|
||||
"created": 1725522227,
|
||||
"created": 1725525942,
|
||||
"id": "",
|
||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
||||
"object": "chat.completion",
|
||||
@ -820,7 +820,7 @@
|
||||
"usage": null
|
||||
}
|
||||
],
|
||||
"created": 1725522227,
|
||||
"created": 1725525943,
|
||||
"id": "",
|
||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
||||
"object": "chat.completion",
|
||||
@ -846,7 +846,7 @@
|
||||
"usage": null
|
||||
}
|
||||
],
|
||||
"created": 1725522227,
|
||||
"created": 1725525943,
|
||||
"id": "",
|
||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
||||
"object": "chat.completion",
|
||||
@ -872,7 +872,7 @@
|
||||
"usage": null
|
||||
}
|
||||
],
|
||||
"created": 1725522227,
|
||||
"created": 1725525941,
|
||||
"id": "",
|
||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
||||
"object": "chat.completion",
|
||||
@ -898,7 +898,7 @@
|
||||
"usage": null
|
||||
}
|
||||
],
|
||||
"created": 1725522227,
|
||||
"created": 1725525935,
|
||||
"id": "",
|
||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
||||
"object": "chat.completion",
|
||||
@ -924,7 +924,7 @@
|
||||
"usage": null
|
||||
}
|
||||
],
|
||||
"created": 1725522227,
|
||||
"created": 1725525942,
|
||||
"id": "",
|
||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
||||
"object": "chat.completion",
|
||||
@ -950,7 +950,7 @@
|
||||
"usage": null
|
||||
}
|
||||
],
|
||||
"created": 1725522227,
|
||||
"created": 1725525936,
|
||||
"id": "",
|
||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
||||
"object": "chat.completion",
|
||||
@ -976,7 +976,7 @@
|
||||
"usage": null
|
||||
}
|
||||
],
|
||||
"created": 1725522227,
|
||||
"created": 1725525941,
|
||||
"id": "",
|
||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
||||
"object": "chat.completion",
|
||||
@ -1002,7 +1002,7 @@
|
||||
"usage": null
|
||||
}
|
||||
],
|
||||
"created": 1725522227,
|
||||
"created": 1725525942,
|
||||
"id": "",
|
||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
||||
"object": "chat.completion",
|
||||
@ -1028,7 +1028,7 @@
|
||||
"usage": null
|
||||
}
|
||||
],
|
||||
"created": 1725522227,
|
||||
"created": 1725525936,
|
||||
"id": "",
|
||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
||||
"object": "chat.completion",
|
||||
@ -1054,7 +1054,7 @@
|
||||
"usage": null
|
||||
}
|
||||
],
|
||||
"created": 1725522227,
|
||||
"created": 1725525941,
|
||||
"id": "",
|
||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
||||
"object": "chat.completion",
|
||||
@ -1080,7 +1080,7 @@
|
||||
"usage": null
|
||||
}
|
||||
],
|
||||
"created": 1725522227,
|
||||
"created": 1725525936,
|
||||
"id": "",
|
||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
||||
"object": "chat.completion",
|
||||
@ -1106,7 +1106,7 @@
|
||||
"usage": null
|
||||
}
|
||||
],
|
||||
"created": 1725522227,
|
||||
"created": 1725525943,
|
||||
"id": "",
|
||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
||||
"object": "chat.completion",
|
||||
@ -1132,7 +1132,7 @@
|
||||
"usage": null
|
||||
}
|
||||
],
|
||||
"created": 1725522227,
|
||||
"created": 1725525943,
|
||||
"id": "",
|
||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
||||
"object": "chat.completion",
|
||||
@ -1158,7 +1158,7 @@
|
||||
"usage": null
|
||||
}
|
||||
],
|
||||
"created": 1725522227,
|
||||
"created": 1725525943,
|
||||
"id": "",
|
||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
||||
"object": "chat.completion",
|
||||
@ -1184,7 +1184,7 @@
|
||||
"usage": null
|
||||
}
|
||||
],
|
||||
"created": 1725522227,
|
||||
"created": 1725525941,
|
||||
"id": "",
|
||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
||||
"object": "chat.completion",
|
||||
@ -1210,7 +1210,7 @@
|
||||
"usage": null
|
||||
}
|
||||
],
|
||||
"created": 1725522227,
|
||||
"created": 1725525935,
|
||||
"id": "",
|
||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
||||
"object": "chat.completion",
|
||||
@ -1236,7 +1236,7 @@
|
||||
"usage": null
|
||||
}
|
||||
],
|
||||
"created": 1725522227,
|
||||
"created": 1725525943,
|
||||
"id": "",
|
||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
||||
"object": "chat.completion",
|
||||
@ -1262,7 +1262,7 @@
|
||||
"usage": null
|
||||
}
|
||||
],
|
||||
"created": 1725522227,
|
||||
"created": 1725525942,
|
||||
"id": "",
|
||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
||||
"object": "chat.completion",
|
||||
@ -1288,7 +1288,7 @@
|
||||
"usage": null
|
||||
}
|
||||
],
|
||||
"created": 1725522227,
|
||||
"created": 1725525943,
|
||||
"id": "",
|
||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
||||
"object": "chat.completion",
|
||||
@ -1314,7 +1314,7 @@
|
||||
"usage": null
|
||||
}
|
||||
],
|
||||
"created": 1725522227,
|
||||
"created": 1725525943,
|
||||
"id": "",
|
||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
||||
"object": "chat.completion",
|
||||
@ -1340,7 +1340,7 @@
|
||||
"usage": null
|
||||
}
|
||||
],
|
||||
"created": 1725522227,
|
||||
"created": 1725525936,
|
||||
"id": "",
|
||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
||||
"object": "chat.completion",
|
||||
@ -1366,7 +1366,7 @@
|
||||
"usage": null
|
||||
}
|
||||
],
|
||||
"created": 1725522227,
|
||||
"created": 1725525943,
|
||||
"id": "",
|
||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
||||
"object": "chat.completion",
|
||||
@ -1392,7 +1392,7 @@
|
||||
"usage": null
|
||||
}
|
||||
],
|
||||
"created": 1725522227,
|
||||
"created": 1725525943,
|
||||
"id": "",
|
||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
||||
"object": "chat.completion",
|
||||
@ -1418,7 +1418,7 @@
|
||||
"usage": null
|
||||
}
|
||||
],
|
||||
"created": 1725522227,
|
||||
"created": 1725525943,
|
||||
"id": "",
|
||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
||||
"object": "chat.completion",
|
||||
@ -1444,7 +1444,7 @@
|
||||
"usage": null
|
||||
}
|
||||
],
|
||||
"created": 1725522227,
|
||||
"created": 1725525936,
|
||||
"id": "",
|
||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
||||
"object": "chat.completion",
|
||||
@ -1470,7 +1470,7 @@
|
||||
"usage": null
|
||||
}
|
||||
],
|
||||
"created": 1725522227,
|
||||
"created": 1725525935,
|
||||
"id": "",
|
||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
||||
"object": "chat.completion",
|
||||
@ -1496,7 +1496,7 @@
|
||||
"usage": null
|
||||
}
|
||||
],
|
||||
"created": 1725522227,
|
||||
"created": 1725525943,
|
||||
"id": "",
|
||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
||||
"object": "chat.completion",
|
||||
@ -1522,7 +1522,7 @@
|
||||
"usage": null
|
||||
}
|
||||
],
|
||||
"created": 1725522227,
|
||||
"created": 1725525942,
|
||||
"id": "",
|
||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
||||
"object": "chat.completion",
|
||||
@ -1548,7 +1548,7 @@
|
||||
"usage": null
|
||||
}
|
||||
],
|
||||
"created": 1725522227,
|
||||
"created": 1725525943,
|
||||
"id": "",
|
||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
||||
"object": "chat.completion",
|
||||
@ -1574,7 +1574,7 @@
|
||||
"usage": null
|
||||
}
|
||||
],
|
||||
"created": 1725522227,
|
||||
"created": 1725525941,
|
||||
"id": "",
|
||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
||||
"object": "chat.completion",
|
||||
@ -1600,7 +1600,7 @@
|
||||
"usage": null
|
||||
}
|
||||
],
|
||||
"created": 1725522227,
|
||||
"created": 1725525943,
|
||||
"id": "",
|
||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
||||
"object": "chat.completion",
|
||||
@ -1626,7 +1626,7 @@
|
||||
"usage": null
|
||||
}
|
||||
],
|
||||
"created": 1725522227,
|
||||
"created": 1725525941,
|
||||
"id": "",
|
||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
||||
"object": "chat.completion",
|
||||
@ -1652,7 +1652,7 @@
|
||||
"usage": null
|
||||
}
|
||||
],
|
||||
"created": 1725522227,
|
||||
"created": 1725525936,
|
||||
"id": "",
|
||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
||||
"object": "chat.completion",
|
||||
@ -1678,7 +1678,7 @@
|
||||
"usage": null
|
||||
}
|
||||
],
|
||||
"created": 1725522227,
|
||||
"created": 1725525935,
|
||||
"id": "",
|
||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
||||
"object": "chat.completion",
|
||||
@ -1704,7 +1704,7 @@
|
||||
"usage": null
|
||||
}
|
||||
],
|
||||
"created": 1725522227,
|
||||
"created": 1725525941,
|
||||
"id": "",
|
||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
||||
"object": "chat.completion",
|
||||
@ -1730,7 +1730,7 @@
|
||||
"usage": null
|
||||
}
|
||||
],
|
||||
"created": 1725522227,
|
||||
"created": 1725525941,
|
||||
"id": "",
|
||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
||||
"object": "chat.completion",
|
||||
@ -1756,7 +1756,7 @@
|
||||
"usage": null
|
||||
}
|
||||
],
|
||||
"created": 1725522227,
|
||||
"created": 1725525942,
|
||||
"id": "",
|
||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
||||
"object": "chat.completion",
|
||||
@ -1782,7 +1782,7 @@
|
||||
"usage": null
|
||||
}
|
||||
],
|
||||
"created": 1725522227,
|
||||
"created": 1725525943,
|
||||
"id": "",
|
||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
||||
"object": "chat.completion",
|
||||
@ -1808,7 +1808,7 @@
|
||||
"usage": null
|
||||
}
|
||||
],
|
||||
"created": 1725522227,
|
||||
"created": 1725525943,
|
||||
"id": "",
|
||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
||||
"object": "chat.completion",
|
||||
@ -1834,7 +1834,7 @@
|
||||
"usage": null
|
||||
}
|
||||
],
|
||||
"created": 1725522227,
|
||||
"created": 1725525941,
|
||||
"id": "",
|
||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
||||
"object": "chat.completion",
|
||||
@ -1860,7 +1860,7 @@
|
||||
"usage": null
|
||||
}
|
||||
],
|
||||
"created": 1725522227,
|
||||
"created": 1725525943,
|
||||
"id": "",
|
||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
||||
"object": "chat.completion",
|
||||
@ -1886,7 +1886,7 @@
|
||||
"usage": null
|
||||
}
|
||||
],
|
||||
"created": 1725522227,
|
||||
"created": 1725525943,
|
||||
"id": "",
|
||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
||||
"object": "chat.completion",
|
||||
@ -1912,7 +1912,7 @@
|
||||
"usage": null
|
||||
}
|
||||
],
|
||||
"created": 1725522227,
|
||||
"created": 1725525941,
|
||||
"id": "",
|
||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
||||
"object": "chat.completion",
|
||||
@ -1938,7 +1938,7 @@
|
||||
"usage": null
|
||||
}
|
||||
],
|
||||
"created": 1725522227,
|
||||
"created": 1725525942,
|
||||
"id": "",
|
||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
||||
"object": "chat.completion",
|
||||
@ -1964,7 +1964,7 @@
|
||||
"usage": null
|
||||
}
|
||||
],
|
||||
"created": 1725522227,
|
||||
"created": 1725525942,
|
||||
"id": "",
|
||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
||||
"object": "chat.completion",
|
||||
@ -1990,7 +1990,7 @@
|
||||
"usage": null
|
||||
}
|
||||
],
|
||||
"created": 1725522227,
|
||||
"created": 1725525942,
|
||||
"id": "",
|
||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
||||
"object": "chat.completion",
|
||||
@ -2016,7 +2016,7 @@
|
||||
"usage": null
|
||||
}
|
||||
],
|
||||
"created": 1725522227,
|
||||
"created": 1725525936,
|
||||
"id": "",
|
||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
||||
"object": "chat.completion",
|
||||
@ -2042,7 +2042,7 @@
|
||||
"usage": null
|
||||
}
|
||||
],
|
||||
"created": 1725522227,
|
||||
"created": 1725525942,
|
||||
"id": "",
|
||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
||||
"object": "chat.completion",
|
||||
@ -2068,7 +2068,7 @@
|
||||
"usage": null
|
||||
}
|
||||
],
|
||||
"created": 1725522227,
|
||||
"created": 1725525941,
|
||||
"id": "",
|
||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
||||
"object": "chat.completion",
|
||||
@ -2094,7 +2094,7 @@
|
||||
"usage": null
|
||||
}
|
||||
],
|
||||
"created": 1725522227,
|
||||
"created": 1725525943,
|
||||
"id": "",
|
||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
||||
"object": "chat.completion",
|
||||
@ -2120,7 +2120,7 @@
|
||||
"usage": null
|
||||
}
|
||||
],
|
||||
"created": 1725522227,
|
||||
"created": 1725525935,
|
||||
"id": "",
|
||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
||||
"object": "chat.completion",
|
||||
@ -2146,7 +2146,7 @@
|
||||
"usage": null
|
||||
}
|
||||
],
|
||||
"created": 1725522227,
|
||||
"created": 1725525943,
|
||||
"id": "",
|
||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
||||
"object": "chat.completion",
|
||||
@ -2172,7 +2172,7 @@
|
||||
"usage": null
|
||||
}
|
||||
],
|
||||
"created": 1725522227,
|
||||
"created": 1725525936,
|
||||
"id": "",
|
||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
||||
"object": "chat.completion",
|
||||
@ -2198,7 +2198,7 @@
|
||||
"usage": null
|
||||
}
|
||||
],
|
||||
"created": 1725522227,
|
||||
"created": 1725525935,
|
||||
"id": "",
|
||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
||||
"object": "chat.completion",
|
||||
@ -2224,7 +2224,7 @@
|
||||
"usage": null
|
||||
}
|
||||
],
|
||||
"created": 1725522227,
|
||||
"created": 1725525942,
|
||||
"id": "",
|
||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
||||
"object": "chat.completion",
|
||||
@ -2250,7 +2250,7 @@
|
||||
"usage": null
|
||||
}
|
||||
],
|
||||
"created": 1725522227,
|
||||
"created": 1725525943,
|
||||
"id": "",
|
||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
||||
"object": "chat.completion",
|
||||
@ -2276,7 +2276,7 @@
|
||||
"usage": null
|
||||
}
|
||||
],
|
||||
"created": 1725522227,
|
||||
"created": 1725525943,
|
||||
"id": "",
|
||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
||||
"object": "chat.completion",
|
||||
@ -2302,7 +2302,7 @@
|
||||
"usage": null
|
||||
}
|
||||
],
|
||||
"created": 1725522227,
|
||||
"created": 1725525943,
|
||||
"id": "",
|
||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
||||
"object": "chat.completion",
|
||||
@ -2328,7 +2328,7 @@
|
||||
"usage": null
|
||||
}
|
||||
],
|
||||
"created": 1725522227,
|
||||
"created": 1725525942,
|
||||
"id": "",
|
||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
||||
"object": "chat.completion",
|
||||
@ -2354,7 +2354,7 @@
|
||||
"usage": null
|
||||
}
|
||||
],
|
||||
"created": 1725522227,
|
||||
"created": 1725525941,
|
||||
"id": "",
|
||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
||||
"object": "chat.completion",
|
||||
@ -2380,7 +2380,7 @@
|
||||
"usage": null
|
||||
}
|
||||
],
|
||||
"created": 1725522227,
|
||||
"created": 1725525936,
|
||||
"id": "",
|
||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
||||
"object": "chat.completion",
|
||||
@ -2406,7 +2406,7 @@
|
||||
"usage": null
|
||||
}
|
||||
],
|
||||
"created": 1725522227,
|
||||
"created": 1725525941,
|
||||
"id": "",
|
||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
||||
"object": "chat.completion",
|
||||
@ -2432,7 +2432,7 @@
|
||||
"usage": null
|
||||
}
|
||||
],
|
||||
"created": 1725522227,
|
||||
"created": 1725525943,
|
||||
"id": "",
|
||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
||||
"object": "chat.completion",
|
||||
@ -2458,7 +2458,7 @@
|
||||
"usage": null
|
||||
}
|
||||
],
|
||||
"created": 1725522228,
|
||||
"created": 1725525936,
|
||||
"id": "",
|
||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
||||
"object": "chat.completion",
|
||||
@ -2484,7 +2484,7 @@
|
||||
"usage": null
|
||||
}
|
||||
],
|
||||
"created": 1725522227,
|
||||
"created": 1725525935,
|
||||
"id": "",
|
||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
||||
"object": "chat.completion",
|
||||
@ -2510,7 +2510,7 @@
|
||||
"usage": null
|
||||
}
|
||||
],
|
||||
"created": 1725522228,
|
||||
"created": 1725525941,
|
||||
"id": "",
|
||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
||||
"object": "chat.completion",
|
||||
@ -2536,7 +2536,7 @@
|
||||
"usage": null
|
||||
}
|
||||
],
|
||||
"created": 1725522227,
|
||||
"created": 1725525942,
|
||||
"id": "",
|
||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
||||
"object": "chat.completion",
|
||||
@ -2562,7 +2562,7 @@
|
||||
"usage": null
|
||||
}
|
||||
],
|
||||
"created": 1725522228,
|
||||
"created": 1725525935,
|
||||
"id": "",
|
||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
||||
"object": "chat.completion",
|
||||
|
@ -3,7 +3,7 @@ import pytest
|
||||
|
||||
@pytest.fixture(scope="module")
|
||||
def flash_llama_handle(launcher):
|
||||
with launcher("meta-llama/Meta-Llama-3.1-8B-Instruct", num_shard=4) as handle:
|
||||
with launcher("meta-llama/Meta-Llama-3.1-8B-Instruct", num_shard=2) as handle:
|
||||
yield handle
|
||||
|
||||
|
||||
|
@ -268,9 +268,6 @@ class FlashCausalLMBatch(Batch):
|
||||
assert (
|
||||
prefix_len <= orig_input_length
|
||||
), f"Prefix {prefix_len} vs input {orig_input_length}"
|
||||
if prefix_len == orig_input_length:
|
||||
assert prefix_len > 0
|
||||
prefix_len -= 1
|
||||
|
||||
prefix_ids.append(tokenized_input[:prefix_len])
|
||||
tokenized_input = tokenized_input[prefix_len:]
|
||||
|
Loading…
Reference in New Issue
Block a user