Incomplete generation stream fix (#2754)

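During prefill, entries.len() can be greater than batch.size, so the batch must be filtered in that case as well; the early return that skipped filtering is therefore removed.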

Signed-off-by: Wang, Yi A <yi.a.wang@intel.com>
This commit is contained in:
Wang, Yi 2024-11-21 22:06:26 +08:00 committed by GitHub
parent 3c54488638
commit 4cbba33139
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -369,11 +369,6 @@ async fn filter_batch(
) -> Option<CachedBatch> { ) -> Option<CachedBatch> {
let mut batch = next_batch?; let mut batch = next_batch?;
// No need to filter
if batch.size as usize == entries.len() {
return Some(batch);
}
let id = batch.id; let id = batch.id;
// Retain only requests that are still in entries // Retain only requests that are still in entries