Simplify the warmup process (#173)

Signed-off-by: yuanwu <yuan.wu@intel.com>
This commit is contained in:
yuanwu2017 2024-08-15 18:04:14 +08:00 committed by GitHub
parent e3f0f85b70
commit 369e499a66
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -1192,14 +1192,6 @@ class CausalLM(Model):
if len(request_ids) < len(decode_batch.requests): if len(request_ids) < len(decode_batch.requests):
decode_batch = decode_batch.filter(request_ids) decode_batch = decode_batch.filter(request_ids)
while decode_batch is not None:
# filter finished requests
request_ids = get_unfinished_requests(decode_batch.requests)
if len(request_ids) < len(decode_batch.requests):
decode_batch = decode_batch.filter(request_ids)
# decode
_, decode_batch, _ = self.generate_token([decode_batch])
def shifting_warmup(self, batch: CausalLMBatch) -> None: def shifting_warmup(self, batch: CausalLMBatch) -> None:
chunk_sizes = CHUNK_SIZES.copy() chunk_sizes = CHUNK_SIZES.copy()
chunk_sizes.extend([-chunk for chunk in chunk_sizes]) chunk_sizes.extend([-chunk for chunk in chunk_sizes])