mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-07-13 03:10:17 +00:00
Simplify the warmup process (#173)
Signed-off-by: yuanwu <yuan.wu@intel.com>
This commit is contained in:
parent
e3f0f85b70
commit
369e499a66
@@ -1192,14 +1192,6 @@ class CausalLM(Model):
|
||||
if len(request_ids) < len(decode_batch.requests):
|
||||
decode_batch = decode_batch.filter(request_ids)
|
||||
|
||||
while decode_batch is not None:
|
||||
# filter finished requests
|
||||
request_ids = get_unfinished_requests(decode_batch.requests)
|
||||
if len(request_ids) < len(decode_batch.requests):
|
||||
decode_batch = decode_batch.filter(request_ids)
|
||||
# decode
|
||||
_, decode_batch, _ = self.generate_token([decode_batch])
|
||||
|
||||
def shifting_warmup(self, batch: CausalLMBatch) -> None:
|
||||
chunk_sizes = CHUNK_SIZES.copy()
|
||||
chunk_sizes.extend([-chunk for chunk in chunk_sizes])
|
||||
|
Loading…
Reference in New Issue
Block a user