Round batch size (BS) up to BATCH_BUCKET_SIZE to prevent graph re-capture when the graph input has not changed (#185)

This commit is contained in:
BaihuiJin 2024-07-16 15:42:46 +08:00 committed by GitHub
parent aac547dd82
commit 15e5df1cc4
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -966,9 +966,9 @@ class CausalLM(Model):
batch = batch.__class__.recombine([batch], self.tokenizer.pad_token_id) batch = batch.__class__.recombine([batch], self.tokenizer.pad_token_id)
scenario = 'PREFILL' if prefill else 'GENERATE' scenario = 'PREFILL' if prefill else 'GENERATE'
if self.enable_hpu_graph and batch.batch_size != self.prev_bs: if self.enable_hpu_graph and self.limit_hpu_graph and round_up(batch.batch_size, BATCH_BUCKET_SIZE) != self.prev_bs:
self.model.clear_cache() self.model.clear_cache()
self.prev_bs = batch.batch_size self.prev_bs = round_up(batch.batch_size, BATCH_BUCKET_SIZE)
dbg_trace( dbg_trace(
scenario, f'bs:{batch.batch_size} num_reqs:{len(batch.requests)} seq_len:{batch.seq_length} padding:{batch.right_padding}') scenario, f'bs:{batch.batch_size} num_reqs:{len(batch.requests)} seq_len:{batch.seq_length} padding:{batch.right_padding}')
assert batch.right_padding > 0, 'No more room for next token!' assert batch.right_padding > 0, 'No more room for next token!'