Don't run the prefill warmup when limit_hpu_graph=true

Signed-off-by: yuanwu <yuan.wu@intel.com>
yuanwu 2024-12-01 21:29:41 +00:00
parent 4586325a34
commit 0228bd0260


@@ -1202,7 +1202,6 @@ class CausalLM(Model):
         decode_batch_size_list.append(max_decode_batch_size)
         decode_batch_size_list.sort(reverse=True)
-        self.limit_hpu_graph = True
         try:
             for batch_size in decode_batch_size_list:
                 batches= []
@@ -1234,11 +1233,12 @@ class CausalLM(Model):
             f"Memory stats: {mem_stats} "
         )
+        limit_hpu_graph = os.getenv("LIMIT_HPU_GRAPH", "false").lower() == "true"
+        if limit_hpu_graph == False:
             # Warmup prefill batch_size
             max_input_length = request.max_input_length
             prefill_batch_size_list = []
             prefill_seqlen_list = []
+            #Prefill and decode warmup
             try:
                 for batch_size in range(max_prefill_batch_size, 0, -PREFILL_BATCH_BUCKET_SIZE):
                     prefill_batch_size_list.append(batch_size)
@@ -1257,8 +1257,6 @@ class CausalLM(Model):
             )
             prefill_batch_size_list.sort()
             prefill_seqlen_list.sort()
-        limit_hpu_graph = os.getenv("LIMIT_HPU_GRAPH", "false").lower() == "true"
-        if limit_hpu_graph == False:
             mem_stats = get_hpu_memory_stats(self.device)
             logger.info(
                 f"\nFollowing prefill and decode warmup successfully.\n"