Improve habana profile dev experience (#36) (#65)

Co-authored-by: Michal Szutenberg <37601244+szutenberg@users.noreply.github.com>
jkaniecki 2024-02-22 13:57:45 +01:00 committed by GitHub
parent f7ef414e38
commit 9ad6086250
2 changed files with 8 additions and 4 deletions


@@ -74,8 +74,10 @@ Environment Variables Added:
 | MAX_TOTAL_TOKENS | integer | 0 | Control the padding of input | add -e in docker run, such |
 | ENABLE_HPU_GRAPH | true/false | true | Enable hpu graph or not | add -e in docker run command |
 | PROF_WARMUPSTEP | integer | 0 | Enable/disable profile, control profile warmup step, 0 means disable profile | add -e in docker run command |
-| PROF_STEP | interger | 5 | Control profile step | add -e in docker run command |
-| PROF_PATH | string | /root/text-generation-inference | Define profile folder | add -e in docker run command |
+| PROF_STEP | integer | 5 | Control profile step | add -e in docker run command |
+| PROF_PATH | string | /tmp/hpu_profile | Define profile folder | add -e in docker run command |
+| PROF_RANKS | string | 0 | Comma-separated list of ranks to profile | add -e in docker run command |
+| PROF_RECORD_SHAPES | true/false | false | Control record_shapes option in the profiler | add -e in docker run command |
 | LIMIT_HPU_GRAPH | True/False | False | Skip HPU graph usage for prefill to save memory, set to `True` for large sequence/decoding lengths(e.g. 300/212) | add -e in docker run command |
 | BATCH_BUCKET_SIZE | integer | 8 | Batch size for decode operation will be rounded to the nearest multiple of this number. This limits the number of cached graphs | add -e in docker run command |
 | PREFILL_BATCH_BUCKET_SIZE | integer | 4 | Batch size for prefill operation will be rounded to the nearest multiple of this number. This limits the number of cached graphs | add -e in docker run command |
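
As the last column notes, each of these variables is passed with -e on docker run. A minimal sketch of such an invocation, profiling ranks 0 and 1 with shape recording enabled (the image name is a placeholder, not taken from this commit):

docker run -e PROF_WARMUPSTEP=5 -e PROF_STEP=2 -e PROF_RANKS=0,1 \
    -e PROF_PATH=/tmp/hpu_profile -e PROF_RECORD_SHAPES=true \
    <tgi-gaudi-image>   # placeholder: substitute the actual serving image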


@@ -596,11 +596,13 @@ class CausalLM(Model):
             rank=rank,
             kwargs=kwargs,
         )
-        self.profiling_warmup_steps = int(os.getenv("PROF_WARMUPSTEP", "0"))
+        prof_ranks = [int(val) for val in os.getenv("PROF_RANKS", "0").split(',')]
+        self.profiling_warmup_steps = int(os.getenv("PROF_WARMUPSTEP", "0")) if rank in prof_ranks else 0
         self.profiling_steps = int(os.getenv("PROF_STEP", "5"))
+        record_shapes = os.getenv("PROF_RECORD_SHAPES", "false").lower() == "true"
         output_dir = os.getenv("PROF_PATH", "/tmp/hpu_profile")
         self.hb_profer = HabanaProfile(
-            warmup=self.profiling_warmup_steps, active=self.profiling_steps, output_dir=output_dir
+            warmup=self.profiling_warmup_steps, active=self.profiling_steps, output_dir=output_dir, record_shapes=record_shapes
         )
         if self.profiling_warmup_steps > 0:
             self.hb_profer_started = True
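
HabanaProfile itself is not shown in this diff. As a rough sketch of the behavior these variables drive, assuming HabanaProfile wraps torch.profiler with a warmup/active schedule (the real class may differ), with MiniHpuProfile as a hypothetical stand-in:

import os
import torch

class MiniHpuProfile:
    """Hypothetical stand-in for HabanaProfile, assuming it wraps
    torch.profiler with a warmup/active schedule."""

    def __init__(self, warmup=0, active=0, output_dir="/tmp/hpu_profile", record_shapes=False):
        self._prof = torch.profiler.profile(
            schedule=torch.profiler.schedule(wait=0, warmup=warmup, active=active),
            on_trace_ready=torch.profiler.tensorboard_trace_handler(output_dir),
            record_shapes=record_shapes,  # toggled by PROF_RECORD_SHAPES
        )

    def start(self):
        self._prof.start()

    def step(self):
        # Called once per generation step; the schedule decides when to record.
        self._prof.step()

    def stop(self):
        self._prof.stop()

# Per-rank gating as introduced by this commit: only ranks listed in
# PROF_RANKS get a nonzero warmup, so profiling stays off everywhere else.
rank = 0  # illustrative; the real value comes from the distributed launcher
prof_ranks = [int(val) for val in os.getenv("PROF_RANKS", "0").split(",")]
warmup = int(os.getenv("PROF_WARMUPSTEP", "0")) if rank in prof_ranks else 0
profiler = MiniHpuProfile(
    warmup=warmup,
    active=int(os.getenv("PROF_STEP", "5")),
    output_dir=os.getenv("PROF_PATH", "/tmp/hpu_profile"),
    record_shapes=os.getenv("PROF_RECORD_SHAPES", "false").lower() == "true",
)
if warmup > 0:  # PROF_WARMUPSTEP=0 disables profiling, matching the table above
    profiler.start()

The sketch keeps the commit's gating intact: ranks absent from PROF_RANKS get warmup=0, and since start() is only called when warmup is positive, profiling stays disabled for them.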