mirror of https://github.com/huggingface/text-generation-inference.git (synced 2025-06-18 07:12:10 +00:00)
commit 9ad6086250 (parent f7ef414e38)
Co-authored-by: Michal Szutenberg <37601244+szutenberg@users.noreply.github.com>

This commit adds two profiling controls: PROF_RANKS, a comma-separated list of ranks to profile, and PROF_RECORD_SHAPES, which toggles the profiler's record_shapes option. It also corrects the documented default profile folder to /tmp/hpu_profile (matching the code) and fixes a typo ("interger") in the README table.
```diff
@@ -74,8 +74,10 @@ Environment Variables Added:
 | MAX_TOTAL_TOKENS | integer | 0 | Control the padding of input | add -e in docker run, such |
 | ENABLE_HPU_GRAPH | true/false | true | Enable hpu graph or not | add -e in docker run command |
 | PROF_WARMUPSTEP | integer | 0 | Enable/disable profile, control profile warmup step, 0 means disable profile | add -e in docker run command |
-| PROF_STEP | interger | 5 | Control profile step | add -e in docker run command |
-| PROF_PATH | string | /root/text-generation-inference | Define profile folder | add -e in docker run command |
+| PROF_STEP | integer | 5 | Control profile step | add -e in docker run command |
+| PROF_PATH | string | /tmp/hpu_profile | Define profile folder | add -e in docker run command |
+| PROF_RANKS | string | 0 | Comma-separated list of ranks to profile | add -e in docker run command |
+| PROF_RECORD_SHAPES | true/false | false | Control record_shapes option in the profiler | add -e in docker run command |
 | LIMIT_HPU_GRAPH | True/False | False | Skip HPU graph usage for prefill to save memory, set to `True` for large sequence/decoding lengths(e.g. 300/212) | add -e in docker run command |
 | BATCH_BUCKET_SIZE | integer | 8 | Batch size for decode operation will be rounded to the nearest multiple of this number. This limits the number of cached graphs | add -e in docker run command |
 | PREFILL_BATCH_BUCKET_SIZE | integer | 4 | Batch size for prefill operation will be rounded to the nearest multiple of this number. This limits the number of cached graphs | add -e in docker run command |
```
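To see how the profiling variables interact, here is a minimal sketch of the parsing logic the table implies. The `read_profiling_env` helper is hypothetical, written only for illustration; the variable names and defaults are taken from the table above.

```python
import os

def read_profiling_env(rank: int) -> dict:
    """Hypothetical helper: parse the profiling variables listed above.

    Only ranks listed in PROF_RANKS keep a non-zero warmup, so every
    other rank effectively has profiling disabled (warmup 0 = off).
    """
    prof_ranks = [int(val) for val in os.getenv("PROF_RANKS", "0").split(",")]
    return {
        "warmup_steps": int(os.getenv("PROF_WARMUPSTEP", "0")) if rank in prof_ranks else 0,
        "active_steps": int(os.getenv("PROF_STEP", "5")),
        "record_shapes": os.getenv("PROF_RECORD_SHAPES", "false").lower() == "true",
        "output_dir": os.getenv("PROF_PATH", "/tmp/hpu_profile"),
    }

if __name__ == "__main__":
    # With e.g. `docker run -e PROF_WARMUPSTEP=3 -e PROF_RANKS=0,1 ...`,
    # rank 0 would profile while rank 2 would not:
    os.environ.update({"PROF_WARMUPSTEP": "3", "PROF_RANKS": "0,1"})
    print(read_profiling_env(rank=0))  # warmup_steps=3 -> profiling on
    print(read_profiling_env(rank=2))  # warmup_steps=0 -> profiling off
```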
```diff
@@ -596,11 +596,13 @@ class CausalLM(Model):
             rank=rank,
             kwargs=kwargs,
         )
-        self.profiling_warmup_steps = int(os.getenv("PROF_WARMUPSTEP", "0"))
+        prof_ranks = [int(val) for val in os.getenv("PROF_RANKS", "0").split(',')]
+        self.profiling_warmup_steps = int(os.getenv("PROF_WARMUPSTEP", "0")) if rank in prof_ranks else 0
         self.profiling_steps = int(os.getenv("PROF_STEP", "5"))
+        record_shapes = os.getenv("PROF_RECORD_SHAPES", "false").lower() == "true"
         output_dir = os.getenv("PROF_PATH", "/tmp/hpu_profile")
         self.hb_profer = HabanaProfile(
-            warmup=self.profiling_warmup_steps, active=self.profiling_steps, output_dir=output_dir
+            warmup=self.profiling_warmup_steps, active=self.profiling_steps, output_dir=output_dir, record_shapes=record_shapes
         )
         if self.profiling_warmup_steps > 0:
             self.hb_profer_started = True
```
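The warmup/active arguments passed to HabanaProfile suggest the standard step-counting schedule: the first `warmup` steps are not recorded, the next `active` steps are. Below is a self-contained sketch of that pattern under those assumptions; `SimpleStepProfiler` is a hypothetical stand-in, not the real HabanaProfile implementation.

```python
class SimpleStepProfiler:
    """Hypothetical stand-in for HabanaProfile: ignores the first `warmup`
    steps, then records the next `active` steps to `output_dir`."""

    def __init__(self, warmup: int, active: int, output_dir: str,
                 record_shapes: bool = False):
        self.warmup = warmup
        self.active = active
        self.output_dir = output_dir
        self.record_shapes = record_shapes
        self.step_count = 0

    def step(self) -> None:
        self.step_count += 1
        if self.warmup < self.step_count <= self.warmup + self.active:
            print(f"step {self.step_count}: recording trace "
                  f"(record_shapes={self.record_shapes}) -> {self.output_dir}")

# Same gating as the diff: a rank excluded by PROF_RANKS ends up with
# warmup == 0, the `warmup > 0` check stays false, and step() never records.
profiler = SimpleStepProfiler(warmup=2, active=5,
                              output_dir="/tmp/hpu_profile", record_shapes=True)
if profiler.warmup > 0:
    for _ in range(8):     # e.g. eight decode iterations
        profiler.step()    # steps 3..7 are recorded
```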