Co-authored-by: Michal Szutenberg <37601244+szutenberg@users.noreply.github.com>
commit 9ad6086250
parent f7ef414e38
@@ -74,8 +74,10 @@ Environment Variables Added:
 | MAX_TOTAL_TOKENS | integer | 0 | Control the padding of input | add -e in docker run, such |
 | ENABLE_HPU_GRAPH | true/false | true | Enable hpu graph or not | add -e in docker run command |
 | PROF_WARMUPSTEP | integer | 0 | Enable/disable profile, control profile warmup step, 0 means disable profile | add -e in docker run command |
-| PROF_STEP | interger | 5 | Control profile step | add -e in docker run command |
-| PROF_PATH | string | /root/text-generation-inference | Define profile folder | add -e in docker run command |
+| PROF_STEP | integer | 5 | Control profile step | add -e in docker run command |
+| PROF_PATH | string | /tmp/hpu_profile | Define profile folder | add -e in docker run command |
+| PROF_RANKS | string | 0 | Comma-separated list of ranks to profile | add -e in docker run command |
+| PROF_RECORD_SHAPES | true/false | false | Control record_shapes option in the profiler | add -e in docker run command |
 | LIMIT_HPU_GRAPH | True/False | False | Skip HPU graph usage for prefill to save memory, set to `True` for large sequence/decoding lengths(e.g. 300/212) | add -e in docker run command |
 | BATCH_BUCKET_SIZE | integer | 8 | Batch size for decode operation will be rounded to the nearest multiple of this number. This limits the number of cached graphs | add -e in docker run command |
 | PREFILL_BATCH_BUCKET_SIZE | integer | 4 | Batch size for prefill operation will be rounded to the nearest multiple of this number. This limits the number of cached graphs | add -e in docker run command |
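Note: the "add -e in docker run" column means each knob is a plain environment variable. A minimal sketch of how the new profiling variables reach the server, assuming the names in the table above; the docker command and the rank value are hypothetical, and the actual parsing is the causal_lm.py change in the next hunk:

    import os

    # Hypothetical launch:
    #   docker run ... -e PROF_RANKS=0,1 -e PROF_WARMUPSTEP=3 -e PROF_STEP=5 \
    #       -e PROF_RECORD_SHAPES=true -e PROF_PATH=/tmp/hpu_profile ...
    prof_ranks = [int(val) for val in os.getenv("PROF_RANKS", "0").split(',')]
    warmup = int(os.getenv("PROF_WARMUPSTEP", "0"))    # 0 disables profiling
    active = int(os.getenv("PROF_STEP", "5"))          # steps captured per trace
    record_shapes = os.getenv("PROF_RECORD_SHAPES", "false").lower() == "true"
    output_dir = os.getenv("PROF_PATH", "/tmp/hpu_profile")

    rank = 0  # hypothetical: each server shard knows its own rank
    # Only ranks listed in PROF_RANKS keep a non-zero warmup, so only they profile.
    warmup = warmup if rank in prof_ranks else 0
    print(prof_ranks, warmup, active, record_shapes, output_dir)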
@@ -596,11 +596,13 @@ class CausalLM(Model):
             rank=rank,
             kwargs=kwargs,
         )
-        self.profiling_warmup_steps = int(os.getenv("PROF_WARMUPSTEP", "0"))
+        prof_ranks = [int(val) for val in os.getenv("PROF_RANKS", "0").split(',')]
+        self.profiling_warmup_steps = int(os.getenv("PROF_WARMUPSTEP", "0")) if rank in prof_ranks else 0
         self.profiling_steps = int(os.getenv("PROF_STEP", "5"))
+        record_shapes = os.getenv("PROF_RECORD_SHAPES", "false").lower() == "true"
         output_dir = os.getenv("PROF_PATH", "/tmp/hpu_profile")
         self.hb_profer = HabanaProfile(
-            warmup=self.profiling_warmup_steps, active=self.profiling_steps, output_dir=output_dir
+            warmup=self.profiling_warmup_steps, active=self.profiling_steps, output_dir=output_dir, record_shapes=record_shapes
         )
         if self.profiling_warmup_steps > 0:
             self.hb_profer_started = True
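HabanaProfile itself is not part of this hunk. As a rough illustration of the warmup/active/record_shapes semantics the call above relies on, here is a stand-in built on torch.profiler; TinyProfile is a hypothetical name and the torch.profiler-based body is an assumption for illustration, not the repo's implementation:

    import torch

    class TinyProfile:
        """Hypothetical stand-in for HabanaProfile: runs a warmup/active
        schedule and writes TensorBoard traces into output_dir."""

        def __init__(self, warmup=0, active=0, output_dir="/tmp/hpu_profile", record_shapes=False):
            self._prof = None
            # warmup == 0 means profiling is disabled (matches the table's
            # PROF_WARMUPSTEP semantics).
            if warmup > 0 and active > 0:
                self._prof = torch.profiler.profile(
                    schedule=torch.profiler.schedule(wait=0, warmup=warmup, active=active),
                    on_trace_ready=torch.profiler.tensorboard_trace_handler(output_dir),
                    record_shapes=record_shapes,
                )

        def start(self):
            if self._prof is not None:
                self._prof.start()

        def step(self):  # called once per prefill/decode step
            if self._prof is not None:
                self._prof.step()

        def stop(self):
            if self._prof is not None:
                self._prof.stop()

Because profiling_warmup_steps collapses to 0 for ranks not listed in PROF_RANKS, hb_profer_started stays False on those shards and they never drive the profiler.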