Commit 31bed905d4 (parent d31fb62576)
Co-authored-by: mswiniarsk <156412439+mswiniarsk@users.noreply.github.com>
@@ -74,8 +74,9 @@ Environment Variables Added:
 | --------------------------- | :--------- | :--------------- | :---------------------------------------------------------------------------- | :--------------------------- |
 | MAX_TOTAL_TOKENS            | integer    | 0                | Control the padding of input                                                   | add -e in docker run, such   |
 | ENABLE_HPU_GRAPH            | true/false | true             | Enable hpu graph or not                                                        | add -e in docker run command |
-| PROF_WARMUPSTEP             | integer    | 0                | Enable/disable profile, control profile warmup step, 0 means disable profile  | add -e in docker run command |
-| PROF_STEP                   | integer    | 5                | Control profile step                                                           | add -e in docker run command |
+| PROF_WAITSTEP               | integer    | 0                | Control profile wait steps                                                     | add -e in docker run command |
+| PROF_WARMUPSTEP             | integer    | 0                | Control profile warmup steps                                                   | add -e in docker run command |
+| PROF_STEP                   | integer    | 0                | Enable/disable profile, control profile active steps                           | add -e in docker run command |
 | PROF_PATH                   | string     | /tmp/hpu_profile | Define profile folder                                                          | add -e in docker run command |
 | PROF_RANKS                  | string     | 0                | Comma-separated list of ranks to profile                                       | add -e in docker run command |
 | PROF_RECORD_SHAPES          | true/false | false            | Control record_shapes option in the profiler                                   | add -e in docker run command |
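These profiling variables map one-to-one onto the os.getenv calls changed in causal_lm.py below. As a reading aid, here is a minimal Python sketch of that parsing; the ProfilerConfig wrapper is illustrative only (the repository reads the variables inline), and its field defaults are assumptions drawn from the table above:

    # Illustrative only: the server reads these variables inline rather than
    # through a dataclass. Defaults follow the table above.
    import os
    from dataclasses import dataclass, field
    from typing import List

    @dataclass
    class ProfilerConfig:
        wait_steps: int = int(os.getenv("PROF_WAITSTEP", "0"))
        warmup_steps: int = int(os.getenv("PROF_WARMUPSTEP", "0"))
        active_steps: int = int(os.getenv("PROF_STEP", "0"))  # 0 disables profiling
        output_dir: str = os.getenv("PROF_PATH", "/tmp/hpu_profile")
        record_shapes: bool = os.getenv("PROF_RECORD_SHAPES", "false").lower() == "true"
        ranks: List[int] = field(
            default_factory=lambda: [int(r) for r in os.getenv("PROF_RANKS", "0").split(",")]
        )

        def enabled_for(self, rank: int) -> bool:
            # Profiling is active only on listed ranks and when PROF_STEP > 0.
            return self.active_steps > 0 and rank in self.ranks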
@@ -324,7 +324,8 @@ class CausalLMBatch(Batch):
         htorch.core.mark_step()

         dst_keys = [prepare_memory(new_bs * chunk_size, prev, inplace) for prev in src_keys[target_batch_idx]]
-        dst_keys = [move_data(dst_keys[layer_num], chunk_size, indices, [src[layer_num] for src in src_keys]) for layer_num in range(num_layers)]
+        dst_keys = [move_data(dst_keys[layer_num], chunk_size, indices, [src[layer_num]
+                                                                         for src in src_keys]) for layer_num in range(num_layers)]

         src_values = [torch.stack(src) for src in src_values]
         htorch.core.mark_step()
@@ -334,7 +335,8 @@ class CausalLMBatch(Batch):
         htorch.core.mark_step()

         dst_values = [prepare_memory(new_bs * chunk_size, prev, inplace) for prev in src_values[target_batch_idx]]
-        dst_values = [move_data(dst_values[layer_num], chunk_size, indices, [src[layer_num] for src in src_values]) for layer_num in range(num_layers)]
+        dst_values = [move_data(dst_values[layer_num], chunk_size, indices, [src[layer_num]
+                                                                             for src in src_values]) for layer_num in range(num_layers)]

         past_key_values = past_key_values_type(zip(dst_keys, dst_values))
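Both hunks above only re-wrap the long move_data list comprehensions; behavior is unchanged. For orientation, a hedged sketch of the per-layer gather such a helper performs. prepare_memory and move_data are defined elsewhere in causal_lm.py, so the index_copy_-based merge below (which also ignores the chunk_size argument) is an approximation, not the repository's code:

    import torch

    def move_data_sketch(dst, indices, srcs):
        # Approximation: copy each source batch's rows for one layer into the
        # merged destination tensor. The real helper also takes chunk_size.
        for src, idx in zip(srcs, indices):
            dst.index_copy_(0, idx, src)
        return dst

    # Merge one layer's keys from two source batches into a batch of size 3.
    keys_a = torch.randn(2, 4)   # source batch 0 fills rows 0-1
    keys_b = torch.randn(1, 4)   # source batch 1 fills row 2
    merged = move_data_sketch(torch.empty(3, 4),
                              [torch.tensor([0, 1]), torch.tensor([2])],
                              [keys_a, keys_b])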
@@ -626,18 +628,20 @@ class CausalLM(Model):
         )
         prof_ranks = [int(val) for val in os.getenv("PROF_RANKS", "0").split(',')]
         self.profiling_warmup_steps = int(os.getenv("PROF_WARMUPSTEP", "0")) if rank in prof_ranks else 0
-        self.profiling_steps = int(os.getenv("PROF_STEP", "5"))
+        self.profiling_steps = int(os.getenv("PROF_STEP", "0")) if rank in prof_ranks else 0
+        self.profiling_wait_steps = int(os.getenv("PROF_WAITSTEP", "0"))
         record_shapes = os.getenv("PROF_RECORD_SHAPES", "false").lower() == "true"
         output_dir = os.getenv("PROF_PATH", "/tmp/hpu_profile")
-        self.hb_profer = HabanaProfile(
-            warmup=self.profiling_warmup_steps, active=self.profiling_steps, output_dir=output_dir, record_shapes=record_shapes
-        )
-        if self.profiling_warmup_steps > 0:
-            self.hb_profer_started = True
-            self.hb_profer.start()
+        if self.profiling_steps > 0:
+            self.hb_profiler = HabanaProfile(
+                wait=self.profiling_wait_steps,
+                warmup=self.profiling_warmup_steps,
+                active=self.profiling_steps,
+                output_dir=output_dir, record_shapes=record_shapes
+            )
+            self.hb_profiler.start()
         else:
-            self.hb_profer = None
-            self.hb_profer_started = False
+            self.hb_profiler = None
         self.step = 0

     def setup_quantization(self, model):
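The constructor now builds and starts the profiler only when PROF_STEP is positive, and forwards the new wait phase. Assuming HabanaProfile follows the stock torch.profiler schedule semantics (an assumption; the class comes from optimum-habana), the three phases behave like this sketch:

    # Sketch of the wait/warmup/active lifecycle using the stock torch.profiler;
    # whether HabanaProfile forwards its arguments exactly like this is assumed.
    import torch
    from torch.profiler import ProfilerActivity, profile, schedule

    prof = profile(
        activities=[ProfilerActivity.CPU],
        schedule=schedule(wait=1, warmup=2, active=5),  # PROF_WAITSTEP / PROF_WARMUPSTEP / PROF_STEP
        record_shapes=False,                            # PROF_RECORD_SHAPES
        on_trace_ready=torch.profiler.tensorboard_trace_handler("/tmp/hpu_profile"),  # PROF_PATH
    )
    prof.start()
    for _ in range(1 + 2 + 5):  # wait + warmup + active decode steps
        pass                    # one decode step would run here
        prof.step()
    prof.stop()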
@@ -979,10 +983,10 @@ class CausalLM(Model):
             req.prefix_offset = prefix_offset
             req.read_offset = read_offset
             htorch.core.mark_step()

         self.step = self.step + 1
-        if self.hb_profer_started == True and self.step > self.profiling_warmup_steps + self.profiling_steps:
-            self.hb_profer.stop()
-            self.hb_profer_started = False
-
+        if self.hb_profiler is not None:
+            if self.step > self.profiling_wait_steps + self.profiling_warmup_steps + self.profiling_steps:
+                self.hb_profiler.stop()
+            else:
+                self.hb_profiler.step()
         return generations, batch if not stopped else None
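Folding hb_profer_started into a None check removes the stale flag, and stop() now triggers only after all three phases have elapsed, with step() advancing the schedule otherwise. A toy trace of that arithmetic, using hypothetical settings PROF_WAITSTEP=1, PROF_WARMUPSTEP=2, PROF_STEP=3:

    # Local names only; prints step() for decode steps 1-6 and stop() on step 7.
    wait_steps, warmup_steps, active_steps = 1, 2, 3
    profiler_running = True
    for step in range(1, 9):
        if profiler_running:
            if step > wait_steps + warmup_steps + active_steps:
                profiler_running = False
                print(f"stop() at decode step {step}")
            else:
                print(f"step() at decode step {step}")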