mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-06-21 16:40:20 +00:00
fix: check existence before accessing rope type in cuda warmup
This commit is contained in:
parent
79a2c956de
commit
d0e2332d17
@ -1401,8 +1401,11 @@ class FlashCausalLM(Model):
|
||||
if max_bs is None:
|
||||
input_ids = torch.zeros(bs, dtype=torch.int64, device=self.device)
|
||||
position_ids = torch.zeros(bs, dtype=torch.int32, device=self.device)
|
||||
# mrope have position_ids per section, if so repeat n times
|
||||
if self.model.config.rope_scaling["rope_type"] == "mrope":
|
||||
if ( # mrope have position_ids per section, if so repeat n times
|
||||
hasattr(self.model, "config")
|
||||
and hasattr(self.model.config, "rope_scaling")
|
||||
and self.model.config.rope_scaling["rope_type"] == "mrope"
|
||||
):
|
||||
n_sections = len(self.model.config.rope_scaling["mrope_section"])
|
||||
position_ids = position_ids.unsqueeze(1).repeat(1, n_sections)
|
||||
slots = torch.arange(bs, dtype=torch.int64, device=self.device)
|
||||
|
Loading…
Reference in New Issue
Block a user