From d0e2332d174ac8d94acbb60e4b5b797b2acdf16b Mon Sep 17 00:00:00 2001 From: drbh Date: Tue, 28 Jan 2025 22:54:34 +0000 Subject: [PATCH] fix: check existence before accessing rope type in cuda warmup --- server/text_generation_server/models/flash_causal_lm.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/server/text_generation_server/models/flash_causal_lm.py b/server/text_generation_server/models/flash_causal_lm.py index 600ed716..47d372ad 100644 --- a/server/text_generation_server/models/flash_causal_lm.py +++ b/server/text_generation_server/models/flash_causal_lm.py @@ -1401,8 +1401,11 @@ class FlashCausalLM(Model): if max_bs is None: input_ids = torch.zeros(bs, dtype=torch.int64, device=self.device) position_ids = torch.zeros(bs, dtype=torch.int32, device=self.device) - # mrope have position_ids per section, if so repeat n times - if self.model.config.rope_scaling["rope_type"] == "mrope": + if ( # mrope have position_ids per section, if so repeat n times + hasattr(self.model, "config") + and hasattr(self.model.config, "rope_scaling") + and self.model.config.rope_scaling["rope_type"] == "mrope" + ): n_sections = len(self.model.config.rope_scaling["mrope_section"]) position_ids = position_ids.unsqueeze(1).repeat(1, n_sections) slots = torch.arange(bs, dtype=torch.int64, device=self.device)