From 36c5ec2abefd7686fc55489cf8e123157bf1d3cc Mon Sep 17 00:00:00 2001
From: Mohit Sharma
Date: Thu, 24 Apr 2025 09:55:14 +0000
Subject: [PATCH] improve headdim

---
 .../models/custom_modeling/flash_mistral_modeling.py    | 2 +-
 server/text_generation_server/models/flash_causal_lm.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/server/text_generation_server/models/custom_modeling/flash_mistral_modeling.py b/server/text_generation_server/models/custom_modeling/flash_mistral_modeling.py
index f24a7ad6..c855745c 100644
--- a/server/text_generation_server/models/custom_modeling/flash_mistral_modeling.py
+++ b/server/text_generation_server/models/custom_modeling/flash_mistral_modeling.py
@@ -116,7 +116,7 @@ class MistralAttention(torch.nn.Module):
         )
         self.num_heads = config.num_attention_heads
         self.hidden_size = config.hidden_size
-        if hasattr(config, "head_dim") and config.head_dim is not None:
+        if getattr(config, "head_dim", None) is not None:
             self.head_size = config.head_dim
         else:
             self.head_size = self.hidden_size // self.num_heads
diff --git a/server/text_generation_server/models/flash_causal_lm.py b/server/text_generation_server/models/flash_causal_lm.py
index 695c4af3..207226ff 100644
--- a/server/text_generation_server/models/flash_causal_lm.py
+++ b/server/text_generation_server/models/flash_causal_lm.py
@@ -1298,7 +1298,7 @@ class FlashCausalLM(Model):
         if head_size is None:
             # Some models use GQA and different sizes for o_proj
             # and q_proj, that allows for that.
-            if hasattr(config, "head_dim") and config.head_dim is not None:
+            if getattr(config, "head_dim", None) is not None:
                 self.head_size = config.head_dim
             else:
                 self.head_size = config.hidden_size // config.num_attention_heads
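
Aside (illustrative only, not part of the patch): a minimal, self-contained
sketch of why the getattr form is equivalent to the old hasattr check but
shorter. getattr's default argument covers the missing-attribute case, and the
trailing "is not None" covers a head_dim that is present but explicitly null.
The SimpleNamespace configs below are hypothetical stand-ins for a real model
config object, chosen only for illustration.

from types import SimpleNamespace

def resolve_head_size(config):
    # Mirrors the patched logic: prefer an explicit head_dim, otherwise
    # derive the head size from hidden_size and num_attention_heads.
    if getattr(config, "head_dim", None) is not None:
        return config.head_dim
    return config.hidden_size // config.num_attention_heads

# head_dim present and set: used directly.
assert resolve_head_size(
    SimpleNamespace(head_dim=256, hidden_size=4096, num_attention_heads=32)
) == 256
# head_dim present but None: falls back to hidden_size // num_attention_heads.
assert resolve_head_size(
    SimpleNamespace(head_dim=None, hidden_size=4096, num_attention_heads=32)
) == 128
# head_dim absent entirely: getattr's default (None) takes the same fallback,
# which is the case the old "hasattr(...) and ... is not None" pair guarded
# with two separate checks.
assert resolve_head_size(
    SimpleNamespace(hidden_size=4096, num_attention_heads=32)
) == 128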