mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-09-12 04:44:52 +00:00
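Less clutter: pass `head_size=config.head_dim` from the Gemma2 branch of `get_model` instead of special-casing `config.head_dim` inside `FlashCausalLM`.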
This commit is contained in:
parent
620416f13f
commit
5829b7821e
@ -757,6 +757,8 @@ def get_model(
|
||||
default_dtype=torch.bfloat16,
|
||||
trust_remote_code=trust_remote_code,
|
||||
lora_adapter_ids=lora_adapter_ids,
|
||||
# hidden_size / num_attention_heads is wrong in `google/gemma-2-9b-it`
|
||||
head_size=config.head_dim,
|
||||
)
|
||||
elif sharded:
|
||||
raise NotImplementedError(FLASH_ATT_ERROR_MESSAGE.format("Sharded Gemma2"))
|
||||
|
@ -925,10 +925,6 @@ class FlashCausalLM(Model):
|
||||
assert self.num_kv_heads > 0
|
||||
|
||||
if head_size is None:
|
||||
if getattr(config, "head_dim", None):
|
||||
# hidden_size / num_attention_heads is wrong in `google/gemma-2-9b-it`
|
||||
self.head_size = config.head_dim
|
||||
else:
|
||||
self.head_size = config.hidden_size // config.num_attention_heads
|
||||
else:
|
||||
self.head_size = head_size
|
||||
|
Loading…
Reference in New Issue
Block a user