Mirror of https://github.com/huggingface/text-generation-inference.git
fix the crash of meta-llama/Llama-3.2-1B (#2918)
* fix the crash of meta-llama/Llama-3.2-1B

Signed-off-by: Wang, Yi A <yi.a.wang@intel.com>

* Apply suggestions from code review

Simpler fix (which doesn't break vlms).

---------

Signed-off-by: Wang, Yi A <yi.a.wang@intel.com>
Co-authored-by: Nicolas Patry <patry.nicolas@protonmail.com>
parent c20025dbf7
commit 6e982f43a1
```diff
@@ -642,9 +642,7 @@ class FlashLlamaForCausalLM(torch.nn.Module):
         embedding_multiplier = getattr(config, "embedding_multiplier", None)
         if embedding_multiplier is not None:
             self.embed_tokens.weight.data *= embedding_multiplier
-
-        prefix = suffix if not prefix or name != "model" else f"{prefix}.{suffix}"
-
+        prefix = "lm_head" if not prefix or name != "model" else f"{prefix}.{suffix}"
         with no_fp8(weights):
             self.lm_head = SpeculativeHead.load(
                 config,
```
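For context, here is a minimal standalone sketch (not TGI code) of how the changed expression resolves the weight prefix that is then passed to `SpeculativeHead.load`. Only the two expressions themselves come from the diff; the function names and the sample values for `prefix`, `name`, and `suffix` below are hypothetical, chosen to show that the fix only changes the fallback branch.

```python
# Standalone sketch of the changed expression. `prefix`, `name`, and
# `suffix` mirror the variables visible in the diff; the sample values
# below are hypothetical.


def resolve_head_prefix_old(prefix: str, name: str, suffix: str) -> str:
    # Before the fix: the fallback branch returned whatever `suffix` held.
    return suffix if not prefix or name != "model" else f"{prefix}.{suffix}"


def resolve_head_prefix_new(prefix: str, name: str, suffix: str) -> str:
    # After the fix: the fallback branch returns the literal "lm_head"
    # weight name.
    return "lm_head" if not prefix or name != "model" else f"{prefix}.{suffix}"


if __name__ == "__main__":
    # Bare causal-LM load (hypothetical values): an empty prefix triggers
    # the fallback branch, where the two versions diverge.
    print(resolve_head_prefix_old("", "model", "model"))  # -> "model"
    print(resolve_head_prefix_new("", "model", "model"))  # -> "lm_head"

    # Nested load where the language model sits under a wrapper prefix:
    # both versions still compose f"{prefix}.{suffix}", so this path is
    # unchanged.
    print(resolve_head_prefix_old("language_model", "model", "lm_head"))
    print(resolve_head_prefix_new("language_model", "model", "lm_head"))
```

Because the nested-prefix path still composes `f"{prefix}.{suffix}"`, wrapper models that embed the language model under a prefix load exactly as before, which matches the "Simpler fix (which doesn't break vlms)" note in the commit message.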