diff --git a/server/text_generation_server/models/custom_modeling/flash_llama_modeling.py b/server/text_generation_server/models/custom_modeling/flash_llama_modeling.py
index 43c5dfb4..20bab01b 100644
--- a/server/text_generation_server/models/custom_modeling/flash_llama_modeling.py
+++ b/server/text_generation_server/models/custom_modeling/flash_llama_modeling.py
@@ -647,7 +647,7 @@ class FlashLlamaForCausalLM(torch.nn.Module):
         )
         self.model = FlashLlamaModel(prefix, config, weights)
         if config.tie_word_embeddings:
-            suffix = f"model.embed_tokens"
+            suffix = "model.embed_tokens"
         else:
             suffix = "lm_head"