From e864b956564f886098bff17a8878b125ae64ee54 Mon Sep 17 00:00:00 2001
From: Vincent Brouwers
Date: Wed, 30 Aug 2023 09:50:49 +0000
Subject: [PATCH] Fix Falcon weight mapping for H2O.ai checkpoints

During the safetensor conversion, duplicate weights are removed.
However, which of the duplicates gets removed differs per checkpoint.
In some, like `h2oai/h2ogpt-oig-oasst1-falcon-40b`, the weight
`transformer.word_embeddings.weight` gets removed. In others,
`lm_head.weight` gets removed. Long story long, we need to support
both.

Originally, f018143 mapped `lm_head` to `word_embeddings`. Then ac736fd
switched this around. This commit merges them and allows for both.
---
 server/text_generation_server/models/flash_rw.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/server/text_generation_server/models/flash_rw.py b/server/text_generation_server/models/flash_rw.py
index 2fc7c53d..195b3883 100644
--- a/server/text_generation_server/models/flash_rw.py
+++ b/server/text_generation_server/models/flash_rw.py
@@ -54,7 +54,10 @@ class FlashRWSharded(FlashCausalLM):
             device,
             dtype,
             process_group=self.process_group,
-            aliases={"lm_head.weight": ["transformer.word_embeddings.weight"]},
+            aliases={
+                "lm_head.weight": ["transformer.word_embeddings.weight"],
+                "transformer.word_embeddings.weight": ["lm_head.weight"],
+            },
         )
 
         config.quantize = quantize
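
For reviewers: the bidirectional alias map works because the weights loader can fall back to an alias when the requested tensor name is absent from the checkpoint. The real `Weights` class in text_generation_server is more involved; the sketch below only illustrates the lookup logic, and the names `resolve_name` / `checkpoint` are hypothetical, not part of the codebase.

    # Hypothetical sketch of alias resolution, not the actual Weights implementation.
    from typing import Dict, List


    def resolve_name(
        name: str,
        tensors: Dict[str, object],
        aliases: Dict[str, List[str]],
    ) -> str:
        """Return the stored name for `name`, falling back to its aliases."""
        if name in tensors:
            return name
        for alias in aliases.get(name, []):
            if alias in tensors:
                return alias
        raise RuntimeError(f"weight {name} (or any of its aliases) not found")


    # With the bidirectional map from this patch, the lookup succeeds no matter
    # which of the two duplicate tensors the safetensor conversion kept.
    aliases = {
        "lm_head.weight": ["transformer.word_embeddings.weight"],
        "transformer.word_embeddings.weight": ["lm_head.weight"],
    }
    checkpoint = {"transformer.word_embeddings.weight": "tensor data"}
    print(resolve_name("lm_head.weight", checkpoint, aliases))
    # -> transformer.word_embeddings.weight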