From 2e767279107f1bac84af4bbb8fec286380f6170b Mon Sep 17 00:00:00 2001
From: Nicolas Patry
Date: Tue, 11 Jul 2023 12:51:13 +0000
Subject: [PATCH] Doesn't affect LM_Head.

---
 server/text_generation_server/utils/layers.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/server/text_generation_server/utils/layers.py b/server/text_generation_server/utils/layers.py
index 950744f6..699f4664 100644
--- a/server/text_generation_server/utils/layers.py
+++ b/server/text_generation_server/utils/layers.py
@@ -180,7 +180,7 @@ class TensorParallelHead(SuperLayer):
 
     @staticmethod
     def load(config, prefix: str, weights):
-        weight = weights.get_partial_sharded(f"{prefix}.weight", dim=0)
+        weight = weights.get_sharded(f"{prefix}.weight", dim=0)
 
         # GPTQ doesn't quantize heads (nor embeddings)
        if config.quantize == "gptq":
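
For context, the one-line change loads the TensorParallelHead weight with get_sharded instead of get_partial_sharded along dim=0, i.e. each tensor-parallel rank takes its slice of the LM-head rows. Below is a minimal, hedged sketch of what an even dim-0 shard per rank could look like; the helper name get_sharded_sketch, its signature, and the divisibility assertion are assumptions for illustration only and are not the actual implementation in text_generation_server/utils/weights.py.

# Sketch only: illustrates an even split of a weight along `dim` across
# tensor-parallel ranks, the kind of slicing a get_sharded-style loader performs.
import torch

def get_sharded_sketch(weight: torch.Tensor, dim: int, rank: int, world_size: int) -> torch.Tensor:
    """Return this rank's contiguous slice of `weight` along `dim`.

    Assumes the sharded dimension divides evenly by world_size, which is the
    usual expectation when a head/embedding is fully sharded (vs. a partial shard).
    """
    size = weight.shape[dim]
    assert size % world_size == 0, f"dim {dim} ({size}) not divisible by world_size ({world_size})"
    block = size // world_size
    return weight.narrow(dim, rank * block, block).contiguous()

# Usage: a vocab-projection weight of shape (vocab_size, hidden) split row-wise
# across 2 ranks; each rank keeps vocab_size // 2 rows.
w = torch.randn(32000, 4096)
shard = get_sharded_sketch(w, dim=0, rank=0, world_size=2)
print(shard.shape)  # torch.Size([16000, 4096])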