Doesn't affect LM_Head.

2025-09-10 20:04:52 +00:00 · 2023-07-11 12:51:13 +00:00 · 2023-07-11 12:51:13 +00:00 · 2e76727910
commit 2e76727910
parent 906027ae58
1 changed file with 1 addition and 1 deletion
--- a/server/text_generation_server/utils/layers.py
+++ b/server/text_generation_server/utils/layers.py
@@ -180,7 +180,7 @@ class TensorParallelHead(SuperLayer):

    @staticmethod
    def load(config, prefix: str, weights):
-        weight = weights.get_partial_sharded(f"{prefix}.weight", dim=0)
+        weight = weights.get_sharded(f"{prefix}.weight", dim=0)

        # GPTQ doesn't quantize heads (nor embeddings)
        if config.quantize == "gptq":