mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-09-10 20:04:52 +00:00
Doesn't affect LM_Head.
This commit is contained in:
parent
906027ae58
commit
2e76727910
@@ -180,7 +180,7 @@ class TensorParallelHead(SuperLayer):
|
||||
|
||||
@staticmethod
|
||||
def load(config, prefix: str, weights):
|
||||
weight = weights.get_partial_sharded(f"{prefix}.weight", dim=0)
|
||||
weight = weights.get_sharded(f"{prefix}.weight", dim=0)
|
||||
|
||||
# GPTQ doesn't quantize heads (nor embeddings)
|
||||
if config.quantize == "gptq":
|
||||
|
Loading…
Reference in New Issue
Block a user