mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-09-10 20:04:52 +00:00
Doesn't affect LM_Head.
This commit is contained in:
parent
906027ae58
commit
2e76727910
@@ -180,7 +180,7 @@ class TensorParallelHead(SuperLayer):
|
|||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def load(config, prefix: str, weights):
|
def load(config, prefix: str, weights):
|
||||||
weight = weights.get_partial_sharded(f"{prefix}.weight", dim=0)
|
weight = weights.get_sharded(f"{prefix}.weight", dim=0)
|
||||||
|
|
||||||
# GPTQ doesn't quantize heads (nor embeddings)
|
# GPTQ doesn't quantize heads (nor embeddings)
|
||||||
if config.quantize == "gptq":
|
if config.quantize == "gptq":
|
||||||
|
Loading…
Reference in New Issue
Block a user