mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-09-10 20:04:52 +00:00
Fix GQA llama + AWQ
This commit is contained in:
parent
c5de7cd886
commit
1ab173a260
@@ -179,7 +179,7 @@ def _load_gqa(config, prefix: str, weights):
|
||||
dim=0,
|
||||
)
|
||||
|
||||
- if config.quantize != "gptq":
|
||||
+ if config.quantize not in ["gptq", "awq"]:
|
||||
weight = weight.to(dtype=weights.dtype).to(device=weights.device)
|
||||
|
||||
head_size = config.hidden_size // config.num_attention_heads
|
||||
|
Loading…
Reference in New Issue
Block a user