mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-04-24 00:12:08 +00:00
support bits different than 4
This commit is contained in:
parent
67d687609b
commit
f90c61a340
@ -151,7 +151,7 @@ def get_linear(weight, bias, quantize):
|
|||||||
f"The passed weight is not `gptq` compatible, loader needs to be updated."
|
f"The passed weight is not `gptq` compatible, loader needs to be updated."
|
||||||
)
|
)
|
||||||
|
|
||||||
if use_triton_kernel:
|
if use_triton_kernel or bits != 4:
|
||||||
linear = QuantLinear(
|
linear = QuantLinear(
|
||||||
qweight,
|
qweight,
|
||||||
qzeros,
|
qzeros,
|
||||||
|
Loading…
Reference in New Issue
Block a user