Support quantization bit widths other than 4

This commit is contained in:
Felix Marty 2023-07-12 16:19:25 +00:00
parent 67d687609b
commit f90c61a340

View File

@ -151,7 +151,7 @@ def get_linear(weight, bias, quantize):
f"The passed weight is not `gptq` compatible, loader needs to be updated." f"The passed weight is not `gptq` compatible, loader needs to be updated."
) )
if use_triton_kernel: if use_triton_kernel or bits != 4:
linear = QuantLinear( linear = QuantLinear(
qweight, qweight,
qzeros, qzeros,