From f90c61a3401e6cd8a641e5d59fec27a964b28779 Mon Sep 17 00:00:00 2001
From: Felix Marty <9808326+fxmarty@users.noreply.github.com>
Date: Wed, 12 Jul 2023 16:19:25 +0000
Subject: [PATCH] support bits different than 4

---
 server/text_generation_server/utils/layers.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/server/text_generation_server/utils/layers.py b/server/text_generation_server/utils/layers.py
index 63b9a406..b3fa2abb 100644
--- a/server/text_generation_server/utils/layers.py
+++ b/server/text_generation_server/utils/layers.py
@@ -151,7 +151,7 @@ def get_linear(weight, bias, quantize):
                 f"The passed weight is not `gptq` compatible, loader needs to be updated."
             )
 
-        if use_triton_kernel:
+        if use_triton_kernel or bits != 4:
             linear = QuantLinear(
                 qweight,
                 qzeros,