From 74d9dfa89e4fcb2c8830207fa9964f7d12a00229 Mon Sep 17 00:00:00 2001 From: chenxichen Date: Wed, 27 Dec 2023 03:25:47 +0000 Subject: [PATCH] Fix incorrect use of bias in awq --- server/text_generation_server/utils/awq/quantize/qmodule.py | 5 +---- server/text_generation_server/utils/layers.py | 2 +- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/server/text_generation_server/utils/awq/quantize/qmodule.py b/server/text_generation_server/utils/awq/quantize/qmodule.py index ca8caf50..31ac7463 100644 --- a/server/text_generation_server/utils/awq/quantize/qmodule.py +++ b/server/text_generation_server/utils/awq/quantize/qmodule.py @@ -35,10 +35,7 @@ class WQLinear(nn.Module): self.qweight = qweight self.qzeros = qzeros self.scales = scales - if bias: - self.bias = bias - else: - self.bias = None + self.bias = bias @torch.no_grad() def forward(self, x): diff --git a/server/text_generation_server/utils/layers.py b/server/text_generation_server/utils/layers.py index f38f130e..8f0965ff 100644 --- a/server/text_generation_server/utils/layers.py +++ b/server/text_generation_server/utils/layers.py @@ -335,7 +335,7 @@ def get_linear(weight, bias, quantize): qweight=qweight, qzeros=qzeros, scales=scales, - bias=bias is not None, + bias=bias, ) else: raise NotImplementedError(f"Quantization `{quantize}` is not implemented yet.")