diff --git a/server/text_generation_server/utils/layers.py b/server/text_generation_server/utils/layers.py index f38f130e..7bb95dd2 100644 --- a/server/text_generation_server/utils/layers.py +++ b/server/text_generation_server/utils/layers.py @@ -155,10 +155,7 @@ class EETQLinear(nn.Module): device = weight.device weight = torch.t(weight).contiguous().cpu() weight, scale = quant_weights(weight, torch.int8, False) - if bias: - bias = weights.get_tensor(f"{prefix}.bias") - else: - bias = None + self.weight = weight.cuda(device) self.scale = scale.cuda(device) self.bias = bias.cuda(device) if bias is not None else None