From 5ef2a48fecd7e08fa26bb657a5df78eb95404589 Mon Sep 17 00:00:00 2001
From: Nicolas Patry
Date: Fri, 12 Apr 2024 08:11:18 +0200
Subject: [PATCH] Update server/text_generation_server/utils/layers.py

Co-authored-by: Dong Shin
---
 server/text_generation_server/utils/layers.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/server/text_generation_server/utils/layers.py b/server/text_generation_server/utils/layers.py
index cace1084..8d2bd1ca 100644
--- a/server/text_generation_server/utils/layers.py
+++ b/server/text_generation_server/utils/layers.py
@@ -209,7 +209,7 @@ class Fp8Linear(nn.Module):
         self.dtype = weight.dtype
         self.qweight, self.scale = fp8_quantize(weight)
 
-        self.bias = bias.cuda(device) if bias is not None else None
+        self.bias = bias.cuda(bias.device) if bias is not None else None
 
     def forward(self, input: torch.Tensor) -> torch.Tensor:
         qinput, scale = fp8_quantize(input)