Update server/text_generation_server/utils/layers.py

Co-authored-by: Dong Shin <d0104.shin@gmail.com>
This commit is contained in:
Nicolas Patry 2024-04-12 08:11:18 +02:00 committed by GitHub
parent a352563ee0
commit 5ef2a48fec
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -209,7 +209,7 @@ class Fp8Linear(nn.Module):
self.dtype = weight.dtype
self.qweight, self.scale = fp8_quantize(weight)
-self.bias = bias.cuda(device) if bias is not None else None
+self.bias = bias.cuda(bias.device) if bias is not None else None
def forward(self, input: torch.Tensor) -> torch.Tensor:
qinput, scale = fp8_quantize(input)