mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-09-11 12:24:53 +00:00
Update server/text_generation_server/utils/layers.py
This commit is contained in:
parent
5ef2a48fec
commit
666cde0e12
@@ -209,7 +209,7 @@ class Fp8Linear(nn.Module):
|
|||||||
self.dtype = weight.dtype
|
self.dtype = weight.dtype
|
||||||
self.qweight, self.scale = fp8_quantize(weight)
|
self.qweight, self.scale = fp8_quantize(weight)
|
||||||
|
|
||||||
self.bias = bias.cuda(bias.device) if bias is not None else None
|
self.bias = bias if bias is not None else None
|
||||||
|
|
||||||
def forward(self, input: torch.Tensor) -> torch.Tensor:
|
def forward(self, input: torch.Tensor) -> torch.Tensor:
|
||||||
qinput, scale = fp8_quantize(input)
|
qinput, scale = fp8_quantize(input)
|
||||||
|
Loading…
Reference in New Issue
Block a user