mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-09-12 04:44:52 +00:00
also quant weights with single scale
This commit is contained in:
parent
3d0c7b85fe
commit
473f968a01
@ -203,7 +203,7 @@ class Fp8Linear(torch.nn.Module):
|
||||
|
||||
@classmethod
|
||||
def from_unquant(cls, weight, bias, dtype):
|
||||
qweight, scale = fp8_quantize(weight)
|
||||
qweight, scale = fp8_quantize(weight, scalar=not FBGEMM_MM_AVAILABLE)
|
||||
return cls(
|
||||
qweight=qweight, scale=scale, scale_upper_bound=None, bias=bias, dtype=dtype
|
||||
)
|
||||
|
Loading…
Reference in New Issue
Block a user