mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-09-11 12:24:53 +00:00
Put link to ref.
This commit is contained in:
parent
51bc8a4e45
commit
c174142fe5
@ -504,6 +504,7 @@ class Fp8Linear(torch.nn.Module):
|
|||||||
|
|
||||||
def forward(self, input: torch.Tensor) -> torch.Tensor:
|
def forward(self, input: torch.Tensor) -> torch.Tensor:
|
||||||
if self.weight_block_size is not None:
|
if self.weight_block_size is not None:
|
||||||
|
# https://arxiv.org/pdf/2412.19437
|
||||||
# At a more granular level. As illustrated in Figure 7 (a), (1) for activations, we group and
|
# At a more granular level. As illustrated in Figure 7 (a), (1) for activations, we group and
|
||||||
# scale elements on a 1x128 tile basis (i.e., per token per 128 channels); and (2) for weights, we
|
# scale elements on a 1x128 tile basis (i.e., per token per 128 channels); and (2) for weights, we
|
||||||
# group and scale elements on a 128x128 block basis (i.e., per 128 input channels per 128 output
|
# group and scale elements on a 128x128 block basis (i.e., per 128 input channels per 128 output
|
||||||
|
Loading…
Reference in New Issue
Block a user