mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-09-10 11:54:52 +00:00
taste
This commit is contained in:
parent
5882768682
commit
2080735e16
@ -168,8 +168,7 @@ def _load_gqa(config, prefix: str, weights):
|
|||||||
config.hidden_size,
|
config.hidden_size,
|
||||||
], f"{list(weight.shape)} != {[(num_heads + 2 * config.num_key_value_heads) * head_size, config.hidden_size]}"
|
], f"{list(weight.shape)} != {[(num_heads + 2 * config.num_key_value_heads) * head_size, config.hidden_size]}"
|
||||||
|
|
||||||
bias = None
|
return TensorParallelColumnLinear(get_linear(weight, bias=None, quantize=config.quantize))
|
||||||
return TensorParallelColumnLinear(get_linear(weight, bias, config.quantize))
|
|
||||||
|
|
||||||
|
|
||||||
class FlashLlamaAttention(torch.nn.Module):
|
class FlashLlamaAttention(torch.nn.Module):
|
||||||
|
Loading…
Reference in New Issue
Block a user