mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-06-19 07:42:06 +00:00
Remove useless modifications
Signed-off-by: yuanwu <yuan.wu@intel.com>
This commit is contained in:
parent
4a89f59ec7
commit
1505d4687a
@@ -88,6 +88,7 @@ def attention(
|
||||
_, kv_head_num, head_size = key.shape
|
||||
query = query.view(bs, -1, head_num, head_size).transpose(1, 2)
|
||||
key = key.view(bs, -1, kv_head_num, head_size).transpose(1, 2)
|
||||
value = value.view(bs, -1, kv_head_num, head_size).transpose(1, 2)
|
||||
attn_output = fsdpa_op(
|
||||
query,
|
||||
key,
|
||||
|
@@ -155,7 +155,6 @@ class TensorParallelColumnLinear(SuperLayer):
|
||||
|
||||
@classmethod
|
||||
def load_multi(cls, config, prefixes: List[str], weights, bias: bool, dim: int):
|
||||
print(f"bias: {bias}")
|
||||
if config.quantize == "exl2":
|
||||
linears = []
|
||||
for prefix in prefixes:
|
||||
|
Loading…
Reference in New Issue
Block a user