Mirror of https://github.com/huggingface/text-generation-inference.git (synced 2025-06-19 15:52:08 +00:00)
commit 1505d4687a (parent 4a89f59ec7)

Remove useless modifications

Signed-off-by: yuanwu <yuan.wu@intel.com>
@@ -88,6 +88,7 @@ def attention(
     _, kv_head_num, head_size = key.shape
     query = query.view(bs, -1, head_num, head_size).transpose(1, 2)
     key = key.view(bs, -1, kv_head_num, head_size).transpose(1, 2)
+    value = value.view(bs, -1, kv_head_num, head_size).transpose(1, 2)
     attn_output = fsdpa_op(
         query,
         key,
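For context, the hunk above restores the reshape of value so that query, key, and value all reach the fused attention call in the (batch, heads, seq_len, head_size) layout. Below is a minimal sketch of that pattern; it uses torch.nn.functional.scaled_dot_product_attention as a stand-in for the Gaudi fsdpa_op wrapper and assumes head_num == kv_head_num, so it is an illustration rather than the backend's actual code.

# Sketch only: stand-in for the fused-SDPA path shown in the diff,
# assuming a single (non-grouped) head count and PyTorch's built-in SDPA.
import torch
import torch.nn.functional as F

def attention_sketch(query, key, value, bs, head_num):
    # Inputs arrive flattened as (bs * seq_len, num_heads, head_size).
    _, kv_head_num, head_size = key.shape
    # view + transpose -> (bs, num_heads, seq_len, head_size),
    # the layout the fused kernel expects.
    query = query.view(bs, -1, head_num, head_size).transpose(1, 2)
    key = key.view(bs, -1, kv_head_num, head_size).transpose(1, 2)
    value = value.view(bs, -1, kv_head_num, head_size).transpose(1, 2)
    attn_output = F.scaled_dot_product_attention(query, key, value)
    # Flatten back to (bs * seq_len, num_heads, head_size) for the output projection.
    return attn_output.transpose(1, 2).reshape(-1, head_num, head_size)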
@@ -155,7 +155,6 @@ class TensorParallelColumnLinear(SuperLayer):
 
     @classmethod
     def load_multi(cls, config, prefixes: List[str], weights, bias: bool, dim: int):
-        print(f"bias: {bias}")
         if config.quantize == "exl2":
             linears = []
             for prefix in prefixes:
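The second hunk only drops a leftover debug print from TensorParallelColumnLinear.load_multi, which builds a single column-parallel linear from several checkpoint prefixes (for example fused q/k/v projections). As a rough illustration of that load-and-fuse pattern, here is a hedged sketch; the get_tensor callable and the plain nn.Linear target are assumptions for illustration, not the real text-generation-inference Weights API or its quantized paths.

# Hypothetical sketch of a load_multi-style helper: concatenate per-prefix
# weights along the output dimension and wrap them in one linear layer.
from typing import Callable, List
import torch

def load_multi_sketch(get_tensor: Callable[[str], torch.Tensor],
                      prefixes: List[str], bias: bool, dim: int = 0) -> torch.nn.Linear:
    # get_tensor(name) is a hypothetical loader returning a torch.Tensor.
    weight = torch.cat([get_tensor(f"{p}.weight") for p in prefixes], dim=dim)
    linear = torch.nn.Linear(weight.shape[1], weight.shape[0], bias=bias)
    with torch.no_grad():
        linear.weight.copy_(weight)
        if bias:
            # Biases are 1-D, so they are always concatenated along dim 0.
            linear.bias.copy_(torch.cat([get_tensor(f"{p}.bias") for p in prefixes], dim=0))
    return linear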