mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-05-07 18:32:06 +00:00
Fixing the other pathways.
This commit is contained in:
parent
c9047667ad
commit
5c598cc7ed
@@ -115,13 +115,14 @@ def ext_make_q_matrix(
|
|||||||
w.qweight,
|
w.qweight,
|
||||||
extra.q_perm,
|
extra.q_perm,
|
||||||
extra.q_invperm,
|
extra.q_invperm,
|
||||||
none_tensor,
|
none_tensor, # q_scale
|
||||||
none_tensor,
|
none_tensor, # q_scale_max
|
||||||
none_tensor,
|
none_tensor, # q_groups
|
||||||
none_tensor,
|
none_tensor, # q_group_map
|
||||||
w.qzeros,
|
w.qzeros,
|
||||||
w.scales,
|
w.scales,
|
||||||
w.g_idx.cpu(),
|
w.g_idx.cpu(),
|
||||||
|
none_tensor, # bias
|
||||||
temp_dq,
|
temp_dq,
|
||||||
max_dq_rows,
|
max_dq_rows,
|
||||||
)
|
)
|
||||||
@@ -129,15 +130,16 @@ def ext_make_q_matrix(
|
|||||||
else:
|
else:
|
||||||
return make_q_matrix(
|
return make_q_matrix(
|
||||||
w.qweight,
|
w.qweight,
|
||||||
none_tensor,
|
none_tensor, # q_perm
|
||||||
none_tensor,
|
none_tensor, # q_invperm
|
||||||
none_tensor,
|
none_tensor, # q_scale
|
||||||
none_tensor,
|
none_tensor, # q_scale_max
|
||||||
none_tensor,
|
none_tensor, # q_groups
|
||||||
none_tensor,
|
none_tensor, # q_group_map
|
||||||
w.qzeros,
|
w.qzeros,
|
||||||
w.scales,
|
w.scales,
|
||||||
none_tensor,
|
none_tensor, # g_idx
|
||||||
|
none_tensor, # bias
|
||||||
temp_dq,
|
temp_dq,
|
||||||
max_dq_rows,
|
max_dq_rows,
|
||||||
)
|
)
|
||||||
|
Loading…
Reference in New Issue
Block a user