mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-09-11 04:14:52 +00:00
Adding a comment.
This commit is contained in:
parent
97d9ff3a71
commit
7de9141164
@ -185,6 +185,9 @@ class QuantLinear(nn.Module):
|
|||||||
"g_idx": self.g_idx,
|
"g_idx": self.g_idx,
|
||||||
}
|
}
|
||||||
temp_dq = temp_dq.get_scratch_slice(self.temp_dq_size())
|
temp_dq = temp_dq.get_scratch_slice(self.temp_dq_size())
|
||||||
|
|
||||||
|
# We NEED to keep a pointer on Python side, otherwise the garbage collector will mess with us,
|
||||||
|
# and `Memory access fault by GPU node-2` will EAT you.
|
||||||
self.temp_dq = temp_dq
|
self.temp_dq = temp_dq
|
||||||
self.q_handle = ext_make_q_matrix(self.q_tensors, temp_dq)
|
self.q_handle = ext_make_q_matrix(self.q_tensors, temp_dq)
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user