Adding a comment.

2025-09-11 04:14:52 +00:00 · 2024-01-26 10:30:01 +00:00 · 2024-01-26 10:30:01 +00:00 · 7de9141164
commit 7de9141164
parent 97d9ff3a71
1 changed files with 3 additions and 0 deletions
--- a/server/text_generation_server/utils/gptq/exllamav2.py
+++ b/server/text_generation_server/utils/gptq/exllamav2.py
@ -185,6 +185,9 @@ class QuantLinear(nn.Module):
            "g_idx": self.g_idx,
        }
        temp_dq = temp_dq.get_scratch_slice(self.temp_dq_size())
+
+        # We NEED to keep a reference on the Python side; otherwise the garbage collector may
+        # reclaim it, and you will hit `Memory access fault by GPU node-2`.
        self.temp_dq = temp_dq
        self.q_handle = ext_make_q_matrix(self.q_tensors, temp_dq)