From 7de9141164117e31687f044eff9ccd6a217a4a84 Mon Sep 17 00:00:00 2001 From: Nicolas Patry Date: Fri, 26 Jan 2024 10:30:01 +0000 Subject: [PATCH] Adding a comment. --- server/text_generation_server/utils/gptq/exllamav2.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/server/text_generation_server/utils/gptq/exllamav2.py b/server/text_generation_server/utils/gptq/exllamav2.py index 25ff508b..2b897f25 100644 --- a/server/text_generation_server/utils/gptq/exllamav2.py +++ b/server/text_generation_server/utils/gptq/exllamav2.py @@ -185,6 +185,9 @@ class QuantLinear(nn.Module): "g_idx": self.g_idx, } temp_dq = temp_dq.get_scratch_slice(self.temp_dq_size()) + + # We NEED to keep a pointer on Python side, otherwise the garbage collector will mess with us, + # and `Memory access fault by GPU node-2` will EAT you. self.temp_dq = temp_dq self.q_handle = ext_make_q_matrix(self.q_tensors, temp_dq)