From 07bc903d6e603a2d446f25eae075f4b3d6d3bb7e Mon Sep 17 00:00:00 2001 From: Nicolas Patry Date: Thu, 7 Sep 2023 14:02:34 +0000 Subject: [PATCH] Fix __call__ vs forward. --- server/text_generation_server/utils/gptq/exllama.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/server/text_generation_server/utils/gptq/exllama.py b/server/text_generation_server/utils/gptq/exllama.py index 6a1cf117..7353afb5 100644 --- a/server/text_generation_server/utils/gptq/exllama.py +++ b/server/text_generation_server/utils/gptq/exllama.py @@ -69,10 +69,11 @@ def create_exllama_buffers(): TEMP_STATE, TEMP_DQ = temp_state, temp_dq -class Ex4bitLinear: +class Ex4bitLinear(torch.nn.Module): """Linear layer implementation with per-group 4-bit quantization of the weights""" def __init__(self, qweight, qzeros, scales, g_idx, bias, bits, groupsize): + super().__init__() global MAX_DQ, MAX_INNER, ACT_ORDER, DEVICE assert bits == 4