From 07bc903d6e603a2d446f25eae075f4b3d6d3bb7e Mon Sep 17 00:00:00 2001
From: Nicolas Patry <patry.nicolas@protonmail.com>
Date: Thu, 7 Sep 2023 14:02:34 +0000
Subject: [PATCH] Make Ex4bitLinear an nn.Module so __call__ routes to forward.

---
 server/text_generation_server/utils/gptq/exllama.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/server/text_generation_server/utils/gptq/exllama.py b/server/text_generation_server/utils/gptq/exllama.py
index 6a1cf117..7353afb5 100644
--- a/server/text_generation_server/utils/gptq/exllama.py
+++ b/server/text_generation_server/utils/gptq/exllama.py
@@ -69,10 +69,11 @@ def create_exllama_buffers():
     TEMP_STATE, TEMP_DQ = temp_state, temp_dq
 
 
-class Ex4bitLinear:
+class Ex4bitLinear(torch.nn.Module):
     """Linear layer implementation with per-group 4-bit quantization of the weights"""
 
     def __init__(self, qweight, qzeros, scales, g_idx, bias, bits, groupsize):
+        super().__init__()
         global MAX_DQ, MAX_INNER, ACT_ORDER, DEVICE
         assert bits == 4