diff --git a/server/text_generation_server/layers/gptq/__init__.py b/server/text_generation_server/layers/gptq/__init__.py index 2049f777..86856887 100644 --- a/server/text_generation_server/layers/gptq/__init__.py +++ b/server/text_generation_server/layers/gptq/__init__.py @@ -65,8 +65,6 @@ class GPTQWeight(Weight): return ExllamaQuantLinear(self, bias) else: - from text_generation_server.layers.gptq import QuantLinear - return QuantLinear( self.qweight, self.qzeros,