diff --git a/server/text_generation_server/cli.py b/server/text_generation_server/cli.py index 430323bc..5d25bfc5 100644 --- a/server/text_generation_server/cli.py +++ b/server/text_generation_server/cli.py @@ -316,7 +316,7 @@ def quantize( logger_level=logger_level, json_output=json_output, ) - from text_generation_server.utils.gptq.quantize import quantize + from text_generation_server.layers.gptq.quantize import quantize quantize( model_id=model_id, diff --git a/server/text_generation_server/layers/gptq/quantize.py b/server/text_generation_server/layers/gptq/quantize.py index ca113d8f..8d029817 100644 --- a/server/text_generation_server/layers/gptq/quantize.py +++ b/server/text_generation_server/layers/gptq/quantize.py @@ -12,7 +12,7 @@ from huggingface_hub import HfApi from accelerate import init_empty_weights from text_generation_server.utils import initialize_torch_distributed, Weights from text_generation_server.utils.hub import weight_files -from text_generation_server.utils.gptq.quant_linear import QuantLinear +from text_generation_server.layers.gptq.quant_linear import QuantLinear from loguru import logger from typing import Optional