From fd536f2017b06c4f5496d73a628c3eb15ae390ca Mon Sep 17 00:00:00 2001 From: Nicolas Patry Date: Tue, 9 Apr 2024 05:40:52 +0000 Subject: [PATCH] Automatic quantization config. --- server/text_generation_server/models/__init__.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/server/text_generation_server/models/__init__.py b/server/text_generation_server/models/__init__.py index 06219e7c..ec167303 100644 --- a/server/text_generation_server/models/__init__.py +++ b/server/text_generation_server/models/__init__.py @@ -186,6 +186,14 @@ def get_model( raise RuntimeError( f"Could not determine model type for {model_id} revision {revision}" ) + quantization_config = config_dict.get("quantization_config", None) + if quantization_config is not None and quantize is None: + method = quantization_config.get("quant_method", None) + if method in {"gptq", "awq"}: + logger.info(f"Auto selecting quantization method {method}") + quantize = method + else: + logger.info(f"Unknown quantization method {method}") if model_type == "ssm": return Mamba(