diff --git a/server/text_generation_server/layers/gptq/__init__.py b/server/text_generation_server/layers/gptq/__init__.py
index f8a62cf5..b5549916 100644
--- a/server/text_generation_server/layers/gptq/__init__.py
+++ b/server/text_generation_server/layers/gptq/__init__.py
@@ -188,8 +188,6 @@ class GPTQWeightsLoader(WeightsLoader):
     def is_layer_skipped_quantization(
         self, prefix: str, modules_to_not_convert: List[str]
     ):
-        if modules_to_not_convert is None:
-            return False
         return any(module_name in prefix for module_name in modules_to_not_convert)
 
     def get_weights_col_packed(
diff --git a/server/text_generation_server/utils/quantization.py b/server/text_generation_server/utils/quantization.py
index 7324b33f..e460361a 100644
--- a/server/text_generation_server/utils/quantization.py
+++ b/server/text_generation_server/utils/quantization.py
@@ -21,7 +21,7 @@ class _QuantizerConfig:
     quant_method: str
     sym: bool
     weight_block_size: Optional[List[int]]
-    modules_to_not_convert: Optional[List[str]]
+    modules_to_not_convert: List[str]
 
 
 @dataclass
@@ -52,7 +52,7 @@ def _get_quantizer_config(model_id, revision):
     sym = False
     desc_act = False
     weight_block_size = None
-    modules_to_not_convert = None
+    modules_to_not_convert = []
 
     filename = "config.json"
     try:
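
A minimal standalone sketch (not part of the diff) of why the removed `None` guard is no longer needed once `modules_to_not_convert` defaults to `[]`: `any()` over an empty iterable returns `False`, so an empty list behaves exactly like the old `None` branch. The example prefixes below are illustrative, not taken from the patch.

```python
from typing import List


def is_layer_skipped_quantization(prefix: str, modules_to_not_convert: List[str]) -> bool:
    # Mirrors the method kept in GPTQWeightsLoader (without self):
    # any() over an empty list is False, which matches the behavior of the
    # removed "if modules_to_not_convert is None: return False" guard.
    return any(module_name in prefix for module_name in modules_to_not_convert)


# Empty default: nothing is skipped, same result the None guard used to give.
assert is_layer_skipped_quantization("model.layers.0.mlp.down_proj", []) is False

# Non-empty list: layers whose prefix contains a listed module name are skipped.
assert is_layer_skipped_quantization("model.lm_head", ["lm_head"]) is True
```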