From 8ff7d57443de9937f67428b81179c0b91e1bb37a Mon Sep 17 00:00:00 2001
From: krzim
Date: Mon, 17 Jul 2023 19:31:39 +0000
Subject: [PATCH] add AutoModel error message for 4bit quantization

---
 server/text_generation_server/models/__init__.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/server/text_generation_server/models/__init__.py b/server/text_generation_server/models/__init__.py
index fd97f8b1..62485bae 100644
--- a/server/text_generation_server/models/__init__.py
+++ b/server/text_generation_server/models/__init__.py
@@ -290,7 +290,10 @@ def get_model(
         raise ValueError(
             "gptq quantization is not supported for AutoModel, you can try to quantize it with `text-generation-server quantize ORIGINAL_MODEL_ID NEW_MODEL_ID`"
         )
-
+    elif (quantize == "bitsandbytes-fp4") or (quantize == "bitsandbytes-nf4"):
+        raise ValueError(
+            "4bit quantization is not supported for AutoModel"
+        )
     if model_type in modeling_auto.MODEL_FOR_CAUSAL_LM_MAPPING_NAMES:
         return CausalLM(
             model_id,
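
For context, here is a minimal runnable sketch of the guard this patch adds to the AutoModel fallback path. The gptq branch and both error messages come from the patch itself; the check_automodel_quantize wrapper and the demo loop are illustrative assumptions, not the real get_model signature from text_generation_server.

# Minimal sketch of the quantization guard this patch adds. The wrapper
# function is hypothetical; in the real code these checks live inline in
# get_model() in server/text_generation_server/models/__init__.py.
from typing import Optional


def check_automodel_quantize(quantize: Optional[str]) -> None:
    """Reject quantization modes the AutoModel fallback cannot serve."""
    if quantize == "gptq":
        raise ValueError(
            "gptq quantization is not supported for AutoModel, you can try "
            "to quantize it with `text-generation-server quantize "
            "ORIGINAL_MODEL_ID NEW_MODEL_ID`"
        )
    elif (quantize == "bitsandbytes-fp4") or (quantize == "bitsandbytes-nf4"):
        # Added by this patch: the 4-bit bitsandbytes modes now fail fast
        # with a clear error instead of falling through to AutoModel loading.
        raise ValueError("4bit quantization is not supported for AutoModel")


# Demo: both 4-bit variants raise before any model loading is attempted.
for mode in ("bitsandbytes-fp4", "bitsandbytes-nf4"):
    try:
        check_automodel_quantize(mode)
    except ValueError as err:
        print(f"{mode}: {err}")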