add AutoModel error message for 4bit quantization

2025-09-10 20:04:52 +00:00 · 2023-07-17 19:31:39 +00:00 · 2023-07-17 19:31:39 +00:00 · 8ff7d57443
commit 8ff7d57443
parent 9c11372d8f
1 changed file with 4 additions and 1 deletion
--- a/server/text_generation_server/models/__init__.py
+++ b/server/text_generation_server/models/__init__.py
@@ -290,7 +290,10 @@ def get_model(
        raise ValueError(
            "gptq quantization is not supported for AutoModel, you can try to quantize it with `text-generation-server quantize ORIGINAL_MODEL_ID NEW_MODEL_ID`"
        )
-
+    elif (quantize == "bitsandbytes-fp4") or (quantize == "bitsandbytes-nf4"):
+        raise ValueError(
+            "4bit quantization is not supported for AutoModel"
+        )
    if model_type in modeling_auto.MODEL_FOR_CAUSAL_LM_MAPPING_NAMES:
        return CausalLM(
            model_id,