diff --git a/server/text_generation_server/utils/gptq/quantize.py b/server/text_generation_server/utils/gptq/quantize.py
index d182456f..5ec2d011 100644
--- a/server/text_generation_server/utils/gptq/quantize.py
+++ b/server/text_generation_server/utils/gptq/quantize.py
@@ -864,7 +864,7 @@ def quantize(
     )
 
     with init_empty_weights():
-        model = AutoModelForCausalLM.from_config(config, torch_dtype=torch.float16)
+        model = AutoModelForCausalLM.from_config(config, torch_dtype=torch.float16, trust_remote_code=trust_remote_code)
     model = model.eval()
 
     print("LOADED model")
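
For context, the change forwards the caller's `trust_remote_code` flag into `AutoModelForCausalLM.from_config`, so architectures that ship custom modeling code can still be instantiated on the meta device during quantization. A minimal sketch of the call pattern follows; the model id and flag value are illustrative assumptions, not part of the diff:

```python
# Minimal sketch of the pattern the diff enables (illustrative, not the PR's code).
import torch
from accelerate import init_empty_weights
from transformers import AutoConfig, AutoModelForCausalLM

model_id = "org/custom-architecture"  # hypothetical model id with remote modeling code
trust_remote_code = True              # must be forwarded, otherwise from_config rejects custom architectures

config = AutoConfig.from_pretrained(model_id, trust_remote_code=trust_remote_code)

# init_empty_weights() builds the module structure on the "meta" device,
# so no real weight tensors are allocated before quantization loads them.
with init_empty_weights():
    model = AutoModelForCausalLM.from_config(
        config, torch_dtype=torch.float16, trust_remote_code=trust_remote_code
    )
model = model.eval()
```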