diff --git a/server/text_generation_server/utils/gptq/quantize.py b/server/text_generation_server/utils/gptq/quantize.py
index d182456f7..bee1e4468 100644
--- a/server/text_generation_server/utils/gptq/quantize.py
+++ b/server/text_generation_server/utils/gptq/quantize.py
@@ -864,7 +864,8 @@ def quantize(
     )
 
     with init_empty_weights():
-        model = AutoModelForCausalLM.from_config(config, torch_dtype=torch.float16)
+        model = AutoModelForCausalLM.from_config(config, torch_dtype=torch.float16,
+                                                 trust_remote_code=trust_remote_code)
     model = model.eval()
 
     print("LOADED model")
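
Note: the snippet below is a minimal standalone sketch of what the patch enables, not part of the change itself. Without trust_remote_code=True, AutoModelForCausalLM.from_config cannot instantiate architectures whose modeling code ships with the checkpoint repo rather than with transformers, so quantization of such models fails at this step. The model id here is a placeholder assumption.

import torch
from accelerate import init_empty_weights
from transformers import AutoConfig, AutoModelForCausalLM

# Hypothetical checkpoint whose modeling code lives in the model repo (placeholder id).
model_id = "some-org/custom-architecture"

config = AutoConfig.from_pretrained(model_id, trust_remote_code=True)

with init_empty_weights():
    # Without trust_remote_code=True this call would refuse to build the model,
    # because the architecture is not registered inside transformers itself.
    model = AutoModelForCausalLM.from_config(
        config, torch_dtype=torch.float16, trust_remote_code=True
    )
model = model.eval()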