diff --git a/server/text_generation_server/models/santacoder.py b/server/text_generation_server/models/santacoder.py index a2b387376..d73c07f8d 100644 --- a/server/text_generation_server/models/santacoder.py +++ b/server/text_generation_server/models/santacoder.py @@ -56,7 +56,7 @@ class SantaCoder(CausalLM): model_id, revision=revision, torch_dtype=dtype, - load_in_8bit=quantize == "bitsandbytes", + load_in_4bit=quantize == "bitsandbytes", trust_remote_code=trust_remote_code, ).to(device)