server quantize: expose groupsize option (#2225)

This commit is contained in:
Daniël de Kok 2024-07-16 08:36:05 +02:00 committed by yuanwu
parent e955f7b536
commit 7177da0df6

View File

@@ -341,6 +341,7 @@ def quantize(
upload_to_model_id: Optional[str] = None,
percdamp: float = 0.01,
act_order: bool = False,
+groupsize: int = 128,
):
if revision is None:
revision = "main"
@@ -355,7 +356,7 @@ def quantize(
quantize(
model_id=model_id,
bits=4,
-groupsize=128,
+groupsize=groupsize,
output_dir=output_dir,
revision=revision,
trust_remote_code=trust_remote_code,