server quantize: expose groupsize option (#2225)

2025-10-08 22:45:23 +00:00 · 2024-07-16 08:36:05 +02:00 · 2024-07-16 08:36:05 +02:00 · 7177da0df6
commit 7177da0df6
parent e955f7b536
1 changed file with 2 additions and 1 deletion
--- a/server/text_generation_server/cli.py
+++ b/server/text_generation_server/cli.py
@@ -341,6 +341,7 @@ def quantize(
    upload_to_model_id: Optional[str] = None,
    percdamp: float = 0.01,
    act_order: bool = False,
+    groupsize: int = 128,
 ):
    if revision is None:
        revision = "main"
@@ -355,7 +356,7 @@ def quantize(
    quantize(
        model_id=model_id,
        bits=4,
-        groupsize=128,
+        groupsize=groupsize,
        output_dir=output_dir,
        revision=revision,
        trust_remote_code=trust_remote_code,