Mirror of https://github.com/huggingface/text-generation-inference.git (synced 2025-04-25 20:12:07 +00:00)
server quantize: expose groupsize option (#2225)
This commit is contained in:
parent e955f7b536
commit 7177da0df6
@@ -341,6 +341,7 @@ def quantize(
|
||||
upload_to_model_id: Optional[str] = None,
|
||||
percdamp: float = 0.01,
|
||||
act_order: bool = False,
|
||||
+groupsize: int = 128,
|
||||
):
|
||||
if revision is None:
|
||||
revision = "main"
|
||||
@@ -355,7 +356,7 @@ def quantize(
|
||||
quantize(
|
||||
model_id=model_id,
|
||||
bits=4,
|
||||
-groupsize=128,
|
||||
+groupsize=groupsize,
|
||||
output_dir=output_dir,
|
||||
revision=revision,
|
||||
trust_remote_code=trust_remote_code,
|
||||
|
Loading…
Reference in New Issue
Block a user