mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-09-11 12:24:53 +00:00
Automatic quantization config.
This commit is contained in:
parent
ff42d33e99
commit
fd536f2017
@@ -186,6 +186,14 @@ def get_model(
|
|||||||
raise RuntimeError(
|
raise RuntimeError(
|
||||||
f"Could not determine model type for {model_id} revision {revision}"
|
f"Could not determine model type for {model_id} revision {revision}"
|
||||||
)
|
)
|
||||||
|
quantization_config = config_dict.get("quantization_config", None)
|
||||||
|
if quantization_config is not None and quantize is None:
|
||||||
|
method = quantization_config.get("quant_method", None)
|
||||||
|
if method in {"gptq", "awq"}:
|
||||||
|
logger.info(f"Auto selecting quantization method {method}")
|
||||||
|
quantize = method
|
||||||
|
else:
|
||||||
|
logger.info(f"Unknown quantization method {method}")
|
||||||
|
|
||||||
if model_type == "ssm":
|
if model_type == "ssm":
|
||||||
return Mamba(
|
return Mamba(
|
||||||
|
Loading…
Reference in New Issue
Block a user