mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-04-25 12:02:08 +00:00
fix format
Signed-off-by: jiqing-feng <jiqing.feng@intel.com>
This commit is contained in:
parent
0bad926fb8
commit
a332862510
@@ -6,7 +6,12 @@ import torch
|
|||||||
from loguru import logger
|
from loguru import logger
|
||||||
from text_generation_server.utils.import_utils import SYSTEM
|
from text_generation_server.utils.import_utils import SYSTEM
|
||||||
from text_generation_server.utils.log import log_once
|
from text_generation_server.utils.log import log_once
|
||||||
from text_generation_server.utils.weights import Weight, Weights, WeightsLoader, UnquantizedWeight
|
from text_generation_server.utils.weights import (
|
||||||
|
Weight,
|
||||||
|
Weights,
|
||||||
|
WeightsLoader,
|
||||||
|
UnquantizedWeight,
|
||||||
|
)
|
||||||
|
|
||||||
if SYSTEM == "ipex":
|
if SYSTEM == "ipex":
|
||||||
from .ipex import QuantLinear
|
from .ipex import QuantLinear
|
||||||
@@ -181,7 +186,9 @@ class GPTQWeightsLoader(WeightsLoader):
|
|||||||
use_exllama=use_exllama,
|
use_exllama=use_exllama,
|
||||||
)
|
)
|
||||||
|
|
||||||
def is_layer_skipped_quantization(self, prefix: str, modules_to_not_convert: List[str]):
|
def is_layer_skipped_quantization(
|
||||||
|
self, prefix: str, modules_to_not_convert: List[str]
|
||||||
|
):
|
||||||
if modules_to_not_convert is None:
|
if modules_to_not_convert is None:
|
||||||
return False
|
return False
|
||||||
return any(module_name in prefix for module_name in modules_to_not_convert)
|
return any(module_name in prefix for module_name in modules_to_not_convert)
|
||||||
|
@@ -76,7 +76,9 @@ def _get_quantizer_config(model_id, revision):
|
|||||||
quant_method = data["quantization_config"]["quant_method"]
|
quant_method = data["quantization_config"]["quant_method"]
|
||||||
checkpoint_format = data["quantization_config"].get("checkpoint_format")
|
checkpoint_format = data["quantization_config"].get("checkpoint_format")
|
||||||
desc_act = data["quantization_config"].get("desc_act", False)
|
desc_act = data["quantization_config"].get("desc_act", False)
|
||||||
modules_to_not_convert = data["quantization_config"].get("modules_to_not_convert", None)
|
modules_to_not_convert = data["quantization_config"].get(
|
||||||
|
"modules_to_not_convert", None
|
||||||
|
)
|
||||||
except Exception:
|
except Exception:
|
||||||
filename = "quantize_config.json"
|
filename = "quantize_config.json"
|
||||||
try:
|
try:
|
||||||
|
Loading…
Reference in New Issue
Block a user