Mirror of https://github.com/huggingface/text-generation-inference.git, synced 2025-04-20 06:12:07 +00:00
fix awq crash if modules_to_not_convert is None
Signed-off-by: Wang, Yi A <yi.a.wang@intel.com>
parent 9997047d8a
commit 1c6dee17ce
@@ -19,7 +19,7 @@ def get_cuda_free_memory(device, memory_fraction):
 
 def get_xpu_free_memory(device, memory_fraction):
     total_free_memory, total_xpu_memory = torch.xpu.mem_get_info(device)
-    memory_fraction = float(os.getenv("XPU_MEMORY_FRACTION", "1.0"))
+    memory_fraction = float(os.getenv("XPU_MEMORY_FRACTION", "0.9"))
     free_memory = max(
         0, int(total_free_memory - (1 - memory_fraction) * total_xpu_memory)
     )
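The first hunk lowers the default XPU_MEMORY_FRACTION from 1.0 to 0.9, so 10% of total device memory is held back unless the user overrides it. A minimal sketch of the resulting arithmetic, with made-up device totals standing in for torch.xpu.mem_get_info(device):

import os

# Hypothetical numbers for illustration: a 16 GiB device with 12 GiB free.
total_free_memory = 12 * 1024**3
total_xpu_memory = 16 * 1024**3

# Default changed by this commit: "1.0" -> "0.9".
memory_fraction = float(os.getenv("XPU_MEMORY_FRACTION", "0.9"))

# Hold back (1 - fraction) of *total* device memory, report the rest of the
# currently free memory as usable, clamped at zero.
free_memory = max(
    0, int(total_free_memory - (1 - memory_fraction) * total_xpu_memory)
)
print(free_memory / 1024**3)  # ~10.4 GiB usable instead of the full 12 GiB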
@@ -79,6 +79,8 @@ def _get_quantizer_config(model_id, revision):
         modules_to_not_convert = data["quantization_config"].get(
             "modules_to_not_convert", []
         )
+        if modules_to_not_convert is None:
+            modules_to_not_convert = []
     except Exception:
         filename = "quantize_config.json"
         try:
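The second hunk is the AWQ fix itself. dict.get(key, default) only falls back to the default when the key is absent; a config.json that explicitly sets "modules_to_not_convert": null deserializes to None, which the key lookup returns as-is and which then crashes downstream code expecting a list. A small repro sketch (the quantization_config dict below is a hand-written stand-in for what gets read from a model's config.json):

# An explicit null in config.json becomes None, and dict.get returns it
# unchanged because the key *is* present, so the [] default never applies.
quantization_config = {"quant_method": "awq", "modules_to_not_convert": None}

modules_to_not_convert = quantization_config.get("modules_to_not_convert", [])
print(modules_to_not_convert)  # None, not []

# The guard added by this commit normalizes the value so later code can
# iterate or test membership without a TypeError:
if modules_to_not_convert is None:
    modules_to_not_convert = []
print(modules_to_not_convert)  # []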