mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-04-20 22:32:07 +00:00
fix awq crash if modules_to_not_convert is None
Signed-off-by: Wang, Yi A <yi.a.wang@intel.com>
This commit is contained in:
parent
9997047d8a
commit
1c6dee17ce
@@ -19,7 +19,7 @@ def get_cuda_free_memory(device, memory_fraction):


 def get_xpu_free_memory(device, memory_fraction):
     total_free_memory, total_xpu_memory = torch.xpu.mem_get_info(device)
-    memory_fraction = float(os.getenv("XPU_MEMORY_FRACTION", "1.0"))
+    memory_fraction = float(os.getenv("XPU_MEMORY_FRACTION", "0.9"))
     free_memory = max(
         0, int(total_free_memory - (1 - memory_fraction) * total_xpu_memory)
     )
|
@@ -79,6 +79,8 @@ def _get_quantizer_config(model_id, revision):
             modules_to_not_convert = data["quantization_config"].get(
                 "modules_to_not_convert", []
             )
+            if modules_to_not_convert is None:
+                modules_to_not_convert = []
     except Exception:
         filename = "quantize_config.json"
         try:
|
Loading…
Reference in New Issue
Block a user