mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-04-20 22:32:07 +00:00
fix awq crash if modules_to_not_convert is None
Signed-off-by: Wang, Yi A <yi.a.wang@intel.com>
This commit is contained in:
parent
9997047d8a
commit
1c6dee17ce
@@ -19,7 +19,7 @@ def get_cuda_free_memory(device, memory_fraction):


 def get_xpu_free_memory(device, memory_fraction):
     total_free_memory, total_xpu_memory = torch.xpu.mem_get_info(device)
-    memory_fraction = float(os.getenv("XPU_MEMORY_FRACTION", "1.0"))
+    memory_fraction = float(os.getenv("XPU_MEMORY_FRACTION", "0.9"))
     free_memory = max(
         0, int(total_free_memory - (1 - memory_fraction) * total_xpu_memory)
     )
|
@@ -79,6 +79,8 @@ def _get_quantizer_config(model_id, revision):
             modules_to_not_convert = data["quantization_config"].get(
                 "modules_to_not_convert", []
             )
+            if modules_to_not_convert is None:
+                modules_to_not_convert = []
     except Exception:
         filename = "quantize_config.json"
         try:
|
Loading…
Reference in New Issue
Block a user