From 1c6dee17ce44ea67b00adaf2235d6052ca86b72f Mon Sep 17 00:00:00 2001
From: "Wang, Yi A"
Date: Wed, 12 Mar 2025 01:28:56 -0700
Subject: [PATCH] fix awq crash if modules_to_not_convert is None

Signed-off-by: Wang, Yi A
---
 server/text_generation_server/utils/import_utils.py | 2 +-
 server/text_generation_server/utils/quantization.py | 2 ++
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/server/text_generation_server/utils/import_utils.py b/server/text_generation_server/utils/import_utils.py
index 35cca9cc9..730ac6cbe 100644
--- a/server/text_generation_server/utils/import_utils.py
+++ b/server/text_generation_server/utils/import_utils.py
@@ -19,7 +19,7 @@ def get_cuda_free_memory(device, memory_fraction):
 
 def get_xpu_free_memory(device, memory_fraction):
     total_free_memory, total_xpu_memory = torch.xpu.mem_get_info(device)
-    memory_fraction = float(os.getenv("XPU_MEMORY_FRACTION", "1.0"))
+    memory_fraction = float(os.getenv("XPU_MEMORY_FRACTION", "0.9"))
     free_memory = max(
         0, int(total_free_memory - (1 - memory_fraction) * total_xpu_memory)
     )
diff --git a/server/text_generation_server/utils/quantization.py b/server/text_generation_server/utils/quantization.py
index e460361af..92111b193 100644
--- a/server/text_generation_server/utils/quantization.py
+++ b/server/text_generation_server/utils/quantization.py
@@ -79,6 +79,8 @@ def _get_quantizer_config(model_id, revision):
         modules_to_not_convert = data["quantization_config"].get(
             "modules_to_not_convert", []
         )
+        if modules_to_not_convert is None:
+            modules_to_not_convert = []
     except Exception:
         filename = "quantize_config.json"
         try:
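
The import_utils.py hunk lowers the default XPU memory fraction from 1.0 to
0.9, so 10% of the device's total memory is kept as headroom instead of the
whole free pool being claimable. A quick worked example of the formula (the
device sizes are hypothetical):

    total_xpu_memory = 64.0   # GiB, hypothetical device capacity
    total_free_memory = 60.0  # GiB currently reported free
    memory_fraction = 0.9     # new default for XPU_MEMORY_FRACTION
    # Holds back (1 - 0.9) * 64 = 6.4 GiB of the free pool as headroom.
    free_memory = max(
        0, int(total_free_memory - (1 - memory_fraction) * total_xpu_memory)
    )
    print(free_memory)  # 53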
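
The quantization.py hunk guards against Python's dict.get semantics: the []
default is returned only when the key is absent, so an AWQ checkpoint whose
config.json carries "modules_to_not_convert": null still yields None, which
downstream code expecting a list cannot handle. A minimal sketch of the
failure mode (the config fragment is hypothetical):

    import json

    # Hypothetical quantization_config with the field set to null
    # rather than omitted.
    data = json.loads(
        '{"quantization_config": {"modules_to_not_convert": null}}'
    )

    # The key exists, so .get returns its stored value (None), not [].
    modules_to_not_convert = data["quantization_config"].get(
        "modules_to_not_convert", []
    )
    print(modules_to_not_convert)  # None

    # The patch normalizes None back to an empty list:
    if modules_to_not_convert is None:
        modules_to_not_convert = []
    print(modules_to_not_convert)  # []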