From 886bfab23de922f3fd65960883b6ab51686ac221 Mon Sep 17 00:00:00 2001 From: "Wang, Yi A" Date: Thu, 27 Jun 2024 05:18:57 -0700 Subject: [PATCH] refine get xpu free memory Signed-off-by: Wang, Yi A --- server/text_generation_server/utils/import_utils.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/server/text_generation_server/utils/import_utils.py b/server/text_generation_server/utils/import_utils.py index 6d921721..011e0f63 100644 --- a/server/text_generation_server/utils/import_utils.py +++ b/server/text_generation_server/utils/import_utils.py @@ -1,6 +1,7 @@ import torch from loguru import logger import subprocess +import os def is_ipex_available(): @@ -21,10 +22,13 @@ def get_cuda_free_memory(device, memory_fraction): def get_xpu_free_memory(device, memory_fraction): total_memory = torch.xpu.get_device_properties(device).total_memory device_id = device.index - query = f"xpu-smi dump -d {device_id} -m 18 -n 1" - output = subprocess.check_output(query.split()).decode("utf-8").split("\n") - used_memory = float(output[1].split(",")[-1]) * 1024 * 1024 - free_memory = int(total_memory * 0.95 - used_memory) + memory_fraction = float(os.getenv("XPU_MEMORY_FRACTION", "1.0")) + free_memory = max( + 0, + int( + total_memory * 0.9 * memory_fraction - torch.xpu.memory_reserved(device_id) + ), + ) return free_memory