From c35810d6f024e7492f72dfc9a68fd9d2d1eb4d22 Mon Sep 17 00:00:00 2001
From: Yuan Wu
Date: Fri, 28 Feb 2025 18:20:55 +0800
Subject: [PATCH] Fix the loading issue of 90B (#283)

Signed-off-by: yuanwu
---
 server/text_generation_server/models/vlm_causal_lm.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/server/text_generation_server/models/vlm_causal_lm.py b/server/text_generation_server/models/vlm_causal_lm.py
index 06c9951e1..f17e9c247 100644
--- a/server/text_generation_server/models/vlm_causal_lm.py
+++ b/server/text_generation_server/models/vlm_causal_lm.py
@@ -784,6 +784,7 @@ class VlmCausalLM(Model):
         htorch.core.hpu_set_env()
 
         if world_size > 1:
+            os.environ.setdefault("DEEPSPEED_USE_HABANA_FRAMEWORKS_DETERMINISTIC_API", "1")
             model = self.get_deepspeed_model(
                 model_class, model_id, dtype, revision
             )
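
For reference, a minimal standalone sketch of the behavior the added line relies on: os.environ.setdefault only writes the value when the variable is not already set, so a value exported in the shell before launching the server still takes precedence. The variable name is copied from the patch above; the print call is illustrative only and not part of the change.

    import os

    # Mirrors the guard added in the patch: setdefault() writes "1" only when
    # the variable is absent, so an existing export is preserved.
    os.environ.setdefault("DEEPSPEED_USE_HABANA_FRAMEWORKS_DETERMINISTIC_API", "1")

    # Illustrative only (not part of the patch): show the effective value.
    print(os.environ["DEEPSPEED_USE_HABANA_FRAMEWORKS_DETERMINISTIC_API"])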