Mirror of https://github.com/huggingface/text-generation-inference.git, synced 2025-04-22 15:32:08 +00:00
Disable tensor caching in HPU Graph execution (#4)
This commit is contained in: parent b1897acfd6 · commit e3dcd7f2c2
@@ -632,7 +632,7 @@ class CausalLM(Model):
         model = model.eval().to(device)
         #wrap in hpu_graph only if self.enable_hpu_graph is set
         if self.enable_hpu_graph:
-            model = wrap_in_hpu_graph(model)
+            model = wrap_in_hpu_graph(model, disable_tensor_cache=True)

         if model.config.model_type in MODELS_OPTIMIZED_WITH_STATIC_SHAPES:
             self.is_optimized_for_gaudi = True
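For context, the standalone sketch below shows the pattern the changed line follows. It is a minimal illustration, assuming the Habana PyTorch bridge (habana_frameworks.torch) with HPU Graph support and an available HPU device; the toy Linear model and tensor shapes are placeholders, not part of this commit.

import torch
from habana_frameworks.torch.hpu import wrap_in_hpu_graph

# Placeholder model; the commit itself wraps the CausalLM model loaded above.
model = torch.nn.Linear(16, 16)
device = torch.device("hpu")
model = model.eval().to(device)

# Capture the forward pass as an HPU Graph. disable_tensor_cache=True asks
# the graph not to keep cached tensors alive between replays, reducing the
# steady-state device-memory footprint of the captured graph.
model = wrap_in_hpu_graph(model, disable_tensor_cache=True)

with torch.no_grad():
    out = model(torch.randn(1, 16, device=device))

Dropping the tensor cache trades some replay-time bookkeeping for device-memory headroom, which is plausibly the motivation here, since a serving stack keeps its captured graphs resident for the lifetime of the process.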