From e3dcd7f2c2959f5b63f93cdc93f3485cd974a7c9 Mon Sep 17 00:00:00 2001
From: jkaniecki <153085639+jkaniecki@users.noreply.github.com>
Date: Fri, 22 Dec 2023 13:51:16 +0100
Subject: [PATCH] Disable tensor caching in HPU Graph execution (#4)

---
 server/text_generation_server/models/causal_lm.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/server/text_generation_server/models/causal_lm.py b/server/text_generation_server/models/causal_lm.py
index ee726faf..26be1875 100644
--- a/server/text_generation_server/models/causal_lm.py
+++ b/server/text_generation_server/models/causal_lm.py
@@ -632,7 +632,7 @@ class CausalLM(Model):
         model = model.eval().to(device)
         #wrap in hpu_graph only if self.enable_hpu_graph is set
         if self.enable_hpu_graph:
-            model = wrap_in_hpu_graph(model)
+            model = wrap_in_hpu_graph(model, disable_tensor_cache=True)
 
         if model.config.model_type in MODELS_OPTIMIZED_WITH_STATIC_SHAPES:
             self.is_optimized_for_gaudi = True
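
Note (not part of the patch): a minimal usage sketch of the flag this change enables, assuming a Gaudi/HPU environment with habana_frameworks installed. The import mirrors the one used in causal_lm.py; the toy model is hypothetical, and the comment on the flag's effect reflects my reading of the Habana HPU Graph API rather than anything stated in the patch.

    import torch
    from habana_frameworks.torch.hpu import wrap_in_hpu_graph

    # Hypothetical module used only for illustration.
    model = torch.nn.Linear(16, 16).eval().to("hpu")

    # The patch passes disable_tensor_cache=True so the HPU Graph wrapper
    # does not keep cached tensors between graph replays, reducing the
    # device memory retained by the wrapped module.
    model = wrap_in_hpu_graph(model, disable_tensor_cache=True)

    with torch.no_grad():
        out = model(torch.randn(4, 16, device="hpu"))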