From 5df20f88ffa7a31e3eacc1ff6b34e80e7b6d24da Mon Sep 17 00:00:00 2001 From: Jacek Czaja Date: Thu, 4 Jul 2024 13:42:24 +0200 Subject: [PATCH] Fix to non-LLAMA models (#177) Co-authored-by: Jacek Czaja --- server/text_generation_server/models/causal_lm.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/server/text_generation_server/models/causal_lm.py b/server/text_generation_server/models/causal_lm.py index f20db570..37d7479b 100644 --- a/server/text_generation_server/models/causal_lm.py +++ b/server/text_generation_server/models/causal_lm.py @@ -850,9 +850,12 @@ class CausalLM(Model): "attention_mask": attention_mask, "past_key_values": past_key_values, "token_idx": token_idx, - "lazy_mode": LAZY_MODE == 1, } + # Optimum Habana supports the "lazy_mode" keyword argument only for llama-type models + if self.model.config.model_type == "llama" : + kwargs["lazy_mode"] = LAZY_MODE == 1 + if self.has_position_ids: kwargs["position_ids"] = position_ids