From 55d60a103c7c5ca269e99b1b18ed57c1969c243f Mon Sep 17 00:00:00 2001
From: "Wang, Chang" <491521017@qq.com>
Date: Mon, 26 Aug 2024 17:02:58 +0800
Subject: [PATCH] Add qwen2 fp8 support (#210)

Signed-off-by: changwang
Co-authored-by: changwang
---
 server/text_generation_server/models/causal_lm.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/server/text_generation_server/models/causal_lm.py b/server/text_generation_server/models/causal_lm.py
index 831b0b5e..f814a8e2 100644
--- a/server/text_generation_server/models/causal_lm.py
+++ b/server/text_generation_server/models/causal_lm.py
@@ -691,9 +691,9 @@ class CausalLM(Model):
             "return_dict": True,
         }
 
-        if model.config.model_type in ["llama", "mistral", "starcoder2"]:
+        if model.config.model_type in ["llama", "mistral", "starcoder2", "qwen2"]:
-            if model.config.model_type in ["llama", "mistral"]:
+            if model.config.model_type in ["llama", "mistral", "qwen2"]:
                 kwargs["attn_softmax_bf16"] = True
                 kwargs["trim_logits"] = True