mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-04-22 15:32:08 +00:00
Add qwen2 fp8 support (#210)
Signed-off-by: changwang <changwang@habana.ai>
Co-authored-by: changwang <changwang@habana.ai>
This commit is contained in:
parent
e33db1877c
commit
55d60a103c
@@ -691,9 +691,9 @@ class CausalLM(Model):
 "return_dict": True,
 }
 
-if model.config.model_type in ["llama", "mistral", "starcoder2"]:
+if model.config.model_type in ["llama", "mistral", "starcoder2", "qwen2"]:
 
-if model.config.model_type in ["llama", "mistral"]:
+if model.config.model_type in ["llama", "mistral", "qwen2"]:
 kwargs["attn_softmax_bf16"] = True
 kwargs["trim_logits"] = True
 
|
Loading…
Reference in New Issue
Block a user