Mirror of https://github.com/huggingface/text-generation-inference.git (synced 2025-04-27 13:02:12 +00:00)
Enabling Flash Attention support for falcon model (#232)
This commit is contained in:
parent 0578bd917d
commit e06320f64e
@@ -694,9 +694,9 @@ class CausalLM(Model):
         "return_dict": True,
     }
 
-    if model.config.model_type in ["llama", "mistral", "starcoder2", "qwen2"]:
-        if model.config.model_type in ["llama", "mistral", "qwen2"]:
+    if model.config.model_type in ["llama", "mistral", "starcoder2", "qwen2", "falcon"]:
+        if model.config.model_type not in ["falcon"]:
             kwargs["attn_softmax_bf16"] = True
         kwargs["trim_logits"] = True
 
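For context, below is a minimal sketch of the kwargs setup as it reads after this change. The model-type lists and the attn_softmax_bf16 / trim_logits flags come straight from the diff; the standalone build_model_kwargs helper is a hypothetical wrapper for illustration, not the repository's actual structure.

# Minimal sketch (hypothetical helper): how the model kwargs look after this
# commit. Flag names and model-type lists are taken verbatim from the diff.
def build_model_kwargs(model_type: str) -> dict:
    kwargs = {
        "return_dict": True,
    }

    # Flash Attention path: falcon is now included in the outer list.
    if model_type in ["llama", "mistral", "starcoder2", "qwen2", "falcon"]:
        # falcon is the one model type that skips the bf16 attention softmax...
        if model_type not in ["falcon"]:
            kwargs["attn_softmax_bf16"] = True
        # ...but it still gets logit trimming like the other models.
        kwargs["trim_logits"] = True

    return kwargs

# Example: falcon now takes the Flash Attention path, minus attn_softmax_bf16.
assert build_model_kwargs("falcon") == {"return_dict": True, "trim_logits": True}
assert build_model_kwargs("llama") == {
    "return_dict": True,
    "attn_softmax_bf16": True,
    "trim_logits": True,
}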