mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-09-11 04:14:52 +00:00
Update server/text_generation_server/models/custom_modeling/flash_mistral_modeling.py
This commit is contained in:
parent
891fe74099
commit
0eea83be44
@@ -43,7 +43,7 @@ if IS_CUDA_SYSTEM:
|
|||||||
elif IS_ROCM_SYSTEM:
|
elif IS_ROCM_SYSTEM:
|
||||||
from vllm import layernorm_ops
|
from vllm import layernorm_ops
|
||||||
|
|
||||||
if not HAS_FLASH_ATTN_V2_ROCM and not HAS_FLASH_ATTN_V2_ROCM:
|
if not HAS_FLASH_ATTN_V2_CUDA and not HAS_FLASH_ATTN_V2_ROCM:
|
||||||
raise ImportError("Mistral model requires flash attn v2")
|
raise ImportError("Mistral model requires flash attn v2")
|
||||||
|
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user