Update server/text_generation_server/models/custom_modeling/flash_mistral_modeling.py

Co-authored-by: OlivierDehaene <olivier@huggingface.co>
This commit is contained in:
fxmarty 2023-11-08 19:07:45 +09:00 committed by GitHub
parent f746b8e0ae
commit 891fe74099
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -27,7 +27,7 @@ from transformers.configuration_utils import PretrainedConfig
from typing import Optional, List, Tuple from typing import Optional, List, Tuple
from text_generation_server.utils import paged_attention, flash_attn from text_generation_server.utils import paged_attention, flash_attn
from text_generation_server.utils.flash_attn import attention, HAS_FLASH_ATTN_V2_ROCM, HAS_FLASH_ATTN_V2_ROCM from text_generation_server.utils.flash_attn import attention, HAS_FLASH_ATTN_V2_ROCM, HAS_FLASH_ATTN_V2_CUDA
from text_generation_server.utils.layers import ( from text_generation_server.utils.layers import (
TensorParallelRowLinear, TensorParallelRowLinear,
TensorParallelColumnLinear, TensorParallelColumnLinear,