Mirror of https://github.com/huggingface/text-generation-inference.git, synced 2025-04-21 14:52:20 +00:00
Hotfixing intel-cpu (not sure how it was working before). (#2967)
* Hotfixing intel-cpu (not sure how it was working before).

* Do not fail on missing moe-kernels (Intel-cpu).
parent ee0dffcd14
commit 80e7d98f88
@@ -10,7 +10,11 @@ from text_generation_server.layers.fp8 import (
     quant_dtype,
     normalize_e4m3fn_to_native_float8,
 )
-from moe_kernels.fused_moe import fused_moe
+
+try:
+    from moe_kernels.fused_moe import fused_moe
+except Exception:
+    fused_moe = None
 
 
 class FP8SparseMoELayer(nn.Module):
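The hunk above makes moe-kernels an optional dependency: the import is wrapped in try/except so the module still loads on platforms where the package is missing (Intel CPU), leaving fused_moe as None. A minimal sketch of how call sites can then guard against the missing kernel; the helper name _require_fused_moe is hypothetical and not part of this commit:

try:
    from moe_kernels.fused_moe import fused_moe
except Exception:
    # moe-kernels is absent on some platforms (e.g. Intel CPU); degrade to None.
    fused_moe = None


def _require_fused_moe():
    # Hypothetical helper: raise a clear error at use time rather than at import time.
    if fused_moe is None:
        raise ImportError("moe-kernels is not installed; fused MoE is unavailable")
    return fused_moe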
@@ -180,7 +180,7 @@ except ImportError as e:
 if MAMBA_AVAILABLE:
     __all__.append(Mamba)
 
-FLASH_TRANSFORMERS_BACKEND = True
+FLASH_TRANSFORMERS_BACKEND = torch.cuda.is_available()
 try:
     from text_generation_server.models.transformers_flash_causal_lm import (
         TransformersFlashCausalLM,
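The second hunk stops hard-coding the flash transformers backend to True and instead gates it on an actual CUDA device. A minimal sketch of the same pattern, assuming (the commit does not show this branch) that a failed import disables the backend:

import torch

# Enable the flash transformers backend only when a CUDA device is present.
FLASH_TRANSFORMERS_BACKEND = torch.cuda.is_available()

try:
    from text_generation_server.models.transformers_flash_causal_lm import (
        TransformersFlashCausalLM,
    )
except ImportError:
    # Assumed fallback: without the flash model class, disable the backend.
    FLASH_TRANSFORMERS_BACKEND = False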