mirror of https://github.com/huggingface/text-generation-inference.git
synced 2025-04-19 22:02:06 +00:00
Hotfixing intel-cpu (not sure how it was working before). (#2967)
* Hotfixing intel-cpu (not sure how it was working before).
* Do not fail on missing moe-kernels (Intel-cpu).
This commit is contained in:
parent ee0dffcd14
commit 80e7d98f88
@@ -10,7 +10,11 @@ from text_generation_server.layers.fp8 import (
     quant_dtype,
     normalize_e4m3fn_to_native_float8,
 )
-from moe_kernels.fused_moe import fused_moe
+try:
+    from moe_kernels.fused_moe import fused_moe
+except Exception:
+    fused_moe = None


 class FP8SparseMoELayer(nn.Module):
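The first hunk wraps the moe-kernels import in a try/except so that CPU-only installs (the Intel CPU image ships without moe-kernels) can still import the FP8 MoE module instead of failing at import time. Below is a minimal sketch of that optional-dependency pattern; the require_fused_moe helper is illustrative only and not part of the repository, which would check the sentinel at its own call sites.

try:
    from moe_kernels.fused_moe import fused_moe
except Exception:
    # moe-kernels is not installed (e.g. on an Intel CPU-only image):
    # keep a sentinel so this module can still be imported.
    fused_moe = None


def require_fused_moe():
    # Illustrative guard (assumption, not repository code): fail with a clear
    # message only when the fused kernel is actually needed, not at import time.
    if fused_moe is None:
        raise RuntimeError("moe-kernels is not installed; fused MoE is unavailable on this platform.")
    return fused_moe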
@@ -180,7 +180,7 @@ except ImportError as e:
 if MAMBA_AVAILABLE:
     __all__.append(Mamba)

-FLASH_TRANSFORMERS_BACKEND = True
+FLASH_TRANSFORMERS_BACKEND = torch.cuda.is_available()
 try:
     from text_generation_server.models.transformers_flash_causal_lm import (
         TransformersFlashCausalLM,
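The second hunk stops hard-coding FLASH_TRANSFORMERS_BACKEND = True and instead gates it on torch.cuda.is_available(), so CPU-only hosts no longer advertise a backend that requires CUDA. A minimal sketch of the same capability-gated flag, assuming a hypothetical use_flash_backend helper and an ImportError fallback that are not shown in this diff:

import torch

# Only advertise the flash transformers backend when a CUDA device is present.
FLASH_TRANSFORMERS_BACKEND = torch.cuda.is_available()

try:
    from text_generation_server.models.transformers_flash_causal_lm import (
        TransformersFlashCausalLM,
    )
except ImportError:
    # Assumption: if the flash path cannot be imported at all, disable the flag too.
    FLASH_TRANSFORMERS_BACKEND = False


def use_flash_backend() -> bool:
    # Hypothetical helper, not part of the repository: callers would check this
    # flag before routing a model through TransformersFlashCausalLM.
    return FLASH_TRANSFORMERS_BACKEND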