Hotfixing intel-cpu (not sure how it was working before). (#2967)

* Hotfixing intel-cpu (not sure how it was working before).

* Do not fail on missing moe-kernels (Intel-cpu).
This commit is contained in:
Nicolas Patry 2025-01-29 22:34:41 +01:00 committed by GitHub
parent ee0dffcd14
commit 80e7d98f88
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 6 additions and 2 deletions

View File

@@ -10,7 +10,11 @@ from text_generation_server.layers.fp8 import (
quant_dtype,
normalize_e4m3fn_to_native_float8,
)
try:
from moe_kernels.fused_moe import fused_moe
except Exception:
fused_moe = None
class FP8SparseMoELayer(nn.Module):

View File

@@ -180,7 +180,7 @@ except ImportError as e:
if MAMBA_AVAILABLE:
__all__.append(Mamba)
FLASH_TRANSFORMERS_BACKEND = True
FLASH_TRANSFORMERS_BACKEND = torch.cuda.is_available()
try:
from text_generation_server.models.transformers_flash_causal_lm import (
TransformersFlashCausalLM,