mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-09-11 04:14:52 +00:00
Fixing moe import.
This commit is contained in:
parent
351f3c6ee5
commit
7539881054
@@ -5,7 +5,6 @@ from typing import Optional, Tuple, Type, Union, List
|
||||
import torch
|
||||
from loguru import logger
|
||||
|
||||
from moe_kernels.fp8_utils import w8a8_block_fp8_matmul, per_token_group_quant_fp8
|
||||
from text_generation_server.utils.import_utils import SYSTEM
|
||||
from text_generation_server.utils.weights import (
|
||||
Weight,
|
||||
@@ -20,6 +19,12 @@ try:
|
||||
except ImportError:
|
||||
marlin_kernels = None
|
||||
|
||||
try:
|
||||
from moe_kernels.fp8_utils import w8a8_block_fp8_matmul, per_token_group_quant_fp8
|
||||
except ImportError:
|
||||
w8a8_block_fp8_matmul = None
|
||||
per_token_group_quant_fp8 = None
|
||||
|
||||
quant_dtype: torch.dtype = (
|
||||
torch.float8_e4m3fnuz if SYSTEM == "rocm" else torch.float8_e4m3fn
|
||||
)
|
||||
|
Loading…
Reference in New Issue
Block a user