mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-09-11 12:24:53 +00:00
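Fixing moe import: the `moe_kernels.fp8_utils` import is now optional (wrapped in `try/except ImportError`, with `w8a8_block_fp8_matmul` and `per_token_group_quant_fp8` falling back to `None`).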
This commit is contained in:
parent
351f3c6ee5
commit
7539881054
@@ -5,7 +5,6 @@ from typing import Optional, Tuple, Type, Union, List
|
|||||||
import torch
|
import torch
|
||||||
from loguru import logger
|
from loguru import logger
|
||||||
|
|
||||||
from moe_kernels.fp8_utils import w8a8_block_fp8_matmul, per_token_group_quant_fp8
|
|
||||||
from text_generation_server.utils.import_utils import SYSTEM
|
from text_generation_server.utils.import_utils import SYSTEM
|
||||||
from text_generation_server.utils.weights import (
|
from text_generation_server.utils.weights import (
|
||||||
Weight,
|
Weight,
|
||||||
@@ -20,6 +19,12 @@ try:
|
|||||||
except ImportError:
|
except ImportError:
|
||||||
marlin_kernels = None
|
marlin_kernels = None
|
||||||
|
|
||||||
|
try:
|
||||||
|
from moe_kernels.fp8_utils import w8a8_block_fp8_matmul, per_token_group_quant_fp8
|
||||||
|
except ImportError:
|
||||||
|
w8a8_block_fp8_matmul = None
|
||||||
|
per_token_group_quant_fp8 = None
|
||||||
|
|
||||||
quant_dtype: torch.dtype = (
|
quant_dtype: torch.dtype = (
|
||||||
torch.float8_e4m3fnuz if SYSTEM == "rocm" else torch.float8_e4m3fn
|
torch.float8_e4m3fnuz if SYSTEM == "rocm" else torch.float8_e4m3fn
|
||||||
)
|
)
|
||||||
|
Loading…
Reference in New Issue
Block a user