mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-04-27 13:02:12 +00:00
Use try/except to load the CUDA extension (quite an ugly practice, tbh)
This commit is contained in:
parent
620ed7d8aa
commit
a6e387404d
@ -4,6 +4,15 @@ import torch
|
|||||||
import torch.nn as nn
|
import torch.nn as nn
|
||||||
from torch.cuda.amp import custom_bwd, custom_fwd
|
from torch.cuda.amp import custom_bwd, custom_fwd
|
||||||
|
|
||||||
|
import torch
|
||||||
|
|
||||||
|
from loguru import logger
|
||||||
|
|
||||||
|
try:
|
||||||
|
from custom_kernels.exllama import make_q4, q4_matmul
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"The CUDA kernels custom_kernels.exllama not installed, got the error: {e}")
|
||||||
|
|
||||||
try:
|
try:
|
||||||
import triton
|
import triton
|
||||||
import triton.language as tl
|
import triton.language as tl
|
||||||
@ -359,9 +368,6 @@ class QuantLinear(nn.Module):
|
|||||||
out = out + self.bias if self.bias is not None else out
|
out = out + self.bias if self.bias is not None else out
|
||||||
return out.reshape(out_shape)
|
return out.reshape(out_shape)
|
||||||
|
|
||||||
import torch
|
|
||||||
from custom_kernels.exllama import make_q4, q4_matmul
|
|
||||||
|
|
||||||
# Dummy tensor to pass instead of g_idx since there is no way to pass "None" to a C++ extension
|
# Dummy tensor to pass instead of g_idx since there is no way to pass "None" to a C++ extension
|
||||||
none_tensor = torch.empty((1, 1), device = "meta")
|
none_tensor = torch.empty((1, 1), device = "meta")
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user