mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-09-10 20:04:52 +00:00
Fix __call__ vs forward.
This commit is contained in:
parent
b03d2621a7
commit
07bc903d6e
@@ -69,10 +69,11 @@ def create_exllama_buffers():
|
||||
TEMP_STATE, TEMP_DQ = temp_state, temp_dq
|
||||
|
||||
|
||||
class Ex4bitLinear:
|
||||
class Ex4bitLinear(torch.nn.Module):
|
||||
"""Linear layer implementation with per-group 4-bit quantization of the weights"""
|
||||
|
||||
def __init__(self, qweight, qzeros, scales, g_idx, bias, bits, groupsize):
|
||||
super().__init__()
|
||||
global MAX_DQ, MAX_INNER, ACT_ORDER, DEVICE
|
||||
assert bits == 4
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user