mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-09-10 20:04:52 +00:00
Fix __call__ vs forward.
This commit is contained in:
parent
b03d2621a7
commit
07bc903d6e
@ -69,10 +69,11 @@ def create_exllama_buffers():
|
|||||||
TEMP_STATE, TEMP_DQ = temp_state, temp_dq
|
TEMP_STATE, TEMP_DQ = temp_state, temp_dq
|
||||||
|
|
||||||
|
|
||||||
class Ex4bitLinear:
|
class Ex4bitLinear(torch.nn.Module):
|
||||||
"""Linear layer implementation with per-group 4-bit quantization of the weights"""
|
"""Linear layer implementation with per-group 4-bit quantization of the weights"""
|
||||||
|
|
||||||
def __init__(self, qweight, qzeros, scales, g_idx, bias, bits, groupsize):
|
def __init__(self, qweight, qzeros, scales, g_idx, bias, bits, groupsize):
|
||||||
|
super().__init__()
|
||||||
global MAX_DQ, MAX_INNER, ACT_ORDER, DEVICE
|
global MAX_DQ, MAX_INNER, ACT_ORDER, DEVICE
|
||||||
assert bits == 4
|
assert bits == 4
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user