This commit is contained in:
OlivierDehaene 2023-07-24 13:59:10 +02:00
parent a6057c4076
commit 74c87f5888
2 changed files with 3 additions and 2 deletions

View File

@@ -146,8 +146,10 @@ def serve(
# For which we have the final shapes only after the model has loaded
# This will allocate those buffers.
from text_generation_server.utils.gptq.exllama import (
create_exllama_buffers, set_device
create_exllama_buffers,
set_device,
)
set_device(model.device)
create_exllama_buffers()
except ImportError:

View File

@@ -12,7 +12,6 @@ def ext_make_q4(qweight, qzeros, scales, g_idx, device):
)
def ext_q4_matmul(x, q4, q4_width):
"""Matrix multiplication, returns x @ q4"""
outshape = x.shape[:-1] + (q4_width,)