log message

IlyasMoutawwakil 2024-02-05 09:26:47 +01:00 committed by Nicolas Patry
parent 76834c9989
commit 2629193efa


@@ -163,20 +163,17 @@ class Weights:
             g_idx = self.get_tensor(f"{prefix}.g_idx")
         elif quantize == "gptq" and quant_method == "awq":
             log_once(
-                logger.info,
-                "Converting AWQ weights to Exllama/GPTQ packing format, "
-                "in order used with Exllama/GPTQ kernels.",
+                logger.info, "Converting AWQ model to Exllama/GPTQ packing format."
             )
             from text_generation_server.utils.awq.conversion_utils import (
                 fast_awq_to_gptq,
             )

             qweight, qzeros = fast_awq_to_gptq(qweight, qzeros)
-            g_idx = torch.zeros(
-                (qweight.shape[0] * 32 // bits),
-                dtype=torch.int32,
-                device=qweight.device,
-            )
+            g_idx = (
+                torch.arange(qweight.shape[0] * (32 // bits), device=qweight.device)
+                // groupsize
+            ).to(dtype=torch.int32)
         else:
             g_idx = None
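The functional change in each hunk is the g_idx construction: AWQ checkpoints ship no g_idx, and the old code filled it with zeros, assigning every input feature to group 0, whereas the GPTQ kernels expect each input feature mapped to its quantization group. A minimal standalone sketch of the new mapping, using toy values for bits, groupsize, and the packed shape (the real values come from the model's quantization config):

import torch

# Toy values for illustration; real ones come from the quantization config.
bits = 4            # 4-bit weights: 32 // bits = 8 values packed per int32
groupsize = 128     # one scale/zero-point pair per 128 input features
qweight = torch.zeros((512, 256), dtype=torch.int32)  # 512 packed rows

# Sequential group index: input feature i belongs to group i // groupsize.
g_idx = (
    torch.arange(qweight.shape[0] * (32 // bits), device=qweight.device)
    // groupsize
).to(dtype=torch.int32)

assert g_idx.shape[0] == 4096          # 512 * 8 unpacked input features
assert g_idx[0].item() == 0
assert g_idx[groupsize].item() == 1    # group advances every `groupsize` features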
@@ -230,20 +227,17 @@ class Weights:
             g_idx = w[0]
         elif quantize == "gptq" and quant_method == "awq":
             log_once(
-                logger.info,
-                "Converting AWQ weights to Exllama/GPTQ packing format, "
-                "in order used with Exllama/GPTQ kernels.",
+                logger.info, "Converting AWQ model to Exllama/GPTQ packing format."
             )
             from text_generation_server.utils.awq.conversion_utils import (
                 fast_awq_to_gptq,
             )

             qweight, qzeros = fast_awq_to_gptq(qweight, qzeros)
-            g_idx = torch.zeros(
-                (qweight.shape[0] * 32 // bits),
-                dtype=torch.int32,
-                device=qweight.device,
-            )
+            g_idx = (
+                torch.arange(qweight.shape[0] * (32 // bits), device=qweight.device)
+                // groupsize
+            ).to(dtype=torch.int32)
         else:
             g_idx = None
@@ -340,20 +334,17 @@ class Weights:
             if quant_method == "awq":
                 log_once(
-                    logger.info,
-                    "Converting AWQ weights to Exllama/GPTQ packing format, "
-                    "in order used with Exllama/GPTQ kernels.",
+                    logger.info, "Converting AWQ model to Exllama/GPTQ packing format."
                 )
                 from text_generation_server.utils.awq.conversion_utils import (
                     fast_awq_to_gptq,
                 )

                 qweight, qzeros = fast_awq_to_gptq(qweight, qzeros)
-                g_idx = torch.zeros(
-                    (qweight.shape[0] * 32 // bits),
-                    dtype=torch.int32,
-                    device=qweight.device,
-                )
+                g_idx = (
+                    torch.arange(qweight.shape[0] * (32 // bits), device=qweight.device)
+                    // groupsize
+                ).to(dtype=torch.int32)
             weight = (qweight, qzeros, scales, g_idx, bits, groupsize, use_exllama)
         elif quantize == "awq":
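All three hunks apply the same repack-then-index pattern, so the conversion path can be read as one step. The sketch below is hypothetical (awq_to_gptq_weight is not a function in the repo) and uses fast_awq_to_gptq only with the signature visible in the diff:

import torch
from text_generation_server.utils.awq.conversion_utils import fast_awq_to_gptq

def awq_to_gptq_weight(qweight, qzeros, scales, bits, groupsize, use_exllama):
    # Repack the AWQ-ordered int32 tensors into the GPTQ/Exllama layout.
    qweight, qzeros = fast_awq_to_gptq(qweight, qzeros)
    # AWQ carries no g_idx; synthesize the sequential feature-to-group mapping.
    g_idx = (
        torch.arange(qweight.shape[0] * (32 // bits), device=qweight.device)
        // groupsize
    ).to(dtype=torch.int32)
    # Same tuple the last hunk hands to the GPTQ weight consumers.
    return (qweight, qzeros, scales, g_idx, bits, groupsize, use_exllama)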