Simplifying conditionals + reverting integration test values.

This commit is contained in:
Nicolas Patry 2024-10-18 12:13:48 +02:00
parent f36c9a68ae
commit 3e12402a98
No known key found for this signature in database
GPG Key ID: D2920555C90F704C
8 changed files with 88 additions and 87 deletions

View File

@ -11,57 +11,57 @@
},
{
"id": 3226,
"logprob": -9.0234375,
"logprob": -8.9453125,
"text": " ge"
},
{
"id": 21017,
"logprob": -9.0859375,
"logprob": -8.8515625,
"text": "ometric"
},
{
"id": 81,
"logprob": -0.2578125,
"logprob": -0.21875,
"text": "_"
},
{
"id": 6009,
"logprob": -2.1835938,
"logprob": -1.2773438,
"text": "mean"
},
{
"id": 26,
"logprob": -0.3005371,
"logprob": -0.25195312,
"text": "("
},
{
"id": 62,
"logprob": -5.625,
"logprob": -4.8203125,
"text": "L"
},
{
"id": 44,
"logprob": -3.0644531,
"logprob": -3.7734375,
"text": ":"
},
{
"id": 1682,
"logprob": -0.6845703,
"logprob": -0.8310547,
"text": " List"
},
{
"id": 77,
"logprob": -0.3869629,
"logprob": -0.22766113,
"text": "["
},
{
"id": 1808,
"logprob": -0.94628906,
"logprob": -0.46240234,
"text": "float"
},
{
"id": 10794,
"logprob": -2.5371094,
"logprob": -3.0234375,
"text": "]):"
}
],
@ -69,7 +69,7 @@
"tokens": [
{
"id": 284,
"logprob": -1.171875,
"logprob": -0.04626465,
"special": false,
"text": "\n "
},

View File

@ -11,57 +11,57 @@
},
{
"id": 3226,
"logprob": -9.0234375,
"logprob": -8.9453125,
"text": " ge"
},
{
"id": 21017,
"logprob": -9.0859375,
"logprob": -8.859375,
"text": "ometric"
},
{
"id": 81,
"logprob": -0.25634766,
"logprob": -0.21984863,
"text": "_"
},
{
"id": 6009,
"logprob": -2.1835938,
"logprob": -1.2861328,
"text": "mean"
},
{
"id": 26,
"logprob": -0.29956055,
"logprob": -0.25219727,
"text": "("
},
{
"id": 62,
"logprob": -5.625,
"logprob": -4.8007812,
"text": "L"
},
{
"id": 44,
"logprob": -3.09375,
"logprob": -3.7949219,
"text": ":"
},
{
"id": 1682,
"logprob": -0.67578125,
"logprob": -0.8046875,
"text": " List"
},
{
"id": 77,
"logprob": -0.38256836,
"logprob": -0.22424316,
"text": "["
},
{
"id": 1808,
"logprob": -0.9458008,
"logprob": -0.46191406,
"text": "float"
},
{
"id": 10794,
"logprob": -2.5371094,
"logprob": -3.0253906,
"text": "]):"
}
],
@ -69,7 +69,7 @@
"tokens": [
{
"id": 284,
"logprob": -0.05831909,
"logprob": 0.0,
"special": false,
"text": "\n "
},

View File

@ -12,57 +12,57 @@
},
{
"id": 3226,
"logprob": -9.015625,
"logprob": -8.9453125,
"text": " ge"
},
{
"id": 21017,
"logprob": -9.0859375,
"logprob": -8.8515625,
"text": "ometric"
},
{
"id": 81,
"logprob": -0.25585938,
"logprob": -0.22033691,
"text": "_"
},
{
"id": 6009,
"logprob": -2.1894531,
"logprob": -1.2939453,
"text": "mean"
},
{
"id": 26,
"logprob": -0.29882812,
"logprob": -0.25268555,
"text": "("
},
{
"id": 62,
"logprob": -5.6210938,
"logprob": -4.796875,
"text": "L"
},
{
"id": 44,
"logprob": -3.078125,
"logprob": -3.796875,
"text": ":"
},
{
"id": 1682,
"logprob": -0.6699219,
"logprob": -0.8066406,
"text": " List"
},
{
"id": 77,
"logprob": -0.38232422,
"logprob": -0.22644043,
"text": "["
},
{
"id": 1808,
"logprob": -0.9379883,
"logprob": -0.46166992,
"text": "float"
},
{
"id": 10794,
"logprob": -2.5371094,
"logprob": -3.0253906,
"text": "]):"
}
],
@ -70,7 +70,7 @@
"tokens": [
{
"id": 284,
"logprob": -1.1826172,
"logprob": -0.046844482,
"special": false,
"text": "\n "
},
@ -98,57 +98,57 @@
},
{
"id": 3226,
"logprob": -9.0234375,
"logprob": -8.9375,
"text": " ge"
},
{
"id": 21017,
"logprob": -9.09375,
"logprob": -8.8515625,
"text": "ometric"
},
{
"id": 81,
"logprob": -0.25610352,
"logprob": -0.21826172,
"text": "_"
},
{
"id": 6009,
"logprob": -2.1933594,
"logprob": -1.2871094,
"text": "mean"
},
{
"id": 26,
"logprob": -0.29907227,
"logprob": -0.25390625,
"text": "("
},
{
"id": 62,
"logprob": -5.640625,
"logprob": -4.8085938,
"text": "L"
},
{
"id": 44,
"logprob": -3.09375,
"logprob": -3.7890625,
"text": ":"
},
{
"id": 1682,
"logprob": -0.67626953,
"logprob": -0.8076172,
"text": " List"
},
{
"id": 77,
"logprob": -0.39038086,
"logprob": -0.22302246,
"text": "["
},
{
"id": 1808,
"logprob": -0.94384766,
"logprob": -0.46435547,
"text": "float"
},
{
"id": 10794,
"logprob": -2.5507812,
"logprob": -3.0234375,
"text": "]):"
}
],
@ -156,7 +156,7 @@
"tokens": [
{
"id": 284,
"logprob": -1.1865234,
"logprob": -0.046722412,
"special": false,
"text": "\n "
},
@ -184,57 +184,57 @@
},
{
"id": 3226,
"logprob": -9.015625,
"logprob": -8.9453125,
"text": " ge"
},
{
"id": 21017,
"logprob": -9.0859375,
"logprob": -8.8515625,
"text": "ometric"
},
{
"id": 81,
"logprob": -0.25561523,
"logprob": -0.21813965,
"text": "_"
},
{
"id": 6009,
"logprob": -2.1933594,
"logprob": -1.2744141,
"text": "mean"
},
{
"id": 26,
"logprob": -0.296875,
"logprob": -0.2512207,
"text": "("
},
{
"id": 62,
"logprob": -5.6367188,
"logprob": -4.8046875,
"text": "L"
},
{
"id": 44,
"logprob": -3.0800781,
"logprob": -3.7851562,
"text": ":"
},
{
"id": 1682,
"logprob": -0.6875,
"logprob": -0.81396484,
"text": " List"
},
{
"id": 77,
"logprob": -0.3840332,
"logprob": -0.22570801,
"text": "["
},
{
"id": 1808,
"logprob": -0.93847656,
"logprob": -0.46044922,
"text": "float"
},
{
"id": 10794,
"logprob": -2.5371094,
"logprob": -3.0234375,
"text": "]):"
}
],
@ -242,7 +242,7 @@
"tokens": [
{
"id": 284,
"logprob": -1.1777344,
"logprob": -0.04650879,
"special": false,
"text": "\n "
},
@ -270,57 +270,57 @@
},
{
"id": 3226,
"logprob": -9.015625,
"logprob": -8.9453125,
"text": " ge"
},
{
"id": 21017,
"logprob": -9.0859375,
"logprob": -8.8515625,
"text": "ometric"
},
{
"id": 81,
"logprob": -0.25610352,
"logprob": -0.21960449,
"text": "_"
},
{
"id": 6009,
"logprob": -2.1933594,
"logprob": -1.2890625,
"text": "mean"
},
{
"id": 26,
"logprob": -0.3010254,
"logprob": -0.25073242,
"text": "("
},
{
"id": 62,
"logprob": -5.6484375,
"logprob": -4.8085938,
"text": "L"
},
{
"id": 44,
"logprob": -3.0820312,
"logprob": -3.8046875,
"text": ":"
},
{
"id": 1682,
"logprob": -0.6801758,
"logprob": -0.8071289,
"text": " List"
},
{
"id": 77,
"logprob": -0.39257812,
"logprob": -0.22570801,
"text": "["
},
{
"id": 1808,
"logprob": -0.92626953,
"logprob": -0.46118164,
"text": "float"
},
{
"id": 10794,
"logprob": -2.5234375,
"logprob": -3.0097656,
"text": "]):"
}
],
@ -328,7 +328,7 @@
"tokens": [
{
"id": 284,
"logprob": -1.171875,
"logprob": -0.046539307,
"special": false,
"text": "\n "
},

View File

@ -0,0 +1,8 @@
from text_generation_server.utils.import_utils import SYSTEM
if SYSTEM == "ipex":
from .ipex import WQLinear
elif SYSTEM == "cuda":
from .cuda import WQLinear
__all__ = ["WQLinear"]

View File

@ -8,6 +8,11 @@ from text_generation_server.utils.import_utils import SYSTEM
from text_generation_server.utils.log import log_once
from text_generation_server.utils.weights import Weight, Weights, WeightsLoader
if SYSTEM == "ipex":
from .ipex import QuantLinear
elif SYSTEM == "cuda":
from .cuda import QuantLinear
@dataclass
class GPTQWeight(Weight):
@ -36,12 +41,7 @@ class GPTQWeight(Weight):
"to use Exllama/GPTQ kernels for AWQ inference."
)
try:
if SYSTEM == "ipex":
from text_generation_server.layers.awq.quantize.ipex import WQLinear
else:
from text_generation_server.layers.awq.quantize.qmodule import (
WQLinear,
)
from text_generation_server.layers.awq.quantize import WQLinear
return WQLinear(
w_bit=self.bits,
@ -65,10 +65,7 @@ class GPTQWeight(Weight):
return ExllamaQuantLinear(self, bias)
else:
if SYSTEM == "ipex":
from text_generation_server.layers.gptq.ipex import QuantLinear
else:
from text_generation_server.layers.gptq.quant_linear import QuantLinear
from text_generation_server.layers.gptq import QuantLinear
return QuantLinear(
self.qweight,

View File

@ -13,11 +13,7 @@ from accelerate import init_empty_weights
from text_generation_server.utils import initialize_torch_distributed, Weights
from text_generation_server.utils.hub import weight_files
from text_generation_server.utils.import_utils import SYSTEM
if SYSTEM == "ipex":
from text_generation_server.layers.gptq.ipex import QuantLinear
else:
from text_generation_server.layers.gptq.quant_linear import QuantLinear
from text_generation_server.layers.gptq import QuantLinear
from loguru import logger
from typing import Optional
from text_generation_server.layers.gptq.utils import torch_snr_error