Simplifying conditionals + reverting integration tests values.

This commit is contained in:
Nicolas Patry 2024-10-18 12:13:48 +02:00
parent f36c9a68ae
commit 3e12402a98
No known key found for this signature in database
GPG Key ID: D2920555C90F704C
8 changed files with 88 additions and 87 deletions

View File

@ -11,57 +11,57 @@
}, },
{ {
"id": 3226, "id": 3226,
"logprob": -9.0234375, "logprob": -8.9453125,
"text": " ge" "text": " ge"
}, },
{ {
"id": 21017, "id": 21017,
"logprob": -9.0859375, "logprob": -8.8515625,
"text": "ometric" "text": "ometric"
}, },
{ {
"id": 81, "id": 81,
"logprob": -0.2578125, "logprob": -0.21875,
"text": "_" "text": "_"
}, },
{ {
"id": 6009, "id": 6009,
"logprob": -2.1835938, "logprob": -1.2773438,
"text": "mean" "text": "mean"
}, },
{ {
"id": 26, "id": 26,
"logprob": -0.3005371, "logprob": -0.25195312,
"text": "(" "text": "("
}, },
{ {
"id": 62, "id": 62,
"logprob": -5.625, "logprob": -4.8203125,
"text": "L" "text": "L"
}, },
{ {
"id": 44, "id": 44,
"logprob": -3.0644531, "logprob": -3.7734375,
"text": ":" "text": ":"
}, },
{ {
"id": 1682, "id": 1682,
"logprob": -0.6845703, "logprob": -0.8310547,
"text": " List" "text": " List"
}, },
{ {
"id": 77, "id": 77,
"logprob": -0.3869629, "logprob": -0.22766113,
"text": "[" "text": "["
}, },
{ {
"id": 1808, "id": 1808,
"logprob": -0.94628906, "logprob": -0.46240234,
"text": "float" "text": "float"
}, },
{ {
"id": 10794, "id": 10794,
"logprob": -2.5371094, "logprob": -3.0234375,
"text": "]):" "text": "]):"
} }
], ],
@ -69,7 +69,7 @@
"tokens": [ "tokens": [
{ {
"id": 284, "id": 284,
"logprob": -1.171875, "logprob": -0.04626465,
"special": false, "special": false,
"text": "\n " "text": "\n "
}, },

View File

@ -11,57 +11,57 @@
}, },
{ {
"id": 3226, "id": 3226,
"logprob": -9.0234375, "logprob": -8.9453125,
"text": " ge" "text": " ge"
}, },
{ {
"id": 21017, "id": 21017,
"logprob": -9.0859375, "logprob": -8.859375,
"text": "ometric" "text": "ometric"
}, },
{ {
"id": 81, "id": 81,
"logprob": -0.25634766, "logprob": -0.21984863,
"text": "_" "text": "_"
}, },
{ {
"id": 6009, "id": 6009,
"logprob": -2.1835938, "logprob": -1.2861328,
"text": "mean" "text": "mean"
}, },
{ {
"id": 26, "id": 26,
"logprob": -0.29956055, "logprob": -0.25219727,
"text": "(" "text": "("
}, },
{ {
"id": 62, "id": 62,
"logprob": -5.625, "logprob": -4.8007812,
"text": "L" "text": "L"
}, },
{ {
"id": 44, "id": 44,
"logprob": -3.09375, "logprob": -3.7949219,
"text": ":" "text": ":"
}, },
{ {
"id": 1682, "id": 1682,
"logprob": -0.67578125, "logprob": -0.8046875,
"text": " List" "text": " List"
}, },
{ {
"id": 77, "id": 77,
"logprob": -0.38256836, "logprob": -0.22424316,
"text": "[" "text": "["
}, },
{ {
"id": 1808, "id": 1808,
"logprob": -0.9458008, "logprob": -0.46191406,
"text": "float" "text": "float"
}, },
{ {
"id": 10794, "id": 10794,
"logprob": -2.5371094, "logprob": -3.0253906,
"text": "]):" "text": "]):"
} }
], ],
@ -69,7 +69,7 @@
"tokens": [ "tokens": [
{ {
"id": 284, "id": 284,
"logprob": -0.05831909, "logprob": 0.0,
"special": false, "special": false,
"text": "\n " "text": "\n "
}, },

View File

@ -12,57 +12,57 @@
}, },
{ {
"id": 3226, "id": 3226,
"logprob": -9.015625, "logprob": -8.9453125,
"text": " ge" "text": " ge"
}, },
{ {
"id": 21017, "id": 21017,
"logprob": -9.0859375, "logprob": -8.8515625,
"text": "ometric" "text": "ometric"
}, },
{ {
"id": 81, "id": 81,
"logprob": -0.25585938, "logprob": -0.22033691,
"text": "_" "text": "_"
}, },
{ {
"id": 6009, "id": 6009,
"logprob": -2.1894531, "logprob": -1.2939453,
"text": "mean" "text": "mean"
}, },
{ {
"id": 26, "id": 26,
"logprob": -0.29882812, "logprob": -0.25268555,
"text": "(" "text": "("
}, },
{ {
"id": 62, "id": 62,
"logprob": -5.6210938, "logprob": -4.796875,
"text": "L" "text": "L"
}, },
{ {
"id": 44, "id": 44,
"logprob": -3.078125, "logprob": -3.796875,
"text": ":" "text": ":"
}, },
{ {
"id": 1682, "id": 1682,
"logprob": -0.6699219, "logprob": -0.8066406,
"text": " List" "text": " List"
}, },
{ {
"id": 77, "id": 77,
"logprob": -0.38232422, "logprob": -0.22644043,
"text": "[" "text": "["
}, },
{ {
"id": 1808, "id": 1808,
"logprob": -0.9379883, "logprob": -0.46166992,
"text": "float" "text": "float"
}, },
{ {
"id": 10794, "id": 10794,
"logprob": -2.5371094, "logprob": -3.0253906,
"text": "]):" "text": "]):"
} }
], ],
@ -70,7 +70,7 @@
"tokens": [ "tokens": [
{ {
"id": 284, "id": 284,
"logprob": -1.1826172, "logprob": -0.046844482,
"special": false, "special": false,
"text": "\n " "text": "\n "
}, },
@ -98,57 +98,57 @@
}, },
{ {
"id": 3226, "id": 3226,
"logprob": -9.0234375, "logprob": -8.9375,
"text": " ge" "text": " ge"
}, },
{ {
"id": 21017, "id": 21017,
"logprob": -9.09375, "logprob": -8.8515625,
"text": "ometric" "text": "ometric"
}, },
{ {
"id": 81, "id": 81,
"logprob": -0.25610352, "logprob": -0.21826172,
"text": "_" "text": "_"
}, },
{ {
"id": 6009, "id": 6009,
"logprob": -2.1933594, "logprob": -1.2871094,
"text": "mean" "text": "mean"
}, },
{ {
"id": 26, "id": 26,
"logprob": -0.29907227, "logprob": -0.25390625,
"text": "(" "text": "("
}, },
{ {
"id": 62, "id": 62,
"logprob": -5.640625, "logprob": -4.8085938,
"text": "L" "text": "L"
}, },
{ {
"id": 44, "id": 44,
"logprob": -3.09375, "logprob": -3.7890625,
"text": ":" "text": ":"
}, },
{ {
"id": 1682, "id": 1682,
"logprob": -0.67626953, "logprob": -0.8076172,
"text": " List" "text": " List"
}, },
{ {
"id": 77, "id": 77,
"logprob": -0.39038086, "logprob": -0.22302246,
"text": "[" "text": "["
}, },
{ {
"id": 1808, "id": 1808,
"logprob": -0.94384766, "logprob": -0.46435547,
"text": "float" "text": "float"
}, },
{ {
"id": 10794, "id": 10794,
"logprob": -2.5507812, "logprob": -3.0234375,
"text": "]):" "text": "]):"
} }
], ],
@ -156,7 +156,7 @@
"tokens": [ "tokens": [
{ {
"id": 284, "id": 284,
"logprob": -1.1865234, "logprob": -0.046722412,
"special": false, "special": false,
"text": "\n " "text": "\n "
}, },
@ -184,57 +184,57 @@
}, },
{ {
"id": 3226, "id": 3226,
"logprob": -9.015625, "logprob": -8.9453125,
"text": " ge" "text": " ge"
}, },
{ {
"id": 21017, "id": 21017,
"logprob": -9.0859375, "logprob": -8.8515625,
"text": "ometric" "text": "ometric"
}, },
{ {
"id": 81, "id": 81,
"logprob": -0.25561523, "logprob": -0.21813965,
"text": "_" "text": "_"
}, },
{ {
"id": 6009, "id": 6009,
"logprob": -2.1933594, "logprob": -1.2744141,
"text": "mean" "text": "mean"
}, },
{ {
"id": 26, "id": 26,
"logprob": -0.296875, "logprob": -0.2512207,
"text": "(" "text": "("
}, },
{ {
"id": 62, "id": 62,
"logprob": -5.6367188, "logprob": -4.8046875,
"text": "L" "text": "L"
}, },
{ {
"id": 44, "id": 44,
"logprob": -3.0800781, "logprob": -3.7851562,
"text": ":" "text": ":"
}, },
{ {
"id": 1682, "id": 1682,
"logprob": -0.6875, "logprob": -0.81396484,
"text": " List" "text": " List"
}, },
{ {
"id": 77, "id": 77,
"logprob": -0.3840332, "logprob": -0.22570801,
"text": "[" "text": "["
}, },
{ {
"id": 1808, "id": 1808,
"logprob": -0.93847656, "logprob": -0.46044922,
"text": "float" "text": "float"
}, },
{ {
"id": 10794, "id": 10794,
"logprob": -2.5371094, "logprob": -3.0234375,
"text": "]):" "text": "]):"
} }
], ],
@ -242,7 +242,7 @@
"tokens": [ "tokens": [
{ {
"id": 284, "id": 284,
"logprob": -1.1777344, "logprob": -0.04650879,
"special": false, "special": false,
"text": "\n " "text": "\n "
}, },
@ -270,57 +270,57 @@
}, },
{ {
"id": 3226, "id": 3226,
"logprob": -9.015625, "logprob": -8.9453125,
"text": " ge" "text": " ge"
}, },
{ {
"id": 21017, "id": 21017,
"logprob": -9.0859375, "logprob": -8.8515625,
"text": "ometric" "text": "ometric"
}, },
{ {
"id": 81, "id": 81,
"logprob": -0.25610352, "logprob": -0.21960449,
"text": "_" "text": "_"
}, },
{ {
"id": 6009, "id": 6009,
"logprob": -2.1933594, "logprob": -1.2890625,
"text": "mean" "text": "mean"
}, },
{ {
"id": 26, "id": 26,
"logprob": -0.3010254, "logprob": -0.25073242,
"text": "(" "text": "("
}, },
{ {
"id": 62, "id": 62,
"logprob": -5.6484375, "logprob": -4.8085938,
"text": "L" "text": "L"
}, },
{ {
"id": 44, "id": 44,
"logprob": -3.0820312, "logprob": -3.8046875,
"text": ":" "text": ":"
}, },
{ {
"id": 1682, "id": 1682,
"logprob": -0.6801758, "logprob": -0.8071289,
"text": " List" "text": " List"
}, },
{ {
"id": 77, "id": 77,
"logprob": -0.39257812, "logprob": -0.22570801,
"text": "[" "text": "["
}, },
{ {
"id": 1808, "id": 1808,
"logprob": -0.92626953, "logprob": -0.46118164,
"text": "float" "text": "float"
}, },
{ {
"id": 10794, "id": 10794,
"logprob": -2.5234375, "logprob": -3.0097656,
"text": "]):" "text": "]):"
} }
], ],
@ -328,7 +328,7 @@
"tokens": [ "tokens": [
{ {
"id": 284, "id": 284,
"logprob": -1.171875, "logprob": -0.046539307,
"special": false, "special": false,
"text": "\n " "text": "\n "
}, },

View File

@ -0,0 +1,8 @@
from text_generation_server.utils.import_utils import SYSTEM
if SYSTEM == "ipex":
from .ipex import WQLinear
elif SYSTEM == "cuda":
from .cuda import WQLinear
__all__ = ["WQLinear"]

View File

@ -8,6 +8,11 @@ from text_generation_server.utils.import_utils import SYSTEM
from text_generation_server.utils.log import log_once from text_generation_server.utils.log import log_once
from text_generation_server.utils.weights import Weight, Weights, WeightsLoader from text_generation_server.utils.weights import Weight, Weights, WeightsLoader
if SYSTEM == "ipex":
from .ipex import QuantLinear
elif SYSTEM == "cuda":
from .cuda import QuantLinear
@dataclass @dataclass
class GPTQWeight(Weight): class GPTQWeight(Weight):
@ -36,12 +41,7 @@ class GPTQWeight(Weight):
"to use Exllama/GPTQ kernels for AWQ inference." "to use Exllama/GPTQ kernels for AWQ inference."
) )
try: try:
if SYSTEM == "ipex": from text_generation_server.layers.awq.quantize import WQLinear
from text_generation_server.layers.awq.quantize.ipex import WQLinear
else:
from text_generation_server.layers.awq.quantize.qmodule import (
WQLinear,
)
return WQLinear( return WQLinear(
w_bit=self.bits, w_bit=self.bits,
@ -65,10 +65,7 @@ class GPTQWeight(Weight):
return ExllamaQuantLinear(self, bias) return ExllamaQuantLinear(self, bias)
else: else:
if SYSTEM == "ipex": from text_generation_server.layers.gptq import QuantLinear
from text_generation_server.layers.gptq.ipex import QuantLinear
else:
from text_generation_server.layers.gptq.quant_linear import QuantLinear
return QuantLinear( return QuantLinear(
self.qweight, self.qweight,

View File

@ -13,11 +13,7 @@ from accelerate import init_empty_weights
from text_generation_server.utils import initialize_torch_distributed, Weights from text_generation_server.utils import initialize_torch_distributed, Weights
from text_generation_server.utils.hub import weight_files from text_generation_server.utils.hub import weight_files
from text_generation_server.utils.import_utils import SYSTEM from text_generation_server.utils.import_utils import SYSTEM
from text_generation_server.layers.gptq import QuantLinear
if SYSTEM == "ipex":
from text_generation_server.layers.gptq.ipex import QuantLinear
else:
from text_generation_server.layers.gptq.quant_linear import QuantLinear
from loguru import logger from loguru import logger
from typing import Optional from typing import Optional
from text_generation_server.layers.gptq.utils import torch_snr_error from text_generation_server.layers.gptq.utils import torch_snr_error