This commit is contained in:
OlivierDehaene 2023-12-22 15:46:04 +01:00 committed by Karol Damaszke
parent 8cc4306f72
commit 62646c2a54
8 changed files with 28 additions and 29 deletions

34
Cargo.lock generated
View File

@ -350,9 +350,9 @@ checksum = "df8670b8c7b9dae1793364eafadf7239c40d669904660c5960d74cfd80b46a53"
[[package]]
name = "cc"
version = "1.0.94"
version = "1.0.95"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "17f6e324229dc011159fcc089755d1e2e216a90d43a7dea6853ca740b84f35e7"
checksum = "d32a725bc159af97c3e629873bb9f88fb8cf8a4867175f76dc987815ea07c83b"
[[package]]
name = "cfg-if"
@ -2340,9 +2340,9 @@ dependencies = [
[[package]]
name = "rustix"
version = "0.38.32"
version = "0.38.33"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "65e04861e65f21776e67888bfbea442b3642beaa0138fdb1dd7a84a52dffdb89"
checksum = "e3cc72858054fcff6d7dea32df2aeaee6a7c24227366d7ea429aada2f26b16ad"
dependencies = [
"bitflags 2.5.0",
"errno",
@ -2365,9 +2365,9 @@ dependencies = [
[[package]]
name = "rustls"
version = "0.22.3"
version = "0.22.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "99008d7ad0bbbea527ec27bddbc0e432c5b87d8175178cee68d2eec9c4a1813c"
checksum = "bf4ef73721ac7bcd79b2b315da7779d8fc09718c6b3d2d1b2d94850eb8c18432"
dependencies = [
"log",
"ring 0.17.8",
@ -2586,9 +2586,9 @@ dependencies = [
[[package]]
name = "signal-hook-registry"
version = "1.4.1"
version = "1.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d8229b473baa5980ac72ef434c4415e70c4b5e71b423043adb4ba059f89c99a1"
checksum = "a9e9e0b4211b72e7b8b6e85c807d36c212bdb33ea8587f7569562a84df5465b1"
dependencies = [
"libc",
]
@ -2798,7 +2798,7 @@ dependencies = [
[[package]]
name = "text-generation-benchmark"
version = "1.3.3"
version = "1.3.4"
dependencies = [
"average",
"clap",
@ -2819,7 +2819,7 @@ dependencies = [
[[package]]
name = "text-generation-client"
version = "1.3.3"
version = "1.3.4"
dependencies = [
"futures",
"grpc-metadata",
@ -2836,7 +2836,7 @@ dependencies = [
[[package]]
name = "text-generation-launcher"
version = "1.3.3"
version = "1.3.4"
dependencies = [
"clap",
"ctrlc",
@ -2852,7 +2852,7 @@ dependencies = [
[[package]]
name = "text-generation-router"
version = "1.3.3"
version = "1.3.4"
dependencies = [
"async-stream",
"axum",
@ -2887,18 +2887,18 @@ dependencies = [
[[package]]
name = "thiserror"
version = "1.0.58"
version = "1.0.59"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "03468839009160513471e86a034bb2c5c0e4baae3b43f79ffc55c4a5427b3297"
checksum = "f0126ad08bff79f29fc3ae6a55cc72352056dfff61e3ff8bb7129476d44b23aa"
dependencies = [
"thiserror-impl",
]
[[package]]
name = "thiserror-impl"
version = "1.0.58"
version = "1.0.59"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c61f3ba182994efc43764a46c018c347bc492c79f024e705f46567b418f6d4f7"
checksum = "d1cd413b5d558b4c5bf3680e324a6fa5014e7b7c067a51e69dbdf47eb7148b66"
dependencies = [
"proc-macro2",
"quote",
@ -3409,7 +3409,7 @@ dependencies = [
"log",
"native-tls",
"once_cell",
"rustls 0.22.3",
"rustls 0.22.4",
"rustls-pki-types",
"rustls-webpki",
"serde",

View File

@ -9,7 +9,7 @@ members = [
resolver = "2"
[workspace.package]
version = "1.3.3"
version = "1.3.4"
edition = "2021"
authors = ["Olivier Dehaene"]
homepage = "https://github.com/huggingface/text-generation-inference"

View File

@ -10,7 +10,7 @@
"name": "Apache 2.0",
"url": "https://www.apache.org/licenses/LICENSE-2.0"
},
"version": "1.3.3"
"version": "1.3.4"
},
"paths": {
"/": {

View File

@ -1,6 +1,6 @@
[tool.poetry]
name = "text-generation-integration-tests"
version = "1.3.3"
version = "1.3.4"
description = "Text Generation Inference integration tests"
authors = ["Nicolas Patry <nicolas@huggingface.co>"]

View File

@ -1,6 +1,6 @@
[tool.poetry]
name = "text-generation-server"
version = "1.3.3"
version = "1.3.4"
description = "Text Generation Inference Python gRPC Server"
authors = ["Olivier Dehaene <olivier@huggingface.co>"]

View File

@ -39,7 +39,7 @@ if V2 and int(os.getenv("WORLD_SIZE", "1")) > 1:
V2 = False
log_once(
logger.warning,
"Disabling exllama v2 and using v1 instead because there are issues when sharding"
"Disabling exllama v2 and using v1 instead because there are issues when sharding",
)
if os.getenv("DISABLE_EXLLAMA") == "True":

View File

@ -215,7 +215,9 @@ class Weights:
bits, groupsize, desc_act = self._get_gptq_params()
from text_generation_server.utils.layers import HAS_EXLLAMA
use_exllama = bits == 4 and HAS_EXLLAMA and quantize == "gptq" and not desc_act
use_exllama = (
bits == 4 and HAS_EXLLAMA and quantize == "gptq" and not desc_act
)
weight = (qweight, qzeros, scales, g_idx, bits, groupsize, use_exllama)
else:
w = [self.get_sharded(f"{p}.weight", dim=0) for p in prefixes]
@ -281,14 +283,11 @@ class Weights:
if CAN_EXLLAMA:
log_once(
logger.warning,
"Exllama GPTQ cuda kernels (which are faster) could have been used, but are not currently installed, try using BUILD_EXTENSIONS=True"
"Exllama GPTQ cuda kernels (which are faster) could have been used, but are not currently installed, try using BUILD_EXTENSIONS=True",
)
use_exllama = False
else:
log_once(
logger.info,
f"Using exllama kernels v{HAS_EXLLAMA}"
)
log_once(logger.info, f"Using exllama kernels v{HAS_EXLLAMA}")
g_idx = self.get_sharded(f"{prefix}.g_idx", dim=0)