mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-04-27 04:52:07 +00:00
v1.3.4
This commit is contained in:
parent
8cc4306f72
commit
62646c2a54
34
Cargo.lock
generated
34
Cargo.lock
generated
@ -350,9 +350,9 @@ checksum = "df8670b8c7b9dae1793364eafadf7239c40d669904660c5960d74cfd80b46a53"
|
||||
|
||||
[[package]]
|
||||
name = "cc"
|
||||
version = "1.0.94"
|
||||
version = "1.0.95"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "17f6e324229dc011159fcc089755d1e2e216a90d43a7dea6853ca740b84f35e7"
|
||||
checksum = "d32a725bc159af97c3e629873bb9f88fb8cf8a4867175f76dc987815ea07c83b"
|
||||
|
||||
[[package]]
|
||||
name = "cfg-if"
|
||||
@ -2340,9 +2340,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "rustix"
|
||||
version = "0.38.32"
|
||||
version = "0.38.33"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "65e04861e65f21776e67888bfbea442b3642beaa0138fdb1dd7a84a52dffdb89"
|
||||
checksum = "e3cc72858054fcff6d7dea32df2aeaee6a7c24227366d7ea429aada2f26b16ad"
|
||||
dependencies = [
|
||||
"bitflags 2.5.0",
|
||||
"errno",
|
||||
@ -2365,9 +2365,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "rustls"
|
||||
version = "0.22.3"
|
||||
version = "0.22.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "99008d7ad0bbbea527ec27bddbc0e432c5b87d8175178cee68d2eec9c4a1813c"
|
||||
checksum = "bf4ef73721ac7bcd79b2b315da7779d8fc09718c6b3d2d1b2d94850eb8c18432"
|
||||
dependencies = [
|
||||
"log",
|
||||
"ring 0.17.8",
|
||||
@ -2586,9 +2586,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "signal-hook-registry"
|
||||
version = "1.4.1"
|
||||
version = "1.4.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d8229b473baa5980ac72ef434c4415e70c4b5e71b423043adb4ba059f89c99a1"
|
||||
checksum = "a9e9e0b4211b72e7b8b6e85c807d36c212bdb33ea8587f7569562a84df5465b1"
|
||||
dependencies = [
|
||||
"libc",
|
||||
]
|
||||
@ -2798,7 +2798,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "text-generation-benchmark"
|
||||
version = "1.3.3"
|
||||
version = "1.3.4"
|
||||
dependencies = [
|
||||
"average",
|
||||
"clap",
|
||||
@ -2819,7 +2819,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "text-generation-client"
|
||||
version = "1.3.3"
|
||||
version = "1.3.4"
|
||||
dependencies = [
|
||||
"futures",
|
||||
"grpc-metadata",
|
||||
@ -2836,7 +2836,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "text-generation-launcher"
|
||||
version = "1.3.3"
|
||||
version = "1.3.4"
|
||||
dependencies = [
|
||||
"clap",
|
||||
"ctrlc",
|
||||
@ -2852,7 +2852,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "text-generation-router"
|
||||
version = "1.3.3"
|
||||
version = "1.3.4"
|
||||
dependencies = [
|
||||
"async-stream",
|
||||
"axum",
|
||||
@ -2887,18 +2887,18 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "thiserror"
|
||||
version = "1.0.58"
|
||||
version = "1.0.59"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "03468839009160513471e86a034bb2c5c0e4baae3b43f79ffc55c4a5427b3297"
|
||||
checksum = "f0126ad08bff79f29fc3ae6a55cc72352056dfff61e3ff8bb7129476d44b23aa"
|
||||
dependencies = [
|
||||
"thiserror-impl",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "thiserror-impl"
|
||||
version = "1.0.58"
|
||||
version = "1.0.59"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c61f3ba182994efc43764a46c018c347bc492c79f024e705f46567b418f6d4f7"
|
||||
checksum = "d1cd413b5d558b4c5bf3680e324a6fa5014e7b7c067a51e69dbdf47eb7148b66"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
@ -3409,7 +3409,7 @@ dependencies = [
|
||||
"log",
|
||||
"native-tls",
|
||||
"once_cell",
|
||||
"rustls 0.22.3",
|
||||
"rustls 0.22.4",
|
||||
"rustls-pki-types",
|
||||
"rustls-webpki",
|
||||
"serde",
|
||||
|
@ -9,7 +9,7 @@ members = [
|
||||
resolver = "2"
|
||||
|
||||
[workspace.package]
|
||||
version = "1.3.3"
|
||||
version = "1.3.4"
|
||||
edition = "2021"
|
||||
authors = ["Olivier Dehaene"]
|
||||
homepage = "https://github.com/huggingface/text-generation-inference"
|
||||
|
@ -10,7 +10,7 @@
|
||||
"name": "Apache 2.0",
|
||||
"url": "https://www.apache.org/licenses/LICENSE-2.0"
|
||||
},
|
||||
"version": "1.3.3"
|
||||
"version": "1.3.4"
|
||||
},
|
||||
"paths": {
|
||||
"/": {
|
||||
|
@ -1,6 +1,6 @@
|
||||
[tool.poetry]
|
||||
name = "text-generation-integration-tests"
|
||||
version = "1.3.3"
|
||||
version = "1.3.4"
|
||||
description = "Text Generation Inference integration tests"
|
||||
authors = ["Nicolas Patry <nicolas@huggingface.co>"]
|
||||
|
||||
|
@ -1,6 +1,6 @@
|
||||
[tool.poetry]
|
||||
name = "text-generation-server"
|
||||
version = "1.3.3"
|
||||
version = "1.3.4"
|
||||
description = "Text Generation Inference Python gRPC Server"
|
||||
authors = ["Olivier Dehaene <olivier@huggingface.co>"]
|
||||
|
||||
|
@ -39,7 +39,7 @@ if V2 and int(os.getenv("WORLD_SIZE", "1")) > 1:
|
||||
V2 = False
|
||||
log_once(
|
||||
logger.warning,
|
||||
"Disabling exllama v2 and using v1 instead because there are issues when sharding"
|
||||
"Disabling exllama v2 and using v1 instead because there are issues when sharding",
|
||||
)
|
||||
|
||||
if os.getenv("DISABLE_EXLLAMA") == "True":
|
||||
|
@ -2,5 +2,5 @@ from functools import lru_cache
|
||||
|
||||
|
||||
@lru_cache(10)
|
||||
def log_once(log, msg:str):
|
||||
def log_once(log, msg: str):
|
||||
log(msg)
|
||||
|
@ -215,7 +215,9 @@ class Weights:
|
||||
bits, groupsize, desc_act = self._get_gptq_params()
|
||||
from text_generation_server.utils.layers import HAS_EXLLAMA
|
||||
|
||||
use_exllama = bits == 4 and HAS_EXLLAMA and quantize == "gptq" and not desc_act
|
||||
use_exllama = (
|
||||
bits == 4 and HAS_EXLLAMA and quantize == "gptq" and not desc_act
|
||||
)
|
||||
weight = (qweight, qzeros, scales, g_idx, bits, groupsize, use_exllama)
|
||||
else:
|
||||
w = [self.get_sharded(f"{p}.weight", dim=0) for p in prefixes]
|
||||
@ -281,14 +283,11 @@ class Weights:
|
||||
if CAN_EXLLAMA:
|
||||
log_once(
|
||||
logger.warning,
|
||||
"Exllama GPTQ cuda kernels (which are faster) could have been used, but are not currently installed, try using BUILD_EXTENSIONS=True"
|
||||
"Exllama GPTQ cuda kernels (which are faster) could have been used, but are not currently installed, try using BUILD_EXTENSIONS=True",
|
||||
)
|
||||
use_exllama = False
|
||||
else:
|
||||
log_once(
|
||||
logger.info,
|
||||
f"Using exllama kernels v{HAS_EXLLAMA}"
|
||||
)
|
||||
log_once(logger.info, f"Using exllama kernels v{HAS_EXLLAMA}")
|
||||
|
||||
g_idx = self.get_sharded(f"{prefix}.g_idx", dim=0)
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user