mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-04-19 22:02:06 +00:00
Upgrading bitsandbytes. (#2910)
* Upgrading bitsandbytes.

Co-Authored-By: Matthew Douglas <38992547+matthewdouglas@users.noreply.github.com>

* Tighter lock.

---------

Co-authored-by: Matthew Douglas <38992547+matthewdouglas@users.noreply.github.com>
This commit is contained in:
parent
120bd3e3bb
commit
922cc38fbc
@ -2078,14 +2078,7 @@ fn main() -> Result<(), LauncherError> {
|
|||||||
let cuda_graphs = match (&args.cuda_graphs, &quantize) {
|
let cuda_graphs = match (&args.cuda_graphs, &quantize) {
|
||||||
(Some(cuda_graphs), _) => cuda_graphs.iter().cloned().filter(|&c| c > 0).collect(),
|
(Some(cuda_graphs), _) => cuda_graphs.iter().cloned().filter(|&c| c > 0).collect(),
|
||||||
#[allow(deprecated)]
|
#[allow(deprecated)]
|
||||||
(
|
(None, Some(Quantization::Bitsandbytes)) => {
|
||||||
None,
|
|
||||||
Some(
|
|
||||||
Quantization::Bitsandbytes
|
|
||||||
| Quantization::BitsandbytesNf4
|
|
||||||
| Quantization::BitsandbytesFp4,
|
|
||||||
),
|
|
||||||
) => {
|
|
||||||
tracing::warn!("Bitsandbytes doesn't work with cuda graphs, deactivating them");
|
tracing::warn!("Bitsandbytes doesn't work with cuda graphs, deactivating them");
|
||||||
vec![]
|
vec![]
|
||||||
}
|
}
|
||||||
|
13
server/poetry.lock
generated
13
server/poetry.lock
generated
@ -1,4 +1,4 @@
|
|||||||
# This file is automatically @generated by Poetry 1.8.5 and should not be changed by hand.
|
# This file is automatically @generated by Poetry 1.8.4 and should not be changed by hand.
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "accelerate"
|
name = "accelerate"
|
||||||
@ -290,22 +290,23 @@ tests-mypy = ["mypy (>=1.11.1)", "pytest-mypy-plugins"]
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "bitsandbytes"
|
name = "bitsandbytes"
|
||||||
version = "0.43.3"
|
version = "0.45.0"
|
||||||
description = "k-bit optimizers and matrix multiplication routines."
|
description = "k-bit optimizers and matrix multiplication routines."
|
||||||
optional = true
|
optional = true
|
||||||
python-versions = "*"
|
python-versions = "*"
|
||||||
files = [
|
files = [
|
||||||
{file = "bitsandbytes-0.43.3-py3-none-manylinux_2_24_x86_64.whl", hash = "sha256:cc99507c352be0715098b2c7577b690dd158972dc4ea10c7495bac104c7c79f0"},
|
{file = "bitsandbytes-0.45.0-py3-none-manylinux_2_24_x86_64.whl", hash = "sha256:0f0323de1ff1fdf8383e79bdad1283516a4c05a6fd2b44a363bf4e059422305b"},
|
||||||
{file = "bitsandbytes-0.43.3-py3-none-win_amd64.whl", hash = "sha256:257f6552f2144748a84e6c44e1f7a98f3da888f675ed74e18fd7f7eb13c6cafa"},
|
{file = "bitsandbytes-0.45.0-py3-none-win_amd64.whl", hash = "sha256:ebbf96e0ecb466716a65ecdeaef3fa1983575447b9ab66b74e5211892507c6ff"},
|
||||||
]
|
]
|
||||||
|
|
||||||
[package.dependencies]
|
[package.dependencies]
|
||||||
numpy = "*"
|
numpy = "*"
|
||||||
torch = "*"
|
torch = "*"
|
||||||
|
typing_extensions = ">=4.8.0"
|
||||||
|
|
||||||
[package.extras]
|
[package.extras]
|
||||||
benchmark = ["matplotlib", "pandas"]
|
benchmark = ["matplotlib", "pandas"]
|
||||||
test = ["scipy"]
|
test = ["lion_pytorch", "scipy"]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "certifi"
|
name = "certifi"
|
||||||
@ -4097,4 +4098,4 @@ torch = ["torch"]
|
|||||||
[metadata]
|
[metadata]
|
||||||
lock-version = "2.0"
|
lock-version = "2.0"
|
||||||
python-versions = ">=3.9,<3.13"
|
python-versions = ">=3.9,<3.13"
|
||||||
content-hash = "25f96d5dea777bfa7a959f863e35d2e05e1a6172d0dd45193dbe25ac2f32cc25"
|
content-hash = "0ead8472620eeef6f9ff81f70bcb48403f9c831b6914245efa5e249724d80d0b"
|
||||||
|
@ -16,7 +16,7 @@ grpcio-reflection = "^1.51.1"
|
|||||||
grpc-interceptor = "^0.15.4"
|
grpc-interceptor = "^0.15.4"
|
||||||
typer = "^0.12.5"
|
typer = "^0.12.5"
|
||||||
accelerate = {version = "^1.1.0", optional = true}
|
accelerate = {version = "^1.1.0", optional = true}
|
||||||
bitsandbytes = { version = "^0.43.0", optional = true }
|
bitsandbytes = { version = "^0.45.0", optional = true }
|
||||||
safetensors = "^0.4.5"
|
safetensors = "^0.4.5"
|
||||||
loguru = "^0.7.2"
|
loguru = "^0.7.2"
|
||||||
opentelemetry-api = "^1.27.0"
|
opentelemetry-api = "^1.27.0"
|
||||||
|
Loading…
Reference in New Issue
Block a user