From 9c11372d8f5ac100a5dfc428e1e77c2eba9edc81 Mon Sep 17 00:00:00 2001 From: krzim Date: Mon, 17 Jul 2023 19:31:11 +0000 Subject: [PATCH] add bnb 4bit to quantization enums --- launcher/src/main.rs | 10 +++++++++- server/text_generation_server/cli.py | 2 ++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/launcher/src/main.rs b/launcher/src/main.rs index e26244e5..36add771 100644 --- a/launcher/src/main.rs +++ b/launcher/src/main.rs @@ -22,6 +22,8 @@ mod env_runtime; #[derive(Clone, Copy, Debug, ValueEnum)] enum Quantization { Bitsandbytes, + BitsandbytesNF4, + BitsandbytesFP4, Gptq, } @@ -32,6 +34,12 @@ impl std::fmt::Display for Quantization { Quantization::Bitsandbytes => { write!(f, "bitsandbytes") } + Quantization::BitsandbytesNF4 => { + write!(f, "bitsandbytes-nf4") + } + Quantization::BitsandbytesFP4 => { + write!(f, "bitsandbytes-fp4") + } Quantization::Gptq => { write!(f, "gptq") } @@ -96,7 +104,7 @@ struct Args { num_shard: Option<usize>, /// Whether you want the model to be quantized. This will use `bitsandbytes` for - /// quantization on the fly, or `gptq`. + /// quantization on the fly, or `gptq`. #[clap(long, env, value_enum)] quantize: Option<Quantization>, diff --git a/server/text_generation_server/cli.py b/server/text_generation_server/cli.py index 7a55e919..373b32df 100644 --- a/server/text_generation_server/cli.py +++ b/server/text_generation_server/cli.py @@ -13,6 +13,8 @@ app = typer.Typer() class Quantization(str, Enum): bitsandbytes = "bitsandbytes" + bitsandbytes_nf4 = "bitsandbytes-nf4" + bitsandbytes_fp4 = "bitsandbytes-fp4" gptq = "gptq"