add bnb 4bit to quantization enums

2025-09-10 11:54:52 +00:00 · 2023-07-17 19:31:11 +00:00 · 2023-07-17 19:31:11 +00:00 · 9c11372d8f
commit 9c11372d8f
parent aded1c161e
2 changed files with 11 additions and 1 deletions
--- a/launcher/src/main.rs
+++ b/launcher/src/main.rs
@ -22,6 +22,8 @@ mod env_runtime;
 #[derive(Clone, Copy, Debug, ValueEnum)]
 enum Quantization {
    Bitsandbytes,
+    BitsandbytesNF4,
+    BitsandbytesFP4,
    Gptq,
 }

@ -32,6 +34,12 @@ impl std::fmt::Display for Quantization {
            Quantization::Bitsandbytes => {
                write!(f, "bitsandbytes")
            }
+            Quantization::BitsandbytesNF4 => {
+                write!(f, "bitsandbytes-nf4")
+            }
+            Quantization::BitsandbytesFP4 => {
+                write!(f, "bitsandbytes-fp4")
+            }
            Quantization::Gptq => {
                write!(f, "gptq")
            }
@ -96,7 +104,7 @@ struct Args {
    num_shard: Option<usize>,

    /// Whether you want the model to be quantized. This will use `bitsandbytes` for
-    /// quantization on the fly, or `gptq`.
+    /// quantization on the fly (also `bitsandbytes-nf4` / `bitsandbytes-fp4`), or `gptq`.
    #[clap(long, env, value_enum)]
    quantize: Option<Quantization>,

--- a/server/text_generation_server/cli.py
+++ b/server/text_generation_server/cli.py
@ -13,6 +13,8 @@ app = typer.Typer()

 class Quantization(str, Enum):
    bitsandbytes = "bitsandbytes"
+    bitsandbytes_nf4 = "bitsandbytes-nf4"
+    bitsandbytes_fp4 = "bitsandbytes-fp4"
    gptq = "gptq"