add bnb 4bit to quantization enums

2025-09-10 11:54:52 +00:00 · 2023-07-17 19:31:11 +00:00 · 2023-07-17 19:31:11 +00:00 · bccebb027a
commit bccebb027a
parent 3f2c5c31a8
2 changed files with 11 additions and 1 deletions
--- a/launcher/src/main.rs
+++ b/launcher/src/main.rs
@@ -22,6 +22,8 @@ mod env_runtime;
 #[derive(Clone, Copy, Debug, ValueEnum)]
 enum Quantization {
    Bitsandbytes,
+    BitsandbytesNF4,
+    BitsandbytesFP4,
    Gptq,
 }

@@ -32,6 +34,12 @@ impl std::fmt::Display for Quantization {
            Quantization::Bitsandbytes => {
                write!(f, "bitsandbytes")
            }
+            Quantization::BitsandbytesNF4 => {
+                write!(f, "bitsandbytes-nf4")
+            }
+            Quantization::BitsandbytesFP4 => {
+                write!(f, "bitsandbytes-fp4")
+            }
            Quantization::Gptq => {
                write!(f, "gptq")
            }
--- a/server/text_generation_server/cli.py
+++ b/server/text_generation_server/cli.py
@@ -14,6 +14,8 @@ app = typer.Typer()

 class Quantization(str, Enum):
    bitsandbytes = "bitsandbytes"
+    bitsandbytes_nf4 = "bitsandbytes-nf4"
+    bitsandbytes_fp4 = "bitsandbytes-fp4"
    gptq = "gptq"