From 9c11372d8f5ac100a5dfc428e1e77c2eba9edc81 Mon Sep 17 00:00:00 2001
From: krzim <zimmerk4@live.com>
Date: Mon, 17 Jul 2023 19:31:11 +0000
Subject: [PATCH] add bnb 4bit to quantization enums

---
 launcher/src/main.rs                 | 10 +++++++++-
 server/text_generation_server/cli.py |  2 ++
 2 files changed, 11 insertions(+), 1 deletion(-)
diff --git a/launcher/src/main.rs b/launcher/src/main.rs
index e26244e5..36add771 100644
--- a/launcher/src/main.rs
+++ b/launcher/src/main.rs
@@ -22,6 +22,8 @@ mod env_runtime;
 #[derive(Clone, Copy, Debug, ValueEnum)]
 enum Quantization {
     Bitsandbytes,
+    BitsandbytesNF4,
+    BitsandbytesFP4,
     Gptq,
 }
 
@@ -32,6 +34,12 @@ impl std::fmt::Display for Quantization {
             Quantization::Bitsandbytes => {
                 write!(f, "bitsandbytes")
             }
+            Quantization::BitsandbytesNF4 => {
+                write!(f, "bitsandbytes-nf4")
+            }
+            Quantization::BitsandbytesFP4 => {
+                write!(f, "bitsandbytes-fp4")
+            }
             Quantization::Gptq => {
                 write!(f, "gptq")
             }
@@ -96,7 +104,7 @@ struct Args {
     num_shard: Option<usize>,
 
     /// Whether you want the model to be quantized. This will use `bitsandbytes` for
-    /// quantization on the fly, or `gptq`.
+    /// quantization on the fly, or `gptq`. Use `bitsandbytes-nf4` or `bitsandbytes-fp4` for 4-bit.
     #[clap(long, env, value_enum)]
     quantize: Option<Quantization>,
 
diff --git a/server/text_generation_server/cli.py b/server/text_generation_server/cli.py
index 7a55e919..373b32df 100644
--- a/server/text_generation_server/cli.py
+++ b/server/text_generation_server/cli.py
@@ -13,6 +13,8 @@ app = typer.Typer()
 
 class Quantization(str, Enum):
     bitsandbytes = "bitsandbytes"
+    bitsandbytes_nf4 = "bitsandbytes-nf4"
+    bitsandbytes_fp4 = "bitsandbytes-fp4"
     gptq = "gptq"