diff --git a/launcher/src/main.rs b/launcher/src/main.rs
index 146d83d6..35872867 100644
--- a/launcher/src/main.rs
+++ b/launcher/src/main.rs
@@ -22,6 +22,8 @@ mod env_runtime;
 #[derive(Clone, Copy, Debug, ValueEnum)]
 enum Quantization {
     Bitsandbytes,
+    BitsandbytesNF4,
+    BitsandbytesFP4,
     Gptq,
 }
 
@@ -32,6 +34,12 @@ impl std::fmt::Display for Quantization {
             Quantization::Bitsandbytes => {
                 write!(f, "bitsandbytes")
             }
+            Quantization::BitsandbytesNF4 => {
+                write!(f, "bitsandbytes-nf4")
+            }
+            Quantization::BitsandbytesFP4 => {
+                write!(f, "bitsandbytes-fp4")
+            }
             Quantization::Gptq => {
                 write!(f, "gptq")
             }
@@ -116,7 +124,7 @@ struct Args {
     num_shard: Option<usize>,
 
     /// Whether you want the model to be quantized. This will use `bitsandbytes` for
-    /// quantization on the fly, or `gptq`.
+    /// quantization on the fly (or its `bitsandbytes-nf4` / `bitsandbytes-fp4` variants), or `gptq`.
     #[clap(long, env, value_enum)]
     quantize: Option<Quantization>,
 
diff --git a/server/text_generation_server/cli.py b/server/text_generation_server/cli.py
index eba807bc..459ba8c4 100644
--- a/server/text_generation_server/cli.py
+++ b/server/text_generation_server/cli.py
@@ -14,6 +14,8 @@ app = typer.Typer()
 
 class Quantization(str, Enum):
     bitsandbytes = "bitsandbytes"
+    bitsandbytes_nf4 = "bitsandbytes-nf4"
+    bitsandbytes_fp4 = "bitsandbytes-fp4"
     gptq = "gptq"