From ef7acd44529d48c8be90d8f9b583ea4e2a3a6f3b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Dani=C3=ABl=20de=20Kok?=
Date: Thu, 19 Sep 2024 14:12:49 +0000
Subject: [PATCH] doc: clarify that `--quantize` is not needed for
 pre-quantized models

---
 docs/source/reference/launcher.md | 4 +++-
 flake.nix                         | 1 +
 launcher/src/main.rs              | 6 +++++-
 3 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/docs/source/reference/launcher.md b/docs/source/reference/launcher.md
index 01f15648..c8d2a4c6 100644
--- a/docs/source/reference/launcher.md
+++ b/docs/source/reference/launcher.md
@@ -55,7 +55,9 @@ Options:
 ## QUANTIZE
 ```shell
       --quantize <QUANTIZE>
-          Whether you want the model to be quantized
+          Quantization method to use for the model. It is not necessary to specify this option for pre-quantized models, since the quantization method is read from the model configuration.
+
+          Marlin kernels will be used automatically for GPTQ/AWQ models.
 
           [env: QUANTIZE=]
 
diff --git a/flake.nix b/flake.nix
index 3d349ff2..03f957b0 100644
--- a/flake.nix
+++ b/flake.nix
@@ -149,6 +149,7 @@
             pyright
             pytest
             pytest-asyncio
+            redocly
             ruff
             syrupy
           ]);
diff --git a/launcher/src/main.rs b/launcher/src/main.rs
index 2cdccfe0..175244ff 100644
--- a/launcher/src/main.rs
+++ b/launcher/src/main.rs
@@ -367,7 +367,11 @@ struct Args {
     #[clap(long, env)]
     num_shard: Option<usize>,
 
-    /// Whether you want the model to be quantized.
+    /// Quantization method to use for the model. It is not necessary to specify this option
+    /// for pre-quantized models, since the quantization method is read from the model
+    /// configuration.
+    ///
+    /// Marlin kernels will be used automatically for GPTQ/AWQ models.
     #[clap(long, env, value_enum)]
     quantize: Option<Quantization>,