doc: clarify that --quantize is not needed for pre-quantized models

Daniël de Kok 2024-09-19 14:12:49 +00:00
parent ce85efa968
commit ef7acd4452
3 changed files with 9 additions and 2 deletions


@@ -55,7 +55,9 @@ Options:
 ## QUANTIZE
 ```shell
   --quantize <QUANTIZE>
-          Whether you want the model to be quantized
+          Quantization method to use for the model. It is not necessary to specify this option for pre-quantized models, since the quantization method is read from the model configuration.
+
+          Marlin kernels will be used automatically for GPTQ/AWQ models.
       [env: QUANTIZE=]


@@ -149,6 +149,7 @@
     pyright
     pytest
     pytest-asyncio
+    redocly
     ruff
     syrupy
   ]);


@@ -367,7 +367,11 @@ struct Args {
     #[clap(long, env)]
     num_shard: Option<usize>,
-    /// Whether you want the model to be quantized.
+    /// Quantization method to use for the model. It is not necessary to specify this option
+    /// for pre-quantized models, since the quantization method is read from the model
+    /// configuration.
+    ///
+    /// Marlin kernels will be used automatically for GPTQ/AWQ models.
     #[clap(long, env, value_enum)]
     quantize: Option<Quantization>,
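
The behavior documented above can be sketched with a hypothetical launch invocation (the model IDs are illustrative, not part of this commit):

```shell
# Hypothetical sketch: for a pre-quantized model (e.g. GPTQ), no --quantize flag
# is needed; the launcher reads the quantization method from the model config,
# and Marlin kernels are selected automatically for GPTQ/AWQ models.
text-generation-launcher --model-id TheBloke/Llama-2-7B-GPTQ

# For a model that is not pre-quantized, --quantize still selects
# on-the-fly quantization explicitly:
text-generation-launcher --model-id meta-llama/Llama-2-7b-hf --quantize bitsandbytes
```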