mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-04-26 12:32:10 +00:00
doc: clarify that --quantize
is not needed for pre-quantized models (#2536)
This commit is contained in:
parent
c1a99e2f15
commit
b6ef2bfc1b
@@ -55,7 +55,9 @@ Options:

 ## QUANTIZE

 ```shell
       --quantize <QUANTIZE>
-          Whether you want the model to be quantized
+          Quantization method to use for the model. It is not necessary to specify this option for pre-quantized models, since the quantization method is read from the model configuration.
+
+          Marlin kernels will be used automatically for GPTQ/AWQ models.

           [env: QUANTIZE=]
@@ -369,7 +369,11 @@ struct Args {
     #[clap(long, env)]
     num_shard: Option<usize>,

-    /// Whether you want the model to be quantized.
+    /// Quantization method to use for the model. It is not necessary to specify this option
+    /// for pre-quantized models, since the quantization method is read from the model
+    /// configuration.
+    ///
+    /// Marlin kernels will be used automatically for GPTQ/AWQ models.
     #[clap(long, env, value_enum)]
     quantize: Option<Quantization>,
Loading…
Reference in New Issue
Block a user