From ef7acd44529d48c8be90d8f9b583ea4e2a3a6f3b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Dani=C3=ABl=20de=20Kok?=
Date: Thu, 19 Sep 2024 14:12:49 +0000
Subject: [PATCH] doc: clarify that `--quantize` is not needed for
 pre-quantized models

---
 docs/source/reference/launcher.md | 4 +++-
 flake.nix                         | 1 +
 launcher/src/main.rs              | 6 +++++-
 3 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/docs/source/reference/launcher.md b/docs/source/reference/launcher.md
index 01f15648..c8d2a4c6 100644
--- a/docs/source/reference/launcher.md
+++ b/docs/source/reference/launcher.md
@@ -55,7 +55,9 @@ Options:
 ## QUANTIZE
 ```shell
       --quantize <QUANTIZE>
-          Whether you want the model to be quantized
+          Quantization method to use for the model. It is not necessary to specify this option for pre-quantized models, since the quantization method is read from the model configuration.
+
+          Marlin kernels will be used automatically for GPTQ/AWQ models.
 
           [env: QUANTIZE=]
 
diff --git a/flake.nix b/flake.nix
index 3d349ff2..03f957b0 100644
--- a/flake.nix
+++ b/flake.nix
@@ -149,6 +149,7 @@
             pyright
             pytest
             pytest-asyncio
+            redocly
             ruff
             syrupy
           ]);
diff --git a/launcher/src/main.rs b/launcher/src/main.rs
index 2cdccfe0..175244ff 100644
--- a/launcher/src/main.rs
+++ b/launcher/src/main.rs
@@ -367,7 +367,11 @@ struct Args {
     #[clap(long, env)]
     num_shard: Option<usize>,
 
-    /// Whether you want the model to be quantized.
+    /// Quantization method to use for the model. It is not necessary to specify this option
+    /// for pre-quantized models, since the quantization method is read from the model
+    /// configuration.
+    ///
+    /// Marlin kernels will be used automatically for GPTQ/AWQ models.
     #[clap(long, env, value_enum)]
     quantize: Option<Quantization>,