mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-04-26 12:32:10 +00:00
doc: clarify that --quantize
is not needed for pre-quantized models (#2536)
This commit is contained in:
parent
c1a99e2f15
commit
b6ef2bfc1b
@@ -55,7 +55,9 @@ Options:

 ## QUANTIZE

 ```shell
       --quantize <QUANTIZE>
-          Whether you want the model to be quantized
+          Quantization method to use for the model. It is not necessary to specify this option for pre-quantized models, since the quantization method is read from the model configuration.
+
+          Marlin kernels will be used automatically for GPTQ/AWQ models.

           [env: QUANTIZE=]
@@ -369,7 +369,11 @@ struct Args {
     #[clap(long, env)]
     num_shard: Option<usize>,

-    /// Whether you want the model to be quantized.
+    /// Quantization method to use for the model. It is not necessary to specify this option
+    /// for pre-quantized models, since the quantization method is read from the model
+    /// configuration.
+    ///
+    /// Marlin kernels will be used automatically for GPTQ/AWQ models.
     #[clap(long, env, value_enum)]
     quantize: Option<Quantization>,
Loading…
Reference in New Issue
Block a user