mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-04-27 04:52:07 +00:00
doc: clarify that --quantize
is not needed for pre-quantized models (#2536)
This commit is contained in:
parent
c1a99e2f15
commit
b6ef2bfc1b
@@ -55,7 +55,9 @@ Options:
|
|||||||
## QUANTIZE
|
## QUANTIZE
|
||||||
```shell
|
```shell
|
||||||
--quantize <QUANTIZE>
|
--quantize <QUANTIZE>
|
||||||
Whether you want the model to be quantized
|
Quantization method to use for the model. It is not necessary to specify this option for pre-quantized models, since the quantization method is read from the model configuration.
|
||||||
|
|
||||||
|
Marlin kernels will be used automatically for GPTQ/AWQ models.
|
||||||
|
|
||||||
[env: QUANTIZE=]
|
[env: QUANTIZE=]
|
||||||
|
|
||||||
|
@@ -157,6 +157,7 @@
|
|||||||
pyright
|
pyright
|
||||||
pytest
|
pytest
|
||||||
pytest-asyncio
|
pytest-asyncio
|
||||||
|
redocly
|
||||||
ruff
|
ruff
|
||||||
syrupy
|
syrupy
|
||||||
]);
|
]);
|
||||||
|
@@ -369,7 +369,11 @@ struct Args {
|
|||||||
#[clap(long, env)]
|
#[clap(long, env)]
|
||||||
num_shard: Option<usize>,
|
num_shard: Option<usize>,
|
||||||
|
|
||||||
/// Whether you want the model to be quantized.
|
/// Quantization method to use for the model. It is not necessary to specify this option
|
||||||
|
/// for pre-quantized models, since the quantization method is read from the model
|
||||||
|
/// configuration.
|
||||||
|
///
|
||||||
|
/// Marlin kernels will be used automatically for GPTQ/AWQ models.
|
||||||
#[clap(long, env, value_enum)]
|
#[clap(long, env, value_enum)]
|
||||||
quantize: Option<Quantization>,
|
quantize: Option<Quantization>,
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user