Mirror of https://github.com/huggingface/text-generation-inference.git, synced 2025-09-10 20:04:52 +00:00
Fmt.
This commit is contained in:
parent 086d62dbe3
commit e93705e272
@@ -36,7 +36,10 @@ enum Quantization {
     Gptq,
     /// Bitsandbytes 8bit. Can be applied on any model, will cut the memory requirement in half,
     /// but it is known that the model will be much slower to run than the native f16.
-    #[deprecated(since="1.1.0", note="Use `eetq` instead, which provides better latencies overall and is drop-in in most cases")]
+    #[deprecated(
+        since = "1.1.0",
+        note = "Use `eetq` instead, which provides better latencies overall and is drop-in in most cases"
+    )]
     Bitsandbytes,
     /// Bitsandbytes 4bit. Can be applied on any model, will cut the memory requirement by 4x,
     /// but it is known that the model will be much slower to run than the native f16.