mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-09-10 20:04:52 +00:00
add bnb 4bit to quantization enums
This commit is contained in:
parent
aded1c161e
commit
9c11372d8f
@ -22,6 +22,8 @@ mod env_runtime;
|
|||||||
#[derive(Clone, Copy, Debug, ValueEnum)]
|
#[derive(Clone, Copy, Debug, ValueEnum)]
|
||||||
enum Quantization {
|
enum Quantization {
|
||||||
Bitsandbytes,
|
Bitsandbytes,
|
||||||
|
BitsandbytesNF4,
|
||||||
|
BitsandbytesFP4,
|
||||||
Gptq,
|
Gptq,
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -32,6 +34,12 @@ impl std::fmt::Display for Quantization {
|
|||||||
Quantization::Bitsandbytes => {
|
Quantization::Bitsandbytes => {
|
||||||
write!(f, "bitsandbytes")
|
write!(f, "bitsandbytes")
|
||||||
}
|
}
|
||||||
|
Quantization::BitsandbytesNF4 => {
|
||||||
|
write!(f, "bitsandbytes-nf4")
|
||||||
|
}
|
||||||
|
Quantization::BitsandbytesFP4 => {
|
||||||
|
write!(f, "bitsandbytes-fp4")
|
||||||
|
}
|
||||||
Quantization::Gptq => {
|
Quantization::Gptq => {
|
||||||
write!(f, "gptq")
|
write!(f, "gptq")
|
||||||
}
|
}
|
||||||
@ -96,7 +104,7 @@ struct Args {
|
|||||||
num_shard: Option<usize>,
|
num_shard: Option<usize>,
|
||||||
|
|
||||||
/// Whether you want the model to be quantized. This will use `bitsandbytes` for
|
/// Whether you want the model to be quantized. This will use `bitsandbytes` for
|
||||||
/// quantization on the fly, or `gptq`.
|
/// quantization on the fly, or `gptq`.
|
||||||
#[clap(long, env, value_enum)]
|
#[clap(long, env, value_enum)]
|
||||||
quantize: Option<Quantization>,
|
quantize: Option<Quantization>,
|
||||||
|
|
||||||
|
@ -13,6 +13,8 @@ app = typer.Typer()
|
|||||||
|
|
||||||
class Quantization(str, Enum):
|
class Quantization(str, Enum):
|
||||||
bitsandbytes = "bitsandbytes"
|
bitsandbytes = "bitsandbytes"
|
||||||
|
bitsandbytes_nf4 = "bitsandbytes-nf4"
|
||||||
|
bitsandbytes_fp4 = "bitsandbytes-fp4"
|
||||||
gptq = "gptq"
|
gptq = "gptq"
|
||||||
|
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user