Add bitsandbytes (bnb) 4-bit variants (NF4 and FP4) to the quantization enums

This commit is contained in:
krzim 2023-07-17 19:31:11 +00:00
parent aded1c161e
commit 9c11372d8f
2 changed files with 11 additions and 1 deletions

View File

@ -22,6 +22,8 @@ mod env_runtime;
#[derive(Clone, Copy, Debug, ValueEnum)] #[derive(Clone, Copy, Debug, ValueEnum)]
enum Quantization { enum Quantization {
Bitsandbytes, Bitsandbytes,
BitsandbytesNF4,
BitsandbytesFP4,
Gptq, Gptq,
} }
@ -32,6 +34,12 @@ impl std::fmt::Display for Quantization {
Quantization::Bitsandbytes => { Quantization::Bitsandbytes => {
write!(f, "bitsandbytes") write!(f, "bitsandbytes")
} }
Quantization::BitsandbytesNF4 => {
write!(f, "bitsandbytes-nf4")
}
Quantization::BitsandbytesFP4 => {
write!(f, "bitsandbytes-fp4")
}
Quantization::Gptq => { Quantization::Gptq => {
write!(f, "gptq") write!(f, "gptq")
} }
@ -96,7 +104,7 @@ struct Args {
num_shard: Option<usize>, num_shard: Option<usize>,
/// Whether you want the model to be quantized. This will use `bitsandbytes` for /// Whether you want the model to be quantized. This will use `bitsandbytes` for
/// quantization on the fly, or `gptq`. /// quantization on the fly, or `gptq`.
#[clap(long, env, value_enum)] #[clap(long, env, value_enum)]
quantize: Option<Quantization>, quantize: Option<Quantization>,

View File

@ -13,6 +13,8 @@ app = typer.Typer()
class Quantization(str, Enum): class Quantization(str, Enum):
bitsandbytes = "bitsandbytes" bitsandbytes = "bitsandbytes"
bitsandbytes_nf4 = "bitsandbytes-nf4"
bitsandbytes_fp4 = "bitsandbytes-fp4"
gptq = "gptq" gptq = "gptq"