// Mirror of https://github.com/huggingface/text-generation-inference.git
// Synced 2025-04-29 05:52:11 +00:00 (36 lines, 946 B, Rust)
|
use crate::llamacpp;
|
||
|
|
||
|
use std::ffi::CString;
|
||
|
|
||
|
/// Quantization format selector handed to the llama.cpp quantizer.
///
/// The enum is `#[repr(u32)]` so each variant's discriminant can be cast
/// directly into the `ftype` field of the quantize parameters (see [`model`]).
#[repr(u32)]
#[derive(Debug, Clone, Copy)]
pub enum QuantizeType {
    /// Discriminant `2`.
    // NOTE(review): presumably mirrors llama.cpp's `LLAMA_FTYPE_MOSTLY_Q4_0`;
    // confirm against the bound header before adding further variants.
    MostlyQ4_0 = 2,
}
|
||
|
|
||
|
pub fn model(
|
||
|
input_path: &str,
|
||
|
output_path: &str,
|
||
|
ftype: QuantizeType,
|
||
|
n_threads: usize,
|
||
|
) -> Result<(), String> {
|
||
|
let c_input_path =
|
||
|
CString::new(input_path).map_err(|e| format!("Failed to convert input path: {}", e))?;
|
||
|
|
||
|
let c_output_path =
|
||
|
CString::new(output_path).map_err(|e| format!("Failed to convert output path: {}", e))?;
|
||
|
|
||
|
let result = unsafe {
|
||
|
let mut params = llamacpp::model_quantize_default_params();
|
||
|
params.nthread = n_threads as _;
|
||
|
params.ftype = ftype as _;
|
||
|
params.quantize_output_tensor = true;
|
||
|
llamacpp::model_quantize(c_input_path.as_ptr(), c_output_path.as_ptr(), ¶ms)
|
||
|
};
|
||
|
if result == 0 {
|
||
|
Ok(())
|
||
|
} else {
|
||
|
Err(format!("Quantization failed, error code: {}", result))
|
||
|
}
|
||
|
}
|