text-generation-inference/backends/llamacpp/src/lib.rs


use crate::ffi::SamplingParams;
pub mod backend;

impl Default for SamplingParams {
    fn default() -> Self {
        Self {
            top_k: u32::MAX, // u32::MAX effectively disables top-k filtering
            top_p: 1.0f32,   // 1.0 disables nucleus (top-p) filtering
            frequency_penalty: 0.0f32,
            repetition_penalty: 0.0f32,
            seed: 2014u64,
        }
    }
}
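
// Hedged usage note: callers are expected to start from these defaults and
// override individual fields via struct-update syntax, e.g. (values
// illustrative, not taken from this file):
//
//     let params = SamplingParams { top_k: 40, top_p: 0.95, ..Default::default() };
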
#[cxx::bridge(namespace = "huggingface::tgi::backends::llamacpp")]
mod ffi {
    struct GenerationParams {
        max_new_tokens: u32,
        ignore_eos_token: bool,
    }

    struct SamplingParams {
        top_k: u32,
        top_p: f32,
        frequency_penalty: f32,
        repetition_penalty: f32,
        seed: u64,
    }

    unsafe extern "C++" {
        include!("backends/llamacpp/csrc/ffi.hpp");
#[cxx_name = "generation_params_t"]
type GenerationParams;
#[cxx_name = "sampling_params_t"]
type SamplingParams;

        /// Represents an instance of the llama.cpp backend on the C++ side
        #[cxx_name = "llama_cpp_backend_impl_t"]
        type LlamaCppBackendImpl;

        #[rust_name = "create_single_worker_backend"]
        fn create_single_worker_backend(modelPath: &str) -> Result<UniquePtr<LlamaCppBackendImpl>>;

        fn generate(
            self: Pin<&mut LlamaCppBackendImpl>,
            tokens: &[u32],
            generated: &mut [u32],
            generation_params: &GenerationParams,
            sampling_params: &SamplingParams,
            callback: fn(u32, f32, bool),
        ) -> Result<usize>;
    }
}
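
// A minimal usage sketch, not part of the original file: it assumes a local
// GGUF model path, an output buffer sized to `max_new_tokens`, and that the
// callback arguments are (token_id, score, is_final); the actual contract is
// defined by the C++ side in backends/llamacpp/csrc/ffi.hpp.
#[cfg(test)]
mod usage_sketch {
    use super::ffi;

    #[test]
    #[ignore = "requires a local GGUF model file"]
    fn single_worker_generate() -> Result<(), Box<dyn std::error::Error>> {
        // Hypothetical model path, for illustration only.
        let mut backend = ffi::create_single_worker_backend("/models/model.gguf")?;

        let prompt: Vec<u32> = vec![1, 15043, 29892]; // illustrative token ids
        let mut generated = vec![0u32; 128]; // room for up to max_new_tokens outputs

        let generation_params = ffi::GenerationParams {
            max_new_tokens: 128,
            ignore_eos_token: false,
        };
        let sampling_params = ffi::SamplingParams::default();

        // Non-capturing closures coerce to the `fn(u32, f32, bool)` pointer the
        // bridge expects; presumed to fire once per generated token.
        let n_generated = backend.pin_mut().generate(
            &prompt,
            &mut generated,
            &generation_params,
            &sampling_params,
            |token_id, score, is_final| {
                let _ = (token_id, score, is_final);
            },
        )?;

        assert!(n_generated <= generated.len());
        Ok(())
    }
}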