//
// Created by Morgan Funtowicz on 9/28/2024.
//
#ifndef TGI_LLAMA_CPP_BACKEND_BACKEND_HPP
#define TGI_LLAMA_CPP_BACKEND_BACKEND_HPP

#include <cmath>
#include <cstdint>
#include <expected>
#include <filesystem>
#include <limits>
#include <memory>
#include <span>
#include <string>
#include <vector>

#include <llama.h>

#define LLAMA_SUCCESS 0

namespace huggingface::tgi::backends::llama {
    enum TgiLlamaCppBackendError {
        MODEL_FILE_DOESNT_EXIST = 1
    };

    class TgiLlamaCppBackend {
        using TokenId = int32_t;

    private:
        llama_model *model;
        llama_context *ctx;

        /**
         * Build a llama.cpp sampler from the provided sampling parameters.
         * @param topK Number of highest-probability tokens kept for sampling
         * @param topP Cumulative probability mass kept for nucleus sampling
         * @param frequencyPenalty Penalty applied in proportion to a token's frequency in the context
         * @param repetitionPenalty Penalty applied to tokens already present in the context
         * @param seed Seed for the sampler's random number generator
         * @return Owning pointer to the configured sampler
         */
        std::unique_ptr<llama_sampler> GetSamplerFromArgs(
                uint32_t topK, float_t topP, float_t frequencyPenalty, float_t repetitionPenalty, uint64_t seed);

    public:
        TgiLlamaCppBackend(llama_model *model, llama_context *ctx);
        ~TgiLlamaCppBackend();

        /**
         * Tokenize the provided text with the model's tokenizer.
         * @param text Text to tokenize
         * @return Token ids produced by the tokenizer
         */
        [[nodiscard]] std::vector<TokenId> Tokenize(const std::string &text) const;

        /**
         * Generate new tokens from the provided prompt tokens.
         * @param tokens Prompt token ids
         * @param topK Number of highest-probability tokens kept for sampling
         * @param topP Cumulative probability mass kept for nucleus sampling
         * @param maxNewTokens Maximum number of tokens to generate
         * @return Generated token ids
         */
        [[nodiscard]] std::vector<TokenId> Generate(
                std::span<const TokenId> tokens,
                uint32_t topK,
                float_t topP = 1.0f,
                uint32_t maxNewTokens = std::numeric_limits<uint32_t>::max()
        );
    };

    std::expected<std::unique_ptr<TgiLlamaCppBackend>, TgiLlamaCppBackendError>
    CreateLlamaCppBackend(const std::filesystem::path &root);
}

#endif //TGI_LLAMA_CPP_BACKEND_BACKEND_HPP
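
// Usage sketch (not part of the original header): a minimal caller-side example of how this
// API is meant to be driven, assuming a hypothetical model path "/models/llama-gguf" and the
// sampling values shown; error handling is reduced to the std::expected check.
//
//     #include "backend.hpp"
//
//     int main() {
//         using namespace huggingface::tgi::backends::llama;
//
//         auto maybeBackend = CreateLlamaCppBackend("/models/llama-gguf");
//         if (!maybeBackend)
//             return 1;  // e.g. MODEL_FILE_DOESNT_EXIST
//
//         auto &backend = *maybeBackend;
//         const auto prompt = backend->Tokenize("What is the capital of France?");
//         const auto generated = backend->Generate(
//                 prompt, /* topK = */ 40, /* topP = */ 0.95f, /* maxNewTokens = */ 128);
//         return 0;
//     }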