//
// Created by Morgan Funtowicz on 9/28/2024.
//
#ifndef TGI_LLAMA_CPP_BACKEND_BACKEND_HPP
#define TGI_LLAMA_CPP_BACKEND_BACKEND_HPP

#include <cmath>
#include <cstdint>
#include <expected>
#include <filesystem>
#include <limits>
#include <memory>
#include <span>
#include <string>
#include <vector>

#include <llama.h>

#define LLAMA_SUCCESS(x) ((x) == 0)

namespace huggingface::tgi::backends::llamacpp {
    enum TgiLlamaCppBackendError : uint8_t {
        MODEL_FILE_DOESNT_EXIST = 1
    };

    class TgiLlamaCppBackend {
        using TokenId = llama_token;

    private:
        llama_model *model;
        llama_context *ctx;

        /**
         * Build a llama.cpp sampler chain from the provided sampling parameters.
         * @param topK Number of highest-probability tokens kept for sampling
         * @param topP Cumulative probability mass kept for nucleus sampling
         * @param frequencyPenalty Penalty applied proportionally to a token's frequency in the output so far
         * @param repetitionPenalty Penalty applied to tokens already present in the output
         * @param seed Seed for the pseudo-random generator used while sampling
         * @return Owned pointer to the configured sampler
         */
        std::unique_ptr<llama_sampler> GetSamplerFromArgs(
                uint32_t topK, float_t topP, float_t frequencyPenalty, float_t repetitionPenalty, uint64_t seed);

    public:
        /**
         * Load a model and create an inference context from a GGUF file on disk.
         * @return Pair of (model, context) handles on success, TgiLlamaCppBackendError otherwise
         */
        static std::expected<std::pair<llama_model *, llama_context *>, TgiLlamaCppBackendError>
        FromGGUF(const std::filesystem::path &) noexcept;

        TgiLlamaCppBackend(llama_model *model, llama_context *ctx);

        ~TgiLlamaCppBackend();

        /**
         * Tokenize the provided text with the model's tokenizer.
         * @param text UTF-8 encoded input text
         * @return Token ids corresponding to the input text
         */
        [[nodiscard("Tokens will be freed after this call if not assigned to an lvalue")]]
        std::vector<TokenId> Tokenize(const std::string &text) const;

        /**
         * Generate new tokens from the provided prompt tokens.
         * @param tokens Prompt token ids
         * @param topK Number of highest-probability tokens kept for sampling
         * @param topP Cumulative probability mass kept for nucleus sampling
         * @param frequencyPenalty Penalty applied proportionally to a token's frequency in the output so far
         * @param repetitionPenalty Penalty applied to tokens already present in the output
         * @param maxNewTokens Maximum number of tokens to generate
         * @param seed Seed for the pseudo-random generator used while sampling
         * @return Generated token ids on success, TgiLlamaCppBackendError otherwise
         */
        [[nodiscard("Generated tokens will be freed after this call if not assigned to an lvalue")]]
        std::expected<std::vector<TokenId>, TgiLlamaCppBackendError> Generate(
                std::span<const TokenId> tokens,
                uint32_t topK,
                float_t topP = 1.0f,
                float_t frequencyPenalty = 0.0f,
                float_t repetitionPenalty = 0.0f,
                uint32_t maxNewTokens = std::numeric_limits<uint32_t>::max() - 1,
                uint64_t seed = 2014
        );
    };

    [[nodiscard("Created backend will be freed after this call if not assigned to an lvalue")]]
    std::expected<std::unique_ptr<TgiLlamaCppBackend>, TgiLlamaCppBackendError>
    CreateLlamaCppBackend(const std::filesystem::path &root);
}

#endif //TGI_LLAMA_CPP_BACKEND_BACKEND_HPP
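
// ---------------------------------------------------------------------------
// Usage sketch (illustrative only, kept as a comment so the header stays
// valid): assuming the declarations above, a caller could create a backend
// from a GGUF file, tokenize a prompt and generate a completion roughly as
// follows. The "model.gguf" path and the sampling values are placeholders.
//
//     #include <iostream>
//     #include "backend.hpp"
//
//     int main() {
//         using namespace huggingface::tgi::backends::llamacpp;
//
//         auto backend = CreateLlamaCppBackend("model.gguf");
//         if (!backend) {
//             std::cerr << "failed to load model\n";
//             return 1;
//         }
//
//         const auto prompt = (*backend)->Tokenize("What is the capital of France?");
//         const auto generated = (*backend)->Generate(prompt, /* topK = */ 40, /* topP = */ 0.95f);
//         if (generated)
//             std::cout << "generated " << generated->size() << " tokens\n";
//         return 0;
//     }
//
// Both Tokenize and Generate are marked [[nodiscard]], so their results must
// be bound to a variable as shown, otherwise the compiler emits a warning.
// ---------------------------------------------------------------------------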