mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-04-21 14:52:20 +00:00
29 lines
667 B
C++
29 lines
667 B
C++
|
//
|
||
|
// Created by Morgan Funtowicz on 9/28/2024.
|
||
|
//
|
||
|
|
||
|
#ifndef TGI_LLAMA_CPP_BACKEND_BACKEND_HPP
|
||
|
#define TGI_LLAMA_CPP_BACKEND_BACKEND_HPP
|
||
|
|
||
|
#include <memory>
#include <string_view>

#include <llama.h>
|
||
|
|
||
|
namespace huggingface::tgi::backends::llama {
|
||
|
/// Name string for this backend ("llama.cpp").
///
/// Declared `inline constexpr` because this is a header: a plain
/// `const char*` at namespace scope is a mutable pointer with external
/// linkage, so every translation unit including this file would emit its own
/// definition and the link would fail with duplicate symbols.
inline constexpr const char* TGI_BACKEND_LLAMA_CPP_NAME = "llama.cpp";
|
||
|
|
||
|
|
||
|
class TgiLlamaCppBackend {
|
||
|
private:
|
||
|
llama_model* model;
|
||
|
llama_context* ctx;
|
||
|
llama_batch batch;
|
||
|
public:
|
||
|
TgiLlamaCppBackend(llama_model* const model, llama_context* const);
|
||
|
~TgiLlamaCppBackend();
|
||
|
};
|
||
|
|
||
|
/**
 * Factory returning a heap-allocated backend held by a std::unique_ptr.
 *
 * @param root string view identifying what to build the backend from
 *             (presumably the model location - confirm against the definition,
 *             which is not part of this header)
 * @return the newly created backend; marked [[nodiscard]] because dropping the
 *         returned pointer destroys the backend immediately
 */
[[nodiscard]] std::unique_ptr<TgiLlamaCppBackend> CreateLlamaCppBackend(std::string_view root);
|
||
|
}
|
||
|
|
||
|
#endif //TGI_LLAMA_CPP_BACKEND_BACKEND_HPP
|