mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-06-09 19:02:09 +00:00
feat(backend): minor refactor
This commit is contained in:
parent
92bb113653
commit
d4b5be10f9
@@ -123,7 +123,7 @@ namespace huggingface::tgi::backends::llama {
 #else
         const auto status = llama_decode(ctx, batch);
 #endif
-        if (status == LLAMA_SUCCESS) {
+        if (LLAMA_SUCCESS(status)) {
             // Sample the new token
             auto new_token_id = llama_sampler_sample(*sampler, ctx, -1);
             generated.emplace_back(new_token_id);
@@ -133,7 +133,6 @@ namespace huggingface::tgi::backends::llama {
             batch = llama_batch_get_one(&new_token_id, 1);
         }
     }
-    generated.shrink_to_fit();
     return generated;
 }
 }
@@ -10,7 +10,7 @@
 #include <memory>
 #include <llama.h>
 
-#define LLAMA_SUCCESS 0
+#define LLAMA_SUCCESS(x) x == 0
 
 namespace huggingface::tgi::backends::llama {
     enum TgiLlamaCppBackendError {
|
Loading…
Reference in New Issue
Block a user