diff --git a/backends/llamacpp/csrc/backend.cpp b/backends/llamacpp/csrc/backend.cpp index 66017fc5..eb91e517 100644 --- a/backends/llamacpp/csrc/backend.cpp +++ b/backends/llamacpp/csrc/backend.cpp @@ -74,7 +74,7 @@ namespace huggingface::tgi::backends::llamacpp { const auto latency = std::chrono::duration_cast<std::chrono::milliseconds>(end - start); SPDLOG_DEBUG(FMT_STRING("Successfully decoded {:d} token(s) in {}"), batch.n_tokens, latency); #else - const auto status = llama_decode(context, batch); + const auto status = llama_decode(context_.get(), batch); #endif batch.n_tokens = 0; if (LLAMA_SUCCESS(status)) [[likely]] {