diff --git a/backends/trtllm/include/ffi.h b/backends/trtllm/include/ffi.h index c2b29500d..a35449813 100644 --- a/backends/trtllm/include/ffi.h +++ b/backends/trtllm/include/ffi.h @@ -5,7 +5,9 @@ #ifndef TGI_TRTLLM_BACKEND_FFI_H #define TGI_TRTLLM_BACKEND_FFI_H +ad#include #include +#include #include "backend.h" namespace huggingface::tgi::backends { diff --git a/backends/trtllm/src/ffi.cpp b/backends/trtllm/src/ffi.cpp index 1179fc857..adaaced6c 100644 --- a/backends/trtllm/src/ffi.cpp +++ b/backends/trtllm/src/ffi.cpp @@ -3,11 +3,13 @@ // #pragma once -#include +#include #include #include +#include #include #include +#include #include #include @@ -28,7 +30,7 @@ uint64_t huggingface::tgi::backends::TensorRtLlmBackendImpl::Submit( // This will copy all the items from the initial slice std::vector tokens_(std::make_move_iterator(tokens.begin()), std::make_move_iterator(tokens.end())); return TensorRtLlmBackend::Submit( - std::move(tokens_), topK, topP, temperature, repetition_penalty, frequency_penalty, seed); + std::move(tokens_), maxNewTokens, topK, topP, temperature, repetition_penalty, frequency_penalty, seed); } std::unique_ptr>