diff --git a/backends/trtllm/include/backend.h b/backends/trtllm/include/backend.h
index 0703e8cc..b5d0711b 100644
--- a/backends/trtllm/include/backend.h
+++ b/backends/trtllm/include/backend.h
@@ -8,8 +8,8 @@
 #include
 #include
-#include
 #include
+#include
 #include
 #include
@@ -20,8 +20,24 @@
 namespace tle = tensorrt_llm::executor;
 
 namespace huggingface::tgi::backends {
+    /**
+     * Initialize all the components required by TRTLLM.
+     * This function must be called before attempting to load any engine.
+     */
+    void InitializeBackend();
+
+    /**
+     * Build the executor configuration matching the engine's config.json.
+     * @param config Parsed content of the engine's config.json
+     * @param workerPath Path to the executorWorker binary used in orchestrator mode
+     * @return Executor configuration used to instantiate the tle::Executor
+     */
     tle::ExecutorConfig GetExecutorConfig(const json &config, const std::string &workerPath);
 
+    /**
+     * Backend wrapping a TensorRT-LLM executor for a decoder-only engine.
+     */
     class TensorRtLlmBackend {
     private:
         const json config;
@@ -50,23 +66,30 @@ namespace huggingface::tgi::backends {
          * @param temperature
          * @param minLength
          * @param repetitionPenalty
-         * @param frequencePenalty
+         * @param frequencyPenalty
          * @param seed
          * @param nTopTokens
          * @return
          */
         [[nodiscard]] tle::IdType Submit(
-                std::vector &tokens,
+                const std::vector &tokens,
                 int32_t maxNewTokens,
-                float_t topK,
+                int32_t topK,
                 float_t topP,
                 float_t temperature,
                 int32_t minLength,
                 std::optional repetitionPenalty = std::nullopt,
-                std::optional frequencePenalty = std::nullopt,
+                std::optional frequencyPenalty = std::nullopt,
                 std::optional seed = std::nullopt,
                 std::optional nTopTokens = std::nullopt
         );
+
+        /**
+         * Retrieve the responses currently available for an in-flight request.
+         * @param reqId Request identifier returned by Submit
+         * @return Responses returned by the executor for this request
+         */
+        std::vector Poll(tle::IdType reqId);
     };
 }
diff --git a/backends/trtllm/lib/backend.cpp b/backends/trtllm/lib/backend.cpp
index fc5d4594..ed9c685a 100644
--- a/backends/trtllm/lib/backend.cpp
+++ b/backends/trtllm/lib/backend.cpp
@@ -1,77 +1,98 @@
-#include
 #include
+#include
 
 #include "backend.h"
 
+void huggingface::tgi::backends::InitializeBackend() {
+    SPDLOG_INFO("Initializing Backend...");
+
+    initTrtLlmPlugins();
+}
+
 tle::ExecutorConfig huggingface::tgi::backends::GetExecutorConfig(const json &config, const std::string &workerPath) {
-    tle::ExecutorConfig execConfig(
-            config["/build_config/max_beam_width"_json_pointer].get()
-    );
-
-    execConfig.setParallelConfig(tle::ParallelConfig(
-            tle::CommunicationType::kMPI,
-            tle::CommunicationMode::kORCHESTRATOR,
-            std::nullopt,
-            std::nullopt,
-            tle::OrchestratorConfig(true, workerPath)
-    ));
-
+    tle::ExecutorConfig execConfig(1);
 
     // TODO : Need to check for >= sm_80 (ampere)
     // execConfig.setEnableChunkedContext(true)
     execConfig.setKvCacheConfig(tle::KvCacheConfig(true));
+
+    if (config["/pretrained_config/mapping/world_size"_json_pointer].get() == 1) {
+        SPDLOG_INFO("Detected single engine deployment, using leader mode");
+        execConfig.setParallelConfig(tle::ParallelConfig(
+                tle::CommunicationType::kMPI,
+                tle::CommunicationMode::kLEADER,
+                std::nullopt,
+                std::nullopt,
+                std::nullopt
+        ));
+    } else {
+        SPDLOG_INFO("Detected sharded engine deployment, using orchestrator mode");
+        execConfig.setParallelConfig(tle::ParallelConfig(
+                tle::CommunicationType::kMPI,
+                tle::CommunicationMode::kORCHESTRATOR,
+                std::nullopt,
+                std::nullopt,
+                tle::OrchestratorConfig(true, workerPath)
+        ));
+    }
     return execConfig;
 }
 
 huggingface::tgi::backends::TensorRtLlmBackend::TensorRtLlmBackend(
-        const std::filesystem::path &engineFolder,
+        const std::filesystem::path &enginesFolder,
         const std::filesystem::path &executorWorker
 ):
-        config(json::parse(std::ifstream(engineFolder / "config.json"))),
-        executor(engineFolder, tensorrt_llm::executor::ModelType::kDECODER_ONLY, GetExecutorConfig(config, executorWorker.string()))
+        config(json::parse(std::ifstream(enginesFolder / "config.json"))),
+        executor(
+                enginesFolder,
+                tensorrt_llm::executor::ModelType::kDECODER_ONLY,
+                GetExecutorConfig(config, executorWorker.string())
+        )
 {
-    initTrtLlmPlugins();
-    SPDLOG_INFO(FMT_STRING("Engine (version={})"), config["version"].get());
+    SPDLOG_INFO(FMT_STRING("Engine (version={})"), config["/version"_json_pointer].get_ref());
 }
 
 tle::IdType huggingface::tgi::backends::TensorRtLlmBackend::Submit(
-        std::vector &tokens,
+        const std::vector &tokens,
         const int32_t maxNewTokens,
-        const float_t topK,
+        const int32_t topK,
         const float_t topP,
         const float_t temperature,
         const int32_t minLength,
-        const std::optional repetitionPenalty,
-        const std::optional frequencePenalty,
-        const std::optional seed,
-        const std::optional nTopTokens
+        std::optional repetitionPenalty,
+        std::optional frequencyPenalty,
+        std::optional seed,
+        std::optional nTopTokens
 ) {
-//    if (IsReady()) {
-//        spdlog::debug(
-//                "Submitting inference over {:d} tokens to the executor {:d}",
-//                tokens.size(),
-//                executor.getLatestIterationStats().back().numActiveRequests
-//        );
-//
-//        const auto sampling = tle::SamplingConfig{
-//                1,
-//                topK,
-//                topP,
-//                std::nullopt,
-//                std::nullopt,
-//                std::nullopt,
-//                seed,
-//                temperature,
-//                minLength,
-//                std::nullopt,
-//                repetitionPenalty.value_or(0.0),
-//                std::nullopt,
-//                frequencePenalty.value_or(1.0),
-//        };
-//        const auto output = tle::OutputConfig{false, false, nTopTokens.value_or(1) > 1};
-//        const auto request = tle::Request{std::move(tokens), maxNewTokens, true, sampling, output};
-//
-//        return executor.enqueueRequest(request);
-//    }
-    return 0;
+    spdlog::debug(
+            "Submitting inference over {:d} tokens to the executor {:d}",
+            tokens.size(),
+            executor.getLatestIterationStats().back().numActiveRequests
+    );
+
+    const auto sampling = tle::SamplingConfig{
+            1,
+            topK,
+            topP,
+            std::nullopt,
+            std::nullopt,
+            std::nullopt,
+            seed,
+            temperature,
+            minLength,
+            std::nullopt,
+            repetitionPenalty,
+            std::nullopt,
+            frequencyPenalty,
+    };
+    const auto output = tle::OutputConfig{false, false, nTopTokens.value_or(1) > 1};
+    const auto request = tle::Request{tokens, maxNewTokens, true, sampling, output};
+
+    return executor.enqueueRequest(request);
 }
+
+std::vector huggingface::tgi::backends::TensorRtLlmBackend::Poll(const tle::IdType reqId) {
+    SPDLOG_DEBUG("Polling request {:d}", reqId);
+    const auto responses = executor.awaitResponses(reqId);
+    return responses;
+}
\ No newline at end of file
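Not part of the diff itself, but a minimal sketch of how a caller might drive the new interface end to end (InitializeBackend, construction, Submit, Poll). The engine and worker paths are placeholders, and the token element type is assumed to be `tle::TokenIdType`; in TGI this sequence is expected to be driven from the Rust side of the backend rather than a standalone binary.

```cpp
#include <cstdint>
#include <filesystem>
#include <vector>

#include "backend.h"

int main() {
    // Must run once before any engine is loaded (initializes the TRT-LLM plugins).
    huggingface::tgi::backends::InitializeBackend();

    // Placeholder paths: a compiled engine folder and the executorWorker
    // binary shipped with TensorRT-LLM.
    huggingface::tgi::backends::TensorRtLlmBackend backend(
            std::filesystem::path("/repository/engines"),
            std::filesystem::path("/usr/local/bin/executorWorker")
    );

    // Token ids would normally come from the tokenizer on the router side;
    // the element type is assumed to be tle::TokenIdType here.
    const std::vector<tle::TokenIdType> tokens{1, 15043, 3186};

    // maxNewTokens=128, topK=10, topP=0.95, temperature=0.8, minLength=2;
    // the optional penalties, seed and nTopTokens keep their defaults.
    const auto reqId = backend.Submit(tokens, 128, 10, 0.95f, 0.8f, 2);

    // Blocks until the executor has responses for this request id.
    const auto responses = backend.Poll(reqId);
    return responses.empty() ? 1 : 0;
}
```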