From cdba16fd23e42d2b77b3e47f9422eeaa52a5fd27 Mon Sep 17 00:00:00 2001 From: Morgan Funtowicz Date: Mon, 21 Oct 2024 23:40:54 +0200 Subject: [PATCH] chore(trtllm): ensure max throughput scheduling policy is selected --- backends/trtllm/lib/backend.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/backends/trtllm/lib/backend.cpp b/backends/trtllm/lib/backend.cpp index 9e8e50c6..a80e9c5d 100644 --- a/backends/trtllm/lib/backend.cpp +++ b/backends/trtllm/lib/backend.cpp @@ -53,6 +53,7 @@ tle::ExecutorConfig huggingface::tgi::backends::GetExecutorConfig(const json &co // Define some configuration variables execConfig.setKvCacheConfig(tle::KvCacheConfig(true)); execConfig.setEnableChunkedContext(computeCapabilities.isPostAmpere()); + execConfig.setSchedulerConfig(tle::SchedulerConfig(tle::CapacitySchedulerPolicy::kMAX_UTILIZATION)); return execConfig; }