chore(trtllm): ensure max throughput scheduling policy is selected

This commit is contained in:
Morgan Funtowicz 2024-10-21 23:40:54 +02:00
parent a6ac2741a3
commit 47d8c53dda

View File

@@ -53,6 +53,7 @@ tle::ExecutorConfig huggingface::tgi::backends::GetExecutorConfig(const json &co
// Define some configuration variables
execConfig.setKvCacheConfig(tle::KvCacheConfig(true));
execConfig.setEnableChunkedContext(computeCapabilities.isPostAmpere());
execConfig.setSchedulerConfig(tle::SchedulerConfig(tle::CapacitySchedulerPolicy::kMAX_UTILIZATION));
return execConfig;
}