chore(trtllm): ensure max throughput scheduling policy is selected

This commit is contained in:
Morgan Funtowicz 2024-10-21 23:40:54 +02:00 committed by Nicolas Patry
parent d659cb0113
commit cdba16fd23
No known key found for this signature in database
GPG Key ID: D2920555C90F704C

View File

@@ -53,6 +53,7 @@ tle::ExecutorConfig huggingface::tgi::backends::GetExecutorConfig(const json &co
// Define some configuration variables
execConfig.setKvCacheConfig(tle::KvCacheConfig(true));
execConfig.setEnableChunkedContext(computeCapabilities.isPostAmpere());
execConfig.setSchedulerConfig(tle::SchedulerConfig(tle::CapacitySchedulerPolicy::kMAX_UTILIZATION));
return execConfig;
}