mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-05-28 17:52:08 +00:00
chore(trtllm): ensure max throughput scheduling policy is selected
This commit is contained in:
parent
a6ac2741a3
commit
47d8c53dda
@@ -53,6 +53,7 @@ tle::ExecutorConfig huggingface::tgi::backends::GetExecutorConfig(const json &co
|
||||
// Define some configuration variables
|
||||
execConfig.setKvCacheConfig(tle::KvCacheConfig(true));
|
||||
execConfig.setEnableChunkedContext(computeCapabilities.isPostAmpere());
|
||||
execConfig.setSchedulerConfig(tle::SchedulerConfig(tle::CapacitySchedulerPolicy::kMAX_UTILIZATION));
|
||||
return execConfig;
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user