mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-05-30 02:32:09 +00:00
chore(trtllm): ensure max throughput scheduling policy is selected
This commit is contained in:
parent
a6ac2741a3
commit
47d8c53dda
@@ -53,6 +53,7 @@ tle::ExecutorConfig huggingface::tgi::backends::GetExecutorConfig(const json &co
|
|||||||
// Define some configuration variables
|
// Define some configuration variables
|
||||||
execConfig.setKvCacheConfig(tle::KvCacheConfig(true));
|
execConfig.setKvCacheConfig(tle::KvCacheConfig(true));
|
||||||
execConfig.setEnableChunkedContext(computeCapabilities.isPostAmpere());
|
execConfig.setEnableChunkedContext(computeCapabilities.isPostAmpere());
|
||||||
|
execConfig.setSchedulerConfig(tle::SchedulerConfig(tle::CapacitySchedulerPolicy::kMAX_UTILIZATION));
|
||||||
return execConfig;
|
return execConfig;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user