Add a reminder to check how we can detect support for chunked context

This commit is contained in:
Morgan Funtowicz 2024-07-03 21:38:17 +00:00
parent f57f2a4521
commit 29c7cb36e5

View File

@@ -16,6 +16,10 @@ tle::ExecutorConfig huggingface::tgi::backends::GetExecutorConfig(const json &co
tle::OrchestratorConfig(true, workerPath)
));
// TODO: Check that the GPU is >= sm_80 (Ampere) before enabling chunked context
// execConfig.setEnableChunkedContext(true)
execConfig.setKvCacheConfig(tle::KvCacheConfig(true));
return execConfig;
}