mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-06-19 15:52:08 +00:00
Add a reminder to check how we can detect support for chunked context
This commit is contained in:
parent
f57f2a4521
commit
29c7cb36e5
@ -16,6 +16,10 @@ tle::ExecutorConfig huggingface::tgi::backends::GetExecutorConfig(const json &co
|
||||
tle::OrchestratorConfig(true, workerPath)
|
||||
));
|
||||
|
||||
|
||||
// TODO: Check for compute capability >= sm_80 (Ampere) before enabling chunked context
|
||||
// execConfig.setEnableChunkedContext(true)
|
||||
execConfig.setKvCacheConfig(tle::KvCacheConfig(true));
|
||||
return execConfig;
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user