mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-06-19 15:52:08 +00:00
Remembering to check how we can detect support for chunked context
This commit is contained in:
parent
f57f2a4521
commit
29c7cb36e5
@ -16,6 +16,10 @@ tle::ExecutorConfig huggingface::tgi::backends::GetExecutorConfig(const json &co
|
|||||||
tle::OrchestratorConfig(true, workerPath)
|
tle::OrchestratorConfig(true, workerPath)
|
||||||
));
|
));
|
||||||
|
|
||||||
|
|
||||||
|
// TODO: Need to check for >= sm_80 (Ampere) before enabling chunked context
|
||||||
|
// execConfig.setEnableChunkedContext(true)
|
||||||
|
execConfig.setKvCacheConfig(tle::KvCacheConfig(true));
|
||||||
return execConfig;
|
return execConfig;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user