Remembering to check how we can detect support for chunked context

2025-06-19 15:52:08 +00:00 · 2024-07-03 21:38:17 +00:00 · 2024-07-03 21:38:17 +00:00 · 29c7cb36e5
commit 29c7cb36e5
parent f57f2a4521
1 changed file with 4 additions and 0 deletions
--- a/backends/trtllm/lib/backend.cpp
+++ b/backends/trtllm/lib/backend.cpp
@@ -16,6 +16,10 @@ tle::ExecutorConfig huggingface::tgi::backends::GetExecutorConfig(const json &co
            tle::OrchestratorConfig(true, workerPath)
    ));

+
+    // TODO : Need to check for >= sm_80 (ampere)
+    // execConfig.setEnableChunkedContext(true)
+    execConfig.setKvCacheConfig(tle::KvCacheConfig(true));
    return execConfig;
 }