From 29c7cb36e5abd9b1fcacf47dcaa58a0ae9c0bb60 Mon Sep 17 00:00:00 2001 From: Morgan Funtowicz Date: Wed, 3 Jul 2024 21:38:17 +0000 Subject: [PATCH] Remembering to check how we can detect support for chunked context --- backends/trtllm/lib/backend.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/backends/trtllm/lib/backend.cpp b/backends/trtllm/lib/backend.cpp index e7a5b969..fc5d4594 100644 --- a/backends/trtllm/lib/backend.cpp +++ b/backends/trtllm/lib/backend.cpp @@ -16,6 +16,10 @@ tle::ExecutorConfig huggingface::tgi::backends::GetExecutorConfig(const json &co tle::OrchestratorConfig(true, workerPath) )); + + // TODO : Need to check for >= sm_80 (ampere) + // execConfig.setEnableChunkedContext(true) + execConfig.setKvCacheConfig(tle::KvCacheConfig(true)); return execConfig; }