diff --git a/backends/trtllm/build.rs b/backends/trtllm/build.rs
index 9970d84f..fe60769f 100644
--- a/backends/trtllm/build.rs
+++ b/backends/trtllm/build.rs
@@ -43,8 +43,8 @@ fn build_backend(is_debug: bool, opt_level: &str, out_dir: &PathBuf) -> (PathBuf
         install_path = absolute(out_dir).expect("cannot happen").join(install_path);
     }
 
-    let _ = cmake::Config::new(".")
-        .uses_cxx11()
+    let mut config = cmake::Config::new(".");
+    config.uses_cxx11()
         .generator("Ninja")
         .profile(match is_debug {
             true => "Debug",
@@ -53,9 +53,16 @@ fn build_backend(is_debug: bool, opt_level: &str, out_dir: &PathBuf) -> (PathBuf
         .env("OPT_LEVEL", opt_level)
         .define("CMAKE_INSTALL_PREFIX", &install_path)
         .define("CMAKE_CUDA_COMPILER", "/usr/local/cuda/bin/nvcc")
+        .define("Python3_ROOT_DIR", "../venv")
         .define("TGI_TRTLLM_BACKEND_TARGET_CUDA_ARCH_LIST", cuda_arch_list)
-        .define("TGI_TRTLLM_BACKEND_TRT_ROOT", tensorrt_path)
-        .build();
+        .define("TGI_TRTLLM_BACKEND_TRT_ROOT", tensorrt_path);
+
+    // Allow overriding which Python to use ...
+    if let Some(python3) = option_env!("Python3_EXECUTABLE") {
+        config.define("Python3_EXECUTABLE", python3);
+    }
+
+    config.build();
 
     // Additional transitive CMake dependencies
     let deps_folder = out_dir.join("build").join("_deps");
diff --git a/backends/trtllm/cmake/trtllm.cmake b/backends/trtllm/cmake/trtllm.cmake
index 91b7753a..78f8df85 100644
--- a/backends/trtllm/cmake/trtllm.cmake
+++ b/backends/trtllm/cmake/trtllm.cmake
@@ -21,11 +21,13 @@ else ()
     set(NVTX_DISABLE ON)
 endif ()
 
+find_package(Python3 REQUIRED Interpreter)
+
 fetchcontent_declare(
         trtllm
         GIT_REPOSITORY https://github.com/NVIDIA/TensorRT-LLM.git
-        GIT_TAG c994b697313b8d36527ba346a0c5e8e374616c95
-        GIT_SHALLOW ON
+        GIT_TAG 385626572df16175dd327fa785e4434cb7866a64
+        GIT_SHALLOW OFF
         DOWNLOAD_EXTRACT_TIMESTAMP
 )
 fetchcontent_makeavailable(trtllm)
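
For reference, the Python override pattern from the build.rs hunk, shown in isolation. This is a minimal sketch, not part of the patch: it assumes the `cmake` crate as a build-dependency, and `configure_backend` is a hypothetical name.

    // Minimal sketch of the Python override pattern used in the hunk above.
    // Assumes the `cmake` crate; `configure_backend` is a hypothetical name.
    fn configure_backend() {
        let mut config = cmake::Config::new(".");

        // Point CMake's FindPython3 at the vendored virtualenv by default.
        config
            .generator("Ninja")
            .define("Python3_ROOT_DIR", "../venv");

        // `option_env!` is expanded when build.rs itself is compiled, so the
        // variable must be set in the environment of the `cargo build`
        // invocation, e.g.:
        //   Python3_EXECUTABLE=/usr/bin/python3 cargo build
        if let Some(python3) = option_env!("Python3_EXECUTABLE") {
            config.define("Python3_EXECUTABLE", python3);
        }

        // Run the CMake configure, build, and install steps.
        config.build();
    }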