From 41819d70f7b8306d20f4128e33d3408df48ee728 Mon Sep 17 00:00:00 2001
From: Tzu-Yu Lee
Date: Sun, 18 May 2025 18:22:02 +0800
Subject: [PATCH] fix(trtllm): fix do_sample being ignored

Currently, the do_sample option is ignored and the executor will always
sample. Set top_k to 1 if do_sample is false.
---
 backends/trtllm/src/looper.rs | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/backends/trtllm/src/looper.rs b/backends/trtllm/src/looper.rs
index 17030b21..a4b70ea9 100644
--- a/backends/trtllm/src/looper.rs
+++ b/backends/trtllm/src/looper.rs
@@ -98,12 +98,17 @@ fn executor_status_looper(
                 let generation_params = &request.parameters;
                 let stopping_params = &request.stopping_parameters;
                 let input_ids = request.input_ids.as_deref();
+                let top_k = if generation_params.do_sample {
+                    generation_params.top_k
+                } else {
+                    1
+                };
 
                 // Submit to the TensorRT-LLM executor for scheduling
                 match backend.pin_mut().submit(
                     &input_ids.unwrap(), // This is checked beforehand in validate()
                     stopping_params.max_new_tokens,
-                    generation_params.top_k,
+                    top_k,
                     generation_params.top_p,
                     generation_params.temperature,
                     generation_params.repetition_penalty,