mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-07-27 10:20:17 +00:00
fix(trtllm): fix do_sample being ignored
Currently, the do_sample option is ignored and the executor will always sample. Set top_k to 1 if do_sample is false.
This commit is contained in:
parent
56dd0a09e6
commit
41819d70f7
@@ -98,12 +98,17 @@ fn executor_status_looper(
                 let generation_params = &request.parameters;
                 let stopping_params = &request.stopping_parameters;
                 let input_ids = request.input_ids.as_deref();

+                // do_sample was previously ignored: the executor always sampled.
+                // Forcing top_k = 1 makes decoding greedy when sampling is disabled.
+                let top_k = if generation_params.do_sample {
+                    generation_params.top_k
+                } else {
+                    1
+                };
+
                 // Submit to the TensorRT-LLM executor for scheduling
                 match backend.pin_mut().submit(
                     &input_ids.unwrap(), // This is checked beforehand in validate()
                     stopping_params.max_new_tokens,
-                    generation_params.top_k,
+                    top_k,
                     generation_params.top_p,
                     generation_params.temperature,
                     generation_params.repetition_penalty,
Loading…
Reference in New Issue
Block a user