Mirror of https://github.com/huggingface/text-generation-inference.git
fix(trtllm): fix do_sample being ignored
Currently, the do_sample option is ignored and the executor always samples. Set top_k to 1 when do_sample is false, so that the executor can only ever pick the single most likely token, i.e. greedy decoding.
commit 41819d70f7
parent 56dd0a09e6
@@ -98,12 +98,17 @@ fn executor_status_looper(
                 let generation_params = &request.parameters;
                 let stopping_params = &request.stopping_parameters;
                 let input_ids = request.input_ids.as_deref();
+                let top_k = if generation_params.do_sample {
+                    generation_params.top_k
+                } else {
+                    1
+                };
 
                 // Submit to the TensorRT-LLM executor for scheduling
                 match backend.pin_mut().submit(
                     &input_ids.unwrap(), // This is checked beforehand in validate()
                     stopping_params.max_new_tokens,
-                    generation_params.top_k,
+                    top_k,
                     generation_params.top_p,
                     generation_params.temperature,
                     generation_params.repetition_penalty,
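To make the behavioral change concrete, here is a minimal, self-contained Rust sketch of the same logic. The GenerationParams struct and the effective_top_k helper are hypothetical names invented for this illustration and are not part of the text-generation-inference codebase; the point is that clamping top_k to 1 when do_sample is false means only the single highest-probability token can ever be selected, which is exactly greedy decoding.

    // Hypothetical sketch; the names below are illustrative only and do
    // not exist in the text-generation-inference repository.
    struct GenerationParams {
        do_sample: bool,
        top_k: u32,
    }

    // Mirrors the logic added by this commit: when sampling is disabled,
    // clamp top_k to 1 so the executor can only ever pick the single
    // most likely token (greedy decoding).
    fn effective_top_k(params: &GenerationParams) -> u32 {
        if params.do_sample {
            params.top_k
        } else {
            1
        }
    }

    fn main() {
        let sampling = GenerationParams { do_sample: true, top_k: 50 };
        let greedy = GenerationParams { do_sample: false, top_k: 50 };
        assert_eq!(effective_top_k(&sampling), 50); // user-provided top_k respected
        assert_eq!(effective_top_k(&greedy), 1); // do_sample = false forces greedy
    }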