mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-06-15 13:52:06 +00:00
fix(generator): emulate greedy in sampling parameters
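When on-device sampling is enabled, greedy decoding must be emulated through the sampling parameters: top-k=1 (so that only the most likely token can be selected), together with top-p=1, typical-p=1 and temperature=1.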
This commit is contained in:
parent
b916076c72
commit
4e8ffec8ef
@@ -176,6 +176,12 @@ class Slot:
|
||||
self._generation_config.top_p = request.parameters.top_p
|
||||
if request.parameters.typical_p != 0:
|
||||
self._generation_config.typical_p = request.parameters.typical_p
|
||||
else:
|
||||
# Set the sampling parameters to emulate greedy decoding when using on-device sampling
|
||||
self._generation_config.temperature = 1.0
|
||||
self._generation_config.top_k = 1
|
||||
self._generation_config.top_p = 1.0
|
||||
self._generation_config.typical_p = 1.0
|
||||
if request.parameters.repetition_penalty != 0:
|
||||
self._generation_config.repetition_penalty = (
|
||||
request.parameters.repetition_penalty
|
||||
|
Loading…
Reference in New Issue
Block a user