mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-09-09 03:14:53 +00:00
fix(generator): emulate greedy in sampling parameters
When on-device sampling is enabled, greedy decoding has to be emulated through the sampling parameters: temperature=1.0, top_k=1 and top_p=1.0 (this change also sets typical_p=1.0).
This commit is contained in:
parent
b916076c72
commit
4e8ffec8ef
@@ -176,6 +176,12 @@ class Slot:
|
|||||||
self._generation_config.top_p = request.parameters.top_p
|
self._generation_config.top_p = request.parameters.top_p
|
||||||
if request.parameters.typical_p != 0:
|
if request.parameters.typical_p != 0:
|
||||||
self._generation_config.typical_p = request.parameters.typical_p
|
self._generation_config.typical_p = request.parameters.typical_p
|
||||||
|
else:
|
||||||
|
# Set the sampling parameters to emulate greedy decoding when using on-device sampling
|
||||||
|
self._generation_config.temperature = 1.0
|
||||||
|
self._generation_config.top_k = 1
|
||||||
|
self._generation_config.top_p = 1.0
|
||||||
|
self._generation_config.typical_p = 1.0
|
||||||
if request.parameters.repetition_penalty != 0:
|
if request.parameters.repetition_penalty != 0:
|
||||||
self._generation_config.repetition_penalty = (
|
self._generation_config.repetition_penalty = (
|
||||||
request.parameters.repetition_penalty
|
request.parameters.repetition_penalty
|
||||||
|
Loading…
Reference in New Issue
Block a user