Mirror of https://github.com/huggingface/text-generation-inference.git (synced 2025-04-19 22:02:06 +00:00)
fix(server): Use cleanup_tokenization_spaces=False for lossless decoding (#13)
Fixes #12 in the easiest way I could think of.
commit b94f30215f
parent 60472f9d2b
@@ -354,7 +354,8 @@ class CausalLM(Model):
             if stop:
                 # Decode all tokens
                 output_text = self.tokenizer.decode(
-                    all_input_ids.squeeze(-1), skip_special_tokens=True
+                    all_input_ids.squeeze(-1), skip_special_tokens=True,
+                    cleanup_tokenization_spaces=False
                 )
                 # Slice with input_length to remove padding
                 token_ids = all_input_ids[-new_input_length:]
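For context, here is a minimal sketch (not part of this commit) of the behavior the flag controls, assuming the transformers library and the GPT-2 tokenizer; note that transformers itself spells the parameter clean_up_tokenization_spaces. With cleanup enabled, decode() normalizes spacing around punctuation, so decode(encode(text)) can differ from the original text; disabling it keeps the round trip lossless:

from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("gpt2")

text = "Hello , world !"  # spacing that cleanup would normalize away
ids = tokenizer.encode(text)

# With cleanup, " ," becomes "," and " !" becomes "!", altering the text.
print(tokenizer.decode(ids, clean_up_tokenization_spaces=True))   # Hello, world!

# Without cleanup, the decoded text matches the encoded input exactly.
print(tokenizer.decode(ids, clean_up_tokenization_spaces=False))  # Hello , world !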