Mirror of https://github.com/huggingface/text-generation-inference.git
Remove special, it's not correct enough (and not necessarily useful).
parent c12ff38974
commit 048bc5b4b7
@@ -696,13 +696,13 @@ async fn tokenize(
         .get_ids()
         .iter()
         .zip(encoding.get_offsets())
-        .map(|(&id, (start, stop))| {
-            let text: String = input.chars().skip(*start).take(stop - start).collect();
+        .map(|(&id, &(start, stop))| {
+            let text: String = input.chars().skip(start).take(stop - start).collect();
             SimpleToken {
                 id,
                 text,
-                start: *start,
-                stop: *stop,
+                start,
+                stop,
             }
         })
         .collect();
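The only change in this hunk is the binding mode of the offset tuple: iterating `encoding.get_offsets()` yields `&(usize, usize)` items, so the old pattern bound `start` and `stop` as references and needed explicit dereferences, while the new `&(start, stop)` pattern copies the `usize` values out. A minimal standalone sketch of that difference (values are illustrative, not from the repository):

    fn main() {
        let offsets: Vec<(usize, usize)> = vec![(0, 5), (6, 11)];

        // Old pattern: match ergonomics bind `start`/`stop` as `&usize`,
        // so uses need `*start` / `*stop`.
        for (start, stop) in offsets.iter() {
            let _len = *stop - *start;
        }

        // New pattern: `&(start, stop)` dereferences the item and, because
        // `usize` is `Copy`, binds plain values that can be used directly.
        for &(start, stop) in offsets.iter() {
            let _len = stop - start;
        }
    }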
@@ -365,7 +365,7 @@ fn prepare_input(
 ) -> Result<(tokenizers::Encoding, String), ValidationError> {
     // Get the number of tokens in the input
     let mut encoding = tokenizer
-        .encode(inputs.clone(), true)
+        .encode(inputs.clone(), false)
         .map_err(|err| ValidationError::Tokenizer(err.to_string()))?;

     // Optionally truncate
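For context on the flipped argument: in the `tokenizers` crate, the second parameter of `Tokenizer::encode` is `add_special_tokens`. With `true`, the tokenizer's post-processor may wrap the input in model-specific special tokens (BOS/EOS and the like); with `false`, only the tokens of the raw input are produced, which is the "special" the commit message removes. A minimal sketch of the two modes, assuming a local `tokenizer.json` (the path is illustrative):

    use tokenizers::Tokenizer;

    fn main() -> tokenizers::Result<()> {
        let tokenizer = Tokenizer::from_file("tokenizer.json")?;

        // `true`: the post-processor may insert special tokens around the
        // input (model dependent).
        let with_special = tokenizer.encode("Hello world", true)?;

        // `false` (the behaviour after this commit): only the tokens of the
        // raw input, so offsets map directly onto the original string.
        let without_special = tokenizer.encode("Hello world", false)?;

        println!("{:?}", with_special.get_ids());
        println!("{:?}", without_special.get_ids());
        Ok(())
    }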