Remove special, it's not correct enough (and not necessarily useful).

This commit is contained in:
Nicolas Patry 2024-01-23 15:04:17 +01:00
parent c12ff38974
commit 048bc5b4b7
2 changed files with 5 additions and 5 deletions

View File

@@ -696,13 +696,13 @@ async fn tokenize(
.get_ids() .get_ids()
.iter() .iter()
.zip(encoding.get_offsets()) .zip(encoding.get_offsets())
.map(|(&id, (start, stop))| { .map(|(&id, &(start, stop))| {
let text: String = input.chars().skip(*start).take(stop - start).collect(); let text: String = input.chars().skip(start).take(stop - start).collect();
SimpleToken { SimpleToken {
id, id,
text, text,
start: *start, start,
stop: *stop, stop,
} }
}) })
.collect(); .collect();

View File

@@ -365,7 +365,7 @@ fn prepare_input(
) -> Result<(tokenizers::Encoding, String), ValidationError> { ) -> Result<(tokenizers::Encoding, String), ValidationError> {
// Get the number of tokens in the input // Get the number of tokens in the input
let mut encoding = tokenizer let mut encoding = tokenizer
.encode(inputs.clone(), true) .encode(inputs.clone(), false)
.map_err(|err| ValidationError::Tokenizer(err.to_string()))?; .map_err(|err| ValidationError::Tokenizer(err.to_string()))?;
// Optionally truncate // Optionally truncate