Remove special tokens when encoding: adding them is not correct enough (and not necessarily useful).

This commit is contained in:
Nicolas Patry 2024-01-23 15:04:17 +01:00
parent c12ff38974
commit 048bc5b4b7
2 changed files with 5 additions and 5 deletions

View File

@ -696,13 +696,13 @@ async fn tokenize(
.get_ids()
.iter()
.zip(encoding.get_offsets())
.map(|(&id, (start, stop))| {
let text: String = input.chars().skip(*start).take(stop - start).collect();
.map(|(&id, &(start, stop))| {
let text: String = input.chars().skip(start).take(stop - start).collect();
SimpleToken {
id,
text,
start: *start,
stop: *stop,
start,
stop,
}
})
.collect();

View File

@ -365,7 +365,7 @@ fn prepare_input(
) -> Result<(tokenizers::Encoding, String), ValidationError> {
// Get the number of tokens in the input
let mut encoding = tokenizer
.encode(inputs.clone(), true)
.encode(inputs.clone(), false)
.map_err(|err| ValidationError::Tokenizer(err.to_string()))?;
// Optionally truncate