This commit is contained in:
Nicolas Patry 2025-04-09 15:22:49 +02:00
parent 0b28aabb94
commit 0eb4bdc909
No known key found for this signature in database
GPG Key ID: 87B37D879D09DEB4

View File

@ -74,11 +74,8 @@ fn encoding_to_tokens(encoding: &tokenizers::Encoding, input: &str) -> Vec<Simpl
.iter() .iter()
.zip(offsets) .zip(offsets)
.map(|(&id, &(start, stop))| { .map(|(&id, &(start, stop))| {
let text = input let text: Vec<u8> = input.bytes().skip(start).take(stop - start).collect();
.chars() let text: String = String::from_utf8_lossy(&text).to_string();
.skip(start)
.take(stop - start)
.collect::<String>();
SimpleToken { SimpleToken {
id, id,
text, text,