This commit is contained in:
Nicolas Patry 2025-04-09 15:22:49 +02:00
parent 0b28aabb94
commit 0eb4bdc909
No known key found for this signature in database
GPG Key ID: 87B37D879D09DEB4

View File

@@ -74,11 +74,8 @@ fn encoding_to_tokens(encoding: &tokenizers::Encoding, input: &str) -> Vec<Simpl
.iter()
.zip(offsets)
.map(|(&id, &(start, stop))| {
let text = input
.chars()
.skip(start)
.take(stop - start)
.collect::<String>();
let text: Vec<u8> = input.bytes().skip(start).take(stop - start).collect();
let text: String = String::from_utf8_lossy(&text).to_string();
SimpleToken {
id,
text,