mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-04-27 21:12:07 +00:00
Truncating left for radix purposes.
This commit is contained in:
parent
0a60973166
commit
e6ee67f301
@ -167,7 +167,8 @@ impl Validation {
|
||||
));
|
||||
}
|
||||
|
||||
let input_ids = encoding.get_ids()[..input_length].to_owned();
|
||||
let ids = encoding.get_ids();
|
||||
let input_ids = ids[ids.len().saturating_sub(input_length)..].to_owned();
|
||||
|
||||
metrics::histogram!("tgi_request_input_length").record(input_length as f64);
|
||||
Ok((inputs, Some(input_ids), input_length, max_new_tokens))
|
||||
|
@ -272,6 +272,9 @@ class FlashCausalLMBatch(Batch):
|
||||
|
||||
prefix_len = r.prefix_len
|
||||
assert prefix_len <= orig_input_length
|
||||
if prefix_len == orig_input_length:
|
||||
assert prefix_len > 0
|
||||
prefix_len -= 1
|
||||
|
||||
prefix_ids.append(tokenized_input[:prefix_len])
|
||||
tokenized_input = tokenized_input[prefix_len:]
|
||||
|
Loading…
Reference in New Issue
Block a user