mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-04-27 13:02:12 +00:00
Truncating left for radix purposes.
This commit is contained in:
parent
0a60973166
commit
e6ee67f301
@@ -167,7 +167,8 @@ impl Validation {
     ));
 }

-let input_ids = encoding.get_ids()[..input_length].to_owned();
+let ids = encoding.get_ids();
+let input_ids = ids[ids.len().saturating_sub(input_length)..].to_owned();

 metrics::histogram!("tgi_request_input_length").record(input_length as f64);
 Ok((inputs, Some(input_ids), input_length, max_new_tokens))
@@ -272,6 +272,9 @@ class FlashCausalLMBatch(Batch):

 prefix_len = r.prefix_len
 assert prefix_len <= orig_input_length
+if prefix_len == orig_input_length:
+    assert prefix_len > 0
+    prefix_len -= 1

 prefix_ids.append(tokenized_input[:prefix_len])
 tokenized_input = tokenized_input[prefix_len:]
Loading…
Reference in New Issue
Block a user