mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-09-11 12:24:53 +00:00
Tmp dump (sending a real image so the real memory offset can be computed).
This commit is contained in:
parent
274b68ad7d
commit
215030ac88
@ -112,10 +112,12 @@ impl Client {
|
||||
// Create requests
|
||||
while n_tokens < max_prefill_tokens {
|
||||
let truncate = min(max_input_length, max_prefill_tokens - n_tokens);
|
||||
let mut inputs = "_test ".to_string().repeat(max_input_length as usize);
|
||||
inputs.push_str(";
|
||||
requests.push(Request {
|
||||
id: 0,
|
||||
// We truncate the input on the server side to be sure that it has the correct size
|
||||
inputs: "_test ".to_string().repeat(max_input_length as usize),
|
||||
inputs,
|
||||
truncate,
|
||||
// Set sampling parameters to also take these ops into account in the max memory
|
||||
parameters: Some(NextTokenChooserParameters {
|
||||
|
Loading…
Reference in New Issue
Block a user