mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-09-11 12:24:53 +00:00
Tmp dump (sending real image for real memory offset to be computed).
This commit is contained in:
parent
274b68ad7d
commit
215030ac88
@ -112,10 +112,12 @@ impl Client {
|
|||||||
// Create requests
|
// Create requests
|
||||||
while n_tokens < max_prefill_tokens {
|
while n_tokens < max_prefill_tokens {
|
||||||
let truncate = min(max_input_length, max_prefill_tokens - n_tokens);
|
let truncate = min(max_input_length, max_prefill_tokens - n_tokens);
|
||||||
|
let mut inputs = "_test ".to_string().repeat(max_input_length as usize);
|
||||||
|
inputs.push_str(";
|
||||||
requests.push(Request {
|
requests.push(Request {
|
||||||
id: 0,
|
id: 0,
|
||||||
// We truncate the input on the server side to be sure that it has the correct size
|
// We truncate the input on the server side to be sure that it has the correct size
|
||||||
inputs: "_test ".to_string().repeat(max_input_length as usize),
|
inputs,
|
||||||
truncate,
|
truncate,
|
||||||
// Set sampling parameters to also take these ops into account in the max memory
|
// Set sampling parameters to also take these ops into account in the max memory
|
||||||
parameters: Some(NextTokenChooserParameters {
|
parameters: Some(NextTokenChooserParameters {
|
||||||
|
Loading…
Reference in New Issue
Block a user