mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-09-09 19:34:53 +00:00
fmt
Signed-off-by: Wang, Yi A <yi.a.wang@intel.com>
This commit is contained in:
parent
b5e1ae9209
commit
becf36f5e4
@@ -16,6 +16,7 @@ use text_generation_router::validation::{
|
|||||||
use tokio::sync::{mpsc, oneshot};
|
use tokio::sync::{mpsc, oneshot};
|
||||||
use tokio::time::Instant;
|
use tokio::time::Instant;
|
||||||
use tracing::{info_span, instrument, Instrument, Span};
|
use tracing::{info_span, instrument, Instrument, Span};
|
||||||
|
|
||||||
/// Queue entry
|
/// Queue entry
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
pub(crate) struct Entry {
|
pub(crate) struct Entry {
|
||||||
@@ -372,8 +373,8 @@ impl State {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
//HPU padding for the prefill
|
|
||||||
if self.is_hpu_device {
|
if self.is_hpu_device {
|
||||||
|
//HPU needs to pad for the prefill
|
||||||
max_input_length = max_input_length.max(entry.request.input_length);
|
max_input_length = max_input_length.max(entry.request.input_length);
|
||||||
let actual_prefill_tokens_for_hpu =
|
let actual_prefill_tokens_for_hpu =
|
||||||
(batch.len() + 1) as u32 * max_input_length;
|
(batch.len() + 1) as u32 * max_input_length;
|
||||||
|
Loading…
Reference in New Issue
Block a user