mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-06-19 15:52:08 +00:00
Less spammy logs too.
This commit is contained in:
parent
a495ee5342
commit
5208d3f93e
@@ -311,7 +311,7 @@ impl State {
|
|||||||
+ entry.request.stopping_parameters.max_new_tokens
|
+ entry.request.stopping_parameters.max_new_tokens
|
||||||
+ self.speculate
|
+ self.speculate
|
||||||
- 1;
|
- 1;
|
||||||
tracing::debug!("Allocating {tokens} with {input_ids:?}");
|
// tracing::debug!("Allocating {tokens} with {input_ids:?}");
|
||||||
|
|
||||||
let block_allocation = match block_allocator.allocate(tokens, input_ids).await {
|
let block_allocation = match block_allocator.allocate(tokens, input_ids).await {
|
||||||
None => {
|
None => {
|
||||||
@@ -322,7 +322,7 @@ impl State {
|
|||||||
break 'entry_loop;
|
break 'entry_loop;
|
||||||
}
|
}
|
||||||
Some(mut block_allocation) => {
|
Some(mut block_allocation) => {
|
||||||
tracing::debug!("Allocation: {block_allocation:?}");
|
// tracing::debug!("Allocation: {block_allocation:?}");
|
||||||
max_blocks = max(max_blocks, block_allocation.blocks.len() as u32);
|
max_blocks = max(max_blocks, block_allocation.blocks.len() as u32);
|
||||||
|
|
||||||
if block_allocation.prefix_len == entry.request.input_length {
|
if block_allocation.prefix_len == entry.request.input_length {
|
||||||
|
Loading…
Reference in New Issue
Block a user