Mirror of https://github.com/huggingface/text-generation-inference.git (synced 2025-09-11 04:14:52 +00:00)
Remove pdb comments.
This commit is contained in:
parent 79f9afba90
commit d99f281050
```diff
@@ -825,8 +825,6 @@ class FlashCausalLM(Model):
             next_token_logits = out
 
-        # if next_token_logits.shape[0] == 3:
-        #     import ipdb;ipdb.set_trace()
         from text_generation_server.models import SPECULATE
         next_input_ids, next_token_logprobs, logprobs, accepted_ids, speculative_ids = batch.next_token_chooser(
             batch.all_input_ids_tensor[:, : batch.max_seqlen], next_token_logits, SPECULATE, batch.speculative_ids, speculative_logits
```
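For orientation on the call site in this hunk: the chooser receives the token history, the fresh logits, the global `SPECULATE` depth, and the previous round's speculative ids and logits, and returns five values. Below is a minimal stub of that shape; the parameter names, types, and comments are assumptions for illustration, not the repository's actual signature:

```python
import torch
from typing import Optional, Tuple

# Hypothetical stub mirroring the call site above; names, shapes, and
# types are assumptions for illustration only.
def next_token_chooser(
    all_input_ids: torch.Tensor,       # [batch, max_seqlen] token history
    next_token_logits: torch.Tensor,   # logits for the next position
    speculate: int,                    # global speculation depth (SPECULATE)
    speculative_ids: Optional[torch.Tensor],     # last round's draft tokens
    speculative_logits: Optional[torch.Tensor],  # last round's draft logits
) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]:
    # Returns: next_input_ids, next_token_logprobs, logprobs,
    # accepted_ids, speculative_ids
    ...
```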
```diff
@@ -850,7 +848,6 @@ class FlashCausalLM(Model):
             batch.cu_seqlen_prefill = None
         else:
             prefill_logprobs = None
-            # import ipdb;ipdb.set_trace()
             next_position_ids = batch.position_ids
 
         # Cumulative length
```
```diff
@@ -912,15 +909,6 @@ class FlashCausalLM(Model):
             cumulative_length += input_length
 
-        # if accepted_ids[0] > 1:
-        #     import ipdb;ipdb.set_trace()
-
-        # if len(accepted_ids) > 1:
-        #     raise Exception("Implemtent the batched behavior")
-
-        # Set values in batch
-        # batch.input_ids = torch.cat([next_input_ids.unsqueeze(-1), speculative_ids], dim=1).view(-1)
-
         batch.input_ids = next_input_ids[accepted_ids.cumsum(dim=-1) - 1]
         batch.speculative_ids = speculative_ids
         batch.position_ids = next_position_ids + accepted_ids
```
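A note on the surviving indexing in this hunk: `accepted_ids` holds how many draft tokens each sequence accepted, and `next_input_ids` is a flat tensor of candidate tokens. Assuming the flat layout concatenates each sequence's accepted tokens back to back (which is what the indexing implies), `accepted_ids.cumsum(dim=-1) - 1` picks out the last accepted token of every sequence in one gather. A minimal sketch with made-up values:

```python
import torch

# Made-up values: sequence 0 accepted 2 tokens, sequence 1 accepted 3,
# flattened back to back into one tensor.
next_input_ids = torch.tensor([11, 12, 21, 22, 23])
accepted_ids = torch.tensor([2, 3])

# cumsum gives each sequence's end offset in the flat layout ([2, 5]);
# subtracting 1 indexes the last accepted token per sequence ([1, 4]).
last_tokens = next_input_ids[accepted_ids.cumsum(dim=-1) - 1]
print(last_tokens)  # tensor([12, 23])
```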
```diff
@@ -308,8 +308,6 @@ class HeterogeneousNextTokenChooser:
                     break
                 accepted_ids.append(accepted)
 
-        from loguru import logger
-        logger.info(f"ACCEPTED IDS {accepted_ids}")
         accepted_ids = torch.tensor(accepted_ids, device=input_ids.device, dtype=input_ids.dtype)
         next_ids = next_ids[indices]
         scores = scores[indices]
```
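Context for this hunk, which drops ad-hoc `loguru` logging from the acceptance loop: each sequence's `accepted` count comes from checking draft tokens against the verifier's own choices, and the per-sequence counts are collected into a Python list before the tensor conversion shown above. The sketch below illustrates that pattern under greedy verification; it is an illustration only, not the repository's implementation:

```python
import torch

def count_accepted(draft: torch.Tensor, verified: torch.Tensor) -> int:
    """Greedy verification sketch: accept draft tokens while they match
    the verifier's choices, stop at the first mismatch. The verifier's
    own token at the stopping position is still emitted, hence the +1.
    Illustration only; not the repository's implementation."""
    accepted = 1
    for d, v in zip(draft.tolist(), verified.tolist()):
        if d != v:
            break
        accepted += 1
    return accepted

# Hypothetical batch of (draft, verified) token pairs.
pairs = [
    (torch.tensor([5, 9, 2]), torch.tensor([5, 9, 7])),
    (torch.tensor([1, 4, 4]), torch.tensor([1, 4, 4])),
]
accepted_ids = [count_accepted(d, v) for d, v in pairs]

# Same list-to-tensor conversion as in the hunk above.
accepted_ids = torch.tensor(accepted_ids, dtype=torch.long)
print(accepted_ids)  # tensor([3, 4])
```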