mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-09-11 04:14:52 +00:00
Remove pdb comments.
This commit is contained in:
parent
79f9afba90
commit
d99f281050
@@ -825,8 +825,6 @@ class FlashCausalLM(Model):
|
|||||||
next_token_logits = out
|
next_token_logits = out
|
||||||
|
|
||||||
|
|
||||||
# if next_token_logits.shape[0] == 3:
|
|
||||||
# import ipdb;ipdb.set_trace()
|
|
||||||
from text_generation_server.models import SPECULATE
|
from text_generation_server.models import SPECULATE
|
||||||
next_input_ids, next_token_logprobs, logprobs, accepted_ids, speculative_ids = batch.next_token_chooser(
|
next_input_ids, next_token_logprobs, logprobs, accepted_ids, speculative_ids = batch.next_token_chooser(
|
||||||
batch.all_input_ids_tensor[:, : batch.max_seqlen], next_token_logits, SPECULATE, batch.speculative_ids, speculative_logits
|
batch.all_input_ids_tensor[:, : batch.max_seqlen], next_token_logits, SPECULATE, batch.speculative_ids, speculative_logits
|
||||||
@@ -850,7 +848,6 @@ class FlashCausalLM(Model):
|
|||||||
batch.cu_seqlen_prefill = None
|
batch.cu_seqlen_prefill = None
|
||||||
else:
|
else:
|
||||||
prefill_logprobs = None
|
prefill_logprobs = None
|
||||||
# import ipdb;ipdb.set_trace()
|
|
||||||
next_position_ids = batch.position_ids
|
next_position_ids = batch.position_ids
|
||||||
|
|
||||||
# Cumulative length
|
# Cumulative length
|
||||||
@@ -912,15 +909,6 @@ class FlashCausalLM(Model):
|
|||||||
cumulative_length += input_length
|
cumulative_length += input_length
|
||||||
|
|
||||||
|
|
||||||
# if accepted_ids[0] > 1:
|
|
||||||
# import ipdb;ipdb.set_trace()
|
|
||||||
|
|
||||||
# if len(accepted_ids) > 1:
|
|
||||||
# raise Exception("Implemtent the batched behavior")
|
|
||||||
|
|
||||||
# Set values in batch
|
|
||||||
# batch.input_ids = torch.cat([next_input_ids.unsqueeze(-1), speculative_ids], dim=1).view(-1)
|
|
||||||
|
|
||||||
batch.input_ids = next_input_ids[accepted_ids.cumsum(dim=-1) - 1]
|
batch.input_ids = next_input_ids[accepted_ids.cumsum(dim=-1) - 1]
|
||||||
batch.speculative_ids = speculative_ids
|
batch.speculative_ids = speculative_ids
|
||||||
batch.position_ids = next_position_ids + accepted_ids
|
batch.position_ids = next_position_ids + accepted_ids
|
||||||
|
@@ -308,8 +308,6 @@ class HeterogeneousNextTokenChooser:
|
|||||||
break
|
break
|
||||||
accepted_ids.append(accepted)
|
accepted_ids.append(accepted)
|
||||||
|
|
||||||
from loguru import logger
|
|
||||||
logger.info(f"ACCEPTED IDS {accepted_ids}")
|
|
||||||
accepted_ids = torch.tensor(accepted_ids, device=input_ids.device, dtype=input_ids.dtype)
|
accepted_ids = torch.tensor(accepted_ids, device=input_ids.device, dtype=input_ids.dtype)
|
||||||
next_ids = next_ids[indices]
|
next_ids = next_ids[indices]
|
||||||
scores = scores[indices]
|
scores = scores[indices]
|
||||||
|
Loading…
Reference in New Issue
Block a user