mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-06-19 07:42:06 +00:00
Remove debug info
Signed-off-by: yuanwu <yuan.wu@intel.com>
This commit is contained in:
parent
7f346a88e3
commit
eed58b77c3
@@ -229,21 +229,8 @@ class Qwen3MoE(nn.Module):
|
||||
self.process_group = weights.process_group
|
||||
|
||||
def forward(self, x: torch.Tensor) -> torch.Tensor:
|
||||
# router_logits: (num_tokens, n_experts)
|
||||
router_logits = self.gate(x)
|
||||
# synchronize(x.device)
|
||||
# real_free_memory = get_free_memory(x.device, 1)
|
||||
# log_master(
|
||||
# logger.debug,
|
||||
# f"moe forward 1Free memory real: {real_free_memory / 1e9:.2f}GB"
|
||||
# )
|
||||
out = self.moe(x, gating_output=router_logits)
|
||||
# synchronize(x.device)
|
||||
# real_free_memory = get_free_memory(x.device, 1)
|
||||
# log_master(
|
||||
# logger.debug,
|
||||
# f"moe forward 2 Free memory real: {real_free_memory / 1e9:.2f}GB"
|
||||
# )
|
||||
|
||||
# Reduce sum
|
||||
if self.process_group.size() > 1:
|
||||
|
@@ -1412,7 +1412,6 @@ class FlashCausalLM(Model):
|
||||
aliases=aliases,
|
||||
weights_loader=weights_loader,
|
||||
)
|
||||
print(f"weights: {weights}")
|
||||
|
||||
prefix = None
|
||||
model = model_class(prefix, config, weights)
|
||||
|
Loading…
Reference in New Issue
Block a user