mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-06-19 15:52:08 +00:00
Remove debug info
Signed-off-by: yuanwu <yuan.wu@intel.com>
This commit is contained in:
parent
7f346a88e3
commit
eed58b77c3
@ -229,21 +229,8 @@ class Qwen3MoE(nn.Module):
|
|||||||
self.process_group = weights.process_group
|
self.process_group = weights.process_group
|
||||||
|
|
||||||
def forward(self, x: torch.Tensor) -> torch.Tensor:
|
def forward(self, x: torch.Tensor) -> torch.Tensor:
|
||||||
# router_logits: (num_tokens, n_experts)
|
|
||||||
router_logits = self.gate(x)
|
router_logits = self.gate(x)
|
||||||
# synchronize(x.device)
|
|
||||||
# real_free_memory = get_free_memory(x.device, 1)
|
|
||||||
# log_master(
|
|
||||||
# logger.debug,
|
|
||||||
# f"moe forward 1Free memory real: {real_free_memory / 1e9:.2f}GB"
|
|
||||||
# )
|
|
||||||
out = self.moe(x, gating_output=router_logits)
|
out = self.moe(x, gating_output=router_logits)
|
||||||
# synchronize(x.device)
|
|
||||||
# real_free_memory = get_free_memory(x.device, 1)
|
|
||||||
# log_master(
|
|
||||||
# logger.debug,
|
|
||||||
# f"moe forward 2 Free memory real: {real_free_memory / 1e9:.2f}GB"
|
|
||||||
# )
|
|
||||||
|
|
||||||
# Reduce sum
|
# Reduce sum
|
||||||
if self.process_group.size() > 1:
|
if self.process_group.size() > 1:
|
||||||
|
@ -1412,7 +1412,6 @@ class FlashCausalLM(Model):
|
|||||||
aliases=aliases,
|
aliases=aliases,
|
||||||
weights_loader=weights_loader,
|
weights_loader=weights_loader,
|
||||||
)
|
)
|
||||||
print(f"weights: {weights}")
|
|
||||||
|
|
||||||
prefix = None
|
prefix = None
|
||||||
model = model_class(prefix, config, weights)
|
model = model_class(prefix, config, weights)
|
||||||
|
Loading…
Reference in New Issue
Block a user