Remove debug info

Signed-off-by: yuanwu <yuan.wu@intel.com>
yuanwu 2025-06-03 05:28:38 +00:00
parent 5155fef477
commit 1a5ef906ae
2 changed files with 2 additions and 20 deletions


@@ -30,12 +30,6 @@ from text_generation_server.layers import (
     SpeculativeHead,
     FastLinear,
 )
-from text_generation_server.utils.import_utils import (
-    synchronize,
-    get_free_memory,
-)
-from loguru import logger
-from text_generation_server.utils.log import log_master
 from text_generation_server.layers.layernorm import (
     FastRMSNorm,
@@ -359,9 +353,6 @@ class Qwen3MoeSparseMoeBlock(nn.Module):
         routing_weights, selected_experts = torch.topk(
             routing_weights, self.top_k, dim=-1
         )
-        print(
-            f"routing_weights: {routing_weights.device}, selected_experts: {selected_experts.device}"
-        )
         if self.norm_topk_prob:  # only diff with mixtral sparse moe block!
             routing_weights /= routing_weights.sum(dim=-1, keepdim=True)
         # we cast back to the input dtype
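
For reference, the routing step in this hunk picks the top-k experts per token and, when norm_topk_prob is set, re-normalizes the kept weights so they sum to 1. A minimal standalone sketch of that technique (sizes and names here are illustrative, not taken from the file):

    import torch

    num_tokens, num_experts, top_k = 8, 4, 2
    router_logits = torch.randn(num_tokens, num_experts)

    # softmax over experts, then keep only the k largest weights per token
    routing_weights = torch.softmax(router_logits, dim=-1)
    routing_weights, selected_experts = torch.topk(routing_weights, top_k, dim=-1)

    # optional re-normalization so the kept weights sum to 1 per token
    routing_weights /= routing_weights.sum(dim=-1, keepdim=True)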
@@ -376,7 +367,6 @@ class Qwen3MoeSparseMoeBlock(nn.Module):
         expert_mask = torch.nn.functional.one_hot(
             selected_experts, num_classes=self.num_experts
         ).permute(2, 1, 0)
-        print(f"expert_mask: {expert_mask.device}")
         # Loop over all available experts in the model and perform the computation on each expert
         for expert_idx in range(self.num_experts):
             expert_layer = self.experts[expert_idx]
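
The expert_mask built just above this loop is a one-hot routing table transposed to (num_experts, top_k, num_tokens), so each expert can cheaply find the tokens assigned to it. A small sketch of how such a mask is typically consumed (variable names are illustrative):

    import torch

    num_tokens, num_experts, top_k = 8, 4, 2
    selected_experts = torch.randint(num_experts, (num_tokens, top_k))

    # (num_tokens, top_k, num_experts) -> (num_experts, top_k, num_tokens)
    expert_mask = torch.nn.functional.one_hot(
        selected_experts, num_classes=num_experts
    ).permute(2, 1, 0)

    tokens_per_expert = {}
    for expert_idx in range(num_experts):
        # idx: which top-k slot fired, top_x: which token rows hit this expert
        idx, top_x = torch.where(expert_mask[expert_idx])
        tokens_per_expert[expert_idx] = top_x.tolist()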
@@ -432,10 +422,6 @@ class Qwen3MoeDecoderLayer(nn.Module):
             SparseMoELayer if SparseMoELayer.is_supported(weights) else DenseMoELayer
         )
-        moe_layer_cls = (
-            SparseMoELayer if SparseMoELayer.is_supported(weights) else DenseMoELayer
-        )
         if (layer_idx not in config.mlp_only_layers) and (
             config.num_experts > 0 and (layer_idx + 1) % config.decoder_sparse_step == 0
         ):
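
The condition kept at the end of this hunk is the Qwen-MoE sparsity schedule: a layer gets a sparse MoE block only if it is not listed as MLP-only and its 1-based index is a multiple of decoder_sparse_step. A hedged sketch of that predicate (config field names mirror the hunk):

    def uses_moe(layer_idx: int, config) -> bool:
        # dense-only layers are excluded; every decoder_sparse_step-th
        # layer (1-based) with experts configured becomes sparse MoE
        return (
            layer_idx not in config.mlp_only_layers
            and config.num_experts > 0
            and (layer_idx + 1) % config.decoder_sparse_step == 0
        )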
@@ -516,11 +502,6 @@ class Qwen3MoeModel(nn.Module):
         self.norm = FastRMSNorm.load(
             prefix=f"{prefix}.norm", weights=weights, eps=config.rms_norm_eps
         )
-        synchronize(weights.device)
-        real_free_memory = get_free_memory(weights.device, 1)
-        log_master(
-            logger.debug, f"init model Free memory real: {real_free_memory / 1e9:.2f}GB"
-        )
     def forward(
         self,
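
The block deleted here probed free device memory right after the weights were loaded, using TGI's synchronize/get_free_memory helpers and log_master. A rough standalone equivalent, assuming a plain CUDA device (the real helpers dispatch per backend, e.g. HPU), might look like:

    import torch
    from loguru import logger

    def log_free_memory(device: torch.device) -> None:
        # flush pending kernels so the reading reflects finished allocations
        torch.cuda.synchronize(device)
        free_bytes, _total = torch.cuda.mem_get_info(device)
        logger.debug(f"init model Free memory real: {free_bytes / 1e9:.2f}GB")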


@@ -49,7 +49,8 @@ from text_generation_server.models.custom_modeling.flash_qwen2_modeling import (
 # Copied from: https://github.com/huggingface/transformers/blob/main/src/transformers/models/qwen2_5_vl/processing_qwen2_5_vl.py
 from typing import Union
 from transformers.feature_extraction_utils import BatchFeature
-from transformers.image_utils import ImageInput, VideoInput
+from transformers.image_utils import ImageInput
+from transformers.video_utils import VideoInput
 from transformers.processing_utils import (
     ProcessingKwargs,
     ProcessorMixin,
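
The second changed file's one-line split tracks a transformers refactor that moved VideoInput out of image_utils into a dedicated video_utils module. If the code ever needed to run against both layouts, a guarded import (an assumption, not what this commit does) would bridge the versions:

    try:
        # newer transformers releases: video types live in their own module
        from transformers.video_utils import VideoInput
    except ImportError:
        # older releases kept VideoInput alongside the image types
        from transformers.image_utils import VideoInput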