From d701f9e86640e4f7f21860bc2a9963cee50921ba Mon Sep 17 00:00:00 2001
From: Nicolas Patry
Date: Mon, 9 Dec 2024 10:48:20 +0100
Subject: [PATCH] Adding small comment for source of calculation.

---
 launcher/src/main.rs | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/launcher/src/main.rs b/launcher/src/main.rs
index 9de2e4e5..32adcd01 100644
--- a/launcher/src/main.rs
+++ b/launcher/src/main.rs
@@ -295,6 +295,8 @@ impl Config {
         // TODO This calculation depends on the actual implementation
         let dtype_size = 2;
         let mlp_size = self.intermediate_size?;
+        // calculation is overshooting here.
+        // Coming from here: https://github.com/vllm-project/vllm/blob/d1c2e15eb31ef12e688ce0cb71895f88eaf4cd4f/vllm/model_executor/layers/fused_moe/fused_moe.py#L618-L624
         Some((mlp_size + mlp_size / 2) * self.num_experts * dtype_size * 3)
     }