mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-09-11 20:34:54 +00:00
Adding small comment for source of calculation.
This commit is contained in:
parent
36ed43c920
commit
d701f9e866
@@ -295,6 +295,8 @@ impl Config {
|
|||||||
// TODO This calculation depends on the actual implementation
|
// TODO This calculation depends on the actual implementation
|
||||||
let dtype_size = 2;
|
let dtype_size = 2;
|
||||||
let mlp_size = self.intermediate_size?;
|
let mlp_size = self.intermediate_size?;
|
||||||
|
// calculation is overshooting here.
|
||||||
|
// Coming from here: https://github.com/vllm-project/vllm/blob/d1c2e15eb31ef12e688ce0cb71895f88eaf4cd4f/vllm/model_executor/layers/fused_moe/fused_moe.py#L618-L624
|
||||||
Some((mlp_size + mlp_size / 2) * self.num_experts * dtype_size * 3)
|
Some((mlp_size + mlp_size / 2) * self.num_experts * dtype_size * 3)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user