Commit 4b79f9e

[mxfp8 moe training] update benchmarks to force load balancing (#3193)
1 parent bb3f03b commit 4b79f9e

2 files changed: 8 additions & 4 deletions
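In both benchmarks the change amounts to constructing MoEArgs with torchtitan's debug flag that forces balanced routing, so every expert receives the same number of tokens and timings are not skewed by router imbalance. A minimal sketch of the new configuration (only the three fields appear in the commit; the comment on the flag's behavior is inferred from its name and the commit message, not verified here):

from torchtitan.models.moe import MoEArgs

model_args = MoEArgs(
    num_experts=16,
    num_shared_experts=1,
    # Debug-only flag: forces the router to spread tokens evenly across
    # experts (behavior inferred from the name and commit message).
    _debug_force_load_balance=True,
)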

benchmarks/prototype/moe_training/bench_moe_layer.py

Lines changed: 4 additions & 2 deletions
@@ -23,10 +23,10 @@
 
 # this benchmark requires torchtitan
 try:
-    from torchtitan.distributed.expert_parallel import (
+    from torchtitan.models.moe import MoE, MoEArgs
+    from torchtitan.models.moe.utils import (
         set_token_group_alignment_size_m,
     )
-    from torchtitan.models.moe import MoE, MoEArgs
 except ImportError:
     logging.warning(
         "please pip install torchtitan to run this benchmark: https://github.com/pytorch/torchtitan"
 
@@ -77,6 +77,8 @@ def bench_moe_training_fsdp(args: argparse.Namespace):
     target_fqns = ["experts"]
     model_args = MoEArgs(
         num_experts=local_num_experts,
+        num_shared_experts=1,
+        _debug_force_load_balance=True,
     )
     init_std = 0.02
     device = torch.device("cuda")

benchmarks/prototype/moe_training/benchmark_moe_layer_fsdp.py

Lines changed: 4 additions & 2 deletions
@@ -32,10 +32,10 @@
 
 # this benchmark requires torchtitan
 try:
-    from torchtitan.distributed.expert_parallel import (
+    from torchtitan.models.moe import MoE, MoEArgs
+    from torchtitan.models.moe.utils import (
         set_token_group_alignment_size_m,
     )
-    from torchtitan.models.moe import MoE, MoEArgs
 except ImportError:
     pytest.skip(
         "torchtitan not installed, skipping MoE tests.", allow_module_level=True
 
@@ -71,6 +71,8 @@ def bench_moe_training_fsdp(recipe_name: str, enable_profile: bool, use_compile:
     target_fqns = ["experts"]
     model_args = MoEArgs(
         num_experts=16,
+        num_shared_experts=1,
+        _debug_force_load_balance=True,
     )
     init_std = 0.02
     device = torch.device("cuda")
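Note that both files keep calling set_token_group_alignment_size_m, now imported from torchtitan.models.moe.utils rather than torchtitan.distributed.expert_parallel. A hedged usage sketch, assuming the function takes the per-expert token-group alignment as a single int; the value 32 matches the mxfp8 scaling block size but is an assumption, not part of this commit:

from torchtitan.models.moe.utils import set_token_group_alignment_size_m

# Pad each expert's token group to a multiple of 32 tokens (assumed value:
# mxfp8 scales along blocks of 32, so group sizes should align to 32).
set_token_group_alignment_size_m(32)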
