1 parent e86d6db commit c7b06b1
tensorrt_llm/_torch/modules/fused_moe/fused_moe_wide_ep.py
@@ -222,6 +222,8 @@ def __init__(
                 f"Not available alltoall method type: {self.alltoall_method_type!r}"
             )
 
+        self.use_fused_finalize = not model_config.moe_disable_finalize_fusion
+
         self._weights_created = False
         if not model_config.skip_create_weights_in_init:
             self.create_weights()
@@ -689,7 +691,7 @@ def forward_chunk(
             input_sf=x_sf,
             swizzled_input_sf=False,
             min_latency_mode=False,
-            use_fused_finalize=True,
+            use_fused_finalize=self.use_fused_finalize,
             tuner_num_tokens=tuner_num_tokens,
             tuner_top_k=tuner_top_k,
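
Net effect of the commit: the fused finalize path in the wide-EP MoE forward, previously hardcoded to `use_fused_finalize=True`, is now gated by the `moe_disable_finalize_fusion` config flag, resolved once in `__init__`. Below is a minimal, self-contained sketch of that toggle pattern; `ModelConfigSketch` and `WideEPMoESketch` are hypothetical stand-ins for illustration, not the actual TensorRT-LLM classes or API.

# Sketch of the config-driven toggle introduced by this commit.
# Only use_fused_finalize / moe_disable_finalize_fusion come from the
# diff; every other name here is a hypothetical stand-in.
from dataclasses import dataclass

@dataclass
class ModelConfigSketch:
    # When True, the fused finalize kernel path is skipped.
    moe_disable_finalize_fusion: bool = False

class WideEPMoESketch:
    def __init__(self, model_config: ModelConfigSketch):
        # Resolve the flag once at construction time rather than
        # re-reading the config on every forward call.
        self.use_fused_finalize = not model_config.moe_disable_finalize_fusion

    def forward_chunk(self) -> dict:
        # The resolved attribute replaces the previously hardcoded
        # use_fused_finalize=True at the kernel call site.
        return {"use_fused_finalize": self.use_fused_finalize}

# Default config keeps the fused path on; setting the flag disables it.
assert WideEPMoESketch(ModelConfigSketch()).forward_chunk()["use_fused_finalize"] is True
assert WideEPMoESketch(
    ModelConfigSketch(moe_disable_finalize_fusion=True)
).forward_chunk()["use_fused_finalize"] is False

Resolving the flag in `__init__` keeps the hot `forward_chunk` path free of config lookups, and the call site changes only in where the boolean comes from.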