From 1f7b5759ab05d83fd4dcb797cecb3fa599e4ffa8 Mon Sep 17 00:00:00 2001
From: Sergey Klevtsov <141879860+sklevtsov-nvidia@users.noreply.github.com>
Date: Tue, 7 Oct 2025 14:44:54 -0700
Subject: [PATCH] [https://nvbugs/5488576][fix] Propagate
 disable_finalize_fusion config flag in WIDEEP MoE backend (#8141)

Signed-off-by: Sergey Klevtsov
---
 tensorrt_llm/_torch/modules/fused_moe/fused_moe_wide_ep.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/tensorrt_llm/_torch/modules/fused_moe/fused_moe_wide_ep.py b/tensorrt_llm/_torch/modules/fused_moe/fused_moe_wide_ep.py
index afefde4f979..c6a40dd6611 100755
--- a/tensorrt_llm/_torch/modules/fused_moe/fused_moe_wide_ep.py
+++ b/tensorrt_llm/_torch/modules/fused_moe/fused_moe_wide_ep.py
@@ -222,6 +222,8 @@ def __init__(
                 f"Not available alltoall method type: {self.alltoall_method_type!r}"
             )
 
+        self.use_fused_finalize = not model_config.moe_disable_finalize_fusion
+
         self._weights_created = False
         if not model_config.skip_create_weights_in_init:
             self.create_weights()
@@ -689,7 +691,7 @@ def forward_chunk(
             input_sf=x_sf,
             swizzled_input_sf=False,
             min_latency_mode=False,
-            use_fused_finalize=True,
+            use_fused_finalize=self.use_fused_finalize,
             tuner_num_tokens=tuner_num_tokens,
             tuner_top_k=tuner_top_k,
         )
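
Note for context: a minimal self-contained sketch of the behavior change this patch makes. SimpleConfig, WideEPMoE, and run_moe below are hypothetical stand-ins, not the real TensorRT-LLM classes; the point is that before the fix, the equivalent of use_fused_finalize was hardcoded to True in forward_chunk, so the config knob never reached the kernel call.

    from dataclasses import dataclass


    @dataclass
    class SimpleConfig:
        # Stand-in for the model config; mirrors the
        # moe_disable_finalize_fusion knob referenced in the patch.
        moe_disable_finalize_fusion: bool = False


    def run_moe(x, use_fused_finalize: bool):
        # Hypothetical kernel entry point; the real code dispatches to a
        # fused-MoE CUDA backend with this flag among its arguments.
        print(f"use_fused_finalize={use_fused_finalize}")
        return x


    class WideEPMoE:
        def __init__(self, model_config: SimpleConfig):
            # The fix: capture the config flag at construction time instead
            # of hardcoding use_fused_finalize=True at the call site.
            self.use_fused_finalize = not model_config.moe_disable_finalize_fusion

        def forward_chunk(self, x):
            # The stored flag is now forwarded to the kernel invocation.
            return run_moe(x, use_fused_finalize=self.use_fused_finalize)


    if __name__ == "__main__":
        moe = WideEPMoE(SimpleConfig(moe_disable_finalize_fusion=True))
        moe.forward_chunk([1.0, 2.0])  # prints use_fused_finalize=False

With moe_disable_finalize_fusion=True, the sketch now takes the unfused finalize path, whereas before the patch the flag was silently ignored by this backend.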