diff --git a/fastdeploy/config.py b/fastdeploy/config.py
index 21abac469c9..e2434008bbb 100644
--- a/fastdeploy/config.py
+++ b/fastdeploy/config.py
@@ -125,6 +125,7 @@ class ErnieArchitectures:
         "Ernie4_5_ForCausalLM",
         "Ernie4_5_MoeForCausalLM",
         "Ernie4_5_VLMoeForConditionalGeneration",
+        "Ernie4_5_VLMoeForProcessRewardModel",
     }
 
     @classmethod
diff --git a/fastdeploy/model_executor/layers/linear.py b/fastdeploy/model_executor/layers/linear.py
index beee8f940aa..b329844daa9 100644
--- a/fastdeploy/model_executor/layers/linear.py
+++ b/fastdeploy/model_executor/layers/linear.py
@@ -393,6 +393,7 @@ def __init__(
         with_bias: bool = False,
         add_bias: bool = False,
         skip_quant: bool = False,
+        weight_dtype="",
     ):
         """
         Initializes a linear layer and provides additional parameters required for inference and quantization.
@@ -421,6 +422,7 @@ def __init__(
             with_bias=with_bias,
             add_bias=add_bias,
             skip_quant=skip_quant,
+            weight_dtype=weight_dtype,
         )
         assert self.quant_method is not None
@@ -796,6 +798,7 @@ def __init__(
         add_bias: bool = False,
         reduce_results: bool = True,
         skip_quant: bool = False,
+        weight_dtype="",
     ):
         """
         Initialize a linear layer with additional parameters for inference and quantization.
@@ -830,6 +833,7 @@ def __init__(
             with_bias=with_bias,
             add_bias=add_bias,
             skip_quant=skip_quant,
+            weight_dtype=weight_dtype,
         )
         if add_bias:
             assert with_bias, "with_bias must be True when add_bias is True."
@@ -847,12 +851,6 @@ def __init__(
 
         if self.with_bias:
             # col parallel
             _set_var_distributed(self.bias, split_axis=0)
-            set_weight_attrs(
-                self.bias,
-                {
-                    "output_dim": False,
-                },
-            )
 
         self.reduce_results = reduce_results
diff --git a/fastdeploy/model_executor/models/ernie4_5_vl/ernie4_5_vl_moe.py b/fastdeploy/model_executor/models/ernie4_5_vl/ernie4_5_vl_moe.py
index 3f81bc3a5a5..3f1a9f015c8 100644
--- a/fastdeploy/model_executor/models/ernie4_5_vl/ernie4_5_vl_moe.py
+++ b/fastdeploy/model_executor/models/ernie4_5_vl/ernie4_5_vl_moe.py
@@ -548,6 +548,12 @@ def forward(
         return out
 
 
+@ModelRegistry.register_model_class(
+    architecture="Ernie4_5_VLMoeForConditionalGeneration",
+    module_name="ernie4_5_vl.ernie4_5_vl_moe",
+    category=ModelCategory.MULTIMODAL,
+    primary_use=ModelCategory.MULTIMODAL,
+)
 class Ernie4_5_VLMoeForConditionalGeneration(ModelForCasualLM):
     """
     Ernie4_5_VLMoeForConditionalGeneration
@@ -678,6 +684,13 @@ def load_weights(self, weights_iterator) -> None:
         expert_id = None
         shard_id = None
         for loaded_weight_name, loaded_weight in weights_iterator:
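+            # New pre-loading hook: it may rewrite a checkpoint weight name before
+            # matching, or return None to drop the weight entirely (the reward
+            # models added below use it to skip "lm_head" weights).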
+            loaded_weight_name = (
+                self.process_weights_before_loading_fn(loaded_weight_name)
+                if getattr(self, "process_weights_before_loading_fn", None)
+                else loaded_weight_name
+            )
+            if loaded_weight_name is None:
+                continue
             for param_name, weight_name, exp_id, shard_id in all_param_mapping:
                 model_param_name = loaded_weight_name.replace(weight_name, param_name)
                 if model_param_name.startswith("model.") and self.fd_config.model_config.model_format == "torch":
@@ -792,12 +805,6 @@ def clear_grpah_opt_backend(self):
         self.ernie.clear_grpah_opt_backend(fd_config=self.fd_config)
 
 
-@ModelRegistry.register_model_class(
-    architecture="Ernie4_5_VLMoeForConditionalGeneration",
-    module_name="ernie4_5_vl.ernie4_5_vl_moe",
-    category=ModelCategory.MULTIMODAL,
-    primary_use=ModelCategory.MULTIMODAL,
-)
 class Ernie4_5_VLPretrainedModel(PretrainedModel):
     """
     Ernie4_5_MoePretrainedModel
diff --git a/fastdeploy/model_executor/models/ernie_vl_rm.py b/fastdeploy/model_executor/models/ernie_vl_rm.py
new file mode 100644
index 00000000000..86cddcb42c2
--- /dev/null
+++ b/fastdeploy/model_executor/models/ernie_vl_rm.py
@@ -0,0 +1,158 @@
+"""
+# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+
+from __future__ import annotations
+
+from typing import Optional
+
+import paddle
+from paddle import nn
+
+from fastdeploy.config import FDConfig
+from fastdeploy.model_executor.forward_meta import ForwardMeta
+from fastdeploy.model_executor.layers.activation import SiluAndMul
+from fastdeploy.model_executor.layers.linear import (
+    MergedColumnParallelLinear,
+    RowParallelLinear,
+)
+from fastdeploy.model_executor.layers.pooler import DispatchPooler, Pooler
+from fastdeploy.model_executor.utils import process_weights_before_loading
+
+from .ernie4_5_vl.ernie4_5_vl_moe import (
+    Ernie4_5_VLModel,
+    Ernie4_5_VLMoeForConditionalGeneration,
+)
+from .interfaces_base import default_pooling_type
+from .model_base import ModelCategory, ModelRegistry
+
+
+class Ernie4_5_VLMoeRewardBaseModel(nn.Layer):
+    """
+    Ernie4_5_VLMoeRewardBaseModel
+    """
+
+    is_pooling_model = True
+    pooler: Pooler
+
+    def __init__(self, fd_config: FDConfig):
+        super().__init__()
+        # ----------- vision model ------------
+        self.vision_model = Ernie4_5_VLMoeForConditionalGeneration._init_vision_model(self, fd_config.model_config)
+        # ----------- resampler_model ------------
+        self.resampler_model = Ernie4_5_VLMoeForConditionalGeneration._init_resampler_model_model(
+            self, fd_config.model_config
+        )
+        self.ernie = Ernie4_5_VLModel(fd_config=fd_config)
+        self.head_dtype = paddle.bfloat16
+
+        # Persistent buffers for CUDA graphs.
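+        # CUDA graph replay requires stable device addresses, so forward() stages
+        # each step's embeddings into this preallocated buffer via copy_ instead
+        # of allocating a fresh tensor.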
+        self._input_embeddings = paddle.zeros(
+            [fd_config.parallel_config.max_model_len, fd_config.model_config.hidden_size],
+            dtype=fd_config.model_config.dtype,
+        )
+
+        self.rm_head = nn.Sequential(
+            (
+                "up_gate_proj",
+                MergedColumnParallelLinear(
+                    fd_config=fd_config,
+                    prefix="",
+                    input_size=fd_config.model_config.hidden_size,
+                    output_size=fd_config.model_config.hidden_size * 2,
+                    with_bias=False,
+                ),
+            ),
+            ("act_fn", SiluAndMul(fd_config=fd_config, bias=None, act_method=fd_config.model_config.hidden_act)),
+            (
+                "down_proj",
+                RowParallelLinear(
+                    fd_config=fd_config,
+                    input_size=fd_config.model_config.hidden_size,
+                    output_size=fd_config.model_config.num_labels,
+                    skip_quant=True,
+                    weight_dtype=self.head_dtype,
+                    with_bias=False,
+                ),
+            ),
+        )
+
+    def get_input_embeddings(
+        self,
+        ids_remove_padding: paddle.Tensor,
+        image_token_num: int,
+        image_features: Optional[paddle.Tensor] = None,
+    ) -> paddle.Tensor:
+        input_embeddings = self.ernie.get_input_embeddings(ids_remove_padding=ids_remove_padding)
+        if image_token_num > 0:
+            input_embeddings[ids_remove_padding == self.ernie.im_patch_id] = image_features.cast(self.ernie._dtype)
+        return input_embeddings
+
+    def forward(
+        self,
+        ids_remove_padding: paddle.Tensor,
+        image_features: Optional[paddle.Tensor],
+        forward_meta: ForwardMeta,
+    ):
+        vl_moe_meta = self.ernie.prepare_vl_moe_meta(ids_remove_padding=ids_remove_padding)
+        input_embeddings = self.get_input_embeddings(
+            ids_remove_padding=ids_remove_padding,
+            image_features=image_features,
+            image_token_num=vl_moe_meta.image_token_num.item(),
+        )
+        self._input_embeddings.copy_(input_embeddings, False)
+
+        hidden_states = self.ernie(
+            input_embeddings=self._input_embeddings,
+            ids_remove_padding=ids_remove_padding,
+            forward_meta=forward_meta,
+            vl_moe_meta=vl_moe_meta,
+        )
+        hidden_states = hidden_states.to(self.head_dtype)
+        logits = self.rm_head(hidden_states)
+        return logits
+
+
+@ModelRegistry.register_model_class(
+    architecture="Ernie4_5_VLMoeForProcessRewardModel",
+    module_name="ernie_vl_rm",
+    category=[ModelCategory.REWARD],
+    primary_use=ModelCategory.REWARD,
+)
+@default_pooling_type("ALL")
+class Ernie4_5_VLMoeForProcessRewardModel(Ernie4_5_VLMoeRewardBaseModel):
+
+    def __init__(self, fd_config: FDConfig):
+        self.fd_config = fd_config
+        fd_config.model_config.num_labels = 1
+        super().__init__(fd_config=fd_config)
+        self.tie_word_embeddings = False
+
+        pooler_config = fd_config.model_config.pooler_config
+        assert pooler_config is not None
+
+        self.pooler = DispatchPooler({"encode": Pooler.for_encode(pooler_config)})
+
+        self.process_weights_before_loading_fn = process_weights_before_loading(skip_prefixes=["lm_head"])
+
+    @classmethod
+    def name(self):
+        """ """
+        return "Ernie4_5_VLMoeForProcessRewardModel"
+
+    @paddle.no_grad()
+    def load_weights(self, weights_iterator):
+        # Filter out lm_head weights of Ernie4_5_VLMoeForConditionalGeneration
+        Ernie4_5_VLMoeForConditionalGeneration.load_weights(self, weights_iterator)
diff --git a/fastdeploy/model_executor/models/interfaces_base.py b/fastdeploy/model_executor/models/interfaces_base.py
index 77533209d9b..b5cea3d231d 100644
--- a/fastdeploy/model_executor/models/interfaces_base.py
+++ b/fastdeploy/model_executor/models/interfaces_base.py
@@ -48,6 +48,8 @@ def determine_model_category(class_name: str):
         return ModelCategory.MULTIMODAL
     elif any(pattern in class_name for pattern in ["Embedding", "ForSequenceClassification"]):
         return ModelCategory.EMBEDDING
+    elif any(pattern in class_name for pattern in ["Reward"]):
+        return ModelCategory.REWARD
     return ModelCategory.TEXT_GENERATION
@@ -100,3 +102,11 @@ class FdModelForPooling(FdModel[T_co], Protocol[T_co]):
     """
     pooler: Pooler
     """The pooler is only called on TP rank 0."""
+
+
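+# Class decorator used by the reward models in this change, e.g.
+# @default_pooling_type("ALL") or @default_pooling_type("STEP"); it records the
+# preferred pooling strategy on the class, presumably read when the pooler is
+# configured (the consumer is not shown in this diff).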
["Reward"]): + return ModelCategory.REWARD return ModelCategory.TEXT_GENERATION @@ -100,3 +102,11 @@ class FdModelForPooling(FdModel[T_co], Protocol[T_co]): """ pooler: Pooler """The pooler is only called on TP rank 0.""" + + +def default_pooling_type(pooling_type: str): + def func(model): + model.default_pooling_type = pooling_type # type: ignore + return model + + return func diff --git a/fastdeploy/model_executor/models/model_base.py b/fastdeploy/model_executor/models/model_base.py index fddfb4de51a..28eb6b7da0b 100644 --- a/fastdeploy/model_executor/models/model_base.py +++ b/fastdeploy/model_executor/models/model_base.py @@ -39,6 +39,7 @@ class ModelCategory(Enum): TEXT_GENERATION = "text_generation" MULTIMODAL = "multimodal" EMBEDDING = "embedding" + REWARD = "reward" @dataclass(frozen=True) @@ -228,8 +229,7 @@ def register_model_class( def _register(model_cls): # Traditional registration for ModelForCasualLM subclasses - if issubclass(model_cls, ModelForCasualLM) and model_cls is not ModelForCasualLM: - cls._arch_to_model_cls[model_cls.name()] = model_cls + cls._arch_to_model_cls[model_cls.name()] = model_cls # Enhanced decorator-style registration if architecture and module_name: diff --git a/fastdeploy/model_executor/models/qwen2.py b/fastdeploy/model_executor/models/qwen2.py index fd51358c5b5..ec4df06002e 100644 --- a/fastdeploy/model_executor/models/qwen2.py +++ b/fastdeploy/model_executor/models/qwen2.py @@ -44,6 +44,12 @@ ModelForCasualLM, ModelRegistry, ) +from fastdeploy.model_executor.utils import ( + WeightsMapper, + default_weight_loader, + process_weights_after_loading, + process_weights_before_loading, +) class Qwen2MLP(nn.Layer): @@ -316,6 +322,14 @@ def __init__(self, fd_config: FDConfig): prefix="lm_head", ) + self.process_weights_before_loading_fn = process_weights_before_loading( + mapper=( + WeightsMapper(orig_to_new_prefix={"model.": "qwen2."}) + if self.fd_config.model_config.model_format == "torch" + else None + ), + ) + @paddle.no_grad() def load_weights(self, weights_iterator) -> None: """ @@ -325,11 +339,6 @@ def load_weights(self, weights_iterator) -> None: weights_iterator (Iterator): An iterator yielding (name, weight) pairs. """ - from fastdeploy.model_executor.utils import ( - default_weight_loader, - process_weights_after_loading, - ) - stacked_params_mapping = [ # (param_name, shard_name, shard_id) ("qkv_proj", "q_proj", "q"), @@ -344,10 +353,13 @@ def load_weights(self, weights_iterator) -> None: params_dict = dict(self.named_parameters()) process_weights_after_loading_fn = process_weights_after_loading(dict(self.named_sublayers())) for loaded_weight_name, loaded_weight in weights_iterator: - model_format = self.fd_config.model_config.model_format - # Because the prefix for Paddle is qwen2, and for Hugging Face it is model. - if model_format == "torch": - loaded_weight_name = loaded_weight_name.replace("model", "qwen2") + loaded_weight_name = ( + self.process_weights_before_loading_fn(loaded_weight_name) + if getattr(self, "process_weights_before_loading_fn", None) + else loaded_weight_name + ) + if loaded_weight_name is None: + continue for param_name, weight_name, shard_id in stacked_params_mapping: if weight_name not in loaded_weight_name: continue diff --git a/fastdeploy/model_executor/models/qwen2_rm.py b/fastdeploy/model_executor/models/qwen2_rm.py new file mode 100644 index 00000000000..629f65f9248 --- /dev/null +++ b/fastdeploy/model_executor/models/qwen2_rm.py @@ -0,0 +1,109 @@ +""" +# Copyright (c) 2025 PaddlePaddle Authors. 
+        self.process_weights_before_loading_fn = process_weights_before_loading(
+            mapper=(
+                WeightsMapper(orig_to_new_prefix={"model.": "qwen2."})
+                if self.fd_config.model_config.model_format == "torch"
+                else None
+            ),
+        )
+
     @paddle.no_grad()
     def load_weights(self, weights_iterator) -> None:
         """
@@ -325,11 +339,6 @@ def load_weights(self, weights_iterator) -> None:
 
             weights_iterator (Iterator): An iterator yielding (name, weight) pairs.
         """
-        from fastdeploy.model_executor.utils import (
-            default_weight_loader,
-            process_weights_after_loading,
-        )
-
         stacked_params_mapping = [
             # (param_name, shard_name, shard_id)
             ("qkv_proj", "q_proj", "q"),
@@ -344,10 +353,13 @@ def load_weights(self, weights_iterator) -> None:
         params_dict = dict(self.named_parameters())
         process_weights_after_loading_fn = process_weights_after_loading(dict(self.named_sublayers()))
         for loaded_weight_name, loaded_weight in weights_iterator:
-            model_format = self.fd_config.model_config.model_format
-            # Because the prefix for Paddle is qwen2, and for Hugging Face it is model.
-            if model_format == "torch":
-                loaded_weight_name = loaded_weight_name.replace("model", "qwen2")
+            loaded_weight_name = (
+                self.process_weights_before_loading_fn(loaded_weight_name)
+                if getattr(self, "process_weights_before_loading_fn", None)
+                else loaded_weight_name
+            )
+            if loaded_weight_name is None:
+                continue
             for param_name, weight_name, shard_id in stacked_params_mapping:
                 if weight_name not in loaded_weight_name:
                     continue
diff --git a/fastdeploy/model_executor/models/qwen2_rm.py b/fastdeploy/model_executor/models/qwen2_rm.py
new file mode 100644
index 00000000000..629f65f9248
--- /dev/null
+++ b/fastdeploy/model_executor/models/qwen2_rm.py
@@ -0,0 +1,109 @@
+"""
+# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+
+from __future__ import annotations
+
+import paddle
+from paddle import nn
+
+from fastdeploy.config import FDConfig
+from fastdeploy.model_executor.forward_meta import ForwardMeta
+from fastdeploy.model_executor.layers.linear import (
+    ColumnParallelLinear,
+    RowParallelLinear,
+)
+from fastdeploy.model_executor.layers.pooler import DispatchPooler, Pooler
+from fastdeploy.model_executor.utils import process_weights_before_loading
+
+from .interfaces_base import default_pooling_type
+from .model_base import ModelCategory, ModelRegistry
+from .qwen2 import Qwen2ForCausalLM, Qwen2Model
+
+
+class Qwen2RewardBaseModel(nn.Layer):
+    """
+    Qwen2RewardBaseModel
+    """
+
+    is_pooling_model = True
+    pooler: Pooler
+
+    def __init__(self, fd_config: FDConfig):
+        super().__init__()
+        self.model = Qwen2Model(fd_config=fd_config)
+        self.head_dtype = paddle.float32
+
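+        # The scoring head stays unquantized in float32 (skip_quant=True plus an
+        # explicit weight_dtype); forward() casts hidden states up to match
+        # before scoring.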
+        self.score = nn.Sequential(
+            ColumnParallelLinear(
+                fd_config=fd_config,
+                input_size=fd_config.model_config.hidden_size,
+                output_size=fd_config.model_config.hidden_size,
+                skip_quant=True,
+                weight_dtype=self.head_dtype,
+                with_bias=True,
+            ),
+            nn.ReLU(),
+            RowParallelLinear(
+                fd_config=fd_config,
+                input_size=fd_config.model_config.hidden_size,
+                output_size=fd_config.model_config.num_labels,
+                skip_quant=True,
+                weight_dtype=self.head_dtype,
+                with_bias=True,
+            ),
+        )
+
+    def forward(
+        self,
+        ids_remove_padding: paddle.Tensor,
+        forward_meta: ForwardMeta,
+    ):
+        hidden_states = self.model(ids_remove_padding=ids_remove_padding, forward_meta=forward_meta)
+        hidden_states = hidden_states.to(self.head_dtype)
+        logits = self.score(hidden_states)
+        return logits
+
+
+@ModelRegistry.register_model_class(
+    architecture="Qwen2ForProcessRewardModel",
+    module_name="qwen2_rm",
+    category=[ModelCategory.REWARD],
+    primary_use=ModelCategory.REWARD,
+)
+@default_pooling_type("STEP")
+class Qwen2ForProcessRewardModel(Qwen2RewardBaseModel):
+
+    def __init__(self, fd_config: FDConfig):
+        self.fd_config = fd_config
+        fd_config.model_config.num_labels = 2
+        super().__init__(fd_config=fd_config)
+
+        pooler_config = fd_config.model_config.pooler_config
+        assert pooler_config is not None
+
+        self.pooler = DispatchPooler({"encode": Pooler.for_encode(pooler_config)})
+
+        self.process_weights_before_loading_fn = process_weights_before_loading(skip_prefixes=["lm_head"])
+
+    @classmethod
+    def name(self):
+        """ """
+        return "Qwen2ForProcessRewardModel"
+
+    @paddle.no_grad()
+    def load_weights(self, weights_iterator):
+        # Filter out lm_head weights of Qwen2ForCausalLM
+        Qwen2ForCausalLM.load_weights(self, weights_iterator)
diff --git a/fastdeploy/model_executor/utils.py b/fastdeploy/model_executor/utils.py
index 154024ca39c..15d285212b0 100644
--- a/fastdeploy/model_executor/utils.py
+++ b/fastdeploy/model_executor/utils.py
@@ -16,8 +16,10 @@
 
 import os
 import re
+from collections.abc import Mapping
 from contextlib import contextmanager
-from typing import Any, Optional, Union
+from dataclasses import dataclass, field
+from typing import Any, List, Optional, Union
 
 import paddle
 from paddleformers.utils.log import logger
@@ -150,6 +152,36 @@ def fn(model_sublayer_name: str, param=None):
     return fn
 
 
+@dataclass
+class WeightsMapper:
+    orig_to_new_prefix: Mapping[str, Optional[str]] = field(default_factory=dict)
+
+    def _map_name(self, key: str) -> Optional[str]:
+        for prefix, new_key in self.orig_to_new_prefix.items():
+            if key.startswith(prefix):
+                key = key.replace(prefix, new_key, 1)
+        return key
+
+    def apply(self, weight_name):
+        return self._map_name(weight_name)
+
+
+def process_weights_before_loading(
+    *, skip_prefixes: Optional[List[str]] = None, mapper: Optional[WeightsMapper] = None
+):
+    def _can_skip(weight_name):
+        return any(weight_name.startswith(p) for p in (skip_prefixes or []))
+
+    def fn(weight_name):
+        if mapper is not None:
+            weight_name = mapper.apply(weight_name)
+        if _can_skip(weight_name):
+            weight_name = None
+        return weight_name
+
+    return fn
+
+
 def free_tensor(tensor):
     if hasattr(tensor, "tensor_track"):
         tensor.tensor_track = None
diff --git a/fastdeploy/multimodal/registry.py b/fastdeploy/multimodal/registry.py
index f014ba55532..d827c9b8068 100644
--- a/fastdeploy/multimodal/registry.py
+++ b/fastdeploy/multimodal/registry.py
@@ -25,6 +25,7 @@ class MultimodalRegistry:
         "Ernie5MoeForCausalLM",
         "Qwen2_5_VLForConditionalGeneration",
         "Ernie5ForCausalLM",
+        "Ernie4_5_VLMoeForProcessRewardModel",
     }
 
     @classmethod