We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 684a0a2; commit 6f7f844 (Copy full SHA for 6f7f844)
vllm_ascend/torchair/torchair_mtp_proposer.py
@@ -80,7 +80,8 @@ def dummy_run(self,
80
num_reqs: int = 0,
81
num_tokens_across_dp=None,
82
aclgraph_runtime_mode: CUDAGraphMode = CUDAGraphMode.NONE,
83
- batch_descriptor=None) -> None:
+ batch_descriptor=None,
84
+ dummy_compute_logits=lambda hidden_states: None) -> None:
85
moe_comm_type = self.runner._select_moe_comm_method(num_tokens)
86
87
if not with_prefill:
@@ -142,6 +143,7 @@ def dummy_run(self,
142
143
self.model(input_ids=input_ids,
144
positions=positions,
145
hidden_states=previous_hidden_states)
146
+ dummy_compute_logits(previous_hidden_states)
147
if with_prefill:
148
break
149
0 commit comments