@@ -80,12 +80,7 @@ def test_mha_to_sha(self):
         mod = convert_linear_to_conv2d(LlamaAttention(0, args, True))

         # Prepare inputs
-        hidden_states = torch.randint(
-            low=0,
-            high=100,
-            size=(args.max_batch_size, args.ar_len, args.dim),
-            dtype=torch.float32,
-        )
+        hidden_states = torch.randn(args.max_batch_size, args.ar_len, args.dim)
         freqs_cos = torch.randn(args.ar_len, 1)
         freqs_sin = torch.randn(args.ar_len, 1)
         atten_mask = CausalAttentionMask(
@@ -113,6 +108,9 @@ def test_mha_to_sha(self):
             v_cache,
         )

+        # Run original module for reference
+        refs = mod(*sample_input)
+
         # Export the module and convert linear to conv2d
         edge_program = to_edge(torch.export.export(mod, sample_input))
         new_ep = edge_program.exported_program()
@@ -141,6 +139,16 @@ def test_mha_to_sha(self):
         # Check graph structure: WQ, WK, WV should be converted to SHA
         self.assertTrue(len(conv_nodes) == 25, "Convolution nodes should be split")

+        # Execute new graph and compare with reference
+        outs = graph_module(
+            *new_ep.state_dict.values(), *new_ep.constants.values(), *sample_input
+        )
+        for i, (out, ref) in enumerate(zip(outs, refs)):
+            self.assertTrue(
+                torch.allclose(out, ref, rtol=1e-6, atol=1e-6),
+                f"Output {i} mismatch: got {out}, expected {ref}",
+            )
+

 if __name__ == "__main__":
     unittest.main()
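
Side note (not part of the diff): the new assertions call graph_module with *new_ep.state_dict.values() and *new_ep.constants.values() prepended to sample_input because torch.export lifts parameters, buffers, and tensor constants into explicit graph inputs, so the raw graph module expects them ahead of the user inputs and returns a flat tuple of outputs. The sketch below illustrates that calling convention on a toy module; Toy and its shapes are made up for illustration, and it assumes a recent PyTorch where torch.export.export, ExportedProgram.module(), and ExportedProgram.graph_module are available.

    import torch

    class Toy(torch.nn.Module):
        def __init__(self):
            super().__init__()
            self.proj = torch.nn.Linear(4, 4)

        def forward(self, x):
            return self.proj(x)

    mod = Toy()
    x = torch.randn(2, 4)
    ep = torch.export.export(mod, (x,))

    # ep.module() re-packages the program so it takes only the user inputs again.
    assert torch.allclose(mod(x), ep.module()(x), rtol=1e-6, atol=1e-6)

    # The underlying graph module expects the lifted parameters first (mirroring
    # graph_module(*new_ep.state_dict.values(), ..., *sample_input) in the test)
    # and returns a flat tuple of outputs, hence the [0].
    raw_outs = ep.graph_module(*ep.state_dict.values(), x)
    assert torch.allclose(raw_outs[0], mod(x), rtol=1e-6, atol=1e-6)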