@@ -512,12 +512,13 @@ let OtherPredicates = [isGFX10Plus, Has16BitInsts], True16Predicate = NotHasTrue
512512  defm: Ternary_i16_Pats_gfx9<mul, add, V_MAD_U16_gfx9_e64>;
513513} // End OtherPredicates = [isGFX10Plus, Has16BitInsts], True16Predicate = NotHasTrue16BitInsts
514514
515- class ThreeOpFragSDAG<SDPatternOperator op1, SDPatternOperator op2> : PatFrag<
515+ class ThreeOpFragSDAG<SDPatternOperator op1, SDPatternOperator op2, bit op1IsRight = 0 > : PatFrag<
516516  (ops node:$x, node:$y, node:$z),
517517  // When the inner operation is used multiple times, selecting 3-op
518518  // instructions may still be beneficial -- if the other users can be
519519  // combined similarly. Let's be conservative for now.
520-   (op2 (HasOneUseBinOp<op1> node:$x, node:$y), node:$z),
520+   !if(op1IsRight, (op2 node:$z, (HasOneUseBinOp<op1> node:$x, node:$y)),
521+                   (op2 (HasOneUseBinOp<op1> node:$x, node:$y), node:$z)),
521522  [{
522523    // Only use VALU ops when the result is divergent.
523524    if (!N->isDivergent())
@@ -544,7 +545,10 @@ class ThreeOpFragSDAG<SDPatternOperator op1, SDPatternOperator op2> : PatFrag<
544545  let PredicateCodeUsesOperands = 1;
545546}
546547
547- class ThreeOpFrag<SDPatternOperator op1, SDPatternOperator op2> : ThreeOpFragSDAG<op1, op2> {
548+ // Matches (op2 (op1 x, y), z) if op1IsRight = 0 and
549+ // matches (op2 z, (op1 x, y)) if op1IsRight = 1.
550+ class ThreeOpFrag<SDPatternOperator op1, SDPatternOperator op2,
551+                   bit op1IsRight = 0> : ThreeOpFragSDAG<op1, op2, op1IsRight> {
548552  // The divergence predicate is irrelevant in GlobalISel, as we have
549553  // proper register bank checks. We just need to verify the constant
550554  // bus restriction when all the sources are considered.
@@ -834,12 +838,19 @@ def : GCNPat<
834838 (DivergentBinFrag<mul> i32:$src0, IsPow2Plus1:$src1),
835839 (V_LSHL_ADD_U32_e64 i32:$src0, (i32 (Log2_32 imm:$src1)), i32:$src0)>;
836840
837- let SubtargetPredicate = HasLshlAddU64Inst in
841+ let SubtargetPredicate = HasLshlAddU64Inst in { 
838842def : GCNPat<
839843  (ThreeOpFrag<shl_0_to_4, add> i64:$src0, i32:$src1, i64:$src2),
840844  (V_LSHL_ADD_U64_e64 VSrc_b64:$src0, VSrc_b32:$src1, VSrc_b64:$src2)
841845>;
842846
847+ def : GCNPat <
848+   // (ptradd z, (shl x, y)) -> ((x << y) + z)
849+   (ThreeOpFrag<shl_0_to_4, ptradd, /*op1IsRight=*/1> i64:$src0, i32:$src1, i64:$src2),
850+   (V_LSHL_ADD_U64_e64 VSrc_b64:$src0, VSrc_b32:$src1, VSrc_b64:$src2)
851+ >;
852+ } // End SubtargetPredicate = HasLshlAddU64Inst
853+ 
843854def : VOPBinOpClampPat<saddsat, V_ADD_I32_e64, i32>;
844855def : VOPBinOpClampPat<ssubsat, V_SUB_I32_e64, i32>;
845856
@@ -908,19 +919,24 @@ multiclass IMAD32_Pats <VOP3_Pseudo inst> {
908919
909920// Handle cases where amdgpu-codegenprepare-mul24 made a mul24 instead of a normal mul.
910921// We need to separate this because otherwise OtherPredicates would be overridden.
911- class IMAD32_Mul24_Pat<VOP3_Pseudo inst>: GCNPat <
912-     (i64 (add (i64 (AMDGPUmul_u24 i32:$src0, i32:$src1)), i64:$src2)),
913-     (inst $src0, $src1, $src2, 0 /* clamp */)
914-     >;
922+ class IMAD32_Mul24_Pats_Impl<VOP3_Pseudo inst, SDPatternOperator AddOp, bit mulIsRight = 0> : GCNPat < // Selects AddOp of an i64 AMDGPUmul_u24 and $src2 to `inst`.
923+     !if(mulIsRight, (i64 (AddOp i64:$src2, (i64 (AMDGPUmul_u24 i32:$src0, i32:$src1)))), // mulIsRight = 1: the mul_u24 is AddOp's second operand.
924+                     (i64 (AddOp (i64 (AMDGPUmul_u24 i32:$src0, i32:$src1)), i64:$src2))), // mulIsRight = 0: the mul_u24 is AddOp's first operand.
925+     (inst $src0, $src1, $src2, 0 /* clamp */)>;
926+ 
927+ multiclass IMAD32_Mul24_Pats<VOP3_Pseudo inst> { // Emits one pattern per (AddOp, operand order) pair below.
928+   def : IMAD32_Mul24_Pats_Impl<inst, add>; // add, with the mul_u24 as the first operand.
929+   def : IMAD32_Mul24_Pats_Impl<inst, ptradd, /*mulIsRight=*/1>; // ptradd, with the mul_u24 as the second operand.
930+ }
915931
916932// exclude pre-GFX9 where it was slow
917933let OtherPredicates = [HasNotMADIntraFwdBug], SubtargetPredicate = isGFX9Plus in {
918934  defm : IMAD32_Pats<V_MAD_U64_U32_e64>;
919-   def  : IMAD32_Mul24_Pat <V_MAD_U64_U32_e64>;
935+   defm  : IMAD32_Mul24_Pats <V_MAD_U64_U32_e64>;
920936}
921937let OtherPredicates = [HasMADIntraFwdBug], SubtargetPredicate = isGFX11Only in {
922938  defm : IMAD32_Pats<V_MAD_U64_U32_gfx11_e64>;
923-   def  : IMAD32_Mul24_Pat <V_MAD_U64_U32_gfx11_e64>;
939+   defm  : IMAD32_Mul24_Pats <V_MAD_U64_U32_gfx11_e64>;
924940}
925941
926942def VOP3_PERMLANE_Profile : VOP3_Profile<VOPProfile <[i32, i32, i32, i32]>, VOP3_OPSEL> {
0 commit comments