@@ -2689,59 +2689,82 @@ SDValue DAGCombiner::visitPTRADD(SDNode *N) {
26892689  if (PtrVT == IntVT && isNullConstant(N0))
26902690    return N1;
26912691
2692-   if (N0.getOpcode() != ISD::PTRADD ||
2693-       reassociationCanBreakAddressingModePattern(ISD::PTRADD, DL, N, N0, N1))
2694-     return SDValue();
2695- 
2696-   SDValue X = N0.getOperand(0);
2697-   SDValue Y = N0.getOperand(1);
2698-   SDValue Z = N1;
2699-   bool N0OneUse = N0.hasOneUse();
2700-   bool YIsConstant = DAG.isConstantIntBuildVectorOrConstantInt(Y);
2701-   bool ZIsConstant = DAG.isConstantIntBuildVectorOrConstantInt(Z);
2702- 
2703-   // (ptradd (ptradd x, y), z) -> (ptradd x, (add y, z)) if:
2704-   //   * y is a constant and (ptradd x, y) has one use; or
2705-   //   * y and z are both constants.
2706-   if ((YIsConstant && N0OneUse) || (YIsConstant && ZIsConstant)) {
2707-     // If both additions in the original were NUW, the new ones are as well.
2708-     SDNodeFlags Flags =
2709-         (N->getFlags() & N0->getFlags()) & SDNodeFlags::NoUnsignedWrap;
2710-     SDValue Add = DAG.getNode(ISD::ADD, DL, IntVT, {Y, Z}, Flags);
2711-     AddToWorklist(Add.getNode());
2712-     return DAG.getMemBasePlusOffset(X, Add, DL, Flags);
2692+   if (N0.getOpcode() == ISD::PTRADD &&
2693+       !reassociationCanBreakAddressingModePattern(ISD::PTRADD, DL, N, N0, N1)) {
2694+     SDValue X = N0.getOperand(0);
2695+     SDValue Y = N0.getOperand(1);
2696+     SDValue Z = N1;
2697+     bool N0OneUse = N0.hasOneUse();
2698+     bool YIsConstant = DAG.isConstantIntBuildVectorOrConstantInt(Y);
2699+     bool ZIsConstant = DAG.isConstantIntBuildVectorOrConstantInt(Z);
2700+ 
2701+     // (ptradd (ptradd x, y), z) -> (ptradd x, (add y, z)) if:
2702+     //   * y is a constant and (ptradd x, y) has one use; or
2703+     //   * y and z are both constants.
2704+     if ((YIsConstant && N0OneUse) || (YIsConstant && ZIsConstant)) {
2705+       // If both additions in the original were NUW, the new ones are as well.
2706+       SDNodeFlags Flags =
2707+           (N->getFlags() & N0->getFlags()) & SDNodeFlags::NoUnsignedWrap;
2708+       SDValue Add = DAG.getNode(ISD::ADD, DL, IntVT, {Y, Z}, Flags);
2709+       AddToWorklist(Add.getNode());
2710+       return DAG.getMemBasePlusOffset(X, Add, DL, Flags);
2711+     }
2712+   }
2713+ 
2714+   // The following combines can turn in-bounds pointer arithmetic out of bounds.
2715+   // That is problematic for settings like AArch64's CPA, which checks that
2716+   // intermediate results of pointer arithmetic remain in bounds. The target
2717+   // therefore needs to opt-in to enable them.
2718+   if (!TLI.canTransformPtrArithOutOfBounds(
2719+           DAG.getMachineFunction().getFunction(), PtrVT))
2720+     return SDValue();
2721+ 
2722+   if (N0.getOpcode() == ISD::PTRADD && N1.getOpcode() == ISD::Constant) {
2723+     // Fold (ptradd (ptradd GA, v), c) -> (ptradd (ptradd GA, c), v) with
2724+     // global address GA and constant c, such that c can be folded into GA.
2725+     SDValue GAValue = N0.getOperand(0);
2726+     if (const GlobalAddressSDNode *GA =
2727+             dyn_cast<GlobalAddressSDNode>(GAValue)) {
2728+       const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2729+       if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
2730+         // If both additions in the original were NUW, reassociation preserves
2731+         // that.
2732+         SDNodeFlags Flags =
2733+             (N->getFlags() & N0->getFlags()) & SDNodeFlags::NoUnsignedWrap;
2734+         SDValue Inner = DAG.getMemBasePlusOffset(GAValue, N1, DL, Flags);
2735+         AddToWorklist(Inner.getNode());
2736+         return DAG.getMemBasePlusOffset(Inner, N0.getOperand(1), DL, Flags);
2737+       }
2738+     }
27132739  }
27142740
2715-   // TODO: There is another possible fold here that was proven useful.
2716-   // It would be this:
2717-   //
2718-   // (ptradd (ptradd x, y), z) -> (ptradd (ptradd x, z), y) if:
2719-   //   * (ptradd x, y) has one use; and
2720-   //   * y is a constant; and
2721-   //   * z is not a constant.
2722-   //
2723-   // In some cases, specifically in AArch64's FEAT_CPA, it exposes the
2724-   // opportunity to select more complex instructions such as SUBPT and
2725-   // MSUBPT. However, a hypothetical corner case has been found that we could
2726-   // not avoid. Consider this (pseudo-POSIX C):
2727-   //
2728-   // char *foo(char *x, int z) {return (x + LARGE_CONSTANT) + z;}
2729-   // char *p = mmap(LARGE_CONSTANT);
2730-   // char *q = foo(p, -LARGE_CONSTANT);
2731-   //
2732-   // Then x + LARGE_CONSTANT is one-past-the-end, so valid, and a
2733-   // further + z takes it back to the start of the mapping, so valid,
2734-   // regardless of the address mmap gave back. However, if mmap gives you an
2735-   // address < LARGE_CONSTANT (ignoring high bits), x - LARGE_CONSTANT will
2736-   // borrow from the high bits (with the subsequent + z carrying back into
2737-   // the high bits to give you a well-defined pointer) and thus trip
2738-   // FEAT_CPA's pointer corruption checks.
2739-   //
2740-   // We leave this fold as an opportunity for future work, addressing the
2741-   // corner case for FEAT_CPA, as well as reconciling the solution with the
2742-   // more general application of pointer arithmetic in other future targets.
2743-   // For now each architecture that wants this fold must implement it in the
2744-   // target-specific code (see e.g. SITargetLowering::performPtrAddCombine)
2741+   if (N1.getOpcode() == ISD::ADD && N1.hasOneUse()) {
2742+     // (ptradd x, (add y, z)) -> (ptradd (ptradd x, y), z) if z is a constant,
2743+     //    y is not, and (add y, z) is used only once.
2744+     // (ptradd x, (add y, z)) -> (ptradd (ptradd x, z), y) if y is a constant,
2745+     //    z is not, and (add y, z) is used only once.
2746+     // The goal is to move constant offsets to the outermost ptradd, to create
2747+     // more opportunities to fold offsets into memory instructions.
2748+     // Together with the other combine above, this also implements
2749+     //   (ptradd (ptradd x, y), z) -> (ptradd (ptradd x, z), y).
2750+     SDValue X = N0;
2751+     SDValue Y = N1.getOperand(0);
2752+     SDValue Z = N1.getOperand(1);
2753+     bool YIsConstant = DAG.isConstantIntBuildVectorOrConstantInt(Y);
2754+     bool ZIsConstant = DAG.isConstantIntBuildVectorOrConstantInt(Z);
2755+ 
2756+     // If both additions in the original were NUW, reassociation preserves that.
2757+     SDNodeFlags ReassocFlags =
2758+         (N->getFlags() & N1->getFlags()) & SDNodeFlags::NoUnsignedWrap;
2759+ 
2760+     if (ZIsConstant != YIsConstant) {
2761+       if (YIsConstant)
2762+         std::swap(Y, Z);
2763+       SDValue Inner = DAG.getMemBasePlusOffset(X, Y, DL, ReassocFlags);
2764+       AddToWorklist(Inner.getNode());
2765+       return DAG.getMemBasePlusOffset(Inner, Z, DL, ReassocFlags);
2766+     }
2767+   }
27452768
27462769  return SDValue();
27472770}
0 commit comments