@@ -2688,59 +2688,82 @@ SDValue DAGCombiner::visitPTRADD(SDNode *N) {
26882688 if (PtrVT == IntVT && isNullConstant(N0))
26892689 return N1;
26902690
2691- if (N0.getOpcode() != ISD::PTRADD ||
2692- reassociationCanBreakAddressingModePattern(ISD::PTRADD, DL, N, N0, N1))
2693- return SDValue();
2694-
2695- SDValue X = N0.getOperand(0);
2696- SDValue Y = N0.getOperand(1);
2697- SDValue Z = N1;
2698- bool N0OneUse = N0.hasOneUse();
2699- bool YIsConstant = DAG.isConstantIntBuildVectorOrConstantInt(Y);
2700- bool ZIsConstant = DAG.isConstantIntBuildVectorOrConstantInt(Z);
2701-
2702- // (ptradd (ptradd x, y), z) -> (ptradd x, (add y, z)) if:
2703- // * y is a constant and (ptradd x, y) has one use; or
2704- // * y and z are both constants.
2705- if ((YIsConstant && N0OneUse) || (YIsConstant && ZIsConstant)) {
2706- // If both additions in the original were NUW, the new ones are as well.
2707- SDNodeFlags Flags =
2708- (N->getFlags() & N0->getFlags()) & SDNodeFlags::NoUnsignedWrap;
2709- SDValue Add = DAG.getNode(ISD::ADD, DL, IntVT, {Y, Z}, Flags);
2710- AddToWorklist(Add.getNode());
2711- return DAG.getMemBasePlusOffset(X, Add, DL, Flags);
2691+ if (N0.getOpcode() == ISD::PTRADD &&
2692+ !reassociationCanBreakAddressingModePattern(ISD::PTRADD, DL, N, N0, N1)) {
2693+ SDValue X = N0.getOperand(0);
2694+ SDValue Y = N0.getOperand(1);
2695+ SDValue Z = N1;
2696+ bool N0OneUse = N0.hasOneUse();
2697+ bool YIsConstant = DAG.isConstantIntBuildVectorOrConstantInt(Y);
2698+ bool ZIsConstant = DAG.isConstantIntBuildVectorOrConstantInt(Z);
2699+
2700+ // (ptradd (ptradd x, y), z) -> (ptradd x, (add y, z)) if:
2701+ // * y is a constant and (ptradd x, y) has one use; or
2702+ // * y and z are both constants.
2703+ if ((YIsConstant && N0OneUse) || (YIsConstant && ZIsConstant)) {
2704+ // If both additions in the original were NUW, the new ones are as well.
2705+ SDNodeFlags Flags =
2706+ (N->getFlags() & N0->getFlags()) & SDNodeFlags::NoUnsignedWrap;
2707+ SDValue Add = DAG.getNode(ISD::ADD, DL, IntVT, {Y, Z}, Flags);
2708+ AddToWorklist(Add.getNode());
2709+ return DAG.getMemBasePlusOffset(X, Add, DL, Flags);
2710+ }
2711+ }
2712+
2713+ // The following combines can turn in-bounds pointer arithmetic out of bounds.
2714+ // That is problematic for settings like AArch64's CPA, which checks that
2715+ // intermediate results of pointer arithmetic remain in bounds. The target
2716+ // therefore needs to opt-in to enable them.
2717+ if (!TLI.canTransformPtrArithOutOfBounds(
2718+ DAG.getMachineFunction().getFunction(), PtrVT))
2719+ return SDValue();
2720+
2721+ if (N0.getOpcode() == ISD::PTRADD && N1.getOpcode() == ISD::Constant) {
2722+ // Fold (ptradd (ptradd GA, v), c) -> (ptradd (ptradd GA, c) v) with
2723+ // global address GA and constant c, such that c can be folded into GA.
2724+ SDValue GAValue = N0.getOperand(0);
2725+ if (const GlobalAddressSDNode *GA =
2726+ dyn_cast<GlobalAddressSDNode>(GAValue)) {
2727+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2728+ if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
2729+ // If both additions in the original were NUW, reassociation preserves
2730+ // that.
2731+ SDNodeFlags Flags =
2732+ (N->getFlags() & N0->getFlags()) & SDNodeFlags::NoUnsignedWrap;
2733+ SDValue Inner = DAG.getMemBasePlusOffset(GAValue, N1, DL, Flags);
2734+ AddToWorklist(Inner.getNode());
2735+ return DAG.getMemBasePlusOffset(Inner, N0.getOperand(1), DL, Flags);
2736+ }
2737+ }
27122738 }
27132739
2714- // TODO: There is another possible fold here that was proven useful.
2715- // It would be this:
2716- //
2717- // (ptradd (ptradd x, y), z) -> (ptradd (ptradd x, z), y) if:
2718- // * (ptradd x, y) has one use; and
2719- // * y is a constant; and
2720- // * z is not a constant.
2721- //
2722- // In some cases, specifically in AArch64's FEAT_CPA, it exposes the
2723- // opportunity to select more complex instructions such as SUBPT and
2724- // MSUBPT. However, a hypothetical corner case has been found that we could
2725- // not avoid. Consider this (pseudo-POSIX C):
2726- //
2727- // char *foo(char *x, int z) {return (x + LARGE_CONSTANT) + z;}
2728- // char *p = mmap(LARGE_CONSTANT);
2729- // char *q = foo(p, -LARGE_CONSTANT);
2730- //
2731- // Then x + LARGE_CONSTANT is one-past-the-end, so valid, and a
2732- // further + z takes it back to the start of the mapping, so valid,
2733- // regardless of the address mmap gave back. However, if mmap gives you an
2734- // address < LARGE_CONSTANT (ignoring high bits), x - LARGE_CONSTANT will
2735- // borrow from the high bits (with the subsequent + z carrying back into
2736- // the high bits to give you a well-defined pointer) and thus trip
2737- // FEAT_CPA's pointer corruption checks.
2738- //
2739- // We leave this fold as an opportunity for future work, addressing the
2740- // corner case for FEAT_CPA, as well as reconciling the solution with the
2741- // more general application of pointer arithmetic in other future targets.
2742- // For now each architecture that wants this fold must implement it in the
2743- // target-specific code (see e.g. SITargetLowering::performPtrAddCombine)
2740+ if (N1.getOpcode() == ISD::ADD && N1.hasOneUse()) {
2741+ // (ptradd x, (add y, z)) -> (ptradd (ptradd x, y), z) if z is a constant,
2742+ // y is not, and (add y, z) is used only once.
2743+ // (ptradd x, (add y, z)) -> (ptradd (ptradd x, z), y) if y is a constant,
2744+ // z is not, and (add y, z) is used only once.
2745+ // The goal is to move constant offsets to the outermost ptradd, to create
2746+ // more opportunities to fold offsets into memory instructions.
2747+ // Together with another combine above, this also implements
2748+ // (ptradd (ptradd x, y), z) -> (ptradd (ptradd x, z), y).
2749+ SDValue X = N0;
2750+ SDValue Y = N1.getOperand(0);
2751+ SDValue Z = N1.getOperand(1);
2752+ bool YIsConstant = DAG.isConstantIntBuildVectorOrConstantInt(Y);
2753+ bool ZIsConstant = DAG.isConstantIntBuildVectorOrConstantInt(Z);
2754+
2755+ // If both additions in the original were NUW, reassociation preserves that.
2756+ SDNodeFlags ReassocFlags =
2757+ (N->getFlags() & N1->getFlags()) & SDNodeFlags::NoUnsignedWrap;
2758+
2759+ if (ZIsConstant != YIsConstant) {
2760+ if (YIsConstant)
2761+ std::swap(Y, Z);
2762+ SDValue Inner = DAG.getMemBasePlusOffset(X, Y, DL, ReassocFlags);
2763+ AddToWorklist(Inner.getNode());
2764+ return DAG.getMemBasePlusOffset(Inner, Z, DL, ReassocFlags);
2765+ }
2766+ }
27442767
27452768 return SDValue();
27462769}
0 commit comments