@@ -2696,59 +2696,83 @@ SDValue DAGCombiner::visitPTRADD(SDNode *N) {
26962696 if (PtrVT == IntVT && isNullConstant(N0))
26972697 return N1;
26982698
2699- if (N0.getOpcode() != ISD::PTRADD ||
2700- reassociationCanBreakAddressingModePattern(ISD::PTRADD, DL, N, N0, N1))
2701- return SDValue();
2702-
2703- SDValue X = N0.getOperand(0);
2704- SDValue Y = N0.getOperand(1);
2705- SDValue Z = N1;
2706- bool N0OneUse = N0.hasOneUse();
2707- bool YIsConstant = DAG.isConstantIntBuildVectorOrConstantInt(Y);
2708- bool ZIsConstant = DAG.isConstantIntBuildVectorOrConstantInt(Z);
2709-
2710- // (ptradd (ptradd x, y), z) -> (ptradd x, (add y, z)) if:
2711- // * y is a constant and (ptradd x, y) has one use; or
2712- // * y and z are both constants.
2713- if ((YIsConstant && N0OneUse) || (YIsConstant && ZIsConstant)) {
2714- // If both additions in the original were NUW, the new ones are as well.
2715- SDNodeFlags Flags =
2716- (N->getFlags() & N0->getFlags()) & SDNodeFlags::NoUnsignedWrap;
2717- SDValue Add = DAG.getNode(ISD::ADD, DL, IntVT, {Y, Z}, Flags);
2718- AddToWorklist(Add.getNode());
2719- return DAG.getMemBasePlusOffset(X, Add, DL, Flags);
2699+ if (N0.getOpcode() == ISD::PTRADD &&
2700+ !reassociationCanBreakAddressingModePattern(ISD::PTRADD, DL, N, N0, N1)) {
2701+ SDValue X = N0.getOperand(0);
2702+ SDValue Y = N0.getOperand(1);
2703+ SDValue Z = N1;
2704+ bool N0OneUse = N0.hasOneUse();
2705+ bool YIsConstant = DAG.isConstantIntBuildVectorOrConstantInt(Y);
2706+ bool ZIsConstant = DAG.isConstantIntBuildVectorOrConstantInt(Z);
2707+
2708+ // (ptradd (ptradd x, y), z) -> (ptradd x, (add y, z)) if:
2709+ // * y is a constant and (ptradd x, y) has one use; or
2710+ // * y and z are both constants.
2711+ if ((YIsConstant && N0OneUse) || (YIsConstant && ZIsConstant)) {
2712+ // If both additions in the original were NUW, the new ones are as well.
2713+ SDNodeFlags Flags =
2714+ (N->getFlags() & N0->getFlags()) & SDNodeFlags::NoUnsignedWrap;
2715+ SDValue Add = DAG.getNode(ISD::ADD, DL, IntVT, {Y, Z}, Flags);
2716+ AddToWorklist(Add.getNode());
2717+ return DAG.getMemBasePlusOffset(X, Add, DL, Flags);
2718+ }
2719+ }
2720+
2721+ // The following combines can turn in-bounds pointer arithmetic out of bounds.
2722+ // That is problematic for settings like AArch64's CPA, which checks that
2723+ // intermediate results of pointer arithmetic remain in bounds. The target
2724+ // therefore needs to opt-in to enable them.
2725+ if (!TLI.canTransformPtrArithOutOfBounds(
2726+ DAG.getMachineFunction().getFunction(), PtrVT))
2727+ return SDValue();
2728+
2729+ if (N0.getOpcode() == ISD::PTRADD && isa<ConstantSDNode>(N1)) {
2730+ // Fold (ptradd (ptradd GA, v), c) -> (ptradd (ptradd GA, c), v) with
2731+ // global address GA and constant c, such that c can be folded into GA.
2732+ // TODO: Support constant vector splats.
2733+ SDValue GAValue = N0.getOperand(0);
2734+ if (const GlobalAddressSDNode *GA =
2735+ dyn_cast<GlobalAddressSDNode>(GAValue)) {
2736+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2737+ if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
2738+ // If both additions in the original were NUW, reassociation preserves
2739+ // that.
2740+ SDNodeFlags Flags =
2741+ (N->getFlags() & N0->getFlags()) & SDNodeFlags::NoUnsignedWrap;
2742+ SDValue Inner = DAG.getMemBasePlusOffset(GAValue, N1, DL, Flags);
2743+ AddToWorklist(Inner.getNode());
2744+ return DAG.getMemBasePlusOffset(Inner, N0.getOperand(1), DL, Flags);
2745+ }
2746+ }
27202747 }
27212748
2722- // TODO: There is another possible fold here that was proven useful.
2723- // It would be this:
2724- //
2725- // (ptradd (ptradd x, y), z) -> (ptradd (ptradd x, z), y) if:
2726- // * (ptradd x, y) has one use; and
2727- // * y is a constant; and
2728- // * z is not a constant.
2729- //
2730- // In some cases, specifically in AArch64's FEAT_CPA, it exposes the
2731- // opportunity to select more complex instructions such as SUBPT and
2732- // MSUBPT. However, a hypothetical corner case has been found that we could
2733- // not avoid. Consider this (pseudo-POSIX C):
2734- //
2735- // char *foo(char *x, int z) {return (x + LARGE_CONSTANT) + z;}
2736- // char *p = mmap(LARGE_CONSTANT);
2737- // char *q = foo(p, -LARGE_CONSTANT);
2738- //
2739- // Then x + LARGE_CONSTANT is one-past-the-end, so valid, and a
2740- // further + z takes it back to the start of the mapping, so valid,
2741- // regardless of the address mmap gave back. However, if mmap gives you an
2742- // address < LARGE_CONSTANT (ignoring high bits), x - LARGE_CONSTANT will
2743- // borrow from the high bits (with the subsequent + z carrying back into
2744- // the high bits to give you a well-defined pointer) and thus trip
2745- // FEAT_CPA's pointer corruption checks.
2746- //
2747- // We leave this fold as an opportunity for future work, addressing the
2748- // corner case for FEAT_CPA, as well as reconciling the solution with the
2749- // more general application of pointer arithmetic in other future targets.
2750- // For now each architecture that wants this fold must implement it in the
2751- // target-specific code (see e.g. SITargetLowering::performPtrAddCombine)
2749+ if (N1.getOpcode() == ISD::ADD && N1.hasOneUse()) {
2750+ // (ptradd x, (add y, z)) -> (ptradd (ptradd x, y), z) if z is a constant,
2751+ // y is not, and (add y, z) is used only once.
2752+ // (ptradd x, (add y, z)) -> (ptradd (ptradd x, z), y) if y is a constant,
2753+ // z is not, and (add y, z) is used only once.
2754+ // The goal is to move constant offsets to the outermost ptradd, to create
2755+ // more opportunities to fold offsets into memory instructions.
2756+ // Together with another combine above, this also implements
2757+ // (ptradd (ptradd x, y), z) -> (ptradd (ptradd x, z), y).
2758+ SDValue X = N0;
2759+ SDValue Y = N1.getOperand(0);
2760+ SDValue Z = N1.getOperand(1);
2761+ bool YIsConstant = DAG.isConstantIntBuildVectorOrConstantInt(Y);
2762+ bool ZIsConstant = DAG.isConstantIntBuildVectorOrConstantInt(Z);
2763+
2764+ // If both additions in the original were NUW, reassociation preserves that.
2765+ SDNodeFlags ReassocFlags =
2766+ (N->getFlags() & N1->getFlags()) & SDNodeFlags::NoUnsignedWrap;
2767+
2768+ if (ZIsConstant != YIsConstant) {
2769+ if (YIsConstant)
2770+ std::swap(Y, Z);
2771+ SDValue Inner = DAG.getMemBasePlusOffset(X, Y, DL, ReassocFlags);
2772+ AddToWorklist(Inner.getNode());
2773+ return DAG.getMemBasePlusOffset(Inner, Z, DL, ReassocFlags);
2774+ }
2775+ }
27522776
27532777 return SDValue();
27542778}
0 commit comments