Skip to content

Commit 771c94c

Browse files
authored
[SDAG][AMDGPU] Allow opting in to OOB-generating PTRADD transforms (#146074)
This PR adds a TargetLowering hook, canTransformPtrArithOutOfBounds, that targets can use to allow transformations to introduce out-of-bounds pointer arithmetic. It also moves two such transformations from the AMDGPU-specific DAG combines to the generic DAGCombiner. This is motivated by target features like AArch64's checked pointer arithmetic, CPA, which does not tolerate the introduction of out-of-bounds pointer arithmetic.
1 parent 6735483 commit 771c94c

File tree

4 files changed

+95
-100
lines changed

4 files changed

+95
-100
lines changed

llvm/include/llvm/CodeGen/TargetLowering.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3518,6 +3518,13 @@ class LLVM_ABI TargetLoweringBase {
35183518
return false;
35193519
}
35203520

3521+
/// True if the target allows transformations of in-bounds pointer
3522+
/// arithmetic that cause out-of-bounds intermediate results.
3523+
virtual bool canTransformPtrArithOutOfBounds(const Function &F,
3524+
EVT PtrVT) const {
3525+
return false;
3526+
}
3527+
35213528
/// Does this target support complex deinterleaving
35223529
virtual bool isComplexDeinterleavingSupported() const { return false; }
35233530

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 75 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -2696,59 +2696,83 @@ SDValue DAGCombiner::visitPTRADD(SDNode *N) {
26962696
if (PtrVT == IntVT && isNullConstant(N0))
26972697
return N1;
26982698

2699-
if (N0.getOpcode() != ISD::PTRADD ||
2700-
reassociationCanBreakAddressingModePattern(ISD::PTRADD, DL, N, N0, N1))
2701-
return SDValue();
2702-
2703-
SDValue X = N0.getOperand(0);
2704-
SDValue Y = N0.getOperand(1);
2705-
SDValue Z = N1;
2706-
bool N0OneUse = N0.hasOneUse();
2707-
bool YIsConstant = DAG.isConstantIntBuildVectorOrConstantInt(Y);
2708-
bool ZIsConstant = DAG.isConstantIntBuildVectorOrConstantInt(Z);
2709-
2710-
// (ptradd (ptradd x, y), z) -> (ptradd x, (add y, z)) if:
2711-
// * y is a constant and (ptradd x, y) has one use; or
2712-
// * y and z are both constants.
2713-
if ((YIsConstant && N0OneUse) || (YIsConstant && ZIsConstant)) {
2714-
// If both additions in the original were NUW, the new ones are as well.
2715-
SDNodeFlags Flags =
2716-
(N->getFlags() & N0->getFlags()) & SDNodeFlags::NoUnsignedWrap;
2717-
SDValue Add = DAG.getNode(ISD::ADD, DL, IntVT, {Y, Z}, Flags);
2718-
AddToWorklist(Add.getNode());
2719-
return DAG.getMemBasePlusOffset(X, Add, DL, Flags);
2699+
if (N0.getOpcode() == ISD::PTRADD &&
2700+
!reassociationCanBreakAddressingModePattern(ISD::PTRADD, DL, N, N0, N1)) {
2701+
SDValue X = N0.getOperand(0);
2702+
SDValue Y = N0.getOperand(1);
2703+
SDValue Z = N1;
2704+
bool N0OneUse = N0.hasOneUse();
2705+
bool YIsConstant = DAG.isConstantIntBuildVectorOrConstantInt(Y);
2706+
bool ZIsConstant = DAG.isConstantIntBuildVectorOrConstantInt(Z);
2707+
2708+
// (ptradd (ptradd x, y), z) -> (ptradd x, (add y, z)) if:
2709+
// * y is a constant and (ptradd x, y) has one use; or
2710+
// * y and z are both constants.
2711+
if ((YIsConstant && N0OneUse) || (YIsConstant && ZIsConstant)) {
2712+
// If both additions in the original were NUW, the new ones are as well.
2713+
SDNodeFlags Flags =
2714+
(N->getFlags() & N0->getFlags()) & SDNodeFlags::NoUnsignedWrap;
2715+
SDValue Add = DAG.getNode(ISD::ADD, DL, IntVT, {Y, Z}, Flags);
2716+
AddToWorklist(Add.getNode());
2717+
return DAG.getMemBasePlusOffset(X, Add, DL, Flags);
2718+
}
2719+
}
2720+
2721+
// The following combines can turn in-bounds pointer arithmetic out of bounds.
2722+
// That is problematic for settings like AArch64's CPA, which checks that
2723+
// intermediate results of pointer arithmetic remain in bounds. The target
2724+
// therefore needs to opt-in to enable them.
2725+
if (!TLI.canTransformPtrArithOutOfBounds(
2726+
DAG.getMachineFunction().getFunction(), PtrVT))
2727+
return SDValue();
2728+
2729+
if (N0.getOpcode() == ISD::PTRADD && isa<ConstantSDNode>(N1)) {
2730+
// Fold (ptradd (ptradd GA, v), c) -> (ptradd (ptradd GA, c) v) with
2731+
// global address GA and constant c, such that c can be folded into GA.
2732+
// TODO: Support constant vector splats.
2733+
SDValue GAValue = N0.getOperand(0);
2734+
if (const GlobalAddressSDNode *GA =
2735+
dyn_cast<GlobalAddressSDNode>(GAValue)) {
2736+
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2737+
if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
2738+
// If both additions in the original were NUW, reassociation preserves
2739+
// that.
2740+
SDNodeFlags Flags =
2741+
(N->getFlags() & N0->getFlags()) & SDNodeFlags::NoUnsignedWrap;
2742+
SDValue Inner = DAG.getMemBasePlusOffset(GAValue, N1, DL, Flags);
2743+
AddToWorklist(Inner.getNode());
2744+
return DAG.getMemBasePlusOffset(Inner, N0.getOperand(1), DL, Flags);
2745+
}
2746+
}
27202747
}
27212748

2722-
// TODO: There is another possible fold here that was proven useful.
2723-
// It would be this:
2724-
//
2725-
// (ptradd (ptradd x, y), z) -> (ptradd (ptradd x, z), y) if:
2726-
// * (ptradd x, y) has one use; and
2727-
// * y is a constant; and
2728-
// * z is not a constant.
2729-
//
2730-
// In some cases, specifically in AArch64's FEAT_CPA, it exposes the
2731-
// opportunity to select more complex instructions such as SUBPT and
2732-
// MSUBPT. However, a hypothetical corner case has been found that we could
2733-
// not avoid. Consider this (pseudo-POSIX C):
2734-
//
2735-
// char *foo(char *x, int z) {return (x + LARGE_CONSTANT) + z;}
2736-
// char *p = mmap(LARGE_CONSTANT);
2737-
// char *q = foo(p, -LARGE_CONSTANT);
2738-
//
2739-
// Then x + LARGE_CONSTANT is one-past-the-end, so valid, and a
2740-
// further + z takes it back to the start of the mapping, so valid,
2741-
// regardless of the address mmap gave back. However, if mmap gives you an
2742-
// address < LARGE_CONSTANT (ignoring high bits), x - LARGE_CONSTANT will
2743-
// borrow from the high bits (with the subsequent + z carrying back into
2744-
// the high bits to give you a well-defined pointer) and thus trip
2745-
// FEAT_CPA's pointer corruption checks.
2746-
//
2747-
// We leave this fold as an opportunity for future work, addressing the
2748-
// corner case for FEAT_CPA, as well as reconciling the solution with the
2749-
// more general application of pointer arithmetic in other future targets.
2750-
// For now each architecture that wants this fold must implement it in the
2751-
// target-specific code (see e.g. SITargetLowering::performPtrAddCombine)
2749+
if (N1.getOpcode() == ISD::ADD && N1.hasOneUse()) {
2750+
// (ptradd x, (add y, z)) -> (ptradd (ptradd x, y), z) if z is a constant,
2751+
// y is not, and (add y, z) is used only once.
2752+
// (ptradd x, (add y, z)) -> (ptradd (ptradd x, z), y) if y is a constant,
2753+
// z is not, and (add y, z) is used only once.
2754+
// The goal is to move constant offsets to the outermost ptradd, to create
2755+
// more opportunities to fold offsets into memory instructions.
2756+
// Together with the another combine above, this also implements
2757+
// (ptradd (ptradd x, y), z) -> (ptradd (ptradd x, z), y)).
2758+
SDValue X = N0;
2759+
SDValue Y = N1.getOperand(0);
2760+
SDValue Z = N1.getOperand(1);
2761+
bool YIsConstant = DAG.isConstantIntBuildVectorOrConstantInt(Y);
2762+
bool ZIsConstant = DAG.isConstantIntBuildVectorOrConstantInt(Z);
2763+
2764+
// If both additions in the original were NUW, reassociation preserves that.
2765+
SDNodeFlags ReassocFlags =
2766+
(N->getFlags() & N1->getFlags()) & SDNodeFlags::NoUnsignedWrap;
2767+
2768+
if (ZIsConstant != YIsConstant) {
2769+
if (YIsConstant)
2770+
std::swap(Y, Z);
2771+
SDValue Inner = DAG.getMemBasePlusOffset(X, Y, DL, ReassocFlags);
2772+
AddToWorklist(Inner.getNode());
2773+
return DAG.getMemBasePlusOffset(Inner, Z, DL, ReassocFlags);
2774+
}
2775+
}
27522776

27532777
return SDValue();
27542778
}

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 10 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -11476,6 +11476,11 @@ bool SITargetLowering::shouldPreservePtrArith(const Function &F,
1147611476
return UseSelectionDAGPTRADD && PtrVT == MVT::i64;
1147711477
}
1147811478

11479+
bool SITargetLowering::canTransformPtrArithOutOfBounds(const Function &F,
11480+
EVT PtrVT) const {
11481+
return true;
11482+
}
11483+
1147911484
// The raw.(t)buffer and struct.(t)buffer intrinsics have two offset args:
1148011485
// offset (the offset that is included in bounds checking and swizzling, to be
1148111486
// split between the instruction's voffset and immoffset fields) and soffset
@@ -16140,64 +16145,17 @@ SDValue SITargetLowering::performPtrAddCombine(SDNode *N,
1614016145
return Folded;
1614116146
}
1614216147

16143-
if (N0.getOpcode() == ISD::PTRADD && N1.getOpcode() == ISD::Constant) {
16144-
// Fold (ptradd (ptradd GA, v), c) -> (ptradd (ptradd GA, c) v) with
16145-
// global address GA and constant c, such that c can be folded into GA.
16146-
SDValue GAValue = N0.getOperand(0);
16147-
if (const GlobalAddressSDNode *GA =
16148-
dyn_cast<GlobalAddressSDNode>(GAValue)) {
16149-
if (DCI.isBeforeLegalizeOps() && isOffsetFoldingLegal(GA)) {
16150-
// If both additions in the original were NUW, reassociation preserves
16151-
// that.
16152-
SDNodeFlags Flags =
16153-
(N->getFlags() & N0->getFlags()) & SDNodeFlags::NoUnsignedWrap;
16154-
SDValue Inner = DAG.getMemBasePlusOffset(GAValue, N1, DL, Flags);
16155-
DCI.AddToWorklist(Inner.getNode());
16156-
return DAG.getMemBasePlusOffset(Inner, N0.getOperand(1), DL, Flags);
16157-
}
16158-
}
16159-
}
16160-
1616116148
if (N1.getOpcode() != ISD::ADD || !N1.hasOneUse())
1616216149
return SDValue();
1616316150

16164-
// (ptradd x, (add y, z)) -> (ptradd (ptradd x, y), z) if z is a constant,
16165-
// y is not, and (add y, z) is used only once.
16166-
// (ptradd x, (add y, z)) -> (ptradd (ptradd x, z), y) if y is a constant,
16167-
// z is not, and (add y, z) is used only once.
16168-
// The goal is to move constant offsets to the outermost ptradd, to create
16169-
// more opportunities to fold offsets into memory instructions.
16170-
// Together with the generic combines in DAGCombiner.cpp, this also
16171-
// implements (ptradd (ptradd x, y), z) -> (ptradd (ptradd x, z), y)).
16172-
//
16173-
// This transform is here instead of in the general DAGCombiner as it can
16174-
// turn in-bounds pointer arithmetic out-of-bounds, which is problematic for
16175-
// AArch64's CPA.
1617616151
SDValue X = N0;
1617716152
SDValue Y = N1.getOperand(0);
1617816153
SDValue Z = N1.getOperand(1);
1617916154
bool YIsConstant = DAG.isConstantIntBuildVectorOrConstantInt(Y);
1618016155
bool ZIsConstant = DAG.isConstantIntBuildVectorOrConstantInt(Z);
1618116156

16182-
// If both additions in the original were NUW, reassociation preserves that.
16183-
SDNodeFlags ReassocFlags =
16184-
(N->getFlags() & N1->getFlags()) & SDNodeFlags::NoUnsignedWrap;
16185-
16186-
if (ZIsConstant != YIsConstant) {
16187-
if (YIsConstant)
16188-
std::swap(Y, Z);
16189-
SDValue Inner = DAG.getMemBasePlusOffset(X, Y, DL, ReassocFlags);
16190-
DCI.AddToWorklist(Inner.getNode());
16191-
return DAG.getMemBasePlusOffset(Inner, Z, DL, ReassocFlags);
16192-
}
16193-
16194-
// If one of Y and Z is constant, they have been handled above. If both were
16195-
// constant, the addition would have been folded in SelectionDAG::getNode
16196-
// already. This ensures that the generic DAG combines won't undo the
16197-
// following reassociation.
16198-
assert(!YIsConstant && !ZIsConstant);
16199-
16200-
if (!X->isDivergent() && Y->isDivergent() != Z->isDivergent()) {
16157+
if (!YIsConstant && !ZIsConstant && !X->isDivergent() &&
16158+
Y->isDivergent() != Z->isDivergent()) {
1620116159
// Reassociate (ptradd x, (add y, z)) -> (ptradd (ptradd x, y), z) if x and
1620216160
// y are uniform and z isn't.
1620316161
// Reassociate (ptradd x, (add y, z)) -> (ptradd (ptradd x, z), y) if x and
@@ -16208,6 +16166,9 @@ SDValue SITargetLowering::performPtrAddCombine(SDNode *N,
1620816166
// reassociate.
1620916167
if (Y->isDivergent())
1621016168
std::swap(Y, Z);
16169+
// If both additions in the original were NUW, reassociation preserves that.
16170+
SDNodeFlags ReassocFlags =
16171+
(N->getFlags() & N1->getFlags()) & SDNodeFlags::NoUnsignedWrap;
1621116172
SDValue UniformInner = DAG.getMemBasePlusOffset(X, Y, DL, ReassocFlags);
1621216173
DCI.AddToWorklist(UniformInner.getNode());
1621316174
return DAG.getMemBasePlusOffset(UniformInner, Z, DL, ReassocFlags);

llvm/lib/Target/AMDGPU/SIISelLowering.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -277,6 +277,9 @@ class SITargetLowering final : public AMDGPUTargetLowering {
277277

278278
bool shouldPreservePtrArith(const Function &F, EVT PtrVT) const override;
279279

280+
bool canTransformPtrArithOutOfBounds(const Function &F,
281+
EVT PtrVT) const override;
282+
280283
private:
281284
// Analyze a combined offset from an amdgcn_s_buffer_load intrinsic and store
282285
// the three offsets (voffset, soffset and instoffset) into the SDValue[3]

0 commit comments

Comments
 (0)