Skip to content

Commit bfd2dd4

Browse files
committed
[GVN] Refactor handling of pointer-select in GVN pass
This patch extends Def memory dependency with support of select instructions to consistently handle pointer-select conversion. Differential Revision: https://reviews.llvm.org/D141619
1 parent 5091357 commit bfd2dd4

File tree

5 files changed

+48
-49
lines changed

5 files changed

+48
-49
lines changed

llvm/include/llvm/Analysis/MemoryDependenceAnalysis.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,11 @@ class MemDepResult {
7777
/// calls or memory use intrinsics with identical callees and no
7878
/// intervening clobbers. No validation is done that the operands to
7979
/// the calls are the same.
80+
/// 4. For loads and stores, this could be a select instruction that
81+
/// defines pointer to this memory location. In this case, users can
82+
/// find non-clobbered Defs for both select values that are reaching
83+
// the desired memory location (there is still a guarantee that there
84+
// are no clobbers between analyzed memory location and select).
8085
Def,
8186

8287
/// This marker indicates that the query has no known dependency in the
@@ -142,6 +147,9 @@ class MemDepResult {
142147
/// definition dependency.
143148
bool isDef() const { return Value.is<Def>(); }
144149

150+
/// Tests if this MemDepResult represents a valid local query (Clobber/Def).
151+
bool isLocal() const { return isClobber() || isDef(); }
152+
145153
/// Tests if this MemDepResult represents a query that is transparent to the
146154
/// start of the block, but where a non-local hasn't been done.
147155
bool isNonLocal() const {

llvm/lib/Analysis/MemoryDependenceAnalysis.cpp

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -592,6 +592,11 @@ MemDepResult MemoryDependenceResults::getSimplePointerDependencyFrom(
592592
return MemDepResult::getDef(Inst);
593593
}
594594

595+
// If we found a select instruction for MemLoc pointer, return it as Def
596+
// dependency.
597+
if (isa<SelectInst>(Inst) && MemLoc.Ptr == Inst)
598+
return MemDepResult::getDef(Inst);
599+
595600
if (isInvariantLoad)
596601
continue;
597602

@@ -962,7 +967,7 @@ MemDepResult MemoryDependenceResults::getNonLocalInfoForBlock(
962967
// If the block has a dependency (i.e. it isn't completely transparent to
963968
// the value), remember the reverse association because we just added it
964969
// to Cache!
965-
if (!Dep.isDef() && !Dep.isClobber())
970+
if (!Dep.isLocal())
966971
return Dep;
967972

968973
// Keep the ReverseNonLocalPtrDeps map up to date so we can efficiently

llvm/lib/Transforms/Scalar/GVN.cpp

Lines changed: 25 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -1120,31 +1120,26 @@ static void reportMayClobberedLoad(LoadInst *Load, MemDepResult DepInfo,
11201120
/// 1. The pointer select (\p Address) must be defined in \p DepBB.
11211121
/// 2. Both value operands of the pointer select must be loaded in the same
11221122
/// basic block, before the pointer select.
1123-
/// 3. There must be no instructions between the found loads and \p End that may
1123+
/// 3. There must be no instructions between the found loads and \p Sel that may
11241124
/// clobber the loads.
11251125
static std::optional<AvailableValue>
1126-
tryToConvertLoadOfPtrSelect(BasicBlock *DepBB, BasicBlock::iterator End,
1127-
Value *Address, Type *LoadTy, DominatorTree &DT,
1126+
tryToConvertLoadOfPtrSelect(SelectInst *Sel, Type *LoadTy, DominatorTree &DT,
11281127
AAResults *AA) {
1129-
1130-
auto *Sel = dyn_cast_or_null<SelectInst>(Address);
1131-
if (!Sel || DepBB != Sel->getParent())
1132-
return std::nullopt;
1133-
11341128
LoadInst *L1 = findDominatingLoad(Sel->getOperand(1), LoadTy, Sel, DT);
11351129
LoadInst *L2 = findDominatingLoad(Sel->getOperand(2), LoadTy, Sel, DT);
11361130
if (!L1 || !L2)
11371131
return std::nullopt;
11381132

11391133
// Ensure there are no accesses that may modify the locations referenced by
1140-
// either L1 or L2 between L1, L2 and the specified End iterator.
1134+
// either L1 or L2 between L1, L2 and the specified Sel instruction.
11411135
Instruction *EarlierLoad = L1->comesBefore(L2) ? L1 : L2;
11421136
MemoryLocation L1Loc = MemoryLocation::get(L1);
11431137
MemoryLocation L2Loc = MemoryLocation::get(L2);
1144-
if (any_of(make_range(EarlierLoad->getIterator(), End), [&](Instruction &I) {
1145-
return isModSet(AA->getModRefInfo(&I, L1Loc)) ||
1146-
isModSet(AA->getModRefInfo(&I, L2Loc));
1147-
}))
1138+
if (any_of(make_range(EarlierLoad->getIterator(), Sel->getIterator()),
1139+
[&](Instruction &I) {
1140+
return isModSet(AA->getModRefInfo(&I, L1Loc)) ||
1141+
isModSet(AA->getModRefInfo(&I, L2Loc));
1142+
}))
11481143
return std::nullopt;
11491144

11501145
return AvailableValue::getSelect(Sel);
@@ -1153,20 +1148,12 @@ tryToConvertLoadOfPtrSelect(BasicBlock *DepBB, BasicBlock::iterator End,
11531148
std::optional<AvailableValue>
11541149
GVNPass::AnalyzeLoadAvailability(LoadInst *Load, MemDepResult DepInfo,
11551150
Value *Address) {
1156-
if (!DepInfo.isDef() && !DepInfo.isClobber()) {
1157-
assert(isa<SelectInst>(Address));
1158-
return tryToConvertLoadOfPtrSelect(Load->getParent(), Load->getIterator(),
1159-
Address, Load->getType(),
1160-
getDominatorTree(), getAliasAnalysis());
1161-
}
1162-
1163-
assert((DepInfo.isDef() || DepInfo.isClobber()) &&
1164-
"expected a local dependence");
11651151
assert(Load->isUnordered() && "rules below are incorrect for ordered access");
1166-
1167-
const DataLayout &DL = Load->getModule()->getDataLayout();
1152+
assert(DepInfo.isLocal() && "expected a local dependence");
11681153

11691154
Instruction *DepInst = DepInfo.getInst();
1155+
1156+
const DataLayout &DL = Load->getModule()->getDataLayout();
11701157
if (DepInfo.isClobber()) {
11711158
// If the dependence is to a store that writes to a superset of the bits
11721159
// read by the load, we can extract the bits we need for the load from the
@@ -1272,6 +1259,13 @@ GVNPass::AnalyzeLoadAvailability(LoadInst *Load, MemDepResult DepInfo,
12721259
return AvailableValue::getLoad(LD);
12731260
}
12741261

1262+
// Check if load with Addr dependent from select can be converted to select
1263+
// between load values. There must be no instructions between the found
1264+
// loads and DepInst that may clobber the loads.
1265+
if (auto *Sel = dyn_cast<SelectInst>(DepInst))
1266+
return tryToConvertLoadOfPtrSelect(Sel, Load->getType(), getDominatorTree(),
1267+
getAliasAnalysis());
1268+
12751269
// Unknown def - must be conservative
12761270
LLVM_DEBUG(
12771271
// fast print dep, using operator<< on instruction is too slow.
@@ -1298,24 +1292,15 @@ void GVNPass::AnalyzeLoadAvailability(LoadInst *Load, LoadDepVect &Deps,
12981292
continue;
12991293
}
13001294

1301-
// The address being loaded in this non-local block may not be the same as
1302-
// the pointer operand of the load if PHI translation occurs. Make sure
1303-
// to consider the right address.
1304-
Value *Address = Dep.getAddress();
1305-
1306-
if (!DepInfo.isDef() && !DepInfo.isClobber()) {
1307-
if (auto R = tryToConvertLoadOfPtrSelect(
1308-
DepBB, DepBB->end(), Address, Load->getType(), getDominatorTree(),
1309-
getAliasAnalysis())) {
1310-
ValuesPerBlock.push_back(
1311-
AvailableValueInBlock::get(DepBB, std::move(*R)));
1312-
continue;
1313-
}
1295+
if (!DepInfo.isLocal()) {
13141296
UnavailableBlocks.push_back(DepBB);
13151297
continue;
13161298
}
13171299

1318-
if (auto AV = AnalyzeLoadAvailability(Load, DepInfo, Address)) {
1300+
// The address being loaded in this non-local block may not be the same as
1301+
// the pointer operand of the load if PHI translation occurs. Make sure
1302+
// to consider the right address.
1303+
if (auto AV = AnalyzeLoadAvailability(Load, DepInfo, Dep.getAddress())) {
13191304
// subtlety: because we know this was a non-local dependency, we know
13201305
// it's safe to materialize anywhere between the instruction within
13211306
// DepInfo and the end of it's block.
@@ -2043,9 +2028,8 @@ bool GVNPass::processLoad(LoadInst *L) {
20432028
if (Dep.isNonLocal())
20442029
return processNonLocalLoad(L);
20452030

2046-
Value *Address = L->getPointerOperand();
20472031
// Only handle the local case below
2048-
if (!Dep.isDef() && !Dep.isClobber() && !isa<SelectInst>(Address)) {
2032+
if (!Dep.isLocal()) {
20492033
// This might be a NonFuncLocal or an Unknown
20502034
LLVM_DEBUG(
20512035
// fast print dep, using operator<< on instruction is too slow.
@@ -2054,7 +2038,7 @@ bool GVNPass::processLoad(LoadInst *L) {
20542038
return false;
20552039
}
20562040

2057-
auto AV = AnalyzeLoadAvailability(L, Dep, Address);
2041+
auto AV = AnalyzeLoadAvailability(L, Dep, L->getPointerOperand());
20582042
if (!AV)
20592043
return false;
20602044

llvm/test/Transforms/GVN/PRE/pre-load-through-select.ll

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -236,13 +236,15 @@ define i32 @test_pointer_phi_select_simp_store_clobber_3(ptr %a, ptr %b, ptr %c,
236236
; CHECK-NEXT: [[L_1:%.*]] = load i32, ptr [[A:%.*]], align 4
237237
; CHECK-NEXT: [[L_2:%.*]] = load i32, ptr [[B:%.*]], align 4
238238
; CHECK-NEXT: [[CMP_I_I_I:%.*]] = icmp ult i32 [[L_1]], [[L_2]]
239+
; CHECK-NEXT: [[TMP0:%.*]] = select i1 [[CMP_I_I_I]], i32 [[L_1]], i32 [[L_2]]
239240
; CHECK-NEXT: [[MIN_SELECT:%.*]] = select i1 [[CMP_I_I_I]], ptr [[A]], ptr [[B]]
240241
; CHECK-NEXT: br label [[EXIT:%.*]]
241242
; CHECK: else:
243+
; CHECK-NEXT: [[RES_2_PRE:%.*]] = load i32, ptr [[A]], align 4
242244
; CHECK-NEXT: br label [[EXIT]]
243245
; CHECK: exit:
246+
; CHECK-NEXT: [[RES_2:%.*]] = phi i32 [ [[TMP0]], [[THEN]] ], [ [[RES_2_PRE]], [[ELSE]] ]
244247
; CHECK-NEXT: [[P:%.*]] = phi ptr [ [[MIN_SELECT]], [[THEN]] ], [ [[A]], [[ELSE]] ]
245-
; CHECK-NEXT: [[RES_2:%.*]] = load i32, ptr [[P]], align 4
246248
; CHECK-NEXT: ret i32 [[RES_2]]
247249
;
248250
entry:

llvm/test/Transforms/GVN/PRE/pre-loop-load-through-select.ll

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -311,13 +311,13 @@ define i32 @test_pointer_phi_select_load_may_not_execute_1(ptr %A, ptr %B, ptr %
311311
; CHECK-NEXT: [[L_1:%.*]] = load i32, ptr [[PTR_IV]], align 4
312312
; CHECK-NEXT: [[L_2:%.*]] = load i32, ptr [[MIN_PTR]], align 4
313313
; CHECK-NEXT: [[CMP_I_I_I:%.*]] = icmp ult i32 [[L_1]], [[L_2]]
314+
; CHECK-NEXT: [[TMP0:%.*]] = select i1 [[CMP_I_I_I]], i32 [[L_1]], i32 [[L_2]]
314315
; CHECK-NEXT: [[MIN_SELECT]] = select i1 [[CMP_I_I_I]], ptr [[PTR_IV]], ptr [[MIN_PTR]]
315316
; CHECK-NEXT: [[PTR_IV_NEXT]] = getelementptr inbounds i32, ptr [[PTR_IV]], i64 1
316317
; CHECK-NEXT: [[EC:%.*]] = icmp eq ptr [[PTR_IV_NEXT]], [[END:%.*]]
317318
; CHECK-NEXT: br i1 [[EC]], label [[EXIT:%.*]], label [[LOOP]]
318319
; CHECK: exit:
319-
; CHECK-NEXT: [[RES:%.*]] = load i32, ptr [[MIN_SELECT]], align 4
320-
; CHECK-NEXT: ret i32 [[RES]]
320+
; CHECK-NEXT: ret i32 [[TMP0]]
321321
;
322322
entry:
323323
br label %loop
@@ -350,13 +350,13 @@ define i32 @test_pointer_phi_select_load_may_not_execute_2(ptr %A, ptr %B, ptr %
350350
; CHECK-NEXT: call void @may_throw()
351351
; CHECK-NEXT: [[L_2:%.*]] = load i32, ptr [[MIN_PTR]], align 4
352352
; CHECK-NEXT: [[CMP_I_I_I:%.*]] = icmp ult i32 [[L_1]], [[L_2]]
353+
; CHECK-NEXT: [[TMP0:%.*]] = select i1 [[CMP_I_I_I]], i32 [[L_1]], i32 [[L_2]]
353354
; CHECK-NEXT: [[MIN_SELECT]] = select i1 [[CMP_I_I_I]], ptr [[PTR_IV]], ptr [[MIN_PTR]]
354355
; CHECK-NEXT: [[PTR_IV_NEXT]] = getelementptr inbounds i32, ptr [[PTR_IV]], i64 1
355356
; CHECK-NEXT: [[EC:%.*]] = icmp eq ptr [[PTR_IV_NEXT]], [[END:%.*]]
356357
; CHECK-NEXT: br i1 [[EC]], label [[EXIT:%.*]], label [[LOOP]]
357358
; CHECK: exit:
358-
; CHECK-NEXT: [[RES:%.*]] = load i32, ptr [[MIN_SELECT]], align 4
359-
; CHECK-NEXT: ret i32 [[RES]]
359+
; CHECK-NEXT: ret i32 [[TMP0]]
360360
;
361361
entry:
362362
br label %loop
@@ -514,13 +514,13 @@ define i32 @test_pointer_phi_select_same_object_may_write_call_1(ptr %ptr, ptr %
514514
; CHECK-NEXT: [[L_1:%.*]] = load i32, ptr [[PTR_IV]], align 4
515515
; CHECK-NEXT: [[L_2:%.*]] = load i32, ptr [[MIN_PTR]], align 4
516516
; CHECK-NEXT: [[CMP_I_I_I:%.*]] = icmp ult i32 [[L_1]], [[L_2]]
517+
; CHECK-NEXT: [[TMP0:%.*]] = select i1 [[CMP_I_I_I]], i32 [[L_1]], i32 [[L_2]]
517518
; CHECK-NEXT: [[MIN_SELECT]] = select i1 [[CMP_I_I_I]], ptr [[PTR_IV]], ptr [[MIN_PTR]]
518519
; CHECK-NEXT: [[PTR_IV_NEXT]] = getelementptr inbounds i32, ptr [[PTR_IV]], i64 1
519520
; CHECK-NEXT: [[EC:%.*]] = icmp eq ptr [[PTR_IV_NEXT]], [[END:%.*]]
520521
; CHECK-NEXT: br i1 [[EC]], label [[EXIT:%.*]], label [[LOOP]]
521522
; CHECK: exit:
522-
; CHECK-NEXT: [[RES:%.*]] = load i32, ptr [[MIN_SELECT]], align 4
523-
; CHECK-NEXT: ret i32 [[RES]]
523+
; CHECK-NEXT: ret i32 [[TMP0]]
524524
;
525525
entry:
526526
%start.ptr = getelementptr inbounds i32, ptr %ptr, i64 1

0 commit comments

Comments
 (0)