Skip to content

Commit e939378

Browse files
committed
[AggressiveInstCombine] Handle the insert point of the merged load correctly.
This patch updates the insert point of the merged load in the AggressiveInstCombine pass. This fixes the reported test failures by using alias analysis correctly when checking for stores between the loads being merged. Differential Revision: https://reviews.llvm.org/D137201
1 parent 3ea6a9a commit e939378

File tree

3 files changed

+234
-166
lines changed

3 files changed

+234
-166
lines changed

llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp

Lines changed: 30 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -610,6 +610,7 @@ static bool tryToRecognizeTableBasedCttz(Instruction &I) {
610610
/// shift amount, zero extend type and loadSize.
611611
struct LoadOps {
612612
LoadInst *Root = nullptr;
613+
LoadInst *RootInsert = nullptr;
613614
bool FoundRoot = false;
614615
uint64_t LoadSize = 0;
615616
Value *Shift = nullptr;
@@ -675,16 +676,6 @@ static bool foldLoadsRecursive(Value *V, LoadOps &LOps, const DataLayout &DL,
675676
Load2Ptr->stripAndAccumulateConstantOffsets(DL, Offset2,
676677
/* AllowNonInbounds */ true);
677678

678-
// Make sure Load with lower Offset is at LI1
679-
bool Reverse = false;
680-
if (Offset2.slt(Offset1)) {
681-
std::swap(LI1, LI2);
682-
std::swap(ShAmt1, ShAmt2);
683-
std::swap(Offset1, Offset2);
684-
std::swap(Load1Ptr, Load2Ptr);
685-
Reverse = true;
686-
}
687-
688679
// Verify if both loads have same base pointers and load sizes are same.
689680
uint64_t LoadSize1 = LI1->getType()->getPrimitiveSizeInBits();
690681
uint64_t LoadSize2 = LI2->getType()->getPrimitiveSizeInBits();
@@ -695,20 +686,36 @@ static bool foldLoadsRecursive(Value *V, LoadOps &LOps, const DataLayout &DL,
695686
if (LoadSize1 < 8 || !isPowerOf2_64(LoadSize1))
696687
return false;
697688

698-
// TODO: Alias Analysis to check for stores b/w the loads.
699-
// Currently bail out if there are stores b/w the loads.
700-
LoadInst *Start = LI1, *End = LI2;
701-
if (!LI1->comesBefore(LI2))
689+
// Alias Analysis to check for stores b/w the loads.
690+
LoadInst *Start = LOps.FoundRoot ? LOps.RootInsert : LI1, *End = LI2;
691+
MemoryLocation Loc;
692+
if (!Start->comesBefore(End)) {
702693
std::swap(Start, End);
694+
Loc = MemoryLocation::get(End);
695+
if (LOps.FoundRoot)
696+
Loc = Loc.getWithNewSize(LOps.LoadSize);
697+
} else
698+
Loc = MemoryLocation::get(End);
703699
unsigned NumScanned = 0;
704700
for (Instruction &Inst :
705701
make_range(Start->getIterator(), End->getIterator())) {
706-
if (Inst.mayWriteToMemory())
702+
if (Inst.mayWriteToMemory() && isModSet(AA.getModRefInfo(&Inst, Loc)))
707703
return false;
708704
if (++NumScanned > MaxInstrsToScan)
709705
return false;
710706
}
711707

708+
// Make sure Load with lower Offset is at LI1
709+
bool Reverse = false;
710+
if (Offset2.slt(Offset1)) {
711+
std::swap(LI1, LI2);
712+
std::swap(ShAmt1, ShAmt2);
713+
std::swap(Offset1, Offset2);
714+
std::swap(Load1Ptr, Load2Ptr);
715+
std::swap(LoadSize1, LoadSize2);
716+
Reverse = true;
717+
}
718+
712719
// Big endian swap the shifts
713720
if (IsBigEndian)
714721
std::swap(ShAmt1, ShAmt2);
@@ -746,6 +753,7 @@ static bool foldLoadsRecursive(Value *V, LoadOps &LOps, const DataLayout &DL,
746753
AATags1 = LI1->getAAMetadata();
747754
}
748755
LOps.LoadSize = LoadSize1 + LoadSize2;
756+
LOps.RootInsert = Start;
749757

750758
// Concatenate the AATags of the Merged Loads.
751759
LOps.AATags = AATags1.concat(AATags2);
@@ -781,9 +789,15 @@ static bool foldConsecutiveLoads(Instruction &I, const DataLayout &DL,
781789
if (!Allowed || !Fast)
782790
return false;
783791

792+
// Make sure the Load pointer of type GEP/non-GEP is above insert point
793+
Instruction *Inst = dyn_cast<Instruction>(LI1->getPointerOperand());
794+
if (Inst && Inst->getParent() == LI1->getParent() &&
795+
!Inst->comesBefore(LOps.RootInsert))
796+
Inst->moveBefore(LOps.RootInsert);
797+
784798
// New load can be generated
785799
Value *Load1Ptr = LI1->getPointerOperand();
786-
Builder.SetInsertPoint(LI1);
800+
Builder.SetInsertPoint(LOps.RootInsert);
787801
Value *NewPtr = Builder.CreateBitCast(Load1Ptr, WiderType->getPointerTo(AS));
788802
NewLoad = Builder.CreateAlignedLoad(WiderType, NewPtr, LI1->getAlign(),
789803
LI1->isVolatile(), "");

llvm/test/Transforms/AggressiveInstCombine/AArch64/or-load.ll

Lines changed: 76 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -142,26 +142,31 @@ define i32 @loadCombine_4consecutive_BE(ptr %p) {
142142
}
143143

144144
define i32 @loadCombine_4consecutive_alias(ptr %p) {
145-
; ALL-LABEL: @loadCombine_4consecutive_alias(
146-
; ALL-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1
147-
; ALL-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2
148-
; ALL-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3
149-
; ALL-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1
150-
; ALL-NEXT: store i8 10, ptr [[P]], align 1
151-
; ALL-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1
152-
; ALL-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1
153-
; ALL-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1
154-
; ALL-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32
155-
; ALL-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32
156-
; ALL-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32
157-
; ALL-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32
158-
; ALL-NEXT: [[S2:%.*]] = shl i32 [[E2]], 8
159-
; ALL-NEXT: [[S3:%.*]] = shl i32 [[E3]], 16
160-
; ALL-NEXT: [[S4:%.*]] = shl i32 [[E4]], 24
161-
; ALL-NEXT: [[O1:%.*]] = or i32 [[E1]], [[S2]]
162-
; ALL-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]]
163-
; ALL-NEXT: [[O3:%.*]] = or i32 [[O2]], [[S4]]
164-
; ALL-NEXT: ret i32 [[O3]]
145+
; LE-LABEL: @loadCombine_4consecutive_alias(
146+
; LE-NEXT: [[L1:%.*]] = load i32, ptr [[P:%.*]], align 1
147+
; LE-NEXT: store i8 10, ptr [[P]], align 1
148+
; LE-NEXT: ret i32 [[L1]]
149+
;
150+
; BE-LABEL: @loadCombine_4consecutive_alias(
151+
; BE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1
152+
; BE-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2
153+
; BE-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3
154+
; BE-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1
155+
; BE-NEXT: store i8 10, ptr [[P]], align 1
156+
; BE-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1
157+
; BE-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1
158+
; BE-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1
159+
; BE-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32
160+
; BE-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32
161+
; BE-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32
162+
; BE-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32
163+
; BE-NEXT: [[S2:%.*]] = shl i32 [[E2]], 8
164+
; BE-NEXT: [[S3:%.*]] = shl i32 [[E3]], 16
165+
; BE-NEXT: [[S4:%.*]] = shl i32 [[E4]], 24
166+
; BE-NEXT: [[O1:%.*]] = or i32 [[E1]], [[S2]]
167+
; BE-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]]
168+
; BE-NEXT: [[O3:%.*]] = or i32 [[O2]], [[S4]]
169+
; BE-NEXT: ret i32 [[O3]]
165170
;
166171
%p1 = getelementptr i8, ptr %p, i32 1
167172
%p2 = getelementptr i8, ptr %p, i32 2
@@ -188,26 +193,31 @@ define i32 @loadCombine_4consecutive_alias(ptr %p) {
188193
}
189194

190195
define i32 @loadCombine_4consecutive_alias_BE(ptr %p) {
191-
; ALL-LABEL: @loadCombine_4consecutive_alias_BE(
192-
; ALL-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1
193-
; ALL-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2
194-
; ALL-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3
195-
; ALL-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1
196-
; ALL-NEXT: store i8 10, ptr [[P]], align 1
197-
; ALL-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1
198-
; ALL-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1
199-
; ALL-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1
200-
; ALL-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32
201-
; ALL-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32
202-
; ALL-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32
203-
; ALL-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32
204-
; ALL-NEXT: [[S1:%.*]] = shl i32 [[E1]], 24
205-
; ALL-NEXT: [[S2:%.*]] = shl i32 [[E2]], 16
206-
; ALL-NEXT: [[S3:%.*]] = shl i32 [[E3]], 8
207-
; ALL-NEXT: [[O1:%.*]] = or i32 [[S1]], [[S2]]
208-
; ALL-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]]
209-
; ALL-NEXT: [[O3:%.*]] = or i32 [[O2]], [[E4]]
210-
; ALL-NEXT: ret i32 [[O3]]
196+
; LE-LABEL: @loadCombine_4consecutive_alias_BE(
197+
; LE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1
198+
; LE-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2
199+
; LE-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3
200+
; LE-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1
201+
; LE-NEXT: store i8 10, ptr [[P]], align 1
202+
; LE-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1
203+
; LE-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1
204+
; LE-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1
205+
; LE-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32
206+
; LE-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32
207+
; LE-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32
208+
; LE-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32
209+
; LE-NEXT: [[S1:%.*]] = shl i32 [[E1]], 24
210+
; LE-NEXT: [[S2:%.*]] = shl i32 [[E2]], 16
211+
; LE-NEXT: [[S3:%.*]] = shl i32 [[E3]], 8
212+
; LE-NEXT: [[O1:%.*]] = or i32 [[S1]], [[S2]]
213+
; LE-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]]
214+
; LE-NEXT: [[O3:%.*]] = or i32 [[O2]], [[E4]]
215+
; LE-NEXT: ret i32 [[O3]]
216+
;
217+
; BE-LABEL: @loadCombine_4consecutive_alias_BE(
218+
; BE-NEXT: [[L1:%.*]] = load i32, ptr [[P:%.*]], align 1
219+
; BE-NEXT: store i8 10, ptr [[P]], align 1
220+
; BE-NEXT: ret i32 [[L1]]
211221
;
212222
%p1 = getelementptr i8, ptr %p, i32 1
213223
%p2 = getelementptr i8, ptr %p, i32 2
@@ -1760,26 +1770,32 @@ define i16 @loadCombine_2consecutive_badinsert(ptr %p) {
17601770
}
17611771

17621772
define i32 @loadCombine_4consecutive_badinsert(ptr %p) {
1763-
; ALL-LABEL: @loadCombine_4consecutive_badinsert(
1764-
; ALL-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1
1765-
; ALL-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2
1766-
; ALL-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3
1767-
; ALL-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1
1768-
; ALL-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1
1769-
; ALL-NEXT: store i8 0, ptr [[P1]], align 1
1770-
; ALL-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1
1771-
; ALL-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1
1772-
; ALL-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32
1773-
; ALL-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32
1774-
; ALL-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32
1775-
; ALL-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32
1776-
; ALL-NEXT: [[S2:%.*]] = shl i32 [[E2]], 8
1777-
; ALL-NEXT: [[S3:%.*]] = shl i32 [[E3]], 16
1778-
; ALL-NEXT: [[S4:%.*]] = shl i32 [[E4]], 24
1779-
; ALL-NEXT: [[O1:%.*]] = or i32 [[E1]], [[S2]]
1780-
; ALL-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]]
1781-
; ALL-NEXT: [[O3:%.*]] = or i32 [[O2]], [[S4]]
1782-
; ALL-NEXT: ret i32 [[O3]]
1773+
; LE-LABEL: @loadCombine_4consecutive_badinsert(
1774+
; LE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1
1775+
; LE-NEXT: [[L1:%.*]] = load i32, ptr [[P]], align 1
1776+
; LE-NEXT: store i8 0, ptr [[P1]], align 1
1777+
; LE-NEXT: ret i32 [[L1]]
1778+
;
1779+
; BE-LABEL: @loadCombine_4consecutive_badinsert(
1780+
; BE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1
1781+
; BE-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2
1782+
; BE-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3
1783+
; BE-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1
1784+
; BE-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1
1785+
; BE-NEXT: store i8 0, ptr [[P1]], align 1
1786+
; BE-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1
1787+
; BE-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1
1788+
; BE-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32
1789+
; BE-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32
1790+
; BE-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32
1791+
; BE-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32
1792+
; BE-NEXT: [[S2:%.*]] = shl i32 [[E2]], 8
1793+
; BE-NEXT: [[S3:%.*]] = shl i32 [[E3]], 16
1794+
; BE-NEXT: [[S4:%.*]] = shl i32 [[E4]], 24
1795+
; BE-NEXT: [[O1:%.*]] = or i32 [[E1]], [[S2]]
1796+
; BE-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]]
1797+
; BE-NEXT: [[O3:%.*]] = or i32 [[O2]], [[S4]]
1798+
; BE-NEXT: ret i32 [[O3]]
17831799
;
17841800
%p1 = getelementptr i8, ptr %p, i32 1
17851801
%p2 = getelementptr i8, ptr %p, i32 2

0 commit comments

Comments
 (0)