Skip to content

[SimplifyCFG] Emit SelectInst when folding branches to common dest with different PHI incoming values #144434

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2,000 changes: 1,064 additions & 936 deletions clang/test/Headers/__clang_hip_math.hip

Large diffs are not rendered by default.

90 changes: 81 additions & 9 deletions llvm/lib/Transforms/Utils/SimplifyCFG.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1170,6 +1170,9 @@ static void cloneInstructionsIntoPredecessorBlockAndUpdateSSAUses(
// Note that we expect to be in a block-closed SSA form for this to work!
for (Use &U : make_early_inc_range(BonusInst.uses())) {
auto *UI = cast<Instruction>(U.getUser());
// Avoid dangling select instructions
if (!UI->getParent())
continue;
auto *PN = dyn_cast<PHINode>(UI);
if (!PN) {
assert(UI->getParent() == BB && BonusInst.comesBefore(UI) &&
Expand Down Expand Up @@ -3962,10 +3965,10 @@ shouldFoldCondBranchesToCommonDestination(BranchInst *BI, BranchInst *PBI,
return std::nullopt;
}

static bool performBranchToCommonDestFolding(BranchInst *BI, BranchInst *PBI,
DomTreeUpdater *DTU,
MemorySSAUpdater *MSSAU,
const TargetTransformInfo *TTI) {
static bool performBranchToCommonDestFolding(
BranchInst *BI, BranchInst *PBI, DomTreeUpdater *DTU,
MemorySSAUpdater *MSSAU, const TargetTransformInfo *TTI,
SmallDenseMap<PHINode *, SelectInst *, 8> &InsertNewPHIs) {
BasicBlock *BB = BI->getParent();
BasicBlock *PredBlock = PBI->getParent();

Expand Down Expand Up @@ -4052,6 +4055,28 @@ static bool performBranchToCommonDestFolding(BranchInst *BI, BranchInst *PBI,

ValueToValueMapTy VMap; // maps original values to cloned values
cloneInstructionsIntoPredecessorBlockAndUpdateSSAUses(BB, PredBlock, VMap);
if (!InsertNewPHIs.empty()) {
// Fix up the PHI nodes in the common successor
for (PHINode &PN : CommonSucc->phis()) {
auto It = InsertNewPHIs.find(&PN);
if (It != InsertNewPHIs.end() && It->first == &PN) {
Instruction *SI = It->second;
// Operands might have been promoted to bonus instructions
RemapInstruction(SI, VMap,
RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
// Insert SelectInst as the new PHINode incoming value
SI->insertBefore(PredBlock->getTerminator()->getIterator());
// Fix PHINode
PN.removeIncomingValue(PredBlock);
PN.addIncoming(SI, PredBlock);
// Remove map entry
InsertNewPHIs.erase(It);
}
}
// Cleanup dangling SelectInst
for (SelectInst *SI : InsertNewPHIs.values())
delete SI;
}

Module *M = BB->getModule();

Expand Down Expand Up @@ -4109,15 +4134,50 @@ bool llvm::foldBranchToCommonDest(BranchInst *BI, DomTreeUpdater *DTU,

// With which predecessors will we want to deal with?
SmallVector<BasicBlock *, 8> Preds;
// Describes one select instruction that has to be created so that a PHI node
// in a successor shared by BI and a predecessor's branch can receive a single
// merged incoming value, even though the two blocks feed it different values.
struct InsertPointTy {
// Cost of the select as reported by TTI; left default-constructed when no
// TTI is available.
InstructionCost Cost;
// Operand the select yields when the predecessor's branch condition is true.
Value *TValue; // True Value
// Operand the select yields when the predecessor's branch condition is false.
Value *FValue; // False Value
// PHI node in the shared successor whose incoming value the select provides.
PHINode *Phi;
};
SmallDenseMap<BranchInst *, SmallVector<InsertPointTy, 8>, 8> InsertPts;
for (BasicBlock *PredBlock : predecessors(BB)) {
BranchInst *PBI = dyn_cast<BranchInst>(PredBlock->getTerminator());

// Check that we have two conditional branches. If there is a PHI node in
// the common successor, verify that the same value flows in from both
// blocks.
if (!PBI || PBI->isUnconditional() || !safeToMergeTerminators(BI, PBI))
// Check that we have two conditional branches.
if (!PBI || PBI->isUnconditional())
continue;

// If there is a PHI node in the common successor, verify that the same
// value flows in from both blocks. Otherwise, check whether we can create a
// SelectInst to combine the incoming values
if (!safeToMergeTerminators(BI, PBI)) {
if (BI == PBI)
continue;
for (BasicBlock *Succ : BI->successors()) {
if (llvm::is_contained(PBI->successors(), Succ)) {
for (PHINode &Phi : Succ->phis()) {
Value *IV0 = Phi.getIncomingValueForBlock(BB);
Value *IV1 = Phi.getIncomingValueForBlock(PredBlock);
InstructionCost PCost;
if (TTI) {
PCost = TTI->getCmpSelInstrCost(
Instruction::Select, Phi.getType(),
CmpInst::makeCmpResultType(Phi.getType()),
CmpInst::BAD_ICMP_PREDICATE, CostKind);
}
auto &IP = InsertPts[PBI];
if (PBI->getSuccessor(0) == BB)
IP.emplace_back(InsertPointTy{PCost, IV0, IV1, &Phi});
else
IP.emplace_back(InsertPointTy{PCost, IV1, IV0, &Phi});
}
}
}
if (InsertPts.empty())
continue;
}

// Determine if the two branches share a common destination.
BasicBlock *CommonSucc;
Instruction::BinaryOps Opc;
Expand All @@ -4136,6 +4196,9 @@ bool llvm::foldBranchToCommonDest(BranchInst *BI, DomTreeUpdater *DTU,
!isa<CmpInst>(PBI->getCondition())))
Cost += TTI->getArithmeticInstrCost(Instruction::Xor, Ty, CostKind);

for (auto const &InsertPoints : InsertPts.values())
for (auto &InsertInfo : InsertPoints)
Cost += InsertInfo.Cost;
if (Cost > BranchFoldThreshold)
continue;
}
Expand Down Expand Up @@ -4201,7 +4264,16 @@ bool llvm::foldBranchToCommonDest(BranchInst *BI, DomTreeUpdater *DTU,
// Ok, we have the budget. Perform the transformation.
for (BasicBlock *PredBlock : Preds) {
auto *PBI = cast<BranchInst>(PredBlock->getTerminator());
return performBranchToCommonDestFolding(BI, PBI, DTU, MSSAU, TTI);
SmallDenseMap<PHINode *, SelectInst *, 8> newPhis;
if (InsertPts.contains(PBI)) {
Value *PC = PBI->getCondition();
for (auto const InsertInfo : InsertPts[PBI]) {
SelectInst *newPhi =
SelectInst::Create(PC, InsertInfo.TValue, InsertInfo.FValue);
newPhis.insert(std::make_pair(InsertInfo.Phi, newPhi));
}
}
return performBranchToCommonDestFolding(BI, PBI, DTU, MSSAU, TTI, newPhis);
}
return false;
}
Expand Down
56 changes: 26 additions & 30 deletions llvm/test/CodeGen/AArch64/rm_redundant_cmp.ll
Original file line number Diff line number Diff line change
Expand Up @@ -49,16 +49,16 @@ define void @test_i16_2cmp_signed_2() {
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: adrp x8, :got:cost_s_i8_i16
; CHECK-NEXT: ldr x8, [x8, :got_lo12:cost_s_i8_i16]
; CHECK-NEXT: ldrsh w9, [x8, #2]
; CHECK-NEXT: ldrsh w10, [x8, #4]
; CHECK-NEXT: cmp w9, w10
; CHECK-NEXT: b.gt .LBB1_2
; CHECK-NEXT: // %bb.1: // %if.else
; CHECK-NEXT: mov w9, w10
; CHECK-NEXT: b.ge .LBB1_3
; CHECK-NEXT: .LBB1_2: // %if.end8.sink.split
; CHECK-NEXT: ldrh w10, [x8, #2]
; CHECK-NEXT: ldrh w11, [x8, #4]
; CHECK-NEXT: sxth w9, w10
; CHECK-NEXT: cmp w9, w11, sxth
; CHECK-NEXT: csel w9, w10, w11, gt
; CHECK-NEXT: cmp w10, w11
; CHECK-NEXT: b.eq .LBB1_2
; CHECK-NEXT: // %bb.1: // %if.end8.sink.split
; CHECK-NEXT: strh w9, [x8]
; CHECK-NEXT: .LBB1_3: // %if.end8
; CHECK-NEXT: .LBB1_2: // %if.end8
; CHECK-NEXT: ret
entry:
%0 = load i16, ptr getelementptr inbounds (%struct.s_signed_i16, ptr @cost_s_i8_i16, i64 0, i32 1), align 2
Expand Down Expand Up @@ -125,13 +125,11 @@ define void @test_i16_2cmp_unsigned_2() {
; CHECK-NEXT: ldrh w9, [x8, #2]
; CHECK-NEXT: ldrh w10, [x8, #4]
; CHECK-NEXT: cmp w9, w10
; CHECK-NEXT: b.hi .LBB3_2
; CHECK-NEXT: // %bb.1: // %if.else
; CHECK-NEXT: mov w9, w10
; CHECK-NEXT: b.hs .LBB3_3
; CHECK-NEXT: .LBB3_2: // %if.end8.sink.split
; CHECK-NEXT: csel w9, w9, w10, hi
; CHECK-NEXT: b.eq .LBB3_2
; CHECK-NEXT: // %bb.1: // %if.end8.sink.split
; CHECK-NEXT: strh w9, [x8]
; CHECK-NEXT: .LBB3_3: // %if.end8
; CHECK-NEXT: .LBB3_2: // %if.end8
; CHECK-NEXT: ret
entry:
%0 = load i16, ptr getelementptr inbounds (%struct.s_unsigned_i16, ptr @cost_u_i16, i64 0, i32 1), align 2
Expand Down Expand Up @@ -204,16 +202,16 @@ define void @test_i8_2cmp_signed_2() {
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: adrp x8, :got:cost_s
; CHECK-NEXT: ldr x8, [x8, :got_lo12:cost_s]
; CHECK-NEXT: ldrsb w9, [x8, #1]
; CHECK-NEXT: ldrsb w10, [x8, #2]
; CHECK-NEXT: cmp w9, w10
; CHECK-NEXT: b.gt .LBB5_2
; CHECK-NEXT: // %bb.1: // %if.else
; CHECK-NEXT: mov w9, w10
; CHECK-NEXT: b.ge .LBB5_3
; CHECK-NEXT: .LBB5_2: // %if.end8.sink.split
; CHECK-NEXT: ldrb w10, [x8, #1]
; CHECK-NEXT: ldrb w11, [x8, #2]
; CHECK-NEXT: sxtb w9, w10
; CHECK-NEXT: cmp w9, w11, sxtb
; CHECK-NEXT: csel w9, w10, w11, gt
; CHECK-NEXT: cmp w10, w11
; CHECK-NEXT: b.eq .LBB5_2
; CHECK-NEXT: // %bb.1: // %if.end8.sink.split
; CHECK-NEXT: strb w9, [x8]
; CHECK-NEXT: .LBB5_3: // %if.end8
; CHECK-NEXT: .LBB5_2: // %if.end8
; CHECK-NEXT: ret
entry:
%0 = load i8, ptr getelementptr inbounds (%struct.s_signed_i8, ptr @cost_s, i64 0, i32 1), align 2
Expand Down Expand Up @@ -280,13 +278,11 @@ define void @test_i8_2cmp_unsigned_2() {
; CHECK-NEXT: ldrb w9, [x8, #1]
; CHECK-NEXT: ldrb w10, [x8, #2]
; CHECK-NEXT: cmp w9, w10
; CHECK-NEXT: b.hi .LBB7_2
; CHECK-NEXT: // %bb.1: // %if.else
; CHECK-NEXT: mov w9, w10
; CHECK-NEXT: b.hs .LBB7_3
; CHECK-NEXT: .LBB7_2: // %if.end8.sink.split
; CHECK-NEXT: csel w9, w9, w10, hi
; CHECK-NEXT: b.eq .LBB7_2
; CHECK-NEXT: // %bb.1: // %if.end8.sink.split
; CHECK-NEXT: strb w9, [x8]
; CHECK-NEXT: .LBB7_3: // %if.end8
; CHECK-NEXT: .LBB7_2: // %if.end8
; CHECK-NEXT: ret
entry:
%0 = load i8, ptr getelementptr inbounds (%struct.s_unsigned_i8, ptr @cost_u_i8, i64 0, i32 1), align 2
Expand Down
6 changes: 3 additions & 3 deletions llvm/test/CodeGen/AArch64/tailmerging_in_mbp.ll
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,10 @@

; CHECK-LABEL: test:
; CHECK-LABEL: %cond.false12.i
; CHECK: b.gt
; CHECK-NEXT: LBB0_8:
; CHECK: b.le
; CHECK-LABEL: LBB0_9:
; CHECK-NEXT: mov x8, x9
; CHECK-NEXT: LBB0_9:
; CHECK-NEXT: LBB0_10:
define i64 @test(i64 %n, ptr %a, ptr %b, ptr %c, ptr %d, ptr %e, ptr %f) {
entry:
%cmp28 = icmp sgt i64 %n, 1
Expand Down
49 changes: 18 additions & 31 deletions llvm/test/CodeGen/AArch64/typepromotion-cost.ll
Original file line number Diff line number Diff line change
Expand Up @@ -6,41 +6,28 @@
define i32 @needless_promotion(ptr nocapture noundef readonly %S, i64 noundef %red_cost) {
; CHECK-O2-LABEL: needless_promotion:
; CHECK-O2: // %bb.0: // %entry
; CHECK-O2-NEXT: ldrsh w8, [x0, #4]
; CHECK-O2-NEXT: tbnz w8, #31, .LBB0_3
; CHECK-O2-NEXT: // %bb.1: // %lor.rhs
; CHECK-O2-NEXT: cbz x1, .LBB0_5
; CHECK-O2-NEXT: // %bb.2:
; CHECK-O2-NEXT: mov w9, #2
; CHECK-O2-NEXT: b .LBB0_4
; CHECK-O2-NEXT: .LBB0_3:
; CHECK-O2-NEXT: mov w9, #1
; CHECK-O2-NEXT: .LBB0_4: // %lor.end.sink.split
; CHECK-O2-NEXT: cmp w8, w9
; CHECK-O2-NEXT: cset w0, eq
; CHECK-O2-NEXT: ret
; CHECK-O2-NEXT: .LBB0_5:
; CHECK-O2-NEXT: mov w0, wzr
; CHECK-O2-NEXT: ldrsh w9, [x0, #4]
; CHECK-O2-NEXT: mov w8, #1 // =0x1
; CHECK-O2-NEXT: cmp w9, #0
; CHECK-O2-NEXT: cinc w8, w8, ge
; CHECK-O2-NEXT: cmp w8, w9, uxth
; CHECK-O2-NEXT: cset w8, eq
; CHECK-O2-NEXT: cmp x1, #0
; CHECK-O2-NEXT: ccmn w9, #1, #4, eq
; CHECK-O2-NEXT: csel w0, wzr, w8, gt
; CHECK-O2-NEXT: ret
;
; CHECK-O3-LABEL: needless_promotion:
; CHECK-O3: // %bb.0: // %entry
; CHECK-O3-NEXT: ldrsh w8, [x0, #4]
; CHECK-O3-NEXT: tbnz w8, #31, .LBB0_3
; CHECK-O3-NEXT: // %bb.1: // %lor.rhs
; CHECK-O3-NEXT: cbz x1, .LBB0_4
; CHECK-O3-NEXT: // %bb.2:
; CHECK-O3-NEXT: mov w9, #2
; CHECK-O3-NEXT: cmp w8, w9
; CHECK-O3-NEXT: cset w0, eq
; CHECK-O3-NEXT: ret
; CHECK-O3-NEXT: .LBB0_3:
; CHECK-O3-NEXT: mov w9, #1
; CHECK-O3-NEXT: cmp w8, w9
; CHECK-O3-NEXT: cset w0, eq
; CHECK-O3-NEXT: ret
; CHECK-O3-NEXT: .LBB0_4:
; CHECK-O3-NEXT: mov w0, wzr
; CHECK-O3-NEXT: ldrsh w9, [x0, #4]
; CHECK-O3-NEXT: mov w8, #1 // =0x1
; CHECK-O3-NEXT: cmp w9, #0
; CHECK-O3-NEXT: cinc w8, w8, ge
; CHECK-O3-NEXT: cmp w8, w9, uxth
; CHECK-O3-NEXT: cset w8, eq
; CHECK-O3-NEXT: cmp x1, #0
; CHECK-O3-NEXT: ccmn w9, #1, #4, eq
; CHECK-O3-NEXT: csel w0, wzr, w8, gt
; CHECK-O3-NEXT: ret
entry:
%ident = getelementptr inbounds %struct.S, ptr %S, i64 0, i32 1
Expand Down
32 changes: 17 additions & 15 deletions llvm/test/CodeGen/PowerPC/ppc-ctr-dead-code.ll
Original file line number Diff line number Diff line change
Expand Up @@ -8,25 +8,27 @@
define signext i32 @limit_loop(i32 signext %iters, ptr nocapture readonly %vec, i32 signext %limit) local_unnamed_addr {
; CHECK-LABEL: limit_loop:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: mr 6, 3
; CHECK-NEXT: li 3, 0
; CHECK-NEXT: cmpwi 6, 0
; CHECK-NEXT: blelr 0
; CHECK-NEXT: cmpwi 3, 0
; CHECK-NEXT: ble 0, .LBB0_4
; CHECK-NEXT: # %bb.1: # %for.body.preheader
; CHECK-NEXT: mtctr 6
; CHECK-NEXT: addi 4, 4, -4
; CHECK-NEXT: b .LBB0_3
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: .LBB0_2: # %for.cond
; CHECK-NEXT: #
; CHECK-NEXT: bdzlr
; CHECK-NEXT: .LBB0_3: # %for.body
; CHECK-NEXT: li 6, 1
; CHECK-NEXT: .p2align 5
; CHECK-NEXT: .LBB0_2: # %for.body
; CHECK-NEXT: #
; CHECK-NEXT: lwzu 6, 4(4)
; CHECK-NEXT: cmpw 6, 5
; CHECK-NEXT: blt 0, .LBB0_2
; CHECK-NEXT: # %bb.4:
; CHECK-NEXT: lwzu 7, 4(4)
; CHECK-NEXT: cmpd 1, 6, 3
; CHECK-NEXT: addi 6, 6, 1
; CHECK-NEXT: cmpw 7, 5
; CHECK-NEXT: crand 20, 0, 4
; CHECK-NEXT: bc 12, 20, .LBB0_2
; CHECK-NEXT: # %bb.3: # %cleanup.loopexit
; CHECK-NEXT: li 3, 1
; CHECK-NEXT: isellt 3, 0, 3
; CHECK-NEXT: clrldi 3, 3, 32
; CHECK-NEXT: blr
; CHECK-NEXT: .LBB0_4:
; CHECK-NEXT: li 3, 0
; CHECK-NEXT: blr
entry:
%cmp5 = icmp sgt i32 %iters, 0
Expand Down
27 changes: 16 additions & 11 deletions llvm/test/CodeGen/X86/loop-search.ll
Original file line number Diff line number Diff line change
Expand Up @@ -10,24 +10,29 @@ define zeroext i1 @search(i32 %needle, ptr nocapture readonly %haystack, i32 %co
; CHECK-NEXT: testl %edx, %edx
; CHECK-NEXT: jle LBB0_5
; CHECK-NEXT: ## %bb.1: ## %for.body.preheader
; CHECK-NEXT: movslq %edx, %rax
; CHECK-NEXT: xorl %ecx, %ecx
; CHECK-NEXT: movslq %edx, %rcx
; CHECK-NEXT: movl $1, %edx
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: LBB0_2: ## %for.body
; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1
; CHECK-NEXT: cmpl %edi, (%rsi,%rcx,4)
; CHECK-NEXT: je LBB0_6
; CHECK-NEXT: ## %bb.3: ## %for.cond
; CHECK-NEXT: movl -4(%rsi,%rdx,4), %r8d
; CHECK-NEXT: cmpl %edi, %r8d
; CHECK-NEXT: sete %al
; CHECK-NEXT: negb %al
; CHECK-NEXT: cmpl %edi, %r8d
; CHECK-NEXT: je LBB0_4
; CHECK-NEXT: ## %bb.3: ## %for.body
; CHECK-NEXT: ## in Loop: Header=BB0_2 Depth=1
; CHECK-NEXT: incq %rcx
; CHECK-NEXT: cmpq %rax, %rcx
; CHECK-NEXT: cmpq %rcx, %rdx
; CHECK-NEXT: leaq 1(%rdx), %rdx
; CHECK-NEXT: jl LBB0_2
; CHECK-NEXT: LBB0_5:
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: LBB0_4: ## %cleanup
; CHECK-NEXT: andb $1, %al
; CHECK-NEXT: ## kill: def $al killed $al killed $eax
; CHECK-NEXT: retq
; CHECK-NEXT: LBB0_6:
; CHECK-NEXT: movb $1, %al
; CHECK-NEXT: LBB0_5:
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: andb $1, %al
; CHECK-NEXT: ## kill: def $al killed $al killed $eax
; CHECK-NEXT: retq
entry:
Expand Down
Loading