Skip to content

Commit ca20c99

Browse files
authored
[GlobalISel][IRTranslator] Port switch binary tree search optimization. (#77279)
This reuses some code extracted earlier from SelectionDAG into SwitchLoweringUtils. Much of the code is a straight port from SDAG's splitWorkItem(), with minor changes needed for GISel.
1 parent 6343b4e commit ca20c99

File tree

3 files changed

+99
-18
lines changed

3 files changed

+99
-18
lines changed

llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -366,6 +366,10 @@ class IRTranslator : public MachineFunctionPass {
366366
BranchProbability BranchProbToNext, Register Reg,
367367
SwitchCG::BitTestCase &B, MachineBasicBlock *SwitchBB);
368368

369+
void splitWorkItem(SwitchCG::SwitchWorkList &WorkList,
370+
const SwitchCG::SwitchWorkListItem &W, Value *Cond,
371+
MachineBasicBlock *SwitchMBB, MachineIRBuilder &MIB);
372+
369373
bool lowerJumpTableWorkItem(
370374
SwitchCG::SwitchWorkListItem W, MachineBasicBlock *SwitchMBB,
371375
MachineBasicBlock *CurMBB, MachineBasicBlock *DefaultMBB,

llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp

Lines changed: 77 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -751,16 +751,91 @@ bool IRTranslator::translateSwitch(const User &U, MachineIRBuilder &MIB) {
751751
auto DefaultProb = getEdgeProbability(SwitchMBB, DefaultMBB);
752752
WorkList.push_back({SwitchMBB, First, Last, nullptr, nullptr, DefaultProb});
753753

754-
// FIXME: At the moment we don't do any splitting optimizations here like
755-
// SelectionDAG does, so this worklist only has one entry.
756754
while (!WorkList.empty()) {
757755
SwitchWorkListItem W = WorkList.pop_back_val();
756+
757+
unsigned NumClusters = W.LastCluster - W.FirstCluster + 1;
758+
// For optimized builds, lower large range as a balanced binary tree.
759+
if (NumClusters > 3 &&
760+
MF->getTarget().getOptLevel() != CodeGenOptLevel::None &&
761+
!DefaultMBB->getParent()->getFunction().hasMinSize()) {
762+
splitWorkItem(WorkList, W, SI.getCondition(), SwitchMBB, MIB);
763+
continue;
764+
}
765+
758766
if (!lowerSwitchWorkItem(W, SI.getCondition(), SwitchMBB, DefaultMBB, MIB))
759767
return false;
760768
}
761769
return true;
762770
}
763771

772+
/// Split the switch work item \p W into a left and a right half around a
/// pivot cluster and record the compare-and-branch that selects between them.
///
/// The split point and the two branch probabilities come from
/// SL->computeSplitWorkItemInfo(W). For each half, either the half's single
/// cluster destination is branched to directly, or a new MachineBasicBlock is
/// created, inserted after W.MBB, and a new work item for that half is pushed
/// onto \p WorkList.
///
/// \param WorkList  Worklist that receives the new left/right work items.
/// \param W         The work item (cluster range plus GE/LT bounds) to split.
/// \param Cond      The switch condition value used in the pivot comparison.
/// \param SwitchMBB The block of the switch itself; if W.MBB is this block the
///                  CaseBlock is emitted right away via emitSwitchCase().
/// \param MIB       Builder used for the debug location and for emission.
void IRTranslator::splitWorkItem(SwitchCG::SwitchWorkList &WorkList,
773+
const SwitchCG::SwitchWorkListItem &W,
774+
Value *Cond, MachineBasicBlock *SwitchMBB,
775+
MachineIRBuilder &MIB) {
776+
using namespace SwitchCG;
777+
assert(W.FirstCluster->Low->getValue().slt(W.LastCluster->Low->getValue()) &&
778+
"Clusters not sorted?");
779+
assert(W.LastCluster - W.FirstCluster + 1 >= 2 && "Too small to split!");
780+
781+
// LastLeft/FirstRight delimit the two halves; LeftProb/RightProb are the
// probabilities attached to the two successors of the pivot branch below.
auto [LastLeft, FirstRight, LeftProb, RightProb] =
782+
SL->computeSplitWorkItemInfo(W);
783+
784+
// Use the first element on the right as pivot since we will make less-than
785+
// comparisons against it.
786+
CaseClusterIt PivotCluster = FirstRight;
787+
assert(PivotCluster > W.FirstCluster);
788+
assert(PivotCluster <= W.LastCluster);
789+
790+
CaseClusterIt FirstLeft = W.FirstCluster;
791+
CaseClusterIt LastRight = W.LastCluster;
792+
793+
const ConstantInt *Pivot = PivotCluster->Low;
794+
795+
// New blocks will be inserted immediately after the current one.
796+
MachineFunction::iterator BBI(W.MBB);
797+
++BBI;
798+
799+
// We will branch to the LHS if Value < Pivot. If LHS is a single cluster,
800+
// we can branch to its destination directly if it's squeezed exactly in
801+
// between the known lower bound and Pivot - 1.
802+
MachineBasicBlock *LeftMBB;
803+
// NOTE(review): this check adds 1LL while the right-hand check further down
// adds 1ULL -- confirm the difference in literal type is intentional.
if (FirstLeft == LastLeft && FirstLeft->Kind == CC_Range &&
804+
FirstLeft->Low == W.GE &&
805+
(FirstLeft->High->getValue() + 1LL) == Pivot->getValue()) {
806+
LeftMBB = FirstLeft->MBB;
807+
} else {
808+
LeftMBB = FuncInfo.MF->CreateMachineBasicBlock(W.MBB->getBasicBlock());
809+
FuncInfo.MF->insert(BBI, LeftMBB);
810+
// The left half keeps W's lower bound, uses Pivot as its upper bound, and
// is given half of W's default probability.
WorkList.push_back(
811+
{LeftMBB, FirstLeft, LastLeft, W.GE, Pivot, W.DefaultProb / 2});
812+
}
813+
814+
// Similarly, we will branch to the RHS if Value >= Pivot. If RHS is a
815+
// single cluster, RHS.Low == Pivot, and we can branch to its destination
816+
// directly if RHS.High equals the current upper bound.
817+
MachineBasicBlock *RightMBB;
818+
if (FirstRight == LastRight && FirstRight->Kind == CC_Range && W.LT &&
819+
(FirstRight->High->getValue() + 1ULL) == W.LT->getValue()) {
820+
RightMBB = FirstRight->MBB;
821+
} else {
822+
RightMBB = FuncInfo.MF->CreateMachineBasicBlock(W.MBB->getBasicBlock());
823+
FuncInfo.MF->insert(BBI, RightMBB);
824+
// The right half uses Pivot as its lower bound, keeps W's upper bound, and
// is given the other half of W's default probability.
WorkList.push_back(
825+
{RightMBB, FirstRight, LastRight, Pivot, W.LT, W.DefaultProb / 2});
826+
}
827+
828+
// Create the CaseBlock record that will be used to lower the branch.
829+
CaseBlock CB(ICmpInst::Predicate::ICMP_SLT, false, Cond, Pivot, nullptr,
830+
LeftMBB, RightMBB, W.MBB, MIB.getDebugLoc(), LeftProb,
831+
RightProb);
832+
833+
// A split of the switch's own block is emitted immediately; any other block
// has its CaseBlock queued in SL->SwitchCases instead.
if (W.MBB == SwitchMBB)
834+
emitSwitchCase(CB, SwitchMBB, MIB);
835+
else
836+
SL->SwitchCases.push_back(CB);
837+
}
838+
764839
void IRTranslator::emitJumpTable(SwitchCG::JumpTable &JT,
765840
MachineBasicBlock *MBB) {
766841
// Emit the code for the jump table

llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-switch-split.ll

Lines changed: 18 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -17,31 +17,33 @@ define i32 @scanfile(i32 %call148) {
1717
; CHECK-NEXT: .cfi_offset w30, -8
1818
; CHECK-NEXT: .cfi_offset w29, -16
1919
; CHECK-NEXT: mov w8, w0
20+
; CHECK-NEXT: cmp w0, #1
2021
; CHECK-NEXT: mov w0, wzr
21-
; CHECK-NEXT: cbz w8, LBB0_7
22+
; CHECK-NEXT: b.ge LBB0_3
2223
; CHECK-NEXT: ; %bb.1: ; %entry
23-
; CHECK-NEXT: cmp w8, #1
24-
; CHECK-NEXT: b.eq LBB0_7
25-
; CHECK-NEXT: ; %bb.2: ; %entry
24+
; CHECK-NEXT: cbnz w8, LBB0_7
25+
; CHECK-NEXT: LBB0_2: ; %common.ret1
26+
; CHECK-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
27+
; CHECK-NEXT: ret
28+
; CHECK-NEXT: LBB0_3: ; %entry
29+
; CHECK-NEXT: b.eq LBB0_2
30+
; CHECK-NEXT: ; %bb.4: ; %entry
2631
; CHECK-NEXT: cmp w8, #2
27-
; CHECK-NEXT: b.eq LBB0_4
28-
; CHECK-NEXT: ; %bb.3: ; %entry
32+
; CHECK-NEXT: b.eq LBB0_6
33+
; CHECK-NEXT: ; %bb.5: ; %entry
2934
; CHECK-NEXT: cmp w8, #3
30-
; CHECK-NEXT: b.ne LBB0_5
31-
; CHECK-NEXT: LBB0_4: ; %sw.bb300
35+
; CHECK-NEXT: b.ne LBB0_2
36+
; CHECK-NEXT: LBB0_6: ; %sw.bb300
3237
; CHECK-NEXT: bl _logg
3338
; CHECK-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
3439
; CHECK-NEXT: ret
35-
; CHECK-NEXT: LBB0_5: ; %entry
40+
; CHECK-NEXT: LBB0_7: ; %entry
3641
; CHECK-NEXT: cmn w8, #2
37-
; CHECK-NEXT: b.eq LBB0_8
38-
; CHECK-NEXT: ; %bb.6: ; %entry
42+
; CHECK-NEXT: b.eq LBB0_9
43+
; CHECK-NEXT: ; %bb.8: ; %entry
3944
; CHECK-NEXT: cmn w8, #1
40-
; CHECK-NEXT: b.eq LBB0_8
41-
; CHECK-NEXT: LBB0_7: ; %common.ret1
42-
; CHECK-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
43-
; CHECK-NEXT: ret
44-
; CHECK-NEXT: LBB0_8: ; %sw.bb150
45+
; CHECK-NEXT: b.ne LBB0_2
46+
; CHECK-NEXT: LBB0_9: ; %sw.bb150
4547
; CHECK-NEXT: bl _logg
4648
; CHECK-NEXT: brk #0x1
4749
entry:

0 commit comments

Comments
 (0)