Skip to content
This repository was archived by the owner on Sep 2, 2018. It is now read-only.

Commit f2fc1eb

Browse files
author
jamesm
committed
[Thumb-1] Synthesize TBB/TBH instructions to make use of compressed jump tables
The TBB and TBH instructions in Thumb-2 allow jump tables to be compressed into sequences of bytes or shorts respectively. These instructions do not exist in Thumb-1, however it is possible to synthesize them out of a sequence of other instructions.

It turns out this sequence is so short that it's almost never a loss for performance and is ALWAYS a significant win for code size.

TBB example:

    Before: lsls r0, r0, #2        After: add  r0, pc
            adr  r1, .LJTI0_0             ldrb r0, [r0, #6]
            ldr  r0, [r0, r1]             lsls r0, r0, #1
            mov  pc, r0                   add  pc, r0

=> No change in prologue code size or dynamic instruction count. Jump table shrunk by a factor of 4.

The only case that can increase dynamic instruction count is the TBH case:

    Before: lsls r0, r4, #2        After: lsls r4, r4, #1
            adr  r1, .LJTI0_0             add  r4, pc
            ldr  r0, [r0, r1]             ldrh r4, [r4, #6]
            mov  pc, r0                   lsls r4, r4, #1
                                          add  pc, r4

=> 1 more instruction in prologue. Jump table shrunk by a factor of 2.

So there is an argument that this should be disabled when optimizing for performance (and a TBH needs to be generated). I'm not so sure about that in practice, because on small cores with Thumb-1 performance is often tied to code size. But I'm willing to turn it off when optimizing for performance if people want (also note that TBHs are fairly rare in practice!)

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@284580 91177308-0d34-0410-b5e6-96231b3b80d8
1 parent 8a22bb7 commit f2fc1eb

8 files changed

+262
-37
lines changed

lib/Target/ARM/ARMAsmPrinter.cpp

+77
Original file line numberDiff line numberDiff line change
@@ -1711,6 +1711,83 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
17111711
.addReg(0));
17121712
return;
17131713
}
1714+
case ARM::tTBB_JT:
1715+
case ARM::tTBH_JT: {
1716+
1717+
bool Is8Bit = MI->getOpcode() == ARM::tTBB_JT;
1718+
unsigned Base = MI->getOperand(0).getReg();
1719+
unsigned Idx = MI->getOperand(1).getReg();
1720+
assert(MI->getOperand(1).isKill() && "We need the index register as scratch!");
1721+
1722+
// Multiply up idx if necessary.
1723+
if (!Is8Bit)
1724+
EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::tLSLri)
1725+
.addReg(Idx)
1726+
.addReg(ARM::CPSR)
1727+
.addReg(Idx)
1728+
.addImm(1)
1729+
// Add predicate operands.
1730+
.addImm(ARMCC::AL)
1731+
.addReg(0));
1732+
1733+
if (Base == ARM::PC) {
1734+
// TBB [base, idx] =
1735+
// ADDS idx, idx, base
1736+
// LDRB idx, [idx, #4] ; or LDRH if TBH
1737+
// LSLS idx, #1
1738+
// ADDS pc, pc, idx
1739+
1740+
EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::tADDhirr)
1741+
.addReg(Idx)
1742+
.addReg(Idx)
1743+
.addReg(Base)
1744+
// Add predicate operands.
1745+
.addImm(ARMCC::AL)
1746+
.addReg(0));
1747+
1748+
unsigned Opc = Is8Bit ? ARM::tLDRBi : ARM::tLDRHi;
1749+
EmitToStreamer(*OutStreamer, MCInstBuilder(Opc)
1750+
.addReg(Idx)
1751+
.addReg(Idx)
1752+
.addImm(Is8Bit ? 4 : 2)
1753+
// Add predicate operands.
1754+
.addImm(ARMCC::AL)
1755+
.addReg(0));
1756+
} else {
1757+
// TBB [base, idx] =
1758+
// LDRB idx, [base, idx] ; or LDRH if TBH
1759+
// LSLS idx, #1
1760+
// ADDS pc, pc, idx
1761+
1762+
unsigned Opc = Is8Bit ? ARM::tLDRBr : ARM::tLDRHr;
1763+
EmitToStreamer(*OutStreamer, MCInstBuilder(Opc)
1764+
.addReg(Idx)
1765+
.addReg(Base)
1766+
.addReg(Idx)
1767+
// Add predicate operands.
1768+
.addImm(ARMCC::AL)
1769+
.addReg(0));
1770+
}
1771+
1772+
EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::tLSLri)
1773+
.addReg(Idx)
1774+
.addReg(ARM::CPSR)
1775+
.addReg(Idx)
1776+
.addImm(1)
1777+
// Add predicate operands.
1778+
.addImm(ARMCC::AL)
1779+
.addReg(0));
1780+
1781+
OutStreamer->EmitLabel(GetCPISymbol(MI->getOperand(3).getImm()));
1782+
EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::tADDhirr)
1783+
.addReg(ARM::PC)
1784+
.addReg(ARM::PC)
1785+
.addReg(Idx)
1786+
// Add predicate operands.
1787+
.addImm(ARMCC::AL)
1788+
.addReg(0));
1789+
return;
1790+
}
17141791
case ARM::tBR_JTr:
17151792
case ARM::BR_JTr: {
17161793
// Lower and emit the instruction itself, then the jump table following it.

lib/Target/ARM/ARMConstantIslandPass.cpp

+89-14
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,11 @@ static cl::opt<unsigned>
5858
CPMaxIteration("arm-constant-island-max-iteration", cl::Hidden, cl::init(30),
5959
cl::desc("The max number of iteration for converge"));
6060

61+
static cl::opt<bool> SynthesizeThumb1TBB(
62+
"arm-synthesize-thumb-1-tbb", cl::Hidden, cl::init(true),
63+
cl::desc("Use compressed jump tables in Thumb-1 by synthesizing an "
64+
"equivalent to the TBB/TBH instructions"));
65+
6166
namespace {
6267
/// ARMConstantIslands - Due to limited PC-relative displacements, ARM
6368
/// requires constant pool entries to be scattered among the instructions
@@ -189,6 +194,7 @@ namespace {
189194
bool isThumb;
190195
bool isThumb1;
191196
bool isThumb2;
197+
bool isPositionIndependentOrROPI;
192198
public:
193199
static char ID;
194200
ARMConstantIslands() : MachineFunctionPass(ID) {}
@@ -319,13 +325,16 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) {
319325

320326
STI = &static_cast<const ARMSubtarget &>(MF->getSubtarget());
321327
TII = STI->getInstrInfo();
328+
isPositionIndependentOrROPI =
329+
STI->getTargetLowering()->isPositionIndependent() || STI->isROPI();
322330
AFI = MF->getInfo<ARMFunctionInfo>();
323331

324332
isThumb = AFI->isThumbFunction();
325333
isThumb1 = AFI->isThumb1OnlyFunction();
326334
isThumb2 = AFI->isThumb2Function();
327335

328336
HasFarJump = false;
337+
bool GenerateTBB = isThumb2 || (isThumb1 && SynthesizeThumb1TBB);
329338

330339
// This pass invalidates liveness information when it splits basic blocks.
331340
MF->getRegInfo().invalidateLiveness();
@@ -337,7 +346,7 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) {
337346
// Try to reorder and otherwise adjust the block layout to make good use
338347
// of the TB[BH] instructions.
339348
bool MadeChange = false;
340-
if (isThumb2 && AdjustJumpTableBlocks) {
349+
if (GenerateTBB && AdjustJumpTableBlocks) {
341350
scanFunctionJumpTables();
342351
MadeChange |= reorderThumb2JumpTables();
343352
// Data is out of date, so clear it. It'll be re-computed later.
@@ -414,7 +423,7 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) {
414423
MadeChange |= optimizeThumb2Branches();
415424

416425
// Optimize jump tables using TBB / TBH.
417-
if (isThumb2)
426+
if (GenerateTBB)
418427
MadeChange |= optimizeThumb2JumpTables();
419428

420429
// After a while, this might be made debug-only, but it is not expensive.
@@ -540,9 +549,11 @@ void ARMConstantIslands::doInitialJumpTablePlacement(
540549
case ARM::t2BR_JT:
541550
JTOpcode = ARM::JUMPTABLE_INSTS;
542551
break;
552+
case ARM::tTBB_JT:
543553
case ARM::t2TBB_JT:
544554
JTOpcode = ARM::JUMPTABLE_TBB;
545555
break;
556+
case ARM::tTBH_JT:
546557
case ARM::t2TBH_JT:
547558
JTOpcode = ARM::JUMPTABLE_TBH;
548559
break;
@@ -638,7 +649,8 @@ unsigned ARMConstantIslands::getCPELogAlign(const MachineInstr *CPEMI) {
638649
void ARMConstantIslands::scanFunctionJumpTables() {
639650
for (MachineBasicBlock &MBB : *MF) {
640651
for (MachineInstr &I : MBB)
641-
if (I.isBranch() && I.getOpcode() == ARM::t2BR_JT)
652+
if (I.isBranch() &&
653+
(I.getOpcode() == ARM::t2BR_JT || I.getOpcode() == ARM::tBR_JTr))
642654
T2JumpTables.push_back(&I);
643655
}
644656
}
@@ -679,6 +691,7 @@ initializeFunctionInfo(const std::vector<MachineInstr*> &CPEMIs) {
679691
default:
680692
continue; // Ignore other JT branches
681693
case ARM::t2BR_JT:
694+
case ARM::tBR_JTr:
682695
T2JumpTables.push_back(&I);
683696
continue; // Does not get an entry in ImmBranches
684697
case ARM::Bcc:
@@ -1943,7 +1956,7 @@ bool ARMConstantIslands::preserveBaseRegister(MachineInstr *JumpMI,
19431956

19441957
if (RemovableAdd) {
19451958
RemovableAdd->eraseFromParent();
1946-
DeadSize += 4;
1959+
DeadSize += isThumb2 ? 4 : 2;
19471960
} else if (BaseReg == EntryReg) {
19481961
// The add wasn't removable, but clobbered the base for the TBB. So we can't
19491962
// preserve it.
@@ -2010,25 +2023,80 @@ bool ARMConstantIslands::optimizeThumb2JumpTables() {
20102023
if (!ByteOk && !HalfWordOk)
20112024
continue;
20122025

2026+
CPUser &User = CPUsers[JumpTableUserIndices[JTI]];
20132027
MachineBasicBlock *MBB = MI->getParent();
20142028
if (!MI->getOperand(0).isKill()) // FIXME: needed now?
20152029
continue;
2016-
unsigned IdxReg = MI->getOperand(1).getReg();
2017-
bool IdxRegKill = MI->getOperand(1).isKill();
20182030

2019-
CPUser &User = CPUsers[JumpTableUserIndices[JTI]];
20202031
unsigned DeadSize = 0;
20212032
bool CanDeleteLEA = false;
20222033
bool BaseRegKill = false;
2023-
bool PreservedBaseReg =
2034+
2035+
unsigned IdxReg = ~0U;
2036+
bool IdxRegKill = true;
2037+
if (isThumb2) {
2038+
IdxReg = MI->getOperand(1).getReg();
2039+
IdxRegKill = MI->getOperand(1).isKill();
2040+
2041+
bool PreservedBaseReg =
20242042
preserveBaseRegister(MI, User.MI, DeadSize, CanDeleteLEA, BaseRegKill);
2043+
if (!jumpTableFollowsTB(MI, User.CPEMI) && !PreservedBaseReg)
2044+
continue;
2045+
} else {
2046+
// We're in thumb-1 mode, so we must have something like:
2047+
// %idx = tLSLri %idx, 2
2048+
// %base = tLEApcrelJT
2049+
// %t = tLDRr %idx, %base
2050+
unsigned BaseReg = User.MI->getOperand(0).getReg();
2051+
2052+
MachineInstr *Shift = User.MI->getPrevNode();
2053+
if (Shift->getOpcode() != ARM::tLSLri ||
2054+
Shift->getOperand(3).getImm() != 2 ||
2055+
!Shift->getOperand(2).isKill())
2056+
continue;
2057+
IdxReg = Shift->getOperand(2).getReg();
2058+
unsigned ShiftedIdxReg = Shift->getOperand(0).getReg();
20252059

2026-
if (!jumpTableFollowsTB(MI, User.CPEMI) && !PreservedBaseReg)
2027-
continue;
2060+
MachineInstr *Load = User.MI->getNextNode();
2061+
if (Load->getOpcode() != ARM::tLDRr)
2062+
continue;
2063+
if (Load->getOperand(1).getReg() != ShiftedIdxReg ||
2064+
Load->getOperand(2).getReg() != BaseReg ||
2065+
!Load->getOperand(1).isKill())
2066+
continue;
20282067

2068+
// If we're in PIC mode, there should be another ADD following.
2069+
if (isPositionIndependentOrROPI) {
2070+
MachineInstr *Add = Load->getNextNode();
2071+
if (Add->getOpcode() != ARM::tADDrr ||
2072+
Add->getOperand(2).getReg() != Load->getOperand(0).getReg() ||
2073+
Add->getOperand(3).getReg() != BaseReg ||
2074+
!Add->getOperand(2).isKill())
2075+
continue;
2076+
if (Add->getOperand(0).getReg() != MI->getOperand(0).getReg())
2077+
continue;
2078+
2079+
Add->eraseFromParent();
2080+
DeadSize += 2;
2081+
} else {
2082+
if (Load->getOperand(0).getReg() != MI->getOperand(0).getReg())
2083+
continue;
2084+
}
2085+
2086+
2087+
// Now safe to delete the load and lsl. The LEA will be removed later.
2088+
CanDeleteLEA = true;
2089+
Shift->eraseFromParent();
2090+
Load->eraseFromParent();
2091+
DeadSize += 4;
2092+
}
2093+
20292094
DEBUG(dbgs() << "Shrink JT: " << *MI);
20302095
MachineInstr *CPEMI = User.CPEMI;
20312096
unsigned Opc = ByteOk ? ARM::t2TBB_JT : ARM::t2TBH_JT;
2097+
if (!isThumb2)
2098+
Opc = ByteOk ? ARM::tTBB_JT : ARM::tTBH_JT;
2099+
20322100
MachineBasicBlock::iterator MI_JT = MI;
20332101
MachineInstr *NewJTMI =
20342102
BuildMI(*MBB, MI_JT, MI->getDebugLoc(), TII->get(Opc))
@@ -2048,7 +2116,7 @@ bool ARMConstantIslands::optimizeThumb2JumpTables() {
20482116

20492117
if (CanDeleteLEA) {
20502118
User.MI->eraseFromParent();
2051-
DeadSize += 4;
2119+
DeadSize += isThumb2 ? 4 : 2;
20522120

20532121
// The LEA was eliminated, the TBB instruction becomes the only new user
20542122
// of the jump table.
@@ -2164,9 +2232,16 @@ adjustJTTargetBlockForward(MachineBasicBlock *BB, MachineBasicBlock *JTBB) {
21642232
// Add an unconditional branch from NewBB to BB.
21652233
// There doesn't seem to be meaningful DebugInfo available; this doesn't
21662234
// correspond directly to anything in the source.
2167-
assert (isThumb2 && "Adjusting for TB[BH] but not in Thumb2?");
2168-
BuildMI(NewBB, DebugLoc(), TII->get(ARM::t2B)).addMBB(BB)
2169-
.addImm(ARMCC::AL).addReg(0);
2235+
if (isThumb2)
2236+
BuildMI(NewBB, DebugLoc(), TII->get(ARM::t2B))
2237+
.addMBB(BB)
2238+
.addImm(ARMCC::AL)
2239+
.addReg(0);
2240+
else
2241+
BuildMI(NewBB, DebugLoc(), TII->get(ARM::tB))
2242+
.addMBB(BB)
2243+
.addImm(ARMCC::AL)
2244+
.addReg(0);
21702245

21712246
// Update internal data structures to account for the newly inserted MBB.
21722247
MF->RenumberBlocks(NewBB);

lib/Target/ARM/ARMInstrThumb.td

+12
Original file line numberDiff line numberDiff line change
@@ -1308,6 +1308,18 @@ def tLEApcrelJT : tPseudoInst<(outs tGPR:$Rd),
13081308
(ins i32imm:$label, pred:$p),
13091309
2, IIC_iALUi, []>, Sched<[WriteALU]>;
13101310

1311+
// Thumb-1 doesn't have the TBB or TBH instructions, but we can synthesize them
1312+
// and make use of the same compressed jump table format as Thumb-2.
1313+
let Size = 2 in {
1314+
def tTBB_JT : tPseudoInst<(outs),
1315+
(ins tGPR:$base, tGPR:$index, i32imm:$jt, i32imm:$pclbl), 0, IIC_Br, []>,
1316+
Sched<[WriteBr]>;
1317+
1318+
def tTBH_JT : tPseudoInst<(outs),
1319+
(ins tGPR:$base, tGPR:$index, i32imm:$jt, i32imm:$pclbl), 0, IIC_Br, []>,
1320+
Sched<[WriteBr]>;
1321+
}
1322+
13111323
//===----------------------------------------------------------------------===//
13121324
// TLS Instructions
13131325
//

test/CodeGen/ARM/arm-position-independence-jump-table.ll

+13-21
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,9 @@
88
; RUN: llc -relocation-model=ropi -mtriple=thumbv7m--none-eabi -disable-block-placement < %s | FileCheck %s --check-prefix=CHECK --check-prefix=THUMB2
99
; RUN: llc -relocation-model=ropi-rwpi -mtriple=thumbv7m--none-eabi -disable-block-placement < %s | FileCheck %s --check-prefix=CHECK --check-prefix=THUMB2
1010

11-
; RUN: llc -relocation-model=static -mtriple=thumbv6m--none-eabi -disable-block-placement < %s | FileCheck %s --check-prefix=CHECK --check-prefix=THUMB1 --check-prefix=THUMB1_ABS
12-
; RUN: llc -relocation-model=ropi -mtriple=thumbv6m--none-eabi -disable-block-placement < %s | FileCheck %s --check-prefix=CHECK --check-prefix=THUMB1 --check-prefix=THUMB1_PC
13-
; RUN: llc -relocation-model=ropi-rwpi -mtriple=thumbv6m--none-eabi -disable-block-placement < %s | FileCheck %s --check-prefix=CHECK --check-prefix=THUMB1 --check-prefix=THUMB1_PC
11+
; RUN: llc -relocation-model=static -mtriple=thumbv6m--none-eabi -disable-block-placement < %s | FileCheck %s --check-prefix=CHECK --check-prefix=THUMB1
12+
; RUN: llc -relocation-model=ropi -mtriple=thumbv6m--none-eabi -disable-block-placement < %s | FileCheck %s --check-prefix=CHECK --check-prefix=THUMB1
13+
; RUN: llc -relocation-model=ropi-rwpi -mtriple=thumbv6m--none-eabi -disable-block-placement < %s | FileCheck %s --check-prefix=CHECK --check-prefix=THUMB1
1414

1515

1616
declare void @exit0()
@@ -85,30 +85,22 @@ lab4:
8585
; THUMB2: [[LBB4]]
8686
; THUMB2-NEXT: b exit4
8787

88-
; THUMB1: lsls r[[R_TAB_INDEX:[0-9]+]], r{{[0-9]+}}, #2
89-
; THUMB1: adr r[[R_TAB_BASE:[0-9]+]], [[LJTI:\.LJTI[0-9]+_[0-9]+]]
90-
; THUMB1: ldr r[[R_BB_ADDR:[0-9]+]], [r[[R_TAB_INDEX]], r[[R_TAB_BASE]]]
91-
; THUMB1_PC: adds r[[R_BB_ADDR]], r[[R_BB_ADDR]], r[[R_TAB_BASE]]
92-
; THUMB1: mov pc, r[[R_BB_ADDR]]
93-
; THUMB1: [[LJTI]]
94-
; THUMB1_ABS: .long [[LBB1:\.LBB[0-9]+_[0-9]+]]+1
95-
; THUMB1_ABS: .long [[LBB2:\.LBB[0-9]+_[0-9]+]]+1
96-
; THUMB1_ABS: .long [[LBB3:\.LBB[0-9]+_[0-9]+]]+1
97-
; THUMB1_ABS: .long [[LBB4:\.LBB[0-9]+_[0-9]+]]+1
98-
; THUMB1_PC: .long [[LBB1:\.LBB[0-9]+_[0-9]+]]-[[LJTI]]
99-
; THUMB1_PC: .long [[LBB2:\.LBB[0-9]+_[0-9]+]]-[[LJTI]]
100-
; THUMB1_PC: .long [[LBB3:\.LBB[0-9]+_[0-9]+]]-[[LJTI]]
101-
; THUMB1_PC: .long [[LBB4:\.LBB[0-9]+_[0-9]+]]-[[LJTI]]
88+
89+
; THUMB1: add r[[x:[0-9]+]], pc
90+
; THUMB1: ldrb r[[x]], [r[[x]], #4]
91+
; THUMB1: lsls r[[x]], r[[x]], #1
92+
; THUMB1: [[LCPI:\.LCPI[0-9]+_[0-9]+]]:
93+
; THUMB1: add pc, r[[x]]
94+
; THUMB1: .byte ([[LBB1:\.LBB[0-9]+_[0-9]+]]-([[LCPI]]+4))/2
95+
; THUMB1: .byte ([[LBB2:\.LBB[0-9]+_[0-9]+]]-([[LCPI]]+4))/2
96+
; THUMB1: .byte ([[LBB3:\.LBB[0-9]+_[0-9]+]]-([[LCPI]]+4))/2
97+
; THUMB1: .byte ([[LBB4:\.LBB[0-9]+_[0-9]+]]-([[LCPI]]+4))/2
10298
; THUMB1: [[LBB1]]
10399
; THUMB1-NEXT: bl exit1
104-
; THUMB1-NEXT: pop
105100
; THUMB1: [[LBB2]]
106101
; THUMB1-NEXT: bl exit2
107-
; THUMB1-NEXT: pop
108102
; THUMB1: [[LBB3]]
109103
; THUMB1-NEXT: bl exit3
110-
; THUMB1-NEXT: pop
111104
; THUMB1: [[LBB4]]
112105
; THUMB1-NEXT: bl exit4
113-
; THUMB1-NEXT: pop
114106
}

0 commit comments

Comments
 (0)