
Commit b015ecd

neykov authored and alexcrichton committed
Add stack overflow check for ARM Thumb instruction set.
Besides the mechanical changes between the ARM and Thumb functions due to the different instruction sets, there is a difference in how the stack limit is located. The ARM version uses hardware that isn't available on the lower-end Thumb processors (namely the system coprocessor and MMU), so the stack limit is instead placed at a predefined location in memory, STACK_LIMIT. It is the responsibility of the wrapping runtime to keep this location populated with the correct value; it can be anything from a simple constant defined by the linker to a variable actively managed by an RTOS implementation.
1 parent f5f1ffe commit b015ecd
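As a rough illustration of the runtime-side contract described in the commit message (not part of this commit; the only name taken from the patch is STACK_LIMIT, while init_stack_limit, on_context_switch, and the 64-byte margin are hypothetical), a bare-metal or RTOS runtime might provide the limit word like this:

#include <cstdint>

extern "C" {
// The word the generated Thumb prologue dereferences through its constant-pool
// reference to STACK_LIMIT. A linker script could pin it to a fixed address,
// or startup code can initialize it once.
std::uintptr_t STACK_LIMIT = 0;
}

// Hypothetical startup hook: point the limit at the bottom of the initial
// stack, keeping a small safety margin below it.
void init_stack_limit(std::uintptr_t stack_bottom, std::uintptr_t margin = 64) {
  STACK_LIMIT = stack_bottom + margin;
}

// Hypothetical RTOS hook: refresh the limit on every context switch so it
// always describes the stack of the task that is about to run.
void on_context_switch(std::uintptr_t next_task_stack_bottom) {
  STACK_LIMIT = next_task_stack_bottom + 64;
}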

File tree

4 files changed: +365 -1 lines changed

lib/Target/ARM/ARMFrameLowering.cpp (+177 -1)
@@ -14,6 +14,7 @@
 #include "ARMFrameLowering.h"
 #include "ARMBaseInstrInfo.h"
 #include "ARMBaseRegisterInfo.h"
+#include "ARMConstantPoolValue.h"
 #include "ARMInstrInfo.h"
 #include "ARMMachineFunctionInfo.h"
 #include "ARMTargetMachine.h"
@@ -1481,10 +1482,20 @@ static uint32_t AlignToARMConstant(uint32_t Value) {
 // stack limit.
 static const uint64_t kSplitStackAvailable = 256;

+void
+ARMFrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const {
+  const ARMSubtarget *ST = &MF.getTarget().getSubtarget<ARMSubtarget>();
+  if(ST->isThumb()) {
+    adjustForSegmentedStacksThumb(MF);
+  } else {
+    adjustForSegmentedStacksARM(MF);
+  }
+}
+
 // Adjust function prologue to enable split stack.
 // Only support android and linux.
 void
-ARMFrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const {
+ARMFrameLowering::adjustForSegmentedStacksARM(MachineFunction &MF) const {
   const ARMSubtarget *ST = &MF.getTarget().getSubtarget<ARMSubtarget>();

   // Doesn't support vararg function.
@@ -1697,3 +1708,168 @@ ARMFrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const {
   MF.verify();
 #endif
 }
+
+void
+ARMFrameLowering::adjustForSegmentedStacksThumb(MachineFunction &MF) const {
+  // const ARMSubtarget *ST = &MF.getTarget().getSubtarget<ARMSubtarget>();
+
+  // Doesn't support vararg function.
+  if (MF.getFunction()->isVarArg())
+    report_fatal_error("Segmented stacks do not support vararg functions.");
+
+  MachineBasicBlock &prologueMBB = MF.front();
+  MachineFrameInfo* MFI = MF.getFrameInfo();
+  const ARMBaseInstrInfo &TII = *TM.getInstrInfo();
+  ARMFunctionInfo* ARMFI = MF.getInfo<ARMFunctionInfo>();
+  DebugLoc DL;
+
+  // Use R4 and R5 as scratch register.
+  // We should save R4 and R5 before use it and restore before
+  // leave the function.
+  unsigned ScratchReg0 = ARM::R4;
+  unsigned ScratchReg1 = ARM::R5;
+  uint64_t AlignedStackSize;
+
+  MachineBasicBlock* prevStackMBB = MF.CreateMachineBasicBlock();
+  MachineBasicBlock* postStackMBB = MF.CreateMachineBasicBlock();
+  MachineBasicBlock* allocMBB = MF.CreateMachineBasicBlock();
+  MachineBasicBlock* getMBB = MF.CreateMachineBasicBlock();
+  MachineBasicBlock* mcrMBB = MF.CreateMachineBasicBlock();
+
+  for (MachineBasicBlock::livein_iterator i = prologueMBB.livein_begin(),
+       e = prologueMBB.livein_end(); i != e; ++i) {
+    allocMBB->addLiveIn(*i);
+    getMBB->addLiveIn(*i);
+    mcrMBB->addLiveIn(*i);
+    prevStackMBB->addLiveIn(*i);
+    postStackMBB->addLiveIn(*i);
+  }
+
+  MF.push_front(postStackMBB);
+  MF.push_front(allocMBB);
+  MF.push_front(getMBB);
+  MF.push_front(mcrMBB);
+  MF.push_front(prevStackMBB);
+
+  // The required stack size that is aligend to ARM constant critarion.
+  uint64_t StackSize = MFI->getStackSize();
+
+  AlignedStackSize = AlignToARMConstant(StackSize);
+
+  // When the frame size is less than 256 we just compare the stack
+  // boundary directly to the value of the stack pointer, per gcc.
+  bool CompareStackPointer = AlignedStackSize < kSplitStackAvailable;
+
+  // We will use two of callee save registers as scratch register so we
+  // need to save those registers into stack frame before use it.
+  // We will use SR0 to hold stack limit and SR1 to stack size requested.
+  // and arguments for __morestack().
+  // SR0: Scratch Register #0
+  // SR1: Scratch Register #1
+  // push {SR0, SR1}
+  AddDefaultPred(BuildMI(prevStackMBB, DL, TII.get(ARM::tPUSH)))
+    .addReg(ScratchReg0)
+    .addReg(ScratchReg1);
+
+  // mov SR1, sp
+  AddDefaultPred(BuildMI(mcrMBB, DL, TII.get(ARM::tMOVr), ScratchReg1)
+    .addReg(ARM::SP));
+
+  if (!CompareStackPointer) {
+    // sub SR1, #StackSize
+    AddDefaultPred(AddDefaultCC(BuildMI(mcrMBB, DL, TII.get(ARM::tSUBi8), ScratchReg1))
+      .addReg(ScratchReg1).addImm(AlignedStackSize));
+  }
+
+  unsigned PCLabelId = ARMFI->createPICLabelUId();
+  ARMConstantPoolValue *NewCPV = ARMConstantPoolSymbol::
+    Create(MF.getFunction()->getContext(), "STACK_LIMIT", PCLabelId, 0);
+  MachineConstantPool *MCP = MF.getConstantPool();
+  unsigned CPI = MCP->getConstantPoolIndex(NewCPV, MF.getAlignment());
+
+  //ldr SR0, [pc, offset(STACK_LIMIT)]
+  AddDefaultPred(BuildMI(getMBB, DL, TII.get(ARM::tLDRpci), ScratchReg0)
+    .addConstantPoolIndex(CPI));
+
+  //ldr SR0, [SR0]
+  AddDefaultPred(BuildMI(getMBB, DL, TII.get(ARM::tLDRi), ScratchReg0)
+    .addReg(ScratchReg0)
+    .addImm(0));
+
+  // Compare stack limit with stack size requested.
+  // cmp SR0, SR1
+  AddDefaultPred(BuildMI(getMBB, DL, TII.get(ARM::tCMPr))
+    .addReg(ScratchReg0)
+    .addReg(ScratchReg1));
+
+  // This jump is taken if StackLimit < SP - stack required.
+  BuildMI(getMBB, DL, TII.get(ARM::tBcc))
+    .addMBB(postStackMBB)
+    .addImm(ARMCC::LO)
+    .addReg(ARM::CPSR);
+
+
+  // Calling __morestack(StackSize, Size of stack arguments).
+  // __morestack knows that the stack size requested is in SR0(r4)
+  // and amount size of stack arguments is in SR1(r5).
+
+  // Pass first argument for the __morestack by Scratch Register #0.
+  // The amount size of stack required
+  AddDefaultPred(AddDefaultCC(BuildMI(allocMBB, DL, TII.get(ARM::tMOVi8), ScratchReg0))
+    .addImm(AlignedStackSize));
+  // Pass second argument for the __morestack by Scratch Register #1.
+  // The amount size of stack consumed to save function arguments.
+  AddDefaultPred(AddDefaultCC(BuildMI(allocMBB, DL, TII.get(ARM::tMOVi8), ScratchReg1))
+    .addImm(AlignToARMConstant(ARMFI->getArgumentStackSize())));
+
+  // push {lr} - Save return address of this function.
+  AddDefaultPred(BuildMI(allocMBB, DL, TII.get(ARM::tPUSH)))
+    .addReg(ARM::LR);
+
+  // Call __morestack().
+  AddDefaultPred(BuildMI(allocMBB, DL, TII.get(ARM::tBL)))
+    .addExternalSymbol("__morestack");
+
+  // Restore return address of this original function.
+  // pop {SR0}
+  AddDefaultPred(BuildMI(allocMBB, DL, TII.get(ARM::tPOP)))
+    .addReg(ScratchReg0);
+
+  // mov lr, SR0
+  AddDefaultPred(BuildMI(allocMBB, DL, TII.get(ARM::tMOVr), ARM::LR)
+    .addReg(ScratchReg0));
+
+  // Restore SR0 and SR1 in case of __morestack() was called.
+  // __morestack() will skip postStackMBB block so we need to restore
+  // scratch registers from here.
+  // pop {SR0, SR1}
+  AddDefaultPred(BuildMI(allocMBB, DL, TII.get(ARM::tPOP)))
+    .addReg(ScratchReg0)
+    .addReg(ScratchReg1);
+
+  // Return from this function.
+  AddDefaultPred(BuildMI(allocMBB, DL, TII.get(ARM::tMOVr), ARM::PC)
+    .addReg(ARM::LR));
+
+  // Restore SR0 and SR1 in case of __morestack() was not called.
+  // pop {SR0, SR1}
+  AddDefaultPred(BuildMI(postStackMBB, DL, TII.get(ARM::tPOP)))
+    .addReg(ScratchReg0)
+    .addReg(ScratchReg1);
+
+  // Organizing MBB lists
+  postStackMBB->addSuccessor(&prologueMBB);
+
+  allocMBB->addSuccessor(postStackMBB);
+
+  getMBB->addSuccessor(postStackMBB);
+  getMBB->addSuccessor(allocMBB);
+
+  mcrMBB->addSuccessor(getMBB);
+
+  prevStackMBB->addSuccessor(mcrMBB);
+
+#ifdef XDEBUG
+  MF.verify();
+#endif
+}
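For readers tracing the BuildMI calls above, the following is a rough stand-alone C++ model of the comparison the emitted Thumb prologue performs. It is illustrative only and not code from the patch; the function name needs_morestack and the use of plain C++ in place of the generated machine code are assumptions.

#include <cstdint>

extern "C" std::uintptr_t STACK_LIMIT;  // maintained by the wrapping runtime

// Frames smaller than kSplitStackAvailable (256) compare SP itself against
// the limit; larger frames compare SP minus the aligned frame size. The
// emitted ARMCC::LO branch skips __morestack only when the limit is strictly
// below the probed value.
bool needs_morestack(std::uintptr_t sp, std::uint64_t aligned_stack_size) {
  const std::uint64_t kSplitStackAvailable = 256;
  std::uintptr_t probe =
      aligned_stack_size < kSplitStackAvailable
          ? sp
          : sp - static_cast<std::uintptr_t>(aligned_stack_size);
  return !(STACK_LIMIT < probe);
}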

lib/Target/ARM/ARMFrameLowering.h (+2)
@@ -62,6 +62,8 @@ class ARMFrameLowering : public TargetFrameLowering {
                                     RegScavenger *RS) const;

   void adjustForSegmentedStacks(MachineFunction &MF) const;
+  void adjustForSegmentedStacksThumb(MachineFunction &MF) const;
+  void adjustForSegmentedStacksARM(MachineFunction &MF) const;

 private:
   void emitPushInst(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
@@ -0,0 +1,41 @@
+; RUN: llc < %s -mcpu=generic -mtriple=thumb-linux-android -segmented-stacks -verify-machineinstrs | FileCheck %s -check-prefix=Thumb-Linux-Android
+; RUN: llc < %s -mcpu=generic -mtriple=thumb-linux-android -segmented-stacks -filetype=obj
+
+; Just to prevent the alloca from being optimized away
+declare void @dummy_use(i32*, i32)
+
+define i32 @test_basic(i32 %l) {
+%mem = alloca i32, i32 %l
+call void @dummy_use (i32* %mem, i32 %l)
+%terminate = icmp eq i32 %l, 0
+br i1 %terminate, label %true, label %false
+
+true:
+ret i32 0
+
+false:
+%newlen = sub i32 %l, 1
+%retvalue = call i32 @test_basic(i32 %newlen)
+ret i32 %retvalue
+
+; Thumb-Linux-Android: test_basic:
+
+; Thumb-Linux-Android: push {r4, r5}
+; Thumb-Linux-Android-NEXT: mov r5, sp
+; Thumb-Linux-Android-NEXT: ldr r4, .LCPI0_0
+; Thumb-Linux-Android-NEXT: ldr r4, [r4]
+; Thumb-Linux-Android-NEXT: cmp r4, r5
+; Thumb-Linux-Android-NEXT: blo .LBB0_2
+
+; Thumb-Linux-Android: mov r4, #16
+; Thumb-Linux-Android-NEXT: mov r5, #0
+; Thumb-Linux-Android-NEXT: push {lr}
+; Thumb-Linux-Android-NEXT: bl __morestack
+; Thumb-Linux-Android-NEXT: pop {r4}
+; Thumb-Linux-Android-NEXT: mov lr, r4
+; Thumb-Linux-Android-NEXT: pop {r4, r5}
+; Thumb-Linux-Android-NEXT: mov pc, lr
+
+; Thumb-Linux-Android: pop {r4, r5}
+
+}
+145
@@ -0,0 +1,145 @@
+; RUN: llc < %s -mcpu=generic -mtriple=thumb-linux-android -segmented-stacks -verify-machineinstrs | FileCheck %s -check-prefix=Thumb-Linux-Android
+
+; We used to crash with filetype=obj
+; RUN: llc < %s -mcpu=generic -mtriple=thumb-linux-android -segmented-stacks -filetype=obj
+
+
+; Just to prevent the alloca from being optimized away
+declare void @dummy_use(i32*, i32)
+
+define void @test_basic() {
+%mem = alloca i32, i32 10
+call void @dummy_use (i32* %mem, i32 10)
+ret void
+
+; Thumb-Linux-Android: test_basic:
+
+; Thumb-Linux-Android: push {r4, r5}
+; Thumb-Linux-Android-NEXT: mov r5, sp
+; Thumb-Linux-Android-NEXT: ldr r4, .LCPI0_0
+; Thumb-Linux-Android-NEXT: ldr r4, [r4]
+; Thumb-Linux-Android-NEXT: cmp r4, r5
+; Thumb-Linux-Android-NEXT: blo .LBB0_2
+
+; Thumb-Linux-Android: mov r4, #48
+; Thumb-Linux-Android-NEXT: mov r5, #0
+; Thumb-Linux-Android-NEXT: push {lr}
+; Thumb-Linux-Android-NEXT: bl __morestack
+; Thumb-Linux-Android-NEXT: pop {r4}
+; Thumb-Linux-Android-NEXT: mov lr, r4
+; Thumb-Linux-Android-NEXT: pop {r4, r5}
+; Thumb-Linux-Android-NEXT: mov pc, lr
+
+; Thumb-Linux-Android: pop {r4, r5}
+
+}
+
+define i32 @test_nested(i32 * nest %closure, i32 %other) {
+%addend = load i32 * %closure
+%result = add i32 %other, %addend
+ret i32 %result
+
+; Thumb-Linux-Android: test_nested:
+
+; Thumb-Linux-Android: push {r4, r5}
+; Thumb-Linux-Android-NEXT: mov r5, sp
+; Thumb-Linux-Android-NEXT: ldr r4, .LCPI1_0
+; Thumb-Linux-Android-NEXT: ldr r4, [r4]
+; Thumb-Linux-Android-NEXT: cmp r4, r5
+; Thumb-Linux-Android-NEXT: blo .LBB1_2
+
+; Thumb-Linux-Android: mov r4, #0
+; Thumb-Linux-Android-NEXT: mov r5, #0
+; Thumb-Linux-Android-NEXT: push {lr}
+; Thumb-Linux-Android-NEXT: bl __morestack
+; Thumb-Linux-Android-NEXT: pop {r4}
+; Thumb-Linux-Android-NEXT: mov lr, r4
+; Thumb-Linux-Android-NEXT: pop {r4, r5}
+; Thumb-Linux-Android-NEXT: mov pc, lr
+
+; Thumb-Linux-Android: pop {r4, r5}
+
+}
+
+define void @test_large() {
+%mem = alloca i32, i32 10000
+call void @dummy_use (i32* %mem, i32 0)
+ret void
+
+; Thumb-Linux-Android: test_large:
+
+; Thumb-Linux-Android: push {r4, r5}
+; Thumb-Linux-Android-NEXT: mov r5, sp
+; Thumb-Linux-Android-NEXT: sub r5, #40192
+; Thumb-Linux-Android-NEXT: ldr r4, .LCPI2_2
+; Thumb-Linux-Android-NEXT: ldr r4, [r4]
+; Thumb-Linux-Android-NEXT: cmp r4, r5
+; Thumb-Linux-Android-NEXT: blo .LBB2_2
+
+; Thumb-Linux-Android: mov r4, #40192
+; Thumb-Linux-Android-NEXT: mov r5, #0
+; Thumb-Linux-Android-NEXT: push {lr}
+; Thumb-Linux-Android-NEXT: bl __morestack
+; Thumb-Linux-Android-NEXT: pop {r4}
+; Thumb-Linux-Android-NEXT: mov lr, r4
+; Thumb-Linux-Android-NEXT: pop {r4, r5}
+; Thumb-Linux-Android-NEXT: mov pc, lr
+
+; Thumb-Linux-Android: pop {r4, r5}
+
+}
+
+define fastcc void @test_fastcc() {
+%mem = alloca i32, i32 10
+call void @dummy_use (i32* %mem, i32 10)
+ret void
+
+; Thumb-Linux-Android: test_fastcc:
+
+; Thumb-Linux-Android: push {r4, r5}
+; Thumb-Linux-Android-NEXT: mov r5, sp
+; Thumb-Linux-Android-NEXT: ldr r4, .LCPI3_0
+; Thumb-Linux-Android-NEXT: ldr r4, [r4]
+; Thumb-Linux-Android-NEXT: cmp r4, r5
+; Thumb-Linux-Android-NEXT: blo .LBB3_2
+
+; Thumb-Linux-Android: mov r4, #48
+; Thumb-Linux-Android-NEXT: mov r5, #0
+; Thumb-Linux-Android-NEXT: push {lr}
+; Thumb-Linux-Android-NEXT: bl __morestack
+; Thumb-Linux-Android-NEXT: pop {r4}
+; Thumb-Linux-Android-NEXT: mov lr, r4
+; Thumb-Linux-Android-NEXT: pop {r4, r5}
+; Thumb-Linux-Android-NEXT: mov pc, lr
+
+; Thumb-Linux-Android: pop {r4, r5}
+
+}
+
+define fastcc void @test_fastcc_large() {
+%mem = alloca i32, i32 10000
+call void @dummy_use (i32* %mem, i32 0)
+ret void
+
+; Thumb-Linux-Android: test_fastcc_large:
+
+; Thumb-Linux-Android: push {r4, r5}
+; Thumb-Linux-Android-NEXT: mov r5, sp
+; Thumb-Linux-Android-NEXT: sub r5, #40192
+; Thumb-Linux-Android-NEXT: ldr r4, .LCPI4_2
+; Thumb-Linux-Android-NEXT: ldr r4, [r4]
+; Thumb-Linux-Android-NEXT: cmp r4, r5
+; Thumb-Linux-Android-NEXT: blo .LBB4_2
+
+; Thumb-Linux-Android: mov r4, #40192
+; Thumb-Linux-Android-NEXT: mov r5, #0
+; Thumb-Linux-Android-NEXT: push {lr}
+; Thumb-Linux-Android-NEXT: bl __morestack
+; Thumb-Linux-Android-NEXT: pop {r4}
+; Thumb-Linux-Android-NEXT: mov lr, r4
+; Thumb-Linux-Android-NEXT: pop {r4, r5}
+; Thumb-Linux-Android-NEXT: mov pc, lr
+
+; Thumb-Linux-Android: pop {r4, r5}
+
+}
