Skip to content

Commit 3f46e54

Browse files
committed
[AIX][TLS] Produce a faster local-exec access sequence with -maix-small-local-exec-tls (And optimize when load/store offsets are 0)
This patch utilizes the -maix-small-local-exec-tls option added in D155544 to produce a faster access sequence for the local-exec TLS model, where loading from the TOC can be avoided. The patch either produces an addi/la with a displacement off of r13 (the thread pointer) when only the address is calculated, or it produces an addi/la followed by a load/store when the address is calculated and then used for further accesses. This patch also optimizes this sequence further by removing the addi/la when the load/store offset is 0. When the load/store offset is non-zero, the addi/la that precedes the load/store is currently kept; a follow-up patch will be posted to handle that case. Furthermore, this access sequence is only performed for TLS variables that are less than ~32KB in size. Differential Revision: https://reviews.llvm.org/D155600
1 parent 8bdbee8 commit 3f46e54

14 files changed

+1548
-24
lines changed

llvm/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFObjectWriter.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,8 @@ std::pair<uint8_t, uint8_t> PPCXCOFFObjectWriter::getRelocTypeAndSignSize(
6969
return {XCOFF::RelocationType::R_TOCU, SignAndSizeForHalf16};
7070
case MCSymbolRefExpr::VK_PPC_L:
7171
return {XCOFF::RelocationType::R_TOCL, SignAndSizeForHalf16};
72+
case MCSymbolRefExpr::VK_PPC_AIX_TLSLE:
73+
return {XCOFF::RelocationType::R_TLS_LE, SignAndSizeForHalf16};
7274
}
7375
} break;
7476
case PPC::fixup_ppc_half16ds:
@@ -82,6 +84,8 @@ std::pair<uint8_t, uint8_t> PPCXCOFFObjectWriter::getRelocTypeAndSignSize(
8284
return {XCOFF::RelocationType::R_TOC, 15};
8385
case MCSymbolRefExpr::VK_PPC_L:
8486
return {XCOFF::RelocationType::R_TOCL, 15};
87+
case MCSymbolRefExpr::VK_PPC_AIX_TLSLE:
88+
return {XCOFF::RelocationType::R_TLS_LE, 15};
8589
}
8690
} break;
8791
case PPC::fixup_ppc_br24:

llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1534,6 +1534,22 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
15341534
EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::EnforceIEIO));
15351535
return;
15361536
}
1537+
case PPC::ADDI8: {
1538+
// The faster non-TOC-based local-exec sequence is represented by `addi`
1539+
// with an immediate operand having the MO_TPREL_FLAG. Such an instruction
1540+
// does not otherwise arise.
1541+
const MachineOperand &MO = MI->getOperand(2);
1542+
if ((MO.getTargetFlags() & PPCII::MO_TPREL_FLAG) != 0) {
1543+
assert(
1544+
Subtarget->hasAIXSmallLocalExecTLS() &&
1545+
"addi with thread-pointer only expected with local-exec small TLS");
1546+
LowerPPCMachineInstrToMCInst(MI, TmpInst, *this);
1547+
TmpInst.setOpcode(PPC::LA8);
1548+
EmitToStreamer(*OutStreamer, TmpInst);
1549+
return;
1550+
}
1551+
break;
1552+
}
15371553
}
15381554

15391555
LowerPPCMachineInstrToMCInst(MI, TmpInst, *this);

llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -7654,13 +7654,6 @@ void PPCDAGToDAGISel::PeepholePPC64() {
76547654
// is already in place on the operand, so copying the operand
76557655
// is sufficient.
76567656
ReplaceFlags = false;
7657-
// For these cases, the immediate may not be divisible by 4, in
7658-
// which case the fold is illegal for DS-form instructions. (The
7659-
// other cases provide aligned addresses and are always safe.)
7660-
if (RequiresMod4Offset &&
7661-
(!isa<ConstantSDNode>(Base.getOperand(1)) ||
7662-
Base.getConstantOperandVal(1) % 4 != 0))
7663-
continue;
76647657
break;
76657658
case PPC::ADDIdtprelL:
76667659
Flags = PPCII::MO_DTPREL_LO;
@@ -7712,6 +7705,18 @@ void PPCDAGToDAGISel::PeepholePPC64() {
77127705
UpdateHBase = true;
77137706
}
77147707
} else {
7708+
// Global addresses can be folded, but only if they are sufficiently
7709+
// aligned.
7710+
if (RequiresMod4Offset) {
7711+
if (GlobalAddressSDNode *GA =
7712+
dyn_cast<GlobalAddressSDNode>(ImmOpnd)) {
7713+
const GlobalValue *GV = GA->getGlobal();
7714+
Align Alignment = GV->getPointerAlignment(CurDAG->getDataLayout());
7715+
if (Alignment < 4)
7716+
continue;
7717+
}
7718+
}
7719+
77157720
// If we're directly folding the addend from an addi instruction, then:
77167721
// 1. In general, the offset on the memory access must be zero.
77177722
// 2. If the addend is a constant, then it can be combined with a

llvm/lib/Target/PowerPC/PPCISelLowering.cpp

Lines changed: 33 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -148,6 +148,12 @@ static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl);
148148

149149
static const char AIXSSPCanaryWordName[] = "__ssp_canary_word";
150150

151+
// A faster local-exec TLS access sequence (enabled with the
152+
// -maix-small-local-exec-tls option) can be produced for TLS variables;
153+
// consistent with the IBM XL compiler, we apply a max size of slightly under
154+
// 32KB.
155+
constexpr uint64_t AIXSmallTlsPolicySizeLimit = 32751;
156+
151157
// FIXME: Remove this once the bug has been fixed!
152158
extern cl::opt<bool> ANDIGlueBug;
153159

@@ -3355,22 +3361,39 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddressAIX(SDValue Op,
33553361
const GlobalValue *GV = GA->getGlobal();
33563362
EVT PtrVT = getPointerTy(DAG.getDataLayout());
33573363
bool Is64Bit = Subtarget.isPPC64();
3364+
bool HasAIXSmallLocalExecTLS = Subtarget.hasAIXSmallLocalExecTLS();
33583365
TLSModel::Model Model = getTargetMachine().getTLSModel(GV);
3366+
bool IsTLSLocalExecModel = Model == TLSModel::LocalExec;
33593367

3360-
if (Model == TLSModel::LocalExec || Model == TLSModel::InitialExec) {
3368+
if (IsTLSLocalExecModel || Model == TLSModel::InitialExec) {
33613369
SDValue VariableOffsetTGA =
33623370
DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TPREL_FLAG);
33633371
SDValue VariableOffset = getTOCEntry(DAG, dl, VariableOffsetTGA);
33643372
SDValue TLSReg;
3365-
if (Is64Bit)
3373+
if (Is64Bit) {
33663374
// For local-exec and initial-exec on AIX (64-bit), the sequence generated
33673375
// involves a load of the variable offset (from the TOC), followed by an
33683376
// add of the loaded variable offset to R13 (the thread pointer).
33693377
// This code sequence looks like:
33703378
// ld reg1,var[TC](2)
33713379
// add reg2, reg1, r13 // r13 contains the thread pointer
33723380
TLSReg = DAG.getRegister(PPC::X13, MVT::i64);
3373-
else
3381+
3382+
// With the -maix-small-local-exec-tls option, produce a faster access
3383+
// sequence for local-exec TLS variables where the offset from the TLS
3384+
// base is encoded as an immediate operand.
3385+
//
3386+
// We only utilize the faster local-exec access sequence when the TLS
3387+
// variable has a size within the policy limit. We treat types that are
3388+
// not sized or are empty as being over the policy size limit.
3389+
if (HasAIXSmallLocalExecTLS && IsTLSLocalExecModel) {
3390+
Type *GVType = GV->getValueType();
3391+
if (GVType->isSized() && !GVType->isEmptyTy() &&
3392+
GV->getParent()->getDataLayout().getTypeAllocSize(GVType) <=
3393+
AIXSmallTlsPolicySizeLimit)
3394+
return DAG.getNode(PPCISD::Lo, dl, PtrVT, VariableOffsetTGA, TLSReg);
3395+
}
3396+
} else {
33743397
// For local-exec and initial-exec on AIX (32-bit), the sequence generated
33753398
// involves loading the variable offset from the TOC, generating a call to
33763399
// .__get_tpointer to get the thread pointer (which will be in R3), and
@@ -3379,6 +3402,13 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddressAIX(SDValue Op,
33793402
// bla .__get_tpointer
33803403
// add reg2, reg1, r3
33813404
TLSReg = DAG.getNode(PPCISD::GET_TPOINTER, dl, PtrVT);
3405+
3406+
// We do not implement the 32-bit version of the faster access sequence
3407+
// for local-exec that is controlled by -maix-small-local-exec-tls.
3408+
if (HasAIXSmallLocalExecTLS)
3409+
report_fatal_error("The small-local-exec TLS access sequence is "
3410+
"currently only supported on AIX (64-bit mode).");
3411+
}
33823412
return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TLSReg, VariableOffset);
33833413
}
33843414

llvm/lib/Target/PowerPC/PPCMCInstLower.cpp

Lines changed: 21 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -31,22 +31,19 @@ using namespace llvm;
3131

3232
static MCSymbol *GetSymbolFromOperand(const MachineOperand &MO,
3333
AsmPrinter &AP) {
34-
const TargetMachine &TM = AP.TM;
35-
Mangler &Mang = TM.getObjFileLowering()->getMangler();
36-
const DataLayout &DL = AP.getDataLayout();
37-
MCContext &Ctx = AP.OutContext;
38-
39-
SmallString<128> Name;
40-
if (!MO.isGlobal()) {
41-
assert(MO.isSymbol() && "Isn't a symbol reference");
42-
Mangler::getNameWithPrefix(Name, MO.getSymbolName(), DL);
43-
} else {
34+
if (MO.isGlobal()) {
4435
const GlobalValue *GV = MO.getGlobal();
45-
TM.getNameWithPrefix(Name, GV, Mang);
36+
return AP.getSymbol(GV);
4637
}
4738

48-
MCSymbol *Sym = Ctx.getOrCreateSymbol(Name);
39+
assert(MO.isSymbol() && "Isn't a symbol reference");
40+
41+
SmallString<128> Name;
42+
const DataLayout &DL = AP.getDataLayout();
43+
Mangler::getNameWithPrefix(Name, MO.getSymbolName(), DL);
4944

45+
MCContext &Ctx = AP.OutContext;
46+
MCSymbol *Sym = Ctx.getOrCreateSymbol(Name);
5047
return Sym;
5148
}
5249

@@ -80,6 +77,8 @@ static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol,
8077
break;
8178
}
8279

80+
const TargetMachine &TM = Printer.TM;
81+
8382
if (MO.getTargetFlags() == PPCII::MO_PLT)
8483
RefKind = MCSymbolRefExpr::VK_PLT;
8584
else if (MO.getTargetFlags() == PPCII::MO_PCREL_FLAG)
@@ -94,12 +93,21 @@ static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol,
9493
RefKind = MCSymbolRefExpr::VK_PPC_GOT_TLSLD_PCREL;
9594
else if (MO.getTargetFlags() == PPCII::MO_GOT_TPREL_PCREL_FLAG)
9695
RefKind = MCSymbolRefExpr::VK_PPC_GOT_TPREL_PCREL;
96+
else if (MO.getTargetFlags() == PPCII::MO_TPREL_FLAG) {
97+
assert(MO.isGlobal() && "Only expecting a global MachineOperand here!");
98+
TLSModel::Model Model = TM.getTLSModel(MO.getGlobal());
99+
// For the local-exec TLS model, we may generate the offset from the TLS
100+
// base as an immediate operand (instead of using a TOC entry).
101+
// Set the relocation type in case the result is used for purposes other
102+
// than a TOC reference. In TOC reference cases, this result is discarded.
103+
if (Model == TLSModel::LocalExec)
104+
RefKind = MCSymbolRefExpr::VK_PPC_AIX_TLSLE;
105+
}
97106

98107
const MachineInstr *MI = MO.getParent();
99108
const MachineFunction *MF = MI->getMF();
100109
const Module *M = MF->getFunction().getParent();
101110
const PPCSubtarget *Subtarget = &(MF->getSubtarget<PPCSubtarget>());
102-
const TargetMachine &TM = Printer.TM;
103111

104112
unsigned MIOpcode = MI->getOpcode();
105113
assert((Subtarget->isUsingPCRelativeCalls() || MIOpcode != PPC::BL8_NOTOC) &&
Lines changed: 172 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,172 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
2+
; RUN: llc -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \
3+
; RUN: -mtriple powerpc64-ibm-aix-xcoff -mattr=+aix-small-local-exec-tls < %s \
4+
; RUN: | FileCheck %s --check-prefix=SMALL-LOCAL-EXEC-SMALLCM64
5+
; RUN: llc -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \
6+
; RUN: -mtriple powerpc64-ibm-aix-xcoff --code-model=large \
7+
; RUN: -mattr=+aix-small-local-exec-tls < %s | FileCheck %s \
8+
; RUN: --check-prefix=SMALL-LOCAL-EXEC-LARGECM64
9+
10+
@ThreadLocalVarInit = thread_local(localexec) global i8 1, align 1
11+
@VarInit = local_unnamed_addr global i8 87, align 1
12+
@IThreadLocalVarInit = internal thread_local(localexec) global i8 1, align 1
13+
declare nonnull ptr @llvm.threadlocal.address.p0(ptr nonnull) #1
14+
@c = thread_local(localexec) global [87 x i8] zeroinitializer, align 1
15+
16+
define nonnull ptr @AddrTest1() local_unnamed_addr #0 {
17+
; SMALL-LOCAL-EXEC-SMALLCM64-LABEL: AddrTest1:
18+
; SMALL-LOCAL-EXEC-SMALLCM64: # %bb.0: # %entry
19+
; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: la r3, c[TL]@le(r13)
20+
; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: addi r3, r3, 1
21+
; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: blr
22+
;
23+
; SMALL-LOCAL-EXEC-LARGECM64-LABEL: AddrTest1:
24+
; SMALL-LOCAL-EXEC-LARGECM64: # %bb.0: # %entry
25+
; SMALL-LOCAL-EXEC-LARGECM64-NEXT: la r3, c[TL]@le(r13)
26+
; SMALL-LOCAL-EXEC-LARGECM64-NEXT: addi r3, r3, 1
27+
; SMALL-LOCAL-EXEC-LARGECM64-NEXT: blr
28+
entry:
29+
%0 = tail call align 1 ptr @llvm.threadlocal.address.p0(ptr align 1 @c)
30+
%arrayidx = getelementptr inbounds [87 x i8], ptr %0, i64 0, i64 1
31+
ret ptr %arrayidx
32+
}
33+
34+
define void @storeITLInit(i8 noundef zeroext %x) {
35+
; SMALL-LOCAL-EXEC-SMALLCM64-LABEL: storeITLInit:
36+
; SMALL-LOCAL-EXEC-SMALLCM64: # %bb.0: # %entry
37+
; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: stb r3, IThreadLocalVarInit[TL]@le(r13)
38+
; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: blr
39+
;
40+
; SMALL-LOCAL-EXEC-LARGECM64-LABEL: storeITLInit:
41+
; SMALL-LOCAL-EXEC-LARGECM64: # %bb.0: # %entry
42+
; SMALL-LOCAL-EXEC-LARGECM64-NEXT: stb r3, IThreadLocalVarInit[TL]@le(r13)
43+
; SMALL-LOCAL-EXEC-LARGECM64-NEXT: blr
44+
entry:
45+
%0 = tail call align 1 ptr @llvm.threadlocal.address.p0(ptr align 1 @IThreadLocalVarInit)
46+
store i8 %x, ptr %0, align 1
47+
ret void
48+
}
49+
50+
define void @storeTLInit(i8 noundef zeroext %x) {
51+
; SMALL-LOCAL-EXEC-SMALLCM64-LABEL: storeTLInit:
52+
; SMALL-LOCAL-EXEC-SMALLCM64: # %bb.0: # %entry
53+
; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: stb r3, ThreadLocalVarInit[TL]@le(r13)
54+
; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: blr
55+
;
56+
; SMALL-LOCAL-EXEC-LARGECM64-LABEL: storeTLInit:
57+
; SMALL-LOCAL-EXEC-LARGECM64: # %bb.0: # %entry
58+
; SMALL-LOCAL-EXEC-LARGECM64-NEXT: stb r3, ThreadLocalVarInit[TL]@le(r13)
59+
; SMALL-LOCAL-EXEC-LARGECM64-NEXT: blr
60+
entry:
61+
%0 = tail call align 1 ptr @llvm.threadlocal.address.p0(ptr align 1 @ThreadLocalVarInit)
62+
store i8 %x, ptr %0, align 1
63+
ret void
64+
}
65+
66+
define zeroext i8 @loadITLInit() {
67+
; SMALL-LOCAL-EXEC-SMALLCM64-LABEL: loadITLInit:
68+
; SMALL-LOCAL-EXEC-SMALLCM64: # %bb.0: # %entry
69+
; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: lbz r3, IThreadLocalVarInit[TL]@le(r13)
70+
; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: blr
71+
;
72+
; SMALL-LOCAL-EXEC-LARGECM64-LABEL: loadITLInit:
73+
; SMALL-LOCAL-EXEC-LARGECM64: # %bb.0: # %entry
74+
; SMALL-LOCAL-EXEC-LARGECM64-NEXT: lbz r3, IThreadLocalVarInit[TL]@le(r13)
75+
; SMALL-LOCAL-EXEC-LARGECM64-NEXT: blr
76+
entry:
77+
%0 = tail call align 1 ptr @llvm.threadlocal.address.p0(ptr align 1 @IThreadLocalVarInit)
78+
%1 = load i8, ptr %0, align 1
79+
ret i8 %1
80+
}
81+
82+
define zeroext i8 @loadITLInit2() {
83+
; SMALL-LOCAL-EXEC-SMALLCM64-LABEL: loadITLInit2:
84+
; SMALL-LOCAL-EXEC-SMALLCM64: # %bb.0: # %entry
85+
; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: ld r3, L..C0(r2) # @VarInit
86+
; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: lbz r4, IThreadLocalVarInit[TL]@le(r13)
87+
; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: lbz r3, 0(r3)
88+
; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: add r3, r3, r4
89+
; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: clrldi r3, r3, 56
90+
; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: blr
91+
;
92+
; SMALL-LOCAL-EXEC-LARGECM64-LABEL: loadITLInit2:
93+
; SMALL-LOCAL-EXEC-LARGECM64: # %bb.0: # %entry
94+
; SMALL-LOCAL-EXEC-LARGECM64-NEXT: addis r3, L..C0@u(r2)
95+
; SMALL-LOCAL-EXEC-LARGECM64-NEXT: lbz r4, IThreadLocalVarInit[TL]@le(r13)
96+
; SMALL-LOCAL-EXEC-LARGECM64-NEXT: ld r3, L..C0@l(r3)
97+
; SMALL-LOCAL-EXEC-LARGECM64-NEXT: lbz r3, 0(r3)
98+
; SMALL-LOCAL-EXEC-LARGECM64-NEXT: add r3, r3, r4
99+
; SMALL-LOCAL-EXEC-LARGECM64-NEXT: clrldi r3, r3, 56
100+
; SMALL-LOCAL-EXEC-LARGECM64-NEXT: blr
101+
entry:
102+
%0 = tail call align 1 ptr @llvm.threadlocal.address.p0(ptr align 1 @IThreadLocalVarInit)
103+
%1 = load i8, ptr %0, align 1
104+
%2 = load i8, ptr @VarInit, align 1
105+
%add = add i8 %2, %1
106+
ret i8 %add
107+
}
108+
109+
define zeroext i8 @loadTLInit() {
110+
; SMALL-LOCAL-EXEC-SMALLCM64-LABEL: loadTLInit:
111+
; SMALL-LOCAL-EXEC-SMALLCM64: # %bb.0: # %entry
112+
; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: lbz r3, ThreadLocalVarInit[TL]@le(r13)
113+
; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: blr
114+
;
115+
; SMALL-LOCAL-EXEC-LARGECM64-LABEL: loadTLInit:
116+
; SMALL-LOCAL-EXEC-LARGECM64: # %bb.0: # %entry
117+
; SMALL-LOCAL-EXEC-LARGECM64-NEXT: lbz r3, ThreadLocalVarInit[TL]@le(r13)
118+
; SMALL-LOCAL-EXEC-LARGECM64-NEXT: blr
119+
entry:
120+
%0 = tail call align 1 ptr @llvm.threadlocal.address.p0(ptr align 1 @ThreadLocalVarInit)
121+
%1 = load i8, ptr %0, align 1
122+
ret i8 %1
123+
}
124+
125+
define zeroext i8 @loadTLInit2() {
126+
; SMALL-LOCAL-EXEC-SMALLCM64-LABEL: loadTLInit2:
127+
; SMALL-LOCAL-EXEC-SMALLCM64: # %bb.0: # %entry
128+
; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: ld r3, L..C0(r2) # @VarInit
129+
; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: lbz r4, ThreadLocalVarInit[TL]@le(r13)
130+
; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: lbz r3, 0(r3)
131+
; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: add r3, r3, r4
132+
; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: clrldi r3, r3, 56
133+
; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: blr
134+
;
135+
; SMALL-LOCAL-EXEC-LARGECM64-LABEL: loadTLInit2:
136+
; SMALL-LOCAL-EXEC-LARGECM64: # %bb.0: # %entry
137+
; SMALL-LOCAL-EXEC-LARGECM64-NEXT: addis r3, L..C0@u(r2)
138+
; SMALL-LOCAL-EXEC-LARGECM64-NEXT: lbz r4, ThreadLocalVarInit[TL]@le(r13)
139+
; SMALL-LOCAL-EXEC-LARGECM64-NEXT: ld r3, L..C0@l(r3)
140+
; SMALL-LOCAL-EXEC-LARGECM64-NEXT: lbz r3, 0(r3)
141+
; SMALL-LOCAL-EXEC-LARGECM64-NEXT: add r3, r3, r4
142+
; SMALL-LOCAL-EXEC-LARGECM64-NEXT: clrldi r3, r3, 56
143+
; SMALL-LOCAL-EXEC-LARGECM64-NEXT: blr
144+
entry:
145+
%0 = tail call align 1 ptr @llvm.threadlocal.address.p0(ptr align 1 @ThreadLocalVarInit)
146+
%1 = load i8, ptr %0, align 1
147+
%2 = load i8, ptr @VarInit, align 1
148+
%add = add i8 %2, %1
149+
ret i8 %add
150+
}
151+
152+
define void @loadStore1(i8 noundef zeroext %x) {
153+
; SMALL-LOCAL-EXEC-SMALLCM64-LABEL: loadStore1:
154+
; SMALL-LOCAL-EXEC-SMALLCM64: # %bb.0: # %entry
155+
; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: lbz r3, IThreadLocalVarInit[TL]@le(r13)
156+
; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: addi r3, r3, 9
157+
; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: stb r3, IThreadLocalVarInit[TL]@le(r13)
158+
; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: blr
159+
;
160+
; SMALL-LOCAL-EXEC-LARGECM64-LABEL: loadStore1:
161+
; SMALL-LOCAL-EXEC-LARGECM64: # %bb.0: # %entry
162+
; SMALL-LOCAL-EXEC-LARGECM64-NEXT: lbz r3, IThreadLocalVarInit[TL]@le(r13)
163+
; SMALL-LOCAL-EXEC-LARGECM64-NEXT: addi r3, r3, 9
164+
; SMALL-LOCAL-EXEC-LARGECM64-NEXT: stb r3, IThreadLocalVarInit[TL]@le(r13)
165+
; SMALL-LOCAL-EXEC-LARGECM64-NEXT: blr
166+
entry:
167+
%0 = tail call align 1 ptr @llvm.threadlocal.address.p0(ptr align 1 @IThreadLocalVarInit)
168+
%1 = load i8, ptr %0, align 1
169+
%add = add i8 %1, 9
170+
store i8 %add, ptr %0, align 1
171+
ret void
172+
}

0 commit comments

Comments
 (0)