Skip to content

Commit 2a50921

Browse files
authored
[AIX][TLS] Optimize the small local-exec access sequence for non-zero offsets (#71485)
This patch utilizes the -maix-small-local-exec-tls option to produce a faster, non-TOC-based access sequence for the local-exec TLS model, specifically for when the offsets from the TLS variable are non-zero.

In particular, where R13 is the thread pointer, this patch produces either a single:
- addi/la with a displacement off of R13 plus a non-zero offset, for when an address is calculated, or
- load or store off of R13 plus a non-zero offset, for when an address is calculated and used for further access.

In order to produce a single addi or load/store off of the thread pointer with a non-zero offset, this patch also adds the necessary support in the assembly printer when printing these instructions. Specifically:
- The non-zero offset is added to the TLS variable address when the address of the TLS variable + its offset is less than 32KB.
- Otherwise, when the address of the TLS variable + its offset is greater than or equal to 32KB, the non-zero offset is added to, and a multiple of 64KB is subtracted from, the TLS variable address.

This handling in the assembly printer is necessary to ensure that the TLS address + the non-zero offset is between [-32768, 32768), so that the total displacement can fit within the addi/load/store instructions.

This patch is meant to be a follow-up to 3f46e54 (where the optimization occurs for when the offset is zero).
1 parent 15295d0 commit 2a50921

9 files changed

+515
-155
lines changed

llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp

Lines changed: 142 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -66,9 +66,10 @@
6666
#include "llvm/Support/Debug.h"
6767
#include "llvm/Support/Error.h"
6868
#include "llvm/Support/ErrorHandling.h"
69+
#include "llvm/Support/MathExtras.h"
6970
#include "llvm/Support/Process.h"
70-
#include "llvm/Support/raw_ostream.h"
7171
#include "llvm/Support/Threading.h"
72+
#include "llvm/Support/raw_ostream.h"
7273
#include "llvm/Target/TargetMachine.h"
7374
#include "llvm/TargetParser/Triple.h"
7475
#include "llvm/Transforms/Utils/ModuleUtils.h"
@@ -155,6 +156,11 @@ class PPCAsmPrinter : public AsmPrinter {
155156
TOC;
156157
const PPCSubtarget *Subtarget = nullptr;
157158

159+
// Keep track of the number of TLS variables and their corresponding
160+
// addresses, which is then used for the assembly printing of
161+
// non-TOC-based local-exec variables.
162+
MapVector<const GlobalValue *, uint64_t> TLSVarsToAddressMapping;
163+
158164
public:
159165
explicit PPCAsmPrinter(TargetMachine &TM,
160166
std::unique_ptr<MCStreamer> Streamer)
@@ -199,6 +205,8 @@ class PPCAsmPrinter : public AsmPrinter {
199205
void LowerPATCHPOINT(StackMaps &SM, const MachineInstr &MI);
200206
void EmitTlsCall(const MachineInstr *MI, MCSymbolRefExpr::VariantKind VK);
201207
void EmitAIXTlsCallHelper(const MachineInstr *MI);
208+
const MCExpr *getAdjustedLocalExecExpr(const MachineOperand &MO,
209+
int64_t Offset);
202210
bool runOnMachineFunction(MachineFunction &MF) override {
203211
Subtarget = &MF.getSubtarget<PPCSubtarget>();
204212
bool Changed = AsmPrinter::runOnMachineFunction(MF);
@@ -753,6 +761,7 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
753761
MCInst TmpInst;
754762
const bool IsPPC64 = Subtarget->isPPC64();
755763
const bool IsAIX = Subtarget->isAIXABI();
764+
const bool HasAIXSmallLocalExecTLS = Subtarget->hasAIXSmallLocalExecTLS();
756765
const Module *M = MF->getFunction().getParent();
757766
PICLevel::Level PL = M->getPICLevel();
758767

@@ -1504,12 +1513,70 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
15041513
// Verify alignment is legal, so we don't create relocations
15051514
// that can't be supported.
15061515
unsigned OpNum = (MI->getOpcode() == PPC::STD) ? 2 : 1;
1516+
// For non-TOC-based local-exec TLS accesses with non-zero offsets, the
1517+
// machine operand (which is a TargetGlobalTLSAddress) is expected to be
1518+
// the same operand for both loads and stores.
1519+
for (const MachineOperand &TempMO : MI->operands()) {
1520+
if (((TempMO.getTargetFlags() == PPCII::MO_TPREL_FLAG)) &&
1521+
TempMO.getOperandNo() == 1)
1522+
OpNum = 1;
1523+
}
15071524
const MachineOperand &MO = MI->getOperand(OpNum);
15081525
if (MO.isGlobal()) {
15091526
const DataLayout &DL = MO.getGlobal()->getParent()->getDataLayout();
15101527
if (MO.getGlobal()->getPointerAlignment(DL) < 4)
15111528
llvm_unreachable("Global must be word-aligned for LD, STD, LWA!");
15121529
}
1530+
// As these load/stores share common code with the following load/stores,
1531+
// fall through to the subsequent cases in order to either process the
1532+
// non-TOC-based local-exec sequence or to process the instruction normally.
1533+
[[fallthrough]];
1534+
}
1535+
case PPC::LBZ:
1536+
case PPC::LBZ8:
1537+
case PPC::LHA:
1538+
case PPC::LHA8:
1539+
case PPC::LHZ:
1540+
case PPC::LHZ8:
1541+
case PPC::LWZ:
1542+
case PPC::LWZ8:
1543+
case PPC::STB:
1544+
case PPC::STB8:
1545+
case PPC::STH:
1546+
case PPC::STH8:
1547+
case PPC::STW:
1548+
case PPC::STW8:
1549+
case PPC::LFS:
1550+
case PPC::STFS:
1551+
case PPC::LFD:
1552+
case PPC::STFD:
1553+
case PPC::ADDI8: {
1554+
// A faster non-TOC-based local-exec sequence is represented by `addi`
1555+
// or a load/store instruction (that directly loads or stores off of the
1556+
// thread pointer) with an immediate operand having the MO_TPREL_FLAG.
1557+
// Such instructions do not otherwise arise.
1558+
if (!HasAIXSmallLocalExecTLS)
1559+
break;
1560+
bool IsMIADDI8 = MI->getOpcode() == PPC::ADDI8;
1561+
unsigned OpNum = IsMIADDI8 ? 2 : 1;
1562+
const MachineOperand &MO = MI->getOperand(OpNum);
1563+
unsigned Flag = MO.getTargetFlags();
1564+
if (Flag == PPCII::MO_TPREL_FLAG ||
1565+
Flag == PPCII::MO_GOT_TPREL_PCREL_FLAG ||
1566+
Flag == PPCII::MO_TPREL_PCREL_FLAG) {
1567+
LowerPPCMachineInstrToMCInst(MI, TmpInst, *this);
1568+
1569+
const MCExpr *Expr = getAdjustedLocalExecExpr(MO, MO.getOffset());
1570+
if (Expr)
1571+
TmpInst.getOperand(OpNum) = MCOperand::createExpr(Expr);
1572+
1573+
// Change the opcode to load address if the original opcode is an `addi`.
1574+
if (IsMIADDI8)
1575+
TmpInst.setOpcode(PPC::LA8);
1576+
1577+
EmitToStreamer(*OutStreamer, TmpInst);
1578+
return;
1579+
}
15131580
// Now process the instruction normally.
15141581
break;
15151582
}
@@ -1523,30 +1590,73 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
15231590
EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::EnforceIEIO));
15241591
return;
15251592
}
1526-
case PPC::ADDI8: {
1527-
// The faster non-TOC-based local-exec sequence is represented by `addi`
1528-
// with an immediate operand having the MO_TPREL_FLAG. Such an instruction
1529-
// does not otherwise arise.
1530-
unsigned Flag = MI->getOperand(2).getTargetFlags();
1531-
if (Flag == PPCII::MO_TPREL_FLAG ||
1532-
Flag == PPCII::MO_GOT_TPREL_PCREL_FLAG ||
1533-
Flag == PPCII::MO_TPREL_PCREL_FLAG) {
1534-
assert(
1535-
Subtarget->hasAIXSmallLocalExecTLS() &&
1536-
"addi with thread-pointer only expected with local-exec small TLS");
1537-
LowerPPCMachineInstrToMCInst(MI, TmpInst, *this);
1538-
TmpInst.setOpcode(PPC::LA8);
1539-
EmitToStreamer(*OutStreamer, TmpInst);
1540-
return;
1541-
}
1542-
break;
1543-
}
15441593
}
15451594

15461595
LowerPPCMachineInstrToMCInst(MI, TmpInst, *this);
15471596
EmitToStreamer(*OutStreamer, TmpInst);
15481597
}
15491598

1599+
// For non-TOC-based local-exec variables that have a non-zero offset,
1600+
// we need to create a new MCExpr that adds the non-zero offset to the address
1601+
// of the local-exec variable that will be used in either an addi, load or
1602+
// store. However, the final displacement for these instructions must be
1603+
// between [-32768, 32768), so if the TLS address + its non-zero offset is
1604+
// greater than 32KB, a new MCExpr is produced to accommodate this situation.
1605+
const MCExpr *PPCAsmPrinter::getAdjustedLocalExecExpr(const MachineOperand &MO,
1606+
int64_t Offset) {
1607+
// Non-zero offsets (for loads, stores or `addi`) require additional handling.
1608+
// When the offset is zero, there is no need to create an adjusted MCExpr.
1609+
if (!Offset)
1610+
return nullptr;
1611+
1612+
assert(MO.isGlobal() && "Only expecting a global MachineOperand here!");
1613+
const GlobalValue *GValue = MO.getGlobal();
1614+
assert(TM.getTLSModel(GValue) == TLSModel::LocalExec &&
1615+
"Only local-exec accesses are handled!");
1616+
1617+
bool IsGlobalADeclaration = GValue->isDeclarationForLinker();
1618+
// Find the GlobalVariable that corresponds to the particular TLS variable
1619+
// in the TLS variable-to-address mapping. All TLS variables should exist
1620+
// within this map, with the exception of TLS variables marked as extern.
1621+
const auto TLSVarsMapEntryIter = TLSVarsToAddressMapping.find(GValue);
1622+
if (TLSVarsMapEntryIter == TLSVarsToAddressMapping.end())
1623+
assert(IsGlobalADeclaration &&
1624+
"Only expecting to find extern TLS variables not present in the TLS "
1625+
"variable-to-address map!");
1626+
1627+
unsigned TLSVarAddress =
1628+
IsGlobalADeclaration ? 0 : TLSVarsMapEntryIter->second;
1629+
ptrdiff_t FinalAddress = (TLSVarAddress + Offset);
1630+
// If the address of the TLS variable + the offset is less than 32KB,
1631+
// or if the TLS variable is extern, we simply produce an MCExpr to add the
1632+
// non-zero offset to the TLS variable address.
1633+
// For when TLS variables are extern, this is safe to do because we can
1634+
// assume that the address of extern TLS variables are zero.
1635+
const MCExpr *Expr = MCSymbolRefExpr::create(
1636+
getSymbol(GValue), MCSymbolRefExpr::VK_PPC_AIX_TLSLE, OutContext);
1637+
Expr = MCBinaryExpr::createAdd(
1638+
Expr, MCConstantExpr::create(Offset, OutContext), OutContext);
1639+
if (FinalAddress >= 32768) {
1640+
// Handle the written offset for cases where:
1641+
// TLS variable address + Offset > 32KB.
1642+
1643+
// The assembly that is printed will look like:
1644+
// TLSVar@le + Offset - Delta
1645+
// where Delta is a multiple of 64KB: ((FinalAddress + 32768) & ~0xFFFF).
1646+
ptrdiff_t Delta = ((FinalAddress + 32768) & ~0xFFFF);
1647+
// Check that the total instruction displacement fits within [-32768,32768).
1648+
ptrdiff_t InstDisp = TLSVarAddress + Offset - Delta;
1649+
assert((InstDisp < 32768) ||
1650+
(InstDisp >= -32768) &&
1651+
"Expecting the instruction displacement for local-exec TLS "
1652+
"variables to be between [-32768, 32768)!");
1653+
Expr = MCBinaryExpr::createAdd(
1654+
Expr, MCConstantExpr::create(-Delta, OutContext), OutContext);
1655+
}
1656+
1657+
return Expr;
1658+
}
1659+
15501660
void PPCLinuxAsmPrinter::emitGNUAttributes(Module &M) {
15511661
// Emit float ABI into GNU attribute
15521662
Metadata *MD = M.getModuleFlag("float-abi");
@@ -2772,6 +2882,19 @@ bool PPCAIXAsmPrinter::doInitialization(Module &M) {
27722882
Csect->ensureMinAlignment(GOAlign);
27732883
};
27742884

2885+
// For all TLS variables, calculate their corresponding addresses and store
2886+
// them into TLSVarsToAddressMapping, which will be used to determine whether
2887+
// or not local-exec TLS variables require special assembly printing.
2888+
uint64_t TLSVarAddress = 0;
2889+
auto DL = M.getDataLayout();
2890+
for (const auto &G : M.globals()) {
2891+
if (G.isThreadLocal() && !G.isDeclaration()) {
2892+
TLSVarAddress = alignTo(TLSVarAddress, getGVAlignment(&G, DL));
2893+
TLSVarsToAddressMapping[&G] = TLSVarAddress;
2894+
TLSVarAddress += DL.getTypeAllocSize(G.getValueType());
2895+
}
2896+
}
2897+
27752898
// We need to know, up front, the alignment of csects for the assembly path,
27762899
// because once a .csect directive gets emitted, we could not change the
27772900
// alignment value on it.

llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp

Lines changed: 107 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7562,8 +7562,98 @@ static void reduceVSXSwap(SDNode *N, SelectionDAG *DAG) {
75627562
DAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), N->getOperand(0));
75637563
}
75647564

7565+
// Is an ADDI eligible for folding for non-TOC-based local-exec accesses?
7566+
static bool isEligibleToFoldADDIForLocalExecAccesses(SelectionDAG *DAG,
7567+
SDValue ADDIToFold) {
7568+
// Check if ADDIToFold (the ADDI that we want to fold into local-exec
7569+
// accesses), is truly an ADDI.
7570+
if (!ADDIToFold.isMachineOpcode() ||
7571+
(ADDIToFold.getMachineOpcode() != PPC::ADDI8))
7572+
return false;
7573+
7574+
// The first operand of the ADDIToFold should be the thread pointer.
7575+
// This transformation is only performed if the first operand of the
7576+
// addi is the thread pointer.
7577+
SDValue TPRegNode = ADDIToFold.getOperand(0);
7578+
RegisterSDNode *TPReg = dyn_cast<RegisterSDNode>(TPRegNode.getNode());
7579+
const PPCSubtarget &Subtarget =
7580+
DAG->getMachineFunction().getSubtarget<PPCSubtarget>();
7581+
if (!TPReg || (TPReg->getReg() != Subtarget.getThreadPointerRegister()))
7582+
return false;
7583+
7584+
// The second operand of the ADDIToFold should be the global TLS address
7585+
// (the local-exec TLS variable). We only perform the folding if the TLS
7586+
// variable is the second operand.
7587+
SDValue TLSVarNode = ADDIToFold.getOperand(1);
7588+
GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(TLSVarNode);
7589+
if (!GA)
7590+
return false;
7591+
7592+
// The local-exec TLS variable should only have the MO_TPREL_FLAG target flag,
7593+
// so this optimization is not performed otherwise if the flag is not set.
7594+
unsigned TargetFlags = GA->getTargetFlags();
7595+
if (TargetFlags != PPCII::MO_TPREL_FLAG)
7596+
return false;
7597+
7598+
// If all conditions are satisfied, the ADDI is valid for folding.
7599+
return true;
7600+
}
7601+
7602+
// For non-TOC-based local-exec access where an addi is feeding into another
7603+
// addi, fold this sequence into a single addi if possible.
7604+
// Before this optimization, the sequence appears as:
7605+
// addi rN, r13, sym@le
7606+
// addi rM, rN, imm
7607+
// After this optimization, we can fold the two addi into a single one:
7608+
// addi rM, r13, sym@le + imm
7609+
static void foldADDIForLocalExecAccesses(SDNode *N, SelectionDAG *DAG) {
7610+
if (N->getMachineOpcode() != PPC::ADDI8)
7611+
return;
7612+
7613+
// InitialADDI is the addi feeding into N (also an addi), and the addi that
7614+
// we want optimized out.
7615+
SDValue InitialADDI = N->getOperand(0);
7616+
7617+
if (!isEligibleToFoldADDIForLocalExecAccesses(DAG, InitialADDI))
7618+
return;
7619+
7620+
// At this point, InitialADDI can be folded into a non-TOC-based local-exec
7621+
// access. The first operand of InitialADDI should be the thread pointer,
7622+
// which has been checked in isEligibleToFoldADDIForLocalExecAccesses().
7623+
SDValue TPRegNode = InitialADDI.getOperand(0);
7624+
RegisterSDNode *TPReg = dyn_cast<RegisterSDNode>(TPRegNode.getNode());
7625+
const PPCSubtarget &Subtarget =
7626+
DAG->getMachineFunction().getSubtarget<PPCSubtarget>();
7627+
assert((TPReg && (TPReg->getReg() == Subtarget.getThreadPointerRegister())) &&
7628+
"Expecting the first operand to be a thread pointer for folding addi "
7629+
"in local-exec accesses!");
7630+
7631+
// The second operand of the InitialADDI should be the global TLS address
7632+
// (the local-exec TLS variable), with the MO_TPREL_FLAG target flag.
7633+
// This has been checked in isEligibleToFoldADDIForLocalExecAccesses().
7634+
SDValue TLSVarNode = InitialADDI.getOperand(1);
7635+
GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(TLSVarNode);
7636+
assert(GA && "Expecting a valid GlobalAddressSDNode when folding addi into "
7637+
"local-exec accesses!");
7638+
unsigned TargetFlags = GA->getTargetFlags();
7639+
7640+
// The second operand of the addi that we want to preserve will be an
7641+
// immediate. We add this immediate, together with the address of the TLS
7642+
// variable found in InitialADDI, in order to preserve the correct TLS address
7643+
// information during assembly printing. The offset is likely to be non-zero
7644+
// when we end up in this case.
7645+
int Offset = N->getConstantOperandVal(1);
7646+
TLSVarNode = DAG->getTargetGlobalAddress(GA->getGlobal(), SDLoc(GA), MVT::i64,
7647+
Offset, TargetFlags);
7648+
7649+
(void)DAG->UpdateNodeOperands(N, TPRegNode, TLSVarNode);
7650+
if (InitialADDI.getNode()->use_empty())
7651+
DAG->RemoveDeadNode(InitialADDI.getNode());
7652+
}
7653+
75657654
void PPCDAGToDAGISel::PeepholePPC64() {
75667655
SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
7656+
bool HasAIXSmallLocalExecTLS = Subtarget->hasAIXSmallLocalExecTLS();
75677657

75687658
while (Position != CurDAG->allnodes_begin()) {
75697659
SDNode *N = &*--Position;
@@ -7574,6 +7664,10 @@ void PPCDAGToDAGISel::PeepholePPC64() {
75747664
if (isVSXSwap(SDValue(N, 0)))
75757665
reduceVSXSwap(N, CurDAG);
75767666

7667+
// This optimization is performed for non-TOC-based local-exec accesses.
7668+
if (HasAIXSmallLocalExecTLS)
7669+
foldADDIForLocalExecAccesses(N, CurDAG);
7670+
75777671
unsigned FirstOp;
75787672
unsigned StorageOpcode = N->getMachineOpcode();
75797673
bool RequiresMod4Offset = false;
@@ -7730,7 +7824,19 @@ void PPCDAGToDAGISel::PeepholePPC64() {
77307824
ImmOpnd = CurDAG->getTargetConstant(Offset, SDLoc(ImmOpnd),
77317825
ImmOpnd.getValueType());
77327826
} else if (Offset != 0) {
7733-
continue;
7827+
// This optimization is performed for non-TOC-based local-exec accesses.
7828+
if (HasAIXSmallLocalExecTLS &&
7829+
isEligibleToFoldADDIForLocalExecAccesses(CurDAG, Base)) {
7830+
// Add the non-zero offset information into the load or store
7831+
// instruction to be used for non-TOC-based local-exec accesses.
7832+
GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(ImmOpnd);
7833+
assert(GA && "Expecting a valid GlobalAddressSDNode when folding "
7834+
"addi into local-exec accesses!");
7835+
ImmOpnd = CurDAG->getTargetGlobalAddress(GA->getGlobal(), SDLoc(GA),
7836+
MVT::i64, Offset,
7837+
GA->getTargetFlags());
7838+
} else
7839+
continue;
77347840
}
77357841
}
77367842

llvm/test/CodeGen/PowerPC/aix-small-local-exec-tls-char.ll

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -16,14 +16,12 @@ declare nonnull ptr @llvm.threadlocal.address.p0(ptr nonnull) #1
1616
define nonnull ptr @AddrTest1() local_unnamed_addr #0 {
1717
; SMALL-LOCAL-EXEC-SMALLCM64-LABEL: AddrTest1:
1818
; SMALL-LOCAL-EXEC-SMALLCM64: # %bb.0: # %entry
19-
; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: la r3, c[TL]@le(r13)
20-
; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: addi r3, r3, 1
19+
; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: la r3, c[TL]@le+1(r13)
2120
; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: blr
2221
;
2322
; SMALL-LOCAL-EXEC-LARGECM64-LABEL: AddrTest1:
2423
; SMALL-LOCAL-EXEC-LARGECM64: # %bb.0: # %entry
25-
; SMALL-LOCAL-EXEC-LARGECM64-NEXT: la r3, c[TL]@le(r13)
26-
; SMALL-LOCAL-EXEC-LARGECM64-NEXT: addi r3, r3, 1
24+
; SMALL-LOCAL-EXEC-LARGECM64-NEXT: la r3, c[TL]@le+1(r13)
2725
; SMALL-LOCAL-EXEC-LARGECM64-NEXT: blr
2826
entry:
2927
%0 = tail call align 1 ptr @llvm.threadlocal.address.p0(ptr align 1 @c)

llvm/test/CodeGen/PowerPC/aix-small-local-exec-tls-double.ll

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -16,14 +16,12 @@ declare nonnull ptr @llvm.threadlocal.address.p0(ptr nonnull) #1
1616
define nonnull ptr @AddrTest1() local_unnamed_addr #0 {
1717
; SMALL-LOCAL-EXEC-SMALLCM64-LABEL: AddrTest1:
1818
; SMALL-LOCAL-EXEC-SMALLCM64: # %bb.0: # %entry
19-
; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: la r3, f[TL]@le(r13)
20-
; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: addi r3, r3, 48
19+
; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: la r3, f[TL]@le+48(r13)
2120
; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: blr
2221
;
2322
; SMALL-LOCAL-EXEC-LARGECM64-LABEL: AddrTest1:
2423
; SMALL-LOCAL-EXEC-LARGECM64: # %bb.0: # %entry
25-
; SMALL-LOCAL-EXEC-LARGECM64-NEXT: la r3, f[TL]@le(r13)
26-
; SMALL-LOCAL-EXEC-LARGECM64-NEXT: addi r3, r3, 48
24+
; SMALL-LOCAL-EXEC-LARGECM64-NEXT: la r3, f[TL]@le+48(r13)
2725
; SMALL-LOCAL-EXEC-LARGECM64-NEXT: blr
2826
entry:
2927
%0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @f)

0 commit comments

Comments
 (0)