66
66
#include " llvm/Support/Debug.h"
67
67
#include " llvm/Support/Error.h"
68
68
#include " llvm/Support/ErrorHandling.h"
69
+ #include " llvm/Support/MathExtras.h"
69
70
#include " llvm/Support/Process.h"
70
- #include " llvm/Support/raw_ostream.h"
71
71
#include " llvm/Support/Threading.h"
72
+ #include " llvm/Support/raw_ostream.h"
72
73
#include " llvm/Target/TargetMachine.h"
73
74
#include " llvm/TargetParser/Triple.h"
74
75
#include " llvm/Transforms/Utils/ModuleUtils.h"
@@ -155,6 +156,11 @@ class PPCAsmPrinter : public AsmPrinter {
155
156
TOC;
156
157
const PPCSubtarget *Subtarget = nullptr ;
157
158
159
+ // Keep track of the number of TLS variables and their corresponding
160
+ // addresses, which is then used for the assembly printing of
161
+ // non-TOC-based local-exec variables.
162
+ MapVector<const GlobalValue *, uint64_t > TLSVarsToAddressMapping;
163
+
158
164
public:
159
165
explicit PPCAsmPrinter (TargetMachine &TM,
160
166
std::unique_ptr<MCStreamer> Streamer)
@@ -199,6 +205,8 @@ class PPCAsmPrinter : public AsmPrinter {
199
205
void LowerPATCHPOINT (StackMaps &SM, const MachineInstr &MI);
200
206
void EmitTlsCall (const MachineInstr *MI, MCSymbolRefExpr::VariantKind VK);
201
207
void EmitAIXTlsCallHelper (const MachineInstr *MI);
208
+ const MCExpr *getAdjustedLocalExecExpr (const MachineOperand &MO,
209
+ int64_t Offset);
202
210
bool runOnMachineFunction (MachineFunction &MF) override {
203
211
Subtarget = &MF.getSubtarget <PPCSubtarget>();
204
212
bool Changed = AsmPrinter::runOnMachineFunction (MF);
@@ -753,6 +761,7 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
753
761
MCInst TmpInst;
754
762
const bool IsPPC64 = Subtarget->isPPC64 ();
755
763
const bool IsAIX = Subtarget->isAIXABI ();
764
+ const bool HasAIXSmallLocalExecTLS = Subtarget->hasAIXSmallLocalExecTLS ();
756
765
const Module *M = MF->getFunction ().getParent ();
757
766
PICLevel::Level PL = M->getPICLevel ();
758
767
@@ -1504,12 +1513,70 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
1504
1513
// Verify alignment is legal, so we don't create relocations
1505
1514
// that can't be supported.
1506
1515
unsigned OpNum = (MI->getOpcode () == PPC::STD) ? 2 : 1 ;
1516
+ // For non-TOC-based local-exec TLS accesses with non-zero offsets, the
1517
+ // machine operand (which is a TargetGlobalTLSAddress) is expected to be
1518
+ // the same operand for both loads and stores.
1519
+ for (const MachineOperand &TempMO : MI->operands ()) {
1520
+ if (((TempMO.getTargetFlags () == PPCII::MO_TPREL_FLAG)) &&
1521
+ TempMO.getOperandNo () == 1 )
1522
+ OpNum = 1 ;
1523
+ }
1507
1524
const MachineOperand &MO = MI->getOperand (OpNum);
1508
1525
if (MO.isGlobal ()) {
1509
1526
const DataLayout &DL = MO.getGlobal ()->getParent ()->getDataLayout ();
1510
1527
if (MO.getGlobal ()->getPointerAlignment (DL) < 4 )
1511
1528
llvm_unreachable (" Global must be word-aligned for LD, STD, LWA!" );
1512
1529
}
1530
+ // As these load/stores share common code with the following load/stores,
1531
+ // fall through to the subsequent cases in order to either process the
1532
+ // non-TOC-based local-exec sequence or to process the instruction normally.
1533
+ [[fallthrough]];
1534
+ }
1535
+ case PPC::LBZ:
1536
+ case PPC::LBZ8:
1537
+ case PPC::LHA:
1538
+ case PPC::LHA8:
1539
+ case PPC::LHZ:
1540
+ case PPC::LHZ8:
1541
+ case PPC::LWZ:
1542
+ case PPC::LWZ8:
1543
+ case PPC::STB:
1544
+ case PPC::STB8:
1545
+ case PPC::STH:
1546
+ case PPC::STH8:
1547
+ case PPC::STW:
1548
+ case PPC::STW8:
1549
+ case PPC::LFS:
1550
+ case PPC::STFS:
1551
+ case PPC::LFD:
1552
+ case PPC::STFD:
1553
+ case PPC::ADDI8: {
1554
+ // A faster non-TOC-based local-exec sequence is represented by `addi`
1555
+ // or a load/store instruction (that directly loads or stores off of the
1556
+ // thread pointer) with an immediate operand having the MO_TPREL_FLAG.
1557
+ // Such instructions do not otherwise arise.
1558
+ if (!HasAIXSmallLocalExecTLS)
1559
+ break ;
1560
+ bool IsMIADDI8 = MI->getOpcode () == PPC::ADDI8;
1561
+ unsigned OpNum = IsMIADDI8 ? 2 : 1 ;
1562
+ const MachineOperand &MO = MI->getOperand (OpNum);
1563
+ unsigned Flag = MO.getTargetFlags ();
1564
+ if (Flag == PPCII::MO_TPREL_FLAG ||
1565
+ Flag == PPCII::MO_GOT_TPREL_PCREL_FLAG ||
1566
+ Flag == PPCII::MO_TPREL_PCREL_FLAG) {
1567
+ LowerPPCMachineInstrToMCInst (MI, TmpInst, *this );
1568
+
1569
+ const MCExpr *Expr = getAdjustedLocalExecExpr (MO, MO.getOffset ());
1570
+ if (Expr)
1571
+ TmpInst.getOperand (OpNum) = MCOperand::createExpr (Expr);
1572
+
1573
+ // Change the opcode to load address if the original opcode is an `addi`.
1574
+ if (IsMIADDI8)
1575
+ TmpInst.setOpcode (PPC::LA8);
1576
+
1577
+ EmitToStreamer (*OutStreamer, TmpInst);
1578
+ return ;
1579
+ }
1513
1580
// Now process the instruction normally.
1514
1581
break ;
1515
1582
}
@@ -1523,30 +1590,73 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
1523
1590
EmitToStreamer (*OutStreamer, MCInstBuilder (PPC::EnforceIEIO));
1524
1591
return ;
1525
1592
}
1526
- case PPC::ADDI8: {
1527
- // The faster non-TOC-based local-exec sequence is represented by `addi`
1528
- // with an immediate operand having the MO_TPREL_FLAG. Such an instruction
1529
- // does not otherwise arise.
1530
- unsigned Flag = MI->getOperand (2 ).getTargetFlags ();
1531
- if (Flag == PPCII::MO_TPREL_FLAG ||
1532
- Flag == PPCII::MO_GOT_TPREL_PCREL_FLAG ||
1533
- Flag == PPCII::MO_TPREL_PCREL_FLAG) {
1534
- assert (
1535
- Subtarget->hasAIXSmallLocalExecTLS () &&
1536
- " addi with thread-pointer only expected with local-exec small TLS" );
1537
- LowerPPCMachineInstrToMCInst (MI, TmpInst, *this );
1538
- TmpInst.setOpcode (PPC::LA8);
1539
- EmitToStreamer (*OutStreamer, TmpInst);
1540
- return ;
1541
- }
1542
- break ;
1543
- }
1544
1593
}
1545
1594
1546
1595
LowerPPCMachineInstrToMCInst (MI, TmpInst, *this );
1547
1596
EmitToStreamer (*OutStreamer, TmpInst);
1548
1597
}
1549
1598
1599
+ // For non-TOC-based local-exec variables that have a non-zero offset,
1600
+ // we need to create a new MCExpr that adds the non-zero offset to the address
1601
+ // of the local-exec variable that will be used in either an addi, load or
1602
+ // store. However, the final displacement for these instructions must be
1603
+ // between [-32768, 32768), so if the TLS address + its non-zero offset is
1604
+ // greater than 32KB, a new MCExpr is produced to accommodate this situation.
1605
+ const MCExpr *PPCAsmPrinter::getAdjustedLocalExecExpr (const MachineOperand &MO,
1606
+ int64_t Offset) {
1607
+ // Non-zero offsets (for loads, stores or `addi`) require additional handling.
1608
+ // When the offset is zero, there is no need to create an adjusted MCExpr.
1609
+ if (!Offset)
1610
+ return nullptr ;
1611
+
1612
+ assert (MO.isGlobal () && " Only expecting a global MachineOperand here!" );
1613
+ const GlobalValue *GValue = MO.getGlobal ();
1614
+ assert (TM.getTLSModel (GValue) == TLSModel::LocalExec &&
1615
+ " Only local-exec accesses are handled!" );
1616
+
1617
+ bool IsGlobalADeclaration = GValue->isDeclarationForLinker ();
1618
+ // Find the GlobalVariable that corresponds to the particular TLS variable
1619
+ // in the TLS variable-to-address mapping. All TLS variables should exist
1620
+ // within this map, with the exception of TLS variables marked as extern.
1621
+ const auto TLSVarsMapEntryIter = TLSVarsToAddressMapping.find (GValue);
1622
+ if (TLSVarsMapEntryIter == TLSVarsToAddressMapping.end ())
1623
+ assert (IsGlobalADeclaration &&
1624
+ " Only expecting to find extern TLS variables not present in the TLS "
1625
+ " variable-to-address map!" );
1626
+
1627
+ unsigned TLSVarAddress =
1628
+ IsGlobalADeclaration ? 0 : TLSVarsMapEntryIter->second ;
1629
+ ptrdiff_t FinalAddress = (TLSVarAddress + Offset);
1630
+ // If the address of the TLS variable + the offset is less than 32KB,
1631
+ // or if the TLS variable is extern, we simply produce an MCExpr to add the
1632
+ // non-zero offset to the TLS variable address.
1633
+ // For when TLS variables are extern, this is safe to do because we can
1634
+ // assume that the address of extern TLS variables are zero.
1635
+ const MCExpr *Expr = MCSymbolRefExpr::create (
1636
+ getSymbol (GValue), MCSymbolRefExpr::VK_PPC_AIX_TLSLE, OutContext);
1637
+ Expr = MCBinaryExpr::createAdd (
1638
+ Expr, MCConstantExpr::create (Offset, OutContext), OutContext);
1639
+ if (FinalAddress >= 32768 ) {
1640
+ // Handle the written offset for cases where:
1641
+ // TLS variable address + Offset > 32KB.
1642
+
1643
+ // The assembly that is printed will look like:
1644
+ // TLSVar@le + Offset - Delta
1645
+ // where Delta is a multiple of 64KB: ((FinalAddress + 32768) & ~0xFFFF).
1646
+ ptrdiff_t Delta = ((FinalAddress + 32768 ) & ~0xFFFF );
1647
+ // Check that the total instruction displacement fits within [-32768,32768).
1648
+ ptrdiff_t InstDisp = TLSVarAddress + Offset - Delta;
1649
+ assert ((InstDisp < 32768 ) ||
1650
+ (InstDisp >= -32768 ) &&
1651
+ " Expecting the instruction displacement for local-exec TLS "
1652
+ " variables to be between [-32768, 32768)!" );
1653
+ Expr = MCBinaryExpr::createAdd (
1654
+ Expr, MCConstantExpr::create (-Delta, OutContext), OutContext);
1655
+ }
1656
+
1657
+ return Expr;
1658
+ }
1659
+
1550
1660
void PPCLinuxAsmPrinter::emitGNUAttributes (Module &M) {
1551
1661
// Emit float ABI into GNU attribute
1552
1662
Metadata *MD = M.getModuleFlag (" float-abi" );
@@ -2772,6 +2882,19 @@ bool PPCAIXAsmPrinter::doInitialization(Module &M) {
2772
2882
Csect->ensureMinAlignment (GOAlign);
2773
2883
};
2774
2884
2885
+ // For all TLS variables, calculate their corresponding addresses and store
2886
+ // them into TLSVarsToAddressMapping, which will be used to determine whether
2887
+ // or not local-exec TLS variables require special assembly printing.
2888
+ uint64_t TLSVarAddress = 0 ;
2889
+ auto DL = M.getDataLayout ();
2890
+ for (const auto &G : M.globals ()) {
2891
+ if (G.isThreadLocal () && !G.isDeclaration ()) {
2892
+ TLSVarAddress = alignTo (TLSVarAddress, getGVAlignment (&G, DL));
2893
+ TLSVarsToAddressMapping[&G] = TLSVarAddress;
2894
+ TLSVarAddress += DL.getTypeAllocSize (G.getValueType ());
2895
+ }
2896
+ }
2897
+
2775
2898
// We need to know, up front, the alignment of csects for the assembly path,
2776
2899
// because once a .csect directive gets emitted, we could not change the
2777
2900
// alignment value on it.
0 commit comments