Skip to content

[AIX][TLS] Optimize the small local-exec access sequence for non-zero offsets #71485

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
161 changes: 142 additions & 19 deletions llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -66,9 +66,10 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/Process.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/Threading.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/TargetParser/Triple.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
Expand Down Expand Up @@ -155,6 +156,11 @@ class PPCAsmPrinter : public AsmPrinter {
TOC;
const PPCSubtarget *Subtarget = nullptr;

// Keep track of the number of TLS variables and their corresponding
// addresses, which is then used for the assembly printing of
// non-TOC-based local-exec variables.
MapVector<const GlobalValue *, uint64_t> TLSVarsToAddressMapping;

public:
explicit PPCAsmPrinter(TargetMachine &TM,
std::unique_ptr<MCStreamer> Streamer)
Expand Down Expand Up @@ -199,6 +205,8 @@ class PPCAsmPrinter : public AsmPrinter {
void LowerPATCHPOINT(StackMaps &SM, const MachineInstr &MI);
void EmitTlsCall(const MachineInstr *MI, MCSymbolRefExpr::VariantKind VK);
void EmitAIXTlsCallHelper(const MachineInstr *MI);
const MCExpr *getAdjustedLocalExecExpr(const MachineOperand &MO,
int64_t Offset);
bool runOnMachineFunction(MachineFunction &MF) override {
Subtarget = &MF.getSubtarget<PPCSubtarget>();
bool Changed = AsmPrinter::runOnMachineFunction(MF);
Expand Down Expand Up @@ -753,6 +761,7 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
MCInst TmpInst;
const bool IsPPC64 = Subtarget->isPPC64();
const bool IsAIX = Subtarget->isAIXABI();
const bool HasAIXSmallLocalExecTLS = Subtarget->hasAIXSmallLocalExecTLS();
const Module *M = MF->getFunction().getParent();
PICLevel::Level PL = M->getPICLevel();

Expand Down Expand Up @@ -1504,12 +1513,70 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
// Verify alignment is legal, so we don't create relocations
// that can't be supported.
unsigned OpNum = (MI->getOpcode() == PPC::STD) ? 2 : 1;
// For non-TOC-based local-exec TLS accesses with non-zero offsets, the
// machine operand (which is a TargetGlobalTLSAddress) is expected to be
// the same operand for both loads and stores.
for (const MachineOperand &TempMO : MI->operands()) {
if (((TempMO.getTargetFlags() == PPCII::MO_TPREL_FLAG)) &&
TempMO.getOperandNo() == 1)
OpNum = 1;
}
const MachineOperand &MO = MI->getOperand(OpNum);
if (MO.isGlobal()) {
const DataLayout &DL = MO.getGlobal()->getParent()->getDataLayout();
if (MO.getGlobal()->getPointerAlignment(DL) < 4)
llvm_unreachable("Global must be word-aligned for LD, STD, LWA!");
}
// As these load/stores share common code with the following load/stores,
// fall through to the subsequent cases in order to either process the
// non-TOC-based local-exec sequence or to process the instruction normally.
[[fallthrough]];
}
case PPC::LBZ:
case PPC::LBZ8:
case PPC::LHA:
case PPC::LHA8:
case PPC::LHZ:
case PPC::LHZ8:
case PPC::LWZ:
case PPC::LWZ8:
case PPC::STB:
case PPC::STB8:
case PPC::STH:
case PPC::STH8:
case PPC::STW:
case PPC::STW8:
case PPC::LFS:
case PPC::STFS:
case PPC::LFD:
case PPC::STFD:
case PPC::ADDI8: {
// A faster non-TOC-based local-exec sequence is represented by `addi`
// or a load/store instruction (that directly loads or stores off of the
// thread pointer) with an immediate operand having the MO_TPREL_FLAG.
// Such instructions do not otherwise arise.
if (!HasAIXSmallLocalExecTLS)
break;
bool IsMIADDI8 = MI->getOpcode() == PPC::ADDI8;
unsigned OpNum = IsMIADDI8 ? 2 : 1;
const MachineOperand &MO = MI->getOperand(OpNum);
unsigned Flag = MO.getTargetFlags();
if (Flag == PPCII::MO_TPREL_FLAG ||
Flag == PPCII::MO_GOT_TPREL_PCREL_FLAG ||
Flag == PPCII::MO_TPREL_PCREL_FLAG) {
LowerPPCMachineInstrToMCInst(MI, TmpInst, *this);

const MCExpr *Expr = getAdjustedLocalExecExpr(MO, MO.getOffset());
if (Expr)
TmpInst.getOperand(OpNum) = MCOperand::createExpr(Expr);

// Change the opcode to load address if the original opcode is an `addi`.
if (IsMIADDI8)
TmpInst.setOpcode(PPC::LA8);

EmitToStreamer(*OutStreamer, TmpInst);
return;
}
// Now process the instruction normally.
break;
}
Expand All @@ -1523,30 +1590,73 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::EnforceIEIO));
return;
}
case PPC::ADDI8: {
// The faster non-TOC-based local-exec sequence is represented by `addi`
// with an immediate operand having the MO_TPREL_FLAG. Such an instruction
// does not otherwise arise.
unsigned Flag = MI->getOperand(2).getTargetFlags();
if (Flag == PPCII::MO_TPREL_FLAG ||
Flag == PPCII::MO_GOT_TPREL_PCREL_FLAG ||
Flag == PPCII::MO_TPREL_PCREL_FLAG) {
assert(
Subtarget->hasAIXSmallLocalExecTLS() &&
"addi with thread-pointer only expected with local-exec small TLS");
LowerPPCMachineInstrToMCInst(MI, TmpInst, *this);
TmpInst.setOpcode(PPC::LA8);
EmitToStreamer(*OutStreamer, TmpInst);
return;
}
break;
}
}

LowerPPCMachineInstrToMCInst(MI, TmpInst, *this);
EmitToStreamer(*OutStreamer, TmpInst);
}

// For non-TOC-based local-exec variables that have a non-zero offset,
// we need to create a new MCExpr that adds the non-zero offset to the address
// of the local-exec variable that will be used in either an addi, load or
// store. However, the final displacement for these instructions must be
// between [-32768, 32768), so if the TLS address + its non-zero offset is
// at least 32KB, a new MCExpr is produced to accommodate this situation.
//
// \param MO     The TLS global operand of the addi/load/store.
// \param Offset The byte offset applied on top of the TLS variable.
// \returns nullptr when \p Offset is zero (no adjusted expression is needed),
//          otherwise the adjusted expression.
const MCExpr *PPCAsmPrinter::getAdjustedLocalExecExpr(const MachineOperand &MO,
                                                      int64_t Offset) {
  // Non-zero offsets (for loads, stores or `addi`) require additional handling.
  // When the offset is zero, there is no need to create an adjusted MCExpr.
  if (!Offset)
    return nullptr;

  assert(MO.isGlobal() && "Only expecting a global MachineOperand here!");
  const GlobalValue *GValue = MO.getGlobal();
  assert(TM.getTLSModel(GValue) == TLSModel::LocalExec &&
         "Only local-exec accesses are handled!");

  bool IsGlobalADeclaration = GValue->isDeclarationForLinker();
  // Find the GlobalVariable that corresponds to the particular TLS variable
  // in the TLS variable-to-address mapping. All TLS variables should exist
  // within this map, with the exception of TLS variables marked as extern.
  const auto TLSVarsMapEntryIter = TLSVarsToAddressMapping.find(GValue);
  if (TLSVarsMapEntryIter == TLSVarsToAddressMapping.end())
    assert(IsGlobalADeclaration &&
           "Only expecting to find extern TLS variables not present in the TLS "
           "variable-to-address map!");

  // The map stores 64-bit addresses, so keep the full width here instead of
  // narrowing to `unsigned`. Signed 64-bit arithmetic is used throughout so
  // that the result does not depend on the host's `ptrdiff_t` width.
  const int64_t TLSVarAddress =
      IsGlobalADeclaration
          ? 0
          : static_cast<int64_t>(TLSVarsMapEntryIter->second);
  const int64_t FinalAddress = TLSVarAddress + Offset;
  // If the address of the TLS variable + the offset is less than 32KB,
  // or if the TLS variable is extern, we simply produce an MCExpr to add the
  // non-zero offset to the TLS variable address.
  // For when TLS variables are extern, this is safe to do because we can
  // assume that the address of extern TLS variables are zero.
  const MCExpr *Expr = MCSymbolRefExpr::create(
      getSymbol(GValue), MCSymbolRefExpr::VK_PPC_AIX_TLSLE, OutContext);
  Expr = MCBinaryExpr::createAdd(
      Expr, MCConstantExpr::create(Offset, OutContext), OutContext);
  if (FinalAddress >= 32768) {
    // Handle the written offset for cases where:
    //   TLS variable address + Offset >= 32KB.

    // The assembly that is printed will look like:
    //   TLSVar@le + Offset - Delta
    // where Delta is a multiple of 64KB: ((FinalAddress + 32768) & ~0xFFFF).
    const int64_t Delta = (FinalAddress + 32768) & ~static_cast<int64_t>(0xFFFF);
    // Check that the total instruction displacement fits within [-32768,32768).
    // Both bounds must hold at once: the previous `A || B && "msg"` form was
    // true for every value and therefore never checked anything.
    [[maybe_unused]] const int64_t InstDisp = FinalAddress - Delta;
    assert((InstDisp >= -32768 && InstDisp < 32768) &&
           "Expecting the instruction displacement for local-exec TLS "
           "variables to be between [-32768, 32768)!");
Comment on lines +1649 to +1652
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(not a review comment)

Per my understanding, if we allowed the peephole for non-zero offsets without the changes to AsmPrinter, we would get an assembler error such as "The displacement must be greater than or equal to -32768 and less than or equal to 32767" (no-integrated-as).

Since the code here rewrites offsets that exceed the upper limit (32768) into negative ones, will this assert be hit if the offset is even larger? (For example, with objects twice the size of those in aix-small-local-exec-tls-largeaccess.ll.)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For larger variables, the assert will not be triggered. This is because, in the initial patch that introduced this feature, I restricted this non-TOC-based access sequence within PPCISelLowering.cpp to TLS variables whose size is no greater than 32751.

constexpr uint64_t AIXSmallTlsPolicySizeLimit = 32751;
....

     // With the -maix-small-local-exec-tls option, produce a faster access
      // sequence for local-exec TLS variables where the offset from the TLS
      // base is encoded as an immediate operand.
      //
      // We only utilize the faster local-exec access sequence when the TLS
      // variable has a size within the policy limit. We treat types that are
      // not sized or are empty as being over the policy size limit.
      if (HasAIXSmallLocalExecTLS && IsTLSLocalExecModel) {
        Type *GVType = GV->getValueType();
        if (GVType->isSized() && !GVType->isEmptyTy() &&
            GV->getParent()->getDataLayout().getTypeAllocSize(GVType) <=
                AIXSmallTlsPolicySizeLimit)
          return DAG.getNode(PPCISD::Lo, dl, PtrVT, VariableOffsetTGA, TLSReg);
      }

So in the case where the size is 8187, the test case for the large variable in aix-small-local-exec-tls-largeaccess.ll looks like:

        stw r3, mySmallLocalExecTLSv1[TL]@le(r13)

If the size is increased past AIXSmallTlsPolicySizeLimit, it will load from the TOC instead and do the regular local-exec sequence, which is expected:

        ld r4, L..C0(r2)                        # target-flags(ppc-tprel) @mySmallLocalExecTLSv1
        li r3, 1
        add r4, r13, r4
        stw r3, 0(r4)
. . .

Hope the above answers your question.

Expr = MCBinaryExpr::createAdd(
Expr, MCConstantExpr::create(-Delta, OutContext), OutContext);
}

return Expr;
}

void PPCLinuxAsmPrinter::emitGNUAttributes(Module &M) {
// Emit float ABI into GNU attribute
Metadata *MD = M.getModuleFlag("float-abi");
Expand Down Expand Up @@ -2757,6 +2867,19 @@ bool PPCAIXAsmPrinter::doInitialization(Module &M) {
Csect->ensureMinAlignment(GOAlign);
};

// For all TLS variables, calculate their corresponding addresses and store
// them into TLSVarsToAddressMapping, which will be used to determine whether
// or not local-exec TLS variables require special assembly printing.
uint64_t TLSVarAddress = 0;
auto DL = M.getDataLayout();
for (const auto &G : M.globals()) {
if (G.isThreadLocal() && !G.isDeclaration()) {
TLSVarAddress = alignTo(TLSVarAddress, getGVAlignment(&G, DL));
TLSVarsToAddressMapping[&G] = TLSVarAddress;
TLSVarAddress += DL.getTypeAllocSize(G.getValueType());
}
}

// We need to know, up front, the alignment of csects for the assembly path,
// because once a .csect directive gets emitted, we could not change the
// alignment value on it.
Expand Down
108 changes: 107 additions & 1 deletion llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7565,8 +7565,98 @@ static void reduceVSXSwap(SDNode *N, SelectionDAG *DAG) {
DAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), N->getOperand(0));
}

// Decide whether ADDIToFold may be folded into a non-TOC-based local-exec
// access. This holds only when ADDIToFold is an ADDI8 machine node whose
// first operand is the subtarget's thread pointer register and whose second
// operand is a global address carrying exactly the MO_TPREL_FLAG target flag.
static bool isEligibleToFoldADDIForLocalExecAccesses(SelectionDAG *DAG,
                                                     SDValue ADDIToFold) {
  // Anything other than a selected ADDI8 is rejected up front.
  const bool IsSelectedADDI8 = ADDIToFold.isMachineOpcode() &&
                               ADDIToFold.getMachineOpcode() == PPC::ADDI8;
  if (!IsSelectedADDI8)
    return false;

  // Operand 0 must be a register node that names the thread pointer.
  SDValue BaseOperand = ADDIToFold.getOperand(0);
  RegisterSDNode *BaseReg = dyn_cast<RegisterSDNode>(BaseOperand.getNode());
  const PPCSubtarget &ST =
      DAG->getMachineFunction().getSubtarget<PPCSubtarget>();
  const bool IsThreadPointerBase =
      BaseReg && BaseReg->getReg() == ST.getThreadPointerRegister();
  if (!IsThreadPointerBase)
    return false;

  // Operand 1 must be the global TLS address (the local-exec TLS variable),
  // and MO_TPREL_FLAG must be the only target flag on it.
  SDValue SymbolOperand = ADDIToFold.getOperand(1);
  GlobalAddressSDNode *TLSVar = dyn_cast<GlobalAddressSDNode>(SymbolOperand);
  return TLSVar && TLSVar->getTargetFlags() == PPCII::MO_TPREL_FLAG;
}

// Fold an addi that feeds another addi for non-TOC-based local-exec accesses.
// The incoming sequence
//   addi rN, r13, sym@le
//   addi rM, rN, imm
// becomes the single instruction
//   addi rM, r13, sym@le + imm
// N is the second (outer) addi; it is rewritten in place, and the feeding addi
// is deleted once it has no remaining uses.
static void foldADDIForLocalExecAccesses(SDNode *N, SelectionDAG *DAG) {
  // Only an addi whose base operand is itself a foldable addi is of interest.
  if (N->getMachineOpcode() != PPC::ADDI8)
    return;
  SDValue FeedingADDI = N->getOperand(0);
  if (!isEligibleToFoldADDIForLocalExecAccesses(DAG, FeedingADDI))
    return;

  // The eligibility check has already established that operand 0 of the
  // feeding addi is the thread pointer; re-assert it here.
  SDValue ThreadPtr = FeedingADDI.getOperand(0);
  RegisterSDNode *ThreadPtrReg =
      dyn_cast<RegisterSDNode>(ThreadPtr.getNode());
  const PPCSubtarget &ST =
      DAG->getMachineFunction().getSubtarget<PPCSubtarget>();
  assert((ThreadPtrReg &&
          (ThreadPtrReg->getReg() == ST.getThreadPointerRegister())) &&
         "Expecting the first operand to be a thread pointer for folding addi "
         "in local-exec accesses!");

  // Likewise, operand 1 of the feeding addi is the local-exec TLS variable
  // carrying the MO_TPREL_FLAG target flag.
  GlobalAddressSDNode *TLSVar =
      dyn_cast<GlobalAddressSDNode>(FeedingADDI.getOperand(1));
  assert(TLSVar &&
         "Expecting a valid GlobalAddressSDNode when folding addi into "
         "local-exec accesses!");

  // The immediate of the outer addi becomes the offset of a fresh TLS address
  // node, so that the combined address information survives until assembly
  // printing. The immediate is likely to be non-zero in this case.
  int Imm = N->getConstantOperandVal(1);
  SDValue FoldedAddress = DAG->getTargetGlobalAddress(
      TLSVar->getGlobal(), SDLoc(TLSVar), MVT::i64, Imm,
      TLSVar->getTargetFlags());

  // Re-point N at the thread pointer plus the folded address, then drop the
  // feeding addi if nothing else uses it.
  (void)DAG->UpdateNodeOperands(N, ThreadPtr, FoldedAddress);
  SDNode *Feeder = FeedingADDI.getNode();
  if (Feeder->use_empty())
    DAG->RemoveDeadNode(Feeder);
}

void PPCDAGToDAGISel::PeepholePPC64() {
SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
bool HasAIXSmallLocalExecTLS = Subtarget->hasAIXSmallLocalExecTLS();

while (Position != CurDAG->allnodes_begin()) {
SDNode *N = &*--Position;
Expand All @@ -7577,6 +7667,10 @@ void PPCDAGToDAGISel::PeepholePPC64() {
if (isVSXSwap(SDValue(N, 0)))
reduceVSXSwap(N, CurDAG);

// This optimization is performed for non-TOC-based local-exec accesses.
if (HasAIXSmallLocalExecTLS)
foldADDIForLocalExecAccesses(N, CurDAG);
Copy link
Contributor

@diggerlin diggerlin Jan 22, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

if foldADDIForLocalExecAccesses success , It will be continue; the loop,

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this is fine. Since we can fold addi instructions first, and then after there may be load and store instructions that we can fold, which can only happen if we continue further into this loop.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

foldADDIForLocalExecAccesses succeeds only when N->getMachineOpcode() == PPC::ADDI8, and the opcode of N is not changed in foldADDIForLocalExecAccesses, so the opcode of N is still ADDI8.

It will therefore hit the continue here:

unsigned FirstOp;
    unsigned StorageOpcode = N->getMachineOpcode();
    bool RequiresMod4Offset = false;
 switch (StorageOpcode) {
    default: continue;

is it correct ?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think I see what you mean. If we have successfully done the transformation in foldADDIForLocalExecAccesses(), we will still have a PPC::ADDI8, and execution will continue down into the code below.

I think this is fine and is what I initially expected because, as you mentioned, it will hit the default: continue; code. That makes sense, since all of the code below that point checks whether N is a load/store (which will be false, since we have a PPC::ADDI8 in the current iteration), so it will move on to the next N (which could be an addi, load, store, etc.) and do the most suitable transformation depending on the opcode.

I think what I have done here would be similar to the check and transformation above my change,

    if (isVSXSwap(SDValue(N, 0)))
      reduceVSXSwap(N, CurDAG);

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Continuing the loop earlier would save compile time.
Anyway, you can keep your code; I don't have a strong preference.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for the suggestion, Digger. I was initially thinking of keeping the code as is.

However, just so I understand your suggestion, are you suggesting the following or am I mistaken?

    if (HasAIXSmallLocalExecTLS) {
      foldADDIForLocalExecAccesses(N, CurDAG);
      continue;
    }

In the case where I have something like:

SelectionDAG has 9 nodes:
  t0: ch,glue = EntryToken
      t2: i64,ch = CopyFromReg t0, Register:i64 %0
      t22: i64 = ADDI8 Register:i64 $x13, TargetGlobalTLSAddress:i64<ptr @IThreadLocalVarInit> 0 [TF=7]
    t17: ch = STB8<Mem:(store (s8) into %ir.0)> t2, TargetConstant:i64<0>, t22, t0
  t13: ch = BLR8 t17

It will visit the STB8 first, and it will fail the transformation for foldADDIForLocalExecAccesses(), but since I have a continue; right after, it will not proceed below to optimize the store.

Just wanted to double check if I understood your suggestion correctly, because I think currently as I understand it, it doesn't seem like it would work in this situation.


unsigned FirstOp;
unsigned StorageOpcode = N->getMachineOpcode();
bool RequiresMod4Offset = false;
Expand Down Expand Up @@ -7733,7 +7827,19 @@ void PPCDAGToDAGISel::PeepholePPC64() {
ImmOpnd = CurDAG->getTargetConstant(Offset, SDLoc(ImmOpnd),
ImmOpnd.getValueType());
} else if (Offset != 0) {
continue;
// This optimization is performed for non-TOC-based local-exec accesses.
if (HasAIXSmallLocalExecTLS &&
isEligibleToFoldADDIForLocalExecAccesses(CurDAG, Base)) {
// Add the non-zero offset information into the load or store
// instruction to be used for non-TOC-based local-exec accesses.
GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(ImmOpnd);
assert(GA && "Expecting a valid GlobalAddressSDNode when folding "
"addi into local-exec accesses!");
ImmOpnd = CurDAG->getTargetGlobalAddress(GA->getGlobal(), SDLoc(GA),
MVT::i64, Offset,
GA->getTargetFlags());
} else
continue;
}
}

Expand Down
6 changes: 2 additions & 4 deletions llvm/test/CodeGen/PowerPC/aix-small-local-exec-tls-char.ll
Original file line number Diff line number Diff line change
Expand Up @@ -16,14 +16,12 @@ declare nonnull ptr @llvm.threadlocal.address.p0(ptr nonnull) #1
define nonnull ptr @AddrTest1() local_unnamed_addr #0 {
; SMALL-LOCAL-EXEC-SMALLCM64-LABEL: AddrTest1:
; SMALL-LOCAL-EXEC-SMALLCM64: # %bb.0: # %entry
; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: la r3, c[TL]@le(r13)
; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: addi r3, r3, 1
; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: la r3, c[TL]@le+1(r13)
; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: blr
;
; SMALL-LOCAL-EXEC-LARGECM64-LABEL: AddrTest1:
; SMALL-LOCAL-EXEC-LARGECM64: # %bb.0: # %entry
; SMALL-LOCAL-EXEC-LARGECM64-NEXT: la r3, c[TL]@le(r13)
; SMALL-LOCAL-EXEC-LARGECM64-NEXT: addi r3, r3, 1
; SMALL-LOCAL-EXEC-LARGECM64-NEXT: la r3, c[TL]@le+1(r13)
; SMALL-LOCAL-EXEC-LARGECM64-NEXT: blr
entry:
%0 = tail call align 1 ptr @llvm.threadlocal.address.p0(ptr align 1 @c)
Expand Down
6 changes: 2 additions & 4 deletions llvm/test/CodeGen/PowerPC/aix-small-local-exec-tls-double.ll
Original file line number Diff line number Diff line change
Expand Up @@ -16,14 +16,12 @@ declare nonnull ptr @llvm.threadlocal.address.p0(ptr nonnull) #1
define nonnull ptr @AddrTest1() local_unnamed_addr #0 {
; SMALL-LOCAL-EXEC-SMALLCM64-LABEL: AddrTest1:
; SMALL-LOCAL-EXEC-SMALLCM64: # %bb.0: # %entry
; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: la r3, f[TL]@le(r13)
; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: addi r3, r3, 48
; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: la r3, f[TL]@le+48(r13)
; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: blr
;
; SMALL-LOCAL-EXEC-LARGECM64-LABEL: AddrTest1:
; SMALL-LOCAL-EXEC-LARGECM64: # %bb.0: # %entry
; SMALL-LOCAL-EXEC-LARGECM64-NEXT: la r3, f[TL]@le(r13)
; SMALL-LOCAL-EXEC-LARGECM64-NEXT: addi r3, r3, 48
; SMALL-LOCAL-EXEC-LARGECM64-NEXT: la r3, f[TL]@le+48(r13)
; SMALL-LOCAL-EXEC-LARGECM64-NEXT: blr
entry:
%0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @f)
Expand Down
Loading