diff --git a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index dfea9e7709240..af82b6cdb1809 100644
--- a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -7558,6 +7558,16 @@ static void reduceVSXSwap(SDNode *N, SelectionDAG *DAG) {
   DAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), N->getOperand(0));
 }

+// Check if an SDValue has the 'aix-small-tls' global variable attribute.
+static bool hasAIXSmallTLSAttr(SDValue Val) {
+  if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Val))
+    if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(GA->getGlobal()))
+      if (GV->hasAttribute("aix-small-tls"))
+        return true;
+
+  return false;
+}
+
 // Is an ADDI eligible for folding for non-TOC-based local-exec accesses?
 static bool isEligibleToFoldADDIForLocalExecAccesses(SelectionDAG *DAG,
                                                      SDValue ADDIToFold) {
@@ -7567,20 +7577,25 @@ static bool isEligibleToFoldADDIForLocalExecAccesses(SelectionDAG *DAG,
       (ADDIToFold.getMachineOpcode() != PPC::ADDI8))
     return false;

+  // Folding is only allowed for the AIX small-local-exec TLS target attribute
+  // or when the 'aix-small-tls' global variable attribute is present.
+  const PPCSubtarget &Subtarget =
+      DAG->getMachineFunction().getSubtarget<PPCSubtarget>();
+  SDValue TLSVarNode = ADDIToFold.getOperand(1);
+  if (!(Subtarget.hasAIXSmallLocalExecTLS() || hasAIXSmallTLSAttr(TLSVarNode)))
+    return false;
+
   // The first operand of the ADDIToFold should be the thread pointer.
   // This transformation is only performed if the first operand of the
   // addi is the thread pointer.
   SDValue TPRegNode = ADDIToFold.getOperand(0);
   RegisterSDNode *TPReg = dyn_cast<RegisterSDNode>(TPRegNode.getNode());
-  const PPCSubtarget &Subtarget =
-      DAG->getMachineFunction().getSubtarget<PPCSubtarget>();
   if (!TPReg || (TPReg->getReg() != Subtarget.getThreadPointerRegister()))
     return false;

   // The second operand of the ADDIToFold should be the global TLS address
   // (the local-exec TLS variable). We only perform the folding if the TLS
   // variable is the second operand.
-  SDValue TLSVarNode = ADDIToFold.getOperand(1);
   GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(TLSVarNode);
   if (!GA)
     return false;
@@ -7649,7 +7664,6 @@ static void foldADDIForLocalExecAccesses(SDNode *N, SelectionDAG *DAG) {

 void PPCDAGToDAGISel::PeepholePPC64() {
   SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
-  bool HasAIXSmallLocalExecTLS = Subtarget->hasAIXSmallLocalExecTLS();

   while (Position != CurDAG->allnodes_begin()) {
     SDNode *N = &*--Position;
@@ -7661,8 +7675,7 @@ void PPCDAGToDAGISel::PeepholePPC64() {
     reduceVSXSwap(N, CurDAG);

     // This optimization is performed for non-TOC-based local-exec accesses.
-    if (HasAIXSmallLocalExecTLS)
-      foldADDIForLocalExecAccesses(N, CurDAG);
+    foldADDIForLocalExecAccesses(N, CurDAG);

     unsigned FirstOp;
     unsigned StorageOpcode = N->getMachineOpcode();
@@ -7821,8 +7834,7 @@ void PPCDAGToDAGISel::PeepholePPC64() {
                                                  ImmOpnd.getValueType());
       } else if (Offset != 0) {
         // This optimization is performed for non-TOC-based local-exec accesses.
-        if (HasAIXSmallLocalExecTLS &&
-            isEligibleToFoldADDIForLocalExecAccesses(CurDAG, Base)) {
+        if (isEligibleToFoldADDIForLocalExecAccesses(CurDAG, Base)) {
           // Add the non-zero offset information into the load or store
           // instruction to be used for non-TOC-based local-exec accesses.
           GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(ImmOpnd);
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index cce0efad39c75..7436b202fba0d 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -3367,15 +3367,21 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddressAIX(SDValue Op,
   const GlobalValue *GV = GA->getGlobal();
   EVT PtrVT = getPointerTy(DAG.getDataLayout());
   bool Is64Bit = Subtarget.isPPC64();
-  bool HasAIXSmallLocalExecTLS = Subtarget.hasAIXSmallLocalExecTLS();
   TLSModel::Model Model = getTargetMachine().getTLSModel(GV);
   bool IsTLSLocalExecModel = Model == TLSModel::LocalExec;

   if (IsTLSLocalExecModel || Model == TLSModel::InitialExec) {
+    bool HasAIXSmallLocalExecTLS = Subtarget.hasAIXSmallLocalExecTLS();
+    bool HasAIXSmallTLSGlobalAttr = false;
     SDValue VariableOffsetTGA =
         DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TPREL_FLAG);
     SDValue VariableOffset = getTOCEntry(DAG, dl, VariableOffsetTGA);
     SDValue TLSReg;
+
+    if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV))
+      if (GVar->hasAttribute("aix-small-tls"))
+        HasAIXSmallTLSGlobalAttr = true;
+
     if (Is64Bit) {
       // For local-exec and initial-exec on AIX (64-bit), the sequence generated
       // involves a load of the variable offset (from the TOC), followed by an
@@ -3385,14 +3391,16 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddressAIX(SDValue Op,
       //    add reg2, reg1, r13     // r13 contains the thread pointer
       TLSReg = DAG.getRegister(PPC::X13, MVT::i64);

-      // With the -maix-small-local-exec-tls option, produce a faster access
-      // sequence for local-exec TLS variables where the offset from the TLS
-      // base is encoded as an immediate operand.
+      // With the -maix-small-local-exec-tls option, or with the "aix-small-tls"
+      // global variable attribute, produce a faster access sequence for
+      // local-exec TLS variables where the offset from the TLS base is encoded
+      // as an immediate operand.
       //
       // We only utilize the faster local-exec access sequence when the TLS
       // variable has a size within the policy limit. We treat types that are
       // not sized or are empty as being over the policy size limit.
-      if (HasAIXSmallLocalExecTLS && IsTLSLocalExecModel) {
+      if ((HasAIXSmallLocalExecTLS || HasAIXSmallTLSGlobalAttr) &&
+          IsTLSLocalExecModel) {
         Type *GVType = GV->getValueType();
         if (GVType->isSized() && !GVType->isEmptyTy() &&
             GV->getParent()->getDataLayout().getTypeAllocSize(GVType) <=
@@ -3410,8 +3418,9 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddressAIX(SDValue Op,
       TLSReg = DAG.getNode(PPCISD::GET_TPOINTER, dl, PtrVT);

       // We do not implement the 32-bit version of the faster access sequence
-      // for local-exec that is controlled by -maix-small-local-exec-tls.
-      if (HasAIXSmallLocalExecTLS)
+      // for local-exec that is controlled by the -maix-small-local-exec-tls
+      // option, or the "aix-small-tls" global variable attribute.
+ if (HasAIXSmallLocalExecTLS || HasAIXSmallTLSGlobalAttr) report_fatal_error("The small-local-exec TLS access sequence is " "currently only supported on AIX (64-bit mode)."); } diff --git a/llvm/test/CodeGen/PowerPC/aix-small-tls-globalvarattr-funcattr.ll b/llvm/test/CodeGen/PowerPC/aix-small-tls-globalvarattr-funcattr.ll new file mode 100644 index 0000000000000..38b35dc6c81cf --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/aix-small-tls-globalvarattr-funcattr.ll @@ -0,0 +1,105 @@ +; RUN: llc -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \ +; RUN: -mtriple powerpc64-ibm-aix-xcoff < %s \ +; RUN: | FileCheck %s --check-prefixes=COMMONCM,CHECK-SMALLCM64 +; RUN: llc -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \ +; RUN: -mtriple powerpc64-ibm-aix-xcoff --code-model=large \ +; RUN: < %s | FileCheck %s --check-prefixes=COMMONCM,CHECK-LARGECM64 + +@mySmallTLS = thread_local(localexec) global [7800 x i64] zeroinitializer, align 8 #0 +@mySmallTLS2 = thread_local(localexec) global [3000 x i64] zeroinitializer, align 8 #0 +@mySmallTLS3 = thread_local(localexec) global [3000 x i64] zeroinitializer, align 8 +declare nonnull ptr @llvm.threadlocal.address.p0(ptr nonnull) + +; All accesses use a "faster" local-exec sequence directly off the thread pointer, +; except for mySmallTLS, as this variable is over the 32KB size limit. +define i64 @StoreLargeAccess1() #1 { +; COMMONCM-LABEL: StoreLargeAccess1: +; COMMONCM-NEXT: # %bb.0: # %entry +; CHECK-SMALLCM64: ld r3, L..C0(r2) # target-flags(ppc-tprel) @mySmallTLS +; CHECK-SMALLCM64-NEXT: li r4, 0 +; CHECK-SMALLCM64-NEXT: li r5, 23 +; CHECK-LARGECM64: addis r3, L..C0@u(r2) +; CHECK-LARGECM64-NEXT: li r4, 0 +; CHECK-LARGECM64-NEXT: li r5, 23 +; CHECK-LARGECM64-NEXT: ld r3, L..C0@l(r3) +; COMMONCM: ori r4, r4, 53328 +; COMMONCM-NEXT: add r3, r13, r3 +; COMMONCM-NEXT: stdx r5, r3, r4 +; COMMONCM-NEXT: li r3, 55 +; COMMONCM-NEXT: li r4, 64 +; COMMONCM-NEXT: std r3, (mySmallTLS2[TL]@le+696)-65536(r13) +; COMMONCM-NEXT: li r3, 142 +; COMMONCM-NEXT: std r4, (mySmallTLS3[TL]@le+20000)-131072(r13) +; COMMONCM-NEXT: blr +entry: + %tls0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @mySmallTLS) + %arrayidx = getelementptr inbounds i8, ptr %tls0, i32 53328 + store i64 23, ptr %arrayidx, align 8 + %tls1 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @mySmallTLS2) + %arrayidx1 = getelementptr inbounds i8, ptr %tls1, i32 696 + store i64 55, ptr %arrayidx1, align 8 + %tls2 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @mySmallTLS3) + %arrayidx2 = getelementptr inbounds i8, ptr %tls2, i32 20000 + store i64 64, ptr %arrayidx2, align 8 + %load1 = load i64, ptr %arrayidx, align 8 + %load2 = load i64, ptr %arrayidx1, align 8 + %add1 = add i64 %load1, 64 + %add2 = add i64 %add1, %load2 + ret i64 %add2 +} + +; Since this function does not have the 'aix-small-local-exec-tls` attribute, +; only some local-exec variables should have the small-local-exec TLS access +; sequence (as opposed to all of them). 
+define i64 @StoreLargeAccess2() { +; COMMONCM-LABEL: StoreLargeAccess2: +; COMMONCM-NEXT: # %bb.0: # %entry +; CHECK-SMALLCM64: ld r5, L..C0(r2) # target-flags(ppc-tprel) @mySmallTLS +; CHECK-SMALLCM64-NEXT: li r3, 0 +; CHECK-SMALLCM64-NEXT: li r4, 23 +; CHECK-SMALLCM64-NEXT: ori r3, r3, 53328 +; CHECK-SMALLCM64-NEXT: add r5, r13, r5 +; CHECK-SMALLCM64-NEXT: stdx r4, r5, r3 +; CHECK-SMALLCM64-NEXT: ld r5, L..C1(r2) # target-flags(ppc-tprel) @mySmallTLS3 +; CHECK-SMALLCM64-NEXT: li r3, 55 +; CHECK-SMALLCM64-NEXT: li r4, 64 +; CHECK-SMALLCM64-NEXT: std r3, mySmallTLS2[TL]@le+696(r13) +; CHECK-SMALLCM64-NEXT: li r3, 142 +; CHECK-SMALLCM64-NEXT: add r5, r13, r5 +; CHECK-SMALLCM64-NEXT: std r4, 20000(r5) +; CHECK-LARGECM64: addis r3, L..C0@u(r2) +; CHECK-LARGECM64-NEXT: li r4, 0 +; CHECK-LARGECM64-NEXT: li r5, 23 +; CHECK-LARGECM64-NEXT: ld r3, L..C0@l(r3) +; CHECK-LARGECM64-NEXT: ori r4, r4, 53328 +; CHECK-LARGECM64-NEXT: add r3, r13, r3 +; CHECK-LARGECM64-NEXT: stdx r5, r3, r4 +; CHECK-LARGECM64-NEXT: addis r3, L..C1@u(r2) +; CHECK-LARGECM64-NEXT: li r4, 55 +; CHECK-LARGECM64-NEXT: li r5, 64 +; CHECK-LARGECM64-NEXT: ld r3, L..C1@l(r3) +; CHECK-LARGECM64-NEXT: std r4, mySmallTLS2[TL]@le+696(r13) +; CHECK-LARGECM64-NEXT: add r3, r13, r3 +; CHECK-LARGECM64-NEXT: std r5, 20000(r3) +; CHECK-LARGECM64-NEXT: li r3, 142 +; COMMONCM-NEXT: blr +; +entry: + %tls0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @mySmallTLS) + %arrayidx = getelementptr inbounds i8, ptr %tls0, i32 53328 + store i64 23, ptr %arrayidx, align 8 + %tls1 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @mySmallTLS2) + %arrayidx1 = getelementptr inbounds i8, ptr %tls1, i32 696 + store i64 55, ptr %arrayidx1, align 8 + %tls2 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @mySmallTLS3) + %arrayidx2 = getelementptr inbounds i8, ptr %tls2, i32 20000 + store i64 64, ptr %arrayidx2, align 8 + %load1 = load i64, ptr %arrayidx, align 8 + %load2 = load i64, ptr %arrayidx1, align 8 + %add1 = add i64 %load1, 64 + %add2 = add i64 %add1, %load2 + ret i64 %add2 +} + +attributes #0 = { "aix-small-tls" } +attributes #1 = { "target-features"="+aix-small-local-exec-tls" } diff --git a/llvm/test/CodeGen/PowerPC/aix-small-tls-globalvarattr-loadaddr.ll b/llvm/test/CodeGen/PowerPC/aix-small-tls-globalvarattr-loadaddr.ll new file mode 100644 index 0000000000000..c8537fba6a3cf --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/aix-small-tls-globalvarattr-loadaddr.ll @@ -0,0 +1,222 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: llc -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \ +; RUN: -mtriple powerpc64-ibm-aix-xcoff -mattr=-aix-small-local-exec-tls \ +; RUN: < %s | FileCheck %s --check-prefixes=COMMONCM,SMALLCM64 +; RUN: llc -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \ +; RUN: -mtriple powerpc64-ibm-aix-xcoff --code-model=large \ +; RUN: -mattr=-aix-small-local-exec-tls < %s | \ +; RUN: FileCheck %s --check-prefixes=COMMONCM,LARGECM64 + +; Test that the 'aix-small-tls' global variable attribute generates the +; optimized small-local-exec TLS sequence. Global variables without this +; attribute should still generate a TOC-based local-exec access sequence. 
+ +declare nonnull ptr @llvm.threadlocal.address.p0(ptr nonnull) + +@a = thread_local(localexec) global [87 x i8] zeroinitializer, align 1 #0 +@a_noattr = thread_local(localexec) global [87 x i8] zeroinitializer, align 1 +@b = thread_local(localexec) global [87 x i16] zeroinitializer, align 2 #0 +@b_noattr = thread_local(localexec) global [87 x i16] zeroinitializer, align 2 +@c = thread_local(localexec) global [87 x i32] zeroinitializer, align 4 #0 +@c_noattr = thread_local(localexec) global [87 x i32] zeroinitializer, align 4 +@d = thread_local(localexec) global [87 x i64] zeroinitializer, align 8 #0 +@d_noattr = thread_local(localexec) global [87 x i64] zeroinitializer, align 8 #0 + +@e = thread_local(localexec) global [87 x double] zeroinitializer, align 8 #0 +@e_noattr = thread_local(localexec) global [87 x double] zeroinitializer, align 8 +@f = thread_local(localexec) global [87 x float] zeroinitializer, align 4 #0 +@f_noattr = thread_local(localexec) global [87 x float] zeroinitializer, align 4 + +define nonnull ptr @AddrTest1() { +; COMMONCM-LABEL: AddrTest1: +; COMMONCM: # %bb.0: # %entry +; COMMONCM-NEXT: addi r3, r13, a[TL]@le+1 +; COMMONCM-NEXT: blr +entry: + %tls0 = tail call align 1 ptr @llvm.threadlocal.address.p0(ptr align 1 @a) + %arrayidx = getelementptr inbounds [87 x i8], ptr %tls0, i64 0, i64 1 + ret ptr %arrayidx +} + +define nonnull ptr @AddrTest1_NoAttr() { +; SMALLCM64-LABEL: AddrTest1_NoAttr: +; SMALLCM64: # %bb.0: # %entry +; SMALLCM64-NEXT: ld r3, L..C0(r2) # target-flags(ppc-tprel) @a_noattr +; SMALLCM64-NEXT: add r3, r13, r3 +; SMALLCM64-NEXT: addi r3, r3, 1 +; SMALLCM64-NEXT: blr +; +; LARGECM64-LABEL: AddrTest1_NoAttr: +; LARGECM64: # %bb.0: # %entry +; LARGECM64-NEXT: addis r3, L..C0@u(r2) +; LARGECM64-NEXT: ld r3, L..C0@l(r3) +; LARGECM64-NEXT: add r3, r13, r3 +; LARGECM64-NEXT: addi r3, r3, 1 +; LARGECM64-NEXT: blr +entry: + %tls0 = tail call align 1 ptr @llvm.threadlocal.address.p0(ptr align 1 @a_noattr) + %arrayidx = getelementptr inbounds [87 x i8], ptr %tls0, i64 0, i64 1 + ret ptr %arrayidx +} + +define nonnull ptr @AddrTest2() { +; COMMONCM-LABEL: AddrTest2: +; COMMONCM: # %bb.0: # %entry +; COMMONCM-NEXT: addi r3, r13, b[TL]@le+4 +; COMMONCM-NEXT: blr +entry: + %tls0 = tail call align 2 ptr @llvm.threadlocal.address.p0(ptr align 2 @b) + %arrayidx = getelementptr inbounds [87 x i16], ptr %tls0, i64 0, i64 2 + ret ptr %arrayidx +} + +define nonnull ptr @AddrTest2_NoAttr() { +; SMALLCM64-LABEL: AddrTest2_NoAttr: +; SMALLCM64: # %bb.0: # %entry +; SMALLCM64-NEXT: ld r3, L..C1(r2) # target-flags(ppc-tprel) @b_noattr +; SMALLCM64-NEXT: add r3, r13, r3 +; SMALLCM64-NEXT: addi r3, r3, 4 +; SMALLCM64-NEXT: blr +; +; LARGECM64-LABEL: AddrTest2_NoAttr: +; LARGECM64: # %bb.0: # %entry +; LARGECM64-NEXT: addis r3, L..C1@u(r2) +; LARGECM64-NEXT: ld r3, L..C1@l(r3) +; LARGECM64-NEXT: add r3, r13, r3 +; LARGECM64-NEXT: addi r3, r3, 4 +; LARGECM64-NEXT: blr +entry: + %tls0 = tail call align 2 ptr @llvm.threadlocal.address.p0(ptr align 2 @b_noattr) + %arrayidx = getelementptr inbounds [87 x i16], ptr %tls0, i64 0, i64 2 + ret ptr %arrayidx +} + +define nonnull ptr @AddrTest3() { +; COMMONCM-LABEL: AddrTest3: +; COMMONCM: # %bb.0: # %entry +; COMMONCM-NEXT: addi r3, r13, c[TL]@le+12 +; COMMONCM-NEXT: blr +entry: + %tls0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @c) + %arrayidx = getelementptr inbounds [87 x i32], ptr %tls0, i64 0, i64 3 + ret ptr %arrayidx +} + +define nonnull ptr @AddrTest3_NoAttr() { +; SMALLCM64-LABEL: AddrTest3_NoAttr: +; 
SMALLCM64: # %bb.0: # %entry +; SMALLCM64-NEXT: ld r3, L..C2(r2) # target-flags(ppc-tprel) @c_noattr +; SMALLCM64-NEXT: add r3, r13, r3 +; SMALLCM64-NEXT: addi r3, r3, 12 +; SMALLCM64-NEXT: blr +; +; LARGECM64-LABEL: AddrTest3_NoAttr: +; LARGECM64: # %bb.0: # %entry +; LARGECM64-NEXT: addis r3, L..C2@u(r2) +; LARGECM64-NEXT: ld r3, L..C2@l(r3) +; LARGECM64-NEXT: add r3, r13, r3 +; LARGECM64-NEXT: addi r3, r3, 12 +; LARGECM64-NEXT: blr +entry: + %tls0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @c_noattr) + %arrayidx = getelementptr inbounds [87 x i32], ptr %tls0, i64 0, i64 3 + ret ptr %arrayidx +} + +define nonnull ptr @AddrTest4() { +; COMMONCM-LABEL: AddrTest4: +; COMMONCM: # %bb.0: # %entry +; COMMONCM-NEXT: addi r3, r13, c[TL]@le+56 +; COMMONCM-NEXT: blr +entry: + %tls0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @c) + %arrayidx = getelementptr inbounds [87 x i64], ptr %tls0, i64 0, i64 7 + ret ptr %arrayidx +} + +define nonnull ptr @AddrTest4_NoAttr() { +; SMALLCM64-LABEL: AddrTest4_NoAttr: +; SMALLCM64: # %bb.0: # %entry +; SMALLCM64-NEXT: ld r3, L..C2(r2) # target-flags(ppc-tprel) @c_noattr +; SMALLCM64-NEXT: add r3, r13, r3 +; SMALLCM64-NEXT: addi r3, r3, 56 +; SMALLCM64-NEXT: blr +; +; LARGECM64-LABEL: AddrTest4_NoAttr: +; LARGECM64: # %bb.0: # %entry +; LARGECM64-NEXT: addis r3, L..C2@u(r2) +; LARGECM64-NEXT: ld r3, L..C2@l(r3) +; LARGECM64-NEXT: add r3, r13, r3 +; LARGECM64-NEXT: addi r3, r3, 56 +; LARGECM64-NEXT: blr +entry: + %tls0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @c_noattr) + %arrayidx = getelementptr inbounds [87 x i64], ptr %tls0, i64 0, i64 7 + ret ptr %arrayidx +} + +define nonnull ptr @AddrTest5() { +; COMMONCM-LABEL: AddrTest5: +; COMMONCM: # %bb.0: # %entry +; COMMONCM-NEXT: addi r3, r13, e[TL]@le+48 +; COMMONCM-NEXT: blr +entry: + %tls0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @e) + %arrayidx = getelementptr inbounds [87 x double], ptr %tls0, i64 0, i64 6 + ret ptr %arrayidx +} + +define nonnull ptr @AddrTest5_NoAttr() { +; SMALLCM64-LABEL: AddrTest5_NoAttr: +; SMALLCM64: # %bb.0: # %entry +; SMALLCM64-NEXT: ld r3, L..C3(r2) # target-flags(ppc-tprel) @e_noattr +; SMALLCM64-NEXT: add r3, r13, r3 +; SMALLCM64-NEXT: addi r3, r3, 48 +; SMALLCM64-NEXT: blr +; +; LARGECM64-LABEL: AddrTest5_NoAttr: +; LARGECM64: # %bb.0: # %entry +; LARGECM64-NEXT: addis r3, L..C3@u(r2) +; LARGECM64-NEXT: ld r3, L..C3@l(r3) +; LARGECM64-NEXT: add r3, r13, r3 +; LARGECM64-NEXT: addi r3, r3, 48 +; LARGECM64-NEXT: blr +entry: + %tls0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @e_noattr) + %arrayidx = getelementptr inbounds [87 x double], ptr %tls0, i64 0, i64 6 + ret ptr %arrayidx +} + +define nonnull ptr @AddrTest6() { +; COMMONCM-LABEL: AddrTest6: +; COMMONCM: # %bb.0: # %entry +; COMMONCM-NEXT: addi r3, r13, f[TL]@le+16 +; COMMONCM-NEXT: blr +entry: + %tls0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @f) + %arrayidx = getelementptr inbounds [87 x float], ptr %tls0, i64 0, i64 4 + ret ptr %arrayidx +} + +define nonnull ptr @AddrTest6_NoAttr() { +; SMALLCM64-LABEL: AddrTest6_NoAttr: +; SMALLCM64: # %bb.0: # %entry +; SMALLCM64-NEXT: ld r3, L..C4(r2) # target-flags(ppc-tprel) @f_noattr +; SMALLCM64-NEXT: add r3, r13, r3 +; SMALLCM64-NEXT: addi r3, r3, 16 +; SMALLCM64-NEXT: blr +; +; LARGECM64-LABEL: AddrTest6_NoAttr: +; LARGECM64: # %bb.0: # %entry +; LARGECM64-NEXT: addis r3, L..C4@u(r2) +; LARGECM64-NEXT: ld r3, L..C4@l(r3) +; LARGECM64-NEXT: add r3, 
r13, r3 +; LARGECM64-NEXT: addi r3, r3, 16 +; LARGECM64-NEXT: blr +entry: + %tls0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @f_noattr) + %arrayidx = getelementptr inbounds [87 x float], ptr %tls0, i64 0, i64 4 + ret ptr %arrayidx +} + +attributes #0 = { "aix-small-tls" } diff --git a/llvm/test/CodeGen/PowerPC/aix-small-tls-globalvarattr-targetattr.ll b/llvm/test/CodeGen/PowerPC/aix-small-tls-globalvarattr-targetattr.ll new file mode 100644 index 0000000000000..1e4a3b9bcc47c --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/aix-small-tls-globalvarattr-targetattr.ll @@ -0,0 +1,53 @@ +; RUN: llc -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \ +; RUN: -mtriple powerpc64-ibm-aix-xcoff -mattr=+aix-small-local-exec-tls < %s \ +; RUN: | FileCheck %s --check-prefixes=COMMONCM,SMALL-LOCAL-EXEC-SMALLCM64 +; RUN: llc -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \ +; RUN: -mtriple powerpc64-ibm-aix-xcoff --code-model=large \ +; RUN: -mattr=+aix-small-local-exec-tls < %s | FileCheck %s \ +; RUN: --check-prefixes=COMMONCM,SMALL-LOCAL-EXEC-LARGECM64 + +@mySmallTLS = thread_local(localexec) global [7800 x i64] zeroinitializer, align 8 #0 +@mySmallTLS2 = thread_local(localexec) global [3000 x i64] zeroinitializer, align 8 #0 +@mySmallTLS3 = thread_local(localexec) global [3000 x i64] zeroinitializer, align 8 +declare nonnull ptr @llvm.threadlocal.address.p0(ptr nonnull) + +; Although some global variables are annotated with 'aix-small-tls', because the +; aix-small-local-exec-tls target attribute is turned on, all accesses will use +; a "faster" local-exec sequence directly off the thread pointer. +define i64 @StoreLargeAccess1() { +; COMMONCM-LABEL: StoreLargeAccess1: +; COMMONCM-NEXT: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-SMALLCM64: ld r3, L..C0(r2) # target-flags(ppc-tprel) @mySmallTLS +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: li r4, 0 +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: li r5, 23 +; SMALL-LOCAL-EXEC-LARGECM64: addis r3, L..C0@u(r2) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: li r4, 0 +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: li r5, 23 +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: ld r3, L..C0@l(r3) +; COMMONCM: ori r4, r4, 53328 +; COMMONCM-NEXT: add r3, r13, r3 +; COMMONCM-NEXT: stdx r5, r3, r4 +; COMMONCM-NEXT: li r3, 55 +; COMMONCM-NEXT: li r4, 64 +; COMMONCM-NEXT: std r3, (mySmallTLS2[TL]@le+696)-65536(r13) +; COMMONCM-NEXT: li r3, 142 +; COMMONCM-NEXT: std r4, (mySmallTLS3[TL]@le+20000)-131072(r13) +; COMMONCM-NEXT: blr +entry: + %tls0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @mySmallTLS) + %arrayidx = getelementptr inbounds i8, ptr %tls0, i32 53328 + store i64 23, ptr %arrayidx, align 8 + %tls1 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @mySmallTLS2) + %arrayidx1 = getelementptr inbounds i8, ptr %tls1, i32 696 + store i64 55, ptr %arrayidx1, align 8 + %tls2 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @mySmallTLS3) + %arrayidx2 = getelementptr inbounds i8, ptr %tls2, i32 20000 + store i64 64, ptr %arrayidx2, align 8 + %load1 = load i64, ptr %arrayidx, align 8 + %load2 = load i64, ptr %arrayidx1, align 8 + %add1 = add i64 %load1, 64 + %add2 = add i64 %add1, %load2 + ret i64 %add2 +} + +attributes #0 = { "aix-small-tls" }
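
For illustration (not part of the patch): the lowering and peephole changes above only query GV->hasAttribute("aix-small-tls"), so the attribute may be attached by any producer. Below is a minimal sketch of how a frontend or IR pass might tag a local-exec thread-local global through the C++ IR API; the helper name, module, and array type are assumptions made for this example.

    // Illustrative only: create a local-exec TLS global and attach the
    // "aix-small-tls" string attribute that the AIX lowering checks.
    #include "llvm/ADT/StringRef.h"
    #include "llvm/IR/Constants.h"
    #include "llvm/IR/DerivedTypes.h"
    #include "llvm/IR/GlobalVariable.h"
    #include "llvm/IR/Module.h"

    using namespace llvm;

    static GlobalVariable *createAIXSmallTLSVar(Module &M, StringRef Name) {
      // A small zero-initialized TLS array, analogous to the test globals above.
      Type *Ty = ArrayType::get(Type::getInt64Ty(M.getContext()), 87);
      auto *GV = new GlobalVariable(
          M, Ty, /*isConstant=*/false, GlobalValue::ExternalLinkage,
          Constant::getNullValue(Ty), Name, /*InsertBefore=*/nullptr,
          GlobalValue::LocalExecTLSModel);
      // The lowering and peephole changes key off this string attribute when
      // deciding whether to use the TOC-free local-exec access sequence.
      GV->addAttribute("aix-small-tls");
      return GV;
    }

In textual IR this corresponds to the attribute group already used by the tests above, e.g. a global declared as thread_local(localexec) and annotated with #0, where attributes #0 = { "aix-small-tls" }.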