Skip to content

[AArch64] Implement -fno-plt for SelectionDAG/GlobalISel #78890

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 9 additions & 3 deletions llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -144,9 +144,15 @@ bool CallLowering::lowerCall(MachineIRBuilder &MIRBuilder, const CallBase &CB,
// Try looking through a bitcast from one function type to another.
// Commonly happens with calls to objc_msgSend().
const Value *CalleeV = CB.getCalledOperand()->stripPointerCasts();
if (const Function *F = dyn_cast<Function>(CalleeV))
Info.Callee = MachineOperand::CreateGA(F, 0);
else if (isa<GlobalIFunc>(CalleeV) || isa<GlobalAlias>(CalleeV)) {
if (const Function *F = dyn_cast<Function>(CalleeV)) {
if (F->hasFnAttribute(Attribute::NonLazyBind)) {
LLT Ty = getLLTForType(*F->getType(), DL);
Register Reg = MIRBuilder.buildGlobalValue(Ty, F).getReg(0);
Info.Callee = MachineOperand::CreateReg(Reg, false);
} else {
Info.Callee = MachineOperand::CreateGA(F, 0);
}
} else if (isa<GlobalIFunc>(CalleeV) || isa<GlobalAlias>(CalleeV)) {
// IR IFuncs and Aliases can't be forward declared (only defined), so the
// callee must be in the same TU and therefore we can direct-call it without
// worrying about it being out of range.
Expand Down
5 changes: 5 additions & 0 deletions llvm/lib/Target/AArch64/AArch64FastISel.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3172,6 +3172,11 @@ bool AArch64FastISel::fastLowerCall(CallLoweringInfo &CLI) {
if (CM == CodeModel::Large && !Subtarget->isTargetMachO())
return false;

// ELF -fno-plt compiled intrinsic calls do not have the nonlazybind
// attribute. Check "RtLibUseGOT" instead.
if (MF->getFunction().getParent()->getRtLibUseGOT())
return false;

// Let SDISel handle vararg functions.
if (IsVarArg)
return false;
Expand Down
9 changes: 5 additions & 4 deletions llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8211,13 +8211,14 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, OpFlags);
}
} else if (auto *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
if (getTargetMachine().getCodeModel() == CodeModel::Large &&
Subtarget->isTargetMachO()) {
const char *Sym = S->getSymbol();
bool UseGot = (getTargetMachine().getCodeModel() == CodeModel::Large &&
Subtarget->isTargetMachO()) ||
MF.getFunction().getParent()->getRtLibUseGOT();
const char *Sym = S->getSymbol();
if (UseGot) {
Callee = DAG.getTargetExternalSymbol(Sym, PtrVT, AArch64II::MO_GOT);
Callee = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, Callee);
} else {
const char *Sym = S->getSymbol();
Callee = DAG.getTargetExternalSymbol(Sym, PtrVT, 0);
}
}
Expand Down
11 changes: 6 additions & 5 deletions llvm/lib/Target/AArch64/AArch64Subtarget.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -43,10 +43,10 @@ static cl::opt<bool>
UseAddressTopByteIgnored("aarch64-use-tbi", cl::desc("Assume that top byte of "
"an address is ignored"), cl::init(false), cl::Hidden);

static cl::opt<bool>
UseNonLazyBind("aarch64-enable-nonlazybind",
cl::desc("Call nonlazybind functions via direct GOT load"),
cl::init(false), cl::Hidden);
static cl::opt<bool> MachOUseNonLazyBind(
"aarch64-macho-enable-nonlazybind",
cl::desc("Call nonlazybind functions via direct GOT load for Mach-O"),
cl::Hidden);

static cl::opt<bool> UseAA("aarch64-use-aa", cl::init(true),
cl::desc("Enable the use of AA during codegen."));
Expand Down Expand Up @@ -433,7 +433,8 @@ unsigned AArch64Subtarget::classifyGlobalFunctionReference(

// NonLazyBind goes via GOT unless we know it's available locally.
auto *F = dyn_cast<Function>(GV);
if (UseNonLazyBind && F && F->hasFnAttribute(Attribute::NonLazyBind) &&
if ((!isTargetMachO() || MachOUseNonLazyBind) && F &&
F->hasFnAttribute(Attribute::NonLazyBind) &&
!TM.shouldAssumeDSOLocal(*GV->getParent(), GV))
return AArch64II::MO_GOT;

Expand Down
11 changes: 10 additions & 1 deletion llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1301,8 +1301,17 @@ bool AArch64CallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
!Subtarget.noBTIAtReturnTwice() &&
MF.getInfo<AArch64FunctionInfo>()->branchTargetEnforcement())
Opc = AArch64::BLR_BTI;
else
else {
// For an intrinsic call (e.g. memset), use GOT if "RtLibUseGOT" (-fno-plt)
// is set.
if (Info.Callee.isSymbol() && F.getParent()->getRtLibUseGOT()) {
auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_GLOBAL_VALUE);
DstOp(getLLTForType(*F.getType(), DL)).addDefToMIB(MRI, MIB);
MIB.addExternalSymbol(Info.Callee.getSymbolName(), AArch64II::MO_GOT);
Info.Callee = MachineOperand::CreateReg(MIB.getReg(0), false);
}
Opc = getCallOpcode(MF, Info.Callee.isReg(), false);
}

auto MIB = MIRBuilder.buildInstrNoInsert(Opc);
unsigned CalleeOpNo = 0;
Expand Down
16 changes: 12 additions & 4 deletions llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2804,11 +2804,19 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
}

case TargetOpcode::G_GLOBAL_VALUE: {
auto GV = I.getOperand(1).getGlobal();
if (GV->isThreadLocal())
return selectTLSGlobalValue(I, MRI);
const GlobalValue *GV = nullptr;
unsigned OpFlags;
if (I.getOperand(1).isSymbol()) {
OpFlags = I.getOperand(1).getTargetFlags();
// Currently only used by "RtLibUseGOT".
assert(OpFlags == AArch64II::MO_GOT);
} else {
GV = I.getOperand(1).getGlobal();
if (GV->isThreadLocal())
return selectTLSGlobalValue(I, MRI);
OpFlags = STI.ClassifyGlobalReference(GV, TM);
}

unsigned OpFlags = STI.ClassifyGlobalReference(GV, TM);
if (OpFlags & AArch64II::MO_GOT) {
I.setDesc(TII.get(AArch64::LOADgot));
I.getOperand(1).setTargetFlags(OpFlags);
Expand Down
3 changes: 3 additions & 0 deletions llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1345,6 +1345,9 @@ bool AArch64LegalizerInfo::legalizeSmallCMGlobalValue(
// By splitting this here, we can optimize accesses in the small code model by
// folding in the G_ADD_LOW into the load/store offset.
auto &GlobalOp = MI.getOperand(1);
// Don't modify an intrinsic call.
if (GlobalOp.isSymbol())
return true;
const auto* GV = GlobalOp.getGlobal();
if (GV->isThreadLocal())
return true; // Don't want to modify TLS vars.
Expand Down
36 changes: 28 additions & 8 deletions llvm/test/CodeGen/AArch64/call-rv-marker.ll
Original file line number Diff line number Diff line change
Expand Up @@ -201,17 +201,27 @@ define dso_local void @rv_marker_3() personality ptr @__gxx_personality_v0 {
; GISEL-NEXT: bl _objc_object
; GISEL-NEXT: Ltmp1:
; GISEL-NEXT: ; %bb.1: ; %invoke.cont
; GISEL-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
; GISEL-NEXT: Lloh0:
; GISEL-NEXT: adrp x1, _objc_release@GOTPAGE
; GISEL-NEXT: mov x0, x19
; GISEL-NEXT: Lloh1:
; GISEL-NEXT: ldr x1, [x1, _objc_release@GOTPAGEOFF]
; GISEL-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
; GISEL-NEXT: ldp x20, x19, [sp], #32 ; 16-byte Folded Reload
; GISEL-NEXT: b _objc_release
; GISEL-NEXT: br x1
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@fhahn, @TNorthover do these sound OK to you?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

; GISEL-NEXT: LBB3_2: ; %lpad
; GISEL-NEXT: Ltmp2:
; GISEL-NEXT: Lloh2:
; GISEL-NEXT: adrp x8, _objc_release@GOTPAGE
; GISEL-NEXT: mov x20, x0
; GISEL-NEXT: mov x0, x19
; GISEL-NEXT: bl _objc_release
; GISEL-NEXT: Lloh3:
; GISEL-NEXT: ldr x8, [x8, _objc_release@GOTPAGEOFF]
; GISEL-NEXT: blr x8
; GISEL-NEXT: mov x0, x20
; GISEL-NEXT: bl __Unwind_Resume
; GISEL-NEXT: .loh AdrpLdrGot Lloh0, Lloh1
; GISEL-NEXT: .loh AdrpLdrGot Lloh2, Lloh3
; GISEL-NEXT: Lfunc_end0:
; GISEL-NEXT: .cfi_endproc
; GISEL-NEXT: .section __TEXT,__gcc_except_tab
Expand Down Expand Up @@ -352,8 +362,12 @@ define dso_local void @rv_marker_4() personality ptr @__gxx_personality_v0 {
; GISEL-NEXT: bl _objc_object
; GISEL-NEXT: Ltmp7:
; GISEL-NEXT: ; %bb.2: ; %invoke.cont2
; GISEL-NEXT: Lloh4:
; GISEL-NEXT: adrp x8, _objc_release@GOTPAGE
; GISEL-NEXT: mov x0, x19
; GISEL-NEXT: bl _objc_release
; GISEL-NEXT: Lloh5:
; GISEL-NEXT: ldr x8, [x8, _objc_release@GOTPAGEOFF]
; GISEL-NEXT: blr x8
; GISEL-NEXT: add x0, sp, #15
; GISEL-NEXT: bl __ZN1SD1Ev
; GISEL-NEXT: ldp x29, x30, [sp, #32] ; 16-byte Folded Reload
Expand All @@ -362,9 +376,13 @@ define dso_local void @rv_marker_4() personality ptr @__gxx_personality_v0 {
; GISEL-NEXT: ret
; GISEL-NEXT: LBB4_3: ; %lpad1
; GISEL-NEXT: Ltmp8:
; GISEL-NEXT: Lloh6:
; GISEL-NEXT: adrp x8, _objc_release@GOTPAGE
; GISEL-NEXT: mov x20, x0
; GISEL-NEXT: mov x0, x19
; GISEL-NEXT: bl _objc_release
; GISEL-NEXT: Lloh7:
; GISEL-NEXT: ldr x8, [x8, _objc_release@GOTPAGEOFF]
; GISEL-NEXT: blr x8
; GISEL-NEXT: b LBB4_5
; GISEL-NEXT: LBB4_4: ; %lpad
; GISEL-NEXT: Ltmp5:
Expand All @@ -374,6 +392,8 @@ define dso_local void @rv_marker_4() personality ptr @__gxx_personality_v0 {
; GISEL-NEXT: bl __ZN1SD1Ev
; GISEL-NEXT: mov x0, x20
; GISEL-NEXT: bl __Unwind_Resume
; GISEL-NEXT: .loh AdrpLdrGot Lloh4, Lloh5
; GISEL-NEXT: .loh AdrpLdrGot Lloh6, Lloh7
; GISEL-NEXT: Lfunc_end1:
; GISEL-NEXT: .cfi_endproc
; GISEL-NEXT: .section __TEXT,__gcc_except_tab
Expand Down Expand Up @@ -467,9 +487,9 @@ define dso_local ptr @rv_marker_5_indirect_call() {
; GISEL-NEXT: .cfi_offset w29, -16
; GISEL-NEXT: .cfi_offset w19, -24
; GISEL-NEXT: .cfi_offset w20, -32
; GISEL-NEXT: Lloh0:
; GISEL-NEXT: Lloh8:
; GISEL-NEXT: adrp x8, _fptr@PAGE
; GISEL-NEXT: Lloh1:
; GISEL-NEXT: Lloh9:
; GISEL-NEXT: ldr x8, [x8, _fptr@PAGEOFF]
; GISEL-NEXT: blr x8
; GISEL-NEXT: mov x29, x29
Expand All @@ -480,7 +500,7 @@ define dso_local ptr @rv_marker_5_indirect_call() {
; GISEL-NEXT: mov x0, x19
; GISEL-NEXT: ldp x20, x19, [sp], #32 ; 16-byte Folded Reload
; GISEL-NEXT: ret
; GISEL-NEXT: .loh AdrpLdr Lloh0, Lloh1
; GISEL-NEXT: .loh AdrpLdr Lloh8, Lloh9
entry:
%0 = load ptr, ptr @fptr, align 8
%call = call ptr %0() [ "clang.arc.attachedcall"(ptr @objc_retainAutoreleasedReturnValue) ]
Expand Down
81 changes: 38 additions & 43 deletions llvm/test/CodeGen/AArch64/nonlazybind.ll
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc -mtriple=aarch64-apple-ios %s -o - -aarch64-enable-nonlazybind | FileCheck %s --check-prefix=MACHO
; RUN: llc -mtriple=aarch64-apple-ios %s -o - -aarch64-macho-enable-nonlazybind | FileCheck %s --check-prefix=MACHO
; RUN: llc -mtriple=aarch64-apple-ios %s -o - | FileCheck %s --check-prefix=MACHO-NORMAL
; RUN: llc -mtriple=aarch64 -fast-isel %s -o - | FileCheck %s --check-prefixes=ELF,ELF-FI
; RUN: llc -mtriple=aarch64 -global-isel %s -o - | FileCheck %s --check-prefixes=ELF,ELF-GI
Expand All @@ -19,13 +19,18 @@ define void @test_laziness(ptr %a) nounwind {
; MACHO-NEXT: Lloh1:
; MACHO-NEXT: ldr x8, [x8, _external@GOTPAGEOFF]
; MACHO-NEXT: blr x8
; MACHO-NEXT: Lloh2:
; MACHO-NEXT: adrp x8, _memset@GOTPAGE
; MACHO-NEXT: mov x0, x19
; MACHO-NEXT: mov w1, #1 ; =0x1
; MACHO-NEXT: Lloh3:
; MACHO-NEXT: ldr x8, [x8, _memset@GOTPAGEOFF]
; MACHO-NEXT: mov w2, #1000 ; =0x3e8
; MACHO-NEXT: bl _memset
; MACHO-NEXT: blr x8
; MACHO-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
; MACHO-NEXT: ldp x20, x19, [sp], #32 ; 16-byte Folded Reload
; MACHO-NEXT: ret
; MACHO-NEXT: .loh AdrpLdrGot Lloh2, Lloh3
; MACHO-NEXT: .loh AdrpLdrGot Lloh0, Lloh1
;
; MACHO-NORMAL-LABEL: test_laziness:
Expand All @@ -34,50 +39,34 @@ define void @test_laziness(ptr %a) nounwind {
; MACHO-NORMAL-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill
; MACHO-NORMAL-NEXT: mov x19, x0
; MACHO-NORMAL-NEXT: bl _external
; MACHO-NORMAL-NEXT: Lloh0:
; MACHO-NORMAL-NEXT: adrp x8, _memset@GOTPAGE
; MACHO-NORMAL-NEXT: mov x0, x19
; MACHO-NORMAL-NEXT: mov w1, #1 ; =0x1
; MACHO-NORMAL-NEXT: Lloh1:
; MACHO-NORMAL-NEXT: ldr x8, [x8, _memset@GOTPAGEOFF]
; MACHO-NORMAL-NEXT: mov w2, #1000 ; =0x3e8
; MACHO-NORMAL-NEXT: bl _memset
; MACHO-NORMAL-NEXT: blr x8
; MACHO-NORMAL-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
; MACHO-NORMAL-NEXT: ldp x20, x19, [sp], #32 ; 16-byte Folded Reload
; MACHO-NORMAL-NEXT: ret
; MACHO-NORMAL-NEXT: .loh AdrpLdrGot Lloh0, Lloh1
;
; ELF-FI-LABEL: test_laziness:
; ELF-FI: // %bb.0:
; ELF-FI-NEXT: stp x30, x19, [sp, #-16]! // 16-byte Folded Spill
; ELF-FI-NEXT: mov x19, x0
; ELF-FI-NEXT: bl external
; ELF-FI-NEXT: mov w8, #1 // =0x1
; ELF-FI-NEXT: mov x0, x19
; ELF-FI-NEXT: mov x2, #1000 // =0x3e8
; ELF-FI-NEXT: uxtb w1, w8
; ELF-FI-NEXT: bl memset
; ELF-FI-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload
; ELF-FI-NEXT: ret
;
; ELF-GI-LABEL: test_laziness:
; ELF-GI: // %bb.0:
; ELF-GI-NEXT: stp x30, x19, [sp, #-16]! // 16-byte Folded Spill
; ELF-GI-NEXT: mov x19, x0
; ELF-GI-NEXT: bl external
; ELF-GI-NEXT: mov x0, x19
; ELF-GI-NEXT: mov w1, #1 // =0x1
; ELF-GI-NEXT: mov w2, #1000 // =0x3e8
; ELF-GI-NEXT: bl memset
; ELF-GI-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload
; ELF-GI-NEXT: ret
;
; ELF-SDAG-LABEL: test_laziness:
; ELF-SDAG: // %bb.0:
; ELF-SDAG-NEXT: stp x30, x19, [sp, #-16]! // 16-byte Folded Spill
; ELF-SDAG-NEXT: mov x19, x0
; ELF-SDAG-NEXT: bl external
; ELF-SDAG-NEXT: mov x0, x19
; ELF-SDAG-NEXT: mov w1, #1 // =0x1
; ELF-SDAG-NEXT: mov w2, #1000 // =0x3e8
; ELF-SDAG-NEXT: bl memset
; ELF-SDAG-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload
; ELF-SDAG-NEXT: ret
; ELF-LABEL: test_laziness:
; ELF: // %bb.0:
; ELF-NEXT: stp x30, x19, [sp, #-16]! // 16-byte Folded Spill
; ELF-NEXT: adrp x8, :got:external
; ELF-NEXT: mov x19, x0
; ELF-NEXT: ldr x8, [x8, :got_lo12:external]
; ELF-NEXT: blr x8
; ELF-NEXT: adrp x8, :got:memset
; ELF-NEXT: mov x0, x19
; ELF-NEXT: mov w1, #1 // =0x1
; ELF-NEXT: ldr x8, [x8, :got_lo12:memset]
; ELF-NEXT: mov w2, #1000 // =0x3e8
; ELF-NEXT: blr x8
; ELF-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload
; ELF-NEXT: ret
call void @external()
call void @llvm.memset.p0.i64(ptr align 1 %a, i8 1, i64 1000, i1 false)
ret void
Expand All @@ -86,20 +75,22 @@ define void @test_laziness(ptr %a) nounwind {
define void @test_laziness_tail() nounwind {
; MACHO-LABEL: test_laziness_tail:
; MACHO: ; %bb.0:
; MACHO-NEXT: Lloh2:
; MACHO-NEXT: Lloh4:
; MACHO-NEXT: adrp x0, _external@GOTPAGE
; MACHO-NEXT: Lloh3:
; MACHO-NEXT: Lloh5:
; MACHO-NEXT: ldr x0, [x0, _external@GOTPAGEOFF]
; MACHO-NEXT: br x0
; MACHO-NEXT: .loh AdrpLdrGot Lloh2, Lloh3
; MACHO-NEXT: .loh AdrpLdrGot Lloh4, Lloh5
;
; MACHO-NORMAL-LABEL: test_laziness_tail:
; MACHO-NORMAL: ; %bb.0:
; MACHO-NORMAL-NEXT: b _external
;
; ELF-LABEL: test_laziness_tail:
; ELF: // %bb.0:
; ELF-NEXT: b external
; ELF-NEXT: adrp x0, :got:external
; ELF-NEXT: ldr x0, [x0, :got_lo12:external]
; ELF-NEXT: br x0
tail call void @external()
ret void
}
Expand All @@ -108,3 +99,7 @@ declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg)

!llvm.module.flags = !{!0}
!0 = !{i32 7, !"RtLibUseGOT", i32 1}
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; ELF-FI: {{.*}}
; ELF-GI: {{.*}}
; ELF-SDAG: {{.*}}