Skip to content

[AArch64][PAC] Eliminate excessive MOVs when computing blend #115185

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Nov 27, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
110 changes: 69 additions & 41 deletions llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -162,8 +162,27 @@ class AArch64AsmPrinter : public AsmPrinter {
// Emit the sequence for AUT or AUTPAC.
void emitPtrauthAuthResign(const MachineInstr *MI);

// Emit the sequence to compute a discriminator into x17, or reuse AddrDisc.
unsigned emitPtrauthDiscriminator(uint16_t Disc, unsigned AddrDisc);
// Emit the sequence to compute the discriminator.
//
// ScratchReg should be x16/x17.
//
// The returned register is either unmodified AddrDisc or x16/x17.
//
// If the expanded pseudo is allowed to clobber the AddrDisc register, setting
// MayUseAddrAsScratch may save one MOV instruction, provided AddrDisc is
// already x16 or x17. In that case the blend is computed in place and the
// returned register is the *modified* AddrDisc itself:
//
// mov x17, x16
// movk x17, #1234, lsl #48
// ; x16 is not used anymore
//
// can be replaced by
//
// movk x16, #1234, lsl #48
Register emitPtrauthDiscriminator(uint16_t Disc, Register AddrDisc,
Register ScratchReg,
bool MayUseAddrAsScratch = false);

// Emit the sequence for LOADauthptrstatic
void LowerLOADauthptrstatic(const MachineInstr &MI);
Expand Down Expand Up @@ -1726,8 +1745,11 @@ void AArch64AsmPrinter::emitFMov0(const MachineInstr &MI) {
}
}

unsigned AArch64AsmPrinter::emitPtrauthDiscriminator(uint16_t Disc,
unsigned AddrDisc) {
Register AArch64AsmPrinter::emitPtrauthDiscriminator(uint16_t Disc,
Register AddrDisc,
Register ScratchReg,
bool MayUseAddrAsScratch) {
assert(ScratchReg == AArch64::X16 || ScratchReg == AArch64::X17);
// So far we've used NoRegister in pseudos. Now we need real encodings.
if (AddrDisc == AArch64::NoRegister)
AddrDisc = AArch64::XZR;
Expand All @@ -1737,16 +1759,24 @@ unsigned AArch64AsmPrinter::emitPtrauthDiscriminator(uint16_t Disc,
if (!Disc)
return AddrDisc;

// If there's only a constant discriminator, MOV it into x17.
// If there's only a constant discriminator, MOV it into the scratch register.
if (AddrDisc == AArch64::XZR) {
emitMOVZ(AArch64::X17, Disc, 0);
return AArch64::X17;
emitMOVZ(ScratchReg, Disc, 0);
return ScratchReg;
}

// If there are both, emit a blend into x17.
emitMovXReg(AArch64::X17, AddrDisc);
emitMOVK(AArch64::X17, Disc, 48);
return AArch64::X17;
// If there are both, emit a blend into the scratch register.

// Check if we can save one MOV instruction.
assert(MayUseAddrAsScratch || ScratchReg != AddrDisc);
bool AddrDiscIsSafe = AddrDisc == AArch64::X16 || AddrDisc == AArch64::X17;
if (MayUseAddrAsScratch && AddrDiscIsSafe)
ScratchReg = AddrDisc;
else
emitMovXReg(ScratchReg, AddrDisc);

emitMOVK(ScratchReg, Disc, 48);
return ScratchReg;
}

/// Emits a code sequence to check an authenticated pointer value.
Expand Down Expand Up @@ -1963,7 +1993,8 @@ void AArch64AsmPrinter::emitPtrauthAuthResign(const MachineInstr *MI) {

// Compute aut discriminator into x17
assert(isUInt<16>(AUTDisc));
unsigned AUTDiscReg = emitPtrauthDiscriminator(AUTDisc, AUTAddrDisc);
Register AUTDiscReg =
emitPtrauthDiscriminator(AUTDisc, AUTAddrDisc, AArch64::X17);
bool AUTZero = AUTDiscReg == AArch64::XZR;
unsigned AUTOpc = getAUTOpcodeForKey(AUTKey, AUTZero);

Expand Down Expand Up @@ -2004,7 +2035,8 @@ void AArch64AsmPrinter::emitPtrauthAuthResign(const MachineInstr *MI) {

// Compute pac discriminator into x17
assert(isUInt<16>(PACDisc));
unsigned PACDiscReg = emitPtrauthDiscriminator(PACDisc, PACAddrDisc);
Register PACDiscReg =
emitPtrauthDiscriminator(PACDisc, PACAddrDisc, AArch64::X17);
bool PACZero = PACDiscReg == AArch64::XZR;
unsigned PACOpc = getPACOpcodeForKey(PACKey, PACZero);

Expand Down Expand Up @@ -2036,8 +2068,20 @@ void AArch64AsmPrinter::emitPtrauthBranch(const MachineInstr *MI) {

unsigned AddrDisc = MI->getOperand(3).getReg();

// Compute discriminator into x17
unsigned DiscReg = emitPtrauthDiscriminator(Disc, AddrDisc);
// Make sure AddrDisc is solely used to compute the discriminator.
// While hardly meaningful, it is still possible to describe an authentication
// of a pointer against its own value (instead of storage address) with
// intrinsics, so use report_fatal_error instead of assert.
if (BrTarget == AddrDisc)
report_fatal_error("Branch target is signed with its own value");

// If we are printing a BLRA pseudo instruction, then x16 and x17 are
// implicit-def'ed by the MI and AddrDisc is not used as any other input, so
// try to save one MOV by setting MayUseAddrAsScratch.
// Unlike BLRA, the BRA pseudo is used to perform computed goto, and is thus
// not declared as clobbering x16/x17.
Register DiscReg = emitPtrauthDiscriminator(Disc, AddrDisc, AArch64::X17,
/*MayUseAddrAsScratch=*/IsCall);
bool IsZeroDisc = DiscReg == AArch64::XZR;

unsigned Opc;
Expand Down Expand Up @@ -2331,16 +2375,7 @@ void AArch64AsmPrinter::LowerMOVaddrPAC(const MachineInstr &MI) {
}
}

unsigned DiscReg = AddrDisc;
if (Disc != 0) {
if (AddrDisc != AArch64::XZR) {
emitMovXReg(AArch64::X17, AddrDisc);
emitMOVK(AArch64::X17, Disc, 48);
} else {
emitMOVZ(AArch64::X17, Disc, 0);
}
DiscReg = AArch64::X17;
}
Register DiscReg = emitPtrauthDiscriminator(Disc, AddrDisc, AArch64::X17);

auto MIB = MCInstBuilder(getPACOpcodeForKey(Key, DiscReg == AArch64::XZR))
.addReg(AArch64::X16)
Expand Down Expand Up @@ -2608,6 +2643,7 @@ void AArch64AsmPrinter::emitInstruction(const MachineInstr *MI) {
// instruction here.
case AArch64::AUTH_TCRETURN:
case AArch64::AUTH_TCRETURN_BTI: {
Register Callee = MI->getOperand(0).getReg();
const uint64_t Key = MI->getOperand(2).getImm();
assert((Key == AArch64PACKey::IA || Key == AArch64PACKey::IB) &&
"Invalid auth key for tail-call return");
Expand All @@ -2617,31 +2653,23 @@ void AArch64AsmPrinter::emitInstruction(const MachineInstr *MI) {

Register AddrDisc = MI->getOperand(4).getReg();

Register ScratchReg = MI->getOperand(0).getReg() == AArch64::X16
? AArch64::X17
: AArch64::X16;
Register ScratchReg = Callee == AArch64::X16 ? AArch64::X17 : AArch64::X16;

emitPtrauthTailCallHardening(MI);

unsigned DiscReg = AddrDisc;
if (Disc) {
if (AddrDisc != AArch64::NoRegister) {
if (ScratchReg != AddrDisc)
emitMovXReg(ScratchReg, AddrDisc);
emitMOVK(ScratchReg, Disc, 48);
} else {
emitMOVZ(ScratchReg, Disc, 0);
}
DiscReg = ScratchReg;
}
// See the comments in emitPtrauthBranch.
if (Callee == AddrDisc)
report_fatal_error("Call target is signed with its own value");
Register DiscReg = emitPtrauthDiscriminator(Disc, AddrDisc, ScratchReg,
/*MayUseAddrAsScratch=*/true);

const bool IsZero = DiscReg == AArch64::NoRegister;
const bool IsZero = DiscReg == AArch64::XZR;
const unsigned Opcodes[2][2] = {{AArch64::BRAA, AArch64::BRAAZ},
{AArch64::BRAB, AArch64::BRABZ}};

MCInst TmpInst;
TmpInst.setOpcode(Opcodes[Key][IsZero]);
TmpInst.addOperand(MCOperand::createReg(MI->getOperand(0).getReg()));
TmpInst.addOperand(MCOperand::createReg(Callee));
if (!IsZero)
TmpInst.addOperand(MCOperand::createReg(DiscReg));
EmitToStreamer(*OutStreamer, TmpInst);
Expand Down
14 changes: 7 additions & 7 deletions llvm/lib/Target/AArch64/AArch64InstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -1841,28 +1841,28 @@ let Predicates = [HasPAuth] in {
// materialization here), in part because they're handled in a safer way by
// the kernel, notably on Darwin.
def BLRA : Pseudo<(outs), (ins GPR64noip:$Rn, i32imm:$Key, i64imm:$Disc,
GPR64noip:$AddrDisc),
GPR64:$AddrDisc),
[(AArch64authcall GPR64noip:$Rn, timm:$Key, timm:$Disc,
GPR64noip:$AddrDisc)]>, Sched<[]> {
GPR64:$AddrDisc)]>, Sched<[]> {
let isCodeGenOnly = 1;
let hasSideEffects = 1;
let mayStore = 0;
let mayLoad = 0;
let isCall = 1;
let Size = 12; // 4 fixed + 8 variable, to compute discriminator.
let Defs = [X17,LR];
let Defs = [X16,X17,LR];
let Uses = [SP];
}

def BLRA_RVMARKER : Pseudo<
(outs), (ins i64imm:$rvfunc, GPR64noip:$Rn, i32imm:$Key, i64imm:$Disc,
GPR64noip:$AddrDisc),
GPR64:$AddrDisc),
[(AArch64authcall_rvmarker tglobaladdr:$rvfunc,
GPR64noip:$Rn, timm:$Key, timm:$Disc,
GPR64noip:$AddrDisc)]>, Sched<[]> {
GPR64:$AddrDisc)]>, Sched<[]> {
let isCodeGenOnly = 1;
let isCall = 1;
let Defs = [X17,LR];
let Defs = [X16,X17,LR];
let Uses = [SP];
}

Expand Down Expand Up @@ -1972,7 +1972,7 @@ let Predicates = [HasPAuth] in {
// make sure at least one register is usable as a scratch one - for that
// purpose, use tcGPRnotx16x17 register class for one of the operands.
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Size = 16,
Uses = [SP] in {
Defs = [X16,X17], Uses = [SP] in {
def AUTH_TCRETURN
: Pseudo<(outs), (ins tcGPRnotx16x17:$dst, i32imm:$FPDiff, i32imm:$Key,
i64imm:$Disc, tcGPR64:$AddrDisc),
Expand Down
27 changes: 27 additions & 0 deletions llvm/test/CodeGen/AArch64/ptrauth-call.ll
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,33 @@ define void @test_tailcall_omit_mov_x16_x16(ptr %objptr) #0 {
ret void
}

; Verifies that the authenticated call reuses x16 for the discriminator blend:
; the authenticated vtable pointer is already in x16 after autda, so the
; expected code applies movk to x16 directly and passes it to blraa, with no
; intermediate "mov x17, x16". The preceding autda still blends into x17
; (mov x17, x0 followed by movk x17) because its address discriminator is x0.
define i32 @test_call_omit_extra_moves(ptr %objptr) #0 {
; CHECK-LABEL: test_call_omit_extra_moves:
; DARWIN-NEXT: stp x29, x30, [sp, #-16]!
; ELF-NEXT: str x30, [sp, #-16]!
; CHECK-NEXT: ldr x16, [x0]
; CHECK-NEXT: mov x17, x0
; CHECK-NEXT: movk x17, #6503, lsl #48
; CHECK-NEXT: autda x16, x17
; CHECK-NEXT: ldr x8, [x16]
; CHECK-NEXT: movk x16, #34646, lsl #48
; CHECK-NEXT: blraa x8, x16
; CHECK-NEXT: mov w0, #42
; DARWIN-NEXT: ldp x29, x30, [sp], #16
; ELF-NEXT: ldr x30, [sp], #16
; CHECK-NEXT: ret
%vtable.signed = load ptr, ptr %objptr
%objptr.int = ptrtoint ptr %objptr to i64
%vtable.discr = tail call i64 @llvm.ptrauth.blend(i64 %objptr.int, i64 6503)
%vtable.signed.int = ptrtoint ptr %vtable.signed to i64
%vtable.int = tail call i64 @llvm.ptrauth.auth(i64 %vtable.signed.int, i32 2, i64 %vtable.discr)
%vtable = inttoptr i64 %vtable.int to ptr
%callee.signed = load ptr, ptr %vtable
%callee.discr = tail call i64 @llvm.ptrauth.blend(i64 %vtable.int, i64 34646)
%call.result = tail call i32 %callee.signed(ptr %objptr) [ "ptrauth"(i32 0, i64 %callee.discr) ]
ret i32 42
}

define i32 @test_call_ia_arg(ptr %arg0, i64 %arg1) #0 {
; DARWIN-LABEL: test_call_ia_arg:
; DARWIN-NEXT: stp x29, x30, [sp, #-16]!
Expand Down