diff --git a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp index d9c558819db3d..dec3349867960 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp @@ -466,6 +466,16 @@ static bool isFPIntrinsic(const MachineRegisterInfo &MRI, case Intrinsic::aarch64_neon_fminv: case Intrinsic::aarch64_neon_fmaxnmv: case Intrinsic::aarch64_neon_fminnmv: + case Intrinsic::aarch64_neon_fmulx: + case Intrinsic::aarch64_neon_frecpe: + case Intrinsic::aarch64_neon_frecps: + case Intrinsic::aarch64_neon_frecpx: + case Intrinsic::aarch64_neon_frsqrte: + case Intrinsic::aarch64_neon_frsqrts: + case Intrinsic::aarch64_neon_facge: + case Intrinsic::aarch64_neon_facgt: + case Intrinsic::aarch64_neon_fabd: + case Intrinsic::aarch64_sisd_fabd: return true; case Intrinsic::aarch64_neon_saddlv: { const LLT SrcTy = MRI.getType(MI.getOperand(2).getReg()); @@ -540,6 +550,24 @@ bool AArch64RegisterBankInfo::onlyUsesFP(const MachineInstr &MI, case TargetOpcode::G_LROUND: case TargetOpcode::G_LLROUND: return true; + case TargetOpcode::G_INTRINSIC: + switch (cast<GIntrinsic>(MI).getIntrinsicID()) { + case Intrinsic::aarch64_neon_fcvtas: + case Intrinsic::aarch64_neon_fcvtau: + case Intrinsic::aarch64_neon_fcvtzs: + case Intrinsic::aarch64_neon_fcvtzu: + case Intrinsic::aarch64_neon_fcvtms: + case Intrinsic::aarch64_neon_fcvtmu: + case Intrinsic::aarch64_neon_fcvtns: + case Intrinsic::aarch64_neon_fcvtnu: + case Intrinsic::aarch64_neon_fcvtps: + case Intrinsic::aarch64_neon_fcvtpu: + // Force FPR register bank for half types, as those types otherwise + // don't get legalized correctly resulting in fp16 <-> gpr32 COPY's. 
+ return MRI.getType(MI.getOperand(2).getReg()) == LLT::float16(); + default: + break; + } default: break; } @@ -1082,24 +1110,41 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { break; case TargetOpcode::G_INTRINSIC: case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS: { - // Check if we know that the intrinsic has any constraints on its register - // banks. If it does, then update the mapping accordingly. - unsigned Idx = 0; - if (onlyDefinesFP(MI, MRI, TRI)) - for (const auto &Op : MI.defs()) { - if (Op.isReg()) - OpRegBankIdx[Idx] = PMI_FirstFPR; - ++Idx; - } - else - Idx += MI.getNumExplicitDefs(); - - if (onlyUsesFP(MI, MRI, TRI)) - for (const auto &Op : MI.explicit_uses()) { - if (Op.isReg()) - OpRegBankIdx[Idx] = PMI_FirstFPR; - ++Idx; - } + switch (cast<GIntrinsic>(MI).getIntrinsicID()) { + case Intrinsic::aarch64_neon_vcvtfxs2fp: + case Intrinsic::aarch64_neon_vcvtfxu2fp: + case Intrinsic::aarch64_neon_vcvtfp2fxs: + case Intrinsic::aarch64_neon_vcvtfp2fxu: + // Override these intrinsics, because they would have a partial + // mapping. This is needed for 'half' types, which otherwise don't + // get legalised correctly. + OpRegBankIdx[0] = PMI_FirstFPR; + OpRegBankIdx[2] = PMI_FirstFPR; + // OpRegBankIdx[1] is the intrinsic ID. + // OpRegBankIdx[3] is an integer immediate. + break; + default: { + // Check if we know that the intrinsic has any constraints on its register + // banks. If it does, then update the mapping accordingly. 
+ unsigned Idx = 0; + if (onlyDefinesFP(MI, MRI, TRI)) + for (const auto &Op : MI.defs()) { + if (Op.isReg()) + OpRegBankIdx[Idx] = PMI_FirstFPR; + ++Idx; + } + else + Idx += MI.getNumExplicitDefs(); + + if (onlyUsesFP(MI, MRI, TRI)) + for (const auto &Op : MI.explicit_uses()) { + if (Op.isReg()) + OpRegBankIdx[Idx] = PMI_FirstFPR; + ++Idx; + } + break; + } + } break; } case TargetOpcode::G_LROUND: diff --git a/llvm/test/CodeGen/AArch64/arm64-vabs.ll b/llvm/test/CodeGen/AArch64/arm64-vabs.ll index fe4657186cd2a..7ddbdf2cf2c52 100644 --- a/llvm/test/CodeGen/AArch64/arm64-vabs.ll +++ b/llvm/test/CodeGen/AArch64/arm64-vabs.ll @@ -2,10 +2,6 @@ ; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck -check-prefixes=CHECK,CHECK-SD %s ; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI -; CHECK-GI: warning: Instruction selection used fallback path for fabds -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fabdd -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for uabd_i64 - define <8 x i16> @sabdl8h(ptr %A, ptr %B) nounwind { ; CHECK-LABEL: sabdl8h: ; CHECK: // %bb.0: diff --git a/llvm/test/CodeGen/AArch64/fp16_intrinsic_scalar_1op.ll b/llvm/test/CodeGen/AArch64/fp16_intrinsic_scalar_1op.ll index 40d2d636b94bb..1b9895445152a 100644 --- a/llvm/test/CodeGen/AArch64/fp16_intrinsic_scalar_1op.ll +++ b/llvm/test/CodeGen/AArch64/fp16_intrinsic_scalar_1op.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=aarch64 -mattr=+v8.2a,+fullfp16 | FileCheck %s +; RUN: llc < %s -mtriple=aarch64 -global-isel=0 -mattr=+v8.2a,+fullfp16 | FileCheck %s --check-prefixes=CHECK,SDISEL +; RUN: llc < %s -mtriple=aarch64 -global-isel=1 -mattr=+v8.2a,+fullfp16 | FileCheck %s --check-prefixes=CHECK,GISEL declare i64 @llvm.aarch64.neon.fcvtpu.i64.f16(half) declare i32 
@llvm.aarch64.neon.fcvtpu.i32.f16(half) @@ -26,11 +27,18 @@ declare half @llvm.aarch64.neon.frecpx.f16(half) declare half @llvm.aarch64.neon.frecpe.f16(half) define dso_local i16 @t2(half %a) { -; CHECK-LABEL: t2: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: fcmp h0, #0.0 -; CHECK-NEXT: csetm w0, eq -; CHECK-NEXT: ret +; SDISEL-LABEL: t2: +; SDISEL: // %bb.0: // %entry +; SDISEL-NEXT: fcmp h0, #0.0 +; SDISEL-NEXT: csetm w0, eq +; SDISEL-NEXT: ret +; +; GISEL-LABEL: t2: +; GISEL: // %bb.0: // %entry +; GISEL-NEXT: fcmp h0, #0.0 +; GISEL-NEXT: cset w8, eq +; GISEL-NEXT: sbfx w0, w8, #0, #1 +; GISEL-NEXT: ret entry: %0 = fcmp oeq half %a, 0xH0000 %vceqz = sext i1 %0 to i16 @@ -38,11 +46,18 @@ entry: } define dso_local i16 @t3(half %a) { -; CHECK-LABEL: t3: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: fcmp h0, #0.0 -; CHECK-NEXT: csetm w0, ge -; CHECK-NEXT: ret +; SDISEL-LABEL: t3: +; SDISEL: // %bb.0: // %entry +; SDISEL-NEXT: fcmp h0, #0.0 +; SDISEL-NEXT: csetm w0, ge +; SDISEL-NEXT: ret +; +; GISEL-LABEL: t3: +; GISEL: // %bb.0: // %entry +; GISEL-NEXT: fcmp h0, #0.0 +; GISEL-NEXT: cset w8, ge +; GISEL-NEXT: sbfx w0, w8, #0, #1 +; GISEL-NEXT: ret entry: %0 = fcmp oge half %a, 0xH0000 %vcgez = sext i1 %0 to i16 @@ -50,11 +65,18 @@ entry: } define dso_local i16 @t4(half %a) { -; CHECK-LABEL: t4: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: fcmp h0, #0.0 -; CHECK-NEXT: csetm w0, gt -; CHECK-NEXT: ret +; SDISEL-LABEL: t4: +; SDISEL: // %bb.0: // %entry +; SDISEL-NEXT: fcmp h0, #0.0 +; SDISEL-NEXT: csetm w0, gt +; SDISEL-NEXT: ret +; +; GISEL-LABEL: t4: +; GISEL: // %bb.0: // %entry +; GISEL-NEXT: fcmp h0, #0.0 +; GISEL-NEXT: cset w8, gt +; GISEL-NEXT: sbfx w0, w8, #0, #1 +; GISEL-NEXT: ret entry: %0 = fcmp ogt half %a, 0xH0000 %vcgtz = sext i1 %0 to i16 @@ -62,11 +84,18 @@ entry: } define dso_local i16 @t5(half %a) { -; CHECK-LABEL: t5: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: fcmp h0, #0.0 -; CHECK-NEXT: csetm w0, ls -; CHECK-NEXT: ret +; SDISEL-LABEL: t5: +; 
SDISEL: // %bb.0: // %entry +; SDISEL-NEXT: fcmp h0, #0.0 +; SDISEL-NEXT: csetm w0, ls +; SDISEL-NEXT: ret +; +; GISEL-LABEL: t5: +; GISEL: // %bb.0: // %entry +; GISEL-NEXT: fcmp h0, #0.0 +; GISEL-NEXT: cset w8, ls +; GISEL-NEXT: sbfx w0, w8, #0, #1 +; GISEL-NEXT: ret entry: %0 = fcmp ole half %a, 0xH0000 %vclez = sext i1 %0 to i16 @@ -74,11 +103,18 @@ entry: } define dso_local i16 @t6(half %a) { -; CHECK-LABEL: t6: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: fcmp h0, #0.0 -; CHECK-NEXT: csetm w0, mi -; CHECK-NEXT: ret +; SDISEL-LABEL: t6: +; SDISEL: // %bb.0: // %entry +; SDISEL-NEXT: fcmp h0, #0.0 +; SDISEL-NEXT: csetm w0, mi +; SDISEL-NEXT: ret +; +; GISEL-LABEL: t6: +; GISEL: // %bb.0: // %entry +; GISEL-NEXT: fcmp h0, #0.0 +; GISEL-NEXT: cset w8, mi +; GISEL-NEXT: sbfx w0, w8, #0, #1 +; GISEL-NEXT: ret entry: %0 = fcmp olt half %a, 0xH0000 %vcltz = sext i1 %0 to i16 @@ -136,10 +172,15 @@ entry: } define dso_local i16 @t16(half %a) { -; CHECK-LABEL: t16: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: fcvtzs w0, h0 -; CHECK-NEXT: ret +; SDISEL-LABEL: t16: +; SDISEL: // %bb.0: // %entry +; SDISEL-NEXT: fcvtzs w0, h0 +; SDISEL-NEXT: ret +; +; GISEL-LABEL: t16: +; GISEL: // %bb.0: // %entry +; GISEL-NEXT: fcvtzu w0, h0 +; GISEL-NEXT: ret entry: %0 = fptoui half %a to i16 ret i16 %0 diff --git a/llvm/test/CodeGen/AArch64/fp16_intrinsic_scalar_2op.ll b/llvm/test/CodeGen/AArch64/fp16_intrinsic_scalar_2op.ll index 36795f86e0065..5b08ef2852977 100644 --- a/llvm/test/CodeGen/AArch64/fp16_intrinsic_scalar_2op.ll +++ b/llvm/test/CodeGen/AArch64/fp16_intrinsic_scalar_2op.ll @@ -1,5 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=aarch64 -mattr=+v8.2a,+fullfp16 | FileCheck %s +; RUN: llc < %s -mtriple=aarch64 -global-isel=0 -mattr=+v8.2a,+fullfp16 | FileCheck %s --check-prefixes=CHECK,SDISEL +; RUN: llc < %s -mtriple=aarch64 -global-isel=1 -mattr=+v8.2a,+fullfp16 | FileCheck %s --check-prefixes=CHECK,GISEL + 
declare half @llvm.aarch64.sisd.fabd.f16(half, half) declare half @llvm.aarch64.neon.fmax.f16(half, half) @@ -33,11 +35,18 @@ entry: } define dso_local i16 @t_vceqh_f16(half %a, half %b) { -; CHECK-LABEL: t_vceqh_f16: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: fcmp h0, h1 -; CHECK-NEXT: csetm w0, eq -; CHECK-NEXT: ret +; SDISEL-LABEL: t_vceqh_f16: +; SDISEL: // %bb.0: // %entry +; SDISEL-NEXT: fcmp h0, h1 +; SDISEL-NEXT: csetm w0, eq +; SDISEL-NEXT: ret +; +; GISEL-LABEL: t_vceqh_f16: +; GISEL: // %bb.0: // %entry +; GISEL-NEXT: fcmp h0, h1 +; GISEL-NEXT: cset w8, eq +; GISEL-NEXT: sbfx w0, w8, #0, #1 +; GISEL-NEXT: ret entry: %0 = fcmp oeq half %a, %b %vcmpd = sext i1 %0 to i16 @@ -45,11 +54,18 @@ entry: } define dso_local i16 @t_vcgeh_f16(half %a, half %b) { -; CHECK-LABEL: t_vcgeh_f16: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: fcmp h0, h1 -; CHECK-NEXT: csetm w0, ge -; CHECK-NEXT: ret +; SDISEL-LABEL: t_vcgeh_f16: +; SDISEL: // %bb.0: // %entry +; SDISEL-NEXT: fcmp h0, h1 +; SDISEL-NEXT: csetm w0, ge +; SDISEL-NEXT: ret +; +; GISEL-LABEL: t_vcgeh_f16: +; GISEL: // %bb.0: // %entry +; GISEL-NEXT: fcmp h0, h1 +; GISEL-NEXT: cset w8, ge +; GISEL-NEXT: sbfx w0, w8, #0, #1 +; GISEL-NEXT: ret entry: %0 = fcmp oge half %a, %b %vcmpd = sext i1 %0 to i16 @@ -57,11 +73,18 @@ entry: } define dso_local i16 @t_vcgth_f16(half %a, half %b) { -; CHECK-LABEL: t_vcgth_f16: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: fcmp h0, h1 -; CHECK-NEXT: csetm w0, gt -; CHECK-NEXT: ret +; SDISEL-LABEL: t_vcgth_f16: +; SDISEL: // %bb.0: // %entry +; SDISEL-NEXT: fcmp h0, h1 +; SDISEL-NEXT: csetm w0, gt +; SDISEL-NEXT: ret +; +; GISEL-LABEL: t_vcgth_f16: +; GISEL: // %bb.0: // %entry +; GISEL-NEXT: fcmp h0, h1 +; GISEL-NEXT: cset w8, gt +; GISEL-NEXT: sbfx w0, w8, #0, #1 +; GISEL-NEXT: ret entry: %0 = fcmp ogt half %a, %b %vcmpd = sext i1 %0 to i16 @@ -69,11 +92,18 @@ entry: } define dso_local i16 @t_vcleh_f16(half %a, half %b) { -; CHECK-LABEL: t_vcleh_f16: -; CHECK: // %bb.0: // %entry 
-; CHECK-NEXT: fcmp h0, h1 -; CHECK-NEXT: csetm w0, ls -; CHECK-NEXT: ret +; SDISEL-LABEL: t_vcleh_f16: +; SDISEL: // %bb.0: // %entry +; SDISEL-NEXT: fcmp h0, h1 +; SDISEL-NEXT: csetm w0, ls +; SDISEL-NEXT: ret +; +; GISEL-LABEL: t_vcleh_f16: +; GISEL: // %bb.0: // %entry +; GISEL-NEXT: fcmp h0, h1 +; GISEL-NEXT: cset w8, ls +; GISEL-NEXT: sbfx w0, w8, #0, #1 +; GISEL-NEXT: ret entry: %0 = fcmp ole half %a, %b %vcmpd = sext i1 %0 to i16 @@ -81,11 +111,18 @@ entry: } define dso_local i16 @t_vclth_f16(half %a, half %b) { -; CHECK-LABEL: t_vclth_f16: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: fcmp h0, h1 -; CHECK-NEXT: csetm w0, mi -; CHECK-NEXT: ret +; SDISEL-LABEL: t_vclth_f16: +; SDISEL: // %bb.0: // %entry +; SDISEL-NEXT: fcmp h0, h1 +; SDISEL-NEXT: csetm w0, mi +; SDISEL-NEXT: ret +; +; GISEL-LABEL: t_vclth_f16: +; GISEL: // %bb.0: // %entry +; GISEL-NEXT: fcmp h0, h1 +; GISEL-NEXT: cset w8, mi +; GISEL-NEXT: sbfx w0, w8, #0, #1 +; GISEL-NEXT: ret entry: %0 = fcmp olt half %a, %b %vcmpd = sext i1 %0 to i16 @@ -150,11 +187,18 @@ declare half @llvm.aarch64.neon.vcvtfxu2fp.f16.i32(i32, i32) #1 declare i32 @llvm.aarch64.neon.vcvtfp2fxu.i32.f16(half, i32) #1 define dso_local half @test_vcvth_n_f16_s16_1(i16 %a) { -; CHECK-LABEL: test_vcvth_n_f16_s16_1: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: fmov s0, w0 -; CHECK-NEXT: scvtf h0, h0, #1 -; CHECK-NEXT: ret +; SDISEL-LABEL: test_vcvth_n_f16_s16_1: +; SDISEL: // %bb.0: // %entry +; SDISEL-NEXT: fmov s0, w0 +; SDISEL-NEXT: scvtf h0, h0, #1 +; SDISEL-NEXT: ret +; +; GISEL-LABEL: test_vcvth_n_f16_s16_1: +; GISEL: // %bb.0: // %entry +; GISEL-NEXT: sxth w8, w0 +; GISEL-NEXT: fmov s0, w8 +; GISEL-NEXT: scvtf h0, h0, #1 +; GISEL-NEXT: ret entry: %sext = sext i16 %a to i32 %fcvth_n = tail call half @llvm.aarch64.neon.vcvtfxs2fp.f16.i32(i32 %sext, i32 1) @@ -162,11 +206,18 @@ entry: } define dso_local half @test_vcvth_n_f16_s16_16(i16 %a) { -; CHECK-LABEL: test_vcvth_n_f16_s16_16: -; CHECK: // %bb.0: // %entry -; 
CHECK-NEXT: fmov s0, w0 -; CHECK-NEXT: scvtf h0, h0, #16 -; CHECK-NEXT: ret +; SDISEL-LABEL: test_vcvth_n_f16_s16_16: +; SDISEL: // %bb.0: // %entry +; SDISEL-NEXT: fmov s0, w0 +; SDISEL-NEXT: scvtf h0, h0, #16 +; SDISEL-NEXT: ret +; +; GISEL-LABEL: test_vcvth_n_f16_s16_16: +; GISEL: // %bb.0: // %entry +; GISEL-NEXT: sxth w8, w0 +; GISEL-NEXT: fmov s0, w8 +; GISEL-NEXT: scvtf h0, h0, #16 +; GISEL-NEXT: ret entry: %sext = sext i16 %a to i32 %fcvth_n = tail call half @llvm.aarch64.neon.vcvtfxs2fp.f16.i32(i32 %sext, i32 16) @@ -264,11 +315,18 @@ entry: } define dso_local half @test_vcvth_n_f16_u16_1(i16 %a) { -; CHECK-LABEL: test_vcvth_n_f16_u16_1: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: fmov s0, w0 -; CHECK-NEXT: ucvtf h0, h0, #1 -; CHECK-NEXT: ret +; SDISEL-LABEL: test_vcvth_n_f16_u16_1: +; SDISEL: // %bb.0: // %entry +; SDISEL-NEXT: fmov s0, w0 +; SDISEL-NEXT: ucvtf h0, h0, #1 +; SDISEL-NEXT: ret +; +; GISEL-LABEL: test_vcvth_n_f16_u16_1: +; GISEL: // %bb.0: // %entry +; GISEL-NEXT: and w8, w0, #0xffff +; GISEL-NEXT: fmov s0, w8 +; GISEL-NEXT: ucvtf h0, h0, #1 +; GISEL-NEXT: ret entry: %0 = zext i16 %a to i32 %fcvth_n = tail call half @llvm.aarch64.neon.vcvtfxu2fp.f16.i32(i32 %0, i32 1) @@ -276,11 +334,18 @@ entry: } define dso_local half @test_vcvth_n_f16_u16_16(i16 %a) { -; CHECK-LABEL: test_vcvth_n_f16_u16_16: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: fmov s0, w0 -; CHECK-NEXT: ucvtf h0, h0, #16 -; CHECK-NEXT: ret +; SDISEL-LABEL: test_vcvth_n_f16_u16_16: +; SDISEL: // %bb.0: // %entry +; SDISEL-NEXT: fmov s0, w0 +; SDISEL-NEXT: ucvtf h0, h0, #16 +; SDISEL-NEXT: ret +; +; GISEL-LABEL: test_vcvth_n_f16_u16_16: +; GISEL: // %bb.0: // %entry +; GISEL-NEXT: and w8, w0, #0xffff +; GISEL-NEXT: fmov s0, w8 +; GISEL-NEXT: ucvtf h0, h0, #16 +; GISEL-NEXT: ret entry: %0 = zext i16 %a to i32 %fcvth_n = tail call half @llvm.aarch64.neon.vcvtfxu2fp.f16.i32(i32 %0, i32 16)