-
Notifications
You must be signed in to change notification settings - Fork 13.6k
[AArch64][GISel] Fix lowering of fp16 intrinsics #130156
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-backend-aarch64 Author: Sander de Smalen (sdesmalen-arm) Changes: This addresses the issue described in #128843. Full diff: https://github.com/llvm/llvm-project/pull/130156.diff 4 Files Affected:
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
index d9c558819db3d..f11eb7934814c 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
@@ -466,6 +466,18 @@ static bool isFPIntrinsic(const MachineRegisterInfo &MRI,
case Intrinsic::aarch64_neon_fminv:
case Intrinsic::aarch64_neon_fmaxnmv:
case Intrinsic::aarch64_neon_fminnmv:
+ case Intrinsic::aarch64_neon_fmax:
+ case Intrinsic::aarch64_neon_fmin:
+ case Intrinsic::aarch64_neon_fmulx:
+ case Intrinsic::aarch64_neon_frecpe:
+ case Intrinsic::aarch64_neon_frecps:
+ case Intrinsic::aarch64_neon_frecpx:
+ case Intrinsic::aarch64_neon_frsqrte:
+ case Intrinsic::aarch64_neon_frsqrts:
+ case Intrinsic::aarch64_neon_facge:
+ case Intrinsic::aarch64_neon_facgt:
+ case Intrinsic::aarch64_neon_fabd:
+ case Intrinsic::aarch64_sisd_fabd:
return true;
case Intrinsic::aarch64_neon_saddlv: {
const LLT SrcTy = MRI.getType(MI.getOperand(2).getReg());
@@ -540,6 +552,24 @@ bool AArch64RegisterBankInfo::onlyUsesFP(const MachineInstr &MI,
case TargetOpcode::G_LROUND:
case TargetOpcode::G_LLROUND:
return true;
+ case TargetOpcode::G_INTRINSIC:
+ switch (cast<GIntrinsic>(MI).getIntrinsicID()) {
+ case Intrinsic::aarch64_neon_fcvtas:
+ case Intrinsic::aarch64_neon_fcvtau:
+ case Intrinsic::aarch64_neon_fcvtzs:
+ case Intrinsic::aarch64_neon_fcvtzu:
+ case Intrinsic::aarch64_neon_fcvtms:
+ case Intrinsic::aarch64_neon_fcvtmu:
+ case Intrinsic::aarch64_neon_fcvtns:
+ case Intrinsic::aarch64_neon_fcvtnu:
+ case Intrinsic::aarch64_neon_fcvtps:
+ case Intrinsic::aarch64_neon_fcvtpu:
+ // Force FPR register bank for half types, as those types otherwise
+ // don't get legalized correctly resulting in fp16 <-> gpr32 COPY's.
+ return MRI.getType(MI.getOperand(2).getReg()) == LLT::float16();
+ default:
+ break;
+ }
default:
break;
}
@@ -1082,24 +1112,39 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
break;
case TargetOpcode::G_INTRINSIC:
case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS: {
- // Check if we know that the intrinsic has any constraints on its register
- // banks. If it does, then update the mapping accordingly.
- unsigned Idx = 0;
- if (onlyDefinesFP(MI, MRI, TRI))
- for (const auto &Op : MI.defs()) {
- if (Op.isReg())
- OpRegBankIdx[Idx] = PMI_FirstFPR;
- ++Idx;
- }
- else
- Idx += MI.getNumExplicitDefs();
-
- if (onlyUsesFP(MI, MRI, TRI))
- for (const auto &Op : MI.explicit_uses()) {
- if (Op.isReg())
- OpRegBankIdx[Idx] = PMI_FirstFPR;
- ++Idx;
- }
+ switch (cast<GIntrinsic>(MI).getIntrinsicID()) {
+ case Intrinsic::aarch64_neon_vcvtfxs2fp:
+ case Intrinsic::aarch64_neon_vcvtfxu2fp:
+ case Intrinsic::aarch64_neon_vcvtfp2fxs:
+ case Intrinsic::aarch64_neon_vcvtfp2fxu:
+ // Override these intrinsics, because they would have a partial
+ // mapping. This is needed for 'half' types, which otherwise don't
+ // get legalised correctly.
+ OpRegBankIdx[0] = PMI_FirstFPR;
+ OpRegBankIdx[2] = PMI_FirstFPR;
+ break;
+ default: {
+ // Check if we know that the intrinsic has any constraints on its register
+ // banks. If it does, then update the mapping accordingly.
+ unsigned Idx = 0;
+ if (onlyDefinesFP(MI, MRI, TRI))
+ for (const auto &Op : MI.defs()) {
+ if (Op.isReg())
+ OpRegBankIdx[Idx] = PMI_FirstFPR;
+ ++Idx;
+ }
+ else
+ Idx += MI.getNumExplicitDefs();
+
+ if (onlyUsesFP(MI, MRI, TRI))
+ for (const auto &Op : MI.explicit_uses()) {
+ if (Op.isReg())
+ OpRegBankIdx[Idx] = PMI_FirstFPR;
+ ++Idx;
+ }
+ break;
+ }
+ }
break;
}
case TargetOpcode::G_LROUND:
diff --git a/llvm/test/CodeGen/AArch64/arm64-vabs.ll b/llvm/test/CodeGen/AArch64/arm64-vabs.ll
index fe4657186cd2a..7ddbdf2cf2c52 100644
--- a/llvm/test/CodeGen/AArch64/arm64-vabs.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-vabs.ll
@@ -2,10 +2,6 @@
; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck -check-prefixes=CHECK,CHECK-SD %s
; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
-; CHECK-GI: warning: Instruction selection used fallback path for fabds
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fabdd
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for uabd_i64
-
define <8 x i16> @sabdl8h(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: sabdl8h:
; CHECK: // %bb.0:
diff --git a/llvm/test/CodeGen/AArch64/fp16_intrinsic_scalar_1op.ll b/llvm/test/CodeGen/AArch64/fp16_intrinsic_scalar_1op.ll
index 40d2d636b94bb..1b9895445152a 100644
--- a/llvm/test/CodeGen/AArch64/fp16_intrinsic_scalar_1op.ll
+++ b/llvm/test/CodeGen/AArch64/fp16_intrinsic_scalar_1op.ll
@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=aarch64 -mattr=+v8.2a,+fullfp16 | FileCheck %s
+; RUN: llc < %s -mtriple=aarch64 -global-isel=0 -mattr=+v8.2a,+fullfp16 | FileCheck %s --check-prefixes=CHECK,SDISEL
+; RUN: llc < %s -mtriple=aarch64 -global-isel=1 -mattr=+v8.2a,+fullfp16 | FileCheck %s --check-prefixes=CHECK,GISEL
declare i64 @llvm.aarch64.neon.fcvtpu.i64.f16(half)
declare i32 @llvm.aarch64.neon.fcvtpu.i32.f16(half)
@@ -26,11 +27,18 @@ declare half @llvm.aarch64.neon.frecpx.f16(half)
declare half @llvm.aarch64.neon.frecpe.f16(half)
define dso_local i16 @t2(half %a) {
-; CHECK-LABEL: t2:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: fcmp h0, #0.0
-; CHECK-NEXT: csetm w0, eq
-; CHECK-NEXT: ret
+; SDISEL-LABEL: t2:
+; SDISEL: // %bb.0: // %entry
+; SDISEL-NEXT: fcmp h0, #0.0
+; SDISEL-NEXT: csetm w0, eq
+; SDISEL-NEXT: ret
+;
+; GISEL-LABEL: t2:
+; GISEL: // %bb.0: // %entry
+; GISEL-NEXT: fcmp h0, #0.0
+; GISEL-NEXT: cset w8, eq
+; GISEL-NEXT: sbfx w0, w8, #0, #1
+; GISEL-NEXT: ret
entry:
%0 = fcmp oeq half %a, 0xH0000
%vceqz = sext i1 %0 to i16
@@ -38,11 +46,18 @@ entry:
}
define dso_local i16 @t3(half %a) {
-; CHECK-LABEL: t3:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: fcmp h0, #0.0
-; CHECK-NEXT: csetm w0, ge
-; CHECK-NEXT: ret
+; SDISEL-LABEL: t3:
+; SDISEL: // %bb.0: // %entry
+; SDISEL-NEXT: fcmp h0, #0.0
+; SDISEL-NEXT: csetm w0, ge
+; SDISEL-NEXT: ret
+;
+; GISEL-LABEL: t3:
+; GISEL: // %bb.0: // %entry
+; GISEL-NEXT: fcmp h0, #0.0
+; GISEL-NEXT: cset w8, ge
+; GISEL-NEXT: sbfx w0, w8, #0, #1
+; GISEL-NEXT: ret
entry:
%0 = fcmp oge half %a, 0xH0000
%vcgez = sext i1 %0 to i16
@@ -50,11 +65,18 @@ entry:
}
define dso_local i16 @t4(half %a) {
-; CHECK-LABEL: t4:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: fcmp h0, #0.0
-; CHECK-NEXT: csetm w0, gt
-; CHECK-NEXT: ret
+; SDISEL-LABEL: t4:
+; SDISEL: // %bb.0: // %entry
+; SDISEL-NEXT: fcmp h0, #0.0
+; SDISEL-NEXT: csetm w0, gt
+; SDISEL-NEXT: ret
+;
+; GISEL-LABEL: t4:
+; GISEL: // %bb.0: // %entry
+; GISEL-NEXT: fcmp h0, #0.0
+; GISEL-NEXT: cset w8, gt
+; GISEL-NEXT: sbfx w0, w8, #0, #1
+; GISEL-NEXT: ret
entry:
%0 = fcmp ogt half %a, 0xH0000
%vcgtz = sext i1 %0 to i16
@@ -62,11 +84,18 @@ entry:
}
define dso_local i16 @t5(half %a) {
-; CHECK-LABEL: t5:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: fcmp h0, #0.0
-; CHECK-NEXT: csetm w0, ls
-; CHECK-NEXT: ret
+; SDISEL-LABEL: t5:
+; SDISEL: // %bb.0: // %entry
+; SDISEL-NEXT: fcmp h0, #0.0
+; SDISEL-NEXT: csetm w0, ls
+; SDISEL-NEXT: ret
+;
+; GISEL-LABEL: t5:
+; GISEL: // %bb.0: // %entry
+; GISEL-NEXT: fcmp h0, #0.0
+; GISEL-NEXT: cset w8, ls
+; GISEL-NEXT: sbfx w0, w8, #0, #1
+; GISEL-NEXT: ret
entry:
%0 = fcmp ole half %a, 0xH0000
%vclez = sext i1 %0 to i16
@@ -74,11 +103,18 @@ entry:
}
define dso_local i16 @t6(half %a) {
-; CHECK-LABEL: t6:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: fcmp h0, #0.0
-; CHECK-NEXT: csetm w0, mi
-; CHECK-NEXT: ret
+; SDISEL-LABEL: t6:
+; SDISEL: // %bb.0: // %entry
+; SDISEL-NEXT: fcmp h0, #0.0
+; SDISEL-NEXT: csetm w0, mi
+; SDISEL-NEXT: ret
+;
+; GISEL-LABEL: t6:
+; GISEL: // %bb.0: // %entry
+; GISEL-NEXT: fcmp h0, #0.0
+; GISEL-NEXT: cset w8, mi
+; GISEL-NEXT: sbfx w0, w8, #0, #1
+; GISEL-NEXT: ret
entry:
%0 = fcmp olt half %a, 0xH0000
%vcltz = sext i1 %0 to i16
@@ -136,10 +172,15 @@ entry:
}
define dso_local i16 @t16(half %a) {
-; CHECK-LABEL: t16:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: fcvtzs w0, h0
-; CHECK-NEXT: ret
+; SDISEL-LABEL: t16:
+; SDISEL: // %bb.0: // %entry
+; SDISEL-NEXT: fcvtzs w0, h0
+; SDISEL-NEXT: ret
+;
+; GISEL-LABEL: t16:
+; GISEL: // %bb.0: // %entry
+; GISEL-NEXT: fcvtzu w0, h0
+; GISEL-NEXT: ret
entry:
%0 = fptoui half %a to i16
ret i16 %0
diff --git a/llvm/test/CodeGen/AArch64/fp16_intrinsic_scalar_2op.ll b/llvm/test/CodeGen/AArch64/fp16_intrinsic_scalar_2op.ll
index 36795f86e0065..5b08ef2852977 100644
--- a/llvm/test/CodeGen/AArch64/fp16_intrinsic_scalar_2op.ll
+++ b/llvm/test/CodeGen/AArch64/fp16_intrinsic_scalar_2op.ll
@@ -1,5 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=aarch64 -mattr=+v8.2a,+fullfp16 | FileCheck %s
+; RUN: llc < %s -mtriple=aarch64 -global-isel=0 -mattr=+v8.2a,+fullfp16 | FileCheck %s --check-prefixes=CHECK,SDISEL
+; RUN: llc < %s -mtriple=aarch64 -global-isel=1 -mattr=+v8.2a,+fullfp16 | FileCheck %s --check-prefixes=CHECK,GISEL
+
declare half @llvm.aarch64.sisd.fabd.f16(half, half)
declare half @llvm.aarch64.neon.fmax.f16(half, half)
@@ -33,11 +35,18 @@ entry:
}
define dso_local i16 @t_vceqh_f16(half %a, half %b) {
-; CHECK-LABEL: t_vceqh_f16:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: fcmp h0, h1
-; CHECK-NEXT: csetm w0, eq
-; CHECK-NEXT: ret
+; SDISEL-LABEL: t_vceqh_f16:
+; SDISEL: // %bb.0: // %entry
+; SDISEL-NEXT: fcmp h0, h1
+; SDISEL-NEXT: csetm w0, eq
+; SDISEL-NEXT: ret
+;
+; GISEL-LABEL: t_vceqh_f16:
+; GISEL: // %bb.0: // %entry
+; GISEL-NEXT: fcmp h0, h1
+; GISEL-NEXT: cset w8, eq
+; GISEL-NEXT: sbfx w0, w8, #0, #1
+; GISEL-NEXT: ret
entry:
%0 = fcmp oeq half %a, %b
%vcmpd = sext i1 %0 to i16
@@ -45,11 +54,18 @@ entry:
}
define dso_local i16 @t_vcgeh_f16(half %a, half %b) {
-; CHECK-LABEL: t_vcgeh_f16:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: fcmp h0, h1
-; CHECK-NEXT: csetm w0, ge
-; CHECK-NEXT: ret
+; SDISEL-LABEL: t_vcgeh_f16:
+; SDISEL: // %bb.0: // %entry
+; SDISEL-NEXT: fcmp h0, h1
+; SDISEL-NEXT: csetm w0, ge
+; SDISEL-NEXT: ret
+;
+; GISEL-LABEL: t_vcgeh_f16:
+; GISEL: // %bb.0: // %entry
+; GISEL-NEXT: fcmp h0, h1
+; GISEL-NEXT: cset w8, ge
+; GISEL-NEXT: sbfx w0, w8, #0, #1
+; GISEL-NEXT: ret
entry:
%0 = fcmp oge half %a, %b
%vcmpd = sext i1 %0 to i16
@@ -57,11 +73,18 @@ entry:
}
define dso_local i16 @t_vcgth_f16(half %a, half %b) {
-; CHECK-LABEL: t_vcgth_f16:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: fcmp h0, h1
-; CHECK-NEXT: csetm w0, gt
-; CHECK-NEXT: ret
+; SDISEL-LABEL: t_vcgth_f16:
+; SDISEL: // %bb.0: // %entry
+; SDISEL-NEXT: fcmp h0, h1
+; SDISEL-NEXT: csetm w0, gt
+; SDISEL-NEXT: ret
+;
+; GISEL-LABEL: t_vcgth_f16:
+; GISEL: // %bb.0: // %entry
+; GISEL-NEXT: fcmp h0, h1
+; GISEL-NEXT: cset w8, gt
+; GISEL-NEXT: sbfx w0, w8, #0, #1
+; GISEL-NEXT: ret
entry:
%0 = fcmp ogt half %a, %b
%vcmpd = sext i1 %0 to i16
@@ -69,11 +92,18 @@ entry:
}
define dso_local i16 @t_vcleh_f16(half %a, half %b) {
-; CHECK-LABEL: t_vcleh_f16:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: fcmp h0, h1
-; CHECK-NEXT: csetm w0, ls
-; CHECK-NEXT: ret
+; SDISEL-LABEL: t_vcleh_f16:
+; SDISEL: // %bb.0: // %entry
+; SDISEL-NEXT: fcmp h0, h1
+; SDISEL-NEXT: csetm w0, ls
+; SDISEL-NEXT: ret
+;
+; GISEL-LABEL: t_vcleh_f16:
+; GISEL: // %bb.0: // %entry
+; GISEL-NEXT: fcmp h0, h1
+; GISEL-NEXT: cset w8, ls
+; GISEL-NEXT: sbfx w0, w8, #0, #1
+; GISEL-NEXT: ret
entry:
%0 = fcmp ole half %a, %b
%vcmpd = sext i1 %0 to i16
@@ -81,11 +111,18 @@ entry:
}
define dso_local i16 @t_vclth_f16(half %a, half %b) {
-; CHECK-LABEL: t_vclth_f16:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: fcmp h0, h1
-; CHECK-NEXT: csetm w0, mi
-; CHECK-NEXT: ret
+; SDISEL-LABEL: t_vclth_f16:
+; SDISEL: // %bb.0: // %entry
+; SDISEL-NEXT: fcmp h0, h1
+; SDISEL-NEXT: csetm w0, mi
+; SDISEL-NEXT: ret
+;
+; GISEL-LABEL: t_vclth_f16:
+; GISEL: // %bb.0: // %entry
+; GISEL-NEXT: fcmp h0, h1
+; GISEL-NEXT: cset w8, mi
+; GISEL-NEXT: sbfx w0, w8, #0, #1
+; GISEL-NEXT: ret
entry:
%0 = fcmp olt half %a, %b
%vcmpd = sext i1 %0 to i16
@@ -150,11 +187,18 @@ declare half @llvm.aarch64.neon.vcvtfxu2fp.f16.i32(i32, i32) #1
declare i32 @llvm.aarch64.neon.vcvtfp2fxu.i32.f16(half, i32) #1
define dso_local half @test_vcvth_n_f16_s16_1(i16 %a) {
-; CHECK-LABEL: test_vcvth_n_f16_s16_1:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: fmov s0, w0
-; CHECK-NEXT: scvtf h0, h0, #1
-; CHECK-NEXT: ret
+; SDISEL-LABEL: test_vcvth_n_f16_s16_1:
+; SDISEL: // %bb.0: // %entry
+; SDISEL-NEXT: fmov s0, w0
+; SDISEL-NEXT: scvtf h0, h0, #1
+; SDISEL-NEXT: ret
+;
+; GISEL-LABEL: test_vcvth_n_f16_s16_1:
+; GISEL: // %bb.0: // %entry
+; GISEL-NEXT: sxth w8, w0
+; GISEL-NEXT: fmov s0, w8
+; GISEL-NEXT: scvtf h0, h0, #1
+; GISEL-NEXT: ret
entry:
%sext = sext i16 %a to i32
%fcvth_n = tail call half @llvm.aarch64.neon.vcvtfxs2fp.f16.i32(i32 %sext, i32 1)
@@ -162,11 +206,18 @@ entry:
}
define dso_local half @test_vcvth_n_f16_s16_16(i16 %a) {
-; CHECK-LABEL: test_vcvth_n_f16_s16_16:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: fmov s0, w0
-; CHECK-NEXT: scvtf h0, h0, #16
-; CHECK-NEXT: ret
+; SDISEL-LABEL: test_vcvth_n_f16_s16_16:
+; SDISEL: // %bb.0: // %entry
+; SDISEL-NEXT: fmov s0, w0
+; SDISEL-NEXT: scvtf h0, h0, #16
+; SDISEL-NEXT: ret
+;
+; GISEL-LABEL: test_vcvth_n_f16_s16_16:
+; GISEL: // %bb.0: // %entry
+; GISEL-NEXT: sxth w8, w0
+; GISEL-NEXT: fmov s0, w8
+; GISEL-NEXT: scvtf h0, h0, #16
+; GISEL-NEXT: ret
entry:
%sext = sext i16 %a to i32
%fcvth_n = tail call half @llvm.aarch64.neon.vcvtfxs2fp.f16.i32(i32 %sext, i32 16)
@@ -264,11 +315,18 @@ entry:
}
define dso_local half @test_vcvth_n_f16_u16_1(i16 %a) {
-; CHECK-LABEL: test_vcvth_n_f16_u16_1:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: fmov s0, w0
-; CHECK-NEXT: ucvtf h0, h0, #1
-; CHECK-NEXT: ret
+; SDISEL-LABEL: test_vcvth_n_f16_u16_1:
+; SDISEL: // %bb.0: // %entry
+; SDISEL-NEXT: fmov s0, w0
+; SDISEL-NEXT: ucvtf h0, h0, #1
+; SDISEL-NEXT: ret
+;
+; GISEL-LABEL: test_vcvth_n_f16_u16_1:
+; GISEL: // %bb.0: // %entry
+; GISEL-NEXT: and w8, w0, #0xffff
+; GISEL-NEXT: fmov s0, w8
+; GISEL-NEXT: ucvtf h0, h0, #1
+; GISEL-NEXT: ret
entry:
%0 = zext i16 %a to i32
%fcvth_n = tail call half @llvm.aarch64.neon.vcvtfxu2fp.f16.i32(i32 %0, i32 1)
@@ -276,11 +334,18 @@ entry:
}
define dso_local half @test_vcvth_n_f16_u16_16(i16 %a) {
-; CHECK-LABEL: test_vcvth_n_f16_u16_16:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: fmov s0, w0
-; CHECK-NEXT: ucvtf h0, h0, #16
-; CHECK-NEXT: ret
+; SDISEL-LABEL: test_vcvth_n_f16_u16_16:
+; SDISEL: // %bb.0: // %entry
+; SDISEL-NEXT: fmov s0, w0
+; SDISEL-NEXT: ucvtf h0, h0, #16
+; SDISEL-NEXT: ret
+;
+; GISEL-LABEL: test_vcvth_n_f16_u16_16:
+; GISEL: // %bb.0: // %entry
+; GISEL-NEXT: and w8, w0, #0xffff
+; GISEL-NEXT: fmov s0, w8
+; GISEL-NEXT: ucvtf h0, h0, #16
+; GISEL-NEXT: ret
entry:
%0 = zext i16 %a to i32
%fcvth_n = tail call half @llvm.aarch64.neon.vcvtfxu2fp.f16.i32(i32 %0, i32 16)
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM, thanks.
Sorry about the delay. Yes, I think for now this is an OK fix. Ideally we want to be able to infer this automatically in the future. |
This addresses the issue described in: llvm#128843
391022d
to
741be24
Compare
LLVM Buildbot has detected a new failure on a builder. Full details are available at: https://lab.llvm.org/buildbot/#/builders/186/builds/7304. Here is the relevant piece of the build log for reference:
|
LLVM Buildbot has detected a new failure on a builder. Full details are available at: https://lab.llvm.org/buildbot/#/builders/51/builds/12547. Here is the relevant piece of the build log for reference:
|
LLVM Buildbot has detected a new failure on a builder. Full details are available at: https://lab.llvm.org/buildbot/#/builders/168/builds/9724. Here is the relevant piece of the build log for reference:
|
This caused build failures with -Werror
LLVM Buildbot has detected a new failure on a builder. Full details are available at: https://lab.llvm.org/buildbot/#/builders/66/builds/11193. Here is the relevant piece of the build log for reference:
|
This addresses the issue described in llvm#128843
This caused build failures with -Werror
This addresses the issue described in #128843