Skip to content

Commit 9af5384

Browse files
[AArch64][GISel] Fix lowering of fp16 intrinsics (#130156)
This addresses the issue described in #128843.
1 parent 44f4e43 commit 9af5384

File tree

4 files changed: 245 additions and 98 deletions.

llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp

Lines changed: 63 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -466,6 +466,16 @@ static bool isFPIntrinsic(const MachineRegisterInfo &MRI,
466466
case Intrinsic::aarch64_neon_fminv:
467467
case Intrinsic::aarch64_neon_fmaxnmv:
468468
case Intrinsic::aarch64_neon_fminnmv:
469+
case Intrinsic::aarch64_neon_fmulx:
470+
case Intrinsic::aarch64_neon_frecpe:
471+
case Intrinsic::aarch64_neon_frecps:
472+
case Intrinsic::aarch64_neon_frecpx:
473+
case Intrinsic::aarch64_neon_frsqrte:
474+
case Intrinsic::aarch64_neon_frsqrts:
475+
case Intrinsic::aarch64_neon_facge:
476+
case Intrinsic::aarch64_neon_facgt:
477+
case Intrinsic::aarch64_neon_fabd:
478+
case Intrinsic::aarch64_sisd_fabd:
469479
return true;
470480
case Intrinsic::aarch64_neon_saddlv: {
471481
const LLT SrcTy = MRI.getType(MI.getOperand(2).getReg());
@@ -540,6 +550,24 @@ bool AArch64RegisterBankInfo::onlyUsesFP(const MachineInstr &MI,
540550
case TargetOpcode::G_LROUND:
541551
case TargetOpcode::G_LLROUND:
542552
return true;
553+
case TargetOpcode::G_INTRINSIC:
554+
switch (cast<GIntrinsic>(MI).getIntrinsicID()) {
555+
case Intrinsic::aarch64_neon_fcvtas:
556+
case Intrinsic::aarch64_neon_fcvtau:
557+
case Intrinsic::aarch64_neon_fcvtzs:
558+
case Intrinsic::aarch64_neon_fcvtzu:
559+
case Intrinsic::aarch64_neon_fcvtms:
560+
case Intrinsic::aarch64_neon_fcvtmu:
561+
case Intrinsic::aarch64_neon_fcvtns:
562+
case Intrinsic::aarch64_neon_fcvtnu:
563+
case Intrinsic::aarch64_neon_fcvtps:
564+
case Intrinsic::aarch64_neon_fcvtpu:
565+
// Force FPR register bank for half types, as those types otherwise
566+
// don't get legalized correctly, resulting in fp16 <-> gpr32 COPYs.
567+
return MRI.getType(MI.getOperand(2).getReg()) == LLT::float16();
568+
default:
569+
break;
570+
}
543571
default:
544572
break;
545573
}
@@ -1082,24 +1110,41 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
10821110
break;
10831111
case TargetOpcode::G_INTRINSIC:
10841112
case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS: {
1085-
// Check if we know that the intrinsic has any constraints on its register
1086-
// banks. If it does, then update the mapping accordingly.
1087-
unsigned Idx = 0;
1088-
if (onlyDefinesFP(MI, MRI, TRI))
1089-
for (const auto &Op : MI.defs()) {
1090-
if (Op.isReg())
1091-
OpRegBankIdx[Idx] = PMI_FirstFPR;
1092-
++Idx;
1093-
}
1094-
else
1095-
Idx += MI.getNumExplicitDefs();
1096-
1097-
if (onlyUsesFP(MI, MRI, TRI))
1098-
for (const auto &Op : MI.explicit_uses()) {
1099-
if (Op.isReg())
1100-
OpRegBankIdx[Idx] = PMI_FirstFPR;
1101-
++Idx;
1102-
}
1113+
switch (cast<GIntrinsic>(MI).getIntrinsicID()) {
1114+
case Intrinsic::aarch64_neon_vcvtfxs2fp:
1115+
case Intrinsic::aarch64_neon_vcvtfxu2fp:
1116+
case Intrinsic::aarch64_neon_vcvtfp2fxs:
1117+
case Intrinsic::aarch64_neon_vcvtfp2fxu:
1118+
// Override these intrinsics, because they would have a partial
1119+
// mapping. This is needed for 'half' types, which otherwise don't
1120+
// get legalised correctly.
1121+
OpRegBankIdx[0] = PMI_FirstFPR;
1122+
OpRegBankIdx[2] = PMI_FirstFPR;
1123+
// OpRegBankIdx[1] is the intrinsic ID.
1124+
// OpRegBankIdx[3] is an integer immediate.
1125+
break;
1126+
default: {
1127+
// Check if we know that the intrinsic has any constraints on its register
1128+
// banks. If it does, then update the mapping accordingly.
1129+
unsigned Idx = 0;
1130+
if (onlyDefinesFP(MI, MRI, TRI))
1131+
for (const auto &Op : MI.defs()) {
1132+
if (Op.isReg())
1133+
OpRegBankIdx[Idx] = PMI_FirstFPR;
1134+
++Idx;
1135+
}
1136+
else
1137+
Idx += MI.getNumExplicitDefs();
1138+
1139+
if (onlyUsesFP(MI, MRI, TRI))
1140+
for (const auto &Op : MI.explicit_uses()) {
1141+
if (Op.isReg())
1142+
OpRegBankIdx[Idx] = PMI_FirstFPR;
1143+
++Idx;
1144+
}
1145+
break;
1146+
}
1147+
}
11031148
break;
11041149
}
11051150
case TargetOpcode::G_LROUND:

llvm/test/CodeGen/AArch64/arm64-vabs.ll

Lines changed: 0 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -2,10 +2,6 @@
22
; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck -check-prefixes=CHECK,CHECK-SD %s
33
; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
44

5-
; CHECK-GI: warning: Instruction selection used fallback path for fabds
6-
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fabdd
7-
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for uabd_i64
8-
95
define <8 x i16> @sabdl8h(ptr %A, ptr %B) nounwind {
106
; CHECK-LABEL: sabdl8h:
117
; CHECK: // %bb.0:

llvm/test/CodeGen/AArch64/fp16_intrinsic_scalar_1op.ll

Lines changed: 71 additions & 30 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,6 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2-
; RUN: llc < %s -mtriple=aarch64 -mattr=+v8.2a,+fullfp16 | FileCheck %s
2+
; RUN: llc < %s -mtriple=aarch64 -global-isel=0 -mattr=+v8.2a,+fullfp16 | FileCheck %s --check-prefixes=CHECK,SDISEL
3+
; RUN: llc < %s -mtriple=aarch64 -global-isel=1 -mattr=+v8.2a,+fullfp16 | FileCheck %s --check-prefixes=CHECK,GISEL
34

45
declare i64 @llvm.aarch64.neon.fcvtpu.i64.f16(half)
56
declare i32 @llvm.aarch64.neon.fcvtpu.i32.f16(half)
@@ -26,59 +27,94 @@ declare half @llvm.aarch64.neon.frecpx.f16(half)
2627
declare half @llvm.aarch64.neon.frecpe.f16(half)
2728

2829
define dso_local i16 @t2(half %a) {
29-
; CHECK-LABEL: t2:
30-
; CHECK: // %bb.0: // %entry
31-
; CHECK-NEXT: fcmp h0, #0.0
32-
; CHECK-NEXT: csetm w0, eq
33-
; CHECK-NEXT: ret
30+
; SDISEL-LABEL: t2:
31+
; SDISEL: // %bb.0: // %entry
32+
; SDISEL-NEXT: fcmp h0, #0.0
33+
; SDISEL-NEXT: csetm w0, eq
34+
; SDISEL-NEXT: ret
35+
;
36+
; GISEL-LABEL: t2:
37+
; GISEL: // %bb.0: // %entry
38+
; GISEL-NEXT: fcmp h0, #0.0
39+
; GISEL-NEXT: cset w8, eq
40+
; GISEL-NEXT: sbfx w0, w8, #0, #1
41+
; GISEL-NEXT: ret
3442
entry:
3543
%0 = fcmp oeq half %a, 0xH0000
3644
%vceqz = sext i1 %0 to i16
3745
ret i16 %vceqz
3846
}
3947

4048
define dso_local i16 @t3(half %a) {
41-
; CHECK-LABEL: t3:
42-
; CHECK: // %bb.0: // %entry
43-
; CHECK-NEXT: fcmp h0, #0.0
44-
; CHECK-NEXT: csetm w0, ge
45-
; CHECK-NEXT: ret
49+
; SDISEL-LABEL: t3:
50+
; SDISEL: // %bb.0: // %entry
51+
; SDISEL-NEXT: fcmp h0, #0.0
52+
; SDISEL-NEXT: csetm w0, ge
53+
; SDISEL-NEXT: ret
54+
;
55+
; GISEL-LABEL: t3:
56+
; GISEL: // %bb.0: // %entry
57+
; GISEL-NEXT: fcmp h0, #0.0
58+
; GISEL-NEXT: cset w8, ge
59+
; GISEL-NEXT: sbfx w0, w8, #0, #1
60+
; GISEL-NEXT: ret
4661
entry:
4762
%0 = fcmp oge half %a, 0xH0000
4863
%vcgez = sext i1 %0 to i16
4964
ret i16 %vcgez
5065
}
5166

5267
define dso_local i16 @t4(half %a) {
53-
; CHECK-LABEL: t4:
54-
; CHECK: // %bb.0: // %entry
55-
; CHECK-NEXT: fcmp h0, #0.0
56-
; CHECK-NEXT: csetm w0, gt
57-
; CHECK-NEXT: ret
68+
; SDISEL-LABEL: t4:
69+
; SDISEL: // %bb.0: // %entry
70+
; SDISEL-NEXT: fcmp h0, #0.0
71+
; SDISEL-NEXT: csetm w0, gt
72+
; SDISEL-NEXT: ret
73+
;
74+
; GISEL-LABEL: t4:
75+
; GISEL: // %bb.0: // %entry
76+
; GISEL-NEXT: fcmp h0, #0.0
77+
; GISEL-NEXT: cset w8, gt
78+
; GISEL-NEXT: sbfx w0, w8, #0, #1
79+
; GISEL-NEXT: ret
5880
entry:
5981
%0 = fcmp ogt half %a, 0xH0000
6082
%vcgtz = sext i1 %0 to i16
6183
ret i16 %vcgtz
6284
}
6385

6486
define dso_local i16 @t5(half %a) {
65-
; CHECK-LABEL: t5:
66-
; CHECK: // %bb.0: // %entry
67-
; CHECK-NEXT: fcmp h0, #0.0
68-
; CHECK-NEXT: csetm w0, ls
69-
; CHECK-NEXT: ret
87+
; SDISEL-LABEL: t5:
88+
; SDISEL: // %bb.0: // %entry
89+
; SDISEL-NEXT: fcmp h0, #0.0
90+
; SDISEL-NEXT: csetm w0, ls
91+
; SDISEL-NEXT: ret
92+
;
93+
; GISEL-LABEL: t5:
94+
; GISEL: // %bb.0: // %entry
95+
; GISEL-NEXT: fcmp h0, #0.0
96+
; GISEL-NEXT: cset w8, ls
97+
; GISEL-NEXT: sbfx w0, w8, #0, #1
98+
; GISEL-NEXT: ret
7099
entry:
71100
%0 = fcmp ole half %a, 0xH0000
72101
%vclez = sext i1 %0 to i16
73102
ret i16 %vclez
74103
}
75104

76105
define dso_local i16 @t6(half %a) {
77-
; CHECK-LABEL: t6:
78-
; CHECK: // %bb.0: // %entry
79-
; CHECK-NEXT: fcmp h0, #0.0
80-
; CHECK-NEXT: csetm w0, mi
81-
; CHECK-NEXT: ret
106+
; SDISEL-LABEL: t6:
107+
; SDISEL: // %bb.0: // %entry
108+
; SDISEL-NEXT: fcmp h0, #0.0
109+
; SDISEL-NEXT: csetm w0, mi
110+
; SDISEL-NEXT: ret
111+
;
112+
; GISEL-LABEL: t6:
113+
; GISEL: // %bb.0: // %entry
114+
; GISEL-NEXT: fcmp h0, #0.0
115+
; GISEL-NEXT: cset w8, mi
116+
; GISEL-NEXT: sbfx w0, w8, #0, #1
117+
; GISEL-NEXT: ret
82118
entry:
83119
%0 = fcmp olt half %a, 0xH0000
84120
%vcltz = sext i1 %0 to i16
@@ -136,10 +172,15 @@ entry:
136172
}
137173

138174
define dso_local i16 @t16(half %a) {
139-
; CHECK-LABEL: t16:
140-
; CHECK: // %bb.0: // %entry
141-
; CHECK-NEXT: fcvtzs w0, h0
142-
; CHECK-NEXT: ret
175+
; SDISEL-LABEL: t16:
176+
; SDISEL: // %bb.0: // %entry
177+
; SDISEL-NEXT: fcvtzs w0, h0
178+
; SDISEL-NEXT: ret
179+
;
180+
; GISEL-LABEL: t16:
181+
; GISEL: // %bb.0: // %entry
182+
; GISEL-NEXT: fcvtzu w0, h0
183+
; GISEL-NEXT: ret
143184
entry:
144185
%0 = fptoui half %a to i16
145186
ret i16 %0

0 commit comments

Comments
 (0)