Skip to content

Commit 9e23bae

Browse files
committed
DAG: Check if is_fpclass is custom, instead of isLegalOrCustom
For some reason, isOperationLegalOrCustom is not the same as isOperationLegal || isOperationCustom. Unfortunately, it also checks whether the type is legal, which makes it useless for custom lowering on non-legal types (which ppcf128 always is). Really, the DAG builder shouldn't be expanding this itself; doing so makes it difficult to work with. The expansion is only here to work around the DAG requiring legal integer types the same size as the FP type after type legalization.
1 parent 5c5122c commit 9e23bae

File tree

5 files changed

+160
-112
lines changed

5 files changed

+160
-112
lines changed

llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7031,7 +7031,8 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
70317031
// If ISD::IS_FPCLASS should be expanded, do it right now, because the
70327032
// expansion can use illegal types. Making expansion early allows
70337033
// legalizing these types prior to selection.
7034-
if (!TLI.isOperationLegalOrCustom(ISD::IS_FPCLASS, ArgVT)) {
7034+
if (!TLI.isOperationLegal(ISD::IS_FPCLASS, ArgVT) &&
7035+
!TLI.isOperationCustom(ISD::IS_FPCLASS, ArgVT)) {
70357036
SDValue Result = TLI.expandIS_FPCLASS(DestVT, Op, Test, Flags, sdl, DAG);
70367037
setValue(&I, Result);
70377038
return;

llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -426,12 +426,17 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
426426
// FIXME: These IS_FPCLASS vector fp types are marked custom so it reaches
427427
// scalarization code. Can be removed when IS_FPCLASS expand isn't called by
428428
// default unless marked custom/legal.
429-
setOperationAction(
430-
ISD::IS_FPCLASS,
431-
{MVT::v2f16, MVT::v3f16, MVT::v4f16, MVT::v16f16, MVT::v2f32, MVT::v3f32,
432-
MVT::v4f32, MVT::v5f32, MVT::v6f32, MVT::v7f32, MVT::v8f32, MVT::v16f32,
433-
MVT::v2f64, MVT::v3f64, MVT::v4f64, MVT::v8f64, MVT::v16f64},
434-
Custom);
429+
setOperationAction(ISD::IS_FPCLASS,
430+
{MVT::v2f32, MVT::v3f32, MVT::v4f32, MVT::v5f32,
431+
MVT::v6f32, MVT::v7f32, MVT::v8f32, MVT::v16f32,
432+
MVT::v2f64, MVT::v3f64, MVT::v4f64, MVT::v8f64,
433+
MVT::v16f64},
434+
Custom);
435+
436+
if (isTypeLegal(MVT::f16))
437+
setOperationAction(ISD::IS_FPCLASS,
438+
{MVT::v2f16, MVT::v3f16, MVT::v4f16, MVT::v16f16},
439+
Custom);
435440

436441
// Expand to fneg + fadd.
437442
setOperationAction(ISD::FSUB, MVT::f64, Expand);

llvm/test/CodeGen/AMDGPU/fract-match.ll

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2135,16 +2135,16 @@ define <2 x half> @safe_math_fract_v2f16(<2 x half> %x, ptr addrspace(1) nocaptu
21352135
; GFX8-LABEL: safe_math_fract_v2f16:
21362136
; GFX8: ; %bb.0: ; %entry
21372137
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2138-
; GFX8-NEXT: v_mov_b32_e32 v6, 0x204
2138+
; GFX8-NEXT: s_movk_i32 s6, 0x204
21392139
; GFX8-NEXT: v_floor_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
21402140
; GFX8-NEXT: v_floor_f16_e32 v4, v0
2141-
; GFX8-NEXT: v_cmp_class_f16_sdwa s[4:5], v0, v6 src0_sel:WORD_1 src1_sel:DWORD
2141+
; GFX8-NEXT: v_fract_f16_sdwa v5, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
2142+
; GFX8-NEXT: v_cmp_class_f16_sdwa s[4:5], v0, s6 src0_sel:WORD_1 src1_sel:DWORD
21422143
; GFX8-NEXT: v_pack_b32_f16 v3, v4, v3
21432144
; GFX8-NEXT: v_fract_f16_e32 v4, v0
2144-
; GFX8-NEXT: v_fract_f16_sdwa v5, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
2145-
; GFX8-NEXT: v_cmp_class_f16_e32 vcc, v0, v6
21462145
; GFX8-NEXT: v_cndmask_b32_e64 v5, v5, 0, s[4:5]
2147-
; GFX8-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc
2146+
; GFX8-NEXT: v_cmp_class_f16_e64 s[4:5], v0, s6
2147+
; GFX8-NEXT: v_cndmask_b32_e64 v0, v4, 0, s[4:5]
21482148
; GFX8-NEXT: v_pack_b32_f16 v0, v0, v5
21492149
; GFX8-NEXT: global_store_dword v[1:2], v3, off
21502150
; GFX8-NEXT: s_waitcnt vmcnt(0)

llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll

Lines changed: 128 additions & 77 deletions
Original file line numberDiff line numberDiff line change
@@ -959,47 +959,86 @@ define <2 x i1> @isnan_v2f16(<2 x half> %x) nounwind {
959959
; GFX7GLISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
960960
; GFX7GLISEL-NEXT: s_setpc_b64 s[30:31]
961961
;
962-
; GFX8CHECK-LABEL: isnan_v2f16:
963-
; GFX8CHECK: ; %bb.0:
964-
; GFX8CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
965-
; GFX8CHECK-NEXT: v_lshrrev_b32_e32 v1, 16, v0
966-
; GFX8CHECK-NEXT: v_cmp_class_f16_e64 s[4:5], v0, 3
967-
; GFX8CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
968-
; GFX8CHECK-NEXT: v_cmp_class_f16_e64 s[4:5], v1, 3
969-
; GFX8CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[4:5]
970-
; GFX8CHECK-NEXT: s_setpc_b64 s[30:31]
962+
; GFX8SELDAG-LABEL: isnan_v2f16:
963+
; GFX8SELDAG: ; %bb.0:
964+
; GFX8SELDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
965+
; GFX8SELDAG-NEXT: v_lshrrev_b32_e32 v1, 16, v0
966+
; GFX8SELDAG-NEXT: v_cmp_u_f16_e32 vcc, v1, v1
967+
; GFX8SELDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
968+
; GFX8SELDAG-NEXT: v_cmp_u_f16_e32 vcc, v0, v0
969+
; GFX8SELDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
970+
; GFX8SELDAG-NEXT: s_setpc_b64 s[30:31]
971971
;
972-
; GFX9CHECK-LABEL: isnan_v2f16:
973-
; GFX9CHECK: ; %bb.0:
974-
; GFX9CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
975-
; GFX9CHECK-NEXT: v_mov_b32_e32 v1, 3
976-
; GFX9CHECK-NEXT: v_cmp_class_f16_e64 s[4:5], v0, 3
977-
; GFX9CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[4:5]
978-
; GFX9CHECK-NEXT: v_cmp_class_f16_sdwa s[4:5], v0, v1 src0_sel:WORD_1 src1_sel:DWORD
979-
; GFX9CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[4:5]
980-
; GFX9CHECK-NEXT: v_mov_b32_e32 v0, v2
981-
; GFX9CHECK-NEXT: s_setpc_b64 s[30:31]
972+
; GFX8GLISEL-LABEL: isnan_v2f16:
973+
; GFX8GLISEL: ; %bb.0:
974+
; GFX8GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
975+
; GFX8GLISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0
976+
; GFX8GLISEL-NEXT: v_cmp_class_f16_e64 s[4:5], v0, 3
977+
; GFX8GLISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
978+
; GFX8GLISEL-NEXT: v_cmp_class_f16_e64 s[4:5], v1, 3
979+
; GFX8GLISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[4:5]
980+
; GFX8GLISEL-NEXT: s_setpc_b64 s[30:31]
982981
;
983-
; GFX10CHECK-LABEL: isnan_v2f16:
984-
; GFX10CHECK: ; %bb.0:
985-
; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
986-
; GFX10CHECK-NEXT: v_mov_b32_e32 v1, 3
987-
; GFX10CHECK-NEXT: v_cmp_class_f16_e64 s4, v0, 3
988-
; GFX10CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, s4
989-
; GFX10CHECK-NEXT: v_cmp_class_f16_sdwa s4, v0, v1 src0_sel:WORD_1 src1_sel:DWORD
990-
; GFX10CHECK-NEXT: v_mov_b32_e32 v0, v2
991-
; GFX10CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, s4
992-
; GFX10CHECK-NEXT: s_setpc_b64 s[30:31]
982+
; GFX9SELDAG-LABEL: isnan_v2f16:
983+
; GFX9SELDAG: ; %bb.0:
984+
; GFX9SELDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
985+
; GFX9SELDAG-NEXT: v_cmp_u_f16_sdwa s[4:5], v0, v0 src0_sel:WORD_1 src1_sel:WORD_1
986+
; GFX9SELDAG-NEXT: v_cmp_u_f16_e32 vcc, v0, v0
987+
; GFX9SELDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[4:5]
988+
; GFX9SELDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
989+
; GFX9SELDAG-NEXT: s_setpc_b64 s[30:31]
993990
;
994-
; GFX11CHECK-LABEL: isnan_v2f16:
995-
; GFX11CHECK: ; %bb.0:
996-
; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
997-
; GFX11CHECK-NEXT: v_lshrrev_b32_e32 v1, 16, v0
998-
; GFX11CHECK-NEXT: v_cmp_class_f16_e64 s0, v0, 3
999-
; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
1000-
; GFX11CHECK-NEXT: v_cmp_class_f16_e64 s0, v1, 3
1001-
; GFX11CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, s0
1002-
; GFX11CHECK-NEXT: s_setpc_b64 s[30:31]
991+
; GFX9GLISEL-LABEL: isnan_v2f16:
992+
; GFX9GLISEL: ; %bb.0:
993+
; GFX9GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
994+
; GFX9GLISEL-NEXT: v_mov_b32_e32 v1, 3
995+
; GFX9GLISEL-NEXT: v_cmp_class_f16_e64 s[4:5], v0, 3
996+
; GFX9GLISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[4:5]
997+
; GFX9GLISEL-NEXT: v_cmp_class_f16_sdwa s[4:5], v0, v1 src0_sel:WORD_1 src1_sel:DWORD
998+
; GFX9GLISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[4:5]
999+
; GFX9GLISEL-NEXT: v_mov_b32_e32 v0, v2
1000+
; GFX9GLISEL-NEXT: s_setpc_b64 s[30:31]
1001+
;
1002+
; GFX10SELDAG-LABEL: isnan_v2f16:
1003+
; GFX10SELDAG: ; %bb.0:
1004+
; GFX10SELDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1005+
; GFX10SELDAG-NEXT: v_cmp_u_f16_e32 vcc_lo, v0, v0
1006+
; GFX10SELDAG-NEXT: v_cmp_u_f16_sdwa s4, v0, v0 src0_sel:WORD_1 src1_sel:WORD_1
1007+
; GFX10SELDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo
1008+
; GFX10SELDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, s4
1009+
; GFX10SELDAG-NEXT: v_mov_b32_e32 v0, v2
1010+
; GFX10SELDAG-NEXT: s_setpc_b64 s[30:31]
1011+
;
1012+
; GFX10GLISEL-LABEL: isnan_v2f16:
1013+
; GFX10GLISEL: ; %bb.0:
1014+
; GFX10GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1015+
; GFX10GLISEL-NEXT: v_mov_b32_e32 v1, 3
1016+
; GFX10GLISEL-NEXT: v_cmp_class_f16_e64 s4, v0, 3
1017+
; GFX10GLISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, s4
1018+
; GFX10GLISEL-NEXT: v_cmp_class_f16_sdwa s4, v0, v1 src0_sel:WORD_1 src1_sel:DWORD
1019+
; GFX10GLISEL-NEXT: v_mov_b32_e32 v0, v2
1020+
; GFX10GLISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, s4
1021+
; GFX10GLISEL-NEXT: s_setpc_b64 s[30:31]
1022+
;
1023+
; GFX11SELDAG-LABEL: isnan_v2f16:
1024+
; GFX11SELDAG: ; %bb.0:
1025+
; GFX11SELDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1026+
; GFX11SELDAG-NEXT: v_lshrrev_b32_e32 v1, 16, v0
1027+
; GFX11SELDAG-NEXT: v_cmp_u_f16_e32 vcc_lo, v0, v0
1028+
; GFX11SELDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
1029+
; GFX11SELDAG-NEXT: v_cmp_u_f16_e32 vcc_lo, v1, v1
1030+
; GFX11SELDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo
1031+
; GFX11SELDAG-NEXT: s_setpc_b64 s[30:31]
1032+
;
1033+
; GFX11GLISEL-LABEL: isnan_v2f16:
1034+
; GFX11GLISEL: ; %bb.0:
1035+
; GFX11GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1036+
; GFX11GLISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0
1037+
; GFX11GLISEL-NEXT: v_cmp_class_f16_e64 s0, v0, 3
1038+
; GFX11GLISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
1039+
; GFX11GLISEL-NEXT: v_cmp_class_f16_e64 s0, v1, 3
1040+
; GFX11GLISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, s0
1041+
; GFX11GLISEL-NEXT: s_setpc_b64 s[30:31]
10031042
%1 = call <2 x i1> @llvm.is.fpclass.v2f16(<2 x half> %x, i32 3) ; nan
10041043
ret <2 x i1> %1
10051044
}
@@ -1196,16 +1235,17 @@ define <4 x i1> @isnan_v4f16(<4 x half> %x) nounwind {
11961235
; GFX8SELDAG-LABEL: isnan_v4f16:
11971236
; GFX8SELDAG: ; %bb.0:
11981237
; GFX8SELDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1199-
; GFX8SELDAG-NEXT: v_cmp_class_f16_e64 s[4:5], v0, 3
12001238
; GFX8SELDAG-NEXT: v_lshrrev_b32_e32 v3, 16, v0
1201-
; GFX8SELDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
1202-
; GFX8SELDAG-NEXT: v_cmp_class_f16_e64 s[4:5], v1, 3
1203-
; GFX8SELDAG-NEXT: v_lshrrev_b32_e32 v4, 16, v1
1204-
; GFX8SELDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[4:5]
1205-
; GFX8SELDAG-NEXT: v_cmp_class_f16_e64 s[4:5], v3, 3
1206-
; GFX8SELDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[4:5]
1207-
; GFX8SELDAG-NEXT: v_cmp_class_f16_e64 s[4:5], v4, 3
1208-
; GFX8SELDAG-NEXT: v_cndmask_b32_e64 v3, 0, 1, s[4:5]
1239+
; GFX8SELDAG-NEXT: v_lshrrev_b32_e32 v2, 16, v1
1240+
; GFX8SELDAG-NEXT: v_cmp_u_f16_e32 vcc, v3, v3
1241+
; GFX8SELDAG-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
1242+
; GFX8SELDAG-NEXT: v_cmp_u_f16_e32 vcc, v2, v2
1243+
; GFX8SELDAG-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
1244+
; GFX8SELDAG-NEXT: v_cmp_u_f16_e32 vcc, v0, v0
1245+
; GFX8SELDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
1246+
; GFX8SELDAG-NEXT: v_cmp_u_f16_e32 vcc, v1, v1
1247+
; GFX8SELDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
1248+
; GFX8SELDAG-NEXT: v_mov_b32_e32 v1, v4
12091249
; GFX8SELDAG-NEXT: s_setpc_b64 s[30:31]
12101250
;
12111251
; GFX8GLISEL-LABEL: isnan_v4f16:
@@ -1227,16 +1267,14 @@ define <4 x i1> @isnan_v4f16(<4 x half> %x) nounwind {
12271267
; GFX9SELDAG-LABEL: isnan_v4f16:
12281268
; GFX9SELDAG: ; %bb.0:
12291269
; GFX9SELDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1230-
; GFX9SELDAG-NEXT: v_cmp_class_f16_e64 s[4:5], v0, 3
1231-
; GFX9SELDAG-NEXT: v_mov_b32_e32 v3, 3
1232-
; GFX9SELDAG-NEXT: v_cndmask_b32_e64 v5, 0, 1, s[4:5]
1233-
; GFX9SELDAG-NEXT: v_cmp_class_f16_e64 s[4:5], v1, 3
1234-
; GFX9SELDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[4:5]
1235-
; GFX9SELDAG-NEXT: v_cmp_class_f16_sdwa s[4:5], v0, v3 src0_sel:WORD_1 src1_sel:DWORD
1270+
; GFX9SELDAG-NEXT: v_cmp_u_f16_sdwa s[4:5], v0, v0 src0_sel:WORD_1 src1_sel:WORD_1
1271+
; GFX9SELDAG-NEXT: v_cmp_u_f16_e32 vcc, v0, v0
12361272
; GFX9SELDAG-NEXT: v_cndmask_b32_e64 v4, 0, 1, s[4:5]
1237-
; GFX9SELDAG-NEXT: v_cmp_class_f16_sdwa s[4:5], v1, v3 src0_sel:WORD_1 src1_sel:DWORD
1273+
; GFX9SELDAG-NEXT: v_cmp_u_f16_sdwa s[4:5], v1, v1 src0_sel:WORD_1 src1_sel:WORD_1
1274+
; GFX9SELDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
1275+
; GFX9SELDAG-NEXT: v_cmp_u_f16_e32 vcc, v1, v1
12381276
; GFX9SELDAG-NEXT: v_cndmask_b32_e64 v3, 0, 1, s[4:5]
1239-
; GFX9SELDAG-NEXT: v_mov_b32_e32 v0, v5
1277+
; GFX9SELDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
12401278
; GFX9SELDAG-NEXT: v_mov_b32_e32 v1, v4
12411279
; GFX9SELDAG-NEXT: s_setpc_b64 s[30:31]
12421280
;
@@ -1259,16 +1297,14 @@ define <4 x i1> @isnan_v4f16(<4 x half> %x) nounwind {
12591297
; GFX10SELDAG-LABEL: isnan_v4f16:
12601298
; GFX10SELDAG: ; %bb.0:
12611299
; GFX10SELDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1262-
; GFX10SELDAG-NEXT: v_cmp_class_f16_e64 s4, v0, 3
1263-
; GFX10SELDAG-NEXT: v_mov_b32_e32 v3, 3
1264-
; GFX10SELDAG-NEXT: v_cndmask_b32_e64 v5, 0, 1, s4
1265-
; GFX10SELDAG-NEXT: v_cmp_class_f16_e64 s4, v1, 3
1266-
; GFX10SELDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, s4
1267-
; GFX10SELDAG-NEXT: v_cmp_class_f16_sdwa s4, v0, v3 src0_sel:WORD_1 src1_sel:DWORD
1268-
; GFX10SELDAG-NEXT: v_mov_b32_e32 v0, v5
1300+
; GFX10SELDAG-NEXT: v_cmp_u_f16_sdwa s4, v0, v0 src0_sel:WORD_1 src1_sel:WORD_1
1301+
; GFX10SELDAG-NEXT: v_cmp_u_f16_e32 vcc_lo, v0, v0
12691302
; GFX10SELDAG-NEXT: v_cndmask_b32_e64 v4, 0, 1, s4
1270-
; GFX10SELDAG-NEXT: v_cmp_class_f16_sdwa s4, v1, v3 src0_sel:WORD_1 src1_sel:DWORD
1303+
; GFX10SELDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
1304+
; GFX10SELDAG-NEXT: v_cmp_u_f16_e32 vcc_lo, v1, v1
1305+
; GFX10SELDAG-NEXT: v_cmp_u_f16_sdwa s4, v1, v1 src0_sel:WORD_1 src1_sel:WORD_1
12711306
; GFX10SELDAG-NEXT: v_mov_b32_e32 v1, v4
1307+
; GFX10SELDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo
12721308
; GFX10SELDAG-NEXT: v_cndmask_b32_e64 v3, 0, 1, s4
12731309
; GFX10SELDAG-NEXT: s_setpc_b64 s[30:31]
12741310
;
@@ -1288,20 +1324,35 @@ define <4 x i1> @isnan_v4f16(<4 x half> %x) nounwind {
12881324
; GFX10GLISEL-NEXT: v_cndmask_b32_e64 v3, 0, 1, s4
12891325
; GFX10GLISEL-NEXT: s_setpc_b64 s[30:31]
12901326
;
1291-
; GFX11CHECK-LABEL: isnan_v4f16:
1292-
; GFX11CHECK: ; %bb.0:
1293-
; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1294-
; GFX11CHECK-NEXT: v_cmp_class_f16_e64 s0, v0, 3
1295-
; GFX11CHECK-NEXT: v_lshrrev_b32_e32 v3, 16, v0
1296-
; GFX11CHECK-NEXT: v_lshrrev_b32_e32 v4, 16, v1
1297-
; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
1298-
; GFX11CHECK-NEXT: v_cmp_class_f16_e64 s0, v1, 3
1299-
; GFX11CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, s0
1300-
; GFX11CHECK-NEXT: v_cmp_class_f16_e64 s0, v3, 3
1301-
; GFX11CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, s0
1302-
; GFX11CHECK-NEXT: v_cmp_class_f16_e64 s0, v4, 3
1303-
; GFX11CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, s0
1304-
; GFX11CHECK-NEXT: s_setpc_b64 s[30:31]
1327+
; GFX11SELDAG-LABEL: isnan_v4f16:
1328+
; GFX11SELDAG: ; %bb.0:
1329+
; GFX11SELDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1330+
; GFX11SELDAG-NEXT: v_cmp_u_f16_e32 vcc_lo, v0, v0
1331+
; GFX11SELDAG-NEXT: v_lshrrev_b32_e32 v4, 16, v0
1332+
; GFX11SELDAG-NEXT: v_lshrrev_b32_e32 v3, 16, v1
1333+
; GFX11SELDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
1334+
; GFX11SELDAG-NEXT: v_cmp_u_f16_e32 vcc_lo, v1, v1
1335+
; GFX11SELDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo
1336+
; GFX11SELDAG-NEXT: v_cmp_u_f16_e32 vcc_lo, v4, v4
1337+
; GFX11SELDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo
1338+
; GFX11SELDAG-NEXT: v_cmp_u_f16_e32 vcc_lo, v3, v3
1339+
; GFX11SELDAG-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc_lo
1340+
; GFX11SELDAG-NEXT: s_setpc_b64 s[30:31]
1341+
;
1342+
; GFX11GLISEL-LABEL: isnan_v4f16:
1343+
; GFX11GLISEL: ; %bb.0:
1344+
; GFX11GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1345+
; GFX11GLISEL-NEXT: v_cmp_class_f16_e64 s0, v0, 3
1346+
; GFX11GLISEL-NEXT: v_lshrrev_b32_e32 v3, 16, v0
1347+
; GFX11GLISEL-NEXT: v_lshrrev_b32_e32 v4, 16, v1
1348+
; GFX11GLISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
1349+
; GFX11GLISEL-NEXT: v_cmp_class_f16_e64 s0, v1, 3
1350+
; GFX11GLISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, s0
1351+
; GFX11GLISEL-NEXT: v_cmp_class_f16_e64 s0, v3, 3
1352+
; GFX11GLISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, s0
1353+
; GFX11GLISEL-NEXT: v_cmp_class_f16_e64 s0, v4, 3
1354+
; GFX11GLISEL-NEXT: v_cndmask_b32_e64 v3, 0, 1, s0
1355+
; GFX11GLISEL-NEXT: s_setpc_b64 s[30:31]
13051356
%1 = call <4 x i1> @llvm.is.fpclass.v4f16(<4 x half> %x, i32 3) ; nan
13061357
ret <4 x i1> %1
13071358
}

llvm/test/CodeGen/PowerPC/is_fpclass.ll

Lines changed: 14 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -29,10 +29,10 @@ define i1 @isnan_double(double %x) nounwind {
2929
define i1 @isnan_ppc_fp128(ppc_fp128 %x) nounwind {
3030
; CHECK-LABEL: isnan_ppc_fp128:
3131
; CHECK: # %bb.0:
32-
; CHECK-NEXT: fcmpu 0, 1, 1
32+
; CHECK-NEXT: xststdcdp 0, 1, 64
3333
; CHECK-NEXT: li 3, 0
3434
; CHECK-NEXT: li 4, 1
35-
; CHECK-NEXT: isel 3, 4, 3, 3
35+
; CHECK-NEXT: iseleq 3, 4, 3
3636
; CHECK-NEXT: blr
3737
%1 = call i1 @llvm.is.fpclass.ppcf128(ppc_fp128 %x, i32 3) ; nan
3838
ret i1 %1
@@ -77,14 +77,10 @@ define i1 @isnan_double_strictfp(double %x) strictfp nounwind {
7777
define i1 @isnan_ppc_fp128_strictfp(ppc_fp128 %x) strictfp nounwind {
7878
; CHECK-LABEL: isnan_ppc_fp128_strictfp:
7979
; CHECK: # %bb.0:
80-
; CHECK-NEXT: mffprd 3, 1
81-
; CHECK-NEXT: li 4, 2047
82-
; CHECK-NEXT: clrldi 3, 3, 1
83-
; CHECK-NEXT: rldic 4, 4, 52, 1
84-
; CHECK-NEXT: cmpd 3, 4
80+
; CHECK-NEXT: xststdcdp 0, 1, 64
8581
; CHECK-NEXT: li 3, 0
8682
; CHECK-NEXT: li 4, 1
87-
; CHECK-NEXT: iselgt 3, 4, 3
83+
; CHECK-NEXT: iseleq 3, 4, 3
8884
; CHECK-NEXT: blr
8985
%1 = call i1 @llvm.is.fpclass.ppcf128(ppc_fp128 %x, i32 3) strictfp ; nan
9086
ret i1 %1
@@ -279,15 +275,11 @@ define i1 @issnan_ppc_fp128(ppc_fp128 %x) nounwind {
279275
; CHECK-LABEL: issnan_ppc_fp128:
280276
; CHECK: # %bb.0:
281277
; CHECK-NEXT: mffprd 3, 1
282-
; CHECK-NEXT: li 4, 4095
283-
; CHECK-NEXT: clrldi 3, 3, 1
284-
; CHECK-NEXT: rldic 4, 4, 51, 1
285-
; CHECK-NEXT: cmpd 3, 4
286-
; CHECK-NEXT: li 4, 2047
287-
; CHECK-NEXT: rldic 4, 4, 52, 1
288-
; CHECK-NEXT: cmpd 1, 3, 4
278+
; CHECK-NEXT: xststdcdp 1, 1, 64
279+
; CHECK-NEXT: rldicl 3, 3, 32, 32
280+
; CHECK-NEXT: andis. 3, 3, 8
289281
; CHECK-NEXT: li 3, 1
290-
; CHECK-NEXT: crnand 20, 5, 0
282+
; CHECK-NEXT: crnand 20, 6, 2
291283
; CHECK-NEXT: isel 3, 0, 3, 20
292284
; CHECK-NEXT: blr
293285
%1 = call i1 @llvm.is.fpclass.ppcf128(ppc_fp128 %x, i32 1)
@@ -345,13 +337,12 @@ define i1 @isqnan_ppc_fp128(ppc_fp128 %x) nounwind {
345337
; CHECK-LABEL: isqnan_ppc_fp128:
346338
; CHECK: # %bb.0:
347339
; CHECK-NEXT: mffprd 3, 1
348-
; CHECK-NEXT: li 4, -17
349-
; CHECK-NEXT: clrldi 3, 3, 1
350-
; CHECK-NEXT: rldicl 4, 4, 47, 1
351-
; CHECK-NEXT: cmpd 3, 4
352-
; CHECK-NEXT: li 3, 0
353-
; CHECK-NEXT: li 4, 1
354-
; CHECK-NEXT: iselgt 3, 4, 3
340+
; CHECK-NEXT: xststdcdp 1, 1, 64
341+
; CHECK-NEXT: rldicl 3, 3, 13, 51
342+
; CHECK-NEXT: andi. 3, 3, 1
343+
; CHECK-NEXT: li 3, 1
344+
; CHECK-NEXT: crnand 20, 6, 1
345+
; CHECK-NEXT: isel 3, 0, 3, 20
355346
; CHECK-NEXT: blr
356347
%1 = call i1 @llvm.is.fpclass.ppcf128(ppc_fp128 %x, i32 2)
357348
ret i1 %1

0 commit comments

Comments
 (0)