@@ -959,47 +959,86 @@ define <2 x i1> @isnan_v2f16(<2 x half> %x) nounwind {
959959; GFX7GLISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
960960; GFX7GLISEL-NEXT: s_setpc_b64 s[30:31]
961961;
962- ; GFX8CHECK -LABEL: isnan_v2f16:
963- ; GFX8CHECK : ; %bb.0:
964- ; GFX8CHECK -NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
965- ; GFX8CHECK -NEXT: v_lshrrev_b32_e32 v1, 16, v0
966- ; GFX8CHECK -NEXT: v_cmp_class_f16_e64 s[4:5], v0, 3
967- ; GFX8CHECK -NEXT: v_cndmask_b32_e64 v0 , 0, 1, s[4:5]
968- ; GFX8CHECK -NEXT: v_cmp_class_f16_e64 s[4:5], v1, 3
969- ; GFX8CHECK -NEXT: v_cndmask_b32_e64 v1 , 0, 1, s[4:5]
970- ; GFX8CHECK -NEXT: s_setpc_b64 s[30:31]
962+ ; GFX8SELDAG -LABEL: isnan_v2f16:
963+ ; GFX8SELDAG : ; %bb.0:
964+ ; GFX8SELDAG -NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
965+ ; GFX8SELDAG -NEXT: v_lshrrev_b32_e32 v1, 16, v0
966+ ; GFX8SELDAG -NEXT: v_cmp_u_f16_e32 vcc, v1, v1
967+ ; GFX8SELDAG -NEXT: v_cndmask_b32_e64 v1 , 0, 1, vcc
968+ ; GFX8SELDAG -NEXT: v_cmp_u_f16_e32 vcc, v0, v0
969+ ; GFX8SELDAG -NEXT: v_cndmask_b32_e64 v0 , 0, 1, vcc
970+ ; GFX8SELDAG -NEXT: s_setpc_b64 s[30:31]
971971;
972- ; GFX9CHECK-LABEL: isnan_v2f16:
973- ; GFX9CHECK: ; %bb.0:
974- ; GFX9CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
975- ; GFX9CHECK-NEXT: v_mov_b32_e32 v1, 3
976- ; GFX9CHECK-NEXT: v_cmp_class_f16_e64 s[4:5], v0, 3
977- ; GFX9CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[4:5]
978- ; GFX9CHECK-NEXT: v_cmp_class_f16_sdwa s[4:5], v0, v1 src0_sel:WORD_1 src1_sel:DWORD
979- ; GFX9CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[4:5]
980- ; GFX9CHECK-NEXT: v_mov_b32_e32 v0, v2
981- ; GFX9CHECK-NEXT: s_setpc_b64 s[30:31]
972+ ; GFX8GLISEL-LABEL: isnan_v2f16:
973+ ; GFX8GLISEL: ; %bb.0:
974+ ; GFX8GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
975+ ; GFX8GLISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0
976+ ; GFX8GLISEL-NEXT: v_cmp_class_f16_e64 s[4:5], v0, 3
977+ ; GFX8GLISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
978+ ; GFX8GLISEL-NEXT: v_cmp_class_f16_e64 s[4:5], v1, 3
979+ ; GFX8GLISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[4:5]
980+ ; GFX8GLISEL-NEXT: s_setpc_b64 s[30:31]
982981;
983- ; GFX10CHECK-LABEL: isnan_v2f16:
984- ; GFX10CHECK: ; %bb.0:
985- ; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
986- ; GFX10CHECK-NEXT: v_mov_b32_e32 v1, 3
987- ; GFX10CHECK-NEXT: v_cmp_class_f16_e64 s4, v0, 3
988- ; GFX10CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, s4
989- ; GFX10CHECK-NEXT: v_cmp_class_f16_sdwa s4, v0, v1 src0_sel:WORD_1 src1_sel:DWORD
990- ; GFX10CHECK-NEXT: v_mov_b32_e32 v0, v2
991- ; GFX10CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, s4
992- ; GFX10CHECK-NEXT: s_setpc_b64 s[30:31]
982+ ; GFX9SELDAG-LABEL: isnan_v2f16:
983+ ; GFX9SELDAG: ; %bb.0:
984+ ; GFX9SELDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
985+ ; GFX9SELDAG-NEXT: v_cmp_u_f16_sdwa s[4:5], v0, v0 src0_sel:WORD_1 src1_sel:WORD_1
986+ ; GFX9SELDAG-NEXT: v_cmp_u_f16_e32 vcc, v0, v0
987+ ; GFX9SELDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[4:5]
988+ ; GFX9SELDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
989+ ; GFX9SELDAG-NEXT: s_setpc_b64 s[30:31]
993990;
994- ; GFX11CHECK-LABEL: isnan_v2f16:
995- ; GFX11CHECK: ; %bb.0:
996- ; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
997- ; GFX11CHECK-NEXT: v_lshrrev_b32_e32 v1, 16, v0
998- ; GFX11CHECK-NEXT: v_cmp_class_f16_e64 s0, v0, 3
999- ; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
1000- ; GFX11CHECK-NEXT: v_cmp_class_f16_e64 s0, v1, 3
1001- ; GFX11CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, s0
1002- ; GFX11CHECK-NEXT: s_setpc_b64 s[30:31]
991+ ; GFX9GLISEL-LABEL: isnan_v2f16:
992+ ; GFX9GLISEL: ; %bb.0:
993+ ; GFX9GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
994+ ; GFX9GLISEL-NEXT: v_mov_b32_e32 v1, 3
995+ ; GFX9GLISEL-NEXT: v_cmp_class_f16_e64 s[4:5], v0, 3
996+ ; GFX9GLISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[4:5]
997+ ; GFX9GLISEL-NEXT: v_cmp_class_f16_sdwa s[4:5], v0, v1 src0_sel:WORD_1 src1_sel:DWORD
998+ ; GFX9GLISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[4:5]
999+ ; GFX9GLISEL-NEXT: v_mov_b32_e32 v0, v2
1000+ ; GFX9GLISEL-NEXT: s_setpc_b64 s[30:31]
1001+ ;
1002+ ; GFX10SELDAG-LABEL: isnan_v2f16:
1003+ ; GFX10SELDAG: ; %bb.0:
1004+ ; GFX10SELDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1005+ ; GFX10SELDAG-NEXT: v_cmp_u_f16_e32 vcc_lo, v0, v0
1006+ ; GFX10SELDAG-NEXT: v_cmp_u_f16_sdwa s4, v0, v0 src0_sel:WORD_1 src1_sel:WORD_1
1007+ ; GFX10SELDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo
1008+ ; GFX10SELDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, s4
1009+ ; GFX10SELDAG-NEXT: v_mov_b32_e32 v0, v2
1010+ ; GFX10SELDAG-NEXT: s_setpc_b64 s[30:31]
1011+ ;
1012+ ; GFX10GLISEL-LABEL: isnan_v2f16:
1013+ ; GFX10GLISEL: ; %bb.0:
1014+ ; GFX10GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1015+ ; GFX10GLISEL-NEXT: v_mov_b32_e32 v1, 3
1016+ ; GFX10GLISEL-NEXT: v_cmp_class_f16_e64 s4, v0, 3
1017+ ; GFX10GLISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, s4
1018+ ; GFX10GLISEL-NEXT: v_cmp_class_f16_sdwa s4, v0, v1 src0_sel:WORD_1 src1_sel:DWORD
1019+ ; GFX10GLISEL-NEXT: v_mov_b32_e32 v0, v2
1020+ ; GFX10GLISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, s4
1021+ ; GFX10GLISEL-NEXT: s_setpc_b64 s[30:31]
1022+ ;
1023+ ; GFX11SELDAG-LABEL: isnan_v2f16:
1024+ ; GFX11SELDAG: ; %bb.0:
1025+ ; GFX11SELDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1026+ ; GFX11SELDAG-NEXT: v_lshrrev_b32_e32 v1, 16, v0
1027+ ; GFX11SELDAG-NEXT: v_cmp_u_f16_e32 vcc_lo, v0, v0
1028+ ; GFX11SELDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
1029+ ; GFX11SELDAG-NEXT: v_cmp_u_f16_e32 vcc_lo, v1, v1
1030+ ; GFX11SELDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo
1031+ ; GFX11SELDAG-NEXT: s_setpc_b64 s[30:31]
1032+ ;
1033+ ; GFX11GLISEL-LABEL: isnan_v2f16:
1034+ ; GFX11GLISEL: ; %bb.0:
1035+ ; GFX11GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1036+ ; GFX11GLISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0
1037+ ; GFX11GLISEL-NEXT: v_cmp_class_f16_e64 s0, v0, 3
1038+ ; GFX11GLISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
1039+ ; GFX11GLISEL-NEXT: v_cmp_class_f16_e64 s0, v1, 3
1040+ ; GFX11GLISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, s0
1041+ ; GFX11GLISEL-NEXT: s_setpc_b64 s[30:31]
10031042 %1 = call <2 x i1 > @llvm.is.fpclass.v2f16 (<2 x half > %x , i32 3 ) ; nan
10041043 ret <2 x i1 > %1
10051044}
@@ -1196,16 +1235,17 @@ define <4 x i1> @isnan_v4f16(<4 x half> %x) nounwind {
11961235; GFX8SELDAG-LABEL: isnan_v4f16:
11971236; GFX8SELDAG: ; %bb.0:
11981237; GFX8SELDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1199- ; GFX8SELDAG-NEXT: v_cmp_class_f16_e64 s[4:5], v0, 3
12001238; GFX8SELDAG-NEXT: v_lshrrev_b32_e32 v3, 16, v0
1201- ; GFX8SELDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
1202- ; GFX8SELDAG-NEXT: v_cmp_class_f16_e64 s[4:5], v1, 3
1203- ; GFX8SELDAG-NEXT: v_lshrrev_b32_e32 v4, 16, v1
1204- ; GFX8SELDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[4:5]
1205- ; GFX8SELDAG-NEXT: v_cmp_class_f16_e64 s[4:5], v3, 3
1206- ; GFX8SELDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[4:5]
1207- ; GFX8SELDAG-NEXT: v_cmp_class_f16_e64 s[4:5], v4, 3
1208- ; GFX8SELDAG-NEXT: v_cndmask_b32_e64 v3, 0, 1, s[4:5]
1239+ ; GFX8SELDAG-NEXT: v_lshrrev_b32_e32 v2, 16, v1
1240+ ; GFX8SELDAG-NEXT: v_cmp_u_f16_e32 vcc, v3, v3
1241+ ; GFX8SELDAG-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
1242+ ; GFX8SELDAG-NEXT: v_cmp_u_f16_e32 vcc, v2, v2
1243+ ; GFX8SELDAG-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
1244+ ; GFX8SELDAG-NEXT: v_cmp_u_f16_e32 vcc, v0, v0
1245+ ; GFX8SELDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
1246+ ; GFX8SELDAG-NEXT: v_cmp_u_f16_e32 vcc, v1, v1
1247+ ; GFX8SELDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
1248+ ; GFX8SELDAG-NEXT: v_mov_b32_e32 v1, v4
12091249; GFX8SELDAG-NEXT: s_setpc_b64 s[30:31]
12101250;
12111251; GFX8GLISEL-LABEL: isnan_v4f16:
@@ -1227,16 +1267,14 @@ define <4 x i1> @isnan_v4f16(<4 x half> %x) nounwind {
12271267; GFX9SELDAG-LABEL: isnan_v4f16:
12281268; GFX9SELDAG: ; %bb.0:
12291269; GFX9SELDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1230- ; GFX9SELDAG-NEXT: v_cmp_class_f16_e64 s[4:5], v0, 3
1231- ; GFX9SELDAG-NEXT: v_mov_b32_e32 v3, 3
1232- ; GFX9SELDAG-NEXT: v_cndmask_b32_e64 v5, 0, 1, s[4:5]
1233- ; GFX9SELDAG-NEXT: v_cmp_class_f16_e64 s[4:5], v1, 3
1234- ; GFX9SELDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[4:5]
1235- ; GFX9SELDAG-NEXT: v_cmp_class_f16_sdwa s[4:5], v0, v3 src0_sel:WORD_1 src1_sel:DWORD
1270+ ; GFX9SELDAG-NEXT: v_cmp_u_f16_sdwa s[4:5], v0, v0 src0_sel:WORD_1 src1_sel:WORD_1
1271+ ; GFX9SELDAG-NEXT: v_cmp_u_f16_e32 vcc, v0, v0
12361272; GFX9SELDAG-NEXT: v_cndmask_b32_e64 v4, 0, 1, s[4:5]
1237- ; GFX9SELDAG-NEXT: v_cmp_class_f16_sdwa s[4:5], v1, v3 src0_sel:WORD_1 src1_sel:DWORD
1273+ ; GFX9SELDAG-NEXT: v_cmp_u_f16_sdwa s[4:5], v1, v1 src0_sel:WORD_1 src1_sel:WORD_1
1274+ ; GFX9SELDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
1275+ ; GFX9SELDAG-NEXT: v_cmp_u_f16_e32 vcc, v1, v1
12381276; GFX9SELDAG-NEXT: v_cndmask_b32_e64 v3, 0, 1, s[4:5]
1239- ; GFX9SELDAG-NEXT: v_mov_b32_e32 v0, v5
1277+ ; GFX9SELDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
12401278; GFX9SELDAG-NEXT: v_mov_b32_e32 v1, v4
12411279; GFX9SELDAG-NEXT: s_setpc_b64 s[30:31]
12421280;
@@ -1259,16 +1297,14 @@ define <4 x i1> @isnan_v4f16(<4 x half> %x) nounwind {
12591297; GFX10SELDAG-LABEL: isnan_v4f16:
12601298; GFX10SELDAG: ; %bb.0:
12611299; GFX10SELDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1262- ; GFX10SELDAG-NEXT: v_cmp_class_f16_e64 s4, v0, 3
1263- ; GFX10SELDAG-NEXT: v_mov_b32_e32 v3, 3
1264- ; GFX10SELDAG-NEXT: v_cndmask_b32_e64 v5, 0, 1, s4
1265- ; GFX10SELDAG-NEXT: v_cmp_class_f16_e64 s4, v1, 3
1266- ; GFX10SELDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, s4
1267- ; GFX10SELDAG-NEXT: v_cmp_class_f16_sdwa s4, v0, v3 src0_sel:WORD_1 src1_sel:DWORD
1268- ; GFX10SELDAG-NEXT: v_mov_b32_e32 v0, v5
1300+ ; GFX10SELDAG-NEXT: v_cmp_u_f16_sdwa s4, v0, v0 src0_sel:WORD_1 src1_sel:WORD_1
1301+ ; GFX10SELDAG-NEXT: v_cmp_u_f16_e32 vcc_lo, v0, v0
12691302; GFX10SELDAG-NEXT: v_cndmask_b32_e64 v4, 0, 1, s4
1270- ; GFX10SELDAG-NEXT: v_cmp_class_f16_sdwa s4, v1, v3 src0_sel:WORD_1 src1_sel:DWORD
1303+ ; GFX10SELDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
1304+ ; GFX10SELDAG-NEXT: v_cmp_u_f16_e32 vcc_lo, v1, v1
1305+ ; GFX10SELDAG-NEXT: v_cmp_u_f16_sdwa s4, v1, v1 src0_sel:WORD_1 src1_sel:WORD_1
12711306; GFX10SELDAG-NEXT: v_mov_b32_e32 v1, v4
1307+ ; GFX10SELDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo
12721308; GFX10SELDAG-NEXT: v_cndmask_b32_e64 v3, 0, 1, s4
12731309; GFX10SELDAG-NEXT: s_setpc_b64 s[30:31]
12741310;
@@ -1288,20 +1324,35 @@ define <4 x i1> @isnan_v4f16(<4 x half> %x) nounwind {
12881324; GFX10GLISEL-NEXT: v_cndmask_b32_e64 v3, 0, 1, s4
12891325; GFX10GLISEL-NEXT: s_setpc_b64 s[30:31]
12901326;
1291- ; GFX11CHECK-LABEL: isnan_v4f16:
1292- ; GFX11CHECK: ; %bb.0:
1293- ; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1294- ; GFX11CHECK-NEXT: v_cmp_class_f16_e64 s0, v0, 3
1295- ; GFX11CHECK-NEXT: v_lshrrev_b32_e32 v3, 16, v0
1296- ; GFX11CHECK-NEXT: v_lshrrev_b32_e32 v4, 16, v1
1297- ; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
1298- ; GFX11CHECK-NEXT: v_cmp_class_f16_e64 s0, v1, 3
1299- ; GFX11CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, s0
1300- ; GFX11CHECK-NEXT: v_cmp_class_f16_e64 s0, v3, 3
1301- ; GFX11CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, s0
1302- ; GFX11CHECK-NEXT: v_cmp_class_f16_e64 s0, v4, 3
1303- ; GFX11CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, s0
1304- ; GFX11CHECK-NEXT: s_setpc_b64 s[30:31]
1327+ ; GFX11SELDAG-LABEL: isnan_v4f16:
1328+ ; GFX11SELDAG: ; %bb.0:
1329+ ; GFX11SELDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1330+ ; GFX11SELDAG-NEXT: v_cmp_u_f16_e32 vcc_lo, v0, v0
1331+ ; GFX11SELDAG-NEXT: v_lshrrev_b32_e32 v4, 16, v0
1332+ ; GFX11SELDAG-NEXT: v_lshrrev_b32_e32 v3, 16, v1
1333+ ; GFX11SELDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
1334+ ; GFX11SELDAG-NEXT: v_cmp_u_f16_e32 vcc_lo, v1, v1
1335+ ; GFX11SELDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo
1336+ ; GFX11SELDAG-NEXT: v_cmp_u_f16_e32 vcc_lo, v4, v4
1337+ ; GFX11SELDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo
1338+ ; GFX11SELDAG-NEXT: v_cmp_u_f16_e32 vcc_lo, v3, v3
1339+ ; GFX11SELDAG-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc_lo
1340+ ; GFX11SELDAG-NEXT: s_setpc_b64 s[30:31]
1341+ ;
1342+ ; GFX11GLISEL-LABEL: isnan_v4f16:
1343+ ; GFX11GLISEL: ; %bb.0:
1344+ ; GFX11GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1345+ ; GFX11GLISEL-NEXT: v_cmp_class_f16_e64 s0, v0, 3
1346+ ; GFX11GLISEL-NEXT: v_lshrrev_b32_e32 v3, 16, v0
1347+ ; GFX11GLISEL-NEXT: v_lshrrev_b32_e32 v4, 16, v1
1348+ ; GFX11GLISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
1349+ ; GFX11GLISEL-NEXT: v_cmp_class_f16_e64 s0, v1, 3
1350+ ; GFX11GLISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, s0
1351+ ; GFX11GLISEL-NEXT: v_cmp_class_f16_e64 s0, v3, 3
1352+ ; GFX11GLISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, s0
1353+ ; GFX11GLISEL-NEXT: v_cmp_class_f16_e64 s0, v4, 3
1354+ ; GFX11GLISEL-NEXT: v_cndmask_b32_e64 v3, 0, 1, s0
1355+ ; GFX11GLISEL-NEXT: s_setpc_b64 s[30:31]
13051356 %1 = call <4 x i1 > @llvm.is.fpclass.v4f16 (<4 x half > %x , i32 3 ) ; nan
13061357 ret <4 x i1 > %1
13071358}
0 commit comments