@@ -1184,14 +1184,15 @@ define <2 x half> @test_fma(<2 x half> %a, <2 x half> %b, <2 x half> %c) #0 {
1184
1184
1185
1185
; CHECK-LABEL: test_fabs(
1186
1186
; CHECK: ld.param.b32 [[A:%r[0-9]+]], [test_fabs_param_0];
1187
- ; CHECK: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]]
1188
- ; CHECK-DAG: cvt.f32.f16 [[AF0:%f[0-9]+]], [[A0]];
1189
- ; CHECK-DAG: cvt.f32.f16 [[AF1:%f[0-9]+]], [[A1]];
1190
- ; CHECK-DAG: abs.f32 [[RF0:%f[0-9]+]], [[AF0]];
1191
- ; CHECK-DAG: abs.f32 [[RF1:%f[0-9]+]], [[AF1]];
1192
- ; CHECK-DAG: cvt.rn.f16.f32 [[R0:%rs[0-9]+]], [[RF0]];
1193
- ; CHECK-DAG: cvt.rn.f16.f32 [[R1:%rs[0-9]+]], [[RF1]];
1194
- ; CHECK: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]}
1187
+ ; CHECK-NOF16: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]]
1188
+ ; CHECK-NOF16-DAG: cvt.f32.f16 [[AF0:%f[0-9]+]], [[A0]];
1189
+ ; CHECK-NOF16-DAG: cvt.f32.f16 [[AF1:%f[0-9]+]], [[A1]];
1190
+ ; CHECK-NOF16-DAG: abs.f32 [[RF0:%f[0-9]+]], [[AF0]];
1191
+ ; CHECK-NOF16-DAG: abs.f32 [[RF1:%f[0-9]+]], [[AF1]];
1192
+ ; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R0:%rs[0-9]+]], [[RF0]];
1193
+ ; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R1:%rs[0-9]+]], [[RF1]];
1194
+ ; CHECK-NOF16: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]}
1195
; F16 variant: fabs on the packed pair is expected as one 32-bit AND.
; 2147450879 == 0x7FFF7FFF, i.e. bit 15 of each 16-bit half of [[A]] is cleared.
; NOTE(review): presumably this prefix is enabled by a RUN line with native f16
; support; the RUN lines are not visible in this hunk -- confirm.
+ ; CHECK-F16: and.b32 [[R:%r[0-9]+]], [[A]], 2147450879;
1195
1196
; CHECK: st.param.b32 [func_retval0+0], [[R]];
1196
1197
; CHECK: ret;
1197
1198
define <2 x half > @test_fabs (<2 x half > %a ) #0 {
@@ -1244,15 +1245,18 @@ define <2 x half> @test_maxnum(<2 x half> %a, <2 x half> %b) #0 {
1244
1245
; CHECK-LABEL: test_copysign(
1245
1246
; CHECK-DAG: ld.param.b32 [[A:%r[0-9]+]], [test_copysign_param_0];
1246
1247
; CHECK-DAG: ld.param.b32 [[B:%r[0-9]+]], [test_copysign_param_1];
1247
- ; CHECK-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]]
1248
- ; CHECK-DAG: mov.b32 {[[B0:%rs[0-9]+]], [[B1:%rs[0-9]+]]}, [[B]]
1249
- ; CHECK-DAG: and.b16 [[AX0:%rs[0-9]+]], [[A0]], 32767;
1250
- ; CHECK-DAG: and.b16 [[AX1:%rs[0-9]+]], [[A1]], 32767;
1251
- ; CHECK-DAG: and.b16 [[BX0:%rs[0-9]+]], [[B0]], -32768;
1252
- ; CHECK-DAG: and.b16 [[BX1:%rs[0-9]+]], [[B1]], -32768;
1253
- ; CHECK-DAG: or.b16 [[R0:%rs[0-9]+]], [[AX0]], [[BX0]];
1254
- ; CHECK-DAG: or.b16 [[R1:%rs[0-9]+]], [[AX1]], [[BX1]];
1255
- ; CHECK-DAG: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]}
1248
+ ; CHECK-NOF16-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]]
1249
+ ; CHECK-NOF16-DAG: mov.b32 {[[B0:%rs[0-9]+]], [[B1:%rs[0-9]+]]}, [[B]]
1250
+ ; CHECK-NOF16-DAG: and.b16 [[AX0:%rs[0-9]+]], [[A0]], 32767;
1251
+ ; CHECK-NOF16-DAG: and.b16 [[AX1:%rs[0-9]+]], [[A1]], 32767;
1252
+ ; CHECK-NOF16-DAG: and.b16 [[BX0:%rs[0-9]+]], [[B0]], -32768;
1253
+ ; CHECK-NOF16-DAG: and.b16 [[BX1:%rs[0-9]+]], [[B1]], -32768;
1254
+ ; CHECK-NOF16-DAG: or.b16 [[R0:%rs[0-9]+]], [[AX0]], [[BX0]];
1255
+ ; CHECK-NOF16-DAG: or.b16 [[R1:%rs[0-9]+]], [[AX1]], [[BX1]];
1256
+ ; CHECK-NOF16-DAG: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]}
1257
; F16 variant: copysign is expected directly on the packed 32-bit registers.
;   -2147450880 == 0x80008000 keeps only bit 15 of each 16-bit half of [[B]].
;    2147450879 == 0x7FFF7FFF keeps the low 15 bits of each 16-bit half of [[A]].
; The OR of the two results is bound to [[R]], the same name the non-F16 group
; binds, so the shared st.param check that follows can use it in either case.
+ ; CHECK-F16-DAG: and.b32 [[R0:%r[0-9]+]], [[B]], -2147450880;
1258
+ ; CHECK-F16-DAG: and.b32 [[R1:%r[0-9]+]], [[A]], 2147450879;
1259
+ ; CHECK-F16-DAG: or.b32 [[R:%r[0-9]+]], [[R1]], [[R0]]
1256
1260
; CHECK: st.param.b32 [func_retval0+0], [[R]];
1257
1261
; CHECK: ret;
1258
1262
define <2 x half > @test_copysign (<2 x half > %a , <2 x half > %b ) #0 {
@@ -1263,18 +1267,24 @@ define <2 x half> @test_copysign(<2 x half> %a, <2 x half> %b) #0 {
1263
1267
; CHECK-LABEL: test_copysign_f32(
1264
1268
; CHECK-DAG: ld.param.b32 [[A:%r[0-9]+]], [test_copysign_f32_param_0];
1265
1269
; CHECK-DAG: ld.param.v2.f32 {[[B0:%f[0-9]+]], [[B1:%f[0-9]+]]}, [test_copysign_f32_param_1];
1266
- ; CHECK-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]]
1267
- ; CHECK-DAG: mov.b32 [[BI0:%r[0-9]+]], [[B0]];
1268
- ; CHECK-DAG: mov.b32 [[BI1:%r[0-9]+]], [[B1]];
1269
- ; CHECK-DAG: and.b16 [[AI0:%rs[0-9]+]], [[A0]], 32767;
1270
- ; CHECK-DAG: and.b16 [[AI1:%rs[0-9]+]], [[A1]], 32767;
1271
- ; CHECK-DAG: and.b32 [[BX0:%r[0-9]+]], [[BI0]], -2147483648;
1272
- ; CHECK-DAG: and.b32 [[BX1:%r[0-9]+]], [[BI1]], -2147483648;
1273
- ; CHECK-DAG: mov.b32 {tmp, [[BZ0:%rs[0-9]+]]}, [[BX0]]; }
1274
- ; CHECK-DAG: mov.b32 {tmp, [[BZ1:%rs[0-9]+]]}, [[BX1]]; }
1275
- ; CHECK-DAG: or.b16 [[R0:%rs[0-9]+]], [[AI0]], [[BZ0]];
1276
- ; CHECK-DAG: or.b16 [[R1:%rs[0-9]+]], [[AI1]], [[BZ1]];
1277
- ; CHECK-DAG: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]}
1270
+ ; CHECK-NOF16-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]]
1271
+ ; CHECK-NOF16-DAG: mov.b32 [[BI0:%r[0-9]+]], [[B0]];
1272
+ ; CHECK-NOF16-DAG: mov.b32 [[BI1:%r[0-9]+]], [[B1]];
1273
+ ; CHECK-NOF16-DAG: and.b16 [[AI0:%rs[0-9]+]], [[A0]], 32767;
1274
+ ; CHECK-NOF16-DAG: and.b16 [[AI1:%rs[0-9]+]], [[A1]], 32767;
1275
+ ; CHECK-NOF16-DAG: and.b32 [[BX0:%r[0-9]+]], [[BI0]], -2147483648;
1276
+ ; CHECK-NOF16-DAG: and.b32 [[BX1:%r[0-9]+]], [[BI1]], -2147483648;
1277
+ ; CHECK-NOF16-DAG: mov.b32 {tmp, [[BZ0:%rs[0-9]+]]}, [[BX0]]; }
1278
+ ; CHECK-NOF16-DAG: mov.b32 {tmp, [[BZ1:%rs[0-9]+]]}, [[BX1]]; }
1279
+ ; CHECK-NOF16-DAG: or.b16 [[R0:%rs[0-9]+]], [[AI0]], [[BZ0]];
1280
+ ; CHECK-NOF16-DAG: or.b16 [[R1:%rs[0-9]+]], [[AI1]], [[BZ1]];
1281
+ ; CHECK-NOF16-DAG: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]}
1282
; F16 variant: each f32 lane of the sign operand is first narrowed with
; cvt.rn.f16.f32, the two halves are packed into one b32, and only then masked.
; Note the crossed naming: [[R0]] is bound from [[B1]] and [[R1]] from [[B0]],
; so the pack {[[R1]], [[R0]]} lists the [[B0]]-derived half first.
;   -2147450880 == 0x80008000 keeps bit 15 of each 16-bit half (from the packed B).
;    2147450879 == 0x7FFF7FFF keeps the low 15 bits of each 16-bit half of [[A]].
+ ; CHECK-F16-DAG: cvt.rn.f16.f32 [[R0:%rs[0-9]+]], [[B1]];
1283
+ ; CHECK-F16-DAG: cvt.rn.f16.f32 [[R1:%rs[0-9]+]], [[B0]];
1284
+ ; CHECK-F16-DAG: mov.b32 [[R2:%r[0-9]+]], {[[R1]], [[R0]]};
1285
+ ; CHECK-F16-DAG: and.b32 [[R3:%r[0-9]+]], [[R2]], -2147450880;
1286
+ ; CHECK-F16-DAG: and.b32 [[R4:%r[0-9]+]], [[A]], 2147450879;
1287
+ ; CHECK-F16-DAG: or.b32 [[R:%r[0-9]+]], [[R4]], [[R3]]
1278
1288
; CHECK: st.param.b32 [func_retval0+0], [[R]];
1279
1289
; CHECK: ret;
1280
1290
define <2 x half > @test_copysign_f32 (<2 x half > %a , <2 x float > %b ) #0 {
@@ -1286,20 +1296,26 @@ define <2 x half> @test_copysign_f32(<2 x half> %a, <2 x float> %b) #0 {
1286
1296
; CHECK-LABEL: test_copysign_f64(
1287
1297
; CHECK-DAG: ld.param.b32 [[A:%r[0-9]+]], [test_copysign_f64_param_0];
1288
1298
; CHECK-DAG: ld.param.v2.f64 {[[B0:%fd[0-9]+]], [[B1:%fd[0-9]+]]}, [test_copysign_f64_param_1];
1289
- ; CHECK-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]]
1290
- ; CHECK-DAG: mov.b64 [[BI0:%rd[0-9]+]], [[B0]];
1291
- ; CHECK-DAG: mov.b64 [[BI1:%rd[0-9]+]], [[B1]];
1292
- ; CHECK-DAG: and.b16 [[AI0:%rs[0-9]+]], [[A0]], 32767;
1293
- ; CHECK-DAG: and.b16 [[AI1:%rs[0-9]+]], [[A1]], 32767;
1294
- ; CHECK-DAG: and.b64 [[BX0:%rd[0-9]+]], [[BI0]], -9223372036854775808;
1295
- ; CHECK-DAG: and.b64 [[BX1:%rd[0-9]+]], [[BI1]], -9223372036854775808;
1296
- ; CHECK-DAG: shr.u64 [[BY0:%rd[0-9]+]], [[BX0]], 48;
1297
- ; CHECK-DAG: shr.u64 [[BY1:%rd[0-9]+]], [[BX1]], 48;
1298
- ; CHECK-DAG: cvt.u16.u64 [[BZ0:%rs[0-9]+]], [[BY0]];
1299
- ; CHECK-DAG: cvt.u16.u64 [[BZ1:%rs[0-9]+]], [[BY1]];
1300
- ; CHECK-DAG: or.b16 [[R0:%rs[0-9]+]], [[AI0]], [[BZ0]];
1301
- ; CHECK-DAG: or.b16 [[R1:%rs[0-9]+]], [[AI1]], [[BZ1]];
1302
- ; CHECK-DAG: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]}
1299
+ ; CHECK-NOF16-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]]
1300
+ ; CHECK-NOF16-DAG: mov.b64 [[BI0:%rd[0-9]+]], [[B0]];
1301
+ ; CHECK-NOF16-DAG: mov.b64 [[BI1:%rd[0-9]+]], [[B1]];
1302
+ ; CHECK-NOF16-DAG: and.b16 [[AI0:%rs[0-9]+]], [[A0]], 32767;
1303
+ ; CHECK-NOF16-DAG: and.b16 [[AI1:%rs[0-9]+]], [[A1]], 32767;
1304
+ ; CHECK-NOF16-DAG: and.b64 [[BX0:%rd[0-9]+]], [[BI0]], -9223372036854775808;
1305
+ ; CHECK-NOF16-DAG: and.b64 [[BX1:%rd[0-9]+]], [[BI1]], -9223372036854775808;
1306
+ ; CHECK-NOF16-DAG: shr.u64 [[BY0:%rd[0-9]+]], [[BX0]], 48;
1307
+ ; CHECK-NOF16-DAG: shr.u64 [[BY1:%rd[0-9]+]], [[BX1]], 48;
1308
+ ; CHECK-NOF16-DAG: cvt.u16.u64 [[BZ0:%rs[0-9]+]], [[BY0]];
1309
+ ; CHECK-NOF16-DAG: cvt.u16.u64 [[BZ1:%rs[0-9]+]], [[BY1]];
1310
+ ; CHECK-NOF16-DAG: or.b16 [[R0:%rs[0-9]+]], [[AI0]], [[BZ0]];
1311
+ ; CHECK-NOF16-DAG: or.b16 [[R1:%rs[0-9]+]], [[AI1]], [[BZ1]];
1312
+ ; CHECK-NOF16-DAG: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]}
1313
; F16 variant: each f64 lane of the sign operand is first narrowed with
; cvt.rn.f16.f64, the two halves are packed into one b32, and only then masked.
; Note the crossed naming: [[R0]] is bound from [[B1]] and [[R1]] from [[B0]],
; so the pack {[[R1]], [[R0]]} lists the [[B0]]-derived half first.
;   -2147450880 == 0x80008000 keeps bit 15 of each 16-bit half (from the packed B).
;    2147450879 == 0x7FFF7FFF keeps the low 15 bits of each 16-bit half of [[A]].
+ ; CHECK-F16-DAG: cvt.rn.f16.f64 [[R0:%rs[0-9]+]], [[B1]];
1314
+ ; CHECK-F16-DAG: cvt.rn.f16.f64 [[R1:%rs[0-9]+]], [[B0]];
1315
+ ; CHECK-F16-DAG: mov.b32 [[R2:%r[0-9]+]], {[[R1]], [[R0]]};
1316
+ ; CHECK-F16-DAG: and.b32 [[R3:%r[0-9]+]], [[R2]], -2147450880;
1317
+ ; CHECK-F16-DAG: and.b32 [[R4:%r[0-9]+]], [[A]], 2147450879;
1318
+ ; CHECK-F16-DAG: or.b32 [[R:%r[0-9]+]], [[R4]], [[R3]];
1303
1319
; CHECK: st.param.b32 [func_retval0+0], [[R]];
1304
1320
; CHECK: ret;
1305
1321
define <2 x half > @test_copysign_f64 (<2 x half > %a , <2 x double > %b ) #0 {
@@ -1311,16 +1327,22 @@ define <2 x half> @test_copysign_f64(<2 x half> %a, <2 x double> %b) #0 {
1311
1327
; CHECK-LABEL: test_copysign_extended(
1312
1328
; CHECK-DAG: ld.param.b32 [[A:%r[0-9]+]], [test_copysign_extended_param_0];
1313
1329
; CHECK-DAG: ld.param.b32 [[B:%r[0-9]+]], [test_copysign_extended_param_1];
1314
- ; CHECK-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]]
1315
- ; CHECK-DAG: mov.b32 {[[B0:%rs[0-9]+]], [[B1:%rs[0-9]+]]}, [[B]]
1316
- ; CHECK-DAG: and.b16 [[AX0:%rs[0-9]+]], [[A0]], 32767;
1317
- ; CHECK-DAG: and.b16 [[AX1:%rs[0-9]+]], [[A1]], 32767;
1318
- ; CHECK-DAG: and.b16 [[BX0:%rs[0-9]+]], [[B0]], -32768;
1319
- ; CHECK-DAG: and.b16 [[BX1:%rs[0-9]+]], [[B1]], -32768;
1320
- ; CHECK-DAG: or.b16 [[R0:%rs[0-9]+]], [[AX0]], [[BX0]];
1321
- ; CHECK-DAG: or.b16 [[R1:%rs[0-9]+]], [[AX1]], [[BX1]];
1322
- ; CHECK-DAG: cvt.f32.f16 [[XR0:%f[0-9]+]], [[R0]];
1323
- ; CHECK-DAG: cvt.f32.f16 [[XR1:%f[0-9]+]], [[R1]];
1330
+ ; CHECK-NOF16-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]]
1331
+ ; CHECK-NOF16-DAG: mov.b32 {[[B0:%rs[0-9]+]], [[B1:%rs[0-9]+]]}, [[B]]
1332
+ ; CHECK-NOF16-DAG: and.b16 [[AX0:%rs[0-9]+]], [[A0]], 32767;
1333
+ ; CHECK-NOF16-DAG: and.b16 [[AX1:%rs[0-9]+]], [[A1]], 32767;
1334
+ ; CHECK-NOF16-DAG: and.b16 [[BX0:%rs[0-9]+]], [[B0]], -32768;
1335
+ ; CHECK-NOF16-DAG: and.b16 [[BX1:%rs[0-9]+]], [[B1]], -32768;
1336
+ ; CHECK-NOF16-DAG: or.b16 [[R0:%rs[0-9]+]], [[AX0]], [[BX0]];
1337
+ ; CHECK-NOF16-DAG: or.b16 [[R1:%rs[0-9]+]], [[AX1]], [[BX1]];
1338
+ ; CHECK-NOF16-DAG: cvt.f32.f16 [[XR0:%f[0-9]+]], [[R0]];
1339
+ ; CHECK-NOF16-DAG: cvt.f32.f16 [[XR1:%f[0-9]+]], [[R1]];
1340
; F16 variant: the copysign itself is expected on the packed 32-bit registers
; (0x80008000 sign mask on [[B]], 0x7FFF7FFF magnitude mask on [[A]], then OR).
; The packed result [[R2]] is then split into two 16-bit registers and each half
; is widened with cvt.f32.f16. The widened values are bound to [[XR0]]/[[XR1]],
; the same names the non-F16 group binds, so the shared st.param.v2.f32 check
; that follows can use them in either case.
+ ; CHECK-F16-DAG: and.b32 [[R0:%r[0-9]+]], [[B]], -2147450880;
1341
+ ; CHECK-F16-DAG: and.b32 [[R1:%r[0-9]+]], [[A]], 2147450879;
1342
+ ; CHECK-F16-DAG: or.b32 [[R2:%r[0-9]+]], [[R1]], [[R0]]
1343
+ ; CHECK-F16-DAG: mov.b32 {[[R3:%rs[0-9]+]], [[R4:%rs[0-9]+]]}, [[R2]]
1344
+ ; CHECK-F16-DAG: cvt.f32.f16 [[XR0:%f[0-9]+]], [[R3]]
1345
+ ; CHECK-F16-DAG: cvt.f32.f16 [[XR1:%f[0-9]+]], [[R4]]
1324
1346
; CHECK: st.param.v2.f32 [func_retval0+0], {[[XR0]], [[XR1]]};
1325
1347
; CHECK: ret;
1326
1348
define <2 x float > @test_copysign_extended (<2 x half > %a , <2 x half > %b ) #0 {
0 commit comments