Skip to content

Commit 92a8b81

Browse files
authored
[LegalizeVectorOps] Enable ExpandFABS/COPYSIGN to use integer ops for fixed vectors in some cases. (#109232)
Copy the same FSUB check from ExpandFNEG to avoid breaking AArch64 and ARM.
1 parent ab393ce commit 92a8b81

File tree

7 files changed

+381
-2102
lines changed

7 files changed

+381
-2102
lines changed

llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp

Lines changed: 17 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -1804,9 +1804,12 @@ SDValue VectorLegalizer::ExpandFNEG(SDNode *Node) {
18041804
EVT VT = Node->getValueType(0);
18051805
EVT IntVT = VT.changeVectorElementTypeToInteger();
18061806

1807+
if (!TLI.isOperationLegalOrCustom(ISD::XOR, IntVT))
1808+
return SDValue();
1809+
18071810
// FIXME: The FSUB check is here to force unrolling v1f64 vectors on AArch64.
1808-
if (!TLI.isOperationLegalOrCustom(ISD::XOR, IntVT) ||
1809-
!(TLI.isOperationLegalOrCustomOrPromote(ISD::FSUB, VT) || VT.isScalableVector()))
1811+
if (!TLI.isOperationLegalOrCustomOrPromote(ISD::FSUB, VT) &&
1812+
!VT.isScalableVector())
18101813
return SDValue();
18111814

18121815
SDLoc DL(Node);
@@ -1821,8 +1824,12 @@ SDValue VectorLegalizer::ExpandFABS(SDNode *Node) {
18211824
EVT VT = Node->getValueType(0);
18221825
EVT IntVT = VT.changeVectorElementTypeToInteger();
18231826

1824-
// FIXME: We shouldn't restrict this to scalable vectors.
1825-
if (!TLI.isOperationLegalOrCustom(ISD::AND, IntVT) || !VT.isScalableVector())
1827+
if (!TLI.isOperationLegalOrCustom(ISD::AND, IntVT))
1828+
return SDValue();
1829+
1830+
// FIXME: The FSUB check is here to force unrolling v1f64 vectors on AArch64.
1831+
if (!TLI.isOperationLegalOrCustomOrPromote(ISD::FSUB, VT) &&
1832+
!VT.isScalableVector())
18261833
return SDValue();
18271834

18281835
SDLoc DL(Node);
@@ -1837,10 +1844,14 @@ SDValue VectorLegalizer::ExpandFCOPYSIGN(SDNode *Node) {
18371844
EVT VT = Node->getValueType(0);
18381845
EVT IntVT = VT.changeVectorElementTypeToInteger();
18391846

1840-
// FIXME: We shouldn't restrict this to scalable vectors.
18411847
if (VT != Node->getOperand(1).getValueType() ||
18421848
!TLI.isOperationLegalOrCustom(ISD::AND, IntVT) ||
1843-
!TLI.isOperationLegalOrCustom(ISD::OR, IntVT) || !VT.isScalableVector())
1849+
!TLI.isOperationLegalOrCustom(ISD::OR, IntVT))
1850+
return SDValue();
1851+
1852+
// FIXME: The FSUB check is here to force unrolling v1f64 vectors on AArch64.
1853+
if (!TLI.isOperationLegalOrCustomOrPromote(ISD::FSUB, VT) &&
1854+
!VT.isScalableVector())
18441855
return SDValue();
18451856

18461857
SDLoc DL(Node);

llvm/test/CodeGen/NVPTX/bf16x2-instructions.ll

Lines changed: 18 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -508,21 +508,24 @@ define <2 x bfloat> @test_round(<2 x bfloat> %a) #0 {
508508
; CHECK-LABEL: test_copysign(
509509
; CHECK-DAG: ld.param.b32 [[A:%r[0-9]+]], [test_copysign_param_0];
510510
; CHECK-DAG: ld.param.b32 [[B:%r[0-9]+]], [test_copysign_param_1];
511-
; CHECK-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]]
512-
; CHECK-DAG: mov.b32 {[[B0:%rs[0-9]+]], [[B1:%rs[0-9]+]]}, [[B]]
513-
; CHECK-DAG: abs.bf16 [[AW1:%rs[0-9]+]], [[A1]];
514-
; CHECK-DAG: neg.bf16 [[AY1:%rs[0-9]+]], [[AW1]];
515-
; CHECK-DAG: shr.u16 [[BS1:%rs[0-9]+]], [[B1]], 15;
516-
; CHECK-DAG: and.b16 [[BR1:%rs[0-9]+]], [[BS1]], 1;
517-
; CHECK-DAG: setp.eq.b16 [[P1:%p[0-9]+]], [[BR1]], 1;
518-
; CHECK-DAG: selp.b16 [[RS1:%rs[0-9]+]], [[AY1]], [[AW1]], [[P1]]
519-
; CHECK-DAG: abs.bf16 [[AW0:%rs[0-9]+]], [[A0]];
520-
; CHECK-DAG: neg.bf16 [[AY0:%rs[0-9]+]], [[AW0]];
521-
; CHECK-DAG: shr.u16 [[BS0:%rs[0-9]+]], [[B0]], 15;
522-
; CHECK-DAG: and.b16 [[BR0:%rs[0-9]+]], [[BS0]], 1;
523-
; CHECK-DAG: setp.eq.b16 [[P0:%p[0-9]+]], [[BR0]], 1;
524-
; CHECK-DAG: selp.b16 [[RS0:%rs[0-9]+]], [[AY0]], [[AW0]], [[P0]]
525-
; CHECK-DAG: mov.b32 [[R:%r[0-9]+]], {[[RS0]], [[RS1]]}
511+
; SM80-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]]
512+
; SM80-DAG: mov.b32 {[[B0:%rs[0-9]+]], [[B1:%rs[0-9]+]]}, [[B]]
513+
; SM80-DAG: abs.bf16 [[AW1:%rs[0-9]+]], [[A1]];
514+
; SM80-DAG: neg.bf16 [[AY1:%rs[0-9]+]], [[AW1]];
515+
; SM80-DAG: shr.u16 [[BS1:%rs[0-9]+]], [[B1]], 15;
516+
; SM80-DAG: and.b16 [[BR1:%rs[0-9]+]], [[BS1]], 1;
517+
; SM80-DAG: setp.eq.b16 [[P1:%p[0-9]+]], [[BR1]], 1;
518+
; SM80-DAG: selp.b16 [[RS1:%rs[0-9]+]], [[AY1]], [[AW1]], [[P1]]
519+
; SM80-DAG: abs.bf16 [[AW0:%rs[0-9]+]], [[A0]];
520+
; SM80-DAG: neg.bf16 [[AY0:%rs[0-9]+]], [[AW0]];
521+
; SM80-DAG: shr.u16 [[BS0:%rs[0-9]+]], [[B0]], 15;
522+
; SM80-DAG: and.b16 [[BR0:%rs[0-9]+]], [[BS0]], 1;
523+
; SM80-DAG: setp.eq.b16 [[P0:%p[0-9]+]], [[BR0]], 1;
524+
; SM80-DAG: selp.b16 [[RS0:%rs[0-9]+]], [[AY0]], [[AW0]], [[P0]]
525+
; SM80-DAG: mov.b32 [[R:%r[0-9]+]], {[[RS0]], [[RS1]]}
526+
; SM90-DAG: and.b32 [[R1:%r[0-9]+]], [[B]], -2147450880;
527+
; SM90-DAG: and.b32 [[R2:%r[0-9]+]], [[A]], 2147450879;
528+
; SM90-DAG: or.b32 [[R:%r[0-9]+]], [[R2]], [[R1]];
526529
; CHECK: st.param.b32 [func_retval0+0], [[R]];
527530
; CHECK: ret;
528531
define <2 x bfloat> @test_copysign(<2 x bfloat> %a, <2 x bfloat> %b) #0 {

llvm/test/CodeGen/NVPTX/f16x2-instructions.ll

Lines changed: 75 additions & 53 deletions
Original file line number | Diff line number | Diff line change
@@ -1184,14 +1184,15 @@ define <2 x half> @test_fma(<2 x half> %a, <2 x half> %b, <2 x half> %c) #0 {
11841184

11851185
; CHECK-LABEL: test_fabs(
11861186
; CHECK: ld.param.b32 [[A:%r[0-9]+]], [test_fabs_param_0];
1187-
; CHECK: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]]
1188-
; CHECK-DAG: cvt.f32.f16 [[AF0:%f[0-9]+]], [[A0]];
1189-
; CHECK-DAG: cvt.f32.f16 [[AF1:%f[0-9]+]], [[A1]];
1190-
; CHECK-DAG: abs.f32 [[RF0:%f[0-9]+]], [[AF0]];
1191-
; CHECK-DAG: abs.f32 [[RF1:%f[0-9]+]], [[AF1]];
1192-
; CHECK-DAG: cvt.rn.f16.f32 [[R0:%rs[0-9]+]], [[RF0]];
1193-
; CHECK-DAG: cvt.rn.f16.f32 [[R1:%rs[0-9]+]], [[RF1]];
1194-
; CHECK: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]}
1187+
; CHECK-NOF16: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]]
1188+
; CHECK-NOF16-DAG: cvt.f32.f16 [[AF0:%f[0-9]+]], [[A0]];
1189+
; CHECK-NOF16-DAG: cvt.f32.f16 [[AF1:%f[0-9]+]], [[A1]];
1190+
; CHECK-NOF16-DAG: abs.f32 [[RF0:%f[0-9]+]], [[AF0]];
1191+
; CHECK-NOF16-DAG: abs.f32 [[RF1:%f[0-9]+]], [[AF1]];
1192+
; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R0:%rs[0-9]+]], [[RF0]];
1193+
; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R1:%rs[0-9]+]], [[RF1]];
1194+
; CHECK-NOF16: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]}
1195+
; CHECK-F16: and.b32 [[R:%r[0-9]+]], [[A]], 2147450879;
11951196
; CHECK: st.param.b32 [func_retval0+0], [[R]];
11961197
; CHECK: ret;
11971198
define <2 x half> @test_fabs(<2 x half> %a) #0 {
@@ -1244,15 +1245,18 @@ define <2 x half> @test_maxnum(<2 x half> %a, <2 x half> %b) #0 {
12441245
; CHECK-LABEL: test_copysign(
12451246
; CHECK-DAG: ld.param.b32 [[A:%r[0-9]+]], [test_copysign_param_0];
12461247
; CHECK-DAG: ld.param.b32 [[B:%r[0-9]+]], [test_copysign_param_1];
1247-
; CHECK-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]]
1248-
; CHECK-DAG: mov.b32 {[[B0:%rs[0-9]+]], [[B1:%rs[0-9]+]]}, [[B]]
1249-
; CHECK-DAG: and.b16 [[AX0:%rs[0-9]+]], [[A0]], 32767;
1250-
; CHECK-DAG: and.b16 [[AX1:%rs[0-9]+]], [[A1]], 32767;
1251-
; CHECK-DAG: and.b16 [[BX0:%rs[0-9]+]], [[B0]], -32768;
1252-
; CHECK-DAG: and.b16 [[BX1:%rs[0-9]+]], [[B1]], -32768;
1253-
; CHECK-DAG: or.b16 [[R0:%rs[0-9]+]], [[AX0]], [[BX0]];
1254-
; CHECK-DAG: or.b16 [[R1:%rs[0-9]+]], [[AX1]], [[BX1]];
1255-
; CHECK-DAG: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]}
1248+
; CHECK-NOF16-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]]
1249+
; CHECK-NOF16-DAG: mov.b32 {[[B0:%rs[0-9]+]], [[B1:%rs[0-9]+]]}, [[B]]
1250+
; CHECK-NOF16-DAG: and.b16 [[AX0:%rs[0-9]+]], [[A0]], 32767;
1251+
; CHECK-NOF16-DAG: and.b16 [[AX1:%rs[0-9]+]], [[A1]], 32767;
1252+
; CHECK-NOF16-DAG: and.b16 [[BX0:%rs[0-9]+]], [[B0]], -32768;
1253+
; CHECK-NOF16-DAG: and.b16 [[BX1:%rs[0-9]+]], [[B1]], -32768;
1254+
; CHECK-NOF16-DAG: or.b16 [[R0:%rs[0-9]+]], [[AX0]], [[BX0]];
1255+
; CHECK-NOF16-DAG: or.b16 [[R1:%rs[0-9]+]], [[AX1]], [[BX1]];
1256+
; CHECK-NOF16-DAG: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]}
1257+
; CHECK-F16-DAG: and.b32 [[R0:%r[0-9]+]], [[B]], -2147450880;
1258+
; CHECK-F16-DAG: and.b32 [[R1:%r[0-9]+]], [[A]], 2147450879;
1259+
; CHECK-F16-DAG: or.b32 [[R:%r[0-9]+]], [[R1]], [[R0]]
12561260
; CHECK: st.param.b32 [func_retval0+0], [[R]];
12571261
; CHECK: ret;
12581262
define <2 x half> @test_copysign(<2 x half> %a, <2 x half> %b) #0 {
@@ -1263,18 +1267,24 @@ define <2 x half> @test_copysign(<2 x half> %a, <2 x half> %b) #0 {
12631267
; CHECK-LABEL: test_copysign_f32(
12641268
; CHECK-DAG: ld.param.b32 [[A:%r[0-9]+]], [test_copysign_f32_param_0];
12651269
; CHECK-DAG: ld.param.v2.f32 {[[B0:%f[0-9]+]], [[B1:%f[0-9]+]]}, [test_copysign_f32_param_1];
1266-
; CHECK-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]]
1267-
; CHECK-DAG: mov.b32 [[BI0:%r[0-9]+]], [[B0]];
1268-
; CHECK-DAG: mov.b32 [[BI1:%r[0-9]+]], [[B1]];
1269-
; CHECK-DAG: and.b16 [[AI0:%rs[0-9]+]], [[A0]], 32767;
1270-
; CHECK-DAG: and.b16 [[AI1:%rs[0-9]+]], [[A1]], 32767;
1271-
; CHECK-DAG: and.b32 [[BX0:%r[0-9]+]], [[BI0]], -2147483648;
1272-
; CHECK-DAG: and.b32 [[BX1:%r[0-9]+]], [[BI1]], -2147483648;
1273-
; CHECK-DAG: mov.b32 {tmp, [[BZ0:%rs[0-9]+]]}, [[BX0]]; }
1274-
; CHECK-DAG: mov.b32 {tmp, [[BZ1:%rs[0-9]+]]}, [[BX1]]; }
1275-
; CHECK-DAG: or.b16 [[R0:%rs[0-9]+]], [[AI0]], [[BZ0]];
1276-
; CHECK-DAG: or.b16 [[R1:%rs[0-9]+]], [[AI1]], [[BZ1]];
1277-
; CHECK-DAG: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]}
1270+
; CHECK-NOF16-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]]
1271+
; CHECK-NOF16-DAG: mov.b32 [[BI0:%r[0-9]+]], [[B0]];
1272+
; CHECK-NOF16-DAG: mov.b32 [[BI1:%r[0-9]+]], [[B1]];
1273+
; CHECK-NOF16-DAG: and.b16 [[AI0:%rs[0-9]+]], [[A0]], 32767;
1274+
; CHECK-NOF16-DAG: and.b16 [[AI1:%rs[0-9]+]], [[A1]], 32767;
1275+
; CHECK-NOF16-DAG: and.b32 [[BX0:%r[0-9]+]], [[BI0]], -2147483648;
1276+
; CHECK-NOF16-DAG: and.b32 [[BX1:%r[0-9]+]], [[BI1]], -2147483648;
1277+
; CHECK-NOF16-DAG: mov.b32 {tmp, [[BZ0:%rs[0-9]+]]}, [[BX0]]; }
1278+
; CHECK-NOF16-DAG: mov.b32 {tmp, [[BZ1:%rs[0-9]+]]}, [[BX1]]; }
1279+
; CHECK-NOF16-DAG: or.b16 [[R0:%rs[0-9]+]], [[AI0]], [[BZ0]];
1280+
; CHECK-NOF16-DAG: or.b16 [[R1:%rs[0-9]+]], [[AI1]], [[BZ1]];
1281+
; CHECK-NOF16-DAG: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]}
1282+
; CHECK-F16-DAG: cvt.rn.f16.f32 [[R0:%rs[0-9]+]], [[B1]];
1283+
; CHECK-F16-DAG: cvt.rn.f16.f32 [[R1:%rs[0-9]+]], [[B0]];
1284+
; CHECK-F16-DAG: mov.b32 [[R2:%r[0-9]+]], {[[R1]], [[R0]]};
1285+
; CHECK-F16-DAG: and.b32 [[R3:%r[0-9]+]], [[R2]], -2147450880;
1286+
; CHECK-F16-DAG: and.b32 [[R4:%r[0-9]+]], [[A]], 2147450879;
1287+
; CHECK-F16-DAG: or.b32 [[R:%r[0-9]+]], [[R4]], [[R3]]
12781288
; CHECK: st.param.b32 [func_retval0+0], [[R]];
12791289
; CHECK: ret;
12801290
define <2 x half> @test_copysign_f32(<2 x half> %a, <2 x float> %b) #0 {
@@ -1286,20 +1296,26 @@ define <2 x half> @test_copysign_f32(<2 x half> %a, <2 x float> %b) #0 {
12861296
; CHECK-LABEL: test_copysign_f64(
12871297
; CHECK-DAG: ld.param.b32 [[A:%r[0-9]+]], [test_copysign_f64_param_0];
12881298
; CHECK-DAG: ld.param.v2.f64 {[[B0:%fd[0-9]+]], [[B1:%fd[0-9]+]]}, [test_copysign_f64_param_1];
1289-
; CHECK-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]]
1290-
; CHECK-DAG: mov.b64 [[BI0:%rd[0-9]+]], [[B0]];
1291-
; CHECK-DAG: mov.b64 [[BI1:%rd[0-9]+]], [[B1]];
1292-
; CHECK-DAG: and.b16 [[AI0:%rs[0-9]+]], [[A0]], 32767;
1293-
; CHECK-DAG: and.b16 [[AI1:%rs[0-9]+]], [[A1]], 32767;
1294-
; CHECK-DAG: and.b64 [[BX0:%rd[0-9]+]], [[BI0]], -9223372036854775808;
1295-
; CHECK-DAG: and.b64 [[BX1:%rd[0-9]+]], [[BI1]], -9223372036854775808;
1296-
; CHECK-DAG: shr.u64 [[BY0:%rd[0-9]+]], [[BX0]], 48;
1297-
; CHECK-DAG: shr.u64 [[BY1:%rd[0-9]+]], [[BX1]], 48;
1298-
; CHECK-DAG: cvt.u16.u64 [[BZ0:%rs[0-9]+]], [[BY0]];
1299-
; CHECK-DAG: cvt.u16.u64 [[BZ1:%rs[0-9]+]], [[BY1]];
1300-
; CHECK-DAG: or.b16 [[R0:%rs[0-9]+]], [[AI0]], [[BZ0]];
1301-
; CHECK-DAG: or.b16 [[R1:%rs[0-9]+]], [[AI1]], [[BZ1]];
1302-
; CHECK-DAG: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]}
1299+
; CHECK-NOF16-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]]
1300+
; CHECK-NOF16-DAG: mov.b64 [[BI0:%rd[0-9]+]], [[B0]];
1301+
; CHECK-NOF16-DAG: mov.b64 [[BI1:%rd[0-9]+]], [[B1]];
1302+
; CHECK-NOF16-DAG: and.b16 [[AI0:%rs[0-9]+]], [[A0]], 32767;
1303+
; CHECK-NOF16-DAG: and.b16 [[AI1:%rs[0-9]+]], [[A1]], 32767;
1304+
; CHECK-NOF16-DAG: and.b64 [[BX0:%rd[0-9]+]], [[BI0]], -9223372036854775808;
1305+
; CHECK-NOF16-DAG: and.b64 [[BX1:%rd[0-9]+]], [[BI1]], -9223372036854775808;
1306+
; CHECK-NOF16-DAG: shr.u64 [[BY0:%rd[0-9]+]], [[BX0]], 48;
1307+
; CHECK-NOF16-DAG: shr.u64 [[BY1:%rd[0-9]+]], [[BX1]], 48;
1308+
; CHECK-NOF16-DAG: cvt.u16.u64 [[BZ0:%rs[0-9]+]], [[BY0]];
1309+
; CHECK-NOF16-DAG: cvt.u16.u64 [[BZ1:%rs[0-9]+]], [[BY1]];
1310+
; CHECK-NOF16-DAG: or.b16 [[R0:%rs[0-9]+]], [[AI0]], [[BZ0]];
1311+
; CHECK-NOF16-DAG: or.b16 [[R1:%rs[0-9]+]], [[AI1]], [[BZ1]];
1312+
; CHECK-NOF16-DAG: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]}
1313+
; CHECK-F16-DAG: cvt.rn.f16.f64 [[R0:%rs[0-9]+]], [[B1]];
1314+
; CHECK-F16-DAG: cvt.rn.f16.f64 [[R1:%rs[0-9]+]], [[B0]];
1315+
; CHECK-F16-DAG: mov.b32 [[R2:%r[0-9]+]], {[[R1]], [[R0]]};
1316+
; CHECK-F16-DAG: and.b32 [[R3:%r[0-9]+]], [[R2]], -2147450880;
1317+
; CHECK-F16-DAG: and.b32 [[R4:%r[0-9]+]], [[A]], 2147450879;
1318+
; CHECK-F16-DAG: or.b32 [[R:%r[0-9]+]], [[R4]], [[R3]];
13031319
; CHECK: st.param.b32 [func_retval0+0], [[R]];
13041320
; CHECK: ret;
13051321
define <2 x half> @test_copysign_f64(<2 x half> %a, <2 x double> %b) #0 {
@@ -1311,16 +1327,22 @@ define <2 x half> @test_copysign_f64(<2 x half> %a, <2 x double> %b) #0 {
13111327
; CHECK-LABEL: test_copysign_extended(
13121328
; CHECK-DAG: ld.param.b32 [[A:%r[0-9]+]], [test_copysign_extended_param_0];
13131329
; CHECK-DAG: ld.param.b32 [[B:%r[0-9]+]], [test_copysign_extended_param_1];
1314-
; CHECK-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]]
1315-
; CHECK-DAG: mov.b32 {[[B0:%rs[0-9]+]], [[B1:%rs[0-9]+]]}, [[B]]
1316-
; CHECK-DAG: and.b16 [[AX0:%rs[0-9]+]], [[A0]], 32767;
1317-
; CHECK-DAG: and.b16 [[AX1:%rs[0-9]+]], [[A1]], 32767;
1318-
; CHECK-DAG: and.b16 [[BX0:%rs[0-9]+]], [[B0]], -32768;
1319-
; CHECK-DAG: and.b16 [[BX1:%rs[0-9]+]], [[B1]], -32768;
1320-
; CHECK-DAG: or.b16 [[R0:%rs[0-9]+]], [[AX0]], [[BX0]];
1321-
; CHECK-DAG: or.b16 [[R1:%rs[0-9]+]], [[AX1]], [[BX1]];
1322-
; CHECK-DAG: cvt.f32.f16 [[XR0:%f[0-9]+]], [[R0]];
1323-
; CHECK-DAG: cvt.f32.f16 [[XR1:%f[0-9]+]], [[R1]];
1330+
; CHECK-NOF16-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]]
1331+
; CHECK-NOF16-DAG: mov.b32 {[[B0:%rs[0-9]+]], [[B1:%rs[0-9]+]]}, [[B]]
1332+
; CHECK-NOF16-DAG: and.b16 [[AX0:%rs[0-9]+]], [[A0]], 32767;
1333+
; CHECK-NOF16-DAG: and.b16 [[AX1:%rs[0-9]+]], [[A1]], 32767;
1334+
; CHECK-NOF16-DAG: and.b16 [[BX0:%rs[0-9]+]], [[B0]], -32768;
1335+
; CHECK-NOF16-DAG: and.b16 [[BX1:%rs[0-9]+]], [[B1]], -32768;
1336+
; CHECK-NOF16-DAG: or.b16 [[R0:%rs[0-9]+]], [[AX0]], [[BX0]];
1337+
; CHECK-NOF16-DAG: or.b16 [[R1:%rs[0-9]+]], [[AX1]], [[BX1]];
1338+
; CHECK-NOF16-DAG: cvt.f32.f16 [[XR0:%f[0-9]+]], [[R0]];
1339+
; CHECK-NOF16-DAG: cvt.f32.f16 [[XR1:%f[0-9]+]], [[R1]];
1340+
; CHECK-F16-DAG: and.b32 [[R0:%r[0-9]+]], [[B]], -2147450880;
1341+
; CHECK-F16-DAG: and.b32 [[R1:%r[0-9]+]], [[A]], 2147450879;
1342+
; CHECK-F16-DAG: or.b32 [[R2:%r[0-9]+]], [[R1]], [[R0]]
1343+
; CHECK-F16-DAG: mov.b32 {[[R3:%rs[0-9]+]], [[R4:%rs[0-9]+]]}, [[R2]]
1344+
; CHECK-F16-DAG: cvt.f32.f16 [[XR0:%f[0-9]+]], [[R3]]
1345+
; CHECK-F16-DAG: cvt.f32.f16 [[XR1:%f[0-9]+]], [[R4]]
13241346
; CHECK: st.param.v2.f32 [func_retval0+0], {[[XR0]], [[XR1]]};
13251347
; CHECK: ret;
13261348
define <2 x float> @test_copysign_extended(<2 x half> %a, <2 x half> %b) #0 {

llvm/test/CodeGen/PowerPC/vec_abs.ll

Lines changed: 5 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -19,10 +19,9 @@ declare <4 x float> @llvm.fabs.v4f32(<4 x float>) #1
1919

2020
; CHECK: xvabssp
2121
; CHECK: blr
22-
; CHECK-NOVSX: fabs
23-
; CHECK-NOVSX: fabs
24-
; CHECK-NOVSX: fabs
25-
; CHECK-NOVSX: fabs
22+
; CHECK-NOVSX: vspltisb
23+
; CHECK-NOVSX: vslw
24+
; CHECK-NOVSX: vandc
2625
; CHECK-NOVSX: blr
2726

2827
define <4 x float> @test2_float(<4 x float> %aa) #0 {
@@ -40,11 +39,8 @@ define <4 x float> @test2_float(<4 x float> %aa) #0 {
4039
; CHECK: xvnabssp
4140
; CHECK: blr
4241
; CHECK-NOVSX: vspltisb
43-
; CHECK-NOVSX: fabs
44-
; CHECK-NOVSX: fabs
45-
; CHECK-NOVSX: fabs
46-
; CHECK-NOVSX: fabs
47-
; CHECK-NOVSX: vxor
42+
; CHECK-NOVSX: vslw
43+
; CHECK-NOVSX: vor
4844
; CHECK-NOVSX: blr
4945

5046
define <2 x double> @test_double(<2 x double> %aa) #0 {

0 commit comments

Comments
 (0)