Skip to content

Commit 189efb0

Browse files
committed
[X86] vselect-pcmp.ll - add tests showing poor codegen on AVX1 targets where we have to split/concat 128-bit subvectors
We'd be better off consistently using 128-bit instructions. Based off a regression reported after #92794.
1 parent f34dedb commit 189efb0

File tree

1 file changed

+276
-0
lines changed

1 file changed

+276
-0
lines changed

llvm/test/CodeGen/X86/vselect-pcmp.ll

Lines changed: 276 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1365,6 +1365,282 @@ define <32 x i8> @blend_mask_cond_v32i8(<32 x i8> %x, <32 x i8> %y, <32 x i8> %z
13651365
ret <32 x i8> %r
13661366
}
13671367

1368+
define void @store_blend_load_v4i64(ptr %a0, ptr %a1, ptr %a2) {
1369+
; AVX1-LABEL: store_blend_load_v4i64:
1370+
; AVX1: # %bb.0:
1371+
; AVX1-NEXT: vmovapd (%rsi), %ymm0
1372+
; AVX1-NEXT: vmovddup {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808]
1373+
; AVX1-NEXT: # xmm1 = mem[0,0]
1374+
; AVX1-NEXT: vpxor 16(%rdi), %xmm1, %xmm2
1375+
; AVX1-NEXT: vmovddup {{.*#+}} xmm3 = [9223372036854775815,9223372036854775815]
1376+
; AVX1-NEXT: # xmm3 = mem[0,0]
1377+
; AVX1-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
1378+
; AVX1-NEXT: vpxor (%rdi), %xmm1, %xmm1
1379+
; AVX1-NEXT: vpcmpgtq %xmm3, %xmm1, %xmm1
1380+
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
1381+
; AVX1-NEXT: vblendvpd %ymm1, (%rdi), %ymm0, %ymm0
1382+
; AVX1-NEXT: vmovapd %ymm0, (%rdx)
1383+
; AVX1-NEXT: vzeroupper
1384+
; AVX1-NEXT: retq
1385+
;
1386+
; AVX2-LABEL: store_blend_load_v4i64:
1387+
; AVX2: # %bb.0:
1388+
; AVX2-NEXT: vmovdqa (%rdi), %ymm0
1389+
; AVX2-NEXT: vmovapd (%rsi), %ymm1
1390+
; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
1391+
; AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm2
1392+
; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm3 = [9223372036854775815,9223372036854775815,9223372036854775815,9223372036854775815]
1393+
; AVX2-NEXT: vpcmpgtq %ymm3, %ymm2, %ymm2
1394+
; AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
1395+
; AVX2-NEXT: vmovapd %ymm0, (%rdx)
1396+
; AVX2-NEXT: vzeroupper
1397+
; AVX2-NEXT: retq
1398+
;
1399+
; AVX512F-LABEL: store_blend_load_v4i64:
1400+
; AVX512F: # %bb.0:
1401+
; AVX512F-NEXT: vmovdqa (%rdi), %ymm0
1402+
; AVX512F-NEXT: vmovdqa (%rsi), %ymm1
1403+
; AVX512F-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
1404+
; AVX512F-NEXT: vmovdqa64 %zmm0, %zmm1 {%k1}
1405+
; AVX512F-NEXT: vmovdqa %ymm1, (%rdx)
1406+
; AVX512F-NEXT: vzeroupper
1407+
; AVX512F-NEXT: retq
1408+
;
1409+
; AVX512VL-LABEL: store_blend_load_v4i64:
1410+
; AVX512VL: # %bb.0:
1411+
; AVX512VL-NEXT: vmovdqa (%rdi), %ymm0
1412+
; AVX512VL-NEXT: vmovdqa (%rsi), %ymm1
1413+
; AVX512VL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
1414+
; AVX512VL-NEXT: vmovdqa64 %ymm0, %ymm1 {%k1}
1415+
; AVX512VL-NEXT: vmovdqa %ymm1, (%rdx)
1416+
; AVX512VL-NEXT: vzeroupper
1417+
; AVX512VL-NEXT: retq
1418+
;
1419+
; XOP-LABEL: store_blend_load_v4i64:
1420+
; XOP: # %bb.0:
1421+
; XOP-NEXT: vmovapd (%rsi), %ymm0
1422+
; XOP-NEXT: vpmovsxbq {{.*#+}} xmm1 = [7,7]
1423+
; XOP-NEXT: vpcomltuq 16(%rdi), %xmm1, %xmm2
1424+
; XOP-NEXT: vpcomltuq (%rdi), %xmm1, %xmm1
1425+
; XOP-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
1426+
; XOP-NEXT: vblendvpd %ymm1, (%rdi), %ymm0, %ymm0
1427+
; XOP-NEXT: vmovapd %ymm0, (%rdx)
1428+
; XOP-NEXT: vzeroupper
1429+
; XOP-NEXT: retq
1430+
%v0 = load <4 x i64>, ptr %a0
1431+
%v1 = load <4 x i64>, ptr %a1
1432+
%cmp = icmp ugt <4 x i64> %v0, <i64 7, i64 7, i64 7, i64 7>
1433+
%res = select <4 x i1> %cmp, <4 x i64> %v0, <4 x i64> %v1
1434+
store <4 x i64> %res, ptr %a2
1435+
ret void
1436+
}
1437+
1438+
define void @store_blend_load_v8i32(ptr %a0, ptr %a1, ptr %a2) {
1439+
; AVX1-LABEL: store_blend_load_v8i32:
1440+
; AVX1: # %bb.0:
1441+
; AVX1-NEXT: vmovaps (%rsi), %ymm0
1442+
; AVX1-NEXT: vmovdqa (%rdi), %xmm1
1443+
; AVX1-NEXT: vmovdqa 16(%rdi), %xmm2
1444+
; AVX1-NEXT: vbroadcastss {{.*#+}} xmm3 = [8,8,8,8]
1445+
; AVX1-NEXT: vpmaxud %xmm3, %xmm2, %xmm4
1446+
; AVX1-NEXT: vpcmpeqd %xmm4, %xmm2, %xmm2
1447+
; AVX1-NEXT: vpmaxud %xmm3, %xmm1, %xmm3
1448+
; AVX1-NEXT: vpcmpeqd %xmm3, %xmm1, %xmm1
1449+
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
1450+
; AVX1-NEXT: vblendvps %ymm1, (%rdi), %ymm0, %ymm0
1451+
; AVX1-NEXT: vmovaps %ymm0, (%rdx)
1452+
; AVX1-NEXT: vzeroupper
1453+
; AVX1-NEXT: retq
1454+
;
1455+
; AVX2-LABEL: store_blend_load_v8i32:
1456+
; AVX2: # %bb.0:
1457+
; AVX2-NEXT: vmovdqa (%rdi), %ymm0
1458+
; AVX2-NEXT: vmovaps (%rsi), %ymm1
1459+
; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm2 = [8,8,8,8,8,8,8,8]
1460+
; AVX2-NEXT: vpmaxud %ymm2, %ymm0, %ymm2
1461+
; AVX2-NEXT: vpcmpeqd %ymm2, %ymm0, %ymm2
1462+
; AVX2-NEXT: vblendvps %ymm2, %ymm0, %ymm1, %ymm0
1463+
; AVX2-NEXT: vmovaps %ymm0, (%rdx)
1464+
; AVX2-NEXT: vzeroupper
1465+
; AVX2-NEXT: retq
1466+
;
1467+
; AVX512F-LABEL: store_blend_load_v8i32:
1468+
; AVX512F: # %bb.0:
1469+
; AVX512F-NEXT: vmovdqa (%rdi), %ymm0
1470+
; AVX512F-NEXT: vmovdqa (%rsi), %ymm1
1471+
; AVX512F-NEXT: vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
1472+
; AVX512F-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1}
1473+
; AVX512F-NEXT: vmovdqa %ymm1, (%rdx)
1474+
; AVX512F-NEXT: vzeroupper
1475+
; AVX512F-NEXT: retq
1476+
;
1477+
; AVX512VL-LABEL: store_blend_load_v8i32:
1478+
; AVX512VL: # %bb.0:
1479+
; AVX512VL-NEXT: vmovdqa (%rdi), %ymm0
1480+
; AVX512VL-NEXT: vmovdqa (%rsi), %ymm1
1481+
; AVX512VL-NEXT: vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1
1482+
; AVX512VL-NEXT: vmovdqa32 %ymm0, %ymm1 {%k1}
1483+
; AVX512VL-NEXT: vmovdqa %ymm1, (%rdx)
1484+
; AVX512VL-NEXT: vzeroupper
1485+
; AVX512VL-NEXT: retq
1486+
;
1487+
; XOP-LABEL: store_blend_load_v8i32:
1488+
; XOP: # %bb.0:
1489+
; XOP-NEXT: vmovaps (%rsi), %ymm0
1490+
; XOP-NEXT: vbroadcastss {{.*#+}} xmm1 = [7,7,7,7]
1491+
; XOP-NEXT: vpcomltud 16(%rdi), %xmm1, %xmm2
1492+
; XOP-NEXT: vpcomltud (%rdi), %xmm1, %xmm1
1493+
; XOP-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
1494+
; XOP-NEXT: vblendvps %ymm1, (%rdi), %ymm0, %ymm0
1495+
; XOP-NEXT: vmovaps %ymm0, (%rdx)
1496+
; XOP-NEXT: vzeroupper
1497+
; XOP-NEXT: retq
1498+
%v0 = load <8 x i32>, ptr %a0
1499+
%v1 = load <8 x i32>, ptr %a1
1500+
%cmp = icmp ugt <8 x i32> %v0, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
1501+
%res = select <8 x i1> %cmp, <8 x i32> %v0, <8 x i32> %v1
1502+
store <8 x i32> %res, ptr %a2
1503+
ret void
1504+
}
1505+
1506+
define void @store_blend_load_v16i16(ptr %a0, ptr %a1, ptr %a2) {
1507+
; AVX1-LABEL: store_blend_load_v16i16:
1508+
; AVX1: # %bb.0:
1509+
; AVX1-NEXT: vmovdqa (%rdi), %xmm0
1510+
; AVX1-NEXT: vmovdqa 16(%rdi), %xmm1
1511+
; AVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [8,8,8,8,8,8,8,8]
1512+
; AVX1-NEXT: vpmaxuw %xmm2, %xmm1, %xmm3
1513+
; AVX1-NEXT: vpcmpeqw %xmm3, %xmm1, %xmm1
1514+
; AVX1-NEXT: vpmaxuw %xmm2, %xmm0, %xmm2
1515+
; AVX1-NEXT: vpcmpeqw %xmm2, %xmm0, %xmm0
1516+
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1517+
; AVX1-NEXT: vandnps (%rsi), %ymm0, %ymm1
1518+
; AVX1-NEXT: vandps (%rdi), %ymm0, %ymm0
1519+
; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
1520+
; AVX1-NEXT: vmovaps %ymm0, (%rdx)
1521+
; AVX1-NEXT: vzeroupper
1522+
; AVX1-NEXT: retq
1523+
;
1524+
; AVX2-LABEL: store_blend_load_v16i16:
1525+
; AVX2: # %bb.0:
1526+
; AVX2-NEXT: vmovdqa (%rdi), %ymm0
1527+
; AVX2-NEXT: vmovdqa (%rsi), %ymm1
1528+
; AVX2-NEXT: vpmaxuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm2
1529+
; AVX2-NEXT: vpcmpeqw %ymm2, %ymm0, %ymm2
1530+
; AVX2-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
1531+
; AVX2-NEXT: vmovdqa %ymm0, (%rdx)
1532+
; AVX2-NEXT: vzeroupper
1533+
; AVX2-NEXT: retq
1534+
;
1535+
; AVX512F-LABEL: store_blend_load_v16i16:
1536+
; AVX512F: # %bb.0:
1537+
; AVX512F-NEXT: vmovdqa (%rdi), %ymm0
1538+
; AVX512F-NEXT: vmovdqa (%rsi), %ymm1
1539+
; AVX512F-NEXT: vpmaxuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm2
1540+
; AVX512F-NEXT: vpcmpeqw %ymm2, %ymm0, %ymm2
1541+
; AVX512F-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
1542+
; AVX512F-NEXT: vmovdqa %ymm0, (%rdx)
1543+
; AVX512F-NEXT: vzeroupper
1544+
; AVX512F-NEXT: retq
1545+
;
1546+
; AVX512VL-LABEL: store_blend_load_v16i16:
1547+
; AVX512VL: # %bb.0:
1548+
; AVX512VL-NEXT: vmovdqa (%rdi), %ymm0
1549+
; AVX512VL-NEXT: vpmaxuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm1
1550+
; AVX512VL-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm1
1551+
; AVX512VL-NEXT: vpternlogq $202, (%rsi), %ymm0, %ymm1
1552+
; AVX512VL-NEXT: vmovdqa %ymm1, (%rdx)
1553+
; AVX512VL-NEXT: vzeroupper
1554+
; AVX512VL-NEXT: retq
1555+
;
1556+
; XOP-LABEL: store_blend_load_v16i16:
1557+
; XOP: # %bb.0:
1558+
; XOP-NEXT: vmovdqa (%rdi), %ymm0
1559+
; XOP-NEXT: vbroadcastss {{.*#+}} xmm1 = [7,7,7,7,7,7,7,7]
1560+
; XOP-NEXT: vpcomltuw 16(%rdi), %xmm1, %xmm2
1561+
; XOP-NEXT: vpcomltuw (%rdi), %xmm1, %xmm1
1562+
; XOP-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
1563+
; XOP-NEXT: vpcmov %ymm1, (%rsi), %ymm0, %ymm0
1564+
; XOP-NEXT: vmovdqa %ymm0, (%rdx)
1565+
; XOP-NEXT: vzeroupper
1566+
; XOP-NEXT: retq
1567+
%v0 = load <16 x i16>, ptr %a0
1568+
%v1 = load <16 x i16>, ptr %a1
1569+
%cmp = icmp ugt <16 x i16> %v0, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
1570+
%res = select <16 x i1> %cmp, <16 x i16> %v0, <16 x i16> %v1
1571+
store <16 x i16> %res, ptr %a2
1572+
ret void
1573+
}
1574+
1575+
define void @store_blend_load_v32i8(ptr %a0, ptr %a1, ptr %a2) {
1576+
; AVX1-LABEL: store_blend_load_v32i8:
1577+
; AVX1: # %bb.0:
1578+
; AVX1-NEXT: vmovdqa (%rdi), %xmm0
1579+
; AVX1-NEXT: vmovdqa 16(%rdi), %xmm1
1580+
; AVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
1581+
; AVX1-NEXT: vpmaxub %xmm2, %xmm1, %xmm3
1582+
; AVX1-NEXT: vpcmpeqb %xmm3, %xmm1, %xmm1
1583+
; AVX1-NEXT: vpmaxub %xmm2, %xmm0, %xmm2
1584+
; AVX1-NEXT: vpcmpeqb %xmm2, %xmm0, %xmm0
1585+
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1586+
; AVX1-NEXT: vandnps (%rsi), %ymm0, %ymm1
1587+
; AVX1-NEXT: vandps (%rdi), %ymm0, %ymm0
1588+
; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
1589+
; AVX1-NEXT: vmovaps %ymm0, (%rdx)
1590+
; AVX1-NEXT: vzeroupper
1591+
; AVX1-NEXT: retq
1592+
;
1593+
; AVX2-LABEL: store_blend_load_v32i8:
1594+
; AVX2: # %bb.0:
1595+
; AVX2-NEXT: vmovdqa (%rdi), %ymm0
1596+
; AVX2-NEXT: vmovdqa (%rsi), %ymm1
1597+
; AVX2-NEXT: vpmaxub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm2
1598+
; AVX2-NEXT: vpcmpeqb %ymm2, %ymm0, %ymm2
1599+
; AVX2-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
1600+
; AVX2-NEXT: vmovdqa %ymm0, (%rdx)
1601+
; AVX2-NEXT: vzeroupper
1602+
; AVX2-NEXT: retq
1603+
;
1604+
; AVX512F-LABEL: store_blend_load_v32i8:
1605+
; AVX512F: # %bb.0:
1606+
; AVX512F-NEXT: vmovdqa (%rdi), %ymm0
1607+
; AVX512F-NEXT: vmovdqa (%rsi), %ymm1
1608+
; AVX512F-NEXT: vpmaxub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm2
1609+
; AVX512F-NEXT: vpcmpeqb %ymm2, %ymm0, %ymm2
1610+
; AVX512F-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
1611+
; AVX512F-NEXT: vmovdqa %ymm0, (%rdx)
1612+
; AVX512F-NEXT: vzeroupper
1613+
; AVX512F-NEXT: retq
1614+
;
1615+
; AVX512VL-LABEL: store_blend_load_v32i8:
1616+
; AVX512VL: # %bb.0:
1617+
; AVX512VL-NEXT: vmovdqa (%rdi), %ymm0
1618+
; AVX512VL-NEXT: vpmaxub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm1
1619+
; AVX512VL-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm1
1620+
; AVX512VL-NEXT: vpternlogq $202, (%rsi), %ymm0, %ymm1
1621+
; AVX512VL-NEXT: vmovdqa %ymm1, (%rdx)
1622+
; AVX512VL-NEXT: vzeroupper
1623+
; AVX512VL-NEXT: retq
1624+
;
1625+
; XOP-LABEL: store_blend_load_v32i8:
1626+
; XOP: # %bb.0:
1627+
; XOP-NEXT: vmovdqa (%rdi), %ymm0
1628+
; XOP-NEXT: vbroadcastss {{.*#+}} xmm1 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
1629+
; XOP-NEXT: vpcomltub 16(%rdi), %xmm1, %xmm2
1630+
; XOP-NEXT: vpcomltub (%rdi), %xmm1, %xmm1
1631+
; XOP-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
1632+
; XOP-NEXT: vpcmov %ymm1, (%rsi), %ymm0, %ymm0
1633+
; XOP-NEXT: vmovdqa %ymm0, (%rdx)
1634+
; XOP-NEXT: vzeroupper
1635+
; XOP-NEXT: retq
1636+
%v0 = load <32 x i8>, ptr %a0
1637+
%v1 = load <32 x i8>, ptr %a1
1638+
%cmp = icmp ugt <32 x i8> %v0, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
1639+
%res = select <32 x i1> %cmp, <32 x i8> %v0, <32 x i8> %v1
1640+
store <32 x i8> %res, ptr %a2
1641+
ret void
1642+
}
1643+
13681644
define void @PR46531(ptr %x, ptr %y, ptr %z) {
13691645
; AVX12-LABEL: PR46531:
13701646
; AVX12: # %bb.0:

0 commit comments

Comments (0)