Skip to content

Commit c7768ce

Browse files
committed
[X86] Update the Haswell and Broadwell scheduler information for gather instructions
Broadwell was missing half the gather instructions. Both models had some mix-ups in the resource costs and number of uops. I've updated them here based on what I think the original IACA source says, with some cross-checking against the microcode. I'm not sure about latency, as the IACA source I have doesn't have that information, so I'm using the latency from uops.info. I plan to update the Skylake models as well, but I'll do that in a separate patch. Differential Revision: https://reviews.llvm.org/D73844
1 parent 9a40670 commit c7768ce

File tree

4 files changed

+91
-123
lines changed

4 files changed

+91
-123
lines changed

llvm/lib/Target/X86/X86SchedBroadwell.td

Lines changed: 15 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -1480,54 +1480,42 @@ def BWWriteResGroup182 : SchedWriteRes<[BWPort0,BWPort1,BWPort23]> {
14801480
def: InstRW<[BWWriteResGroup182], (instregex "DIVR_FI(16|32)m")>;
14811481

14821482
def BWWriteResGroup183_1 : SchedWriteRes<[BWPort4, BWPort5, BWPort23, BWPort0156]> {
1483-
let Latency = 22;
1483+
let Latency = 17;
14841484
let NumMicroOps = 7;
14851485
let ResourceCycles = [1,3,2,1];
14861486
}
1487-
def: InstRW<[BWWriteResGroup183_1], (instrs VGATHERQPDrm)>;
1487+
def: InstRW<[BWWriteResGroup183_1], (instrs VGATHERDPDrm, VPGATHERDQrm,
1488+
VGATHERQPDrm, VPGATHERQQrm)>;
14881489

14891490
def BWWriteResGroup183_2 : SchedWriteRes<[BWPort4, BWPort5, BWPort23, BWPort0156]> {
1490-
let Latency = 23;
1491+
let Latency = 18;
14911492
let NumMicroOps = 9;
14921493
let ResourceCycles = [1,3,4,1];
14931494
}
1494-
def: InstRW<[BWWriteResGroup183_2], (instrs VGATHERQPDYrm)>;
1495+
def: InstRW<[BWWriteResGroup183_2], (instrs VGATHERDPDYrm, VPGATHERDQYrm,
1496+
VGATHERQPDYrm, VPGATHERQQYrm)>;
14951497

14961498
def BWWriteResGroup183_3 : SchedWriteRes<[BWPort4, BWPort5, BWPort23, BWPort0156]> {
1497-
let Latency = 24;
1499+
let Latency = 19;
14981500
let NumMicroOps = 9;
14991501
let ResourceCycles = [1,5,2,1];
15001502
}
1501-
def: InstRW<[BWWriteResGroup183_3], (instrs VGATHERQPSYrm)>;
1503+
def: InstRW<[BWWriteResGroup183_3], (instrs VGATHERQPSrm, VPGATHERQDrm)>;
15021504

15031505
def BWWriteResGroup183_4 : SchedWriteRes<[BWPort4, BWPort5, BWPort23, BWPort0156]> {
1504-
let Latency = 25;
1505-
let NumMicroOps = 7;
1506-
let ResourceCycles = [1,3,2,1];
1506+
let Latency = 19;
1507+
let NumMicroOps = 10;
1508+
let ResourceCycles = [1,4,4,1];
15071509
}
1508-
def: InstRW<[BWWriteResGroup183_4], (instrs VGATHERDPDrm,
1509-
VGATHERDPSrm)>;
1510+
def: InstRW<[BWWriteResGroup183_4], (instrs VGATHERDPSrm, VPGATHERDDrm,
1511+
VGATHERQPSYrm, VPGATHERQDYrm)>;
15101512

15111513
def BWWriteResGroup183_5 : SchedWriteRes<[BWPort4, BWPort5, BWPort23, BWPort0156]> {
1512-
let Latency = 26;
1513-
let NumMicroOps = 9;
1514-
let ResourceCycles = [1,5,2,1];
1515-
}
1516-
def: InstRW<[BWWriteResGroup183_5], (instrs VGATHERDPDYrm)>;
1517-
1518-
def BWWriteResGroup183_6 : SchedWriteRes<[BWPort4, BWPort5, BWPort23, BWPort0156]> {
1519-
let Latency = 26;
1514+
let Latency = 21;
15201515
let NumMicroOps = 14;
15211516
let ResourceCycles = [1,4,8,1];
15221517
}
1523-
def: InstRW<[BWWriteResGroup183_6], (instrs VGATHERDPSYrm)>;
1524-
1525-
def BWWriteResGroup183_7 : SchedWriteRes<[BWPort4, BWPort5, BWPort23, BWPort0156]> {
1526-
let Latency = 27;
1527-
let NumMicroOps = 9;
1528-
let ResourceCycles = [1,5,2,1];
1529-
}
1530-
def: InstRW<[BWWriteResGroup183_7], (instrs VGATHERQPSrm)>;
1518+
def: InstRW<[BWWriteResGroup183_5], (instrs VGATHERDPSYrm, VPGATHERDDYrm)>;
15311519

15321520
def BWWriteResGroup185 : SchedWriteRes<[BWPort4,BWPort6,BWPort23,BWPort237,BWPort0156]> {
15331521
let Latency = 29;

llvm/lib/Target/X86/X86SchedHaswell.td

Lines changed: 27 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -1785,75 +1785,55 @@ def HWWriteResGroup183 : SchedWriteRes<[HWPort0,HWPort1,HWPort4,HWPort5,HWPort6,
17851785
}
17861786
def: InstRW<[HWWriteResGroup183], (instrs FSTENVm)>;
17871787

1788-
def HWWriteResGroup184 : SchedWriteRes<[HWPort0, HWPort5, HWPort15, HWPort015, HWPort06, HWPort23]> {
1789-
let Latency = 26;
1788+
def HWWriteResGroup184 : SchedWriteRes<[HWPort0,HWPort5,HWPort06,HWPort15,HWPort015,HWPort23]> {
1789+
let Latency = 14;
17901790
let NumMicroOps = 12;
1791-
let ResourceCycles = [2,2,1,3,2,2];
1792-
}
1793-
def: InstRW<[HWWriteResGroup184], (instrs VGATHERDPDrm,
1794-
VPGATHERDQrm,
1795-
VPGATHERDDrm)>;
1796-
1797-
def HWWriteResGroup185 : SchedWriteRes<[HWPort0, HWPort5, HWPort06, HWPort15, HWPort015, HWPort23]> {
1798-
let Latency = 24;
1799-
let NumMicroOps = 22;
1800-
let ResourceCycles = [5,3,4,1,5,4];
1791+
let ResourceCycles = [2,2,2,1,3,2];
18011792
}
1802-
def: InstRW<[HWWriteResGroup185], (instrs VGATHERQPDYrm,
1803-
VPGATHERQQYrm)>;
1793+
def: InstRW<[HWWriteResGroup184], (instrs VGATHERDPDrm, VPGATHERDQrm)>;
18041794

1805-
def HWWriteResGroup186 : SchedWriteRes<[HWPort0, HWPort5, HWPort06, HWPort15, HWPort015, HWPort23]> {
1806-
let Latency = 28;
1807-
let NumMicroOps = 22;
1808-
let ResourceCycles = [5,3,4,1,5,4];
1809-
}
1810-
def: InstRW<[HWWriteResGroup186], (instrs VPGATHERQDYrm)>;
1811-
1812-
def HWWriteResGroup187 : SchedWriteRes<[HWPort0, HWPort5, HWPort06, HWPort15, HWPort015, HWPort23]> {
1813-
let Latency = 25;
1814-
let NumMicroOps = 22;
1815-
let ResourceCycles = [5,3,4,1,5,4];
1795+
def HWWriteResGroup185 : SchedWriteRes<[HWPort0,HWPort5,HWPort06,HWPort15,HWPort015,HWPort23]> {
1796+
let Latency = 17;
1797+
let NumMicroOps = 20;
1798+
let ResourceCycles = [3,3,4,1,5,4];
18161799
}
1817-
def: InstRW<[HWWriteResGroup187], (instrs VPGATHERQDrm)>;
1800+
def: InstRW<[HWWriteResGroup185], (instrs VGATHERDPDYrm, VPGATHERDQYrm)>;
18181801

1819-
def HWWriteResGroup188 : SchedWriteRes<[HWPort0, HWPort5, HWPort06, HWPort15, HWPort015, HWPort23]> {
1820-
let Latency = 27;
1802+
def HWWriteResGroup186 : SchedWriteRes<[HWPort0,HWPort5,HWPort06,HWPort15,HWPort015,HWPort23]> {
1803+
let Latency = 16;
18211804
let NumMicroOps = 20;
18221805
let ResourceCycles = [3,3,4,1,5,4];
18231806
}
1824-
def: InstRW<[HWWriteResGroup188], (instrs VGATHERDPDYrm,
1825-
VPGATHERDQYrm)>;
1807+
def: InstRW<[HWWriteResGroup186], (instrs VGATHERDPSrm, VPGATHERDDrm)>;
18261808

1827-
def HWWriteResGroup189 : SchedWriteRes<[HWPort0, HWPort5, HWPort06, HWPort15, HWPort015, HWPort23]> {
1828-
let Latency = 27;
1809+
def HWWriteResGroup187 : SchedWriteRes<[HWPort0,HWPort5,HWPort06,HWPort15,HWPort015,HWPort23]> {
1810+
let Latency = 22;
18291811
let NumMicroOps = 34;
18301812
let ResourceCycles = [5,3,8,1,9,8];
18311813
}
1832-
def: InstRW<[HWWriteResGroup189], (instrs VGATHERDPSYrm,
1833-
VPGATHERDDYrm)>;
1814+
def: InstRW<[HWWriteResGroup187], (instrs VGATHERDPSYrm, VPGATHERDDYrm)>;
18341815

1835-
def HWWriteResGroup190 : SchedWriteRes<[HWPort0, HWPort5, HWPort06, HWPort15, HWPort015, HWPort23]> {
1836-
let Latency = 23;
1816+
def HWWriteResGroup188 : SchedWriteRes<[HWPort0,HWPort5,HWPort06,HWPort15,HWPort015,HWPort23]> {
1817+
let Latency = 15;
18371818
let NumMicroOps = 14;
18381819
let ResourceCycles = [3,3,2,1,3,2];
18391820
}
1840-
def: InstRW<[HWWriteResGroup190], (instrs VGATHERQPDrm,
1841-
VPGATHERQQrm)>;
1821+
def: InstRW<[HWWriteResGroup188], (instrs VGATHERQPDrm, VPGATHERQQrm)>;
18421822

1843-
def HWWriteResGroup191 : SchedWriteRes<[HWPort0, HWPort5, HWPort06, HWPort15, HWPort015, HWPort23]> {
1844-
let Latency = 28;
1845-
let NumMicroOps = 15;
1846-
let ResourceCycles = [3,3,2,1,4,2];
1823+
def HWWriteResGroup189 : SchedWriteRes<[HWPort0,HWPort5,HWPort06,HWPort15,HWPort015,HWPort23]> {
1824+
let Latency = 17;
1825+
let NumMicroOps = 22;
1826+
let ResourceCycles = [5,3,4,1,5,4];
18471827
}
1848-
def: InstRW<[HWWriteResGroup191], (instrs VGATHERQPSYrm)>;
1828+
def: InstRW<[HWWriteResGroup189], (instrs VGATHERQPDYrm, VPGATHERQQYrm,
1829+
VGATHERQPSYrm, VPGATHERQDYrm)>;
18491830

1850-
def HWWriteResGroup192 : SchedWriteRes<[HWPort0, HWPort5, HWPort06, HWPort15, HWPort015, HWPort23]> {
1851-
let Latency = 25;
1831+
def HWWriteResGroup190 : SchedWriteRes<[HWPort0,HWPort5,HWPort06,HWPort15,HWPort015,HWPort23]> {
1832+
let Latency = 16;
18521833
let NumMicroOps = 15;
18531834
let ResourceCycles = [3,3,2,1,4,2];
18541835
}
1855-
def: InstRW<[HWWriteResGroup192], (instrs VGATHERQPSrm,
1856-
VGATHERDPSrm)>;
1836+
def: InstRW<[HWWriteResGroup190], (instrs VGATHERQPSrm, VPGATHERQDrm)>;
18571837

18581838
def: InstRW<[WriteZero], (instrs CLC)>;
18591839

llvm/test/tools/llvm-mca/X86/Broadwell/resources-avx2.s

Lines changed: 28 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -465,14 +465,14 @@ vpxor (%rax), %ymm1, %ymm2
465465
# CHECK-NEXT: 1 3 1.00 vbroadcastss %xmm0, %ymm0
466466
# CHECK-NEXT: 1 3 1.00 vextracti128 $1, %ymm0, %xmm2
467467
# CHECK-NEXT: 2 1 1.00 * vextracti128 $1, %ymm0, (%rax)
468-
# CHECK-NEXT: 7 25 3.00 * vgatherdpd %xmm0, (%rax,%xmm1,2), %xmm2
469-
# CHECK-NEXT: 9 26 5.00 * vgatherdpd %ymm0, (%rax,%xmm1,2), %ymm2
470-
# CHECK-NEXT: 7 25 3.00 * vgatherdps %xmm0, (%rax,%xmm1,2), %xmm2
471-
# CHECK-NEXT: 14 26 4.00 * vgatherdps %ymm0, (%rax,%ymm1,2), %ymm2
472-
# CHECK-NEXT: 7 22 3.00 * vgatherqpd %xmm0, (%rax,%xmm1,2), %xmm2
473-
# CHECK-NEXT: 9 23 3.00 * vgatherqpd %ymm0, (%rax,%ymm1,2), %ymm2
474-
# CHECK-NEXT: 9 27 5.00 * vgatherqps %xmm0, (%rax,%xmm1,2), %xmm2
475-
# CHECK-NEXT: 9 24 5.00 * vgatherqps %xmm0, (%rax,%ymm1,2), %xmm2
468+
# CHECK-NEXT: 7 17 3.00 * vgatherdpd %xmm0, (%rax,%xmm1,2), %xmm2
469+
# CHECK-NEXT: 9 18 3.00 * vgatherdpd %ymm0, (%rax,%xmm1,2), %ymm2
470+
# CHECK-NEXT: 10 19 4.00 * vgatherdps %xmm0, (%rax,%xmm1,2), %xmm2
471+
# CHECK-NEXT: 14 21 4.00 * vgatherdps %ymm0, (%rax,%ymm1,2), %ymm2
472+
# CHECK-NEXT: 7 17 3.00 * vgatherqpd %xmm0, (%rax,%xmm1,2), %xmm2
473+
# CHECK-NEXT: 9 18 3.00 * vgatherqpd %ymm0, (%rax,%ymm1,2), %ymm2
474+
# CHECK-NEXT: 9 19 5.00 * vgatherqps %xmm0, (%rax,%xmm1,2), %xmm2
475+
# CHECK-NEXT: 10 19 4.00 * vgatherqps %xmm0, (%rax,%ymm1,2), %xmm2
476476
# CHECK-NEXT: 1 3 1.00 vinserti128 $1, %xmm0, %ymm1, %ymm2
477477
# CHECK-NEXT: 2 6 0.50 * vinserti128 $1, (%rax), %ymm1, %ymm2
478478
# CHECK-NEXT: 1 6 0.50 * vmovntdqa (%rax), %ymm0
@@ -568,14 +568,14 @@ vpxor (%rax), %ymm1, %ymm2
568568
# CHECK-NEXT: 2 9 1.00 * vpermps (%rax), %ymm1, %ymm2
569569
# CHECK-NEXT: 1 3 1.00 vpermq $1, %ymm0, %ymm2
570570
# CHECK-NEXT: 2 9 1.00 * vpermq $1, (%rax), %ymm2
571-
# CHECK-NEXT: 1 5 0.50 * vpgatherdd %xmm0, (%rax,%xmm1,2), %xmm2
572-
# CHECK-NEXT: 1 5 0.50 * vpgatherdd %ymm0, (%rax,%ymm1,2), %ymm2
573-
# CHECK-NEXT: 1 5 0.50 * vpgatherdq %xmm0, (%rax,%xmm1,2), %xmm2
574-
# CHECK-NEXT: 1 5 0.50 * vpgatherdq %ymm0, (%rax,%xmm1,2), %ymm2
575-
# CHECK-NEXT: 1 5 0.50 * vpgatherqd %xmm0, (%rax,%xmm1,2), %xmm2
576-
# CHECK-NEXT: 1 5 0.50 * vpgatherqd %xmm0, (%rax,%ymm1,2), %xmm2
577-
# CHECK-NEXT: 1 5 0.50 * vpgatherqq %xmm0, (%rax,%xmm1,2), %xmm2
578-
# CHECK-NEXT: 1 5 0.50 * vpgatherqq %ymm0, (%rax,%ymm1,2), %ymm2
571+
# CHECK-NEXT: 10 19 4.00 * vpgatherdd %xmm0, (%rax,%xmm1,2), %xmm2
572+
# CHECK-NEXT: 14 21 4.00 * vpgatherdd %ymm0, (%rax,%ymm1,2), %ymm2
573+
# CHECK-NEXT: 7 17 3.00 * vpgatherdq %xmm0, (%rax,%xmm1,2), %xmm2
574+
# CHECK-NEXT: 9 18 3.00 * vpgatherdq %ymm0, (%rax,%xmm1,2), %ymm2
575+
# CHECK-NEXT: 9 19 5.00 * vpgatherqd %xmm0, (%rax,%xmm1,2), %xmm2
576+
# CHECK-NEXT: 10 19 4.00 * vpgatherqd %xmm0, (%rax,%ymm1,2), %xmm2
577+
# CHECK-NEXT: 7 17 3.00 * vpgatherqq %xmm0, (%rax,%xmm1,2), %xmm2
578+
# CHECK-NEXT: 9 18 3.00 * vpgatherqq %ymm0, (%rax,%ymm1,2), %ymm2
579579
# CHECK-NEXT: 3 3 2.00 vphaddd %ymm0, %ymm1, %ymm2
580580
# CHECK-NEXT: 4 9 2.00 * vphaddd (%rax), %ymm1, %ymm2
581581
# CHECK-NEXT: 3 3 2.00 vphaddsw %ymm0, %ymm1, %ymm2
@@ -776,7 +776,7 @@ vpxor (%rax), %ymm1, %ymm2
776776

777777
# CHECK: Resource pressure per iteration:
778778
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9]
779-
# CHECK-NEXT: - - 94.67 58.67 85.67 85.67 13.00 237.67 2.00 1.67
779+
# CHECK-NEXT: - - 96.67 60.67 99.67 99.67 21.00 266.67 4.00 1.67
780780

781781
# CHECK: Resource pressure by instruction:
782782
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions:
@@ -786,13 +786,13 @@ vpxor (%rax), %ymm1, %ymm2
786786
# CHECK-NEXT: - - - - - - - 1.00 - - vextracti128 $1, %ymm0, %xmm2
787787
# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 vextracti128 $1, %ymm0, (%rax)
788788
# CHECK-NEXT: - - 0.25 0.25 1.00 1.00 1.00 3.25 0.25 - vgatherdpd %xmm0, (%rax,%xmm1,2), %xmm2
789-
# CHECK-NEXT: - - 0.25 0.25 1.00 1.00 1.00 5.25 0.25 - vgatherdpd %ymm0, (%rax,%xmm1,2), %ymm2
790-
# CHECK-NEXT: - - 0.25 0.25 1.00 1.00 1.00 3.25 0.25 - vgatherdps %xmm0, (%rax,%xmm1,2), %xmm2
789+
# CHECK-NEXT: - - 0.25 0.25 2.00 2.00 1.00 3.25 0.25 - vgatherdpd %ymm0, (%rax,%xmm1,2), %ymm2
790+
# CHECK-NEXT: - - 0.25 0.25 2.00 2.00 1.00 4.25 0.25 - vgatherdps %xmm0, (%rax,%xmm1,2), %xmm2
791791
# CHECK-NEXT: - - 0.25 0.25 4.00 4.00 1.00 4.25 0.25 - vgatherdps %ymm0, (%rax,%ymm1,2), %ymm2
792792
# CHECK-NEXT: - - 0.25 0.25 1.00 1.00 1.00 3.25 0.25 - vgatherqpd %xmm0, (%rax,%xmm1,2), %xmm2
793793
# CHECK-NEXT: - - 0.25 0.25 2.00 2.00 1.00 3.25 0.25 - vgatherqpd %ymm0, (%rax,%ymm1,2), %ymm2
794794
# CHECK-NEXT: - - 0.25 0.25 1.00 1.00 1.00 5.25 0.25 - vgatherqps %xmm0, (%rax,%xmm1,2), %xmm2
795-
# CHECK-NEXT: - - 0.25 0.25 1.00 1.00 1.00 5.25 0.25 - vgatherqps %xmm0, (%rax,%ymm1,2), %xmm2
795+
# CHECK-NEXT: - - 0.25 0.25 2.00 2.00 1.00 4.25 0.25 - vgatherqps %xmm0, (%rax,%ymm1,2), %xmm2
796796
# CHECK-NEXT: - - - - - - - 1.00 - - vinserti128 $1, %xmm0, %ymm1, %ymm2
797797
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vinserti128 $1, (%rax), %ymm1, %ymm2
798798
# CHECK-NEXT: - - - - 0.50 0.50 - - - - vmovntdqa (%rax), %ymm0
@@ -888,14 +888,14 @@ vpxor (%rax), %ymm1, %ymm2
888888
# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpermps (%rax), %ymm1, %ymm2
889889
# CHECK-NEXT: - - - - - - - 1.00 - - vpermq $1, %ymm0, %ymm2
890890
# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpermq $1, (%rax), %ymm2
891-
# CHECK-NEXT: - - - - 0.50 0.50 - - - - vpgatherdd %xmm0, (%rax,%xmm1,2), %xmm2
892-
# CHECK-NEXT: - - - - 0.50 0.50 - - - - vpgatherdd %ymm0, (%rax,%ymm1,2), %ymm2
893-
# CHECK-NEXT: - - - - 0.50 0.50 - - - - vpgatherdq %xmm0, (%rax,%xmm1,2), %xmm2
894-
# CHECK-NEXT: - - - - 0.50 0.50 - - - - vpgatherdq %ymm0, (%rax,%xmm1,2), %ymm2
895-
# CHECK-NEXT: - - - - 0.50 0.50 - - - - vpgatherqd %xmm0, (%rax,%xmm1,2), %xmm2
896-
# CHECK-NEXT: - - - - 0.50 0.50 - - - - vpgatherqd %xmm0, (%rax,%ymm1,2), %xmm2
897-
# CHECK-NEXT: - - - - 0.50 0.50 - - - - vpgatherqq %xmm0, (%rax,%xmm1,2), %xmm2
898-
# CHECK-NEXT: - - - - 0.50 0.50 - - - - vpgatherqq %ymm0, (%rax,%ymm1,2), %ymm2
891+
# CHECK-NEXT: - - 0.25 0.25 2.00 2.00 1.00 4.25 0.25 - vpgatherdd %xmm0, (%rax,%xmm1,2), %xmm2
892+
# CHECK-NEXT: - - 0.25 0.25 4.00 4.00 1.00 4.25 0.25 - vpgatherdd %ymm0, (%rax,%ymm1,2), %ymm2
893+
# CHECK-NEXT: - - 0.25 0.25 1.00 1.00 1.00 3.25 0.25 - vpgatherdq %xmm0, (%rax,%xmm1,2), %xmm2
894+
# CHECK-NEXT: - - 0.25 0.25 2.00 2.00 1.00 3.25 0.25 - vpgatherdq %ymm0, (%rax,%xmm1,2), %ymm2
895+
# CHECK-NEXT: - - 0.25 0.25 1.00 1.00 1.00 5.25 0.25 - vpgatherqd %xmm0, (%rax,%xmm1,2), %xmm2
896+
# CHECK-NEXT: - - 0.25 0.25 2.00 2.00 1.00 4.25 0.25 - vpgatherqd %xmm0, (%rax,%ymm1,2), %xmm2
897+
# CHECK-NEXT: - - 0.25 0.25 1.00 1.00 1.00 3.25 0.25 - vpgatherqq %xmm0, (%rax,%xmm1,2), %xmm2
898+
# CHECK-NEXT: - - 0.25 0.25 2.00 2.00 1.00 3.25 0.25 - vpgatherqq %ymm0, (%rax,%ymm1,2), %ymm2
899899
# CHECK-NEXT: - - - 0.50 - - - 2.50 - - vphaddd %ymm0, %ymm1, %ymm2
900900
# CHECK-NEXT: - - - 0.50 0.50 0.50 - 2.50 - - vphaddd (%rax), %ymm1, %ymm2
901901
# CHECK-NEXT: - - - 0.50 - - - 2.50 - - vphaddsw %ymm0, %ymm1, %ymm2

0 commit comments

Comments
 (0)