Skip to content

Commit 0a6d797

Browse files
committed
[X86] Improve F16C CVT schedules on SNB/HSW/BDW
Add a complete IvyBridge schedule for the F16C conversions. IvyBridge was the first CPU to support F16C, and it is covered by the SandyBridge model. Split the rr/rm schedules, as they usually have very different port usage. Haswell/Broadwell use Port1, not Port0. Confirmed with a mixture of Agner + uops.info comparisons.
1 parent 6cfaddf commit 0a6d797

File tree

7 files changed

+66
-62
lines changed

7 files changed

+66
-62
lines changed

llvm/lib/Target/X86/X86SchedBroadwell.td

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -393,11 +393,11 @@ defm : BWWriteResPair<WriteCvtPD2PS, [BWPort1,BWPort5], 4, [1,1], 2, 5>;
393393
defm : BWWriteResPair<WriteCvtPD2PSY, [BWPort1,BWPort5], 6, [1,1], 2, 6>;
394394
defm : X86WriteResPairUnsupported<WriteCvtPD2PSZ>;
395395

396-
defm : X86WriteRes<WriteCvtPH2PS, [BWPort0,BWPort5], 2, [1,1], 2>;
397-
defm : X86WriteRes<WriteCvtPH2PSY, [BWPort0,BWPort5], 2, [1,1], 2>;
396+
defm : X86WriteRes<WriteCvtPH2PS, [BWPort1,BWPort5], 2, [1,1], 2>;
397+
defm : X86WriteRes<WriteCvtPH2PSY, [BWPort1,BWPort5], 2, [1,1], 2>;
398398
defm : X86WriteResUnsupported<WriteCvtPH2PSZ>;
399-
defm : X86WriteRes<WriteCvtPH2PSLd, [BWPort0,BWPort23], 6, [1,1], 2>;
400-
defm : X86WriteRes<WriteCvtPH2PSYLd, [BWPort0,BWPort23], 6, [1,1], 2>;
399+
defm : X86WriteRes<WriteCvtPH2PSLd, [BWPort1,BWPort23], 6, [1,1], 2>;
400+
defm : X86WriteRes<WriteCvtPH2PSYLd, [BWPort1,BWPort23], 6, [1,1], 2>;
401401
defm : X86WriteResUnsupported<WriteCvtPH2PSZLd>;
402402

403403
defm : X86WriteRes<WriteCvtPS2PH, [BWPort1,BWPort5], 4, [1,1], 2>;

llvm/lib/Target/X86/X86SchedHaswell.td

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -393,12 +393,12 @@ defm : HWWriteResPair<WriteCvtPD2PS, [HWPort1,HWPort5], 4, [1,1], 2, 6>;
393393
defm : HWWriteResPair<WriteCvtPD2PSY, [HWPort1,HWPort5], 6, [1,1], 2, 6>;
394394
defm : HWWriteResPair<WriteCvtPD2PSZ, [HWPort1,HWPort5], 4, [1,1], 2, 6>; // Unsupported = 1
395395

396-
defm : X86WriteRes<WriteCvtPH2PS, [HWPort0,HWPort5], 2, [1,1], 2>;
397-
defm : X86WriteRes<WriteCvtPH2PSY, [HWPort0,HWPort5], 2, [1,1], 2>;
398-
defm : X86WriteRes<WriteCvtPH2PSZ, [HWPort0,HWPort5], 2, [1,1], 2>; // Unsupported = 1
399-
defm : X86WriteRes<WriteCvtPH2PSLd, [HWPort0,HWPort23], 6, [1,1], 2>;
400-
defm : X86WriteRes<WriteCvtPH2PSYLd, [HWPort0,HWPort23], 7, [1,1], 2>;
401-
defm : X86WriteRes<WriteCvtPH2PSZLd, [HWPort0,HWPort23], 7, [1,1], 2>; // Unsupported = 1
396+
defm : X86WriteRes<WriteCvtPH2PS, [HWPort1,HWPort5], 2, [1,1], 2>;
397+
defm : X86WriteRes<WriteCvtPH2PSY, [HWPort1,HWPort5], 2, [1,1], 2>;
398+
defm : X86WriteRes<WriteCvtPH2PSZ, [HWPort1,HWPort5], 2, [1,1], 2>; // Unsupported = 1
399+
defm : X86WriteRes<WriteCvtPH2PSLd, [HWPort1,HWPort23], 6, [1,1], 2>;
400+
defm : X86WriteRes<WriteCvtPH2PSYLd, [HWPort1,HWPort23], 7, [1,1], 2>;
401+
defm : X86WriteRes<WriteCvtPH2PSZLd, [HWPort1,HWPort23], 7, [1,1], 2>; // Unsupported = 1
402402

403403
defm : X86WriteRes<WriteCvtPS2PH, [HWPort1,HWPort5], 4, [1,1], 2>;
404404
defm : X86WriteRes<WriteCvtPS2PHY, [HWPort1,HWPort5], 6, [1,1], 2>;

llvm/lib/Target/X86/X86SchedSandyBridge.td

Lines changed: 14 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -361,16 +361,20 @@ defm : SBWriteResPair<WriteCvtPD2PS, [SBPort1,SBPort5], 4, [1,1], 2, 6>;
361361
defm : SBWriteResPair<WriteCvtPD2PSY, [SBPort1,SBPort5], 4, [1,1], 2, 7>;
362362
defm : SBWriteResPair<WriteCvtPD2PSZ, [SBPort1,SBPort5], 4, [1,1], 2, 7>; // Unsupported = 1
363363

364-
defm : SBWriteResPair<WriteCvtPH2PS, [SBPort1], 3>;
365-
defm : SBWriteResPair<WriteCvtPH2PSY, [SBPort1], 3>;
366-
defm : SBWriteResPair<WriteCvtPH2PSZ, [SBPort1], 3>; // Unsupported = 1
367-
368-
defm : X86WriteRes<WriteCvtPS2PH, [SBPort1], 3, [1], 1>;
369-
defm : X86WriteRes<WriteCvtPS2PHY, [SBPort1], 3, [1], 1>;
370-
defm : X86WriteRes<WriteCvtPS2PHZ, [SBPort1], 3, [1], 1>; // Unsupported = 1
371-
defm : X86WriteRes<WriteCvtPS2PHSt, [SBPort1, SBPort23, SBPort4], 4, [1,1,1], 1>;
372-
defm : X86WriteRes<WriteCvtPS2PHYSt, [SBPort1, SBPort23, SBPort4], 4, [1,1,1], 1>;
373-
defm : X86WriteRes<WriteCvtPS2PHZSt, [SBPort1, SBPort23, SBPort4], 4, [1,1,1], 1>; // Unsupported = 1
364+
// F16C Instructions (IvyBridge+)
365+
defm : X86WriteRes<WriteCvtPH2PS, [SBPort0,SBPort5], 3, [1,1], 2>;
366+
defm : X86WriteRes<WriteCvtPH2PSY, [SBPort0,SBPort5], 3, [1,1], 2>;
367+
defm : X86WriteRes<WriteCvtPH2PSZ, [SBPort0,SBPort5], 3, [1,1], 2>; // Unsupported = 1
368+
defm : X86WriteRes<WriteCvtPH2PSLd, [SBPort0,SBPort23], 8, [1,1], 2>;
369+
defm : X86WriteRes<WriteCvtPH2PSYLd, [SBPort0,SBPort5,SBPort23], 8, [1,1,1], 3>;
370+
defm : X86WriteRes<WriteCvtPH2PSZLd, [SBPort0,SBPort5,SBPort23], 8, [1,1,1], 3>; // Unsupported = 1
371+
372+
defm : X86WriteRes<WriteCvtPS2PH, [SBPort0,SBPort1,SBPort5], 10, [1,1,1], 3>;
373+
defm : X86WriteRes<WriteCvtPS2PHY, [SBPort0,SBPort1,SBPort5], 10, [1,1,1], 3>;
374+
defm : X86WriteRes<WriteCvtPS2PHZ, [SBPort0,SBPort1,SBPort5], 10, [1,1,1], 3>; // Unsupported = 1
375+
defm : X86WriteRes<WriteCvtPS2PHSt, [SBPort0,SBPort1,SBPort23,SBPort4], 13, [1,1,1,1], 4>;
376+
defm : X86WriteRes<WriteCvtPS2PHYSt, [SBPort0,SBPort1,SBPort23,SBPort4], 13, [1,1,1,1], 4>;
377+
defm : X86WriteRes<WriteCvtPS2PHZSt, [SBPort0,SBPort1,SBPort23,SBPort4], 13, [1,1,1,1], 4>; // Unsupported = 1
374378

375379
// Vector integer operations.
376380
defm : X86WriteRes<WriteVecLoad, [SBPort23], 5, [1], 1>;

llvm/test/tools/llvm-mca/X86/Broadwell/resources-f16c.s

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -45,14 +45,14 @@ vcvtps2ph $0, %ymm0, (%rax)
4545

4646
# CHECK: Resource pressure per iteration:
4747
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9]
48-
# CHECK-NEXT: - - 4.00 4.00 1.67 1.67 2.00 4.00 - 0.67
48+
# CHECK-NEXT: - - - 8.00 1.67 1.67 2.00 4.00 - 0.67
4949

5050
# CHECK: Resource pressure by instruction:
5151
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions:
52-
# CHECK-NEXT: - - 1.00 - - - - 1.00 - - vcvtph2ps %xmm0, %xmm2
53-
# CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - vcvtph2ps (%rax), %xmm2
54-
# CHECK-NEXT: - - 1.00 - - - - 1.00 - - vcvtph2ps %xmm0, %ymm2
55-
# CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - vcvtph2ps (%rax), %ymm2
52+
# CHECK-NEXT: - - - 1.00 - - - 1.00 - - vcvtph2ps %xmm0, %xmm2
53+
# CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vcvtph2ps (%rax), %xmm2
54+
# CHECK-NEXT: - - - 1.00 - - - 1.00 - - vcvtph2ps %xmm0, %ymm2
55+
# CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vcvtph2ps (%rax), %ymm2
5656
# CHECK-NEXT: - - - 1.00 - - - 1.00 - - vcvtps2ph $0, %xmm0, %xmm2
5757
# CHECK-NEXT: - - - 1.00 0.33 0.33 1.00 - - 0.33 vcvtps2ph $0, %xmm0, (%rax)
5858
# CHECK-NEXT: - - - 1.00 - - - 1.00 - - vcvtps2ph $0, %ymm0, %xmm2

llvm/test/tools/llvm-mca/X86/Generic/resources-f16c.s

Lines changed: 16 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -22,14 +22,14 @@ vcvtps2ph $0, %ymm0, (%rax)
2222
# CHECK-NEXT: [6]: HasSideEffects (U)
2323

2424
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
25-
# CHECK-NEXT: 1 3 1.00 vcvtph2ps %xmm0, %xmm2
25+
# CHECK-NEXT: 2 3 1.00 vcvtph2ps %xmm0, %xmm2
2626
# CHECK-NEXT: 2 8 1.00 * vcvtph2ps (%rax), %xmm2
27-
# CHECK-NEXT: 1 3 1.00 vcvtph2ps %xmm0, %ymm2
28-
# CHECK-NEXT: 2 8 1.00 * vcvtph2ps (%rax), %ymm2
29-
# CHECK-NEXT: 1 3 1.00 vcvtps2ph $0, %xmm0, %xmm2
30-
# CHECK-NEXT: 1 4 1.00 * vcvtps2ph $0, %xmm0, (%rax)
31-
# CHECK-NEXT: 1 3 1.00 vcvtps2ph $0, %ymm0, %xmm2
32-
# CHECK-NEXT: 1 4 1.00 * vcvtps2ph $0, %ymm0, (%rax)
27+
# CHECK-NEXT: 2 3 1.00 vcvtph2ps %xmm0, %ymm2
28+
# CHECK-NEXT: 3 8 1.00 * vcvtph2ps (%rax), %ymm2
29+
# CHECK-NEXT: 3 10 1.00 vcvtps2ph $0, %xmm0, %xmm2
30+
# CHECK-NEXT: 4 13 1.00 * vcvtps2ph $0, %xmm0, (%rax)
31+
# CHECK-NEXT: 3 10 1.00 vcvtps2ph $0, %ymm0, %xmm2
32+
# CHECK-NEXT: 4 13 1.00 * vcvtps2ph $0, %ymm0, (%rax)
3333

3434
# CHECK: Resources:
3535
# CHECK-NEXT: [0] - SBDivider
@@ -43,15 +43,15 @@ vcvtps2ph $0, %ymm0, (%rax)
4343

4444
# CHECK: Resource pressure per iteration:
4545
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
46-
# CHECK-NEXT: - - - 8.00 2.00 - 2.00 2.00
46+
# CHECK-NEXT: - - 8.00 4.00 2.00 5.00 2.00 2.00
4747

4848
# CHECK: Resource pressure by instruction:
4949
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
50-
# CHECK-NEXT: - - - 1.00 - - - - vcvtph2ps %xmm0, %xmm2
51-
# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvtph2ps (%rax), %xmm2
52-
# CHECK-NEXT: - - - 1.00 - - - - vcvtph2ps %xmm0, %ymm2
53-
# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvtph2ps (%rax), %ymm2
54-
# CHECK-NEXT: - - - 1.00 - - - - vcvtps2ph $0, %xmm0, %xmm2
55-
# CHECK-NEXT: - - - 1.00 1.00 - 0.50 0.50 vcvtps2ph $0, %xmm0, (%rax)
56-
# CHECK-NEXT: - - - 1.00 - - - - vcvtps2ph $0, %ymm0, %xmm2
57-
# CHECK-NEXT: - - - 1.00 1.00 - 0.50 0.50 vcvtps2ph $0, %ymm0, (%rax)
50+
# CHECK-NEXT: - - 1.00 - - 1.00 - - vcvtph2ps %xmm0, %xmm2
51+
# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vcvtph2ps (%rax), %xmm2
52+
# CHECK-NEXT: - - 1.00 - - 1.00 - - vcvtph2ps %xmm0, %ymm2
53+
# CHECK-NEXT: - - 1.00 - - 1.00 0.50 0.50 vcvtph2ps (%rax), %ymm2
54+
# CHECK-NEXT: - - 1.00 1.00 - 1.00 - - vcvtps2ph $0, %xmm0, %xmm2
55+
# CHECK-NEXT: - - 1.00 1.00 1.00 - 0.50 0.50 vcvtps2ph $0, %xmm0, (%rax)
56+
# CHECK-NEXT: - - 1.00 1.00 - 1.00 - - vcvtps2ph $0, %ymm0, %xmm2
57+
# CHECK-NEXT: - - 1.00 1.00 1.00 - 0.50 0.50 vcvtps2ph $0, %ymm0, (%rax)

llvm/test/tools/llvm-mca/X86/Haswell/resources-f16c.s

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -45,14 +45,14 @@ vcvtps2ph $0, %ymm0, (%rax)
4545

4646
# CHECK: Resource pressure per iteration:
4747
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9]
48-
# CHECK-NEXT: - - 4.00 4.00 1.67 1.67 2.00 6.00 - 0.67
48+
# CHECK-NEXT: - - - 8.00 1.67 1.67 2.00 6.00 - 0.67
4949

5050
# CHECK: Resource pressure by instruction:
5151
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions:
52-
# CHECK-NEXT: - - 1.00 - - - - 1.00 - - vcvtph2ps %xmm0, %xmm2
53-
# CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - vcvtph2ps (%rax), %xmm2
54-
# CHECK-NEXT: - - 1.00 - - - - 1.00 - - vcvtph2ps %xmm0, %ymm2
55-
# CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - vcvtph2ps (%rax), %ymm2
52+
# CHECK-NEXT: - - - 1.00 - - - 1.00 - - vcvtph2ps %xmm0, %xmm2
53+
# CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vcvtph2ps (%rax), %xmm2
54+
# CHECK-NEXT: - - - 1.00 - - - 1.00 - - vcvtph2ps %xmm0, %ymm2
55+
# CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vcvtph2ps (%rax), %ymm2
5656
# CHECK-NEXT: - - - 1.00 - - - 1.00 - - vcvtps2ph $0, %xmm0, %xmm2
5757
# CHECK-NEXT: - - - 1.00 0.33 0.33 1.00 1.00 - 0.33 vcvtps2ph $0, %xmm0, (%rax)
5858
# CHECK-NEXT: - - - 1.00 - - - 1.00 - - vcvtps2ph $0, %ymm0, %xmm2

llvm/test/tools/llvm-mca/X86/SandyBridge/resources-f16c.s

Lines changed: 16 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -22,14 +22,14 @@ vcvtps2ph $0, %ymm0, (%rax)
2222
# CHECK-NEXT: [6]: HasSideEffects (U)
2323

2424
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
25-
# CHECK-NEXT: 1 3 1.00 vcvtph2ps %xmm0, %xmm2
25+
# CHECK-NEXT: 2 3 1.00 vcvtph2ps %xmm0, %xmm2
2626
# CHECK-NEXT: 2 8 1.00 * vcvtph2ps (%rax), %xmm2
27-
# CHECK-NEXT: 1 3 1.00 vcvtph2ps %xmm0, %ymm2
28-
# CHECK-NEXT: 2 8 1.00 * vcvtph2ps (%rax), %ymm2
29-
# CHECK-NEXT: 1 3 1.00 vcvtps2ph $0, %xmm0, %xmm2
30-
# CHECK-NEXT: 1 4 1.00 * vcvtps2ph $0, %xmm0, (%rax)
31-
# CHECK-NEXT: 1 3 1.00 vcvtps2ph $0, %ymm0, %xmm2
32-
# CHECK-NEXT: 1 4 1.00 * vcvtps2ph $0, %ymm0, (%rax)
27+
# CHECK-NEXT: 2 3 1.00 vcvtph2ps %xmm0, %ymm2
28+
# CHECK-NEXT: 3 8 1.00 * vcvtph2ps (%rax), %ymm2
29+
# CHECK-NEXT: 3 10 1.00 vcvtps2ph $0, %xmm0, %xmm2
30+
# CHECK-NEXT: 4 13 1.00 * vcvtps2ph $0, %xmm0, (%rax)
31+
# CHECK-NEXT: 3 10 1.00 vcvtps2ph $0, %ymm0, %xmm2
32+
# CHECK-NEXT: 4 13 1.00 * vcvtps2ph $0, %ymm0, (%rax)
3333

3434
# CHECK: Resources:
3535
# CHECK-NEXT: [0] - SBDivider
@@ -43,15 +43,15 @@ vcvtps2ph $0, %ymm0, (%rax)
4343

4444
# CHECK: Resource pressure per iteration:
4545
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
46-
# CHECK-NEXT: - - - 8.00 2.00 - 2.00 2.00
46+
# CHECK-NEXT: - - 8.00 4.00 2.00 5.00 2.00 2.00
4747

4848
# CHECK: Resource pressure by instruction:
4949
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
50-
# CHECK-NEXT: - - - 1.00 - - - - vcvtph2ps %xmm0, %xmm2
51-
# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvtph2ps (%rax), %xmm2
52-
# CHECK-NEXT: - - - 1.00 - - - - vcvtph2ps %xmm0, %ymm2
53-
# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvtph2ps (%rax), %ymm2
54-
# CHECK-NEXT: - - - 1.00 - - - - vcvtps2ph $0, %xmm0, %xmm2
55-
# CHECK-NEXT: - - - 1.00 1.00 - 0.50 0.50 vcvtps2ph $0, %xmm0, (%rax)
56-
# CHECK-NEXT: - - - 1.00 - - - - vcvtps2ph $0, %ymm0, %xmm2
57-
# CHECK-NEXT: - - - 1.00 1.00 - 0.50 0.50 vcvtps2ph $0, %ymm0, (%rax)
50+
# CHECK-NEXT: - - 1.00 - - 1.00 - - vcvtph2ps %xmm0, %xmm2
51+
# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vcvtph2ps (%rax), %xmm2
52+
# CHECK-NEXT: - - 1.00 - - 1.00 - - vcvtph2ps %xmm0, %ymm2
53+
# CHECK-NEXT: - - 1.00 - - 1.00 0.50 0.50 vcvtph2ps (%rax), %ymm2
54+
# CHECK-NEXT: - - 1.00 1.00 - 1.00 - - vcvtps2ph $0, %xmm0, %xmm2
55+
# CHECK-NEXT: - - 1.00 1.00 1.00 - 0.50 0.50 vcvtps2ph $0, %xmm0, (%rax)
56+
# CHECK-NEXT: - - 1.00 1.00 - 1.00 - - vcvtps2ph $0, %ymm0, %xmm2
57+
# CHECK-NEXT: - - 1.00 1.00 1.00 - 0.50 0.50 vcvtps2ph $0, %ymm0, (%rax)

0 commit comments

Comments
 (0)