Skip to content

Commit 3dcf62b

Browse files
committed
[X86] HSW/BDW - vector splat shifts don't use Port5 when loading the shift amount
Noticed while trying to compare splat vs per-element shift perf stats for #39424. Confirmed with uops.info.
1 parent edfa97a commit 3dcf62b

File tree

6 files changed

+76
-72
lines changed

6 files changed

+76
-72
lines changed

llvm/lib/Target/X86/X86SchedBroadwell.td

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -471,9 +471,11 @@ defm : X86WriteResPairUnsupported<WritePSADBWZ>;
471471
defm : BWWriteResPair<WritePHMINPOS, [BWPort0], 5>; // Vector PHMINPOS.
472472

473473
// Vector integer shifts.
474-
defm : BWWriteResPair<WriteVecShift, [BWPort0], 1, [1], 1, 5>;
475-
defm : BWWriteResPair<WriteVecShiftX, [BWPort0,BWPort5], 2, [1,1], 2, 5>;
474+
defm : X86WriteRes<WriteVecShift, [BWPort0], 1, [1], 1>;
475+
defm : X86WriteRes<WriteVecShiftX, [BWPort0,BWPort5], 2, [1,1], 2>;
476476
defm : X86WriteRes<WriteVecShiftY, [BWPort0,BWPort5], 4, [1,1], 2>;
477+
defm : X86WriteRes<WriteVecShiftLd, [BWPort0,BWPort23], 6, [1,1], 2>;
478+
defm : X86WriteRes<WriteVecShiftXLd, [BWPort0,BWPort23], 7, [1,1], 2>;
477479
defm : X86WriteRes<WriteVecShiftYLd, [BWPort0,BWPort23], 7, [1,1], 2>;
478480
defm : X86WriteResPairUnsupported<WriteVecShiftZ>;
479481

llvm/lib/Target/X86/X86SchedHaswell.td

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -469,10 +469,12 @@ defm : HWWriteResPair<WritePSADBWZ, [HWPort0], 5, [1], 1, 7>; // Unsupported = 1
469469
defm : HWWriteResPair<WritePHMINPOS, [HWPort0], 5, [1], 1, 6>;
470470

471471
// Vector integer shifts.
472-
defm : HWWriteResPair<WriteVecShift, [HWPort0], 1, [1], 1, 5>;
473-
defm : HWWriteResPair<WriteVecShiftX, [HWPort0,HWPort5], 2, [1,1], 2, 6>;
472+
defm : X86WriteRes<WriteVecShift, [HWPort0], 1, [1], 1>;
473+
defm : X86WriteRes<WriteVecShiftX, [HWPort0,HWPort5], 2, [1,1], 2>;
474474
defm : X86WriteRes<WriteVecShiftY, [HWPort0,HWPort5], 4, [1,1], 2>;
475475
defm : X86WriteRes<WriteVecShiftZ, [HWPort0,HWPort5], 4, [1,1], 2>; // Unsupported = 1
476+
defm : X86WriteRes<WriteVecShiftLd, [HWPort0,HWPort23], 6, [1,1], 2>;
477+
defm : X86WriteRes<WriteVecShiftXLd, [HWPort0,HWPort23], 8, [1,1], 2>;
476478
defm : X86WriteRes<WriteVecShiftYLd, [HWPort0,HWPort23], 8, [1,1], 2>;
477479
defm : X86WriteRes<WriteVecShiftZLd, [HWPort0,HWPort23], 8, [1,1], 2>; // Unsupported = 1
478480

llvm/test/tools/llvm-mca/X86/Broadwell/resources-avx1.s

Lines changed: 17 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -1564,30 +1564,30 @@ vzeroupper
15641564
# CHECK-NEXT: 2 6 0.50 * vpsignw (%rax), %xmm1, %xmm2
15651565
# CHECK-NEXT: 1 1 1.00 vpslld $1, %xmm0, %xmm2
15661566
# CHECK-NEXT: 2 2 1.00 vpslld %xmm0, %xmm1, %xmm2
1567-
# CHECK-NEXT: 3 7 1.00 * vpslld (%rax), %xmm1, %xmm2
1567+
# CHECK-NEXT: 2 7 1.00 * vpslld (%rax), %xmm1, %xmm2
15681568
# CHECK-NEXT: 1 1 1.00 vpslldq $1, %xmm1, %xmm2
15691569
# CHECK-NEXT: 1 1 1.00 vpsllq $1, %xmm0, %xmm2
15701570
# CHECK-NEXT: 2 2 1.00 vpsllq %xmm0, %xmm1, %xmm2
1571-
# CHECK-NEXT: 3 7 1.00 * vpsllq (%rax), %xmm1, %xmm2
1571+
# CHECK-NEXT: 2 7 1.00 * vpsllq (%rax), %xmm1, %xmm2
15721572
# CHECK-NEXT: 1 1 1.00 vpsllw $1, %xmm0, %xmm2
15731573
# CHECK-NEXT: 2 2 1.00 vpsllw %xmm0, %xmm1, %xmm2
1574-
# CHECK-NEXT: 3 7 1.00 * vpsllw (%rax), %xmm1, %xmm2
1574+
# CHECK-NEXT: 2 7 1.00 * vpsllw (%rax), %xmm1, %xmm2
15751575
# CHECK-NEXT: 1 1 1.00 vpsrad $1, %xmm0, %xmm2
15761576
# CHECK-NEXT: 2 2 1.00 vpsrad %xmm0, %xmm1, %xmm2
1577-
# CHECK-NEXT: 3 7 1.00 * vpsrad (%rax), %xmm1, %xmm2
1577+
# CHECK-NEXT: 2 7 1.00 * vpsrad (%rax), %xmm1, %xmm2
15781578
# CHECK-NEXT: 1 1 1.00 vpsraw $1, %xmm0, %xmm2
15791579
# CHECK-NEXT: 2 2 1.00 vpsraw %xmm0, %xmm1, %xmm2
1580-
# CHECK-NEXT: 3 7 1.00 * vpsraw (%rax), %xmm1, %xmm2
1580+
# CHECK-NEXT: 2 7 1.00 * vpsraw (%rax), %xmm1, %xmm2
15811581
# CHECK-NEXT: 1 1 1.00 vpsrld $1, %xmm0, %xmm2
15821582
# CHECK-NEXT: 2 2 1.00 vpsrld %xmm0, %xmm1, %xmm2
1583-
# CHECK-NEXT: 3 7 1.00 * vpsrld (%rax), %xmm1, %xmm2
1583+
# CHECK-NEXT: 2 7 1.00 * vpsrld (%rax), %xmm1, %xmm2
15841584
# CHECK-NEXT: 1 1 1.00 vpsrldq $1, %xmm1, %xmm2
15851585
# CHECK-NEXT: 1 1 1.00 vpsrlq $1, %xmm0, %xmm2
15861586
# CHECK-NEXT: 2 2 1.00 vpsrlq %xmm0, %xmm1, %xmm2
1587-
# CHECK-NEXT: 3 7 1.00 * vpsrlq (%rax), %xmm1, %xmm2
1587+
# CHECK-NEXT: 2 7 1.00 * vpsrlq (%rax), %xmm1, %xmm2
15881588
# CHECK-NEXT: 1 1 1.00 vpsrlw $1, %xmm0, %xmm2
15891589
# CHECK-NEXT: 2 2 1.00 vpsrlw %xmm0, %xmm1, %xmm2
1590-
# CHECK-NEXT: 3 7 1.00 * vpsrlw (%rax), %xmm1, %xmm2
1590+
# CHECK-NEXT: 2 7 1.00 * vpsrlw (%rax), %xmm1, %xmm2
15911591
# CHECK-NEXT: 1 1 0.50 vpsubb %xmm0, %xmm1, %xmm2
15921592
# CHECK-NEXT: 2 6 0.50 * vpsubb (%rax), %xmm1, %xmm2
15931593
# CHECK-NEXT: 1 1 0.50 vpsubd %xmm0, %xmm1, %xmm2
@@ -1736,7 +1736,7 @@ vzeroupper
17361736

17371737
# CHECK: Resource pressure per iteration:
17381738
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9]
1739-
# CHECK-NEXT: - 257.00 215.25 235.25 176.17 176.17 38.00 432.25 2.25 12.67
1739+
# CHECK-NEXT: - 257.00 215.25 235.25 176.17 176.17 38.00 424.25 2.25 12.67
17401740

17411741
# CHECK: Resource pressure by instruction:
17421742
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions:
@@ -2274,30 +2274,30 @@ vzeroupper
22742274
# CHECK-NEXT: - - - 0.50 0.50 0.50 - 0.50 - - vpsignw (%rax), %xmm1, %xmm2
22752275
# CHECK-NEXT: - - 1.00 - - - - - - - vpslld $1, %xmm0, %xmm2
22762276
# CHECK-NEXT: - - 1.00 - - - - 1.00 - - vpslld %xmm0, %xmm1, %xmm2
2277-
# CHECK-NEXT: - - 1.00 - 0.50 0.50 - 1.00 - - vpslld (%rax), %xmm1, %xmm2
2277+
# CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - vpslld (%rax), %xmm1, %xmm2
22782278
# CHECK-NEXT: - - - - - - - 1.00 - - vpslldq $1, %xmm1, %xmm2
22792279
# CHECK-NEXT: - - 1.00 - - - - - - - vpsllq $1, %xmm0, %xmm2
22802280
# CHECK-NEXT: - - 1.00 - - - - 1.00 - - vpsllq %xmm0, %xmm1, %xmm2
2281-
# CHECK-NEXT: - - 1.00 - 0.50 0.50 - 1.00 - - vpsllq (%rax), %xmm1, %xmm2
2281+
# CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - vpsllq (%rax), %xmm1, %xmm2
22822282
# CHECK-NEXT: - - 1.00 - - - - - - - vpsllw $1, %xmm0, %xmm2
22832283
# CHECK-NEXT: - - 1.00 - - - - 1.00 - - vpsllw %xmm0, %xmm1, %xmm2
2284-
# CHECK-NEXT: - - 1.00 - 0.50 0.50 - 1.00 - - vpsllw (%rax), %xmm1, %xmm2
2284+
# CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - vpsllw (%rax), %xmm1, %xmm2
22852285
# CHECK-NEXT: - - 1.00 - - - - - - - vpsrad $1, %xmm0, %xmm2
22862286
# CHECK-NEXT: - - 1.00 - - - - 1.00 - - vpsrad %xmm0, %xmm1, %xmm2
2287-
# CHECK-NEXT: - - 1.00 - 0.50 0.50 - 1.00 - - vpsrad (%rax), %xmm1, %xmm2
2287+
# CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - vpsrad (%rax), %xmm1, %xmm2
22882288
# CHECK-NEXT: - - 1.00 - - - - - - - vpsraw $1, %xmm0, %xmm2
22892289
# CHECK-NEXT: - - 1.00 - - - - 1.00 - - vpsraw %xmm0, %xmm1, %xmm2
2290-
# CHECK-NEXT: - - 1.00 - 0.50 0.50 - 1.00 - - vpsraw (%rax), %xmm1, %xmm2
2290+
# CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - vpsraw (%rax), %xmm1, %xmm2
22912291
# CHECK-NEXT: - - 1.00 - - - - - - - vpsrld $1, %xmm0, %xmm2
22922292
# CHECK-NEXT: - - 1.00 - - - - 1.00 - - vpsrld %xmm0, %xmm1, %xmm2
2293-
# CHECK-NEXT: - - 1.00 - 0.50 0.50 - 1.00 - - vpsrld (%rax), %xmm1, %xmm2
2293+
# CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - vpsrld (%rax), %xmm1, %xmm2
22942294
# CHECK-NEXT: - - - - - - - 1.00 - - vpsrldq $1, %xmm1, %xmm2
22952295
# CHECK-NEXT: - - 1.00 - - - - - - - vpsrlq $1, %xmm0, %xmm2
22962296
# CHECK-NEXT: - - 1.00 - - - - 1.00 - - vpsrlq %xmm0, %xmm1, %xmm2
2297-
# CHECK-NEXT: - - 1.00 - 0.50 0.50 - 1.00 - - vpsrlq (%rax), %xmm1, %xmm2
2297+
# CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - vpsrlq (%rax), %xmm1, %xmm2
22982298
# CHECK-NEXT: - - 1.00 - - - - - - - vpsrlw $1, %xmm0, %xmm2
22992299
# CHECK-NEXT: - - 1.00 - - - - 1.00 - - vpsrlw %xmm0, %xmm1, %xmm2
2300-
# CHECK-NEXT: - - 1.00 - 0.50 0.50 - 1.00 - - vpsrlw (%rax), %xmm1, %xmm2
2300+
# CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - vpsrlw (%rax), %xmm1, %xmm2
23012301
# CHECK-NEXT: - - - 0.50 - - - 0.50 - - vpsubb %xmm0, %xmm1, %xmm2
23022302
# CHECK-NEXT: - - - 0.50 0.50 0.50 - 0.50 - - vpsubb (%rax), %xmm1, %xmm2
23032303
# CHECK-NEXT: - - - 0.50 - - - 0.50 - - vpsubd %xmm0, %xmm1, %xmm2

llvm/test/tools/llvm-mca/X86/Broadwell/resources-sse2.s

Lines changed: 17 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -596,30 +596,30 @@ xorpd (%rax), %xmm2
596596
# CHECK-NEXT: 2 6 1.00 * pshuflw $1, (%rax), %xmm2
597597
# CHECK-NEXT: 1 1 1.00 pslld $1, %xmm2
598598
# CHECK-NEXT: 2 2 1.00 pslld %xmm0, %xmm2
599-
# CHECK-NEXT: 3 7 1.00 * pslld (%rax), %xmm2
599+
# CHECK-NEXT: 2 7 1.00 * pslld (%rax), %xmm2
600600
# CHECK-NEXT: 1 1 1.00 pslldq $1, %xmm2
601601
# CHECK-NEXT: 1 1 1.00 psllq $1, %xmm2
602602
# CHECK-NEXT: 2 2 1.00 psllq %xmm0, %xmm2
603-
# CHECK-NEXT: 3 7 1.00 * psllq (%rax), %xmm2
603+
# CHECK-NEXT: 2 7 1.00 * psllq (%rax), %xmm2
604604
# CHECK-NEXT: 1 1 1.00 psllw $1, %xmm2
605605
# CHECK-NEXT: 2 2 1.00 psllw %xmm0, %xmm2
606-
# CHECK-NEXT: 3 7 1.00 * psllw (%rax), %xmm2
606+
# CHECK-NEXT: 2 7 1.00 * psllw (%rax), %xmm2
607607
# CHECK-NEXT: 1 1 1.00 psrad $1, %xmm2
608608
# CHECK-NEXT: 2 2 1.00 psrad %xmm0, %xmm2
609-
# CHECK-NEXT: 3 7 1.00 * psrad (%rax), %xmm2
609+
# CHECK-NEXT: 2 7 1.00 * psrad (%rax), %xmm2
610610
# CHECK-NEXT: 1 1 1.00 psraw $1, %xmm2
611611
# CHECK-NEXT: 2 2 1.00 psraw %xmm0, %xmm2
612-
# CHECK-NEXT: 3 7 1.00 * psraw (%rax), %xmm2
612+
# CHECK-NEXT: 2 7 1.00 * psraw (%rax), %xmm2
613613
# CHECK-NEXT: 1 1 1.00 psrld $1, %xmm2
614614
# CHECK-NEXT: 2 2 1.00 psrld %xmm0, %xmm2
615-
# CHECK-NEXT: 3 7 1.00 * psrld (%rax), %xmm2
615+
# CHECK-NEXT: 2 7 1.00 * psrld (%rax), %xmm2
616616
# CHECK-NEXT: 1 1 1.00 psrldq $1, %xmm2
617617
# CHECK-NEXT: 1 1 1.00 psrlq $1, %xmm2
618618
# CHECK-NEXT: 2 2 1.00 psrlq %xmm0, %xmm2
619-
# CHECK-NEXT: 3 7 1.00 * psrlq (%rax), %xmm2
619+
# CHECK-NEXT: 2 7 1.00 * psrlq (%rax), %xmm2
620620
# CHECK-NEXT: 1 1 1.00 psrlw $1, %xmm2
621621
# CHECK-NEXT: 2 2 1.00 psrlw %xmm0, %xmm2
622-
# CHECK-NEXT: 3 7 1.00 * psrlw (%rax), %xmm2
622+
# CHECK-NEXT: 2 7 1.00 * psrlw (%rax), %xmm2
623623
# CHECK-NEXT: 1 1 0.50 psubb %xmm0, %xmm2
624624
# CHECK-NEXT: 2 6 0.50 * psubb (%rax), %xmm2
625625
# CHECK-NEXT: 1 1 0.50 psubd %xmm0, %xmm2
@@ -689,7 +689,7 @@ xorpd (%rax), %xmm2
689689

690690
# CHECK: Resource pressure per iteration:
691691
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9]
692-
# CHECK-NEXT: - 78.00 70.75 95.75 63.17 63.17 14.00 127.25 2.25 4.67
692+
# CHECK-NEXT: - 78.00 70.75 95.75 63.17 63.17 14.00 119.25 2.25 4.67
693693

694694
# CHECK: Resource pressure by instruction:
695695
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions:
@@ -882,30 +882,30 @@ xorpd (%rax), %xmm2
882882
# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - pshuflw $1, (%rax), %xmm2
883883
# CHECK-NEXT: - - 1.00 - - - - - - - pslld $1, %xmm2
884884
# CHECK-NEXT: - - 1.00 - - - - 1.00 - - pslld %xmm0, %xmm2
885-
# CHECK-NEXT: - - 1.00 - 0.50 0.50 - 1.00 - - pslld (%rax), %xmm2
885+
# CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - pslld (%rax), %xmm2
886886
# CHECK-NEXT: - - - - - - - 1.00 - - pslldq $1, %xmm2
887887
# CHECK-NEXT: - - 1.00 - - - - - - - psllq $1, %xmm2
888888
# CHECK-NEXT: - - 1.00 - - - - 1.00 - - psllq %xmm0, %xmm2
889-
# CHECK-NEXT: - - 1.00 - 0.50 0.50 - 1.00 - - psllq (%rax), %xmm2
889+
# CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - psllq (%rax), %xmm2
890890
# CHECK-NEXT: - - 1.00 - - - - - - - psllw $1, %xmm2
891891
# CHECK-NEXT: - - 1.00 - - - - 1.00 - - psllw %xmm0, %xmm2
892-
# CHECK-NEXT: - - 1.00 - 0.50 0.50 - 1.00 - - psllw (%rax), %xmm2
892+
# CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - psllw (%rax), %xmm2
893893
# CHECK-NEXT: - - 1.00 - - - - - - - psrad $1, %xmm2
894894
# CHECK-NEXT: - - 1.00 - - - - 1.00 - - psrad %xmm0, %xmm2
895-
# CHECK-NEXT: - - 1.00 - 0.50 0.50 - 1.00 - - psrad (%rax), %xmm2
895+
# CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - psrad (%rax), %xmm2
896896
# CHECK-NEXT: - - 1.00 - - - - - - - psraw $1, %xmm2
897897
# CHECK-NEXT: - - 1.00 - - - - 1.00 - - psraw %xmm0, %xmm2
898-
# CHECK-NEXT: - - 1.00 - 0.50 0.50 - 1.00 - - psraw (%rax), %xmm2
898+
# CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - psraw (%rax), %xmm2
899899
# CHECK-NEXT: - - 1.00 - - - - - - - psrld $1, %xmm2
900900
# CHECK-NEXT: - - 1.00 - - - - 1.00 - - psrld %xmm0, %xmm2
901-
# CHECK-NEXT: - - 1.00 - 0.50 0.50 - 1.00 - - psrld (%rax), %xmm2
901+
# CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - psrld (%rax), %xmm2
902902
# CHECK-NEXT: - - - - - - - 1.00 - - psrldq $1, %xmm2
903903
# CHECK-NEXT: - - 1.00 - - - - - - - psrlq $1, %xmm2
904904
# CHECK-NEXT: - - 1.00 - - - - 1.00 - - psrlq %xmm0, %xmm2
905-
# CHECK-NEXT: - - 1.00 - 0.50 0.50 - 1.00 - - psrlq (%rax), %xmm2
905+
# CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - psrlq (%rax), %xmm2
906906
# CHECK-NEXT: - - 1.00 - - - - - - - psrlw $1, %xmm2
907907
# CHECK-NEXT: - - 1.00 - - - - 1.00 - - psrlw %xmm0, %xmm2
908-
# CHECK-NEXT: - - 1.00 - 0.50 0.50 - 1.00 - - psrlw (%rax), %xmm2
908+
# CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - psrlw (%rax), %xmm2
909909
# CHECK-NEXT: - - - 0.50 - - - 0.50 - - psubb %xmm0, %xmm2
910910
# CHECK-NEXT: - - - 0.50 0.50 0.50 - 0.50 - - psubb (%rax), %xmm2
911911
# CHECK-NEXT: - - - 0.50 - - - 0.50 - - psubd %xmm0, %xmm2

0 commit comments

Comments
 (0)