Skip to content

Commit 71d64ed

Browse files
committed
[X86][Peephole] Add NDD entries for EFLAGS optimization
1 parent dd3e6c8 commit 71d64ed

File tree

3 files changed

+555
-118
lines changed

3 files changed

+555
-118
lines changed

llvm/lib/Target/X86/X86InstrInfo.cpp

Lines changed: 118 additions & 118 deletions
Original file line numberDiff line numberDiff line change
@@ -4809,96 +4809,96 @@ inline static bool isDefConvertible(const MachineInstr &MI, bool &NoSignFlag,
48094809

48104810
// The shift instructions only modify ZF if their shift count is non-zero.
48114811
// N.B.: The processor truncates the shift count depending on the encoding.
4812-
case X86::SAR8ri:
4813-
case X86::SAR16ri:
4814-
case X86::SAR32ri:
4815-
case X86::SAR64ri:
4816-
case X86::SHR8ri:
4817-
case X86::SHR16ri:
4818-
case X86::SHR32ri:
4819-
case X86::SHR64ri:
4812+
CASE_ND(SAR8ri)
4813+
CASE_ND(SAR16ri)
4814+
CASE_ND(SAR32ri)
4815+
CASE_ND(SAR64ri)
4816+
CASE_ND(SHR8ri)
4817+
CASE_ND(SHR16ri)
4818+
CASE_ND(SHR32ri)
4819+
CASE_ND(SHR64ri)
48204820
return getTruncatedShiftCount(MI, 2) != 0;
48214821

48224822
// Some left shift instructions can be turned into LEA instructions but only
48234823
// if their flags aren't used. Avoid transforming such instructions.
4824-
case X86::SHL8ri:
4825-
case X86::SHL16ri:
4826-
case X86::SHL32ri:
4827-
case X86::SHL64ri: {
4824+
CASE_ND(SHL8ri)
4825+
CASE_ND(SHL16ri)
4826+
CASE_ND(SHL32ri)
4827+
CASE_ND(SHL64ri) {
48284828
unsigned ShAmt = getTruncatedShiftCount(MI, 2);
48294829
if (isTruncatedShiftCountForLEA(ShAmt))
48304830
return false;
48314831
return ShAmt != 0;
48324832
}
48334833

4834-
case X86::SHRD16rri8:
4835-
case X86::SHRD32rri8:
4836-
case X86::SHRD64rri8:
4837-
case X86::SHLD16rri8:
4838-
case X86::SHLD32rri8:
4839-
case X86::SHLD64rri8:
4834+
CASE_ND(SHRD16rri8)
4835+
CASE_ND(SHRD32rri8)
4836+
CASE_ND(SHRD64rri8)
4837+
CASE_ND(SHLD16rri8)
4838+
CASE_ND(SHLD32rri8)
4839+
CASE_ND(SHLD64rri8)
48404840
return getTruncatedShiftCount(MI, 3) != 0;
48414841

4842-
case X86::SUB64ri32:
4843-
case X86::SUB32ri:
4844-
case X86::SUB16ri:
4845-
case X86::SUB8ri:
4846-
case X86::SUB64rr:
4847-
case X86::SUB32rr:
4848-
case X86::SUB16rr:
4849-
case X86::SUB8rr:
4850-
case X86::SUB64rm:
4851-
case X86::SUB32rm:
4852-
case X86::SUB16rm:
4853-
case X86::SUB8rm:
4854-
case X86::DEC64r:
4855-
case X86::DEC32r:
4856-
case X86::DEC16r:
4857-
case X86::DEC8r:
4858-
case X86::ADD64ri32:
4859-
case X86::ADD32ri:
4860-
case X86::ADD16ri:
4861-
case X86::ADD8ri:
4862-
case X86::ADD64rr:
4863-
case X86::ADD32rr:
4864-
case X86::ADD16rr:
4865-
case X86::ADD8rr:
4866-
case X86::ADD64rm:
4867-
case X86::ADD32rm:
4868-
case X86::ADD16rm:
4869-
case X86::ADD8rm:
4870-
case X86::INC64r:
4871-
case X86::INC32r:
4872-
case X86::INC16r:
4873-
case X86::INC8r:
4874-
case X86::ADC64ri32:
4875-
case X86::ADC32ri:
4876-
case X86::ADC16ri:
4877-
case X86::ADC8ri:
4878-
case X86::ADC64rr:
4879-
case X86::ADC32rr:
4880-
case X86::ADC16rr:
4881-
case X86::ADC8rr:
4882-
case X86::ADC64rm:
4883-
case X86::ADC32rm:
4884-
case X86::ADC16rm:
4885-
case X86::ADC8rm:
4886-
case X86::SBB64ri32:
4887-
case X86::SBB32ri:
4888-
case X86::SBB16ri:
4889-
case X86::SBB8ri:
4890-
case X86::SBB64rr:
4891-
case X86::SBB32rr:
4892-
case X86::SBB16rr:
4893-
case X86::SBB8rr:
4894-
case X86::SBB64rm:
4895-
case X86::SBB32rm:
4896-
case X86::SBB16rm:
4897-
case X86::SBB8rm:
4898-
case X86::NEG8r:
4899-
case X86::NEG16r:
4900-
case X86::NEG32r:
4901-
case X86::NEG64r:
4842+
CASE_ND(SUB64ri32)
4843+
CASE_ND(SUB32ri)
4844+
CASE_ND(SUB16ri)
4845+
CASE_ND(SUB8ri)
4846+
CASE_ND(SUB64rr)
4847+
CASE_ND(SUB32rr)
4848+
CASE_ND(SUB16rr)
4849+
CASE_ND(SUB8rr)
4850+
CASE_ND(SUB64rm)
4851+
CASE_ND(SUB32rm)
4852+
CASE_ND(SUB16rm)
4853+
CASE_ND(SUB8rm)
4854+
CASE_ND(DEC64r)
4855+
CASE_ND(DEC32r)
4856+
CASE_ND(DEC16r)
4857+
CASE_ND(DEC8r)
4858+
CASE_ND(ADD64ri32)
4859+
CASE_ND(ADD32ri)
4860+
CASE_ND(ADD16ri)
4861+
CASE_ND(ADD8ri)
4862+
CASE_ND(ADD64rr)
4863+
CASE_ND(ADD32rr)
4864+
CASE_ND(ADD16rr)
4865+
CASE_ND(ADD8rr)
4866+
CASE_ND(ADD64rm)
4867+
CASE_ND(ADD32rm)
4868+
CASE_ND(ADD16rm)
4869+
CASE_ND(ADD8rm)
4870+
CASE_ND(INC64r)
4871+
CASE_ND(INC32r)
4872+
CASE_ND(INC16r)
4873+
CASE_ND(INC8r)
4874+
CASE_ND(ADC64ri32)
4875+
CASE_ND(ADC32ri)
4876+
CASE_ND(ADC16ri)
4877+
CASE_ND(ADC8ri)
4878+
CASE_ND(ADC64rr)
4879+
CASE_ND(ADC32rr)
4880+
CASE_ND(ADC16rr)
4881+
CASE_ND(ADC8rr)
4882+
CASE_ND(ADC64rm)
4883+
CASE_ND(ADC32rm)
4884+
CASE_ND(ADC16rm)
4885+
CASE_ND(ADC8rm)
4886+
CASE_ND(SBB64ri32)
4887+
CASE_ND(SBB32ri)
4888+
CASE_ND(SBB16ri)
4889+
CASE_ND(SBB8ri)
4890+
CASE_ND(SBB64rr)
4891+
CASE_ND(SBB32rr)
4892+
CASE_ND(SBB16rr)
4893+
CASE_ND(SBB8rr)
4894+
CASE_ND(SBB64rm)
4895+
CASE_ND(SBB32rm)
4896+
CASE_ND(SBB16rm)
4897+
CASE_ND(SBB8rm)
4898+
CASE_ND(NEG8r)
4899+
CASE_ND(NEG16r)
4900+
CASE_ND(NEG32r)
4901+
CASE_ND(NEG64r)
49024902
case X86::LZCNT16rr:
49034903
case X86::LZCNT16rm:
49044904
case X86::LZCNT32rr:
@@ -4918,42 +4918,42 @@ inline static bool isDefConvertible(const MachineInstr &MI, bool &NoSignFlag,
49184918
case X86::TZCNT64rr:
49194919
case X86::TZCNT64rm:
49204920
return true;
4921-
case X86::AND64ri32:
4922-
case X86::AND32ri:
4923-
case X86::AND16ri:
4924-
case X86::AND8ri:
4925-
case X86::AND64rr:
4926-
case X86::AND32rr:
4927-
case X86::AND16rr:
4928-
case X86::AND8rr:
4929-
case X86::AND64rm:
4930-
case X86::AND32rm:
4931-
case X86::AND16rm:
4932-
case X86::AND8rm:
4933-
case X86::XOR64ri32:
4934-
case X86::XOR32ri:
4935-
case X86::XOR16ri:
4936-
case X86::XOR8ri:
4937-
case X86::XOR64rr:
4938-
case X86::XOR32rr:
4939-
case X86::XOR16rr:
4940-
case X86::XOR8rr:
4941-
case X86::XOR64rm:
4942-
case X86::XOR32rm:
4943-
case X86::XOR16rm:
4944-
case X86::XOR8rm:
4945-
case X86::OR64ri32:
4946-
case X86::OR32ri:
4947-
case X86::OR16ri:
4948-
case X86::OR8ri:
4949-
case X86::OR64rr:
4950-
case X86::OR32rr:
4951-
case X86::OR16rr:
4952-
case X86::OR8rr:
4953-
case X86::OR64rm:
4954-
case X86::OR32rm:
4955-
case X86::OR16rm:
4956-
case X86::OR8rm:
4921+
CASE_ND(AND64ri32)
4922+
CASE_ND(AND32ri)
4923+
CASE_ND(AND16ri)
4924+
CASE_ND(AND8ri)
4925+
CASE_ND(AND64rr)
4926+
CASE_ND(AND32rr)
4927+
CASE_ND(AND16rr)
4928+
CASE_ND(AND8rr)
4929+
CASE_ND(AND64rm)
4930+
CASE_ND(AND32rm)
4931+
CASE_ND(AND16rm)
4932+
CASE_ND(AND8rm)
4933+
CASE_ND(XOR64ri32)
4934+
CASE_ND(XOR32ri)
4935+
CASE_ND(XOR16ri)
4936+
CASE_ND(XOR8ri)
4937+
CASE_ND(XOR64rr)
4938+
CASE_ND(XOR32rr)
4939+
CASE_ND(XOR16rr)
4940+
CASE_ND(XOR8rr)
4941+
CASE_ND(XOR64rm)
4942+
CASE_ND(XOR32rm)
4943+
CASE_ND(XOR16rm)
4944+
CASE_ND(XOR8rm)
4945+
CASE_ND(OR64ri32)
4946+
CASE_ND(OR32ri)
4947+
CASE_ND(OR16ri)
4948+
CASE_ND(OR8ri)
4949+
CASE_ND(OR64rr)
4950+
CASE_ND(OR32rr)
4951+
CASE_ND(OR16rr)
4952+
CASE_ND(OR8rr)
4953+
CASE_ND(OR64rm)
4954+
CASE_ND(OR32rm)
4955+
CASE_ND(OR16rm)
4956+
CASE_ND(OR8rm)
49574957
case X86::ANDN32rr:
49584958
case X86::ANDN32rm:
49594959
case X86::ANDN64rr:
@@ -5035,10 +5035,10 @@ static X86::CondCode isUseDefConvertible(const MachineInstr &MI) {
50355035
switch (MI.getOpcode()) {
50365036
default:
50375037
return X86::COND_INVALID;
5038-
case X86::NEG8r:
5039-
case X86::NEG16r:
5040-
case X86::NEG32r:
5041-
case X86::NEG64r:
5038+
CASE_ND(NEG8r)
5039+
CASE_ND(NEG16r)
5040+
CASE_ND(NEG32r)
5041+
CASE_ND(NEG64r)
50425042
return X86::COND_AE;
50435043
case X86::LZCNT16rr:
50445044
case X86::LZCNT32rr:
Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2+
# RUN: llc -o - %s -mtriple=x86_64-- -run-pass peephole-opt -mattr=+ndd | FileCheck %s
3+
4+
---
5+
name: opt_redundant_flags_0
6+
body: |
7+
bb.0:
8+
; CHECK-LABEL: name: opt_redundant_flags_0
9+
; CHECK: [[COPY:%[0-9]+]]:gr32 = COPY $esi
10+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gr32 = COPY $edi
11+
; CHECK-NEXT: [[SUB32rr_ND:%[0-9]+]]:gr32 = SUB32rr_ND [[COPY]], [[COPY1]], implicit-def $eflags
12+
; CHECK-NEXT: $eax = COPY [[SUB32rr_ND]]
13+
; CHECK-NEXT: $bl = SETCCr 2, implicit $eflags
14+
%0:gr32 = COPY $esi
15+
%1:gr32 = COPY $edi
16+
%2:gr32 = SUB32rr_ND %0, %1, implicit-def dead $eflags
17+
$eax = COPY %2
18+
; CMP should be removed.
19+
CMP32rr %0, %1, implicit-def $eflags
20+
$bl = SETCCr 2, implicit $eflags
21+
...
22+
---
23+
name: opt_redundant_flags_1
24+
body: |
25+
bb.0:
26+
; CHECK-LABEL: name: opt_redundant_flags_1
27+
; CHECK: [[COPY:%[0-9]+]]:gr32 = COPY $esi
28+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gr32 = COPY $edi
29+
; CHECK-NEXT: [[SUB32rr_ND:%[0-9]+]]:gr32 = SUB32rr_ND [[COPY]], [[COPY1]], implicit-def $eflags
30+
; CHECK-NEXT: $eax = COPY [[SUB32rr_ND]]
31+
; CHECK-NEXT: $bl = SETCCr 6, implicit $eflags
32+
%0:gr32 = COPY $esi
33+
%1:gr32 = COPY $edi
34+
%2:gr32 = SUB32rr_ND %0, %1, implicit-def dead $eflags
35+
$eax = COPY %2
36+
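; CMP should be removed. Its operands are swapped relative to the SUB, so the SETCC condition is swapped as well (3 becomes 6).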
37+
CMP32rr %1, %0, implicit-def $eflags
38+
$bl = SETCCr 3, implicit $eflags
39+
...
40+
---
41+
name: opt_redundant_flags_2
42+
body: |
43+
bb.0:
44+
; CHECK-LABEL: name: opt_redundant_flags_2
45+
; CHECK: [[COPY:%[0-9]+]]:gr32 = COPY $esi
46+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gr32 = COPY $edi
47+
; CHECK-NEXT: [[SUB32rr_ND:%[0-9]+]]:gr32 = SUB32rr_ND [[COPY]], [[COPY1]], implicit-def $eflags
48+
; CHECK-NEXT: $cl = SETCCr 2, implicit $eflags
49+
; CHECK-NEXT: $eax = COPY [[SUB32rr_ND]]
50+
; CHECK-NEXT: $bl = SETCCr 2, implicit $eflags
51+
%0:gr32 = COPY $esi
52+
%1:gr32 = COPY $edi
53+
%2:gr32 = SUB32rr_ND %0, %1, implicit-def $eflags
54+
; An extra EFLAGS reader between the SUB and the CMP shouldn't stop the optimization.
55+
$cl = SETCCr 2, implicit $eflags
56+
$eax = COPY %2
57+
CMP32rr %0, %1, implicit-def $eflags
58+
$bl = SETCCr 2, implicit $eflags
59+
...
60+
---
61+
name: opt_zerocmp_user_0
62+
body: |
63+
bb.0:
64+
; CHECK-LABEL: name: opt_zerocmp_user_0
65+
; CHECK: [[COPY:%[0-9]+]]:gr32 = COPY $esi
66+
; CHECK-NEXT: [[NEG32r_ND:%[0-9]+]]:gr32 = NEG32r_ND [[COPY]], implicit-def $eflags
67+
; CHECK-NEXT: $al = SETCCr 3, implicit $eflags
68+
%0:gr32 = COPY $esi
69+
%1:gr32 = NEG32r_ND %0, implicit-def dead $eflags
70+
; TEST should be removed, and the SETCC condition rewritten (4 becomes 3) to use the flags produced by NEG.
71+
TEST32rr %0, %0, implicit-def $eflags
72+
$al = SETCCr 4, implicit $eflags
73+
...

0 commit comments

Comments
 (0)