Skip to content

Commit 6a2442e

Browse files
[AArch64] Increase AddedComplexity of BIC
This diff adjusts the AddedComplexity of BIC to bump its position in the list of patterns, so that LLVM picks it instead of MVN + AND. MVN + AND requires 2 cycles, as does e.g. MOV + BIC, but the latter outperforms the former if the instructions producing the operands of BIC can be issued in parallel.

One may consider the following example:

    ldur x15, [x0, #2]                  # 4 cycles
    mvn  x10, x15                       # 1 cycle (depends on ldur)
    and  x9, x10, #0x8080808080808080

vs.

    ldur x15, [x0, #2]                  # 4 cycles
    mov  x9, #0x8080808080808080        # 1 cycle (can be executed in parallel with ldur)
    bic  x9, x9, x15                    # 1 cycle

Test plan: ninja check-all

Differential revision: https://reviews.llvm.org/D133345
1 parent 31f434e commit 6a2442e

File tree

5 files changed

+21
-21
lines changed

5 files changed

+21
-21
lines changed

llvm/lib/Target/AArch64/AArch64InstrFormats.td

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -2994,8 +2994,8 @@ class BaseLogicalRegPseudo<RegisterClass regtype, SDPatternOperator OpNode>
29942994

29952995
// Split from LogicalImm as not all instructions have both.
29962996
multiclass LogicalReg<bits<2> opc, bit N, string mnemonic,
2997-
SDPatternOperator OpNode> {
2998-
let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
2997+
SDPatternOperator OpNode, int AddedComplexityVal = 0> {
2998+
let isReMaterializable = 1, isAsCheapAsAMove = 1, AddedComplexity = AddedComplexityVal in {
29992999
def Wrr : BaseLogicalRegPseudo<GPR32, OpNode>;
30003000
def Xrr : BaseLogicalRegPseudo<GPR64, OpNode>;
30013001
}

llvm/lib/Target/AArch64/AArch64InstrInfo.td

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2097,7 +2097,7 @@ defm BICS : LogicalRegS<0b11, 1, "bics",
20972097
BinOpFrag<(AArch64and_flag node:$LHS, (not node:$RHS))>>;
20982098
defm AND : LogicalReg<0b00, 0, "and", and>;
20992099
defm BIC : LogicalReg<0b00, 1, "bic",
2100-
BinOpFrag<(and node:$LHS, (not node:$RHS))>>;
2100+
BinOpFrag<(and node:$LHS, (not node:$RHS))>, 3>;
21012101
defm EON : LogicalReg<0b10, 1, "eon",
21022102
BinOpFrag<(not (xor node:$LHS, node:$RHS))>>;
21032103
defm EOR : LogicalReg<0b10, 0, "eor", xor>;

llvm/test/CodeGen/AArch64/addsub.ll

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -373,8 +373,8 @@ declare {i8, i1} @llvm.uadd.with.overflow.i8(i8 %a, i8 %b)
373373
define i1 @uadd_add(i8 %a, i8 %b, i8* %p) {
374374
; CHECK-LABEL: uadd_add:
375375
; CHECK: // %bb.0:
376-
; CHECK-NEXT: mvn w8, w0
377-
; CHECK-NEXT: and w8, w8, #0xff
376+
; CHECK-NEXT: mov w8, #255
377+
; CHECK-NEXT: bic w8, w8, w0
378378
; CHECK-NEXT: add w8, w8, w1, uxtb
379379
; CHECK-NEXT: lsr w0, w8, #8
380380
; CHECK-NEXT: add w8, w8, #1

llvm/test/CodeGen/AArch64/dag-combine-setcc.ll

Lines changed: 12 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -5,10 +5,10 @@ define i1 @combine_setcc_eq_vecreduce_or_v8i1(<8 x i8> %a) {
55
; CHECK-LABEL: combine_setcc_eq_vecreduce_or_v8i1:
66
; CHECK: // %bb.0:
77
; CHECK-NEXT: cmeq v0.8b, v0.8b, #0
8+
; CHECK-NEXT: mov w8, #1
89
; CHECK-NEXT: umaxv b0, v0.8b
9-
; CHECK-NEXT: fmov w8, s0
10-
; CHECK-NEXT: mvn w8, w8
11-
; CHECK-NEXT: and w0, w8, #0x1
10+
; CHECK-NEXT: fmov w9, s0
11+
; CHECK-NEXT: bic w0, w8, w9
1212
; CHECK-NEXT: ret
1313
%cmp1 = icmp eq <8 x i8> %a, zeroinitializer
1414
%cast = bitcast <8 x i1> %cmp1 to i8
@@ -20,10 +20,10 @@ define i1 @combine_setcc_eq_vecreduce_or_v16i1(<16 x i8> %a) {
2020
; CHECK-LABEL: combine_setcc_eq_vecreduce_or_v16i1:
2121
; CHECK: // %bb.0:
2222
; CHECK-NEXT: cmeq v0.16b, v0.16b, #0
23+
; CHECK-NEXT: mov w8, #1
2324
; CHECK-NEXT: umaxv b0, v0.16b
24-
; CHECK-NEXT: fmov w8, s0
25-
; CHECK-NEXT: mvn w8, w8
26-
; CHECK-NEXT: and w0, w8, #0x1
25+
; CHECK-NEXT: fmov w9, s0
26+
; CHECK-NEXT: bic w0, w8, w9
2727
; CHECK-NEXT: ret
2828
%cmp1 = icmp eq <16 x i8> %a, zeroinitializer
2929
%cast = bitcast <16 x i1> %cmp1 to i16
@@ -35,12 +35,12 @@ define i1 @combine_setcc_eq_vecreduce_or_v32i1(<32 x i8> %a) {
3535
; CHECK-LABEL: combine_setcc_eq_vecreduce_or_v32i1:
3636
; CHECK: // %bb.0:
3737
; CHECK-NEXT: cmeq v1.16b, v1.16b, #0
38+
; CHECK-NEXT: mov w8, #1
3839
; CHECK-NEXT: cmeq v0.16b, v0.16b, #0
3940
; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
4041
; CHECK-NEXT: umaxv b0, v0.16b
41-
; CHECK-NEXT: fmov w8, s0
42-
; CHECK-NEXT: mvn w8, w8
43-
; CHECK-NEXT: and w0, w8, #0x1
42+
; CHECK-NEXT: fmov w9, s0
43+
; CHECK-NEXT: bic w0, w8, w9
4444
; CHECK-NEXT: ret
4545
%cmp1 = icmp eq <32 x i8> %a, zeroinitializer
4646
%cast = bitcast <32 x i1> %cmp1 to i32
@@ -52,16 +52,16 @@ define i1 @combine_setcc_eq_vecreduce_or_v64i1(<64 x i8> %a) {
5252
; CHECK-LABEL: combine_setcc_eq_vecreduce_or_v64i1:
5353
; CHECK: // %bb.0:
5454
; CHECK-NEXT: cmeq v2.16b, v2.16b, #0
55+
; CHECK-NEXT: mov w8, #1
5556
; CHECK-NEXT: cmeq v3.16b, v3.16b, #0
5657
; CHECK-NEXT: cmeq v1.16b, v1.16b, #0
5758
; CHECK-NEXT: cmeq v0.16b, v0.16b, #0
5859
; CHECK-NEXT: orr v1.16b, v1.16b, v3.16b
5960
; CHECK-NEXT: orr v0.16b, v0.16b, v2.16b
6061
; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
6162
; CHECK-NEXT: umaxv b0, v0.16b
62-
; CHECK-NEXT: fmov w8, s0
63-
; CHECK-NEXT: mvn w8, w8
64-
; CHECK-NEXT: and w0, w8, #0x1
63+
; CHECK-NEXT: fmov w9, s0
64+
; CHECK-NEXT: bic w0, w8, w9
6565
; CHECK-NEXT: ret
6666
%cmp1 = icmp eq <64 x i8> %a, zeroinitializer
6767
%cast = bitcast <64 x i1> %cmp1 to i64

llvm/test/CodeGen/AArch64/select_const.ll

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -9,8 +9,8 @@
99
define i32 @select_0_or_1(i1 %cond) {
1010
; CHECK-LABEL: select_0_or_1:
1111
; CHECK: // %bb.0:
12-
; CHECK-NEXT: mvn w8, w0
13-
; CHECK-NEXT: and w0, w8, #0x1
12+
; CHECK-NEXT: mov w8, #1
13+
; CHECK-NEXT: bic w0, w8, w0
1414
; CHECK-NEXT: ret
1515
%sel = select i1 %cond, i32 0, i32 1
1616
ret i32 %sel
@@ -28,8 +28,8 @@ define i32 @select_0_or_1_zeroext(i1 zeroext %cond) {
2828
define i32 @select_0_or_1_signext(i1 signext %cond) {
2929
; CHECK-LABEL: select_0_or_1_signext:
3030
; CHECK: // %bb.0:
31-
; CHECK-NEXT: mvn w8, w0
32-
; CHECK-NEXT: and w0, w8, #0x1
31+
; CHECK-NEXT: mov w8, #1
32+
; CHECK-NEXT: bic w0, w8, w0
3333
; CHECK-NEXT: ret
3434
%sel = select i1 %cond, i32 0, i32 1
3535
ret i32 %sel

0 commit comments

Comments
 (0)