Skip to content

Commit c2352ab

Browse files
committed
Adding the missed optimisation
1 parent 1393eb5 commit c2352ab

File tree

3 files changed

+55
-33
lines changed

3 files changed

+55
-33
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4299,6 +4299,36 @@ static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG) {
42994299
Op.getOperand(1));
43004300
}
43014301

4302+
// Converts SETCC (AND X Y) Z ULT -> SETCC (AND X (Y & ~(Z - 1)) 0 EQ when Y is
4303+
// a power of 2. This is then lowered to ANDS X (Y & ~(Z - 1)) which produces a
4304+
// better opt with EmitComparison.
4305+
static void SimplifySetCCIntoEq(ISD::CondCode &CC, SDValue &LHS, SDValue &RHS,
4306+
SelectionDAG &DAG, const SDLoc DL) {
4307+
switch (CC) {
4308+
default:
4309+
break;
4310+
case ISD::SETULT:
4311+
if (LHS.getOpcode() == ISD::AND) {
4312+
ConstantSDNode *LHSAndConst = dyn_cast<ConstantSDNode>(LHS.getOperand(1));
4313+
ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
4314+
if (LHSAndConst && RHSConst && LHSAndConst->hasOneUse() &&
4315+
RHSConst->hasOneUse()) {
4316+
uint64_t LHSAndConstValue = LHSAndConst->getZExtValue();
4317+
uint64_t RHSConstValue = RHSConst->getZExtValue();
4318+
if (isPowerOf2_64(RHSConstValue)) {
4319+
uint64_t NewMaskValue = LHSAndConstValue & ~(RHSConstValue - 1);
4320+
LHS = DAG.getNode(
4321+
ISD::AND, DL, LHS.getValueType(), LHS.getOperand(0),
4322+
DAG.getConstant(NewMaskValue, DL, LHS.getValueType()));
4323+
RHS = DAG.getConstant(0, DL, RHS.getValueType());
4324+
CC = ISD::SETEQ;
4325+
}
4326+
}
4327+
}
4328+
break;
4329+
}
4330+
}
4331+
43024332
SDValue AArch64TargetLowering::LowerFP_EXTEND(SDValue Op,
43034333
SelectionDAG &DAG) const {
43044334
EVT VT = Op.getValueType();
@@ -10587,6 +10617,9 @@ SDValue AArch64TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
1058710617
}
1058810618

1058910619
if (LHS.getValueType().isInteger()) {
10620+
10621+
SimplifySetCCIntoEq(CC, LHS, RHS, DAG, dl);
10622+
1059010623
SDValue CCVal;
1059110624
SDValue Cmp = getAArch64Cmp(
1059210625
LHS, RHS, ISD::getSetCCInverse(CC, LHS.getValueType()), CCVal, DAG, dl);

llvm/test/CodeGen/AArch64/pr102703.ll

Lines changed: 12 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,8 @@
44
define i1 @lt2_u8(i8 %0) {
55
; CHECK-LABEL: lt2_u8:
66
; CHECK: // %bb.0:
7-
; CHECK-NEXT: and w8, w0, #0xff
8-
; CHECK-NEXT: cmp w8, #2
9-
; CHECK-NEXT: cset w0, lo
7+
; CHECK-NEXT: tst w0, #0xfe
8+
; CHECK-NEXT: cset w0, eq
109
; CHECK-NEXT: ret
1110
%2 = icmp ult i8 %0, 2
1211
ret i1 %2
@@ -15,9 +14,8 @@ define i1 @lt2_u8(i8 %0) {
1514
define i1 @lt4_u8(i8 %0) {
1615
; CHECK-LABEL: lt4_u8:
1716
; CHECK: // %bb.0:
18-
; CHECK-NEXT: and w8, w0, #0xff
19-
; CHECK-NEXT: cmp w8, #4
20-
; CHECK-NEXT: cset w0, lo
17+
; CHECK-NEXT: tst w0, #0xfc
18+
; CHECK-NEXT: cset w0, eq
2119
; CHECK-NEXT: ret
2220
%2 = icmp ult i8 %0, 4
2321
ret i1 %2
@@ -26,9 +24,8 @@ define i1 @lt4_u8(i8 %0) {
2624
define i1 @lt8_u8(i8 %0) {
2725
; CHECK-LABEL: lt8_u8:
2826
; CHECK: // %bb.0:
29-
; CHECK-NEXT: and w8, w0, #0xff
30-
; CHECK-NEXT: cmp w8, #8
31-
; CHECK-NEXT: cset w0, lo
27+
; CHECK-NEXT: tst w0, #0xf8
28+
; CHECK-NEXT: cset w0, eq
3229
; CHECK-NEXT: ret
3330
%2 = icmp ult i8 %0, 8
3431
ret i1 %2
@@ -37,9 +34,8 @@ define i1 @lt8_u8(i8 %0) {
3734
define i1 @lt16_u8(i8 %0) {
3835
; CHECK-LABEL: lt16_u8:
3936
; CHECK: // %bb.0:
40-
; CHECK-NEXT: and w8, w0, #0xff
41-
; CHECK-NEXT: cmp w8, #16
42-
; CHECK-NEXT: cset w0, lo
37+
; CHECK-NEXT: tst w0, #0xf0
38+
; CHECK-NEXT: cset w0, eq
4339
; CHECK-NEXT: ret
4440
%2 = icmp ult i8 %0, 16
4541
ret i1 %2
@@ -48,9 +44,8 @@ define i1 @lt16_u8(i8 %0) {
4844
define i1 @lt32_u8(i8 %0) {
4945
; CHECK-LABEL: lt32_u8:
5046
; CHECK: // %bb.0:
51-
; CHECK-NEXT: and w8, w0, #0xff
52-
; CHECK-NEXT: cmp w8, #32
53-
; CHECK-NEXT: cset w0, lo
47+
; CHECK-NEXT: tst w0, #0xe0
48+
; CHECK-NEXT: cset w0, eq
5449
; CHECK-NEXT: ret
5550
%2 = icmp ult i8 %0, 32
5651
ret i1 %2
@@ -59,9 +54,8 @@ define i1 @lt32_u8(i8 %0) {
5954
define i1 @lt64_u8(i8 %0) {
6055
; CHECK-LABEL: lt64_u8:
6156
; CHECK: // %bb.0:
62-
; CHECK-NEXT: and w8, w0, #0xff
63-
; CHECK-NEXT: cmp w8, #64
64-
; CHECK-NEXT: cset w0, lo
57+
; CHECK-NEXT: tst w0, #0xc0
58+
; CHECK-NEXT: cset w0, eq
6559
; CHECK-NEXT: ret
6660
%2 = icmp ult i8 %0, 64
6761
ret i1 %2

llvm/test/CodeGen/AArch64/signed-truncation-check.ll

Lines changed: 10 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -287,9 +287,8 @@ define i1 @add_ultcmp_bad_i16_i8_add(i16 %x, i16 %y) nounwind {
287287
; CHECK-LABEL: add_ultcmp_bad_i16_i8_add:
288288
; CHECK: // %bb.0:
289289
; CHECK-NEXT: add w8, w0, w1
290-
; CHECK-NEXT: and w8, w8, #0xffff
291-
; CHECK-NEXT: cmp w8, #256
292-
; CHECK-NEXT: cset w0, lo
290+
; CHECK-NEXT: tst w8, #0xff00
291+
; CHECK-NEXT: cset w0, eq
293292
; CHECK-NEXT: ret
294293
%tmp0 = add i16 %x, %y
295294
%tmp1 = icmp ult i16 %tmp0, 256 ; 1U << 8
@@ -328,9 +327,8 @@ define i1 @add_ultcmp_bad_i16_i8_c0notpoweroftwo(i16 %x) nounwind {
328327
; CHECK-LABEL: add_ultcmp_bad_i16_i8_c0notpoweroftwo:
329328
; CHECK: // %bb.0:
330329
; CHECK-NEXT: add w8, w0, #192
331-
; CHECK-NEXT: and w8, w8, #0xffff
332-
; CHECK-NEXT: cmp w8, #256
333-
; CHECK-NEXT: cset w0, lo
330+
; CHECK-NEXT: tst w8, #0xff00
331+
; CHECK-NEXT: cset w0, eq
334332
; CHECK-NEXT: ret
335333
%tmp0 = add i16 %x, 192 ; (1U << (8-1)) + (1U << (8-1-1))
336334
%tmp1 = icmp ult i16 %tmp0, 256 ; 1U << 8
@@ -356,9 +354,8 @@ define i1 @add_ultcmp_bad_i16_i8_magic(i16 %x) nounwind {
356354
; CHECK-LABEL: add_ultcmp_bad_i16_i8_magic:
357355
; CHECK: // %bb.0:
358356
; CHECK-NEXT: add w8, w0, #64
359-
; CHECK-NEXT: and w8, w8, #0xffff
360-
; CHECK-NEXT: cmp w8, #256
361-
; CHECK-NEXT: cset w0, lo
357+
; CHECK-NEXT: tst w8, #0xff00
358+
; CHECK-NEXT: cset w0, eq
362359
; CHECK-NEXT: ret
363360
%tmp0 = add i16 %x, 64 ; 1U << (8-1-1)
364361
%tmp1 = icmp ult i16 %tmp0, 256 ; 1U << 8
@@ -370,9 +367,8 @@ define i1 @add_ultcmp_bad_i16_i4(i16 %x) nounwind {
370367
; CHECK-LABEL: add_ultcmp_bad_i16_i4:
371368
; CHECK: // %bb.0:
372369
; CHECK-NEXT: add w8, w0, #8
373-
; CHECK-NEXT: and w8, w8, #0xffff
374-
; CHECK-NEXT: cmp w8, #16
375-
; CHECK-NEXT: cset w0, lo
370+
; CHECK-NEXT: tst w8, #0xfff0
371+
; CHECK-NEXT: cset w0, eq
376372
; CHECK-NEXT: ret
377373
%tmp0 = add i16 %x, 8 ; 1U << (4-1)
378374
%tmp1 = icmp ult i16 %tmp0, 16 ; 1U << 4
@@ -384,9 +380,8 @@ define i1 @add_ultcmp_bad_i24_i8(i24 %x) nounwind {
384380
; CHECK-LABEL: add_ultcmp_bad_i24_i8:
385381
; CHECK: // %bb.0:
386382
; CHECK-NEXT: add w8, w0, #128
387-
; CHECK-NEXT: and w8, w8, #0xffffff
388-
; CHECK-NEXT: cmp w8, #256
389-
; CHECK-NEXT: cset w0, lo
383+
; CHECK-NEXT: tst w8, #0xffff00
384+
; CHECK-NEXT: cset w0, eq
390385
; CHECK-NEXT: ret
391386
%tmp0 = add i24 %x, 128 ; 1U << (8-1)
392387
%tmp1 = icmp ult i24 %tmp0, 256 ; 1U << 8

0 commit comments

Comments
 (0)