Skip to content

Commit 1789534

Browse files
authored
[SelectOpt] Don't convert constant selects to branches. (#110858)
Selects that choose between two constants are less profitable to turn into branches, especially if the constants can be folded into the surrounding instructions. They are also cost-modelled in a way that can make them over-optimistically converted to branches: neither branch has any latency depth, yet the constants still need to be materialized. This patch disables selectopt for selects whose true and false operands are both constants. The check currently lives in the target-independent part, as it sounds generic, but I could move it into AArch64 if needed.
1 parent 0548481 commit 1789534

File tree

2 files changed

+77
-1
lines changed

2 files changed

+77
-1
lines changed

llvm/include/llvm/Analysis/TargetTransformInfoImpl.h

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -407,9 +407,13 @@ class TargetTransformInfoImplBase {
407407
bool enableSelectOptimize() const { return true; }
408408

409409
bool shouldTreatInstructionLikeSelect(const Instruction *I) {
410+
// A select with two constant operands will usually be better left as a
411+
// select.
412+
using namespace llvm::PatternMatch;
413+
if (match(I, m_Select(m_Value(), m_Constant(), m_Constant())))
414+
return false;
410415
// If the select is a logical-and/logical-or then it is better treated as a
411416
// and/or by the backend.
412-
using namespace llvm::PatternMatch;
413417
return isa<SelectInst>(I) &&
414418
!match(I, m_CombineOr(m_LogicalAnd(m_Value(), m_Value()),
415419
m_LogicalOr(m_Value(), m_Value())));
Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc -mtriple=aarch64-linux-gnu -mcpu=neoverse-v2 -O3 < %s | FileCheck %s
3+
4+
define i32 @test_const(ptr %in1, ptr %in2, ptr %out, i32 %n, ptr %tbl) {
5+
; CHECK-LABEL: test_const:
6+
; CHECK: // %bb.0: // %entry
7+
; CHECK-NEXT: cmp w3, #1
8+
; CHECK-NEXT: b.lt .LBB0_3
9+
; CHECK-NEXT: // %bb.1: // %for.body.preheader
10+
; CHECK-NEXT: mov w9, #1267 // =0x4f3
11+
; CHECK-NEXT: fmov s1, #1.00000000
12+
; CHECK-NEXT: fmov d2, #5.00000000
13+
; CHECK-NEXT: mov w8, w3
14+
; CHECK-NEXT: movk w9, #16309, lsl #16
15+
; CHECK-NEXT: fmov s0, w9
16+
; CHECK-NEXT: .p2align 5, , 16
17+
; CHECK-NEXT: .LBB0_2: // %for.body
18+
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
19+
; CHECK-NEXT: ldr s4, [x1], #4
20+
; CHECK-NEXT: ldr w9, [x0], #4
21+
; CHECK-NEXT: add w9, w9, #10
22+
; CHECK-NEXT: scvtf d3, w9
23+
; CHECK-NEXT: fmadd s4, s4, s0, s1
24+
; CHECK-NEXT: fabs s4, s4
25+
; CHECK-NEXT: fcvt d4, s4
26+
; CHECK-NEXT: fdiv d3, d3, d4
27+
; CHECK-NEXT: fcmp d3, d2
28+
; CHECK-NEXT: cset w9, lt
29+
; CHECK-NEXT: subs x8, x8, #1
30+
; CHECK-NEXT: ubfiz x9, x9, #4, #32
31+
; CHECK-NEXT: ldr s3, [x4, x9]
32+
; CHECK-NEXT: fcvtzs w9, s3
33+
; CHECK-NEXT: str w9, [x2], #4
34+
; CHECK-NEXT: b.ne .LBB0_2
35+
; CHECK-NEXT: .LBB0_3: // %for.cond.cleanup
36+
; CHECK-NEXT: mov w0, wzr
37+
; CHECK-NEXT: ret
38+
entry:
39+
%cmp15 = icmp sgt i32 %n, 0
40+
br i1 %cmp15, label %for.body.preheader, label %for.cond.cleanup
41+
42+
for.body.preheader: ; preds = %entry
43+
%wide.trip.count = zext nneg i32 %n to i64
44+
br label %for.body
45+
46+
for.cond.cleanup: ; preds = %for.body, %entry
47+
ret i32 0
48+
49+
for.body: ; preds = %for.body.preheader, %for.body
50+
%indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
51+
%arrayidx = getelementptr inbounds i32, ptr %in1, i64 %indvars.iv
52+
%0 = load i32, ptr %arrayidx, align 4
53+
%add = add nsw i32 %0, 10
54+
%conv = sitofp i32 %add to double
55+
%arrayidx2 = getelementptr inbounds float, ptr %in2, i64 %indvars.iv
56+
%1 = load float, ptr %arrayidx2, align 4
57+
%mul = fmul fast float %1, 0x3FF6A09E60000000
58+
%add3 = fadd fast float %mul, 1.000000e+00
59+
%2 = tail call fast float @llvm.fabs.f32(float %add3)
60+
%3 = fpext float %2 to double
61+
%div = fdiv fast double %conv, %3
62+
%cmp5 = fcmp fast olt double %div, 5.000000e+00
63+
%idxprom6 = select i1 %cmp5, i64 4, i64 0
64+
%arrayidx7 = getelementptr inbounds float, ptr %tbl, i64 %idxprom6
65+
%4 = load float, ptr %arrayidx7, align 4
66+
%conv8 = fptosi float %4 to i32
67+
%arrayidx10 = getelementptr inbounds i32, ptr %out, i64 %indvars.iv
68+
store i32 %conv8, ptr %arrayidx10, align 4
69+
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
70+
%exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count
71+
br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
72+
}

0 commit comments

Comments
 (0)