Skip to content

Commit 577c7dd

Browse files
committed
[AArch64] Add a phase-ordering test for vectorizing predicated selects. NFC
1 parent 800a47d commit 577c7dd

File tree

1 file changed

+294
-0
lines changed

1 file changed

+294
-0
lines changed
Lines changed: 294 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,294 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+
; RUN: opt -passes="default<O3>" -S < %s | FileCheck %s
3+
4+
; Every function in this file is fed through the complete O3 pass pipeline and
; the textual IR that comes out is matched against the autogenerated assertions
; that follow each function header. The assertions are regenerated by the
; script named on the first line, so they should not be edited by hand.
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128-Fn32"
5+
; The pipeline is run for a generic AArch64 target (no CPU or feature string).
target triple = "aarch64"
6+
7+
; @monte_simple - single loop with two conditionally updated accumulators.
;
; Equivalent pseudo-C for the input IR below:
;   v0 = 0.0; v1 = 0.0;
;   for (i = 0; i < RAND_BLOCK_LENGTH; i++) {
;     callValue = Y * (double)samples[i] - Z;
;     if (callValue > 0.0) { v0 += callValue; v1 += callValue * callValue; }
;   }
;   return v0 + v1;
;
; Parameters:
;   %nblocks            - stored to a stack slot but never read in this function.
;   %RAND_BLOCK_LENGTH  - signed loop bound.
;   %samples            - base pointer indexed as an array of float.
;   %Y, %Z              - scale and offset applied to each sample.
; All floating-point arithmetic carries the `fast` flag.
;
; The input is written in unoptimized form: every variable lives in an alloca
; and is reloaded before each use. The assertions below pin the O3 result, in
; which the stack slots are gone, the guarded updates have become two `select`
; instructions sharing one compare, and the loop body is still scalar.
;
; NOTE(review): the lifetime calls reference attribute group #2, which has no
; definition in this file as shown - confirm this is intentional.
define nofpclass(nan inf) double @monte_simple(i32 noundef %nblocks, i32 noundef %RAND_BLOCK_LENGTH, ptr noundef %samples, double noundef nofpclass(nan inf) %Y, double noundef nofpclass(nan inf) %Z) {
8+
; CHECK-LABEL: define nofpclass(nan inf) double @monte_simple(
9+
; CHECK-SAME: i32 noundef [[NBLOCKS:%.*]], i32 noundef [[RAND_BLOCK_LENGTH:%.*]], ptr nocapture noundef readonly [[SAMPLES:%.*]], double noundef nofpclass(nan inf) [[Y:%.*]], double noundef nofpclass(nan inf) [[Z:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
10+
; CHECK-NEXT: [[ENTRY:.*]]:
11+
; CHECK-NEXT: [[CMP8:%.*]] = icmp sgt i32 [[RAND_BLOCK_LENGTH]], 0
12+
; CHECK-NEXT: br i1 [[CMP8]], label %[[FOR_BODY_PREHEADER:.*]], label %[[FOR_END:.*]]
13+
; CHECK: [[FOR_BODY_PREHEADER]]:
14+
; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext nneg i32 [[RAND_BLOCK_LENGTH]] to i64
15+
; CHECK-NEXT: br label %[[FOR_BODY:.*]]
16+
; CHECK: [[FOR_BODY]]:
17+
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, %[[FOR_BODY_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ]
18+
; CHECK-NEXT: [[V1_011:%.*]] = phi double [ 0.000000e+00, %[[FOR_BODY_PREHEADER]] ], [ [[V1_1:%.*]], %[[FOR_BODY]] ]
19+
; CHECK-NEXT: [[V0_010:%.*]] = phi double [ 0.000000e+00, %[[FOR_BODY_PREHEADER]] ], [ [[V0_1:%.*]], %[[FOR_BODY]] ]
20+
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[SAMPLES]], i64 [[INDVARS_IV]]
21+
; CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[ARRAYIDX]], align 4
22+
; CHECK-NEXT: [[CONV:%.*]] = fpext float [[TMP0]] to double
23+
; CHECK-NEXT: [[MUL:%.*]] = fmul fast double [[Y]], [[CONV]]
24+
; CHECK-NEXT: [[SUB:%.*]] = fsub fast double [[MUL]], [[Z]]
25+
; CHECK-NEXT: [[CMP1:%.*]] = fcmp fast ogt double [[SUB]], 0.000000e+00
26+
; CHECK-NEXT: [[ADD:%.*]] = fadd fast double [[SUB]], [[V0_010]]
27+
; CHECK-NEXT: [[MUL3:%.*]] = fmul fast double [[SUB]], [[SUB]]
28+
; CHECK-NEXT: [[ADD4:%.*]] = fadd fast double [[MUL3]], [[V1_011]]
29+
; CHECK-NEXT: [[V0_1]] = select i1 [[CMP1]], double [[ADD]], double [[V0_010]]
30+
; CHECK-NEXT: [[V1_1]] = select i1 [[CMP1]], double [[ADD4]], double [[V1_011]]
31+
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
32+
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
33+
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END_LOOPEXIT:.*]], label %[[FOR_BODY]]
34+
; CHECK: [[FOR_END_LOOPEXIT]]:
35+
; CHECK-NEXT: [[TMP1:%.*]] = fadd fast double [[V1_1]], [[V0_1]]
36+
; CHECK-NEXT: br label %[[FOR_END]]
37+
; CHECK: [[FOR_END]]:
38+
; CHECK-NEXT: [[ADD5:%.*]] = phi double [ 0.000000e+00, %[[ENTRY]] ], [ [[TMP1]], %[[FOR_END_LOOPEXIT]] ]
39+
; CHECK-NEXT: ret double [[ADD5]]
40+
;
41+
; One stack slot per parameter and per local variable.
entry:
42+
%nblocks.addr = alloca i32, align 4
43+
%RAND_BLOCK_LENGTH.addr = alloca i32, align 4
44+
%samples.addr = alloca ptr, align 8
45+
%Y.addr = alloca double, align 8
46+
%Z.addr = alloca double, align 8
47+
%i = alloca i32, align 4
48+
%block = alloca i32, align 4
49+
%rngVal = alloca double, align 8
50+
%callValue = alloca double, align 8
51+
%v0 = alloca double, align 8
52+
%v1 = alloca double, align 8
53+
; Spill the incoming arguments to their slots.
store i32 %nblocks, ptr %nblocks.addr, align 4
54+
store i32 %RAND_BLOCK_LENGTH, ptr %RAND_BLOCK_LENGTH.addr, align 4
55+
store ptr %samples, ptr %samples.addr, align 8
56+
store double %Y, ptr %Y.addr, align 8
57+
store double %Z, ptr %Z.addr, align 8
58+
; Open the lifetimes of the locals. %block only receives lifetime markers in
; this function; nothing is stored to it or loaded from it.
call void @llvm.lifetime.start.p0(i64 4, ptr %i) #2
59+
call void @llvm.lifetime.start.p0(i64 4, ptr %block) #2
60+
call void @llvm.lifetime.start.p0(i64 8, ptr %rngVal) #2
61+
call void @llvm.lifetime.start.p0(i64 8, ptr %callValue) #2
62+
call void @llvm.lifetime.start.p0(i64 8, ptr %v0) #2
63+
; v0 = 0.0
store double 0.000000e+00, ptr %v0, align 8
64+
call void @llvm.lifetime.start.p0(i64 8, ptr %v1) #2
65+
; v1 = 0.0
store double 0.000000e+00, ptr %v1, align 8
66+
; i = 0
store i32 0, ptr %i, align 4
67+
br label %for.cond
68+
69+
; Loop test: keep iterating while i < RAND_BLOCK_LENGTH (signed compare).
for.cond: ; preds = %for.inc, %entry
70+
%0 = load i32, ptr %i, align 4
71+
%1 = load i32, ptr %RAND_BLOCK_LENGTH.addr, align 4
72+
%cmp = icmp slt i32 %0, %1
73+
br i1 %cmp, label %for.body, label %for.end
74+
75+
; rngVal = (double) samples[i]; callValue = Y * rngVal - Z;
; then branch on callValue > 0.0.
for.body: ; preds = %for.cond
76+
%2 = load ptr, ptr %samples.addr, align 8
77+
%3 = load i32, ptr %i, align 4
78+
%idxprom = sext i32 %3 to i64
79+
%arrayidx = getelementptr inbounds float, ptr %2, i64 %idxprom
80+
%4 = load float, ptr %arrayidx, align 4
81+
%conv = fpext float %4 to double
82+
store double %conv, ptr %rngVal, align 8
83+
%5 = load double, ptr %Y.addr, align 8
84+
%6 = load double, ptr %rngVal, align 8
85+
%mul = fmul fast double %5, %6
86+
%7 = load double, ptr %Z.addr, align 8
87+
%sub = fsub fast double %mul, %7
88+
store double %sub, ptr %callValue, align 8
89+
%8 = load double, ptr %callValue, align 8
90+
%cmp1 = fcmp fast ogt double %8, 0.000000e+00
91+
br i1 %cmp1, label %if.then, label %if.end
92+
93+
; Guarded update, reached only when callValue > 0.0:
;   v0 += callValue; v1 += callValue * callValue
for.body.placeholder.removed:
127+
128+
define nofpclass(nan inf) double @monte_exp(i32 noundef %nblocks, i32 noundef %RAND_BLOCK_LENGTH, ptr noundef %samples, double noundef nofpclass(nan inf) %Y, double noundef nofpclass(nan inf) %Z) {
129+
; CHECK-LABEL: define nofpclass(nan inf) double @monte_exp(
130+
; CHECK-SAME: i32 noundef [[NBLOCKS:%.*]], i32 noundef [[RAND_BLOCK_LENGTH:%.*]], ptr noundef [[SAMPLES:%.*]], double noundef nofpclass(nan inf) [[Y:%.*]], double noundef nofpclass(nan inf) [[Z:%.*]]) local_unnamed_addr {
131+
; CHECK-NEXT: [[ENTRY:.*]]:
132+
; CHECK-NEXT: [[CMP16:%.*]] = icmp sgt i32 [[NBLOCKS]], 0
133+
; CHECK-NEXT: br i1 [[CMP16]], label %[[FOR_BODY_LR_PH:.*]], label %[[FOR_END10:.*]]
134+
; CHECK: [[FOR_BODY_LR_PH]]:
135+
; CHECK-NEXT: [[CMP211:%.*]] = icmp sgt i32 [[RAND_BLOCK_LENGTH]], 0
136+
; CHECK-NEXT: br i1 [[CMP211]], label %[[FOR_BODY_US_PREHEADER:.*]], label %[[FOR_BODY:.*]]
137+
; CHECK: [[FOR_BODY_US_PREHEADER]]:
138+
; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext nneg i32 [[RAND_BLOCK_LENGTH]] to i64
139+
; CHECK-NEXT: br label %[[FOR_BODY_US:.*]]
140+
; CHECK: [[FOR_BODY_US]]:
141+
; CHECK-NEXT: [[V1_019_US:%.*]] = phi double [ [[V1_2_US:%.*]], %[[FOR_COND1_FOR_INC8_CRIT_EDGE_US:.*]] ], [ 0.000000e+00, %[[FOR_BODY_US_PREHEADER]] ]
142+
; CHECK-NEXT: [[V0_018_US:%.*]] = phi double [ [[V0_2_US:%.*]], %[[FOR_COND1_FOR_INC8_CRIT_EDGE_US]] ], [ 0.000000e+00, %[[FOR_BODY_US_PREHEADER]] ]
143+
; CHECK-NEXT: [[BLOCK_017_US:%.*]] = phi i32 [ [[INC9_US:%.*]], %[[FOR_COND1_FOR_INC8_CRIT_EDGE_US]] ], [ 0, %[[FOR_BODY_US_PREHEADER]] ]
144+
; CHECK-NEXT: tail call void @resample(i32 noundef [[RAND_BLOCK_LENGTH]], ptr noundef [[SAMPLES]])
145+
; CHECK-NEXT: br label %[[FOR_BODY3_US:.*]]
146+
; CHECK: [[FOR_BODY3_US]]:
147+
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, %[[FOR_BODY_US]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY3_US]] ]
148+
; CHECK-NEXT: [[V1_114_US:%.*]] = phi double [ [[V1_019_US]], %[[FOR_BODY_US]] ], [ [[V1_2_US]], %[[FOR_BODY3_US]] ]
149+
; CHECK-NEXT: [[V0_113_US:%.*]] = phi double [ [[V0_018_US]], %[[FOR_BODY_US]] ], [ [[V0_2_US]], %[[FOR_BODY3_US]] ]
150+
; CHECK-NEXT: [[ARRAYIDX_US:%.*]] = getelementptr inbounds float, ptr [[SAMPLES]], i64 [[INDVARS_IV]]
151+
; CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[ARRAYIDX_US]], align 4
152+
; CHECK-NEXT: [[CONV_US:%.*]] = fpext float [[TMP0]] to double
153+
; CHECK-NEXT: [[TMP1:%.*]] = tail call fast double @llvm.exp2.f64(double [[CONV_US]])
154+
; CHECK-NEXT: [[MUL_US:%.*]] = fmul fast double [[TMP1]], [[Y]]
155+
; CHECK-NEXT: [[SUB_US:%.*]] = fsub fast double [[MUL_US]], [[Z]]
156+
; CHECK-NEXT: [[CMP4_US:%.*]] = fcmp fast ogt double [[SUB_US]], 0.000000e+00
157+
; CHECK-NEXT: [[ADD_US:%.*]] = fadd fast double [[SUB_US]], [[V0_113_US]]
158+
; CHECK-NEXT: [[MUL6_US:%.*]] = fmul fast double [[SUB_US]], [[SUB_US]]
159+
; CHECK-NEXT: [[ADD7_US:%.*]] = fadd fast double [[MUL6_US]], [[V1_114_US]]
160+
; CHECK-NEXT: [[V0_2_US]] = select i1 [[CMP4_US]], double [[ADD_US]], double [[V0_113_US]]
161+
; CHECK-NEXT: [[V1_2_US]] = select i1 [[CMP4_US]], double [[ADD7_US]], double [[V1_114_US]]
162+
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
163+
; CHECK-NEXT: [[EXITCOND25_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
164+
; CHECK-NEXT: br i1 [[EXITCOND25_NOT]], label %[[FOR_COND1_FOR_INC8_CRIT_EDGE_US]], label %[[FOR_BODY3_US]]
165+
; CHECK: [[FOR_COND1_FOR_INC8_CRIT_EDGE_US]]:
166+
; CHECK-NEXT: [[INC9_US]] = add nuw nsw i32 [[BLOCK_017_US]], 1
167+
; CHECK-NEXT: [[EXITCOND26_NOT:%.*]] = icmp eq i32 [[INC9_US]], [[NBLOCKS]]
168+
; CHECK-NEXT: br i1 [[EXITCOND26_NOT]], label %[[FOR_END10]], label %[[FOR_BODY_US]]
169+
; CHECK: [[FOR_BODY]]:
170+
; CHECK-NEXT: [[BLOCK_017:%.*]] = phi i32 [ [[INC9:%.*]], %[[FOR_BODY]] ], [ 0, %[[FOR_BODY_LR_PH]] ]
171+
; CHECK-NEXT: tail call void @resample(i32 noundef [[RAND_BLOCK_LENGTH]], ptr noundef [[SAMPLES]])
172+
; CHECK-NEXT: [[INC9]] = add nuw nsw i32 [[BLOCK_017]], 1
173+
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC9]], [[NBLOCKS]]
174+
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END10]], label %[[FOR_BODY]]
175+
; CHECK: [[FOR_END10]]:
176+
; CHECK-NEXT: [[V0_0_LCSSA:%.*]] = phi double [ 0.000000e+00, %[[ENTRY]] ], [ [[V0_2_US]], %[[FOR_COND1_FOR_INC8_CRIT_EDGE_US]] ], [ 0.000000e+00, %[[FOR_BODY]] ]
177+
; CHECK-NEXT: [[V1_0_LCSSA:%.*]] = phi double [ 0.000000e+00, %[[ENTRY]] ], [ [[V1_2_US]], %[[FOR_COND1_FOR_INC8_CRIT_EDGE_US]] ], [ 0.000000e+00, %[[FOR_BODY]] ]
178+
; CHECK-NEXT: [[ADD11:%.*]] = fadd fast double [[V1_0_LCSSA]], [[V0_0_LCSSA]]
179+
; CHECK-NEXT: ret double [[ADD11]]
180+
;
181+
entry:
182+
%nblocks.addr = alloca i32, align 4
183+
%RAND_BLOCK_LENGTH.addr = alloca i32, align 4
184+
%samples.addr = alloca ptr, align 8
185+
%Y.addr = alloca double, align 8
186+
%Z.addr = alloca double, align 8
187+
%i = alloca i32, align 4
188+
%block = alloca i32, align 4
189+
%rngVal = alloca double, align 8
190+
%callValue = alloca double, align 8
191+
%v0 = alloca double, align 8
192+
%v1 = alloca double, align 8
193+
store i32 %nblocks, ptr %nblocks.addr, align 4
194+
store i32 %RAND_BLOCK_LENGTH, ptr %RAND_BLOCK_LENGTH.addr, align 4
195+
store ptr %samples, ptr %samples.addr, align 8
196+
store double %Y, ptr %Y.addr, align 8
197+
store double %Z, ptr %Z.addr, align 8
198+
call void @llvm.lifetime.start.p0(i64 4, ptr %i) #4
199+
call void @llvm.lifetime.start.p0(i64 4, ptr %block) #4
200+
call void @llvm.lifetime.start.p0(i64 8, ptr %rngVal) #4
201+
call void @llvm.lifetime.start.p0(i64 8, ptr %callValue) #4
202+
call void @llvm.lifetime.start.p0(i64 8, ptr %v0) #4
203+
store double 0.000000e+00, ptr %v0, align 8
204+
call void @llvm.lifetime.start.p0(i64 8, ptr %v1) #4
205+
store double 0.000000e+00, ptr %v1, align 8
206+
store i32 0, ptr %block, align 4
207+
br label %for.cond
208+
209+
for.cond: ; preds = %for.inc8, %entry
210+
%0 = load i32, ptr %block, align 4
211+
%1 = load i32, ptr %nblocks.addr, align 4
212+
%cmp = icmp slt i32 %0, %1
213+
br i1 %cmp, label %for.body, label %for.end10
214+
215+
for.body: ; preds = %for.cond
216+
%2 = load i32, ptr %RAND_BLOCK_LENGTH.addr, align 4
217+
%3 = load ptr, ptr %samples.addr, align 8
218+
call void @resample(i32 noundef %2, ptr noundef %3)
219+
store i32 0, ptr %i, align 4
220+
br label %for.cond1
221+
222+
for.cond1: ; preds = %for.inc, %for.body
223+
%4 = load i32, ptr %i, align 4
224+
%5 = load i32, ptr %RAND_BLOCK_LENGTH.addr, align 4
225+
%cmp2 = icmp slt i32 %4, %5
226+
br i1 %cmp2, label %for.body3, label %for.end
227+
228+
for.body3: ; preds = %for.cond1
229+
%6 = load ptr, ptr %samples.addr, align 8
230+
%7 = load i32, ptr %i, align 4
231+
%idxprom = sext i32 %7 to i64
232+
%arrayidx = getelementptr inbounds float, ptr %6, i64 %idxprom
233+
%8 = load float, ptr %arrayidx, align 4
234+
%conv = fpext float %8 to double
235+
store double %conv, ptr %rngVal, align 8
236+
%9 = load double, ptr %Y.addr, align 8
237+
%10 = load double, ptr %rngVal, align 8
238+
%11 = call fast double @llvm.exp2.f64(double %10)
239+
%mul = fmul fast double %9, %11
240+
%12 = load double, ptr %Z.addr, align 8
241+
%sub = fsub fast double %mul, %12
242+
store double %sub, ptr %callValue, align 8
243+
%13 = load double, ptr %callValue, align 8
244+
%cmp4 = fcmp fast ogt double %13, 0.000000e+00
245+
br i1 %cmp4, label %if.then, label %if.end
246+
247+
if.then: ; preds = %for.body3
248+
%14 = load double, ptr %callValue, align 8
249+
%15 = load double, ptr %v0, align 8
250+
%add = fadd fast double %15, %14
251+
store double %add, ptr %v0, align 8
252+
%16 = load double, ptr %callValue, align 8
253+
%17 = load double, ptr %callValue, align 8
254+
%mul6 = fmul fast double %16, %17
255+
%18 = load double, ptr %v1, align 8
256+
%add7 = fadd fast double %18, %mul6
257+
store double %add7, ptr %v1, align 8
258+
br label %if.end
259+
260+
if.end: ; preds = %if.then, %for.body3
261+
br label %for.inc
262+
263+
for.inc: ; preds = %if.end
264+
%19 = load i32, ptr %i, align 4
265+
%inc = add nsw i32 %19, 1
266+
store i32 %inc, ptr %i, align 4
267+
br label %for.cond1
268+
269+
for.end: ; preds = %for.cond1
270+
br label %for.inc8
271+
272+
for.inc8: ; preds = %for.end
273+
%20 = load i32, ptr %block, align 4
274+
%inc9 = add nsw i32 %20, 1
275+
store i32 %inc9, ptr %block, align 4
276+
br label %for.cond
277+
278+
for.end10: ; preds = %for.cond
279+
%21 = load double, ptr %v0, align 8
280+
%22 = load double, ptr %v1, align 8
281+
%add11 = fadd fast double %21, %22
282+
call void @llvm.lifetime.end.p0(i64 8, ptr %v1) #4
283+
call void @llvm.lifetime.end.p0(i64 8, ptr %v0) #4
284+
call void @llvm.lifetime.end.p0(i64 8, ptr %callValue) #4
285+
call void @llvm.lifetime.end.p0(i64 8, ptr %rngVal) #4
286+
call void @llvm.lifetime.end.p0(i64 4, ptr %block) #4
287+
call void @llvm.lifetime.end.p0(i64 4, ptr %i) #4
288+
ret double %add11
289+
}
290+
291+
declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture)
292+
declare void @resample(i32 noundef, ptr noundef)
293+
declare double @llvm.exp2.f64(double)
294+
declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture)

0 commit comments

Comments
 (0)