Skip to content

Commit e056129

Browse files
committed
Address comments and fix tests
1 parent 1bf78c8 commit e056129

File tree

2 files changed: 41 additions and 96 deletions.

2 files changed: 41 additions and 96 deletions.

llvm/test/Transforms/LoopVectorize/AArch64/clamped-trip-count.ll

Lines changed: 3 additions & 80 deletions
Original file line number | Diff line number | Diff line change
@@ -1,86 +1,9 @@
1 1
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
2 2
; RUN: opt -S < %s -passes=loop-vectorize -mtriple aarch64-linux-gnu -mattr=+sve 2>&1 | FileCheck %s
3 3

4-
define void @test(ptr nocapture noundef writeonly %dst, i32 noundef %n, i64 noundef %val) local_unnamed_addr #0 {
5-
; CHECK-LABEL: define void @test
6-
; CHECK-SAME: (ptr nocapture noundef writeonly [[DST:%.*]], i32 noundef [[N:%.*]], i64 noundef [[VAL:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
7-
; CHECK-NEXT: entry:
8-
; CHECK-NEXT: [[REM:%.*]] = and i32 [[N]], 63
9-
; CHECK-NEXT: [[CMP8_NOT:%.*]] = icmp eq i32 [[REM]], 0
10-
; CHECK-NEXT: br i1 [[CMP8_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY_PREHEADER:%.*]]
11-
; CHECK: for.body.preheader:
12-
; CHECK-NEXT: [[ADD:%.*]] = add nuw nsw i32 [[REM]], 7
13-
; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[ADD]], 3
14-
; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[SHR]] to i64
15-
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
16-
; CHECK: vector.ph:
17-
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
18-
; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 8
19-
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
20-
; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 8
21-
; CHECK-NEXT: [[TMP4:%.*]] = sub i64 [[TMP3]], 1
22-
; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 8, [[TMP4]]
23-
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
24-
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
25-
; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[DST]], i64 [[N_VEC]]
26-
; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
27-
; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 8
28-
; CHECK-NEXT: [[TMP7:%.*]] = sub i64 8, [[TMP6]]
29-
; CHECK-NEXT: [[TMP8:%.*]] = icmp ugt i64 8, [[TMP6]]
30-
; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i64 [[TMP7]], i64 0
31-
; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 0, i64 8)
32-
; CHECK-NEXT: [[TMP10:%.*]] = call <vscale x 8 x i64> @llvm.experimental.stepvector.nxv8i64()
33-
; CHECK-NEXT: [[TMP11:%.*]] = add <vscale x 8 x i64> [[TMP10]], zeroinitializer
34-
; CHECK-NEXT: [[TMP12:%.*]] = mul <vscale x 8 x i64> [[TMP11]], shufflevector (<vscale x 8 x i64> insertelement (<vscale x 8 x i64> poison, i64 1, i64 0), <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer)
35-
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 8 x i64> zeroinitializer, [[TMP12]]
36-
; CHECK-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64()
37-
; CHECK-NEXT: [[TMP14:%.*]] = mul i64 [[TMP13]], 8
38-
; CHECK-NEXT: [[TMP15:%.*]] = mul i64 1, [[TMP14]]
39-
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[TMP15]], i64 0
40-
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 8 x i64> [[DOTSPLATINSERT]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
41-
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[VAL]], i64 0
42-
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 8 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
43-
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
44-
; CHECK: vector.body:
45-
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
46-
; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <vscale x 8 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ]
47-
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 8 x i64> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
48-
; CHECK-NEXT: [[TMP16:%.*]] = add i64 [[INDEX]], 0
49-
; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP16]]
50-
; CHECK-NEXT: [[TMP17:%.*]] = shl nuw nsw <vscale x 8 x i64> [[VEC_IND]], shufflevector (<vscale x 8 x i64> insertelement (<vscale x 8 x i64> poison, i64 3, i64 0), <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer)
51-
; CHECK-NEXT: [[TMP18:%.*]] = lshr <vscale x 8 x i64> [[BROADCAST_SPLAT]], [[TMP17]]
52-
; CHECK-NEXT: [[TMP19:%.*]] = trunc <vscale x 8 x i64> [[TMP18]] to <vscale x 8 x i8>
53-
; CHECK-NEXT: [[TMP20:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 0
54-
; CHECK-NEXT: call void @llvm.masked.store.nxv8i8.p0(<vscale x 8 x i8> [[TMP19]], ptr [[TMP20]], i32 1, <vscale x 8 x i1> [[ACTIVE_LANE_MASK]])
55-
; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[INDEX]], i64 [[TMP9]])
56-
; CHECK-NEXT: [[TMP21:%.*]] = call i64 @llvm.vscale.i64()
57-
; CHECK-NEXT: [[TMP22:%.*]] = mul i64 [[TMP21]], 8
58-
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP22]]
59-
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 8 x i64> [[VEC_IND]], [[DOTSPLAT]]
60-
; CHECK-NEXT: [[TMP23:%.*]] = xor <vscale x 8 x i1> [[ACTIVE_LANE_MASK_NEXT]], shufflevector (<vscale x 8 x i1> insertelement (<vscale x 8 x i1> poison, i1 true, i64 0), <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer)
61-
; CHECK-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
62-
; CHECK: middle.block:
63-
; CHECK-NEXT: br i1 true, label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]]
64-
; CHECK: scalar.ph:
65-
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
66-
; CHECK-NEXT: [[BC_RESUME_VAL1:%.*]] = phi ptr [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[DST]], [[FOR_BODY_PREHEADER]] ]
67-
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
68-
; CHECK: for.body:
69-
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
70-
; CHECK-NEXT: [[P_OUT_TAIL_09:%.*]] = phi ptr [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[INCDEC_PTR:%.*]], [[FOR_BODY]] ]
71-
; CHECK-NEXT: [[TMP24:%.*]] = shl nuw nsw i64 [[INDVARS_IV]], 3
72-
; CHECK-NEXT: [[SHR3:%.*]] = lshr i64 [[VAL]], [[TMP24]]
73-
; CHECK-NEXT: [[CONV4:%.*]] = trunc i64 [[SHR3]] to i8
74-
; CHECK-NEXT: store i8 [[CONV4]], ptr [[P_OUT_TAIL_09]], align 1
75-
; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds i8, ptr [[P_OUT_TAIL_09]], i64 1
76-
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
77-
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 8
78-
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
79-
; CHECK: for.cond.cleanup.loopexit:
80-
; CHECK-NEXT: br label [[FOR_COND_CLEANUP]]
81-
; CHECK: for.cond.cleanup:
82-
; CHECK-NEXT: ret void
83-
;
4+
define void @clamped_tc_8(ptr nocapture %dst, i32 %n, i64 %val){
5+
; CHECK-LABEL: define void @clamped_tc_8
6+
; CHECK: call void @llvm.masked.store.nxv8i8.p0(<vscale x 8 x i8> %19, ptr %20, i32 1, <vscale x 8 x i1> %active.lane.mask)
84 7
entry:
85 8
%rem = and i32 %n, 63
86 9
%cmp8.not = icmp eq i32 %rem, 0

llvm/test/Transforms/LoopVectorize/RISCV/short-trip-count.ll

Lines changed: 38 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -6,21 +6,32 @@ define void @small_trip_count_min_vlen_128(ptr nocapture %a) nounwind vscale_ran
6 6
; CHECK-NEXT: entry:
7 7
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
8 8
; CHECK: vector.ph:
9+
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.vscale.i32()
10+
; CHECK-NEXT: [[TMP1:%.*]] = mul i32 [[TMP0]], 4
11+
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vscale.i32()
12+
; CHECK-NEXT: [[TMP3:%.*]] = mul i32 [[TMP2]], 4
13+
; CHECK-NEXT: [[TMP4:%.*]] = sub i32 [[TMP3]], 1
14+
; CHECK-NEXT: [[N_RND_UP:%.*]] = add i32 4, [[TMP4]]
15+
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N_RND_UP]], [[TMP1]]
16+
; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[N_RND_UP]], [[N_MOD_VF]]
9 17
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
10 18
; CHECK: vector.body:
11 19
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
12-
; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0
13-
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[TMP0]]
14-
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0
15-
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4
16-
; CHECK-NEXT: [[TMP3:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], <i32 1, i32 1, i32 1, i32 1>
17-
; CHECK-NEXT: store <4 x i32> [[TMP3]], ptr [[TMP2]], align 4
18-
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
20+
; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[INDEX]], 0
21+
; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32 [[TMP5]], i32 4)
22+
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[TMP5]]
23+
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 0
24+
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0(ptr [[TMP7]], i32 4, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x i32> poison)
25+
; CHECK-NEXT: [[TMP8:%.*]] = add nsw <vscale x 4 x i32> [[WIDE_MASKED_LOAD]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
26+
; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0(<vscale x 4 x i32> [[TMP8]], ptr [[TMP7]], i32 4, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]])
27+
; CHECK-NEXT: [[TMP9:%.*]] = call i32 @llvm.vscale.i32()
28+
; CHECK-NEXT: [[TMP10:%.*]] = mul i32 [[TMP9]], 4
29+
; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], [[TMP10]]
19 30
; CHECK-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
20 31
; CHECK: middle.block:
21 32
; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
22 33
; CHECK: scalar.ph:
23-
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 4, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
34+
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
24 35
; CHECK-NEXT: br label [[LOOP:%.*]]
25 36
; CHECK: loop:
26 37
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
@@ -56,21 +67,32 @@ define void @small_trip_count_min_vlen_32(ptr nocapture %a) nounwind vscale_rang
56 67
; CHECK-NEXT: entry:
57 68
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
58 69
; CHECK: vector.ph:
70+
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.vscale.i32()
71+
; CHECK-NEXT: [[TMP1:%.*]] = mul i32 [[TMP0]], 4
72+
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vscale.i32()
73+
; CHECK-NEXT: [[TMP3:%.*]] = mul i32 [[TMP2]], 4
74+
; CHECK-NEXT: [[TMP4:%.*]] = sub i32 [[TMP3]], 1
75+
; CHECK-NEXT: [[N_RND_UP:%.*]] = add i32 4, [[TMP4]]
76+
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N_RND_UP]], [[TMP1]]
77+
; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[N_RND_UP]], [[N_MOD_VF]]
59 78
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
60 79
; CHECK: vector.body:
61 80
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
62-
; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0
63-
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[TMP0]]
64-
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0
65-
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4
66-
; CHECK-NEXT: [[TMP3:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], <i32 1, i32 1, i32 1, i32 1>
67-
; CHECK-NEXT: store <4 x i32> [[TMP3]], ptr [[TMP2]], align 4
68-
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
81+
; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[INDEX]], 0
82+
; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32 [[TMP5]], i32 4)
83+
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[TMP5]]
84+
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 0
85+
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0(ptr [[TMP7]], i32 4, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x i32> poison)
86+
; CHECK-NEXT: [[TMP8:%.*]] = add nsw <vscale x 4 x i32> [[WIDE_MASKED_LOAD]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
87+
; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0(<vscale x 4 x i32> [[TMP8]], ptr [[TMP7]], i32 4, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]])
88+
; CHECK-NEXT: [[TMP9:%.*]] = call i32 @llvm.vscale.i32()
89+
; CHECK-NEXT: [[TMP10:%.*]] = mul i32 [[TMP9]], 4
90+
; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], [[TMP10]]
69 91
; CHECK-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
70 92
; CHECK: middle.block:
71 93
; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
72 94
; CHECK: scalar.ph:
73-
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 4, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
95+
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
74 96
; CHECK-NEXT: br label [[LOOP:%.*]]
75 97
; CHECK: loop:
76 98
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]

0 commit comments

Comments
 (0)