Skip to content

Commit 390d66b

Browse files
committed
[LSR] Add tests for restricting term-fold budget based on exact trip count
1 parent 6050cf2 commit 390d66b

File tree

1 file changed

+121
-8
lines changed

1 file changed

+121
-8
lines changed

llvm/test/Transforms/LoopStrengthReduce/lsr-term-fold.ll

+121-8
Original file line numberDiff line numberDiff line change
@@ -474,8 +474,11 @@ for.end: ; preds = %for.body
474474
ret void
475475
}
476476

477-
define void @expensive_expand_short_tc(ptr %a, i32 %offset, i32 %n) {
478-
; CHECK-LABEL: @expensive_expand_short_tc(
477+
;; The next set of tests exercise various cases with the expansion
478+
;; budget and different trip counts or estimated trip counts.
479+
480+
define void @profiled_short_tc(ptr %a, i32 %offset, i32 %n) {
481+
; CHECK-LABEL: @profiled_short_tc(
479482
; CHECK-NEXT: entry:
480483
; CHECK-NEXT: [[OFFSET_NONZERO:%.*]] = or i32 [[OFFSET:%.*]], 1
481484
; CHECK-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 84
@@ -514,8 +517,8 @@ for.end: ; preds = %for.body
514517
ret void
515518
}
516519

517-
define void @expensive_expand_long_tc(ptr %a, i32 %offset, i32 %n) {
518-
; CHECK-LABEL: @expensive_expand_long_tc(
520+
define void @profiled_long_tc(ptr %a, i32 %offset, i32 %n) {
521+
; CHECK-LABEL: @profiled_long_tc(
519522
; CHECK-NEXT: entry:
520523
; CHECK-NEXT: [[OFFSET_NONZERO:%.*]] = or i32 [[OFFSET:%.*]], 1
521524
; CHECK-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 84
@@ -554,8 +557,8 @@ for.end: ; preds = %for.body
554557
ret void
555558
}
556559

557-
define void @expensive_expand_unknown_tc(ptr %a, i32 %offset, i32 %n) {
558-
; CHECK-LABEL: @expensive_expand_unknown_tc(
560+
define void @unknown_tc(ptr %a, i32 %offset, i32 %n) {
561+
; CHECK-LABEL: @unknown_tc(
559562
; CHECK-NEXT: entry:
560563
; CHECK-NEXT: [[OFFSET_NONZERO:%.*]] = or i32 [[OFFSET:%.*]], 1
561564
; CHECK-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 84
@@ -594,8 +597,8 @@ for.end: ; preds = %for.body
594597
ret void
595598
}
596599

597-
define void @expensive_expand_unknown_tc2(ptr %a, i32 %offset, i32 %n, i32 %step) mustprogress {
598-
; CHECK-LABEL: @expensive_expand_unknown_tc2(
600+
define void @unknown_tc2(ptr %a, i32 %offset, i32 %n, i32 %step) mustprogress {
601+
; CHECK-LABEL: @unknown_tc2(
599602
; CHECK-NEXT: entry:
600603
; CHECK-NEXT: [[OFFSET_NONZERO:%.*]] = or i32 [[OFFSET:%.*]], 1
601604
; CHECK-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 84
@@ -628,3 +631,113 @@ for.body: ; preds = %for.body, %entry
628631
for.end: ; preds = %for.body
629632
ret void
630633
}
634+
635+
;; Single-iteration loop: %lsr.iv starts at 0, is incremented by 1, and the
;; loop exits as soon as %lsr.iv.next equals 1, so the body runs exactly once.
;; %offset.nonzero is %offset with its low bit set, so the pointer step is
;; never zero.
;; The CHECK lines expect the i32 counter to be absent from for.body and the
;; exit test to become a pointer equality between the advanced store address
;; and an end pointer built in the entry block:
;;   %a + 84 + sext(%offset.nonzero)
define void @small_tc_trivial_loop(ptr %a, i32 %offset) {
636+
; CHECK-LABEL: @small_tc_trivial_loop(
637+
; CHECK-NEXT: entry:
638+
; CHECK-NEXT: [[OFFSET_NONZERO:%.*]] = or i32 [[OFFSET:%.*]], 1
639+
; CHECK-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 84
640+
; CHECK-NEXT: [[TMP0:%.*]] = sext i32 [[OFFSET_NONZERO]] to i64
641+
; CHECK-NEXT: [[TMP1:%.*]] = add nsw i64 [[TMP0]], 84
642+
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP1]]
643+
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
644+
; CHECK: for.body:
645+
; CHECK-NEXT: [[LSR_IV1:%.*]] = phi ptr [ [[UGLYGEP2:%.*]], [[FOR_BODY]] ], [ [[UGLYGEP]], [[ENTRY:%.*]] ]
646+
; CHECK-NEXT: store i32 1, ptr [[LSR_IV1]], align 4
647+
; CHECK-NEXT: [[UGLYGEP2]] = getelementptr i8, ptr [[LSR_IV1]], i32 [[OFFSET_NONZERO]]
648+
; CHECK-NEXT: [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND:%.*]] = icmp eq ptr [[UGLYGEP2]], [[SCEVGEP]]
649+
; CHECK-NEXT: br i1 [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
650+
; CHECK: for.end:
651+
; CHECK-NEXT: ret void
652+
;
653+
entry:
654+
%offset.nonzero = or i32 %offset, 1
655+
%uglygep = getelementptr i8, ptr %a, i64 84
656+
br label %for.body
657+
658+
for.body: ; preds = %for.body, %entry
659+
%lsr.iv1 = phi ptr [ %uglygep2, %for.body ], [ %uglygep, %entry ]
660+
%lsr.iv = phi i32 [ %lsr.iv.next, %for.body ], [ 0, %entry ]
661+
store i32 1, ptr %lsr.iv1, align 4
662+
%lsr.iv.next = add nsw i32 %lsr.iv, 1
663+
%uglygep2 = getelementptr i8, ptr %lsr.iv1, i32 %offset.nonzero
664+
%exitcond.not = icmp eq i32 %lsr.iv.next, 1
665+
br i1 %exitcond.not, label %for.end, label %for.body
666+
667+
for.end: ; preds = %for.body
668+
ret void
669+
}
670+
671+
;; Two-iteration loop: %lsr.iv counts up from 0 by 1 and the loop exits when
;; %lsr.iv.next equals 2.
;; The CHECK lines expect the i32 counter to be absent from for.body and the
;; exit test to become a pointer equality against an end pointer built in the
;; entry block:
;;   %a + 84 + (sext(%offset.nonzero) << 1)
;; NOTE(review): the "threshold" in the name is not defined in this part of
;; the file; presumably it refers to the term-fold expansion budget -- confirm.
define void @small_tc_below_threshold(ptr %a, i32 %offset) {
672+
; CHECK-LABEL: @small_tc_below_threshold(
673+
; CHECK-NEXT: entry:
674+
; CHECK-NEXT: [[OFFSET_NONZERO:%.*]] = or i32 [[OFFSET:%.*]], 1
675+
; CHECK-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 84
676+
; CHECK-NEXT: [[TMP0:%.*]] = sext i32 [[OFFSET_NONZERO]] to i64
677+
; CHECK-NEXT: [[TMP1:%.*]] = shl nsw i64 [[TMP0]], 1
678+
; CHECK-NEXT: [[TMP2:%.*]] = add nsw i64 [[TMP1]], 84
679+
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP2]]
680+
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
681+
; CHECK: for.body:
682+
; CHECK-NEXT: [[LSR_IV1:%.*]] = phi ptr [ [[UGLYGEP2:%.*]], [[FOR_BODY]] ], [ [[UGLYGEP]], [[ENTRY:%.*]] ]
683+
; CHECK-NEXT: store i32 1, ptr [[LSR_IV1]], align 4
684+
; CHECK-NEXT: [[UGLYGEP2]] = getelementptr i8, ptr [[LSR_IV1]], i32 [[OFFSET_NONZERO]]
685+
; CHECK-NEXT: [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND:%.*]] = icmp eq ptr [[UGLYGEP2]], [[SCEVGEP]]
686+
; CHECK-NEXT: br i1 [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
687+
; CHECK: for.end:
688+
; CHECK-NEXT: ret void
689+
;
690+
entry:
691+
%offset.nonzero = or i32 %offset, 1
692+
%uglygep = getelementptr i8, ptr %a, i64 84
693+
br label %for.body
694+
695+
for.body: ; preds = %for.body, %entry
696+
%lsr.iv1 = phi ptr [ %uglygep2, %for.body ], [ %uglygep, %entry ]
697+
%lsr.iv = phi i32 [ %lsr.iv.next, %for.body ], [ 0, %entry ]
698+
store i32 1, ptr %lsr.iv1, align 4
699+
%lsr.iv.next = add nsw i32 %lsr.iv, 1
700+
%uglygep2 = getelementptr i8, ptr %lsr.iv1, i32 %offset.nonzero
701+
%exitcond.not = icmp eq i32 %lsr.iv.next, 2
702+
br i1 %exitcond.not, label %for.end, label %for.body
703+
704+
for.end: ; preds = %for.body
705+
ret void
706+
}
707+
708+
;; Ten-iteration loop: %lsr.iv counts up from 0 by 1 and the loop exits when
;; %lsr.iv.next equals 10.
;; The CHECK lines expect the i32 counter to be absent from for.body and the
;; exit test to become a pointer equality against an end pointer built in the
;; entry block, which here needs a multiply:
;;   %a + 84 + sext(%offset.nonzero) * 10
;; NOTE(review): the "threshold" in the name is not defined in this part of
;; the file; presumably it refers to the term-fold expansion budget -- confirm.
define void @small_tc_above_threshold(ptr %a, i32 %offset) {
709+
; CHECK-LABEL: @small_tc_above_threshold(
710+
; CHECK-NEXT: entry:
711+
; CHECK-NEXT: [[OFFSET_NONZERO:%.*]] = or i32 [[OFFSET:%.*]], 1
712+
; CHECK-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 84
713+
; CHECK-NEXT: [[TMP0:%.*]] = sext i32 [[OFFSET_NONZERO]] to i64
714+
; CHECK-NEXT: [[TMP1:%.*]] = mul nsw i64 [[TMP0]], 10
715+
; CHECK-NEXT: [[TMP2:%.*]] = add nsw i64 [[TMP1]], 84
716+
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP2]]
717+
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
718+
; CHECK: for.body:
719+
; CHECK-NEXT: [[LSR_IV1:%.*]] = phi ptr [ [[UGLYGEP2:%.*]], [[FOR_BODY]] ], [ [[UGLYGEP]], [[ENTRY:%.*]] ]
720+
; CHECK-NEXT: store i32 1, ptr [[LSR_IV1]], align 4
721+
; CHECK-NEXT: [[UGLYGEP2]] = getelementptr i8, ptr [[LSR_IV1]], i32 [[OFFSET_NONZERO]]
722+
; CHECK-NEXT: [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND:%.*]] = icmp eq ptr [[UGLYGEP2]], [[SCEVGEP]]
723+
; CHECK-NEXT: br i1 [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
724+
; CHECK: for.end:
725+
; CHECK-NEXT: ret void
726+
;
727+
entry:
728+
%offset.nonzero = or i32 %offset, 1
729+
%uglygep = getelementptr i8, ptr %a, i64 84
730+
br label %for.body
731+
732+
for.body: ; preds = %for.body, %entry
733+
%lsr.iv1 = phi ptr [ %uglygep2, %for.body ], [ %uglygep, %entry ]
734+
%lsr.iv = phi i32 [ %lsr.iv.next, %for.body ], [ 0, %entry ]
735+
store i32 1, ptr %lsr.iv1, align 4
736+
%lsr.iv.next = add nsw i32 %lsr.iv, 1
737+
%uglygep2 = getelementptr i8, ptr %lsr.iv1, i32 %offset.nonzero
738+
%exitcond.not = icmp eq i32 %lsr.iv.next, 10
739+
br i1 %exitcond.not, label %for.end, label %for.body
740+
741+
for.end: ; preds = %for.body
742+
ret void
743+
}

0 commit comments

Comments
 (0)