@@ -453,7 +453,7 @@ define void @non_branch_terminator(ptr %a) {
453
453
; CHECK-NEXT: [[UGLYGEP2]] = getelementptr i8, ptr [[LSR_IV1]], i64 4
454
454
; CHECK-NEXT: [[LSR_IV_NEXT3]] = add nsw i64 [[LSR_IV2]], -1
455
455
; CHECK-NEXT: switch i64 [[LSR_IV2]], label [[FOR_BODY]] [
456
- ; CHECK-NEXT: i64 0, label [[FOR_END:%.*]]
456
+ ; CHECK-NEXT: i64 0, label [[FOR_END:%.*]]
457
457
; CHECK-NEXT: ]
458
458
; CHECK: for.end:
459
459
; CHECK-NEXT: ret void
@@ -473,3 +473,117 @@ for.body: ; preds = %for.body, %entry
473
473
for.end: ; preds = %for.body
474
474
ret void
475
475
}
476
+
477
; Test input: a single-block loop that stores i32 1 through a pointer which
; starts 84 bytes past %a and advances by %offset bytes per iteration. The loop
; exits once an i32 counter that starts at 0 and increments by 1 equals %n.
; The exit branch carries branch_weights 1 (to %for.end) : 3 (to %for.body).
; The expected output keeps only the pointer phi in the loop and exits on a
; pointer equality test against an end address computed in the entry block.
; NOTE(review): the 1:3 weights presumably model a short trip count, going by
; the function name - confirm against the pass's profitability logic.
+ define void @expensive_expand_short_tc (ptr %a , i32 %offset , i32 %n ) {
478
+ ; CHECK-LABEL: @expensive_expand_short_tc(
479
+ ; CHECK-NEXT: entry:
480
+ ; CHECK-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 84
481
+ ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N:%.*]], -1
482
+ ; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
483
+ ; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
484
+ ; CHECK-NEXT: [[TMP3:%.*]] = sext i32 [[OFFSET:%.*]] to i64
485
+ ; CHECK-NEXT: [[TMP4:%.*]] = mul i64 [[TMP2]], [[TMP3]]
486
+ ; CHECK-NEXT: [[TMP5:%.*]] = add nsw i64 [[TMP4]], 84
487
+ ; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP5]]
488
+ ; CHECK-NEXT: br label [[FOR_BODY:%.*]]
489
+ ; CHECK: for.body:
490
+ ; CHECK-NEXT: [[LSR_IV1:%.*]] = phi ptr [ [[UGLYGEP2:%.*]], [[FOR_BODY]] ], [ [[UGLYGEP]], [[ENTRY:%.*]] ]
491
+ ; CHECK-NEXT: store i32 1, ptr [[LSR_IV1]], align 4
492
+ ; CHECK-NEXT: [[UGLYGEP2]] = getelementptr i8, ptr [[LSR_IV1]], i32 [[OFFSET]]
493
+ ; CHECK-NEXT: [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND:%.*]] = icmp eq ptr [[UGLYGEP2]], [[SCEVGEP]]
494
+ ; CHECK-NEXT: br i1 [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND]], label [[FOR_END:%.*]], label [[FOR_BODY]], !prof [[PROF0:![0-9]+]]
495
+ ; CHECK: for.end:
496
+ ; CHECK-NEXT: ret void
497
+ ;
498
+ entry:
499
+ %uglygep = getelementptr i8 , ptr %a , i64 84 ; first store address: %a + 84 bytes
500
+ br label %for.body
501
+
502
+ for.body: ; preds = %for.body, %entry
503
+ %lsr.iv1 = phi ptr [ %uglygep2 , %for.body ], [ %uglygep , %entry ] ; current store address
504
+ %lsr.iv = phi i32 [ %lsr.iv.next , %for.body ], [ 0 , %entry ] ; iteration counter, starts at 0
505
+ store i32 1 , ptr %lsr.iv1 , align 4
506
+ %lsr.iv.next = add nsw i32 %lsr.iv , 1
507
+ %uglygep2 = getelementptr i8 , ptr %lsr.iv1 , i32 %offset ; advance by %offset bytes (stride is not a constant)
508
+ %exitcond.not = icmp eq i32 %lsr.iv.next , %n ; counter-based exit test that the expected output replaces
509
+ br i1 %exitcond.not , label %for.end , label %for.body , !prof !{!"branch_weights" , i32 1 , i32 3 } ; weights in successor order: exit 1, backedge 3
510
+
511
+ for.end: ; preds = %for.body
512
+ ret void
513
+ }
514
+
515
; Test input: a single-block loop that stores i32 1 through a pointer which
; starts 84 bytes past %a and advances by %offset bytes per iteration. The loop
; exits once an i32 counter that starts at 0 and increments by 1 equals %n.
; The exit branch carries branch_weights 1 (to %for.end) : 300 (to %for.body).
; The expected output keeps only the pointer phi in the loop and exits on a
; pointer equality test against an end address computed in the entry block.
; NOTE(review): the 1:300 weights presumably model a long trip count, going by
; the function name - confirm against the pass's profitability logic.
+ define void @expensive_expand_long_tc (ptr %a , i32 %offset , i32 %n ) {
516
+ ; CHECK-LABEL: @expensive_expand_long_tc(
517
+ ; CHECK-NEXT: entry:
518
+ ; CHECK-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 84
519
+ ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N:%.*]], -1
520
+ ; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
521
+ ; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
522
+ ; CHECK-NEXT: [[TMP3:%.*]] = sext i32 [[OFFSET:%.*]] to i64
523
+ ; CHECK-NEXT: [[TMP4:%.*]] = mul i64 [[TMP2]], [[TMP3]]
524
+ ; CHECK-NEXT: [[TMP5:%.*]] = add nsw i64 [[TMP4]], 84
525
+ ; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP5]]
526
+ ; CHECK-NEXT: br label [[FOR_BODY:%.*]]
527
+ ; CHECK: for.body:
528
+ ; CHECK-NEXT: [[LSR_IV1:%.*]] = phi ptr [ [[UGLYGEP2:%.*]], [[FOR_BODY]] ], [ [[UGLYGEP]], [[ENTRY:%.*]] ]
529
+ ; CHECK-NEXT: store i32 1, ptr [[LSR_IV1]], align 4
530
+ ; CHECK-NEXT: [[UGLYGEP2]] = getelementptr i8, ptr [[LSR_IV1]], i32 [[OFFSET]]
531
+ ; CHECK-NEXT: [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND:%.*]] = icmp eq ptr [[UGLYGEP2]], [[SCEVGEP]]
532
+ ; CHECK-NEXT: br i1 [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND]], label [[FOR_END:%.*]], label [[FOR_BODY]], !prof [[PROF1:![0-9]+]]
533
+ ; CHECK: for.end:
534
+ ; CHECK-NEXT: ret void
535
+ ;
536
+ entry:
537
+ %uglygep = getelementptr i8 , ptr %a , i64 84 ; first store address: %a + 84 bytes
538
+ br label %for.body
539
+
540
+ for.body: ; preds = %for.body, %entry
541
+ %lsr.iv1 = phi ptr [ %uglygep2 , %for.body ], [ %uglygep , %entry ] ; current store address
542
+ %lsr.iv = phi i32 [ %lsr.iv.next , %for.body ], [ 0 , %entry ] ; iteration counter, starts at 0
543
+ store i32 1 , ptr %lsr.iv1 , align 4
544
+ %lsr.iv.next = add nsw i32 %lsr.iv , 1
545
+ %uglygep2 = getelementptr i8 , ptr %lsr.iv1 , i32 %offset ; advance by %offset bytes (stride is not a constant)
546
+ %exitcond.not = icmp eq i32 %lsr.iv.next , %n ; counter-based exit test that the expected output replaces
547
+ br i1 %exitcond.not , label %for.end , label %for.body , !prof !{!"branch_weights" , i32 1 , i32 300 } ; weights in successor order: exit 1, backedge 300
548
+
549
+ for.end: ; preds = %for.body
550
+ ret void
551
+ }
552
+
553
; Test input: a single-block loop that stores i32 1 through a pointer which
; starts 84 bytes past %a and advances by %offset bytes per iteration. The loop
; exits once an i32 counter that starts at 0 and increments by 1 equals %n.
; The exit branch has no !prof attachment, so no branch weights are supplied.
; The expected output keeps only the pointer phi in the loop and exits on a
; pointer equality test against an end address computed in the entry block;
; its final branch likewise has no !prof attachment.
; NOTE(review): presumably the no-profile counterpart of the weighted variants
; of this loop, going by the function name - confirm.
+ define void @expensive_expand_unknown_tc (ptr %a , i32 %offset , i32 %n ) {
554
+ ; CHECK-LABEL: @expensive_expand_unknown_tc(
555
+ ; CHECK-NEXT: entry:
556
+ ; CHECK-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 84
557
+ ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N:%.*]], -1
558
+ ; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
559
+ ; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
560
+ ; CHECK-NEXT: [[TMP3:%.*]] = sext i32 [[OFFSET:%.*]] to i64
561
+ ; CHECK-NEXT: [[TMP4:%.*]] = mul i64 [[TMP2]], [[TMP3]]
562
+ ; CHECK-NEXT: [[TMP5:%.*]] = add nsw i64 [[TMP4]], 84
563
+ ; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP5]]
564
+ ; CHECK-NEXT: br label [[FOR_BODY:%.*]]
565
+ ; CHECK: for.body:
566
+ ; CHECK-NEXT: [[LSR_IV1:%.*]] = phi ptr [ [[UGLYGEP2:%.*]], [[FOR_BODY]] ], [ [[UGLYGEP]], [[ENTRY:%.*]] ]
567
+ ; CHECK-NEXT: store i32 1, ptr [[LSR_IV1]], align 4
568
+ ; CHECK-NEXT: [[UGLYGEP2]] = getelementptr i8, ptr [[LSR_IV1]], i32 [[OFFSET]]
569
+ ; CHECK-NEXT: [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND:%.*]] = icmp eq ptr [[UGLYGEP2]], [[SCEVGEP]]
570
+ ; CHECK-NEXT: br i1 [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
571
+ ; CHECK: for.end:
572
+ ; CHECK-NEXT: ret void
573
+ ;
574
+ entry:
575
+ %uglygep = getelementptr i8 , ptr %a , i64 84 ; first store address: %a + 84 bytes
576
+ br label %for.body
577
+
578
+ for.body: ; preds = %for.body, %entry
579
+ %lsr.iv1 = phi ptr [ %uglygep2 , %for.body ], [ %uglygep , %entry ] ; current store address
580
+ %lsr.iv = phi i32 [ %lsr.iv.next , %for.body ], [ 0 , %entry ] ; iteration counter, starts at 0
581
+ store i32 1 , ptr %lsr.iv1 , align 4
582
+ %lsr.iv.next = add nsw i32 %lsr.iv , 1
583
+ %uglygep2 = getelementptr i8 , ptr %lsr.iv1 , i32 %offset ; advance by %offset bytes (stride is not a constant)
584
+ %exitcond.not = icmp eq i32 %lsr.iv.next , %n ; counter-based exit test that the expected output replaces
585
+ br i1 %exitcond.not , label %for.end , label %for.body ; no branch weights attached
586
+
587
+ for.end: ; preds = %for.body
588
+ ret void
589
+ }
0 commit comments