diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp index 09e5c080c19cf..d5db3263294a6 100644 --- a/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/llvm/lib/Analysis/ScalarEvolution.cpp @@ -10129,8 +10129,11 @@ const SCEV *ScalarEvolution::stripInjectiveFunctions(const SCEV *S) const { /// A and B isn't important. /// /// If the equation does not have a solution, SCEVCouldNotCompute is returned. -static const SCEV *SolveLinEquationWithOverflow(const APInt &A, const SCEV *B, - ScalarEvolution &SE) { +static const SCEV * +SolveLinEquationWithOverflow(const APInt &A, const SCEV *B, + SmallPtrSetImpl<const SCEVPredicate *> *Predicates, + + ScalarEvolution &SE) { uint32_t BW = A.getBitWidth(); assert(BW == SE.getTypeSizeInBits(B->getType())); assert(A != 0 && "A must be non-zero."); @@ -10146,8 +10149,22 @@ static const SCEV *SolveLinEquationWithOverflow(const APInt &A, const SCEV *B, // // B is divisible by D if and only if the multiplicity of prime factor 2 for B // is not less than multiplicity of this prime factor for D. - if (SE.getMinTrailingZeros(B) < Mult2) - return SE.getCouldNotCompute(); + if (SE.getMinTrailingZeros(B) < Mult2) { + // Check if we can prove there's no remainder using URem. + const SCEV *URem = + SE.getURemExpr(B, SE.getConstant(APInt::getOneBitSet(BW, Mult2))); + const SCEV *Zero = SE.getZero(B->getType()); + if (!SE.isKnownPredicate(CmpInst::ICMP_EQ, URem, Zero)) { + // Try to add a predicate ensuring B is a multiple of 1 << Mult2. + if (!Predicates) + return SE.getCouldNotCompute(); + + // Avoid adding a predicate that is known to be false. + if (SE.isKnownPredicate(CmpInst::ICMP_NE, URem, Zero)) + return SE.getCouldNotCompute(); + Predicates->insert(SE.getEqualPredicate(URem, Zero)); + } + } // 3. Compute I: the multiplicative inverse of (A / D) in arithmetic // modulo (N / D). @@ -10577,8 +10594,9 @@ ScalarEvolution::ExitLimit ScalarEvolution::howFarToZero(const SCEV *V, // Solve the general equation. 
if (!StepC || StepC->getValue()->isZero()) return getCouldNotCompute(); - const SCEV *E = SolveLinEquationWithOverflow(StepC->getAPInt(), - getNegativeSCEV(Start), *this); + const SCEV *E = SolveLinEquationWithOverflow( + StepC->getAPInt(), getNegativeSCEV(Start), + AllowPredicates ? &Predicates : nullptr, *this); const SCEV *M = E; if (E != getCouldNotCompute()) { diff --git a/llvm/test/Analysis/ScalarEvolution/ne-overflow.ll b/llvm/test/Analysis/ScalarEvolution/ne-overflow.ll index 3022281658a75..bb97005e8faf4 100644 --- a/llvm/test/Analysis/ScalarEvolution/ne-overflow.ll +++ b/llvm/test/Analysis/ScalarEvolution/ne-overflow.ll @@ -58,6 +58,15 @@ define void @test_well_defined_infinite_st(i32 %N) mustprogress { ; CHECK-NEXT: Loop %for.body: Unpredictable backedge-taken count. ; CHECK-NEXT: Loop %for.body: Unpredictable constant max backedge-taken count. ; CHECK-NEXT: Loop %for.body: Unpredictable symbolic max backedge-taken count. +; CHECK-NEXT: Loop %for.body: Predicated backedge-taken count is ((-2 + %N) /u 2) +; CHECK-NEXT: Predicates: +; CHECK-NEXT: Equal predicate: (zext i1 (trunc i32 %N to i1) to i32) == 0 +; CHECK-NEXT: Loop %for.body: Predicated constant max backedge-taken count is i32 2147483647 +; CHECK-NEXT: Predicates: +; CHECK-NEXT: Equal predicate: (zext i1 (trunc i32 %N to i1) to i32) == 0 +; CHECK-NEXT: Loop %for.body: Predicated symbolic max backedge-taken count is ((-2 + %N) /u 2) +; CHECK-NEXT: Predicates: +; CHECK-NEXT: Equal predicate: (zext i1 (trunc i32 %N to i1) to i32) == 0 ; entry: br label %for.body @@ -79,6 +88,15 @@ define void @test_well_defined_infinite_ld(i32 %N) mustprogress { ; CHECK-NEXT: Loop %for.body: Unpredictable backedge-taken count. ; CHECK-NEXT: Loop %for.body: Unpredictable constant max backedge-taken count. ; CHECK-NEXT: Loop %for.body: Unpredictable symbolic max backedge-taken count. 
+; CHECK-NEXT: Loop %for.body: Predicated backedge-taken count is ((-2 + %N) /u 2) +; CHECK-NEXT: Predicates: +; CHECK-NEXT: Equal predicate: (zext i1 (trunc i32 %N to i1) to i32) == 0 +; CHECK-NEXT: Loop %for.body: Predicated constant max backedge-taken count is i32 2147483647 +; CHECK-NEXT: Predicates: +; CHECK-NEXT: Equal predicate: (zext i1 (trunc i32 %N to i1) to i32) == 0 +; CHECK-NEXT: Loop %for.body: Predicated symbolic max backedge-taken count is ((-2 + %N) /u 2) +; CHECK-NEXT: Predicates: +; CHECK-NEXT: Equal predicate: (zext i1 (trunc i32 %N to i1) to i32) == 0 ; entry: br label %for.body @@ -100,6 +118,15 @@ define void @test_no_mustprogress(i32 %N) { ; CHECK-NEXT: Loop %for.body: Unpredictable backedge-taken count. ; CHECK-NEXT: Loop %for.body: Unpredictable constant max backedge-taken count. ; CHECK-NEXT: Loop %for.body: Unpredictable symbolic max backedge-taken count. +; CHECK-NEXT: Loop %for.body: Predicated backedge-taken count is ((-2 + %N) /u 2) +; CHECK-NEXT: Predicates: +; CHECK-NEXT: Equal predicate: (zext i1 (trunc i32 %N to i1) to i32) == 0 +; CHECK-NEXT: Loop %for.body: Predicated constant max backedge-taken count is i32 2147483647 +; CHECK-NEXT: Predicates: +; CHECK-NEXT: Equal predicate: (zext i1 (trunc i32 %N to i1) to i32) == 0 +; CHECK-NEXT: Loop %for.body: Predicated symbolic max backedge-taken count is ((-2 + %N) /u 2) +; CHECK-NEXT: Predicates: +; CHECK-NEXT: Equal predicate: (zext i1 (trunc i32 %N to i1) to i32) == 0 ; entry: br label %for.body @@ -187,6 +214,15 @@ define void @test_abnormal_exit(i32 %N) mustprogress { ; CHECK-NEXT: Loop %for.body: Unpredictable backedge-taken count. ; CHECK-NEXT: Loop %for.body: Unpredictable constant max backedge-taken count. ; CHECK-NEXT: Loop %for.body: Unpredictable symbolic max backedge-taken count. 
+; CHECK-NEXT: Loop %for.body: Predicated backedge-taken count is ((-2 + %N) /u 2) +; CHECK-NEXT: Predicates: +; CHECK-NEXT: Equal predicate: (zext i1 (trunc i32 %N to i1) to i32) == 0 +; CHECK-NEXT: Loop %for.body: Predicated constant max backedge-taken count is i32 2147483647 +; CHECK-NEXT: Predicates: +; CHECK-NEXT: Equal predicate: (zext i1 (trunc i32 %N to i1) to i32) == 0 +; CHECK-NEXT: Loop %for.body: Predicated symbolic max backedge-taken count is ((-2 + %N) /u 2) +; CHECK-NEXT: Predicates: +; CHECK-NEXT: Equal predicate: (zext i1 (trunc i32 %N to i1) to i32) == 0 ; entry: br label %for.body @@ -209,10 +245,24 @@ define void @test_other_exit(i32 %N) mustprogress { ; CHECK-NEXT: Loop %for.body: Unpredictable backedge-taken count. ; CHECK-NEXT: exit count for for.body: i32 9 ; CHECK-NEXT: exit count for for.latch: ***COULDNOTCOMPUTE*** +; CHECK-NEXT: predicated exit count for for.latch: ((-2 + %N) /u 2) +; CHECK-NEXT: Predicates: +; CHECK-NEXT: Equal predicate: (zext i1 (trunc i32 %N to i1) to i32) == 0 +; CHECK-EMPTY: ; CHECK-NEXT: Loop %for.body: constant max backedge-taken count is i32 9 ; CHECK-NEXT: Loop %for.body: symbolic max backedge-taken count is i32 9 ; CHECK-NEXT: symbolic max exit count for for.body: i32 9 ; CHECK-NEXT: symbolic max exit count for for.latch: ***COULDNOTCOMPUTE*** +; CHECK-NEXT: predicated symbolic max exit count for for.latch: ((-2 + %N) /u 2) +; CHECK-NEXT: Predicates: +; CHECK-NEXT: Equal predicate: (zext i1 (trunc i32 %N to i1) to i32) == 0 +; CHECK-EMPTY: +; CHECK-NEXT: Loop %for.body: Predicated backedge-taken count is (9 umin ((-2 + %N) /u 2)) +; CHECK-NEXT: Predicates: +; CHECK-NEXT: Equal predicate: (zext i1 (trunc i32 %N to i1) to i32) == 0 +; CHECK-NEXT: Loop %for.body: Predicated symbolic max backedge-taken count is (9 umin ((-2 + %N) /u 2)) +; CHECK-NEXT: Predicates: +; CHECK-NEXT: Equal predicate: (zext i1 (trunc i32 %N to i1) to i32) == 0 ; entry: br label %for.body @@ -267,6 +317,18 @@ define void @test_sext(i64 
%N) mustprogress { ; CHECK-NEXT: Loop %for.body: Unpredictable backedge-taken count. ; CHECK-NEXT: Loop %for.body: Unpredictable constant max backedge-taken count. ; CHECK-NEXT: Loop %for.body: Unpredictable symbolic max backedge-taken count. +; CHECK-NEXT: Loop %for.body: Predicated backedge-taken count is (%N /u 2) +; CHECK-NEXT: Predicates: +; CHECK-NEXT: {0,+,2}<%for.body> Added Flags: +; CHECK-NEXT: Equal predicate: (zext i1 (trunc i64 %N to i1) to i64) == 0 +; CHECK-NEXT: Loop %for.body: Predicated constant max backedge-taken count is i64 9223372036854775807 +; CHECK-NEXT: Predicates: +; CHECK-NEXT: {0,+,2}<%for.body> Added Flags: +; CHECK-NEXT: Equal predicate: (zext i1 (trunc i64 %N to i1) to i64) == 0 +; CHECK-NEXT: Loop %for.body: Predicated symbolic max backedge-taken count is (%N /u 2) +; CHECK-NEXT: Predicates: +; CHECK-NEXT: {0,+,2}<%for.body> Added Flags: +; CHECK-NEXT: Equal predicate: (zext i1 (trunc i64 %N to i1) to i64) == 0 ; entry: br label %for.body @@ -288,6 +350,21 @@ define void @test_zext_of_sext(i64 %N) mustprogress { ; CHECK-NEXT: Loop %for.body: Unpredictable backedge-taken count. ; CHECK-NEXT: Loop %for.body: Unpredictable constant max backedge-taken count. ; CHECK-NEXT: Loop %for.body: Unpredictable symbolic max backedge-taken count. 
+; CHECK-NEXT: Loop %for.body: Predicated backedge-taken count is (%N /u 2) +; CHECK-NEXT: Predicates: +; CHECK-NEXT: {0,+,2}<%for.body> Added Flags: +; CHECK-NEXT: {0,+,2}<%for.body> Added Flags: +; CHECK-NEXT: Equal predicate: (zext i1 (trunc i64 %N to i1) to i64) == 0 +; CHECK-NEXT: Loop %for.body: Predicated constant max backedge-taken count is i64 9223372036854775807 +; CHECK-NEXT: Predicates: +; CHECK-NEXT: {0,+,2}<%for.body> Added Flags: +; CHECK-NEXT: {0,+,2}<%for.body> Added Flags: +; CHECK-NEXT: Equal predicate: (zext i1 (trunc i64 %N to i1) to i64) == 0 +; CHECK-NEXT: Loop %for.body: Predicated symbolic max backedge-taken count is (%N /u 2) +; CHECK-NEXT: Predicates: +; CHECK-NEXT: {0,+,2}<%for.body> Added Flags: +; CHECK-NEXT: {0,+,2}<%for.body> Added Flags: +; CHECK-NEXT: Equal predicate: (zext i1 (trunc i64 %N to i1) to i64) == 0 ; entry: br label %for.body @@ -310,6 +387,18 @@ define void @test_zext_offset(i64 %N) mustprogress { ; CHECK-NEXT: Loop %for.body: Unpredictable backedge-taken count. ; CHECK-NEXT: Loop %for.body: Unpredictable constant max backedge-taken count. ; CHECK-NEXT: Loop %for.body: Unpredictable symbolic max backedge-taken count. 
+; CHECK-NEXT: Loop %for.body: Predicated backedge-taken count is ((-21 + %N) /u 2) +; CHECK-NEXT: Predicates: +; CHECK-NEXT: {0,+,2}<%for.body> Added Flags: +; CHECK-NEXT: Equal predicate: (zext i1 (true + (trunc i64 %N to i1)) to i64) == 0 +; CHECK-NEXT: Loop %for.body: Predicated constant max backedge-taken count is i64 9223372036854775807 +; CHECK-NEXT: Predicates: +; CHECK-NEXT: {0,+,2}<%for.body> Added Flags: +; CHECK-NEXT: Equal predicate: (zext i1 (true + (trunc i64 %N to i1)) to i64) == 0 +; CHECK-NEXT: Loop %for.body: Predicated symbolic max backedge-taken count is ((-21 + %N) /u 2) +; CHECK-NEXT: Predicates: +; CHECK-NEXT: {0,+,2}<%for.body> Added Flags: +; CHECK-NEXT: Equal predicate: (zext i1 (true + (trunc i64 %N to i1)) to i64) == 0 ; entry: br label %for.body @@ -332,6 +421,18 @@ define void @test_sext_offset(i64 %N) mustprogress { ; CHECK-NEXT: Loop %for.body: Unpredictable backedge-taken count. ; CHECK-NEXT: Loop %for.body: Unpredictable constant max backedge-taken count. ; CHECK-NEXT: Loop %for.body: Unpredictable symbolic max backedge-taken count. 
+; CHECK-NEXT: Loop %for.body: Predicated backedge-taken count is ((-21 + %N) /u 2) +; CHECK-NEXT: Predicates: +; CHECK-NEXT: {0,+,2}<%for.body> Added Flags: +; CHECK-NEXT: Equal predicate: (zext i1 (true + (trunc i64 %N to i1)) to i64) == 0 +; CHECK-NEXT: Loop %for.body: Predicated constant max backedge-taken count is i64 9223372036854775807 +; CHECK-NEXT: Predicates: +; CHECK-NEXT: {0,+,2}<%for.body> Added Flags: +; CHECK-NEXT: Equal predicate: (zext i1 (true + (trunc i64 %N to i1)) to i64) == 0 +; CHECK-NEXT: Loop %for.body: Predicated symbolic max backedge-taken count is ((-21 + %N) /u 2) +; CHECK-NEXT: Predicates: +; CHECK-NEXT: {0,+,2}<%for.body> Added Flags: +; CHECK-NEXT: Equal predicate: (zext i1 (true + (trunc i64 %N to i1)) to i64) == 0 ; entry: br label %for.body diff --git a/llvm/test/Analysis/ScalarEvolution/predicated-max-backedge-taken-count-guard-info.ll b/llvm/test/Analysis/ScalarEvolution/predicated-max-backedge-taken-count-guard-info.ll index ba3a5b4cfb3ae..1805b983c8e2e 100644 --- a/llvm/test/Analysis/ScalarEvolution/predicated-max-backedge-taken-count-guard-info.ll +++ b/llvm/test/Analysis/ScalarEvolution/predicated-max-backedge-taken-count-guard-info.ll @@ -7,6 +7,15 @@ define i32 @ptr_induction_ult(ptr %a, ptr %b) { ; CHECK-NEXT: Loop %loop: Unpredictable backedge-taken count. ; CHECK-NEXT: Loop %loop: Unpredictable constant max backedge-taken count. ; CHECK-NEXT: Loop %loop: Unpredictable symbolic max backedge-taken count. 
+; CHECK-NEXT: Loop %loop: Predicated backedge-taken count is (((-1 * (ptrtoint ptr %a to i64)) + (ptrtoint ptr %b to i64)) /u 4) +; CHECK-NEXT: Predicates: +; CHECK-NEXT: Equal predicate: (zext i2 ((trunc i64 (ptrtoint ptr %b to i64) to i2) + (-1 * (trunc i64 (ptrtoint ptr %a to i64) to i2))) to i64) == 0 +; CHECK-NEXT: Loop %loop: Predicated constant max backedge-taken count is i64 4611686018427387903 +; CHECK-NEXT: Predicates: +; CHECK-NEXT: Equal predicate: (zext i2 ((trunc i64 (ptrtoint ptr %b to i64) to i2) + (-1 * (trunc i64 (ptrtoint ptr %a to i64) to i2))) to i64) == 0 +; CHECK-NEXT: Loop %loop: Predicated symbolic max backedge-taken count is (((-1 * (ptrtoint ptr %a to i64)) + (ptrtoint ptr %b to i64)) /u 4) +; CHECK-NEXT: Predicates: +; CHECK-NEXT: Equal predicate: (zext i2 ((trunc i64 (ptrtoint ptr %b to i64) to i2) + (-1 * (trunc i64 (ptrtoint ptr %a to i64) to i2))) to i64) == 0 ; entry: %cmp.6 = icmp ult ptr %a, %b @@ -28,6 +37,15 @@ define i32 @ptr_induction_ult_3_step_6(ptr %a, ptr %b) { ; CHECK-NEXT: Loop %loop: Unpredictable backedge-taken count. ; CHECK-NEXT: Loop %loop: Unpredictable constant max backedge-taken count. ; CHECK-NEXT: Loop %loop: Unpredictable symbolic max backedge-taken count. 
+; CHECK-NEXT: Loop %loop: Predicated backedge-taken count is (((3074457345618258603 * (ptrtoint ptr %b to i64)) + (-3074457345618258603 * (ptrtoint ptr %a to i64))) /u 2) +; CHECK-NEXT: Predicates: +; CHECK-NEXT: Equal predicate: (zext i1 (trunc i64 ((-1 * (ptrtoint ptr %a to i64)) + (ptrtoint ptr %b to i64)) to i1) to i64) == 0 +; CHECK-NEXT: Loop %loop: Predicated constant max backedge-taken count is i64 9223372036854775807 +; CHECK-NEXT: Predicates: +; CHECK-NEXT: Equal predicate: (zext i1 (trunc i64 ((-1 * (ptrtoint ptr %a to i64)) + (ptrtoint ptr %b to i64)) to i1) to i64) == 0 +; CHECK-NEXT: Loop %loop: Predicated symbolic max backedge-taken count is (((3074457345618258603 * (ptrtoint ptr %b to i64)) + (-3074457345618258603 * (ptrtoint ptr %a to i64))) /u 2) +; CHECK-NEXT: Predicates: +; CHECK-NEXT: Equal predicate: (zext i1 (trunc i64 ((-1 * (ptrtoint ptr %a to i64)) + (ptrtoint ptr %b to i64)) to i1) to i64) == 0 ; entry: %cmp.6 = icmp ult ptr %a, %b @@ -74,10 +92,24 @@ define void @ptr_induction_early_exit_eq_1(ptr %a, ptr %b, ptr %c) { ; CHECK-NEXT: Loop %loop: Unpredictable backedge-taken count. ; CHECK-NEXT: exit count for loop: ***COULDNOTCOMPUTE*** ; CHECK-NEXT: exit count for loop.inc: ***COULDNOTCOMPUTE*** +; CHECK-NEXT: predicated exit count for loop.inc: ((-8 + (-1 * (ptrtoint ptr %a to i64)) + (ptrtoint ptr %b to i64)) /u 8) +; CHECK-NEXT: Predicates: +; CHECK-NEXT: Equal predicate: (zext i3 ((trunc i64 (ptrtoint ptr %b to i64) to i3) + (-1 * (trunc i64 (ptrtoint ptr %a to i64) to i3))) to i64) == 0 +; CHECK-EMPTY: ; CHECK-NEXT: Loop %loop: Unpredictable constant max backedge-taken count. ; CHECK-NEXT: Loop %loop: Unpredictable symbolic max backedge-taken count. 
; CHECK-NEXT: symbolic max exit count for loop: ***COULDNOTCOMPUTE*** ; CHECK-NEXT: symbolic max exit count for loop.inc: ***COULDNOTCOMPUTE*** +; CHECK-NEXT: predicated symbolic max exit count for loop.inc: ((-8 + (-1 * (ptrtoint ptr %a to i64)) + (ptrtoint ptr %b to i64)) /u 8) +; CHECK-NEXT: Predicates: +; CHECK-NEXT: Equal predicate: (zext i3 ((trunc i64 (ptrtoint ptr %b to i64) to i3) + (-1 * (trunc i64 (ptrtoint ptr %a to i64) to i3))) to i64) == 0 +; CHECK-EMPTY: +; CHECK-NEXT: Loop %loop: Predicated constant max backedge-taken count is i64 2305843009213693951 +; CHECK-NEXT: Predicates: +; CHECK-NEXT: Equal predicate: (zext i3 ((trunc i64 (ptrtoint ptr %b to i64) to i3) + (-1 * (trunc i64 (ptrtoint ptr %a to i64) to i3))) to i64) == 0 +; CHECK-NEXT: Loop %loop: Predicated symbolic max backedge-taken count is ((-8 + (-1 * (ptrtoint ptr %a to i64)) + (ptrtoint ptr %b to i64)) /u 8) +; CHECK-NEXT: Predicates: +; CHECK-NEXT: Equal predicate: (zext i3 ((trunc i64 (ptrtoint ptr %b to i64) to i3) + (-1 * (trunc i64 (ptrtoint ptr %a to i64) to i3))) to i64) == 0 ; entry: %cmp = icmp eq ptr %a, %b diff --git a/llvm/test/Analysis/ScalarEvolution/trip-count-urem.ll b/llvm/test/Analysis/ScalarEvolution/trip-count-urem.ll index 928d9accf35e7..d24655f6ae5c1 100644 --- a/llvm/test/Analysis/ScalarEvolution/trip-count-urem.ll +++ b/llvm/test/Analysis/ScalarEvolution/trip-count-urem.ll @@ -6,9 +6,10 @@ declare void @foo() define void @test_trip_count_expr_contains_urem(i32 %N) { ; CHECK-LABEL: 'test_trip_count_expr_contains_urem' ; CHECK-NEXT: Determining loop execution counts for: @test_trip_count_expr_contains_urem -; CHECK-NEXT: Loop %loop: Unpredictable backedge-taken count. -; CHECK-NEXT: Loop %loop: Unpredictable constant max backedge-taken count. -; CHECK-NEXT: Loop %loop: Unpredictable symbolic max backedge-taken count. 
+; CHECK-NEXT: Loop %loop: backedge-taken count is ((1 + (-1 * (zext i4 (1 + (trunc i32 %N to i4)) to i32)) + %N) /u 16) +; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i32 268435455 +; CHECK-NEXT: Loop %loop: symbolic max backedge-taken count is ((1 + (-1 * (zext i4 (1 + (trunc i32 %N to i4)) to i32)) + %N) /u 16) +; CHECK-NEXT: Loop %loop: Trip multiple is 1 ; entry: %n.rnd.up = add i32 %N, 1 diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-float-loops.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-float-loops.ll index 8b71987246ee5..c0bc34c2b06ef 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-float-loops.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-float-loops.ll @@ -4,11 +4,10 @@ define arm_aapcs_vfpcc void @float_float_mul(ptr nocapture readonly %a, ptr nocapture readonly %b, ptr nocapture %c, i32 %N) { ; CHECK-LABEL: float_float_mul: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: it eq -; CHECK-NEXT: bxeq lr -; CHECK-NEXT: .LBB0_1: @ %for.body.preheader ; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, lr} +; CHECK-NEXT: cmp r3, #0 +; CHECK-NEXT: beq .LBB0_10 +; CHECK-NEXT: @ %bb.1: @ %for.body.preheader ; CHECK-NEXT: cmp r3, #3 ; CHECK-NEXT: bhi .LBB0_3 ; CHECK-NEXT: @ %bb.2: @@ -33,39 +32,42 @@ define arm_aapcs_vfpcc void @float_float_mul(ptr nocapture readonly %a, ptr noca ; CHECK-NEXT: beq .LBB0_11 ; CHECK-NEXT: .LBB0_4: @ %for.body.preheader22 ; CHECK-NEXT: mvn.w r7, r12 -; CHECK-NEXT: add.w r8, r7, r3 -; CHECK-NEXT: and r5, r3, #3 -; CHECK-NEXT: wls lr, r5, .LBB0_7 +; CHECK-NEXT: adds r4, r7, r3 +; CHECK-NEXT: and r7, r3, #3 +; CHECK-NEXT: add.w r8, r12, r7 +; CHECK-NEXT: wls lr, r7, .LBB0_7 ; CHECK-NEXT: @ %bb.5: @ %for.body.prol.preheader -; CHECK-NEXT: add.w r4, r12, r5 -; CHECK-NEXT: add.w r5, r0, r12, lsl #2 -; CHECK-NEXT: add.w r6, r1, r12, lsl #2 -; CHECK-NEXT: add.w r7, r2, r12, lsl #2 -; CHECK-NEXT: mov r12, r4 +; CHECK-NEXT: add.w r6, r0, r12, lsl #2 +; CHECK-NEXT: add.w r7, r1, 
r12, lsl #2 +; CHECK-NEXT: add.w r5, r2, r12, lsl #2 +; CHECK-NEXT: mov r12, r8 ; CHECK-NEXT: .LBB0_6: @ %for.body.prol ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vldmia r6!, {s0} -; CHECK-NEXT: vldmia r5!, {s2} +; CHECK-NEXT: vldmia r7!, {s0} +; CHECK-NEXT: vldmia r6!, {s2} ; CHECK-NEXT: vmul.f32 s0, s2, s0 -; CHECK-NEXT: vstmia r7!, {s0} +; CHECK-NEXT: vstmia r5!, {s0} ; CHECK-NEXT: le lr, .LBB0_6 ; CHECK-NEXT: .LBB0_7: @ %for.body.prol.loopexit -; CHECK-NEXT: cmp.w r8, #3 +; CHECK-NEXT: cmp r4, #3 ; CHECK-NEXT: blo .LBB0_10 ; CHECK-NEXT: @ %bb.8: @ %for.body.preheader1 -; CHECK-NEXT: sub.w r3, r3, r12 -; CHECK-NEXT: lsl.w r12, r12, #2 +; CHECK-NEXT: sub.w r3, r8, r3 +; CHECK-NEXT: movs r7, #1 +; CHECK-NEXT: rsb r3, r3, r3, lsl #30 +; CHECK-NEXT: subs r3, #4 +; CHECK-NEXT: add.w lr, r7, r3, lsr #2 +; CHECK-NEXT: lsl.w r3, r12, #2 ; CHECK-NEXT: .LBB0_9: @ %for.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: add.w r7, r1, r12 -; CHECK-NEXT: add.w r6, r0, r12 -; CHECK-NEXT: add.w r5, r2, r12 +; CHECK-NEXT: adds r7, r1, r3 +; CHECK-NEXT: adds r6, r0, r3 +; CHECK-NEXT: adds r5, r2, r3 ; CHECK-NEXT: adds r0, #16 ; CHECK-NEXT: vldr s0, [r7] ; CHECK-NEXT: adds r1, #16 ; CHECK-NEXT: vldr s2, [r6] ; CHECK-NEXT: adds r2, #16 -; CHECK-NEXT: subs r3, #4 ; CHECK-NEXT: vmul.f32 s0, s2, s0 ; CHECK-NEXT: vstr s0, [r5] ; CHECK-NEXT: vldr s0, [r7, #4] @@ -80,10 +82,9 @@ define arm_aapcs_vfpcc void @float_float_mul(ptr nocapture readonly %a, ptr noca ; CHECK-NEXT: vldr s2, [r6, #12] ; CHECK-NEXT: vmul.f32 s0, s2, s0 ; CHECK-NEXT: vstr s0, [r5, #12] -; CHECK-NEXT: bne .LBB0_9 -; CHECK-NEXT: .LBB0_10: -; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, lr} -; CHECK-NEXT: bx lr +; CHECK-NEXT: le lr, .LBB0_9 +; CHECK-NEXT: .LBB0_10: @ %for.cond.cleanup +; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, pc} ; CHECK-NEXT: .LBB0_11: @ %vector.ph ; CHECK-NEXT: bic r12, r3, #3 ; CHECK-NEXT: movs r6, #1 @@ -217,11 +218,10 @@ for.body: ; preds = %for.body.prol.loope 
define arm_aapcs_vfpcc void @float_float_add(ptr nocapture readonly %a, ptr nocapture readonly %b, ptr nocapture %c, i32 %N) { ; CHECK-LABEL: float_float_add: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: it eq -; CHECK-NEXT: bxeq lr -; CHECK-NEXT: .LBB1_1: @ %for.body.preheader ; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, lr} +; CHECK-NEXT: cmp r3, #0 +; CHECK-NEXT: beq .LBB1_10 +; CHECK-NEXT: @ %bb.1: @ %for.body.preheader ; CHECK-NEXT: cmp r3, #3 ; CHECK-NEXT: bhi .LBB1_3 ; CHECK-NEXT: @ %bb.2: @@ -246,39 +246,42 @@ define arm_aapcs_vfpcc void @float_float_add(ptr nocapture readonly %a, ptr noca ; CHECK-NEXT: beq .LBB1_11 ; CHECK-NEXT: .LBB1_4: @ %for.body.preheader22 ; CHECK-NEXT: mvn.w r7, r12 -; CHECK-NEXT: add.w r8, r7, r3 -; CHECK-NEXT: and r5, r3, #3 -; CHECK-NEXT: wls lr, r5, .LBB1_7 +; CHECK-NEXT: adds r4, r7, r3 +; CHECK-NEXT: and r7, r3, #3 +; CHECK-NEXT: add.w r8, r12, r7 +; CHECK-NEXT: wls lr, r7, .LBB1_7 ; CHECK-NEXT: @ %bb.5: @ %for.body.prol.preheader -; CHECK-NEXT: add.w r4, r12, r5 -; CHECK-NEXT: add.w r5, r0, r12, lsl #2 -; CHECK-NEXT: add.w r6, r1, r12, lsl #2 -; CHECK-NEXT: add.w r7, r2, r12, lsl #2 -; CHECK-NEXT: mov r12, r4 +; CHECK-NEXT: add.w r6, r0, r12, lsl #2 +; CHECK-NEXT: add.w r7, r1, r12, lsl #2 +; CHECK-NEXT: add.w r5, r2, r12, lsl #2 +; CHECK-NEXT: mov r12, r8 ; CHECK-NEXT: .LBB1_6: @ %for.body.prol ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vldmia r6!, {s0} -; CHECK-NEXT: vldmia r5!, {s2} +; CHECK-NEXT: vldmia r7!, {s0} +; CHECK-NEXT: vldmia r6!, {s2} ; CHECK-NEXT: vadd.f32 s0, s2, s0 -; CHECK-NEXT: vstmia r7!, {s0} +; CHECK-NEXT: vstmia r5!, {s0} ; CHECK-NEXT: le lr, .LBB1_6 ; CHECK-NEXT: .LBB1_7: @ %for.body.prol.loopexit -; CHECK-NEXT: cmp.w r8, #3 +; CHECK-NEXT: cmp r4, #3 ; CHECK-NEXT: blo .LBB1_10 ; CHECK-NEXT: @ %bb.8: @ %for.body.preheader1 -; CHECK-NEXT: sub.w r3, r3, r12 -; CHECK-NEXT: lsl.w r12, r12, #2 +; CHECK-NEXT: sub.w r3, r8, r3 +; CHECK-NEXT: movs r7, #1 +; CHECK-NEXT: 
rsb r3, r3, r3, lsl #30 +; CHECK-NEXT: subs r3, #4 +; CHECK-NEXT: add.w lr, r7, r3, lsr #2 +; CHECK-NEXT: lsl.w r3, r12, #2 ; CHECK-NEXT: .LBB1_9: @ %for.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: add.w r7, r1, r12 -; CHECK-NEXT: add.w r6, r0, r12 -; CHECK-NEXT: add.w r5, r2, r12 +; CHECK-NEXT: adds r7, r1, r3 +; CHECK-NEXT: adds r6, r0, r3 +; CHECK-NEXT: adds r5, r2, r3 ; CHECK-NEXT: adds r0, #16 ; CHECK-NEXT: vldr s0, [r7] ; CHECK-NEXT: adds r1, #16 ; CHECK-NEXT: vldr s2, [r6] ; CHECK-NEXT: adds r2, #16 -; CHECK-NEXT: subs r3, #4 ; CHECK-NEXT: vadd.f32 s0, s2, s0 ; CHECK-NEXT: vstr s0, [r5] ; CHECK-NEXT: vldr s0, [r7, #4] @@ -293,10 +296,9 @@ define arm_aapcs_vfpcc void @float_float_add(ptr nocapture readonly %a, ptr noca ; CHECK-NEXT: vldr s2, [r6, #12] ; CHECK-NEXT: vadd.f32 s0, s2, s0 ; CHECK-NEXT: vstr s0, [r5, #12] -; CHECK-NEXT: bne .LBB1_9 -; CHECK-NEXT: .LBB1_10: -; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, lr} -; CHECK-NEXT: bx lr +; CHECK-NEXT: le lr, .LBB1_9 +; CHECK-NEXT: .LBB1_10: @ %for.cond.cleanup +; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, pc} ; CHECK-NEXT: .LBB1_11: @ %vector.ph ; CHECK-NEXT: bic r12, r3, #3 ; CHECK-NEXT: movs r6, #1 @@ -430,11 +432,10 @@ for.body: ; preds = %for.body.prol.loope define arm_aapcs_vfpcc void @float_float_sub(ptr nocapture readonly %a, ptr nocapture readonly %b, ptr nocapture %c, i32 %N) { ; CHECK-LABEL: float_float_sub: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: it eq -; CHECK-NEXT: bxeq lr -; CHECK-NEXT: .LBB2_1: @ %for.body.preheader ; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, lr} +; CHECK-NEXT: cmp r3, #0 +; CHECK-NEXT: beq .LBB2_10 +; CHECK-NEXT: @ %bb.1: @ %for.body.preheader ; CHECK-NEXT: cmp r3, #3 ; CHECK-NEXT: bhi .LBB2_3 ; CHECK-NEXT: @ %bb.2: @@ -459,39 +460,42 @@ define arm_aapcs_vfpcc void @float_float_sub(ptr nocapture readonly %a, ptr noca ; CHECK-NEXT: beq .LBB2_11 ; CHECK-NEXT: .LBB2_4: @ %for.body.preheader22 ; CHECK-NEXT: mvn.w r7, r12 -; CHECK-NEXT: 
add.w r8, r7, r3 -; CHECK-NEXT: and r5, r3, #3 -; CHECK-NEXT: wls lr, r5, .LBB2_7 +; CHECK-NEXT: adds r4, r7, r3 +; CHECK-NEXT: and r7, r3, #3 +; CHECK-NEXT: add.w r8, r12, r7 +; CHECK-NEXT: wls lr, r7, .LBB2_7 ; CHECK-NEXT: @ %bb.5: @ %for.body.prol.preheader -; CHECK-NEXT: add.w r4, r12, r5 -; CHECK-NEXT: add.w r5, r0, r12, lsl #2 -; CHECK-NEXT: add.w r6, r1, r12, lsl #2 -; CHECK-NEXT: add.w r7, r2, r12, lsl #2 -; CHECK-NEXT: mov r12, r4 +; CHECK-NEXT: add.w r6, r0, r12, lsl #2 +; CHECK-NEXT: add.w r7, r1, r12, lsl #2 +; CHECK-NEXT: add.w r5, r2, r12, lsl #2 +; CHECK-NEXT: mov r12, r8 ; CHECK-NEXT: .LBB2_6: @ %for.body.prol ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vldmia r6!, {s0} -; CHECK-NEXT: vldmia r5!, {s2} +; CHECK-NEXT: vldmia r7!, {s0} +; CHECK-NEXT: vldmia r6!, {s2} ; CHECK-NEXT: vsub.f32 s0, s2, s0 -; CHECK-NEXT: vstmia r7!, {s0} +; CHECK-NEXT: vstmia r5!, {s0} ; CHECK-NEXT: le lr, .LBB2_6 ; CHECK-NEXT: .LBB2_7: @ %for.body.prol.loopexit -; CHECK-NEXT: cmp.w r8, #3 +; CHECK-NEXT: cmp r4, #3 ; CHECK-NEXT: blo .LBB2_10 ; CHECK-NEXT: @ %bb.8: @ %for.body.preheader1 -; CHECK-NEXT: sub.w r3, r3, r12 -; CHECK-NEXT: lsl.w r12, r12, #2 +; CHECK-NEXT: sub.w r3, r8, r3 +; CHECK-NEXT: movs r7, #1 +; CHECK-NEXT: rsb r3, r3, r3, lsl #30 +; CHECK-NEXT: subs r3, #4 +; CHECK-NEXT: add.w lr, r7, r3, lsr #2 +; CHECK-NEXT: lsl.w r3, r12, #2 ; CHECK-NEXT: .LBB2_9: @ %for.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: add.w r7, r1, r12 -; CHECK-NEXT: add.w r6, r0, r12 -; CHECK-NEXT: add.w r5, r2, r12 +; CHECK-NEXT: adds r7, r1, r3 +; CHECK-NEXT: adds r6, r0, r3 +; CHECK-NEXT: adds r5, r2, r3 ; CHECK-NEXT: adds r0, #16 ; CHECK-NEXT: vldr s0, [r7] ; CHECK-NEXT: adds r1, #16 ; CHECK-NEXT: vldr s2, [r6] ; CHECK-NEXT: adds r2, #16 -; CHECK-NEXT: subs r3, #4 ; CHECK-NEXT: vsub.f32 s0, s2, s0 ; CHECK-NEXT: vstr s0, [r5] ; CHECK-NEXT: vldr s0, [r7, #4] @@ -506,10 +510,9 @@ define arm_aapcs_vfpcc void @float_float_sub(ptr nocapture 
readonly %a, ptr noca ; CHECK-NEXT: vldr s2, [r6, #12] ; CHECK-NEXT: vsub.f32 s0, s2, s0 ; CHECK-NEXT: vstr s0, [r5, #12] -; CHECK-NEXT: bne .LBB2_9 -; CHECK-NEXT: .LBB2_10: -; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, lr} -; CHECK-NEXT: bx lr +; CHECK-NEXT: le lr, .LBB2_9 +; CHECK-NEXT: .LBB2_10: @ %for.cond.cleanup +; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, pc} ; CHECK-NEXT: .LBB2_11: @ %vector.ph ; CHECK-NEXT: bic r12, r3, #3 ; CHECK-NEXT: movs r6, #1 @@ -643,11 +646,10 @@ for.body: ; preds = %for.body.prol.loope define arm_aapcs_vfpcc void @float_int_mul(ptr nocapture readonly %a, ptr nocapture readonly %b, ptr nocapture %c, i32 %N) { ; CHECK-LABEL: float_int_mul: ; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, lr} ; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: it eq -; CHECK-NEXT: bxeq lr -; CHECK-NEXT: .LBB3_1: @ %for.body.preheader -; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, lr} +; CHECK-NEXT: beq.w .LBB3_13 +; CHECK-NEXT: @ %bb.1: @ %for.body.preheader ; CHECK-NEXT: cmp r3, #3 ; CHECK-NEXT: bls .LBB3_6 ; CHECK-NEXT: @ %bb.2: @ %vector.memcheck @@ -681,42 +683,45 @@ define arm_aapcs_vfpcc void @float_int_mul(ptr nocapture readonly %a, ptr nocapt ; CHECK-NEXT: mov.w r12, #0 ; CHECK-NEXT: .LBB3_7: @ %for.body.preheader16 ; CHECK-NEXT: mvn.w r7, r12 -; CHECK-NEXT: add.w r8, r7, r3 -; CHECK-NEXT: and r5, r3, #3 -; CHECK-NEXT: wls lr, r5, .LBB3_10 +; CHECK-NEXT: add.w r9, r7, r3 +; CHECK-NEXT: and r7, r3, #3 +; CHECK-NEXT: add.w r8, r12, r7 +; CHECK-NEXT: wls lr, r7, .LBB3_10 ; CHECK-NEXT: @ %bb.8: @ %for.body.prol.preheader -; CHECK-NEXT: add.w r4, r12, r5 -; CHECK-NEXT: add.w r5, r0, r12, lsl #2 -; CHECK-NEXT: add.w r6, r1, r12, lsl #2 -; CHECK-NEXT: add.w r7, r2, r12, lsl #2 -; CHECK-NEXT: mov r12, r4 +; CHECK-NEXT: add.w r6, r0, r12, lsl #2 +; CHECK-NEXT: add.w r7, r1, r12, lsl #2 +; CHECK-NEXT: add.w r5, r2, r12, lsl #2 +; CHECK-NEXT: mov r12, r8 ; CHECK-NEXT: .LBB3_9: @ %for.body.prol ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; 
CHECK-NEXT: ldr r4, [r6], #4 -; CHECK-NEXT: vldmia r5!, {s2} +; CHECK-NEXT: ldr r4, [r7], #4 +; CHECK-NEXT: vldmia r6!, {s2} ; CHECK-NEXT: vmov s0, r4 ; CHECK-NEXT: vcvt.f32.s32 s0, s0 ; CHECK-NEXT: vmul.f32 s0, s2, s0 -; CHECK-NEXT: vstmia r7!, {s0} +; CHECK-NEXT: vstmia r5!, {s0} ; CHECK-NEXT: le lr, .LBB3_9 ; CHECK-NEXT: .LBB3_10: @ %for.body.prol.loopexit -; CHECK-NEXT: cmp.w r8, #3 +; CHECK-NEXT: cmp.w r9, #3 ; CHECK-NEXT: blo .LBB3_13 ; CHECK-NEXT: @ %bb.11: @ %for.body.preheader1 +; CHECK-NEXT: sub.w r3, r8, r3 ; CHECK-NEXT: add.w r1, r1, r12, lsl #2 -; CHECK-NEXT: sub.w r3, r3, r12 +; CHECK-NEXT: movs r7, #1 ; CHECK-NEXT: adds r1, #8 -; CHECK-NEXT: lsl.w r12, r12, #2 +; CHECK-NEXT: rsb r3, r3, r3, lsl #30 +; CHECK-NEXT: subs r3, #4 +; CHECK-NEXT: add.w lr, r7, r3, lsr #2 +; CHECK-NEXT: lsl.w r3, r12, #2 ; CHECK-NEXT: .LBB3_12: @ %for.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldr s0, [r1, #-8] -; CHECK-NEXT: add.w r7, r0, r12 -; CHECK-NEXT: add.w r6, r2, r12 +; CHECK-NEXT: adds r7, r0, r3 +; CHECK-NEXT: adds r6, r2, r3 ; CHECK-NEXT: adds r0, #16 ; CHECK-NEXT: vcvt.f32.s32 s0, s0 ; CHECK-NEXT: vldr s2, [r7] ; CHECK-NEXT: adds r2, #16 -; CHECK-NEXT: subs r3, #4 ; CHECK-NEXT: vmul.f32 s0, s2, s0 ; CHECK-NEXT: vstr s0, [r6] ; CHECK-NEXT: vldr s0, [r1, #-4] @@ -730,15 +735,14 @@ define arm_aapcs_vfpcc void @float_int_mul(ptr nocapture readonly %a, ptr nocapt ; CHECK-NEXT: vmul.f32 s0, s2, s0 ; CHECK-NEXT: vstr s0, [r6, #8] ; CHECK-NEXT: vldr s0, [r1, #4] -; CHECK-NEXT: add.w r1, r1, #16 +; CHECK-NEXT: adds r1, #16 ; CHECK-NEXT: vldr s2, [r7, #12] ; CHECK-NEXT: vcvt.f32.s32 s0, s0 ; CHECK-NEXT: vmul.f32 s0, s2, s0 ; CHECK-NEXT: vstr s0, [r6, #12] -; CHECK-NEXT: bne .LBB3_12 -; CHECK-NEXT: .LBB3_13: -; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, lr} -; CHECK-NEXT: bx lr +; CHECK-NEXT: le lr, .LBB3_12 +; CHECK-NEXT: .LBB3_13: @ %for.cond.cleanup +; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, pc} entry: %cmp8 = icmp eq i32 %N, 0 br i1 %cmp8, 
label %for.cond.cleanup, label %for.body.preheader diff --git a/llvm/test/Transforms/LoopUnroll/runtime-loop-multiple-exits.ll b/llvm/test/Transforms/LoopUnroll/runtime-loop-multiple-exits.ll index 272e47b3ee1e7..5eebf13313555 100644 --- a/llvm/test/Transforms/LoopUnroll/runtime-loop-multiple-exits.ll +++ b/llvm/test/Transforms/LoopUnroll/runtime-loop-multiple-exits.ll @@ -4514,7 +4514,7 @@ define void @test8() { ; EPILOG-NEXT: %i4.7 = add nuw nsw i64 %i3, 8 ; EPILOG-NEXT: br i1 false, label %outerloop.loopexit.loopexit, label %latch.7 ; EPILOG: latch.7: -; EPILOG-NEXT: %niter.next.7 = add i64 %niter, 8 +; EPILOG-NEXT: %niter.next.7 = add nuw nsw i64 %niter, 8 ; EPILOG-NEXT: %niter.ncmp.7 = icmp ne i64 %niter.next.7, %unroll_iter ; EPILOG-NEXT: br i1 %niter.ncmp.7, label %innerH, label %exit.unr-lcssa.loopexit ; EPILOG: exit.unr-lcssa.loopexit: diff --git a/llvm/test/Transforms/LoopVectorize/opaque-ptr.ll b/llvm/test/Transforms/LoopVectorize/opaque-ptr.ll index 13e79a4a47b39..f62c3c7f42ec4 100644 --- a/llvm/test/Transforms/LoopVectorize/opaque-ptr.ll +++ b/llvm/test/Transforms/LoopVectorize/opaque-ptr.ll @@ -4,18 +4,80 @@ define void @test_ptr_iv_no_inbounds(ptr %p1.start, ptr %p2.start, ptr %p1.end) { ; CHECK-LABEL: @test_ptr_iv_no_inbounds( ; CHECK-NEXT: entry: +; CHECK-NEXT: [[P1_START7:%.*]] = ptrtoint ptr [[P1_START:%.*]] to i64 +; CHECK-NEXT: [[P1_END6:%.*]] = ptrtoint ptr [[P1_END:%.*]] to i64 +; CHECK-NEXT: [[P1_START4:%.*]] = ptrtoint ptr [[P1_START]] to i64 +; CHECK-NEXT: [[P1_END3:%.*]] = ptrtoint ptr [[P1_END]] to i64 +; CHECK-NEXT: [[P1_START2:%.*]] = ptrtoint ptr [[P1_START]] to i64 +; CHECK-NEXT: [[P1_END1:%.*]] = ptrtoint ptr [[P1_END]] to i64 +; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[P1_END6]], -4 +; CHECK-NEXT: [[TMP1:%.*]] = sub i64 [[TMP0]], [[P1_START7]] +; CHECK-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP1]], 2 +; CHECK-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP3]], 2 +; CHECK-NEXT: br i1 
[[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]] +; CHECK: vector.scevcheck: +; CHECK-NEXT: [[TMP4:%.*]] = trunc i64 [[P1_END1]] to i2 +; CHECK-NEXT: [[TMP5:%.*]] = trunc i64 [[P1_START2]] to i2 +; CHECK-NEXT: [[TMP6:%.*]] = sub i2 [[TMP4]], [[TMP5]] +; CHECK-NEXT: [[TMP7:%.*]] = zext i2 [[TMP6]] to i64 +; CHECK-NEXT: [[IDENT_CHECK:%.*]] = icmp ne i64 [[TMP7]], 0 +; CHECK-NEXT: br i1 [[IDENT_CHECK]], label [[SCALAR_PH]], label [[VECTOR_MEMCHECK:%.*]] +; CHECK: vector.memcheck: +; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[P1_END3]], -4 +; CHECK-NEXT: [[TMP9:%.*]] = sub i64 [[TMP8]], [[P1_START4]] +; CHECK-NEXT: [[TMP10:%.*]] = lshr i64 [[TMP9]], 2 +; CHECK-NEXT: [[TMP11:%.*]] = shl nuw i64 [[TMP10]], 2 +; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[TMP11]], 4 +; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[P1_START]], i64 [[TMP12]] +; CHECK-NEXT: [[SCEVGEP5:%.*]] = getelementptr i8, ptr [[P2_START:%.*]], i64 [[TMP12]] +; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[P1_START]], [[SCEVGEP5]] +; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[P2_START]], [[SCEVGEP]] +; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] +; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP3]], 2 +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF]] +; CHECK-NEXT: [[TMP13:%.*]] = mul i64 [[N_VEC]], 4 +; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[P1_START]], i64 [[TMP13]] +; CHECK-NEXT: [[TMP14:%.*]] = mul i64 [[N_VEC]], 4 +; CHECK-NEXT: [[IND_END8:%.*]] = getelementptr i8, ptr [[P2_START]], i64 [[TMP14]] +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 4 +; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[OFFSET_IDX]], 0 +; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, 
ptr [[P1_START]], i64 [[TMP15]] +; CHECK-NEXT: [[OFFSET_IDX10:%.*]] = mul i64 [[INDEX]], 4 +; CHECK-NEXT: [[TMP16:%.*]] = add i64 [[OFFSET_IDX10]], 0 +; CHECK-NEXT: [[NEXT_GEP11:%.*]] = getelementptr i8, ptr [[P2_START]], i64 [[TMP16]] +; CHECK-NEXT: [[TMP17:%.*]] = getelementptr float, ptr [[NEXT_GEP]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x float>, ptr [[TMP17]], align 4, !alias.scope [[META0:![0-9]+]], !noalias [[META3:![0-9]+]] +; CHECK-NEXT: [[TMP18:%.*]] = getelementptr float, ptr [[NEXT_GEP11]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD12:%.*]] = load <2 x float>, ptr [[TMP18]], align 4, !alias.scope [[META3]] +; CHECK-NEXT: [[TMP19:%.*]] = fadd <2 x float> [[WIDE_LOAD]], [[WIDE_LOAD12]] +; CHECK-NEXT: store <2 x float> [[TMP19]], ptr [[TMP17]], align 4, !alias.scope [[META0]], !noalias [[META3]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 +; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK: middle.block: +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[P1_START]], [[ENTRY:%.*]] ], [ [[P1_START]], [[VECTOR_SCEVCHECK]] ], [ [[P1_START]], [[VECTOR_MEMCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL9:%.*]] = phi ptr [ [[IND_END8]], [[MIDDLE_BLOCK]] ], [ [[P2_START]], [[ENTRY]] ], [ [[P2_START]], [[VECTOR_SCEVCHECK]] ], [ [[P2_START]], [[VECTOR_MEMCHECK]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[P1:%.*]] = phi ptr [ [[P1_START:%.*]], [[ENTRY:%.*]] ], [ [[P1_NEXT:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[P2:%.*]] = phi ptr [ [[P2_START:%.*]], [[ENTRY]] ], [ [[P2_NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[P1:%.*]] = phi ptr [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[P1_NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: 
[[P2:%.*]] = phi ptr [ [[BC_RESUME_VAL9]], [[SCALAR_PH]] ], [ [[P2_NEXT:%.*]], [[LOOP]] ] ; CHECK-NEXT: [[P1_VAL:%.*]] = load float, ptr [[P1]], align 4 ; CHECK-NEXT: [[P2_VAL:%.*]] = load float, ptr [[P2]], align 4 ; CHECK-NEXT: [[SUM:%.*]] = fadd float [[P1_VAL]], [[P2_VAL]] ; CHECK-NEXT: store float [[SUM]], ptr [[P1]], align 4 ; CHECK-NEXT: [[P1_NEXT]] = getelementptr float, ptr [[P1]], i64 1 ; CHECK-NEXT: [[P2_NEXT]] = getelementptr float, ptr [[P2]], i64 1 -; CHECK-NEXT: [[C:%.*]] = icmp ne ptr [[P1_NEXT]], [[P1_END:%.*]] -; CHECK-NEXT: br i1 [[C]], label [[LOOP]], label [[EXIT:%.*]] +; CHECK-NEXT: [[C:%.*]] = icmp ne ptr [[P1_NEXT]], [[P1_END]] +; CHECK-NEXT: br i1 [[C]], label [[LOOP]], label [[EXIT]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: exit: ; CHECK-NEXT: ret void ; @@ -80,14 +142,14 @@ define void @test_ptr_iv_with_inbounds(ptr %p1.start, ptr %p2.start, ptr %p1.end ; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[OFFSET_IDX8]], 0 ; CHECK-NEXT: [[NEXT_GEP9:%.*]] = getelementptr i8, ptr [[P2_START]], i64 [[TMP12]] ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr float, ptr [[NEXT_GEP]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x float>, ptr [[TMP13]], align 4, !alias.scope [[META0:![0-9]+]], !noalias [[META3:![0-9]+]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x float>, ptr [[TMP13]], align 4, !alias.scope [[META9:![0-9]+]], !noalias [[META12:![0-9]+]] ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr float, ptr [[NEXT_GEP9]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD10:%.*]] = load <2 x float>, ptr [[TMP14]], align 4, !alias.scope [[META3]] +; CHECK-NEXT: [[WIDE_LOAD10:%.*]] = load <2 x float>, ptr [[TMP14]], align 4, !alias.scope [[META12]] ; CHECK-NEXT: [[TMP15:%.*]] = fadd <2 x float> [[WIDE_LOAD]], [[WIDE_LOAD10]] -; CHECK-NEXT: store <2 x float> [[TMP15]], ptr [[TMP13]], align 4, !alias.scope [[META0]], !noalias [[META3]] +; CHECK-NEXT: store <2 x float> [[TMP15]], ptr [[TMP13]], align 4, !alias.scope [[META9]], !noalias [[META12]] ; CHECK-NEXT: [[INDEX_NEXT]] = add 
nuw i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] @@ -105,7 +167,7 @@ define void @test_ptr_iv_with_inbounds(ptr %p1.start, ptr %p2.start, ptr %p1.end ; CHECK-NEXT: [[P1_NEXT]] = getelementptr inbounds float, ptr [[P1]], i64 1 ; CHECK-NEXT: [[P2_NEXT]] = getelementptr inbounds float, ptr [[P2]], i64 1 ; CHECK-NEXT: [[C:%.*]] = icmp ne ptr [[P1_NEXT]], [[P1_END]] -; CHECK-NEXT: br i1 [[C]], label [[LOOP]], label [[EXIT]], !llvm.loop [[LOOP8:![0-9]+]] +; CHECK-NEXT: br i1 [[C]], label [[LOOP]], label [[EXIT]], !llvm.loop [[LOOP15:![0-9]+]] ; CHECK: exit: ; CHECK-NEXT: ret void ; @@ -155,7 +217,7 @@ define void @store_pointer_induction(ptr %start, ptr %end) { ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 16 ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] @@ -167,7 +229,7 @@ define void @store_pointer_induction(ptr %start, ptr %end) { ; CHECK-NEXT: store ptr [[IV]], ptr [[IV]], align 4 ; CHECK-NEXT: [[IV_NEXT]] = getelementptr inbounds ptr, ptr [[IV]], i32 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq ptr [[IV_NEXT]], [[END]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label 
[[LOOP]], !llvm.loop [[LOOP10:![0-9]+]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP17:![0-9]+]] ; CHECK: exit: ; CHECK-NEXT: ret void ;