diff --git a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp index 3ca3818938fd2..f966ccaa83842 100644 --- a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -2186,6 +2186,12 @@ class LSRInstance { /// Induction variables that were generated and inserted by the SCEV Expander. SmallVector ScalarEvolutionIVs; + // Inserting instructions in the loop and using them as a PHI's input could + // break LCSSA if the PHI's parent block is not a loop exit (i.e. the + // corresponding incoming block is not loop exiting). So collect all such + // instructions to form LCSSA for them later. + SmallSetVector InsertedNonLCSSAInsts; + void OptimizeShadowIV(); bool FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse); ICmpInst *OptimizeMax(ICmpInst *Cond, IVStrideUse* &CondUse); @@ -2276,9 +2282,9 @@ class LSRInstance { SmallVectorImpl &DeadInsts) const; void RewriteForPHI(PHINode *PN, const LSRUse &LU, const LSRFixup &LF, const Formula &F, - SmallVectorImpl &DeadInsts) const; + SmallVectorImpl &DeadInsts); void Rewrite(const LSRUse &LU, const LSRFixup &LF, const Formula &F, - SmallVectorImpl &DeadInsts) const; + SmallVectorImpl &DeadInsts); void ImplementSolution(const SmallVectorImpl &Solution); public: @@ -5858,17 +5864,11 @@ Value *LSRInstance::Expand(const LSRUse &LU, const LSRFixup &LF, /// Helper for Rewrite. PHI nodes are special because the use of their operands /// effectively happens in their predecessor blocks, so the expression may need /// to be expanded in multiple places. 
-void LSRInstance::RewriteForPHI( - PHINode *PN, const LSRUse &LU, const LSRFixup &LF, const Formula &F, - SmallVectorImpl &DeadInsts) const { +void LSRInstance::RewriteForPHI(PHINode *PN, const LSRUse &LU, + const LSRFixup &LF, const Formula &F, + SmallVectorImpl &DeadInsts) { DenseMap Inserted; - // Inserting instructions in the loop and using them as PHI's input could - // break LCSSA in case if PHI's parent block is not a loop exit (i.e. the - // corresponding incoming block is not loop exiting). So collect all such - // instructions to form LCSSA for them later. - SmallVector InsertedNonLCSSAInsts; - for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) if (PN->getIncomingValue(i) == LF.OperandValToReplace) { bool needUpdateFixups = false; @@ -5939,7 +5939,7 @@ void LSRInstance::RewriteForPHI( // the inserted value. if (auto *I = dyn_cast(FullV)) if (L->contains(I) && !L->contains(BB)) - InsertedNonLCSSAInsts.push_back(I); + InsertedNonLCSSAInsts.insert(I); PN->setIncomingValue(i, FullV); Pair.first->second = FullV; @@ -5983,8 +5983,6 @@ void LSRInstance::RewriteForPHI( } } } - - formLCSSAForInstructions(InsertedNonLCSSAInsts, DT, LI, &SE); } /// Emit instructions for the leading candidate expression for this LSRUse (this @@ -5992,7 +5990,7 @@ void LSRInstance::RewriteForPHI( /// expanded value. void LSRInstance::Rewrite(const LSRUse &LU, const LSRFixup &LF, const Formula &F, - SmallVectorImpl &DeadInsts) const { + SmallVectorImpl &DeadInsts) { // First, find an insertion point that dominates UserInst. For PHI nodes, // find the nearest block which dominates all the relevant uses. 
if (PHINode *PN = dyn_cast(LF.UserInst)) { @@ -6080,6 +6078,9 @@ void LSRInstance::ImplementSolution( Changed = true; } + auto InsertedInsts = InsertedNonLCSSAInsts.takeVector(); + formLCSSAForInstructions(InsertedInsts, DT, LI, &SE); + for (const IVChain &Chain : IVChainVec) { GenerateIVChain(Chain, DeadInsts); Changed = true; diff --git a/llvm/test/Transforms/LoopStrengthReduce/2011-10-03-CritEdgeMerge.ll b/llvm/test/Transforms/LoopStrengthReduce/2011-10-03-CritEdgeMerge.ll index bf52c968ad870..7195d4cab96f4 100644 --- a/llvm/test/Transforms/LoopStrengthReduce/2011-10-03-CritEdgeMerge.ll +++ b/llvm/test/Transforms/LoopStrengthReduce/2011-10-03-CritEdgeMerge.ll @@ -24,15 +24,15 @@ define ptr @test1() { ; CHECK-NEXT: br i1 false, label [[BBA:%.*]], label [[BBB:%.*]] ; CHECK: bbA: ; CHECK-NEXT: switch i32 0, label [[BBA_BB89_CRIT_EDGE:%.*]] [ -; CHECK-NEXT: i32 47, label [[BBA_BB89_CRIT_EDGE]] -; CHECK-NEXT: i32 58, label [[BBA_BB89_CRIT_EDGE]] +; CHECK-NEXT: i32 47, label [[BBA_BB89_CRIT_EDGE]] +; CHECK-NEXT: i32 58, label [[BBA_BB89_CRIT_EDGE]] ; CHECK-NEXT: ] ; CHECK: bbA.bb89_crit_edge: ; CHECK-NEXT: br label [[BB89:%.*]] ; CHECK: bbB: ; CHECK-NEXT: switch i8 0, label [[BBB_BB89_CRIT_EDGE:%.*]] [ -; CHECK-NEXT: i8 47, label [[BBB_BB89_CRIT_EDGE]] -; CHECK-NEXT: i8 58, label [[BBB_BB89_CRIT_EDGE]] +; CHECK-NEXT: i8 47, label [[BBB_BB89_CRIT_EDGE]] +; CHECK-NEXT: i8 58, label [[BBB_BB89_CRIT_EDGE]] ; CHECK-NEXT: ] ; CHECK: bbB.bb89_crit_edge: ; CHECK-NEXT: br label [[BB89]] @@ -85,23 +85,22 @@ define ptr @test2() { ; CHECK-NEXT: [[SCEVGEP]] = getelementptr i8, ptr [[LSR_IV]], i64 1 ; CHECK-NEXT: br i1 false, label [[LOOP]], label [[LOOPEXIT:%.*]] ; CHECK: loopexit: -; CHECK-NEXT: [[SCEVGEP_LCSSA1:%.*]] = phi ptr [ [[SCEVGEP]], [[LOOP]] ] ; CHECK-NEXT: [[SCEVGEP_LCSSA:%.*]] = phi ptr [ [[SCEVGEP]], [[LOOP]] ] ; CHECK-NEXT: br i1 false, label [[BBA:%.*]], label [[BBB:%.*]] ; CHECK: bbA: ; CHECK-NEXT: switch i32 0, label [[BB89:%.*]] [ -; CHECK-NEXT: i32 47, label 
[[BB89]] -; CHECK-NEXT: i32 58, label [[BB89]] +; CHECK-NEXT: i32 47, label [[BB89]] +; CHECK-NEXT: i32 58, label [[BB89]] ; CHECK-NEXT: ] ; CHECK: bbB: ; CHECK-NEXT: switch i8 0, label [[BBB_EXIT_CRIT_EDGE:%.*]] [ -; CHECK-NEXT: i8 47, label [[BBB_EXIT_CRIT_EDGE]] -; CHECK-NEXT: i8 58, label [[BBB_EXIT_CRIT_EDGE]] +; CHECK-NEXT: i8 47, label [[BBB_EXIT_CRIT_EDGE]] +; CHECK-NEXT: i8 58, label [[BBB_EXIT_CRIT_EDGE]] ; CHECK-NEXT: ] ; CHECK: bbB.exit_crit_edge: ; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: bb89: -; CHECK-NEXT: [[TMP75PHI:%.*]] = phi ptr [ [[SCEVGEP_LCSSA1]], [[BBA]] ], [ [[SCEVGEP_LCSSA1]], [[BBA]] ], [ [[SCEVGEP_LCSSA1]], [[BBA]] ] +; CHECK-NEXT: [[TMP75PHI:%.*]] = phi ptr [ [[SCEVGEP_LCSSA]], [[BBA]] ], [ [[SCEVGEP_LCSSA]], [[BBA]] ], [ [[SCEVGEP_LCSSA]], [[BBA]] ] ; CHECK-NEXT: br label [[EXIT]] ; CHECK: exit: ; CHECK-NEXT: [[RESULT:%.*]] = phi ptr [ [[TMP75PHI]], [[BB89]] ], [ [[SCEVGEP_LCSSA]], [[BBB_EXIT_CRIT_EDGE]] ] diff --git a/llvm/test/Transforms/LoopStrengthReduce/AMDGPU/lsr-invalid-ptr-extend.ll b/llvm/test/Transforms/LoopStrengthReduce/AMDGPU/lsr-invalid-ptr-extend.ll index b4fb4fe7aaf96..737a590394e5f 100644 --- a/llvm/test/Transforms/LoopStrengthReduce/AMDGPU/lsr-invalid-ptr-extend.ll +++ b/llvm/test/Transforms/LoopStrengthReduce/AMDGPU/lsr-invalid-ptr-extend.ll @@ -16,8 +16,8 @@ define amdgpu_kernel void @scaledregtest() local_unnamed_addr { ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: loopexit: -; CHECK-NEXT: [[SCEVGEP13_LCSSA:%.*]] = phi ptr [ [[SCEVGEP13:%.*]], [[FOR_BODY]] ] ; CHECK-NEXT: [[SCEVGEP11_LCSSA:%.*]] = phi ptr addrspace(5) [ [[SCEVGEP11:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[SCEVGEP13_LCSSA:%.*]] = phi ptr [ [[SCEVGEP13:%.*]], [[FOR_BODY]] ] ; CHECK-NEXT: br label [[FOR_BODY_1:%.*]] ; CHECK: for.body.1: ; CHECK-NEXT: [[LSR_IV5:%.*]] = phi ptr addrspace(5) [ [[SCEVGEP6:%.*]], [[FOR_BODY_1]] ], [ [[SCEVGEP11_LCSSA]], [[LOOPEXIT:%.*]] ] diff --git 
a/llvm/test/Transforms/LoopStrengthReduce/X86/2011-11-29-postincphi.ll b/llvm/test/Transforms/LoopStrengthReduce/X86/2011-11-29-postincphi.ll index fbb9e2a7b6b82..841836c7d2dd8 100644 --- a/llvm/test/Transforms/LoopStrengthReduce/X86/2011-11-29-postincphi.ll +++ b/llvm/test/Transforms/LoopStrengthReduce/X86/2011-11-29-postincphi.ll @@ -20,16 +20,17 @@ define i64 @sqlite3DropTriggerPtr() nounwind { ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: .LBB0_1: # %bb1 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: movq %rbx, %rcx ; CHECK-NEXT: testb %al, %al -; CHECK-NEXT: je .LBB0_3 +; CHECK-NEXT: je .LBB0_4 ; CHECK-NEXT: # %bb.2: # %bb4 ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 -; CHECK-NEXT: leaq 1(%rcx), %rbx +; CHECK-NEXT: incq %rbx ; CHECK-NEXT: testb %al, %al ; CHECK-NEXT: jne .LBB0_1 -; CHECK-NEXT: .LBB0_3: # %bb8 -; CHECK-NEXT: movq %rcx, %rax +; CHECK-NEXT: # %bb.3: # %bb8split +; CHECK-NEXT: decq %rbx +; CHECK-NEXT: .LBB0_4: # %bb8 +; CHECK-NEXT: movq %rbx, %rax ; CHECK-NEXT: popq %rbx ; CHECK-NEXT: retq bb: diff --git a/llvm/test/Transforms/LoopStrengthReduce/X86/expander-crashes.ll b/llvm/test/Transforms/LoopStrengthReduce/X86/expander-crashes.ll index 29c03b88c5fb1..d652e5c5aa060 100644 --- a/llvm/test/Transforms/LoopStrengthReduce/X86/expander-crashes.ll +++ b/llvm/test/Transforms/LoopStrengthReduce/X86/expander-crashes.ll @@ -21,8 +21,8 @@ define i64 @blam(ptr %start, ptr %end, ptr %ptr.2) { ; CHECK-NEXT: [[EC:%.*]] = icmp eq ptr [[IV_NEXT]], [[END:%.*]] ; CHECK-NEXT: br i1 [[EC]], label [[LOOP_2_PH:%.*]], label [[LOOP_1_HEADER]] ; CHECK: loop.2.ph: -; CHECK-NEXT: [[IV_NEXT_LCSSA:%.*]] = phi ptr [ [[IV_NEXT]], [[LOOP_1_HEADER]] ] ; CHECK-NEXT: [[LSR_IV_NEXT5_LCSSA:%.*]] = phi i64 [ [[LSR_IV_NEXT5]], [[LOOP_1_HEADER]] ] +; CHECK-NEXT: [[IV_NEXT_LCSSA:%.*]] = phi ptr [ [[IV_NEXT]], [[LOOP_1_HEADER]] ] ; CHECK-NEXT: br label [[LOOP_2_HEADER:%.*]] ; CHECK: loop.2.header: ; CHECK-NEXT: [[LSR_IV2:%.*]] = phi i64 [ [[LSR_IV_NEXT3:%.*]], 
[[LOOP_2_LATCH:%.*]] ], [ [[LSR_IV_NEXT5_LCSSA]], [[LOOP_2_PH]] ] diff --git a/llvm/test/Transforms/LoopStrengthReduce/X86/missing-phi-operand-update.ll b/llvm/test/Transforms/LoopStrengthReduce/X86/missing-phi-operand-update.ll index b13503543d6ee..ae24da06415cc 100644 --- a/llvm/test/Transforms/LoopStrengthReduce/X86/missing-phi-operand-update.ll +++ b/llvm/test/Transforms/LoopStrengthReduce/X86/missing-phi-operand-update.ll @@ -18,23 +18,24 @@ define i32 @foo(ptr %A, i32 %t) { ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[LOOP_32:%.*]] ; CHECK: loop.exit.loopexitsplitsplitsplit: -; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[LSR_IV:%.*]], -1 +; CHECK-NEXT: [[LSR_IV:%.*]] = phi i64 [ [[LSR_IV1:%.*]], [[IFMERGE_34:%.*]] ] +; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[LSR_IV]], -1 ; CHECK-NEXT: br label [[LOOP_EXIT_LOOPEXITSPLITSPLIT:%.*]] ; CHECK: ifmerge.38.loop.exit.loopexitsplitsplit_crit_edge: -; CHECK-NEXT: [[LSR_IV_LCSSA10:%.*]] = phi i64 [ [[LSR_IV]], [[IFMERGE_38:%.*]] ] +; CHECK-NEXT: [[LSR_IV_LCSSA10:%.*]] = phi i64 [ [[LSR_IV1]], [[IFMERGE_38:%.*]] ] ; CHECK-NEXT: br label [[LOOP_EXIT_LOOPEXITSPLITSPLIT]] ; CHECK: loop.exit.loopexitsplitsplit: ; CHECK-NEXT: [[INDVARS_IV_LCSSA_PH_PH_PH:%.*]] = phi i64 [ [[LSR_IV_LCSSA10]], [[IFMERGE_38_LOOP_EXIT_LOOPEXITSPLITSPLIT_CRIT_EDGE:%.*]] ], [ [[TMP0]], [[LOOP_EXIT_LOOPEXITSPLITSPLITSPLIT:%.*]] ] ; CHECK-NEXT: br label [[LOOP_EXIT_LOOPEXITSPLIT:%.*]] ; CHECK: ifmerge.42.loop.exit.loopexitsplit_crit_edge: -; CHECK-NEXT: [[LSR_IV_LCSSA11:%.*]] = phi i64 [ [[LSR_IV]], [[IFMERGE_42:%.*]] ] +; CHECK-NEXT: [[LSR_IV_LCSSA11:%.*]] = phi i64 [ [[LSR_IV1]], [[IFMERGE_42:%.*]] ] ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[LSR_IV_LCSSA11]], 1 ; CHECK-NEXT: br label [[LOOP_EXIT_LOOPEXITSPLIT]] ; CHECK: loop.exit.loopexitsplit: ; CHECK-NEXT: [[INDVARS_IV_LCSSA_PH_PH:%.*]] = phi i64 [ [[TMP1]], [[IFMERGE_42_LOOP_EXIT_LOOPEXITSPLIT_CRIT_EDGE:%.*]] ], [ [[INDVARS_IV_LCSSA_PH_PH_PH]], [[LOOP_EXIT_LOOPEXITSPLITSPLIT]] ] ; CHECK-NEXT: br label 
[[LOOP_EXIT_LOOPEXIT:%.*]] ; CHECK: then.34.loop.exit.loopexit_crit_edge: -; CHECK-NEXT: [[LSR_IV_LCSSA:%.*]] = phi i64 [ [[LSR_IV]], [[THEN_34:%.*]] ] +; CHECK-NEXT: [[LSR_IV_LCSSA:%.*]] = phi i64 [ [[LSR_IV1]], [[THEN_34:%.*]] ] ; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[LSR_IV_LCSSA]], -2 ; CHECK-NEXT: br label [[LOOP_EXIT_LOOPEXIT]] ; CHECK: loop.exit.loopexit: @@ -48,23 +49,23 @@ define i32 @foo(ptr %A, i32 %t) { ; CHECK-NEXT: [[I_0_LCSSA:%.*]] = phi i32 [ [[TMP]], [[LOOP_EXIT]] ], [ 50, [[THEN_8_1]] ], [ 50, [[IFMERGE_8:%.*]] ] ; CHECK-NEXT: ret i32 [[I_0_LCSSA]] ; CHECK: loop.32: -; CHECK-NEXT: [[LSR_IV]] = phi i64 [ [[LSR_IV_NEXT:%.*]], [[IFMERGE_46:%.*]] ], [ 2, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[LSR_IV1]] = phi i64 [ [[LSR_IV_NEXT:%.*]], [[IFMERGE_46:%.*]] ], [ 2, [[ENTRY:%.*]] ] ; CHECK-NEXT: [[I1_I64_0:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[NEXTIVLOOP_32:%.*]], [[IFMERGE_46]] ] -; CHECK-NEXT: [[TMP3:%.*]] = shl nuw nsw i64 [[LSR_IV]], 2 +; CHECK-NEXT: [[TMP3:%.*]] = shl nuw nsw i64 [[LSR_IV1]], 2 ; CHECK-NEXT: [[SCEVGEP7:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP3]] ; CHECK-NEXT: [[SCEVGEP8:%.*]] = getelementptr i8, ptr [[SCEVGEP7]], i64 -4 ; CHECK-NEXT: [[GEPLOAD:%.*]] = load i32, ptr [[SCEVGEP8]], align 4 ; CHECK-NEXT: [[CMP_34:%.*]] = icmp sgt i32 [[GEPLOAD]], [[T]] -; CHECK-NEXT: br i1 [[CMP_34]], label [[THEN_34]], label [[IFMERGE_34:%.*]] +; CHECK-NEXT: br i1 [[CMP_34]], label [[THEN_34]], label [[IFMERGE_34]] ; CHECK: then.34: -; CHECK-NEXT: [[TMP4:%.*]] = shl nuw nsw i64 [[LSR_IV]], 2 +; CHECK-NEXT: [[TMP4:%.*]] = shl nuw nsw i64 [[LSR_IV1]], 2 ; CHECK-NEXT: [[SCEVGEP5:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP4]] ; CHECK-NEXT: [[SCEVGEP6:%.*]] = getelementptr i8, ptr [[SCEVGEP5]], i64 -8 ; CHECK-NEXT: [[GEPLOAD18:%.*]] = load i32, ptr [[SCEVGEP6]], align 4 ; CHECK-NEXT: [[CMP_35:%.*]] = icmp slt i32 [[GEPLOAD18]], [[T]] ; CHECK-NEXT: br i1 [[CMP_35]], label [[THEN_34_LOOP_EXIT_LOOPEXIT_CRIT_EDGE]], label [[IFMERGE_34]] ; CHECK: ifmerge.34: -; 
CHECK-NEXT: [[TMP5:%.*]] = shl nuw nsw i64 [[LSR_IV]], 2 +; CHECK-NEXT: [[TMP5:%.*]] = shl nuw nsw i64 [[LSR_IV1]], 2 ; CHECK-NEXT: [[SCEVGEP4:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP5]] ; CHECK-NEXT: [[GEPLOAD20:%.*]] = load i32, ptr [[SCEVGEP4]], align 4 ; CHECK-NEXT: [[CMP_38:%.*]] = icmp sgt i32 [[GEPLOAD20]], [[T]] @@ -72,7 +73,7 @@ define i32 @foo(ptr %A, i32 %t) { ; CHECK-NEXT: [[OR_COND:%.*]] = and i1 [[CMP_38]], [[CMP_39]] ; CHECK-NEXT: br i1 [[OR_COND]], label [[LOOP_EXIT_LOOPEXITSPLITSPLITSPLIT]], label [[IFMERGE_38]] ; CHECK: ifmerge.38: -; CHECK-NEXT: [[TMP6:%.*]] = shl nuw nsw i64 [[LSR_IV]], 2 +; CHECK-NEXT: [[TMP6:%.*]] = shl nuw nsw i64 [[LSR_IV1]], 2 ; CHECK-NEXT: [[SCEVGEP2:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP6]] ; CHECK-NEXT: [[SCEVGEP3:%.*]] = getelementptr i8, ptr [[SCEVGEP2]], i64 4 ; CHECK-NEXT: [[GEPLOAD24:%.*]] = load i32, ptr [[SCEVGEP3]], align 4 @@ -81,7 +82,7 @@ define i32 @foo(ptr %A, i32 %t) { ; CHECK-NEXT: [[OR_COND55:%.*]] = and i1 [[CMP_42]], [[CMP_43]] ; CHECK-NEXT: br i1 [[OR_COND55]], label [[IFMERGE_38_LOOP_EXIT_LOOPEXITSPLITSPLIT_CRIT_EDGE]], label [[IFMERGE_42]] ; CHECK: ifmerge.42: -; CHECK-NEXT: [[TMP7:%.*]] = shl nuw nsw i64 [[LSR_IV]], 2 +; CHECK-NEXT: [[TMP7:%.*]] = shl nuw nsw i64 [[LSR_IV1]], 2 ; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP7]] ; CHECK-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[SCEVGEP]], i64 8 ; CHECK-NEXT: [[GEPLOAD28:%.*]] = load i32, ptr [[SCEVGEP1]], align 4 @@ -91,7 +92,7 @@ define i32 @foo(ptr %A, i32 %t) { ; CHECK-NEXT: br i1 [[OR_COND56]], label [[IFMERGE_42_LOOP_EXIT_LOOPEXITSPLIT_CRIT_EDGE]], label [[IFMERGE_46]] ; CHECK: ifmerge.46: ; CHECK-NEXT: [[NEXTIVLOOP_32]] = add nuw nsw i64 [[I1_I64_0]], 1 -; CHECK-NEXT: [[LSR_IV_NEXT]] = add nuw nsw i64 [[LSR_IV]], 4 +; CHECK-NEXT: [[LSR_IV_NEXT]] = add nuw nsw i64 [[LSR_IV1]], 4 ; CHECK-NEXT: [[CONDLOOP_32:%.*]] = icmp ult i64 [[NEXTIVLOOP_32]], 12 ; CHECK-NEXT: br i1 [[CONDLOOP_32]], label 
[[LOOP_32]], label [[LOOP_25:%.*]] ; CHECK: loop.25: diff --git a/llvm/test/Transforms/LoopStrengthReduce/preserve-lcssa.ll b/llvm/test/Transforms/LoopStrengthReduce/preserve-lcssa.ll index 0add19e286f58..376831faa99fb 100644 --- a/llvm/test/Transforms/LoopStrengthReduce/preserve-lcssa.ll +++ b/llvm/test/Transforms/LoopStrengthReduce/preserve-lcssa.ll @@ -89,3 +89,119 @@ loop_exit_7: ; preds = %be_6, %loop_4 %val_i32_24.lcssa = phi i32 [ %val_i32_24, %be_6 ], [ %val_i32_24, %loop_4 ] br label %bb_5 } + +define i64 @test_duplicated_phis(i64 noundef %N) { +; LEGACYPM-LABEL: define i64 @test_duplicated_phis +; LEGACYPM-SAME: (i64 noundef [[N:%.*]]) { +; LEGACYPM-NEXT: entry: +; LEGACYPM-NEXT: [[MUL:%.*]] = shl i64 [[N]], 1 +; LEGACYPM-NEXT: [[CMP6_NOT:%.*]] = icmp eq i64 [[MUL]], 0 +; LEGACYPM-NEXT: br i1 [[CMP6_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY_PREHEADER:%.*]] +; LEGACYPM: for.body.preheader: +; LEGACYPM-NEXT: [[TMP0:%.*]] = icmp ult i64 [[MUL]], 4 +; LEGACYPM-NEXT: br i1 [[TMP0]], label [[FOR_END_LOOPEXIT_UNR_LCSSA:%.*]], label [[FOR_BODY_PREHEADER_NEW:%.*]] +; LEGACYPM: for.body.preheader.new: +; LEGACYPM-NEXT: [[UNROLL_ITER:%.*]] = and i64 [[MUL]], -4 +; LEGACYPM-NEXT: [[TMP1:%.*]] = add i64 [[UNROLL_ITER]], -4 +; LEGACYPM-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP1]], 2 +; LEGACYPM-NEXT: [[TMP3:%.*]] = shl nuw nsw i64 [[TMP2]], 1 +; LEGACYPM-NEXT: [[TMP4:%.*]] = sub i64 -3, [[TMP3]] +; LEGACYPM-NEXT: br label [[FOR_BODY:%.*]] +; LEGACYPM: for.body: +; LEGACYPM-NEXT: [[I_07:%.*]] = phi i64 [ 0, [[FOR_BODY_PREHEADER_NEW]] ], [ [[INC_3:%.*]], [[FOR_BODY]] ] +; LEGACYPM-NEXT: [[INC_3]] = add i64 [[I_07]], 4 +; LEGACYPM-NEXT: [[NITER_NCMP_3_NOT:%.*]] = icmp eq i64 [[UNROLL_ITER]], [[INC_3]] +; LEGACYPM-NEXT: br i1 [[NITER_NCMP_3_NOT]], label [[FOR_END_LOOPEXIT_UNR_LCSSA_LOOPEXIT:%.*]], label [[FOR_BODY]] +; LEGACYPM: for.end.loopexit.unr-lcssa.loopexit: +; LEGACYPM-NEXT: [[TMP5:%.*]] = add i64 [[TMP4]], 1 +; LEGACYPM-NEXT: br label [[FOR_END_LOOPEXIT_UNR_LCSSA]] 
+; LEGACYPM: for.end.loopexit.unr-lcssa: +; LEGACYPM-NEXT: [[RES_1_LCSSA_PH:%.*]] = phi i64 [ undef, [[FOR_BODY_PREHEADER]] ], [ [[TMP5]], [[FOR_END_LOOPEXIT_UNR_LCSSA_LOOPEXIT]] ] +; LEGACYPM-NEXT: [[RES_09_UNR:%.*]] = phi i64 [ -1, [[FOR_BODY_PREHEADER]] ], [ [[TMP4]], [[FOR_END_LOOPEXIT_UNR_LCSSA_LOOPEXIT]] ] +; LEGACYPM-NEXT: [[TMP6:%.*]] = and i64 [[N]], 1 +; LEGACYPM-NEXT: [[LCMP_MOD_NOT:%.*]] = icmp eq i64 [[TMP6]], 0 +; LEGACYPM-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[LCMP_MOD_NOT]], i64 [[RES_1_LCSSA_PH]], i64 [[RES_09_UNR]] +; LEGACYPM-NEXT: br label [[FOR_END]] +; LEGACYPM: for.end: +; LEGACYPM-NEXT: [[RES_0_LCSSA:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[SPEC_SELECT]], [[FOR_END_LOOPEXIT_UNR_LCSSA]] ] +; LEGACYPM-NEXT: ret i64 [[RES_0_LCSSA]] +; +; NEWPM-LABEL: define i64 @test_duplicated_phis +; NEWPM-SAME: (i64 noundef [[N:%.*]]) { +; NEWPM-NEXT: entry: +; NEWPM-NEXT: [[MUL:%.*]] = shl i64 [[N]], 1 +; NEWPM-NEXT: [[CMP6_NOT:%.*]] = icmp eq i64 [[MUL]], 0 +; NEWPM-NEXT: br i1 [[CMP6_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY_PREHEADER:%.*]] +; NEWPM: for.body.preheader: +; NEWPM-NEXT: [[TMP0:%.*]] = icmp ult i64 [[MUL]], 4 +; NEWPM-NEXT: br i1 [[TMP0]], label [[FOR_END_LOOPEXIT_UNR_LCSSA:%.*]], label [[FOR_BODY_PREHEADER_NEW:%.*]] +; NEWPM: for.body.preheader.new: +; NEWPM-NEXT: [[UNROLL_ITER:%.*]] = and i64 [[MUL]], -4 +; NEWPM-NEXT: br label [[FOR_BODY:%.*]] +; NEWPM: for.body: +; NEWPM-NEXT: [[LSR_IV:%.*]] = phi i64 [ [[LSR_IV_NEXT:%.*]], [[FOR_BODY]] ], [ 3, [[FOR_BODY_PREHEADER_NEW]] ] +; NEWPM-NEXT: [[I_07:%.*]] = phi i64 [ 0, [[FOR_BODY_PREHEADER_NEW]] ], [ [[INC_3:%.*]], [[FOR_BODY]] ] +; NEWPM-NEXT: [[INC_3]] = add i64 [[I_07]], 4 +; NEWPM-NEXT: [[LSR_IV_NEXT]] = add nsw i64 [[LSR_IV]], -2 +; NEWPM-NEXT: [[NITER_NCMP_3_NOT:%.*]] = icmp eq i64 [[UNROLL_ITER]], [[INC_3]] +; NEWPM-NEXT: [[TMP1:%.*]] = add i64 [[LSR_IV_NEXT]], -3 +; NEWPM-NEXT: br i1 [[NITER_NCMP_3_NOT]], label [[FOR_END_LOOPEXIT_UNR_LCSSA_LOOPEXIT:%.*]], label [[FOR_BODY]] +; 
NEWPM: for.end.loopexit.unr-lcssa.loopexit: +; NEWPM-NEXT: [[REASS_SUB_LCSSA:%.*]] = phi i64 [ [[LSR_IV_NEXT]], [[FOR_BODY]] ] +; NEWPM-NEXT: [[RES_1_3_LCSSA:%.*]] = phi i64 [ [[TMP1]], [[FOR_BODY]] ] +; NEWPM-NEXT: [[TMP2:%.*]] = add i64 [[REASS_SUB_LCSSA]], -4 +; NEWPM-NEXT: br label [[FOR_END_LOOPEXIT_UNR_LCSSA]] +; NEWPM: for.end.loopexit.unr-lcssa: +; NEWPM-NEXT: [[RES_1_LCSSA_PH:%.*]] = phi i64 [ undef, [[FOR_BODY_PREHEADER]] ], [ [[RES_1_3_LCSSA]], [[FOR_END_LOOPEXIT_UNR_LCSSA_LOOPEXIT]] ] +; NEWPM-NEXT: [[RES_09_UNR:%.*]] = phi i64 [ -1, [[FOR_BODY_PREHEADER]] ], [ [[TMP2]], [[FOR_END_LOOPEXIT_UNR_LCSSA_LOOPEXIT]] ] +; NEWPM-NEXT: [[TMP3:%.*]] = and i64 [[N]], 1 +; NEWPM-NEXT: [[LCMP_MOD_NOT:%.*]] = icmp eq i64 [[TMP3]], 0 +; NEWPM-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[LCMP_MOD_NOT]], i64 [[RES_1_LCSSA_PH]], i64 [[RES_09_UNR]] +; NEWPM-NEXT: br label [[FOR_END]] +; NEWPM: for.end: +; NEWPM-NEXT: [[RES_0_LCSSA:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[SPEC_SELECT]], [[FOR_END_LOOPEXIT_UNR_LCSSA]] ] +; NEWPM-NEXT: ret i64 [[RES_0_LCSSA]] +; +entry: + %mul = shl i64 %N, 1 + %cmp6.not = icmp eq i64 %mul, 0 + br i1 %cmp6.not, label %for.end, label %for.body.preheader + +for.body.preheader: + %0 = icmp ult i64 %mul, 4 + br i1 %0, label %for.end.loopexit.unr-lcssa, label %for.body.preheader.new + +for.body.preheader.new: + %unroll_iter = and i64 %mul, -4 + br label %for.body + +for.body: + %res.09 = phi i64 [ 0, %for.body.preheader.new ], [ %res.1.3, %for.body ] + %i.07 = phi i64 [ 0, %for.body.preheader.new ], [ %inc.3, %for.body ] + %niter = phi i64 [ 0, %for.body.preheader.new ], [ %niter.next.3, %for.body ] + %res.1.1 = add i64 %res.09, -1 + %inc.1 = or disjoint i64 %i.07, 2 + %res.1.2 = add i64 %inc.1, %res.1.1 + %reass.sub = sub i64 %res.1.2, %i.07 + %res.1.3 = add i64 %reass.sub, -3 + %inc.3 = add nuw i64 %i.07, 4 + %niter.next.3 = add i64 %niter, 4 + %niter.ncmp.3.not = icmp eq i64 %niter.next.3, %unroll_iter + br i1 %niter.ncmp.3.not, label 
%for.end.loopexit.unr-lcssa.loopexit, label %for.body + +for.end.loopexit.unr-lcssa.loopexit: + %1 = add i64 %reass.sub, -4 + br label %for.end.loopexit.unr-lcssa + +for.end.loopexit.unr-lcssa: + %res.1.lcssa.ph = phi i64 [ undef, %for.body.preheader ], [ %res.1.3, %for.end.loopexit.unr-lcssa.loopexit ] + %res.09.unr = phi i64 [ -1, %for.body.preheader ], [ %1, %for.end.loopexit.unr-lcssa.loopexit ] + %2 = and i64 %N, 1 + %lcmp.mod.not = icmp eq i64 %2, 0 + %spec.select = select i1 %lcmp.mod.not, i64 %res.1.lcssa.ph, i64 %res.09.unr + br label %for.end + +for.end: + %res.0.lcssa = phi i64 [ 0, %entry ], [ %spec.select, %for.end.loopexit.unr-lcssa ] + ret i64 %res.0.lcssa +}