From 521a5934cf664622d0fb005745f81389b60f7899 Mon Sep 17 00:00:00 2001 From: "Nadeem, Usman" Date: Thu, 5 Dec 2024 14:51:42 -0800 Subject: [PATCH] [DFAJumpThreading] Don't bail early after encountering unpredictable values After #96127 landed, mshockwave reported that the pass was no longer threading SPEC2006/perlbench. After 96127 we started bailing out in `getStateDefMap` and rejecting the transformation because one of the unpredictable values was coming from inside the loop. There was no fundamental change in that function except that we started calling `Loop->contains(IncomingBB)` instead of `LoopBBs.count(IncomingBB)`. After some analysis I came to the conclusion that even before 96127 we would reject the transformation if we provided large enough limits on the path traversal (large enough so that LoopBBs contained blocks corresponding to that unpredictable value). In this patch I changed `getStateDefMap` to not terminate early on finding an unpredictable value, this is because `getPathsFromStateDefMap`, later, actually has checks to ensure that the final list of paths only have predictable values. As a result we can now partially thread functions like `negative6` in the tests that have some predictable paths. This patch does not really have any compile-time impact on the test suite without `-dfa-early-exit-heuristic=false` (early exit is enabled by default). Change-Id: Ie1633b370ed4a0eda8dea52650b40f6f66ef49a3 --- .../Transforms/Scalar/DFAJumpThreading.cpp | 29 ++++++------- .../DFAJumpThreading/dfa-unfold-select.ll | 42 ++++++++++++++++--- .../Transforms/DFAJumpThreading/negative.ll | 40 +++++++++++++++++- 3 files changed, 87 insertions(+), 24 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp b/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp index 3c4a40fab3e03..8a5c506eed694 100644 --- a/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp +++ b/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp @@ -109,7 +109,7 @@ static cl::opt MaxNumVisitiedPaths( "dfa-max-num-visited-paths", cl::desc( "Max number of blocks visited while enumerating paths around a switch"), - cl::Hidden, cl::init(2000)); + cl::Hidden, cl::init(2500)); static cl::opt MaxNumPaths("dfa-max-num-paths", @@ -754,17 +754,15 @@ struct AllSwitchPaths { return Res; } - /// Walk the use-def chain and collect all the state-defining instructions. - /// - /// Return an empty map if unpredictable values encountered inside the basic - /// blocks of \p LoopPaths. + /// Walk the use-def chain and collect all the state-defining blocks and the + /// PHI nodes in those blocks that define the state. StateDefMap getStateDefMap() const { StateDefMap Res; - Value *FirstDef = Switch->getOperand(0); - assert(isa(FirstDef) && "The first definition must be a phi."); + PHINode *FirstDef = dyn_cast(Switch->getOperand(0)); + assert(FirstDef && "The first definition must be a phi."); SmallVector Stack; - Stack.push_back(dyn_cast(FirstDef)); + Stack.push_back(FirstDef); SmallSet SeenValues; while (!Stack.empty()) { @@ -774,18 +772,15 @@ struct AllSwitchPaths { SeenValues.insert(CurPhi); for (BasicBlock *IncomingBB : CurPhi->blocks()) { - Value *Incoming = CurPhi->getIncomingValueForBlock(IncomingBB); + PHINode *IncomingPhi = + dyn_cast(CurPhi->getIncomingValueForBlock(IncomingBB)); + if (!IncomingPhi) + continue; bool IsOutsideLoops = !SwitchOuterLoop->contains(IncomingBB); - if (Incoming == FirstDef || isa(Incoming) || - SeenValues.contains(Incoming) || IsOutsideLoops) { + if (SeenValues.contains(IncomingPhi) || IsOutsideLoops) continue; - } - - // Any unpredictable value inside the loops means we must bail out. - if (!isa(Incoming)) - return StateDefMap(); - Stack.push_back(cast(Incoming)); + Stack.push_back(IncomingPhi); } } diff --git a/llvm/test/Transforms/DFAJumpThreading/dfa-unfold-select.ll b/llvm/test/Transforms/DFAJumpThreading/dfa-unfold-select.ll index 366446a1cc9e4..93872c3938768 100644 --- a/llvm/test/Transforms/DFAJumpThreading/dfa-unfold-select.ll +++ b/llvm/test/Transforms/DFAJumpThreading/dfa-unfold-select.ll @@ -381,26 +381,58 @@ define void @pr65222(i32 %flags, i1 %cmp, i1 %tobool.not) { ; CHECK: then: ; CHECK-NEXT: br i1 [[TOBOOL_NOT:%.*]], label [[COND1_SI_UNFOLD_TRUE:%.*]], label [[COND_SI_UNFOLD_TRUE:%.*]] ; CHECK: cond.si.unfold.true: +; CHECK-NEXT: br i1 [[CMP]], label [[TOUNFOLD_SI_UNFOLD_FALSE1:%.*]], label [[COND_SI_UNFOLD_FALSE_JT0:%.*]] +; CHECK: cond.si.unfold.true.jt2: ; CHECK-NEXT: [[DOTSI_UNFOLD_PHI:%.*]] = phi i32 [ 2, [[THEN]] ] ; CHECK-NEXT: br i1 [[CMP]], label [[TOUNFOLD_SI_UNFOLD_FALSE:%.*]], label [[COND_SI_UNFOLD_FALSE:%.*]] ; CHECK: cond.si.unfold.false: ; CHECK-NEXT: [[DOTSI_UNFOLD_PHI1:%.*]] = phi i32 [ 0, [[COND_SI_UNFOLD_TRUE]] ] -; CHECK-NEXT: br label [[TOUNFOLD_SI_UNFOLD_FALSE]] +; CHECK-NEXT: br label [[TOUNFOLD_SI_UNFOLD_FALSE1]] +; CHECK: cond.si.unfold.false.jt0: +; CHECK-NEXT: [[DOTSI_UNFOLD_PHI1_JT0:%.*]] = phi i32 [ 0, [[COND_SI_UNFOLD_TRUE1:%.*]] ] +; CHECK-NEXT: br label [[TOUNFOLD_SI_UNFOLD_FALSE_JT0:%.*]] ; CHECK: tounfold.si.unfold.false: -; CHECK-NEXT: [[COND_SI_UNFOLD_PHI:%.*]] = phi i32 [ [[DOTSI_UNFOLD_PHI]], [[COND_SI_UNFOLD_TRUE]] ], [ [[DOTSI_UNFOLD_PHI1]], [[COND_SI_UNFOLD_FALSE]] ] +; CHECK-NEXT: [[COND_SI_UNFOLD_PHI:%.*]] = phi i32 [ poison, [[COND_SI_UNFOLD_TRUE1]] ], [ [[DOTSI_UNFOLD_PHI1]], [[COND_SI_UNFOLD_FALSE]] ] ; CHECK-NEXT: br label [[IF_END]] +; CHECK: tounfold.si.unfold.false.jt0: +; CHECK-NEXT: [[COND_SI_UNFOLD_PHI_JT0:%.*]] = phi i32 [ [[DOTSI_UNFOLD_PHI1_JT0]], [[COND_SI_UNFOLD_FALSE_JT0]] ] +; CHECK-NEXT: br label [[IF_END_JT0:%.*]] +; CHECK: tounfold.si.unfold.false.jt2: +; CHECK-NEXT: [[COND_SI_UNFOLD_PHI_JT2:%.*]] = phi i32 [ [[DOTSI_UNFOLD_PHI]], [[COND_SI_UNFOLD_TRUE]] ] +; CHECK-NEXT: br label [[IF_END_JT2:%.*]] ; CHECK: cond1.si.unfold.true: +; CHECK-NEXT: br i1 [[CMP]], label [[IF_END]], label [[COND1_SI_UNFOLD_FALSE_JT1:%.*]] +; CHECK: cond1.si.unfold.true.jt3: ; CHECK-NEXT: [[DOTSI_UNFOLD_PHI2:%.*]] = phi i32 [ 3, [[THEN]] ] -; CHECK-NEXT: br i1 [[CMP]], label [[IF_END]], label [[COND1_SI_UNFOLD_FALSE:%.*]] +; CHECK-NEXT: br i1 [[CMP]], label [[IF_END_JT3:%.*]], label [[COND1_SI_UNFOLD_FALSE:%.*]] ; CHECK: cond1.si.unfold.false: ; CHECK-NEXT: [[DOTSI_UNFOLD_PHI3:%.*]] = phi i32 [ 1, [[COND1_SI_UNFOLD_TRUE]] ] ; CHECK-NEXT: br label [[IF_END]] +; CHECK: cond1.si.unfold.false.jt1: +; CHECK-NEXT: [[DOTSI_UNFOLD_PHI3_JT1:%.*]] = phi i32 [ 1, [[COND1_SI_UNFOLD_TRUE1:%.*]] ] +; CHECK-NEXT: br label [[IF_END_JT1:%.*]] ; CHECK: if.end: -; CHECK-NEXT: [[UNFOLDED:%.*]] = phi i32 [ [[FLAGS:%.*]], [[WHILE_COND]] ], [ [[COND_SI_UNFOLD_PHI]], [[TOUNFOLD_SI_UNFOLD_FALSE]] ], [ [[DOTSI_UNFOLD_PHI2]], [[COND1_SI_UNFOLD_TRUE]] ], [ [[DOTSI_UNFOLD_PHI3]], [[COND1_SI_UNFOLD_FALSE]] ] -; CHECK-NEXT: [[OTHER:%.*]] = phi i32 [ [[FLAGS]], [[WHILE_COND]] ], [ 0, [[TOUNFOLD_SI_UNFOLD_FALSE]] ], [ 0, [[COND1_SI_UNFOLD_TRUE]] ], [ 0, [[COND1_SI_UNFOLD_FALSE]] ] +; CHECK-NEXT: [[UNFOLDED:%.*]] = phi i32 [ [[FLAGS:%.*]], [[WHILE_COND]] ], [ [[COND_SI_UNFOLD_PHI]], [[TOUNFOLD_SI_UNFOLD_FALSE1]] ], [ poison, [[COND1_SI_UNFOLD_TRUE1]] ], [ [[DOTSI_UNFOLD_PHI3]], [[COND1_SI_UNFOLD_FALSE]] ] +; CHECK-NEXT: [[OTHER:%.*]] = phi i32 [ [[FLAGS]], [[WHILE_COND]] ], [ 0, [[TOUNFOLD_SI_UNFOLD_FALSE1]] ], [ 0, [[COND1_SI_UNFOLD_TRUE1]] ], [ 0, [[COND1_SI_UNFOLD_FALSE]] ] ; CHECK-NEXT: switch i32 [[UNFOLDED]], label [[UNREACHABLE:%.*]] [ ; CHECK-NEXT: i32 0, label [[SW_BB:%.*]] ; CHECK-NEXT: ] +; CHECK: if.end.jt1: +; CHECK-NEXT: [[UNFOLDED_JT1:%.*]] = phi i32 [ [[DOTSI_UNFOLD_PHI3_JT1]], [[COND1_SI_UNFOLD_FALSE_JT1]] ] +; CHECK-NEXT: [[OTHER_JT1:%.*]] = phi i32 [ 0, [[COND1_SI_UNFOLD_FALSE_JT1]] ] +; CHECK-NEXT: br label [[UNREACHABLE]] +; CHECK: if.end.jt3: +; CHECK-NEXT: [[UNFOLDED_JT3:%.*]] = phi i32 [ [[DOTSI_UNFOLD_PHI2]], [[COND1_SI_UNFOLD_TRUE]] ] +; CHECK-NEXT: [[OTHER_JT3:%.*]] = phi i32 [ 0, [[COND1_SI_UNFOLD_TRUE]] ] +; CHECK-NEXT: br label [[UNREACHABLE]] +; CHECK: if.end.jt0: +; CHECK-NEXT: [[UNFOLDED_JT0:%.*]] = phi i32 [ [[COND_SI_UNFOLD_PHI_JT0]], [[TOUNFOLD_SI_UNFOLD_FALSE_JT0]] ] +; CHECK-NEXT: [[OTHER_JT0:%.*]] = phi i32 [ 0, [[TOUNFOLD_SI_UNFOLD_FALSE_JT0]] ] +; CHECK-NEXT: br label [[SW_BB]] +; CHECK: if.end.jt2: +; CHECK-NEXT: [[UNFOLDED_JT2:%.*]] = phi i32 [ [[COND_SI_UNFOLD_PHI_JT2]], [[TOUNFOLD_SI_UNFOLD_FALSE]] ] +; CHECK-NEXT: [[OTHER_JT2:%.*]] = phi i32 [ 0, [[TOUNFOLD_SI_UNFOLD_FALSE]] ] +; CHECK-NEXT: br label [[UNREACHABLE]] ; CHECK: unreachable: ; CHECK-NEXT: unreachable ; CHECK: sw.bb: diff --git a/llvm/test/Transforms/DFAJumpThreading/negative.ll b/llvm/test/Transforms/DFAJumpThreading/negative.ll index a964281427699..3eab1e14417fb 100644 --- a/llvm/test/Transforms/DFAJumpThreading/negative.ll +++ b/llvm/test/Transforms/DFAJumpThreading/negative.ll @@ -218,9 +218,45 @@ for.end: declare i32 @arbitrary_function() ; Don't confuse %state.2 for the initial switch value. +; [ 3, %case2 ] can still be threaded. define i32 @negative6(i32 %init) { -; REMARK: SwitchNotPredictable -; REMARK-NEXT: negative6 +; CHECK-LABEL: define i32 @negative6( +; CHECK-SAME: i32 [[INIT:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[INIT]], 0 +; CHECK-NEXT: br label %[[LOOP_2:.*]] +; CHECK: [[LOOP_2]]: +; CHECK-NEXT: [[STATE_2:%.*]] = call i32 @arbitrary_function() +; CHECK-NEXT: br label %[[LOOP_3:.*]] +; CHECK: [[LOOP_3]]: +; CHECK-NEXT: [[STATE:%.*]] = phi i32 [ [[STATE_2]], %[[LOOP_2]] ] +; CHECK-NEXT: switch i32 [[STATE]], label %[[INFLOOP_I:.*]] [ +; CHECK-NEXT: i32 2, label %[[CASE2:.*]] +; CHECK-NEXT: i32 3, label %[[CASE3:.*]] +; CHECK-NEXT: i32 4, label %[[CASE4:.*]] +; CHECK-NEXT: i32 0, label %[[CASE0:.*]] +; CHECK-NEXT: i32 1, label %[[CASE1:.*]] +; CHECK-NEXT: ] +; CHECK: [[LOOP_3_JT3:.*]]: +; CHECK-NEXT: [[STATE_JT3:%.*]] = phi i32 [ 3, %[[CASE2]] ] +; CHECK-NEXT: br label %[[CASE3]] +; CHECK: [[CASE2]]: +; CHECK-NEXT: br label %[[LOOP_3_JT3]] +; CHECK: [[CASE3]]: +; CHECK-NEXT: br i1 [[CMP]], label %[[LOOP_2_BACKEDGE:.*]], label %[[CASE4]] +; CHECK: [[CASE4]]: +; CHECK-NEXT: br label %[[LOOP_2_BACKEDGE]] +; CHECK: [[LOOP_2_BACKEDGE]]: +; CHECK-NEXT: br label %[[LOOP_2]] +; CHECK: [[CASE0]]: +; CHECK-NEXT: br label %[[EXIT:.*]] +; CHECK: [[CASE1]]: +; CHECK-NEXT: br label %[[EXIT]] +; CHECK: [[INFLOOP_I]]: +; CHECK-NEXT: br label %[[INFLOOP_I]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret i32 0 +; entry: %cmp = icmp eq i32 %init, 0 br label %loop.2