Skip to content

[Inline][Cloning] Defer constant propagation after phi-nodes resolution #87963

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
92 changes: 35 additions & 57 deletions llvm/lib/Transforms/Utils/CloneFunction.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@

#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopInfo.h"
Expand Down Expand Up @@ -540,18 +541,13 @@ void PruningFunctionCloner::CloneBlock(
RemapInstruction(NewInst, VMap,
ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges);

// If we can simplify this instruction to some other value, simply add
// a mapping to that value rather than inserting a new instruction into
// the basic block.
if (Value *V =
simplifyInstruction(NewInst, BB->getModule()->getDataLayout())) {
// On the off-chance that this simplifies to an instruction in the old
// function, map it back into the new function.
if (NewFunc != OldFunc)
if (Value *MappedV = VMap.lookup(V))
V = MappedV;

if (!NewInst->mayHaveSideEffects()) {
// Eagerly constant fold the newly cloned instruction. If successful, add
// a mapping to the new value. Non-constant operands may be incomplete at
// this stage, thus instruction simplification is performed after
// processing phi-nodes.
if (Value *V = ConstantFoldInstruction(
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe worth explicitly noting that non-constant operands might be incomplete at this point, so we don't want to try to do simplifications based on them.

NewInst, BB->getModule()->getDataLayout())) {
if (isInstructionTriviallyDead(NewInst)) {
VMap[&*II] = V;
NewInst->eraseFromParent();
continue;
Expand Down Expand Up @@ -823,52 +819,34 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc,
}
}

// Make a second pass over the PHINodes now that all of them have been
// remapped into the new function, simplifying the PHINode and performing any
// recursive simplifications exposed. This will transparently update the
// WeakTrackingVH in the VMap. Notably, we rely on that so that if we coalesce
// two PHINodes, the iteration over the old PHIs remains valid, and the
// mapping will just map us to the new node (which may not even be a PHI
// node).
// As phi-nodes have been now remapped, allow incremental simplification of
// newly-cloned instructions.
const DataLayout &DL = NewFunc->getParent()->getDataLayout();
SmallSetVector<const Value *, 8> Worklist;
for (unsigned Idx = 0, Size = PHIToResolve.size(); Idx != Size; ++Idx)
if (isa<PHINode>(VMap[PHIToResolve[Idx]]))
Worklist.insert(PHIToResolve[Idx]);

// Note that we must test the size on each iteration, the worklist can grow.
for (unsigned Idx = 0; Idx != Worklist.size(); ++Idx) {
const Value *OrigV = Worklist[Idx];
auto *I = dyn_cast_or_null<Instruction>(VMap.lookup(OrigV));
if (!I)
continue;

// Skip over non-intrinsic callsites, we don't want to remove any nodes from
// the CGSCC.
CallBase *CB = dyn_cast<CallBase>(I);
if (CB && CB->getCalledFunction() &&
!CB->getCalledFunction()->isIntrinsic())
continue;

// See if this instruction simplifies.
Value *SimpleV = simplifyInstruction(I, DL);
if (!SimpleV)
continue;

// Stash away all the uses of the old instruction so we can check them for
// recursive simplifications after a RAUW. This is cheaper than checking all
// uses of To on the recursive step in most cases.
for (const User *U : OrigV->users())
Worklist.insert(cast<Instruction>(U));

// Replace the instruction with its simplified value.
I->replaceAllUsesWith(SimpleV);

// If the original instruction had no side effects, remove it.
if (isInstructionTriviallyDead(I))
I->eraseFromParent();
else
VMap[OrigV] = I;
for (const auto &BB : *OldFunc) {
for (const auto &I : BB) {
auto *NewI = dyn_cast_or_null<Instruction>(VMap.lookup(&I));
if (!NewI)
continue;

// Skip over non-intrinsic callsites, we don't want to remove any nodes
// from the CGSCC.
CallBase *CB = dyn_cast<CallBase>(NewI);
if (CB && CB->getCalledFunction() &&
!CB->getCalledFunction()->isIntrinsic())
continue;

if (Value *V = simplifyInstruction(NewI, DL)) {
NewI->replaceAllUsesWith(V);

if (isInstructionTriviallyDead(NewI)) {
NewI->eraseFromParent();
} else {
// Did not erase it? Restore the new instruction into VMap previously
// dropped by `ValueIsRAUWd`.
VMap[&I] = NewI;
}
}
}
}

// Remap debug intrinsic operands now that all values have been mapped.
Expand Down
78 changes: 78 additions & 0 deletions llvm/test/Transforms/Inline/inline-deferred-instsimplify.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
; RUN: opt < %s -passes=inline -S | FileCheck %s
; RUN: opt < %s -passes='cgscc(inline)' -S | FileCheck %s

%struct.a = type { i32, i32, i32, i32, i32 }

@g_var = global %struct.a { i32 1, i32 0, i32 0, i32 0, i32 0 }, align 8
@other_g_var = global %struct.a zeroinitializer, align 4

; Callee takes a byval struct pointer. The phi in %check_pointers_are_equal
; merges the byval copy's address with @other_g_var; after inlining, eager
; simplification of the compare against an incomplete phi previously folded
; it incorrectly — deferring simplification until phis are resolved keeps
; the compare (and the abort path) intact.
define void @callee(ptr noundef byval(%struct.a) align 8 %ptr) {
; CHECK-LABEL: define void @callee(
; CHECK-SAME: ptr noundef byval([[STRUCT_A:%.*]]) align 8 [[PTR:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[PTR]], align 8
; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i32 [[VAL]], 0
; CHECK-NEXT: br i1 [[DOTNOT]], label [[CHECK_POINTERS_ARE_EQUAL:%.*]], label [[STORE_PTR_IN_GVAR:%.*]]
; CHECK: store_ptr_in_gvar:
; CHECK-NEXT: store ptr [[PTR]], ptr @other_g_var, align 8
; CHECK-NEXT: br label [[CHECK_POINTERS_ARE_EQUAL]]
; CHECK: check_pointers_are_equal:
; CHECK-NEXT: [[PHI:%.*]] = phi ptr [ [[PTR]], [[STORE_PTR_IN_GVAR]] ], [ @other_g_var, [[ENTRY:%.*]] ]
; CHECK-NEXT: [[DOTNOT1:%.*]] = icmp eq ptr [[PHI]], [[PTR]]
; CHECK-NEXT: br i1 [[DOTNOT1]], label [[RETURN:%.*]], label [[ABORT:%.*]]
; CHECK: abort:
; CHECK-NEXT: call void @abort()
; CHECK-NEXT: unreachable
; CHECK: return:
; CHECK-NEXT: ret void
;
entry:
  ; Branch on the first field of the byval copy.
  %val = load i32, ptr %ptr, align 8
  %.not = icmp eq i32 %val, 0
  br i1 %.not, label %check_pointers_are_equal, label %store_ptr_in_gvar

store_ptr_in_gvar:                                ; preds = %entry
  store ptr %ptr, ptr @other_g_var, align 8
  br label %check_pointers_are_equal

check_pointers_are_equal:                         ; preds = %store_ptr_in_gvar, %entry
  ; This phi must not be folded with %ptr during cloning: after inlining,
  ; %ptr maps to a local alloca, so %phi == %ptr is not statically known.
  %phi = phi ptr [ %ptr, %store_ptr_in_gvar ], [ @other_g_var, %entry ]
  %.not1 = icmp eq ptr %phi, %ptr
  br i1 %.not1, label %return, label %abort

abort:                                            ; preds = %check_pointers_are_equal
  call void @abort()
  unreachable

return:                                           ; preds = %check_pointers_are_equal
  ret void
}

; After inlining @callee, the byval argument becomes a local alloca
; ([[G_VAR]]). The CHECK lines verify that the phi and the pointer
; compare survive inlining instead of being folded away prematurely.
define i32 @main() {
; CHECK-LABEL: define i32 @main() {
; CHECK-NEXT: [[G_VAR:%.*]] = alloca [[STRUCT_A:%.*]], align 8
; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 20, ptr [[G_VAR]])
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[G_VAR]], ptr align 1 @g_var, i64 20, i1 false)
; CHECK-NEXT: [[VAL_I:%.*]] = load i32, ptr [[G_VAR]], align 8
; CHECK-NEXT: [[DOTNOT_I:%.*]] = icmp eq i32 [[VAL_I]], 0
; CHECK-NEXT: br i1 [[DOTNOT_I]], label [[CHECK_POINTERS_ARE_EQUAL_I:%.*]], label [[STORE_PTR_IN_GVAR_I:%.*]]
; CHECK: store_ptr_in_gvar.i:
; CHECK-NEXT: store ptr [[G_VAR]], ptr @other_g_var, align 8
; CHECK-NEXT: br label [[CHECK_POINTERS_ARE_EQUAL_I]]
; CHECK: check_pointers_are_equal.i:
; CHECK-NEXT: [[PHI_I:%.*]] = phi ptr [ [[G_VAR]], [[STORE_PTR_IN_GVAR_I]] ], [ @other_g_var, [[TMP0:%.*]] ]
; CHECK-NEXT: [[DOTNOT1_I:%.*]] = icmp eq ptr [[PHI_I]], [[G_VAR]]
; CHECK-NEXT: br i1 [[DOTNOT1_I]], label [[CALLEE_EXIT:%.*]], label [[ABORT_I:%.*]]
; CHECK: abort.i:
; CHECK-NEXT: call void @abort()
; CHECK-NEXT: unreachable
; CHECK: callee.exit:
; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 20, ptr [[G_VAR]])
; CHECK-NEXT: ret i32 0
;
  call void @callee(ptr noundef byval(%struct.a) align 8 @g_var)
  ret i32 0
}

declare void @abort()
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,6 @@ define void @caller() {
!18 = !{!"VP", i32 0, i64 140, i64 111, i64 80, i64 222, i64 40, i64 333, i64 20}
attributes #0 = { alwaysinline }
; CHECK: ![[ENTRY_COUNT]] = !{!"function_entry_count", i64 600}
; CHECK: ![[COUNT_CALLEE1]] = !{!"branch_weights", i32 2000}
; CHECK: ![[COUNT_CALLEE]] = !{!"branch_weights", i32 1200}
; CHECK: ![[COUNT_IND_CALLEE]] = !{!"VP", i32 0, i64 84, i64 111, i64 48, i64 222, i64 24, i64 333, i64 12}
; CHECK: ![[COUNT_CALLER]] = !{!"branch_weights", i32 800}
Expand Down
1 change: 0 additions & 1 deletion llvm/test/Transforms/Inline/prof-update-sample.ll
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,6 @@ define void @caller() {
!17 = !{!"branch_weights", i32 400}
!18 = !{!"VP", i32 0, i64 140, i64 111, i64 80, i64 222, i64 40, i64 333, i64 20}
; CHECK: ![[ENTRY_COUNT]] = !{!"function_entry_count", i64 600}
; CHECK: ![[COUNT_CALLEE1]] = !{!"branch_weights", i32 2000}
; CHECK: ![[COUNT_CALLEE]] = !{!"branch_weights", i32 1200}
; CHECK: ![[COUNT_IND_CALLEE]] = !{!"VP", i32 0, i64 84, i64 111, i64 48, i64 222, i64 24, i64 333, i64 12}
; CHECK: ![[COUNT_CALLER]] = !{!"branch_weights", i32 800}
Expand Down