Skip to content

Commit d2ddfec

Browse files
committed
VPlan: implement VPlan-level constant-folding
Introduce VPlanConstantFolder, a variation of ConstantFolder for VPlan, and use it in VPBuilder to constant-fold when all the underlying IR values passed into the API are constants.
1 parent d9804c7 commit d2ddfec

12 files changed

+350
-137
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7941,7 +7941,8 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
79417941
OrigLoop->getHeader()->getContext());
79427942
VPlanTransforms::materializeBroadcasts(BestVPlan);
79437943
VPlanTransforms::optimizeForVFAndUF(BestVPlan, BestVF, BestUF, PSE);
7944-
VPlanTransforms::simplifyRecipes(BestVPlan, *Legal->getWidestInductionType());
7944+
VPlanTransforms::simplifyRecipes(BestVPlan, *Legal->getWidestInductionType(),
7945+
OrigLoop->getHeader()->getDataLayout());
79457946
VPlanTransforms::narrowInterleaveGroups(
79467947
BestVPlan, BestVF,
79477948
TTI.getRegisterBitWidth(TargetTransformInfo::RGK_FixedWidthVector));
@@ -9232,7 +9233,7 @@ void LoopVectorizationPlanner::buildVPlansWithVPRecipes(ElementCount MinVF,
92329233
if (!HasScalarVF)
92339234
VPlanTransforms::runPass(VPlanTransforms::truncateToMinimalBitwidths,
92349235
*Plan, CM.getMinimalBitwidths());
9235-
VPlanTransforms::optimize(*Plan);
9236+
VPlanTransforms::optimize(*Plan, OrigLoop->getHeader()->getDataLayout());
92369237
// TODO: try to put it close to addActiveLaneMask().
92379238
// Discard the plan if it is not EVL-compatible
92389239
if (CM.foldTailWithEVL() && !HasScalarVF &&

llvm/lib/Transforms/Vectorize/VPlan.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1548,7 +1548,7 @@ void LoopVectorizationPlanner::buildVPlans(ElementCount MinVF,
15481548
for (ElementCount VF = MinVF; ElementCount::isKnownLT(VF, MaxVFTimes2);) {
15491549
VFRange SubRange = {VF, MaxVFTimes2};
15501550
if (auto Plan = tryToBuildVPlan(SubRange)) {
1551-
VPlanTransforms::optimize(*Plan);
1551+
VPlanTransforms::optimize(*Plan, OrigLoop->getHeader()->getDataLayout());
15521552
// Update the name of the latch of the top-level vector loop region
15531553
// after optimizations which includes block folding.
15541554
Plan->getVectorLoopRegion()->getExiting()->setName("vector.latch");
Lines changed: 157 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,157 @@
1+
//===- VPlanConstantFolder.h - ConstantFolder for VPlan -------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#ifndef LLVM_TRANSFORMS_VECTORIZE_VPLANCONSTANTFOLDER_H
10+
#define LLVM_TRANSFORMS_VECTORIZE_VPLANCONSTANTFOLDER_H
11+
12+
#include "VPlan.h"
13+
#include "VPlanValue.h"
14+
#include "llvm/Analysis/TargetFolder.h"
15+
16+
namespace llvm {
17+
class VPConstantFolder {
18+
TargetFolder Folder;
19+
VPTypeAnalysis TypeInfo;
20+
21+
Constant *getIRConstant(VPValue *V) const {
22+
if (!V->isLiveIn())
23+
return nullptr;
24+
return dyn_cast_if_present<Constant>(V->getLiveInIRValue());
25+
}
26+
27+
Value *foldBinOp(Instruction::BinaryOps Opcode, VPValue *LHS,
28+
VPValue *RHS) const {
29+
auto *LC = getIRConstant(LHS);
30+
auto *RC = getIRConstant(RHS);
31+
if (LC && RC)
32+
return Folder.FoldBinOp(Opcode, LC, RC);
33+
return nullptr;
34+
}
35+
36+
Value *foldNot(VPValue *Op) const {
37+
auto *C = getIRConstant(Op);
38+
if (C)
39+
return Folder.FoldBinOp(Instruction::BinaryOps::Xor, C,
40+
Constant::getAllOnesValue(C->getType()));
41+
return nullptr;
42+
}
43+
44+
Value *foldLogicalAnd(VPValue *LHS, VPValue *RHS) const {
45+
auto *LC = getIRConstant(LHS);
46+
auto *RC = getIRConstant(RHS);
47+
if (LC && RC)
48+
return Folder.FoldSelect(LC, RC,
49+
ConstantInt::getNullValue(RC->getType()));
50+
return nullptr;
51+
}
52+
53+
Value *foldSelect(VPValue *Cond, VPValue *TrueVal, VPValue *FalseVal) const {
54+
auto *CC = getIRConstant(Cond);
55+
auto *TV = getIRConstant(TrueVal);
56+
auto *FV = getIRConstant(FalseVal);
57+
if (CC && TV && FV)
58+
return Folder.FoldSelect(CC, TV, FV);
59+
return nullptr;
60+
}
61+
62+
Value *foldCmp(CmpInst::Predicate Pred, VPValue *LHS, VPValue *RHS) const {
63+
auto *LC = getIRConstant(LHS);
64+
auto *RC = getIRConstant(RHS);
65+
if (LC && RC)
66+
return Folder.FoldCmp(Pred, LC, RC);
67+
return nullptr;
68+
}
69+
70+
Value *foldGEP(Type *Ty, VPValue *Base, ArrayRef<VPValue *> Offsets,
71+
GEPNoWrapFlags NW) const {
72+
auto *BC = getIRConstant(Base);
73+
if (!BC)
74+
return nullptr;
75+
SmallVector<Value *> IdxList;
76+
for (auto *O : Offsets) {
77+
if (auto *OffsetV = getIRConstant(O))
78+
IdxList.emplace_back(OffsetV);
79+
else
80+
return nullptr;
81+
}
82+
return Folder.FoldGEP(Ty, BC, IdxList, NW);
83+
}
84+
85+
Value *foldInsertElement(VPValue *Vec, VPValue *NewElt, VPValue *Idx) const {
86+
auto *VC = getIRConstant(Vec);
87+
auto *EC = getIRConstant(NewElt);
88+
auto *IC = getIRConstant(Idx);
89+
if (VC && EC && IC)
90+
Folder.FoldInsertElement(VC, EC, IC);
91+
return nullptr;
92+
}
93+
94+
Value *foldExtractElement(VPValue *Vec, VPValue *Idx) const {
95+
auto *VC = getIRConstant(Vec);
96+
auto *IC = getIRConstant(Idx);
97+
if (VC && IC)
98+
Folder.FoldExtractElement(VC, IC);
99+
return nullptr;
100+
}
101+
102+
Value *foldCast(Instruction::CastOps Opcode, VPValue *Op,
103+
Type *DestTy) const {
104+
auto *C = getIRConstant(Op);
105+
if (C)
106+
return Folder.FoldCast(Opcode, C, DestTy);
107+
return nullptr;
108+
}
109+
110+
public:
111+
VPConstantFolder(const DataLayout &DL, const VPTypeAnalysis &TypeInfo)
112+
: Folder(DL), TypeInfo(TypeInfo) {}
113+
114+
Value *tryToConstantFold(VPRecipeBase &R, unsigned Opcode,
115+
ArrayRef<VPValue *> Ops) {
116+
switch (Opcode) {
117+
case Instruction::BinaryOps::Add:
118+
case Instruction::BinaryOps::Sub:
119+
case Instruction::BinaryOps::Mul:
120+
case Instruction::BinaryOps::AShr:
121+
case Instruction::BinaryOps::LShr:
122+
case Instruction::BinaryOps::And:
123+
case Instruction::BinaryOps::Or:
124+
case Instruction::BinaryOps::Xor:
125+
return foldBinOp(static_cast<Instruction::BinaryOps>(Opcode), Ops[0],
126+
Ops[1]);
127+
case VPInstruction::LogicalAnd:
128+
return foldLogicalAnd(Ops[0], Ops[1]);
129+
case VPInstruction::Not:
130+
return foldNot(Ops[0]);
131+
case Instruction::Select:
132+
return foldSelect(Ops[0], Ops[1], Ops[2]);
133+
case Instruction::ICmp:
134+
case Instruction::FCmp:
135+
return foldCmp(cast<VPRecipeWithIRFlags>(R).getPredicate(), Ops[0],
136+
Ops[1]);
137+
case Instruction::GetElementPtr:
138+
case VPInstruction::PtrAdd:
139+
return foldGEP(TypeInfo.inferScalarType(R.getVPSingleValue()), Ops[0],
140+
Ops.drop_front(),
141+
cast<VPRecipeWithIRFlags>(R).getGEPNoWrapFlags());
142+
case Instruction::InsertElement:
143+
return foldInsertElement(Ops[0], Ops[1], Ops[2]);
144+
case Instruction::ExtractElement:
145+
return foldExtractElement(Ops[0], Ops[1]);
146+
case Instruction::CastOps::SExt:
147+
case Instruction::CastOps::ZExt:
148+
case Instruction::CastOps::Trunc:
149+
return foldCast(static_cast<Instruction::CastOps>(Opcode), Ops[0],
150+
TypeInfo.inferScalarType(R.getVPSingleValue()));
151+
}
152+
return nullptr;
153+
}
154+
};
155+
} // namespace llvm
156+
157+
#endif

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 22 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
#include "VPlan.h"
1717
#include "VPlanAnalysis.h"
1818
#include "VPlanCFG.h"
19+
#include "VPlanConstantFolder.h"
1920
#include "VPlanDominatorTree.h"
2021
#include "VPlanHelpers.h"
2122
#include "VPlanPatternMatch.h"
@@ -931,9 +932,21 @@ static void recursivelyDeleteDeadRecipes(VPValue *V) {
931932
}
932933

933934
/// Try to simplify recipe \p R.
934-
static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
935+
static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo,
936+
const DataLayout &DL) {
935937
using namespace llvm::VPlanPatternMatch;
936938

939+
// Constant folding.
940+
VPConstantFolder Folder(DL, TypeInfo);
941+
TypeSwitch<VPRecipeBase *, void>(&R)
942+
.Case<VPInstruction, VPWidenRecipe, VPWidenCastRecipe, VPReplicateRecipe>(
943+
[&](auto *I) {
944+
VPlan *Plan = R.getParent()->getPlan();
945+
ArrayRef<VPValue *> Ops(I->op_begin(), I->op_end());
946+
if (Value *V = Folder.tryToConstantFold(R, I->getOpcode(), Ops))
947+
R.getVPSingleValue()->replaceAllUsesWith(Plan->getOrAddLiveIn(V));
948+
});
949+
937950
// VPScalarIVSteps can only be simplified after unrolling. VPScalarIVSteps for
938951
// part 0 can be replaced by their start value, if only the first lane is
939952
// demanded.
@@ -1049,13 +1062,14 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
10491062
}
10501063
}
10511064

1052-
void VPlanTransforms::simplifyRecipes(VPlan &Plan, Type &CanonicalIVTy) {
1065+
void VPlanTransforms::simplifyRecipes(VPlan &Plan, Type &CanonicalIVTy,
1066+
const DataLayout &DL) {
10531067
ReversePostOrderTraversal<VPBlockDeepTraversalWrapper<VPBlockBase *>> RPOT(
10541068
Plan.getEntry());
10551069
VPTypeAnalysis TypeInfo(&CanonicalIVTy);
10561070
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(RPOT)) {
10571071
for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
1058-
simplifyRecipe(R, TypeInfo);
1072+
simplifyRecipe(R, TypeInfo, DL);
10591073
}
10601074
}
10611075
}
@@ -1306,7 +1320,8 @@ static bool simplifyBranchConditionForVFAndUF(VPlan &Plan, ElementCount BestVF,
13061320

13071321
VPBlockUtils::connectBlocks(Preheader, Header);
13081322
VPBlockUtils::connectBlocks(ExitingVPBB, Exit);
1309-
VPlanTransforms::simplifyRecipes(Plan, *CanIVTy);
1323+
VPlanTransforms::simplifyRecipes(Plan, *CanIVTy,
1324+
PSE.getSE()->getDataLayout());
13101325
} else {
13111326
// The vector region contains header phis for which we cannot remove the
13121327
// loop region yet.
@@ -1774,17 +1789,16 @@ static void removeBranchOnCondTrue(VPlan &Plan) {
17741789
VPBB->back().eraseFromParent();
17751790
}
17761791
}
1777-
1778-
void VPlanTransforms::optimize(VPlan &Plan) {
1792+
void VPlanTransforms::optimize(VPlan &Plan, const DataLayout &DL) {
17791793
runPass(removeRedundantCanonicalIVs, Plan);
17801794
runPass(removeRedundantInductionCasts, Plan);
17811795

1782-
runPass(simplifyRecipes, Plan, *Plan.getCanonicalIV()->getScalarType());
1796+
runPass(simplifyRecipes, Plan, *Plan.getCanonicalIV()->getScalarType(), DL);
17831797
runPass(simplifyBlends, Plan);
17841798
runPass(removeDeadRecipes, Plan);
17851799
runPass(legalizeAndOptimizeInductions, Plan);
17861800
runPass(removeRedundantExpandSCEVRecipes, Plan);
1787-
runPass(simplifyRecipes, Plan, *Plan.getCanonicalIV()->getScalarType());
1801+
runPass(simplifyRecipes, Plan, *Plan.getCanonicalIV()->getScalarType(), DL);
17881802
runPass(removeBranchOnCondTrue, Plan);
17891803
runPass(removeDeadRecipes, Plan);
17901804

llvm/lib/Transforms/Vectorize/VPlanTransforms.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -105,7 +105,7 @@ struct VPlanTransforms {
105105
/// Apply VPlan-to-VPlan optimizations to \p Plan, including induction recipe
106106
/// optimizations, dead recipe removal, replicate region optimizations and
107107
/// block merging.
108-
static void optimize(VPlan &Plan);
108+
static void optimize(VPlan &Plan, const DataLayout &DL);
109109

110110
/// Wrap predicated VPReplicateRecipes with a mask operand in an if-then
111111
/// region block and remove the mask operand. Optimize the created regions by
@@ -187,7 +187,8 @@ struct VPlanTransforms {
187187

188188
/// Perform instcombine-like simplifications on recipes in \p Plan. Use \p
189189
/// CanonicalIVTy as type for all un-typed live-ins in VPTypeAnalysis.
190-
static void simplifyRecipes(VPlan &Plan, Type &CanonicalIVTy);
190+
static void simplifyRecipes(VPlan &Plan, Type &CanonicalIVTy,
191+
const DataLayout &DL);
191192

192193
/// If there's a single exit block, optimize its phi recipes that use exiting
193194
/// IV values by feeding them precomputed end values instead, possibly taken

0 commit comments

Comments
 (0)