Skip to content

Commit 3b38332

Browse files
committed
Add new separate opcode, VPInstruction::WidePtrAdd
1 parent ecb3fa5 commit 3b38332

File tree

5 files changed

+38
-11
lines changed

5 files changed

+38
-11
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -254,6 +254,13 @@ class VPBuilder {
254254
new VPInstruction(VPInstruction::PtrAdd, {Ptr, Offset},
255255
GEPNoWrapFlags::inBounds(), DL, Name));
256256
}
257+
VPInstruction *createWidePtrAdd(VPValue *Ptr, VPValue *Offset,
258+
DebugLoc DL = DebugLoc::getUnknown(),
259+
const Twine &Name = "") {
260+
return tryInsertInstruction(
261+
new VPInstruction(VPInstruction::WidePtrAdd, {Ptr, Offset},
262+
GEPNoWrapFlags::none(), DL, Name));
263+
}
257264

258265
VPPhi *createScalarPhi(ArrayRef<VPValue *> IncomingValues, DebugLoc DL,
259266
const Twine &Name = "") {

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 6 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -958,11 +958,12 @@ class LLVM_ABI_FOR_TEST VPInstruction : public VPRecipeWithIRFlags,
958958
ExtractPenultimateElement,
959959
LogicalAnd, // Non-poison propagating logical And.
960960
// Add an offset in bytes (second operand) to a base pointer (first
961-
// operand). The base pointer must be scalar, but the offset can be a
962-
// scalar, multiple scalars, or a vector. If the offset is multiple scalars
963-
// then it will generate multiple scalar values (either for the first lane
964-
// only or for all lanes, depending on its uses).
961+
// operand). Only generates scalar values (either for the first lane only or
962+
// for all lanes, depending on its uses).
965963
PtrAdd,
964+
// Add a vector offset in bytes (second operand) to a scalar base pointer
965+
// (first operand).
966+
WidePtrAdd,
966967
// Returns a scalar boolean value, which is true if any lane of its
967968
// (boolean) vector operands is true. It produces the reduced value across
968969
// all unrolled iterations. Unrolling will add all copies of its original
@@ -1000,7 +1001,7 @@ class LLVM_ABI_FOR_TEST VPInstruction : public VPRecipeWithIRFlags,
10001001
/// values per all lanes, stemming from an original ingredient. This method
10011002
/// identifies the (rare) cases of VPInstructions that do so as well, without an
10021003
/// underlying ingredient.
1003-
bool doesGeneratePerAllLanes(VPTransformState &State) const;
1004+
bool doesGeneratePerAllLanes() const;
10041005

10051006
/// Returns true if we can generate a scalar for the first lane only if
10061007
/// needed.

llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -126,6 +126,7 @@ Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPInstruction *R) {
126126
return IntegerType::get(Ctx, 1);
127127
case VPInstruction::Broadcast:
128128
case VPInstruction::PtrAdd:
129+
case VPInstruction::WidePtrAdd:
129130
// Return the type based on the first operand.
130131
return inferScalarType(R->getOperand(0));
131132
case VPInstruction::BranchOnCond:

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 22 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -475,6 +475,7 @@ unsigned VPInstruction::getNumOperandsForOpcode(unsigned Opcode) {
475475
case VPInstruction::FirstOrderRecurrenceSplice:
476476
case VPInstruction::LogicalAnd:
477477
case VPInstruction::PtrAdd:
478+
case VPInstruction::WidePtrAdd:
478479
case VPInstruction::WideIVStep:
479480
return 2;
480481
case Instruction::Select:
@@ -494,9 +495,8 @@ unsigned VPInstruction::getNumOperandsForOpcode(unsigned Opcode) {
494495
}
495496
#endif
496497

497-
bool VPInstruction::doesGeneratePerAllLanes(VPTransformState &State) const {
498-
return Opcode == VPInstruction::PtrAdd && !vputils::onlyFirstLaneUsed(this) &&
499-
!State.hasVectorValue(getOperand(1));
498+
bool VPInstruction::doesGeneratePerAllLanes() const {
499+
return Opcode == VPInstruction::PtrAdd && !vputils::onlyFirstLaneUsed(this);
500500
}
501501

502502
bool VPInstruction::canGenerateScalarForFirstLane() const {
@@ -514,6 +514,7 @@ bool VPInstruction::canGenerateScalarForFirstLane() const {
514514
case VPInstruction::CalculateTripCountMinusVF:
515515
case VPInstruction::CanonicalIVIncrementForPart:
516516
case VPInstruction::PtrAdd:
517+
case VPInstruction::WidePtrAdd:
517518
case VPInstruction::ExplicitVectorLength:
518519
case VPInstruction::AnyOf:
519520
return true;
@@ -849,7 +850,14 @@ Value *VPInstruction::generate(VPTransformState &State) {
849850
return Builder.CreateLogicalAnd(A, B, Name);
850851
}
851852
case VPInstruction::PtrAdd: {
853+
assert(vputils::onlyFirstLaneUsed(this) &&
854+
"can only generate first lane for PtrAdd");
852855
Value *Ptr = State.get(getOperand(0), VPLane(0));
856+
Value *Addend = State.get(getOperand(1), VPLane(0));
857+
return Builder.CreatePtrAdd(Ptr, Addend, Name, getGEPNoWrapFlags());
858+
}
859+
case VPInstruction::WidePtrAdd: {
860+
Value *Ptr = State.get(getOperand(0), true);
853861
Value *Addend = State.get(getOperand(1), vputils::onlyFirstLaneUsed(this));
854862
return Builder.CreatePtrAdd(Ptr, Addend, Name, getGEPNoWrapFlags());
855863
}
@@ -910,6 +918,9 @@ InstructionCost VPInstruction::computeCost(ElementCount VF,
910918
}
911919
}
912920

921+
assert(!doesGeneratePerAllLanes() &&
922+
"Should only generate a vector value or single scalar, not scalars "
923+
"for all lanes.");
913924
return Ctx.TTI.getArithmeticInstrCost(getOpcode(), ResTy, Ctx.CostKind);
914925
}
915926

@@ -997,7 +1008,7 @@ void VPInstruction::execute(VPTransformState &State) {
9971008
bool GeneratesPerFirstLaneOnly = canGenerateScalarForFirstLane() &&
9981009
(vputils::onlyFirstLaneUsed(this) ||
9991010
isVectorToScalar() || isSingleScalar());
1000-
bool GeneratesPerAllLanes = doesGeneratePerAllLanes(State);
1011+
bool GeneratesPerAllLanes = doesGeneratePerAllLanes();
10011012
if (GeneratesPerAllLanes) {
10021013
for (unsigned Lane = 0, NumLanes = State.VF.getFixedValue();
10031014
Lane != NumLanes; ++Lane) {
@@ -1041,6 +1052,7 @@ bool VPInstruction::opcodeMayReadOrWriteFromMemory() const {
10411052
case VPInstruction::LogicalAnd:
10421053
case VPInstruction::Not:
10431054
case VPInstruction::PtrAdd:
1055+
case VPInstruction::WidePtrAdd:
10441056
case VPInstruction::WideIVStep:
10451057
case VPInstruction::StepVector:
10461058
case VPInstruction::ReductionStartVector:
@@ -1078,6 +1090,7 @@ bool VPInstruction::onlyFirstLaneUsed(const VPValue *Op) const {
10781090
case VPInstruction::ReductionStartVector:
10791091
return true;
10801092
case VPInstruction::PtrAdd:
1093+
case VPInstruction::WidePtrAdd:
10811094
return Op == getOperand(0) || vputils::onlyFirstLaneUsed(this);
10821095
case VPInstruction::ComputeAnyOfResult:
10831096
case VPInstruction::ComputeFindIVResult:
@@ -1181,6 +1194,9 @@ void VPInstruction::print(raw_ostream &O, const Twine &Indent,
11811194
case VPInstruction::PtrAdd:
11821195
O << "ptradd";
11831196
break;
1197+
case VPInstruction::WidePtrAdd:
1198+
O << "wide-ptradd";
1199+
break;
11841200
case VPInstruction::AnyOf:
11851201
O << "any-of";
11861202
break;
@@ -1765,7 +1781,8 @@ bool VPIRFlags::flagsValidForOpcode(unsigned Opcode) const {
17651781
return Opcode == Instruction::AShr;
17661782
case OperationType::GEPOp:
17671783
return Opcode == Instruction::GetElementPtr ||
1768-
Opcode == VPInstruction::PtrAdd;
1784+
Opcode == VPInstruction::PtrAdd ||
1785+
Opcode == VPInstruction::WidePtrAdd;
17691786
case OperationType::FPMathOp:
17701787
return Opcode == Instruction::FAdd || Opcode == Instruction::FMul ||
17711788
Opcode == Instruction::FSub || Opcode == Instruction::FNeg ||

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -973,6 +973,7 @@ static Value *tryToFoldLiveIns(const VPRecipeBase &R, unsigned Opcode,
973973
RFlags.getGEPNoWrapFlags());
974974
}
975975
case VPInstruction::PtrAdd:
976+
case VPInstruction::WidePtrAdd:
976977
return Folder.FoldGEP(IntegerType::getInt8Ty(TypeInfo.getContext()), Ops[0],
977978
Ops[1],
978979
cast<VPRecipeWithIRFlags>(R).getGEPNoWrapFlags());
@@ -2767,7 +2768,7 @@ static void expandVPWidenPointerInduction(VPWidenPointerInductionRecipe *R,
27672768
{StartOffset,
27682769
Builder.createNaryOp(VPInstruction::StepVector, {}, PhiType)});
27692770

2770-
VPValue *PtrAdd = Builder.createPtrAdd(
2771+
VPValue *PtrAdd = Builder.createWidePtrAdd(
27712772
Phi,
27722773
Builder.createNaryOp(Instruction::Mul, {StartOffset, R->getStepValue()}),
27732774
DL, "vector.gep");

0 commit comments

Comments
 (0)