-
Notifications
You must be signed in to change notification settings - Fork 15k
[VPlan] Extract reverse operation for reverse accesses #146525
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
b168223
bf61735
c41cc55
d9512b4
7c6c05f
7fefeda
0a697d8
94a8c78
cd66fd9
20b126a
6fdd4c9
ab0eef5
af401c2
f47286e
3eba2a6
5c7bc47
ef2d027
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||||||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
@@ -506,6 +506,7 @@ unsigned VPInstruction::getNumOperandsForOpcode(unsigned Opcode) { | |||||||||||||||||
case VPInstruction::ExtractPenultimateElement: | ||||||||||||||||||
case VPInstruction::FirstActiveLane: | ||||||||||||||||||
case VPInstruction::Not: | ||||||||||||||||||
case VPInstruction::Reverse: | ||||||||||||||||||
return 1; | ||||||||||||||||||
case Instruction::ICmp: | ||||||||||||||||||
case Instruction::FCmp: | ||||||||||||||||||
|
@@ -983,6 +984,8 @@ Value *VPInstruction::generate(VPTransformState &State) { | |||||||||||||||||
} | ||||||||||||||||||
case VPInstruction::ResumeForEpilogue: | ||||||||||||||||||
return State.get(getOperand(0), true); | ||||||||||||||||||
case VPInstruction::Reverse: | ||||||||||||||||||
return Builder.CreateVectorReverse(State.get(getOperand(0)), "reverse"); | ||||||||||||||||||
lukel97 marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||||||||||||||
default: | ||||||||||||||||||
llvm_unreachable("Unsupported opcode for instruction"); | ||||||||||||||||||
} | ||||||||||||||||||
|
@@ -1147,6 +1150,13 @@ InstructionCost VPInstruction::computeCost(ElementCount VF, | |||||||||||||||||
I32Ty, {Arg0Ty, I32Ty, I1Ty}); | ||||||||||||||||||
return Ctx.TTI.getIntrinsicInstrCost(Attrs, Ctx.CostKind); | ||||||||||||||||||
} | ||||||||||||||||||
case VPInstruction::Reverse: { | ||||||||||||||||||
assert(VF.isVector() && "Reverse operation must be vector type"); | ||||||||||||||||||
Type *VectorTy = toVectorTy(Ctx.Types.inferScalarType(getOperand(0)), VF); | ||||||||||||||||||
return Ctx.TTI.getShuffleCost( | ||||||||||||||||||
TargetTransformInfo::SK_Reverse, cast<VectorType>(VectorTy), | ||||||||||||||||||
cast<VectorType>(VectorTy), {}, Ctx.CostKind, 0); | ||||||||||||||||||
Comment on lines
+1155
to
+1158
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
Could you also add `/Arg=/ to the arguments passing {} and 0? |
||||||||||||||||||
} | ||||||||||||||||||
case VPInstruction::ExtractLastElement: { | ||||||||||||||||||
// Add on the cost of extracting the element. | ||||||||||||||||||
auto *VecTy = toVectorTy(Ctx.Types.inferScalarType(getOperand(0)), VF); | ||||||||||||||||||
|
@@ -1251,6 +1261,7 @@ bool VPInstruction::opcodeMayReadOrWriteFromMemory() const { | |||||||||||||||||
case VPInstruction::WidePtrAdd: | ||||||||||||||||||
case VPInstruction::StepVector: | ||||||||||||||||||
case VPInstruction::ReductionStartVector: | ||||||||||||||||||
case VPInstruction::Reverse: | ||||||||||||||||||
case VPInstruction::VScale: | ||||||||||||||||||
return false; | ||||||||||||||||||
default: | ||||||||||||||||||
|
@@ -1414,6 +1425,9 @@ void VPInstruction::print(raw_ostream &O, const Twine &Indent, | |||||||||||||||||
case VPInstruction::ResumeForEpilogue: | ||||||||||||||||||
O << "resume-for-epilogue"; | ||||||||||||||||||
break; | ||||||||||||||||||
case VPInstruction::Reverse: | ||||||||||||||||||
O << "reverse"; | ||||||||||||||||||
break; | ||||||||||||||||||
default: | ||||||||||||||||||
O << Instruction::getOpcodeName(getOpcode()); | ||||||||||||||||||
} | ||||||||||||||||||
|
@@ -2281,21 +2295,37 @@ InstructionCost VPWidenCastRecipe::computeCost(ElementCount VF, | |||||||||||||||||
return TTI::CastContextHint::Normal; | ||||||||||||||||||
}; | ||||||||||||||||||
|
||||||||||||||||||
using namespace llvm::VPlanPatternMatch; | ||||||||||||||||||
VPValue *Operand = getOperand(0); | ||||||||||||||||||
TTI::CastContextHint CCH = TTI::CastContextHint::None; | ||||||||||||||||||
// For Trunc/FPTrunc, get the context from the only user. | ||||||||||||||||||
if ((Opcode == Instruction::Trunc || Opcode == Instruction::FPTrunc) && | ||||||||||||||||||
!hasMoreThanOneUniqueUser() && getNumUsers() > 0) { | ||||||||||||||||||
if (auto *StoreRecipe = dyn_cast<VPRecipeBase>(*user_begin())) | ||||||||||||||||||
CCH = ComputeCCH(StoreRecipe); | ||||||||||||||||||
if (Opcode == Instruction::Trunc || Opcode == Instruction::FPTrunc) { | ||||||||||||||||||
auto GetOnlyUser = [](const VPSingleDefRecipe *R) -> VPRecipeBase * { | ||||||||||||||||||
if (R->getNumUsers() == 0 || R->hasMoreThanOneUniqueUser()) | ||||||||||||||||||
return nullptr; | ||||||||||||||||||
return dyn_cast<VPRecipeBase>(*R->user_begin()); | ||||||||||||||||||
}; | ||||||||||||||||||
|
||||||||||||||||||
if (VPRecipeBase *Recipe = GetOnlyUser(this)) { | ||||||||||||||||||
if (match(Recipe, m_VPInstruction<VPInstruction::Reverse>(m_VPValue()))) | ||||||||||||||||||
Recipe = GetOnlyUser(cast<VPInstruction>(Recipe)); | ||||||||||||||||||
if (Recipe) | ||||||||||||||||||
CCH = ComputeCCH(Recipe); | ||||||||||||||||||
Comment on lines
+2309
to
+2313
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Hmm, if we have a shuffle inbetween the load and a cast for example, can the cast still be folded into the load in most cases? Curious if this may have surfaced an in-accuracy of the current cost modeling. |
||||||||||||||||||
} | ||||||||||||||||||
} | ||||||||||||||||||
// For Z/Sext, get the context from the operand. | ||||||||||||||||||
else if (Opcode == Instruction::ZExt || Opcode == Instruction::SExt || | ||||||||||||||||||
Opcode == Instruction::FPExt) { | ||||||||||||||||||
Comment on lines
2317
to
2318
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Not sure I understand why the code is guarded by Trunc, FPTrunc, FPExt, ZExt, or SExt opcodes? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This code is used to determine TTI::CastContextHint. Perhaps for other cast opcodes, there isn’t currently a situation that requires this hint. But I think that’s a separate issue. The change here is just to ensure that CastContextHint::Reversed is still correctly propagated after the reverse operation is separated out. |
||||||||||||||||||
if (Operand->isLiveIn()) | ||||||||||||||||||
CCH = TTI::CastContextHint::Normal; | ||||||||||||||||||
else if (Operand->getDefiningRecipe()) | ||||||||||||||||||
CCH = ComputeCCH(Operand->getDefiningRecipe()); | ||||||||||||||||||
else if (auto *Recipe = Operand->getDefiningRecipe()) { | ||||||||||||||||||
VPValue *ReverseOp; | ||||||||||||||||||
if (match(Recipe, | ||||||||||||||||||
m_VPInstruction<VPInstruction::Reverse>(m_VPValue(ReverseOp)))) | ||||||||||||||||||
Recipe = ReverseOp->getDefiningRecipe(); | ||||||||||||||||||
if (Recipe) | ||||||||||||||||||
CCH = ComputeCCH(Recipe); | ||||||||||||||||||
} | ||||||||||||||||||
} | ||||||||||||||||||
|
||||||||||||||||||
auto *SrcTy = | ||||||||||||||||||
|
@@ -3371,12 +3401,7 @@ InstructionCost VPWidenMemoryRecipe::computeCost(ElementCount VF, | |||||||||||||||||
Cost += Ctx.TTI.getMemoryOpCost(Opcode, Ty, Alignment, AS, Ctx.CostKind, | ||||||||||||||||||
OpInfo, &Ingredient); | ||||||||||||||||||
} | ||||||||||||||||||
if (!Reverse) | ||||||||||||||||||
return Cost; | ||||||||||||||||||
|
||||||||||||||||||
return Cost += Ctx.TTI.getShuffleCost( | ||||||||||||||||||
TargetTransformInfo::SK_Reverse, cast<VectorType>(Ty), | ||||||||||||||||||
cast<VectorType>(Ty), {}, Ctx.CostKind, 0); | ||||||||||||||||||
return Cost; | ||||||||||||||||||
} | ||||||||||||||||||
|
||||||||||||||||||
void VPWidenLoadRecipe::execute(VPTransformState &State) { | ||||||||||||||||||
|
@@ -3408,8 +3433,6 @@ void VPWidenLoadRecipe::execute(VPTransformState &State) { | |||||||||||||||||
NewLI = Builder.CreateAlignedLoad(DataTy, Addr, Alignment, "wide.load"); | ||||||||||||||||||
} | ||||||||||||||||||
applyMetadata(*cast<Instruction>(NewLI)); | ||||||||||||||||||
if (Reverse) | ||||||||||||||||||
NewLI = Builder.CreateVectorReverse(NewLI, "reverse"); | ||||||||||||||||||
State.set(this, NewLI); | ||||||||||||||||||
} | ||||||||||||||||||
|
||||||||||||||||||
|
@@ -3465,8 +3488,6 @@ void VPWidenLoadEVLRecipe::execute(VPTransformState &State) { | |||||||||||||||||
0, Attribute::getWithAlignment(NewLI->getContext(), Alignment)); | ||||||||||||||||||
applyMetadata(*NewLI); | ||||||||||||||||||
Instruction *Res = NewLI; | ||||||||||||||||||
if (isReverse()) | ||||||||||||||||||
Res = createReverseEVL(Builder, Res, EVL, "vp.reverse"); | ||||||||||||||||||
State.set(this, Res); | ||||||||||||||||||
} | ||||||||||||||||||
|
||||||||||||||||||
|
@@ -3485,12 +3506,8 @@ InstructionCost VPWidenLoadEVLRecipe::computeCost(ElementCount VF, | |||||||||||||||||
unsigned AS = getLoadStoreAddressSpace(&Ingredient); | ||||||||||||||||||
InstructionCost Cost = Ctx.TTI.getMaskedMemoryOpCost( | ||||||||||||||||||
Instruction::Load, Ty, Alignment, AS, Ctx.CostKind); | ||||||||||||||||||
if (!Reverse) | ||||||||||||||||||
return Cost; | ||||||||||||||||||
|
||||||||||||||||||
return Cost + Ctx.TTI.getShuffleCost( | ||||||||||||||||||
TargetTransformInfo::SK_Reverse, cast<VectorType>(Ty), | ||||||||||||||||||
cast<VectorType>(Ty), {}, Ctx.CostKind, 0); | ||||||||||||||||||
return Cost; | ||||||||||||||||||
} | ||||||||||||||||||
|
||||||||||||||||||
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) | ||||||||||||||||||
|
@@ -3520,13 +3537,6 @@ void VPWidenStoreRecipe::execute(VPTransformState &State) { | |||||||||||||||||
} | ||||||||||||||||||
|
||||||||||||||||||
Value *StoredVal = State.get(StoredVPValue); | ||||||||||||||||||
if (isReverse()) { | ||||||||||||||||||
// If we store to reverse consecutive memory locations, then we need | ||||||||||||||||||
// to reverse the order of elements in the stored value. | ||||||||||||||||||
StoredVal = Builder.CreateVectorReverse(StoredVal, "reverse"); | ||||||||||||||||||
// We don't want to update the value in the map as it might be used in | ||||||||||||||||||
// another expression. So don't call resetVectorValue(StoredVal). | ||||||||||||||||||
} | ||||||||||||||||||
Value *Addr = State.get(getAddr(), /*IsScalar*/ !CreateScatter); | ||||||||||||||||||
Instruction *NewSI = nullptr; | ||||||||||||||||||
if (CreateScatter) | ||||||||||||||||||
|
@@ -3556,8 +3566,6 @@ void VPWidenStoreEVLRecipe::execute(VPTransformState &State) { | |||||||||||||||||
CallInst *NewSI = nullptr; | ||||||||||||||||||
Value *StoredVal = State.get(StoredValue); | ||||||||||||||||||
Value *EVL = State.get(getEVL(), VPLane(0)); | ||||||||||||||||||
if (isReverse()) | ||||||||||||||||||
StoredVal = createReverseEVL(Builder, StoredVal, EVL, "vp.reverse"); | ||||||||||||||||||
Value *Mask = nullptr; | ||||||||||||||||||
if (VPValue *VPMask = getMask()) { | ||||||||||||||||||
Mask = State.get(VPMask); | ||||||||||||||||||
|
@@ -3596,12 +3604,8 @@ InstructionCost VPWidenStoreEVLRecipe::computeCost(ElementCount VF, | |||||||||||||||||
unsigned AS = getLoadStoreAddressSpace(&Ingredient); | ||||||||||||||||||
InstructionCost Cost = Ctx.TTI.getMaskedMemoryOpCost( | ||||||||||||||||||
Instruction::Store, Ty, Alignment, AS, Ctx.CostKind); | ||||||||||||||||||
if (!Reverse) | ||||||||||||||||||
return Cost; | ||||||||||||||||||
|
||||||||||||||||||
return Cost + Ctx.TTI.getShuffleCost( | ||||||||||||||||||
TargetTransformInfo::SK_Reverse, cast<VectorType>(Ty), | ||||||||||||||||||
cast<VectorType>(Ty), {}, Ctx.CostKind, 0); | ||||||||||||||||||
return Cost; | ||||||||||||||||||
} | ||||||||||||||||||
|
||||||||||||||||||
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) | ||||||||||||||||||
|
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -2482,6 +2482,29 @@ static VPRecipeBase *optimizeMaskToEVL(VPValue *HeaderMask, | |||||
.Case<VPWidenStoreRecipe>([&](VPWidenStoreRecipe *S) { | ||||||
VPValue *NewMask = GetNewMask(S->getMask()); | ||||||
VPValue *NewAddr = GetNewAddr(S->getAddr()); | ||||||
// Convert general reverse operations on stored value into vp.reverse, | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
// when the VPVectorEndPointerRecipe adjusting the access address uses | ||||||
// EVL instead of VF. | ||||||
if (match(NewAddr, m_VectorEndPointer(m_VPValue(), m_Specific(&EVL)))) { | ||||||
VPValue *StoredVal = S->getStoredValue(); | ||||||
// Skip if the stored value is not defined in the loop region. | ||||||
if (!StoredVal->isDefinedOutsideLoopRegions()) { | ||||||
Comment on lines
+2490
to
+2491
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Hmm, is this correct even if the value outside the region is a vector other than a broadcast? |
||||||
VPValue *ReversedVal; | ||||||
bool IsReverse = | ||||||
match(StoredVal, m_VPInstruction<VPInstruction::Reverse>( | ||||||
m_VPValue(ReversedVal))); | ||||||
assert(IsReverse && "The stored value of reverse store must be " | ||||||
"defined by a reverse operation"); | ||||||
auto *Reverse = cast<VPInstruction>(StoredVal); | ||||||
auto *NewReverse = new VPWidenIntrinsicRecipe( | ||||||
Intrinsic::experimental_vp_reverse, | ||||||
{ReversedVal, &AllOneMask, &EVL}, | ||||||
TypeInfo.inferScalarType(Reverse), Reverse->getDebugLoc()); | ||||||
NewReverse->insertBefore(Reverse); | ||||||
return new VPWidenStoreEVLRecipe(*S, NewAddr, NewReverse, EVL, | ||||||
NewMask); | ||||||
} | ||||||
} | ||||||
return new VPWidenStoreEVLRecipe(*S, NewAddr, EVL, NewMask); | ||||||
}) | ||||||
.Case<VPInterleaveRecipe>([&](VPInterleaveRecipe *IR) { | ||||||
|
@@ -2623,6 +2646,34 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) { | |||||
} | ||||||
} | ||||||
ToErase.push_back(CurRecipe); | ||||||
|
||||||
// Convert general reverse operations on loaded result into vp.reverse, when | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
// the VPVectorEndPointerRecipe adjusting the access address uses EVL | ||||||
// instead of VF. | ||||||
if (auto *LoadR = dyn_cast<VPWidenLoadEVLRecipe>(EVLRecipe)) { | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is there a reason we handle the load/store cases separately, instead of just converting all reverse operations? Could we mis-compile in the future if some other transform decides to create new reverse operations? |
||||||
if (!match(LoadR->getAddr(), | ||||||
m_VectorEndPointer(m_VPValue(), m_Specific(&EVL)))) | ||||||
continue; | ||||||
assert(LoadR->isReverse() && | ||||||
"Only reverse access uses VPVectorEndPointerRecipe as address"); | ||||||
// TODO: Extend conversion along the use-def chain, as reverse operations | ||||||
// may be eliminated or sunk in the future. | ||||||
assert(LoadR->getNumUsers() == 1 && | ||||||
"Unexpected user number of reverse load"); | ||||||
auto *UserR = cast<VPRecipeBase>(*LoadR->user_begin()); | ||||||
VPValue *ReversedVal; | ||||||
bool IsReverse = match(UserR, m_VPInstruction<VPInstruction::Reverse>( | ||||||
m_VPValue(ReversedVal))); | ||||||
assert(IsReverse && "The defined value of reverse load must be used by a " | ||||||
"reverse operation"); | ||||||
auto *Reverse = cast<VPInstruction>(UserR); | ||||||
auto *NewReverse = new VPWidenIntrinsicRecipe( | ||||||
Intrinsic::experimental_vp_reverse, {ReversedVal, AllOneMask, &EVL}, | ||||||
TypeInfo.inferScalarType(Reverse), Reverse->getDebugLoc()); | ||||||
NewReverse->insertBefore(Reverse); | ||||||
Reverse->replaceAllUsesWith(NewReverse); | ||||||
ToErase.push_back(Reverse); | ||||||
} | ||||||
} | ||||||
// Remove dead EVL mask. | ||||||
if (EVLMask->getNumUsers() == 0) | ||||||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Now that the load/store doesn't handle reversing, it should not need the flag to indicate it is reversing
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There was a similar discussion earlier: #146525 (comment)
I think it would be good to continue the discussion in the same comment thread.