-
Notifications
You must be signed in to change notification settings - Fork 13.6k
[RISCV] Form vredsum from explode_vector + scalar (left) reduce #67821
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from 3 commits
Commits
Show all changes
5 commits
Select commit
Hold shift + click to select a range
f224bd2
[RISCV] Form vredsum from explode_vector + scalar (left) reduce
preames dc9a20c
Address review comments
preames 1345207
Second attempt at stabilizing explodevector tests
preames ed57234
Second round of review fixes - including one I missed last time
preames 38a60f0
Code format fixes
preames File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -11122,6 +11122,85 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N, | |
} | ||
} | ||
|
||
/// Perform two related transforms whose purpose is to incrementally recognize
/// an explode_vector followed by scalar reduction as a vector reduction node.
/// This exists to recover from a deficiency in SLP which can't handle
/// forests with multiple roots sharing common nodes.  In some cases, one
/// of the trees will be vectorized, and the other will remain (unprofitably)
/// scalarized.
///
/// \p N must be an ISD::ADD node (asserted).  Two shapes are matched, both
/// requiring the RHS to be a single-use extract_vector_elt with a constant
/// index from a fixed-length vector V:
///   1. add (extract_vector_elt V, 0), (extract_vector_elt V, 1)
///        -> vecreduce_add (extract_subvector V, 0) over 2 elements
///   2. add (vecreduce_add (extract_subvector V, 0) over K elements),
///          (extract_vector_elt V, K)
///        -> vecreduce_add (extract_subvector V, 0) over K+1 elements
///
/// \returns the replacement VECREDUCE_ADD value, or an empty SDValue when
/// neither shape matches.
static SDValue
combineBinOpOfExtractToReduceTree(SDNode *N, SelectionDAG &DAG,
                                  const RISCVSubtarget &Subtarget) {
  // This transform needs to run before all integer types have been legalized
  // to i64 (so that the vector element type matches the add type), and while
  // it's safe to introduce odd sized vector types.
  if (DAG.NewNodesMustHaveLegalTypes)
    return SDValue();

  const SDLoc DL(N);
  const EVT VT = N->getValueType(0);
  [[maybe_unused]] const unsigned Opc = N->getOpcode();
  assert(Opc == ISD::ADD && "extend this to other reduction types");
  const SDValue LHS = N->getOperand(0);
  const SDValue RHS = N->getOperand(1);

  // Both operands are folded into the new reduction, so neither may have
  // another user.
  if (!LHS.hasOneUse() || !RHS.hasOneUse())
    return SDValue();

  // From here on, RHS is known to be an extract with a constant index.
  if (RHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
      !isa<ConstantSDNode>(RHS.getOperand(1)))
    return SDValue();

  SDValue SrcVec = RHS.getOperand(0);
  EVT SrcVecVT = SrcVec.getValueType();
  assert(SrcVecVT.getVectorElementType() == VT);
  if (SrcVecVT.isScalableVector())
    return SDValue();

  // Bail out when the element is wider than the value reported by
  // Subtarget.getELen().
  if (SrcVecVT.getScalarSizeInBits() > Subtarget.getELen())
    return SDValue();

  // Build vecreduce_add over the leading NumElts elements of SrcVec.
  auto BuildPrefixReduce = [&](unsigned NumElts) -> SDValue {
    EVT ReduceVT = EVT::getVectorVT(*DAG.getContext(), VT, NumElts);
    SDValue Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ReduceVT, SrcVec,
                              DAG.getVectorIdxConstant(0, DL));
    return DAG.getNode(ISD::VECREDUCE_ADD, DL, VT, Vec);
  };

  // match binop (extract_vector_elt V, 0), (extract_vector_elt V, 1) to
  // reduce_op (extract_subvector [2 x VT] from V).  This will form the
  // root of our reduction tree.  TODO: We could extend this to any two
  // adjacent constant indices if desired.
  if (LHS.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
      LHS.getOperand(0) == SrcVec && isNullConstant(LHS.getOperand(1)) &&
      isOneConstant(RHS.getOperand(1)))
    return BuildPrefixReduce(2);

  // Match (binop (reduce (extract_subvector V, 0),
  //                      (extract_vector_elt V, sizeof(SubVec))))
  // into a reduction of one more element from the original vector V.
  if (LHS.getOpcode() != ISD::VECREDUCE_ADD)
    return SDValue();

  // The constant-ness of RHS's index was already established above, so it is
  // not re-checked here.
  SDValue ReduceVec = LHS.getOperand(0);
  if (ReduceVec.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
      ReduceVec.hasOneUse() && ReduceVec.getOperand(0) == SrcVec &&
      isNullConstant(ReduceVec.getOperand(1))) {
    uint64_t Idx = cast<ConstantSDNode>(RHS.getOperand(1))->getLimitedValue();
    if (ReduceVec.getValueType().getVectorNumElements() == Idx) {
      // For illegal types (e.g. 3xi32), most will be combined again into a
      // wider (hopefully legal) type.  If this is a terminal state, we are
      // relying on type legalization here to produce something reasonable
      // and this lowering quality could probably be improved. (TODO)
      return BuildPrefixReduce(Idx + 1);
    }
  }

  return SDValue();
}
|
||
|
||
// Try to fold (<bop> x, (reduction.<bop> vec, start)) | ||
static SDValue combineBinOpToReduce(SDNode *N, SelectionDAG &DAG, | ||
const RISCVSubtarget &Subtarget) { | ||
|
@@ -11449,6 +11528,9 @@ static SDValue performADDCombine(SDNode *N, SelectionDAG &DAG, | |
return V; | ||
if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget)) | ||
return V; | ||
if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget)) | ||
return V; | ||
|
||
// fold (add (select lhs, rhs, cc, 0, y), x) -> | ||
// (select lhs, rhs, cc, x, (add x, y)) | ||
return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget); | ||
|
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
RHS.getOperand(1) is known to be a ConstantSDNode from line 11151 right?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't think this was addressed. I think this
isa<ConstantSDNode>(RHS.getOperand(1))
can be removed since it was already checked earlier.