@@ -9396,29 +9396,28 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
9396
9396
// reduce.add(mul(ext, ext)) can be folded into VPMulAccRecipe
9397
9397
if (match(VecOp, m_Mul(m_VPValue(A), m_VPValue(B))) &&
9398
9398
!VecOp->hasMoreThanOneUniqueUser()) {
9399
- VPRecipeBase *RecipeA = A->getDefiningRecipe();
9400
- VPRecipeBase *RecipeB = B->getDefiningRecipe();
9399
+ VPWidenCastRecipe *RecipeA =
9400
+ dyn_cast_if_present<VPWidenCastRecipe>(A->getDefiningRecipe());
9401
+ VPWidenCastRecipe *RecipeB =
9402
+ dyn_cast_if_present<VPWidenCastRecipe>(B->getDefiningRecipe());
9401
9403
if (RecipeA && RecipeB && match(RecipeA, m_ZExtOrSExt(m_VPValue())) &&
9402
9404
match(RecipeB, m_ZExtOrSExt(m_VPValue())) &&
9403
- cast<VPWidenCastRecipe>(RecipeA)->getOpcode() ==
9404
- cast<VPWidenCastRecipe>(RecipeB)->getOpcode() &&
9405
- !A->hasMoreThanOneUniqueUser() &&
9406
- !B->hasMoreThanOneUniqueUser()) {
9405
+ (RecipeA->getOpcode() == RecipeB->getOpcode() || A == B)) {
9407
9406
return new VPMulAccRecipe(
9408
9407
RdxDesc, CurrentLinkI, PreviousLink, CondOp,
9409
9408
CM.useOrderedReductions(RdxDesc),
9410
- cast<VPWidenRecipe>(VecOp->getDefiningRecipe()),
9411
- cast<VPWidenCastRecipe>(RecipeA),
9412
- cast<VPWidenCastRecipe>(RecipeB));
9409
+ cast<VPWidenRecipe>(VecOp->getDefiningRecipe()), RecipeA,
9410
+ RecipeB);
9413
9411
} else {
9414
9412
// Matched reduce.add(mul(...))
9415
9413
return new VPMulAccRecipe(
9416
9414
RdxDesc, CurrentLinkI, PreviousLink, CondOp,
9417
9415
CM.useOrderedReductions(RdxDesc),
9418
9416
cast<VPWidenRecipe>(VecOp->getDefiningRecipe()));
9419
9417
}
9420
- // Matched reduce.add(ext(mul(ext, ext)))
9421
- // Note that 3 extend instructions must have same opcode.
9418
+ // Matched reduce.add(ext(mul(ext(A), ext(B))))
9419
+ // Note that 3 extend instructions must have the same opcode or A == B
9420
+ // which can be transformed to reduce.add(zext(mul(sext(A), sext(B)))).
9422
9421
} else if (match(VecOp,
9423
9422
m_ZExtOrSExt(m_Mul(m_ZExtOrSExt(m_VPValue()),
9424
9423
m_ZExtOrSExt(m_VPValue())))) &&
@@ -9431,11 +9430,8 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
9431
9430
cast<VPWidenCastRecipe>(Mul->getOperand(0)->getDefiningRecipe());
9432
9431
VPWidenCastRecipe *Ext1 =
9433
9432
cast<VPWidenCastRecipe>(Mul->getOperand(1)->getDefiningRecipe());
9434
- if (Ext->getOpcode() == Ext0->getOpcode() &&
9435
- Ext0->getOpcode() == Ext1->getOpcode() &&
9436
- !Mul->hasMoreThanOneUniqueUser() &&
9437
- !Ext0->hasMoreThanOneUniqueUser() &&
9438
- !Ext1->hasMoreThanOneUniqueUser()) {
9433
+ if ((Ext->getOpcode() == Ext0->getOpcode() || Ext0 == Ext1) &&
9434
+ Ext0->getOpcode() == Ext1->getOpcode()) {
9439
9435
return new VPMulAccRecipe(
9440
9436
RdxDesc, CurrentLinkI, PreviousLink, CondOp,
9441
9437
CM.useOrderedReductions(RdxDesc),
@@ -9447,8 +9443,7 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
9447
9443
};
9448
9444
auto TryToMatchExtendedReduction = [&]() -> VPSingleDefRecipe * {
9449
9445
VPValue *A;
9450
- if (match(VecOp, m_ZExtOrSExt(m_VPValue(A))) &&
9451
- !VecOp->hasMoreThanOneUniqueUser()) {
9446
+ if (match(VecOp, m_ZExtOrSExt(m_VPValue(A)))) {
9452
9447
return new VPExtendedReductionRecipe(
9453
9448
RdxDesc, CurrentLinkI, PreviousLink,
9454
9449
cast<VPWidenCastRecipe>(VecOp), CondOp,
0 commit comments