
Commit 709a102

SC llvm team authored and committed

Merged main:b195bb87e1a0 into amd-gfx:3af18d2c87d7

Local branch amd-gfx 3af18d2 Merged main:1d5154663509 into amd-gfx:ffd73010b9b9
Remote branch main b195bb8 [VectorCombine] scalarizeLoadExtract - consistently use LoadInst and ExtractElementInst specific operand getters. NFC

2 parents 3af18d2 + b195bb8 commit 709a102

File tree

4 files changed: +152 -151 lines changed

clang/lib/CodeGen/Targets/AArch64.cpp

Lines changed: 1 addition & 1 deletion
@@ -662,7 +662,7 @@ bool AArch64ABIInfo::isZeroLengthBitfieldPermittedInHomogeneousAggregate()
 
 bool AArch64ABIInfo::passAsAggregateType(QualType Ty) const {
   if (Kind == AArch64ABIKind::AAPCS && Ty->isSVESizelessBuiltinType()) {
-    const auto *BT = Ty->getAs<BuiltinType>();
+    const auto *BT = Ty->castAs<BuiltinType>();
     return !BT->isSVECount() &&
            getContext().getBuiltinVectorTypeInfo(BT).NumVectors > 1;
   }
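A note on this change (not part of the patch): in Clang, Type::getAs<T>() returns nullptr when the type does not desugar to a T, while Type::castAs<T>() asserts that it does. The preceding isSVESizelessBuiltinType() check already guarantees a BuiltinType here, so switching to castAs<> drops a dead null path and documents the invariant. Below is a minimal standalone sketch of the two accessors using a toy hierarchy, not Clang's real implementation:

// Toy analogue of Clang's accessors: getAs<T>() may return null,
// castAs<T>() asserts the conversion succeeds and returns non-null.
#include <cassert>

struct Type {
  virtual ~Type() = default;
  template <typename T> const T *getAs() const {
    return dynamic_cast<const T *>(this); // null if this is not a T
  }
  template <typename T> const T *castAs() const {
    const T *Result = getAs<T>();
    assert(Result && "castAs<> used on a type of the wrong kind");
    return Result;
  }
};

struct BuiltinType : Type {};

int main() {
  BuiltinType BT;
  const Type *Ty = &BT;
  const BuiltinType *A = Ty->getAs<BuiltinType>();  // may be null in general
  const BuiltinType *B = Ty->castAs<BuiltinType>(); // asserts instead of returning null
  return (A == B) ? 0 : 1;
}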

llvm/include/llvm/Config/llvm-config.h.cmake

Lines changed: 1 addition & 1 deletion
@@ -16,7 +16,7 @@
 
 /* Indicate that this is LLVM compiled from the amd-gfx branch. */
 #define LLVM_HAVE_BRANCH_AMD_GFX
-#define LLVM_MAIN_REVISION 522544
+#define LLVM_MAIN_REVISION 522547
 
 /* Define if LLVM_ENABLE_DUMP is enabled */
 #cmakedefine LLVM_ENABLE_DUMP

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 145 additions & 145 deletions
@@ -662,6 +662,151 @@ static void recursivelyDeleteDeadRecipes(VPValue *V) {
   }
 }
 
+/// Try to simplify recipe \p R.
+static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
+  using namespace llvm::VPlanPatternMatch;
+
+  if (auto *Blend = dyn_cast<VPBlendRecipe>(&R)) {
+    // Try to remove redundant blend recipes.
+    SmallPtrSet<VPValue *, 4> UniqueValues;
+    if (Blend->isNormalized() || !match(Blend->getMask(0), m_False()))
+      UniqueValues.insert(Blend->getIncomingValue(0));
+    for (unsigned I = 1; I != Blend->getNumIncomingValues(); ++I)
+      if (!match(Blend->getMask(I), m_False()))
+        UniqueValues.insert(Blend->getIncomingValue(I));
+
+    if (UniqueValues.size() == 1) {
+      Blend->replaceAllUsesWith(*UniqueValues.begin());
+      Blend->eraseFromParent();
+      return;
+    }
+
+    if (Blend->isNormalized())
+      return;
+
+    // Normalize the blend so its first incoming value is used as the initial
+    // value with the others blended into it.
+
+    unsigned StartIndex = 0;
+    for (unsigned I = 0; I != Blend->getNumIncomingValues(); ++I) {
+      // If a value's mask is used only by the blend then is can be deadcoded.
+      // TODO: Find the most expensive mask that can be deadcoded, or a mask
+      // that's used by multiple blends where it can be removed from them all.
+      VPValue *Mask = Blend->getMask(I);
+      if (Mask->getNumUsers() == 1 && !match(Mask, m_False())) {
+        StartIndex = I;
+        break;
+      }
+    }
+
+    SmallVector<VPValue *, 4> OperandsWithMask;
+    OperandsWithMask.push_back(Blend->getIncomingValue(StartIndex));
+
+    for (unsigned I = 0; I != Blend->getNumIncomingValues(); ++I) {
+      if (I == StartIndex)
+        continue;
+      OperandsWithMask.push_back(Blend->getIncomingValue(I));
+      OperandsWithMask.push_back(Blend->getMask(I));
+    }
+
+    auto *NewBlend = new VPBlendRecipe(
+        cast<PHINode>(Blend->getUnderlyingValue()), OperandsWithMask);
+    NewBlend->insertBefore(&R);
+
+    VPValue *DeadMask = Blend->getMask(StartIndex);
+    Blend->replaceAllUsesWith(NewBlend);
+    Blend->eraseFromParent();
+    recursivelyDeleteDeadRecipes(DeadMask);
+    return;
+  }
+
+  VPValue *A;
+  if (match(&R, m_Trunc(m_ZExtOrSExt(m_VPValue(A))))) {
+    VPValue *Trunc = R.getVPSingleValue();
+    Type *TruncTy = TypeInfo.inferScalarType(Trunc);
+    Type *ATy = TypeInfo.inferScalarType(A);
+    if (TruncTy == ATy) {
+      Trunc->replaceAllUsesWith(A);
+    } else {
+      // Don't replace a scalarizing recipe with a widened cast.
+      if (isa<VPReplicateRecipe>(&R))
+        return;
+      if (ATy->getScalarSizeInBits() < TruncTy->getScalarSizeInBits()) {
+
+        unsigned ExtOpcode = match(R.getOperand(0), m_SExt(m_VPValue()))
+                                 ? Instruction::SExt
+                                 : Instruction::ZExt;
+        auto *VPC =
+            new VPWidenCastRecipe(Instruction::CastOps(ExtOpcode), A, TruncTy);
+        if (auto *UnderlyingExt = R.getOperand(0)->getUnderlyingValue()) {
+          // UnderlyingExt has distinct return type, used to retain legacy cost.
+          VPC->setUnderlyingValue(UnderlyingExt);
+        }
+        VPC->insertBefore(&R);
+        Trunc->replaceAllUsesWith(VPC);
+      } else if (ATy->getScalarSizeInBits() > TruncTy->getScalarSizeInBits()) {
+        auto *VPC = new VPWidenCastRecipe(Instruction::Trunc, A, TruncTy);
+        VPC->insertBefore(&R);
+        Trunc->replaceAllUsesWith(VPC);
+      }
+    }
+#ifndef NDEBUG
+    // Verify that the cached type info is for both A and its users is still
+    // accurate by comparing it to freshly computed types.
+    VPTypeAnalysis TypeInfo2(
+        R.getParent()->getPlan()->getCanonicalIV()->getScalarType());
+    assert(TypeInfo.inferScalarType(A) == TypeInfo2.inferScalarType(A));
+    for (VPUser *U : A->users()) {
+      auto *R = cast<VPRecipeBase>(U);
+      for (VPValue *VPV : R->definedValues())
+        assert(TypeInfo.inferScalarType(VPV) == TypeInfo2.inferScalarType(VPV));
+    }
+#endif
+  }
+
+  // Simplify (X && Y) || (X && !Y) -> X.
+  // TODO: Split up into simpler, modular combines: (X && Y) || (X && Z) into X
+  // && (Y || Z) and (X || !X) into true. This requires queuing newly created
+  // recipes to be visited during simplification.
+  VPValue *X, *Y, *X1, *Y1;
+  if (match(&R,
+            m_c_BinaryOr(m_LogicalAnd(m_VPValue(X), m_VPValue(Y)),
+                         m_LogicalAnd(m_VPValue(X1), m_Not(m_VPValue(Y1))))) &&
+      X == X1 && Y == Y1) {
+    R.getVPSingleValue()->replaceAllUsesWith(X);
+    R.eraseFromParent();
+    return;
+  }
+
+  if (match(&R, m_c_Mul(m_VPValue(A), m_SpecificInt(1))))
+    return R.getVPSingleValue()->replaceAllUsesWith(A);
+
+  if (match(&R, m_Not(m_Not(m_VPValue(A)))))
+    return R.getVPSingleValue()->replaceAllUsesWith(A);
+
+  // Remove redundant DerviedIVs, that is 0 + A * 1 -> A and 0 + 0 * x -> 0.
+  if ((match(&R,
+             m_DerivedIV(m_SpecificInt(0), m_VPValue(A), m_SpecificInt(1))) ||
+       match(&R,
+             m_DerivedIV(m_SpecificInt(0), m_SpecificInt(0), m_VPValue()))) &&
+      TypeInfo.inferScalarType(R.getOperand(1)) ==
+          TypeInfo.inferScalarType(R.getVPSingleValue()))
+    return R.getVPSingleValue()->replaceAllUsesWith(R.getOperand(1));
+}
+
+/// Try to simplify the recipes in \p Plan
+static void simplifyRecipes(VPlan &Plan) {
+  ReversePostOrderTraversal<VPBlockDeepTraversalWrapper<VPBlockBase *>> RPOT(
+      Plan.getEntry());
+  Type *CanonicalIVType = Plan.getCanonicalIV()->getScalarType();
+  VPTypeAnalysis TypeInfo(CanonicalIVType);
+  for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(RPOT)) {
+    for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
+      simplifyRecipe(R, TypeInfo);
+    }
+  }
+}
+
 void VPlanTransforms::optimizeForVFAndUF(VPlan &Plan, ElementCount BestVF,
                                          unsigned BestUF,
                                          PredicatedScalarEvolution &PSE) {
@@ -942,138 +1087,6 @@ void VPlanTransforms::clearReductionWrapFlags(VPlan &Plan) {
   }
 }
 
-/// Try to simplify recipe \p R.
-static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
-  using namespace llvm::VPlanPatternMatch;
-
-  if (auto *Blend = dyn_cast<VPBlendRecipe>(&R)) {
-    // Try to remove redundant blend recipes.
-    SmallPtrSet<VPValue *, 4> UniqueValues;
-    if (Blend->isNormalized() || !match(Blend->getMask(0), m_False()))
-      UniqueValues.insert(Blend->getIncomingValue(0));
-    for (unsigned I = 1; I != Blend->getNumIncomingValues(); ++I)
-      if (!match(Blend->getMask(I), m_False()))
-        UniqueValues.insert(Blend->getIncomingValue(I));
-
-    if (UniqueValues.size() == 1) {
-      Blend->replaceAllUsesWith(*UniqueValues.begin());
-      Blend->eraseFromParent();
-      return;
-    }
-
-    if (Blend->isNormalized())
-      return;
-
-    // Normalize the blend so its first incoming value is used as the initial
-    // value with the others blended into it.
-
-    unsigned StartIndex = 0;
-    for (unsigned I = 0; I != Blend->getNumIncomingValues(); ++I) {
-      // If a value's mask is used only by the blend then is can be deadcoded.
-      // TODO: Find the most expensive mask that can be deadcoded, or a mask
-      // that's used by multiple blends where it can be removed from them all.
-      VPValue *Mask = Blend->getMask(I);
-      if (Mask->getNumUsers() == 1 && !match(Mask, m_False())) {
-        StartIndex = I;
-        break;
-      }
-    }
-
-    SmallVector<VPValue *, 4> OperandsWithMask;
-    OperandsWithMask.push_back(Blend->getIncomingValue(StartIndex));
-
-    for (unsigned I = 0; I != Blend->getNumIncomingValues(); ++I) {
-      if (I == StartIndex)
-        continue;
-      OperandsWithMask.push_back(Blend->getIncomingValue(I));
-      OperandsWithMask.push_back(Blend->getMask(I));
-    }
-
-    auto *NewBlend = new VPBlendRecipe(
-        cast<PHINode>(Blend->getUnderlyingValue()), OperandsWithMask);
-    NewBlend->insertBefore(&R);
-
-    VPValue *DeadMask = Blend->getMask(StartIndex);
-    Blend->replaceAllUsesWith(NewBlend);
-    Blend->eraseFromParent();
-    recursivelyDeleteDeadRecipes(DeadMask);
-    return;
-  }
-
-  VPValue *A;
-  if (match(&R, m_Trunc(m_ZExtOrSExt(m_VPValue(A))))) {
-    VPValue *Trunc = R.getVPSingleValue();
-    Type *TruncTy = TypeInfo.inferScalarType(Trunc);
-    Type *ATy = TypeInfo.inferScalarType(A);
-    if (TruncTy == ATy) {
-      Trunc->replaceAllUsesWith(A);
-    } else {
-      // Don't replace a scalarizing recipe with a widened cast.
-      if (isa<VPReplicateRecipe>(&R))
-        return;
-      if (ATy->getScalarSizeInBits() < TruncTy->getScalarSizeInBits()) {
-
-        unsigned ExtOpcode = match(R.getOperand(0), m_SExt(m_VPValue()))
-                                 ? Instruction::SExt
-                                 : Instruction::ZExt;
-        auto *VPC =
-            new VPWidenCastRecipe(Instruction::CastOps(ExtOpcode), A, TruncTy);
-        if (auto *UnderlyingExt = R.getOperand(0)->getUnderlyingValue()) {
-          // UnderlyingExt has distinct return type, used to retain legacy cost.
-          VPC->setUnderlyingValue(UnderlyingExt);
-        }
-        VPC->insertBefore(&R);
-        Trunc->replaceAllUsesWith(VPC);
-      } else if (ATy->getScalarSizeInBits() > TruncTy->getScalarSizeInBits()) {
-        auto *VPC = new VPWidenCastRecipe(Instruction::Trunc, A, TruncTy);
-        VPC->insertBefore(&R);
-        Trunc->replaceAllUsesWith(VPC);
-      }
-    }
-#ifndef NDEBUG
-    // Verify that the cached type info is for both A and its users is still
-    // accurate by comparing it to freshly computed types.
-    VPTypeAnalysis TypeInfo2(
-        R.getParent()->getPlan()->getCanonicalIV()->getScalarType());
-    assert(TypeInfo.inferScalarType(A) == TypeInfo2.inferScalarType(A));
-    for (VPUser *U : A->users()) {
-      auto *R = cast<VPRecipeBase>(U);
-      for (VPValue *VPV : R->definedValues())
-        assert(TypeInfo.inferScalarType(VPV) == TypeInfo2.inferScalarType(VPV));
-    }
-#endif
-  }
-
-  // Simplify (X && Y) || (X && !Y) -> X.
-  // TODO: Split up into simpler, modular combines: (X && Y) || (X && Z) into X
-  // && (Y || Z) and (X || !X) into true. This requires queuing newly created
-  // recipes to be visited during simplification.
-  VPValue *X, *Y, *X1, *Y1;
-  if (match(&R,
-            m_c_BinaryOr(m_LogicalAnd(m_VPValue(X), m_VPValue(Y)),
-                         m_LogicalAnd(m_VPValue(X1), m_Not(m_VPValue(Y1))))) &&
-      X == X1 && Y == Y1) {
-    R.getVPSingleValue()->replaceAllUsesWith(X);
-    R.eraseFromParent();
-    return;
-  }
-
-  if (match(&R, m_c_Mul(m_VPValue(A), m_SpecificInt(1))))
-    return R.getVPSingleValue()->replaceAllUsesWith(A);
-
-  if (match(&R, m_Not(m_Not(m_VPValue(A)))))
-    return R.getVPSingleValue()->replaceAllUsesWith(A);
-
-  // Remove redundant DerviedIVs, that is 0 + A * 1 -> A and 0 + 0 * x -> 0.
-  if ((match(&R,
-             m_DerivedIV(m_SpecificInt(0), m_VPValue(A), m_SpecificInt(1))) ||
-       match(&R,
-             m_DerivedIV(m_SpecificInt(0), m_SpecificInt(0), m_VPValue()))) &&
-      TypeInfo.inferScalarType(R.getOperand(1)) ==
-          TypeInfo.inferScalarType(R.getVPSingleValue()))
-    return R.getVPSingleValue()->replaceAllUsesWith(R.getOperand(1));
-}
-
 /// Move loop-invariant recipes out of the vector loop region in \p Plan.
 static void licm(VPlan &Plan) {
   VPBasicBlock *Preheader = Plan.getVectorPreheader();
@@ -1108,19 +1121,6 @@ static void licm(VPlan &Plan) {
   }
 }
 
-/// Try to simplify the recipes in \p Plan.
-static void simplifyRecipes(VPlan &Plan) {
-  ReversePostOrderTraversal<VPBlockDeepTraversalWrapper<VPBlockBase *>> RPOT(
-      Plan.getEntry());
-  Type *CanonicalIVType = Plan.getCanonicalIV()->getScalarType();
-  VPTypeAnalysis TypeInfo(CanonicalIVType);
-  for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(RPOT)) {
-    for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
-      simplifyRecipe(R, TypeInfo);
-    }
-  }
-}
-
 void VPlanTransforms::truncateToMinimalBitwidths(
     VPlan &Plan, const MapVector<Instruction *, uint64_t> &MinBWs) {
 #ifndef NDEBUG
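This hunk is pure code motion: simplifyRecipe/simplifyRecipes move earlier in the file, unchanged. One of the moved combines rewrites (X && Y) || (X && !Y) to X, as noted in the comment. As a quick sanity check (not part of the commit, and ignoring the poison/undef subtleties of the VPlan logical operations), the identity follows from distributivity over plain Booleans:

(X ∧ Y) ∨ (X ∧ ¬Y) = X ∧ (Y ∨ ¬Y) = X ∧ true = X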

llvm/lib/Transforms/Vectorize/VectorCombine.cpp

Lines changed: 5 additions & 4 deletions
@@ -1360,8 +1360,8 @@ bool VectorCombine::scalarizeLoadExtract(Instruction &I) {
   if (!match(&I, m_Load(m_Value(Ptr))))
     return false;
 
-  auto *VecTy = cast<VectorType>(I.getType());
   auto *LI = cast<LoadInst>(&I);
+  auto *VecTy = cast<VectorType>(LI->getType());
   if (LI->isVolatile() || !DL->typeSizeEqualsStoreSize(VecTy->getScalarType()))
     return false;
 
@@ -1401,15 +1401,16 @@ bool VectorCombine::scalarizeLoadExtract(Instruction &I) {
       LastCheckedInst = UI;
     }
 
-    auto ScalarIdx = canScalarizeAccess(VecTy, UI->getOperand(1), &I, AC, DT);
+    auto ScalarIdx =
+        canScalarizeAccess(VecTy, UI->getIndexOperand(), LI, AC, DT);
     if (ScalarIdx.isUnsafe())
      return false;
    if (ScalarIdx.isSafeWithFreeze()) {
      NeedFreeze.try_emplace(UI, ScalarIdx);
      ScalarIdx.discard();
    }
 
-    auto *Index = dyn_cast<ConstantInt>(UI->getOperand(1));
+    auto *Index = dyn_cast<ConstantInt>(UI->getIndexOperand());
     OriginalCost +=
        TTI.getVectorInstrCost(Instruction::ExtractElement, VecTy, CostKind,
                               Index ? Index->getZExtValue() : -1);
@@ -1425,7 +1426,7 @@ bool VectorCombine::scalarizeLoadExtract(Instruction &I) {
   // Replace extracts with narrow scalar loads.
   for (User *U : LI->users()) {
     auto *EI = cast<ExtractElementInst>(U);
-    Value *Idx = EI->getOperand(1);
+    Value *Idx = EI->getIndexOperand();
 
     // Insert 'freeze' for poison indexes.
     auto It = NeedFreeze.find(EI);
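A usage note (not part of the patch): ExtractElementInst exposes getVectorOperand() and getIndexOperand() as named equivalents of getOperand(0) and getOperand(1), which is what the commit title means by instruction-specific operand getters. The sketch below is a minimal check of that equivalence on a freshly built extractelement; it assumes an LLVM development setup (headers available and LLVMCore to link against).

// Standalone sketch: the named ExtractElementInst getters resolve to the
// same Values as the raw operand indices used before this commit.
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include <cassert>

using namespace llvm;

int main() {
  LLVMContext Ctx;
  Module M("demo", Ctx);
  auto *VecTy = FixedVectorType::get(Type::getInt32Ty(Ctx), 4);
  auto *FnTy = FunctionType::get(Type::getInt32Ty(Ctx), {VecTy}, /*isVarArg=*/false);
  Function *F = Function::Create(FnTy, Function::ExternalLinkage, "f", M);
  IRBuilder<> B(BasicBlock::Create(Ctx, "entry", F));

  // Build "extractelement <4 x i32> %arg, i64 2" from the function argument.
  Value *Vec = F->getArg(0);
  auto *EI = cast<ExtractElementInst>(B.CreateExtractElement(Vec, B.getInt64(2)));

  // The named getters are the instruction-specific accessors for the same operands.
  assert(EI->getVectorOperand() == EI->getOperand(0));
  assert(EI->getIndexOperand() == EI->getOperand(1));

  B.CreateRet(EI);
  return 0;
}

LoadInst::getPointerOperand() plays the analogous role for loads, which is why the rewritten code threads LI through instead of the generic Instruction &I.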
