@@ -1930,30 +1930,38 @@ class BoUpSLP {
1930
1930
/// elements in the lane, it will be vectorized with higher probability
1931
1931
/// after removing duplicates. Currently the SLP vectorizer supports only
1932
1932
/// vectorization of the power-of-2 number of unique scalars.
1933
- int getSplatScore(unsigned Lane, unsigned OpIdx, unsigned Idx) const {
1933
+ int getSplatScore(unsigned Lane, unsigned OpIdx, unsigned Idx,
1934
+ const SmallBitVector &UsedLanes) const {
1934
1935
Value *IdxLaneV = getData(Idx, Lane).V;
1935
- if (!isa<Instruction>(IdxLaneV) || IdxLaneV == getData(OpIdx, Lane).V)
1936
+ if (!isa<Instruction>(IdxLaneV) || IdxLaneV == getData(OpIdx, Lane).V ||
1937
+ isa<ExtractElementInst>(IdxLaneV))
1936
1938
return 0;
1937
- SmallPtrSet <Value *, 4> Uniques;
1938
- for (unsigned Ln = 0, E = getNumLanes(); Ln < E; ++Ln ) {
1939
+ SmallDenseMap <Value *, unsigned , 4> Uniques;
1940
+ for (unsigned Ln : seq<unsigned>( getNumLanes()) ) {
1939
1941
if (Ln == Lane)
1940
1942
continue;
1941
1943
Value *OpIdxLnV = getData(OpIdx, Ln).V;
1942
1944
if (!isa<Instruction>(OpIdxLnV))
1943
1945
return 0;
1944
- Uniques.insert (OpIdxLnV);
1946
+ Uniques.try_emplace (OpIdxLnV, Ln );
1945
1947
}
1946
- int UniquesCount = Uniques.size();
1947
- int UniquesCntWithIdxLaneV =
1948
- Uniques.contains(IdxLaneV) ? UniquesCount : UniquesCount + 1;
1948
+ unsigned UniquesCount = Uniques.size();
1949
+ auto IdxIt = Uniques.find(IdxLaneV);
1950
+ unsigned UniquesCntWithIdxLaneV =
1951
+ IdxIt != Uniques.end() ? UniquesCount : UniquesCount + 1;
1949
1952
Value *OpIdxLaneV = getData(OpIdx, Lane).V;
1950
- int UniquesCntWithOpIdxLaneV =
1951
- Uniques.contains(OpIdxLaneV) ? UniquesCount : UniquesCount + 1;
1953
+ auto OpIdxIt = Uniques.find(OpIdxLaneV);
1954
+ unsigned UniquesCntWithOpIdxLaneV =
1955
+ OpIdxIt != Uniques.end() ? UniquesCount : UniquesCount + 1;
1952
1956
if (UniquesCntWithIdxLaneV == UniquesCntWithOpIdxLaneV)
1953
1957
return 0;
1954
- return (PowerOf2Ceil(UniquesCntWithOpIdxLaneV) -
1955
- UniquesCntWithOpIdxLaneV) -
1956
- (PowerOf2Ceil(UniquesCntWithIdxLaneV) - UniquesCntWithIdxLaneV);
1958
+ return std::min(bit_ceil(UniquesCntWithOpIdxLaneV) -
1959
+ UniquesCntWithOpIdxLaneV,
1960
+ UniquesCntWithOpIdxLaneV -
1961
+ bit_floor(UniquesCntWithOpIdxLaneV)) -
1962
+ ((IdxIt != Uniques.end() && UsedLanes.test(IdxIt->second))
1963
+ ? UniquesCntWithIdxLaneV - bit_floor(UniquesCntWithIdxLaneV)
1964
+ : bit_ceil(UniquesCntWithIdxLaneV) - UniquesCntWithIdxLaneV);
1957
1965
}
1958
1966
1959
1967
/// \param Lane lane of the operands under analysis.
@@ -1993,7 +2001,7 @@ class BoUpSLP {
1993
2001
/// predecessors.
1994
2002
int getLookAheadScore(Value *LHS, Value *RHS, ArrayRef<Value *> MainAltOps,
1995
2003
int Lane, unsigned OpIdx, unsigned Idx,
1996
- bool &IsUsed) {
2004
+ bool &IsUsed, const SmallBitVector &UsedLanes ) {
1997
2005
LookAheadHeuristics LookAhead(TLI, DL, SE, R, getNumLanes(),
1998
2006
LookAheadMaxDepth);
1999
2007
// Keep track of the instruction stack as we recurse into the operands
@@ -2002,11 +2010,10 @@ class BoUpSLP {
2002
2010
LookAhead.getScoreAtLevelRec(LHS, RHS, /*U1=*/nullptr, /*U2=*/nullptr,
2003
2011
/*CurrLevel=*/1, MainAltOps);
2004
2012
if (Score) {
2005
- int SplatScore = getSplatScore(Lane, OpIdx, Idx);
2013
+ int SplatScore = getSplatScore(Lane, OpIdx, Idx, UsedLanes );
2006
2014
if (Score <= -SplatScore) {
2007
- // Set the minimum score for splat-like sequence to avoid setting
2008
- // failed state.
2009
- Score = 1;
2015
+ // Failed score.
2016
+ Score = 0;
2010
2017
} else {
2011
2018
Score += SplatScore;
2012
2019
// Scale score to see the difference between different operands
@@ -2036,7 +2043,8 @@ class BoUpSLP {
2036
2043
std::optional<unsigned>
2037
2044
getBestOperand(unsigned OpIdx, int Lane, int LastLane,
2038
2045
ArrayRef<ReorderingMode> ReorderingModes,
2039
- ArrayRef<Value *> MainAltOps) {
2046
+ ArrayRef<Value *> MainAltOps,
2047
+ const SmallBitVector &UsedLanes) {
2040
2048
unsigned NumOperands = getNumOperands();
2041
2049
2042
2050
// The operand of the previous lane at OpIdx.
@@ -2092,7 +2100,7 @@ class BoUpSLP {
2092
2100
Value *OpLeft = (LeftToRight) ? OpLastLane : Op;
2093
2101
Value *OpRight = (LeftToRight) ? Op : OpLastLane;
2094
2102
int Score = getLookAheadScore(OpLeft, OpRight, MainAltOps, Lane,
2095
- OpIdx, Idx, IsUsed);
2103
+ OpIdx, Idx, IsUsed, UsedLanes );
2096
2104
if (Score > static_cast<int>(BestOp.Score) ||
2097
2105
(Score > 0 && Score == static_cast<int>(BestOp.Score) &&
2098
2106
Idx == OpIdx)) {
@@ -2507,20 +2515,24 @@ class BoUpSLP {
2507
2515
for (unsigned I = 0; I < NumOperands; ++I)
2508
2516
MainAltOps[I].push_back(getData(I, FirstLane).V);
2509
2517
2518
+ SmallBitVector UsedLanes(NumLanes);
2519
+ UsedLanes.set(FirstLane);
2510
2520
for (unsigned Distance = 1; Distance != NumLanes; ++Distance) {
2511
2521
// Visit the lane on the right and then the lane on the left.
2512
2522
for (int Direction : {+1, -1}) {
2513
2523
int Lane = FirstLane + Direction * Distance;
2514
2524
if (Lane < 0 || Lane >= (int)NumLanes)
2515
2525
continue;
2526
+ UsedLanes.set(Lane);
2516
2527
int LastLane = Lane - Direction;
2517
2528
assert(LastLane >= 0 && LastLane < (int)NumLanes &&
2518
2529
"Out of bounds");
2519
2530
// Look for a good match for each operand.
2520
2531
for (unsigned OpIdx = 0; OpIdx != NumOperands; ++OpIdx) {
2521
2532
// Search for the operand that matches SortedOps[OpIdx][Lane-1].
2522
- std::optional<unsigned> BestIdx = getBestOperand(
2523
- OpIdx, Lane, LastLane, ReorderingModes, MainAltOps[OpIdx]);
2533
+ std::optional<unsigned> BestIdx =
2534
+ getBestOperand(OpIdx, Lane, LastLane, ReorderingModes,
2535
+ MainAltOps[OpIdx], UsedLanes);
2524
2536
// By not selecting a value, we allow the operands that follow to
2525
2537
// select a better matching value. We will get a non-null value in
2526
2538
// the next run of getBestOperand().
0 commit comments