@@ -1083,9 +1083,14 @@ class BitPermutationSelector {
10831083 // lowest-order bit.
10841084 unsigned Idx;
10851085
1086+ // ConstZero means a bit we need to mask off.
1087+ // Variable is a bit that comes from an input variable.
1088+ // VariableKnownToBeZero is also a bit that comes from an input variable,
1089+ // but it is known to be already zero, so we do not need to mask it.
10861090 enum Kind {
10871091 ConstZero,
1088- Variable
1092+ Variable,
1093+ VariableKnownToBeZero
10891094 } K;
10901095
10911096 ValueBit (SDValue V, unsigned I, Kind K = Variable)
@@ -1094,11 +1099,11 @@ class BitPermutationSelector {
10941099 : V(SDValue(nullptr , 0 )), Idx(UINT32_MAX), K(K) {}
10951100
10961101 bool isZero () const {
1097- return K == ConstZero;
1102+ return K == ConstZero || K == VariableKnownToBeZero ;
10981103 }
10991104
11001105 bool hasValue () const {
1101- return K == Variable;
1106+ return K == Variable || K == VariableKnownToBeZero ;
11021107 }
11031108
11041109 SDValue getValue () const {
@@ -1248,8 +1253,14 @@ class BitPermutationSelector {
12481253 for (unsigned i = 0 ; i < NumBits; ++i)
12491254 if (((Mask >> i) & 1 ) == 1 )
12501255 Bits[i] = (*LHSBits)[i];
1251- else
1252- Bits[i] = ValueBit (ValueBit::ConstZero);
1256+ else {
1257+ // AND instruction masks this bit. If the input is already zero,
1258+ // we have nothing to do here. Otherwise, make the bit ConstZero.
1259+ if ((*LHSBits)[i].isZero ())
1260+ Bits[i] = (*LHSBits)[i];
1261+ else
1262+ Bits[i] = ValueBit (ValueBit::ConstZero);
1263+ }
12531264
12541265 return std::make_pair (Interesting, &Bits);
12551266 }
@@ -1259,15 +1270,43 @@ class BitPermutationSelector {
12591270 const auto &RHSBits = *getValueBits (V.getOperand (1 ), NumBits).second ;
12601271
12611272 bool AllDisjoint = true ;
1262- for (unsigned i = 0 ; i < NumBits; ++i)
1263- if (LHSBits[i].isZero ())
1273+ SDValue LastVal = SDValue ();
1274+ unsigned LastIdx = 0 ;
1275+ for (unsigned i = 0 ; i < NumBits; ++i) {
1276+ if (LHSBits[i].isZero () && RHSBits[i].isZero ()) {
1277+ // If both inputs are known to be zero and one is ConstZero and
1278+ // the other is VariableKnownToBeZero, we can select whichever
1279+ // we like. To minimize the number of bit groups, we select
1280+ // VariableKnownToBeZero if this bit is the next bit of the same
1281+ // input variable as the previous bit. Otherwise, we select
1282+ // ConstZero.
1283+ if (LHSBits[i].hasValue () && LHSBits[i].getValue () == LastVal &&
1284+ LHSBits[i].getValueBitIndex () == LastIdx + 1 )
1285+ Bits[i] = LHSBits[i];
1286+ else if (RHSBits[i].hasValue () && RHSBits[i].getValue () == LastVal &&
1287+ RHSBits[i].getValueBitIndex () == LastIdx + 1 )
1288+ Bits[i] = RHSBits[i];
1289+ else
1290+ Bits[i] = ValueBit (ValueBit::ConstZero);
1291+ }
1292+ else if (LHSBits[i].isZero ())
12641293 Bits[i] = RHSBits[i];
12651294 else if (RHSBits[i].isZero ())
12661295 Bits[i] = LHSBits[i];
12671296 else {
12681297 AllDisjoint = false ;
12691298 break ;
12701299 }
1300+ // We remember the value and bit index of this bit.
1301+ if (Bits[i].hasValue ()) {
1302+ LastVal = Bits[i].getValue ();
1303+ LastIdx = Bits[i].getValueBitIndex ();
1304+ }
1305+ else {
1306+ if (LastVal) LastVal = SDValue ();
1307+ LastIdx = 0 ;
1308+ }
1309+ }
12711310
12721311 if (!AllDisjoint)
12731312 break ;
@@ -1293,6 +1332,44 @@ class BitPermutationSelector {
12931332
12941333 return std::make_pair (Interesting, &Bits);
12951334 }
1335+ case ISD::AssertZext: {
1336+ // For AssertZext, we look through the operand and
1337+ // mark the bits known to be zero.
1338+ const SmallVector<ValueBit, 64 > *LHSBits;
1339+ std::tie (Interesting, LHSBits) = getValueBits (V.getOperand (0 ),
1340+ NumBits);
1341+
1342+ EVT FromType = cast<VTSDNode>(V.getOperand (1 ))->getVT ();
1343+ const unsigned NumValidBits = FromType.getSizeInBits ();
1344+ for (unsigned i = 0 ; i < NumValidBits; ++i)
1345+ Bits[i] = (*LHSBits)[i];
1346+
1347+ // These bits are known to be zero.
1348+ for (unsigned i = NumValidBits; i < NumBits; ++i)
1349+ Bits[i] = ValueBit ((*LHSBits)[i].getValue (),
1350+ (*LHSBits)[i].getValueBitIndex (),
1351+ ValueBit::VariableKnownToBeZero);
1352+
1353+ return std::make_pair (Interesting, &Bits);
1354+ }
1355+ case ISD::LOAD:
1356+ LoadSDNode *LD = cast<LoadSDNode>(V);
1357+ if (ISD::isZEXTLoad (V.getNode ()) && V.getResNo () == 0 ) {
1358+ EVT VT = LD->getMemoryVT ();
1359+ const unsigned NumValidBits = VT.getSizeInBits ();
1360+
1361+ for (unsigned i = 0 ; i < NumValidBits; ++i)
1362+ Bits[i] = ValueBit (V, i);
1363+
1364+ // These bits are known to be zero.
1365+ for (unsigned i = NumValidBits; i < NumBits; ++i)
1366+ Bits[i] = ValueBit (V, i, ValueBit::VariableKnownToBeZero);
1367+
1368+ // A zero-extending load itself cannot be optimized, so it is not
1369+ // interesting by itself, though it gives useful information.
1370+ return std::make_pair (Interesting = false , &Bits);
1371+ }
1372+ break ;
12961373 }
12971374
12981375 for (unsigned i = 0 ; i < NumBits; ++i)
@@ -1304,7 +1381,7 @@ class BitPermutationSelector {
13041381 // For each value (except the constant ones), compute the left-rotate amount
13051382 // to get it from its original to final position.
13061383 void computeRotationAmounts () {
1307- HasZeros = false ;
1384+ NeedMask = false ;
13081385 RLAmt.resize (Bits.size ());
13091386 for (unsigned i = 0 ; i < Bits.size (); ++i)
13101387 if (Bits[i].hasValue ()) {
@@ -1314,7 +1391,7 @@ class BitPermutationSelector {
13141391 else
13151392 RLAmt[i] = Bits.size () - (VBI - i);
13161393 } else if (Bits[i].isZero ()) {
1317- HasZeros = true ;
1394+ NeedMask = true ;
13181395 RLAmt[i] = UINT32_MAX;
13191396 } else {
13201397 llvm_unreachable (" Unknown value bit type" );
@@ -1330,6 +1407,7 @@ class BitPermutationSelector {
13301407 unsigned LastRLAmt = RLAmt[0 ];
13311408 SDValue LastValue = Bits[0 ].hasValue () ? Bits[0 ].getValue () : SDValue ();
13321409 unsigned LastGroupStartIdx = 0 ;
1410+ bool IsGroupOfZeros = !Bits[LastGroupStartIdx].hasValue ();
13331411 for (unsigned i = 1 ; i < Bits.size (); ++i) {
13341412 unsigned ThisRLAmt = RLAmt[i];
13351413 SDValue ThisValue = Bits[i].hasValue () ? Bits[i].getValue () : SDValue ();
@@ -1342,17 +1420,28 @@ class BitPermutationSelector {
13421420 LastGroupStartIdx = 0 ;
13431421 }
13441422
1423+ // If this bit is known to be zero and the current group is a bit group
1424+ // of zeros, we do not need to terminate the current bit group even if the
1425+ // Value or RLAmt does not match here. Instead, we terminate this group
1426+ // when the first non-zero bit appears later.
1427+ if (IsGroupOfZeros && Bits[i].isZero ())
1428+ continue ;
1429+
13451430 // If this bit has the same underlying value and the same rotate factor as
13461431 // the last one, then they're part of the same group.
13471432 if (ThisRLAmt == LastRLAmt && ThisValue == LastValue)
1348- continue ;
1433+ // We cannot continue the current group if this bit is not known to
1434+ // be zero in a bit group of zeros.
1435+ if (!(IsGroupOfZeros && ThisValue && !Bits[i].isZero ()))
1436+ continue ;
13491437
13501438 if (LastValue.getNode ())
13511439 BitGroups.push_back (BitGroup (LastValue, LastRLAmt, LastGroupStartIdx,
13521440 i-1 ));
13531441 LastRLAmt = ThisRLAmt;
13541442 LastValue = ThisValue;
13551443 LastGroupStartIdx = i;
1444+ IsGroupOfZeros = !Bits[LastGroupStartIdx].hasValue ();
13561445 }
13571446 if (LastValue.getNode ())
13581447 BitGroups.push_back (BitGroup (LastValue, LastRLAmt, LastGroupStartIdx,
@@ -1698,7 +1787,7 @@ class BitPermutationSelector {
16981787 // If we've not yet selected a 'starting' instruction, and we have no zeros
16991788 // to fill in, select the (Value, RLAmt) with the highest priority (largest
17001789 // number of groups), and start with this rotated value.
1701- if ((!HasZeros || LateMask) && !Res) {
1790+ if ((!NeedMask || LateMask) && !Res) {
17021791 ValueRotInfo &VRI = ValueRotsVec[0 ];
17031792 if (VRI.RLAmt ) {
17041793 if (InstCnt) *InstCnt += 1 ;
@@ -2077,7 +2166,7 @@ class BitPermutationSelector {
20772166 // If we've not yet selected a 'starting' instruction, and we have no zeros
20782167 // to fill in, select the (Value, RLAmt) with the highest priority (largest
20792168 // number of groups), and start with this rotated value.
2080- if ((!HasZeros || LateMask) && !Res) {
2169+ if ((!NeedMask || LateMask) && !Res) {
20812170 // If we have both Repl32 groups and non-Repl32 groups, the non-Repl32
20822171 // groups will come first, and so the VRI representing the largest number
20832172 // of groups might not be first (it might be the first Repl32 groups).
@@ -2230,7 +2319,7 @@ class BitPermutationSelector {
22302319
22312320 SmallVector<ValueBit, 64 > Bits;
22322321
2233- bool HasZeros ;
2322+ bool NeedMask ;
22342323 SmallVector<unsigned , 64 > RLAmt;
22352324
22362325 SmallVector<BitGroup, 16 > BitGroups;
@@ -2259,10 +2348,10 @@ class BitPermutationSelector {
22592348 " selection for: " );
22602349 LLVM_DEBUG (N->dump (CurDAG));
22612350
2262- // Fill it RLAmt and set HasZeros .
2351+ // Fill in RLAmt and set NeedMask.
22632352 computeRotationAmounts ();
22642353
2265- if (!HasZeros )
2354+ if (!NeedMask )
22662355 return Select (N, false );
22672356
22682357 // We currently have two techniques for handling results with zeros: early
0 commit comments