Skip to content

Commit 6fb473f

Browse files
committed
[X86][VBMI2] Try to lower shuffle as VSHLDI instructions
Fixes #145276
1 parent 9a6a87d commit 6fb473f

File tree

1 file changed

+77
-0
lines changed

1 file changed

+77
-0
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12096,6 +12096,38 @@ static SDValue lowerShuffleAsByteShiftMask(const SDLoc &DL, MVT VT, SDValue V1,
1209612096
return DAG.getBitcast(VT, Res);
1209712097
}
1209812098

12099+
/// Try to lower a two-input vector shuffle as a single funnel shift
/// (X86ISD::VSHLD) operating on wider integer lanes.
///
/// The shuffle elements are grouped into integer lanes of
/// Scale * ScalarSizeInBits bits (Scale = 2, 4, ... while the lane is at most
/// 64 bits wide). For each lane width the mask must repeat identically in
/// every lane, and the repeated mask must have the form:
///   - positions [0, Shift) are sequential (or undef) starting at index
///     (2 * Scale) - Shift, and
///   - positions [Shift, Scale) are sequential (or undef) starting at index 0,
/// for some 0 < Shift < Scale.
/// NOTE(review): this presumes isRepeatedShuffleMask reports second-input
/// elements as indices in [Scale, 2 * Scale), so the first range selects the
/// top Shift elements of V2's lane and the second the bottom elements of V1's
/// lane - confirm against that helper.
///
/// \param DL        Debug location attached to the created nodes.
/// \param VT        Vector type of the shuffle; also the type of the result.
/// \param V1        First shuffle input.
/// \param V2        Second shuffle input.
/// \param Mask      Shuffle mask to match.
/// \param Subtarget Queried for VBMI2 and VLX availability.
/// \param DAG       DAG used to build the replacement nodes.
/// \returns the funnel shift bitcast back to \p VT, or an empty SDValue if the
/// subtarget lacks the required features or the mask does not match.
static SDValue lowerShuffleAsVSHLD(const SDLoc &DL, MVT VT, SDValue V1,
                                   SDValue V2, ArrayRef<int> Mask,
                                   const X86Subtarget &Subtarget,
                                   SelectionDAG &DAG) {
  if (!Subtarget.hasVBMI2())
    return SDValue();
  // Without VLX only 512-bit vectors are accepted.
  if (!Subtarget.hasVLX() && !VT.is512BitVector())
    return SDValue();

  unsigned ScalarSizeInBits = VT.getScalarSizeInBits();
  for (int Scale = 2; Scale * ScalarSizeInBits <= 64; Scale *= 2) {
    unsigned LaneSize = Scale * ScalarSizeInBits;
    SmallVector<int, 8> RepeatedMask;
    if (!isRepeatedShuffleMask(LaneSize, VT, Mask, RepeatedMask))
      continue;

    for (int Shift = 1; Shift != Scale; ++Shift) {
      if (!isSequentialOrUndefInRange(RepeatedMask, 0, Shift,
                                      (2 * Scale) - Shift) ||
          !isSequentialOrUndefInRange(RepeatedMask, Shift, Scale - Shift, 0))
        continue;

      // Build the funnel shift on integer lanes of LaneSize bits.
      MVT ShiftVT = MVT::getIntegerVT(LaneSize);
      ShiftVT = MVT::getVectorVT(ShiftVT, VT.getSizeInBits() / LaneSize);

      // The node is created with result type ShiftVT, which always differs
      // from VT here (Scale >= 2), so the inputs must be reinterpreted as
      // ShiftVT first to keep operand and result types consistent.
      SDValue Hi = DAG.getBitcast(ShiftVT, V1);
      SDValue Lo = DAG.getBitcast(ShiftVT, V2);
      SDValue Amt =
          DAG.getTargetConstant(Shift * ScalarSizeInBits, DL, MVT::i8);
      return DAG.getBitcast(
          VT, DAG.getNode(X86ISD::VSHLD, DL, ShiftVT, Hi, Lo, Amt));
    }
  }

  return SDValue();
}
12130+
1209912131
/// Try to lower a vector shuffle as a bit shift (shifts in zeros).
1210012132
///
1210112133
/// Attempts to match a shuffle mask against the PSLL(W/D/Q/DQ) and
@@ -13789,6 +13821,11 @@ static SDValue lowerV4I32Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
1378913821
return Rotate;
1379013822
}
1379113823

13824+
// Try to use funnel shift instructions.
13825+
if (SDValue Funnel =
13826+
lowerShuffleAsVSHLD(DL, MVT::v4i32, V1, V2, Mask, Subtarget, DAG))
13827+
return Funnel;
13828+
1379213829
// Assume that a single SHUFPS is faster than an alternative sequence of
1379313830
// multiple instructions (even if the CPU has a domain penalty).
1379413831
// If some CPU is harmed by the domain switch, we can fix it in a later pass.
@@ -14507,6 +14544,11 @@ static SDValue lowerV8I16Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
1450714544
Subtarget, DAG))
1450814545
return Rotate;
1450914546

14547+
// Try to use funnel shift instructions.
14548+
if (SDValue Funnel =
14549+
lowerShuffleAsVSHLD(DL, MVT::v8i16, V1, V2, Mask, Subtarget, DAG))
14550+
return Funnel;
14551+
1451014552
if (SDValue BitBlend =
1451114553
lowerShuffleAsBitBlend(DL, MVT::v8i16, V1, V2, Mask, DAG))
1451214554
return BitBlend;
@@ -14702,6 +14744,11 @@ static SDValue lowerV16I8Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
1470214744
Subtarget, DAG))
1470314745
return Rotate;
1470414746

14747+
// Try to use funnel shift instructions.
14748+
if (SDValue Funnel =
14749+
lowerShuffleAsVSHLD(DL, MVT::v16i8, V1, V2, Mask, Subtarget, DAG))
14750+
return Funnel;
14751+
1470514752
// Use dedicated pack instructions for masks that match their pattern.
1470614753
if (SDValue V =
1470714754
lowerShuffleWithPACK(DL, MVT::v16i8, V1, V2, Mask, Subtarget, DAG))
@@ -16861,6 +16908,11 @@ static SDValue lowerV8I32Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
1686116908
Subtarget, DAG))
1686216909
return Rotate;
1686316910

16911+
// Try to use funnel shift instructions.
16912+
if (SDValue Funnel =
16913+
lowerShuffleAsVSHLD(DL, MVT::v8i32, V1, V2, Mask, Subtarget, DAG))
16914+
return Funnel;
16915+
1686416916
// Try to create an in-lane repeating shuffle mask and then shuffle the
1686516917
// results into the target lanes.
1686616918
if (SDValue V = lowerShuffleAsRepeatedMaskAndLanePermute(
@@ -16955,6 +17007,11 @@ static SDValue lowerV16I16Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
1695517007
Subtarget, DAG))
1695617008
return Rotate;
1695717009

17010+
// Try to use funnel shift instructions.
17011+
if (SDValue Funnel =
17012+
lowerShuffleAsVSHLD(DL, MVT::v16i16, V1, V2, Mask, Subtarget, DAG))
17013+
return Funnel;
17014+
1695817015
// Try to create an in-lane repeating shuffle mask and then shuffle the
1695917016
// results into the target lanes.
1696017017
if (SDValue V = lowerShuffleAsRepeatedMaskAndLanePermute(
@@ -17078,6 +17135,11 @@ static SDValue lowerV32I8Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
1707817135
Subtarget, DAG))
1707917136
return Rotate;
1708017137

17138+
// Try to use funnel shift instructions.
17139+
if (SDValue Funnel =
17140+
lowerShuffleAsVSHLD(DL, MVT::v32i8, V1, V2, Mask, Subtarget, DAG))
17141+
return Funnel;
17142+
1708117143
// Try to use bit rotation instructions.
1708217144
if (V2.isUndef())
1708317145
if (SDValue Rotate =
@@ -17590,6 +17652,11 @@ static SDValue lowerV16I32Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
1759017652
Subtarget, DAG))
1759117653
return Rotate;
1759217654

17655+
// Try to use funnel shift instructions.
17656+
if (SDValue Funnel =
17657+
lowerShuffleAsVSHLD(DL, MVT::v16i32, V1, V2, Mask, Subtarget, DAG))
17658+
return Funnel;
17659+
1759317660
// Assume that a single SHUFPS is faster than using a permv shuffle.
1759417661
// If some CPU is harmed by the domain switch, we can fix it in a later pass.
1759517662
if (Is128BitLaneRepeatedShuffle && isSingleSHUFPSMask(RepeatedMask)) {
@@ -17655,6 +17722,11 @@ static SDValue lowerV32I16Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
1765517722
Subtarget, DAG))
1765617723
return Rotate;
1765717724

17725+
// Try to use funnel shift instructions.
17726+
if (SDValue Funnel =
17727+
lowerShuffleAsVSHLD(DL, MVT::v32i16, V1, V2, Mask, Subtarget, DAG))
17728+
return Funnel;
17729+
1765817730
if (V2.isUndef()) {
1765917731
// Try to use bit rotation instructions.
1766017732
if (SDValue Rotate =
@@ -17726,6 +17798,11 @@ static SDValue lowerV64I8Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
1772617798
Subtarget, DAG))
1772717799
return Rotate;
1772817800

17801+
// Try to use funnel shift instructions.
17802+
if (SDValue Funnel =
17803+
lowerShuffleAsVSHLD(DL, MVT::v64i8, V1, V2, Mask, Subtarget, DAG))
17804+
return Funnel;
17805+
1772917806
// Try to use bit rotation instructions.
1773017807
if (V2.isUndef())
1773117808
if (SDValue Rotate =

0 commit comments

Comments
 (0)