@@ -2134,6 +2134,35 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
      for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64 })
        setOperationAction(ISD::CTPOP, VT, Legal);
    }
+
+    // We can try to convert vectors to different sizes to leverage legal
+    // `vpcompress` cases. So we mark these supported vector sizes as Custom and
+    // then specialize to Legal below.
+    for (MVT VT : {MVT::v8i32, MVT::v8f32, MVT::v4i32, MVT::v4f32, MVT::v4i64,
+                   MVT::v4f64, MVT::v2i64, MVT::v2f64, MVT::v16i8, MVT::v8i16,
+                   MVT::v16i16, MVT::v8i8})
+      setOperationAction(ISD::VECTOR_COMPRESS, VT, Custom);
+
+    // Legal vpcompress depends on various AVX512 extensions.
+    // Legal in AVX512F
+    for (MVT VT : {MVT::v16i32, MVT::v16f32, MVT::v8i64, MVT::v8f64})
+      setOperationAction(ISD::VECTOR_COMPRESS, VT, Legal);
+
+    // Legal in AVX512F + AVX512VL
+    if (Subtarget.hasVLX())
+      for (MVT VT : {MVT::v8i32, MVT::v8f32, MVT::v4i32, MVT::v4f32,
+                     MVT::v4i64, MVT::v4f64, MVT::v2i64, MVT::v2f64})
+        setOperationAction(ISD::VECTOR_COMPRESS, VT, Legal);
+
+    // Legal in AVX512F + AVX512VBMI2
+    if (Subtarget.hasVBMI2())
+      for (MVT VT : {MVT::v32i16, MVT::v64i8})
+        setOperationAction(ISD::VECTOR_COMPRESS, VT, Legal);
+
+    // Legal in AVX512F + AVX512VL + AVX512VBMI2
+    if (Subtarget.hasVBMI2() && Subtarget.hasVLX())
+      for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v32i8, MVT::v16i16})
+        setOperationAction(ISD::VECTOR_COMPRESS, VT, Legal);
  }

  // This block controls legalization of v32i1/v64i1 which are available with
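
Editor's note: for readers unfamiliar with the node, ISD::VECTOR_COMPRESS packs the elements whose mask bit is set into the low lanes of the result and fills the remaining lanes from the passthru operand (per the documented semantics of llvm.experimental.vector.compress; lanes are undefined when passthru is undef). Below is a minimal scalar sketch of those semantics; compressModel is a hypothetical helper name, not an LLVM API.

#include <array>
#include <cstddef>

// Illustrative scalar model of VECTOR_COMPRESS semantics (hypothetical
// helper, not LLVM code): active elements are packed into the low lanes of
// the result, and every lane not filled by compression is taken from the
// passthru operand.
template <typename T, std::size_t N>
std::array<T, N> compressModel(const std::array<T, N> &Vec,
                               const std::array<bool, N> &Mask,
                               const std::array<T, N> &Passthru) {
  std::array<T, N> Result = Passthru; // tail lanes come from passthru
  std::size_t Out = 0;
  for (std::size_t I = 0; I < N; ++I)
    if (Mask[I])
      Result[Out++] = Vec[I]; // pack selected elements to the front
  return Result;
}

For example, with Vec = {1, 2, 3, 4}, Mask = {true, false, true, false}, and Passthru = {9, 9, 9, 9}, the model yields {1, 3, 9, 9}.
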
@@ -17795,6 +17824,68 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, const X86Subtarget &Subtarget,
  llvm_unreachable("Unimplemented!");
}

+// As legal vpcompress instructions depend on various AVX512 extensions, try to
+// convert illegal vector sizes to legal ones to avoid expansion.
+static SDValue lowerVECTOR_COMPRESS(SDValue Op, const X86Subtarget &Subtarget,
+                                    SelectionDAG &DAG) {
+  assert(Subtarget.hasAVX512() &&
+         "Need AVX512 for custom VECTOR_COMPRESS lowering.");
+
+  SDLoc DL(Op);
+  SDValue Vec = Op.getOperand(0);
+  SDValue Mask = Op.getOperand(1);
+  SDValue Passthru = Op.getOperand(2);
+
+  EVT VecVT = Vec.getValueType();
+  EVT ElementVT = VecVT.getVectorElementType();
+  unsigned NumElements = VecVT.getVectorNumElements();
+  unsigned NumVecBits = VecVT.getFixedSizeInBits();
+  unsigned NumElementBits = ElementVT.getFixedSizeInBits();
+
+  // 128- and 256-bit vectors with <= 16 elements can be converted to and
+  // compressed as 512-bit vectors in AVX512F.
+  if (NumVecBits != 128 && NumVecBits != 256)
+    return SDValue();
+
+  if (NumElementBits == 32 || NumElementBits == 64) {
+    unsigned NumLargeElements = 512 / NumElementBits;
+    MVT LargeVecVT =
+        MVT::getVectorVT(ElementVT.getSimpleVT(), NumLargeElements);
+    MVT LargeMaskVT = MVT::getVectorVT(MVT::i1, NumLargeElements);
+
+    Vec = widenSubVector(LargeVecVT, Vec, /*ZeroNewElements=*/false, Subtarget,
+                         DAG, DL);
+    Mask = widenSubVector(LargeMaskVT, Mask, /*ZeroNewElements=*/true,
+                          Subtarget, DAG, DL);
+    Passthru = Passthru.isUndef() ? DAG.getUNDEF(LargeVecVT)
+                                  : widenSubVector(LargeVecVT, Passthru,
+                                                   /*ZeroNewElements=*/false,
+                                                   Subtarget, DAG, DL);
+
+    SDValue Compressed =
+        DAG.getNode(ISD::VECTOR_COMPRESS, DL, LargeVecVT, Vec, Mask, Passthru);
+    return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VecVT, Compressed,
+                       DAG.getConstant(0, DL, MVT::i64));
+  }
+
+  if (VecVT == MVT::v8i16 || VecVT == MVT::v8i8 || VecVT == MVT::v16i8 ||
+      VecVT == MVT::v16i16) {
+    MVT LargeElementVT = MVT::getIntegerVT(512 / NumElements);
+    EVT LargeVecVT = MVT::getVectorVT(LargeElementVT, NumElements);
+
+    Vec = DAG.getNode(ISD::ANY_EXTEND, DL, LargeVecVT, Vec);
+    Passthru = Passthru.isUndef()
+                   ? DAG.getUNDEF(LargeVecVT)
+                   : DAG.getNode(ISD::ANY_EXTEND, DL, LargeVecVT, Passthru);
+
+    SDValue Compressed =
+        DAG.getNode(ISD::VECTOR_COMPRESS, DL, LargeVecVT, Vec, Mask, Passthru);
+    return DAG.getNode(ISD::TRUNCATE, DL, VecVT, Compressed);
+  }
+
+  return SDValue();
+}
+
/// Try to lower a VSELECT instruction to a vector shuffle.
static SDValue lowerVSELECTtoVectorShuffle(SDValue Op,
                                           const X86Subtarget &Subtarget,
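
Editor's note: to see why the 32/64-bit branch above is sound, consider v4i32. The vector, mask, and passthru are widened to 512 bits, the mask's new lanes are zeroed so the widened lanes are never selected, and the low four lanes of the compressed v16i32 are extracted. A sketch using the hypothetical compressModel helper from the earlier note (the DAG leaves the widened vector lanes undefined; they are zeroed here only for simplicity):

#include <array>
#include <cstdint>

// Hypothetical model of the v4i32 widening path: compress as v16i32, then
// take the low four lanes (the EXTRACT_SUBVECTOR at index 0).
std::array<uint32_t, 4> compressV4I32(const std::array<uint32_t, 4> &Vec,
                                      const std::array<bool, 4> &Mask,
                                      const std::array<uint32_t, 4> &Passthru) {
  std::array<uint32_t, 16> WideVec{}, WidePassthru{};
  std::array<bool, 16> WideMask{}; // new mask lanes stay false (inactive)
  for (unsigned I = 0; I < 4; ++I) {
    WideVec[I] = Vec[I];
    WideMask[I] = Mask[I];
    WidePassthru[I] = Passthru[I];
  }
  std::array<uint32_t, 16> Wide =
      compressModel(WideVec, WideMask, WidePassthru);
  std::array<uint32_t, 4> Result;
  for (unsigned I = 0; I < 4; ++I)
    Result[I] = Wide[I]; // keep only the low subvector
  return Result;
}

At most four mask bits are set, so every selected element lands in the low four lanes, and lanes not filled by compression read from WidePassthru, whose low four lanes hold the original passthru values.
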
@@ -32621,6 +32712,7 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
  case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG);
  case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, Subtarget, DAG);
  case ISD::VECTOR_SHUFFLE: return lowerVECTOR_SHUFFLE(Op, Subtarget, DAG);
+  case ISD::VECTOR_COMPRESS: return lowerVECTOR_COMPRESS(Op, Subtarget, DAG);
  case ISD::VSELECT: return LowerVSELECT(Op, DAG);
  case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
  case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
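
Editor's note: types marked Custom in the constructor reach LowerOperation and the new case above; when lowerVECTOR_COMPRESS returns the empty SDValue(), legalization falls back to its default expansion of the node. The small-element branch relies on ANY_EXTEND being sufficient: compression permutes whole elements, so the original low bits survive and the unspecified high bits are discarded by the final TRUNCATE. A sketch for v8i16, again reusing the hypothetical compressModel helper:

#include <array>
#include <cstdint>

// Hypothetical model of the small-element path for v8i16: any-extend each
// i16 to i64 (512 / 8 elements = 64 bits per lane), compress as v8i64,
// then truncate each lane back to i16. The mask is reused unchanged since
// the lane count does not change.
std::array<uint16_t, 8> compressV8I16(const std::array<uint16_t, 8> &Vec,
                                      const std::array<bool, 8> &Mask,
                                      const std::array<uint16_t, 8> &Passthru) {
  std::array<uint64_t, 8> WideVec, WidePassthru;
  for (unsigned I = 0; I < 8; ++I) {
    WideVec[I] = Vec[I];           // models ANY_EXTEND; high bits arbitrary
    WidePassthru[I] = Passthru[I];
  }
  std::array<uint64_t, 8> Wide = compressModel(WideVec, Mask, WidePassthru);
  std::array<uint16_t, 8> Result;
  for (unsigned I = 0; I < 8; ++I)
    Result[I] = static_cast<uint16_t>(Wide[I]); // models TRUNCATE
  return Result;
}
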