diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 2c41a871b6d6c..46a2370108278 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -3472,6 +3472,59 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { Results.push_back(TLI.expandVectorSplice(Node, DAG)); break; } + case ISD::VECTOR_DEINTERLEAVE: { + unsigned Factor = Node->getNumOperands(); + if (Factor <= 2 || !isPowerOf2_32(Factor)) + break; + SmallVector Ops; + for (SDValue Op : Node->ops()) + Ops.push_back(Op); + EVT VecVT = Node->getValueType(0); + SmallVector HalfVTs(Factor / 2, VecVT); + // Deinterleave at Factor/2 so each result contains two factors interleaved: + // a0b0 c0d0 a1b1 c1d1 -> [a0c0 b0d0] [a1c1 b1d1] + SDValue L = DAG.getNode(ISD::VECTOR_DEINTERLEAVE, dl, HalfVTs, + ArrayRef(Ops).take_front(Factor / 2)); + SDValue R = DAG.getNode(ISD::VECTOR_DEINTERLEAVE, dl, HalfVTs, + ArrayRef(Ops).take_back(Factor / 2)); + Results.resize(Factor); + // Deinterleave the 2 factors out: + // [a0c0 a1c1] [b0d0 b1d1] -> a0a1 b0b1 c0c1 d0d1 + for (unsigned I = 0; I < Factor / 2; I++) { + SDValue Deinterleave = + DAG.getNode(ISD::VECTOR_DEINTERLEAVE, dl, {VecVT, VecVT}, + {L.getValue(I), R.getValue(I)}); + Results[I] = Deinterleave.getValue(0); + Results[I + Factor / 2] = Deinterleave.getValue(1); + } + break; + } + case ISD::VECTOR_INTERLEAVE: { + unsigned Factor = Node->getNumOperands(); + if (Factor <= 2 || !isPowerOf2_32(Factor)) + break; + EVT VecVT = Node->getValueType(0); + SmallVector HalfVTs(Factor / 2, VecVT); + SmallVector LOps, ROps; + // Interleave so we have 2 factors per result: + // a0a1 b0b1 c0c1 d0d1 -> [a0c0 b0d0] [a1c1 b1d1] + for (unsigned I = 0; I < Factor / 2; I++) { + SDValue Interleave = + DAG.getNode(ISD::VECTOR_INTERLEAVE, dl, {VecVT, VecVT}, + {Node->getOperand(I), Node->getOperand(I + Factor / 2)}); + LOps.push_back(Interleave.getValue(0)); + ROps.push_back(Interleave.getValue(1)); + } + // Interleave at Factor/2: + // [a0c0 b0d0] [a1c1 b1d1] -> a0b0 c0d0 a1b1 c1d1 + SDValue L = DAG.getNode(ISD::VECTOR_INTERLEAVE, dl, HalfVTs, LOps); + SDValue R = DAG.getNode(ISD::VECTOR_INTERLEAVE, dl, HalfVTs, ROps); + for (unsigned I = 0; I < Factor / 2; I++) + Results.push_back(L.getValue(I)); + for (unsigned I = 0; I < Factor / 2; I++) + Results.push_back(R.getValue(I)); + break; + } case ISD::EXTRACT_ELEMENT: { EVT OpTy = Node->getOperand(0).getValueType(); if (Node->getConstantOperandVal(1)) { diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 4dd9c513120bb..e2c12bba6e284 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -29208,6 +29208,10 @@ AArch64TargetLowering::LowerVECTOR_DEINTERLEAVE(SDValue Op, EVT OpVT = Op.getValueType(); assert(OpVT.isScalableVector() && "Expected scalable vector in LowerVECTOR_DEINTERLEAVE."); + + if (Op->getNumOperands() != 2) + return SDValue(); + SDValue Even = DAG.getNode(AArch64ISD::UZP1, DL, OpVT, Op.getOperand(0), Op.getOperand(1)); SDValue Odd = DAG.getNode(AArch64ISD::UZP2, DL, OpVT, Op.getOperand(0), @@ -29222,6 +29226,9 @@ SDValue AArch64TargetLowering::LowerVECTOR_INTERLEAVE(SDValue Op, assert(OpVT.isScalableVector() && "Expected scalable vector in LowerVECTOR_INTERLEAVE."); + if (Op->getNumOperands() != 2) + return SDValue(); + SDValue Lo = DAG.getNode(AArch64ISD::ZIP1, DL, OpVT, Op.getOperand(0), Op.getOperand(1)); SDValue Hi = DAG.getNode(AArch64ISD::ZIP2, DL, OpVT, Op.getOperand(0), diff --git a/llvm/test/CodeGen/AArch64/sve-vector-deinterleave.ll b/llvm/test/CodeGen/AArch64/sve-vector-deinterleave.ll index adf1b48b6998a..89fc10b47bb35 100644 --- a/llvm/test/CodeGen/AArch64/sve-vector-deinterleave.ll +++ b/llvm/test/CodeGen/AArch64/sve-vector-deinterleave.ll @@ -151,6 +151,102 @@ define {, } @vector_deinterleave_nxv2i64_nxv ret {, } %retval } +define {, , , } @vector_deinterleave_nxv16i8_nxv64i8( %vec) { +; CHECK-LABEL: vector_deinterleave_nxv16i8_nxv64i8: +; CHECK: // %bb.0: +; CHECK-NEXT: uzp1 z4.b, z2.b, z3.b +; CHECK-NEXT: uzp1 z5.b, z0.b, z1.b +; CHECK-NEXT: uzp2 z3.b, z2.b, z3.b +; CHECK-NEXT: uzp2 z6.b, z0.b, z1.b +; CHECK-NEXT: uzp1 z0.b, z5.b, z4.b +; CHECK-NEXT: uzp2 z2.b, z5.b, z4.b +; CHECK-NEXT: uzp1 z1.b, z6.b, z3.b +; CHECK-NEXT: uzp2 z3.b, z6.b, z3.b +; CHECK-NEXT: ret + %retval = call {, , , } @llvm.vector.deinterleave4.nxv64i8( %vec) + ret {, , , } %retval +} + +define {, , , } @vector_deinterleave_nxv8i16_nxv32i16( %vec) { +; CHECK-LABEL: vector_deinterleave_nxv8i16_nxv32i16: +; CHECK: // %bb.0: +; CHECK-NEXT: uzp1 z4.h, z2.h, z3.h +; CHECK-NEXT: uzp1 z5.h, z0.h, z1.h +; CHECK-NEXT: uzp2 z3.h, z2.h, z3.h +; CHECK-NEXT: uzp2 z6.h, z0.h, z1.h +; CHECK-NEXT: uzp1 z0.h, z5.h, z4.h +; CHECK-NEXT: uzp2 z2.h, z5.h, z4.h +; CHECK-NEXT: uzp1 z1.h, z6.h, z3.h +; CHECK-NEXT: uzp2 z3.h, z6.h, z3.h +; CHECK-NEXT: ret + %retval = call {, , , } @llvm.vector.deinterleave4.nxv32i16( %vec) + ret {, , , } %retval +} + +define {, , , } @vector_deinterleave_nxv4i32_nxv16i32( %vec) { +; CHECK-LABEL: vector_deinterleave_nxv4i32_nxv16i32: +; CHECK: // %bb.0: +; CHECK-NEXT: uzp1 z4.s, z2.s, z3.s +; CHECK-NEXT: uzp1 z5.s, z0.s, z1.s +; CHECK-NEXT: uzp2 z3.s, z2.s, z3.s +; CHECK-NEXT: uzp2 z6.s, z0.s, z1.s +; CHECK-NEXT: uzp1 z0.s, z5.s, z4.s +; CHECK-NEXT: uzp2 z2.s, z5.s, z4.s +; CHECK-NEXT: uzp1 z1.s, z6.s, z3.s +; CHECK-NEXT: uzp2 z3.s, z6.s, z3.s +; CHECK-NEXT: ret + %retval = call {, , , } @llvm.vector.deinterleave4.nxv16i32( %vec) + ret {, , , } %retval +} + +define {, , , } @vector_deinterleave_nxv2i64_nxv8i64( %vec) { +; CHECK-LABEL: vector_deinterleave_nxv2i64_nxv8i64: +; CHECK: // %bb.0: +; CHECK-NEXT: uzp1 z4.d, z2.d, z3.d +; CHECK-NEXT: uzp1 z5.d, z0.d, z1.d +; CHECK-NEXT: uzp2 z3.d, z2.d, z3.d +; CHECK-NEXT: uzp2 z6.d, z0.d, z1.d +; CHECK-NEXT: uzp1 z0.d, z5.d, z4.d +; CHECK-NEXT: uzp2 z2.d, z5.d, z4.d +; CHECK-NEXT: uzp1 z1.d, z6.d, z3.d +; CHECK-NEXT: uzp2 z3.d, z6.d, z3.d +; CHECK-NEXT: ret + %retval = call {, , , } @llvm.vector.deinterleave4.nxv8i64( %vec) + ret {, , , } %retval +} + +define {, , , , , , , } @vector_deinterleave_nxv2i64_nxv16i64( %vec) { +; CHECK-LABEL: vector_deinterleave_nxv2i64_nxv16i64: +; CHECK: // %bb.0: +; CHECK-NEXT: uzp1 z24.d, z6.d, z7.d +; CHECK-NEXT: uzp1 z25.d, z4.d, z5.d +; CHECK-NEXT: uzp1 z26.d, z2.d, z3.d +; CHECK-NEXT: uzp1 z27.d, z0.d, z1.d +; CHECK-NEXT: uzp2 z6.d, z6.d, z7.d +; CHECK-NEXT: uzp2 z4.d, z4.d, z5.d +; CHECK-NEXT: uzp2 z2.d, z2.d, z3.d +; CHECK-NEXT: uzp2 z0.d, z0.d, z1.d +; CHECK-NEXT: uzp1 z5.d, z25.d, z24.d +; CHECK-NEXT: uzp2 z24.d, z25.d, z24.d +; CHECK-NEXT: uzp1 z7.d, z27.d, z26.d +; CHECK-NEXT: uzp1 z28.d, z4.d, z6.d +; CHECK-NEXT: uzp2 z25.d, z27.d, z26.d +; CHECK-NEXT: uzp1 z29.d, z0.d, z2.d +; CHECK-NEXT: uzp2 z26.d, z4.d, z6.d +; CHECK-NEXT: uzp2 z27.d, z0.d, z2.d +; CHECK-NEXT: uzp1 z0.d, z7.d, z5.d +; CHECK-NEXT: uzp1 z2.d, z25.d, z24.d +; CHECK-NEXT: uzp2 z4.d, z7.d, z5.d +; CHECK-NEXT: uzp1 z1.d, z29.d, z28.d +; CHECK-NEXT: uzp1 z3.d, z27.d, z26.d +; CHECK-NEXT: uzp2 z5.d, z29.d, z28.d +; CHECK-NEXT: uzp2 z6.d, z25.d, z24.d +; CHECK-NEXT: uzp2 z7.d, z27.d, z26.d +; CHECK-NEXT: ret + %retval = call {, , , , , , , } @llvm.vector.deinterleave8.nxv16i64( %vec) + ret {, , , , , , , } %retval +} + ; Predicated define {, } @vector_deinterleave_nxv16i1_nxv32i1( %vec) { ; CHECK-LABEL: vector_deinterleave_nxv16i1_nxv32i1: @@ -279,7 +375,6 @@ define {, } @vector_deinterleave_nxv2i32_nxv ret {, } %retval } - ; Floating declarations declare {,} @llvm.vector.deinterleave2.nxv4f16() declare {, } @llvm.vector.deinterleave2.nxv8f16() diff --git a/llvm/test/CodeGen/AArch64/sve-vector-interleave.ll b/llvm/test/CodeGen/AArch64/sve-vector-interleave.ll index 288034422d9c0..34d026f43708c 100644 --- a/llvm/test/CodeGen/AArch64/sve-vector-interleave.ll +++ b/llvm/test/CodeGen/AArch64/sve-vector-interleave.ll @@ -146,6 +146,102 @@ define @interleave2_nxv4i64( %vec0, %retval } +define @interleave4_nxv16i8( %vec0, %vec1, %vec2, %vec3) { +; CHECK-LABEL: interleave4_nxv16i8: +; CHECK: // %bb.0: +; CHECK-NEXT: zip1 z4.b, z1.b, z3.b +; CHECK-NEXT: zip1 z5.b, z0.b, z2.b +; CHECK-NEXT: zip2 z3.b, z1.b, z3.b +; CHECK-NEXT: zip2 z6.b, z0.b, z2.b +; CHECK-NEXT: zip1 z0.b, z5.b, z4.b +; CHECK-NEXT: zip2 z1.b, z5.b, z4.b +; CHECK-NEXT: zip1 z2.b, z6.b, z3.b +; CHECK-NEXT: zip2 z3.b, z6.b, z3.b +; CHECK-NEXT: ret + %retval = call @llvm.vector.interleave4.nxv16i8( %vec0, %vec1, %vec2, %vec3) + ret %retval +} + +define @interleave4_nxv8i16( %vec0, %vec1, %vec2, %vec3) { +; CHECK-LABEL: interleave4_nxv8i16: +; CHECK: // %bb.0: +; CHECK-NEXT: zip1 z4.h, z1.h, z3.h +; CHECK-NEXT: zip1 z5.h, z0.h, z2.h +; CHECK-NEXT: zip2 z3.h, z1.h, z3.h +; CHECK-NEXT: zip2 z6.h, z0.h, z2.h +; CHECK-NEXT: zip1 z0.h, z5.h, z4.h +; CHECK-NEXT: zip2 z1.h, z5.h, z4.h +; CHECK-NEXT: zip1 z2.h, z6.h, z3.h +; CHECK-NEXT: zip2 z3.h, z6.h, z3.h +; CHECK-NEXT: ret + %retval = call @llvm.vector.interleave4.nxv8i16( %vec0, %vec1, %vec2, %vec3) + ret %retval +} + +define @interleave4_nxv4i32( %vec0, %vec1, %vec2, %vec3) { +; CHECK-LABEL: interleave4_nxv4i32: +; CHECK: // %bb.0: +; CHECK-NEXT: zip1 z4.s, z1.s, z3.s +; CHECK-NEXT: zip1 z5.s, z0.s, z2.s +; CHECK-NEXT: zip2 z3.s, z1.s, z3.s +; CHECK-NEXT: zip2 z6.s, z0.s, z2.s +; CHECK-NEXT: zip1 z0.s, z5.s, z4.s +; CHECK-NEXT: zip2 z1.s, z5.s, z4.s +; CHECK-NEXT: zip1 z2.s, z6.s, z3.s +; CHECK-NEXT: zip2 z3.s, z6.s, z3.s +; CHECK-NEXT: ret + %retval = call @llvm.vector.interleave4.nxv4i32( %vec0, %vec1, %vec2, %vec3) + ret %retval +} + +define @interleave4_nxv8i64( %vec0, %vec1, %vec2, %vec3) { +; CHECK-LABEL: interleave4_nxv8i64: +; CHECK: // %bb.0: +; CHECK-NEXT: zip1 z4.d, z1.d, z3.d +; CHECK-NEXT: zip1 z5.d, z0.d, z2.d +; CHECK-NEXT: zip2 z3.d, z1.d, z3.d +; CHECK-NEXT: zip2 z6.d, z0.d, z2.d +; CHECK-NEXT: zip1 z0.d, z5.d, z4.d +; CHECK-NEXT: zip2 z1.d, z5.d, z4.d +; CHECK-NEXT: zip1 z2.d, z6.d, z3.d +; CHECK-NEXT: zip2 z3.d, z6.d, z3.d +; CHECK-NEXT: ret + %retval = call @llvm.vector.interleave4.nxv8i64( %vec0, %vec1, %vec2, %vec3) + ret %retval +} + +define @interleave8_nxv16i64( %vec0, %vec1, %vec2, %vec3, %vec4, %vec5, %vec6, %vec7) { +; CHECK-LABEL: interleave8_nxv16i64: +; CHECK: // %bb.0: +; CHECK-NEXT: zip1 z24.d, z3.d, z7.d +; CHECK-NEXT: zip1 z25.d, z1.d, z5.d +; CHECK-NEXT: zip1 z26.d, z2.d, z6.d +; CHECK-NEXT: zip1 z27.d, z0.d, z4.d +; CHECK-NEXT: zip2 z3.d, z3.d, z7.d +; CHECK-NEXT: zip2 z1.d, z1.d, z5.d +; CHECK-NEXT: zip2 z2.d, z2.d, z6.d +; CHECK-NEXT: zip2 z0.d, z0.d, z4.d +; CHECK-NEXT: zip1 z4.d, z25.d, z24.d +; CHECK-NEXT: zip2 z6.d, z25.d, z24.d +; CHECK-NEXT: zip1 z5.d, z27.d, z26.d +; CHECK-NEXT: zip2 z7.d, z27.d, z26.d +; CHECK-NEXT: zip1 z24.d, z1.d, z3.d +; CHECK-NEXT: zip1 z25.d, z0.d, z2.d +; CHECK-NEXT: zip2 z26.d, z1.d, z3.d +; CHECK-NEXT: zip2 z27.d, z0.d, z2.d +; CHECK-NEXT: zip1 z0.d, z5.d, z4.d +; CHECK-NEXT: zip2 z1.d, z5.d, z4.d +; CHECK-NEXT: zip1 z2.d, z7.d, z6.d +; CHECK-NEXT: zip2 z3.d, z7.d, z6.d +; CHECK-NEXT: zip1 z4.d, z25.d, z24.d +; CHECK-NEXT: zip2 z5.d, z25.d, z24.d +; CHECK-NEXT: zip1 z6.d, z27.d, z26.d +; CHECK-NEXT: zip2 z7.d, z27.d, z26.d +; CHECK-NEXT: ret + %retval = call @llvm.vector.interleave8.nxv16i64( %vec0, %vec1, %vec2, %vec3, %vec4, %vec5, %vec6, %vec7) + ret %retval +} + ; Predicated define @interleave2_nxv32i1( %vec0, %vec1) {