diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 8c5a4cdae1163..353509a1c1efa 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -11369,54 +11369,117 @@ static bool isSingletonEXTMask(ArrayRef<int> M, EVT VT, unsigned &Imm) {
   return true;
 }
 
-// Detect patterns of a0,a1,a2,a3,b0,b1,b2,b3,c0,c1,c2,c3,d0,d1,d2,d3 from
-// v4i32s. This is really a truncate, which we can construct out of (legal)
-// concats and truncate nodes.
-static SDValue ReconstructTruncateFromBuildVector(SDValue V, SelectionDAG &DAG) {
-  if (V.getValueType() != MVT::v16i8)
-    return SDValue();
-  assert(V.getNumOperands() == 16 && "Expected 16 operands on the BUILDVECTOR");
-
-  for (unsigned X = 0; X < 4; X++) {
-    // Check the first item in each group is an extract from lane 0 of a v4i32
-    // or v4i16.
-    SDValue BaseExt = V.getOperand(X * 4);
-    if (BaseExt.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
-        (BaseExt.getOperand(0).getValueType() != MVT::v4i16 &&
-         BaseExt.getOperand(0).getValueType() != MVT::v4i32) ||
-        !isa<ConstantSDNode>(BaseExt.getOperand(1)) ||
-        BaseExt.getConstantOperandVal(1) != 0)
-      return SDValue();
-    SDValue Base = BaseExt.getOperand(0);
-    // And check the other items are extracts from the same vector.
-    for (unsigned Y = 1; Y < 4; Y++) {
-      SDValue Ext = V.getOperand(X * 4 + Y);
-      if (Ext.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
-          Ext.getOperand(0) != Base ||
-          !isa<ConstantSDNode>(Ext.getOperand(1)) ||
-          Ext.getConstantOperandVal(1) != Y)
-        return SDValue();
-    }
-  }
-
-  // Turn the buildvector into a series of truncates and concates, which will
-  // become uzip1's. Any v4i32s we found get truncated to v4i16, which are
-  // concat together to produce 2 v8i16. These are both truncated and concat
-  // together.
-  SDLoc DL(V);
-  SDValue Trunc[4] = {
-      V.getOperand(0).getOperand(0), V.getOperand(4).getOperand(0),
-      V.getOperand(8).getOperand(0), V.getOperand(12).getOperand(0)};
-  for (SDValue &V : Trunc)
-    if (V.getValueType() == MVT::v4i32)
-      V = DAG.getNode(ISD::TRUNCATE, DL, MVT::v4i16, V);
-  SDValue Concat0 =
-      DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v8i16, Trunc[0], Trunc[1]);
-  SDValue Concat1 =
-      DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v8i16, Trunc[2], Trunc[3]);
-  SDValue Trunc0 = DAG.getNode(ISD::TRUNCATE, DL, MVT::v8i8, Concat0);
-  SDValue Trunc1 = DAG.getNode(ISD::TRUNCATE, DL, MVT::v8i8, Concat1);
-  return DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v16i8, Trunc0, Trunc1);
+// Detect patterns like a0,a1,a2,a3,b0,b1,b2,b3,c0,c1,c2,c3,d0,d1,d2,d3, that
+// are truncates, which we can construct out of (legal) concats and truncate
+// nodes. Every lane of V must be an EXTRACT_VECTOR_ELT with a constant index;
+// the lanes must walk each source vector from lane 0 to its last lane, and
+// all sources must have the same legal fixed-length type. Returns an empty
+// SDValue if the pattern does not match.
+static SDValue ReconstructTruncateFromBuildVector(SDValue V,
+                                                  SelectionDAG &DAG) {
+  EVT BVTy = V.getValueType();
+  if (BVTy != MVT::v16i8 && BVTy != MVT::v8i16 && BVTy != MVT::v8i8 &&
+      BVTy != MVT::v4i16)
+    return SDValue();
+
+  // Only handle truncating BVs.
+  if (V.getOperand(0).getValueType().getSizeInBits() ==
+      BVTy.getScalarSizeInBits())
+    return SDValue();
+
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  SmallVector<SDValue> Sources;
+  uint64_t LastIdx = 0;
+  // Check for sequential indices e.g. i=0, i+1, ..., i=0, i+1, ...
+  for (SDValue Extr : V->ops()) {
+    // Do not rely on the caller having verified the operand kinds.
+    if (Extr.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
+        !isa<ConstantSDNode>(Extr.getOperand(1)))
+      return SDValue();
+
+    SDValue SourceVec = Extr.getOperand(0);
+    EVT SourceVecTy = SourceVec.getValueType();
+    if (!SourceVecTy.isFixedLengthVector() || !TLI.isTypeLegal(SourceVecTy))
+      return SDValue();
+
+    uint64_t CurIdx = Extr.getConstantOperandVal(1);
+    if (CurIdx == 0) {
+      // A new run starts here (the same source may be repeated). The previous
+      // source must have been consumed completely, and all sources must share
+      // one type so that they can be concatenated pairwise below.
+      if (!Sources.empty()) {
+        EVT PrevTy = Sources.back().getValueType();
+        if (PrevTy != SourceVecTy ||
+            PrevTy.getVectorNumElements() != LastIdx + 1)
+          return SDValue();
+      }
+      Sources.push_back(SourceVec);
+    } else if (Sources.empty() || Sources.back() != SourceVec ||
+               CurIdx != LastIdx + 1) {
+      // The lane must continue the current run: same source, next index.
+      return SDValue();
+    }
+
+    LastIdx = CurIdx;
+  }
+
+  // Check if all lanes of the last source are used by the BV.
+  if (Sources.back().getValueType().getVectorNumElements() != LastIdx + 1)
+    return SDValue();
+  // The pairwise reduction below needs a power-of-two number of sources, and
+  // at least two of them.
+  if (Sources.size() < 2 || (Sources.size() & (Sources.size() - 1)) != 0)
+    return SDValue();
+
+  // At this point we know that we have a truncating BV of extract_vector_elt.
+  // We can just truncate and concat them.
+  SDLoc DL(V);
+  LLVMContext &Ctx = *DAG.getContext();
+
+  // A 128-bit value is truncated to half its element width so that two of
+  // them fit into one 128-bit concat; 64-bit values are concatenated as is.
+  auto NarrowIfFullWidth = [&](SDValue Src) {
+    EVT Ty = Src.getValueType();
+    if (!Ty.is128BitVector())
+      return Src;
+    EVT HalfTy = Ty.changeVectorElementType(
+        Ty.getVectorElementType().getHalfSizedIntegerVT(Ctx));
+    return DAG.getNode(ISD::TRUNCATE, DL, HalfTy, Src);
+  };
+
+  // Combine neighbouring sources until only the final pair is left.
+  while (Sources.size() > 2) {
+    for (unsigned i = 0; i < Sources.size(); i += 2) {
+      SDValue V1 = NarrowIfFullWidth(Sources[i]);
+      SDValue V2 = NarrowIfFullWidth(Sources[i + 1]);
+      Sources[i / 2] = DAG.getNode(
+          ISD::CONCAT_VECTORS, DL,
+          V1.getValueType().getDoubleNumVectorElementsVT(Ctx), V1, V2);
+    }
+    Sources.resize(Sources.size() / 2);
+  }
+
+  SDValue V1 = NarrowIfFullWidth(Sources[0]);
+  SDValue V2 = NarrowIfFullWidth(Sources[1]);
+  EVT HalfTy = V1.getValueType();
+  if (HalfTy.getDoubleNumVectorElementsVT(Ctx) == BVTy)
+    return DAG.getNode(ISD::CONCAT_VECTORS, DL, BVTy, V1, V2);
+
+  // We might not have the final type in some cases e.g. <4i32, 4i32> -> 8i8. Do
+  // a final truncating shuffle instead of a concat + trunc. The shuffle
+  // operands are kept explicitly rather than read back from a CONCAT_VECTORS
+  // node, since getNode is free to return a simplified node instead.
+  if (HalfTy.getSizeInBits() != BVTy.getSizeInBits())
+    return SDValue();
+  unsigned CastOpc = DAG.getDataLayout().isLittleEndian() ? ISD::BITCAST
+                                                          : AArch64ISD::NVCAST;
+  V1 = DAG.getNode(CastOpc, DL, BVTy, V1);
+  V2 = DAG.getNode(CastOpc, DL, BVTy, V2);
+
+  SmallVector<int> MaskVec;
+  for (unsigned i = 0; i < BVTy.getVectorNumElements() * 2; i += 2)
+    MaskVec.push_back(i);
+  return DAG.getVectorShuffle(BVTy, DL, V1, V2, MaskVec);
 }
 
 /// Check if a vector shuffle corresponds to a DUP instructions with a larger
@@ -13305,8 +13368,9 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
   // Detect patterns of a0,a1,a2,a3,b0,b1,b2,b3,c0,c1,c2,c3,d0,d1,d2,d3 from
   // v4i32s. This is really a truncate, which we can construct out of (legal)
   // concats and truncate nodes.
-  if (SDValue M = ReconstructTruncateFromBuildVector(Op, DAG))
-    return M;
+  if (AllLanesExtractElt)
+    if (SDValue M = ReconstructTruncateFromBuildVector(Op, DAG))
+      return M;
 
   // Empirical tests suggest this is rarely worth it for vectors of length <= 2.
   if (NumElts >= 4) {
@@ -19096,6 +19160,28 @@ static SDValue performBuildVectorCombine(SDNode *N,
   SDLoc DL(N);
   EVT VT = N->getValueType(0);
 
+  // BUILD_VECTOR (extract_elt(Assert[S|Z]ext(x)))
+  // => BUILD_VECTOR (extract_elt(x))
+  SmallVector<SDValue> Ops;
+  bool ExtractExtended = false;
+  for (SDValue Extr : N->ops()) {
+    if (Extr.getOpcode() != ISD::EXTRACT_VECTOR_ELT) {
+      ExtractExtended = false;
+      break;
+    }
+    SDValue ExtractBase = Extr.getOperand(0);
+    if (ExtractBase.getOpcode() == ISD::AssertSext ||
+        ExtractBase.getOpcode() == ISD::AssertZext) {
+      ExtractExtended = true;
+      Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
+                                Extr.getValueType(), ExtractBase.getOperand(0),
+                                Extr.getOperand(1)));
+    } else
+      Ops.push_back(Extr);
+  }
+  if (ExtractExtended)
+    return DAG.getBuildVector(VT, DL, Ops);
+
   // A build vector of two extracted elements is equivalent to an
   // extract subvector where the inner vector is any-extended to the
   // extract_vector_elt VT.
diff --git a/llvm/test/CodeGen/AArch64/arm64-convert-v4f64.ll b/llvm/test/CodeGen/AArch64/arm64-convert-v4f64.ll index 9bf638f57a512..193e3b0cfbc7b 100644 --- a/llvm/test/CodeGen/AArch64/arm64-convert-v4f64.ll +++ b/llvm/test/CodeGen/AArch64/arm64-convert-v4f64.ll @@ -8,9 +8,8 @@ define <4 x i16> @fptosi_v4f64_to_v4i16(ptr %ptr) { ; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: fcvtzs v1.2d, v1.2d ; CHECK-NEXT: fcvtzs v0.2d, v0.2d -; CHECK-NEXT: xtn v1.2s, v1.2d -; CHECK-NEXT: xtn v0.2s, v0.2d -; CHECK-NEXT: uzp1 v0.4h, v0.4h, v1.4h +; CHECK-NEXT: uzp1 v0.4s, v0.4s, v1.4s +; CHECK-NEXT: xtn v0.4h, v0.4s ; CHECK-NEXT: ret %tmp1 = load <4 x double>, ptr %ptr %tmp2 = fptosi <4 x double> %tmp1 to <4 x i16> @@ -26,13 +25,10 @@ define <8 x i8> @fptosi_v4f64_to_v4i8(ptr %ptr) { ; CHECK-NEXT: fcvtzs v1.2d, v1.2d ; CHECK-NEXT: fcvtzs v3.2d, v3.2d ; CHECK-NEXT: fcvtzs v2.2d, v2.2d -; CHECK-NEXT: xtn v0.2s, v0.2d -; CHECK-NEXT: xtn v1.2s, v1.2d -; CHECK-NEXT: xtn v3.2s, v3.2d -; CHECK-NEXT: xtn v2.2s, v2.2d -; CHECK-NEXT: uzp1 v0.4h, v1.4h, v0.4h -; CHECK-NEXT: uzp1 v1.4h, v2.4h, v3.4h -; CHECK-NEXT: uzp1 v0.8b, v1.8b, v0.8b +; CHECK-NEXT: uzp1 v0.4s, v1.4s, v0.4s +; CHECK-NEXT: uzp1 v1.4s, v2.4s, v3.4s +; CHECK-NEXT: uzp1 v0.8h, v1.8h, v0.8h +; CHECK-NEXT: xtn v0.8b, v0.8h ; CHECK-NEXT: ret %tmp1 = load <8 x double>, ptr %ptr %tmp2 = fptosi <8 x double> %tmp1 to <8 x i8> @@ -72,9 +68,8 @@ define <4 x i16> @fptoui_v4f64_to_v4i16(ptr %ptr) { ; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: fcvtzs v1.2d, v1.2d ; CHECK-NEXT: fcvtzs v0.2d, v0.2d -; CHECK-NEXT: xtn v1.2s, v1.2d -; CHECK-NEXT: xtn v0.2s, v0.2d -; CHECK-NEXT: uzp1 v0.4h, v0.4h, v1.4h +; CHECK-NEXT: uzp1 v0.4s, v0.4s, v1.4s +; CHECK-NEXT: xtn v0.4h, v0.4s ; CHECK-NEXT: ret %tmp1 = load <4 x double>, ptr %ptr %tmp2 = fptoui <4 x double> %tmp1 to <4 x i16> diff --git a/llvm/test/CodeGen/AArch64/fp-conversion-to-tbl.ll b/llvm/test/CodeGen/AArch64/fp-conversion-to-tbl.ll index 1ea87bb6b04b5..0a3b9a070c2b3 100644 --- 
a/llvm/test/CodeGen/AArch64/fp-conversion-to-tbl.ll +++ b/llvm/test/CodeGen/AArch64/fp-conversion-to-tbl.ll @@ -73,9 +73,8 @@ define void @fptoui_v8f32_to_v8i8_no_loop(ptr %A, ptr %dst) { ; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: fcvtzs.4s v1, v1 ; CHECK-NEXT: fcvtzs.4s v0, v0 -; CHECK-NEXT: xtn.4h v1, v1 -; CHECK-NEXT: xtn.4h v0, v0 -; CHECK-NEXT: uzp1.8b v0, v0, v1 +; CHECK-NEXT: uzp1.8h v0, v0, v1 +; CHECK-NEXT: xtn.8b v0, v0 ; CHECK-NEXT: str d0, [x1] ; CHECK-NEXT: ret entry: diff --git a/llvm/test/CodeGen/AArch64/fptoi.ll b/llvm/test/CodeGen/AArch64/fptoi.ll index 251719c1e3b43..a099db4765555 100644 --- a/llvm/test/CodeGen/AArch64/fptoi.ll +++ b/llvm/test/CodeGen/AArch64/fptoi.ll @@ -1096,30 +1096,17 @@ entry: } define <3 x i16> @fptos_v3f64_v3i16(<3 x double> %a) { -; CHECK-SD-LABEL: fptos_v3f64_v3i16: -; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1 -; CHECK-SD-NEXT: // kill: def $d2 killed $d2 def $q2 -; CHECK-SD-NEXT: mov v0.d[1], v1.d[0] -; CHECK-SD-NEXT: fcvtzs v1.2d, v2.2d -; CHECK-SD-NEXT: fcvtzs v0.2d, v0.2d -; CHECK-SD-NEXT: xtn v1.2s, v1.2d -; CHECK-SD-NEXT: xtn v0.2s, v0.2d -; CHECK-SD-NEXT: uzp1 v0.4h, v0.4h, v1.4h -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: fptos_v3f64_v3i16: -; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1 -; CHECK-GI-NEXT: // kill: def $d2 killed $d2 def $q2 -; CHECK-GI-NEXT: mov v0.d[1], v1.d[0] -; CHECK-GI-NEXT: fcvtzs v1.2d, v2.2d -; CHECK-GI-NEXT: fcvtzs v0.2d, v0.2d -; CHECK-GI-NEXT: uzp1 v0.4s, v0.4s, v1.4s -; CHECK-GI-NEXT: xtn v0.4h, v0.4s -; CHECK-GI-NEXT: ret +; CHECK-LABEL: fptos_v3f64_v3i16: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 +; CHECK-NEXT: mov v0.d[1], v1.d[0] +; CHECK-NEXT: fcvtzs 
v1.2d, v2.2d +; CHECK-NEXT: fcvtzs v0.2d, v0.2d +; CHECK-NEXT: uzp1 v0.4s, v0.4s, v1.4s +; CHECK-NEXT: xtn v0.4h, v0.4s +; CHECK-NEXT: ret entry: %c = fptosi <3 x double> %a to <3 x i16> ret <3 x i16> %c @@ -1134,9 +1121,8 @@ define <3 x i16> @fptou_v3f64_v3i16(<3 x double> %a) { ; CHECK-SD-NEXT: mov v0.d[1], v1.d[0] ; CHECK-SD-NEXT: fcvtzs v1.2d, v2.2d ; CHECK-SD-NEXT: fcvtzs v0.2d, v0.2d -; CHECK-SD-NEXT: xtn v1.2s, v1.2d -; CHECK-SD-NEXT: xtn v0.2s, v0.2d -; CHECK-SD-NEXT: uzp1 v0.4h, v0.4h, v1.4h +; CHECK-SD-NEXT: uzp1 v0.4s, v0.4s, v1.4s +; CHECK-SD-NEXT: xtn v0.4h, v0.4s ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: fptou_v3f64_v3i16: @@ -1160,9 +1146,8 @@ define <4 x i16> @fptos_v4f64_v4i16(<4 x double> %a) { ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: fcvtzs v1.2d, v1.2d ; CHECK-SD-NEXT: fcvtzs v0.2d, v0.2d -; CHECK-SD-NEXT: xtn v1.2s, v1.2d -; CHECK-SD-NEXT: xtn v0.2s, v0.2d -; CHECK-SD-NEXT: uzp1 v0.4h, v0.4h, v1.4h +; CHECK-SD-NEXT: uzp1 v0.4s, v0.4s, v1.4s +; CHECK-SD-NEXT: xtn v0.4h, v0.4s ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: fptos_v4f64_v4i16: @@ -1182,9 +1167,8 @@ define <4 x i16> @fptou_v4f64_v4i16(<4 x double> %a) { ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: fcvtzs v1.2d, v1.2d ; CHECK-SD-NEXT: fcvtzs v0.2d, v0.2d -; CHECK-SD-NEXT: xtn v1.2s, v1.2d -; CHECK-SD-NEXT: xtn v0.2s, v0.2d -; CHECK-SD-NEXT: uzp1 v0.4h, v0.4h, v1.4h +; CHECK-SD-NEXT: uzp1 v0.4s, v0.4s, v1.4s +; CHECK-SD-NEXT: xtn v0.4h, v0.4s ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: fptou_v4f64_v4i16: @@ -1204,15 +1188,11 @@ define <8 x i16> @fptos_v8f64_v8i16(<8 x double> %a) { ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: fcvtzs v3.2d, v3.2d ; CHECK-SD-NEXT: fcvtzs v2.2d, v2.2d -; CHECK-SD-NEXT: adrp x8, .LCPI54_0 ; CHECK-SD-NEXT: fcvtzs v1.2d, v1.2d ; CHECK-SD-NEXT: fcvtzs v0.2d, v0.2d -; CHECK-SD-NEXT: xtn v6.2s, v3.2d -; CHECK-SD-NEXT: xtn v5.2s, v2.2d -; CHECK-SD-NEXT: xtn v4.2s, v1.2d -; CHECK-SD-NEXT: xtn v3.2s, v0.2d -; CHECK-SD-NEXT: ldr q0, [x8, :lo12:.LCPI54_0] -; 
CHECK-SD-NEXT: tbl v0.16b, { v3.16b, v4.16b, v5.16b, v6.16b }, v0.16b +; CHECK-SD-NEXT: uzp1 v2.4s, v2.4s, v3.4s +; CHECK-SD-NEXT: uzp1 v0.4s, v0.4s, v1.4s +; CHECK-SD-NEXT: uzp1 v0.8h, v0.8h, v2.8h ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: fptos_v8f64_v8i16: @@ -1235,15 +1215,11 @@ define <8 x i16> @fptou_v8f64_v8i16(<8 x double> %a) { ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: fcvtzs v3.2d, v3.2d ; CHECK-SD-NEXT: fcvtzs v2.2d, v2.2d -; CHECK-SD-NEXT: adrp x8, .LCPI55_0 ; CHECK-SD-NEXT: fcvtzs v1.2d, v1.2d ; CHECK-SD-NEXT: fcvtzs v0.2d, v0.2d -; CHECK-SD-NEXT: xtn v6.2s, v3.2d -; CHECK-SD-NEXT: xtn v5.2s, v2.2d -; CHECK-SD-NEXT: xtn v4.2s, v1.2d -; CHECK-SD-NEXT: xtn v3.2s, v0.2d -; CHECK-SD-NEXT: ldr q0, [x8, :lo12:.LCPI55_0] -; CHECK-SD-NEXT: tbl v0.16b, { v3.16b, v4.16b, v5.16b, v6.16b }, v0.16b +; CHECK-SD-NEXT: uzp1 v2.4s, v2.4s, v3.4s +; CHECK-SD-NEXT: uzp1 v0.4s, v0.4s, v1.4s +; CHECK-SD-NEXT: uzp1 v0.8h, v0.8h, v2.8h ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: fptou_v8f64_v8i16: @@ -1265,25 +1241,19 @@ define <16 x i16> @fptos_v16f64_v16i16(<16 x double> %a) { ; CHECK-SD-LABEL: fptos_v16f64_v16i16: ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: fcvtzs v3.2d, v3.2d -; CHECK-SD-NEXT: fcvtzs v7.2d, v7.2d -; CHECK-SD-NEXT: adrp x8, .LCPI56_0 ; CHECK-SD-NEXT: fcvtzs v2.2d, v2.2d -; CHECK-SD-NEXT: fcvtzs v6.2d, v6.2d ; CHECK-SD-NEXT: fcvtzs v1.2d, v1.2d -; CHECK-SD-NEXT: fcvtzs v5.2d, v5.2d ; CHECK-SD-NEXT: fcvtzs v0.2d, v0.2d +; CHECK-SD-NEXT: fcvtzs v7.2d, v7.2d +; CHECK-SD-NEXT: fcvtzs v6.2d, v6.2d +; CHECK-SD-NEXT: fcvtzs v5.2d, v5.2d ; CHECK-SD-NEXT: fcvtzs v4.2d, v4.2d -; CHECK-SD-NEXT: xtn v19.2s, v3.2d -; CHECK-SD-NEXT: xtn v23.2s, v7.2d -; CHECK-SD-NEXT: xtn v18.2s, v2.2d -; CHECK-SD-NEXT: xtn v22.2s, v6.2d -; CHECK-SD-NEXT: xtn v17.2s, v1.2d -; CHECK-SD-NEXT: xtn v21.2s, v5.2d -; CHECK-SD-NEXT: ldr q1, [x8, :lo12:.LCPI56_0] -; CHECK-SD-NEXT: xtn v16.2s, v0.2d -; CHECK-SD-NEXT: xtn v20.2s, v4.2d -; CHECK-SD-NEXT: tbl v0.16b, { v16.16b, v17.16b, 
v18.16b, v19.16b }, v1.16b -; CHECK-SD-NEXT: tbl v1.16b, { v20.16b, v21.16b, v22.16b, v23.16b }, v1.16b +; CHECK-SD-NEXT: uzp1 v2.4s, v2.4s, v3.4s +; CHECK-SD-NEXT: uzp1 v0.4s, v0.4s, v1.4s +; CHECK-SD-NEXT: uzp1 v1.4s, v6.4s, v7.4s +; CHECK-SD-NEXT: uzp1 v3.4s, v4.4s, v5.4s +; CHECK-SD-NEXT: uzp1 v0.8h, v0.8h, v2.8h +; CHECK-SD-NEXT: uzp1 v1.8h, v3.8h, v1.8h ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: fptos_v16f64_v16i16: @@ -1312,25 +1282,19 @@ define <16 x i16> @fptou_v16f64_v16i16(<16 x double> %a) { ; CHECK-SD-LABEL: fptou_v16f64_v16i16: ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: fcvtzs v3.2d, v3.2d -; CHECK-SD-NEXT: fcvtzs v7.2d, v7.2d -; CHECK-SD-NEXT: adrp x8, .LCPI57_0 ; CHECK-SD-NEXT: fcvtzs v2.2d, v2.2d -; CHECK-SD-NEXT: fcvtzs v6.2d, v6.2d ; CHECK-SD-NEXT: fcvtzs v1.2d, v1.2d -; CHECK-SD-NEXT: fcvtzs v5.2d, v5.2d ; CHECK-SD-NEXT: fcvtzs v0.2d, v0.2d +; CHECK-SD-NEXT: fcvtzs v7.2d, v7.2d +; CHECK-SD-NEXT: fcvtzs v6.2d, v6.2d +; CHECK-SD-NEXT: fcvtzs v5.2d, v5.2d ; CHECK-SD-NEXT: fcvtzs v4.2d, v4.2d -; CHECK-SD-NEXT: xtn v19.2s, v3.2d -; CHECK-SD-NEXT: xtn v23.2s, v7.2d -; CHECK-SD-NEXT: xtn v18.2s, v2.2d -; CHECK-SD-NEXT: xtn v22.2s, v6.2d -; CHECK-SD-NEXT: xtn v17.2s, v1.2d -; CHECK-SD-NEXT: xtn v21.2s, v5.2d -; CHECK-SD-NEXT: ldr q1, [x8, :lo12:.LCPI57_0] -; CHECK-SD-NEXT: xtn v16.2s, v0.2d -; CHECK-SD-NEXT: xtn v20.2s, v4.2d -; CHECK-SD-NEXT: tbl v0.16b, { v16.16b, v17.16b, v18.16b, v19.16b }, v1.16b -; CHECK-SD-NEXT: tbl v1.16b, { v20.16b, v21.16b, v22.16b, v23.16b }, v1.16b +; CHECK-SD-NEXT: uzp1 v2.4s, v2.4s, v3.4s +; CHECK-SD-NEXT: uzp1 v0.4s, v0.4s, v1.4s +; CHECK-SD-NEXT: uzp1 v1.4s, v6.4s, v7.4s +; CHECK-SD-NEXT: uzp1 v3.4s, v4.4s, v5.4s +; CHECK-SD-NEXT: uzp1 v0.8h, v0.8h, v2.8h +; CHECK-SD-NEXT: uzp1 v1.8h, v3.8h, v1.8h ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: fptou_v16f64_v16i16: @@ -1358,65 +1322,38 @@ entry: define <32 x i16> @fptos_v32f64_v32i16(<32 x double> %a) { ; CHECK-SD-LABEL: fptos_v32f64_v32i16: ; CHECK-SD: // %bb.0: // %entry 
-; CHECK-SD-NEXT: stp d15, d14, [sp, #-64]! // 16-byte Folded Spill -; CHECK-SD-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill -; CHECK-SD-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill -; CHECK-SD-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill -; CHECK-SD-NEXT: .cfi_def_cfa_offset 64 -; CHECK-SD-NEXT: .cfi_offset b8, -8 -; CHECK-SD-NEXT: .cfi_offset b9, -16 -; CHECK-SD-NEXT: .cfi_offset b10, -24 -; CHECK-SD-NEXT: .cfi_offset b11, -32 -; CHECK-SD-NEXT: .cfi_offset b12, -40 -; CHECK-SD-NEXT: .cfi_offset b13, -48 -; CHECK-SD-NEXT: .cfi_offset b14, -56 -; CHECK-SD-NEXT: .cfi_offset b15, -64 +; CHECK-SD-NEXT: ldp q16, q17, [sp, #64] ; CHECK-SD-NEXT: fcvtzs v3.2d, v3.2d -; CHECK-SD-NEXT: fcvtzs v18.2d, v2.2d -; CHECK-SD-NEXT: adrp x8, .LCPI58_0 -; CHECK-SD-NEXT: fcvtzs v19.2d, v1.2d -; CHECK-SD-NEXT: ldp q20, q21, [sp, #160] -; CHECK-SD-NEXT: fcvtzs v22.2d, v0.2d -; CHECK-SD-NEXT: ldp q23, q24, [sp, #96] +; CHECK-SD-NEXT: ldp q18, q19, [sp, #96] +; CHECK-SD-NEXT: fcvtzs v2.2d, v2.2d +; CHECK-SD-NEXT: ldp q20, q21, [sp] +; CHECK-SD-NEXT: fcvtzs v1.2d, v1.2d +; CHECK-SD-NEXT: ldp q22, q23, [sp, #32] +; CHECK-SD-NEXT: fcvtzs v0.2d, v0.2d ; CHECK-SD-NEXT: fcvtzs v7.2d, v7.2d -; CHECK-SD-NEXT: ldp q16, q17, [sp, #128] -; CHECK-SD-NEXT: xtn v3.2s, v3.2d +; CHECK-SD-NEXT: fcvtzs v6.2d, v6.2d +; CHECK-SD-NEXT: fcvtzs v5.2d, v5.2d +; CHECK-SD-NEXT: fcvtzs v4.2d, v4.2d ; CHECK-SD-NEXT: fcvtzs v21.2d, v21.2d ; CHECK-SD-NEXT: fcvtzs v20.2d, v20.2d -; CHECK-SD-NEXT: xtn v2.2s, v18.2d -; CHECK-SD-NEXT: ldp q18, q25, [sp, #64] -; CHECK-SD-NEXT: xtn v1.2s, v19.2d -; CHECK-SD-NEXT: fcvtzs v19.2d, v24.2d -; CHECK-SD-NEXT: fcvtzs v17.2d, v17.2d -; CHECK-SD-NEXT: xtn v0.2s, v22.2d -; CHECK-SD-NEXT: fcvtzs v22.2d, v23.2d -; CHECK-SD-NEXT: xtn v29.2s, v7.2d -; CHECK-SD-NEXT: fcvtzs v7.2d, v25.2d -; CHECK-SD-NEXT: fcvtzs v6.2d, v6.2d +; CHECK-SD-NEXT: fcvtzs v23.2d, v23.2d +; CHECK-SD-NEXT: fcvtzs v22.2d, v22.2d +; CHECK-SD-NEXT: fcvtzs v19.2d, v19.2d ; CHECK-SD-NEXT: 
fcvtzs v18.2d, v18.2d +; CHECK-SD-NEXT: fcvtzs v17.2d, v17.2d ; CHECK-SD-NEXT: fcvtzs v16.2d, v16.2d -; CHECK-SD-NEXT: fcvtzs v5.2d, v5.2d -; CHECK-SD-NEXT: xtn v15.2s, v21.2d -; CHECK-SD-NEXT: xtn v11.2s, v19.2d -; CHECK-SD-NEXT: fcvtzs v4.2d, v4.2d -; CHECK-SD-NEXT: xtn v14.2s, v20.2d -; CHECK-SD-NEXT: xtn v10.2s, v22.2d -; CHECK-SD-NEXT: xtn v13.2s, v17.2d -; CHECK-SD-NEXT: xtn v9.2s, v7.2d -; CHECK-SD-NEXT: xtn v28.2s, v6.2d -; CHECK-SD-NEXT: xtn v8.2s, v18.2d -; CHECK-SD-NEXT: xtn v12.2s, v16.2d -; CHECK-SD-NEXT: xtn v27.2s, v5.2d -; CHECK-SD-NEXT: xtn v26.2s, v4.2d -; CHECK-SD-NEXT: ldr q4, [x8, :lo12:.LCPI58_0] -; CHECK-SD-NEXT: tbl v0.16b, { v0.16b, v1.16b, v2.16b, v3.16b }, v4.16b -; CHECK-SD-NEXT: tbl v2.16b, { v8.16b, v9.16b, v10.16b, v11.16b }, v4.16b -; CHECK-SD-NEXT: tbl v3.16b, { v12.16b, v13.16b, v14.16b, v15.16b }, v4.16b -; CHECK-SD-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload -; CHECK-SD-NEXT: tbl v1.16b, { v26.16b, v27.16b, v28.16b, v29.16b }, v4.16b -; CHECK-SD-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload -; CHECK-SD-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload -; CHECK-SD-NEXT: ldp d15, d14, [sp], #64 // 16-byte Folded Reload +; CHECK-SD-NEXT: uzp1 v2.4s, v2.4s, v3.4s +; CHECK-SD-NEXT: uzp1 v0.4s, v0.4s, v1.4s +; CHECK-SD-NEXT: uzp1 v1.4s, v6.4s, v7.4s +; CHECK-SD-NEXT: uzp1 v3.4s, v4.4s, v5.4s +; CHECK-SD-NEXT: uzp1 v5.4s, v20.4s, v21.4s +; CHECK-SD-NEXT: uzp1 v4.4s, v22.4s, v23.4s +; CHECK-SD-NEXT: uzp1 v6.4s, v18.4s, v19.4s +; CHECK-SD-NEXT: uzp1 v7.4s, v16.4s, v17.4s +; CHECK-SD-NEXT: uzp1 v0.8h, v0.8h, v2.8h +; CHECK-SD-NEXT: uzp1 v1.8h, v3.8h, v1.8h +; CHECK-SD-NEXT: uzp1 v2.8h, v5.8h, v4.8h +; CHECK-SD-NEXT: uzp1 v3.8h, v7.8h, v6.8h ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: fptos_v32f64_v32i16: @@ -1462,65 +1399,38 @@ entry: define <32 x i16> @fptou_v32f64_v32i16(<32 x double> %a) { ; CHECK-SD-LABEL: fptou_v32f64_v32i16: ; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: stp d15, d14, [sp, #-64]! 
// 16-byte Folded Spill -; CHECK-SD-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill -; CHECK-SD-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill -; CHECK-SD-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill -; CHECK-SD-NEXT: .cfi_def_cfa_offset 64 -; CHECK-SD-NEXT: .cfi_offset b8, -8 -; CHECK-SD-NEXT: .cfi_offset b9, -16 -; CHECK-SD-NEXT: .cfi_offset b10, -24 -; CHECK-SD-NEXT: .cfi_offset b11, -32 -; CHECK-SD-NEXT: .cfi_offset b12, -40 -; CHECK-SD-NEXT: .cfi_offset b13, -48 -; CHECK-SD-NEXT: .cfi_offset b14, -56 -; CHECK-SD-NEXT: .cfi_offset b15, -64 +; CHECK-SD-NEXT: ldp q16, q17, [sp, #64] ; CHECK-SD-NEXT: fcvtzs v3.2d, v3.2d -; CHECK-SD-NEXT: fcvtzs v18.2d, v2.2d -; CHECK-SD-NEXT: adrp x8, .LCPI59_0 -; CHECK-SD-NEXT: fcvtzs v19.2d, v1.2d -; CHECK-SD-NEXT: ldp q20, q21, [sp, #160] -; CHECK-SD-NEXT: fcvtzs v22.2d, v0.2d -; CHECK-SD-NEXT: ldp q23, q24, [sp, #96] +; CHECK-SD-NEXT: ldp q18, q19, [sp, #96] +; CHECK-SD-NEXT: fcvtzs v2.2d, v2.2d +; CHECK-SD-NEXT: ldp q20, q21, [sp] +; CHECK-SD-NEXT: fcvtzs v1.2d, v1.2d +; CHECK-SD-NEXT: ldp q22, q23, [sp, #32] +; CHECK-SD-NEXT: fcvtzs v0.2d, v0.2d ; CHECK-SD-NEXT: fcvtzs v7.2d, v7.2d -; CHECK-SD-NEXT: ldp q16, q17, [sp, #128] -; CHECK-SD-NEXT: xtn v3.2s, v3.2d +; CHECK-SD-NEXT: fcvtzs v6.2d, v6.2d +; CHECK-SD-NEXT: fcvtzs v5.2d, v5.2d +; CHECK-SD-NEXT: fcvtzs v4.2d, v4.2d ; CHECK-SD-NEXT: fcvtzs v21.2d, v21.2d ; CHECK-SD-NEXT: fcvtzs v20.2d, v20.2d -; CHECK-SD-NEXT: xtn v2.2s, v18.2d -; CHECK-SD-NEXT: ldp q18, q25, [sp, #64] -; CHECK-SD-NEXT: xtn v1.2s, v19.2d -; CHECK-SD-NEXT: fcvtzs v19.2d, v24.2d -; CHECK-SD-NEXT: fcvtzs v17.2d, v17.2d -; CHECK-SD-NEXT: xtn v0.2s, v22.2d -; CHECK-SD-NEXT: fcvtzs v22.2d, v23.2d -; CHECK-SD-NEXT: xtn v29.2s, v7.2d -; CHECK-SD-NEXT: fcvtzs v7.2d, v25.2d -; CHECK-SD-NEXT: fcvtzs v6.2d, v6.2d +; CHECK-SD-NEXT: fcvtzs v23.2d, v23.2d +; CHECK-SD-NEXT: fcvtzs v22.2d, v22.2d +; CHECK-SD-NEXT: fcvtzs v19.2d, v19.2d ; CHECK-SD-NEXT: fcvtzs v18.2d, v18.2d +; CHECK-SD-NEXT: fcvtzs 
v17.2d, v17.2d ; CHECK-SD-NEXT: fcvtzs v16.2d, v16.2d -; CHECK-SD-NEXT: fcvtzs v5.2d, v5.2d -; CHECK-SD-NEXT: xtn v15.2s, v21.2d -; CHECK-SD-NEXT: xtn v11.2s, v19.2d -; CHECK-SD-NEXT: fcvtzs v4.2d, v4.2d -; CHECK-SD-NEXT: xtn v14.2s, v20.2d -; CHECK-SD-NEXT: xtn v10.2s, v22.2d -; CHECK-SD-NEXT: xtn v13.2s, v17.2d -; CHECK-SD-NEXT: xtn v9.2s, v7.2d -; CHECK-SD-NEXT: xtn v28.2s, v6.2d -; CHECK-SD-NEXT: xtn v8.2s, v18.2d -; CHECK-SD-NEXT: xtn v12.2s, v16.2d -; CHECK-SD-NEXT: xtn v27.2s, v5.2d -; CHECK-SD-NEXT: xtn v26.2s, v4.2d -; CHECK-SD-NEXT: ldr q4, [x8, :lo12:.LCPI59_0] -; CHECK-SD-NEXT: tbl v0.16b, { v0.16b, v1.16b, v2.16b, v3.16b }, v4.16b -; CHECK-SD-NEXT: tbl v2.16b, { v8.16b, v9.16b, v10.16b, v11.16b }, v4.16b -; CHECK-SD-NEXT: tbl v3.16b, { v12.16b, v13.16b, v14.16b, v15.16b }, v4.16b -; CHECK-SD-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload -; CHECK-SD-NEXT: tbl v1.16b, { v26.16b, v27.16b, v28.16b, v29.16b }, v4.16b -; CHECK-SD-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload -; CHECK-SD-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload -; CHECK-SD-NEXT: ldp d15, d14, [sp], #64 // 16-byte Folded Reload +; CHECK-SD-NEXT: uzp1 v2.4s, v2.4s, v3.4s +; CHECK-SD-NEXT: uzp1 v0.4s, v0.4s, v1.4s +; CHECK-SD-NEXT: uzp1 v1.4s, v6.4s, v7.4s +; CHECK-SD-NEXT: uzp1 v3.4s, v4.4s, v5.4s +; CHECK-SD-NEXT: uzp1 v5.4s, v20.4s, v21.4s +; CHECK-SD-NEXT: uzp1 v4.4s, v22.4s, v23.4s +; CHECK-SD-NEXT: uzp1 v6.4s, v18.4s, v19.4s +; CHECK-SD-NEXT: uzp1 v7.4s, v16.4s, v17.4s +; CHECK-SD-NEXT: uzp1 v0.8h, v0.8h, v2.8h +; CHECK-SD-NEXT: uzp1 v1.8h, v3.8h, v1.8h +; CHECK-SD-NEXT: uzp1 v2.8h, v5.8h, v4.8h +; CHECK-SD-NEXT: uzp1 v3.8h, v7.8h, v6.8h ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: fptou_v32f64_v32i16: @@ -1600,9 +1510,8 @@ define <3 x i8> @fptos_v3f64_v3i8(<3 x double> %a) { ; CHECK-SD-NEXT: mov v0.d[1], v1.d[0] ; CHECK-SD-NEXT: fcvtzs v1.2d, v2.2d ; CHECK-SD-NEXT: fcvtzs v0.2d, v0.2d -; CHECK-SD-NEXT: xtn v1.2s, v1.2d -; CHECK-SD-NEXT: xtn v0.2s, v0.2d -; 
CHECK-SD-NEXT: uzp1 v0.4h, v0.4h, v1.4h +; CHECK-SD-NEXT: uzp1 v0.4s, v0.4s, v1.4s +; CHECK-SD-NEXT: xtn v0.4h, v0.4s ; CHECK-SD-NEXT: umov w0, v0.h[0] ; CHECK-SD-NEXT: umov w1, v0.h[1] ; CHECK-SD-NEXT: umov w2, v0.h[2] @@ -1638,9 +1547,8 @@ define <3 x i8> @fptou_v3f64_v3i8(<3 x double> %a) { ; CHECK-SD-NEXT: mov v0.d[1], v1.d[0] ; CHECK-SD-NEXT: fcvtzs v1.2d, v2.2d ; CHECK-SD-NEXT: fcvtzs v0.2d, v0.2d -; CHECK-SD-NEXT: xtn v1.2s, v1.2d -; CHECK-SD-NEXT: xtn v0.2s, v0.2d -; CHECK-SD-NEXT: uzp1 v0.4h, v0.4h, v1.4h +; CHECK-SD-NEXT: uzp1 v0.4s, v0.4s, v1.4s +; CHECK-SD-NEXT: xtn v0.4h, v0.4s ; CHECK-SD-NEXT: umov w0, v0.h[0] ; CHECK-SD-NEXT: umov w1, v0.h[1] ; CHECK-SD-NEXT: umov w2, v0.h[2] @@ -1672,9 +1580,8 @@ define <4 x i8> @fptos_v4f64_v4i8(<4 x double> %a) { ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: fcvtzs v1.2d, v1.2d ; CHECK-SD-NEXT: fcvtzs v0.2d, v0.2d -; CHECK-SD-NEXT: xtn v1.2s, v1.2d -; CHECK-SD-NEXT: xtn v0.2s, v0.2d -; CHECK-SD-NEXT: uzp1 v0.4h, v0.4h, v1.4h +; CHECK-SD-NEXT: uzp1 v0.4s, v0.4s, v1.4s +; CHECK-SD-NEXT: xtn v0.4h, v0.4s ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: fptos_v4f64_v4i8: @@ -1694,9 +1601,8 @@ define <4 x i8> @fptou_v4f64_v4i8(<4 x double> %a) { ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: fcvtzs v1.2d, v1.2d ; CHECK-SD-NEXT: fcvtzs v0.2d, v0.2d -; CHECK-SD-NEXT: xtn v1.2s, v1.2d -; CHECK-SD-NEXT: xtn v0.2s, v0.2d -; CHECK-SD-NEXT: uzp1 v0.4h, v0.4h, v1.4h +; CHECK-SD-NEXT: uzp1 v0.4s, v0.4s, v1.4s +; CHECK-SD-NEXT: xtn v0.4h, v0.4s ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: fptou_v4f64_v4i8: @@ -1718,13 +1624,10 @@ define <8 x i8> @fptos_v8f64_v8i8(<8 x double> %a) { ; CHECK-SD-NEXT: fcvtzs v2.2d, v2.2d ; CHECK-SD-NEXT: fcvtzs v1.2d, v1.2d ; CHECK-SD-NEXT: fcvtzs v0.2d, v0.2d -; CHECK-SD-NEXT: xtn v3.2s, v3.2d -; CHECK-SD-NEXT: xtn v2.2s, v2.2d -; CHECK-SD-NEXT: xtn v1.2s, v1.2d -; CHECK-SD-NEXT: xtn v0.2s, v0.2d -; CHECK-SD-NEXT: uzp1 v2.4h, v2.4h, v3.4h -; CHECK-SD-NEXT: uzp1 v0.4h, v0.4h, v1.4h -; CHECK-SD-NEXT: uzp1 
v0.8b, v0.8b, v2.8b +; CHECK-SD-NEXT: uzp1 v2.4s, v2.4s, v3.4s +; CHECK-SD-NEXT: uzp1 v0.4s, v0.4s, v1.4s +; CHECK-SD-NEXT: uzp1 v0.8h, v0.8h, v2.8h +; CHECK-SD-NEXT: xtn v0.8b, v0.8h ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: fptos_v8f64_v8i8: @@ -1750,13 +1653,10 @@ define <8 x i8> @fptou_v8f64_v8i8(<8 x double> %a) { ; CHECK-SD-NEXT: fcvtzs v2.2d, v2.2d ; CHECK-SD-NEXT: fcvtzs v1.2d, v1.2d ; CHECK-SD-NEXT: fcvtzs v0.2d, v0.2d -; CHECK-SD-NEXT: xtn v3.2s, v3.2d -; CHECK-SD-NEXT: xtn v2.2s, v2.2d -; CHECK-SD-NEXT: xtn v1.2s, v1.2d -; CHECK-SD-NEXT: xtn v0.2s, v0.2d -; CHECK-SD-NEXT: uzp1 v2.4h, v2.4h, v3.4h -; CHECK-SD-NEXT: uzp1 v0.4h, v0.4h, v1.4h -; CHECK-SD-NEXT: uzp1 v0.8b, v0.8b, v2.8b +; CHECK-SD-NEXT: uzp1 v2.4s, v2.4s, v3.4s +; CHECK-SD-NEXT: uzp1 v0.4s, v0.4s, v1.4s +; CHECK-SD-NEXT: uzp1 v0.8h, v0.8h, v2.8h +; CHECK-SD-NEXT: xtn v0.8b, v0.8h ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: fptou_v8f64_v8i8: @@ -1786,21 +1686,13 @@ define <16 x i8> @fptos_v16f64_v16i8(<16 x double> %a) { ; CHECK-SD-NEXT: fcvtzs v2.2d, v2.2d ; CHECK-SD-NEXT: fcvtzs v1.2d, v1.2d ; CHECK-SD-NEXT: fcvtzs v0.2d, v0.2d -; CHECK-SD-NEXT: xtn v7.2s, v7.2d -; CHECK-SD-NEXT: xtn v6.2s, v6.2d -; CHECK-SD-NEXT: xtn v5.2s, v5.2d -; CHECK-SD-NEXT: xtn v4.2s, v4.2d -; CHECK-SD-NEXT: xtn v3.2s, v3.2d -; CHECK-SD-NEXT: xtn v2.2s, v2.2d -; CHECK-SD-NEXT: xtn v1.2s, v1.2d -; CHECK-SD-NEXT: xtn v0.2s, v0.2d -; CHECK-SD-NEXT: uzp1 v6.4h, v6.4h, v7.4h -; CHECK-SD-NEXT: uzp1 v4.4h, v4.4h, v5.4h -; CHECK-SD-NEXT: uzp1 v2.4h, v2.4h, v3.4h -; CHECK-SD-NEXT: uzp1 v0.4h, v0.4h, v1.4h -; CHECK-SD-NEXT: mov v4.d[1], v6.d[0] -; CHECK-SD-NEXT: mov v0.d[1], v2.d[0] -; CHECK-SD-NEXT: uzp1 v0.16b, v0.16b, v4.16b +; CHECK-SD-NEXT: uzp1 v6.4s, v6.4s, v7.4s +; CHECK-SD-NEXT: uzp1 v4.4s, v4.4s, v5.4s +; CHECK-SD-NEXT: uzp1 v2.4s, v2.4s, v3.4s +; CHECK-SD-NEXT: uzp1 v0.4s, v0.4s, v1.4s +; CHECK-SD-NEXT: uzp1 v1.8h, v4.8h, v6.8h +; CHECK-SD-NEXT: uzp1 v0.8h, v0.8h, v2.8h +; CHECK-SD-NEXT: uzp1 v0.16b, v0.16b, v1.16b ; 
CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: fptos_v16f64_v16i8: @@ -1837,21 +1729,13 @@ define <16 x i8> @fptou_v16f64_v16i8(<16 x double> %a) { ; CHECK-SD-NEXT: fcvtzs v2.2d, v2.2d ; CHECK-SD-NEXT: fcvtzs v1.2d, v1.2d ; CHECK-SD-NEXT: fcvtzs v0.2d, v0.2d -; CHECK-SD-NEXT: xtn v7.2s, v7.2d -; CHECK-SD-NEXT: xtn v6.2s, v6.2d -; CHECK-SD-NEXT: xtn v5.2s, v5.2d -; CHECK-SD-NEXT: xtn v4.2s, v4.2d -; CHECK-SD-NEXT: xtn v3.2s, v3.2d -; CHECK-SD-NEXT: xtn v2.2s, v2.2d -; CHECK-SD-NEXT: xtn v1.2s, v1.2d -; CHECK-SD-NEXT: xtn v0.2s, v0.2d -; CHECK-SD-NEXT: uzp1 v6.4h, v6.4h, v7.4h -; CHECK-SD-NEXT: uzp1 v4.4h, v4.4h, v5.4h -; CHECK-SD-NEXT: uzp1 v2.4h, v2.4h, v3.4h -; CHECK-SD-NEXT: uzp1 v0.4h, v0.4h, v1.4h -; CHECK-SD-NEXT: mov v4.d[1], v6.d[0] -; CHECK-SD-NEXT: mov v0.d[1], v2.d[0] -; CHECK-SD-NEXT: uzp1 v0.16b, v0.16b, v4.16b +; CHECK-SD-NEXT: uzp1 v6.4s, v6.4s, v7.4s +; CHECK-SD-NEXT: uzp1 v4.4s, v4.4s, v5.4s +; CHECK-SD-NEXT: uzp1 v2.4s, v2.4s, v3.4s +; CHECK-SD-NEXT: uzp1 v0.4s, v0.4s, v1.4s +; CHECK-SD-NEXT: uzp1 v1.8h, v4.8h, v6.8h +; CHECK-SD-NEXT: uzp1 v0.8h, v0.8h, v2.8h +; CHECK-SD-NEXT: uzp1 v0.16b, v0.16b, v1.16b ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: fptou_v16f64_v16i8: @@ -1900,36 +1784,20 @@ define <32 x i8> @fptos_v32f64_v32i8(<32 x double> %a) { ; CHECK-SD-NEXT: fcvtzs v18.2d, v18.2d ; CHECK-SD-NEXT: fcvtzs v17.2d, v17.2d ; CHECK-SD-NEXT: fcvtzs v16.2d, v16.2d -; CHECK-SD-NEXT: xtn v7.2s, v7.2d -; CHECK-SD-NEXT: xtn v6.2s, v6.2d -; CHECK-SD-NEXT: xtn v5.2s, v5.2d -; CHECK-SD-NEXT: xtn v4.2s, v4.2d -; CHECK-SD-NEXT: xtn v3.2s, v3.2d -; CHECK-SD-NEXT: xtn v2.2s, v2.2d -; CHECK-SD-NEXT: xtn v1.2s, v1.2d -; CHECK-SD-NEXT: xtn v0.2s, v0.2d -; CHECK-SD-NEXT: xtn v23.2s, v23.2d -; CHECK-SD-NEXT: xtn v22.2s, v22.2d -; CHECK-SD-NEXT: xtn v21.2s, v21.2d -; CHECK-SD-NEXT: xtn v20.2s, v20.2d -; CHECK-SD-NEXT: xtn v19.2s, v19.2d -; CHECK-SD-NEXT: xtn v18.2s, v18.2d -; CHECK-SD-NEXT: xtn v17.2s, v17.2d -; CHECK-SD-NEXT: xtn v16.2s, v16.2d -; CHECK-SD-NEXT: uzp1 v6.4h, 
v6.4h, v7.4h -; CHECK-SD-NEXT: uzp1 v4.4h, v4.4h, v5.4h -; CHECK-SD-NEXT: uzp1 v2.4h, v2.4h, v3.4h -; CHECK-SD-NEXT: uzp1 v0.4h, v0.4h, v1.4h -; CHECK-SD-NEXT: uzp1 v1.4h, v22.4h, v23.4h -; CHECK-SD-NEXT: uzp1 v3.4h, v20.4h, v21.4h -; CHECK-SD-NEXT: uzp1 v5.4h, v18.4h, v19.4h -; CHECK-SD-NEXT: uzp1 v7.4h, v16.4h, v17.4h -; CHECK-SD-NEXT: mov v4.d[1], v6.d[0] -; CHECK-SD-NEXT: mov v0.d[1], v2.d[0] -; CHECK-SD-NEXT: mov v3.d[1], v1.d[0] -; CHECK-SD-NEXT: mov v7.d[1], v5.d[0] +; CHECK-SD-NEXT: uzp1 v6.4s, v6.4s, v7.4s +; CHECK-SD-NEXT: uzp1 v4.4s, v4.4s, v5.4s +; CHECK-SD-NEXT: uzp1 v2.4s, v2.4s, v3.4s +; CHECK-SD-NEXT: uzp1 v0.4s, v0.4s, v1.4s +; CHECK-SD-NEXT: uzp1 v3.4s, v20.4s, v21.4s +; CHECK-SD-NEXT: uzp1 v1.4s, v22.4s, v23.4s +; CHECK-SD-NEXT: uzp1 v5.4s, v18.4s, v19.4s +; CHECK-SD-NEXT: uzp1 v7.4s, v16.4s, v17.4s +; CHECK-SD-NEXT: uzp1 v4.8h, v4.8h, v6.8h +; CHECK-SD-NEXT: uzp1 v0.8h, v0.8h, v2.8h +; CHECK-SD-NEXT: uzp1 v1.8h, v3.8h, v1.8h +; CHECK-SD-NEXT: uzp1 v2.8h, v7.8h, v5.8h ; CHECK-SD-NEXT: uzp1 v0.16b, v0.16b, v4.16b -; CHECK-SD-NEXT: uzp1 v1.16b, v7.16b, v3.16b +; CHECK-SD-NEXT: uzp1 v1.16b, v2.16b, v1.16b ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: fptos_v32f64_v32i8: @@ -1997,36 +1865,20 @@ define <32 x i8> @fptou_v32f64_v32i8(<32 x double> %a) { ; CHECK-SD-NEXT: fcvtzs v18.2d, v18.2d ; CHECK-SD-NEXT: fcvtzs v17.2d, v17.2d ; CHECK-SD-NEXT: fcvtzs v16.2d, v16.2d -; CHECK-SD-NEXT: xtn v7.2s, v7.2d -; CHECK-SD-NEXT: xtn v6.2s, v6.2d -; CHECK-SD-NEXT: xtn v5.2s, v5.2d -; CHECK-SD-NEXT: xtn v4.2s, v4.2d -; CHECK-SD-NEXT: xtn v3.2s, v3.2d -; CHECK-SD-NEXT: xtn v2.2s, v2.2d -; CHECK-SD-NEXT: xtn v1.2s, v1.2d -; CHECK-SD-NEXT: xtn v0.2s, v0.2d -; CHECK-SD-NEXT: xtn v23.2s, v23.2d -; CHECK-SD-NEXT: xtn v22.2s, v22.2d -; CHECK-SD-NEXT: xtn v21.2s, v21.2d -; CHECK-SD-NEXT: xtn v20.2s, v20.2d -; CHECK-SD-NEXT: xtn v19.2s, v19.2d -; CHECK-SD-NEXT: xtn v18.2s, v18.2d -; CHECK-SD-NEXT: xtn v17.2s, v17.2d -; CHECK-SD-NEXT: xtn v16.2s, v16.2d -; CHECK-SD-NEXT: uzp1 
v6.4h, v6.4h, v7.4h -; CHECK-SD-NEXT: uzp1 v4.4h, v4.4h, v5.4h -; CHECK-SD-NEXT: uzp1 v2.4h, v2.4h, v3.4h -; CHECK-SD-NEXT: uzp1 v0.4h, v0.4h, v1.4h -; CHECK-SD-NEXT: uzp1 v1.4h, v22.4h, v23.4h -; CHECK-SD-NEXT: uzp1 v3.4h, v20.4h, v21.4h -; CHECK-SD-NEXT: uzp1 v5.4h, v18.4h, v19.4h -; CHECK-SD-NEXT: uzp1 v7.4h, v16.4h, v17.4h -; CHECK-SD-NEXT: mov v4.d[1], v6.d[0] -; CHECK-SD-NEXT: mov v0.d[1], v2.d[0] -; CHECK-SD-NEXT: mov v3.d[1], v1.d[0] -; CHECK-SD-NEXT: mov v7.d[1], v5.d[0] +; CHECK-SD-NEXT: uzp1 v6.4s, v6.4s, v7.4s +; CHECK-SD-NEXT: uzp1 v4.4s, v4.4s, v5.4s +; CHECK-SD-NEXT: uzp1 v2.4s, v2.4s, v3.4s +; CHECK-SD-NEXT: uzp1 v0.4s, v0.4s, v1.4s +; CHECK-SD-NEXT: uzp1 v3.4s, v20.4s, v21.4s +; CHECK-SD-NEXT: uzp1 v1.4s, v22.4s, v23.4s +; CHECK-SD-NEXT: uzp1 v5.4s, v18.4s, v19.4s +; CHECK-SD-NEXT: uzp1 v7.4s, v16.4s, v17.4s +; CHECK-SD-NEXT: uzp1 v4.8h, v4.8h, v6.8h +; CHECK-SD-NEXT: uzp1 v0.8h, v0.8h, v2.8h +; CHECK-SD-NEXT: uzp1 v1.8h, v3.8h, v1.8h +; CHECK-SD-NEXT: uzp1 v2.8h, v7.8h, v5.8h ; CHECK-SD-NEXT: uzp1 v0.16b, v0.16b, v4.16b -; CHECK-SD-NEXT: uzp1 v1.16b, v7.16b, v3.16b +; CHECK-SD-NEXT: uzp1 v1.16b, v2.16b, v1.16b ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: fptou_v32f64_v32i8: @@ -3028,9 +2880,8 @@ define <8 x i8> @fptos_v8f32_v8i8(<8 x float> %a) { ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: fcvtzs v1.4s, v1.4s ; CHECK-SD-NEXT: fcvtzs v0.4s, v0.4s -; CHECK-SD-NEXT: xtn v1.4h, v1.4s -; CHECK-SD-NEXT: xtn v0.4h, v0.4s -; CHECK-SD-NEXT: uzp1 v0.8b, v0.8b, v1.8b +; CHECK-SD-NEXT: uzp1 v0.8h, v0.8h, v1.8h +; CHECK-SD-NEXT: xtn v0.8b, v0.8h ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: fptos_v8f32_v8i8: @@ -3050,9 +2901,8 @@ define <8 x i8> @fptou_v8f32_v8i8(<8 x float> %a) { ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: fcvtzs v1.4s, v1.4s ; CHECK-SD-NEXT: fcvtzs v0.4s, v0.4s -; CHECK-SD-NEXT: xtn v1.4h, v1.4s -; CHECK-SD-NEXT: xtn v0.4h, v0.4s -; CHECK-SD-NEXT: uzp1 v0.8b, v0.8b, v1.8b +; CHECK-SD-NEXT: uzp1 v0.8h, v0.8h, v1.8h +; CHECK-SD-NEXT: xtn 
v0.8b, v0.8h ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: fptou_v8f32_v8i8: @@ -3074,12 +2924,8 @@ define <16 x i8> @fptos_v16f32_v16i8(<16 x float> %a) { ; CHECK-SD-NEXT: fcvtzs v2.4s, v2.4s ; CHECK-SD-NEXT: fcvtzs v1.4s, v1.4s ; CHECK-SD-NEXT: fcvtzs v0.4s, v0.4s -; CHECK-SD-NEXT: xtn v3.4h, v3.4s -; CHECK-SD-NEXT: xtn v2.4h, v2.4s -; CHECK-SD-NEXT: xtn v1.4h, v1.4s -; CHECK-SD-NEXT: xtn v0.4h, v0.4s -; CHECK-SD-NEXT: mov v2.d[1], v3.d[0] -; CHECK-SD-NEXT: mov v0.d[1], v1.d[0] +; CHECK-SD-NEXT: uzp1 v2.8h, v2.8h, v3.8h +; CHECK-SD-NEXT: uzp1 v0.8h, v0.8h, v1.8h ; CHECK-SD-NEXT: uzp1 v0.16b, v0.16b, v2.16b ; CHECK-SD-NEXT: ret ; @@ -3136,20 +2982,12 @@ define <32 x i8> @fptos_v32f32_v32i8(<32 x float> %a) { ; CHECK-SD-NEXT: fcvtzs v6.4s, v6.4s ; CHECK-SD-NEXT: fcvtzs v5.4s, v5.4s ; CHECK-SD-NEXT: fcvtzs v4.4s, v4.4s -; CHECK-SD-NEXT: xtn v3.4h, v3.4s -; CHECK-SD-NEXT: xtn v2.4h, v2.4s -; CHECK-SD-NEXT: xtn v1.4h, v1.4s -; CHECK-SD-NEXT: xtn v0.4h, v0.4s -; CHECK-SD-NEXT: xtn v7.4h, v7.4s -; CHECK-SD-NEXT: xtn v6.4h, v6.4s -; CHECK-SD-NEXT: xtn v5.4h, v5.4s -; CHECK-SD-NEXT: xtn v4.4h, v4.4s -; CHECK-SD-NEXT: mov v2.d[1], v3.d[0] -; CHECK-SD-NEXT: mov v0.d[1], v1.d[0] -; CHECK-SD-NEXT: mov v6.d[1], v7.d[0] -; CHECK-SD-NEXT: mov v4.d[1], v5.d[0] +; CHECK-SD-NEXT: uzp1 v2.8h, v2.8h, v3.8h +; CHECK-SD-NEXT: uzp1 v0.8h, v0.8h, v1.8h +; CHECK-SD-NEXT: uzp1 v1.8h, v6.8h, v7.8h +; CHECK-SD-NEXT: uzp1 v3.8h, v4.8h, v5.8h ; CHECK-SD-NEXT: uzp1 v0.16b, v0.16b, v2.16b -; CHECK-SD-NEXT: uzp1 v1.16b, v4.16b, v6.16b +; CHECK-SD-NEXT: uzp1 v1.16b, v3.16b, v1.16b ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: fptos_v32f32_v32i8: diff --git a/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll b/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll index 92fd3183393ea..ff7df77aef116 100644 --- a/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll +++ b/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll @@ -3288,63 +3288,62 @@ define <16 x i16> @test_signed_v16f16_v16i16(<16 x half> %f) { define <8 x i8> 
@test_signed_v8f64_v8i8(<8 x double> %f) { ; CHECK-LABEL: test_signed_v8f64_v8i8: ; CHECK: // %bb.0: -; CHECK-NEXT: mov d4, v3.d[1] -; CHECK-NEXT: fcvtzs w11, d3 -; CHECK-NEXT: mov w9, #127 // =0x7f -; CHECK-NEXT: mov d3, v1.d[1] -; CHECK-NEXT: fcvtzs w13, d2 -; CHECK-NEXT: fcvtzs w15, d1 -; CHECK-NEXT: fcvtzs w17, d0 -; CHECK-NEXT: fcvtzs w8, d4 ; CHECK-NEXT: mov d4, v2.d[1] -; CHECK-NEXT: mov d2, v0.d[1] +; CHECK-NEXT: fcvtzs w10, d2 +; CHECK-NEXT: mov w8, #127 // =0x7f +; CHECK-NEXT: mov d2, v3.d[1] +; CHECK-NEXT: fcvtzs w12, d3 +; CHECK-NEXT: mov d3, v0.d[1] +; CHECK-NEXT: fcvtzs w15, d0 +; CHECK-NEXT: fcvtzs w16, d1 +; CHECK-NEXT: mov d0, v1.d[1] +; CHECK-NEXT: fcvtzs w9, d4 +; CHECK-NEXT: fcvtzs w13, d2 ; CHECK-NEXT: fcvtzs w14, d3 -; CHECK-NEXT: cmp w8, #127 -; CHECK-NEXT: fcvtzs w12, d4 -; CHECK-NEXT: fcvtzs w16, d2 -; CHECK-NEXT: csel w10, w8, w9, lt -; CHECK-NEXT: mov w8, #-128 // =0xffffff80 -; CHECK-NEXT: cmn w10, #128 -; CHECK-NEXT: csel w10, w10, w8, gt -; CHECK-NEXT: cmp w11, #127 -; CHECK-NEXT: csel w11, w11, w9, lt +; CHECK-NEXT: cmp w9, #127 +; CHECK-NEXT: csel w11, w9, w8, lt +; CHECK-NEXT: mov w9, #-128 // =0xffffff80 ; CHECK-NEXT: cmn w11, #128 -; CHECK-NEXT: csel w11, w11, w8, gt +; CHECK-NEXT: csel w11, w11, w9, gt +; CHECK-NEXT: cmp w10, #127 +; CHECK-NEXT: csel w10, w10, w8, lt +; CHECK-NEXT: cmn w10, #128 +; CHECK-NEXT: csel w10, w10, w9, gt ; CHECK-NEXT: cmp w12, #127 -; CHECK-NEXT: csel w12, w12, w9, lt -; CHECK-NEXT: fmov s3, w11 +; CHECK-NEXT: csel w12, w12, w8, lt +; CHECK-NEXT: fmov s1, w10 ; CHECK-NEXT: cmn w12, #128 -; CHECK-NEXT: csel w12, w12, w8, gt +; CHECK-NEXT: csel w12, w12, w9, gt ; CHECK-NEXT: cmp w13, #127 -; CHECK-NEXT: csel w13, w13, w9, lt -; CHECK-NEXT: mov v3.s[1], w10 +; CHECK-NEXT: csel w13, w13, w8, lt +; CHECK-NEXT: mov v1.s[1], w11 ; CHECK-NEXT: cmn w13, #128 -; CHECK-NEXT: csel w13, w13, w8, gt +; CHECK-NEXT: csel w13, w13, w9, gt ; CHECK-NEXT: cmp w14, #127 -; CHECK-NEXT: csel w14, w14, w9, lt -; CHECK-NEXT: 
fmov s2, w13 +; CHECK-NEXT: csel w14, w14, w8, lt ; CHECK-NEXT: cmn w14, #128 -; CHECK-NEXT: csel w14, w14, w8, gt +; CHECK-NEXT: mov v1.s[2], w12 +; CHECK-NEXT: csel w14, w14, w9, gt ; CHECK-NEXT: cmp w15, #127 -; CHECK-NEXT: csel w15, w15, w9, lt -; CHECK-NEXT: mov v2.s[1], w12 +; CHECK-NEXT: csel w15, w15, w8, lt ; CHECK-NEXT: cmn w15, #128 -; CHECK-NEXT: csel w15, w15, w8, gt +; CHECK-NEXT: csel w10, w15, w9, gt ; CHECK-NEXT: cmp w16, #127 -; CHECK-NEXT: csel w11, w16, w9, lt -; CHECK-NEXT: fmov s1, w15 +; CHECK-NEXT: mov v1.s[3], w13 +; CHECK-NEXT: fmov s2, w10 +; CHECK-NEXT: fcvtzs w10, d0 +; CHECK-NEXT: csel w11, w16, w8, lt ; CHECK-NEXT: cmn w11, #128 -; CHECK-NEXT: csel w10, w11, w8, gt -; CHECK-NEXT: cmp w17, #127 -; CHECK-NEXT: csel w9, w17, w9, lt -; CHECK-NEXT: mov v1.s[1], w14 -; CHECK-NEXT: cmn w9, #128 -; CHECK-NEXT: csel w8, w9, w8, gt -; CHECK-NEXT: fmov s0, w8 -; CHECK-NEXT: adrp x8, .LCPI82_0 -; CHECK-NEXT: ldr d4, [x8, :lo12:.LCPI82_0] -; CHECK-NEXT: mov v0.s[1], w10 -; CHECK-NEXT: tbl v0.8b, { v0.16b, v1.16b, v2.16b, v3.16b }, v4.8b +; CHECK-NEXT: csel w11, w11, w9, gt +; CHECK-NEXT: mov v2.s[1], w14 +; CHECK-NEXT: cmp w10, #127 +; CHECK-NEXT: csel w8, w10, w8, lt +; CHECK-NEXT: cmn w8, #128 +; CHECK-NEXT: mov v2.s[2], w11 +; CHECK-NEXT: csel w8, w8, w9, gt +; CHECK-NEXT: mov v2.s[3], w8 +; CHECK-NEXT: uzp1 v0.8h, v2.8h, v1.8h +; CHECK-NEXT: xtn v0.8b, v0.8h ; CHECK-NEXT: ret %x = call <8 x i8> @llvm.fptosi.sat.v8f64.v8i8(<8 x double> %f) ret <8 x i8> %x @@ -3353,135 +3352,115 @@ define <8 x i8> @test_signed_v8f64_v8i8(<8 x double> %f) { define <16 x i8> @test_signed_v16f64_v16i8(<16 x double> %f) { ; CHECK-LABEL: test_signed_v16f64_v16i8: ; CHECK: // %bb.0: -; CHECK-NEXT: mov d16, v0.d[1] -; CHECK-NEXT: fcvtzs w10, d0 +; CHECK-NEXT: mov d16, v6.d[1] +; CHECK-NEXT: fcvtzs w11, d6 ; CHECK-NEXT: mov w8, #127 // =0x7f -; CHECK-NEXT: mov d0, v1.d[1] -; CHECK-NEXT: fcvtzs w13, d1 -; CHECK-NEXT: mov d1, v2.d[1] +; CHECK-NEXT: mov d6, v7.d[1] +; 
CHECK-NEXT: fcvtzs w12, d7 +; CHECK-NEXT: mov d7, v4.d[1] +; CHECK-NEXT: fcvtzs w16, d4 +; CHECK-NEXT: mov d4, v5.d[1] +; CHECK-NEXT: fcvtzs w1, d3 +; CHECK-NEXT: fcvtzs w4, d0 ; CHECK-NEXT: fcvtzs w9, d16 -; CHECK-NEXT: fcvtzs w12, d0 +; CHECK-NEXT: fcvtzs w14, d6 +; CHECK-NEXT: fcvtzs w15, d7 +; CHECK-NEXT: fcvtzs w18, d4 ; CHECK-NEXT: cmp w9, #127 -; CHECK-NEXT: csel w11, w9, w8, lt +; CHECK-NEXT: csel w10, w9, w8, lt ; CHECK-NEXT: mov w9, #-128 // =0xffffff80 -; CHECK-NEXT: cmn w11, #128 -; CHECK-NEXT: csel w11, w11, w9, gt -; CHECK-NEXT: cmp w10, #127 -; CHECK-NEXT: csel w10, w10, w8, lt ; CHECK-NEXT: cmn w10, #128 ; CHECK-NEXT: csel w10, w10, w9, gt -; CHECK-NEXT: cmp w12, #127 -; CHECK-NEXT: fmov s0, w10 -; CHECK-NEXT: csel w10, w12, w8, lt -; CHECK-NEXT: cmn w10, #128 -; CHECK-NEXT: csel w10, w10, w9, gt -; CHECK-NEXT: cmp w13, #127 -; CHECK-NEXT: csel w12, w13, w8, lt -; CHECK-NEXT: mov v0.s[1], w11 -; CHECK-NEXT: fcvtzs w11, d1 -; CHECK-NEXT: cmn w12, #128 -; CHECK-NEXT: csel w12, w12, w9, gt -; CHECK-NEXT: fmov s1, w12 -; CHECK-NEXT: fcvtzs w12, d2 -; CHECK-NEXT: mov d2, v3.d[1] -; CHECK-NEXT: cmp w11, #127 -; CHECK-NEXT: mov w13, v0.s[1] -; CHECK-NEXT: mov v1.s[1], w10 -; CHECK-NEXT: csel w10, w11, w8, lt -; CHECK-NEXT: cmn w10, #128 -; CHECK-NEXT: fcvtzs w11, d2 -; CHECK-NEXT: csel w10, w10, w9, gt -; CHECK-NEXT: cmp w12, #127 -; CHECK-NEXT: mov v0.b[1], w13 -; CHECK-NEXT: csel w12, w12, w8, lt -; CHECK-NEXT: cmn w12, #128 -; CHECK-NEXT: mov w13, v1.s[1] -; CHECK-NEXT: csel w12, w12, w9, gt -; CHECK-NEXT: cmp w11, #127 -; CHECK-NEXT: fmov s2, w12 -; CHECK-NEXT: fcvtzs w12, d3 -; CHECK-NEXT: mov d3, v4.d[1] -; CHECK-NEXT: mov v0.b[2], v1.b[0] -; CHECK-NEXT: mov v2.s[1], w10 -; CHECK-NEXT: csel w10, w11, w8, lt -; CHECK-NEXT: cmn w10, #128 -; CHECK-NEXT: fcvtzs w11, d3 -; CHECK-NEXT: csel w10, w10, w9, gt -; CHECK-NEXT: cmp w12, #127 -; CHECK-NEXT: mov v0.b[3], w13 -; CHECK-NEXT: csel w12, w12, w8, lt -; CHECK-NEXT: cmn w12, #128 -; CHECK-NEXT: mov w13, 
v2.s[1] -; CHECK-NEXT: csel w12, w12, w9, gt -; CHECK-NEXT: cmp w11, #127 -; CHECK-NEXT: fmov s3, w12 -; CHECK-NEXT: fcvtzs w12, d4 -; CHECK-NEXT: mov v0.b[4], v2.b[0] -; CHECK-NEXT: mov d4, v5.d[1] -; CHECK-NEXT: mov v3.s[1], w10 -; CHECK-NEXT: csel w10, w11, w8, lt -; CHECK-NEXT: cmn w10, #128 -; CHECK-NEXT: mov v0.b[5], w13 -; CHECK-NEXT: csel w10, w10, w9, gt -; CHECK-NEXT: cmp w12, #127 -; CHECK-NEXT: fcvtzs w11, d4 -; CHECK-NEXT: csel w12, w12, w8, lt -; CHECK-NEXT: cmn w12, #128 -; CHECK-NEXT: mov w13, v3.s[1] -; CHECK-NEXT: csel w12, w12, w9, gt -; CHECK-NEXT: mov v0.b[6], v3.b[0] -; CHECK-NEXT: fmov s4, w12 -; CHECK-NEXT: fcvtzs w12, d5 ; CHECK-NEXT: cmp w11, #127 -; CHECK-NEXT: mov d5, v6.d[1] -; CHECK-NEXT: mov v4.s[1], w10 -; CHECK-NEXT: csel w10, w11, w8, lt -; CHECK-NEXT: mov v0.b[7], w13 -; CHECK-NEXT: cmn w10, #128 -; CHECK-NEXT: csel w10, w10, w9, gt +; CHECK-NEXT: csel w11, w11, w8, lt +; CHECK-NEXT: cmn w11, #128 +; CHECK-NEXT: csel w13, w11, w9, gt ; CHECK-NEXT: cmp w12, #127 -; CHECK-NEXT: fcvtzs w13, d5 ; CHECK-NEXT: csel w11, w12, w8, lt ; CHECK-NEXT: cmn w11, #128 -; CHECK-NEXT: mov w12, v4.s[1] -; CHECK-NEXT: mov v0.b[8], v4.b[0] -; CHECK-NEXT: csel w11, w11, w9, gt -; CHECK-NEXT: fmov s5, w11 -; CHECK-NEXT: fcvtzs w11, d6 -; CHECK-NEXT: cmp w13, #127 -; CHECK-NEXT: mov d6, v7.d[1] -; CHECK-NEXT: mov v0.b[9], w12 -; CHECK-NEXT: mov v5.s[1], w10 -; CHECK-NEXT: csel w10, w13, w8, lt -; CHECK-NEXT: cmn w10, #128 -; CHECK-NEXT: csel w10, w10, w9, gt -; CHECK-NEXT: cmp w11, #127 -; CHECK-NEXT: fcvtzs w13, d6 -; CHECK-NEXT: csel w11, w11, w8, lt -; CHECK-NEXT: cmn w11, #128 -; CHECK-NEXT: mov v0.b[10], v5.b[0] -; CHECK-NEXT: mov w12, v5.s[1] ; CHECK-NEXT: csel w11, w11, w9, gt -; CHECK-NEXT: fmov s6, w11 -; CHECK-NEXT: fcvtzs w11, d7 -; CHECK-NEXT: cmp w13, #127 -; CHECK-NEXT: mov v0.b[11], w12 -; CHECK-NEXT: mov v6.s[1], w10 -; CHECK-NEXT: csel w10, w13, w8, lt -; CHECK-NEXT: cmn w10, #128 -; CHECK-NEXT: csel w10, w10, w9, gt -; CHECK-NEXT: cmp 
w11, #127 -; CHECK-NEXT: csel w8, w11, w8, lt +; CHECK-NEXT: cmp w14, #127 +; CHECK-NEXT: csel w12, w14, w8, lt +; CHECK-NEXT: cmn w12, #128 +; CHECK-NEXT: csel w12, w12, w9, gt +; CHECK-NEXT: cmp w15, #127 +; CHECK-NEXT: csel w14, w15, w8, lt +; CHECK-NEXT: fcvtzs w15, d5 +; CHECK-NEXT: mov d5, v2.d[1] +; CHECK-NEXT: cmn w14, #128 +; CHECK-NEXT: csel w14, w14, w9, gt +; CHECK-NEXT: cmp w16, #127 +; CHECK-NEXT: csel w16, w16, w8, lt +; CHECK-NEXT: cmn w16, #128 +; CHECK-NEXT: fcvtzs w0, d5 +; CHECK-NEXT: csel w17, w16, w9, gt +; CHECK-NEXT: cmp w15, #127 +; CHECK-NEXT: csel w15, w15, w8, lt +; CHECK-NEXT: cmn w15, #128 +; CHECK-NEXT: csel w15, w15, w9, gt +; CHECK-NEXT: cmp w18, #127 +; CHECK-NEXT: csel w16, w18, w8, lt +; CHECK-NEXT: fcvtzs w18, d2 +; CHECK-NEXT: mov d2, v3.d[1] +; CHECK-NEXT: cmn w16, #128 +; CHECK-NEXT: mov d3, v0.d[1] +; CHECK-NEXT: fmov s0, w13 +; CHECK-NEXT: csel w16, w16, w9, gt +; CHECK-NEXT: cmp w0, #127 +; CHECK-NEXT: csel w0, w0, w8, lt +; CHECK-NEXT: cmn w0, #128 +; CHECK-NEXT: fcvtzs w2, d2 +; CHECK-NEXT: mov v0.s[1], w10 +; CHECK-NEXT: csel w0, w0, w9, gt +; CHECK-NEXT: cmp w18, #127 +; CHECK-NEXT: fcvtzs w3, d3 +; CHECK-NEXT: csel w18, w18, w8, lt +; CHECK-NEXT: fmov s2, w17 +; CHECK-NEXT: cmn w18, #128 +; CHECK-NEXT: csel w18, w18, w9, gt +; CHECK-NEXT: cmp w1, #127 +; CHECK-NEXT: mov v0.s[2], w11 +; CHECK-NEXT: csel w1, w1, w8, lt +; CHECK-NEXT: fmov s3, w18 +; CHECK-NEXT: mov v2.s[1], w14 +; CHECK-NEXT: cmn w1, #128 +; CHECK-NEXT: csel w1, w1, w9, gt +; CHECK-NEXT: cmp w2, #127 +; CHECK-NEXT: csel w2, w2, w8, lt +; CHECK-NEXT: mov v3.s[1], w0 +; CHECK-NEXT: mov v0.s[3], w12 +; CHECK-NEXT: cmn w2, #128 +; CHECK-NEXT: mov v2.s[2], w15 +; CHECK-NEXT: csel w2, w2, w9, gt +; CHECK-NEXT: cmp w3, #127 +; CHECK-NEXT: csel w3, w3, w8, lt +; CHECK-NEXT: cmn w3, #128 +; CHECK-NEXT: mov v3.s[2], w1 +; CHECK-NEXT: csel w13, w3, w9, gt +; CHECK-NEXT: cmp w4, #127 +; CHECK-NEXT: mov v2.s[3], w16 +; CHECK-NEXT: csel w3, w4, w8, lt +; CHECK-NEXT: 
fcvtzs w4, d1 +; CHECK-NEXT: mov d1, v1.d[1] +; CHECK-NEXT: cmn w3, #128 +; CHECK-NEXT: csel w10, w3, w9, gt +; CHECK-NEXT: mov v3.s[3], w2 +; CHECK-NEXT: fmov s4, w10 +; CHECK-NEXT: uzp1 v0.8h, v2.8h, v0.8h +; CHECK-NEXT: cmp w4, #127 +; CHECK-NEXT: fcvtzs w10, d1 +; CHECK-NEXT: mov v4.s[1], w13 +; CHECK-NEXT: csel w13, w4, w8, lt +; CHECK-NEXT: cmn w13, #128 +; CHECK-NEXT: csel w11, w13, w9, gt +; CHECK-NEXT: cmp w10, #127 +; CHECK-NEXT: csel w8, w10, w8, lt +; CHECK-NEXT: mov v4.s[2], w11 ; CHECK-NEXT: cmn w8, #128 -; CHECK-NEXT: mov v0.b[12], v6.b[0] -; CHECK-NEXT: mov w11, v6.s[1] ; CHECK-NEXT: csel w8, w8, w9, gt -; CHECK-NEXT: fmov s7, w8 -; CHECK-NEXT: mov v0.b[13], w11 -; CHECK-NEXT: mov v7.s[1], w10 -; CHECK-NEXT: mov v0.b[14], v7.b[0] -; CHECK-NEXT: mov w8, v7.s[1] -; CHECK-NEXT: mov v0.b[15], w8 +; CHECK-NEXT: mov v4.s[3], w8 +; CHECK-NEXT: uzp1 v1.8h, v4.8h, v3.8h +; CHECK-NEXT: uzp1 v0.16b, v1.16b, v0.16b ; CHECK-NEXT: ret %x = call <16 x i8> @llvm.fptosi.sat.v16f64.v16i8(<16 x double> %f) ret <16 x i8> %x @@ -3490,63 +3469,61 @@ define <16 x i8> @test_signed_v16f64_v16i8(<16 x double> %f) { define <8 x i16> @test_signed_v8f64_v8i16(<8 x double> %f) { ; CHECK-LABEL: test_signed_v8f64_v8i16: ; CHECK: // %bb.0: -; CHECK-NEXT: mov d4, v3.d[1] +; CHECK-NEXT: mov d4, v2.d[1] ; CHECK-NEXT: mov w8, #32767 // =0x7fff -; CHECK-NEXT: fcvtzs w11, d3 -; CHECK-NEXT: mov d3, v1.d[1] -; CHECK-NEXT: fcvtzs w13, d2 -; CHECK-NEXT: fcvtzs w15, d1 -; CHECK-NEXT: fcvtzs w17, d0 +; CHECK-NEXT: fcvtzs w10, d2 +; CHECK-NEXT: mov d2, v3.d[1] +; CHECK-NEXT: fcvtzs w12, d3 +; CHECK-NEXT: mov d3, v0.d[1] +; CHECK-NEXT: fcvtzs w15, d0 +; CHECK-NEXT: fcvtzs w16, d1 +; CHECK-NEXT: mov d0, v1.d[1] ; CHECK-NEXT: fcvtzs w9, d4 -; CHECK-NEXT: mov d4, v2.d[1] -; CHECK-NEXT: mov d2, v0.d[1] +; CHECK-NEXT: fcvtzs w13, d2 ; CHECK-NEXT: fcvtzs w14, d3 ; CHECK-NEXT: cmp w9, w8 -; CHECK-NEXT: fcvtzs w12, d4 -; CHECK-NEXT: fcvtzs w16, d2 -; CHECK-NEXT: csel w10, w9, w8, lt +; CHECK-NEXT: csel 
w11, w9, w8, lt ; CHECK-NEXT: mov w9, #-32768 // =0xffff8000 -; CHECK-NEXT: cmn w10, #8, lsl #12 // =32768 -; CHECK-NEXT: csel w10, w10, w9, gt -; CHECK-NEXT: cmp w11, w8 -; CHECK-NEXT: csel w11, w11, w8, lt ; CHECK-NEXT: cmn w11, #8, lsl #12 // =32768 ; CHECK-NEXT: csel w11, w11, w9, gt +; CHECK-NEXT: cmp w10, w8 +; CHECK-NEXT: csel w10, w10, w8, lt +; CHECK-NEXT: cmn w10, #8, lsl #12 // =32768 +; CHECK-NEXT: csel w10, w10, w9, gt ; CHECK-NEXT: cmp w12, w8 ; CHECK-NEXT: csel w12, w12, w8, lt -; CHECK-NEXT: fmov s3, w11 +; CHECK-NEXT: fmov s1, w10 ; CHECK-NEXT: cmn w12, #8, lsl #12 // =32768 ; CHECK-NEXT: csel w12, w12, w9, gt ; CHECK-NEXT: cmp w13, w8 ; CHECK-NEXT: csel w13, w13, w8, lt -; CHECK-NEXT: mov v3.s[1], w10 +; CHECK-NEXT: mov v1.s[1], w11 ; CHECK-NEXT: cmn w13, #8, lsl #12 // =32768 ; CHECK-NEXT: csel w13, w13, w9, gt ; CHECK-NEXT: cmp w14, w8 ; CHECK-NEXT: csel w14, w14, w8, lt -; CHECK-NEXT: fmov s2, w13 ; CHECK-NEXT: cmn w14, #8, lsl #12 // =32768 +; CHECK-NEXT: mov v1.s[2], w12 ; CHECK-NEXT: csel w14, w14, w9, gt ; CHECK-NEXT: cmp w15, w8 ; CHECK-NEXT: csel w15, w15, w8, lt -; CHECK-NEXT: mov v2.s[1], w12 ; CHECK-NEXT: cmn w15, #8, lsl #12 // =32768 -; CHECK-NEXT: csel w15, w15, w9, gt +; CHECK-NEXT: csel w10, w15, w9, gt ; CHECK-NEXT: cmp w16, w8 +; CHECK-NEXT: mov v1.s[3], w13 +; CHECK-NEXT: fmov s2, w10 +; CHECK-NEXT: fcvtzs w10, d0 ; CHECK-NEXT: csel w11, w16, w8, lt -; CHECK-NEXT: fmov s1, w15 ; CHECK-NEXT: cmn w11, #8, lsl #12 // =32768 -; CHECK-NEXT: csel w10, w11, w9, gt -; CHECK-NEXT: cmp w17, w8 -; CHECK-NEXT: csel w8, w17, w8, lt -; CHECK-NEXT: mov v1.s[1], w14 +; CHECK-NEXT: csel w11, w11, w9, gt +; CHECK-NEXT: mov v2.s[1], w14 +; CHECK-NEXT: cmp w10, w8 +; CHECK-NEXT: csel w8, w10, w8, lt ; CHECK-NEXT: cmn w8, #8, lsl #12 // =32768 +; CHECK-NEXT: mov v2.s[2], w11 ; CHECK-NEXT: csel w8, w8, w9, gt -; CHECK-NEXT: fmov s0, w8 -; CHECK-NEXT: adrp x8, .LCPI84_0 -; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI84_0] -; CHECK-NEXT: mov v0.s[1], w10 -; 
CHECK-NEXT: tbl v0.16b, { v0.16b, v1.16b, v2.16b, v3.16b }, v4.16b +; CHECK-NEXT: mov v2.s[3], w8 +; CHECK-NEXT: uzp1 v0.8h, v2.8h, v1.8h ; CHECK-NEXT: ret %x = call <8 x i16> @llvm.fptosi.sat.v8f64.v8i16(<8 x double> %f) ret <8 x i16> %x @@ -3555,116 +3532,114 @@ define <8 x i16> @test_signed_v8f64_v8i16(<8 x double> %f) { define <16 x i16> @test_signed_v16f64_v16i16(<16 x double> %f) { ; CHECK-LABEL: test_signed_v16f64_v16i16: ; CHECK: // %bb.0: -; CHECK-NEXT: mov d16, v3.d[1] -; CHECK-NEXT: mov w9, #32767 // =0x7fff -; CHECK-NEXT: fcvtzs w11, d3 -; CHECK-NEXT: mov d3, v1.d[1] +; CHECK-NEXT: mov d16, v2.d[1] +; CHECK-NEXT: mov w8, #32767 // =0x7fff +; CHECK-NEXT: fcvtzs w11, d2 +; CHECK-NEXT: mov d2, v3.d[1] +; CHECK-NEXT: fcvtzs w12, d3 +; CHECK-NEXT: mov d3, v0.d[1] +; CHECK-NEXT: fcvtzs w16, d0 +; CHECK-NEXT: mov d0, v1.d[1] +; CHECK-NEXT: fcvtzs w1, d7 +; CHECK-NEXT: fcvtzs w4, d4 +; CHECK-NEXT: fcvtzs w9, d16 ; CHECK-NEXT: fcvtzs w14, d2 -; CHECK-NEXT: fcvtzs w15, d1 -; CHECK-NEXT: mov d1, v7.d[1] +; CHECK-NEXT: fcvtzs w15, d3 ; CHECK-NEXT: fcvtzs w18, d0 -; CHECK-NEXT: fcvtzs w1, d7 -; CHECK-NEXT: fcvtzs w2, d6 -; CHECK-NEXT: fcvtzs w4, d5 -; CHECK-NEXT: fcvtzs w6, d4 -; CHECK-NEXT: fcvtzs w8, d16 -; CHECK-NEXT: mov d16, v2.d[1] -; CHECK-NEXT: mov d2, v0.d[1] -; CHECK-NEXT: mov d0, v6.d[1] -; CHECK-NEXT: fcvtzs w0, d1 -; CHECK-NEXT: cmp w8, w9 -; CHECK-NEXT: fcvtzs w13, d16 -; CHECK-NEXT: fcvtzs w17, d2 -; CHECK-NEXT: csel w10, w8, w9, lt -; CHECK-NEXT: mov w8, #-32768 // =0xffff8000 +; CHECK-NEXT: mov d0, v7.d[1] +; CHECK-NEXT: cmp w9, w8 +; CHECK-NEXT: csel w10, w9, w8, lt +; CHECK-NEXT: mov w9, #-32768 // =0xffff8000 ; CHECK-NEXT: cmn w10, #8, lsl #12 // =32768 -; CHECK-NEXT: csel w10, w10, w8, gt -; CHECK-NEXT: cmp w11, w9 -; CHECK-NEXT: csel w11, w11, w9, lt +; CHECK-NEXT: fcvtzs w2, d0 +; CHECK-NEXT: csel w10, w10, w9, gt +; CHECK-NEXT: cmp w11, w8 +; CHECK-NEXT: csel w11, w11, w8, lt ; CHECK-NEXT: cmn w11, #8, lsl #12 // =32768 -; CHECK-NEXT: csel 
w12, w11, w8, gt -; CHECK-NEXT: cmp w13, w9 -; CHECK-NEXT: csel w11, w13, w9, lt -; CHECK-NEXT: fcvtzs w13, d3 +; CHECK-NEXT: csel w13, w11, w9, gt +; CHECK-NEXT: cmp w12, w8 +; CHECK-NEXT: csel w11, w12, w8, lt +; CHECK-NEXT: fmov s0, w13 ; CHECK-NEXT: cmn w11, #8, lsl #12 // =32768 -; CHECK-NEXT: csel w11, w11, w8, gt -; CHECK-NEXT: cmp w14, w9 -; CHECK-NEXT: csel w14, w14, w9, lt +; CHECK-NEXT: csel w11, w11, w9, gt +; CHECK-NEXT: cmp w14, w8 +; CHECK-NEXT: csel w12, w14, w8, lt +; CHECK-NEXT: mov v0.s[1], w10 +; CHECK-NEXT: cmn w12, #8, lsl #12 // =32768 +; CHECK-NEXT: csel w12, w12, w9, gt +; CHECK-NEXT: cmp w15, w8 +; CHECK-NEXT: csel w14, w15, w8, lt +; CHECK-NEXT: fcvtzs w15, d1 +; CHECK-NEXT: mov d1, v6.d[1] ; CHECK-NEXT: cmn w14, #8, lsl #12 // =32768 -; CHECK-NEXT: csel w14, w14, w8, gt -; CHECK-NEXT: cmp w13, w9 -; CHECK-NEXT: csel w13, w13, w9, lt -; CHECK-NEXT: cmn w13, #8, lsl #12 // =32768 -; CHECK-NEXT: csel w13, w13, w8, gt -; CHECK-NEXT: cmp w15, w9 -; CHECK-NEXT: csel w15, w15, w9, lt -; CHECK-NEXT: cmn w15, #8, lsl #12 // =32768 -; CHECK-NEXT: csel w16, w15, w8, gt -; CHECK-NEXT: cmp w17, w9 -; CHECK-NEXT: csel w15, w17, w9, lt +; CHECK-NEXT: mov v0.s[2], w11 +; CHECK-NEXT: csel w14, w14, w9, gt +; CHECK-NEXT: cmp w16, w8 +; CHECK-NEXT: csel w16, w16, w8, lt +; CHECK-NEXT: cmn w16, #8, lsl #12 // =32768 +; CHECK-NEXT: fcvtzs w0, d1 +; CHECK-NEXT: mov d1, v4.d[1] +; CHECK-NEXT: csel w17, w16, w9, gt +; CHECK-NEXT: cmp w15, w8 +; CHECK-NEXT: mov v0.s[3], w12 +; CHECK-NEXT: csel w15, w15, w8, lt +; CHECK-NEXT: fmov s2, w17 ; CHECK-NEXT: cmn w15, #8, lsl #12 // =32768 -; CHECK-NEXT: csel w15, w15, w8, gt -; CHECK-NEXT: cmp w18, w9 -; CHECK-NEXT: csel w17, w18, w9, lt -; CHECK-NEXT: cmn w17, #8, lsl #12 // =32768 -; CHECK-NEXT: csel w17, w17, w8, gt -; CHECK-NEXT: cmp w0, w9 -; CHECK-NEXT: csel w18, w0, w9, lt -; CHECK-NEXT: fcvtzs w0, d0 -; CHECK-NEXT: mov d0, v5.d[1] +; CHECK-NEXT: csel w15, w15, w9, gt +; CHECK-NEXT: cmp w18, w8 +; CHECK-NEXT: 
fcvtzs w3, d1 +; CHECK-NEXT: csel w16, w18, w8, lt +; CHECK-NEXT: fcvtzs w18, d6 +; CHECK-NEXT: mov d1, v5.d[1] +; CHECK-NEXT: cmn w16, #8, lsl #12 // =32768 +; CHECK-NEXT: mov v2.s[1], w14 +; CHECK-NEXT: csel w16, w16, w9, gt +; CHECK-NEXT: cmp w0, w8 +; CHECK-NEXT: csel w0, w0, w8, lt +; CHECK-NEXT: cmn w0, #8, lsl #12 // =32768 +; CHECK-NEXT: csel w0, w0, w9, gt +; CHECK-NEXT: cmp w18, w8 +; CHECK-NEXT: mov v2.s[2], w15 +; CHECK-NEXT: csel w18, w18, w8, lt ; CHECK-NEXT: cmn w18, #8, lsl #12 // =32768 -; CHECK-NEXT: csel w18, w18, w8, gt -; CHECK-NEXT: cmp w1, w9 -; CHECK-NEXT: csel w1, w1, w9, lt +; CHECK-NEXT: csel w18, w18, w9, gt +; CHECK-NEXT: cmp w1, w8 +; CHECK-NEXT: csel w1, w1, w8, lt +; CHECK-NEXT: fmov s3, w18 +; CHECK-NEXT: mov v2.s[3], w16 ; CHECK-NEXT: cmn w1, #8, lsl #12 // =32768 -; CHECK-NEXT: fcvtzs w3, d0 -; CHECK-NEXT: mov d0, v4.d[1] -; CHECK-NEXT: csel w1, w1, w8, gt -; CHECK-NEXT: cmp w0, w9 -; CHECK-NEXT: csel w0, w0, w9, lt -; CHECK-NEXT: fmov s7, w1 -; CHECK-NEXT: cmn w0, #8, lsl #12 // =32768 -; CHECK-NEXT: csel w0, w0, w8, gt -; CHECK-NEXT: cmp w2, w9 -; CHECK-NEXT: fcvtzs w5, d0 -; CHECK-NEXT: csel w2, w2, w9, lt -; CHECK-NEXT: fmov s3, w12 -; CHECK-NEXT: mov v7.s[1], w18 +; CHECK-NEXT: csel w1, w1, w9, gt +; CHECK-NEXT: cmp w2, w8 +; CHECK-NEXT: csel w2, w2, w8, lt +; CHECK-NEXT: mov v3.s[1], w0 ; CHECK-NEXT: cmn w2, #8, lsl #12 // =32768 -; CHECK-NEXT: csel w2, w2, w8, gt -; CHECK-NEXT: cmp w3, w9 -; CHECK-NEXT: csel w3, w3, w9, lt -; CHECK-NEXT: mov v3.s[1], w10 -; CHECK-NEXT: fmov s6, w2 +; CHECK-NEXT: uzp1 v0.8h, v2.8h, v0.8h +; CHECK-NEXT: csel w2, w2, w9, gt +; CHECK-NEXT: cmp w3, w8 +; CHECK-NEXT: csel w3, w3, w8, lt ; CHECK-NEXT: cmn w3, #8, lsl #12 // =32768 -; CHECK-NEXT: fmov s2, w14 -; CHECK-NEXT: csel w3, w3, w8, gt -; CHECK-NEXT: cmp w4, w9 -; CHECK-NEXT: csel w4, w4, w9, lt -; CHECK-NEXT: mov v6.s[1], w0 -; CHECK-NEXT: cmn w4, #8, lsl #12 // =32768 -; CHECK-NEXT: mov v2.s[1], w11 -; CHECK-NEXT: csel w12, w4, w8, gt -; 
CHECK-NEXT: cmp w5, w9 -; CHECK-NEXT: fmov s1, w16 -; CHECK-NEXT: csel w10, w5, w9, lt -; CHECK-NEXT: fmov s5, w12 -; CHECK-NEXT: cmn w10, #8, lsl #12 // =32768 -; CHECK-NEXT: csel w10, w10, w8, gt -; CHECK-NEXT: cmp w6, w9 -; CHECK-NEXT: mov v1.s[1], w13 -; CHECK-NEXT: csel w9, w6, w9, lt -; CHECK-NEXT: mov v5.s[1], w3 -; CHECK-NEXT: fmov s0, w17 -; CHECK-NEXT: cmn w9, #8, lsl #12 // =32768 -; CHECK-NEXT: csel w8, w9, w8, gt -; CHECK-NEXT: fmov s4, w8 -; CHECK-NEXT: mov v0.s[1], w15 -; CHECK-NEXT: adrp x8, .LCPI85_0 -; CHECK-NEXT: ldr q16, [x8, :lo12:.LCPI85_0] -; CHECK-NEXT: mov v4.s[1], w10 -; CHECK-NEXT: tbl v0.16b, { v0.16b, v1.16b, v2.16b, v3.16b }, v16.16b -; CHECK-NEXT: tbl v1.16b, { v4.16b, v5.16b, v6.16b, v7.16b }, v16.16b +; CHECK-NEXT: mov v3.s[2], w1 +; CHECK-NEXT: csel w13, w3, w9, gt +; CHECK-NEXT: cmp w4, w8 +; CHECK-NEXT: csel w3, w4, w8, lt +; CHECK-NEXT: fcvtzs w4, d5 +; CHECK-NEXT: cmn w3, #8, lsl #12 // =32768 +; CHECK-NEXT: csel w10, w3, w9, gt +; CHECK-NEXT: mov v3.s[3], w2 +; CHECK-NEXT: fmov s4, w10 +; CHECK-NEXT: fcvtzs w10, d1 +; CHECK-NEXT: cmp w4, w8 +; CHECK-NEXT: mov v4.s[1], w13 +; CHECK-NEXT: csel w13, w4, w8, lt +; CHECK-NEXT: cmn w13, #8, lsl #12 // =32768 +; CHECK-NEXT: csel w11, w13, w9, gt +; CHECK-NEXT: cmp w10, w8 +; CHECK-NEXT: csel w8, w10, w8, lt +; CHECK-NEXT: mov v4.s[2], w11 +; CHECK-NEXT: cmn w8, #8, lsl #12 // =32768 +; CHECK-NEXT: csel w8, w8, w9, gt +; CHECK-NEXT: mov v4.s[3], w8 +; CHECK-NEXT: uzp1 v1.8h, v4.8h, v3.8h ; CHECK-NEXT: ret %x = call <16 x i16> @llvm.fptosi.sat.v16f64.v16i16(<16 x double> %f) ret <16 x i16> %x diff --git a/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll b/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll index c94db3484994c..15a12c13ec25d 100644 --- a/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll +++ b/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll @@ -2754,46 +2754,45 @@ define <16 x i16> @test_unsigned_v16f16_v16i16(<16 x half> %f) { define <8 x i8> @test_unsigned_v8f64_v8i8(<8 x double> 
%f) { ; CHECK-LABEL: test_unsigned_v8f64_v8i8: ; CHECK: // %bb.0: -; CHECK-NEXT: mov d4, v3.d[1] -; CHECK-NEXT: mov d5, v2.d[1] -; CHECK-NEXT: mov w11, #255 // =0xff -; CHECK-NEXT: fcvtzu w9, d3 -; CHECK-NEXT: mov d3, v1.d[1] -; CHECK-NEXT: fcvtzu w12, d2 -; CHECK-NEXT: fcvtzu w14, d1 +; CHECK-NEXT: mov d4, v2.d[1] +; CHECK-NEXT: mov d5, v3.d[1] +; CHECK-NEXT: mov w12, #255 // =0xff +; CHECK-NEXT: fcvtzu w9, d2 +; CHECK-NEXT: fcvtzu w10, d3 +; CHECK-NEXT: fcvtzu w14, d0 +; CHECK-NEXT: fcvtzu w15, d1 ; CHECK-NEXT: fcvtzu w8, d4 ; CHECK-NEXT: mov d4, v0.d[1] -; CHECK-NEXT: fcvtzu w10, d5 -; CHECK-NEXT: fcvtzu w13, d3 +; CHECK-NEXT: fcvtzu w11, d5 +; CHECK-NEXT: mov d0, v1.d[1] ; CHECK-NEXT: cmp w8, #255 -; CHECK-NEXT: fcvtzu w15, d4 -; CHECK-NEXT: csel w8, w8, w11, lo +; CHECK-NEXT: fcvtzu w13, d4 +; CHECK-NEXT: csel w8, w8, w12, lo ; CHECK-NEXT: cmp w9, #255 -; CHECK-NEXT: csel w9, w9, w11, lo +; CHECK-NEXT: csel w9, w9, w12, lo ; CHECK-NEXT: cmp w10, #255 -; CHECK-NEXT: fmov s4, w9 -; CHECK-NEXT: csel w9, w10, w11, lo -; CHECK-NEXT: cmp w12, #255 -; CHECK-NEXT: fcvtzu w10, d0 -; CHECK-NEXT: mov v4.s[1], w8 -; CHECK-NEXT: csel w8, w12, w11, lo +; CHECK-NEXT: csel w10, w10, w12, lo +; CHECK-NEXT: cmp w11, #255 +; CHECK-NEXT: fmov s1, w9 +; CHECK-NEXT: csel w11, w11, w12, lo ; CHECK-NEXT: cmp w13, #255 -; CHECK-NEXT: fmov s3, w8 -; CHECK-NEXT: csel w8, w13, w11, lo +; CHECK-NEXT: csel w13, w13, w12, lo ; CHECK-NEXT: cmp w14, #255 -; CHECK-NEXT: mov v3.s[1], w9 -; CHECK-NEXT: csel w9, w14, w11, lo +; CHECK-NEXT: csel w14, w14, w12, lo +; CHECK-NEXT: mov v1.s[1], w8 ; CHECK-NEXT: cmp w15, #255 -; CHECK-NEXT: fmov s2, w9 -; CHECK-NEXT: csel w9, w15, w11, lo -; CHECK-NEXT: cmp w10, #255 -; CHECK-NEXT: mov v2.s[1], w8 -; CHECK-NEXT: csel w8, w10, w11, lo -; CHECK-NEXT: fmov s1, w8 -; CHECK-NEXT: adrp x8, .LCPI82_0 -; CHECK-NEXT: ldr d0, [x8, :lo12:.LCPI82_0] -; CHECK-NEXT: mov v1.s[1], w9 -; CHECK-NEXT: tbl v0.8b, { v1.16b, v2.16b, v3.16b, v4.16b }, v0.8b +; CHECK-NEXT: 
fmov s2, w14 +; CHECK-NEXT: fcvtzu w8, d0 +; CHECK-NEXT: csel w9, w15, w12, lo +; CHECK-NEXT: mov v2.s[1], w13 +; CHECK-NEXT: mov v1.s[2], w10 +; CHECK-NEXT: cmp w8, #255 +; CHECK-NEXT: csel w8, w8, w12, lo +; CHECK-NEXT: mov v2.s[2], w9 +; CHECK-NEXT: mov v1.s[3], w11 +; CHECK-NEXT: mov v2.s[3], w8 +; CHECK-NEXT: uzp1 v0.8h, v2.8h, v1.8h +; CHECK-NEXT: xtn v0.8b, v0.8h ; CHECK-NEXT: ret %x = call <8 x i8> @llvm.fptoui.sat.v8f64.v8i8(<8 x double> %f) ret <8 x i8> %x @@ -2802,102 +2801,82 @@ define <8 x i8> @test_unsigned_v8f64_v8i8(<8 x double> %f) { define <16 x i8> @test_unsigned_v16f64_v16i8(<16 x double> %f) { ; CHECK-LABEL: test_unsigned_v16f64_v16i8: ; CHECK: // %bb.0: -; CHECK-NEXT: mov d16, v0.d[1] -; CHECK-NEXT: fcvtzu w10, d0 +; CHECK-NEXT: mov d16, v6.d[1] +; CHECK-NEXT: mov d17, v7.d[1] ; CHECK-NEXT: mov w8, #255 // =0xff +; CHECK-NEXT: fcvtzu w10, d6 +; CHECK-NEXT: mov d6, v4.d[1] +; CHECK-NEXT: fcvtzu w11, d7 +; CHECK-NEXT: mov d7, v5.d[1] +; CHECK-NEXT: fcvtzu w15, d4 +; CHECK-NEXT: fcvtzu w16, d5 +; CHECK-NEXT: mov d4, v3.d[1] +; CHECK-NEXT: mov d5, v0.d[1] +; CHECK-NEXT: fcvtzu w18, d2 ; CHECK-NEXT: fcvtzu w9, d16 -; CHECK-NEXT: mov d16, v1.d[1] -; CHECK-NEXT: cmp w9, #255 -; CHECK-NEXT: csel w9, w9, w8, lo -; CHECK-NEXT: cmp w10, #255 -; CHECK-NEXT: csel w10, w10, w8, lo -; CHECK-NEXT: fmov s0, w10 -; CHECK-NEXT: fcvtzu w10, d16 +; CHECK-NEXT: fcvtzu w12, d17 ; CHECK-NEXT: mov d16, v2.d[1] -; CHECK-NEXT: mov v0.s[1], w9 -; CHECK-NEXT: fcvtzu w9, d1 -; CHECK-NEXT: cmp w10, #255 -; CHECK-NEXT: csel w10, w10, w8, lo +; CHECK-NEXT: fcvtzu w13, d6 +; CHECK-NEXT: fcvtzu w0, d3 +; CHECK-NEXT: fcvtzu w3, d0 +; CHECK-NEXT: fcvtzu w1, d4 +; CHECK-NEXT: fcvtzu w2, d5 ; CHECK-NEXT: cmp w9, #255 -; CHECK-NEXT: mov w11, v0.s[1] -; CHECK-NEXT: csel w9, w9, w8, lo -; CHECK-NEXT: fmov s1, w9 -; CHECK-NEXT: fcvtzu w9, d16 -; CHECK-NEXT: mov d16, v3.d[1] -; CHECK-NEXT: mov v0.b[1], w11 -; CHECK-NEXT: mov v1.s[1], w10 -; CHECK-NEXT: fcvtzu w10, d2 -; CHECK-NEXT: cmp 
w9, #255 -; CHECK-NEXT: csel w9, w9, w8, lo +; CHECK-NEXT: fcvtzu w17, d16 +; CHECK-NEXT: csel w14, w9, w8, lo ; CHECK-NEXT: cmp w10, #255 -; CHECK-NEXT: mov w11, v1.s[1] -; CHECK-NEXT: mov v0.b[2], v1.b[0] ; CHECK-NEXT: csel w10, w10, w8, lo -; CHECK-NEXT: fmov s2, w10 -; CHECK-NEXT: fcvtzu w10, d16 -; CHECK-NEXT: mov d16, v4.d[1] -; CHECK-NEXT: mov v0.b[3], w11 -; CHECK-NEXT: mov v2.s[1], w9 -; CHECK-NEXT: fcvtzu w9, d3 -; CHECK-NEXT: cmp w10, #255 -; CHECK-NEXT: csel w10, w10, w8, lo -; CHECK-NEXT: cmp w9, #255 -; CHECK-NEXT: mov w11, v2.s[1] -; CHECK-NEXT: mov v0.b[4], v2.b[0] -; CHECK-NEXT: csel w9, w9, w8, lo -; CHECK-NEXT: fmov s3, w9 -; CHECK-NEXT: fcvtzu w9, d16 -; CHECK-NEXT: mov d16, v5.d[1] -; CHECK-NEXT: mov v0.b[5], w11 -; CHECK-NEXT: mov v3.s[1], w10 -; CHECK-NEXT: fcvtzu w10, d4 -; CHECK-NEXT: cmp w9, #255 -; CHECK-NEXT: csel w9, w9, w8, lo -; CHECK-NEXT: cmp w10, #255 -; CHECK-NEXT: mov w11, v3.s[1] -; CHECK-NEXT: mov v0.b[6], v3.b[0] -; CHECK-NEXT: csel w10, w10, w8, lo -; CHECK-NEXT: fmov s4, w10 -; CHECK-NEXT: fcvtzu w10, d16 -; CHECK-NEXT: mov v0.b[7], w11 -; CHECK-NEXT: mov v4.s[1], w9 -; CHECK-NEXT: fcvtzu w9, d5 -; CHECK-NEXT: mov d5, v6.d[1] -; CHECK-NEXT: cmp w10, #255 -; CHECK-NEXT: csel w10, w10, w8, lo -; CHECK-NEXT: cmp w9, #255 -; CHECK-NEXT: mov w11, v4.s[1] -; CHECK-NEXT: mov v0.b[8], v4.b[0] -; CHECK-NEXT: csel w9, w9, w8, lo -; CHECK-NEXT: fmov s16, w9 -; CHECK-NEXT: fcvtzu w9, d5 -; CHECK-NEXT: mov d5, v7.d[1] -; CHECK-NEXT: mov v0.b[9], w11 -; CHECK-NEXT: mov v16.s[1], w10 -; CHECK-NEXT: fcvtzu w10, d6 -; CHECK-NEXT: cmp w9, #255 -; CHECK-NEXT: csel w9, w9, w8, lo -; CHECK-NEXT: cmp w10, #255 -; CHECK-NEXT: mov v0.b[10], v16.b[0] -; CHECK-NEXT: mov w11, v16.s[1] -; CHECK-NEXT: csel w10, w10, w8, lo -; CHECK-NEXT: fmov s6, w10 -; CHECK-NEXT: fcvtzu w10, d7 -; CHECK-NEXT: mov v0.b[11], w11 -; CHECK-NEXT: mov v6.s[1], w9 -; CHECK-NEXT: fcvtzu w9, d5 -; CHECK-NEXT: cmp w9, #255 -; CHECK-NEXT: mov v0.b[12], v6.b[0] -; CHECK-NEXT: mov 
w11, v6.s[1] -; CHECK-NEXT: csel w9, w9, w8, lo -; CHECK-NEXT: cmp w10, #255 -; CHECK-NEXT: csel w8, w10, w8, lo -; CHECK-NEXT: fmov s5, w8 -; CHECK-NEXT: mov v0.b[13], w11 -; CHECK-NEXT: mov v5.s[1], w9 -; CHECK-NEXT: mov v0.b[14], v5.b[0] -; CHECK-NEXT: mov w8, v5.s[1] -; CHECK-NEXT: mov v0.b[15], w8 +; CHECK-NEXT: cmp w11, #255 +; CHECK-NEXT: csel w9, w11, w8, lo +; CHECK-NEXT: cmp w12, #255 +; CHECK-NEXT: fcvtzu w11, d7 +; CHECK-NEXT: csel w12, w12, w8, lo +; CHECK-NEXT: cmp w13, #255 +; CHECK-NEXT: fmov s0, w10 +; CHECK-NEXT: csel w13, w13, w8, lo +; CHECK-NEXT: cmp w15, #255 +; CHECK-NEXT: csel w15, w15, w8, lo +; CHECK-NEXT: cmp w16, #255 +; CHECK-NEXT: csel w16, w16, w8, lo +; CHECK-NEXT: cmp w11, #255 +; CHECK-NEXT: mov v0.s[1], w14 +; CHECK-NEXT: csel w11, w11, w8, lo +; CHECK-NEXT: cmp w17, #255 +; CHECK-NEXT: fmov s2, w15 +; CHECK-NEXT: csel w17, w17, w8, lo +; CHECK-NEXT: cmp w18, #255 +; CHECK-NEXT: csel w18, w18, w8, lo +; CHECK-NEXT: cmp w0, #255 +; CHECK-NEXT: csel w0, w0, w8, lo +; CHECK-NEXT: cmp w1, #255 +; CHECK-NEXT: fmov s3, w18 +; CHECK-NEXT: csel w10, w1, w8, lo +; CHECK-NEXT: cmp w2, #255 +; CHECK-NEXT: mov v2.s[1], w13 +; CHECK-NEXT: csel w14, w2, w8, lo +; CHECK-NEXT: cmp w3, #255 +; CHECK-NEXT: fcvtzu w2, d1 +; CHECK-NEXT: csel w1, w3, w8, lo +; CHECK-NEXT: mov d1, v1.d[1] +; CHECK-NEXT: mov v3.s[1], w17 +; CHECK-NEXT: fmov s4, w1 +; CHECK-NEXT: mov v0.s[2], w9 +; CHECK-NEXT: mov v2.s[2], w16 +; CHECK-NEXT: cmp w2, #255 +; CHECK-NEXT: mov v4.s[1], w14 +; CHECK-NEXT: fcvtzu w13, d1 +; CHECK-NEXT: csel w9, w2, w8, lo +; CHECK-NEXT: mov v3.s[2], w0 +; CHECK-NEXT: mov v0.s[3], w12 +; CHECK-NEXT: mov v2.s[3], w11 +; CHECK-NEXT: mov v4.s[2], w9 +; CHECK-NEXT: cmp w13, #255 +; CHECK-NEXT: csel w8, w13, w8, lo +; CHECK-NEXT: mov v3.s[3], w10 +; CHECK-NEXT: uzp1 v0.8h, v2.8h, v0.8h +; CHECK-NEXT: mov v4.s[3], w8 +; CHECK-NEXT: uzp1 v1.8h, v4.8h, v3.8h +; CHECK-NEXT: uzp1 v0.16b, v1.16b, v0.16b ; CHECK-NEXT: ret %x = call <16 x i8> 
@llvm.fptoui.sat.v16f64.v16i8(<16 x double> %f) ret <16 x i8> %x @@ -2906,46 +2885,44 @@ define <16 x i8> @test_unsigned_v16f64_v16i8(<16 x double> %f) { define <8 x i16> @test_unsigned_v8f64_v8i16(<8 x double> %f) { ; CHECK-LABEL: test_unsigned_v8f64_v8i16: ; CHECK: // %bb.0: -; CHECK-NEXT: mov d4, v3.d[1] -; CHECK-NEXT: mov d5, v2.d[1] -; CHECK-NEXT: mov w10, #65535 // =0xffff -; CHECK-NEXT: fcvtzu w9, d3 -; CHECK-NEXT: mov d3, v1.d[1] -; CHECK-NEXT: fcvtzu w12, d2 -; CHECK-NEXT: fcvtzu w14, d1 +; CHECK-NEXT: mov d4, v2.d[1] +; CHECK-NEXT: mov d5, v3.d[1] +; CHECK-NEXT: mov w11, #65535 // =0xffff +; CHECK-NEXT: fcvtzu w9, d2 +; CHECK-NEXT: fcvtzu w10, d3 +; CHECK-NEXT: fcvtzu w14, d0 +; CHECK-NEXT: fcvtzu w15, d1 ; CHECK-NEXT: fcvtzu w8, d4 ; CHECK-NEXT: mov d4, v0.d[1] -; CHECK-NEXT: fcvtzu w11, d5 -; CHECK-NEXT: fcvtzu w13, d3 -; CHECK-NEXT: cmp w8, w10 -; CHECK-NEXT: fcvtzu w15, d4 -; CHECK-NEXT: csel w8, w8, w10, lo -; CHECK-NEXT: cmp w9, w10 -; CHECK-NEXT: csel w9, w9, w10, lo -; CHECK-NEXT: cmp w11, w10 -; CHECK-NEXT: fmov s4, w9 -; CHECK-NEXT: csel w9, w11, w10, lo -; CHECK-NEXT: cmp w12, w10 -; CHECK-NEXT: fcvtzu w11, d0 -; CHECK-NEXT: mov v4.s[1], w8 -; CHECK-NEXT: csel w8, w12, w10, lo -; CHECK-NEXT: cmp w13, w10 -; CHECK-NEXT: fmov s3, w8 -; CHECK-NEXT: csel w8, w13, w10, lo -; CHECK-NEXT: cmp w14, w10 -; CHECK-NEXT: mov v3.s[1], w9 -; CHECK-NEXT: csel w9, w14, w10, lo -; CHECK-NEXT: cmp w15, w10 -; CHECK-NEXT: fmov s2, w9 -; CHECK-NEXT: csel w9, w15, w10, lo -; CHECK-NEXT: cmp w11, w10 -; CHECK-NEXT: mov v2.s[1], w8 -; CHECK-NEXT: csel w8, w11, w10, lo -; CHECK-NEXT: fmov s1, w8 -; CHECK-NEXT: adrp x8, .LCPI84_0 -; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI84_0] -; CHECK-NEXT: mov v1.s[1], w9 -; CHECK-NEXT: tbl v0.16b, { v1.16b, v2.16b, v3.16b, v4.16b }, v0.16b +; CHECK-NEXT: fcvtzu w12, d5 +; CHECK-NEXT: mov d0, v1.d[1] +; CHECK-NEXT: cmp w8, w11 +; CHECK-NEXT: fcvtzu w13, d4 +; CHECK-NEXT: csel w8, w8, w11, lo +; CHECK-NEXT: cmp w9, w11 +; CHECK-NEXT: 
csel w9, w9, w11, lo +; CHECK-NEXT: cmp w10, w11 +; CHECK-NEXT: csel w10, w10, w11, lo +; CHECK-NEXT: cmp w12, w11 +; CHECK-NEXT: fmov s1, w9 +; CHECK-NEXT: csel w12, w12, w11, lo +; CHECK-NEXT: cmp w13, w11 +; CHECK-NEXT: csel w13, w13, w11, lo +; CHECK-NEXT: cmp w14, w11 +; CHECK-NEXT: csel w14, w14, w11, lo +; CHECK-NEXT: mov v1.s[1], w8 +; CHECK-NEXT: cmp w15, w11 +; CHECK-NEXT: fmov s2, w14 +; CHECK-NEXT: fcvtzu w8, d0 +; CHECK-NEXT: csel w9, w15, w11, lo +; CHECK-NEXT: mov v2.s[1], w13 +; CHECK-NEXT: mov v1.s[2], w10 +; CHECK-NEXT: cmp w8, w11 +; CHECK-NEXT: csel w8, w8, w11, lo +; CHECK-NEXT: mov v2.s[2], w9 +; CHECK-NEXT: mov v1.s[3], w12 +; CHECK-NEXT: mov v2.s[3], w8 +; CHECK-NEXT: uzp1 v0.8h, v2.8h, v1.8h ; CHECK-NEXT: ret %x = call <8 x i16> @llvm.fptoui.sat.v8f64.v8i16(<8 x double> %f) ret <8 x i16> %x @@ -2954,83 +2931,81 @@ define <8 x i16> @test_unsigned_v8f64_v8i16(<8 x double> %f) { define <16 x i16> @test_unsigned_v16f64_v16i16(<16 x double> %f) { ; CHECK-LABEL: test_unsigned_v16f64_v16i16: ; CHECK: // %bb.0: -; CHECK-NEXT: mov d16, v3.d[1] -; CHECK-NEXT: mov d17, v2.d[1] +; CHECK-NEXT: mov d16, v2.d[1] +; CHECK-NEXT: mov d17, v3.d[1] ; CHECK-NEXT: mov w8, #65535 // =0xffff -; CHECK-NEXT: fcvtzu w9, d3 +; CHECK-NEXT: fcvtzu w10, d2 +; CHECK-NEXT: mov d2, v0.d[1] +; CHECK-NEXT: fcvtzu w11, d3 ; CHECK-NEXT: mov d3, v1.d[1] -; CHECK-NEXT: fcvtzu w10, d1 -; CHECK-NEXT: mov d1, v0.d[1] -; CHECK-NEXT: fcvtzu w11, d2 -; CHECK-NEXT: fcvtzu w12, d0 +; CHECK-NEXT: fcvtzu w15, d0 +; CHECK-NEXT: fcvtzu w16, d1 ; CHECK-NEXT: mov d0, v7.d[1] -; CHECK-NEXT: mov d2, v6.d[1] -; CHECK-NEXT: fcvtzu w14, d7 -; CHECK-NEXT: fcvtzu w13, d16 -; CHECK-NEXT: fcvtzu w16, d17 -; CHECK-NEXT: fcvtzu w15, d6 -; CHECK-NEXT: fcvtzu w17, d3 -; CHECK-NEXT: mov d6, v5.d[1] -; CHECK-NEXT: mov d3, v4.d[1] -; CHECK-NEXT: fcvtzu w18, d1 -; CHECK-NEXT: cmp w13, w8 -; CHECK-NEXT: csel w13, w13, w8, lo +; CHECK-NEXT: mov d1, v4.d[1] +; CHECK-NEXT: fcvtzu w18, d6 +; CHECK-NEXT: fcvtzu w9, 
d16 +; CHECK-NEXT: fcvtzu w12, d17 +; CHECK-NEXT: mov d16, v6.d[1] +; CHECK-NEXT: fcvtzu w13, d2 +; CHECK-NEXT: fcvtzu w0, d7 +; CHECK-NEXT: fcvtzu w3, d4 +; CHECK-NEXT: fcvtzu w1, d0 +; CHECK-NEXT: fcvtzu w2, d1 +; CHECK-NEXT: mov d1, v5.d[1] ; CHECK-NEXT: cmp w9, w8 -; CHECK-NEXT: csel w9, w9, w8, lo -; CHECK-NEXT: cmp w16, w8 -; CHECK-NEXT: fmov s19, w9 -; CHECK-NEXT: csel w9, w16, w8, lo -; CHECK-NEXT: cmp w11, w8 -; CHECK-NEXT: fcvtzu w16, d0 -; CHECK-NEXT: csel w11, w11, w8, lo -; CHECK-NEXT: cmp w17, w8 -; CHECK-NEXT: mov v19.s[1], w13 -; CHECK-NEXT: csel w13, w17, w8, lo +; CHECK-NEXT: fcvtzu w17, d16 +; CHECK-NEXT: csel w14, w9, w8, lo ; CHECK-NEXT: cmp w10, w8 ; CHECK-NEXT: csel w10, w10, w8, lo -; CHECK-NEXT: cmp w18, w8 -; CHECK-NEXT: fmov s18, w11 -; CHECK-NEXT: csel w11, w18, w8, lo +; CHECK-NEXT: cmp w11, w8 +; CHECK-NEXT: csel w9, w11, w8, lo ; CHECK-NEXT: cmp w12, w8 -; CHECK-NEXT: fcvtzu w17, d2 +; CHECK-NEXT: fcvtzu w11, d3 ; CHECK-NEXT: csel w12, w12, w8, lo -; CHECK-NEXT: cmp w16, w8 -; CHECK-NEXT: fcvtzu w18, d6 -; CHECK-NEXT: mov v18.s[1], w9 -; CHECK-NEXT: csel w9, w16, w8, lo -; CHECK-NEXT: cmp w14, w8 -; CHECK-NEXT: fmov s17, w10 -; CHECK-NEXT: csel w10, w14, w8, lo -; CHECK-NEXT: fcvtzu w16, d5 -; CHECK-NEXT: fmov s23, w10 -; CHECK-NEXT: cmp w17, w8 -; CHECK-NEXT: fcvtzu w14, d3 -; CHECK-NEXT: csel w10, w17, w8, lo +; CHECK-NEXT: cmp w13, w8 +; CHECK-NEXT: fmov s0, w10 +; CHECK-NEXT: csel w13, w13, w8, lo ; CHECK-NEXT: cmp w15, w8 -; CHECK-NEXT: fcvtzu w17, d4 -; CHECK-NEXT: mov v17.s[1], w13 -; CHECK-NEXT: mov v23.s[1], w9 -; CHECK-NEXT: csel w9, w15, w8, lo -; CHECK-NEXT: cmp w18, w8 -; CHECK-NEXT: fmov s22, w9 -; CHECK-NEXT: csel w9, w18, w8, lo +; CHECK-NEXT: csel w15, w15, w8, lo ; CHECK-NEXT: cmp w16, w8 -; CHECK-NEXT: fmov s16, w12 -; CHECK-NEXT: mov v22.s[1], w10 -; CHECK-NEXT: csel w10, w16, w8, lo -; CHECK-NEXT: cmp w14, w8 -; CHECK-NEXT: fmov s21, w10 -; CHECK-NEXT: csel w10, w14, w8, lo +; CHECK-NEXT: csel w16, w16, w8, lo +; 
CHECK-NEXT: cmp w11, w8 +; CHECK-NEXT: mov v0.s[1], w14 +; CHECK-NEXT: csel w11, w11, w8, lo ; CHECK-NEXT: cmp w17, w8 -; CHECK-NEXT: csel w8, w17, w8, lo -; CHECK-NEXT: mov v16.s[1], w11 -; CHECK-NEXT: mov v21.s[1], w9 -; CHECK-NEXT: fmov s20, w8 -; CHECK-NEXT: adrp x8, .LCPI85_0 -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI85_0] -; CHECK-NEXT: mov v20.s[1], w10 -; CHECK-NEXT: tbl v0.16b, { v16.16b, v17.16b, v18.16b, v19.16b }, v1.16b -; CHECK-NEXT: tbl v1.16b, { v20.16b, v21.16b, v22.16b, v23.16b }, v1.16b +; CHECK-NEXT: fmov s2, w15 +; CHECK-NEXT: csel w17, w17, w8, lo +; CHECK-NEXT: cmp w18, w8 +; CHECK-NEXT: csel w18, w18, w8, lo +; CHECK-NEXT: cmp w0, w8 +; CHECK-NEXT: csel w0, w0, w8, lo +; CHECK-NEXT: cmp w1, w8 +; CHECK-NEXT: fmov s3, w18 +; CHECK-NEXT: csel w10, w1, w8, lo +; CHECK-NEXT: cmp w2, w8 +; CHECK-NEXT: mov v2.s[1], w13 +; CHECK-NEXT: csel w14, w2, w8, lo +; CHECK-NEXT: cmp w3, w8 +; CHECK-NEXT: fcvtzu w2, d5 +; CHECK-NEXT: csel w1, w3, w8, lo +; CHECK-NEXT: mov v3.s[1], w17 +; CHECK-NEXT: fcvtzu w13, d1 +; CHECK-NEXT: fmov s4, w1 +; CHECK-NEXT: mov v0.s[2], w9 +; CHECK-NEXT: mov v2.s[2], w16 +; CHECK-NEXT: cmp w2, w8 +; CHECK-NEXT: mov v4.s[1], w14 +; CHECK-NEXT: csel w9, w2, w8, lo +; CHECK-NEXT: mov v3.s[2], w0 +; CHECK-NEXT: cmp w13, w8 +; CHECK-NEXT: mov v0.s[3], w12 +; CHECK-NEXT: csel w8, w13, w8, lo +; CHECK-NEXT: mov v2.s[3], w11 +; CHECK-NEXT: mov v4.s[2], w9 +; CHECK-NEXT: mov v3.s[3], w10 +; CHECK-NEXT: uzp1 v0.8h, v2.8h, v0.8h +; CHECK-NEXT: mov v4.s[3], w8 +; CHECK-NEXT: uzp1 v1.8h, v4.8h, v3.8h ; CHECK-NEXT: ret %x = call <16 x i16> @llvm.fptoui.sat.v16f64.v16i16(<16 x double> %f) ret <16 x i16> %x diff --git a/llvm/test/CodeGen/AArch64/neon-extracttruncate.ll b/llvm/test/CodeGen/AArch64/neon-extracttruncate.ll index 3f590226c4715..8030387d327da 100644 --- a/llvm/test/CodeGen/AArch64/neon-extracttruncate.ll +++ b/llvm/test/CodeGen/AArch64/neon-extracttruncate.ll @@ -37,20 +37,8 @@ entry: define <8 x i8> @extract_2_v4i32(<4 x i32> %a, <4 
x i32> %b) { ; CHECK-LABEL: extract_2_v4i32: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: mov w8, v0.s[1] -; CHECK-NEXT: mov w9, v0.s[2] -; CHECK-NEXT: mov w10, v0.s[3] -; CHECK-NEXT: mov v0.b[1], w8 -; CHECK-NEXT: mov w8, v1.s[1] -; CHECK-NEXT: mov v0.b[2], w9 -; CHECK-NEXT: mov w9, v1.s[2] -; CHECK-NEXT: mov v0.b[3], w10 -; CHECK-NEXT: mov v0.b[4], v1.b[0] -; CHECK-NEXT: mov v0.b[5], w8 -; CHECK-NEXT: mov w8, v1.s[3] -; CHECK-NEXT: mov v0.b[6], w9 -; CHECK-NEXT: mov v0.b[7], w8 -; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-NEXT: uzp1 v0.8h, v0.8h, v1.8h +; CHECK-NEXT: xtn v0.8b, v0.8h ; CHECK-NEXT: ret entry: %a0 = extractelement <4 x i32> %a, i32 0 diff --git a/llvm/test/CodeGen/AArch64/shuffle-tbl34.ll b/llvm/test/CodeGen/AArch64/shuffle-tbl34.ll index 0ef64789ad972..b689f58f76847 100644 --- a/llvm/test/CodeGen/AArch64/shuffle-tbl34.ll +++ b/llvm/test/CodeGen/AArch64/shuffle-tbl34.ll @@ -675,57 +675,33 @@ define <16 x i8> @insert4_v16i8(<8 x i8> %a, <16 x i8> %b, <8 x i8> %c, <16 x i8 ret <16 x i8> %i16 } - -; CHECK: .LCPI16_0: -; CHECK: .byte 0 -; CHECK: .byte 1 -; CHECK: .byte 4 -; CHECK: .byte 5 -; CHECK: .byte 16 -; CHECK: .byte 17 -; CHECK: .byte 20 -; CHECK: .byte 21 -; CHECK: .byte 32 -; CHECK: .byte 33 -; CHECK: .byte 36 -; CHECK: .byte 37 -; CHECK: .byte 48 -; CHECK: .byte 49 -; CHECK: .byte 52 -; CHECK: .byte 53 define <16 x i16> @test(<2 x double> %l213, <2 x double> %l231, <2 x double> %l249, <2 x double> %l267, <2 x double> %l285, <2 x double> %l303, <2 x double> %l321, <2 x double> %l339) { ; CHECK-LABEL: test: ; CHECK: // %bb.0: ; CHECK-NEXT: frintm v0.2d, v0.2d -; CHECK-NEXT: frintm v4.2d, v4.2d -; CHECK-NEXT: adrp x8, .LCPI16_0 ; CHECK-NEXT: frintm v1.2d, v1.2d -; CHECK-NEXT: frintm v5.2d, v5.2d ; CHECK-NEXT: frintm v2.2d, v2.2d -; CHECK-NEXT: frintm v6.2d, v6.2d ; CHECK-NEXT: frintm v3.2d, v3.2d +; CHECK-NEXT: frintm v4.2d, v4.2d +; CHECK-NEXT: frintm v5.2d, v5.2d +; CHECK-NEXT: frintm v6.2d, v6.2d ; CHECK-NEXT: frintm v7.2d, 
v7.2d ; CHECK-NEXT: fcvtzs v0.2d, v0.2d -; CHECK-NEXT: fcvtzs v4.2d, v4.2d ; CHECK-NEXT: fcvtzs v1.2d, v1.2d -; CHECK-NEXT: fcvtzs v5.2d, v5.2d ; CHECK-NEXT: fcvtzs v2.2d, v2.2d -; CHECK-NEXT: fcvtzs v6.2d, v6.2d ; CHECK-NEXT: fcvtzs v3.2d, v3.2d +; CHECK-NEXT: fcvtzs v4.2d, v4.2d +; CHECK-NEXT: fcvtzs v5.2d, v5.2d +; CHECK-NEXT: fcvtzs v6.2d, v6.2d ; CHECK-NEXT: fcvtzs v7.2d, v7.2d -; CHECK-NEXT: xtn v16.2s, v0.2d -; CHECK-NEXT: xtn v20.2s, v4.2d -; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI16_0] -; CHECK-NEXT: xtn v17.2s, v1.2d -; CHECK-NEXT: xtn v21.2s, v5.2d -; CHECK-NEXT: xtn v18.2s, v2.2d -; CHECK-NEXT: xtn v22.2s, v6.2d -; CHECK-NEXT: xtn v19.2s, v3.2d -; CHECK-NEXT: xtn v23.2s, v7.2d -; CHECK-NEXT: tbl v1.16b, { v16.16b, v17.16b, v18.16b, v19.16b }, v0.16b -; CHECK-NEXT: tbl v2.16b, { v20.16b, v21.16b, v22.16b, v23.16b }, v0.16b -; CHECK-NEXT: uzp1 v0.8h, v1.8h, v2.8h -; CHECK-NEXT: uzp2 v1.8h, v1.8h, v2.8h +; CHECK-NEXT: uzp1 v0.4s, v0.4s, v1.4s +; CHECK-NEXT: uzp1 v2.4s, v2.4s, v3.4s +; CHECK-NEXT: uzp1 v3.4s, v4.4s, v5.4s +; CHECK-NEXT: uzp1 v1.4s, v6.4s, v7.4s +; CHECK-NEXT: uzp1 v2.8h, v0.8h, v2.8h +; CHECK-NEXT: uzp1 v1.8h, v3.8h, v1.8h +; CHECK-NEXT: uzp1 v0.8h, v2.8h, v1.8h +; CHECK-NEXT: uzp2 v1.8h, v2.8h, v1.8h ; CHECK-NEXT: ret %l214 = call fast <2 x double> @llvm.floor.v2f64(<2 x double> %l213) %l215 = fptosi <2 x double> %l214 to <2 x i16> diff --git a/llvm/test/CodeGen/AArch64/vcvt-oversize.ll b/llvm/test/CodeGen/AArch64/vcvt-oversize.ll index 380bdbcc7f740..611940546bc1a 100644 --- a/llvm/test/CodeGen/AArch64/vcvt-oversize.ll +++ b/llvm/test/CodeGen/AArch64/vcvt-oversize.ll @@ -9,9 +9,8 @@ define <8 x i8> @float_to_i8(ptr %in) { ; CHECK-NEXT: fadd v0.4s, v0.4s, v0.4s ; CHECK-NEXT: fcvtzs v0.4s, v0.4s ; CHECK-NEXT: fcvtzs v1.4s, v1.4s -; CHECK-NEXT: xtn v0.4h, v0.4s -; CHECK-NEXT: xtn v1.4h, v1.4s -; CHECK-NEXT: uzp1 v0.8b, v1.8b, v0.8b +; CHECK-NEXT: uzp1 v0.8h, v1.8h, v0.8h +; CHECK-NEXT: xtn v0.8b, v0.8h ; CHECK-NEXT: ret %l = load <8 x float>, 
ptr %in %scale = fmul <8 x float> %l,