diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h index d2f2e7931656..16638a6c24d8 100644 --- a/llvm/include/llvm/CodeGen/TargetLowering.h +++ b/llvm/include/llvm/CodeGen/TargetLowering.h @@ -1050,6 +1050,14 @@ class TargetLoweringBase { return Legal; } + // Use this to bypass the builtin legalization decisions for EVTs. The builtin + // scheme may lead to undesirable results (e.g. power-of-two-padding or + // scalarization) for EVT-typed nodes (e.g. v7f16). + virtual Optional<LegalizeKind> getCustomTypeConversion(LLVMContext &Context, + EVT VT) const { + return None; + } + /// Return how this operation should be treated: either it is legal, needs to /// be promoted to a larger size, needs to be expanded to some other code /// sequence, or the target has a custom expander for it. diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 0a8e2b0adf41..f1a38e15b2dd 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -422,6 +422,27 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL, return Val; } + // Vector/Vector bitcast. + if (ValueVT.getSizeInBits() == PartEVT.getSizeInBits()) + return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val); + + if (ValueVT.isScalableVector()) { + assert(PartEVT.getVectorElementCount() == + ValueVT.getVectorElementCount()); + // Promote or truncate. + return DAG.getAnyExtOrTrunc(Val, DL, ValueVT); + } + + // Shorten and promote.
+ assert(PartEVT.getVectorNumElements() >= ValueVT.getVectorNumElements()); + if (PartEVT.getVectorNumElements() > ValueVT.getVectorNumElements()) { + EVT ClippedVT = + EVT::getVectorVT(*DAG.getContext(), PartEVT.getVectorElementType(), + ValueVT.getVectorNumElements()); + Val = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ClippedVT, Val, + DAG.getVectorIdxConstant(0, DL)); + } + // Promoted vector extract return DAG.getAnyExtOrTrunc(Val, DL, ValueVT); } @@ -611,6 +632,7 @@ static void getCopyToParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val, static SDValue widenVectorToPartType(SelectionDAG &DAG, SDValue Val, const SDLoc &DL, EVT PartVT) { + if (!PartVT.isVector()) return SDValue(); @@ -618,19 +640,34 @@ static SDValue widenVectorToPartType(SelectionDAG &DAG, SDValue Val, ElementCount PartNumElts = PartVT.getVectorElementCount(); ElementCount ValueNumElts = ValueVT.getVectorElementCount(); + // Widening a scalable vector to another scalable vector is done by inserting + // the vector into a larger undef one. + if (PartVT.isFixedLengthVector() && + (PartNumElts.getFixedValue() > ValueNumElts.getFixedValue())) { + // Promote first? + if (PartVT.getVectorElementType() != ValueVT.getVectorElementType()) { + if (PartVT.getVectorElementType().getScalarSizeInBits() < + ValueVT.getVectorElementType().getScalarSizeInBits()) { + return SDValue(); + } + + // Promote, then extract. + EVT PromotedVT = + EVT::getVectorVT(*DAG.getContext(), PartVT.getVectorElementType(), + ValueVT.getVectorNumElements()); + Val = DAG.getAnyExtOrTrunc(Val, DL, PromotedVT); + } + } else if (PartNumElts.isScalable()) + return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, PartVT, DAG.getUNDEF(PartVT), + Val, DAG.getVectorIdxConstant(0, DL)); // We only support widening vectors with equivalent element types and // fixed/scalable properties. If a target needs to widen a fixed-length type // to a scalable one, it should be possible to use INSERT_SUBVECTOR below. 
- if (ElementCount::isKnownLE(PartNumElts, ValueNumElts) || + else if (ElementCount::isKnownLE(PartNumElts, ValueNumElts) || PartNumElts.isScalable() != ValueNumElts.isScalable() || PartVT.getVectorElementType() != ValueVT.getVectorElementType()) return SDValue(); - // Widening a scalable vector to another scalable vector is done by inserting - // the vector into a larger undef one. - if (PartNumElts.isScalable()) - return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, PartVT, DAG.getUNDEF(PartVT), - Val, DAG.getVectorIdxConstant(0, DL)); EVT ElementVT = PartVT.getVectorElementType(); // Vector widening case, e.g. <2 x float> -> <4 x float>. Shuffle in diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp index 74946c09fad9..8e4db9644456 100644 --- a/llvm/lib/CodeGen/TargetLoweringBase.cpp +++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp @@ -956,6 +956,11 @@ void TargetLoweringBase::setJumpIsExpensive(bool isExpensive) { TargetLoweringBase::LegalizeKind TargetLoweringBase::getTypeConversion(LLVMContext &Context, EVT VT) const { + // Fully customized legalization. + Optional<LegalizeKind> CustomLK = getCustomTypeConversion(Context, VT); + if (CustomLK) + return *CustomLK; + // If this is a simple type, use the ComputeRegisterProp mechanism.
if (VT.isSimple()) { MVT SVT = VT.getSimpleVT(); diff --git a/llvm/lib/Target/VE/VECallingConv.td b/llvm/lib/Target/VE/VECallingConv.td index 54a212a69579..5e60da9949bc 100644 --- a/llvm/lib/Target/VE/VECallingConv.td +++ b/llvm/lib/Target/VE/VECallingConv.td @@ -30,6 +30,9 @@ class CCIfNotSubtarget class CCIfVPU : CCIfSubtarget<"enableVPU()",A>; class CCIfNotVPU : CCIfNotSubtarget<"enableVPU()",A>; +class CCIfPacked : CCIfVPU>; +class CCIfNotPacked : CCIfNotSubtarget<"hasPackedMode()",A>; + def CC_VE_C_Stack: CallingConv<[ // F128 are assigned to the stack in 16-byte aligned units CCIfType<[f128], CCAssignToStackWithShadow<16, 16, [SX7]>>, @@ -176,16 +179,14 @@ def RetCC_VE_C : CallingConv<[ ///// fastcc - fast vreg passing ///// def CC_VE_Fast : CallingConv<[ // Virtual packed registers. - CCIfVPU>>, // vector --> generic vector registers - CCIfVPU>>, - - CCIfVPU>>, // vector mask --> generic vector mask registers @@ -193,7 +194,7 @@ def CC_VE_Fast : CallingConv<[ CCAssignToReg<[VM1, VM2, VM3, VM4, VM5, VM6, VM7]>>>, // pair of vector mask --> generic vector mask registers - CCIfVPU>>, // Default to the standard cc @@ -202,7 +203,7 @@ def CC_VE_Fast : CallingConv<[ def RetCC_VE_Fast : CallingConv<[ // Virtual packed registers. 
- CCIfVPU>>, @@ -210,7 +211,7 @@ def RetCC_VE_Fast : CallingConv<[ CCIfVPU>>, - CCIfVPU>>, // vector mask --> generic vector mask registers @@ -218,7 +219,7 @@ def RetCC_VE_Fast : CallingConv<[ CCAssignToReg<[VM1, VM2, VM3, VM4, VM5, VM6, VM7]>>>, // pair of vector mask --> generic vector mask registers - CCIfVPU>>, diff --git a/llvm/lib/Target/VE/VEISelLowering.cpp b/llvm/lib/Target/VE/VEISelLowering.cpp index 6d9eca342511..d73c3437b9bb 100644 --- a/llvm/lib/Target/VE/VEISelLowering.cpp +++ b/llvm/lib/Target/VE/VEISelLowering.cpp @@ -3777,3 +3777,201 @@ SDValue VETargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { return lowerVAARG(Op, DAG); } } + +static bool isPackableElemVT(EVT VT) { + if (VT.isVector()) + return false; + return VT.getScalarSizeInBits() <= 32; +} + +static bool isVectorRegisterVT(EVT VT) { + if (!VT.isVector() || VT.isScalableVector()) + return false; + unsigned NumElems = VT.getVectorNumElements(); + EVT ElemVT = VT.getVectorElementType(); + + // Not a legal element count. + if ((NumElems != 256) && (NumElems != 512)) + return false; + + // Legal as both regular and packed vectors. + if (ElemVT == MVT::i1 || ElemVT == MVT::i32 || ElemVT == MVT::f32) + return true; + + // Only legal in regular mode. 
+ return NumElems == 256; +} + +static TargetLoweringBase::LegalizeKind +getPromoteElementConversion(LLVMContext &Context, EVT ElemVT, + unsigned NumElems) { + using LegalizeKind = TargetLoweringBase::LegalizeKind; + using LegalizeTypeAction = TargetLoweringBase::LegalizeTypeAction; + + LegalizeTypeAction LTA; + MVT PromotedElemVT; + if (ElemVT.isFloatingPoint()) { + PromotedElemVT = MVT::f32; + LTA = LegalizeTypeAction::TypePromoteFloat; + } else { + assert(ElemVT.isInteger()); + PromotedElemVT = MVT::i32; + LTA = LegalizeTypeAction::TypePromoteInteger; + } + return LegalizeKind(LTA, EVT::getVectorVT(Context, PromotedElemVT, NumElems)); +} + +static TargetLoweringBase::LegalizeKind +getWidenVectorConversion(LLVMContext &Context, EVT ElemVT, + unsigned LegalNumElems) { + using LegalizeKind = TargetLoweringBase::LegalizeKind; + using LegalizeTypeAction = TargetLoweringBase::LegalizeTypeAction; + + return LegalizeKind(LegalizeTypeAction::TypeWidenVector, + EVT::getVectorVT(Context, ElemVT, LegalNumElems)); +} + +static TargetLoweringBase::LegalizeKind +getSplitVectorConversion(LLVMContext &Context, EVT ElemVT, unsigned NumElems) { + using LegalizeKind = TargetLoweringBase::LegalizeKind; + using LegalizeTypeAction = TargetLoweringBase::LegalizeTypeAction; + + return LegalizeKind(LegalizeTypeAction::TypeSplitVector, + EVT::getVectorVT(Context, ElemVT, (NumElems + 1) / 2)); +} + +Optional<TargetLoweringBase::LegalizeKind> +VETargetLowering::getCustomTypeConversion(LLVMContext &Context, EVT VT) const { + // Do not interfere with SPU legalization. + if (!VT.isVector() || !Subtarget->enableVPU() || + VT.getVectorNumElements() == 1) + return None; + + EVT ElemVT = VT.getVectorElementType(); + unsigned NumElems = VT.getVectorNumElements(); + auto ElemBits = ElemVT.getScalarSizeInBits(); + + // Only use packed mode when surpassing the regular (256 elements) vector + // size.
+ const bool RequiresPackedRegister = + isOverPackedType(VT) || (isPackableElemVT(ElemVT) && NumElems > 256); + + // Already a legal type. + if (isVectorRegisterVT(VT) && + (!RequiresPackedRegister || Subtarget->hasPackedMode())) + return None; + + // Promote small elements to i/f32. + if (1 < ElemBits && ElemBits < 32) + return getPromoteElementConversion(Context, ElemVT, NumElems); + + // Excessive element size. + if (ElemBits > 64) + return None; // Defer to builtin expansion for oversized vectors. + + // Only use packed mode when surpassing the regular (256 elements) vector + // size. + const bool UsePackedRegister = + Subtarget->hasPackedMode() && RequiresPackedRegister; + + // Widen to register width. + const unsigned RegisterNumElems = UsePackedRegister ? 512 : 256; + if (NumElems < RegisterNumElems) + return getWidenVectorConversion(Context, ElemVT, RegisterNumElems); + + // Split to register width. + // TODO: Teach isel to split non-power-of-two vectors. + if (NumElems > RegisterNumElems && (NumElems % 2 == 0)) + return getSplitVectorConversion(Context, ElemVT, NumElems); + + // Type is either legal or not custom converted. 
+ return None; +} + +Optional<VETargetLowering::RegisterCountPair> +VETargetLowering::getRegistersForCallingConv(LLVMContext &Context, + CallingConv::ID CC, EVT VT) const { + using RegisterCount = VETargetLowering::RegisterCountPair; + if (CC != CallingConv::Fast) + return None; + if (!VT.isVector() || VT.isScalableVector()) + return None; + + MVT RegisterVT; + EVT IntermediateVT; + unsigned NumIntermediates; + unsigned NumRegs = getVectorTypeBreakdownForCallingConv( + Context, CC, VT, IntermediateVT, NumIntermediates, RegisterVT); + return RegisterCount{RegisterVT, NumRegs}; +} + +unsigned VETargetLowering::getVectorTypeBreakdownForCallingConv( + LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT, + unsigned &NumIntermediates, MVT &RegisterVT) const { + auto DefaultImpl = [&]() { + return TargetLoweringBase::getVectorTypeBreakdownForCallingConv( + Context, CC, VT, IntermediateVT, NumIntermediates, RegisterVT); + }; + + auto ElemVT = VT.getVectorElementType(); + unsigned NumElems = VT.isScalableVector() ? 0 : VT.getVectorNumElements(); + const bool RequiresPackedRegister = + !VT.isScalableVector() && + (isOverPackedType(VT) || (isPackableElemVT(ElemVT) && NumElems > 256)); + + if (CC != CallingConv::Fast || VT.isScalableVector() || + (isVectorRegisterVT(VT) && + !(Subtarget->hasPackedMode() && RequiresPackedRegister))) + return DefaultImpl(); + + // fastcc - map everything to vregs. + auto LK = getCustomTypeConversion(Context, VT); + // Non-custom converted type - back to builtin logic. + if (!LK.hasValue()) + return DefaultImpl(); + + // Compute the fixed point of the custom type conversion rules. + // We want to have the same vector layout inside functions as well as across + // function boundaries. + + // IntermediateVT : used to copy the parts.
+ IntermediateVT = VT; + NumIntermediates = 1; + + EVT NextVT; + do { + NextVT = LK->second; + auto LTA = LK->first; + + switch (LTA) { + default: + return DefaultImpl(); + + case LegalizeTypeAction::TypePromoteFloat: + case LegalizeTypeAction::TypePromoteInteger: + // Promote elements across call boundaries. + IntermediateVT = NextVT; + break; + + case LegalizeTypeAction::TypeWidenVector: + // Retain all information about the original vector length. + // That is, keep the IntermediateVT at the original vector length if + // possible. + break; + + case LegalizeTypeAction::TypeSplitVector: + // The last split results in the intermediate VT used for copying vectors + // at calls. + IntermediateVT = NextVT; + NumIntermediates *= 2; + break; + } + + LK = getCustomTypeConversion(Context, NextVT); + } while (LK.hasValue()); + + RegisterVT = NextVT.getSimpleVT(); + + // Must converge in a valid RegisterVT. + return NumIntermediates; +} diff --git a/llvm/lib/Target/VE/VEISelLowering.h b/llvm/lib/Target/VE/VEISelLowering.h index cfd2a0aeafb0..b264b7cd7b7a 100644 --- a/llvm/lib/Target/VE/VEISelLowering.h +++ b/llvm/lib/Target/VE/VEISelLowering.h @@ -195,6 +195,41 @@ class VETargetLowering final : public TargetLowering, public VELoweringInfo { return ISD::ANY_EXTEND; } + /// Custom CC Mapping { + using RegisterCountPair = std::pair<MVT, unsigned>; + // Map all vector EVTs to vector or vector mask registers.
+ MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, + EVT VT) const override { + auto Opt = getRegistersForCallingConv(Context, CC, VT); + if (!Opt.hasValue()) + return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT); + return Opt->first; + } + + unsigned getNumRegistersForCallingConv(LLVMContext &Context, + CallingConv::ID CC, + EVT VT) const override { + auto Opt = getRegistersForCallingConv(Context, CC, VT); + if (!Opt.hasValue()) + return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT); + return Opt->second; + } + + Optional<RegisterCountPair> getRegistersForCallingConv(LLVMContext &Context, + CallingConv::ID CC, + EVT VT) const; + + unsigned getVectorTypeBreakdownForCallingConv(LLVMContext &Context, + CallingConv::ID CC, EVT VT, + EVT &IntermediateVT, + unsigned &NumIntermediates, + MVT &RegisterVT) const override; + /// } Custom CC Mapping + + /// Custom Lower { + + Optional<LegalizeKind> getCustomTypeConversion(LLVMContext &Context, + EVT VT) const override; const MCExpr *LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI, const MachineBasicBlock *MBB, unsigned uid, diff --git a/llvm/test/CodeGen/VE/Packed/vec_add_packed.ll b/llvm/test/CodeGen/VE/Packed/vec_add_packed.ll index 857914b93425..95e626ce720c 100644 --- a/llvm/test/CodeGen/VE/Packed/vec_add_packed.ll +++ b/llvm/test/CodeGen/VE/Packed/vec_add_packed.ll @@ -27,18 +27,9 @@ define fastcc <2 x i64> @vec_add_v2f64(<2 x i64> %a, <2 x i64> %b) { define fastcc <3 x i64> @vec_add_v3f64(<3 x i64> %a, <3 x i64> %b) { ; CHECK-LABEL: vec_add_v3f64: ; CHECK: # %bb.0: -; CHECK-NEXT: lsv %v0(0), %s3 -; CHECK-NEXT: lsv %v0(1), %s4 -; CHECK-NEXT: lsv %v0(2), %s5 -; CHECK-NEXT: lsv %v1(0), %s0 -; CHECK-NEXT: lsv %v1(1), %s1 -; CHECK-NEXT: lsv %v1(2), %s2 ; CHECK-NEXT: or %s0, 3, (0)1 ; CHECK-NEXT: lvl %s0 -; CHECK-NEXT: vadds.l %v0, %v1, %v0 -; CHECK-NEXT: lvs %s0, %v0(0) -; CHECK-NEXT: lvs %s1, %v0(1) -; CHECK-NEXT: lvs %s2, %v0(2) +; CHECK-NEXT: vadds.l %v0, %v0, %v1 ; CHECK-NEXT:
b.l.t (, %s10) %r = add <3 x i64> %a, %b ret <3 x i64> %r @@ -198,24 +189,9 @@ define fastcc <2 x i32> @vec_add_v2f32(<2 x i32> %a, <2 x i32> %b) { define fastcc <3 x i32> @vec_add_v3f32(<3 x i32> %a, <3 x i32> %b) { ; CHECK-LABEL: vec_add_v3f32: ; CHECK: # %bb.0: -; CHECK-NEXT: and %s4, %s4, (32)0 -; CHECK-NEXT: and %s3, %s3, (32)0 -; CHECK-NEXT: lsv %v0(0), %s3 -; CHECK-NEXT: lsv %v0(1), %s4 -; CHECK-NEXT: and %s3, %s5, (32)0 -; CHECK-NEXT: lsv %v0(2), %s3 -; CHECK-NEXT: and %s1, %s1, (32)0 -; CHECK-NEXT: and %s0, %s0, (32)0 -; CHECK-NEXT: lsv %v1(0), %s0 -; CHECK-NEXT: lsv %v1(1), %s1 -; CHECK-NEXT: and %s0, %s2, (32)0 -; CHECK-NEXT: lsv %v1(2), %s0 ; CHECK-NEXT: or %s0, 3, (0)1 ; CHECK-NEXT: lvl %s0 -; CHECK-NEXT: vadds.w.sx %v0, %v1, %v0 -; CHECK-NEXT: lvs %s0, %v0(0) -; CHECK-NEXT: lvs %s1, %v0(1) -; CHECK-NEXT: lvs %s2, %v0(2) +; CHECK-NEXT: vadds.w.sx %v0, %v0, %v1 ; CHECK-NEXT: b.l.t (, %s10) %r = add <3 x i32> %a, %b ret <3 x i32> %r diff --git a/llvm/test/CodeGen/VE/Packed/vec_and_packed.ll b/llvm/test/CodeGen/VE/Packed/vec_and_packed.ll index 39ef745f56cc..f215120cba42 100644 --- a/llvm/test/CodeGen/VE/Packed/vec_and_packed.ll +++ b/llvm/test/CodeGen/VE/Packed/vec_and_packed.ll @@ -27,18 +27,9 @@ define fastcc <2 x i64> @vec_and_v2f64(<2 x i64> %a, <2 x i64> %b) { define fastcc <3 x i64> @vec_and_v3f64(<3 x i64> %a, <3 x i64> %b) { ; CHECK-LABEL: vec_and_v3f64: ; CHECK: # %bb.0: -; CHECK-NEXT: lsv %v0(0), %s3 -; CHECK-NEXT: lsv %v0(1), %s4 -; CHECK-NEXT: lsv %v0(2), %s5 -; CHECK-NEXT: lsv %v1(0), %s0 -; CHECK-NEXT: lsv %v1(1), %s1 -; CHECK-NEXT: lsv %v1(2), %s2 ; CHECK-NEXT: or %s0, 3, (0)1 ; CHECK-NEXT: lvl %s0 -; CHECK-NEXT: vand %v0, %v1, %v0 -; CHECK-NEXT: lvs %s0, %v0(0) -; CHECK-NEXT: lvs %s1, %v0(1) -; CHECK-NEXT: lvs %s2, %v0(2) +; CHECK-NEXT: vand %v0, %v0, %v1 ; CHECK-NEXT: b.l.t (, %s10) %r = and <3 x i64> %a, %b ret <3 x i64> %r @@ -198,24 +189,9 @@ define fastcc <2 x i32> @vec_and_v2f32(<2 x i32> %a, <2 x i32> %b) { define fastcc <3 x i32> 
@vec_and_v3f32(<3 x i32> %a, <3 x i32> %b) { ; CHECK-LABEL: vec_and_v3f32: ; CHECK: # %bb.0: -; CHECK-NEXT: and %s4, %s4, (32)0 -; CHECK-NEXT: and %s3, %s3, (32)0 -; CHECK-NEXT: lsv %v0(0), %s3 -; CHECK-NEXT: lsv %v0(1), %s4 -; CHECK-NEXT: and %s3, %s5, (32)0 -; CHECK-NEXT: lsv %v0(2), %s3 -; CHECK-NEXT: and %s1, %s1, (32)0 -; CHECK-NEXT: and %s0, %s0, (32)0 -; CHECK-NEXT: lsv %v1(0), %s0 -; CHECK-NEXT: lsv %v1(1), %s1 -; CHECK-NEXT: and %s0, %s2, (32)0 -; CHECK-NEXT: lsv %v1(2), %s0 ; CHECK-NEXT: or %s0, 3, (0)1 ; CHECK-NEXT: lvl %s0 -; CHECK-NEXT: pvand.lo %v0, %v1, %v0 -; CHECK-NEXT: lvs %s0, %v0(0) -; CHECK-NEXT: lvs %s1, %v0(1) -; CHECK-NEXT: lvs %s2, %v0(2) +; CHECK-NEXT: pvand.lo %v0, %v0, %v1 ; CHECK-NEXT: b.l.t (, %s10) %r = and <3 x i32> %a, %b ret <3 x i32> %r diff --git a/llvm/test/CodeGen/VE/Packed/vec_fadd_packed.ll b/llvm/test/CodeGen/VE/Packed/vec_fadd_packed.ll index f8fe006cfde4..4c759603288b 100644 --- a/llvm/test/CodeGen/VE/Packed/vec_fadd_packed.ll +++ b/llvm/test/CodeGen/VE/Packed/vec_fadd_packed.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=ve-unknown-unknown -mattr=+packed | FileCheck %s ; Function Attrs: nounwind @@ -26,18 +27,9 @@ define fastcc <2 x double> @vec_add_v2f64(<2 x double> %a, <2 x double> %b) { define fastcc <3 x double> @vec_add_v3f64(<3 x double> %a, <3 x double> %b) { ; CHECK-LABEL: vec_add_v3f64: ; CHECK: # %bb.0: -; CHECK-NEXT: lsv %v0(0), %s3 -; CHECK-NEXT: lsv %v0(1), %s4 -; CHECK-NEXT: lsv %v0(2), %s5 -; CHECK-NEXT: lsv %v1(0), %s0 -; CHECK-NEXT: lsv %v1(1), %s1 -; CHECK-NEXT: lsv %v1(2), %s2 ; CHECK-NEXT: or %s0, 3, (0)1 ; CHECK-NEXT: lvl %s0 -; CHECK-NEXT: vfadd.d %v0, %v1, %v0 -; CHECK-NEXT: lvs %s0, %v0(0) -; CHECK-NEXT: lvs %s1, %v0(1) -; CHECK-NEXT: lvs %s2, %v0(2) +; CHECK-NEXT: vfadd.d %v0, %v0, %v1 ; CHECK-NEXT: b.l.t (, %s10) %r = fadd <3 x double> %a, %b ret <3 x double> %r @@ -197,18 +189,9 @@ define fastcc <2 x float> 
@vec_add_v2f32(<2 x float> %a, <2 x float> %b) { define fastcc <3 x float> @vec_add_v3f32(<3 x float> %a, <3 x float> %b) { ; CHECK-LABEL: vec_add_v3f32: ; CHECK: # %bb.0: -; CHECK-NEXT: lsv %v0(0), %s3 -; CHECK-NEXT: lsv %v0(1), %s4 -; CHECK-NEXT: lsv %v0(2), %s5 -; CHECK-NEXT: lsv %v1(0), %s0 -; CHECK-NEXT: lsv %v1(1), %s1 -; CHECK-NEXT: lsv %v1(2), %s2 ; CHECK-NEXT: or %s0, 3, (0)1 ; CHECK-NEXT: lvl %s0 -; CHECK-NEXT: pvfadd.up %v0, %v1, %v0 -; CHECK-NEXT: lvs %s0, %v0(0) -; CHECK-NEXT: lvs %s1, %v0(1) -; CHECK-NEXT: lvs %s2, %v0(2) +; CHECK-NEXT: pvfadd.up %v0, %v0, %v1 ; CHECK-NEXT: b.l.t (, %s10) %r = fadd <3 x float> %a, %b ret <3 x float> %r diff --git a/llvm/test/CodeGen/VE/Packed/vec_mul_packed.ll b/llvm/test/CodeGen/VE/Packed/vec_mul_packed.ll index 20a874bbe1c5..2ee5e7591306 100644 --- a/llvm/test/CodeGen/VE/Packed/vec_mul_packed.ll +++ b/llvm/test/CodeGen/VE/Packed/vec_mul_packed.ll @@ -27,18 +27,9 @@ define fastcc <2 x i64> @vec_mul_v2f64(<2 x i64> %a, <2 x i64> %b) { define fastcc <3 x i64> @vec_mul_v3f64(<3 x i64> %a, <3 x i64> %b) { ; CHECK-LABEL: vec_mul_v3f64: ; CHECK: # %bb.0: -; CHECK-NEXT: lsv %v0(0), %s3 -; CHECK-NEXT: lsv %v0(1), %s4 -; CHECK-NEXT: lsv %v0(2), %s5 -; CHECK-NEXT: lsv %v1(0), %s0 -; CHECK-NEXT: lsv %v1(1), %s1 -; CHECK-NEXT: lsv %v1(2), %s2 ; CHECK-NEXT: or %s0, 3, (0)1 ; CHECK-NEXT: lvl %s0 -; CHECK-NEXT: vmuls.l %v0, %v1, %v0 -; CHECK-NEXT: lvs %s0, %v0(0) -; CHECK-NEXT: lvs %s1, %v0(1) -; CHECK-NEXT: lvs %s2, %v0(2) +; CHECK-NEXT: vmuls.l %v0, %v0, %v1 ; CHECK-NEXT: b.l.t (, %s10) %r = mul <3 x i64> %a, %b ret <3 x i64> %r @@ -198,24 +189,9 @@ define fastcc <2 x i32> @vec_mul_v2f32(<2 x i32> %a, <2 x i32> %b) { define fastcc <3 x i32> @vec_mul_v3f32(<3 x i32> %a, <3 x i32> %b) { ; CHECK-LABEL: vec_mul_v3f32: ; CHECK: # %bb.0: -; CHECK-NEXT: and %s4, %s4, (32)0 -; CHECK-NEXT: and %s3, %s3, (32)0 -; CHECK-NEXT: lsv %v0(0), %s3 -; CHECK-NEXT: lsv %v0(1), %s4 -; CHECK-NEXT: and %s3, %s5, (32)0 -; CHECK-NEXT: lsv %v0(2), %s3 -; 
CHECK-NEXT: and %s1, %s1, (32)0 -; CHECK-NEXT: and %s0, %s0, (32)0 -; CHECK-NEXT: lsv %v1(0), %s0 -; CHECK-NEXT: lsv %v1(1), %s1 -; CHECK-NEXT: and %s0, %s2, (32)0 -; CHECK-NEXT: lsv %v1(2), %s0 ; CHECK-NEXT: or %s0, 3, (0)1 ; CHECK-NEXT: lvl %s0 -; CHECK-NEXT: vmuls.w.sx %v0, %v1, %v0 -; CHECK-NEXT: lvs %s0, %v0(0) -; CHECK-NEXT: lvs %s1, %v0(1) -; CHECK-NEXT: lvs %s2, %v0(2) +; CHECK-NEXT: vmuls.w.sx %v0, %v0, %v1 ; CHECK-NEXT: b.l.t (, %s10) %r = mul <3 x i32> %a, %b ret <3 x i32> %r diff --git a/llvm/test/CodeGen/VE/Packed/vec_or_packed.ll b/llvm/test/CodeGen/VE/Packed/vec_or_packed.ll index 2f741875bda9..365be1cf6e27 100644 --- a/llvm/test/CodeGen/VE/Packed/vec_or_packed.ll +++ b/llvm/test/CodeGen/VE/Packed/vec_or_packed.ll @@ -27,18 +27,9 @@ define fastcc <2 x i64> @vec_or_v2f64(<2 x i64> %a, <2 x i64> %b) { define fastcc <3 x i64> @vec_or_v3f64(<3 x i64> %a, <3 x i64> %b) { ; CHECK-LABEL: vec_or_v3f64: ; CHECK: # %bb.0: -; CHECK-NEXT: lsv %v0(0), %s3 -; CHECK-NEXT: lsv %v0(1), %s4 -; CHECK-NEXT: lsv %v0(2), %s5 -; CHECK-NEXT: lsv %v1(0), %s0 -; CHECK-NEXT: lsv %v1(1), %s1 -; CHECK-NEXT: lsv %v1(2), %s2 ; CHECK-NEXT: or %s0, 3, (0)1 ; CHECK-NEXT: lvl %s0 -; CHECK-NEXT: vor %v0, %v1, %v0 -; CHECK-NEXT: lvs %s0, %v0(0) -; CHECK-NEXT: lvs %s1, %v0(1) -; CHECK-NEXT: lvs %s2, %v0(2) +; CHECK-NEXT: vor %v0, %v0, %v1 ; CHECK-NEXT: b.l.t (, %s10) %r = or <3 x i64> %a, %b ret <3 x i64> %r @@ -198,24 +189,9 @@ define fastcc <2 x i32> @vec_or_v2f32(<2 x i32> %a, <2 x i32> %b) { define fastcc <3 x i32> @vec_or_v3f32(<3 x i32> %a, <3 x i32> %b) { ; CHECK-LABEL: vec_or_v3f32: ; CHECK: # %bb.0: -; CHECK-NEXT: and %s4, %s4, (32)0 -; CHECK-NEXT: and %s3, %s3, (32)0 -; CHECK-NEXT: lsv %v0(0), %s3 -; CHECK-NEXT: lsv %v0(1), %s4 -; CHECK-NEXT: and %s3, %s5, (32)0 -; CHECK-NEXT: lsv %v0(2), %s3 -; CHECK-NEXT: and %s1, %s1, (32)0 -; CHECK-NEXT: and %s0, %s0, (32)0 -; CHECK-NEXT: lsv %v1(0), %s0 -; CHECK-NEXT: lsv %v1(1), %s1 -; CHECK-NEXT: and %s0, %s2, (32)0 -; CHECK-NEXT: lsv 
%v1(2), %s0 ; CHECK-NEXT: or %s0, 3, (0)1 ; CHECK-NEXT: lvl %s0 -; CHECK-NEXT: pvor.lo %v0, %v1, %v0 -; CHECK-NEXT: lvs %s0, %v0(0) -; CHECK-NEXT: lvs %s1, %v0(1) -; CHECK-NEXT: lvs %s2, %v0(2) +; CHECK-NEXT: pvor.lo %v0, %v0, %v1 ; CHECK-NEXT: b.l.t (, %s10) %r = or <3 x i32> %a, %b ret <3 x i32> %r diff --git a/llvm/test/CodeGen/VE/Packed/vec_xor_packed.ll b/llvm/test/CodeGen/VE/Packed/vec_xor_packed.ll index bc550e68d68f..bf09ad07a7ed 100644 --- a/llvm/test/CodeGen/VE/Packed/vec_xor_packed.ll +++ b/llvm/test/CodeGen/VE/Packed/vec_xor_packed.ll @@ -27,18 +27,9 @@ define fastcc <2 x i64> @vec_xor_v2f64(<2 x i64> %a, <2 x i64> %b) { define fastcc <3 x i64> @vec_xor_v3f64(<3 x i64> %a, <3 x i64> %b) { ; CHECK-LABEL: vec_xor_v3f64: ; CHECK: # %bb.0: -; CHECK-NEXT: lsv %v0(0), %s3 -; CHECK-NEXT: lsv %v0(1), %s4 -; CHECK-NEXT: lsv %v0(2), %s5 -; CHECK-NEXT: lsv %v1(0), %s0 -; CHECK-NEXT: lsv %v1(1), %s1 -; CHECK-NEXT: lsv %v1(2), %s2 ; CHECK-NEXT: or %s0, 3, (0)1 ; CHECK-NEXT: lvl %s0 -; CHECK-NEXT: vxor %v0, %v1, %v0 -; CHECK-NEXT: lvs %s0, %v0(0) -; CHECK-NEXT: lvs %s1, %v0(1) -; CHECK-NEXT: lvs %s2, %v0(2) +; CHECK-NEXT: vxor %v0, %v0, %v1 ; CHECK-NEXT: b.l.t (, %s10) %r = xor <3 x i64> %a, %b ret <3 x i64> %r @@ -198,24 +189,9 @@ define fastcc <2 x i32> @vec_xor_v2f32(<2 x i32> %a, <2 x i32> %b) { define fastcc <3 x i32> @vec_xor_v3f32(<3 x i32> %a, <3 x i32> %b) { ; CHECK-LABEL: vec_xor_v3f32: ; CHECK: # %bb.0: -; CHECK-NEXT: and %s4, %s4, (32)0 -; CHECK-NEXT: and %s3, %s3, (32)0 -; CHECK-NEXT: lsv %v0(0), %s3 -; CHECK-NEXT: lsv %v0(1), %s4 -; CHECK-NEXT: and %s3, %s5, (32)0 -; CHECK-NEXT: lsv %v0(2), %s3 -; CHECK-NEXT: and %s1, %s1, (32)0 -; CHECK-NEXT: and %s0, %s0, (32)0 -; CHECK-NEXT: lsv %v1(0), %s0 -; CHECK-NEXT: lsv %v1(1), %s1 -; CHECK-NEXT: and %s0, %s2, (32)0 -; CHECK-NEXT: lsv %v1(2), %s0 ; CHECK-NEXT: or %s0, 3, (0)1 ; CHECK-NEXT: lvl %s0 -; CHECK-NEXT: pvxor.lo %v0, %v1, %v0 -; CHECK-NEXT: lvs %s0, %v0(0) -; CHECK-NEXT: lvs %s1, %v0(1) -; CHECK-NEXT: 
lvs %s2, %v0(2) +; CHECK-NEXT: pvxor.lo %v0, %v0, %v1 ; CHECK-NEXT: b.l.t (, %s10) %r = xor <3 x i32> %a, %b ret <3 x i32> %r diff --git a/llvm/test/CodeGen/VE/Vector/fastcc_callee.ll b/llvm/test/CodeGen/VE/Vector/fastcc_callee.ll index 04df0d3e6c52..0af9f2ee3bba 100644 --- a/llvm/test/CodeGen/VE/Vector/fastcc_callee.ll +++ b/llvm/test/CodeGen/VE/Vector/fastcc_callee.ll @@ -146,3 +146,327 @@ define fastcc <256 x i1> @vmp_cc_bug(<256 x i1> %vm1, <256 x i1> %vm2, <512 x i1 ; CHECK-NEXT: b.l.t (, %s10) ret <256 x i1> %vm6 } + + +;;; Non-simple vector types. + +;; Expect non-power-of-two vectors that fit inside one vector register to be widened. +define fastcc <17 x i64> @vreg_arg_v17i64_r1(<256 x i64> %p0, <17 x i64> %p1) { +; CHECK-LABEL: vreg_arg_v17i64_r1: +; CHECK: # %bb.0: +; CHECK-NEXT: lea %s16, 256 +; CHECK-NEXT: lvl %s16 +; CHECK-NEXT: vor %v0, (0)1, %v1 +; CHECK-NEXT: b.l.t (, %s10) + ret <17 x i64> %p1 +} + +define fastcc <17 x i32> @vreg_arg_v17i32_r1(<256 x i32> %p0, <17 x i32> %p1) { +; CHECK-LABEL: vreg_arg_v17i32_r1: +; CHECK: # %bb.0: +; CHECK-NEXT: lea %s16, 256 +; CHECK-NEXT: lvl %s16 +; CHECK-NEXT: vor %v0, (0)1, %v1 +; CHECK-NEXT: b.l.t (, %s10) + ret <17 x i32> %p1 +} + +define fastcc <17 x i1> @vm_arg_v17i1_r1(<256 x i1> %p0, <17 x i1> %p1) { +; CHECK-LABEL: vm_arg_v17i1_r1: +; CHECK: # %bb.0: +; CHECK-NEXT: andm %vm1, %vm0, %vm2 +; CHECK-NEXT: b.l.t (, %s10) + ret <17 x i1> %p1 +} + +;; Expect over-sized non-power-of-two vectors to be split (64-bit elements) and widened.
+define fastcc <334 x i64> @vreg_arg_v334i64_r1(<256 x i64> %p0, <334 x i64> %p1) { +; CHECK-LABEL: vreg_arg_v334i64_r1: +; CHECK: # %bb.0: +; CHECK-NEXT: lea %s16, 256 +; CHECK-NEXT: lvl %s16 +; CHECK-NEXT: vor %v0, (0)1, %v1 +; CHECK-NEXT: lea %s16, 256 +; CHECK-NEXT: lvl %s16 +; CHECK-NEXT: vor %v1, (0)1, %v2 +; CHECK-NEXT: b.l.t (, %s10) + ret <334 x i64> %p1 +} + +define fastcc <334 x i32> @vreg_arg_v334i32_r1(<256 x i32> %p0, <334 x i32> %p1) { +; CHECK-LABEL: vreg_arg_v334i32_r1: +; CHECK: # %bb.0: +; CHECK-NEXT: lea %s16, 256 +; CHECK-NEXT: lvl %s16 +; CHECK-NEXT: vor %v0, (0)1, %v1 +; CHECK-NEXT: lea %s16, 256 +; CHECK-NEXT: lvl %s16 +; CHECK-NEXT: vor %v1, (0)1, %v2 +; CHECK-NEXT: b.l.t (, %s10) + ret <334 x i32> %p1 +} + +; FIXME: This test documents a bug in cc lowering: +; %p1 should live in 'VMP3' and there should be a copy from that to 'VMP1' here. +define fastcc <334 x i1> @vm_arg_v334i1_r1(<256 x i1> %p0, <334 x i1> %p1) { +; CHECK-LABEL: vm_arg_v334i1_r1: +; CHECK: # %bb.0: +; CHECK-NEXT: andm %vm1, %vm0, %vm2 +; CHECK-NEXT: andm %vm2, %vm0, %vm3 +; CHECK-NEXT: b.l.t (, %s10) + ret <334 x i1> %p1 +} + +;; Vectors with over-sized elements. +; TODO: Implement custom element splitting to get this into vregs. 
+define fastcc <17 x i128> @vreg_arg_v17i128_r1(<256 x i128> %p0, <17 x i128> %p1) { +; CHECK-LABEL: vreg_arg_v17i128_r1: +; CHECK: # %bb.0: +; CHECK-NEXT: ld %s1, 4280(, %s11) +; CHECK-NEXT: ld %s2, 4288(, %s11) +; CHECK-NEXT: ld %s3, 4296(, %s11) +; CHECK-NEXT: ld %s4, 4304(, %s11) +; CHECK-NEXT: ld %s5, 4312(, %s11) +; CHECK-NEXT: ld %s6, 4320(, %s11) +; CHECK-NEXT: ld %s7, 4328(, %s11) +; CHECK-NEXT: ld %s34, 4336(, %s11) +; CHECK-NEXT: ld %s35, 4344(, %s11) +; CHECK-NEXT: ld %s36, 4352(, %s11) +; CHECK-NEXT: ld %s37, 4360(, %s11) +; CHECK-NEXT: ld %s38, 4368(, %s11) +; CHECK-NEXT: ld %s39, 4376(, %s11) +; CHECK-NEXT: ld %s40, 4384(, %s11) +; CHECK-NEXT: ld %s41, 4392(, %s11) +; CHECK-NEXT: ld %s42, 4400(, %s11) +; CHECK-NEXT: ld %s43, 4408(, %s11) +; CHECK-NEXT: ld %s44, 4416(, %s11) +; CHECK-NEXT: ld %s45, 4424(, %s11) +; CHECK-NEXT: ld %s46, 4432(, %s11) +; CHECK-NEXT: ld %s47, 4440(, %s11) +; CHECK-NEXT: ld %s48, 4448(, %s11) +; CHECK-NEXT: ld %s49, 4456(, %s11) +; CHECK-NEXT: ld %s50, 4464(, %s11) +; CHECK-NEXT: ld %s51, 4472(, %s11) +; CHECK-NEXT: ld %s52, 4480(, %s11) +; CHECK-NEXT: ld %s53, 4488(, %s11) +; CHECK-NEXT: ld %s54, 4496(, %s11) +; CHECK-NEXT: ld %s55, 4504(, %s11) +; CHECK-NEXT: ld %s56, 4512(, %s11) +; CHECK-NEXT: ld %s57, 4544(, %s11) +; CHECK-NEXT: ld %s58, 4536(, %s11) +; CHECK-NEXT: ld %s59, 4528(, %s11) +; CHECK-NEXT: ld %s60, 4520(, %s11) +; CHECK-NEXT: st %s57, 264(, %s0) +; CHECK-NEXT: st %s58, 256(, %s0) +; CHECK-NEXT: st %s59, 248(, %s0) +; CHECK-NEXT: st %s60, 240(, %s0) +; CHECK-NEXT: st %s56, 232(, %s0) +; CHECK-NEXT: st %s55, 224(, %s0) +; CHECK-NEXT: st %s54, 216(, %s0) +; CHECK-NEXT: st %s53, 208(, %s0) +; CHECK-NEXT: st %s52, 200(, %s0) +; CHECK-NEXT: st %s51, 192(, %s0) +; CHECK-NEXT: st %s50, 184(, %s0) +; CHECK-NEXT: st %s49, 176(, %s0) +; CHECK-NEXT: st %s48, 168(, %s0) +; CHECK-NEXT: st %s47, 160(, %s0) +; CHECK-NEXT: st %s46, 152(, %s0) +; CHECK-NEXT: st %s45, 144(, %s0) +; CHECK-NEXT: st %s44, 136(, %s0) +; 
CHECK-NEXT: st %s43, 128(, %s0) +; CHECK-NEXT: st %s42, 120(, %s0) +; CHECK-NEXT: st %s41, 112(, %s0) +; CHECK-NEXT: st %s40, 104(, %s0) +; CHECK-NEXT: st %s39, 96(, %s0) +; CHECK-NEXT: st %s38, 88(, %s0) +; CHECK-NEXT: st %s37, 80(, %s0) +; CHECK-NEXT: st %s36, 72(, %s0) +; CHECK-NEXT: st %s35, 64(, %s0) +; CHECK-NEXT: st %s34, 56(, %s0) +; CHECK-NEXT: st %s7, 48(, %s0) +; CHECK-NEXT: st %s6, 40(, %s0) +; CHECK-NEXT: st %s5, 32(, %s0) +; CHECK-NEXT: st %s4, 24(, %s0) +; CHECK-NEXT: st %s3, 16(, %s0) +; CHECK-NEXT: st %s2, 8(, %s0) +; CHECK-NEXT: st %s1, (, %s0) +; CHECK-NEXT: b.l.t (, %s10) + ret <17 x i128> %p1 +} + +; TODO: Implement custom element splitting to get this into vregs. +define fastcc <17 x i65> @vreg_arg_v17i65_r1(<256 x i65> %p0, <17 x i65> %p1) { +; CHECK-LABEL: vreg_arg_v17i65_r1: +; CHECK: # %bb.0: +; CHECK-NEXT: ld %s2, 4304(, %s11) +; CHECK-NEXT: ld %s3, 4320(, %s11) +; CHECK-NEXT: ld %s1, 4312(, %s11) +; CHECK-NEXT: ld %s5, 4336(, %s11) +; CHECK-NEXT: ld %s4, 4328(, %s11) +; CHECK-NEXT: ld %s6, 4352(, %s11) +; CHECK-NEXT: ld %s7, 4344(, %s11) +; CHECK-NEXT: ld %s34, 4368(, %s11) +; CHECK-NEXT: ld %s35, 4360(, %s11) +; CHECK-NEXT: ld %s36, 4384(, %s11) +; CHECK-NEXT: ld %s37, 4376(, %s11) +; CHECK-NEXT: ld %s38, 4400(, %s11) +; CHECK-NEXT: ld %s39, 4392(, %s11) +; CHECK-NEXT: ld %s40, 4416(, %s11) +; CHECK-NEXT: ld %s41, 4408(, %s11) +; CHECK-NEXT: ld %s42, 4432(, %s11) +; CHECK-NEXT: ld %s43, 4424(, %s11) +; CHECK-NEXT: ld %s44, 4448(, %s11) +; CHECK-NEXT: ld %s45, 4440(, %s11) +; CHECK-NEXT: ld %s46, 4464(, %s11) +; CHECK-NEXT: ld %s47, 4456(, %s11) +; CHECK-NEXT: ld %s48, 4480(, %s11) +; CHECK-NEXT: ld %s49, 4472(, %s11) +; CHECK-NEXT: ld %s50, 4496(, %s11) +; CHECK-NEXT: ld %s51, 4488(, %s11) +; CHECK-NEXT: ld %s52, 4512(, %s11) +; CHECK-NEXT: ld %s53, 4504(, %s11) +; CHECK-NEXT: ld %s54, 4528(, %s11) +; CHECK-NEXT: ld %s55, 4520(, %s11) +; CHECK-NEXT: ld %s56, 4288(, %s11) +; CHECK-NEXT: ld %s57, 4280(, %s11) +; CHECK-NEXT: ld %s58, 
4544(, %s11) +; CHECK-NEXT: ld %s59, 4296(, %s11) +; CHECK-NEXT: ld %s60, 4536(, %s11) +; CHECK-NEXT: st %s57, (, %s0) +; CHECK-NEXT: and %s57, 1, %s58 +; CHECK-NEXT: st1b %s57, 138(, %s0) +; CHECK-NEXT: srl %s57, %s60, 48 +; CHECK-NEXT: st2b %s57, 136(, %s0) +; CHECK-NEXT: sll %s57, %s59, 1 +; CHECK-NEXT: and %s56, 1, %s56 +; CHECK-NEXT: or %s56, %s56, %s57 +; CHECK-NEXT: st %s56, 8(, %s0) +; CHECK-NEXT: srl %s56, %s55, 49 +; CHECK-NEXT: and %s54, 1, %s54 +; CHECK-NEXT: sll %s54, %s54, 15 +; CHECK-NEXT: or %s54, %s56, %s54 +; CHECK-NEXT: sll %s56, %s60, 16 +; CHECK-NEXT: or %s54, %s54, %s56 +; CHECK-NEXT: st %s54, 128(, %s0) +; CHECK-NEXT: srl %s54, %s53, 50 +; CHECK-NEXT: and %s52, 1, %s52 +; CHECK-NEXT: sll %s52, %s52, 14 +; CHECK-NEXT: or %s52, %s52, %s54 +; CHECK-NEXT: sll %s54, %s55, 15 +; CHECK-NEXT: or %s52, %s52, %s54 +; CHECK-NEXT: st %s52, 120(, %s0) +; CHECK-NEXT: srl %s52, %s51, 51 +; CHECK-NEXT: and %s50, 1, %s50 +; CHECK-NEXT: sll %s50, %s50, 13 +; CHECK-NEXT: or %s50, %s52, %s50 +; CHECK-NEXT: sll %s52, %s53, 14 +; CHECK-NEXT: or %s50, %s50, %s52 +; CHECK-NEXT: st %s50, 112(, %s0) +; CHECK-NEXT: srl %s50, %s49, 52 +; CHECK-NEXT: and %s48, 1, %s48 +; CHECK-NEXT: sll %s48, %s48, 12 +; CHECK-NEXT: or %s48, %s48, %s50 +; CHECK-NEXT: sll %s50, %s51, 13 +; CHECK-NEXT: or %s48, %s48, %s50 +; CHECK-NEXT: st %s48, 104(, %s0) +; CHECK-NEXT: srl %s48, %s47, 53 +; CHECK-NEXT: and %s46, 1, %s46 +; CHECK-NEXT: sll %s46, %s46, 11 +; CHECK-NEXT: or %s46, %s48, %s46 +; CHECK-NEXT: sll %s48, %s49, 12 +; CHECK-NEXT: or %s46, %s46, %s48 +; CHECK-NEXT: st %s46, 96(, %s0) +; CHECK-NEXT: srl %s46, %s45, 54 +; CHECK-NEXT: and %s44, 1, %s44 +; CHECK-NEXT: sll %s44, %s44, 10 +; CHECK-NEXT: or %s44, %s44, %s46 +; CHECK-NEXT: sll %s46, %s47, 11 +; CHECK-NEXT: or %s44, %s44, %s46 +; CHECK-NEXT: st %s44, 88(, %s0) +; CHECK-NEXT: srl %s44, %s43, 55 +; CHECK-NEXT: and %s42, 1, %s42 +; CHECK-NEXT: sll %s42, %s42, 9 +; CHECK-NEXT: or %s42, %s44, %s42 +; CHECK-NEXT: sll %s44, %s45, 
10 +; CHECK-NEXT: or %s42, %s42, %s44 +; CHECK-NEXT: st %s42, 80(, %s0) +; CHECK-NEXT: srl %s42, %s41, 56 +; CHECK-NEXT: and %s40, 1, %s40 +; CHECK-NEXT: sll %s40, %s40, 8 +; CHECK-NEXT: or %s40, %s40, %s42 +; CHECK-NEXT: sll %s42, %s43, 9 +; CHECK-NEXT: or %s40, %s40, %s42 +; CHECK-NEXT: st %s40, 72(, %s0) +; CHECK-NEXT: srl %s40, %s39, 57 +; CHECK-NEXT: and %s38, 1, %s38 +; CHECK-NEXT: sll %s38, %s38, 7 +; CHECK-NEXT: or %s38, %s40, %s38 +; CHECK-NEXT: sll %s40, %s41, 8 +; CHECK-NEXT: or %s38, %s38, %s40 +; CHECK-NEXT: st %s38, 64(, %s0) +; CHECK-NEXT: srl %s38, %s37, 58 +; CHECK-NEXT: and %s36, 1, %s36 +; CHECK-NEXT: sll %s36, %s36, 6 +; CHECK-NEXT: or %s36, %s36, %s38 +; CHECK-NEXT: sll %s38, %s39, 7 +; CHECK-NEXT: or %s36, %s36, %s38 +; CHECK-NEXT: st %s36, 56(, %s0) +; CHECK-NEXT: srl %s36, %s35, 59 +; CHECK-NEXT: and %s34, 1, %s34 +; CHECK-NEXT: sll %s34, %s34, 5 +; CHECK-NEXT: or %s34, %s36, %s34 +; CHECK-NEXT: sll %s36, %s37, 6 +; CHECK-NEXT: or %s34, %s34, %s36 +; CHECK-NEXT: st %s34, 48(, %s0) +; CHECK-NEXT: srl %s34, %s7, 60 +; CHECK-NEXT: and %s6, 1, %s6 +; CHECK-NEXT: sll %s6, %s6, 4 +; CHECK-NEXT: or %s6, %s6, %s34 +; CHECK-NEXT: sll %s34, %s35, 5 +; CHECK-NEXT: or %s6, %s6, %s34 +; CHECK-NEXT: st %s6, 40(, %s0) +; CHECK-NEXT: srl %s6, %s4, 61 +; CHECK-NEXT: and %s5, 1, %s5 +; CHECK-NEXT: sll %s5, %s5, 3 +; CHECK-NEXT: or %s5, %s6, %s5 +; CHECK-NEXT: sll %s6, %s7, 4 +; CHECK-NEXT: or %s5, %s5, %s6 +; CHECK-NEXT: st %s5, 32(, %s0) +; CHECK-NEXT: srl %s5, %s1, 62 +; CHECK-NEXT: and %s3, 1, %s3 +; CHECK-NEXT: sll %s3, %s3, 2 +; CHECK-NEXT: or %s3, %s3, %s5 +; CHECK-NEXT: sll %s4, %s4, 3 +; CHECK-NEXT: or %s3, %s3, %s4 +; CHECK-NEXT: st %s3, 24(, %s0) +; CHECK-NEXT: srl %s3, %s59, 63 +; CHECK-NEXT: and %s2, 1, %s2 +; CHECK-NEXT: sll %s2, %s2, 1 +; CHECK-NEXT: or %s2, %s3, %s2 +; CHECK-NEXT: sll %s1, %s1, 2 +; CHECK-NEXT: or %s1, %s2, %s1 +; CHECK-NEXT: st %s1, 16(, %s0) +; CHECK-NEXT: b.l.t (, %s10) + ret <17 x i65> %p1 +} + +;; Vectors with under-sized 
elements. +define fastcc <17 x i16> @vreg_arg_v17i16_r1(<256 x i16> %p0, <17 x i16> %p1) { +; CHECK-LABEL: vreg_arg_v17i16_r1: +; CHECK: # %bb.0: +; CHECK-NEXT: lea %s16, 256 +; CHECK-NEXT: lvl %s16 +; CHECK-NEXT: vor %v0, (0)1, %v1 +; CHECK-NEXT: b.l.t (, %s10) + ret <17 x i16> %p1 +} + +define fastcc <17 x i13> @vreg_arg_v17i13_r1(<256 x i13> %p0, <17 x i13> %p1) { +; CHECK-LABEL: vreg_arg_v17i13_r1: +; CHECK: # %bb.0: +; CHECK-NEXT: lea %s16, 256 +; CHECK-NEXT: lvl %s16 +; CHECK-NEXT: vor %v0, (0)1, %v1 +; CHECK-NEXT: b.l.t (, %s10) + ret <17 x i13> %p1 +} diff --git a/llvm/test/CodeGen/VE/Vector/fastcc_caller.ll b/llvm/test/CodeGen/VE/Vector/fastcc_caller.ll index 987d87c269a3..752950099d3e 100644 --- a/llvm/test/CodeGen/VE/Vector/fastcc_caller.ll +++ b/llvm/test/CodeGen/VE/Vector/fastcc_caller.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=ve-unknown-unknown -mattr=+vpu | FileCheck %s @@ -11,7 +12,22 @@ declare void @test(i64) define fastcc i32 @sample_call() { ; CHECK-LABEL: sample_call: -; CHECK: .LBB{{[0-9]+}}_2: +; CHECK: # %bb.0: +; CHECK-NEXT: st %s9, (, %s11) +; CHECK-NEXT: st %s10, 8(, %s11) +; CHECK-NEXT: or %s9, 0, %s11 +; CHECK-NEXT: lea %s11, -240(, %s11) +; CHECK-NEXT: brge.l.t %s11, %s8, .LBB0_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: ld %s61, 24(, %s14) +; CHECK-NEXT: or %s62, 0, %s0 +; CHECK-NEXT: lea %s63, 315 +; CHECK-NEXT: shm.l %s63, (%s61) +; CHECK-NEXT: shm.l %s8, 8(%s61) +; CHECK-NEXT: shm.l %s11, 16(%s61) +; CHECK-NEXT: monc +; CHECK-NEXT: or %s0, 0, %s62 +; CHECK-NEXT: .LBB0_2: ; CHECK-NEXT: lea %s0, sample_add@lo ; CHECK-NEXT: and %s0, %s0, (32)0 ; CHECK-NEXT: lea.sl %s12, sample_add@hi(, %s0) @@ -19,13 +35,31 @@ define fastcc i32 @sample_call() { ; CHECK-NEXT: or %s1, 2, (0)1 ; CHECK-NEXT: bsic %s10, (, %s12) ; CHECK-NEXT: or %s11, 0, %s9 +; CHECK-NEXT: ld %s10, 8(, %s11) +; CHECK-NEXT: ld %s9, (, %s11) +; CHECK-NEXT: b.l.t (, %s10) %r = tail call fastcc i32 
@sample_add(i32 1, i32 2) ret i32 %r } define fastcc i32 @stack_call_int() { ; CHECK-LABEL: stack_call_int: -; CHECK: .LBB{{[0-9]+}}_2: +; CHECK: # %bb.0: +; CHECK-NEXT: st %s9, (, %s11) +; CHECK-NEXT: st %s10, 8(, %s11) +; CHECK-NEXT: or %s9, 0, %s11 +; CHECK-NEXT: lea %s11, -256(, %s11) +; CHECK-NEXT: brge.l.t %s11, %s8, .LBB1_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: ld %s61, 24(, %s14) +; CHECK-NEXT: or %s62, 0, %s0 +; CHECK-NEXT: lea %s63, 315 +; CHECK-NEXT: shm.l %s63, (%s61) +; CHECK-NEXT: shm.l %s8, 8(%s61) +; CHECK-NEXT: shm.l %s11, 16(%s61) +; CHECK-NEXT: monc +; CHECK-NEXT: or %s0, 0, %s62 +; CHECK-NEXT: .LBB1_2: ; CHECK-NEXT: or %s0, 10, (0)1 ; CHECK-NEXT: st %s0, 248(, %s11) ; CHECK-NEXT: or %s34, 9, (0)1 @@ -43,13 +77,31 @@ define fastcc i32 @stack_call_int() { ; CHECK-NEXT: st %s34, 240(, %s11) ; CHECK-NEXT: bsic %s10, (, %s12) ; CHECK-NEXT: or %s11, 0, %s9 +; CHECK-NEXT: ld %s10, 8(, %s11) +; CHECK-NEXT: ld %s9, (, %s11) +; CHECK-NEXT: b.l.t (, %s10) %r = tail call fastcc i32 @stack_callee_int(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10) ret i32 %r } define fastcc i32 @stack_call_int_szext() { ; CHECK-LABEL: stack_call_int_szext: -; CHECK: .LBB{{[0-9]+}}_2: +; CHECK: # %bb.0: +; CHECK-NEXT: st %s9, (, %s11) +; CHECK-NEXT: st %s10, 8(, %s11) +; CHECK-NEXT: or %s9, 0, %s11 +; CHECK-NEXT: lea %s11, -256(, %s11) +; CHECK-NEXT: brge.l.t %s11, %s8, .LBB2_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: ld %s61, 24(, %s14) +; CHECK-NEXT: or %s62, 0, %s0 +; CHECK-NEXT: lea %s63, 315 +; CHECK-NEXT: shm.l %s63, (%s61) +; CHECK-NEXT: shm.l %s8, 8(%s61) +; CHECK-NEXT: shm.l %s11, 16(%s61) +; CHECK-NEXT: monc +; CHECK-NEXT: or %s0, 0, %s62 +; CHECK-NEXT: .LBB2_2: ; CHECK-NEXT: or %s0, -1, (0)1 ; CHECK-NEXT: st %s0, 248(, %s11) ; CHECK-NEXT: lea %s34, 65535 @@ -67,13 +119,31 @@ define fastcc i32 @stack_call_int_szext() { ; CHECK-NEXT: st %s34, 240(, %s11) ; CHECK-NEXT: bsic %s10, (, %s12) ; CHECK-NEXT: or %s11, 0, %s9 - %r = tail call fastcc i32 
@stack_callee_int_szext(i1 signext -1, i8 -1, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i16 zeroext -1, i8 signext -1) +; CHECK-NEXT: ld %s10, 8(, %s11) +; CHECK-NEXT: ld %s9, (, %s11) +; CHECK-NEXT: b.l.t (, %s10) + %r = tail call fastcc i32 @stack_callee_int_szext(i1 -1, i8 -1, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i16 -1, i8 -1) ret i32 %r } define fastcc float @stack_call_float() { ; CHECK-LABEL: stack_call_float: -; CHECK: .LBB{{[0-9]+}}_2: +; CHECK: # %bb.0: +; CHECK-NEXT: st %s9, (, %s11) +; CHECK-NEXT: st %s10, 8(, %s11) +; CHECK-NEXT: or %s9, 0, %s11 +; CHECK-NEXT: lea %s11, -256(, %s11) +; CHECK-NEXT: brge.l.t %s11, %s8, .LBB3_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: ld %s61, 24(, %s14) +; CHECK-NEXT: or %s62, 0, %s0 +; CHECK-NEXT: lea %s63, 315 +; CHECK-NEXT: shm.l %s63, (%s61) +; CHECK-NEXT: shm.l %s8, 8(%s61) +; CHECK-NEXT: shm.l %s11, 16(%s61) +; CHECK-NEXT: monc +; CHECK-NEXT: or %s0, 0, %s62 +; CHECK-NEXT: .LBB3_2: ; CHECK-NEXT: lea.sl %s0, 1092616192 ; CHECK-NEXT: st %s0, 248(, %s11) ; CHECK-NEXT: lea.sl %s34, 1091567616 @@ -91,13 +161,31 @@ define fastcc float @stack_call_float() { ; CHECK-NEXT: st %s34, 240(, %s11) ; CHECK-NEXT: bsic %s10, (, %s12) ; CHECK-NEXT: or %s11, 0, %s9 +; CHECK-NEXT: ld %s10, 8(, %s11) +; CHECK-NEXT: ld %s9, (, %s11) +; CHECK-NEXT: b.l.t (, %s10) %r = tail call fastcc float @stack_callee_float(float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0, float 9.0, float 10.0) ret float %r } define fastcc float @stack_call_float2(float %p0) { ; CHECK-LABEL: stack_call_float2: -; CHECK: .LBB{{[0-9]+}}_2: +; CHECK: # %bb.0: +; CHECK-NEXT: st %s9, (, %s11) +; CHECK-NEXT: st %s10, 8(, %s11) +; CHECK-NEXT: or %s9, 0, %s11 +; CHECK-NEXT: lea %s11, -256(, %s11) +; CHECK-NEXT: brge.l.t %s11, %s8, .LBB4_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: ld %s61, 24(, %s14) +; CHECK-NEXT: or %s62, 0, %s0 +; CHECK-NEXT: lea %s63, 315 +; CHECK-NEXT: shm.l %s63, (%s61) +; CHECK-NEXT: shm.l %s8, 8(%s61) +; 
CHECK-NEXT: shm.l %s11, 16(%s61) +; CHECK-NEXT: monc +; CHECK-NEXT: or %s0, 0, %s62 +; CHECK-NEXT: .LBB4_2: ; CHECK-NEXT: st %s0, 248(, %s11) ; CHECK-NEXT: lea %s1, stack_callee_float@lo ; CHECK-NEXT: and %s1, %s1, (32)0 @@ -112,32 +200,68 @@ define fastcc float @stack_call_float2(float %p0) { ; CHECK-NEXT: or %s7, 0, %s0 ; CHECK-NEXT: bsic %s10, (, %s12) ; CHECK-NEXT: or %s11, 0, %s9 +; CHECK-NEXT: ld %s10, 8(, %s11) +; CHECK-NEXT: ld %s9, (, %s11) +; CHECK-NEXT: b.l.t (, %s10) %r = tail call fastcc float @stack_callee_float(float %p0, float %p0, float %p0, float %p0, float %p0, float %p0, float %p0, float %p0, float %p0, float %p0) ret float %r } ; Vector argument passing (fastcc feature) -; +; declare fastcc <256 x i32> @get_v256i32() declare fastcc void @vsample_v(<256 x i32>) declare fastcc void @vsample_iv(i32, <256 x i32>) define void @caller_vret() { -; CHECK: caller_vret: -; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-LABEL: caller_vret: +; CHECK: # %bb.0: +; CHECK-NEXT: st %s9, (, %s11) +; CHECK-NEXT: st %s10, 8(, %s11) +; CHECK-NEXT: or %s9, 0, %s11 +; CHECK-NEXT: lea %s11, -240(, %s11) +; CHECK-NEXT: brge.l.t %s11, %s8, .LBB5_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: ld %s61, 24(, %s14) +; CHECK-NEXT: or %s62, 0, %s0 +; CHECK-NEXT: lea %s63, 315 +; CHECK-NEXT: shm.l %s63, (%s61) +; CHECK-NEXT: shm.l %s8, 8(%s61) +; CHECK-NEXT: shm.l %s11, 16(%s61) +; CHECK-NEXT: monc +; CHECK-NEXT: or %s0, 0, %s62 +; CHECK-NEXT: .LBB5_2: ; CHECK-NEXT: lea %s0, get_v256i32@lo ; CHECK-NEXT: and %s0, %s0, (32)0 ; CHECK-NEXT: lea.sl %s12, get_v256i32@hi(, %s0) ; CHECK-NEXT: bsic %s10, (, %s12) ; CHECK-NEXT: or %s11, 0, %s9 +; CHECK-NEXT: ld %s10, 8(, %s11) +; CHECK-NEXT: ld %s9, (, %s11) +; CHECK-NEXT: b.l.t (, %s10) %r = tail call fastcc <256 x i32> @get_v256i32() ret void } define void @caller_vret_pass_p0() { ; CHECK-LABEL: caller_vret_pass_p0: -; CHECK: .LBB{{[0-9]+}}_2: -; CHECK: lea %s0, get_v256i32@lo +; CHECK: # %bb.0: +; CHECK-NEXT: st %s9, (, %s11) +; CHECK-NEXT: st %s10, 
8(, %s11) +; CHECK-NEXT: or %s9, 0, %s11 +; CHECK-NEXT: lea %s11, -240(, %s11) +; CHECK-NEXT: brge.l.t %s11, %s8, .LBB6_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: ld %s61, 24(, %s14) +; CHECK-NEXT: or %s62, 0, %s0 +; CHECK-NEXT: lea %s63, 315 +; CHECK-NEXT: shm.l %s63, (%s61) +; CHECK-NEXT: shm.l %s8, 8(%s61) +; CHECK-NEXT: shm.l %s11, 16(%s61) +; CHECK-NEXT: monc +; CHECK-NEXT: or %s0, 0, %s62 +; CHECK-NEXT: .LBB6_2: +; CHECK-NEXT: lea %s0, get_v256i32@lo ; CHECK-NEXT: and %s0, %s0, (32)0 ; CHECK-NEXT: lea.sl %s12, get_v256i32@hi(, %s0) ; CHECK-NEXT: bsic %s10, (, %s12) @@ -146,6 +270,9 @@ define void @caller_vret_pass_p0() { ; CHECK-NEXT: lea.sl %s12, vsample_v@hi(, %s0) ; CHECK-NEXT: bsic %s10, (, %s12) ; CHECK-NEXT: or %s11, 0, %s9 +; CHECK-NEXT: ld %s10, 8(, %s11) +; CHECK-NEXT: ld %s9, (, %s11) +; CHECK-NEXT: b.l.t (, %s10) %p = tail call fastcc <256 x i32> @get_v256i32() call fastcc void @vsample_v(<256 x i32> %p) ret void @@ -153,8 +280,24 @@ define void @caller_vret_pass_p0() { define void @caller_vret_pass_p1(i32 %s) { ; CHECK-LABEL: caller_vret_pass_p1: -; CHECK: .LBB{{[0-9]+}}_2: -; CHECK: or %s18, 0, %s0 +; CHECK: # %bb.0: +; CHECK-NEXT: st %s9, (, %s11) +; CHECK-NEXT: st %s10, 8(, %s11) +; CHECK-NEXT: or %s9, 0, %s11 +; CHECK-NEXT: lea %s11, -240(, %s11) +; CHECK-NEXT: brge.l.t %s11, %s8, .LBB7_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: ld %s61, 24(, %s14) +; CHECK-NEXT: or %s62, 0, %s0 +; CHECK-NEXT: lea %s63, 315 +; CHECK-NEXT: shm.l %s63, (%s61) +; CHECK-NEXT: shm.l %s8, 8(%s61) +; CHECK-NEXT: shm.l %s11, 16(%s61) +; CHECK-NEXT: monc +; CHECK-NEXT: or %s0, 0, %s62 +; CHECK-NEXT: .LBB7_2: +; CHECK-NEXT: st %s18, 48(, %s9) # 8-byte Folded Spill +; CHECK-NEXT: or %s18, 0, %s0 ; CHECK-NEXT: lea %s0, get_v256i32@lo ; CHECK-NEXT: and %s0, %s0, (32)0 ; CHECK-NEXT: lea.sl %s12, get_v256i32@hi(, %s0) @@ -164,6 +307,11 @@ define void @caller_vret_pass_p1(i32 %s) { ; CHECK-NEXT: lea.sl %s12, vsample_iv@hi(, %s0) ; CHECK-NEXT: or %s0, 0, %s18 ; CHECK-NEXT: bsic 
%s10, (, %s12) +; CHECK-NEXT: ld %s18, 48(, %s9) # 8-byte Folded Reload +; CHECK-NEXT: or %s11, 0, %s9 +; CHECK-NEXT: ld %s10, 8(, %s11) +; CHECK-NEXT: ld %s9, (, %s11) +; CHECK-NEXT: b.l.t (, %s10) %p = tail call fastcc <256 x i32> @get_v256i32() call fastcc void @vsample_iv(i32 %s, <256 x i32> %p) ret void @@ -174,7 +322,22 @@ declare fastcc void @vsample_vvv(<256 x i32>, <256 x i32>, <256 x i32>) define void @caller_vret_pass_p01() { ; CHECK-LABEL: caller_vret_pass_p01: -; CHECK: .LBB{{[0-9]+}}_2: +; CHECK: # %bb.0: +; CHECK-NEXT: st %s9, (, %s11) +; CHECK-NEXT: st %s10, 8(, %s11) +; CHECK-NEXT: or %s9, 0, %s11 +; CHECK-NEXT: lea %s11, -240(, %s11) +; CHECK-NEXT: brge.l.t %s11, %s8, .LBB8_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: ld %s61, 24(, %s14) +; CHECK-NEXT: or %s62, 0, %s0 +; CHECK-NEXT: lea %s63, 315 +; CHECK-NEXT: shm.l %s63, (%s61) +; CHECK-NEXT: shm.l %s8, 8(%s61) +; CHECK-NEXT: shm.l %s11, 16(%s61) +; CHECK-NEXT: monc +; CHECK-NEXT: or %s0, 0, %s62 +; CHECK-NEXT: .LBB8_2: ; CHECK-NEXT: lea %s0, get_v256i32@lo ; CHECK-NEXT: and %s0, %s0, (32)0 ; CHECK-NEXT: lea.sl %s12, get_v256i32@hi(, %s0) @@ -187,6 +350,9 @@ define void @caller_vret_pass_p01() { ; CHECK-NEXT: vor %v1, (0)1, %v0 ; CHECK-NEXT: bsic %s10, (, %s12) ; CHECK-NEXT: or %s11, 0, %s9 +; CHECK-NEXT: ld %s10, 8(, %s11) +; CHECK-NEXT: ld %s9, (, %s11) +; CHECK-NEXT: b.l.t (, %s10) %p = tail call fastcc <256 x i32> @get_v256i32() call fastcc void @vsample_vv(<256 x i32> %p, <256 x i32> %p) ret void @@ -194,7 +360,22 @@ define void @caller_vret_pass_p01() { define void @caller_vret_pass_p012() { ; CHECK-LABEL: caller_vret_pass_p012: -; CHECK: .LBB{{[0-9]+}}_2: +; CHECK: # %bb.0: +; CHECK-NEXT: st %s9, (, %s11) +; CHECK-NEXT: st %s10, 8(, %s11) +; CHECK-NEXT: or %s9, 0, %s11 +; CHECK-NEXT: lea %s11, -240(, %s11) +; CHECK-NEXT: brge.l.t %s11, %s8, .LBB9_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: ld %s61, 24(, %s14) +; CHECK-NEXT: or %s62, 0, %s0 +; CHECK-NEXT: lea %s63, 315 +; CHECK-NEXT: shm.l %s63, 
(%s61) +; CHECK-NEXT: shm.l %s8, 8(%s61) +; CHECK-NEXT: shm.l %s11, 16(%s61) +; CHECK-NEXT: monc +; CHECK-NEXT: or %s0, 0, %s62 +; CHECK-NEXT: .LBB9_2: ; CHECK-NEXT: lea %s0, get_v256i32@lo ; CHECK-NEXT: and %s0, %s0, (32)0 ; CHECK-NEXT: lea.sl %s12, get_v256i32@hi(, %s0) @@ -210,6 +391,9 @@ define void @caller_vret_pass_p012() { ; CHECK-NEXT: vor %v2, (0)1, %v0 ; CHECK-NEXT: bsic %s10, (, %s12) ; CHECK-NEXT: or %s11, 0, %s9 +; CHECK-NEXT: ld %s10, 8(, %s11) +; CHECK-NEXT: ld %s9, (, %s11) +; CHECK-NEXT: b.l.t (, %s10) %p = tail call fastcc <256 x i32> @get_v256i32() call fastcc void @vsample_vvv(<256 x i32> %p, <256 x i32> %p, <256 x i32> %p) ret void @@ -221,14 +405,29 @@ declare fastcc void @vsample_vvvvvvv(<256 x i32>, <256 x i32>, <256 x i32>, <256 ; TODO improve vreg copy (redundant lea+lvl emitted) define fastcc void @roundtrip_caller_callee(<256 x i32> %p0, <256 x i32> %p1, <256 x i32> %p2, <256 x i32> %p3, <256 x i32> %p4, <256 x i32> %p5, <256 x i32> %p6) { ; CHECK-LABEL: roundtrip_caller_callee: -; CHECK: .LBB{{[0-9]+}}_2: +; CHECK: # %bb.0: +; CHECK-NEXT: st %s9, (, %s11) +; CHECK-NEXT: st %s10, 8(, %s11) +; CHECK-NEXT: or %s9, 0, %s11 +; CHECK-NEXT: lea %s11, -240(, %s11) +; CHECK-NEXT: brge.l.t %s11, %s8, .LBB10_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: ld %s61, 24(, %s14) +; CHECK-NEXT: or %s62, 0, %s0 +; CHECK-NEXT: lea %s63, 315 +; CHECK-NEXT: shm.l %s63, (%s61) +; CHECK-NEXT: shm.l %s8, 8(%s61) +; CHECK-NEXT: shm.l %s11, 16(%s61) +; CHECK-NEXT: monc +; CHECK-NEXT: or %s0, 0, %s62 +; CHECK-NEXT: .LBB10_2: ; CHECK-NEXT: lea %s16, 256 ; CHECK-NEXT: lvl %s16 ; CHECK-NEXT: vor %v7, (0)1, %v0 ; CHECK-NEXT: lea %s0, vsample_vvvvvvv@lo ; CHECK-NEXT: and %s0, %s0, (32)0 ; CHECK-NEXT: lea.sl %s12, vsample_vvvvvvv@hi(, %s0) -; CHECK-NEXT: lea %s16, 256 +; CHECK-NEXT: lea %s16, 256 ; CHECK-NEXT: lvl %s16 ; CHECK-NEXT: vor %v0, (0)1, %v1 ; CHECK-NEXT: lea %s16, 256 @@ -251,6 +450,469 @@ define fastcc void @roundtrip_caller_callee(<256 x i32> %p0, <256 x i32> 
%p1, <2 ; CHECK-NEXT: vor %v6, (0)1, %v7 ; CHECK-NEXT: bsic %s10, (, %s12) ; CHECK-NEXT: or %s11, 0, %s9 +; CHECK-NEXT: ld %s10, 8(, %s11) +; CHECK-NEXT: ld %s9, (, %s11) +; CHECK-NEXT: b.l.t (, %s10) call fastcc void @vsample_vvvvvvv(<256 x i32> %p1, <256 x i32> %p2, <256 x i32> %p3, <256 x i32> %p4, <256 x i32> %p5, <256 x i32> %p6, <256 x i32> %p0) ret void } + + +;;; Non-simple vector types. + +declare fastcc void @vsample_v17i64(<17 x i64>) + +;; Expect non-power-of-two vector that fit inside one vector register to be widened. +define fastcc void @vreg_arg_v17i64_r1(<256 x i64> %p0, <17 x i64> %p1) { +; CHECK-LABEL: vreg_arg_v17i64_r1: +; CHECK: # %bb.0: +; CHECK-NEXT: st %s9, (, %s11) +; CHECK-NEXT: st %s10, 8(, %s11) +; CHECK-NEXT: or %s9, 0, %s11 +; CHECK-NEXT: lea %s11, -240(, %s11) +; CHECK-NEXT: brge.l.t %s11, %s8, .LBB11_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: ld %s61, 24(, %s14) +; CHECK-NEXT: or %s62, 0, %s0 +; CHECK-NEXT: lea %s63, 315 +; CHECK-NEXT: shm.l %s63, (%s61) +; CHECK-NEXT: shm.l %s8, 8(%s61) +; CHECK-NEXT: shm.l %s11, 16(%s61) +; CHECK-NEXT: monc +; CHECK-NEXT: or %s0, 0, %s62 +; CHECK-NEXT: .LBB11_2: +; CHECK-NEXT: lea %s16, 256 +; CHECK-NEXT: lvl %s16 +; CHECK-NEXT: vor %v0, (0)1, %v1 +; CHECK-NEXT: lea %s0, vsample_v17i64@lo +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s12, vsample_v17i64@hi(, %s0) +; CHECK-NEXT: bsic %s10, (, %s12) +; CHECK-NEXT: or %s11, 0, %s9 +; CHECK-NEXT: ld %s10, 8(, %s11) +; CHECK-NEXT: ld %s9, (, %s11) +; CHECK-NEXT: b.l.t (, %s10) + call fastcc void @vsample_v17i64(<17 x i64> %p1) + ret void +} + +declare fastcc void @vsample_v17i32(<17 x i32>) + +;; Expect non-power-of-two vector that fit inside one vector register to be widened. 
+define fastcc void @vreg_arg_v17i32_r1(<256 x i32> %p0, <17 x i32> %p1) { +; CHECK-LABEL: vreg_arg_v17i32_r1: +; CHECK: # %bb.0: +; CHECK-NEXT: st %s9, (, %s11) +; CHECK-NEXT: st %s10, 8(, %s11) +; CHECK-NEXT: or %s9, 0, %s11 +; CHECK-NEXT: lea %s11, -240(, %s11) +; CHECK-NEXT: brge.l.t %s11, %s8, .LBB12_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: ld %s61, 24(, %s14) +; CHECK-NEXT: or %s62, 0, %s0 +; CHECK-NEXT: lea %s63, 315 +; CHECK-NEXT: shm.l %s63, (%s61) +; CHECK-NEXT: shm.l %s8, 8(%s61) +; CHECK-NEXT: shm.l %s11, 16(%s61) +; CHECK-NEXT: monc +; CHECK-NEXT: or %s0, 0, %s62 +; CHECK-NEXT: .LBB12_2: +; CHECK-NEXT: lea %s16, 256 +; CHECK-NEXT: lvl %s16 +; CHECK-NEXT: vor %v0, (0)1, %v1 +; CHECK-NEXT: lea %s0, vsample_v17i32@lo +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s12, vsample_v17i32@hi(, %s0) +; CHECK-NEXT: bsic %s10, (, %s12) +; CHECK-NEXT: or %s11, 0, %s9 +; CHECK-NEXT: ld %s10, 8(, %s11) +; CHECK-NEXT: ld %s9, (, %s11) +; CHECK-NEXT: b.l.t (, %s10) + call fastcc void @vsample_v17i32(<17 x i32> %p1) + ret void +} + +declare fastcc void @vsample_v17i1(<17 x i1>) + +;; Expect non-power-of-two vector that fit inside one vector register to be widened. 
+define fastcc void @vreg_arg_v17i1_r1(<256 x i1> %p0, <17 x i1> %p1) { +; CHECK-LABEL: vreg_arg_v17i1_r1: +; CHECK: # %bb.0: +; CHECK-NEXT: st %s9, (, %s11) +; CHECK-NEXT: st %s10, 8(, %s11) +; CHECK-NEXT: or %s9, 0, %s11 +; CHECK-NEXT: lea %s11, -240(, %s11) +; CHECK-NEXT: brge.l.t %s11, %s8, .LBB13_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: ld %s61, 24(, %s14) +; CHECK-NEXT: or %s62, 0, %s0 +; CHECK-NEXT: lea %s63, 315 +; CHECK-NEXT: shm.l %s63, (%s61) +; CHECK-NEXT: shm.l %s8, 8(%s61) +; CHECK-NEXT: shm.l %s11, 16(%s61) +; CHECK-NEXT: monc +; CHECK-NEXT: or %s0, 0, %s62 +; CHECK-NEXT: .LBB13_2: +; CHECK-NEXT: andm %vm1, %vm0, %vm2 +; CHECK-NEXT: lea %s0, vsample_v17i1@lo +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s12, vsample_v17i1@hi(, %s0) +; CHECK-NEXT: bsic %s10, (, %s12) +; CHECK-NEXT: or %s11, 0, %s9 +; CHECK-NEXT: ld %s10, 8(, %s11) +; CHECK-NEXT: ld %s9, (, %s11) +; CHECK-NEXT: b.l.t (, %s10) + call fastcc void @vsample_v17i1(<17 x i1> %p1) + ret void +} + +;; Expect over-sized non-power-of-two vectors to be split(64bit elements) and widened. 
+declare fastcc void @vsample_v334i64(<334 x i64>) + +define fastcc void @vreg_arg_v334i64_r1(<256 x i64> %p0, <334 x i64> %p1) { +; CHECK-LABEL: vreg_arg_v334i64_r1: +; CHECK: # %bb.0: +; CHECK-NEXT: st %s9, (, %s11) +; CHECK-NEXT: st %s10, 8(, %s11) +; CHECK-NEXT: or %s9, 0, %s11 +; CHECK-NEXT: lea %s11, -240(, %s11) +; CHECK-NEXT: brge.l.t %s11, %s8, .LBB14_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: ld %s61, 24(, %s14) +; CHECK-NEXT: or %s62, 0, %s0 +; CHECK-NEXT: lea %s63, 315 +; CHECK-NEXT: shm.l %s63, (%s61) +; CHECK-NEXT: shm.l %s8, 8(%s61) +; CHECK-NEXT: shm.l %s11, 16(%s61) +; CHECK-NEXT: monc +; CHECK-NEXT: or %s0, 0, %s62 +; CHECK-NEXT: .LBB14_2: +; CHECK-NEXT: lea %s16, 256 +; CHECK-NEXT: lvl %s16 +; CHECK-NEXT: vor %v0, (0)1, %v2 +; CHECK-NEXT: lea %s16, 256 +; CHECK-NEXT: lvl %s16 +; CHECK-NEXT: vor %v1, (0)1, %v3 +; CHECK-NEXT: lea %s0, vsample_v334i64@lo +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s12, vsample_v334i64@hi(, %s0) +; CHECK-NEXT: # kill: def $v0 killed $v0 def $vp0 killed $v1 +; CHECK-NEXT: bsic %s10, (, %s12) +; CHECK-NEXT: or %s11, 0, %s9 +; CHECK-NEXT: ld %s10, 8(, %s11) +; CHECK-NEXT: ld %s9, (, %s11) +; CHECK-NEXT: b.l.t (, %s10) + call fastcc void @vsample_v334i64(<334 x i64> %p1) + ret void +} + +declare fastcc void @vsample_v334i32(<334 x i32>) + +define fastcc void @vreg_arg_v334i32_r1(<256 x i32> %p0, <334 x i32> %p1) { +; CHECK-LABEL: vreg_arg_v334i32_r1: +; CHECK: # %bb.0: +; CHECK-NEXT: st %s9, (, %s11) +; CHECK-NEXT: st %s10, 8(, %s11) +; CHECK-NEXT: or %s9, 0, %s11 +; CHECK-NEXT: lea %s11, -240(, %s11) +; CHECK-NEXT: brge.l.t %s11, %s8, .LBB15_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: ld %s61, 24(, %s14) +; CHECK-NEXT: or %s62, 0, %s0 +; CHECK-NEXT: lea %s63, 315 +; CHECK-NEXT: shm.l %s63, (%s61) +; CHECK-NEXT: shm.l %s8, 8(%s61) +; CHECK-NEXT: shm.l %s11, 16(%s61) +; CHECK-NEXT: monc +; CHECK-NEXT: or %s0, 0, %s62 +; CHECK-NEXT: .LBB15_2: +; CHECK-NEXT: lea %s16, 256 +; CHECK-NEXT: lvl %s16 +; CHECK-NEXT: vor 
%v0, (0)1, %v1 +; CHECK-NEXT: lea %s0, vsample_v334i32@lo +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s12, vsample_v334i32@hi(, %s0) +; CHECK-NEXT: bsic %s10, (, %s12) +; CHECK-NEXT: or %s11, 0, %s9 +; CHECK-NEXT: ld %s10, 8(, %s11) +; CHECK-NEXT: ld %s9, (, %s11) +; CHECK-NEXT: b.l.t (, %s10) + call fastcc void @vsample_v334i32(<334 x i32> %p1) + ret void +} + +declare fastcc void @vsample_v334i1(<334 x i1>) + +; FIXME: This test documents a bug in cc lowering: +; %p1 should live in 'VMP3' and there should be a copy from that to 'VMP1' here. +define fastcc void @vreg_arg_v334i1_r1(<256 x i1> %p0, <334 x i1> %p1) { +; CHECK-LABEL: vreg_arg_v334i1_r1: +; CHECK: # %bb.0: +; CHECK-NEXT: st %s9, (, %s11) +; CHECK-NEXT: st %s10, 8(, %s11) +; CHECK-NEXT: or %s9, 0, %s11 +; CHECK-NEXT: lea %s11, -240(, %s11) +; CHECK-NEXT: brge.l.t %s11, %s8, .LBB16_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: ld %s61, 24(, %s14) +; CHECK-NEXT: or %s62, 0, %s0 +; CHECK-NEXT: lea %s63, 315 +; CHECK-NEXT: shm.l %s63, (%s61) +; CHECK-NEXT: shm.l %s8, 8(%s61) +; CHECK-NEXT: shm.l %s11, 16(%s61) +; CHECK-NEXT: monc +; CHECK-NEXT: or %s0, 0, %s62 +; CHECK-NEXT: .LBB16_2: +; CHECK-NEXT: # kill: def $vm2 killed $vm2 killed $vmp1 def $vmp1 +; CHECK-NEXT: # kill: def $vm3 killed $vm3 killed $vmp1 def $vmp1 +; CHECK-NEXT: lea %s0, vsample_v334i1@lo +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s12, vsample_v334i1@hi(, %s0) +; CHECK-NEXT: bsic %s10, (, %s12) +; CHECK-NEXT: or %s11, 0, %s9 +; CHECK-NEXT: ld %s10, 8(, %s11) +; CHECK-NEXT: ld %s9, (, %s11) +; CHECK-NEXT: b.l.t (, %s10) + call fastcc void @vsample_v334i1(<334 x i1> %p1) + ret void +} + +; TODO: Implement custom element splitting to get this into vregs. 
+declare fastcc void @vsample_v17i128(<17 x i128>) + +define fastcc void @vreg_arg_v17i128_r1(<256 x i64> %p0, <17 x i128> %p1) { +; CHECK-LABEL: vreg_arg_v17i128_r1: +; CHECK: # %bb.0: +; CHECK-NEXT: st %s9, (, %s11) +; CHECK-NEXT: st %s10, 8(, %s11) +; CHECK-NEXT: or %s9, 0, %s11 +; CHECK-NEXT: lea %s11, -448(, %s11) +; CHECK-NEXT: brge.l.t %s11, %s8, .LBB17_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: ld %s61, 24(, %s14) +; CHECK-NEXT: or %s62, 0, %s0 +; CHECK-NEXT: lea %s63, 315 +; CHECK-NEXT: shm.l %s63, (%s61) +; CHECK-NEXT: shm.l %s8, 8(%s61) +; CHECK-NEXT: shm.l %s11, 16(%s61) +; CHECK-NEXT: monc +; CHECK-NEXT: or %s0, 0, %s62 +; CHECK-NEXT: .LBB17_2: +; CHECK-NEXT: ld %s34, 240(, %s9) +; CHECK-NEXT: ld %s35, 248(, %s9) +; CHECK-NEXT: ld %s36, 256(, %s9) +; CHECK-NEXT: ld %s37, 264(, %s9) +; CHECK-NEXT: ld %s38, 272(, %s9) +; CHECK-NEXT: ld %s39, 280(, %s9) +; CHECK-NEXT: ld %s40, 288(, %s9) +; CHECK-NEXT: ld %s41, 296(, %s9) +; CHECK-NEXT: ld %s42, 304(, %s9) +; CHECK-NEXT: ld %s43, 312(, %s9) +; CHECK-NEXT: ld %s44, 320(, %s9) +; CHECK-NEXT: ld %s45, 328(, %s9) +; CHECK-NEXT: ld %s46, 336(, %s9) +; CHECK-NEXT: ld %s47, 344(, %s9) +; CHECK-NEXT: ld %s48, 352(, %s9) +; CHECK-NEXT: ld %s49, 360(, %s9) +; CHECK-NEXT: ld %s50, 368(, %s9) +; CHECK-NEXT: ld %s51, 376(, %s9) +; CHECK-NEXT: ld %s52, 384(, %s9) +; CHECK-NEXT: ld %s53, 392(, %s9) +; CHECK-NEXT: ld %s54, 400(, %s9) +; CHECK-NEXT: ld %s55, 408(, %s9) +; CHECK-NEXT: ld %s56, 416(, %s9) +; CHECK-NEXT: ld %s57, 424(, %s9) +; CHECK-NEXT: ld %s58, 432(, %s9) +; CHECK-NEXT: ld %s59, 440(, %s9) +; CHECK-NEXT: st %s59, 440(, %s11) +; CHECK-NEXT: st %s58, 432(, %s11) +; CHECK-NEXT: st %s57, 424(, %s11) +; CHECK-NEXT: st %s56, 416(, %s11) +; CHECK-NEXT: st %s55, 408(, %s11) +; CHECK-NEXT: st %s54, 400(, %s11) +; CHECK-NEXT: st %s53, 392(, %s11) +; CHECK-NEXT: st %s52, 384(, %s11) +; CHECK-NEXT: st %s51, 376(, %s11) +; CHECK-NEXT: st %s50, 368(, %s11) +; CHECK-NEXT: st %s49, 360(, %s11) +; CHECK-NEXT: st %s48, 352(, 
%s11) +; CHECK-NEXT: st %s47, 344(, %s11) +; CHECK-NEXT: st %s46, 336(, %s11) +; CHECK-NEXT: st %s45, 328(, %s11) +; CHECK-NEXT: st %s44, 320(, %s11) +; CHECK-NEXT: st %s43, 312(, %s11) +; CHECK-NEXT: st %s42, 304(, %s11) +; CHECK-NEXT: st %s41, 296(, %s11) +; CHECK-NEXT: st %s40, 288(, %s11) +; CHECK-NEXT: st %s39, 280(, %s11) +; CHECK-NEXT: st %s38, 272(, %s11) +; CHECK-NEXT: st %s37, 264(, %s11) +; CHECK-NEXT: st %s36, 256(, %s11) +; CHECK-NEXT: st %s35, 248(, %s11) +; CHECK-NEXT: lea %s35, vsample_v17i128@lo +; CHECK-NEXT: and %s35, %s35, (32)0 +; CHECK-NEXT: lea.sl %s12, vsample_v17i128@hi(, %s35) +; CHECK-NEXT: st %s34, 240(, %s11) +; CHECK-NEXT: bsic %s10, (, %s12) +; CHECK-NEXT: or %s11, 0, %s9 +; CHECK-NEXT: ld %s10, 8(, %s11) +; CHECK-NEXT: ld %s9, (, %s11) +; CHECK-NEXT: b.l.t (, %s10) + call fastcc void @vsample_v17i128(<17 x i128> %p1) + ret void +} + +declare fastcc void @vsample_v17i65(<17 x i65>) + +define fastcc void @vreg_arg_v17i65_r1(<256 x i64> %p0, <17 x i65> %p1) { +; CHECK-LABEL: vreg_arg_v17i65_r1: +; CHECK: # %bb.0: +; CHECK-NEXT: st %s9, (, %s11) +; CHECK-NEXT: st %s10, 8(, %s11) +; CHECK-NEXT: or %s9, 0, %s11 +; CHECK-NEXT: lea %s11, -448(, %s11) +; CHECK-NEXT: brge.l.t %s11, %s8, .LBB18_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: ld %s61, 24(, %s14) +; CHECK-NEXT: or %s62, 0, %s0 +; CHECK-NEXT: lea %s63, 315 +; CHECK-NEXT: shm.l %s63, (%s61) +; CHECK-NEXT: shm.l %s8, 8(%s61) +; CHECK-NEXT: shm.l %s11, 16(%s61) +; CHECK-NEXT: monc +; CHECK-NEXT: or %s0, 0, %s62 +; CHECK-NEXT: .LBB18_2: +; CHECK-NEXT: ld %s34, 240(, %s9) +; CHECK-NEXT: ld %s35, 248(, %s9) +; CHECK-NEXT: ld %s36, 256(, %s9) +; CHECK-NEXT: ld %s37, 264(, %s9) +; CHECK-NEXT: ld %s38, 272(, %s9) +; CHECK-NEXT: ld %s39, 280(, %s9) +; CHECK-NEXT: ld %s40, 288(, %s9) +; CHECK-NEXT: ld %s41, 296(, %s9) +; CHECK-NEXT: ld %s42, 304(, %s9) +; CHECK-NEXT: ld %s43, 312(, %s9) +; CHECK-NEXT: ld %s44, 320(, %s9) +; CHECK-NEXT: ld %s45, 328(, %s9) +; CHECK-NEXT: ld %s46, 336(, %s9) +; 
CHECK-NEXT: ld %s47, 344(, %s9) +; CHECK-NEXT: ld %s48, 352(, %s9) +; CHECK-NEXT: ld %s49, 360(, %s9) +; CHECK-NEXT: ld %s50, 368(, %s9) +; CHECK-NEXT: ld %s51, 376(, %s9) +; CHECK-NEXT: ld %s52, 384(, %s9) +; CHECK-NEXT: ld %s53, 392(, %s9) +; CHECK-NEXT: ld %s54, 400(, %s9) +; CHECK-NEXT: ld %s55, 408(, %s9) +; CHECK-NEXT: ld %s56, 416(, %s9) +; CHECK-NEXT: ld %s57, 424(, %s9) +; CHECK-NEXT: ld %s58, 432(, %s9) +; CHECK-NEXT: ld %s59, 440(, %s9) +; CHECK-NEXT: st %s59, 440(, %s11) +; CHECK-NEXT: st %s58, 432(, %s11) +; CHECK-NEXT: st %s57, 424(, %s11) +; CHECK-NEXT: st %s56, 416(, %s11) +; CHECK-NEXT: st %s55, 408(, %s11) +; CHECK-NEXT: st %s54, 400(, %s11) +; CHECK-NEXT: st %s53, 392(, %s11) +; CHECK-NEXT: st %s52, 384(, %s11) +; CHECK-NEXT: st %s51, 376(, %s11) +; CHECK-NEXT: st %s50, 368(, %s11) +; CHECK-NEXT: st %s49, 360(, %s11) +; CHECK-NEXT: st %s48, 352(, %s11) +; CHECK-NEXT: st %s47, 344(, %s11) +; CHECK-NEXT: st %s46, 336(, %s11) +; CHECK-NEXT: st %s45, 328(, %s11) +; CHECK-NEXT: st %s44, 320(, %s11) +; CHECK-NEXT: st %s43, 312(, %s11) +; CHECK-NEXT: st %s42, 304(, %s11) +; CHECK-NEXT: st %s41, 296(, %s11) +; CHECK-NEXT: st %s40, 288(, %s11) +; CHECK-NEXT: st %s39, 280(, %s11) +; CHECK-NEXT: st %s38, 272(, %s11) +; CHECK-NEXT: st %s37, 264(, %s11) +; CHECK-NEXT: st %s36, 256(, %s11) +; CHECK-NEXT: st %s35, 248(, %s11) +; CHECK-NEXT: lea %s35, vsample_v17i65@lo +; CHECK-NEXT: and %s35, %s35, (32)0 +; CHECK-NEXT: lea.sl %s12, vsample_v17i65@hi(, %s35) +; CHECK-NEXT: st %s34, 240(, %s11) +; CHECK-NEXT: bsic %s10, (, %s12) +; CHECK-NEXT: or %s11, 0, %s9 +; CHECK-NEXT: ld %s10, 8(, %s11) +; CHECK-NEXT: ld %s9, (, %s11) +; CHECK-NEXT: b.l.t (, %s10) + call fastcc void @vsample_v17i65(<17 x i65> %p1) + ret void +} + +;; Vectors with under-sized elements. 
+declare fastcc void @vsample_v17i16(<17 x i16>) + +define fastcc void @vreg_arg_v17i16_r1(<256 x i16> %p0, <17 x i16> %p1) { +; CHECK-LABEL: vreg_arg_v17i16_r1: +; CHECK: # %bb.0: +; CHECK-NEXT: st %s9, (, %s11) +; CHECK-NEXT: st %s10, 8(, %s11) +; CHECK-NEXT: or %s9, 0, %s11 +; CHECK-NEXT: lea %s11, -240(, %s11) +; CHECK-NEXT: brge.l.t %s11, %s8, .LBB19_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: ld %s61, 24(, %s14) +; CHECK-NEXT: or %s62, 0, %s0 +; CHECK-NEXT: lea %s63, 315 +; CHECK-NEXT: shm.l %s63, (%s61) +; CHECK-NEXT: shm.l %s8, 8(%s61) +; CHECK-NEXT: shm.l %s11, 16(%s61) +; CHECK-NEXT: monc +; CHECK-NEXT: or %s0, 0, %s62 +; CHECK-NEXT: .LBB19_2: +; CHECK-NEXT: lea %s16, 256 +; CHECK-NEXT: lvl %s16 +; CHECK-NEXT: vor %v0, (0)1, %v1 +; CHECK-NEXT: lea %s0, vsample_v17i16@lo +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s12, vsample_v17i16@hi(, %s0) +; CHECK-NEXT: bsic %s10, (, %s12) +; CHECK-NEXT: or %s11, 0, %s9 +; CHECK-NEXT: ld %s10, 8(, %s11) +; CHECK-NEXT: ld %s9, (, %s11) +; CHECK-NEXT: b.l.t (, %s10) + call fastcc void @vsample_v17i16(<17 x i16> %p1) + ret void +} + +declare fastcc void @vsample_v17i13(<17 x i13>) + +define fastcc void @vreg_arg_v17i13_r1(<256 x i13> %p0, <17 x i13> %p1) { +; CHECK-LABEL: vreg_arg_v17i13_r1: +; CHECK: # %bb.0: +; CHECK-NEXT: st %s9, (, %s11) +; CHECK-NEXT: st %s10, 8(, %s11) +; CHECK-NEXT: or %s9, 0, %s11 +; CHECK-NEXT: lea %s11, -240(, %s11) +; CHECK-NEXT: brge.l.t %s11, %s8, .LBB20_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: ld %s61, 24(, %s14) +; CHECK-NEXT: or %s62, 0, %s0 +; CHECK-NEXT: lea %s63, 315 +; CHECK-NEXT: shm.l %s63, (%s61) +; CHECK-NEXT: shm.l %s8, 8(%s61) +; CHECK-NEXT: shm.l %s11, 16(%s61) +; CHECK-NEXT: monc +; CHECK-NEXT: or %s0, 0, %s62 +; CHECK-NEXT: .LBB20_2: +; CHECK-NEXT: lea %s16, 256 +; CHECK-NEXT: lvl %s16 +; CHECK-NEXT: vor %v0, (0)1, %v1 +; CHECK-NEXT: lea %s0, vsample_v17i13@lo +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s12, vsample_v17i13@hi(, %s0) +; CHECK-NEXT: 
bsic %s10, (, %s12) +; CHECK-NEXT: or %s11, 0, %s9 +; CHECK-NEXT: ld %s10, 8(, %s11) +; CHECK-NEXT: ld %s9, (, %s11) +; CHECK-NEXT: b.l.t (, %s10) + call fastcc void @vsample_v17i13(<17 x i13> %p1) + ret void +} diff --git a/llvm/test/CodeGen/VE/Vector/vec_add.ll b/llvm/test/CodeGen/VE/Vector/vec_add.ll index ca096af63266..9837d92b6cc8 100644 --- a/llvm/test/CodeGen/VE/Vector/vec_add.ll +++ b/llvm/test/CodeGen/VE/Vector/vec_add.ll @@ -124,719 +124,14 @@ define fastcc <256 x i16> @add_vv_v256i16(<256 x i16> %x, <256 x i16> %y) { define fastcc <128 x i16> @add_vv_v128i16(<128 x i16> %x, <128 x i16> %y) { ; CHECK-LABEL: add_vv_v128i16: ; CHECK: # %bb.0: -; CHECK-NEXT: st %s9, (, %s11) -; CHECK-NEXT: st %s10, 8(, %s11) -; CHECK-NEXT: or %s9, 0, %s11 -; CHECK-NEXT: lea %s11, -496(, %s11) -; CHECK-NEXT: brge.l.t %s11, %s8, .LBB8_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: ld %s61, 24(, %s14) -; CHECK-NEXT: or %s62, 0, %s0 -; CHECK-NEXT: lea %s63, 315 -; CHECK-NEXT: shm.l %s63, (%s61) -; CHECK-NEXT: shm.l %s8, 8(%s61) -; CHECK-NEXT: shm.l %s11, 16(%s61) -; CHECK-NEXT: monc -; CHECK-NEXT: or %s0, 0, %s62 -; CHECK-NEXT: .LBB8_2: -; CHECK-NEXT: st %s18, 544(, %s11) # 8-byte Folded Spill -; CHECK-NEXT: st %s19, 552(, %s11) # 8-byte Folded Spill -; CHECK-NEXT: st %s20, 560(, %s11) # 8-byte Folded Spill -; CHECK-NEXT: st %s21, 568(, %s11) # 8-byte Folded Spill -; CHECK-NEXT: st %s22, 576(, %s11) # 8-byte Folded Spill -; CHECK-NEXT: st %s23, 584(, %s11) # 8-byte Folded Spill -; CHECK-NEXT: st %s24, 592(, %s11) # 8-byte Folded Spill -; CHECK-NEXT: st %s25, 600(, %s11) # 8-byte Folded Spill -; CHECK-NEXT: st %s26, 608(, %s11) # 8-byte Folded Spill -; CHECK-NEXT: st %s27, 616(, %s11) # 8-byte Folded Spill -; CHECK-NEXT: st %s28, 624(, %s11) # 8-byte Folded Spill -; CHECK-NEXT: st %s29, 632(, %s11) # 8-byte Folded Spill -; CHECK-NEXT: st %s30, 640(, %s11) # 8-byte Folded Spill -; CHECK-NEXT: st %s31, 648(, %s11) # 8-byte Folded Spill -; CHECK-NEXT: st %s32, 656(, %s11) # 8-byte Folded 
Spill -; CHECK-NEXT: st %s33, 664(, %s11) # 8-byte Folded Spill -; CHECK-NEXT: ld2b.zx %s32, 2112(, %s11) -; CHECK-NEXT: ld2b.zx %s31, 2104(, %s11) -; CHECK-NEXT: ld2b.zx %s28, 2080(, %s11) -; CHECK-NEXT: ld2b.zx %s27, 2072(, %s11) -; CHECK-NEXT: ld2b.zx %s34, 2064(, %s11) -; CHECK-NEXT: ld2b.zx %s35, 2056(, %s11) -; CHECK-NEXT: ld2b.zx %s36, 2048(, %s11) -; CHECK-NEXT: ld2b.zx %s23, 2040(, %s11) -; CHECK-NEXT: ld2b.zx %s22, 2032(, %s11) -; CHECK-NEXT: ld2b.zx %s21, 2024(, %s11) -; CHECK-NEXT: ld2b.zx %s20, 2016(, %s11) -; CHECK-NEXT: ld2b.zx %s19, 2008(, %s11) -; CHECK-NEXT: ld2b.zx %s18, 2000(, %s11) -; CHECK-NEXT: ld2b.zx %s63, 1992(, %s11) -; CHECK-NEXT: ld2b.zx %s62, 1984(, %s11) -; CHECK-NEXT: ld2b.zx %s61, 1976(, %s11) -; CHECK-NEXT: ld2b.zx %s60, 1968(, %s11) -; CHECK-NEXT: ld2b.zx %s59, 1960(, %s11) -; CHECK-NEXT: ld2b.zx %s58, 1952(, %s11) -; CHECK-NEXT: ld2b.zx %s57, 1944(, %s11) -; CHECK-NEXT: ld2b.zx %s56, 1936(, %s11) -; CHECK-NEXT: ld2b.zx %s55, 1928(, %s11) -; CHECK-NEXT: ld2b.zx %s54, 1920(, %s11) -; CHECK-NEXT: ld2b.zx %s53, 1912(, %s11) -; CHECK-NEXT: ld2b.zx %s52, 1904(, %s11) -; CHECK-NEXT: ld2b.zx %s51, 1896(, %s11) -; CHECK-NEXT: ld2b.zx %s50, 1888(, %s11) -; CHECK-NEXT: ld2b.zx %s49, 1880(, %s11) -; CHECK-NEXT: ld2b.zx %s48, 1872(, %s11) -; CHECK-NEXT: ld2b.zx %s47, 1864(, %s11) -; CHECK-NEXT: ld2b.zx %s46, 1856(, %s11) -; CHECK-NEXT: ld2b.zx %s45, 1848(, %s11) -; CHECK-NEXT: ld2b.zx %s44, 1840(, %s11) -; CHECK-NEXT: ld2b.zx %s43, 1832(, %s11) -; CHECK-NEXT: ld2b.zx %s42, 1824(, %s11) -; CHECK-NEXT: ld2b.zx %s41, 1816(, %s11) -; CHECK-NEXT: ld2b.zx %s40, 1808(, %s11) -; CHECK-NEXT: ld2b.zx %s39, 1800(, %s11) -; CHECK-NEXT: ld2b.zx %s38, 1792(, %s11) -; CHECK-NEXT: ld2b.zx %s37, 1784(, %s11) -; CHECK-NEXT: ld2b.zx %s24, 1776(, %s11) -; CHECK-NEXT: ld2b.zx %s25, 1768(, %s11) -; CHECK-NEXT: ld2b.zx %s26, 1704(, %s11) -; CHECK-NEXT: ld2b.zx %s29, 1760(, %s11) -; CHECK-NEXT: ld2b.zx %s30, 1712(, %s11) -; CHECK-NEXT: ld2b.zx %s33, 1720(, %s11) -; 
CHECK-NEXT: adds.w.sx %s1, %s1, %s26 -; CHECK-NEXT: stl %s1, 492(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: ld2b.zx %s1, 1728(, %s11) -; CHECK-NEXT: adds.w.sx %s2, %s2, %s30 -; CHECK-NEXT: stl %s2, 488(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: ld2b.zx %s2, 1736(, %s11) -; CHECK-NEXT: adds.w.sx %s3, %s3, %s33 -; CHECK-NEXT: stl %s3, 484(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: ld2b.zx %s3, 1744(, %s11) -; CHECK-NEXT: adds.w.sx %s1, %s4, %s1 -; CHECK-NEXT: stl %s1, 480(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: ld2b.zx %s1, 1752(, %s11) -; CHECK-NEXT: adds.w.sx %s2, %s5, %s2 -; CHECK-NEXT: stl %s2, 476(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: ld2b.zx %s2, 736(, %s11) -; CHECK-NEXT: adds.w.sx %s3, %s6, %s3 -; CHECK-NEXT: stl %s3, 472(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: ld2b.zx %s3, 744(, %s11) -; CHECK-NEXT: adds.w.sx %s1, %s7, %s1 -; CHECK-NEXT: stl %s1, 468(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: ld2b.zx %s1, 752(, %s11) -; CHECK-NEXT: adds.w.sx %s2, %s2, %s29 -; CHECK-NEXT: stl %s2, 464(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: ld2b.zx %s2, 760(, %s11) -; CHECK-NEXT: adds.w.sx %s3, %s3, %s25 -; CHECK-NEXT: stl %s3, 460(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: ld2b.zx %s3, 768(, %s11) -; CHECK-NEXT: adds.w.sx %s1, %s1, %s24 -; CHECK-NEXT: stl %s1, 456(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: ld2b.zx %s1, 776(, %s11) -; CHECK-NEXT: adds.w.sx %s2, %s2, %s37 -; CHECK-NEXT: stl %s2, 452(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: ld2b.zx %s2, 784(, %s11) -; CHECK-NEXT: adds.w.sx %s3, %s3, %s38 -; CHECK-NEXT: stl %s3, 448(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: ld2b.zx %s3, 792(, %s11) -; CHECK-NEXT: adds.w.sx %s1, %s1, %s39 -; CHECK-NEXT: stl %s1, 444(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: ld2b.zx %s1, 800(, %s11) -; CHECK-NEXT: adds.w.sx %s2, %s2, %s40 -; CHECK-NEXT: stl %s2, 440(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: ld2b.zx %s2, 808(, %s11) -; CHECK-NEXT: adds.w.sx %s3, %s3, %s41 -; CHECK-NEXT: stl %s3, 436(, 
%s11) # 4-byte Folded Spill -; CHECK-NEXT: ld2b.zx %s3, 816(, %s11) -; CHECK-NEXT: adds.w.sx %s1, %s1, %s42 -; CHECK-NEXT: stl %s1, 432(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: ld2b.zx %s1, 824(, %s11) -; CHECK-NEXT: adds.w.sx %s2, %s2, %s43 -; CHECK-NEXT: stl %s2, 428(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: ld2b.zx %s2, 832(, %s11) -; CHECK-NEXT: adds.w.sx %s3, %s3, %s44 -; CHECK-NEXT: stl %s3, 424(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: ld2b.zx %s3, 840(, %s11) -; CHECK-NEXT: adds.w.sx %s1, %s1, %s45 -; CHECK-NEXT: stl %s1, 420(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: ld2b.zx %s1, 848(, %s11) -; CHECK-NEXT: adds.w.sx %s2, %s2, %s46 -; CHECK-NEXT: stl %s2, 416(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: ld2b.zx %s2, 856(, %s11) -; CHECK-NEXT: adds.w.sx %s3, %s3, %s47 -; CHECK-NEXT: stl %s3, 412(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: ld2b.zx %s3, 864(, %s11) -; CHECK-NEXT: adds.w.sx %s1, %s1, %s48 -; CHECK-NEXT: stl %s1, 408(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: ld2b.zx %s1, 872(, %s11) -; CHECK-NEXT: adds.w.sx %s2, %s2, %s49 -; CHECK-NEXT: stl %s2, 404(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: ld2b.zx %s2, 880(, %s11) -; CHECK-NEXT: adds.w.sx %s3, %s3, %s50 -; CHECK-NEXT: stl %s3, 400(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: ld2b.zx %s3, 888(, %s11) -; CHECK-NEXT: adds.w.sx %s1, %s1, %s51 -; CHECK-NEXT: stl %s1, 396(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: ld2b.zx %s1, 896(, %s11) -; CHECK-NEXT: adds.w.sx %s2, %s2, %s52 -; CHECK-NEXT: stl %s2, 392(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: ld2b.zx %s2, 904(, %s11) -; CHECK-NEXT: adds.w.sx %s3, %s3, %s53 -; CHECK-NEXT: stl %s3, 388(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: ld2b.zx %s3, 912(, %s11) -; CHECK-NEXT: adds.w.sx %s1, %s1, %s54 -; CHECK-NEXT: stl %s1, 384(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: ld2b.zx %s1, 920(, %s11) -; CHECK-NEXT: adds.w.sx %s2, %s2, %s55 -; CHECK-NEXT: stl %s2, 380(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: ld2b.zx %s2, 928(, %s11) 
-; CHECK-NEXT: adds.w.sx %s3, %s3, %s56 -; CHECK-NEXT: stl %s3, 376(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: ld2b.zx %s3, 936(, %s11) -; CHECK-NEXT: adds.w.sx %s1, %s1, %s57 -; CHECK-NEXT: stl %s1, 372(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: ld2b.zx %s1, 944(, %s11) -; CHECK-NEXT: adds.w.sx %s2, %s2, %s58 -; CHECK-NEXT: stl %s2, 368(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: ld2b.zx %s2, 952(, %s11) -; CHECK-NEXT: adds.w.sx %s3, %s3, %s59 -; CHECK-NEXT: stl %s3, 364(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: ld2b.zx %s3, 960(, %s11) -; CHECK-NEXT: adds.w.sx %s1, %s1, %s60 -; CHECK-NEXT: stl %s1, 360(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: ld2b.zx %s1, 968(, %s11) -; CHECK-NEXT: adds.w.sx %s2, %s2, %s61 -; CHECK-NEXT: stl %s2, 356(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: ld2b.zx %s2, 976(, %s11) -; CHECK-NEXT: adds.w.sx %s3, %s3, %s62 -; CHECK-NEXT: stl %s3, 352(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: ld2b.zx %s3, 984(, %s11) -; CHECK-NEXT: adds.w.sx %s1, %s1, %s63 -; CHECK-NEXT: stl %s1, 348(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: ld2b.zx %s1, 992(, %s11) -; CHECK-NEXT: adds.w.sx %s2, %s2, %s18 -; CHECK-NEXT: stl %s2, 344(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: ld2b.zx %s2, 1000(, %s11) -; CHECK-NEXT: adds.w.sx %s3, %s3, %s19 -; CHECK-NEXT: stl %s3, 340(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: ld2b.zx %s3, 1008(, %s11) -; CHECK-NEXT: adds.w.sx %s1, %s1, %s20 -; CHECK-NEXT: stl %s1, 336(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: ld2b.zx %s1, 1016(, %s11) -; CHECK-NEXT: adds.w.sx %s2, %s2, %s21 -; CHECK-NEXT: stl %s2, 332(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: ld2b.zx %s2, 1024(, %s11) -; CHECK-NEXT: adds.w.sx %s3, %s3, %s22 -; CHECK-NEXT: stl %s3, 328(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: ld2b.zx %s3, 1032(, %s11) -; CHECK-NEXT: adds.w.sx %s1, %s1, %s23 -; CHECK-NEXT: stl %s1, 324(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: ld2b.zx %s1, 1040(, %s11) -; CHECK-NEXT: adds.w.sx %s2, %s2, %s36 -; CHECK-NEXT: stl 
%s2, 320(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: ld2b.zx %s2, 1048(, %s11) -; CHECK-NEXT: adds.w.sx %s3, %s3, %s35 -; CHECK-NEXT: stl %s3, 316(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: ld2b.zx %s3, 1056(, %s11) -; CHECK-NEXT: adds.w.sx %s1, %s1, %s34 -; CHECK-NEXT: stl %s1, 312(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: ld2b.zx %s1, 2096(, %s11) -; CHECK-NEXT: adds.w.sx %s2, %s2, %s27 -; CHECK-NEXT: stl %s2, 308(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: ld2b.zx %s2, 2088(, %s11) -; CHECK-NEXT: adds.w.sx %s3, %s3, %s28 -; CHECK-NEXT: stl %s3, 304(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: ld2b.zx %s3, 1064(, %s11) -; CHECK-NEXT: ld2b.zx %s4, 1072(, %s11) -; CHECK-NEXT: ld2b.zx %s5, 1080(, %s11) -; CHECK-NEXT: ld2b.zx %s6, 1088(, %s11) -; CHECK-NEXT: adds.w.sx %s2, %s3, %s2 -; CHECK-NEXT: stl %s2, 300(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: adds.w.sx %s1, %s4, %s1 -; CHECK-NEXT: stl %s1, 296(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: adds.w.sx %s1, %s5, %s31 -; CHECK-NEXT: stl %s1, 292(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: adds.w.sx %s1, %s6, %s32 -; CHECK-NEXT: stl %s1, 288(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: ld2b.zx %s1, 2144(, %s11) -; CHECK-NEXT: ld2b.zx %s2, 2136(, %s11) -; CHECK-NEXT: ld2b.zx %s3, 2128(, %s11) -; CHECK-NEXT: ld2b.zx %s4, 2120(, %s11) -; CHECK-NEXT: ld2b.zx %s5, 1096(, %s11) -; CHECK-NEXT: ld2b.zx %s6, 1104(, %s11) -; CHECK-NEXT: ld2b.zx %s7, 1112(, %s11) -; CHECK-NEXT: ld2b.zx %s34, 1120(, %s11) -; CHECK-NEXT: adds.w.sx %s4, %s5, %s4 -; CHECK-NEXT: stl %s4, 284(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: adds.w.sx %s3, %s6, %s3 -; CHECK-NEXT: stl %s3, 280(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: adds.w.sx %s2, %s7, %s2 -; CHECK-NEXT: stl %s2, 276(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: adds.w.sx %s1, %s34, %s1 -; CHECK-NEXT: stl %s1, 272(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: ld2b.zx %s34, 2176(, %s11) -; CHECK-NEXT: ld2b.zx %s7, 2168(, %s11) -; CHECK-NEXT: ld2b.zx %s3, 2160(, %s11) -; 
CHECK-NEXT: ld2b.zx %s4, 2152(, %s11) -; CHECK-NEXT: ld2b.zx %s5, 1128(, %s11) -; CHECK-NEXT: ld2b.zx %s6, 1136(, %s11) -; CHECK-NEXT: ld2b.zx %s1, 1144(, %s11) -; CHECK-NEXT: ld2b.zx %s2, 1152(, %s11) -; CHECK-NEXT: adds.w.sx %s4, %s5, %s4 -; CHECK-NEXT: stl %s4, 268(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: adds.w.sx %s3, %s6, %s3 -; CHECK-NEXT: stl %s3, 264(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: adds.w.sx %s1, %s1, %s7 -; CHECK-NEXT: stl %s1, 260(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: adds.w.sx %s1, %s2, %s34 -; CHECK-NEXT: stl %s1, 256(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: ld2b.zx %s7, 2208(, %s11) -; CHECK-NEXT: ld2b.zx %s5, 2200(, %s11) -; CHECK-NEXT: ld2b.zx %s4, 2192(, %s11) -; CHECK-NEXT: ld2b.zx %s6, 2184(, %s11) -; CHECK-NEXT: ld2b.zx %s34, 1160(, %s11) -; CHECK-NEXT: ld2b.zx %s1, 1168(, %s11) -; CHECK-NEXT: ld2b.zx %s2, 1176(, %s11) -; CHECK-NEXT: ld2b.zx %s3, 1184(, %s11) -; CHECK-NEXT: adds.w.sx %s6, %s34, %s6 -; CHECK-NEXT: stl %s6, 252(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: adds.w.sx %s1, %s1, %s4 -; CHECK-NEXT: stl %s1, 248(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: adds.w.sx %s1, %s2, %s5 -; CHECK-NEXT: stl %s1, 244(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: adds.w.sx %s1, %s3, %s7 -; CHECK-NEXT: stl %s1, 240(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: ld2b.zx %s34, 2240(, %s11) -; CHECK-NEXT: ld2b.zx %s6, 2232(, %s11) -; CHECK-NEXT: ld2b.zx %s5, 2224(, %s11) -; CHECK-NEXT: ld2b.zx %s7, 2216(, %s11) -; CHECK-NEXT: ld2b.zx %s1, 1192(, %s11) -; CHECK-NEXT: ld2b.zx %s2, 1200(, %s11) -; CHECK-NEXT: ld2b.zx %s3, 1208(, %s11) -; CHECK-NEXT: ld2b.zx %s4, 1216(, %s11) -; CHECK-NEXT: adds.w.sx %s1, %s1, %s7 -; CHECK-NEXT: stl %s1, 236(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: adds.w.sx %s1, %s2, %s5 -; CHECK-NEXT: stl %s1, 232(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: adds.w.sx %s1, %s3, %s6 -; CHECK-NEXT: stl %s1, 228(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: adds.w.sx %s1, %s4, %s34 -; CHECK-NEXT: stl %s1, 224(, 
%s11) # 4-byte Folded Spill -; CHECK-NEXT: ld2b.zx %s5, 2272(, %s11) -; CHECK-NEXT: ld2b.zx %s6, 2264(, %s11) -; CHECK-NEXT: ld2b.zx %s7, 2256(, %s11) -; CHECK-NEXT: ld2b.zx %s34, 2248(, %s11) -; CHECK-NEXT: ld2b.zx %s1, 1224(, %s11) -; CHECK-NEXT: ld2b.zx %s2, 1232(, %s11) -; CHECK-NEXT: ld2b.zx %s3, 1240(, %s11) -; CHECK-NEXT: ld2b.zx %s4, 1248(, %s11) -; CHECK-NEXT: adds.w.sx %s1, %s1, %s34 -; CHECK-NEXT: stl %s1, 220(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: adds.w.sx %s1, %s2, %s7 -; CHECK-NEXT: stl %s1, 216(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: adds.w.sx %s1, %s3, %s6 -; CHECK-NEXT: stl %s1, 212(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: adds.w.sx %s1, %s4, %s5 -; CHECK-NEXT: stl %s1, 208(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: ld2b.zx %s5, 2304(, %s11) -; CHECK-NEXT: ld2b.zx %s6, 2296(, %s11) -; CHECK-NEXT: ld2b.zx %s7, 2288(, %s11) -; CHECK-NEXT: ld2b.zx %s34, 2280(, %s11) -; CHECK-NEXT: ld2b.zx %s1, 1256(, %s11) -; CHECK-NEXT: ld2b.zx %s2, 1264(, %s11) -; CHECK-NEXT: ld2b.zx %s3, 1272(, %s11) -; CHECK-NEXT: ld2b.zx %s4, 1280(, %s11) -; CHECK-NEXT: adds.w.sx %s1, %s1, %s34 -; CHECK-NEXT: stl %s1, 204(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: adds.w.sx %s1, %s2, %s7 -; CHECK-NEXT: stl %s1, 200(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: adds.w.sx %s1, %s3, %s6 -; CHECK-NEXT: stl %s1, 196(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: adds.w.sx %s1, %s4, %s5 -; CHECK-NEXT: stl %s1, 192(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: ld2b.zx %s5, 2336(, %s11) -; CHECK-NEXT: ld2b.zx %s6, 2328(, %s11) -; CHECK-NEXT: ld2b.zx %s7, 2320(, %s11) -; CHECK-NEXT: ld2b.zx %s34, 2312(, %s11) -; CHECK-NEXT: ld2b.zx %s1, 1288(, %s11) -; CHECK-NEXT: ld2b.zx %s2, 1296(, %s11) -; CHECK-NEXT: ld2b.zx %s3, 1304(, %s11) -; CHECK-NEXT: ld2b.zx %s4, 1312(, %s11) -; CHECK-NEXT: adds.w.sx %s1, %s1, %s34 -; CHECK-NEXT: stl %s1, 188(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: adds.w.sx %s1, %s2, %s7 -; CHECK-NEXT: stl %s1, 184(, %s11) # 4-byte Folded Spill -; 
CHECK-NEXT: adds.w.sx %s1, %s3, %s6 -; CHECK-NEXT: stl %s1, 180(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: adds.w.sx %s33, %s4, %s5 -; CHECK-NEXT: ld2b.zx %s5, 2368(, %s11) -; CHECK-NEXT: ld2b.zx %s6, 2360(, %s11) -; CHECK-NEXT: ld2b.zx %s7, 2352(, %s11) -; CHECK-NEXT: ld2b.zx %s34, 2344(, %s11) -; CHECK-NEXT: ld2b.zx %s1, 1320(, %s11) -; CHECK-NEXT: ld2b.zx %s2, 1328(, %s11) -; CHECK-NEXT: ld2b.zx %s3, 1336(, %s11) -; CHECK-NEXT: ld2b.zx %s4, 1344(, %s11) -; CHECK-NEXT: adds.w.sx %s32, %s1, %s34 -; CHECK-NEXT: adds.w.sx %s31, %s2, %s7 -; CHECK-NEXT: adds.w.sx %s30, %s3, %s6 -; CHECK-NEXT: adds.w.sx %s29, %s4, %s5 -; CHECK-NEXT: ld2b.zx %s5, 2400(, %s11) -; CHECK-NEXT: ld2b.zx %s6, 2392(, %s11) -; CHECK-NEXT: ld2b.zx %s7, 2384(, %s11) -; CHECK-NEXT: ld2b.zx %s34, 2376(, %s11) -; CHECK-NEXT: ld2b.zx %s1, 1352(, %s11) -; CHECK-NEXT: ld2b.zx %s2, 1360(, %s11) -; CHECK-NEXT: ld2b.zx %s3, 1368(, %s11) -; CHECK-NEXT: ld2b.zx %s4, 1376(, %s11) -; CHECK-NEXT: adds.w.sx %s28, %s1, %s34 -; CHECK-NEXT: adds.w.sx %s27, %s2, %s7 -; CHECK-NEXT: adds.w.sx %s26, %s3, %s6 -; CHECK-NEXT: adds.w.sx %s25, %s4, %s5 -; CHECK-NEXT: ld2b.zx %s5, 2432(, %s11) -; CHECK-NEXT: ld2b.zx %s6, 2424(, %s11) -; CHECK-NEXT: ld2b.zx %s7, 2416(, %s11) -; CHECK-NEXT: ld2b.zx %s34, 2408(, %s11) -; CHECK-NEXT: ld2b.zx %s1, 1384(, %s11) -; CHECK-NEXT: ld2b.zx %s2, 1392(, %s11) -; CHECK-NEXT: ld2b.zx %s3, 1400(, %s11) -; CHECK-NEXT: ld2b.zx %s4, 1408(, %s11) -; CHECK-NEXT: adds.w.sx %s24, %s1, %s34 -; CHECK-NEXT: adds.w.sx %s23, %s2, %s7 -; CHECK-NEXT: adds.w.sx %s22, %s3, %s6 -; CHECK-NEXT: adds.w.sx %s21, %s4, %s5 -; CHECK-NEXT: ld2b.zx %s5, 2464(, %s11) -; CHECK-NEXT: ld2b.zx %s6, 2456(, %s11) -; CHECK-NEXT: ld2b.zx %s7, 2448(, %s11) -; CHECK-NEXT: ld2b.zx %s34, 2440(, %s11) -; CHECK-NEXT: ld2b.zx %s1, 1416(, %s11) -; CHECK-NEXT: ld2b.zx %s2, 1424(, %s11) -; CHECK-NEXT: ld2b.zx %s3, 1432(, %s11) -; CHECK-NEXT: ld2b.zx %s4, 1440(, %s11) -; CHECK-NEXT: adds.w.sx %s20, %s1, %s34 -; CHECK-NEXT: adds.w.sx 
%s19, %s2, %s7 -; CHECK-NEXT: adds.w.sx %s18, %s3, %s6 -; CHECK-NEXT: adds.w.sx %s63, %s4, %s5 -; CHECK-NEXT: ld2b.zx %s5, 2496(, %s11) -; CHECK-NEXT: ld2b.zx %s6, 2488(, %s11) -; CHECK-NEXT: ld2b.zx %s7, 2480(, %s11) -; CHECK-NEXT: ld2b.zx %s34, 2472(, %s11) -; CHECK-NEXT: ld2b.zx %s1, 1448(, %s11) -; CHECK-NEXT: ld2b.zx %s2, 1456(, %s11) -; CHECK-NEXT: ld2b.zx %s3, 1464(, %s11) -; CHECK-NEXT: ld2b.zx %s4, 1472(, %s11) -; CHECK-NEXT: adds.w.sx %s62, %s1, %s34 -; CHECK-NEXT: adds.w.sx %s61, %s2, %s7 -; CHECK-NEXT: adds.w.sx %s60, %s3, %s6 -; CHECK-NEXT: adds.w.sx %s59, %s4, %s5 -; CHECK-NEXT: ld2b.zx %s5, 2528(, %s11) -; CHECK-NEXT: ld2b.zx %s6, 2520(, %s11) -; CHECK-NEXT: ld2b.zx %s7, 2512(, %s11) -; CHECK-NEXT: ld2b.zx %s34, 2504(, %s11) -; CHECK-NEXT: ld2b.zx %s1, 1480(, %s11) -; CHECK-NEXT: ld2b.zx %s2, 1488(, %s11) -; CHECK-NEXT: ld2b.zx %s3, 1496(, %s11) -; CHECK-NEXT: ld2b.zx %s4, 1504(, %s11) -; CHECK-NEXT: adds.w.sx %s58, %s1, %s34 -; CHECK-NEXT: adds.w.sx %s57, %s2, %s7 -; CHECK-NEXT: adds.w.sx %s56, %s3, %s6 -; CHECK-NEXT: adds.w.sx %s55, %s4, %s5 -; CHECK-NEXT: ld2b.zx %s5, 2560(, %s11) -; CHECK-NEXT: ld2b.zx %s6, 2552(, %s11) -; CHECK-NEXT: ld2b.zx %s7, 2544(, %s11) -; CHECK-NEXT: ld2b.zx %s34, 2536(, %s11) -; CHECK-NEXT: ld2b.zx %s1, 1512(, %s11) -; CHECK-NEXT: ld2b.zx %s2, 1520(, %s11) -; CHECK-NEXT: ld2b.zx %s3, 1528(, %s11) -; CHECK-NEXT: ld2b.zx %s4, 1536(, %s11) -; CHECK-NEXT: adds.w.sx %s54, %s1, %s34 -; CHECK-NEXT: adds.w.sx %s53, %s2, %s7 -; CHECK-NEXT: adds.w.sx %s52, %s3, %s6 -; CHECK-NEXT: adds.w.sx %s51, %s4, %s5 -; CHECK-NEXT: ld2b.zx %s5, 2592(, %s11) -; CHECK-NEXT: ld2b.zx %s6, 2584(, %s11) -; CHECK-NEXT: ld2b.zx %s7, 2576(, %s11) -; CHECK-NEXT: ld2b.zx %s34, 2568(, %s11) -; CHECK-NEXT: ld2b.zx %s1, 1544(, %s11) -; CHECK-NEXT: ld2b.zx %s2, 1552(, %s11) -; CHECK-NEXT: ld2b.zx %s3, 1560(, %s11) -; CHECK-NEXT: ld2b.zx %s4, 1568(, %s11) -; CHECK-NEXT: adds.w.sx %s50, %s1, %s34 -; CHECK-NEXT: adds.w.sx %s49, %s2, %s7 -; CHECK-NEXT: adds.w.sx 
%s48, %s3, %s6 -; CHECK-NEXT: adds.w.sx %s47, %s4, %s5 -; CHECK-NEXT: ld2b.zx %s5, 2624(, %s11) -; CHECK-NEXT: ld2b.zx %s6, 2616(, %s11) -; CHECK-NEXT: ld2b.zx %s7, 2608(, %s11) -; CHECK-NEXT: ld2b.zx %s34, 2600(, %s11) -; CHECK-NEXT: ld2b.zx %s1, 1576(, %s11) -; CHECK-NEXT: ld2b.zx %s2, 1584(, %s11) -; CHECK-NEXT: ld2b.zx %s3, 1592(, %s11) -; CHECK-NEXT: ld2b.zx %s4, 1600(, %s11) -; CHECK-NEXT: adds.w.sx %s46, %s1, %s34 -; CHECK-NEXT: adds.w.sx %s45, %s2, %s7 -; CHECK-NEXT: adds.w.sx %s44, %s3, %s6 -; CHECK-NEXT: adds.w.sx %s43, %s4, %s5 -; CHECK-NEXT: ld2b.zx %s5, 2656(, %s11) -; CHECK-NEXT: ld2b.zx %s6, 2648(, %s11) -; CHECK-NEXT: ld2b.zx %s7, 2640(, %s11) -; CHECK-NEXT: ld2b.zx %s34, 2632(, %s11) -; CHECK-NEXT: ld2b.zx %s1, 1608(, %s11) -; CHECK-NEXT: ld2b.zx %s2, 1616(, %s11) -; CHECK-NEXT: ld2b.zx %s3, 1624(, %s11) -; CHECK-NEXT: ld2b.zx %s4, 1632(, %s11) -; CHECK-NEXT: adds.w.sx %s42, %s1, %s34 -; CHECK-NEXT: adds.w.sx %s41, %s2, %s7 -; CHECK-NEXT: adds.w.sx %s40, %s3, %s6 -; CHECK-NEXT: adds.w.sx %s39, %s4, %s5 -; CHECK-NEXT: ld2b.zx %s5, 2688(, %s11) -; CHECK-NEXT: ld2b.zx %s6, 2680(, %s11) -; CHECK-NEXT: ld2b.zx %s7, 2672(, %s11) -; CHECK-NEXT: ld2b.zx %s34, 2664(, %s11) -; CHECK-NEXT: ld2b.zx %s1, 1640(, %s11) -; CHECK-NEXT: ld2b.zx %s2, 1648(, %s11) -; CHECK-NEXT: ld2b.zx %s3, 1656(, %s11) -; CHECK-NEXT: ld2b.zx %s4, 1664(, %s11) -; CHECK-NEXT: adds.w.sx %s38, %s1, %s34 -; CHECK-NEXT: adds.w.sx %s37, %s2, %s7 -; CHECK-NEXT: adds.w.sx %s36, %s3, %s6 -; CHECK-NEXT: adds.w.sx %s35, %s4, %s5 -; CHECK-NEXT: ld2b.zx %s5, 2720(, %s11) -; CHECK-NEXT: ld2b.zx %s6, 2712(, %s11) -; CHECK-NEXT: ld2b.zx %s7, 2704(, %s11) -; CHECK-NEXT: ld2b.zx %s34, 2696(, %s11) -; CHECK-NEXT: ld2b.zx %s1, 1672(, %s11) -; CHECK-NEXT: ld2b.zx %s2, 1680(, %s11) -; CHECK-NEXT: ld2b.zx %s3, 1688(, %s11) -; CHECK-NEXT: ld2b.zx %s4, 1696(, %s11) -; CHECK-NEXT: adds.w.sx %s1, %s1, %s34 -; CHECK-NEXT: adds.w.sx %s2, %s2, %s7 -; CHECK-NEXT: adds.w.sx %s3, %s3, %s6 -; CHECK-NEXT: adds.w.sx 
%s4, %s4, %s5 -; CHECK-NEXT: st2b %s4, 254(, %s0) -; CHECK-NEXT: st2b %s3, 252(, %s0) -; CHECK-NEXT: st2b %s2, 250(, %s0) -; CHECK-NEXT: st2b %s1, 248(, %s0) -; CHECK-NEXT: st2b %s35, 246(, %s0) -; CHECK-NEXT: st2b %s36, 244(, %s0) -; CHECK-NEXT: st2b %s37, 242(, %s0) -; CHECK-NEXT: st2b %s38, 240(, %s0) -; CHECK-NEXT: st2b %s39, 238(, %s0) -; CHECK-NEXT: st2b %s40, 236(, %s0) -; CHECK-NEXT: st2b %s41, 234(, %s0) -; CHECK-NEXT: st2b %s42, 232(, %s0) -; CHECK-NEXT: st2b %s43, 230(, %s0) -; CHECK-NEXT: st2b %s44, 228(, %s0) -; CHECK-NEXT: st2b %s45, 226(, %s0) -; CHECK-NEXT: st2b %s46, 224(, %s0) -; CHECK-NEXT: st2b %s47, 222(, %s0) -; CHECK-NEXT: st2b %s48, 220(, %s0) -; CHECK-NEXT: st2b %s49, 218(, %s0) -; CHECK-NEXT: st2b %s50, 216(, %s0) -; CHECK-NEXT: st2b %s51, 214(, %s0) -; CHECK-NEXT: st2b %s52, 212(, %s0) -; CHECK-NEXT: st2b %s53, 210(, %s0) -; CHECK-NEXT: st2b %s54, 208(, %s0) -; CHECK-NEXT: st2b %s55, 206(, %s0) -; CHECK-NEXT: st2b %s56, 204(, %s0) -; CHECK-NEXT: st2b %s57, 202(, %s0) -; CHECK-NEXT: st2b %s58, 200(, %s0) -; CHECK-NEXT: st2b %s59, 198(, %s0) -; CHECK-NEXT: st2b %s60, 196(, %s0) -; CHECK-NEXT: st2b %s61, 194(, %s0) -; CHECK-NEXT: st2b %s62, 192(, %s0) -; CHECK-NEXT: st2b %s63, 190(, %s0) -; CHECK-NEXT: st2b %s18, 188(, %s0) -; CHECK-NEXT: st2b %s19, 186(, %s0) -; CHECK-NEXT: st2b %s20, 184(, %s0) -; CHECK-NEXT: st2b %s21, 182(, %s0) -; CHECK-NEXT: st2b %s22, 180(, %s0) -; CHECK-NEXT: st2b %s23, 178(, %s0) -; CHECK-NEXT: st2b %s24, 176(, %s0) -; CHECK-NEXT: st2b %s25, 174(, %s0) -; CHECK-NEXT: st2b %s26, 172(, %s0) -; CHECK-NEXT: st2b %s27, 170(, %s0) -; CHECK-NEXT: st2b %s28, 168(, %s0) -; CHECK-NEXT: st2b %s29, 166(, %s0) -; CHECK-NEXT: st2b %s30, 164(, %s0) -; CHECK-NEXT: st2b %s31, 162(, %s0) -; CHECK-NEXT: st2b %s32, 160(, %s0) -; CHECK-NEXT: st2b %s33, 158(, %s0) -; CHECK-NEXT: ldl.sx %s1, 180(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 156(, %s0) -; CHECK-NEXT: ldl.sx %s1, 184(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: 
st2b %s1, 154(, %s0) -; CHECK-NEXT: ldl.sx %s1, 188(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 152(, %s0) -; CHECK-NEXT: ldl.sx %s1, 192(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 150(, %s0) -; CHECK-NEXT: ldl.sx %s1, 196(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 148(, %s0) -; CHECK-NEXT: ldl.sx %s1, 200(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 146(, %s0) -; CHECK-NEXT: ldl.sx %s1, 204(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 144(, %s0) -; CHECK-NEXT: ldl.sx %s1, 208(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 142(, %s0) -; CHECK-NEXT: ldl.sx %s1, 212(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 140(, %s0) -; CHECK-NEXT: ldl.sx %s1, 216(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 138(, %s0) -; CHECK-NEXT: ldl.sx %s1, 220(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 136(, %s0) -; CHECK-NEXT: ldl.sx %s1, 224(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 134(, %s0) -; CHECK-NEXT: ldl.sx %s1, 228(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 132(, %s0) -; CHECK-NEXT: ldl.sx %s1, 232(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 130(, %s0) -; CHECK-NEXT: ldl.sx %s1, 236(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 128(, %s0) -; CHECK-NEXT: ldl.sx %s1, 240(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 126(, %s0) -; CHECK-NEXT: ldl.sx %s1, 244(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 124(, %s0) -; CHECK-NEXT: ldl.sx %s1, 248(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 122(, %s0) -; CHECK-NEXT: ldl.sx %s1, 252(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 120(, %s0) -; CHECK-NEXT: ldl.sx %s1, 256(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 118(, %s0) -; CHECK-NEXT: ldl.sx %s1, 260(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 116(, %s0) -; CHECK-NEXT: ldl.sx %s1, 264(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 114(, %s0) -; CHECK-NEXT: 
ldl.sx %s1, 268(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 112(, %s0) -; CHECK-NEXT: ldl.sx %s1, 272(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 110(, %s0) -; CHECK-NEXT: ldl.sx %s1, 276(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 108(, %s0) -; CHECK-NEXT: ldl.sx %s1, 280(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 106(, %s0) -; CHECK-NEXT: ldl.sx %s1, 284(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 104(, %s0) -; CHECK-NEXT: ldl.sx %s1, 288(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 102(, %s0) -; CHECK-NEXT: ldl.sx %s1, 292(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 100(, %s0) -; CHECK-NEXT: ldl.sx %s1, 296(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 98(, %s0) -; CHECK-NEXT: ldl.sx %s1, 300(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 96(, %s0) -; CHECK-NEXT: ldl.sx %s1, 304(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 94(, %s0) -; CHECK-NEXT: ldl.sx %s1, 308(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 92(, %s0) -; CHECK-NEXT: ldl.sx %s1, 312(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 90(, %s0) -; CHECK-NEXT: ldl.sx %s1, 316(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 88(, %s0) -; CHECK-NEXT: ldl.sx %s1, 320(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 86(, %s0) -; CHECK-NEXT: ldl.sx %s1, 324(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 84(, %s0) -; CHECK-NEXT: ldl.sx %s1, 328(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 82(, %s0) -; CHECK-NEXT: ldl.sx %s1, 332(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 80(, %s0) -; CHECK-NEXT: ldl.sx %s1, 336(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 78(, %s0) -; CHECK-NEXT: ldl.sx %s1, 340(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 76(, %s0) -; CHECK-NEXT: ldl.sx %s1, 344(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 74(, %s0) -; CHECK-NEXT: ldl.sx %s1, 348(, %s11) # 4-byte Folded Reload -; 
CHECK-NEXT: st2b %s1, 72(, %s0) -; CHECK-NEXT: ldl.sx %s1, 352(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 70(, %s0) -; CHECK-NEXT: ldl.sx %s1, 356(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 68(, %s0) -; CHECK-NEXT: ldl.sx %s1, 360(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 66(, %s0) -; CHECK-NEXT: ldl.sx %s1, 364(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 64(, %s0) -; CHECK-NEXT: ldl.sx %s1, 368(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 62(, %s0) -; CHECK-NEXT: ldl.sx %s1, 372(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 60(, %s0) -; CHECK-NEXT: ldl.sx %s1, 376(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 58(, %s0) -; CHECK-NEXT: ldl.sx %s1, 380(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 56(, %s0) -; CHECK-NEXT: ldl.sx %s1, 384(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 54(, %s0) -; CHECK-NEXT: ldl.sx %s1, 388(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 52(, %s0) -; CHECK-NEXT: ldl.sx %s1, 392(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 50(, %s0) -; CHECK-NEXT: ldl.sx %s1, 396(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 48(, %s0) -; CHECK-NEXT: ldl.sx %s1, 400(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 46(, %s0) -; CHECK-NEXT: ldl.sx %s1, 404(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 44(, %s0) -; CHECK-NEXT: ldl.sx %s1, 408(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 42(, %s0) -; CHECK-NEXT: ldl.sx %s1, 412(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 40(, %s0) -; CHECK-NEXT: ldl.sx %s1, 416(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 38(, %s0) -; CHECK-NEXT: ldl.sx %s1, 420(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 36(, %s0) -; CHECK-NEXT: ldl.sx %s1, 424(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 34(, %s0) -; CHECK-NEXT: ldl.sx %s1, 428(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 32(, %s0) -; CHECK-NEXT: ldl.sx %s1, 
432(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 30(, %s0) -; CHECK-NEXT: ldl.sx %s1, 436(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 28(, %s0) -; CHECK-NEXT: ldl.sx %s1, 440(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 26(, %s0) -; CHECK-NEXT: ldl.sx %s1, 444(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 24(, %s0) -; CHECK-NEXT: ldl.sx %s1, 448(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 22(, %s0) -; CHECK-NEXT: ldl.sx %s1, 452(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 20(, %s0) -; CHECK-NEXT: ldl.sx %s1, 456(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 18(, %s0) -; CHECK-NEXT: ldl.sx %s1, 460(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 16(, %s0) -; CHECK-NEXT: ldl.sx %s1, 464(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 14(, %s0) -; CHECK-NEXT: ldl.sx %s1, 468(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 12(, %s0) -; CHECK-NEXT: ldl.sx %s1, 472(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 10(, %s0) -; CHECK-NEXT: ldl.sx %s1, 476(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 8(, %s0) -; CHECK-NEXT: ldl.sx %s1, 480(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 6(, %s0) -; CHECK-NEXT: ldl.sx %s1, 484(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 4(, %s0) -; CHECK-NEXT: ldl.sx %s1, 488(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 2(, %s0) -; CHECK-NEXT: ldl.sx %s1, 492(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, (, %s0) -; CHECK-NEXT: ld %s33, 664(, %s11) # 8-byte Folded Reload -; CHECK-NEXT: ld %s32, 656(, %s11) # 8-byte Folded Reload -; CHECK-NEXT: ld %s31, 648(, %s11) # 8-byte Folded Reload -; CHECK-NEXT: ld %s30, 640(, %s11) # 8-byte Folded Reload -; CHECK-NEXT: ld %s29, 632(, %s11) # 8-byte Folded Reload -; CHECK-NEXT: ld %s28, 624(, %s11) # 8-byte Folded Reload -; CHECK-NEXT: ld %s27, 616(, %s11) # 8-byte Folded Reload -; CHECK-NEXT: ld %s26, 608(, %s11) # 8-byte Folded Reload -; 
CHECK-NEXT: ld %s25, 600(, %s11) # 8-byte Folded Reload -; CHECK-NEXT: ld %s24, 592(, %s11) # 8-byte Folded Reload -; CHECK-NEXT: ld %s23, 584(, %s11) # 8-byte Folded Reload -; CHECK-NEXT: ld %s22, 576(, %s11) # 8-byte Folded Reload -; CHECK-NEXT: ld %s21, 568(, %s11) # 8-byte Folded Reload -; CHECK-NEXT: ld %s20, 560(, %s11) # 8-byte Folded Reload -; CHECK-NEXT: ld %s19, 552(, %s11) # 8-byte Folded Reload -; CHECK-NEXT: ld %s18, 544(, %s11) # 8-byte Folded Reload -; CHECK-NEXT: or %s11, 0, %s9 -; CHECK-NEXT: ld %s10, 8(, %s11) -; CHECK-NEXT: ld %s9, (, %s11) +; CHECK-NEXT: lea %s0, 256 +; CHECK-NEXT: lea %s1, 65535 +; CHECK-NEXT: lvl %s0 +; CHECK-NEXT: pvand.lo %v1, %s1, %v1 +; CHECK-NEXT: pvand.lo %v0, %s1, %v0 +; CHECK-NEXT: lea %s0, 128 +; CHECK-NEXT: lvl %s0 +; CHECK-NEXT: vadds.w.sx %v0, %v0, %v1 ; CHECK-NEXT: b.l.t (, %s10) %z = add <128 x i16> %x, %y ret <128 x i16> %z diff --git a/llvm/test/CodeGen/VE/Vector/vec_and.ll b/llvm/test/CodeGen/VE/Vector/vec_and.ll index 65dda7b7d356..47cdda66bf78 100644 --- a/llvm/test/CodeGen/VE/Vector/vec_and.ll +++ b/llvm/test/CodeGen/VE/Vector/vec_and.ll @@ -123,7 +123,16 @@ define fastcc <256 x i16> @and_vv_v256i16(<256 x i16> %x, <256 x i16> %y) { ; Function Attrs: nounwind define fastcc <128 x i16> @and_vv_v128i16(<128 x i16> %x, <128 x i16> %y) { ; CHECK-LABEL: and_vv_v128i16: -; CHECK-NOT: vand +; CHECK: # %bb.0: +; CHECK-NEXT: lea %s0, 256 +; CHECK-NEXT: lea %s1, 65535 +; CHECK-NEXT: lvl %s0 +; CHECK-NEXT: pvand.lo %v1, %s1, %v1 +; CHECK-NEXT: pvand.lo %v0, %s1, %v0 +; CHECK-NEXT: lea %s0, 128 +; CHECK-NEXT: lvl %s0 +; CHECK-NEXT: pvand.lo %v0, %v0, %v1 +; CHECK-NEXT: b.l.t (, %s10) %z = and <128 x i16> %x, %y ret <128 x i16> %z } diff --git a/llvm/test/CodeGen/VE/Vector/vec_broadcast.ll b/llvm/test/CodeGen/VE/Vector/vec_broadcast.ll index d8ab536524b9..5ff4df1e32b7 100644 --- a/llvm/test/CodeGen/VE/Vector/vec_broadcast.ll +++ b/llvm/test/CodeGen/VE/Vector/vec_broadcast.ll @@ -190,135 +190,9 @@ define fastcc <256 x 
i16> @brd_v256i16(i16 %s) { define fastcc <128 x i16> @brd_v128i16(i16 %s) { ; CHECK-LABEL: brd_v128i16: ; CHECK: # %bb.0: -; CHECK-NEXT: and %s1, %s1, (32)0 -; CHECK-NEXT: st2b %s1, 254(, %s0) -; CHECK-NEXT: st2b %s1, 252(, %s0) -; CHECK-NEXT: st2b %s1, 250(, %s0) -; CHECK-NEXT: st2b %s1, 248(, %s0) -; CHECK-NEXT: st2b %s1, 246(, %s0) -; CHECK-NEXT: st2b %s1, 244(, %s0) -; CHECK-NEXT: st2b %s1, 242(, %s0) -; CHECK-NEXT: st2b %s1, 240(, %s0) -; CHECK-NEXT: st2b %s1, 238(, %s0) -; CHECK-NEXT: st2b %s1, 236(, %s0) -; CHECK-NEXT: st2b %s1, 234(, %s0) -; CHECK-NEXT: st2b %s1, 232(, %s0) -; CHECK-NEXT: st2b %s1, 230(, %s0) -; CHECK-NEXT: st2b %s1, 228(, %s0) -; CHECK-NEXT: st2b %s1, 226(, %s0) -; CHECK-NEXT: st2b %s1, 224(, %s0) -; CHECK-NEXT: st2b %s1, 222(, %s0) -; CHECK-NEXT: st2b %s1, 220(, %s0) -; CHECK-NEXT: st2b %s1, 218(, %s0) -; CHECK-NEXT: st2b %s1, 216(, %s0) -; CHECK-NEXT: st2b %s1, 214(, %s0) -; CHECK-NEXT: st2b %s1, 212(, %s0) -; CHECK-NEXT: st2b %s1, 210(, %s0) -; CHECK-NEXT: st2b %s1, 208(, %s0) -; CHECK-NEXT: st2b %s1, 206(, %s0) -; CHECK-NEXT: st2b %s1, 204(, %s0) -; CHECK-NEXT: st2b %s1, 202(, %s0) -; CHECK-NEXT: st2b %s1, 200(, %s0) -; CHECK-NEXT: st2b %s1, 198(, %s0) -; CHECK-NEXT: st2b %s1, 196(, %s0) -; CHECK-NEXT: st2b %s1, 194(, %s0) -; CHECK-NEXT: st2b %s1, 192(, %s0) -; CHECK-NEXT: st2b %s1, 190(, %s0) -; CHECK-NEXT: st2b %s1, 188(, %s0) -; CHECK-NEXT: st2b %s1, 186(, %s0) -; CHECK-NEXT: st2b %s1, 184(, %s0) -; CHECK-NEXT: st2b %s1, 182(, %s0) -; CHECK-NEXT: st2b %s1, 180(, %s0) -; CHECK-NEXT: st2b %s1, 178(, %s0) -; CHECK-NEXT: st2b %s1, 176(, %s0) -; CHECK-NEXT: st2b %s1, 174(, %s0) -; CHECK-NEXT: st2b %s1, 172(, %s0) -; CHECK-NEXT: st2b %s1, 170(, %s0) -; CHECK-NEXT: st2b %s1, 168(, %s0) -; CHECK-NEXT: st2b %s1, 166(, %s0) -; CHECK-NEXT: st2b %s1, 164(, %s0) -; CHECK-NEXT: st2b %s1, 162(, %s0) -; CHECK-NEXT: st2b %s1, 160(, %s0) -; CHECK-NEXT: st2b %s1, 158(, %s0) -; CHECK-NEXT: st2b %s1, 156(, %s0) -; CHECK-NEXT: st2b %s1, 154(, %s0) -; 
CHECK-NEXT: st2b %s1, 152(, %s0) -; CHECK-NEXT: st2b %s1, 150(, %s0) -; CHECK-NEXT: st2b %s1, 148(, %s0) -; CHECK-NEXT: st2b %s1, 146(, %s0) -; CHECK-NEXT: st2b %s1, 144(, %s0) -; CHECK-NEXT: st2b %s1, 142(, %s0) -; CHECK-NEXT: st2b %s1, 140(, %s0) -; CHECK-NEXT: st2b %s1, 138(, %s0) -; CHECK-NEXT: st2b %s1, 136(, %s0) -; CHECK-NEXT: st2b %s1, 134(, %s0) -; CHECK-NEXT: st2b %s1, 132(, %s0) -; CHECK-NEXT: st2b %s1, 130(, %s0) -; CHECK-NEXT: st2b %s1, 128(, %s0) -; CHECK-NEXT: st2b %s1, 126(, %s0) -; CHECK-NEXT: st2b %s1, 124(, %s0) -; CHECK-NEXT: st2b %s1, 122(, %s0) -; CHECK-NEXT: st2b %s1, 120(, %s0) -; CHECK-NEXT: st2b %s1, 118(, %s0) -; CHECK-NEXT: st2b %s1, 116(, %s0) -; CHECK-NEXT: st2b %s1, 114(, %s0) -; CHECK-NEXT: st2b %s1, 112(, %s0) -; CHECK-NEXT: st2b %s1, 110(, %s0) -; CHECK-NEXT: st2b %s1, 108(, %s0) -; CHECK-NEXT: st2b %s1, 106(, %s0) -; CHECK-NEXT: st2b %s1, 104(, %s0) -; CHECK-NEXT: st2b %s1, 102(, %s0) -; CHECK-NEXT: st2b %s1, 100(, %s0) -; CHECK-NEXT: st2b %s1, 98(, %s0) -; CHECK-NEXT: st2b %s1, 96(, %s0) -; CHECK-NEXT: st2b %s1, 94(, %s0) -; CHECK-NEXT: st2b %s1, 92(, %s0) -; CHECK-NEXT: st2b %s1, 90(, %s0) -; CHECK-NEXT: st2b %s1, 88(, %s0) -; CHECK-NEXT: st2b %s1, 86(, %s0) -; CHECK-NEXT: st2b %s1, 84(, %s0) -; CHECK-NEXT: st2b %s1, 82(, %s0) -; CHECK-NEXT: st2b %s1, 80(, %s0) -; CHECK-NEXT: st2b %s1, 78(, %s0) -; CHECK-NEXT: st2b %s1, 76(, %s0) -; CHECK-NEXT: st2b %s1, 74(, %s0) -; CHECK-NEXT: st2b %s1, 72(, %s0) -; CHECK-NEXT: st2b %s1, 70(, %s0) -; CHECK-NEXT: st2b %s1, 68(, %s0) -; CHECK-NEXT: st2b %s1, 66(, %s0) -; CHECK-NEXT: st2b %s1, 64(, %s0) -; CHECK-NEXT: st2b %s1, 62(, %s0) -; CHECK-NEXT: st2b %s1, 60(, %s0) -; CHECK-NEXT: st2b %s1, 58(, %s0) -; CHECK-NEXT: st2b %s1, 56(, %s0) -; CHECK-NEXT: st2b %s1, 54(, %s0) -; CHECK-NEXT: st2b %s1, 52(, %s0) -; CHECK-NEXT: st2b %s1, 50(, %s0) -; CHECK-NEXT: st2b %s1, 48(, %s0) -; CHECK-NEXT: st2b %s1, 46(, %s0) -; CHECK-NEXT: st2b %s1, 44(, %s0) -; CHECK-NEXT: st2b %s1, 42(, %s0) -; CHECK-NEXT: 
st2b %s1, 40(, %s0) -; CHECK-NEXT: st2b %s1, 38(, %s0) -; CHECK-NEXT: st2b %s1, 36(, %s0) -; CHECK-NEXT: st2b %s1, 34(, %s0) -; CHECK-NEXT: st2b %s1, 32(, %s0) -; CHECK-NEXT: st2b %s1, 30(, %s0) -; CHECK-NEXT: st2b %s1, 28(, %s0) -; CHECK-NEXT: st2b %s1, 26(, %s0) -; CHECK-NEXT: st2b %s1, 24(, %s0) -; CHECK-NEXT: st2b %s1, 22(, %s0) -; CHECK-NEXT: st2b %s1, 20(, %s0) -; CHECK-NEXT: st2b %s1, 18(, %s0) -; CHECK-NEXT: st2b %s1, 16(, %s0) -; CHECK-NEXT: st2b %s1, 14(, %s0) -; CHECK-NEXT: st2b %s1, 12(, %s0) -; CHECK-NEXT: st2b %s1, 10(, %s0) -; CHECK-NEXT: st2b %s1, 8(, %s0) -; CHECK-NEXT: st2b %s1, 6(, %s0) -; CHECK-NEXT: st2b %s1, 4(, %s0) -; CHECK-NEXT: st2b %s1, 2(, %s0) -; CHECK-NEXT: st2b %s1, (, %s0) +; CHECK-NEXT: lea %s1, 128 +; CHECK-NEXT: lvl %s1 +; CHECK-NEXT: vbrd %v0, %s0 ; CHECK-NEXT: b.l.t (, %s10) %val = insertelement <128 x i16> undef, i16 %s, i32 0 %ret = shufflevector <128 x i16> %val, <128 x i16> undef, <128 x i32> zeroinitializer diff --git a/llvm/test/CodeGen/VE/Vector/vec_fadd.ll b/llvm/test/CodeGen/VE/Vector/vec_fadd.ll index 50ee2fb0e1f3..61ef37ed2999 100644 --- a/llvm/test/CodeGen/VE/Vector/vec_fadd.ll +++ b/llvm/test/CodeGen/VE/Vector/vec_fadd.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=ve-unknown-unknown -mattr=-packed,+vpu | FileCheck %s ; Function Attrs: nounwind @@ -26,18 +27,9 @@ define fastcc <2 x double> @vec_add_v2f64(<2 x double> %a, <2 x double> %b) { define fastcc <3 x double> @vec_add_v3f64(<3 x double> %a, <3 x double> %b) { ; CHECK-LABEL: vec_add_v3f64: ; CHECK: # %bb.0: -; CHECK-NEXT: lsv %v0(0), %s3 -; CHECK-NEXT: lsv %v0(1), %s4 -; CHECK-NEXT: lsv %v0(2), %s5 -; CHECK-NEXT: lsv %v1(0), %s0 -; CHECK-NEXT: lsv %v1(1), %s1 -; CHECK-NEXT: lsv %v1(2), %s2 ; CHECK-NEXT: or %s0, 3, (0)1 ; CHECK-NEXT: lvl %s0 -; CHECK-NEXT: vfadd.d %v0, %v1, %v0 -; CHECK-NEXT: lvs %s0, %v0(0) -; CHECK-NEXT: lvs %s1, %v0(1) -; CHECK-NEXT: lvs %s2, %v0(2) +; CHECK-NEXT: 
vfadd.d %v0, %v0, %v1 ; CHECK-NEXT: b.l.t (, %s10) %r = fadd <3 x double> %a, %b ret <3 x double> %r @@ -194,18 +186,9 @@ define fastcc <2 x float> @vec_add_v2f32(<2 x float> %a, <2 x float> %b) { define fastcc <3 x float> @vec_add_v3f32(<3 x float> %a, <3 x float> %b) { ; CHECK-LABEL: vec_add_v3f32: ; CHECK: # %bb.0: -; CHECK-NEXT: lsv %v0(0), %s3 -; CHECK-NEXT: lsv %v0(1), %s4 -; CHECK-NEXT: lsv %v0(2), %s5 -; CHECK-NEXT: lsv %v1(0), %s0 -; CHECK-NEXT: lsv %v1(1), %s1 -; CHECK-NEXT: lsv %v1(2), %s2 ; CHECK-NEXT: or %s0, 3, (0)1 ; CHECK-NEXT: lvl %s0 -; CHECK-NEXT: pvfadd.up %v0, %v1, %v0 -; CHECK-NEXT: lvs %s0, %v0(0) -; CHECK-NEXT: lvs %s1, %v0(1) -; CHECK-NEXT: lvs %s2, %v0(2) +; CHECK-NEXT: pvfadd.up %v0, %v0, %v1 ; CHECK-NEXT: b.l.t (, %s10) %r = fadd <3 x float> %a, %b ret <3 x float> %r diff --git a/llvm/test/CodeGen/VE/Vector/vec_srem.ll b/llvm/test/CodeGen/VE/Vector/vec_srem.ll index 271d743b74e3..e6878b032800 100644 --- a/llvm/test/CodeGen/VE/Vector/vec_srem.ll +++ b/llvm/test/CodeGen/VE/Vector/vec_srem.ll @@ -145,994 +145,21 @@ define fastcc <256 x i16> @srem_vv_v256i16(<256 x i16> %x, <256 x i16> %y) { define fastcc <128 x i16> @srem_vv_v128i16(<128 x i16> %x, <128 x i16> %y) { ; CHECK-LABEL: srem_vv_v128i16: ; CHECK: # %bb.0: -; CHECK-NEXT: st %s9, (, %s11) -; CHECK-NEXT: st %s10, 8(, %s11) -; CHECK-NEXT: or %s9, 0, %s11 -; CHECK-NEXT: lea %s11, -496(, %s11) -; CHECK-NEXT: brge.l.t %s11, %s8, .LBB8_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: ld %s61, 24(, %s14) -; CHECK-NEXT: or %s62, 0, %s0 -; CHECK-NEXT: lea %s63, 315 -; CHECK-NEXT: shm.l %s63, (%s61) -; CHECK-NEXT: shm.l %s8, 8(%s61) -; CHECK-NEXT: shm.l %s11, 16(%s61) -; CHECK-NEXT: monc -; CHECK-NEXT: or %s0, 0, %s62 -; CHECK-NEXT: .LBB8_2: -; CHECK-NEXT: st %s18, 544(, %s11) # 8-byte Folded Spill -; CHECK-NEXT: st %s19, 552(, %s11) # 8-byte Folded Spill -; CHECK-NEXT: st %s20, 560(, %s11) # 8-byte Folded Spill -; CHECK-NEXT: st %s21, 568(, %s11) # 8-byte Folded Spill -; CHECK-NEXT: st %s22, 576(, 
%s11) # 8-byte Folded Spill -; CHECK-NEXT: st %s23, 584(, %s11) # 8-byte Folded Spill -; CHECK-NEXT: st %s24, 592(, %s11) # 8-byte Folded Spill -; CHECK-NEXT: st %s25, 600(, %s11) # 8-byte Folded Spill -; CHECK-NEXT: st %s26, 608(, %s11) # 8-byte Folded Spill -; CHECK-NEXT: st %s27, 616(, %s11) # 8-byte Folded Spill -; CHECK-NEXT: st %s28, 624(, %s11) # 8-byte Folded Spill -; CHECK-NEXT: st %s29, 632(, %s11) # 8-byte Folded Spill -; CHECK-NEXT: st %s30, 640(, %s11) # 8-byte Folded Spill -; CHECK-NEXT: st %s31, 648(, %s11) # 8-byte Folded Spill -; CHECK-NEXT: st %s32, 656(, %s11) # 8-byte Folded Spill -; CHECK-NEXT: st %s33, 664(, %s11) # 8-byte Folded Spill -; CHECK-NEXT: and %s7, %s7, (32)0 -; CHECK-NEXT: sla.w.sx %s34, %s7, 16 -; CHECK-NEXT: sra.w.sx %s34, %s34, 16 -; CHECK-NEXT: and %s6, %s6, (32)0 -; CHECK-NEXT: sla.w.sx %s35, %s6, 16 -; CHECK-NEXT: sra.w.sx %s35, %s35, 16 -; CHECK-NEXT: and %s5, %s5, (32)0 -; CHECK-NEXT: sla.w.sx %s36, %s5, 16 -; CHECK-NEXT: sra.w.sx %s36, %s36, 16 -; CHECK-NEXT: and %s4, %s4, (32)0 -; CHECK-NEXT: sla.w.sx %s37, %s4, 16 -; CHECK-NEXT: sra.w.sx %s37, %s37, 16 -; CHECK-NEXT: and %s3, %s3, (32)0 -; CHECK-NEXT: sla.w.sx %s38, %s3, 16 -; CHECK-NEXT: sra.w.sx %s38, %s38, 16 -; CHECK-NEXT: and %s2, %s2, (32)0 -; CHECK-NEXT: sla.w.sx %s39, %s2, 16 -; CHECK-NEXT: sra.w.sx %s39, %s39, 16 -; CHECK-NEXT: and %s1, %s1, (32)0 -; CHECK-NEXT: sla.w.sx %s40, %s1, 16 -; CHECK-NEXT: sra.w.sx %s40, %s40, 16 -; CHECK-NEXT: ld2b.sx %s30, 2096(, %s11) -; CHECK-NEXT: ld2b.sx %s41, 2088(, %s11) -; CHECK-NEXT: ld2b.sx %s42, 2080(, %s11) -; CHECK-NEXT: ld2b.sx %s43, 2072(, %s11) -; CHECK-NEXT: ld2b.sx %s44, 2064(, %s11) -; CHECK-NEXT: ld2b.sx %s45, 2056(, %s11) -; CHECK-NEXT: ld2b.sx %s46, 2048(, %s11) -; CHECK-NEXT: ld2b.sx %s47, 2040(, %s11) -; CHECK-NEXT: ld2b.sx %s48, 2032(, %s11) -; CHECK-NEXT: ld2b.sx %s49, 2024(, %s11) -; CHECK-NEXT: ld2b.sx %s50, 2016(, %s11) -; CHECK-NEXT: ld2b.sx %s51, 2008(, %s11) -; CHECK-NEXT: ld2b.sx %s52, 2000(, %s11) -; 
CHECK-NEXT: ld2b.sx %s53, 1992(, %s11) -; CHECK-NEXT: ld2b.sx %s54, 1984(, %s11) -; CHECK-NEXT: ld2b.sx %s55, 1976(, %s11) -; CHECK-NEXT: ld2b.sx %s56, 1968(, %s11) -; CHECK-NEXT: ld2b.sx %s57, 1960(, %s11) -; CHECK-NEXT: ld2b.sx %s58, 1952(, %s11) -; CHECK-NEXT: ld2b.sx %s59, 1944(, %s11) -; CHECK-NEXT: ld2b.sx %s60, 1936(, %s11) -; CHECK-NEXT: ld2b.sx %s61, 1928(, %s11) -; CHECK-NEXT: ld2b.sx %s62, 1920(, %s11) -; CHECK-NEXT: ld2b.sx %s63, 1912(, %s11) -; CHECK-NEXT: ld2b.sx %s18, 1904(, %s11) -; CHECK-NEXT: ld2b.sx %s19, 1896(, %s11) -; CHECK-NEXT: ld2b.sx %s20, 1888(, %s11) -; CHECK-NEXT: ld2b.sx %s21, 1880(, %s11) -; CHECK-NEXT: ld2b.sx %s22, 1872(, %s11) -; CHECK-NEXT: ld2b.sx %s23, 1864(, %s11) -; CHECK-NEXT: ld2b.sx %s24, 1856(, %s11) -; CHECK-NEXT: ld2b.sx %s25, 1848(, %s11) -; CHECK-NEXT: ld2b.sx %s26, 1840(, %s11) -; CHECK-NEXT: ld2b.sx %s27, 1832(, %s11) -; CHECK-NEXT: ld2b.sx %s28, 1824(, %s11) -; CHECK-NEXT: ld2b.sx %s29, 1704(, %s11) -; CHECK-NEXT: ld2b.sx %s31, 1816(, %s11) -; CHECK-NEXT: ld2b.sx %s32, 1808(, %s11) -; CHECK-NEXT: ld2b.sx %s33, 1712(, %s11) -; CHECK-NEXT: divs.w.sx %s40, %s40, %s29 -; CHECK-NEXT: muls.w.sx %s40, %s40, %s29 -; CHECK-NEXT: ld2b.sx %s29, 1800(, %s11) -; CHECK-NEXT: subs.w.sx %s1, %s1, %s40 -; CHECK-NEXT: stl %s1, 492(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: ld2b.sx %s1, 1720(, %s11) -; CHECK-NEXT: divs.w.sx %s39, %s39, %s33 -; CHECK-NEXT: muls.w.sx %s39, %s39, %s33 -; CHECK-NEXT: ld2b.sx %s40, 1792(, %s11) -; CHECK-NEXT: subs.w.sx %s2, %s2, %s39 -; CHECK-NEXT: stl %s2, 488(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: ld2b.sx %s2, 1728(, %s11) -; CHECK-NEXT: divs.w.sx %s38, %s38, %s1 -; CHECK-NEXT: muls.w.sx %s1, %s38, %s1 -; CHECK-NEXT: ld2b.sx %s38, 1784(, %s11) -; CHECK-NEXT: subs.w.sx %s1, %s3, %s1 -; CHECK-NEXT: stl %s1, 484(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: ld2b.sx %s1, 1736(, %s11) -; CHECK-NEXT: divs.w.sx %s3, %s37, %s2 -; CHECK-NEXT: muls.w.sx %s2, %s3, %s2 -; CHECK-NEXT: ld2b.sx %s3, 1776(, %s11) 
-; CHECK-NEXT: subs.w.sx %s2, %s4, %s2 -; CHECK-NEXT: stl %s2, 480(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: ld2b.sx %s2, 1744(, %s11) -; CHECK-NEXT: divs.w.sx %s4, %s36, %s1 -; CHECK-NEXT: muls.w.sx %s1, %s4, %s1 -; CHECK-NEXT: ld2b.sx %s4, 1768(, %s11) -; CHECK-NEXT: subs.w.sx %s1, %s5, %s1 -; CHECK-NEXT: stl %s1, 476(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: ld2b.sx %s1, 1752(, %s11) -; CHECK-NEXT: divs.w.sx %s5, %s35, %s2 -; CHECK-NEXT: muls.w.sx %s2, %s5, %s2 -; CHECK-NEXT: ld2b.sx %s5, 1760(, %s11) -; CHECK-NEXT: subs.w.sx %s2, %s6, %s2 -; CHECK-NEXT: stl %s2, 472(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: ld2b.sx %s2, 736(, %s11) -; CHECK-NEXT: divs.w.sx %s6, %s34, %s1 -; CHECK-NEXT: muls.w.sx %s1, %s6, %s1 -; CHECK-NEXT: ld2b.sx %s6, 744(, %s11) -; CHECK-NEXT: subs.w.sx %s1, %s7, %s1 -; CHECK-NEXT: stl %s1, 468(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: divs.w.sx %s1, %s2, %s5 -; CHECK-NEXT: muls.w.sx %s1, %s1, %s5 -; CHECK-NEXT: ld2b.sx %s5, 752(, %s11) -; CHECK-NEXT: subs.w.sx %s1, %s2, %s1 -; CHECK-NEXT: stl %s1, 464(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: divs.w.sx %s1, %s6, %s4 -; CHECK-NEXT: muls.w.sx %s1, %s1, %s4 -; CHECK-NEXT: ld2b.sx %s2, 760(, %s11) -; CHECK-NEXT: subs.w.sx %s1, %s6, %s1 -; CHECK-NEXT: stl %s1, 460(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: divs.w.sx %s1, %s5, %s3 -; CHECK-NEXT: muls.w.sx %s1, %s1, %s3 -; CHECK-NEXT: ld2b.sx %s3, 768(, %s11) -; CHECK-NEXT: subs.w.sx %s1, %s5, %s1 -; CHECK-NEXT: stl %s1, 456(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: divs.w.sx %s1, %s2, %s38 -; CHECK-NEXT: muls.w.sx %s1, %s1, %s38 -; CHECK-NEXT: ld2b.sx %s4, 776(, %s11) -; CHECK-NEXT: subs.w.sx %s1, %s2, %s1 -; CHECK-NEXT: stl %s1, 452(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: divs.w.sx %s1, %s3, %s40 -; CHECK-NEXT: muls.w.sx %s1, %s1, %s40 -; CHECK-NEXT: ld2b.sx %s2, 784(, %s11) -; CHECK-NEXT: subs.w.sx %s1, %s3, %s1 -; CHECK-NEXT: stl %s1, 448(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: divs.w.sx %s1, %s4, %s29 -; 
CHECK-NEXT: muls.w.sx %s1, %s1, %s29 -; CHECK-NEXT: ld2b.sx %s3, 792(, %s11) -; CHECK-NEXT: subs.w.sx %s1, %s4, %s1 -; CHECK-NEXT: stl %s1, 444(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: divs.w.sx %s1, %s2, %s32 -; CHECK-NEXT: muls.w.sx %s1, %s1, %s32 -; CHECK-NEXT: ld2b.sx %s4, 800(, %s11) -; CHECK-NEXT: subs.w.sx %s1, %s2, %s1 -; CHECK-NEXT: stl %s1, 440(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: divs.w.sx %s1, %s3, %s31 -; CHECK-NEXT: muls.w.sx %s1, %s1, %s31 -; CHECK-NEXT: ld2b.sx %s2, 808(, %s11) -; CHECK-NEXT: subs.w.sx %s1, %s3, %s1 -; CHECK-NEXT: stl %s1, 436(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: divs.w.sx %s1, %s4, %s28 -; CHECK-NEXT: muls.w.sx %s1, %s1, %s28 -; CHECK-NEXT: ld2b.sx %s3, 816(, %s11) -; CHECK-NEXT: subs.w.sx %s1, %s4, %s1 -; CHECK-NEXT: stl %s1, 432(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: divs.w.sx %s1, %s2, %s27 -; CHECK-NEXT: muls.w.sx %s1, %s1, %s27 -; CHECK-NEXT: ld2b.sx %s4, 824(, %s11) -; CHECK-NEXT: subs.w.sx %s1, %s2, %s1 -; CHECK-NEXT: stl %s1, 428(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: divs.w.sx %s1, %s3, %s26 -; CHECK-NEXT: muls.w.sx %s1, %s1, %s26 -; CHECK-NEXT: ld2b.sx %s2, 832(, %s11) -; CHECK-NEXT: subs.w.sx %s1, %s3, %s1 -; CHECK-NEXT: stl %s1, 424(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: divs.w.sx %s1, %s4, %s25 -; CHECK-NEXT: muls.w.sx %s1, %s1, %s25 -; CHECK-NEXT: ld2b.sx %s3, 840(, %s11) -; CHECK-NEXT: subs.w.sx %s1, %s4, %s1 -; CHECK-NEXT: stl %s1, 420(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: divs.w.sx %s1, %s2, %s24 -; CHECK-NEXT: muls.w.sx %s1, %s1, %s24 -; CHECK-NEXT: ld2b.sx %s4, 848(, %s11) -; CHECK-NEXT: subs.w.sx %s1, %s2, %s1 -; CHECK-NEXT: stl %s1, 416(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: divs.w.sx %s1, %s3, %s23 -; CHECK-NEXT: muls.w.sx %s1, %s1, %s23 -; CHECK-NEXT: ld2b.sx %s2, 856(, %s11) -; CHECK-NEXT: subs.w.sx %s1, %s3, %s1 -; CHECK-NEXT: stl %s1, 412(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: divs.w.sx %s1, %s4, %s22 -; CHECK-NEXT: muls.w.sx %s1, %s1, %s22 -; 
CHECK-NEXT: ld2b.sx %s3, 864(, %s11) -; CHECK-NEXT: subs.w.sx %s1, %s4, %s1 -; CHECK-NEXT: stl %s1, 408(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: divs.w.sx %s1, %s2, %s21 -; CHECK-NEXT: muls.w.sx %s1, %s1, %s21 -; CHECK-NEXT: ld2b.sx %s4, 872(, %s11) -; CHECK-NEXT: subs.w.sx %s1, %s2, %s1 -; CHECK-NEXT: stl %s1, 404(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: divs.w.sx %s1, %s3, %s20 -; CHECK-NEXT: muls.w.sx %s1, %s1, %s20 -; CHECK-NEXT: ld2b.sx %s2, 880(, %s11) -; CHECK-NEXT: subs.w.sx %s1, %s3, %s1 -; CHECK-NEXT: stl %s1, 400(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: divs.w.sx %s1, %s4, %s19 -; CHECK-NEXT: muls.w.sx %s1, %s1, %s19 -; CHECK-NEXT: ld2b.sx %s3, 888(, %s11) -; CHECK-NEXT: subs.w.sx %s1, %s4, %s1 -; CHECK-NEXT: stl %s1, 396(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: divs.w.sx %s1, %s2, %s18 -; CHECK-NEXT: muls.w.sx %s1, %s1, %s18 -; CHECK-NEXT: ld2b.sx %s4, 896(, %s11) -; CHECK-NEXT: subs.w.sx %s1, %s2, %s1 -; CHECK-NEXT: stl %s1, 392(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: divs.w.sx %s1, %s3, %s63 -; CHECK-NEXT: muls.w.sx %s1, %s1, %s63 -; CHECK-NEXT: ld2b.sx %s2, 904(, %s11) -; CHECK-NEXT: subs.w.sx %s1, %s3, %s1 -; CHECK-NEXT: stl %s1, 388(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: divs.w.sx %s1, %s4, %s62 -; CHECK-NEXT: muls.w.sx %s1, %s1, %s62 -; CHECK-NEXT: ld2b.sx %s3, 912(, %s11) -; CHECK-NEXT: subs.w.sx %s1, %s4, %s1 -; CHECK-NEXT: stl %s1, 384(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: divs.w.sx %s1, %s2, %s61 -; CHECK-NEXT: muls.w.sx %s1, %s1, %s61 -; CHECK-NEXT: ld2b.sx %s4, 920(, %s11) -; CHECK-NEXT: subs.w.sx %s1, %s2, %s1 -; CHECK-NEXT: stl %s1, 380(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: divs.w.sx %s1, %s3, %s60 -; CHECK-NEXT: muls.w.sx %s1, %s1, %s60 -; CHECK-NEXT: ld2b.sx %s2, 928(, %s11) -; CHECK-NEXT: subs.w.sx %s1, %s3, %s1 -; CHECK-NEXT: stl %s1, 376(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: divs.w.sx %s1, %s4, %s59 -; CHECK-NEXT: muls.w.sx %s1, %s1, %s59 -; CHECK-NEXT: ld2b.sx %s3, 936(, %s11) -; 
CHECK-NEXT: subs.w.sx %s1, %s4, %s1 -; CHECK-NEXT: stl %s1, 372(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: divs.w.sx %s1, %s2, %s58 -; CHECK-NEXT: muls.w.sx %s1, %s1, %s58 -; CHECK-NEXT: ld2b.sx %s4, 944(, %s11) -; CHECK-NEXT: subs.w.sx %s1, %s2, %s1 -; CHECK-NEXT: stl %s1, 368(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: divs.w.sx %s1, %s3, %s57 -; CHECK-NEXT: muls.w.sx %s1, %s1, %s57 -; CHECK-NEXT: ld2b.sx %s2, 952(, %s11) -; CHECK-NEXT: subs.w.sx %s1, %s3, %s1 -; CHECK-NEXT: stl %s1, 364(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: divs.w.sx %s1, %s4, %s56 -; CHECK-NEXT: muls.w.sx %s1, %s1, %s56 -; CHECK-NEXT: ld2b.sx %s3, 960(, %s11) -; CHECK-NEXT: subs.w.sx %s1, %s4, %s1 -; CHECK-NEXT: stl %s1, 360(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: divs.w.sx %s1, %s2, %s55 -; CHECK-NEXT: muls.w.sx %s1, %s1, %s55 -; CHECK-NEXT: ld2b.sx %s4, 968(, %s11) -; CHECK-NEXT: subs.w.sx %s1, %s2, %s1 -; CHECK-NEXT: stl %s1, 356(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: divs.w.sx %s1, %s3, %s54 -; CHECK-NEXT: muls.w.sx %s1, %s1, %s54 -; CHECK-NEXT: ld2b.sx %s2, 976(, %s11) -; CHECK-NEXT: subs.w.sx %s1, %s3, %s1 -; CHECK-NEXT: stl %s1, 352(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: divs.w.sx %s1, %s4, %s53 -; CHECK-NEXT: muls.w.sx %s1, %s1, %s53 -; CHECK-NEXT: ld2b.sx %s3, 984(, %s11) -; CHECK-NEXT: subs.w.sx %s1, %s4, %s1 -; CHECK-NEXT: stl %s1, 348(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: divs.w.sx %s1, %s2, %s52 -; CHECK-NEXT: muls.w.sx %s1, %s1, %s52 -; CHECK-NEXT: ld2b.sx %s4, 992(, %s11) -; CHECK-NEXT: subs.w.sx %s1, %s2, %s1 -; CHECK-NEXT: stl %s1, 344(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: divs.w.sx %s1, %s3, %s51 -; CHECK-NEXT: muls.w.sx %s1, %s1, %s51 -; CHECK-NEXT: ld2b.sx %s2, 1000(, %s11) -; CHECK-NEXT: subs.w.sx %s1, %s3, %s1 -; CHECK-NEXT: stl %s1, 340(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: divs.w.sx %s1, %s4, %s50 -; CHECK-NEXT: muls.w.sx %s1, %s1, %s50 -; CHECK-NEXT: ld2b.sx %s3, 1008(, %s11) -; CHECK-NEXT: subs.w.sx %s1, %s4, %s1 -; 
CHECK-NEXT: stl %s1, 336(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: divs.w.sx %s1, %s2, %s49 -; CHECK-NEXT: muls.w.sx %s1, %s1, %s49 -; CHECK-NEXT: ld2b.sx %s4, 1016(, %s11) -; CHECK-NEXT: subs.w.sx %s1, %s2, %s1 -; CHECK-NEXT: stl %s1, 332(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: divs.w.sx %s1, %s3, %s48 -; CHECK-NEXT: muls.w.sx %s1, %s1, %s48 -; CHECK-NEXT: ld2b.sx %s2, 1024(, %s11) -; CHECK-NEXT: subs.w.sx %s1, %s3, %s1 -; CHECK-NEXT: stl %s1, 328(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: divs.w.sx %s1, %s4, %s47 -; CHECK-NEXT: muls.w.sx %s1, %s1, %s47 -; CHECK-NEXT: ld2b.sx %s3, 1032(, %s11) -; CHECK-NEXT: subs.w.sx %s1, %s4, %s1 -; CHECK-NEXT: stl %s1, 324(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: divs.w.sx %s1, %s2, %s46 -; CHECK-NEXT: muls.w.sx %s1, %s1, %s46 -; CHECK-NEXT: ld2b.sx %s4, 1040(, %s11) -; CHECK-NEXT: subs.w.sx %s1, %s2, %s1 -; CHECK-NEXT: stl %s1, 320(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: divs.w.sx %s1, %s3, %s45 -; CHECK-NEXT: muls.w.sx %s1, %s1, %s45 -; CHECK-NEXT: ld2b.sx %s2, 1048(, %s11) -; CHECK-NEXT: subs.w.sx %s1, %s3, %s1 -; CHECK-NEXT: stl %s1, 316(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: divs.w.sx %s1, %s4, %s44 -; CHECK-NEXT: muls.w.sx %s1, %s1, %s44 -; CHECK-NEXT: ld2b.sx %s3, 1056(, %s11) -; CHECK-NEXT: subs.w.sx %s1, %s4, %s1 -; CHECK-NEXT: stl %s1, 312(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: divs.w.sx %s1, %s2, %s43 -; CHECK-NEXT: muls.w.sx %s1, %s1, %s43 -; CHECK-NEXT: ld2b.sx %s4, 1064(, %s11) -; CHECK-NEXT: subs.w.sx %s1, %s2, %s1 -; CHECK-NEXT: stl %s1, 308(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: divs.w.sx %s1, %s3, %s42 -; CHECK-NEXT: muls.w.sx %s1, %s1, %s42 -; CHECK-NEXT: ld2b.sx %s2, 1072(, %s11) -; CHECK-NEXT: subs.w.sx %s1, %s3, %s1 -; CHECK-NEXT: stl %s1, 304(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: divs.w.sx %s1, %s4, %s41 -; CHECK-NEXT: muls.w.sx %s1, %s1, %s41 -; CHECK-NEXT: ld2b.sx %s3, 2104(, %s11) -; CHECK-NEXT: subs.w.sx %s1, %s4, %s1 -; CHECK-NEXT: stl %s1, 300(, %s11) # 
4-byte Folded Spill -; CHECK-NEXT: divs.w.sx %s1, %s2, %s30 -; CHECK-NEXT: muls.w.sx %s1, %s1, %s30 -; CHECK-NEXT: ld2b.sx %s4, 1080(, %s11) -; CHECK-NEXT: subs.w.sx %s1, %s2, %s1 -; CHECK-NEXT: stl %s1, 296(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: ld2b.sx %s1, 2112(, %s11) -; CHECK-NEXT: ld2b.sx %s2, 1088(, %s11) -; CHECK-NEXT: divs.w.sx %s5, %s4, %s3 -; CHECK-NEXT: muls.w.sx %s3, %s5, %s3 -; CHECK-NEXT: subs.w.sx %s3, %s4, %s3 -; CHECK-NEXT: stl %s3, 292(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: divs.w.sx %s3, %s2, %s1 -; CHECK-NEXT: muls.w.sx %s1, %s3, %s1 -; CHECK-NEXT: ld2b.sx %s3, 2120(, %s11) -; CHECK-NEXT: ld2b.sx %s4, 1096(, %s11) -; CHECK-NEXT: subs.w.sx %s1, %s2, %s1 -; CHECK-NEXT: stl %s1, 288(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: ld2b.sx %s1, 2128(, %s11) -; CHECK-NEXT: ld2b.sx %s2, 1104(, %s11) -; CHECK-NEXT: divs.w.sx %s5, %s4, %s3 -; CHECK-NEXT: muls.w.sx %s3, %s5, %s3 -; CHECK-NEXT: subs.w.sx %s3, %s4, %s3 -; CHECK-NEXT: stl %s3, 284(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: divs.w.sx %s3, %s2, %s1 -; CHECK-NEXT: muls.w.sx %s1, %s3, %s1 -; CHECK-NEXT: ld2b.sx %s3, 2136(, %s11) -; CHECK-NEXT: ld2b.sx %s4, 1112(, %s11) -; CHECK-NEXT: subs.w.sx %s1, %s2, %s1 -; CHECK-NEXT: stl %s1, 280(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: ld2b.sx %s1, 2144(, %s11) -; CHECK-NEXT: ld2b.sx %s2, 1120(, %s11) -; CHECK-NEXT: divs.w.sx %s5, %s4, %s3 -; CHECK-NEXT: muls.w.sx %s3, %s5, %s3 -; CHECK-NEXT: subs.w.sx %s3, %s4, %s3 -; CHECK-NEXT: stl %s3, 276(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: divs.w.sx %s3, %s2, %s1 -; CHECK-NEXT: muls.w.sx %s1, %s3, %s1 -; CHECK-NEXT: ld2b.sx %s3, 2152(, %s11) -; CHECK-NEXT: ld2b.sx %s4, 1128(, %s11) -; CHECK-NEXT: subs.w.sx %s1, %s2, %s1 -; CHECK-NEXT: stl %s1, 272(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: ld2b.sx %s1, 2160(, %s11) -; CHECK-NEXT: ld2b.sx %s2, 1136(, %s11) -; CHECK-NEXT: divs.w.sx %s5, %s4, %s3 -; CHECK-NEXT: muls.w.sx %s3, %s5, %s3 -; CHECK-NEXT: subs.w.sx %s3, %s4, %s3 -; CHECK-NEXT: stl 
%s3, 268(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: divs.w.sx %s3, %s2, %s1 -; CHECK-NEXT: muls.w.sx %s1, %s3, %s1 -; CHECK-NEXT: ld2b.sx %s3, 2168(, %s11) -; CHECK-NEXT: ld2b.sx %s4, 1144(, %s11) -; CHECK-NEXT: subs.w.sx %s1, %s2, %s1 -; CHECK-NEXT: stl %s1, 264(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: ld2b.sx %s1, 2176(, %s11) -; CHECK-NEXT: ld2b.sx %s2, 1152(, %s11) -; CHECK-NEXT: divs.w.sx %s5, %s4, %s3 -; CHECK-NEXT: muls.w.sx %s3, %s5, %s3 -; CHECK-NEXT: subs.w.sx %s3, %s4, %s3 -; CHECK-NEXT: stl %s3, 260(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: divs.w.sx %s3, %s2, %s1 -; CHECK-NEXT: muls.w.sx %s1, %s3, %s1 -; CHECK-NEXT: ld2b.sx %s3, 2184(, %s11) -; CHECK-NEXT: ld2b.sx %s4, 1160(, %s11) -; CHECK-NEXT: subs.w.sx %s1, %s2, %s1 -; CHECK-NEXT: stl %s1, 256(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: ld2b.sx %s1, 2192(, %s11) -; CHECK-NEXT: ld2b.sx %s2, 1168(, %s11) -; CHECK-NEXT: divs.w.sx %s5, %s4, %s3 -; CHECK-NEXT: muls.w.sx %s3, %s5, %s3 -; CHECK-NEXT: subs.w.sx %s3, %s4, %s3 -; CHECK-NEXT: stl %s3, 252(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: divs.w.sx %s3, %s2, %s1 -; CHECK-NEXT: muls.w.sx %s1, %s3, %s1 -; CHECK-NEXT: ld2b.sx %s3, 2200(, %s11) -; CHECK-NEXT: ld2b.sx %s4, 1176(, %s11) -; CHECK-NEXT: subs.w.sx %s1, %s2, %s1 -; CHECK-NEXT: stl %s1, 248(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: ld2b.sx %s1, 2208(, %s11) -; CHECK-NEXT: ld2b.sx %s2, 1184(, %s11) -; CHECK-NEXT: divs.w.sx %s5, %s4, %s3 -; CHECK-NEXT: muls.w.sx %s3, %s5, %s3 -; CHECK-NEXT: subs.w.sx %s3, %s4, %s3 -; CHECK-NEXT: stl %s3, 244(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: divs.w.sx %s3, %s2, %s1 -; CHECK-NEXT: muls.w.sx %s1, %s3, %s1 -; CHECK-NEXT: ld2b.sx %s3, 2216(, %s11) -; CHECK-NEXT: ld2b.sx %s4, 1192(, %s11) -; CHECK-NEXT: subs.w.sx %s1, %s2, %s1 -; CHECK-NEXT: stl %s1, 240(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: ld2b.sx %s1, 2224(, %s11) -; CHECK-NEXT: ld2b.sx %s2, 1200(, %s11) -; CHECK-NEXT: divs.w.sx %s5, %s4, %s3 -; CHECK-NEXT: muls.w.sx %s3, %s5, %s3 
-; CHECK-NEXT: subs.w.sx %s3, %s4, %s3 -; CHECK-NEXT: stl %s3, 236(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: divs.w.sx %s3, %s2, %s1 -; CHECK-NEXT: muls.w.sx %s1, %s3, %s1 -; CHECK-NEXT: ld2b.sx %s3, 2232(, %s11) -; CHECK-NEXT: ld2b.sx %s4, 1208(, %s11) -; CHECK-NEXT: subs.w.sx %s1, %s2, %s1 -; CHECK-NEXT: stl %s1, 232(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: ld2b.sx %s1, 2240(, %s11) -; CHECK-NEXT: ld2b.sx %s2, 1216(, %s11) -; CHECK-NEXT: divs.w.sx %s5, %s4, %s3 -; CHECK-NEXT: muls.w.sx %s3, %s5, %s3 -; CHECK-NEXT: subs.w.sx %s3, %s4, %s3 -; CHECK-NEXT: stl %s3, 228(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: divs.w.sx %s3, %s2, %s1 -; CHECK-NEXT: muls.w.sx %s1, %s3, %s1 -; CHECK-NEXT: ld2b.sx %s3, 2248(, %s11) -; CHECK-NEXT: ld2b.sx %s4, 1224(, %s11) -; CHECK-NEXT: subs.w.sx %s1, %s2, %s1 -; CHECK-NEXT: stl %s1, 224(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: ld2b.sx %s1, 2256(, %s11) -; CHECK-NEXT: ld2b.sx %s2, 1232(, %s11) -; CHECK-NEXT: divs.w.sx %s5, %s4, %s3 -; CHECK-NEXT: muls.w.sx %s3, %s5, %s3 -; CHECK-NEXT: subs.w.sx %s3, %s4, %s3 -; CHECK-NEXT: stl %s3, 220(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: divs.w.sx %s3, %s2, %s1 -; CHECK-NEXT: muls.w.sx %s1, %s3, %s1 -; CHECK-NEXT: ld2b.sx %s3, 2264(, %s11) -; CHECK-NEXT: ld2b.sx %s4, 1240(, %s11) -; CHECK-NEXT: subs.w.sx %s1, %s2, %s1 -; CHECK-NEXT: stl %s1, 216(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: ld2b.sx %s1, 2272(, %s11) -; CHECK-NEXT: ld2b.sx %s2, 1248(, %s11) -; CHECK-NEXT: divs.w.sx %s5, %s4, %s3 -; CHECK-NEXT: muls.w.sx %s3, %s5, %s3 -; CHECK-NEXT: subs.w.sx %s3, %s4, %s3 -; CHECK-NEXT: stl %s3, 212(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: divs.w.sx %s3, %s2, %s1 -; CHECK-NEXT: muls.w.sx %s1, %s3, %s1 -; CHECK-NEXT: ld2b.sx %s3, 2280(, %s11) -; CHECK-NEXT: ld2b.sx %s4, 1256(, %s11) -; CHECK-NEXT: subs.w.sx %s1, %s2, %s1 -; CHECK-NEXT: stl %s1, 208(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: ld2b.sx %s1, 2288(, %s11) -; CHECK-NEXT: ld2b.sx %s2, 1264(, %s11) -; CHECK-NEXT: 
divs.w.sx %s5, %s4, %s3 -; CHECK-NEXT: muls.w.sx %s3, %s5, %s3 -; CHECK-NEXT: subs.w.sx %s3, %s4, %s3 -; CHECK-NEXT: stl %s3, 204(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: divs.w.sx %s3, %s2, %s1 -; CHECK-NEXT: muls.w.sx %s1, %s3, %s1 -; CHECK-NEXT: ld2b.sx %s3, 2296(, %s11) -; CHECK-NEXT: ld2b.sx %s4, 1272(, %s11) -; CHECK-NEXT: subs.w.sx %s1, %s2, %s1 -; CHECK-NEXT: stl %s1, 200(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: ld2b.sx %s1, 2304(, %s11) -; CHECK-NEXT: ld2b.sx %s2, 1280(, %s11) -; CHECK-NEXT: divs.w.sx %s5, %s4, %s3 -; CHECK-NEXT: muls.w.sx %s3, %s5, %s3 -; CHECK-NEXT: subs.w.sx %s3, %s4, %s3 -; CHECK-NEXT: stl %s3, 196(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: divs.w.sx %s3, %s2, %s1 -; CHECK-NEXT: muls.w.sx %s1, %s3, %s1 -; CHECK-NEXT: ld2b.sx %s3, 2312(, %s11) -; CHECK-NEXT: ld2b.sx %s4, 1288(, %s11) -; CHECK-NEXT: subs.w.sx %s1, %s2, %s1 -; CHECK-NEXT: stl %s1, 192(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: ld2b.sx %s1, 2320(, %s11) -; CHECK-NEXT: ld2b.sx %s2, 1296(, %s11) -; CHECK-NEXT: divs.w.sx %s5, %s4, %s3 -; CHECK-NEXT: muls.w.sx %s3, %s5, %s3 -; CHECK-NEXT: subs.w.sx %s3, %s4, %s3 -; CHECK-NEXT: stl %s3, 188(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: divs.w.sx %s3, %s2, %s1 -; CHECK-NEXT: muls.w.sx %s1, %s3, %s1 -; CHECK-NEXT: ld2b.sx %s3, 2328(, %s11) -; CHECK-NEXT: ld2b.sx %s4, 1304(, %s11) -; CHECK-NEXT: subs.w.sx %s1, %s2, %s1 -; CHECK-NEXT: stl %s1, 184(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: ld2b.sx %s1, 2336(, %s11) -; CHECK-NEXT: ld2b.sx %s2, 1312(, %s11) -; CHECK-NEXT: divs.w.sx %s5, %s4, %s3 -; CHECK-NEXT: muls.w.sx %s3, %s5, %s3 -; CHECK-NEXT: subs.w.sx %s61, %s4, %s3 -; CHECK-NEXT: divs.w.sx %s3, %s2, %s1 -; CHECK-NEXT: muls.w.sx %s1, %s3, %s1 -; CHECK-NEXT: ld2b.sx %s3, 2344(, %s11) -; CHECK-NEXT: ld2b.sx %s4, 1320(, %s11) -; CHECK-NEXT: subs.w.sx %s60, %s2, %s1 -; CHECK-NEXT: ld2b.sx %s1, 2352(, %s11) -; CHECK-NEXT: ld2b.sx %s2, 1328(, %s11) -; CHECK-NEXT: divs.w.sx %s5, %s4, %s3 -; CHECK-NEXT: muls.w.sx %s3, 
%s5, %s3 -; CHECK-NEXT: subs.w.sx %s59, %s4, %s3 -; CHECK-NEXT: divs.w.sx %s3, %s2, %s1 -; CHECK-NEXT: muls.w.sx %s1, %s3, %s1 -; CHECK-NEXT: ld2b.sx %s3, 2360(, %s11) -; CHECK-NEXT: ld2b.sx %s4, 1336(, %s11) -; CHECK-NEXT: subs.w.sx %s58, %s2, %s1 -; CHECK-NEXT: ld2b.sx %s1, 2368(, %s11) -; CHECK-NEXT: ld2b.sx %s2, 1344(, %s11) -; CHECK-NEXT: divs.w.sx %s5, %s4, %s3 -; CHECK-NEXT: muls.w.sx %s3, %s5, %s3 -; CHECK-NEXT: subs.w.sx %s57, %s4, %s3 -; CHECK-NEXT: divs.w.sx %s3, %s2, %s1 -; CHECK-NEXT: muls.w.sx %s1, %s3, %s1 -; CHECK-NEXT: ld2b.sx %s3, 2376(, %s11) -; CHECK-NEXT: ld2b.sx %s4, 1352(, %s11) -; CHECK-NEXT: subs.w.sx %s56, %s2, %s1 -; CHECK-NEXT: ld2b.sx %s1, 2384(, %s11) -; CHECK-NEXT: ld2b.sx %s2, 1360(, %s11) -; CHECK-NEXT: divs.w.sx %s5, %s4, %s3 -; CHECK-NEXT: muls.w.sx %s3, %s5, %s3 -; CHECK-NEXT: subs.w.sx %s55, %s4, %s3 -; CHECK-NEXT: divs.w.sx %s3, %s2, %s1 -; CHECK-NEXT: muls.w.sx %s1, %s3, %s1 -; CHECK-NEXT: ld2b.sx %s3, 2392(, %s11) -; CHECK-NEXT: ld2b.sx %s4, 1368(, %s11) -; CHECK-NEXT: subs.w.sx %s54, %s2, %s1 -; CHECK-NEXT: ld2b.sx %s1, 2400(, %s11) -; CHECK-NEXT: ld2b.sx %s2, 1376(, %s11) -; CHECK-NEXT: divs.w.sx %s5, %s4, %s3 -; CHECK-NEXT: muls.w.sx %s3, %s5, %s3 -; CHECK-NEXT: subs.w.sx %s53, %s4, %s3 -; CHECK-NEXT: divs.w.sx %s3, %s2, %s1 -; CHECK-NEXT: muls.w.sx %s1, %s3, %s1 -; CHECK-NEXT: ld2b.sx %s3, 2408(, %s11) -; CHECK-NEXT: ld2b.sx %s4, 1384(, %s11) -; CHECK-NEXT: subs.w.sx %s52, %s2, %s1 -; CHECK-NEXT: ld2b.sx %s1, 2416(, %s11) -; CHECK-NEXT: ld2b.sx %s2, 1392(, %s11) -; CHECK-NEXT: divs.w.sx %s5, %s4, %s3 -; CHECK-NEXT: muls.w.sx %s3, %s5, %s3 -; CHECK-NEXT: subs.w.sx %s51, %s4, %s3 -; CHECK-NEXT: divs.w.sx %s3, %s2, %s1 -; CHECK-NEXT: muls.w.sx %s1, %s3, %s1 -; CHECK-NEXT: ld2b.sx %s3, 2424(, %s11) -; CHECK-NEXT: ld2b.sx %s4, 1400(, %s11) -; CHECK-NEXT: subs.w.sx %s50, %s2, %s1 -; CHECK-NEXT: ld2b.sx %s1, 2432(, %s11) -; CHECK-NEXT: ld2b.sx %s2, 1408(, %s11) -; CHECK-NEXT: divs.w.sx %s5, %s4, %s3 -; CHECK-NEXT: muls.w.sx %s3, 
%s5, %s3 -; CHECK-NEXT: subs.w.sx %s49, %s4, %s3 -; CHECK-NEXT: divs.w.sx %s3, %s2, %s1 -; CHECK-NEXT: muls.w.sx %s1, %s3, %s1 -; CHECK-NEXT: ld2b.sx %s3, 2440(, %s11) -; CHECK-NEXT: ld2b.sx %s4, 1416(, %s11) -; CHECK-NEXT: subs.w.sx %s48, %s2, %s1 -; CHECK-NEXT: ld2b.sx %s1, 2448(, %s11) -; CHECK-NEXT: ld2b.sx %s2, 1424(, %s11) -; CHECK-NEXT: divs.w.sx %s24, %s4, %s3 -; CHECK-NEXT: muls.w.sx %s3, %s24, %s3 -; CHECK-NEXT: subs.w.sx %s47, %s4, %s3 -; CHECK-NEXT: divs.w.sx %s3, %s2, %s1 -; CHECK-NEXT: muls.w.sx %s3, %s3, %s1 -; CHECK-NEXT: ld2b.sx %s24, 2456(, %s11) -; CHECK-NEXT: ld2b.sx %s1, 1432(, %s11) -; CHECK-NEXT: subs.w.sx %s46, %s2, %s3 -; CHECK-NEXT: ld2b.sx %s2, 2464(, %s11) -; CHECK-NEXT: ld2b.sx %s3, 1440(, %s11) -; CHECK-NEXT: divs.w.sx %s26, %s1, %s24 -; CHECK-NEXT: muls.w.sx %s24, %s26, %s24 -; CHECK-NEXT: subs.w.sx %s24, %s1, %s24 -; CHECK-NEXT: divs.w.sx %s1, %s3, %s2 -; CHECK-NEXT: muls.w.sx %s1, %s1, %s2 -; CHECK-NEXT: ld2b.sx %s2, 2472(, %s11) -; CHECK-NEXT: ld2b.sx %s26, 1448(, %s11) -; CHECK-NEXT: subs.w.sx %s45, %s3, %s1 -; CHECK-NEXT: ld2b.sx %s3, 2480(, %s11) -; CHECK-NEXT: ld2b.sx %s1, 1456(, %s11) -; CHECK-NEXT: divs.w.sx %s28, %s26, %s2 -; CHECK-NEXT: muls.w.sx %s2, %s28, %s2 -; CHECK-NEXT: subs.w.sx %s44, %s26, %s2 -; CHECK-NEXT: divs.w.sx %s26, %s1, %s3 -; CHECK-NEXT: muls.w.sx %s3, %s26, %s3 -; CHECK-NEXT: ld2b.sx %s26, 2488(, %s11) -; CHECK-NEXT: ld2b.sx %s28, 1464(, %s11) -; CHECK-NEXT: subs.w.sx %s43, %s1, %s3 -; CHECK-NEXT: ld2b.sx %s3, 2496(, %s11) -; CHECK-NEXT: ld2b.sx %s1, 1472(, %s11) -; CHECK-NEXT: divs.w.sx %s30, %s28, %s26 -; CHECK-NEXT: muls.w.sx %s26, %s30, %s26 -; CHECK-NEXT: subs.w.sx %s26, %s28, %s26 -; CHECK-NEXT: divs.w.sx %s28, %s1, %s3 -; CHECK-NEXT: muls.w.sx %s3, %s28, %s3 -; CHECK-NEXT: ld2b.sx %s28, 2504(, %s11) -; CHECK-NEXT: ld2b.sx %s30, 1480(, %s11) -; CHECK-NEXT: subs.w.sx %s42, %s1, %s3 -; CHECK-NEXT: ld2b.sx %s3, 2512(, %s11) -; CHECK-NEXT: ld2b.sx %s1, 1488(, %s11) -; CHECK-NEXT: divs.w.sx %s32, %s30, 
%s28 -; CHECK-NEXT: muls.w.sx %s28, %s32, %s28 -; CHECK-NEXT: subs.w.sx %s28, %s30, %s28 -; CHECK-NEXT: divs.w.sx %s30, %s1, %s3 -; CHECK-NEXT: muls.w.sx %s3, %s30, %s3 -; CHECK-NEXT: ld2b.sx %s30, 2520(, %s11) -; CHECK-NEXT: ld2b.sx %s32, 1496(, %s11) -; CHECK-NEXT: subs.w.sx %s41, %s1, %s3 -; CHECK-NEXT: ld2b.sx %s3, 2528(, %s11) -; CHECK-NEXT: ld2b.sx %s1, 1504(, %s11) -; CHECK-NEXT: divs.w.sx %s33, %s32, %s30 -; CHECK-NEXT: muls.w.sx %s30, %s33, %s30 -; CHECK-NEXT: subs.w.sx %s30, %s32, %s30 -; CHECK-NEXT: divs.w.sx %s32, %s1, %s3 -; CHECK-NEXT: muls.w.sx %s3, %s32, %s3 -; CHECK-NEXT: ld2b.sx %s32, 2536(, %s11) -; CHECK-NEXT: ld2b.sx %s33, 1512(, %s11) -; CHECK-NEXT: subs.w.sx %s40, %s1, %s3 -; CHECK-NEXT: ld2b.sx %s3, 2544(, %s11) -; CHECK-NEXT: ld2b.sx %s1, 1520(, %s11) -; CHECK-NEXT: divs.w.sx %s31, %s33, %s32 -; CHECK-NEXT: muls.w.sx %s31, %s31, %s32 -; CHECK-NEXT: subs.w.sx %s31, %s33, %s31 -; CHECK-NEXT: divs.w.sx %s32, %s1, %s3 -; CHECK-NEXT: muls.w.sx %s3, %s32, %s3 -; CHECK-NEXT: ld2b.sx %s32, 2552(, %s11) -; CHECK-NEXT: ld2b.sx %s33, 1528(, %s11) -; CHECK-NEXT: subs.w.sx %s39, %s1, %s3 -; CHECK-NEXT: ld2b.sx %s3, 2560(, %s11) -; CHECK-NEXT: ld2b.sx %s1, 1536(, %s11) -; CHECK-NEXT: divs.w.sx %s29, %s33, %s32 -; CHECK-NEXT: muls.w.sx %s29, %s29, %s32 -; CHECK-NEXT: subs.w.sx %s29, %s33, %s29 -; CHECK-NEXT: divs.w.sx %s32, %s1, %s3 -; CHECK-NEXT: muls.w.sx %s3, %s32, %s3 -; CHECK-NEXT: ld2b.sx %s32, 2568(, %s11) -; CHECK-NEXT: ld2b.sx %s33, 1544(, %s11) -; CHECK-NEXT: subs.w.sx %s38, %s1, %s3 -; CHECK-NEXT: ld2b.sx %s3, 2576(, %s11) -; CHECK-NEXT: ld2b.sx %s1, 1552(, %s11) -; CHECK-NEXT: divs.w.sx %s27, %s33, %s32 -; CHECK-NEXT: muls.w.sx %s27, %s27, %s32 -; CHECK-NEXT: subs.w.sx %s27, %s33, %s27 -; CHECK-NEXT: divs.w.sx %s32, %s1, %s3 -; CHECK-NEXT: muls.w.sx %s3, %s32, %s3 -; CHECK-NEXT: ld2b.sx %s32, 2584(, %s11) -; CHECK-NEXT: ld2b.sx %s33, 1560(, %s11) -; CHECK-NEXT: subs.w.sx %s37, %s1, %s3 -; CHECK-NEXT: ld2b.sx %s3, 2592(, %s11) -; CHECK-NEXT: 
ld2b.sx %s1, 1568(, %s11) -; CHECK-NEXT: divs.w.sx %s25, %s33, %s32 -; CHECK-NEXT: muls.w.sx %s25, %s25, %s32 -; CHECK-NEXT: subs.w.sx %s25, %s33, %s25 -; CHECK-NEXT: divs.w.sx %s32, %s1, %s3 -; CHECK-NEXT: muls.w.sx %s3, %s32, %s3 -; CHECK-NEXT: ld2b.sx %s32, 2600(, %s11) -; CHECK-NEXT: ld2b.sx %s33, 1576(, %s11) -; CHECK-NEXT: subs.w.sx %s36, %s1, %s3 -; CHECK-NEXT: ld2b.sx %s3, 2608(, %s11) -; CHECK-NEXT: ld2b.sx %s1, 1584(, %s11) -; CHECK-NEXT: divs.w.sx %s23, %s33, %s32 -; CHECK-NEXT: muls.w.sx %s23, %s23, %s32 -; CHECK-NEXT: subs.w.sx %s23, %s33, %s23 -; CHECK-NEXT: divs.w.sx %s32, %s1, %s3 -; CHECK-NEXT: muls.w.sx %s3, %s32, %s3 -; CHECK-NEXT: ld2b.sx %s32, 2616(, %s11) -; CHECK-NEXT: ld2b.sx %s33, 1592(, %s11) -; CHECK-NEXT: subs.w.sx %s35, %s1, %s3 -; CHECK-NEXT: ld2b.sx %s3, 2624(, %s11) -; CHECK-NEXT: ld2b.sx %s1, 1600(, %s11) -; CHECK-NEXT: divs.w.sx %s22, %s33, %s32 -; CHECK-NEXT: muls.w.sx %s22, %s22, %s32 -; CHECK-NEXT: subs.w.sx %s22, %s33, %s22 -; CHECK-NEXT: divs.w.sx %s32, %s1, %s3 -; CHECK-NEXT: muls.w.sx %s3, %s32, %s3 -; CHECK-NEXT: ld2b.sx %s32, 2632(, %s11) -; CHECK-NEXT: ld2b.sx %s33, 1608(, %s11) -; CHECK-NEXT: subs.w.sx %s34, %s1, %s3 -; CHECK-NEXT: ld2b.sx %s3, 2640(, %s11) -; CHECK-NEXT: ld2b.sx %s1, 1616(, %s11) -; CHECK-NEXT: divs.w.sx %s21, %s33, %s32 -; CHECK-NEXT: muls.w.sx %s21, %s21, %s32 -; CHECK-NEXT: subs.w.sx %s21, %s33, %s21 -; CHECK-NEXT: divs.w.sx %s32, %s1, %s3 -; CHECK-NEXT: muls.w.sx %s3, %s32, %s3 -; CHECK-NEXT: ld2b.sx %s32, 2648(, %s11) -; CHECK-NEXT: ld2b.sx %s33, 1624(, %s11) -; CHECK-NEXT: subs.w.sx %s7, %s1, %s3 -; CHECK-NEXT: ld2b.sx %s3, 2656(, %s11) -; CHECK-NEXT: ld2b.sx %s1, 1632(, %s11) -; CHECK-NEXT: divs.w.sx %s20, %s33, %s32 -; CHECK-NEXT: muls.w.sx %s20, %s20, %s32 -; CHECK-NEXT: subs.w.sx %s20, %s33, %s20 -; CHECK-NEXT: divs.w.sx %s32, %s1, %s3 -; CHECK-NEXT: muls.w.sx %s3, %s32, %s3 -; CHECK-NEXT: ld2b.sx %s32, 2664(, %s11) -; CHECK-NEXT: ld2b.sx %s33, 1640(, %s11) -; CHECK-NEXT: subs.w.sx %s6, %s1, 
%s3 -; CHECK-NEXT: ld2b.sx %s3, 2672(, %s11) -; CHECK-NEXT: ld2b.sx %s1, 1648(, %s11) -; CHECK-NEXT: divs.w.sx %s19, %s33, %s32 -; CHECK-NEXT: muls.w.sx %s19, %s19, %s32 -; CHECK-NEXT: subs.w.sx %s19, %s33, %s19 -; CHECK-NEXT: divs.w.sx %s32, %s1, %s3 -; CHECK-NEXT: muls.w.sx %s3, %s32, %s3 -; CHECK-NEXT: ld2b.sx %s32, 2680(, %s11) -; CHECK-NEXT: ld2b.sx %s33, 1656(, %s11) -; CHECK-NEXT: subs.w.sx %s5, %s1, %s3 -; CHECK-NEXT: ld2b.sx %s3, 2688(, %s11) -; CHECK-NEXT: ld2b.sx %s1, 1664(, %s11) -; CHECK-NEXT: divs.w.sx %s18, %s33, %s32 -; CHECK-NEXT: muls.w.sx %s18, %s18, %s32 -; CHECK-NEXT: subs.w.sx %s18, %s33, %s18 -; CHECK-NEXT: divs.w.sx %s32, %s1, %s3 -; CHECK-NEXT: muls.w.sx %s3, %s32, %s3 -; CHECK-NEXT: ld2b.sx %s32, 2696(, %s11) -; CHECK-NEXT: ld2b.sx %s33, 1672(, %s11) -; CHECK-NEXT: subs.w.sx %s4, %s1, %s3 -; CHECK-NEXT: ld2b.sx %s3, 2704(, %s11) -; CHECK-NEXT: ld2b.sx %s1, 1680(, %s11) -; CHECK-NEXT: divs.w.sx %s63, %s33, %s32 -; CHECK-NEXT: muls.w.sx %s63, %s63, %s32 -; CHECK-NEXT: subs.w.sx %s63, %s33, %s63 -; CHECK-NEXT: divs.w.sx %s32, %s1, %s3 -; CHECK-NEXT: muls.w.sx %s3, %s32, %s3 -; CHECK-NEXT: ld2b.sx %s32, 2712(, %s11) -; CHECK-NEXT: ld2b.sx %s33, 1688(, %s11) -; CHECK-NEXT: subs.w.sx %s2, %s1, %s3 -; CHECK-NEXT: ld2b.sx %s3, 2720(, %s11) -; CHECK-NEXT: ld2b.sx %s1, 1696(, %s11) -; CHECK-NEXT: divs.w.sx %s62, %s33, %s32 -; CHECK-NEXT: muls.w.sx %s62, %s62, %s32 -; CHECK-NEXT: subs.w.sx %s62, %s33, %s62 -; CHECK-NEXT: divs.w.sx %s32, %s1, %s3 -; CHECK-NEXT: muls.w.sx %s3, %s32, %s3 -; CHECK-NEXT: subs.w.sx %s1, %s1, %s3 -; CHECK-NEXT: st2b %s1, 254(, %s0) -; CHECK-NEXT: st2b %s62, 252(, %s0) -; CHECK-NEXT: st2b %s2, 250(, %s0) -; CHECK-NEXT: st2b %s63, 248(, %s0) -; CHECK-NEXT: st2b %s4, 246(, %s0) -; CHECK-NEXT: st2b %s18, 244(, %s0) -; CHECK-NEXT: st2b %s5, 242(, %s0) -; CHECK-NEXT: st2b %s19, 240(, %s0) -; CHECK-NEXT: st2b %s6, 238(, %s0) -; CHECK-NEXT: st2b %s20, 236(, %s0) -; CHECK-NEXT: st2b %s7, 234(, %s0) -; CHECK-NEXT: st2b %s21, 232(, 
%s0) -; CHECK-NEXT: st2b %s34, 230(, %s0) -; CHECK-NEXT: st2b %s22, 228(, %s0) -; CHECK-NEXT: st2b %s35, 226(, %s0) -; CHECK-NEXT: st2b %s23, 224(, %s0) -; CHECK-NEXT: st2b %s36, 222(, %s0) -; CHECK-NEXT: st2b %s25, 220(, %s0) -; CHECK-NEXT: st2b %s37, 218(, %s0) -; CHECK-NEXT: st2b %s27, 216(, %s0) -; CHECK-NEXT: st2b %s38, 214(, %s0) -; CHECK-NEXT: st2b %s29, 212(, %s0) -; CHECK-NEXT: st2b %s39, 210(, %s0) -; CHECK-NEXT: st2b %s31, 208(, %s0) -; CHECK-NEXT: st2b %s40, 206(, %s0) -; CHECK-NEXT: st2b %s30, 204(, %s0) -; CHECK-NEXT: st2b %s41, 202(, %s0) -; CHECK-NEXT: st2b %s28, 200(, %s0) -; CHECK-NEXT: st2b %s42, 198(, %s0) -; CHECK-NEXT: st2b %s26, 196(, %s0) -; CHECK-NEXT: st2b %s43, 194(, %s0) -; CHECK-NEXT: st2b %s44, 192(, %s0) -; CHECK-NEXT: st2b %s45, 190(, %s0) -; CHECK-NEXT: st2b %s24, 188(, %s0) -; CHECK-NEXT: st2b %s46, 186(, %s0) -; CHECK-NEXT: st2b %s47, 184(, %s0) -; CHECK-NEXT: st2b %s48, 182(, %s0) -; CHECK-NEXT: st2b %s49, 180(, %s0) -; CHECK-NEXT: st2b %s50, 178(, %s0) -; CHECK-NEXT: st2b %s51, 176(, %s0) -; CHECK-NEXT: st2b %s52, 174(, %s0) -; CHECK-NEXT: st2b %s53, 172(, %s0) -; CHECK-NEXT: st2b %s54, 170(, %s0) -; CHECK-NEXT: st2b %s55, 168(, %s0) -; CHECK-NEXT: st2b %s56, 166(, %s0) -; CHECK-NEXT: st2b %s57, 164(, %s0) -; CHECK-NEXT: st2b %s58, 162(, %s0) -; CHECK-NEXT: st2b %s59, 160(, %s0) -; CHECK-NEXT: st2b %s60, 158(, %s0) -; CHECK-NEXT: st2b %s61, 156(, %s0) -; CHECK-NEXT: ldl.sx %s1, 184(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 154(, %s0) -; CHECK-NEXT: ldl.sx %s1, 188(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 152(, %s0) -; CHECK-NEXT: ldl.sx %s1, 192(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 150(, %s0) -; CHECK-NEXT: ldl.sx %s1, 196(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 148(, %s0) -; CHECK-NEXT: ldl.sx %s1, 200(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 146(, %s0) -; CHECK-NEXT: ldl.sx %s1, 204(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 144(, %s0) 
-; CHECK-NEXT: ldl.sx %s1, 208(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 142(, %s0) -; CHECK-NEXT: ldl.sx %s1, 212(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 140(, %s0) -; CHECK-NEXT: ldl.sx %s1, 216(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 138(, %s0) -; CHECK-NEXT: ldl.sx %s1, 220(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 136(, %s0) -; CHECK-NEXT: ldl.sx %s1, 224(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 134(, %s0) -; CHECK-NEXT: ldl.sx %s1, 228(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 132(, %s0) -; CHECK-NEXT: ldl.sx %s1, 232(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 130(, %s0) -; CHECK-NEXT: ldl.sx %s1, 236(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 128(, %s0) -; CHECK-NEXT: ldl.sx %s1, 240(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 126(, %s0) -; CHECK-NEXT: ldl.sx %s1, 244(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 124(, %s0) -; CHECK-NEXT: ldl.sx %s1, 248(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 122(, %s0) -; CHECK-NEXT: ldl.sx %s1, 252(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 120(, %s0) -; CHECK-NEXT: ldl.sx %s1, 256(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 118(, %s0) -; CHECK-NEXT: ldl.sx %s1, 260(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 116(, %s0) -; CHECK-NEXT: ldl.sx %s1, 264(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 114(, %s0) -; CHECK-NEXT: ldl.sx %s1, 268(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 112(, %s0) -; CHECK-NEXT: ldl.sx %s1, 272(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 110(, %s0) -; CHECK-NEXT: ldl.sx %s1, 276(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 108(, %s0) -; CHECK-NEXT: ldl.sx %s1, 280(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 106(, %s0) -; CHECK-NEXT: ldl.sx %s1, 284(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 104(, %s0) -; CHECK-NEXT: ldl.sx %s1, 288(, %s11) 
# 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 102(, %s0) -; CHECK-NEXT: ldl.sx %s1, 292(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 100(, %s0) -; CHECK-NEXT: ldl.sx %s1, 296(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 98(, %s0) -; CHECK-NEXT: ldl.sx %s1, 300(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 96(, %s0) -; CHECK-NEXT: ldl.sx %s1, 304(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 94(, %s0) -; CHECK-NEXT: ldl.sx %s1, 308(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 92(, %s0) -; CHECK-NEXT: ldl.sx %s1, 312(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 90(, %s0) -; CHECK-NEXT: ldl.sx %s1, 316(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 88(, %s0) -; CHECK-NEXT: ldl.sx %s1, 320(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 86(, %s0) -; CHECK-NEXT: ldl.sx %s1, 324(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 84(, %s0) -; CHECK-NEXT: ldl.sx %s1, 328(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 82(, %s0) -; CHECK-NEXT: ldl.sx %s1, 332(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 80(, %s0) -; CHECK-NEXT: ldl.sx %s1, 336(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 78(, %s0) -; CHECK-NEXT: ldl.sx %s1, 340(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 76(, %s0) -; CHECK-NEXT: ldl.sx %s1, 344(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 74(, %s0) -; CHECK-NEXT: ldl.sx %s1, 348(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 72(, %s0) -; CHECK-NEXT: ldl.sx %s1, 352(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 70(, %s0) -; CHECK-NEXT: ldl.sx %s1, 356(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 68(, %s0) -; CHECK-NEXT: ldl.sx %s1, 360(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 66(, %s0) -; CHECK-NEXT: ldl.sx %s1, 364(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 64(, %s0) -; CHECK-NEXT: ldl.sx %s1, 368(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 62(, %s0) 
-; CHECK-NEXT: ldl.sx %s1, 372(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 60(, %s0) -; CHECK-NEXT: ldl.sx %s1, 376(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 58(, %s0) -; CHECK-NEXT: ldl.sx %s1, 380(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 56(, %s0) -; CHECK-NEXT: ldl.sx %s1, 384(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 54(, %s0) -; CHECK-NEXT: ldl.sx %s1, 388(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 52(, %s0) -; CHECK-NEXT: ldl.sx %s1, 392(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 50(, %s0) -; CHECK-NEXT: ldl.sx %s1, 396(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 48(, %s0) -; CHECK-NEXT: ldl.sx %s1, 400(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 46(, %s0) -; CHECK-NEXT: ldl.sx %s1, 404(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 44(, %s0) -; CHECK-NEXT: ldl.sx %s1, 408(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 42(, %s0) -; CHECK-NEXT: ldl.sx %s1, 412(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 40(, %s0) -; CHECK-NEXT: ldl.sx %s1, 416(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 38(, %s0) -; CHECK-NEXT: ldl.sx %s1, 420(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 36(, %s0) -; CHECK-NEXT: ldl.sx %s1, 424(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 34(, %s0) -; CHECK-NEXT: ldl.sx %s1, 428(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 32(, %s0) -; CHECK-NEXT: ldl.sx %s1, 432(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 30(, %s0) -; CHECK-NEXT: ldl.sx %s1, 436(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 28(, %s0) -; CHECK-NEXT: ldl.sx %s1, 440(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 26(, %s0) -; CHECK-NEXT: ldl.sx %s1, 444(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 24(, %s0) -; CHECK-NEXT: ldl.sx %s1, 448(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 22(, %s0) -; CHECK-NEXT: ldl.sx %s1, 452(, %s11) # 4-byte Folded 
Reload -; CHECK-NEXT: st2b %s1, 20(, %s0) -; CHECK-NEXT: ldl.sx %s1, 456(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 18(, %s0) -; CHECK-NEXT: ldl.sx %s1, 460(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 16(, %s0) -; CHECK-NEXT: ldl.sx %s1, 464(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 14(, %s0) -; CHECK-NEXT: ldl.sx %s1, 468(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 12(, %s0) -; CHECK-NEXT: ldl.sx %s1, 472(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 10(, %s0) -; CHECK-NEXT: ldl.sx %s1, 476(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 8(, %s0) -; CHECK-NEXT: ldl.sx %s1, 480(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 6(, %s0) -; CHECK-NEXT: ldl.sx %s1, 484(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 4(, %s0) -; CHECK-NEXT: ldl.sx %s1, 488(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 2(, %s0) -; CHECK-NEXT: ldl.sx %s1, 492(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, (, %s0) -; CHECK-NEXT: ld %s33, 664(, %s11) # 8-byte Folded Reload -; CHECK-NEXT: ld %s32, 656(, %s11) # 8-byte Folded Reload -; CHECK-NEXT: ld %s31, 648(, %s11) # 8-byte Folded Reload -; CHECK-NEXT: ld %s30, 640(, %s11) # 8-byte Folded Reload -; CHECK-NEXT: ld %s29, 632(, %s11) # 8-byte Folded Reload -; CHECK-NEXT: ld %s28, 624(, %s11) # 8-byte Folded Reload -; CHECK-NEXT: ld %s27, 616(, %s11) # 8-byte Folded Reload -; CHECK-NEXT: ld %s26, 608(, %s11) # 8-byte Folded Reload -; CHECK-NEXT: ld %s25, 600(, %s11) # 8-byte Folded Reload -; CHECK-NEXT: ld %s24, 592(, %s11) # 8-byte Folded Reload -; CHECK-NEXT: ld %s23, 584(, %s11) # 8-byte Folded Reload -; CHECK-NEXT: ld %s22, 576(, %s11) # 8-byte Folded Reload -; CHECK-NEXT: ld %s21, 568(, %s11) # 8-byte Folded Reload -; CHECK-NEXT: ld %s20, 560(, %s11) # 8-byte Folded Reload -; CHECK-NEXT: ld %s19, 552(, %s11) # 8-byte Folded Reload -; CHECK-NEXT: ld %s18, 544(, %s11) # 8-byte Folded Reload -; CHECK-NEXT: or %s11, 0, %s9 -; CHECK-NEXT: ld 
%s10, 8(, %s11) -; CHECK-NEXT: ld %s9, (, %s11) +; CHECK-NEXT: lea %s0, 256 +; CHECK-NEXT: lea %s1, 65535 +; CHECK-NEXT: lvl %s0 +; CHECK-NEXT: pvand.lo %v1, %s1, %v1 +; CHECK-NEXT: or %s2, 16, (0)1 +; CHECK-NEXT: pvsll.lo %v1, %v1, %s2 +; CHECK-NEXT: pvsra.lo %v1, %v1, %s2 +; CHECK-NEXT: pvand.lo %v0, %s1, %v0 +; CHECK-NEXT: pvsll.lo %v0, %v0, %s2 +; CHECK-NEXT: pvsra.lo %v0, %v0, %s2 +; CHECK-NEXT: lea %s0, 128 +; CHECK-NEXT: lvl %s0 +; CHECK-NEXT: vdivs.w.sx %v2, %v0, %v1 +; CHECK-NEXT: vmuls.w.sx %v1, %v2, %v1 +; CHECK-NEXT: vsubs.w.sx %v0, %v0, %v1 ; CHECK-NEXT: b.l.t (, %s10) %z = srem <128 x i16> %x, %y ret <128 x i16> %z diff --git a/llvm/test/CodeGen/VE/Vector/vec_urem.ll b/llvm/test/CodeGen/VE/Vector/vec_urem.ll index 51e95e54579e..c78c716b4f4a 100644 --- a/llvm/test/CodeGen/VE/Vector/vec_urem.ll +++ b/llvm/test/CodeGen/VE/Vector/vec_urem.ll @@ -143,980 +143,20 @@ define fastcc <256 x i16> @urem_vv_v256i16(<256 x i16> %x, <256 x i16> %y) { define fastcc <128 x i16> @urem_vv_v128i16(<128 x i16> %x, <128 x i16> %y) { ; CHECK-LABEL: urem_vv_v128i16: ; CHECK: # %bb.0: -; CHECK-NEXT: st %s9, (, %s11) -; CHECK-NEXT: st %s10, 8(, %s11) -; CHECK-NEXT: or %s9, 0, %s11 -; CHECK-NEXT: lea %s11, -496(, %s11) -; CHECK-NEXT: brge.l.t %s11, %s8, .LBB8_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: ld %s61, 24(, %s14) -; CHECK-NEXT: or %s62, 0, %s0 -; CHECK-NEXT: lea %s63, 315 -; CHECK-NEXT: shm.l %s63, (%s61) -; CHECK-NEXT: shm.l %s8, 8(%s61) -; CHECK-NEXT: shm.l %s11, 16(%s61) -; CHECK-NEXT: monc -; CHECK-NEXT: or %s0, 0, %s62 -; CHECK-NEXT: .LBB8_2: -; CHECK-NEXT: st %s18, 544(, %s11) # 8-byte Folded Spill -; CHECK-NEXT: st %s19, 552(, %s11) # 8-byte Folded Spill -; CHECK-NEXT: st %s20, 560(, %s11) # 8-byte Folded Spill -; CHECK-NEXT: st %s21, 568(, %s11) # 8-byte Folded Spill -; CHECK-NEXT: st %s22, 576(, %s11) # 8-byte Folded Spill -; CHECK-NEXT: st %s23, 584(, %s11) # 8-byte Folded Spill -; CHECK-NEXT: st %s24, 592(, %s11) # 8-byte Folded Spill -; CHECK-NEXT: st %s25, 
600(, %s11) # 8-byte Folded Spill -; CHECK-NEXT: st %s26, 608(, %s11) # 8-byte Folded Spill -; CHECK-NEXT: st %s27, 616(, %s11) # 8-byte Folded Spill -; CHECK-NEXT: st %s28, 624(, %s11) # 8-byte Folded Spill -; CHECK-NEXT: st %s29, 632(, %s11) # 8-byte Folded Spill -; CHECK-NEXT: st %s30, 640(, %s11) # 8-byte Folded Spill -; CHECK-NEXT: st %s31, 648(, %s11) # 8-byte Folded Spill -; CHECK-NEXT: st %s32, 656(, %s11) # 8-byte Folded Spill -; CHECK-NEXT: st %s33, 664(, %s11) # 8-byte Folded Spill -; CHECK-NEXT: and %s7, %s7, (48)0 -; CHECK-NEXT: and %s6, %s6, (48)0 -; CHECK-NEXT: and %s5, %s5, (48)0 -; CHECK-NEXT: and %s4, %s4, (48)0 -; CHECK-NEXT: and %s3, %s3, (48)0 -; CHECK-NEXT: and %s2, %s2, (48)0 -; CHECK-NEXT: and %s1, %s1, (48)0 -; CHECK-NEXT: ld2b.zx %s34, 2096(, %s11) -; CHECK-NEXT: ld2b.zx %s35, 2088(, %s11) -; CHECK-NEXT: ld2b.zx %s36, 2080(, %s11) -; CHECK-NEXT: ld2b.zx %s37, 2072(, %s11) -; CHECK-NEXT: ld2b.zx %s38, 2064(, %s11) -; CHECK-NEXT: ld2b.zx %s39, 2056(, %s11) -; CHECK-NEXT: ld2b.zx %s40, 2048(, %s11) -; CHECK-NEXT: ld2b.zx %s41, 2040(, %s11) -; CHECK-NEXT: ld2b.zx %s42, 2032(, %s11) -; CHECK-NEXT: ld2b.zx %s43, 2024(, %s11) -; CHECK-NEXT: ld2b.zx %s44, 2016(, %s11) -; CHECK-NEXT: ld2b.zx %s45, 2008(, %s11) -; CHECK-NEXT: ld2b.zx %s46, 2000(, %s11) -; CHECK-NEXT: ld2b.zx %s47, 1992(, %s11) -; CHECK-NEXT: ld2b.zx %s48, 1984(, %s11) -; CHECK-NEXT: ld2b.zx %s49, 1976(, %s11) -; CHECK-NEXT: ld2b.zx %s50, 1968(, %s11) -; CHECK-NEXT: ld2b.zx %s51, 1960(, %s11) -; CHECK-NEXT: ld2b.zx %s52, 1952(, %s11) -; CHECK-NEXT: ld2b.zx %s53, 1944(, %s11) -; CHECK-NEXT: ld2b.zx %s54, 1936(, %s11) -; CHECK-NEXT: ld2b.zx %s55, 1928(, %s11) -; CHECK-NEXT: ld2b.zx %s56, 1920(, %s11) -; CHECK-NEXT: ld2b.zx %s57, 1912(, %s11) -; CHECK-NEXT: ld2b.zx %s58, 1904(, %s11) -; CHECK-NEXT: ld2b.zx %s59, 1896(, %s11) -; CHECK-NEXT: ld2b.zx %s60, 1888(, %s11) -; CHECK-NEXT: ld2b.zx %s61, 1880(, %s11) -; CHECK-NEXT: ld2b.zx %s62, 1872(, %s11) -; CHECK-NEXT: ld2b.zx %s63, 1864(, 
%s11) -; CHECK-NEXT: ld2b.zx %s18, 1856(, %s11) -; CHECK-NEXT: ld2b.zx %s19, 1848(, %s11) -; CHECK-NEXT: ld2b.zx %s20, 1840(, %s11) -; CHECK-NEXT: ld2b.zx %s21, 1832(, %s11) -; CHECK-NEXT: ld2b.zx %s22, 1824(, %s11) -; CHECK-NEXT: ld2b.zx %s23, 1816(, %s11) -; CHECK-NEXT: ld2b.zx %s24, 1808(, %s11) -; CHECK-NEXT: ld2b.zx %s25, 1800(, %s11) -; CHECK-NEXT: ld2b.zx %s26, 1792(, %s11) -; CHECK-NEXT: ld2b.zx %s27, 1784(, %s11) -; CHECK-NEXT: ld2b.zx %s28, 1776(, %s11) -; CHECK-NEXT: ld2b.zx %s29, 1704(, %s11) -; CHECK-NEXT: ld2b.zx %s30, 1768(, %s11) -; CHECK-NEXT: ld2b.zx %s31, 1760(, %s11) -; CHECK-NEXT: ld2b.zx %s32, 1712(, %s11) -; CHECK-NEXT: divu.w %s33, %s1, %s29 -; CHECK-NEXT: muls.w.sx %s29, %s33, %s29 -; CHECK-NEXT: ld2b.zx %s33, 1720(, %s11) -; CHECK-NEXT: subs.w.sx %s1, %s1, %s29 -; CHECK-NEXT: stl %s1, 492(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: divu.w %s1, %s2, %s32 -; CHECK-NEXT: muls.w.sx %s1, %s1, %s32 -; CHECK-NEXT: ld2b.zx %s29, 1728(, %s11) -; CHECK-NEXT: subs.w.sx %s1, %s2, %s1 -; CHECK-NEXT: stl %s1, 488(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: divu.w %s1, %s3, %s33 -; CHECK-NEXT: muls.w.sx %s1, %s1, %s33 -; CHECK-NEXT: ld2b.zx %s2, 1736(, %s11) -; CHECK-NEXT: subs.w.sx %s1, %s3, %s1 -; CHECK-NEXT: stl %s1, 484(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: divu.w %s1, %s4, %s29 -; CHECK-NEXT: muls.w.sx %s1, %s1, %s29 -; CHECK-NEXT: ld2b.zx %s3, 1744(, %s11) -; CHECK-NEXT: subs.w.sx %s1, %s4, %s1 -; CHECK-NEXT: stl %s1, 480(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: divu.w %s1, %s5, %s2 -; CHECK-NEXT: muls.w.sx %s1, %s1, %s2 -; CHECK-NEXT: ld2b.zx %s2, 1752(, %s11) -; CHECK-NEXT: subs.w.sx %s1, %s5, %s1 -; CHECK-NEXT: stl %s1, 476(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: divu.w %s1, %s6, %s3 -; CHECK-NEXT: muls.w.sx %s1, %s1, %s3 -; CHECK-NEXT: ld2b.zx %s3, 736(, %s11) -; CHECK-NEXT: subs.w.sx %s1, %s6, %s1 -; CHECK-NEXT: stl %s1, 472(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: divu.w %s1, %s7, %s2 -; CHECK-NEXT: muls.w.sx %s1, %s1, 
%s2 -; CHECK-NEXT: ld2b.zx %s2, 744(, %s11) -; CHECK-NEXT: subs.w.sx %s1, %s7, %s1 -; CHECK-NEXT: stl %s1, 468(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: divu.w %s1, %s3, %s31 -; CHECK-NEXT: muls.w.sx %s1, %s1, %s31 -; CHECK-NEXT: ld2b.zx %s4, 752(, %s11) -; CHECK-NEXT: subs.w.sx %s1, %s3, %s1 -; CHECK-NEXT: stl %s1, 464(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: divu.w %s1, %s2, %s30 -; CHECK-NEXT: muls.w.sx %s1, %s1, %s30 -; CHECK-NEXT: ld2b.zx %s3, 760(, %s11) -; CHECK-NEXT: subs.w.sx %s1, %s2, %s1 -; CHECK-NEXT: stl %s1, 460(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: divu.w %s1, %s4, %s28 -; CHECK-NEXT: muls.w.sx %s1, %s1, %s28 -; CHECK-NEXT: ld2b.zx %s2, 768(, %s11) -; CHECK-NEXT: subs.w.sx %s1, %s4, %s1 -; CHECK-NEXT: stl %s1, 456(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: divu.w %s1, %s3, %s27 -; CHECK-NEXT: muls.w.sx %s1, %s1, %s27 -; CHECK-NEXT: ld2b.zx %s4, 776(, %s11) -; CHECK-NEXT: subs.w.sx %s1, %s3, %s1 -; CHECK-NEXT: stl %s1, 452(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: divu.w %s1, %s2, %s26 -; CHECK-NEXT: muls.w.sx %s1, %s1, %s26 -; CHECK-NEXT: ld2b.zx %s3, 784(, %s11) -; CHECK-NEXT: subs.w.sx %s1, %s2, %s1 -; CHECK-NEXT: stl %s1, 448(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: divu.w %s1, %s4, %s25 -; CHECK-NEXT: muls.w.sx %s1, %s1, %s25 -; CHECK-NEXT: ld2b.zx %s2, 792(, %s11) -; CHECK-NEXT: subs.w.sx %s1, %s4, %s1 -; CHECK-NEXT: stl %s1, 444(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: divu.w %s1, %s3, %s24 -; CHECK-NEXT: muls.w.sx %s1, %s1, %s24 -; CHECK-NEXT: ld2b.zx %s4, 800(, %s11) -; CHECK-NEXT: subs.w.sx %s1, %s3, %s1 -; CHECK-NEXT: stl %s1, 440(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: divu.w %s1, %s2, %s23 -; CHECK-NEXT: muls.w.sx %s1, %s1, %s23 -; CHECK-NEXT: ld2b.zx %s3, 808(, %s11) -; CHECK-NEXT: subs.w.sx %s1, %s2, %s1 -; CHECK-NEXT: stl %s1, 436(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: divu.w %s1, %s4, %s22 -; CHECK-NEXT: muls.w.sx %s1, %s1, %s22 -; CHECK-NEXT: ld2b.zx %s2, 816(, %s11) -; CHECK-NEXT: subs.w.sx %s1, 
%s4, %s1 -; CHECK-NEXT: stl %s1, 432(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: divu.w %s1, %s3, %s21 -; CHECK-NEXT: muls.w.sx %s1, %s1, %s21 -; CHECK-NEXT: ld2b.zx %s4, 824(, %s11) -; CHECK-NEXT: subs.w.sx %s1, %s3, %s1 -; CHECK-NEXT: stl %s1, 428(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: divu.w %s1, %s2, %s20 -; CHECK-NEXT: muls.w.sx %s1, %s1, %s20 -; CHECK-NEXT: ld2b.zx %s3, 832(, %s11) -; CHECK-NEXT: subs.w.sx %s1, %s2, %s1 -; CHECK-NEXT: stl %s1, 424(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: divu.w %s1, %s4, %s19 -; CHECK-NEXT: muls.w.sx %s1, %s1, %s19 -; CHECK-NEXT: ld2b.zx %s2, 840(, %s11) -; CHECK-NEXT: subs.w.sx %s1, %s4, %s1 -; CHECK-NEXT: stl %s1, 420(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: divu.w %s1, %s3, %s18 -; CHECK-NEXT: muls.w.sx %s1, %s1, %s18 -; CHECK-NEXT: ld2b.zx %s4, 848(, %s11) -; CHECK-NEXT: subs.w.sx %s1, %s3, %s1 -; CHECK-NEXT: stl %s1, 416(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: divu.w %s1, %s2, %s63 -; CHECK-NEXT: muls.w.sx %s1, %s1, %s63 -; CHECK-NEXT: ld2b.zx %s3, 856(, %s11) -; CHECK-NEXT: subs.w.sx %s1, %s2, %s1 -; CHECK-NEXT: stl %s1, 412(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: divu.w %s1, %s4, %s62 -; CHECK-NEXT: muls.w.sx %s1, %s1, %s62 -; CHECK-NEXT: ld2b.zx %s2, 864(, %s11) -; CHECK-NEXT: subs.w.sx %s1, %s4, %s1 -; CHECK-NEXT: stl %s1, 408(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: divu.w %s1, %s3, %s61 -; CHECK-NEXT: muls.w.sx %s1, %s1, %s61 -; CHECK-NEXT: ld2b.zx %s4, 872(, %s11) -; CHECK-NEXT: subs.w.sx %s1, %s3, %s1 -; CHECK-NEXT: stl %s1, 404(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: divu.w %s1, %s2, %s60 -; CHECK-NEXT: muls.w.sx %s1, %s1, %s60 -; CHECK-NEXT: ld2b.zx %s3, 880(, %s11) -; CHECK-NEXT: subs.w.sx %s1, %s2, %s1 -; CHECK-NEXT: stl %s1, 400(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: divu.w %s1, %s4, %s59 -; CHECK-NEXT: muls.w.sx %s1, %s1, %s59 -; CHECK-NEXT: ld2b.zx %s2, 888(, %s11) -; CHECK-NEXT: subs.w.sx %s1, %s4, %s1 -; CHECK-NEXT: stl %s1, 396(, %s11) # 4-byte Folded Spill -; 
CHECK-NEXT: divu.w %s1, %s3, %s58 -; CHECK-NEXT: muls.w.sx %s1, %s1, %s58 -; CHECK-NEXT: ld2b.zx %s4, 896(, %s11) -; CHECK-NEXT: subs.w.sx %s1, %s3, %s1 -; CHECK-NEXT: stl %s1, 392(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: divu.w %s1, %s2, %s57 -; CHECK-NEXT: muls.w.sx %s1, %s1, %s57 -; CHECK-NEXT: ld2b.zx %s3, 904(, %s11) -; CHECK-NEXT: subs.w.sx %s1, %s2, %s1 -; CHECK-NEXT: stl %s1, 388(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: divu.w %s1, %s4, %s56 -; CHECK-NEXT: muls.w.sx %s1, %s1, %s56 -; CHECK-NEXT: ld2b.zx %s2, 912(, %s11) -; CHECK-NEXT: subs.w.sx %s1, %s4, %s1 -; CHECK-NEXT: stl %s1, 384(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: divu.w %s1, %s3, %s55 -; CHECK-NEXT: muls.w.sx %s1, %s1, %s55 -; CHECK-NEXT: ld2b.zx %s4, 920(, %s11) -; CHECK-NEXT: subs.w.sx %s1, %s3, %s1 -; CHECK-NEXT: stl %s1, 380(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: divu.w %s1, %s2, %s54 -; CHECK-NEXT: muls.w.sx %s1, %s1, %s54 -; CHECK-NEXT: ld2b.zx %s3, 928(, %s11) -; CHECK-NEXT: subs.w.sx %s1, %s2, %s1 -; CHECK-NEXT: stl %s1, 376(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: divu.w %s1, %s4, %s53 -; CHECK-NEXT: muls.w.sx %s1, %s1, %s53 -; CHECK-NEXT: ld2b.zx %s2, 936(, %s11) -; CHECK-NEXT: subs.w.sx %s1, %s4, %s1 -; CHECK-NEXT: stl %s1, 372(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: divu.w %s1, %s3, %s52 -; CHECK-NEXT: muls.w.sx %s1, %s1, %s52 -; CHECK-NEXT: ld2b.zx %s4, 944(, %s11) -; CHECK-NEXT: subs.w.sx %s1, %s3, %s1 -; CHECK-NEXT: stl %s1, 368(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: divu.w %s1, %s2, %s51 -; CHECK-NEXT: muls.w.sx %s1, %s1, %s51 -; CHECK-NEXT: ld2b.zx %s3, 952(, %s11) -; CHECK-NEXT: subs.w.sx %s1, %s2, %s1 -; CHECK-NEXT: stl %s1, 364(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: divu.w %s1, %s4, %s50 -; CHECK-NEXT: muls.w.sx %s1, %s1, %s50 -; CHECK-NEXT: ld2b.zx %s2, 960(, %s11) -; CHECK-NEXT: subs.w.sx %s1, %s4, %s1 -; CHECK-NEXT: stl %s1, 360(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: divu.w %s1, %s3, %s49 -; CHECK-NEXT: muls.w.sx %s1, %s1, %s49 
-; CHECK-NEXT: ld2b.zx %s4, 968(, %s11) -; CHECK-NEXT: subs.w.sx %s1, %s3, %s1 -; CHECK-NEXT: stl %s1, 356(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: divu.w %s1, %s2, %s48 -; CHECK-NEXT: muls.w.sx %s1, %s1, %s48 -; CHECK-NEXT: ld2b.zx %s3, 976(, %s11) -; CHECK-NEXT: subs.w.sx %s1, %s2, %s1 -; CHECK-NEXT: stl %s1, 352(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: divu.w %s1, %s4, %s47 -; CHECK-NEXT: muls.w.sx %s1, %s1, %s47 -; CHECK-NEXT: ld2b.zx %s2, 984(, %s11) -; CHECK-NEXT: subs.w.sx %s1, %s4, %s1 -; CHECK-NEXT: stl %s1, 348(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: divu.w %s1, %s3, %s46 -; CHECK-NEXT: muls.w.sx %s1, %s1, %s46 -; CHECK-NEXT: ld2b.zx %s4, 992(, %s11) -; CHECK-NEXT: subs.w.sx %s1, %s3, %s1 -; CHECK-NEXT: stl %s1, 344(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: divu.w %s1, %s2, %s45 -; CHECK-NEXT: muls.w.sx %s1, %s1, %s45 -; CHECK-NEXT: ld2b.zx %s3, 1000(, %s11) -; CHECK-NEXT: subs.w.sx %s1, %s2, %s1 -; CHECK-NEXT: stl %s1, 340(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: divu.w %s1, %s4, %s44 -; CHECK-NEXT: muls.w.sx %s1, %s1, %s44 -; CHECK-NEXT: ld2b.zx %s2, 1008(, %s11) -; CHECK-NEXT: subs.w.sx %s1, %s4, %s1 -; CHECK-NEXT: stl %s1, 336(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: divu.w %s1, %s3, %s43 -; CHECK-NEXT: muls.w.sx %s1, %s1, %s43 -; CHECK-NEXT: ld2b.zx %s4, 1016(, %s11) -; CHECK-NEXT: subs.w.sx %s1, %s3, %s1 -; CHECK-NEXT: stl %s1, 332(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: divu.w %s1, %s2, %s42 -; CHECK-NEXT: muls.w.sx %s1, %s1, %s42 -; CHECK-NEXT: ld2b.zx %s3, 1024(, %s11) -; CHECK-NEXT: subs.w.sx %s1, %s2, %s1 -; CHECK-NEXT: stl %s1, 328(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: divu.w %s1, %s4, %s41 -; CHECK-NEXT: muls.w.sx %s1, %s1, %s41 -; CHECK-NEXT: ld2b.zx %s2, 1032(, %s11) -; CHECK-NEXT: subs.w.sx %s1, %s4, %s1 -; CHECK-NEXT: stl %s1, 324(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: divu.w %s1, %s3, %s40 -; CHECK-NEXT: muls.w.sx %s1, %s1, %s40 -; CHECK-NEXT: ld2b.zx %s4, 1040(, %s11) -; CHECK-NEXT: subs.w.sx 
%s1, %s3, %s1 -; CHECK-NEXT: stl %s1, 320(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: divu.w %s1, %s2, %s39 -; CHECK-NEXT: muls.w.sx %s1, %s1, %s39 -; CHECK-NEXT: ld2b.zx %s3, 1048(, %s11) -; CHECK-NEXT: subs.w.sx %s1, %s2, %s1 -; CHECK-NEXT: stl %s1, 316(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: divu.w %s1, %s4, %s38 -; CHECK-NEXT: muls.w.sx %s1, %s1, %s38 -; CHECK-NEXT: ld2b.zx %s2, 1056(, %s11) -; CHECK-NEXT: subs.w.sx %s1, %s4, %s1 -; CHECK-NEXT: stl %s1, 312(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: divu.w %s1, %s3, %s37 -; CHECK-NEXT: muls.w.sx %s1, %s1, %s37 -; CHECK-NEXT: ld2b.zx %s4, 1064(, %s11) -; CHECK-NEXT: subs.w.sx %s1, %s3, %s1 -; CHECK-NEXT: stl %s1, 308(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: divu.w %s1, %s2, %s36 -; CHECK-NEXT: muls.w.sx %s1, %s1, %s36 -; CHECK-NEXT: ld2b.zx %s3, 1072(, %s11) -; CHECK-NEXT: subs.w.sx %s1, %s2, %s1 -; CHECK-NEXT: stl %s1, 304(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: divu.w %s1, %s4, %s35 -; CHECK-NEXT: muls.w.sx %s1, %s1, %s35 -; CHECK-NEXT: ld2b.zx %s2, 2104(, %s11) -; CHECK-NEXT: subs.w.sx %s1, %s4, %s1 -; CHECK-NEXT: stl %s1, 300(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: divu.w %s1, %s3, %s34 -; CHECK-NEXT: muls.w.sx %s1, %s1, %s34 -; CHECK-NEXT: ld2b.zx %s4, 1080(, %s11) -; CHECK-NEXT: subs.w.sx %s1, %s3, %s1 -; CHECK-NEXT: stl %s1, 296(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: ld2b.zx %s1, 2112(, %s11) -; CHECK-NEXT: ld2b.zx %s3, 1088(, %s11) -; CHECK-NEXT: divu.w %s5, %s4, %s2 -; CHECK-NEXT: muls.w.sx %s2, %s5, %s2 -; CHECK-NEXT: subs.w.sx %s2, %s4, %s2 -; CHECK-NEXT: stl %s2, 292(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: divu.w %s2, %s3, %s1 -; CHECK-NEXT: muls.w.sx %s1, %s2, %s1 -; CHECK-NEXT: ld2b.zx %s2, 2120(, %s11) -; CHECK-NEXT: ld2b.zx %s4, 1096(, %s11) -; CHECK-NEXT: subs.w.sx %s1, %s3, %s1 -; CHECK-NEXT: stl %s1, 288(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: ld2b.zx %s1, 2128(, %s11) -; CHECK-NEXT: ld2b.zx %s3, 1104(, %s11) -; CHECK-NEXT: divu.w %s5, %s4, %s2 -; 
CHECK-NEXT: muls.w.sx %s2, %s5, %s2 -; CHECK-NEXT: subs.w.sx %s2, %s4, %s2 -; CHECK-NEXT: stl %s2, 284(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: divu.w %s2, %s3, %s1 -; CHECK-NEXT: muls.w.sx %s1, %s2, %s1 -; CHECK-NEXT: ld2b.zx %s2, 2136(, %s11) -; CHECK-NEXT: ld2b.zx %s4, 1112(, %s11) -; CHECK-NEXT: subs.w.sx %s1, %s3, %s1 -; CHECK-NEXT: stl %s1, 280(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: ld2b.zx %s1, 2144(, %s11) -; CHECK-NEXT: ld2b.zx %s3, 1120(, %s11) -; CHECK-NEXT: divu.w %s5, %s4, %s2 -; CHECK-NEXT: muls.w.sx %s2, %s5, %s2 -; CHECK-NEXT: subs.w.sx %s2, %s4, %s2 -; CHECK-NEXT: stl %s2, 276(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: divu.w %s2, %s3, %s1 -; CHECK-NEXT: muls.w.sx %s1, %s2, %s1 -; CHECK-NEXT: ld2b.zx %s2, 2152(, %s11) -; CHECK-NEXT: ld2b.zx %s4, 1128(, %s11) -; CHECK-NEXT: subs.w.sx %s1, %s3, %s1 -; CHECK-NEXT: stl %s1, 272(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: ld2b.zx %s1, 2160(, %s11) -; CHECK-NEXT: ld2b.zx %s3, 1136(, %s11) -; CHECK-NEXT: divu.w %s5, %s4, %s2 -; CHECK-NEXT: muls.w.sx %s2, %s5, %s2 -; CHECK-NEXT: subs.w.sx %s2, %s4, %s2 -; CHECK-NEXT: stl %s2, 268(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: divu.w %s2, %s3, %s1 -; CHECK-NEXT: muls.w.sx %s1, %s2, %s1 -; CHECK-NEXT: ld2b.zx %s2, 2168(, %s11) -; CHECK-NEXT: ld2b.zx %s4, 1144(, %s11) -; CHECK-NEXT: subs.w.sx %s1, %s3, %s1 -; CHECK-NEXT: stl %s1, 264(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: ld2b.zx %s1, 2176(, %s11) -; CHECK-NEXT: ld2b.zx %s3, 1152(, %s11) -; CHECK-NEXT: divu.w %s5, %s4, %s2 -; CHECK-NEXT: muls.w.sx %s2, %s5, %s2 -; CHECK-NEXT: subs.w.sx %s2, %s4, %s2 -; CHECK-NEXT: stl %s2, 260(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: divu.w %s2, %s3, %s1 -; CHECK-NEXT: muls.w.sx %s1, %s2, %s1 -; CHECK-NEXT: ld2b.zx %s2, 2184(, %s11) -; CHECK-NEXT: ld2b.zx %s4, 1160(, %s11) -; CHECK-NEXT: subs.w.sx %s1, %s3, %s1 -; CHECK-NEXT: stl %s1, 256(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: ld2b.zx %s1, 2192(, %s11) -; CHECK-NEXT: ld2b.zx %s3, 1168(, %s11) -; 
CHECK-NEXT: divu.w %s5, %s4, %s2 -; CHECK-NEXT: muls.w.sx %s2, %s5, %s2 -; CHECK-NEXT: subs.w.sx %s2, %s4, %s2 -; CHECK-NEXT: stl %s2, 252(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: divu.w %s2, %s3, %s1 -; CHECK-NEXT: muls.w.sx %s1, %s2, %s1 -; CHECK-NEXT: ld2b.zx %s2, 2200(, %s11) -; CHECK-NEXT: ld2b.zx %s4, 1176(, %s11) -; CHECK-NEXT: subs.w.sx %s1, %s3, %s1 -; CHECK-NEXT: stl %s1, 248(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: ld2b.zx %s1, 2208(, %s11) -; CHECK-NEXT: ld2b.zx %s3, 1184(, %s11) -; CHECK-NEXT: divu.w %s5, %s4, %s2 -; CHECK-NEXT: muls.w.sx %s2, %s5, %s2 -; CHECK-NEXT: subs.w.sx %s2, %s4, %s2 -; CHECK-NEXT: stl %s2, 244(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: divu.w %s2, %s3, %s1 -; CHECK-NEXT: muls.w.sx %s1, %s2, %s1 -; CHECK-NEXT: ld2b.zx %s2, 2216(, %s11) -; CHECK-NEXT: ld2b.zx %s4, 1192(, %s11) -; CHECK-NEXT: subs.w.sx %s1, %s3, %s1 -; CHECK-NEXT: stl %s1, 240(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: ld2b.zx %s1, 2224(, %s11) -; CHECK-NEXT: ld2b.zx %s3, 1200(, %s11) -; CHECK-NEXT: divu.w %s5, %s4, %s2 -; CHECK-NEXT: muls.w.sx %s2, %s5, %s2 -; CHECK-NEXT: subs.w.sx %s2, %s4, %s2 -; CHECK-NEXT: stl %s2, 236(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: divu.w %s2, %s3, %s1 -; CHECK-NEXT: muls.w.sx %s1, %s2, %s1 -; CHECK-NEXT: ld2b.zx %s2, 2232(, %s11) -; CHECK-NEXT: ld2b.zx %s4, 1208(, %s11) -; CHECK-NEXT: subs.w.sx %s1, %s3, %s1 -; CHECK-NEXT: stl %s1, 232(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: ld2b.zx %s1, 2240(, %s11) -; CHECK-NEXT: ld2b.zx %s3, 1216(, %s11) -; CHECK-NEXT: divu.w %s5, %s4, %s2 -; CHECK-NEXT: muls.w.sx %s2, %s5, %s2 -; CHECK-NEXT: subs.w.sx %s2, %s4, %s2 -; CHECK-NEXT: stl %s2, 228(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: divu.w %s2, %s3, %s1 -; CHECK-NEXT: muls.w.sx %s1, %s2, %s1 -; CHECK-NEXT: ld2b.zx %s2, 2248(, %s11) -; CHECK-NEXT: ld2b.zx %s4, 1224(, %s11) -; CHECK-NEXT: subs.w.sx %s1, %s3, %s1 -; CHECK-NEXT: stl %s1, 224(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: ld2b.zx %s1, 2256(, %s11) -; 
CHECK-NEXT: ld2b.zx %s3, 1232(, %s11) -; CHECK-NEXT: divu.w %s5, %s4, %s2 -; CHECK-NEXT: muls.w.sx %s2, %s5, %s2 -; CHECK-NEXT: subs.w.sx %s2, %s4, %s2 -; CHECK-NEXT: stl %s2, 220(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: divu.w %s2, %s3, %s1 -; CHECK-NEXT: muls.w.sx %s1, %s2, %s1 -; CHECK-NEXT: ld2b.zx %s2, 2264(, %s11) -; CHECK-NEXT: ld2b.zx %s4, 1240(, %s11) -; CHECK-NEXT: subs.w.sx %s1, %s3, %s1 -; CHECK-NEXT: stl %s1, 216(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: ld2b.zx %s1, 2272(, %s11) -; CHECK-NEXT: ld2b.zx %s3, 1248(, %s11) -; CHECK-NEXT: divu.w %s5, %s4, %s2 -; CHECK-NEXT: muls.w.sx %s2, %s5, %s2 -; CHECK-NEXT: subs.w.sx %s2, %s4, %s2 -; CHECK-NEXT: stl %s2, 212(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: divu.w %s2, %s3, %s1 -; CHECK-NEXT: muls.w.sx %s1, %s2, %s1 -; CHECK-NEXT: ld2b.zx %s2, 2280(, %s11) -; CHECK-NEXT: ld2b.zx %s4, 1256(, %s11) -; CHECK-NEXT: subs.w.sx %s1, %s3, %s1 -; CHECK-NEXT: stl %s1, 208(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: ld2b.zx %s1, 2288(, %s11) -; CHECK-NEXT: ld2b.zx %s3, 1264(, %s11) -; CHECK-NEXT: divu.w %s5, %s4, %s2 -; CHECK-NEXT: muls.w.sx %s2, %s5, %s2 -; CHECK-NEXT: subs.w.sx %s2, %s4, %s2 -; CHECK-NEXT: stl %s2, 204(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: divu.w %s2, %s3, %s1 -; CHECK-NEXT: muls.w.sx %s1, %s2, %s1 -; CHECK-NEXT: ld2b.zx %s2, 2296(, %s11) -; CHECK-NEXT: ld2b.zx %s4, 1272(, %s11) -; CHECK-NEXT: subs.w.sx %s1, %s3, %s1 -; CHECK-NEXT: stl %s1, 200(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: ld2b.zx %s1, 2304(, %s11) -; CHECK-NEXT: ld2b.zx %s3, 1280(, %s11) -; CHECK-NEXT: divu.w %s5, %s4, %s2 -; CHECK-NEXT: muls.w.sx %s2, %s5, %s2 -; CHECK-NEXT: subs.w.sx %s2, %s4, %s2 -; CHECK-NEXT: stl %s2, 196(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: divu.w %s2, %s3, %s1 -; CHECK-NEXT: muls.w.sx %s1, %s2, %s1 -; CHECK-NEXT: ld2b.zx %s2, 2312(, %s11) -; CHECK-NEXT: ld2b.zx %s4, 1288(, %s11) -; CHECK-NEXT: subs.w.sx %s1, %s3, %s1 -; CHECK-NEXT: stl %s1, 192(, %s11) # 4-byte Folded Spill -; 
CHECK-NEXT: ld2b.zx %s1, 2320(, %s11) -; CHECK-NEXT: ld2b.zx %s3, 1296(, %s11) -; CHECK-NEXT: divu.w %s5, %s4, %s2 -; CHECK-NEXT: muls.w.sx %s2, %s5, %s2 -; CHECK-NEXT: subs.w.sx %s2, %s4, %s2 -; CHECK-NEXT: stl %s2, 188(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: divu.w %s2, %s3, %s1 -; CHECK-NEXT: muls.w.sx %s1, %s2, %s1 -; CHECK-NEXT: ld2b.zx %s2, 2328(, %s11) -; CHECK-NEXT: ld2b.zx %s4, 1304(, %s11) -; CHECK-NEXT: subs.w.sx %s1, %s3, %s1 -; CHECK-NEXT: stl %s1, 184(, %s11) # 4-byte Folded Spill -; CHECK-NEXT: ld2b.zx %s1, 2336(, %s11) -; CHECK-NEXT: ld2b.zx %s3, 1312(, %s11) -; CHECK-NEXT: divu.w %s5, %s4, %s2 -; CHECK-NEXT: muls.w.sx %s2, %s5, %s2 -; CHECK-NEXT: subs.w.sx %s61, %s4, %s2 -; CHECK-NEXT: divu.w %s2, %s3, %s1 -; CHECK-NEXT: muls.w.sx %s1, %s2, %s1 -; CHECK-NEXT: ld2b.zx %s2, 2344(, %s11) -; CHECK-NEXT: ld2b.zx %s4, 1320(, %s11) -; CHECK-NEXT: subs.w.sx %s60, %s3, %s1 -; CHECK-NEXT: ld2b.zx %s1, 2352(, %s11) -; CHECK-NEXT: ld2b.zx %s3, 1328(, %s11) -; CHECK-NEXT: divu.w %s5, %s4, %s2 -; CHECK-NEXT: muls.w.sx %s2, %s5, %s2 -; CHECK-NEXT: subs.w.sx %s59, %s4, %s2 -; CHECK-NEXT: divu.w %s2, %s3, %s1 -; CHECK-NEXT: muls.w.sx %s1, %s2, %s1 -; CHECK-NEXT: ld2b.zx %s2, 2360(, %s11) -; CHECK-NEXT: ld2b.zx %s4, 1336(, %s11) -; CHECK-NEXT: subs.w.sx %s58, %s3, %s1 -; CHECK-NEXT: ld2b.zx %s1, 2368(, %s11) -; CHECK-NEXT: ld2b.zx %s3, 1344(, %s11) -; CHECK-NEXT: divu.w %s5, %s4, %s2 -; CHECK-NEXT: muls.w.sx %s2, %s5, %s2 -; CHECK-NEXT: subs.w.sx %s57, %s4, %s2 -; CHECK-NEXT: divu.w %s2, %s3, %s1 -; CHECK-NEXT: muls.w.sx %s1, %s2, %s1 -; CHECK-NEXT: ld2b.zx %s2, 2376(, %s11) -; CHECK-NEXT: ld2b.zx %s4, 1352(, %s11) -; CHECK-NEXT: subs.w.sx %s56, %s3, %s1 -; CHECK-NEXT: ld2b.zx %s1, 2384(, %s11) -; CHECK-NEXT: ld2b.zx %s3, 1360(, %s11) -; CHECK-NEXT: divu.w %s5, %s4, %s2 -; CHECK-NEXT: muls.w.sx %s2, %s5, %s2 -; CHECK-NEXT: subs.w.sx %s55, %s4, %s2 -; CHECK-NEXT: divu.w %s2, %s3, %s1 -; CHECK-NEXT: muls.w.sx %s1, %s2, %s1 -; CHECK-NEXT: ld2b.zx %s2, 2392(, %s11) 
-; CHECK-NEXT: ld2b.zx %s4, 1368(, %s11) -; CHECK-NEXT: subs.w.sx %s54, %s3, %s1 -; CHECK-NEXT: ld2b.zx %s1, 2400(, %s11) -; CHECK-NEXT: ld2b.zx %s3, 1376(, %s11) -; CHECK-NEXT: divu.w %s5, %s4, %s2 -; CHECK-NEXT: muls.w.sx %s2, %s5, %s2 -; CHECK-NEXT: subs.w.sx %s53, %s4, %s2 -; CHECK-NEXT: divu.w %s2, %s3, %s1 -; CHECK-NEXT: muls.w.sx %s1, %s2, %s1 -; CHECK-NEXT: ld2b.zx %s2, 2408(, %s11) -; CHECK-NEXT: ld2b.zx %s4, 1384(, %s11) -; CHECK-NEXT: subs.w.sx %s52, %s3, %s1 -; CHECK-NEXT: ld2b.zx %s1, 2416(, %s11) -; CHECK-NEXT: ld2b.zx %s3, 1392(, %s11) -; CHECK-NEXT: divu.w %s5, %s4, %s2 -; CHECK-NEXT: muls.w.sx %s2, %s5, %s2 -; CHECK-NEXT: subs.w.sx %s51, %s4, %s2 -; CHECK-NEXT: divu.w %s2, %s3, %s1 -; CHECK-NEXT: muls.w.sx %s1, %s2, %s1 -; CHECK-NEXT: ld2b.zx %s2, 2424(, %s11) -; CHECK-NEXT: ld2b.zx %s4, 1400(, %s11) -; CHECK-NEXT: subs.w.sx %s50, %s3, %s1 -; CHECK-NEXT: ld2b.zx %s1, 2432(, %s11) -; CHECK-NEXT: ld2b.zx %s3, 1408(, %s11) -; CHECK-NEXT: divu.w %s5, %s4, %s2 -; CHECK-NEXT: muls.w.sx %s2, %s5, %s2 -; CHECK-NEXT: subs.w.sx %s49, %s4, %s2 -; CHECK-NEXT: divu.w %s2, %s3, %s1 -; CHECK-NEXT: muls.w.sx %s1, %s2, %s1 -; CHECK-NEXT: ld2b.zx %s2, 2440(, %s11) -; CHECK-NEXT: ld2b.zx %s4, 1416(, %s11) -; CHECK-NEXT: subs.w.sx %s48, %s3, %s1 -; CHECK-NEXT: ld2b.zx %s1, 2448(, %s11) -; CHECK-NEXT: ld2b.zx %s3, 1424(, %s11) -; CHECK-NEXT: divu.w %s24, %s4, %s2 -; CHECK-NEXT: muls.w.sx %s2, %s24, %s2 -; CHECK-NEXT: subs.w.sx %s47, %s4, %s2 -; CHECK-NEXT: divu.w %s2, %s3, %s1 -; CHECK-NEXT: muls.w.sx %s1, %s2, %s1 -; CHECK-NEXT: ld2b.zx %s2, 2456(, %s11) -; CHECK-NEXT: ld2b.zx %s24, 1432(, %s11) -; CHECK-NEXT: subs.w.sx %s46, %s3, %s1 -; CHECK-NEXT: ld2b.zx %s1, 2464(, %s11) -; CHECK-NEXT: ld2b.zx %s3, 1440(, %s11) -; CHECK-NEXT: divu.w %s26, %s24, %s2 -; CHECK-NEXT: muls.w.sx %s2, %s26, %s2 -; CHECK-NEXT: subs.w.sx %s45, %s24, %s2 -; CHECK-NEXT: divu.w %s24, %s3, %s1 -; CHECK-NEXT: muls.w.sx %s1, %s24, %s1 -; CHECK-NEXT: ld2b.zx %s24, 2472(, %s11) -; CHECK-NEXT: 
ld2b.zx %s26, 1448(, %s11) -; CHECK-NEXT: subs.w.sx %s44, %s3, %s1 -; CHECK-NEXT: ld2b.zx %s3, 2480(, %s11) -; CHECK-NEXT: ld2b.zx %s1, 1456(, %s11) -; CHECK-NEXT: divu.w %s28, %s26, %s24 -; CHECK-NEXT: muls.w.sx %s24, %s28, %s24 -; CHECK-NEXT: subs.w.sx %s24, %s26, %s24 -; CHECK-NEXT: divu.w %s26, %s1, %s3 -; CHECK-NEXT: muls.w.sx %s3, %s26, %s3 -; CHECK-NEXT: ld2b.zx %s26, 2488(, %s11) -; CHECK-NEXT: ld2b.zx %s28, 1464(, %s11) -; CHECK-NEXT: subs.w.sx %s43, %s1, %s3 -; CHECK-NEXT: ld2b.zx %s3, 2496(, %s11) -; CHECK-NEXT: ld2b.zx %s1, 1472(, %s11) -; CHECK-NEXT: divu.w %s30, %s28, %s26 -; CHECK-NEXT: muls.w.sx %s26, %s30, %s26 -; CHECK-NEXT: subs.w.sx %s26, %s28, %s26 -; CHECK-NEXT: divu.w %s28, %s1, %s3 -; CHECK-NEXT: muls.w.sx %s3, %s28, %s3 -; CHECK-NEXT: ld2b.zx %s28, 2504(, %s11) -; CHECK-NEXT: ld2b.zx %s30, 1480(, %s11) -; CHECK-NEXT: subs.w.sx %s42, %s1, %s3 -; CHECK-NEXT: ld2b.zx %s3, 2512(, %s11) -; CHECK-NEXT: ld2b.zx %s1, 1488(, %s11) -; CHECK-NEXT: divu.w %s32, %s30, %s28 -; CHECK-NEXT: muls.w.sx %s28, %s32, %s28 -; CHECK-NEXT: subs.w.sx %s28, %s30, %s28 -; CHECK-NEXT: divu.w %s30, %s1, %s3 -; CHECK-NEXT: muls.w.sx %s3, %s30, %s3 -; CHECK-NEXT: ld2b.zx %s30, 2520(, %s11) -; CHECK-NEXT: ld2b.zx %s32, 1496(, %s11) -; CHECK-NEXT: subs.w.sx %s41, %s1, %s3 -; CHECK-NEXT: ld2b.zx %s3, 2528(, %s11) -; CHECK-NEXT: ld2b.zx %s1, 1504(, %s11) -; CHECK-NEXT: divu.w %s33, %s32, %s30 -; CHECK-NEXT: muls.w.sx %s30, %s33, %s30 -; CHECK-NEXT: subs.w.sx %s30, %s32, %s30 -; CHECK-NEXT: divu.w %s32, %s1, %s3 -; CHECK-NEXT: muls.w.sx %s3, %s32, %s3 -; CHECK-NEXT: ld2b.zx %s32, 2536(, %s11) -; CHECK-NEXT: ld2b.zx %s33, 1512(, %s11) -; CHECK-NEXT: subs.w.sx %s40, %s1, %s3 -; CHECK-NEXT: ld2b.zx %s3, 2544(, %s11) -; CHECK-NEXT: ld2b.zx %s1, 1520(, %s11) -; CHECK-NEXT: divu.w %s31, %s33, %s32 -; CHECK-NEXT: muls.w.sx %s31, %s31, %s32 -; CHECK-NEXT: subs.w.sx %s31, %s33, %s31 -; CHECK-NEXT: divu.w %s32, %s1, %s3 -; CHECK-NEXT: muls.w.sx %s3, %s32, %s3 -; CHECK-NEXT: ld2b.zx 
%s32, 2552(, %s11) -; CHECK-NEXT: ld2b.zx %s33, 1528(, %s11) -; CHECK-NEXT: subs.w.sx %s39, %s1, %s3 -; CHECK-NEXT: ld2b.zx %s3, 2560(, %s11) -; CHECK-NEXT: ld2b.zx %s1, 1536(, %s11) -; CHECK-NEXT: divu.w %s29, %s33, %s32 -; CHECK-NEXT: muls.w.sx %s29, %s29, %s32 -; CHECK-NEXT: subs.w.sx %s29, %s33, %s29 -; CHECK-NEXT: divu.w %s32, %s1, %s3 -; CHECK-NEXT: muls.w.sx %s3, %s32, %s3 -; CHECK-NEXT: ld2b.zx %s32, 2568(, %s11) -; CHECK-NEXT: ld2b.zx %s33, 1544(, %s11) -; CHECK-NEXT: subs.w.sx %s38, %s1, %s3 -; CHECK-NEXT: ld2b.zx %s3, 2576(, %s11) -; CHECK-NEXT: ld2b.zx %s1, 1552(, %s11) -; CHECK-NEXT: divu.w %s27, %s33, %s32 -; CHECK-NEXT: muls.w.sx %s27, %s27, %s32 -; CHECK-NEXT: subs.w.sx %s27, %s33, %s27 -; CHECK-NEXT: divu.w %s32, %s1, %s3 -; CHECK-NEXT: muls.w.sx %s3, %s32, %s3 -; CHECK-NEXT: ld2b.zx %s32, 2584(, %s11) -; CHECK-NEXT: ld2b.zx %s33, 1560(, %s11) -; CHECK-NEXT: subs.w.sx %s37, %s1, %s3 -; CHECK-NEXT: ld2b.zx %s3, 2592(, %s11) -; CHECK-NEXT: ld2b.zx %s1, 1568(, %s11) -; CHECK-NEXT: divu.w %s25, %s33, %s32 -; CHECK-NEXT: muls.w.sx %s25, %s25, %s32 -; CHECK-NEXT: subs.w.sx %s25, %s33, %s25 -; CHECK-NEXT: divu.w %s32, %s1, %s3 -; CHECK-NEXT: muls.w.sx %s3, %s32, %s3 -; CHECK-NEXT: ld2b.zx %s32, 2600(, %s11) -; CHECK-NEXT: ld2b.zx %s33, 1576(, %s11) -; CHECK-NEXT: subs.w.sx %s36, %s1, %s3 -; CHECK-NEXT: ld2b.zx %s3, 2608(, %s11) -; CHECK-NEXT: ld2b.zx %s1, 1584(, %s11) -; CHECK-NEXT: divu.w %s23, %s33, %s32 -; CHECK-NEXT: muls.w.sx %s23, %s23, %s32 -; CHECK-NEXT: subs.w.sx %s23, %s33, %s23 -; CHECK-NEXT: divu.w %s32, %s1, %s3 -; CHECK-NEXT: muls.w.sx %s3, %s32, %s3 -; CHECK-NEXT: ld2b.zx %s32, 2616(, %s11) -; CHECK-NEXT: ld2b.zx %s33, 1592(, %s11) -; CHECK-NEXT: subs.w.sx %s35, %s1, %s3 -; CHECK-NEXT: ld2b.zx %s3, 2624(, %s11) -; CHECK-NEXT: ld2b.zx %s1, 1600(, %s11) -; CHECK-NEXT: divu.w %s22, %s33, %s32 -; CHECK-NEXT: muls.w.sx %s22, %s22, %s32 -; CHECK-NEXT: subs.w.sx %s22, %s33, %s22 -; CHECK-NEXT: divu.w %s32, %s1, %s3 -; CHECK-NEXT: muls.w.sx %s3, 
%s32, %s3 -; CHECK-NEXT: ld2b.zx %s32, 2632(, %s11) -; CHECK-NEXT: ld2b.zx %s33, 1608(, %s11) -; CHECK-NEXT: subs.w.sx %s34, %s1, %s3 -; CHECK-NEXT: ld2b.zx %s3, 2640(, %s11) -; CHECK-NEXT: ld2b.zx %s1, 1616(, %s11) -; CHECK-NEXT: divu.w %s21, %s33, %s32 -; CHECK-NEXT: muls.w.sx %s21, %s21, %s32 -; CHECK-NEXT: subs.w.sx %s21, %s33, %s21 -; CHECK-NEXT: divu.w %s32, %s1, %s3 -; CHECK-NEXT: muls.w.sx %s3, %s32, %s3 -; CHECK-NEXT: ld2b.zx %s32, 2648(, %s11) -; CHECK-NEXT: ld2b.zx %s33, 1624(, %s11) -; CHECK-NEXT: subs.w.sx %s7, %s1, %s3 -; CHECK-NEXT: ld2b.zx %s3, 2656(, %s11) -; CHECK-NEXT: ld2b.zx %s1, 1632(, %s11) -; CHECK-NEXT: divu.w %s20, %s33, %s32 -; CHECK-NEXT: muls.w.sx %s20, %s20, %s32 -; CHECK-NEXT: subs.w.sx %s20, %s33, %s20 -; CHECK-NEXT: divu.w %s32, %s1, %s3 -; CHECK-NEXT: muls.w.sx %s3, %s32, %s3 -; CHECK-NEXT: ld2b.zx %s32, 2664(, %s11) -; CHECK-NEXT: ld2b.zx %s33, 1640(, %s11) -; CHECK-NEXT: subs.w.sx %s6, %s1, %s3 -; CHECK-NEXT: ld2b.zx %s3, 2672(, %s11) -; CHECK-NEXT: ld2b.zx %s1, 1648(, %s11) -; CHECK-NEXT: divu.w %s19, %s33, %s32 -; CHECK-NEXT: muls.w.sx %s19, %s19, %s32 -; CHECK-NEXT: subs.w.sx %s19, %s33, %s19 -; CHECK-NEXT: divu.w %s32, %s1, %s3 -; CHECK-NEXT: muls.w.sx %s3, %s32, %s3 -; CHECK-NEXT: ld2b.zx %s32, 2680(, %s11) -; CHECK-NEXT: ld2b.zx %s33, 1656(, %s11) -; CHECK-NEXT: subs.w.sx %s5, %s1, %s3 -; CHECK-NEXT: ld2b.zx %s3, 2688(, %s11) -; CHECK-NEXT: ld2b.zx %s1, 1664(, %s11) -; CHECK-NEXT: divu.w %s18, %s33, %s32 -; CHECK-NEXT: muls.w.sx %s18, %s18, %s32 -; CHECK-NEXT: subs.w.sx %s18, %s33, %s18 -; CHECK-NEXT: divu.w %s32, %s1, %s3 -; CHECK-NEXT: muls.w.sx %s3, %s32, %s3 -; CHECK-NEXT: ld2b.zx %s32, 2696(, %s11) -; CHECK-NEXT: ld2b.zx %s33, 1672(, %s11) -; CHECK-NEXT: subs.w.sx %s4, %s1, %s3 -; CHECK-NEXT: ld2b.zx %s3, 2704(, %s11) -; CHECK-NEXT: ld2b.zx %s1, 1680(, %s11) -; CHECK-NEXT: divu.w %s63, %s33, %s32 -; CHECK-NEXT: muls.w.sx %s63, %s63, %s32 -; CHECK-NEXT: subs.w.sx %s63, %s33, %s63 -; CHECK-NEXT: divu.w %s32, %s1, %s3 -; 
CHECK-NEXT: muls.w.sx %s3, %s32, %s3 -; CHECK-NEXT: ld2b.zx %s32, 2712(, %s11) -; CHECK-NEXT: ld2b.zx %s33, 1688(, %s11) -; CHECK-NEXT: subs.w.sx %s2, %s1, %s3 -; CHECK-NEXT: ld2b.zx %s3, 2720(, %s11) -; CHECK-NEXT: ld2b.zx %s1, 1696(, %s11) -; CHECK-NEXT: divu.w %s62, %s33, %s32 -; CHECK-NEXT: muls.w.sx %s62, %s62, %s32 -; CHECK-NEXT: subs.w.sx %s62, %s33, %s62 -; CHECK-NEXT: divu.w %s32, %s1, %s3 -; CHECK-NEXT: muls.w.sx %s3, %s32, %s3 -; CHECK-NEXT: subs.w.sx %s1, %s1, %s3 -; CHECK-NEXT: st2b %s1, 254(, %s0) -; CHECK-NEXT: st2b %s62, 252(, %s0) -; CHECK-NEXT: st2b %s2, 250(, %s0) -; CHECK-NEXT: st2b %s63, 248(, %s0) -; CHECK-NEXT: st2b %s4, 246(, %s0) -; CHECK-NEXT: st2b %s18, 244(, %s0) -; CHECK-NEXT: st2b %s5, 242(, %s0) -; CHECK-NEXT: st2b %s19, 240(, %s0) -; CHECK-NEXT: st2b %s6, 238(, %s0) -; CHECK-NEXT: st2b %s20, 236(, %s0) -; CHECK-NEXT: st2b %s7, 234(, %s0) -; CHECK-NEXT: st2b %s21, 232(, %s0) -; CHECK-NEXT: st2b %s34, 230(, %s0) -; CHECK-NEXT: st2b %s22, 228(, %s0) -; CHECK-NEXT: st2b %s35, 226(, %s0) -; CHECK-NEXT: st2b %s23, 224(, %s0) -; CHECK-NEXT: st2b %s36, 222(, %s0) -; CHECK-NEXT: st2b %s25, 220(, %s0) -; CHECK-NEXT: st2b %s37, 218(, %s0) -; CHECK-NEXT: st2b %s27, 216(, %s0) -; CHECK-NEXT: st2b %s38, 214(, %s0) -; CHECK-NEXT: st2b %s29, 212(, %s0) -; CHECK-NEXT: st2b %s39, 210(, %s0) -; CHECK-NEXT: st2b %s31, 208(, %s0) -; CHECK-NEXT: st2b %s40, 206(, %s0) -; CHECK-NEXT: st2b %s30, 204(, %s0) -; CHECK-NEXT: st2b %s41, 202(, %s0) -; CHECK-NEXT: st2b %s28, 200(, %s0) -; CHECK-NEXT: st2b %s42, 198(, %s0) -; CHECK-NEXT: st2b %s26, 196(, %s0) -; CHECK-NEXT: st2b %s43, 194(, %s0) -; CHECK-NEXT: st2b %s24, 192(, %s0) -; CHECK-NEXT: st2b %s44, 190(, %s0) -; CHECK-NEXT: st2b %s45, 188(, %s0) -; CHECK-NEXT: st2b %s46, 186(, %s0) -; CHECK-NEXT: st2b %s47, 184(, %s0) -; CHECK-NEXT: st2b %s48, 182(, %s0) -; CHECK-NEXT: st2b %s49, 180(, %s0) -; CHECK-NEXT: st2b %s50, 178(, %s0) -; CHECK-NEXT: st2b %s51, 176(, %s0) -; CHECK-NEXT: st2b %s52, 174(, %s0) -; 
CHECK-NEXT: st2b %s53, 172(, %s0) -; CHECK-NEXT: st2b %s54, 170(, %s0) -; CHECK-NEXT: st2b %s55, 168(, %s0) -; CHECK-NEXT: st2b %s56, 166(, %s0) -; CHECK-NEXT: st2b %s57, 164(, %s0) -; CHECK-NEXT: st2b %s58, 162(, %s0) -; CHECK-NEXT: st2b %s59, 160(, %s0) -; CHECK-NEXT: st2b %s60, 158(, %s0) -; CHECK-NEXT: st2b %s61, 156(, %s0) -; CHECK-NEXT: ldl.sx %s1, 184(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 154(, %s0) -; CHECK-NEXT: ldl.sx %s1, 188(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 152(, %s0) -; CHECK-NEXT: ldl.sx %s1, 192(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 150(, %s0) -; CHECK-NEXT: ldl.sx %s1, 196(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 148(, %s0) -; CHECK-NEXT: ldl.sx %s1, 200(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 146(, %s0) -; CHECK-NEXT: ldl.sx %s1, 204(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 144(, %s0) -; CHECK-NEXT: ldl.sx %s1, 208(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 142(, %s0) -; CHECK-NEXT: ldl.sx %s1, 212(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 140(, %s0) -; CHECK-NEXT: ldl.sx %s1, 216(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 138(, %s0) -; CHECK-NEXT: ldl.sx %s1, 220(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 136(, %s0) -; CHECK-NEXT: ldl.sx %s1, 224(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 134(, %s0) -; CHECK-NEXT: ldl.sx %s1, 228(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 132(, %s0) -; CHECK-NEXT: ldl.sx %s1, 232(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 130(, %s0) -; CHECK-NEXT: ldl.sx %s1, 236(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 128(, %s0) -; CHECK-NEXT: ldl.sx %s1, 240(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 126(, %s0) -; CHECK-NEXT: ldl.sx %s1, 244(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 124(, %s0) -; CHECK-NEXT: ldl.sx %s1, 248(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 122(, %s0) -; 
CHECK-NEXT: ldl.sx %s1, 252(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 120(, %s0) -; CHECK-NEXT: ldl.sx %s1, 256(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 118(, %s0) -; CHECK-NEXT: ldl.sx %s1, 260(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 116(, %s0) -; CHECK-NEXT: ldl.sx %s1, 264(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 114(, %s0) -; CHECK-NEXT: ldl.sx %s1, 268(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 112(, %s0) -; CHECK-NEXT: ldl.sx %s1, 272(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 110(, %s0) -; CHECK-NEXT: ldl.sx %s1, 276(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 108(, %s0) -; CHECK-NEXT: ldl.sx %s1, 280(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 106(, %s0) -; CHECK-NEXT: ldl.sx %s1, 284(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 104(, %s0) -; CHECK-NEXT: ldl.sx %s1, 288(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 102(, %s0) -; CHECK-NEXT: ldl.sx %s1, 292(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 100(, %s0) -; CHECK-NEXT: ldl.sx %s1, 296(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 98(, %s0) -; CHECK-NEXT: ldl.sx %s1, 300(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 96(, %s0) -; CHECK-NEXT: ldl.sx %s1, 304(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 94(, %s0) -; CHECK-NEXT: ldl.sx %s1, 308(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 92(, %s0) -; CHECK-NEXT: ldl.sx %s1, 312(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 90(, %s0) -; CHECK-NEXT: ldl.sx %s1, 316(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 88(, %s0) -; CHECK-NEXT: ldl.sx %s1, 320(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 86(, %s0) -; CHECK-NEXT: ldl.sx %s1, 324(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 84(, %s0) -; CHECK-NEXT: ldl.sx %s1, 328(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 82(, %s0) -; CHECK-NEXT: ldl.sx %s1, 332(, %s11) # 4-byte 
Folded Reload -; CHECK-NEXT: st2b %s1, 80(, %s0) -; CHECK-NEXT: ldl.sx %s1, 336(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 78(, %s0) -; CHECK-NEXT: ldl.sx %s1, 340(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 76(, %s0) -; CHECK-NEXT: ldl.sx %s1, 344(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 74(, %s0) -; CHECK-NEXT: ldl.sx %s1, 348(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 72(, %s0) -; CHECK-NEXT: ldl.sx %s1, 352(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 70(, %s0) -; CHECK-NEXT: ldl.sx %s1, 356(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 68(, %s0) -; CHECK-NEXT: ldl.sx %s1, 360(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 66(, %s0) -; CHECK-NEXT: ldl.sx %s1, 364(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 64(, %s0) -; CHECK-NEXT: ldl.sx %s1, 368(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 62(, %s0) -; CHECK-NEXT: ldl.sx %s1, 372(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 60(, %s0) -; CHECK-NEXT: ldl.sx %s1, 376(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 58(, %s0) -; CHECK-NEXT: ldl.sx %s1, 380(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 56(, %s0) -; CHECK-NEXT: ldl.sx %s1, 384(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 54(, %s0) -; CHECK-NEXT: ldl.sx %s1, 388(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 52(, %s0) -; CHECK-NEXT: ldl.sx %s1, 392(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 50(, %s0) -; CHECK-NEXT: ldl.sx %s1, 396(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 48(, %s0) -; CHECK-NEXT: ldl.sx %s1, 400(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 46(, %s0) -; CHECK-NEXT: ldl.sx %s1, 404(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 44(, %s0) -; CHECK-NEXT: ldl.sx %s1, 408(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 42(, %s0) -; CHECK-NEXT: ldl.sx %s1, 412(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 40(, %s0) -; 
CHECK-NEXT: ldl.sx %s1, 416(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 38(, %s0) -; CHECK-NEXT: ldl.sx %s1, 420(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 36(, %s0) -; CHECK-NEXT: ldl.sx %s1, 424(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 34(, %s0) -; CHECK-NEXT: ldl.sx %s1, 428(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 32(, %s0) -; CHECK-NEXT: ldl.sx %s1, 432(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 30(, %s0) -; CHECK-NEXT: ldl.sx %s1, 436(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 28(, %s0) -; CHECK-NEXT: ldl.sx %s1, 440(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 26(, %s0) -; CHECK-NEXT: ldl.sx %s1, 444(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 24(, %s0) -; CHECK-NEXT: ldl.sx %s1, 448(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 22(, %s0) -; CHECK-NEXT: ldl.sx %s1, 452(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 20(, %s0) -; CHECK-NEXT: ldl.sx %s1, 456(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 18(, %s0) -; CHECK-NEXT: ldl.sx %s1, 460(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 16(, %s0) -; CHECK-NEXT: ldl.sx %s1, 464(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 14(, %s0) -; CHECK-NEXT: ldl.sx %s1, 468(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 12(, %s0) -; CHECK-NEXT: ldl.sx %s1, 472(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 10(, %s0) -; CHECK-NEXT: ldl.sx %s1, 476(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 8(, %s0) -; CHECK-NEXT: ldl.sx %s1, 480(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 6(, %s0) -; CHECK-NEXT: ldl.sx %s1, 484(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 4(, %s0) -; CHECK-NEXT: ldl.sx %s1, 488(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, 2(, %s0) -; CHECK-NEXT: ldl.sx %s1, 492(, %s11) # 4-byte Folded Reload -; CHECK-NEXT: st2b %s1, (, %s0) -; CHECK-NEXT: ld %s33, 664(, %s11) # 8-byte Folded Reload -; 
CHECK-NEXT: ld %s32, 656(, %s11) # 8-byte Folded Reload -; CHECK-NEXT: ld %s31, 648(, %s11) # 8-byte Folded Reload -; CHECK-NEXT: ld %s30, 640(, %s11) # 8-byte Folded Reload -; CHECK-NEXT: ld %s29, 632(, %s11) # 8-byte Folded Reload -; CHECK-NEXT: ld %s28, 624(, %s11) # 8-byte Folded Reload -; CHECK-NEXT: ld %s27, 616(, %s11) # 8-byte Folded Reload -; CHECK-NEXT: ld %s26, 608(, %s11) # 8-byte Folded Reload -; CHECK-NEXT: ld %s25, 600(, %s11) # 8-byte Folded Reload -; CHECK-NEXT: ld %s24, 592(, %s11) # 8-byte Folded Reload -; CHECK-NEXT: ld %s23, 584(, %s11) # 8-byte Folded Reload -; CHECK-NEXT: ld %s22, 576(, %s11) # 8-byte Folded Reload -; CHECK-NEXT: ld %s21, 568(, %s11) # 8-byte Folded Reload -; CHECK-NEXT: ld %s20, 560(, %s11) # 8-byte Folded Reload -; CHECK-NEXT: ld %s19, 552(, %s11) # 8-byte Folded Reload -; CHECK-NEXT: ld %s18, 544(, %s11) # 8-byte Folded Reload -; CHECK-NEXT: or %s11, 0, %s9 -; CHECK-NEXT: ld %s10, 8(, %s11) -; CHECK-NEXT: ld %s9, (, %s11) +; CHECK-NEXT: lea %s0, 256 +; CHECK-NEXT: lea %s1, 65535 +; CHECK-NEXT: lvl %s0 +; CHECK-NEXT: pvand.lo %v1, %s1, %v1 +; CHECK-NEXT: lea %s2, 128 +; CHECK-NEXT: lvl %s2 +; CHECK-NEXT: pvand.lo %v1, %s1, %v1 +; CHECK-NEXT: lvl %s0 +; CHECK-NEXT: pvand.lo %v0, %s1, %v0 +; CHECK-NEXT: lvl %s2 +; CHECK-NEXT: pvand.lo %v0, %s1, %v0 +; CHECK-NEXT: vdivu.w %v2, %v0, %v1 +; CHECK-NEXT: vmuls.w.sx %v1, %v2, %v1 +; CHECK-NEXT: vsubs.w.sx %v0, %v0, %v1 ; CHECK-NEXT: b.l.t (, %s10) %z = urem <128 x i16> %x, %y ret <128 x i16> %z diff --git a/llvm/test/CodeGen/VE/Vector/vr_call_1_8.ll b/llvm/test/CodeGen/VE/Vector/vr_call_1_8.ll index e9ec4a7c0c8c..24722d237d3e 100644 --- a/llvm/test/CodeGen/VE/Vector/vr_call_1_8.ll +++ b/llvm/test/CodeGen/VE/Vector/vr_call_1_8.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=ve -mattr=+vpu,+packed | FileCheck %s ;;; Test vector register through function calls. 
@@ -53,18 +54,9 @@ define fastcc <2 x float> @vfadds2(<2 x float> %0, <2 x float> %1) { define fastcc <3 x double> @vfaddd3(<3 x double> %0, <3 x double> %1) { ; CHECK-LABEL: vfaddd3: ; CHECK: # %bb.0: -; CHECK-NEXT: lsv %v0(0), %s0 -; CHECK-NEXT: lsv %v0(1), %s1 -; CHECK-NEXT: lsv %v0(2), %s2 -; CHECK-NEXT: lsv %v1(0), %s3 -; CHECK-NEXT: lsv %v1(1), %s4 -; CHECK-NEXT: lsv %v1(2), %s5 ; CHECK-NEXT: or %s0, 3, (0)1 ; CHECK-NEXT: lvl %s0 ; CHECK-NEXT: vfadd.d %v0, %v1, %v0 -; CHECK-NEXT: lvs %s0, %v0(0) -; CHECK-NEXT: lvs %s1, %v0(1) -; CHECK-NEXT: lvs %s2, %v0(2) ; CHECK-NEXT: b.l.t (, %s10) %3 = fadd fast <3 x double> %1, %0 ret <3 x double> %3 @@ -74,18 +66,9 @@ define fastcc <3 x double> @vfaddd3(<3 x double> %0, <3 x double> %1) { define fastcc <3 x float> @vfadds3(<3 x float> %0, <3 x float> %1) { ; CHECK-LABEL: vfadds3: ; CHECK: # %bb.0: -; CHECK-NEXT: lsv %v0(0), %s0 -; CHECK-NEXT: lsv %v0(1), %s1 -; CHECK-NEXT: lsv %v0(2), %s2 -; CHECK-NEXT: lsv %v1(0), %s3 -; CHECK-NEXT: lsv %v1(1), %s4 -; CHECK-NEXT: lsv %v1(2), %s5 ; CHECK-NEXT: or %s0, 3, (0)1 ; CHECK-NEXT: lvl %s0 ; CHECK-NEXT: pvfadd.up %v0, %v1, %v0 -; CHECK-NEXT: lvs %s0, %v0(0) -; CHECK-NEXT: lvs %s1, %v0(1) -; CHECK-NEXT: lvs %s2, %v0(2) ; CHECK-NEXT: b.l.t (, %s10) %3 = fadd fast <3 x float> %1, %0 ret <3 x float> %3 @@ -131,26 +114,9 @@ define fastcc <5 x double> @vfaddd5(<5 x double> %0, <5 x double> %1) { define fastcc <5 x float> @vfadds5(<5 x float> %0, <5 x float> %1) { ; CHECK-LABEL: vfadds5: ; CHECK: # %bb.0: -; CHECK-NEXT: lsv %v0(0), %s0 -; CHECK-NEXT: lsv %v0(1), %s1 -; CHECK-NEXT: lsv %v0(2), %s2 -; CHECK-NEXT: lsv %v0(3), %s3 -; CHECK-NEXT: lsv %v0(4), %s4 -; CHECK-NEXT: lsv %v1(0), %s5 -; CHECK-NEXT: ldu %s0, 244(, %s11) -; CHECK-NEXT: ldu %s1, 252(, %s11) -; CHECK-NEXT: lsv %v1(1), %s6 -; CHECK-NEXT: lsv %v1(2), %s7 -; CHECK-NEXT: lsv %v1(3), %s0 -; CHECK-NEXT: lsv %v1(4), %s1 ; CHECK-NEXT: or %s0, 5, (0)1 ; CHECK-NEXT: lvl %s0 ; CHECK-NEXT: pvfadd.up %v0, %v1, %v0 -; CHECK-NEXT: 
lvs %s0, %v0(0) -; CHECK-NEXT: lvs %s1, %v0(1) -; CHECK-NEXT: lvs %s2, %v0(2) -; CHECK-NEXT: lvs %s3, %v0(3) -; CHECK-NEXT: lvs %s4, %v0(4) ; CHECK-NEXT: b.l.t (, %s10) %3 = fadd fast <5 x float> %1, %0 ret <5 x float> %3 @@ -172,31 +138,9 @@ define fastcc <6 x double> @vfaddd6(<6 x double> %0, <6 x double> %1) { define fastcc <6 x float> @vfadds6(<6 x float> %0, <6 x float> %1) { ; CHECK-LABEL: vfadds6: ; CHECK: # %bb.0: -; CHECK-NEXT: lsv %v0(0), %s0 -; CHECK-NEXT: lsv %v0(1), %s1 -; CHECK-NEXT: lsv %v0(2), %s2 -; CHECK-NEXT: lsv %v0(3), %s3 -; CHECK-NEXT: lsv %v0(4), %s4 -; CHECK-NEXT: lsv %v0(5), %s5 -; CHECK-NEXT: lsv %v1(0), %s6 -; CHECK-NEXT: lsv %v1(1), %s7 -; CHECK-NEXT: ldu %s0, 244(, %s11) -; CHECK-NEXT: ldu %s1, 252(, %s11) -; CHECK-NEXT: ldu %s2, 260(, %s11) -; CHECK-NEXT: ldu %s3, 268(, %s11) -; CHECK-NEXT: lsv %v1(2), %s0 -; CHECK-NEXT: lsv %v1(3), %s1 -; CHECK-NEXT: lsv %v1(4), %s2 -; CHECK-NEXT: lsv %v1(5), %s3 ; CHECK-NEXT: or %s0, 6, (0)1 ; CHECK-NEXT: lvl %s0 ; CHECK-NEXT: pvfadd.up %v0, %v1, %v0 -; CHECK-NEXT: lvs %s0, %v0(0) -; CHECK-NEXT: lvs %s1, %v0(1) -; CHECK-NEXT: lvs %s2, %v0(2) -; CHECK-NEXT: lvs %s3, %v0(3) -; CHECK-NEXT: lvs %s4, %v0(4) -; CHECK-NEXT: lvs %s5, %v0(5) ; CHECK-NEXT: b.l.t (, %s10) %3 = fadd fast <6 x float> %1, %0 ret <6 x float> %3 @@ -218,36 +162,9 @@ define fastcc <7 x double> @vfaddd7(<7 x double> %0, <7 x double> %1) { define fastcc <7 x float> @vfadds7(<7 x float> %0, <7 x float> %1) { ; CHECK-LABEL: vfadds7: ; CHECK: # %bb.0: -; CHECK-NEXT: lsv %v0(0), %s0 -; CHECK-NEXT: lsv %v0(1), %s1 -; CHECK-NEXT: lsv %v0(2), %s2 -; CHECK-NEXT: lsv %v0(3), %s3 -; CHECK-NEXT: lsv %v0(4), %s4 -; CHECK-NEXT: lsv %v0(5), %s5 -; CHECK-NEXT: ldu %s0, 244(, %s11) -; CHECK-NEXT: ldu %s1, 252(, %s11) -; CHECK-NEXT: lsv %v0(6), %s6 -; CHECK-NEXT: lsv %v1(0), %s7 -; CHECK-NEXT: lsv %v1(1), %s0 -; CHECK-NEXT: lsv %v1(2), %s1 -; CHECK-NEXT: ldu %s0, 260(, %s11) -; CHECK-NEXT: ldu %s1, 268(, %s11) -; CHECK-NEXT: ldu %s2, 276(, %s11) -; 
CHECK-NEXT: ldu %s3, 284(, %s11) -; CHECK-NEXT: lsv %v1(3), %s0 -; CHECK-NEXT: lsv %v1(4), %s1 -; CHECK-NEXT: lsv %v1(5), %s2 -; CHECK-NEXT: lsv %v1(6), %s3 ; CHECK-NEXT: or %s0, 7, (0)1 ; CHECK-NEXT: lvl %s0 ; CHECK-NEXT: pvfadd.up %v0, %v1, %v0 -; CHECK-NEXT: lvs %s0, %v0(0) -; CHECK-NEXT: lvs %s1, %v0(1) -; CHECK-NEXT: lvs %s2, %v0(2) -; CHECK-NEXT: lvs %s3, %v0(3) -; CHECK-NEXT: lvs %s4, %v0(4) -; CHECK-NEXT: lvs %s5, %v0(5) -; CHECK-NEXT: lvs %s6, %v0(6) ; CHECK-NEXT: b.l.t (, %s10) %3 = fadd fast <7 x float> %1, %0 ret <7 x float> %3 @@ -277,4 +194,22 @@ define fastcc <8 x float> @vfadds8(<8 x float> %0, <8 x float> %1) { ret <8 x float> %3 } +; Function Attrs: nofree norecurse nosync nounwind readnone willreturn mustprogress +define fastcc <7 x i17> @vi17adds7(<7 x i17> %0, <7 x i17> %1) { +; CHECK-LABEL: vi17adds7: +; CHECK: # %bb.0: +; CHECK-NEXT: lea %s0, 256 +; CHECK-NEXT: lea %s1, 131071 +; CHECK-NEXT: lvl %s0 +; CHECK-NEXT: pvand.lo %v0, %s1, %v0 +; CHECK-NEXT: pvand.lo %v1, %s1, %v1 +; CHECK-NEXT: or %s0, 7, (0)1 +; CHECK-NEXT: lvl %s0 +; CHECK-NEXT: vadds.w.sx %v0, %v1, %v0 +; CHECK-NEXT: b.l.t (, %s10) + %3 = add nsw nuw <7 x i17> %1, %0 + ret <7 x i17> %3 +} + + !2 = !{!"clang version 13.0.0 (git@kaz7.github.com:sx-aurora-dev/llvm-project.git 03f601d9c971b381f909c9da25ecbee9e71aabbe)"}