diff --git a/llvm/lib/Target/Mips/MipsSEISelLowering.cpp b/llvm/lib/Target/Mips/MipsSEISelLowering.cpp index 1d1b0f9c6ae2a..71a70d9c2dd46 100644 --- a/llvm/lib/Target/Mips/MipsSEISelLowering.cpp +++ b/llvm/lib/Target/Mips/MipsSEISelLowering.cpp @@ -42,6 +42,7 @@ #include "llvm/TargetParser/Triple.h" #include #include +#include #include #include #include @@ -59,6 +60,45 @@ static cl::opt NoDPLoadStore("mno-ldc1-sdc1", cl::init(false), "stores to their single precision " "counterparts")); +// Widen the v2 vectors to the register width, e.g. v2i16 -> v8i16, +// v2i32 -> v4i32, etc, to ensure the correct lane size is used, i.e. +// INST.h for 16-bit lanes, INST.w for 32-bit, INST.d for 64-bit. +TargetLoweringBase::LegalizeTypeAction +MipsSETargetLowering::getPreferredVectorAction(MVT VT) const { + if (this->Subtarget.hasMSA()) { + switch (VT.SimpleTy) { + // Leave v2i1 vectors to be promoted to larger ones. + // Other i1 types will be promoted by default. + case MVT::v2i1: + return TypePromoteInteger; + break; + // 16-bit vector types (v2 and longer) + case MVT::v2i8: + // 32-bit vector types (v2 and longer) + case MVT::v2i16: + case MVT::v4i8: + // 64-bit vector types (v2 and longer) + case MVT::v2i32: + case MVT::v4i16: + case MVT::v8i8: + return TypeWidenVector; + break; + // Only word (.w) and doubleword (.d) are available for floating point + // vectors. That means floating point vectors should be either v2f64 + // or v4f32. + // Here we only explicitly widen the f32 types - f16 will be promoted + // by default. + case MVT::v2f32: + case MVT::v3f32: + return TypeWidenVector; + // v2i64 is already 128-bit wide. 
+ default: + break; + } + } + return TargetLoweringBase::getPreferredVectorAction(VT); +} + MipsSETargetLowering::MipsSETargetLowering(const MipsTargetMachine &TM, const MipsSubtarget &STI) : MipsTargetLowering(TM, STI) { @@ -2929,8 +2969,14 @@ static SDValue lowerVECTOR_SHUFFLE_PCKOD(SDValue Op, EVT ResTy, // if the type is v8i16 and all the indices are less than 8 then the second // operand is unused and can be replaced with anything. We choose to replace it // with the used operand since this reduces the number of instructions overall. +// +// NOTE: SPLATI shuffle masks may contain UNDEFs, since isSPLATI() treats +// UNDEFs the same as the SPLATI index; such entries reuse Indices[0]. +// For all other masks an UNDEF entry reuses the last valid index +// encountered before it. static SDValue lowerVECTOR_SHUFFLE_VSHF(SDValue Op, EVT ResTy, const SmallVector &Indices, + const bool isSPLATI, SelectionDAG &DAG) { SmallVector Ops; SDValue Op0; @@ -2942,6 +2988,9 @@ static SDValue lowerVECTOR_SHUFFLE_VSHF(SDValue Op, EVT ResTy, SDLoc DL(Op); int ResTyNumElts = ResTy.getVectorNumElements(); + assert(Indices[0] >= 0 && + "shuffle mask starts with an UNDEF, which is not expected"); + for (int i = 0; i < ResTyNumElts; ++i) { // Idx == -1 means UNDEF int Idx = Indices[i]; @@ -2951,9 +3000,17 @@ static SDValue lowerVECTOR_SHUFFLE_VSHF(SDValue Op, EVT ResTy, if (ResTyNumElts <= Idx && Idx < ResTyNumElts * 2) Using2ndVec = true; } - - for (int Idx : Indices) + int LastValidIndex = 0; + for (size_t i = 0; i < Indices.size(); i++) { + int Idx = Indices[i]; + if (Idx < 0) { + // Continue using splati index or use the last valid index. + Idx = isSPLATI ? Indices[0] : LastValidIndex; + } else { + LastValidIndex = Idx; + } Ops.push_back(DAG.getTargetConstant(Idx, DL, MaskEltTy)); + } SDValue MaskVec = DAG.getBuildVector(MaskVecTy, DL, Ops); @@ -2996,7 +3053,7 @@ SDValue MipsSETargetLowering::lowerVECTOR_SHUFFLE(SDValue Op, // splati.[bhwd] is preferable to the others but is matched from // MipsISD::VSHF. 
if (isVECTOR_SHUFFLE_SPLATI(Op, ResTy, Indices, DAG)) - return lowerVECTOR_SHUFFLE_VSHF(Op, ResTy, Indices, DAG); + return lowerVECTOR_SHUFFLE_VSHF(Op, ResTy, Indices, true, DAG); SDValue Result; if ((Result = lowerVECTOR_SHUFFLE_ILVEV(Op, ResTy, Indices, DAG))) return Result; @@ -3012,7 +3069,7 @@ SDValue MipsSETargetLowering::lowerVECTOR_SHUFFLE(SDValue Op, return Result; if ((Result = lowerVECTOR_SHUFFLE_SHF(Op, ResTy, Indices, DAG))) return Result; - return lowerVECTOR_SHUFFLE_VSHF(Op, ResTy, Indices, DAG); + return lowerVECTOR_SHUFFLE_VSHF(Op, ResTy, Indices, false, DAG); } MachineBasicBlock * diff --git a/llvm/lib/Target/Mips/MipsSEISelLowering.h b/llvm/lib/Target/Mips/MipsSEISelLowering.h index 43b88a9f09522..675131aefb6dd 100644 --- a/llvm/lib/Target/Mips/MipsSEISelLowering.h +++ b/llvm/lib/Target/Mips/MipsSEISelLowering.h @@ -45,6 +45,9 @@ class TargetRegisterClass; MachineMemOperand::Flags Flags = MachineMemOperand::MONone, unsigned *Fast = nullptr) const override; + TargetLoweringBase::LegalizeTypeAction + getPreferredVectorAction(MVT VT) const override; + SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override; SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override; diff --git a/llvm/test/CodeGen/Mips/cconv/vector.ll b/llvm/test/CodeGen/Mips/cconv/vector.ll index 28a7dc046139b..383e5ef19cebf 100644 --- a/llvm/test/CodeGen/Mips/cconv/vector.ll +++ b/llvm/test/CodeGen/Mips/cconv/vector.ll @@ -48,102 +48,86 @@ define <2 x i8> @i8_2(<2 x i8> %a, <2 x i8> %b) { ; ; MIPS32R5EB-LABEL: i8_2: ; MIPS32R5EB: # %bb.0: -; MIPS32R5EB-NEXT: addiu $sp, $sp, -64 -; MIPS32R5EB-NEXT: .cfi_def_cfa_offset 64 -; MIPS32R5EB-NEXT: sw $ra, 60($sp) # 4-byte Folded Spill -; MIPS32R5EB-NEXT: sw $fp, 56($sp) # 4-byte Folded Spill +; MIPS32R5EB-NEXT: addiu $sp, $sp, -48 +; MIPS32R5EB-NEXT: .cfi_def_cfa_offset 48 +; MIPS32R5EB-NEXT: sw $ra, 44($sp) # 4-byte Folded Spill +; MIPS32R5EB-NEXT: sw $fp, 40($sp) # 4-byte Folded Spill ; MIPS32R5EB-NEXT: 
.cfi_offset 31, -4 ; MIPS32R5EB-NEXT: .cfi_offset 30, -8 ; MIPS32R5EB-NEXT: move $fp, $sp ; MIPS32R5EB-NEXT: .cfi_def_cfa_register 30 ; MIPS32R5EB-NEXT: addiu $1, $zero, -16 ; MIPS32R5EB-NEXT: and $sp, $sp, $1 -; MIPS32R5EB-NEXT: sh $5, 48($sp) -; MIPS32R5EB-NEXT: sh $4, 52($sp) -; MIPS32R5EB-NEXT: lbu $1, 49($sp) -; MIPS32R5EB-NEXT: sw $1, 28($sp) -; MIPS32R5EB-NEXT: lbu $1, 48($sp) -; MIPS32R5EB-NEXT: sw $1, 20($sp) -; MIPS32R5EB-NEXT: lbu $1, 53($sp) -; MIPS32R5EB-NEXT: sw $1, 12($sp) -; MIPS32R5EB-NEXT: lbu $1, 52($sp) -; MIPS32R5EB-NEXT: sw $1, 4($sp) -; MIPS32R5EB-NEXT: ld.d $w0, 16($sp) -; MIPS32R5EB-NEXT: ld.d $w1, 0($sp) -; MIPS32R5EB-NEXT: addv.d $w0, $w1, $w0 -; MIPS32R5EB-NEXT: shf.w $w0, $w0, 177 -; MIPS32R5EB-NEXT: copy_s.w $1, $w0[1] -; MIPS32R5EB-NEXT: copy_s.w $2, $w0[3] -; MIPS32R5EB-NEXT: sb $2, 45($sp) -; MIPS32R5EB-NEXT: sb $1, 44($sp) -; MIPS32R5EB-NEXT: lhu $2, 44($sp) +; MIPS32R5EB-NEXT: sh $5, 16($sp) +; MIPS32R5EB-NEXT: sh $4, 0($sp) +; MIPS32R5EB-NEXT: ld.b $w0, 16($sp) +; MIPS32R5EB-NEXT: ld.b $w1, 0($sp) +; MIPS32R5EB-NEXT: addv.b $w0, $w1, $w0 +; MIPS32R5EB-NEXT: shf.b $w0, $w0, 177 +; MIPS32R5EB-NEXT: copy_u.h $2, $w0[0] ; MIPS32R5EB-NEXT: move $sp, $fp -; MIPS32R5EB-NEXT: lw $fp, 56($sp) # 4-byte Folded Reload -; MIPS32R5EB-NEXT: lw $ra, 60($sp) # 4-byte Folded Reload -; MIPS32R5EB-NEXT: addiu $sp, $sp, 64 +; MIPS32R5EB-NEXT: lw $fp, 40($sp) # 4-byte Folded Reload +; MIPS32R5EB-NEXT: lw $ra, 44($sp) # 4-byte Folded Reload +; MIPS32R5EB-NEXT: addiu $sp, $sp, 48 ; MIPS32R5EB-NEXT: jr $ra ; MIPS32R5EB-NEXT: nop ; -; MIPS64R5-LABEL: i8_2: -; MIPS64R5: # %bb.0: -; MIPS64R5-NEXT: daddiu $sp, $sp, -16 -; MIPS64R5-NEXT: .cfi_def_cfa_offset 16 -; MIPS64R5-NEXT: sh $5, 8($sp) -; MIPS64R5-NEXT: sh $4, 12($sp) -; MIPS64R5-NEXT: lb $1, 9($sp) -; MIPS64R5-NEXT: lb $2, 8($sp) -; MIPS64R5-NEXT: insert.d $w0[0], $2 -; MIPS64R5-NEXT: insert.d $w0[1], $1 -; MIPS64R5-NEXT: lb $1, 13($sp) -; MIPS64R5-NEXT: lb $2, 12($sp) -; MIPS64R5-NEXT: insert.d 
$w1[0], $2 -; MIPS64R5-NEXT: insert.d $w1[1], $1 -; MIPS64R5-NEXT: addv.d $w0, $w1, $w0 -; MIPS64R5-NEXT: copy_s.d $1, $w0[0] -; MIPS64R5-NEXT: copy_s.d $2, $w0[1] -; MIPS64R5-NEXT: sb $2, 5($sp) -; MIPS64R5-NEXT: sb $1, 4($sp) -; MIPS64R5-NEXT: lh $2, 4($sp) -; MIPS64R5-NEXT: daddiu $sp, $sp, 16 -; MIPS64R5-NEXT: jr $ra -; MIPS64R5-NEXT: nop -; ; MIPS32R5EL-LABEL: i8_2: ; MIPS32R5EL: # %bb.0: -; MIPS32R5EL-NEXT: addiu $sp, $sp, -64 -; MIPS32R5EL-NEXT: .cfi_def_cfa_offset 64 -; MIPS32R5EL-NEXT: sw $ra, 60($sp) # 4-byte Folded Spill -; MIPS32R5EL-NEXT: sw $fp, 56($sp) # 4-byte Folded Spill +; MIPS32R5EL-NEXT: addiu $sp, $sp, -48 +; MIPS32R5EL-NEXT: .cfi_def_cfa_offset 48 +; MIPS32R5EL-NEXT: sw $ra, 44($sp) # 4-byte Folded Spill +; MIPS32R5EL-NEXT: sw $fp, 40($sp) # 4-byte Folded Spill ; MIPS32R5EL-NEXT: .cfi_offset 31, -4 ; MIPS32R5EL-NEXT: .cfi_offset 30, -8 ; MIPS32R5EL-NEXT: move $fp, $sp ; MIPS32R5EL-NEXT: .cfi_def_cfa_register 30 ; MIPS32R5EL-NEXT: addiu $1, $zero, -16 ; MIPS32R5EL-NEXT: and $sp, $sp, $1 -; MIPS32R5EL-NEXT: sh $5, 48($sp) -; MIPS32R5EL-NEXT: sh $4, 52($sp) -; MIPS32R5EL-NEXT: lbu $1, 49($sp) -; MIPS32R5EL-NEXT: sw $1, 24($sp) -; MIPS32R5EL-NEXT: lbu $1, 48($sp) -; MIPS32R5EL-NEXT: sw $1, 16($sp) -; MIPS32R5EL-NEXT: lbu $1, 53($sp) -; MIPS32R5EL-NEXT: sw $1, 8($sp) -; MIPS32R5EL-NEXT: lbu $1, 52($sp) -; MIPS32R5EL-NEXT: sw $1, 0($sp) -; MIPS32R5EL-NEXT: ld.d $w0, 16($sp) -; MIPS32R5EL-NEXT: ld.d $w1, 0($sp) -; MIPS32R5EL-NEXT: addv.d $w0, $w1, $w0 -; MIPS32R5EL-NEXT: copy_s.w $1, $w0[0] -; MIPS32R5EL-NEXT: copy_s.w $2, $w0[2] -; MIPS32R5EL-NEXT: sb $2, 45($sp) -; MIPS32R5EL-NEXT: sb $1, 44($sp) -; MIPS32R5EL-NEXT: lhu $2, 44($sp) +; MIPS32R5EL-NEXT: sh $5, 16($sp) +; MIPS32R5EL-NEXT: sh $4, 0($sp) +; MIPS32R5EL-NEXT: ld.b $w0, 16($sp) +; MIPS32R5EL-NEXT: ld.b $w1, 0($sp) +; MIPS32R5EL-NEXT: addv.b $w0, $w1, $w0 +; MIPS32R5EL-NEXT: copy_u.h $2, $w0[0] ; MIPS32R5EL-NEXT: move $sp, $fp -; MIPS32R5EL-NEXT: lw $fp, 56($sp) # 4-byte Folded Reload -; 
MIPS32R5EL-NEXT: lw $ra, 60($sp) # 4-byte Folded Reload -; MIPS32R5EL-NEXT: addiu $sp, $sp, 64 +; MIPS32R5EL-NEXT: lw $fp, 40($sp) # 4-byte Folded Reload +; MIPS32R5EL-NEXT: lw $ra, 44($sp) # 4-byte Folded Reload +; MIPS32R5EL-NEXT: addiu $sp, $sp, 48 ; MIPS32R5EL-NEXT: jr $ra ; MIPS32R5EL-NEXT: nop +; +; MIPS64R5EB-LABEL: i8_2: +; MIPS64R5EB: # %bb.0: +; MIPS64R5EB-NEXT: daddiu $sp, $sp, -48 +; MIPS64R5EB-NEXT: .cfi_def_cfa_offset 48 +; MIPS64R5EB-NEXT: sh $5, 16($sp) +; MIPS64R5EB-NEXT: sh $4, 0($sp) +; MIPS64R5EB-NEXT: ld.b $w0, 16($sp) +; MIPS64R5EB-NEXT: ld.b $w1, 0($sp) +; MIPS64R5EB-NEXT: addv.b $w0, $w1, $w0 +; MIPS64R5EB-NEXT: shf.b $w0, $w0, 177 +; MIPS64R5EB-NEXT: copy_s.h $1, $w0[0] +; MIPS64R5EB-NEXT: sh $1, 44($sp) +; MIPS64R5EB-NEXT: lh $2, 44($sp) +; MIPS64R5EB-NEXT: daddiu $sp, $sp, 48 +; MIPS64R5EB-NEXT: jr $ra +; +; MIPS64R5EL-LABEL: i8_2: +; MIPS64R5EL: # %bb.0: +; MIPS64R5EL-NEXT: daddiu $sp, $sp, -48 +; MIPS64R5EL-NEXT: .cfi_def_cfa_offset 48 +; MIPS64R5EL-NEXT: sh $5, 16($sp) +; MIPS64R5EL-NEXT: sh $4, 0($sp) +; MIPS64R5EL-NEXT: ld.b $w0, 16($sp) +; MIPS64R5EL-NEXT: ld.b $w1, 0($sp) +; MIPS64R5EL-NEXT: addv.b $w0, $w1, $w0 +; MIPS64R5EL-NEXT: copy_s.h $1, $w0[0] +; MIPS64R5EL-NEXT: sh $1, 44($sp) +; MIPS64R5EL-NEXT: lh $2, 44($sp) +; MIPS64R5EL-NEXT: daddiu $sp, $sp, 48 +; MIPS64R5EL-NEXT: jr $ra +; MIPS64R5EL-NEXT: nop %1 = add <2 x i8> %a, %b ret <2 x i8> %1 } @@ -229,127 +213,110 @@ define <2 x i8> @i8x2_7(<2 x i8> %a, <2 x i8> %b, <2 x i8> %c, <2 x i8> %d, <2 x ; ; MIPS32R5EB-LABEL: i8x2_7: ; MIPS32R5EB: # %bb.0: # %entry -; MIPS32R5EB-NEXT: addiu $sp, $sp, -144 -; MIPS32R5EB-NEXT: .cfi_def_cfa_offset 144 -; MIPS32R5EB-NEXT: sw $ra, 140($sp) # 4-byte Folded Spill -; MIPS32R5EB-NEXT: sw $fp, 136($sp) # 4-byte Folded Spill +; MIPS32R5EB-NEXT: addiu $sp, $sp, -128 +; MIPS32R5EB-NEXT: .cfi_def_cfa_offset 128 +; MIPS32R5EB-NEXT: sw $ra, 124($sp) # 4-byte Folded Spill +; MIPS32R5EB-NEXT: sw $fp, 120($sp) # 4-byte Folded Spill ; MIPS32R5EB-NEXT: 
.cfi_offset 31, -4 ; MIPS32R5EB-NEXT: .cfi_offset 30, -8 ; MIPS32R5EB-NEXT: move $fp, $sp ; MIPS32R5EB-NEXT: .cfi_def_cfa_register 30 ; MIPS32R5EB-NEXT: addiu $1, $zero, -16 ; MIPS32R5EB-NEXT: and $sp, $sp, $1 -; MIPS32R5EB-NEXT: sh $5, 128($sp) -; MIPS32R5EB-NEXT: sh $4, 132($sp) -; MIPS32R5EB-NEXT: lbu $1, 129($sp) -; MIPS32R5EB-NEXT: sw $1, 76($sp) -; MIPS32R5EB-NEXT: lbu $1, 128($sp) -; MIPS32R5EB-NEXT: sw $1, 68($sp) -; MIPS32R5EB-NEXT: lbu $1, 133($sp) -; MIPS32R5EB-NEXT: sw $1, 60($sp) -; MIPS32R5EB-NEXT: lbu $1, 132($sp) -; MIPS32R5EB-NEXT: sw $1, 52($sp) -; MIPS32R5EB-NEXT: ld.d $w0, 64($sp) -; MIPS32R5EB-NEXT: ld.d $w1, 48($sp) -; MIPS32R5EB-NEXT: addv.d $w0, $w1, $w0 -; MIPS32R5EB-NEXT: sh $6, 124($sp) -; MIPS32R5EB-NEXT: lbu $1, 125($sp) -; MIPS32R5EB-NEXT: sw $1, 92($sp) -; MIPS32R5EB-NEXT: lbu $1, 124($sp) -; MIPS32R5EB-NEXT: sw $1, 84($sp) -; MIPS32R5EB-NEXT: ld.d $w1, 80($sp) -; MIPS32R5EB-NEXT: addv.d $w0, $w0, $w1 -; MIPS32R5EB-NEXT: sh $7, 120($sp) -; MIPS32R5EB-NEXT: lbu $1, 121($sp) -; MIPS32R5EB-NEXT: sw $1, 108($sp) -; MIPS32R5EB-NEXT: lbu $1, 120($sp) -; MIPS32R5EB-NEXT: sw $1, 100($sp) -; MIPS32R5EB-NEXT: ld.d $w1, 96($sp) -; MIPS32R5EB-NEXT: addv.d $w0, $w0, $w1 -; MIPS32R5EB-NEXT: lbu $1, 163($fp) -; MIPS32R5EB-NEXT: sw $1, 12($sp) -; MIPS32R5EB-NEXT: lbu $1, 162($fp) -; MIPS32R5EB-NEXT: sw $1, 4($sp) -; MIPS32R5EB-NEXT: ld.d $w1, 0($sp) -; MIPS32R5EB-NEXT: addv.d $w0, $w0, $w1 -; MIPS32R5EB-NEXT: lbu $1, 167($fp) -; MIPS32R5EB-NEXT: sw $1, 28($sp) -; MIPS32R5EB-NEXT: lbu $1, 166($fp) -; MIPS32R5EB-NEXT: sw $1, 20($sp) -; MIPS32R5EB-NEXT: ld.d $w1, 16($sp) -; MIPS32R5EB-NEXT: addv.d $w0, $w0, $w1 -; MIPS32R5EB-NEXT: lbu $1, 171($fp) -; MIPS32R5EB-NEXT: sw $1, 44($sp) -; MIPS32R5EB-NEXT: lbu $1, 170($fp) -; MIPS32R5EB-NEXT: sw $1, 36($sp) -; MIPS32R5EB-NEXT: ld.d $w1, 32($sp) -; MIPS32R5EB-NEXT: addv.d $w0, $w0, $w1 -; MIPS32R5EB-NEXT: shf.w $w0, $w0, 177 -; MIPS32R5EB-NEXT: copy_s.w $1, $w0[1] -; MIPS32R5EB-NEXT: copy_s.w $2, $w0[3] -; 
MIPS32R5EB-NEXT: sb $2, 117($sp) -; MIPS32R5EB-NEXT: sb $1, 116($sp) -; MIPS32R5EB-NEXT: lhu $2, 116($sp) +; MIPS32R5EB-NEXT: sh $5, 16($sp) +; MIPS32R5EB-NEXT: sh $4, 0($sp) +; MIPS32R5EB-NEXT: ld.b $w0, 16($sp) +; MIPS32R5EB-NEXT: ld.b $w1, 0($sp) +; MIPS32R5EB-NEXT: addv.b $w0, $w1, $w0 +; MIPS32R5EB-NEXT: sh $6, 32($sp) +; MIPS32R5EB-NEXT: ld.b $w1, 32($sp) +; MIPS32R5EB-NEXT: addv.b $w0, $w0, $w1 +; MIPS32R5EB-NEXT: sh $7, 48($sp) +; MIPS32R5EB-NEXT: ld.b $w1, 48($sp) +; MIPS32R5EB-NEXT: addv.b $w0, $w0, $w1 +; MIPS32R5EB-NEXT: lhu $1, 146($fp) +; MIPS32R5EB-NEXT: sh $1, 64($sp) +; MIPS32R5EB-NEXT: ld.b $w1, 64($sp) +; MIPS32R5EB-NEXT: addv.b $w0, $w0, $w1 +; MIPS32R5EB-NEXT: lhu $1, 150($fp) +; MIPS32R5EB-NEXT: sh $1, 80($sp) +; MIPS32R5EB-NEXT: ld.b $w1, 80($sp) +; MIPS32R5EB-NEXT: addv.b $w0, $w0, $w1 +; MIPS32R5EB-NEXT: lhu $1, 154($fp) +; MIPS32R5EB-NEXT: sh $1, 96($sp) +; MIPS32R5EB-NEXT: ld.b $w1, 96($sp) +; MIPS32R5EB-NEXT: addv.b $w0, $w0, $w1 +; MIPS32R5EB-NEXT: shf.b $w0, $w0, 177 +; MIPS32R5EB-NEXT: copy_u.h $2, $w0[0] ; MIPS32R5EB-NEXT: move $sp, $fp -; MIPS32R5EB-NEXT: lw $fp, 136($sp) # 4-byte Folded Reload -; MIPS32R5EB-NEXT: lw $ra, 140($sp) # 4-byte Folded Reload -; MIPS32R5EB-NEXT: addiu $sp, $sp, 144 +; MIPS32R5EB-NEXT: lw $fp, 120($sp) # 4-byte Folded Reload +; MIPS32R5EB-NEXT: lw $ra, 124($sp) # 4-byte Folded Reload +; MIPS32R5EB-NEXT: addiu $sp, $sp, 128 ; MIPS32R5EB-NEXT: jr $ra ; MIPS32R5EB-NEXT: nop ; -; MIPS64R5-LABEL: i8x2_7: -; MIPS64R5: # %bb.0: # %entry -; MIPS64R5-NEXT: daddiu $sp, $sp, -32 -; MIPS64R5-NEXT: .cfi_def_cfa_offset 32 -; MIPS64R5-NEXT: sh $5, 24($sp) -; MIPS64R5-NEXT: sh $4, 28($sp) -; MIPS64R5-NEXT: lb $1, 25($sp) -; MIPS64R5-NEXT: lb $2, 24($sp) -; MIPS64R5-NEXT: insert.d $w0[0], $2 -; MIPS64R5-NEXT: insert.d $w0[1], $1 -; MIPS64R5-NEXT: lb $1, 29($sp) -; MIPS64R5-NEXT: lb $2, 28($sp) -; MIPS64R5-NEXT: insert.d $w1[0], $2 -; MIPS64R5-NEXT: insert.d $w1[1], $1 -; MIPS64R5-NEXT: addv.d $w0, $w1, $w0 -; 
MIPS64R5-NEXT: sh $6, 20($sp) -; MIPS64R5-NEXT: lb $1, 21($sp) -; MIPS64R5-NEXT: lb $2, 20($sp) -; MIPS64R5-NEXT: insert.d $w1[0], $2 -; MIPS64R5-NEXT: insert.d $w1[1], $1 -; MIPS64R5-NEXT: addv.d $w0, $w0, $w1 -; MIPS64R5-NEXT: sh $7, 16($sp) -; MIPS64R5-NEXT: lb $1, 17($sp) -; MIPS64R5-NEXT: lb $2, 16($sp) -; MIPS64R5-NEXT: insert.d $w1[0], $2 -; MIPS64R5-NEXT: insert.d $w1[1], $1 -; MIPS64R5-NEXT: addv.d $w0, $w0, $w1 -; MIPS64R5-NEXT: sh $8, 12($sp) -; MIPS64R5-NEXT: lb $1, 13($sp) -; MIPS64R5-NEXT: lb $2, 12($sp) -; MIPS64R5-NEXT: insert.d $w1[0], $2 -; MIPS64R5-NEXT: insert.d $w1[1], $1 -; MIPS64R5-NEXT: addv.d $w0, $w0, $w1 -; MIPS64R5-NEXT: sh $9, 8($sp) -; MIPS64R5-NEXT: lb $1, 9($sp) -; MIPS64R5-NEXT: lb $2, 8($sp) -; MIPS64R5-NEXT: insert.d $w1[0], $2 -; MIPS64R5-NEXT: insert.d $w1[1], $1 -; MIPS64R5-NEXT: addv.d $w0, $w0, $w1 -; MIPS64R5-NEXT: sh $10, 4($sp) -; MIPS64R5-NEXT: lb $1, 5($sp) -; MIPS64R5-NEXT: lb $2, 4($sp) -; MIPS64R5-NEXT: insert.d $w1[0], $2 -; MIPS64R5-NEXT: insert.d $w1[1], $1 -; MIPS64R5-NEXT: addv.d $w0, $w0, $w1 -; MIPS64R5-NEXT: copy_s.d $1, $w0[0] -; MIPS64R5-NEXT: copy_s.d $2, $w0[1] -; MIPS64R5-NEXT: sb $2, 1($sp) -; MIPS64R5-NEXT: sb $1, 0($sp) -; MIPS64R5-NEXT: lh $2, 0($sp) -; MIPS64R5-NEXT: daddiu $sp, $sp, 32 -; MIPS64R5-NEXT: jr $ra -; MIPS64R5-NEXT: nop +; MIPS64R5EB-LABEL: i8x2_7: +; MIPS64R5EB: # %bb.0: # %entry +; MIPS64R5EB-NEXT: daddiu $sp, $sp, -128 +; MIPS64R5EB-NEXT: .cfi_def_cfa_offset 128 +; MIPS64R5EB-NEXT: sh $5, 16($sp) +; MIPS64R5EB-NEXT: sh $4, 0($sp) +; MIPS64R5EB-NEXT: ld.b $w0, 16($sp) +; MIPS64R5EB-NEXT: ld.b $w1, 0($sp) +; MIPS64R5EB-NEXT: addv.b $w0, $w1, $w0 +; MIPS64R5EB-NEXT: sh $6, 32($sp) +; MIPS64R5EB-NEXT: ld.b $w1, 32($sp) +; MIPS64R5EB-NEXT: addv.b $w0, $w0, $w1 +; MIPS64R5EB-NEXT: sh $7, 48($sp) +; MIPS64R5EB-NEXT: ld.b $w1, 48($sp) +; MIPS64R5EB-NEXT: addv.b $w0, $w0, $w1 +; MIPS64R5EB-NEXT: sh $8, 64($sp) +; MIPS64R5EB-NEXT: ld.b $w1, 64($sp) +; MIPS64R5EB-NEXT: addv.b $w0, $w0, $w1 +; 
MIPS64R5EB-NEXT: sh $9, 80($sp) +; MIPS64R5EB-NEXT: ld.b $w1, 80($sp) +; MIPS64R5EB-NEXT: addv.b $w0, $w0, $w1 +; MIPS64R5EB-NEXT: sh $10, 96($sp) +; MIPS64R5EB-NEXT: ld.b $w1, 96($sp) +; MIPS64R5EB-NEXT: addv.b $w0, $w0, $w1 +; MIPS64R5EB-NEXT: shf.b $w0, $w0, 177 +; MIPS64R5EB-NEXT: copy_s.h $1, $w0[0] +; MIPS64R5EB-NEXT: sh $1, 124($sp) +; MIPS64R5EB-NEXT: lh $2, 124($sp) +; MIPS64R5EB-NEXT: daddiu $sp, $sp, 128 +; MIPS64R5EB-NEXT: jr $ra +; MIPS64R5EB-NEXT: nop +; +; MIPS64R5EL-LABEL: i8x2_7: +; MIPS64R5EL: # %bb.0: # %entry +; MIPS64R5EL-NEXT: daddiu $sp, $sp, -128 +; MIPS64R5EL-NEXT: .cfi_def_cfa_offset 128 +; MIPS64R5EL-NEXT: sh $5, 16($sp) +; MIPS64R5EL-NEXT: sh $4, 0($sp) +; MIPS64R5EL-NEXT: ld.b $w0, 16($sp) +; MIPS64R5EL-NEXT: ld.b $w1, 0($sp) +; MIPS64R5EL-NEXT: addv.b $w0, $w1, $w0 +; MIPS64R5EL-NEXT: sh $6, 32($sp) +; MIPS64R5EL-NEXT: ld.b $w1, 32($sp) +; MIPS64R5EL-NEXT: addv.b $w0, $w0, $w1 +; MIPS64R5EL-NEXT: sh $7, 48($sp) +; MIPS64R5EL-NEXT: ld.b $w1, 48($sp) +; MIPS64R5EL-NEXT: addv.b $w0, $w0, $w1 +; MIPS64R5EL-NEXT: sh $8, 64($sp) +; MIPS64R5EL-NEXT: ld.b $w1, 64($sp) +; MIPS64R5EL-NEXT: addv.b $w0, $w0, $w1 +; MIPS64R5EL-NEXT: sh $9, 80($sp) +; MIPS64R5EL-NEXT: ld.b $w1, 80($sp) +; MIPS64R5EL-NEXT: addv.b $w0, $w0, $w1 +; MIPS64R5EL-NEXT: sh $10, 96($sp) +; MIPS64R5EL-NEXT: ld.b $w1, 96($sp) +; MIPS64R5EL-NEXT: addv.b $w0, $w0, $w1 +; MIPS64R5EL-NEXT: copy_s.h $1, $w0[0] +; MIPS64R5EL-NEXT: sh $1, 124($sp) +; MIPS64R5EL-NEXT: lh $2, 124($sp) +; MIPS64R5EL-NEXT: daddiu $sp, $sp, 128 +; MIPS64R5EL-NEXT: jr $ra +; MIPS64R5EL-NEXT: nop ; ; MIPS32EL-LABEL: i8x2_7: ; MIPS32EL: # %bb.0: # %entry @@ -387,70 +354,44 @@ define <2 x i8> @i8x2_7(<2 x i8> %a, <2 x i8> %b, <2 x i8> %c, <2 x i8> %d, <2 x ; ; MIPS32R5EL-LABEL: i8x2_7: ; MIPS32R5EL: # %bb.0: # %entry -; MIPS32R5EL-NEXT: addiu $sp, $sp, -144 -; MIPS32R5EL-NEXT: .cfi_def_cfa_offset 144 -; MIPS32R5EL-NEXT: sw $ra, 140($sp) # 4-byte Folded Spill -; MIPS32R5EL-NEXT: sw $fp, 136($sp) # 4-byte 
Folded Spill +; MIPS32R5EL-NEXT: addiu $sp, $sp, -128 +; MIPS32R5EL-NEXT: .cfi_def_cfa_offset 128 +; MIPS32R5EL-NEXT: sw $ra, 124($sp) # 4-byte Folded Spill +; MIPS32R5EL-NEXT: sw $fp, 120($sp) # 4-byte Folded Spill ; MIPS32R5EL-NEXT: .cfi_offset 31, -4 ; MIPS32R5EL-NEXT: .cfi_offset 30, -8 ; MIPS32R5EL-NEXT: move $fp, $sp ; MIPS32R5EL-NEXT: .cfi_def_cfa_register 30 ; MIPS32R5EL-NEXT: addiu $1, $zero, -16 ; MIPS32R5EL-NEXT: and $sp, $sp, $1 -; MIPS32R5EL-NEXT: sh $5, 128($sp) -; MIPS32R5EL-NEXT: sh $4, 132($sp) -; MIPS32R5EL-NEXT: lbu $1, 129($sp) -; MIPS32R5EL-NEXT: sw $1, 72($sp) -; MIPS32R5EL-NEXT: lbu $1, 128($sp) +; MIPS32R5EL-NEXT: sh $5, 16($sp) +; MIPS32R5EL-NEXT: sh $4, 0($sp) +; MIPS32R5EL-NEXT: ld.b $w0, 16($sp) +; MIPS32R5EL-NEXT: ld.b $w1, 0($sp) +; MIPS32R5EL-NEXT: addv.b $w0, $w1, $w0 +; MIPS32R5EL-NEXT: sh $6, 32($sp) +; MIPS32R5EL-NEXT: ld.b $w1, 32($sp) +; MIPS32R5EL-NEXT: addv.b $w0, $w0, $w1 +; MIPS32R5EL-NEXT: sh $7, 48($sp) +; MIPS32R5EL-NEXT: ld.b $w1, 48($sp) +; MIPS32R5EL-NEXT: addv.b $w0, $w0, $w1 +; MIPS32R5EL-NEXT: lw $1, 144($fp) ; MIPS32R5EL-NEXT: sw $1, 64($sp) -; MIPS32R5EL-NEXT: lbu $1, 133($sp) -; MIPS32R5EL-NEXT: sw $1, 56($sp) -; MIPS32R5EL-NEXT: lbu $1, 132($sp) -; MIPS32R5EL-NEXT: sw $1, 48($sp) -; MIPS32R5EL-NEXT: ld.d $w0, 64($sp) -; MIPS32R5EL-NEXT: ld.d $w1, 48($sp) -; MIPS32R5EL-NEXT: addv.d $w0, $w1, $w0 -; MIPS32R5EL-NEXT: sh $6, 124($sp) -; MIPS32R5EL-NEXT: lbu $1, 125($sp) -; MIPS32R5EL-NEXT: sw $1, 88($sp) -; MIPS32R5EL-NEXT: lbu $1, 124($sp) +; MIPS32R5EL-NEXT: ld.b $w1, 64($sp) +; MIPS32R5EL-NEXT: addv.b $w0, $w0, $w1 +; MIPS32R5EL-NEXT: lw $1, 148($fp) ; MIPS32R5EL-NEXT: sw $1, 80($sp) -; MIPS32R5EL-NEXT: ld.d $w1, 80($sp) -; MIPS32R5EL-NEXT: addv.d $w0, $w0, $w1 -; MIPS32R5EL-NEXT: sh $7, 120($sp) -; MIPS32R5EL-NEXT: lbu $1, 121($sp) -; MIPS32R5EL-NEXT: sw $1, 104($sp) -; MIPS32R5EL-NEXT: lbu $1, 120($sp) +; MIPS32R5EL-NEXT: ld.b $w1, 80($sp) +; MIPS32R5EL-NEXT: addv.b $w0, $w0, $w1 +; MIPS32R5EL-NEXT: lw $1, 
152($fp) ; MIPS32R5EL-NEXT: sw $1, 96($sp) -; MIPS32R5EL-NEXT: ld.d $w1, 96($sp) -; MIPS32R5EL-NEXT: addv.d $w0, $w0, $w1 -; MIPS32R5EL-NEXT: lbu $1, 161($fp) -; MIPS32R5EL-NEXT: sw $1, 8($sp) -; MIPS32R5EL-NEXT: lbu $1, 160($fp) -; MIPS32R5EL-NEXT: sw $1, 0($sp) -; MIPS32R5EL-NEXT: ld.d $w1, 0($sp) -; MIPS32R5EL-NEXT: addv.d $w0, $w0, $w1 -; MIPS32R5EL-NEXT: lbu $1, 165($fp) -; MIPS32R5EL-NEXT: sw $1, 24($sp) -; MIPS32R5EL-NEXT: lbu $1, 164($fp) -; MIPS32R5EL-NEXT: sw $1, 16($sp) -; MIPS32R5EL-NEXT: ld.d $w1, 16($sp) -; MIPS32R5EL-NEXT: addv.d $w0, $w0, $w1 -; MIPS32R5EL-NEXT: lbu $1, 169($fp) -; MIPS32R5EL-NEXT: sw $1, 40($sp) -; MIPS32R5EL-NEXT: lbu $1, 168($fp) -; MIPS32R5EL-NEXT: sw $1, 32($sp) -; MIPS32R5EL-NEXT: ld.d $w1, 32($sp) -; MIPS32R5EL-NEXT: addv.d $w0, $w0, $w1 -; MIPS32R5EL-NEXT: copy_s.w $1, $w0[0] -; MIPS32R5EL-NEXT: copy_s.w $2, $w0[2] -; MIPS32R5EL-NEXT: sb $2, 117($sp) -; MIPS32R5EL-NEXT: sb $1, 116($sp) -; MIPS32R5EL-NEXT: lhu $2, 116($sp) +; MIPS32R5EL-NEXT: ld.b $w1, 96($sp) +; MIPS32R5EL-NEXT: addv.b $w0, $w0, $w1 +; MIPS32R5EL-NEXT: copy_u.h $2, $w0[0] ; MIPS32R5EL-NEXT: move $sp, $fp -; MIPS32R5EL-NEXT: lw $fp, 136($sp) # 4-byte Folded Reload -; MIPS32R5EL-NEXT: lw $ra, 140($sp) # 4-byte Folded Reload -; MIPS32R5EL-NEXT: addiu $sp, $sp, 144 +; MIPS32R5EL-NEXT: lw $fp, 120($sp) # 4-byte Folded Reload +; MIPS32R5EL-NEXT: lw $ra, 124($sp) # 4-byte Folded Reload +; MIPS32R5EL-NEXT: addiu $sp, $sp, 128 ; MIPS32R5EL-NEXT: jr $ra ; MIPS32R5EL-NEXT: nop entry: @@ -514,77 +455,64 @@ define <4 x i8> @i8_4(<4 x i8> %a, <4 x i8> %b) { ; MIPS64-NEXT: jr $ra ; MIPS64-NEXT: nop ; -; MIPS32R5-LABEL: i8_4: -; MIPS32R5: # %bb.0: -; MIPS32R5-NEXT: addiu $sp, $sp, -16 -; MIPS32R5-NEXT: .cfi_def_cfa_offset 16 -; MIPS32R5-NEXT: sw $5, 8($sp) -; MIPS32R5-NEXT: sw $4, 12($sp) -; MIPS32R5-NEXT: lbu $1, 9($sp) -; MIPS32R5-NEXT: lbu $2, 8($sp) -; MIPS32R5-NEXT: insert.w $w0[0], $2 -; MIPS32R5-NEXT: insert.w $w0[1], $1 -; MIPS32R5-NEXT: lbu $1, 10($sp) -; 
MIPS32R5-NEXT: insert.w $w0[2], $1 -; MIPS32R5-NEXT: lbu $1, 11($sp) -; MIPS32R5-NEXT: insert.w $w0[3], $1 -; MIPS32R5-NEXT: lbu $1, 13($sp) -; MIPS32R5-NEXT: lbu $2, 12($sp) -; MIPS32R5-NEXT: insert.w $w1[0], $2 -; MIPS32R5-NEXT: insert.w $w1[1], $1 -; MIPS32R5-NEXT: lbu $1, 14($sp) -; MIPS32R5-NEXT: insert.w $w1[2], $1 -; MIPS32R5-NEXT: lbu $1, 15($sp) -; MIPS32R5-NEXT: insert.w $w1[3], $1 -; MIPS32R5-NEXT: addv.w $w0, $w1, $w0 -; MIPS32R5-NEXT: copy_s.w $1, $w0[0] -; MIPS32R5-NEXT: copy_s.w $2, $w0[1] -; MIPS32R5-NEXT: copy_s.w $3, $w0[2] -; MIPS32R5-NEXT: copy_s.w $4, $w0[3] -; MIPS32R5-NEXT: sb $4, 7($sp) -; MIPS32R5-NEXT: sb $3, 6($sp) -; MIPS32R5-NEXT: sb $2, 5($sp) -; MIPS32R5-NEXT: sb $1, 4($sp) -; MIPS32R5-NEXT: lw $2, 4($sp) -; MIPS32R5-NEXT: addiu $sp, $sp, 16 -; MIPS32R5-NEXT: jr $ra -; MIPS32R5-NEXT: nop +; MIPS32R5EB-LABEL: i8_4: +; MIPS32R5EB: # %bb.0: +; MIPS32R5EB-NEXT: addiu $sp, $sp, -48 +; MIPS32R5EB-NEXT: .cfi_def_cfa_offset 48 +; MIPS32R5EB-NEXT: sw $ra, 44($sp) # 4-byte Folded Spill +; MIPS32R5EB-NEXT: sw $fp, 40($sp) # 4-byte Folded Spill +; MIPS32R5EB-NEXT: .cfi_offset 31, -4 +; MIPS32R5EB-NEXT: .cfi_offset 30, -8 +; MIPS32R5EB-NEXT: move $fp, $sp +; MIPS32R5EB-NEXT: .cfi_def_cfa_register 30 +; MIPS32R5EB-NEXT: addiu $1, $zero, -16 +; MIPS32R5EB-NEXT: and $sp, $sp, $1 +; MIPS32R5EB-NEXT: sw $5, 16($sp) +; MIPS32R5EB-NEXT: sw $4, 0($sp) +; MIPS32R5EB-NEXT: ld.b $w0, 16($sp) +; MIPS32R5EB-NEXT: ld.b $w1, 0($sp) +; MIPS32R5EB-NEXT: addv.b $w0, $w1, $w0 +; MIPS32R5EB-NEXT: shf.b $w0, $w0, 27 +; MIPS32R5EB-NEXT: copy_s.w $2, $w0[0] +; MIPS32R5EB-NEXT: move $sp, $fp +; MIPS32R5EB-NEXT: lw $fp, 40($sp) # 4-byte Folded Reload +; MIPS32R5EB-NEXT: lw $ra, 44($sp) # 4-byte Folded Reload +; MIPS32R5EB-NEXT: addiu $sp, $sp, 48 +; MIPS32R5EB-NEXT: jr $ra +; MIPS32R5EB-NEXT: nop ; -; MIPS64R5-LABEL: i8_4: -; MIPS64R5: # %bb.0: -; MIPS64R5-NEXT: daddiu $sp, $sp, -16 -; MIPS64R5-NEXT: .cfi_def_cfa_offset 16 -; MIPS64R5-NEXT: sw $5, 8($sp) -; MIPS64R5-NEXT: 
sw $4, 12($sp) -; MIPS64R5-NEXT: lbu $1, 9($sp) -; MIPS64R5-NEXT: lbu $2, 8($sp) -; MIPS64R5-NEXT: insert.w $w0[0], $2 -; MIPS64R5-NEXT: insert.w $w0[1], $1 -; MIPS64R5-NEXT: lbu $1, 10($sp) -; MIPS64R5-NEXT: insert.w $w0[2], $1 -; MIPS64R5-NEXT: lbu $1, 11($sp) -; MIPS64R5-NEXT: insert.w $w0[3], $1 -; MIPS64R5-NEXT: lbu $1, 13($sp) -; MIPS64R5-NEXT: lbu $2, 12($sp) -; MIPS64R5-NEXT: insert.w $w1[0], $2 -; MIPS64R5-NEXT: insert.w $w1[1], $1 -; MIPS64R5-NEXT: lbu $1, 14($sp) -; MIPS64R5-NEXT: insert.w $w1[2], $1 -; MIPS64R5-NEXT: lbu $1, 15($sp) -; MIPS64R5-NEXT: insert.w $w1[3], $1 -; MIPS64R5-NEXT: addv.w $w0, $w1, $w0 -; MIPS64R5-NEXT: copy_s.w $1, $w0[0] -; MIPS64R5-NEXT: copy_s.w $2, $w0[1] -; MIPS64R5-NEXT: copy_s.w $3, $w0[2] -; MIPS64R5-NEXT: copy_s.w $4, $w0[3] -; MIPS64R5-NEXT: sb $4, 7($sp) -; MIPS64R5-NEXT: sb $3, 6($sp) -; MIPS64R5-NEXT: sb $2, 5($sp) -; MIPS64R5-NEXT: sb $1, 4($sp) -; MIPS64R5-NEXT: lw $2, 4($sp) -; MIPS64R5-NEXT: daddiu $sp, $sp, 16 -; MIPS64R5-NEXT: jr $ra -; MIPS64R5-NEXT: nop +; MIPS64R5EB-LABEL: i8_4: +; MIPS64R5EB: # %bb.0: +; MIPS64R5EB-NEXT: daddiu $sp, $sp, -32 +; MIPS64R5EB-NEXT: .cfi_def_cfa_offset 32 +; MIPS64R5EB-NEXT: sll $1, $5, 0 +; MIPS64R5EB-NEXT: sw $1, 16($sp) +; MIPS64R5EB-NEXT: sll $1, $4, 0 +; MIPS64R5EB-NEXT: sw $1, 0($sp) +; MIPS64R5EB-NEXT: ld.b $w0, 16($sp) +; MIPS64R5EB-NEXT: ld.b $w1, 0($sp) +; MIPS64R5EB-NEXT: addv.b $w0, $w1, $w0 +; MIPS64R5EB-NEXT: shf.b $w0, $w0, 27 +; MIPS64R5EB-NEXT: copy_s.w $2, $w0[0] +; MIPS64R5EB-NEXT: daddiu $sp, $sp, 32 +; MIPS64R5EB-NEXT: jr $ra +; MIPS64R5EB-NEXT: nop +; +; MIPS64R5EL-LABEL: i8_4: +; MIPS64R5EL: # %bb.0: +; MIPS64R5EL-NEXT: daddiu $sp, $sp, -32 +; MIPS64R5EL-NEXT: .cfi_def_cfa_offset 32 +; MIPS64R5EL-NEXT: sll $1, $5, 0 +; MIPS64R5EL-NEXT: sw $1, 16($sp) +; MIPS64R5EL-NEXT: sll $1, $4, 0 +; MIPS64R5EL-NEXT: sw $1, 0($sp) +; MIPS64R5EL-NEXT: ld.b $w0, 16($sp) +; MIPS64R5EL-NEXT: ld.b $w1, 0($sp) +; MIPS64R5EL-NEXT: addv.b $w0, $w1, $w0 +; MIPS64R5EL-NEXT: 
copy_s.w $2, $w0[0] +; MIPS64R5EL-NEXT: daddiu $sp, $sp, 32 +; MIPS64R5EL-NEXT: jr $ra +; MIPS64R5EL-NEXT: nop %1 = add <4 x i8> %a, %b ret <4 x i8> %1 } @@ -704,66 +632,16 @@ define <8 x i8> @i8_8(<8 x i8> %a, <8 x i8> %b) { ; MIPS32R5EB-NEXT: .cfi_def_cfa_register 30 ; MIPS32R5EB-NEXT: addiu $1, $zero, -16 ; MIPS32R5EB-NEXT: and $sp, $sp, $1 -; MIPS32R5EB-NEXT: sw $6, 24($sp) -; MIPS32R5EB-NEXT: lbu $1, 25($sp) -; MIPS32R5EB-NEXT: lbu $2, 24($sp) -; MIPS32R5EB-NEXT: sw $7, 28($sp) -; MIPS32R5EB-NEXT: insert.h $w0[0], $2 -; MIPS32R5EB-NEXT: insert.h $w0[1], $1 -; MIPS32R5EB-NEXT: lbu $1, 26($sp) -; MIPS32R5EB-NEXT: sw $4, 32($sp) -; MIPS32R5EB-NEXT: insert.h $w0[2], $1 -; MIPS32R5EB-NEXT: lbu $1, 27($sp) -; MIPS32R5EB-NEXT: insert.h $w0[3], $1 -; MIPS32R5EB-NEXT: lbu $1, 28($sp) -; MIPS32R5EB-NEXT: sw $5, 36($sp) -; MIPS32R5EB-NEXT: insert.h $w0[4], $1 -; MIPS32R5EB-NEXT: lbu $1, 33($sp) -; MIPS32R5EB-NEXT: lbu $2, 32($sp) -; MIPS32R5EB-NEXT: insert.h $w1[0], $2 -; MIPS32R5EB-NEXT: insert.h $w1[1], $1 -; MIPS32R5EB-NEXT: lbu $1, 29($sp) -; MIPS32R5EB-NEXT: lbu $2, 34($sp) -; MIPS32R5EB-NEXT: insert.h $w1[2], $2 -; MIPS32R5EB-NEXT: insert.h $w0[5], $1 -; MIPS32R5EB-NEXT: lbu $1, 35($sp) -; MIPS32R5EB-NEXT: lbu $2, 31($sp) -; MIPS32R5EB-NEXT: lbu $3, 30($sp) -; MIPS32R5EB-NEXT: lbu $4, 39($sp) -; MIPS32R5EB-NEXT: insert.h $w0[6], $3 -; MIPS32R5EB-NEXT: insert.h $w0[7], $2 -; MIPS32R5EB-NEXT: insert.h $w1[3], $1 -; MIPS32R5EB-NEXT: lbu $1, 36($sp) -; MIPS32R5EB-NEXT: insert.h $w1[4], $1 -; MIPS32R5EB-NEXT: lbu $1, 37($sp) -; MIPS32R5EB-NEXT: insert.h $w1[5], $1 -; MIPS32R5EB-NEXT: lbu $1, 38($sp) -; MIPS32R5EB-NEXT: insert.h $w1[6], $1 -; MIPS32R5EB-NEXT: insert.h $w1[7], $4 -; MIPS32R5EB-NEXT: addv.h $w0, $w1, $w0 -; MIPS32R5EB-NEXT: copy_s.h $1, $w0[0] -; MIPS32R5EB-NEXT: copy_s.h $2, $w0[1] -; MIPS32R5EB-NEXT: copy_s.h $3, $w0[2] -; MIPS32R5EB-NEXT: copy_s.h $4, $w0[3] -; MIPS32R5EB-NEXT: copy_s.h $5, $w0[4] -; MIPS32R5EB-NEXT: copy_s.h $6, $w0[5] -; 
MIPS32R5EB-NEXT: copy_s.h $7, $w0[6] -; MIPS32R5EB-NEXT: copy_s.h $8, $w0[7] -; MIPS32R5EB-NEXT: sb $8, 23($sp) -; MIPS32R5EB-NEXT: sb $7, 22($sp) -; MIPS32R5EB-NEXT: sb $6, 21($sp) -; MIPS32R5EB-NEXT: sb $5, 20($sp) -; MIPS32R5EB-NEXT: sb $4, 19($sp) -; MIPS32R5EB-NEXT: sb $3, 18($sp) -; MIPS32R5EB-NEXT: sb $2, 17($sp) -; MIPS32R5EB-NEXT: sb $1, 16($sp) -; MIPS32R5EB-NEXT: lw $1, 20($sp) -; MIPS32R5EB-NEXT: sw $1, 12($sp) -; MIPS32R5EB-NEXT: lw $1, 16($sp) -; MIPS32R5EB-NEXT: sw $1, 4($sp) -; MIPS32R5EB-NEXT: ld.w $w0, 0($sp) -; MIPS32R5EB-NEXT: copy_s.w $2, $w0[1] -; MIPS32R5EB-NEXT: copy_s.w $3, $w0[3] +; MIPS32R5EB-NEXT: sw $7, 20($sp) +; MIPS32R5EB-NEXT: sw $6, 16($sp) +; MIPS32R5EB-NEXT: sw $5, 4($sp) +; MIPS32R5EB-NEXT: sw $4, 0($sp) +; MIPS32R5EB-NEXT: ld.b $w0, 16($sp) +; MIPS32R5EB-NEXT: ld.b $w1, 0($sp) +; MIPS32R5EB-NEXT: addv.b $w0, $w1, $w0 +; MIPS32R5EB-NEXT: shf.b $w0, $w0, 27 +; MIPS32R5EB-NEXT: copy_s.w $2, $w0[0] +; MIPS32R5EB-NEXT: copy_s.w $3, $w0[1] ; MIPS32R5EB-NEXT: move $sp, $fp ; MIPS32R5EB-NEXT: lw $fp, 40($sp) # 4-byte Folded Reload ; MIPS32R5EB-NEXT: lw $ra, 44($sp) # 4-byte Folded Reload @@ -771,65 +649,35 @@ define <8 x i8> @i8_8(<8 x i8> %a, <8 x i8> %b) { ; MIPS32R5EB-NEXT: jr $ra ; MIPS32R5EB-NEXT: nop ; -; MIPS64R5-LABEL: i8_8: -; MIPS64R5: # %bb.0: -; MIPS64R5-NEXT: daddiu $sp, $sp, -32 -; MIPS64R5-NEXT: .cfi_def_cfa_offset 32 -; MIPS64R5-NEXT: sd $5, 16($sp) -; MIPS64R5-NEXT: lbu $1, 17($sp) -; MIPS64R5-NEXT: lbu $2, 16($sp) -; MIPS64R5-NEXT: sd $4, 24($sp) -; MIPS64R5-NEXT: insert.h $w0[0], $2 -; MIPS64R5-NEXT: insert.h $w0[1], $1 -; MIPS64R5-NEXT: lbu $1, 18($sp) -; MIPS64R5-NEXT: insert.h $w0[2], $1 -; MIPS64R5-NEXT: lbu $1, 19($sp) -; MIPS64R5-NEXT: insert.h $w0[3], $1 -; MIPS64R5-NEXT: lbu $1, 20($sp) -; MIPS64R5-NEXT: insert.h $w0[4], $1 -; MIPS64R5-NEXT: lbu $1, 25($sp) -; MIPS64R5-NEXT: lbu $2, 24($sp) -; MIPS64R5-NEXT: insert.h $w1[0], $2 -; MIPS64R5-NEXT: insert.h $w1[1], $1 -; MIPS64R5-NEXT: lbu $1, 21($sp) -; 
MIPS64R5-NEXT: lbu $2, 26($sp) -; MIPS64R5-NEXT: insert.h $w1[2], $2 -; MIPS64R5-NEXT: insert.h $w0[5], $1 -; MIPS64R5-NEXT: lbu $1, 27($sp) -; MIPS64R5-NEXT: lbu $2, 23($sp) -; MIPS64R5-NEXT: lbu $3, 22($sp) -; MIPS64R5-NEXT: lbu $4, 31($sp) -; MIPS64R5-NEXT: insert.h $w0[6], $3 -; MIPS64R5-NEXT: insert.h $w0[7], $2 -; MIPS64R5-NEXT: insert.h $w1[3], $1 -; MIPS64R5-NEXT: lbu $1, 28($sp) -; MIPS64R5-NEXT: insert.h $w1[4], $1 -; MIPS64R5-NEXT: lbu $1, 29($sp) -; MIPS64R5-NEXT: insert.h $w1[5], $1 -; MIPS64R5-NEXT: lbu $1, 30($sp) -; MIPS64R5-NEXT: insert.h $w1[6], $1 -; MIPS64R5-NEXT: insert.h $w1[7], $4 -; MIPS64R5-NEXT: addv.h $w0, $w1, $w0 -; MIPS64R5-NEXT: copy_s.h $1, $w0[0] -; MIPS64R5-NEXT: copy_s.h $2, $w0[1] -; MIPS64R5-NEXT: copy_s.h $3, $w0[2] -; MIPS64R5-NEXT: copy_s.h $4, $w0[3] -; MIPS64R5-NEXT: copy_s.h $5, $w0[4] -; MIPS64R5-NEXT: copy_s.h $6, $w0[5] -; MIPS64R5-NEXT: copy_s.h $7, $w0[6] -; MIPS64R5-NEXT: copy_s.h $8, $w0[7] -; MIPS64R5-NEXT: sb $8, 15($sp) -; MIPS64R5-NEXT: sb $7, 14($sp) -; MIPS64R5-NEXT: sb $6, 13($sp) -; MIPS64R5-NEXT: sb $5, 12($sp) -; MIPS64R5-NEXT: sb $4, 11($sp) -; MIPS64R5-NEXT: sb $3, 10($sp) -; MIPS64R5-NEXT: sb $2, 9($sp) -; MIPS64R5-NEXT: sb $1, 8($sp) -; MIPS64R5-NEXT: ld $2, 8($sp) -; MIPS64R5-NEXT: daddiu $sp, $sp, 32 -; MIPS64R5-NEXT: jr $ra -; MIPS64R5-NEXT: nop +; MIPS64R5EB-LABEL: i8_8: +; MIPS64R5EB: # %bb.0: +; MIPS64R5EB-NEXT: daddiu $sp, $sp, -32 +; MIPS64R5EB-NEXT: .cfi_def_cfa_offset 32 +; MIPS64R5EB-NEXT: sd $5, 16($sp) +; MIPS64R5EB-NEXT: sd $4, 0($sp) +; MIPS64R5EB-NEXT: ld.b $w0, 16($sp) +; MIPS64R5EB-NEXT: ld.b $w1, 0($sp) +; MIPS64R5EB-NEXT: addv.b $w0, $w1, $w0 +; MIPS64R5EB-NEXT: shf.b $w0, $w0, 27 +; MIPS64R5EB-NEXT: shf.w $w0, $w0, 177 +; MIPS64R5EB-NEXT: copy_s.d $2, $w0[0] +; MIPS64R5EB-NEXT: daddiu $sp, $sp, 32 +; MIPS64R5EB-NEXT: jr $ra +; MIPS64R5EB-NEXT: nop +; +; MIPS64R5EL-LABEL: i8_8: +; MIPS64R5EL: # %bb.0: +; MIPS64R5EL-NEXT: daddiu $sp, $sp, -32 +; MIPS64R5EL-NEXT: .cfi_def_cfa_offset 
32 +; MIPS64R5EL-NEXT: sd $5, 16($sp) +; MIPS64R5EL-NEXT: sd $4, 0($sp) +; MIPS64R5EL-NEXT: ld.b $w0, 16($sp) +; MIPS64R5EL-NEXT: ld.b $w1, 0($sp) +; MIPS64R5EL-NEXT: addv.b $w0, $w1, $w0 +; MIPS64R5EL-NEXT: copy_s.d $2, $w0[0] +; MIPS64R5EL-NEXT: daddiu $sp, $sp, 32 +; MIPS64R5EL-NEXT: jr $ra +; MIPS64R5EL-NEXT: nop ; ; MIPS32R5EL-LABEL: i8_8: ; MIPS32R5EL: # %bb.0: @@ -843,66 +691,15 @@ define <8 x i8> @i8_8(<8 x i8> %a, <8 x i8> %b) { ; MIPS32R5EL-NEXT: .cfi_def_cfa_register 30 ; MIPS32R5EL-NEXT: addiu $1, $zero, -16 ; MIPS32R5EL-NEXT: and $sp, $sp, $1 -; MIPS32R5EL-NEXT: sw $6, 24($sp) -; MIPS32R5EL-NEXT: lbu $1, 25($sp) -; MIPS32R5EL-NEXT: lbu $2, 24($sp) -; MIPS32R5EL-NEXT: sw $7, 28($sp) -; MIPS32R5EL-NEXT: insert.h $w0[0], $2 -; MIPS32R5EL-NEXT: insert.h $w0[1], $1 -; MIPS32R5EL-NEXT: lbu $1, 26($sp) -; MIPS32R5EL-NEXT: sw $4, 32($sp) -; MIPS32R5EL-NEXT: insert.h $w0[2], $1 -; MIPS32R5EL-NEXT: lbu $1, 27($sp) -; MIPS32R5EL-NEXT: insert.h $w0[3], $1 -; MIPS32R5EL-NEXT: lbu $1, 28($sp) -; MIPS32R5EL-NEXT: sw $5, 36($sp) -; MIPS32R5EL-NEXT: insert.h $w0[4], $1 -; MIPS32R5EL-NEXT: lbu $1, 33($sp) -; MIPS32R5EL-NEXT: lbu $2, 32($sp) -; MIPS32R5EL-NEXT: insert.h $w1[0], $2 -; MIPS32R5EL-NEXT: insert.h $w1[1], $1 -; MIPS32R5EL-NEXT: lbu $1, 29($sp) -; MIPS32R5EL-NEXT: lbu $2, 34($sp) -; MIPS32R5EL-NEXT: insert.h $w1[2], $2 -; MIPS32R5EL-NEXT: insert.h $w0[5], $1 -; MIPS32R5EL-NEXT: lbu $1, 35($sp) -; MIPS32R5EL-NEXT: lbu $2, 31($sp) -; MIPS32R5EL-NEXT: lbu $3, 30($sp) -; MIPS32R5EL-NEXT: lbu $4, 39($sp) -; MIPS32R5EL-NEXT: insert.h $w0[6], $3 -; MIPS32R5EL-NEXT: insert.h $w0[7], $2 -; MIPS32R5EL-NEXT: insert.h $w1[3], $1 -; MIPS32R5EL-NEXT: lbu $1, 36($sp) -; MIPS32R5EL-NEXT: insert.h $w1[4], $1 -; MIPS32R5EL-NEXT: lbu $1, 37($sp) -; MIPS32R5EL-NEXT: insert.h $w1[5], $1 -; MIPS32R5EL-NEXT: lbu $1, 38($sp) -; MIPS32R5EL-NEXT: insert.h $w1[6], $1 -; MIPS32R5EL-NEXT: insert.h $w1[7], $4 -; MIPS32R5EL-NEXT: addv.h $w0, $w1, $w0 -; MIPS32R5EL-NEXT: copy_s.h $1, $w0[0] 
-; MIPS32R5EL-NEXT: copy_s.h $2, $w0[1] -; MIPS32R5EL-NEXT: copy_s.h $3, $w0[2] -; MIPS32R5EL-NEXT: copy_s.h $4, $w0[3] -; MIPS32R5EL-NEXT: copy_s.h $5, $w0[4] -; MIPS32R5EL-NEXT: copy_s.h $6, $w0[5] -; MIPS32R5EL-NEXT: copy_s.h $7, $w0[6] -; MIPS32R5EL-NEXT: copy_s.h $8, $w0[7] -; MIPS32R5EL-NEXT: sb $8, 23($sp) -; MIPS32R5EL-NEXT: sb $7, 22($sp) -; MIPS32R5EL-NEXT: sb $6, 21($sp) -; MIPS32R5EL-NEXT: sb $5, 20($sp) -; MIPS32R5EL-NEXT: sb $4, 19($sp) -; MIPS32R5EL-NEXT: sb $3, 18($sp) -; MIPS32R5EL-NEXT: sb $2, 17($sp) -; MIPS32R5EL-NEXT: sb $1, 16($sp) -; MIPS32R5EL-NEXT: lw $1, 20($sp) -; MIPS32R5EL-NEXT: sw $1, 8($sp) -; MIPS32R5EL-NEXT: lw $1, 16($sp) -; MIPS32R5EL-NEXT: sw $1, 0($sp) -; MIPS32R5EL-NEXT: ld.w $w0, 0($sp) +; MIPS32R5EL-NEXT: sw $7, 20($sp) +; MIPS32R5EL-NEXT: sw $6, 16($sp) +; MIPS32R5EL-NEXT: sw $5, 4($sp) +; MIPS32R5EL-NEXT: sw $4, 0($sp) +; MIPS32R5EL-NEXT: ld.b $w0, 16($sp) +; MIPS32R5EL-NEXT: ld.b $w1, 0($sp) +; MIPS32R5EL-NEXT: addv.b $w0, $w1, $w0 ; MIPS32R5EL-NEXT: copy_s.w $2, $w0[0] -; MIPS32R5EL-NEXT: copy_s.w $3, $w0[2] +; MIPS32R5EL-NEXT: copy_s.w $3, $w0[1] ; MIPS32R5EL-NEXT: move $sp, $fp ; MIPS32R5EL-NEXT: lw $fp, 40($sp) # 4-byte Folded Reload ; MIPS32R5EL-NEXT: lw $ra, 44($sp) # 4-byte Folded Reload @@ -1221,102 +1018,86 @@ define <2 x i16> @i16_2(<2 x i16> %a, <2 x i16> %b) { ; ; MIPS32R5EB-LABEL: i16_2: ; MIPS32R5EB: # %bb.0: -; MIPS32R5EB-NEXT: addiu $sp, $sp, -64 -; MIPS32R5EB-NEXT: .cfi_def_cfa_offset 64 -; MIPS32R5EB-NEXT: sw $ra, 60($sp) # 4-byte Folded Spill -; MIPS32R5EB-NEXT: sw $fp, 56($sp) # 4-byte Folded Spill +; MIPS32R5EB-NEXT: addiu $sp, $sp, -48 +; MIPS32R5EB-NEXT: .cfi_def_cfa_offset 48 +; MIPS32R5EB-NEXT: sw $ra, 44($sp) # 4-byte Folded Spill +; MIPS32R5EB-NEXT: sw $fp, 40($sp) # 4-byte Folded Spill ; MIPS32R5EB-NEXT: .cfi_offset 31, -4 ; MIPS32R5EB-NEXT: .cfi_offset 30, -8 ; MIPS32R5EB-NEXT: move $fp, $sp ; MIPS32R5EB-NEXT: .cfi_def_cfa_register 30 ; MIPS32R5EB-NEXT: addiu $1, $zero, -16 ; MIPS32R5EB-NEXT: 
and $sp, $sp, $1 -; MIPS32R5EB-NEXT: sw $5, 48($sp) -; MIPS32R5EB-NEXT: sw $4, 52($sp) -; MIPS32R5EB-NEXT: lhu $1, 50($sp) -; MIPS32R5EB-NEXT: sw $1, 28($sp) -; MIPS32R5EB-NEXT: lhu $1, 48($sp) -; MIPS32R5EB-NEXT: sw $1, 20($sp) -; MIPS32R5EB-NEXT: lhu $1, 54($sp) -; MIPS32R5EB-NEXT: sw $1, 12($sp) -; MIPS32R5EB-NEXT: lhu $1, 52($sp) -; MIPS32R5EB-NEXT: sw $1, 4($sp) -; MIPS32R5EB-NEXT: ld.d $w0, 16($sp) -; MIPS32R5EB-NEXT: ld.d $w1, 0($sp) -; MIPS32R5EB-NEXT: addv.d $w0, $w1, $w0 -; MIPS32R5EB-NEXT: shf.w $w0, $w0, 177 -; MIPS32R5EB-NEXT: copy_s.w $1, $w0[1] -; MIPS32R5EB-NEXT: copy_s.w $2, $w0[3] -; MIPS32R5EB-NEXT: sh $2, 46($sp) -; MIPS32R5EB-NEXT: sh $1, 44($sp) -; MIPS32R5EB-NEXT: lw $2, 44($sp) +; MIPS32R5EB-NEXT: sw $5, 16($sp) +; MIPS32R5EB-NEXT: sw $4, 0($sp) +; MIPS32R5EB-NEXT: ld.h $w0, 16($sp) +; MIPS32R5EB-NEXT: ld.h $w1, 0($sp) +; MIPS32R5EB-NEXT: addv.h $w0, $w1, $w0 +; MIPS32R5EB-NEXT: shf.h $w0, $w0, 177 +; MIPS32R5EB-NEXT: copy_s.w $2, $w0[0] ; MIPS32R5EB-NEXT: move $sp, $fp -; MIPS32R5EB-NEXT: lw $fp, 56($sp) # 4-byte Folded Reload -; MIPS32R5EB-NEXT: lw $ra, 60($sp) # 4-byte Folded Reload -; MIPS32R5EB-NEXT: addiu $sp, $sp, 64 +; MIPS32R5EB-NEXT: lw $fp, 40($sp) # 4-byte Folded Reload +; MIPS32R5EB-NEXT: lw $ra, 44($sp) # 4-byte Folded Reload +; MIPS32R5EB-NEXT: addiu $sp, $sp, 48 ; MIPS32R5EB-NEXT: jr $ra ; MIPS32R5EB-NEXT: nop ; -; MIPS64R5-LABEL: i16_2: -; MIPS64R5: # %bb.0: -; MIPS64R5-NEXT: daddiu $sp, $sp, -16 -; MIPS64R5-NEXT: .cfi_def_cfa_offset 16 -; MIPS64R5-NEXT: sw $5, 8($sp) -; MIPS64R5-NEXT: sw $4, 12($sp) -; MIPS64R5-NEXT: lh $1, 10($sp) -; MIPS64R5-NEXT: lh $2, 8($sp) -; MIPS64R5-NEXT: insert.d $w0[0], $2 -; MIPS64R5-NEXT: insert.d $w0[1], $1 -; MIPS64R5-NEXT: lh $1, 14($sp) -; MIPS64R5-NEXT: lh $2, 12($sp) -; MIPS64R5-NEXT: insert.d $w1[0], $2 -; MIPS64R5-NEXT: insert.d $w1[1], $1 -; MIPS64R5-NEXT: addv.d $w0, $w1, $w0 -; MIPS64R5-NEXT: copy_s.d $1, $w0[0] -; MIPS64R5-NEXT: copy_s.d $2, $w0[1] -; MIPS64R5-NEXT: sh $2, 6($sp) -; 
MIPS64R5-NEXT: sh $1, 4($sp) -; MIPS64R5-NEXT: lw $2, 4($sp) -; MIPS64R5-NEXT: daddiu $sp, $sp, 16 -; MIPS64R5-NEXT: jr $ra -; MIPS64R5-NEXT: nop -; ; MIPS32R5EL-LABEL: i16_2: ; MIPS32R5EL: # %bb.0: -; MIPS32R5EL-NEXT: addiu $sp, $sp, -64 -; MIPS32R5EL-NEXT: .cfi_def_cfa_offset 64 -; MIPS32R5EL-NEXT: sw $ra, 60($sp) # 4-byte Folded Spill -; MIPS32R5EL-NEXT: sw $fp, 56($sp) # 4-byte Folded Spill +; MIPS32R5EL-NEXT: addiu $sp, $sp, -48 +; MIPS32R5EL-NEXT: .cfi_def_cfa_offset 48 +; MIPS32R5EL-NEXT: sw $ra, 44($sp) # 4-byte Folded Spill +; MIPS32R5EL-NEXT: sw $fp, 40($sp) # 4-byte Folded Spill ; MIPS32R5EL-NEXT: .cfi_offset 31, -4 ; MIPS32R5EL-NEXT: .cfi_offset 30, -8 ; MIPS32R5EL-NEXT: move $fp, $sp ; MIPS32R5EL-NEXT: .cfi_def_cfa_register 30 ; MIPS32R5EL-NEXT: addiu $1, $zero, -16 ; MIPS32R5EL-NEXT: and $sp, $sp, $1 -; MIPS32R5EL-NEXT: sw $5, 48($sp) -; MIPS32R5EL-NEXT: sw $4, 52($sp) -; MIPS32R5EL-NEXT: lhu $1, 50($sp) -; MIPS32R5EL-NEXT: sw $1, 24($sp) -; MIPS32R5EL-NEXT: lhu $1, 48($sp) -; MIPS32R5EL-NEXT: sw $1, 16($sp) -; MIPS32R5EL-NEXT: lhu $1, 54($sp) -; MIPS32R5EL-NEXT: sw $1, 8($sp) -; MIPS32R5EL-NEXT: lhu $1, 52($sp) -; MIPS32R5EL-NEXT: sw $1, 0($sp) -; MIPS32R5EL-NEXT: ld.d $w0, 16($sp) -; MIPS32R5EL-NEXT: ld.d $w1, 0($sp) -; MIPS32R5EL-NEXT: addv.d $w0, $w1, $w0 -; MIPS32R5EL-NEXT: copy_s.w $1, $w0[0] -; MIPS32R5EL-NEXT: copy_s.w $2, $w0[2] -; MIPS32R5EL-NEXT: sh $2, 46($sp) -; MIPS32R5EL-NEXT: sh $1, 44($sp) -; MIPS32R5EL-NEXT: lw $2, 44($sp) +; MIPS32R5EL-NEXT: sw $5, 16($sp) +; MIPS32R5EL-NEXT: sw $4, 0($sp) +; MIPS32R5EL-NEXT: ld.h $w0, 16($sp) +; MIPS32R5EL-NEXT: ld.h $w1, 0($sp) +; MIPS32R5EL-NEXT: addv.h $w0, $w1, $w0 +; MIPS32R5EL-NEXT: copy_s.w $2, $w0[0] ; MIPS32R5EL-NEXT: move $sp, $fp -; MIPS32R5EL-NEXT: lw $fp, 56($sp) # 4-byte Folded Reload -; MIPS32R5EL-NEXT: lw $ra, 60($sp) # 4-byte Folded Reload -; MIPS32R5EL-NEXT: addiu $sp, $sp, 64 +; MIPS32R5EL-NEXT: lw $fp, 40($sp) # 4-byte Folded Reload +; MIPS32R5EL-NEXT: lw $ra, 44($sp) # 4-byte 
Folded Reload +; MIPS32R5EL-NEXT: addiu $sp, $sp, 48 ; MIPS32R5EL-NEXT: jr $ra ; MIPS32R5EL-NEXT: nop +; +; MIPS64R5EB-LABEL: i16_2: +; MIPS64R5EB: # %bb.0: +; MIPS64R5EB-NEXT: daddiu $sp, $sp, -32 +; MIPS64R5EB-NEXT: .cfi_def_cfa_offset 32 +; MIPS64R5EB-NEXT: sll $1, $5, 0 +; MIPS64R5EB-NEXT: sw $1, 16($sp) +; MIPS64R5EB-NEXT: sll $1, $4, 0 +; MIPS64R5EB-NEXT: sw $1, 0($sp) +; MIPS64R5EB-NEXT: ld.h $w0, 16($sp) +; MIPS64R5EB-NEXT: ld.h $w1, 0($sp) +; MIPS64R5EB-NEXT: addv.h $w0, $w1, $w0 +; MIPS64R5EB-NEXT: shf.h $w0, $w0, 177 +; MIPS64R5EB-NEXT: copy_s.w $2, $w0[0] +; MIPS64R5EB-NEXT: daddiu $sp, $sp, 32 +; MIPS64R5EB-NEXT: jr $ra +; MIPS64R5EB-NEXT: nop +; +; MIPS64R5EL-LABEL: i16_2: +; MIPS64R5EL: # %bb.0: +; MIPS64R5EL-NEXT: daddiu $sp, $sp, -32 +; MIPS64R5EL-NEXT: .cfi_def_cfa_offset 32 +; MIPS64R5EL-NEXT: sll $1, $5, 0 +; MIPS64R5EL-NEXT: sw $1, 16($sp) +; MIPS64R5EL-NEXT: sll $1, $4, 0 +; MIPS64R5EL-NEXT: sw $1, 0($sp) +; MIPS64R5EL-NEXT: ld.h $w0, 16($sp) +; MIPS64R5EL-NEXT: ld.h $w1, 0($sp) +; MIPS64R5EL-NEXT: addv.h $w0, $w1, $w0 +; MIPS64R5EL-NEXT: copy_s.w $2, $w0[0] +; MIPS64R5EL-NEXT: daddiu $sp, $sp, 32 +; MIPS64R5EL-NEXT: jr $ra %1 = add <2 x i16> %a, %b ret <2 x i16> %1 } @@ -1384,84 +1165,50 @@ define <4 x i16> @i16_4(<4 x i16> %a, <4 x i16> %b) { ; MIPS32R5EB-NEXT: .cfi_def_cfa_register 30 ; MIPS32R5EB-NEXT: addiu $1, $zero, -16 ; MIPS32R5EB-NEXT: and $sp, $sp, $1 -; MIPS32R5EB-NEXT: sw $6, 24($sp) -; MIPS32R5EB-NEXT: sw $7, 28($sp) -; MIPS32R5EB-NEXT: lhu $1, 26($sp) -; MIPS32R5EB-NEXT: lhu $2, 24($sp) -; MIPS32R5EB-NEXT: sw $4, 32($sp) -; MIPS32R5EB-NEXT: insert.w $w0[0], $2 -; MIPS32R5EB-NEXT: insert.w $w0[1], $1 -; MIPS32R5EB-NEXT: lhu $1, 28($sp) -; MIPS32R5EB-NEXT: sw $5, 36($sp) -; MIPS32R5EB-NEXT: insert.w $w0[2], $1 -; MIPS32R5EB-NEXT: lhu $1, 30($sp) -; MIPS32R5EB-NEXT: insert.w $w0[3], $1 -; MIPS32R5EB-NEXT: lhu $1, 34($sp) -; MIPS32R5EB-NEXT: lhu $2, 32($sp) -; MIPS32R5EB-NEXT: insert.w $w1[0], $2 -; MIPS32R5EB-NEXT: insert.w $w1[1], 
$1 -; MIPS32R5EB-NEXT: lhu $1, 36($sp) -; MIPS32R5EB-NEXT: insert.w $w1[2], $1 -; MIPS32R5EB-NEXT: lhu $1, 38($sp) -; MIPS32R5EB-NEXT: insert.w $w1[3], $1 -; MIPS32R5EB-NEXT: addv.w $w0, $w1, $w0 -; MIPS32R5EB-NEXT: copy_s.w $1, $w0[0] -; MIPS32R5EB-NEXT: copy_s.w $2, $w0[1] -; MIPS32R5EB-NEXT: copy_s.w $3, $w0[2] -; MIPS32R5EB-NEXT: copy_s.w $4, $w0[3] -; MIPS32R5EB-NEXT: sh $4, 22($sp) -; MIPS32R5EB-NEXT: sh $3, 20($sp) -; MIPS32R5EB-NEXT: sh $2, 18($sp) -; MIPS32R5EB-NEXT: sh $1, 16($sp) -; MIPS32R5EB-NEXT: lw $1, 20($sp) -; MIPS32R5EB-NEXT: sw $1, 12($sp) -; MIPS32R5EB-NEXT: lw $1, 16($sp) -; MIPS32R5EB-NEXT: sw $1, 4($sp) -; MIPS32R5EB-NEXT: ld.w $w0, 0($sp) -; MIPS32R5EB-NEXT: copy_s.w $2, $w0[1] -; MIPS32R5EB-NEXT: copy_s.w $3, $w0[3] +; MIPS32R5EB-NEXT: sw $7, 20($sp) +; MIPS32R5EB-NEXT: sw $6, 16($sp) +; MIPS32R5EB-NEXT: sw $5, 4($sp) +; MIPS32R5EB-NEXT: sw $4, 0($sp) +; MIPS32R5EB-NEXT: ld.h $w0, 16($sp) +; MIPS32R5EB-NEXT: ld.h $w1, 0($sp) +; MIPS32R5EB-NEXT: addv.h $w0, $w1, $w0 +; MIPS32R5EB-NEXT: shf.h $w0, $w0, 177 +; MIPS32R5EB-NEXT: copy_s.w $2, $w0[0] +; MIPS32R5EB-NEXT: copy_s.w $3, $w0[1] ; MIPS32R5EB-NEXT: move $sp, $fp ; MIPS32R5EB-NEXT: lw $fp, 40($sp) # 4-byte Folded Reload ; MIPS32R5EB-NEXT: lw $ra, 44($sp) # 4-byte Folded Reload ; MIPS32R5EB-NEXT: addiu $sp, $sp, 48 ; MIPS32R5EB-NEXT: jr $ra -; MIPS32R5EB-NEXT: nop ; -; MIPS64R5-LABEL: i16_4: -; MIPS64R5: # %bb.0: -; MIPS64R5-NEXT: daddiu $sp, $sp, -32 -; MIPS64R5-NEXT: .cfi_def_cfa_offset 32 -; MIPS64R5-NEXT: sd $5, 16($sp) -; MIPS64R5-NEXT: sd $4, 24($sp) -; MIPS64R5-NEXT: lhu $1, 18($sp) -; MIPS64R5-NEXT: lhu $2, 16($sp) -; MIPS64R5-NEXT: insert.w $w0[0], $2 -; MIPS64R5-NEXT: insert.w $w0[1], $1 -; MIPS64R5-NEXT: lhu $1, 20($sp) -; MIPS64R5-NEXT: insert.w $w0[2], $1 -; MIPS64R5-NEXT: lhu $1, 22($sp) -; MIPS64R5-NEXT: insert.w $w0[3], $1 -; MIPS64R5-NEXT: lhu $1, 26($sp) -; MIPS64R5-NEXT: lhu $2, 24($sp) -; MIPS64R5-NEXT: insert.w $w1[0], $2 -; MIPS64R5-NEXT: insert.w $w1[1], $1 -; 
MIPS64R5-NEXT: lhu $1, 28($sp) -; MIPS64R5-NEXT: insert.w $w1[2], $1 -; MIPS64R5-NEXT: lhu $1, 30($sp) -; MIPS64R5-NEXT: insert.w $w1[3], $1 -; MIPS64R5-NEXT: addv.w $w0, $w1, $w0 -; MIPS64R5-NEXT: copy_s.w $1, $w0[0] -; MIPS64R5-NEXT: copy_s.w $2, $w0[1] -; MIPS64R5-NEXT: copy_s.w $3, $w0[2] -; MIPS64R5-NEXT: copy_s.w $4, $w0[3] -; MIPS64R5-NEXT: sh $4, 14($sp) -; MIPS64R5-NEXT: sh $3, 12($sp) -; MIPS64R5-NEXT: sh $2, 10($sp) -; MIPS64R5-NEXT: sh $1, 8($sp) -; MIPS64R5-NEXT: ld $2, 8($sp) -; MIPS64R5-NEXT: daddiu $sp, $sp, 32 -; MIPS64R5-NEXT: jr $ra -; MIPS64R5-NEXT: nop +; MIPS64R5EB-LABEL: i16_4: +; MIPS64R5EB: # %bb.0: +; MIPS64R5EB-NEXT: daddiu $sp, $sp, -32 +; MIPS64R5EB-NEXT: .cfi_def_cfa_offset 32 +; MIPS64R5EB-NEXT: sd $5, 16($sp) +; MIPS64R5EB-NEXT: sd $4, 0($sp) +; MIPS64R5EB-NEXT: ld.h $w0, 16($sp) +; MIPS64R5EB-NEXT: ld.h $w1, 0($sp) +; MIPS64R5EB-NEXT: addv.h $w0, $w1, $w0 +; MIPS64R5EB-NEXT: shf.h $w0, $w0, 27 +; MIPS64R5EB-NEXT: copy_s.d $2, $w0[0] +; MIPS64R5EB-NEXT: daddiu $sp, $sp, 32 +; MIPS64R5EB-NEXT: jr $ra +; MIPS64R5EB-NEXT: nop +; +; MIPS64R5EL-LABEL: i16_4: +; MIPS64R5EL: # %bb.0: +; MIPS64R5EL-NEXT: daddiu $sp, $sp, -32 +; MIPS64R5EL-NEXT: .cfi_def_cfa_offset 32 +; MIPS64R5EL-NEXT: sd $5, 16($sp) +; MIPS64R5EL-NEXT: sd $4, 0($sp) +; MIPS64R5EL-NEXT: ld.h $w0, 16($sp) +; MIPS64R5EL-NEXT: ld.h $w1, 0($sp) +; MIPS64R5EL-NEXT: addv.h $w0, $w1, $w0 +; MIPS64R5EL-NEXT: copy_s.d $2, $w0[0] +; MIPS64R5EL-NEXT: daddiu $sp, $sp, 32 +; MIPS64R5EL-NEXT: jr $ra +; MIPS64R5EL-NEXT: nop ; ; MIPS32R5EL-LABEL: i16_4: ; MIPS32R5EL: # %bb.0: @@ -1475,42 +1222,15 @@ define <4 x i16> @i16_4(<4 x i16> %a, <4 x i16> %b) { ; MIPS32R5EL-NEXT: .cfi_def_cfa_register 30 ; MIPS32R5EL-NEXT: addiu $1, $zero, -16 ; MIPS32R5EL-NEXT: and $sp, $sp, $1 -; MIPS32R5EL-NEXT: sw $6, 24($sp) -; MIPS32R5EL-NEXT: sw $7, 28($sp) -; MIPS32R5EL-NEXT: lhu $1, 26($sp) -; MIPS32R5EL-NEXT: lhu $2, 24($sp) -; MIPS32R5EL-NEXT: sw $4, 32($sp) -; MIPS32R5EL-NEXT: insert.w $w0[0], $2 -; 
MIPS32R5EL-NEXT: insert.w $w0[1], $1 -; MIPS32R5EL-NEXT: lhu $1, 28($sp) -; MIPS32R5EL-NEXT: sw $5, 36($sp) -; MIPS32R5EL-NEXT: insert.w $w0[2], $1 -; MIPS32R5EL-NEXT: lhu $1, 30($sp) -; MIPS32R5EL-NEXT: insert.w $w0[3], $1 -; MIPS32R5EL-NEXT: lhu $1, 34($sp) -; MIPS32R5EL-NEXT: lhu $2, 32($sp) -; MIPS32R5EL-NEXT: insert.w $w1[0], $2 -; MIPS32R5EL-NEXT: insert.w $w1[1], $1 -; MIPS32R5EL-NEXT: lhu $1, 36($sp) -; MIPS32R5EL-NEXT: insert.w $w1[2], $1 -; MIPS32R5EL-NEXT: lhu $1, 38($sp) -; MIPS32R5EL-NEXT: insert.w $w1[3], $1 -; MIPS32R5EL-NEXT: addv.w $w0, $w1, $w0 -; MIPS32R5EL-NEXT: copy_s.w $1, $w0[0] -; MIPS32R5EL-NEXT: copy_s.w $2, $w0[1] -; MIPS32R5EL-NEXT: copy_s.w $3, $w0[2] -; MIPS32R5EL-NEXT: copy_s.w $4, $w0[3] -; MIPS32R5EL-NEXT: sh $4, 22($sp) -; MIPS32R5EL-NEXT: sh $3, 20($sp) -; MIPS32R5EL-NEXT: sh $2, 18($sp) -; MIPS32R5EL-NEXT: sh $1, 16($sp) -; MIPS32R5EL-NEXT: lw $1, 20($sp) -; MIPS32R5EL-NEXT: sw $1, 8($sp) -; MIPS32R5EL-NEXT: lw $1, 16($sp) -; MIPS32R5EL-NEXT: sw $1, 0($sp) -; MIPS32R5EL-NEXT: ld.w $w0, 0($sp) +; MIPS32R5EL-NEXT: sw $7, 20($sp) +; MIPS32R5EL-NEXT: sw $6, 16($sp) +; MIPS32R5EL-NEXT: sw $5, 4($sp) +; MIPS32R5EL-NEXT: sw $4, 0($sp) +; MIPS32R5EL-NEXT: ld.h $w0, 16($sp) +; MIPS32R5EL-NEXT: ld.h $w1, 0($sp) +; MIPS32R5EL-NEXT: addv.h $w0, $w1, $w0 ; MIPS32R5EL-NEXT: copy_s.w $2, $w0[0] -; MIPS32R5EL-NEXT: copy_s.w $3, $w0[2] +; MIPS32R5EL-NEXT: copy_s.w $3, $w0[1] ; MIPS32R5EL-NEXT: move $sp, $fp ; MIPS32R5EL-NEXT: lw $fp, 40($sp) # 4-byte Folded Reload ; MIPS32R5EL-NEXT: lw $ra, 44($sp) # 4-byte Folded Reload @@ -1730,16 +1450,15 @@ define <2 x i32> @i32_2(<2 x i32> %a, <2 x i32> %b) { ; MIPS32R5EB-NEXT: .cfi_def_cfa_register 30 ; MIPS32R5EB-NEXT: addiu $1, $zero, -16 ; MIPS32R5EB-NEXT: and $sp, $sp, $1 -; MIPS32R5EB-NEXT: sw $7, 28($sp) -; MIPS32R5EB-NEXT: sw $6, 20($sp) -; MIPS32R5EB-NEXT: sw $5, 12($sp) -; MIPS32R5EB-NEXT: sw $4, 4($sp) -; MIPS32R5EB-NEXT: ld.d $w0, 16($sp) -; MIPS32R5EB-NEXT: ld.d $w1, 0($sp) -; MIPS32R5EB-NEXT: 
addv.d $w0, $w1, $w0 -; MIPS32R5EB-NEXT: shf.w $w0, $w0, 177 -; MIPS32R5EB-NEXT: copy_s.w $2, $w0[1] -; MIPS32R5EB-NEXT: copy_s.w $3, $w0[3] +; MIPS32R5EB-NEXT: sw $7, 20($sp) +; MIPS32R5EB-NEXT: sw $6, 16($sp) +; MIPS32R5EB-NEXT: sw $5, 4($sp) +; MIPS32R5EB-NEXT: sw $4, 0($sp) +; MIPS32R5EB-NEXT: ld.w $w0, 16($sp) +; MIPS32R5EB-NEXT: ld.w $w1, 0($sp) +; MIPS32R5EB-NEXT: addv.w $w0, $w1, $w0 +; MIPS32R5EB-NEXT: copy_s.w $2, $w0[0] +; MIPS32R5EB-NEXT: copy_s.w $3, $w0[1] ; MIPS32R5EB-NEXT: move $sp, $fp ; MIPS32R5EB-NEXT: lw $fp, 40($sp) # 4-byte Folded Reload ; MIPS32R5EB-NEXT: lw $ra, 44($sp) # 4-byte Folded Reload @@ -1751,18 +1470,13 @@ define <2 x i32> @i32_2(<2 x i32> %a, <2 x i32> %b) { ; MIPS64R5EB: # %bb.0: ; MIPS64R5EB-NEXT: daddiu $sp, $sp, -32 ; MIPS64R5EB-NEXT: .cfi_def_cfa_offset 32 -; MIPS64R5EB-NEXT: dsrl $1, $5, 32 -; MIPS64R5EB-NEXT: insert.d $w0[0], $1 -; MIPS64R5EB-NEXT: insert.d $w0[1], $5 -; MIPS64R5EB-NEXT: dsrl $1, $4, 32 -; MIPS64R5EB-NEXT: insert.d $w1[0], $1 -; MIPS64R5EB-NEXT: insert.d $w1[1], $4 -; MIPS64R5EB-NEXT: addv.d $w0, $w1, $w0 -; MIPS64R5EB-NEXT: copy_s.d $1, $w0[0] -; MIPS64R5EB-NEXT: copy_s.d $2, $w0[1] -; MIPS64R5EB-NEXT: sw $2, 12($sp) -; MIPS64R5EB-NEXT: sw $1, 8($sp) -; MIPS64R5EB-NEXT: ld $2, 8($sp) +; MIPS64R5EB-NEXT: sd $5, 16($sp) +; MIPS64R5EB-NEXT: sd $4, 0($sp) +; MIPS64R5EB-NEXT: ld.w $w0, 16($sp) +; MIPS64R5EB-NEXT: ld.w $w1, 0($sp) +; MIPS64R5EB-NEXT: addv.w $w0, $w1, $w0 +; MIPS64R5EB-NEXT: shf.w $w0, $w0, 177 +; MIPS64R5EB-NEXT: copy_s.d $2, $w0[0] ; MIPS64R5EB-NEXT: daddiu $sp, $sp, 32 ; MIPS64R5EB-NEXT: jr $ra ; MIPS64R5EB-NEXT: nop @@ -1779,15 +1493,15 @@ define <2 x i32> @i32_2(<2 x i32> %a, <2 x i32> %b) { ; MIPS32R5EL-NEXT: .cfi_def_cfa_register 30 ; MIPS32R5EL-NEXT: addiu $1, $zero, -16 ; MIPS32R5EL-NEXT: and $sp, $sp, $1 -; MIPS32R5EL-NEXT: sw $7, 24($sp) +; MIPS32R5EL-NEXT: sw $7, 20($sp) ; MIPS32R5EL-NEXT: sw $6, 16($sp) -; MIPS32R5EL-NEXT: sw $5, 8($sp) +; MIPS32R5EL-NEXT: sw $5, 4($sp) ; 
MIPS32R5EL-NEXT: sw $4, 0($sp) -; MIPS32R5EL-NEXT: ld.d $w0, 16($sp) -; MIPS32R5EL-NEXT: ld.d $w1, 0($sp) -; MIPS32R5EL-NEXT: addv.d $w0, $w1, $w0 +; MIPS32R5EL-NEXT: ld.w $w0, 16($sp) +; MIPS32R5EL-NEXT: ld.w $w1, 0($sp) +; MIPS32R5EL-NEXT: addv.w $w0, $w1, $w0 ; MIPS32R5EL-NEXT: copy_s.w $2, $w0[0] -; MIPS32R5EL-NEXT: copy_s.w $3, $w0[2] +; MIPS32R5EL-NEXT: copy_s.w $3, $w0[1] ; MIPS32R5EL-NEXT: move $sp, $fp ; MIPS32R5EL-NEXT: lw $fp, 40($sp) # 4-byte Folded Reload ; MIPS32R5EL-NEXT: lw $ra, 44($sp) # 4-byte Folded Reload @@ -1800,19 +1514,11 @@ define <2 x i32> @i32_2(<2 x i32> %a, <2 x i32> %b) { ; MIPS64R5EL-NEXT: daddiu $sp, $sp, -32 ; MIPS64R5EL-NEXT: .cfi_def_cfa_offset 32 ; MIPS64R5EL-NEXT: sd $5, 16($sp) -; MIPS64R5EL-NEXT: sd $4, 24($sp) -; MIPS64R5EL-NEXT: lw $1, 20($sp) -; MIPS64R5EL-NEXT: insert.d $w0[0], $5 -; MIPS64R5EL-NEXT: insert.d $w0[1], $1 -; MIPS64R5EL-NEXT: lw $1, 28($sp) -; MIPS64R5EL-NEXT: insert.d $w1[0], $4 -; MIPS64R5EL-NEXT: insert.d $w1[1], $1 -; MIPS64R5EL-NEXT: addv.d $w0, $w1, $w0 -; MIPS64R5EL-NEXT: copy_s.d $1, $w0[0] -; MIPS64R5EL-NEXT: copy_s.d $2, $w0[1] -; MIPS64R5EL-NEXT: sw $2, 12($sp) -; MIPS64R5EL-NEXT: sw $1, 8($sp) -; MIPS64R5EL-NEXT: ld $2, 8($sp) +; MIPS64R5EL-NEXT: sd $4, 0($sp) +; MIPS64R5EL-NEXT: ld.w $w0, 16($sp) +; MIPS64R5EL-NEXT: ld.w $w1, 0($sp) +; MIPS64R5EL-NEXT: addv.w $w0, $w1, $w0 +; MIPS64R5EL-NEXT: copy_s.d $2, $w0[0] ; MIPS64R5EL-NEXT: daddiu $sp, $sp, 32 ; MIPS64R5EL-NEXT: jr $ra ; MIPS64R5EL-NEXT: nop @@ -2561,31 +2267,11 @@ define <8 x i8> @ret_8_i8() { ; ; MIPS32R5EB-LABEL: ret_8_i8: ; MIPS32R5EB: # %bb.0: -; MIPS32R5EB-NEXT: addiu $sp, $sp, -32 -; MIPS32R5EB-NEXT: .cfi_def_cfa_offset 32 -; MIPS32R5EB-NEXT: sw $ra, 28($sp) # 4-byte Folded Spill -; MIPS32R5EB-NEXT: sw $fp, 24($sp) # 4-byte Folded Spill -; MIPS32R5EB-NEXT: .cfi_offset 31, -4 -; MIPS32R5EB-NEXT: .cfi_offset 30, -8 -; MIPS32R5EB-NEXT: move $fp, $sp -; MIPS32R5EB-NEXT: .cfi_def_cfa_register 30 -; MIPS32R5EB-NEXT: addiu $1, $zero, -16 
-; MIPS32R5EB-NEXT: and $sp, $sp, $1 ; MIPS32R5EB-NEXT: lui $1, %hi(gv8i8) ; MIPS32R5EB-NEXT: lw $2, %lo(gv8i8)($1) -; MIPS32R5EB-NEXT: sw $2, 4($sp) ; MIPS32R5EB-NEXT: addiu $1, $1, %lo(gv8i8) -; MIPS32R5EB-NEXT: lw $1, 4($1) -; MIPS32R5EB-NEXT: sw $1, 12($sp) -; MIPS32R5EB-NEXT: ld.w $w0, 0($sp) -; MIPS32R5EB-NEXT: copy_s.w $2, $w0[1] -; MIPS32R5EB-NEXT: copy_s.w $3, $w0[3] -; MIPS32R5EB-NEXT: move $sp, $fp -; MIPS32R5EB-NEXT: lw $fp, 24($sp) # 4-byte Folded Reload -; MIPS32R5EB-NEXT: lw $ra, 28($sp) # 4-byte Folded Reload -; MIPS32R5EB-NEXT: addiu $sp, $sp, 32 +; MIPS32R5EB-NEXT: lw $3, 4($1) ; MIPS32R5EB-NEXT: jr $ra -; MIPS32R5EB-NEXT: nop ; ; MIPS64R5-LABEL: ret_8_i8: ; MIPS64R5: # %bb.0: @@ -2599,29 +2285,10 @@ define <8 x i8> @ret_8_i8() { ; ; MIPS32R5EL-LABEL: ret_8_i8: ; MIPS32R5EL: # %bb.0: -; MIPS32R5EL-NEXT: addiu $sp, $sp, -32 -; MIPS32R5EL-NEXT: .cfi_def_cfa_offset 32 -; MIPS32R5EL-NEXT: sw $ra, 28($sp) # 4-byte Folded Spill -; MIPS32R5EL-NEXT: sw $fp, 24($sp) # 4-byte Folded Spill -; MIPS32R5EL-NEXT: .cfi_offset 31, -4 -; MIPS32R5EL-NEXT: .cfi_offset 30, -8 -; MIPS32R5EL-NEXT: move $fp, $sp -; MIPS32R5EL-NEXT: .cfi_def_cfa_register 30 -; MIPS32R5EL-NEXT: addiu $1, $zero, -16 -; MIPS32R5EL-NEXT: and $sp, $sp, $1 ; MIPS32R5EL-NEXT: lui $1, %hi(gv8i8) ; MIPS32R5EL-NEXT: lw $2, %lo(gv8i8)($1) -; MIPS32R5EL-NEXT: sw $2, 0($sp) ; MIPS32R5EL-NEXT: addiu $1, $1, %lo(gv8i8) -; MIPS32R5EL-NEXT: lw $1, 4($1) -; MIPS32R5EL-NEXT: sw $1, 8($sp) -; MIPS32R5EL-NEXT: ld.w $w0, 0($sp) -; MIPS32R5EL-NEXT: copy_s.w $2, $w0[0] -; MIPS32R5EL-NEXT: copy_s.w $3, $w0[2] -; MIPS32R5EL-NEXT: move $sp, $fp -; MIPS32R5EL-NEXT: lw $fp, 24($sp) # 4-byte Folded Reload -; MIPS32R5EL-NEXT: lw $ra, 28($sp) # 4-byte Folded Reload -; MIPS32R5EL-NEXT: addiu $sp, $sp, 32 +; MIPS32R5EL-NEXT: lw $3, 4($1) ; MIPS32R5EL-NEXT: jr $ra ; MIPS32R5EL-NEXT: nop %1 = load <8 x i8>, ptr @gv8i8 @@ -2738,29 +2405,10 @@ define <4 x i16> @ret_4_i16() { ; ; MIPS32R5EB-LABEL: ret_4_i16: ; MIPS32R5EB: # 
%bb.0: -; MIPS32R5EB-NEXT: addiu $sp, $sp, -32 -; MIPS32R5EB-NEXT: .cfi_def_cfa_offset 32 -; MIPS32R5EB-NEXT: sw $ra, 28($sp) # 4-byte Folded Spill -; MIPS32R5EB-NEXT: sw $fp, 24($sp) # 4-byte Folded Spill -; MIPS32R5EB-NEXT: .cfi_offset 31, -4 -; MIPS32R5EB-NEXT: .cfi_offset 30, -8 -; MIPS32R5EB-NEXT: move $fp, $sp -; MIPS32R5EB-NEXT: .cfi_def_cfa_register 30 -; MIPS32R5EB-NEXT: addiu $1, $zero, -16 -; MIPS32R5EB-NEXT: and $sp, $sp, $1 ; MIPS32R5EB-NEXT: lui $1, %hi(gv4i16) ; MIPS32R5EB-NEXT: lw $2, %lo(gv4i16)($1) -; MIPS32R5EB-NEXT: sw $2, 4($sp) ; MIPS32R5EB-NEXT: addiu $1, $1, %lo(gv4i16) -; MIPS32R5EB-NEXT: lw $1, 4($1) -; MIPS32R5EB-NEXT: sw $1, 12($sp) -; MIPS32R5EB-NEXT: ld.w $w0, 0($sp) -; MIPS32R5EB-NEXT: copy_s.w $2, $w0[1] -; MIPS32R5EB-NEXT: copy_s.w $3, $w0[3] -; MIPS32R5EB-NEXT: move $sp, $fp -; MIPS32R5EB-NEXT: lw $fp, 24($sp) # 4-byte Folded Reload -; MIPS32R5EB-NEXT: lw $ra, 28($sp) # 4-byte Folded Reload -; MIPS32R5EB-NEXT: addiu $sp, $sp, 32 +; MIPS32R5EB-NEXT: lw $3, 4($1) ; MIPS32R5EB-NEXT: jr $ra ; MIPS32R5EB-NEXT: nop ; @@ -2776,29 +2424,10 @@ define <4 x i16> @ret_4_i16() { ; ; MIPS32R5EL-LABEL: ret_4_i16: ; MIPS32R5EL: # %bb.0: -; MIPS32R5EL-NEXT: addiu $sp, $sp, -32 -; MIPS32R5EL-NEXT: .cfi_def_cfa_offset 32 -; MIPS32R5EL-NEXT: sw $ra, 28($sp) # 4-byte Folded Spill -; MIPS32R5EL-NEXT: sw $fp, 24($sp) # 4-byte Folded Spill -; MIPS32R5EL-NEXT: .cfi_offset 31, -4 -; MIPS32R5EL-NEXT: .cfi_offset 30, -8 -; MIPS32R5EL-NEXT: move $fp, $sp -; MIPS32R5EL-NEXT: .cfi_def_cfa_register 30 -; MIPS32R5EL-NEXT: addiu $1, $zero, -16 -; MIPS32R5EL-NEXT: and $sp, $sp, $1 ; MIPS32R5EL-NEXT: lui $1, %hi(gv4i16) ; MIPS32R5EL-NEXT: lw $2, %lo(gv4i16)($1) -; MIPS32R5EL-NEXT: sw $2, 0($sp) ; MIPS32R5EL-NEXT: addiu $1, $1, %lo(gv4i16) -; MIPS32R5EL-NEXT: lw $1, 4($1) -; MIPS32R5EL-NEXT: sw $1, 8($sp) -; MIPS32R5EL-NEXT: ld.w $w0, 0($sp) -; MIPS32R5EL-NEXT: copy_s.w $2, $w0[0] -; MIPS32R5EL-NEXT: copy_s.w $3, $w0[2] -; MIPS32R5EL-NEXT: move $sp, $fp -; 
MIPS32R5EL-NEXT: lw $fp, 24($sp) # 4-byte Folded Reload -; MIPS32R5EL-NEXT: lw $ra, 28($sp) # 4-byte Folded Reload -; MIPS32R5EL-NEXT: addiu $sp, $sp, 32 +; MIPS32R5EL-NEXT: lw $3, 4($1) ; MIPS32R5EL-NEXT: jr $ra ; MIPS32R5EL-NEXT: nop %1 = load <4 x i16>, ptr @gv4i16 @@ -2877,29 +2506,10 @@ define <2 x i32> @ret_2_i32() { ; ; MIPS32R5EB-LABEL: ret_2_i32: ; MIPS32R5EB: # %bb.0: -; MIPS32R5EB-NEXT: addiu $sp, $sp, -32 -; MIPS32R5EB-NEXT: .cfi_def_cfa_offset 32 -; MIPS32R5EB-NEXT: sw $ra, 28($sp) # 4-byte Folded Spill -; MIPS32R5EB-NEXT: sw $fp, 24($sp) # 4-byte Folded Spill -; MIPS32R5EB-NEXT: .cfi_offset 31, -4 -; MIPS32R5EB-NEXT: .cfi_offset 30, -8 -; MIPS32R5EB-NEXT: move $fp, $sp -; MIPS32R5EB-NEXT: .cfi_def_cfa_register 30 -; MIPS32R5EB-NEXT: addiu $1, $zero, -16 -; MIPS32R5EB-NEXT: and $sp, $sp, $1 ; MIPS32R5EB-NEXT: lui $1, %hi(gv2i32) ; MIPS32R5EB-NEXT: lw $2, %lo(gv2i32)($1) -; MIPS32R5EB-NEXT: sw $2, 4($sp) ; MIPS32R5EB-NEXT: addiu $1, $1, %lo(gv2i32) -; MIPS32R5EB-NEXT: lw $1, 4($1) -; MIPS32R5EB-NEXT: sw $1, 12($sp) -; MIPS32R5EB-NEXT: ld.w $w0, 0($sp) -; MIPS32R5EB-NEXT: copy_s.w $2, $w0[1] -; MIPS32R5EB-NEXT: copy_s.w $3, $w0[3] -; MIPS32R5EB-NEXT: move $sp, $fp -; MIPS32R5EB-NEXT: lw $fp, 24($sp) # 4-byte Folded Reload -; MIPS32R5EB-NEXT: lw $ra, 28($sp) # 4-byte Folded Reload -; MIPS32R5EB-NEXT: addiu $sp, $sp, 32 +; MIPS32R5EB-NEXT: lw $3, 4($1) ; MIPS32R5EB-NEXT: jr $ra ; MIPS32R5EB-NEXT: nop ; @@ -2915,29 +2525,10 @@ define <2 x i32> @ret_2_i32() { ; ; MIPS32R5EL-LABEL: ret_2_i32: ; MIPS32R5EL: # %bb.0: -; MIPS32R5EL-NEXT: addiu $sp, $sp, -32 -; MIPS32R5EL-NEXT: .cfi_def_cfa_offset 32 -; MIPS32R5EL-NEXT: sw $ra, 28($sp) # 4-byte Folded Spill -; MIPS32R5EL-NEXT: sw $fp, 24($sp) # 4-byte Folded Spill -; MIPS32R5EL-NEXT: .cfi_offset 31, -4 -; MIPS32R5EL-NEXT: .cfi_offset 30, -8 -; MIPS32R5EL-NEXT: move $fp, $sp -; MIPS32R5EL-NEXT: .cfi_def_cfa_register 30 -; MIPS32R5EL-NEXT: addiu $1, $zero, -16 -; MIPS32R5EL-NEXT: and $sp, $sp, $1 ; MIPS32R5EL-NEXT: 
lui $1, %hi(gv2i32) ; MIPS32R5EL-NEXT: lw $2, %lo(gv2i32)($1) -; MIPS32R5EL-NEXT: sw $2, 0($sp) ; MIPS32R5EL-NEXT: addiu $1, $1, %lo(gv2i32) -; MIPS32R5EL-NEXT: lw $1, 4($1) -; MIPS32R5EL-NEXT: sw $1, 8($sp) -; MIPS32R5EL-NEXT: ld.w $w0, 0($sp) -; MIPS32R5EL-NEXT: copy_s.w $2, $w0[0] -; MIPS32R5EL-NEXT: copy_s.w $3, $w0[2] -; MIPS32R5EL-NEXT: move $sp, $fp -; MIPS32R5EL-NEXT: lw $fp, 24($sp) # 4-byte Folded Reload -; MIPS32R5EL-NEXT: lw $ra, 28($sp) # 4-byte Folded Reload -; MIPS32R5EL-NEXT: addiu $sp, $sp, 32 +; MIPS32R5EL-NEXT: lw $3, 4($1) ; MIPS32R5EL-NEXT: jr $ra ; MIPS32R5EL-NEXT: nop %1 = load <2 x i32>, ptr @gv2i32 @@ -3424,9 +3015,9 @@ define void @call_i8_4() { ; ; MIPS32R5EB-LABEL: call_i8_4: ; MIPS32R5EB: # %bb.0: # %entry -; MIPS32R5EB-NEXT: addiu $sp, $sp, -32 -; MIPS32R5EB-NEXT: .cfi_def_cfa_offset 32 -; MIPS32R5EB-NEXT: sw $ra, 28($sp) # 4-byte Folded Spill +; MIPS32R5EB-NEXT: addiu $sp, $sp, -24 +; MIPS32R5EB-NEXT: .cfi_def_cfa_offset 24 +; MIPS32R5EB-NEXT: sw $ra, 20($sp) # 4-byte Folded Spill ; MIPS32R5EB-NEXT: .cfi_offset 31, -4 ; MIPS32R5EB-NEXT: lui $1, 1543 ; MIPS32R5EB-NEXT: ori $4, $1, 2314 @@ -3436,17 +3027,17 @@ define void @call_i8_4() { ; MIPS32R5EB-NEXT: nop ; MIPS32R5EB-NEXT: lui $1, %hi(gv4i8) ; MIPS32R5EB-NEXT: sw $2, %lo(gv4i8)($1) -; MIPS32R5EB-NEXT: lw $ra, 28($sp) # 4-byte Folded Reload -; MIPS32R5EB-NEXT: addiu $sp, $sp, 32 +; MIPS32R5EB-NEXT: lw $ra, 20($sp) # 4-byte Folded Reload +; MIPS32R5EB-NEXT: addiu $sp, $sp, 24 ; MIPS32R5EB-NEXT: jr $ra ; MIPS32R5EB-NEXT: nop ; ; MIPS64R5EB-LABEL: call_i8_4: ; MIPS64R5EB: # %bb.0: # %entry -; MIPS64R5EB-NEXT: daddiu $sp, $sp, -32 -; MIPS64R5EB-NEXT: .cfi_def_cfa_offset 32 -; MIPS64R5EB-NEXT: sd $ra, 24($sp) # 8-byte Folded Spill -; MIPS64R5EB-NEXT: sd $gp, 16($sp) # 8-byte Folded Spill +; MIPS64R5EB-NEXT: daddiu $sp, $sp, -16 +; MIPS64R5EB-NEXT: .cfi_def_cfa_offset 16 +; MIPS64R5EB-NEXT: sd $ra, 8($sp) # 8-byte Folded Spill +; MIPS64R5EB-NEXT: sd $gp, 0($sp) # 8-byte Folded Spill ; 
MIPS64R5EB-NEXT: .cfi_offset 31, -8 ; MIPS64R5EB-NEXT: .cfi_offset 28, -16 ; MIPS64R5EB-NEXT: lui $1, %hi(%neg(%gp_rel(call_i8_4))) @@ -3461,9 +3052,9 @@ define void @call_i8_4() { ; MIPS64R5EB-NEXT: nop ; MIPS64R5EB-NEXT: ld $1, %got_disp(gv4i8)($gp) ; MIPS64R5EB-NEXT: sw $2, 0($1) -; MIPS64R5EB-NEXT: ld $gp, 16($sp) # 8-byte Folded Reload -; MIPS64R5EB-NEXT: ld $ra, 24($sp) # 8-byte Folded Reload -; MIPS64R5EB-NEXT: daddiu $sp, $sp, 32 +; MIPS64R5EB-NEXT: ld $gp, 0($sp) # 8-byte Folded Reload +; MIPS64R5EB-NEXT: ld $ra, 8($sp) # 8-byte Folded Reload +; MIPS64R5EB-NEXT: daddiu $sp, $sp, 16 ; MIPS64R5EB-NEXT: jr $ra ; MIPS64R5EB-NEXT: nop ; @@ -3512,9 +3103,9 @@ define void @call_i8_4() { ; ; MIPS32R5EL-LABEL: call_i8_4: ; MIPS32R5EL: # %bb.0: # %entry -; MIPS32R5EL-NEXT: addiu $sp, $sp, -32 -; MIPS32R5EL-NEXT: .cfi_def_cfa_offset 32 -; MIPS32R5EL-NEXT: sw $ra, 28($sp) # 4-byte Folded Spill +; MIPS32R5EL-NEXT: addiu $sp, $sp, -24 +; MIPS32R5EL-NEXT: .cfi_def_cfa_offset 24 +; MIPS32R5EL-NEXT: sw $ra, 20($sp) # 4-byte Folded Spill ; MIPS32R5EL-NEXT: .cfi_offset 31, -4 ; MIPS32R5EL-NEXT: lui $1, 2569 ; MIPS32R5EL-NEXT: ori $4, $1, 1798 @@ -3523,17 +3114,17 @@ define void @call_i8_4() { ; MIPS32R5EL-NEXT: nop ; MIPS32R5EL-NEXT: lui $1, %hi(gv4i8) ; MIPS32R5EL-NEXT: sw $2, %lo(gv4i8)($1) -; MIPS32R5EL-NEXT: lw $ra, 28($sp) # 4-byte Folded Reload -; MIPS32R5EL-NEXT: addiu $sp, $sp, 32 +; MIPS32R5EL-NEXT: lw $ra, 20($sp) # 4-byte Folded Reload +; MIPS32R5EL-NEXT: addiu $sp, $sp, 24 ; MIPS32R5EL-NEXT: jr $ra ; MIPS32R5EL-NEXT: nop ; ; MIPS64R5EL-LABEL: call_i8_4: ; MIPS64R5EL: # %bb.0: # %entry -; MIPS64R5EL-NEXT: daddiu $sp, $sp, -32 -; MIPS64R5EL-NEXT: .cfi_def_cfa_offset 32 -; MIPS64R5EL-NEXT: sd $ra, 24($sp) # 8-byte Folded Spill -; MIPS64R5EL-NEXT: sd $gp, 16($sp) # 8-byte Folded Spill +; MIPS64R5EL-NEXT: daddiu $sp, $sp, -16 +; MIPS64R5EL-NEXT: .cfi_def_cfa_offset 16 +; MIPS64R5EL-NEXT: sd $ra, 8($sp) # 8-byte Folded Spill +; MIPS64R5EL-NEXT: sd $gp, 0($sp) # 8-byte 
Folded Spill ; MIPS64R5EL-NEXT: .cfi_offset 31, -8 ; MIPS64R5EL-NEXT: .cfi_offset 28, -16 ; MIPS64R5EL-NEXT: lui $1, %hi(%neg(%gp_rel(call_i8_4))) @@ -3547,9 +3138,9 @@ define void @call_i8_4() { ; MIPS64R5EL-NEXT: nop ; MIPS64R5EL-NEXT: ld $1, %got_disp(gv4i8)($gp) ; MIPS64R5EL-NEXT: sw $2, 0($1) -; MIPS64R5EL-NEXT: ld $gp, 16($sp) # 8-byte Folded Reload -; MIPS64R5EL-NEXT: ld $ra, 24($sp) # 8-byte Folded Reload -; MIPS64R5EL-NEXT: daddiu $sp, $sp, 32 +; MIPS64R5EL-NEXT: ld $gp, 0($sp) # 8-byte Folded Reload +; MIPS64R5EL-NEXT: ld $ra, 8($sp) # 8-byte Folded Reload +; MIPS64R5EL-NEXT: daddiu $sp, $sp, 16 ; MIPS64R5EL-NEXT: jr $ra ; MIPS64R5EL-NEXT: nop entry: @@ -3641,10 +3232,10 @@ define void @call_i8_8() { ; ; MIPS64R5EB-LABEL: call_i8_8: ; MIPS64R5EB: # %bb.0: # %entry -; MIPS64R5EB-NEXT: daddiu $sp, $sp, -32 -; MIPS64R5EB-NEXT: .cfi_def_cfa_offset 32 -; MIPS64R5EB-NEXT: sd $ra, 24($sp) # 8-byte Folded Spill -; MIPS64R5EB-NEXT: sd $gp, 16($sp) # 8-byte Folded Spill +; MIPS64R5EB-NEXT: daddiu $sp, $sp, -16 +; MIPS64R5EB-NEXT: .cfi_def_cfa_offset 16 +; MIPS64R5EB-NEXT: sd $ra, 8($sp) # 8-byte Folded Spill +; MIPS64R5EB-NEXT: sd $gp, 0($sp) # 8-byte Folded Spill ; MIPS64R5EB-NEXT: .cfi_offset 31, -8 ; MIPS64R5EB-NEXT: .cfi_offset 28, -16 ; MIPS64R5EB-NEXT: lui $1, %hi(%neg(%gp_rel(call_i8_8))) @@ -3667,9 +3258,9 @@ define void @call_i8_8() { ; MIPS64R5EB-NEXT: nop ; MIPS64R5EB-NEXT: ld $1, %got_disp(gv8i8)($gp) ; MIPS64R5EB-NEXT: sd $2, 0($1) -; MIPS64R5EB-NEXT: ld $gp, 16($sp) # 8-byte Folded Reload -; MIPS64R5EB-NEXT: ld $ra, 24($sp) # 8-byte Folded Reload -; MIPS64R5EB-NEXT: daddiu $sp, $sp, 32 +; MIPS64R5EB-NEXT: ld $gp, 0($sp) # 8-byte Folded Reload +; MIPS64R5EB-NEXT: ld $ra, 8($sp) # 8-byte Folded Reload +; MIPS64R5EB-NEXT: daddiu $sp, $sp, 16 ; MIPS64R5EB-NEXT: jr $ra ; MIPS64R5EB-NEXT: nop ; @@ -3748,10 +3339,10 @@ define void @call_i8_8() { ; ; MIPS64R5EL-LABEL: call_i8_8: ; MIPS64R5EL: # %bb.0: # %entry -; MIPS64R5EL-NEXT: daddiu $sp, $sp, -32 -; 
MIPS64R5EL-NEXT: .cfi_def_cfa_offset 32 -; MIPS64R5EL-NEXT: sd $ra, 24($sp) # 8-byte Folded Spill -; MIPS64R5EL-NEXT: sd $gp, 16($sp) # 8-byte Folded Spill +; MIPS64R5EL-NEXT: daddiu $sp, $sp, -16 +; MIPS64R5EL-NEXT: .cfi_def_cfa_offset 16 +; MIPS64R5EL-NEXT: sd $ra, 8($sp) # 8-byte Folded Spill +; MIPS64R5EL-NEXT: sd $gp, 0($sp) # 8-byte Folded Spill ; MIPS64R5EL-NEXT: .cfi_offset 31, -8 ; MIPS64R5EL-NEXT: .cfi_offset 28, -16 ; MIPS64R5EL-NEXT: lui $1, %hi(%neg(%gp_rel(call_i8_8))) @@ -3769,9 +3360,9 @@ define void @call_i8_8() { ; MIPS64R5EL-NEXT: nop ; MIPS64R5EL-NEXT: ld $1, %got_disp(gv8i8)($gp) ; MIPS64R5EL-NEXT: sd $2, 0($1) -; MIPS64R5EL-NEXT: ld $gp, 16($sp) # 8-byte Folded Reload -; MIPS64R5EL-NEXT: ld $ra, 24($sp) # 8-byte Folded Reload -; MIPS64R5EL-NEXT: daddiu $sp, $sp, 32 +; MIPS64R5EL-NEXT: ld $gp, 0($sp) # 8-byte Folded Reload +; MIPS64R5EL-NEXT: ld $ra, 8($sp) # 8-byte Folded Reload +; MIPS64R5EL-NEXT: daddiu $sp, $sp, 16 ; MIPS64R5EL-NEXT: jr $ra ; MIPS64R5EL-NEXT: nop entry: @@ -4059,9 +3650,9 @@ define void @calli16_2() { ; ; MIPS32R5EB-LABEL: calli16_2: ; MIPS32R5EB: # %bb.0: # %entry -; MIPS32R5EB-NEXT: addiu $sp, $sp, -32 -; MIPS32R5EB-NEXT: .cfi_def_cfa_offset 32 -; MIPS32R5EB-NEXT: sw $ra, 28($sp) # 4-byte Folded Spill +; MIPS32R5EB-NEXT: addiu $sp, $sp, -24 +; MIPS32R5EB-NEXT: .cfi_def_cfa_offset 24 +; MIPS32R5EB-NEXT: sw $ra, 20($sp) # 4-byte Folded Spill ; MIPS32R5EB-NEXT: .cfi_offset 31, -4 ; MIPS32R5EB-NEXT: lui $1, 6 ; MIPS32R5EB-NEXT: ori $4, $1, 7 @@ -4071,17 +3662,17 @@ define void @calli16_2() { ; MIPS32R5EB-NEXT: nop ; MIPS32R5EB-NEXT: lui $1, %hi(gv2i16) ; MIPS32R5EB-NEXT: sw $2, %lo(gv2i16)($1) -; MIPS32R5EB-NEXT: lw $ra, 28($sp) # 4-byte Folded Reload -; MIPS32R5EB-NEXT: addiu $sp, $sp, 32 +; MIPS32R5EB-NEXT: lw $ra, 20($sp) # 4-byte Folded Reload +; MIPS32R5EB-NEXT: addiu $sp, $sp, 24 ; MIPS32R5EB-NEXT: jr $ra ; MIPS32R5EB-NEXT: nop ; ; MIPS64R5EB-LABEL: calli16_2: ; MIPS64R5EB: # %bb.0: # %entry -; MIPS64R5EB-NEXT: daddiu 
$sp, $sp, -32 -; MIPS64R5EB-NEXT: .cfi_def_cfa_offset 32 -; MIPS64R5EB-NEXT: sd $ra, 24($sp) # 8-byte Folded Spill -; MIPS64R5EB-NEXT: sd $gp, 16($sp) # 8-byte Folded Spill +; MIPS64R5EB-NEXT: daddiu $sp, $sp, -16 +; MIPS64R5EB-NEXT: .cfi_def_cfa_offset 16 +; MIPS64R5EB-NEXT: sd $ra, 8($sp) # 8-byte Folded Spill +; MIPS64R5EB-NEXT: sd $gp, 0($sp) # 8-byte Folded Spill ; MIPS64R5EB-NEXT: .cfi_offset 31, -8 ; MIPS64R5EB-NEXT: .cfi_offset 28, -16 ; MIPS64R5EB-NEXT: lui $1, %hi(%neg(%gp_rel(calli16_2))) @@ -4096,9 +3687,9 @@ define void @calli16_2() { ; MIPS64R5EB-NEXT: nop ; MIPS64R5EB-NEXT: ld $1, %got_disp(gv2i16)($gp) ; MIPS64R5EB-NEXT: sw $2, 0($1) -; MIPS64R5EB-NEXT: ld $gp, 16($sp) # 8-byte Folded Reload -; MIPS64R5EB-NEXT: ld $ra, 24($sp) # 8-byte Folded Reload -; MIPS64R5EB-NEXT: daddiu $sp, $sp, 32 +; MIPS64R5EB-NEXT: ld $gp, 0($sp) # 8-byte Folded Reload +; MIPS64R5EB-NEXT: ld $ra, 8($sp) # 8-byte Folded Reload +; MIPS64R5EB-NEXT: daddiu $sp, $sp, 16 ; MIPS64R5EB-NEXT: jr $ra ; MIPS64R5EB-NEXT: nop ; @@ -4149,9 +3740,9 @@ define void @calli16_2() { ; ; MIPS32R5EL-LABEL: calli16_2: ; MIPS32R5EL: # %bb.0: # %entry -; MIPS32R5EL-NEXT: addiu $sp, $sp, -32 -; MIPS32R5EL-NEXT: .cfi_def_cfa_offset 32 -; MIPS32R5EL-NEXT: sw $ra, 28($sp) # 4-byte Folded Spill +; MIPS32R5EL-NEXT: addiu $sp, $sp, -24 +; MIPS32R5EL-NEXT: .cfi_def_cfa_offset 24 +; MIPS32R5EL-NEXT: sw $ra, 20($sp) # 4-byte Folded Spill ; MIPS32R5EL-NEXT: .cfi_offset 31, -4 ; MIPS32R5EL-NEXT: lui $1, 7 ; MIPS32R5EL-NEXT: ori $4, $1, 6 @@ -4161,17 +3752,17 @@ define void @calli16_2() { ; MIPS32R5EL-NEXT: nop ; MIPS32R5EL-NEXT: lui $1, %hi(gv2i16) ; MIPS32R5EL-NEXT: sw $2, %lo(gv2i16)($1) -; MIPS32R5EL-NEXT: lw $ra, 28($sp) # 4-byte Folded Reload -; MIPS32R5EL-NEXT: addiu $sp, $sp, 32 +; MIPS32R5EL-NEXT: lw $ra, 20($sp) # 4-byte Folded Reload +; MIPS32R5EL-NEXT: addiu $sp, $sp, 24 ; MIPS32R5EL-NEXT: jr $ra ; MIPS32R5EL-NEXT: nop ; ; MIPS64R5EL-LABEL: calli16_2: ; MIPS64R5EL: # %bb.0: # %entry -; 
MIPS64R5EL-NEXT: daddiu $sp, $sp, -32 -; MIPS64R5EL-NEXT: .cfi_def_cfa_offset 32 -; MIPS64R5EL-NEXT: sd $ra, 24($sp) # 8-byte Folded Spill -; MIPS64R5EL-NEXT: sd $gp, 16($sp) # 8-byte Folded Spill +; MIPS64R5EL-NEXT: daddiu $sp, $sp, -16 +; MIPS64R5EL-NEXT: .cfi_def_cfa_offset 16 +; MIPS64R5EL-NEXT: sd $ra, 8($sp) # 8-byte Folded Spill +; MIPS64R5EL-NEXT: sd $gp, 0($sp) # 8-byte Folded Spill ; MIPS64R5EL-NEXT: .cfi_offset 31, -8 ; MIPS64R5EL-NEXT: .cfi_offset 28, -16 ; MIPS64R5EL-NEXT: lui $1, %hi(%neg(%gp_rel(calli16_2))) @@ -4186,9 +3777,9 @@ define void @calli16_2() { ; MIPS64R5EL-NEXT: nop ; MIPS64R5EL-NEXT: ld $1, %got_disp(gv2i16)($gp) ; MIPS64R5EL-NEXT: sw $2, 0($1) -; MIPS64R5EL-NEXT: ld $gp, 16($sp) # 8-byte Folded Reload -; MIPS64R5EL-NEXT: ld $ra, 24($sp) # 8-byte Folded Reload -; MIPS64R5EL-NEXT: daddiu $sp, $sp, 32 +; MIPS64R5EL-NEXT: ld $gp, 0($sp) # 8-byte Folded Reload +; MIPS64R5EL-NEXT: ld $ra, 8($sp) # 8-byte Folded Reload +; MIPS64R5EL-NEXT: daddiu $sp, $sp, 16 ; MIPS64R5EL-NEXT: jr $ra ; MIPS64R5EL-NEXT: nop entry: @@ -4282,10 +3873,10 @@ define void @calli16_4() { ; ; MIPS64R5EB-LABEL: calli16_4: ; MIPS64R5EB: # %bb.0: # %entry -; MIPS64R5EB-NEXT: daddiu $sp, $sp, -32 -; MIPS64R5EB-NEXT: .cfi_def_cfa_offset 32 -; MIPS64R5EB-NEXT: sd $ra, 24($sp) # 8-byte Folded Spill -; MIPS64R5EB-NEXT: sd $gp, 16($sp) # 8-byte Folded Spill +; MIPS64R5EB-NEXT: daddiu $sp, $sp, -16 +; MIPS64R5EB-NEXT: .cfi_def_cfa_offset 16 +; MIPS64R5EB-NEXT: sd $ra, 8($sp) # 8-byte Folded Spill +; MIPS64R5EB-NEXT: sd $gp, 0($sp) # 8-byte Folded Spill ; MIPS64R5EB-NEXT: .cfi_offset 31, -8 ; MIPS64R5EB-NEXT: .cfi_offset 28, -16 ; MIPS64R5EB-NEXT: lui $1, %hi(%neg(%gp_rel(calli16_4))) @@ -4308,9 +3899,9 @@ define void @calli16_4() { ; MIPS64R5EB-NEXT: nop ; MIPS64R5EB-NEXT: ld $1, %got_disp(gv4i16)($gp) ; MIPS64R5EB-NEXT: sd $2, 0($1) -; MIPS64R5EB-NEXT: ld $gp, 16($sp) # 8-byte Folded Reload -; MIPS64R5EB-NEXT: ld $ra, 24($sp) # 8-byte Folded Reload -; MIPS64R5EB-NEXT: daddiu 
$sp, $sp, 32 +; MIPS64R5EB-NEXT: ld $gp, 0($sp) # 8-byte Folded Reload +; MIPS64R5EB-NEXT: ld $ra, 8($sp) # 8-byte Folded Reload +; MIPS64R5EB-NEXT: daddiu $sp, $sp, 16 ; MIPS64R5EB-NEXT: jr $ra ; MIPS64R5EB-NEXT: nop ; @@ -4398,10 +3989,10 @@ define void @calli16_4() { ; ; MIPS64R5EL-LABEL: calli16_4: ; MIPS64R5EL: # %bb.0: # %entry -; MIPS64R5EL-NEXT: daddiu $sp, $sp, -32 -; MIPS64R5EL-NEXT: .cfi_def_cfa_offset 32 -; MIPS64R5EL-NEXT: sd $ra, 24($sp) # 8-byte Folded Spill -; MIPS64R5EL-NEXT: sd $gp, 16($sp) # 8-byte Folded Spill +; MIPS64R5EL-NEXT: daddiu $sp, $sp, -16 +; MIPS64R5EL-NEXT: .cfi_def_cfa_offset 16 +; MIPS64R5EL-NEXT: sd $ra, 8($sp) # 8-byte Folded Spill +; MIPS64R5EL-NEXT: sd $gp, 0($sp) # 8-byte Folded Spill ; MIPS64R5EL-NEXT: .cfi_offset 31, -8 ; MIPS64R5EL-NEXT: .cfi_offset 28, -16 ; MIPS64R5EL-NEXT: lui $1, %hi(%neg(%gp_rel(calli16_4))) @@ -4424,9 +4015,9 @@ define void @calli16_4() { ; MIPS64R5EL-NEXT: nop ; MIPS64R5EL-NEXT: ld $1, %got_disp(gv4i16)($gp) ; MIPS64R5EL-NEXT: sd $2, 0($1) -; MIPS64R5EL-NEXT: ld $gp, 16($sp) # 8-byte Folded Reload -; MIPS64R5EL-NEXT: ld $ra, 24($sp) # 8-byte Folded Reload -; MIPS64R5EL-NEXT: daddiu $sp, $sp, 32 +; MIPS64R5EL-NEXT: ld $gp, 0($sp) # 8-byte Folded Reload +; MIPS64R5EL-NEXT: ld $ra, 8($sp) # 8-byte Folded Reload +; MIPS64R5EL-NEXT: daddiu $sp, $sp, 16 ; MIPS64R5EL-NEXT: jr $ra ; MIPS64R5EL-NEXT: nop entry: @@ -4807,10 +4398,10 @@ define void @calli32_2() { ; ; MIPS64R5EB-LABEL: calli32_2: ; MIPS64R5EB: # %bb.0: # %entry -; MIPS64R5EB-NEXT: daddiu $sp, $sp, -32 -; MIPS64R5EB-NEXT: .cfi_def_cfa_offset 32 -; MIPS64R5EB-NEXT: sd $ra, 24($sp) # 8-byte Folded Spill -; MIPS64R5EB-NEXT: sd $gp, 16($sp) # 8-byte Folded Spill +; MIPS64R5EB-NEXT: daddiu $sp, $sp, -16 +; MIPS64R5EB-NEXT: .cfi_def_cfa_offset 16 +; MIPS64R5EB-NEXT: sd $ra, 8($sp) # 8-byte Folded Spill +; MIPS64R5EB-NEXT: sd $gp, 0($sp) # 8-byte Folded Spill ; MIPS64R5EB-NEXT: .cfi_offset 31, -8 ; MIPS64R5EB-NEXT: .cfi_offset 28, -16 ; 
MIPS64R5EB-NEXT: lui $1, %hi(%neg(%gp_rel(calli32_2))) @@ -4826,9 +4417,9 @@ define void @calli32_2() { ; MIPS64R5EB-NEXT: nop ; MIPS64R5EB-NEXT: ld $1, %got_disp(gv2i32)($gp) ; MIPS64R5EB-NEXT: sd $2, 0($1) -; MIPS64R5EB-NEXT: ld $gp, 16($sp) # 8-byte Folded Reload -; MIPS64R5EB-NEXT: ld $ra, 24($sp) # 8-byte Folded Reload -; MIPS64R5EB-NEXT: daddiu $sp, $sp, 32 +; MIPS64R5EB-NEXT: ld $gp, 0($sp) # 8-byte Folded Reload +; MIPS64R5EB-NEXT: ld $ra, 8($sp) # 8-byte Folded Reload +; MIPS64R5EB-NEXT: daddiu $sp, $sp, 16 ; MIPS64R5EB-NEXT: jr $ra ; MIPS64R5EB-NEXT: nop ; @@ -4862,10 +4453,10 @@ define void @calli32_2() { ; ; MIPS64R5EL-LABEL: calli32_2: ; MIPS64R5EL: # %bb.0: # %entry -; MIPS64R5EL-NEXT: daddiu $sp, $sp, -32 -; MIPS64R5EL-NEXT: .cfi_def_cfa_offset 32 -; MIPS64R5EL-NEXT: sd $ra, 24($sp) # 8-byte Folded Spill -; MIPS64R5EL-NEXT: sd $gp, 16($sp) # 8-byte Folded Spill +; MIPS64R5EL-NEXT: daddiu $sp, $sp, -16 +; MIPS64R5EL-NEXT: .cfi_def_cfa_offset 16 +; MIPS64R5EL-NEXT: sd $ra, 8($sp) # 8-byte Folded Spill +; MIPS64R5EL-NEXT: sd $gp, 0($sp) # 8-byte Folded Spill ; MIPS64R5EL-NEXT: .cfi_offset 31, -8 ; MIPS64R5EL-NEXT: .cfi_offset 28, -16 ; MIPS64R5EL-NEXT: lui $1, %hi(%neg(%gp_rel(calli32_2))) @@ -4882,9 +4473,9 @@ define void @calli32_2() { ; MIPS64R5EL-NEXT: nop ; MIPS64R5EL-NEXT: ld $1, %got_disp(gv2i32)($gp) ; MIPS64R5EL-NEXT: sd $2, 0($1) -; MIPS64R5EL-NEXT: ld $gp, 16($sp) # 8-byte Folded Reload -; MIPS64R5EL-NEXT: ld $ra, 24($sp) # 8-byte Folded Reload -; MIPS64R5EL-NEXT: daddiu $sp, $sp, 32 +; MIPS64R5EL-NEXT: ld $gp, 0($sp) # 8-byte Folded Reload +; MIPS64R5EL-NEXT: ld $ra, 8($sp) # 8-byte Folded Reload +; MIPS64R5EL-NEXT: daddiu $sp, $sp, 16 ; MIPS64R5EL-NEXT: jr $ra ; MIPS64R5EL-NEXT: nop entry: diff --git a/llvm/test/CodeGen/Mips/msa/basic_operations.ll b/llvm/test/CodeGen/Mips/msa/basic_operations.ll index 820259d7c7bc2..4fc3f57aa002d 100644 --- a/llvm/test/CodeGen/Mips/msa/basic_operations.ll +++ 
b/llvm/test/CodeGen/Mips/msa/basic_operations.ll @@ -2066,46 +2066,38 @@ define void @insert_v2i64_vidx(i64 signext %a) nounwind { ret void } -; TODO: What code should be emitted? -define void @truncstore() nounwind { -; O32-LABEL: truncstore: +; After legalizing shorter vectors with legal element sizes, this store is +; no longer a truncating store, so the test was renamed from truncstore. +define void @store_i8_32bit() nounwind { +; O32-LABEL: store_i8_32bit: ; O32: # %bb.0: ; O32-NEXT: lui $2, %hi(_gp_disp) ; O32-NEXT: addiu $2, $2, %lo(_gp_disp) ; O32-NEXT: addu $1, $2, $25 ; O32-NEXT: lw $1, %got(v4i8)($1) -; O32-NEXT: addiu $2, $zero, 255 -; O32-NEXT: sb $2, 3($1) -; O32-NEXT: sb $2, 2($1) -; O32-NEXT: sb $2, 1($1) +; O32-NEXT: addiu $2, $zero, -1 ; O32-NEXT: jr $ra -; O32-NEXT: sb $2, 0($1) +; O32-NEXT: sw $2, 0($1) ; -; N32-LABEL: truncstore: +; N32-LABEL: store_i8_32bit: ; N32: # %bb.0: -; N32-NEXT: lui $1, %hi(%neg(%gp_rel(truncstore))) +; N32-NEXT: lui $1, %hi(%neg(%gp_rel(store_i8_32bit))) ; N32-NEXT: addu $1, $1, $25 -; N32-NEXT: addiu $1, $1, %lo(%neg(%gp_rel(truncstore))) +; N32-NEXT: addiu $1, $1, %lo(%neg(%gp_rel(store_i8_32bit))) ; N32-NEXT: lw $1, %got_disp(v4i8)($1) -; N32-NEXT: addiu $2, $zero, 255 -; N32-NEXT: sb $2, 3($1) -; N32-NEXT: sb $2, 2($1) -; N32-NEXT: sb $2, 1($1) +; N32-NEXT: addiu $2, $zero, -1 ; N32-NEXT: jr $ra -; N32-NEXT: sb $2, 0($1) +; N32-NEXT: sw $2, 0($1) ; -; N64-LABEL: truncstore: +; N64-LABEL: store_i8_32bit: ; N64: # %bb.0: -; N64-NEXT: lui $1, %hi(%neg(%gp_rel(truncstore))) +; N64-NEXT: lui $1, %hi(%neg(%gp_rel(store_i8_32bit))) ; N64-NEXT: daddu $1, $1, $25 -; N64-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(truncstore))) +; N64-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(store_i8_32bit))) ; N64-NEXT: ld $1, %got_disp(v4i8)($1) -; N64-NEXT: addiu $2, $zero, 255 -; N64-NEXT: sb $2, 3($1) -; N64-NEXT: sb $2, 2($1) -; N64-NEXT: sb $2, 1($1) +; N64-NEXT: addiu $2, $zero, -1 ; N64-NEXT: jr $ra -; N64-NEXT: sb $2, 0($1) +; N64-NEXT: sw $2, 0($1) store volatile <4 x i8> <i8 -1, i8 -1, i8 -1, i8 -1>, ptr
@v4i8 ret void }