diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index ab734ffb25dbd..9674de7738838 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -1287,6 +1287,8 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
   case ISD::VP_FMINIMUM:
   case ISD::FMAXIMUM:
   case ISD::VP_FMAXIMUM:
+  case ISD::FMINIMUMNUM:
+  case ISD::FMAXIMUMNUM:
   case ISD::SDIV: case ISD::VP_SDIV:
   case ISD::UDIV: case ISD::VP_UDIV:
   case ISD::FDIV: case ISD::VP_FDIV:
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 381794caeb85b..c1aefee3793c9 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -861,12 +861,14 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
   setOperationAction(ISD::FP_ROUND, MVT::v4bf16, Custom);
 
   // AArch64 has implementations of a lot of rounding-like FP operations.
+  // clang-format off
   for (auto Op :
        {ISD::FFLOOR, ISD::FNEARBYINT, ISD::FCEIL, ISD::FRINT, ISD::FTRUNC,
         ISD::FROUND, ISD::FROUNDEVEN, ISD::FMINNUM, ISD::FMAXNUM,
         ISD::FMINIMUM, ISD::FMAXIMUM, ISD::LROUND, ISD::LLROUND, ISD::LRINT,
         ISD::LLRINT,
+        ISD::FMINNUM_IEEE, ISD::FMAXNUM_IEEE,
         ISD::STRICT_FFLOOR, ISD::STRICT_FCEIL, ISD::STRICT_FNEARBYINT,
         ISD::STRICT_FRINT, ISD::STRICT_FTRUNC, ISD::STRICT_FROUNDEVEN,
         ISD::STRICT_FROUND, ISD::STRICT_FMINNUM, ISD::STRICT_FMAXNUM,
@@ -877,6 +879,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
     if (Subtarget->hasFullFP16())
       setOperationAction(Op, MVT::f16, Legal);
   }
+  // clang-format on
 
   // Basic strict FP operations are legal
   for (auto Op : {ISD::STRICT_FADD, ISD::STRICT_FSUB, ISD::STRICT_FMUL,
@@ -1194,6 +1197,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
                     ISD::FEXP10, ISD::FRINT, ISD::FROUND, ISD::FROUNDEVEN,
                     ISD::FTRUNC, ISD::FMINNUM, ISD::FMAXNUM, ISD::FMINIMUM,
                     ISD::FMAXIMUM,
+                    ISD::FMAXNUM_IEEE, ISD::FMINNUM_IEEE,
                     ISD::STRICT_FADD, ISD::STRICT_FSUB, ISD::STRICT_FMUL,
                     ISD::STRICT_FDIV, ISD::STRICT_FMA, ISD::STRICT_FCEIL,
                     ISD::STRICT_FFLOOR, ISD::STRICT_FSQRT, ISD::STRICT_FRINT,
@@ -1202,6 +1206,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
                     ISD::STRICT_FMINIMUM, ISD::STRICT_FMAXIMUM})
     setOperationAction(Op, MVT::v1f64, Expand);
   // clang-format on
+
   for (auto Op :
        {ISD::FP_TO_SINT, ISD::FP_TO_UINT, ISD::SINT_TO_FP, ISD::UINT_TO_FP,
         ISD::FP_ROUND, ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT, ISD::MUL,
@@ -1345,12 +1350,12 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
     }
   }
 
-  // AArch64 has implementations of a lot of rounding-like FP operations.
   for (auto Op :
        {ISD::FFLOOR, ISD::FNEARBYINT, ISD::FCEIL, ISD::FRINT, ISD::FTRUNC,
-        ISD::FROUND, ISD::FROUNDEVEN, ISD::STRICT_FFLOOR,
-        ISD::STRICT_FNEARBYINT, ISD::STRICT_FCEIL, ISD::STRICT_FRINT,
-        ISD::STRICT_FTRUNC, ISD::STRICT_FROUND, ISD::STRICT_FROUNDEVEN}) {
+        ISD::FROUND, ISD::FROUNDEVEN, ISD::FMAXNUM_IEEE, ISD::FMINNUM_IEEE,
+        ISD::STRICT_FFLOOR, ISD::STRICT_FNEARBYINT, ISD::STRICT_FCEIL,
+        ISD::STRICT_FRINT, ISD::STRICT_FTRUNC, ISD::STRICT_FROUND,
+        ISD::STRICT_FROUNDEVEN}) {
     for (MVT Ty : {MVT::v2f32, MVT::v4f32, MVT::v2f64})
       setOperationAction(Op, Ty, Legal);
     if (Subtarget->hasFullFP16())
@@ -1961,10 +1966,10 @@ void AArch64TargetLowering::addTypeForNEON(MVT VT) {
       (VT.getVectorElementType() != MVT::f16 || Subtarget->hasFullFP16()))
     for (unsigned Opcode :
          {ISD::FMINIMUM, ISD::FMAXIMUM, ISD::FMINNUM, ISD::FMAXNUM,
-          ISD::STRICT_FMINIMUM, ISD::STRICT_FMAXIMUM, ISD::STRICT_FMINNUM,
-          ISD::STRICT_FMAXNUM, ISD::STRICT_FADD, ISD::STRICT_FSUB,
-          ISD::STRICT_FMUL, ISD::STRICT_FDIV, ISD::STRICT_FMA,
-          ISD::STRICT_FSQRT})
+          ISD::FMINNUM_IEEE, ISD::FMAXNUM_IEEE, ISD::STRICT_FMINIMUM,
+          ISD::STRICT_FMAXIMUM, ISD::STRICT_FMINNUM, ISD::STRICT_FMAXNUM,
+          ISD::STRICT_FADD, ISD::STRICT_FSUB, ISD::STRICT_FMUL,
+          ISD::STRICT_FDIV, ISD::STRICT_FMA, ISD::STRICT_FSQRT})
       setOperationAction(Opcode, VT, Legal);
 
   // Strict fp extend and trunc are legal
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 4374d92a5b7b1..33d05d6039b09 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -5050,6 +5050,19 @@ def : Pat<(v1f64 (fmaxnum (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
 def : Pat<(v1f64 (fminnum (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
           (FMINNMDrr FPR64:$Rn, FPR64:$Rm)>;
 
+def : Pat<(fminnum_ieee (f64 FPR64:$a), (f64 FPR64:$b)),
+          (FMINNMDrr FPR64:$a, FPR64:$b)>;
+def : Pat<(fminnum_ieee (f32 FPR32:$a), (f32 FPR32:$b)),
+          (FMINNMSrr FPR32:$a, FPR32:$b)>;
+def : Pat<(fminnum_ieee (f16 FPR16:$a), (f16 FPR16:$b)),
+          (FMINNMHrr FPR16:$a, FPR16:$b)>;
+def : Pat<(fmaxnum_ieee (f64 FPR64:$a), (f64 FPR64:$b)),
+          (FMAXNMDrr FPR64:$a, FPR64:$b)>;
+def : Pat<(fmaxnum_ieee (f32 FPR32:$a), (f32 FPR32:$b)),
+          (FMAXNMSrr FPR32:$a, FPR32:$b)>;
+def : Pat<(fmaxnum_ieee (f16 FPR16:$a), (f16 FPR16:$b)),
+          (FMAXNMHrr FPR16:$a, FPR16:$b)>;
+
 //===----------------------------------------------------------------------===//
 // Floating point three operand instructions.
 //===----------------------------------------------------------------------===//
@@ -5554,6 +5567,27 @@ defm FMINNM : SIMDThreeSameVectorFP<0,1,0b000,"fminnm", any_fminnum>;
 defm FMINP : SIMDThreeSameVectorFP<1,1,0b110,"fminp", int_aarch64_neon_fminp>;
 defm FMIN : SIMDThreeSameVectorFP<0,1,0b110,"fmin", any_fminimum>;
 
+def : Pat<(v2f64 (fminnum_ieee (v2f64 V128:$Rn), (v2f64 V128:$Rm))),
+          (v2f64 (FMINNMv2f64 (v2f64 V128:$Rn), (v2f64 V128:$Rm)))>;
+def : Pat<(v4f32 (fminnum_ieee (v4f32 V128:$Rn), (v4f32 V128:$Rm))),
+          (v4f32 (FMINNMv4f32 (v4f32 V128:$Rn), (v4f32 V128:$Rm)))>;
+def : Pat<(v8f16 (fminnum_ieee (v8f16 V128:$Rn), (v8f16 V128:$Rm))),
+          (v8f16 (FMINNMv8f16 (v8f16 V128:$Rn), (v8f16 V128:$Rm)))>;
+def : Pat<(v2f32 (fminnum_ieee (v2f32 V64:$Rn), (v2f32 V64:$Rm))),
+          (v2f32 (FMINNMv2f32 (v2f32 V64:$Rn), (v2f32 V64:$Rm)))>;
+def : Pat<(v4f16 (fminnum_ieee (v4f16 V64:$Rn), (v4f16 V64:$Rm))),
+          (v4f16 (FMINNMv4f16 (v4f16 V64:$Rn), (v4f16 V64:$Rm)))>;
+def : Pat<(v2f64 (fmaxnum_ieee (v2f64 V128:$Rn), (v2f64 V128:$Rm))),
+          (v2f64 (FMAXNMv2f64 (v2f64 V128:$Rn), (v2f64 V128:$Rm)))>;
+def : Pat<(v4f32 (fmaxnum_ieee (v4f32 V128:$Rn), (v4f32 V128:$Rm))),
+          (v4f32 (FMAXNMv4f32 (v4f32 V128:$Rn), (v4f32 V128:$Rm)))>;
+def : Pat<(v8f16 (fmaxnum_ieee (v8f16 V128:$Rn), (v8f16 V128:$Rm))),
+          (v8f16 (FMAXNMv8f16 (v8f16 V128:$Rn), (v8f16 V128:$Rm)))>;
+def : Pat<(v2f32 (fmaxnum_ieee (v2f32 V64:$Rn), (v2f32 V64:$Rm))),
+          (v2f32 (FMAXNMv2f32 (v2f32 V64:$Rn), (v2f32 V64:$Rm)))>;
+def : Pat<(v4f16 (fmaxnum_ieee (v4f16 V64:$Rn), (v4f16 V64:$Rm))),
+          (v4f16 (FMAXNMv4f16 (v4f16 V64:$Rn), (v4f16 V64:$Rm)))>;
+
 // NOTE: The operands of the PatFrag are reordered on FMLA/FMLS because the
 // instruction expects the addend first, while the fma intrinsic puts it last.
 defm FMLA : SIMDThreeSameVectorFPTied<0, 0, 0b001, "fmla",
diff --git a/llvm/test/CodeGen/AArch64/combine_andor_with_cmps.ll b/llvm/test/CodeGen/AArch64/combine_andor_with_cmps.ll
index 783683cf7e844..89cb25f3d9d75 100644
--- a/llvm/test/CodeGen/AArch64/combine_andor_with_cmps.ll
+++ b/llvm/test/CodeGen/AArch64/combine_andor_with_cmps.ll
@@ -31,9 +31,6 @@ define i1 @test2(double %arg1, double %arg2, double %arg3) #0 {
   ret i1 %or1
 }
 
-; It is illegal to apply the optimization in the following two test cases
-; because FMINNUM_IEEE and FMAXNUM_IEEE are not supported.
-
 define i1 @test3(float %arg1, float %arg2, float %arg3) {
 ; CHECK-LABEL: test3:
 ; CHECK:       // %bb.0:
@@ -41,8 +38,8 @@ define i1 @test3(float %arg1, float %arg2, float %arg3) {
 ; CHECK-NEXT:    fadd s0, s0, s3
 ; CHECK-NEXT:    fmov s3, #2.00000000
 ; CHECK-NEXT:    fadd s1, s1, s3
-; CHECK-NEXT:    fcmp s1, s2
-; CHECK-NEXT:    fccmp s0, s2, #0, lt
+; CHECK-NEXT:    fmaxnm s0, s0, s1
+; CHECK-NEXT:    fcmp s0, s2
 ; CHECK-NEXT:    cset w0, lt
 ; CHECK-NEXT:    ret
   %add1 = fadd nnan float %arg1, 1.0
@@ -60,8 +57,8 @@ define i1 @test4(float %arg1, float %arg2, float %arg3) {
 ; CHECK-NEXT:    fadd s0, s0, s3
 ; CHECK-NEXT:    fmov s3, #2.00000000
 ; CHECK-NEXT:    fadd s1, s1, s3
-; CHECK-NEXT:    fcmp s1, s2
-; CHECK-NEXT:    fccmp s0, s2, #4, gt
+; CHECK-NEXT:    fminnm s0, s0, s1
+; CHECK-NEXT:    fcmp s0, s2
 ; CHECK-NEXT:    cset w0, gt
 ; CHECK-NEXT:    ret
   %add1 = fadd nnan float %arg1, 1.0
diff --git a/llvm/test/CodeGen/AArch64/fp-maximumnum-minimumnum.ll b/llvm/test/CodeGen/AArch64/fp-maximumnum-minimumnum.ll
new file mode 100644
index 0000000000000..b8406179f3cb3
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/fp-maximumnum-minimumnum.ll
@@ -0,0 +1,474 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=aarch64 --mattr=+fullfp16 < %s | FileCheck %s --check-prefix=AARCH64
+
+;;;;;;;;;;;;;;;; max_f64
+define double @max_nnan_f64(double %a, double %b) {
+; AARCH64-LABEL: max_nnan_f64:
+; AARCH64:       // %bb.0: // %entry
+; AARCH64-NEXT:    fmaxnm d0, d0, d1
+; AARCH64-NEXT:    ret
+entry:
+  %c = call nnan double @llvm.maximumnum.f64(double %a, double %b)
+  ret double %c
+}
+
+define <2 x double> @max_nnan_v2f64(<2 x double> %a, <2 x double> %b) {
+; AARCH64-LABEL: max_nnan_v2f64:
+; AARCH64:       // %bb.0: // %entry
+; AARCH64-NEXT:    fmaxnm v0.2d, v0.2d, v1.2d
+; AARCH64-NEXT:    ret
+entry:
+  %c = call nnan <2 x double> @llvm.maximumnum.v2f64(<2 x double> %a, <2 x double> %b)
+  ret <2 x double> %c
+}
+
+define <3 x double> @max_nnan_v3f64(<3 x double> %a, <3 x double> %b) {
+; AARCH64-LABEL: max_nnan_v3f64:
+; AARCH64:       // %bb.0: // %entry
+; AARCH64-NEXT:    // kill: def $d3 killed $d3 def $q3
+; AARCH64-NEXT:    // kill: def $d0 killed $d0 def $q0
+; AARCH64-NEXT:    // kill: def $d4 killed $d4 def $q4
+; AARCH64-NEXT:    // kill: def $d1 killed $d1 def $q1
+; AARCH64-NEXT:    // kill: def $d2 killed $d2 def $q2
+; AARCH64-NEXT:    // kill: def $d5 killed $d5 def $q5
+; AARCH64-NEXT:    mov v3.d[1], v4.d[0]
+; AARCH64-NEXT:    mov v0.d[1], v1.d[0]
+; AARCH64-NEXT:    fmaxnm v2.2d, v2.2d, v5.2d
+; AARCH64-NEXT:    // kill: def $d2 killed $d2 killed $q2
+; AARCH64-NEXT:    fmaxnm v0.2d, v0.2d, v3.2d
+; AARCH64-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; AARCH64-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; AARCH64-NEXT:    // kill: def $d1 killed $d1 killed $q1
+; AARCH64-NEXT:    ret
+entry:
+  %c = call nnan <3 x double> @llvm.maximumnum.v3f64(<3 x double> %a, <3 x double> %b)
+  ret <3 x double> %c
+}
+
+define <4 x double> @max_nnan_v4f64(<4 x double> %a, <4 x double> %b) {
+; AARCH64-LABEL: max_nnan_v4f64:
+; AARCH64:       // %bb.0: // %entry
+; AARCH64-NEXT:    fmaxnm v1.2d, v1.2d, v3.2d
+; AARCH64-NEXT:    fmaxnm v0.2d, v0.2d, v2.2d
+; AARCH64-NEXT:    ret
+entry:
+  %c = call nnan <4 x double> @llvm.maximumnum.v4f64(<4 x double> %a, <4 x double> %b)
+  ret <4 x double> %c
+}
+
+;;;;;;;;;;;;;;;;;; max_f32
+define float @max_nnan_f32(float %a, float %b) {
+; AARCH64-LABEL: max_nnan_f32:
+; AARCH64:       // %bb.0: // %entry
+; AARCH64-NEXT:    fmaxnm s0, s0, s1
+; AARCH64-NEXT:    ret
+entry:
+  %c = call nnan float @llvm.maximumnum.f32(float %a, float %b)
+  ret float %c
+}
+
+define <2 x float> @max_nnan_v2f32(<2 x float> %a, <2 x float> %b) {
+; AARCH64-LABEL: max_nnan_v2f32:
+; AARCH64:       // %bb.0: // %entry
+; AARCH64-NEXT:    fmaxnm v0.2s, v0.2s, v1.2s
+; AARCH64-NEXT:    ret
+entry:
+  %c = call nnan <2 x float> @llvm.maximumnum.v2f32(<2 x float> %a, <2 x float> %b)
+  ret <2 x float> %c
+}
+
+define <3 x float> @max_nnan_v3f32(<3 x float> %a, <3 x float> %b) {
+; AARCH64-LABEL: max_nnan_v3f32:
+; AARCH64:       // %bb.0: // %entry
+; AARCH64-NEXT:    fmaxnm v0.4s, v0.4s, v1.4s
+; AARCH64-NEXT:    ret
+entry:
+  %c = call nnan <3 x float> @llvm.maximumnum.v3f32(<3 x float> %a, <3 x float> %b)
+  ret <3 x float> %c
+}
+
+define <4 x float> @max_nnan_v4f32(<4 x float> %a, <4 x float> %b) {
+; AARCH64-LABEL: max_nnan_v4f32:
+; AARCH64:       // %bb.0: // %entry
+; AARCH64-NEXT:    fmaxnm v0.4s, v0.4s, v1.4s
+; AARCH64-NEXT:    ret
+entry:
+  %c = call nnan <4 x float> @llvm.maximumnum.v4f32(<4 x float> %a, <4 x float> %b)
+  ret <4 x float> %c
+}
+
+define <5 x float> @max_nnan_v5f32(<5 x float> %a, <5 x float> %b) {
+; AARCH64-LABEL: max_nnan_v5f32:
+; AARCH64:       // %bb.0: // %entry
+; AARCH64-NEXT:    // kill: def $s0 killed $s0 def $q0
+; AARCH64-NEXT:    // kill: def $s5 killed $s5 def $q5
+; AARCH64-NEXT:    // kill: def $s1 killed $s1 def $q1
+; AARCH64-NEXT:    // kill: def $s6 killed $s6 def $q6
+; AARCH64-NEXT:    // kill: def $s2 killed $s2 def $q2
+; AARCH64-NEXT:    // kill: def $s7 killed $s7 def $q7
+; AARCH64-NEXT:    // kill: def $s3 killed $s3 def $q3
+; AARCH64-NEXT:    mov x8, sp
+; AARCH64-NEXT:    // kill: def $s4 killed $s4 def $q4
+; AARCH64-NEXT:    mov v0.s[1], v1.s[0]
+; AARCH64-NEXT:    mov v5.s[1], v6.s[0]
+; AARCH64-NEXT:    mov v0.s[2], v2.s[0]
+; AARCH64-NEXT:    mov v5.s[2], v7.s[0]
+; AARCH64-NEXT:    ldr s2, [sp, #8]
+; AARCH64-NEXT:    fmaxnm v4.4s, v4.4s, v2.4s
+; AARCH64-NEXT:    // kill: def $s4 killed $s4 killed $q4
+; AARCH64-NEXT:    mov v0.s[3], v3.s[0]
+; AARCH64-NEXT:    ld1 { v5.s }[3], [x8]
+; AARCH64-NEXT:    fmaxnm v0.4s, v0.4s, v5.4s
+; AARCH64-NEXT:    mov s1, v0.s[1]
+; AARCH64-NEXT:    mov s2, v0.s[2]
+; AARCH64-NEXT:    mov s3, v0.s[3]
+; AARCH64-NEXT:    // kill: def $s0 killed $s0 killed $q0
+; AARCH64-NEXT:    ret
+entry:
+  %c = call nnan <5 x float> @llvm.maximumnum.v5f32(<5 x float> %a, <5 x float> %b)
+  ret <5 x float> %c
+}
+
+define <8 x float> @max_nnan_v8f32(<8 x float> %a, <8 x float> %b) {
+; AARCH64-LABEL: max_nnan_v8f32:
+; AARCH64:       // %bb.0: // %entry
+; AARCH64-NEXT:    fmaxnm v1.4s, v1.4s, v3.4s
+; AARCH64-NEXT:    fmaxnm v0.4s, v0.4s, v2.4s
+; AARCH64-NEXT:    ret
+entry:
+  %c = call nnan <8 x float> @llvm.maximumnum.v8f32(<8 x float> %a, <8 x float> %b)
+  ret <8 x float> %c
+}
+
+;;;;;;;;;;;;;;;;;; max_f16
+define half @max_nnan_f16(half %a, half %b) {
+; AARCH64-LABEL: max_nnan_f16:
+; AARCH64:       // %bb.0: // %entry
+; AARCH64-NEXT:    fmaxnm h0, h0, h1
+; AARCH64-NEXT:    ret
+entry:
+  %c = call nnan half @llvm.maximumnum.f16(half %a, half %b)
+  ret half %c
+}
+
+define <2 x half> @max_nnan_v2f16(<2 x half> %a, <2 x half> %b) {
+; AARCH64-LABEL: max_nnan_v2f16:
+; AARCH64:       // %bb.0: // %entry
+; AARCH64-NEXT:    fmaxnm v0.4h, v0.4h, v1.4h
+; AARCH64-NEXT:    ret
+entry:
+  %c = call nnan <2 x half> @llvm.maximumnum.v2f16(<2 x half> %a, <2 x half> %b)
+  ret <2 x half> %c
+}
+
+define <4 x half> @max_nnan_v4f16(<4 x half> %a, <4 x half> %b) {
+; AARCH64-LABEL: max_nnan_v4f16:
+; AARCH64:       // %bb.0: // %entry
+; AARCH64-NEXT:    fmaxnm v0.4h, v0.4h, v1.4h
+; AARCH64-NEXT:    ret
+entry:
+  %c = call nnan <4 x half> @llvm.maximumnum.v4f16(<4 x half> %a, <4 x half> %b)
+  ret <4 x half> %c
+}
+
+define <8 x half> @max_nnan_v8f16(<8 x half> %a, <8 x half> %b) {
+; AARCH64-LABEL: max_nnan_v8f16:
+; AARCH64:       // %bb.0: // %entry
+; AARCH64-NEXT:    fmaxnm v0.8h, v0.8h, v1.8h
+; AARCH64-NEXT:    ret
+entry:
+  %c = call nnan <8 x half> @llvm.maximumnum.v8f16(<8 x half> %a, <8 x half> %b)
+  ret <8 x half> %c
+}
+
+define <9 x half> @max_nnan_v9f16(<9 x half> %a, <9 x half> %b) {
+; AARCH64-LABEL: max_nnan_v9f16:
+; AARCH64:       // %bb.0: // %entry
+; AARCH64-NEXT:    // kill: def $h0 killed $h0 def $q0
+; AARCH64-NEXT:    // kill: def $h1 killed $h1 def $q1
+; AARCH64-NEXT:    // kill: def $h2 killed $h2 def $q2
+; AARCH64-NEXT:    add x9, sp, #16
+; AARCH64-NEXT:    // kill: def $h3 killed $h3 def $q3
+; AARCH64-NEXT:    // kill: def $h4 killed $h4 def $q4
+; AARCH64-NEXT:    // kill: def $h5 killed $h5 def $q5
+; AARCH64-NEXT:    // kill: def $h6 killed $h6 def $q6
+; AARCH64-NEXT:    // kill: def $h7 killed $h7 def $q7
+; AARCH64-NEXT:    mov v0.h[1], v1.h[0]
+; AARCH64-NEXT:    ldr h1, [sp, #8]
+; AARCH64-NEXT:    ld1 { v1.h }[1], [x9]
+; AARCH64-NEXT:    add x9, sp, #24
+; AARCH64-NEXT:    mov v0.h[2], v2.h[0]
+; AARCH64-NEXT:    ldr h2, [sp, #72]
+; AARCH64-NEXT:    ld1 { v1.h }[2], [x9]
+; AARCH64-NEXT:    add x9, sp, #32
+; AARCH64-NEXT:    mov v0.h[3], v3.h[0]
+; AARCH64-NEXT:    ld1 { v1.h }[3], [x9]
+; AARCH64-NEXT:    add x9, sp, #40
+; AARCH64-NEXT:    ldr h3, [sp]
+; AARCH64-NEXT:    ld1 { v1.h }[4], [x9]
+; AARCH64-NEXT:    add x9, sp, #48
+; AARCH64-NEXT:    fmaxnm v2.8h, v3.8h, v2.8h
+; AARCH64-NEXT:    mov v0.h[4], v4.h[0]
+; AARCH64-NEXT:    ld1 { v1.h }[5], [x9]
+; AARCH64-NEXT:    add x9, sp, #56
+; AARCH64-NEXT:    str h2, [x8, #16]
+; AARCH64-NEXT:    mov v0.h[5], v5.h[0]
+; AARCH64-NEXT:    ld1 { v1.h }[6], [x9]
+; AARCH64-NEXT:    add x9, sp, #64
+; AARCH64-NEXT:    mov v0.h[6], v6.h[0]
+; AARCH64-NEXT:    ld1 { v1.h }[7], [x9]
+; AARCH64-NEXT:    mov v0.h[7], v7.h[0]
+; AARCH64-NEXT:    fmaxnm v0.8h, v0.8h, v1.8h
+; AARCH64-NEXT:    str q0, [x8]
+; AARCH64-NEXT:    ret
+entry:
+  %c = call nnan <9 x half> @llvm.maximumnum.v9f16(<9 x half> %a, <9 x half> %b)
+  ret <9 x half> %c
+}
+
+define <16 x half> @max_nnan_v16f16(<16 x half> %a, <16 x half> %b) {
+; AARCH64-LABEL: max_nnan_v16f16:
+; AARCH64:       // %bb.0: // %entry
+; AARCH64-NEXT:    fmaxnm v1.8h, v1.8h, v3.8h
+; AARCH64-NEXT:    fmaxnm v0.8h, v0.8h, v2.8h
+; AARCH64-NEXT:    ret
+entry:
+  %c = call nnan <16 x half> @llvm.maximumnum.v16f16(<16 x half> %a, <16 x half> %b)
+  ret <16 x half> %c
+}
+
+;;;;;;;;;;;;;;;; min_f64
+define double @min_nnan_f64(double %a, double %b) {
+; AARCH64-LABEL: min_nnan_f64:
+; AARCH64:       // %bb.0: // %entry
+; AARCH64-NEXT:    fminnm d0, d0, d1
+; AARCH64-NEXT:    ret
+entry:
+  %c = call nnan double @llvm.minimumnum.f64(double %a, double %b)
+  ret double %c
+}
+
+define <2 x double> @min_nnan_v2f64(<2 x double> %a, <2 x double> %b) {
+; AARCH64-LABEL: min_nnan_v2f64:
+; AARCH64:       // %bb.0: // %entry
+; AARCH64-NEXT:    fminnm v0.2d, v0.2d, v1.2d
+; AARCH64-NEXT:    ret
+entry:
+  %c = call nnan <2 x double> @llvm.minimumnum.v2f64(<2 x double> %a, <2 x double> %b)
+  ret <2 x double> %c
+}
+
+define <3 x double> @min_nnan_v3f64(<3 x double> %a, <3 x double> %b) {
+; AARCH64-LABEL: min_nnan_v3f64:
+; AARCH64:       // %bb.0: // %entry
+; AARCH64-NEXT:    // kill: def $d3 killed $d3 def $q3
+; AARCH64-NEXT:    // kill: def $d0 killed $d0 def $q0
+; AARCH64-NEXT:    // kill: def $d4 killed $d4 def $q4
+; AARCH64-NEXT:    // kill: def $d1 killed $d1 def $q1
+; AARCH64-NEXT:    // kill: def $d2 killed $d2 def $q2
+; AARCH64-NEXT:    // kill: def $d5 killed $d5 def $q5
+; AARCH64-NEXT:    mov v3.d[1], v4.d[0]
+; AARCH64-NEXT:    mov v0.d[1], v1.d[0]
+; AARCH64-NEXT:    fminnm v2.2d, v2.2d, v5.2d
+; AARCH64-NEXT:    // kill: def $d2 killed $d2 killed $q2
+; AARCH64-NEXT:    fminnm v0.2d, v0.2d, v3.2d
+; AARCH64-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; AARCH64-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; AARCH64-NEXT:    // kill: def $d1 killed $d1 killed $q1
+; AARCH64-NEXT:    ret
+entry:
+  %c = call nnan <3 x double> @llvm.minimumnum.v3f64(<3 x double> %a, <3 x double> %b)
+  ret <3 x double> %c
+}
+
+define <4 x double> @min_nnan_v4f64(<4 x double> %a, <4 x double> %b) {
+; AARCH64-LABEL: min_nnan_v4f64:
+; AARCH64:       // %bb.0: // %entry
+; AARCH64-NEXT:    fminnm v1.2d, v1.2d, v3.2d
+; AARCH64-NEXT:    fminnm v0.2d, v0.2d, v2.2d
+; AARCH64-NEXT:    ret
+entry:
+  %c = call nnan <4 x double> @llvm.minimumnum.v4f64(<4 x double> %a, <4 x double> %b)
+  ret <4 x double> %c
+}
+
+;;;;;;;;;;;;;;;;;; min_f32
+define float @min_nnan_f32(float %a, float %b) {
+; AARCH64-LABEL: min_nnan_f32:
+; AARCH64:       // %bb.0: // %entry
+; AARCH64-NEXT:    fminnm s0, s0, s1
+; AARCH64-NEXT:    ret
+entry:
+  %c = call nnan float @llvm.minimumnum.f32(float %a, float %b)
+  ret float %c
+}
+
+define <2 x float> @min_nnan_v2f32(<2 x float> %a, <2 x float> %b) {
+; AARCH64-LABEL: min_nnan_v2f32:
+; AARCH64:       // %bb.0: // %entry
+; AARCH64-NEXT:    fminnm v0.2s, v0.2s, v1.2s
+; AARCH64-NEXT:    ret
+entry:
+  %c = call nnan <2 x float> @llvm.minimumnum.v2f32(<2 x float> %a, <2 x float> %b)
+  ret <2 x float> %c
+}
+
+define <3 x float> @min_nnan_v3f32(<3 x float> %a, <3 x float> %b) {
+; AARCH64-LABEL: min_nnan_v3f32:
+; AARCH64:       // %bb.0: // %entry
+; AARCH64-NEXT:    fminnm v0.4s, v0.4s, v1.4s
+; AARCH64-NEXT:    ret
+entry:
+  %c = call nnan <3 x float> @llvm.minimumnum.v3f32(<3 x float> %a, <3 x float> %b)
+  ret <3 x float> %c
+}
+
+define <4 x float> @min_nnan_v4f32(<4 x float> %a, <4 x float> %b) {
+; AARCH64-LABEL: min_nnan_v4f32:
+; AARCH64:       // %bb.0: // %entry
+; AARCH64-NEXT:    fminnm v0.4s, v0.4s, v1.4s
+; AARCH64-NEXT:    ret
+entry:
+  %c = call nnan <4 x float> @llvm.minimumnum.v4f32(<4 x float> %a, <4 x float> %b)
+  ret <4 x float> %c
+}
+
+define <5 x float> @min_nnan_v5f32(<5 x float> %a, <5 x float> %b) {
+; AARCH64-LABEL: min_nnan_v5f32:
+; AARCH64:       // %bb.0: // %entry
+; AARCH64-NEXT:    // kill: def $s0 killed $s0 def $q0
+; AARCH64-NEXT:    // kill: def $s5 killed $s5 def $q5
+; AARCH64-NEXT:    // kill: def $s1 killed $s1 def $q1
+; AARCH64-NEXT:    // kill: def $s6 killed $s6 def $q6
+; AARCH64-NEXT:    // kill: def $s2 killed $s2 def $q2
+; AARCH64-NEXT:    // kill: def $s7 killed $s7 def $q7
+; AARCH64-NEXT:    // kill: def $s3 killed $s3 def $q3
+; AARCH64-NEXT:    mov x8, sp
+; AARCH64-NEXT:    // kill: def $s4 killed $s4 def $q4
+; AARCH64-NEXT:    mov v0.s[1], v1.s[0]
+; AARCH64-NEXT:    mov v5.s[1], v6.s[0]
+; AARCH64-NEXT:    mov v0.s[2], v2.s[0]
+; AARCH64-NEXT:    mov v5.s[2], v7.s[0]
+; AARCH64-NEXT:    ldr s2, [sp, #8]
+; AARCH64-NEXT:    fminnm v4.4s, v4.4s, v2.4s
+; AARCH64-NEXT:    // kill: def $s4 killed $s4 killed $q4
+; AARCH64-NEXT:    mov v0.s[3], v3.s[0]
+; AARCH64-NEXT:    ld1 { v5.s }[3], [x8]
+; AARCH64-NEXT:    fminnm v0.4s, v0.4s, v5.4s
+; AARCH64-NEXT:    mov s1, v0.s[1]
+; AARCH64-NEXT:    mov s2, v0.s[2]
+; AARCH64-NEXT:    mov s3, v0.s[3]
+; AARCH64-NEXT:    // kill: def $s0 killed $s0 killed $q0
+; AARCH64-NEXT:    ret
+entry:
+  %c = call nnan <5 x float> @llvm.minimumnum.v5f32(<5 x float> %a, <5 x float> %b)
+  ret <5 x float> %c
+}
+
+define <8 x float> @min_nnan_v8f32(<8 x float> %a, <8 x float> %b) {
+; AARCH64-LABEL: min_nnan_v8f32:
+; AARCH64:       // %bb.0: // %entry
+; AARCH64-NEXT:    fminnm v1.4s, v1.4s, v3.4s
+; AARCH64-NEXT:    fminnm v0.4s, v0.4s, v2.4s
+; AARCH64-NEXT:    ret
+entry:
+  %c = call nnan <8 x float> @llvm.minimumnum.v8f32(<8 x float> %a, <8 x float> %b)
+  ret <8 x float> %c
+}
+
+;;;;;;;;;;;;;;;;;; min_f16
+define half @min_nnan_f16(half %a, half %b) {
+; AARCH64-LABEL: min_nnan_f16:
+; AARCH64:       // %bb.0: // %entry
+; AARCH64-NEXT:    fminnm h0, h0, h1
+; AARCH64-NEXT:    ret
+entry:
+  %c = call nnan half @llvm.minimumnum.f16(half %a, half %b)
+  ret half %c
+}
+
+define <2 x half> @min_nnan_v2f16(<2 x half> %a, <2 x half> %b) {
+; AARCH64-LABEL: min_nnan_v2f16:
+; AARCH64:       // %bb.0: // %entry
+; AARCH64-NEXT:    fminnm v0.4h, v0.4h, v1.4h
+; AARCH64-NEXT:    ret
+entry:
+  %c = call nnan <2 x half> @llvm.minimumnum.v2f16(<2 x half> %a, <2 x half> %b)
+  ret <2 x half> %c
+}
+
+define <4 x half> @min_nnan_v4f16(<4 x half> %a, <4 x half> %b) {
+; AARCH64-LABEL: min_nnan_v4f16:
+; AARCH64:       // %bb.0: // %entry
+; AARCH64-NEXT:    fminnm v0.4h, v0.4h, v1.4h
+; AARCH64-NEXT:    ret
+entry:
+  %c = call nnan <4 x half> @llvm.minimumnum.v4f16(<4 x half> %a, <4 x half> %b)
+  ret <4 x half> %c
+}
+
+define <8 x half> @min_nnan_v8f16(<8 x half> %a, <8 x half> %b) {
+; AARCH64-LABEL: min_nnan_v8f16:
+; AARCH64:       // %bb.0: // %entry
+; AARCH64-NEXT:    fminnm v0.8h, v0.8h, v1.8h
+; AARCH64-NEXT:    ret
+entry:
+  %c = call nnan <8 x half> @llvm.minimumnum.v8f16(<8 x half> %a, <8 x half> %b)
+  ret <8 x half> %c
+}
+
+define <9 x half> @min_nnan_v9f16(<9 x half> %a, <9 x half> %b) {
+; AARCH64-LABEL: min_nnan_v9f16:
+; AARCH64:       // %bb.0: // %entry
+; AARCH64-NEXT:    // kill: def $h0 killed $h0 def $q0
+; AARCH64-NEXT:    // kill: def $h1 killed $h1 def $q1
+; AARCH64-NEXT:    // kill: def $h2 killed $h2 def $q2
+; AARCH64-NEXT:    add x9, sp, #16
+; AARCH64-NEXT:    // kill: def $h3 killed $h3 def $q3
+; AARCH64-NEXT:    // kill: def $h4 killed $h4 def $q4
+; AARCH64-NEXT:    // kill: def $h5 killed $h5 def $q5
+; AARCH64-NEXT:    // kill: def $h6 killed $h6 def $q6
+; AARCH64-NEXT:    // kill: def $h7 killed $h7 def $q7
+; AARCH64-NEXT:    mov v0.h[1], v1.h[0]
+; AARCH64-NEXT:    ldr h1, [sp, #8]
+; AARCH64-NEXT:    ld1 { v1.h }[1], [x9]
+; AARCH64-NEXT:    add x9, sp, #24
+; AARCH64-NEXT:    mov v0.h[2], v2.h[0]
+; AARCH64-NEXT:    ldr h2, [sp, #72]
+; AARCH64-NEXT:    ld1 { v1.h }[2], [x9]
+; AARCH64-NEXT:    add x9, sp, #32
+; AARCH64-NEXT:    mov v0.h[3], v3.h[0]
+; AARCH64-NEXT:    ld1 { v1.h }[3], [x9]
+; AARCH64-NEXT:    add x9, sp, #40
+; AARCH64-NEXT:    ldr h3, [sp]
+; AARCH64-NEXT:    ld1 { v1.h }[4], [x9]
+; AARCH64-NEXT:    add x9, sp, #48
+; AARCH64-NEXT:    fminnm v2.8h, v3.8h, v2.8h
+; AARCH64-NEXT:    mov v0.h[4], v4.h[0]
+; AARCH64-NEXT:    ld1 { v1.h }[5], [x9]
+; AARCH64-NEXT:    add x9, sp, #56
+; AARCH64-NEXT:    str h2, [x8, #16]
+; AARCH64-NEXT:    mov v0.h[5], v5.h[0]
+; AARCH64-NEXT:    ld1 { v1.h }[6], [x9]
+; AARCH64-NEXT:    add x9, sp, #64
+; AARCH64-NEXT:    mov v0.h[6], v6.h[0]
+; AARCH64-NEXT:    ld1 { v1.h }[7], [x9]
+; AARCH64-NEXT:    mov v0.h[7], v7.h[0]
+; AARCH64-NEXT:    fminnm v0.8h, v0.8h, v1.8h
+; AARCH64-NEXT:    str q0, [x8]
+; AARCH64-NEXT:    ret
+entry:
+  %c = call nnan <9 x half> @llvm.minimumnum.v9f16(<9 x half> %a, <9 x half> %b)
+  ret <9 x half> %c
+}
+
+define <16 x half> @min_nnan_v16f16(<16 x half> %a, <16 x half> %b) {
+; AARCH64-LABEL: min_nnan_v16f16:
+; AARCH64:       // %bb.0: // %entry
+; AARCH64-NEXT:    fminnm v1.8h, v1.8h, v3.8h
+; AARCH64-NEXT:    fminnm v0.8h, v0.8h, v2.8h
+; AARCH64-NEXT:    ret
+entry:
+  %c = call nnan <16 x half> @llvm.minimumnum.v16f16(<16 x half> %a, <16 x half> %b)
+  ret <16 x half> %c
+}