Skip to content

Commit 4a8f2f2

Browse files
authored
[Legalizer] Expand fmaximum and fminimum (#67301)
According to the LangRef, llvm.maximum/llvm.minimum treat -0.0 as less than +0.0 and propagate NaN. Expand these nodes on targets that do not support the operation by adding an extra check for NaN and using is_fpclass to check the sign of zero.
1 parent e2b8af7 commit 4a8f2f2

File tree

9 files changed

+1040
-31
lines changed

9 files changed

+1040
-31
lines changed

llvm/include/llvm/CodeGen/TargetLowering.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5238,6 +5238,9 @@ class TargetLowering : public TargetLoweringBase {
52385238
/// Expand fminnum/fmaxnum into fminnum_ieee/fmaxnum_ieee with quieted inputs.
52395239
SDValue expandFMINNUM_FMAXNUM(SDNode *N, SelectionDAG &DAG) const;
52405240

5241+
/// Expand fminimum/fmaximum into multiple comparisons with selects.
5242+
SDValue expandFMINIMUM_FMAXIMUM(SDNode *N, SelectionDAG &DAG) const;
5243+
52415244
/// Expand FP_TO_[US]INT_SAT into FP_TO_[US]INT and selects or min/max.
52425245
/// \param N Node to expand
52435246
/// \returns The expansion result

llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3556,6 +3556,12 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
35563556
Results.push_back(Expanded);
35573557
break;
35583558
}
3559+
case ISD::FMINIMUM:
3560+
case ISD::FMAXIMUM: {
3561+
if (SDValue Expanded = TLI.expandFMINIMUM_FMAXIMUM(Node, DAG))
3562+
Results.push_back(Expanded);
3563+
break;
3564+
}
35593565
case ISD::FSIN:
35603566
case ISD::FCOS: {
35613567
EVT VT = Node->getValueType(0);

llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1049,6 +1049,13 @@ void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) {
10491049
return;
10501050
}
10511051
break;
1052+
case ISD::FMINIMUM:
1053+
case ISD::FMAXIMUM:
1054+
if (SDValue Expanded = TLI.expandFMINIMUM_FMAXIMUM(Node, DAG)) {
1055+
Results.push_back(Expanded);
1056+
return;
1057+
}
1058+
break;
10521059
case ISD::SMIN:
10531060
case ISD::SMAX:
10541061
case ISD::UMIN:

llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8381,6 +8381,64 @@ SDValue TargetLowering::expandFMINNUM_FMAXNUM(SDNode *Node,
83818381
return SDValue();
83828382
}
83838383

8384+
/// Expand an ISD::FMINIMUM / ISD::FMAXIMUM node into simpler nodes.
///
/// The expansion is built in three steps:
///   1. a min/max that does not have to propagate NaN (FMINNUM_IEEE/
///      FMAXNUM_IEEE, FMINNUM/FMAXNUM, or a plain setcc + select),
///   2. an explicit select that yields NaN when the operands are unordered,
///   3. a fix-up that orders -0.0 below +0.0 when the result is a zero.
/// Steps 2 and 3 are skipped when node flags or known-value analysis show
/// they are unnecessary.
///
/// \param N   The FMINIMUM or FMAXIMUM node to expand.
/// \param DAG The SelectionDAG in which the replacement nodes are created.
/// \returns The expanded value, or an empty SDValue when \p N is a vector
///          operation whose scalar form is legal, custom or promoted.
SDValue TargetLowering::expandFMINIMUM_FMAXIMUM(SDNode *N,
                                                SelectionDAG &DAG) const {
  SDLoc DL(N);
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);
  unsigned Opc = N->getOpcode();
  EVT VT = N->getValueType(0);
  EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
  bool IsMax = Opc == ISD::FMAXIMUM;

  // Do not expand a vector node here when the operation is available on the
  // scalar element type; return an empty value so the caller handles it.
  if (VT.isVector() &&
      isOperationLegalOrCustomOrPromote(Opc, VT.getScalarType()))
    return SDValue();

  // First, implement comparison not propagating NaN. If no native fmin or fmax
  // available, use plain select with setcc instead.
  SDValue MinMax;
  unsigned CompOpcIeee = IsMax ? ISD::FMAXNUM_IEEE : ISD::FMINNUM_IEEE;
  unsigned CompOpc = IsMax ? ISD::FMAXNUM : ISD::FMINNUM;
  if (isOperationLegalOrCustom(CompOpcIeee, VT)) {
    MinMax = DAG.getNode(CompOpcIeee, DL, VT, LHS, RHS);
  } else if (isOperationLegalOrCustom(CompOpc, VT)) {
    MinMax = DAG.getNode(CompOpc, DL, VT, LHS, RHS);
  } else {
    // NaN (if present) is propagated below, so whether this comparison is
    // ordered or unordered does not matter.
    SDValue Compare =
        DAG.getSetCC(DL, CCVT, LHS, RHS, IsMax ? ISD::SETGT : ISD::SETLT);
    MinMax = DAG.getSelect(DL, VT, Compare, LHS, RHS);
  }

  // Propagate NaN from either operand: if LHS and RHS compare unordered, the
  // result is NaN. Skipped when the node carries the no-NaNs flag or both
  // operands are known never to be NaN.
  if (!N->getFlags().hasNoNaNs() &&
      (!DAG.isKnownNeverNaN(RHS) || !DAG.isKnownNeverNaN(LHS))) {
    ConstantFP *FPNaN = ConstantFP::get(
        *DAG.getContext(), APFloat::getNaN(DAG.EVTToAPFloatSemantics(VT)));
    MinMax = DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, LHS, RHS, ISD::SETUO),
                           DAG.getConstantFP(*FPNaN, DL, VT), MinMax);
  }

  // fminimum/fmaximum requires -0.0 less than +0.0. The fix-up is only needed
  // when both operands may be zero and signed zeros are not ignored.
  if (!N->getFlags().hasNoSignedZeros() && !DAG.isKnownNeverZeroFloat(RHS) &&
      !DAG.isKnownNeverZeroFloat(LHS)) {
    // MinMax may already be NaN after the propagation step above, so the zero
    // test must be an ordered compare: with SETOEQ a NaN never compares equal
    // and the NaN result is kept, whereas SETEQ leaves the outcome undefined
    // for NaN inputs and could let a zero operand replace the NaN.
    SDValue IsZero = DAG.getSetCC(DL, CCVT, MinMax,
                                  DAG.getConstantFP(0.0, DL, VT), ISD::SETOEQ);
    // Class of zero that must win: +0.0 for maximum, -0.0 for minimum.
    SDValue TestZero =
        DAG.getTargetConstant(IsMax ? fcPosZero : fcNegZero, DL, MVT::i32);
    // Prefer LHS if it is the preferred zero, otherwise keep MinMax ...
    SDValue LCmp = DAG.getSelect(
        DL, VT, DAG.getNode(ISD::IS_FPCLASS, DL, CCVT, LHS, TestZero), LHS,
        MinMax);
    // ... and let RHS take precedence when it is the preferred zero.
    SDValue RCmp = DAG.getSelect(
        DL, VT, DAG.getNode(ISD::IS_FPCLASS, DL, CCVT, RHS, TestZero), RHS,
        LCmp);
    // Apply the fix-up only when the computed result is a zero.
    MinMax = DAG.getSelect(DL, VT, IsZero, RCmp, MinMax);
  }

  return MinMax;
}
8441+
83848442
/// Returns a true value if this FPClassTest can be performed with an ordered
83858443
/// fcmp to 0, and a false value if it's an unordered fcmp to 0. Returns
83868444
/// std::nullopt if it cannot be performed as a compare with 0.

llvm/lib/Target/ARM/ARMISelLowering.cpp

Lines changed: 5 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1555,15 +1555,11 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
15551555

15561556
if (Subtarget->hasNEON()) {
15571557
// vmin and vmax aren't available in a scalar form, so we can use
1558-
// a NEON instruction with an undef lane instead. This has a performance
1559-
// penalty on some cores, so we don't do this unless we have been
1560-
// asked to by the core tuning model.
1561-
if (Subtarget->useNEONForSinglePrecisionFP()) {
1562-
setOperationAction(ISD::FMINIMUM, MVT::f32, Legal);
1563-
setOperationAction(ISD::FMAXIMUM, MVT::f32, Legal);
1564-
setOperationAction(ISD::FMINIMUM, MVT::f16, Legal);
1565-
setOperationAction(ISD::FMAXIMUM, MVT::f16, Legal);
1566-
}
1558+
// a NEON instruction with an undef lane instead.
1559+
setOperationAction(ISD::FMINIMUM, MVT::f32, Legal);
1560+
setOperationAction(ISD::FMAXIMUM, MVT::f32, Legal);
1561+
setOperationAction(ISD::FMINIMUM, MVT::f16, Legal);
1562+
setOperationAction(ISD::FMAXIMUM, MVT::f16, Legal);
15671563
setOperationAction(ISD::FMINIMUM, MVT::v2f32, Legal);
15681564
setOperationAction(ISD::FMAXIMUM, MVT::v2f32, Legal);
15691565
setOperationAction(ISD::FMINIMUM, MVT::v4f32, Legal);

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -524,8 +524,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
524524
if (Subtarget.is64Bit())
525525
setOperationAction(ISD::FPOWI, MVT::i32, Custom);
526526

527-
if (!Subtarget.hasStdExtZfa())
528-
setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f16, Custom);
527+
setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f16,
528+
Subtarget.hasStdExtZfa() ? Legal : Custom);
529529
}
530530

531531
if (Subtarget.hasStdExtFOrZfinx()) {
@@ -548,10 +548,12 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
548548
setOperationAction(ISD::FP_TO_FP16, MVT::f32, Custom);
549549
setOperationAction(ISD::FP16_TO_FP, MVT::f32, Custom);
550550

551-
if (Subtarget.hasStdExtZfa())
551+
if (Subtarget.hasStdExtZfa()) {
552552
setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal);
553-
else
553+
setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f32, Legal);
554+
} else {
554555
setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f32, Custom);
556+
}
555557
}
556558

557559
if (Subtarget.hasStdExtFOrZfinx() && Subtarget.is64Bit())
@@ -566,6 +568,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
566568
if (Subtarget.hasStdExtZfa()) {
567569
setOperationAction(FPRndMode, MVT::f64, Legal);
568570
setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal);
571+
setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f64, Legal);
569572
} else {
570573
if (Subtarget.is64Bit())
571574
setOperationAction(FPRndMode, MVT::f64, Custom);

llvm/test/CodeGen/ARM/minnum-maxnum-intrinsics.ll

Lines changed: 10 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -46,12 +46,10 @@ define float @fminnum32_intrinsic(float %x, float %y) {
4646
define float @fminnum32_nsz_intrinsic(float %x, float %y) {
4747
; ARMV7-LABEL: fminnum32_nsz_intrinsic:
4848
; ARMV7: @ %bb.0:
49-
; ARMV7-NEXT: vmov s0, r0
50-
; ARMV7-NEXT: vmov s2, r1
51-
; ARMV7-NEXT: vcmp.f32 s0, s2
52-
; ARMV7-NEXT: vmrs APSR_nzcv, fpscr
53-
; ARMV7-NEXT: vmovlt.f32 s2, s0
54-
; ARMV7-NEXT: vmov r0, s2
49+
; ARMV7-NEXT: vmov s0, r1
50+
; ARMV7-NEXT: vmov s2, r0
51+
; ARMV7-NEXT: vmin.f32 d0, d1, d0
52+
; ARMV7-NEXT: vmov r0, s0
5553
; ARMV7-NEXT: bx lr
5654
;
5755
; ARMV8-LABEL: fminnum32_nsz_intrinsic:
@@ -78,9 +76,7 @@ define float @fminnum32_non_zero_intrinsic(float %x) {
7876
; ARMV7: @ %bb.0:
7977
; ARMV7-NEXT: vmov.f32 s0, #-1.000000e+00
8078
; ARMV7-NEXT: vmov s2, r0
81-
; ARMV7-NEXT: vcmp.f32 s2, s0
82-
; ARMV7-NEXT: vmrs APSR_nzcv, fpscr
83-
; ARMV7-NEXT: vmovlt.f32 s0, s2
79+
; ARMV7-NEXT: vmin.f32 d0, d1, d0
8480
; ARMV7-NEXT: vmov r0, s0
8581
; ARMV7-NEXT: bx lr
8682
;
@@ -136,12 +132,10 @@ define float @fmaxnum32_intrinsic(float %x, float %y) {
136132
define float @fmaxnum32_nsz_intrinsic(float %x, float %y) {
137133
; ARMV7-LABEL: fmaxnum32_nsz_intrinsic:
138134
; ARMV7: @ %bb.0:
139-
; ARMV7-NEXT: vmov s0, r0
140-
; ARMV7-NEXT: vmov s2, r1
141-
; ARMV7-NEXT: vcmp.f32 s0, s2
142-
; ARMV7-NEXT: vmrs APSR_nzcv, fpscr
143-
; ARMV7-NEXT: vmovgt.f32 s2, s0
144-
; ARMV7-NEXT: vmov r0, s2
135+
; ARMV7-NEXT: vmov s0, r1
136+
; ARMV7-NEXT: vmov s2, r0
137+
; ARMV7-NEXT: vmax.f32 d0, d1, d0
138+
; ARMV7-NEXT: vmov r0, s0
145139
; ARMV7-NEXT: bx lr
146140
;
147141
; ARMV8-LABEL: fmaxnum32_nsz_intrinsic:
@@ -210,9 +204,7 @@ define float @fmaxnum32_non_zero_intrinsic(float %x) {
210204
; ARMV7: @ %bb.0:
211205
; ARMV7-NEXT: vmov.f32 s0, #1.000000e+00
212206
; ARMV7-NEXT: vmov s2, r0
213-
; ARMV7-NEXT: vcmp.f32 s2, s0
214-
; ARMV7-NEXT: vmrs APSR_nzcv, fpscr
215-
; ARMV7-NEXT: vmovgt.f32 s0, s2
207+
; ARMV7-NEXT: vmax.f32 d0, d1, d0
216208
; ARMV7-NEXT: vmov r0, s0
217209
; ARMV7-NEXT: bx lr
218210
;
Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,97 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
2+
; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr9 < %s | FileCheck %s
3+
4+
define fp128 @f128_minimum(fp128 %a, fp128 %b) {
5+
; CHECK-LABEL: f128_minimum:
6+
; CHECK: # %bb.0: # %entry
7+
; CHECK-NEXT: xscmpuqp 0, 2, 3
8+
; CHECK-NEXT: vmr 4, 2
9+
; CHECK-NEXT: bge 0, .LBB0_8
10+
; CHECK-NEXT: # %bb.1: # %entry
11+
; CHECK-NEXT: bun 0, .LBB0_9
12+
; CHECK-NEXT: .LBB0_2: # %entry
13+
; CHECK-NEXT: xststdcqp 0, 2, 4
14+
; CHECK-NEXT: bc 4, 2, .LBB0_10
15+
; CHECK-NEXT: .LBB0_3: # %entry
16+
; CHECK-NEXT: xststdcqp 0, 3, 4
17+
; CHECK-NEXT: bc 12, 2, .LBB0_5
18+
; CHECK-NEXT: .LBB0_4: # %entry
19+
; CHECK-NEXT: vmr 3, 2
20+
; CHECK-NEXT: .LBB0_5: # %entry
21+
; CHECK-NEXT: addis 3, 2, .LCPI0_1@toc@ha
22+
; CHECK-NEXT: addi 3, 3, .LCPI0_1@toc@l
23+
; CHECK-NEXT: lxv 34, 0(3)
24+
; CHECK-NEXT: xscmpuqp 0, 4, 2
25+
; CHECK-NEXT: beq 0, .LBB0_7
26+
; CHECK-NEXT: # %bb.6: # %entry
27+
; CHECK-NEXT: vmr 3, 4
28+
; CHECK-NEXT: .LBB0_7: # %entry
29+
; CHECK-NEXT: vmr 2, 3
30+
; CHECK-NEXT: blr
31+
; CHECK-NEXT: .LBB0_8: # %entry
32+
; CHECK-NEXT: vmr 4, 3
33+
; CHECK-NEXT: bnu 0, .LBB0_2
34+
; CHECK-NEXT: .LBB0_9:
35+
; CHECK-NEXT: addis 3, 2, .LCPI0_0@toc@ha
36+
; CHECK-NEXT: addi 3, 3, .LCPI0_0@toc@l
37+
; CHECK-NEXT: lxv 36, 0(3)
38+
; CHECK-NEXT: xststdcqp 0, 2, 4
39+
; CHECK-NEXT: bc 12, 2, .LBB0_3
40+
; CHECK-NEXT: .LBB0_10: # %entry
41+
; CHECK-NEXT: vmr 2, 4
42+
; CHECK-NEXT: xststdcqp 0, 3, 4
43+
; CHECK-NEXT: bc 4, 2, .LBB0_4
44+
; CHECK-NEXT: b .LBB0_5
45+
entry:
46+
%m = call fp128 @llvm.minimum.f128(fp128 %a, fp128 %b)
47+
ret fp128 %m
48+
}
49+
50+
define fp128 @f128_maximum(fp128 %a, fp128 %b) {
51+
; CHECK-LABEL: f128_maximum:
52+
; CHECK: # %bb.0: # %entry
53+
; CHECK-NEXT: xscmpuqp 0, 2, 3
54+
; CHECK-NEXT: vmr 4, 2
55+
; CHECK-NEXT: ble 0, .LBB1_8
56+
; CHECK-NEXT: # %bb.1: # %entry
57+
; CHECK-NEXT: bun 0, .LBB1_9
58+
; CHECK-NEXT: .LBB1_2: # %entry
59+
; CHECK-NEXT: xststdcqp 0, 2, 8
60+
; CHECK-NEXT: bc 4, 2, .LBB1_10
61+
; CHECK-NEXT: .LBB1_3: # %entry
62+
; CHECK-NEXT: xststdcqp 0, 3, 8
63+
; CHECK-NEXT: bc 12, 2, .LBB1_5
64+
; CHECK-NEXT: .LBB1_4: # %entry
65+
; CHECK-NEXT: vmr 3, 2
66+
; CHECK-NEXT: .LBB1_5: # %entry
67+
; CHECK-NEXT: addis 3, 2, .LCPI1_1@toc@ha
68+
; CHECK-NEXT: addi 3, 3, .LCPI1_1@toc@l
69+
; CHECK-NEXT: lxv 34, 0(3)
70+
; CHECK-NEXT: xscmpuqp 0, 4, 2
71+
; CHECK-NEXT: beq 0, .LBB1_7
72+
; CHECK-NEXT: # %bb.6: # %entry
73+
; CHECK-NEXT: vmr 3, 4
74+
; CHECK-NEXT: .LBB1_7: # %entry
75+
; CHECK-NEXT: vmr 2, 3
76+
; CHECK-NEXT: blr
77+
; CHECK-NEXT: .LBB1_8: # %entry
78+
; CHECK-NEXT: vmr 4, 3
79+
; CHECK-NEXT: bnu 0, .LBB1_2
80+
; CHECK-NEXT: .LBB1_9:
81+
; CHECK-NEXT: addis 3, 2, .LCPI1_0@toc@ha
82+
; CHECK-NEXT: addi 3, 3, .LCPI1_0@toc@l
83+
; CHECK-NEXT: lxv 36, 0(3)
84+
; CHECK-NEXT: xststdcqp 0, 2, 8
85+
; CHECK-NEXT: bc 12, 2, .LBB1_3
86+
; CHECK-NEXT: .LBB1_10: # %entry
87+
; CHECK-NEXT: vmr 2, 4
88+
; CHECK-NEXT: xststdcqp 0, 3, 8
89+
; CHECK-NEXT: bc 4, 2, .LBB1_4
90+
; CHECK-NEXT: b .LBB1_5
91+
entry:
92+
%m = call fp128 @llvm.maximum.f128(fp128 %a, fp128 %b)
93+
ret fp128 %m
94+
}
95+
96+
declare fp128 @llvm.minimum.f128(fp128, fp128)
97+
declare fp128 @llvm.maximum.f128(fp128, fp128)

0 commit comments

Comments
 (0)