Skip to content

Commit 72fb379

Browse files
wzssyqaYour Name
and
Your Name
authored
AArch64: Select FCANONICALIZE (#104429)
The AArch64 FMINNM/FMAXNM instructions follow IEEE 754-2008, so we can use them to canonicalize a floating-point number. FMINNUM_IEEE/FMAXNUM_IEEE are also used by expansions such as those of FMINIMUMNUM/FMAXIMUMNUM, so let's define them as well. --------- Co-authored-by: Your Name <[email protected]>
1 parent 9c81a24 commit 72fb379

File tree

4 files changed

+1188
-15
lines changed

4 files changed

+1188
-15
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -775,6 +775,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
775775
ISD::FMAXNUM,
776776
ISD::FMINIMUM,
777777
ISD::FMAXIMUM,
778+
ISD::FCANONICALIZE,
778779
ISD::STRICT_FADD,
779780
ISD::STRICT_FSUB,
780781
ISD::STRICT_FMUL,
@@ -818,6 +819,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
818819
setOperationPromotedToType(ISD::FROUNDEVEN, V4Narrow, MVT::v4f32);
819820
setOperationPromotedToType(ISD::FRINT, V4Narrow, MVT::v4f32);
820821
setOperationPromotedToType(ISD::FNEARBYINT, V4Narrow, MVT::v4f32);
822+
setOperationPromotedToType(ISD::FCANONICALIZE, V4Narrow, MVT::v4f32);
821823

822824
setOperationAction(ISD::FABS, V4Narrow, Legal);
823825
setOperationAction(ISD::FNEG, V4Narrow, Legal);
@@ -851,6 +853,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
851853
setOperationAction(ISD::SELECT, V8Narrow, Expand);
852854
setOperationAction(ISD::SELECT_CC, V8Narrow, Expand);
853855
setOperationAction(ISD::FP_EXTEND, V8Narrow, Expand);
856+
setOperationPromotedToType(ISD::FCANONICALIZE, V8Narrow, MVT::v8f32);
854857
};
855858

856859
if (!Subtarget->hasFullFP16()) {

llvm/lib/Target/AArch64/AArch64InstrInfo.td

Lines changed: 38 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -5052,17 +5052,25 @@ def : Pat<(v1f64 (fminnum (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
50525052

50535053
def : Pat<(fminnum_ieee (f64 FPR64:$a), (f64 FPR64:$b)),
50545054
(FMINNMDrr FPR64:$a, FPR64:$b)>;
5055-
def : Pat<(fminnum_ieee (f32 FPR32:$a), (f32 FPR32:$b)),
5056-
(FMINNMSrr FPR32:$a, FPR32:$b)>;
5057-
def : Pat<(fminnum_ieee (f16 FPR16:$a), (f16 FPR16:$b)),
5058-
(FMINNMHrr FPR16:$a, FPR16:$b)>;
50595055
def : Pat<(fmaxnum_ieee (f64 FPR64:$a), (f64 FPR64:$b)),
50605056
(FMAXNMDrr FPR64:$a, FPR64:$b)>;
5057+
def : Pat<(f64 (fcanonicalize f64:$a)),
5058+
(FMINNMDrr f64:$a, f64:$a)>;
5059+
def : Pat<(fminnum_ieee (f32 FPR32:$a), (f32 FPR32:$b)),
5060+
(FMINNMSrr FPR32:$a, FPR32:$b)>;
50615061
def : Pat<(fmaxnum_ieee (f32 FPR32:$a), (f32 FPR32:$b)),
50625062
(FMAXNMSrr FPR32:$a, FPR32:$b)>;
5063+
def : Pat<(f32 (fcanonicalize f32:$a)),
5064+
(FMINNMSrr f32:$a, f32:$a)>;
5065+
5066+
let Predicates = [HasFullFP16] in {
5067+
def : Pat<(fminnum_ieee (f16 FPR16:$a), (f16 FPR16:$b)),
5068+
(FMINNMHrr FPR16:$a, FPR16:$b)>;
50635069
def : Pat<(fmaxnum_ieee (f16 FPR16:$a), (f16 FPR16:$b)),
50645070
(FMAXNMHrr FPR16:$a, FPR16:$b)>;
5065-
5071+
def : Pat<(f16 (fcanonicalize f16:$a)),
5072+
(FMINNMHrr f16:$a, f16:$a)>;
5073+
}
50665074
//===----------------------------------------------------------------------===//
50675075
// Floating point three operand instructions.
50685076
//===----------------------------------------------------------------------===//
@@ -5567,26 +5575,41 @@ defm FMINNM : SIMDThreeSameVectorFP<0,1,0b000,"fminnm", any_fminnum>;
55675575
defm FMINP : SIMDThreeSameVectorFP<1,1,0b110,"fminp", int_aarch64_neon_fminp>;
55685576
defm FMIN : SIMDThreeSameVectorFP<0,1,0b110,"fmin", any_fminimum>;
55695577

5578+
let Predicates = [HasNEON] in {
55705579
def : Pat<(v2f64 (fminnum_ieee (v2f64 V128:$Rn), (v2f64 V128:$Rm))),
55715580
(v2f64 (FMINNMv2f64 (v2f64 V128:$Rn), (v2f64 V128:$Rm)))>;
5572-
def : Pat<(v4f32 (fminnum_ieee (v4f32 V128:$Rn), (v4f32 V128:$Rm))),
5573-
(v4f32 (FMINNMv4f32 (v4f32 V128:$Rn), (v4f32 V128:$Rm)))>;
5574-
def : Pat<(v8f16 (fminnum_ieee (v8f16 V128:$Rn), (v8f16 V128:$Rm))),
5575-
(v8f16 (FMINNMv8f16 (v8f16 V128:$Rn), (v8f16 V128:$Rm)))>;
5576-
def : Pat<(v2f32 (fminnum_ieee (v2f32 V64:$Rn), (v2f32 V64:$Rm))),
5577-
(v2f32 (FMINNMv2f32 (v2f32 V64:$Rn), (v2f32 V64:$Rm)))>;
5578-
def : Pat<(v4f16 (fminnum_ieee (v4f16 V64:$Rn), (v4f16 V64:$Rm))),
5579-
(v4f16 (FMINNMv4f16 (v4f16 V64:$Rn), (v4f16 V64:$Rm)))>;
55805581
def : Pat<(v2f64 (fmaxnum_ieee (v2f64 V128:$Rn), (v2f64 V128:$Rm))),
55815582
(v2f64 (FMAXNMv2f64 (v2f64 V128:$Rn), (v2f64 V128:$Rm)))>;
5583+
def : Pat<(v2f64 (fcanonicalize (v2f64 V128:$Rn))),
5584+
(v2f64 (FMINNMv2f64 (v2f64 V128:$Rn), (v2f64 V128:$Rn)))>;
5585+
def : Pat<(v4f32 (fminnum_ieee (v4f32 V128:$Rn), (v4f32 V128:$Rm))),
5586+
(v4f32 (FMINNMv4f32 (v4f32 V128:$Rn), (v4f32 V128:$Rm)))>;
55825587
def : Pat<(v4f32 (fmaxnum_ieee (v4f32 V128:$Rn), (v4f32 V128:$Rm))),
55835588
(v4f32 (FMAXNMv4f32 (v4f32 V128:$Rn), (v4f32 V128:$Rm)))>;
5584-
def : Pat<(v8f16 (fmaxnum_ieee (v8f16 V128:$Rn), (v8f16 V128:$Rm))),
5585-
(v8f16 (FMAXNMv8f16 (v8f16 V128:$Rn), (v8f16 V128:$Rm)))>;
5589+
def : Pat<(v4f32 (fcanonicalize (v4f32 V128:$Rn))),
5590+
(v4f32 (FMINNMv4f32 (v4f32 V128:$Rn), (v4f32 V128:$Rn)))>;
5591+
def : Pat<(v2f32 (fminnum_ieee (v2f32 V64:$Rn), (v2f32 V64:$Rm))),
5592+
(v2f32 (FMINNMv2f32 (v2f32 V64:$Rn), (v2f32 V64:$Rm)))>;
55865593
def : Pat<(v2f32 (fmaxnum_ieee (v2f32 V64:$Rn), (v2f32 V64:$Rm))),
55875594
(v2f32 (FMAXNMv2f32 (v2f32 V64:$Rn), (v2f32 V64:$Rm)))>;
5595+
def : Pat<(v2f32 (fcanonicalize (v2f32 V64:$Rn))),
5596+
(v2f32 (FMINNMv2f32 (v2f32 V64:$Rn), (v2f32 V64:$Rn)))>;
5597+
}
5598+
5599+
let Predicates = [HasNEON, HasFullFP16] in {
5600+
def : Pat<(v8f16 (fminnum_ieee (v8f16 V128:$Rn), (v8f16 V128:$Rm))),
5601+
(v8f16 (FMINNMv8f16 (v8f16 V128:$Rn), (v8f16 V128:$Rm)))>;
5602+
def : Pat<(v8f16 (fmaxnum_ieee (v8f16 V128:$Rn), (v8f16 V128:$Rm))),
5603+
(v8f16 (FMAXNMv8f16 (v8f16 V128:$Rn), (v8f16 V128:$Rm)))>;
5604+
def : Pat<(v8f16 (fcanonicalize (v8f16 V128:$Rn))),
5605+
(v8f16 (FMINNMv8f16 (v8f16 V128:$Rn), (v8f16 V128:$Rn)))>;
5606+
def : Pat<(v4f16 (fminnum_ieee (v4f16 V64:$Rn), (v4f16 V64:$Rm))),
5607+
(v4f16 (FMINNMv4f16 (v4f16 V64:$Rn), (v4f16 V64:$Rm)))>;
55885608
def : Pat<(v4f16 (fmaxnum_ieee (v4f16 V64:$Rn), (v4f16 V64:$Rm))),
55895609
(v4f16 (FMAXNMv4f16 (v4f16 V64:$Rn), (v4f16 V64:$Rm)))>;
5610+
def : Pat<(v4f16 (fcanonicalize (v4f16 V64:$Rn))),
5611+
(v4f16 (FMINNMv4f16 (v4f16 V64:$Rn), (v4f16 V64:$Rn)))>;
5612+
}
55905613

55915614
// NOTE: The operands of the PatFrag are reordered on FMLA/FMLS because the
55925615
// instruction expects the addend first, while the fma intrinsic puts it last.

0 commit comments

Comments
 (0)