Skip to content

[ARM] Stop gluing FP comparisons to FMSTAT #116676

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Nov 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 11 additions & 15 deletions llvm/lib/Target/ARM/ARMISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4971,14 +4971,14 @@ SDValue ARMTargetLowering::getVFPCmp(SDValue LHS, SDValue RHS,
SelectionDAG &DAG, const SDLoc &dl,
bool Signaling) const {
assert(Subtarget->hasFP64() || RHS.getValueType() != MVT::f64);
SDValue Cmp;
SDValue Flags;
if (!isFloatingPointZero(RHS))
Cmp = DAG.getNode(Signaling ? ARMISD::CMPFPE : ARMISD::CMPFP,
dl, MVT::Glue, LHS, RHS);
Flags = DAG.getNode(Signaling ? ARMISD::CMPFPE : ARMISD::CMPFP, dl, FlagsVT,
LHS, RHS);
else
Cmp = DAG.getNode(Signaling ? ARMISD::CMPFPEw0 : ARMISD::CMPFPw0,
dl, MVT::Glue, LHS);
return DAG.getNode(ARMISD::FMSTAT, dl, MVT::Glue, Cmp);
Flags = DAG.getNode(Signaling ? ARMISD::CMPFPEw0 : ARMISD::CMPFPw0, dl,
FlagsVT, LHS);
return DAG.getNode(ARMISD::FMSTAT, dl, MVT::Glue, Flags);
}

/// duplicateCmp - Glue values can have only one use, so this function
Expand All @@ -4991,15 +4991,11 @@ ARMTargetLowering::duplicateCmp(SDValue Cmp, SelectionDAG &DAG) const {
return DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),Cmp.getOperand(1));

assert(Opc == ARMISD::FMSTAT && "unexpected comparison operation");
Cmp = Cmp.getOperand(0);
Opc = Cmp.getOpcode();
if (Opc == ARMISD::CMPFP)
Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),Cmp.getOperand(1));
else {
assert(Opc == ARMISD::CMPFPw0 && "unexpected operand of FMSTAT");
Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0));
}
return DAG.getNode(ARMISD::FMSTAT, DL, MVT::Glue, Cmp);
SDValue Flags = Cmp.getOperand(0);
assert((Flags.getOpcode() == ARMISD::CMPFP ||
Flags.getOpcode() == ARMISD::CMPFPw0) &&
"unexpected operand of FMSTAT");
return DAG.getNode(ARMISD::FMSTAT, DL, MVT::Glue, Flags);
}

// This function returns three things: the arithmetic computation itself
Expand Down
56 changes: 37 additions & 19 deletions llvm/lib/Target/ARM/ARMInstrVFP.td
Original file line number Diff line number Diff line change
Expand Up @@ -10,19 +10,36 @@
//
//===----------------------------------------------------------------------===//

def SDT_CMPFP0 : SDTypeProfile<0, 1, [SDTCisFP<0>]>;
def SDT_CMPFP : SDTypeProfile<1, 2, [
SDTCisVT<0, FlagsVT>, // out flags
SDTCisFP<1>, // lhs
SDTCisSameAs<2, 1> // rhs
]>;

def SDT_CMPFP0 : SDTypeProfile<1, 1, [
SDTCisVT<0, FlagsVT>, // out flags
SDTCisFP<1> // operand
]>;

def SDT_VMOVDRR : SDTypeProfile<1, 2, [SDTCisVT<0, f64>, SDTCisVT<1, i32>,
SDTCisSameAs<1, 2>]>;
def SDT_VMOVRRD : SDTypeProfile<2, 1, [SDTCisVT<0, i32>, SDTCisSameAs<0, 1>,
SDTCisVT<2, f64>]>;

def SDT_VMOVSR : SDTypeProfile<1, 1, [SDTCisVT<0, f32>, SDTCisVT<1, i32>]>;

def arm_fmstat : SDNode<"ARMISD::FMSTAT", SDTNone, [SDNPInGlue, SDNPOutGlue]>;
def arm_cmpfp : SDNode<"ARMISD::CMPFP", SDT_ARMCmp, [SDNPOutGlue]>;
def arm_cmpfp0 : SDNode<"ARMISD::CMPFPw0", SDT_CMPFP0, [SDNPOutGlue]>;
def arm_cmpfpe : SDNode<"ARMISD::CMPFPE", SDT_ARMCmp, [SDNPOutGlue]>;
def arm_cmpfpe0: SDNode<"ARMISD::CMPFPEw0",SDT_CMPFP0, [SDNPOutGlue]>;
def arm_cmpfp : SDNode<"ARMISD::CMPFP", SDT_CMPFP>;
def arm_cmpfp0 : SDNode<"ARMISD::CMPFPw0", SDT_CMPFP0>;
def arm_cmpfpe : SDNode<"ARMISD::CMPFPE", SDT_CMPFP>;
def arm_cmpfpe0 : SDNode<"ARMISD::CMPFPEw0", SDT_CMPFP0>;

def arm_fmstat : SDNode<"ARMISD::FMSTAT",
SDTypeProfile<0, 1, [
SDTCisVT<0, FlagsVT> // in flags
]>,
[SDNPOutGlue] // TODO: Change Glue to a normal result.
>;

def arm_fmdrr : SDNode<"ARMISD::VMOVDRR", SDT_VMOVDRR>;
def arm_fmrrd : SDNode<"ARMISD::VMOVRRD", SDT_VMOVRRD>;
def arm_vmovsr : SDNode<"ARMISD::VMOVSR", SDT_VMOVSR>;
Expand Down Expand Up @@ -606,12 +623,12 @@ let Defs = [FPSCR_NZCV] in {
def VCMPED : ADuI<0b11101, 0b11, 0b0100, 0b11, 0,
(outs), (ins DPR:$Dd, DPR:$Dm),
IIC_fpCMP64, "vcmpe", ".f64\t$Dd, $Dm", "",
[(arm_cmpfpe DPR:$Dd, (f64 DPR:$Dm))]>;
[(set FPSCR_NZCV, (arm_cmpfpe DPR:$Dd, (f64 DPR:$Dm)))]>;

def VCMPES : ASuI<0b11101, 0b11, 0b0100, 0b11, 0,
(outs), (ins SPR:$Sd, SPR:$Sm),
IIC_fpCMP32, "vcmpe", ".f32\t$Sd, $Sm", "",
[(arm_cmpfpe SPR:$Sd, SPR:$Sm)]> {
[(set FPSCR_NZCV, (arm_cmpfpe SPR:$Sd, SPR:$Sm))]> {
// Some single precision VFP instructions may be executed on both NEON and
// VFP pipelines on A8.
let D = VFPNeonA8Domain;
Expand All @@ -620,17 +637,17 @@ def VCMPES : ASuI<0b11101, 0b11, 0b0100, 0b11, 0,
def VCMPEH : AHuI<0b11101, 0b11, 0b0100, 0b11, 0,
(outs), (ins HPR:$Sd, HPR:$Sm),
IIC_fpCMP16, "vcmpe", ".f16\t$Sd, $Sm",
[(arm_cmpfpe (f16 HPR:$Sd), (f16 HPR:$Sm))]>;
[(set FPSCR_NZCV, (arm_cmpfpe (f16 HPR:$Sd), (f16 HPR:$Sm)))]>;

def VCMPD : ADuI<0b11101, 0b11, 0b0100, 0b01, 0,
(outs), (ins DPR:$Dd, DPR:$Dm),
IIC_fpCMP64, "vcmp", ".f64\t$Dd, $Dm", "",
[(arm_cmpfp DPR:$Dd, (f64 DPR:$Dm))]>;
[(set FPSCR_NZCV, (arm_cmpfp DPR:$Dd, (f64 DPR:$Dm)))]>;

def VCMPS : ASuI<0b11101, 0b11, 0b0100, 0b01, 0,
(outs), (ins SPR:$Sd, SPR:$Sm),
IIC_fpCMP32, "vcmp", ".f32\t$Sd, $Sm", "",
[(arm_cmpfp SPR:$Sd, SPR:$Sm)]> {
[(set FPSCR_NZCV, (arm_cmpfp SPR:$Sd, SPR:$Sm))]> {
// Some single precision VFP instructions may be executed on both NEON and
// VFP pipelines on A8.
let D = VFPNeonA8Domain;
Expand All @@ -639,7 +656,7 @@ def VCMPS : ASuI<0b11101, 0b11, 0b0100, 0b01, 0,
def VCMPH : AHuI<0b11101, 0b11, 0b0100, 0b01, 0,
(outs), (ins HPR:$Sd, HPR:$Sm),
IIC_fpCMP16, "vcmp", ".f16\t$Sd, $Sm",
[(arm_cmpfp (f16 HPR:$Sd), (f16 HPR:$Sm))]>;
[(set FPSCR_NZCV, (arm_cmpfp (f16 HPR:$Sd), (f16 HPR:$Sm)))]>;
} // Defs = [FPSCR_NZCV]

//===----------------------------------------------------------------------===//
Expand Down Expand Up @@ -669,15 +686,15 @@ let Defs = [FPSCR_NZCV] in {
def VCMPEZD : ADuI<0b11101, 0b11, 0b0101, 0b11, 0,
(outs), (ins DPR:$Dd),
IIC_fpCMP64, "vcmpe", ".f64\t$Dd, #0", "",
[(arm_cmpfpe0 (f64 DPR:$Dd))]> {
[(set FPSCR_NZCV, (arm_cmpfpe0 (f64 DPR:$Dd)))]> {
let Inst{3-0} = 0b0000;
let Inst{5} = 0;
}

def VCMPEZS : ASuI<0b11101, 0b11, 0b0101, 0b11, 0,
(outs), (ins SPR:$Sd),
IIC_fpCMP32, "vcmpe", ".f32\t$Sd, #0", "",
[(arm_cmpfpe0 SPR:$Sd)]> {
[(set FPSCR_NZCV, (arm_cmpfpe0 SPR:$Sd))]> {
let Inst{3-0} = 0b0000;
let Inst{5} = 0;

Expand All @@ -689,23 +706,23 @@ def VCMPEZS : ASuI<0b11101, 0b11, 0b0101, 0b11, 0,
def VCMPEZH : AHuI<0b11101, 0b11, 0b0101, 0b11, 0,
(outs), (ins HPR:$Sd),
IIC_fpCMP16, "vcmpe", ".f16\t$Sd, #0",
[(arm_cmpfpe0 (f16 HPR:$Sd))]> {
[(set FPSCR_NZCV, (arm_cmpfpe0 (f16 HPR:$Sd)))]> {
let Inst{3-0} = 0b0000;
let Inst{5} = 0;
}

def VCMPZD : ADuI<0b11101, 0b11, 0b0101, 0b01, 0,
(outs), (ins DPR:$Dd),
IIC_fpCMP64, "vcmp", ".f64\t$Dd, #0", "",
[(arm_cmpfp0 (f64 DPR:$Dd))]> {
[(set FPSCR_NZCV, (arm_cmpfp0 (f64 DPR:$Dd)))]> {
let Inst{3-0} = 0b0000;
let Inst{5} = 0;
}

def VCMPZS : ASuI<0b11101, 0b11, 0b0101, 0b01, 0,
(outs), (ins SPR:$Sd),
IIC_fpCMP32, "vcmp", ".f32\t$Sd, #0", "",
[(arm_cmpfp0 SPR:$Sd)]> {
[(set FPSCR_NZCV, (arm_cmpfp0 SPR:$Sd))]> {
let Inst{3-0} = 0b0000;
let Inst{5} = 0;

Expand All @@ -717,7 +734,7 @@ def VCMPZS : ASuI<0b11101, 0b11, 0b0101, 0b01, 0,
def VCMPZH : AHuI<0b11101, 0b11, 0b0101, 0b01, 0,
(outs), (ins HPR:$Sd),
IIC_fpCMP16, "vcmp", ".f16\t$Sd, #0",
[(arm_cmpfp0 (f16 HPR:$Sd))]> {
[(set FPSCR_NZCV, (arm_cmpfp0 (f16 HPR:$Sd)))]> {
let Inst{3-0} = 0b0000;
let Inst{5} = 0;
}
Expand Down Expand Up @@ -2492,7 +2509,8 @@ let DecoderMethod = "DecodeForVMRSandVMSR" in {
let Defs = [CPSR], Uses = [FPSCR_NZCV], Predicates = [HasFPRegs],
Rt = 0b1111 /* apsr_nzcv */ in
def FMSTAT : MovFromVFP<0b0001 /* fpscr */, (outs), (ins),
"vmrs", "\tAPSR_nzcv, fpscr", [(arm_fmstat)]>;
"vmrs", "\tAPSR_nzcv, fpscr",
[(arm_fmstat FPSCR_NZCV)]>;

// Application level FPSCR -> GPR
let hasSideEffects = 1, Uses = [FPSCR], Predicates = [HasFPRegs] in
Expand Down
4 changes: 3 additions & 1 deletion llvm/lib/Target/ARM/ARMRegisterInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -413,7 +413,9 @@ def VCCR : RegisterClass<"ARM", [i32, v16i1, v8i1, v4i1, v2i1], 32, (add VPR)> {

// FPSCR, when the flags at the top of it are used as the input or
// output to an instruction such as MVE VADC.
def cl_FPSCR_NZCV : RegisterClass<"ARM", [i32], 32, (add FPSCR_NZCV)>;
def cl_FPSCR_NZCV : RegisterClass<"ARM", [i32], 32, (add FPSCR_NZCV)> {
let CopyCost = -1;
}

// Scalar single precision floating point register class.
// FIXME: Allocation order changed to s0, s2, ... or s0, s4, ... as a quick hack
Expand Down
10 changes: 5 additions & 5 deletions llvm/test/CodeGen/ARM/fcmp-xo.ll
Original file line number Diff line number Diff line change
Expand Up @@ -54,12 +54,12 @@ define arm_aapcs_vfpcc float @float128(float %a0) local_unnamed_addr {
; NEON-LABEL: float128:
; NEON: @ %bb.0:
; NEON-NEXT: mov.w r0, #1124073472
; NEON-NEXT: vmov.f32 s2, #5.000000e-01
; NEON-NEXT: vmov d3, r0, r0
; NEON-NEXT: vmov.f32 s4, #-5.000000e-01
; NEON-NEXT: vcmp.f32 s6, s0
; NEON-NEXT: vmov.f32 s4, #5.000000e-01
; NEON-NEXT: vmov d1, r0, r0
; NEON-NEXT: vmov.f32 s6, #-5.000000e-01
; NEON-NEXT: vcmp.f32 s2, s0
; NEON-NEXT: vmrs APSR_nzcv, fpscr
; NEON-NEXT: vselgt.f32 s0, s4, s2
; NEON-NEXT: vselgt.f32 s0, s6, s4
; NEON-NEXT: bx lr
%1 = fcmp nsz olt float %a0, 128.000000e+00
%2 = select i1 %1, float -5.000000e-01, float 5.000000e-01
Expand Down
58 changes: 29 additions & 29 deletions llvm/test/CodeGen/ARM/fp16-instructions.ll
Original file line number Diff line number Diff line change
Expand Up @@ -700,9 +700,9 @@ define half @select_cc1(ptr %a0) {

; CHECK-LABEL: select_cc1:

; CHECK-HARDFP-FULLFP16: vcmp.f16
; CHECK-HARDFP-FULLFP16-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-HARDFP-FULLFP16-NEXT: vseleq.f16 s0,
; CHECK-HARDFP-FULLFP16: vcmp.f16
; CHECK-HARDFP-FULLFP16: vmrs APSR_nzcv, fpscr
; CHECK-HARDFP-FULLFP16: vseleq.f16 s0,

; CHECK-SOFTFP-FP16-A32: vcmp.f32
; CHECK-SOFTFP-FP16-A32-NEXT: vmrs APSR_nzcv, fpscr
Expand All @@ -728,9 +728,9 @@ define half @select_cc_ge1(ptr %a0) {

; CHECK-LABEL: select_cc_ge1:

; CHECK-HARDFP-FULLFP16: vcmp.f16
; CHECK-HARDFP-FULLFP16-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-HARDFP-FULLFP16-NEXT: vselge.f16 s0,
; CHECK-HARDFP-FULLFP16: vcmp.f16
; CHECK-HARDFP-FULLFP16: vmrs APSR_nzcv, fpscr
; CHECK-HARDFP-FULLFP16: vselge.f16 s0,

; CHECK-SOFTFP-FP16-A32: vcmp.f32
; CHECK-SOFTFP-FP16-A32-NEXT: vmrs APSR_nzcv, fpscr
Expand All @@ -751,9 +751,9 @@ define half @select_cc_ge2(ptr %a0) {

; CHECK-LABEL: select_cc_ge2:

; CHECK-HARDFP-FULLFP16: vcmp.f16
; CHECK-HARDFP-FULLFP16-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-HARDFP-FULLFP16-NEXT: vselge.f16 s0,
; CHECK-HARDFP-FULLFP16: vcmp.f16
; CHECK-HARDFP-FULLFP16: vmrs APSR_nzcv, fpscr
; CHECK-HARDFP-FULLFP16: vselge.f16 s0,

; CHECK-SOFTFP-FP16-A32: vcmp.f32
; CHECK-SOFTFP-FP16-A32-NEXT: vmrs APSR_nzcv, fpscr
Expand All @@ -774,9 +774,9 @@ define half @select_cc_ge3(ptr %a0) {

; CHECK-LABEL: select_cc_ge3:

; CHECK-HARDFP-FULLFP16: vcmp.f16
; CHECK-HARDFP-FULLFP16-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-HARDFP-FULLFP16-NEXT: vselge.f16 s0,
; CHECK-HARDFP-FULLFP16: vcmp.f16
; CHECK-HARDFP-FULLFP16: vmrs APSR_nzcv, fpscr
; CHECK-HARDFP-FULLFP16: vselge.f16 s0,

; CHECK-SOFTFP-FP16-A32: vcmp.f32
; CHECK-SOFTFP-FP16-A32-NEXT: vmrs APSR_nzcv, fpscr
Expand All @@ -797,9 +797,9 @@ define half @select_cc_ge4(ptr %a0) {

; CHECK-LABEL: select_cc_ge4:

; CHECK-HARDFP-FULLFP16: vcmp.f16
; CHECK-HARDFP-FULLFP16-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-HARDFP-FULLFP16-NEXT: vselge.f16 s0, s{{.}}, s{{.}}
; CHECK-HARDFP-FULLFP16: vcmp.f16
; CHECK-HARDFP-FULLFP16: vmrs APSR_nzcv, fpscr
; CHECK-HARDFP-FULLFP16: vselge.f16 s0, s{{.}}, s{{.}}

; CHECK-SOFTFP-FP16-A32: vcmp.f32
; CHECK-SOFTFP-FP16-A32-NEXT: vmrs APSR_nzcv, fpscr
Expand All @@ -821,9 +821,9 @@ define half @select_cc_gt1(ptr %a0) {

; CHECK-LABEL: select_cc_gt1:

; CHECK-HARDFP-FULLFP16: vcmp.f16
; CHECK-HARDFP-FULLFP16-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-HARDFP-FULLFP16-NEXT: vselgt.f16 s0, s{{.}}, s{{.}}
; CHECK-HARDFP-FULLFP16: vcmp.f16
; CHECK-HARDFP-FULLFP16: vmrs APSR_nzcv, fpscr
; CHECK-HARDFP-FULLFP16: vselgt.f16 s0, s{{.}}, s{{.}}

; CHECK-SOFTFP-FP16-A32: vcmp.f32
; CHECK-SOFTFP-FP16-A32-NEXT: vmrs APSR_nzcv, fpscr
Expand All @@ -844,9 +844,9 @@ define half @select_cc_gt2(ptr %a0) {

; CHECK-LABEL: select_cc_gt2:

; CHECK-HARDFP-FULLFP16: vcmp.f16
; CHECK-HARDFP-FULLFP16-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-HARDFP-FULLFP16-NEXT: vselgt.f16 s0, s{{.}}, s{{.}}
; CHECK-HARDFP-FULLFP16: vcmp.f16
; CHECK-HARDFP-FULLFP16: vmrs APSR_nzcv, fpscr
; CHECK-HARDFP-FULLFP16: vselgt.f16 s0, s{{.}}, s{{.}}

; CHECK-SOFTFP-FP16-A32: vcmp.f32
; CHECK-SOFTFP-FP16-A32-NEXT: vmrs APSR_nzcv, fpscr
Expand All @@ -867,9 +867,9 @@ define half @select_cc_gt3(ptr %a0) {

; CHECK-LABEL: select_cc_gt3:

; CHECK-HARDFP-FULLFP16: vcmp.f16
; CHECK-HARDFP-FULLFP16-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-HARDFP-FULLFP16-NEXT: vselgt.f16 s0, s{{.}}, s{{.}}
; CHECK-HARDFP-FULLFP16: vcmp.f16
; CHECK-HARDFP-FULLFP16: vmrs APSR_nzcv, fpscr
; CHECK-HARDFP-FULLFP16: vselgt.f16 s0, s{{.}}, s{{.}}

; CHECK-SOFTFP-FP16-A32: vcmp.f32
; CHECK-SOFTFP-FP16-A32-NEXT: vmrs APSR_nzcv, fpscr
Expand All @@ -890,9 +890,9 @@ define half @select_cc_gt4(ptr %a0) {

; CHECK-LABEL: select_cc_gt4:

; CHECK-HARDFP-FULLFP16: vcmp.f16
; CHECK-HARDFP-FULLFP16-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-HARDFP-FULLFP16-NEXT: vselgt.f16 s0, s{{.}}, s{{.}}
; CHECK-HARDFP-FULLFP16: vcmp.f16
; CHECK-HARDFP-FULLFP16: vmrs APSR_nzcv, fpscr
; CHECK-HARDFP-FULLFP16: vselgt.f16 s0, s{{.}}, s{{.}}

; CHECK-SOFTFP-FP16-A32: vcmp.f32
; CHECK-SOFTFP-FP16-A32-NEXT: vmrs APSR_nzcv, fpscr
Expand Down Expand Up @@ -923,10 +923,10 @@ entry:
; CHECK-LABEL: select_cc4:

; CHECK-HARDFP-FULLFP16: vldr.16 [[S2:s[0-9]]], .LCPI{{.*}}
; CHECK-HARDFP-FULLFP16: vcmp.f16 s0, [[S2]]
; CHECK-HARDFP-FULLFP16: vldr.16 [[S4:s[0-9]]], .LCPI{{.*}}
; CHECK-HARDFP-FULLFP16: vmrs APSR_nzcv, fpscr
; CHECK-HARDFP-FULLFP16: vmov.f16 [[S6:s[0-9]]], #-2.000000e+00
; CHECK-HARDFP-FULLFP16: vcmp.f16 s0, [[S2]]
; CHECK-HARDFP-FULLFP16-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-HARDFP-FULLFP16-NEXT: vseleq.f16 [[S0:s[0-9]]], [[S6]], [[S4]]
; CHECK-HARDFP-FULLFP16-NEXT: vselvs.f16 s0, [[S6]], [[S0]]

Expand Down
Loading
Loading