Skip to content

Commit ad9dcd9

Browse files
authored
Reland "[ARM] Stop gluing FP comparisons to FMSTAT" (#117248)
Following #116547, this changes the result of `ARMISD::CMPFP*` and the operand of `ARMISD::FMSTAT` from a special `Glue` type to a normal type. This change allows comparisons to be CSEd and scheduled around as can be seen in the test changes. Note that `ARMISD::FMSTAT` is still glued to its consumer nodes; this is going to be changed in a separate patch. This patch also sets `CopyCost` of `cl_FPSCR_NZCV` register class to a negative value. The reason is the same as for CCR register class: it makes DAG scheduler and InstrEmitter try to avoid copies of `FPSCR_NZCV` register to / from virtual registers. Previously, this was not necessary, since no attempt was made to create copies in the first place. `TRI::getCrossCopyRegClass` is modified in a way that prevents DAG scheduler from copying FPSCR into a virtual register. The register allocator might need to spill the virtual register, but that only seems to work in Thumb mode.
1 parent 1683f84 commit ad9dcd9

19 files changed

+2943
-4818
lines changed

llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -299,6 +299,8 @@ const TargetRegisterClass *
299299
ARMBaseRegisterInfo::getCrossCopyRegClass(const TargetRegisterClass *RC) const {
300300
if (RC == &ARM::CCRRegClass)
301301
return &ARM::rGPRRegClass; // Can't copy CCR registers.
302+
if (RC == &ARM::cl_FPSCR_NZCVRegClass)
303+
return &ARM::rGPRRegClass;
302304
return RC;
303305
}
304306

llvm/lib/Target/ARM/ARMISelLowering.cpp

Lines changed: 11 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -4971,14 +4971,14 @@ SDValue ARMTargetLowering::getVFPCmp(SDValue LHS, SDValue RHS,
49714971
SelectionDAG &DAG, const SDLoc &dl,
49724972
bool Signaling) const {
49734973
assert(Subtarget->hasFP64() || RHS.getValueType() != MVT::f64);
4974-
SDValue Cmp;
4974+
SDValue Flags;
49754975
if (!isFloatingPointZero(RHS))
4976-
Cmp = DAG.getNode(Signaling ? ARMISD::CMPFPE : ARMISD::CMPFP,
4977-
dl, MVT::Glue, LHS, RHS);
4976+
Flags = DAG.getNode(Signaling ? ARMISD::CMPFPE : ARMISD::CMPFP, dl, FlagsVT,
4977+
LHS, RHS);
49784978
else
4979-
Cmp = DAG.getNode(Signaling ? ARMISD::CMPFPEw0 : ARMISD::CMPFPw0,
4980-
dl, MVT::Glue, LHS);
4981-
return DAG.getNode(ARMISD::FMSTAT, dl, MVT::Glue, Cmp);
4979+
Flags = DAG.getNode(Signaling ? ARMISD::CMPFPEw0 : ARMISD::CMPFPw0, dl,
4980+
FlagsVT, LHS);
4981+
return DAG.getNode(ARMISD::FMSTAT, dl, MVT::Glue, Flags);
49824982
}
49834983

49844984
/// duplicateCmp - Glue values can have only one use, so this function
@@ -4991,15 +4991,11 @@ ARMTargetLowering::duplicateCmp(SDValue Cmp, SelectionDAG &DAG) const {
49914991
return DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),Cmp.getOperand(1));
49924992

49934993
assert(Opc == ARMISD::FMSTAT && "unexpected comparison operation");
4994-
Cmp = Cmp.getOperand(0);
4995-
Opc = Cmp.getOpcode();
4996-
if (Opc == ARMISD::CMPFP)
4997-
Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),Cmp.getOperand(1));
4998-
else {
4999-
assert(Opc == ARMISD::CMPFPw0 && "unexpected operand of FMSTAT");
5000-
Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0));
5001-
}
5002-
return DAG.getNode(ARMISD::FMSTAT, DL, MVT::Glue, Cmp);
4994+
SDValue Flags = Cmp.getOperand(0);
4995+
assert((Flags.getOpcode() == ARMISD::CMPFP ||
4996+
Flags.getOpcode() == ARMISD::CMPFPw0) &&
4997+
"unexpected operand of FMSTAT");
4998+
return DAG.getNode(ARMISD::FMSTAT, DL, MVT::Glue, Flags);
50034999
}
50045000

50055001
// This function returns three things: the arithmetic computation itself

llvm/lib/Target/ARM/ARMInstrVFP.td

Lines changed: 37 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -10,19 +10,36 @@
1010
//
1111
//===----------------------------------------------------------------------===//
1212

13-
def SDT_CMPFP0 : SDTypeProfile<0, 1, [SDTCisFP<0>]>;
13+
def SDT_CMPFP : SDTypeProfile<1, 2, [
14+
SDTCisVT<0, FlagsVT>, // out flags
15+
SDTCisFP<1>, // lhs
16+
SDTCisSameAs<2, 1> // rhs
17+
]>;
18+
19+
def SDT_CMPFP0 : SDTypeProfile<1, 1, [
20+
SDTCisVT<0, FlagsVT>, // out flags
21+
SDTCisFP<1> // operand
22+
]>;
23+
1424
def SDT_VMOVDRR : SDTypeProfile<1, 2, [SDTCisVT<0, f64>, SDTCisVT<1, i32>,
1525
SDTCisSameAs<1, 2>]>;
1626
def SDT_VMOVRRD : SDTypeProfile<2, 1, [SDTCisVT<0, i32>, SDTCisSameAs<0, 1>,
1727
SDTCisVT<2, f64>]>;
1828

1929
def SDT_VMOVSR : SDTypeProfile<1, 1, [SDTCisVT<0, f32>, SDTCisVT<1, i32>]>;
2030

21-
def arm_fmstat : SDNode<"ARMISD::FMSTAT", SDTNone, [SDNPInGlue, SDNPOutGlue]>;
22-
def arm_cmpfp : SDNode<"ARMISD::CMPFP", SDT_ARMCmp, [SDNPOutGlue]>;
23-
def arm_cmpfp0 : SDNode<"ARMISD::CMPFPw0", SDT_CMPFP0, [SDNPOutGlue]>;
24-
def arm_cmpfpe : SDNode<"ARMISD::CMPFPE", SDT_ARMCmp, [SDNPOutGlue]>;
25-
def arm_cmpfpe0: SDNode<"ARMISD::CMPFPEw0",SDT_CMPFP0, [SDNPOutGlue]>;
31+
def arm_cmpfp : SDNode<"ARMISD::CMPFP", SDT_CMPFP>;
32+
def arm_cmpfp0 : SDNode<"ARMISD::CMPFPw0", SDT_CMPFP0>;
33+
def arm_cmpfpe : SDNode<"ARMISD::CMPFPE", SDT_CMPFP>;
34+
def arm_cmpfpe0 : SDNode<"ARMISD::CMPFPEw0", SDT_CMPFP0>;
35+
36+
def arm_fmstat : SDNode<"ARMISD::FMSTAT",
37+
SDTypeProfile<0, 1, [
38+
SDTCisVT<0, FlagsVT> // in flags
39+
]>,
40+
[SDNPOutGlue] // TODO: Change Glue to a normal result.
41+
>;
42+
2643
def arm_fmdrr : SDNode<"ARMISD::VMOVDRR", SDT_VMOVDRR>;
2744
def arm_fmrrd : SDNode<"ARMISD::VMOVRRD", SDT_VMOVRRD>;
2845
def arm_vmovsr : SDNode<"ARMISD::VMOVSR", SDT_VMOVSR>;
@@ -606,12 +623,12 @@ let Defs = [FPSCR_NZCV] in {
606623
def VCMPED : ADuI<0b11101, 0b11, 0b0100, 0b11, 0,
607624
(outs), (ins DPR:$Dd, DPR:$Dm),
608625
IIC_fpCMP64, "vcmpe", ".f64\t$Dd, $Dm", "",
609-
[(arm_cmpfpe DPR:$Dd, (f64 DPR:$Dm))]>;
626+
[(set FPSCR_NZCV, (arm_cmpfpe DPR:$Dd, (f64 DPR:$Dm)))]>;
610627

611628
def VCMPES : ASuI<0b11101, 0b11, 0b0100, 0b11, 0,
612629
(outs), (ins SPR:$Sd, SPR:$Sm),
613630
IIC_fpCMP32, "vcmpe", ".f32\t$Sd, $Sm", "",
614-
[(arm_cmpfpe SPR:$Sd, SPR:$Sm)]> {
631+
[(set FPSCR_NZCV, (arm_cmpfpe SPR:$Sd, SPR:$Sm))]> {
615632
// Some single precision VFP instructions may be executed on both NEON and
616633
// VFP pipelines on A8.
617634
let D = VFPNeonA8Domain;
@@ -620,17 +637,17 @@ def VCMPES : ASuI<0b11101, 0b11, 0b0100, 0b11, 0,
620637
def VCMPEH : AHuI<0b11101, 0b11, 0b0100, 0b11, 0,
621638
(outs), (ins HPR:$Sd, HPR:$Sm),
622639
IIC_fpCMP16, "vcmpe", ".f16\t$Sd, $Sm",
623-
[(arm_cmpfpe (f16 HPR:$Sd), (f16 HPR:$Sm))]>;
640+
[(set FPSCR_NZCV, (arm_cmpfpe (f16 HPR:$Sd), (f16 HPR:$Sm)))]>;
624641

625642
def VCMPD : ADuI<0b11101, 0b11, 0b0100, 0b01, 0,
626643
(outs), (ins DPR:$Dd, DPR:$Dm),
627644
IIC_fpCMP64, "vcmp", ".f64\t$Dd, $Dm", "",
628-
[(arm_cmpfp DPR:$Dd, (f64 DPR:$Dm))]>;
645+
[(set FPSCR_NZCV, (arm_cmpfp DPR:$Dd, (f64 DPR:$Dm)))]>;
629646

630647
def VCMPS : ASuI<0b11101, 0b11, 0b0100, 0b01, 0,
631648
(outs), (ins SPR:$Sd, SPR:$Sm),
632649
IIC_fpCMP32, "vcmp", ".f32\t$Sd, $Sm", "",
633-
[(arm_cmpfp SPR:$Sd, SPR:$Sm)]> {
650+
[(set FPSCR_NZCV, (arm_cmpfp SPR:$Sd, SPR:$Sm))]> {
634651
// Some single precision VFP instructions may be executed on both NEON and
635652
// VFP pipelines on A8.
636653
let D = VFPNeonA8Domain;
@@ -639,7 +656,7 @@ def VCMPS : ASuI<0b11101, 0b11, 0b0100, 0b01, 0,
639656
def VCMPH : AHuI<0b11101, 0b11, 0b0100, 0b01, 0,
640657
(outs), (ins HPR:$Sd, HPR:$Sm),
641658
IIC_fpCMP16, "vcmp", ".f16\t$Sd, $Sm",
642-
[(arm_cmpfp (f16 HPR:$Sd), (f16 HPR:$Sm))]>;
659+
[(set FPSCR_NZCV, (arm_cmpfp (f16 HPR:$Sd), (f16 HPR:$Sm)))]>;
643660
} // Defs = [FPSCR_NZCV]
644661

645662
//===----------------------------------------------------------------------===//
@@ -669,15 +686,15 @@ let Defs = [FPSCR_NZCV] in {
669686
def VCMPEZD : ADuI<0b11101, 0b11, 0b0101, 0b11, 0,
670687
(outs), (ins DPR:$Dd),
671688
IIC_fpCMP64, "vcmpe", ".f64\t$Dd, #0", "",
672-
[(arm_cmpfpe0 (f64 DPR:$Dd))]> {
689+
[(set FPSCR_NZCV, (arm_cmpfpe0 (f64 DPR:$Dd)))]> {
673690
let Inst{3-0} = 0b0000;
674691
let Inst{5} = 0;
675692
}
676693

677694
def VCMPEZS : ASuI<0b11101, 0b11, 0b0101, 0b11, 0,
678695
(outs), (ins SPR:$Sd),
679696
IIC_fpCMP32, "vcmpe", ".f32\t$Sd, #0", "",
680-
[(arm_cmpfpe0 SPR:$Sd)]> {
697+
[(set FPSCR_NZCV, (arm_cmpfpe0 SPR:$Sd))]> {
681698
let Inst{3-0} = 0b0000;
682699
let Inst{5} = 0;
683700

@@ -689,23 +706,23 @@ def VCMPEZS : ASuI<0b11101, 0b11, 0b0101, 0b11, 0,
689706
def VCMPEZH : AHuI<0b11101, 0b11, 0b0101, 0b11, 0,
690707
(outs), (ins HPR:$Sd),
691708
IIC_fpCMP16, "vcmpe", ".f16\t$Sd, #0",
692-
[(arm_cmpfpe0 (f16 HPR:$Sd))]> {
709+
[(set FPSCR_NZCV, (arm_cmpfpe0 (f16 HPR:$Sd)))]> {
693710
let Inst{3-0} = 0b0000;
694711
let Inst{5} = 0;
695712
}
696713

697714
def VCMPZD : ADuI<0b11101, 0b11, 0b0101, 0b01, 0,
698715
(outs), (ins DPR:$Dd),
699716
IIC_fpCMP64, "vcmp", ".f64\t$Dd, #0", "",
700-
[(arm_cmpfp0 (f64 DPR:$Dd))]> {
717+
[(set FPSCR_NZCV, (arm_cmpfp0 (f64 DPR:$Dd)))]> {
701718
let Inst{3-0} = 0b0000;
702719
let Inst{5} = 0;
703720
}
704721

705722
def VCMPZS : ASuI<0b11101, 0b11, 0b0101, 0b01, 0,
706723
(outs), (ins SPR:$Sd),
707724
IIC_fpCMP32, "vcmp", ".f32\t$Sd, #0", "",
708-
[(arm_cmpfp0 SPR:$Sd)]> {
725+
[(set FPSCR_NZCV, (arm_cmpfp0 SPR:$Sd))]> {
709726
let Inst{3-0} = 0b0000;
710727
let Inst{5} = 0;
711728

@@ -717,7 +734,7 @@ def VCMPZS : ASuI<0b11101, 0b11, 0b0101, 0b01, 0,
717734
def VCMPZH : AHuI<0b11101, 0b11, 0b0101, 0b01, 0,
718735
(outs), (ins HPR:$Sd),
719736
IIC_fpCMP16, "vcmp", ".f16\t$Sd, #0",
720-
[(arm_cmpfp0 (f16 HPR:$Sd))]> {
737+
[(set FPSCR_NZCV, (arm_cmpfp0 (f16 HPR:$Sd)))]> {
721738
let Inst{3-0} = 0b0000;
722739
let Inst{5} = 0;
723740
}
@@ -2492,7 +2509,8 @@ let DecoderMethod = "DecodeForVMRSandVMSR" in {
24922509
let Defs = [CPSR], Uses = [FPSCR_NZCV], Predicates = [HasFPRegs],
24932510
Rt = 0b1111 /* apsr_nzcv */ in
24942511
def FMSTAT : MovFromVFP<0b0001 /* fpscr */, (outs), (ins),
2495-
"vmrs", "\tAPSR_nzcv, fpscr", [(arm_fmstat)]>;
2512+
"vmrs", "\tAPSR_nzcv, fpscr",
2513+
[(arm_fmstat FPSCR_NZCV)]>;
24962514

24972515
// Application level FPSCR -> GPR
24982516
let hasSideEffects = 1, Uses = [FPSCR], Predicates = [HasFPRegs] in

llvm/lib/Target/ARM/ARMRegisterInfo.td

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -413,7 +413,9 @@ def VCCR : RegisterClass<"ARM", [i32, v16i1, v8i1, v4i1, v2i1], 32, (add VPR)> {
413413

414414
// FPSCR, when the flags at the top of it are used as the input or
415415
// output to an instruction such as MVE VADC.
416-
def cl_FPSCR_NZCV : RegisterClass<"ARM", [i32], 32, (add FPSCR_NZCV)>;
416+
def cl_FPSCR_NZCV : RegisterClass<"ARM", [i32], 32, (add FPSCR_NZCV)> {
417+
let CopyCost = -1;
418+
}
417419

418420
// Scalar single precision floating point register class..
419421
// FIXME: Allocation order changed to s0, s2, ... or s0, s4, ... as a quick hack

llvm/test/CodeGen/ARM/fcmp-xo.ll

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -54,12 +54,12 @@ define arm_aapcs_vfpcc float @float128(float %a0) local_unnamed_addr {
5454
; NEON-LABEL: float128:
5555
; NEON: @ %bb.0:
5656
; NEON-NEXT: mov.w r0, #1124073472
57-
; NEON-NEXT: vmov.f32 s2, #5.000000e-01
58-
; NEON-NEXT: vmov d3, r0, r0
59-
; NEON-NEXT: vmov.f32 s4, #-5.000000e-01
60-
; NEON-NEXT: vcmp.f32 s6, s0
57+
; NEON-NEXT: vmov.f32 s4, #5.000000e-01
58+
; NEON-NEXT: vmov d1, r0, r0
59+
; NEON-NEXT: vmov.f32 s6, #-5.000000e-01
60+
; NEON-NEXT: vcmp.f32 s2, s0
6161
; NEON-NEXT: vmrs APSR_nzcv, fpscr
62-
; NEON-NEXT: vselgt.f32 s0, s4, s2
62+
; NEON-NEXT: vselgt.f32 s0, s6, s4
6363
; NEON-NEXT: bx lr
6464
%1 = fcmp nsz olt float %a0, 128.000000e+00
6565
%2 = select i1 %1, float -5.000000e-01, float 5.000000e-01

llvm/test/CodeGen/ARM/fp16-instructions.ll

Lines changed: 29 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -700,9 +700,9 @@ define half @select_cc1(ptr %a0) {
700700

701701
; CHECK-LABEL: select_cc1:
702702

703-
; CHECK-HARDFP-FULLFP16: vcmp.f16
704-
; CHECK-HARDFP-FULLFP16-NEXT: vmrs APSR_nzcv, fpscr
705-
; CHECK-HARDFP-FULLFP16-NEXT: vseleq.f16 s0,
703+
; CHECK-HARDFP-FULLFP16: vcmp.f16
704+
; CHECK-HARDFP-FULLFP16: vmrs APSR_nzcv, fpscr
705+
; CHECK-HARDFP-FULLFP16: vseleq.f16 s0,
706706

707707
; CHECK-SOFTFP-FP16-A32: vcmp.f32
708708
; CHECK-SOFTFP-FP16-A32-NEXT: vmrs APSR_nzcv, fpscr
@@ -728,9 +728,9 @@ define half @select_cc_ge1(ptr %a0) {
728728

729729
; CHECK-LABEL: select_cc_ge1:
730730

731-
; CHECK-HARDFP-FULLFP16: vcmp.f16
732-
; CHECK-HARDFP-FULLFP16-NEXT: vmrs APSR_nzcv, fpscr
733-
; CHECK-HARDFP-FULLFP16-NEXT: vselge.f16 s0,
731+
; CHECK-HARDFP-FULLFP16: vcmp.f16
732+
; CHECK-HARDFP-FULLFP16: vmrs APSR_nzcv, fpscr
733+
; CHECK-HARDFP-FULLFP16: vselge.f16 s0,
734734

735735
; CHECK-SOFTFP-FP16-A32: vcmp.f32
736736
; CHECK-SOFTFP-FP16-A32-NEXT: vmrs APSR_nzcv, fpscr
@@ -751,9 +751,9 @@ define half @select_cc_ge2(ptr %a0) {
751751

752752
; CHECK-LABEL: select_cc_ge2:
753753

754-
; CHECK-HARDFP-FULLFP16: vcmp.f16
755-
; CHECK-HARDFP-FULLFP16-NEXT: vmrs APSR_nzcv, fpscr
756-
; CHECK-HARDFP-FULLFP16-NEXT: vselge.f16 s0,
754+
; CHECK-HARDFP-FULLFP16: vcmp.f16
755+
; CHECK-HARDFP-FULLFP16: vmrs APSR_nzcv, fpscr
756+
; CHECK-HARDFP-FULLFP16: vselge.f16 s0,
757757

758758
; CHECK-SOFTFP-FP16-A32: vcmp.f32
759759
; CHECK-SOFTFP-FP16-A32-NEXT: vmrs APSR_nzcv, fpscr
@@ -774,9 +774,9 @@ define half @select_cc_ge3(ptr %a0) {
774774

775775
; CHECK-LABEL: select_cc_ge3:
776776

777-
; CHECK-HARDFP-FULLFP16: vcmp.f16
778-
; CHECK-HARDFP-FULLFP16-NEXT: vmrs APSR_nzcv, fpscr
779-
; CHECK-HARDFP-FULLFP16-NEXT: vselge.f16 s0,
777+
; CHECK-HARDFP-FULLFP16: vcmp.f16
778+
; CHECK-HARDFP-FULLFP16: vmrs APSR_nzcv, fpscr
779+
; CHECK-HARDFP-FULLFP16: vselge.f16 s0,
780780

781781
; CHECK-SOFTFP-FP16-A32: vcmp.f32
782782
; CHECK-SOFTFP-FP16-A32-NEXT: vmrs APSR_nzcv, fpscr
@@ -797,9 +797,9 @@ define half @select_cc_ge4(ptr %a0) {
797797

798798
; CHECK-LABEL: select_cc_ge4:
799799

800-
; CHECK-HARDFP-FULLFP16: vcmp.f16
801-
; CHECK-HARDFP-FULLFP16-NEXT: vmrs APSR_nzcv, fpscr
802-
; CHECK-HARDFP-FULLFP16-NEXT: vselge.f16 s0, s{{.}}, s{{.}}
800+
; CHECK-HARDFP-FULLFP16: vcmp.f16
801+
; CHECK-HARDFP-FULLFP16: vmrs APSR_nzcv, fpscr
802+
; CHECK-HARDFP-FULLFP16: vselge.f16 s0, s{{.}}, s{{.}}
803803

804804
; CHECK-SOFTFP-FP16-A32: vcmp.f32
805805
; CHECK-SOFTFP-FP16-A32-NEXT: vmrs APSR_nzcv, fpscr
@@ -821,9 +821,9 @@ define half @select_cc_gt1(ptr %a0) {
821821

822822
; CHECK-LABEL: select_cc_gt1:
823823

824-
; CHECK-HARDFP-FULLFP16: vcmp.f16
825-
; CHECK-HARDFP-FULLFP16-NEXT: vmrs APSR_nzcv, fpscr
826-
; CHECK-HARDFP-FULLFP16-NEXT: vselgt.f16 s0, s{{.}}, s{{.}}
824+
; CHECK-HARDFP-FULLFP16: vcmp.f16
825+
; CHECK-HARDFP-FULLFP16: vmrs APSR_nzcv, fpscr
826+
; CHECK-HARDFP-FULLFP16: vselgt.f16 s0, s{{.}}, s{{.}}
827827

828828
; CHECK-SOFTFP-FP16-A32: vcmp.f32
829829
; CHECK-SOFTFP-FP16-A32-NEXT: vmrs APSR_nzcv, fpscr
@@ -844,9 +844,9 @@ define half @select_cc_gt2(ptr %a0) {
844844

845845
; CHECK-LABEL: select_cc_gt2:
846846

847-
; CHECK-HARDFP-FULLFP16: vcmp.f16
848-
; CHECK-HARDFP-FULLFP16-NEXT: vmrs APSR_nzcv, fpscr
849-
; CHECK-HARDFP-FULLFP16-NEXT: vselgt.f16 s0, s{{.}}, s{{.}}
847+
; CHECK-HARDFP-FULLFP16: vcmp.f16
848+
; CHECK-HARDFP-FULLFP16: vmrs APSR_nzcv, fpscr
849+
; CHECK-HARDFP-FULLFP16: vselgt.f16 s0, s{{.}}, s{{.}}
850850

851851
; CHECK-SOFTFP-FP16-A32: vcmp.f32
852852
; CHECK-SOFTFP-FP16-A32-NEXT: vmrs APSR_nzcv, fpscr
@@ -867,9 +867,9 @@ define half @select_cc_gt3(ptr %a0) {
867867

868868
; CHECK-LABEL: select_cc_gt3:
869869

870-
; CHECK-HARDFP-FULLFP16: vcmp.f16
871-
; CHECK-HARDFP-FULLFP16-NEXT: vmrs APSR_nzcv, fpscr
872-
; CHECK-HARDFP-FULLFP16-NEXT: vselgt.f16 s0, s{{.}}, s{{.}}
870+
; CHECK-HARDFP-FULLFP16: vcmp.f16
871+
; CHECK-HARDFP-FULLFP16: vmrs APSR_nzcv, fpscr
872+
; CHECK-HARDFP-FULLFP16: vselgt.f16 s0, s{{.}}, s{{.}}
873873

874874
; CHECK-SOFTFP-FP16-A32: vcmp.f32
875875
; CHECK-SOFTFP-FP16-A32-NEXT: vmrs APSR_nzcv, fpscr
@@ -890,9 +890,9 @@ define half @select_cc_gt4(ptr %a0) {
890890

891891
; CHECK-LABEL: select_cc_gt4:
892892

893-
; CHECK-HARDFP-FULLFP16: vcmp.f16
894-
; CHECK-HARDFP-FULLFP16-NEXT: vmrs APSR_nzcv, fpscr
895-
; CHECK-HARDFP-FULLFP16-NEXT: vselgt.f16 s0, s{{.}}, s{{.}}
893+
; CHECK-HARDFP-FULLFP16: vcmp.f16
894+
; CHECK-HARDFP-FULLFP16: vmrs APSR_nzcv, fpscr
895+
; CHECK-HARDFP-FULLFP16: vselgt.f16 s0, s{{.}}, s{{.}}
896896

897897
; CHECK-SOFTFP-FP16-A32: vcmp.f32
898898
; CHECK-SOFTFP-FP16-A32-NEXT: vmrs APSR_nzcv, fpscr
@@ -923,10 +923,10 @@ entry:
923923
; CHECK-LABEL: select_cc4:
924924

925925
; CHECK-HARDFP-FULLFP16: vldr.16 [[S2:s[0-9]]], .LCPI{{.*}}
926+
; CHECK-HARDFP-FULLFP16: vcmp.f16 s0, [[S2]]
926927
; CHECK-HARDFP-FULLFP16: vldr.16 [[S4:s[0-9]]], .LCPI{{.*}}
928+
; CHECK-HARDFP-FULLFP16: vmrs APSR_nzcv, fpscr
927929
; CHECK-HARDFP-FULLFP16: vmov.f16 [[S6:s[0-9]]], #-2.000000e+00
928-
; CHECK-HARDFP-FULLFP16: vcmp.f16 s0, [[S2]]
929-
; CHECK-HARDFP-FULLFP16-NEXT: vmrs APSR_nzcv, fpscr
930930
; CHECK-HARDFP-FULLFP16-NEXT: vseleq.f16 [[S0:s[0-9]]], [[S6]], [[S4]]
931931
; CHECK-HARDFP-FULLFP16-NEXT: vselvs.f16 s0, [[S6]], [[S0]]
932932

0 commit comments

Comments
 (0)