Skip to content

Commit aff98e4

Browse files
authored
[ARM] Stop gluing 1-bit shifts (#116547)
1. When two (or more) nodes are glued, DAG scheduler will always schedule them as one piece, i.e. it will not allow any instructions to be scheduled between them. It does so because if nodes are glued this usually means that there is an implicit register dependency between them, and an intervening node could clobber this physical register. When emitting such nodes into machine IR, they will also be stuck together, e.g.: ``` %9:gpr = MOVsrl_glue killed %8, implicit-def $cpsr %10:gpr = RRX %3, implicit $cpsr ``` 2. If a node has Glue result, SelectionDAG will not try to CSE this node. If it did, it would break the implicit physical register dependency. In practice this means that if a node with Glue result has multiple uses, it has to be duplicated before each use. This is the reason `ARMTargetLowering::duplicateCmp` exists. When using normal data dependency, dependent nodes can freely be scheduled around. If there is a physical register dependency between nodes, the physical register will be copied to/from a virtual register, allowing other nodes to intervene between them. The resulting machine IR might look like this: ``` %9:gpr = LSRs1 killed %8, implicit-def $cpsr %10:gpr = COPY $cpsr %11:gpr = ORRrsi killed %9, %3, 242, 14 /* CC::al */, $noreg, $noreg %12:gpr = BICri killed %11, -2147483648, 14 /* CC::al */, $noreg, $noreg $cpsr = COPY %10 %13:gpr = RRX %3, implicit $cpsr ``` The two copies are likely to be eliminated by register coalescer, given that there are no instructions between them that clobber this physical register. 
If the copies are unwanted in the first place (they could be expensive or impossible), DAG scheduler will try to avoid inserting them wherever possible, and the resulting machine IR will look like this: ``` %9:gpr = LSRs1 killed %8, implicit-def $cpsr %10:gpr = ORRrsi killed %9, %3, 242, 14 /* CC::al */, $noreg, $noreg %11:gpr = BICri killed %10, -2147483648, 14 /* CC::al */, $noreg, $noreg %12:gpr = RRX %3, implicit $cpsr ``` On ARM, arithmetic operations and LSLS already use the new data flow approach. This patch extends it to include 1-bit shifts. Pull Request: #116547
1 parent 27dcae5 commit aff98e4

File tree

8 files changed

+84
-68
lines changed

8 files changed

+84
-68
lines changed

llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2590,14 +2590,14 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
25902590
return true;
25912591
}
25922592

2593-
case ARM::MOVsrl_glue:
2594-
case ARM::MOVsra_glue: {
2593+
case ARM::LSRs1:
2594+
case ARM::ASRs1: {
25952595
// These are just fancy MOVs instructions.
25962596
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVsi),
25972597
MI.getOperand(0).getReg())
25982598
.add(MI.getOperand(1))
25992599
.addImm(ARM_AM::getSORegOpc(
2600-
(Opcode == ARM::MOVsrl_glue ? ARM_AM::lsr : ARM_AM::asr), 1))
2600+
(Opcode == ARM::LSRs1 ? ARM_AM::lsr : ARM_AM::asr), 1))
26012601
.add(predOps(ARMCC::AL))
26022602
.addReg(ARM::CPSR, RegState::Define);
26032603
MI.eraseFromParent();

llvm/lib/Target/ARM/ARMISelLowering.cpp

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -149,6 +149,9 @@ MVEMaxSupportedInterleaveFactor("mve-max-interleave-factor", cl::Hidden,
149149
cl::desc("Maximum interleave factor for MVE VLDn to generate."),
150150
cl::init(2));
151151

152+
/// Value type used for "flags" operands / results (either CPSR or FPSCR_NZCV).
153+
constexpr MVT FlagsVT = MVT::i32;
154+
152155
// The APCS parameter registers.
153156
static const MCPhysReg GPRArgRegs[] = {
154157
ARM::R0, ARM::R1, ARM::R2, ARM::R3
@@ -1730,14 +1733,14 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
17301733
MAKE_CASE(ARMISD::ASRL)
17311734
MAKE_CASE(ARMISD::LSRL)
17321735
MAKE_CASE(ARMISD::LSLL)
1733-
MAKE_CASE(ARMISD::SRL_GLUE)
1734-
MAKE_CASE(ARMISD::SRA_GLUE)
1736+
MAKE_CASE(ARMISD::LSLS)
1737+
MAKE_CASE(ARMISD::LSRS1)
1738+
MAKE_CASE(ARMISD::ASRS1)
17351739
MAKE_CASE(ARMISD::RRX)
17361740
MAKE_CASE(ARMISD::ADDC)
17371741
MAKE_CASE(ARMISD::ADDE)
17381742
MAKE_CASE(ARMISD::SUBC)
17391743
MAKE_CASE(ARMISD::SUBE)
1740-
MAKE_CASE(ARMISD::LSLS)
17411744
MAKE_CASE(ARMISD::VMOVRRD)
17421745
MAKE_CASE(ARMISD::VMOVDRR)
17431746
MAKE_CASE(ARMISD::VMOVhr)
@@ -6846,10 +6849,10 @@ static SDValue Expand64BitShift(SDNode *N, SelectionDAG &DAG,
68466849
SDValue Lo, Hi;
68476850
std::tie(Lo, Hi) = DAG.SplitScalar(N->getOperand(0), dl, MVT::i32, MVT::i32);
68486851

6849-
// First, build a SRA_GLUE/SRL_GLUE op, which shifts the top part by one and
6850-
// captures the result into a carry flag.
6851-
unsigned Opc = N->getOpcode() == ISD::SRL ? ARMISD::SRL_GLUE:ARMISD::SRA_GLUE;
6852-
Hi = DAG.getNode(Opc, dl, DAG.getVTList(MVT::i32, MVT::Glue), Hi);
6852+
// First, build a LSRS1/ASRS1 op, which shifts the top part by one and
6853+
// captures the shifted out bit into a carry flag.
6854+
unsigned Opc = N->getOpcode() == ISD::SRL ? ARMISD::LSRS1 : ARMISD::ASRS1;
6855+
Hi = DAG.getNode(Opc, dl, DAG.getVTList(MVT::i32, FlagsVT), Hi);
68536856

68546857
// The low part is an ARMISD::RRX operand, which shifts the carry in.
68556858
Lo = DAG.getNode(ARMISD::RRX, dl, MVT::i32, Lo, Hi.getValue(1));

llvm/lib/Target/ARM/ARMISelLowering.h

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -101,15 +101,15 @@ class VectorType;
101101

102102
BCC_i64,
103103

104-
SRL_GLUE, // V,Flag = srl_flag X -> srl X, 1 + save carry out.
105-
SRA_GLUE, // V,Flag = sra_flag X -> sra X, 1 + save carry out.
106-
RRX, // V = RRX X, Flag -> srl X, 1 + shift in carry flag.
104+
LSLS, // Flag-setting shift left.
105+
LSRS1, // Flag-setting logical shift right by one bit.
106+
ASRS1, // Flag-setting arithmetic shift right by one bit.
107+
RRX, // Shift right one bit with carry in.
107108

108109
ADDC, // Add with carry
109110
ADDE, // Add using carry
110111
SUBC, // Sub with carry
111112
SUBE, // Sub using carry
112-
LSLS, // Shift left producing carry
113113

114114
VMOVRRD, // double to two gprs.
115115
VMOVDRR, // Two gprs to double.

llvm/lib/Target/ARM/ARMInstrInfo.td

Lines changed: 27 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,9 @@
1414
// ARM specific DAG Nodes.
1515
//
1616

17+
/// Value type used for "flags" operands / results (either CPSR or FPSCR_NZCV).
18+
defvar FlagsVT = i32;
19+
1720
// Type profiles.
1821
def SDT_ARMCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32>,
1922
SDTCisVT<1, i32> ]>;
@@ -77,6 +80,18 @@ def SDT_ARMMEMCPY : SDTypeProfile<2, 3, [SDTCisVT<0, i32>, SDTCisVT<1, i32>,
7780
SDTCisVT<2, i32>, SDTCisVT<3, i32>,
7881
SDTCisVT<4, i32>]>;
7982

83+
def SDTIntUnaryOpWithFlagsOut : SDTypeProfile<2, 1, [
84+
SDTCisInt<0>, // result
85+
SDTCisVT<1, FlagsVT>, // out flags
86+
SDTCisSameAs<2, 0> // operand
87+
]>;
88+
89+
def SDTIntUnaryOpWithFlagsIn : SDTypeProfile<1, 2, [
90+
SDTCisInt<0>, // result
91+
SDTCisSameAs<1, 0>, // operand
92+
SDTCisVT<2, FlagsVT> // in flags (index 2: second operand, after result #0 and operand #1)
93+
]>;
94+
8095
def SDTBinaryArithWithFlags : SDTypeProfile<2, 2,
8196
[SDTCisSameAs<0, 2>,
8297
SDTCisSameAs<0, 3>,
@@ -191,9 +206,9 @@ def ARMasrl : SDNode<"ARMISD::ASRL", SDT_ARMIntShiftParts, []>;
191206
def ARMlsrl : SDNode<"ARMISD::LSRL", SDT_ARMIntShiftParts, []>;
192207
def ARMlsll : SDNode<"ARMISD::LSLL", SDT_ARMIntShiftParts, []>;
193208

194-
def ARMsrl_glue : SDNode<"ARMISD::SRL_GLUE", SDTIntUnaryOp, [SDNPOutGlue]>;
195-
def ARMsra_glue : SDNode<"ARMISD::SRA_GLUE", SDTIntUnaryOp, [SDNPOutGlue]>;
196-
def ARMrrx : SDNode<"ARMISD::RRX" , SDTIntUnaryOp, [SDNPInGlue ]>;
209+
def ARMlsrs1 : SDNode<"ARMISD::LSRS1", SDTIntUnaryOpWithFlagsOut>;
210+
def ARMasrs1 : SDNode<"ARMISD::ASRS1", SDTIntUnaryOpWithFlagsOut>;
211+
def ARMrrx : SDNode<"ARMISD::RRX" , SDTIntUnaryOpWithFlagsIn>;
197212

198213
def ARMaddc : SDNode<"ARMISD::ADDC", SDTBinaryArithWithFlags,
199214
[SDNPCommutative]>;
@@ -3730,20 +3745,17 @@ def : ARMPat<(or GPR:$src, 0xffff0000), (MOVTi16 GPR:$src, 0xffff)>,
37303745
Requires<[IsARM, HasV6T2]>;
37313746

37323747
let Uses = [CPSR] in
3733-
def RRX: PseudoInst<(outs GPR:$Rd), (ins GPR:$Rm), IIC_iMOVsi,
3734-
[(set GPR:$Rd, (ARMrrx GPR:$Rm))]>, UnaryDP,
3735-
Requires<[IsARM]>, Sched<[WriteALU]>;
3736-
3737-
// These aren't really mov instructions, but we have to define them this way
3738-
// due to glue operands.
3748+
def RRX : PseudoInst<(outs GPR:$Rd), (ins GPR:$Rm), IIC_iMOVsi,
3749+
[(set GPR:$Rd, (ARMrrx GPR:$Rm, CPSR))]>,
3750+
UnaryDP, Requires<[IsARM]>, Sched<[WriteALU]>;
37393751

37403752
let Defs = [CPSR] in {
3741-
def MOVsrl_glue : PseudoInst<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVsi,
3742-
[(set GPR:$dst, (ARMsrl_glue GPR:$src))]>, UnaryDP,
3743-
Sched<[WriteALU]>, Requires<[IsARM]>;
3744-
def MOVsra_glue : PseudoInst<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVsi,
3745-
[(set GPR:$dst, (ARMsra_glue GPR:$src))]>, UnaryDP,
3746-
Sched<[WriteALU]>, Requires<[IsARM]>;
3753+
def LSRs1 : PseudoInst<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVsi,
3754+
[(set GPR:$dst, CPSR, (ARMlsrs1 GPR:$src))]>,
3755+
UnaryDP, Sched<[WriteALU]>, Requires<[IsARM]>;
3756+
def ASRs1 : PseudoInst<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVsi,
3757+
[(set GPR:$dst, CPSR, (ARMasrs1 GPR:$src))]>,
3758+
UnaryDP, Sched<[WriteALU]>, Requires<[IsARM]>;
37473759
}
37483760

37493761
//===----------------------------------------------------------------------===//

llvm/lib/Target/ARM/ARMInstrThumb2.td

Lines changed: 13 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -2787,8 +2787,9 @@ def : T2Pat<(rotr rGPR:$lhs, (and rGPR:$rhs, lo5AllOne)),
27872787

27882788
let Uses = [CPSR] in {
27892789
def t2RRX : T2sTwoReg<(outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iMOVsi,
2790-
"rrx", "\t$Rd, $Rm",
2791-
[(set rGPR:$Rd, (ARMrrx rGPR:$Rm))]>, Sched<[WriteALU]> {
2790+
"rrx", "\t$Rd, $Rm",
2791+
[(set rGPR:$Rd, (ARMrrx rGPR:$Rm, CPSR))]>,
2792+
Sched<[WriteALU]> {
27922793
let Inst{31-27} = 0b11101;
27932794
let Inst{26-25} = 0b01;
27942795
let Inst{24-21} = 0b0010;
@@ -2800,12 +2801,13 @@ def t2RRX : T2sTwoReg<(outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iMOVsi,
28002801
}
28012802
}
28022803

2804+
// These differ from t2LSRri / t2ASRri in that they are flag-setting
2805+
// and have a hardcoded shift amount = 1.
28032806
let isCodeGenOnly = 1, Defs = [CPSR] in {
2804-
def t2MOVsrl_glue : T2TwoRegShiftImm<
2805-
(outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iMOVsi,
2806-
"lsrs", ".w\t$Rd, $Rm, #1",
2807-
[(set rGPR:$Rd, (ARMsrl_glue rGPR:$Rm))]>,
2808-
Sched<[WriteALU]> {
2807+
def t2LSRs1 : T2TwoRegShiftImm<(outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iMOVsi,
2808+
"lsrs", ".w\t$Rd, $Rm, #1",
2809+
[(set rGPR:$Rd, CPSR, (ARMlsrs1 rGPR:$Rm))]>,
2810+
Sched<[WriteALU]> {
28092811
let Inst{31-27} = 0b11101;
28102812
let Inst{26-25} = 0b01;
28112813
let Inst{24-21} = 0b0010;
@@ -2816,11 +2818,10 @@ def t2MOVsrl_glue : T2TwoRegShiftImm<
28162818
let Inst{14-12} = 0b000;
28172819
let Inst{7-6} = 0b01;
28182820
}
2819-
def t2MOVsra_glue : T2TwoRegShiftImm<
2820-
(outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iMOVsi,
2821-
"asrs", ".w\t$Rd, $Rm, #1",
2822-
[(set rGPR:$Rd, (ARMsra_glue rGPR:$Rm))]>,
2823-
Sched<[WriteALU]> {
2821+
def t2ASRs1 : T2TwoRegShiftImm<(outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iMOVsi,
2822+
"asrs", ".w\t$Rd, $Rm, #1",
2823+
[(set rGPR:$Rd, CPSR, (ARMasrs1 rGPR:$Rm))]>,
2824+
Sched<[WriteALU]> {
28242825
let Inst{31-27} = 0b11101;
28252826
let Inst{26-25} = 0b01;
28262827
let Inst{24-21} = 0b0010;

llvm/lib/Target/ARM/ARMScheduleM7.td

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -325,7 +325,7 @@ def M7Ex1ReadNoFastBypass : SchedReadAdvance<-1, [WriteLd, M7LoadLatency1]>;
325325
def : InstRW<[WriteALUsi, M7Ex1ReadNoFastBypass, M7Read_ISS],
326326
(instregex "t2(ADC|ADDS|ADD|BIC|EOR|ORN|ORR|RSBS|RSB|SBC|SUBS)rs$",
327327
"t2(SUB|CMP|CMNz|TEQ|TST)rs$",
328-
"t2MOVsr(a|l)")>;
328+
"t2(A|L)SRs1$")>;
329329
def : InstRW<[WriteALUsi, M7Read_ISS],
330330
(instregex "t2MVNs")>;
331331

@@ -335,7 +335,7 @@ def : InstRW<[WriteALUsi, M7Read_ISS],
335335
// but the results prove to be better than trying to get them exact.
336336

337337
def : InstRW<[M7WriteShift2, M7Read_ISS], (instregex "t2RRX$")>;
338-
def : InstRW<[WriteALUsi], (instregex "(t|t2)(LSL|LSR|ASR|ROR)")>;
338+
def : InstRW<[WriteALUsi], (instregex "(t|t2)(LSL|LSR|ASR|ROR)r", "tROR")>;
339339

340340
// Instructions that use the shifter, but have normal timing.
341341

llvm/lib/Target/ARM/ARMScheduleM85.td

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -436,7 +436,7 @@ def : InstRW<[M85WriteALUsi, M85ReadALUsi],
436436
def : InstRW<[M85WriteShift2],
437437
(instregex "t2RRX$")>;
438438
def : InstRW<[WriteALU],
439-
(instregex "(t|t2)(LSL|LSR|ASR|ROR|SBFX|UBFX)", "t2MOVsr(a|l)")>;
439+
(instregex "(t|t2)(LSL|LSR|ASR|ROR|SBFX|UBFX)")>;
440440

441441
// Instructions that use the shifter, but have normal timing
442442

llvm/test/CodeGen/ARM/urem-seteq-illegal-types.ll

Lines changed: 24 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -628,13 +628,13 @@ define i1 @test_urem_larger(i63 %X) nounwind {
628628
; ARM5-NEXT: mla r0, r1, r12, r4
629629
; ARM5-NEXT: bic r0, r0, #-2147483648
630630
; ARM5-NEXT: lsrs r0, r0, #1
631-
; ARM5-NEXT: rrx r1, r3
631+
; ARM5-NEXT: rrx r2, r3
632632
; ARM5-NEXT: orr r0, r0, r3, lsl #30
633633
; ARM5-NEXT: ldr r3, .LCPI5_2
634-
; ARM5-NEXT: bic r2, r0, #-2147483648
634+
; ARM5-NEXT: bic r1, r0, #-2147483648
635635
; ARM5-NEXT: mov r0, #0
636-
; ARM5-NEXT: subs r1, r1, r3
637-
; ARM5-NEXT: sbcs r1, r2, #1
636+
; ARM5-NEXT: subs r2, r2, r3
637+
; ARM5-NEXT: sbcs r1, r1, #1
638638
; ARM5-NEXT: movlo r0, #1
639639
; ARM5-NEXT: pop {r4, pc}
640640
; ARM5-NEXT: .p2align 2
@@ -656,13 +656,13 @@ define i1 @test_urem_larger(i63 %X) nounwind {
656656
; ARM6-NEXT: mla r0, r1, r12, r0
657657
; ARM6-NEXT: bic r0, r0, #-2147483648
658658
; ARM6-NEXT: lsrs r0, r0, #1
659-
; ARM6-NEXT: rrx r1, r3
659+
; ARM6-NEXT: rrx r2, r3
660660
; ARM6-NEXT: orr r0, r0, r3, lsl #30
661661
; ARM6-NEXT: ldr r3, .LCPI5_2
662-
; ARM6-NEXT: bic r2, r0, #-2147483648
662+
; ARM6-NEXT: bic r1, r0, #-2147483648
663663
; ARM6-NEXT: mov r0, #0
664-
; ARM6-NEXT: subs r1, r1, r3
665-
; ARM6-NEXT: sbcs r1, r2, #1
664+
; ARM6-NEXT: subs r2, r2, r3
665+
; ARM6-NEXT: sbcs r1, r1, #1
666666
; ARM6-NEXT: movlo r0, #1
667667
; ARM6-NEXT: pop {r11, pc}
668668
; ARM6-NEXT: .p2align 2
@@ -686,14 +686,14 @@ define i1 @test_urem_larger(i63 %X) nounwind {
686686
; ARM7-NEXT: mla r0, r1, r12, r0
687687
; ARM7-NEXT: bic r0, r0, #-2147483648
688688
; ARM7-NEXT: lsrs r0, r0, #1
689-
; ARM7-NEXT: rrx r1, r3
689+
; ARM7-NEXT: rrx r2, r3
690690
; ARM7-NEXT: orr r0, r0, r3, lsl #30
691691
; ARM7-NEXT: movw r3, #24026
692-
; ARM7-NEXT: bic r2, r0, #-2147483648
692+
; ARM7-NEXT: bic r1, r0, #-2147483648
693693
; ARM7-NEXT: movt r3, #48461
694-
; ARM7-NEXT: subs r1, r1, r3
694+
; ARM7-NEXT: subs r2, r2, r3
695695
; ARM7-NEXT: mov r0, #0
696-
; ARM7-NEXT: sbcs r1, r2, #1
696+
; ARM7-NEXT: sbcs r1, r1, #1
697697
; ARM7-NEXT: movwlo r0, #1
698698
; ARM7-NEXT: pop {r11, pc}
699699
;
@@ -709,14 +709,14 @@ define i1 @test_urem_larger(i63 %X) nounwind {
709709
; ARM8-NEXT: mla r0, r1, r12, r0
710710
; ARM8-NEXT: bic r0, r0, #-2147483648
711711
; ARM8-NEXT: lsrs r0, r0, #1
712-
; ARM8-NEXT: rrx r1, r3
712+
; ARM8-NEXT: rrx r2, r3
713713
; ARM8-NEXT: orr r0, r0, r3, lsl #30
714714
; ARM8-NEXT: movw r3, #24026
715-
; ARM8-NEXT: bic r2, r0, #-2147483648
715+
; ARM8-NEXT: bic r1, r0, #-2147483648
716716
; ARM8-NEXT: movt r3, #48461
717-
; ARM8-NEXT: subs r1, r1, r3
717+
; ARM8-NEXT: subs r2, r2, r3
718718
; ARM8-NEXT: mov r0, #0
719-
; ARM8-NEXT: sbcs r1, r2, #1
719+
; ARM8-NEXT: sbcs r1, r1, #1
720720
; ARM8-NEXT: movwlo r0, #1
721721
; ARM8-NEXT: pop {r11, pc}
722722
;
@@ -732,14 +732,14 @@ define i1 @test_urem_larger(i63 %X) nounwind {
732732
; NEON7-NEXT: mla r0, r1, r12, r0
733733
; NEON7-NEXT: bic r0, r0, #-2147483648
734734
; NEON7-NEXT: lsrs r0, r0, #1
735-
; NEON7-NEXT: rrx r1, r3
735+
; NEON7-NEXT: rrx r2, r3
736736
; NEON7-NEXT: orr r0, r0, r3, lsl #30
737737
; NEON7-NEXT: movw r3, #24026
738-
; NEON7-NEXT: bic r2, r0, #-2147483648
738+
; NEON7-NEXT: bic r1, r0, #-2147483648
739739
; NEON7-NEXT: movt r3, #48461
740-
; NEON7-NEXT: subs r1, r1, r3
740+
; NEON7-NEXT: subs r2, r2, r3
741741
; NEON7-NEXT: mov r0, #0
742-
; NEON7-NEXT: sbcs r1, r2, #1
742+
; NEON7-NEXT: sbcs r1, r1, #1
743743
; NEON7-NEXT: movwlo r0, #1
744744
; NEON7-NEXT: pop {r11, pc}
745745
;
@@ -755,14 +755,14 @@ define i1 @test_urem_larger(i63 %X) nounwind {
755755
; NEON8-NEXT: mla r0, r1, r12, r0
756756
; NEON8-NEXT: bic r0, r0, #-2147483648
757757
; NEON8-NEXT: lsrs r0, r0, #1
758-
; NEON8-NEXT: rrx r1, r3
758+
; NEON8-NEXT: rrx r2, r3
759759
; NEON8-NEXT: orr r0, r0, r3, lsl #30
760760
; NEON8-NEXT: movw r3, #24026
761-
; NEON8-NEXT: bic r2, r0, #-2147483648
761+
; NEON8-NEXT: bic r1, r0, #-2147483648
762762
; NEON8-NEXT: movt r3, #48461
763-
; NEON8-NEXT: subs r1, r1, r3
763+
; NEON8-NEXT: subs r2, r2, r3
764764
; NEON8-NEXT: mov r0, #0
765-
; NEON8-NEXT: sbcs r1, r2, #1
765+
; NEON8-NEXT: sbcs r1, r1, #1
766766
; NEON8-NEXT: movwlo r0, #1
767767
; NEON8-NEXT: pop {r11, pc}
768768
%urem = urem i63 %X, 1234567890

0 commit comments

Comments
 (0)