Skip to content

Commit c4c9d4f

Browse files
authored
[M68k] Add support for MOVEQ instruction (#88542)
Add support for the moveq instruction, which is both faster and smaller (1/2 to 1/3 the size) than a move with immediate to register. This change introduces the instruction, along with a set of pseudoinstructions that handle immediate moves to a register and are lowered post-RA. Pseudos are used because moveq can only write to the full register, which makes matching i8 and i16 immediate loads difficult in tablegen. Furthermore, selecting moveq before RA constrains the immediate to be moved into a data register, which may not be optimal. The bulk of this change consists of fixes to existing tests, which cover the new functionality sufficiently.
1 parent 39f1b2d commit c4c9d4f

25 files changed

+162
-74
lines changed

llvm/lib/Target/M68k/M68kExpandPseudo.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,13 @@ bool M68kExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
8080
default:
8181
return false;
8282

83+
case M68k::MOVI8di:
84+
return TII->ExpandMOVI(MIB, MVT::i8);
85+
case M68k::MOVI16ri:
86+
return TII->ExpandMOVI(MIB, MVT::i16);
87+
case M68k::MOVI32ri:
88+
return TII->ExpandMOVI(MIB, MVT::i32);
89+
8390
case M68k::MOVXd16d8:
8491
return TII->ExpandMOVX_RR(MIB, MVT::i16, MVT::i8);
8592
case M68k::MOVXd32d8:

llvm/lib/Target/M68k/M68kInstrData.td

Lines changed: 38 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919
///
2020
/// Pseudo:
2121
///
22-
/// MOVSX [x] MOVZX [x] MOVX [x]
22+
/// MOVI [x] MOVSX [x] MOVZX [x] MOVX [x]
2323
///
2424
/// Map:
2525
///
@@ -165,11 +165,12 @@ foreach AM = MxMoveSupportedAMs in {
165165
} // foreach AM
166166

167167
// R <- I
168+
// No pattern, as all immediate -> register moves are matched to the MOVI pseudo
168169
class MxMove_RI<MxType TYPE, string DST_REG, MxMoveEncoding ENC,
169170
MxImmOpBundle SRC = !cast<MxImmOpBundle>("MxOp"#TYPE.Size#"AddrMode_i"),
170171
MxOpBundle DST = !cast<MxOpBundle>("MxOp"#TYPE.Size#"AddrMode_"#DST_REG)>
171172
: MxMove<TYPE.Prefix, (outs DST.Op:$dst), (ins SRC.Op:$src),
172-
[(set TYPE.VT:$dst, SRC.ImmPat:$src)], ENC>;
173+
[(null_frag)], ENC>;
173174

174175
foreach REG = ["r", "a", "d"] in {
175176
foreach TYPE = !if(!eq(REG, "d"), [MxType8, MxType16, MxType32], [MxType16, MxType32]) in
@@ -242,6 +243,24 @@ def : Pat<(store MxType32.BPat :$src, MxType32.BPat :$dst),
242243
def : Pat<(store MxType32.BPat :$src, MxType32.JPat :$dst),
243244
(MOV32ji MxType32.JOp :$dst, MxType32.IOp :$src)>;
244245

246+
//===----------------------------------------------------------------------===//
247+
// MOVEQ
248+
//===----------------------------------------------------------------------===//
249+
250+
/// ------------+---------+---+-----------------------
251+
/// F E D C | B A 9 | 8 | 7 6 5 4 3 2 1 0
252+
/// ------------+---------+---+-----------------------
253+
/// 0 1 1 1 | REG | 0 | DATA
254+
/// ------------+---------+---+-----------------------
255+
256+
// No pattern, as all immediate -> register moves are matched to the MOVI pseudo
257+
let Defs = [CCR] in
258+
def MOVQ : MxInst<(outs MxDRD32:$dst), (ins Mxi8imm:$imm),
259+
"moveq\t$imm, $dst",
260+
[(null_frag)]> {
261+
let Inst = (descend 0b0111, (operand "$dst", 3), 0b0, (operand "$imm", 8));
262+
}
263+
245264
//===----------------------------------------------------------------------===//
246265
// MOVEM
247266
//
@@ -496,7 +515,23 @@ class MxPseudoMove_RR<MxType DST, MxType SRC, list<dag> PAT = []>
496515

497516
class MxPseudoMove_RM<MxType DST, MxOperand SRCOpd, list<dag> PAT = []>
498517
: MxPseudo<(outs DST.ROp:$dst), (ins SRCOpd:$src), PAT>;
499-
}
518+
519+
520+
// These Pseudos handle loading immediates to registers.
521+
// They are expanded post-RA into either move or moveq instructions,
522+
// depending on size, destination register class, and immediate value.
523+
// This is done with pseudoinstructions in order to not constrain RA to
524+
// data registers if moveq matches.
525+
class MxPseudoMove_DI<MxType TYPE>
526+
: MxPseudo<(outs TYPE.ROp:$dst), (ins TYPE.IOp:$src),
527+
[(set TYPE.ROp:$dst, imm:$src)]>;
528+
529+
// i8 imm -> reg can always be converted to moveq,
530+
// but we still emit a pseudo for consistency.
531+
def MOVI8di : MxPseudoMove_DI<MxType8d>;
532+
def MOVI16ri : MxPseudoMove_DI<MxType16r>;
533+
def MOVI32ri : MxPseudoMove_DI<MxType32r>;
534+
} // let Defs = [CCR]
500535

501536
/// This group of Pseudos is analogous to the real x86 extending moves, but
502537
/// since M68k does not have those we need to emulate. These instructions

llvm/lib/Target/M68k/M68kInstrInfo.cpp

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -346,6 +346,40 @@ void M68kInstrInfo::AddZExt(MachineBasicBlock &MBB,
346346
BuildMI(MBB, I, DL, get(And), Reg).addReg(Reg).addImm(Mask);
347347
}
348348

349+
// Convert MOVI to MOVQ if the target is a data register and the immediate
350+
// fits in a sign-extended i8, otherwise emit a plain MOV.
351+
bool M68kInstrInfo::ExpandMOVI(MachineInstrBuilder &MIB, MVT MVTSize) const {
352+
// Rewrites a MOVI pseudo in place into either MOVQ or a plain MOV16ri/MOV32ri.
// MVTSize is the width of the pseudo being expanded (the caller passes
// i8, i16 or i32). Always returns true.
//
// Operand 0 is the destination register, operand 1 the immediate to load.
Register Reg = MIB->getOperand(0).getReg();
353+
int64_t Imm = MIB->getOperand(1).getImm();
354+
bool IsAddressReg = false;
355+
356+
const auto *DR32 = RI.getRegClass(M68k::DR32RegClassID);
357+
const auto *AR32 = RI.getRegClass(M68k::AR32RegClassID);
358+
const auto *AR16 = RI.getRegClass(M68k::AR16RegClassID);
359+
360+
// A destination in either address register class rules out MOVQ for the
// i16/i32 pseudos (MOVQ's output operand is a 32-bit data register).
if (AR16->contains(Reg) || AR32->contains(Reg))
361+
IsAddressReg = true;
362+
363+
LLVM_DEBUG(dbgs() << "Expand " << *MIB.getInstr() << " to ");
364+
365+
// i8 pseudos always become MOVQ; wider pseudos only when the destination is
// not an address register and the immediate lies in [-128, 127].
if (MVTSize == MVT::i8 || (!IsAddressReg && Imm >= -128 && Imm <= 127)) {
366+
LLVM_DEBUG(dbgs() << "MOVEQ\n");
367+
368+
// We need to assign to the full register to make IV happy
// For i32 the destination is used unchanged; for i8/i16 it is replaced by
// the result of getMatchingMegaReg(Reg, DR32) -- presumably the 32-bit data
// register containing Reg; confirm against M68kRegisterInfo.
369+
Register SReg =
370+
MVTSize == MVT::i32 ? Reg : Register(RI.getMatchingMegaReg(Reg, DR32));
371+
assert(SReg && "No viable MEGA register available");
372+
373+
// Only the opcode descriptor and the destination operand are changed; the
// immediate operand is left as it was on the pseudo.
MIB->setDesc(get(M68k::MOVQ));
374+
MIB->getOperand(0).setReg(SReg);
375+
} else {
376+
LLVM_DEBUG(dbgs() << "MOVE\n");
377+
// i8 never reaches this branch, so the choice is MOV16ri for i16 and
// MOV32ri otherwise; operands are kept as they are.
MIB->setDesc(get(MVTSize == MVT::i16 ? M68k::MOV16ri : M68k::MOV32ri));
378+
}
379+
380+
// Both branches rewrite the instruction, so the pseudo is always reported as
// expanded.
return true;
381+
}
382+
349383
bool M68kInstrInfo::ExpandMOVX_RR(MachineInstrBuilder &MIB, MVT MVTDst,
350384
MVT MVTSrc) const {
351385
unsigned Move = MVTDst == MVT::i16 ? M68k::MOV16rr : M68k::MOV32rr;

llvm/lib/Target/M68k/M68kInstrInfo.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -302,6 +302,9 @@ class M68kInstrInfo : public M68kGenInstrInfo {
302302
void AddZExt(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
303303
DebugLoc DL, unsigned Reg, MVT From, MVT To) const;
304304

305+
/// Move immediate to register
306+
bool ExpandMOVI(MachineInstrBuilder &MIB, MVT MVTSize) const;
307+
305308
/// Move across register classes without extension
306309
bool ExpandMOVX_RR(MachineInstrBuilder &MIB, MVT MVTDst, MVT MVTSrc) const;
307310

llvm/test/CodeGen/M68k/Arith/add-with-overflow.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ define fastcc i1 @test6(i32 %v1, i32 %v2, ptr %X) nounwind {
3535
; CHECK-NEXT: ; %bb.1: ; %normal
3636
; CHECK-NEXT: move.l #0, (%a0)
3737
; CHECK-NEXT: .LBB1_2: ; %carry
38-
; CHECK-NEXT: move.b #0, %d0
38+
; CHECK-NEXT: moveq #0, %d0
3939
; CHECK-NEXT: rts
4040
entry:
4141
%t = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %v1, i32 %v2)

llvm/test/CodeGen/M68k/Arith/add.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ define fastcc void @test3(ptr inreg %a) nounwind {
4343
; CHECK-NEXT: suba.l #4, %sp
4444
; CHECK-NEXT: movem.l %d2, (0,%sp) ; 8-byte Folded Spill
4545
; CHECK-NEXT: move.l (%a0), %d0
46-
; CHECK-NEXT: move.l #0, %d1
46+
; CHECK-NEXT: moveq #0, %d1
4747
; CHECK-NEXT: move.l #-2147483648, %d2
4848
; CHECK-NEXT: add.l (4,%a0), %d2
4949
; CHECK-NEXT: addx.l %d0, %d1
@@ -64,7 +64,7 @@ define fastcc void @test4(ptr inreg %a) nounwind {
6464
; CHECK-NEXT: suba.l #4, %sp
6565
; CHECK-NEXT: movem.l %d2, (0,%sp) ; 8-byte Folded Spill
6666
; CHECK-NEXT: move.l (%a0), %d0
67-
; CHECK-NEXT: move.l #0, %d1
67+
; CHECK-NEXT: moveq #0, %d1
6868
; CHECK-NEXT: move.l #128, %d2
6969
; CHECK-NEXT: add.l (4,%a0), %d2
7070
; CHECK-NEXT: addx.l %d0, %d1

llvm/test/CodeGen/M68k/Arith/bitwise.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -242,7 +242,7 @@ define i64 @lshr64(i64 %a, i64 %b) nounwind {
242242
; CHECK-NEXT: add.l #-32, %d1
243243
; CHECK-NEXT: bmi .LBB18_1
244244
; CHECK-NEXT: ; %bb.2:
245-
; CHECK-NEXT: move.l #0, %d0
245+
; CHECK-NEXT: moveq #0, %d0
246246
; CHECK-NEXT: bra .LBB18_3
247247
; CHECK-NEXT: .LBB18_1:
248248
; CHECK-NEXT: move.l %d2, %d0
@@ -301,7 +301,7 @@ define i64 @ashr64(i64 %a, i64 %b) nounwind {
301301
; CHECK-NEXT: add.l #-32, %d3
302302
; CHECK-NEXT: bmi .LBB19_5
303303
; CHECK-NEXT: ; %bb.4:
304-
; CHECK-NEXT: move.l #31, %d2
304+
; CHECK-NEXT: moveq #31, %d2
305305
; CHECK-NEXT: .LBB19_5:
306306
; CHECK-NEXT: asr.l %d2, %d0
307307
; CHECK-NEXT: movem.l (0,%sp), %d2-%d3 ; 12-byte Folded Reload
@@ -322,7 +322,7 @@ define i64 @shl64(i64 %a, i64 %b) nounwind {
322322
; CHECK-NEXT: add.l #-32, %d0
323323
; CHECK-NEXT: bmi .LBB20_1
324324
; CHECK-NEXT: ; %bb.2:
325-
; CHECK-NEXT: move.l #0, %d1
325+
; CHECK-NEXT: moveq #0, %d1
326326
; CHECK-NEXT: bra .LBB20_3
327327
; CHECK-NEXT: .LBB20_1:
328328
; CHECK-NEXT: move.l %d2, %d1

llvm/test/CodeGen/M68k/Arith/divide-by-constant.ll

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ define zeroext i8 @test3(i8 zeroext %x, i8 zeroext %c) {
4040
; CHECK-NEXT: move.b (11,%sp), %d0
4141
; CHECK-NEXT: and.l #255, %d0
4242
; CHECK-NEXT: muls #171, %d0
43-
; CHECK-NEXT: move.w #9, %d1
43+
; CHECK-NEXT: moveq #9, %d1
4444
; CHECK-NEXT: lsr.w %d1, %d0
4545
; CHECK-NEXT: and.l #65535, %d0
4646
; CHECK-NEXT: rts
@@ -58,7 +58,7 @@ define signext i16 @test4(i16 signext %x) nounwind {
5858
; CHECK-NEXT: muls #1986, %d0
5959
; CHECK-NEXT: asr.l #8, %d0
6060
; CHECK-NEXT: asr.l #8, %d0
61-
; CHECK-NEXT: move.w #15, %d1
61+
; CHECK-NEXT: moveq #15, %d1
6262
; CHECK-NEXT: move.w %d0, %d2
6363
; CHECK-NEXT: lsr.w %d1, %d2
6464
; CHECK-NEXT: add.w %d2, %d0
@@ -94,7 +94,7 @@ define signext i16 @test6(i16 signext %x) nounwind {
9494
; CHECK-NEXT: muls #26215, %d0
9595
; CHECK-NEXT: asr.l #8, %d0
9696
; CHECK-NEXT: asr.l #8, %d0
97-
; CHECK-NEXT: move.w #15, %d1
97+
; CHECK-NEXT: moveq #15, %d1
9898
; CHECK-NEXT: move.w %d0, %d2
9999
; CHECK-NEXT: lsr.w %d1, %d2
100100
; CHECK-NEXT: asr.w #2, %d0
@@ -128,7 +128,7 @@ define i8 @test8(i8 %x) nounwind {
128128
; CHECK-NEXT: lsr.b #1, %d0
129129
; CHECK-NEXT: and.l #255, %d0
130130
; CHECK-NEXT: muls #211, %d0
131-
; CHECK-NEXT: move.w #13, %d1
131+
; CHECK-NEXT: moveq #13, %d1
132132
; CHECK-NEXT: lsr.w %d1, %d0
133133
; CHECK-NEXT: ; kill: def $bd0 killed $bd0 killed $d0
134134
; CHECK-NEXT: rts
@@ -143,7 +143,7 @@ define i8 @test9(i8 %x) nounwind {
143143
; CHECK-NEXT: lsr.b #2, %d0
144144
; CHECK-NEXT: and.l #255, %d0
145145
; CHECK-NEXT: muls #71, %d0
146-
; CHECK-NEXT: move.w #11, %d1
146+
; CHECK-NEXT: moveq #11, %d1
147147
; CHECK-NEXT: lsr.w %d1, %d0
148148
; CHECK-NEXT: ; kill: def $bd0 killed $bd0 killed $d0
149149
; CHECK-NEXT: rts
@@ -156,11 +156,11 @@ define i32 @testsize1(i32 %x) minsize nounwind {
156156
; CHECK: ; %bb.0: ; %entry
157157
; CHECK-NEXT: suba.l #4, %sp
158158
; CHECK-NEXT: movem.l %d2, (0,%sp) ; 8-byte Folded Spill
159-
; CHECK-NEXT: move.l #31, %d1
159+
; CHECK-NEXT: moveq #31, %d1
160160
; CHECK-NEXT: move.l (8,%sp), %d0
161161
; CHECK-NEXT: move.l %d0, %d2
162162
; CHECK-NEXT: asr.l %d1, %d2
163-
; CHECK-NEXT: move.l #27, %d1
163+
; CHECK-NEXT: moveq #27, %d1
164164
; CHECK-NEXT: lsr.l %d1, %d2
165165
; CHECK-NEXT: add.l %d2, %d0
166166
; CHECK-NEXT: asr.l #5, %d0

llvm/test/CodeGen/M68k/Arith/imul.ll

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ define i64 @mul4_64(i64 %A) {
1919
; CHECK-NEXT: suba.l #4, %sp
2020
; CHECK-NEXT: .cfi_def_cfa_offset -8
2121
; CHECK-NEXT: movem.l %d2, (0,%sp) ; 8-byte Folded Spill
22-
; CHECK-NEXT: move.l #30, %d0
22+
; CHECK-NEXT: moveq #30, %d0
2323
; CHECK-NEXT: move.l (12,%sp), %d1
2424
; CHECK-NEXT: move.l %d1, %d2
2525
; CHECK-NEXT: lsr.l %d0, %d2
@@ -38,7 +38,7 @@ define i32 @mul4096_32(i32 %A) {
3838
; CHECK-LABEL: mul4096_32:
3939
; CHECK: .cfi_startproc
4040
; CHECK-NEXT: ; %bb.0:
41-
; CHECK-NEXT: move.l #12, %d1
41+
; CHECK-NEXT: moveq #12, %d1
4242
; CHECK-NEXT: move.l (4,%sp), %d0
4343
; CHECK-NEXT: lsl.l %d1, %d0
4444
; CHECK-NEXT: rts
@@ -53,11 +53,11 @@ define i64 @mul4096_64(i64 %A) {
5353
; CHECK-NEXT: suba.l #8, %sp
5454
; CHECK-NEXT: .cfi_def_cfa_offset -12
5555
; CHECK-NEXT: movem.l %d2-%d3, (0,%sp) ; 12-byte Folded Spill
56-
; CHECK-NEXT: move.l #20, %d0
56+
; CHECK-NEXT: moveq #20, %d0
5757
; CHECK-NEXT: move.l (16,%sp), %d1
5858
; CHECK-NEXT: move.l %d1, %d2
5959
; CHECK-NEXT: lsr.l %d0, %d2
60-
; CHECK-NEXT: move.l #12, %d3
60+
; CHECK-NEXT: moveq #12, %d3
6161
; CHECK-NEXT: move.l (12,%sp), %d0
6262
; CHECK-NEXT: lsl.l %d3, %d0
6363
; CHECK-NEXT: or.l %d2, %d0
@@ -73,7 +73,7 @@ define i32 @mulmin4096_32(i32 %A) {
7373
; CHECK-LABEL: mulmin4096_32:
7474
; CHECK: .cfi_startproc
7575
; CHECK-NEXT: ; %bb.0:
76-
; CHECK-NEXT: move.l #12, %d1
76+
; CHECK-NEXT: moveq #12, %d1
7777
; CHECK-NEXT: move.l (4,%sp), %d0
7878
; CHECK-NEXT: lsl.l %d1, %d0
7979
; CHECK-NEXT: neg.l %d0
@@ -89,11 +89,11 @@ define i64 @mulmin4096_64(i64 %A) {
8989
; CHECK-NEXT: suba.l #8, %sp
9090
; CHECK-NEXT: .cfi_def_cfa_offset -12
9191
; CHECK-NEXT: movem.l %d2-%d3, (0,%sp) ; 12-byte Folded Spill
92-
; CHECK-NEXT: move.l #20, %d0
92+
; CHECK-NEXT: moveq #20, %d0
9393
; CHECK-NEXT: move.l (16,%sp), %d1
9494
; CHECK-NEXT: move.l %d1, %d2
9595
; CHECK-NEXT: lsr.l %d0, %d2
96-
; CHECK-NEXT: move.l #12, %d3
96+
; CHECK-NEXT: moveq #12, %d3
9797
; CHECK-NEXT: move.l (12,%sp), %d0
9898
; CHECK-NEXT: lsl.l %d3, %d0
9999
; CHECK-NEXT: or.l %d2, %d0
@@ -258,7 +258,7 @@ define i32 @mul0_32(i32 %A) {
258258
; CHECK-LABEL: mul0_32:
259259
; CHECK: .cfi_startproc
260260
; CHECK-NEXT: ; %bb.0:
261-
; CHECK-NEXT: move.l #0, %d0
261+
; CHECK-NEXT: moveq #0, %d0
262262
; CHECK-NEXT: rts
263263
%mul = mul i32 %A, 0
264264
ret i32 %mul

llvm/test/CodeGen/M68k/Arith/smul-with-overflow.ll

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ entry:
2424
define zeroext i8 @smul_i8_no_ovf(i8 signext %a, i8 signext %b) nounwind ssp {
2525
; CHECK-LABEL: smul_i8_no_ovf:
2626
; CHECK: ; %bb.0: ; %entry
27-
; CHECK-NEXT: move.l #42, %d0
27+
; CHECK-NEXT: moveq #42, %d0
2828
; CHECK-NEXT: rts
2929
entry:
3030
%smul = tail call { i8, i1 } @llvm.smul.with.overflow.i8(i8 %a, i8 %b)
@@ -70,15 +70,15 @@ define fastcc i1 @test1(i32 %v1, i32 %v2) nounwind {
7070
; CHECK-NEXT: lea (no,%pc), %a0
7171
; CHECK-NEXT: move.l %a0, (%sp)
7272
; CHECK-NEXT: jsr printf@PLT
73-
; CHECK-NEXT: move.b #0, %d0
73+
; CHECK-NEXT: moveq #0, %d0
7474
; CHECK-NEXT: adda.l #12, %sp
7575
; CHECK-NEXT: rts
7676
; CHECK-NEXT: .LBB3_1: ; %normal
7777
; CHECK-NEXT: move.l %d0, (4,%sp)
7878
; CHECK-NEXT: lea (ok,%pc), %a0
7979
; CHECK-NEXT: move.l %a0, (%sp)
8080
; CHECK-NEXT: jsr printf@PLT
81-
; CHECK-NEXT: move.b #1, %d0
81+
; CHECK-NEXT: moveq #1, %d0
8282
; CHECK-NEXT: adda.l #12, %sp
8383
; CHECK-NEXT: rts
8484
entry:
@@ -108,15 +108,15 @@ define fastcc i1 @test2(i32 %v1, i32 %v2) nounwind {
108108
; CHECK-NEXT: lea (no,%pc), %a0
109109
; CHECK-NEXT: move.l %a0, (%sp)
110110
; CHECK-NEXT: jsr printf@PLT
111-
; CHECK-NEXT: move.b #0, %d0
111+
; CHECK-NEXT: moveq #0, %d0
112112
; CHECK-NEXT: adda.l #12, %sp
113113
; CHECK-NEXT: rts
114114
; CHECK-NEXT: .LBB4_2: ; %normal
115115
; CHECK-NEXT: move.l %d0, (4,%sp)
116116
; CHECK-NEXT: lea (ok,%pc), %a0
117117
; CHECK-NEXT: move.l %a0, (%sp)
118118
; CHECK-NEXT: jsr printf@PLT
119-
; CHECK-NEXT: move.b #1, %d0
119+
; CHECK-NEXT: moveq #1, %d0
120120
; CHECK-NEXT: adda.l #12, %sp
121121
; CHECK-NEXT: rts
122122
entry:
@@ -155,7 +155,7 @@ define i32 @test4(i32 %a, i32 %b) nounwind readnone {
155155
; CHECK: ; %bb.0: ; %entry
156156
; CHECK-NEXT: move.l (8,%sp), %d0
157157
; CHECK-NEXT: add.l (4,%sp), %d0
158-
; CHECK-NEXT: move.l #4, %d1
158+
; CHECK-NEXT: moveq #4, %d1
159159
; CHECK-NEXT: muls.l %d1, %d0
160160
; CHECK-NEXT: rts
161161
entry:

0 commit comments

Comments
 (0)