diff --git a/llvm/lib/Target/LoongArch/LoongArchMergeBaseOffset.cpp b/llvm/lib/Target/LoongArch/LoongArchMergeBaseOffset.cpp index 47071d29c2cd4..e9455fdd23ba5 100644 --- a/llvm/lib/Target/LoongArch/LoongArchMergeBaseOffset.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchMergeBaseOffset.cpp @@ -183,7 +183,6 @@ void LoongArchMergeBaseOffsetOpt::foldOffset( MachineInstr &Hi20, MachineInstr &Lo12, MachineInstr *&Lo20, MachineInstr *&Hi12, MachineInstr *&Last, MachineInstr &Tail, int64_t Offset) { - assert(isInt<32>(Offset) && "Unexpected offset"); // Put the offset back in Hi and the Lo Hi20.getOperand(1).setOffset(Offset); Lo12.getOperand(2).setOffset(Offset); @@ -209,22 +208,35 @@ void LoongArchMergeBaseOffsetOpt::foldOffset( // instructions and deletes TailAdd and the instructions that produced the // offset. // -// Base address lowering is of the form: -// Hi20: pcalau12i vreg1, %pc_hi20(s) -// Lo12: addi.d vreg2, vreg1, %pc_lo12(s) -// / \ -// / \ -// / \ -// / The large offset can be of two forms: \ -// 1) Offset that has non zero bits in lower 2) Offset that has non zero -// 12 bits and upper 20 bits bits in upper 20 bits only -// OffsetHi: lu12i.w vreg3, 4 -// OffsetLo: ori voff, vreg3, 188 OffsetHi: lu12i.w voff, 128 -// \ / -// \ / -// \ / -// \ / -// TailAdd: add.d vreg4, vreg2, voff +// (The instructions marked with "!" are not necessarily present) +// +// Base address lowering is of the form: +// Hi20: pcalau12i vreg1, %pc_hi20(s) +// +- Lo12: addi.d vreg2, vreg1, %pc_lo12(s) +// | Lo20: lu32i.d vreg2, %pc64_lo20(s) ! +// +- Hi12: lu52i.d vreg2, vreg2, %pc64_hi12(s) ! 
+// | +// | The large offset can be one of the forms: +// | +// +-> 1) Offset that has non zero bits in Hi20 and Lo12 bits: +// | OffsetHi20: lu12i.w vreg3, 4 +// | OffsetLo12: ori voff, vreg3, 188 ------------------+ +// | | +// +-> 2) Offset that has non zero bits in Hi20 bits only: | +// | OffsetHi20: lu12i.w voff, 128 ------------------+ +// | | +// +-> 3) Offset that has non zero bits in Lo20 bits: | +// | OffsetHi20: lu12i.w vreg3, 121 ! | +// | OffsetLo12: ori voff, vreg3, 122 ! | +// | OffsetLo20: lu32i.d voff, 123 ------------------+ +// +-> 4) Offset that has non zero bits in Hi12 bits: | +// OffsetHi20: lu12i.w vreg3, 121 ! | +// OffsetLo12: ori voff, vreg3, 122 ! | +// OffsetLo20: lu32i.d vreg3, 123 ! | +// OffsetHi12: lu52i.d voff, vreg3, 124 ------------------+ +// | +// TailAdd: add.d vreg4, vreg2, voff <------------------+ +// bool LoongArchMergeBaseOffsetOpt::foldLargeOffset( MachineInstr &Hi20, MachineInstr &Lo12, MachineInstr *&Lo20, MachineInstr *&Hi12, MachineInstr *&Last, MachineInstr &TailAdd, @@ -235,55 +247,81 @@ bool LoongArchMergeBaseOffsetOpt::foldLargeOffset( Register Rs = TailAdd.getOperand(1).getReg(); Register Rt = TailAdd.getOperand(2).getReg(); Register Reg = Rs == GAReg ? Rt : Rs; + SmallVector Instrs; + int64_t Offset = 0; + int64_t Mask = -1; + + // This can point to one of [ORI, LU12I.W, LU32I.D, LU52I.D]: + for (int i = 0; i < 4; i++) { + // Handle Reg is R0. + if (Reg == LoongArch::R0) + break; - // Can't fold if the register has more than one use. - if (!Reg.isVirtual() || !MRI->hasOneUse(Reg)) - return false; - // This can point to an ORI or a LU12I.W: - MachineInstr &OffsetTail = *MRI->getVRegDef(Reg); - if (OffsetTail.getOpcode() == LoongArch::ORI) { - // The offset value has non zero bits in both %hi and %lo parts. - // Detect an ORI that feeds from a LU12I.W instruction. 
- MachineOperand &OriImmOp = OffsetTail.getOperand(2); - if (OriImmOp.getTargetFlags() != LoongArchII::MO_None) + // Can't fold if the register has more than one use. + if (!Reg.isVirtual() || !MRI->hasOneUse(Reg)) return false; - Register OriReg = OffsetTail.getOperand(1).getReg(); - int64_t OffLo = OriImmOp.getImm(); - - // Handle rs1 of ORI is R0. - if (OriReg == LoongArch::R0) { - LLVM_DEBUG(dbgs() << " Offset Instrs: " << OffsetTail); - foldOffset(Hi20, Lo12, Lo20, Hi12, Last, TailAdd, OffLo); - OffsetTail.eraseFromParent(); - return true; - } - MachineInstr &OffsetLu12i = *MRI->getVRegDef(OriReg); - MachineOperand &Lu12iImmOp = OffsetLu12i.getOperand(1); - if (OffsetLu12i.getOpcode() != LoongArch::LU12I_W || - Lu12iImmOp.getTargetFlags() != LoongArchII::MO_None || - !MRI->hasOneUse(OffsetLu12i.getOperand(0).getReg())) + MachineInstr *Curr = MRI->getVRegDef(Reg); + if (!Curr) + break; + + switch (Curr->getOpcode()) { + default: + // Can't fold if the instruction opcode is unexpected. return false; - int64_t Offset = SignExtend64<32>(Lu12iImmOp.getImm() << 12); - Offset += OffLo; - // LU12I.W+ORI sign extends the result. - Offset = SignExtend64<32>(Offset); - LLVM_DEBUG(dbgs() << " Offset Instrs: " << OffsetTail - << " " << OffsetLu12i); - foldOffset(Hi20, Lo12, Lo20, Hi12, Last, TailAdd, Offset); - OffsetTail.eraseFromParent(); - OffsetLu12i.eraseFromParent(); - return true; - } else if (OffsetTail.getOpcode() == LoongArch::LU12I_W) { - // The offset value has all zero bits in the lower 12 bits. Only LU12I.W - // exists. 
- LLVM_DEBUG(dbgs() << " Offset Instr: " << OffsetTail); - int64_t Offset = SignExtend64<32>(OffsetTail.getOperand(1).getImm() << 12); - foldOffset(Hi20, Lo12, Lo20, Hi12, Last, TailAdd, Offset); - OffsetTail.eraseFromParent(); - return true; + case LoongArch::ORI: { + MachineOperand ImmOp = Curr->getOperand(2); + if (ImmOp.getTargetFlags() != LoongArchII::MO_None) + return false; + Offset += ImmOp.getImm(); + Reg = Curr->getOperand(1).getReg(); + Instrs.push_back(Curr); + break; + } + case LoongArch::LU12I_W: { + MachineOperand ImmOp = Curr->getOperand(1); + if (ImmOp.getTargetFlags() != LoongArchII::MO_None) + return false; + Offset += SignExtend64<32>(ImmOp.getImm() << 12) & Mask; + Reg = LoongArch::R0; + Instrs.push_back(Curr); + break; + } + case LoongArch::LU32I_D: { + MachineOperand ImmOp = Curr->getOperand(2); + if (ImmOp.getTargetFlags() != LoongArchII::MO_None || !Lo20) + return false; + Offset += SignExtend64<52>(ImmOp.getImm() << 32) & Mask; + Mask ^= 0x000FFFFF00000000ULL; + Reg = Curr->getOperand(1).getReg(); + Instrs.push_back(Curr); + break; + } + case LoongArch::LU52I_D: { + MachineOperand ImmOp = Curr->getOperand(2); + if (ImmOp.getTargetFlags() != LoongArchII::MO_None || !Hi12) + return false; + Offset += ImmOp.getImm() << 52; + Mask ^= 0xFFF0000000000000ULL; + Reg = Curr->getOperand(1).getReg(); + Instrs.push_back(Curr); + break; + } + } } - return false; + + // Can't fold if the offset is not extracted. + if (!Offset) + return false; + + foldOffset(Hi20, Lo12, Lo20, Hi12, Last, TailAdd, Offset); + LLVM_DEBUG(dbgs() << " Offset Instrs:\n"); + for (auto I : Instrs) { + LLVM_DEBUG(dbgs() << " " << *I); + I->eraseFromParent(); + } + + return true; } bool LoongArchMergeBaseOffsetOpt::detectAndFoldOffset(MachineInstr &Hi20, @@ -344,13 +382,6 @@ bool LoongArchMergeBaseOffsetOpt::detectAndFoldOffset(MachineInstr &Hi20, [[fallthrough]]; case LoongArch::ADD_D: // The offset is too large to fit in the immediate field of ADDI. 
- // This can be in two forms: - // 1) LU12I.W hi_offset followed by: - // ORI lo_offset - // This happens in case the offset has non zero bits in - // both hi 20 and lo 12 bits. - // 2) LU12I.W (offset20) - // This happens in case the lower 12 bits of the offset are zeros. return foldLargeOffset(Hi20, Lo12, Lo20, Hi12, Last, Tail, DestReg); break; } diff --git a/llvm/test/CodeGen/LoongArch/merge-base-offset.ll b/llvm/test/CodeGen/LoongArch/merge-base-offset.ll index b53f94303b6ea..9df5532d51179 100644 --- a/llvm/test/CodeGen/LoongArch/merge-base-offset.ll +++ b/llvm/test/CodeGen/LoongArch/merge-base-offset.ll @@ -1100,14 +1100,11 @@ define dso_local ptr @load_addr_offset_281474439839744() nounwind { ; ; LA64-LARGE-LABEL: load_addr_offset_281474439839744: ; LA64-LARGE: # %bb.0: # %entry -; LA64-LARGE-NEXT: pcalau12i $a0, %pc_hi20(g_a64) -; LA64-LARGE-NEXT: addi.d $a1, $zero, %pc_lo12(g_a64) -; LA64-LARGE-NEXT: lu32i.d $a1, %pc64_lo20(g_a64) -; LA64-LARGE-NEXT: lu52i.d $a1, $a1, %pc64_hi12(g_a64) +; LA64-LARGE-NEXT: pcalau12i $a0, %pc_hi20(g_a64+2251795518717952) +; LA64-LARGE-NEXT: addi.d $a1, $zero, %pc_lo12(g_a64+2251795518717952) +; LA64-LARGE-NEXT: lu32i.d $a1, %pc64_lo20(g_a64+2251795518717952) +; LA64-LARGE-NEXT: lu52i.d $a1, $a1, %pc64_hi12(g_a64+2251795518717952) ; LA64-LARGE-NEXT: add.d $a0, $a1, $a0 -; LA64-LARGE-NEXT: ori $a1, $zero, 0 -; LA64-LARGE-NEXT: lu32i.d $a1, 524287 -; LA64-LARGE-NEXT: add.d $a0, $a0, $a1 ; LA64-LARGE-NEXT: ret entry: ret ptr getelementptr inbounds ([1 x i64], ptr @g_a64, i64 281474439839744) @@ -1131,14 +1128,11 @@ define dso_local ptr @load_addr_offset_248792680471040() nounwind { ; ; LA64-LARGE-LABEL: load_addr_offset_248792680471040: ; LA64-LARGE: # %bb.0: # %entry -; LA64-LARGE-NEXT: pcalau12i $a0, %pc_hi20(g_a64) -; LA64-LARGE-NEXT: addi.d $a1, $zero, %pc_lo12(g_a64) -; LA64-LARGE-NEXT: lu32i.d $a1, %pc64_lo20(g_a64) -; LA64-LARGE-NEXT: lu52i.d $a1, $a1, %pc64_hi12(g_a64) +; LA64-LARGE-NEXT: pcalau12i $a0, 
%pc_hi20(g_a64+1990341443768320) +; LA64-LARGE-NEXT: addi.d $a1, $zero, %pc_lo12(g_a64+1990341443768320) +; LA64-LARGE-NEXT: lu32i.d $a1, %pc64_lo20(g_a64+1990341443768320) +; LA64-LARGE-NEXT: lu52i.d $a1, $a1, %pc64_hi12(g_a64+1990341443768320) ; LA64-LARGE-NEXT: add.d $a0, $a1, $a0 -; LA64-LARGE-NEXT: lu12i.w $a1, 502733 -; LA64-LARGE-NEXT: lu32i.d $a1, 463412 -; LA64-LARGE-NEXT: add.d $a0, $a0, $a1 ; LA64-LARGE-NEXT: ret entry: ret ptr getelementptr inbounds ([1 x i64], ptr @g_a64, i64 248792680471040) @@ -1163,15 +1157,11 @@ define dso_local ptr @load_addr_offset_9380351707272() nounwind { ; ; LA64-LARGE-LABEL: load_addr_offset_9380351707272: ; LA64-LARGE: # %bb.0: # %entry -; LA64-LARGE-NEXT: pcalau12i $a0, %pc_hi20(g_a64) -; LA64-LARGE-NEXT: addi.d $a1, $zero, %pc_lo12(g_a64) -; LA64-LARGE-NEXT: lu32i.d $a1, %pc64_lo20(g_a64) -; LA64-LARGE-NEXT: lu52i.d $a1, $a1, %pc64_hi12(g_a64) +; LA64-LARGE-NEXT: pcalau12i $a0, %pc_hi20(g_a64+75042813658176) +; LA64-LARGE-NEXT: addi.d $a1, $zero, %pc_lo12(g_a64+75042813658176) +; LA64-LARGE-NEXT: lu32i.d $a1, %pc64_lo20(g_a64+75042813658176) +; LA64-LARGE-NEXT: lu52i.d $a1, $a1, %pc64_hi12(g_a64+75042813658176) ; LA64-LARGE-NEXT: add.d $a0, $a1, $a0 -; LA64-LARGE-NEXT: lu12i.w $a1, 279556 -; LA64-LARGE-NEXT: ori $a1, $a1, 1088 -; LA64-LARGE-NEXT: lu32i.d $a1, 17472 -; LA64-LARGE-NEXT: add.d $a0, $a0, $a1 ; LA64-LARGE-NEXT: ret entry: ret ptr getelementptr inbounds ([1 x i64], ptr @g_a64, i64 9380351707272) @@ -1194,13 +1184,11 @@ define dso_local ptr @load_addr_offset_562949953421312() nounwind { ; ; LA64-LARGE-LABEL: load_addr_offset_562949953421312: ; LA64-LARGE: # %bb.0: # %entry -; LA64-LARGE-NEXT: pcalau12i $a0, %pc_hi20(g_a64) -; LA64-LARGE-NEXT: addi.d $a1, $zero, %pc_lo12(g_a64) -; LA64-LARGE-NEXT: lu32i.d $a1, %pc64_lo20(g_a64) -; LA64-LARGE-NEXT: lu52i.d $a1, $a1, %pc64_hi12(g_a64) +; LA64-LARGE-NEXT: pcalau12i $a0, %pc_hi20(g_a64+4503599627370496) +; LA64-LARGE-NEXT: addi.d $a1, $zero, 
%pc_lo12(g_a64+4503599627370496) +; LA64-LARGE-NEXT: lu32i.d $a1, %pc64_lo20(g_a64+4503599627370496) +; LA64-LARGE-NEXT: lu52i.d $a1, $a1, %pc64_hi12(g_a64+4503599627370496) ; LA64-LARGE-NEXT: add.d $a0, $a1, $a0 -; LA64-LARGE-NEXT: lu52i.d $a1, $zero, 1 -; LA64-LARGE-NEXT: add.d $a0, $a0, $a1 ; LA64-LARGE-NEXT: ret entry: ret ptr getelementptr inbounds ([1 x i64], ptr @g_a64, i64 562949953421312) @@ -1226,16 +1214,11 @@ define dso_local ptr @load_addr_offset_614749556925924693() nounwind { ; ; LA64-LARGE-LABEL: load_addr_offset_614749556925924693: ; LA64-LARGE: # %bb.0: # %entry -; LA64-LARGE-NEXT: pcalau12i $a0, %pc_hi20(g_a64) -; LA64-LARGE-NEXT: addi.d $a1, $zero, %pc_lo12(g_a64) -; LA64-LARGE-NEXT: lu32i.d $a1, %pc64_lo20(g_a64) -; LA64-LARGE-NEXT: lu52i.d $a1, $a1, %pc64_hi12(g_a64) +; LA64-LARGE-NEXT: pcalau12i $a0, %pc_hi20(g_a64+4917996455407397544) +; LA64-LARGE-NEXT: addi.d $a1, $zero, %pc_lo12(g_a64+4917996455407397544) +; LA64-LARGE-NEXT: lu32i.d $a1, %pc64_lo20(g_a64+4917996455407397544) +; LA64-LARGE-NEXT: lu52i.d $a1, $a1, %pc64_hi12(g_a64+4917996455407397544) ; LA64-LARGE-NEXT: add.d $a0, $a1, $a0 -; LA64-LARGE-NEXT: lu12i.w $a1, 209666 -; LA64-LARGE-NEXT: ori $a1, $a1, 2728 -; LA64-LARGE-NEXT: lu32i.d $a1, 15288 -; LA64-LARGE-NEXT: lu52i.d $a1, $a1, 1092 -; LA64-LARGE-NEXT: add.d $a0, $a0, $a1 ; LA64-LARGE-NEXT: ret entry: ret ptr getelementptr inbounds ([1 x i64], ptr @g_a64, i64 614749556925924693)