-
Notifications
You must be signed in to change notification settings - Fork 13.5k
[LoongArch] Add a pass to rewrite rd to r0 for non-computational instrs whose return values are unused #94590
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
[LoongArch] Add a pass to rewrite rd to r0 for non-computational instrs whose return values are unused. This patch adds a peephole pass `LoongArchDeadRegisterDefinitions`. It rewrites `rd` to `r0` when `rd` is marked as dead. It may improve register allocation and reduce pipeline hazards on CPUs without register renaming and out-of-order (OOO) execution.
@llvm/pr-subscribers-backend-loongarch Author: hev (heiher). Changes: This patch adds a peephole pass `LoongArchDeadRegisterDefinitions`. It rewrites `rd` to `r0` when `rd` is marked as dead. Patch is 43.39 KiB, truncated to 20.00 KiB below; full version: https://github.com/llvm/llvm-project/pull/94590.diff 11 Files Affected:
diff --git a/llvm/lib/Target/LoongArch/CMakeLists.txt b/llvm/lib/Target/LoongArch/CMakeLists.txt
index 5085e23f82a7b..cadc335a621f2 100644
--- a/llvm/lib/Target/LoongArch/CMakeLists.txt
+++ b/llvm/lib/Target/LoongArch/CMakeLists.txt
@@ -16,6 +16,7 @@ add_public_tablegen_target(LoongArchCommonTableGen)
add_llvm_target(LoongArchCodeGen
LoongArchAsmPrinter.cpp
+ LoongArchDeadRegisterDefinitions.cpp
LoongArchExpandAtomicPseudoInsts.cpp
LoongArchExpandPseudoInsts.cpp
LoongArchFrameLowering.cpp
diff --git a/llvm/lib/Target/LoongArch/LoongArch.h b/llvm/lib/Target/LoongArch/LoongArch.h
index 0928ea31054a4..adfb844ee31b6 100644
--- a/llvm/lib/Target/LoongArch/LoongArch.h
+++ b/llvm/lib/Target/LoongArch/LoongArch.h
@@ -33,12 +33,14 @@ bool lowerLoongArchMachineOperandToMCOperand(const MachineOperand &MO,
MCOperand &MCOp,
const AsmPrinter &AP);
+FunctionPass *createLoongArchDeadRegisterDefinitionsPass();
FunctionPass *createLoongArchExpandAtomicPseudoPass();
FunctionPass *createLoongArchISelDag(LoongArchTargetMachine &TM);
FunctionPass *createLoongArchOptWInstrsPass();
FunctionPass *createLoongArchPreRAExpandPseudoPass();
FunctionPass *createLoongArchExpandPseudoPass();
void initializeLoongArchDAGToDAGISelLegacyPass(PassRegistry &);
+void initializeLoongArchDeadRegisterDefinitionsPass(PassRegistry &);
void initializeLoongArchExpandAtomicPseudoPass(PassRegistry &);
void initializeLoongArchOptWInstrsPass(PassRegistry &);
void initializeLoongArchPreRAExpandPseudoPass(PassRegistry &);
diff --git a/llvm/lib/Target/LoongArch/LoongArchDeadRegisterDefinitions.cpp b/llvm/lib/Target/LoongArch/LoongArchDeadRegisterDefinitions.cpp
new file mode 100644
index 0000000000000..6ff9b6ac25b7c
--- /dev/null
+++ b/llvm/lib/Target/LoongArch/LoongArchDeadRegisterDefinitions.cpp
@@ -0,0 +1,108 @@
+//=== LoongArchDeadRegisterDefinitions.cpp - Replace dead defs w/ zero reg ===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===---------------------------------------------------------------------===//
+//
+// This pass rewrites Rd to r0 for non-computational instrs (those that may
+// load, may store, or have unmodeled side effects) whose return values are
+// unused.
+//
+//===---------------------------------------------------------------------===//
+
+#include "LoongArch.h"
+#include "LoongArchInstrInfo.h"
+#include "LoongArchSubtarget.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/LiveDebugVariables.h"
+#include "llvm/CodeGen/LiveIntervals.h"
+#include "llvm/CodeGen/LiveStacks.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+
+using namespace llvm;
+#define DEBUG_TYPE "loongarch-dead-defs"
+#define LoongArch_DEAD_REG_DEF_NAME "LoongArch Dead register definitions"
+
+STATISTIC(NumDeadDefsReplaced, "Number of dead definitions replaced");
+
+namespace {
+/// Machine-function pass that redirects dead virtual-register definitions to
+/// the zero register (LoongArch::R0); the rewrite itself lives in
+/// runOnMachineFunction.
+class LoongArchDeadRegisterDefinitions : public MachineFunctionPass {
+public:
+  static char ID;
+
+  LoongArchDeadRegisterDefinitions() : MachineFunctionPass(ID) {}
+
+  /// Rewrites eligible dead defs in \p MF; returns true if anything changed.
+  bool runOnMachineFunction(MachineFunction &MF) override;
+
+  /// Requires LiveIntervals (runOnMachineFunction removes intervals through
+  /// it) and reports the CFG, LiveIntervals, SlotIndexes, LiveDebugVariables
+  /// and LiveStacks as preserved.
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.setPreservesCFG();
+    AU.addRequired<LiveIntervals>();
+    AU.addPreserved<LiveIntervals>();
+    AU.addPreserved<SlotIndexes>();
+    AU.addPreserved<LiveDebugVariables>();
+    AU.addPreserved<LiveStacks>();
+    MachineFunctionPass::getAnalysisUsage(AU);
+  }
+
+  /// Human-readable pass name, shared with the INITIALIZE_PASS registration.
+  StringRef getPassName() const override { return LoongArch_DEAD_REG_DEF_NAME; }
+};
+} // end anonymous namespace
+
+char LoongArchDeadRegisterDefinitions::ID = 0; // Storage for the ID that the constructor passes to MachineFunctionPass.
+INITIALIZE_PASS(LoongArchDeadRegisterDefinitions, DEBUG_TYPE,
+ LoongArch_DEAD_REG_DEF_NAME, false, false)
+
+/// Factory for the dead-register-definitions pass: heap-allocates a fresh
+/// LoongArchDeadRegisterDefinitions and returns it as a FunctionPass pointer.
+FunctionPass *llvm::createLoongArchDeadRegisterDefinitionsPass() {
+  FunctionPass *NewPass = new LoongArchDeadRegisterDefinitions();
+  return NewPass;
+}
+
+/// Walks every instruction in \p MF that may load, may store, or has
+/// unmodeled side effects. For each explicit def operand that is a dead
+/// virtual register, is not early-clobber, is not tied to a use, and whose
+/// operand register class contains R0, the register's live interval is
+/// removed and the operand is redirected to LoongArch::R0.
+///
+/// \returns true if at least one operand was rewritten.
+bool LoongArchDeadRegisterDefinitions::runOnMachineFunction(
+    MachineFunction &MF) {
+  if (skipFunction(MF.getFunction()))
+    return false;
+
+  const auto &STI = MF.getSubtarget();
+  const TargetInstrInfo *InstrInfo = STI.getInstrInfo();
+  const TargetRegisterInfo *RegInfo = STI.getRegisterInfo();
+  LiveIntervals &Intervals = getAnalysis<LiveIntervals>();
+  LLVM_DEBUG(dbgs() << "***** LoongArchDeadRegisterDefinitions *****\n");
+
+  bool Rewrote = false;
+  for (MachineBasicBlock &Block : MF) {
+    for (MachineInstr &Inst : Block) {
+      const MCInstrDesc &Desc = Inst.getDesc();
+      // Only non-computational instructions are candidates; everything else
+      // is left untouched.
+      const bool NonComputational = Desc.mayLoad() || Desc.mayStore() ||
+                                    Desc.hasUnmodeledSideEffects();
+      if (!NonComputational)
+        continue;
+
+      const unsigned NumDefs = Desc.getNumDefs();
+      for (unsigned DefIdx = 0; DefIdx < NumDefs; ++DefIdx) {
+        MachineOperand &Def = Inst.getOperand(DefIdx);
+        const bool PlainRegDef =
+            Def.isReg() && Def.isDef() && !Def.isEarlyClobber();
+        if (!PlainRegDef)
+          continue;
+        // A def tied to a use operand must keep its register.
+        if (Inst.isRegTiedToUseOperand(DefIdx)) {
+          LLVM_DEBUG(dbgs() << " Ignoring, def is tied operand.\n");
+          continue;
+        }
+        const Register DeadReg = Def.getReg();
+        if (!(DeadReg.isVirtual() && Def.isDead()))
+          continue;
+        LLVM_DEBUG(dbgs() << " Dead def operand #" << DefIdx << " in:\n ";
+                   Inst.print(dbgs()));
+        // The operand can only take R0 if its register class contains it.
+        const TargetRegisterClass *DefRC =
+            InstrInfo->getRegClass(Desc, DefIdx, RegInfo, MF);
+        const bool AcceptsZeroReg = DefRC && DefRC->contains(LoongArch::R0);
+        if (!AcceptsZeroReg) {
+          LLVM_DEBUG(dbgs() << " Ignoring, register is not a GPR.\n");
+          continue;
+        }
+        // Drop the virtual register's interval before the operand stops
+        // referring to it.
+        assert(Intervals.hasInterval(DeadReg));
+        Intervals.removeInterval(DeadReg);
+        Def.setReg(LoongArch::R0);
+        LLVM_DEBUG(dbgs() << " Replacing with zero register. New:\n ";
+                   Inst.print(dbgs()));
+        ++NumDeadDefsReplaced;
+        Rewrote = true;
+      }
+    }
+  }
+
+  return Rewrote;
+}
diff --git a/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp b/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp
index c29c1b5933218..137fe1d04f45b 100644
--- a/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp
@@ -34,11 +34,19 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeLoongArchTarget() {
RegisterTargetMachine<LoongArchTargetMachine> X(getTheLoongArch32Target());
RegisterTargetMachine<LoongArchTargetMachine> Y(getTheLoongArch64Target());
auto *PR = PassRegistry::getPassRegistry();
+ initializeLoongArchDeadRegisterDefinitionsPass(*PR);
initializeLoongArchOptWInstrsPass(*PR);
initializeLoongArchPreRAExpandPseudoPass(*PR);
initializeLoongArchDAGToDAGISelLegacyPass(*PR);
}
+// Hidden command-line switch, on by default, that the pass config checks
+// before adding LoongArchDeadRegisterDefinitions to the pipeline.
+static cl::opt<bool> EnableLoongArchDeadRegisterElimination(
+    "loongarch-enable-dead-defs", cl::Hidden,
+    cl::desc("Enable the pass that removes dead"
+             " definitions and replaces stores to"
+             " them with stores to r0"),
+    cl::init(true));
+
static cl::opt<bool>
EnableLoopDataPrefetch("loongarch-enable-loop-data-prefetch", cl::Hidden,
cl::desc("Enable the loop data prefetch pass"),
@@ -148,6 +156,8 @@ class LoongArchPassConfig : public TargetPassConfig {
void addPreEmitPass2() override;
void addMachineSSAOptimization() override;
void addPreRegAlloc() override;
+ bool addRegAssignAndRewriteFast() override;
+ bool addRegAssignAndRewriteOptimized() override;
};
} // end namespace
@@ -200,3 +210,17 @@ void LoongArchPassConfig::addMachineSSAOptimization() {
void LoongArchPassConfig::addPreRegAlloc() {
addPass(createLoongArchPreRAExpandPseudoPass());
}
+
+/// Adds the dead-register-definitions pass ahead of the default fast
+/// register-assignment passes when optimizing and the option is enabled, then
+/// defers to TargetPassConfig.
+bool LoongArchPassConfig::addRegAssignAndRewriteFast() {
+  const bool Optimizing = TM->getOptLevel() != CodeGenOptLevel::None;
+  if (Optimizing && EnableLoongArchDeadRegisterElimination)
+    addPass(createLoongArchDeadRegisterDefinitionsPass());
+
+  return TargetPassConfig::addRegAssignAndRewriteFast();
+}
+
+/// Adds the dead-register-definitions pass ahead of the default optimized
+/// register-assignment passes when optimizing and the option is enabled, then
+/// defers to TargetPassConfig.
+bool LoongArchPassConfig::addRegAssignAndRewriteOptimized() {
+  const bool Optimizing = TM->getOptLevel() != CodeGenOptLevel::None;
+  if (Optimizing && EnableLoongArchDeadRegisterElimination)
+    addPass(createLoongArchDeadRegisterDefinitionsPass());
+
+  return TargetPassConfig::addRegAssignAndRewriteOptimized();
+}
diff --git a/llvm/test/CodeGen/LoongArch/global-address.ll b/llvm/test/CodeGen/LoongArch/global-address.ll
index 0c8958b6ab336..fb2929572f31a 100644
--- a/llvm/test/CodeGen/LoongArch/global-address.ll
+++ b/llvm/test/CodeGen/LoongArch/global-address.ll
@@ -14,40 +14,40 @@ define void @foo() nounwind {
; LA32NOPIC: # %bb.0:
; LA32NOPIC-NEXT: pcalau12i $a0, %got_pc_hi20(G)
; LA32NOPIC-NEXT: ld.w $a0, $a0, %got_pc_lo12(G)
-; LA32NOPIC-NEXT: ld.w $a0, $a0, 0
+; LA32NOPIC-NEXT: ld.w $zero, $a0, 0
; LA32NOPIC-NEXT: pcalau12i $a0, %pc_hi20(g)
; LA32NOPIC-NEXT: addi.w $a0, $a0, %pc_lo12(g)
-; LA32NOPIC-NEXT: ld.w $a0, $a0, 0
+; LA32NOPIC-NEXT: ld.w $zero, $a0, 0
; LA32NOPIC-NEXT: ret
;
; LA32PIC-LABEL: foo:
; LA32PIC: # %bb.0:
; LA32PIC-NEXT: pcalau12i $a0, %got_pc_hi20(G)
; LA32PIC-NEXT: ld.w $a0, $a0, %got_pc_lo12(G)
-; LA32PIC-NEXT: ld.w $a0, $a0, 0
+; LA32PIC-NEXT: ld.w $zero, $a0, 0
; LA32PIC-NEXT: pcalau12i $a0, %pc_hi20(.Lg$local)
; LA32PIC-NEXT: addi.w $a0, $a0, %pc_lo12(.Lg$local)
-; LA32PIC-NEXT: ld.w $a0, $a0, 0
+; LA32PIC-NEXT: ld.w $zero, $a0, 0
; LA32PIC-NEXT: ret
;
; LA64NOPIC-LABEL: foo:
; LA64NOPIC: # %bb.0:
; LA64NOPIC-NEXT: pcalau12i $a0, %got_pc_hi20(G)
; LA64NOPIC-NEXT: ld.d $a0, $a0, %got_pc_lo12(G)
-; LA64NOPIC-NEXT: ld.w $a0, $a0, 0
+; LA64NOPIC-NEXT: ld.w $zero, $a0, 0
; LA64NOPIC-NEXT: pcalau12i $a0, %pc_hi20(g)
; LA64NOPIC-NEXT: addi.d $a0, $a0, %pc_lo12(g)
-; LA64NOPIC-NEXT: ld.w $a0, $a0, 0
+; LA64NOPIC-NEXT: ld.w $zero, $a0, 0
; LA64NOPIC-NEXT: ret
;
; LA64PIC-LABEL: foo:
; LA64PIC: # %bb.0:
; LA64PIC-NEXT: pcalau12i $a0, %got_pc_hi20(G)
; LA64PIC-NEXT: ld.d $a0, $a0, %got_pc_lo12(G)
-; LA64PIC-NEXT: ld.w $a0, $a0, 0
+; LA64PIC-NEXT: ld.w $zero, $a0, 0
; LA64PIC-NEXT: pcalau12i $a0, %pc_hi20(.Lg$local)
; LA64PIC-NEXT: addi.d $a0, $a0, %pc_lo12(.Lg$local)
-; LA64PIC-NEXT: ld.w $a0, $a0, 0
+; LA64PIC-NEXT: ld.w $zero, $a0, 0
; LA64PIC-NEXT: ret
;
; LA64LARGENOPIC-LABEL: foo:
@@ -57,13 +57,13 @@ define void @foo() nounwind {
; LA64LARGENOPIC-NEXT: lu32i.d $t8, %got64_pc_lo20(G)
; LA64LARGENOPIC-NEXT: lu52i.d $t8, $t8, %got64_pc_hi12(G)
; LA64LARGENOPIC-NEXT: ldx.d $a0, $t8, $a0
-; LA64LARGENOPIC-NEXT: ld.w $a0, $a0, 0
+; LA64LARGENOPIC-NEXT: ld.w $zero, $a0, 0
; LA64LARGENOPIC-NEXT: pcalau12i $a0, %pc_hi20(g)
; LA64LARGENOPIC-NEXT: addi.d $t8, $zero, %pc_lo12(g)
; LA64LARGENOPIC-NEXT: lu32i.d $t8, %pc64_lo20(g)
; LA64LARGENOPIC-NEXT: lu52i.d $t8, $t8, %pc64_hi12(g)
; LA64LARGENOPIC-NEXT: add.d $a0, $t8, $a0
-; LA64LARGENOPIC-NEXT: ld.w $a0, $a0, 0
+; LA64LARGENOPIC-NEXT: ld.w $zero, $a0, 0
; LA64LARGENOPIC-NEXT: ret
;
; LA64LARGEPIC-LABEL: foo:
@@ -73,13 +73,13 @@ define void @foo() nounwind {
; LA64LARGEPIC-NEXT: lu32i.d $t8, %got64_pc_lo20(G)
; LA64LARGEPIC-NEXT: lu52i.d $t8, $t8, %got64_pc_hi12(G)
; LA64LARGEPIC-NEXT: ldx.d $a0, $t8, $a0
-; LA64LARGEPIC-NEXT: ld.w $a0, $a0, 0
+; LA64LARGEPIC-NEXT: ld.w $zero, $a0, 0
; LA64LARGEPIC-NEXT: pcalau12i $a0, %pc_hi20(.Lg$local)
; LA64LARGEPIC-NEXT: addi.d $t8, $zero, %pc_lo12(.Lg$local)
; LA64LARGEPIC-NEXT: lu32i.d $t8, %pc64_lo20(.Lg$local)
; LA64LARGEPIC-NEXT: lu52i.d $t8, $t8, %pc64_hi12(.Lg$local)
; LA64LARGEPIC-NEXT: add.d $a0, $t8, $a0
-; LA64LARGEPIC-NEXT: ld.w $a0, $a0, 0
+; LA64LARGEPIC-NEXT: ld.w $zero, $a0, 0
; LA64LARGEPIC-NEXT: ret
%V = load volatile i32, ptr @G
%v = load volatile i32, ptr @g
diff --git a/llvm/test/CodeGen/LoongArch/intrinsic-la64.ll b/llvm/test/CodeGen/LoongArch/intrinsic-la64.ll
index 4a59e2af533e7..d94da34e14c4c 100644
--- a/llvm/test/CodeGen/LoongArch/intrinsic-la64.ll
+++ b/llvm/test/CodeGen/LoongArch/intrinsic-la64.ll
@@ -178,7 +178,7 @@ entry:
define void @csrrd_d_noret() {
; CHECK-LABEL: csrrd_d_noret:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: csrrd $a0, 1
+; CHECK-NEXT: csrrd $zero, 1
; CHECK-NEXT: ret
entry:
%0 = tail call i64 @llvm.loongarch.csrrd.d(i32 1)
@@ -240,7 +240,7 @@ entry:
define void @iocsrrd_d_noret(i32 %a) {
; CHECK-LABEL: iocsrrd_d_noret:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: iocsrrd.d $a0, $a0
+; CHECK-NEXT: iocsrrd.d $zero, $a0
; CHECK-NEXT: ret
entry:
%0 = tail call i64 @llvm.loongarch.iocsrrd.d(i32 %a)
@@ -290,7 +290,7 @@ entry:
define void @lddir_d_noret(i64 %a) {
; CHECK-LABEL: lddir_d_noret:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: lddir $a0, $a0, 1
+; CHECK-NEXT: lddir $zero, $a0, 1
; CHECK-NEXT: ret
entry:
%0 = tail call i64 @llvm.loongarch.lddir.d(i64 %a, i64 1)
diff --git a/llvm/test/CodeGen/LoongArch/intrinsic.ll b/llvm/test/CodeGen/LoongArch/intrinsic.ll
index f49a2500ad3c7..6d8adb5da9261 100644
--- a/llvm/test/CodeGen/LoongArch/intrinsic.ll
+++ b/llvm/test/CodeGen/LoongArch/intrinsic.ll
@@ -73,7 +73,7 @@ entry:
define void @movfcsr2gr_noret() nounwind {
; CHECK-LABEL: movfcsr2gr_noret:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: movfcsr2gr $a0, $fcsr1
+; CHECK-NEXT: movfcsr2gr $zero, $fcsr1
; CHECK-NEXT: ret
entry:
%res = call i32 @llvm.loongarch.movfcsr2gr(i32 1)
@@ -103,7 +103,7 @@ entry:
define void @csrrd_w_noret() {
; CHECK-LABEL: csrrd_w_noret:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: csrrd $a0, 1
+; CHECK-NEXT: csrrd $zero, 1
; CHECK-NEXT: ret
entry:
%0 = tail call i32 @llvm.loongarch.csrrd.w(i32 1)
@@ -185,7 +185,7 @@ entry:
define void @iocsrrd_b_noret(i32 %a) {
; CHECK-LABEL: iocsrrd_b_noret:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: iocsrrd.b $a0, $a0
+; CHECK-NEXT: iocsrrd.b $zero, $a0
; CHECK-NEXT: ret
entry:
%0 = tail call i32 @llvm.loongarch.iocsrrd.b(i32 %a)
@@ -195,7 +195,7 @@ entry:
define void @iocsrrd_h_noret(i32 %a) {
; CHECK-LABEL: iocsrrd_h_noret:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: iocsrrd.h $a0, $a0
+; CHECK-NEXT: iocsrrd.h $zero, $a0
; CHECK-NEXT: ret
entry:
%0 = tail call i32 @llvm.loongarch.iocsrrd.h(i32 %a)
@@ -205,7 +205,7 @@ entry:
define void @iocsrrd_w_noret(i32 %a) {
; CHECK-LABEL: iocsrrd_w_noret:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: iocsrrd.w $a0, $a0
+; CHECK-NEXT: iocsrrd.w $zero, $a0
; CHECK-NEXT: ret
entry:
%0 = tail call i32 @llvm.loongarch.iocsrrd.w(i32 %a)
diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/br.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/br.ll
index 02375a925723d..8b909e3314d64 100644
--- a/llvm/test/CodeGen/LoongArch/ir-instruction/br.ll
+++ b/llvm/test/CodeGen/LoongArch/ir-instruction/br.ll
@@ -21,7 +21,7 @@ define void @foo_br_eq(i32 %a, ptr %b) nounwind {
; LA32-NEXT: ld.w $a2, $a1, 0
; LA32-NEXT: beq $a2, $a0, .LBB1_2
; LA32-NEXT: # %bb.1: # %test
-; LA32-NEXT: ld.w $a0, $a1, 0
+; LA32-NEXT: ld.w $zero, $a1, 0
; LA32-NEXT: .LBB1_2: # %end
; LA32-NEXT: ret
;
@@ -31,7 +31,7 @@ define void @foo_br_eq(i32 %a, ptr %b) nounwind {
; LA64-NEXT: addi.w $a0, $a0, 0
; LA64-NEXT: beq $a2, $a0, .LBB1_2
; LA64-NEXT: # %bb.1: # %test
-; LA64-NEXT: ld.w $a0, $a1, 0
+; LA64-NEXT: ld.w $zero, $a1, 0
; LA64-NEXT: .LBB1_2: # %end
; LA64-NEXT: ret
%val = load volatile i32, ptr %b
@@ -51,7 +51,7 @@ define void @foo_br_ne(i32 %a, ptr %b) nounwind {
; LA32-NEXT: ld.w $a2, $a1, 0
; LA32-NEXT: bne $a2, $a0, .LBB2_2
; LA32-NEXT: # %bb.1: # %test
-; LA32-NEXT: ld.w $a0, $a1, 0
+; LA32-NEXT: ld.w $zero, $a1, 0
; LA32-NEXT: .LBB2_2: # %end
; LA32-NEXT: ret
;
@@ -61,7 +61,7 @@ define void @foo_br_ne(i32 %a, ptr %b) nounwind {
; LA64-NEXT: addi.w $a0, $a0, 0
; LA64-NEXT: bne $a2, $a0, .LBB2_2
; LA64-NEXT: # %bb.1: # %test
-; LA64-NEXT: ld.w $a0, $a1, 0
+; LA64-NEXT: ld.w $zero, $a1, 0
; LA64-NEXT: .LBB2_2: # %end
; LA64-NEXT: ret
%val = load volatile i32, ptr %b
@@ -81,7 +81,7 @@ define void @foo_br_slt(i32 %a, ptr %b) nounwind {
; LA32-NEXT: ld.w $a2, $a1, 0
; LA32-NEXT: blt $a2, $a0, .LBB3_2
; LA32-NEXT: # %bb.1: # %test
-; LA32-NEXT: ld.w $a0, $a1, 0
+; LA32-NEXT: ld.w $zero, $a1, 0
; LA32-NEXT: .LBB3_2: # %end
; LA32-NEXT: ret
;
@@ -91,7 +91,7 @@ define void @foo_br_slt(i32 %a, ptr %b) nounwind {
; LA64-NEXT: addi.w $a0, $a0, 0
; LA64-NEXT: blt $a2, $a0, .LBB3_2
; LA64-NEXT: # %bb.1: # %test
-; LA64-NEXT: ld.w $a0, $a1, 0
+; LA64-NEXT: ld.w $zero, $a1, 0
; LA64-NEXT: .LBB3_2: # %end
; LA64-NEXT: ret
%val = load volatile i32, ptr %b
@@ -111,7 +111,7 @@ define void @foo_br_sge(i32 %a, ptr %b) nounwind {
; LA32-NEXT: ld.w $a2, $a1, 0
; LA32-NEXT: bge $a2, $a0, .LBB4_2
; LA32-NEXT: # %bb.1: # %test
-; LA32-NEXT: ld.w $a0, $a1, 0
+; LA32-NEXT: ld.w $zero, $a1, 0
; LA32-NEXT: .LBB4_2: # %end
; LA32-NEXT: ret
;
@@ -121,7 +121,7 @@ define void @foo_br_sge(i32 %a, ptr %b) nounwind {
; LA64-NEXT: addi.w $a0, $a0, 0
; LA64-NEXT: bge $a2, $a0, .LBB4_2
; LA64-NEXT: # %bb.1: # %test
-; LA64-NEXT: ld.w $a0, $a1, 0
+; LA64-NEXT: ld.w $zero, $a1, 0
; LA64-NEXT: .LBB4_2: # %end
; LA64-NEXT: ret
%val = load volatile i32, ptr %b
@@ -141,7 +141,7 @@ define void @foo_br_ult(i32 %a, ptr %b) nounwind {
; LA32-NEXT: ld.w $a2, $a1, 0
; LA32-NEXT: bltu $a2, $a0, .LBB5_2
; LA32-NEXT: # %bb.1: # %test
-; LA32-NEXT: ld.w $a0, $a1, 0
+; LA32-NEXT: ld.w $zero, $a1, 0
; LA32-NEXT: .LBB5_2: # %end
; LA32-NEXT: ret
;
@@ -151,7 +151,7 @@ define void @foo_br_ult(i32 %a, ptr %b) nounwind {
; LA64-NEXT: addi.w $a0, $a0, 0
; LA64-NEXT: bltu $a2, $a0, .LBB5_2
; LA64-NEXT: # %bb.1: # %test
-; LA64-NEXT: ld.w $a0, $a1, 0
+; LA64-NEXT: ld.w $zero, $a1, 0
; LA64-NEXT: .LBB5_2: # %end
; LA64-NEXT: ret
%val = load volatile i32, ptr %b
@@ -171,7 +171,7 @@ define void @foo_br_uge(i32 %a, ptr %b) nounwind {
; LA32-NEXT: ld.w $a2, $a1, 0
; LA32-NEXT: bgeu $a2, $a0, .LBB6_2
; LA32-NEXT: # %bb.1: # %test
-; LA32-NEXT: ld.w $a0, $a1, 0
+; LA32-NEXT: ld.w $zero, $a1, 0
; LA32-NEXT: .LBB6_2: # %end
; LA32-NEXT: ret
;
@@ -181,7 +181,7 @@ define void @foo_br_uge(i32 %a, ptr %b) nounwind {
; LA64-NEXT: addi.w $a0, $a0, 0
; LA64-NEXT: bgeu $a2, $a0, .LBB6_2
; LA64-NEXT: # %bb.1: # %test
-; LA64-NEXT: ld.w $a0, $a1, 0
+; LA64-NEXT: ld.w $zero, $a1, 0
; LA64-NEXT: .LBB6_2: # %end
; LA64-NEXT: ret
%val = load volatile i32, ptr %b
@@ -202,7 +202,7 @@ define void @foo_br_sgt(i32 %a, ptr %b) nounwind {
; LA32-NEXT: ld.w $a2, $a1, 0
; LA32-NEXT: blt $a0, $a2, .LBB7_2
; LA32-NEXT: # %bb.1: # %test
-; LA32-NEXT: ld.w $a0, $a1, 0
+; LA32-NEXT: ld.w $zero, $a1, 0
; LA32-NEXT: .LBB7_2: # %end
; LA32-NEXT: ret
;
@@ -212,7 +212,7 @@ define void @foo_br_sgt(i32 %a, ptr %b) nounwind {
; LA64-NEXT: addi.w $a0, $a0, 0
; LA64-NEXT: blt $a0, $a2, .LBB7_2
; LA64-NEXT: # %bb.1: # %test
-; LA64-NEXT: ld.w $a0, $a1, 0
+; LA64-NEXT: ld.w $zero, $a1, 0
; LA64-NEXT: .LBB7_2: # %end
; LA64-NEXT: ret
%val = load volatile i32, ptr %b
@@ -232,7 +232,7 @@ define void @foo_br_sle(i32 %a, ptr %b) nounwind {
; LA32-NEXT: ld.w $a2, $a1, 0
; LA32-NEXT: bge $a0, $a2, .LBB8_2
; LA32-NEXT: # %bb.1: # %test
-; LA32-NEXT: ld.w $a0, $a1, 0
+; LA32-NEXT: ld.w $zero, $a1, 0
; LA32-NEXT: .LBB8_2: # %end
; LA32-NEXT: ret
;
@@ -242,7 +242,7 @@ define void @foo_br_sle(i32 %a, ptr %b) nounwind {
; LA64-NEXT: addi.w $a0, $a0, 0
; LA64-NEXT: bge $a0, $a2, .LBB8_2
; LA64-NEXT: # %bb.1: # %test
-; LA64-NEXT: ld.w $a0, $a1, 0
+; LA64-NEXT: ld.w $zero, $a1, 0
; LA64-NEXT: .LBB8_2: # %end
; LA64-NEXT: ret
%val = load volatile i32, ptr %b
@@ -262,7 +262,7 @@ define void @foo_br_ugt(i32 %a, ptr %b) nounwind {
; LA32-NEXT: ld.w $a2, $a1, 0
; LA32-NEXT: bltu $a0, $a2, .LBB9_2
; LA32-NEXT: # %bb.1: # %test
-; LA32-NEXT: ld.w $a0, $a1, 0
+; LA32-NEXT: ld.w $zero, $a1, 0
; LA32-NEXT: .LBB9_2: # %end
; LA32-NEXT: ret
;
@@ -272,7 +272,7 @@ define void @foo_br_ugt(i32 %a, ptr %b) nounwind {
; LA64-NEXT: addi.w $a0, $a0, 0
; LA64-NEXT:...
[truncated]
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM.
Good morning from the UK. It looks as though this change may have introduced failures in a couple of tests: https://lab.llvm.org/buildbot/#/builders/247/builds/19544 (FAIL: LLVM::opt-pipeline.ll). Is anyone able to take a look? Many thanks in advance.
Thanks for your report. That was caused by two implicitly related PRs being pulled in, and I'm working on fixing it. |
Just checking in, Thanks for the first test fix! 👍 Is there work being done on the second failure? |
Oops. I overlooked that I didn't have assertions enabled locally; I'll fix that right away.
Thanks again 👍 |
This patch adds a peephole pass `LoongArchDeadRegisterDefinitions`. It rewrites `rd` to `r0` when `rd` is marked as dead. It may improve the register allocation and reduce pipeline hazards on CPUs without register renaming and OOO.