diff --git a/llvm/include/llvm/CodeGen/TargetRegisterInfo.h b/llvm/include/llvm/CodeGen/TargetRegisterInfo.h index ebf06bc57948f..1a2f31e199336 100644 --- a/llvm/include/llvm/CodeGen/TargetRegisterInfo.h +++ b/llvm/include/llvm/CodeGen/TargetRegisterInfo.h @@ -1203,19 +1203,6 @@ class TargetRegisterInfo : public MCRegisterInfo { virtual bool isNonallocatableRegisterCalleeSave(MCRegister Reg) const { return false; } - - /// Returns if the architecture being targeted has the required Pseudo - /// Instructions for initializing the register. By default this returns false, - /// but where it is overriden for an architecture, the behaviour will be - /// different. This can either be a check to ensure the Register Class is - /// present, or to return true as an indication the architecture supports the - /// pass. If using the method that does not check for the Register Class, it - /// is imperative to ensure all required Pseudo Instructions are implemented, - /// otherwise compilation may fail with an `Unexpected register class` error. - virtual bool - doesRegClassHavePseudoInitUndef(const TargetRegisterClass *RC) const { - return false; - } }; //===----------------------------------------------------------------------===// diff --git a/llvm/include/llvm/CodeGen/TargetSubtargetInfo.h b/llvm/include/llvm/CodeGen/TargetSubtargetInfo.h index b4b018f080914..bfaa6450779ae 100644 --- a/llvm/include/llvm/CodeGen/TargetSubtargetInfo.h +++ b/llvm/include/llvm/CodeGen/TargetSubtargetInfo.h @@ -333,13 +333,13 @@ class TargetSubtargetInfo : public MCSubtargetInfo { /// Get the list of MacroFusion predicates. virtual std::vector getMacroFusions() const { return {}; }; - /// supportsInitUndef is used to determine if an architecture supports - /// the Init Undef Pass. By default, it is assumed that it will not support - /// the pass, with architecture specific overrides providing the information - /// where they are implemented. - virtual bool supportsInitUndef() const { return false; } + /// Whether the target has instructions where an early-clobber result + /// operand cannot overlap with an undef input operand. + virtual bool requiresDisjointEarlyClobberAndUndef() const { + // Conservatively assume such instructions exist by default. + return true; + } }; - } // end namespace llvm #endif // LLVM_CODEGEN_TARGETSUBTARGETINFO_H diff --git a/llvm/lib/CodeGen/InitUndef.cpp b/llvm/lib/CodeGen/InitUndef.cpp index d6f7c0d7cf0f5..5c8aa8b62d3e3 100644 --- a/llvm/lib/CodeGen/InitUndef.cpp +++ b/llvm/lib/CodeGen/InitUndef.cpp @@ -120,8 +120,6 @@ bool InitUndef::handleReg(MachineInstr *MI) { continue; if (!UseMO.getReg().isVirtual()) continue; - if (!TRI->doesRegClassHavePseudoInitUndef(MRI->getRegClass(UseMO.getReg()))) - continue; if (UseMO.isUndef() || findImplictDefMIFromReg(UseMO.getReg(), MRI)) Changed |= fixupIllOperand(MI, UseMO); @@ -140,8 +138,6 @@ bool InitUndef::handleSubReg(MachineFunction &MF, MachineInstr &MI, continue; if (UseMO.isTied()) continue; - if (!TRI->doesRegClassHavePseudoInitUndef(MRI->getRegClass(UseMO.getReg()))) - continue; Register Reg = UseMO.getReg(); if (NewRegs.count(Reg)) @@ -246,13 +242,9 @@ bool InitUndef::processBasicBlock(MachineFunction &MF, MachineBasicBlock &MBB, bool InitUndef::runOnMachineFunction(MachineFunction &MF) { ST = &MF.getSubtarget(); - // supportsInitUndef is implemented to reflect if an architecture has support - // for the InitUndef pass. Support comes from having the relevant Pseudo - // instructions that can be used to initialize the register. The function - // returns false by default so requires an implementation per architecture. - // Support can be added by overriding the function in a way that best fits - // the architecture. - if (!ST->supportsInitUndef()) + // The pass is only needed if early-clobber defs and undef ops cannot be + // allocated to the same register. + if (!ST->requiresDisjointEarlyClobberAndUndef()) return false; MRI = &MF.getRegInfo(); diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h index 7b74eab96c567..a4ae8a1be3225 100644 --- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h +++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h @@ -1587,6 +1587,12 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo, // the nop. return true; } + + bool requiresDisjointEarlyClobberAndUndef() const override { + // AMDGPU doesn't care if early-clobber and undef operands are allocated + // to the same register. + return false; + } }; class GCNUserSGPRUsageInfo { diff --git a/llvm/lib/Target/AMDGPU/R600Subtarget.h b/llvm/lib/Target/AMDGPU/R600Subtarget.h index c3d002f29272d..7f0f9305e1fa6 100644 --- a/llvm/lib/Target/AMDGPU/R600Subtarget.h +++ b/llvm/lib/Target/AMDGPU/R600Subtarget.h @@ -160,6 +160,12 @@ class R600Subtarget final : public R600GenSubtargetInfo, unsigned getMinWavesPerEU() const override { return AMDGPU::IsaInfo::getMinWavesPerEU(this); } + + bool requiresDisjointEarlyClobberAndUndef() const override { + // AMDGPU doesn't care if early-clobber and undef operands are allocated + // to the same register. + return false; + } }; } // end namespace llvm diff --git a/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h b/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h index 58b5e98fd30b1..926d702b4092a 100644 --- a/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h +++ b/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h @@ -240,20 +240,6 @@ class ARMBaseRegisterInfo : public ARMGenRegisterInfo { unsigned SrcSubReg) const override; int getSEHRegNum(unsigned i) const { return getEncodingValue(i); } - - bool doesRegClassHavePseudoInitUndef( - const TargetRegisterClass *RC) const override { - (void)RC; - // For the ARM Architecture we want to always return true because all - // required PseudoInitUndef types have been added. If compilation fails due - // to `Unexpected register class`, this is likely to be because the specific - // register being used is not support by Init Undef and needs the Pseudo - // Instruction adding to ARMInstrInfo.td. If this is implemented as a - // conditional check, this could create a false positive where Init Undef is - // not running, skipping the instruction and moving to the next. This could - // lead to illegal instructions being generated by the register allocator. - return true; - } }; } // end namespace llvm diff --git a/llvm/lib/Target/ARM/ARMSubtarget.h b/llvm/lib/Target/ARM/ARMSubtarget.h index 00239ff94b7ba..fa20f4b590bea 100644 --- a/llvm/lib/Target/ARM/ARMSubtarget.h +++ b/llvm/lib/Target/ARM/ARMSubtarget.h @@ -209,13 +209,6 @@ class ARMSubtarget : public ARMGenSubtargetInfo { return &InstrInfo->getRegisterInfo(); } - /// The correct instructions have been implemented to initialize undef - /// registers, therefore the ARM Architecture is supported by the Init Undef - /// Pass. This will return true as the pass needs to be supported for all - /// types of instructions. The pass will then perform more checks to ensure it - /// should be applying the Pseudo Instructions. - bool supportsInitUndef() const override { return true; } - const CallLowering *getCallLowering() const override; InstructionSelector *getInstructionSelector() const override; const LegalizerInfo *getLegalizerInfo() const override; diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.h b/llvm/lib/Target/RISCV/RISCVRegisterInfo.h index 98a712af08539..cb0bb77d1fcbc 100644 --- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.h +++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.h @@ -130,11 +130,6 @@ struct RISCVRegisterInfo : public RISCVGenRegisterInfo { const MachineFunction &MF, const VirtRegMap *VRM, const LiveRegMatrix *Matrix) const override; - bool doesRegClassHavePseudoInitUndef( - const TargetRegisterClass *RC) const override { - return isVRRegClass(RC); - } - static bool isVRRegClass(const TargetRegisterClass *RC) { return RISCVRI::isVRegClass(RC->TSFlags) && RISCVRI::getNF(RC->TSFlags) == 1; diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.h b/llvm/lib/Target/RISCV/RISCVSubtarget.h index ea54ff1df0b7c..bf9ed3f3d7165 100644 --- a/llvm/lib/Target/RISCV/RISCVSubtarget.h +++ b/llvm/lib/Target/RISCV/RISCVSubtarget.h @@ -306,8 +306,6 @@ class RISCVSubtarget : public RISCVGenSubtargetInfo { unsigned getTailDupAggressiveThreshold() const { return TuneInfo->TailDupAggressiveThreshold; } - - bool supportsInitUndef() const override { return hasVInstructions(); } }; } // End llvm namespace diff --git a/llvm/test/CodeGen/AArch64/arm64-ldxr-stxr.ll b/llvm/test/CodeGen/AArch64/arm64-ldxr-stxr.ll index 1a60f8752dd57..4fb0c2775a7a7 100644 --- a/llvm/test/CodeGen/AArch64/arm64-ldxr-stxr.ll +++ b/llvm/test/CodeGen/AArch64/arm64-ldxr-stxr.ll @@ -354,11 +354,10 @@ define dso_local i32 @test_store_release_i64(i32, i64 %val, ptr %addr) { } ; The stxp result cannot be allocated to the same register as the inputs. -; FIXME: This is a miscompile. define dso_local i32 @test_stxp_undef(ptr %p, i64 %x) nounwind { ; CHECK-LABEL: test_stxp_undef: ; CHECK: // %bb.0: -; CHECK-NEXT: stxp w8, x8, x1, [x0] +; CHECK-NEXT: stxp w8, x9, x1, [x0] ; CHECK-NEXT: mov w0, w8 ; CHECK-NEXT: ret %res = call i32 @llvm.aarch64.stxp(i64 undef, i64 %x, ptr %p) diff --git a/llvm/test/CodeGen/AArch64/init-undef.mir b/llvm/test/CodeGen/AArch64/init-undef.mir new file mode 100644 index 0000000000000..7935c09d7df5e --- /dev/null +++ b/llvm/test/CodeGen/AArch64/init-undef.mir @@ -0,0 +1,27 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 +# RUN: llc -mtriple=aarch64-- -run-pass=init-undef -o - %s | FileCheck %s + +--- +name: test_stxp_undef +body: | + bb.0: + liveins: $x0, $x1 + + ; CHECK-LABEL: name: test_stxp_undef + ; CHECK: liveins: $x0, $x1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64common = COPY $x0 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:gpr64 = IMPLICIT_DEF + ; CHECK-NEXT: [[INIT_UNDEF:%[0-9]+]]:gpr64 = INIT_UNDEF + ; CHECK-NEXT: early-clobber %3:gpr32 = STXPX killed [[INIT_UNDEF]], [[COPY]], [[COPY1]] :: (volatile store (s128)) + ; CHECK-NEXT: $w0 = COPY %3 + ; CHECK-NEXT: RET_ReallyLR implicit $w0 + %1:gpr64 = COPY $x1 + %0:gpr64common = COPY $x0 + %3:gpr64 = IMPLICIT_DEF + early-clobber %2:gpr32 = STXPX killed %3, %1, %0 :: (volatile store (s128)) + $w0 = COPY %2 + RET_ReallyLR implicit $w0 + +... diff --git a/llvm/test/CodeGen/RISCV/regalloc-last-chance-recoloring-failure.ll b/llvm/test/CodeGen/RISCV/regalloc-last-chance-recoloring-failure.ll index 0f3fdf08696d6..6a0dbbe356a16 100644 --- a/llvm/test/CodeGen/RISCV/regalloc-last-chance-recoloring-failure.ll +++ b/llvm/test/CodeGen/RISCV/regalloc-last-chance-recoloring-failure.ll @@ -25,7 +25,7 @@ define void @last_chance_recoloring_failure() { ; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x20, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 32 + 16 * vlenb ; CHECK-NEXT: li a0, 55 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; CHECK-NEXT: vloxseg2ei32.v v16, (a0), v8 +; CHECK-NEXT: vloxseg2ei32.v v16, (a1), v8 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 @@ -81,7 +81,7 @@ define void @last_chance_recoloring_failure() { ; SUBREGLIVENESS-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x20, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 32 + 16 * vlenb ; SUBREGLIVENESS-NEXT: li a0, 55 ; SUBREGLIVENESS-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; SUBREGLIVENESS-NEXT: vloxseg2ei32.v v16, (a0), v8 +; SUBREGLIVENESS-NEXT: vloxseg2ei32.v v16, (a1), v8 ; SUBREGLIVENESS-NEXT: csrr a0, vlenb ; SUBREGLIVENESS-NEXT: slli a0, a0, 3 ; SUBREGLIVENESS-NEXT: add a0, sp, a0