diff --git a/llvm/lib/Target/RISCV/RISCVExpandAtomicPseudoInsts.cpp b/llvm/lib/Target/RISCV/RISCVExpandAtomicPseudoInsts.cpp index bb772fc5da922..f275a19a3cf7e 100644 --- a/llvm/lib/Target/RISCV/RISCVExpandAtomicPseudoInsts.cpp +++ b/llvm/lib/Target/RISCV/RISCVExpandAtomicPseudoInsts.cpp @@ -59,6 +59,9 @@ class RISCVExpandAtomicPseudo : public MachineFunctionPass { bool expandAtomicCmpXchg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, bool IsMasked, int Width, MachineBasicBlock::iterator &NextMBBI); + bool expandAMOCAS(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, + bool IsPaired, int Width, + MachineBasicBlock::iterator &NextMBBI); #ifndef NDEBUG unsigned getInstSizeInBytes(const MachineFunction &MF) const { unsigned Size = 0; @@ -145,6 +148,14 @@ bool RISCVExpandAtomicPseudo::expandMI(MachineBasicBlock &MBB, return expandAtomicCmpXchg(MBB, MBBI, false, 64, NextMBBI); case RISCV::PseudoMaskedCmpXchg32: return expandAtomicCmpXchg(MBB, MBBI, true, 32, NextMBBI); + case RISCV::PseudoAMOCAS_W: + return expandAMOCAS(MBB, MBBI, false, 32, NextMBBI); + case RISCV::PseudoAMOCAS_D_RV64: + return expandAMOCAS(MBB, MBBI, false, 64, NextMBBI); + case RISCV::PseudoAMOCAS_D_RV32: + return expandAMOCAS(MBB, MBBI, true, 64, NextMBBI); + case RISCV::PseudoAMOCAS_Q: + return expandAMOCAS(MBB, MBBI, true, 128, NextMBBI); } return false; @@ -256,6 +267,74 @@ static unsigned getSCForRMW(AtomicOrdering Ordering, int Width, llvm_unreachable("Unexpected SC width\n"); } +static unsigned getAMOCASForRMW32(AtomicOrdering Ordering, + const RISCVSubtarget *Subtarget) { + if (Subtarget->hasStdExtZtso()) + return RISCV::AMOCAS_W; + switch (Ordering) { + default: + llvm_unreachable("Unexpected AtomicOrdering"); + case AtomicOrdering::Monotonic: + return RISCV::AMOCAS_W; + case AtomicOrdering::Acquire: + return RISCV::AMOCAS_W_AQ; + case AtomicOrdering::Release: + return RISCV::AMOCAS_W_RL; + case AtomicOrdering::AcquireRelease: + case 
AtomicOrdering::SequentiallyConsistent: + return RISCV::AMOCAS_W_AQ_RL; + } +} + +static unsigned getAMOCASForRMW64(AtomicOrdering Ordering, + const RISCVSubtarget *Subtarget) { + if (Subtarget->hasStdExtZtso()) + return RISCV::AMOCAS_D; + switch (Ordering) { + default: + llvm_unreachable("Unexpected AtomicOrdering"); + case AtomicOrdering::Monotonic: + return RISCV::AMOCAS_D; + case AtomicOrdering::Acquire: + return RISCV::AMOCAS_D_AQ; + case AtomicOrdering::Release: + return RISCV::AMOCAS_D_RL; + case AtomicOrdering::AcquireRelease: + case AtomicOrdering::SequentiallyConsistent: + return RISCV::AMOCAS_D_AQ_RL; + } +} + +static unsigned getAMOCASForRMW128(AtomicOrdering Ordering, + const RISCVSubtarget *Subtarget) { + if (Subtarget->hasStdExtZtso()) + return RISCV::AMOCAS_Q; + switch (Ordering) { + default: + llvm_unreachable("Unexpected AtomicOrdering"); + case AtomicOrdering::Monotonic: + return RISCV::AMOCAS_Q; + case AtomicOrdering::Acquire: + return RISCV::AMOCAS_Q_AQ; + case AtomicOrdering::Release: + return RISCV::AMOCAS_Q_RL; + case AtomicOrdering::AcquireRelease: + case AtomicOrdering::SequentiallyConsistent: + return RISCV::AMOCAS_Q_AQ_RL; + } +} + +static unsigned getAMOCASForRMW(AtomicOrdering Ordering, int Width, + const RISCVSubtarget *Subtarget) { + if (Width == 32) + return getAMOCASForRMW32(Ordering, Subtarget); + if (Width == 64) + return getAMOCASForRMW64(Ordering, Subtarget); + if (Width == 128) + return getAMOCASForRMW128(Ordering, Subtarget); + llvm_unreachable("Unexpected AMOCAS width\n"); +} + static void doAtomicBinOpExpansion(const RISCVInstrInfo *TII, MachineInstr &MI, DebugLoc DL, MachineBasicBlock *ThisMBB, MachineBasicBlock *LoopMBB, @@ -728,6 +807,38 @@ bool RISCVExpandAtomicPseudo::expandAtomicCmpXchg( return true; } +static Register getGPRPairEvenReg(Register PairedReg) { + assert(PairedReg >= RISCV::X0_PD && PairedReg <= RISCV::X30_PD && + "Invalid GPR pair"); + return (PairedReg - RISCV::X0_PD) * 2 + RISCV::X0; +} + +bool 
RISCVExpandAtomicPseudo::expandAMOCAS( + MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, bool IsPaired, + int Width, MachineBasicBlock::iterator &NextMBBI) { + MachineInstr &MI = *MBBI; + DebugLoc DL = MI.getDebugLoc(); + + // Pseudo operands: 0 = $res, 1 = $addr, 3 = $newval, 4 = ordering. For paired + // pseudos the AMOCAS is built with the even GPR of each register pair. + Register DestReg = MI.getOperand(0).getReg(); + if (IsPaired) + DestReg = getGPRPairEvenReg(DestReg); + Register AddrReg = MI.getOperand(1).getReg(); + Register NewValReg = MI.getOperand(3).getReg(); + if (IsPaired) + NewValReg = getGPRPairEvenReg(NewValReg); + auto Ordering = static_cast<AtomicOrdering>(MI.getOperand(4).getImm()); + + BuildMI(MBB, MBBI, DL, TII->get(getAMOCASForRMW(Ordering, Width, STI))) + .addReg(DestReg, RegState::Define) + .addReg(AddrReg) + .addReg(NewValReg); + + MI.eraseFromParent(); + return true; +} + } // end of anonymous namespace INITIALIZE_PASS(RISCVExpandAtomicPseudo, "riscv-expand-atomic-pseudo", diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 0b682b0cbb338..9dc621aef3ac4 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -621,7 +621,10 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, } if (Subtarget.hasStdExtA()) { - setMaxAtomicSizeInBitsSupported(Subtarget.getXLen()); + if (Subtarget.hasStdExtZacas()) + setMaxAtomicSizeInBitsSupported(Subtarget.getXLen() * 2); + else + setMaxAtomicSizeInBitsSupported(Subtarget.getXLen()); setMinCmpXchgSizeInBits(32); } else if (Subtarget.hasForcedAtomics()) { setMaxAtomicSizeInBitsSupported(Subtarget.getXLen()); @@ -1339,6 +1342,12 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, XLenVT, LibCall); } + // Set atomic_cmp_swap operations to expand to AMOCAS.D (RV32) and AMOCAS.Q + // (RV64). + if (Subtarget.hasStdExtZacas()) + setOperationAction(ISD::ATOMIC_CMP_SWAP, + Subtarget.is64Bit() ? 
MVT::i128 : MVT::i64, Custom); + if (Subtarget.hasVendorXTHeadMemIdx()) { for (unsigned im = (unsigned)ISD::PRE_INC; im != (unsigned)ISD::POST_DEC; ++im) { @@ -11075,6 +11084,60 @@ static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG) { return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes); } +// Create an even/odd pair of X registers holding integer value V. +static SDValue createGPRPairNode(SelectionDAG &DAG, SDValue V, MVT VT, + MVT SubRegVT) { + SDLoc DL(V.getNode()); + auto [VLo, VHi] = DAG.SplitScalar(V, DL, SubRegVT, SubRegVT); + SDValue RegClass = + DAG.getTargetConstant(RISCV::GPRPairRegClassID, DL, MVT::i32); + SDValue SubReg0 = DAG.getTargetConstant(RISCV::sub_32, DL, MVT::i32); + SDValue SubReg1 = DAG.getTargetConstant(RISCV::sub_32_hi, DL, MVT::i32); + const SDValue Ops[] = {RegClass, VLo, SubReg0, VHi, SubReg1}; + return SDValue( + DAG.getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped, Ops), 0); +} + +// Custom type-legalize a 2*XLen-wide ATOMIC_CMP_SWAP into PseudoAMOCAS_D_RV32 +// (i64) or PseudoAMOCAS_Q (i128). Compare and store values are passed as GPR +// pairs; the loaded pair is split back into lo/hi XLen halves for the result. +static void ReplaceCMP_SWAP_2XLenResults(SDNode *N, + SmallVectorImpl<SDValue> &Results, + SelectionDAG &DAG, + const RISCVSubtarget &Subtarget) { + MVT VT = N->getSimpleValueType(0); + assert(N->getValueType(0) == (Subtarget.is64Bit() ? MVT::i128 : MVT::i64) && + "AtomicCmpSwap on types less than 2*XLen should be legal"); + assert(Subtarget.hasStdExtZacas()); + MVT XLenVT = Subtarget.getXLenVT(); + + SDLoc DL(N); + MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand(); + AtomicOrdering Ordering = MemOp->getMergedOrdering(); + SDValue Ops[] = { + N->getOperand(1), // Ptr + createGPRPairNode(DAG, N->getOperand(2), VT, XLenVT), // Compare value + createGPRPairNode(DAG, N->getOperand(3), VT, XLenVT), // Store value + DAG.getTargetConstant(static_cast<unsigned>(Ordering), DL, + MVT::i32), // Ordering + N->getOperand(0), // Chain in + }; + + unsigned Opcode = + (VT == MVT::i64 ? 
RISCV::PseudoAMOCAS_D_RV32 : RISCV::PseudoAMOCAS_Q); + MachineSDNode *CmpSwap = DAG.getMachineNode( + Opcode, DL, DAG.getVTList(MVT::Untyped, MVT::Other), Ops); + DAG.setNodeMemRefs(CmpSwap, {MemOp}); + + unsigned SubReg1 = RISCV::sub_32, SubReg2 = RISCV::sub_32_hi; + SDValue Lo = + DAG.getTargetExtractSubreg(SubReg1, DL, XLenVT, SDValue(CmpSwap, 0)); + SDValue Hi = + DAG.getTargetExtractSubreg(SubReg2, DL, XLenVT, SDValue(CmpSwap, 0)); + Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, VT, Lo, Hi)); + Results.push_back(SDValue(CmpSwap, 1)); +} + void RISCVTargetLowering::ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const { @@ -11082,6 +11145,9 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N, switch (N->getOpcode()) { default: llvm_unreachable("Don't know how to custom type legalize this operation!"); + case ISD::ATOMIC_CMP_SWAP: + ReplaceCMP_SWAP_2XLenResults(N, Results, DAG, Subtarget); + break; case ISD::STRICT_FP_TO_SINT: case ISD::STRICT_FP_TO_UINT: case ISD::FP_TO_SINT: diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoA.td b/llvm/lib/Target/RISCV/RISCVInstrInfoA.td index c8301fcc6b938..eb94def2a9798 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoA.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoA.td @@ -295,6 +295,28 @@ multiclass PseudoCmpXchgPat; } +let Predicates = [HasStdExtZacas] in { +class PseudoAMOCAS<RegisterClass RC> + : Pseudo<(outs RC:$res), + (ins GPR:$addr, RC:$cmpval, RC:$newval, ixlenimm:$ordering), []> { + let Constraints = "$res = $cmpval"; + let mayLoad = 1; + let mayStore = 1; + let hasSideEffects = 0; +} +def PseudoAMOCAS_W: PseudoAMOCAS<GPR>; +defm : PseudoCmpXchgPat<"atomic_cmp_swap_32", PseudoAMOCAS_W>; + +let Predicates = [HasStdExtZacas, IsRV32] in +def PseudoAMOCAS_D_RV32: PseudoAMOCAS<GPRPair>; + +let Predicates = [HasStdExtZacas, IsRV64] in { + def PseudoAMOCAS_D_RV64: PseudoAMOCAS<GPR>; + defm : PseudoCmpXchgPat<"atomic_cmp_swap_64", PseudoAMOCAS_D_RV64>; + def PseudoAMOCAS_Q: PseudoAMOCAS<GPRPair>; +} +} + def PseudoCmpXchg32 : 
PseudoCmpXchg; defm : PseudoCmpXchgPat<"atomic_cmp_swap_32", PseudoCmpXchg32>; diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.td b/llvm/lib/Target/RISCV/RISCVRegisterInfo.td index c59c9b294d793..3bd9e01aeeb1b 100644 --- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.td +++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.td @@ -571,6 +571,16 @@ def GPRPF64 : RegisterClass<"RISCV", [f64], 64, (add X0_PD, X2_PD, X4_PD )>; +let RegInfos = RegInfoByHwMode<[RV32, RV64], [RegInfo<32, 32, 32>, RegInfo<64, 64, 64>]> in +def GPRPair : RegisterClass<"RISCV", [untyped], 64, (add + X10_PD, X12_PD, X14_PD, X16_PD, + X6_PD, + X28_PD, X30_PD, + X8_PD, + X18_PD, X20_PD, X22_PD, X24_PD, X26_PD, + X0_PD, X2_PD, X4_PD +)>; + // The register class is added for inline assembly for vector mask types. def VM : VRegThis Loop Header: Depth=1 -; CHECK-NEXT: # Child Loop BB0_3 Depth 2 -; CHECK-NEXT: .LBB0_3: # %do_cmpxchg -; CHECK-NEXT: # Parent Loop BB0_1 Depth=1 -; CHECK-NEXT: # => This Inner Loop Header: Depth=2 -; CHECK-NEXT: lr.w.aqrl a3, (a0) -; CHECK-NEXT: bne a3, a1, .LBB0_1 -; CHECK-NEXT: # %bb.4: # %do_cmpxchg -; CHECK-NEXT: # in Loop: Header=BB0_3 Depth=2 -; CHECK-NEXT: sc.w.rl a4, a2, (a0) -; CHECK-NEXT: bnez a4, .LBB0_3 -; CHECK-NEXT: # %bb.5: # %do_cmpxchg -; CHECK-NEXT: # %bb.2: # %exit -; CHECK-NEXT: ret +; RV32IA-LABEL: cmpxchg_and_branch1: +; RV32IA: # %bb.0: # %entry +; RV32IA-NEXT: .LBB0_1: # %do_cmpxchg +; RV32IA-NEXT: # =>This Loop Header: Depth=1 +; RV32IA-NEXT: # Child Loop BB0_3 Depth 2 +; RV32IA-NEXT: .LBB0_3: # %do_cmpxchg +; RV32IA-NEXT: # Parent Loop BB0_1 Depth=1 +; RV32IA-NEXT: # => This Inner Loop Header: Depth=2 +; RV32IA-NEXT: lr.w.aqrl a3, (a0) +; RV32IA-NEXT: bne a3, a1, .LBB0_1 +; RV32IA-NEXT: # %bb.4: # %do_cmpxchg +; RV32IA-NEXT: # in Loop: Header=BB0_3 Depth=2 +; RV32IA-NEXT: sc.w.rl a4, a2, (a0) +; RV32IA-NEXT: bnez a4, .LBB0_3 +; RV32IA-NEXT: # %bb.5: # %do_cmpxchg +; RV32IA-NEXT: # %bb.2: # %exit +; RV32IA-NEXT: ret +; +; RV32IA-ZACAS-LABEL: 
cmpxchg_and_branch1: +; RV32IA-ZACAS: # %bb.0: # %entry +; RV32IA-ZACAS-NEXT: .LBB0_1: # %do_cmpxchg +; RV32IA-ZACAS-NEXT: # =>This Inner Loop Header: Depth=1 +; RV32IA-ZACAS-NEXT: mv a3, a1 +; RV32IA-ZACAS-NEXT: amocas.w.aqrl a3, a2, (a0) +; RV32IA-ZACAS-NEXT: bne a3, a1, .LBB0_1 +; RV32IA-ZACAS-NEXT: # %bb.2: # %exit +; RV32IA-ZACAS-NEXT: ret +; +; RV64IA-LABEL: cmpxchg_and_branch1: +; RV64IA: # %bb.0: # %entry +; RV64IA-NEXT: .LBB0_1: # %do_cmpxchg +; RV64IA-NEXT: # =>This Loop Header: Depth=1 +; RV64IA-NEXT: # Child Loop BB0_3 Depth 2 +; RV64IA-NEXT: .LBB0_3: # %do_cmpxchg +; RV64IA-NEXT: # Parent Loop BB0_1 Depth=1 +; RV64IA-NEXT: # => This Inner Loop Header: Depth=2 +; RV64IA-NEXT: lr.w.aqrl a3, (a0) +; RV64IA-NEXT: bne a3, a1, .LBB0_1 +; RV64IA-NEXT: # %bb.4: # %do_cmpxchg +; RV64IA-NEXT: # in Loop: Header=BB0_3 Depth=2 +; RV64IA-NEXT: sc.w.rl a4, a2, (a0) +; RV64IA-NEXT: bnez a4, .LBB0_3 +; RV64IA-NEXT: # %bb.5: # %do_cmpxchg +; RV64IA-NEXT: # %bb.2: # %exit +; RV64IA-NEXT: ret +; +; RV64IA-ZACAS-LABEL: cmpxchg_and_branch1: +; RV64IA-ZACAS: # %bb.0: # %entry +; RV64IA-ZACAS-NEXT: .LBB0_1: # %do_cmpxchg +; RV64IA-ZACAS-NEXT: # =>This Inner Loop Header: Depth=1 +; RV64IA-ZACAS-NEXT: mv a3, a1 +; RV64IA-ZACAS-NEXT: amocas.w.aqrl a3, a2, (a0) +; RV64IA-ZACAS-NEXT: bne a3, a1, .LBB0_1 +; RV64IA-ZACAS-NEXT: # %bb.2: # %exit +; RV64IA-ZACAS-NEXT: ret entry: br label %do_cmpxchg do_cmpxchg: @@ -36,25 +78,65 @@ exit: } define void @cmpxchg_and_branch2(ptr %ptr, i32 signext %cmp, i32 signext %val) nounwind { -; CHECK-LABEL: cmpxchg_and_branch2: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: .LBB1_1: # %do_cmpxchg -; CHECK-NEXT: # =>This Loop Header: Depth=1 -; CHECK-NEXT: # Child Loop BB1_3 Depth 2 -; CHECK-NEXT: .LBB1_3: # %do_cmpxchg -; CHECK-NEXT: # Parent Loop BB1_1 Depth=1 -; CHECK-NEXT: # => This Inner Loop Header: Depth=2 -; CHECK-NEXT: lr.w.aqrl a3, (a0) -; CHECK-NEXT: bne a3, a1, .LBB1_5 -; CHECK-NEXT: # %bb.4: # %do_cmpxchg -; CHECK-NEXT: # in Loop: 
Header=BB1_3 Depth=2 -; CHECK-NEXT: sc.w.rl a4, a2, (a0) -; CHECK-NEXT: bnez a4, .LBB1_3 -; CHECK-NEXT: .LBB1_5: # %do_cmpxchg -; CHECK-NEXT: # in Loop: Header=BB1_1 Depth=1 -; CHECK-NEXT: beq a3, a1, .LBB1_1 -; CHECK-NEXT: # %bb.2: # %exit -; CHECK-NEXT: ret +; RV32IA-LABEL: cmpxchg_and_branch2: +; RV32IA: # %bb.0: # %entry +; RV32IA-NEXT: .LBB1_1: # %do_cmpxchg +; RV32IA-NEXT: # =>This Loop Header: Depth=1 +; RV32IA-NEXT: # Child Loop BB1_3 Depth 2 +; RV32IA-NEXT: .LBB1_3: # %do_cmpxchg +; RV32IA-NEXT: # Parent Loop BB1_1 Depth=1 +; RV32IA-NEXT: # => This Inner Loop Header: Depth=2 +; RV32IA-NEXT: lr.w.aqrl a3, (a0) +; RV32IA-NEXT: bne a3, a1, .LBB1_5 +; RV32IA-NEXT: # %bb.4: # %do_cmpxchg +; RV32IA-NEXT: # in Loop: Header=BB1_3 Depth=2 +; RV32IA-NEXT: sc.w.rl a4, a2, (a0) +; RV32IA-NEXT: bnez a4, .LBB1_3 +; RV32IA-NEXT: .LBB1_5: # %do_cmpxchg +; RV32IA-NEXT: # in Loop: Header=BB1_1 Depth=1 +; RV32IA-NEXT: beq a3, a1, .LBB1_1 +; RV32IA-NEXT: # %bb.2: # %exit +; RV32IA-NEXT: ret +; +; RV32IA-ZACAS-LABEL: cmpxchg_and_branch2: +; RV32IA-ZACAS: # %bb.0: # %entry +; RV32IA-ZACAS-NEXT: .LBB1_1: # %do_cmpxchg +; RV32IA-ZACAS-NEXT: # =>This Inner Loop Header: Depth=1 +; RV32IA-ZACAS-NEXT: mv a3, a1 +; RV32IA-ZACAS-NEXT: amocas.w.aqrl a3, a2, (a0) +; RV32IA-ZACAS-NEXT: beq a3, a1, .LBB1_1 +; RV32IA-ZACAS-NEXT: # %bb.2: # %exit +; RV32IA-ZACAS-NEXT: ret +; +; RV64IA-LABEL: cmpxchg_and_branch2: +; RV64IA: # %bb.0: # %entry +; RV64IA-NEXT: .LBB1_1: # %do_cmpxchg +; RV64IA-NEXT: # =>This Loop Header: Depth=1 +; RV64IA-NEXT: # Child Loop BB1_3 Depth 2 +; RV64IA-NEXT: .LBB1_3: # %do_cmpxchg +; RV64IA-NEXT: # Parent Loop BB1_1 Depth=1 +; RV64IA-NEXT: # => This Inner Loop Header: Depth=2 +; RV64IA-NEXT: lr.w.aqrl a3, (a0) +; RV64IA-NEXT: bne a3, a1, .LBB1_5 +; RV64IA-NEXT: # %bb.4: # %do_cmpxchg +; RV64IA-NEXT: # in Loop: Header=BB1_3 Depth=2 +; RV64IA-NEXT: sc.w.rl a4, a2, (a0) +; RV64IA-NEXT: bnez a4, .LBB1_3 +; RV64IA-NEXT: .LBB1_5: # %do_cmpxchg +; RV64IA-NEXT: # in Loop: 
Header=BB1_1 Depth=1 +; RV64IA-NEXT: beq a3, a1, .LBB1_1 +; RV64IA-NEXT: # %bb.2: # %exit +; RV64IA-NEXT: ret +; +; RV64IA-ZACAS-LABEL: cmpxchg_and_branch2: +; RV64IA-ZACAS: # %bb.0: # %entry +; RV64IA-ZACAS-NEXT: .LBB1_1: # %do_cmpxchg +; RV64IA-ZACAS-NEXT: # =>This Inner Loop Header: Depth=1 +; RV64IA-ZACAS-NEXT: mv a3, a1 +; RV64IA-ZACAS-NEXT: amocas.w.aqrl a3, a2, (a0) +; RV64IA-ZACAS-NEXT: beq a3, a1, .LBB1_1 +; RV64IA-ZACAS-NEXT: # %bb.2: # %exit +; RV64IA-ZACAS-NEXT: ret entry: br label %do_cmpxchg do_cmpxchg: @@ -96,6 +178,36 @@ define void @cmpxchg_masked_and_branch1(ptr %ptr, i8 signext %cmp, i8 signext %v ; RV32IA-NEXT: # %bb.2: # %exit ; RV32IA-NEXT: ret ; +; RV32IA-ZACAS-LABEL: cmpxchg_masked_and_branch1: +; RV32IA-ZACAS: # %bb.0: # %entry +; RV32IA-ZACAS-NEXT: andi a3, a0, -4 +; RV32IA-ZACAS-NEXT: slli a4, a0, 3 +; RV32IA-ZACAS-NEXT: li a0, 255 +; RV32IA-ZACAS-NEXT: sll a0, a0, a4 +; RV32IA-ZACAS-NEXT: andi a1, a1, 255 +; RV32IA-ZACAS-NEXT: sll a1, a1, a4 +; RV32IA-ZACAS-NEXT: andi a2, a2, 255 +; RV32IA-ZACAS-NEXT: sll a2, a2, a4 +; RV32IA-ZACAS-NEXT: .LBB2_1: # %do_cmpxchg +; RV32IA-ZACAS-NEXT: # =>This Loop Header: Depth=1 +; RV32IA-ZACAS-NEXT: # Child Loop BB2_3 Depth 2 +; RV32IA-ZACAS-NEXT: .LBB2_3: # %do_cmpxchg +; RV32IA-ZACAS-NEXT: # Parent Loop BB2_1 Depth=1 +; RV32IA-ZACAS-NEXT: # => This Inner Loop Header: Depth=2 +; RV32IA-ZACAS-NEXT: lr.w.aqrl a4, (a3) +; RV32IA-ZACAS-NEXT: and a5, a4, a0 +; RV32IA-ZACAS-NEXT: bne a5, a1, .LBB2_1 +; RV32IA-ZACAS-NEXT: # %bb.4: # %do_cmpxchg +; RV32IA-ZACAS-NEXT: # in Loop: Header=BB2_3 Depth=2 +; RV32IA-ZACAS-NEXT: xor a5, a4, a2 +; RV32IA-ZACAS-NEXT: and a5, a5, a0 +; RV32IA-ZACAS-NEXT: xor a5, a4, a5 +; RV32IA-ZACAS-NEXT: sc.w.rl a5, a5, (a3) +; RV32IA-ZACAS-NEXT: bnez a5, .LBB2_3 +; RV32IA-ZACAS-NEXT: # %bb.5: # %do_cmpxchg +; RV32IA-ZACAS-NEXT: # %bb.2: # %exit +; RV32IA-ZACAS-NEXT: ret +; ; RV64IA-LABEL: cmpxchg_masked_and_branch1: ; RV64IA: # %bb.0: # %entry ; RV64IA-NEXT: andi a3, a0, -4 @@ -125,6 
+237,36 @@ define void @cmpxchg_masked_and_branch1(ptr %ptr, i8 signext %cmp, i8 signext %v ; RV64IA-NEXT: # %bb.5: # %do_cmpxchg ; RV64IA-NEXT: # %bb.2: # %exit ; RV64IA-NEXT: ret +; +; RV64IA-ZACAS-LABEL: cmpxchg_masked_and_branch1: +; RV64IA-ZACAS: # %bb.0: # %entry +; RV64IA-ZACAS-NEXT: andi a3, a0, -4 +; RV64IA-ZACAS-NEXT: slli a4, a0, 3 +; RV64IA-ZACAS-NEXT: li a0, 255 +; RV64IA-ZACAS-NEXT: sllw a0, a0, a4 +; RV64IA-ZACAS-NEXT: andi a1, a1, 255 +; RV64IA-ZACAS-NEXT: sllw a1, a1, a4 +; RV64IA-ZACAS-NEXT: andi a2, a2, 255 +; RV64IA-ZACAS-NEXT: sllw a2, a2, a4 +; RV64IA-ZACAS-NEXT: .LBB2_1: # %do_cmpxchg +; RV64IA-ZACAS-NEXT: # =>This Loop Header: Depth=1 +; RV64IA-ZACAS-NEXT: # Child Loop BB2_3 Depth 2 +; RV64IA-ZACAS-NEXT: .LBB2_3: # %do_cmpxchg +; RV64IA-ZACAS-NEXT: # Parent Loop BB2_1 Depth=1 +; RV64IA-ZACAS-NEXT: # => This Inner Loop Header: Depth=2 +; RV64IA-ZACAS-NEXT: lr.w.aqrl a4, (a3) +; RV64IA-ZACAS-NEXT: and a5, a4, a0 +; RV64IA-ZACAS-NEXT: bne a5, a1, .LBB2_1 +; RV64IA-ZACAS-NEXT: # %bb.4: # %do_cmpxchg +; RV64IA-ZACAS-NEXT: # in Loop: Header=BB2_3 Depth=2 +; RV64IA-ZACAS-NEXT: xor a5, a4, a2 +; RV64IA-ZACAS-NEXT: and a5, a5, a0 +; RV64IA-ZACAS-NEXT: xor a5, a4, a5 +; RV64IA-ZACAS-NEXT: sc.w.rl a5, a5, (a3) +; RV64IA-ZACAS-NEXT: bnez a5, .LBB2_3 +; RV64IA-ZACAS-NEXT: # %bb.5: # %do_cmpxchg +; RV64IA-ZACAS-NEXT: # %bb.2: # %exit +; RV64IA-ZACAS-NEXT: ret entry: br label %do_cmpxchg do_cmpxchg: @@ -169,6 +311,39 @@ define void @cmpxchg_masked_and_branch2(ptr %ptr, i8 signext %cmp, i8 signext %v ; RV32IA-NEXT: # %bb.2: # %exit ; RV32IA-NEXT: ret ; +; RV32IA-ZACAS-LABEL: cmpxchg_masked_and_branch2: +; RV32IA-ZACAS: # %bb.0: # %entry +; RV32IA-ZACAS-NEXT: andi a3, a0, -4 +; RV32IA-ZACAS-NEXT: slli a4, a0, 3 +; RV32IA-ZACAS-NEXT: li a0, 255 +; RV32IA-ZACAS-NEXT: sll a0, a0, a4 +; RV32IA-ZACAS-NEXT: andi a1, a1, 255 +; RV32IA-ZACAS-NEXT: sll a1, a1, a4 +; RV32IA-ZACAS-NEXT: andi a2, a2, 255 +; RV32IA-ZACAS-NEXT: sll a2, a2, a4 +; RV32IA-ZACAS-NEXT: 
.LBB3_1: # %do_cmpxchg +; RV32IA-ZACAS-NEXT: # =>This Loop Header: Depth=1 +; RV32IA-ZACAS-NEXT: # Child Loop BB3_3 Depth 2 +; RV32IA-ZACAS-NEXT: .LBB3_3: # %do_cmpxchg +; RV32IA-ZACAS-NEXT: # Parent Loop BB3_1 Depth=1 +; RV32IA-ZACAS-NEXT: # => This Inner Loop Header: Depth=2 +; RV32IA-ZACAS-NEXT: lr.w.aqrl a4, (a3) +; RV32IA-ZACAS-NEXT: and a5, a4, a0 +; RV32IA-ZACAS-NEXT: bne a5, a1, .LBB3_5 +; RV32IA-ZACAS-NEXT: # %bb.4: # %do_cmpxchg +; RV32IA-ZACAS-NEXT: # in Loop: Header=BB3_3 Depth=2 +; RV32IA-ZACAS-NEXT: xor a5, a4, a2 +; RV32IA-ZACAS-NEXT: and a5, a5, a0 +; RV32IA-ZACAS-NEXT: xor a5, a4, a5 +; RV32IA-ZACAS-NEXT: sc.w.rl a5, a5, (a3) +; RV32IA-ZACAS-NEXT: bnez a5, .LBB3_3 +; RV32IA-ZACAS-NEXT: .LBB3_5: # %do_cmpxchg +; RV32IA-ZACAS-NEXT: # in Loop: Header=BB3_1 Depth=1 +; RV32IA-ZACAS-NEXT: and a4, a4, a0 +; RV32IA-ZACAS-NEXT: beq a1, a4, .LBB3_1 +; RV32IA-ZACAS-NEXT: # %bb.2: # %exit +; RV32IA-ZACAS-NEXT: ret +; ; RV64IA-LABEL: cmpxchg_masked_and_branch2: ; RV64IA: # %bb.0: # %entry ; RV64IA-NEXT: andi a3, a0, -4 @@ -201,6 +376,39 @@ define void @cmpxchg_masked_and_branch2(ptr %ptr, i8 signext %cmp, i8 signext %v ; RV64IA-NEXT: beq a1, a4, .LBB3_1 ; RV64IA-NEXT: # %bb.2: # %exit ; RV64IA-NEXT: ret +; +; RV64IA-ZACAS-LABEL: cmpxchg_masked_and_branch2: +; RV64IA-ZACAS: # %bb.0: # %entry +; RV64IA-ZACAS-NEXT: andi a3, a0, -4 +; RV64IA-ZACAS-NEXT: slli a4, a0, 3 +; RV64IA-ZACAS-NEXT: li a0, 255 +; RV64IA-ZACAS-NEXT: sllw a0, a0, a4 +; RV64IA-ZACAS-NEXT: andi a1, a1, 255 +; RV64IA-ZACAS-NEXT: sllw a1, a1, a4 +; RV64IA-ZACAS-NEXT: andi a2, a2, 255 +; RV64IA-ZACAS-NEXT: sllw a2, a2, a4 +; RV64IA-ZACAS-NEXT: .LBB3_1: # %do_cmpxchg +; RV64IA-ZACAS-NEXT: # =>This Loop Header: Depth=1 +; RV64IA-ZACAS-NEXT: # Child Loop BB3_3 Depth 2 +; RV64IA-ZACAS-NEXT: .LBB3_3: # %do_cmpxchg +; RV64IA-ZACAS-NEXT: # Parent Loop BB3_1 Depth=1 +; RV64IA-ZACAS-NEXT: # => This Inner Loop Header: Depth=2 +; RV64IA-ZACAS-NEXT: lr.w.aqrl a4, (a3) +; RV64IA-ZACAS-NEXT: and a5, a4, a0 +; 
RV64IA-ZACAS-NEXT: bne a5, a1, .LBB3_5 +; RV64IA-ZACAS-NEXT: # %bb.4: # %do_cmpxchg +; RV64IA-ZACAS-NEXT: # in Loop: Header=BB3_3 Depth=2 +; RV64IA-ZACAS-NEXT: xor a5, a4, a2 +; RV64IA-ZACAS-NEXT: and a5, a5, a0 +; RV64IA-ZACAS-NEXT: xor a5, a4, a5 +; RV64IA-ZACAS-NEXT: sc.w.rl a5, a5, (a3) +; RV64IA-ZACAS-NEXT: bnez a5, .LBB3_3 +; RV64IA-ZACAS-NEXT: .LBB3_5: # %do_cmpxchg +; RV64IA-ZACAS-NEXT: # in Loop: Header=BB3_1 Depth=1 +; RV64IA-ZACAS-NEXT: and a4, a4, a0 +; RV64IA-ZACAS-NEXT: beq a1, a4, .LBB3_1 +; RV64IA-ZACAS-NEXT: # %bb.2: # %exit +; RV64IA-ZACAS-NEXT: ret entry: br label %do_cmpxchg do_cmpxchg: @@ -212,25 +420,65 @@ exit: } define void @cmpxchg_and_irrelevant_branch(ptr %ptr, i32 signext %cmp, i32 signext %val, i1 zeroext %bool) nounwind { -; CHECK-LABEL: cmpxchg_and_irrelevant_branch: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: .LBB4_1: # %do_cmpxchg -; CHECK-NEXT: # =>This Loop Header: Depth=1 -; CHECK-NEXT: # Child Loop BB4_3 Depth 2 -; CHECK-NEXT: .LBB4_3: # %do_cmpxchg -; CHECK-NEXT: # Parent Loop BB4_1 Depth=1 -; CHECK-NEXT: # => This Inner Loop Header: Depth=2 -; CHECK-NEXT: lr.w.aqrl a4, (a0) -; CHECK-NEXT: bne a4, a1, .LBB4_5 -; CHECK-NEXT: # %bb.4: # %do_cmpxchg -; CHECK-NEXT: # in Loop: Header=BB4_3 Depth=2 -; CHECK-NEXT: sc.w.rl a5, a2, (a0) -; CHECK-NEXT: bnez a5, .LBB4_3 -; CHECK-NEXT: .LBB4_5: # %do_cmpxchg -; CHECK-NEXT: # in Loop: Header=BB4_1 Depth=1 -; CHECK-NEXT: beqz a3, .LBB4_1 -; CHECK-NEXT: # %bb.2: # %exit -; CHECK-NEXT: ret +; RV32IA-LABEL: cmpxchg_and_irrelevant_branch: +; RV32IA: # %bb.0: # %entry +; RV32IA-NEXT: .LBB4_1: # %do_cmpxchg +; RV32IA-NEXT: # =>This Loop Header: Depth=1 +; RV32IA-NEXT: # Child Loop BB4_3 Depth 2 +; RV32IA-NEXT: .LBB4_3: # %do_cmpxchg +; RV32IA-NEXT: # Parent Loop BB4_1 Depth=1 +; RV32IA-NEXT: # => This Inner Loop Header: Depth=2 +; RV32IA-NEXT: lr.w.aqrl a4, (a0) +; RV32IA-NEXT: bne a4, a1, .LBB4_5 +; RV32IA-NEXT: # %bb.4: # %do_cmpxchg +; RV32IA-NEXT: # in Loop: Header=BB4_3 Depth=2 +; RV32IA-NEXT: 
sc.w.rl a5, a2, (a0) +; RV32IA-NEXT: bnez a5, .LBB4_3 +; RV32IA-NEXT: .LBB4_5: # %do_cmpxchg +; RV32IA-NEXT: # in Loop: Header=BB4_1 Depth=1 +; RV32IA-NEXT: beqz a3, .LBB4_1 +; RV32IA-NEXT: # %bb.2: # %exit +; RV32IA-NEXT: ret +; +; RV32IA-ZACAS-LABEL: cmpxchg_and_irrelevant_branch: +; RV32IA-ZACAS: # %bb.0: # %entry +; RV32IA-ZACAS-NEXT: .LBB4_1: # %do_cmpxchg +; RV32IA-ZACAS-NEXT: # =>This Inner Loop Header: Depth=1 +; RV32IA-ZACAS-NEXT: mv a4, a1 +; RV32IA-ZACAS-NEXT: amocas.w.aqrl a4, a2, (a0) +; RV32IA-ZACAS-NEXT: beqz a3, .LBB4_1 +; RV32IA-ZACAS-NEXT: # %bb.2: # %exit +; RV32IA-ZACAS-NEXT: ret +; +; RV64IA-LABEL: cmpxchg_and_irrelevant_branch: +; RV64IA: # %bb.0: # %entry +; RV64IA-NEXT: .LBB4_1: # %do_cmpxchg +; RV64IA-NEXT: # =>This Loop Header: Depth=1 +; RV64IA-NEXT: # Child Loop BB4_3 Depth 2 +; RV64IA-NEXT: .LBB4_3: # %do_cmpxchg +; RV64IA-NEXT: # Parent Loop BB4_1 Depth=1 +; RV64IA-NEXT: # => This Inner Loop Header: Depth=2 +; RV64IA-NEXT: lr.w.aqrl a4, (a0) +; RV64IA-NEXT: bne a4, a1, .LBB4_5 +; RV64IA-NEXT: # %bb.4: # %do_cmpxchg +; RV64IA-NEXT: # in Loop: Header=BB4_3 Depth=2 +; RV64IA-NEXT: sc.w.rl a5, a2, (a0) +; RV64IA-NEXT: bnez a5, .LBB4_3 +; RV64IA-NEXT: .LBB4_5: # %do_cmpxchg +; RV64IA-NEXT: # in Loop: Header=BB4_1 Depth=1 +; RV64IA-NEXT: beqz a3, .LBB4_1 +; RV64IA-NEXT: # %bb.2: # %exit +; RV64IA-NEXT: ret +; +; RV64IA-ZACAS-LABEL: cmpxchg_and_irrelevant_branch: +; RV64IA-ZACAS: # %bb.0: # %entry +; RV64IA-ZACAS-NEXT: .LBB4_1: # %do_cmpxchg +; RV64IA-ZACAS-NEXT: # =>This Inner Loop Header: Depth=1 +; RV64IA-ZACAS-NEXT: mv a4, a1 +; RV64IA-ZACAS-NEXT: amocas.w.aqrl a4, a2, (a0) +; RV64IA-ZACAS-NEXT: beqz a3, .LBB4_1 +; RV64IA-ZACAS-NEXT: # %bb.2: # %exit +; RV64IA-ZACAS-NEXT: ret entry: br label %do_cmpxchg do_cmpxchg: diff --git a/llvm/test/CodeGen/RISCV/atomic-cmpxchg-flag.ll b/llvm/test/CodeGen/RISCV/atomic-cmpxchg-flag.ll index f25571b5cf253..4c1ac38be0630 100644 --- a/llvm/test/CodeGen/RISCV/atomic-cmpxchg-flag.ll +++ 
b/llvm/test/CodeGen/RISCV/atomic-cmpxchg-flag.ll @@ -1,6 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv64 -mattr=+a -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefix=RV64IA %s +; RUN: llc -mtriple=riscv64 -mattr=+a,+experimental-zacas -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefix=RV64IA-ZACAS %s ; This test ensures that the output of the 'lr.w' instruction is sign-extended. ; Previously, the default zero-extension was being used and 'cmp' parameter @@ -21,6 +23,14 @@ define i1 @cmpxchg_i32_seq_cst_seq_cst(ptr %ptr, i32 signext %cmp, ; RV64IA-NEXT: xor a1, a3, a1 ; RV64IA-NEXT: seqz a0, a1 ; RV64IA-NEXT: ret +; +; RV64IA-ZACAS-LABEL: cmpxchg_i32_seq_cst_seq_cst: +; RV64IA-ZACAS: # %bb.0: # %entry +; RV64IA-ZACAS-NEXT: mv a3, a1 +; RV64IA-ZACAS-NEXT: amocas.w.aqrl a3, a2, (a0) +; RV64IA-ZACAS-NEXT: xor a1, a3, a1 +; RV64IA-ZACAS-NEXT: seqz a0, a1 +; RV64IA-ZACAS-NEXT: ret i32 signext %val) nounwind { entry: %0 = cmpxchg ptr %ptr, i32 %cmp, i32 %val seq_cst seq_cst diff --git a/llvm/test/CodeGen/RISCV/atomic-cmpxchg.ll b/llvm/test/CodeGen/RISCV/atomic-cmpxchg.ll index eea4cb72938af..b29fc80ef0473 100644 --- a/llvm/test/CodeGen/RISCV/atomic-cmpxchg.ll +++ b/llvm/test/CodeGen/RISCV/atomic-cmpxchg.ll @@ -3,12 +3,16 @@ ; RUN: | FileCheck -check-prefix=RV32I %s ; RUN: llc -mtriple=riscv32 -mattr=+a -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefixes=RV32IA,RV32IA-WMO %s +; RUN: llc -mtriple=riscv32 -mattr=+a,+experimental-zacas -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefixes=RV32IA,RV32IA-ZACAS %s ; RUN: llc -mtriple=riscv32 -mattr=+a,+experimental-ztso -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefixes=RV32IA,RV32IA-TSO %s ; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefix=RV64I %s ; RUN: llc -mtriple=riscv64 -mattr=+a -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefixes=RV64IA,RV64IA-WMO 
%s +; RUN: llc -mtriple=riscv64 -mattr=+a,+experimental-zacas -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefixes=RV64IA,RV64IA-ZACAS %s ; RUN: llc -mtriple=riscv64 -mattr=+a,+experimental-ztso -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefixes=RV64IA,RV64IA-TSO %s @@ -125,6 +129,29 @@ define void @cmpxchg_i8_acquire_monotonic(ptr %ptr, i8 %cmp, i8 %val) nounwind { ; RV32IA-WMO-NEXT: .LBB1_3: ; RV32IA-WMO-NEXT: ret ; +; RV32IA-ZACAS-LABEL: cmpxchg_i8_acquire_monotonic: +; RV32IA-ZACAS: # %bb.0: +; RV32IA-ZACAS-NEXT: andi a3, a0, -4 +; RV32IA-ZACAS-NEXT: slli a0, a0, 3 +; RV32IA-ZACAS-NEXT: li a4, 255 +; RV32IA-ZACAS-NEXT: sll a4, a4, a0 +; RV32IA-ZACAS-NEXT: andi a1, a1, 255 +; RV32IA-ZACAS-NEXT: sll a1, a1, a0 +; RV32IA-ZACAS-NEXT: andi a2, a2, 255 +; RV32IA-ZACAS-NEXT: sll a0, a2, a0 +; RV32IA-ZACAS-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-ZACAS-NEXT: lr.w.aq a2, (a3) +; RV32IA-ZACAS-NEXT: and a5, a2, a4 +; RV32IA-ZACAS-NEXT: bne a5, a1, .LBB1_3 +; RV32IA-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB1_1 Depth=1 +; RV32IA-ZACAS-NEXT: xor a5, a2, a0 +; RV32IA-ZACAS-NEXT: and a5, a5, a4 +; RV32IA-ZACAS-NEXT: xor a5, a2, a5 +; RV32IA-ZACAS-NEXT: sc.w a5, a5, (a3) +; RV32IA-ZACAS-NEXT: bnez a5, .LBB1_1 +; RV32IA-ZACAS-NEXT: .LBB1_3: +; RV32IA-ZACAS-NEXT: ret +; ; RV32IA-TSO-LABEL: cmpxchg_i8_acquire_monotonic: ; RV32IA-TSO: # %bb.0: ; RV32IA-TSO-NEXT: andi a3, a0, -4 @@ -184,6 +211,29 @@ define void @cmpxchg_i8_acquire_monotonic(ptr %ptr, i8 %cmp, i8 %val) nounwind { ; RV64IA-WMO-NEXT: .LBB1_3: ; RV64IA-WMO-NEXT: ret ; +; RV64IA-ZACAS-LABEL: cmpxchg_i8_acquire_monotonic: +; RV64IA-ZACAS: # %bb.0: +; RV64IA-ZACAS-NEXT: andi a3, a0, -4 +; RV64IA-ZACAS-NEXT: slli a0, a0, 3 +; RV64IA-ZACAS-NEXT: li a4, 255 +; RV64IA-ZACAS-NEXT: sllw a4, a4, a0 +; RV64IA-ZACAS-NEXT: andi a1, a1, 255 +; RV64IA-ZACAS-NEXT: sllw a1, a1, a0 +; RV64IA-ZACAS-NEXT: andi a2, a2, 255 +; RV64IA-ZACAS-NEXT: sllw a0, a2, a0 +; RV64IA-ZACAS-NEXT: .LBB1_1: # 
=>This Inner Loop Header: Depth=1 +; RV64IA-ZACAS-NEXT: lr.w.aq a2, (a3) +; RV64IA-ZACAS-NEXT: and a5, a2, a4 +; RV64IA-ZACAS-NEXT: bne a5, a1, .LBB1_3 +; RV64IA-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB1_1 Depth=1 +; RV64IA-ZACAS-NEXT: xor a5, a2, a0 +; RV64IA-ZACAS-NEXT: and a5, a5, a4 +; RV64IA-ZACAS-NEXT: xor a5, a2, a5 +; RV64IA-ZACAS-NEXT: sc.w a5, a5, (a3) +; RV64IA-ZACAS-NEXT: bnez a5, .LBB1_1 +; RV64IA-ZACAS-NEXT: .LBB1_3: +; RV64IA-ZACAS-NEXT: ret +; ; RV64IA-TSO-LABEL: cmpxchg_i8_acquire_monotonic: ; RV64IA-TSO: # %bb.0: ; RV64IA-TSO-NEXT: andi a3, a0, -4 @@ -206,6 +256,50 @@ define void @cmpxchg_i8_acquire_monotonic(ptr %ptr, i8 %cmp, i8 %val) nounwind { ; RV64IA-TSO-NEXT: bnez a5, .LBB1_1 ; RV64IA-TSO-NEXT: .LBB1_3: ; RV64IA-TSO-NEXT: ret +; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i8_acquire_monotonic: +; RV32IA-WMO-ZACAS: # %bb.0: +; RV32IA-WMO-ZACAS-NEXT: andi a3, a0, -4 +; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3 +; RV32IA-WMO-ZACAS-NEXT: li a4, 255 +; RV32IA-WMO-ZACAS-NEXT: sll a4, a4, a0 +; RV32IA-WMO-ZACAS-NEXT: andi a1, a1, 255 +; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0 +; RV32IA-WMO-ZACAS-NEXT: andi a2, a2, 255 +; RV32IA-WMO-ZACAS-NEXT: sll a0, a2, a0 +; RV32IA-WMO-ZACAS-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-WMO-ZACAS-NEXT: lr.w.aq a2, (a3) +; RV32IA-WMO-ZACAS-NEXT: and a5, a2, a4 +; RV32IA-WMO-ZACAS-NEXT: bne a5, a1, .LBB1_3 +; RV32IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB1_1 Depth=1 +; RV32IA-WMO-ZACAS-NEXT: xor a5, a2, a0 +; RV32IA-WMO-ZACAS-NEXT: and a5, a5, a4 +; RV32IA-WMO-ZACAS-NEXT: xor a5, a2, a5 +; RV32IA-WMO-ZACAS-NEXT: sc.w a5, a5, (a3) +; RV32IA-WMO-ZACAS-NEXT: bnez a5, .LBB1_1 +; RV32IA-WMO-ZACAS-NEXT: .LBB1_3: +; RV32IA-WMO-ZACAS-NEXT: ret +; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i8_acquire_monotonic: +; RV64IA-WMO-ZACAS: # %bb.0: +; RV64IA-WMO-ZACAS-NEXT: andi a3, a0, -4 +; RV64IA-WMO-ZACAS-NEXT: slli a0, a0, 3 +; RV64IA-WMO-ZACAS-NEXT: li a4, 255 +; RV64IA-WMO-ZACAS-NEXT: sllw a4, a4, a0 +; RV64IA-WMO-ZACAS-NEXT: 
andi a1, a1, 255 +; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a0 +; RV64IA-WMO-ZACAS-NEXT: andi a2, a2, 255 +; RV64IA-WMO-ZACAS-NEXT: sllw a0, a2, a0 +; RV64IA-WMO-ZACAS-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-WMO-ZACAS-NEXT: lr.w.aq a2, (a3) +; RV64IA-WMO-ZACAS-NEXT: and a5, a2, a4 +; RV64IA-WMO-ZACAS-NEXT: bne a5, a1, .LBB1_3 +; RV64IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB1_1 Depth=1 +; RV64IA-WMO-ZACAS-NEXT: xor a5, a2, a0 +; RV64IA-WMO-ZACAS-NEXT: and a5, a5, a4 +; RV64IA-WMO-ZACAS-NEXT: xor a5, a2, a5 +; RV64IA-WMO-ZACAS-NEXT: sc.w a5, a5, (a3) +; RV64IA-WMO-ZACAS-NEXT: bnez a5, .LBB1_1 +; RV64IA-WMO-ZACAS-NEXT: .LBB1_3: +; RV64IA-WMO-ZACAS-NEXT: ret %res = cmpxchg ptr %ptr, i8 %cmp, i8 %val acquire monotonic ret void } @@ -247,6 +341,29 @@ define void @cmpxchg_i8_acquire_acquire(ptr %ptr, i8 %cmp, i8 %val) nounwind { ; RV32IA-WMO-NEXT: .LBB2_3: ; RV32IA-WMO-NEXT: ret ; +; RV32IA-ZACAS-LABEL: cmpxchg_i8_acquire_acquire: +; RV32IA-ZACAS: # %bb.0: +; RV32IA-ZACAS-NEXT: andi a3, a0, -4 +; RV32IA-ZACAS-NEXT: slli a0, a0, 3 +; RV32IA-ZACAS-NEXT: li a4, 255 +; RV32IA-ZACAS-NEXT: sll a4, a4, a0 +; RV32IA-ZACAS-NEXT: andi a1, a1, 255 +; RV32IA-ZACAS-NEXT: sll a1, a1, a0 +; RV32IA-ZACAS-NEXT: andi a2, a2, 255 +; RV32IA-ZACAS-NEXT: sll a0, a2, a0 +; RV32IA-ZACAS-NEXT: .LBB2_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-ZACAS-NEXT: lr.w.aq a2, (a3) +; RV32IA-ZACAS-NEXT: and a5, a2, a4 +; RV32IA-ZACAS-NEXT: bne a5, a1, .LBB2_3 +; RV32IA-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB2_1 Depth=1 +; RV32IA-ZACAS-NEXT: xor a5, a2, a0 +; RV32IA-ZACAS-NEXT: and a5, a5, a4 +; RV32IA-ZACAS-NEXT: xor a5, a2, a5 +; RV32IA-ZACAS-NEXT: sc.w a5, a5, (a3) +; RV32IA-ZACAS-NEXT: bnez a5, .LBB2_1 +; RV32IA-ZACAS-NEXT: .LBB2_3: +; RV32IA-ZACAS-NEXT: ret +; ; RV32IA-TSO-LABEL: cmpxchg_i8_acquire_acquire: ; RV32IA-TSO: # %bb.0: ; RV32IA-TSO-NEXT: andi a3, a0, -4 @@ -306,6 +423,29 @@ define void @cmpxchg_i8_acquire_acquire(ptr %ptr, i8 %cmp, i8 %val) nounwind { ; 
RV64IA-WMO-NEXT: .LBB2_3: ; RV64IA-WMO-NEXT: ret ; +; RV64IA-ZACAS-LABEL: cmpxchg_i8_acquire_acquire: +; RV64IA-ZACAS: # %bb.0: +; RV64IA-ZACAS-NEXT: andi a3, a0, -4 +; RV64IA-ZACAS-NEXT: slli a0, a0, 3 +; RV64IA-ZACAS-NEXT: li a4, 255 +; RV64IA-ZACAS-NEXT: sllw a4, a4, a0 +; RV64IA-ZACAS-NEXT: andi a1, a1, 255 +; RV64IA-ZACAS-NEXT: sllw a1, a1, a0 +; RV64IA-ZACAS-NEXT: andi a2, a2, 255 +; RV64IA-ZACAS-NEXT: sllw a0, a2, a0 +; RV64IA-ZACAS-NEXT: .LBB2_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-ZACAS-NEXT: lr.w.aq a2, (a3) +; RV64IA-ZACAS-NEXT: and a5, a2, a4 +; RV64IA-ZACAS-NEXT: bne a5, a1, .LBB2_3 +; RV64IA-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB2_1 Depth=1 +; RV64IA-ZACAS-NEXT: xor a5, a2, a0 +; RV64IA-ZACAS-NEXT: and a5, a5, a4 +; RV64IA-ZACAS-NEXT: xor a5, a2, a5 +; RV64IA-ZACAS-NEXT: sc.w a5, a5, (a3) +; RV64IA-ZACAS-NEXT: bnez a5, .LBB2_1 +; RV64IA-ZACAS-NEXT: .LBB2_3: +; RV64IA-ZACAS-NEXT: ret +; ; RV64IA-TSO-LABEL: cmpxchg_i8_acquire_acquire: ; RV64IA-TSO: # %bb.0: ; RV64IA-TSO-NEXT: andi a3, a0, -4 @@ -328,6 +468,50 @@ define void @cmpxchg_i8_acquire_acquire(ptr %ptr, i8 %cmp, i8 %val) nounwind { ; RV64IA-TSO-NEXT: bnez a5, .LBB2_1 ; RV64IA-TSO-NEXT: .LBB2_3: ; RV64IA-TSO-NEXT: ret +; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i8_acquire_acquire: +; RV32IA-WMO-ZACAS: # %bb.0: +; RV32IA-WMO-ZACAS-NEXT: andi a3, a0, -4 +; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3 +; RV32IA-WMO-ZACAS-NEXT: li a4, 255 +; RV32IA-WMO-ZACAS-NEXT: sll a4, a4, a0 +; RV32IA-WMO-ZACAS-NEXT: andi a1, a1, 255 +; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0 +; RV32IA-WMO-ZACAS-NEXT: andi a2, a2, 255 +; RV32IA-WMO-ZACAS-NEXT: sll a0, a2, a0 +; RV32IA-WMO-ZACAS-NEXT: .LBB2_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-WMO-ZACAS-NEXT: lr.w.aq a2, (a3) +; RV32IA-WMO-ZACAS-NEXT: and a5, a2, a4 +; RV32IA-WMO-ZACAS-NEXT: bne a5, a1, .LBB2_3 +; RV32IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB2_1 Depth=1 +; RV32IA-WMO-ZACAS-NEXT: xor a5, a2, a0 +; RV32IA-WMO-ZACAS-NEXT: and a5, a5, a4 +; 
RV32IA-WMO-ZACAS-NEXT: xor a5, a2, a5 +; RV32IA-WMO-ZACAS-NEXT: sc.w a5, a5, (a3) +; RV32IA-WMO-ZACAS-NEXT: bnez a5, .LBB2_1 +; RV32IA-WMO-ZACAS-NEXT: .LBB2_3: +; RV32IA-WMO-ZACAS-NEXT: ret +; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i8_acquire_acquire: +; RV64IA-WMO-ZACAS: # %bb.0: +; RV64IA-WMO-ZACAS-NEXT: andi a3, a0, -4 +; RV64IA-WMO-ZACAS-NEXT: slli a0, a0, 3 +; RV64IA-WMO-ZACAS-NEXT: li a4, 255 +; RV64IA-WMO-ZACAS-NEXT: sllw a4, a4, a0 +; RV64IA-WMO-ZACAS-NEXT: andi a1, a1, 255 +; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a0 +; RV64IA-WMO-ZACAS-NEXT: andi a2, a2, 255 +; RV64IA-WMO-ZACAS-NEXT: sllw a0, a2, a0 +; RV64IA-WMO-ZACAS-NEXT: .LBB2_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-WMO-ZACAS-NEXT: lr.w.aq a2, (a3) +; RV64IA-WMO-ZACAS-NEXT: and a5, a2, a4 +; RV64IA-WMO-ZACAS-NEXT: bne a5, a1, .LBB2_3 +; RV64IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB2_1 Depth=1 +; RV64IA-WMO-ZACAS-NEXT: xor a5, a2, a0 +; RV64IA-WMO-ZACAS-NEXT: and a5, a5, a4 +; RV64IA-WMO-ZACAS-NEXT: xor a5, a2, a5 +; RV64IA-WMO-ZACAS-NEXT: sc.w a5, a5, (a3) +; RV64IA-WMO-ZACAS-NEXT: bnez a5, .LBB2_1 +; RV64IA-WMO-ZACAS-NEXT: .LBB2_3: +; RV64IA-WMO-ZACAS-NEXT: ret %res = cmpxchg ptr %ptr, i8 %cmp, i8 %val acquire acquire ret void } @@ -369,6 +553,29 @@ define void @cmpxchg_i8_release_monotonic(ptr %ptr, i8 %cmp, i8 %val) nounwind { ; RV32IA-WMO-NEXT: .LBB3_3: ; RV32IA-WMO-NEXT: ret ; +; RV32IA-ZACAS-LABEL: cmpxchg_i8_release_monotonic: +; RV32IA-ZACAS: # %bb.0: +; RV32IA-ZACAS-NEXT: andi a3, a0, -4 +; RV32IA-ZACAS-NEXT: slli a0, a0, 3 +; RV32IA-ZACAS-NEXT: li a4, 255 +; RV32IA-ZACAS-NEXT: sll a4, a4, a0 +; RV32IA-ZACAS-NEXT: andi a1, a1, 255 +; RV32IA-ZACAS-NEXT: sll a1, a1, a0 +; RV32IA-ZACAS-NEXT: andi a2, a2, 255 +; RV32IA-ZACAS-NEXT: sll a0, a2, a0 +; RV32IA-ZACAS-NEXT: .LBB3_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-ZACAS-NEXT: lr.w a2, (a3) +; RV32IA-ZACAS-NEXT: and a5, a2, a4 +; RV32IA-ZACAS-NEXT: bne a5, a1, .LBB3_3 +; RV32IA-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB3_1 Depth=1 +; 
RV32IA-ZACAS-NEXT: xor a5, a2, a0 +; RV32IA-ZACAS-NEXT: and a5, a5, a4 +; RV32IA-ZACAS-NEXT: xor a5, a2, a5 +; RV32IA-ZACAS-NEXT: sc.w.rl a5, a5, (a3) +; RV32IA-ZACAS-NEXT: bnez a5, .LBB3_1 +; RV32IA-ZACAS-NEXT: .LBB3_3: +; RV32IA-ZACAS-NEXT: ret +; ; RV32IA-TSO-LABEL: cmpxchg_i8_release_monotonic: ; RV32IA-TSO: # %bb.0: ; RV32IA-TSO-NEXT: andi a3, a0, -4 @@ -428,6 +635,29 @@ define void @cmpxchg_i8_release_monotonic(ptr %ptr, i8 %cmp, i8 %val) nounwind { ; RV64IA-WMO-NEXT: .LBB3_3: ; RV64IA-WMO-NEXT: ret ; +; RV64IA-ZACAS-LABEL: cmpxchg_i8_release_monotonic: +; RV64IA-ZACAS: # %bb.0: +; RV64IA-ZACAS-NEXT: andi a3, a0, -4 +; RV64IA-ZACAS-NEXT: slli a0, a0, 3 +; RV64IA-ZACAS-NEXT: li a4, 255 +; RV64IA-ZACAS-NEXT: sllw a4, a4, a0 +; RV64IA-ZACAS-NEXT: andi a1, a1, 255 +; RV64IA-ZACAS-NEXT: sllw a1, a1, a0 +; RV64IA-ZACAS-NEXT: andi a2, a2, 255 +; RV64IA-ZACAS-NEXT: sllw a0, a2, a0 +; RV64IA-ZACAS-NEXT: .LBB3_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-ZACAS-NEXT: lr.w a2, (a3) +; RV64IA-ZACAS-NEXT: and a5, a2, a4 +; RV64IA-ZACAS-NEXT: bne a5, a1, .LBB3_3 +; RV64IA-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB3_1 Depth=1 +; RV64IA-ZACAS-NEXT: xor a5, a2, a0 +; RV64IA-ZACAS-NEXT: and a5, a5, a4 +; RV64IA-ZACAS-NEXT: xor a5, a2, a5 +; RV64IA-ZACAS-NEXT: sc.w.rl a5, a5, (a3) +; RV64IA-ZACAS-NEXT: bnez a5, .LBB3_1 +; RV64IA-ZACAS-NEXT: .LBB3_3: +; RV64IA-ZACAS-NEXT: ret +; ; RV64IA-TSO-LABEL: cmpxchg_i8_release_monotonic: ; RV64IA-TSO: # %bb.0: ; RV64IA-TSO-NEXT: andi a3, a0, -4 @@ -450,6 +680,50 @@ define void @cmpxchg_i8_release_monotonic(ptr %ptr, i8 %cmp, i8 %val) nounwind { ; RV64IA-TSO-NEXT: bnez a5, .LBB3_1 ; RV64IA-TSO-NEXT: .LBB3_3: ; RV64IA-TSO-NEXT: ret +; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i8_release_monotonic: +; RV32IA-WMO-ZACAS: # %bb.0: +; RV32IA-WMO-ZACAS-NEXT: andi a3, a0, -4 +; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3 +; RV32IA-WMO-ZACAS-NEXT: li a4, 255 +; RV32IA-WMO-ZACAS-NEXT: sll a4, a4, a0 +; RV32IA-WMO-ZACAS-NEXT: andi a1, a1, 255 +; 
RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0 +; RV32IA-WMO-ZACAS-NEXT: andi a2, a2, 255 +; RV32IA-WMO-ZACAS-NEXT: sll a0, a2, a0 +; RV32IA-WMO-ZACAS-NEXT: .LBB3_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-WMO-ZACAS-NEXT: lr.w a2, (a3) +; RV32IA-WMO-ZACAS-NEXT: and a5, a2, a4 +; RV32IA-WMO-ZACAS-NEXT: bne a5, a1, .LBB3_3 +; RV32IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB3_1 Depth=1 +; RV32IA-WMO-ZACAS-NEXT: xor a5, a2, a0 +; RV32IA-WMO-ZACAS-NEXT: and a5, a5, a4 +; RV32IA-WMO-ZACAS-NEXT: xor a5, a2, a5 +; RV32IA-WMO-ZACAS-NEXT: sc.w.rl a5, a5, (a3) +; RV32IA-WMO-ZACAS-NEXT: bnez a5, .LBB3_1 +; RV32IA-WMO-ZACAS-NEXT: .LBB3_3: +; RV32IA-WMO-ZACAS-NEXT: ret +; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i8_release_monotonic: +; RV64IA-WMO-ZACAS: # %bb.0: +; RV64IA-WMO-ZACAS-NEXT: andi a3, a0, -4 +; RV64IA-WMO-ZACAS-NEXT: slli a0, a0, 3 +; RV64IA-WMO-ZACAS-NEXT: li a4, 255 +; RV64IA-WMO-ZACAS-NEXT: sllw a4, a4, a0 +; RV64IA-WMO-ZACAS-NEXT: andi a1, a1, 255 +; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a0 +; RV64IA-WMO-ZACAS-NEXT: andi a2, a2, 255 +; RV64IA-WMO-ZACAS-NEXT: sllw a0, a2, a0 +; RV64IA-WMO-ZACAS-NEXT: .LBB3_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-WMO-ZACAS-NEXT: lr.w a2, (a3) +; RV64IA-WMO-ZACAS-NEXT: and a5, a2, a4 +; RV64IA-WMO-ZACAS-NEXT: bne a5, a1, .LBB3_3 +; RV64IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB3_1 Depth=1 +; RV64IA-WMO-ZACAS-NEXT: xor a5, a2, a0 +; RV64IA-WMO-ZACAS-NEXT: and a5, a5, a4 +; RV64IA-WMO-ZACAS-NEXT: xor a5, a2, a5 +; RV64IA-WMO-ZACAS-NEXT: sc.w.rl a5, a5, (a3) +; RV64IA-WMO-ZACAS-NEXT: bnez a5, .LBB3_1 +; RV64IA-WMO-ZACAS-NEXT: .LBB3_3: +; RV64IA-WMO-ZACAS-NEXT: ret %res = cmpxchg ptr %ptr, i8 %cmp, i8 %val release monotonic ret void } @@ -491,6 +765,29 @@ define void @cmpxchg_i8_release_acquire(ptr %ptr, i8 %cmp, i8 %val) nounwind { ; RV32IA-WMO-NEXT: .LBB4_3: ; RV32IA-WMO-NEXT: ret ; +; RV32IA-ZACAS-LABEL: cmpxchg_i8_release_acquire: +; RV32IA-ZACAS: # %bb.0: +; RV32IA-ZACAS-NEXT: andi a3, a0, -4 +; RV32IA-ZACAS-NEXT: slli a0, 
a0, 3 +; RV32IA-ZACAS-NEXT: li a4, 255 +; RV32IA-ZACAS-NEXT: sll a4, a4, a0 +; RV32IA-ZACAS-NEXT: andi a1, a1, 255 +; RV32IA-ZACAS-NEXT: sll a1, a1, a0 +; RV32IA-ZACAS-NEXT: andi a2, a2, 255 +; RV32IA-ZACAS-NEXT: sll a0, a2, a0 +; RV32IA-ZACAS-NEXT: .LBB4_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-ZACAS-NEXT: lr.w.aq a2, (a3) +; RV32IA-ZACAS-NEXT: and a5, a2, a4 +; RV32IA-ZACAS-NEXT: bne a5, a1, .LBB4_3 +; RV32IA-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB4_1 Depth=1 +; RV32IA-ZACAS-NEXT: xor a5, a2, a0 +; RV32IA-ZACAS-NEXT: and a5, a5, a4 +; RV32IA-ZACAS-NEXT: xor a5, a2, a5 +; RV32IA-ZACAS-NEXT: sc.w.rl a5, a5, (a3) +; RV32IA-ZACAS-NEXT: bnez a5, .LBB4_1 +; RV32IA-ZACAS-NEXT: .LBB4_3: +; RV32IA-ZACAS-NEXT: ret +; ; RV32IA-TSO-LABEL: cmpxchg_i8_release_acquire: ; RV32IA-TSO: # %bb.0: ; RV32IA-TSO-NEXT: andi a3, a0, -4 @@ -550,6 +847,29 @@ define void @cmpxchg_i8_release_acquire(ptr %ptr, i8 %cmp, i8 %val) nounwind { ; RV64IA-WMO-NEXT: .LBB4_3: ; RV64IA-WMO-NEXT: ret ; +; RV64IA-ZACAS-LABEL: cmpxchg_i8_release_acquire: +; RV64IA-ZACAS: # %bb.0: +; RV64IA-ZACAS-NEXT: andi a3, a0, -4 +; RV64IA-ZACAS-NEXT: slli a0, a0, 3 +; RV64IA-ZACAS-NEXT: li a4, 255 +; RV64IA-ZACAS-NEXT: sllw a4, a4, a0 +; RV64IA-ZACAS-NEXT: andi a1, a1, 255 +; RV64IA-ZACAS-NEXT: sllw a1, a1, a0 +; RV64IA-ZACAS-NEXT: andi a2, a2, 255 +; RV64IA-ZACAS-NEXT: sllw a0, a2, a0 +; RV64IA-ZACAS-NEXT: .LBB4_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-ZACAS-NEXT: lr.w.aq a2, (a3) +; RV64IA-ZACAS-NEXT: and a5, a2, a4 +; RV64IA-ZACAS-NEXT: bne a5, a1, .LBB4_3 +; RV64IA-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB4_1 Depth=1 +; RV64IA-ZACAS-NEXT: xor a5, a2, a0 +; RV64IA-ZACAS-NEXT: and a5, a5, a4 +; RV64IA-ZACAS-NEXT: xor a5, a2, a5 +; RV64IA-ZACAS-NEXT: sc.w.rl a5, a5, (a3) +; RV64IA-ZACAS-NEXT: bnez a5, .LBB4_1 +; RV64IA-ZACAS-NEXT: .LBB4_3: +; RV64IA-ZACAS-NEXT: ret +; ; RV64IA-TSO-LABEL: cmpxchg_i8_release_acquire: ; RV64IA-TSO: # %bb.0: ; RV64IA-TSO-NEXT: andi a3, a0, -4 @@ -572,6 +892,50 @@ define 
void @cmpxchg_i8_release_acquire(ptr %ptr, i8 %cmp, i8 %val) nounwind { ; RV64IA-TSO-NEXT: bnez a5, .LBB4_1 ; RV64IA-TSO-NEXT: .LBB4_3: ; RV64IA-TSO-NEXT: ret +; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i8_release_acquire: +; RV32IA-WMO-ZACAS: # %bb.0: +; RV32IA-WMO-ZACAS-NEXT: andi a3, a0, -4 +; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3 +; RV32IA-WMO-ZACAS-NEXT: li a4, 255 +; RV32IA-WMO-ZACAS-NEXT: sll a4, a4, a0 +; RV32IA-WMO-ZACAS-NEXT: andi a1, a1, 255 +; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0 +; RV32IA-WMO-ZACAS-NEXT: andi a2, a2, 255 +; RV32IA-WMO-ZACAS-NEXT: sll a0, a2, a0 +; RV32IA-WMO-ZACAS-NEXT: .LBB4_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-WMO-ZACAS-NEXT: lr.w.aq a2, (a3) +; RV32IA-WMO-ZACAS-NEXT: and a5, a2, a4 +; RV32IA-WMO-ZACAS-NEXT: bne a5, a1, .LBB4_3 +; RV32IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB4_1 Depth=1 +; RV32IA-WMO-ZACAS-NEXT: xor a5, a2, a0 +; RV32IA-WMO-ZACAS-NEXT: and a5, a5, a4 +; RV32IA-WMO-ZACAS-NEXT: xor a5, a2, a5 +; RV32IA-WMO-ZACAS-NEXT: sc.w.rl a5, a5, (a3) +; RV32IA-WMO-ZACAS-NEXT: bnez a5, .LBB4_1 +; RV32IA-WMO-ZACAS-NEXT: .LBB4_3: +; RV32IA-WMO-ZACAS-NEXT: ret +; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i8_release_acquire: +; RV64IA-WMO-ZACAS: # %bb.0: +; RV64IA-WMO-ZACAS-NEXT: andi a3, a0, -4 +; RV64IA-WMO-ZACAS-NEXT: slli a0, a0, 3 +; RV64IA-WMO-ZACAS-NEXT: li a4, 255 +; RV64IA-WMO-ZACAS-NEXT: sllw a4, a4, a0 +; RV64IA-WMO-ZACAS-NEXT: andi a1, a1, 255 +; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a0 +; RV64IA-WMO-ZACAS-NEXT: andi a2, a2, 255 +; RV64IA-WMO-ZACAS-NEXT: sllw a0, a2, a0 +; RV64IA-WMO-ZACAS-NEXT: .LBB4_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-WMO-ZACAS-NEXT: lr.w.aq a2, (a3) +; RV64IA-WMO-ZACAS-NEXT: and a5, a2, a4 +; RV64IA-WMO-ZACAS-NEXT: bne a5, a1, .LBB4_3 +; RV64IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB4_1 Depth=1 +; RV64IA-WMO-ZACAS-NEXT: xor a5, a2, a0 +; RV64IA-WMO-ZACAS-NEXT: and a5, a5, a4 +; RV64IA-WMO-ZACAS-NEXT: xor a5, a2, a5 +; RV64IA-WMO-ZACAS-NEXT: sc.w.rl a5, a5, (a3) +; RV64IA-WMO-ZACAS-NEXT: 
bnez a5, .LBB4_1 +; RV64IA-WMO-ZACAS-NEXT: .LBB4_3: +; RV64IA-WMO-ZACAS-NEXT: ret %res = cmpxchg ptr %ptr, i8 %cmp, i8 %val release acquire ret void } @@ -613,6 +977,29 @@ define void @cmpxchg_i8_acq_rel_monotonic(ptr %ptr, i8 %cmp, i8 %val) nounwind { ; RV32IA-WMO-NEXT: .LBB5_3: ; RV32IA-WMO-NEXT: ret ; +; RV32IA-ZACAS-LABEL: cmpxchg_i8_acq_rel_monotonic: +; RV32IA-ZACAS: # %bb.0: +; RV32IA-ZACAS-NEXT: andi a3, a0, -4 +; RV32IA-ZACAS-NEXT: slli a0, a0, 3 +; RV32IA-ZACAS-NEXT: li a4, 255 +; RV32IA-ZACAS-NEXT: sll a4, a4, a0 +; RV32IA-ZACAS-NEXT: andi a1, a1, 255 +; RV32IA-ZACAS-NEXT: sll a1, a1, a0 +; RV32IA-ZACAS-NEXT: andi a2, a2, 255 +; RV32IA-ZACAS-NEXT: sll a0, a2, a0 +; RV32IA-ZACAS-NEXT: .LBB5_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-ZACAS-NEXT: lr.w.aq a2, (a3) +; RV32IA-ZACAS-NEXT: and a5, a2, a4 +; RV32IA-ZACAS-NEXT: bne a5, a1, .LBB5_3 +; RV32IA-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB5_1 Depth=1 +; RV32IA-ZACAS-NEXT: xor a5, a2, a0 +; RV32IA-ZACAS-NEXT: and a5, a5, a4 +; RV32IA-ZACAS-NEXT: xor a5, a2, a5 +; RV32IA-ZACAS-NEXT: sc.w.rl a5, a5, (a3) +; RV32IA-ZACAS-NEXT: bnez a5, .LBB5_1 +; RV32IA-ZACAS-NEXT: .LBB5_3: +; RV32IA-ZACAS-NEXT: ret +; ; RV32IA-TSO-LABEL: cmpxchg_i8_acq_rel_monotonic: ; RV32IA-TSO: # %bb.0: ; RV32IA-TSO-NEXT: andi a3, a0, -4 @@ -672,6 +1059,29 @@ define void @cmpxchg_i8_acq_rel_monotonic(ptr %ptr, i8 %cmp, i8 %val) nounwind { ; RV64IA-WMO-NEXT: .LBB5_3: ; RV64IA-WMO-NEXT: ret ; +; RV64IA-ZACAS-LABEL: cmpxchg_i8_acq_rel_monotonic: +; RV64IA-ZACAS: # %bb.0: +; RV64IA-ZACAS-NEXT: andi a3, a0, -4 +; RV64IA-ZACAS-NEXT: slli a0, a0, 3 +; RV64IA-ZACAS-NEXT: li a4, 255 +; RV64IA-ZACAS-NEXT: sllw a4, a4, a0 +; RV64IA-ZACAS-NEXT: andi a1, a1, 255 +; RV64IA-ZACAS-NEXT: sllw a1, a1, a0 +; RV64IA-ZACAS-NEXT: andi a2, a2, 255 +; RV64IA-ZACAS-NEXT: sllw a0, a2, a0 +; RV64IA-ZACAS-NEXT: .LBB5_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-ZACAS-NEXT: lr.w.aq a2, (a3) +; RV64IA-ZACAS-NEXT: and a5, a2, a4 +; RV64IA-ZACAS-NEXT: bne a5, 
a1, .LBB5_3 +; RV64IA-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB5_1 Depth=1 +; RV64IA-ZACAS-NEXT: xor a5, a2, a0 +; RV64IA-ZACAS-NEXT: and a5, a5, a4 +; RV64IA-ZACAS-NEXT: xor a5, a2, a5 +; RV64IA-ZACAS-NEXT: sc.w.rl a5, a5, (a3) +; RV64IA-ZACAS-NEXT: bnez a5, .LBB5_1 +; RV64IA-ZACAS-NEXT: .LBB5_3: +; RV64IA-ZACAS-NEXT: ret +; ; RV64IA-TSO-LABEL: cmpxchg_i8_acq_rel_monotonic: ; RV64IA-TSO: # %bb.0: ; RV64IA-TSO-NEXT: andi a3, a0, -4 @@ -694,6 +1104,50 @@ define void @cmpxchg_i8_acq_rel_monotonic(ptr %ptr, i8 %cmp, i8 %val) nounwind { ; RV64IA-TSO-NEXT: bnez a5, .LBB5_1 ; RV64IA-TSO-NEXT: .LBB5_3: ; RV64IA-TSO-NEXT: ret +; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i8_acq_rel_monotonic: +; RV32IA-WMO-ZACAS: # %bb.0: +; RV32IA-WMO-ZACAS-NEXT: andi a3, a0, -4 +; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3 +; RV32IA-WMO-ZACAS-NEXT: li a4, 255 +; RV32IA-WMO-ZACAS-NEXT: sll a4, a4, a0 +; RV32IA-WMO-ZACAS-NEXT: andi a1, a1, 255 +; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0 +; RV32IA-WMO-ZACAS-NEXT: andi a2, a2, 255 +; RV32IA-WMO-ZACAS-NEXT: sll a0, a2, a0 +; RV32IA-WMO-ZACAS-NEXT: .LBB5_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-WMO-ZACAS-NEXT: lr.w.aq a2, (a3) +; RV32IA-WMO-ZACAS-NEXT: and a5, a2, a4 +; RV32IA-WMO-ZACAS-NEXT: bne a5, a1, .LBB5_3 +; RV32IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB5_1 Depth=1 +; RV32IA-WMO-ZACAS-NEXT: xor a5, a2, a0 +; RV32IA-WMO-ZACAS-NEXT: and a5, a5, a4 +; RV32IA-WMO-ZACAS-NEXT: xor a5, a2, a5 +; RV32IA-WMO-ZACAS-NEXT: sc.w.rl a5, a5, (a3) +; RV32IA-WMO-ZACAS-NEXT: bnez a5, .LBB5_1 +; RV32IA-WMO-ZACAS-NEXT: .LBB5_3: +; RV32IA-WMO-ZACAS-NEXT: ret +; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i8_acq_rel_monotonic: +; RV64IA-WMO-ZACAS: # %bb.0: +; RV64IA-WMO-ZACAS-NEXT: andi a3, a0, -4 +; RV64IA-WMO-ZACAS-NEXT: slli a0, a0, 3 +; RV64IA-WMO-ZACAS-NEXT: li a4, 255 +; RV64IA-WMO-ZACAS-NEXT: sllw a4, a4, a0 +; RV64IA-WMO-ZACAS-NEXT: andi a1, a1, 255 +; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a0 +; RV64IA-WMO-ZACAS-NEXT: andi a2, a2, 255 +; RV64IA-WMO-ZACAS-NEXT: sllw a0, 
a2, a0 +; RV64IA-WMO-ZACAS-NEXT: .LBB5_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-WMO-ZACAS-NEXT: lr.w.aq a2, (a3) +; RV64IA-WMO-ZACAS-NEXT: and a5, a2, a4 +; RV64IA-WMO-ZACAS-NEXT: bne a5, a1, .LBB5_3 +; RV64IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB5_1 Depth=1 +; RV64IA-WMO-ZACAS-NEXT: xor a5, a2, a0 +; RV64IA-WMO-ZACAS-NEXT: and a5, a5, a4 +; RV64IA-WMO-ZACAS-NEXT: xor a5, a2, a5 +; RV64IA-WMO-ZACAS-NEXT: sc.w.rl a5, a5, (a3) +; RV64IA-WMO-ZACAS-NEXT: bnez a5, .LBB5_1 +; RV64IA-WMO-ZACAS-NEXT: .LBB5_3: +; RV64IA-WMO-ZACAS-NEXT: ret %res = cmpxchg ptr %ptr, i8 %cmp, i8 %val acq_rel monotonic ret void } @@ -735,6 +1189,29 @@ define void @cmpxchg_i8_acq_rel_acquire(ptr %ptr, i8 %cmp, i8 %val) nounwind { ; RV32IA-WMO-NEXT: .LBB6_3: ; RV32IA-WMO-NEXT: ret ; +; RV32IA-ZACAS-LABEL: cmpxchg_i8_acq_rel_acquire: +; RV32IA-ZACAS: # %bb.0: +; RV32IA-ZACAS-NEXT: andi a3, a0, -4 +; RV32IA-ZACAS-NEXT: slli a0, a0, 3 +; RV32IA-ZACAS-NEXT: li a4, 255 +; RV32IA-ZACAS-NEXT: sll a4, a4, a0 +; RV32IA-ZACAS-NEXT: andi a1, a1, 255 +; RV32IA-ZACAS-NEXT: sll a1, a1, a0 +; RV32IA-ZACAS-NEXT: andi a2, a2, 255 +; RV32IA-ZACAS-NEXT: sll a0, a2, a0 +; RV32IA-ZACAS-NEXT: .LBB6_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-ZACAS-NEXT: lr.w.aq a2, (a3) +; RV32IA-ZACAS-NEXT: and a5, a2, a4 +; RV32IA-ZACAS-NEXT: bne a5, a1, .LBB6_3 +; RV32IA-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB6_1 Depth=1 +; RV32IA-ZACAS-NEXT: xor a5, a2, a0 +; RV32IA-ZACAS-NEXT: and a5, a5, a4 +; RV32IA-ZACAS-NEXT: xor a5, a2, a5 +; RV32IA-ZACAS-NEXT: sc.w.rl a5, a5, (a3) +; RV32IA-ZACAS-NEXT: bnez a5, .LBB6_1 +; RV32IA-ZACAS-NEXT: .LBB6_3: +; RV32IA-ZACAS-NEXT: ret +; ; RV32IA-TSO-LABEL: cmpxchg_i8_acq_rel_acquire: ; RV32IA-TSO: # %bb.0: ; RV32IA-TSO-NEXT: andi a3, a0, -4 @@ -794,6 +1271,29 @@ define void @cmpxchg_i8_acq_rel_acquire(ptr %ptr, i8 %cmp, i8 %val) nounwind { ; RV64IA-WMO-NEXT: .LBB6_3: ; RV64IA-WMO-NEXT: ret ; +; RV64IA-ZACAS-LABEL: cmpxchg_i8_acq_rel_acquire: +; RV64IA-ZACAS: # %bb.0: +; 
RV64IA-ZACAS-NEXT: andi a3, a0, -4 +; RV64IA-ZACAS-NEXT: slli a0, a0, 3 +; RV64IA-ZACAS-NEXT: li a4, 255 +; RV64IA-ZACAS-NEXT: sllw a4, a4, a0 +; RV64IA-ZACAS-NEXT: andi a1, a1, 255 +; RV64IA-ZACAS-NEXT: sllw a1, a1, a0 +; RV64IA-ZACAS-NEXT: andi a2, a2, 255 +; RV64IA-ZACAS-NEXT: sllw a0, a2, a0 +; RV64IA-ZACAS-NEXT: .LBB6_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-ZACAS-NEXT: lr.w.aq a2, (a3) +; RV64IA-ZACAS-NEXT: and a5, a2, a4 +; RV64IA-ZACAS-NEXT: bne a5, a1, .LBB6_3 +; RV64IA-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB6_1 Depth=1 +; RV64IA-ZACAS-NEXT: xor a5, a2, a0 +; RV64IA-ZACAS-NEXT: and a5, a5, a4 +; RV64IA-ZACAS-NEXT: xor a5, a2, a5 +; RV64IA-ZACAS-NEXT: sc.w.rl a5, a5, (a3) +; RV64IA-ZACAS-NEXT: bnez a5, .LBB6_1 +; RV64IA-ZACAS-NEXT: .LBB6_3: +; RV64IA-ZACAS-NEXT: ret +; ; RV64IA-TSO-LABEL: cmpxchg_i8_acq_rel_acquire: ; RV64IA-TSO: # %bb.0: ; RV64IA-TSO-NEXT: andi a3, a0, -4 @@ -816,6 +1316,50 @@ define void @cmpxchg_i8_acq_rel_acquire(ptr %ptr, i8 %cmp, i8 %val) nounwind { ; RV64IA-TSO-NEXT: bnez a5, .LBB6_1 ; RV64IA-TSO-NEXT: .LBB6_3: ; RV64IA-TSO-NEXT: ret +; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i8_acq_rel_acquire: +; RV32IA-WMO-ZACAS: # %bb.0: +; RV32IA-WMO-ZACAS-NEXT: andi a3, a0, -4 +; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3 +; RV32IA-WMO-ZACAS-NEXT: li a4, 255 +; RV32IA-WMO-ZACAS-NEXT: sll a4, a4, a0 +; RV32IA-WMO-ZACAS-NEXT: andi a1, a1, 255 +; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0 +; RV32IA-WMO-ZACAS-NEXT: andi a2, a2, 255 +; RV32IA-WMO-ZACAS-NEXT: sll a0, a2, a0 +; RV32IA-WMO-ZACAS-NEXT: .LBB6_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-WMO-ZACAS-NEXT: lr.w.aq a2, (a3) +; RV32IA-WMO-ZACAS-NEXT: and a5, a2, a4 +; RV32IA-WMO-ZACAS-NEXT: bne a5, a1, .LBB6_3 +; RV32IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB6_1 Depth=1 +; RV32IA-WMO-ZACAS-NEXT: xor a5, a2, a0 +; RV32IA-WMO-ZACAS-NEXT: and a5, a5, a4 +; RV32IA-WMO-ZACAS-NEXT: xor a5, a2, a5 +; RV32IA-WMO-ZACAS-NEXT: sc.w.rl a5, a5, (a3) +; RV32IA-WMO-ZACAS-NEXT: bnez a5, .LBB6_1 +; 
RV32IA-WMO-ZACAS-NEXT: .LBB6_3: +; RV32IA-WMO-ZACAS-NEXT: ret +; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i8_acq_rel_acquire: +; RV64IA-WMO-ZACAS: # %bb.0: +; RV64IA-WMO-ZACAS-NEXT: andi a3, a0, -4 +; RV64IA-WMO-ZACAS-NEXT: slli a0, a0, 3 +; RV64IA-WMO-ZACAS-NEXT: li a4, 255 +; RV64IA-WMO-ZACAS-NEXT: sllw a4, a4, a0 +; RV64IA-WMO-ZACAS-NEXT: andi a1, a1, 255 +; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a0 +; RV64IA-WMO-ZACAS-NEXT: andi a2, a2, 255 +; RV64IA-WMO-ZACAS-NEXT: sllw a0, a2, a0 +; RV64IA-WMO-ZACAS-NEXT: .LBB6_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-WMO-ZACAS-NEXT: lr.w.aq a2, (a3) +; RV64IA-WMO-ZACAS-NEXT: and a5, a2, a4 +; RV64IA-WMO-ZACAS-NEXT: bne a5, a1, .LBB6_3 +; RV64IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB6_1 Depth=1 +; RV64IA-WMO-ZACAS-NEXT: xor a5, a2, a0 +; RV64IA-WMO-ZACAS-NEXT: and a5, a5, a4 +; RV64IA-WMO-ZACAS-NEXT: xor a5, a2, a5 +; RV64IA-WMO-ZACAS-NEXT: sc.w.rl a5, a5, (a3) +; RV64IA-WMO-ZACAS-NEXT: bnez a5, .LBB6_1 +; RV64IA-WMO-ZACAS-NEXT: .LBB6_3: +; RV64IA-WMO-ZACAS-NEXT: ret %res = cmpxchg ptr %ptr, i8 %cmp, i8 %val acq_rel acquire ret void } @@ -1164,6 +1708,30 @@ define void @cmpxchg_i16_acquire_monotonic(ptr %ptr, i16 %cmp, i16 %val) nounwin ; RV32IA-WMO-NEXT: .LBB11_3: ; RV32IA-WMO-NEXT: ret ; +; RV32IA-ZACAS-LABEL: cmpxchg_i16_acquire_monotonic: +; RV32IA-ZACAS: # %bb.0: +; RV32IA-ZACAS-NEXT: andi a3, a0, -4 +; RV32IA-ZACAS-NEXT: slli a0, a0, 3 +; RV32IA-ZACAS-NEXT: lui a4, 16 +; RV32IA-ZACAS-NEXT: addi a4, a4, -1 +; RV32IA-ZACAS-NEXT: sll a5, a4, a0 +; RV32IA-ZACAS-NEXT: and a1, a1, a4 +; RV32IA-ZACAS-NEXT: sll a1, a1, a0 +; RV32IA-ZACAS-NEXT: and a2, a2, a4 +; RV32IA-ZACAS-NEXT: sll a0, a2, a0 +; RV32IA-ZACAS-NEXT: .LBB11_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-ZACAS-NEXT: lr.w.aq a2, (a3) +; RV32IA-ZACAS-NEXT: and a4, a2, a5 +; RV32IA-ZACAS-NEXT: bne a4, a1, .LBB11_3 +; RV32IA-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB11_1 Depth=1 +; RV32IA-ZACAS-NEXT: xor a4, a2, a0 +; RV32IA-ZACAS-NEXT: and a4, a4, a5 +; 
RV32IA-ZACAS-NEXT: xor a4, a2, a4 +; RV32IA-ZACAS-NEXT: sc.w a4, a4, (a3) +; RV32IA-ZACAS-NEXT: bnez a4, .LBB11_1 +; RV32IA-ZACAS-NEXT: .LBB11_3: +; RV32IA-ZACAS-NEXT: ret +; ; RV32IA-TSO-LABEL: cmpxchg_i16_acquire_monotonic: ; RV32IA-TSO: # %bb.0: ; RV32IA-TSO-NEXT: andi a3, a0, -4 @@ -1225,6 +1793,30 @@ define void @cmpxchg_i16_acquire_monotonic(ptr %ptr, i16 %cmp, i16 %val) nounwin ; RV64IA-WMO-NEXT: .LBB11_3: ; RV64IA-WMO-NEXT: ret ; +; RV64IA-ZACAS-LABEL: cmpxchg_i16_acquire_monotonic: +; RV64IA-ZACAS: # %bb.0: +; RV64IA-ZACAS-NEXT: andi a3, a0, -4 +; RV64IA-ZACAS-NEXT: slli a0, a0, 3 +; RV64IA-ZACAS-NEXT: lui a4, 16 +; RV64IA-ZACAS-NEXT: addi a4, a4, -1 +; RV64IA-ZACAS-NEXT: sllw a5, a4, a0 +; RV64IA-ZACAS-NEXT: and a1, a1, a4 +; RV64IA-ZACAS-NEXT: sllw a1, a1, a0 +; RV64IA-ZACAS-NEXT: and a2, a2, a4 +; RV64IA-ZACAS-NEXT: sllw a0, a2, a0 +; RV64IA-ZACAS-NEXT: .LBB11_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-ZACAS-NEXT: lr.w.aq a2, (a3) +; RV64IA-ZACAS-NEXT: and a4, a2, a5 +; RV64IA-ZACAS-NEXT: bne a4, a1, .LBB11_3 +; RV64IA-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB11_1 Depth=1 +; RV64IA-ZACAS-NEXT: xor a4, a2, a0 +; RV64IA-ZACAS-NEXT: and a4, a4, a5 +; RV64IA-ZACAS-NEXT: xor a4, a2, a4 +; RV64IA-ZACAS-NEXT: sc.w a4, a4, (a3) +; RV64IA-ZACAS-NEXT: bnez a4, .LBB11_1 +; RV64IA-ZACAS-NEXT: .LBB11_3: +; RV64IA-ZACAS-NEXT: ret +; ; RV64IA-TSO-LABEL: cmpxchg_i16_acquire_monotonic: ; RV64IA-TSO: # %bb.0: ; RV64IA-TSO-NEXT: andi a3, a0, -4 @@ -1248,6 +1840,52 @@ define void @cmpxchg_i16_acquire_monotonic(ptr %ptr, i16 %cmp, i16 %val) nounwin ; RV64IA-TSO-NEXT: bnez a4, .LBB11_1 ; RV64IA-TSO-NEXT: .LBB11_3: ; RV64IA-TSO-NEXT: ret +; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i16_acquire_monotonic: +; RV32IA-WMO-ZACAS: # %bb.0: +; RV32IA-WMO-ZACAS-NEXT: andi a3, a0, -4 +; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3 +; RV32IA-WMO-ZACAS-NEXT: lui a4, 16 +; RV32IA-WMO-ZACAS-NEXT: addi a4, a4, -1 +; RV32IA-WMO-ZACAS-NEXT: sll a5, a4, a0 +; RV32IA-WMO-ZACAS-NEXT: and a1, a1, a4 +; 
RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0 +; RV32IA-WMO-ZACAS-NEXT: and a2, a2, a4 +; RV32IA-WMO-ZACAS-NEXT: sll a0, a2, a0 +; RV32IA-WMO-ZACAS-NEXT: .LBB11_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-WMO-ZACAS-NEXT: lr.w.aq a2, (a3) +; RV32IA-WMO-ZACAS-NEXT: and a4, a2, a5 +; RV32IA-WMO-ZACAS-NEXT: bne a4, a1, .LBB11_3 +; RV32IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB11_1 Depth=1 +; RV32IA-WMO-ZACAS-NEXT: xor a4, a2, a0 +; RV32IA-WMO-ZACAS-NEXT: and a4, a4, a5 +; RV32IA-WMO-ZACAS-NEXT: xor a4, a2, a4 +; RV32IA-WMO-ZACAS-NEXT: sc.w a4, a4, (a3) +; RV32IA-WMO-ZACAS-NEXT: bnez a4, .LBB11_1 +; RV32IA-WMO-ZACAS-NEXT: .LBB11_3: +; RV32IA-WMO-ZACAS-NEXT: ret +; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i16_acquire_monotonic: +; RV64IA-WMO-ZACAS: # %bb.0: +; RV64IA-WMO-ZACAS-NEXT: andi a3, a0, -4 +; RV64IA-WMO-ZACAS-NEXT: slli a0, a0, 3 +; RV64IA-WMO-ZACAS-NEXT: lui a4, 16 +; RV64IA-WMO-ZACAS-NEXT: addiw a4, a4, -1 +; RV64IA-WMO-ZACAS-NEXT: sllw a5, a4, a0 +; RV64IA-WMO-ZACAS-NEXT: and a1, a1, a4 +; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a0 +; RV64IA-WMO-ZACAS-NEXT: and a2, a2, a4 +; RV64IA-WMO-ZACAS-NEXT: sllw a0, a2, a0 +; RV64IA-WMO-ZACAS-NEXT: .LBB11_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-WMO-ZACAS-NEXT: lr.w.aq a2, (a3) +; RV64IA-WMO-ZACAS-NEXT: and a4, a2, a5 +; RV64IA-WMO-ZACAS-NEXT: bne a4, a1, .LBB11_3 +; RV64IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB11_1 Depth=1 +; RV64IA-WMO-ZACAS-NEXT: xor a4, a2, a0 +; RV64IA-WMO-ZACAS-NEXT: and a4, a4, a5 +; RV64IA-WMO-ZACAS-NEXT: xor a4, a2, a4 +; RV64IA-WMO-ZACAS-NEXT: sc.w a4, a4, (a3) +; RV64IA-WMO-ZACAS-NEXT: bnez a4, .LBB11_1 +; RV64IA-WMO-ZACAS-NEXT: .LBB11_3: +; RV64IA-WMO-ZACAS-NEXT: ret %res = cmpxchg ptr %ptr, i16 %cmp, i16 %val acquire monotonic ret void } @@ -1290,6 +1928,30 @@ define void @cmpxchg_i16_acquire_acquire(ptr %ptr, i16 %cmp, i16 %val) nounwind ; RV32IA-WMO-NEXT: .LBB12_3: ; RV32IA-WMO-NEXT: ret ; +; RV32IA-ZACAS-LABEL: cmpxchg_i16_acquire_acquire: +; RV32IA-ZACAS: # %bb.0: +; 
RV32IA-ZACAS-NEXT: andi a3, a0, -4 +; RV32IA-ZACAS-NEXT: slli a0, a0, 3 +; RV32IA-ZACAS-NEXT: lui a4, 16 +; RV32IA-ZACAS-NEXT: addi a4, a4, -1 +; RV32IA-ZACAS-NEXT: sll a5, a4, a0 +; RV32IA-ZACAS-NEXT: and a1, a1, a4 +; RV32IA-ZACAS-NEXT: sll a1, a1, a0 +; RV32IA-ZACAS-NEXT: and a2, a2, a4 +; RV32IA-ZACAS-NEXT: sll a0, a2, a0 +; RV32IA-ZACAS-NEXT: .LBB12_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-ZACAS-NEXT: lr.w.aq a2, (a3) +; RV32IA-ZACAS-NEXT: and a4, a2, a5 +; RV32IA-ZACAS-NEXT: bne a4, a1, .LBB12_3 +; RV32IA-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB12_1 Depth=1 +; RV32IA-ZACAS-NEXT: xor a4, a2, a0 +; RV32IA-ZACAS-NEXT: and a4, a4, a5 +; RV32IA-ZACAS-NEXT: xor a4, a2, a4 +; RV32IA-ZACAS-NEXT: sc.w a4, a4, (a3) +; RV32IA-ZACAS-NEXT: bnez a4, .LBB12_1 +; RV32IA-ZACAS-NEXT: .LBB12_3: +; RV32IA-ZACAS-NEXT: ret +; ; RV32IA-TSO-LABEL: cmpxchg_i16_acquire_acquire: ; RV32IA-TSO: # %bb.0: ; RV32IA-TSO-NEXT: andi a3, a0, -4 @@ -1351,6 +2013,30 @@ define void @cmpxchg_i16_acquire_acquire(ptr %ptr, i16 %cmp, i16 %val) nounwind ; RV64IA-WMO-NEXT: .LBB12_3: ; RV64IA-WMO-NEXT: ret ; +; RV64IA-ZACAS-LABEL: cmpxchg_i16_acquire_acquire: +; RV64IA-ZACAS: # %bb.0: +; RV64IA-ZACAS-NEXT: andi a3, a0, -4 +; RV64IA-ZACAS-NEXT: slli a0, a0, 3 +; RV64IA-ZACAS-NEXT: lui a4, 16 +; RV64IA-ZACAS-NEXT: addi a4, a4, -1 +; RV64IA-ZACAS-NEXT: sllw a5, a4, a0 +; RV64IA-ZACAS-NEXT: and a1, a1, a4 +; RV64IA-ZACAS-NEXT: sllw a1, a1, a0 +; RV64IA-ZACAS-NEXT: and a2, a2, a4 +; RV64IA-ZACAS-NEXT: sllw a0, a2, a0 +; RV64IA-ZACAS-NEXT: .LBB12_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-ZACAS-NEXT: lr.w.aq a2, (a3) +; RV64IA-ZACAS-NEXT: and a4, a2, a5 +; RV64IA-ZACAS-NEXT: bne a4, a1, .LBB12_3 +; RV64IA-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB12_1 Depth=1 +; RV64IA-ZACAS-NEXT: xor a4, a2, a0 +; RV64IA-ZACAS-NEXT: and a4, a4, a5 +; RV64IA-ZACAS-NEXT: xor a4, a2, a4 +; RV64IA-ZACAS-NEXT: sc.w a4, a4, (a3) +; RV64IA-ZACAS-NEXT: bnez a4, .LBB12_1 +; RV64IA-ZACAS-NEXT: .LBB12_3: +; 
RV64IA-ZACAS-NEXT: ret +; ; RV64IA-TSO-LABEL: cmpxchg_i16_acquire_acquire: ; RV64IA-TSO: # %bb.0: ; RV64IA-TSO-NEXT: andi a3, a0, -4 @@ -1374,6 +2060,52 @@ define void @cmpxchg_i16_acquire_acquire(ptr %ptr, i16 %cmp, i16 %val) nounwind ; RV64IA-TSO-NEXT: bnez a4, .LBB12_1 ; RV64IA-TSO-NEXT: .LBB12_3: ; RV64IA-TSO-NEXT: ret +; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i16_acquire_acquire: +; RV32IA-WMO-ZACAS: # %bb.0: +; RV32IA-WMO-ZACAS-NEXT: andi a3, a0, -4 +; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3 +; RV32IA-WMO-ZACAS-NEXT: lui a4, 16 +; RV32IA-WMO-ZACAS-NEXT: addi a4, a4, -1 +; RV32IA-WMO-ZACAS-NEXT: sll a5, a4, a0 +; RV32IA-WMO-ZACAS-NEXT: and a1, a1, a4 +; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0 +; RV32IA-WMO-ZACAS-NEXT: and a2, a2, a4 +; RV32IA-WMO-ZACAS-NEXT: sll a0, a2, a0 +; RV32IA-WMO-ZACAS-NEXT: .LBB12_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-WMO-ZACAS-NEXT: lr.w.aq a2, (a3) +; RV32IA-WMO-ZACAS-NEXT: and a4, a2, a5 +; RV32IA-WMO-ZACAS-NEXT: bne a4, a1, .LBB12_3 +; RV32IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB12_1 Depth=1 +; RV32IA-WMO-ZACAS-NEXT: xor a4, a2, a0 +; RV32IA-WMO-ZACAS-NEXT: and a4, a4, a5 +; RV32IA-WMO-ZACAS-NEXT: xor a4, a2, a4 +; RV32IA-WMO-ZACAS-NEXT: sc.w a4, a4, (a3) +; RV32IA-WMO-ZACAS-NEXT: bnez a4, .LBB12_1 +; RV32IA-WMO-ZACAS-NEXT: .LBB12_3: +; RV32IA-WMO-ZACAS-NEXT: ret +; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i16_acquire_acquire: +; RV64IA-WMO-ZACAS: # %bb.0: +; RV64IA-WMO-ZACAS-NEXT: andi a3, a0, -4 +; RV64IA-WMO-ZACAS-NEXT: slli a0, a0, 3 +; RV64IA-WMO-ZACAS-NEXT: lui a4, 16 +; RV64IA-WMO-ZACAS-NEXT: addiw a4, a4, -1 +; RV64IA-WMO-ZACAS-NEXT: sllw a5, a4, a0 +; RV64IA-WMO-ZACAS-NEXT: and a1, a1, a4 +; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a0 +; RV64IA-WMO-ZACAS-NEXT: and a2, a2, a4 +; RV64IA-WMO-ZACAS-NEXT: sllw a0, a2, a0 +; RV64IA-WMO-ZACAS-NEXT: .LBB12_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-WMO-ZACAS-NEXT: lr.w.aq a2, (a3) +; RV64IA-WMO-ZACAS-NEXT: and a4, a2, a5 +; RV64IA-WMO-ZACAS-NEXT: bne a4, a1, .LBB12_3 +; 
RV64IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB12_1 Depth=1 +; RV64IA-WMO-ZACAS-NEXT: xor a4, a2, a0 +; RV64IA-WMO-ZACAS-NEXT: and a4, a4, a5 +; RV64IA-WMO-ZACAS-NEXT: xor a4, a2, a4 +; RV64IA-WMO-ZACAS-NEXT: sc.w a4, a4, (a3) +; RV64IA-WMO-ZACAS-NEXT: bnez a4, .LBB12_1 +; RV64IA-WMO-ZACAS-NEXT: .LBB12_3: +; RV64IA-WMO-ZACAS-NEXT: ret %res = cmpxchg ptr %ptr, i16 %cmp, i16 %val acquire acquire ret void } @@ -1416,6 +2148,30 @@ define void @cmpxchg_i16_release_monotonic(ptr %ptr, i16 %cmp, i16 %val) nounwin ; RV32IA-WMO-NEXT: .LBB13_3: ; RV32IA-WMO-NEXT: ret ; +; RV32IA-ZACAS-LABEL: cmpxchg_i16_release_monotonic: +; RV32IA-ZACAS: # %bb.0: +; RV32IA-ZACAS-NEXT: andi a3, a0, -4 +; RV32IA-ZACAS-NEXT: slli a0, a0, 3 +; RV32IA-ZACAS-NEXT: lui a4, 16 +; RV32IA-ZACAS-NEXT: addi a4, a4, -1 +; RV32IA-ZACAS-NEXT: sll a5, a4, a0 +; RV32IA-ZACAS-NEXT: and a1, a1, a4 +; RV32IA-ZACAS-NEXT: sll a1, a1, a0 +; RV32IA-ZACAS-NEXT: and a2, a2, a4 +; RV32IA-ZACAS-NEXT: sll a0, a2, a0 +; RV32IA-ZACAS-NEXT: .LBB13_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-ZACAS-NEXT: lr.w a2, (a3) +; RV32IA-ZACAS-NEXT: and a4, a2, a5 +; RV32IA-ZACAS-NEXT: bne a4, a1, .LBB13_3 +; RV32IA-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB13_1 Depth=1 +; RV32IA-ZACAS-NEXT: xor a4, a2, a0 +; RV32IA-ZACAS-NEXT: and a4, a4, a5 +; RV32IA-ZACAS-NEXT: xor a4, a2, a4 +; RV32IA-ZACAS-NEXT: sc.w.rl a4, a4, (a3) +; RV32IA-ZACAS-NEXT: bnez a4, .LBB13_1 +; RV32IA-ZACAS-NEXT: .LBB13_3: +; RV32IA-ZACAS-NEXT: ret +; ; RV32IA-TSO-LABEL: cmpxchg_i16_release_monotonic: ; RV32IA-TSO: # %bb.0: ; RV32IA-TSO-NEXT: andi a3, a0, -4 @@ -1477,6 +2233,30 @@ define void @cmpxchg_i16_release_monotonic(ptr %ptr, i16 %cmp, i16 %val) nounwin ; RV64IA-WMO-NEXT: .LBB13_3: ; RV64IA-WMO-NEXT: ret ; +; RV64IA-ZACAS-LABEL: cmpxchg_i16_release_monotonic: +; RV64IA-ZACAS: # %bb.0: +; RV64IA-ZACAS-NEXT: andi a3, a0, -4 +; RV64IA-ZACAS-NEXT: slli a0, a0, 3 +; RV64IA-ZACAS-NEXT: lui a4, 16 +; RV64IA-ZACAS-NEXT: addi a4, a4, -1 +; RV64IA-ZACAS-NEXT: 
sllw a5, a4, a0 +; RV64IA-ZACAS-NEXT: and a1, a1, a4 +; RV64IA-ZACAS-NEXT: sllw a1, a1, a0 +; RV64IA-ZACAS-NEXT: and a2, a2, a4 +; RV64IA-ZACAS-NEXT: sllw a0, a2, a0 +; RV64IA-ZACAS-NEXT: .LBB13_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-ZACAS-NEXT: lr.w a2, (a3) +; RV64IA-ZACAS-NEXT: and a4, a2, a5 +; RV64IA-ZACAS-NEXT: bne a4, a1, .LBB13_3 +; RV64IA-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB13_1 Depth=1 +; RV64IA-ZACAS-NEXT: xor a4, a2, a0 +; RV64IA-ZACAS-NEXT: and a4, a4, a5 +; RV64IA-ZACAS-NEXT: xor a4, a2, a4 +; RV64IA-ZACAS-NEXT: sc.w.rl a4, a4, (a3) +; RV64IA-ZACAS-NEXT: bnez a4, .LBB13_1 +; RV64IA-ZACAS-NEXT: .LBB13_3: +; RV64IA-ZACAS-NEXT: ret +; ; RV64IA-TSO-LABEL: cmpxchg_i16_release_monotonic: ; RV64IA-TSO: # %bb.0: ; RV64IA-TSO-NEXT: andi a3, a0, -4 @@ -1500,6 +2280,52 @@ define void @cmpxchg_i16_release_monotonic(ptr %ptr, i16 %cmp, i16 %val) nounwin ; RV64IA-TSO-NEXT: bnez a4, .LBB13_1 ; RV64IA-TSO-NEXT: .LBB13_3: ; RV64IA-TSO-NEXT: ret +; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i16_release_monotonic: +; RV32IA-WMO-ZACAS: # %bb.0: +; RV32IA-WMO-ZACAS-NEXT: andi a3, a0, -4 +; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3 +; RV32IA-WMO-ZACAS-NEXT: lui a4, 16 +; RV32IA-WMO-ZACAS-NEXT: addi a4, a4, -1 +; RV32IA-WMO-ZACAS-NEXT: sll a5, a4, a0 +; RV32IA-WMO-ZACAS-NEXT: and a1, a1, a4 +; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0 +; RV32IA-WMO-ZACAS-NEXT: and a2, a2, a4 +; RV32IA-WMO-ZACAS-NEXT: sll a0, a2, a0 +; RV32IA-WMO-ZACAS-NEXT: .LBB13_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-WMO-ZACAS-NEXT: lr.w a2, (a3) +; RV32IA-WMO-ZACAS-NEXT: and a4, a2, a5 +; RV32IA-WMO-ZACAS-NEXT: bne a4, a1, .LBB13_3 +; RV32IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB13_1 Depth=1 +; RV32IA-WMO-ZACAS-NEXT: xor a4, a2, a0 +; RV32IA-WMO-ZACAS-NEXT: and a4, a4, a5 +; RV32IA-WMO-ZACAS-NEXT: xor a4, a2, a4 +; RV32IA-WMO-ZACAS-NEXT: sc.w.rl a4, a4, (a3) +; RV32IA-WMO-ZACAS-NEXT: bnez a4, .LBB13_1 +; RV32IA-WMO-ZACAS-NEXT: .LBB13_3: +; RV32IA-WMO-ZACAS-NEXT: ret +; 
RV64IA-WMO-ZACAS-LABEL: cmpxchg_i16_release_monotonic: +; RV64IA-WMO-ZACAS: # %bb.0: +; RV64IA-WMO-ZACAS-NEXT: andi a3, a0, -4 +; RV64IA-WMO-ZACAS-NEXT: slli a0, a0, 3 +; RV64IA-WMO-ZACAS-NEXT: lui a4, 16 +; RV64IA-WMO-ZACAS-NEXT: addiw a4, a4, -1 +; RV64IA-WMO-ZACAS-NEXT: sllw a5, a4, a0 +; RV64IA-WMO-ZACAS-NEXT: and a1, a1, a4 +; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a0 +; RV64IA-WMO-ZACAS-NEXT: and a2, a2, a4 +; RV64IA-WMO-ZACAS-NEXT: sllw a0, a2, a0 +; RV64IA-WMO-ZACAS-NEXT: .LBB13_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-WMO-ZACAS-NEXT: lr.w a2, (a3) +; RV64IA-WMO-ZACAS-NEXT: and a4, a2, a5 +; RV64IA-WMO-ZACAS-NEXT: bne a4, a1, .LBB13_3 +; RV64IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB13_1 Depth=1 +; RV64IA-WMO-ZACAS-NEXT: xor a4, a2, a0 +; RV64IA-WMO-ZACAS-NEXT: and a4, a4, a5 +; RV64IA-WMO-ZACAS-NEXT: xor a4, a2, a4 +; RV64IA-WMO-ZACAS-NEXT: sc.w.rl a4, a4, (a3) +; RV64IA-WMO-ZACAS-NEXT: bnez a4, .LBB13_1 +; RV64IA-WMO-ZACAS-NEXT: .LBB13_3: +; RV64IA-WMO-ZACAS-NEXT: ret %res = cmpxchg ptr %ptr, i16 %cmp, i16 %val release monotonic ret void } @@ -1542,6 +2368,30 @@ define void @cmpxchg_i16_release_acquire(ptr %ptr, i16 %cmp, i16 %val) nounwind ; RV32IA-WMO-NEXT: .LBB14_3: ; RV32IA-WMO-NEXT: ret ; +; RV32IA-ZACAS-LABEL: cmpxchg_i16_release_acquire: +; RV32IA-ZACAS: # %bb.0: +; RV32IA-ZACAS-NEXT: andi a3, a0, -4 +; RV32IA-ZACAS-NEXT: slli a0, a0, 3 +; RV32IA-ZACAS-NEXT: lui a4, 16 +; RV32IA-ZACAS-NEXT: addi a4, a4, -1 +; RV32IA-ZACAS-NEXT: sll a5, a4, a0 +; RV32IA-ZACAS-NEXT: and a1, a1, a4 +; RV32IA-ZACAS-NEXT: sll a1, a1, a0 +; RV32IA-ZACAS-NEXT: and a2, a2, a4 +; RV32IA-ZACAS-NEXT: sll a0, a2, a0 +; RV32IA-ZACAS-NEXT: .LBB14_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-ZACAS-NEXT: lr.w.aq a2, (a3) +; RV32IA-ZACAS-NEXT: and a4, a2, a5 +; RV32IA-ZACAS-NEXT: bne a4, a1, .LBB14_3 +; RV32IA-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB14_1 Depth=1 +; RV32IA-ZACAS-NEXT: xor a4, a2, a0 +; RV32IA-ZACAS-NEXT: and a4, a4, a5 +; RV32IA-ZACAS-NEXT: xor a4, 
a2, a4 +; RV32IA-ZACAS-NEXT: sc.w.rl a4, a4, (a3) +; RV32IA-ZACAS-NEXT: bnez a4, .LBB14_1 +; RV32IA-ZACAS-NEXT: .LBB14_3: +; RV32IA-ZACAS-NEXT: ret +; ; RV32IA-TSO-LABEL: cmpxchg_i16_release_acquire: ; RV32IA-TSO: # %bb.0: ; RV32IA-TSO-NEXT: andi a3, a0, -4 @@ -1603,6 +2453,30 @@ define void @cmpxchg_i16_release_acquire(ptr %ptr, i16 %cmp, i16 %val) nounwind ; RV64IA-WMO-NEXT: .LBB14_3: ; RV64IA-WMO-NEXT: ret ; +; RV64IA-ZACAS-LABEL: cmpxchg_i16_release_acquire: +; RV64IA-ZACAS: # %bb.0: +; RV64IA-ZACAS-NEXT: andi a3, a0, -4 +; RV64IA-ZACAS-NEXT: slli a0, a0, 3 +; RV64IA-ZACAS-NEXT: lui a4, 16 +; RV64IA-ZACAS-NEXT: addi a4, a4, -1 +; RV64IA-ZACAS-NEXT: sllw a5, a4, a0 +; RV64IA-ZACAS-NEXT: and a1, a1, a4 +; RV64IA-ZACAS-NEXT: sllw a1, a1, a0 +; RV64IA-ZACAS-NEXT: and a2, a2, a4 +; RV64IA-ZACAS-NEXT: sllw a0, a2, a0 +; RV64IA-ZACAS-NEXT: .LBB14_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-ZACAS-NEXT: lr.w.aq a2, (a3) +; RV64IA-ZACAS-NEXT: and a4, a2, a5 +; RV64IA-ZACAS-NEXT: bne a4, a1, .LBB14_3 +; RV64IA-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB14_1 Depth=1 +; RV64IA-ZACAS-NEXT: xor a4, a2, a0 +; RV64IA-ZACAS-NEXT: and a4, a4, a5 +; RV64IA-ZACAS-NEXT: xor a4, a2, a4 +; RV64IA-ZACAS-NEXT: sc.w.rl a4, a4, (a3) +; RV64IA-ZACAS-NEXT: bnez a4, .LBB14_1 +; RV64IA-ZACAS-NEXT: .LBB14_3: +; RV64IA-ZACAS-NEXT: ret +; ; RV64IA-TSO-LABEL: cmpxchg_i16_release_acquire: ; RV64IA-TSO: # %bb.0: ; RV64IA-TSO-NEXT: andi a3, a0, -4 @@ -1626,6 +2500,52 @@ define void @cmpxchg_i16_release_acquire(ptr %ptr, i16 %cmp, i16 %val) nounwind ; RV64IA-TSO-NEXT: bnez a4, .LBB14_1 ; RV64IA-TSO-NEXT: .LBB14_3: ; RV64IA-TSO-NEXT: ret +; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i16_release_acquire: +; RV32IA-WMO-ZACAS: # %bb.0: +; RV32IA-WMO-ZACAS-NEXT: andi a3, a0, -4 +; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3 +; RV32IA-WMO-ZACAS-NEXT: lui a4, 16 +; RV32IA-WMO-ZACAS-NEXT: addi a4, a4, -1 +; RV32IA-WMO-ZACAS-NEXT: sll a5, a4, a0 +; RV32IA-WMO-ZACAS-NEXT: and a1, a1, a4 +; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, 
a0 +; RV32IA-WMO-ZACAS-NEXT: and a2, a2, a4 +; RV32IA-WMO-ZACAS-NEXT: sll a0, a2, a0 +; RV32IA-WMO-ZACAS-NEXT: .LBB14_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-WMO-ZACAS-NEXT: lr.w.aq a2, (a3) +; RV32IA-WMO-ZACAS-NEXT: and a4, a2, a5 +; RV32IA-WMO-ZACAS-NEXT: bne a4, a1, .LBB14_3 +; RV32IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB14_1 Depth=1 +; RV32IA-WMO-ZACAS-NEXT: xor a4, a2, a0 +; RV32IA-WMO-ZACAS-NEXT: and a4, a4, a5 +; RV32IA-WMO-ZACAS-NEXT: xor a4, a2, a4 +; RV32IA-WMO-ZACAS-NEXT: sc.w.rl a4, a4, (a3) +; RV32IA-WMO-ZACAS-NEXT: bnez a4, .LBB14_1 +; RV32IA-WMO-ZACAS-NEXT: .LBB14_3: +; RV32IA-WMO-ZACAS-NEXT: ret +; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i16_release_acquire: +; RV64IA-WMO-ZACAS: # %bb.0: +; RV64IA-WMO-ZACAS-NEXT: andi a3, a0, -4 +; RV64IA-WMO-ZACAS-NEXT: slli a0, a0, 3 +; RV64IA-WMO-ZACAS-NEXT: lui a4, 16 +; RV64IA-WMO-ZACAS-NEXT: addiw a4, a4, -1 +; RV64IA-WMO-ZACAS-NEXT: sllw a5, a4, a0 +; RV64IA-WMO-ZACAS-NEXT: and a1, a1, a4 +; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a0 +; RV64IA-WMO-ZACAS-NEXT: and a2, a2, a4 +; RV64IA-WMO-ZACAS-NEXT: sllw a0, a2, a0 +; RV64IA-WMO-ZACAS-NEXT: .LBB14_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-WMO-ZACAS-NEXT: lr.w.aq a2, (a3) +; RV64IA-WMO-ZACAS-NEXT: and a4, a2, a5 +; RV64IA-WMO-ZACAS-NEXT: bne a4, a1, .LBB14_3 +; RV64IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB14_1 Depth=1 +; RV64IA-WMO-ZACAS-NEXT: xor a4, a2, a0 +; RV64IA-WMO-ZACAS-NEXT: and a4, a4, a5 +; RV64IA-WMO-ZACAS-NEXT: xor a4, a2, a4 +; RV64IA-WMO-ZACAS-NEXT: sc.w.rl a4, a4, (a3) +; RV64IA-WMO-ZACAS-NEXT: bnez a4, .LBB14_1 +; RV64IA-WMO-ZACAS-NEXT: .LBB14_3: +; RV64IA-WMO-ZACAS-NEXT: ret %res = cmpxchg ptr %ptr, i16 %cmp, i16 %val release acquire ret void } @@ -1668,6 +2588,30 @@ define void @cmpxchg_i16_acq_rel_monotonic(ptr %ptr, i16 %cmp, i16 %val) nounwin ; RV32IA-WMO-NEXT: .LBB15_3: ; RV32IA-WMO-NEXT: ret ; +; RV32IA-ZACAS-LABEL: cmpxchg_i16_acq_rel_monotonic: +; RV32IA-ZACAS: # %bb.0: +; RV32IA-ZACAS-NEXT: andi a3, a0, -4 +; 
RV32IA-ZACAS-NEXT: slli a0, a0, 3 +; RV32IA-ZACAS-NEXT: lui a4, 16 +; RV32IA-ZACAS-NEXT: addi a4, a4, -1 +; RV32IA-ZACAS-NEXT: sll a5, a4, a0 +; RV32IA-ZACAS-NEXT: and a1, a1, a4 +; RV32IA-ZACAS-NEXT: sll a1, a1, a0 +; RV32IA-ZACAS-NEXT: and a2, a2, a4 +; RV32IA-ZACAS-NEXT: sll a0, a2, a0 +; RV32IA-ZACAS-NEXT: .LBB15_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-ZACAS-NEXT: lr.w.aq a2, (a3) +; RV32IA-ZACAS-NEXT: and a4, a2, a5 +; RV32IA-ZACAS-NEXT: bne a4, a1, .LBB15_3 +; RV32IA-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB15_1 Depth=1 +; RV32IA-ZACAS-NEXT: xor a4, a2, a0 +; RV32IA-ZACAS-NEXT: and a4, a4, a5 +; RV32IA-ZACAS-NEXT: xor a4, a2, a4 +; RV32IA-ZACAS-NEXT: sc.w.rl a4, a4, (a3) +; RV32IA-ZACAS-NEXT: bnez a4, .LBB15_1 +; RV32IA-ZACAS-NEXT: .LBB15_3: +; RV32IA-ZACAS-NEXT: ret +; ; RV32IA-TSO-LABEL: cmpxchg_i16_acq_rel_monotonic: ; RV32IA-TSO: # %bb.0: ; RV32IA-TSO-NEXT: andi a3, a0, -4 @@ -1729,6 +2673,30 @@ define void @cmpxchg_i16_acq_rel_monotonic(ptr %ptr, i16 %cmp, i16 %val) nounwin ; RV64IA-WMO-NEXT: .LBB15_3: ; RV64IA-WMO-NEXT: ret ; +; RV64IA-ZACAS-LABEL: cmpxchg_i16_acq_rel_monotonic: +; RV64IA-ZACAS: # %bb.0: +; RV64IA-ZACAS-NEXT: andi a3, a0, -4 +; RV64IA-ZACAS-NEXT: slli a0, a0, 3 +; RV64IA-ZACAS-NEXT: lui a4, 16 +; RV64IA-ZACAS-NEXT: addi a4, a4, -1 +; RV64IA-ZACAS-NEXT: sllw a5, a4, a0 +; RV64IA-ZACAS-NEXT: and a1, a1, a4 +; RV64IA-ZACAS-NEXT: sllw a1, a1, a0 +; RV64IA-ZACAS-NEXT: and a2, a2, a4 +; RV64IA-ZACAS-NEXT: sllw a0, a2, a0 +; RV64IA-ZACAS-NEXT: .LBB15_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-ZACAS-NEXT: lr.w.aq a2, (a3) +; RV64IA-ZACAS-NEXT: and a4, a2, a5 +; RV64IA-ZACAS-NEXT: bne a4, a1, .LBB15_3 +; RV64IA-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB15_1 Depth=1 +; RV64IA-ZACAS-NEXT: xor a4, a2, a0 +; RV64IA-ZACAS-NEXT: and a4, a4, a5 +; RV64IA-ZACAS-NEXT: xor a4, a2, a4 +; RV64IA-ZACAS-NEXT: sc.w.rl a4, a4, (a3) +; RV64IA-ZACAS-NEXT: bnez a4, .LBB15_1 +; RV64IA-ZACAS-NEXT: .LBB15_3: +; RV64IA-ZACAS-NEXT: ret +; ; 
RV64IA-TSO-LABEL: cmpxchg_i16_acq_rel_monotonic: ; RV64IA-TSO: # %bb.0: ; RV64IA-TSO-NEXT: andi a3, a0, -4 @@ -1752,6 +2720,52 @@ define void @cmpxchg_i16_acq_rel_monotonic(ptr %ptr, i16 %cmp, i16 %val) nounwin ; RV64IA-TSO-NEXT: bnez a4, .LBB15_1 ; RV64IA-TSO-NEXT: .LBB15_3: ; RV64IA-TSO-NEXT: ret +; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i16_acq_rel_monotonic: +; RV32IA-WMO-ZACAS: # %bb.0: +; RV32IA-WMO-ZACAS-NEXT: andi a3, a0, -4 +; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3 +; RV32IA-WMO-ZACAS-NEXT: lui a4, 16 +; RV32IA-WMO-ZACAS-NEXT: addi a4, a4, -1 +; RV32IA-WMO-ZACAS-NEXT: sll a5, a4, a0 +; RV32IA-WMO-ZACAS-NEXT: and a1, a1, a4 +; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0 +; RV32IA-WMO-ZACAS-NEXT: and a2, a2, a4 +; RV32IA-WMO-ZACAS-NEXT: sll a0, a2, a0 +; RV32IA-WMO-ZACAS-NEXT: .LBB15_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-WMO-ZACAS-NEXT: lr.w.aq a2, (a3) +; RV32IA-WMO-ZACAS-NEXT: and a4, a2, a5 +; RV32IA-WMO-ZACAS-NEXT: bne a4, a1, .LBB15_3 +; RV32IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB15_1 Depth=1 +; RV32IA-WMO-ZACAS-NEXT: xor a4, a2, a0 +; RV32IA-WMO-ZACAS-NEXT: and a4, a4, a5 +; RV32IA-WMO-ZACAS-NEXT: xor a4, a2, a4 +; RV32IA-WMO-ZACAS-NEXT: sc.w.rl a4, a4, (a3) +; RV32IA-WMO-ZACAS-NEXT: bnez a4, .LBB15_1 +; RV32IA-WMO-ZACAS-NEXT: .LBB15_3: +; RV32IA-WMO-ZACAS-NEXT: ret +; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i16_acq_rel_monotonic: +; RV64IA-WMO-ZACAS: # %bb.0: +; RV64IA-WMO-ZACAS-NEXT: andi a3, a0, -4 +; RV64IA-WMO-ZACAS-NEXT: slli a0, a0, 3 +; RV64IA-WMO-ZACAS-NEXT: lui a4, 16 +; RV64IA-WMO-ZACAS-NEXT: addiw a4, a4, -1 +; RV64IA-WMO-ZACAS-NEXT: sllw a5, a4, a0 +; RV64IA-WMO-ZACAS-NEXT: and a1, a1, a4 +; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a0 +; RV64IA-WMO-ZACAS-NEXT: and a2, a2, a4 +; RV64IA-WMO-ZACAS-NEXT: sllw a0, a2, a0 +; RV64IA-WMO-ZACAS-NEXT: .LBB15_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-WMO-ZACAS-NEXT: lr.w.aq a2, (a3) +; RV64IA-WMO-ZACAS-NEXT: and a4, a2, a5 +; RV64IA-WMO-ZACAS-NEXT: bne a4, a1, .LBB15_3 +; RV64IA-WMO-ZACAS-NEXT: # 
%bb.2: # in Loop: Header=BB15_1 Depth=1 +; RV64IA-WMO-ZACAS-NEXT: xor a4, a2, a0 +; RV64IA-WMO-ZACAS-NEXT: and a4, a4, a5 +; RV64IA-WMO-ZACAS-NEXT: xor a4, a2, a4 +; RV64IA-WMO-ZACAS-NEXT: sc.w.rl a4, a4, (a3) +; RV64IA-WMO-ZACAS-NEXT: bnez a4, .LBB15_1 +; RV64IA-WMO-ZACAS-NEXT: .LBB15_3: +; RV64IA-WMO-ZACAS-NEXT: ret %res = cmpxchg ptr %ptr, i16 %cmp, i16 %val acq_rel monotonic ret void } @@ -1794,6 +2808,30 @@ define void @cmpxchg_i16_acq_rel_acquire(ptr %ptr, i16 %cmp, i16 %val) nounwind ; RV32IA-WMO-NEXT: .LBB16_3: ; RV32IA-WMO-NEXT: ret ; +; RV32IA-ZACAS-LABEL: cmpxchg_i16_acq_rel_acquire: +; RV32IA-ZACAS: # %bb.0: +; RV32IA-ZACAS-NEXT: andi a3, a0, -4 +; RV32IA-ZACAS-NEXT: slli a0, a0, 3 +; RV32IA-ZACAS-NEXT: lui a4, 16 +; RV32IA-ZACAS-NEXT: addi a4, a4, -1 +; RV32IA-ZACAS-NEXT: sll a5, a4, a0 +; RV32IA-ZACAS-NEXT: and a1, a1, a4 +; RV32IA-ZACAS-NEXT: sll a1, a1, a0 +; RV32IA-ZACAS-NEXT: and a2, a2, a4 +; RV32IA-ZACAS-NEXT: sll a0, a2, a0 +; RV32IA-ZACAS-NEXT: .LBB16_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-ZACAS-NEXT: lr.w.aq a2, (a3) +; RV32IA-ZACAS-NEXT: and a4, a2, a5 +; RV32IA-ZACAS-NEXT: bne a4, a1, .LBB16_3 +; RV32IA-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB16_1 Depth=1 +; RV32IA-ZACAS-NEXT: xor a4, a2, a0 +; RV32IA-ZACAS-NEXT: and a4, a4, a5 +; RV32IA-ZACAS-NEXT: xor a4, a2, a4 +; RV32IA-ZACAS-NEXT: sc.w.rl a4, a4, (a3) +; RV32IA-ZACAS-NEXT: bnez a4, .LBB16_1 +; RV32IA-ZACAS-NEXT: .LBB16_3: +; RV32IA-ZACAS-NEXT: ret +; ; RV32IA-TSO-LABEL: cmpxchg_i16_acq_rel_acquire: ; RV32IA-TSO: # %bb.0: ; RV32IA-TSO-NEXT: andi a3, a0, -4 @@ -1855,6 +2893,30 @@ define void @cmpxchg_i16_acq_rel_acquire(ptr %ptr, i16 %cmp, i16 %val) nounwind ; RV64IA-WMO-NEXT: .LBB16_3: ; RV64IA-WMO-NEXT: ret ; +; RV64IA-ZACAS-LABEL: cmpxchg_i16_acq_rel_acquire: +; RV64IA-ZACAS: # %bb.0: +; RV64IA-ZACAS-NEXT: andi a3, a0, -4 +; RV64IA-ZACAS-NEXT: slli a0, a0, 3 +; RV64IA-ZACAS-NEXT: lui a4, 16 +; RV64IA-ZACAS-NEXT: addi a4, a4, -1 +; RV64IA-ZACAS-NEXT: sllw a5, a4, a0 +; 
RV64IA-ZACAS-NEXT: and a1, a1, a4 +; RV64IA-ZACAS-NEXT: sllw a1, a1, a0 +; RV64IA-ZACAS-NEXT: and a2, a2, a4 +; RV64IA-ZACAS-NEXT: sllw a0, a2, a0 +; RV64IA-ZACAS-NEXT: .LBB16_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-ZACAS-NEXT: lr.w.aq a2, (a3) +; RV64IA-ZACAS-NEXT: and a4, a2, a5 +; RV64IA-ZACAS-NEXT: bne a4, a1, .LBB16_3 +; RV64IA-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB16_1 Depth=1 +; RV64IA-ZACAS-NEXT: xor a4, a2, a0 +; RV64IA-ZACAS-NEXT: and a4, a4, a5 +; RV64IA-ZACAS-NEXT: xor a4, a2, a4 +; RV64IA-ZACAS-NEXT: sc.w.rl a4, a4, (a3) +; RV64IA-ZACAS-NEXT: bnez a4, .LBB16_1 +; RV64IA-ZACAS-NEXT: .LBB16_3: +; RV64IA-ZACAS-NEXT: ret +; ; RV64IA-TSO-LABEL: cmpxchg_i16_acq_rel_acquire: ; RV64IA-TSO: # %bb.0: ; RV64IA-TSO-NEXT: andi a3, a0, -4 @@ -1878,6 +2940,52 @@ define void @cmpxchg_i16_acq_rel_acquire(ptr %ptr, i16 %cmp, i16 %val) nounwind ; RV64IA-TSO-NEXT: bnez a4, .LBB16_1 ; RV64IA-TSO-NEXT: .LBB16_3: ; RV64IA-TSO-NEXT: ret +; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i16_acq_rel_acquire: +; RV32IA-WMO-ZACAS: # %bb.0: +; RV32IA-WMO-ZACAS-NEXT: andi a3, a0, -4 +; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3 +; RV32IA-WMO-ZACAS-NEXT: lui a4, 16 +; RV32IA-WMO-ZACAS-NEXT: addi a4, a4, -1 +; RV32IA-WMO-ZACAS-NEXT: sll a5, a4, a0 +; RV32IA-WMO-ZACAS-NEXT: and a1, a1, a4 +; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0 +; RV32IA-WMO-ZACAS-NEXT: and a2, a2, a4 +; RV32IA-WMO-ZACAS-NEXT: sll a0, a2, a0 +; RV32IA-WMO-ZACAS-NEXT: .LBB16_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-WMO-ZACAS-NEXT: lr.w.aq a2, (a3) +; RV32IA-WMO-ZACAS-NEXT: and a4, a2, a5 +; RV32IA-WMO-ZACAS-NEXT: bne a4, a1, .LBB16_3 +; RV32IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB16_1 Depth=1 +; RV32IA-WMO-ZACAS-NEXT: xor a4, a2, a0 +; RV32IA-WMO-ZACAS-NEXT: and a4, a4, a5 +; RV32IA-WMO-ZACAS-NEXT: xor a4, a2, a4 +; RV32IA-WMO-ZACAS-NEXT: sc.w.rl a4, a4, (a3) +; RV32IA-WMO-ZACAS-NEXT: bnez a4, .LBB16_1 +; RV32IA-WMO-ZACAS-NEXT: .LBB16_3: +; RV32IA-WMO-ZACAS-NEXT: ret +; RV64IA-WMO-ZACAS-LABEL: 
cmpxchg_i16_acq_rel_acquire: +; RV64IA-WMO-ZACAS: # %bb.0: +; RV64IA-WMO-ZACAS-NEXT: andi a3, a0, -4 +; RV64IA-WMO-ZACAS-NEXT: slli a0, a0, 3 +; RV64IA-WMO-ZACAS-NEXT: lui a4, 16 +; RV64IA-WMO-ZACAS-NEXT: addiw a4, a4, -1 +; RV64IA-WMO-ZACAS-NEXT: sllw a5, a4, a0 +; RV64IA-WMO-ZACAS-NEXT: and a1, a1, a4 +; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a0 +; RV64IA-WMO-ZACAS-NEXT: and a2, a2, a4 +; RV64IA-WMO-ZACAS-NEXT: sllw a0, a2, a0 +; RV64IA-WMO-ZACAS-NEXT: .LBB16_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-WMO-ZACAS-NEXT: lr.w.aq a2, (a3) +; RV64IA-WMO-ZACAS-NEXT: and a4, a2, a5 +; RV64IA-WMO-ZACAS-NEXT: bne a4, a1, .LBB16_3 +; RV64IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB16_1 Depth=1 +; RV64IA-WMO-ZACAS-NEXT: xor a4, a2, a0 +; RV64IA-WMO-ZACAS-NEXT: and a4, a4, a5 +; RV64IA-WMO-ZACAS-NEXT: xor a4, a2, a4 +; RV64IA-WMO-ZACAS-NEXT: sc.w.rl a4, a4, (a3) +; RV64IA-WMO-ZACAS-NEXT: bnez a4, .LBB16_1 +; RV64IA-WMO-ZACAS-NEXT: .LBB16_3: +; RV64IA-WMO-ZACAS-NEXT: ret %res = cmpxchg ptr %ptr, i16 %cmp, i16 %val acq_rel acquire ret void } @@ -2130,16 +3238,32 @@ define void @cmpxchg_i32_monotonic_monotonic(ptr %ptr, i32 %cmp, i32 %val) nounw ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; -; RV32IA-LABEL: cmpxchg_i32_monotonic_monotonic: -; RV32IA: # %bb.0: -; RV32IA-NEXT: .LBB20_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w a3, (a0) -; RV32IA-NEXT: bne a3, a1, .LBB20_3 -; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB20_1 Depth=1 -; RV32IA-NEXT: sc.w a4, a2, (a0) -; RV32IA-NEXT: bnez a4, .LBB20_1 -; RV32IA-NEXT: .LBB20_3: -; RV32IA-NEXT: ret +; RV32IA-WMO-LABEL: cmpxchg_i32_monotonic_monotonic: +; RV32IA-WMO: # %bb.0: +; RV32IA-WMO-NEXT: .LBB20_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-WMO-NEXT: lr.w a3, (a0) +; RV32IA-WMO-NEXT: bne a3, a1, .LBB20_3 +; RV32IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB20_1 Depth=1 +; RV32IA-WMO-NEXT: sc.w a4, a2, (a0) +; RV32IA-WMO-NEXT: bnez a4, .LBB20_1 +; RV32IA-WMO-NEXT: .LBB20_3: +; RV32IA-WMO-NEXT: ret +; +; 
RV32IA-ZACAS-LABEL: cmpxchg_i32_monotonic_monotonic: +; RV32IA-ZACAS: # %bb.0: +; RV32IA-ZACAS-NEXT: amocas.w a1, a2, (a0) +; RV32IA-ZACAS-NEXT: ret +; +; RV32IA-TSO-LABEL: cmpxchg_i32_monotonic_monotonic: +; RV32IA-TSO: # %bb.0: +; RV32IA-TSO-NEXT: .LBB20_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-TSO-NEXT: lr.w a3, (a0) +; RV32IA-TSO-NEXT: bne a3, a1, .LBB20_3 +; RV32IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB20_1 Depth=1 +; RV32IA-TSO-NEXT: sc.w a4, a2, (a0) +; RV32IA-TSO-NEXT: bnez a4, .LBB20_1 +; RV32IA-TSO-NEXT: .LBB20_3: +; RV32IA-TSO-NEXT: ret ; ; RV64I-LABEL: cmpxchg_i32_monotonic_monotonic: ; RV64I: # %bb.0: @@ -2154,17 +3278,44 @@ define void @cmpxchg_i32_monotonic_monotonic(ptr %ptr, i32 %cmp, i32 %val) nounw ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; -; RV64IA-LABEL: cmpxchg_i32_monotonic_monotonic: -; RV64IA: # %bb.0: -; RV64IA-NEXT: sext.w a1, a1 -; RV64IA-NEXT: .LBB20_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w a3, (a0) -; RV64IA-NEXT: bne a3, a1, .LBB20_3 -; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB20_1 Depth=1 -; RV64IA-NEXT: sc.w a4, a2, (a0) -; RV64IA-NEXT: bnez a4, .LBB20_1 -; RV64IA-NEXT: .LBB20_3: -; RV64IA-NEXT: ret +; RV64IA-WMO-LABEL: cmpxchg_i32_monotonic_monotonic: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: sext.w a1, a1 +; RV64IA-WMO-NEXT: .LBB20_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-WMO-NEXT: lr.w a3, (a0) +; RV64IA-WMO-NEXT: bne a3, a1, .LBB20_3 +; RV64IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB20_1 Depth=1 +; RV64IA-WMO-NEXT: sc.w a4, a2, (a0) +; RV64IA-WMO-NEXT: bnez a4, .LBB20_1 +; RV64IA-WMO-NEXT: .LBB20_3: +; RV64IA-WMO-NEXT: ret +; +; RV64IA-ZACAS-LABEL: cmpxchg_i32_monotonic_monotonic: +; RV64IA-ZACAS: # %bb.0: +; RV64IA-ZACAS-NEXT: sext.w a1, a1 +; RV64IA-ZACAS-NEXT: amocas.w a1, a2, (a0) +; RV64IA-ZACAS-NEXT: ret +; +; RV64IA-TSO-LABEL: cmpxchg_i32_monotonic_monotonic: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: sext.w a1, a1 +; RV64IA-TSO-NEXT: .LBB20_1: # =>This Inner Loop Header: 
Depth=1 +; RV64IA-TSO-NEXT: lr.w a3, (a0) +; RV64IA-TSO-NEXT: bne a3, a1, .LBB20_3 +; RV64IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB20_1 Depth=1 +; RV64IA-TSO-NEXT: sc.w a4, a2, (a0) +; RV64IA-TSO-NEXT: bnez a4, .LBB20_1 +; RV64IA-TSO-NEXT: .LBB20_3: +; RV64IA-TSO-NEXT: ret +; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i32_monotonic_monotonic: +; RV32IA-WMO-ZACAS: # %bb.0: +; RV32IA-WMO-ZACAS-NEXT: amocas.w a1, a2, (a0) +; RV32IA-WMO-ZACAS-NEXT: ret +; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i32_monotonic_monotonic: +; RV64IA-WMO-ZACAS: # %bb.0: +; RV64IA-WMO-ZACAS-NEXT: sext.w a1, a1 +; RV64IA-WMO-ZACAS-NEXT: amocas.w a1, a2, (a0) +; RV64IA-WMO-ZACAS-NEXT: ret %res = cmpxchg ptr %ptr, i32 %cmp, i32 %val monotonic monotonic ret void } @@ -2194,6 +3345,11 @@ define void @cmpxchg_i32_acquire_monotonic(ptr %ptr, i32 %cmp, i32 %val) nounwin ; RV32IA-WMO-NEXT: .LBB21_3: ; RV32IA-WMO-NEXT: ret ; +; RV32IA-ZACAS-LABEL: cmpxchg_i32_acquire_monotonic: +; RV32IA-ZACAS: # %bb.0: +; RV32IA-ZACAS-NEXT: amocas.w.aq a1, a2, (a0) +; RV32IA-ZACAS-NEXT: ret +; ; RV32IA-TSO-LABEL: cmpxchg_i32_acquire_monotonic: ; RV32IA-TSO: # %bb.0: ; RV32IA-TSO-NEXT: .LBB21_1: # =>This Inner Loop Header: Depth=1 @@ -2230,6 +3386,12 @@ define void @cmpxchg_i32_acquire_monotonic(ptr %ptr, i32 %cmp, i32 %val) nounwin ; RV64IA-WMO-NEXT: .LBB21_3: ; RV64IA-WMO-NEXT: ret ; +; RV64IA-ZACAS-LABEL: cmpxchg_i32_acquire_monotonic: +; RV64IA-ZACAS: # %bb.0: +; RV64IA-ZACAS-NEXT: sext.w a1, a1 +; RV64IA-ZACAS-NEXT: amocas.w.aq a1, a2, (a0) +; RV64IA-ZACAS-NEXT: ret +; ; RV64IA-TSO-LABEL: cmpxchg_i32_acquire_monotonic: ; RV64IA-TSO: # %bb.0: ; RV64IA-TSO-NEXT: sext.w a1, a1 @@ -2241,6 +3403,15 @@ define void @cmpxchg_i32_acquire_monotonic(ptr %ptr, i32 %cmp, i32 %val) nounwin ; RV64IA-TSO-NEXT: bnez a4, .LBB21_1 ; RV64IA-TSO-NEXT: .LBB21_3: ; RV64IA-TSO-NEXT: ret +; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i32_acquire_monotonic: +; RV32IA-WMO-ZACAS: # %bb.0: +; RV32IA-WMO-ZACAS-NEXT: amocas.w.aq a1, a2, (a0) +; RV32IA-WMO-ZACAS-NEXT: ret 
+; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i32_acquire_monotonic: +; RV64IA-WMO-ZACAS: # %bb.0: +; RV64IA-WMO-ZACAS-NEXT: sext.w a1, a1 +; RV64IA-WMO-ZACAS-NEXT: amocas.w.aq a1, a2, (a0) +; RV64IA-WMO-ZACAS-NEXT: ret %res = cmpxchg ptr %ptr, i32 %cmp, i32 %val acquire monotonic ret void } @@ -2270,6 +3441,11 @@ define void @cmpxchg_i32_acquire_acquire(ptr %ptr, i32 %cmp, i32 %val) nounwind ; RV32IA-WMO-NEXT: .LBB22_3: ; RV32IA-WMO-NEXT: ret ; +; RV32IA-ZACAS-LABEL: cmpxchg_i32_acquire_acquire: +; RV32IA-ZACAS: # %bb.0: +; RV32IA-ZACAS-NEXT: amocas.w.aq a1, a2, (a0) +; RV32IA-ZACAS-NEXT: ret +; ; RV32IA-TSO-LABEL: cmpxchg_i32_acquire_acquire: ; RV32IA-TSO: # %bb.0: ; RV32IA-TSO-NEXT: .LBB22_1: # =>This Inner Loop Header: Depth=1 @@ -2306,6 +3482,12 @@ define void @cmpxchg_i32_acquire_acquire(ptr %ptr, i32 %cmp, i32 %val) nounwind ; RV64IA-WMO-NEXT: .LBB22_3: ; RV64IA-WMO-NEXT: ret ; +; RV64IA-ZACAS-LABEL: cmpxchg_i32_acquire_acquire: +; RV64IA-ZACAS: # %bb.0: +; RV64IA-ZACAS-NEXT: sext.w a1, a1 +; RV64IA-ZACAS-NEXT: amocas.w.aq a1, a2, (a0) +; RV64IA-ZACAS-NEXT: ret +; ; RV64IA-TSO-LABEL: cmpxchg_i32_acquire_acquire: ; RV64IA-TSO: # %bb.0: ; RV64IA-TSO-NEXT: sext.w a1, a1 @@ -2317,6 +3499,15 @@ define void @cmpxchg_i32_acquire_acquire(ptr %ptr, i32 %cmp, i32 %val) nounwind ; RV64IA-TSO-NEXT: bnez a4, .LBB22_1 ; RV64IA-TSO-NEXT: .LBB22_3: ; RV64IA-TSO-NEXT: ret +; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i32_acquire_acquire: +; RV32IA-WMO-ZACAS: # %bb.0: +; RV32IA-WMO-ZACAS-NEXT: amocas.w.aq a1, a2, (a0) +; RV32IA-WMO-ZACAS-NEXT: ret +; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i32_acquire_acquire: +; RV64IA-WMO-ZACAS: # %bb.0: +; RV64IA-WMO-ZACAS-NEXT: sext.w a1, a1 +; RV64IA-WMO-ZACAS-NEXT: amocas.w.aq a1, a2, (a0) +; RV64IA-WMO-ZACAS-NEXT: ret %res = cmpxchg ptr %ptr, i32 %cmp, i32 %val acquire acquire ret void } @@ -2346,6 +3537,11 @@ define void @cmpxchg_i32_release_monotonic(ptr %ptr, i32 %cmp, i32 %val) nounwin ; RV32IA-WMO-NEXT: .LBB23_3: ; RV32IA-WMO-NEXT: ret ; +; RV32IA-ZACAS-LABEL: 
cmpxchg_i32_release_monotonic: +; RV32IA-ZACAS: # %bb.0: +; RV32IA-ZACAS-NEXT: amocas.w.rl a1, a2, (a0) +; RV32IA-ZACAS-NEXT: ret +; ; RV32IA-TSO-LABEL: cmpxchg_i32_release_monotonic: ; RV32IA-TSO: # %bb.0: ; RV32IA-TSO-NEXT: .LBB23_1: # =>This Inner Loop Header: Depth=1 @@ -2382,6 +3578,12 @@ define void @cmpxchg_i32_release_monotonic(ptr %ptr, i32 %cmp, i32 %val) nounwin ; RV64IA-WMO-NEXT: .LBB23_3: ; RV64IA-WMO-NEXT: ret ; +; RV64IA-ZACAS-LABEL: cmpxchg_i32_release_monotonic: +; RV64IA-ZACAS: # %bb.0: +; RV64IA-ZACAS-NEXT: sext.w a1, a1 +; RV64IA-ZACAS-NEXT: amocas.w.rl a1, a2, (a0) +; RV64IA-ZACAS-NEXT: ret +; ; RV64IA-TSO-LABEL: cmpxchg_i32_release_monotonic: ; RV64IA-TSO: # %bb.0: ; RV64IA-TSO-NEXT: sext.w a1, a1 @@ -2393,6 +3595,15 @@ define void @cmpxchg_i32_release_monotonic(ptr %ptr, i32 %cmp, i32 %val) nounwin ; RV64IA-TSO-NEXT: bnez a4, .LBB23_1 ; RV64IA-TSO-NEXT: .LBB23_3: ; RV64IA-TSO-NEXT: ret +; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i32_release_monotonic: +; RV32IA-WMO-ZACAS: # %bb.0: +; RV32IA-WMO-ZACAS-NEXT: amocas.w.rl a1, a2, (a0) +; RV32IA-WMO-ZACAS-NEXT: ret +; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i32_release_monotonic: +; RV64IA-WMO-ZACAS: # %bb.0: +; RV64IA-WMO-ZACAS-NEXT: sext.w a1, a1 +; RV64IA-WMO-ZACAS-NEXT: amocas.w.rl a1, a2, (a0) +; RV64IA-WMO-ZACAS-NEXT: ret %res = cmpxchg ptr %ptr, i32 %cmp, i32 %val release monotonic ret void } @@ -2422,6 +3633,11 @@ define void @cmpxchg_i32_release_acquire(ptr %ptr, i32 %cmp, i32 %val) nounwind ; RV32IA-WMO-NEXT: .LBB24_3: ; RV32IA-WMO-NEXT: ret ; +; RV32IA-ZACAS-LABEL: cmpxchg_i32_release_acquire: +; RV32IA-ZACAS: # %bb.0: +; RV32IA-ZACAS-NEXT: amocas.w.aqrl a1, a2, (a0) +; RV32IA-ZACAS-NEXT: ret +; ; RV32IA-TSO-LABEL: cmpxchg_i32_release_acquire: ; RV32IA-TSO: # %bb.0: ; RV32IA-TSO-NEXT: .LBB24_1: # =>This Inner Loop Header: Depth=1 @@ -2458,6 +3674,12 @@ define void @cmpxchg_i32_release_acquire(ptr %ptr, i32 %cmp, i32 %val) nounwind ; RV64IA-WMO-NEXT: .LBB24_3: ; RV64IA-WMO-NEXT: ret ; +; 
RV64IA-ZACAS-LABEL: cmpxchg_i32_release_acquire: +; RV64IA-ZACAS: # %bb.0: +; RV64IA-ZACAS-NEXT: sext.w a1, a1 +; RV64IA-ZACAS-NEXT: amocas.w.aqrl a1, a2, (a0) +; RV64IA-ZACAS-NEXT: ret +; ; RV64IA-TSO-LABEL: cmpxchg_i32_release_acquire: ; RV64IA-TSO: # %bb.0: ; RV64IA-TSO-NEXT: sext.w a1, a1 @@ -2469,6 +3691,15 @@ define void @cmpxchg_i32_release_acquire(ptr %ptr, i32 %cmp, i32 %val) nounwind ; RV64IA-TSO-NEXT: bnez a4, .LBB24_1 ; RV64IA-TSO-NEXT: .LBB24_3: ; RV64IA-TSO-NEXT: ret +; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i32_release_acquire: +; RV32IA-WMO-ZACAS: # %bb.0: +; RV32IA-WMO-ZACAS-NEXT: amocas.w.aqrl a1, a2, (a0) +; RV32IA-WMO-ZACAS-NEXT: ret +; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i32_release_acquire: +; RV64IA-WMO-ZACAS: # %bb.0: +; RV64IA-WMO-ZACAS-NEXT: sext.w a1, a1 +; RV64IA-WMO-ZACAS-NEXT: amocas.w.aqrl a1, a2, (a0) +; RV64IA-WMO-ZACAS-NEXT: ret %res = cmpxchg ptr %ptr, i32 %cmp, i32 %val release acquire ret void } @@ -2498,6 +3729,11 @@ define void @cmpxchg_i32_acq_rel_monotonic(ptr %ptr, i32 %cmp, i32 %val) nounwin ; RV32IA-WMO-NEXT: .LBB25_3: ; RV32IA-WMO-NEXT: ret ; +; RV32IA-ZACAS-LABEL: cmpxchg_i32_acq_rel_monotonic: +; RV32IA-ZACAS: # %bb.0: +; RV32IA-ZACAS-NEXT: amocas.w.aqrl a1, a2, (a0) +; RV32IA-ZACAS-NEXT: ret +; ; RV32IA-TSO-LABEL: cmpxchg_i32_acq_rel_monotonic: ; RV32IA-TSO: # %bb.0: ; RV32IA-TSO-NEXT: .LBB25_1: # =>This Inner Loop Header: Depth=1 @@ -2534,6 +3770,12 @@ define void @cmpxchg_i32_acq_rel_monotonic(ptr %ptr, i32 %cmp, i32 %val) nounwin ; RV64IA-WMO-NEXT: .LBB25_3: ; RV64IA-WMO-NEXT: ret ; +; RV64IA-ZACAS-LABEL: cmpxchg_i32_acq_rel_monotonic: +; RV64IA-ZACAS: # %bb.0: +; RV64IA-ZACAS-NEXT: sext.w a1, a1 +; RV64IA-ZACAS-NEXT: amocas.w.aqrl a1, a2, (a0) +; RV64IA-ZACAS-NEXT: ret +; ; RV64IA-TSO-LABEL: cmpxchg_i32_acq_rel_monotonic: ; RV64IA-TSO: # %bb.0: ; RV64IA-TSO-NEXT: sext.w a1, a1 @@ -2545,6 +3787,15 @@ define void @cmpxchg_i32_acq_rel_monotonic(ptr %ptr, i32 %cmp, i32 %val) nounwin ; RV64IA-TSO-NEXT: bnez a4, .LBB25_1 ; 
RV64IA-TSO-NEXT: .LBB25_3: ; RV64IA-TSO-NEXT: ret +; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i32_acq_rel_monotonic: +; RV32IA-WMO-ZACAS: # %bb.0: +; RV32IA-WMO-ZACAS-NEXT: amocas.w.aqrl a1, a2, (a0) +; RV32IA-WMO-ZACAS-NEXT: ret +; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i32_acq_rel_monotonic: +; RV64IA-WMO-ZACAS: # %bb.0: +; RV64IA-WMO-ZACAS-NEXT: sext.w a1, a1 +; RV64IA-WMO-ZACAS-NEXT: amocas.w.aqrl a1, a2, (a0) +; RV64IA-WMO-ZACAS-NEXT: ret %res = cmpxchg ptr %ptr, i32 %cmp, i32 %val acq_rel monotonic ret void } @@ -2574,6 +3825,11 @@ define void @cmpxchg_i32_acq_rel_acquire(ptr %ptr, i32 %cmp, i32 %val) nounwind ; RV32IA-WMO-NEXT: .LBB26_3: ; RV32IA-WMO-NEXT: ret ; +; RV32IA-ZACAS-LABEL: cmpxchg_i32_acq_rel_acquire: +; RV32IA-ZACAS: # %bb.0: +; RV32IA-ZACAS-NEXT: amocas.w.aqrl a1, a2, (a0) +; RV32IA-ZACAS-NEXT: ret +; ; RV32IA-TSO-LABEL: cmpxchg_i32_acq_rel_acquire: ; RV32IA-TSO: # %bb.0: ; RV32IA-TSO-NEXT: .LBB26_1: # =>This Inner Loop Header: Depth=1 @@ -2610,6 +3866,12 @@ define void @cmpxchg_i32_acq_rel_acquire(ptr %ptr, i32 %cmp, i32 %val) nounwind ; RV64IA-WMO-NEXT: .LBB26_3: ; RV64IA-WMO-NEXT: ret ; +; RV64IA-ZACAS-LABEL: cmpxchg_i32_acq_rel_acquire: +; RV64IA-ZACAS: # %bb.0: +; RV64IA-ZACAS-NEXT: sext.w a1, a1 +; RV64IA-ZACAS-NEXT: amocas.w.aqrl a1, a2, (a0) +; RV64IA-ZACAS-NEXT: ret +; ; RV64IA-TSO-LABEL: cmpxchg_i32_acq_rel_acquire: ; RV64IA-TSO: # %bb.0: ; RV64IA-TSO-NEXT: sext.w a1, a1 @@ -2621,6 +3883,15 @@ define void @cmpxchg_i32_acq_rel_acquire(ptr %ptr, i32 %cmp, i32 %val) nounwind ; RV64IA-TSO-NEXT: bnez a4, .LBB26_1 ; RV64IA-TSO-NEXT: .LBB26_3: ; RV64IA-TSO-NEXT: ret +; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i32_acq_rel_acquire: +; RV32IA-WMO-ZACAS: # %bb.0: +; RV32IA-WMO-ZACAS-NEXT: amocas.w.aqrl a1, a2, (a0) +; RV32IA-WMO-ZACAS-NEXT: ret +; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i32_acq_rel_acquire: +; RV64IA-WMO-ZACAS: # %bb.0: +; RV64IA-WMO-ZACAS-NEXT: sext.w a1, a1 +; RV64IA-WMO-ZACAS-NEXT: amocas.w.aqrl a1, a2, (a0) +; RV64IA-WMO-ZACAS-NEXT: ret %res = cmpxchg ptr 
%ptr, i32 %cmp, i32 %val acq_rel acquire ret void } @@ -2639,16 +3910,32 @@ define void @cmpxchg_i32_seq_cst_monotonic(ptr %ptr, i32 %cmp, i32 %val) nounwin ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; -; RV32IA-LABEL: cmpxchg_i32_seq_cst_monotonic: -; RV32IA: # %bb.0: -; RV32IA-NEXT: .LBB27_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w.aqrl a3, (a0) -; RV32IA-NEXT: bne a3, a1, .LBB27_3 -; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB27_1 Depth=1 -; RV32IA-NEXT: sc.w.rl a4, a2, (a0) -; RV32IA-NEXT: bnez a4, .LBB27_1 -; RV32IA-NEXT: .LBB27_3: -; RV32IA-NEXT: ret +; RV32IA-WMO-LABEL: cmpxchg_i32_seq_cst_monotonic: +; RV32IA-WMO: # %bb.0: +; RV32IA-WMO-NEXT: .LBB27_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-WMO-NEXT: lr.w.aqrl a3, (a0) +; RV32IA-WMO-NEXT: bne a3, a1, .LBB27_3 +; RV32IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB27_1 Depth=1 +; RV32IA-WMO-NEXT: sc.w.rl a4, a2, (a0) +; RV32IA-WMO-NEXT: bnez a4, .LBB27_1 +; RV32IA-WMO-NEXT: .LBB27_3: +; RV32IA-WMO-NEXT: ret +; +; RV32IA-ZACAS-LABEL: cmpxchg_i32_seq_cst_monotonic: +; RV32IA-ZACAS: # %bb.0: +; RV32IA-ZACAS-NEXT: amocas.w.aqrl a1, a2, (a0) +; RV32IA-ZACAS-NEXT: ret +; +; RV32IA-TSO-LABEL: cmpxchg_i32_seq_cst_monotonic: +; RV32IA-TSO: # %bb.0: +; RV32IA-TSO-NEXT: .LBB27_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-TSO-NEXT: lr.w.aqrl a3, (a0) +; RV32IA-TSO-NEXT: bne a3, a1, .LBB27_3 +; RV32IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB27_1 Depth=1 +; RV32IA-TSO-NEXT: sc.w.rl a4, a2, (a0) +; RV32IA-TSO-NEXT: bnez a4, .LBB27_1 +; RV32IA-TSO-NEXT: .LBB27_3: +; RV32IA-TSO-NEXT: ret ; ; RV64I-LABEL: cmpxchg_i32_seq_cst_monotonic: ; RV64I: # %bb.0: @@ -2663,17 +3950,44 @@ define void @cmpxchg_i32_seq_cst_monotonic(ptr %ptr, i32 %cmp, i32 %val) nounwin ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; -; RV64IA-LABEL: cmpxchg_i32_seq_cst_monotonic: -; RV64IA: # %bb.0: -; RV64IA-NEXT: sext.w a1, a1 -; RV64IA-NEXT: .LBB27_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w.aqrl a3, (a0) -; 
RV64IA-NEXT: bne a3, a1, .LBB27_3 -; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB27_1 Depth=1 -; RV64IA-NEXT: sc.w.rl a4, a2, (a0) -; RV64IA-NEXT: bnez a4, .LBB27_1 -; RV64IA-NEXT: .LBB27_3: -; RV64IA-NEXT: ret +; RV64IA-WMO-LABEL: cmpxchg_i32_seq_cst_monotonic: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: sext.w a1, a1 +; RV64IA-WMO-NEXT: .LBB27_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-WMO-NEXT: lr.w.aqrl a3, (a0) +; RV64IA-WMO-NEXT: bne a3, a1, .LBB27_3 +; RV64IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB27_1 Depth=1 +; RV64IA-WMO-NEXT: sc.w.rl a4, a2, (a0) +; RV64IA-WMO-NEXT: bnez a4, .LBB27_1 +; RV64IA-WMO-NEXT: .LBB27_3: +; RV64IA-WMO-NEXT: ret +; +; RV64IA-ZACAS-LABEL: cmpxchg_i32_seq_cst_monotonic: +; RV64IA-ZACAS: # %bb.0: +; RV64IA-ZACAS-NEXT: sext.w a1, a1 +; RV64IA-ZACAS-NEXT: amocas.w.aqrl a1, a2, (a0) +; RV64IA-ZACAS-NEXT: ret +; +; RV64IA-TSO-LABEL: cmpxchg_i32_seq_cst_monotonic: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: sext.w a1, a1 +; RV64IA-TSO-NEXT: .LBB27_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-TSO-NEXT: lr.w.aqrl a3, (a0) +; RV64IA-TSO-NEXT: bne a3, a1, .LBB27_3 +; RV64IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB27_1 Depth=1 +; RV64IA-TSO-NEXT: sc.w.rl a4, a2, (a0) +; RV64IA-TSO-NEXT: bnez a4, .LBB27_1 +; RV64IA-TSO-NEXT: .LBB27_3: +; RV64IA-TSO-NEXT: ret +; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i32_seq_cst_monotonic: +; RV32IA-WMO-ZACAS: # %bb.0: +; RV32IA-WMO-ZACAS-NEXT: amocas.w.aqrl a1, a2, (a0) +; RV32IA-WMO-ZACAS-NEXT: ret +; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i32_seq_cst_monotonic: +; RV64IA-WMO-ZACAS: # %bb.0: +; RV64IA-WMO-ZACAS-NEXT: sext.w a1, a1 +; RV64IA-WMO-ZACAS-NEXT: amocas.w.aqrl a1, a2, (a0) +; RV64IA-WMO-ZACAS-NEXT: ret %res = cmpxchg ptr %ptr, i32 %cmp, i32 %val seq_cst monotonic ret void } @@ -2692,16 +4006,32 @@ define void @cmpxchg_i32_seq_cst_acquire(ptr %ptr, i32 %cmp, i32 %val) nounwind ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; -; RV32IA-LABEL: cmpxchg_i32_seq_cst_acquire: -; RV32IA: # %bb.0: -; RV32IA-NEXT: 
.LBB28_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w.aqrl a3, (a0) -; RV32IA-NEXT: bne a3, a1, .LBB28_3 -; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB28_1 Depth=1 -; RV32IA-NEXT: sc.w.rl a4, a2, (a0) -; RV32IA-NEXT: bnez a4, .LBB28_1 -; RV32IA-NEXT: .LBB28_3: -; RV32IA-NEXT: ret +; RV32IA-WMO-LABEL: cmpxchg_i32_seq_cst_acquire: +; RV32IA-WMO: # %bb.0: +; RV32IA-WMO-NEXT: .LBB28_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-WMO-NEXT: lr.w.aqrl a3, (a0) +; RV32IA-WMO-NEXT: bne a3, a1, .LBB28_3 +; RV32IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB28_1 Depth=1 +; RV32IA-WMO-NEXT: sc.w.rl a4, a2, (a0) +; RV32IA-WMO-NEXT: bnez a4, .LBB28_1 +; RV32IA-WMO-NEXT: .LBB28_3: +; RV32IA-WMO-NEXT: ret +; +; RV32IA-ZACAS-LABEL: cmpxchg_i32_seq_cst_acquire: +; RV32IA-ZACAS: # %bb.0: +; RV32IA-ZACAS-NEXT: amocas.w.aqrl a1, a2, (a0) +; RV32IA-ZACAS-NEXT: ret +; +; RV32IA-TSO-LABEL: cmpxchg_i32_seq_cst_acquire: +; RV32IA-TSO: # %bb.0: +; RV32IA-TSO-NEXT: .LBB28_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-TSO-NEXT: lr.w.aqrl a3, (a0) +; RV32IA-TSO-NEXT: bne a3, a1, .LBB28_3 +; RV32IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB28_1 Depth=1 +; RV32IA-TSO-NEXT: sc.w.rl a4, a2, (a0) +; RV32IA-TSO-NEXT: bnez a4, .LBB28_1 +; RV32IA-TSO-NEXT: .LBB28_3: +; RV32IA-TSO-NEXT: ret ; ; RV64I-LABEL: cmpxchg_i32_seq_cst_acquire: ; RV64I: # %bb.0: @@ -2716,17 +4046,44 @@ define void @cmpxchg_i32_seq_cst_acquire(ptr %ptr, i32 %cmp, i32 %val) nounwind ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; -; RV64IA-LABEL: cmpxchg_i32_seq_cst_acquire: -; RV64IA: # %bb.0: -; RV64IA-NEXT: sext.w a1, a1 -; RV64IA-NEXT: .LBB28_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w.aqrl a3, (a0) -; RV64IA-NEXT: bne a3, a1, .LBB28_3 -; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB28_1 Depth=1 -; RV64IA-NEXT: sc.w.rl a4, a2, (a0) -; RV64IA-NEXT: bnez a4, .LBB28_1 -; RV64IA-NEXT: .LBB28_3: -; RV64IA-NEXT: ret +; RV64IA-WMO-LABEL: cmpxchg_i32_seq_cst_acquire: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: 
sext.w a1, a1 +; RV64IA-WMO-NEXT: .LBB28_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-WMO-NEXT: lr.w.aqrl a3, (a0) +; RV64IA-WMO-NEXT: bne a3, a1, .LBB28_3 +; RV64IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB28_1 Depth=1 +; RV64IA-WMO-NEXT: sc.w.rl a4, a2, (a0) +; RV64IA-WMO-NEXT: bnez a4, .LBB28_1 +; RV64IA-WMO-NEXT: .LBB28_3: +; RV64IA-WMO-NEXT: ret +; +; RV64IA-ZACAS-LABEL: cmpxchg_i32_seq_cst_acquire: +; RV64IA-ZACAS: # %bb.0: +; RV64IA-ZACAS-NEXT: sext.w a1, a1 +; RV64IA-ZACAS-NEXT: amocas.w.aqrl a1, a2, (a0) +; RV64IA-ZACAS-NEXT: ret +; +; RV64IA-TSO-LABEL: cmpxchg_i32_seq_cst_acquire: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: sext.w a1, a1 +; RV64IA-TSO-NEXT: .LBB28_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-TSO-NEXT: lr.w.aqrl a3, (a0) +; RV64IA-TSO-NEXT: bne a3, a1, .LBB28_3 +; RV64IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB28_1 Depth=1 +; RV64IA-TSO-NEXT: sc.w.rl a4, a2, (a0) +; RV64IA-TSO-NEXT: bnez a4, .LBB28_1 +; RV64IA-TSO-NEXT: .LBB28_3: +; RV64IA-TSO-NEXT: ret +; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i32_seq_cst_acquire: +; RV32IA-WMO-ZACAS: # %bb.0: +; RV32IA-WMO-ZACAS-NEXT: amocas.w.aqrl a1, a2, (a0) +; RV32IA-WMO-ZACAS-NEXT: ret +; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i32_seq_cst_acquire: +; RV64IA-WMO-ZACAS: # %bb.0: +; RV64IA-WMO-ZACAS-NEXT: sext.w a1, a1 +; RV64IA-WMO-ZACAS-NEXT: amocas.w.aqrl a1, a2, (a0) +; RV64IA-WMO-ZACAS-NEXT: ret %res = cmpxchg ptr %ptr, i32 %cmp, i32 %val seq_cst acquire ret void } @@ -2745,16 +4102,32 @@ define void @cmpxchg_i32_seq_cst_seq_cst(ptr %ptr, i32 %cmp, i32 %val) nounwind ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; -; RV32IA-LABEL: cmpxchg_i32_seq_cst_seq_cst: -; RV32IA: # %bb.0: -; RV32IA-NEXT: .LBB29_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w.aqrl a3, (a0) -; RV32IA-NEXT: bne a3, a1, .LBB29_3 -; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB29_1 Depth=1 -; RV32IA-NEXT: sc.w.rl a4, a2, (a0) -; RV32IA-NEXT: bnez a4, .LBB29_1 -; RV32IA-NEXT: .LBB29_3: -; RV32IA-NEXT: ret +; RV32IA-WMO-LABEL: 
cmpxchg_i32_seq_cst_seq_cst: +; RV32IA-WMO: # %bb.0: +; RV32IA-WMO-NEXT: .LBB29_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-WMO-NEXT: lr.w.aqrl a3, (a0) +; RV32IA-WMO-NEXT: bne a3, a1, .LBB29_3 +; RV32IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB29_1 Depth=1 +; RV32IA-WMO-NEXT: sc.w.rl a4, a2, (a0) +; RV32IA-WMO-NEXT: bnez a4, .LBB29_1 +; RV32IA-WMO-NEXT: .LBB29_3: +; RV32IA-WMO-NEXT: ret +; +; RV32IA-ZACAS-LABEL: cmpxchg_i32_seq_cst_seq_cst: +; RV32IA-ZACAS: # %bb.0: +; RV32IA-ZACAS-NEXT: amocas.w.aqrl a1, a2, (a0) +; RV32IA-ZACAS-NEXT: ret +; +; RV32IA-TSO-LABEL: cmpxchg_i32_seq_cst_seq_cst: +; RV32IA-TSO: # %bb.0: +; RV32IA-TSO-NEXT: .LBB29_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-TSO-NEXT: lr.w.aqrl a3, (a0) +; RV32IA-TSO-NEXT: bne a3, a1, .LBB29_3 +; RV32IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB29_1 Depth=1 +; RV32IA-TSO-NEXT: sc.w.rl a4, a2, (a0) +; RV32IA-TSO-NEXT: bnez a4, .LBB29_1 +; RV32IA-TSO-NEXT: .LBB29_3: +; RV32IA-TSO-NEXT: ret ; ; RV64I-LABEL: cmpxchg_i32_seq_cst_seq_cst: ; RV64I: # %bb.0: @@ -2769,17 +4142,44 @@ define void @cmpxchg_i32_seq_cst_seq_cst(ptr %ptr, i32 %cmp, i32 %val) nounwind ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; -; RV64IA-LABEL: cmpxchg_i32_seq_cst_seq_cst: -; RV64IA: # %bb.0: -; RV64IA-NEXT: sext.w a1, a1 -; RV64IA-NEXT: .LBB29_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w.aqrl a3, (a0) -; RV64IA-NEXT: bne a3, a1, .LBB29_3 -; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB29_1 Depth=1 -; RV64IA-NEXT: sc.w.rl a4, a2, (a0) -; RV64IA-NEXT: bnez a4, .LBB29_1 -; RV64IA-NEXT: .LBB29_3: -; RV64IA-NEXT: ret +; RV64IA-WMO-LABEL: cmpxchg_i32_seq_cst_seq_cst: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: sext.w a1, a1 +; RV64IA-WMO-NEXT: .LBB29_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-WMO-NEXT: lr.w.aqrl a3, (a0) +; RV64IA-WMO-NEXT: bne a3, a1, .LBB29_3 +; RV64IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB29_1 Depth=1 +; RV64IA-WMO-NEXT: sc.w.rl a4, a2, (a0) +; RV64IA-WMO-NEXT: bnez a4, .LBB29_1 +; 
RV64IA-WMO-NEXT: .LBB29_3: +; RV64IA-WMO-NEXT: ret +; +; RV64IA-ZACAS-LABEL: cmpxchg_i32_seq_cst_seq_cst: +; RV64IA-ZACAS: # %bb.0: +; RV64IA-ZACAS-NEXT: sext.w a1, a1 +; RV64IA-ZACAS-NEXT: amocas.w.aqrl a1, a2, (a0) +; RV64IA-ZACAS-NEXT: ret +; +; RV64IA-TSO-LABEL: cmpxchg_i32_seq_cst_seq_cst: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: sext.w a1, a1 +; RV64IA-TSO-NEXT: .LBB29_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-TSO-NEXT: lr.w.aqrl a3, (a0) +; RV64IA-TSO-NEXT: bne a3, a1, .LBB29_3 +; RV64IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB29_1 Depth=1 +; RV64IA-TSO-NEXT: sc.w.rl a4, a2, (a0) +; RV64IA-TSO-NEXT: bnez a4, .LBB29_1 +; RV64IA-TSO-NEXT: .LBB29_3: +; RV64IA-TSO-NEXT: ret +; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i32_seq_cst_seq_cst: +; RV32IA-WMO-ZACAS: # %bb.0: +; RV32IA-WMO-ZACAS-NEXT: amocas.w.aqrl a1, a2, (a0) +; RV32IA-WMO-ZACAS-NEXT: ret +; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i32_seq_cst_seq_cst: +; RV64IA-WMO-ZACAS: # %bb.0: +; RV64IA-WMO-ZACAS-NEXT: sext.w a1, a1 +; RV64IA-WMO-ZACAS-NEXT: amocas.w.aqrl a1, a2, (a0) +; RV64IA-WMO-ZACAS-NEXT: ret %res = cmpxchg ptr %ptr, i32 %cmp, i32 %val seq_cst seq_cst ret void } @@ -2801,21 +4201,46 @@ define void @cmpxchg_i64_monotonic_monotonic(ptr %ptr, i64 %cmp, i64 %val) nounw ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; -; RV32IA-LABEL: cmpxchg_i64_monotonic_monotonic: -; RV32IA: # %bb.0: -; RV32IA-NEXT: addi sp, sp, -16 -; RV32IA-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32IA-NEXT: sw a2, 4(sp) -; RV32IA-NEXT: sw a1, 0(sp) -; RV32IA-NEXT: mv a1, sp -; RV32IA-NEXT: mv a2, a3 -; RV32IA-NEXT: mv a3, a4 -; RV32IA-NEXT: li a4, 0 -; RV32IA-NEXT: li a5, 0 -; RV32IA-NEXT: call __atomic_compare_exchange_8@plt -; RV32IA-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32IA-NEXT: addi sp, sp, 16 -; RV32IA-NEXT: ret +; RV32IA-WMO-LABEL: cmpxchg_i64_monotonic_monotonic: +; RV32IA-WMO: # %bb.0: +; RV32IA-WMO-NEXT: addi sp, sp, -16 +; RV32IA-WMO-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IA-WMO-NEXT: sw a2, 
4(sp) +; RV32IA-WMO-NEXT: sw a1, 0(sp) +; RV32IA-WMO-NEXT: mv a1, sp +; RV32IA-WMO-NEXT: mv a2, a3 +; RV32IA-WMO-NEXT: mv a3, a4 +; RV32IA-WMO-NEXT: li a4, 0 +; RV32IA-WMO-NEXT: li a5, 0 +; RV32IA-WMO-NEXT: call __atomic_compare_exchange_8@plt +; RV32IA-WMO-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IA-WMO-NEXT: addi sp, sp, 16 +; RV32IA-WMO-NEXT: ret +; +; RV32IA-ZACAS-LABEL: cmpxchg_i64_monotonic_monotonic: +; RV32IA-ZACAS: # %bb.0: +; RV32IA-ZACAS-NEXT: mv a5, a4 +; RV32IA-ZACAS-NEXT: mv a7, a2 +; RV32IA-ZACAS-NEXT: mv a4, a3 +; RV32IA-ZACAS-NEXT: mv a6, a1 +; RV32IA-ZACAS-NEXT: amocas.d a6, a4, (a0) +; RV32IA-ZACAS-NEXT: ret +; +; RV32IA-TSO-LABEL: cmpxchg_i64_monotonic_monotonic: +; RV32IA-TSO: # %bb.0: +; RV32IA-TSO-NEXT: addi sp, sp, -16 +; RV32IA-TSO-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IA-TSO-NEXT: sw a2, 4(sp) +; RV32IA-TSO-NEXT: sw a1, 0(sp) +; RV32IA-TSO-NEXT: mv a1, sp +; RV32IA-TSO-NEXT: mv a2, a3 +; RV32IA-TSO-NEXT: mv a3, a4 +; RV32IA-TSO-NEXT: li a4, 0 +; RV32IA-TSO-NEXT: li a5, 0 +; RV32IA-TSO-NEXT: call __atomic_compare_exchange_8@plt +; RV32IA-TSO-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IA-TSO-NEXT: addi sp, sp, 16 +; RV32IA-TSO-NEXT: ret ; ; RV64I-LABEL: cmpxchg_i64_monotonic_monotonic: ; RV64I: # %bb.0: @@ -2830,16 +4255,44 @@ define void @cmpxchg_i64_monotonic_monotonic(ptr %ptr, i64 %cmp, i64 %val) nounw ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; -; RV64IA-LABEL: cmpxchg_i64_monotonic_monotonic: -; RV64IA: # %bb.0: -; RV64IA-NEXT: .LBB30_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.d a3, (a0) -; RV64IA-NEXT: bne a3, a1, .LBB30_3 -; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB30_1 Depth=1 -; RV64IA-NEXT: sc.d a4, a2, (a0) -; RV64IA-NEXT: bnez a4, .LBB30_1 -; RV64IA-NEXT: .LBB30_3: -; RV64IA-NEXT: ret +; RV64IA-WMO-LABEL: cmpxchg_i64_monotonic_monotonic: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: .LBB30_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-WMO-NEXT: lr.d a3, (a0) +; RV64IA-WMO-NEXT: 
bne a3, a1, .LBB30_3 +; RV64IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB30_1 Depth=1 +; RV64IA-WMO-NEXT: sc.d a4, a2, (a0) +; RV64IA-WMO-NEXT: bnez a4, .LBB30_1 +; RV64IA-WMO-NEXT: .LBB30_3: +; RV64IA-WMO-NEXT: ret +; +; RV64IA-ZACAS-LABEL: cmpxchg_i64_monotonic_monotonic: +; RV64IA-ZACAS: # %bb.0: +; RV64IA-ZACAS-NEXT: amocas.d a1, a2, (a0) +; RV64IA-ZACAS-NEXT: ret +; +; RV64IA-TSO-LABEL: cmpxchg_i64_monotonic_monotonic: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: .LBB30_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-TSO-NEXT: lr.d a3, (a0) +; RV64IA-TSO-NEXT: bne a3, a1, .LBB30_3 +; RV64IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB30_1 Depth=1 +; RV64IA-TSO-NEXT: sc.d a4, a2, (a0) +; RV64IA-TSO-NEXT: bnez a4, .LBB30_1 +; RV64IA-TSO-NEXT: .LBB30_3: +; RV64IA-TSO-NEXT: ret +; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i64_monotonic_monotonic: +; RV32IA-WMO-ZACAS: # %bb.0: +; RV32IA-WMO-ZACAS-NEXT: mv a5, a4 +; RV32IA-WMO-ZACAS-NEXT: mv a7, a2 +; RV32IA-WMO-ZACAS-NEXT: mv a4, a3 +; RV32IA-WMO-ZACAS-NEXT: mv a6, a1 +; RV32IA-WMO-ZACAS-NEXT: amocas.d a6, a4, (a0) +; RV32IA-WMO-ZACAS-NEXT: ret +; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i64_monotonic_monotonic: +; RV64IA-WMO-ZACAS: # %bb.0: +; RV64IA-WMO-ZACAS-NEXT: amocas.d a1, a2, (a0) +; RV64IA-WMO-ZACAS-NEXT: ret %res = cmpxchg ptr %ptr, i64 %cmp, i64 %val monotonic monotonic ret void } @@ -2862,22 +4315,48 @@ define void @cmpxchg_i64_acquire_monotonic(ptr %ptr, i64 %cmp, i64 %val) nounwin ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; -; RV32IA-LABEL: cmpxchg_i64_acquire_monotonic: -; RV32IA: # %bb.0: -; RV32IA-NEXT: addi sp, sp, -16 -; RV32IA-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32IA-NEXT: mv a5, a4 -; RV32IA-NEXT: sw a2, 4(sp) -; RV32IA-NEXT: sw a1, 0(sp) -; RV32IA-NEXT: mv a1, sp -; RV32IA-NEXT: li a4, 2 -; RV32IA-NEXT: mv a2, a3 -; RV32IA-NEXT: mv a3, a5 -; RV32IA-NEXT: li a5, 0 -; RV32IA-NEXT: call __atomic_compare_exchange_8@plt -; RV32IA-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32IA-NEXT: addi sp, sp, 16 -; 
RV32IA-NEXT: ret +; RV32IA-WMO-LABEL: cmpxchg_i64_acquire_monotonic: +; RV32IA-WMO: # %bb.0: +; RV32IA-WMO-NEXT: addi sp, sp, -16 +; RV32IA-WMO-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IA-WMO-NEXT: mv a5, a4 +; RV32IA-WMO-NEXT: sw a2, 4(sp) +; RV32IA-WMO-NEXT: sw a1, 0(sp) +; RV32IA-WMO-NEXT: mv a1, sp +; RV32IA-WMO-NEXT: li a4, 2 +; RV32IA-WMO-NEXT: mv a2, a3 +; RV32IA-WMO-NEXT: mv a3, a5 +; RV32IA-WMO-NEXT: li a5, 0 +; RV32IA-WMO-NEXT: call __atomic_compare_exchange_8@plt +; RV32IA-WMO-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IA-WMO-NEXT: addi sp, sp, 16 +; RV32IA-WMO-NEXT: ret +; +; RV32IA-ZACAS-LABEL: cmpxchg_i64_acquire_monotonic: +; RV32IA-ZACAS: # %bb.0: +; RV32IA-ZACAS-NEXT: mv a5, a4 +; RV32IA-ZACAS-NEXT: mv a7, a2 +; RV32IA-ZACAS-NEXT: mv a4, a3 +; RV32IA-ZACAS-NEXT: mv a6, a1 +; RV32IA-ZACAS-NEXT: amocas.d.aq a6, a4, (a0) +; RV32IA-ZACAS-NEXT: ret +; +; RV32IA-TSO-LABEL: cmpxchg_i64_acquire_monotonic: +; RV32IA-TSO: # %bb.0: +; RV32IA-TSO-NEXT: addi sp, sp, -16 +; RV32IA-TSO-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IA-TSO-NEXT: mv a5, a4 +; RV32IA-TSO-NEXT: sw a2, 4(sp) +; RV32IA-TSO-NEXT: sw a1, 0(sp) +; RV32IA-TSO-NEXT: mv a1, sp +; RV32IA-TSO-NEXT: li a4, 2 +; RV32IA-TSO-NEXT: mv a2, a3 +; RV32IA-TSO-NEXT: mv a3, a5 +; RV32IA-TSO-NEXT: li a5, 0 +; RV32IA-TSO-NEXT: call __atomic_compare_exchange_8@plt +; RV32IA-TSO-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IA-TSO-NEXT: addi sp, sp, 16 +; RV32IA-TSO-NEXT: ret ; ; RV64I-LABEL: cmpxchg_i64_acquire_monotonic: ; RV64I: # %bb.0: @@ -2903,6 +4382,11 @@ define void @cmpxchg_i64_acquire_monotonic(ptr %ptr, i64 %cmp, i64 %val) nounwin ; RV64IA-WMO-NEXT: .LBB31_3: ; RV64IA-WMO-NEXT: ret ; +; RV64IA-ZACAS-LABEL: cmpxchg_i64_acquire_monotonic: +; RV64IA-ZACAS: # %bb.0: +; RV64IA-ZACAS-NEXT: amocas.d.aq a1, a2, (a0) +; RV64IA-ZACAS-NEXT: ret +; ; RV64IA-TSO-LABEL: cmpxchg_i64_acquire_monotonic: ; RV64IA-TSO: # %bb.0: ; RV64IA-TSO-NEXT: .LBB31_1: # =>This Inner Loop Header: Depth=1 @@ 
-2913,6 +4397,18 @@ define void @cmpxchg_i64_acquire_monotonic(ptr %ptr, i64 %cmp, i64 %val) nounwin ; RV64IA-TSO-NEXT: bnez a4, .LBB31_1 ; RV64IA-TSO-NEXT: .LBB31_3: ; RV64IA-TSO-NEXT: ret +; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i64_acquire_monotonic: +; RV32IA-WMO-ZACAS: # %bb.0: +; RV32IA-WMO-ZACAS-NEXT: mv a5, a4 +; RV32IA-WMO-ZACAS-NEXT: mv a7, a2 +; RV32IA-WMO-ZACAS-NEXT: mv a4, a3 +; RV32IA-WMO-ZACAS-NEXT: mv a6, a1 +; RV32IA-WMO-ZACAS-NEXT: amocas.d.aq a6, a4, (a0) +; RV32IA-WMO-ZACAS-NEXT: ret +; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i64_acquire_monotonic: +; RV64IA-WMO-ZACAS: # %bb.0: +; RV64IA-WMO-ZACAS-NEXT: amocas.d.aq a1, a2, (a0) +; RV64IA-WMO-ZACAS-NEXT: ret %res = cmpxchg ptr %ptr, i64 %cmp, i64 %val acquire monotonic ret void } @@ -2935,22 +4431,48 @@ define void @cmpxchg_i64_acquire_acquire(ptr %ptr, i64 %cmp, i64 %val) nounwind ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; -; RV32IA-LABEL: cmpxchg_i64_acquire_acquire: -; RV32IA: # %bb.0: -; RV32IA-NEXT: addi sp, sp, -16 -; RV32IA-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32IA-NEXT: mv a6, a4 -; RV32IA-NEXT: sw a2, 4(sp) -; RV32IA-NEXT: sw a1, 0(sp) -; RV32IA-NEXT: mv a1, sp -; RV32IA-NEXT: li a4, 2 -; RV32IA-NEXT: li a5, 2 -; RV32IA-NEXT: mv a2, a3 -; RV32IA-NEXT: mv a3, a6 -; RV32IA-NEXT: call __atomic_compare_exchange_8@plt -; RV32IA-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32IA-NEXT: addi sp, sp, 16 -; RV32IA-NEXT: ret +; RV32IA-WMO-LABEL: cmpxchg_i64_acquire_acquire: +; RV32IA-WMO: # %bb.0: +; RV32IA-WMO-NEXT: addi sp, sp, -16 +; RV32IA-WMO-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IA-WMO-NEXT: mv a6, a4 +; RV32IA-WMO-NEXT: sw a2, 4(sp) +; RV32IA-WMO-NEXT: sw a1, 0(sp) +; RV32IA-WMO-NEXT: mv a1, sp +; RV32IA-WMO-NEXT: li a4, 2 +; RV32IA-WMO-NEXT: li a5, 2 +; RV32IA-WMO-NEXT: mv a2, a3 +; RV32IA-WMO-NEXT: mv a3, a6 +; RV32IA-WMO-NEXT: call __atomic_compare_exchange_8@plt +; RV32IA-WMO-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IA-WMO-NEXT: addi sp, sp, 16 +; 
RV32IA-WMO-NEXT: ret +; +; RV32IA-ZACAS-LABEL: cmpxchg_i64_acquire_acquire: +; RV32IA-ZACAS: # %bb.0: +; RV32IA-ZACAS-NEXT: mv a5, a4 +; RV32IA-ZACAS-NEXT: mv a7, a2 +; RV32IA-ZACAS-NEXT: mv a4, a3 +; RV32IA-ZACAS-NEXT: mv a6, a1 +; RV32IA-ZACAS-NEXT: amocas.d.aq a6, a4, (a0) +; RV32IA-ZACAS-NEXT: ret +; +; RV32IA-TSO-LABEL: cmpxchg_i64_acquire_acquire: +; RV32IA-TSO: # %bb.0: +; RV32IA-TSO-NEXT: addi sp, sp, -16 +; RV32IA-TSO-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IA-TSO-NEXT: mv a6, a4 +; RV32IA-TSO-NEXT: sw a2, 4(sp) +; RV32IA-TSO-NEXT: sw a1, 0(sp) +; RV32IA-TSO-NEXT: mv a1, sp +; RV32IA-TSO-NEXT: li a4, 2 +; RV32IA-TSO-NEXT: li a5, 2 +; RV32IA-TSO-NEXT: mv a2, a3 +; RV32IA-TSO-NEXT: mv a3, a6 +; RV32IA-TSO-NEXT: call __atomic_compare_exchange_8@plt +; RV32IA-TSO-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IA-TSO-NEXT: addi sp, sp, 16 +; RV32IA-TSO-NEXT: ret ; ; RV64I-LABEL: cmpxchg_i64_acquire_acquire: ; RV64I: # %bb.0: @@ -2976,6 +4498,11 @@ define void @cmpxchg_i64_acquire_acquire(ptr %ptr, i64 %cmp, i64 %val) nounwind ; RV64IA-WMO-NEXT: .LBB32_3: ; RV64IA-WMO-NEXT: ret ; +; RV64IA-ZACAS-LABEL: cmpxchg_i64_acquire_acquire: +; RV64IA-ZACAS: # %bb.0: +; RV64IA-ZACAS-NEXT: amocas.d.aq a1, a2, (a0) +; RV64IA-ZACAS-NEXT: ret +; ; RV64IA-TSO-LABEL: cmpxchg_i64_acquire_acquire: ; RV64IA-TSO: # %bb.0: ; RV64IA-TSO-NEXT: .LBB32_1: # =>This Inner Loop Header: Depth=1 @@ -2986,6 +4513,18 @@ define void @cmpxchg_i64_acquire_acquire(ptr %ptr, i64 %cmp, i64 %val) nounwind ; RV64IA-TSO-NEXT: bnez a4, .LBB32_1 ; RV64IA-TSO-NEXT: .LBB32_3: ; RV64IA-TSO-NEXT: ret +; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i64_acquire_acquire: +; RV32IA-WMO-ZACAS: # %bb.0: +; RV32IA-WMO-ZACAS-NEXT: mv a5, a4 +; RV32IA-WMO-ZACAS-NEXT: mv a7, a2 +; RV32IA-WMO-ZACAS-NEXT: mv a4, a3 +; RV32IA-WMO-ZACAS-NEXT: mv a6, a1 +; RV32IA-WMO-ZACAS-NEXT: amocas.d.aq a6, a4, (a0) +; RV32IA-WMO-ZACAS-NEXT: ret +; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i64_acquire_acquire: +; RV64IA-WMO-ZACAS: # %bb.0: +; 
RV64IA-WMO-ZACAS-NEXT: amocas.d.aq a1, a2, (a0) +; RV64IA-WMO-ZACAS-NEXT: ret %res = cmpxchg ptr %ptr, i64 %cmp, i64 %val acquire acquire ret void } @@ -3008,22 +4547,48 @@ define void @cmpxchg_i64_release_monotonic(ptr %ptr, i64 %cmp, i64 %val) nounwin ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; -; RV32IA-LABEL: cmpxchg_i64_release_monotonic: -; RV32IA: # %bb.0: -; RV32IA-NEXT: addi sp, sp, -16 -; RV32IA-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32IA-NEXT: mv a5, a4 -; RV32IA-NEXT: sw a2, 4(sp) -; RV32IA-NEXT: sw a1, 0(sp) -; RV32IA-NEXT: mv a1, sp -; RV32IA-NEXT: li a4, 3 -; RV32IA-NEXT: mv a2, a3 -; RV32IA-NEXT: mv a3, a5 -; RV32IA-NEXT: li a5, 0 -; RV32IA-NEXT: call __atomic_compare_exchange_8@plt -; RV32IA-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32IA-NEXT: addi sp, sp, 16 -; RV32IA-NEXT: ret +; RV32IA-WMO-LABEL: cmpxchg_i64_release_monotonic: +; RV32IA-WMO: # %bb.0: +; RV32IA-WMO-NEXT: addi sp, sp, -16 +; RV32IA-WMO-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IA-WMO-NEXT: mv a5, a4 +; RV32IA-WMO-NEXT: sw a2, 4(sp) +; RV32IA-WMO-NEXT: sw a1, 0(sp) +; RV32IA-WMO-NEXT: mv a1, sp +; RV32IA-WMO-NEXT: li a4, 3 +; RV32IA-WMO-NEXT: mv a2, a3 +; RV32IA-WMO-NEXT: mv a3, a5 +; RV32IA-WMO-NEXT: li a5, 0 +; RV32IA-WMO-NEXT: call __atomic_compare_exchange_8@plt +; RV32IA-WMO-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IA-WMO-NEXT: addi sp, sp, 16 +; RV32IA-WMO-NEXT: ret +; +; RV32IA-ZACAS-LABEL: cmpxchg_i64_release_monotonic: +; RV32IA-ZACAS: # %bb.0: +; RV32IA-ZACAS-NEXT: mv a5, a4 +; RV32IA-ZACAS-NEXT: mv a7, a2 +; RV32IA-ZACAS-NEXT: mv a4, a3 +; RV32IA-ZACAS-NEXT: mv a6, a1 +; RV32IA-ZACAS-NEXT: amocas.d.rl a6, a4, (a0) +; RV32IA-ZACAS-NEXT: ret +; +; RV32IA-TSO-LABEL: cmpxchg_i64_release_monotonic: +; RV32IA-TSO: # %bb.0: +; RV32IA-TSO-NEXT: addi sp, sp, -16 +; RV32IA-TSO-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IA-TSO-NEXT: mv a5, a4 +; RV32IA-TSO-NEXT: sw a2, 4(sp) +; RV32IA-TSO-NEXT: sw a1, 0(sp) +; RV32IA-TSO-NEXT: mv a1, sp +; 
RV32IA-TSO-NEXT: li a4, 3 +; RV32IA-TSO-NEXT: mv a2, a3 +; RV32IA-TSO-NEXT: mv a3, a5 +; RV32IA-TSO-NEXT: li a5, 0 +; RV32IA-TSO-NEXT: call __atomic_compare_exchange_8@plt +; RV32IA-TSO-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IA-TSO-NEXT: addi sp, sp, 16 +; RV32IA-TSO-NEXT: ret ; ; RV64I-LABEL: cmpxchg_i64_release_monotonic: ; RV64I: # %bb.0: @@ -3049,6 +4614,11 @@ define void @cmpxchg_i64_release_monotonic(ptr %ptr, i64 %cmp, i64 %val) nounwin ; RV64IA-WMO-NEXT: .LBB33_3: ; RV64IA-WMO-NEXT: ret ; +; RV64IA-ZACAS-LABEL: cmpxchg_i64_release_monotonic: +; RV64IA-ZACAS: # %bb.0: +; RV64IA-ZACAS-NEXT: amocas.d.rl a1, a2, (a0) +; RV64IA-ZACAS-NEXT: ret +; ; RV64IA-TSO-LABEL: cmpxchg_i64_release_monotonic: ; RV64IA-TSO: # %bb.0: ; RV64IA-TSO-NEXT: .LBB33_1: # =>This Inner Loop Header: Depth=1 @@ -3059,6 +4629,18 @@ define void @cmpxchg_i64_release_monotonic(ptr %ptr, i64 %cmp, i64 %val) nounwin ; RV64IA-TSO-NEXT: bnez a4, .LBB33_1 ; RV64IA-TSO-NEXT: .LBB33_3: ; RV64IA-TSO-NEXT: ret +; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i64_release_monotonic: +; RV32IA-WMO-ZACAS: # %bb.0: +; RV32IA-WMO-ZACAS-NEXT: mv a5, a4 +; RV32IA-WMO-ZACAS-NEXT: mv a7, a2 +; RV32IA-WMO-ZACAS-NEXT: mv a4, a3 +; RV32IA-WMO-ZACAS-NEXT: mv a6, a1 +; RV32IA-WMO-ZACAS-NEXT: amocas.d.rl a6, a4, (a0) +; RV32IA-WMO-ZACAS-NEXT: ret +; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i64_release_monotonic: +; RV64IA-WMO-ZACAS: # %bb.0: +; RV64IA-WMO-ZACAS-NEXT: amocas.d.rl a1, a2, (a0) +; RV64IA-WMO-ZACAS-NEXT: ret %res = cmpxchg ptr %ptr, i64 %cmp, i64 %val release monotonic ret void } @@ -3081,22 +4663,48 @@ define void @cmpxchg_i64_release_acquire(ptr %ptr, i64 %cmp, i64 %val) nounwind ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; -; RV32IA-LABEL: cmpxchg_i64_release_acquire: -; RV32IA: # %bb.0: -; RV32IA-NEXT: addi sp, sp, -16 -; RV32IA-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32IA-NEXT: mv a6, a4 -; RV32IA-NEXT: sw a2, 4(sp) -; RV32IA-NEXT: sw a1, 0(sp) -; RV32IA-NEXT: mv a1, sp -; RV32IA-NEXT: li a4, 3 -; 
RV32IA-NEXT: li a5, 2 -; RV32IA-NEXT: mv a2, a3 -; RV32IA-NEXT: mv a3, a6 -; RV32IA-NEXT: call __atomic_compare_exchange_8@plt -; RV32IA-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32IA-NEXT: addi sp, sp, 16 -; RV32IA-NEXT: ret +; RV32IA-WMO-LABEL: cmpxchg_i64_release_acquire: +; RV32IA-WMO: # %bb.0: +; RV32IA-WMO-NEXT: addi sp, sp, -16 +; RV32IA-WMO-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IA-WMO-NEXT: mv a6, a4 +; RV32IA-WMO-NEXT: sw a2, 4(sp) +; RV32IA-WMO-NEXT: sw a1, 0(sp) +; RV32IA-WMO-NEXT: mv a1, sp +; RV32IA-WMO-NEXT: li a4, 3 +; RV32IA-WMO-NEXT: li a5, 2 +; RV32IA-WMO-NEXT: mv a2, a3 +; RV32IA-WMO-NEXT: mv a3, a6 +; RV32IA-WMO-NEXT: call __atomic_compare_exchange_8@plt +; RV32IA-WMO-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IA-WMO-NEXT: addi sp, sp, 16 +; RV32IA-WMO-NEXT: ret +; +; RV32IA-ZACAS-LABEL: cmpxchg_i64_release_acquire: +; RV32IA-ZACAS: # %bb.0: +; RV32IA-ZACAS-NEXT: mv a5, a4 +; RV32IA-ZACAS-NEXT: mv a7, a2 +; RV32IA-ZACAS-NEXT: mv a4, a3 +; RV32IA-ZACAS-NEXT: mv a6, a1 +; RV32IA-ZACAS-NEXT: amocas.d.aqrl a6, a4, (a0) +; RV32IA-ZACAS-NEXT: ret +; +; RV32IA-TSO-LABEL: cmpxchg_i64_release_acquire: +; RV32IA-TSO: # %bb.0: +; RV32IA-TSO-NEXT: addi sp, sp, -16 +; RV32IA-TSO-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IA-TSO-NEXT: mv a6, a4 +; RV32IA-TSO-NEXT: sw a2, 4(sp) +; RV32IA-TSO-NEXT: sw a1, 0(sp) +; RV32IA-TSO-NEXT: mv a1, sp +; RV32IA-TSO-NEXT: li a4, 3 +; RV32IA-TSO-NEXT: li a5, 2 +; RV32IA-TSO-NEXT: mv a2, a3 +; RV32IA-TSO-NEXT: mv a3, a6 +; RV32IA-TSO-NEXT: call __atomic_compare_exchange_8@plt +; RV32IA-TSO-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IA-TSO-NEXT: addi sp, sp, 16 +; RV32IA-TSO-NEXT: ret ; ; RV64I-LABEL: cmpxchg_i64_release_acquire: ; RV64I: # %bb.0: @@ -3122,6 +4730,11 @@ define void @cmpxchg_i64_release_acquire(ptr %ptr, i64 %cmp, i64 %val) nounwind ; RV64IA-WMO-NEXT: .LBB34_3: ; RV64IA-WMO-NEXT: ret ; +; RV64IA-ZACAS-LABEL: cmpxchg_i64_release_acquire: +; RV64IA-ZACAS: # %bb.0: +; 
RV64IA-ZACAS-NEXT: amocas.d.aqrl a1, a2, (a0) +; RV64IA-ZACAS-NEXT: ret +; ; RV64IA-TSO-LABEL: cmpxchg_i64_release_acquire: ; RV64IA-TSO: # %bb.0: ; RV64IA-TSO-NEXT: .LBB34_1: # =>This Inner Loop Header: Depth=1 @@ -3132,6 +4745,18 @@ define void @cmpxchg_i64_release_acquire(ptr %ptr, i64 %cmp, i64 %val) nounwind ; RV64IA-TSO-NEXT: bnez a4, .LBB34_1 ; RV64IA-TSO-NEXT: .LBB34_3: ; RV64IA-TSO-NEXT: ret +; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i64_release_acquire: +; RV32IA-WMO-ZACAS: # %bb.0: +; RV32IA-WMO-ZACAS-NEXT: mv a5, a4 +; RV32IA-WMO-ZACAS-NEXT: mv a7, a2 +; RV32IA-WMO-ZACAS-NEXT: mv a4, a3 +; RV32IA-WMO-ZACAS-NEXT: mv a6, a1 +; RV32IA-WMO-ZACAS-NEXT: amocas.d.aqrl a6, a4, (a0) +; RV32IA-WMO-ZACAS-NEXT: ret +; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i64_release_acquire: +; RV64IA-WMO-ZACAS: # %bb.0: +; RV64IA-WMO-ZACAS-NEXT: amocas.d.aqrl a1, a2, (a0) +; RV64IA-WMO-ZACAS-NEXT: ret %res = cmpxchg ptr %ptr, i64 %cmp, i64 %val release acquire ret void } @@ -3154,22 +4779,48 @@ define void @cmpxchg_i64_acq_rel_monotonic(ptr %ptr, i64 %cmp, i64 %val) nounwin ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; -; RV32IA-LABEL: cmpxchg_i64_acq_rel_monotonic: -; RV32IA: # %bb.0: -; RV32IA-NEXT: addi sp, sp, -16 -; RV32IA-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32IA-NEXT: mv a5, a4 -; RV32IA-NEXT: sw a2, 4(sp) -; RV32IA-NEXT: sw a1, 0(sp) -; RV32IA-NEXT: mv a1, sp -; RV32IA-NEXT: li a4, 4 -; RV32IA-NEXT: mv a2, a3 -; RV32IA-NEXT: mv a3, a5 -; RV32IA-NEXT: li a5, 0 -; RV32IA-NEXT: call __atomic_compare_exchange_8@plt -; RV32IA-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32IA-NEXT: addi sp, sp, 16 -; RV32IA-NEXT: ret +; RV32IA-WMO-LABEL: cmpxchg_i64_acq_rel_monotonic: +; RV32IA-WMO: # %bb.0: +; RV32IA-WMO-NEXT: addi sp, sp, -16 +; RV32IA-WMO-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IA-WMO-NEXT: mv a5, a4 +; RV32IA-WMO-NEXT: sw a2, 4(sp) +; RV32IA-WMO-NEXT: sw a1, 0(sp) +; RV32IA-WMO-NEXT: mv a1, sp +; RV32IA-WMO-NEXT: li a4, 4 +; RV32IA-WMO-NEXT: mv a2, a3 +; 
RV32IA-WMO-NEXT: mv a3, a5 +; RV32IA-WMO-NEXT: li a5, 0 +; RV32IA-WMO-NEXT: call __atomic_compare_exchange_8@plt +; RV32IA-WMO-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IA-WMO-NEXT: addi sp, sp, 16 +; RV32IA-WMO-NEXT: ret +; +; RV32IA-ZACAS-LABEL: cmpxchg_i64_acq_rel_monotonic: +; RV32IA-ZACAS: # %bb.0: +; RV32IA-ZACAS-NEXT: mv a5, a4 +; RV32IA-ZACAS-NEXT: mv a7, a2 +; RV32IA-ZACAS-NEXT: mv a4, a3 +; RV32IA-ZACAS-NEXT: mv a6, a1 +; RV32IA-ZACAS-NEXT: amocas.d.aqrl a6, a4, (a0) +; RV32IA-ZACAS-NEXT: ret +; +; RV32IA-TSO-LABEL: cmpxchg_i64_acq_rel_monotonic: +; RV32IA-TSO: # %bb.0: +; RV32IA-TSO-NEXT: addi sp, sp, -16 +; RV32IA-TSO-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IA-TSO-NEXT: mv a5, a4 +; RV32IA-TSO-NEXT: sw a2, 4(sp) +; RV32IA-TSO-NEXT: sw a1, 0(sp) +; RV32IA-TSO-NEXT: mv a1, sp +; RV32IA-TSO-NEXT: li a4, 4 +; RV32IA-TSO-NEXT: mv a2, a3 +; RV32IA-TSO-NEXT: mv a3, a5 +; RV32IA-TSO-NEXT: li a5, 0 +; RV32IA-TSO-NEXT: call __atomic_compare_exchange_8@plt +; RV32IA-TSO-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IA-TSO-NEXT: addi sp, sp, 16 +; RV32IA-TSO-NEXT: ret ; ; RV64I-LABEL: cmpxchg_i64_acq_rel_monotonic: ; RV64I: # %bb.0: @@ -3195,6 +4846,11 @@ define void @cmpxchg_i64_acq_rel_monotonic(ptr %ptr, i64 %cmp, i64 %val) nounwin ; RV64IA-WMO-NEXT: .LBB35_3: ; RV64IA-WMO-NEXT: ret ; +; RV64IA-ZACAS-LABEL: cmpxchg_i64_acq_rel_monotonic: +; RV64IA-ZACAS: # %bb.0: +; RV64IA-ZACAS-NEXT: amocas.d.aqrl a1, a2, (a0) +; RV64IA-ZACAS-NEXT: ret +; ; RV64IA-TSO-LABEL: cmpxchg_i64_acq_rel_monotonic: ; RV64IA-TSO: # %bb.0: ; RV64IA-TSO-NEXT: .LBB35_1: # =>This Inner Loop Header: Depth=1 @@ -3205,6 +4861,18 @@ define void @cmpxchg_i64_acq_rel_monotonic(ptr %ptr, i64 %cmp, i64 %val) nounwin ; RV64IA-TSO-NEXT: bnez a4, .LBB35_1 ; RV64IA-TSO-NEXT: .LBB35_3: ; RV64IA-TSO-NEXT: ret +; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i64_acq_rel_monotonic: +; RV32IA-WMO-ZACAS: # %bb.0: +; RV32IA-WMO-ZACAS-NEXT: mv a5, a4 +; RV32IA-WMO-ZACAS-NEXT: mv a7, a2 +; 
RV32IA-WMO-ZACAS-NEXT: mv a4, a3 +; RV32IA-WMO-ZACAS-NEXT: mv a6, a1 +; RV32IA-WMO-ZACAS-NEXT: amocas.d.aqrl a6, a4, (a0) +; RV32IA-WMO-ZACAS-NEXT: ret +; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i64_acq_rel_monotonic: +; RV64IA-WMO-ZACAS: # %bb.0: +; RV64IA-WMO-ZACAS-NEXT: amocas.d.aqrl a1, a2, (a0) +; RV64IA-WMO-ZACAS-NEXT: ret %res = cmpxchg ptr %ptr, i64 %cmp, i64 %val acq_rel monotonic ret void } @@ -3227,22 +4895,48 @@ define void @cmpxchg_i64_acq_rel_acquire(ptr %ptr, i64 %cmp, i64 %val) nounwind ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; -; RV32IA-LABEL: cmpxchg_i64_acq_rel_acquire: -; RV32IA: # %bb.0: -; RV32IA-NEXT: addi sp, sp, -16 -; RV32IA-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32IA-NEXT: mv a6, a4 -; RV32IA-NEXT: sw a2, 4(sp) -; RV32IA-NEXT: sw a1, 0(sp) -; RV32IA-NEXT: mv a1, sp -; RV32IA-NEXT: li a4, 4 -; RV32IA-NEXT: li a5, 2 -; RV32IA-NEXT: mv a2, a3 -; RV32IA-NEXT: mv a3, a6 -; RV32IA-NEXT: call __atomic_compare_exchange_8@plt -; RV32IA-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32IA-NEXT: addi sp, sp, 16 -; RV32IA-NEXT: ret +; RV32IA-WMO-LABEL: cmpxchg_i64_acq_rel_acquire: +; RV32IA-WMO: # %bb.0: +; RV32IA-WMO-NEXT: addi sp, sp, -16 +; RV32IA-WMO-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IA-WMO-NEXT: mv a6, a4 +; RV32IA-WMO-NEXT: sw a2, 4(sp) +; RV32IA-WMO-NEXT: sw a1, 0(sp) +; RV32IA-WMO-NEXT: mv a1, sp +; RV32IA-WMO-NEXT: li a4, 4 +; RV32IA-WMO-NEXT: li a5, 2 +; RV32IA-WMO-NEXT: mv a2, a3 +; RV32IA-WMO-NEXT: mv a3, a6 +; RV32IA-WMO-NEXT: call __atomic_compare_exchange_8@plt +; RV32IA-WMO-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IA-WMO-NEXT: addi sp, sp, 16 +; RV32IA-WMO-NEXT: ret +; +; RV32IA-ZACAS-LABEL: cmpxchg_i64_acq_rel_acquire: +; RV32IA-ZACAS: # %bb.0: +; RV32IA-ZACAS-NEXT: mv a5, a4 +; RV32IA-ZACAS-NEXT: mv a7, a2 +; RV32IA-ZACAS-NEXT: mv a4, a3 +; RV32IA-ZACAS-NEXT: mv a6, a1 +; RV32IA-ZACAS-NEXT: amocas.d.aqrl a6, a4, (a0) +; RV32IA-ZACAS-NEXT: ret +; +; RV32IA-TSO-LABEL: cmpxchg_i64_acq_rel_acquire: +; 
RV32IA-TSO: # %bb.0: +; RV32IA-TSO-NEXT: addi sp, sp, -16 +; RV32IA-TSO-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IA-TSO-NEXT: mv a6, a4 +; RV32IA-TSO-NEXT: sw a2, 4(sp) +; RV32IA-TSO-NEXT: sw a1, 0(sp) +; RV32IA-TSO-NEXT: mv a1, sp +; RV32IA-TSO-NEXT: li a4, 4 +; RV32IA-TSO-NEXT: li a5, 2 +; RV32IA-TSO-NEXT: mv a2, a3 +; RV32IA-TSO-NEXT: mv a3, a6 +; RV32IA-TSO-NEXT: call __atomic_compare_exchange_8@plt +; RV32IA-TSO-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IA-TSO-NEXT: addi sp, sp, 16 +; RV32IA-TSO-NEXT: ret ; ; RV64I-LABEL: cmpxchg_i64_acq_rel_acquire: ; RV64I: # %bb.0: @@ -3268,6 +4962,11 @@ define void @cmpxchg_i64_acq_rel_acquire(ptr %ptr, i64 %cmp, i64 %val) nounwind ; RV64IA-WMO-NEXT: .LBB36_3: ; RV64IA-WMO-NEXT: ret ; +; RV64IA-ZACAS-LABEL: cmpxchg_i64_acq_rel_acquire: +; RV64IA-ZACAS: # %bb.0: +; RV64IA-ZACAS-NEXT: amocas.d.aqrl a1, a2, (a0) +; RV64IA-ZACAS-NEXT: ret +; ; RV64IA-TSO-LABEL: cmpxchg_i64_acq_rel_acquire: ; RV64IA-TSO: # %bb.0: ; RV64IA-TSO-NEXT: .LBB36_1: # =>This Inner Loop Header: Depth=1 @@ -3278,6 +4977,18 @@ define void @cmpxchg_i64_acq_rel_acquire(ptr %ptr, i64 %cmp, i64 %val) nounwind ; RV64IA-TSO-NEXT: bnez a4, .LBB36_1 ; RV64IA-TSO-NEXT: .LBB36_3: ; RV64IA-TSO-NEXT: ret +; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i64_acq_rel_acquire: +; RV32IA-WMO-ZACAS: # %bb.0: +; RV32IA-WMO-ZACAS-NEXT: mv a5, a4 +; RV32IA-WMO-ZACAS-NEXT: mv a7, a2 +; RV32IA-WMO-ZACAS-NEXT: mv a4, a3 +; RV32IA-WMO-ZACAS-NEXT: mv a6, a1 +; RV32IA-WMO-ZACAS-NEXT: amocas.d.aqrl a6, a4, (a0) +; RV32IA-WMO-ZACAS-NEXT: ret +; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i64_acq_rel_acquire: +; RV64IA-WMO-ZACAS: # %bb.0: +; RV64IA-WMO-ZACAS-NEXT: amocas.d.aqrl a1, a2, (a0) +; RV64IA-WMO-ZACAS-NEXT: ret %res = cmpxchg ptr %ptr, i64 %cmp, i64 %val acq_rel acquire ret void } @@ -3300,22 +5011,48 @@ define void @cmpxchg_i64_seq_cst_monotonic(ptr %ptr, i64 %cmp, i64 %val) nounwin ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; -; RV32IA-LABEL: cmpxchg_i64_seq_cst_monotonic: -; 
RV32IA: # %bb.0: -; RV32IA-NEXT: addi sp, sp, -16 -; RV32IA-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32IA-NEXT: mv a5, a4 -; RV32IA-NEXT: sw a2, 4(sp) -; RV32IA-NEXT: sw a1, 0(sp) -; RV32IA-NEXT: mv a1, sp -; RV32IA-NEXT: li a4, 5 -; RV32IA-NEXT: mv a2, a3 -; RV32IA-NEXT: mv a3, a5 -; RV32IA-NEXT: li a5, 0 -; RV32IA-NEXT: call __atomic_compare_exchange_8@plt -; RV32IA-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32IA-NEXT: addi sp, sp, 16 -; RV32IA-NEXT: ret +; RV32IA-WMO-LABEL: cmpxchg_i64_seq_cst_monotonic: +; RV32IA-WMO: # %bb.0: +; RV32IA-WMO-NEXT: addi sp, sp, -16 +; RV32IA-WMO-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IA-WMO-NEXT: mv a5, a4 +; RV32IA-WMO-NEXT: sw a2, 4(sp) +; RV32IA-WMO-NEXT: sw a1, 0(sp) +; RV32IA-WMO-NEXT: mv a1, sp +; RV32IA-WMO-NEXT: li a4, 5 +; RV32IA-WMO-NEXT: mv a2, a3 +; RV32IA-WMO-NEXT: mv a3, a5 +; RV32IA-WMO-NEXT: li a5, 0 +; RV32IA-WMO-NEXT: call __atomic_compare_exchange_8@plt +; RV32IA-WMO-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IA-WMO-NEXT: addi sp, sp, 16 +; RV32IA-WMO-NEXT: ret +; +; RV32IA-ZACAS-LABEL: cmpxchg_i64_seq_cst_monotonic: +; RV32IA-ZACAS: # %bb.0: +; RV32IA-ZACAS-NEXT: mv a5, a4 +; RV32IA-ZACAS-NEXT: mv a7, a2 +; RV32IA-ZACAS-NEXT: mv a4, a3 +; RV32IA-ZACAS-NEXT: mv a6, a1 +; RV32IA-ZACAS-NEXT: amocas.d.aqrl a6, a4, (a0) +; RV32IA-ZACAS-NEXT: ret +; +; RV32IA-TSO-LABEL: cmpxchg_i64_seq_cst_monotonic: +; RV32IA-TSO: # %bb.0: +; RV32IA-TSO-NEXT: addi sp, sp, -16 +; RV32IA-TSO-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IA-TSO-NEXT: mv a5, a4 +; RV32IA-TSO-NEXT: sw a2, 4(sp) +; RV32IA-TSO-NEXT: sw a1, 0(sp) +; RV32IA-TSO-NEXT: mv a1, sp +; RV32IA-TSO-NEXT: li a4, 5 +; RV32IA-TSO-NEXT: mv a2, a3 +; RV32IA-TSO-NEXT: mv a3, a5 +; RV32IA-TSO-NEXT: li a5, 0 +; RV32IA-TSO-NEXT: call __atomic_compare_exchange_8@plt +; RV32IA-TSO-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IA-TSO-NEXT: addi sp, sp, 16 +; RV32IA-TSO-NEXT: ret ; ; RV64I-LABEL: cmpxchg_i64_seq_cst_monotonic: ; RV64I: # 
%bb.0: @@ -3330,16 +5067,44 @@ define void @cmpxchg_i64_seq_cst_monotonic(ptr %ptr, i64 %cmp, i64 %val) nounwin ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; -; RV64IA-LABEL: cmpxchg_i64_seq_cst_monotonic: -; RV64IA: # %bb.0: -; RV64IA-NEXT: .LBB37_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.d.aqrl a3, (a0) -; RV64IA-NEXT: bne a3, a1, .LBB37_3 -; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB37_1 Depth=1 -; RV64IA-NEXT: sc.d.rl a4, a2, (a0) -; RV64IA-NEXT: bnez a4, .LBB37_1 -; RV64IA-NEXT: .LBB37_3: -; RV64IA-NEXT: ret +; RV64IA-WMO-LABEL: cmpxchg_i64_seq_cst_monotonic: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: .LBB37_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-WMO-NEXT: lr.d.aqrl a3, (a0) +; RV64IA-WMO-NEXT: bne a3, a1, .LBB37_3 +; RV64IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB37_1 Depth=1 +; RV64IA-WMO-NEXT: sc.d.rl a4, a2, (a0) +; RV64IA-WMO-NEXT: bnez a4, .LBB37_1 +; RV64IA-WMO-NEXT: .LBB37_3: +; RV64IA-WMO-NEXT: ret +; +; RV64IA-ZACAS-LABEL: cmpxchg_i64_seq_cst_monotonic: +; RV64IA-ZACAS: # %bb.0: +; RV64IA-ZACAS-NEXT: amocas.d.aqrl a1, a2, (a0) +; RV64IA-ZACAS-NEXT: ret +; +; RV64IA-TSO-LABEL: cmpxchg_i64_seq_cst_monotonic: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: .LBB37_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-TSO-NEXT: lr.d.aqrl a3, (a0) +; RV64IA-TSO-NEXT: bne a3, a1, .LBB37_3 +; RV64IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB37_1 Depth=1 +; RV64IA-TSO-NEXT: sc.d.rl a4, a2, (a0) +; RV64IA-TSO-NEXT: bnez a4, .LBB37_1 +; RV64IA-TSO-NEXT: .LBB37_3: +; RV64IA-TSO-NEXT: ret +; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i64_seq_cst_monotonic: +; RV32IA-WMO-ZACAS: # %bb.0: +; RV32IA-WMO-ZACAS-NEXT: mv a5, a4 +; RV32IA-WMO-ZACAS-NEXT: mv a7, a2 +; RV32IA-WMO-ZACAS-NEXT: mv a4, a3 +; RV32IA-WMO-ZACAS-NEXT: mv a6, a1 +; RV32IA-WMO-ZACAS-NEXT: amocas.d.aqrl a6, a4, (a0) +; RV32IA-WMO-ZACAS-NEXT: ret +; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i64_seq_cst_monotonic: +; RV64IA-WMO-ZACAS: # %bb.0: +; RV64IA-WMO-ZACAS-NEXT: amocas.d.aqrl a1, a2, (a0) +; 
RV64IA-WMO-ZACAS-NEXT: ret %res = cmpxchg ptr %ptr, i64 %cmp, i64 %val seq_cst monotonic ret void } @@ -3362,22 +5127,48 @@ define void @cmpxchg_i64_seq_cst_acquire(ptr %ptr, i64 %cmp, i64 %val) nounwind ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; -; RV32IA-LABEL: cmpxchg_i64_seq_cst_acquire: -; RV32IA: # %bb.0: -; RV32IA-NEXT: addi sp, sp, -16 -; RV32IA-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32IA-NEXT: mv a6, a4 -; RV32IA-NEXT: sw a2, 4(sp) -; RV32IA-NEXT: sw a1, 0(sp) -; RV32IA-NEXT: mv a1, sp -; RV32IA-NEXT: li a4, 5 -; RV32IA-NEXT: li a5, 2 -; RV32IA-NEXT: mv a2, a3 -; RV32IA-NEXT: mv a3, a6 -; RV32IA-NEXT: call __atomic_compare_exchange_8@plt -; RV32IA-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32IA-NEXT: addi sp, sp, 16 -; RV32IA-NEXT: ret +; RV32IA-WMO-LABEL: cmpxchg_i64_seq_cst_acquire: +; RV32IA-WMO: # %bb.0: +; RV32IA-WMO-NEXT: addi sp, sp, -16 +; RV32IA-WMO-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IA-WMO-NEXT: mv a6, a4 +; RV32IA-WMO-NEXT: sw a2, 4(sp) +; RV32IA-WMO-NEXT: sw a1, 0(sp) +; RV32IA-WMO-NEXT: mv a1, sp +; RV32IA-WMO-NEXT: li a4, 5 +; RV32IA-WMO-NEXT: li a5, 2 +; RV32IA-WMO-NEXT: mv a2, a3 +; RV32IA-WMO-NEXT: mv a3, a6 +; RV32IA-WMO-NEXT: call __atomic_compare_exchange_8@plt +; RV32IA-WMO-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IA-WMO-NEXT: addi sp, sp, 16 +; RV32IA-WMO-NEXT: ret +; +; RV32IA-ZACAS-LABEL: cmpxchg_i64_seq_cst_acquire: +; RV32IA-ZACAS: # %bb.0: +; RV32IA-ZACAS-NEXT: mv a5, a4 +; RV32IA-ZACAS-NEXT: mv a7, a2 +; RV32IA-ZACAS-NEXT: mv a4, a3 +; RV32IA-ZACAS-NEXT: mv a6, a1 +; RV32IA-ZACAS-NEXT: amocas.d.aqrl a6, a4, (a0) +; RV32IA-ZACAS-NEXT: ret +; +; RV32IA-TSO-LABEL: cmpxchg_i64_seq_cst_acquire: +; RV32IA-TSO: # %bb.0: +; RV32IA-TSO-NEXT: addi sp, sp, -16 +; RV32IA-TSO-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IA-TSO-NEXT: mv a6, a4 +; RV32IA-TSO-NEXT: sw a2, 4(sp) +; RV32IA-TSO-NEXT: sw a1, 0(sp) +; RV32IA-TSO-NEXT: mv a1, sp +; RV32IA-TSO-NEXT: li a4, 5 +; RV32IA-TSO-NEXT: li a5, 2 
+; RV32IA-TSO-NEXT: mv a2, a3 +; RV32IA-TSO-NEXT: mv a3, a6 +; RV32IA-TSO-NEXT: call __atomic_compare_exchange_8@plt +; RV32IA-TSO-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IA-TSO-NEXT: addi sp, sp, 16 +; RV32IA-TSO-NEXT: ret ; ; RV64I-LABEL: cmpxchg_i64_seq_cst_acquire: ; RV64I: # %bb.0: @@ -3392,16 +5183,44 @@ define void @cmpxchg_i64_seq_cst_acquire(ptr %ptr, i64 %cmp, i64 %val) nounwind ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; -; RV64IA-LABEL: cmpxchg_i64_seq_cst_acquire: -; RV64IA: # %bb.0: -; RV64IA-NEXT: .LBB38_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.d.aqrl a3, (a0) -; RV64IA-NEXT: bne a3, a1, .LBB38_3 -; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB38_1 Depth=1 -; RV64IA-NEXT: sc.d.rl a4, a2, (a0) -; RV64IA-NEXT: bnez a4, .LBB38_1 -; RV64IA-NEXT: .LBB38_3: -; RV64IA-NEXT: ret +; RV64IA-WMO-LABEL: cmpxchg_i64_seq_cst_acquire: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: .LBB38_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-WMO-NEXT: lr.d.aqrl a3, (a0) +; RV64IA-WMO-NEXT: bne a3, a1, .LBB38_3 +; RV64IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB38_1 Depth=1 +; RV64IA-WMO-NEXT: sc.d.rl a4, a2, (a0) +; RV64IA-WMO-NEXT: bnez a4, .LBB38_1 +; RV64IA-WMO-NEXT: .LBB38_3: +; RV64IA-WMO-NEXT: ret +; +; RV64IA-ZACAS-LABEL: cmpxchg_i64_seq_cst_acquire: +; RV64IA-ZACAS: # %bb.0: +; RV64IA-ZACAS-NEXT: amocas.d.aqrl a1, a2, (a0) +; RV64IA-ZACAS-NEXT: ret +; +; RV64IA-TSO-LABEL: cmpxchg_i64_seq_cst_acquire: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: .LBB38_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-TSO-NEXT: lr.d.aqrl a3, (a0) +; RV64IA-TSO-NEXT: bne a3, a1, .LBB38_3 +; RV64IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB38_1 Depth=1 +; RV64IA-TSO-NEXT: sc.d.rl a4, a2, (a0) +; RV64IA-TSO-NEXT: bnez a4, .LBB38_1 +; RV64IA-TSO-NEXT: .LBB38_3: +; RV64IA-TSO-NEXT: ret +; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i64_seq_cst_acquire: +; RV32IA-WMO-ZACAS: # %bb.0: +; RV32IA-WMO-ZACAS-NEXT: mv a5, a4 +; RV32IA-WMO-ZACAS-NEXT: mv a7, a2 +; RV32IA-WMO-ZACAS-NEXT: 
mv a4, a3 +; RV32IA-WMO-ZACAS-NEXT: mv a6, a1 +; RV32IA-WMO-ZACAS-NEXT: amocas.d.aqrl a6, a4, (a0) +; RV32IA-WMO-ZACAS-NEXT: ret +; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i64_seq_cst_acquire: +; RV64IA-WMO-ZACAS: # %bb.0: +; RV64IA-WMO-ZACAS-NEXT: amocas.d.aqrl a1, a2, (a0) +; RV64IA-WMO-ZACAS-NEXT: ret %res = cmpxchg ptr %ptr, i64 %cmp, i64 %val seq_cst acquire ret void } @@ -3424,25 +5243,51 @@ define void @cmpxchg_i64_seq_cst_seq_cst(ptr %ptr, i64 %cmp, i64 %val) nounwind ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; -; RV32IA-LABEL: cmpxchg_i64_seq_cst_seq_cst: -; RV32IA: # %bb.0: -; RV32IA-NEXT: addi sp, sp, -16 -; RV32IA-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32IA-NEXT: mv a6, a4 -; RV32IA-NEXT: sw a2, 4(sp) -; RV32IA-NEXT: sw a1, 0(sp) -; RV32IA-NEXT: mv a1, sp -; RV32IA-NEXT: li a4, 5 -; RV32IA-NEXT: li a5, 5 -; RV32IA-NEXT: mv a2, a3 -; RV32IA-NEXT: mv a3, a6 -; RV32IA-NEXT: call __atomic_compare_exchange_8@plt -; RV32IA-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32IA-NEXT: addi sp, sp, 16 -; RV32IA-NEXT: ret +; RV32IA-WMO-LABEL: cmpxchg_i64_seq_cst_seq_cst: +; RV32IA-WMO: # %bb.0: +; RV32IA-WMO-NEXT: addi sp, sp, -16 +; RV32IA-WMO-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IA-WMO-NEXT: mv a6, a4 +; RV32IA-WMO-NEXT: sw a2, 4(sp) +; RV32IA-WMO-NEXT: sw a1, 0(sp) +; RV32IA-WMO-NEXT: mv a1, sp +; RV32IA-WMO-NEXT: li a4, 5 +; RV32IA-WMO-NEXT: li a5, 5 +; RV32IA-WMO-NEXT: mv a2, a3 +; RV32IA-WMO-NEXT: mv a3, a6 +; RV32IA-WMO-NEXT: call __atomic_compare_exchange_8@plt +; RV32IA-WMO-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IA-WMO-NEXT: addi sp, sp, 16 +; RV32IA-WMO-NEXT: ret ; -; RV64I-LABEL: cmpxchg_i64_seq_cst_seq_cst: -; RV64I: # %bb.0: +; RV32IA-ZACAS-LABEL: cmpxchg_i64_seq_cst_seq_cst: +; RV32IA-ZACAS: # %bb.0: +; RV32IA-ZACAS-NEXT: mv a5, a4 +; RV32IA-ZACAS-NEXT: mv a7, a2 +; RV32IA-ZACAS-NEXT: mv a4, a3 +; RV32IA-ZACAS-NEXT: mv a6, a1 +; RV32IA-ZACAS-NEXT: amocas.d.aqrl a6, a4, (a0) +; RV32IA-ZACAS-NEXT: ret +; +; RV32IA-TSO-LABEL: 
cmpxchg_i64_seq_cst_seq_cst: +; RV32IA-TSO: # %bb.0: +; RV32IA-TSO-NEXT: addi sp, sp, -16 +; RV32IA-TSO-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IA-TSO-NEXT: mv a6, a4 +; RV32IA-TSO-NEXT: sw a2, 4(sp) +; RV32IA-TSO-NEXT: sw a1, 0(sp) +; RV32IA-TSO-NEXT: mv a1, sp +; RV32IA-TSO-NEXT: li a4, 5 +; RV32IA-TSO-NEXT: li a5, 5 +; RV32IA-TSO-NEXT: mv a2, a3 +; RV32IA-TSO-NEXT: mv a3, a6 +; RV32IA-TSO-NEXT: call __atomic_compare_exchange_8@plt +; RV32IA-TSO-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IA-TSO-NEXT: addi sp, sp, 16 +; RV32IA-TSO-NEXT: ret +; +; RV64I-LABEL: cmpxchg_i64_seq_cst_seq_cst: +; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 ; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd a1, 0(sp) @@ -3454,16 +5299,1321 @@ define void @cmpxchg_i64_seq_cst_seq_cst(ptr %ptr, i64 %cmp, i64 %val) nounwind ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; -; RV64IA-LABEL: cmpxchg_i64_seq_cst_seq_cst: -; RV64IA: # %bb.0: -; RV64IA-NEXT: .LBB39_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.d.aqrl a3, (a0) -; RV64IA-NEXT: bne a3, a1, .LBB39_3 -; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB39_1 Depth=1 -; RV64IA-NEXT: sc.d.rl a4, a2, (a0) -; RV64IA-NEXT: bnez a4, .LBB39_1 -; RV64IA-NEXT: .LBB39_3: -; RV64IA-NEXT: ret +; RV64IA-WMO-LABEL: cmpxchg_i64_seq_cst_seq_cst: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: .LBB39_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-WMO-NEXT: lr.d.aqrl a3, (a0) +; RV64IA-WMO-NEXT: bne a3, a1, .LBB39_3 +; RV64IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB39_1 Depth=1 +; RV64IA-WMO-NEXT: sc.d.rl a4, a2, (a0) +; RV64IA-WMO-NEXT: bnez a4, .LBB39_1 +; RV64IA-WMO-NEXT: .LBB39_3: +; RV64IA-WMO-NEXT: ret +; +; RV64IA-ZACAS-LABEL: cmpxchg_i64_seq_cst_seq_cst: +; RV64IA-ZACAS: # %bb.0: +; RV64IA-ZACAS-NEXT: amocas.d.aqrl a1, a2, (a0) +; RV64IA-ZACAS-NEXT: ret +; +; RV64IA-TSO-LABEL: cmpxchg_i64_seq_cst_seq_cst: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: .LBB39_1: # =>This Inner Loop Header: Depth=1 +; 
RV64IA-TSO-NEXT: lr.d.aqrl a3, (a0) +; RV64IA-TSO-NEXT: bne a3, a1, .LBB39_3 +; RV64IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB39_1 Depth=1 +; RV64IA-TSO-NEXT: sc.d.rl a4, a2, (a0) +; RV64IA-TSO-NEXT: bnez a4, .LBB39_1 +; RV64IA-TSO-NEXT: .LBB39_3: +; RV64IA-TSO-NEXT: ret +; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i64_seq_cst_seq_cst: +; RV32IA-WMO-ZACAS: # %bb.0: +; RV32IA-WMO-ZACAS-NEXT: mv a5, a4 +; RV32IA-WMO-ZACAS-NEXT: mv a7, a2 +; RV32IA-WMO-ZACAS-NEXT: mv a4, a3 +; RV32IA-WMO-ZACAS-NEXT: mv a6, a1 +; RV32IA-WMO-ZACAS-NEXT: amocas.d.aqrl a6, a4, (a0) +; RV32IA-WMO-ZACAS-NEXT: ret +; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i64_seq_cst_seq_cst: +; RV64IA-WMO-ZACAS: # %bb.0: +; RV64IA-WMO-ZACAS-NEXT: amocas.d.aqrl a1, a2, (a0) +; RV64IA-WMO-ZACAS-NEXT: ret %res = cmpxchg ptr %ptr, i64 %cmp, i64 %val seq_cst seq_cst ret void } + +define void @cmpxchg_i128_monotonic_monotonic(ptr %ptr, i128 %cmp, i128 %val) nounwind { +; RV32I-LABEL: cmpxchg_i128_monotonic_monotonic: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -48 +; RV32I-NEXT: sw ra, 44(sp) # 4-byte Folded Spill +; RV32I-NEXT: mv a4, a0 +; RV32I-NEXT: lw a0, 0(a2) +; RV32I-NEXT: lw a3, 4(a2) +; RV32I-NEXT: lw a5, 8(a2) +; RV32I-NEXT: lw a2, 12(a2) +; RV32I-NEXT: lw a6, 12(a1) +; RV32I-NEXT: lw a7, 8(a1) +; RV32I-NEXT: lw t0, 4(a1) +; RV32I-NEXT: lw a1, 0(a1) +; RV32I-NEXT: sw a6, 36(sp) +; RV32I-NEXT: sw a7, 32(sp) +; RV32I-NEXT: sw t0, 28(sp) +; RV32I-NEXT: sw a1, 24(sp) +; RV32I-NEXT: sw a2, 20(sp) +; RV32I-NEXT: sw a5, 16(sp) +; RV32I-NEXT: sw a3, 12(sp) +; RV32I-NEXT: sw a0, 8(sp) +; RV32I-NEXT: li a0, 16 +; RV32I-NEXT: addi a2, sp, 24 +; RV32I-NEXT: addi a3, sp, 8 +; RV32I-NEXT: mv a1, a4 +; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: li a5, 0 +; RV32I-NEXT: call __atomic_compare_exchange@plt +; RV32I-NEXT: lw ra, 44(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 48 +; RV32I-NEXT: ret +; +; RV32IA-LABEL: cmpxchg_i128_monotonic_monotonic: +; RV32IA: # %bb.0: +; RV32IA-NEXT: addi sp, sp, -48 +; RV32IA-NEXT: sw ra, 44(sp) 
# 4-byte Folded Spill +; RV32IA-NEXT: mv a4, a0 +; RV32IA-NEXT: lw a0, 0(a2) +; RV32IA-NEXT: lw a3, 4(a2) +; RV32IA-NEXT: lw a5, 8(a2) +; RV32IA-NEXT: lw a2, 12(a2) +; RV32IA-NEXT: lw a6, 12(a1) +; RV32IA-NEXT: lw a7, 8(a1) +; RV32IA-NEXT: lw t0, 4(a1) +; RV32IA-NEXT: lw a1, 0(a1) +; RV32IA-NEXT: sw a6, 36(sp) +; RV32IA-NEXT: sw a7, 32(sp) +; RV32IA-NEXT: sw t0, 28(sp) +; RV32IA-NEXT: sw a1, 24(sp) +; RV32IA-NEXT: sw a2, 20(sp) +; RV32IA-NEXT: sw a5, 16(sp) +; RV32IA-NEXT: sw a3, 12(sp) +; RV32IA-NEXT: sw a0, 8(sp) +; RV32IA-NEXT: li a0, 16 +; RV32IA-NEXT: addi a2, sp, 24 +; RV32IA-NEXT: addi a3, sp, 8 +; RV32IA-NEXT: mv a1, a4 +; RV32IA-NEXT: li a4, 0 +; RV32IA-NEXT: li a5, 0 +; RV32IA-NEXT: call __atomic_compare_exchange@plt +; RV32IA-NEXT: lw ra, 44(sp) # 4-byte Folded Reload +; RV32IA-NEXT: addi sp, sp, 48 +; RV32IA-NEXT: ret +; +; RV64I-LABEL: cmpxchg_i128_monotonic_monotonic: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -32 +; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd a2, 8(sp) +; RV64I-NEXT: sd a1, 0(sp) +; RV64I-NEXT: mv a1, sp +; RV64I-NEXT: mv a2, a3 +; RV64I-NEXT: mv a3, a4 +; RV64I-NEXT: li a4, 0 +; RV64I-NEXT: li a5, 0 +; RV64I-NEXT: call __atomic_compare_exchange_16@plt +; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 32 +; RV64I-NEXT: ret +; +; RV64IA-WMO-LABEL: cmpxchg_i128_monotonic_monotonic: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: addi sp, sp, -32 +; RV64IA-WMO-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; RV64IA-WMO-NEXT: sd a2, 8(sp) +; RV64IA-WMO-NEXT: sd a1, 0(sp) +; RV64IA-WMO-NEXT: mv a1, sp +; RV64IA-WMO-NEXT: mv a2, a3 +; RV64IA-WMO-NEXT: mv a3, a4 +; RV64IA-WMO-NEXT: li a4, 0 +; RV64IA-WMO-NEXT: li a5, 0 +; RV64IA-WMO-NEXT: call __atomic_compare_exchange_16@plt +; RV64IA-WMO-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; RV64IA-WMO-NEXT: addi sp, sp, 32 +; RV64IA-WMO-NEXT: ret +; +; RV64IA-ZACAS-LABEL: cmpxchg_i128_monotonic_monotonic: +; RV64IA-ZACAS: # %bb.0: +; 
RV64IA-ZACAS-NEXT: mv a5, a4 +; RV64IA-ZACAS-NEXT: mv a7, a2 +; RV64IA-ZACAS-NEXT: mv a4, a3 +; RV64IA-ZACAS-NEXT: mv a6, a1 +; RV64IA-ZACAS-NEXT: amocas.q a6, a4, (a0) +; RV64IA-ZACAS-NEXT: ret +; +; RV64IA-TSO-LABEL: cmpxchg_i128_monotonic_monotonic: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: addi sp, sp, -32 +; RV64IA-TSO-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; RV64IA-TSO-NEXT: sd a2, 8(sp) +; RV64IA-TSO-NEXT: sd a1, 0(sp) +; RV64IA-TSO-NEXT: mv a1, sp +; RV64IA-TSO-NEXT: mv a2, a3 +; RV64IA-TSO-NEXT: mv a3, a4 +; RV64IA-TSO-NEXT: li a4, 0 +; RV64IA-TSO-NEXT: li a5, 0 +; RV64IA-TSO-NEXT: call __atomic_compare_exchange_16@plt +; RV64IA-TSO-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; RV64IA-TSO-NEXT: addi sp, sp, 32 +; RV64IA-TSO-NEXT: ret + %res = cmpxchg ptr %ptr, i128 %cmp, i128 %val monotonic monotonic + ret void +} + +define void @cmpxchg_i128_acquire_monotonic(ptr %ptr, i128 %cmp, i128 %val) nounwind { +; RV32I-LABEL: cmpxchg_i128_acquire_monotonic: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -48 +; RV32I-NEXT: sw ra, 44(sp) # 4-byte Folded Spill +; RV32I-NEXT: mv a5, a0 +; RV32I-NEXT: lw a0, 0(a2) +; RV32I-NEXT: lw a3, 4(a2) +; RV32I-NEXT: lw a4, 8(a2) +; RV32I-NEXT: lw a2, 12(a2) +; RV32I-NEXT: lw a6, 12(a1) +; RV32I-NEXT: lw a7, 8(a1) +; RV32I-NEXT: lw t0, 4(a1) +; RV32I-NEXT: lw a1, 0(a1) +; RV32I-NEXT: sw a6, 36(sp) +; RV32I-NEXT: sw a7, 32(sp) +; RV32I-NEXT: sw t0, 28(sp) +; RV32I-NEXT: sw a1, 24(sp) +; RV32I-NEXT: sw a2, 20(sp) +; RV32I-NEXT: sw a4, 16(sp) +; RV32I-NEXT: sw a3, 12(sp) +; RV32I-NEXT: sw a0, 8(sp) +; RV32I-NEXT: li a0, 16 +; RV32I-NEXT: addi a2, sp, 24 +; RV32I-NEXT: addi a3, sp, 8 +; RV32I-NEXT: li a4, 2 +; RV32I-NEXT: mv a1, a5 +; RV32I-NEXT: li a5, 0 +; RV32I-NEXT: call __atomic_compare_exchange@plt +; RV32I-NEXT: lw ra, 44(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 48 +; RV32I-NEXT: ret +; +; RV32IA-LABEL: cmpxchg_i128_acquire_monotonic: +; RV32IA: # %bb.0: +; RV32IA-NEXT: addi sp, sp, -48 +; RV32IA-NEXT: sw 
ra, 44(sp) # 4-byte Folded Spill +; RV32IA-NEXT: mv a5, a0 +; RV32IA-NEXT: lw a0, 0(a2) +; RV32IA-NEXT: lw a3, 4(a2) +; RV32IA-NEXT: lw a4, 8(a2) +; RV32IA-NEXT: lw a2, 12(a2) +; RV32IA-NEXT: lw a6, 12(a1) +; RV32IA-NEXT: lw a7, 8(a1) +; RV32IA-NEXT: lw t0, 4(a1) +; RV32IA-NEXT: lw a1, 0(a1) +; RV32IA-NEXT: sw a6, 36(sp) +; RV32IA-NEXT: sw a7, 32(sp) +; RV32IA-NEXT: sw t0, 28(sp) +; RV32IA-NEXT: sw a1, 24(sp) +; RV32IA-NEXT: sw a2, 20(sp) +; RV32IA-NEXT: sw a4, 16(sp) +; RV32IA-NEXT: sw a3, 12(sp) +; RV32IA-NEXT: sw a0, 8(sp) +; RV32IA-NEXT: li a0, 16 +; RV32IA-NEXT: addi a2, sp, 24 +; RV32IA-NEXT: addi a3, sp, 8 +; RV32IA-NEXT: li a4, 2 +; RV32IA-NEXT: mv a1, a5 +; RV32IA-NEXT: li a5, 0 +; RV32IA-NEXT: call __atomic_compare_exchange@plt +; RV32IA-NEXT: lw ra, 44(sp) # 4-byte Folded Reload +; RV32IA-NEXT: addi sp, sp, 48 +; RV32IA-NEXT: ret +; +; RV64I-LABEL: cmpxchg_i128_acquire_monotonic: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -32 +; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; RV64I-NEXT: mv a5, a4 +; RV64I-NEXT: sd a2, 8(sp) +; RV64I-NEXT: sd a1, 0(sp) +; RV64I-NEXT: mv a1, sp +; RV64I-NEXT: li a4, 2 +; RV64I-NEXT: mv a2, a3 +; RV64I-NEXT: mv a3, a5 +; RV64I-NEXT: li a5, 0 +; RV64I-NEXT: call __atomic_compare_exchange_16@plt +; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 32 +; RV64I-NEXT: ret +; +; RV64IA-WMO-LABEL: cmpxchg_i128_acquire_monotonic: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: addi sp, sp, -32 +; RV64IA-WMO-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; RV64IA-WMO-NEXT: mv a5, a4 +; RV64IA-WMO-NEXT: sd a2, 8(sp) +; RV64IA-WMO-NEXT: sd a1, 0(sp) +; RV64IA-WMO-NEXT: mv a1, sp +; RV64IA-WMO-NEXT: li a4, 2 +; RV64IA-WMO-NEXT: mv a2, a3 +; RV64IA-WMO-NEXT: mv a3, a5 +; RV64IA-WMO-NEXT: li a5, 0 +; RV64IA-WMO-NEXT: call __atomic_compare_exchange_16@plt +; RV64IA-WMO-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; RV64IA-WMO-NEXT: addi sp, sp, 32 +; RV64IA-WMO-NEXT: ret +; +; RV64IA-ZACAS-LABEL: 
cmpxchg_i128_acquire_monotonic: +; RV64IA-ZACAS: # %bb.0: +; RV64IA-ZACAS-NEXT: mv a5, a4 +; RV64IA-ZACAS-NEXT: mv a7, a2 +; RV64IA-ZACAS-NEXT: mv a4, a3 +; RV64IA-ZACAS-NEXT: mv a6, a1 +; RV64IA-ZACAS-NEXT: amocas.q.aq a6, a4, (a0) +; RV64IA-ZACAS-NEXT: ret +; +; RV64IA-TSO-LABEL: cmpxchg_i128_acquire_monotonic: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: addi sp, sp, -32 +; RV64IA-TSO-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; RV64IA-TSO-NEXT: mv a5, a4 +; RV64IA-TSO-NEXT: sd a2, 8(sp) +; RV64IA-TSO-NEXT: sd a1, 0(sp) +; RV64IA-TSO-NEXT: mv a1, sp +; RV64IA-TSO-NEXT: li a4, 2 +; RV64IA-TSO-NEXT: mv a2, a3 +; RV64IA-TSO-NEXT: mv a3, a5 +; RV64IA-TSO-NEXT: li a5, 0 +; RV64IA-TSO-NEXT: call __atomic_compare_exchange_16@plt +; RV64IA-TSO-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; RV64IA-TSO-NEXT: addi sp, sp, 32 +; RV64IA-TSO-NEXT: ret + %res = cmpxchg ptr %ptr, i128 %cmp, i128 %val acquire monotonic + ret void +} + +define void @cmpxchg_i128_acquire_acquire(ptr %ptr, i128 %cmp, i128 %val) nounwind { +; RV32I-LABEL: cmpxchg_i128_acquire_acquire: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -48 +; RV32I-NEXT: sw ra, 44(sp) # 4-byte Folded Spill +; RV32I-NEXT: mv a6, a0 +; RV32I-NEXT: lw a0, 0(a2) +; RV32I-NEXT: lw a3, 4(a2) +; RV32I-NEXT: lw a4, 8(a2) +; RV32I-NEXT: lw a2, 12(a2) +; RV32I-NEXT: lw a5, 12(a1) +; RV32I-NEXT: lw a7, 8(a1) +; RV32I-NEXT: lw t0, 4(a1) +; RV32I-NEXT: lw a1, 0(a1) +; RV32I-NEXT: sw a5, 36(sp) +; RV32I-NEXT: sw a7, 32(sp) +; RV32I-NEXT: sw t0, 28(sp) +; RV32I-NEXT: sw a1, 24(sp) +; RV32I-NEXT: sw a2, 20(sp) +; RV32I-NEXT: sw a4, 16(sp) +; RV32I-NEXT: sw a3, 12(sp) +; RV32I-NEXT: sw a0, 8(sp) +; RV32I-NEXT: li a0, 16 +; RV32I-NEXT: addi a2, sp, 24 +; RV32I-NEXT: addi a3, sp, 8 +; RV32I-NEXT: li a4, 2 +; RV32I-NEXT: li a5, 2 +; RV32I-NEXT: mv a1, a6 +; RV32I-NEXT: call __atomic_compare_exchange@plt +; RV32I-NEXT: lw ra, 44(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 48 +; RV32I-NEXT: ret +; +; RV32IA-LABEL: 
cmpxchg_i128_acquire_acquire: +; RV32IA: # %bb.0: +; RV32IA-NEXT: addi sp, sp, -48 +; RV32IA-NEXT: sw ra, 44(sp) # 4-byte Folded Spill +; RV32IA-NEXT: mv a6, a0 +; RV32IA-NEXT: lw a0, 0(a2) +; RV32IA-NEXT: lw a3, 4(a2) +; RV32IA-NEXT: lw a4, 8(a2) +; RV32IA-NEXT: lw a2, 12(a2) +; RV32IA-NEXT: lw a5, 12(a1) +; RV32IA-NEXT: lw a7, 8(a1) +; RV32IA-NEXT: lw t0, 4(a1) +; RV32IA-NEXT: lw a1, 0(a1) +; RV32IA-NEXT: sw a5, 36(sp) +; RV32IA-NEXT: sw a7, 32(sp) +; RV32IA-NEXT: sw t0, 28(sp) +; RV32IA-NEXT: sw a1, 24(sp) +; RV32IA-NEXT: sw a2, 20(sp) +; RV32IA-NEXT: sw a4, 16(sp) +; RV32IA-NEXT: sw a3, 12(sp) +; RV32IA-NEXT: sw a0, 8(sp) +; RV32IA-NEXT: li a0, 16 +; RV32IA-NEXT: addi a2, sp, 24 +; RV32IA-NEXT: addi a3, sp, 8 +; RV32IA-NEXT: li a4, 2 +; RV32IA-NEXT: li a5, 2 +; RV32IA-NEXT: mv a1, a6 +; RV32IA-NEXT: call __atomic_compare_exchange@plt +; RV32IA-NEXT: lw ra, 44(sp) # 4-byte Folded Reload +; RV32IA-NEXT: addi sp, sp, 48 +; RV32IA-NEXT: ret +; +; RV64I-LABEL: cmpxchg_i128_acquire_acquire: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -32 +; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; RV64I-NEXT: mv a6, a4 +; RV64I-NEXT: sd a2, 8(sp) +; RV64I-NEXT: sd a1, 0(sp) +; RV64I-NEXT: mv a1, sp +; RV64I-NEXT: li a4, 2 +; RV64I-NEXT: li a5, 2 +; RV64I-NEXT: mv a2, a3 +; RV64I-NEXT: mv a3, a6 +; RV64I-NEXT: call __atomic_compare_exchange_16@plt +; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 32 +; RV64I-NEXT: ret +; +; RV64IA-WMO-LABEL: cmpxchg_i128_acquire_acquire: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: addi sp, sp, -32 +; RV64IA-WMO-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; RV64IA-WMO-NEXT: mv a6, a4 +; RV64IA-WMO-NEXT: sd a2, 8(sp) +; RV64IA-WMO-NEXT: sd a1, 0(sp) +; RV64IA-WMO-NEXT: mv a1, sp +; RV64IA-WMO-NEXT: li a4, 2 +; RV64IA-WMO-NEXT: li a5, 2 +; RV64IA-WMO-NEXT: mv a2, a3 +; RV64IA-WMO-NEXT: mv a3, a6 +; RV64IA-WMO-NEXT: call __atomic_compare_exchange_16@plt +; RV64IA-WMO-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; 
RV64IA-WMO-NEXT: addi sp, sp, 32 +; RV64IA-WMO-NEXT: ret +; +; RV64IA-ZACAS-LABEL: cmpxchg_i128_acquire_acquire: +; RV64IA-ZACAS: # %bb.0: +; RV64IA-ZACAS-NEXT: mv a5, a4 +; RV64IA-ZACAS-NEXT: mv a7, a2 +; RV64IA-ZACAS-NEXT: mv a4, a3 +; RV64IA-ZACAS-NEXT: mv a6, a1 +; RV64IA-ZACAS-NEXT: amocas.q.aq a6, a4, (a0) +; RV64IA-ZACAS-NEXT: ret +; +; RV64IA-TSO-LABEL: cmpxchg_i128_acquire_acquire: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: addi sp, sp, -32 +; RV64IA-TSO-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; RV64IA-TSO-NEXT: mv a6, a4 +; RV64IA-TSO-NEXT: sd a2, 8(sp) +; RV64IA-TSO-NEXT: sd a1, 0(sp) +; RV64IA-TSO-NEXT: mv a1, sp +; RV64IA-TSO-NEXT: li a4, 2 +; RV64IA-TSO-NEXT: li a5, 2 +; RV64IA-TSO-NEXT: mv a2, a3 +; RV64IA-TSO-NEXT: mv a3, a6 +; RV64IA-TSO-NEXT: call __atomic_compare_exchange_16@plt +; RV64IA-TSO-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; RV64IA-TSO-NEXT: addi sp, sp, 32 +; RV64IA-TSO-NEXT: ret + %res = cmpxchg ptr %ptr, i128 %cmp, i128 %val acquire acquire + ret void +} + +define void @cmpxchg_i128_release_monotonic(ptr %ptr, i128 %cmp, i128 %val) nounwind { +; RV32I-LABEL: cmpxchg_i128_release_monotonic: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -48 +; RV32I-NEXT: sw ra, 44(sp) # 4-byte Folded Spill +; RV32I-NEXT: mv a5, a0 +; RV32I-NEXT: lw a0, 0(a2) +; RV32I-NEXT: lw a3, 4(a2) +; RV32I-NEXT: lw a4, 8(a2) +; RV32I-NEXT: lw a2, 12(a2) +; RV32I-NEXT: lw a6, 12(a1) +; RV32I-NEXT: lw a7, 8(a1) +; RV32I-NEXT: lw t0, 4(a1) +; RV32I-NEXT: lw a1, 0(a1) +; RV32I-NEXT: sw a6, 36(sp) +; RV32I-NEXT: sw a7, 32(sp) +; RV32I-NEXT: sw t0, 28(sp) +; RV32I-NEXT: sw a1, 24(sp) +; RV32I-NEXT: sw a2, 20(sp) +; RV32I-NEXT: sw a4, 16(sp) +; RV32I-NEXT: sw a3, 12(sp) +; RV32I-NEXT: sw a0, 8(sp) +; RV32I-NEXT: li a0, 16 +; RV32I-NEXT: addi a2, sp, 24 +; RV32I-NEXT: addi a3, sp, 8 +; RV32I-NEXT: li a4, 3 +; RV32I-NEXT: mv a1, a5 +; RV32I-NEXT: li a5, 0 +; RV32I-NEXT: call __atomic_compare_exchange@plt +; RV32I-NEXT: lw ra, 44(sp) # 4-byte Folded Reload +; 
RV32I-NEXT: addi sp, sp, 48 +; RV32I-NEXT: ret +; +; RV32IA-LABEL: cmpxchg_i128_release_monotonic: +; RV32IA: # %bb.0: +; RV32IA-NEXT: addi sp, sp, -48 +; RV32IA-NEXT: sw ra, 44(sp) # 4-byte Folded Spill +; RV32IA-NEXT: mv a5, a0 +; RV32IA-NEXT: lw a0, 0(a2) +; RV32IA-NEXT: lw a3, 4(a2) +; RV32IA-NEXT: lw a4, 8(a2) +; RV32IA-NEXT: lw a2, 12(a2) +; RV32IA-NEXT: lw a6, 12(a1) +; RV32IA-NEXT: lw a7, 8(a1) +; RV32IA-NEXT: lw t0, 4(a1) +; RV32IA-NEXT: lw a1, 0(a1) +; RV32IA-NEXT: sw a6, 36(sp) +; RV32IA-NEXT: sw a7, 32(sp) +; RV32IA-NEXT: sw t0, 28(sp) +; RV32IA-NEXT: sw a1, 24(sp) +; RV32IA-NEXT: sw a2, 20(sp) +; RV32IA-NEXT: sw a4, 16(sp) +; RV32IA-NEXT: sw a3, 12(sp) +; RV32IA-NEXT: sw a0, 8(sp) +; RV32IA-NEXT: li a0, 16 +; RV32IA-NEXT: addi a2, sp, 24 +; RV32IA-NEXT: addi a3, sp, 8 +; RV32IA-NEXT: li a4, 3 +; RV32IA-NEXT: mv a1, a5 +; RV32IA-NEXT: li a5, 0 +; RV32IA-NEXT: call __atomic_compare_exchange@plt +; RV32IA-NEXT: lw ra, 44(sp) # 4-byte Folded Reload +; RV32IA-NEXT: addi sp, sp, 48 +; RV32IA-NEXT: ret +; +; RV64I-LABEL: cmpxchg_i128_release_monotonic: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -32 +; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; RV64I-NEXT: mv a5, a4 +; RV64I-NEXT: sd a2, 8(sp) +; RV64I-NEXT: sd a1, 0(sp) +; RV64I-NEXT: mv a1, sp +; RV64I-NEXT: li a4, 3 +; RV64I-NEXT: mv a2, a3 +; RV64I-NEXT: mv a3, a5 +; RV64I-NEXT: li a5, 0 +; RV64I-NEXT: call __atomic_compare_exchange_16@plt +; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 32 +; RV64I-NEXT: ret +; +; RV64IA-WMO-LABEL: cmpxchg_i128_release_monotonic: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: addi sp, sp, -32 +; RV64IA-WMO-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; RV64IA-WMO-NEXT: mv a5, a4 +; RV64IA-WMO-NEXT: sd a2, 8(sp) +; RV64IA-WMO-NEXT: sd a1, 0(sp) +; RV64IA-WMO-NEXT: mv a1, sp +; RV64IA-WMO-NEXT: li a4, 3 +; RV64IA-WMO-NEXT: mv a2, a3 +; RV64IA-WMO-NEXT: mv a3, a5 +; RV64IA-WMO-NEXT: li a5, 0 +; RV64IA-WMO-NEXT: call 
__atomic_compare_exchange_16@plt +; RV64IA-WMO-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; RV64IA-WMO-NEXT: addi sp, sp, 32 +; RV64IA-WMO-NEXT: ret +; +; RV64IA-ZACAS-LABEL: cmpxchg_i128_release_monotonic: +; RV64IA-ZACAS: # %bb.0: +; RV64IA-ZACAS-NEXT: mv a5, a4 +; RV64IA-ZACAS-NEXT: mv a7, a2 +; RV64IA-ZACAS-NEXT: mv a4, a3 +; RV64IA-ZACAS-NEXT: mv a6, a1 +; RV64IA-ZACAS-NEXT: amocas.q.rl a6, a4, (a0) +; RV64IA-ZACAS-NEXT: ret +; +; RV64IA-TSO-LABEL: cmpxchg_i128_release_monotonic: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: addi sp, sp, -32 +; RV64IA-TSO-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; RV64IA-TSO-NEXT: mv a5, a4 +; RV64IA-TSO-NEXT: sd a2, 8(sp) +; RV64IA-TSO-NEXT: sd a1, 0(sp) +; RV64IA-TSO-NEXT: mv a1, sp +; RV64IA-TSO-NEXT: li a4, 3 +; RV64IA-TSO-NEXT: mv a2, a3 +; RV64IA-TSO-NEXT: mv a3, a5 +; RV64IA-TSO-NEXT: li a5, 0 +; RV64IA-TSO-NEXT: call __atomic_compare_exchange_16@plt +; RV64IA-TSO-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; RV64IA-TSO-NEXT: addi sp, sp, 32 +; RV64IA-TSO-NEXT: ret + %res = cmpxchg ptr %ptr, i128 %cmp, i128 %val release monotonic + ret void +} + +define void @cmpxchg_i128_release_acquire(ptr %ptr, i128 %cmp, i128 %val) nounwind { +; RV32I-LABEL: cmpxchg_i128_release_acquire: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -48 +; RV32I-NEXT: sw ra, 44(sp) # 4-byte Folded Spill +; RV32I-NEXT: mv a6, a0 +; RV32I-NEXT: lw a0, 0(a2) +; RV32I-NEXT: lw a3, 4(a2) +; RV32I-NEXT: lw a4, 8(a2) +; RV32I-NEXT: lw a2, 12(a2) +; RV32I-NEXT: lw a5, 12(a1) +; RV32I-NEXT: lw a7, 8(a1) +; RV32I-NEXT: lw t0, 4(a1) +; RV32I-NEXT: lw a1, 0(a1) +; RV32I-NEXT: sw a5, 36(sp) +; RV32I-NEXT: sw a7, 32(sp) +; RV32I-NEXT: sw t0, 28(sp) +; RV32I-NEXT: sw a1, 24(sp) +; RV32I-NEXT: sw a2, 20(sp) +; RV32I-NEXT: sw a4, 16(sp) +; RV32I-NEXT: sw a3, 12(sp) +; RV32I-NEXT: sw a0, 8(sp) +; RV32I-NEXT: li a0, 16 +; RV32I-NEXT: addi a2, sp, 24 +; RV32I-NEXT: addi a3, sp, 8 +; RV32I-NEXT: li a4, 3 +; RV32I-NEXT: li a5, 2 +; RV32I-NEXT: mv a1, a6 +; RV32I-NEXT: 
call __atomic_compare_exchange@plt +; RV32I-NEXT: lw ra, 44(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 48 +; RV32I-NEXT: ret +; +; RV32IA-LABEL: cmpxchg_i128_release_acquire: +; RV32IA: # %bb.0: +; RV32IA-NEXT: addi sp, sp, -48 +; RV32IA-NEXT: sw ra, 44(sp) # 4-byte Folded Spill +; RV32IA-NEXT: mv a6, a0 +; RV32IA-NEXT: lw a0, 0(a2) +; RV32IA-NEXT: lw a3, 4(a2) +; RV32IA-NEXT: lw a4, 8(a2) +; RV32IA-NEXT: lw a2, 12(a2) +; RV32IA-NEXT: lw a5, 12(a1) +; RV32IA-NEXT: lw a7, 8(a1) +; RV32IA-NEXT: lw t0, 4(a1) +; RV32IA-NEXT: lw a1, 0(a1) +; RV32IA-NEXT: sw a5, 36(sp) +; RV32IA-NEXT: sw a7, 32(sp) +; RV32IA-NEXT: sw t0, 28(sp) +; RV32IA-NEXT: sw a1, 24(sp) +; RV32IA-NEXT: sw a2, 20(sp) +; RV32IA-NEXT: sw a4, 16(sp) +; RV32IA-NEXT: sw a3, 12(sp) +; RV32IA-NEXT: sw a0, 8(sp) +; RV32IA-NEXT: li a0, 16 +; RV32IA-NEXT: addi a2, sp, 24 +; RV32IA-NEXT: addi a3, sp, 8 +; RV32IA-NEXT: li a4, 3 +; RV32IA-NEXT: li a5, 2 +; RV32IA-NEXT: mv a1, a6 +; RV32IA-NEXT: call __atomic_compare_exchange@plt +; RV32IA-NEXT: lw ra, 44(sp) # 4-byte Folded Reload +; RV32IA-NEXT: addi sp, sp, 48 +; RV32IA-NEXT: ret +; +; RV64I-LABEL: cmpxchg_i128_release_acquire: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -32 +; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; RV64I-NEXT: mv a6, a4 +; RV64I-NEXT: sd a2, 8(sp) +; RV64I-NEXT: sd a1, 0(sp) +; RV64I-NEXT: mv a1, sp +; RV64I-NEXT: li a4, 3 +; RV64I-NEXT: li a5, 2 +; RV64I-NEXT: mv a2, a3 +; RV64I-NEXT: mv a3, a6 +; RV64I-NEXT: call __atomic_compare_exchange_16@plt +; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 32 +; RV64I-NEXT: ret +; +; RV64IA-WMO-LABEL: cmpxchg_i128_release_acquire: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: addi sp, sp, -32 +; RV64IA-WMO-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; RV64IA-WMO-NEXT: mv a6, a4 +; RV64IA-WMO-NEXT: sd a2, 8(sp) +; RV64IA-WMO-NEXT: sd a1, 0(sp) +; RV64IA-WMO-NEXT: mv a1, sp +; RV64IA-WMO-NEXT: li a4, 3 +; RV64IA-WMO-NEXT: li a5, 2 +; RV64IA-WMO-NEXT: mv 
a2, a3 +; RV64IA-WMO-NEXT: mv a3, a6 +; RV64IA-WMO-NEXT: call __atomic_compare_exchange_16@plt +; RV64IA-WMO-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; RV64IA-WMO-NEXT: addi sp, sp, 32 +; RV64IA-WMO-NEXT: ret +; +; RV64IA-ZACAS-LABEL: cmpxchg_i128_release_acquire: +; RV64IA-ZACAS: # %bb.0: +; RV64IA-ZACAS-NEXT: mv a5, a4 +; RV64IA-ZACAS-NEXT: mv a7, a2 +; RV64IA-ZACAS-NEXT: mv a4, a3 +; RV64IA-ZACAS-NEXT: mv a6, a1 +; RV64IA-ZACAS-NEXT: amocas.q.aqrl a6, a4, (a0) +; RV64IA-ZACAS-NEXT: ret +; +; RV64IA-TSO-LABEL: cmpxchg_i128_release_acquire: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: addi sp, sp, -32 +; RV64IA-TSO-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; RV64IA-TSO-NEXT: mv a6, a4 +; RV64IA-TSO-NEXT: sd a2, 8(sp) +; RV64IA-TSO-NEXT: sd a1, 0(sp) +; RV64IA-TSO-NEXT: mv a1, sp +; RV64IA-TSO-NEXT: li a4, 3 +; RV64IA-TSO-NEXT: li a5, 2 +; RV64IA-TSO-NEXT: mv a2, a3 +; RV64IA-TSO-NEXT: mv a3, a6 +; RV64IA-TSO-NEXT: call __atomic_compare_exchange_16@plt +; RV64IA-TSO-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; RV64IA-TSO-NEXT: addi sp, sp, 32 +; RV64IA-TSO-NEXT: ret + %res = cmpxchg ptr %ptr, i128 %cmp, i128 %val release acquire + ret void +} + +define void @cmpxchg_i128_acq_rel_monotonic(ptr %ptr, i128 %cmp, i128 %val) nounwind { +; RV32I-LABEL: cmpxchg_i128_acq_rel_monotonic: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -48 +; RV32I-NEXT: sw ra, 44(sp) # 4-byte Folded Spill +; RV32I-NEXT: mv a5, a0 +; RV32I-NEXT: lw a0, 0(a2) +; RV32I-NEXT: lw a3, 4(a2) +; RV32I-NEXT: lw a4, 8(a2) +; RV32I-NEXT: lw a2, 12(a2) +; RV32I-NEXT: lw a6, 12(a1) +; RV32I-NEXT: lw a7, 8(a1) +; RV32I-NEXT: lw t0, 4(a1) +; RV32I-NEXT: lw a1, 0(a1) +; RV32I-NEXT: sw a6, 36(sp) +; RV32I-NEXT: sw a7, 32(sp) +; RV32I-NEXT: sw t0, 28(sp) +; RV32I-NEXT: sw a1, 24(sp) +; RV32I-NEXT: sw a2, 20(sp) +; RV32I-NEXT: sw a4, 16(sp) +; RV32I-NEXT: sw a3, 12(sp) +; RV32I-NEXT: sw a0, 8(sp) +; RV32I-NEXT: li a0, 16 +; RV32I-NEXT: addi a2, sp, 24 +; RV32I-NEXT: addi a3, sp, 8 +; RV32I-NEXT: li a4, 4 +; 
RV32I-NEXT: mv a1, a5 +; RV32I-NEXT: li a5, 0 +; RV32I-NEXT: call __atomic_compare_exchange@plt +; RV32I-NEXT: lw ra, 44(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 48 +; RV32I-NEXT: ret +; +; RV32IA-LABEL: cmpxchg_i128_acq_rel_monotonic: +; RV32IA: # %bb.0: +; RV32IA-NEXT: addi sp, sp, -48 +; RV32IA-NEXT: sw ra, 44(sp) # 4-byte Folded Spill +; RV32IA-NEXT: mv a5, a0 +; RV32IA-NEXT: lw a0, 0(a2) +; RV32IA-NEXT: lw a3, 4(a2) +; RV32IA-NEXT: lw a4, 8(a2) +; RV32IA-NEXT: lw a2, 12(a2) +; RV32IA-NEXT: lw a6, 12(a1) +; RV32IA-NEXT: lw a7, 8(a1) +; RV32IA-NEXT: lw t0, 4(a1) +; RV32IA-NEXT: lw a1, 0(a1) +; RV32IA-NEXT: sw a6, 36(sp) +; RV32IA-NEXT: sw a7, 32(sp) +; RV32IA-NEXT: sw t0, 28(sp) +; RV32IA-NEXT: sw a1, 24(sp) +; RV32IA-NEXT: sw a2, 20(sp) +; RV32IA-NEXT: sw a4, 16(sp) +; RV32IA-NEXT: sw a3, 12(sp) +; RV32IA-NEXT: sw a0, 8(sp) +; RV32IA-NEXT: li a0, 16 +; RV32IA-NEXT: addi a2, sp, 24 +; RV32IA-NEXT: addi a3, sp, 8 +; RV32IA-NEXT: li a4, 4 +; RV32IA-NEXT: mv a1, a5 +; RV32IA-NEXT: li a5, 0 +; RV32IA-NEXT: call __atomic_compare_exchange@plt +; RV32IA-NEXT: lw ra, 44(sp) # 4-byte Folded Reload +; RV32IA-NEXT: addi sp, sp, 48 +; RV32IA-NEXT: ret +; +; RV64I-LABEL: cmpxchg_i128_acq_rel_monotonic: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -32 +; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; RV64I-NEXT: mv a5, a4 +; RV64I-NEXT: sd a2, 8(sp) +; RV64I-NEXT: sd a1, 0(sp) +; RV64I-NEXT: mv a1, sp +; RV64I-NEXT: li a4, 4 +; RV64I-NEXT: mv a2, a3 +; RV64I-NEXT: mv a3, a5 +; RV64I-NEXT: li a5, 0 +; RV64I-NEXT: call __atomic_compare_exchange_16@plt +; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 32 +; RV64I-NEXT: ret +; +; RV64IA-WMO-LABEL: cmpxchg_i128_acq_rel_monotonic: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: addi sp, sp, -32 +; RV64IA-WMO-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; RV64IA-WMO-NEXT: mv a5, a4 +; RV64IA-WMO-NEXT: sd a2, 8(sp) +; RV64IA-WMO-NEXT: sd a1, 0(sp) +; RV64IA-WMO-NEXT: mv a1, sp +; 
RV64IA-WMO-NEXT: li a4, 4 +; RV64IA-WMO-NEXT: mv a2, a3 +; RV64IA-WMO-NEXT: mv a3, a5 +; RV64IA-WMO-NEXT: li a5, 0 +; RV64IA-WMO-NEXT: call __atomic_compare_exchange_16@plt +; RV64IA-WMO-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; RV64IA-WMO-NEXT: addi sp, sp, 32 +; RV64IA-WMO-NEXT: ret +; +; RV64IA-ZACAS-LABEL: cmpxchg_i128_acq_rel_monotonic: +; RV64IA-ZACAS: # %bb.0: +; RV64IA-ZACAS-NEXT: mv a5, a4 +; RV64IA-ZACAS-NEXT: mv a7, a2 +; RV64IA-ZACAS-NEXT: mv a4, a3 +; RV64IA-ZACAS-NEXT: mv a6, a1 +; RV64IA-ZACAS-NEXT: amocas.q.aqrl a6, a4, (a0) +; RV64IA-ZACAS-NEXT: ret +; +; RV64IA-TSO-LABEL: cmpxchg_i128_acq_rel_monotonic: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: addi sp, sp, -32 +; RV64IA-TSO-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; RV64IA-TSO-NEXT: mv a5, a4 +; RV64IA-TSO-NEXT: sd a2, 8(sp) +; RV64IA-TSO-NEXT: sd a1, 0(sp) +; RV64IA-TSO-NEXT: mv a1, sp +; RV64IA-TSO-NEXT: li a4, 4 +; RV64IA-TSO-NEXT: mv a2, a3 +; RV64IA-TSO-NEXT: mv a3, a5 +; RV64IA-TSO-NEXT: li a5, 0 +; RV64IA-TSO-NEXT: call __atomic_compare_exchange_16@plt +; RV64IA-TSO-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; RV64IA-TSO-NEXT: addi sp, sp, 32 +; RV64IA-TSO-NEXT: ret + %res = cmpxchg ptr %ptr, i128 %cmp, i128 %val acq_rel monotonic + ret void +} + +define void @cmpxchg_i128_acq_rel_acquire(ptr %ptr, i128 %cmp, i128 %val) nounwind { +; RV32I-LABEL: cmpxchg_i128_acq_rel_acquire: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -48 +; RV32I-NEXT: sw ra, 44(sp) # 4-byte Folded Spill +; RV32I-NEXT: mv a6, a0 +; RV32I-NEXT: lw a0, 0(a2) +; RV32I-NEXT: lw a3, 4(a2) +; RV32I-NEXT: lw a4, 8(a2) +; RV32I-NEXT: lw a2, 12(a2) +; RV32I-NEXT: lw a5, 12(a1) +; RV32I-NEXT: lw a7, 8(a1) +; RV32I-NEXT: lw t0, 4(a1) +; RV32I-NEXT: lw a1, 0(a1) +; RV32I-NEXT: sw a5, 36(sp) +; RV32I-NEXT: sw a7, 32(sp) +; RV32I-NEXT: sw t0, 28(sp) +; RV32I-NEXT: sw a1, 24(sp) +; RV32I-NEXT: sw a2, 20(sp) +; RV32I-NEXT: sw a4, 16(sp) +; RV32I-NEXT: sw a3, 12(sp) +; RV32I-NEXT: sw a0, 8(sp) +; RV32I-NEXT: li a0, 16 +; 
RV32I-NEXT: addi a2, sp, 24 +; RV32I-NEXT: addi a3, sp, 8 +; RV32I-NEXT: li a4, 4 +; RV32I-NEXT: li a5, 2 +; RV32I-NEXT: mv a1, a6 +; RV32I-NEXT: call __atomic_compare_exchange@plt +; RV32I-NEXT: lw ra, 44(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 48 +; RV32I-NEXT: ret +; +; RV32IA-LABEL: cmpxchg_i128_acq_rel_acquire: +; RV32IA: # %bb.0: +; RV32IA-NEXT: addi sp, sp, -48 +; RV32IA-NEXT: sw ra, 44(sp) # 4-byte Folded Spill +; RV32IA-NEXT: mv a6, a0 +; RV32IA-NEXT: lw a0, 0(a2) +; RV32IA-NEXT: lw a3, 4(a2) +; RV32IA-NEXT: lw a4, 8(a2) +; RV32IA-NEXT: lw a2, 12(a2) +; RV32IA-NEXT: lw a5, 12(a1) +; RV32IA-NEXT: lw a7, 8(a1) +; RV32IA-NEXT: lw t0, 4(a1) +; RV32IA-NEXT: lw a1, 0(a1) +; RV32IA-NEXT: sw a5, 36(sp) +; RV32IA-NEXT: sw a7, 32(sp) +; RV32IA-NEXT: sw t0, 28(sp) +; RV32IA-NEXT: sw a1, 24(sp) +; RV32IA-NEXT: sw a2, 20(sp) +; RV32IA-NEXT: sw a4, 16(sp) +; RV32IA-NEXT: sw a3, 12(sp) +; RV32IA-NEXT: sw a0, 8(sp) +; RV32IA-NEXT: li a0, 16 +; RV32IA-NEXT: addi a2, sp, 24 +; RV32IA-NEXT: addi a3, sp, 8 +; RV32IA-NEXT: li a4, 4 +; RV32IA-NEXT: li a5, 2 +; RV32IA-NEXT: mv a1, a6 +; RV32IA-NEXT: call __atomic_compare_exchange@plt +; RV32IA-NEXT: lw ra, 44(sp) # 4-byte Folded Reload +; RV32IA-NEXT: addi sp, sp, 48 +; RV32IA-NEXT: ret +; +; RV64I-LABEL: cmpxchg_i128_acq_rel_acquire: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -32 +; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; RV64I-NEXT: mv a6, a4 +; RV64I-NEXT: sd a2, 8(sp) +; RV64I-NEXT: sd a1, 0(sp) +; RV64I-NEXT: mv a1, sp +; RV64I-NEXT: li a4, 4 +; RV64I-NEXT: li a5, 2 +; RV64I-NEXT: mv a2, a3 +; RV64I-NEXT: mv a3, a6 +; RV64I-NEXT: call __atomic_compare_exchange_16@plt +; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 32 +; RV64I-NEXT: ret +; +; RV64IA-WMO-LABEL: cmpxchg_i128_acq_rel_acquire: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: addi sp, sp, -32 +; RV64IA-WMO-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; RV64IA-WMO-NEXT: mv a6, a4 +; RV64IA-WMO-NEXT: sd a2, 
8(sp) +; RV64IA-WMO-NEXT: sd a1, 0(sp) +; RV64IA-WMO-NEXT: mv a1, sp +; RV64IA-WMO-NEXT: li a4, 4 +; RV64IA-WMO-NEXT: li a5, 2 +; RV64IA-WMO-NEXT: mv a2, a3 +; RV64IA-WMO-NEXT: mv a3, a6 +; RV64IA-WMO-NEXT: call __atomic_compare_exchange_16@plt +; RV64IA-WMO-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; RV64IA-WMO-NEXT: addi sp, sp, 32 +; RV64IA-WMO-NEXT: ret +; +; RV64IA-ZACAS-LABEL: cmpxchg_i128_acq_rel_acquire: +; RV64IA-ZACAS: # %bb.0: +; RV64IA-ZACAS-NEXT: mv a5, a4 +; RV64IA-ZACAS-NEXT: mv a7, a2 +; RV64IA-ZACAS-NEXT: mv a4, a3 +; RV64IA-ZACAS-NEXT: mv a6, a1 +; RV64IA-ZACAS-NEXT: amocas.q.aqrl a6, a4, (a0) +; RV64IA-ZACAS-NEXT: ret +; +; RV64IA-TSO-LABEL: cmpxchg_i128_acq_rel_acquire: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: addi sp, sp, -32 +; RV64IA-TSO-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; RV64IA-TSO-NEXT: mv a6, a4 +; RV64IA-TSO-NEXT: sd a2, 8(sp) +; RV64IA-TSO-NEXT: sd a1, 0(sp) +; RV64IA-TSO-NEXT: mv a1, sp +; RV64IA-TSO-NEXT: li a4, 4 +; RV64IA-TSO-NEXT: li a5, 2 +; RV64IA-TSO-NEXT: mv a2, a3 +; RV64IA-TSO-NEXT: mv a3, a6 +; RV64IA-TSO-NEXT: call __atomic_compare_exchange_16@plt +; RV64IA-TSO-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; RV64IA-TSO-NEXT: addi sp, sp, 32 +; RV64IA-TSO-NEXT: ret + %res = cmpxchg ptr %ptr, i128 %cmp, i128 %val acq_rel acquire + ret void +} + +define void @cmpxchg_i128_seq_cst_monotonic(ptr %ptr, i128 %cmp, i128 %val) nounwind { +; RV32I-LABEL: cmpxchg_i128_seq_cst_monotonic: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -48 +; RV32I-NEXT: sw ra, 44(sp) # 4-byte Folded Spill +; RV32I-NEXT: mv a5, a0 +; RV32I-NEXT: lw a0, 0(a2) +; RV32I-NEXT: lw a3, 4(a2) +; RV32I-NEXT: lw a4, 8(a2) +; RV32I-NEXT: lw a2, 12(a2) +; RV32I-NEXT: lw a6, 12(a1) +; RV32I-NEXT: lw a7, 8(a1) +; RV32I-NEXT: lw t0, 4(a1) +; RV32I-NEXT: lw a1, 0(a1) +; RV32I-NEXT: sw a6, 36(sp) +; RV32I-NEXT: sw a7, 32(sp) +; RV32I-NEXT: sw t0, 28(sp) +; RV32I-NEXT: sw a1, 24(sp) +; RV32I-NEXT: sw a2, 20(sp) +; RV32I-NEXT: sw a4, 16(sp) +; RV32I-NEXT: sw 
a3, 12(sp) +; RV32I-NEXT: sw a0, 8(sp) +; RV32I-NEXT: li a0, 16 +; RV32I-NEXT: addi a2, sp, 24 +; RV32I-NEXT: addi a3, sp, 8 +; RV32I-NEXT: li a4, 5 +; RV32I-NEXT: mv a1, a5 +; RV32I-NEXT: li a5, 0 +; RV32I-NEXT: call __atomic_compare_exchange@plt +; RV32I-NEXT: lw ra, 44(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 48 +; RV32I-NEXT: ret +; +; RV32IA-LABEL: cmpxchg_i128_seq_cst_monotonic: +; RV32IA: # %bb.0: +; RV32IA-NEXT: addi sp, sp, -48 +; RV32IA-NEXT: sw ra, 44(sp) # 4-byte Folded Spill +; RV32IA-NEXT: mv a5, a0 +; RV32IA-NEXT: lw a0, 0(a2) +; RV32IA-NEXT: lw a3, 4(a2) +; RV32IA-NEXT: lw a4, 8(a2) +; RV32IA-NEXT: lw a2, 12(a2) +; RV32IA-NEXT: lw a6, 12(a1) +; RV32IA-NEXT: lw a7, 8(a1) +; RV32IA-NEXT: lw t0, 4(a1) +; RV32IA-NEXT: lw a1, 0(a1) +; RV32IA-NEXT: sw a6, 36(sp) +; RV32IA-NEXT: sw a7, 32(sp) +; RV32IA-NEXT: sw t0, 28(sp) +; RV32IA-NEXT: sw a1, 24(sp) +; RV32IA-NEXT: sw a2, 20(sp) +; RV32IA-NEXT: sw a4, 16(sp) +; RV32IA-NEXT: sw a3, 12(sp) +; RV32IA-NEXT: sw a0, 8(sp) +; RV32IA-NEXT: li a0, 16 +; RV32IA-NEXT: addi a2, sp, 24 +; RV32IA-NEXT: addi a3, sp, 8 +; RV32IA-NEXT: li a4, 5 +; RV32IA-NEXT: mv a1, a5 +; RV32IA-NEXT: li a5, 0 +; RV32IA-NEXT: call __atomic_compare_exchange@plt +; RV32IA-NEXT: lw ra, 44(sp) # 4-byte Folded Reload +; RV32IA-NEXT: addi sp, sp, 48 +; RV32IA-NEXT: ret +; +; RV64I-LABEL: cmpxchg_i128_seq_cst_monotonic: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -32 +; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; RV64I-NEXT: mv a5, a4 +; RV64I-NEXT: sd a2, 8(sp) +; RV64I-NEXT: sd a1, 0(sp) +; RV64I-NEXT: mv a1, sp +; RV64I-NEXT: li a4, 5 +; RV64I-NEXT: mv a2, a3 +; RV64I-NEXT: mv a3, a5 +; RV64I-NEXT: li a5, 0 +; RV64I-NEXT: call __atomic_compare_exchange_16@plt +; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 32 +; RV64I-NEXT: ret +; +; RV64IA-WMO-LABEL: cmpxchg_i128_seq_cst_monotonic: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: addi sp, sp, -32 +; RV64IA-WMO-NEXT: sd ra, 24(sp) # 8-byte 
Folded Spill +; RV64IA-WMO-NEXT: mv a5, a4 +; RV64IA-WMO-NEXT: sd a2, 8(sp) +; RV64IA-WMO-NEXT: sd a1, 0(sp) +; RV64IA-WMO-NEXT: mv a1, sp +; RV64IA-WMO-NEXT: li a4, 5 +; RV64IA-WMO-NEXT: mv a2, a3 +; RV64IA-WMO-NEXT: mv a3, a5 +; RV64IA-WMO-NEXT: li a5, 0 +; RV64IA-WMO-NEXT: call __atomic_compare_exchange_16@plt +; RV64IA-WMO-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; RV64IA-WMO-NEXT: addi sp, sp, 32 +; RV64IA-WMO-NEXT: ret +; +; RV64IA-ZACAS-LABEL: cmpxchg_i128_seq_cst_monotonic: +; RV64IA-ZACAS: # %bb.0: +; RV64IA-ZACAS-NEXT: mv a5, a4 +; RV64IA-ZACAS-NEXT: mv a7, a2 +; RV64IA-ZACAS-NEXT: mv a4, a3 +; RV64IA-ZACAS-NEXT: mv a6, a1 +; RV64IA-ZACAS-NEXT: amocas.q.aqrl a6, a4, (a0) +; RV64IA-ZACAS-NEXT: ret +; +; RV64IA-TSO-LABEL: cmpxchg_i128_seq_cst_monotonic: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: addi sp, sp, -32 +; RV64IA-TSO-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; RV64IA-TSO-NEXT: mv a5, a4 +; RV64IA-TSO-NEXT: sd a2, 8(sp) +; RV64IA-TSO-NEXT: sd a1, 0(sp) +; RV64IA-TSO-NEXT: mv a1, sp +; RV64IA-TSO-NEXT: li a4, 5 +; RV64IA-TSO-NEXT: mv a2, a3 +; RV64IA-TSO-NEXT: mv a3, a5 +; RV64IA-TSO-NEXT: li a5, 0 +; RV64IA-TSO-NEXT: call __atomic_compare_exchange_16@plt +; RV64IA-TSO-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; RV64IA-TSO-NEXT: addi sp, sp, 32 +; RV64IA-TSO-NEXT: ret + %res = cmpxchg ptr %ptr, i128 %cmp, i128 %val seq_cst monotonic + ret void +} + +define void @cmpxchg_i128_seq_cst_acquire(ptr %ptr, i128 %cmp, i128 %val) nounwind { +; RV32I-LABEL: cmpxchg_i128_seq_cst_acquire: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -48 +; RV32I-NEXT: sw ra, 44(sp) # 4-byte Folded Spill +; RV32I-NEXT: mv a6, a0 +; RV32I-NEXT: lw a0, 0(a2) +; RV32I-NEXT: lw a3, 4(a2) +; RV32I-NEXT: lw a4, 8(a2) +; RV32I-NEXT: lw a2, 12(a2) +; RV32I-NEXT: lw a5, 12(a1) +; RV32I-NEXT: lw a7, 8(a1) +; RV32I-NEXT: lw t0, 4(a1) +; RV32I-NEXT: lw a1, 0(a1) +; RV32I-NEXT: sw a5, 36(sp) +; RV32I-NEXT: sw a7, 32(sp) +; RV32I-NEXT: sw t0, 28(sp) +; RV32I-NEXT: sw a1, 24(sp) +; 
RV32I-NEXT: sw a2, 20(sp) +; RV32I-NEXT: sw a4, 16(sp) +; RV32I-NEXT: sw a3, 12(sp) +; RV32I-NEXT: sw a0, 8(sp) +; RV32I-NEXT: li a0, 16 +; RV32I-NEXT: addi a2, sp, 24 +; RV32I-NEXT: addi a3, sp, 8 +; RV32I-NEXT: li a4, 5 +; RV32I-NEXT: li a5, 2 +; RV32I-NEXT: mv a1, a6 +; RV32I-NEXT: call __atomic_compare_exchange@plt +; RV32I-NEXT: lw ra, 44(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 48 +; RV32I-NEXT: ret +; +; RV32IA-LABEL: cmpxchg_i128_seq_cst_acquire: +; RV32IA: # %bb.0: +; RV32IA-NEXT: addi sp, sp, -48 +; RV32IA-NEXT: sw ra, 44(sp) # 4-byte Folded Spill +; RV32IA-NEXT: mv a6, a0 +; RV32IA-NEXT: lw a0, 0(a2) +; RV32IA-NEXT: lw a3, 4(a2) +; RV32IA-NEXT: lw a4, 8(a2) +; RV32IA-NEXT: lw a2, 12(a2) +; RV32IA-NEXT: lw a5, 12(a1) +; RV32IA-NEXT: lw a7, 8(a1) +; RV32IA-NEXT: lw t0, 4(a1) +; RV32IA-NEXT: lw a1, 0(a1) +; RV32IA-NEXT: sw a5, 36(sp) +; RV32IA-NEXT: sw a7, 32(sp) +; RV32IA-NEXT: sw t0, 28(sp) +; RV32IA-NEXT: sw a1, 24(sp) +; RV32IA-NEXT: sw a2, 20(sp) +; RV32IA-NEXT: sw a4, 16(sp) +; RV32IA-NEXT: sw a3, 12(sp) +; RV32IA-NEXT: sw a0, 8(sp) +; RV32IA-NEXT: li a0, 16 +; RV32IA-NEXT: addi a2, sp, 24 +; RV32IA-NEXT: addi a3, sp, 8 +; RV32IA-NEXT: li a4, 5 +; RV32IA-NEXT: li a5, 2 +; RV32IA-NEXT: mv a1, a6 +; RV32IA-NEXT: call __atomic_compare_exchange@plt +; RV32IA-NEXT: lw ra, 44(sp) # 4-byte Folded Reload +; RV32IA-NEXT: addi sp, sp, 48 +; RV32IA-NEXT: ret +; +; RV64I-LABEL: cmpxchg_i128_seq_cst_acquire: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -32 +; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; RV64I-NEXT: mv a6, a4 +; RV64I-NEXT: sd a2, 8(sp) +; RV64I-NEXT: sd a1, 0(sp) +; RV64I-NEXT: mv a1, sp +; RV64I-NEXT: li a4, 5 +; RV64I-NEXT: li a5, 2 +; RV64I-NEXT: mv a2, a3 +; RV64I-NEXT: mv a3, a6 +; RV64I-NEXT: call __atomic_compare_exchange_16@plt +; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 32 +; RV64I-NEXT: ret +; +; RV64IA-WMO-LABEL: cmpxchg_i128_seq_cst_acquire: +; RV64IA-WMO: # %bb.0: +; 
RV64IA-WMO-NEXT: addi sp, sp, -32 +; RV64IA-WMO-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; RV64IA-WMO-NEXT: mv a6, a4 +; RV64IA-WMO-NEXT: sd a2, 8(sp) +; RV64IA-WMO-NEXT: sd a1, 0(sp) +; RV64IA-WMO-NEXT: mv a1, sp +; RV64IA-WMO-NEXT: li a4, 5 +; RV64IA-WMO-NEXT: li a5, 2 +; RV64IA-WMO-NEXT: mv a2, a3 +; RV64IA-WMO-NEXT: mv a3, a6 +; RV64IA-WMO-NEXT: call __atomic_compare_exchange_16@plt +; RV64IA-WMO-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; RV64IA-WMO-NEXT: addi sp, sp, 32 +; RV64IA-WMO-NEXT: ret +; +; RV64IA-ZACAS-LABEL: cmpxchg_i128_seq_cst_acquire: +; RV64IA-ZACAS: # %bb.0: +; RV64IA-ZACAS-NEXT: mv a5, a4 +; RV64IA-ZACAS-NEXT: mv a7, a2 +; RV64IA-ZACAS-NEXT: mv a4, a3 +; RV64IA-ZACAS-NEXT: mv a6, a1 +; RV64IA-ZACAS-NEXT: amocas.q.aqrl a6, a4, (a0) +; RV64IA-ZACAS-NEXT: ret +; +; RV64IA-TSO-LABEL: cmpxchg_i128_seq_cst_acquire: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: addi sp, sp, -32 +; RV64IA-TSO-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; RV64IA-TSO-NEXT: mv a6, a4 +; RV64IA-TSO-NEXT: sd a2, 8(sp) +; RV64IA-TSO-NEXT: sd a1, 0(sp) +; RV64IA-TSO-NEXT: mv a1, sp +; RV64IA-TSO-NEXT: li a4, 5 +; RV64IA-TSO-NEXT: li a5, 2 +; RV64IA-TSO-NEXT: mv a2, a3 +; RV64IA-TSO-NEXT: mv a3, a6 +; RV64IA-TSO-NEXT: call __atomic_compare_exchange_16@plt +; RV64IA-TSO-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; RV64IA-TSO-NEXT: addi sp, sp, 32 +; RV64IA-TSO-NEXT: ret + %res = cmpxchg ptr %ptr, i128 %cmp, i128 %val seq_cst acquire + ret void +} + +define void @cmpxchg_i128_seq_cst_seq_cst(ptr %ptr, i128 %cmp, i128 %val) nounwind { +; RV32I-LABEL: cmpxchg_i128_seq_cst_seq_cst: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -48 +; RV32I-NEXT: sw ra, 44(sp) # 4-byte Folded Spill +; RV32I-NEXT: mv a6, a0 +; RV32I-NEXT: lw a0, 0(a2) +; RV32I-NEXT: lw a3, 4(a2) +; RV32I-NEXT: lw a4, 8(a2) +; RV32I-NEXT: lw a2, 12(a2) +; RV32I-NEXT: lw a5, 12(a1) +; RV32I-NEXT: lw a7, 8(a1) +; RV32I-NEXT: lw t0, 4(a1) +; RV32I-NEXT: lw a1, 0(a1) +; RV32I-NEXT: sw a5, 36(sp) +; RV32I-NEXT: sw a7, 
32(sp) +; RV32I-NEXT: sw t0, 28(sp) +; RV32I-NEXT: sw a1, 24(sp) +; RV32I-NEXT: sw a2, 20(sp) +; RV32I-NEXT: sw a4, 16(sp) +; RV32I-NEXT: sw a3, 12(sp) +; RV32I-NEXT: sw a0, 8(sp) +; RV32I-NEXT: li a0, 16 +; RV32I-NEXT: addi a2, sp, 24 +; RV32I-NEXT: addi a3, sp, 8 +; RV32I-NEXT: li a4, 5 +; RV32I-NEXT: li a5, 5 +; RV32I-NEXT: mv a1, a6 +; RV32I-NEXT: call __atomic_compare_exchange@plt +; RV32I-NEXT: lw ra, 44(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 48 +; RV32I-NEXT: ret +; +; RV32IA-LABEL: cmpxchg_i128_seq_cst_seq_cst: +; RV32IA: # %bb.0: +; RV32IA-NEXT: addi sp, sp, -48 +; RV32IA-NEXT: sw ra, 44(sp) # 4-byte Folded Spill +; RV32IA-NEXT: mv a6, a0 +; RV32IA-NEXT: lw a0, 0(a2) +; RV32IA-NEXT: lw a3, 4(a2) +; RV32IA-NEXT: lw a4, 8(a2) +; RV32IA-NEXT: lw a2, 12(a2) +; RV32IA-NEXT: lw a5, 12(a1) +; RV32IA-NEXT: lw a7, 8(a1) +; RV32IA-NEXT: lw t0, 4(a1) +; RV32IA-NEXT: lw a1, 0(a1) +; RV32IA-NEXT: sw a5, 36(sp) +; RV32IA-NEXT: sw a7, 32(sp) +; RV32IA-NEXT: sw t0, 28(sp) +; RV32IA-NEXT: sw a1, 24(sp) +; RV32IA-NEXT: sw a2, 20(sp) +; RV32IA-NEXT: sw a4, 16(sp) +; RV32IA-NEXT: sw a3, 12(sp) +; RV32IA-NEXT: sw a0, 8(sp) +; RV32IA-NEXT: li a0, 16 +; RV32IA-NEXT: addi a2, sp, 24 +; RV32IA-NEXT: addi a3, sp, 8 +; RV32IA-NEXT: li a4, 5 +; RV32IA-NEXT: li a5, 5 +; RV32IA-NEXT: mv a1, a6 +; RV32IA-NEXT: call __atomic_compare_exchange@plt +; RV32IA-NEXT: lw ra, 44(sp) # 4-byte Folded Reload +; RV32IA-NEXT: addi sp, sp, 48 +; RV32IA-NEXT: ret +; +; RV64I-LABEL: cmpxchg_i128_seq_cst_seq_cst: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -32 +; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; RV64I-NEXT: mv a6, a4 +; RV64I-NEXT: sd a2, 8(sp) +; RV64I-NEXT: sd a1, 0(sp) +; RV64I-NEXT: mv a1, sp +; RV64I-NEXT: li a4, 5 +; RV64I-NEXT: li a5, 5 +; RV64I-NEXT: mv a2, a3 +; RV64I-NEXT: mv a3, a6 +; RV64I-NEXT: call __atomic_compare_exchange_16@plt +; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 32 +; RV64I-NEXT: ret +; +; RV64IA-WMO-LABEL: 
cmpxchg_i128_seq_cst_seq_cst: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: addi sp, sp, -32 +; RV64IA-WMO-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; RV64IA-WMO-NEXT: mv a6, a4 +; RV64IA-WMO-NEXT: sd a2, 8(sp) +; RV64IA-WMO-NEXT: sd a1, 0(sp) +; RV64IA-WMO-NEXT: mv a1, sp +; RV64IA-WMO-NEXT: li a4, 5 +; RV64IA-WMO-NEXT: li a5, 5 +; RV64IA-WMO-NEXT: mv a2, a3 +; RV64IA-WMO-NEXT: mv a3, a6 +; RV64IA-WMO-NEXT: call __atomic_compare_exchange_16@plt +; RV64IA-WMO-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; RV64IA-WMO-NEXT: addi sp, sp, 32 +; RV64IA-WMO-NEXT: ret +; +; RV64IA-ZACAS-LABEL: cmpxchg_i128_seq_cst_seq_cst: +; RV64IA-ZACAS: # %bb.0: +; RV64IA-ZACAS-NEXT: mv a5, a4 +; RV64IA-ZACAS-NEXT: mv a7, a2 +; RV64IA-ZACAS-NEXT: mv a4, a3 +; RV64IA-ZACAS-NEXT: mv a6, a1 +; RV64IA-ZACAS-NEXT: amocas.q.aqrl a6, a4, (a0) +; RV64IA-ZACAS-NEXT: ret +; +; RV64IA-TSO-LABEL: cmpxchg_i128_seq_cst_seq_cst: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: addi sp, sp, -32 +; RV64IA-TSO-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; RV64IA-TSO-NEXT: mv a6, a4 +; RV64IA-TSO-NEXT: sd a2, 8(sp) +; RV64IA-TSO-NEXT: sd a1, 0(sp) +; RV64IA-TSO-NEXT: mv a1, sp +; RV64IA-TSO-NEXT: li a4, 5 +; RV64IA-TSO-NEXT: li a5, 5 +; RV64IA-TSO-NEXT: mv a2, a3 +; RV64IA-TSO-NEXT: mv a3, a6 +; RV64IA-TSO-NEXT: call __atomic_compare_exchange_16@plt +; RV64IA-TSO-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; RV64IA-TSO-NEXT: addi sp, sp, 32 +; RV64IA-TSO-NEXT: ret + %res = cmpxchg ptr %ptr, i128 %cmp, i128 %val seq_cst seq_cst + ret void +}