[Xtensa] Implement Xtensa S32C1I Option and atomics lowering. #137134
Conversation
@llvm/pr-subscribers-llvm-transforms @llvm/pr-subscribers-backend-xtensa

Author: Andrei Safronov (andreisfr)

Changes: Implement Xtensa S32C1I Option and use the s32c1i instruction to implement atomic operations.

Patch is 279.18 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/137134.diff

13 Files Affected:
diff --git a/llvm/lib/Target/Xtensa/Disassembler/XtensaDisassembler.cpp b/llvm/lib/Target/Xtensa/Disassembler/XtensaDisassembler.cpp
index 6b355e6363b22..f3873ca4dbbe2 100644
--- a/llvm/lib/Target/Xtensa/Disassembler/XtensaDisassembler.cpp
+++ b/llvm/lib/Target/Xtensa/Disassembler/XtensaDisassembler.cpp
@@ -114,9 +114,10 @@ static DecodeStatus DecodeMR23RegisterClass(MCInst &Inst, uint64_t RegNo,
}
const MCPhysReg SRDecoderTable[] = {
- Xtensa::SAR, 3, Xtensa::ACCLO, 16, Xtensa::ACCHI, 17,
- Xtensa::M0, 32, Xtensa::M1, 33, Xtensa::M2, 34,
- Xtensa::M3, 35, Xtensa::WINDOWBASE, 72, Xtensa::WINDOWSTART, 73};
+ Xtensa::SAR, 3, Xtensa::ACCLO, 16, Xtensa::ACCHI, 17,
+ Xtensa::SCOMPARE1, 12, Xtensa::M0, 32, Xtensa::M1, 33,
+ Xtensa::M2, 34, Xtensa::M3, 35, Xtensa::WINDOWBASE, 72,
+ Xtensa::WINDOWSTART, 73, Xtensa::ATOMCTL, 99};
static DecodeStatus DecodeSRRegisterClass(MCInst &Inst, uint64_t RegNo,
uint64_t Address,
diff --git a/llvm/lib/Target/Xtensa/MCTargetDesc/XtensaMCTargetDesc.cpp b/llvm/lib/Target/Xtensa/MCTargetDesc/XtensaMCTargetDesc.cpp
index 792faf811aca9..59b7582c0268f 100644
--- a/llvm/lib/Target/Xtensa/MCTargetDesc/XtensaMCTargetDesc.cpp
+++ b/llvm/lib/Target/Xtensa/MCTargetDesc/XtensaMCTargetDesc.cpp
@@ -86,6 +86,9 @@ bool Xtensa::checkRegister(MCRegister RegNo, const FeatureBitset &FeatureBits) {
case Xtensa::WINDOWBASE:
case Xtensa::WINDOWSTART:
return FeatureBits[Xtensa::FeatureWindowed];
+ case Xtensa::ATOMCTL:
+ case Xtensa::SCOMPARE1:
+ return FeatureBits[Xtensa::FeatureWindowed];
case Xtensa::NoRegister:
return false;
}
diff --git a/llvm/lib/Target/Xtensa/XtensaFeatures.td b/llvm/lib/Target/Xtensa/XtensaFeatures.td
index 2a47214946401..623573840953b 100644
--- a/llvm/lib/Target/Xtensa/XtensaFeatures.td
+++ b/llvm/lib/Target/Xtensa/XtensaFeatures.td
@@ -67,3 +67,19 @@ def FeatureDiv32 : SubtargetFeature<"div32", "HasDiv32", "true",
"Enable Xtensa Div32 option">;
def HasDiv32 : Predicate<"Subtarget->hasDiv32()">,
AssemblerPredicate<(all_of FeatureDiv32)>;
+
+def FeatureS32C1I : SubtargetFeature<"s32c1i", "HasS32C1I", "true",
+ "Enable Xtensa S32C1I option">;
+def HasS32C1I : Predicate<"Subtarget->hasS32C1I()">,
+ AssemblerPredicate<(all_of FeatureS32C1I)>;
+
+// Assume that lock-free native-width atomics are available, even if the target
+// and operating system combination would not usually provide them. The user
+// is responsible for providing any necessary __sync implementations. Code
+// built with this feature is not ABI-compatible with code built without this
+// feature, if atomic variables are exposed across the ABI boundary.
+def FeatureForcedAtomics : SubtargetFeature<"forced-atomics", "HasForcedAtomics", "true",
+ "Assume that lock-free native-width atomics are available">;
+def HasForcedAtomics : Predicate<"Subtarget->hasForcedAtomics()">,
+ AssemblerPredicate<(all_of FeatureForcedAtomics)>;
+def HasAtomicLdSt : Predicate<"Subtarget->hasS32C1I() || Subtarget->hasForcedAtomics()">;
diff --git a/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp b/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp
index b17840aad9b4d..e74c5c1e61b5d 100644
--- a/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp
+++ b/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp
@@ -175,6 +175,40 @@ XtensaTargetLowering::XtensaTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::VACOPY, MVT::Other, Custom);
setOperationAction(ISD::VAEND, MVT::Other, Expand);
+ // to have the best chance and doing something good with fences custom lower
+ // them
+ setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
+
+ if (!Subtarget.hasS32C1I()) {
+ for (unsigned I = MVT::FIRST_INTEGER_VALUETYPE;
+ I <= MVT::LAST_INTEGER_VALUETYPE; ++I) {
+ MVT VT = MVT::SimpleValueType(I);
+ if (isTypeLegal(VT)) {
+ setOperationAction(ISD::ATOMIC_CMP_SWAP, VT, Expand);
+ setOperationAction(ISD::ATOMIC_SWAP, VT, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_ADD, VT, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_AND, VT, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_OR, VT, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_XOR, VT, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_NAND, VT, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_MIN, VT, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_MAX, VT, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_UMIN, VT, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_UMAX, VT, Expand);
+ }
+ }
+ }
+
+ if (Subtarget.hasS32C1I()) {
+ setMaxAtomicSizeInBitsSupported(32);
+ setMinCmpXchgSizeInBits(32);
+ } else if (Subtarget.hasForcedAtomics()) {
+ setMaxAtomicSizeInBitsSupported(32);
+ } else {
+ setMaxAtomicSizeInBitsSupported(0);
+ }
+
// Compute derived properties from the register classes
computeRegisterProperties(STI.getRegisterInfo());
}
@@ -1241,6 +1275,13 @@ bool XtensaTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,
return false;
}
+SDValue XtensaTargetLowering::LowerATOMIC_FENCE(SDValue Op,
+ SelectionDAG &DAG) const {
+SDLoc DL(Op);
+SDValue Chain = Op.getOperand(0);
+return DAG.getNode(XtensaISD::MEMW, DL, MVT::Other, Chain);
+}
+
SDValue XtensaTargetLowering::LowerOperation(SDValue Op,
SelectionDAG &DAG) const {
switch (Op.getOpcode()) {
@@ -1282,6 +1323,8 @@ SDValue XtensaTargetLowering::LowerOperation(SDValue Op,
return LowerShiftRightParts(Op, DAG, true);
case ISD::SRL_PARTS:
return LowerShiftRightParts(Op, DAG, false);
+ case ISD::ATOMIC_FENCE:
+ return LowerATOMIC_FENCE(Op, DAG);
default:
report_fatal_error("Unexpected node to lower");
}
@@ -1383,6 +1426,731 @@ XtensaTargetLowering::emitSelectCC(MachineInstr &MI,
return SinkMBB;
}
+// Emit instructions for atomic_cmp_swap node for 8/16 bit operands
+MachineBasicBlock *
+XtensaTargetLowering::emitAtomicCmpSwap(MachineInstr &MI, MachineBasicBlock *BB,
+ int isByteOperand) const {
+ const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
+ DebugLoc DL = MI.getDebugLoc();
+
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction::iterator It = ++BB->getIterator();
+
+ MachineBasicBlock *thisBB = BB;
+ MachineFunction *F = BB->getParent();
+ MachineBasicBlock *BBLoop = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *BBExit = F->CreateMachineBasicBlock(LLVM_BB);
+
+ F->insert(It, BBLoop);
+ F->insert(It, BBExit);
+
+ // Transfer the remainder of BB and its successor edges to BBExit.
+ BBExit->splice(BBExit->begin(), BB,
+ std::next(MachineBasicBlock::iterator(MI)), BB->end());
+ BBExit->transferSuccessorsAndUpdatePHIs(BB);
+
+ BB->addSuccessor(BBLoop);
+
+ MachineOperand &Res = MI.getOperand(0);
+ MachineOperand &AtomValAddr = MI.getOperand(1);
+ MachineOperand &CmpVal = MI.getOperand(2);
+ MachineOperand &SwpVal = MI.getOperand(3);
+
+ MachineFunction *MF = BB->getParent();
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+ const TargetRegisterClass *RC = getRegClassFor(MVT::i32);
+
+ unsigned R1 = MRI.createVirtualRegister(RC);
+ BuildMI(*BB, MI, DL, TII.get(Xtensa::MOVI), R1).addImm(3);
+
+ unsigned ByteOffs = MRI.createVirtualRegister(RC);
+ BuildMI(*BB, MI, DL, TII.get(Xtensa::AND), ByteOffs)
+ .addReg(R1)
+ .addReg(AtomValAddr.getReg());
+
+ unsigned AddrAlign = MRI.createVirtualRegister(RC);
+ BuildMI(*BB, MI, DL, TII.get(Xtensa::SUB), AddrAlign)
+ .addReg(AtomValAddr.getReg())
+ .addReg(ByteOffs);
+
+ unsigned BitOffs = MRI.createVirtualRegister(RC);
+ BuildMI(*BB, MI, DL, TII.get(Xtensa::SLLI), BitOffs)
+ .addReg(ByteOffs)
+ .addImm(3);
+
+ unsigned Mask1 = MRI.createVirtualRegister(RC);
+ if (isByteOperand) {
+ BuildMI(*BB, MI, DL, TII.get(Xtensa::MOVI), Mask1).addImm(0xff);
+ } else {
+ unsigned R2 = MRI.createVirtualRegister(RC);
+ BuildMI(*BB, MI, DL, TII.get(Xtensa::MOVI), R2).addImm(1);
+ unsigned R3 = MRI.createVirtualRegister(RC);
+ BuildMI(*BB, MI, DL, TII.get(Xtensa::SLLI), R3).addReg(R2).addImm(16);
+ BuildMI(*BB, MI, DL, TII.get(Xtensa::ADDI), Mask1).addReg(R3).addImm(-1);
+ }
+
+ BuildMI(*BB, MI, DL, TII.get(Xtensa::SSL)).addReg(BitOffs);
+
+ unsigned R2 = MRI.createVirtualRegister(RC);
+ BuildMI(*BB, MI, DL, TII.get(Xtensa::MOVI), R2).addImm(-1);
+
+ unsigned Mask2 = MRI.createVirtualRegister(RC);
+ BuildMI(*BB, MI, DL, TII.get(Xtensa::SLL), Mask2).addReg(Mask1);
+
+ unsigned Mask3 = MRI.createVirtualRegister(RC);
+ BuildMI(*BB, MI, DL, TII.get(Xtensa::XOR), Mask3).addReg(Mask2).addReg(R2);
+
+ unsigned R3 = MRI.createVirtualRegister(RC);
+ BuildMI(*BB, MI, DL, TII.get(Xtensa::L32I), R3).addReg(AddrAlign).addImm(0);
+
+ unsigned R4 = MRI.createVirtualRegister(RC);
+ BuildMI(*BB, MI, DL, TII.get(Xtensa::AND), R4).addReg(R3).addReg(Mask3);
+
+ unsigned Cmp1 = MRI.createVirtualRegister(RC);
+ BuildMI(*BB, MI, DL, TII.get(Xtensa::SLL), Cmp1).addReg(CmpVal.getReg());
+
+ unsigned Swp1 = MRI.createVirtualRegister(RC);
+ BuildMI(*BB, MI, DL, TII.get(Xtensa::SLL), Swp1).addReg(SwpVal.getReg());
+
+ BB = BBLoop;
+
+ unsigned MaskPhi = MRI.createVirtualRegister(RC);
+ unsigned MaskLoop = MRI.createVirtualRegister(RC);
+
+ BuildMI(*BB, BB->begin(), DL, TII.get(Xtensa::PHI), MaskPhi)
+ .addReg(MaskLoop)
+ .addMBB(BBLoop)
+ .addReg(R4)
+ .addMBB(thisBB);
+
+ unsigned Cmp2 = MRI.createVirtualRegister(RC);
+ BuildMI(BB, DL, TII.get(Xtensa::OR), Cmp2).addReg(Cmp1).addReg(MaskPhi);
+
+ unsigned Swp2 = MRI.createVirtualRegister(RC);
+ BuildMI(BB, DL, TII.get(Xtensa::OR), Swp2).addReg(Swp1).addReg(MaskPhi);
+
+ BuildMI(BB, DL, TII.get(Xtensa::WSR), Xtensa::SCOMPARE1).addReg(Cmp2);
+
+ unsigned Swp3 = MRI.createVirtualRegister(RC);
+ BuildMI(BB, DL, TII.get(Xtensa::S32C1I), Swp3)
+ .addReg(Swp2)
+ .addReg(AddrAlign)
+ .addImm(0);
+
+ BuildMI(BB, DL, TII.get(Xtensa::AND), MaskLoop).addReg(Swp3).addReg(Mask3);
+
+ BuildMI(BB, DL, TII.get(Xtensa::BNE))
+ .addReg(MaskLoop)
+ .addReg(MaskPhi)
+ .addMBB(BBLoop);
+
+ BB->addSuccessor(BBLoop);
+ BB->addSuccessor(BBExit);
+
+ BB = BBExit;
+ auto St = BBExit->begin();
+
+ unsigned R5 = MRI.createVirtualRegister(RC);
+ BuildMI(*BB, St, DL, TII.get(Xtensa::SSR)).addReg(BitOffs);
+
+ BuildMI(*BB, St, DL, TII.get(Xtensa::SRL), R5).addReg(Swp3);
+
+ BuildMI(*BB, St, DL, TII.get(Xtensa::AND), Res.getReg())
+ .addReg(R5)
+ .addReg(Mask1);
+
+ MI.eraseFromParent(); // The pseudo instruction is gone now.
+ return BB;
+}
+
+// Emit instructions for atomic_swap node for 8/16 bit operands
+MachineBasicBlock *
+XtensaTargetLowering::emitAtomicSwap(MachineInstr &MI, MachineBasicBlock *BB,
+ int isByteOperand) const {
+ const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
+ DebugLoc DL = MI.getDebugLoc();
+
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction::iterator It = ++BB->getIterator();
+
+ MachineFunction *F = BB->getParent();
+ MachineBasicBlock *BBLoop1 = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *BBLoop2 = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *BBLoop3 = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *BBLoop4 = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *BBExit = F->CreateMachineBasicBlock(LLVM_BB);
+
+ F->insert(It, BBLoop1);
+ F->insert(It, BBLoop2);
+ F->insert(It, BBLoop3);
+ F->insert(It, BBLoop4);
+ F->insert(It, BBExit);
+
+ // Transfer the remainder of BB and its successor edges to BBExit.
+ BBExit->splice(BBExit->begin(), BB,
+ std::next(MachineBasicBlock::iterator(MI)), BB->end());
+ BBExit->transferSuccessorsAndUpdatePHIs(BB);
+
+ BB->addSuccessor(BBLoop1);
+ BBLoop1->addSuccessor(BBLoop2);
+ BBLoop2->addSuccessor(BBLoop3);
+ BBLoop2->addSuccessor(BBLoop4);
+ BBLoop3->addSuccessor(BBLoop2);
+ BBLoop3->addSuccessor(BBLoop4);
+ BBLoop4->addSuccessor(BBLoop1);
+ BBLoop4->addSuccessor(BBExit);
+
+ MachineOperand &Res = MI.getOperand(0);
+ MachineOperand &AtomValAddr = MI.getOperand(1);
+ MachineOperand &SwpVal = MI.getOperand(2);
+
+ MachineFunction *MF = BB->getParent();
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+ const TargetRegisterClass *RC = getRegClassFor(MVT::i32);
+
+ unsigned R1 = MRI.createVirtualRegister(RC);
+ BuildMI(*BB, MI, DL, TII.get(Xtensa::MOVI), R1).addImm(3);
+
+ unsigned ByteOffs = MRI.createVirtualRegister(RC);
+ BuildMI(*BB, MI, DL, TII.get(Xtensa::AND), ByteOffs)
+ .addReg(R1)
+ .addReg(AtomValAddr.getReg());
+
+ unsigned AddrAlign = MRI.createVirtualRegister(RC);
+ BuildMI(*BB, MI, DL, TII.get(Xtensa::SUB), AddrAlign)
+ .addReg(AtomValAddr.getReg())
+ .addReg(ByteOffs);
+
+ unsigned BitOffs = MRI.createVirtualRegister(RC);
+ BuildMI(*BB, MI, DL, TII.get(Xtensa::SLLI), BitOffs)
+ .addReg(ByteOffs)
+ .addImm(3);
+
+ unsigned Mask1 = MRI.createVirtualRegister(RC);
+ if (isByteOperand) {
+ BuildMI(*BB, MI, DL, TII.get(Xtensa::MOVI), Mask1).addImm(0xff);
+ } else {
+ unsigned R2 = MRI.createVirtualRegister(RC);
+ BuildMI(*BB, MI, DL, TII.get(Xtensa::MOVI), R2).addImm(1);
+ unsigned R3 = MRI.createVirtualRegister(RC);
+ BuildMI(*BB, MI, DL, TII.get(Xtensa::SLLI), R3).addReg(R2).addImm(16);
+ BuildMI(*BB, MI, DL, TII.get(Xtensa::ADDI), Mask1).addReg(R3).addImm(-1);
+ }
+
+ BuildMI(*BB, MI, DL, TII.get(Xtensa::SSL)).addReg(BitOffs);
+
+ unsigned R2 = MRI.createVirtualRegister(RC);
+ BuildMI(*BB, MI, DL, TII.get(Xtensa::MOVI), R2).addImm(-1);
+
+ unsigned Mask2 = MRI.createVirtualRegister(RC);
+ BuildMI(*BB, MI, DL, TII.get(Xtensa::SLL), Mask2).addReg(Mask1);
+
+ unsigned Mask3 = MRI.createVirtualRegister(RC);
+ BuildMI(*BB, MI, DL, TII.get(Xtensa::XOR), Mask3).addReg(Mask2).addReg(R2);
+
+ unsigned R3 = MRI.createVirtualRegister(RC);
+ BuildMI(*BB, MI, DL, TII.get(Xtensa::L32I), R3).addReg(AddrAlign).addImm(0);
+
+ unsigned R4 = MRI.createVirtualRegister(RC);
+ BuildMI(*BB, MI, DL, TII.get(Xtensa::AND), R4).addReg(R3).addReg(Mask3);
+
+ unsigned SwpValShifted = MRI.createVirtualRegister(RC);
+ BuildMI(*BB, MI, DL, TII.get(Xtensa::SLL), SwpValShifted)
+ .addReg(SwpVal.getReg());
+
+ unsigned R5 = MRI.createVirtualRegister(RC);
+ BuildMI(*BB, MI, DL, TII.get(Xtensa::L32I), R5).addReg(AddrAlign).addImm(0);
+
+ unsigned AtomVal = MRI.createVirtualRegister(RC);
+ BuildMI(*BB, MI, DL, TII.get(Xtensa::AND), AtomVal).addReg(R5).addReg(Mask2);
+
+ unsigned AtomValPhi = MRI.createVirtualRegister(RC);
+ unsigned AtomValLoop = MRI.createVirtualRegister(RC);
+
+ BuildMI(*BBLoop1, BBLoop1->begin(), DL, TII.get(Xtensa::PHI), AtomValPhi)
+ .addReg(AtomValLoop)
+ .addMBB(BBLoop4)
+ .addReg(AtomVal)
+ .addMBB(BB);
+
+ BB = BBLoop1;
+
+ BuildMI(BB, DL, TII.get(Xtensa::MEMW));
+
+ unsigned R6 = MRI.createVirtualRegister(RC);
+ BuildMI(BB, DL, TII.get(Xtensa::L32I), R6).addReg(AddrAlign).addImm(0);
+
+ unsigned R7 = MRI.createVirtualRegister(RC);
+ BuildMI(BB, DL, TII.get(Xtensa::AND), R7).addReg(R6).addReg(Mask3);
+
+ unsigned MaskPhi = MRI.createVirtualRegister(RC);
+ unsigned MaskLoop = MRI.createVirtualRegister(RC);
+
+ BuildMI(*BBLoop2, BBLoop2->begin(), DL, TII.get(Xtensa::PHI), MaskPhi)
+ .addReg(MaskLoop)
+ .addMBB(BBLoop3)
+ .addReg(R7)
+ .addMBB(BBLoop1);
+
+ BB = BBLoop2;
+
+ unsigned Swp1 = MRI.createVirtualRegister(RC);
+ BuildMI(BB, DL, TII.get(Xtensa::OR), Swp1)
+ .addReg(SwpValShifted)
+ .addReg(MaskPhi);
+
+ unsigned AtomVal1 = MRI.createVirtualRegister(RC);
+ BuildMI(BB, DL, TII.get(Xtensa::OR), AtomVal1)
+ .addReg(AtomValPhi)
+ .addReg(MaskPhi);
+
+ BuildMI(BB, DL, TII.get(Xtensa::WSR), Xtensa::SCOMPARE1).addReg(AtomVal1);
+
+ unsigned Swp2 = MRI.createVirtualRegister(RC);
+ BuildMI(BB, DL, TII.get(Xtensa::S32C1I), Swp2)
+ .addReg(Swp1)
+ .addReg(AddrAlign)
+ .addImm(0);
+
+ BuildMI(BB, DL, TII.get(Xtensa::BEQ))
+ .addReg(AtomVal1)
+ .addReg(Swp2)
+ .addMBB(BBLoop4);
+
+ BB = BBLoop3;
+
+ BuildMI(BB, DL, TII.get(Xtensa::AND), MaskLoop).addReg(Swp2).addReg(Mask3);
+
+ BuildMI(BB, DL, TII.get(Xtensa::BNE))
+ .addReg(MaskLoop)
+ .addReg(MaskPhi)
+ .addMBB(BBLoop2);
+
+ BB = BBLoop4;
+
+ BuildMI(BB, DL, TII.get(Xtensa::AND), AtomValLoop).addReg(Swp2).addReg(Mask2);
+
+ BuildMI(BB, DL, TII.get(Xtensa::BNE))
+ .addReg(AtomValLoop)
+ .addReg(AtomValPhi)
+ .addMBB(BBLoop1);
+
+ BB = BBExit;
+
+ auto St = BB->begin();
+
+ unsigned R8 = MRI.createVirtualRegister(RC);
+
+ BuildMI(*BB, St, DL, TII.get(Xtensa::SSR)).addReg(BitOffs);
+ BuildMI(*BB, St, DL, TII.get(Xtensa::SRL), R8).addReg(AtomValLoop);
+
+ if (isByteOperand) {
+ BuildMI(*BB, St, DL, TII.get(Xtensa::SEXT), Res.getReg())
+ .addReg(R8)
+ .addImm(7);
+ } else {
+ BuildMI(*BB, St, DL, TII.get(Xtensa::SEXT), Res.getReg())
+ .addReg(R8)
+ .addImm(15);
+ }
+
+ MI.eraseFromParent(); // The pseudo instruction is gone now.
+ return BB;
+}
+
+// Emit instructions for atomic_swap node for 32 bit operands
+MachineBasicBlock *
+XtensaTargetLowering::emitAtomicSwap(MachineInstr &MI,
+ MachineBasicBlock *BB) const {
+ const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
+ DebugLoc DL = MI.getDebugLoc();
+
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction::iterator It = ++BB->getIterator();
+
+ MachineFunction *F = BB->getParent();
+ MachineBasicBlock *BBLoop = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *BBExit = F->CreateMachineBasicBlock(LLVM_BB);
+
+ F->insert(It, BBLoop);
+ F->insert(It, BBExit);
+
+ // Transfer the remainder of BB and its successor edges to BBExit.
+ BBExit->splice(BBExit->begin(), BB,
+ std::next(MachineBasicBlock::iterator(MI)), BB->end());
+ BBExit->transferSuccessorsAndUpdatePHIs(BB);
+
+ BB->addSuccessor(BBLoop);
+ BBLoop->addSuccessor(BBLoop);
+ BBLoop->addSuccessor(BBExit);
+
+ MachineOperand &Res = MI.getOperand(0);
+ MachineOperand &AtomValAddr = MI.getOperand(1);
+ MachineOperand &SwpVal = MI.getOperand(2);
+
+ MachineFunction *MF = BB->getParent();
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+ const TargetRegisterClass *RC = getRegClassFor(MVT::i32);
+
+ BuildMI(*BB, MI, DL, TII.get(Xtensa::MEMW));
+
+ unsigned AtomVal = MRI.createVirtualRegister(RC);
+ BuildMI(*BB, MI, DL, TII.get(Xtensa::L32I), AtomVal)
+ .addReg(AtomValAddr.getReg())
+ .addImm(0);
+
+ unsigned AtomValLoop = MRI.createVirtualRegister(RC);
+
+ BuildMI(*BBLoop, BBLoop->begin(), DL, TII.get(Xtensa::PHI), Res.getReg())
+ .addReg(AtomValLoop)
+ .addMBB(BBLoop)
+ .addReg(AtomVal)
+ .addMBB(BB);
+
+ BB = BBLoop;
+
+ BuildMI(BB, DL, TII.get(Xtensa::WSR), Xtensa::SCOMPARE1).addReg(Res.getReg());
+
+ BuildMI(BB, DL, TII.get(Xtensa::S32C1I), AtomValLoop)
+ .addReg(SwpVal.getReg())
+ .addReg(AtomValAddr.getReg())
+ .addImm(0);
+
+ BuildMI(BB, DL, TII.get(Xtensa::BNE))
+ .addReg(AtomValLoop)
+ .addReg(Res.getReg())
+ .addMBB(BBLoop);
+
+ MI.eraseFromParent(); // The pseudo instruction is gone now.
+ return BB;
+}
+
+MachineBasicBlock *XtensaTargetLowering::emitAtomicRMW(MachineInstr &MI,
+ MachineBasicBlock *BB,
+ unsigned Opcode,
+ bool inv,
+ bool minmax) const {
+ const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
+ DebugLoc DL = MI.getDebugLoc();
+
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction::iterator It = ++BB->getIterator();
+
+ MachineBasicBlock *ThisBB = BB;
+ MachineFunction *F = BB->getParent();
+ MachineBasicBlock *BBLoop = F->CreateMachineBasicBlock(LLV...
[truncated]
✅ With the latest revision this PR passed the C/C++ code formatter.
Xtensa::SAR, 3, Xtensa::ACCLO, 16, Xtensa::ACCHI, 17,
Xtensa::SCOMPARE1, 12, Xtensa::M0, 32, Xtensa::M1, 33,
Xtensa::M2, 34, Xtensa::M3, 35, Xtensa::WINDOWBASE, 72,
Xtensa::WINDOWSTART, 73, Xtensa::ATOMCTL, 99};
I don't understand this table, it's mixed MCPhysReg and then a random number? Should this be some kind of pair or struct?
Fixed.
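For readers following this thread: the concern is that the flat initializer interleaves MCPhysReg enumerators with raw special-register numbers. A paired layout along the lines the reviewer suggests could look like the sketch below; the struct name and exact shape are illustrative assumptions, not the code that eventually landed.

```cpp
// Illustrative only: pair each special register with its SR number instead of
// interleaving MCPhysReg values and magic numbers in one flat array.
struct SRDecodeEntry {
  MCPhysReg Reg;   // e.g. Xtensa::SCOMPARE1
  unsigned SRNum;  // special-register number encoded in the instruction
};

static const SRDecodeEntry SRDecoderTable[] = {
    {Xtensa::SAR, 3},         {Xtensa::SCOMPARE1, 12},
    {Xtensa::ACCLO, 16},      {Xtensa::ACCHI, 17},
    {Xtensa::M0, 32},         {Xtensa::M1, 33},
    {Xtensa::M2, 34},         {Xtensa::M3, 35},
    {Xtensa::WINDOWBASE, 72}, {Xtensa::WINDOWSTART, 73},
    {Xtensa::ATOMCTL, 99}};
```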
for (unsigned I = MVT::FIRST_INTEGER_VALUETYPE;
     I <= MVT::LAST_INTEGER_VALUETYPE; ++I) {
  MVT VT = MVT::SimpleValueType(I);
  if (isTypeLegal(VT)) {
You only make one type legal, so why not just setOperationAction({list_of_atomic_opcodes}, MVT::i32) legal? Alternatively define a list of legal types
Thank you very much for the review. I'm sorry for the delay; this PR was blocked by other PRs and tasks. I refactored most of the code according to your suggestions and removed this initialization.
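As a point of reference for the reviewer's suggestion (illustrative only, since the final patch drops this initialization entirely), marking the atomic opcodes for the single legal integer type can be written as one grouped call:

```cpp
// Sketch, assuming i32 is the only legal integer type on this target:
// mark every atomic RMW/cmpxchg opcode Expand for MVT::i32 in one call
// instead of looping over all integer value types.
setOperationAction({ISD::ATOMIC_CMP_SWAP, ISD::ATOMIC_SWAP,
                    ISD::ATOMIC_LOAD_ADD, ISD::ATOMIC_LOAD_SUB,
                    ISD::ATOMIC_LOAD_AND, ISD::ATOMIC_LOAD_OR,
                    ISD::ATOMIC_LOAD_XOR, ISD::ATOMIC_LOAD_NAND,
                    ISD::ATOMIC_LOAD_MIN, ISD::ATOMIC_LOAD_MAX,
                    ISD::ATOMIC_LOAD_UMIN, ISD::ATOMIC_LOAD_UMAX},
                   MVT::i32, Expand);
```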
// to have the best chance and doing something good with fences custom lower
// them
Remove or replace comment with something more informative about why it needs custom lowering
Fixed.
BB = BBExit;
auto St = BBExit->begin();

unsigned R5 = MRI.createVirtualRegister(RC);
Suggested change:
- unsigned R5 = MRI.createVirtualRegister(RC);
+ Register R5 = MRI.createVirtualRegister(RC);
Use Register instead of unsigned throughout
Fixed.
MachineBasicBlock *BBLoop1 = F->CreateMachineBasicBlock(LLVM_BB);
F->insert(++BB->getIterator(), BBLoop1);
BB->addSuccessor(BBLoop1);
MachineBasicBlock *BBLoop2 = F->CreateMachineBasicBlock(LLVM_BB);
F->insert(++BB->getIterator(), BBLoop2);
BB->addSuccessor(BBLoop2);
BBLoop2->addSuccessor(BBLoop1);
Can possibly use MachineBasicBlock::splitAt?
Fixed. Now AtomicExpand pass is used instead of this functionality.
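For context, MachineBasicBlock::splitAt is the helper the reviewer has in mind: it splits a block after a given instruction and makes the tail a successor, avoiding hand-built CFG wiring like the above. A rough, hypothetical usage sketch (moot in the final patch, which relies on AtomicExpand instead):

```cpp
// Hypothetical sketch: split BB right after MI; everything following MI moves
// into TailBB, and BB falls through to TailBB. The loop blocks would then be
// inserted between BB and TailBB instead of adding successors by hand.
MachineBasicBlock *TailBB = BB->splitAt(MI, /*UpdateLiveIns=*/true);
```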
return BB;
}

MachineBasicBlock *XtensaTargetLowering::emitAtomicRMW(MachineInstr &MI,
Is there a reason you need this custom post-selection expansion instead of using the default in AtomicExpandPass?
Fixed. Now AtomicExpand pass is used instead of this functionality.
MachineBasicBlock *
XtensaTargetLowering::emitAtomicSwap(MachineInstr &MI,
                                     MachineBasicBlock *BB) const {
  const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
Directly use the target specific instance
Fixed.
setOperationAction(ISD::ATOMIC_CMP_SWAP, VT, Expand);
setOperationAction(ISD::ATOMIC_SWAP, VT, Expand);
setOperationAction(ISD::ATOMIC_LOAD_ADD, VT, Expand);
setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Expand);
setOperationAction(ISD::ATOMIC_LOAD_AND, VT, Expand);
setOperationAction(ISD::ATOMIC_LOAD_OR, VT, Expand);
setOperationAction(ISD::ATOMIC_LOAD_XOR, VT, Expand);
setOperationAction(ISD::ATOMIC_LOAD_NAND, VT, Expand);
setOperationAction(ISD::ATOMIC_LOAD_MIN, VT, Expand);
setOperationAction(ISD::ATOMIC_LOAD_MAX, VT, Expand);
setOperationAction(ISD::ATOMIC_LOAD_UMIN, VT, Expand);
setOperationAction(ISD::ATOMIC_LOAD_UMAX, VT, Expand);
I'm not sure any of this is doing anything. The default expansion only emits the libcall (really should migrate these to using the explicit libcall action..), and I'm not sure what the default is for the atomic runtime libcalls.
To expand atomics you need to override shouldExpandAtomicRMWInIR
Fixed.
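For readers unfamiliar with the hook the reviewer mentions: AtomicExpandPass queries the target per instruction, and returning CmpXChg rewrites an atomicrmw into a compare-exchange loop in IR before selection. A minimal sketch of what such an override might look like (illustrative; the exact logic in the final patch may differ):

```cpp
// Illustrative sketch: expand every atomicrmw into a cmpxchg loop in IR.
// The 32-bit cmpxchg is then selected to WSR SCOMPARE1 + S32C1I, and
// sub-word operations are widened to 32 bits by AtomicExpandPass first.
TargetLowering::AtomicExpansionKind
XtensaTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
  return AtomicExpansionKind::CmpXChg;
}
```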
SDValue XtensaTargetLowering::LowerATOMIC_FENCE(SDValue Op,
                                                SelectionDAG &DAG) const {
SDLoc DL(Op);
SDValue Chain = Op.getOperand(0);
return DAG.getNode(XtensaISD::MEMW, DL, MVT::Other, Chain);
}
Formatting. Can you make this directly legal and select to memw, or do you really need to do something in the combiner with memw?
Fixed.
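To unpack the reviewer's alternative (a sketch under the assumption that the backend already defines a MEMW barrier instruction, which the C++ above references): instead of custom-lowering the fence in C++, the node could be left legal and matched to MEMW during instruction selection.

```cpp
// Sketch of the alternative: keep ATOMIC_FENCE legal and let instruction
// selection match it to the MEMW barrier, rather than custom-lowering the
// node to an XtensaISD::MEMW DAG node.
setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Legal);
```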
@@ -117,6 +118,10 @@ class XtensaTargetLowering : public TargetLowering {
  const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
  SelectionDAG &DAG) const override;

  bool shouldInsertFencesForAtomic(const Instruction *I) const override {
    return true;
Should get test coverage in test/Transforms/AtomicExpand
Added test for AtomicExpand pass.
Ping.
Implement Xtensa S32C1I Option and use s32c1i instruction to implement atomics operations.
Force-pushed from b1177ed to b127324.
Rebased on latest release.
Remove redundant code. Simplify atomics implementation by using AtomicExpand pass.
; RUN: llc -mtriple=xtensa -mattr=+windowed -verify-machineinstrs < %s | FileCheck %s --check-prefixes=XTENSA
; RUN: llc -mtriple=xtensa -mattr=+windowed,s32c1i -verify-machineinstrs < %s | FileCheck %s --check-prefixes=XTENSA-ATOMIC
Suggested change:
- ; RUN: llc -mtriple=xtensa -mattr=+windowed -verify-machineinstrs < %s | FileCheck %s --check-prefixes=XTENSA
- ; RUN: llc -mtriple=xtensa -mattr=+windowed,s32c1i -verify-machineinstrs < %s | FileCheck %s --check-prefixes=XTENSA-ATOMIC
+ ; RUN: llc -mtriple=xtensa -mattr=+windowed < %s | FileCheck %s --check-prefixes=XTENSA
+ ; RUN: llc -mtriple=xtensa -mattr=+windowed,s32c1i < %s | FileCheck %s --check-prefixes=XTENSA-ATOMIC
Fixed.
; RUN: llc -mtriple=xtensa -mattr=+windowed -verify-machineinstrs < %s | FileCheck %s --check-prefixes=XTENSA
; RUN: llc -mtriple=xtensa -mattr=+windowed,s32c1i -verify-machineinstrs < %s | FileCheck %s --check-prefixes=XTENSA-ATOMIC
Suggested change:
- ; RUN: llc -mtriple=xtensa -mattr=+windowed -verify-machineinstrs < %s | FileCheck %s --check-prefixes=XTENSA
- ; RUN: llc -mtriple=xtensa -mattr=+windowed,s32c1i -verify-machineinstrs < %s | FileCheck %s --check-prefixes=XTENSA-ATOMIC
+ ; RUN: llc -mtriple=xtensa -mattr=+windowed < %s | FileCheck %s --check-prefixes=XTENSA
+ ; RUN: llc -mtriple=xtensa -mattr=+windowed,s32c1i < %s | FileCheck %s --check-prefixes=XTENSA-ATOMIC
Don't blindly add -verify-machineinstrs to every test; this is the default in EXPENSIVE_CHECKS builds.
Thank you for the comment. Fixed.
; RUN: llc -mtriple=xtensa -mattr=+windowed -verify-machineinstrs < %s | FileCheck %s --check-prefixes=XTENSA
; RUN: llc -mtriple=xtensa -mattr=+windowed,s32c1i -mattr=+forced-atomics -verify-machineinstrs < %s | FileCheck %s --check-prefixes=XTENSA-ATOMIC
Suggested change:
- ; RUN: llc -mtriple=xtensa -mattr=+windowed -verify-machineinstrs < %s | FileCheck %s --check-prefixes=XTENSA
- ; RUN: llc -mtriple=xtensa -mattr=+windowed,s32c1i -mattr=+forced-atomics -verify-machineinstrs < %s | FileCheck %s --check-prefixes=XTENSA-ATOMIC
+ ; RUN: llc -mtriple=xtensa -mattr=+windowed < %s | FileCheck %s --check-prefixes=XTENSA
+ ; RUN: llc -mtriple=xtensa -mattr=+windowed,s32c1i -mattr=+forced-atomics < %s | FileCheck %s --check-prefixes=XTENSA-ATOMIC
Yeah, in EXPENSIVE_CHECKS mode, -verify-machineinstrs is the default unless TM->isMachineVerifierClean() returns false.
Thank you for the comment. Fixed.
config.suffixes = [".ll"]

targets = set(config.root.targets_to_build.split())
if not "Xtensa" in targets:
config.suffixes = [".ll"]

Suggested change:
- targets = set(config.root.targets_to_build.split())
- if not "Xtensa" in targets:
+ if not "Xtensa" in config.root.targets:
Fixed.
; RUN: llc -mtriple=xtensa -mattr=+windowed -verify-machineinstrs < %s | FileCheck %s --check-prefixes=XTENSA
; RUN: llc -mtriple=xtensa -mattr=+windowed,s32c1i -mattr=+forced-atomics -verify-machineinstrs < %s | FileCheck %s --check-prefixes=XTENSA-ATOMIC
Yeah, in EXPENSIVE_CHECKS mode, -verify-machineinstrs is the default unless TM->isMachineVerifierClean() returns false.
Can use a more elaborate comment to help future readers :) Not just copy the patch title.
Fixed tests and added additional comments to the PR description.
…37134) Implement Xtensa S32C1I Option. Implement atomic_cmp_swap_32 operation using s32c1i instruction. Use atomic_cmp_swap_32 operation and AtomicExpand pass to implement atomics operations.
Implement Xtensa S32C1I Option and use the s32c1i instruction to implement atomic operations.
Implement the atomic_cmp_swap_32 operation using the s32c1i instruction. Use the atomic_cmp_swap_32 operation and the AtomicExpand pass to implement atomic operations.