-
Notifications
You must be signed in to change notification settings - Fork 13.6k
[PowerPC] Use setbc for values from vector compare conditions #114858
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-backend-powerpc Author: None (RolandF77) Changes: For P10, use the setbc instruction to get int values from vector compare conditions. Full diff: https://github.com/llvm/llvm-project/pull/114858.diff 5 Files Affected:
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index d8f3095ed7fb68..a5cd136478c096 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -1858,6 +1858,10 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
case PPCISD::LXVRZX: return "PPCISD::LXVRZX";
case PPCISD::STORE_COND:
return "PPCISD::STORE_COND";
+ case PPCISD::SETBC:
+ return "PPCISD::SETBC";
+ case PPCISD::SETBCR:
+ return "PPCISD::SETBCR";
}
return nullptr;
}
@@ -11264,31 +11268,55 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
EVT VTs[] = { Op.getOperand(2).getValueType(), MVT::Glue };
SDValue CompNode = DAG.getNode(PPCISD::VCMP_rec, dl, VTs, Ops);
- // Now that we have the comparison, emit a copy from the CR to a GPR.
- // This is flagged to the above dot comparison.
- SDValue Flags = DAG.getNode(PPCISD::MFOCRF, dl, MVT::i32,
- DAG.getRegister(PPC::CR6, MVT::i32),
- CompNode.getValue(1));
-
// Unpack the result based on how the target uses it.
- unsigned BitNo; // Bit # of CR6.
- bool InvertBit; // Invert result?
+ unsigned BitNo; // Bit # of CR6.
+ bool InvertBit; // Invert result?
+ unsigned Bitx;
+ unsigned SetOp;
switch (Op.getConstantOperandVal(1)) {
- default: // Can't happen, don't crash on invalid number though.
- case 0: // Return the value of the EQ bit of CR6.
- BitNo = 0; InvertBit = false;
+ default: // Can't happen, don't crash on invalid number though.
+ case 0: // Return the value of the EQ bit of CR6.
+ BitNo = 0;
+ InvertBit = false;
+ Bitx = PPC::sub_eq;
+ SetOp = PPCISD::SETBC;
break;
- case 1: // Return the inverted value of the EQ bit of CR6.
- BitNo = 0; InvertBit = true;
+ case 1: // Return the inverted value of the EQ bit of CR6.
+ BitNo = 0;
+ InvertBit = true;
+ Bitx = PPC::sub_eq;
+ SetOp = PPCISD::SETBCR;
break;
- case 2: // Return the value of the LT bit of CR6.
- BitNo = 2; InvertBit = false;
+ case 2: // Return the value of the LT bit of CR6.
+ BitNo = 2;
+ InvertBit = false;
+ Bitx = PPC::sub_lt;
+ SetOp = PPCISD::SETBC;
break;
- case 3: // Return the inverted value of the LT bit of CR6.
- BitNo = 2; InvertBit = true;
+ case 3: // Return the inverted value of the LT bit of CR6.
+ BitNo = 2;
+ InvertBit = true;
+ Bitx = PPC::sub_lt;
+ SetOp = PPCISD::SETBCR;
break;
}
+ if (Subtarget.isISA3_1()) {
+ SDValue SubRegIdx = DAG.getTargetConstant(Bitx, dl, MVT::i32);
+ SDValue CR6Reg = DAG.getRegister(PPC::CR6, MVT::i32);
+ SDValue CRBit =
+ SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::i1,
+ CR6Reg, SubRegIdx, CompNode.getValue(1)),
+ 0);
+ return DAG.getNode(SetOp, dl, MVT::i32, CRBit);
+ }
+
+ // Now that we have the comparison, emit a copy from the CR to a GPR.
+ // This is flagged to the above dot comparison.
+ SDValue Flags =
+ DAG.getNode(PPCISD::MFOCRF, dl, MVT::i32,
+ DAG.getRegister(PPC::CR6, MVT::i32), CompNode.getValue(1));
+
// Shift the bit into the low position.
Flags = DAG.getNode(ISD::SRL, dl, MVT::i32, Flags,
DAG.getConstant(8 - (3 - BitNo), dl, MVT::i32));
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h
index dde45e4cf6f4ae..1c63444db427db 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -501,6 +501,12 @@ namespace llvm {
/// Constrained floating point add in round-to-zero mode.
STRICT_FADDRTZ,
+ /// SETBC - The ISA 3.1 (P10) SETBC instruction.
+ SETBC,
+
+ /// SETBCR - The ISA 3.1 (P10) SETBCR instruction.
+ SETBCR,
+
// NOTE: The nodes below may require PC-Rel specific patterns if the
// address could be PC-Relative. When adding new nodes below, consider
// whether or not the address can be PC-Relative and add the corresponding
diff --git a/llvm/lib/Target/PowerPC/PPCInstrP10.td b/llvm/lib/Target/PowerPC/PPCInstrP10.td
index c4b8597b1df9ff..1b7c54bb5ce185 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrP10.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrP10.td
@@ -79,6 +79,10 @@ def SDT_PPCxxmfacc : SDTypeProfile<1, 1, [
SDTCisVT<0, v512i1>, SDTCisVT<1, v512i1>
]>;
+def SDT_PPCsetbc : SDTypeProfile<1, 1, [
+ SDTCisInt<0>, SDTCisInt<1>
+]>;
+
//===----------------------------------------------------------------------===//
// ISA 3.1 specific PPCISD nodes.
//
@@ -91,6 +95,8 @@ def PPCAccExtractVsx : SDNode<"PPCISD::EXTRACT_VSX_REG", SDT_PPCAccExtractVsx,
def PPCPairExtractVsx : SDNode<"PPCISD::EXTRACT_VSX_REG", SDT_PPCPairExtractVsx,
[]>;
def PPCxxmfacc : SDNode<"PPCISD::XXMFACC", SDT_PPCxxmfacc, []>;
+def PPCsetbc : SDNode<"PPCISD::SETBC", SDT_PPCsetbc, []>;
+def PPCsetbcr : SDNode<"PPCISD::SETBCR", SDT_PPCsetbc, []>;
//===----------------------------------------------------------------------===//
@@ -1397,10 +1403,12 @@ let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1, Predicates = [P
let Predicates = [IsISA3_1] in {
def SETBC : XForm_XT5_BI5<31, 384, (outs gprc:$RST), (ins crbitrc:$BI),
- "setbc $RST, $BI", IIC_IntCompare, []>,
+ "setbc $RST, $BI", IIC_IntCompare,
+ [(set i32:$RST, (PPCsetbc i1:$BI))]>,
SExt32To64, ZExt32To64;
def SETBCR : XForm_XT5_BI5<31, 416, (outs gprc:$RST), (ins crbitrc:$BI),
- "setbcr $RST, $BI", IIC_IntCompare, []>,
+ "setbcr $RST, $BI", IIC_IntCompare,
+ [(set i32:$RST, (PPCsetbcr i1:$BI))]>,
SExt32To64, ZExt32To64;
def SETNBC : XForm_XT5_BI5<31, 448, (outs gprc:$RST), (ins crbitrc:$BI),
"setnbc $RST, $BI", IIC_IntCompare, []>,
diff --git a/llvm/test/CodeGen/PowerPC/vcmp-setbc-quad.ll b/llvm/test/CodeGen/PowerPC/vcmp-setbc-quad.ll
new file mode 100644
index 00000000000000..4c8d34895f6b20
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/vcmp-setbc-quad.ll
@@ -0,0 +1,15 @@
+; RUN: llc -mcpu=pwr10 -mtriple=powerpc64-ibm-aix -ppc-asm-full-reg-names \
+; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck %s
+; RUN: llc -mcpu=pwr10 -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN: -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | FileCheck %s
+
+define range(i64 -2147483648, 2147483648) i64 @cmpgt(<1 x i128> noundef %a, <1 x i128> noundef %b) local_unnamed_addr {
+; CHECK: vcmpgtuq. v2, v3, v2
+; CHECK: setbc r3, 4*cr6+lt
+entry:
+ %0 = tail call i32 @llvm.ppc.altivec.vcmpgtuq.p(i32 2, <1 x i128> %b, <1 x i128> %a)
+ %conv = sext i32 %0 to i64
+ ret i64 %conv
+}
+
+declare i32 @llvm.ppc.altivec.vcmpgtuq.p(i32, <1 x i128>, <1 x i128>)
diff --git a/llvm/test/CodeGen/PowerPC/vcmp-setbc.ll b/llvm/test/CodeGen/PowerPC/vcmp-setbc.ll
new file mode 100644
index 00000000000000..2c9088b61b034f
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/vcmp-setbc.ll
@@ -0,0 +1,46 @@
+; RUN: llc -mcpu=pwr10 -mtriple=powerpc64-ibm-aix -ppc-asm-full-reg-names \
+; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck %s
+; RUN: llc -mcpu=pwr10 -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN: -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | FileCheck %s
+; RUN: llc -mcpu=pwr10 -mtriple=powerpc-ibm-aix -ppc-asm-full-reg-names \
+; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck %s
+
+define signext i32 @cmpgtw(<4 x i32> noundef %a, <4 x i32> noundef %b) local_unnamed_addr {
+; CHECK: vcmpgtsw. v2, v2, v3
+; CHECK: setbc r3, 4*cr6+lt
+entry:
+ %0 = tail call i32 @llvm.ppc.altivec.vcmpgtsw.p(i32 2, <4 x i32> %a, <4 x i32> %b)
+ ret i32 %0
+}
+
+define signext i32 @cmpanynew(<4 x i32> noundef %a, <4 x i32> noundef %b) local_unnamed_addr {
+; CHECK: vcmpequw. v2, v2, v3
+; CHECK: setbcr r3, 4*cr6+lt
+entry:
+ %0 = tail call i32 @llvm.ppc.altivec.vcmpequw.p(i32 3, <4 x i32> %a, <4 x i32> %b)
+ ret i32 %0
+}
+
+define signext i32 @cmpallneh(<8 x i16> noundef %a, <8 x i16> noundef %b) local_unnamed_addr {
+; CHECK: vcmpequh. v2, v2, v3
+; CHECK: setbc r3, 4*cr6+eq
+entry:
+ %0 = tail call i32 @llvm.ppc.altivec.vcmpequh.p(i32 0, <8 x i16> %a, <8 x i16> %b)
+ ret i32 %0
+}
+
+define signext i32 @cmpeqb(<16 x i8> noundef %a, <16 x i8> noundef %b) local_unnamed_addr {
+; CHECK: vcmpequb. v2, v2, v3
+; CHECK: setbcr r3, 4*cr6+eq
+entry:
+ %0 = tail call i32 @llvm.ppc.altivec.vcmpequb.p(i32 1, <16 x i8> %a, <16 x i8> %b)
+ ret i32 %0
+}
+
+declare i32 @llvm.ppc.altivec.vcmpgtsw.p(i32, <4 x i32>, <4 x i32>)
+
+declare i32 @llvm.ppc.altivec.vcmpequw.p(i32, <4 x i32>, <4 x i32>)
+
+declare i32 @llvm.ppc.altivec.vcmpequh.p(i32, <8 x i16>, <8 x i16>)
+
+declare i32 @llvm.ppc.altivec.vcmpequb.p(i32, <16 x i8>, <16 x i8>)
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM.
SDValue CR6Reg = DAG.getRegister(PPC::CR6, MVT::i32); | ||
SDValue CRBit = | ||
SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::i1, | ||
CR6Reg, SubRegIdx, CompNode.getValue(1)), |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nit: Maybe pull out CompNode.getValue(1),
since it's used in both the P10 and pre-P10 cases.
✅ With the latest revision this PR passed the C/C++ code formatter. |
I think the Linux x64 failure was an unresolved lldb test - not related to this patch. Windows x64 passed. |
For P10 use the setbc instruction to get int values from vector compare conditions.