Skip to content

Commit a475180

Browse files
authored
[PowerPC] Use setbc for values from vector compare conditions (#114858)
On P10 (ISA 3.1), use the setbc and setbcr instructions to get integer values from the summary condition results (CR6) of vector compare instructions.
1 parent b185b85 commit a475180

File tree

5 files changed

+122
-19
lines changed

5 files changed

+122
-19
lines changed

llvm/lib/Target/PowerPC/PPCISelLowering.cpp

Lines changed: 45 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1842,6 +1842,10 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
18421842
case PPCISD::LXVRZX: return "PPCISD::LXVRZX";
18431843
case PPCISD::STORE_COND:
18441844
return "PPCISD::STORE_COND";
1845+
case PPCISD::SETBC:
1846+
return "PPCISD::SETBC";
1847+
case PPCISD::SETBCR:
1848+
return "PPCISD::SETBCR";
18451849
}
18461850
return nullptr;
18471851
}
@@ -11256,31 +11260,55 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
1125611260
EVT VTs[] = { Op.getOperand(2).getValueType(), MVT::Glue };
1125711261
SDValue CompNode = DAG.getNode(PPCISD::VCMP_rec, dl, VTs, Ops);
1125811262

11259-
// Now that we have the comparison, emit a copy from the CR to a GPR.
11260-
// This is flagged to the above dot comparison.
11261-
SDValue Flags = DAG.getNode(PPCISD::MFOCRF, dl, MVT::i32,
11262-
DAG.getRegister(PPC::CR6, MVT::i32),
11263-
CompNode.getValue(1));
11264-
1126511263
// Unpack the result based on how the target uses it.
11266-
unsigned BitNo; // Bit # of CR6.
11267-
bool InvertBit; // Invert result?
11264+
unsigned BitNo; // Bit # of CR6.
11265+
bool InvertBit; // Invert result?
11266+
unsigned Bitx;
11267+
unsigned SetOp;
1126811268
switch (Op.getConstantOperandVal(1)) {
11269-
default: // Can't happen, don't crash on invalid number though.
11270-
case 0: // Return the value of the EQ bit of CR6.
11271-
BitNo = 0; InvertBit = false;
11269+
default: // Can't happen, don't crash on invalid number though.
11270+
case 0: // Return the value of the EQ bit of CR6.
11271+
BitNo = 0;
11272+
InvertBit = false;
11273+
Bitx = PPC::sub_eq;
11274+
SetOp = PPCISD::SETBC;
1127211275
break;
11273-
case 1: // Return the inverted value of the EQ bit of CR6.
11274-
BitNo = 0; InvertBit = true;
11276+
case 1: // Return the inverted value of the EQ bit of CR6.
11277+
BitNo = 0;
11278+
InvertBit = true;
11279+
Bitx = PPC::sub_eq;
11280+
SetOp = PPCISD::SETBCR;
1127511281
break;
11276-
case 2: // Return the value of the LT bit of CR6.
11277-
BitNo = 2; InvertBit = false;
11282+
case 2: // Return the value of the LT bit of CR6.
11283+
BitNo = 2;
11284+
InvertBit = false;
11285+
Bitx = PPC::sub_lt;
11286+
SetOp = PPCISD::SETBC;
1127811287
break;
11279-
case 3: // Return the inverted value of the LT bit of CR6.
11280-
BitNo = 2; InvertBit = true;
11288+
case 3: // Return the inverted value of the LT bit of CR6.
11289+
BitNo = 2;
11290+
InvertBit = true;
11291+
Bitx = PPC::sub_lt;
11292+
SetOp = PPCISD::SETBCR;
1128111293
break;
1128211294
}
1128311295

11296+
SDValue GlueOp = CompNode.getValue(1);
11297+
if (Subtarget.isISA3_1()) {
11298+
SDValue SubRegIdx = DAG.getTargetConstant(Bitx, dl, MVT::i32);
11299+
SDValue CR6Reg = DAG.getRegister(PPC::CR6, MVT::i32);
11300+
SDValue CRBit =
11301+
SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::i1,
11302+
CR6Reg, SubRegIdx, GlueOp),
11303+
0);
11304+
return DAG.getNode(SetOp, dl, MVT::i32, CRBit);
11305+
}
11306+
11307+
// Now that we have the comparison, emit a copy from the CR to a GPR.
11308+
// This is flagged to the above dot comparison.
11309+
SDValue Flags = DAG.getNode(PPCISD::MFOCRF, dl, MVT::i32,
11310+
DAG.getRegister(PPC::CR6, MVT::i32), GlueOp);
11311+
1128411312
// Shift the bit into the low position.
1128511313
Flags = DAG.getNode(ISD::SRL, dl, MVT::i32, Flags,
1128611314
DAG.getConstant(8 - (3 - BitNo), dl, MVT::i32));

llvm/lib/Target/PowerPC/PPCISelLowering.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -501,6 +501,12 @@ namespace llvm {
501501
/// Constrained floating point add in round-to-zero mode.
502502
STRICT_FADDRTZ,
503503

504+
/// SETBC - The ISA 3.1 (P10) SETBC instruction.
505+
SETBC,
506+
507+
/// SETBCR - The ISA 3.1 (P10) SETBCR instruction.
508+
SETBCR,
509+
504510
// NOTE: The nodes below may require PC-Rel specific patterns if the
505511
// address could be PC-Relative. When adding new nodes below, consider
506512
// whether or not the address can be PC-Relative and add the corresponding

llvm/lib/Target/PowerPC/PPCInstrP10.td

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,10 @@ def SDT_PPCxxmfacc : SDTypeProfile<1, 1, [
7979
SDTCisVT<0, v512i1>, SDTCisVT<1, v512i1>
8080
]>;
8181

82+
// Type profile shared by the PPCsetbc and PPCsetbcr SDNodes: one result and
// one operand, both constrained to integer types.
def SDT_PPCsetbc : SDTypeProfile<1, 1, [
83+
SDTCisInt<0>, SDTCisInt<1>
84+
]>;
85+
8286
//===----------------------------------------------------------------------===//
8387
// ISA 3.1 specific PPCISD nodes.
8488
//
@@ -91,6 +95,8 @@ def PPCAccExtractVsx : SDNode<"PPCISD::EXTRACT_VSX_REG", SDT_PPCAccExtractVsx,
9195
def PPCPairExtractVsx : SDNode<"PPCISD::EXTRACT_VSX_REG", SDT_PPCPairExtractVsx,
9296
[]>;
9397
def PPCxxmfacc : SDNode<"PPCISD::XXMFACC", SDT_PPCxxmfacc, []>;
98+
def PPCsetbc : SDNode<"PPCISD::SETBC", SDT_PPCsetbc, []>;
99+
def PPCsetbcr : SDNode<"PPCISD::SETBCR", SDT_PPCsetbc, []>;
94100

95101
//===----------------------------------------------------------------------===//
96102

@@ -1401,10 +1407,12 @@ let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1, Predicates = [P
14011407

14021408
let Predicates = [IsISA3_1] in {
14031409
def SETBC : XForm_XT5_BI5<31, 384, (outs gprc:$RST), (ins crbitrc:$BI),
1404-
"setbc $RST, $BI", IIC_IntCompare, []>,
1410+
"setbc $RST, $BI", IIC_IntCompare,
1411+
[(set i32:$RST, (PPCsetbc i1:$BI))]>,
14051412
SExt32To64, ZExt32To64;
14061413
def SETBCR : XForm_XT5_BI5<31, 416, (outs gprc:$RST), (ins crbitrc:$BI),
1407-
"setbcr $RST, $BI", IIC_IntCompare, []>,
1414+
"setbcr $RST, $BI", IIC_IntCompare,
1415+
[(set i32:$RST, (PPCsetbcr i1:$BI))]>,
14081416
SExt32To64, ZExt32To64;
14091417
def SETNBC : XForm_XT5_BI5<31, 448, (outs gprc:$RST), (ins crbitrc:$BI),
14101418
"setnbc $RST, $BI", IIC_IntCompare, []>,
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
; RUN: llc -mcpu=pwr10 -mtriple=powerpc64-ibm-aix -ppc-asm-full-reg-names \
2+
; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck %s
3+
; RUN: llc -mcpu=pwr10 -mtriple=powerpc64le-unknown-linux-gnu \
4+
; RUN: -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | FileCheck %s
5+
6+
; The leading i32 2 argument selects the LT bit of CR6 (non-inverted), so the
; pwr10 RUN lines expect the record-form compare followed by setbc on
; 4*cr6+lt.
define range(i64 -2147483648, 2147483648) i64 @cmpgt(<1 x i128> noundef %a, <1 x i128> noundef %b) local_unnamed_addr {
7+
; CHECK: vcmpgtuq. v2, v3, v2
8+
; CHECK: setbc r3, 4*cr6+lt
9+
entry:
10+
%0 = tail call i32 @llvm.ppc.altivec.vcmpgtuq.p(i32 2, <1 x i128> %b, <1 x i128> %a)
11+
%conv = sext i32 %0 to i64
12+
ret i64 %conv
13+
}
14+
15+
declare i32 @llvm.ppc.altivec.vcmpgtuq.p(i32, <1 x i128>, <1 x i128>)
Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
; RUN: llc -mcpu=pwr10 -mtriple=powerpc64-ibm-aix -ppc-asm-full-reg-names \
2+
; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck %s
3+
; RUN: llc -mcpu=pwr10 -mtriple=powerpc64le-unknown-linux-gnu \
4+
; RUN: -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | FileCheck %s
5+
; RUN: llc -mcpu=pwr10 -mtriple=powerpc-ibm-aix -ppc-asm-full-reg-names \
6+
; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck %s
7+
8+
; Selector i32 2 requests the LT bit of CR6 (non-inverted); the expected
; output is vcmpgtsw. followed by setbc on 4*cr6+lt.
define signext i32 @cmpgtw(<4 x i32> noundef %a, <4 x i32> noundef %b) local_unnamed_addr {
9+
; CHECK: vcmpgtsw. v2, v2, v3
10+
; CHECK: setbc r3, 4*cr6+lt
11+
entry:
12+
%0 = tail call i32 @llvm.ppc.altivec.vcmpgtsw.p(i32 2, <4 x i32> %a, <4 x i32> %b)
13+
ret i32 %0
14+
}
15+
16+
; Selector i32 3 requests the inverted LT bit of CR6; the expected output is
; vcmpequw. followed by setbcr (the reversed form) on 4*cr6+lt.
define signext i32 @cmpanynew(<4 x i32> noundef %a, <4 x i32> noundef %b) local_unnamed_addr {
17+
; CHECK: vcmpequw. v2, v2, v3
18+
; CHECK: setbcr r3, 4*cr6+lt
19+
entry:
20+
%0 = tail call i32 @llvm.ppc.altivec.vcmpequw.p(i32 3, <4 x i32> %a, <4 x i32> %b)
21+
ret i32 %0
22+
}
23+
24+
; Selector i32 0 requests the EQ bit of CR6 (non-inverted); the expected
; output is vcmpequh. followed by setbc on 4*cr6+eq.
define signext i32 @cmpallneh(<8 x i16> noundef %a, <8 x i16> noundef %b) local_unnamed_addr {
25+
; CHECK: vcmpequh. v2, v2, v3
26+
; CHECK: setbc r3, 4*cr6+eq
27+
entry:
28+
%0 = tail call i32 @llvm.ppc.altivec.vcmpequh.p(i32 0, <8 x i16> %a, <8 x i16> %b)
29+
ret i32 %0
30+
}
31+
32+
; Selector i32 1 requests the inverted EQ bit of CR6; the expected output is
; vcmpequb. followed by setbcr (the reversed form) on 4*cr6+eq.
define signext i32 @cmpeqb(<16 x i8> noundef %a, <16 x i8> noundef %b) local_unnamed_addr {
33+
; CHECK: vcmpequb. v2, v2, v3
34+
; CHECK: setbcr r3, 4*cr6+eq
35+
entry:
36+
%0 = tail call i32 @llvm.ppc.altivec.vcmpequb.p(i32 1, <16 x i8> %a, <16 x i8> %b)
37+
ret i32 %0
38+
}
39+
40+
declare i32 @llvm.ppc.altivec.vcmpgtsw.p(i32, <4 x i32>, <4 x i32>)
41+
42+
declare i32 @llvm.ppc.altivec.vcmpequw.p(i32, <4 x i32>, <4 x i32>)
43+
44+
declare i32 @llvm.ppc.altivec.vcmpequh.p(i32, <8 x i16>, <8 x i16>)
45+
46+
declare i32 @llvm.ppc.altivec.vcmpequb.p(i32, <16 x i8>, <16 x i8>)

0 commit comments

Comments
 (0)