Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 19 additions & 6 deletions llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22807,15 +22807,15 @@ static SDValue scalarizeExtractedBinOp(SDNode *ExtElt, SelectionDAG &DAG,
Vec->getNumValues() != 1)
return SDValue();

// Targets may want to avoid this to prevent an expensive register transfer.
if (!TLI.shouldScalarizeBinop(Vec))
return SDValue();

EVT ResVT = ExtElt->getValueType(0);
if (Opc == ISD::SETCC &&
(ResVT != Vec.getValueType().getVectorElementType() || LegalTypes))
return SDValue();

// Targets may want to avoid this to prevent an expensive register transfer.
if (!TLI.shouldScalarizeBinop(Vec))
return SDValue();

// Extracting an element of a vector constant is constant-folded, so this
// transform is just replacing a vector op with a scalar op while moving the
// extract.
Expand All @@ -22834,8 +22834,21 @@ static SDValue scalarizeExtractedBinOp(SDNode *ExtElt, SelectionDAG &DAG,
EVT OpVT = Op0.getValueType().getVectorElementType();
Op0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, OpVT, Op0, Index);
Op1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, OpVT, Op1, Index);
return DAG.getSetCC(DL, ResVT, Op0, Op1,
cast<CondCodeSDNode>(Vec->getOperand(2))->get());
SDValue NewVal = DAG.getSetCC(
DL, ResVT, Op0, Op1, cast<CondCodeSDNode>(Vec->getOperand(2))->get());
// We may need to sign- or zero-extend the result to match the same
// behaviour as the vector version of SETCC.
unsigned VecBoolContents = TLI.getBooleanContents(Vec.getValueType());
if (ResVT != MVT::i1 &&
VecBoolContents != TargetLowering::UndefinedBooleanContent &&
VecBoolContents != TLI.getBooleanContents(ResVT)) {
if (VecBoolContents == TargetLowering::ZeroOrNegativeOneBooleanContent)
NewVal = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, ResVT, NewVal,
DAG.getValueType(MVT::i1));
else
NewVal = DAG.getZeroExtendInReg(NewVal, DL, MVT::i1);
}
return NewVal;
}
Op0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Op0, Index);
Op1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Op1, Index);
Expand Down
29 changes: 25 additions & 4 deletions llvm/test/CodeGen/AArch64/extract-vector-cmp.ll
Original file line number Diff line number Diff line change
Expand Up @@ -58,10 +58,11 @@ define i128 @extract_icmp_v1i128(ptr %p) {
; CHECK-LABEL: extract_icmp_v1i128:
; CHECK: // %bb.0:
; CHECK-NEXT: ldp x9, x8, [x0]
; CHECK-NEXT: mov x1, xzr
; CHECK-NEXT: orr x8, x9, x8
; CHECK-NEXT: cmp x8, #0
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: cset w8, eq
; CHECK-NEXT: sbfx x0, x8, #0, #1
; CHECK-NEXT: mov x1, x0
; CHECK-NEXT: ret
%load = load <1 x i128>, ptr %p, align 16
%cmp = icmp eq <1 x i128> %load, zeroinitializer
Expand Down Expand Up @@ -141,6 +142,26 @@ for.cond.cleanup:
}


; NOTE(review): presumably a regression test for issue #121372, going by the
; function name - confirm.
; Lane 0 is extracted from a vector compare whose operand is itself a
; sign-extended vector compare result; the extracted i1 is then sign-extended
; to i32 and returned.
; The expected assembly below keeps an explicit sign-extension (sbfx) of the
; first scalar compare result (cset) before it feeds the second compare.
; TODO: Combine the sbfx(cset) into a csetm
define i32 @issue_121372(<4 x i32> %v) {
; CHECK-LABEL: issue_121372:
; CHECK: // %bb.0:
; CHECK-NEXT: fmov w8, s0
; CHECK-NEXT: cmp w8, #0
; CHECK-NEXT: cset w8, eq
; CHECK-NEXT: sbfx w8, w8, #0, #1
; CHECK-NEXT: cmp w8, #1
; CHECK-NEXT: csetm w0, lt
; CHECK-NEXT: ret
; Per-lane unsigned "v <= 0" mask.
%cmp_ule = icmp ule <4 x i32> %v, zeroinitializer
; Widen each i1 lane to i32 by sign-extension (true lanes become -1).
%sext_v4i1 = sext <4 x i1> %cmp_ule to <4 x i32>
; Per-lane signed "0 >= widened mask".
%cmp_sge = icmp sge <4 x i32> zeroinitializer, %sext_v4i1
; Only lane 0 of the second compare is used.
%ext = extractelement <4 x i1> %cmp_sge, i32 0
%res = sext i1 %ext to i32
ret i32 %res
}


; Negative tests

define i1 @extract_icmp_v4i32_splat_rhs(<4 x i32> %a, i32 %b) {
Expand All @@ -163,9 +184,9 @@ define i1 @extract_icmp_v4i32_splat_rhs_mul_use(<4 x i32> %a, ptr %p) {
; CHECK-LABEL: extract_icmp_v4i32_splat_rhs_mul_use:
; CHECK: // %bb.0:
; CHECK-NEXT: movi v1.4s, #235
; CHECK-NEXT: adrp x9, .LCPI7_0
; CHECK-NEXT: adrp x9, .LCPI8_0
; CHECK-NEXT: mov x8, x0
; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI7_0]
; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI8_0]
; CHECK-NEXT: cmhi v0.4s, v1.4s, v0.4s
; CHECK-NEXT: xtn v1.4h, v0.4s
; CHECK-NEXT: and v0.16b, v0.16b, v2.16b
Expand Down
Loading